diff options
| author | Douglas B. Rumbaugh <doug@douglasrumbaugh.com> | 2025-10-31 23:41:32 -0400 |
|---|---|---|
| committer | Douglas B. Rumbaugh <doug@douglasrumbaugh.com> | 2025-10-31 23:41:32 -0400 |
| commit | 06a02a3a50baf261a0f1c998bfd02269c3ed45de (patch) | |
| tree | 00aa66e09a31b2563221c385e5ac129a57082729 /src/lexer.c | |
| download | hush-06a02a3a50baf261a0f1c998bfd02269c3ed45de.tar.gz | |
Initial commit
Diffstat (limited to 'src/lexer.c')
| -rw-r--r-- | src/lexer.c | 225 |
1 files changed, 225 insertions, 0 deletions
diff --git a/src/lexer.c b/src/lexer.c new file mode 100644 index 0000000..a6af008 --- /dev/null +++ b/src/lexer.c @@ -0,0 +1,225 @@ +/* + * + */ + +#include "lexer.h" +#include "config.h" + +enum parser_state { + LEX_COMMAND, + LEX_IFILE, + LEX_OFILE, + LEX_ARGUMENT, + LEX_VARIABLE +}; + +static char *token_tostr(enum token_type type) { + switch (type) { + case TKN_FILENAME: + return "FILENAME"; + case TKN_COMMAND: + return "COMMAND"; + case TKN_PIPE: + return "PIPE"; + case TKN_IN_REDIR: + return "INPUT REDIR"; + case TKN_OUT_REDIR: + return "OUT REDIR"; + case TKN_ARG: + return "ARGUMENT"; + case TKN_INVALID: + return "INVALID"; + case TKN_VARKEY: + return "VARKEY"; + case TKN_VARVAL: + return "VARVAL"; + default: + return "INVALID"; + } +} + +static token *token_create(char *text) { + token *tkn = malloc(sizeof(token)); + if (!tkn) { + return NULL; + } + + tkn->text = text; + tkn->type = TKN_INVALID; + tkn->next = NULL; + + return tkn; +} + +char *trim(char *str) { + if (!str) { + return NULL; + } + + /* skip any leading whitespace */ + for (; isspace(*str); str++) + ; + + /* skip any tailing whitespace */ + size_t len = strlen(str); + char *str_p = str + len; + for (; isspace(*str_p) && str_p >= str; str_p--) { + *str_p = '\0'; + } + + return str; +} + +static enum token_type token_type_from_state(enum parser_state state) { + switch (state) { + case LEX_ARGUMENT: + return TKN_ARG; + case LEX_IFILE: + case LEX_OFILE: + return TKN_FILENAME; + case LEX_COMMAND: + return TKN_COMMAND; + case LEX_VARIABLE: + return TKN_VARVAL; + default: + return TKN_INVALID; + } +} + +token *parse_command(char *cmdstr) { + + token *tkn_head = token_create(cmdstr); + token **tkn = &tkn_head; + enum parser_state state = LEX_COMMAND; + bool previous_whitespace = false; + + do { + /* determine if parser state should change due to special characters */ + if (*cmdstr == '<') { + (*tkn)->type = TKN_IN_REDIR; + state = LEX_IFILE; + goto advance_token; + } else if (*cmdstr == '>') { + (*tkn)->type = TKN_OUT_REDIR; + state = LEX_OFILE; + goto advance_token; + } else if (*cmdstr == '|') { + (*tkn)->type = TKN_PIPE; + state = LEX_COMMAND; + goto advance_token; + } else if (*cmdstr == '=') { + (*tkn)->type = TKN_VARKEY; + state = LEX_VARIABLE; + goto advance_token; + } else if (isspace(*cmdstr)) { + (*tkn)->type = token_type_from_state(state); + + if (state == LEX_VARIABLE) { + state = LEX_COMMAND; + } else if (state == LEX_COMMAND) { + state = LEX_ARGUMENT; + } + + goto advance_token; + } + + /* otherwise, maintain current state and move to next character */ + continue; + + advance_token: + *cmdstr = '\0'; + + /* advance over any extra whitespace between tokens */ + while (isspace(*(cmdstr + 1))) { + cmdstr++; + } + + if (*(cmdstr + 1)) { + if (!((*tkn)->next = token_create(cmdstr + 1))) { + fprintf(stderr, "ERROR: Out of memory\n"); + exit(EXIT_FAILURE); + } + tkn = &(*tkn)->next; + } + + } while (*(++cmdstr)); + + return tkn_head; +} + +token *validate_command(token *cmd) { + token *prev = NULL; + size_t arg_cnt = 0; + bool infile_defined = false; + bool outfile_defined = false; + bool looking_for_file = false; + + for (token *tkn = cmd; tkn; tkn = tkn->next) { + if (!prev) { + if (tkn->type != TKN_COMMAND && tkn->type != TKN_VARKEY) { + return tkn; + } + } else if (prev->type == TKN_VARVAL) { + return tkn; + } else if (tkn->type == TKN_ARG) { + arg_cnt++; + if ((prev->type != TKN_COMMAND && prev->type != TKN_ARG) || + arg_cnt > MAX_ARGUMENT_CNT) { + return tkn; + } + } else if (tkn->type == TKN_FILENAME) { + if (prev->type == TKN_IN_REDIR && !infile_defined) { + infile_defined = true; + looking_for_file = false; + } else if (prev->type == TKN_OUT_REDIR && !outfile_defined) { + outfile_defined = true; + looking_for_file = false; + } else { + return tkn; + } + } else if (tkn->type == TKN_COMMAND) { + if (prev->type != TKN_PIPE) { + return tkn; + } + + arg_cnt = 0; + } else if (tkn->type == TKN_IN_REDIR || tkn->type == TKN_OUT_REDIR) { + if (prev->type == TKN_PIPE || prev->type == TKN_IN_REDIR || + prev->type == TKN_OUT_REDIR) { + return tkn; + } + + looking_for_file = true; + } else if (tkn->type == TKN_INVALID) { + return tkn; + } else if (tkn->type == TKN_VARVAL) { + if (prev->type != TKN_VARKEY) { + return tkn; + } + } + + if (looking_for_file && + (tkn->type != TKN_IN_REDIR && tkn->type != TKN_OUT_REDIR)) { + return tkn; + } + + prev = tkn; + } + + return NULL; +} + +void print_parsed_command(FILE *file, token *cmd) { + for (token *tkn = cmd; tkn; tkn = tkn->next) { + fprintf(file, "%s\t%s\n", tkn->text, token_tostr(tkn->type)); + } +} + +void destroy_tokens(token *tkns) { + token **tkn_ptr = &tkns->next; + while (*tkn_ptr) { + free(tkns); + tkns = *tkn_ptr; + tkn_ptr = &tkns->next; + } +} |