aboutsummaryrefslogtreecommitdiffstats
path: root/src/lexer.c
diff options
context:
space:
mode:
author Douglas B. Rumbaugh <doug@douglasrumbaugh.com> 2025-10-31 23:41:32 -0400
committer Douglas B. Rumbaugh <doug@douglasrumbaugh.com> 2025-10-31 23:41:32 -0400
commit 06a02a3a50baf261a0f1c998bfd02269c3ed45de (patch)
tree 00aa66e09a31b2563221c385e5ac129a57082729 /src/lexer.c
download hush-06a02a3a50baf261a0f1c998bfd02269c3ed45de.tar.gz
Initial commit
Diffstat (limited to 'src/lexer.c')
-rw-r--r--src/lexer.c225
1 files changed, 225 insertions, 0 deletions
diff --git a/src/lexer.c b/src/lexer.c
new file mode 100644
index 0000000..a6af008
--- /dev/null
+++ b/src/lexer.c
@@ -0,0 +1,225 @@
+/*
+ * lexer.c - splits a command line into typed tokens and validates the
+ * resulting token list.
+ */
+
+#include "lexer.h"
+#include "config.h"
+
+enum parser_state { /* lexer context; selects the type given to the text being scanned */
+ LEX_COMMAND, /* scanning a command name (initial state, and after '|') */
+ LEX_IFILE, /* scanning the filename that follows '<' */
+ LEX_OFILE, /* scanning the filename that follows '>' */
+ LEX_ARGUMENT, /* scanning arguments after a command name */
+ LEX_VARIABLE /* scanning the value that follows '=' */
+};
+
+/*
+ * Map a token type to a fixed, human-readable label.
+ *
+ * Any type without a dedicated label (including TKN_INVALID itself) is
+ * reported as "INVALID". The returned pointer refers to a string literal and
+ * must not be modified or freed.
+ */
+static char *token_tostr(enum token_type type) {
+    char *label = "INVALID";
+
+    switch (type) {
+    case TKN_FILENAME:
+        label = "FILENAME";
+        break;
+    case TKN_COMMAND:
+        label = "COMMAND";
+        break;
+    case TKN_PIPE:
+        label = "PIPE";
+        break;
+    case TKN_IN_REDIR:
+        label = "INPUT REDIR";
+        break;
+    case TKN_OUT_REDIR:
+        label = "OUT REDIR";
+        break;
+    case TKN_ARG:
+        label = "ARGUMENT";
+        break;
+    case TKN_VARKEY:
+        label = "VARKEY";
+        break;
+    case TKN_VARVAL:
+        label = "VARVAL";
+        break;
+    case TKN_INVALID:
+    default:
+        /* keep the "INVALID" fallback label */
+        break;
+    }
+
+    return label;
+}
+
+/*
+ * Allocate a token that refers to `text`.
+ *
+ * The text pointer is stored as-is (it is not copied). The new token starts
+ * out as TKN_INVALID with no successor. Returns NULL if allocation fails.
+ */
+static token *token_create(char *text) {
+    token *created = malloc(sizeof *created);
+
+    if (created) {
+        created->text = text;
+        created->type = TKN_INVALID;
+        created->next = NULL;
+    }
+
+    return created;
+}
+
+/*
+ * Strip leading and trailing whitespace from `str` in place.
+ *
+ * Leading whitespace is skipped by advancing the returned pointer, so the
+ * result may point into the middle of the original buffer; callers that need
+ * to free the buffer must keep the original pointer. Trailing whitespace is
+ * overwritten with '\0'.
+ *
+ * Returns NULL when `str` is NULL, otherwise a pointer to the first
+ * non-whitespace character (or to the terminator if the string is blank).
+ *
+ * NOTE(review): trailing whitespace (e.g. a final newline) is now really
+ * removed; confirm that callers did not rely on it surviving.
+ */
+char *trim(char *str) {
+    if (!str) {
+        return NULL;
+    }
+
+    /* skip any leading whitespace; cast keeps isspace() defined for
+     * characters with negative char values */
+    while (isspace((unsigned char)*str)) {
+        str++;
+    }
+
+    /* drop trailing whitespace, starting from the last real character
+     * rather than from the terminator, and never stepping before `str` */
+    size_t len = strlen(str);
+    while (len > 0 && isspace((unsigned char)str[len - 1])) {
+        str[--len] = '\0';
+    }
+
+    return str;
+}
+
+/*
+ * Translate the lexer state into the type of the token that was being
+ * scanned in that state. Both redirect-file states map to TKN_FILENAME;
+ * an unrecognized state maps to TKN_INVALID.
+ */
+static enum token_type token_type_from_state(enum parser_state state) {
+    if (state == LEX_COMMAND) {
+        return TKN_COMMAND;
+    }
+
+    if (state == LEX_ARGUMENT) {
+        return TKN_ARG;
+    }
+
+    if (state == LEX_IFILE || state == LEX_OFILE) {
+        return TKN_FILENAME;
+    }
+
+    if (state == LEX_VARIABLE) {
+        return TKN_VARVAL;
+    }
+
+    return TKN_INVALID;
+}
+
+/*
+ * Split a command line into a linked list of typed tokens.
+ *
+ * The input buffer is tokenized in place: every separator character
+ * (whitespace, '<', '>', '|' or '=') is overwritten with '\0', and each
+ * token's text points directly into `cmdstr`. The buffer must therefore be
+ * writable and must outlive the returned list.
+ *
+ * Reaching a separator assigns the type of the token being scanned:
+ *   '<'        -> TKN_IN_REDIR,  following text is lexed as a filename
+ *   '>'        -> TKN_OUT_REDIR, following text is lexed as a filename
+ *   '|'        -> TKN_PIPE,      following text is lexed as a command
+ *   '='        -> TKN_VARKEY,    following text is lexed as a variable value
+ *   whitespace -> the type implied by the current lexer state
+ * Because an operator types the token in progress, an operator written
+ * directly after a word (e.g. "ls>out") relabels that word.
+ *
+ * A final token that is not followed by any separator is typed from the
+ * lexer state at end of input, so the line does not need trailing whitespace.
+ *
+ * Returns the head of the token list, or NULL when `cmdstr` is NULL. An empty
+ * input yields a single TKN_INVALID token with empty text. On allocation
+ * failure an error is printed to stderr and the process exits.
+ */
+token *parse_command(char *cmdstr) {
+    if (!cmdstr) {
+        return NULL;
+    }
+
+    token *tkn_head = token_create(cmdstr);
+    if (!tkn_head) {
+        fprintf(stderr, "ERROR: Out of memory\n");
+        exit(EXIT_FAILURE);
+    }
+
+    token *cur = tkn_head;
+    enum parser_state state = LEX_COMMAND;
+
+    /* test the character before consuming it so an empty string is never
+     * read past its terminator */
+    for (; *cmdstr; cmdstr++) {
+        switch (*cmdstr) {
+        case '<':
+            cur->type = TKN_IN_REDIR;
+            state = LEX_IFILE;
+            break;
+        case '>':
+            cur->type = TKN_OUT_REDIR;
+            state = LEX_OFILE;
+            break;
+        case '|':
+            cur->type = TKN_PIPE;
+            state = LEX_COMMAND;
+            break;
+        case '=':
+            cur->type = TKN_VARKEY;
+            state = LEX_VARIABLE;
+            break;
+        default:
+            if (!isspace((unsigned char)*cmdstr)) {
+                /* ordinary character: stay in the current token */
+                continue;
+            }
+
+            cur->type = token_type_from_state(state);
+
+            /* a variable value is followed by a command; a command name is
+             * followed by its arguments; other states persist */
+            if (state == LEX_VARIABLE) {
+                state = LEX_COMMAND;
+            } else if (state == LEX_COMMAND) {
+                state = LEX_ARGUMENT;
+            }
+            break;
+        }
+
+        /* terminate the token that just ended */
+        *cmdstr = '\0';
+
+        /* advance over any extra whitespace between tokens */
+        while (isspace((unsigned char)cmdstr[1])) {
+            cmdstr++;
+        }
+
+        /* only start a new token if there is more input to scan */
+        if (cmdstr[1]) {
+            cur->next = token_create(cmdstr + 1);
+            if (!cur->next) {
+                fprintf(stderr, "ERROR: Out of memory\n");
+                exit(EXIT_FAILURE);
+            }
+            cur = cur->next;
+        }
+    }
+
+    /* the last token met no separator, so it was never typed */
+    if (cur->type == TKN_INVALID && cur->text[0] != '\0') {
+        cur->type = token_type_from_state(state);
+    }
+
+    return tkn_head;
+}
+
+/*
+ * Check a token list for structural errors.
+ *
+ * Rules enforced, in list order:
+ *   - the first token must be a command or a variable key;
+ *   - nothing may follow a variable value;
+ *   - an argument must follow a command or another argument, and a single
+ *     command may have at most MAX_ARGUMENT_CNT arguments (the count restarts
+ *     at each piped command);
+ *   - a filename must directly follow a redirect, and only one input and one
+ *     output redirect are accepted for the whole line;
+ *   - a command other than the first must directly follow a pipe;
+ *   - a redirect must not directly follow a pipe or another redirect;
+ *   - a pipe must not directly follow another pipe;
+ *   - a variable value must directly follow a variable key;
+ *   - TKN_INVALID tokens are always rejected;
+ *   - the line must not end with a redirect that has no filename, nor with
+ *     a pipe that has no command after it.
+ *
+ * Returns the first offending token (for an incomplete line, the final
+ * token), or NULL when the list is valid or empty.
+ */
+token *validate_command(token *cmd) {
+    token *prev = NULL;
+    size_t arg_cnt = 0;
+    bool infile_defined = false;
+    bool outfile_defined = false;
+    bool looking_for_file = false;
+
+    for (token *tkn = cmd; tkn; tkn = tkn->next) {
+        if (!prev) {
+            if (tkn->type != TKN_COMMAND && tkn->type != TKN_VARKEY) {
+                return tkn;
+            }
+        } else if (prev->type == TKN_VARVAL) {
+            return tkn;
+        } else if (tkn->type == TKN_ARG) {
+            arg_cnt++;
+            if ((prev->type != TKN_COMMAND && prev->type != TKN_ARG) ||
+                arg_cnt > MAX_ARGUMENT_CNT) {
+                return tkn;
+            }
+        } else if (tkn->type == TKN_FILENAME) {
+            if (prev->type == TKN_IN_REDIR && !infile_defined) {
+                infile_defined = true;
+                looking_for_file = false;
+            } else if (prev->type == TKN_OUT_REDIR && !outfile_defined) {
+                outfile_defined = true;
+                looking_for_file = false;
+            } else {
+                return tkn;
+            }
+        } else if (tkn->type == TKN_COMMAND) {
+            if (prev->type != TKN_PIPE) {
+                return tkn;
+            }
+
+            /* a new pipeline stage gets a fresh argument budget */
+            arg_cnt = 0;
+        } else if (tkn->type == TKN_IN_REDIR || tkn->type == TKN_OUT_REDIR) {
+            if (prev->type == TKN_PIPE || prev->type == TKN_IN_REDIR ||
+                prev->type == TKN_OUT_REDIR) {
+                return tkn;
+            }
+
+            looking_for_file = true;
+        } else if (tkn->type == TKN_PIPE) {
+            /* an empty pipeline stage ("a | | b") is not a command */
+            if (prev->type == TKN_PIPE) {
+                return tkn;
+            }
+        } else if (tkn->type == TKN_INVALID) {
+            return tkn;
+        } else if (tkn->type == TKN_VARVAL) {
+            if (prev->type != TKN_VARKEY) {
+                return tkn;
+            }
+        }
+
+        /* once a redirect has been seen, only the redirect token itself may
+         * pass until its filename clears the flag */
+        if (looking_for_file &&
+            (tkn->type != TKN_IN_REDIR && tkn->type != TKN_OUT_REDIR)) {
+            return tkn;
+        }
+
+        prev = tkn;
+    }
+
+    /* end of input with a redirect still waiting for its file, or with a
+     * pipe that feeds nothing: report the dangling final token */
+    if (prev && (looking_for_file || prev->type == TKN_PIPE)) {
+        return prev;
+    }
+
+    return NULL;
+}
+
+/*
+ * Write one line per token to `file`: the token text, a tab, and the
+ * label of the token's type.
+ */
+void print_parsed_command(FILE *file, token *cmd) {
+    token *cursor = cmd;
+
+    while (cursor) {
+        fprintf(file, "%s\t%s\n", cursor->text, token_tostr(cursor->type));
+        cursor = cursor->next;
+    }
+}
+
+/*
+ * Free every node of a token list.
+ *
+ * Only the token nodes are released; the text each token points at is not
+ * freed here. Passing NULL is a no-op.
+ */
+void destroy_tokens(token *tkns) {
+    while (tkns) {
+        /* read the link before the node is released */
+        token *next = tkns->next;
+        free(tkns);
+        tkns = next;
+    }
+}