diff options
| author | Douglas B. Rumbaugh <doug@douglasrumbaugh.com> | 2025-10-31 23:41:32 -0400 |
|---|---|---|
| committer | Douglas B. Rumbaugh <doug@douglasrumbaugh.com> | 2025-10-31 23:41:32 -0400 |
| commit | 06a02a3a50baf261a0f1c998bfd02269c3ed45de (patch) | |
| tree | 00aa66e09a31b2563221c385e5ac129a57082729 | |
| download | hush-06a02a3a50baf261a0f1c998bfd02269c3ed45de.tar.gz | |
Initial commit
| -rw-r--r-- | Makefile | 19 | ||||
| -rw-r--r-- | include/command.h | 55 | ||||
| -rw-r--r-- | include/config.h | 15 | ||||
| -rw-r--r-- | include/lexer.h | 83 | ||||
| -rw-r--r-- | src/command.c | 165 | ||||
| -rw-r--r-- | src/hush.c | 140 | ||||
| -rw-r--r-- | src/lexer.c | 251 |
7 files changed, 728 insertions, 0 deletions
diff --git a/Makefile b/Makefile
new file mode 100644
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,19 @@
+all: bin/hush
+
+bin/hush: build/lexer.o build/command.o src/hush.c include/lexer.h include/command.h include/config.h | build
+	gcc -Iinclude build/lexer.o build/command.o src/hush.c -ggdb -o bin/hush
+
+build/lexer.o: include/lexer.h include/config.h src/lexer.c | build
+	gcc -Iinclude -c src/lexer.c -ggdb -o build/lexer.o
+
+build/command.o: include/lexer.h include/command.h include/config.h src/command.c | build
+	gcc -Iinclude -c src/command.c -ggdb -o build/command.o
+
+# "build" is an order-only prerequisite (after the "|"): the output
+# directories must exist first, but creating them never forces a rebuild.
+.PHONY: all build clean
+build:
+	mkdir -p build bin
+
+clean:
+	rm -rf build bin
diff --git a/include/command.h b/include/command.h
new file mode 100644
--- /dev/null
+++ b/include/command.h
@@ -0,0 +1,55 @@
+/*
+ * Pipeline command representation and execution for hush.
+ */
+
+#ifndef H_HUSH_COMMAND
+#define H_HUSH_COMMAND
+
+#include <stdio.h>
+#include <sys/types.h>
+
+#include "lexer.h"
+#include "config.h"
+
+/*
+ * One stage of a pipeline. The string members point into the token
+ * text (and therefore into the original command line); they are not
+ * owned by the command and must outlive it.
+ */
+typedef struct command {
+    char *command;        /* program name, also stored in args[0] */
+    char *infile;         /* "<" target, or NULL */
+    char *outfile;        /* ">" target, or NULL */
+    struct command *next; /* next pipeline stage, or NULL */
+    pid_t pid;            /* set by execute_command() */
+
+    int pipe[2];          /* carries this stage's output to the next stage */
+    int *read_pipe;       /* previous stage's pipe, NULL for the first stage */
+
+    /* argv for execvp: command, up to MAX_ARGUMENT_CNT arguments, NULL */
+    char *args[MAX_ARGUMENT_CNT + 2];
+} command;
+
+/*
+ * Builds a list of commands from a validated token list and stores
+ * the number of commands in *cnt. Returns NULL if memory runs out or
+ * a command has more than MAX_ARGUMENT_CNT arguments. The result must
+ * be released with destroy_commands().
+ */
+command *commands_from_tokens(token *parsed_cmdstr, size_t *cnt);
+
+/* Writes a human-readable dump of the command list to file. */
+void print_commands(FILE *file, command *cmds);
+
+/* Frees every node of the list. Passing NULL is allowed. */
+void destroy_commands(command *cmds);
+
+/*
+ * Forks and executes a single command, wiring up its redirections
+ * and pipes. Returns the child's pid (also stored in cmd->pid), or a
+ * negative value if the pipe or the child could not be created.
+ * Commands of a pipeline must be executed in list order.
+ */
+pid_t execute_command(command *cmd);
+
+#endif
diff --git a/include/config.h b/include/config.h
new file mode 100644
--- /dev/null
+++ b/include/config.h
@@ -0,0 +1,15 @@
+/*
+ * Compile-time limits for hush.
+ */
+#ifndef H_HUSH_CONFIG
+#define H_HUSH_CONFIG
+
+#include <stdlib.h>
+
+/* Size of the line buffer, including the terminating NUL. */
+#define MAX_LINE_LEN 1024
+
+/* Maximum number of arguments per command (not counting the command). */
+#define MAX_ARGUMENT_CNT 8
+
+#endif
diff --git a/include/lexer.h b/include/lexer.h
new file mode 100644
--- /dev/null
+++ b/include/lexer.h
@@ -0,0 +1,83 @@
+/*
+ * Header file for the HUSH lexical analysis
+ * module
+ */
+#ifndef H_HUSH_LEXER
+#define H_HUSH_LEXER
+
+#include <ctype.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdbool.h>
+#include <stdio.h>
+
+#include "config.h"
+
+enum token_type {
+    TKN_COMMAND,
+    TKN_OUT_REDIR,
+    TKN_IN_REDIR,
+    TKN_PIPE,
+    TKN_FILENAME,
+    TKN_ARG,
+    TKN_INVALID,
+    TKN_VARKEY,
+    TKN_VARVAL
+};
+
+typedef struct token {
+    char *text;
+    enum token_type type;
+    struct token *next;
+} token;
+
+/*
+ * Accepts a null-terminated string representing
+ * a full command and returns the pointer to the
+ * head of a list of parsed tokens, or NULL if
+ * memory could not be allocated. The provided
+ * string must have been trimmed to remove all
+ * leading and trailing whitespace characters.
+ *
+ * The string is modified in place: separators
+ * are overwritten with null bytes. The tokens
+ * contain pointers into the provided command
+ * string, and so the parsed command is only
+ * valid so long as the provided string has not
+ * been altered.
+ *
+ * This function's behavior is undefined if the
+ * input string is *not* null-terminated, or if
+ * it has leading whitespace.
+ */
+token *parse_command(char *cmdstr);
+
+/*
+ * Accepts the pointer to the head of a list
+ * of parsed tokens and validates that the
+ * tokens are syntactically valid. If the
+ * sequence is valid, NULL will be returned.
+ * If the sequence is invalid, a pointer to
+ * the first invalid token will be returned
+ * (for a command that ends in a dangling
+ * operator, that operator's token).
+ */
+token *validate_command(token *cmd);
+
+/*
+ * Removes all leading and trailing whitespace from
+ * the provided string, and returns a pointer to the
+ * beginning of the trimmed string. Necessary as the
+ * first stage of parsing. Trailing whitespace is
+ * removed by writing null bytes into the string.
+ * Returns NULL if str is NULL.
+ */
+char *trim(char *str);
+
+/* Writes one "text<TAB>type" line per token to file. */
+void print_parsed_command(FILE *file, token *cmd);
+
+/* Frees every token of the list. Passing NULL is allowed. */
+void destroy_tokens(token *tokens);
+
+#endif
diff --git a/src/command.c b/src/command.c
new file mode 100644
--- /dev/null
+++ b/src/command.c
@@ -0,0 +1,165 @@
+/*
+ * Construction and execution of pipeline commands.
+ */
+
+#include "command.h"
+#include "config.h"
+#include "lexer.h"
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+command *commands_from_tokens(token *parsed_cmdstr, size_t *cnt) {
+    command *cmd_head = calloc(1, sizeof *cmd_head);
+    if (!cmd_head) {
+        return NULL;
+    }
+
+    command *cmd = cmd_head;
+    size_t arg_cnt = 1; /* args[0] is reserved for the command name */
+    token *prev_tkn = NULL;
+
+    *cnt = 0;
+
+    for (token *tkn = parsed_cmdstr; tkn; prev_tkn = tkn, tkn = tkn->next) {
+        switch (tkn->type) {
+        case TKN_COMMAND:
+            cmd->command = tkn->text;
+            cmd->args[0] = tkn->text;
+            arg_cnt = 1;
+            (*cnt)++;
+            break;
+        case TKN_ARG:
+            /* the final slot must stay NULL to terminate argv for execvp */
+            if (arg_cnt > MAX_ARGUMENT_CNT) {
+                destroy_commands(cmd_head);
+                return NULL;
+            }
+            cmd->args[arg_cnt++] = tkn->text;
+            break;
+        case TKN_FILENAME:
+            if (prev_tkn && prev_tkn->type == TKN_IN_REDIR) {
+                cmd->infile = tkn->text;
+            } else {
+                cmd->outfile = tkn->text;
+            }
+            break;
+        case TKN_PIPE:
+            cmd->next = calloc(1, sizeof *cmd->next);
+            if (!cmd->next) {
+                destroy_commands(cmd_head);
+                return NULL;
+            }
+            /* the new stage reads from the pipe owned by this stage */
+            cmd->next->read_pipe = cmd->pipe;
+            cmd = cmd->next;
+            break;
+        default:
+            break;
+        }
+    }
+
+    return cmd_head;
+}
+
+void print_commands(FILE *file, command *cmds) {
+    for (command *cmd = cmds; cmd; cmd = cmd->next) {
+        fprintf(file, "Command: %s\n", cmd->command ? cmd->command : "(none)");
+        fprintf(file, "\t");
+        /* args is NULL-terminated; passing NULL to %s is undefined */
+        for (size_t i = 0; i < MAX_ARGUMENT_CNT + 2 && cmd->args[i]; i++) {
+            fprintf(file, "%s ", cmd->args[i]);
+        }
+        fprintf(file, "\n\tInfile: %s\n\tOutfile: %s\n",
+                cmd->infile ? cmd->infile : "(none)",
+                cmd->outfile ? cmd->outfile : "(none)");
+    }
+}
+
+void destroy_commands(command *cmds) {
+    while (cmds) {
+        /* read the link before the node is freed */
+        command *next = cmds->next;
+        free(cmds);
+        cmds = next;
+    }
+}
+
+pid_t execute_command(command *cmd) {
+    bool has_out_pipe = cmd->next != NULL;
+
+    if (has_out_pipe && pipe(cmd->pipe) < 0) {
+        perror("Could not create pipe");
+        if (cmd->read_pipe) {
+            close(cmd->read_pipe[0]);
+        }
+        cmd->pid = -1;
+        return -1;
+    }
+
+    pid_t res = fork();
+    if (res == 0) {
+        /* child: the read end of our output pipe belongs to the next stage */
+        if (has_out_pipe) {
+            close(cmd->pipe[0]);
+        }
+
+        if (cmd->infile) {
+            if (!freopen(cmd->infile, "r", stdin)) {
+                perror("Could not open input file");
+                _exit(EXIT_FAILURE);
+            }
+        } else if (cmd->read_pipe) {
+            if (dup2(cmd->read_pipe[0], STDIN_FILENO) < 0) {
+                perror("Could not redirect input");
+                _exit(EXIT_FAILURE);
+            }
+        }
+        if (cmd->read_pipe) {
+            close(cmd->read_pipe[0]);
+        }
+
+        if (cmd->outfile) {
+            if (!freopen(cmd->outfile, "w", stdout)) {
+                perror("Could not open output file");
+                _exit(EXIT_FAILURE);
+            }
+        } else if (has_out_pipe) {
+            if (dup2(cmd->pipe[1], STDOUT_FILENO) < 0) {
+                perror("Could not redirect output");
+                _exit(EXIT_FAILURE);
+            }
+        }
+        if (has_out_pipe) {
+            close(cmd->pipe[1]);
+        }
+
+        execvp(cmd->command, cmd->args);
+        /* only reached when exec fails; _exit skips the parent's stdio buffers */
+        perror("Could not run command");
+        _exit(EXIT_FAILURE);
+    }
+
+    if (res < 0) {
+        perror("Could not run command");
+    }
+
+    /*
+     * Parent: drop the pipe ends that now belong to children. Keeping
+     * a write end open here would stop the reader from ever seeing EOF.
+     */
+    if (cmd->read_pipe) {
+        close(cmd->read_pipe[0]);
+    }
+    if (has_out_pipe) {
+        close(cmd->pipe[1]);
+        if (res < 0) {
+            close(cmd->pipe[0]);
+        }
+    }
+
+    cmd->pid = res;
+    return res;
+}
diff --git a/src/hush.c b/src/hush.c
new file mode 100644
--- /dev/null
+++ b/src/hush.c
@@ -0,0 +1,140 @@
+/*
+ * hush -- the HU Shell
+ *
+ * A minimal shell for CISC 301, Operating Systems
+ *
+ */
+
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/wait.h>
+
+#include "config.h"
+#include "lexer.h"
+#include "command.h"
+
+/*
+ * Returns the stream commands are read from: the file named by
+ * argv[1] when one is given, stdin otherwise. Exits if the file
+ * cannot be opened.
+ */
+static FILE *open_input(int argc, char **argv) {
+    FILE *input_file = (argc > 1) ? fopen(argv[1], "r") : stdin;
+    if (!input_file) {
+        fprintf(stderr, "ERROR: Failed to open input file %s\n", argv[1]);
+        exit(EXIT_FAILURE);
+    }
+
+    return input_file;
+}
+
+/*
+ * Returns the length of a line read by fgets, including its newline,
+ * or 0 when the line does not end in a newline (it was cut short by
+ * the buffer size or by end-of-file).
+ */
+static size_t get_command_len(const char *cmdstr) {
+    size_t len = strlen(cmdstr);
+    if (len == 0 || cmdstr[len - 1] != '\n') {
+        len = 0;
+    }
+
+    return len;
+}
+
+/*
+ * Consumes the rest of the current input line. Returns true if the
+ * line really continued, false if the input had already ended.
+ */
+static bool discard_line(FILE *input) {
+    int ch = fgetc(input);
+    if (ch == EOF) {
+        return false;
+    }
+
+    while (ch != '\n' && ch != EOF) {
+        ch = fgetc(input);
+    }
+
+    return true;
+}
+
+/* The prompt has no newline, so flush it to make it visible. */
+static void print_prompt(void) {
+    fputs("$ ", stdout);
+    fflush(stdout);
+}
+
+int main(int argc, char **argv) {
+    FILE *input_file = open_input(argc, argv);
+    char buffer[MAX_LINE_LEN];
+
+    print_prompt();
+    while (fgets(buffer, sizeof buffer, input_file)) {
+        /* initialized up front so every exit path can clean up safely */
+        token *parsed_cmd = NULL;
+        token *error = NULL;
+        command *cmds = NULL;
+        char *cmdstr = NULL;
+        size_t commands = 0;
+
+        if (get_command_len(buffer) == 0 && discard_line(input_file)) {
+            fprintf(stderr, "ERROR: Specified command is too long\n");
+            goto draw_prompt;
+        }
+
+        cmdstr = trim(buffer);
+        if (strlen(cmdstr) == 0) {
+            goto draw_prompt;
+        }
+
+        if (!(parsed_cmd = parse_command(cmdstr))) {
+            fprintf(stderr, "ERROR: Failed to parse command\n");
+            goto draw_prompt;
+        }
+
+        if ((error = validate_command(parsed_cmd))) {
+            fprintf(stderr, "ERROR: Invalid syntax near %s\n", error->text);
+            print_parsed_command(stdout, parsed_cmd);
+            goto cleanup;
+        }
+
+        if (parsed_cmd->type == TKN_VARKEY) {
+            /* handle variable creation */
+            goto cleanup;
+        }
+
+        if (!(cmds = commands_from_tokens(parsed_cmd, &commands))) {
+            fprintf(stderr, "ERROR: Unable to create commands from tokens\n");
+            goto cleanup;
+        }
+
+        for (command *cmd = cmds; cmd; cmd = cmd->next) {
+            if (execute_command(cmd) < 0) {
+                break;
+            }
+        }
+
+        for (command *cmd = cmds; cmd; cmd = cmd->next) {
+            if (cmd->pid > 0) {
+                waitpid(cmd->pid, NULL, 0);
+            }
+        }
+
+    cleanup:
+        destroy_commands(cmds);
+        destroy_tokens(parsed_cmd);
+
+    draw_prompt:
+        print_prompt();
+    }
+
+    if (input_file != stdin) {
+        fclose(input_file);
+    }
+
+    return EXIT_SUCCESS;
+}
diff --git a/src/lexer.c b/src/lexer.c
new file mode 100644
--- /dev/null
+++ b/src/lexer.c
@@ -0,0 +1,251 @@
+/*
+ * Lexical analysis for hush: splits a command line into typed tokens.
+ */
+
+#include "lexer.h"
+#include "config.h"
+
+enum parser_state {
+    LEX_COMMAND,
+    LEX_IFILE,
+    LEX_OFILE,
+    LEX_ARGUMENT,
+    LEX_VARIABLE
+};
+
+static const char *token_tostr(enum token_type type) {
+    switch (type) {
+    case TKN_FILENAME:
+        return "FILENAME";
+    case TKN_COMMAND:
+        return "COMMAND";
+    case TKN_PIPE:
+        return "PIPE";
+    case TKN_IN_REDIR:
+        return "INPUT REDIR";
+    case TKN_OUT_REDIR:
+        return "OUT REDIR";
+    case TKN_ARG:
+        return "ARGUMENT";
+    case TKN_INVALID:
+        return "INVALID";
+    case TKN_VARKEY:
+        return "VARKEY";
+    case TKN_VARVAL:
+        return "VARVAL";
+    default:
+        return "INVALID";
+    }
+}
+
+/* Allocates an untyped (TKN_INVALID) token for text; NULL if out of memory. */
+static token *token_create(char *text) {
+    token *tkn = malloc(sizeof *tkn);
+    if (!tkn) {
+        return NULL;
+    }
+
+    tkn->text = text;
+    tkn->type = TKN_INVALID;
+    tkn->next = NULL;
+
+    return tkn;
+}
+
+char *trim(char *str) {
+    if (!str) {
+        return NULL;
+    }
+
+    /* skip any leading whitespace */
+    while (isspace((unsigned char)*str)) {
+        str++;
+    }
+
+    /* overwrite trailing whitespace, working back from the last character */
+    size_t len = strlen(str);
+    while (len > 0 && isspace((unsigned char)str[len - 1])) {
+        str[--len] = '\0';
+    }
+
+    return str;
+}
+
+/* Maps the lexer state to the type of a word that ends in that state. */
+static enum token_type token_type_from_state(enum parser_state state) {
+    switch (state) {
+    case LEX_ARGUMENT:
+        return TKN_ARG;
+    case LEX_IFILE:
+    case LEX_OFILE:
+        return TKN_FILENAME;
+    case LEX_COMMAND:
+        return TKN_COMMAND;
+    case LEX_VARIABLE:
+        return TKN_VARVAL;
+    default:
+        return TKN_INVALID;
+    }
+}
+
+token *parse_command(char *cmdstr) {
+    token *tkn_head = token_create(cmdstr);
+    if (!tkn_head) {
+        return NULL;
+    }
+
+    token *tkn = tkn_head;
+    enum parser_state state = LEX_COMMAND;
+
+    for (; *cmdstr; cmdstr++) {
+        /* an operator or whitespace ends the current token and fixes its type */
+        if (*cmdstr == '<') {
+            tkn->type = TKN_IN_REDIR;
+            state = LEX_IFILE;
+        } else if (*cmdstr == '>') {
+            tkn->type = TKN_OUT_REDIR;
+            state = LEX_OFILE;
+        } else if (*cmdstr == '|') {
+            tkn->type = TKN_PIPE;
+            state = LEX_COMMAND;
+        } else if (*cmdstr == '=') {
+            tkn->type = TKN_VARKEY;
+            state = LEX_VARIABLE;
+        } else if (isspace((unsigned char)*cmdstr)) {
+            tkn->type = token_type_from_state(state);
+
+            if (state == LEX_VARIABLE) {
+                state = LEX_COMMAND;
+            } else if (state == LEX_COMMAND) {
+                state = LEX_ARGUMENT;
+            }
+        } else {
+            /* ordinary character: still inside the current token */
+            continue;
+        }
+
+        /* terminate the current token at the delimiter */
+        *cmdstr = '\0';
+
+        /* advance over any extra whitespace between tokens */
+        while (isspace((unsigned char)cmdstr[1])) {
+            cmdstr++;
+        }
+
+        if (cmdstr[1]) {
+            tkn->next = token_create(cmdstr + 1);
+            if (!tkn->next) {
+                destroy_tokens(tkn_head);
+                return NULL;
+            }
+            tkn = tkn->next;
+        }
+    }
+
+    /*
+     * The final word of a trimmed line is not followed by a delimiter,
+     * so it is still untyped here; type it from the current state.
+     */
+    if (tkn->type == TKN_INVALID && *tkn->text) {
+        tkn->type = token_type_from_state(state);
+    }
+
+    return tkn_head;
+}
+
+/* True for the two redirection operator token types. */
+static bool is_redirect(enum token_type type) {
+    return type == TKN_IN_REDIR || type == TKN_OUT_REDIR;
+}
+
+token *validate_command(token *cmd) {
+    token *prev = NULL;
+    size_t arg_cnt = 0;
+    bool infile_defined = false;
+    bool outfile_defined = false;
+    bool looking_for_file = false;
+
+    for (token *tkn = cmd; tkn; tkn = tkn->next) {
+        if (!prev) {
+            /* a line starts with a command or a variable assignment */
+            if (tkn->type != TKN_COMMAND && tkn->type != TKN_VARKEY) {
+                return tkn;
+            }
+        } else if (prev->type == TKN_VARVAL) {
+            /* nothing may follow a variable's value */
+            return tkn;
+        } else if (tkn->type == TKN_ARG) {
+            arg_cnt++;
+            if ((prev->type != TKN_COMMAND && prev->type != TKN_ARG) ||
+                arg_cnt > MAX_ARGUMENT_CNT) {
+                return tkn;
+            }
+        } else if (tkn->type == TKN_FILENAME) {
+            if (prev->type == TKN_IN_REDIR && !infile_defined) {
+                infile_defined = true;
+                looking_for_file = false;
+            } else if (prev->type == TKN_OUT_REDIR && !outfile_defined) {
+                outfile_defined = true;
+                looking_for_file = false;
+            } else {
+                return tkn;
+            }
+        } else if (tkn->type == TKN_COMMAND) {
+            if (prev->type != TKN_PIPE) {
+                return tkn;
+            }
+
+            arg_cnt = 0;
+        } else if (is_redirect(tkn->type)) {
+            if (prev->type == TKN_PIPE || is_redirect(prev->type)) {
+                return tkn;
+            }
+
+            looking_for_file = true;
+        } else if (tkn->type == TKN_PIPE) {
+            /* a pipe needs a complete command on its left-hand side */
+            if (prev->type != TKN_COMMAND && prev->type != TKN_ARG &&
+                prev->type != TKN_FILENAME) {
+                return tkn;
+            }
+        } else if (tkn->type == TKN_INVALID) {
+            return tkn;
+        } else if (tkn->type == TKN_VARKEY) {
+            /* assignments are only recognised at the start of a line */
+            return tkn;
+        } else if (tkn->type == TKN_VARVAL) {
+            if (prev->type != TKN_VARKEY) {
+                return tkn;
+            }
+        }
+
+        /* the token right after a redirection operator must be its file */
+        if (looking_for_file && !is_redirect(tkn->type)) {
+            return tkn;
+        }
+
+        prev = tkn;
+    }
+
+    /* a trailing pipe or redirection is missing its right-hand operand */
+    if (prev && (prev->type == TKN_PIPE || is_redirect(prev->type))) {
+        return prev;
+    }
+
+    return NULL;
+}
+
+void print_parsed_command(FILE *file, token *cmd) {
+    for (token *tkn = cmd; tkn; tkn = tkn->next) {
+        fprintf(file, "%s\t%s\n", tkn->text, token_tostr(tkn->type));
+    }
+}
+
+void destroy_tokens(token *tkns) {
+    while (tkns) {
+        /* read the link before the token is freed */
+        token *next = tkns->next;
+        free(tkns);
+        tkns = next;
+    }
+}