diff options
| author | Douglas B. Rumbaugh <doug@douglasrumbaugh.com> | 2025-01-06 12:23:42 -0500 |
|---|---|---|
| committer | Douglas B. Rumbaugh <doug@douglasrumbaugh.com> | 2025-01-06 12:23:42 -0500 |
| commit | 177bea75a11bc62915095213c351c8385ea12226 (patch) | |
| tree | 3e5c5c7103e082d152c23bcaead60740bd1d8550 | |
| download | math-utils-177bea75a11bc62915095213c351c8385ea12226.tar.gz | |
Initial commit of cdf program and basic build system
| -rw-r--r-- | .gitignore | 5 | ||||
| -rw-r--r-- | Makefile | 4 | ||||
| -rw-r--r-- | include/cdf.h | 40 | ||||
| -rw-r--r-- | src/cdf.c | 244 |
4 files changed, 293 insertions, 0 deletions
// ---------------------------------------------------------------------------
// Non-C files added by this commit, reproduced from the diff.
//
// .gitignore
//     .cache
//     compile_commands.json
//     bin/*
//     build/*
//     *.bak
//
// Makefile (the recipe line must start with a TAB)
//     all: bin/cdf
//
//     bin/cdf: src/cdf.c include/cdf.h
//             gcc -std=c23 -Iinclude src/cdf.c -o bin/cdf
//
// NOTE(review): the recipe writes to bin/ but nothing creates that directory,
// and .gitignore excludes its contents -- confirm bin/ exists before building.
// ---------------------------------------------------------------------------

/* ===== include/cdf.h ===================================================== */

/*
 * Shared types and forward declarations for the cdf program.
 */

#ifndef H_CDF
#define H_CDF

#include <stdlib.h>
#include <unistd.h>
#include <stdio.h>
#include <ctype.h>
#include <getopt.h>
#include <string.h>
#include <stdint.h>
#include <assert.h>

/* One input value; which member is active is chosen by the -f / -u flags. */
typedef union {
    int64_t i;
    uint64_t u;
    double d;
} Number;

/* One input record: a value together with its occurrence count. */
typedef struct {
    Number data;
    uint64_t count;
} DistRecord;

static int parse_options(int argc, char *const *argv);
static void help(void);
static int process_data(FILE *file);
static DistRecord *expand_array(DistRecord *records, size_t *capacity);
static int read_data_int(DistRecord **records, size_t capacity, FILE *file);
static int read_data_uint(DistRecord **records, size_t capacity, FILE *file);
static int read_data_fp(DistRecord **records, size_t capacity, FILE *file);
static int print_data_fp(DistRecord *records, long double *freqs, size_t cnt);
static int print_data_int(DistRecord *records, long double *freqs, size_t cnt);
static int print_data_uint(DistRecord *records, long double *freqs, size_t cnt);

#endif

/* ===== src/cdf.c ========================================================= */
/* In the repository layout this file starts with: #include "cdf.h" */

/*
 * Global configuration variables. These are set based on command-line
 * arguments, and read-only beyond that point.
 */
static bool ARG_REVERSE_CDF = false;
static bool ARG_FP_INPUT = false;
static bool ARG_UINT_INPUT = false;
static bool ARG_HELP = false;

/*
 * Parse the command-line flags (-f, -r, -u, -h) into the ARG_* globals.
 *
 * Returns the index of the first non-option argument (getopt's optind) on
 * success, or 0 if an unknown option was seen or -u and -f were combined.
 */
static int parse_options(int argc, char *const *argv) {
    bool error = false;
    int arg;

    while ((arg = getopt(argc, argv, "frhu")) != -1) {
        switch (arg) {
        case 'f':
            ARG_FP_INPUT = true;
            break;
        case 'r':
            ARG_REVERSE_CDF = true;
            break;
        case 'u':
            ARG_UINT_INPUT = true;
            break; /* without this break, -u also switched on -h */
        case 'h':
            ARG_HELP = true;
            break;
        case '?':
            if (isprint(optopt)) {
                fprintf(stderr, "Unknown option `-%c`.\n", optopt);
            } else {
                fprintf(stderr, "Unknown option character `\\x%x`.\n", optopt);
            }
            error = true;
            break;
        default:
            error = true;
            break;
        }
    }

    if (ARG_UINT_INPUT && ARG_FP_INPUT) {
        fprintf(stderr, "Error: the -u and -f flags are mutually exclusive.\n");
        error = true;
    }

    return error ? 0 : optind;
}

/* Print a usage summary to stderr. */
static void help(void) {
    fprintf(stderr, "Usage:\ncdf [-f | -u] [-r] [-h] [filename]\n");
}

/*
 * Double the capacity of a record array.
 *
 * records:  current allocation (may be passed to realloc as-is).
 * capacity: in/out element count; updated only when the resize succeeds.
 *
 * Returns the resized array, or NULL on failure. On failure the original
 * allocation is left untouched and still owned by the caller.
 */
static DistRecord *expand_array(DistRecord *records, size_t *capacity) {
    size_t new_capacity = *capacity ? *capacity * 2 : 1;

    /* refuse sizes that wrapped or whose byte count would overflow size_t */
    if (new_capacity < *capacity || new_capacity > SIZE_MAX / sizeof *records) {
        fprintf(stderr, "ERROR: Memory allocation failed\n");
        return NULL;
    }

    /* realloc takes a size in bytes, not in elements */
    DistRecord *grown = realloc(records, new_capacity * sizeof *grown);
    if (!grown) {
        fprintf(stderr, "ERROR: Memory allocation failed\n");
        return NULL;
    }

    *capacity = new_capacity;
    return grown;
}

/*
 * Read "<count> <signed integer>" records from file until end of input.
 *
 * records:  in/out pointer to the record array; replaced if the array grows.
 * capacity: number of elements currently allocated in *records.
 *
 * Returns the number of records read, or -1 on malformed input, read error,
 * or allocation failure. *records always remains a valid allocation that the
 * caller must free.
 */
static int read_data_int(DistRecord **records, size_t capacity, FILE *file) {
    size_t reccnt = 0;

    for (;;) {
        unsigned long long count = 0;
        long long value = 0;
        int matched = fscanf(file, "%llu %lld", &count, &value);

        if (matched == EOF) {
            if (ferror(file)) {
                fprintf(stderr, "ERROR: failed to read input\n");
                return -1;
            }
            break;
        }

        /* a short match never consumes the bad token, so looping would spin */
        if (matched != 2) {
            fprintf(stderr, "ERROR: malformed input at record %zu\n", reccnt + 1);
            return -1;
        }

        if (reccnt == capacity) {
            DistRecord *grown = expand_array(*records, &capacity);
            if (!grown) {
                return -1;
            }
            *records = grown;
        }

        (*records)[reccnt].count = count;
        (*records)[reccnt].data.i = value;
        reccnt++;
    }

    return (int) reccnt;
}

/*
 * Read "<count> <floating-point value>" records from file until end of input.
 *
 * Same contract as read_data_int, but the value is stored in data.d.
 */
static int read_data_fp(DistRecord **records, size_t capacity, FILE *file) {
    size_t reccnt = 0;

    for (;;) {
        unsigned long long count = 0;
        double value = 0.0;
        int matched = fscanf(file, "%llu %lf", &count, &value);

        if (matched == EOF) {
            if (ferror(file)) {
                fprintf(stderr, "ERROR: failed to read input\n");
                return -1;
            }
            break;
        }

        /* a short match never consumes the bad token, so looping would spin */
        if (matched != 2) {
            fprintf(stderr, "ERROR: malformed input at record %zu\n", reccnt + 1);
            return -1;
        }

        if (reccnt == capacity) {
            DistRecord *grown = expand_array(*records, &capacity);
            if (!grown) {
                return -1;
            }
            *records = grown;
        }

        (*records)[reccnt].count = count;
        (*records)[reccnt].data.d = value;
        reccnt++;
    }

    return (int) reccnt;
}
+static int read_data_uint(DistRecord **records, size_t capacity, FILE *file) { + size_t reccnt = 0; + while (fscanf(file, "%ld %ld\n", &(*records + reccnt)->count, + &(*records + reccnt)->data.u) != EOF) { + + reccnt++; + if (reccnt == capacity) { + if (!(*records = expand_array(*records, &capacity))) { + return -1; + } + } + } + + return reccnt; +} + +static int print_data_fp(DistRecord *records, long double *freqs, size_t cnt) { + for (size_t i=0; i<cnt; i++) { + fprintf(stdout, "%Lf\t%lf\n", freqs[i], records[i].data.d); + } + + return 1; +} + +static int print_data_int(DistRecord *records, long double *freqs, size_t cnt) { + for (size_t i=0; i<cnt; i++) { + fprintf(stdout, "%Lf\t%ld\n", freqs[i], records[i].data.i); + } + + return 1; +} + +static int print_data_uint(DistRecord *records, long double *freqs, size_t cnt) { + for (size_t i=0; i<cnt; i++) { + fprintf(stdout, "%Lf\t%ld\n", freqs[i], records[i].data.u); + } + + return 1; +} + +static int process_data(FILE *file) { + int rc = 1; + + size_t reccap = 100; + DistRecord *records = malloc(reccap*sizeof(DistRecord)); + + ssize_t cnt; + /* FIXME: this could probably use a type-based macro to collapse the + if statements into a single macro call + */ + if (ARG_FP_INPUT) { + cnt = read_data_fp(&records, reccap, file); + } else if (ARG_UINT_INPUT) { + cnt = read_data_uint(&records, reccap, file); + } else { + cnt = read_data_int(&records, reccap, file); + } + + /* propogate the error */ + if (cnt == -1) { + rc = 0; + goto free_records; + } + + /* calculate total sum of counts */ + uint64_t total_count = 0; + for (size_t i=0; i<cnt; i++) { + total_count += records[i].count; + } + + /* calculate relative frequency for each item */ + long double *freqs = malloc(sizeof(long double) * cnt); + if (!freqs) { + fprintf(stderr, "ERROR: memory allocation failure.\n"); + goto free_freqs; + } + + for (size_t i=0; i<cnt; i++) { + freqs[i] = (long double) (records[i].count) / (long double) (total_count); + } + + if 
(ARG_FP_INPUT) { + rc = print_data_fp(records, freqs, cnt); + } else if (ARG_UINT_INPUT) { + rc = print_data_uint(records, freqs, cnt); + } else { + rc = print_data_int(records, freqs, cnt); + } + +free_freqs: + free(freqs); + +free_records: + free(records); + +process_data_end: + return rc; +} + + +int main(int argc, char **argv) { + int rc = EXIT_SUCCESS; + int file_index = 0; + if (!(file_index = parse_options(argc, argv))) { + help(); + rc = EXIT_FAILURE; + goto program_exit; + } + + /* if the -h argument is supplied, print usage and exit successfully */ + if (ARG_HELP) { + help(); + goto program_exit; + } + + /* open the input file, if one is specified, otherwise default to stdin */ + FILE *input_file; + if (file_index < argc && strcmp(argv[file_index], "-") != 0) { + if (!(input_file = fopen(argv[file_index], "r"))) { + fprintf(stderr, "Error: Unable to open input file %s\n", argv[file_index]); + rc = EXIT_FAILURE; + goto program_exit; + } + } else { + input_file = stdin; + } + + if (!process_data(input_file)) { + rc = EXIT_FAILURE; + } + +close_file: + fclose(input_file); + +program_exit: + exit(rc); +} |