summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--.gitignore5
-rw-r--r--Makefile4
-rw-r--r--include/cdf.h40
-rw-r--r--src/cdf.c244
4 files changed, 293 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..d5d3e26
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,5 @@
+.cache
+compile_commands.json
+bin/*
+build/*
+*.bak
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..0374e62
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,4 @@
+# Default target: build the cdf executable.
+all: bin/cdf
+
+# `all` names no file, so never let a stray file called "all" satisfy it.
+.PHONY: all
+
+# bin/ is git-ignored and therefore absent in a fresh checkout; create the
+# output directory before linking into it.
+bin/cdf: src/cdf.c include/cdf.h
+	mkdir -p $(@D)
+	gcc -std=c23 -Iinclude src/cdf.c -o bin/cdf
diff --git a/include/cdf.h b/include/cdf.h
new file mode 100644
index 0000000..418ffeb
--- /dev/null
+++ b/include/cdf.h
@@ -0,0 +1,40 @@
+/*
+ */
+
+#ifndef H_CDF
+#define H_CDF
+
+#include <stdlib.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <ctype.h>
+#include <getopt.h>
+#include <string.h>
+#include <stdint.h>
+#include <inttypes.h>
+#include <limits.h>
+#include <assert.h>
+
+/*
+ * A single 64-bit data value. Only one member is meaningful at a time; the
+ * read_data_int / read_data_uint / read_data_fp routines store into i, u and
+ * d respectively, and the matching print_data_* routine reads the same member.
+ */
+typedef union {
+ int64_t i;   /* value when the input is parsed as signed integers */
+ uint64_t u;  /* value when the input is parsed as unsigned integers */
+ double d;    /* value when the input is parsed as floating point */
+} Number;
+
+/*
+ * One input record: a data value together with the number of times it was
+ * observed. Each input record is parsed as "<count> <value>".
+ */
+typedef struct {
+ Number data;     /* the observed value (active member depends on input mode) */
+ uint64_t count;  /* occurrence count for this value */
+} DistRecord;
+
+/*
+ * Forward declarations for the file-local functions of cdf.c.
+ *
+ * parse_options  - parse command-line flags into the ARG_* globals; returns
+ *                  the argv index of the first non-option argument, or 0 on
+ *                  error.
+ * help           - print the usage message to stderr.
+ * process_data   - read records from `file`, compute each record's relative
+ *                  frequency and print the result; returns 0 when reading
+ *                  the records fails.
+ * expand_array   - grow the record array, updating *capacity; returns the
+ *                  new array or a null pointer on allocation failure.
+ * read_data_*    - parse "<count> <value>" records from `file` into *records
+ *                  (growing it as needed); return the number of records read
+ *                  or -1 on failure.
+ * print_data_*   - write one "<frequency>\t<value>" line per record to
+ *                  stdout; return 1.
+ */
+static int parse_options(int argc, char*const* argv);
+static void help();
+static int process_data(FILE *file);
+static DistRecord *expand_array(DistRecord *records, size_t *capacity);
+static int read_data_int(DistRecord **records, size_t capacity, FILE *file);
+static int read_data_uint(DistRecord **records, size_t capacity, FILE *file);
+static int read_data_fp(DistRecord **records, size_t capacity, FILE *file);
+static int print_data_fp(DistRecord *records, long double *freqs, size_t cnt);
+static int print_data_int(DistRecord *records, long double *freqs, size_t cnt);
+static int print_data_uint(DistRecord *records, long double *freqs, size_t cnt);
+
+#endif
+
+
diff --git a/src/cdf.c b/src/cdf.c
new file mode 100644
index 0000000..7d0bd87
--- /dev/null
+++ b/src/cdf.c
@@ -0,0 +1,244 @@
+/*
+ *
+ */
+
+
+#include "cdf.h"
+
+/*
+ * Global configuration variables. These are set based on command-line
+ * arguments, and read-only beyond that point.
+ */
+/* -r flag. NOTE(review): assigned in parse_options but not read by any code
+ * visible in this file. */
+static bool ARG_REVERSE_CDF = false;
+/* -f flag: parse and print data values as floating point (double). */
+static bool ARG_FP_INPUT = false;
+/* -u flag: parse and print data values as unsigned 64-bit integers. */
+static bool ARG_UINT_INPUT = false;
+/* -h flag: print the usage message and exit. */
+static bool ARG_HELP = false;
+
+/*
+ * Parse the command-line flags and record them in the ARG_* globals.
+ *
+ * Recognised flags:
+ *   -f  floating-point input      -u  unsigned integer input
+ *   -r  reverse CDF               -h  print usage
+ * -f and -u are mutually exclusive.
+ *
+ * Returns the argv index of the first non-option argument (optind, always
+ * >= 1) on success, or 0 when an unknown flag or an invalid flag combination
+ * was encountered.
+ */
+static int parse_options(int argc, char*const* argv) {
+    int arg;
+    bool error = false;
+
+    while ((arg = getopt(argc, argv, "frhu")) != -1) {
+        switch (arg) {
+        case 'f':
+            ARG_FP_INPUT = true;
+            break;
+        case 'r':
+            ARG_REVERSE_CDF = true;
+            break;
+        case 'u':
+            ARG_UINT_INPUT = true;
+            /* must not fall through: -u is unrelated to -h */
+            break;
+        case 'h':
+            ARG_HELP = true;
+            break;
+        case '?':
+            if (isprint(optopt)) {
+                fprintf(stderr, "Unknown option `-%c`.\n", optopt);
+            } else {
+                /* %x expects an unsigned argument */
+                fprintf(stderr, "Unknown option character `\\x%x`.\n",
+                        (unsigned) optopt);
+            }
+            error = true;
+            break;
+        default:
+            error = true;
+            break;
+        }
+    }
+
+    if (ARG_UINT_INPUT && ARG_FP_INPUT) {
+        fprintf(stderr, "Error: the -u and -f flags are mutually exclusive.\n");
+        error = true;
+    }
+
+    /* 0 can never be a valid optind, so it doubles as the error sentinel */
+    return error ? 0 : optind;
+}
+
+/*
+ * Print the usage message to stderr. The synopsis lists every flag accepted
+ * by parse_options ("frhu"); -f and -u are shown as alternatives because
+ * they are mutually exclusive.
+ */
+static void help(void) {
+    fprintf(stderr, "Usage:\ncdf [-f | -u] [-r] [-h] [filename]\n");
+}
+
+/*
+ * Double the capacity of a record array.
+ *
+ * records:  the current array (as returned by malloc/realloc).
+ * capacity: in/out element count; updated to the new capacity only when the
+ *           reallocation succeeds. A capacity of 0 grows to 1.
+ *
+ * Returns the (possibly moved) array on success. On failure an error is
+ * printed, a null pointer is returned, and `records` is left untouched and
+ * still owned by the caller.
+ */
+static DistRecord *expand_array(DistRecord *records, size_t *capacity) {
+    /* refuse to grow if the doubled size in bytes would overflow size_t */
+    if (*capacity > SIZE_MAX / sizeof(DistRecord) / 2) {
+        fprintf(stderr, "ERROR: Memory allocation failed\n");
+        return nullptr;
+    }
+
+    size_t new_capacity = (*capacity == 0) ? 1 : *capacity * 2;
+
+    /* realloc takes a size in bytes, not a number of elements */
+    DistRecord *grown = realloc(records, new_capacity * sizeof *grown);
+    if (!grown) {
+        fprintf(stderr, "ERROR: Memory allocation failed\n");
+        return nullptr;
+    }
+
+    *capacity = new_capacity;
+    return grown;
+}
+
+/*
+ * Read "<count> <value>" records with signed 64-bit values from `file`.
+ *
+ * records:  in/out pointer to a heap array with room for `capacity` records;
+ *           updated if the array has to be grown.
+ * capacity: number of records *records can currently hold (must be > 0).
+ *
+ * Returns the number of records read, or -1 on invalid arguments, malformed
+ * or unreadable input, allocation failure, or when the record count does not
+ * fit in an int. On failure *records still points at a valid array that the
+ * caller must free.
+ */
+static int read_data_int(DistRecord **records, size_t capacity, FILE *file) {
+    if (!records || !*records || capacity == 0) {
+        return -1;
+    }
+
+    size_t reccnt = 0;
+    int fields;
+
+    /* only a full two-field match is a record; anything else ends the loop */
+    while ((fields = fscanf(file, "%" SCNu64 " %" SCNd64,
+                            &(*records)[reccnt].count,
+                            &(*records)[reccnt].data.i)) == 2) {
+        reccnt++;
+
+        if (reccnt > (size_t) INT_MAX) {
+            fprintf(stderr, "ERROR: Too many input records\n");
+            return -1;
+        }
+
+        if (reccnt == capacity) {
+            /* keep the old pointer so the caller can still free it on failure */
+            DistRecord *grown = expand_array(*records, &capacity);
+            if (!grown) {
+                return -1;
+            }
+            *records = grown;
+        }
+    }
+
+    /* a short match (0 or 1 fields) means malformed input, not end of file */
+    if (fields != EOF || ferror(file)) {
+        fprintf(stderr, "ERROR: Failed to read record %zu\n", reccnt + 1);
+        return -1;
+    }
+
+    return (int) reccnt;
+}
+
+/*
+ * Read "<count> <value>" records with floating-point values from `file`.
+ *
+ * records:  in/out pointer to a heap array with room for `capacity` records;
+ *           updated if the array has to be grown.
+ * capacity: number of records *records can currently hold (must be > 0).
+ *
+ * Returns the number of records read, or -1 on invalid arguments, malformed
+ * or unreadable input, allocation failure, or when the record count does not
+ * fit in an int. On failure *records still points at a valid array that the
+ * caller must free.
+ */
+static int read_data_fp(DistRecord **records, size_t capacity, FILE *file) {
+    if (!records || !*records || capacity == 0) {
+        return -1;
+    }
+
+    size_t reccnt = 0;
+    int fields;
+
+    /* only a full two-field match is a record; anything else ends the loop */
+    while ((fields = fscanf(file, "%" SCNu64 " %lf",
+                            &(*records)[reccnt].count,
+                            &(*records)[reccnt].data.d)) == 2) {
+        reccnt++;
+
+        if (reccnt > (size_t) INT_MAX) {
+            fprintf(stderr, "ERROR: Too many input records\n");
+            return -1;
+        }
+
+        if (reccnt == capacity) {
+            /* keep the old pointer so the caller can still free it on failure */
+            DistRecord *grown = expand_array(*records, &capacity);
+            if (!grown) {
+                return -1;
+            }
+            *records = grown;
+        }
+    }
+
+    /* a short match (0 or 1 fields) means malformed input, not end of file */
+    if (fields != EOF || ferror(file)) {
+        fprintf(stderr, "ERROR: Failed to read record %zu\n", reccnt + 1);
+        return -1;
+    }
+
+    return (int) reccnt;
+}
+
+/*
+ * Read "<count> <value>" records with unsigned 64-bit values from `file`.
+ *
+ * records:  in/out pointer to a heap array with room for `capacity` records;
+ *           updated if the array has to be grown.
+ * capacity: number of records *records can currently hold (must be > 0).
+ *
+ * Returns the number of records read, or -1 on invalid arguments, malformed
+ * or unreadable input, allocation failure, or when the record count does not
+ * fit in an int. On failure *records still points at a valid array that the
+ * caller must free.
+ */
+static int read_data_uint(DistRecord **records, size_t capacity, FILE *file) {
+    if (!records || !*records || capacity == 0) {
+        return -1;
+    }
+
+    size_t reccnt = 0;
+    int fields;
+
+    /* only a full two-field match is a record; anything else ends the loop */
+    while ((fields = fscanf(file, "%" SCNu64 " %" SCNu64,
+                            &(*records)[reccnt].count,
+                            &(*records)[reccnt].data.u)) == 2) {
+        reccnt++;
+
+        if (reccnt > (size_t) INT_MAX) {
+            fprintf(stderr, "ERROR: Too many input records\n");
+            return -1;
+        }
+
+        if (reccnt == capacity) {
+            /* keep the old pointer so the caller can still free it on failure */
+            DistRecord *grown = expand_array(*records, &capacity);
+            if (!grown) {
+                return -1;
+            }
+            *records = grown;
+        }
+    }
+
+    /* a short match (0 or 1 fields) means malformed input, not end of file */
+    if (fields != EOF || ferror(file)) {
+        fprintf(stderr, "ERROR: Failed to read record %zu\n", reccnt + 1);
+        return -1;
+    }
+
+    return (int) reccnt;
+}
+
+/*
+ * Emit one line per record on stdout: the record's frequency, a tab, then
+ * its value read as a double. freqs[k] pairs with records[k]; `cnt` is the
+ * length of both arrays. Always returns 1.
+ */
+static int print_data_fp(DistRecord *records, long double *freqs, size_t cnt) {
+    const DistRecord *rec = records;
+    const long double *freq = freqs;
+
+    for (size_t left = cnt; left > 0; left--) {
+        printf("%Lf\t%lf\n", *freq, rec->data.d);
+        rec++;
+        freq++;
+    }
+
+    return 1;
+}
+
+/*
+ * Print one "<frequency>\t<value>" line per record to stdout, reading each
+ * value as a signed 64-bit integer. freqs[i] is the frequency belonging to
+ * records[i]; `cnt` is the length of both arrays. Always returns 1.
+ */
+static int print_data_int(DistRecord *records, long double *freqs, size_t cnt) {
+    for (size_t i = 0; i < cnt; i++) {
+        /* PRId64 matches int64_t on every platform; %ld only where long is 64-bit */
+        fprintf(stdout, "%Lf\t%" PRId64 "\n", freqs[i], records[i].data.i);
+    }
+
+    return 1;
+}
+
+/*
+ * Print one "<frequency>\t<value>" line per record to stdout, reading each
+ * value as an unsigned 64-bit integer. freqs[i] is the frequency belonging
+ * to records[i]; `cnt` is the length of both arrays. Always returns 1.
+ */
+static int print_data_uint(DistRecord *records, long double *freqs, size_t cnt) {
+    for (size_t i = 0; i < cnt; i++) {
+        /* an unsigned conversion is required: a signed one would print
+           values above INT64_MAX as negative numbers */
+        fprintf(stdout, "%Lf\t%" PRIu64 "\n", freqs[i], records[i].data.u);
+    }
+
+    return 1;
+}
+
+/*
+ * Read every record from `file`, compute each record's relative frequency
+ * (its count divided by the sum of all counts) and print the result.
+ *
+ * The input mode (signed, unsigned or floating point) is selected by the
+ * ARG_UINT_INPUT / ARG_FP_INPUT globals.
+ *
+ * Returns 1 on success (including empty input, which prints nothing) and 0
+ * on failure: allocation failure, a read error, or counts that sum to zero.
+ */
+static int process_data(FILE *file) {
+    int rc = 0;
+    size_t reccap = 100;
+    size_t nrec = 0;
+    uint64_t total_count = 0;
+    long double *freqs = nullptr;
+    ssize_t cnt;
+
+    DistRecord *records = malloc(reccap * sizeof *records);
+    if (!records) {
+        fprintf(stderr, "ERROR: memory allocation failure.\n");
+        goto cleanup;
+    }
+
+    /* FIXME: this could probably use a type-based macro to collapse the
+       if statements into a single macro call
+     */
+    if (ARG_FP_INPUT) {
+        cnt = read_data_fp(&records, reccap, file);
+    } else if (ARG_UINT_INPUT) {
+        cnt = read_data_uint(&records, reccap, file);
+    } else {
+        cnt = read_data_int(&records, reccap, file);
+    }
+
+    /* propagate the error */
+    if (cnt < 0) {
+        goto cleanup;
+    }
+
+    /* empty input: nothing to print, and malloc(0) below could return null */
+    if (cnt == 0) {
+        rc = 1;
+        goto cleanup;
+    }
+    nrec = (size_t) cnt;
+
+    /* calculate total sum of counts */
+    for (size_t i = 0; i < nrec; i++) {
+        total_count += records[i].count;
+    }
+
+    /* every frequency would be 0/0; report it instead of printing NaNs */
+    if (total_count == 0) {
+        fprintf(stderr, "ERROR: the record counts sum to zero.\n");
+        goto cleanup;
+    }
+
+    /* calculate relative frequency for each item */
+    freqs = malloc(nrec * sizeof *freqs);
+    if (!freqs) {
+        fprintf(stderr, "ERROR: memory allocation failure.\n");
+        goto cleanup;
+    }
+
+    for (size_t i = 0; i < nrec; i++) {
+        freqs[i] = (long double) (records[i].count) / (long double) (total_count);
+    }
+
+    if (ARG_FP_INPUT) {
+        rc = print_data_fp(records, freqs, nrec);
+    } else if (ARG_UINT_INPUT) {
+        rc = print_data_uint(records, freqs, nrec);
+    } else {
+        rc = print_data_int(records, freqs, nrec);
+    }
+
+cleanup:
+    /* free() accepts null pointers, so one exit path serves every case */
+    free(freqs);
+    free(records);
+    return rc;
+}
+
+
+/*
+ * Program entry point.
+ *
+ * Parses the flags, then reads records either from the file named by the
+ * first non-option argument or, when that argument is absent or is "-",
+ * from stdin. Exits with EXIT_SUCCESS when -h was given or the data was
+ * processed, and with EXIT_FAILURE on a bad command line, an unopenable
+ * input file, or a processing failure.
+ */
+int main(int argc, char **argv) {
+    const int first_operand = parse_options(argc, argv);
+
+    /* parse_options reports a bad command line by returning 0 */
+    if (first_operand == 0) {
+        help();
+        exit(EXIT_FAILURE);
+    }
+
+    /* if the -h argument is supplied, print usage and exit successfully */
+    if (ARG_HELP) {
+        help();
+        exit(EXIT_SUCCESS);
+    }
+
+    /* default to stdin; switch to a named file only when one was given */
+    FILE *source = stdin;
+    if (first_operand < argc && strcmp(argv[first_operand], "-") != 0) {
+        source = fopen(argv[first_operand], "r");
+        if (!source) {
+            fprintf(stderr, "Error: Unable to open input file %s\n", argv[first_operand]);
+            exit(EXIT_FAILURE);
+        }
+    }
+
+    const int status = process_data(source) ? EXIT_SUCCESS : EXIT_FAILURE;
+
+    fclose(source);
+    exit(status);
+}