diff options
Diffstat (limited to 'vartak-results-data')
-rwxr-xr-x | vartak-results-data | 176 |
1 files changed, 176 insertions, 0 deletions
#!/bin/sh
#
# vartak-results-data - convert a vartak results PDF (or an equivalent plain
# text dump) into four CSV files written to the destination directory:
#   names.csv  Seat No,Name
#   marks.csv  Seat No,<header columns>
#   GP.csv     same header as marks.csv without the trailing ",Total"
#   CGP.csv    same header as marks.csv plus a trailing ",CGPA"
# Existing CSV files are never overwritten; they are only re-validated.
#
# Requires: file, pdftotext (PDF input only), realpath, mktemp, and sed/awk
# implementations that accept `0,/re/` addresses, `\s` and `{n}` intervals.
# Environment: DEST - default destination directory (overridden by -d).

help() {
  echo "vartak-results-data - convert vartak results pdf into CSV data files

USAGE:
  vartak-results-data [OPTION]... <FILE>

OPTIONS:
  -d DEST   produce data in the specified DESTination path/directory
  -t        top header
  -h        show this help message"
}

# Print every argument as its own WARNING line on stderr.
warn() { printf "WARNING: %s\n" "$@" >&2; }

# Print an error on stderr (%b: callers embed \n and \t) and abort.
err() { printf "vartak-results-data: %b\n" "$@" >&2; exit 1; }

# Remove the temporary text dump, if one was created. Always returns 0 so it
# never disturbs the exit status of the script.
cleanup() {
  if [ -n "$tmpfile" ]; then
    rm -f -- "$tmpfile"
  fi
}

# Initialise explicitly so values inherited from the environment cannot
# change the behaviour.
tflag=0
tmpfile=
trap cleanup EXIT
trap 'exit 1' HUP INT TERM

# Leading ':' selects silent mode, where getopts stores the offending option
# letter in OPTARG so it can be reported.
while getopts ':td:h' o; do
  case "$o" in
    d) export DEST="$OPTARG" ;;
    t) tflag=1 ;;
    h) help; exit ;;
    :) err "option requires an argument -- '$OPTARG'" ;;
    *) err "invalid option -- '$OPTARG'" ;;
  esac
done
shift $((OPTIND - 1))

if [ "$#" -lt 1 ]; then
  help >&2
  exit 1
fi

case "$1" in
  *ATKT*) err "ATKT files are not supported" ;;
esac

filetype="$(file --mime-type --brief -- "$1")"
case "$filetype" in
  application/pdf)
    # Convert into a private temporary file instead of "<name>.txt" next to
    # the PDF, so an unrelated file of that name is never clobbered/deleted.
    tmpfile="$(mktemp)" || err "cannot create temporary file"
    pdftotext -layout "$1" "$tmpfile" || err "pdftotext failed for: $1"
    file="$tmpfile"
    ;;
  text/plain)
    file="$1"
    ;;
  *)
    err "only PDF and text files are supported\n$1 :-\n\t$filetype"
    ;;
esac

# Resolve to an absolute path before changing directory.
file="$(realpath "$file")" || err "cannot resolve path of input file"
printf "\n:: Preparing data in : %s\n" "${DEST:=$PWD}"
mkdir -pv "$DEST"
cd "$DEST" || exit

# Column header: with -t it is taken from the first "Seat ... Name ... Total"
# line, otherwise from the remainder of the first row that starts with a
# five digit seat number. "[ NN ]" groups are turned into comma separators.
if [ "$tflag" = 1 ]; then
  header="$(sed -nE '0,/^Seat .*Name \s{2,}(.*Total).*$/ s//\1/p' "$file" |
    sed -E 's/\s?\[ [0-9]+(\s\])?\s*/,/g; s/,$//')"
else
  header="$(sed -nE '0,/^[0-9]{5} [/A-Z ]*\s{2,}(.*)/ s//\1/p' "$file" |
    sed -E 's/\s?\[ [0-9]+(\s\])?\s*/,/g; s/,$//')"
fi

if [ -f names.csv ]; then
  echo "names.csv already exists, skipping..."
else
  {
    echo "Seat No,Name"
    # The first header token is passed with -v and compared as a literal
    # prefix, so characters such as "/" in it cannot break the awk program.
    awk -v first="${header%%[ ,]*}" '
      # A name ends at the first token that starts with the first header
      # token, or with INTER / TW.
      function is_stop(tok) {
        return (first != "" && index(tok, first) == 1) || tok ~ /^(INTER|TW)/
      }
      /^[0-9]{5}/ {
        name = ""
        # Bounded by NF: a row without any stop token must not loop forever.
        for (i = 2; i <= NF && !is_stop($i); i++)
          name = (name == "" ? $i : name " " $i)
        print $1 "," name
      }
    ' "$file"
  } > names.csv
fi

header="Seat No,$header"

if [ -f marks.csv ]; then
  echo "marks.csv already exists, skipping..."
else
  {
    echo "$header"
    awk '
      /^[0-9]{5}/ { printf("%d", $1) }
      !/^[0-9]/ && !/Seat/ && !/Total\s*\[/ && /Total/ {
        # Locate the "Total" field; bounded because /Total/ also matches
        # lines where it only occurs as part of a longer word.
        start = 1
        while (start <= NF && $start != "Total") start++
        start++
        for (i = start; i <= NF; i++) {
          if ($i ~ /^AB$/) {
            printf(",%s", "0")      # AB is recorded as 0
          } else if ($i ~ /^[0-9]+$/) {
            printf(",%d", $i)
          }
        }
        printf("\n")
      }
    ' "$file"
  } > marks.csv
fi

if [ -f GP.csv ]; then
  echo "GP.csv already exists, skipping..."
else
  {
    echo "${header%,Total}"
    awk '
      /^[0-9]{5}/ { printf("%d", $1) }
      / GP / {
        start = 1
        while (start <= NF && $start != "GP") start++
        start++
        for (i = start; i <= NF; i++) {
          if ($i ~ /F/) {
            printf(",%s", "0")      # any field containing F is recorded as 0
          } else if ($i ~ /^[0-9]+$/) {
            printf(",%d", $i)
          }
        }
        printf("\n")
      }
    ' "$file"
  } > GP.csv
fi

if [ -f CGP.csv ]; then
  echo "CGP.csv already exists, skipping..."
else
  {
    echo "$header,CGPA"
    awk '
      /^[0-9]{5}/ { printf("%d", $1) }
      / GPA / { gpa = $NF }         # remembered until the next " CG " line
      / CG / {
        start = 1
        while (start <= NF && $start != "CG") start++
        start++
        for (i = start; i <= NF; i++) {
          if ($i ~ /F/) {
            printf(",%s", "0")
          } else if ($i ~ /^[0-9]+$/) {
            printf(",%d", $i)
          }
        }
        printf(",%.2f\n", gpa)
      }
    ' "$file"
  } > CGP.csv
fi

# Print the number of lines of CSV file $1 that do not contain "Seat No";
# warn when the first line is not a "Seat No," header.
rows() {
  head -n 1 "$1" | grep -q '^Seat No,' ||
    warn "missing header in file: $1"
  grep -cv 'Seat No' "$1"
}

# no. of rows for each files
nr_names="$(rows names.csv)"
nr_marks="$(rows marks.csv)"
nr_GP="$(rows GP.csv)"
nr_CGP="$(rows CGP.csv)"

if [ "$nr_names" != "$nr_marks" ] ||
  [ "$nr_names" != "$nr_GP" ] ||
  [ "$nr_names" != "$nr_CGP" ]; then
  warn "inconsitent number of rows"
fi

# Print the largest column count found in CSV file $1 (0 for an empty file);
# warn on stderr when the rows do not all have the same number of columns.
cols() {
  awk -F, '
    NR == 1 { min = NF }
    { if (NF > max) max = NF; if (NF < min) min = NF }
    END {
      if (max != min)
        print "WARNING: inconsitent columns in file: " FILENAME > "/dev/stderr"
      print max + 0
    }
  ' "$1"
}

# no. of columns for each file
nc_names="$(cols names.csv)"
nc_marks="$(cols marks.csv)"
nc_GP="$(cols GP.csv)"
nc_CGP="$(cols CGP.csv)"

[ "$nc_names" = 2 ] ||
  warn "names data doesn't have exactly 2 columns"
[ "$nc_marks" = "$(( nc_GP + 1 ))" ] ||
  warn "marks data doesn't have an additional column GP data"
[ "$nc_CGP" = "$(( nc_marks + 1 ))" ] ||
  warn "CGP data doesn't have an additional column to marks data"

# The temporary text dump (if any) is removed by the EXIT trap, so the script
# ends with the status of this printf rather than of a failed cleanup test.
printf ":: Finished\n"