summary refs log tree commit diff
path: root/vartak-results-data
diff options
context:
space:
mode:
author Vikas Kushwaha <dev@vikas.rocks> 2024-11-21 14:37:35 +0530
committer Vikas Kushwaha <dev@vikas.rocks> 2024-11-21 14:37:35 +0530
commit fe4da131de10a4318ab415d6540f9e939a5b17f2 (patch)
tree 59a582c39e0ef71530ceaa13ecf0d5d0e3670cc6 /vartak-results-data
Initial commit (HEAD, master)
Diffstat (limited to 'vartak-results-data')
-rwxr-xr-x vartak-results-data 176
1 files changed, 176 insertions, 0 deletions
diff --git a/vartak-results-data b/vartak-results-data
new file mode 100755
index 0000000..2e63f44
--- /dev/null
+++ b/vartak-results-data
@@ -0,0 +1,176 @@
+#!/bin/sh
+
+help() { echo "vartak-results-data - convert vartak results pdf into CSV data files
+
+USAGE:
+ vartak-results-data [OPTION]... <FILE>
+
+OPTIONS:
+ -d DEST produce data in the specified DESTination path/directory
+ -t top header
+ -h show this help message"; }
+
+warn() { printf "WARNING: %s\n" "$@" >&2; }
+err() { printf "vartak-result-data: %b\n" "$@" >&2; exit 1; }
+
+# Parse command-line options.
+#   -d DEST  export DEST with the given value
+#   -t       set tflag=1
+#   -h       print the help text and exit
+# The leading ':' in the optstring puts getopts in silent mode: the offending
+# option letter is then available in $OPTARG for our own messages, and
+# getopts does not print a second diagnostic of its own.
+while getopts ':td:h' o; do
+  case "$o" in
+    d) export DEST="$OPTARG" ;;
+    t) tflag=1 ;;
+    h) help; exit ;;
+    :) err "option requires an argument -- '$OPTARG'" ;;
+    *) err "invalid option -- '$OPTARG'" ;;
+  esac
+done
+# Drop the parsed options so that $1 is the first operand.
+shift $((OPTIND - 1))
+
+[ "$#" -lt 1 ] && help >&2 && exit 1
+
+[ "${1#*ATKT}" != "$1" ] && err "ATKT files are not supported"
+
+filetype="$(file --mime-type --brief "$1")"
+case "$filetype" in
+ application/pdf) pdftotext -layout "$1"; file="${1%.pdf}.txt"; tmp=1 ;;
+ text/plain) file="$1" ;;
+ *) err "only PDF and text files are supported\n$1 :-\n\t$filetype" ;;
+esac
+
+file="$(realpath "$file")"
+printf "\n:: Preparing data in : %s\n" "${DEST:=$PWD}"
+mkdir -pv "$DEST"
+cd "$DEST" || exit
+
+if [ "$tflag" = 1 ]; then
+ header="$(sed -nE "0,/^Seat .*Name \s{2,}(.*Total).*$/ s//\1/p" "$file" |
+ sed -E 's/\s?\[ [0-9]+(\s\])?\s*/,/g; s/,$//')"
+else
+ header="$(sed -nE '0,/^[0-9]{5} [/A-Z ]*\s{2,}(.*)/ s//\1/p' "$file" |
+ sed -E 's/\s?\[ [0-9]+(\s\])?\s*/,/g; s/,$//')"
+fi
+
+# names.csv: one "seat,name" row for every line that starts with five
+# digits. An existing names.csv is left untouched.
+if [ -f names.csv ]; then
+  echo "names.csv already exists, skipping..."
+else
+  # The name is every field after the first one, up to the first field that
+  # starts with the first header token, "INTER" or "TW". The stop pattern is
+  # handed to awk with -v instead of being spliced into the program text.
+  {
+    echo "Seat No,Name"
+    awk -v stop="^(${header%%[ ,]*}|INTER|TW)" '
+      /^[0-9]{5}/ {
+        name = ""
+        # i <= NF keeps the scan finite when no field matches the stop
+        # pattern; without the bound the loop would run past the last field
+        # forever.
+        for (i = 2; i <= NF && $i !~ stop; i++) name = name " " $i
+        sub(/^ */, "", name)
+        print $1 "," name
+      }
+    ' "$file"
+  } >> names.csv
+fi
+
+header="Seat No,$header"
+
+if [ -f marks.csv ]; then
+ echo "marks.csv already exists, skipping..."
+else
+ echo "$header" > marks.csv
+ awk '
+ /^[0-9]{5}/ { printf("%d", $1) }
+ !/^[0-9]/ && !/Seat/ && !/Total\s*\[/ && /Total/ {
+ start = 1
+ while ($start != "Total") start++
+ start++
+ for (i = start; i <= NF; i++) {
+ if ($i ~ /^AB$/) {
+ printf(",%s", "0")
+ } else if ($i ~ /^[0-9]+$/) {
+ printf(",%d", $i)
+ }
+ }
+ printf("\n")
+ }
+ ' "$file" >> marks.csv
+fi
+
+# GP.csv: the seat number of each line starting with five digits, followed
+# by the values listed after the "GP" field on lines containing " GP ".
+# Any value containing "F" is written as 0. The header line is $header with
+# a trailing ",Total" removed. An existing GP.csv is left untouched.
+if [ ! -f GP.csv ]; then
+  echo "${header%,Total}" > GP.csv
+  awk '
+    /^[0-9]{5}/ { printf("%d", $1) }
+    / GP / {
+      # Walk to the field that is exactly "GP", then emit what follows it.
+      for (pos = 1; $pos != "GP"; pos++) { }
+      for (k = pos + 1; k <= NF; k++) {
+        if ($k ~ /F/)
+          printf(",0")
+        else if ($k ~ /^[0-9]+$/)
+          printf(",%d", $k)
+      }
+      printf("\n")
+    }
+  ' "$file" >> GP.csv
+else
+  echo "GP.csv already exists, skipping..."
+fi
+
+if [ -f CGP.csv ]; then
+ echo "CGP.csv already exists, skipping..."
+else
+ echo "$header,CGPA" > CGP.csv
+ awk '
+ /^[0-9]{5}/ { printf("%d", $1) }
+ / GPA / { gpa = $NF }
+ / CG / {
+ start = 1
+ while ($start != "CG") start++
+ start++
+ for (i = start; i <= NF; i++) {
+ if ($i ~ /F/) {
+ printf(",%s", "0")
+ } else if ($i ~ /^[0-9]+$/) {
+ printf(",%d", $i)
+ }
+ }
+ printf(",%.2f\n", gpa)
+ }
+ ' "$file" >> CGP.csv
+fi
+
+rows() {
+ [ -z "$(sed -n '1!d; /^Seat No,/p' "$1")" ] &&
+ warn "missing header in file: $1";
+ grep -cv 'Seat No' "$1"
+}
+
+# Count the data rows of every CSV file and warn when the counts disagree.
+nr_names="$(rows names.csv)"
+nr_marks="$(rows marks.csv)"
+nr_GP="$(rows GP.csv)"
+nr_CGP="$(rows CGP.csv)"
+
+if [ "$nr_names" != "$nr_marks" ] ||
+  [ "$nr_names" != "$nr_GP" ] ||
+  [ "$nr_names" != "$nr_CGP" ]; then
+  # Include the individual counts so the odd file out can be identified.
+  warn "inconsistent number of rows (names=$nr_names marks=$nr_marks GP=$nr_GP CGP=$nr_CGP)"
+fi
+
+cols() { awk -F, '
+ NR == 1 { min = NF }
+ { if (NF > max) max = NF; if (NF < min) min = NF }
+ END {
+ if (max != min)
+ print "WARNING: inconsitent columns in file: " FILENAME > "/dev/stderr"
+ print max
+ }
+ ' "$1"; }
+
+# no. of columns for each file
+nc_names="$(cols names.csv)"
+nc_marks="$(cols marks.csv)"
+nc_GP="$(cols GP.csv)"
+nc_CGP="$(cols CGP.csv)"
+
+[ "$nc_names" != 2 ] &&
+ warn "names data doesn't have exactly 2 columns"
+[ "$nc_marks" != "$(( nc_GP + 1))" ] &&
+ warn "marks data doesn't have an additional column GP data"
+[ "$nc_CGP" != "$(( nc_marks + 1 ))" ] &&
+ warn "CGP data doesn't have an additional column to marks data"
+
+printf ":: Finished\n"
+
+# clean up
+[ "$tmp" = 1 ] && rm -f "$file"
+