1 files changed, 176 insertions, 0 deletions
diff --git a/vartak-results-data b/vartak-results-data
new file mode 100755
index 0000000..2e63f44
--- /dev/null
+++ b/vartak-results-data
@@ -0,0 +1,176 @@
+#!/bin/sh
+
+help() { echo "vartak-results-data - convert vartak results pdf into CSV data files
+
+USAGE:
+	vartak-results-data [OPTION]... <FILE>
+
+OPTIONS:
+	-d DEST   produce data in the specified DESTination path/directory
+	-t        top header
+	-h        show this help message"; }
+
+warn() { printf "WARNING: %s\n" "$@" >&2; }
+err() { printf "vartak-result-data: %b\n" "$@" >&2; exit 1; }
+
+while getopts 'td:h' o; do case "$o" in
+	d) export DEST="$OPTARG" ;;
+	t) tflag=1 ;;
+	h) help; exit ;;
+	*) err "invalid option -- '$OPTARG'" ;;
+esac done
+shift $((OPTIND - 1))
+
+[ "$#" -lt 1 ] && help >&2 && exit 1
+
+[ "${1#*ATKT}" != "$1" ] && err "ATKT files are not supported"
+
+filetype="$(file --mime-type --brief "$1")"
+case "$filetype" in
+	application/pdf) pdftotext -layout "$1"; file="${1%.pdf}.txt"; tmp=1 ;;
+	text/plain) file="$1" ;;
+	*) err "only PDF and text files are supported\n$1 :-\n\t$filetype" ;;
+esac
+
+file="$(realpath "$file")"
+printf "\n:: Preparing data in : %s\n" "${DEST:=$PWD}"
+mkdir -pv "$DEST"
+cd "$DEST" || exit
+
+if [ "$tflag" = 1 ]; then
+	header="$(sed -nE "0,/^Seat .*Name \s{2,}(.*Total).*$/ s//\1/p" "$file" |
+		sed -E 's/\s?\[ [0-9]+(\s\])?\s*/,/g; s/,$//')"
+else
+	header="$(sed -nE '0,/^[0-9]{5} [/A-Z ]*\s{2,}(.*)/ s//\1/p' "$file" |
+		sed -E 's/\s?\[ [0-9]+(\s\])?\s*/,/g; s/,$//')"
+fi
+
+if [ -f names.csv ]; then
+	echo "names.csv already exists, skipping..."
+else
+	echo "Seat No,Name" >> names.csv
+	awk -f - "$file" >> names.csv << EOF
+		/^[0-9]{5}/ {
+			name = ""
+			for (i = 2; \$i !~ /^(${header%%[ ,]*}|INTER|TW)/; i++) { name = name " " \$i }
+			sub(/^\s*/, "", name)
+			print \$1 "," name
+		}
+EOF
+fi
+
+header="Seat No,$header"
+
+if [ -f marks.csv ]; then
+	echo "marks.csv already exists, skipping..."
+else
+	echo "$header" > marks.csv
+	awk '
+		/^[0-9]{5}/ { printf("%d", $1) }
+		!/^[0-9]/ && !/Seat/ && !/Total\s*\[/ && /Total/ {
+			start = 1
+			while ($start != "Total") start++
+			start++
+			for (i = start; i <= NF; i++) {
+				if ($i ~ /^AB$/) {
+					printf(",%s", "0")
+				} else if ($i ~ /^[0-9]+$/) {
+					printf(",%d", $i)
+				}
+			}
+			printf("\n")
+		}
+	' "$file" >> marks.csv
+fi
+
+if [ -f GP.csv ]; then
+	echo "GP.csv already exists, skipping..."
+else
+	echo "${header%,Total}" > GP.csv
+	awk '
+		/^[0-9]{5}/ { printf("%d", $1) }
+		/ GP / {
+			start = 1
+			while ($start != "GP") start++
+			start++
+			for (i = start; i <= NF; i++) {
+				if ($i ~ /F/) {
+					printf(",%s", "0")
+				} else if ($i ~ /^[0-9]+$/) {
+					printf(",%d", $i)
+				}
+			}
+			printf("\n")
+		}
+	' "$file" >> GP.csv
+fi
+
+if [ -f CGP.csv ]; then
+	echo "CGP.csv already exists, skipping..."
+else
+	echo "$header,CGPA" > CGP.csv
+	awk '
+		/^[0-9]{5}/ { printf("%d", $1) }
+		/ GPA / { gpa = $NF }
+		/ CG / {
+			start = 1
+			while ($start != "CG") start++
+			start++
+			for (i = start; i <= NF; i++) {
+				if ($i ~ /F/) {
+					printf(",%s", "0")
+				} else if ($i ~ /^[0-9]+$/) {
+					printf(",%d", $i)
+				}
+			}
+			printf(",%.2f\n", gpa)
+		}
+	' "$file" >> CGP.csv
+fi
+
+rows() {
+	[ -z "$(sed -n '1!d; /^Seat No,/p' "$1")" ] &&
+		warn "missing header in file: $1";
+	grep -cv 'Seat No' "$1"
+}
+
+# no. of rows for each files
+nr_names="$(rows names.csv)"
+nr_marks="$(rows marks.csv)"
+nr_GP="$(rows GP.csv)"
+nr_CGP="$(rows CGP.csv)"
+
+if [ "$nr_names" != "$nr_marks" ] ||
+		[ "$nr_names" != "$nr_GP" ] ||
+		[ "$nr_names" != "$nr_CGP" ]; then
+	warn "inconsitent number of rows"
+fi
+
+cols() { awk -F, '
+		NR == 1 { min = NF }
+		{ if (NF > max) max = NF; if (NF < min) min = NF }
+		END {
+			if (max != min)
+				print "WARNING: inconsitent columns in file: " FILENAME > "/dev/stderr"
+			print max
+		}
+	' "$1"; }
+
+# no. of columns for each file
+nc_names="$(cols names.csv)"
+nc_marks="$(cols marks.csv)"
+nc_GP="$(cols GP.csv)"
+nc_CGP="$(cols CGP.csv)"
+
+[ "$nc_names" != 2 ] &&
+	warn "names data doesn't have exactly 2 columns"
+[ "$nc_marks" != "$(( nc_GP + 1))" ] &&
+	warn "marks data doesn't have an additional column GP data"
+[ "$nc_CGP" != "$(( nc_marks + 1 ))" ] &&
+	warn "CGP data doesn't have an additional column to marks data"
+
+printf ":: Finished\n"
+
+# clean up
+[ "$tmp" = 1 ] && rm -f "$file"
+