#!/bin/sh
# vartak-results-data - convert a vartak results PDF (or a text dump of it, as
# produced by `pdftotext -layout`) into four CSV files written to DEST:
#   names.csv  seat number and student name
#   marks.csv  seat number and the numbers following "Total" on each result row
#   GP.csv     seat number and the numbers following "GP"
#   CGP.csv    seat number, the numbers following "CG", and the CGPA
# Files that already exist in DEST are left untouched. After extraction the
# row and column counts of the four files are cross-checked and any mismatch
# is reported as a warning on stderr.
#
# Requires GNU sed/awk (uses `0,/re/`, `\s` and `{n}` intervals), file(1),
# realpath, mktemp and, for PDF input, pdftotext.

# Print the usage text on stdout.
help() {
  cat << 'EOF'
vartak-results-data - convert vartak results pdf into CSV data files

USAGE:
    vartak-results-data [OPTION]... FILE

OPTIONS:
    -d DEST  produce data in the specified DESTination path/directory
    -t       top header
    -h       show this help message
EOF
}

# Print every argument as its own "WARNING: ..." line on stderr.
warn() {
  printf "WARNING: %s\n" "$@" >&2
}

# Print an error on stderr and abort with status 1.
# %b is intentional: callers embed \n and \t escapes in the message.
err() {
  printf "vartak-results-data: %b\n" "$@" >&2
  exit 1
}

# Remove the temporary text dump, if one was created for a PDF input.
cleanup() {
  if [ -n "$tmpfile" ]; then
    rm -f -- "$tmpfile"
  fi
}

tmpfile=
tflag=0
trap cleanup EXIT
# Turn fatal signals into a normal exit so that the EXIT trap still runs.
trap 'exit 129' HUP
trap 'exit 130' INT
trap 'exit 143' TERM

# The leading ':' selects getopts' silent mode, which is what makes $OPTARG
# hold the offending option letter in the ':' and '?' cases.
while getopts ':td:h' o; do
  case "$o" in
    d) export DEST="$OPTARG" ;;
    t) tflag=1 ;;
    h) help; exit ;;
    :) err "option requires an argument -- '$OPTARG'" ;;
    *) err "invalid option -- '$OPTARG'" ;;
  esac
done
shift $((OPTIND - 1))

if [ "$#" -lt 1 ]; then
  help >&2
  exit 1
fi

# Any file name containing "ATKT" is rejected.
case "$1" in
  *ATKT*) err "ATKT files are not supported" ;;
esac

filetype="$(file --mime-type --brief -- "$1")"
case "$filetype" in
  application/pdf)
    # Convert into a private temporary file instead of "<name>.txt" next to
    # the PDF, so an existing text file of that name is never overwritten
    # and later deleted.
    tmpfile="$(mktemp)" || err "cannot create a temporary file"
    pdftotext -layout "$1" "$tmpfile" || err "failed to convert PDF: $1"
    file="$tmpfile"
    ;;
  text/plain)
    file="$1"
    ;;
  *)
    err "only PDF and text files are supported\n$1 :-\n\t$filetype"
    ;;
esac

# Resolve to an absolute path before changing directory below.
file="$(realpath "$file")"

printf "\n:: Preparing data in : %s\n" "${DEST:=$PWD}"
mkdir -pv "$DEST" || err "cannot create directory: $DEST"
cd "$DEST" || exit

# Build the comma-separated column header.
#   -t given: taken from the first "Seat ... Name ... Total" heading line.
#   default : taken from the first line that starts with a 5-digit seat number.
# The second sed turns every "[ NN ]"-style group into a comma.
if [ "$tflag" = 1 ]; then
  header="$(sed -nE '0,/^Seat .*Name \s{2,}(.*Total).*$/ s//\1/p' "$file" \
    | sed -E 's/\s?\[ [0-9]+(\s\])?\s*/,/g; s/,$//')"
else
  header="$(sed -nE '0,/^[0-9]{5} [/A-Z ]*\s{2,}(.*)/ s//\1/p' "$file" \
    | sed -E 's/\s?\[ [0-9]+(\s\])?\s*/,/g; s/,$//')"
fi
[ -n "$header" ] || warn "could not detect the column header in: $file"

if [ -f names.csv ]; then
  echo "names.csv already exists, skipping..."
else
  # A name is every field after the seat number up to (not including) the
  # first field that starts with the first header token, "INTER" or "TW".
  # The token is handed over with -v rather than spliced into the awk source.
  {
    echo "Seat No,Name"
    awk -v first="${header%%[ ,]*}" '
      BEGIN {
        # Avoid an empty regex alternative when no header token was found.
        stop = (first == "") ? "^(INTER|TW)" : "^(" first "|INTER|TW)"
      }
      /^[0-9]{5}/ {
        name = ""
        # i <= NF keeps the scan finite on rows with no stop token.
        for (i = 2; i <= NF && $i !~ stop; i++)
          name = name " " $i
        sub(/^\s*/, "", name)
        print $1 "," name
      }
    ' "$file"
  } > names.csv
fi

header="Seat No,$header"

if [ -f marks.csv ]; then
  echo "marks.csv already exists, skipping..."
else
  # The seat number is printed without a newline; the row is completed by
  # the next matching "Total" line. "AB" entries are written as 0.
  {
    echo "$header"
    awk '
      /^[0-9]{5}/ { printf("%d", $1) }
      !/^[0-9]/ && !/Seat/ && !/Total\s*\[/ && /Total/ {
        start = 1
        # Bounded: /Total/ can match without a field being exactly "Total".
        while (start <= NF && $start != "Total")
          start++
        start++
        for (i = start; i <= NF; i++) {
          if ($i ~ /^AB$/)
            printf(",%s", "0")
          else if ($i ~ /^[0-9]+$/)
            printf(",%d", $i)
        }
        printf("\n")
      }
    ' "$file"
  } > marks.csv
fi

if [ -f GP.csv ]; then
  echo "GP.csv already exists, skipping..."
else
  # Same layout as marks.csv but without the Total column; any entry
  # containing "F" is written as 0.
  {
    echo "${header%,Total}"
    awk '
      /^[0-9]{5}/ { printf("%d", $1) }
      / GP / {
        start = 1
        while (start <= NF && $start != "GP")
          start++
        start++
        for (i = start; i <= NF; i++) {
          if ($i ~ /F/)
            printf(",%s", "0")
          else if ($i ~ /^[0-9]+$/)
            printf(",%d", $i)
        }
        printf("\n")
      }
    ' "$file"
  } > GP.csv
fi

if [ -f CGP.csv ]; then
  echo "CGP.csv already exists, skipping..."
else
  # The last field of the most recent " GPA " line is remembered and
  # appended to the following " CG " row as the CGPA column.
  {
    echo "$header,CGPA"
    awk '
      /^[0-9]{5}/ { printf("%d", $1) }
      / GPA / { gpa = $NF }
      / CG / {
        start = 1
        while (start <= NF && $start != "CG")
          start++
        start++
        for (i = start; i <= NF; i++) {
          if ($i ~ /F/)
            printf(",%s", "0")
          else if ($i ~ /^[0-9]+$/)
            printf(",%d", $i)
        }
        printf(",%.2f\n", gpa)
      }
    ' "$file"
  } > CGP.csv
fi

# Print the number of lines in CSV file $1 that do not contain "Seat No";
# warn when the first line is not a "Seat No," header.
rows() {
  if [ -z "$(sed -n '1!d; /^Seat No,/p' "$1")" ]; then
    warn "missing header in file: $1"
  fi
  grep -cv 'Seat No' "$1"
}

# no. of rows for each file
nr_names="$(rows names.csv)"
nr_marks="$(rows marks.csv)"
nr_GP="$(rows GP.csv)"
nr_CGP="$(rows CGP.csv)"

if [ "$nr_names" != "$nr_marks" ] ||
  [ "$nr_names" != "$nr_GP" ] ||
  [ "$nr_names" != "$nr_CGP" ]; then
  warn "inconsistent number of rows"
fi

# Print the widest column count found in CSV file $1; warn on stderr when
# the lines do not all have the same number of columns.
cols() {
  awk -F, '
    NR == 1 { min = NF }
    {
      if (NF > max) max = NF
      if (NF < min) min = NF
    }
    END {
      if (max != min)
        print "WARNING: inconsistent columns in file: " FILENAME > "/dev/stderr"
      print max
    }
  ' "$1"
}

# no. of columns for each file
nc_names="$(cols names.csv)"
nc_marks="$(cols marks.csv)"
nc_GP="$(cols GP.csv)"
nc_CGP="$(cols CGP.csv)"

if [ "$nc_names" != 2 ]; then
  warn "names data doesn't have exactly 2 columns"
fi
if [ "$nc_marks" != "$((nc_GP + 1))" ]; then
  warn "marks data doesn't have an additional column to GP data"
fi
if [ "$nc_CGP" != "$((nc_marks + 1))" ]; then
  warn "CGP data doesn't have an additional column to marks data"
fi

# The temporary text dump (if any) is removed by the EXIT trap, so the
# script's exit status is that of this final printf.
printf ":: Finished\n"