1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
|
#!/bin/sh
# Print the usage summary (synopsis and the -d, -t and -h options) on stdout.
help() {
  printf '%s\n' \
    'vartak-results-data - convert vartak results pdf into CSV data files' \
    'USAGE:' \
    'vartak-results-data [OPTION]... <FILE>' \
    'OPTIONS:' \
    '-d DEST produce data in the specified DESTination path/directory' \
    '-t top header' \
    '-h show this help message'
}
# Write every argument to stderr on its own line, each prefixed "WARNING: ".
# Does not stop the script.
warn() {
  printf 'WARNING: %s\n' "$@" 1>&2
}
# Write every argument to stderr on its own line, prefixed with the program
# name as it appears in the help text, then terminate with exit status 1.
# %b (rather than %s) lets callers embed escapes such as \n and \t in the
# message.
err() {
  printf 'vartak-results-data: %b\n' "$@" >&2
  exit 1
}
# Parse command-line options:
#   -d DEST  export DEST, the output directory
#   -t       set tflag=1
#   -h       print the help text and exit
# The leading ':' in the option string puts getopts in silent mode: it then
# stores the offending option letter in OPTARG (instead of leaving OPTARG
# unset and printing its own diagnostic), so the messages below can name it.
while getopts ':td:h' o; do
  case "$o" in
    d) export DEST="$OPTARG" ;;
    t) tflag=1 ;;
    h) help; exit ;;
    :) err "option requires an argument -- '$OPTARG'" ;;
    *) err "invalid option -- '$OPTARG'" ;;
  esac
done
# Drop the parsed options so that $1 is the first operand.
shift $((OPTIND - 1))
# An input file operand is mandatory; without one, show the usage text on
# stderr and fail.
if [ "$#" -lt 1 ]; then
  help >&2
  exit 1
fi

# Inputs whose name contains "ATKT" are rejected up front.
[ "${1#*ATKT}" != "$1" ] && err "ATKT files are not supported"

# Choose the text source according to the MIME type reported by file(1).
filetype="$(file --mime-type --brief "$1")"
case "$filetype" in
  application/pdf)
    # Convert into a private temporary file instead of guessing the name
    # pdftotext would pick next to the PDF: the guess is wrong for inputs that
    # do not end in ".pdf", and a sibling .txt file that already exists would
    # be overwritten and later deleted by the clean-up step.
    tmpfile="$(mktemp "${TMPDIR:-/tmp}/vartak-results-data.XXXXXX")" ||
      err "cannot create temporary file"
    # Also remove the temporary file when the script exits early.
    trap 'rm -f "$tmpfile"' EXIT
    file="$tmpfile"
    tmp=1 # marks "$file" as a temporary file owned by this script
    pdftotext -layout "$1" "$file" ||
      err "failed to convert PDF to text: $1"
    ;;
  text/plain) file="$1" ;;
  *) err "only PDF and text files are supported\n$1 :-\n\t$filetype" ;;
esac

# Make the path absolute now; the script changes directory afterwards.
file="$(realpath "$file")" || err "cannot resolve the path of the input text"
# DEST falls back to the current directory when it is unset or empty.
: "${DEST:=$PWD}"
printf "\n:: Preparing data in : %s\n" "$DEST"
# Create the destination (including missing parents), then work from inside it.
mkdir -pv "$DEST"
cd "$DEST" || exit
# Build the comma-separated column header.
# The first sed prints only the captured part of the FIRST line that matches
# the chosen pattern:
#   -t given : the "Seat ... Name" heading line, captured up to "Total"
#   otherwise: the first line starting with a five-digit number, captured after
#              the upper-case name and a run of two or more whitespace chars
if [ "$tflag" = 1 ]; then
  extract='0,/^Seat .*Name \s{2,}(.*Total).*$/ s//\1/p'
else
  extract='0,/^[0-9]{5} [/A-Z ]*\s{2,}(.*)/ s//\1/p'
fi
# The second sed turns every "[ <digits> ]" marker (with surrounding
# whitespace) into a comma and drops a trailing comma.
header="$(sed -nE "$extract" "$file" |
  sed -E 's/\s?\[ [0-9]+(\s\])?\s*/,/g; s/,$//')"
# Write names.csv ("Seat No,Name") unless it already exists.
if [ -f names.csv ]; then
  echo "names.csv already exists, skipping..."
else
  echo "Seat No,Name" >> names.csv
  # The first header token is handed to awk with -v instead of being spliced
  # into the program text, so it cannot break the awk syntax.
  awk -v first="${header%%[ ,]*}" '
    BEGIN {
      # A name ends at the first field that starts with the first header
      # token, INTER or TW. With no header token only INTER/TW are used, so
      # the pattern never contains an empty alternative.
      stop = (first == "") ? "^(INTER|TW)" : "^(" first "|INTER|TW)"
    }
    # Rows of interest start with a five-digit seat number.
    /^[0-9]{5}/ {
      name = ""
      # The i <= NF bound ends the scan at the end of the line, so a row
      # without any stop field cannot loop forever.
      for (i = 2; i <= NF && $i !~ stop; i++)
        name = (name == "") ? $i : name " " $i
      print $1 "," name
    }
  ' "$file" >> names.csv
fi
# From here on the header carries the seat-number column as well.
header="Seat No,$header"

# Write marks.csv unless it already exists.
if [ -f marks.csv ]; then
  echo "marks.csv already exists, skipping..."
else
  echo "$header" > marks.csv
  awk '
    # A line starting with a five-digit seat number opens a new CSV row.
    /^[0-9]{5}/ { printf("%d", $1) }

    # Candidate marks line: mentions Total, does not start with a digit, and
    # is neither a "Seat" line nor a "Total [" line.
    !/^[0-9]/ && !/Seat/ && !/Total\s*\[/ && /Total/ {
      # Find the field that is exactly "Total". The scan is bounded by NF so
      # a line where Total only occurs inside a longer word cannot loop
      # forever; such a line is skipped.
      total_at = 0
      for (i = 1; i <= NF; i++) {
        if ($i == "Total") {
          total_at = i
          break
        }
      }
      if (!total_at)
        next

      # Emit the fields after "Total": AB becomes 0, plain integers are kept,
      # anything else is ignored.
      for (i = total_at + 1; i <= NF; i++) {
        if ($i ~ /^AB$/) {
          printf(",%s", "0")
        } else if ($i ~ /^[0-9]+$/) {
          printf(",%d", $i)
        }
      }
      printf("\n")
    }
  ' "$file" >> marks.csv
fi
# Write GP.csv unless it already exists. Its header is the marks header
# without a trailing ",Total" column.
if [ ! -f GP.csv ]; then
  echo "${header%,Total}" > GP.csv
  awk '
    # A line starting with a five-digit seat number opens a new CSV row.
    /^[0-9]{5}/ { printf("%d", $1) }

    # Lines containing " GP " carry the values that follow the GP field.
    / GP / {
      # Advance to the field that is exactly "GP".
      for (gp_at = 1; $gp_at != "GP"; gp_at++) ;

      # Fields containing F become 0, plain integers are kept, anything else
      # is ignored.
      for (col = gp_at + 1; col <= NF; col++) {
        if ($col ~ /F/)
          printf(",%s", "0")
        else if ($col ~ /^[0-9]+$/)
          printf(",%d", $col)
      }
      printf("\n")
    }
  ' "$file" >> GP.csv
else
  echo "GP.csv already exists, skipping..."
fi
# Write CGP.csv unless it already exists. Its header is the marks header plus
# a final CGPA column.
if [ ! -f CGP.csv ]; then
  echo "$header,CGPA" > CGP.csv
  awk '
    # A line starting with a five-digit seat number opens a new CSV row.
    /^[0-9]{5}/ { printf("%d", $1) }

    # Remember the last field of the most recent line containing " GPA ".
    / GPA / { gpa = $NF }

    # Lines containing " CG " carry the values that follow the CG field.
    / CG / {
      # Advance to the field that is exactly "CG".
      for (cg_at = 1; $cg_at != "CG"; cg_at++) ;

      # Fields containing F become 0, plain integers are kept, anything else
      # is ignored.
      for (col = cg_at + 1; col <= NF; col++) {
        if ($col ~ /F/)
          printf(",%s", "0")
        else if ($col ~ /^[0-9]+$/)
          printf(",%d", $col)
      }
      # Close the row with the remembered GPA value, two decimals.
      printf(",%.2f\n", gpa)
    }
  ' "$file" >> CGP.csv
else
  echo "CGP.csv already exists, skipping..."
fi
# rows FILE
# Print the number of lines in FILE that do not contain "Seat No".
# A warning is issued first when line 1 of FILE does not begin with the
# "Seat No," header.
rows() {
  if ! sed -n '1p' "$1" | grep -q '^Seat No,'; then
    warn "missing header in file: $1"
  fi
  grep -cv 'Seat No' "$1"
}
# Number of data rows in each generated file.
nr_names="$(rows names.csv)"
nr_marks="$(rows marks.csv)"
nr_GP="$(rows GP.csv)"
nr_CGP="$(rows CGP.csv)"

# All four files are expected to have the same number of rows; warn when any
# of them differs from names.csv.
if [ "$nr_names" != "$nr_marks" ] ||
  [ "$nr_names" != "$nr_GP" ] ||
  [ "$nr_names" != "$nr_CGP" ]; then
  warn "inconsistent number of rows"
fi
# cols FILE
# Print the largest number of comma-separated fields found on any line of
# FILE (0 for an empty file). When the lines do not all have the same number
# of fields, a warning naming FILE is written to stderr as well.
cols() {
  awk -F, '
    NR == 1 { min = NF }
    NF > max { max = NF }
    NF < min { min = NF }
    END {
      if (max != min)
        print "WARNING: inconsistent columns in file: " FILENAME > "/dev/stderr"
      # Adding 0 forces a number, so an empty file yields 0 rather than an
      # empty line.
      print max + 0
    }
  ' "$1"
}
# Column count of each generated file.
nc_names="$(cols names.csv)"
nc_marks="$(cols marks.csv)"
nc_GP="$(cols GP.csv)"
nc_CGP="$(cols CGP.csv)"

# Expected shape: names has 2 columns, marks has one more column than GP,
# and CGP has one more column than marks. Each mismatch gets its own warning.
if [ "$nc_names" != 2 ]; then
  warn "names data doesn't have exactly 2 columns"
fi
if [ "$nc_marks" != "$((nc_GP + 1))" ]; then
  warn "marks data doesn't have an additional column GP data"
fi
if [ "$nc_CGP" != "$((nc_marks + 1))" ]; then
  warn "CGP data doesn't have an additional column to marks data"
fi
printf ":: Finished\n"
# Clean up: remove the intermediate text file, but only when this script
# created it (tmp is set to 1 when the input was a PDF).
# An if statement is used instead of "[ ... ] && rm" because this is the last
# command: with the short-circuit form a failed test would become the exit
# status of the whole script whenever there is nothing to remove.
if [ "$tmp" = 1 ]; then
  rm -f "$file"
fi
|