summaryrefslogtreecommitdiff
path: root/.local/bin/web/w3mman
blob: 5013fbde56c8cf254da72cc5082c7a97c4a2bacb (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
#!/bin/bash

# w3m-man
#
# Use wget, w3m and less to download and view man pages.
#
# Homepage:  https://github.com/sc0ttj/w3m-man
#
# Supports using $MANPAGER, $HTMLPAGER, $TERM_BROWSER, and
# falls back to w3m and less if these are not set.
#
# How it works:
#
# - saves manpage from online sources to $HOME/.w3m-manpages/
# - not proper man pages, just plain text output of websites
# - prettifies them a bit (removes junk headers, links, messages)
#
# Usage:
#
#  man <command>        # view man page in plain text in $MANPAGER (or $PAGER)
#  man -H <command>     # view man page as HTML in $TERM_BROWSER (default: w3m)
#  man <command> --url  # print the URL of the man page to STDOUT

# TODO:
#
# - Display the path searched for manpages:
#
#   man --path
#
# - Display the location of a manpage rather than the manpage itself:
#
#   man -w command
#
# - Search for manpages containing a search string:
#
#   man -k "search_string"
#

if [ "$1" = "--help" ];then

  echo "# man (w3m-man)
# Use wget, w3m and less to download and view man pages
# See https://github.com/sc0ttj/w3m-man

Usage:

  man <command>         # (download and) print the man page

  man <command> --url   # print the URL from which the man
                        # page was/would be downloaded

  man --help            # print this help info

Where <command> is the command name you want to read about.

Examples:

  man diff                # view 'diff' command (section 1)

  man mount.8             # view 'mount' in section 8 (config stuff)

  man 8 mount             # same as above, but doesn't work with -H

  man -H mount            # view 'mount' man page as HTML using $BROWSER

  man <command> --url     # only print the URL from which the man page
                          # was/would be downloaded

Man pages are divided into sections, as follows:

 1. User: most user commands and programs.

 2. System: calls by the Linux kernel.

 3. Library: documents provided by the standard C library.

 4. Devices: documents various devices, most of which reside in /dev.

 5. Files: describes various file formats and filesystems and proc(5).

 7. Overviews, conventions, and miscellaneous.

 8. Superuser and system administration commands.

Checks the following URLs:

the Ubuntu or Debian man pages, also:

http://man.he.net/?topic=\${command}&section=\${section}
http://manpages.org/\${command}/\${section}
https://linux.die.net/man/\${section}/\${command}
https://www.mankier.com/\${section}/\${command}
https://man7.org/linux/man-pages/man\${section}/\${command}.\${section}.html
http://manpages.org/\${command}
http://man.he.net/?topic=\${command}&section=all
https://ss64.com/bash/\${command}.html
"
  exit 0
fi

old_man="$(command -v man)"
unset man

command="$1"
section="1"

DISTRO_COMPAT_VERSION="latest"
if [ -f /etc/DISTRO_SPECS ];then
  source /etc/DISTRO_SPECS
fi


# support GNU man -H option (to view as HTML)
if [ "$1" = "-H" ];then
  command="$2"
fi

# support GNU man syntax `man 1 mount`
re='^[1-8]+$'
if [[ $1 =~ $re ]] ; then
  command="${2}.$1"
  section="$1"
fi

# support GNU man syntax `man mount.1`, `man mount.2`, etc
section="${command//*\./}"
command="${command//\.*/}"
if [ "$command" = "$section" ];then
  section=1
fi

manpage_file="$HOME/.w3m-manpages/$command.$section"

# support env var $MANPAGER
pager=${MANPAGER:-$PAGER}
pager=${pager:-less -XR}
w3m='w3m -o auto_image=false -o display_image=false'
browser=${TERM_BROWSER:-$TERMBROWSER}
browser=${browser:-$w3m}
htmlpager=${HTMLPAGER:-w3m -dump}

# Remove some junk from the plain text files generated by w3m,
# such as headers, links to adverts, etc
function prettifier {
  url="$1"
  case "$url" in

    *'ubuntu.com'*|*'debian.org'*)
      sed \
        -e '1,5d' \
        -e 's/ bug$//' \
        -e 's/      / /g'
      ;;

    *'linux.die.net'*)
      sed -e '1d' -e 's/\[INS::INS\]//g' -e 's/    \[          \]//g' \
        -e 's/Site Search//g' 2>/dev/null | head -n -12
      ;;

    *mankier*)
      grep -vEi '  • [A-Z]|      □ [A-Z]' | sed -e '1,7d' -e 's/tldr.sh//g' \
        -e "s/$command /$command($section) /g"
      ;;

    *ss64*com*)
      sed '1,5d' 2>/dev/null
      ;;

    *'man7.org'*)
      sed -e '1,4d' -e '8,13d' -e 's/         top/            /g' \
        | grep -vE 'StatCounter' | head -n -15
    ;;

    *manpages*org*)
      sed -e '1,3d' \
        | head -n -14
      ;;

    *man*he*net*) cat - ;;

    *) cat - ;;
  esac
}

# create the config dir, if needed
[ ! -d $HOME/.w3m-manpages ] && mkdir -p $HOME/.w3m-manpages

# if we already have a proper man page, just print it and exit
if [ ! -z "$MANPATH" ] && [ "$(which groff)" != "" ] && [ "$1" != "-H" ] && [ "$2" != "--url" ];then
  usegroff=false
  groff_file=''
  zipped=false
  paths=$(echo "${MANPATH}" | tr ':' '\n')
  # for each path
  for dir in ./ $paths
  do
    zipped=false
    # let's find the man page file
    groff_file=$dir/man${section}/${command}.${section}
    # it might be gzipped
    [ ! -f $groff_file ] && groff_file=$dir/man${section}/${command}.${section}.gz && zipped=true
    # it might be in the current directory
    [ -f ./${command}.${section} ] && groff_file="./${command}.${section}"
    [ -f ./${command}.${section}.gz ] && groff_file="./${command}.${section}.gz" && zipped=true
    # if we found the file
    if [ -f $groff_file ];then
      # unpack it, if needed
      [ $zipped = true ] && zcat $groff_file > /tmp/unzipped && groff_file=/tmp/unzipped
      # now lets read the man page and exit
      groff -T utf8 -man $groff_file | $pager && exit 0
    fi
  done
fi

# if we already have the man page as plain text, just print it and exit
[ "$1" != "-H" ] && [ "$2" != "--url" ] && [ -f "$manpage_file" ] && cat "$manpage_file" | $pager && exit 0

# set a list of urls to check:

# these man pages cover the correct versions of the program you have installed,
# for the OS you're actually using (if using an Ubuntu or Debian based pup)
if [ "$DISTRO_BINARY_COMPAT" = "ubuntu" ];then
  URLS="https://manpages.ubuntu.com/manpages/${DISTRO_COMPAT_VERSION}/man${section}/${command}.${section}.html"
elif [ "$DISTRO_BINARY_COMPAT" = "debian" ];then
  URLS="https://manpages.debian.org/${DISTRO_COMPAT_VERSION}/${command}/${command}.${section}.en.html"
fi

# add the rest
URLS="$URLS
http://man.he.net/?topic=${command}&section=${section}
http://manpages.org/${command}/${section}
https://linux.die.net/man/${section}/${command}
https://www.mankier.com/${section}/${command}
https://man7.org/linux/man-pages/man${section}/${command}.${section}.html
http://manpages.org/${command}
http://man.he.net/?topic=${command}&section=all
https://ss64.com/bash/${command}.html
"
# for each url in the list
for url in ${URLS}
do
  # skip any empty urls
  [ "$url" != ""  ] || continue
  [ "$url" != " " ] || continue

  # crawl the url
  wget --timeout=2 --spider -S -o /tmp/"$command".html "$url"

  # get the status code
  grep -m1 'HTTP/[1-3].[0-9] [0-9][0-9][0-9]' /tmp/"$command".html | grep -E '200 OK' > /tmp/response

  # if HTTP status not 200, skip this url
  grep -q -m1 "200" /tmp/response || continue

  # print the URL if --url given
  [ "$2" = "--url" ]  && echo "$url" && exit 0

  # grab the URL contents as plain text, put it into a file
  timeout 2 $htmlpager "$url" | prettifier "$url" > "$manpage_file"

  # if file is empty, remove it, skip this url
  [ ! -s "$manpage_file" ] && rm "$manpage_file" && continue

  # check if we if we got a "not found" page
  notfound=false
  grep -qiE "^Couldn|Hmmm|Invalid characters|No matches for \"|t found manual page under category" "$manpage_file" && notfound=true

  # if we DID get a "not found" page, delete the man page, skip this url
  [ "$notfound" = true   ] && rm "$manpage_file" && continue

  # if man page not a file, skip this url
  [ ! -f "$manpage_file" ] && continue

  # if -H was given, print it out as HTML (like GNU man)
  if [ "$1" = "-H" ] ;then
    $browser "$url"
  fi

  #
  # add a footer to the man page
  #
  echo >> "$manpage_file"
  echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" >> "$manpage_file"
  echo >> "$manpage_file"
  echo "Generated by https://github.com/sc0ttj/w3m-manpage" >> "$manpage_file"
  echo >> "$manpage_file"
  echo "Source URL   $url" >> "$manpage_file"
  echo >> "$manpage_file"

  # man page created, dont need to do another url
  break
done

# if we still did not _create_ a file, see if we have local
# matching ones, in a different section
if [ ! -f "$manpage_file" ];then
  local_pages=$(ls $HOME/.w3m-manpages | grep "${command}.")
  if [ "$local_pages" != "" ];then
    if [ $(echo "$local_pages" | wc -l) = 1 ];then
      echo
      echo "Loading $local_pages" | tr -d '\n'
      echo
      sleep 2
      manpage_file="$HOME/.w3m-manpages/$local_pages"
    elif [ $(echo "$local_pages" | wc -l) -ge 2 ];then
      echo
      echo "Did you mean any of these?"
      echo
      echo "$local_pages"
      exit 1
    fi
  fi
fi

# if we have a man page, and didnt already print the html, print it here
[ "$1" != "-H" ] && [ -f "$manpage_file" ] \
  && cat "$manpage_file" | $pager && exit 0

# if _still_ nothing, try download it online from the next section
if [ ! -f "$manpage_file" ];then
  if [ -f /tmp/man_loopcount ] && [ $(wc -l /tmp/man_loopcount | cut -f1 -d' ') -ge 8 ];then
    rm /tmp/man_loopcount
    exit 1
  fi
  next_section=$(($section + 1))
  [ $next_section -eq 9 ] && next_section=1
  echo -n "."
  echo "." >> /tmp/man_loopcount
  exec $0 $next_section $command
  retval=$?
  [ $retval -eq 0 ] && rm /tmp/manprogress
  echo
fi

man="$old_man"