diff options
author | Vikas Kushwaha <dev@vikas.rocks> | 2024-11-21 13:30:52 +0530 |
---|---|---|
committer | Vikas Kushwaha <dev@vikas.rocks> | 2024-11-21 13:30:52 +0530 |
commit | 5c916d69d457101326803eb076a746060e3618cf (patch) | |
tree | d6fce3256eede1c1bf78fb6a1be75b9cc4b84cee /.local/bin/web/w3mman |
Moved from github
Diffstat (limited to '.local/bin/web/w3mman')
-rwxr-xr-x | .local/bin/web/w3mman | 335 |
1 files changed, 335 insertions, 0 deletions
#!/bin/bash
#
# w3m-man
#
# Use wget, w3m and less to download and view man pages.
#
# Reconstructed from:
#   diff --git a/.local/bin/web/w3mman b/.local/bin/web/w3mman
#   new file mode 100755, index 0000000..5013fbd
#
# Homepage: https://github.com/sc0ttj/w3m-man
#
# Supports using $MANPAGER, $HTMLPAGER, $TERM_BROWSER, and
# falls back to w3m and less if these are not set.
#
# How it works:
#
#  - saves manpage from online sources to $HOME/.w3m-manpages/
#  - not proper man pages, just plain text output of websites
#  - prettifies them a bit (removes junk headers, links, messages)
#
# Usage:
#
#   man <command>          # view man page in plain text in $MANPAGER/$PAGER
#   man -H <command>       # view man page as HTML in $TERM_BROWSER
#   man <command> --url    # print the URL of the man page to STDOUT
#
# Environment:
#
#   MANPAGER, PAGER              pager for plain text (default: less -XR)
#   TERM_BROWSER, TERMBROWSER    browser used by -H (default: w3m, no images)
#   HTMLPAGER                    HTML-to-text dumper (default: w3m -dump)
#   MANPATH                      searched for local groff pages first
#   W3MMAN_RETRIES               internal: retry counter passed across exec
#
# Strict mode (set -e / set -u) is deliberately NOT enabled: the control
# flow relies on commands returning non-zero (grep, pager, tests) and on
# optional environment variables being unset.

# TODO:
#
#  - Display the path searched for manpages:
#
#      man --path
#
#  - Display the location of a manpage rather than the manpage itself:
#
#      man -w command
#
#  - Search for manpages containing a search string:
#
#      man -k "search_string"
#

readonly CACHE_DIR="$HOME/.w3m-manpages"
# How many times the script may re-exec itself to try the next section.
readonly MAX_SECTION_RETRIES=8
# A leading positional argument that is a section number: `man 8 mount`.
readonly SECTION_ARG_RE='^[1-8]+$'
# A `.suffix` that names a section: `mount.8`, `open.3p`.
readonly SECTION_SUFFIX_RE='^[1-8]+[a-zA-Z]*$'

#######################################
# Print the usage text.
# Outputs: help text on stdout (printed literally, nothing is expanded).
#######################################
print_help() {
  cat <<'EOF'
# man (w3m-man)
# Use wget, w3m and less to download and view man pages
# See https://github.com/sc0ttj/w3m-man

Usage:

  man <command>        # (download and) print the man page

  man <command> --url  # print the URL from which the man
                       # page was/would be downloaded

  man --help           # print this help info

Where <command> is the command name you want to read about.

Examples:

  man diff             # view 'diff' command (section 1)

  man mount.8          # view 'mount' in section 8 (config stuff)

  man 8 mount          # same as above, but doesn't work with -H

  man -H mount         # view 'mount' man page as HTML using $TERM_BROWSER

  man <command> --url  # only print the URL from which the man page
                       # was/would be downloaded

Man pages are divided into sections, as follows:

  1. User: most user commands and programs.

  2. System: calls by the Linux kernel.

  3. Library: documents provided by the standard C library.

  4. Devices: documents various devices, most of which reside in /dev.

  5. Files: describes various file formats and filesystems and proc(5).

  7. Overviews, conventions, and miscellaneous.

  8. Superuser and system administration commands.

Checks the following URLs:

the Ubuntu or Debian man pages, also:

http://man.he.net/?topic=${command}&section=${section}
http://manpages.org/${command}/${section}
https://linux.die.net/man/${section}/${command}
https://www.mankier.com/${section}/${command}
https://man7.org/linux/man-pages/man${section}/${command}.${section}.html
http://manpages.org/${command}
http://man.he.net/?topic=${command}&section=all
https://ss64.com/bash/${command}.html

EOF
}

#######################################
# Remove some junk from the plain text generated by the HTML pager,
# such as headers, links to adverts, etc. Acts as a stdin -> stdout filter.
# Globals:   command, section (read; used for the mankier rewrite)
# Arguments: $1 - the URL the text was fetched from (selects the filter)
# Outputs:   the cleaned-up text on stdout
# NOTE(review): this file was recovered from a whitespace-collapsed page, so
# runs of spaces inside the sed/grep patterns below may have been squeezed.
# In particular 's/ / /g' looks like a no-op as written - confirm against
# the upstream script.
#######################################
prettifier() {
  local url="$1"
  case "$url" in

    *'ubuntu.com'* | *'debian.org'*)
      sed \
        -e '1,5d' \
        -e 's/ bug$//' \
        -e 's/ / /g'
      ;;

    *'linux.die.net'*)
      sed -e '1d' -e 's/\[INS::INS\]//g' -e 's/ \[ \]//g' \
        -e 's/Site Search//g' 2>/dev/null | head -n -12
      ;;

    *mankier*)
      # $command is interpolated into the sed pattern unescaped, so regex
      # metacharacters in the page name are treated as regex.
      grep -vEi ' • [A-Z]| □ [A-Z]' | sed -e '1,7d' -e 's/tldr.sh//g' \
        -e "s/$command /$command($section) /g"
      ;;

    *ss64*com*)
      sed '1,5d' 2>/dev/null
      ;;

    *'man7.org'*)
      sed -e '1,4d' -e '8,13d' -e 's/ top/ /g' \
        | grep -vE 'StatCounter' | head -n -15
      ;;

    *manpages*org*)
      sed -e '1,3d' \
        | head -n -14
      ;;

    # man.he.net and anything unrecognised: pass through untouched
    *) cat - ;;
  esac
}

if [[ "$1" == "--help" ]]; then
  print_help
  exit 0
fi

# NOTE(review): saving/unsetting/restoring `man` appears to have no effect
# when this file runs as a standalone script; kept from the original.
old_man="$(command -v man)"
unset man

command="$1"
section="1"

DISTRO_COMPAT_VERSION="latest"
if [[ -f /etc/DISTRO_SPECS ]]; then
  # shellcheck source=/dev/null
  source /etc/DISTRO_SPECS
fi

# support GNU man -H option (to view as HTML)
if [[ "$1" == "-H" ]]; then
  command="$2"
fi

if [[ "$1" =~ $SECTION_ARG_RE ]]; then
  # support GNU man syntax `man 1 mount`
  command="$2"
  section="$1"
elif [[ "$command" == *.* && "${command##*.}" =~ $SECTION_SUFFIX_RE ]]; then
  # support GNU man syntax `man mount.1`, `man mount.2`, etc.
  # Only the LAST dot-suffix is split off, and only when it looks like a
  # section, so names such as `mount.cifs` or `systemd.unit.5` stay intact.
  section="${command##*.}"
  command="${command%.*}"
fi

if [[ -z "$command" ]]; then
  printf 'What manual page do you want?\nSee: %s --help\n' "${0##*/}" >&2
  exit 1
fi
if [[ "$command" == */* ]]; then
  # the name becomes part of a cache file path; keep it inside CACHE_DIR
  printf '%s: invalid page name: %s\n' "${0##*/}" "$command" >&2
  exit 1
fi

manpage_file="$CACHE_DIR/$command.$section"

# support env var $MANPAGER
pager=${MANPAGER:-$PAGER}
pager=${pager:-less -XR}
w3m='w3m -o auto_image=false -o display_image=false'
browser=${TERM_BROWSER:-$TERMBROWSER}
browser=${browser:-$w3m}
htmlpager=${HTMLPAGER:-w3m -dump}

# These settings are "program plus options" strings; split them once into
# arrays so they can be executed without unquoted expansions.
read -ra pager_cmd <<<"$pager"
read -ra browser_cmd <<<"$browser"
read -ra htmlpager_cmd <<<"$htmlpager"

# private scratch space instead of fixed, predictable names in /tmp
tmp_dir=$(mktemp -d "${TMPDIR:-/tmp}/w3mman.XXXXXX") || {
  printf '%s: cannot create a temporary directory\n' "${0##*/}" >&2
  exit 1
}
cleanup() { rm -rf -- "${tmp_dir:?}"; }
trap cleanup EXIT

# create the config dir, if needed
[[ -d "$CACHE_DIR" ]] || mkdir -p -- "$CACHE_DIR"

# if we already have a proper man page, just print it and exit
if [[ -n "$MANPATH" && "$1" != "-H" && "$2" != "--url" ]] \
  && command -v groff >/dev/null 2>&1; then
  IFS=':' read -ra man_dirs <<<"$MANPATH"
  for dir in ./ "${man_dirs[@]}"; do
    [[ -n "$dir" ]] || continue
    groff_file=''
    # a page in the current directory wins over one under $dir
    for candidate in \
      "./${command}.${section}.gz" \
      "./${command}.${section}" \
      "${dir}/man${section}/${command}.${section}" \
      "${dir}/man${section}/${command}.${section}.gz"; do
      if [[ -f "$candidate" ]]; then
        groff_file="$candidate"
        break
      fi
    done
    [[ -n "$groff_file" ]] || continue
    # unpack it, if needed
    if [[ "$groff_file" == *.gz ]]; then
      zcat -- "$groff_file" >"$tmp_dir/unzipped" || continue
      groff_file="$tmp_dir/unzipped"
    fi
    # now lets read the man page and exit
    groff -T utf8 -man "$groff_file" | "${pager_cmd[@]}" && exit 0
  done
fi

# if we already have the man page as plain text, just print it and exit
if [[ "$1" != "-H" && "$2" != "--url" && -f "$manpage_file" ]]; then
  "${pager_cmd[@]}" <"$manpage_file" && exit 0
fi

# set a list of urls to check:
urls=()

# these man pages cover the correct versions of the program you have installed,
# for the OS you're actually using (if using an Ubuntu or Debian based pup)
case "$DISTRO_BINARY_COMPAT" in
  ubuntu)
    urls+=("https://manpages.ubuntu.com/manpages/${DISTRO_COMPAT_VERSION}/man${section}/${command}.${section}.html")
    ;;
  debian)
    urls+=("https://manpages.debian.org/${DISTRO_COMPAT_VERSION}/${command}/${command}.${section}.en.html")
    ;;
esac

# add the rest
urls+=(
  "http://man.he.net/?topic=${command}&section=${section}"
  "http://manpages.org/${command}/${section}"
  "https://linux.die.net/man/${section}/${command}"
  "https://www.mankier.com/${section}/${command}"
  "https://man7.org/linux/man-pages/man${section}/${command}.${section}.html"
  "http://manpages.org/${command}"
  "http://man.he.net/?topic=${command}&section=all"
  "https://ss64.com/bash/${command}.html"
)

# for each url in the list
for url in "${urls[@]}"; do
  # crawl the url, logging the response headers
  wget --timeout=2 --spider -S -o "$tmp_dir/headers" "$url"

  # if the FIRST HTTP status line is not "200 OK", skip this url
  grep -m1 'HTTP/[1-3].[0-9] [0-9][0-9][0-9]' "$tmp_dir/headers" \
    | grep -qE '200 OK' || continue

  # print the URL if --url given
  if [[ "$2" == "--url" ]]; then
    printf '%s\n' "$url"
    exit 0
  fi

  # grab the URL contents as plain text, put it into a file
  timeout 2 "${htmlpager_cmd[@]}" "$url" | prettifier "$url" >"$manpage_file"

  # if file is empty, remove it, skip this url
  if [[ ! -s "$manpage_file" ]]; then
    rm -f -- "$manpage_file"
    continue
  fi

  # if we got a "not found" page, delete the man page, skip this url
  if grep -qiE "^Couldn|Hmmm|Invalid characters|No matches for \"|t found manual page under category" "$manpage_file"; then
    rm -f -- "$manpage_file"
    continue
  fi

  # if -H was given, print it out as HTML (like GNU man)
  if [[ "$1" == "-H" ]]; then
    "${browser_cmd[@]}" "$url"
  fi

  #
  # add a footer to the man page
  #
  {
    printf '\n'
    printf '%s\n' "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
    printf '\n'
    printf '%s\n' "Generated by https://github.com/sc0ttj/w3m-man"
    printf '\n'
    printf '%s\n' "Source URL $url"
    printf '\n'
  } >>"$manpage_file"

  # man page created, dont need to do another url
  break
done

# if we still did not _create_ a file, see if we have local
# matching ones, in a different section
if [[ ! -f "$manpage_file" ]]; then
  local_pages=()
  for page in "$CACHE_DIR/${command}."*; do
    name="${page##*/}"
    suffix="${name#"${command}".}"
    # keep only "<command>.<section>", not other pages sharing the prefix
    if [[ -f "$page" && "$suffix" =~ $SECTION_SUFFIX_RE ]]; then
      local_pages+=("$name")
    fi
  done
  if ((${#local_pages[@]} == 1)); then
    printf '\nLoading %s\n' "${local_pages[0]}"
    sleep 2
    manpage_file="$CACHE_DIR/${local_pages[0]}"
  elif ((${#local_pages[@]} >= 2)); then
    printf '\nDid you mean any of these?\n\n'
    printf '%s\n' "${local_pages[@]}"
    exit 1
  fi
fi

# if we have a man page, and didnt already print the html, print it here
if [[ "$1" != "-H" && -f "$manpage_file" ]]; then
  "${pager_cmd[@]}" <"$manpage_file" && exit 0
fi

# if _still_ nothing, try download it online from the next section
if [[ ! -f "$manpage_file" ]]; then
  # The retry count travels in the environment of the re-exec'd process,
  # so it can never leak into a later, unrelated invocation.
  retries="${W3MMAN_RETRIES:-0}"
  [[ "$retries" =~ ^[0-9]+$ ]] || retries=0
  if ((10#$retries >= MAX_SECTION_RETRIES)); then
    exit 1
  fi

  # sections such as "3p" still advance numerically
  section_num="${section%%[!0-9]*}"
  next_section=$((section_num + 1))
  ((next_section == 9)) && next_section=1
  printf '.'

  # keep -H / --url across the retry by using the `name.section` syntax
  if [[ "$1" == "-H" ]]; then
    retry_args=(-H "${command}.${next_section}")
  elif [[ "$2" == "--url" ]]; then
    retry_args=("${command}.${next_section}" --url)
  else
    retry_args=("$next_section" "$command")
  fi

  # exec replaces this process and EXIT traps do not run, so clean up now
  cleanup
  trap - EXIT
  export W3MMAN_RETRIES=$((10#$retries + 1))
  exec "$0" "${retry_args[@]}"
fi

man="$old_man"