summaryrefslogtreecommitdiff
path: root/.local/bin/web/w3mman
diff options
context:
space:
mode:
authorVikas Kushwaha <dev@vikas.rocks>2024-11-21 13:30:52 +0530
committerVikas Kushwaha <dev@vikas.rocks>2024-11-21 13:30:52 +0530
commit5c916d69d457101326803eb076a746060e3618cf (patch)
treed6fce3256eede1c1bf78fb6a1be75b9cc4b84cee /.local/bin/web/w3mman
Moved from github
Diffstat (limited to '.local/bin/web/w3mman')
-rwxr-xr-x.local/bin/web/w3mman335
1 files changed, 335 insertions, 0 deletions
diff --git a/.local/bin/web/w3mman b/.local/bin/web/w3mman
new file mode 100755
index 0000000..5013fbd
--- /dev/null
+++ b/.local/bin/web/w3mman
@@ -0,0 +1,335 @@
+#!/bin/bash
+
+# w3m-man
+#
+# Use wget, w3m and less to download and view man pages.
+#
+# Homepage: https://github.com/sc0ttj/w3m-man
+#
+# Supports using $MANPAGER, $HTMLPAGER, $TERM_BROWSER, and
+# falls back to w3m and less if these are not set.
+#
+# How it works:
+#
+# - saves manpage from online sources to $HOME/.w3m-manpages/
+# - not proper man pages, just plain text output of websites
+# - prettifies them a bit (removes junk headers, links, messages)
+#
+# Usage:
+#
+# man <command> # view man page in plain text in $PAGER
+# man -H <command> # view man page as HTML in $TERM_BROWSER (falls back to w3m)
+# man <command> --url # print the URL of the man page to STDOUT
+
+# TODO:
+#
+# - Display the path searched for manpages:
+#
+# man --path
+#
+# - Display the location of a manpage rather than the manpage itself:
+#
+# man -w command
+#
+# - Search for manpages containing a search string:
+#
+# man -k "search_string"
+#
+
+# Print the usage text and exit 0. --help is only recognised as the first
+# argument.
+# The heredoc delimiter is quoted so that nothing inside the text is
+# expanded: every "$NAME" below is printed literally. The -H example names
+# $TERM_BROWSER, which is the variable this script reads for the browser.
+if [ "$1" = "--help" ]; then
+  cat <<'EOF'
+# man (w3m-man)
+# Use wget, w3m and less to download and view man pages
+# See https://github.com/sc0ttj/w3m-man
+
+Usage:
+
+ man <command> # (download and) print the man page
+
+ man <command> --url # print the URL from which the man
+ # page was/would be downloaded
+
+ man --help # print this help info
+
+Where <command> is the command name you want to read about.
+
+Examples:
+
+ man diff # view 'diff' command (section 1)
+
+ man mount.8 # view 'mount' in section 8 (config stuff)
+
+ man 8 mount # same as above, but doesn't work with -H
+
+ man -H mount # view 'mount' man page as HTML using $TERM_BROWSER
+
+ man <command> --url # only print the URL from which the man page
+ # was/would be downloaded
+
+Man pages are divided into sections, as follows:
+
+ 1. User: most user commands and programs.
+
+ 2. System: calls by the Linux kernel.
+
+ 3. Library: documents provided by the standard C library.
+
+ 4. Devices: documents various devices, most of which reside in /dev.
+
+ 5. Files: describes various file formats and filesystems and proc(5).
+
+ 7. Overviews, conventions, and miscellaneous.
+
+ 8. Superuser and system administration commands.
+
+Checks the following URLs:
+
+the Ubuntu or Debian man pages, also:
+
+http://man.he.net/?topic=${command}&section=${section}
+http://manpages.org/${command}/${section}
+https://linux.die.net/man/${section}/${command}
+https://www.mankier.com/${section}/${command}
+https://man7.org/linux/man-pages/man${section}/${command}.${section}.html
+http://manpages.org/${command}
+http://man.he.net/?topic=${command}&section=all
+https://ss64.com/bash/${command}.html
+
+EOF
+  exit 0
+fi
+
+old_man="$(command -v man)"
+unset man
+
+command="$1"
+section="1"
+
+DISTRO_COMPAT_VERSION="latest"
+if [ -f /etc/DISTRO_SPECS ];then
+ source /etc/DISTRO_SPECS
+fi
+
+
+# support GNU man -H option (to view as HTML)
+if [ "$1" = "-H" ];then
+ command="$2"
+fi
+
+# support GNU man syntax `man 1 mount`
+re='^[1-8]+$'
+if [[ $1 =~ $re ]] ; then
+ command="${2}.$1"
+ section="$1"
+fi
+
+# support GNU man syntax `man mount.1`, `man mount.2`, etc
+section="${command//*\./}"
+command="${command//\.*/}"
+if [ "$command" = "$section" ];then
+ section=1
+fi
+
+manpage_file="$HOME/.w3m-manpages/$command.$section"
+
+# support env var $MANPAGER
+pager=${MANPAGER:-$PAGER}
+pager=${pager:-less -XR}
+w3m='w3m -o auto_image=false -o display_image=false'
+browser=${TERM_BROWSER:-$TERMBROWSER}
+browser=${browser:-$w3m}
+htmlpager=${HTMLPAGER:-w3m -dump}
+
+# Remove some junk from the plain text files generated by w3m,
+# such as headers, links to adverts, etc
+#
+# Works as a filter: reads the dumped page text on stdin and writes the
+# cleaned text to stdout.
+# Arguments: $1 - URL the text came from; it selects the site-specific
+#                 clean-up below (first matching pattern wins).
+# Globals:   url (assigned, not declared local); command and section
+#            (read by the mankier branch).
+function prettifier {
+ url="$1"
+ case "$url" in
+
+ # Ubuntu/Debian: drop the first 5 lines and a trailing " bug" on any line.
+ # NOTE(review): both sides of the last substitution look identical here;
+ # the original may have used a different whitespace character - confirm.
+ *'ubuntu.com'*|*'debian.org'*)
+ sed \
+ -e '1,5d' \
+ -e 's/ bug$//' \
+ -e 's/ / /g'
+ ;;
+
+ # linux.die.net: drop line 1 and the listed ad/search markers, then drop
+ # the last 12 lines (negative count for `head -n`).
+ *'linux.die.net'*)
+ sed -e '1d' -e 's/\[INS::INS\]//g' -e 's/ \[ \]//g' \
+ -e 's/Site Search//g' 2>/dev/null | head -n -12
+ ;;
+
+ # mankier: remove bullet lines, drop the first 7 remaining lines, strip
+ # "tldr.sh", and rewrite "<command> " as "<command>(<section>) ".
+ # $command is interpolated into the sed expression unescaped.
+ *mankier*)
+ grep -vEi ' • [A-Z]| □ [A-Z]' | sed -e '1,7d' -e 's/tldr.sh//g' \
+ -e "s/$command /$command($section) /g"
+ ;;
+
+ # ss64.com: drop the first 5 lines.
+ *ss64*com*)
+ sed '1,5d' 2>/dev/null
+ ;;
+
+ # man7.org: delete input lines 1-4 and 8-13, replace " top" with a space,
+ # remove lines mentioning StatCounter, then drop the last 15 lines.
+ *'man7.org'*)
+ sed -e '1,4d' -e '8,13d' -e 's/ top/ /g' \
+ | grep -vE 'StatCounter' | head -n -15
+ ;;
+
+ # manpages.org: drop the first 3 and the last 14 lines. The Debian host
+ # (manpages.debian.org) also fits this pattern but is caught by the
+ # earlier ubuntu/debian branch.
+ *manpages*org*)
+ sed -e '1,3d' \
+ | head -n -14
+ ;;
+
+ # man.he.net: passed through unchanged.
+ *man*he*net*) cat - ;;
+
+ # Any other source: passed through unchanged.
+ *) cat - ;;
+ esac
+}
+
+# create the config dir, if needed
+[ ! -d $HOME/.w3m-manpages ] && mkdir -p $HOME/.w3m-manpages
+
+# if we already have a proper man page, just print it and exit
+if [ ! -z "$MANPATH" ] && [ "$(which groff)" != "" ] && [ "$1" != "-H" ] && [ "$2" != "--url" ];then
+ usegroff=false
+ groff_file=''
+ zipped=false
+ paths=$(echo "${MANPATH}" | tr ':' '\n')
+ # for each path
+ for dir in ./ $paths
+ do
+ zipped=false
+ # let's find the man page file
+ groff_file=$dir/man${section}/${command}.${section}
+ # it might be gzipped
+ [ ! -f $groff_file ] && groff_file=$dir/man${section}/${command}.${section}.gz && zipped=true
+ # it might be in the current directory
+ [ -f ./${command}.${section} ] && groff_file="./${command}.${section}"
+ [ -f ./${command}.${section}.gz ] && groff_file="./${command}.${section}.gz" && zipped=true
+ # if we found the file
+ if [ -f $groff_file ];then
+ # unpack it, if needed
+ [ $zipped = true ] && zcat $groff_file > /tmp/unzipped && groff_file=/tmp/unzipped
+ # now lets read the man page and exit
+ groff -T utf8 -man $groff_file | $pager && exit 0
+ fi
+ done
+fi
+
+# if we already have the man page as plain text, just print it and exit
+[ "$1" != "-H" ] && [ "$2" != "--url" ] && [ -f "$manpage_file" ] && cat "$manpage_file" | $pager && exit 0
+
+# set a list of urls to check:
+
+# these man pages cover the correct versions of the program you have installed,
+# for the OS you're actually using (if using an Ubuntu or Debian based pup)
+if [ "$DISTRO_BINARY_COMPAT" = "ubuntu" ];then
+ URLS="https://manpages.ubuntu.com/manpages/${DISTRO_COMPAT_VERSION}/man${section}/${command}.${section}.html"
+elif [ "$DISTRO_BINARY_COMPAT" = "debian" ];then
+ URLS="https://manpages.debian.org/${DISTRO_COMPAT_VERSION}/${command}/${command}.${section}.en.html"
+fi
+
+# add the rest
+URLS="$URLS
+http://man.he.net/?topic=${command}&section=${section}
+http://manpages.org/${command}/${section}
+https://linux.die.net/man/${section}/${command}
+https://www.mankier.com/${section}/${command}
+https://man7.org/linux/man-pages/man${section}/${command}.${section}.html
+http://manpages.org/${command}
+http://man.he.net/?topic=${command}&section=all
+https://ss64.com/bash/${command}.html
+"
+# for each url in the list
+for url in ${URLS}
+do
+ # skip any empty urls
+ [ "$url" != "" ] || continue
+ [ "$url" != " " ] || continue
+
+ # crawl the url
+ wget --timeout=2 --spider -S -o /tmp/"$command".html "$url"
+
+ # get the status code
+ grep -m1 'HTTP/[1-3].[0-9] [0-9][0-9][0-9]' /tmp/"$command".html | grep -E '200 OK' > /tmp/response
+
+ # if HTTP status not 200, skip this url
+ grep -q -m1 "200" /tmp/response || continue
+
+ # print the URL if --url given
+ [ "$2" = "--url" ] && echo "$url" && exit 0
+
+ # grab the URL contents as plain text, put it into a file
+ timeout 2 $htmlpager "$url" | prettifier "$url" > "$manpage_file"
+
+ # if file is empty, remove it, skip this url
+ [ ! -s "$manpage_file" ] && rm "$manpage_file" && continue
+
+ # check if we if we got a "not found" page
+ notfound=false
+ grep -qiE "^Couldn|Hmmm|Invalid characters|No matches for \"|t found manual page under category" "$manpage_file" && notfound=true
+
+ # if we DID get a "not found" page, delete the man page, skip this url
+ [ "$notfound" = true ] && rm "$manpage_file" && continue
+
+ # if man page not a file, skip this url
+ [ ! -f "$manpage_file" ] && continue
+
+ # if -H was given, print it out as HTML (like GNU man)
+ if [ "$1" = "-H" ] ;then
+ $browser "$url"
+ fi
+
+ #
+ # add a footer to the man page
+ #
+ echo >> "$manpage_file"
+ echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" >> "$manpage_file"
+ echo >> "$manpage_file"
+ echo "Generated by https://github.com/sc0ttj/w3m-manpage" >> "$manpage_file"
+ echo >> "$manpage_file"
+ echo "Source URL $url" >> "$manpage_file"
+ echo >> "$manpage_file"
+
+ # man page created, dont need to do another url
+ break
+done
+
+# if we still did not _create_ a file, see if we have local
+# matching ones, in a different section
+if [ ! -f "$manpage_file" ]; then
+  # Collect cached pages named exactly "<command>.<something>". A glob is
+  # used rather than `ls | grep "${command}."`, whose unanchored regex also
+  # matched unrelated pages (e.g. "ls" matched "false.1").
+  local_pages=()
+  for page in "$HOME/.w3m-manpages/${command}."*; do
+    [ -f "$page" ] && local_pages+=("${page##*/}")
+  done
+
+  if [ "${#local_pages[@]}" -eq 1 ]; then
+    # exactly one candidate: announce it, pause, then show it below
+    printf '\nLoading %s\n' "${local_pages[0]}"
+    sleep 2
+    manpage_file="$HOME/.w3m-manpages/${local_pages[0]}"
+  elif [ "${#local_pages[@]}" -ge 2 ]; then
+    # several candidates: list them and let the user pick one
+    echo
+    echo "Did you mean any of these?"
+    echo
+    printf '%s\n' "${local_pages[@]}"
+    exit 1
+  fi
+fi
+
+# if we have a man page, and didnt already print the html, print it here
+# ($pager is deliberately unquoted: it may hold a command plus options)
+[ "$1" != "-H" ] && [ -f "$manpage_file" ] \
+  && $pager < "$manpage_file" && exit 0
+
+# if _still_ nothing, try download it online from the next section
+#
+# The script re-executes itself as `<script> <next section> <command>`.
+# `exec` replaces this process, so nothing after it can run. The retry
+# count is carried in the exported variable W3MMAN_LOOPCOUNT instead of a
+# shared file in /tmp, so it cannot be left behind by an earlier run or
+# mixed up with another invocation running at the same time.
+if [ ! -f "$manpage_file" ]; then
+  tries="${W3MMAN_LOOPCOUNT:-0}"
+  # ignore a non-numeric value inherited from the environment
+  case "$tries" in
+    '' | *[!0-9]*) tries=0 ;;
+  esac
+
+  # give up after 8 retries
+  if [ "$tries" -ge 8 ]; then
+    echo >&2
+    echo "No man page found for '$command'" >&2
+    exit 1
+  fi
+
+  # sections wrap around: 8 is followed by 1
+  next_section=$((section + 1))
+  [ "$next_section" -eq 9 ] && next_section=1
+
+  # progress dot, one per retry
+  echo -n "."
+  export W3MMAN_LOOPCOUNT=$((tries + 1))
+  exec "$0" "$next_section" "$command"
+fi
+
+# Assign the value saved at startup back to the shell variable "man".
+man="$old_man"