#!/bin/bash
# w3m-man
#
# Use wget, w3m and less to download and view man pages.
#
# Homepage: https://github.com/sc0ttj/w3m-man
#
# Supports using $MANPAGER, $HTMLPAGER, $TERM_BROWSER, and
# falls back to w3m and less if these are not set.
#
# How it works:
#
# - saves man pages from online sources to $HOME/.w3m-manpages/
# - these are not proper man pages, just the plain-text output of websites
# - prettifies them a bit (removes junk headers, links, messages)
#
# Usage:
#
#   man <command>         # view man page in plain text in $PAGER
#   man -H <command>      # view man page as HTML in $BROWSER
#   man <command> --url   # print the URL of the man page to STDOUT
#
# TODO:
#
# - Display the path searched for manpages:
#
#       man --path
#
# - Display the location of a manpage rather than the manpage itself:
#
#       man -w command
#
# - Search for manpages containing a search string:
#
#       man -k "search_string"
#

if [ "$1" = "--help" ];then
  echo "# man (w3m-man)
# Use wget, w3m and less to download and view man pages
# See https://github.com/sc0ttj/w3m-man

Usage:

  man <command>         # (download and) print the man page
  man <command> --url   # print the URL from which the man
                        # page was/would be downloaded
  man --help            # print this help info

Where <command> is the command name you want to read about.

Examples:

  man diff              # view 'diff' command (section 1)
  man mount.8           # view 'mount' in section 8 (config stuff)
  man 8 mount           # same as above, but doesn't work with -H
  man -H mount          # view 'mount' man page as HTML using $BROWSER
  man <command> --url   # only print the URL from which the man page
                        # was/would be downloaded

Man pages are divided into sections, as follows:

  1. User: most user commands and programs.
  2. System: calls by the Linux kernel.
  3. Library: documents provided by the standard C library.
  4. Devices: documents various devices, most of which reside in /dev.
  5. Files: describes various file formats and filesystems and proc(5).
  7. Overviews, conventions, and miscellaneous.
  8. Superuser and system administration commands.

Checks the following URLs: the Ubuntu or Debian man pages, plus:

  http://man.he.net/?topic=\${command}&section=\${section}
  http://manpages.org/\${command}/\${section}
  https://linux.die.net/man/\${section}/\${command}
  https://www.mankier.com/\${section}/\${command}
  https://man7.org/linux/man-pages/man\${section}/\${command}.\${section}.html
  http://manpages.org/\${command}
  http://man.he.net/?topic=\${command}&section=all
  https://ss64.com/bash/\${command}.html
"
  exit 0
fi

old_man="$(command -v man)"
unset man

command="$1"
section="1"
DISTRO_COMPAT_VERSION="latest"

if [ -f /etc/DISTRO_SPECS ];then
  source /etc/DISTRO_SPECS
fi

# support the GNU man -H option (to view as HTML)
if [ "$1" = "-H" ];then
  command="$2"
fi

# support GNU man syntax `man 1 mount`
re='^[1-8]+$'
if [[ $1 =~ $re ]] ; then
  command="${2}.$1"
  section="$1"
fi

# support GNU man syntax `man mount.1`, `man mount.2`, etc
section="${command//*\./}"
command="${command//\.*/}"
if [ "$command" = "$section" ];then
  section=1
fi
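# Illustrative examples of how the blocks above end up parsing the
# arguments (common cases only, without -H / --url):
#
#   man diff      ->  command=diff   section=1   (default section)
#   man mount.8   ->  command=mount  section=8
#   man 8 mount   ->  command=mount  section=8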
manpage_file="$HOME/.w3m-manpages/$command.$section"

# support the env vars $MANPAGER, $TERM_BROWSER and $HTMLPAGER
pager=${MANPAGER:-$PAGER}
pager=${pager:-less -XR}
w3m='w3m -o auto_image=false -o display_image=false'
browser=${TERM_BROWSER:-$TERMBROWSER}
browser=${browser:-$w3m}
htmlpager=${HTMLPAGER:-w3m -dump}

# Remove some junk from the plain text files generated by w3m,
# such as headers, links to adverts, etc
function prettifier {
  url="$1"
  case "$url" in
    *'ubuntu.com'*|*'debian.org'*)
      sed \
        -e '1,5d' \
        -e 's/ bug$//' \
        -e 's/ / /g'
      ;;
    *'linux.die.net'*)
      sed -e '1d' -e 's/\[INS::INS\]//g' -e 's/ \[ \]//g' \
          -e 's/Site Search//g' 2>/dev/null | head -n -12
      ;;
    *mankier*)
      grep -vEi ' • [A-Z]| □ [A-Z]' | sed -e '1,7d' -e 's/tldr.sh//g' \
          -e "s/$command /$command($section) /g"
      ;;
    *ss64*com*)
      sed '1,5d' 2>/dev/null
      ;;
    *'man7.org'*)
      sed -e '1,4d' -e '8,13d' -e 's/ top/ /g' \
        | grep -vE 'StatCounter' | head -n -15
      ;;
    *manpages*org*)
      sed -e '1,3d' \
        | head -n -14
      ;;
    *man*he*net*)
      cat -
      ;;
    *)
      cat -
      ;;
  esac
}
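# Rough usage sketch of the function above (illustrative only; assumes
# w3m is installed and the page actually exists):
#
#   w3m -dump "https://linux.die.net/man/1/diff" | prettifier "https://linux.die.net/man/1/diff"
#
# i.e. it reads a w3m text dump on stdin and strips site-specific junk
# based on the URL passed as $1; the same pipeline is used further below.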
# create the config dir, if needed
[ ! -d "$HOME/.w3m-manpages" ] && mkdir -p "$HOME/.w3m-manpages"

# if we already have a proper man page, just print it and exit
if [ ! -z "$MANPATH" ] && [ "$(which groff)" != "" ] && [ "$1" != "-H" ] && [ "$2" != "--url" ];then
  usegroff=false
  groff_file=''
  zipped=false
  paths=$(echo "${MANPATH}" | tr ':' '\n')
  # for each path
  for dir in ./ $paths
  do
    zipped=false
    # let's find the man page file
    groff_file=$dir/man${section}/${command}.${section}
    # it might be gzipped
    [ ! -f $groff_file ] && groff_file=$dir/man${section}/${command}.${section}.gz && zipped=true
    # it might be in the current directory
    [ -f ./${command}.${section} ]    && groff_file="./${command}.${section}"
    [ -f ./${command}.${section}.gz ] && groff_file="./${command}.${section}.gz" && zipped=true
    # if we found the file
    if [ -f $groff_file ];then
      # unpack it, if needed
      [ $zipped = true ] && zcat $groff_file > /tmp/unzipped && groff_file=/tmp/unzipped
      # now let's read the man page and exit
      groff -T utf8 -man $groff_file | $pager && exit 0
    fi
  done
fi

# if we already have the man page as plain text, just print it and exit
[ "$1" != "-H" ] && [ "$2" != "--url" ] && [ -f "$manpage_file" ] && cat "$manpage_file" | $pager && exit 0

# set a list of urls to check:
# these man pages cover the correct versions of the programs you have installed,
# for the OS you're actually using (if you're on an Ubuntu or Debian based pup)
if [ "$DISTRO_BINARY_COMPAT" = "ubuntu" ];then
  URLS="https://manpages.ubuntu.com/manpages/${DISTRO_COMPAT_VERSION}/man${section}/${command}.${section}.html"
elif [ "$DISTRO_BINARY_COMPAT" = "debian" ];then
  URLS="https://manpages.debian.org/${DISTRO_COMPAT_VERSION}/${command}/${command}.${section}.en.html"
fi

# add the rest
URLS="$URLS
http://man.he.net/?topic=${command}&section=${section}
http://manpages.org/${command}/${section}
https://linux.die.net/man/${section}/${command}
https://www.mankier.com/${section}/${command}
https://man7.org/linux/man-pages/man${section}/${command}.${section}.html
http://manpages.org/${command}
http://man.he.net/?topic=${command}&section=all
https://ss64.com/bash/${command}.html
"

# for each url in the list
for url in ${URLS}
do
  # skip any empty urls
  [ "$url" != "" ]  || continue
  [ "$url" != " " ] || continue

  # crawl the url
  wget --timeout=2 --spider -S -o /tmp/"$command".html "$url"

  # get the status code
  grep -m1 'HTTP/[1-3].[0-9] [0-9][0-9][0-9]' /tmp/"$command".html | grep -E '200 OK' > /tmp/response

  # if HTTP status not 200, skip this url
  grep -q -m1 "200" /tmp/response || continue

  # print the URL if --url given
  [ "$2" = "--url" ] && echo "$url" && exit 0

  # grab the URL contents as plain text, put it into a file
  timeout 2 $htmlpager "$url" | prettifier "$url" > "$manpage_file"

  # if the file is empty, remove it and skip this url
  [ ! -s "$manpage_file" ] && rm "$manpage_file" && continue

  # check if we got a "not found" page
  notfound=false
  grep -qiE "^Couldn|Hmmm|Invalid characters|No matches for \"|t found manual page under category" "$manpage_file" && notfound=true

  # if we DID get a "not found" page, delete the man page and skip this url
  [ "$notfound" = true ] && rm "$manpage_file" && continue

  # if the man page is not a file, skip this url
  [ ! -f "$manpage_file" ] && continue

  # if -H was given, print it out as HTML (like GNU man)
  if [ "$1" = "-H" ] ;then
    $browser "$url"
  fi

  #
  # add a footer to the man page
  #
  echo >> "$manpage_file"
  echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" >> "$manpage_file"
  echo >> "$manpage_file"
  echo "Generated by https://github.com/sc0ttj/w3m-manpage" >> "$manpage_file"
  echo >> "$manpage_file"
  echo "Source URL $url" >> "$manpage_file"
  echo >> "$manpage_file"

  # man page created, don't need to try another url
  break
done
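# At this point a successful fetch has been cached as plain text, e.g.
# (illustrative path):
#
#   $HOME/.w3m-manpages/diff.1
#
# so the "already have the man page as plain text" check near the top
# of the script can serve it from disk next time.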
-f "$manpage_file" ];then local_pages=$(ls $HOME/.w3m-manpages | grep "${command}.") if [ "$local_pages" != "" ];then if [ $(echo "$local_pages" | wc -l) = 1 ];then echo echo "Loading $local_pages" | tr -d '\n' echo sleep 2 manpage_file="$HOME/.w3m-manpages/$local_pages" elif [ $(echo "$local_pages" | wc -l) -ge 2 ];then echo echo "Did you mean any of these?" echo echo "$local_pages" exit 1 fi fi fi # if we have a man page, and didnt already print the html, print it here [ "$1" != "-H" ] && [ -f "$manpage_file" ] \ && cat "$manpage_file" | $pager && exit 0 # if _still_ nothing, try download it online from the next section if [ ! -f "$manpage_file" ];then if [ -f /tmp/man_loopcount ] && [ $(wc -l /tmp/man_loopcount | cut -f1 -d' ') -ge 8 ];then rm /tmp/man_loopcount exit 1 fi next_section=$(($section + 1)) [ $next_section -eq 9 ] && next_section=1 echo -n "." echo "." >> /tmp/man_loopcount exec $0 $next_section $command retval=$? [ $retval -eq 0 ] && rm /tmp/manprogress echo fi man="$old_man"