packet:xrouter:manpages:parsing
This is an old revision of the document!
Script to parse Xrouter's MAN and HLP files
- parse-pzt-manhlp.sh
#!/bin/bash
##################################
# by Jason M0MZF (not a programmer!)
# bash / awk / hammer / nail etc.
# License - MIT. Crack on people.
#
# Script to parse Paula G8PZT's Xrouter MAN and HLP files into DokuWiki
# "some simple markup language" (SSML). DokuWiki ML syntax is here:
# https://www.dokuwiki.org/wiki:syntax
#
# The intention is to parse all MAN / HLP files within the folders and
# write them with appropriate formatting to files which can then be
# pasted directly into the wiki.
#
# This could also be done with groff > HTML > pandoc > ssml but pandoc's
# output format for SSML doesn't sort out proper ====headers==== and I
# don't know Lua. Yet. Maybe something like this with a custom output formatter:
# cat ${manpage} | groff -Thtml -P -l -mmandoc 2>/dev/null | pandoc -f html -t dokuwiki -o "$manpage".txt
# But when all you've got is awk, everything looks like a record / field... ;)
#
##################################
# Instructions (destructions?)
#
# - This script does not take any arguments
# - The only required configuration is to set the following path
#   (either edit the default below or export BASEPATH before running)
#
BASEPATH="${BASEPATH:-/home/jason/radio/packet/xrouter/Xrouter}"
#
# This folder should contain the two directories "XRouter Help Files"
# and "XRouter Manual Files". A new directory will be created here
# called "docuwiki-date" and contains two directories for the
# concatenated and reformatted files. A manually-created
# index page exists in https://wiki.oarc.uk/packet:xrouter:manpages with
# top-level contents, and the pages linked therein have their contents
# copypasta'd from this script's output.
#
##################################
# Changelog
# 20250418 - Implemented MAN page parsing
# 20250419 - Implemented HLP page parsing
# 20250419 - Tidy up, more awk less bash, remove .MAN / .HLP from outputted headers
##################################

# Globals
DATE=$(date +"%Y%m%d-%H%M%S")
MANFILES="$BASEPATH/XRouter Manual Files"
HLPFILES="$BASEPATH/XRouter Help Files"
OUTPUTDIR="$BASEPATH/docuwiki-$DATE"

# Colourise output. printf '%s' is used instead of "echo -e" so that
# backslashes inside the message are printed literally.
echoRed () {
  printf '\e[1;31m----%s----\e[0m\n' "$1"
}

echoGreen () {
  printf '\e[1;32m----%s----\e[0m\n' "$1"
}

# Error messages: same look as echoRed, but written to stderr.
echoError () {
  printf '\e[1;31m----%s----\e[0m\n' "$1" >&2
}

# Refuse to run as root.
# Returns: 0 for a normal user, 1 (after printing an error) for root.
checkRoot () {
  if (( UID == 0 )); then
    echoError "This script must NOT be run as root!"
    return 1
  fi
}

# awk program for MAN files.
#  - carriage returns are stripped from every line
#  - lines starting with ";" are comments and are dropped
#  - lines 1-2: blank lines are dropped, anything else opens a <code> block
#  - lines 3+ : a line starting with a capital letter is a section heading;
#               it closes the current code block, is printed in bold and
#               opens the next code block. Everything else is copied as-is.
awkParseMan='
{
  gsub("\r", "")
  if (/^;/) { next }
  if (NR <= 2) {
    if (NF == 0) { next }
    print "<code>" $0
  } else if (/^[A-Z]/) {
    print "</code> **" $0 "** <code>"
  } else {
    print $0
  }
}
'

# awk program for HLP files.
#  - the <code> block is opened before any input is read, so that even an
#    empty file yields balanced markup once the caller appends </code>
#  - carriage returns are stripped; comment (";") and empty lines are dropped
awkParseHlp='
BEGIN { print "<code>" }
{
  gsub("\r", "")
  if (/^;/ || NF == 0) { next }
  print $0
}
'

# Convert every file below one source tree into DokuWiki markup.
# Globals:   OUTPUTDIR (read), plus the variable named by $1 (read)
# Arguments: $1 - NAME of the variable holding the source directory;
#                 must be MANFILES or HLPFILES (it also selects the parser
#                 and names the output sub-directory)
# Outputs:   one "<section>.docuwiki" file per sub-folder of the source
#            directory in "$OUTPUTDIR/$1"; section names on stdout
# Returns:   0 on success, 1 if the source directory is missing or the
#            output directory cannot be created, 2 on a bad argument
parseFiles () {
  local kind=${1:-}
  local srcdir outdir folder file section title outfile

  case "$kind" in
    MANFILES|HLPFILES) ;;
    *)
      echoError "parseFiles: expected MANFILES or HLPFILES, got '$kind'"
      return 2
      ;;
  esac

  srcdir=${!kind}
  outdir="${OUTPUTDIR}/${kind}"

  # Validate the input before creating anything on disk
  if [[ ! -d "$srcdir" ]]; then
    echoError "Source directory not found: $srcdir"
    return 1
  fi
  mkdir -p -- "$outdir" || return 1

  # Traverse folders, skipping files in base directory
  for folder in "$srcdir"/*/; do
    # An unmatched glob is left as the literal pattern - ignore it
    [[ -d "$folder" ]] || continue

    # The section name is the last path component of the folder
    section=${folder%/}
    section=${section##*/}
    outfile="${outdir}/${section}.docuwiki"

    # Format the section name as a DokuWiki header
    printf '=======%s=======\n' "$section" >> "$outfile"

    # Spit some stuff out to the shell
    echoRed "$section"

    # Traverse through files
    for file in "$folder"*; do
      # Skip sub-directories and the literal pattern of an empty folder
      [[ -f "$file" ]] || continue

      # File name without path; the header drops everything from the
      # first "." onwards (i.e. the .MAN / .HLP extension)
      title=${file##*/}
      printf '=====%s=====\n' "${title%%.*}" >> "$outfile"

      case "$kind" in
        # For MAN files, after awk has done its job we need to remove the
        # last line; this last line breaks the following <code> statement
        # and is just an EOF message, so we don't lose anything.
        # sed '$d' is the portable spelling of GNU "head -n -1".
        MANFILES)
          awk "$awkParseMan" "$file" | sed '$d' >> "$outfile"
          printf '</code>\n----\n' >> "$outfile"
          ;;
        # For HLP files we don't want to remove the last line because
        # that truly is real content
        HLPFILES)
          awk "$awkParseHlp" "$file" >> "$outfile"
          printf '</code>\n' >> "$outfile"
          ;;
      esac
    done
  done
}

# Entry point: parse both trees. Both are always attempted; the return
# status is non-zero if either failed (or if run as root).
main () {
  local status=0

  checkRoot || return 1

  echoGreen "Parsing MAN files from $MANFILES"
  parseFiles MANFILES || status=1

  echoGreen "Parsing HLP files from $HLPFILES"
  parseFiles HLPFILES || status=1

  return "$status"
}

#Let's go! (the script's exit status is main's return status)
main "$@"
packet/xrouter/manpages/parsing.1745131331.txt.gz · Last modified: 2025/04/20 06:42 by m0mzf