packet:xrouter:docs:parsing
- parse-pzt-ssml.sh
#!/bin/bash
##################################
# by Jason M0MZF (not a programmer!)
# bash / awk / hammer / nail etc.
# License - MIT. Crack on people.
#
# Script to parse Paula G8PZT's Xrouter MAN and HLP files into DokuWiki
# "some simple markup language" (SSML). DokuWiki ML syntax is here:
# https://www.dokuwiki.org/wiki:syntax
#
# The intention is to parse all MAN / HLP files within the folders and
# write them with appropriate formatting to files which can then be
# pasted directly into the wiki.
#
# This could also be done with groff > HTML > pandoc > ssml but pandoc's
# output format for SSML doesn't sort out proper ====headers==== and I
# don't know Lua. Yet. Maybe something like this with a custom output formatter:
# cat ${manpage} | groff -Thtml -P -l -mmandoc 2>/dev/null | pandoc -f html -t dokuwiki -o "$manpage".txt
# But when all you've got is awk, everything looks like a record / field... ;)
#
##################################
# Instructions (destructions?)
#
# - This script does not take any arguments
# - The only required configuration is to set the following path
BASEPATH=/home/jason/radio/packet/xrouter/Xrouter
# and the Wiki namespace (note: it already ends with a colon, so page
# names are appended to it directly)
NAMESPACE="packet:xrouter:docs:"
# The BASEPATH folder should contain the two directories
# "XRouter Help Files" and "XRouter Manual Files". A new directory will
# be created here called "docuwiki-date" and contains two directories for
# the concatenated and reformatted files. These files should be pasted
# into wiki pages created by the index file.
#
# The index file created contains a list of all files parsed by this
# script with DokuWiki-formatted links to each command. This index file
# should be pasted into the namespace to update the index.
#
# The file structure we're working with currently is
# BASEPATH/MANPAGES/SECTION1/ENTRY1.MAN
#                           /ENTRY2.MAN ...
#                   SECTION2/ENTRY1.MAN
#                           /ENTRY2.MAN ...
# BASEPATH/HLPFILES/SECTION1/ENTRY1.HLP
#                           /ENTRY2.HLP
#                   SECTION2/ENTRY1.HLP
#                           /ENTRY2.HLP ...
#
# and the output structure is
# OUTPUTDIR/index.docuwiki
# OUTPUTDIR/MAN/SECTION1.docuwiki
#               SECTION2.docuwiki ...
# OUTPUTDIR/HLP/SECTION1.docuwiki
#               SECTION2.docuwiki ...
#
# Exit status: 0 on success, 1 if run as root, if the output directory
# cannot be created, or if any file type could not be parsed.
##################################
# Changelog
# 20250418 - Implemented MAN page parsing
# 20250419 - Implemented HLP page parsing
# 20250419 - Tidy up, more awk less bash, remove .MAN / .HLP from outputted headers
# 20250422 - Tidy up, create an index of commands and create links to sections
# 20250425 - Tidy up, Create SEE ALSO links within manpages
# 20250426 - Refactor SEE ALSO links
##################################

# Globals
DATE=$(date +"%Y%m%d-%H%M%S")
MAN="$BASEPATH/XRouter Manual Files"
HLP="$BASEPATH/XRouter Help Files"
OUTPUTDIR="$BASEPATH/docuwiki-$DATE"
INDEXFILE="${OUTPUTDIR}/index.docuwiki"

# Handy functions

# Print $1 to stdout as a bold red "----text----" banner.
echoRed () {
  printf '\e[1;31m----%s----\e[0m\n' "$1"
}

# Print $1 to stdout as a bold green "----text----" banner.
echoGreen () {
  printf '\e[1;32m----%s----\e[0m\n' "$1"
}

# Refuse to run as root; exits with a non-zero status so callers can
# tell the refusal apart from a successful run.
checkRoot () {
  if (( UID == 0 )); then
    echoRed "Don't run this as root please"
    exit 1
  fi
}

# awk functions
# NOTE: these programs live inside single-quoted shell strings, so the
# awk comments below must never contain an apostrophe.

# Input: a section (folder) name. Variables: ns = wiki namespace,
# ti = entry title. Output: one index link to the entry on the MAN<n> page.
awkFormatIndexMANTitle='
{
  gsub(/[^0-9]/, "")                      # Keep only the digits of the section name
  print "[["ns"MAN"$0"#"ti"|"ti"]]"" | "  # and output a namespace link to the MAN entry
}
'

# Input: a section (folder) name. Variables: ns = wiki namespace,
# ti = entry title. Output: one index link to the entry on the section page.
awkFormatIndexHLPTitle='
{
  gsub(" ", "")                           # Strip out any whitespace
  print "[["ns$0"#"ti"|"ti"]]"" | "       # and output a namespace link to the HLP entry
}
'

# Input: one MAN file. Variables: recs = line count of the file,
# flag = 0 (set to 1 once a SEE ALSO section is seen), lno = large
# sentinel (set to the record number of the SEE ALSO line), ns = namespace.
awkParseMan='
{
  gsub("\r", "")                          # Strip carriage returns first so that every branch,
                                          # including the SEE ALSO links, works on clean text

  if (NR>=(recs - 1) && flag==0) {print "</code>" ; exit}  # At EOF without a SEE ALSO section: close the code block and exit
  if (NR>=(recs - 1) && flag==1) {print "" ; exit}         # At EOF with a SEE ALSO section: write an empty line and exit

  if (NR==1 || NR==2)                     # For the first two lines
  {
    if (/^;/ || NF==0) {next}             # skip comment or empty lines
    print "<code>" $0                     # and output the line prepended with a code block start
  }

  if (NR>=3 && NR<recs)                   # For the other items leading up to EOF
  {
    FS="[()]"                             # Field separator so we can generate section & title
    if (/^SEE ALSO/)                      # Only for the SEE ALSO line
    {
      lno=NR                              # Remember where the SEE ALSO section starts
      flag=1                              # Flag that this file has a SEE ALSO section
      print "</code> **SEE ALSO:** \\\\"  # Output a reformatted SEE ALSO line
    }
    else if (NR>lno && NR<(recs -1) && NF>0)  # Inside SEE ALSO, not at EOF, non-empty line
    {
      line=$0                             # Save the initial line to print later
      gsub(" ", "", $1)                   # Strip the whitespace out of field #1 (i.e. CONFIG.SYS)
      print "[["ns"MAN" $2"#"$1"|"line"]]"" \\\\"  # and output the line as a DokuWiki formatted link
      lno++
    }
    else if (/^[A-Z]/)                    # If the line begins with a capital letter it is a heading
    {
      starthead="</code> **"              # end previous code block
      endhead="** <code>"                 # set bold and start code block
      print starthead $0 endhead          # and output the line
    }
    else                                  # else for all other lines
    {
      if (/^;/ || /^ ;/) {next}           # skip comment lines
      print $0                            # and output the line
    }
  }
}
'

# Input: one HLP file. Output: a code block start followed by every
# non-comment, non-empty line with carriage returns removed.
awkParseHlp='
{
  endhead="<code>"
  gsub("\r", "")                          # remove all carriage return chars
  if (NR==1) {print endhead}              # start code block on first line
  if (/^;/ || NF==0) {next}               # skip comment / empty lines
  print $0                                # output the refined line
}
'

#######################################
# File enumerator / reader / writer.
# Walks every section folder of one file type, writes one
# SECTION.docuwiki page per folder and appends links to the index.
# Globals:   MAN / HLP (read via indirection on $1), OUTPUTDIR,
#            INDEXFILE, NAMESPACE, awk* program strings (read)
# Arguments: $1 - file type: MAN, HLP or DOC. It is also the NAME of
#                 the global variable holding the source directory.
# Outputs:   progress banners on stdout; files under OUTPUTDIR/$1
# Returns:   0 on success, 1 on unknown type, missing source directory
#            or failure to create the output directory
#######################################
parseFiles () {
  local kind=$1
  local srcdir folder section outputpath sectionnumber file name title recs

  case "$kind" in
    MAN|HLP|DOC) ;;
    *)
      echoRed "Unknown file type: $kind"
      return 1
      ;;
  esac

  # Indirect expansion: "MAN" -> value of $MAN, and so on.
  srcdir=${!kind}
  # Also catches an unset variable, which would otherwise glob "/*/".
  if [[ ! -d "$srcdir" ]]; then
    echoRed "Source directory for $kind not found: $srcdir"
    return 1
  fi

  mkdir "${OUTPUTDIR}/${kind}" || return 1
  echo "===== $kind Files =====" >> "$INDEXFILE"

  # Traverse folders, skipping files in base directory
  for folder in "$srcdir"/*/; do
    [[ -d "$folder" ]] || continue   # glob matched nothing

    # The section name is the last directory component of the path
    section=${folder%/}
    section=${section##*/}
    # Use the section name to generate one output file per section
    outputpath="${OUTPUTDIR}/${kind}/${section}.docuwiki"
    # Numerical section number: digits of the section name only, so that
    # digits elsewhere in the path cannot pollute it
    sectionnumber=${section//[^0-9]/}

    # Create formatted section for wiki namespace
    echo "==== $section ====" >> "$INDEXFILE"
    # Format the section name as a DokuWiki header
    printf '======%s======\n' "$section" >> "$outputpath"
    # Spit some stuff out to the shell
    echoRed "$section"

    # Traverse through files
    for file in "$folder"*; do
      [[ -f "$file" ]] || continue   # empty folder or not a regular file
      name=${file##*/}

      case "$kind" in
        MAN)
          # Section 8 has links to actual real filenames which we want to keep
          if [[ "$sectionnumber" =~ ^0*8$ ]]; then
            # Strip the last file extension but keep the penultimate one,
            # e.g. CONFIG.SYS.MAN -> CONFIG.SYS
            title=$(printf '%s\n' "$name" | awk -F. '{print $1 "." $2}')
          else
            # All other sections: drop everything from the first dot on
            title=${name%%.*}
          fi
          # Begin by writing a DokuWiki header containing file name
          echo "===== $title =====" >> "$outputpath"
          # Get line count for the file
          recs=$(wc -l < "$file" | awk '{print $1}')
          # Parse the file!
          awk -v flag=0 -v recs="$recs" -v lno=99999 -v ns="$NAMESPACE" \
            "$awkParseMan" "$file" >> "$outputpath"
          # Add a link back to index page
          echo "[[$NAMESPACE|< Back to Index]]" >> "$outputpath"
          # Add a line break after each MAN entry
          printf '\n----\n' >> "$outputpath"
          # and add an entry to the index
          printf '%s\n' "$section" \
            | awk -v ns="$NAMESPACE" -v ti="$title" "$awkFormatIndexMANTitle" \
            >> "$INDEXFILE"
          ;;
        HLP)
          title=${name%%.*}
          echo "==== $title ====" >> "$outputpath"
          awk "$awkParseHlp" "$file" >> "$outputpath"
          echo "</code>" >> "$outputpath"
          # Add a link back to index page
          echo "[[$NAMESPACE|< Back to Index]]" >> "$outputpath"
          printf '%s\n' "$section" \
            | awk -v ns="$NAMESPACE" -v ti="$title" "$awkFormatIndexHLPTitle" \
            >> "$INDEXFILE"
          ;;
        DOC)
          echoRed "No code to parse docs yet"
          ;;
      esac
    done
  done
}

#Let's go!
checkRoot

if ! mkdir "$OUTPUTDIR"; then
  echoRed "Cannot create output directory $OUTPUTDIR"
  exit 1
fi

echo "======= XRouter Documentation =======" >> "$INDEXFILE"
# NAMESPACE already ends with ":", so the page name is appended directly.
echo "This content is auto-generated from the XRouter documentation using [[${NAMESPACE}parsing|this bash / awk script]] to parse MAN / HLP files into docuWiki some simple markup language" >> "$INDEXFILE"

# Keep going if one file type fails, but report it in the exit status.
rc=0
echoGreen "Parsing MAN files from $MAN"
parseFiles MAN || rc=1
echoGreen "Parsing HLP files from $HLP"
parseFiles HLP || rc=1
#echoGreen "Parsing DOC files from $DOC"
#parseFiles DOC || rc=1
exit "$rc"
packet/xrouter/docs/parsing.txt · Last modified: by m0mzf