Differences

This shows you the differences between two versions of the page.

--- packet:xrouter:manpages:parsing [2025/04/19 18:30] – m0mzf
+++ packet:xrouter:manpages:parsing [2025/04/20 07:28] (current) – m0mzf
@@ Line 1: / Line 1: @@
-====== G8PZT MAN page parsing script =======
+====== Script to parse Xrouter's MAN and HLP files =======
-Does what it says on the tin :)
+<file awk parse-pzt-manhlp.sh>
-<file>
 #!/bin/bash
 ##################################
-# by Jason M0MZF
+# by Jason M0MZF (not a programmer!)
-# bash / awk / hammer / nail etc :)
+# bash / awk / hammer / nail etc.
 # License - MIT. Crack on people.
 #
@@ Line 15: / Line 13: @@
 # The intention is to parse all MAN / HLP files within the folders and
 # write them with appropriate formatting to files which can then be
-# copypasta'd directly into the wiki.
+# pasted directly into the wiki.
 #
 # This could also be done with groff > HTML > pandoc > ssml but pandoc's
 # output format for SSML doesn't sort out proper ====headers==== and I
-# don't know Lua. Yet.
+# don't know Lua. Yet. Maybe something like this with a custom output formatter:
-#
+# cat ${manpage} | groff -Thtml -P -l -mmandoc 2>/dev/null | pandoc -f html -t dokuwiki -o "$manpage".txt
+# But when all you've got is awk, everything looks like a record / field... ;)
 #
 ##################################
@@ Line 42: / Line 41: @@
 # 20250418 - Implemented MAN page parsing
 # 20250419 - Implemented HLP page parsing
-# 20250419 - Tidy up, more awk less bash, remove .MAN / .HLP
+# 20250419 - Tidy up, more awk less bash, remove .MAN / .HLP from outputted headers
 ##################################
@@ Line 51: / Line 50: @@
 OUTPUTDIR="$BASEPATH/docuwiki-$DATE"
-# Colourise output
+# Handy functions
 echoRed () {
 	echo -e "\e[1;31m----$1----\e[0m"
@@ Line 58: / Line 57: @@
 	echo -e "\e[1;32m----$1----\e[0m"
 }
 checkRoot () {
-	if [[ $UID -eq 0 ]];
+	if [[ $UID -eq 0 ]]; then
-	then
+		echoRed "Don't run this as root please"
-		echoError "This script must NOT be run as root!"
 		exit 1
 	fi
 }
-awkParseMan='
+# Use awk to:
-{
+#  strip out comment lines and remove any <CR> from <CR><LF> lines
+#  turn the MAN page header into a code block, it contains a revision date
+#  find every subsequent MAN page header and turn it into a docuwiki header and
+#   terminate the previous code block before the header, then
+#   create a new code block after the header, thus encapsulating the subsection
+# (the final encapsulation is done using "echo" in the bash script below)
+awkParseMan='{
 	if (NR==1 || NR==2)				# For the first two lines
 	{
 		gsub("\r", "")				# remove all carriage return chars
-		if (/^;/ || NF==0) {next}		# skip comment or empty line
+		if (/^;/ || NF==0) {next}		# skip the subsequent print function for comment or empty lines
-		print "<code>" $0			# and start a ssml code block
+		print "<code>" $0			# and output the line prepended with a code block start
 	}
@@ Line 80: / Line 83: @@
 		if (/^[A-Z]/) 				# If the line begins with an uppercase letter
 		{
-			endcode="</code> **"		# end previous code block
+			starthead="</code> **"		# end previous code block
-			startcode="** <code>"		# set bold and start code block
+			endhead="** <code>"		# set bold and start code block
 			gsub("\r", "")			# remove all carriage return chars
-			print endcode $0 startcode 	# and output the line
+			print starthead $0 endhead 	# and output the line
 		}
 		else 					# else for all other lines
@@ Line 94: / Line 97: @@
 }
 '
+# Use awk to:
-awkParseHlp='
+#  strip out comment lines (this is always line 1, sometimes 2 and 3) and remove any <CR> from <CR><LF> lines
-{
+#  insert a start code block in place of the now-empty line 1
-	header1="<code>"
+# (the final encapsulation is done using "echo" in the bash script below)
+awkParseHlp=' {
+	endhead="<code>"
 	gsub("\r", "")					# remove all carriage return chars
-	if (NR==1) {print header1}			# start code block on first line
+	if (NR==1) {print endhead}			# start code block on first line
 	if (/^;/ || NF==0) {next}			# skip comment / empty lines
-	print $0
+	print $0					# output the refined line
 }
 '
+# Use awk to extract a section name from the directory structure
 awkSectionHeader='
-	BEGIN { FS="/" }				#Set field separator
+	BEGIN { FS="/" }				# Set field separator to get section name from path
-{
+{							# /path/looks/like/this/Section Header Name/
-		header="======="			# create header formatting
+	header="======="				# create header formatting
-		print header $(NF-1) header		# we need the penultimate field as that
+	print header $(NF-1) header			# the penultimate field is the section name
-}							# is the folder name /it/looks/like/this/
+}
 '
+# Use awk to extract a name from the filename.extension
 awkFileHeader='
-	BEGIN { FS="." }				# Set field separator
+	BEGIN { FS="." }				# Set field separator to separate file extension
-{							# to separate out the .MAN .HLP
+{							# because we want the file name from FILENAME.MAN
 	header="====="					# create header formatting
-	print header $1 header				# output first field with headers
+	print header $1 header				# the first field is the file name
 }
 '
@@ Line 127: / Line 132: @@
 	do
 		# Get the penultimate field in file path, i.e. the section (folder) name
-		section=$(echo $folder | awk  -F/ '{print $(NF-1)}')
+		local section=$(echo $folder | awk  -F/ '{print $(NF-1)}')
-		# Prepend the section name as a docuWiki header
+		# Format the section name as a docuWiki header
 		echo "$folder" | awk "$awkSectionHeader" >> "${OUTPUTDIR}"/"$1"/"${section}".docuwiki
 		# Spit some stuff out to the shell
@@ Line 136: / Line 141: @@
 		do
 			# Get the last field in file path, i.e. file name
-			title=$(echo $file | awk -F/ '{print $NF}')
+			local title=$(echo $file | awk -F/ '{print $NF}')
-			# Prepend the file name as a docuwiki header
+			local outputpath="${OUTPUTDIR}"/"$1"/"${section}".docuwiki
-			echo "$title" | awk "$awkFileHeader" >> "${OUTPUTDIR}"/"$1"/"${section}".docuwiki
+			# Format the file name as a docuwiki header
+			echo "$title" | awk "$awkFileHeader" >> "$outputpath"
 			case "$1" in
 			# For MAN files, after awk has done its job we need to remove the last line; this last line breaks
 			# the following <code> statement and is just an EOF message, so we don't lose anything.
-			MANFILES)	awk "$awkParseMan" "$file" | head -n -1 >> "${OUTPUTDIR}"/"$1"/"${section}".docuwiki
+			MANFILES)	awk "$awkParseMan" "$file" | head -n -1 >> "$outputpath"
-						echo -e "</code>\n----" >> "${OUTPUTDIR}"/"$1"/"${section}".docuwiki
+					echo -e "</code>\n----" >> "$outputpath"
 			;;
 			# For HLP files we don't want to remove the last line because that truly is real content
-			HLPFILES)	awk "$awkParseHlp" "$file" >> "${OUTPUTDIR}"/"$1"/"${section}".docuwiki
+			HLPFILES)	awk "$awkParseHlp" "$file" >> "$outputpath"
-						echo -e "</code>" >> "${OUTPUTDIR}"/"$1"/"${section}".docuwiki
+					echo -e "</code>" >> "$outputpath"
 			;;
 			esac