User Tools

Site Tools


packet:xrouter:docs:parsing

This is an old revision of the document!


parse-pzt-ssml.sh
#!/bin/bash
##################################
# by Jason M0MZF (not a programmer!)
# bash / awk / hammer / nail etc.
# License - MIT. Crack on people.
# 
# Script to parse Paula G8PZT's Xrouter MAN and HLP files into DocuWiki
# "some simple markup language" (SSML). DocuWiki ML syntax is here:
# https://www.dokuwiki.org/wiki:syntax
#
# The intention is to parse all MAN / HLP files within the folders and
# write them with appropriate formatting to files which can then be
# pasted directly into the wiki.
#
# This could also be done with groff > HTML > pandoc > ssml but pandoc's
# output format for SSML doesn't sort out proper ====headers==== and I
# don't know Lua. Yet. Maybe something like this with a custom output formatter:
# cat ${manpage} | groff -Thtml -P -l -mmandoc 2>/dev/null | pandoc -f html -t dokuwiki -o "$manpage".txt
# But when all you've got is awk, everything looks like a record / field... ;)
#
##################################
# Instructions (destructions?)
#
# - This script does not take any arguments
# - The only required configuration is to set the following path
BASEPATH=/home/jason/radio/packet/xrouter/Xrouter
# and the Wiki namespace (it ends with a colon because the awk programs
# below append page names directly to it)
NAMESPACE="packet:xrouter:docs:"
# The BASEPATH folder should contain the two directories "XRouter Help Files"
# and "XRouter Manual Files". A new directory will be created inside BASEPATH
# called "docuwiki-<date>-<time>" which contains two directories (MAN and HLP)
# for the concatenated and reformatted files. These files should be pasted
# into wiki pages created by the index file.
#
# The index file created contains a list of all files parsed by this
# script with docuwiki-formatted links to each command. This index file
# should be pasted into the namespace to update the index.
#
# The file structure we're working with currently is
# BASEPATH/MANPAGES/SECTION1/ENTRY1.MAN
#			    /ENTRY2.MAN ...
# 		    SECTION2/ENTRY1.MAN
# 		    	    /ENTRY2.MAN ...
# BASEPATH/HLPFILES/SECTION1/ENTRY1.HLP
#			    /ENTRY2.HLP
# 		    SECTION2/ENTRY1.HLP
#			    /ENTRY2.HLP ...
#
# and the output structure is
# OUTPUTDIR/index.docuwiki
# OUTPUTDIR/MAN/SECTION1.docuwiki
#  		SECTION2.docuwiki ...
# OUTPUTDIR/HLP/SECTION1.docuwiki
#  		SECTION2.docuwiki ...
##################################
# Changelog
# 20250418 - Implemented MAN page parsing
# 20250419 - Implemented HLP page parsing
# 20250419 - Tidy up, more awk less bash, remove .MAN / .HLP from outputted headers
# 20250422 - Tidy up, create an index of commands and create links to sections
# 20250426 - Tidy up, Create SEE ALSO links within manpages
##################################
 
# Globals
# Timestamp of this run; it makes the output directory name unique per run
DATE="$(date +'%Y%m%d-%H%M%S')"
# The two input directories expected under BASEPATH
MAN="${BASEPATH}/XRouter Manual Files"
HLP="${BASEPATH}/XRouter Help Files"
# Output directory for this run and the index file written inside it
OUTPUTDIR="${BASEPATH}/docuwiki-${DATE}"
INDEXFILE="${OUTPUTDIR}/index.docuwiki"
 
# The wiki MANPAGE namespace (NAMESPACE) is defined in the instructions section above; we pass it into awk later
 
 
# Handy functions
# Print $1 wrapped in "----" markers, in bold red, followed by a colour reset.
# echo -e is kept so escape sequences inside $1 are still interpreted.
echoRed () {
	local colour='\e[1;31m'
	local reset='\e[0m'
	echo -e "${colour}----${1}----${reset}"
}
# Print $1 wrapped in "----" markers, in bold green, followed by a colour reset.
# echo -e is kept so escape sequences inside $1 are still interpreted.
echoGreen () {
	local colour='\e[1;32m'
	local reset='\e[0m'
	echo -e "${colour}----${1}----${reset}"
}
# Refuse to run as root.
# Globals:  UID (read)
# Outputs:  a red warning on stderr when run as root
# Returns:  0 for a normal user; exits the script with status 1 for root
checkRoot () {
	if [[ $UID -eq 0 ]]; then
		echoRed "Don't run this as root please" >&2
		# A bare "exit" would return the status of echoRed (0) and make the
		# refusal look like a successful run
		exit 1
	fi
}
 
# awk program: turn a section name on stdin into an index link for one MAN entry.
# Expects -v ns=<wiki namespace> and -v ti=<entry title>.
awkFormatIndexMANTitle='
{
	gsub(/[^0-9]/, "")				# keep only the digits of the section name
	target = ns "MAN" $0 "#" ti			# page MAN<digits> in the namespace, anchored at the title
	print "[[" target "|" ti "]] | "		# and output the link followed by a separator
}
'
 
# awk program: turn a section name on stdin into an index link for one HLP entry.
# Expects -v ns=<wiki namespace> and -v ti=<entry title>.
awkFormatIndexHLPTitle='
{
	gsub(" ", "")					# remove every space from the section name
	target = ns $0 "#" ti				# page named after the section, anchored at the title
	print "[[" target "|" ti "]] | "		# and output the link followed by a separator
}
'
 
# awk program: convert one MAN page into wiki markup made of bold headings
# separated by <code> blocks. Carriage returns are removed from every printed line.
awkParseMan='
# Lines one and two: drop comment / empty lines, otherwise open a code block
NR <= 2 {
	gsub("\r", "")					# remove all carriage return chars
	if (NF == 0 || $0 ~ /^;/) next			# nothing to print for empty or comment lines
	print "<code>" $0				# output the line prepended with a code block start
	next
}

# Remaining lines: the SEE ALSO heading and comment lines are not printed
/^SEE ALSO/ || /^;/ { next }

# Remaining lines: a line starting with a capital letter becomes a bold heading
# which closes the previous code block and opens the next one
{
	heading = ($0 ~ /^[A-Z]/)			# decided before the carriage returns are removed
	gsub("\r", "")					# remove all carriage return chars
	if (heading)
		print "</code> **" $0 "** <code>"
	else
		print $0
}
'
 
# awk program: rewrite every line after the SEE ALSO heading as a wiki link.
# Expects -v ns=<wiki namespace> and -v ln=<a number larger than any line count>;
# ln is lowered to the SEE ALSO record number once that line is seen.
awkCreateLinks='
{
	original = $0					# untouched copy of the line, used as the link text
	FS = "[()]"					# split on brackets so CONFIG.SYS and 8 are separate fields
							# NOTE(review): set per record, so presumably it only applies from the next record on
	if ($0 ~ /^SEE ALSO/)
	{
		ln = NR					# remember where SEE ALSO is
		print "</code> **SEE ALSO:** \\\\"	# and output the formatted title
	}
	if (NR <= ln)					# up to and including the SEE ALSO line
	{
		print $0				# pass the line through unchanged to the next awk pass
		next
	}
	gsub(" ", "", $1)				# strip the spaces out of field 1 (i.e. CONFIG.SYS)
	print "[[" ns "MAN" $2 "#" $1 "|" original "]] \\\\"	# output the line as a formatted link
	ln++						# SEE ALSO is at the bottom of the file, so every line
}							# from here to the end of the file is treated as a link
'
 
# awk program: convert one HLP file into the body of a <code> block.
# The closing </code> tag is not written here.
awkParseHlp='
NR == 1 { print "<code>" }				# start the code block on the first line
{ gsub("\r", "") }					# remove all carriage return chars
NF == 0 || /^;/ { next }				# skip empty / comment lines
{ print $0 }						# output the refined line
'
 
# Parse every section folder of one documentation type into wiki markup.
# Globals:   OUTPUTDIR, INDEXFILE, NAMESPACE (read); the variable named by $1
#            (MAN / HLP) holding the source directory; the awk programs
#            awkCreateLinks, awkParseMan, awkParseHlp, awkFormatIndexMANTitle
#            and awkFormatIndexHLPTitle
# Arguments: $1 - documentation type: MAN, HLP or DOC
# Outputs:   OUTPUTDIR/$1/<section>.docuwiki for each section folder, and
#            heading / link lines appended to INDEXFILE
# Returns:   1 if the type is unknown, its source directory is missing, or the
#            output directory cannot be created
parseFiles () {
	local doctype=$1
	local sourcedir="" folder file base section sectionnumber outputpath title
	# Indirect expansion: $1 is the NAME of the global holding the source directory.
	# An empty result must be refused, otherwise the glob below would walk "/*/".
	[[ -n "$doctype" ]] && sourcedir=${!doctype}
	if [[ -z "$sourcedir" || ! -d "$sourcedir" ]]; then
		echoRed "No source directory for '$doctype' files" >&2
		return 1
	fi
	mkdir "${OUTPUTDIR}/${doctype}" || return 1
	echo "==== ${doctype} Files ====" >> "$INDEXFILE"
	# Traverse folders, skipping files in base directory
	for folder in "$sourcedir"/*/
	do
		# An unmatched glob is left as the literal pattern; skip it
		[[ -d "$folder" ]] || continue
		# Section (folder) name: the last path component, without the trailing slash
		section=${folder%/}
		section=${section##*/}
		# Use the section name to generate a file for each section
		outputpath="${OUTPUTDIR}/${doctype}/${section}.docuwiki"
		# Section number: digits of the section name only, so that digits
		# elsewhere in the path cannot leak into it
		sectionnumber=${section//[^0-9]/}
		# Create formatted section for wiki namespace
		echo "== $section ==" >> "$INDEXFILE"
		# Format the section name as a docuWiki header
		printf '======%s======\n' "$section" >> "$outputpath"
		# Spit some stuff out to the shell
		echoRed "$section"
		# Traverse through files
		for file in "$folder"*
		do
			[[ -f "$file" ]] || continue
			base=${file##*/}
			case "$doctype" in
			# For MAN files, after awk has done its job we need to remove the last two lines; the last line breaks
			# the following <code> statement and is just an EOF message, the penultimate line is blank so we don't lose anything.
			MAN)
				# Section 8 has links to actual real filenames which we want to keep.
				# The regex also copes with a section name that contains no digits.
				if [[ "$sectionnumber" =~ ^0*8$ ]]; then
					# Strip only the last file extension, keeping the real file name (CONFIG.SYS.MAN -> CONFIG.SYS)
					title=${base%.*}
				# but all other sections have .MAN or .HLP extensions which we want to remove
				else
					# Strip everything from the first dot onwards
					title=${base%%.*}
				fi
				{
					# Begin by writing a docuwiki header containing file name
					echo "==== $title ===="
					# First pass through awk creates links in the SEE ALSO section, second pass docuWiki-fys it.
					# head -n -2 (GNU head) drops the last two lines, see above.
					awk -v ln=999999 -v ns="$NAMESPACE" "$awkCreateLinks" "$file" | awk "$awkParseMan" | head -n -2
					# Add a line break after each MAN entry
					echo -e "\n----"
				} >> "$outputpath"
				# and add an entry to the index
				printf '%s\n' "$section" | awk -v ns="$NAMESPACE" -v ti="$title" "$awkFormatIndexMANTitle" >> "$INDEXFILE"
			;;
			# For HLP files we don't want to remove the last line because that truly is real content
			HLP)
				title=${base%%.*}
				{
					echo "==== $title ===="
					awk "$awkParseHlp" "$file"
					echo "</code>"
				} >> "$outputpath"
				printf '%s\n' "$section" | awk -v ns="$NAMESPACE" -v ti="$title" "$awkFormatIndexHLPTitle" >> "$INDEXFILE"
			;;
			DOC)	echoRed "No code to parse docs yet"
			;;
			*)	echoRed "Unknown file type '$doctype'" >&2
				return 1
			;;
			esac
		done
	done
}
 
#Let's go!
checkRoot
# Everything below writes into OUTPUTDIR, so stop if it cannot be created
if ! mkdir "$OUTPUTDIR"; then
	echoRed "Could not create $OUTPUTDIR" >&2
	exit 1
fi
echo "======= XRouter Documentation =======" >> "$INDEXFILE"
# NAMESPACE already ends with a colon, so the page name is appended to it directly
echo "The content below is auto-generated from the XRouter documentation using [[${NAMESPACE}parsing|this bash / awk script]] to parse MAN / HLP files into docuWiki some simple markup language" >> "$INDEXFILE"
# The source directories are held in the MAN and HLP globals
echoGreen "Parsing MAN files from $MAN"
parseFiles MAN
echoGreen "Parsing HLP files from $HLP"
parseFiles HLP
# DOC parsing is not implemented yet; a DOC global would have to be defined first
#echoGreen "Parsing DOC files from $DOC"
#parseFiles DOC
packet/xrouter/docs/parsing.1745682300.txt.gz · Last modified: by m0mzf