User Tools

Site Tools


packet:xrouter:docs:parsing

This is an old revision of the document!


#!/bin/bash
##################################
# by Jason M0MZF (not a programmer!)
# bash / awk / hammer / nail etc.
# License - MIT. Crack on people.
#
# Script to parse Paula G8PZT's Xrouter MAN and HLP files into DocuWiki
# "some simple markup language" (SSML). DocuWiki ML syntax is here:
# https://www.dokuwiki.org/wiki:syntax
#
# The intention is to parse all MAN / HLP files within the folders and
# write them with appropriate formatting to files which can then be
# pasted directly into the wiki.
#
# This could also be done with groff > HTML > pandoc > ssml but pandoc's
# output format for SSML doesn't sort out proper ====headers==== and I
# don't know Lua. Yet. Maybe something like this with a custom output formatter:
# cat ${manpage} | groff -Thtml -P -l -mmandoc 2>/dev/null | pandoc -f html -t dokuwiki -o "$manpage".txt
# But when all you've got is awk, everything looks like a record / field... ;)
#
##################################
# Instructions (destructions?)
#
# - This script does not take any arguments
# - The only required configuration is to set the following path
BASEPATH=/home/jason/radio/packet/xrouter/Xrouter
# and the Wiki namespace
NAMESPACE="packet:xrouter:docs:"
# This folder should contain the two directories "XRouter Help Files"
# and "XRouter Manual Files". A new directory will be created here
# called "docuwiki-date" and contains two directories for the
# concatenated and reformatted files. These files should be pasted into
# wiki pages created by the index file.
#
# The index file created contains a list of all files parsed by this
# script with docuwiki-formatted links to each command. This index file
# should be pasted into the namespace to update the index.
#
# The file structure we're working with currently is
# BASEPATH/MANPAGES/SECTION1/ENTRY1.MAN
#                           /ENTRY2.MAN ...
#                   SECTION2/ENTRY1.MAN
#                           /ENTRY2.MAN ...
# BASEPATH/HLPFILES/SECTION1/ENTRY1.HLP
#                           /ENTRY2.HLP
#                   SECTION2/ENTRY1.HLP
#                           /ENTRY2.HLP ...
#
# and the output structure is
# OUTPUTDIR/index.docuwiki
# OUTPUTDIR/MAN/SECTION1.docuwiki
#               SECTION2.docuwiki ...
# OUTPUTDIR/HLP/SECTION1.docuwiki
#               SECTION2.docuwiki ...
##################################
# Changelog
# 20250418 - Implemented MAN page parsing
# 20250419 - Implemented HLP page parsing
# 20250419 - Tidy up, more awk less bash, remove .MAN / .HLP from outputted headers
# 20250422 - Tidy up, create an index of commands and create links to sections
# 20250425 - Tidy up, Create SEE ALSO links within manpages
# 20250426 - Refactor SEE ALSO links
##################################

# Globals
# Timestamp used to give every run its own output directory
DATE=$(date +"%Y%m%d-%H%M%S")
# Source directories; parseFiles reaches these by name (MAN / HLP) via indirection
MAN="$BASEPATH/XRouter Manual Files"
HLP="$BASEPATH/XRouter Help Files"
# Everything generated by this run lands under OUTPUTDIR
OUTPUTDIR="$BASEPATH/docuwiki-$DATE"
INDEXFILE="${OUTPUTDIR}"/index.docuwiki

# Wiki MANPAGE namespace structure: NAMESPACE is set in the configuration section above and is passed into awk later with -v.

# Handy functions

# Print $1 on stdout, framed by dashes, in bold red.
# echo -e is kept so escape sequences inside $1 are still interpreted.
echoRed () {
	echo -e "\e[1;31m----$1----\e[0m"
}

# Print $1 on stdout, framed by dashes, in bold green.
echoGreen () {
	echo -e "\e[1;32m----$1----\e[0m"
}

# Refuse to run as root: print a warning and terminate the script.
# Exits with status 1 so callers can tell the refusal from a normal finish.
checkRoot () {
	if [[ $UID -eq 0 ]]; then
		echoRed "Don't run this as root please"
		exit 1
	fi
}

# awk program: reads a section name on stdin and prints one index link to a
# MAN entry. Needs -v ns=<wiki namespace> and -v ti=<entry title>.
awkFormatIndexMANTitle='
{
	sectionnum = $0
	gsub(/[^0-9]/, "", sectionnum)		# keep only the digits of the section name
	# namespace link to the MAN page of that section, anchored at the title
	printf "[[%sMAN%s#%s|%s]] | \n", ns, sectionnum, ti, ti
}
'

# awk program: reads a section name on stdin and prints one index link to a
# HLP entry. Needs -v ns=<wiki namespace> and -v ti=<entry title>.
awkFormatIndexHLPTitle='
{
	page = $0
	gsub(/ /, "", page)			# page name is the section name without spaces
	# namespace link to that page, anchored at the title
	printf "[[%s%s#%s|%s]] | \n", ns, page, ti, ti
}
'

# awk program: convert one MAN file into DokuWiki markup on stdout.
# The caller must pass:
#   -v recs=<number of lines in the file>   used to stop before the last two lines
#   -v flag=0                               becomes 1 once a SEE ALSO line was seen
#   -v lno=99999                            record number of the SEE ALSO line
#   -v ns=<wiki namespace>                  prefix for generated links
# Headings (lines starting with a capital letter) close the running <code>
# block, are printed bold, and open a new <code> block. Entries after SEE ALSO
# become links of the form [[<ns>MAN<section>#<name>|<original line>]].
#
# NOTE: the program is a single-quoted shell string, so it must not contain an
# apostrophe anywhere - not even inside an awk comment - or the shell string
# ends early and the rest of the program is parsed as shell.
awkParseMan='
{
	# Stop at the penultimate line: the final two lines are dropped.
	# Without a SEE ALSO section the code block is still open, so close it.
	if (NR >= (recs - 1) && flag == 0) { print "</code>"; exit }
	# With a SEE ALSO section the code block is already closed.
	if (NR >= (recs - 1) && flag == 1) { print ""; exit }

	# Strip carriage returns once, up front, so every branch below (including
	# the SEE ALSO links and their empty-line test) sees clean text.
	gsub("\r", "")

	if (NR == 1 || NR == 2) {
		if (/^;/ || NF == 0) { next }		# skip comment or empty lines
		print "<code>" $0			# open the code block with this line
	}

	if (NR >= 3 && NR < recs) {
		# Split on parentheses so NAME(section) yields $1=NAME and $2=section.
		# Assigning FS takes effect from the next record onwards.
		FS = "[()]"
		if (/^SEE ALSO/) {
			lno = NR			# remember where the section starts
			flag = 1			# this file has a SEE ALSO section
			print "</code> **SEE ALSO:** \\\\"
		} else if (NR > lno && NR < (recs - 1) && NF > 0) {
			# Non-empty line inside the SEE ALSO section: emit a link.
			line = $0			# keep the untouched text as link label
			gsub(" ", "", $1)		# strip whitespace from the entry name
			print "[[" ns "MAN" $2 "#" $1 "|" line "]]" " \\\\"
		} else if (/^[A-Z]/) {
			# Heading: end previous code block, bold title, start code block.
			print "</code> **" $0 "** <code>"
		} else {
			if (/^;/ || /^ ;/) { next }	# skip comment lines
			print $0
		}
	}
}
'

# awk program: convert one HLP file into the body of a wiki code block.
# It emits the opening <code> tag only; the caller appends the closing tag.
awkParseHlp='
{ gsub(/\r/, "") }			# drop carriage returns from every record
NR == 1 { print "<code>" }		# open the code block on the first record
/^;/ || NF == 0 { next }		# comment and empty lines are not copied
{ print }				# everything else goes out as cleaned
'

#######################################
# Convert every section folder of one documentation kind into wiki pages
# and add a link for every entry to the index file.
# Globals:
#   MAN / HLP   (read) source directory, looked up by NAME through $1
#   OUTPUTDIR, INDEXFILE, NAMESPACE   (read)
#   awkParseMan, awkParseHlp, awkFormatIndexMANTitle, awkFormatIndexHLPTitle (read)
# Arguments:
#   $1 - kind to parse and, at the same time, the name of the global that
#        holds its source directory: MAN or HLP (DOC is only a stub)
# Outputs:
#   OUTPUTDIR/$1/<section>.docuwiki for each section folder, index lines
#   appended to INDEXFILE, section names echoed to stdout as progress
#######################################
parseFiles () {
	local kind=$1
	local folder file section outputpath sectionnumber base title recs first second

	mkdir -p -- "${OUTPUTDIR}/${kind}"
	printf '===== %s Files =====\n' "$kind" >> "$INDEXFILE"
	# Traverse folders, skipping files in base directory
	for folder in "${!kind}"/*/
	do
		# An unmatched glob stays literal; there is nothing to do then
		[[ -d "$folder" ]] || continue
		# The section is the last path component of the folder
		section=${folder%/}
		section=${section##*/}
		# One output file per section
		outputpath="${OUTPUTDIR}/${kind}/${section}.docuwiki"
		# Numerical section number, taken from the section NAME only so that
		# digits elsewhere in the path (e.g. in BASEPATH) cannot pollute it
		sectionnumber=${section//[^0-9]/}
		# Create formatted section for wiki namespace
		printf '==== %s ====\n' "$section" >> "$INDEXFILE"
		# Format the section name as a docuWiki header
		printf '======%s======\n' "$section" >> "$outputpath"
		# Spit some stuff out to the shell
		echoRed "$section"
		# Traverse through files
		for file in "$folder"*
		do
			[[ -f "$file" ]] || continue
			base=${file##*/}
			case "$kind" in
			# For MAN files awkParseMan drops the last two lines; the last line breaks
			# the following <code> statement and is just an EOF message, the
			# penultimate line is blank so we don't lose anything.
			MAN)
				# Section 8 has links to actual real filenames which we want to keep.
				# 10# forces decimal so a number like 08 is not read as octal.
				if (( 10#${sectionnumber:-0} == 8 )); then
					# Keep the first two dot-separated parts (CONFIG.SYS.MAN -> CONFIG.SYS)
					IFS=. read -r first second _ <<< "$base"
					title="${first}.${second}"
				# but all other sections have .MAN or .HLP extensions which we want to remove
				else
					# Everything before the first dot
					title=${base%%.*}
				fi
				# Begin by writing a docuwiki header containing file name
				printf '===== %s =====\n' "$title" >> "$outputpath"
				# awkParseMan needs the line count to know where the file ends
				recs=$(wc -l < "$file")
				recs=${recs//[[:space:]]/}
				awk -v flag=0 -v recs="$recs" -v lno=99999 -v ns="$NAMESPACE" "$awkParseMan" "$file" >> "$outputpath"
				# Add a line break after each MAN entry
				printf '\n\n----\n\n\n' >> "$outputpath"
				# and add an entry to the index
				printf '%s\n' "$section" | awk -v ns="$NAMESPACE" -v ti="$title" "$awkFormatIndexMANTitle" >> "$INDEXFILE"
			;;
			# For HLP files we don't want to remove the last line because that truly is real content
			HLP)
				title=${base%%.*}
				printf '==== %s ====\n' "$title" >> "$outputpath"
				awk "$awkParseHlp" "$file" >> "$outputpath"
				printf '</code>\n' >> "$outputpath"
				printf '%s\n' "$section" | awk -v ns="$NAMESPACE" -v ti="$title" "$awkFormatIndexHLPTitle" >> "$INDEXFILE"
			;;
			DOC)	echoRed "No code to parse docs yet"
			;;
			esac
		done
	done
}

# Let's go!
# Refuse to run as root, create the dated output directory, start the index
# file with its title and introduction, then convert the MAN and HLP trees.
checkRoot
# Without the output directory nothing below can be written, so stop here
mkdir -p -- "$OUTPUTDIR" || exit 1
echo "======= XRouter Documentation =======" >> "$INDEXFILE"
echo "The content below is auto-generated from the XRouter documentation using this bash / awk script to parse MAN / HLP files into docuWiki some simple markup language" >> "$INDEXFILE"
# The source directories live in the globals MAN and HLP
echoGreen "Parsing MAN files from $MAN"
parseFiles MAN
echoGreen "Parsing HLP files from $HLP"
parseFiles HLP
#echoGreen "Parsing DOC files from $DOCFILES"
#parseFiles DOC

packet/xrouter/docs/parsing.1745693773.txt.gz · Last modified: by m0mzf