Differences

This shows you the differences between two versions of the page.

--- packet:xrouter:manpages:parsing [2025/04/19 19:31] – m0mzf
+++ packet:xrouter:manpages:parsing [2025/04/22 02:40] (current) – removed m0mzf
@@ Line 1: / Line 1: @@
-====== Script to parse Xrouter's MAN and HLP files ======
-<file bash parse-pzt-manhlp.sh>
-#!/bin/bash
-##################################
-# by Jason M0MZF (not a programmer!)
-# bash / awk / hammer / nail etc.
-# License - MIT. Crack on people.
-#
-# Script to parse Paula G8PZT's Xrouter MAN and HLP files into DokuWiki
-# "some simple markup language" (SSML). DokuWiki ML syntax is here:
-# https://www.dokuwiki.org/wiki:syntax
-#
-# The intention is to parse all MAN / HLP files within the folders and
-# write them with appropriate formatting to files which can then be
-# copypasta'd directly into the wiki.
-#
-# This could also be done with groff > HTML > pandoc > ssml but pandoc's
-# output format for SSML doesn't sort out proper ====headers==== and I
-# don't know Lua. Yet. Maybe something like this with a custom output formatter:
-# cat ${manpage} | groff -Thtml -P -l -mmandoc 2>/dev/null | pandoc -f html -t dokuwiki -o "$manpage".txt
-# But when all you've got is awk, everything looks like a record / field... ;)
-#
-##################################
-# Instructions (destructions?)
-#
-# - This script does not take any arguments
-# - The only required configuration is to set the following path
-#
-BASEPATH=/home/jason/radio/packet/xrouter/Xrouter
-#
-# This folder should contain the two directories "XRouter Help Files"
-# and "XRouter Manual Files". A new directory called
-# "docuwiki-<timestamp>" will be created here, containing two
-# sub-directories (MANFILES and HLPFILES) that hold the concatenated
-# and reformatted files. A manually-created
-# index page exists in https://wiki.oarc.uk/packet:xrouter:manpages with
-# top-level contents, and the pages linked therein have their contents
-# copypasta'd from this script's output.
-#
-##################################
-# Changelog
-# 20250418 - Implemented MAN page parsing
-# 20250419 - Implemented HLP page parsing
-# 20250419 - Tidy up, more awk less bash, remove .MAN / .HLP from outputted headers
-##################################
-# Globals
-DATE=$(date +"%Y%m%d-%H%M%S")
-MANFILES="$BASEPATH/XRouter Manual Files"
-HLPFILES="$BASEPATH/XRouter Help Files"
-OUTPUTDIR="$BASEPATH/docuwiki-$DATE"
-# Colourise output
-echoRed () {
-	echo -e "\e[1;31m----$1----\e[0m"
-}
-echoGreen () {
-	echo -e "\e[1;32m----$1----\e[0m"
-}
-checkRoot () {
-	if [[ $UID -eq 0 ]];
-	then
-		echoError "This script must NOT be run as root!"
-		exit 1
-	fi
-}
-# awk program that converts one MAN file to DokuWiki markup.
-#  - lines 1-2: carriage returns are stripped; ";" comment lines and empty
-#    lines are dropped; any line left is printed prefixed with "<code>"
-#  - line 3 onwards: a line starting with A-Z closes the open code block, is
-#    printed in bold and opens a new code block; ";" comment lines are
-#    dropped; every other line is printed with carriage returns stripped
-awkParseMan='
-NR <= 2 {						# The first two lines
-	gsub("\r", "")					# strip carriage returns
-	if (/^;/ || NF == 0) next			# drop comment / empty line
-	print "<code>" $0				# open a code block
-	next
-}
-/^[A-Z]/ {						# Line starts with an uppercase letter
-	gsub("\r", "")					# strip carriage returns
-	print "</code> **" $0 "** <code>"		# close block, bold line, reopen block
-	next
-}
-/^;/ { next }						# drop comment lines
-{
-	gsub("\r", "")					# strip carriage returns
-	print						# pass everything else through
-}
-'
-# awk program that converts one HLP file to DokuWiki markup: "<code>" is
-# printed when the first line is read, then every line that is neither empty
-# nor a ";" comment is printed with carriage returns stripped.
-awkParseHlp='
-NR == 1 { print "<code>" }				# open the code block first
-{
-	gsub("\r", "")					# strip carriage returns
-	if (NF > 0 && !/^;/) print			# keep non-empty, non-comment lines
-}
-'
-awkSectionHeader='
-	BEGIN { FS="/" }				#Set field separator
-{
-	header="======="				# create header formatting
-	print header $(NF-1) header			# we need the penultimate field as that
-}							# is the folder name /it/looks/like/this/
-'
-awkFileHeader='
-	BEGIN { FS="." }				# Set field separator
-{							# to separate out the .MAN .HLP
-	header="====="					# create header formatting
-	print header $1 header				# output first field with headers
-}
-'
-parseFiles () {
-	mkdir -p "${OUTPUTDIR}/$1"
-	# Traverse folders, skipping files in base directory
-	for folder in "${!1}"/*/
-	do
-		# Get the penultimate field in file path, i.e. the section (folder) name
-		section=$(echo $folder | awk  -F/ '{print $(NF-1)}')
-		# Format the section name as a docuWiki header
-		echo "$folder" | awk "$awkSectionHeader" >> "${OUTPUTDIR}"/"$1"/"${section}".docuwiki
-		# Spit some stuff out to the shell
-		echoRed "$section"
-		# Traverse through files
-		for file in "$folder"*
-		do
-			# Get the last field in file path, i.e. file name
-			title=$(echo $file | awk -F/ '{print $NF}')
-			# Format the file name as a docuwiki header
-			echo "$title" | awk "$awkFileHeader" >> "${OUTPUTDIR}"/"$1"/"${section}".docuwiki
-			case "$1" in
-			# For MAN files, after awk has done it's job we need to remove the last line; this last line breaks
-			# the following <code> statement and is just an EOF message, so we don't lose anything.
-			MANFILES)	awk "$awkParseMan" "$file" | head -n -1 >> "${OUTPUTDIR}"/"$1"/"${section}".docuwiki
-					echo -e "</code>\n----" >> "${OUTPUTDIR}"/"$1"/"${section}".docuwiki
-			;;
-			# For HLP files we don't want to remove the last line because that truly is real content
-			HLPFILES)	awk "$awkParseHlp" "$file" >> "${OUTPUTDIR}"/"$1"/"${section}".docuwiki
-					echo -e "</code>" >> "${OUTPUTDIR}"/"$1"/"${section}".docuwiki
-			;;
-			esac
-		done
-	done
-}
-# Let's go! Refuse to run as root, then convert both trees.
-# parseFiles is given the variable NAME (MANFILES / HLPFILES), not the path.
-checkRoot
-echoGreen "Parsing MAN files from $MANFILES"
-parseFiles MANFILES
-echoGreen "Parsing HLP files from $HLPFILES"
-parseFiles HLPFILES
-</file>