packet:xrouter:manpages:parsing
This is an old revision of the document!
G8PZT MAN page parsing script
Does what it says on the tin :)
#!/bin/bash
##################################
# by Jason M0MZF (not a programmer!)
# bash / awk / hammer / nail etc.
# License - MIT. Crack on people.
#
# Script to parse Paula G8PZT's Xrouter MAN and HLP files into DokuWiki
# "some simple markup language" (SSML). DokuWiki ML syntax is here:
# https://www.dokuwiki.org/wiki:syntax
#
# The intention is to parse all MAN / HLP files within the folders and
# write them with appropriate formatting to files which can then be
# copypasta'd directly into the wiki.
#
# This could also be done with groff > HTML > pandoc > ssml but pandoc's
# output format for SSML doesn't sort out proper ====headers==== and I
# don't know Lua. Yet.
#
#
##################################
# Instructions (destructions?)
#
# - The script is run without any arguments
# - Exactly one setting has to be configured: the base path below
#
BASEPATH="/home/jason/radio/packet/xrouter/Xrouter"
#
# The base path must hold the two source directories "XRouter Help Files"
# and "XRouter Manual Files". Every run creates a new directory inside it
# named "docuwiki-<date>", which in turn contains two directories holding
# the concatenated and reformatted files. A manually-created
# index page exists in https://wiki.oarc.uk/packet:xrouter:manpages with
# top-level contents, and the pages linked from it have their contents
# copypasta'd from this script's output.
#
##################################
# Changelog
# 20250418 - Implemented MAN page parsing
# 20250419 - Implemented HLP page parsing
# 20250419 - Tidy up, more awk less bash, remove .MAN / .HLP
##################################
# Globals
# Timestamp (YYYYmmdd-HHMMSS) taken once, used to name this run's output folder
DATE="$(date '+%Y%m%d-%H%M%S')"
# Source trees, both located directly below BASEPATH
MANFILES="${BASEPATH}/XRouter Manual Files"
HLPFILES="${BASEPATH}/XRouter Help Files"
# Destination folder for everything this run generates
OUTPUTDIR="${BASEPATH}/docuwiki-${DATE}"
# Colourise output
# Print a message in bold red, framed by four dashes on either side.
# Arguments: $1 - text to show (backslash escapes in it are expanded by echo -e)
echoRed () {
  local -r bold_red='\e[1;31m'
  local -r reset='\e[0m'
  echo -e "${bold_red}----${1}----${reset}"
}
# Print a message in bold green, framed by four dashes on either side.
# Arguments: $1 - text to show (backslash escapes in it are expanded by echo -e)
echoGreen () {
  local -r bold_green='\e[1;32m'
  local -r reset='\e[0m'
  echo -e "${bold_green}----${1}----${reset}"
}
# Refuse to continue when the script is run by the superuser.
# Globals:  UID (read)
# Outputs:  a red banner on stderr when running as root
# Returns:  0 for a non-root user; exits the shell with status 1 for root
checkRoot () {
  if (( UID == 0 )); then
    # echoRed is the red banner helper of this script; the previously used
    # name echoError has no definition next to echoRed/echoGreen, so the
    # message was replaced by a "command not found" error.
    echoRed "This script must NOT be run as root!" >&2
    exit 1
  fi
}
# awk program that turns one MAN file into wiki markup:
#  - lines 1 and 2: ";" comments and blank lines are dropped, anything else
#    is emitted prefixed with "<code>" (opening a code block)
#  - from line 3 on: a line starting with A-Z closes the running code block,
#    is emitted in bold, and opens the next code block
#  - from line 3 on: ";" comment lines are dropped, all other lines are copied
#  - carriage returns are stripped from every emitted line
awkParseMan='
NR <= 2 {                               # the first two lines of the file
  gsub("\r", "")                        # drop carriage returns first
  if (/^;/ || NF == 0) next             # ignore comment and blank lines
  print "<code>" $0                     # open a code block
  next
}
/^[A-Z]/ {                              # section title: starts with A-Z
  gsub("\r", "")
  print "</code> **" $0 "** <code>"     # close block, bold title, reopen
  next
}
/^;/ { next }                           # comment line: discard
{                                       # ordinary body line
  gsub("\r", "")
  print
}
'
# awk program that turns one HLP file into wiki markup: "<code>" is emitted
# once when the first input line is read, carriage returns are stripped, and
# ";" comment lines as well as blank lines are dropped.
awkParseHlp='
NR == 1 { print "<code>" }              # open the code block exactly once
{ gsub("\r", "") }                      # strip carriage returns
/^;/ || NF == 0 { next }                # ignore comment and blank lines
{ print }                               # copy everything else
'
# awk program that reads a folder path of the form /it/looks/like/this/ and
# emits its last folder name (the penultimate "/"-separated field) wrapped
# in "=======" heading markers.
awkSectionHeader='
BEGIN { FS = "/" }                      # split the path on slashes
{
  marker = "======="                    # heading markup
  printf "%s%s%s\n", marker, $(NF-1), marker
}
'
# awk program that reads a file name and emits the part in front of the
# first "." (dropping .MAN / .HLP) wrapped in "=====" heading markers.
awkFileHeader='
BEGIN { FS = "." }                      # split the name on dots
{
  marker = "====="                      # heading markup
  printf "%s%s%s\n", marker, $1, marker
}
'
#######################################
# Convert every file found in the section sub-folders of one source tree
# into a single wiki page per section.
# Globals:
#   OUTPUTDIR (read); MANFILES / HLPFILES (read through indirection);
#   awkSectionHeader, awkFileHeader, awkParseMan, awkParseHlp (read)
# Arguments:
#   $1 - NAME of the variable holding the source directory, i.e. MANFILES
#        or HLPFILES. The same name is used for the output sub-folder.
# Outputs:
#   Appends to ${OUTPUTDIR}/$1/<section>.docuwiki and prints every section
#   name through echoRed. Diagnostics go to stderr.
# Returns:
#   0 on success, 1 on a bad argument or when the output folder cannot be
#   created.
#######################################
parseFiles () {
  local kind=${1:-}
  local srcdir outdir folder file section title outfile

  # Only the two known source types have a parser; anything else used to
  # produce pages consisting of headers only.
  case "$kind" in
    MANFILES|HLPFILES) ;;
    *)
      printf 'parseFiles: expected MANFILES or HLPFILES, got "%s"\n' "$kind" >&2
      return 1
      ;;
  esac

  # An empty source path would make the glob below walk /*/ instead.
  srcdir=${!kind:-}
  if [[ -z "$srcdir" ]]; then
    printf 'parseFiles: variable %s is empty or unset\n' "$kind" >&2
    return 1
  fi

  outdir="${OUTPUTDIR}/${kind}"
  mkdir -p "$outdir" || return 1

  # Traverse folders, skipping files in base directory
  for folder in "$srcdir"/*/; do
    # Without any sub-folder the glob stays literal; skip it
    [[ -d "$folder" ]] || continue

    # Section name = last folder of the path (the glob leaves a trailing /).
    # Parameter expansion keeps spaces in the name exactly as they are.
    section=${folder%/}
    section=${section##*/}
    outfile="${outdir}/${section}.docuwiki"

    # Prepend the section name as a wiki header
    printf '%s\n' "$folder" | awk "$awkSectionHeader" >> "$outfile"
    # Spit some stuff out to the shell
    echoRed "$section"

    # Traverse through files
    for file in "$folder"*; do
      # Skip nested folders and the literal pattern of an empty folder
      [[ -f "$file" ]] || continue

      # File name = last component of the path
      title=${file##*/}
      # Prepend the file name as a wiki header
      printf '%s\n' "$title" | awk "$awkFileHeader" >> "$outfile"

      case "$kind" in
        MANFILES)
          # After awk has done its job the last line is removed: it breaks
          # the following <code> statement and is just an EOF message, so
          # nothing is lost.
          awk "$awkParseMan" "$file" | head -n -1 >> "$outfile"
          printf '%s\n' '</code>' '----' >> "$outfile"
          ;;
        HLPFILES)
          # The last line of a HLP file is real content, so it is kept
          awk "$awkParseHlp" "$file" >> "$outfile"
          printf '%s\n' '</code>' >> "$outfile"
          ;;
      esac
    done
  done
}
# Entry point: refuse to run as root, then convert the MAN tree followed by
# the HLP tree, announcing each stage in green.
main () {
  checkRoot
  echoGreen "Parsing MAN files from $MANFILES"
  parseFiles MANFILES
  echoGreen "Parsing HLP files from $HLPFILES"
  parseFiles HLPFILES
}
main "$@"
packet/xrouter/manpages/parsing.1745087430.txt.gz · Last modified: by m0mzf
