User Tools

Site Tools


packet:xrouter:manpages:parsing

Differences

This shows you the differences between two versions of the page.

Link to this comparison view

Both sides previous revisionPrevious revision
Next revision
Previous revision
packet:xrouter:manpages:parsing [2025/04/19 19:31] m0mzf
packet:xrouter:manpages:parsing [2025/04/22 02:40] (current) – removed m0mzf
Line 1: Line 1:
-====== Script to parse Xrouter's MAN and HLP files ======
-<file bash parse-pzt-manhlp.sh>
-#!/bin/bash 
-################################## 
-# by Jason M0MZF (not a programmer!) 
-# bash / awk / hammer / nail etc. 
-# License - MIT. Crack on people. 
- 
-# Script to parse Paula G8PZT's Xrouter MAN and HLP files into DokuWiki
-# "some simple markup language" (SSML). DokuWiki ML syntax is here:
-# https://www.dokuwiki.org/wiki:syntax 
-# 
-# The intention is to parse all MAN / HLP files within the folders and 
-# write them with appropriate formatting to files which can then be 
-# copypasta'd directly into the wiki. 
-# 
-# This could also be done with groff > HTML > pandoc > ssml but pandoc's 
-# output format for SSML doesn't sort out proper ====headers==== and I  
-# don't know Lua. Yet. Maybe something like this with a custom output formatter: 
-# cat ${manpage} | groff -Thtml -P -l -mmandoc 2>/dev/null | pandoc -f html -t dokuwiki -o "$manpage".txt 
-# But when all you've got is awk, everything looks like a record / field... ;) 
-# 
-################################## 
-# Instructions (destructions?) 
-# 
-# - This script does not take any arguments 
-# - The only required configuration is to set the following path 
-# 
# Root folder of the Xrouter distribution. A BASEPATH already present in the
# environment takes precedence; otherwise the path below is used.
BASEPATH=${BASEPATH:-/home/jason/radio/packet/xrouter/Xrouter}
-# 
-# This folder should contain the two directories "XRouter Help Files" 
-# and "XRouter Manual Files". A new directory will be created here 
-# called "docuwiki-date" and contains two directories for the  
-# concatenated and reformatted files. A manually-created 
-# index page exists in https://wiki.oarc.uk/packet:xrouter:manpages with 
-# top-level contents, and the pages linked therein have their contents 
-# copypasta'd from this script's output. 
-# 
-################################## 
-# Changelog 
-# 20250418 - Implemented MAN page parsing 
-# 20250419 - Implemented HLP page parsing 
-# 20250419 - Tidy up, more awk less bash, remove .MAN / .HLP from outputted headers 
-################################## 
  
# Run-wide settings, all derived from BASEPATH.
# DATE is a timestamp (YYYYmmdd-HHMMSS) used in the output directory name.
DATE="$(date '+%Y%m%d-%H%M%S')"
HLPFILES="${BASEPATH}/XRouter Help Files"
MANFILES="${BASEPATH}/XRouter Manual Files"
OUTPUTDIR="${BASEPATH}/docuwiki-${DATE}"
- 
# Colourised status output.
# echoRed TEXT - print TEXT with four dashes on either side, in bold red.
# Backslash escapes inside TEXT are expanded, as with echo -e.
echoRed () {
  printf '%b\n' "\e[1;31m----$1----\e[0m"
}
# echoGreen TEXT - print TEXT with four dashes on either side, in bold green.
# Backslash escapes inside TEXT are expanded, as with echo -e.
echoGreen () {
  printf '%b\n' "\e[1;32m----$1----\e[0m"
}
- 
# checkRoot [UID]
# Refuse to continue when running as root.
# Arguments: $1 - optional numeric user id to check (defaults to $UID)
# Outputs:   a bold red error message on stderr when the id is 0
# Returns:   0 for a non-root id; terminates the shell with status 1 for root
checkRoot () {
  local uid=${1:-$UID}
  if [[ "$uid" -eq 0 ]]; then
    # Printed directly rather than through a helper: the previous call went
    # to echoError, which is not defined, so the warning was never shown.
    printf '\e[1;31m----%s----\e[0m\n' "This script must NOT be run as root!" >&2
    exit 1
  fi
}
- 
# awk program that turns one MAN file into wiki markup.
#  - carriage returns are stripped from every line that gets printed
#  - lines 1-2: lines starting with ";" and empty lines are dropped, any
#    other line is printed with "<code>" in front of it
#  - line 3 onwards: a line starting with A-Z is printed as
#    "</code> **LINE** <code>", lines starting with ";" are dropped and
#    everything else is printed unchanged
awkParseMan='
NR <= 2 {
  gsub("\r", "")                          # strip carriage returns first
  if (NF > 0 && !/^;/) print "<code>" $0  # open the code block
  next
}
/^[A-Z]/ {
  gsub("\r", "")
  print "</code> **" $0 "** <code>"       # close block, bold heading, reopen
  next
}
!/^;/ {
  gsub("\r", "")
  print
}
'
- 
# awk program that turns one HLP file into wiki markup: "<code>" is printed
# when the first input line is read, then every line that is neither empty
# nor starting with ";" is printed with carriage returns removed.
awkParseHlp='
NR == 1 { print "<code>" }       # open the code block once per file
{
  gsub("\r", "")                 # strip carriage returns
  if (NF > 0 && !/^;/) print     # keep only non-empty, non-comment lines
}
'
- 
# awk program that reads a folder path with a trailing slash
# (/it/looks/like/this/) and prints the folder name, i.e. the penultimate
# "/"-separated field, wrapped in "=======" on both sides.
awkSectionHeader='
BEGIN { FS = "/" }
{ printf "=======%s=======\n", $(NF-1) }
'
- 
# awk program that reads a file name and prints the part before the first
# "." (which drops the .MAN / .HLP suffix) wrapped in "=====" on both sides.
awkFileHeader='
BEGIN { FS = "." }
{ printf "=====%s=====\n", $1 }
'
- 
#######################################
# Convert every file below one source tree into per-section wiki pages.
# Each sub-folder of the source tree becomes one output file,
# "${OUTPUTDIR}/<kind>/<folder name>.docuwiki", which holds a section header
# followed by a file header and the converted contents of each file in it.
# Globals:   OUTPUTDIR, MANFILES / HLPFILES (read through $1),
#            awkSectionHeader, awkFileHeader, awkParseMan, awkParseHlp (read)
# Arguments: $1 - NAME of the variable holding the source tree path;
#                 must be MANFILES or HLPFILES
# Outputs:   section names via echoRed on stdout; errors on stderr
# Returns:   2 for an unsupported $1; non-zero if the output folder cannot
#            be created
#######################################
parseFiles () {
  local kind=${1:-}
  local srcdir folder file section title outfile

  # Reject anything else up front: an unknown name would make the indirect
  # lookup below empty and the folder glob would then walk "/*/".
  case "$kind" in
    MANFILES|HLPFILES) ;;
    *)
      printf 'parseFiles: expected MANFILES or HLPFILES, got "%s"\n' "$kind" >&2
      return 2
      ;;
  esac
  srcdir=${!kind}

  mkdir -p "${OUTPUTDIR}/${kind}" || return

  # Traverse folders, skipping files in the base directory
  for folder in "${srcdir}"/*/; do
    # An unmatched glob is left as the literal pattern; skip it
    [[ -d "$folder" ]] || continue

    # Section name = last path component of the folder (trailing "/" removed)
    section=${folder%/}
    section=${section##*/}
    outfile="${OUTPUTDIR}/${kind}/${section}.docuwiki"

    # Format the section name as a wiki header
    printf '%s\n' "$folder" | awk "$awkSectionHeader" >> "$outfile"
    # Spit some stuff out to the shell
    echoRed "$section"

    for file in "$folder"*; do
      # Only regular files are converted (also covers an empty folder)
      [[ -f "$file" ]] || continue

      # File name without its directory, formatted as a wiki header
      title=${file##*/}
      printf '%s\n' "$title" | awk "$awkFileHeader" >> "$outfile"

      case "$kind" in
        MANFILES)
          # After awk has done its job the last line is removed: it is just
          # an EOF message and would break the closing </code> that follows.
          # sed '$d' is the portable spelling of GNU "head -n -1".
          awk "$awkParseMan" "$file" | sed '$d' >> "$outfile"
          printf '</code>\n----\n' >> "$outfile"
          ;;
        HLPFILES)
          # The last line of a HLP file is real content, so keep it
          awk "$awkParseHlp" "$file" >> "$outfile"
          printf '</code>\n' >> "$outfile"
          ;;
      esac
    done
  done
}
- 
# Entry point: refuse to run as root, then convert the MAN tree followed by
# the HLP tree, announcing each one in green before it is processed.
main () {
  local kind
  checkRoot
  for kind in MANFILES HLPFILES; do
    # ${kind%FILES} gives MAN / HLP; ${!kind} is the matching source path
    echoGreen "Parsing ${kind%FILES} files from ${!kind}"
    parseFiles "$kind"
  done
}
main
- 
-</file> 
packet/xrouter/manpages/parsing.1745091065.txt.gz · Last modified: by m0mzf