packet:xrouter:manpages:parsing
Differences
This shows you the differences between two versions of the page.
Both sides previous revision / Previous revision / Next revision | Previous revision | ||
packet:xrouter:manpages:parsing [2025/04/19 19:27] – m0mzf | packet:xrouter:manpages:parsing [2025/04/20 07:28] (current) – m0mzf | ||
---|---|---|---|
Line 1: | Line 1: | ||
====== Script to parse Xrouter' | ====== Script to parse Xrouter' | ||
- | <file awk> | + | <file awk parse-pzt-manhlp.sh> |
#!/bin/bash | #!/bin/bash | ||
################################## | ################################## | ||
Line 13: | Line 13: | ||
# The intention is to parse all MAN / HLP files within the folders and | # The intention is to parse all MAN / HLP files within the folders and | ||
# write them with appropriate formatting to files which can then be | # write them with appropriate formatting to files which can then be | ||
- | # copypasta' | + | # pasted |
# | # | ||
# This could also be done with groff > HTML > pandoc > ssml but pandoc' | # This could also be done with groff > HTML > pandoc > ssml but pandoc' | ||
Line 41: | Line 41: | ||
# 20250418 - Implemented MAN page parsing | # 20250418 - Implemented MAN page parsing | ||
# 20250419 - Implemented HLP page parsing | # 20250419 - Implemented HLP page parsing | ||
- | # 20250419 - Tidy up, more awk less bash, remove .MAN / .HLP | + | # 20250419 - Tidy up, more awk less bash, remove .MAN / .HLP from outputted headers |
################################## | ################################## | ||
Line 50: | Line 50: | ||
OUTPUTDIR=" | OUTPUTDIR=" | ||
- | # Colourise output | + | # Handy functions |
echoRed () { | echoRed () { | ||
echo -e " | echo -e " | ||
Line 57: | Line 57: | ||
echo -e " | echo -e " | ||
} | } | ||
- | |||
checkRoot () { | checkRoot () { | ||
- | if [[ $UID -eq 0 ]]; | + | if [[ $UID -eq 0 ]]; then |
- | then | + | echoRed |
- | echoError | + | |
exit 1 | exit 1 | ||
fi | fi | ||
} | } | ||
- | awkParseMan=' | + | # Use awk to: |
- | { | + | # strip out comment lines and remove any <CR> from < |
+ | # turn the MAN page header into a code block, it contains a revision date | ||
+ | # find every subsequent MAN page header and turn it into a DokuWiki header and | ||
+ | # | ||
+ | # | ||
+ | # (the final encapsulation is done using " | ||
+ | awkParseMan=' | ||
if (NR==1 || NR==2) # For the first two lines | if (NR==1 || NR==2) # For the first two lines | ||
{ | { | ||
gsub(" | gsub(" | ||
- | if (/^;/ || NF==0) {next} # skip comment or empty line | + | if (/^;/ || NF==0) {next} # skip the subsequent print function for comment or empty lines |
- | print "< | + | print "< |
} | } | ||
Line 93: | Line 97: | ||
} | } | ||
' | ' | ||
- | + | # Use awk to: | |
- | awkParseHlp=' | + | # strip out comment lines (this is always line 1, sometimes lines 2 and 3 as well) and remove any <CR> from < |
- | { | + | # insert a start code block in place of the now-empty line 1 |
+ | # (the final encapsulation is done using " | ||
+ | awkParseHlp=' | ||
endhead="< | endhead="< | ||
gsub(" | gsub(" | ||
if (NR==1) {print endhead} # | if (NR==1) {print endhead} # | ||
if (/^;/ || NF==0) {next} # skip comment / empty lines | if (/^;/ || NF==0) {next} # skip comment / empty lines | ||
- | print $0 | + | print $0 # output the refined line |
} | } | ||
' | ' | ||
+ | # Use awk to extract a section name from the directory structure | ||
awkSectionHeader=' | awkSectionHeader=' | ||
- | BEGIN { FS="/" | + | BEGIN { FS="/" |
- | { | + | { # / |
header=" | header=" | ||
- | print header $(NF-1) header # | + | print header $(NF-1) header # the penultimate field is the section |
- | } # | + | } |
' | ' | ||
+ | # Use awk to extract a name from the filename.extension | ||
awkFileHeader=' | awkFileHeader=' | ||
- | BEGIN { FS=" | + | BEGIN { FS=" |
- | { # | + | { # |
header=" | header=" | ||
- | print header $1 header # | + | print header $1 header # |
} | } | ||
' | ' | ||
Line 126: | Line 132: | ||
do | do | ||
# Get the penultimate field in file path, i.e. the section (folder) name | # Get the penultimate field in file path, i.e. the section (folder) name | ||
- | section=$(echo $folder | awk -F/ ' | + | local section=$(echo $folder | awk -F/ ' |
# Format the section name as a DokuWiki header | # Format the section name as a DokuWiki header | ||
echo " | echo " | ||
Line 135: | Line 141: | ||
do | do | ||
# Get the last field in file path, i.e. file name | # Get the last field in file path, i.e. file name | ||
- | title=$(echo $file | awk -F/ ' | + | local title=$(echo $file | awk -F/ ' |
+ | local outputpath=" | ||
# Format the file name as a DokuWiki header | # Format the file name as a DokuWiki header | ||
- | echo " | + | echo " |
case " | case " | ||
# For MAN files, after awk has done its job we need to remove the last line; this last line breaks | # For MAN files, after awk has done its job we need to remove the last line; this last line breaks | ||
# the following < | # the following < | ||
- | MANFILES) awk " | + | MANFILES) awk " |
- | echo -e "</ | + | echo -e "</ |
;; | ;; | ||
# For HLP files we don't want to remove the last line because that truly is real content | # For HLP files we don't want to remove the last line because that truly is real content | ||
- | HLPFILES) awk " | + | HLPFILES) awk " |
- | echo -e "</ | + | echo -e "</ |
;; | ;; | ||
esac | esac |
packet/xrouter/manpages/parsing.1745090838.txt.gz · Last modified: 2025/04/19 19:27 by m0mzf