#!/bin/bash
#
# Configuration for the guide-metadata CSV export: resolves the paths used by
# the rest of the script, sets the base URL for guide links, and (re)creates
# the output CSV containing only its header row.

# Absolute path of the directory that contains this script.
SCRIPT_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)" || {
  printf 'error: cannot resolve the script directory\n' >&2
  exit 1
}

# Directory to scan: "docs/guides" under the directory two levels above the
# script directory.
GUIDES_DIR="$(cd -- "${SCRIPT_DIR}/../.." && pwd)/docs/guides" || {
  printf 'error: cannot resolve the guides directory\n' >&2
  exit 1
}

# The CSV is written next to the script.
OUTPUT_FILE="${SCRIPT_DIR}/urls.csv"

# Prefix prepended to every guide slug to form its URL.
BASE_URL="https://www.linode.com/docs/guides/"

# Start a fresh CSV (truncating any previous one) with the header row.
printf '%s\n' \
  "URL,Title,Description,Keyword(s),Deprecation Status,Published Date,Updated Date" \
  > "$OUTPUT_FILE" || {
  printf 'error: cannot write %s\n' "$OUTPUT_FILE" >&2
  exit 1
}

#######################################
# Extract the value of a "key: value" line.
# Arguments:
#   $1 - the full line, e.g. 'title: "My Guide"'
#   $2 - the key name (without the colon), e.g. 'title'
# Outputs:
#   The value on stdout with the leading "key:" removed, surrounding
#   whitespace trimmed, and one pair of enclosing single or double quotes
#   stripped.
#######################################
extract_field() {
  local line=$1 key=$2
  local value

  # Strip the literal "key:" prefix; quoting the key keeps it from being
  # interpreted as a pattern.
  value=${line#"$key":}

  # Trim leading, then trailing, whitespace without spawning a process.
  value="${value#"${value%%[![:space:]]*}"}"
  value="${value%"${value##*[![:space:]]}"}"

  # Remove one pair of matching quotes around the whole value. Apostrophes and
  # backslashes inside the value are left untouched.
  case "$value" in
    \"*\" | \'*\') value=${value:1:${#value}-2} ;;
  esac

  printf '%s\n' "$value"
}

#######################################
# Sanitize a description so it can sit inside a double-quoted CSV field.
# Arguments:
#   $1 - the raw description text
# Outputs:
#   The text on stdout with every double quote replaced by a single quote,
#   runs of whitespace collapsed to one space, and surrounding whitespace
#   removed. Commas, colons, semicolons, apostrophes and backslashes are
#   passed through unchanged.
#######################################
sanitize_description() {
  local dq='"' sq="'"
  local text=${1//"$dq"/$sq}
  local -a words=()
  # Pin IFS locally so splitting and re-joining do not depend on the caller.
  local IFS=$' \t\n'

  # Split on whitespace (-d '' reads the whole text; read returns non-zero at
  # end of input, which is expected here), then re-join with single spaces.
  read -r -d '' -a words <<< "$text" || true
  printf '%s\n' "${words[*]}"
}

#######################################
# Clean and format a keyword list.
# Arguments:
#   $1 - the raw keywords value, e.g. "['linux', 'web server']"
# Outputs:
#   The keywords on stdout as a ", "-separated list: square brackets removed,
#   each item trimmed and stripped of one pair of enclosing quotes. Empty
#   items are skipped.
#######################################
format_keywords() {
  local raw=$1
  local -a items=()
  local item joined=""

  # Drop the list brackets.
  raw=${raw//\[/}
  raw=${raw//\]/}

  IFS=',' read -r -a items <<< "$raw"
  for item in "${items[@]}"; do
    # Trim surrounding whitespace.
    item="${item#"${item%%[![:space:]]*}"}"
    item="${item%"${item##*[![:space:]]}"}"
    # Remove one pair of matching quotes; apostrophes inside an item are kept.
    case "$item" in
      \"*\" | \'*\') item=${item:1:${#item}-2} ;;
    esac
    [[ -n "$item" ]] || continue
    joined+="${joined:+, }${item}"
  done

  printf '%s\n' "$joined"
}

#######################################
# Walk a directory tree, read the metadata fields of every index.md, and
# append one CSV row per file that defines a slug.
# Globals:
#   BASE_URL    (read)  - prefix joined with the slug to form the URL
#   OUTPUT_FILE (read)  - CSV file that rows are appended to
# Arguments:
#   $1 - directory to scan
# Outputs:
#   Appends rows of the form
#   "url","title","description","keywords","deprecated","published","modified"
#   to OUTPUT_FILE.
#######################################
parse_directory_recursively() {
  local dir=$1
  local dq='"'
  local file line line_no bounded
  local slug title description keywords deprecated deprecated_value
  local published_date updated_date full_url

  # NUL-delimited find output keeps unusual file names intact; process
  # substitution keeps the loop out of a pipeline subshell. Directories named
  # _shortguides, concentrations, audiences and
  # linode-writers-formatting-guide are pruned.
  while IFS= read -r -d '' file; do
    # Defaults for this file.
    slug=""
    title=""
    description=""
    keywords=""
    deprecated="false"
    published_date=""
    updated_date=""
    line_no=0
    bounded=0

    # IFS= preserves indentation, so only keys at column 0 match below and
    # indented (nested) keys cannot overwrite them. The "|| [[ -n ... ]]" part
    # handles a final line without a trailing newline.
    while IFS= read -r line || [[ -n "$line" ]]; do
      line=${line%$'\r'} # tolerate CRLF line endings
      line_no=$((line_no + 1))

      # When the file opens with a "---" delimiter, stop at the closing one so
      # that body text resembling "key: value" is not mistaken for metadata.
      # Files without an opening delimiter are scanned in full.
      if [[ "$line" == "---" ]]; then
        if ((line_no == 1)); then
          bounded=1
          continue
        elif ((bounded)); then
          break
        fi
      fi

      case "$line" in
        slug:*)
          slug=$(extract_field "$line" "slug")
          ;;
        title:*)
          title=$(extract_field "$line" "title")
          ;;
        description:*)
          description=$(extract_field "$line" "description")
          description=$(sanitize_description "$description")
          ;;
        keywords:*)
          keywords=$(extract_field "$line" "keywords")
          keywords=$(format_keywords "$keywords")
          ;;
        deprecated:*)
          deprecated_value=$(extract_field "$line" "deprecated")
          if [[ "$deprecated_value" == "true" ]]; then
            deprecated="true"
          fi
          ;;
        published:*)
          published_date=$(extract_field "$line" "published")
          ;;
        modified:*)
          updated_date=$(extract_field "$line" "modified")
          ;;
      esac
    done < "$file"

    # Files without a slug produce no row.
    if [[ -n "$slug" ]]; then
      full_url="${BASE_URL}${slug}"

      # Every field is double-quoted; embedded double quotes are doubled so
      # the row stays parseable as CSV.
      printf '"%s","%s","%s","%s","%s","%s","%s"\n' \
        "${full_url//"$dq"/$dq$dq}" \
        "${title//"$dq"/$dq$dq}" \
        "${description//"$dq"/$dq$dq}" \
        "${keywords//"$dq"/$dq$dq}" \
        "${deprecated//"$dq"/$dq$dq}" \
        "${published_date//"$dq"/$dq$dq}" \
        "${updated_date//"$dq"/$dq$dq}" \
        >> "$OUTPUT_FILE"
    fi
  done < <(
    find "$dir" -type d \
      \( -name "_shortguides" -o -name "concentrations" -o -name "audiences" \
      -o -name "linode-writers-formatting-guide" \) -prune \
      -o -type f -name "index.md" -print0
  )
}

# Parse the designated guides directory. Fail loudly instead of reporting
# success when there is nothing to scan or the scan itself fails.
if [[ ! -d "$GUIDES_DIR" ]]; then
  printf 'error: guides directory not found: %s\n' "$GUIDES_DIR" >&2
  exit 1
fi

parse_directory_recursively "$GUIDES_DIR" || {
  printf 'error: failed while parsing %s\n' "$GUIDES_DIR" >&2
  exit 1
}

printf 'Data has been written to %s\n' "$OUTPUT_FILE"