#!/bin/bash
# Report, per language, the messages of the translation template that are
# untranslated in (or missing from) the .po catalogs stored next to this script.
#
# -e           abort on the first failing command.
# -o pipefail  a failing stage fails its whole pipeline, so an awk error in
#              "awk ... | sort > file" is no longer hidden by sort's success.
set -eo pipefail

# Verbose command tracing for debugging. On by default; run with TRACE=0 to
# silence it.
if [[ "${TRACE:-1}" != "0" ]]; then
  set -x
fi

# Get the directory where the script is located
SCRIPT_DIR="$(cd -- "$(dirname -- "$0")" && pwd)"

# The locale tree (<lang>/LC_MESSAGES/<domain>.po) lives beside the script.
LOCALES_DIR="$SCRIPT_DIR"
DOMAIN="messages"
POT_FILE="$LOCALES_DIR/$DOMAIN.pot" # This needs to be an absolute path
# AWK helper that turns gettext catalog text (.po/.pot) into a list of msgids,
# exactly one msgid per output line.
#
# The program is written to a file (rather than passed inline) so the same
# code can be run with "awk -f" against several inputs, including stdin.
# The file is created with mktemp to avoid a predictable name, and an EXIT
# trap removes it even when the script aborts early.
#
# Decoding rules applied to every msgid:
#   * the "msgid" keyword and the surrounding double quotes are removed;
#   * continuation lines are unquoted and concatenated, as gettext does;
#   * \" is unescaped to a plain double quote;
#   * \n (and any other escape) is deliberately left encoded, because the
#     consumers (sort/comm) are line based and a real newline would split one
#     msgid into several unrelated records;
#   * the empty msgid (the catalog header entry) is not printed.
AWK_EXTRACT_MSGID_SCRIPT_PATH="$(mktemp "${TMPDIR:-/tmp}/awk_extract_msgid.XXXXXX")" || {
  echo "error: could not create a temporary file for the AWK helper" >&2
  exit 1
}
trap 'rm -f -- "$AWK_EXTRACT_MSGID_SCRIPT_PATH"' EXIT

cat <<'EOF_AWK_EXTRACT_MSGID' > "$AWK_EXTRACT_MSGID_SCRIPT_PATH"
# Remove trailing whitespace and the enclosing double quotes of one PO string
# segment; the text between the quotes is returned unchanged.
function unquote(s) {
    sub(/[ \t\r]+$/, "", s);
    if (s ~ /^".*"$/) {
        s = substr(s, 2, length(s) - 2);
    }
    return s;
}

# Emit the msgid collected so far (if any) and reset the collector.
function flush_msgid() {
    if (in_msgid_block) {
        gsub(/\\"/, "\"", msgid_text);
        if (msgid_text != "") {
            print msgid_text;
        }
    }
    msgid_text = "";
    in_msgid_block = 0;
}

BEGIN {
    msgid_text = "";
    in_msgid_block = 0;
}

# Comments (including obsolete "#~" entries) never contribute text.
/^#/ {
    next;
}

# A new msgid: finish the previous one and start collecting.
/^msgid[ \t]/ {
    flush_msgid();
    line = $0;
    sub(/^msgid[ \t]+/, "", line);
    msgid_text = unquote(line);
    in_msgid_block = 1;
    next;
}

# Continuation line: belongs to the msgid only while one is being collected.
/^"/ {
    if (in_msgid_block) {
        msgid_text = msgid_text unquote($0);
    }
    next;
}

# Anything else (msgstr, msgstr[N], msgid_plural, msgctxt, blank lines, ...)
# terminates the msgid that is currently being collected.
{
    flush_msgid();
}

END {
    flush_msgid();
}
EOF_AWK_EXTRACT_MSGID

#######################################
# Print the raw msgid block (the "msgid" line plus its continuation lines) of
# every entry in a .po catalog whose translation is non-empty. Each block is
# followed by one empty line so that blocks stay separated.
#
# An entry counts as translated when its first translation form ("msgstr" or
# "msgstr[0]"), including continuation lines, contains any text. Comment
# lines are ignored.
#
# Arguments: $1 - path to the .po file
# Outputs:   msgid blocks on stdout
#######################################
print_translated_msgid_blocks() {
  awk '
    # Remove trailing whitespace and the enclosing double quotes of a segment.
    function unquote(s) {
      sub(/[ \t\r]+$/, "", s);
      if (s ~ /^".*"$/) {
        s = substr(s, 2, length(s) - 2);
      }
      return s;
    }

    # Emit the current entry if it is translated, then reset the state.
    function finish_entry() {
      if (msgid_block != "" && msgstr_text != "") {
        print msgid_block;
        print "";
      }
      msgid_block = "";
      msgstr_text = "";
      state = "";
    }

    /^#/ { next; } # Ignore comments

    /^msgid[ \t]/ {
      finish_entry();
      msgid_block = $0;
      state = "msgid";
      next;
    }

    # First translation form: plain msgstr, or msgstr[0] of a plural entry.
    /^msgstr(\[0\])?[ \t]/ {
      line = $0;
      sub(/^msgstr(\[0\])?[ \t]+/, "", line);
      msgstr_text = unquote(line);
      state = "msgstr";
      next;
    }

    # Continuation line: extends whichever field is being read.
    /^"/ {
      if (state == "msgid") {
        msgid_block = msgid_block "\n" $0;
      } else if (state == "msgstr") {
        msgstr_text = msgstr_text unquote($0);
      }
      next;
    }

    # Empty line, end of entry
    /^[ \t\r]*$/ {
      finish_entry();
      next;
    }

    # msgid_plural, msgstr[1..], msgctxt, ...: their continuation lines must
    # not be attached to the msgid or to the first translation form.
    { state = "other"; }

    END { finish_entry(); }
  ' "$1"
}

echo "Extracting untranslated and missing entries..."
for po in "$LOCALES_DIR"/*/LC_MESSAGES/"$DOMAIN".po; do
  # An unmatched glob stays literal; skip it (and anything that is not a file).
  [[ -f "$po" ]] || continue
  # <LOCALES_DIR>/<lang>/LC_MESSAGES/<domain>.po -> <lang>
  lang="$(basename -- "$(dirname -- "$(dirname -- "$po")")")"
  tmpfile="$LOCALES_DIR/untranslated_${lang}.tmp"
  tmpfile_pot_msgids="$LOCALES_DIR/all_pot_msgids.tmp"
  tmpfile_po_translated_msgids="$LOCALES_DIR/po_translated_msgids_${lang}.tmp"

  # Clear tmp files, so a failed run never leaves stale results behind
  : > "$tmpfile"
  : > "$tmpfile_pot_msgids"
  : > "$tmpfile_po_translated_msgids"

  echo "--- Processing language: $lang ---"

  # 1. Extract all msgids from the .pot file.
  #    LC_ALL=C gives sort and comm the same byte-wise ordering, independent
  #    of the caller's locale.
  echo "Step 1: Extracting all msgids from $POT_FILE..."
  awk -f "$AWK_EXTRACT_MSGID_SCRIPT_PATH" "$POT_FILE" \
    | LC_ALL=C sort -u > "$tmpfile_pot_msgids"
  echo "Step 1 Complete: All msgids from $POT_FILE extracted to $tmpfile_pot_msgids"

  # 2. Extract all *translated* msgids from the .po file. The filter keeps the
  #    raw msgid blocks, which are then decoded by the same AWK helper as the
  #    template so both lists use an identical representation.
  echo "Step 2: Extracting translated msgids from $po..."
  print_translated_msgid_blocks "$po" \
    | awk -f "$AWK_EXTRACT_MSGID_SCRIPT_PATH" - \
    | LC_ALL=C sort -u > "$tmpfile_po_translated_msgids"
  echo "Step 2 Complete: Translated msgids from $po extracted to $tmpfile_po_translated_msgids"

  # 3. Compare the two lists to find untranslated/missing entries: lines that
  #    occur only in the template list (comm column 1).
  echo "Step 3: Comparing msgids to find untranslated and missing entries..."
  LC_ALL=C comm -23 "$tmpfile_pot_msgids" "$tmpfile_po_translated_msgids" > "$tmpfile"
  echo "Step 3 Complete: Untranslated and missing entries written to: $tmpfile"

  # Clean up temporary files for the current language
  echo "Cleaning up temporary files for language $lang..."
  rm -f -- "$tmpfile_pot_msgids" "$tmpfile_po_translated_msgids"
done

# Clean up the AWK script file after all languages are processed.
# -f keeps this step from failing (and aborting under "set -e") when the file
# has already been removed; the guard avoids calling rm with an empty operand.
echo "Cleaning up AWK script file..."
if [[ -n "${AWK_EXTRACT_MSGID_SCRIPT_PATH:-}" ]]; then
  rm -f -- "$AWK_EXTRACT_MSGID_SCRIPT_PATH"
fi