Skip to content

Commit 0106e07

Browse files
author
Masashi Yoshikawa
committed
update
1 parent cc38bc7 commit 0106e07

File tree

1 file changed

+7
-38
lines changed

1 file changed

+7
-38
lines changed

en/rte_en_mp_any.sh

Lines changed: 7 additions & 38 deletions
Original file line number | Diff line number | Diff line change
@@ -110,8 +110,8 @@ for parser in `cat en/parser_location.txt`; do
110110
fi
111111
fi
112112
if [ "${parser_name}" == "depccg" ]; then
113-
depccg_dir=${parser_dir}
114-
if [ ! -d "${depccg_dir}" ] || [ ! -e "${depccg_dir}"/src/run.py ]; then
113+
depccg_exists=`pip freeze | grep depccg`
114+
if [ "${depccg_exists}" == "" ]; then
115115
echo "depccg parser directory incorrect. Exit."
116116
exit 1
117117
fi
@@ -220,45 +220,14 @@ function parse_easysrl() {
220220
2> ${parsed_dir}/${base_fname}.easysrl.xml.log
221221
}
222222

223-
function lemmatize() {
224-
# apply easyccg's lemmatizer to input file
225-
input_file=$1
226-
lemmatized=`mktemp -t tmp-XXX`
227-
cat $input_file | java -cp ${easyccg_dir}/easyccg.jar \
228-
uk.ac.ed.easyccg.lemmatizer.MorphaStemmer \
229-
> $lemmatized \
230-
2>/dev/null
231-
paste -d "|" $input_file $lemmatized | \
232-
awk '{split($0, res, "|");
233-
slen = split(res[1], sent1);split(res[2], sent2);
234-
for (i=1; i <= slen; i++) {
235-
printf sent1[i] "|" sent2[i]
236-
if (i < slen) printf " "
237-
}; print ""}'
238-
}
239-
240223
function parse_depccg() {
241224
# Parse using depccg.
242225
base_fname=$1
243-
lemmatize ${plain_dir}/${base_fname}.tok | \
244-
${candc_dir}/bin/pos \
245-
--model ${candc_dir}/models/pos \
246-
--ifmt "%w|%l \n" \
247-
--ofmt "%w|%l|%p \n" \
248-
2> /dev/null | \
249-
${candc_dir}/bin/ner \
250-
--model ${candc_dir}/models/ner \
251-
--ifmt "%w|%l|%p \n" \
252-
--ofmt "%w|%l|%p|%n \n" \
253-
2> /dev/null | \
254-
python ${depccg_dir}/src/run.py \
255-
${depccg_dir}/models/tri_headfirst \
256-
en \
257-
--input-format POSandNERtagged \
258-
--format xml \
259-
2> ${parsed_dir}/${base_fname}.depccg.xml.log \
260-
> ${parsed_dir}/${base_fname}.depccg.xml
261-
python en/candc2transccg.py ${parsed_dir}/${base_fname}.depccg.xml \
226+
cat ${plain_dir}/${base_fname}.tok | \
227+
env CANDC=${candc_dir} depccg_en \
228+
--input-format raw \
229+
--annotator candc \
230+
--format jigg_xml \
262231
> ${parsed_dir}/${base_fname}.depccg.jigg.xml \
263232
2> ${parsed_dir}/${base_fname}.log
264233
}

0 commit comments

Comments
 (0)