@@ -110,8 +110,8 @@ for parser in `cat en/parser_location.txt`; do
110110 fi
111111 fi
112112 if [ " ${parser_name} " == " depccg" ]; then
113- depccg_dir= ${parser_dir}
114- if [ ! -d " ${depccg_dir } " ] || [ ! -e " ${depccg_dir} " /src/run.py ]; then
113+ depccg_exists= ` pip freeze | grep depccg `
114+ if [ " ${depccg_exists } " == " " ]; then
115115 echo " depccg parser directory incorrect. Exit."
116116 exit 1
117117 fi
@@ -220,45 +220,14 @@ function parse_easysrl() {
220220 2> ${parsed_dir} /${base_fname} .easysrl.xml.log
221221}
222222
223- function lemmatize() {
224- # apply easyccg's lemmatizer to input file
225- input_file=$1
226- lemmatized=` mktemp -t tmp-XXX`
227- cat $input_file | java -cp ${easyccg_dir} /easyccg.jar \
228- uk.ac.ed.easyccg.lemmatizer.MorphaStemmer \
229- > $lemmatized \
230- 2> /dev/null
231- paste -d " |" $input_file $lemmatized | \
232- awk ' {split($0, res, "|");
233- slen = split(res[1], sent1);split(res[2], sent2);
234- for (i=1; i <= slen; i++) {
235- printf sent1[i] "|" sent2[i]
236- if (i < slen) printf " "
237- }; print ""}'
238- }
239-
240223function parse_depccg() {
241224 # Parse using depccg.
242225 base_fname=$1
243- lemmatize ${plain_dir} /${base_fname} .tok | \
244- ${candc_dir} /bin/pos \
245- --model ${candc_dir} /models/pos \
246- --ifmt " %w|%l \n" \
247- --ofmt " %w|%l|%p \n" \
248- 2> /dev/null | \
249- ${candc_dir} /bin/ner \
250- --model ${candc_dir} /models/ner \
251- --ifmt " %w|%l|%p \n" \
252- --ofmt " %w|%l|%p|%n \n" \
253- 2> /dev/null | \
254- python ${depccg_dir} /src/run.py \
255- ${depccg_dir} /models/tri_headfirst \
256- en \
257- --input-format POSandNERtagged \
258- --format xml \
259- 2> ${parsed_dir} /${base_fname} .depccg.xml.log \
260- > ${parsed_dir} /${base_fname} .depccg.xml
261- python en/candc2transccg.py ${parsed_dir} /${base_fname} .depccg.xml \
226+ cat ${plain_dir} /${base_fname} .tok | \
227+ env CANDC=${candc_dir} depccg_en \
228+ --input-format raw \
229+ --annotator candc \
230+ --format jigg_xml \
262231 > ${parsed_dir} /${base_fname} .depccg.jigg.xml \
263232 2> ${parsed_dir} /${base_fname} .log
264233}
0 commit comments