51 static int align_main(
int argc,
char **argv);
57 int &total,
int &ins,
int &del,
int &
sub,
int &correct);
69 int main(
int argc,
char **argv)
72 align_main(argc,argv);
78 static int align_main(
int argc,
char **argv)
89 "Summary: align an hypothesis with a reference string\n"+
90 "-rfile <ifile> Reference file\n"+
91 "-hfile <ifile> Hypothesis file\n"+
92 "-rstring <string> Reference string\n"+
93 "-hstring <string> Hypothesis string\n"+
95 " Supported formats: strings, nisttool\n",
99 outfile = al.
val(
"-o");
104 format = al.
val(
"-format");
108 if (format ==
"strings")
110 else if (format ==
"nisttool")
113 cout <<
"Unknown or unhandled format: " << format << endl;
121 float ins,
float del,
float sub);
128 int total,ins,del,
sub,correct;
130 load_sentence(u,
"ref",refStr);
131 load_sentence(u,
"hypo",hypStr);
132 align(u,
"ref",
"hypo",
"align");
133 align_score(u,
"ref",
"hypo",
"align",total,ins,del,sub,correct);
134 fprintf(stdout,
"words %d\n",total);
135 fprintf(stdout,
"insertions %d\n",ins);
136 fprintf(stdout,
"deletions %d\n",del);
137 fprintf(stdout,
"substitutions %d\n",sub);
138 fprintf(stdout,
"correct %d\n",correct);
139 fprintf(stdout,
"WER %f\n",(100.0 * (
float)(ins+del+sub))/total);
152 int total,ins,del,
sub,correct;
153 int s_total,s_ins,s_del,s_sub,s_correct;
155 if (rts.
open(reffile) != 0) {
158 if (hts.
open(hypofile) != 0) {
161 s_total=s_ins=s_del=s_sub=s_correct=0;
167 load_sentence(u,
"ref",rts);
168 load_sentence(u,
"hypo",hts);
174 cerr <<
"Align: failed to match sentence " <<
175 sents <<
" at id " << r->
name() << endl;
182 align(u,
"ref",
"hypo",
"align");
189 align_score(u,
"ref",
"hypo",
"align",
190 total,ins,del,sub,correct);
195 s_correct += correct;
202 fprintf(stdout,
"sentences %d\n",sents);
203 fprintf(stdout,
"words %d\n",s_total);
204 fprintf(stdout,
"insertions %d\n",s_ins);
205 fprintf(stdout,
"deletions %d\n",s_del);
206 fprintf(stdout,
"substitutions %d\n",s_sub);
207 fprintf(stdout,
"correct %d\n",s_correct);
208 fprintf(stdout,
"WER %f\n",(100.0 * (
float)(s_ins+s_del+s_sub))/s_total);
222 while ((!ts.
eoln()) && (!ts.
eof()));
234 for (iter.
begin(strlist); iter; ++iter)
244 int &total,
int &ins,
int &del,
int &
sub,
int &correct)
248 total=ins=del=correct=
sub=0;
255 for ( ; (
as(hi,alignrel) == 0) && hi ; hi=hi->
next())
257 fprintf(stdout,
"inserted: %s\n",(
const char *)hi->
name());
262 fprintf(stdout,
"deleted: %s\n",(
const char *)ri->
name());
267 if (name_distance(ri,
daughter1(ri,alignrel)) == 0)
269 fprintf(stdout,
"correct: %s\n",(
const char *)ri->
name());
274 fprintf(stdout,
"substituted: %s\n",(
const char *)ri->
name());
279 for ( ; hi ; hi=hi->
next())
281 fprintf(stdout,
"inserted: %s\n",(
const char *)hi->
name());
296 if ((rname == hname) ||
316 float to_insert,to_del,to_subs;
325 dpt(0,0) = subs_cost * name_distance(ri,hi);
327 for (i=1; i<r_size+1; i++)
329 dpt(i,0) = insdel_cost + dpt(i-1,0);
332 for (j=1; j < h_size+1; j++)
334 dpt(0,j) = insdel_cost + dpt(0,j-1);
339 for (i=1; ri; ri=ri->
next(),i++)
343 for (j=1; hi; hi=hi->
next(),j++)
345 cost = name_distance(ri,hi);
346 to_insert = insdel_cost + dpt(i,j-1);
347 to_del = insdel_cost + dpt(i-1,j);
348 to_subs = (cost * subs_cost) + dpt(i-1,j-1);
349 if (to_insert < to_del)
351 if (to_insert < to_subs)
353 dpt(i,j) = to_insert;
364 if (to_del < to_subs)
386 for (i=r_size,j=h_size,
389 ri; i--,ri=ri->
prev())
391 while (dpp(i,j) == 1)
EST_TokenStream & get(EST_Token &t)
get next token in stream
const EST_String name() const
EST_Item * append_daughter(EST_Item *n, EST_Item *p=0)
EST_Item * as(const EST_Item *n, const char *relname)
EST_Relation * create_relation(const EST_String &relname)
create a new relation called n.
A Regular expression class to go with the CSTR EST_String class.
void close(void)
Close stream.
int open(const EST_String &filename)
open a EST_TokenStream for a file.
int main(int argc, char **argv)
void set_name(const EST_String &name) const
EST_String downcase(const EST_String &s)
EST_FMatrix sub(const EST_FMatrix &a, ssize_t row, ssize_t col)
void StringtoStrList(EST_String s, EST_StrList &l, EST_String sep)
Convert a EST_String to a EST_StrList by separating tokens in s delimited by the separator sep...
void align(EST_Utterance &utt, const EST_String &refrel, const EST_String &hyporel, const EST_String &alignrel)
bool dp_match(const EST_Relation &lexical, const EST_Relation &surface, EST_Relation &match, float ins, float del, float sub)
int matches(const char *e, ssize_t pos=0) const
Exactly match this string?
const V & val(const K &rkey, bool m=0) const
return value according to key (const)
void begin(const Container &over)
Set the iterator ready to run over this container.
int present(const K &rkey) const
Returns true if key is present.
EST_Relation * relation(const char *name, int err_on_not_found=1) const
get relation by name
EST_Item * append(EST_Item *si)
int parse_command_line(int argc, char *argv[], const EST_String &usage, EST_StrList &files, EST_Option &al, int make_stdio=1)
EST_Item * daughter1(const EST_Item *n)
return first daughter of n