76 const int max_i,
const int max_j);
77 static void load_vocab(
const EST_String &vfile);
81 static bool show_cost=
FALSE;
82 static int prune_width = 100;
108 int main(
int argc,
char **argv)
121 "dp <options> \"pattern 1\" \"pattern 2\"\n"+
122 "Find the best alignment of a pair of symbol sequences (e.g. word pronuciations).\n"+
123 "-vocab <string> file containing vocabulary\n"+
124 "-place_holder <string> which vocab item is the place holder (default is " + null_sym->
name() +
" )\n"+
125 "-show_cost show cost of matching path\n"+
126 "-o <string> output file\n"+
127 "-p <int> 'beam' width\n"+
129 "-i <float> insertion cost\n"+
130 "-d <float> deletion cost\n"+
131 "-s <float> substitution cost\n"+
133 "-cost_matrix <string> file containing cost matrix\n",
139 load_vocab(al.
val(
"-vocab"));
142 cerr << argv[0] <<
": no vocab file specified" << endl;
147 prune_width = al.
ival(
"-p");
149 if (al.
present(
"-cost_matrix"))
153 cerr <<
"Can't have ins/del/subs costs as well as matrix !" << endl;
156 distance_measure=
"matrix";
157 cost_matrix.
load(al.
val(
"-cost_matrix"));
159 if(al.
present(
"-place_holder"))
164 cerr <<
"The place holder symbol '" << null_sym->
name();
165 cerr <<
"' is not in the vocbulary !" << endl;
171 cerr <<
"Cost matrix number of columns must match vocabulary size !" << endl;
176 cerr <<
"Cost matrix number of rows must match vocabulary size !" << endl;
189 cerr <<
"Must give either ins/del/subs costs or cost matrix !" << endl;
199 cerr <<
"Must give 2 patterns !" << endl;
212 for(p=pattern1_l.
head();p != 0; p=p->
next())
216 cerr <<
pattern1_l(p) <<
" is not in the vocabulary !" << endl;
224 for(p=pattern2_l.
head();p != 0; p=p->
next())
228 cerr <<
pattern2_l(p) <<
" is not in the vocabulary !" << endl;
245 cerr <<
"No match could be found." << endl;
255 static void load_vocab(
const EST_String &vfile)
260 if (ts.
open(vfile) == -1)
262 cerr <<
"can't find vocab file \"" << vfile <<
"\"" << endl;
283 if(distance_measure ==
"simple")
291 else if(s2 == null_sym)
307 const int max_i,
const int max_j)
313 float scale = (float)max_i / (
float)max_j;
315 float near_j = (float)i / scale;
316 float near_i = (float)j * scale;
325 if( (abs((
int)(near_i - (float)i)) > prune_width) ||
326 (abs((
int)(near_j - (
float)j)) > prune_width) )
EST_TokenStream & get(EST_Token &t)
get next token in stream
const EST_String name() const
EST_write_status save(const EST_String &filename, const EST_String &type="est_ascii") const
void StrList_to_StrVector(EST_StrList &l, EST_StrVector &v)
Convert a list of strings to a vector of strings.
EST_FVector DP_insertion_cost
EST_Relation * create_relation(const EST_String &relname)
create a new relation called relname.
ssize_t num_columns() const
return number of columns
A vector class for floating point numbers. EST_FVector x should be used instead of float *x wherever ...
int ival(const EST_String &rkey, int m=1) const
EST_read_status load_TList_of_StrVector(EST_TList< EST_StrVector > &w, const EST_String &filename, const int vec_len)
bool local_prune(const int i, const int j, const int max_i, const int max_j)
void close(void)
Close stream.
ssize_t num_rows() const
return number of rows
float fval(const EST_String &rkey, int m=1) const
int open(const EST_String &filename)
open an EST_TokenStream for a file.
void set_name(const EST_String &name) const
bool dp_match(const EST_Relation &lexical, const EST_Relation &surface, EST_Relation &match, local_cost_function lcf, local_pruning_function lpf, EST_Item *null_sym)
INLINE ssize_t length() const
number of items in vector.
void StringtoStrList(EST_String s, EST_StrList &l, EST_String sep)
Convert an EST_String to an EST_StrList by separating tokens in s delimited by the separator sep...
float local_cost(const EST_Item *s1, const EST_Item *s2)
EST_FVector DP_deletion_cost
float(* local_cost_function)(const EST_Item *item1, const EST_Item *item2)
EST_read_status load(const EST_String &filename)
Load from file (ascii or binary as defined in file)
EST_Token & peek(void)
peek at next token
const V & val(const K &rkey, bool m=0) const
return value according to key (const)
void append(const T &item)
add item onto end of list
int main(int argc, char **argv)
int present(const K &rkey) const
Returns true if key is present.
long int StrVector_index(const EST_StrVector &v, const EST_String &s)
Search the vector and return the position of the first occurrence of string s in the vector...
bool(* local_pruning_function)(const int i, const int j, const int max_i, const int max_j)
EST_Item * append(EST_Item *si)
EST_String distance_measure
int parse_command_line(int argc, char *argv[], const EST_String &usage, EST_StrList &files, EST_Option &al, int make_stdio=1)
EST_FMatrix DP_substitution_cost