58 #if defined(INSTANTIATE_TEMPLATES) 59 #include "../base_class/EST_TList.cc" 63 #include "../base_class/EST_TVector.cc" 72 int EST_WFST::traverse_tag = 0;
85 p_name = state.p_name;
86 p_type = state.p_type;
96 for (p=transitions.head(); p != 0; p=p->
next())
97 delete transitions(p);
108 transitions.append(s);
122 for (
int i=0; i < p_num_states; ++i)
139 p_in_symbols = wfst.p_in_symbols;
140 p_out_symbols = wfst.p_out_symbols;
141 p_start_state = wfst.p_start_state;
142 current_tag = wfst.current_tag;
143 p_num_states = wfst.p_num_states;
144 p_states.resize(p_num_states);
145 for (
int i=0; i < p_num_states; ++i)
146 p_states[i] =
new EST_WFST_State(*wfst.
state(i));
155 p_states.resize(init_num_states);
156 for (i=0; i < p_states.length(); i++)
158 p_num_states = init_num_states;
169 for (iin=in_alphabet; iin !=
NIL; iin=
cdr(iin))
174 out.
append(
"__epsilon__");
176 for (oout=out_alphabet; oout !=
NIL; oout=
cdr(oout))
181 p_in_symbols.init(in);
182 p_out_symbols.init(out);
189 int in_i = p_in_symbols.name(in);
194 cerr <<
"WFST transduce: \"" << in <<
"\" not in alphabet" << endl;
200 out = p_out_symbols.name(out_i);
207 EST_WFST_State *s = p_states(state);
225 EST_WFST_State *s = p_states(state);
243 return transition(state,inout.
before(
"/"),inout.
after(
"/"));
245 return transition(state,inout,inout);
251 int in_i = p_in_symbols.name(in);
252 int out_i = p_out_symbols.name(out);
254 if ((in_i == -1) || (out_i == -1))
256 cerr <<
"WFST: one of " << in <<
"/" << out <<
" not in alphabet" 261 return transition(state,in_i,out_i);
268 return transition(state,in,out,prob);
274 EST_WFST_State *s = p_states(state);
304 return trans->
state();
312 int num_transitions, type, in, out, next_state;
315 for (i=0; i<p_num_states; i++)
317 num_transitions = p_states[i]->num_transitions();
318 fwrite(&num_transitions,4,1,fd);
327 fwrite(&type,4,1,fd);
328 for (j=p_states[i]->transitions.head(); j != 0; j=j->
next())
330 in = p_states[i]->transitions(j)->in_symbol();
331 out = p_states[i]->transitions(j)->out_symbol();
332 next_state = p_states[i]->transitions(j)->state();
333 weight = p_states[i]->transitions(j)->weight();
345 fwrite(&next_state,4,1,fd);
346 fwrite(&weight,4,1,fd);
358 static EST_Regex needquotes(
".*[()'\";., \t\n\r].*");
363 else if ((ofd = fopen(filename,
"wb")) == NULL)
365 cerr <<
"WFST: cannot write to file \"" << filename <<
"\"" << endl;
369 fprintf(ofd,
"EST_File fst\n");
370 fprintf(ofd,
"DataType %s\n",(
const char *)type);
371 fprintf(ofd,
"in %s\n",
373 p_in_symbols.print_to_string(
TRUE)+
")",
375 fprintf(ofd,
"out %s\n",
377 p_out_symbols.print_to_string(
TRUE)+
")",
379 fprintf(ofd,
"NumStates %d\n",p_num_states);
381 fprintf(ofd,
"EST_Header_End\n");
383 if (type ==
"binary")
387 for (i=0; i < p_num_states; i++)
389 EST_WFST_State *s=p_states[i];
390 fprintf(ofd,
"((%d ",s->
name());
394 fprintf(ofd,
"final ");
397 fprintf(ofd,
"nonfinal ");
400 fprintf(ofd,
"licence ");
403 fprintf(ofd,
"error ");
411 fprintf(ofd,
" (%s ",(
const char *)
quote_string(in,
"\"",
"\\",1));
413 fprintf(ofd,
" (%s ",(
const char *)in);
415 fprintf(ofd,
" %s ",(
const char *)
quote_string(out,
"\"",
"\\",1));
417 fprintf(ofd,
" %s ",(
const char *)out);
418 fprintf(ofd,
"%d %g)\n",
431 static float get_float(FILE *fd,
int swap)
434 if (fread(&f,4,1,fd) != 1)
436 cerr <<
"Could not get float from WFST" << endl;
443 static int get_int(FILE *fd,
int swap)
446 if (fread(&i,4,1,fd) != 1)
448 cerr <<
"Could not get int from WFST" << endl;
465 int num_trans, state_type;
466 int in_sym, out_sym, next_state;
471 for (i=0; i < num_states; i++)
473 num_trans = get_int(fd,swap);
474 state_type = get_int(fd,swap);
486 cerr <<
"WFST load: unknown state type \"" <<
487 state_type <<
"\"" << endl;
494 cerr <<
"WFST load: internal error: unexpected state misalignment" 500 for (j=0; j < num_trans; j++)
502 in_sym = get_int(fd,swap);
509 out_sym = get_int(fd,swap);
510 next_state = get_int(fd,swap);
511 trans_cost = get_float(fd,swap);
513 p_states[i]->add_transition(trans_cost,next_state,in_sym,out_sym);
533 if ((fd=fopen(filename,
"r")) == NULL)
535 cerr <<
"WFST load: unable to open \"" << filename
536 <<
"\" for reading" << endl;
545 cerr <<
"WFST load: not a WFST file \"" << filename <<
"\"" <<endl;
558 init(inalpha,outalpha);
560 int num_states = hinfo.
ival(
"NumStates");
565 if (!hinfo.
present(
"ByteOrder"))
572 r = load_binary(fd,hinfo,num_states,swap);
576 for (i=0; i < num_states; i++)
581 cerr <<
"WFST load: expected description of state " << i <<
582 " but found \"" <<
siod_sprint(sd) <<
"\"" << endl;
594 cerr <<
"WFST load: unknown state type \"" <<
602 cerr <<
"WFST load: internal error: unexpected state misalignment" 620 EST_read_status EST_WFST::load_transitions_from_lisp(
int s, LISP trans)
624 for (t=trans; t !=
NIL; t=
cdr(t))
631 if ((in == -1) || (out == -1))
633 cerr <<
"WFST load: unknown vocabulary in state transition" 638 for (i=0;i<p_states[s]->transitions.length();i++)
639 delete (p_states[s]->transitions).nth(i);
642 p_states[s]->add_transition(w,end,in,out);
652 for (i=0; i < p_num_states; i++)
653 tt += p_states(i)->transitions.
length();
660 void EST_WFST::more_states(
int new_max)
664 p_states.resize(new_max);
665 for (i=p_num_states; i < new_max; i++)
672 EST_WFST_State *s =
new EST_WFST_State(p_num_states);
674 if (p_num_states >= p_states.length())
677 more_states((
int)((
float)(p_states.length()+1)*1.5));
680 p_states[p_num_states] = s;
694 for (i=0; i < p_num_states; i++)
696 EST_WFST_State *s=p_states[i];
709 for (i=0; i < p_num_states; i++)
711 EST_WFST_State *s=p_states[i];
int transduce(const EST_WFST &wfst, const EST_StrList &in, EST_StrList &out)
float end(const EST_Item &item)
int contains(const char *s, ssize_t pos=-1) const
Does it contain this substring?
int transition(int state, int in, int out) const
Find (first) new state given in and out symbols.
float get_c_float(LISP x)
int transduce(int state, int in, int &out) const
Transduce in to out from state.
int add_state(enum wfst_state_type state_type)
Add a new state, returns new name.
an internal class for EST_WFST for representing transitions in a WFST
a class representing a weighted finite-state transducer
void clear()
Clear, removing existing states if any.
int ival(const EST_String &rkey, int m=1) const
A Regular expression class to go with the CSTR EST_String class.
long int get_c_int(LISP x)
EST_String itoString(int n)
Make a EST_String object from an integer.
#define Instantiate_TVector_T(TYPE, TAG)
EST_write_status save_binary(FILE *fd)
void start_cumulate()
Clear and start cumulation.
LISP siod_nth(int nth, LISP list)
int open(const EST_String &filename)
open a EST_TokenStream for a file.
void stop_cumulate()
Stop cumulation and calculate probabilities on transitions.
An error occurred while reading.
const char * get_c_string(LISP x)
EST_String siod_sprint(LISP exp)
an internal class for EST_WFST used to represent a state in a WFST
The file was written successfully.
enum wfst_state_type type() const
LISP read_from_string(const char *)
const EST_WFST_State * state(int i) const
Return internal state information.
EST_WFST_Transition * add_transition(float w, int end, int in, int out)
EST_write_status save(const EST_String &filename, const EST_String type="ascii")
?
EST_read_status read_est_header(EST_TokenStream &ts, EST_Option &hinfo, bool &ascii, EST_EstFileType &t)
Declare_TList_T(EST_WFST_Transition *, EST_WFST_TransitionP) Declare_TVector_Base_T(EST_WFST_State *
The file exists but is not in the format specified.
EST_read_status load_binary(FILE *fd, EST_Option &hinfo, int num_states, int swap)
void set_type(wfst_state_type t)
void set_quotes(char q, char e)
set characters to be used as quotes and escape, and set quote mode
int matches(const char *e, ssize_t pos=0) const
Exactly match this string?
const V & val(const K &rkey, bool m=0) const
return value according to key (const)
void append(const T &item)
add item onto end of list
#define Declare_TVector_Base_T(TYPE, DEFAULT, ERROR, TAG)
size_t length(void) const
Length of string (not the length of the underlying chunk)
EST_WFST_Transition * find_transition(int state, int in, int out) const
Find (first) transition given in and out symbols.
int present(const K &rkey) const
Returns true if key is present.
EST_String summary() const
#define Instantiate_TList_T(TYPE, TAG)
EST_String after(int pos, int len=1) const
Part after pos+len.
void init(int init_num_states=10)
Clear with (estimation of number of states required)
EST_String before(int pos, int len=0) const
Part before position.
EST_String quote_string(const EST_String &s, const EST_String &quote, const EST_String &escape, int force)
void copy(const EST_WFST &wfst)
Copy from existing wfst.
wfst_translist transitions
EST_read_status load(const EST_String &filename)
?
int num_transitions() const