70 LISP EST_SCFG_Chart::print_edge(
int start,
int end,
int p,
117 grammar_local =
TRUE;
134 grammar_local =
FALSE;
135 grammar = &imported_grammar;
140 grammar->set_rules(r);
147 setup_wfst(s->
head(),0,name);
158 for (n_vertices=1,p=s; p != e; p=p->
next())
162 for (n=0,p=s; p != e; p=p->
next(),n++)
164 int term = grammar->terminal(p->
f(name).
string());
167 cerr <<
"SCFG_Chart: unknown terminal symbol \"" <<
168 p->
f(name).
string() <<
"\"" << endl;
175 void EST_SCFG_Chart::delete_edge_table()
179 if (wfst == 0)
return;
181 for (i=0; i < n_vertices; i++)
184 for (j=0; j < n_vertices; j++)
186 for (k=0; k < grammar->num_nonterminals(); k++)
187 if (edges[i][j][k] != emptyedge)
188 delete edges[i][j][k];
189 delete [] edges[i][j];
202 void EST_SCFG_Chart::setup_edge_table()
205 int nt = grammar->num_nonterminals();
211 for (i=0; i < n_vertices; i++)
215 for (j=0; j < n_vertices; j++)
218 for (k=0; k < nt; k++)
224 double EST_SCFG_Chart::find_best_tree_cal(
int start,
int end,
int p)
228 int best_q = -1, best_r = -1;
229 double best_prob = 0;
233 best_prob = grammar->prob_U(p,wfst[
start]->d1());
237 wfst[
start]->d1(),0,-1);
247 int nt = grammar->num_nonterminals();
249 for (q=0; q < nt; q++)
250 for (r=0; r < nt; r++)
252 double pBpqr = grammar->prob_B(p,q,r);
257 left=find_best_tree(
start,j,q);
260 right = find_best_tree(j,end,r);
261 t_prob = pBpqr * left *
right;
262 if (t_prob > best_prob)
287 if (n_vertices - 1 > 0)
288 find_best_tree(0,n_vertices-1,grammar->distinguished_symbol());
297 top = edges[0][n_vertices-1][grammar->distinguished_symbol()];
302 return print_edge(0,n_vertices-1,grammar->distinguished_symbol(),top);
313 extract_parse(syn,word->
head(),0,force);
324 for (num_words=0,p=s; p != e; p=p->
next())
327 if (num_words != (n_vertices-1))
329 cerr <<
"SCFG_Chart: extract_parse, number of items in link stream " <<
330 " different from those in parse tree" << endl;
337 top = edges[0][n_vertices-1][grammar->distinguished_symbol()];
345 extract_edge(0,n_vertices-1,grammar->distinguished_symbol(),top,
349 extract_forced_parse(0,n_vertices-1,ss,w);
354 void EST_SCFG_Chart::extract_forced_parse(
int start,
int end,
363 s->
set_name(grammar->nonterminal(grammar->distinguished_symbol()));
370 st->
set_name(grammar->nonterminal(grammar->distinguished_symbol()));
373 extract_forced_parse(
start+1,
end,st,nw);
377 void EST_SCFG_Chart::extract_edge(
int start,
int end,
int p,
392 s->
set_name(grammar->nonterminal(p));
393 s->
set(
"prob",(
float)e->
prob());
394 if (*word ==
NULL || **word ==
NULL) {
395 cerr <<
"SCFG_Chart: extract_edge, null word. Should not happen." << endl;
398 *word = (*word)->
next();
416 s->
set_name(grammar->nonterminal(p));
417 s->
set(
"prob",(
float)e->
prob());
429 for (w=sent; w !=
NIL; w=
cdr(w))
529 int fully_contained=0;
531 for (c=0; c < corpus.length(); c++)
533 LISP flat =
siod_flatten(corpus.a_no_check(c).string());
552 cout <<
"cross bracketing " << cb.
mean()*100 <<
" (" << failed <<
553 " failed " << (float)(100.0*fully_contained)/corpus.length() <<
554 "% fully consistent from " << corpus.length()
555 <<
" sentences)" << endl;
567 EST_error(
"bracket_crossing: sentences of different lengths");
570 for (i=0; i < ref.
length(); i++)
571 for (j=i+1; j <= ref.
length(); j++)
572 if (test.
valid(i,j) == 1)
574 if (ref.
valid(i,j) == 0)
int valid(int i, int k) const
If a bracketing from i to k is valid in string.
float end(const EST_Item &item)
int d2()
(Non)terminal of daughter 2
float get_c_float(LISP x)
int pos(void)
Postion, 0 1 or 2, where 0 is empty, 1 is incomplete 2 is complete.
EST_Item * append_daughter(EST_Item *li=0)
void set(const EST_String &name, ssize_t ival)
int d1()
(Non)terminal of daughter 1
STATIC void left(STATUS Change)
double mean(void) const
mean of currently cummulated values
STATIC void right(STATUS Change)
void parse()
Parses the loaded WFST with the loaded grammar.
A class representing a stochastic context free grammar (SCFG).
LISP siod_flatten(LISP tree)
This class represents a bracketed string used in training of SCFGs.
LISP find_parse()
Return the parse in full LISP form.
EST_Item * daughter2(const EST_Item *n)
return second daughter of n
LISP siod_nth(int nth, LISP list)
LISP scfg_bracketing_only(LISP parse)
const EST_Val f(const EST_String &name) const
void extract_parse(EST_Relation *syn, EST_Relation *word, int force=0)
Extract parse tree and add it to syn linking leafs to word.
const char * get_c_string(LISP x)
void set_name(const EST_String &name) const
LISP cons(LISP x, LISP y)
void test_crossbrackets()
section options Options< strong > or ngram_per_line Pseudo words
void EST_SCFG_chart_load_relation(EST_Relation &s, LISP sent)
void count_bracket_crossing(const EST_bracketed_string &ref, const EST_bracketed_string &test, EST_SuffStats &vs)
const EST_String & string(void) const
void setup_wfst(EST_Relation *s, const EST_String &name="name")
float start(const EST_Item &item)
double prob(void)
Edge probability.
LISP rintern(const char *name)
void set_grammar_rules(LISP r)
Initialize from LISP rules set.
A class for parsing with a probabilistic grammars.
EST_Item * append(EST_Item *si)
void reverse(EST_Wave &sig)
void scfg_parse(EST_Relation *Word, const EST_String &name, EST_Relation *Syntax, EST_SCFG &grammar)
An internal class for EST_SCFG_Chart for representing edges in the chart during parsing with SCFGs...
EST_Item * daughter1(const EST_Item *n)
return first daughter of n