// Start of the header: the include guard (__EST_SCFG_H__) is followed by
// helper declarations whose definitions are not part of this excerpt.
//
// find_num_nodes: takes a LISP value named `string` and returns an int.
// NOTE(review): presumably counts the nodes of a bracketed structure - confirm
// against the definition.
40 #ifndef __EST_SCFG_H__ 41 #define __EST_SCFG_H__ 63 int find_num_nodes(LISP
string);
// set_leaf_indices: takes a LISP `string`, an int `i` and a LISP* `symbols`,
// and returns an int.
// NOTE(review): presumably numbers leaves starting from `i` and records them
// through `symbols` - confirm against the definition.
64 int set_leaf_indices(LISP
string,
int i,LISP *symbols);
// num_leafs: const member taking a LISP `l`; returns an int.
// NOTE(review): presumably the number of leaves under `l` - confirm.
65 int num_leafs(LISP l)
const;
// find_valid: const member taking an int `i` and a LISP `t`; returns nothing.
// NOTE(review): presumably populates the table that valid() reads - confirm.
66 void find_valid(
int i,LISP t)
const;
79 int length()
const {
return p_length;}
86 int valid(
int i,
int k)
const {
return valid_spans[i][k]; }
// Inequality body (the signature line is not visible in this excerpt).
// The result is true exactly when `a` is a different object from `this`:
// addresses are compared, not contents.
90 {
return (!(
this == &a)); }
// Equality body (the signature line is not visible in this excerpt).
// The result is true exactly when `a` is the very same object as `this`:
// addresses are compared, not contents.
92 {
return ((
this == &a)); }
// Stream-output body (the signature line is not visible in this excerpt).
// `a` is intentionally unused; the (void) cast only marks it as such.
// A fixed placeholder line is written to `s` (std::endl also flushes the
// stream) and `s` is returned so that calls can be chained.
95 { (void)a; s <<
"[a bracketed string]" << std::endl;
return s; }
133 p_daughter1 = 0;p_daughter2 =0;}
// Constructor body (the signature line is not visible in this excerpt;
// presumably EST_SCFG_Rule(const EST_SCFG_Rule &r) - confirm).
// Copies p_mother, p_daughter1, p_daughter2, p_type and p_prob one by one
// from `r` into this object.
136 {p_mother = r.p_mother; p_daughter1 = r.p_daughter1;
137 p_daughter2 = r.p_daughter2; p_type=r.p_type; p_prob = r.p_prob;}
143 double prob()
const {
return p_prob;}
// set_rule, three-argument overload: takes a probability `prob` and two ints
// `p` and `m`. Definition not visible in this excerpt.
// NOTE(review): presumably defines a unary rule p -> m with the given
// probability - confirm against the definition.
157 void set_rule(
double prob,
int p,
int m);
// set_rule, four-argument overload: takes a probability `prob` and three ints
// `p`, `q` and `r`. Definition not visible in this excerpt.
// NOTE(review): presumably defines a binary rule p -> q r with the given
// probability - confirm against the definition.
159 void set_rule(
double prob,
int p,
int q,
int r);
// Integer data member.
// NOTE(review): presumably the index of the grammar's distinguished symbol,
// i.e. the value reported by distinguished_symbol() - confirm.
186 int p_distinguished_symbol;
// rule_prob_cache / delete_rule_prob_cache: take no arguments and return
// nothing; definitions are not visible in this excerpt.
// NOTE(review): presumably build and free the tables read by prob_B() and
// prob_U() - confirm.
192 void rule_prob_cache();
194 void delete_rule_prob_cache();
// set_rules: takes the rule set as a LISP value `rules`; returns nothing.
207 void set_rules(LISP rules);
230 double prob_B(
int p,
int q,
int r)
const {
return p_prob_B[p][q][r]; }
232 double prob_U(
int p,
int m)
const {
return p_prob_U[p][m]; }
// set_rule_prob_cache: takes no arguments and returns nothing; definition not
// visible in this excerpt.
// NOTE(review): presumably fills the tables read by prob_B()/prob_U() - confirm.
234 void set_rule_prob_cache();
// f_I_cal: takes four ints (c, p, i, k) and returns a double. f_I() falls
// back to this function when its table lookup yields -1.
// NOTE(review): presumably computes the value from scratch and stores it in
// the table - confirm against the definition.
272 double f_I_cal(
int c,
int p,
int i,
int k);
// f_I: table-backed lookup for the (p, i, k) entry of `inside`.
// The entry inside[p][i][k] is read into `r`; any value other than -1 is
// returned as is, otherwise the call is forwarded to f_I_cal(c, p, i, k).
// `c` is not used by the lookup itself, only passed on.
// NOTE(review): the opening brace and the declaration of `r` are not visible
// in this excerpt. -1 presumably marks an entry that has not been computed
// yet - confirm.
274 double f_I(
int c,
int p,
int i,
int k)
276 if ((r=inside[p][i][k]) != -1)
return r;
277 else return f_I_cal(c,p,i,k); }
// f_O_cal: takes four ints (c, p, i, k) and returns a double. f_O() falls
// back to this function when its table lookup yields -1.
// NOTE(review): presumably computes the value from scratch and stores it in
// the table - confirm against the definition.
279 double f_O_cal(
int c,
int p,
int i,
int k);
// f_O: table-backed lookup for the (p, i, k) entry of `outside`.
// The entry outside[p][i][k] is read into `r`; any value other than -1 is
// returned as is, otherwise the call is forwarded to f_O_cal(c, p, i, k).
// `c` is not used by the lookup itself, only passed on.
// NOTE(review): the opening brace and the declaration of `r` are not visible
// in this excerpt. -1 presumably marks an entry that has not been computed
// yet - confirm.
281 double f_O(
int c,
int p,
int i,
int k)
283 if ((r=outside[p][i][k]) != -1)
return r;
284 else return f_O_cal(c,p,i,k); }
// f_P: takes two ints (c, p) and returns a double; definition not visible in
// this excerpt.
290 double f_P(
int c,
int p);
// reestimate_rule_prob_B: takes five ints (c, ri, p, q, r); returns nothing.
// NOTE(review): presumably accumulates re-estimation statistics for the
// binary rule with index `ri` (p -> q r) - confirm against the definition.
292 void reestimate_rule_prob_B(
int c,
int ri,
int p,
int q,
int r);
// reestimate_rule_prob_U: takes four ints (c, ri, p, m); returns nothing.
// NOTE(review): presumably accumulates re-estimation statistics for the
// unary rule with index `ri` (p -> m) - confirm against the definition.
294 void reestimate_rule_prob_U(
int c,
int ri,
int p,
int m);
296 void reestimate_grammar_probs(
int passes,
// cross_entropy: takes no arguments and returns a double; definition not
// visible in this excerpt.
302 double cross_entropy();
// init_io_cache: takes two ints (c, nt); returns nothing.
// NOTE(review): presumably allocates the `inside`/`outside` tables that
// f_I()/f_O() read - confirm.
304 void init_io_cache(
int c,
int nt);
// clear_io_cache: takes one int (c); returns nothing.
// NOTE(review): presumably releases what init_io_cache set up - confirm.
306 void clear_io_cache(
int c);
// test_crossbrackets: takes no arguments and returns nothing; definition not
// visible in this excerpt.
321 void test_crossbrackets();
341 void train_inout(
int passes,
int valid(int i, int k) const
Whether a bracketing from i to k is valid in the string.
void set_prob(double p)
set the probability
void set_bracketed_string(LISP string)
int operator!=(const EST_bracketed_string &a) const
EST_String nonterminal(int p) const
Convert nonterminal index to string form.
A class used to train (and test) SCFGs; it is an extension of EST_SCFG.
double prob_B(int p, int q, int r) const
The rule probability of given binary rule.
int terminal(const EST_String &m) const
Convert terminal string to index.
bool save(Lattice &lattice, EST_String filename)
const EST_String & name(const int n) const
The name given the index.
int nonterminal(const EST_String &p) const
Convert nonterminal string to index.
int length(void) const
The number of members in the discrete.
A class representing a stochastic context free grammar (SCFG).
bool load(Lattice &lattice, EST_String filename)
This class represents a bracketed string used in training of SCFGs.
A stochastic context free grammar rule.
int distinguished_symbol() const
int num_nonterminals() const
Number of nonterminals.
EST_SCFG_Rule(const EST_SCFG_Rule &r)
est_scfg_rtype type() const
rule type
const char * get_c_string(LISP x)
EST_String terminal(int m) const
Convert terminal index to string form.
double prob() const
The rule's probability.
int num_terminals() const
Number of terminals.
SCFGRuleList rules
The rules themselves.
A vector class for double precision floating point numbers. EST_DVector x should be used instead of f...
void count_bracket_crossing(const EST_bracketed_string &ref, const EST_bracketed_string &test, EST_SuffStats &vs)
const EST_String symbol_at(int i) const
The nth symbol in the string.
EST_TVector< EST_bracketed_string > EST_Bcorpus
LISP scfg_bracketing_only(LISP parse)
double prob_U(int p, int m) const
The rule probability of given unary rule.
EST_TList< EST_SCFG_Rule > SCFGRuleList
friend ostream & operator<<(ostream &s, const EST_bracketed_string &a)
int operator==(const EST_bracketed_string &a) const