Edinburgh Speech Tools  2.1-release
EST_SCFG.cc
Go to the documentation of this file.
1 /*************************************************************************/
2 /* */
3 /* Centre for Speech Technology Research */
4 /* University of Edinburgh, UK */
5 /* Copyright (c) 1997 */
6 /* All Rights Reserved. */
7 /* */
8 /* Permission is hereby granted, free of charge, to use and distribute */
9 /* this software and its documentation without restriction, including */
10 /* without limitation the rights to use, copy, modify, merge, publish, */
11 /* distribute, sublicense, and/or sell copies of this work, and to */
12 /* permit persons to whom this work is furnished to do so, subject to */
13 /* the following conditions: */
14 /* 1. The code must retain the above copyright notice, this list of */
15 /* conditions and the following disclaimer. */
16 /* 2. Any modifications must be clearly marked as such. */
17 /* 3. Original authors' names are not deleted. */
18 /* 4. The authors' names are not used to endorse or promote products */
19 /* derived from this software without specific prior written */
20 /* permission. */
21 /* */
22 /* THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK */
23 /* DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING */
24 /* ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT */
25 /* SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE */
26 /* FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES */
27 /* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN */
28 /* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, */
29 /* ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF */
30 /* THIS SOFTWARE. */
31 /* */
32 /*************************************************************************/
33 /* Author : Alan W Black */
34 /* Date : October 1997 */
35 /*-----------------------------------------------------------------------*/
36 /* */
37 /* A class for representing Stochastic Context Free Grammars */
38 /* */
39 /*=======================================================================*/
40 #include <iostream>
41 #include "EST_Pathname.h"
42 #include "EST_SCFG.h"
43 
44 using namespace std;
45 
46 EST_SCFG_Rule::EST_SCFG_Rule(double prob,int p, int m)
47 {
48  set_rule(prob,p,m);
49  p_daughter2 = 0;
50 }
51 
52 EST_SCFG_Rule::EST_SCFG_Rule(double prob,int p, int q, int r)
53 {
54  set_rule(prob,p,q,r);
55 }
56 
57 void EST_SCFG_Rule::set_rule(double prob,int p, int m)
58 {
59  p_prob = prob;
60  p_mother = p;
61  p_daughter1 = m;
62  p_type = est_scfg_unary_rule;
63 }
64 
65 void EST_SCFG_Rule::set_rule(double prob,int p, int q, int r)
66 {
67  p_prob = prob;
68  p_mother = p;
69  p_daughter1 = q;
70  p_daughter2 = r;
71  p_type = est_scfg_binary_rule;
72 }
73 
75 {
76  p_prob_B=0;
77  p_prob_U=0;
78  p_distinguished_symbol = 0;
79 }
80 
82 {
83  p_prob_B=0;
84  p_prob_U=0;
85  set_rules(rs);
86 }
87 
89 {
90 
91  delete_rule_prob_cache();
92 
93 }
94 
96 {
97  // Cummulate the nonterminals and terminals
98  LISP r;
99 
100  for (r=rs; r != NIL; r=cdr(r))
101  {
102  LISP p = car(cdr(car(r)));
103  if (!strlist_member(nt,get_c_string(p)))
104  nt.append(get_c_string(p));
105  if (siod_llength(car(r)) == 3) // unary rule
106  {
107  LISP d = car(cdr(cdr(car(r))));
108  if (!strlist_member(t,get_c_string(d)))
109  t.append(get_c_string(d));
110  }
111  else // binary rules
112  {
113  LISP d1 = car(cdr(cdr(car(r))));
114  LISP d2 = car(cdr(cdr(cdr(car(r)))));
115  if (!strlist_member(nt,get_c_string(d1)))
116  nt.append(get_c_string(d1));
117  if (!strlist_member(nt,get_c_string(d2)))
118  nt.append(get_c_string(d2));
119  }
120  }
121 
122 }
123 
124 void EST_SCFG::set_rules(LISP lrules)
125 {
126  // Initialise rule base from Lisp form
127  LISP r;
128  EST_StrList nt_list, term_list;
129 
130  rules.clear();
131  delete_rule_prob_cache();
132 
133  find_terms_nonterms(nt_list,term_list,lrules);
134  nonterminals.init(nt_list);
135  terminals.init(term_list);
136 
137  if (!consp(car(cdr(car(lrules)))))
138  p_distinguished_symbol =
139  nonterminal(get_c_string(car(cdr(car(lrules)))));
140  else
141  cerr << "SCFG: no distinguished symbol" << endl;
142 
143  for (r=lrules; r != NIL; r=cdr(r))
144  {
145  if ((siod_llength(car(r)) < 3) ||
146  (siod_llength(car(r)) > 4) ||
147  (!numberp(car(car(r)))))
148  cerr << "SCFG rule is malformed" << endl;
149 // est_error("SCFG rule is malformed\n");
150  else
151  {
152  EST_SCFG_Rule rule;
153  if (siod_llength(car(r)) == 3)
154  {
155  int m = nonterminal(get_c_string(car(cdr(car(r)))));
156  int d = terminal(get_c_string(car(cdr(cdr(car(r))))));
157  rule.set_rule(get_c_float(car(car(r))),m,d);
158  }
159  else
160  {
161  int p = nonterminal(get_c_string(car(cdr(car(r)))));
162  int d1=nonterminal(get_c_string(car(cdr(cdr(car(r))))));
163  int d2 = nonterminal(get_c_string(car(cdr(cdr(cdr(car(r)))))));
164  rule.set_rule(get_c_float(car(car(r))),p,d1,d2);
165  }
166  rules.append(rule);
167  }
168  }
169 
170  rule_prob_cache();
171 }
172 
174 {
175  // Return LISP form of rules
176  EST_Litem *p;
177  LISP r;
178 
179  for (r=NIL,p=rules.head(); p != 0; p=p->next())
180  {
181  if (rules(p).type() == est_scfg_unary_rule)
182  r = cons(cons(flocons(rules(p).prob()),
183  cons(rintern(nonterminal(rules(p).mother())),
184  cons(rintern(terminal(rules(p).daughter1())),NIL))),
185  r);
186  else if (rules(p).type() == est_scfg_binary_rule)
187  r = cons(cons(flocons(rules(p).prob()),
188  cons(rintern(nonterminal(rules(p).mother())),
189  cons(rintern(nonterminal(rules(p).daughter1())),
190  cons(rintern(nonterminal(rules(p).daughter2())),
191  NIL)))),
192  r);
193  }
194  return reverse(r);
195 }
196 
198 {
199  LISP rs;
200 
201  rs = vload(filename,1);
202 
203  set_rules(rs);
204 
205  return format_ok;
206 }
207 
209 {
210  EST_Pathname outfile(filename);
211  FILE *fd;
212  LISP r;
213 
214  if (outfile == "-")
215  fd = stdout;
216  else
217  {
218  if ((fd=fopen(outfile,"w")) == NULL)
219  {
220  cerr << "scfg_train: failed to open file \"" << outfile <<
221  "\" for writing" << endl;
222  return misc_write_error;
223  }
224  }
225 
226  for (r=get_rules(); r != NIL; r=cdr(r))
227  pprint_to_fd(fd,car(r));
228 
229  if (fd != stdout)
230  fclose(fd);
231 
232  return write_ok;
233 }
234 
235 
236 void EST_SCFG::rule_prob_cache()
237 {
238  // Build access cache for the probabilities of binary rules
239  // This will have to made much more efficient
240  int i,j;
241 
242  p_prob_B = new double**[num_nonterminals()];
243  p_prob_U = new double*[num_nonterminals()];
244  for (i=0; i < num_nonterminals(); i++)
245  {
246  p_prob_B[i] = new double*[num_nonterminals()];
247  p_prob_U[i] = new double[num_terminals()];
248  memset(p_prob_U[i],0,sizeof(double)*num_terminals());
249  for (j=0; j < num_nonterminals(); j++)
250  {
251  p_prob_B[i][j] = new double[num_nonterminals()];
252  memset(p_prob_B[i][j],0,sizeof(double)*num_nonterminals());
253  }
254  }
255 
256  set_rule_prob_cache();
257 
258 }
259 
261 {
262  EST_Litem *pp;
263 
264  for (pp=rules.head(); pp != 0; pp = pp->next())
265  {
266  if (rules(pp).type() == est_scfg_binary_rule)
267  {
268  int p = rules(pp).mother();
269  int q = rules(pp).daughter1();
270  int r = rules(pp).daughter2();
271  p_prob_B[p][q][r] = rules(pp).prob();
272  }
273  else if (rules(pp).type() == est_scfg_unary_rule)
274  {
275  int p = rules(pp).mother();
276  int m = rules(pp).daughter1();
277  p_prob_U[p][m] = rules(pp).prob();
278  }
279  }
280 }
281 
282 void EST_SCFG::delete_rule_prob_cache()
283 {
284  int i,j;
285 
286  if (p_prob_B == 0)
287  return;
288 
289  for (i=0; i < num_nonterminals(); i++)
290  {
291  for (j=0; j < num_nonterminals(); j++)
292  delete [] p_prob_B[i][j];
293  delete [] p_prob_B[i];
294  delete [] p_prob_U[i];
295  }
296  delete [] p_prob_B;
297  delete [] p_prob_U;
298 
299  p_prob_B = 0;
300  p_prob_U = 0;
301 }
302 
303 ostream &operator << (ostream &s, const EST_SCFG_Rule &rule)
304 {
305  (void)rule;
306  return s << "<<EST_SCFG_Rule>>";
307 }
308 
310 #if defined(INSTANTIATE_TEMPLATES)
311 #include "../base_class/EST_TList.cc"
312 #include "../base_class/EST_TSortable.cc"
313 
315 #endif
316 
LISP numberp(LISP x)
Definition: slib_math.cc:15
EST_read_status load(const EST_String &filename)
Load grammar from named file.
Definition: EST_SCFG.cc:197
~EST_SCFG()
Definition: EST_SCFG.cc:88
ostream & operator<<(ostream &s, const EST_SCFG_Rule &rule)
Definition: EST_SCFG.cc:303
void find_terms_nonterms(EST_StrList &nt, EST_StrList &t, LISP rules)
Definition: EST_SCFG.cc:95
void set_rule_prob_cache()
(re-)set rule probability caches
Definition: EST_SCFG.cc:260
float get_c_float(LISP x)
Definition: slib.cc:1858
EST_write_status
#define NIL
Definition: siod_defs.h:92
#define Instantiate_TList(TYPE)
Definition: EST_TListI.h:61
int siod_llength(LISP list)
Definition: siod.cc:202
#define Declare_TList(TYPE)
Definition: EST_TListI.h:85
A stochastic context free grammar rule.
Definition: EST_SCFG.h:123
EST_Item * daughter2(const EST_Item *n)
return second daughter of n
void set_rules(LISP rules)
Set (or reset) rules from external source after construction.
Definition: EST_SCFG.cc:124
EST_write_status save(const EST_String &filename)
Save current grammar to named file.
Definition: EST_SCFG.cc:208
EST_UItem * next()
Definition: EST_UList.h:55
LISP vload(const char *fname, long cflag)
Definition: slib_file.cc:632
void set_rule(double prob, int p, int m)
Definition: EST_SCFG.cc:57
const char * get_c_string(LISP x)
Definition: slib.cc:638
#define misc_write_error
LISP cons(LISP x, LISP y)
Definition: slib_list.cc:97
The file was written successfully.
EST_String outfile
NULL
Definition: EST_WFST.cc:55
LISP consp(LISP x)
Definition: slib_list.cc:112
LISP get_rules()
Return rules as LISP list.
Definition: EST_SCFG.cc:173
void append(const T &item)
add item onto end of list
Definition: EST_TList.h:196
void pprint_to_fd(FILE *fd, LISP exp)
Definition: slib_file.cc:74
EST_read_status
LISP rintern(const char *name)
Definition: slib.cc:734
int strlist_member(const EST_StrList &l, const EST_String &s)
Return true if s is in list l.
#define format_ok
LISP flocons(double x)
Definition: slib.cc:673
LISP car(LISP x)
Definition: slib_list.cc:115
void reverse(EST_Wave &sig)
EST_Item * daughter1(const EST_Item *n)
return first daughter of n
void clear(void)
remove all items in list
Definition: EST_TList.h:244
LISP cdr(LISP x)
Definition: slib_list.cc:124
EST_SCFG()
Definition: EST_SCFG.cc:74