Edinburgh Speech Tools  2.1-release
EST_lattice_io.cc
Go to the documentation of this file.
1 /*************************************************************************/
2 /* */
3 /* Centre for Speech Technology Research */
4 /* University of Edinburgh, UK */
5 /* Copyright (c) 1995,1996 */
6 /* All Rights Reserved. */
7 /* */
8 /* Permission is hereby granted, free of charge, to use and distribute */
9 /* this software and its documentation without restriction, including */
10 /* without limitation the rights to use, copy, modify, merge, publish, */
11 /* distribute, sublicense, and/or sell copies of this work, and to */
12 /* permit persons to whom this work is furnished to do so, subject to */
13 /* the following conditions: */
14 /* 1. The code must retain the above copyright notice, this list of */
15 /* conditions and the following disclaimer. */
16 /* 2. Any modifications must be clearly marked as such. */
17 /* 3. Original authors' names are not deleted. */
18 /* 4. The authors' names are not used to endorse or promote products */
19 /* derived from this software without specific prior written */
20 /* permission. */
21 /* */
22 /* THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK */
23 /* DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING */
24 /* ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT */
25 /* SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE */
26 /* FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES */
27 /* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN */
28 /* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, */
29 /* ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF */
30 /* THIS SOFTWARE. */
31 /* */
32 /*************************************************************************/
33 /* Author : Simon King */
34 /* Date : November 1996 */
35 /*-----------------------------------------------------------------------*/
36 /* Lattice/Finite State Network i/o functions */
37 /* */
38 /*=======================================================================*/
39 
40 #include <fstream>
41 #include <cstdlib>
42 #include "EST_lattice.h"
43 #include "EST_types.h"
44 #include "EST_Token.h"
45 #include "EST_StringTrie.h"
46 
47 using namespace std;
48 
49 bool save(Lattice &lattice, EST_String filename)
50 {
51  ostream *outf;
52  EST_Litem *n_ptr, *a_ptr;
53  int acount=0,ncount=0;
54  int i,from,to;
55  Lattice::symbol_t *symbol;
56  EST_String word;
57 
58  if (filename == "-")
59  outf = &cout;
60  else
61  outf = new ofstream(filename);
62 
63  if (!(*outf))
64  {
65  cerr << "lattice save: can't open lattice output file \""
66  << filename << "\"" << endl;
67  return false;
68  }
69 
70  // count
71  for (n_ptr = lattice.nodes.head(); n_ptr != 0; n_ptr = n_ptr->next()){
72  ncount++;
73  for (a_ptr = lattice.nodes(n_ptr)->arcs_out.head();
74  a_ptr != 0; a_ptr = a_ptr->next())
75  acount++;
76  }
77 
78  // size line (duh!)
79  *outf << "# " << "Generated by Edinburgh Speech Tools" << endl << "#" << endl;
80  *outf << "# Header" << endl;
81  *outf << "VERSION=1.1" << endl << "#" << endl;
82  *outf << "# Size line" << endl;
83  *outf << "N=" << ncount << " L=" << acount << endl;
84  *outf << "#" << endl;
85 
86 
87  // to do : for HTK name nodes, not arcs
88  // since all arcs in have same label
89 
90  // nodes
91  for(i=0;i<=1;i++){
92 
93  if(i==0)
94  *outf << "# Nodes" << endl;
95  else
96  *outf << "# Arcs" << endl;
97 
98  acount=0;
99 
100  for (n_ptr = lattice.nodes.head(); n_ptr != 0; n_ptr = n_ptr->next()){
101 
102  from=lattice.node_index(lattice.nodes(n_ptr));
103 
104  if(i==0){
105  *outf << "I=" << from << endl;
106 
107  }else
108  for (a_ptr = lattice.nodes(n_ptr)->arcs_out.head();
109  a_ptr != 0; a_ptr = a_ptr->next()){
110 
111  to = lattice.node_index(lattice.nodes(n_ptr)->arcs_out(a_ptr)->to);
112 
113  symbol = lattice.alphabet_index_to_symbol(lattice.nodes(n_ptr)->arcs_out(a_ptr)->label);
114 
115  if(lattice.nodes(n_ptr)->arcs_out(a_ptr)->label == lattice.e_move_symbol_index){
116  *outf << "J=" << acount++ << " S=" << from << " E=" << to
117  << " W=!NULL" << endl;
118 
119  }else{
120  *outf << "J=" << acount++ << " S=" << from << " E=" << to
121  << " l=" << lattice.qmap_index_to_value(symbol->qmap_index)
122  << " W=" << lattice.nmap_index_to_name(symbol->nmap_index)
123  << endl;
124  }
125  }
126  }
127  }
128 
129  return true;
130 }
131 
132 bool
133 load(Lattice &lattice,EST_String filename)
134 {
135 
136  EST_String name, next_token, str;
137  EST_TokenStream ts;
138  EST_Token t;
139  int i,j;
140  // temporary storage
141  struct arc_t{
142  int start;
143  int end;
144  float logprob;
145  EST_String word;
146  };
147 
148  arc_t *temp_arcs = NULL;
149  int arcindex=0;
150  int nodeindex=0;
151 
152  // nodes can have labels too - but this is not yet supported
153 
154 
155 
156  if (((filename == "-") ? ts.open(cin) : ts.open(filename)) != 0)
157  {
158  cerr << "Can't open lattice input file " << filename << endl;
159  return false;
160  }
161 
162  // read file into a arrays, make alphabet, then make lattice
163 
164  // find 'size' line
165 
166  int numnodes=0;
167  int numarcs=0;
168 
169  int narcs=-1;
170  int nnodes=-1;
171  int nodenum=-1;
172  int arcnum=-1;
173  int startnode=-1;
174  int endnode=-1;
175  float logprob = 0.0;
176  EST_String word="";
177 
178  while(!ts.eof())
179  {
180 
181  str = ts.get().string();
182 
183  if(!str.contains("="))
184  continue;
185 
186  EST_String left=str.before("=");
187  EST_String right=str.after("=");
188 
189  if(left == "N")
190  nnodes=atoi(right);
191  else if(left == "L")
192  narcs=atoi(right);
193  else if(left == "I")
194  nodenum=atoi(right);
195  else if(left == "J")
196  arcnum=atoi(right);
197  else if(left == "S")
198  startnode=atoi(right);
199  else if(left == "E")
200  endnode=atoi(right);
201  else if(left == "l")
202  logprob=atof(right);
203  else if(left == "W")
204  word=right;
205 
206 
207  if(ts.eoln()){
208 
209  // do something
210 
211  if( (narcs>0) && (nnodes>0) ){
212  // it's the size line
213  if(temp_arcs != NULL){
214  cerr << "Error in lattice file : 2 size lines found"
215  << " : line " << ts.linenum() << endl;
216  ts.close();
217  return false;
218  }else{
219  numarcs=narcs;
220  numnodes=nnodes;
221  temp_arcs = new arc_t[numarcs];
222  cerr << "size : " << numnodes << " " << numarcs << endl;
223  }
224 
225  }else if(nodenum>=0){
226 
227  if(arcnum>0){
228  cerr << "Error in lattice file at line "
229  << ts.linenum() << endl;
230  ts.close();
231  return false;
232  }
233 
234  if(nodenum>=numnodes){
235  cerr << "Error in lattice file at line "
236  << ts.linenum()
237  << " : node index (" << nodenum << ") out of range"
238  << endl;
239  ts.close();
240  return false;
241  }
242 
243  nodeindex++;
244 
245  }else if(arcnum>=0){
246  if(nodenum>0){
247  cerr << "Error in lattice file at line "
248  << ts.linenum() << endl;
249  ts.close();
250  return false;
251  }
252 
253  if(arcnum>=numarcs){
254  cerr << "Error in lattice file at line "
255  << ts.linenum()
256  << " : arc index (" << arcnum << ") out of range"
257  << endl;
258  ts.close();
259  return false;
260  }
261 
262  if((startnode<0) || (startnode>=numnodes)){
263  cerr << "Error in lattice file at line " << ts.linenum()
264  << endl
265  << " arc starts at out of range node "
266  << startnode << endl;
267  return false;
268  }
269 
270  if((endnode<0) || (endnode>=numnodes)){
271  cerr << "Error in lattice file at line " << ts.linenum()
272  << endl
273  << " arc ends at out of range node "
274  << endnode << endl;
275  return false;
276  }
277 
278  // make arc
279  temp_arcs[arcindex].start = startnode;
280  temp_arcs[arcindex].end = endnode;
281  temp_arcs[arcindex].logprob = logprob;
282  temp_arcs[arcindex].word = word;
283  arcindex++;
284 
285  }
286 
287  narcs=-1;
288  nnodes=-1;
289  nodenum=-1;
290  arcnum=-1;
291  startnode=-1;
292  endnode=-1;
293  logprob=-1;
294  word="";
295 
296  }
297 
298  }
299 
300  if(arcindex != numarcs){
301  cerr << "Error in lattice file at line "
302  << "found " << arcindex << " arcs, but expected "
303  << numarcs << endl;
304  return false;
305  }
306 
307  if(nodeindex != numnodes){
308  cerr << "Error in lattice file at line "
309  << "found " << nodeindex << " nodes, but expected "
310  << numnodes << endl;
311  return false;
312  }
313 
314 
315  // make nmap
316  EST_StringTrie seen_before;
317  EST_StrList list_nmap;
318 
319  for(i=0;i<numarcs;i++){
320 
321  if(seen_before.lookup(temp_arcs[i].word) != (void *)(1)){
322  seen_before.add(temp_arcs[i].word,(void *)(1));
323  list_nmap.append(temp_arcs[i].word);
324  }
325  }
326  qsort(list_nmap);
327 
328  //cerr << "here is the list nmap" << list_nmap << endl;
329 
330  i=0;
331  EST_Litem *l_ptr;
332  bool flag;
333  for(l_ptr=list_nmap.head();l_ptr != 0; l_ptr=l_ptr->next())
334  i++;
335 
336  // transfer to array
337  lattice.nmap.resize(i);
338  i=0;
339  for(l_ptr=list_nmap.head();l_ptr != 0; l_ptr=l_ptr->next())
340  lattice.nmap[i++] = list_nmap(l_ptr);
341 
342  list_nmap.clear();
343  cerr << "Built nmap with " << i << " entries" << endl;
344 
345 
346  // make qmap
347  // should be a separate fn
348 
349  EST_TList<float> list_qmap;
350 
351  float error_margin = 1.0e-02; // temporary hack
352 
353  for(i=0;i<numarcs;i++){
354 
355  flag = false;
356  for(l_ptr=list_qmap.head();l_ptr != 0; l_ptr=l_ptr->next())
357  if(fabs(list_qmap(l_ptr) - temp_arcs[i].logprob) <= error_margin){
358  flag = true;
359  break;
360  }
361 
362  if(!flag)
363  list_qmap.append(temp_arcs[i].logprob);
364 
365  }
366 
367  // special zero (within error_margin) entry, if not already there
368  flag = false;
369  for(l_ptr=list_qmap.head();l_ptr != 0; l_ptr=l_ptr->next())
370  if(fabs(list_qmap(l_ptr)) <= error_margin){
371  flag = true;
372  break;
373  }
374 
375  if(!flag)
376  list_qmap.append(0);
377 
378  qsort(list_qmap);
379 
380  i=0;
381  for(l_ptr=list_qmap.head();l_ptr != 0; l_ptr=l_ptr->next())
382  i++;
383 
384  // transfer to array
385  lattice.qmap.resize(i);
386  i=0;
387  for(l_ptr=list_qmap.head();l_ptr != 0; l_ptr=l_ptr->next())
388  lattice.qmap[i++] = list_qmap(l_ptr);
389 
390  list_qmap.clear();
391  cerr << "Built qmap with " << i << " entries" << endl;
392 
393 
394  // make alphabet
395 
396  // temporary list
397  bool **used; // index nmap,qmap
398  int nl = lattice.nmap.n();
399  int ql = lattice.qmap.n();
400  used = new bool*[nl];
401  for(i=0;i<nl;i++)
402  used[i] = new bool[ql];
403 
404  for(i=0;i<nl;i++)
405  for(j=0;j<ql;j++)
406  used[i][j] = false;
407 
408  // get all combinations of word and log probability actually used
409  for(i=0;i<numarcs;i++){
410 
411  //cerr << "arc " << i << " " << temp_arcs[i].logprob
412  //<< " " << temp_arcs[i].word << endl;
413 
414  used[lattice.nmap_name_to_index(temp_arcs[i].word)][lattice.qmap_value_to_index(temp_arcs[i].logprob)] = true;
415  }
416 
417  int count = 0;
418  for(i=0;i<nl;i++)
419  for(j=0;j<ql;j++)
420  if(used[i][j])
421  count++;
422 
423  lattice.alphabet.resize(count);
424  count=0;
425  Lattice::symbol_t *sym;
426  // ordered this way so already sorted, first by q then by n
427  for(j=0;j<ql;j++)
428  for(i=0;i<nl;i++)
429  if(used[i][j]){
430  sym = new Lattice::symbol_t;
431  sym->nmap_index=i;
432  sym->qmap_index=j;
433  lattice.alphabet[count++] = *sym;
434 
435  }
436 
437  cerr << "Alphabet has " << count << " symbols " << endl;
438 
439  // make lattice itself
440 
441  // nodes
442  for(i=0;i<numnodes;i++){
443  Lattice::Node *new_node;
444  new_node = new Lattice::Node;
445  lattice.nodes.append(new_node);
446 
447  }
448 
449  // arcs
450  EST_Litem *n_ptr;
451  for(j=0;j<numarcs;j++){
452 
453  // find from and to nodes by counting down node list
454 
455  // from node
456 
457  for (n_ptr =lattice. nodes.head(),count=0;
458  count<temp_arcs[j].start;
459  n_ptr = n_ptr->next(),count++){
460 
461  if(n_ptr == NULL){
462  cerr << "Couldn't find 'from' node ";
463  for (i=0;i<lattice.alphabet.length();i++)
464  delete &(lattice.alphabet[i]);
465  delete[] used;
466  return false;
467  }
468  }
469  Lattice::Node *from_node = lattice.nodes(n_ptr);
470 
471  // double check
472  if(from_node == NULL){
473  cerr << "Couldn't find from node, index "
474  << temp_arcs[j].start << endl;
475  return false;
476  }
477 
478 
479  for (n_ptr = lattice.nodes.head(),count=0;
480  count<temp_arcs[j].end;
481  n_ptr = n_ptr->next(),count++){
482 
483  if(n_ptr == NULL){
484  cerr << "Couldn't find 'to' node ";
485  return false;
486  }
487  }
488  Lattice::Node *to_node = lattice.nodes(n_ptr);
489 
490  // double check
491  if(to_node == NULL){
492  cerr << "Couldn't find to node, index "
493  << temp_arcs[j].end << endl;
494  return false;
495  }
496 
497 
498  int word_index = lattice.nmap_name_to_index(temp_arcs[j].word);
499 
500  // get arc symbol
501  int symbol = lattice.alphabet_index_lookup(word_index,
502  lattice.qmap_value_to_index(temp_arcs[j].logprob));
503  if(symbol < 0){
504  cerr << "Couldn't lookup symbol in alphabet !" << endl;
505  return false;
506  }
507 
508  Lattice::Arc *new_arc = new Lattice::Arc;
509  new_arc->label = symbol;
510  new_arc->to = to_node;
511 
512  if(to_node->name.head() == NULL)
513  to_node->name.append(word_index); // only name of first arc in .. !
514 
515  from_node->arcs_out.append(new_arc);
516 
517  }
518 
519  // find final nodes
520  for (n_ptr = lattice.nodes.head(),count=0;
521  n_ptr!= NULL;
522  n_ptr = n_ptr->next()){
523 
524  if(lattice.nodes(n_ptr)->arcs_out.head() == NULL){
525  lattice.final_nodes.append(lattice.nodes(n_ptr));
526  count++;
527  }
528  }
529 
530  cerr << "found " << count << " final nodes" << endl;
531 
532 
533 
534  cerr << "Lattice loaded !" << endl;
535 
536  delete [] used;
537  delete [] temp_arcs;
538 
539  return true;
540 }
541 
symbol_t * alphabet_index_to_symbol(int index)
EST_TokenStream & get(EST_Token &t)
get next token in stream
Definition: EST_Token.cc:499
float end(const EST_Item &item)
Definition: EST_item_aux.cc:96
void qsort(EST_TList< T > &a)
int contains(const char *s, ssize_t pos=-1) const
Does it contain this substring?
Definition: EST_String.h:365
EST_String nmap_index_to_name(int index)
int alphabet_index_lookup(int nmap_index, int qmap_index)
STATIC void left(STATUS Change)
Definition: editline.c:523
float qmap_index_to_value(int index)
void add(const EST_String &key, void *item)
Add item indexed by key, overwriting previous contents.
int e_move_symbol_index
Definition: EST_lattice.h:113
EST_TList< Node * > final_nodes
Definition: EST_lattice.h:129
bool save(Lattice &lattice, EST_String filename)
void close(void)
Close stream.
Definition: EST_Token.cc:419
STATIC void right(STATUS Change)
Definition: editline.c:538
EST_TVector< EST_String > nmap
Definition: EST_lattice.h:106
bool load(Lattice &lattice, EST_String filename)
int qmap_value_to_index(float value)
EST_UItem * next()
Definition: EST_UList.h:55
int open(const EST_String &filename)
open a EST_TokenStream for a file.
Definition: EST_Token.cc:213
int eof()
end of file
Definition: EST_Token.h:362
void resize(ssize_t n, int set=1)
Definition: EST_TVector.cc:196
EST_FVector qmap
Definition: EST_lattice.h:103
EST_IList name
Definition: EST_lattice.h:83
int linenum(void) const
returns line number of EST_TokenStream
Definition: EST_Token.h:360
EST_TVector< symbol_t > alphabet
Definition: EST_lattice.h:112
int nmap_name_to_index(const EST_String &name)
NULL
Definition: EST_WFST.cc:55
void append(const T &item)
add item onto end of list
Definition: EST_TList.h:196
float start(const EST_Item &item)
Definition: EST_item_aux.cc:52
EST_TList< Arc * > arcs_out
Definition: EST_lattice.h:84
A string tree index class for indexing arbitrary objects by strings of characters.
EST_UItem * head() const
Definition: EST_UList.h:97
EST_String after(int pos, int len=1) const
Part after pos+len.
Definition: EST_String.h:308
EST_String before(int pos, int len=0) const
Part before position.
Definition: EST_String.h:276
INLINE ssize_t n() const
number of items in vector.
Definition: EST_TVector.h:251
EST_TList< Node * > nodes
Definition: EST_lattice.h:125
void * lookup(const EST_String &key) const
Find contents index by key, 0 if there is not contents.
void clear(void)
remove all items in list
Definition: EST_TList.h:244
void resize(int n, int set=1)
resize vector
int eoln()
end of line
Definition: EST_Token.cc:832
int node_index(Node *n)