Edinburgh Speech Tools  2.1-release
wfst_regex.cc
Go to the documentation of this file.
1 /*************************************************************************/
2 /* */
3 /* Centre for Speech Technology Research */
4 /* University of Edinburgh, UK */
5 /* Copyright (c) 1997 */
6 /* All Rights Reserved. */
7 /* */
8 /* Permission is hereby granted, free of charge, to use and distribute */
9 /* this software and its documentation without restriction, including */
10 /* without limitation the rights to use, copy, modify, merge, publish, */
11 /* distribute, sublicense, and/or sell copies of this work, and to */
12 /* permit persons to whom this work is furnished to do so, subject to */
13 /* the following conditions: */
14 /* 1. The code must retain the above copyright notice, this list of */
15 /* conditions and the following disclaimer. */
16 /* 2. Any modifications must be clearly marked as such. */
17 /* 3. Original authors' names are not deleted. */
18 /* 4. The authors' names are not used to endorse or promote products */
19 /* derived from this software without specific prior written */
20 /* permission. */
21 /* */
22 /* THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK */
23 /* DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING */
24 /* ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT */
25 /* SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE */
26 /* FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES */
27 /* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN */
28 /* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, */
29 /* ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF */
30 /* THIS SOFTWARE. */
31 /* */
32 /*************************************************************************/
33 /* Author : Alan W Black */
34 /* Date : November 1997 */
35 /*-----------------------------------------------------------------------*/
36 /* */
37 /* WFST functions for building from REGEXs */
38 /* */
39 /*=======================================================================*/
40 #include <iostream>
41 #include "EST_cutils.h"
42 #include "EST_WFST.h"
43 
44 using namespace std;
45 
46 void EST_WFST::build_or_transition(int start, int end, LISP disjunctions)
47 {
48  // Choice of either disjunct
49  LISP l;
50  int intermed;
51 
52  if (disjunctions == NIL)
53  cerr << "WFST construct: disjunct is nil\n";
54 
55  for (l=disjunctions; l != NIL; l=cdr(l))
56  {
57  // Can't go directly to end as other transitions could be added there
58  intermed = add_state(wfst_nonfinal);
59  build_wfst(start,intermed,car(l));
60  build_wfst(intermed,end,epsilon_label());
61  }
62 }
63 
64 void EST_WFST::build_and_transition(int start, int end, LISP conjunctions)
65 {
66  // require each conjunct in turn
67  int intermed,lstart;
68  LISP l;
69 
70  if (conjunctions == NIL)
71  cerr << "WFST build: conjunct is nil\n";
72 
73  lstart = start;
74  for (l=conjunctions; cdr(l) != NIL; l=cdr(l))
75  {
76  intermed = add_state(wfst_nonfinal);
77  build_wfst(lstart,intermed,car(l));
78  lstart = intermed;
79  }
80  build_wfst(lstart,end,car(l));
81 
82 }
83 
84 int EST_WFST::terminal(LISP l)
85 {
86  // true, l is a terminal in a regex
87 
88  if (atomp(l))
89  return TRUE;
90  else
91  return FALSE;
92 }
93 
94 int EST_WFST::operator_or(LISP l)
95 {
96  if (l && !consp(l) && (streq("or",get_c_string(l))))
97  return TRUE;
98  else
99  return FALSE;
100 }
101 
102 int EST_WFST::operator_plus(LISP l)
103 {
104  if (l && !consp(l) && (streq("+",get_c_string(l))))
105  return TRUE;
106  else
107  return FALSE;
108 }
109 
110 int EST_WFST::operator_not(LISP l)
111 {
112  if (l && !consp(l) && (streq("not",get_c_string(l))))
113  return TRUE;
114  else
115  return FALSE;
116 }
117 
118 int EST_WFST::operator_star(LISP l)
119 {
120  if (l && !consp(l) && (streq("*",get_c_string(l))))
121  return TRUE;
122  else
123  return FALSE;
124 }
125 
126 int EST_WFST::operator_optional(LISP l)
127 {
128  if (l && !consp(l) && (streq("?",get_c_string(l))))
129  return TRUE;
130  else
131  return FALSE;
132 }
133 
134 int EST_WFST::operator_and(LISP l)
135 {
136  if (l && !consp(l) && (streq("and",get_c_string(l))))
137  return TRUE;
138  else
139  return FALSE;
140 }
141 
142 void EST_WFST::build_wfst(int start, int end,LISP regex)
143 {
144  if (terminal(regex))
145  {
146  // unpack the label
147  int in,out;
148  EST_String s_name(get_c_string(regex));
149  if (s_name.contains("/"))
150  {
151  in = p_in_symbols.name(s_name.before("/"));
152  out = p_out_symbols.name(s_name.after("/"));
153  }
154  else
155  {
156  in = p_in_symbols.name(get_c_string(regex));
157  out = p_out_symbols.name(get_c_string(regex));
158  }
159  if ((in == -1) || (out == -1))
160  cerr << "WFST_build: symbol " << get_c_string(regex) <<
161  " not in alphabet\n";
162  p_states[start]->add_transition(0,end,in,out);
163  }
164  else if (operator_or(car(regex)))
165  build_or_transition(start,end,cdr(regex));
166  else if (operator_plus(car(regex)))
167  {
168  build_wfst(start,end,cdr(regex));
169  build_wfst(end,end,cdr(regex));
170  }
171  else if (operator_star(car(regex)))
172  {
173  build_wfst(start,start,cdr(regex));
174  build_wfst(start,end,epsilon_label());
175  }
176  else if (operator_not(car(regex)))
177  {
178  int errstate = add_state(wfst_error);
179  build_and_transition(start,errstate,cdr(regex));
180  }
181  else if (operator_optional(car(regex)))
182  {
183  build_wfst(start,end,cdr(regex));
184  build_wfst(start,end,epsilon_label());
185  }
186  else if (operator_and(car(regex)))
187  build_and_transition(start,end,cdr(regex));
188  else
189  build_and_transition(start,end,regex); // default is and
190 }
191 
192 void EST_WFST::build_from_regex(LISP inalpha, LISP outalpha, LISP regex)
193 {
194 
195  clear();
196 
197  cout << "building from regex: " << endl;
198  pprint(regex);
199 
200  init(inalpha,outalpha); // alphabets
201  if (regex == NIL)
202  p_start_state = add_state(wfst_final); // empty WFST
203  else
204  {
205  p_start_state = add_state(wfst_nonfinal);
206  int end = add_state(wfst_final);
207  build_wfst(p_start_state,end,regex);
208  }
209 }
210 
float end(const EST_Item &item)
Definition: EST_item_aux.cc:96
int contains(const char *s, ssize_t pos=-1) const
Does it contain this substring?
Definition: EST_String.h:365
void build_or_transition(int start, int end, LISP disjunctions)
Basic disjunction constructor.
Definition: wfst_regex.cc:46
#define NIL
Definition: siod_defs.h:92
#define streq(X, Y)
Definition: EST_cutils.h:57
void pprint(LISP exp)
Definition: slib_file.cc:95
const char * get_c_string(LISP x)
Definition: slib.cc:638
void build_from_regex(LISP inalpha, LISP outalpha, LISP regex)
Definition: wfst_regex.cc:192
#define FALSE
Definition: EST_bool.h:119
LISP consp(LISP x)
Definition: slib_list.cc:112
float start(const EST_Item &item)
Definition: EST_item_aux.cc:52
void build_wfst(int start, int end, LISP regex)
Basic regex constructor.
Definition: wfst_regex.cc:142
LISP car(LISP x)
Definition: slib_list.cc:115
EST_String after(int pos, int len=1) const
Part after pos+len.
Definition: EST_String.h:308
EST_String before(int pos, int len=0) const
Part before position.
Definition: EST_String.h:276
void build_and_transition(int start, int end, LISP conjunctions)
Basic conjunction constructor.
Definition: wfst_regex.cc:64
#define TRUE
Definition: EST_bool.h:118
LISP atomp(LISP x)
Definition: slib_list.cc:104
LISP cdr(LISP x)
Definition: slib_list.cc:124