Edinburgh Speech Tools  2.1-release
string_regression.cc
Go to the documentation of this file.
1 /************************************************************************/
2 /* */
3 /* Centre for Speech Technology Research */
4 /* University of Edinburgh, UK */
5 /* Copyright (c) 1996,1997 */
6 /* All Rights Reserved. */
7 /* */
8 /* Permission is hereby granted, free of charge, to use and distribute */
9 /* this software and its documentation without restriction, including */
10 /* without limitation the rights to use, copy, modify, merge, publish, */
11 /* distribute, sublicense, and/or sell copies of this work, and to */
12 /* permit persons to whom this work is furnished to do so, subject to */
13 /* the following conditions: */
14 /* 1. The code must retain the above copyright notice, this list of */
15 /* conditions and the following disclaimer. */
16 /* 2. Any modifications must be clearly marked as such. */
17 /* 3. Original authors' names are not deleted. */
18 /* 4. The authors' names are not used to endorse or promote products */
19 /* derived from this software without specific prior written */
20 /* permission. */
21 /* */
22 /* THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK */
23 /* DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING */
24 /* ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT */
25 /* SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE */
26 /* FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES */
27 /* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN */
28 /* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, */
29 /* ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF */
30 /* THIS SOFTWARE. */
31 /* */
32 /************************************************************************/
33 /* Author: Richard Caley */
34 /* Date: May 1997 */
35 /************************************************************************/
36 #include "EST_String.h"
37 #include <iostream>
38 
39 using namespace std;
40 
41 int main()
42 
43 {
44 EST_String line("\n");
45 EST_String zeroth("");
46 EST_String first("hello world");
47 EST_String second("lo w");
48 EST_String third("l");
49 EST_String fourth("the lazy dog.");
50 EST_String fifth("two\nlines");
51 EST_String sixth("-o:F");
52 EST_String seventh("-o");
53 EST_String eighth("some,words-with[punctuation]left..after,a-vowel!");
54 EST_String space(" ");
55 EST_String quoted("\"some tokens\" which are \"quoted with \"\"\"");
56 
57 EST_String bits1[10], bits2[10], bits3[10], bits4[10], bits5[10], bits6[2];
58 
59 EST_String sub1 = first;
60 EST_String sub2 = first;
61 
62 EST_Regex reg0(".*");
63 EST_Regex reg1("l+");
64 EST_Regex reg2("l\\(l+\\|azy\\)");
65 EST_Regex reg3("lll+");
66 EST_Regex reg4(second);
67 EST_Regex reg5(".*l+.*l+.*");
68 EST_Regex reg6(".. ..");
69 EST_Regex reg7("[a-z]\\.[a-z]");
70 EST_Regex reg8("o\\>");
71 EST_Regex reg9("ll\nrr");
72 EST_Regex reg10("o\nl");
73 EST_Regex reg11("\\([^aeiou]\\)\\(\\]\\|[-[.,!?]\\)+");
74 
75 EST_String result0 = zeroth.before(".", -1);
76 EST_String result1 = first.before(second);
77 EST_String result2 = first.before(second,4);
78 EST_String result3 = first.before(third,4);
79 EST_String result4 = first.before(third, -6);
80 EST_String result5 = first.before(third, -7);
81 EST_String result6b = first.before(5);
82 
83 EST_String result1a = first.after(second);
84 EST_String result2a = first.after(second,4);
85 EST_String result3a = first.after(third,4);
86 EST_String result4a = first.after(third, -6);
87 EST_String result5a = first.after(third, -7);
88 EST_String result6a = first.after(5);
89 
90 EST_String result6 = second;
91 EST_String result7 = second;
92 result6 += " sw eet";
93 result7 += third;
94 
95 int test0 = zeroth.contains(reg0);
96 int test1 = first.contains(reg1);
97 int test2 = first.contains(reg2);
98 int test3 = first.contains(reg3);
99 int test4 = first.contains(second);
100 int test5 = fourth.contains(reg2);
101 int test6 = fourth.contains(reg7);
102 int test7 = first.contains(reg8);
103 int test8 = fourth.contains(reg8);
104 int test9 = first.contains(reg9);
105 int test10 = fifth.contains(reg10);
106 int test11 = first.contains(second,3);
107 int test12 = first.contains(second,0);
108 int test13 = second.contains(third, 0);
109 int test14 = sixth.contains(seventh, 0);
110 int test15 = seventh.contains(seventh, 0);
111 
112 int test0m = zeroth.matches(reg0);
113 int test1m = first.matches(reg4);
114 int test2m = second.matches(reg4);
115 int test3m = first.matches(reg5);
116 
117 EST_String result1r = first.before(second);
118 EST_String result2r = first.before(third, -1);
119 EST_String result3r = first.after(third, 5);
120 
121 EST_String result1at = first.at(second);
122 EST_String result2at = first.at(reg6);
123 EST_String result3at = first.at(2,4);
124 
125 EST_String result8 = eighth;
126 result8.gsub(reg11,1);
127 
128 
129 int num1 = split(first, bits1, 10, reg1);
130 int num2 = split(first, bits2, 2, reg1);
131 int num7 = split(first, bits3, 10, space);
132 int num8 = split(quoted, bits4, 10, space, '"');
133 int num9 = split(quoted, bits5, 10, RXwhite, '"');
134 int num10 = split(first, bits6, 2, ".");
135 
136 int num3 = first.freq("o");
137 int num4 = first.freq(third);
138 // numx = first.freq(reg1); // GNU can't do this
139 
140 int num5 = sub1.gsub("l", "[an ell]");
141 int num6 = sub2.gsub(reg1, "[some ells]");
142 
143 cout << "First '"<< first << "'\n";
144 cout << "Second '"<< second << "'\n";
145 cout << "Third '"<< third << "'\n";
146 
147 cout << "Result 0 '"<< result0 << "'\n";
148 
149 cout << "Result 1 '"<< result1 << "'\n";
150 cout << "Result 2 '"<< result2 << "'\n";
151 cout << "Result 3 '"<< result3 << "'\n";
152 cout << "Result 4 '"<< result4 << "'\n";
153 cout << "Result 5 '"<< result5 << "'\n";
154 cout << "Result 6b '"<< result6b << "'\n";
155 
156 cout << "Result 1a '"<< result1a << "'\n";
157 cout << "Result 2a '"<< result2a << "'\n";
158 cout << "Result 3a '"<< result3a << "'\n";
159 cout << "Result 4a '"<< result4a << "'\n";
160 cout << "Result 5a '"<< result5a << "'\n";
161 cout << "Result 6a '"<< result6a << "'\n";
162 
163 cout << "Result 6 '"<< result6 << "'\n";
164 cout << "Result 7 '"<< result7 << "'\n";
165 cout << "Result 8 '"<< result8 << "'\n";
166 
167 cout << "Test 0 '"<< test0 << "'\n";
168 cout << "Test 1 '"<< test1 << "'\n";
169 cout << "Test 2 '"<< test2 << "'\n";
170 cout << "Test 3 '"<< test3 << "'\n";
171 cout << "Test 4 '"<< test4 << "'\n";
172 cout << "Test 5 '"<< test5 << "'\n";
173 cout << "Test 6 '"<< test6 << "'\n";
174 cout << "Test 7 '"<< test7 << "'\n";
175 cout << "Test 8 '"<< test8 << "'\n";
176 cout << "Test 9 '"<< test9 << "'\n";
177 cout << "Test 10 '"<< test10 << "'\n";
178 cout << "Test 11 '"<< test11 << "'\n";
179 cout << "Test 12 '"<< test12 << "'\n";
180 cout << "Test 13 '"<< test13 << "'\n";
181 cout << "Test 14 '"<< test14 << "'\n";
182 cout << "Test 15 '"<< test15 << "'\n";
183 
184 cout << "Test 0m '"<< test0m << "'\n";
185 cout << "Test 1m '"<< test1m << "'\n";
186 cout << "Test 2m '"<< test2m << "'\n";
187 cout << "Test 3m '"<< test3m << "'\n";
188 
189 cout << "Result 1r '"<< result1r << "'\n";
190 cout << "Result 2r '"<< result2r << "'\n";
191 cout << "Result 3r '"<< result3r << "'\n";
192 
193 cout << "Result 1at '"<< result1at << "'\n";
194 cout << "Result 2at '"<< result2at << "'\n";
195 cout << "Result 3at '"<< result3at << "'\n";
196 
197 cout << "Num 1 '"<< num1 << "'\n";
198 cout << "bits1[0] '"<<bits1[0] << "'\n";
199 cout << "bits1[1] '"<<bits1[1] << "'\n";
200 cout << "bits1[2] '"<<bits1[2] << "'\n";
201 
202 cout << "Num 2 '"<< num2 << "'\n";
203 cout << "bits2[0] '"<<bits2[0] << "'\n";
204 cout << "bits2[1] '"<<bits2[1] << "'\n";
205 cout << "bits2[2] '"<<bits2[2] << "'\n";
206 
207 cout << "Num 7 '"<< num7 << "'\n";
208 cout << "bits3[0] '"<<bits3[0] << "'\n";
209 cout << "bits3[1] '"<<bits3[1] << "'\n";
210 cout << "bits3[2] '"<<bits3[2] << "'\n";
211 
212 cout << "Num 8 '"<< num8 << "'\n";
213 cout << "bits4[0] '"<<bits4[0] << "'\n";
214 cout << "bits4[1] '"<<bits4[1] << "'\n";
215 cout << "bits4[2] '"<<bits4[2] << "'\n";
216 cout << "bits4[3] '"<<bits4[3] << "'\n";
217 cout << "bits4[4] '"<<bits4[4] << "'\n";
218 cout << "bits4[5] '"<<bits4[5] << "'\n";
219 
220 cout << "Num 9 '"<< num9 << "'\n";
221 cout << "bits5[0] '"<<bits5[0] << "'\n";
222 cout << "bits5[1] '"<<bits5[1] << "'\n";
223 cout << "bits5[2] '"<<bits5[2] << "'\n";
224 cout << "bits5[3] '"<<bits5[3] << "'\n";
225 cout << "bits5[4] '"<<bits5[4] << "'\n";
226 
227 cout << "Num 10 '"<< num10 << "'\n";
228 cout << "bits6[0] '"<<bits6[0] << "'\n";
229 cout << "bits6[1] '"<<bits6[1] << "'\n";
230 
231 cout << "Num 3 '"<< num3 << "'\n";
232 cout << "Num 4 '"<< num4 << "'\n";
233 
234 cout << "Num 5 '"<< num5 << "'\n";
235 cout << "Sub 1 '"<< sub1 << "'\n";
236 
237 cout << "Num 6 '"<< num6 << "'\n";
238 cout << "Sub 1 '"<< sub2 << "'\n";
239 
240 return (0);
241 }
242 
int contains(const char *s, ssize_t pos=-1) const
Does it contain this substring?
Definition: EST_String.h:365
int main()
size_t freq(const char *s) const
Number of occurrences of substring.
Definition: EST_String.cc:985
A Regular expression class to go with the CSTR EST_String class.
Definition: EST_Regex.h:56
int gsub(const char *os, const EST_String &s)
Substitute one string for another.
Definition: EST_String.h:391
EST_Regex RXwhite("[ \n\t\r]+")
White space.
int matches(const char *e, ssize_t pos=0) const
Exactly match this string?
Definition: EST_String.cc:651
EST_String after(int pos, int len=1) const
Part after pos+len.
Definition: EST_String.h:308
EST_String before(int pos, int len=0) const
Part before position.
Definition: EST_String.h:276
EST_String at(int from, int len=0) const
Return part at position.
Definition: EST_String.h:292