Edinburgh Speech Tools  2.1-release
hash_regression.cc
Go to the documentation of this file.
1 
2  /************************************************************************/
3  /* */
4  /* Centre for Speech Technology Research */
5  /* University of Edinburgh, UK */
6  /* Copyright (c) 1996,1997 */
7  /* All Rights Reserved. */
8  /* */
9  /* Permission is hereby granted, free of charge, to use and distribute */
10  /* this software and its documentation without restriction, including */
11  /* without limitation the rights to use, copy, modify, merge, publish, */
12  /* distribute, sublicense, and/or sell copies of this work, and to */
13  /* permit persons to whom this work is furnished to do so, subject to */
14  /* the following conditions: */
15  /* 1. The code must retain the above copyright notice, this list of */
16  /* conditions and the following disclaimer. */
17  /* 2. Any modifications must be clearly marked as such. */
18  /* 3. Original authors' names are not deleted. */
19  /* 4. The authors' names are not used to endorse or promote products */
20  /* derived from this software without specific prior written */
21  /* permission. */
22  /* */
23  /* THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK */
24  /* DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING */
25  /* ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT */
26  /* SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE */
27  /* FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES */
28  /* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN */
29  /* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, */
30  /* ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF */
31  /* THIS SOFTWARE. */
32  /* */
33  /************************************************************************/
34  /* Author: Richard Caley (rjc@cstr.ed.ac.uk) */
35  /* Date: Wed Apr 9 1997 */
36  /************************************************************************/
37  /* */
38  /* Simple inverted index as a test of the hash type. */
39  /* */
40  /************************************************************************/
41 
42 #include <iostream>
43 #include <fstream>
44 #include "EST_String.h"
45 #include "EST_Token.h"
46 #include "EST_THash.h"
47 
48 using namespace std;
49 
/* Not referenced anywhere in the visible code; presumably a leftover */
/* line-buffer size -- confirm before removing. */
50 #define LINE_LENGTH 1000
51 
/* Pattern main() uses to pick words out of each token: an optional */
/* capital letter, one or more lower-case letters, then an optional */
/* apostrophe suffix such as 's (assumes \( \) is grouping in */
/* EST_Regex syntax -- TODO confirm). */
52 EST_Regex RX_Word("[A-Z]?[a-z]+\\('[a-z]+\\)?");
53 
/* The key main() looks up in the index and reports after the scan. */
54 #define WORD "Latitude"
55 
56 int
57 main(int argc, const char *argv[])
58 {
59  EST_TStringHash<int> places(10);
60  int line_no = 1;
61  EST_TokenStream file;
62 
63  if (argc != 2)
64  return 1;
65 
66  if (file.open(argv[1]) != 0) {
67  return -1;
68  }
69  file.set_WhiteSpaceChars("");
70  file.set_SingleCharSymbols("\n");
71  file.set_PunctuationSymbols("");
73 
74 while(! file.eof())
75  {
76  EST_String line;
77 
78  line = (EST_String)file.get();
79 
80  if (file.eof())
81  break;
82 
83  if (line == "\n")
84  line_no++;
85 
86  ssize_t p=0;
87  size_t len;
88 
89  while((size_t)(p = line.search(RX_Word, len, p)) != EST_STRING_ERR_IDX)
90  {
91  EST_String word(line.at(p, len));
92 
93  places.add_item(word, line_no);
94  p += len;
95  }
96  }
97 
98 cout << WORD " is on line " << places.val(WORD) << "\n";
99 
100 places.dump(cout);
101 
102 return 0;
103 }
104 
void set_WhiteSpaceChars(const EST_String &ws)
set which characters are to be treated as whitespace
Definition: EST_Token.h:341
EST_TokenStream & get(EST_Token &t)
get next token in stream
Definition: EST_Token.cc:499
A Regular expression class to go with the CSTR EST_String class.
Definition: EST_Regex.h:56
void set_SingleCharSymbols(const EST_String &sc)
set which characters are to be treated as single character symbols
Definition: EST_Token.h:344
#define WORD
A specialised hash table for when the key is an EST_String.
Definition: EST_THash.h:304
int ssize_t
void set_PrePunctuationSymbols(const EST_String &ps)
set which characters are to be treated as (pre) punctuation
Definition: EST_Token.h:350
int open(const EST_String &filename)
open an EST_TokenStream for a file.
Definition: EST_Token.cc:213
void set_PunctuationSymbols(const EST_String &ps)
set which characters are to be treated as (post) punctuation
Definition: EST_Token.h:347
size_t search(const char *s, size_t len, size_t &mlen, ssize_t pos=0) const
Find a substring.
Definition: EST_String.h:324
int eof()
end of file
Definition: EST_Token.h:362
int main(int argc, const char *argv[])
V & val(const EST_String &key, int &found) const
#define EST_STRING_ERR_IDX
Definition: EST_String.h:116
int add_item(const EST_String &key, const V &value, int no_search=0)
Add an entry to the table.
EST_Regex RX_Word("[A-Z]?[a-z]+\\('[a-z]+\\)?")
void dump(ostream &stream, int all=0)
Print the table to stream in a human readable format.
EST_String
EST_String at(int from, int len=0) const
Return part at position.
Definition: EST_String.h:292