Edinburgh Speech Tools  2.1-release
EST_Regex.h
Go to the documentation of this file.
1  /************************************************************************/
2  /* */
3  /* Centre for Speech Technology Research */
4  /* University of Edinburgh, UK */
5  /* Copyright (c) 1997 */
6  /* All Rights Reserved. */
7  /* */
8 /* Permission is hereby granted, free of charge, to use and distribute */
9 /* this software and its documentation without restriction, including */
10 /* without limitation the rights to use, copy, modify, merge, publish, */
11 /* distribute, sublicense, and/or sell copies of this work, and to */
12 /* permit persons to whom this work is furnished to do so, subject to */
13 /* the following conditions: */
14 /* 1. The code must retain the above copyright notice, this list of */
15 /* conditions and the following disclaimer. */
16 /* 2. Any modifications must be clearly marked as such. */
17 /* 3. Original authors' names are not deleted. */
18 /* 4. The authors' names are not used to endorse or promote products */
19 /* derived from this software without specific prior written */
20 /* permission. */
21  /* */
22  /* THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK */
23  /* DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING */
24  /* ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT */
25  /* SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE */
26  /* FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES */
27  /* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN */
28  /* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, */
29  /* ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF */
30  /* THIS SOFTWARE. */
31  /* */
32  /************************************************************************/
33 
34 #ifndef __EST_REGEX_H__
35 #define __EST_REGEX_H__
36 
37 class EST_Regex; // Forward declaration placed ahead of the EST_String.h include below — presumably so that header can refer to EST_Regex; confirm there.
38 
39 #include "EST_String.h"
40 
41 /** @class EST_Regex
42  * \ingroup stringclasses
43  * \brief A Regular expression class to go with the CSTR EST_String class.
44  *
45  * The regular expression syntax is the FSF syntax used in emacs and
46  * in the FSF String library. This is translated into the syntax supported
47  * by Henry Spensor's regular expression library, this translation is a place
48  * to look if you find regular expressions not matching where expected.
49  *
50  * @see EST_String
51  * @see string_example
52  * @author Richard Caley <rjc@cstr.ed.ac.uk>
53  * @author (regular expression library by Henry Spencer, University of Toronto)
54  * @version $Id: EST_Regex.h,v 1.3 2004/05/04 00:00:16 awb Exp $
55  */
56 class EST_Regex : protected EST_String {
57 
58 private:
59  /// The compiled form.
60  void *compiled;
61  /// Compiled form for whole string match.
62  void *compiled_match;
63 
64 protected:
65  /// Compile expression.
66  void compile();
67  /// Compile expression in a form which only matches whole string.
68  void compile_match();
69  /// Translate the expression into the internally used syntax.
70  char *regularize(int match) const;
71 
72 public:
73  /// Empty constructor, just for form.
74  EST_Regex(void);
75 
76  /// Construct from EST_String.
78 
79  /// Construct from C string.
80  EST_Regex(const char *ex);
81 
82  /// Copy constructor.
83  EST_Regex(const EST_Regex &ex);
84 
85  /// Destructor.
86  ~EST_Regex();
87 
88  /// Size of the expression.
89  int size() const { return EST_String::size; };
90 
91  /// Run to find a matching substring
92  int run(const char *on, size_t from, size_t &start, size_t &end, size_t *starts=NULL, size_t *ends=NULL);
93  /// Run to see if it matches the entire string.
94  int run_match(const char *on, size_t from=0, size_t *starts=NULL, size_t *ends=NULL);
95 
96  /// Get the expression as a string.
97  EST_String tostring(void) const {return (*this);};
98 
99  /// Cast operator, disambiguates it for some compilers
100  operator const char *() const { return (const char *)tostring(); }
101 
102  int operator == (const EST_Regex ex) const
103  { return (const EST_String)*this == (const EST_String)ex; }
104 
105  int operator != (const EST_Regex ex) const
106  { return (const EST_String)*this != (const EST_String)ex; }
107 
108  /**@name Assignment */
109  ///@{
110  ///
111  EST_Regex &operator = (const EST_Regex ex);
112  ///
113  EST_Regex &operator = (const EST_String s);
114  ///
115  EST_Regex &operator = (const char *s);
116  ///@}
117 
118  /// Stream output of regular expression.
119  friend ostream &operator << (ostream &s, const EST_Regex &str);
120 };
121 
122 ostream &operator << (ostream &s, const EST_Regex &str); // Namespace-scope declaration of the stream-output operator that EST_Regex declares as a friend.
123 
124 /**@name Predefined_regular_expressions
125  * Some regular expressions matching common things are predefined. They are only declared here (extern); the trailing comment on each declaration appears to give the pattern text.
126  */
127 ///@{
128 /// White space.
129 extern EST_Regex RXwhite; // "[ \n\t\r]+"
130 /// Sequence of alphabetic characters.
131 extern EST_Regex RXalpha; // "[A-Za-z]+"
132 /// Sequence of lower case alphabetic characters.
133 extern EST_Regex RXlowercase; // "[a-z]+"
134 /// Sequence of upper case alphabetic characters.
135 extern EST_Regex RXuppercase; // "[A-Z]+"
136 /// Sequence of letters and/or digits.
137 extern EST_Regex RXalphanum; // "[0-9A-Za-z]+"
138 /// Initial letter or underscore followed by letters, underscores or digits. NOTE(review): the pattern shown uses "+" on the second class, so a one-character identifier would presumably not match — confirm against the definition.
139 extern EST_Regex RXidentifier; // "[A-Za-z_][0-9A-Za-z_]+"
140 /// Integer, with an optional leading minus sign.
141 extern EST_Regex RXint; // "-?[0-9]+"
142 /// Floating point number.
143 extern EST_Regex RXdouble; // "-?\\(\\([0-9]+\\.[0-9]*\\)\\|\\([0-9]+\\)\\|\\(\\.[0-9]+\\)\\)\\([eE][---+]?[0-9]+\\)?"
144 ///@}
145 
146 // GCC lets us use the static constant to declare arrays, Sun CC
147 // doesn't, so for a quiet, if ugly, life we declare it here with a suitable
148 // value and check in EST_Regex.cc to make sure it's OK.
149 // NOTE(review): presumably the bound on the number of subexpressions reported through the starts/ends arrays of EST_Regex::run()/run_match() — confirm in EST_Regex.cc.
150 #define EST_Regex_max_subexpressions 10
151 
152 #endif
EST_Regex RXuppercase
Sequence of upper case alphabetic characters.
float end(const EST_Item &item)
Definition: EST_item_aux.cc:96
EST_String(void)
Construct an empty string.
Definition: EST_String.h:201
EST_Regex RXalphanum
Sequence of letters and/or digits.
EST_Regex RXwhite
White space.
A Regular expression class to go with the CSTR EST_String class.
Definition: EST_Regex.h:56
friend ostream & operator<<(ostream &s, const EST_Regex &str)
Stream output of regular expression.
Definition: EST_Regex.cc:332
int operator==(const EST_Regex ex) const
Definition: EST_Regex.h:102
EST_Regex(void)
Empty constructor, just for form.
Definition: EST_Regex.cc:113
EST_Regex RXlowercase
Sequence of lower case alphabetic characters.
EST_Regex RXidentifier
Initial letter or underscore followed by letters underscores or digits.
void compile_match()
Compile expression in a form which only matches whole string.
Definition: EST_Regex.cc:229
EST_Regex RXalpha
Sequence of alphabetic characters.
EST_Regex & operator=(const EST_Regex ex)
Definition: EST_Regex.cc:305
~EST_Regex()
Destructor.
Definition: EST_Regex.cc:142
int run_match(const char *on, size_t from=0, size_t *starts=NULL, size_t *ends=NULL)
Run to see if it matches the entire string.
Definition: EST_Regex.cc:277
EST_Regex RXint
Integer.
NULL
Definition: EST_WFST.cc:55
char * regularize(int match) const
Translate the expression into the internally used syntax.
Definition: EST_Regex.cc:154
void compile()
Compile expression.
Definition: EST_Regex.cc:215
EST_Regex RXdouble
Floating point number.
float start(const EST_Item &item)
Definition: EST_item_aux.cc:52
int operator!=(const EST_Regex ex) const
Definition: EST_Regex.h:105
const char * str(void) const
Get a const-pointer to the actual memory.
Definition: EST_String.h:235
EST_String tostring(void) const
Get the expression as a string.
Definition: EST_Regex.h:97
int run(const char *on, size_t from, size_t &start, size_t &end, size_t *starts=NULL, size_t *ends=NULL)
Run to find a matching substring.
Definition: EST_Regex.cc:244
int size() const
Size of the expression.
Definition: EST_Regex.h:89