Edinburgh Speech Tools  2.1-release
EST_Token.cc
Go to the documentation of this file.
1 /*************************************************************************/
2 /* */
3 /* Centre for Speech Technology Research */
4 /* University of Edinburgh, UK */
5 /* Copyright (c) 1996 */
6 /* All Rights Reserved. */
7 /* */
8 /* Permission is hereby granted, free of charge, to use and distribute */
9 /* this software and its documentation without restriction, including */
10 /* without limitation the rights to use, copy, modify, merge, publish, */
11 /* distribute, sublicense, and/or sell copies of this work, and to */
12 /* permit persons to whom this work is furnished to do so, subject to */
13 /* the following conditions: */
14 /* 1. The code must retain the above copyright notice, this list of */
15 /* conditions and the following disclaimer. */
16 /* 2. Any modifications must be clearly marked as such. */
17 /* 3. Original authors' names are not deleted. */
18 /* 4. The authors' names are not used to endorse or promote products */
19 /* derived from this software without specific prior written */
20 /* permission. */
21 /* */
22 /* THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK */
23 /* DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING */
24 /* ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT */
25 /* SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE */
26 /* FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES */
27 /* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN */
28 /* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, */
29 /* ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF */
30 /* THIS SOFTWARE. */
31 /* */
32 /*************************************************************************/
33 /* Author : Alan W Black */
34 /* Date : April 1996 */
35 /*-----------------------------------------------------------------------*/
36 /* */
37 /* A Tokenize class, both for Tokens (Strings plus alpha) */
38 /* EST_TokenStream for strings, FILE *, files, pipes etc */
39 /* */
40 /*=======================================================================*/
41 #include <cstdio>
42 #include <iostream>
43 #include "EST_unix.h"
44 #include <cstdlib>
45 #include <climits>
46 #include <cstring>
47 #include "EST_math.h"
48 #include "EST_Token.h"
49 #include "EST_string_aux.h"
50 #include "EST_cutils.h"
51 #include "EST_error.h"
52 
53 using namespace std;
54 
59 const EST_String Token_Origin_FD = "existing file descriptor";
60 const EST_String Token_Origin_Stream = "existing istream";
61 const EST_String Token_Origin_String = "existing string";
62 
63 static EST_Regex RXanywhitespace("[ \t\n\r]");
64 
// Grow a heap-allocated character buffer so that both str[pos] and
// str[pos+1] are writable.
//
//   str  buffer previously obtained with new char[*max]
//   pos  index the caller is about to write
//   max  in/out: current capacity of str; updated when the buffer grows
//
// Returns str itself when it is already large enough; otherwise returns a
// new, larger buffer holding the old contents, and the old buffer is
// deleted.  Callers must always continue with the returned pointer.
//
// One spare slot after pos is guaranteed because callers store a
// character at the checked index and then write a terminating '\0' at the
// following index without checking again.
static inline char *check_extend_str_in(char *str, int pos, int *max)
{
    if (pos + 1 < *max)
        return str;                     // room for pos and a terminator

    const int old_max = *max;

    // Same growth policy as before: double, or jump to twice the index
    // when the index is already beyond the current capacity.
    if (pos > old_max)
        *max = 2 * pos;
    else
        *max = 2 * old_max;
    if (*max < pos + 2)                 // tiny or zero-sized buffers
        *max = pos + 2;

    char *newstuff = new char[*max];

    // Copy only bytes that exist in the old buffer.  memcpy (not strncpy)
    // so that data following an embedded '\0' is preserved.
    const int keep = (pos < old_max) ? pos : old_max;
    if (keep > 0)
        memcpy(newstuff, str, keep);

    delete [] str;
    return newstuff;
}

// Fast-path wrapper: only calls the helper when growth is needed.
// The test must stay in step with the one inside check_extend_str_in.
#define check_extend_str(STR, POS, MAX) \
    (((POS)+1 >= *(MAX))?check_extend_str_in((STR),(POS),(MAX)):(STR))
88 
89 ostream& operator<<(ostream& s, const EST_Token &p)
90 {
91  s << "[TOKEN " << p.pname << "]";
92  return s;
93 }
94 
95 
97 {
98  linenum = a.linenum;
99  linepos = a.linepos;
100  p_filepos = a.p_filepos;
101  p_quoted = a.p_quoted;
102  space = a.space;
103  prepunc = a.prepunc;
104  pname = a.pname;
105  punc = a.punc;
106  return *this;
107 }
108 
110 {
111  return "line "+itoString(linenum)+" char "+itoString(linepos);
112 }
113 
115 {
116  pname = a;
117  return *this;
118 }
119 
121 {
122  tok_wspacelen = 64; // will grow if necessary
123  tok_wspace = new char[tok_wspacelen];
124  tok_stufflen = 512; // will grow if necessary
125  tok_stuff = new char[tok_stufflen];
126  tok_prepuncslen = 32; // will grow if necessary
127  tok_prepuncs = new char[tok_prepuncslen];
128 
129  default_values();
130 }
131 
133 {
134  (void)s;
135 
136  cerr << "TokenStream: warning passing TokenStream not as reference"
137  << endl;
138 
139  // You *really* shouldn't use this AT ALL unless you
140  // fully understand its consequences, you'll be copying open
141  // files and moving file pointers all over the place
142  // basically *DON'T* do this, pass the stream by reference
143 
144  // Now there may be occasions when you do want to do this for example
145  // when you need to do far look ahead or check point as you read
146  // but they are obscure and I'm not sure how to do that for all
147  // the file forms supported by the TokenStream. If you do
148  // I can write a clone function that might do it.
149 
150 }
151 
152 void EST_TokenStream::default_values()
153 {
154  type = tst_none;
155  peeked_tokp = FALSE;
156  peeked_charp = FALSE;
157  eof_flag = FALSE;
158  quotes = FALSE;
159  p_filepos = 0;
160  linepos = 1;
161  WhiteSpaceChars = EST_Token_Default_WhiteSpaceChars;
162  SingleCharSymbols = EST_String::Empty;
163  PrePunctuationSymbols = EST_String::Empty;
164  PunctuationSymbols = EST_String::Empty;
165  build_table();
166  close_at_end=TRUE;
167 
168  /* Avoid leaving uninitialized members */
169  fp = 0;
170  is = 0;
171  fd = 0;
172  buffer = 0;
173  buffer_length = 0;
174  pos = 0;
175  peeked_char = 0;
176  quote = 0;
177  escape = 0;
178 }
179 
181 {
182  if (type != tst_none)
183  close();
184  delete [] tok_wspace;
185  delete [] tok_stuff;
186  delete [] tok_prepuncs;
187 
188 }
189 
190 ostream& operator<<(ostream& s, EST_TokenStream &p)
191 {
192  s << "[TOKENSTREAM ";
193  switch (p.type)
194  {
195  case tst_none:
196  cerr << "UNSET"; break;
197  case tst_file:
198  cerr << "FILE"; break;
199  case tst_pipe:
200  cerr << "PIPE"; break;
201  case tst_istream:
202  cerr << "ISTREAM"; break;
203  case tst_string:
204  cerr << "STRING"; break;
205  default:
206  cerr << "UNKNOWN" << endl;
207  }
208  s << "]";
209 
210  return s;
211 }
212 
213 int EST_TokenStream::open(const EST_String &filename)
214 {
215  if (type != tst_none)
216  close();
217  default_values();
218  fp = fopen(filename,"rb");
219  if (fp == NULL)
220  {
221  cerr << "Cannot open file " << filename << " as tokenstream"
222  << endl;
223  return -1;
224  }
225  Origin = filename;
226  type = tst_file;
227 
228  return 0;
229 }
230 
231 int EST_TokenStream::open(FILE *ofp, int close_when_finished)
232 {
233  // absorb already open stream
234  if (type != tst_none)
235  close();
236  default_values();
237  fp = ofp;
238  if (fp == NULL)
239  {
240  cerr << "Cannot absorb NULL filestream as tokenstream" << endl;
241  return -1;
242  }
243  Origin = Token_Origin_FD;
244  type = tst_file;
245 
246  close_at_end = close_when_finished;
247 
248  return 0;
249 }
250 
251 int EST_TokenStream::open(std::istream &newis)
252 {
253  // absorb already open istream
254  if (type != tst_none)
255  close();
256  default_values();
257  is = &newis;
258  Origin = Token_Origin_Stream;
259  type = tst_istream;
260 
261  return 0;
262 }
263 
265 {
266  // Make a tokenstream from an internal existing string/buffer
267  const char *buf;
268  if (type != tst_none)
269  close();
270  default_values();
271  buf = (const char *)newbuffer;
272  buffer_length = newbuffer.length();
273  buffer = new char[buffer_length+1];
274  memmove(buffer,buf,buffer_length+1);
275  pos = 0;
276  Origin = Token_Origin_String;
277  type = tst_string;
278 
279  return 0;
280 }
281 
283 {
284  // This isn't actually useful but people expect it
285  peeked_charp = FALSE;
286  peeked_tokp = FALSE;
287 
288  switch (type)
289  {
290  case tst_none:
291  cerr << "EST_TokenStream unset" << endl;
292  return -1;
293  break;
294  case tst_file:
295  EST_fseek(fp,0,SEEK_END);
296  p_filepos = EST_ftell(fp);
297  return p_filepos;
298  case tst_pipe:
299  cerr << "EST_TokenStream seek on pipe not supported" << endl;
300  return -1;
301  break;
302  case tst_istream:
303  is->seekg(0,is->end);
304  p_filepos = is->tellg();
305  return p_filepos;
306  break;
307  case tst_string:
308  pos = buffer_length;
309  return pos;
310  default:
311  cerr << "EST_TokenStream: unknown type" << endl;
312  return -1;
313  }
314 
315  return -1; // can't get here
316 }
317 
318 int EST_TokenStream::seek(int position)
319 {
320  peeked_charp = FALSE;
321  peeked_tokp = FALSE;
322 
323  switch (type)
324  {
325  case tst_none:
326  cerr << "EST_TokenStream unset" << endl;
327  return -1;
328  break;
329  case tst_file:
330  p_filepos = position;
331  return EST_fseek(fp,position,SEEK_SET);
332  case tst_pipe:
333  cerr << "EST_TokenStream seek on pipe not supported" << endl;
334  return -1;
335  break;
336  case tst_istream:
337  p_filepos = position;
338  is->seekg(position, is->beg);
339  return 0;
340  break;
341  case tst_string:
342  if (position >= pos)
343  {
344  pos = position;
345  return -1;
346  }
347  else
348  {
349  pos = position;
350  return 0;
351  }
352  break;
353  default:
354  cerr << "EST_TokenStream: unknown type" << endl;
355  return -1;
356  }
357 
358  return -1; // can't get here
359 
360 }
361 
// Thin forwarder to the C library fread().
// It exists as a free function so that code inside EST_TokenStream, which
// has its own fread() member, can still reach the stdio one.
// Returns the number of complete items read, narrowed to int.
static int stdio_fread(void *buff,int size,int nitems,FILE *fp)
{
    const size_t items_done = fread(buff, size, nitems, fp);
    return static_cast<int>(items_done);
}
367 
368 int EST_TokenStream::fread(void *buff, int size, int nitems)
369 {
370  // switching into binary mode for current position
371  int items_read;
372 
373  // so we can continue to read afterwards
374  if (peeked_tokp)
375  {
376  cerr << "ERROR " << pos_description()
377  << " peeked into binary data" << endl;
378  return 0;
379  }
380 
381  peeked_charp = FALSE;
382  peeked_tokp = FALSE;
383 
384  switch (type)
385  {
386  case tst_none:
387  cerr << "EST_TokenStream unset" << endl;
388  return 0;
389  break;
390  case tst_file:
391  items_read = stdio_fread(buff,(ssize_t)size,(ssize_t)nitems,fp);
392  p_filepos += items_read*size;
393  return items_read;
394  case tst_pipe:
395  cerr << "EST_TokenStream fread pipe not yet supported" << endl;
396  return 0;
397  break;
398  case tst_istream:
399  is->read((char*)buff, (ssize_t) size*nitems);
400  return is->gcount()/size;
401  break;
402  case tst_string:
403  if ((buffer_length-pos)/size < nitems)
404  items_read = (buffer_length-pos)/size;
405  else
406  items_read = nitems;
407  memcpy(buff,&buffer[pos],items_read*size);
408  pos += items_read*size;
409  return items_read;
410  default:
411  cerr << "EST_TokenStream: unknown type" << endl;
412  return EOF;
413  }
414 
415  return 0; // can't get here
416 
417 }
418 
420 {
421  // close any files (if they were used)
422 
423  switch (type)
424  {
425  case tst_none:
426  break;
427  case tst_file:
428  if (close_at_end)
429  fclose(fp);
430  case tst_pipe:
431  // close(fd);
432  break;
433  case tst_istream:
434  break;
435  case tst_string:
436  delete [] buffer;
437  buffer = 0;
438  break;
439  default:
440  cerr << "EST_TokenStream: unknown type" << endl;
441  break;
442  }
443 
444  type = tst_none;
445  peeked_charp = FALSE;
446  peeked_tokp = FALSE;
447 
448 }
449 
451 {
452  // For paul, the only person I know who uses this
453 
454  switch (type)
455  {
456  case tst_none:
457  break;
458  case tst_file:
459  fp = freopen(Origin,"rb",fp);
460  p_filepos = 0;
461  break;
462  case tst_pipe:
463  cerr << "EST_TokenStream: can't rewind pipe" << endl;
464  return -1;
465  break;
466  case tst_istream:
467  cerr << "EST_TokenStream: can't rewind istream" << endl;
468  break;
469  case tst_string:
470  pos = 0;
471  break;
472  default:
473  cerr << "EST_TokenStream: unknown type" << endl;
474  break;
475  }
476 
477  linepos = 1;
478  peeked_charp = FALSE;
479  peeked_tokp = FALSE;
480  eof_flag = FALSE;
481 
482  return 0;
483 }
484 
486 {
487  return get(p);
488 }
489 
491 {
492  EST_Token t;
493 
494  get(t);
495  p = t.string();
496  return *this;
497 }
498 
500 {
501  tok = get();
502  return *this;
503 }
504 
506 {
507  // Returns a concatenated token form here to next symbol that matches s
508  // including s (though not adding s on the result)
509  // Not really for the purist but lots of times very handy
510  // Note this is not very efficient
511  EST_String result;
512  EST_Token t;
513 
514  for (result=EST_String::Empty; (t=get()) != s; )
515  {
516  result += t.whitespace() + t.prepunctuation() +
517  t.string() + t.punctuation();
518  if (eof())
519  {
520  cerr << "EST_TokenStream: end of file when looking for \"" <<
521  s << "\"" << endl;
522  break;
523  }
524  }
525 
526  return EST_Token(result);
527 }
528 
530 {
531  // Swallow the lot up to end of line
532  // assumes \n is a whitespace character
533 
535 
536  while (!eoln())
537  {
538  EST_Token &t=get();
539  result += t.whitespace() + t.prepunctuation();
540 
541  if (quotes)
542  result += quote_string(t.string());
543  else
544  result += t.string();
545 
546  result += t.punctuation();
547 
548  if (eof())
549  {
550 // cerr << "EST_TokenStream: end of file when looking for end of line"
551 // << endl;
552  break;
553  }
554  }
555  // So that the next call works I have to step over the eoln condition
556  // That involves removing the whitespace upto and including the next
557  // \n in the peek token.
558 
559  char *w = wstrdup(peek().whitespace());
560  int i;
561  for (i=0; w[i] != 0; i++)
562  if (w[i] == '\n') // maybe not portable
563  peek().set_whitespace(&w[i+1]);
564 
565  wfree(w);
566 
567  static EST_Token result_t;
568 
569  result_t.set_token(result);
570 
571  return result_t;
572 }
573 
575 {
576  EST_Token &tok = get();
577 
578  if (tok != expected)
579  {
580  if (ok != NULL)
581  {
582  *ok=FALSE;
583  return tok;
584  }
585  else
586  EST_error("Expected '%s' got '%s' at %s",
587  (const char *)expected,
588  (const char *)(EST_String)tok,
589  (const char *)pos_description());
590  }
591 
592  if (ok != NULL)
593  *ok=TRUE;
594  return tok;
595 }
596 
597 void EST_TokenStream::build_table()
598 {
599  int i;
600  const char *p;
601  unsigned char c;
602 
603  for (i=0; i<256; ++i)
604  p_table[i]=0;
605 
606  for (p=WhiteSpaceChars; *p; ++p)
607  if (p_table[c=(unsigned char)*p])
608  EST_warning("Character '%c' has two classes, '%c' and '%c'",
609  *p, c, ' ');
610  else
611  p_table[c] = ' ';
612 
613  for (p=SingleCharSymbols; *p; ++p)
614  if (p_table[c=(unsigned char)*p])
615  EST_warning("Character '%c' has two classes, '%c' and '%c'",
616  *p, p_table[c], '!');
617  else
618  p_table[c] = '@';
619 
620  for (p=PunctuationSymbols; *p; ++p)
621  if (p_table[c=(unsigned char)*p] == '@')
622  continue;
623  else if (p_table[c])
624  EST_warning("Character '%c' has two classes, '%c' and '%c'",
625  *p, p_table[c], '.');
626  else
627  p_table[c] = '.';
628 
629  for(p=PrePunctuationSymbols; *p; ++p)
630  if (p_table[c=(unsigned char)*p] == '@')
631  continue;
632  else if (p_table[c] == '.')
633  p_table[c] = '"';
634  else if (p_table[c])
635  EST_warning("Character '%c' has two classes, '%c' and '%c'",
636  *p, p_table[c], '$');
637  else
638  p_table[c] = '$';
639 
640  p_table_wrong=0;
641 }
642 
643 inline int EST_TokenStream::getpeeked_internal(void)
644 {
645  peeked_charp = FALSE;
646  return peeked_char;
647 }
648 
649 inline
650 int EST_TokenStream::getch_internal()
651 {
652  // Return next character in stream
653  if (EST_TokenStream::peeked_charp)
654  {
655  return getpeeked_internal();
656  }
657 
658  switch (type)
659  {
660  case tst_none:
661  cerr << "EST_TokenStream unset" << endl;
662  return EOF;
663  break;
664  case tst_file:
665  p_filepos++;
666  {
667  char lc;
668  if (stdio_fread(&lc,1,1,fp) == 0)
669  return EOF;
670  else
671  return (int)lc;
672  }
673 /* return getc(fp); */
674  case tst_pipe:
675  cerr << "EST_TokenStream pipe not yet supported" << endl;
676  return EOF;
677  break;
678  case tst_istream:
679  p_filepos++;
680  return is->get();
681  case tst_string:
682  if (pos < buffer_length)
683  {
684  p_filepos++;
685  return buffer[pos++];
686  }
687  else
688  return EOF;
689  default:
690  cerr << "EST_TokenStream: unknown type" << endl;
691  return EOF;
692  }
693 
694  return EOF; // can't get here
695 }
696 
697 int EST_TokenStream::getch(void)
698 {
699  return getch_internal();
700 }
701 
702 inline int EST_TokenStream::peekch_internal()
703 {
704  // Return next character in stream (without reading it)
705 
706  if (!peeked_charp)
707  peeked_char = getch_internal();
708  peeked_charp = TRUE;
709  return peeked_char;
710 }
711 
712 
713 int EST_TokenStream::peekch(void)
714 {
715  return peekch_internal();
716 
717 }
718 
719 #define CLASS(C,CL) (p_table[(unsigned char)(C)]==(CL))
720 
721 #define CLASS2(C,CL1,CL2) (p_table[(unsigned char)(C)]==(CL1)||p_table[(unsigned char)(C)]==(CL2))
722 
724 {
725  if (peeked_tokp)
726  {
727  peeked_tokp = FALSE;
728  return current_tok;
729  }
730 
731  if (p_table_wrong)
732  build_table();
733 
734  char *word;
735  int c,i,j;
736  c=getch_internal();
737  for (i=0; (c != EOF && CLASS(c,' ')); i++)
738  {
739  if (c == '\n') linepos++;
740  tok_wspace = check_extend_str(tok_wspace,i,&tok_wspacelen);
741  tok_wspace[i] = c;
742  c=getch_internal();
743  }
744  tok_wspace[i] = '\0';
745 
746  current_tok.init();
747 
748  if (c != EOF)
749  {
750  current_tok.set_filepos(p_filepos-1);
751 
752  if ((quotes) && // quoted strings (with escapes) are allowed
753  (c == quote))
754  {
755  for (i=0;
756  ((c = getch_internal()) != EOF)
757  ;)
758  {
759  if (c == quote)
760  break;
761  tok_stuff = check_extend_str(tok_stuff,i,&tok_stufflen);
762  if (c == escape)
763  c = getch_internal();
764  tok_stuff[i++] = c;
765  }
766  current_tok.set_quoted(TRUE);
767  }
768  else // standard whitespace separated tokens
769  {
770  for (i=0,tok_stuff[i++]=c;
771  (
772  !CLASS(c,'@') &&
773  (c=peekch_internal(),
774  c >= 0 && !CLASS(c,' ')) &&
775  !CLASS(c,'@') &&
776  ( c != EOF )) ;)
777  {
778  tok_stuff = check_extend_str(tok_stuff,i,&tok_stufflen);
779  // note, we must have peeked to get here.
780  tok_stuff[i++] = getpeeked_internal();
781  }
782  }
783  tok_stuff[i] = '\0';
784  // Are there any punctuation symbols at the start?
785  for (j=0;
786  ((j < i) && CLASS2(tok_stuff[j], '$', '"'));
787  j++);
788  if ((j > 0) && (j < i)) // there are
789  {
790  tok_prepuncs = check_extend_str(tok_prepuncs,j+1,&tok_prepuncslen);
791  memmove(tok_prepuncs,tok_stuff,j);
792  tok_prepuncs[j] = '\0';
793  current_tok.set_prepunctuation(tok_prepuncs);
794  word=&tok_stuff[j];
795  i-=j; // reduce size by number of prepuncs
796  }
797  else
798  {
799  current_tok.set_prepunctuation(EST_String::Empty);
800  word = tok_stuff;
801  }
802  // Are there any punctuation symbols at the end
803  for (j=i-1;
804  ((j > 0) && CLASS2(word[j],'.','"'));
805  j--);
806  if (word[j+1] != '\0')
807  {
808  current_tok.set_punctuation(&word[j+1]);
809  word[j+1] = '\0';
810  }
811  else
812  current_tok.set_punctuation(EST_String::Empty);
813 
814  current_tok.set_token(word);
815  if (tok_wspace[0] == '\0') // feature paths will have null whitespace
816  current_tok.set_whitespace(EST_String::Empty);
817  else
818  current_tok.set_whitespace(tok_wspace);
819  }
820  else
821  {
822  current_tok.set_token(EST_String::Empty);
823  current_tok.set_whitespace(tok_wspace);
824  current_tok.set_punctuation(EST_String::Empty);
825  current_tok.set_prepunctuation(EST_String::Empty);
826  eof_flag = TRUE;
827  }
828 
829  return current_tok;
830 }
831 
833 {
834  // This doesn't really work if there are blank lines (and you want
835  // to know about them)
836 
837  if ((peek().whitespace().contains("\n")) || eof())
838  return TRUE;
839  else
840  return FALSE;
841 
842 }
843 
845  const EST_String &quote,
846  const EST_String &escape,
847  int force)
848 {
849  // Quotes s always if force true, or iff s contains whitespace,
850  // quotes or escapes force is false
851  // Note quote and escape are assumed to be string of length 1
852  EST_String quoted_form;
853  if ((force) ||
854  (s.contains(quote)) ||
855  (s.contains(escape)) ||
856  (s.contains(RXanywhitespace)) ||
857  (s.length() == 0))
858  {
859  // bigger than the quoted form could ever be
860  size_t i,j;
861  char *quoted = new char[s.length()*(quote.length()+escape.length())+
862  1+quote.length()+quote.length()];
863  quoted[0] = quote(0);
864  for (i=1,j=0; j < s.length(); j++,i++)
865  {
866  if (s(j) == quote(0))
867  quoted[i++] = escape(0);
868  else if (s(j) == escape(0))
869  quoted[i++] = escape(0);
870  quoted[i] = s(j);
871  }
872  quoted[i++] = quote(0);
873  quoted[i] = '\0';
874  quoted_form = quoted;
875  delete [] quoted;
876  return quoted_form;
877  }
878  else
879  return s;
880 }
881 
883 {
884  return Origin+":"+itoString(linepos);
885 }
#define check_extend_str(STR, POS, MAX)
Definition: EST_Token.cc:86
char * wstrdup(const char *s)
Definition: walloc.c:117
~EST_TokenStream()
will close file if appropriate for type
Definition: EST_Token.cc:180
int contains(const char *s, ssize_t pos=-1) const
Does it contain this substring?
Definition: EST_String.h:365
const EST_String pos_description() const
A string describing current position, suitable for error messages.
Definition: EST_Token.cc:109
EST_FilePos EST_ftell(FILE *fp)
Definition: EST_File.h:71
const EST_String & punctuation()
Definition: EST_Token.h:114
int fread(void *buff, int size, int nitems) EST_WARN_UNUSED_RESULT
Reading binary data (don't use peek() immediately beforehand)
Definition: EST_Token.cc:368
ostream & operator<<(ostream &s, const EST_Token &p)
Definition: EST_Token.cc:89
A Regular expression class to go with the CSTR EST_String class.
Definition: EST_Regex.h:56
void close(void)
Close stream.
Definition: EST_Token.cc:419
#define CLASS2(C, CL1, CL2)
Definition: EST_Token.cc:721
const EST_String & whitespace()
Definition: EST_Token.h:112
EST_String itoString(int n)
Make a EST_String object from an integer.
Definition: util_io.cc:141
const EST_String Token_Origin_String
Definition: EST_Token.cc:61
int ssize_t
const EST_String EST_Token_Default_PunctuationSymbols
Definition: EST_Token.cc:58
#define CLASS(C, CL)
Definition: EST_Token.cc:719
int contains(EST_TList< int > &l, int n)
Definition: EST_cluster.cc:82
int open(const EST_String &filename)
open a EST_TokenStream for a file.
Definition: EST_Token.cc:213
int open_string(const EST_String &newbuffer)
open a EST_TokenStream for string rather than a file
Definition: EST_Token.cc:264
float max(float a, float b)
Definition: EST_cluster.cc:143
#define SEEK_END
Definition: system.h:28
void set_token(const EST_String &p)
set token from a string
Definition: EST_Token.h:96
const EST_String EST_Token_Default_PrePunctuationSymbols
Definition: EST_Token.cc:57
int restart(void)
Reset to start of file/string.
Definition: EST_Token.cc:450
const EST_String & prepunctuation()
Definition: EST_Token.h:116
EST_Token & operator=(const EST_Token &a)
Definition: EST_Token.cc:96
#define FALSE
Definition: EST_bool.h:119
NULL
Definition: EST_WFST.cc:55
#define EST_error
Definition: EST_error.h:104
int EST_fseek(FILE *fp, EST_FilePos offset, int whence)
Definition: EST_File.h:75
EST_TokenStream & operator>>(EST_Token &p)
Definition: EST_Token.cc:485
EST_Token & get()
get next token in stream
Definition: EST_Token.cc:723
const EST_String EST_Token_Default_SingleCharSymbols
Definition: EST_Token.cc:56
EST_Token get_upto(const EST_String &s)
get up to s in stream as a single token.
Definition: EST_Token.cc:505
size_t length(void) const
Length of string (not the length of the underlying chunk)
Definition: EST_String.h:231
const EST_String & string() const
Definition: EST_Token.h:120
LISP quote(LISP item)
Definition: siod.cc:252
#define EST_warning
Definition: EST_error.h:106
const EST_String Token_Origin_Stream
Definition: EST_Token.cc:60
const EST_String pos_description()
A string describing current position, suitable for error messages.
Definition: EST_Token.cc:882
EST_Token get_upto_eoln(void)
get everything up to the end of the line as a single token.
Definition: EST_Token.cc:529
EST_Token & must_get(EST_String expected, bool *ok)
Definition: EST_Token.cc:574
LISP fp
Definition: kkcompile.cc:63
const EST_String Token_Origin_FD
Definition: EST_Token.cc:59
void wfree(void *p)
Definition: walloc.c:131
EST_String quote_string(const EST_String &s, const EST_String &quote, const EST_String &escape, int force)
Definition: EST_Token.cc:844
#define TRUE
Definition: EST_bool.h:118
int seek(int position)
seek, reposition file pointer
Definition: EST_Token.cc:318
const EST_String EST_Token_Default_WhiteSpaceChars
The default whitespace characters.
Definition: EST_Token.cc:55
static const EST_String Empty
Constant empty string.
Definition: EST_String.h:110
int eoln()
end of line
Definition: EST_Token.cc:832
Utility EST_String Functions header file.
#define SEEK_SET
Definition: system.h:20