63 static EST_Regex RXanywhitespace(
"[ \t\n\r]");
65 static inline char *check_extend_str_in(
char *str,
int pos,
int *
max)
77 newstuff =
new char[*
max];
78 strncpy(newstuff,str,pos);
86 #define check_extend_str(STR, POS, MAX) \ 87 (((POS)>= *(MAX))?check_extend_str_in((STR),(POS),(MAX)):(STR)) 91 s <<
"[TOKEN " << p.pname <<
"]";
100 p_filepos = a.p_filepos;
101 p_quoted = a.p_quoted;
123 tok_wspace =
new char[tok_wspacelen];
125 tok_stuff =
new char[tok_stufflen];
126 tok_prepuncslen = 32;
127 tok_prepuncs =
new char[tok_prepuncslen];
136 cerr <<
"TokenStream: warning passing TokenStream not as reference" 152 void EST_TokenStream::default_values()
156 peeked_charp =
FALSE;
184 delete [] tok_wspace;
186 delete [] tok_prepuncs;
192 s <<
"[TOKENSTREAM ";
196 cerr <<
"UNSET";
break;
198 cerr <<
"FILE";
break;
200 cerr <<
"PIPE";
break;
202 cerr <<
"ISTREAM";
break;
204 cerr <<
"STRING";
break;
206 cerr <<
"UNKNOWN" << endl;
218 fp = fopen(filename,
"rb");
221 cerr <<
"Cannot open file " << filename <<
" as tokenstream" 240 cerr <<
"Cannot absorb NULL filestream as tokenstream" << endl;
246 close_at_end = close_when_finished;
271 buf = (
const char *)newbuffer;
272 buffer_length = newbuffer.
length();
273 buffer =
new char[buffer_length+1];
274 memmove(buffer,buf,buffer_length+1);
285 peeked_charp =
FALSE;
291 cerr <<
"EST_TokenStream unset" << endl;
299 cerr <<
"EST_TokenStream seek on pipe not supported" << endl;
303 is->seekg(0,is->end);
304 p_filepos = is->tellg();
311 cerr <<
"EST_TokenStream: unknown type" << endl;
320 peeked_charp =
FALSE;
326 cerr <<
"EST_TokenStream unset" << endl;
330 p_filepos = position;
333 cerr <<
"EST_TokenStream seek on pipe not supported" << endl;
337 p_filepos = position;
338 is->seekg(position, is->beg);
354 cerr <<
"EST_TokenStream: unknown type" << endl;
362 static int stdio_fread(
void *buff,
int size,
int nitems,FILE *
fp)
365 return fread(buff,size,nitems,fp);
376 cerr <<
"ERROR " << pos_description()
377 <<
" peeked into binary data" << endl;
381 peeked_charp =
FALSE;
387 cerr <<
"EST_TokenStream unset" << endl;
392 p_filepos += items_read*size;
395 cerr <<
"EST_TokenStream fread pipe not yet supported" << endl;
399 is->read((
char*)buff, (
ssize_t) size*nitems);
400 return is->gcount()/size;
403 if ((buffer_length-pos)/size < nitems)
404 items_read = (buffer_length-pos)/size;
407 memcpy(buff,&buffer[pos],items_read*size);
408 pos += items_read*size;
411 cerr <<
"EST_TokenStream: unknown type" << endl;
440 cerr <<
"EST_TokenStream: unknown type" << endl;
445 peeked_charp =
FALSE;
459 fp = freopen(Origin,
"rb",fp);
463 cerr <<
"EST_TokenStream: can't rewind pipe" << endl;
467 cerr <<
"EST_TokenStream: can't rewind istream" << endl;
473 cerr <<
"EST_TokenStream: unknown type" << endl;
478 peeked_charp =
FALSE;
520 cerr <<
"EST_TokenStream: end of file when looking for \"" <<
559 char *w =
wstrdup(peek().whitespace());
561 for (i=0; w[i] != 0; i++)
563 peek().set_whitespace(&w[i+1]);
586 EST_error(
"Expected '%s' got '%s' at %s",
587 (
const char *)expected,
589 (
const char *)pos_description());
597 void EST_TokenStream::build_table()
603 for (i=0; i<256; ++i)
606 for (p=WhiteSpaceChars; *p; ++p)
607 if (p_table[c=(
unsigned char)*p])
608 EST_warning(
"Character '%c' has two classes, '%c' and '%c'",
613 for (p=SingleCharSymbols; *p; ++p)
614 if (p_table[c=(
unsigned char)*p])
615 EST_warning(
"Character '%c' has two classes, '%c' and '%c'",
616 *p, p_table[c],
'!');
620 for (p=PunctuationSymbols; *p; ++p)
621 if (p_table[c=(
unsigned char)*p] ==
'@')
624 EST_warning(
"Character '%c' has two classes, '%c' and '%c'",
625 *p, p_table[c],
'.');
629 for(p=PrePunctuationSymbols; *p; ++p)
630 if (p_table[c=(
unsigned char)*p] ==
'@')
632 else if (p_table[c] ==
'.')
635 EST_warning(
"Character '%c' has two classes, '%c' and '%c'",
636 *p, p_table[c],
'$');
643 inline int EST_TokenStream::getpeeked_internal(
void)
645 peeked_charp =
FALSE;
650 int EST_TokenStream::getch_internal()
653 if (EST_TokenStream::peeked_charp)
655 return getpeeked_internal();
661 cerr <<
"EST_TokenStream unset" << endl;
668 if (stdio_fread(&lc,1,1,fp) == 0)
675 cerr <<
"EST_TokenStream pipe not yet supported" << endl;
682 if (pos < buffer_length)
685 return buffer[pos++];
690 cerr <<
"EST_TokenStream: unknown type" << endl;
697 int EST_TokenStream::getch(
void)
699 return getch_internal();
702 inline int EST_TokenStream::peekch_internal()
707 peeked_char = getch_internal();
713 int EST_TokenStream::peekch(
void)
715 return peekch_internal();
719 #define CLASS(C,CL) (p_table[(unsigned char)(C)]==(CL)) 721 #define CLASS2(C,CL1,CL2) (p_table[(unsigned char)(C)]==(CL1)||p_table[(unsigned char)(C)]==(CL2)) 737 for (i=0; (c != EOF &&
CLASS(c,
' ')); i++)
739 if (c ==
'\n') linepos++;
744 tok_wspace[i] =
'\0';
750 current_tok.set_filepos(p_filepos-1);
756 ((c = getch_internal()) != EOF)
763 c = getch_internal();
766 current_tok.set_quoted(
TRUE);
770 for (i=0,tok_stuff[i++]=c;
773 (c=peekch_internal(),
774 c >= 0 && !
CLASS(c,
' ')) &&
780 tok_stuff[i++] = getpeeked_internal();
786 ((j < i) &&
CLASS2(tok_stuff[j],
'$',
'"'));
788 if ((j > 0) && (j < i))
791 memmove(tok_prepuncs,tok_stuff,j);
792 tok_prepuncs[j] =
'\0';
793 current_tok.set_prepunctuation(tok_prepuncs);
804 ((j > 0) &&
CLASS2(word[j],
'.',
'"'));
806 if (word[j+1] !=
'\0')
808 current_tok.set_punctuation(&word[j+1]);
814 current_tok.set_token(word);
815 if (tok_wspace[0] ==
'\0')
818 current_tok.set_whitespace(tok_wspace);
823 current_tok.set_whitespace(tok_wspace);
837 if ((peek().whitespace().
contains(
"\n")) || eof())
863 quoted[0] =
quote(0);
864 for (i=1,j=0; j < s.
length(); j++,i++)
866 if (s(j) ==
quote(0))
867 quoted[i++] = escape(0);
868 else if (s(j) == escape(0))
869 quoted[i++] = escape(0);
872 quoted[i++] =
quote(0);
874 quoted_form = quoted;
#define check_extend_str(STR, POS, MAX)
char * wstrdup(const char *s)
~EST_TokenStream()
will close file if appropriate for type
int contains(const char *s, ssize_t pos=-1) const
Does it contain this substring?
const EST_String pos_description() const
A string describing current position, suitable for error messages.
EST_FilePos EST_ftell(FILE *fp)
const EST_String & punctuation()
int fread(void *buff, int size, int nitems) EST_WARN_UNUSED_RESULT
Reading binary data, (don't use peek() immediately beforehand)
ostream & operator<<(ostream &s, const EST_Token &p)
A Regular expression class to go with the CSTR EST_String class.
void close(void)
Close stream.
#define CLASS2(C, CL1, CL2)
const EST_String & whitespace()
EST_String itoString(int n)
Make an EST_String object from an integer.
const EST_String Token_Origin_String
const EST_String EST_Token_Default_PunctuationSymbols
int contains(EST_TList< int > &l, int n)
int open(const EST_String &filename)
Open an EST_TokenStream for a file.
int open_string(const EST_String &newbuffer)
Open an EST_TokenStream on a string rather than a file.
float max(float a, float b)
void set_token(const EST_String &p)
set token from a string
const EST_String EST_Token_Default_PrePunctuationSymbols
int restart(void)
Reset to start of file/string.
const EST_String & prepunctuation()
EST_Token & operator=(const EST_Token &a)
int EST_fseek(FILE *fp, EST_FilePos offset, int whence)
EST_TokenStream & operator>>(EST_Token &p)
EST_Token & get()
get next token in stream
const EST_String EST_Token_Default_SingleCharSymbols
EST_Token get_upto(const EST_String &s)
get up to s in stream as a single token.
size_t length(void) const
Length of string (not the length of the underlying chunk)
const EST_String & string() const
const EST_String Token_Origin_Stream
const EST_String pos_description()
A string describing current position, suitable for error messages.
EST_Token get_upto_eoln(void)
Get everything up to the end of the line as a single token.
EST_Token & must_get(EST_String expected, bool *ok)
const EST_String Token_Origin_FD
EST_String quote_string(const EST_String &s, const EST_String &quote, const EST_String &escape, int force)
int seek(int position)
seek, reposition file pointer
const EST_String EST_Token_Default_WhiteSpaceChars
The default whitespace characters.
static const EST_String Empty
Constant empty string.
Utility EST_String Functions header file.