55 if ((input_format ==
"sentence_per_line") ||
56 (input_format ==
"sentence_per_file"))
60 window[ngram.
order()-1] = t;
62 cerr <<
"EST_Ngrammar test: skipping bad word \"" <<
65 else if (input_format ==
"ngram_per_line")
67 for (i=0; i < ngram.
order(); i++)
72 cerr <<
"EST_Ngrammar test: skipping bad word \"" <<
77 EST_error(
"EST_Ngrammar test: unknown input format \"%s\"\n",
78 (
const char *)input_format);
83 for (i=0; i < ngram.
order(); i++)
113 else if (ts.
open(filename) == -1)
114 EST_error(
"EST_Ngrammar test: unable to open test file \"%s\"\n",
115 (
const char *)filename);
122 (get_next_window(ts,window,input_format,ngram) ==
TRUE))
127 if ((input_format ==
"sentence_per_line") && (ts.
eoln()))
132 raw_entropy = -1 *
H;
133 entropy = -1 * (H/Q);
134 perplexity = pow(2.0,entropy);
EST_TokenStream & get(EST_Token &t)
Get the next token in the stream.
int wordlist_index(const EST_String &word, const bool report=true) const
int open(const EST_String &filename)
Open an EST_TokenStream for a file.
double probability(const EST_StrVector &words, bool force=false, const bool trace=false) const
void fill_window_start(EST_IVector &window, const EST_String &prev, const EST_String &prev_prev) const
void slide(EST_IVector &v, const int l)
bool test_stats(EST_Ngrammar &ngram, const EST_String &filename, double &raw_entropy, double &count, double &entropy, double &perplexity, const EST_String &input_format, const EST_String &prev, const EST_String &prev_prev, const EST_String &last)