82     int this_num,this_order;
    84     if (ts.
open(filename) == -1)
    88     while ((!ts.
eof()) && !ts.
get().string().contains(
"\\data\\"));
   113         this_order=atoi(s.
before(
"="));
   114         this_num=atoi(s.
after(
"="));
   119         nums[this_order] = this_num;
   121         if(this_order > order)
   140     for(i=1;i<=order;i++)
   157         cerr << 
"Unexpected end of grammar file whilst looking for '"   158         << tmp << 
"'" << endl;
   166     for(j=0;j<nums(i);j++)
   169         for (k=0; ((k<i) && !ts.
eof()); k++)
   170         window[k] = ts.
get().string();
   174         cerr << 
"Unexpected end of file whilst reading " << i
   175             << 
"-grams !" << endl;
   184         cerr << 
"ooooooooops" << endl;
   212     if (ts.
get().string() == 
"\\end\\")
   219     cerr << 
"Missing \\end\\ !" << endl;
   233     if (ts.
open(filename) == -1)
   243     order = atoi(ts.
get().string());
   256     cerr << 
"Something may be wrong with the vocab lists in '"   257         << filename << 
"'" << endl;
   265     for (i=0; i < order; i++)
   266         window[i] = ts.
get().string();
   267     if (ts.
get().string() != 
":")
   269         cerr << 
"EST_Ngrammar:load_ngram_cstr_ascii missing colon at filepos "   273     occur = atof(ts.
get().string());
   277         cerr << 
"EST_Ngrammar:load_ngram_cstr_ascii expect end of line at filepos "   295     double approx_num_samples = 0.0;
   296     long freq_data_start, freq_data_end;
   301     if ((ifd=fopen(filename,
"rb")) == 
NULL)
   303     if (fread(&magic,
sizeof(
int),1,ifd) != 1)
   305         cerr << 
"Could not read integer from " << filename << endl;
   329     order = atoi(ts.
get().string());
   330     if (ts.
get() != 
"\n")
   339     while ((ts.
peek() != 
"\n") && (!ts.
eof()))
   342     while ((ts.
peek() != 
"\n") && (!ts.
eof()))
   361     num_entries = (freq_data_end-freq_data_start)/
sizeof(
double);
   362     double *dd = 
new double[num_entries];
   367     if (fread(dd,
sizeof(
double),num_entries,ifd) != (
unsigned)num_entries)
   369     cerr << 
"EST_Ngrammar::load_ngram_cstr_bin format does not have expected number of entries" << endl;
   380     if (j >= num_entries)
   382         cerr << 
"EST_Ngrammar::load_ngram_cstr_bin unexpected end of frequency data" << endl;
   394         approx_num_samples += dd[j]; 
   399         if (j+1 >= num_entries)
   401         else if (dd[j+1] < -1)
   403         else if (dd[j+1] == -1)
   431     this_ngram[0] = word;
   448     if(floor_prob_total > 1)
   450     cerr << 
"ERROR : floor is impossibly large, scaling it !" << endl;
   452     floor_prob_total = 1;
   469     *ost << word << 
" 0 ";
   496             *ost << 
"*" << lcount << 
" ";
   505             double base_prob = freq / total_freq;
   508             *ost << floor + ( base_prob * (1-floor_prob_total) );
   527     *ost << 0 << 
" ERROR !!!!!!!! ";
   538         *ost << 
"*" << lcount << 
" " << endl;
   544         *ost << 
"*" << lcount << 
" ";
   550         double base_prob = freq / total_freq;
   553         *ost << floor + ( base_prob * (1-floor_prob_total) ) << endl;
   557         *ost << floor << endl;
   574     cerr << 
"Can only save bigrams in htk_ascii format" << endl;
   580     cerr << 
"Negative floor probability does not make sense !" << endl;
   587     ost = 
new ofstream(filename);
   595     cerr << 
"ERROR : floor is impossibly large, scaling it to ";
   596     cerr << floor << endl;
   603     cerr << 
"Can't save in HTK format as no sentence start/end tags"   604         << 
" were given !" << endl;
   643     *((
double*)count) += 1;
   655     for(i=0;i<ngram.
n();i++)
   656         *((ostream*)(ost)) << ngram(i) << 
" ";
   659         (n->
order() > ngram.
n()) )
   664     *((ostream*)(ost)) << endl;
   680     ost = 
new ofstream(filename);
   691     *ost << 
"\\data\\" << endl;
   693     double *count = 
new double;
   697     for(o=1;o<=n.
order();o++)
   707         *ost << 
"ngram " << o << 
"=" << *count << endl;
   710     for(o=1;o<=n.
order();o++)
   713         *ost << 
"\\" << o << 
"-grams:" << endl;
   724     for(i=0;i<n.
order();i++)
   728     *ost << 
"ngram " << n.
order() << 
"=" << *count << endl;
   731     *ost << 
"\\" << n.
order() << 
"-grams:" << endl;
   733     for(i=0;i<n.
order();i++)
   739     *ost << 
"\\end\\" << endl;
   741     if (ost != &cout) 
delete ost;
   748               const bool trace, 
double floor)
   759     ost = 
new ofstream(filename);
   764     *ost << 
"Ngram_2 " << n.
order() << endl;
   778     for(i=0;i<total_ngrams;i++)
   792         for (
int jj=0; jj < this_ngram.
n(); jj++)
   793             *ost << this_ngram(jj) << 
" ";
   794         *ost << name << 
" : " << freq << endl;
   812     if ((ost = fopen(filename,
"wb")) == 
NULL)
   814     cerr << 
"Ngrammar save: unable to open \"" << filename << 
   815         "\" for writing" << endl;
   819     fprintf(ost,
"EST_File fst\n");
   820     fprintf(ost,
"DataType ascii\n");
   821     fprintf(ost,
"in \"(");
   823     fprintf(ost,
" %s\n",(
const char *)n.
vocab->
name(i));
   824     fprintf(ost,
" )\"\n");
   825     fprintf(ost,
"out \"(");
   827     fprintf(ost,
" %s\n",(
const char *)n.
vocab->
name(i));
   828     fprintf(ost,
" )\"\n");
   830     fprintf(ost,
"EST_Header_End\n");
   834     fprintf(ost,
"((%d nonfinal %d)\n",i,i);
   845             const bool trace, 
double floor)
   860     if ((ofd=stdout) == 
NULL)
   865     if ((ofd=fopen(filename,
"wb")) == 
NULL)
   869     fwrite(&magic,
sizeof(
int),1,ofd);
   870     fprintf(ofd,
"mBin_2 %d\n",n.
order());
   872     fprintf(ofd,
"%s ",(
const char *)n.
vocab->
name(i));
   883     cerr << 
"Saving ..." << endl;
   907             fwrite(&count,
sizeof(
double),1,ofd);
   908             fwrite(&freq,
sizeof(
double),1,ofd);
   915         fwrite(&count,
sizeof(
double),1,ofd);
   926     for(i=0;i<total_ngrams;i++)
   930         cerr << 
"\r" << i*100/total_ngrams << 
"%";
   949             fwrite(&count,
sizeof(
double),1,ofd);
   950             fwrite(&freq,
sizeof(
double),1,ofd);
   961     cerr << 
"\r      \r" << endl;
 void set_WhiteSpaceChars(const EST_String &ws)
set which characters are to be treated as whitespace 
 
#define EST_NGRAMBIN_MAGIC
 
EST_TokenStream & get(EST_Token &t)
get next token in stream 
 
EST_FilePos filepos(void) const 
file position in original EST_TokenStream. 
 
EST_String p_sentence_end_marker
 
EST_Litem * item_next(EST_Litem *idx) const 
Used for iterating through members of the distribution. 
 
int contains(const char *s, ssize_t pos=-1) const 
Does it contain this substring? 
 
void count_ngram_arpa_sub(EST_Ngrammar *n, EST_StrVector &ngram, void *count)
 
EST_read_status load_ngram_cstr_ascii(const EST_String filename, EST_Ngrammar &n)
 
EST_FilePos EST_ftell(FILE *fp)
 
void accumulate(const EST_StrVector &words, const double count=1)
 
void set_SingleCharSymbols(const EST_String &sc)
set which characters are to be treated as single character symbols 
 
void close(void)
Close stream. 
 
const EST_String & name(const int n) const 
The name corresponding to the given index. 
 
double get_backoff_weight(const EST_StrVector &words) const 
 
int length(void) const 
The number of members in the discrete. 
 
EST_String itoString(int n)
Make an EST_String object from an integer. 
 
void swap_bytes_double(double *data, int length)
 
EST_read_status load_ngram_arpa(const EST_String filename, EST_Ngrammar &n, const EST_StrList &vocab)
 
EST_write_status save_ngram_htk_ascii_sub(const EST_String &word, ostream *ost, EST_Ngrammar &n, double floor)
 
EST_Litem * item_start() const 
Used for iterating through members of the distribution. 
 
EST_Discrete * pred_vocab
 
int item_end(EST_Litem *idx) const 
Used for iterating through members of the distribution. 
 
EST_NgrammarState * p_states
 
int num_states(void) const 
 
double safe_log10(const double x)
 
EST_read_status load_ngram_htk_ascii(const EST_String filename, EST_Ngrammar &n)
 
EST_DiscreteProbDistribution vocab_pdf
 
int open(const EST_String &filename)
open a EST_TokenStream for a file. 
 
EST_DiscreteProbDistribution & pdf()
 
double probability(const EST_StrVector &words, bool force=false, const bool trace=false) const 
 
const EST_DiscreteProbDistribution & prob_dist(const EST_StrVector &words) const 
 
void cumulate(const EST_String &s, double count=1)
Add this observation, may specify number of occurrences. 
 
double frequency(const EST_String &s) const 
 
void item_freq(EST_Litem *idx, EST_String &s, double &freq) const 
During iteration returns name and frequency given index. 
 
EST_write_status save_ngram_cstr_bin(const EST_String filename, EST_Ngrammar &n, const bool trace, double floor)
 
EST_write_status save_ngram_htk_ascii(const EST_String filename, EST_Ngrammar &n, double floor)
 
The file was written successfully. 
 
representation_t representation() const 
 
EST_read_status load_ngram_cstr_bin(const EST_String filename, EST_Ngrammar &n)
 
void print_freqs(ostream &os, double floor=0.0)
 
EST_write_status save_ngram_arpa(const EST_String filename, EST_Ngrammar &n)
 
int EST_fseek(FILE *fp, EST_FilePos offset, int whence)
 
EST_Token & peek(void)
peek at next token 
 
EST_read_status load_ngram_htk_binary(const EST_String filename, EST_Ngrammar &n)
 
int get_vocab_length() const 
 
EST_write_status save_ngram_cstr_ascii(const EST_String filename, EST_Ngrammar &n, const bool trace, double floor)
 
The file was not written successfully. 
 
void set_frequency(const EST_String &s, double c)
 
void append(const T &item)
add item onto end of list 
 
const EST_StrVector & make_ngram_from_index(const int i) const 
 
EST_FilePos filepos(void) const 
current file position in EST_TokenStream 
 
const EST_String & string() const 
 
EST_Token get_upto_eoln(void)
get everything up to the end of the line as a single token. 
 
void save_ngram_arpa_sub(EST_Ngrammar *n, EST_StrVector &ngram, void *ost)
 
EST_String after(int pos, int len=1) const 
Part after pos+len. 
 
EST_String before(int pos, int len=0) const 
Part before position. 
 
void iterate(EST_StrVector &words, void(*function)(EST_Ngrammar *n, EST_StrVector &words, void *params), void *params)
 
bool ngram_exists(const EST_StrVector &words) const 
 
INLINE ssize_t n() const 
number of items in vector. 
 
bool init(int o, representation_t r, const EST_StrList &wordlist)
 
EST_String p_sentence_start_marker
 
EST_write_status save_ngram_wfst(const EST_String filename, EST_Ngrammar &n)