speech-tools/ngram__test__main_8cc_source.html

 /*************************************************************************/
 /*                                                                       */
 /*                Centre for Speech Technology Research                  */
 /*                     University of Edinburgh, UK                       */
 /*                      Copyright (c) 1995,1996                          */
 /*                        All Rights Reserved.                           */
 /*                                                                       */
 /*  Permission is hereby granted, free of charge, to use and distribute  */
 /*  this software and its documentation without restriction, including   */
 /*  without limitation the rights to use, copy, modify, merge, publish,  */
 /*  distribute, sublicense, and/or sell copies of this work, and to      */
 /*  permit persons to whom this work is furnished to do so, subject to   */
 /*  the following conditions:                                            */
 /*   1. The code must retain the above copyright notice, this list of    */
 /*      conditions and the following disclaimer.                         */
 /*   2. Any modifications must be clearly marked as such.                */
 /*   3. Original authors' names are not deleted.                         */
 /*   4. The authors' names are not used to endorse or promote products   */
 /*      derived from this software without specific prior written        */
 /*      permission.                                                      */
 /*                                                                       */
 /*  THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK        */
 /*  DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING      */
 /*  ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT   */
 /*  SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE     */
 /*  FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES    */
 /*  WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN   */
 /*  AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,          */
 /*  ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF       */
 /*  THIS SOFTWARE.                                                       */
 /*                                                                       */
 /*************************************************************************/
 /*                 Authors:  Simon King                                  */
 /*                 Date   :  July 1995                                   */
 /*-----------------------------------------------------------------------*/
 /*                 EST_Ngrammar test program                             */
 /*                                                                       */
 /*=======================================================================*/
 #include "EST.h"
 #include "EST_Ngrammar.h"

 using namespace std;

 /*-----------------------------------------------------------------------*/
 /*  Entry point of the EST_Ngrammar test program.                        */
 /*                                                                       */
 /*  Parses the command line, loads a grammar (-g, optionally with a      */
 /*  word list given by -w), then calls test_stats() on every test file   */
 /*  named on the command line and/or in the -S script file.  Per-file    */
 /*  results are printed when -f is given; a summary over all files that  */
 /*  were processed successfully is printed at the end.                   */
 /*                                                                       */
 /*  Exits with status 1 on a usage or load error.  Returns 0 otherwise,  */
 /*  including when individual files fail or no data was processed (a     */
 /*  diagnostic is written to cerr in those cases).                       */
 /*-----------------------------------------------------------------------*/
 int main(int argc, char **argv)
 {
     EST_StrList files, script;
     EST_Option al;
     EST_String wordlist_file, script_file, in_file;
     EST_String prev_tag, prev_prev_tag, last_tag;
     EST_Litem *p;

     EST_StrList wordlist;
     EST_Ngrammar ngrammar;
     bool per_file_stats = false;
     bool raw_stats = false;
     bool brief = false;
     EST_String input_format;

     // Per-file results written by test_stats(); initialised so they never
     // hold indeterminate values.
     double raw_entropy = 0, count = 0, entropy = 0, perplexity = 0;
     // Running totals over every file that was processed successfully.
     double total_raw_H = 0;
     double total_count = 0;

     parse_command_line
     (argc, argv,
      EST_String("[input file0] [input file1] ...\n")+
      "-g <ifile>   grammar file (required)\n"+
      "-w <ifile>   filename containing word list (required for some grammar formats)\n"+
      "-S <ifile>   script file\n"+
      "-raw_stats   print unnormalised entropy and sample count\n"+
      "-brief       print results in brief format\n"+
      "-f           print stats for each file\n"+
      "\n"+
      "-input_format <string>\n"+
      "             format of input data (default sentence_per_line)\n"+
      "             may also be sentence_per_file, or ngram_per_line.\n"+
      "\n"+
      "Pseudo-words :\n"+
      "-prev_tag <string>\n"+
      "             tag before sentence start\n"+
      "-prev_prev_tag <string>\n"+
      "             all words before 'prev_tag'\n"+
      "-last_tag <string>\n"+
      "             after sentence end\n"+
      "-default_tags\n"+
      "             use default tags of "+SENTENCE_START_MARKER+
      ","+SENTENCE_END_MARKER+" and "+SENTENCE_END_MARKER+"\n"+
      "             respectively\n",
      files, al);

     // An absent -w leaves the word list name empty, which selects the
     // grammar load that takes no word list further down.
     if (al.present("-w"))
         wordlist_file = al.val("-w");
     else
         wordlist_file = "";

     if (al.present("-f"))
         per_file_stats = true;

     if (al.present("-input_format"))
         input_format = al.val("-input_format");
     else
         input_format = "sentence_per_line";

     // NOTE(review): "-r" and "-b" are not listed in the usage text above;
     // they are still honoured here in case the parser accepts them.
     if (al.present("-raw_stats") || al.present("-r"))
         raw_stats = true;

     if (al.present("-brief") || al.present("-b"))
         brief = true;

     // Pseudo-word tags: -default_tags sets all three, and any explicitly
     // given tag then overrides its default (with a warning).
     if (al.present("-default_tags"))
     {
         prev_tag = SENTENCE_START_MARKER;
         prev_prev_tag = SENTENCE_END_MARKER;
         last_tag = SENTENCE_END_MARKER;
     }

     if (al.present("-prev_tag"))
     {
         if (al.present("-default_tags"))
             cerr << "test_ngram: WARNING : -prev_tag overrides -default_tags"
                  << endl;
         prev_tag = al.val("-prev_tag");
     }

     if (al.present("-prev_prev_tag"))
     {
         if (al.present("-default_tags"))
             cerr << "test_ngram: WARNING : -prev_prev_tag overrides -default_tags" << endl;
         prev_prev_tag = al.val("-prev_prev_tag");
     }

     if (al.present("-last_tag"))
     {
         if (al.present("-default_tags"))
             cerr << "test_ngram: WARNING : -last_tag overrides -default_tags" << endl;
         last_tag = al.val("-last_tag");
     }

     // Tags are all-or-nothing: reject a mixture of empty and non-empty.
     if (   ( (prev_tag=="") ||  (prev_prev_tag=="") || (last_tag=="") )
         && ( (prev_tag!="") ||  (prev_prev_tag!="") || (last_tag!="") )   )
     {
         cerr << "test_ngram: ERROR : if any tags are given, ALL must be given" << endl;
         exit(1);
     }

     // script
     if (al.present("-S"))
     {
         script_file = al.val("-S");

         if (load_StrList(script_file,script) != format_ok)
         {
             cerr << "test_ngram: Could not read script from file "
                  << script_file << endl;
             exit(1);
         }
     }

     if (al.present("-g"))
         in_file = al.val("-g");
     else
     {
         cerr << "test_ngram: Must give a grammar filename using -g" << endl;
         exit(1);
     }

     // plus any files on command line
     // except file "-" unless there is no script
     if (script.head() == NULL)
         script += files;
     else
     {
         for (p = files.head(); p != 0; p = p->next())
             if (files(p) != "-")
                 script.append(files(p));
     }

     if (script.head() == NULL)
     {
         cerr << "test_ngram: No test files given" << endl;
         exit(1);
     }

     if (wordlist_file != "")
     {
         // load wordlist
         if (load_StrList(wordlist_file,wordlist) != format_ok)
         {
             cerr << "test_ngram: Could not read wordlist from file " << wordlist_file
                  << endl;
             exit(1);
         }

         // load grammar using wordlist
         if (ngrammar.load(in_file,wordlist) != format_ok)
         {
             cerr << "test_ngram: Failed to load grammar" << endl;
             exit(1);
         }
     }
     else
     {
         if (ngrammar.load(in_file) != format_ok)
         {
             cerr << "test_ngram: Failed to load grammar" << endl;
             exit(1);
         }
     }

     if (!brief)
     {
         cout << "Ngram Test Results" << endl;
         cout << "==================" << endl;
     }

     // The all-or-nothing check above guarantees that a non-empty last_tag
     // means all three tags were supplied.
     const bool tags_given = (last_tag != "");

     for (p = script.head(); p; p = p->next())
     {
         // test each file
         bool processed;
         if (tags_given)
         {
             // Forward the sentence-end tag as well; previously it was
             // parsed and validated but never reached test_stats().
             processed = test_stats(ngrammar,
                                    script(p),
                                    raw_entropy,count,
                                    entropy,perplexity,
                                    input_format,
                                    prev_tag,
                                    prev_prev_tag,
                                    last_tag);
         }
         else
         {
             // No tags given: keep the historical call unchanged so that
             // whatever test_stats() uses for an omitted last argument
             // still applies.
             processed = test_stats(ngrammar,
                                    script(p),
                                    raw_entropy,count,
                                    entropy,perplexity,
                                    input_format,
                                    prev_tag,
                                    prev_prev_tag);
         }

         if (!processed)
         {
             cerr << "test_ngram: WARNING : file '" << script(p)
                  << "' could not be processed" << endl;
             continue;
         }

         total_raw_H += raw_entropy;
         total_count += count;

         if (!per_file_stats)
             continue;

         if (brief)
             cout << basename(script(p)) << " \t";
         else
             cout << script(p) << endl;

         if (raw_stats)
         {
             if (brief)
                 cout << raw_entropy << " " << count << " ";
             else
             {
                 cout << " raw entropy " << raw_entropy << endl;
                 cout << " count       " << count << endl;
             }
         }

         if (brief)
             cout << entropy << " " << perplexity << endl;
         else
         {
             cout << " entropy     " << entropy << endl;
             cout << " perplexity  " << perplexity << endl << endl;
         }
     }

     if (total_count > 0)
     {
         // Overall figures: summed raw entropy divided by summed count,
         // and 2 raised to that value as the perplexity.
         const double mean_entropy = total_raw_H / total_count;
         const double overall_perplexity = pow(2.0, mean_entropy);

         if (!brief)
             cout << "Summary for grammar " << in_file << endl;
         else if (per_file_stats)
             cout << "summary \t";

         if (raw_stats)
         {
             if (brief)
                 cout << total_raw_H << " " << total_count << " ";
             else
             {
                 cout << " raw entropy " << total_raw_H << endl;
                 cout << " count       " << total_count << endl;
             }
         }

         if (brief)
         {
             cout << mean_entropy;
             cout << " " << overall_perplexity;
             cout << endl;
         }
         else
         {
             cout << " entropy     " << mean_entropy << endl;
             cout << " perplexity  " << overall_perplexity;
             cout << endl;
         }
     }
     else
     {
         cerr << "test_ngram: No data processed" << endl;
     }

     // Exit status stays 0 here even if some (or all) files failed, as
     // before; failures are reported on cerr only.
     return 0;
 }


 /*-----------------------------------------------------------------------*/
 /*  Does nothing: both option lists are explicitly discarded, which      */
 /*  also keeps unused-parameter warnings quiet.                          */
 /*  NOTE(review): presumably a hook that the library's command line      */
 /*  handling expects every program to define -- confirm.                 */
 /*-----------------------------------------------------------------------*/
 void override_lib_ops(EST_Option &a_list, EST_Option &al)
 {
     static_cast<void>(a_list);
     static_cast<void>(al);
 }

EST_Ngrammar::load
EST_read_status load(const EST_String &filename)
Definition: EST_Ngrammar.cc:2115

EST_UItem
Definition: EST_UList.h:49

SENTENCE_END_MARKER
#define SENTENCE_END_MARKER
Definition: EST_Ngrammar.h:60

std

EST_UItem::next
EST_UItem * next()
Definition: EST_UList.h:55

EST_Ngrammar
Definition: EST_Ngrammar.h:216

main
int main(int argc, char **argv)
Definition: ngram_test_main.cc:44

override_lib_ops
void override_lib_ops(EST_Option &a_list, EST_Option &al)
Definition: ngram_test_main.cc:308

NULL
NULL
Definition: EST_WFST.cc:55

EST.h

EST_TKVL::val
const V & val(const K &rkey, bool m=0) const
return value according to key (const)
Definition: EST_TKVL.cc:145

EST_TList::append
void append(const T &item)
add item onto end of list
Definition: EST_TList.h:196

basename
EST_String basename(EST_String full, EST_String ext="")
This acts like the Bourne shell basename command. By default, it strips any leading path from a string.
Definition: util_io.cc:167

EST_Option
Definition: EST_Option.h:50

format_ok
#define format_ok
Definition: EST_rw_status.h:159

EST_TKVL::present
int present(const K &rkey) const
Returns true if key is present.
Definition: EST_TKVL.cc:222

EST_UList::head
EST_UItem * head() const
Definition: EST_UList.h:97

EST_String
EST_String
Definition: EST_features_aux.cc:50

SENTENCE_START_MARKER
#define SENTENCE_START_MARKER
Definition: EST_Ngrammar.h:59

load_StrList
EST_read_status load_StrList(EST_String filename, EST_StrList &l)
Load tokens from a file and return them in an EST_StrList.
Definition: EST_slist_aux.cc:128

parse_command_line
int parse_command_line(int argc, char *argv[], const EST_String &usage, EST_StrList &files, EST_Option &al, int make_stdio=1)
Definition: cmd_line.cc:101

EST_Ngrammar.h

test_stats
bool test_stats(EST_Ngrammar &ngram, const EST_String &filename, double &raw_entropy, double &count, double &entropy, double &perplexity, const EST_String &input_format, const EST_String &prev, const EST_String &prev_prev, const EST_String &last)
Definition: ngrammar_utils.cc:89

EST_String
Definition: EST_String.h:76

EST_TList
Definition: EST_TList.h:61