Edinburgh Speech Tools  2.1-release
scfg_parse_main.cc
Go to the documentation of this file.
1 /*************************************************************************/
2 /* */
3 /* Centre for Speech Technology Research */
4 /* University of Edinburgh, UK */
5 /* Copyright (c) 1996,1997 */
6 /* All Rights Reserved. */
7 /* */
8 /* Permission is hereby granted, free of charge, to use and distribute */
9 /* this software and its documentation without restriction, including */
10 /* without limitation the rights to use, copy, modify, merge, publish, */
11 /* distribute, sublicense, and/or sell copies of this work, and to */
12 /* permit persons to whom this work is furnished to do so, subject to */
13 /* the following conditions: */
14 /* 1. The code must retain the above copyright notice, this list of */
15 /* conditions and the following disclaimer. */
16 /* 2. Any modifications must be clearly marked as such. */
17 /* 3. Original authors' names are not deleted. */
18 /* 4. The authors' names are not used to endorse or promote products */
19 /* derived from this software without specific prior written */
20 /* permission. */
21 /* */
22 /* THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK */
23 /* DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING */
24 /* ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT */
25 /* SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE */
26 /* FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES */
27 /* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN */
28 /* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, */
29 /* ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF */
30 /* THIS SOFTWARE. */
31 /* */
32 /*************************************************************************/
33 /* Author : Alan W Black */
34 /* Date : October 1997 */
35 /*-----------------------------------------------------------------------*/
36 /* Parse a list of sentences with a given stochastic context free */
37 /* grammar */
38 /* */
39 /*=======================================================================*/
40 #include <cstdlib>
41 #include <cstdio>
42 #include <iostream>
43 #include <fstream>
44 #include <cstring>
45 #include "EST.h"
46 #include "EST_SCFG.h"
47 #include "EST_SCFG_Chart.h"
48 #include "siod.h"
49 
50 using namespace std;
51 
// Output file name taken from the -o option; "-" is the value used when
// no -o option is given (see scfg_parse_main).
// NOTE(review): within the visible code this is only assigned, never read;
// the output stream itself is opened from al.val("-o").
52 static EST_String outfile = "-";
53 
// Forward declaration: the function is defined after main(), which calls it
// with the unmodified command line arguments.
54 static int scfg_parse_main(int argc, char **argv);
55 
56 
57 int main(int argc, char **argv)
58 {
59 
60  scfg_parse_main(argc,argv);
61 
62  exit(0);
63  return 0;
64 }
65 
66 static int scfg_parse_main(int argc, char **argv)
67 {
68  // Top level function generates a probabilistic grammar
69  EST_Option al;
70  EST_StrList files;
71  EST_SCFG_Chart chart;
72  LISP rules,s,parse;
73  FILE *corpus,*output;
74  int i;
75 
77  (argc, argv,
78  EST_String("[options]\n")+
79  "Summary: Parse a corpus with a stochastic context free grammar\n"+
80  "-grammar <ifile> Grammar file, one rule per line.\n"+
81  "-corpus <ifile> Corpus file, one bracketed sentence per line.\n"+
82  "-brackets Output bracketing only.\n"+
83  "-o <ofile> Output file for parsed sentences.\n",
84  files, al);
85 
86  if (al.present("-o"))
87  outfile = al.val("-o");
88  else
89  outfile = "-";
90 
91  siod_init();
92 
93  if (al.present("-grammar"))
94  {
95  rules = vload(al.val("-grammar"),1);
96  gc_protect(&rules);
97  }
98  else
99  {
100  cerr << "scfg_parse: no grammar specified" << endl;
101  exit(1);
102  }
103 
104  if (al.present("-corpus"))
105  {
106  if ((corpus = fopen(al.val("-corpus"),"r")) == NULL)
107  {
108  cerr << "scfg_parse: can't open corpus file \"" <<
109  al.val("-corpus") << "\" for reading " << endl;
110  exit(1);
111  }
112  }
113  else
114  {
115  cerr << "scfg_parse: no corpus specified" << endl;
116  exit(1);
117  }
118 
119  if (al.present("-o"))
120  {
121  if ((output=fopen(al.val("-o"),"w")) == NULL)
122  {
123  cerr << "scfg_parse: can't open output file \"" <<
124  al.val("-o") << "\" for writing " << endl;
125  exit(1);
126  }
127  }
128  else
129  output = stdout;
130 
131  gc_protect(&s);
132  gc_protect(&parse);
133  for (i=0; ((s=lreadf(corpus)) != get_eof_val()); i++)
134  {
135  parse = scfg_parse(s,rules);
136  if (al.present("-brackets"))
137  {
138  LISP bparse = scfg_bracketing_only(parse);
139  if (bparse == NIL)
140  bparse = s;
141  pprint_to_fd(output,bparse);
142  }
143  else
144  pprint_to_fd(output,parse);
145  if (i%100 == 99)
146  user_gc(NIL);
147  }
148 
149  if (output != stdout)
150  fclose(output);
151  gc_unprotect(&s);
152  gc_unprotect(&parse);
153  gc_unprotect(&rules);
154 
155  return 0;
156 }
157 
LISP get_eof_val(void)
Definition: slib_file.cc:556
#define NIL
Definition: siod_defs.h:92
void gc_unprotect(LISP *location)
Definition: slib.cc:759
LISP user_gc(LISP args)
Definition: slib.cc:1269
LISP scfg_bracketing_only(LISP parse)
LISP vload(const char *fname, long cflag)
Definition: slib_file.cc:632
LISP lreadf(FILE *f)
Definition: slib.cc:1582
EST_String outfile
NULL
Definition: EST_WFST.cc:55
const V & val(const K &rkey, bool m=0) const
return value according to key (const)
Definition: EST_TKVL.cc:145
void pprint_to_fd(FILE *fd, LISP exp)
Definition: slib_file.cc:74
A class for parsing with a probabilistic grammars.
int main(int argc, char **argv)
int present(const K &rkey) const
Returns true if key is present.
Definition: EST_TKVL.cc:222
void gc_protect(LISP *location)
Definition: slib.cc:791
int siod_init(int heap_size=DEFAULT_HEAP_SIZE)
Definition: siod.cc:58
EST_String
void scfg_parse(EST_Relation *Word, const EST_String &name, EST_Relation *Syntax, EST_SCFG &grammar)
int parse_command_line(int argc, char *argv[], const EST_String &usage, EST_StrList &files, EST_Option &al, int make_stdio=1)
Definition: cmd_line.cc:101