Edinburgh Speech Tools  2.1-release
est_file.cc
Go to the documentation of this file.
1 /*************************************************************************/
2 /* */
3 /* Centre for Speech Technology Research */
4 /* University of Edinburgh, UK */
5 /* Copyright (c) 1994,1995,1996 */
6 /* All Rights Reserved. */
7 /* */
8 /* Permission is hereby granted, free of charge, to use and distribute */
9 /* this software and its documentation without restriction, including */
10 /* without limitation the rights to use, copy, modify, merge, publish, */
11 /* distribute, sublicense, and/or sell copies of this work, and to */
12 /* permit persons to whom this work is furnished to do so, subject to */
13 /* the following conditions: */
14 /* 1. The code must retain the above copyright notice, this list of */
15 /* conditions and the following disclaimer. */
16 /* 2. Any modifications must be clearly marked as such. */
17 /* 3. Original authors' names are not deleted. */
18 /* 4. The authors' names are not used to endorse or promote products */
19 /* derived from this software without specific prior written */
20 /* permission. */
21 /* */
22 /* THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK */
23 /* DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING */
24 /* ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT */
25 /* SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE */
26 /* FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES */
27 /* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN */
28 /* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, */
29 /* ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF */
30 /* THIS SOFTWARE. */
31 /* */
32 /*************************************************************************/
33 /* Author : Paul Taylor */
34 /* Date : March 1998 */
35 /*-----------------------------------------------------------------------*/
36 /* File functions for EST type files */
37 /* */
38 /*=======================================================================*/
39 
40 #include "EST_FileType.h"
41 #include "EST_TNamedEnum.h"
42 #include "EST_Token.h"
43 #include "EST_Option.h"
44 #include "EST_Features.h"
45 
46 using namespace std;
47 
// Table pairing each EST_EstFileType value with the name string(s) written
// after the "EST_File" magic in a file header.  It is handed to
// EstFileEnums (EST_TNamedEnum<EST_EstFileType>), which read_est_header()
// queries through EstFileEnums.token().
// An entry may list more than one spelling (see "Track"/"track").
// NOTE(review): the line carrying the declaration's type (listing line 48)
// is absent from this view -- confirm against the original source.
49 estfile_names[] =
50 {
51  { est_file_none, { "None" }, 0},
52  { est_file_track, { "Track", "track" }, 0},
53  { est_file_wave, { "wave" }, 0},
54  { est_file_label, { "label" }, 0},
55  { est_file_utterance, { "utterance" }, 0},
56  { est_file_fmatrix, { "fmatrix" }, 0},
57  { est_file_fvector, { "fvector" }, 0},
58  { est_file_dmatrix, { "dmatrix" }, 0},
59  { est_file_dvector, { "dvector" }, 0},
60  { est_file_feature_data, { "feature_data" }, 0},
61  { est_file_fst, { "fst" }, 0},
62  { est_file_ngram, { "ngram" }, 0},
63  { est_file_index, { "index" }, 0},
64  { est_file_f_catalogue, { "f_catalogue" }, 0},
65  { est_file_unknown, { "unknown" }, 0},
// Repeats the first entry; presumably the end-of-table marker expected by
// EST_TNamedEnum -- TODO confirm before removing as a "duplicate".
66  { est_file_none, { "None" }, 0},
67 };
68 
70 
// Explicit template instantiation, compiled only when INSTANTIATE_TEMPLATES
// is defined: includes the EST_TNamedEnum implementation file and
// instantiates it for EST_EstFileType.
// NOTE(review): listing lines 75-77 are absent from this view -- check the
// original source for further instantiations that belong in this block.
71 #if defined(INSTANTIATE_TEMPLATES)
72 
73 #include "../base_class/EST_TNamedEnum.cc"
74 template class EST_TNamedEnum<EST_EstFileType>;
78 #endif
79 
80 /** Read and parse the header of an EST_File - interim version
81 returning features rather than EST_Option
82 */
83 
85  bool &ascii, EST_EstFileType &t)
86 {
87  EST_String k, v;
88  char magic_number[9];
89  int pos;
90 
91  // read initial file type identifier, can't use peek or get
92  // as that could read *way* too far if it's binary so just read
93  // the first n bytes to change the magic number
94  pos = ts.tell();
95  if ((ts.fread(magic_number,sizeof(char),8) != 8) ||
96  (strncmp(magic_number,"EST_File",8) != 0))
97  {
98  if (ts.seek(pos) != 0) {
99  cerr << "Data seek error: Could not set TokenStream to the original position" << endl;
100  return wrong_format;
101  }
102  return wrong_format;
103  }
104 
105  v = ts.get().string();
106  t = EstFileEnums.token(v);
107 
108  if (t == est_file_none)
109  {
110  // Its not a standardly defined type but did have EST_File on
111  // it so accept it but set FileType in the header info
112  t = est_file_unknown;
113  hinfo.set("FileType", v);
114  }
115 
116  while ((!ts.eof()) && (ts.peek().string() != "EST_Header_End"))
117  { // note this *must* be done using temporary variables
118  k = ts.get().string();
119  v = ts.get_upto_eoln().string();
120 
121  if (v.contains(RXwhite, 0))
122  v = v.after(RXwhite);
123 
124  hinfo.set(k, v);
125  }
126 
127  if (ts.eof())
128  {
129  cerr << "Unexpected end of EST_File" << endl;
130  return misc_read_error;
131  }
132  ts.get().string(); // read control EST_Header_End
133 
134  // If it explicitly says binary it is, otherwise its ascii
135  if (hinfo.S("DataType") == "binary")
136  ascii = false;
137  else
138  ascii = true;
139 
140  return format_ok;
141 }
142 
144  bool &ascii, EST_EstFileType &t)
145 {
146  EST_String k, v;
147  char magic_number[9];
148  EST_FilePos pos;
149 
150  // read initial file type identifier, can't use peek or get
151  // as that could read *way* too far if it's binary so just read
152  // the first n bytes to change the magic number
153  pos = ts.tell();
154  if ((ts.fread(magic_number,sizeof(char),8) != 8) ||
155  (strncmp(magic_number,"EST_File",8) != 0))
156  {
157  if (ts.seek(pos) != 0) {
158  cerr << "Data seek error: Could not set the TokenStream to the original position" << endl;
159  return wrong_format;
160  }
161  return wrong_format;
162  }
163 
164  v = ts.get().string();
165  t = EstFileEnums.token(v);
166 
167  if (t == est_file_none)
168  {
169  // Its not a standardly defined type but did have EST_File on
170  // it so accept it but set FileType in the header info
171  t = est_file_unknown;
172  hinfo.add_item("FileType",v);
173  }
174 
175  while ((!ts.eof()) && (ts.peek().string() != "EST_Header_End"))
176  { // note this *must* be done using temporary variables
177  k = ts.get().string();
178  v = ts.get_upto_eoln().string();
179 
180  if (v.contains(RXwhite, 0))
181  v = v.after(RXwhite);
182 
183  hinfo.add_item(k, v);
184  }
185 
186  if (ts.eof())
187  {
188  cerr << "Unexpected end of EST_File" << endl;
189  return misc_read_error;
190  }
191  ts.get().string(); // read control EST_Header_End
192 
193  // If it explicitly says binary it is, otherwise its ascii
194  if (hinfo.sval("DataType",0) == "binary")
195  ascii = false;
196  else
197  ascii = true;
198 
199  return format_ok;
200 }
EST_TokenStream & get(EST_Token &t)
get next token in stream
Definition: EST_Token.cc:499
int contains(const char *s, ssize_t pos=-1) const
Does it contain this substring?
Definition: EST_String.h:365
int fread(void *buff, int size, int nitems) EST_WARN_UNUSED_RESULT
Reading binary data, (don't use peek() immediately beforehand)
Definition: EST_Token.cc:368
EST_EstFileType
Definition: EST_FileType.h:50
EST_FilePos tell(void) const
tell, synonym for filepos
Definition: EST_Token.h:369
void set(const EST_String &name, int ival)
Definition: EST_Features.h:186
const EST_String S(const EST_String &path) const
Definition: EST_Features.h:158
int eof()
end of file
Definition: EST_Token.h:362
EST_read_status read_est_header(EST_TokenStream &ts, EST_Features &hinfo, bool &ascii, EST_EstFileType &t)
Definition: est_file.cc:84
const EST_String & sval(const EST_String &rkey, int m=1) const
Definition: EST_Option.cc:93
EST_Regex RXwhite("[ \n\t\r]+")
White space.
#define wrong_format
#define misc_read_error
EST_Token & peek(void)
peek at next token
Definition: EST_Token.h:332
int add_item(const K &rkey, const V &rval, int no_search=0)
add key-val pair to list
Definition: EST_TKVL.cc:248
EST_read_status
const EST_String & string() const
Definition: EST_Token.h:120
#define format_ok
EST_Token get_upto_eoln(void)
get up to s in end of line as a single token.
Definition: EST_Token.cc:529
EST_String after(int pos, int len=1) const
Part after pos+len.
Definition: EST_String.h:308
EST_TNamedEnum< EST_EstFileType > EstFileEnums(estfile_names)
int seek(int position)
seek, reposition file pointer
Definition: EST_Token.cc:318
off_t EST_FilePos
Definition: EST_File.h:69