File: | modules/base/pos.cc |
Location: | line 128, column 2 |
Description: | Value stored to 'pd' is never read |
1 | /*************************************************************************/ |
2 | /* */ |
3 | /* Centre for Speech Technology Research */ |
4 | /* University of Edinburgh, UK */ |
5 | /* Copyright (c) 1996,1997 */ |
6 | /* All Rights Reserved. */ |
7 | /* */ |
8 | /* Permission is hereby granted, free of charge, to use and distribute */ |
9 | /* this software and its documentation without restriction, including */ |
10 | /* without limitation the rights to use, copy, modify, merge, publish, */ |
11 | /* distribute, sublicense, and/or sell copies of this work, and to */ |
12 | /* permit persons to whom this work is furnished to do so, subject to */ |
13 | /* the following conditions: */ |
14 | /* 1. The code must retain the above copyright notice, this list of */ |
15 | /* conditions and the following disclaimer. */ |
16 | /* 2. Any modifications must be clearly marked as such. */ |
17 | /* 3. Original authors' names are not deleted. */ |
18 | /* 4. The authors' names are not used to endorse or promote products */ |
19 | /* derived from this software without specific prior written */ |
20 | /* permission. */ |
21 | /* */ |
22 | /* THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK */ |
23 | /* DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING */ |
24 | /* ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT */ |
25 | /* SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE */ |
26 | /* FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES */ |
27 | /* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN */ |
28 | /* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, */ |
29 | /* ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF */ |
30 | /* THIS SOFTWARE. */ |
31 | /* */ |
32 | /*************************************************************************/ |
33 | /* Author : Alan W Black */ |
34 | /* Date : August 1996 */ |
35 | /*-----------------------------------------------------------------------*/ |
36 | /* */ |
37 | /* Various part-of-speech predicting modules */ |
38 | /* */ |
39 | /*=======================================================================*/ |
40 | #include <cstdio> |
41 | #include "festival.h" |
42 | #include "lexicon.h" |
43 | |
44 | using namespace std; |
45 | |
46 | static EST_VTCandidate *pos_candlist(EST_Item *s,EST_Features &f); |
47 | static EST_VTPath *pos_npath(EST_VTPath *p,EST_VTCandidate *c,EST_Features &f); |
48 | static double find_np_prob(EST_VTPath *p,int n,int *state); |
49 | |
50 | static EST_Ngrammar *pos_ngram = 0; |
51 | |
52 | static EST_String zeroString("0"); |
53 | static int p_word = 0; // arbitrary numbers |
54 | static int n_word = 1; |
55 | |
56 | LISP FT_Classic_POS_Utt(LISP utt) |
57 | { |
58 | // Predict part of speech for word stream |
59 | EST_Utterance *u = get_c_utt(utt)(utterance(utt)); |
60 | LISP pos_lex_name, pos_ngram_name; |
61 | LISP lastlex, pos_p_start_tag, pos_pp_start_tag; |
62 | |
63 | *cdebug << "Classic POS module\n"; |
64 | |
65 | pos_lex_name = siod_get_lval("pos_lex_name",NULL__null); |
66 | if (pos_lex_name == NIL((struct obj *) 0)) |
67 | return utt; // not set so ignore it |
68 | pos_ngram_name = siod_get_lval("pos_ngram_name","no pos ngram name"); |
69 | pos_p_start_tag = siod_get_lval("pos_p_start_tag","no prev start tag"); |
70 | pos_pp_start_tag = siod_get_lval("pos_pp_start_tag","no prev prev start tag"); |
71 | |
72 | lastlex = lex_select_lex(pos_lex_name); |
73 | |
74 | if ((pos_ngram = get_ngram(get_c_string(pos_ngram_name))) == 0) |
75 | { |
76 | cerr << "POS: no ngram called \"" << |
77 | get_c_string(pos_ngram_name) << "\" defined" << endl; |
78 | festival_error()(errjmp_ok ? longjmp(*est_errjmp,1) : festival_tidy_up(),exit (-1)); |
79 | } |
80 | |
81 | p_word = pos_ngram->get_vocab_word(get_c_string(pos_p_start_tag)); |
82 | n_word = pos_ngram->get_vocab_word(get_c_string(pos_pp_start_tag)); |
83 | |
84 | EST_Viterbi_Decoder v(pos_candlist,pos_npath,pos_ngram->num_states()); |
85 | |
86 | v.initialise(u->relation("Word")); |
87 | v.search(); |
88 | v.result("pos_index"); |
89 | |
90 | lex_select_lex(lastlex); |
91 | |
92 | EST_Item *w; |
93 | EST_String pos; |
94 | LISP l; |
95 | // Map pos tagset to desired set |
96 | LISP pos_map = siod_get_lval("pos_map",NULL__null); |
97 | for (w=u->relation("Word")->first(); w != 0; w = w->next()) |
98 | { |
99 | // convert pos index into string value |
100 | pos = pos_ngram->get_vocab_word(w->f("pos_index").Int()); |
101 | w->set("pos",pos); |
102 | for (l=pos_map; l != NIL((struct obj *) 0); l=cdr(l)) |
103 | if (siod_member_str(pos,car(car(l))) != NIL((struct obj *) 0)) |
104 | { |
105 | w->set("pos",get_c_string(car(cdr(car(l))))); |
106 | break; |
107 | } |
108 | } |
109 | |
110 | return utt; |
111 | } |
112 | |
113 | static EST_VTCandidate *pos_candlist(EST_Item *s,EST_Features &f) |
114 | { |
115 | // Return list of possible pos based on a priori probabilities |
116 | LISP pd,l; |
117 | EST_Item *token; |
118 | EST_VTCandidate *c; |
119 | EST_VTCandidate *all_c = 0; |
120 | EST_String actual_pos; |
121 | (void)f; |
122 | |
123 | if (((actual_pos = s->S("pos","0")) != "0") || |
124 | (((token = parent(s,"Token")) != 0) && |
125 | ((actual_pos = token->S("pos","0")) != "0"))) |
126 | { |
127 | // There is an explicit pos specified, so respect it |
128 | pd = cons(make_param_float(actual_pos,1.0),NIL((struct obj *) 0)); |
Value stored to 'pd' is never read | |
129 | c = new EST_VTCandidate; |
130 | c->name = pos_ngram->get_vocab_word(actual_pos); |
131 | c->score = 1.0; |
132 | c->s = s; |
133 | c->next = 0; |
134 | return c; |
135 | } |
136 | |
137 | LISP e = lex_lookup_word(s->name(),NIL((struct obj *) 0)); |
138 | pd = car(cdr(e)); |
139 | |
140 | if (pd == NIL((struct obj *) 0)) |
141 | { |
142 | const char *chr = s->name(); |
143 | if (strchr("0123456789",chr[0]) != NULL__null) |
144 | e = lex_lookup_word("_number_",NIL((struct obj *) 0)); // I *know* there is an entry |
145 | else |
146 | e = lex_lookup_word("_OOV_",NIL((struct obj *) 0)); // I *know* there is an entry |
147 | pd = car(cdr(e)); |
148 | } |
149 | |
150 | // Build a candidate for each entry in prob distribution |
151 | for (l=pd; l != NIL((struct obj *) 0); l=cdr(l)) |
152 | { |
153 | c = new EST_VTCandidate; |
154 | c->name = pos_ngram->get_vocab_word(get_c_string(car(car(l)))); |
155 | c->score = get_c_float(car(cdr(car(l)))); |
156 | c->s = s; |
157 | c->next = all_c; |
158 | all_c = c; |
159 | } |
160 | |
161 | return all_c; |
162 | } |
163 | |
164 | static EST_VTPath *pos_npath(EST_VTPath *p,EST_VTCandidate *c,EST_Features &f) |
165 | { |
166 | // Build a potential new path from previous path and this candidate |
167 | EST_VTPath *np = new EST_VTPath; |
168 | // static EST_String lscorename("lscore"); |
169 | double prob; |
170 | double lprob; |
171 | (void)f; |
172 | |
173 | np->c = c; |
174 | np->from = p; |
175 | int n = c->name.Int(); |
176 | prob = find_np_prob(p,n,&np->state); |
177 | if (prob == 0) |
178 | lprob = log(0.00000001); |
179 | else |
180 | lprob = log(prob); |
181 | |
182 | // np->set_feature(lscorename,(c->score+lprob)); |
183 | if (p==0) |
184 | np->score = (c->score+lprob); |
185 | else |
186 | np->score = (c->score+lprob) + p->score; |
187 | |
188 | return np; |
189 | } |
190 | |
191 | static double find_np_prob(EST_VTPath *p,int n,int *state) |
192 | { |
193 | int oldstate=0; |
194 | |
195 | if (p==0) |
196 | { // This could be done once before the search is called |
197 | int order = pos_ngram->order(); |
198 | EST_IVector window(order); |
199 | int i; |
200 | |
201 | window.a_no_check(order-1) = n; |
202 | window.a_no_check(order-2) = p_word; |
203 | for (i = order-3; i>=0; i--) |
204 | window.a_no_check(i) = n_word; |
205 | oldstate = pos_ngram->find_state_id(window); |
206 | } |
207 | else |
208 | oldstate = p->state; |
209 | *state = pos_ngram->find_next_state_id(oldstate,n); |
210 | const EST_DiscreteProbDistribution &pd = pos_ngram->prob_dist(oldstate); |
211 | if (pd.samples() == 0) |
212 | return 0; |
213 | else |
214 | return (double)pd.probability(n); |
215 | } |