| 1 | |
| 2 | |
| 3 | |
| 4 | |
| 5 | |
| 6 | |
| 7 | |
| 8 | |
| 9 | |
| 10 | |
| 11 | |
| 12 | |
| 13 | |
| 14 | |
| 15 | |
| 16 | |
| 17 | |
| 18 | |
| 19 | |
| 20 | |
| 21 | |
| 22 | |
| 23 | |
| 24 | |
| 25 | |
| 26 | |
| 27 | |
| 28 | |
| 29 | |
| 30 | |
| 31 | |
| 32 | |
| 33 | |
| 34 | |
| 35 | |
| 36 | |
| 37 | |
| 38 | |
| 39 | |
| 40 | |
| 41 | #include "siod.h" |
| 42 | #include "EST.h" |
| 43 | #include "us_diphone.h" |
| 44 | #include "Phone.h" |
| 45 | |
| 46 | using namespace std; |
| 47 | |
| 48 | extern USDiphIndex *diph_index; |
| 49 | |
| 50 | void dur_to_end(EST_Relation &r) |
| 51 | { |
| 52 | float prev_end = 0; |
| 53 | |
| 54 | for (EST_Item *p = r.head(); p ; p = p->next()) |
| 55 | { |
| 56 | p->set("end", p->F("dur") + prev_end); |
| 57 | prev_end = p->F("end"); |
| 58 | } |
| 59 | } |
| 60 | |
| 61 | void add_end_silences(EST_Relation &segment, EST_Relation &target) |
| 62 | { |
| 63 | EST_Item *t, *n; |
| 64 | float shift = 0.0; |
| 65 | const float pause_duration = 0.1; |
| 66 | |
| 67 | t = segment.head(); |
| 68 | if (!ph_is_silence(t->f("name"))) |
| 69 | { |
| 70 | n = t->insert_before(); |
| 71 | n->set("name", ph_silence()); |
| 72 | n->set("dur", pause_duration); |
| 73 | shift += pause_duration; |
| 74 | } |
| 75 | |
| 76 | t = segment.tail(); |
| 77 | if (!ph_is_silence(t->S("name"))) |
| 78 | { |
| 79 | n = t->insert_after(); |
| 80 | n->set("name", ph_silence()); |
| 81 | n->set("dur", pause_duration); |
| 82 | shift += pause_duration; |
| 83 | } |
| 84 | dur_to_end(segment); |
| 85 | |
| 86 | target.tail()->set("pos", (target.tail()->F("pos") + shift)); |
| 87 | } |
| 88 | |
| 89 | void add_end_silences(EST_Relation &segment) |
| 90 | { |
| 91 | EST_Item *t, *n; |
| 92 | |
| 93 | t = segment.head(); |
| 94 | if (!ph_is_silence(t->S("name"))) |
| 95 | { |
| 96 | n = t->insert_before(); |
| 97 | n->set("name", ph_silence()); |
| 98 | } |
| 99 | |
| 100 | t = segment.tail(); |
| 101 | if (!ph_is_silence(t->S("name"))) |
| 102 | { |
| 103 | n = t->insert_after(); |
| 104 | n->set("name", ph_silence()); |
| 105 | } |
| 106 | } |
| 107 | |
| 108 | void parse_diphone_times(EST_Relation &diphone_stream, |
| 109 | EST_Relation &source_lab) |
| 110 | { |
| 111 | EST_Item *s, *u; |
| 112 | EST_Track *pm; |
| 113 | int e_frame, m_frame = 0; |
| 114 | float dur_1 = 0.0, dur_2 = 0.0, p_time; |
| 115 | float t_time = 0.0, end; |
| 116 | p_time = 0.0; |
| 117 | |
| 118 | for (s = source_lab.head(), u = diphone_stream.head(); u; u = u->next(), |
| 119 | s = s->next()) |
| 120 | { |
| 121 | pm = track(u->f("coefs")); |
| 122 | |
| 123 | e_frame = pm->num_frames() - 1; |
| 124 | m_frame = u->I("middle_frame"); |
| 125 | |
| 126 | if (m_frame < 0) m_frame=0; |
| 127 | dur_1 = pm->t(m_frame); |
| 128 | if (e_frame < m_frame) e_frame=m_frame; |
| 129 | dur_2 = pm->t(e_frame) - dur_1; |
| 130 | |
| 131 | s->set("source_end", (dur_1 + p_time)); |
| 132 | |
| 133 | p_time = s->F("source_end") + dur_2; |
| 134 | |
| 135 | end = dur_1 + dur_2 + t_time; |
| 136 | t_time = end; |
| 137 | u->set("end", t_time); |
| 138 | } |
| 139 | if (s) |
| 140 | s->set("source_end", (dur_2 + p_time)); |
| 141 | } |
| 142 | |
| 143 | void load_separate_diphone(int unit, bool keep_full, |
| 144 | const EST_String &cut_type) |
| 145 | { |
| 146 | |
| 147 | |
| 148 | |
| 149 | |
| 150 | |
| 151 | int samp_start, samp_end; |
| 152 | int pm_start, pm_end, pm_middle; |
| 153 | EST_Track full_coefs, dcoefs, *coefs; |
| 154 | |
| 155 | |
| 156 | if (full_coefs.load(diph_index->coef_dir + "/" |
| 157 | + diph_index->diphone[unit].S("filename") |
| 158 | + diph_index->coef_ext) != format_okread_ok) |
| 159 | { |
| 160 | cerr << "US DB: failed to read coefs file from " << |
| 161 | diph_index->coef_dir + "/" |
| 162 | + diph_index->diphone[unit].S("filename") |
| 163 | + diph_index->coef_ext << endl; |
| 164 | EST_error(EST_error_where = __null), (*EST_error_func)(""); |
| 165 | } |
| 166 | |
| 167 | pm_start = full_coefs.index(diph_index->diphone[unit].f("start")); |
| 168 | pm_middle = full_coefs.index(diph_index->diphone[unit].f("middle")); |
| 169 | pm_end = full_coefs.index(diph_index->diphone[unit].f("end")); |
| 170 | |
| 171 | |
| 172 | if (cut_type == "first_half") |
| 173 | pm_end = pm_middle; |
| 174 | else if (cut_type == "second_half") |
| 175 | pm_start = pm_middle; |
| 176 | |
| 177 | |
| 178 | full_coefs.sub_track(dcoefs, pm_start, pm_end - pm_start + 1, 0, EST_ALL); |
| 179 | |
| 180 | coefs = new EST_Track(dcoefs); |
| 181 | for (int j = 0; j < dcoefs.num_frames(); ++j) |
| 182 | coefs->t(j) = dcoefs.t(j) - full_coefs.t(Gof((pm_start - 1), 0)((((pm_start - 1)) > (0)) ? ((pm_start - 1)) : (0))); |
| 183 | |
| 184 | diph_index->diphone[unit].set("first_dur", |
| 185 | full_coefs.t(pm_middle) - |
| 186 | full_coefs.t(pm_start)); |
| 187 | |
| 188 | diph_index->diphone[unit].set("second_dur", |
| 189 | full_coefs.t(pm_end) - |
| 190 | full_coefs.t(pm_middle)); |
| 191 | |
| 192 | if (keep_full) |
| 193 | { |
| 194 | EST_Track *f = new EST_Track; |
| 195 | *f = full_coefs; |
| 196 | diph_index->diphone[unit].set_val("full_coefs",est_val(f)); |
| 197 | } |
| 198 | |
| 199 | diph_index->diphone[unit].set_val("coefs", est_val(coefs)); |
| 200 | diph_index->diphone[unit].set("middle_frame", pm_middle - pm_start -1); |
| 201 | |
| 202 | EST_Wave full_sig, sub_sig; |
| 203 | |
| 204 | if (diph_index->sig_dir == "none") |
| 205 | return; |
| 206 | |
| 207 | if (full_sig.load(diph_index->sig_dir + "/" |
| 208 | + diph_index->diphone[unit].f("filename") |
| 209 | + diph_index->sig_ext) != format_okread_ok) |
| 210 | { |
| 211 | cerr << "US DB: failed to read signal file from " << |
| 212 | diph_index->sig_dir + "/" |
| 213 | + diph_index->diphone[unit].f("filename") |
| 214 | + diph_index->sig_ext << endl; |
| 215 | EST_error(EST_error_where = __null), (*EST_error_func)(""); |
| 216 | } |
| 217 | |
| 218 | |
| 219 | samp_start = (int)(full_coefs.t(Gof((pm_start - 1), 0)((((pm_start - 1)) > (0)) ? ((pm_start - 1)) : (0))) |
| 220 | * (float)full_sig.sample_rate()); |
| 221 | if (pm_end+1 < full_coefs.num_frames()) |
| 222 | pm_end++; |
| 223 | |
| 224 | samp_end = (int)(full_coefs.t(pm_end) * (float)full_sig.sample_rate()); |
| 225 | full_sig.sub_wave(sub_sig, samp_start, samp_end - samp_start + 1); |
| 226 | EST_Wave *sig = new EST_Wave(sub_sig); |
| 227 | |
| 228 | diph_index->diphone[unit].set_val("sig", est_val(sig)); |
| 229 | |
| 230 | if (keep_full) |
| 231 | { |
| 232 | EST_Wave *s = new EST_Wave; |
| 233 | *s = full_sig; |
| 234 | diph_index->diphone[unit].set_val("full_sig", est_val(s)); |
| 235 | } |
| 236 | } |
| 237 | |
| 238 | void load_full_diphone(int unit) |
| 239 | { |
| 240 | |
| 241 | |
| 242 | |
| 243 | |
| 244 | |
| 245 | int pm_start, pm_end, pm_middle; |
| 246 | EST_Track *full_coefs; |
| 247 | |
| 248 | full_coefs = new EST_Track; |
| 249 | |
| 250 | if (full_coefs->load(diph_index->coef_dir + "/" |
| 251 | + diph_index->diphone[unit].f("filename") |
| 252 | + diph_index->coef_ext) != format_okread_ok) |
| 253 | { |
| 254 | cerr << "US DB: failed to read coefs file from " << |
| 255 | diph_index->coef_dir + "/" |
| 256 | + diph_index->diphone[unit].f("filename") |
| 257 | + diph_index->coef_ext << endl; |
| 258 | EST_error(EST_error_where = __null), (*EST_error_func)(""); |
| 259 | } |
| 260 | |
| 261 | pm_start = full_coefs->index(diph_index->diphone[unit].f("start")); |
| 262 | pm_middle = full_coefs->index(diph_index->diphone[unit].f("middle")); |
| 263 | pm_end = full_coefs->index(diph_index->diphone[unit].f("end")); |
| Value stored to 'pm_end' is never read |
| 264 | |
| 265 | diph_index->diphone[unit].set_val("full_coefs", est_val(full_coefs)); |
| 266 | |
| 267 | EST_Wave *full_sig = new EST_Wave; |
| 268 | |
| 269 | if (full_sig->load(diph_index->sig_dir + "/" |
| 270 | + diph_index->diphone[unit].f("filename") |
| 271 | + diph_index->sig_ext) != format_okread_ok) |
| 272 | { |
| 273 | cerr << "US DB: failed to read signal file from " << |
| 274 | diph_index->sig_dir + "/" |
| 275 | + diph_index->diphone[unit].f("filename") |
| 276 | + diph_index->sig_ext << endl; |
| 277 | EST_error(EST_error_where = __null), (*EST_error_func)(""); |
| 278 | } |
| 279 | diph_index->diphone[unit].set_val("full_sig", est_val(full_sig)); |
| 280 | } |