Edinburgh Speech Tools  2.1-release
EST_cluster.cc
Go to the documentation of this file.
1 /*************************************************************************/
2 /* */
3 /* Centre for Speech Technology Research */
4 /* University of Edinburgh, UK */
5 /* Copyright (c) 1995,1996 */
6 /* All Rights Reserved. */
7 /* */
8 /* Permission is hereby granted, free of charge, to use and distribute */
9 /* this software and its documentation without restriction, including */
10 /* without limitation the rights to use, copy, modify, merge, publish, */
11 /* distribute, sublicense, and/or sell copies of this work, and to */
12 /* permit persons to whom this work is furnished to do so, subject to */
13 /* the following conditions: */
14 /* 1. The code must retain the above copyright notice, this list of */
15 /* conditions and the following disclaimer. */
16 /* 2. Any modifications must be clearly marked as such. */
17 /* 3. Original authors' names are not deleted. */
18 /* 4. The authors' names are not used to endorse or promote products */
19 /* derived from this software without specific prior written */
20 /* permission. */
21 /* */
22 /* THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK */
23 /* DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING */
24 /* ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT */
25 /* SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE */
26 /* FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES */
27 /* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN */
28 /* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, */
29 /* ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF */
30 /* THIS SOFTWARE. */
31 /* */
32 /*************************************************************************/
33 /* Author : Paul Taylor */
34 /* Date : July 1995 */
35 /*-----------------------------------------------------------------------*/
36 /* Event RFC labelling */
37 /* */
38 /*=======================================================================*/
39 
40 #include <cstdlib>
41 #include "EST_system.h"
42 #include "EST_FMatrix.h"
43 #include "EST_cluster.h"
44 #include <fstream>
45 #include "EST_string_aux.h"
46 #include <cfloat>
47 
48 using namespace std;
49 
50 int fn_cluster(EST_FMatrix &m, EST_CBK &cbk, float d);
51 int nn_cluster(EST_FMatrix &m, EST_CBK &cbk, float d);
52 int nn_cluster2(EST_FMatrix &m, EST_CBK &cbk, float d);
53 float lval(EST_FMatrix &a, float floor, int &row, int &col);
54 float nn_cluster3(EST_FMatrix &m, EST_CBK &cbk, EST_String method);
55 
56 void init_cluster(EST_CBK &cbk, int n)
57 {
58  int i;
59  EST_TList<int> tmp;
60 
61  for (i = 0; i < n; ++i)
62  {
63  tmp.clear();
64  tmp.append(i);
65  cbk.append(tmp);
66  }
67 }
68 
70  EST_TList<EST_String> &names)
71 {
72  float dist;
73  while (cbk.length() > 1)
74  {
75  dist = nn_cluster3(m, cbk, method);
76  ans.append(print_codebook(cbk, dist, names));
77  }
78  return 0;
79 }
80 
81 // return true if list l contains integer n
82 int contains(EST_TList<int> &l, int n)
83 {
84  EST_Litem *p;
85 
86  for (p = l.head(); p != 0; p = p->next())
87  if (l(p) == n)
88  return 1;
89 
90  return 0;
91 }
92 
94 {
95  EST_Litem *pi, *pj;
96  int i, j;
97 
98  for (i = 0, pi = group.head(); pi != 0; pi = pi->next(), ++i)
99  for (j = 0, pj = group.head(); pj != 0; pj = pj->next(), ++j)
100  d(group(pi), group(pj)) = 0.0;
101 }
102 
103 /*EST_FMatrix remove_line(Fmatrix a, int r)
104 {
105  int n;
106  n = a.num_rows() - 1;
107 
108  Fmatrix b(n, n);
109 
110  int i, j;
111 
112  for (i = i2 = 0; i < n; ++i. ++i2)
113  for (j = j2 = 0; j < n; ++j, ++j2)
114  if (i == r)
115  ;
116 
117 */
118 
119 void collapse(EST_FMatrix &d, EST_CBK &cbk, int row, int col)
120 {
121  EST_Litem *pi, *pj;
122 
123  for (pi = cbk.head(); pi != 0; pi = pi->next())
124  if (contains(cbk(pi), row))
125  break;
126 
127  for (pj = cbk.head(); pj != 0; pj = pj->next())
128  if (contains(cbk(pj), col))
129  break;
130 
131  if (pi != 0 && pj != 0) {
132  cbk(pi) += cbk(pj);
133  remove_distances(d, cbk(pi));
134  cbk.remove(pj);
135  }
136 }
137 
138 float min(float a, float b)
139 {
140  return (a < b) ? a: b;
141 }
142 
143 float max(float a, float b)
144 {
145  return (a > b) ? a: b;
146 }
147 
148 // combine codebook groups "row" and "col" into one group and calculate a
149 // new distance matrix
150 void collapse3(EST_FMatrix &d, EST_CBK &cbk, int row, int col, EST_String method)
151 {
152  int i;
153  EST_Litem *pi;
154  EST_TList<int> v;
155  float fm;
156 
157  cout << "Removing row/column " << col << endl;
158  cout << "row " <<cbk.nth(row) << endl;
159  cout << "col " <<cbk.nth(col) << endl;
160  cbk.nth(row) += cbk.nth(col);
161  cout << "row " <<cbk.nth(row) << endl;
162 
163  for (i = 0; i < d.num_rows(); ++i)
164  {
165  if ((i != row) && (i != col))
166  v.append(i);
167  }
168 
169  cout << "row " << row << " col " << col << " left out " << v;
170 
171  for (pi = v.head(); pi != 0; pi = pi->next())
172  {
173  if (method == "nearest")
174  fm = min(d(row,v(pi)),d(col,v(pi)));
175  else if (method == "furthest")
176  fm = max(d(row,v(pi)),d(col,v(pi)));
177  else
178  fm = min(d(row,v(pi)),d(col,v(pi)));
179 
180  cout << "writing values to " << v(pi) << ", " << row << " min "
181  << fm << endl;
182  d(v(pi), row) = fm;
183  d(row, v(pi)) = fm;
184  }
185 
186  d = sub(d, col, col);
187  cbk.remove_nth(col);
188 }
189 
190 int nn_cluster2(EST_FMatrix &m, EST_CBK &cbk, float d)
191 {
192  static float smallest = 0.0;
193  int row=0, col=0;
194  (void)d;
195 
196 // Change so that all values aprt from lowest in codebook get set to
197 // Nan (or whatever)
198 
199  smallest = lval(m, smallest, row, col);
200  cout << "smallest = " << smallest << endl;
201  cout << "row = " << row << " col " << col << endl;
202  collapse(m, cbk, row, col);
203 
204  for (EST_Litem *p = cbk.head(); p != 0; p = p->next())
205  cout << cbk(p);
206  cout << "New matrix\n" << m;
207 
208  // cout << cbk;
209  return 1;
210 }
211 
212 float nn_cluster3(EST_FMatrix &m, EST_CBK &cbk, EST_String method)
213 {
214  static float smallest = 0.0;
215  int row=0, col=0;
216 
217 // Change so that all values aprt from lowest in codebook get set to
218 // Nan (or whatever)
219 
220  cout << "analysing matrix\n" << m;
221  smallest = lval(m, smallest, row, col);
222  cout << "smallest = " << smallest << endl;
223  cout << "row = " << row << " col " << col << endl;
224  collapse3(m, cbk, row, col, method);
225 
226  for (EST_Litem *p = cbk.head(); p != 0; p = p->next())
227  cout << cbk(p);
228  cout << "New matrix\n" << m << endl << endl;
229 
230  // cout << cbk;
231  return smallest;
232 }
233 
234 int nn_cluster(EST_FMatrix &m, EST_CBK &cbk, float d)
235 {
236  int i;
237  EST_Litem *pi, *pj;
238  float smallest;
239  int c = 0;
240 
241  i = 0;
242  for (pi = cbk.head(); pi != 0; pi = pi->next(), ++i)
243  {
244  for (pj = pi->next(); pj != 0; pj = pj->next())
245  {
246  smallest = lowestval(m, cbk(pj), cbk(pi));
247  if (smallest < d)
248  {
249  cbk(pi) += cbk(pj);
250  cbk(pj).clear();
251  }
252  }
253  }
254 
255  for (pi = cbk.head(); pi != 0; pi = pi->next())
256  {
257  if (cbk(pi).empty())
258  {
259  cout << "Empty entry\n";
260  pi = cbk.remove(pi);
261  c = 1;
262  }
263  else
264  cout << cbk(pi);
265  }
266  return c;
267 }
268 
269 int fn_cluster(EST_FMatrix &m, EST_CBK &cbk, float d)
270 {
271  int i;
272  EST_Litem *pi, *pj;
273  float smallest;
274  int c = 0;
275 
276  i = 0;
277  for (pi = cbk.head(); pi != 0; pi = pi->next(), ++i)
278  {
279  for (pj = pi->next(); pj != 0; pj = pj->next())
280  {
281  smallest = highestval(m, cbk(pj), cbk(pi));
282  if (smallest < d)
283  {
284  cbk(pi) += cbk(pj);
285  cbk(pj).clear();
286  }
287  }
288  }
289 
290  for (pi = cbk.head(); pi != 0; pi = pi->next())
291  {
292  if (cbk(pi).empty())
293  {
294  cout << "Empty entry\n";
295  pi = cbk.remove(pi);
296  c = 1;
297  }
298  else
299  cout << cbk(pi);
300  }
301  return c;
302 }
303 
304 
305 static int sorttest(const void *a, const void *b)
306 { // for use with qsort C library function.
307  float *c = (float *)a;
308  float *d = (float *)b;
309  float res = (*c - *d);
310  if (res == 0.0)
311  return 0;
312  return (res < 0.0) ? -1 : 1;
313 }
314 
316 {
317  int i, j, k;
318  float *v;
319  int n_vals;
320 
321  // determine size of triangular part of matrix, excluding diagonal
322  int size = m.num_rows() - 1;
323 
324  n_vals = 0;
325  for (i = 0; i < size; ++i)
326  n_vals+=(i + 1);
327 
328  cout<<"number of values in EST_FMatrix:" << n_vals << " size " << size << endl;
329 
330  v = new float[n_vals];
331 
332  for (i = k = 0; i < m.num_rows(); ++i)
333  for (j = i + 1; j < m.num_columns(); ++j, ++k)
334  {
335  cout << i << " " << j << " " << k << " " << (i * size) + k << endl;
336  v[k] = m(j, i);
337  }
338 
339  for (i = 0; i < n_vals; ++i)
340  cout << "v[" << i << "] = " << v[i] << endl;
341 
342  qsort(v, n_vals, sizeof(float), sorttest);
343 
344  EST_FVector vsort(n_vals);
345  for (i = 0; i < n_vals; ++i)
346  vsort[i] = v[i];
347  delete[] v;
348  return vsort;
349 }
350 
352 {
353  EST_Litem *pi;
354  EST_Litem *pj;
355  EST_String s;
356 
357  s = ftoString(d) + " ";
358  for (pi = cbk.head(); pi != 0; pi = pi->next())
359  {
360  s += "(";
361  for (pj = cbk(pi).head(); pj != 0; pj = pj->next())
362  {
363  if (names.empty())
364  s += itoString(cbk.item(pi).item(pj));
365  else
366  s += names.nth(cbk.item(pi).item(pj));
367  if (pj->next() != 0)
368  s += " ";
369  }
370  s += ") ";
371  }
372  return s;
373 }
374 
375 void cluster3(EST_FMatrix &m, float d)
376 {
377  int n = m.num_rows();
378  EST_TList<int> oldcbk[12];
379 
380  int i, j;
381  float smallest;
382 
383  for (i = 0; i < n; ++i)
384  oldcbk[i].append(i);
385 
386  for (i = 0; i < n; ++i)
387  cout << "n: " << i << " " << oldcbk[i] << endl;
388 
389 
390  for (i = 0; i < n; ++i)
391  for (j = i + 1; j < n; ++j)
392  {
393  smallest = lowestval(m, oldcbk[j], oldcbk[i]);
394  cout << "smallest = " << smallest << " d= " << d << endl << endl;
395  if (smallest < d)
396  {
397  cout << "merging " << i << " " << j << endl << endl;
398  merge(oldcbk, i, j);
399  n--;
400  }
401  }
402 
403  for (i = 0; i < n; ++i)
404  cout << "n: " << i << " " << oldcbk[i] << endl;
405 }
406 /*
407  int cluster2(EST_FMatrix &dist, float d)
408  {
409  int n = dist.num_frames();
410  EST_TList<int> oldcbk[12];
411  EST_TList<int> newcbk[12];
412  float sortval = {2.0, 3.0, 4.0, 5.0, 6.0};
413  int i, j, n, m;
414  EST_Litem *p;
415 
416  for (i = 0; i < n; ++i)
417  oldcbk[i].append(i);
418 
419  i = 0;
420  while (n > 2)
421  {
422  s = findval(dist, m, n, sortval[i++]);
423  merge9u
424  }
425 
426  }
427 
428  float findval(EST_FMatrix &dist, int &n, int &m, float val)
429  {
430  int i, j;
431 
432  for (i = 0; i < m.num_frames(); ++i)
433  for (j = 0; j < m.order(); ++j)
434  if ((m.x[i][j] < (val + 0.001)) && (m.x[i][j] > (val - 0.001)))
435  return;
436 
437  cerr << "Couldn't find value " << val << endl;
438  }
439  */
441 {
442  EST_Litem *pa, *pb;
443  float lowest = 100000.0;
444 
445  cout << "list a:" << a << "list b:" << b;
446 
447  for (pa = a.head(); pa != 0; pa = pa->next())
448  for (pb = b.head(); pb != 0; pb = pb->next())
449  {
450  // cout << "m:" << a(pa) << " " << b(pb) << " " << m.x[a(pa)][b(pb)] << endl;
451  if (m(a(pa), b(pb)) < lowest)
452  lowest = m(a(pa), b(pb));
453  }
454  // cout << "lowest " << lowest << endl;
455  return lowest;
456 }
457 
458 // find the lowest value in matrix a above floor, and return it. Also
459 // set row and column to be the indices of it.
460 float lval(EST_FMatrix &a, float floor, int &row, int &col)
461 {
462  int i, j;
463  float lowest = FLT_MAX;
464 
465  for (i = 0; i < a.num_rows(); ++i)
466  for (j = 0; j < a.num_rows(); ++j)
467  if ((a(i, j) < lowest) && (a(i, j) > floor))
468  {
469  lowest = a(i, j);
470  row = i;
471  col = j;
472  }
473  return lowest;
474 }
475 
477 {
478  EST_Litem *pa, *pb;
479  float h = 0.0;
480 
481  cout << "list a:" << a << "list b:" << b;
482 
483  for (pa = a.head(); pa != 0; pa = pa->next())
484  for (pb = b.head(); pb != 0; pb = pb->next())
485  {
486  if (m(a(pa), b(pb)) > h)
487  h = m(a(pa), b(pb));
488  }
489  // cout << "lowest " << lowest << endl;
490  return h;
491 }
492 /*
493  float nearest(EST_FMatrix &m, EST_TList<int> &cbk)
494  {
495  EST_Litem *p;
496  float lowest = 100000.0;
497 
498  for (p = cbk.head(); p != 0; p = p->next())
499  {
500  cout << "cbk(p) " << cbk(p) << endl;
501  if (cbk(p) < lowest)
502  lowest = cbk(p);
503  }
504 
505  cout << "lowest = " << lowest << endl;
506  return lowest;
507  }
508  */
509 void merge(EST_TList<int> cbk[], int i, int j)
510 {
511  EST_Litem *p;
512 
513  for (p = cbk[j].head(); p != 0; p = p->next())
514  cbk[i].append(cbk[j].item(p));
515 
516  cbk[j].clear();
517 }
518 
520 {
521  char inbuf[1000];
522  EST_String tmpstr;
523 
524  ifstream inf(file);
525  if (!inf) cerr << "Can't open names file " << file << endl;
526 
527  while(inf.getline(inbuf, 1000))
528  {
529  tmpstr = inbuf;
530  names.append(tmpstr);
531  }
532  return 0;
533 }
534 
535 /*int merge(EST_TList<int> &a, EST_TList<int> &b)
536  {
537  EST_TList<int> newgroup;
538  EST_Litem *p;
539 
540  for (p = cbk[j].head(); p != 0; p = p->next())
541  cbk[i].append(cbk[j].item(p));
542 
543  cbk[j].clear();
544  }
545  */
546 
int nn_cluster2(EST_FMatrix &m, EST_CBK &cbk, float d)
Definition: EST_cluster.cc:190
EST_Litem * remove_nth(int n)
remove nth item, return pointer to previous item
Definition: EST_TList.h:184
float highestval(EST_FMatrix &m, EST_TList< int > &a, EST_TList< int > &b)
Definition: EST_cluster.cc:476
void qsort(EST_TList< T > &a)
void merge(EST_TList< int > cbk[], int i, int j)
Definition: EST_cluster.cc:509
void init_cluster(EST_CBK &cbk, int n)
Definition: EST_cluster.cc:56
EST_FMatrix row(const EST_FMatrix &a, ssize_t row)
Definition: vec_mat_aux.cc:210
ssize_t num_columns() const
return number of columns
Definition: EST_TMatrix.h:179
A vector class for floating point numbers. EST_FVector x should be used instead of float *x wherever ...
Definition: EST_FMatrix.h:119
LISP append(LISP l1, LISP l2)
Definition: slib_list.cc:177
EST_String itoString(int n)
Make a EST_String object from an integer.
Definition: util_io.cc:141
ssize_t num_rows() const
return number of rows
Definition: EST_TMatrix.h:177
T & nth(int n)
return the Nth value
Definition: EST_TList.h:145
float lval(EST_FMatrix &a, float floor, int &row, int &col)
Definition: EST_cluster.cc:460
EST_String ftoString(float n, int pres=3, int width=0, int l=0)
Make a EST_String object from an float, with variable precision.
Definition: util_io.cc:149
int contains(EST_TList< int > &l, int n)
Definition: EST_cluster.cc:82
EST_UItem * next()
Definition: EST_UList.h:55
float max(float a, float b)
Definition: EST_cluster.cc:143
#define FLT_MAX
Definition: EST_math.h:134
int load_names(EST_String file, EST_TList< EST_String > &names)
Definition: EST_cluster.cc:519
float lowestval(EST_FMatrix &m, EST_TList< int > &a, EST_TList< int > &b)
Definition: EST_cluster.cc:440
int cluster(EST_FMatrix &m, EST_CBK &cbk, EST_TList< EST_String > &ans, EST_String method, EST_TList< EST_String > &names)
Definition: EST_cluster.cc:69
EST_FMatrix sub(const EST_FMatrix &a, ssize_t row, ssize_t col)
Definition: vec_mat_aux.cc:187
float min(float a, float b)
Definition: EST_cluster.cc:138
int fn_cluster(EST_FMatrix &m, EST_CBK &cbk, float d)
Definition: EST_cluster.cc:269
float nn_cluster3(EST_FMatrix &m, EST_CBK &cbk, EST_String method)
Definition: EST_cluster.cc:212
void append(const T &item)
add item onto end of list
Definition: EST_TList.h:196
void cluster3(EST_FMatrix &m, float d)
Definition: EST_cluster.cc:375
int length() const
Definition: EST_UList.cc:57
EST_String print_codebook(EST_CBK &cbk, float d, EST_TList< EST_String > &names)
Definition: EST_cluster.cc:351
void collapse3(EST_FMatrix &d, EST_CBK &cbk, int row, int col, EST_String method)
Definition: EST_cluster.cc:150
T & item(const EST_Litem *p)
Definition: EST_TList.h:139
void remove_distances(EST_FMatrix &d, EST_TList< int > &group)
Definition: EST_cluster.cc:93
EST_UItem * head() const
Definition: EST_UList.h:97
int nn_cluster(EST_FMatrix &m, EST_CBK &cbk, float d)
Definition: EST_cluster.cc:234
EST_FVector sort_matrix(EST_FMatrix &m)
Definition: EST_cluster.cc:315
void clear(void)
remove all items in list
Definition: EST_TList.h:244
EST_Litem * remove(EST_Litem *ptr)
Definition: EST_TList.h:180
int empty() const
Definition: EST_UList.h:89
Utility EST_String Functions header file.
void collapse(EST_FMatrix &d, EST_CBK &cbk, int row, int col)
Definition: EST_cluster.cc:119