Edinburgh Speech Tools 2.4-release
 
Loading...
Searching...
No Matches
EST_relation_aux.cc
1/*************************************************************************/
2/* */
3/* Centre for Speech Technology Research */
4/* University of Edinburgh, UK */
5/* Copyright (c) 1995,1996 */
6/* All Rights Reserved. */
7/* */
8/* Permission is hereby granted, free of charge, to use and distribute */
9/* this software and its documentation without restriction, including */
10/* without limitation the rights to use, copy, modify, merge, publish, */
11/* distribute, sublicense, and/or sell copies of this work, and to */
12/* permit persons to whom this work is furnished to do so, subject to */
13/* the following conditions: */
14/* 1. The code must retain the above copyright notice, this list of */
15/* conditions and the following disclaimer. */
16/* 2. Any modifications must be clearly marked as such. */
17/* 3. Original authors' names are not deleted. */
18/* 4. The authors' names are not used to endorse or promote products */
19/* derived from this software without specific prior written */
20/* permission. */
21/* */
22/* THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK */
23/* DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING */
24/* ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT */
25/* SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE */
26/* FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES */
27/* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN */
28/* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, */
29/* ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF */
30/* THIS SOFTWARE. */
31/* */
32/*************************************************************************/
33/* Author : Paul Taylor and Simon King */
34/* Date : June 1995 */
35/*-----------------------------------------------------------------------*/
36/* Relation class auxiliary routines */
37/* */
38/*=======================================================================*/
39#include <cstdlib>
40#include <iostream>
41#include <fstream>
42#include <cmath>
43#include "EST_types.h"
44#include "ling_class/EST_Relation.h"
45#include "ling_class/EST_relation_aux.h"
46#include "EST_string_aux.h"
47#include "EST_io_aux.h"
48#include "EST_Option.h"
49#include "EST_Token.h"
50
// Forward declaration: tests whether a name occurs in a string list.
// The definition appears further down in this file.
51static int is_in_class(const EST_String &name, EST_StrList &s);
52
// Forward declaration of the dynamic-programming matcher used by
// dp_time_align() below.
// NOTE(review): the call in dp_time_align() passes six arguments
// (two relations, a "Match" relation and three costs) but only five
// parameters are visible here; one parameter line appears to be missing
// from this listing -- confirm against the original source.
53bool dp_match(const EST_Relation &lexical,
54 const EST_Relation &surface,
56 float ins, float del, float sub);
57
58
59float start(EST_Item *n)
60{
61 return (iprev(n) == 0) ? 0.0 : iprev(n)->F("end");
62}
63
64float duration(EST_Item *n)
65{
66 return n->F("end") - start(n);
67}
68
69void quantize(EST_Relation &a, float q)
70{
72 float end;
73
74 for (a_ptr = a.head(); a_ptr != 0; a_ptr = inext(a_ptr))
75 {
76 end = a_ptr->F("end") / q;
77 end = rint(end);
78 end = end * q;
79 a_ptr->set("end", end);
80 }
81}
82
83// edit labels using a sed file to do the editing
84
85int edit_labels(EST_Relation &a, EST_String sedfile)
86{
88 char command[100], name[100], newname[100], sf[100];
89 FILE *fp;
92 file1 = make_tmp_filename();
93 file2 = make_tmp_filename();
94
95 fp = fopen(file1, "wb");
96 if (fp == NULL)
97 {
98 fprintf(stderr,"edit_labels: cannot open \"%s\" for writing\n",
99 (const char *)file1);
100 return -1;
101 }
102 for (a_ptr = a.head(); a_ptr != 0; a_ptr = inext(a_ptr))
103 {
104 strcpy(name, a_ptr->name());
105 fprintf(fp, "%s\n", name);
106 }
107 fclose(fp);
108 strcpy(command, "cat ");
110 strcat(command, " | sed -f ");
112 strcat(command, " > ");
114
115 printf("command: %s\n", command);
117
118 fp = fopen(file2, "rb");
119 if (fp == NULL)
120 {
121 fprintf(stderr,"edit_labels: cannot open \"%s\" for reading\n",
122 (const char *)file2);
123 return -1;
124 }
125 for (a_ptr = a.head(); a_ptr != 0; a_ptr = inext(a_ptr))
126 {
127 fscanf(fp, "%s", newname);
128// cout << "oldname: " << a_ptr->name() << " newname: " << newname << endl;
129 a_ptr->set_name(newname);
130 }
131 fclose(fp);
132 return 0;
133}
134
135// make new EST_Relation from start and end points.
136void extract(const EST_Relation &orig, float s,
137 float e, EST_Relation &ex)
138{
139 EST_Item *a;
140 EST_Item *tmp;
141
142 for (a = orig.head(); a != 0; a = inext(a))
143 if ((a->F("end") > s) && (start(a) < e))
144 {
145 tmp = ex.append(a);
146 if ((a->F("end") > e))
147 tmp->set("end", e);
148 }
149}
150
151void merge_all_label(EST_Relation &seg, const EST_String &labtype)
152{
154 (void)labtype; // unused parameter
155
156 for (a_ptr = seg.head(); a_ptr != seg.tail(); a_ptr = n_ptr)
157 {
158 n_ptr = inext(a_ptr);
159 if (a_ptr->name() == inext(a_ptr)->name())
160 seg.remove_item(a_ptr);
161 }
162}
163
164void change_label(EST_Relation &seg, const EST_String &oname,
165 const EST_String &nname)
166{
168
169 for (a_ptr = seg.head(); a_ptr != 0; a_ptr = inext(a_ptr))
170 if (a_ptr->name() == oname)
171 a_ptr->set_name(nname);
172}
173
174void change_label(EST_Relation &seg, const EST_StrList &oname,
175 const EST_String &nname)
176{
178 EST_Litem *p;
179
180 for (a_ptr = seg.head(); a_ptr != 0; a_ptr = inext(a_ptr))
181 for (p = oname.head(); p ; p = p->next())
182 if (a_ptr->name() == oname(p))
183 a_ptr->set_name(nname);
184}
185
186static int is_in_class(const EST_String &name, EST_StrList &s)
187{
188 EST_Litem *p;
189
190 for (p = s.head(); p; p = p->next())
191 if (name == s(p))
192 return TRUE;
193
194 return FALSE;
195}
196
197int check_vocab(EST_Relation &a, EST_StrList &vocab)
198{
199 EST_Item *s;
200 for (s = a.head(); s; s = inext(s))
201 if (!is_in_class(s->name(), vocab))
202 {
203 cerr<<"Illegal entry in file " <<a.name()<< ":\"" << *s << "\"\n";
204 return -1;
205 }
206 return 0;
207}
208
// Build a list of segment names from a whitespace-separated string and
// mark the items of seg accordingly via convert_to_broad().
// NOTE(review): the rest of the parameter list and the declarations of
// ts, bc_list and pos_list are missing from this listing; how bc_list
// is derived from class_type cannot be seen here -- confirm against the
// original source.
209void convert_to_broad_class(EST_Relation &seg, const EST_String &class_type,
211{
212 // class_type contains a list of whitespace separated segment names.
213 // This function looks at each segment and adds a feature "pos"
214 // if its name is contained in the list.
219
// split bc_list into tokens and collect them in pos_list
220 ts.open_string(bc_list);
221 while (!ts.eof())
222 pos_list.append(ts.get().string());
223
224 convert_to_broad(seg, pos_list);
225}
226
// Set an integer feature on every item of seg according to whether the
// item's name is in pos_list. With polarity true, members get 1 and
// non-members 0; with polarity false the values are swapped. The
// feature is called broad_name, or "pos" when broad_name is empty.
// NOTE(review): the remaining parameters (broad_name, polarity) and the
// declaration of a_ptr are missing from this listing; their types and
// any default values cannot be seen here.
227void convert_to_broad(EST_Relation &seg, EST_StrList &pos_list,
229{
231 if (broad_name == "")
232 broad_name = "pos";
233
234 for (a_ptr = seg.head(); a_ptr != 0; a_ptr = inext(a_ptr))
235 if (is_in_class(a_ptr->name(), pos_list))
236 a_ptr->set(broad_name, (polarity) ? 1 : 0);
237 else
238 a_ptr->set(broad_name, (polarity) ? 0 : 1);
239}
240
241void label_map(EST_Relation &seg, EST_Option &map)
242{
243 EST_Item *p;
244
245 for (p = seg.head(); p != 0; p = inext(p))
246 {
247 if (map.present(p->name()))
248 {
249 if (map.val(p->name()) == "!DELETE")
250 seg.remove_item(p);
251 else
252 p->set_name(map.val(p->name()));
253 }
254
255 }
256}
257
258void shift_label(EST_Relation &seg, float shift)
259{
260 //shift every end time by adding x seconds.
262
263 for (a_ptr = seg.head(); a_ptr != 0; a_ptr = inext(a_ptr))
264 a_ptr->set("end", a_ptr->F("end") + shift);
265}
266
// Keep in mlf only the relations that match an entry of filenames and
// remove all others.
// With exact_match, a relation is kept when its name equals a filename.
// Otherwise each filename is first reduced with basename() and a
// relation is kept when its name contains one of those reduced names.
// NOTE(review): the end of the parameter list (exact_match) and the
// declaration of tmp_filenames are missing from this listing.
267void RelationList_select(EST_RelationList &mlf, EST_StrList filenames, bool
269{
270 // select only files in 'filenames'
271 // remove all others from mlf
272 EST_Litem *fptr, *ptr;
273 bool flag;
274
275 // if not exact match, only match basenames
277 for (ptr = filenames.head(); ptr != NULL; ptr = ptr->next())
278 if(exact_match)
279 tmp_filenames.append( filenames(ptr) );
280 else
281 tmp_filenames.append( basename(filenames(ptr)) );
282
// no increment clause: fptr is advanced inside the body because the
// current element may be removed
283 for(fptr=mlf.head(); fptr != NULL;)
284 {
// flag becomes true when the current relation matches some filename
285 flag=false;
286 for (ptr = tmp_filenames.head(); ptr != NULL; ptr = ptr->next())
287 if(exact_match)
288 {
289 if(tmp_filenames(ptr) == mlf(fptr).name())
290 {
291 flag=true;
292 break;
293 }
294 }
295 else if(mlf(fptr).name().contains(tmp_filenames(ptr)))
296 {
297 flag=true;
298 break;
299 }
300
301 if(!flag)
302 {
// presumably remove() returns the item before the removed one, or 0
// when the head was removed -- see the comment below; confirm
303 fptr = mlf.remove(fptr);
304
305 if(fptr==0) // must have removed head of list
306 fptr=mlf.head();
307 else
308 fptr=fptr->next();
309 }
310 else
311 fptr=fptr->next();
312 }
313 tmp_filenames.clear();
314}
315
316// look for a single file called "filename" and make a EST_Relation out of
317// this
318EST_Relation RelationList_extract(EST_RelationList &mlf, const EST_String &filename, bool base)
319{
320
321 EST_Litem *p;
322 EST_String test, ref;
323
324 if (base)
325 for (p = mlf.head(); p; p = p->next())
326 {
327 if (basename(mlf(p).name(), "*")==basename(filename, "*"))
328 return mlf(p);
329 }
330 else
331 for (p = mlf.head(); p; p = p->next())
332 {
333 if (basename(mlf(p).name()) == filename)
334 return mlf(p);
335 }
336
337 cerr << "No match for file " << filename << " found in mlf\n";
338 EST_Relation d;
339 return d;
340}
341
342// combine all relation in MLF into a single relation.
343EST_Relation RelationList_combine(EST_RelationList &mlf)
344{
345 EST_Litem *p;
347 EST_Item *s, *t = 0;
348 float last = 0.0;
349
350 for (p = mlf.head(); p; p = p->next())
351 {
352 for (s = mlf(p).head(); s; s = inext(s))
353 {
354 t = all.append();
355 t->set("name", s->S("name"));
356 t->set("end", s->F("end") + last);
357 cout << "appended t " << t << endl;
358 }
359 last = (t != 0) ? t->F("end") : 0.0;
360 }
361 return all;
362}
363
364EST_Relation RelationList_combine(EST_RelationList &mlf, EST_Relation &key)
365{
366 EST_Litem *p;
368 EST_Item *s, *t = 0, *k;
369 float st;
370
371 if (key.length() != mlf.length())
372 {
373 cerr << "RelationList has " << mlf.length() << " elements: expected "
374 << key.length() << " from key file\n";
375 return all;
376 }
377
378 for (k = key.head(), p = mlf.head(); p; p = p->next(), k = inext(k))
379 {
380 st = start(k);
381 for (s = mlf(p).head(); s; s = inext(s))
382 {
383 t = all.append();
384 t->set("name", s->S("name"));
385 t->set("end", (s->F("end") + st));
386 }
387 }
388 return all;
389}
390
// Split lab into chunks delimited by the items of keylab and store the
// chunks in slist. Each chunk is named from the key item's "file"
// feature plus ext, and item end times are made relative to the chunk
// start. Returns 0 on success, -1 when keylab ends before lab does.
// NOTE(review): the rest of the parameter list (keylab, blank, ext) and
// the declaration of the working relation a are missing from this
// listing.
// NOTE(review): keylab.tail(), lab.tail() and lab.head() are
// dereferenced without a null check, so both relations are presumably
// expected to be non-empty.
391int relation_divide(EST_RelationList &slist, EST_Relation &lab,
394{ // divides a single relation into multiple chunks according to the
395 // keylab relation. If the keylab boundary falls in the middle of a label,
396 // the label is assigned to the chunk which has the most overlap with
397 // it. Some labels may be specified in the "blank" list which means they
398 // are duplicated across boundaries.
399
401 EST_Item *s, *k, *t = 0, *n;
402 EST_String filename;
403 float kstart;
404
405 slist.clear();
406
407 if ((keylab.tail())->F("end") < (lab.tail())->F("end"))
408 {
409 cerr << "Key file must extend beyond end of label file\n";
410 return -1;
411 }
412
413 // find the first keylab that will make a non-empty file
414 for (k = keylab.head(); k ; k = inext(k))
415 if (k->F("end") > lab.head()->F("end"))
416 break;
417
// NOTE(review): k is 0 here if the loop above found no such key item
418 filename = (EST_String)k->f("file");
419 a.f.set("name", (filename + ext));
420 kstart = 0.0;
421
422 for (s = lab.head(); s; s = inext(s))
423 {
424 n = inext(s);
// last label: append it to the current chunk and stop
425 if (n == 0)
426 {
427 t = a.append(s);
428 t->set("end", (s->F("end") - kstart));
429 break;
430 }
// the next label ends beyond the current key boundary
431 if (n->F("end") > k->F("end"))
432 {
// n goes into the current chunk when less of it lies after the
// boundary than before it, or when its name is in the blank list
433 if (((n->F("end") - k->F("end")) <
434 (k->F("end") - start(n))) ||
435 is_in_class(n->name(), blank))
436 {
// NOTE(review): the result of append(s) is not stored in t, so the
// set() below acts on the previously appended item (or on a null t
// when nothing has been appended yet) -- likely meant t = a.append(s)
437 a.append(s);
438 t->set("end", (s->F("end") - kstart));
439
// n is clipped to the key boundary
440 t = a.append(n);
441 t->set("end", (k->F("end") - kstart));
442
// a non-blank n has been consumed; skip it in the outer loop
443 if (!is_in_class(n->name(), blank))
444 s = inext(s);
445 }
446 else
447 {
448 t = a.append(s);
449 t->set("end", (k->F("end") - kstart));
450 }
451
// close the current chunk and start the next one
452 slist.append(a);
// NOTE(review): k is not checked for 0 after advancing
453 k = inext(k);
454 kstart = start(k);
455 a.clear();
456 filename = (EST_String)k->f("file");
457 a.f.set("name", (filename + ext));
458 }
459 else
460 {
461 t = a.append(s);
462 t->set("end", (s->F("end") - kstart));
463 }
464 }
465 slist.append(a);
466
467 return 0;
468}
469
// Split lab into chunks delimited by the items of keylab and store the
// chunks in mlf. A chunk is closed after the first label whose end
// exceeds the current key item's end; that key item's "end" is then
// overwritten with the label's end. Chunks are named from the key
// item's name plus ext, and item end times are made relative to the
// chunk start. Returns 0 on success, -1 when keylab ends before lab.
// NOTE(review): the rest of the parameter list (keylab, ext) and the
// declaration of the working relation a are missing from this listing.
// NOTE(review): every chunk is also printed to cout; this looks like
// leftover debugging output -- confirm.
470int relation_divide2(EST_RelationList &mlf, EST_Relation &lab,
472{
474 EST_Item *s, *k, *t;
475 float kstart;
476
477 mlf.clear();
478
479 if ((keylab.tail())->F("end") < (lab.tail())->F("end"))
480 {
481 cerr << "Key file must extend beyond end of label file\n";
482 return -1;
483 }
484
485 k = keylab.head();
486 a.f.set("name", (k->name() + ext));
487 kstart = 0.0;
488
489 for (s = lab.head(); s; s = inext(s))
490 {
// copy the label into the current chunk with a chunk-relative end
491 t = a.append();
492 t->set_name(s->name());
493 t->set("end", (s->F("end") - kstart));
494
// this label crosses the key boundary: close the chunk here
495 if (s->F("end") > k->F("end"))
496 {
497 cout << "appending " << a;
498 mlf.append(a);
499
500 kstart = s->F("end");
// move the key boundary to the label boundary
501 k->set("end", (s->F("end")));
// NOTE(review): k is not checked for 0 after advancing
502 k = inext(k);
503 a.clear();
504 a.f.set("name", (k->name() + ext));
505 }
506 }
507 cout << "appending " << a;
508 mlf.append(a);
509
510 return 0;
511}
512
513
514
515
// Give every item of target a time_name+"end" feature (and, when
// do_start is set, a time_name+"start" feature).
// Items that have a first daughter in the relation match_name copy that
// daughter's "end"/"start". Items without one are then filled in by
// spacing them evenly between the neighbouring known end times.
// NOTE(review): the declaration of prev_end, inc, first_end and
// last_end is missing from this listing.
// NOTE(review): target.head() and target.tail() are dereferenced without
// a null check, so target is presumably expected to be non-empty.
516void map_match_times(EST_Relation &target, const EST_String &match_name,
517 const EST_String &time_name, bool do_start)
518{
519 EST_Item *s, *t, *p;
521 int i;
522
523 // first pass, copy times as appropriate, and find first
524 // and last defined ends
525 // This is hacky and certainly won't work for many cases
526
// first_end stays negative until a matched item is seen
527 first_end = -1.0;
528 prev_end = 0.0;
529 last_end = 0.0;
530
531// cout << "surface: " << surface << endl;
532
533 for (s = target.head(); s; s = inext(s))
534 {
535 if ((t = daughter1(s->as_relation(match_name))) != 0)
536 {
537 s->set(time_name + "end", t->F("end"));
538 if (do_start)
539 s->set(time_name + "start", t->F("start"));
540
541 last_end = t->F("end");
542 if (first_end < 0.0)
543 first_end = t->F("end");
544 }
545 }
546
// make sure the first item has an end time: half the first known end
// NOTE(review): if no item matched, first_end is still -1.0 here
547 if (!target.head()->f_present(time_name + "end"))
548 {
549 target.head()->set(time_name + "end", first_end / 2.0);
550 if (do_start)
551 target.head()->set(time_name + "start", 0.0);
552 }
553
// make sure the last item has an end time: 0.01 past the last known end
554 if (!target.tail()->f_present(time_name + "end"))
555 {
556 target.tail()->set(time_name + "end", last_end + 0.01);
557 if (do_start)
558 target.tail()->set(time_name + "start", last_end);
559 }
560
// second pass: fill each run of items that still lack an end time
561 for (s = target.head(); s; s = inext(s))
562 {
563 if (!s->f_present(time_name + "end"))
564 {
565// cout << "missing end feature for " << *s << endl;
// find the next item p that has an end; i counts the steps to reach it
566 for (i = 1, p = s; p; p = inext(p), ++i)
567 if (p->f_present(time_name + "end"))
568 break;
// step size that divides the gap from prev_end to p's end evenly
569 inc = (p->F(time_name + "end") - prev_end) / ((float) i);
570// cout << "inc is : " << inc << endl;
571
572// cout << "stop phone is " << *p << endl;
573
// assign times to the run; s is left pointing at p afterwards
574 for (i = 1; s !=p ; s = inext(s), ++i)
575 {
576 s->set(time_name + "end", (prev_end + ((float) i * inc)));
577 if (do_start)
578 s->set(time_name + "start", (prev_end+((float) (i - 1 )* inc)));
579 }
580 }
// NOTE(review): this reads "end", not time_name + "end" as everywhere
// else in this function -- confirm this is intended
581 prev_end = s->F("end");
582 }
583}
584
585void dp_time_align(EST_Utterance &utt, const EST_String &source_name,
586 const EST_String &target_name,
587 const EST_String &time_name,
588 bool do_start)
589{
590 utt.create_relation("Match");
591
592 dp_match(*utt.relation(target_name), *utt.relation(source_name),
593 *utt.relation("Match"), 7.0, 7.0, 7.0);
594
595 map_match_times(*utt.relation(target_name), "Match", time_name, do_start);
596}
597
598
599EST_Litem *RelationList_ptr_extract(EST_RelationList &mlf, const EST_String &filename, bool base)
600{
601 EST_Litem *p;
602 EST_String test, ref;
603
604 if (base)
605 for (p = mlf.head(); p; p = p->next())
606 {
607 if (basename(mlf(p).name(), "*")==basename(filename, "*"))
608 return p;
609 }
610 else
611 for (p = mlf.head(); p; p = p->next())
612 if (mlf(p).name() == filename)
613 return p;
614
615 cerr << "No match for file " << filename << " found in mlf\n";
616 return 0;
617}
618
// Apply to lab the conversions requested in the option set al.
// "-shift", "-extend", "-q" and "-start"/"-end" are each handled when
// present; after that at most one of "-class", "-pos", "-sed" and "-map"
// is applied, in that order of precedence. op is passed on to
// convert_to_broad_class().
// NOTE(review): the declaration of bclass is missing from this listing.
619void relation_convert(EST_Relation &lab, EST_Option &al, EST_Option &op)
620{
621 if (al.present("-shift"))
622 shift_label(lab, al.fval("-shift"));
623
624 // fix option later.
// NOTE(review): lab.tail() is dereferenced without a null check
625 if (al.present("-extend"))
626 al.override_fval("-length",
627 al.fval("-extend",0) * lab.tail()->F("end"));
628
629 // quantize (ie round up or down) label times
630 if (al.present("-q"))
631 quantize(lab, al.fval("-q"));
632
633 if (al.present("-start"))
634 {
635 if (!al.present("-end"))
636 cerr << "-start option must be used with -end option\n";
637 else
// NOTE(review): lab is passed as both source and destination, and
// extract() appends to its destination while walking its source --
// confirm this is safe
638 extract(lab, al.fval("-start"), al.fval("-end"), lab);
639 }
640
641 if (al.present("-class"))
642 convert_to_broad_class(lab, al.val("-class"), op);
643
644 else if (al.present("-pos"))
645 {
// NOTE(review): the branch tests "-pos" but reads the "-lablist" value
// -- confirm this is intended
647 StringtoStrList(al.val("-lablist"), bclass);
648 convert_to_broad(lab, bclass);
649 }
650 else if (al.present("-sed"))
// the status returned by edit_labels() is not checked
651 edit_labels(lab, al.val("-sed"));
652 else if (al.present("-map"))
653 {
654 EST_Option map;
// give up silently when the map file cannot be loaded
655 if (map.load(al.val("-map")) != format_ok)
656 return;
657 label_map(lab, map);
658 }
659}
660
661
662
// Print every item of stream to cout, one per line: the item's name, a
// tab and a colon, then each of its features as "key value; ".
// NOTE(review): the declaration of the feature iterator p is missing
// from this listing.
663void print_relation_features(EST_Relation &stream)
664{
665 EST_Item *s;
667
668 for (s = stream.head(); s; s = inext(s))
669 {
670 cout << s->name() << "\t:";
// walk the item's features; k is the key and v the value of each entry
671 for(p.begin(s->features()); p; ++p)
672 cout << p->k << " "
673 << p->v << "; ";
674 cout << endl;
675 }
676
677}
678
679
// Fill hash_table with one entry per relation in mlf, mapping a key to
// the address of the relation stored in the list. The key is
// basename(name, "*") when base is true and the full relation name
// otherwise.
// The stored pointers refer to elements of mlf, so they presumably stay
// valid only while mlf is unchanged -- confirm.
// NOTE(review): the hash_table parameter line is missing from this
// listing.
680void build_RelationList_hash_table(EST_RelationList &mlf,
682 const bool base)
683{
684 EST_Litem *p;
685 if (base)
686 for (p = mlf.head(); p; p = p->next())
687 hash_table.add_item(basename(mlf(p).name(), "*"),
688 &(mlf(p)));
689 else
690 for (p = mlf.head(); p; p = p->next())
691 hash_table.add_item(mlf(p).name(),
692 &(mlf(p)));
693}
694
695
// Look up filename in hash_table and, when found, store the relation
// pointer in rel and return true. When base is set the lookup key is
// basename(filename, "*") instead of filename. When the key is absent,
// a message is written to cerr, rel is left unchanged and false is
// returned.
// NOTE(review): the hash_table parameter line is missing from this
// listing.
696bool hashed_RelationList_extract(EST_Relation* &rel,
698 const EST_String &filename, bool base)
699{
700 EST_Relation *d;
701 EST_String fname = filename;
702 int found;
703
704 if (base)
705 fname=basename(filename, "*");
706
// found is set by val() to tell whether the key exists
707 d=hash_table.val(fname,found);
708
709 if(found)
710 {
711 rel = d;
712 return true;
713 }
714 cerr << "No match for file " << fname << " found in mlf\n";
715 return false;
716}
717
718
void set(const EST_String &name, int ival)
K k
The key.
Definition EST_THash.h:78
V v
The value.
Definition EST_THash.h:80
void set(const EST_String &name, int ival)
Definition EST_Item.h:179
const EST_String S(const EST_String &name) const
Definition EST_Item.h:143
const float F(const EST_String &name) const
Definition EST_Item.h:134
EST_Item * as_relation(const char *relname) const
View item from another relation (const char *) method.
Definition EST_Item.h:302
int f_present(const EST_String &name) const
Definition EST_Item.h:230
EST_read_status load(const EST_String &filename, const EST_String &comment=";")
EST_Features f
EST_Item * head() const
int length() const
const EST_String & name() const
void begin(const Container &over)
Set the iterator ready to run over this container.
const V & val(const K &rkey, bool m=0) const
return value according to key (const)
Definition EST_TKVL.cc:145
const int present(const K &rkey) const
Returns true if key is present.
Definition EST_TKVL.cc:222
EST_Relation * relation(const char *name, int err_on_not_found=1) const
get relation by name
EST_Relation * create_relation(const EST_String &relname)
create a new relation called <parameter>n</parameter>.