PocketSphinx 5prealpha
ptm_mgau.c
1/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
2/* ====================================================================
3 * Copyright (c) 1999-2010 Carnegie Mellon University. All rights
4 * reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 *
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 *
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
16 * distribution.
17 *
18 * This work was supported in part by funding from the Defense Advanced
19 * Research Projects Agency and the National Science Foundation of the
20 * United States of America, and the CMU Sphinx Speech Consortium.
21 *
22 * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
23 * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
24 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
25 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
26 * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33 *
34 * ====================================================================
35 *
36 */
37
38/* System headers */
39#include <stdio.h>
40#include <stdlib.h>
41#include <string.h>
42#include <assert.h>
43#include <limits.h>
44#include <math.h>
45#if defined(__ADSPBLACKFIN__)
46#elif !defined(_WIN32_WCE)
47#include <sys/types.h>
48#endif
49
50/* SphinxBase headers */
51#include <sphinx_config.h>
52#include <sphinxbase/cmd_ln.h>
53#include <sphinxbase/fixpoint.h>
54#include <sphinxbase/ckd_alloc.h>
55#include <sphinxbase/bio.h>
56#include <sphinxbase/err.h>
57#include <sphinxbase/prim_type.h>
58
59/* Local headers */
60#include "tied_mgau_common.h"
61#include "ptm_mgau.h"
62
63static ps_mgaufuncs_t ptm_mgau_funcs = {
64 "ptm",
65 ptm_mgau_frame_eval, /* frame_eval */
66 ptm_mgau_mllr_transform, /* transform */
67 ptm_mgau_free /* free */
68};
69
70#define COMPUTE_GMM_MAP(_idx) \
71 diff[_idx] = obs[_idx] - mean[_idx]; \
72 sqdiff[_idx] = MFCCMUL(diff[_idx], diff[_idx]); \
73 compl[_idx] = MFCCMUL(sqdiff[_idx], var[_idx]);
74#define COMPUTE_GMM_REDUCE(_idx) \
75 d = GMMSUB(d, compl[_idx]);
76
77static void
78insertion_sort_topn(ptm_topn_t *topn, int i, int32 d)
79{
80 ptm_topn_t vtmp;
81 int j;
82
83 topn[i].score = d;
84 if (i == 0)
85 return;
86 vtmp = topn[i];
87 for (j = i - 1; j >= 0 && d > topn[j].score; j--) {
88 topn[j + 1] = topn[j];
89 }
90 topn[j + 1] = vtmp;
91}
92
93static int
94eval_topn(ptm_mgau_t *s, int cb, int feat, mfcc_t *z)
95{
96 ptm_topn_t *topn;
97 int i, ceplen;
98
99 topn = s->f->topn[cb][feat];
100 ceplen = s->g->featlen[feat];
101
102 for (i = 0; i < s->max_topn; i++) {
103 mfcc_t *mean, diff[4], sqdiff[4], compl[4]; /* diff, diff^2, component likelihood */
104 mfcc_t *var, d;
105 mfcc_t *obs;
106 int32 cw, j;
107
108 cw = topn[i].cw;
109 mean = s->g->mean[cb][feat][0] + cw * ceplen;
110 var = s->g->var[cb][feat][0] + cw * ceplen;
111 d = s->g->det[cb][feat][cw];
112 obs = z;
113 for (j = 0; j < ceplen % 4; ++j) {
114 diff[0] = *obs++ - *mean++;
115 sqdiff[0] = MFCCMUL(diff[0], diff[0]);
116 compl[0] = MFCCMUL(sqdiff[0], *var);
117 d = GMMSUB(d, compl[0]);
118 ++var;
119 }
120 /* We could vectorize this but it's unlikely to make much
121 * difference as the outer loop here isn't very big. */
122 for (;j < ceplen; j += 4) {
123 COMPUTE_GMM_MAP(0);
124 COMPUTE_GMM_MAP(1);
125 COMPUTE_GMM_MAP(2);
126 COMPUTE_GMM_MAP(3);
127 COMPUTE_GMM_REDUCE(0);
128 COMPUTE_GMM_REDUCE(1);
129 COMPUTE_GMM_REDUCE(2);
130 COMPUTE_GMM_REDUCE(3);
131 var += 4;
132 obs += 4;
133 mean += 4;
134 }
135 insertion_sort_topn(topn, i, (int32)d);
136 }
137
138 return topn[0].score;
139}
140
141/* This looks bad, but it actually isn't. Less than 1% of eval_cb's
142 * time is spent doing this. */
143static void
144insertion_sort_cb(ptm_topn_t **cur, ptm_topn_t *worst, ptm_topn_t *best,
145 int cw, int32 intd)
146{
147 for (*cur = worst - 1; *cur >= best && intd >= (*cur)->score; --*cur)
148 memcpy(*cur + 1, *cur, sizeof(**cur));
149 ++*cur;
150 (*cur)->cw = cw;
151 (*cur)->score = intd;
152}
153
154static int
155eval_cb(ptm_mgau_t *s, int cb, int feat, mfcc_t *z)
156{
157 ptm_topn_t *worst, *best, *topn;
158 mfcc_t *mean;
159 mfcc_t *var, *det, *detP, *detE;
160 int32 i, ceplen;
161
162 best = topn = s->f->topn[cb][feat];
163 worst = topn + (s->max_topn - 1);
164 mean = s->g->mean[cb][feat][0];
165 var = s->g->var[cb][feat][0];
166 det = s->g->det[cb][feat];
167 detE = det + s->g->n_density;
168 ceplen = s->g->featlen[feat];
169
170 for (detP = det; detP < detE; ++detP) {
171 mfcc_t diff[4], sqdiff[4], compl[4]; /* diff, diff^2, component likelihood */
172 mfcc_t d, thresh;
173 mfcc_t *obs;
174 ptm_topn_t *cur;
175 int32 cw, j;
176
177 d = *detP;
178 thresh = (mfcc_t) worst->score; /* Avoid int-to-float conversions */
179 obs = z;
180 cw = (int)(detP - det);
181
182 /* Unroll the loop starting with the first dimension(s). In
183 * theory this might be a bit faster if this Gaussian gets
184 * "knocked out" by C0. In practice not. */
185 for (j = 0; (j < ceplen % 4) && (d >= thresh); ++j) {
186 diff[0] = *obs++ - *mean++;
187 sqdiff[0] = MFCCMUL(diff[0], diff[0]);
188 compl[0] = MFCCMUL(sqdiff[0], *var++);
189 d = GMMSUB(d, compl[0]);
190 }
191 /* Now do 4 dimensions at a time. You'd think that GCC would
192 * vectorize this? Apparently not. And it's right, because
193 * that won't make this any faster, at least on x86-64. */
194 for (; j < ceplen && d >= thresh; j += 4) {
195 COMPUTE_GMM_MAP(0);
196 COMPUTE_GMM_MAP(1);
197 COMPUTE_GMM_MAP(2);
198 COMPUTE_GMM_MAP(3);
199 COMPUTE_GMM_REDUCE(0);
200 COMPUTE_GMM_REDUCE(1);
201 COMPUTE_GMM_REDUCE(2);
202 COMPUTE_GMM_REDUCE(3);
203 var += 4;
204 obs += 4;
205 mean += 4;
206 }
207 if (j < ceplen) {
208 /* terminated early, so not in topn */
209 mean += (ceplen - j);
210 var += (ceplen - j);
211 continue;
212 }
213 if (d < thresh)
214 continue;
215 for (i = 0; i < s->max_topn; i++) {
216 /* already there, so don't need to insert */
217 if (topn[i].cw == cw)
218 break;
219 }
220 if (i < s->max_topn)
221 continue; /* already there. Don't insert */
222 insertion_sort_cb(&cur, worst, best, cw, (int32)d);
223 }
224
225 return best->score;
226}
227
231static int
232ptm_mgau_codebook_eval(ptm_mgau_t *s, mfcc_t **z, int frame)
233{
234 int i, j;
235
236 /* First evaluate top-N from previous frame. */
237 for (i = 0; i < s->g->n_mgau; ++i)
238 for (j = 0; j < s->g->n_feat; ++j)
239 eval_topn(s, i, j, z[j]);
240
241 /* If frame downsampling is in effect, possibly do nothing else. */
242 if (frame % s->ds_ratio)
243 return 0;
244
245 /* Evaluate remaining codebooks. */
246 for (i = 0; i < s->g->n_mgau; ++i) {
247 if (bitvec_is_clear(s->f->mgau_active, i))
248 continue;
249 for (j = 0; j < s->g->n_feat; ++j) {
250 eval_cb(s, i, j, z[j]);
251 }
252 }
253 return 0;
254}
255
265static int
266ptm_mgau_codebook_norm(ptm_mgau_t *s, mfcc_t **z, int frame)
267{
268 int i, j;
269
270 for (j = 0; j < s->g->n_feat; ++j) {
271 int32 norm = WORST_SCORE;
272 for (i = 0; i < s->g->n_mgau; ++i) {
273 if (bitvec_is_clear(s->f->mgau_active, i))
274 continue;
275 if (norm < s->f->topn[i][j][0].score >> SENSCR_SHIFT)
276 norm = s->f->topn[i][j][0].score >> SENSCR_SHIFT;
277 }
278 assert(norm != WORST_SCORE);
279 for (i = 0; i < s->g->n_mgau; ++i) {
280 int32 k;
281 if (bitvec_is_clear(s->f->mgau_active, i))
282 continue;
283 for (k = 0; k < s->max_topn; ++k) {
284 s->f->topn[i][j][k].score >>= SENSCR_SHIFT;
285 s->f->topn[i][j][k].score -= norm;
286 s->f->topn[i][j][k].score = -s->f->topn[i][j][k].score;
287 if (s->f->topn[i][j][k].score > MAX_NEG_ASCR)
288 s->f->topn[i][j][k].score = MAX_NEG_ASCR;
289 }
290 }
291 }
292
293 return 0;
294}
295
296static int
297ptm_mgau_calc_cb_active(ptm_mgau_t *s, uint8 *senone_active,
298 int32 n_senone_active, int compallsen)
299{
300 int i, lastsen;
301
302 if (compallsen) {
303 bitvec_set_all(s->f->mgau_active, s->g->n_mgau);
304 return 0;
305 }
306 bitvec_clear_all(s->f->mgau_active, s->g->n_mgau);
307 for (lastsen = i = 0; i < n_senone_active; ++i) {
308 int sen = senone_active[i] + lastsen;
309 int cb = s->sen2cb[sen];
310 bitvec_set(s->f->mgau_active, cb);
311 lastsen = sen;
312 }
313 E_DEBUG(1, ("Active codebooks:"));
314 for (i = 0; i < s->g->n_mgau; ++i) {
315 if (bitvec_is_clear(s->f->mgau_active, i))
316 continue;
317 E_DEBUGCONT(1, (" %d", i));
318 }
319 E_DEBUGCONT(1, ("\n"));
320 return 0;
321}
322
326static int
327ptm_mgau_senone_eval(ptm_mgau_t *s, int16 *senone_scores,
328 uint8 *senone_active, int32 n_senone_active,
329 int compall)
330{
331 int i, lastsen, bestscore;
332
333 memset(senone_scores, 0, s->n_sen * sizeof(*senone_scores));
334 /* FIXME: This is the non-cache-efficient way to do this. We want
335 * to evaluate one codeword at a time but this requires us to have
336 * a reverse codebook to senone mapping, which we don't have
337 * (yet), since different codebooks have different top-N
338 * codewords. */
339 if (compall)
340 n_senone_active = s->n_sen;
341 bestscore = 0x7fffffff;
342 for (lastsen = i = 0; i < n_senone_active; ++i) {
343 int sen, f, cb;
344 int ascore;
345
346 if (compall)
347 sen = i;
348 else
349 sen = senone_active[i] + lastsen;
350 lastsen = sen;
351 cb = s->sen2cb[sen];
352
353 if (bitvec_is_clear(s->f->mgau_active, cb)) {
354 int j;
355 /* Because senone_active is deltas we can't really "knock
356 * out" senones from pruned codebooks, and in any case,
357 * it wouldn't make any difference to the search code,
358 * which doesn't expect senone_active to change. */
359 for (f = 0; f < s->g->n_feat; ++f) {
360 for (j = 0; j < s->max_topn; ++j) {
361 s->f->topn[cb][f][j].score = MAX_NEG_ASCR;
362 }
363 }
364 }
365 /* For each feature, log-sum codeword scores + mixw to get
366 * feature density, then sum (multiply) to get ascore */
367 ascore = 0;
368 for (f = 0; f < s->g->n_feat; ++f) {
369 ptm_topn_t *topn;
370 int j, fden = 0;
371 topn = s->f->topn[cb][f];
372 for (j = 0; j < s->max_topn; ++j) {
373 int mixw;
374 /* Find mixture weight for this codeword. */
375 if (s->mixw_cb) {
376 int dcw = s->mixw[f][topn[j].cw][sen/2];
377 dcw = (dcw & 1) ? dcw >> 4 : dcw & 0x0f;
378 mixw = s->mixw_cb[dcw];
379 }
380 else {
381 mixw = s->mixw[f][topn[j].cw][sen];
382 }
383 if (j == 0)
384 fden = mixw + topn[j].score;
385 else
386 fden = fast_logmath_add(s->lmath_8b, fden,
387 mixw + topn[j].score);
388 E_DEBUG(3, ("fden[%d][%d] l+= %d + %d = %d\n",
389 sen, f, mixw, topn[j].score, fden));
390 }
391 ascore += fden;
392 }
393 if (ascore < bestscore) bestscore = ascore;
394 senone_scores[sen] = ascore;
395 }
396 /* Normalize the scores again (finishing the job we started above
397 * in ptm_mgau_codebook_eval...) */
398 for (i = 0; i < s->n_sen; ++i) {
399 senone_scores[i] -= bestscore;
400 }
401
402 return 0;
403}
404
408int32
409ptm_mgau_frame_eval(ps_mgau_t *ps,
410 int16 *senone_scores,
411 uint8 *senone_active,
412 int32 n_senone_active,
413 mfcc_t ** featbuf, int32 frame,
414 int32 compallsen)
415{
416 ptm_mgau_t *s = (ptm_mgau_t *)ps;
417 int fast_eval_idx;
418
419 /* Find the appropriate frame in the rotating history buffer
420 * corresponding to the requested input frame. No bounds checking
421 * is done here, which just means you'll get semi-random crap if
422 * you request a frame in the future or one that's too far in the
423 * past. Since the history buffer is just used for fast match
424 * that might not be fatal. */
425 fast_eval_idx = frame % s->n_fast_hist;
426 s->f = s->hist + fast_eval_idx;
427 /* Compute the top-N codewords for every codebook, unless this
428 * is a past frame, in which case we already have them (we
429 * hope!) */
430 if (frame >= ps_mgau_base(ps)->frame_idx) {
431 ptm_fast_eval_t *lastf;
432 /* Get the previous frame's top-N information (on the
433 * first frame of the input this is just all WORST_DIST,
434 * no harm in that) */
435 if (fast_eval_idx == 0)
436 lastf = s->hist + s->n_fast_hist - 1;
437 else
438 lastf = s->hist + fast_eval_idx - 1;
439 /* Copy in initial top-N info */
440 memcpy(s->f->topn[0][0], lastf->topn[0][0],
441 s->g->n_mgau * s->g->n_feat * s->max_topn * sizeof(ptm_topn_t));
442 /* Generate initial active codebook list (this might not be
443 * necessary) */
444 ptm_mgau_calc_cb_active(s, senone_active, n_senone_active, compallsen);
445 /* Now evaluate top-N, prune, and evaluate remaining codebooks. */
446 ptm_mgau_codebook_eval(s, featbuf, frame);
447 ptm_mgau_codebook_norm(s, featbuf, frame);
448 }
449 /* Evaluate intersection of active senones and active codebooks. */
450 ptm_mgau_senone_eval(s, senone_scores, senone_active,
451 n_senone_active, compallsen);
452
453 return 0;
454}
455
456static int32
457read_sendump(ptm_mgau_t *s, bin_mdef_t *mdef, char const *file)
458{
459 FILE *fp;
460 char line[1000];
461 int32 i, n, r, c;
462 int32 do_swap, do_mmap;
463 size_t offset;
464 int n_clust = 0;
465 int n_feat = s->g->n_feat;
466 int n_density = s->g->n_density;
467 int n_sen = bin_mdef_n_sen(mdef);
468 int n_bits = 8;
469
470 s->n_sen = n_sen; /* FIXME: Should have been done earlier */
471 do_mmap = cmd_ln_boolean_r(s->config, "-mmap");
472
473 if ((fp = fopen(file, "rb")) == NULL)
474 return -1;
475
476 E_INFO("Loading senones from dump file %s\n", file);
477 /* Read title size, title */
478 if (fread(&n, sizeof(int32), 1, fp) != 1) {
479 E_ERROR_SYSTEM("Failed to read title size from %s", file);
480 goto error_out;
481 }
482 /* This is extremely bogus */
483 do_swap = 0;
484 if (n < 1 || n > 999) {
485 SWAP_INT32(&n);
486 if (n < 1 || n > 999) {
487 E_ERROR("Title length %x in dump file %s out of range\n", n, file);
488 goto error_out;
489 }
490 do_swap = 1;
491 }
492 if (fread(line, sizeof(char), n, fp) != n) {
493 E_ERROR_SYSTEM("Cannot read title");
494 goto error_out;
495 }
496 if (line[n - 1] != '\0') {
497 E_ERROR("Bad title in dump file\n");
498 goto error_out;
499 }
500 E_INFO("%s\n", line);
501
502 /* Read header size, header */
503 if (fread(&n, sizeof(n), 1, fp) != 1) {
504 E_ERROR_SYSTEM("Failed to read header size from %s", file);
505 goto error_out;
506 }
507 if (do_swap) SWAP_INT32(&n);
508 if (fread(line, sizeof(char), n, fp) != n) {
509 E_ERROR_SYSTEM("Cannot read header");
510 goto error_out;
511 }
512 if (line[n - 1] != '\0') {
513 E_ERROR("Bad header in dump file\n");
514 goto error_out;
515 }
516
517 /* Read other header strings until string length = 0 */
518 for (;;) {
519 if (fread(&n, sizeof(n), 1, fp) != 1) {
520 E_ERROR_SYSTEM("Failed to read header string size from %s", file);
521 goto error_out;
522 }
523 if (do_swap) SWAP_INT32(&n);
524 if (n == 0)
525 break;
526 if (fread(line, sizeof(char), n, fp) != n) {
527 E_ERROR_SYSTEM("Cannot read header");
528 goto error_out;
529 }
530 /* Look for a cluster count, if present */
531 if (!strncmp(line, "feature_count ", strlen("feature_count "))) {
532 n_feat = atoi(line + strlen("feature_count "));
533 }
534 if (!strncmp(line, "mixture_count ", strlen("mixture_count "))) {
535 n_density = atoi(line + strlen("mixture_count "));
536 }
537 if (!strncmp(line, "model_count ", strlen("model_count "))) {
538 n_sen = atoi(line + strlen("model_count "));
539 }
540 if (!strncmp(line, "cluster_count ", strlen("cluster_count "))) {
541 n_clust = atoi(line + strlen("cluster_count "));
542 }
543 if (!strncmp(line, "cluster_bits ", strlen("cluster_bits "))) {
544 n_bits = atoi(line + strlen("cluster_bits "));
545 }
546 }
547
548 /* Defaults for #rows, #columns in mixw array. */
549 c = n_sen;
550 r = n_density;
551 if (n_clust == 0) {
552 /* Older mixw files have them here, and they might be padded. */
553 if (fread(&r, sizeof(r), 1, fp) != 1) {
554 E_ERROR_SYSTEM("Cannot read #rows");
555 goto error_out;
556 }
557 if (do_swap) SWAP_INT32(&r);
558 if (fread(&c, sizeof(c), 1, fp) != 1) {
559 E_ERROR_SYSTEM("Cannot read #columns");
560 goto error_out;
561 }
562 if (do_swap) SWAP_INT32(&c);
563 E_INFO("Rows: %d, Columns: %d\n", r, c);
564 }
565
566 if (n_feat != s->g->n_feat) {
567 E_ERROR("Number of feature streams mismatch: %d != %d\n",
568 n_feat, s->g->n_feat);
569 goto error_out;
570 }
571 if (n_density != s->g->n_density) {
572 E_ERROR("Number of densities mismatch: %d != %d\n",
573 n_density, s->g->n_density);
574 goto error_out;
575 }
576 if (n_sen != s->n_sen) {
577 E_ERROR("Number of senones mismatch: %d != %d\n",
578 n_sen, s->n_sen);
579 goto error_out;
580 }
581
582 if (!((n_clust == 0) || (n_clust == 15) || (n_clust == 16))) {
583 E_ERROR("Cluster count must be 0, 15, or 16\n");
584 goto error_out;
585 }
586 if (n_clust == 15)
587 ++n_clust;
588
589 if (!((n_bits == 8) || (n_bits == 4))) {
590 E_ERROR("Cluster count must be 4 or 8\n");
591 goto error_out;
592 }
593
594 if (do_mmap) {
595 E_INFO("Using memory-mapped I/O for senones\n");
596 }
597 offset = ftell(fp);
598
599 /* Allocate memory for pdfs (or memory map them) */
600 if (do_mmap) {
601 s->sendump_mmap = mmio_file_read(file);
602 /* Get cluster codebook if any. */
603 if (n_clust) {
604 s->mixw_cb = ((uint8 *) mmio_file_ptr(s->sendump_mmap)) + offset;
605 offset += n_clust;
606 }
607 }
608 else {
609 /* Get cluster codebook if any. */
610 if (n_clust) {
611 s->mixw_cb = ckd_calloc(1, n_clust);
612 if (fread(s->mixw_cb, 1, n_clust, fp) != (size_t) n_clust) {
613 E_ERROR("Failed to read %d bytes from sendump\n", n_clust);
614 goto error_out;
615 }
616 }
617 }
618
619 /* Set up pointers, or read, or whatever */
620 if (s->sendump_mmap) {
621 s->mixw = ckd_calloc_2d(n_feat, n_density, sizeof(*s->mixw));
622 for (n = 0; n < n_feat; n++) {
623 int step = c;
624 if (n_bits == 4)
625 step = (step + 1) / 2;
626 for (i = 0; i < r; i++) {
627 s->mixw[n][i] = ((uint8 *) mmio_file_ptr(s->sendump_mmap)) + offset;
628 offset += step;
629 }
630 }
631 }
632 else {
633 s->mixw = ckd_calloc_3d(n_feat, n_density, n_sen, sizeof(***s->mixw));
634 /* Read pdf values and ids */
635 for (n = 0; n < n_feat; n++) {
636 int step = c;
637 if (n_bits == 4)
638 step = (step + 1) / 2;
639 for (i = 0; i < r; i++) {
640 if (fread(s->mixw[n][i], sizeof(***s->mixw), step, fp)
641 != (size_t) step) {
642 E_ERROR("Failed to read %d bytes from sendump\n", step);
643 goto error_out;
644 }
645 }
646 }
647 }
648
649 fclose(fp);
650 return 0;
651error_out:
652 fclose(fp);
653 return -1;
654}
655
656static int32
657read_mixw(ptm_mgau_t * s, char const *file_name, double SmoothMin)
658{
659 char **argname, **argval;
660 char eofchk;
661 FILE *fp;
662 int32 byteswap, chksum_present;
663 uint32 chksum;
664 float32 *pdf;
665 int32 i, f, c, n;
666 int32 n_sen;
667 int32 n_feat;
668 int32 n_comp;
669 int32 n_err;
670
671 E_INFO("Reading mixture weights file '%s'\n", file_name);
672
673 if ((fp = fopen(file_name, "rb")) == NULL)
674 E_FATAL_SYSTEM("Failed to open mixture file '%s' for reading", file_name);
675
676 /* Read header, including argument-value info and 32-bit byteorder magic */
677 if (bio_readhdr(fp, &argname, &argval, &byteswap) < 0)
678 E_FATAL("Failed to read header from '%s'\n", file_name);
679
680 /* Parse argument-value list */
681 chksum_present = 0;
682 for (i = 0; argname[i]; i++) {
683 if (strcmp(argname[i], "version") == 0) {
684 if (strcmp(argval[i], MGAU_MIXW_VERSION) != 0)
685 E_WARN("Version mismatch(%s): %s, expecting %s\n",
686 file_name, argval[i], MGAU_MIXW_VERSION);
687 }
688 else if (strcmp(argname[i], "chksum0") == 0) {
689 chksum_present = 1; /* Ignore the associated value */
690 }
691 }
692 bio_hdrarg_free(argname, argval);
693 argname = argval = NULL;
694
695 chksum = 0;
696
697 /* Read #senones, #features, #codewords, arraysize */
698 if ((bio_fread(&n_sen, sizeof(int32), 1, fp, byteswap, &chksum) != 1)
699 || (bio_fread(&n_feat, sizeof(int32), 1, fp, byteswap, &chksum) !=
700 1)
701 || (bio_fread(&n_comp, sizeof(int32), 1, fp, byteswap, &chksum) !=
702 1)
703 || (bio_fread(&n, sizeof(int32), 1, fp, byteswap, &chksum) != 1)) {
704 E_FATAL("bio_fread(%s) (arraysize) failed\n", file_name);
705 }
706 if (n_feat != s->g->n_feat)
707 E_FATAL("#Features streams(%d) != %d\n", n_feat, s->g->n_feat);
708 if (n != n_sen * n_feat * n_comp) {
709 E_FATAL
710 ("%s: #float32s(%d) doesn't match header dimensions: %d x %d x %d\n",
711 file_name, i, n_sen, n_feat, n_comp);
712 }
713
714 /* n_sen = number of mixture weights per codeword, which is
715 * fixed at the number of senones since we have only one codebook.
716 */
717 s->n_sen = n_sen;
718
719 /* Quantized mixture weight arrays. */
720 s->mixw = ckd_calloc_3d(s->g->n_feat, s->g->n_density,
721 n_sen, sizeof(***s->mixw));
722
723 /* Temporary structure to read in floats before conversion to (int32) logs3 */
724 pdf = (float32 *) ckd_calloc(n_comp, sizeof(float32));
725
726 /* Read senone probs data, normalize, floor, convert to logs3, truncate to 8 bits */
727 n_err = 0;
728 for (i = 0; i < n_sen; i++) {
729 for (f = 0; f < n_feat; f++) {
730 if (bio_fread((void *) pdf, sizeof(float32),
731 n_comp, fp, byteswap, &chksum) != n_comp) {
732 E_FATAL("bio_fread(%s) (arraydata) failed\n", file_name);
733 }
734
735 /* Normalize and floor */
736 if (vector_sum_norm(pdf, n_comp) <= 0.0)
737 n_err++;
738 vector_floor(pdf, n_comp, SmoothMin);
739 vector_sum_norm(pdf, n_comp);
740
741 /* Convert to LOG, quantize, and transpose */
742 for (c = 0; c < n_comp; c++) {
743 int32 qscr;
744
745 qscr = -logmath_log(s->lmath_8b, pdf[c]);
746 if ((qscr > MAX_NEG_MIXW) || (qscr < 0))
747 qscr = MAX_NEG_MIXW;
748 s->mixw[f][c][i] = qscr;
749 }
750 }
751 }
752 if (n_err > 0)
753 E_WARN("Weight normalization failed for %d mixture weights components\n", n_err);
754
755 ckd_free(pdf);
756
757 if (chksum_present)
758 bio_verify_chksum(fp, byteswap, chksum);
759
760 if (fread(&eofchk, 1, 1, fp) == 1)
761 E_FATAL("More data than expected in %s\n", file_name);
762
763 fclose(fp);
764
765 E_INFO("Read %d x %d x %d mixture weights\n", n_sen, n_feat, n_comp);
766 return n_sen;
767}
768
769ps_mgau_t *
770ptm_mgau_init(acmod_t *acmod, bin_mdef_t *mdef)
771{
772 ptm_mgau_t *s;
773 ps_mgau_t *ps;
774 char const *sendump_path;
775 int i;
776
777 s = ckd_calloc(1, sizeof(*s));
778 s->config = acmod->config;
779
780 s->lmath = logmath_retain(acmod->lmath);
781 /* Log-add table. */
782 s->lmath_8b = logmath_init(logmath_get_base(acmod->lmath), SENSCR_SHIFT, TRUE);
783 if (s->lmath_8b == NULL)
784 goto error_out;
785 /* Ensure that it is only 8 bits wide so that fast_logmath_add() works. */
786 if (logmath_get_width(s->lmath_8b) != 1) {
787 E_ERROR("Log base %f is too small to represent add table in 8 bits\n",
788 logmath_get_base(s->lmath_8b));
789 goto error_out;
790 }
791
792 /* Read means and variances. */
793 if ((s->g = gauden_init(cmd_ln_str_r(s->config, "_mean"),
794 cmd_ln_str_r(s->config, "_var"),
795 cmd_ln_float32_r(s->config, "-varfloor"),
796 s->lmath)) == NULL) {
797 E_ERROR("Failed to read means and variances\n");
798 goto error_out;
799 }
800
801 /* We only support 256 codebooks or less (like 640k or 2GB, this
802 * should be enough for anyone) */
803 if (s->g->n_mgau > 256) {
804 E_INFO("Number of codebooks exceeds 256: %d\n", s->g->n_mgau);
805 goto error_out;
806 }
807 if (s->g->n_mgau != bin_mdef_n_ciphone(mdef)) {
808 E_INFO("Number of codebooks doesn't match number of ciphones, doesn't look like PTM: %d != %d\n", s->g->n_mgau, bin_mdef_n_ciphone(mdef));
809 goto error_out;
810 }
811 /* Verify n_feat and veclen, against acmod. */
812 if (s->g->n_feat != feat_dimension1(acmod->fcb)) {
813 E_ERROR("Number of streams does not match: %d != %d\n",
814 s->g->n_feat, feat_dimension1(acmod->fcb));
815 goto error_out;
816 }
817 for (i = 0; i < s->g->n_feat; ++i) {
818 if (s->g->featlen[i] != feat_dimension2(acmod->fcb, i)) {
819 E_ERROR("Dimension of stream %d does not match: %d != %d\n",
820 s->g->featlen[i], feat_dimension2(acmod->fcb, i));
821 goto error_out;
822 }
823 }
824 /* Read mixture weights. */
825 if ((sendump_path = cmd_ln_str_r(s->config, "_sendump"))) {
826 if (read_sendump(s, acmod->mdef, sendump_path) < 0) {
827 goto error_out;
828 }
829 }
830 else {
831 if (read_mixw(s, cmd_ln_str_r(s->config, "_mixw"),
832 cmd_ln_float32_r(s->config, "-mixwfloor")) < 0) {
833 goto error_out;
834 }
835 }
836 s->ds_ratio = cmd_ln_int32_r(s->config, "-ds");
837 s->max_topn = cmd_ln_int32_r(s->config, "-topn");
838 E_INFO("Maximum top-N: %d\n", s->max_topn);
839
840 /* Assume mapping of senones to their base phones, though this
841 * will become more flexible in the future. */
842 s->sen2cb = ckd_calloc(s->n_sen, sizeof(*s->sen2cb));
843 for (i = 0; i < s->n_sen; ++i)
844 s->sen2cb[i] = bin_mdef_sen2cimap(acmod->mdef, i);
845
846 /* Allocate fast-match history buffers. We need enough for the
847 * phoneme lookahead window, plus the current frame, plus one for
848 * good measure? (FIXME: I don't remember why) */
849 s->n_fast_hist = cmd_ln_int32_r(s->config, "-pl_window") + 2;
850 s->hist = ckd_calloc(s->n_fast_hist, sizeof(*s->hist));
851 /* s->f will be a rotating pointer into s->hist. */
852 s->f = s->hist;
853 for (i = 0; i < s->n_fast_hist; ++i) {
854 int j, k, m;
855 /* Top-N codewords for every codebook and feature. */
856 s->hist[i].topn = ckd_calloc_3d(s->g->n_mgau, s->g->n_feat,
857 s->max_topn, sizeof(ptm_topn_t));
858 /* Initialize them to sane (yet arbitrary) defaults. */
859 for (j = 0; j < s->g->n_mgau; ++j) {
860 for (k = 0; k < s->g->n_feat; ++k) {
861 for (m = 0; m < s->max_topn; ++m) {
862 s->hist[i].topn[j][k][m].cw = m;
863 s->hist[i].topn[j][k][m].score = WORST_DIST;
864 }
865 }
866 }
867 /* Active codebook mapping (just codebook, not features,
868 at least not yet) */
869 s->hist[i].mgau_active = bitvec_alloc(s->g->n_mgau);
870 /* Start with them all on, prune them later. */
871 bitvec_set_all(s->hist[i].mgau_active, s->g->n_mgau);
872 }
873
874 ps = (ps_mgau_t *)s;
875 ps->vt = &ptm_mgau_funcs;
876 return ps;
877error_out:
878 ptm_mgau_free(ps_mgau_base(s));
879 return NULL;
880}
881
882int
883ptm_mgau_mllr_transform(ps_mgau_t *ps,
884 ps_mllr_t *mllr)
885{
886 ptm_mgau_t *s = (ptm_mgau_t *)ps;
887 return gauden_mllr_transform(s->g, mllr, s->config);
888}
889
890void
891ptm_mgau_free(ps_mgau_t *ps)
892{
893 int i;
894 ptm_mgau_t *s = (ptm_mgau_t *)ps;
895
896 logmath_free(s->lmath);
897 logmath_free(s->lmath_8b);
898 if (s->sendump_mmap) {
899 ckd_free_2d(s->mixw);
900 mmio_file_unmap(s->sendump_mmap);
901 }
902 else {
903 ckd_free_3d(s->mixw);
904 }
905 ckd_free(s->sen2cb);
906
907 for (i = 0; i < s->n_fast_hist; i++) {
908 ckd_free_3d(s->hist[i].topn);
909 bitvec_free(s->hist[i].mgau_active);
910 }
911 ckd_free(s->hist);
912
913 gauden_free(s->g);
914 ckd_free(s);
915}
#define WORST_SCORE
Large "bad" score.
Definition hmm.h:84
#define SENSCR_SHIFT
Shift count for senone scores.
Definition hmm.h:73
Fast phonetically-tied mixture evaluation.
Acoustic model structure.
Definition acmod.h:148
bin_mdef_t * mdef
Model definition.
Definition acmod.h:159
cmd_ln_t * config
Configuration.
Definition acmod.h:150
feat_t * fcb
Dynamic feature computation.
Definition acmod.h:156
logmath_t * lmath
Log-math computation.
Definition acmod.h:151
mfcc_t **** var
like mean; diagonal covariance vector only
Definition ms_gauden.h:84
mfcc_t *** det
log(determinant) for each variance vector; actually, log(sqrt(2*pi*det))
Definition ms_gauden.h:85
int32 n_feat
Number feature streams in each codebook.
Definition ms_gauden.h:89
mfcc_t **** mean
mean[codebook][feature][codeword] vector
Definition ms_gauden.h:83
int32 n_density
Number gaussian densities in each codebook-feature stream.
Definition ms_gauden.h:90
int32 * featlen
feature length for each feature
Definition ms_gauden.h:91
int32 n_mgau
Number codebooks.
Definition ms_gauden.h:88
ps_mgaufuncs_t * vt
vtable of mgau functions.
Definition acmod.h:114
Feature space linear transform structure.
Definition acmod.h:82
ptm_topn_t *** topn
Top-N for each codebook (mgau x feature x topn)
Definition ptm_mgau.h:64
bitvec_t * mgau_active
Set of active codebooks.
Definition ptm_mgau.h:65
uint8 * sen2cb
Senone to codebook mapping.
Definition ptm_mgau.h:73
ptm_fast_eval_t * hist
Fast evaluation info for past frames.
Definition ptm_mgau.h:80
cmd_ln_t * config
Configuration parameters.
Definition ptm_mgau.h:70
ptm_fast_eval_t * f
Fast eval info for current frame.
Definition ptm_mgau.h:81
int32 n_sen
Number of senones.
Definition ptm_mgau.h:72
int n_fast_hist
Number of past frames tracked.
Definition ptm_mgau.h:82
gauden_t * g
Set of Gaussians.
Definition ptm_mgau.h:71
uint8 *** mixw
Mixture weight distributions by feature, codeword, senone.
Definition ptm_mgau.h:74
int32 cw
Codeword index.
Definition ptm_mgau.h:59
int32 score
Score.
Definition ptm_mgau.h:60
Common code shared between SC and PTM (tied-state) models.
#define GMMSUB(a, b)
Subtract GMM component b (assumed to be positive) and saturate.
LOGMATH_INLINE int fast_logmath_add(logmath_t *lmath, int mlx, int mly)
Quickly log-add two negated log probabilities.
#define MAX_NEG_ASCR
Maximum negated acoustic score value.
#define MAX_NEG_MIXW
Maximum negated mixture weight value.