aboutsummaryrefslogtreecommitdiff
path: root/gfsm/gfsm/src/libgfsm/gfsmLookup.c
blob: 9b99d8fb868cf1c2b84664fde01dde28654355ba (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481

/*=============================================================================*\
 * File: gfsmLookup.c
 * Author: Bryan Jurish <moocow@ling.uni-potsdam.de>
 * Description: finite state machine library
 *
 * Copyright (c) 2005-2007 Bryan Jurish.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 * 
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 * 
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 *=============================================================================*/

#include <gfsmLookup.h>

#include <gfsmAlphabet.h>
#include <gfsmState.h>
#include <gfsmArc.h>
#include <gfsmArcIter.h>

#include <string.h>

/*======================================================================
 * Constants
 */
const gfsmStateId gfsmLookupStateMapGet = 16;

/*======================================================================
 * Methods: lookup
 */

//--------------------------------------------------------------
gfsmAutomaton *gfsm_automaton_lookup_full(gfsmAutomaton     *fst,
					  gfsmLabelVector   *input,
					  gfsmAutomaton     *result,
					  gfsmStateIdVector *statemap)
{
  GSList           *stack = NULL;
  gfsmLookupConfig *cfg   = (gfsmLookupConfig*)g_new(gfsmLookupConfig,1);
  gfsmLookupConfig *cfg_new;
  const gfsmState  *qt;
  gfsmState        *qr;
  gfsmLabelVal      a;
  gfsmArcIter       ai;

  //-- ensure result automaton exists and is clear
  if (result==NULL) {
    result = gfsm_automaton_shadow(fst);
  } else {
    gfsm_automaton_clear(result);
  }
  result->flags.is_transducer = TRUE;

  //-- initialization
  result->root_id = gfsm_automaton_add_state(result);
  cfg->qt = fst->root_id;
  cfg->qr = result->root_id;
  cfg->i  = 0;
  stack = g_slist_prepend(stack, cfg);

  //-- ye olde loope
  while (stack != NULL) {
    //-- pop the top element off the stack
    cfg   = (gfsmLookupConfig*)(stack->data);
    stack = g_slist_delete_link(stack, stack);

    //-- add config to the state-map, if non-NULL
    if (statemap) {
      if (cfg->qr >= statemap->len) {
	g_ptr_array_set_size(statemap, cfg->qr + gfsmLookupStateMapGet);
      }
      g_ptr_array_index(statemap, cfg->qr) = GUINT_TO_POINTER(cfg->qt);
    }

    //-- get states
    qt = gfsm_automaton_find_state_const(fst,    cfg->qt);
    qr = gfsm_automaton_find_state      (result, cfg->qr);
    a  = (cfg->i < input->len
	  ? (gfsmLabelVal)GPOINTER_TO_UINT(g_ptr_array_index(input, cfg->i))
	  : gfsmNoLabel);

    //-- check for final states
    if (cfg->i >= input->len && gfsm_state_is_final(qt)) {
      gfsm_automaton_set_final_state_full(result, cfg->qr, TRUE,
					  gfsm_automaton_get_final_weight(fst, cfg->qt));
    }

    //-- handle outgoing arcs
    for (gfsm_arciter_open_ptr(&ai, fst, (gfsmState*)qt); gfsm_arciter_ok(&ai); gfsm_arciter_next(&ai))
      {
	gfsmArc *arc = gfsm_arciter_arc(&ai);

	//-- epsilon arcs
	if (arc->lower == gfsmEpsilon) {
	  cfg_new = (gfsmLookupConfig*)g_new(gfsmLookupConfig,1);
	  cfg_new->qt = arc->target;
	  cfg_new->qr = gfsm_automaton_add_state(result);
	  cfg_new->i  = cfg->i;
	  gfsm_automaton_add_arc(result, cfg->qr, cfg_new->qr, arc->lower, arc->upper, arc->weight);
	  stack = g_slist_prepend(stack, cfg_new);
	}
	//-- input-matching arcs
	else if (a != gfsmNoLabel && arc->lower == a) {
	  cfg_new = (gfsmLookupConfig*)g_new(gfsmLookupConfig,1);
	  cfg_new->qt = arc->target;
	  cfg_new->qr = gfsm_automaton_add_state(result);
	  cfg_new->i  = cfg->i+1;
	  gfsm_automaton_add_arc(result, cfg->qr, cfg_new->qr, arc->lower, arc->upper, arc->weight);
	  stack = g_slist_prepend(stack, cfg_new);
	}
      }

    //-- we're done with this config
    g_free(cfg);
  }

  //-- set final size of the state-map
  if (statemap) { statemap->len = result->states->len; }
  
  return result;
}


/*======================================================================
 * Methods: Viterbi
 */


//--------------------------------------------------------------
gfsmAutomaton *gfsm_automaton_lookup_viterbi_full(gfsmAutomaton     *fst,
						  gfsmLabelVector   *input,
						  gfsmAutomaton     *trellis,
						  gfsmStateIdVector *trellis2fst)
{
  //-- cols: array of (GSList <gfsmViterbiConfig*> *)
  gfsmViterbiTable *cols = g_ptr_array_sized_new(input->len+1);
  GSList           *col, *prevcoli;
  gfsmViterbiMap   *fst2trellis = gfsm_viterbi_map_new();
  guint             i;
  gboolean          trellis2fst_is_tmp = FALSE;
  gfsmStateId       qid_trellis, qid_trellis_nxt, qid_fst;
  gpointer          ptr_qid_trellis_nxt;
  gfsmState        *q_trellis, *q_trellis_nxt, *q_fst;
  gfsmArcIter       ai;
  gfsmWeight        w_trellis;

  //-- ensure trellis automaton exists and is clear
  if (trellis==NULL) {
    trellis = gfsm_automaton_shadow(fst);
  } else {
    gfsm_automaton_clear(trellis);
  }
  trellis->flags.is_transducer = TRUE;

  //-- ensure trellis->fst stateid-map exists and is clear
  if (!trellis2fst) {
    trellis2fst = g_ptr_array_sized_new(input->len+1);
    trellis2fst_is_tmp = TRUE;
  } else if (trellis2fst->len < 2) {
    g_ptr_array_set_size(trellis2fst, input->len+1);
  }

  //-- initial config: trellis structure
  qid_trellis = trellis->root_id = gfsm_automaton_add_state(trellis);
  q_trellis = gfsm_automaton_find_state(trellis, qid_trellis);
  gfsm_automaton_set_final_state_full(trellis, qid_trellis, TRUE, fst->sr->one);
  gfsm_automaton_add_arc(trellis, qid_trellis, qid_trellis, gfsmNoLabel, gfsmNoLabel, fst->sr->one);

  //-- initial config: stateid-mappings
  g_ptr_array_index(trellis2fst, qid_trellis) = GUINT_TO_POINTER(fst->root_id);
  g_tree_insert(fst2trellis, GUINT_TO_POINTER(fst->root_id), GUINT_TO_POINTER(qid_trellis));

  //-- initial config: epsilon-expansion on column
  g_ptr_array_index(cols,0) = col = g_slist_prepend(NULL, GUINT_TO_POINTER(qid_trellis));
  _gfsm_viterbi_expand_column(fst, trellis, col, trellis2fst, fst2trellis);

  //-- initial config: cleanup
  gfsm_viterbi_map_free(fst2trellis);


  //-- ye olde loope: for each input character (i)
  for (i=0; i < input->len; i++) {
    gfsmLabelVal a  = (gfsmLabelVal)GPOINTER_TO_UINT(g_ptr_array_index(input, i));

    fst2trellis = gfsm_viterbi_map_new();
    col         = NULL;

    //-- get possible successors
    for (prevcoli=(GSList*)g_ptr_array_index(cols,i); prevcoli != NULL; prevcoli=prevcoli->next) {

      //-- get the top element of the queue
      qid_trellis = (gfsmStateId)GPOINTER_TO_UINT(prevcoli->data);
      qid_fst     = (gfsmStateId)GPOINTER_TO_UINT(g_ptr_array_index(trellis2fst, qid_trellis));

      //-- get state pointers
      q_trellis = gfsm_automaton_find_state(trellis, qid_trellis);
      q_fst     = gfsm_automaton_find_state(fst,     qid_fst);

      //-- get Viterbi properties
      w_trellis = gfsm_viterbi_node_best_weight(q_trellis);


      //-- search for input-matching arcs & add them to the successor map for next column
      for (gfsm_arciter_open_ptr(&ai, fst, q_fst), gfsm_arciter_seek_lower(&ai,a);
	   gfsm_arciter_ok(&ai);
	   gfsm_arciter_next(&ai), gfsm_arciter_seek_lower(&ai,a))
	{
	  gfsmArc     *arc_fst         = gfsm_arciter_arc(&ai);
	  gfsmWeight   w_trellis_nxt;
	  gpointer     orig_key;

	  //-- found a matching arc: is its target state already marked as a successor?
	  if (g_tree_lookup_extended(fst2trellis,
				     GUINT_TO_POINTER(arc_fst->target),
				     &orig_key,
				     &ptr_qid_trellis_nxt))
	    {
	      //-- yep: known successor: get old ("*_nxt") & new ("*_nxt_new") weights
	      gfsmWeight w_trellis_nxt_new = gfsm_sr_times(fst->sr, w_trellis, arc_fst->weight);
	      qid_trellis_nxt = GPOINTER_TO_UINT(ptr_qid_trellis_nxt);
	      q_trellis_nxt   = gfsm_automaton_find_state(trellis, qid_trellis_nxt);
	      w_trellis_nxt   = gfsm_viterbi_node_best_weight(q_trellis_nxt);

	      //-- is the new path better than the stored path?
	      if (gfsm_sr_less(fst->sr, w_trellis_nxt_new, w_trellis_nxt)) {
		//-- yep: update mappings: trellis automaton
		gfsmArc *arc_trellis_nxt = gfsm_viterbi_node_arc(q_trellis_nxt);
		arc_trellis_nxt->target  = qid_trellis;
		arc_trellis_nxt->lower   = a;
		arc_trellis_nxt->upper   = arc_fst->upper;
		arc_trellis_nxt->weight  = w_trellis_nxt_new;

		//-- update mappings: trellis->fst stateid-map
		g_ptr_array_index(trellis2fst, qid_trellis_nxt) = GUINT_TO_POINTER(arc_fst->target);

		//-- update mappings: fst->trellis stateid-map
		g_tree_insert(fst2trellis, GUINT_TO_POINTER(arc_fst->target), GUINT_TO_POINTER(qid_trellis_nxt));
	      }
	    }
	else
	  {
	    //-- target state not already marked as a successor: mark it
	    qid_trellis_nxt = gfsm_automaton_add_state(trellis);
	    q_trellis_nxt   = gfsm_automaton_find_state(trellis,qid_trellis_nxt);
	    gfsm_automaton_add_arc(trellis,
				   qid_trellis_nxt, qid_trellis,
				   a,               arc_fst->upper,
				   gfsm_sr_times(fst->sr, w_trellis, arc_fst->weight));

	    //-- save trellis->fst stateid-map
	    if (qid_trellis_nxt >= trellis2fst->len) {
	      g_ptr_array_set_size(trellis2fst, qid_trellis_nxt + gfsmLookupStateMapGet);
	    }
	    g_ptr_array_index(trellis2fst,qid_trellis_nxt) = GUINT_TO_POINTER(arc_fst->target);

	    //-- save fst->trellis stateid-map
	    g_tree_insert(fst2trellis, GUINT_TO_POINTER(arc_fst->target), GUINT_TO_POINTER(qid_trellis_nxt));

	    //-- add new trellis state to the column
	    col = g_slist_prepend(col, GUINT_TO_POINTER(qid_trellis_nxt));
	  }

	} //-- END: seek input-matching arcs
    } //-- END: previous column iteration (prevcoli)

    //-- expand epsilons in current column
    _gfsm_viterbi_expand_column(fst, trellis, col, trellis2fst, fst2trellis);

    //-- update column table
    g_ptr_array_index(cols,i+1) = col;

    //-- per-input-index cleanup
    gfsm_viterbi_map_free(fst2trellis);
  }

  //-- final iteration (EOS): get possible "final" states
  qid_trellis_nxt = gfsm_automaton_add_state(trellis); //-- qid_trellis_nxt: new root
  for (prevcoli=(GSList*)g_ptr_array_index(cols,input->len); prevcoli != NULL; prevcoli=prevcoli->next) {

    //-- get the top element of the queue
    qid_trellis = (gfsmStateId)GPOINTER_TO_UINT(prevcoli->data);
    qid_fst     = (gfsmStateId)GPOINTER_TO_UINT(g_ptr_array_index(trellis2fst, qid_trellis));
      
    //-- get state pointers
    q_trellis = gfsm_automaton_find_state(trellis, qid_trellis);
    q_fst     = gfsm_automaton_find_state(fst,     qid_fst);
    
    //-- get Viterbi properties
    w_trellis = gfsm_viterbi_node_best_weight(q_trellis);

    //-- check for finality
    if (q_fst->is_final) {
      gfsm_automaton_add_arc(trellis, qid_trellis_nxt, qid_trellis,
			     gfsmEpsilon, gfsmEpsilon,
			     gfsm_sr_times(fst->sr,
					   w_trellis,
					   gfsm_automaton_get_final_weight(fst,qid_fst)));
    }
  }

  //-- mark single best path from new root
  qid_trellis = qid_trellis_nxt;
  q_trellis = gfsm_automaton_find_state(trellis,qid_trellis);
  q_trellis->arcs = gfsm_arclist_sort(q_trellis->arcs,
				      &((gfsmArcCompData){gfsmASMWeight,fst->sr,NULL,NULL}));

  //-- break dummy arc on trellis final state (old root)
  q_trellis = gfsm_automaton_find_state(trellis,trellis->root_id);
  gfsm_arclist_free(q_trellis->arcs);
  q_trellis->arcs = NULL;

  //-- mark new root
  trellis->root_id = qid_trellis;


  //-- cleanup: columns
  for (i=0; i < cols->len; i++) {
    g_slist_free((GSList*)g_ptr_array_index(cols,i));
  }

  //-- cleanup: column array
  g_ptr_array_free(cols,TRUE);
  if (trellis2fst_is_tmp) g_ptr_array_free(trellis2fst,TRUE);
  else {
    //-- just set length
    trellis2fst->len = trellis->states->len;
  }

  return trellis;
}


/*======================================================================
 * Methods: Viterbi: expand_column
 */

//--------------------------------------------------------------
void _gfsm_viterbi_expand_column(gfsmAutomaton        *fst,
				 gfsmAutomaton        *trellis,
				 gfsmViterbiColumn    *col,
				 gfsmStateIdVector    *trellis2fst,
				 gfsmViterbiMap       *fst2trellis)
{
  gfsmArcIter ai;
  gfsmViterbiColumn *coli;
  gfsmStateId        qid_trellis, qid_fst;
  gfsmState         *q_trellis;
  gfsmArc           *arc_trellis;
  gfsmWeight         w_trellis;

  //-- pass-1: add everything already in the column as a literal
  /*
  for (coli=col; coli != NULL; coli=coli->next) {
    node = (gfsmViterbiNode*)(coli->data);
    if (!g_tree_lookup(fst2trellis,node->key)) {
      g_tree_insert(cmap,node->key,node->val);
    }
  }
  */

  //-- pass-2: add epsilon arcs from every literal in the column
  for (coli=col; coli != NULL; coli=coli->next) {
    //-- get node
    qid_trellis = (gfsmStateId)GPOINTER_TO_UINT(coli->data);
    q_trellis   = gfsm_automaton_find_state(trellis,qid_trellis);
    arc_trellis = gfsm_viterbi_node_arc(q_trellis);
    w_trellis   = gfsm_viterbi_node_best_weight(q_trellis);
    qid_fst     = (gfsmStateId)GPOINTER_TO_UINT(g_ptr_array_index(trellis2fst,qid_trellis));

    //-- search for input-epsilon arcs & add them to this column
    for (gfsm_arciter_open(&ai,fst,qid_fst), gfsm_arciter_seek_lower(&ai,gfsmEpsilon);
	 gfsm_arciter_ok(&ai);
	 gfsm_arciter_next(&ai), gfsm_arciter_seek_lower(&ai,gfsmEpsilon))
      {
	gfsmArc     *arc_fst = gfsm_arciter_arc(&ai);
	gfsmStateId  qid_trellis_nxt = gfsmNoState;
	gpointer     ptr_qid_trellis_nxt;
	gfsmState   *q_trellis_nxt;
	gfsmWeight   w_trellis_nxt;
	gpointer     orig_key;

	//-- found an eps-arc: is its target state already marked as a successor?
	if (g_tree_lookup_extended(fst2trellis,
				   GUINT_TO_POINTER(arc_fst->target),
				   &orig_key,
				   &ptr_qid_trellis_nxt))
	  {
	    //-- yep: get the old ("*_eps") & new ("*_nxt") weights
	    gfsmWeight w_trellis_eps = gfsm_sr_times(fst->sr, w_trellis, arc_fst->weight);
	    qid_trellis_nxt = GPOINTER_TO_UINT(ptr_qid_trellis_nxt);
	    q_trellis_nxt   = gfsm_automaton_find_state(trellis,qid_trellis_nxt);
	    w_trellis_nxt   = gfsm_viterbi_node_best_weight(q_trellis_nxt);

	    //-- is the new eps-path better than the stored path?
	    if (gfsm_sr_less(fst->sr,w_trellis_eps,w_trellis_nxt)) {
	      //-- yep: update mappings: trellis automaton
	      gfsmArc *arc_trellis_nxt = gfsm_viterbi_node_arc(q_trellis_nxt);
	      arc_trellis_nxt->target  = qid_trellis;
	      arc_trellis_nxt->lower   = gfsmEpsilon;
	      arc_trellis_nxt->upper   = arc_fst->upper;
	      arc_trellis_nxt->weight  = w_trellis_eps;

	      //-- update mappings: trellis->fst stateid-map
	      g_ptr_array_index(trellis2fst, qid_trellis_nxt) = GUINT_TO_POINTER(arc_fst->target);

	      //-- update mappings: fst->trellis stateid-map
	      g_tree_insert(fst2trellis, GUINT_TO_POINTER(arc_fst->target), GUINT_TO_POINTER(qid_trellis_nxt));
	    }
	    else {
	      //-- eps-path is worse than the existing path: forget about it
	      ;
	    }
	  }
	else
	  {
	    //-- eps-target state not already marked as a successor: mark it
	    qid_trellis_nxt = gfsm_automaton_add_state(trellis);
	    q_trellis_nxt   = gfsm_automaton_find_state(trellis,qid_trellis_nxt);
	    gfsm_automaton_add_arc(trellis,
				   qid_trellis_nxt, qid_trellis,
				   gfsmEpsilon,     arc_fst->upper,
				   gfsm_sr_times(fst->sr, w_trellis, arc_fst->weight));

	    //-- save trellis->fst stateid-map
	    if (qid_trellis_nxt >= trellis2fst->len) {
	      g_ptr_array_set_size(trellis2fst, qid_trellis_nxt + gfsmLookupStateMapGet);
	    }
	    g_ptr_array_index(trellis2fst,qid_trellis_nxt) = GUINT_TO_POINTER(arc_fst->target);

	    //-- save fst->trellis stateid-map
	    g_tree_insert(fst2trellis, GUINT_TO_POINTER(arc_fst->target), GUINT_TO_POINTER(qid_trellis_nxt));

	    //-- queue-up new trellis state for eps-seek
	    coli->next = g_slist_prepend(coli->next, GUINT_TO_POINTER(qid_trellis_nxt));
	  }

      } //-- END: seek epsilon arcs

  } //-- END column iteration

}


/*======================================================================
 * Methods: Viterbi: Map
 */

//--------------------------------------------------------------
#if 0
gfsmViterbiNodeValue *gfsm_viterbi_column_map_insert_if_less(gfsmViterbiMap      *vmap,
							     gfsmViterbiNodeKey    key,
							     gfsmWeight            w,
							     gfsmSemiring         *sr)
{
  gpointer s_val;
  if (s_val = gfsm_viterbi_map_lookup(vmap,key)) {
    //-- already present
    if (!gfsm_sr_less(sr,w,s_val->w)) return NULL; //-- (s_val->w) <= (w)
    s_val->w = w;
  } else {
    //-- not already present: copy & insert
    s_val  = g_new(gfsmViterbiNodeValue,1);
    s_val->qtrellis = gfsmNoState;
    a_val->pqtrellis = gfsmNoState;
    s_val->w        = w;
    g_tree_insert(col,key,s_val);
  }
  return s_val; //-- update occurred
}
#endif