aboutsummaryrefslogtreecommitdiff
path: root/gfsm/gfsm/src/libgfsm/gfsmLookup.c
diff options
context:
space:
mode:
Diffstat (limited to 'gfsm/gfsm/src/libgfsm/gfsmLookup.c')
-rw-r--r--gfsm/gfsm/src/libgfsm/gfsmLookup.c481
1 files changed, 481 insertions, 0 deletions
diff --git a/gfsm/gfsm/src/libgfsm/gfsmLookup.c b/gfsm/gfsm/src/libgfsm/gfsmLookup.c
new file mode 100644
index 0000000..9b99d8f
--- /dev/null
+++ b/gfsm/gfsm/src/libgfsm/gfsmLookup.c
@@ -0,0 +1,481 @@
+
+/*=============================================================================*\
+ * File: gfsmLookup.c
+ * Author: Bryan Jurish <moocow@ling.uni-potsdam.de>
+ * Description: finite state machine library
+ *
+ * Copyright (c) 2005-2007 Bryan Jurish.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *=============================================================================*/
+
+#include <gfsmLookup.h>
+
+#include <gfsmAlphabet.h>
+#include <gfsmState.h>
+#include <gfsmArc.h>
+#include <gfsmArcIter.h>
+
+#include <string.h>
+
+/*======================================================================
+ * Constants
+ */
+const gfsmStateId gfsmLookupStateMapGet = 16;
+
+/*======================================================================
+ * Methods: lookup
+ */
+
+//--------------------------------------------------------------
+gfsmAutomaton *gfsm_automaton_lookup_full(gfsmAutomaton *fst,
+ gfsmLabelVector *input,
+ gfsmAutomaton *result,
+ gfsmStateIdVector *statemap)
+{
+ GSList *stack = NULL;
+ gfsmLookupConfig *cfg = (gfsmLookupConfig*)g_new(gfsmLookupConfig,1);
+ gfsmLookupConfig *cfg_new;
+ const gfsmState *qt;
+ gfsmState *qr;
+ gfsmLabelVal a;
+ gfsmArcIter ai;
+
+ //-- ensure result automaton exists and is clear
+ if (result==NULL) {
+ result = gfsm_automaton_shadow(fst);
+ } else {
+ gfsm_automaton_clear(result);
+ }
+ result->flags.is_transducer = TRUE;
+
+ //-- initialization
+ result->root_id = gfsm_automaton_add_state(result);
+ cfg->qt = fst->root_id;
+ cfg->qr = result->root_id;
+ cfg->i = 0;
+ stack = g_slist_prepend(stack, cfg);
+
+ //-- ye olde loope
+ while (stack != NULL) {
+ //-- pop the top element off the stack
+ cfg = (gfsmLookupConfig*)(stack->data);
+ stack = g_slist_delete_link(stack, stack);
+
+ //-- add config to the state-map, if non-NULL
+ if (statemap) {
+ if (cfg->qr >= statemap->len) {
+ g_ptr_array_set_size(statemap, cfg->qr + gfsmLookupStateMapGet);
+ }
+ g_ptr_array_index(statemap, cfg->qr) = GUINT_TO_POINTER(cfg->qt);
+ }
+
+ //-- get states
+ qt = gfsm_automaton_find_state_const(fst, cfg->qt);
+ qr = gfsm_automaton_find_state (result, cfg->qr);
+ a = (cfg->i < input->len
+ ? (gfsmLabelVal)GPOINTER_TO_UINT(g_ptr_array_index(input, cfg->i))
+ : gfsmNoLabel);
+
+ //-- check for final states
+ if (cfg->i >= input->len && gfsm_state_is_final(qt)) {
+ gfsm_automaton_set_final_state_full(result, cfg->qr, TRUE,
+ gfsm_automaton_get_final_weight(fst, cfg->qt));
+ }
+
+ //-- handle outgoing arcs
+ for (gfsm_arciter_open_ptr(&ai, fst, (gfsmState*)qt); gfsm_arciter_ok(&ai); gfsm_arciter_next(&ai))
+ {
+ gfsmArc *arc = gfsm_arciter_arc(&ai);
+
+ //-- epsilon arcs
+ if (arc->lower == gfsmEpsilon) {
+ cfg_new = (gfsmLookupConfig*)g_new(gfsmLookupConfig,1);
+ cfg_new->qt = arc->target;
+ cfg_new->qr = gfsm_automaton_add_state(result);
+ cfg_new->i = cfg->i;
+ gfsm_automaton_add_arc(result, cfg->qr, cfg_new->qr, arc->lower, arc->upper, arc->weight);
+ stack = g_slist_prepend(stack, cfg_new);
+ }
+ //-- input-matching arcs
+ else if (a != gfsmNoLabel && arc->lower == a) {
+ cfg_new = (gfsmLookupConfig*)g_new(gfsmLookupConfig,1);
+ cfg_new->qt = arc->target;
+ cfg_new->qr = gfsm_automaton_add_state(result);
+ cfg_new->i = cfg->i+1;
+ gfsm_automaton_add_arc(result, cfg->qr, cfg_new->qr, arc->lower, arc->upper, arc->weight);
+ stack = g_slist_prepend(stack, cfg_new);
+ }
+ }
+
+ //-- we're done with this config
+ g_free(cfg);
+ }
+
+ //-- set final size of the state-map
+ if (statemap) { statemap->len = result->states->len; }
+
+ return result;
+}
+
+
+/*======================================================================
+ * Methods: Viterbi
+ */
+
+
+//--------------------------------------------------------------
+gfsmAutomaton *gfsm_automaton_lookup_viterbi_full(gfsmAutomaton *fst,
+ gfsmLabelVector *input,
+ gfsmAutomaton *trellis,
+ gfsmStateIdVector *trellis2fst)
+{
+ //-- cols: array of (GSList <gfsmViterbiConfig*> *)
+ gfsmViterbiTable *cols = g_ptr_array_sized_new(input->len+1);
+ GSList *col, *prevcoli;
+ gfsmViterbiMap *fst2trellis = gfsm_viterbi_map_new();
+ guint i;
+ gboolean trellis2fst_is_tmp = FALSE;
+ gfsmStateId qid_trellis, qid_trellis_nxt, qid_fst;
+ gpointer ptr_qid_trellis_nxt;
+ gfsmState *q_trellis, *q_trellis_nxt, *q_fst;
+ gfsmArcIter ai;
+ gfsmWeight w_trellis;
+
+ //-- ensure trellis automaton exists and is clear
+ if (trellis==NULL) {
+ trellis = gfsm_automaton_shadow(fst);
+ } else {
+ gfsm_automaton_clear(trellis);
+ }
+ trellis->flags.is_transducer = TRUE;
+
+ //-- ensure trellis->fst stateid-map exists and is clear
+ if (!trellis2fst) {
+ trellis2fst = g_ptr_array_sized_new(input->len+1);
+ trellis2fst_is_tmp = TRUE;
+ } else if (trellis2fst->len < 2) {
+ g_ptr_array_set_size(trellis2fst, input->len+1);
+ }
+
+ //-- initial config: trellis structure
+ qid_trellis = trellis->root_id = gfsm_automaton_add_state(trellis);
+ q_trellis = gfsm_automaton_find_state(trellis, qid_trellis);
+ gfsm_automaton_set_final_state_full(trellis, qid_trellis, TRUE, fst->sr->one);
+ gfsm_automaton_add_arc(trellis, qid_trellis, qid_trellis, gfsmNoLabel, gfsmNoLabel, fst->sr->one);
+
+ //-- initial config: stateid-mappings
+ g_ptr_array_index(trellis2fst, qid_trellis) = GUINT_TO_POINTER(fst->root_id);
+ g_tree_insert(fst2trellis, GUINT_TO_POINTER(fst->root_id), GUINT_TO_POINTER(qid_trellis));
+
+ //-- initial config: epsilon-expansion on column
+ g_ptr_array_index(cols,0) = col = g_slist_prepend(NULL, GUINT_TO_POINTER(qid_trellis));
+ _gfsm_viterbi_expand_column(fst, trellis, col, trellis2fst, fst2trellis);
+
+ //-- initial config: cleanup
+ gfsm_viterbi_map_free(fst2trellis);
+
+
+ //-- ye olde loope: for each input character (i)
+ for (i=0; i < input->len; i++) {
+ gfsmLabelVal a = (gfsmLabelVal)GPOINTER_TO_UINT(g_ptr_array_index(input, i));
+
+ fst2trellis = gfsm_viterbi_map_new();
+ col = NULL;
+
+ //-- get possible successors
+ for (prevcoli=(GSList*)g_ptr_array_index(cols,i); prevcoli != NULL; prevcoli=prevcoli->next) {
+
+ //-- get the top element of the queue
+ qid_trellis = (gfsmStateId)GPOINTER_TO_UINT(prevcoli->data);
+ qid_fst = (gfsmStateId)GPOINTER_TO_UINT(g_ptr_array_index(trellis2fst, qid_trellis));
+
+ //-- get state pointers
+ q_trellis = gfsm_automaton_find_state(trellis, qid_trellis);
+ q_fst = gfsm_automaton_find_state(fst, qid_fst);
+
+ //-- get Viterbi properties
+ w_trellis = gfsm_viterbi_node_best_weight(q_trellis);
+
+
+ //-- search for input-matching arcs & add them to the successor map for next column
+ for (gfsm_arciter_open_ptr(&ai, fst, q_fst), gfsm_arciter_seek_lower(&ai,a);
+ gfsm_arciter_ok(&ai);
+ gfsm_arciter_next(&ai), gfsm_arciter_seek_lower(&ai,a))
+ {
+ gfsmArc *arc_fst = gfsm_arciter_arc(&ai);
+ gfsmWeight w_trellis_nxt;
+ gpointer orig_key;
+
+ //-- found a matching arc: is its target state already marked as a successor?
+ if (g_tree_lookup_extended(fst2trellis,
+ GUINT_TO_POINTER(arc_fst->target),
+ &orig_key,
+ &ptr_qid_trellis_nxt))
+ {
+ //-- yep: known successor: get old ("*_nxt") & new ("*_nxt_new") weights
+ gfsmWeight w_trellis_nxt_new = gfsm_sr_times(fst->sr, w_trellis, arc_fst->weight);
+ qid_trellis_nxt = GPOINTER_TO_UINT(ptr_qid_trellis_nxt);
+ q_trellis_nxt = gfsm_automaton_find_state(trellis, qid_trellis_nxt);
+ w_trellis_nxt = gfsm_viterbi_node_best_weight(q_trellis_nxt);
+
+ //-- is the new path better than the stored path?
+ if (gfsm_sr_less(fst->sr, w_trellis_nxt_new, w_trellis_nxt)) {
+ //-- yep: update mappings: trellis automaton
+ gfsmArc *arc_trellis_nxt = gfsm_viterbi_node_arc(q_trellis_nxt);
+ arc_trellis_nxt->target = qid_trellis;
+ arc_trellis_nxt->lower = a;
+ arc_trellis_nxt->upper = arc_fst->upper;
+ arc_trellis_nxt->weight = w_trellis_nxt_new;
+
+ //-- update mappings: trellis->fst stateid-map
+ g_ptr_array_index(trellis2fst, qid_trellis_nxt) = GUINT_TO_POINTER(arc_fst->target);
+
+ //-- update mappings: fst->trellis stateid-map
+ g_tree_insert(fst2trellis, GUINT_TO_POINTER(arc_fst->target), GUINT_TO_POINTER(qid_trellis_nxt));
+ }
+ }
+ else
+ {
+ //-- target state not already marked as a successor: mark it
+ qid_trellis_nxt = gfsm_automaton_add_state(trellis);
+ q_trellis_nxt = gfsm_automaton_find_state(trellis,qid_trellis_nxt);
+ gfsm_automaton_add_arc(trellis,
+ qid_trellis_nxt, qid_trellis,
+ a, arc_fst->upper,
+ gfsm_sr_times(fst->sr, w_trellis, arc_fst->weight));
+
+ //-- save trellis->fst stateid-map
+ if (qid_trellis_nxt >= trellis2fst->len) {
+ g_ptr_array_set_size(trellis2fst, qid_trellis_nxt + gfsmLookupStateMapGet);
+ }
+ g_ptr_array_index(trellis2fst,qid_trellis_nxt) = GUINT_TO_POINTER(arc_fst->target);
+
+ //-- save fst->trellis stateid-map
+ g_tree_insert(fst2trellis, GUINT_TO_POINTER(arc_fst->target), GUINT_TO_POINTER(qid_trellis_nxt));
+
+ //-- add new trellis state to the column
+ col = g_slist_prepend(col, GUINT_TO_POINTER(qid_trellis_nxt));
+ }
+
+ } //-- END: seek input-matching arcs
+ } //-- END: previous column iteration (prevcoli)
+
+ //-- expand epsilons in current column
+ _gfsm_viterbi_expand_column(fst, trellis, col, trellis2fst, fst2trellis);
+
+ //-- update column table
+ g_ptr_array_index(cols,i+1) = col;
+
+ //-- per-input-index cleanup
+ gfsm_viterbi_map_free(fst2trellis);
+ }
+
+ //-- final iteration (EOS): get possible "final" states
+ qid_trellis_nxt = gfsm_automaton_add_state(trellis); //-- qid_trellis_nxt: new root
+ for (prevcoli=(GSList*)g_ptr_array_index(cols,input->len); prevcoli != NULL; prevcoli=prevcoli->next) {
+
+ //-- get the top element of the queue
+ qid_trellis = (gfsmStateId)GPOINTER_TO_UINT(prevcoli->data);
+ qid_fst = (gfsmStateId)GPOINTER_TO_UINT(g_ptr_array_index(trellis2fst, qid_trellis));
+
+ //-- get state pointers
+ q_trellis = gfsm_automaton_find_state(trellis, qid_trellis);
+ q_fst = gfsm_automaton_find_state(fst, qid_fst);
+
+ //-- get Viterbi properties
+ w_trellis = gfsm_viterbi_node_best_weight(q_trellis);
+
+ //-- check for finality
+ if (q_fst->is_final) {
+ gfsm_automaton_add_arc(trellis, qid_trellis_nxt, qid_trellis,
+ gfsmEpsilon, gfsmEpsilon,
+ gfsm_sr_times(fst->sr,
+ w_trellis,
+ gfsm_automaton_get_final_weight(fst,qid_fst)));
+ }
+ }
+
+ //-- mark single best path from new root
+ qid_trellis = qid_trellis_nxt;
+ q_trellis = gfsm_automaton_find_state(trellis,qid_trellis);
+ q_trellis->arcs = gfsm_arclist_sort(q_trellis->arcs,
+ &((gfsmArcCompData){gfsmASMWeight,fst->sr,NULL,NULL}));
+
+ //-- break dummy arc on trellis final state (old root)
+ q_trellis = gfsm_automaton_find_state(trellis,trellis->root_id);
+ gfsm_arclist_free(q_trellis->arcs);
+ q_trellis->arcs = NULL;
+
+ //-- mark new root
+ trellis->root_id = qid_trellis;
+
+
+ //-- cleanup: columns
+ for (i=0; i < cols->len; i++) {
+ g_slist_free((GSList*)g_ptr_array_index(cols,i));
+ }
+
+ //-- cleanup: column array
+ g_ptr_array_free(cols,TRUE);
+ if (trellis2fst_is_tmp) g_ptr_array_free(trellis2fst,TRUE);
+ else {
+ //-- just set length
+ trellis2fst->len = trellis->states->len;
+ }
+
+ return trellis;
+}
+
+
+/*======================================================================
+ * Methods: Viterbi: expand_column
+ */
+
+//--------------------------------------------------------------
+void _gfsm_viterbi_expand_column(gfsmAutomaton *fst,
+ gfsmAutomaton *trellis,
+ gfsmViterbiColumn *col,
+ gfsmStateIdVector *trellis2fst,
+ gfsmViterbiMap *fst2trellis)
+{
+ gfsmArcIter ai;
+ gfsmViterbiColumn *coli;
+ gfsmStateId qid_trellis, qid_fst;
+ gfsmState *q_trellis;
+ gfsmArc *arc_trellis;
+ gfsmWeight w_trellis;
+
+ //-- pass-1: add everything already in the column as a literal
+ /*
+ for (coli=col; coli != NULL; coli=coli->next) {
+ node = (gfsmViterbiNode*)(coli->data);
+ if (!g_tree_lookup(fst2trellis,node->key)) {
+ g_tree_insert(cmap,node->key,node->val);
+ }
+ }
+ */
+
+ //-- pass-2: add epsilon arcs from every literal in the column
+ for (coli=col; coli != NULL; coli=coli->next) {
+ //-- get node
+ qid_trellis = (gfsmStateId)GPOINTER_TO_UINT(coli->data);
+ q_trellis = gfsm_automaton_find_state(trellis,qid_trellis);
+ arc_trellis = gfsm_viterbi_node_arc(q_trellis);
+ w_trellis = gfsm_viterbi_node_best_weight(q_trellis);
+ qid_fst = (gfsmStateId)GPOINTER_TO_UINT(g_ptr_array_index(trellis2fst,qid_trellis));
+
+ //-- search for input-epsilon arcs & add them to this column
+ for (gfsm_arciter_open(&ai,fst,qid_fst), gfsm_arciter_seek_lower(&ai,gfsmEpsilon);
+ gfsm_arciter_ok(&ai);
+ gfsm_arciter_next(&ai), gfsm_arciter_seek_lower(&ai,gfsmEpsilon))
+ {
+ gfsmArc *arc_fst = gfsm_arciter_arc(&ai);
+ gfsmStateId qid_trellis_nxt = gfsmNoState;
+ gpointer ptr_qid_trellis_nxt;
+ gfsmState *q_trellis_nxt;
+ gfsmWeight w_trellis_nxt;
+ gpointer orig_key;
+
+ //-- found an eps-arc: is its target state already marked as a successor?
+ if (g_tree_lookup_extended(fst2trellis,
+ GUINT_TO_POINTER(arc_fst->target),
+ &orig_key,
+ &ptr_qid_trellis_nxt))
+ {
+ //-- yep: get the old ("*_eps") & new ("*_nxt") weights
+ gfsmWeight w_trellis_eps = gfsm_sr_times(fst->sr, w_trellis, arc_fst->weight);
+ qid_trellis_nxt = GPOINTER_TO_UINT(ptr_qid_trellis_nxt);
+ q_trellis_nxt = gfsm_automaton_find_state(trellis,qid_trellis_nxt);
+ w_trellis_nxt = gfsm_viterbi_node_best_weight(q_trellis_nxt);
+
+ //-- is the new eps-path better than the stored path?
+ if (gfsm_sr_less(fst->sr,w_trellis_eps,w_trellis_nxt)) {
+ //-- yep: update mappings: trellis automaton
+ gfsmArc *arc_trellis_nxt = gfsm_viterbi_node_arc(q_trellis_nxt);
+ arc_trellis_nxt->target = qid_trellis;
+ arc_trellis_nxt->lower = gfsmEpsilon;
+ arc_trellis_nxt->upper = arc_fst->upper;
+ arc_trellis_nxt->weight = w_trellis_eps;
+
+ //-- update mappings: trellis->fst stateid-map
+ g_ptr_array_index(trellis2fst, qid_trellis_nxt) = GUINT_TO_POINTER(arc_fst->target);
+
+ //-- update mappings: fst->trellis stateid-map
+ g_tree_insert(fst2trellis, GUINT_TO_POINTER(arc_fst->target), GUINT_TO_POINTER(qid_trellis_nxt));
+ }
+ else {
+ //-- eps-path is worse than the existing path: forget about it
+ ;
+ }
+ }
+ else
+ {
+ //-- eps-target state not already marked as a successor: mark it
+ qid_trellis_nxt = gfsm_automaton_add_state(trellis);
+ q_trellis_nxt = gfsm_automaton_find_state(trellis,qid_trellis_nxt);
+ gfsm_automaton_add_arc(trellis,
+ qid_trellis_nxt, qid_trellis,
+ gfsmEpsilon, arc_fst->upper,
+ gfsm_sr_times(fst->sr, w_trellis, arc_fst->weight));
+
+ //-- save trellis->fst stateid-map
+ if (qid_trellis_nxt >= trellis2fst->len) {
+ g_ptr_array_set_size(trellis2fst, qid_trellis_nxt + gfsmLookupStateMapGet);
+ }
+ g_ptr_array_index(trellis2fst,qid_trellis_nxt) = GUINT_TO_POINTER(arc_fst->target);
+
+ //-- save fst->trellis stateid-map
+ g_tree_insert(fst2trellis, GUINT_TO_POINTER(arc_fst->target), GUINT_TO_POINTER(qid_trellis_nxt));
+
+ //-- queue-up new trellis state for eps-seek
+ coli->next = g_slist_prepend(coli->next, GUINT_TO_POINTER(qid_trellis_nxt));
+ }
+
+ } //-- END: seek epsilon arcs
+
+ } //-- END column iteration
+
+}
+
+
+/*======================================================================
+ * Methods: Viterbi: Map
+ */
+
+//--------------------------------------------------------------
+#if 0
+gfsmViterbiNodeValue *gfsm_viterbi_column_map_insert_if_less(gfsmViterbiMap *vmap,
+ gfsmViterbiNodeKey key,
+ gfsmWeight w,
+ gfsmSemiring *sr)
+{
+ gpointer s_val;
+ if (s_val = gfsm_viterbi_map_lookup(vmap,key)) {
+ //-- already present
+ if (!gfsm_sr_less(sr,w,s_val->w)) return NULL; //-- (s_val->w) <= (w)
+ s_val->w = w;
+ } else {
+ //-- not already present: copy & insert
+ s_val = g_new(gfsmViterbiNodeValue,1);
+ s_val->qtrellis = gfsmNoState;
+ a_val->pqtrellis = gfsmNoState;
+ s_val->w = w;
+ g_tree_insert(col,key,s_val);
+ }
+ return s_val; //-- update occurred
+}
+#endif
+