diff options
Diffstat (limited to 'gfsm/gfsm/src/libgfsm/gfsmPaths.c')
-rw-r--r-- | gfsm/gfsm/src/libgfsm/gfsmPaths.c | 431 |
1 files changed, 431 insertions, 0 deletions
diff --git a/gfsm/gfsm/src/libgfsm/gfsmPaths.c b/gfsm/gfsm/src/libgfsm/gfsmPaths.c new file mode 100644 index 0000000..7b5faa5 --- /dev/null +++ b/gfsm/gfsm/src/libgfsm/gfsmPaths.c @@ -0,0 +1,431 @@ + +/*=============================================================================*\ + * File: gfsmPaths.c + * Author: Bryan Jurish <moocow@ling.uni-potsdam.de> + * Description: finite state machine library + * + * Copyright (c) 2005-2007 Bryan Jurish. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + *=============================================================================*/ + +#include <gfsmPaths.h> +#include <gfsmArc.h> +#include <gfsmArcIter.h> + + +/*====================================================================== + * Methods: Path Utilities: gfsmLabelVector + */ + +//-------------------------------------------------------------- +gfsmLabelVector *gfsm_label_vector_copy(gfsmLabelVector *dst, gfsmLabelVector *src) +{ + int i; + g_ptr_array_set_size(dst, src->len); + for (i=0; i < src->len; i++) { + g_ptr_array_index(dst,i) = g_ptr_array_index(src,i); + } + return dst; +} + +//-------------------------------------------------------------- +gfsmLabelVector *gfsm_label_vector_reverse(gfsmLabelVector *v) +{ + guint i, mid; + gpointer tmp; + mid = v->len/2; + for (i=0; i < mid; i++) { + tmp = g_ptr_array_index(v,i); + g_ptr_array_index(v,i) = g_ptr_array_index(v,v->len-i-1); + g_ptr_array_index(v,v->len-i-1) = tmp; + } + return v; +} + +/*====================================================================== + * Methods: Path Utilities: gfsmPath + */ + +//-------------------------------------------------------------- +gfsmPath *gfsm_path_new_full(gfsmLabelVector *lo, gfsmLabelVector *hi, gfsmWeight w) +{ + gfsmPath *p = g_new(gfsmPath,1); + p->lo = lo ? lo : g_ptr_array_new(); + p->hi = hi ? hi : g_ptr_array_new(); + p->w = w; + return p; +} + +//-------------------------------------------------------------- +gfsmPath *gfsm_path_new_copy(gfsmPath *p1) +{ + gfsmPath *p = g_new(gfsmPath,1); + + p->lo = g_ptr_array_sized_new(p1->lo->len); + p->hi = g_ptr_array_sized_new(p1->hi->len); + + gfsm_label_vector_copy(p->lo, p1->lo); + gfsm_label_vector_copy(p->hi, p1->hi); + + p->w = p1->w; + + return p; +} + +//-------------------------------------------------------------- +gfsmPath *gfsm_path_new_append(gfsmPath *p1, gfsmLabelVal lo, gfsmLabelVal hi, gfsmWeight w, gfsmSemiring *sr) +{ + gfsmPath *p = g_new(gfsmPath,1); + + if (lo != gfsmEpsilon) { + p->lo = g_ptr_array_sized_new(p1->lo->len+1); + gfsm_label_vector_copy(p->lo, p1->lo); + g_ptr_array_add(p->lo, GUINT_TO_POINTER(lo)); + } else { + p->lo = g_ptr_array_sized_new(p1->lo->len); + gfsm_label_vector_copy(p->lo, p1->lo); + } + + if (hi != gfsmEpsilon) { + p->hi = g_ptr_array_sized_new(p1->hi->len+1); + gfsm_label_vector_copy(p->hi, p1->hi); + g_ptr_array_add(p->hi, GUINT_TO_POINTER(hi)); + } else { + p->hi = g_ptr_array_sized_new(p1->hi->len); + gfsm_label_vector_copy(p->hi, p1->hi); + } + + p->w = gfsm_sr_times(sr, p1->w, w); + + return p; +} + +//-------------------------------------------------------------- +gfsmPath *gfsm_path_new_times_w(gfsmPath *p1, gfsmWeight w, gfsmSemiring *sr) +{ + gfsmPath *p = g_new(gfsmPath,1); + + p->lo = g_ptr_array_sized_new(p1->lo->len); + gfsm_label_vector_copy(p->lo, p1->lo); + + p->hi = g_ptr_array_sized_new(p1->hi->len); + gfsm_label_vector_copy(p->hi, p1->hi); + + p->w = gfsm_sr_times(sr, p1->w, w); + + return p; +} + +//-------------------------------------------------------------- +void gfsm_path_push(gfsmPath *p, gfsmLabelVal lo, gfsmLabelVal hi, gfsmWeight w, gfsmSemiring *sr) +{ + if (lo != gfsmEpsilon) g_ptr_array_add(p->lo, GUINT_TO_POINTER(lo)); + if (hi != gfsmEpsilon) g_ptr_array_add(p->hi, GUINT_TO_POINTER(hi)); + p->w = gfsm_sr_times(sr, p->w, w); +} + + +//-------------------------------------------------------------- +void gfsm_path_pop(gfsmPath *p, gfsmLabelVal lo, gfsmLabelVal hi) +{ + if (lo != gfsmEpsilon) g_ptr_array_remove_index_fast(p->lo, p->lo->len-1); + if (hi != gfsmEpsilon) g_ptr_array_remove_index_fast(p->hi, p->hi->len-1); +} + +//-------------------------------------------------------------- +int gfsm_label_vector_compare(const gfsmLabelVector *v1, const gfsmLabelVector *v2) +{ + int i; + gfsmLabelVal lab1, lab2; + if (v1==v2) return 0; + + for (i=0; i < v1->len && i < v2->len; i++) { + lab1 = (gfsmLabelVal)GPOINTER_TO_UINT(g_ptr_array_index(v1,i)); + lab2 = (gfsmLabelVal)GPOINTER_TO_UINT(g_ptr_array_index(v2,i)); + if (lab1 < lab2) return -1; + if (lab1 > lab2) return 1; + } + if (v1->len < v2->len) return -1; + if (v1->len > v2->len) return 1; + return 0; +} + +//-------------------------------------------------------------- +int gfsm_path_compare_data(const gfsmPath *p1, const gfsmPath *p2, gfsmSemiring *sr) +{ + int cmp; + if (p1==p2) return 0; + if ((cmp=gfsm_sr_compare(sr, p1->w, p2->w))!=0) return cmp; + if ((cmp=gfsm_label_vector_compare(p1->lo,p2->lo))!=0) return cmp; + if ((cmp=gfsm_label_vector_compare(p1->hi,p2->hi))!=0) return cmp; + return 0; +} + +//-------------------------------------------------------------- +gfsmPath *gfsm_path_reverse(gfsmPath *p) +{ + if (p->lo) gfsm_label_vector_reverse(p->lo); + if (p->hi) gfsm_label_vector_reverse(p->hi); + return p; +} + +//-------------------------------------------------------------- +void gfsm_path_free(gfsmPath *p) +{ + if (!p) return; + if (p->lo) g_ptr_array_free(p->lo,TRUE); + if (p->hi) g_ptr_array_free(p->hi,TRUE); + g_free(p); +} + +/*====================================================================== + * Methods: Automaton Serialization: paths() + */ + +//-------------------------------------------------------------- +gfsmSet *gfsm_automaton_paths(gfsmAutomaton *fsm, gfsmSet *paths) +{ + return gfsm_automaton_paths_full(fsm, paths, (fsm->flags.is_transducer ? gfsmLSBoth : gfsmLSLower)); +} + +//-------------------------------------------------------------- +gfsmSet *gfsm_automaton_paths_full(gfsmAutomaton *fsm, gfsmSet *paths, gfsmLabelSide which) +{ + gfsmPath *tmp = gfsm_path_new(fsm->sr); + if (paths==NULL) { + paths = gfsm_set_new_full((GCompareDataFunc)gfsm_path_compare_data, + (gpointer)fsm->sr, + (GDestroyNotify)gfsm_path_free); + } + _gfsm_automaton_paths_r(fsm, paths, which, fsm->root_id, tmp); + gfsm_path_free(tmp); + return paths; +} + +//-------------------------------------------------------------- +gfsmSet *_gfsm_automaton_paths_r(gfsmAutomaton *fsm, + gfsmSet *paths, + gfsmLabelSide which, + gfsmStateId q, + gfsmPath *path) +{ + gfsmArcIter ai; + gfsmWeight fw; + + //-- if final state, add to set of full paths + if (gfsm_automaton_lookup_final(fsm,q,&fw)) { + gfsmWeight path_w = path->w; + path->w = gfsm_sr_times(fsm->sr, fw, path_w); + + if (!gfsm_set_contains(paths,path)) { + gfsm_set_insert(paths, gfsm_path_new_copy(path)); + } + path->w = path_w; + } + + //-- investigate all outgoing arcs + for (gfsm_arciter_open(&ai, fsm, q); gfsm_arciter_ok(&ai); gfsm_arciter_next(&ai)) { + gfsmArc *arc = gfsm_arciter_arc(&ai); + gfsmWeight w = path->w; + gfsmLabelVal lo,hi; + + if (which==gfsmLSLower) { + lo = arc->lower; + hi = gfsmEpsilon; + } else if (which==gfsmLSUpper) { + lo = gfsmEpsilon; + hi = arc->upper; + } else { + lo = arc->lower; + hi = arc->upper; + } + + gfsm_path_push(path, lo, hi, arc->weight, fsm->sr); + _gfsm_automaton_paths_r(fsm, paths, which, arc->target, path); + + gfsm_path_pop(path, lo, hi); + path->w = w; + } + gfsm_arciter_close(&ai); + + return paths; +} + +/*====================================================================== + * Methods: Automaton Serialization: paths_to_strings() + */ + +//-------------------------------------------------------------- +GSList *gfsm_paths_to_strings(gfsmSet *paths, + gfsmAlphabet *abet_lo, + gfsmAlphabet *abet_hi, + gfsmSemiring *sr, + gboolean warn_on_undefined, + gboolean att_style, + GSList *strings) +{ + gfsmPathsToStringsOptions opts = + { + abet_lo, + abet_hi, + sr, + warn_on_undefined, + att_style, + NULL + }; + + gfsm_set_foreach(paths, (GTraverseFunc)_gfsm_paths_to_strings_foreach_func, &opts); + + return g_slist_reverse(opts.strings); +} + +//-------------------------------------------------------------- +gboolean _gfsm_paths_to_strings_foreach_func(gfsmPath *path, + gpointer value_dummy, + gfsmPathsToStringsOptions *opts) +{ + GString *gs = gfsm_path_to_gstring(path, NULL, + opts->abet_lo, opts->abet_hi, opts->sr, + opts->warn_on_undefined, opts->att_style); + opts->strings = g_slist_prepend(opts->strings, gs->str); + g_string_free(gs,FALSE); + + return FALSE; +} + +//-------------------------------------------------------------- +GString *gfsm_path_to_gstring(gfsmPath *path, + GString *gs, + gfsmAlphabet *abet_lo, + gfsmAlphabet *abet_hi, + gfsmSemiring *sr, + gboolean warn_on_undefined, + gboolean att_style) +{ + if (!gs) gs = g_string_new(""); + if (abet_lo && path->lo->len > 0) { + gfsm_alphabet_labels_to_gstring(abet_lo, path->lo, gs, warn_on_undefined, att_style); + } + if (abet_hi && path->hi->len > 0) { + g_string_append(gs," : "); + gfsm_alphabet_labels_to_gstring(abet_hi, path->hi, gs, warn_on_undefined, att_style); + } + if (gfsm_sr_compare(sr, path->w, sr->one) != 0) { + g_string_append_printf(gs," <%g>",path->w); + } + return gs; +} + +//-------------------------------------------------------------- +char *gfsm_path_to_string(gfsmPath *path, + gfsmAlphabet *abet_lo, + gfsmAlphabet *abet_hi, + gfsmSemiring *sr, + gboolean warn_on_undefined, + gboolean att_style) +{ + GString *gs = gfsm_path_to_gstring(path,NULL,abet_lo,abet_hi,sr,warn_on_undefined,att_style); + char *s = gs->str; + g_string_free(gs,FALSE); + return s; +} + + +/*====================================================================== + * Methods: Viterbi trellis: paths + */ + +//-------------------------------------------------------------- +gfsmSet *gfsm_viterbi_trellis_paths_full(gfsmAutomaton *trellis, gfsmSet *paths, gfsmLabelSide which) +{ + gfsmArcIter ai; + + //-- sanity check: create path-set if given as NULL + if (!paths) { + paths = gfsm_set_new_full((GCompareDataFunc)gfsm_path_compare_data, + (gpointer)trellis->sr, + (GDestroyNotify)gfsm_path_free); + } + + //-- get & follow pseudo-root of all paths + for (gfsm_arciter_open(&ai, trellis, trellis->root_id); gfsm_arciter_ok(&ai); gfsm_arciter_next(&ai)) { + gfsmArc *arc = gfsm_arciter_arc(&ai); + gfsmPath *path = gfsm_path_new(trellis->sr); + + _gfsm_viterbi_trellis_bestpath_r(trellis, path, which, arc->target); + path->w = arc->weight; + + //-- reverse the path we've created + gfsm_path_reverse(path); + + //-- ... and maybe insert it + if (gfsm_set_contains(paths,path)) { + //-- oops: we've already got this one: free it + gfsm_path_free(path); + } else { + //-- it's a bona-fide new path: insert it + gfsm_set_insert(paths,path); + } + } + + return paths; +} + +//-------------------------------------------------------------- +gfsmPath *gfsm_viterbi_trellis_bestpath_full(gfsmAutomaton *trellis, gfsmPath *path, gfsmLabelSide which) +{ + gfsmArcIter ai; + + //-- sanity check: create path if NULL + if (!path) { path = gfsm_path_new(trellis->sr); } + + //-- get & follow pseudo-root of best path + gfsm_arciter_open(&ai, trellis, trellis->root_id); + if (gfsm_arciter_ok(&ai)) { + gfsmArc *arc = gfsm_arciter_arc(&ai); + _gfsm_viterbi_trellis_bestpath_r(trellis, path, which, arc->target); + path->w = arc->weight; + } else { + path->w = trellis->sr->zero; + } + + //-- reverse the path we've created + gfsm_path_reverse(path); + + return path; +} + +//-------------------------------------------------------------- +void _gfsm_viterbi_trellis_bestpath_r(gfsmAutomaton *trellis, + gfsmPath *path, + gfsmLabelSide which, + gfsmStateId qid) +{ + while (TRUE) { + gfsmArcIter ai; + gfsm_arciter_open(&ai, trellis, qid); + + if (gfsm_arciter_ok(&ai)) { + gfsmArc *arc = gfsm_arciter_arc(&ai); + gfsm_path_push(path, + (which!=gfsmLSUpper ? arc->lower : gfsmEpsilon), + (which!=gfsmLSLower ? arc->upper : gfsmEpsilon), + trellis->sr->one, trellis->sr); + qid = arc->target; + } + else break; + } +} |