aboutsummaryrefslogtreecommitdiff
path: root/gfsm/gfsm/src/libgfsm/gfsmTrie.c
diff options
context:
space:
mode:
Diffstat (limited to 'gfsm/gfsm/src/libgfsm/gfsmTrie.c')
-rw-r--r--gfsm/gfsm/src/libgfsm/gfsmTrie.c282
1 files changed, 282 insertions, 0 deletions
diff --git a/gfsm/gfsm/src/libgfsm/gfsmTrie.c b/gfsm/gfsm/src/libgfsm/gfsmTrie.c
new file mode 100644
index 0000000..952e55d
--- /dev/null
+++ b/gfsm/gfsm/src/libgfsm/gfsmTrie.c
@@ -0,0 +1,282 @@
+
+/*=============================================================================*\
+ * File: gfsmTrie.c
+ * Author: Bryan Jurish <moocow@ling.uni-potsdam.de>
+ * Description: finite state machine library
+ *
+ * Copyright (c) 2005-2006 Bryan Jurish.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *=============================================================================*/
+
+#include <gfsmTrie.h>
+#include <gfsmArcIter.h>
+
+/*======================================================================
+ * Constants
+ */
+
+const gfsmAutomatonFlags gfsmTrieDefaultFlags =
+ {
+ TRUE, //-- is_transducer:1
+ TRUE, //-- is_weighted:1
+ TRUE, //-- is_deterministic:1
+ gfsmASMLower, //-- sort_mode:24
+ 0 //-- unused:5
+ };
+
+const gfsmSRType gfsmTrieDefaultSRType = gfsmSRTReal;
+
+
+/*======================================================================
+ * Methods: ensure path (adding weight)
+ */
+
+//--------------------------------------------------------------
+gfsmStateId gfsm_trie_add_path(gfsmTrie *trie,
+ gfsmLabelVector *lo,
+ gfsmLabelVector *hi,
+ gfsmWeight w)
+{
+ return gfsm_trie_add_path_full(trie,lo,hi,w,TRUE,TRUE,TRUE,NULL);
+}
+
+
+//--------------------------------------------------------------
+gfsmStateId gfsm_trie_add_path_full(gfsmTrie *trie,
+ gfsmLabelVector *lo,
+ gfsmLabelVector *hi,
+ gfsmWeight w,
+ gboolean add_to_arcs,
+ gboolean add_to_state_final,
+ gboolean add_to_path_final,
+ gfsmStateIdVector *path_states
+ )
+{
+ gfsmStateId qid;
+ guint i;
+
+ //-- ensure trie has a root state
+ if (!gfsm_automaton_has_state(trie,trie->root_id)) {
+ trie->root_id = gfsm_automaton_add_state(trie);
+ }
+ qid = trie->root_id;
+
+ //-- initialize state-path, if specified
+ if (path_states) {
+ g_ptr_array_set_size(path_states, (lo ? lo->len : 0) + (hi ? hi->len : 0));
+ path_states->len = 0;
+ g_ptr_array_add(path_states, GUINT_TO_POINTER(qid));
+ }
+
+ //-- add lower path
+ for (i=0; lo && i < lo->len; i++) {
+ if (add_to_state_final) {
+ gfsm_automaton_set_final_state_full(trie, qid, TRUE,
+ gfsm_sr_plus(trie->sr, w, gfsm_automaton_get_final_weight(trie, qid)));
+ }
+ qid = gfsm_trie_get_arc_lower(trie, qid, ((gfsmLabelVal)GPOINTER_TO_UINT(g_ptr_array_index(lo,i))), w, add_to_arcs);
+ if (path_states) g_ptr_array_add(path_states, GUINT_TO_POINTER(qid));
+ }
+
+ //-- add upper path
+ for (i=0; hi && i < hi->len; i++) {
+ if (add_to_state_final) {
+ gfsm_automaton_set_final_state_full(trie, qid, TRUE,
+ gfsm_sr_plus(trie->sr, w, gfsm_automaton_get_final_weight(trie, qid)));
+ }
+ qid = gfsm_trie_get_arc_upper(trie, qid, ((gfsmLabelVal)GPOINTER_TO_UINT(g_ptr_array_index(hi,i))), w, add_to_arcs);
+ if (path_states) g_ptr_array_add(path_states, GUINT_TO_POINTER(qid));
+ }
+
+ //-- add final epsilon-arc
+ //qid = gfsm_trie_get_arc_both(trie, qid, gfsmEpsilon, gfsmEpsilon, w, add_to_arcs);
+ //if (path_states) g_ptr_array_add(path_states,qid);
+
+ if (add_to_state_final || add_to_path_final) {
+ gfsm_automaton_set_final_state_full(trie, qid, TRUE,
+ gfsm_sr_plus(trie->sr, w, gfsm_automaton_get_final_weight(trie, qid)));
+ } else {
+ gfsm_automaton_set_final_state(trie,qid,TRUE);
+ }
+
+ return qid;
+}
+
+/*======================================================================
+ * Methods: find prefix
+ */
+gfsmStateId gfsm_trie_find_prefix(gfsmTrie *trie,
+ gfsmLabelVector *lo,
+ gfsmLabelVector *hi,
+ guint *lo_i,
+ guint *hi_i,
+ gfsmWeight *w_last,
+ gfsmStateIdVector *path_states
+ )
+{
+ gfsmStateId qid = trie->root_id;
+ gfsmWeight fw, w = gfsm_sr_zero(trie->sr);
+ guint i, j=0;
+ gfsmArc *a;
+
+ //-- initialize state-path, if specified
+ if (path_states) {
+ g_ptr_array_set_size(path_states, (lo ? lo->len : 0) + (hi ? hi->len : 0));
+ path_states->len = 0;
+ g_ptr_array_add(path_states, GUINT_TO_POINTER(qid));
+ }
+
+ //-- find lower path
+ for (i=0; lo && i < lo->len; i++) {
+ if ( !(a=gfsm_trie_find_arc_lower(trie, qid, ((gfsmLabelVal)GPOINTER_TO_UINT(g_ptr_array_index(lo,i))))) )
+ break;
+
+ qid = a->target;
+ w = a->weight;
+ if (path_states) g_ptr_array_add(path_states, GUINT_TO_POINTER(qid));
+ }
+
+ //-- find upper path
+ if (i==lo->len) {
+ for (j=0; hi && j < hi->len; j++) {
+ if ( !(a = gfsm_trie_find_arc_upper(trie, qid, ((gfsmLabelVal)GPOINTER_TO_UINT(g_ptr_array_index(hi,j))))) )
+ break;
+
+ qid = a->target;
+ w = a->weight;
+ if (path_states) g_ptr_array_add(path_states, GUINT_TO_POINTER(qid));
+ }
+
+ //-- final state?
+ if (j==hi->len && gfsm_automaton_lookup_final(trie, qid, &fw))
+ w = fw;
+ }
+
+ //-- output variables
+ if (lo_i) *lo_i = i;
+ if (hi_i) *hi_i = j;
+ if (w_last) *w_last = w;
+
+ return qid;
+}
+
+/*======================================================================
+ * Methods: find arcs
+ */
+
+//--------------------------------------------------------------
+gfsmArc* gfsm_trie_find_arc_lower(gfsmTrie *trie, gfsmStateId qid, gfsmLabelVal lab)
+{
+ gfsmArcIter ai;
+ gfsmArc *a=NULL;
+ for (gfsm_arciter_open(&ai, trie, qid), gfsm_arciter_seek_lower(&ai, lab);
+ gfsm_arciter_ok(&ai);
+ gfsm_arciter_next(&ai), gfsm_arciter_seek_lower(&ai,lab))
+ {
+ a = gfsm_arciter_arc(&ai);
+ break;
+ }
+ gfsm_arciter_close(&ai);
+ return a;
+}
+
+//--------------------------------------------------------------
+gfsmArc* gfsm_trie_find_arc_upper(gfsmTrie *trie, gfsmStateId qid, gfsmLabelVal lab)
+{
+ gfsmArcIter ai;
+ gfsmArc *a=NULL;
+ for (gfsm_arciter_open(&ai, trie, qid), gfsm_arciter_seek_upper(&ai, lab);
+ gfsm_arciter_ok(&ai);
+ gfsm_arciter_next(&ai), gfsm_arciter_seek_upper(&ai,lab))
+ {
+ a = gfsm_arciter_arc(&ai);
+ break;
+ }
+ gfsm_arciter_close(&ai);
+ return a;
+}
+
+//--------------------------------------------------------------
+gfsmArc* gfsm_trie_find_arc_both(gfsmTrie *trie, gfsmStateId qid, gfsmLabelVal lo, gfsmLabelVal hi)
+{
+ gfsmArcIter ai;
+ gfsmArc *a=NULL;
+ for (gfsm_arciter_open(&ai,trie,qid), gfsm_arciter_seek_both(&ai,lo,hi);
+ gfsm_arciter_ok(&ai);
+ gfsm_arciter_next(&ai), gfsm_arciter_seek_both(&ai,lo,hi))
+ {
+ a = gfsm_arciter_arc(&ai);
+ break;
+ }
+ gfsm_arciter_close(&ai);
+ return a;
+}
+
+
+/*======================================================================
+ * Methods: find or insert arcs
+ */
+
+//--------------------------------------------------------------
+gfsmStateId gfsm_trie_get_arc_lower(gfsmTrie *trie, gfsmStateId qid, gfsmLabelVal lab, gfsmWeight w, gboolean add_weight)
+{
+ gfsmArc *a=gfsm_trie_find_arc_lower(trie,qid,lab);
+
+ if (a==NULL) {
+ gfsmStateId qid2 = gfsm_automaton_add_state(trie);
+ gfsm_automaton_add_arc(trie,qid,qid2,lab,gfsmEpsilon, add_weight ? w : trie->sr->zero);
+ return qid2;
+ }
+
+ //-- found an existing arc
+ if (add_weight) a->weight = gfsm_sr_plus(trie->sr, a->weight, w);
+ return a->target;
+}
+
+//--------------------------------------------------------------
+gfsmStateId gfsm_trie_get_arc_upper(gfsmTrie *trie, gfsmStateId qid, gfsmLabelVal lab, gfsmWeight w, gboolean add_weight)
+{
+ gfsmArc *a=gfsm_trie_find_arc_upper(trie,qid,lab);
+
+ if (a==NULL) {
+ gfsmStateId qid2 = gfsm_automaton_add_state(trie);
+ gfsm_automaton_add_arc(trie,qid,qid2,gfsmEpsilon,lab, add_weight ? w : trie->sr->zero);
+ //trie->flags.is_deterministic = TRUE; //-- HACK
+ return qid2;
+ }
+
+ //-- found an existing arc
+ if (add_weight) a->weight = gfsm_sr_plus(trie->sr, a->weight, w);
+ return a->target;
+}
+
+//--------------------------------------------------------------
+gfsmStateId gfsm_trie_get_arc_both(gfsmTrie *trie, gfsmStateId qid, gfsmLabelVal lo, gfsmLabelVal hi, gfsmWeight w, gboolean add_weight)
+{
+ gfsmArc *a=gfsm_trie_find_arc_both(trie,qid,lo,hi);
+
+ if (a==NULL) {
+ gfsmStateId qid2 = gfsm_automaton_add_state(trie);
+ gfsm_automaton_add_arc(trie,qid,qid2,lo,hi, add_weight ? w : trie->sr->zero);
+ //trie->flags.is_deterministic = TRUE; //-- HACK
+ return qid2;
+ }
+
+ //-- found an existing arc
+ if (add_weight) a->weight = gfsm_sr_plus(trie->sr, a->weight, w);
+ return a->target;
+}
+