aboutsummaryrefslogtreecommitdiff
path: root/gfsm/gfsm/src/libgfsm/gfsmAutomaton.c
diff options
context:
space:
mode:
Diffstat (limited to 'gfsm/gfsm/src/libgfsm/gfsmAutomaton.c')
-rw-r--r--gfsm/gfsm/src/libgfsm/gfsmAutomaton.c338
1 files changed, 338 insertions, 0 deletions
diff --git a/gfsm/gfsm/src/libgfsm/gfsmAutomaton.c b/gfsm/gfsm/src/libgfsm/gfsmAutomaton.c
new file mode 100644
index 0000000..17f7a37
--- /dev/null
+++ b/gfsm/gfsm/src/libgfsm/gfsmAutomaton.c
@@ -0,0 +1,338 @@
+/*=============================================================================*\
+ * File: gfsmAutomaton.c
+ * Author: Bryan Jurish <moocow@ling.uni-potsdam.de>
+ * Description: finite state machine library: automata
+ *
+ * Copyright (c) 2004-2007 Bryan Jurish.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *=============================================================================*/
+
+#include <gfsmAutomaton.h>
+#include <gfsmArcIter.h>
+#include <gfsmUtils.h>
+#include <gfsmBitVector.h>
+#include <stdlib.h>
+
+#ifndef GFSM_INLINE_ENABLED
+# include <gfsmAutomaton.hi>
+#endif
+
+/*======================================================================
+ * Constants
+ */
+const gfsmStateId gfsmAutomatonDefaultSize = 128;
+
+const gfsmAutomatonFlags gfsmAutomatonDefaultFlags =
+ {
+ TRUE, //-- is_transducer:1
+ TRUE, //-- is_weighted:1
+ //0, //-- sort_mode_old__:4
+ FALSE, //-- is_deterministic:1
+ 0, //-- unused:29 (was: 25)
+ gfsmASMNone //-- sort_mode
+ };
+
+//const gfsmSRType gfsmAutomatonDefaultSRType = gfsmSRTReal;
+const gfsmSRType gfsmAutomatonDefaultSRType = gfsmSRTTropical;
+
+//======================================================================
+// API: Constructors etc.
+
+/*--------------------------------------------------------------
+ * copy()
+ */
+gfsmAutomaton *gfsm_automaton_copy(gfsmAutomaton *dst, gfsmAutomaton *src)
+{
+ gfsmStateId qid;
+ gfsm_automaton_clear(dst);
+ gfsm_automaton_copy_shallow(dst,src);
+ dst->root_id = src->root_id; //-- since copy_shallow() no longer does this!
+ gfsm_automaton_reserve(dst,src->states->len);
+ gfsm_weightmap_copy(dst->finals, src->finals);
+ //
+ for (qid=0; qid < src->states->len; qid++) {
+ const gfsmState *src_s = gfsm_automaton_find_state_const(src,qid);
+ gfsmState *dst_s = gfsm_automaton_find_state(dst,qid);
+ gfsm_state_copy(dst_s, src_s);
+ }
+ return dst;
+}
+
+/*--------------------------------------------------------------
+ * clear()
+ */
+void gfsm_automaton_clear(gfsmAutomaton *fsm)
+{
+ gfsmStateId i;
+ if (!fsm) return;
+ for (i=0; fsm->states && i < fsm->states->len; i++) {
+ gfsmState *st = gfsm_automaton_find_state(fsm,i);
+ if (!st || !st->is_valid) continue;
+ gfsm_state_clear(st);
+ }
+ if (fsm->states) g_array_set_size(fsm->states,0);
+ if (fsm->finals) gfsm_set_clear(fsm->finals);
+ fsm->root_id = gfsmNoState;
+ return;
+}
+
+
+//======================================================================
+// API: Automaton Semiring
+
+//======================================================================
+// API: Automaton Properties
+
+/*--------------------------------------------------------------
+ * n_arcs_full()
+ */
+guint gfsm_automaton_n_arcs_full(gfsmAutomaton *fsm,
+ guint *n_lo_epsilon,
+ guint *n_hi_epsilon,
+ guint *n_both_epsilon)
+{
+ guint i, total=0;
+ guint n_lo_eps=0, n_hi_eps=0, n_both_eps=0;
+ gfsmStateId n_states = gfsm_automaton_n_states(fsm);
+
+ for (i=0; i < n_states; i++) {
+ gfsmArcIter ai;
+ for (gfsm_arciter_open(&ai, fsm, i); gfsm_arciter_ok(&ai); gfsm_arciter_next(&ai)) {
+ gfsmArc *a = gfsm_arciter_arc(&ai);
+ ++total;
+ if (a->lower==gfsmEpsilon) {
+ ++n_lo_eps;
+ if (a->upper==gfsmEpsilon) {
+ ++n_hi_eps;
+ ++n_both_eps;
+ }
+ }
+ else if (a->upper==gfsmEpsilon) {
+ ++n_hi_eps;
+ }
+ }
+ gfsm_arciter_close(&ai);
+ }
+ if (n_lo_epsilon) *n_lo_epsilon = n_lo_eps;
+ if (n_hi_epsilon) *n_hi_epsilon = n_hi_eps;
+ if (n_both_epsilon) *n_both_epsilon = n_both_eps;
+ return total;
+}
+
+
+/*--------------------------------------------------------------
+ * is_cyclic_state()
+ */
+gboolean gfsm_automaton_is_cyclic_state(gfsmAutomaton *fsm,
+ gfsmStateId id,
+ gfsmBitVector *visited,
+ gfsmBitVector *completed)
+{
+ gfsmState *s;
+ gfsmArcIter ai;
+ //
+ if (gfsm_bitvector_get(visited,id)) {
+ if (gfsm_bitvector_get(completed,id)) return FALSE;
+ return TRUE;
+ }
+ //
+ s = gfsm_automaton_find_state(fsm,id);
+ if (!s || !s->is_valid) return FALSE; //-- invalid states don't count as cyclic
+ //
+ //-- mark node as visited (& not completed)
+ gfsm_bitvector_set(visited,id,1);
+ gfsm_bitvector_set(completed,id,0);
+ //
+ //-- visit outgoing arcs
+ for (gfsm_arciter_open_ptr(&ai,fsm,s); gfsm_arciter_ok(&ai); gfsm_arciter_next(&ai)) {
+ if (gfsm_automaton_is_cyclic_state(fsm, gfsm_arciter_arc(&ai)->target, visited, completed)) {
+ gfsm_arciter_close(&ai);
+ return TRUE;
+ }
+ }
+ gfsm_arciter_close(&ai);
+ //
+ //-- mark node as completed
+ gfsm_bitvector_set(completed,id,1);
+ //
+ //-- finished traversal; this state isn't cyclic
+ return FALSE;
+}
+
+/*--------------------------------------------------------------
+ * is_cyclic()
+ */
+gboolean gfsm_automaton_is_cyclic(gfsmAutomaton *fsm)
+{
+ gfsmBitVector *visited; //-- records which states we've visited
+ gfsmBitVector *completed; //-- records which states we've completed
+ gboolean rc; //-- return value
+
+ if (fsm->root_id==gfsmNoState || fsm->states->len==0) return FALSE; //-- sanity check(s)
+
+ visited = gfsm_bitvector_sized_new(fsm->states->len);
+ completed = gfsm_bitvector_sized_new(fsm->states->len);
+ rc = gfsm_automaton_is_cyclic_state(fsm, fsm->root_id, visited, completed);
+
+ //-- cleanup
+ gfsm_bitvector_free(visited);
+ gfsm_bitvector_free(completed);
+
+ return rc;
+}
+
+
+/*--------------------------------------------------------------
+ * get_alphabet()
+ */
+gfsmAlphabet *gfsm_automaton_get_alphabet(gfsmAutomaton *fsm, gfsmLabelSide which, gfsmAlphabet *alph)
+{
+ gfsmStateId id;
+ //-- ensure alphabet
+ if (!alph) alph = gfsm_range_alphabet_new();
+
+ for (id=0; id < fsm->states->len; id++) {
+ gfsmArcIter ai;
+ for (gfsm_arciter_open(&ai,fsm,id); gfsm_arciter_ok(&ai); gfsm_arciter_next(&ai)) {
+ gfsmArc *a = gfsm_arciter_arc(&ai);
+
+ if (which != gfsmLSUpper)
+ gfsm_alphabet_insert(alph, GUINT_TO_POINTER((guint)(a->lower)), a->lower);
+
+ if (which != gfsmLSLower)
+ gfsm_alphabet_insert(alph, GUINT_TO_POINTER((guint)(a->upper)), a->upper);
+ }
+ gfsm_arciter_close(&ai);
+ }
+ return alph;
+}
+
+/*======================================================================
+ * Methods: Accessors: Automaton States
+ */
+
+/*--------------------------------------------------------------
+ * renumber_states()
+ */
+void gfsm_automaton_renumber_states(gfsmAutomaton *fsm)
+{
+ gfsmStateId oldid, newid;
+ GArray *old2new = NULL;
+
+ //-- always set root state to zero -- even add one
+ if (fsm->root_id == gfsmNoState) fsm->root_id = gfsm_automaton_add_state(fsm);
+
+ //-- get old-to-new id map
+ old2new = g_array_sized_new(FALSE,FALSE,sizeof(gfsmStateId),fsm->states->len);
+ g_array_index(old2new,gfsmStateId,fsm->root_id) = 0;
+ for (oldid=0, newid=0; oldid < fsm->states->len; oldid++) {
+ if (oldid==fsm->root_id) continue;
+ if (gfsm_automaton_has_state(fsm,oldid)) {
+ g_array_index(old2new,gfsmStateId,oldid) = ++newid;
+ } else {
+ g_array_index(old2new,gfsmStateId,oldid) = gfsmNoState;
+ }
+ }
+
+ //-- perform actual renumbering
+ gfsm_automaton_renumber_states_full(fsm, old2new, newid+1);
+
+ //-- cleanup
+ g_array_free(old2new,TRUE);
+}
+
+/*--------------------------------------------------------------
+ * renumber_states_full()
+ */
+void gfsm_automaton_renumber_states_full(gfsmAutomaton *fsm, GArray *old2new, gfsmStateId n_new_states)
+{
+ gfsmStateId oldid, newid;
+ gfsmState *s_old, *s_new;
+ gfsmWeightMap *new_finals = gfsm_weightmap_new(gfsm_uint_compare);
+ GArray *new_states = NULL;
+
+ //-- get new number of states
+ if (n_new_states==0) {
+ for (oldid=0; oldid < fsm->states->len; oldid++) {
+ if (!gfsm_automaton_has_state(fsm,oldid)) continue;
+ newid = g_array_index(old2new,gfsmStateId,oldid);
+ if (newid != gfsmNoState && newid >= n_new_states) { n_new_states=newid+1; }
+ }
+ }
+
+ //-- allocate new state-vector
+ new_states = g_array_sized_new(FALSE,TRUE,sizeof(gfsmState),n_new_states);
+
+ //-- renumber states
+ for (oldid=0; oldid < fsm->states->len; oldid++) {
+ gfsmArcIter ai;
+ newid = g_array_index(old2new,gfsmStateId,oldid);
+
+ if (newid==gfsmNoState || !gfsm_automaton_has_state(fsm,oldid)) continue; //-- ignore bad states
+
+ //-- copy state data
+ s_old = gfsm_automaton_find_state(fsm, oldid);
+ s_new = &(g_array_index(new_states,gfsmState,newid));
+ *s_new = *s_old;
+
+ //-- check for final state
+ if (s_new->is_final) {
+ gfsmWeight fw =0; //-- hack to convince gcc not to complain about unitialized fw
+ gfsm_weightmap_lookup(fsm->finals, GUINT_TO_POINTER(oldid), &fw);
+ gfsm_weightmap_insert(new_finals, GUINT_TO_POINTER(newid), fw);
+ }
+
+ //-- renumber sources & targets of outgoing arcs
+ for (gfsm_arciter_open_ptr(&ai, fsm, s_new); gfsm_arciter_ok(&ai); gfsm_arciter_next(&ai))
+ {
+ gfsmArc *a = gfsm_arciter_arc(&ai);
+ a->source = newid;
+ a->target = g_array_index(old2new,gfsmStateId,a->target);
+ }
+ gfsm_arciter_close(&ai);
+ }
+
+ //-- set new root-id
+ fsm->root_id = g_array_index(old2new,gfsmStateId,fsm->root_id);
+
+ //-- set new final weights
+ gfsm_weightmap_free(fsm->finals);
+ fsm->finals = new_finals;
+
+ //-- set new state vector
+ g_array_free(fsm->states,TRUE);
+ fsm->states = new_states;
+ fsm->states->len = n_new_states;
+}
+
+
+/*======================================================================
+ * Methods: Accessors: Automaton Arcs
+ */
+
+/*--------------------------------------------------------------
+ * arcsort_full()
+ */
+void gfsm_automaton_arcsort_full(gfsmAutomaton *fsm, GCompareDataFunc cmpfunc, gpointer data)
+{
+ gfsmStateId qid;
+ for (qid=0; qid < fsm->states->len; qid++) {
+ gfsmState *qp = gfsm_automaton_find_state(fsm,qid);
+ if (!qp || !qp->is_valid) continue;
+ qp->arcs = gfsm_arclist_sort_full(qp->arcs, cmpfunc, data);
+ }
+ fsm->flags.sort_mode = gfsmACUser;
+}