diff options
Diffstat (limited to 'gfsm/gfsm/src/libgfsm/gfsmAlgebra.c')
-rw-r--r-- | gfsm/gfsm/src/libgfsm/gfsmAlgebra.c | 1715 |
1 files changed, 1715 insertions, 0 deletions
diff --git a/gfsm/gfsm/src/libgfsm/gfsmAlgebra.c b/gfsm/gfsm/src/libgfsm/gfsmAlgebra.c new file mode 100644 index 0000000..4e9effe --- /dev/null +++ b/gfsm/gfsm/src/libgfsm/gfsmAlgebra.c @@ -0,0 +1,1715 @@ + +/*=============================================================================*\ + * File: gfsmAlgebra.c + * Author: Bryan Jurish <moocow@ling.uni-potsdam.de> + * Description: finite state machine library + * + * Copyright (c) 2004-2007 Bryan Jurish. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + *=============================================================================*/ + +#include <glib.h> +#include <gfsmAlgebra.h> +#include <gfsmAssert.h> +#include <gfsmArcIter.h> +#include <gfsmStateSet.h> +#include <gfsmEnum.h> +#include <gfsmUtils.h> +#include <gfsmCompound.h> + +/*====================================================================== + * Methods: algebra + */ + +/*-------------------------------------------------------------- + * closure_final_func() + * + called for each final @id of @fsm during closure(@fsm) + */ +static +gboolean gfsm_automaton_closure_final_func_(gfsmStateId id, gpointer pw, gfsmAutomaton *fsm) +{ + gfsmWeight w = gfsm_ptr2weight(pw); + if (id != fsm->root_id) + gfsm_automaton_add_arc(fsm, id, fsm->root_id, gfsmEpsilon, gfsmEpsilon, w); + return FALSE; +} + + +/*-------------------------------------------------------------- + * closure() + */ +gfsmAutomaton *gfsm_automaton_closure(gfsmAutomaton *fsm, gboolean is_plus) +{ + //-- sanity check(s) + if (!fsm || fsm->root_id == gfsmNoState) return fsm; + + //-- add epsilon arcs from old final states to translated new root + gfsm_automaton_finals_foreach(fsm, (GTraverseFunc)gfsm_automaton_closure_final_func_, fsm); + + //-- reflexive+transitive or reflexive? + if (!is_plus) gfsm_automaton_optional(fsm); + + return fsm; +} + + +/*-------------------------------------------------------------- + * n_closure() + */ +gfsmAutomaton *gfsm_automaton_n_closure(gfsmAutomaton *fsm, guint n) +{ + //-- sanity check(s) + if (!fsm || fsm->root_id == gfsmNoState) return fsm; + + //-- check for simple closures + if (n == 0) return gfsm_automaton_closure(fsm, FALSE); + else if (n == 1) return gfsm_automaton_closure(fsm, TRUE); + else { + gfsm_automaton_n_concat(fsm, fsm, n-1); + } + + return gfsm_automaton_closure(fsm, TRUE); +} + + +/*-------------------------------------------------------------- + * complement() + */ +gfsmAutomaton *gfsm_automaton_complement(gfsmAutomaton *fsm) +{ + gfsmAlphabet *alph = gfsm_identity_alphabet_new(); + gfsm_automaton_get_alphabet(fsm, gfsmLSLower, alph); + gfsm_automaton_complement_full(fsm,alph); + gfsm_alphabet_free(alph); + return fsm; +} + +/*-------------------------------------------------------------- + * complement_full() + */ +gfsmAutomaton *gfsm_automaton_complement_full(gfsmAutomaton *fsm, gfsmAlphabet *alph) +{ + gfsmStateId id, sink_id; + gfsm_automaton_complete(fsm, alph, &sink_id); + + //-- flip final states (no weights here) + for (id = 0; id < fsm->states->len; id++) { + gfsmState *s = gfsm_automaton_find_state(fsm,id); + if (!s || !s->is_valid) continue; + gfsm_automaton_set_final_state(fsm, id, !s->is_final); + } + + return fsm; +} + +/*-------------------------------------------------------------- + * complete() + */ +gfsmAutomaton *gfsm_automaton_complete(gfsmAutomaton *fsm, gfsmAlphabet *alph, gfsmStateId *sinkp) +{ + gfsmStateId id, sinkid; + GPtrArray *alabels; + + if (!fsm->flags.is_deterministic) fsm = gfsm_automaton_determinize(fsm); + if (gfsm_acmask_nth(fsm->flags.sort_mode,0) != gfsmACLower) { + gfsm_automaton_arcsort(fsm,gfsmACLower); + } + //-- avoid "smart" arc insertion + fsm->flags.sort_mode = gfsmASMNone; + + //-- add sink-id + sinkid = gfsm_automaton_add_state(fsm); + if (sinkp) *sinkp = sinkid; + + //-- get alphabet label-vector + alabels = g_ptr_array_sized_new(gfsm_alphabet_size(alph)); + gfsm_alphabet_labels_to_array(alph,alabels); + + for (id = 0; id < fsm->states->len; id++) { + gfsmState *s = gfsm_automaton_find_state(fsm,id); + gfsmArcList *al; + gfsmArc *a; + guint labi; + if (!s || !s->is_valid) continue; + + al = s->arcs; + a = gfsm_arclist_arc(al); + for (labi=0; labi < alabels->len; ) { + gfsmLabelVal lab = (gfsmLabelVal)GPOINTER_TO_UINT(g_ptr_array_index(alabels,labi)); + + if (lab==gfsmEpsilon) { + ++labi; + } + else if (!a || a->lower > lab) { + //-- no arc for this label: route it to sink + gfsm_automaton_add_arc(fsm, id, sinkid, lab, lab, fsm->sr->one); + ++labi; + } + else if (a->lower == lab) { + ++labi; + } + else { + while (al != NULL && a->lower < lab) { + al = al->next; + a = gfsm_arclist_arc(al); + } + } + } + } + + //-- mark fsm as (still) deterministic + fsm->flags.is_deterministic = TRUE; + + //-- cleanup + //g_array_free(alabels,TRUE); + g_ptr_array_free(alabels,TRUE); + + return fsm; +} + + +/*-------------------------------------------------------------- + * compose() + */ +gfsmAutomaton *gfsm_automaton_compose(gfsmAutomaton *fsm1, gfsmAutomaton *fsm2) +{ + gfsmAutomaton *fsm = gfsm_automaton_compose_full(fsm1,fsm2, NULL,NULL); + gfsm_automaton_swap(fsm1,fsm); + gfsm_automaton_free(fsm); + return fsm1; +} + +/*-------------------------------------------------------------- + * compose_visit_() + */ +//#define GFSM_DEBUG_COMPOSE_VISIT 1 +#ifdef GFSM_DEBUG_COMPOSE_VISIT +# include <stdio.h> +#endif +gfsmStateId gfsm_automaton_compose_visit_(gfsmComposeState sp, + gfsmAutomaton *fsm1, + gfsmAutomaton *fsm2, + gfsmAutomaton *fsm, + gfsmComposeStateEnum *spenum, + gfsmComposeFlags flags) +{ + gfsmState *q1, *q2; + gfsmStateId qid = gfsm_enum_lookup(spenum,&sp); + gfsmStateId qid2; + gfsmArcList *al1, *al2, *ai1, *ai2; + gfsmArcList *ai1_noneps, *ai2_noneps, *ai2_continue; + gfsmArc *a1,*a2; + +#ifdef GFSM_DEBUG_COMPOSE_VISIT + fprintf(stderr, "compose(): visit : (q%u,f%u,q%u) => q%d\n", sp.id1, sp.idf, sp.id2, + (int)(qid==gfsmEnumNone ? -1 : qid)); +#endif + + //-- ignore already-visited states + if (qid != gfsmEnumNone) return qid; + + //-- get state pointers for input automata + q1 = gfsm_automaton_find_state(fsm1,sp.id1); + q2 = gfsm_automaton_find_state(fsm2,sp.id2); + + //-- sanity check + if ( !(q1 && q2 && q1->is_valid && q2->is_valid) ) { +#ifdef GFSM_DEBUG_COMPOSE_VISIT + fprintf(stderr, "compose(): BAD : (q%u,f%u,q%u) XXXXX\n", sp.id1, sp.idf, sp.id2); +#endif + return gfsmNoState; + } + + //-- insert new state into output automaton + qid = gfsm_automaton_add_state(fsm); + gfsm_enum_insert_full(spenum,&sp,qid); + +#ifdef GFSM_DEBUG_COMPOSE_VISIT + fprintf(stderr, "compose(): CREATE: (q%u,f%u,q%u) => q%u ***\n", sp.id1, sp.idf, sp.id2, qid); +#endif + + //-- check for final states + if (q1->is_final && q2->is_final) { + gfsm_automaton_set_final_state_full(fsm,qid,TRUE, + gfsm_sr_times(fsm->sr, + gfsm_automaton_get_final_weight(fsm1,sp.id1), + gfsm_automaton_get_final_weight(fsm2,sp.id2))); + } + + //------------------------------------------- + // recurse on outgoing arcs + + //-------------------------------- + // recurse: arcs: sort + + //-- arcs: sort arclists: fsm1 + if (flags&gfsmCFEfsm1NeedsArcSort) { + gfsmArcCompData sortdata = { gfsmACUpper,NULL,NULL,NULL }; + al1 = gfsm_arclist_sort(gfsm_arclist_clone(q1->arcs), &sortdata); + } + else { al1 = q1->arcs; } + + //-- arcs: sort arclists: fsm2 + if (flags&gfsmCFEfsm2NeedsArcSort) { + gfsmArcCompData sortdata = { gfsmACLower,NULL,NULL,NULL }; + al2 = gfsm_arclist_sort(gfsm_arclist_clone(q2->arcs), &sortdata); + } + else { al2 = q2->arcs; } + + //-------------------------------- + // recusrse: arcs: handle epsilons + for (ai1_noneps=al1; ai1_noneps!=NULL && ai1_noneps->arc.upper==gfsmEpsilon; ai1_noneps=ai1_noneps->next) {;} + for (ai2_noneps=al2; ai2_noneps!=NULL && ai2_noneps->arc.lower==gfsmEpsilon; ai2_noneps=ai2_noneps->next) {;} + + //-- (eps,NULL): case fsm1(q1 --a:eps(~eps2)--> q1b), filter:({0,2} --eps2:eps2--> 2), fsm2(q2 --(NULL~eps2:eps)--> q2) + if (sp.idf != 1) { + for (ai1=al1; ai1!=ai1_noneps; ai1=ai1->next) { + a1 = &(ai1->arc); +#ifdef GFSM_DEBUG_COMPOSE_VISIT + fprintf(stderr, + "compose(): MATCH[e,NULL]: (q%u --%d:eps(e2)--> q%u) ~ ({0,2}--(e2:e2)-->2) ~ (q%u --(NULL~e2:eps)--> q%u) ***\n", + sp.id1, a1->lower, a1->target, + sp.id2, sp.id2); +#endif + qid2 = gfsm_automaton_compose_visit_((gfsmComposeState){a1->target, sp.id2, 2}, fsm1,fsm2,fsm, spenum,flags); + if (qid2 != gfsmNoState) + gfsm_automaton_add_arc(fsm, qid, qid2, a1->lower, gfsmEpsilon, a1->weight); + } + } + //-- (NULL,eps): case fsm1(q1 --(NULL~eps:eps1)--> q1), filter:({0,1} --eps1:eps1--> 1), fsm2(q2 --eps(~eps1):b--> q2b) + if (sp.idf != 2) { + for (ai2=al2; ai2!=ai2_noneps; ai2=ai2->next) { + a2 = &(ai2->arc); +#ifdef GFSM_DEBUG_COMPOSE_VISIT + fprintf(stderr, + "compose(): MATHC[NULL,e]: (q%u --(NULL~eps:e1)--> q%u) ~ ({0,1}--(e1:e1)-->1) ~ (q%u --eps(e1):%d--> q%u) ***\n", + sp.id1, sp.id1, + sp.id2, a2->upper, a2->target); +#endif + qid2 = gfsm_automaton_compose_visit_((gfsmComposeState){sp.id1, a2->target, 1}, fsm1,fsm2,fsm, spenum,flags); + if (qid2 != gfsmNoState) + gfsm_automaton_add_arc(fsm, qid, qid2, gfsmEpsilon, a2->upper, a2->weight); + } + } + //-- (eps,eps): case fsm1(q1 --a:eps(~eps2)--> q1b), filter:({0} --eps2:eps1--> 0), fsm2(q2 --eps:b--> q2b) + if (sp.idf == 0) { + for (ai1=al1; ai1!=ai1_noneps; ai1=ai1->next) { + a1 = &(ai1->arc); + for (ai2=al2; ai2!=ai2_noneps; ai2=ai2->next) { + a2 = &(ai2->arc); +#ifdef GFSM_DEBUG_COMPOSE_VISIT + fprintf(stderr, + "compose(): MATCH[e,e]: (q%u --%d:eps(e2)--> q%u) ~ ({0}--(e2:e1)-->0) ~ (q%u --eps(e1):%d--> q%u) ***\n", + sp.id1, a1->lower, a1->target, + sp.id2, a2->upper, a2->target); +#endif + qid2 = gfsm_automaton_compose_visit_((gfsmComposeState){a1->target, a2->target, 0}, + fsm1,fsm2,fsm, spenum,flags); + if (qid2 != gfsmNoState) + gfsm_automaton_add_arc(fsm, qid, qid2, a1->lower, a2->upper, + gfsm_sr_times(fsm->sr, a1->weight, a2->weight)); + } + } + } + + //-------------------------------- + // recurse: arcs: non-eps: iterate + for (ai1=ai1_noneps, ai2_continue=ai2_noneps; ai1!=NULL; ai1=ai1->next) { + a1 = &(ai1->arc); + + for (ai2=ai2_continue; ai2!=NULL; ai2=ai2->next) { + a2 = &(ai2->arc); + +#ifdef GFSM_DEBUG_COMPOSE_VISIT + fprintf(stderr, + "compose(): check[x,x]: (q%u --%d:%d--> q%u) ~ ({0,1,2}--(x:x)-->0) ~ (q%u --%d:%d--> q%u)\n", + sp.id1, a1->lower, a1->upper, a1->target, + sp.id2, a2->lower, a2->upper, a2->target); +#endif + + if (a2->lower < a1->upper) { ai2_continue=ai2->next; continue; } + else if (a2->lower > a1->upper) { break; } + +#ifdef GFSM_DEBUG_COMPOSE_VISIT + fprintf(stderr, + "compose(): MATCH[x,x]: (q%u --%d:%d--> q%u) ~ ({0,1,2}--(x:x)-->0) ~ (q%u --%d:%d--> q%u) ***\n", + sp.id1, a1->lower, a1->upper, a1->target, + sp.id2, a2->lower, a2->upper, a2->target); +#endif + + //-- non-eps: case fsm1:(q1 --a:b--> q1'), fsm2:(q2 --b:c--> q2') + qid2 = gfsm_automaton_compose_visit_((gfsmComposeState){a1->target,a2->target,0}, + fsm1,fsm2,fsm, spenum,flags); + if (qid2 != gfsmNoState) + gfsm_automaton_add_arc(fsm, qid, qid2, a1->lower, a2->upper, + gfsm_sr_times(fsm1->sr, a1->weight, a2->weight)); + } + } + + //-- maybe cleanup temporary arc-lists + if (flags&gfsmCFEfsm1NeedsArcSort) gfsm_arclist_free(al1); + if (flags&gfsmCFEfsm2NeedsArcSort) gfsm_arclist_free(al2); + + return qid; +} + +/*-------------------------------------------------------------- + * compose_full() + */ +//#define GFSM_DEBUG_COMPOSE +#ifdef GFSM_DEBUG_COMPOSE +# include <gfsmAutomatonIO.h> +#endif +gfsmAutomaton *gfsm_automaton_compose_full(gfsmAutomaton *fsm1, + gfsmAutomaton *fsm2, + gfsmAutomaton *composition, + gfsmComposeStateEnum *spenum + ) +{ + gboolean spenum_is_temp; + gfsmComposeState rootpair; + gfsmStateId rootid; + gfsmComposeFlags flags = 0; +#ifdef GFSM_DEBUG_COMPOSE + gfsmError *err =NULL; +#endif + + //-- setup: output fsm + if (!composition) { + composition=gfsm_automaton_shadow(fsm1); + } else { + gfsm_automaton_clear(composition); + gfsm_automaton_copy_shallow(composition,fsm1); + } + composition->flags.sort_mode = gfsmASMNone; + composition->flags.is_transducer = 1; + + //-- setup: ComposeStateEnum + if (spenum==NULL) { + spenum_is_temp=TRUE; + spenum = gfsm_compose_state_enum_new(); + } else { + spenum_is_temp=FALSE; + gfsm_enum_clear(spenum); + } + + //-- setup: flags + if (gfsm_acmask_nth(fsm1->flags.sort_mode,0) != gfsmACUpper) flags |= gfsmCFEfsm1NeedsArcSort; + if (gfsm_acmask_nth(fsm2->flags.sort_mode,0) != gfsmACLower) flags |= gfsmCFEfsm2NeedsArcSort; + + //-- guts: recursively visit states depth-first from root + rootpair.id1 = fsm1->root_id; + rootpair.id2 = fsm2->root_id; + rootpair.idf = 0; + rootid = gfsm_automaton_compose_visit_(rootpair, fsm1, fsm2, composition, spenum, flags); + + //-- finalize: set new root state + if (rootid != gfsmNoState) { + gfsm_automaton_set_root(composition, rootid); + } else { + composition->root_id = gfsmNoState; + } + //-- cleanup + if (spenum_is_temp) gfsm_enum_free(spenum); + + return composition; +} + + + +/*-------------------------------------------------------------- + * concat_final_func() + * + called for each final @id of @fsm during concat(@fsm,@fsm2) + * + during the call @fsm1->root_id should be set to the translated root of @fsm2 + */ +static +gboolean gfsm_automaton_concat_final_func_(gfsmStateId id, gpointer pw, gfsmAutomaton *fsm) +{ + gfsmWeight w = gfsm_ptr2weight(pw); + gfsm_automaton_add_arc(fsm, id, fsm->root_id, gfsmEpsilon, gfsmEpsilon, w); + gfsm_automaton_find_state(fsm,id)->is_final = FALSE; + return FALSE; +} + +/*-------------------------------------------------------------- + * concat_final_func_1() + * + called for singleton final @id of @fsm during concat(@fsm,@fsm2) + * + BAD if singleton final of @fsm has outgoing arcs! + */ +#if 0 +struct gfsm_automaton_concat_1_final_data_ { + gfsmStateId *rootxp; + gfsmWeight *weightp; +}; +static +gboolean gfsm_automaton_concat_final_func_1_(gfsmStateId id, + gpointer pw, + struct gfsm_automaton_concat_1_final_data_ *data) +{ + *(data->rootxp) = id; + *(data->weightp) = gfsm_ptr2weight(pw); + return TRUE; +} +#endif + +/*-------------------------------------------------------------- + * concat() + */ +gfsmAutomaton *gfsm_automaton_concat(gfsmAutomaton *fsm1, gfsmAutomaton *_fsm2) +{ + gfsmAutomaton *fsm2; + gfsmStateId offset; + gfsmStateId id2; + gfsmStateId size2; + gfsmStateId rootx; + gfsmWeightMap *finals2 = NULL; + + //-- sanity check(s) + if (!_fsm2 || _fsm2->root_id == gfsmNoState) return fsm1; + if (_fsm2==fsm1) fsm2 = gfsm_automaton_clone(fsm1); + else fsm2 = _fsm2; + + if (fsm1->finals == fsm2->finals) { + finals2 = gfsm_weightmap_new(gfsm_uint_compare); + gfsm_weightmap_copy(finals2, fsm2->finals); + } + + offset = fsm1->states->len; + size2 = fsm2->states->len; + gfsm_automaton_reserve(fsm1, offset + size2); + + //-- concatenative arcs + if (fsm1->root_id != gfsmNoState) { + //-- multiple final states: add epsilon arcs from old finals to mapped root2 + gfsmStateId root_tmp = fsm1->root_id; + rootx = fsm2->root_id+offset; + fsm1->root_id = rootx; + gfsm_automaton_finals_foreach(fsm1, (GTraverseFunc)gfsm_automaton_concat_final_func_, fsm1); + fsm1->root_id = root_tmp; + } else /*if (fsm2->root_id != gfsmNoState)*/ { + fsm1->root_id = rootx = fsm2->root_id + offset; + } + gfsm_weightmap_clear(fsm1->finals); + + //-- adopt states from fsm2 into fsm1 + for (id2 = 0; id2 < size2; id2++) { + gfsmStateId id1; + const gfsmState *s2; + gfsmState *s1; + gfsmArcIter ai; + gfsmWeight s2fw; + + s2 = gfsm_automaton_find_state_const(fsm2,id2); + id1 = id2+offset; + s1 = gfsm_automaton_find_state(fsm1, id1); + + //-- sanity check(s) + if (!s1 || !s2 || !s2->is_valid) continue; + + //-- copy state + gfsm_state_copy(s1,s2); + + //-- translate targets for adopted arcs + for (gfsm_arciter_open_ptr(&ai,fsm1,s1); gfsm_arciter_ok(&ai); gfsm_arciter_next(&ai)) + { + gfsmArc *a = gfsm_arciter_arc(&ai); + a->target += offset; + } + + //-- check for new final states: get weight & mark state is_final flag + if ( (finals2 && gfsm_weightmap_lookup(finals2, GUINT_TO_POINTER(id2), &s2fw)) + || + (!finals2 && gfsm_weightmap_lookup(fsm2->finals, GUINT_TO_POINTER(id2), &s2fw)) ) + { + s1->is_final = TRUE; + gfsm_weightmap_insert(fsm1->finals, GUINT_TO_POINTER(id1), s2fw); + } + } + + //-- mark as unsorted + fsm1->flags.sort_mode = gfsmASMNone; + + //-- cleanup + if (finals2) gfsm_weightmap_free(finals2); + if (fsm2 != _fsm2) gfsm_automaton_free(fsm2); + + return fsm1; +} + +/*-------------------------------------------------------------- + * n_concat() + */ +gfsmAutomaton *gfsm_automaton_n_concat(gfsmAutomaton *fsm1, gfsmAutomaton *_fsm2, guint n) +{ + gfsmAutomaton *fsm2 = _fsm2; + + //-- sanity check(s) + if (!_fsm2 || _fsm2->root_id == gfsmNoState) return fsm1; + if (_fsm2==fsm1) fsm2 = gfsm_automaton_clone(fsm1); + + for ( ; n > 0; n--) { gfsm_automaton_concat(fsm1, fsm2); } + + if (fsm2 != _fsm2) gfsm_automaton_free(fsm2); + + return fsm1; +} + + +/*-------------------------------------------------------------- + * connect() + */ +gfsmAutomaton *gfsm_automaton_connect(gfsmAutomaton *fsm) +{ + gfsmBitVector *wanted; + + //-- sanity check + if (!fsm) return fsm; + + wanted = gfsm_bitvector_sized_new(fsm->states->len); + gfsm_automaton_connect_fw(fsm, wanted); + + gfsm_bitvector_zero(wanted); + gfsm_automaton_connect_bw(fsm, NULL, wanted); + + gfsm_bitvector_free(wanted); + return fsm; +} + + +/*-------------------------------------------------------------- + * connect_fw_visit_state() + * + marks all states on a path from (id) in (visited) + */ +void gfsm_connect_fw_visit_state(gfsmAutomaton *fsm, + gfsmStateId id, + gfsmBitVector *visited) +{ + gfsmState *s; + gfsmArcIter ai; + + //-- already visited + if (gfsm_bitvector_get(visited,id)) return; + + s = gfsm_automaton_find_state(fsm,id); + if (!s || !s->is_valid) return; //-- ignore invalid states + + //-- mark node as visited on this path + gfsm_bitvector_set(visited,id,1); + + //-- visit targets of outgoing arcs + for (gfsm_arciter_open_ptr(&ai,fsm,s); gfsm_arciter_ok(&ai); gfsm_arciter_next(&ai)) { + gfsm_connect_fw_visit_state(fsm, gfsm_arciter_arc(&ai)->target, visited); + } + + return; +} + +/*-------------------------------------------------------------- + * connect_fw() + */ +gfsmAutomaton *gfsm_automaton_connect_fw(gfsmAutomaton *fsm, gfsmBitVector *visited) +{ + gboolean visited_is_temp = FALSE; + + //-- sanity check + if (!fsm || fsm->root_id == gfsmNoState) + return gfsm_automaton_prune_states(fsm,NULL); + + //-- traversal record + if (visited==NULL) { + visited = gfsm_bitvector_sized_new(fsm->states->len); + visited_is_temp = TRUE; + } + + //-- traverse + gfsm_connect_fw_visit_state(fsm, fsm->root_id, visited); + gfsm_automaton_prune_states(fsm, visited); + + //-- cleanup + if (visited_is_temp) gfsm_bitvector_free(visited); + + return fsm; +} + +/*-------------------------------------------------------------- + * connect_bw(): final_foreach() + */ +struct gfsm_connect_bw_data_ { + gfsmAutomaton *fsm; + GPtrArray *rarcs; + gfsmBitVector *finalizable; +}; + +gboolean gfsm_connect_bw_visit_state(gfsmStateId id, + gpointer pw, + struct gfsm_connect_bw_data_ *data) +{ + GSList *rl; + + //-- already visited + if (gfsm_bitvector_get(data->finalizable,id) //-- already visited? + || !gfsm_automaton_has_state(data->fsm, id)) //-- bad state? + return FALSE; //-----> continue traversal + + //-- mark state as finalizable + gfsm_bitvector_set(data->finalizable,id,1); + + //-- visit sources of incoming arcs + for (rl=g_ptr_array_index(data->rarcs,id); rl != NULL; rl=rl->next) { + gfsmArc *arc = (gfsmArc*)rl->data; + gfsm_connect_bw_visit_state(arc->source,pw,data); + } + + return FALSE; //-- continue traversal +} + +/*-------------------------------------------------------------- + * connect_bw() + */ +gfsmAutomaton *gfsm_automaton_connect_bw(gfsmAutomaton *fsm, + gfsmReverseArcIndex *rarcs, + gfsmBitVector *finalizable) +{ + gboolean rarcs_is_temp = FALSE; + gboolean finalizable_is_temp = FALSE; + struct gfsm_connect_bw_data_ data = {fsm,rarcs,finalizable}; + + //-- sanity check(s) + if (!fsm || gfsm_automaton_n_final_states(fsm)==0) + return gfsm_automaton_prune_states(fsm,NULL); + + //-- reverse arc-index + if (rarcs==NULL) { + rarcs = data.rarcs = gfsm_automaton_reverse_arc_index(fsm,NULL); + rarcs_is_temp = TRUE; + } + + //-- traversal record + if (finalizable==NULL) { + finalizable = data.finalizable = gfsm_bitvector_sized_new(fsm->states->len); + finalizable_is_temp = TRUE; + } + + //-- traverse + gfsm_automaton_finals_foreach(fsm, (GTraverseFunc)gfsm_connect_bw_visit_state, &data); + gfsm_automaton_prune_states(fsm, finalizable); + + //-- cleanup + if (finalizable_is_temp) gfsm_bitvector_free(finalizable); + if (rarcs_is_temp) gfsm_reverse_arc_index_free(rarcs,TRUE); + + return fsm; +} + + +/*-------------------------------------------------------------- + * prune_states() + */ +gfsmAutomaton *gfsm_automaton_prune_states(gfsmAutomaton *fsm, gfsmBitVector *wanted) +{ + gfsmStateId id, maxwanted=gfsmNoState; + gfsmArcIter ai; + + for (id=0; id < fsm->states->len; id++) { + if (!wanted || !gfsm_bitvector_get(wanted,id)) { + //-- unwanted state: chuck it + gfsm_automaton_remove_state(fsm,id); + } + else { + maxwanted = id; + //-- prune outgoing arcs to any unwanted states, too + for (gfsm_arciter_open(&ai, fsm, id); gfsm_arciter_ok(&ai); ) { + gfsmArc *arc = gfsm_arciter_arc(&ai); + if (!wanted || !gfsm_bitvector_get(wanted,arc->target)) { + gfsm_arciter_remove(&ai); + } else { + gfsm_arciter_next(&ai); + } + } + } + } + + //-- update number of states + if (maxwanted != gfsmNoState) { + g_array_set_size(fsm->states, maxwanted+1); + } else { + g_array_set_size(fsm->states, 0); + } + + return fsm; +} + +/*-------------------------------------------------------------- + * determinize_lp2ec_foreach_func_() + */ +typedef struct { + gfsmAutomaton *nfa; + gfsmAutomaton *dfa; + gfsmStateId dfa_src_id; + gfsmEnum *ec2id; +} gfsmLp2EcForeachData; + +static +gboolean gfsm_determinize_lp2ec_foreach_func_(gfsmLabelPair lp, + gfsmWeightedStateSet *wss, + gfsmLp2EcForeachData *data) +{ + gfsmStateId ec2id_val; + gpointer ec2id_val_as_ptr; + gfsmStateSet *ec2id_key; + + if ( gfsm_enum_lookup_extended(data->ec2id, + wss->set, + (gpointer)(&ec2id_key), + (gpointer)(&ec2id_val_as_ptr)) ) + { + //-- target node-set is already present: just add an arc in @dfa + ec2id_val = GPOINTER_TO_UINT(ec2id_val_as_ptr); + gfsm_automaton_add_arc(data->dfa, + data->dfa_src_id, + ec2id_val, + gfsm_labelpair_lower(lp), + gfsm_labelpair_upper(lp), + wss->weight); + + //-- ... and maybe free the embedded state set + if (wss->set != ec2id_key) gfsm_stateset_free(wss->set); + wss->set = NULL; + } + else + { + //-- image of equiv-class (wss->set) was not yet present: make a new one + ec2id_val = gfsm_automaton_ensure_state(data->dfa, + gfsm_enum_insert(data->ec2id, wss->set)); + + //-- ... add @dfa arc + gfsm_automaton_add_arc(data->dfa, + data->dfa_src_id, + ec2id_val, + gfsm_labelpair_lower(lp), + gfsm_labelpair_upper(lp), + wss->weight); + + //-- ... and recurse + gfsm_determinize_visit_state_(data->nfa, data->dfa, + wss->set, ec2id_val, + data->ec2id); + } + return FALSE; +} + +/*-------------------------------------------------------------- + * determinize_visit_state_() + */ +void gfsm_determinize_visit_state_(gfsmAutomaton *nfa, gfsmAutomaton *dfa, + gfsmStateSet *nfa_ec, gfsmStateId dfa_id, + gfsmEnum *ec2id) +{ + GTree *lp2ecw; //-- maps label-pairs@nfa.src.ec => (eq-class@nfa.sink, sum(weight)) + gfsmStateSetIter eci; + gfsmStateId ecid; + gfsmLp2EcForeachData lp2ec_foreach_data; + gfsmWeight fw; + + //-- check for final state + if (gfsm_stateset_lookup_final_weight(nfa_ec,nfa,&fw)) { + gfsm_automaton_set_final_state_full(dfa, dfa_id, TRUE, fw); + } + + //-- build label-pair => (sink-eqc, sum(weight)) mapping 'lp2ecw' for node-set nfa_ec + lp2ecw = g_tree_new_full(((GCompareDataFunc) + gfsm_labelpair_compare_with_data), //-- key_comp_func + NULL, //-- key_comp_data + NULL, //-- key_free_func + (GDestroyNotify)g_free); //-- val_free_func + + for (eci=gfsm_stateset_iter_begin(nfa_ec); + (ecid=gfsm_stateset_iter_id(eci)) != gfsmNoState; + eci=gfsm_stateset_iter_next(nfa_ec,eci)) + { + gfsmArcIter ai; + for (gfsm_arciter_open(&ai, nfa, ecid); gfsm_arciter_ok(&ai); gfsm_arciter_next(&ai)) { + gfsmArc *a = gfsm_arciter_arc(&ai); + gfsmLabelPair lp; + gfsmLabelPair *lp2ec_key; + gfsmWeightedStateSet *lp2ec_val; + + //if (a->lower==gfsmEpsilon && a->upper==gfsmEpsilon) continue; //-- ignore eps arcs + lp = gfsm_labelpair_new(a->lower, a->upper); + + //-- add equivalence class to local mapping + if ( g_tree_lookup_extended(lp2ecw, + GUINT_TO_POINTER(lp), + (gpointer)(&lp2ec_key), + (gpointer)(&lp2ec_val)) ) + { + //-- already present: compute union and add new arc's weight + gfsm_stateset_insert(lp2ec_val->set, a->target); + lp2ec_val->weight = gfsm_sr_plus(nfa->sr, lp2ec_val->weight, a->weight); + } + else + { + //-- not yet present: insert new value + lp2ec_val = g_new(gfsmWeightedStateSet,1); + lp2ec_val->set = gfsm_stateset_new_singleton(a->target); + lp2ec_val->weight = a->weight; + g_tree_insert(lp2ecw, GUINT_TO_POINTER(lp), lp2ec_val); + } + } + + //-- tmp-cleanup + gfsm_arciter_close(&ai); + } + + //-- stateset-iter (eci) cleanup + //(none) + + //-- insert computed arcs into @dfa + lp2ec_foreach_data.nfa = nfa; + lp2ec_foreach_data.dfa = dfa; + lp2ec_foreach_data.dfa_src_id = dfa_id; + lp2ec_foreach_data.ec2id = ec2id; + g_tree_foreach(lp2ecw, + (GTraverseFunc)gfsm_determinize_lp2ec_foreach_func_, + (gpointer)(&lp2ec_foreach_data)); + + //-- cleanup + g_tree_destroy(lp2ecw); +} + + +/*-------------------------------------------------------------- + * determinize() + */ +gfsmAutomaton *gfsm_automaton_determinize(gfsmAutomaton *nfa) +{ + if (!nfa->flags.is_deterministic) { + gfsmAutomaton *dfa = gfsm_automaton_determinize_full(nfa,NULL); + gfsm_automaton_swap(nfa,dfa); + gfsm_automaton_free(dfa); + } + return nfa; +} + +/*-------------------------------------------------------------- + * determinize_full() + */ +gfsmAutomaton *gfsm_automaton_determinize_full(gfsmAutomaton *nfa, gfsmAutomaton *dfa) +{ + gfsmEnum *ec2id; //-- (global) maps literal(equiv-class@nfa) => node-id@dfa + gfsmStateSet *nfa_ec; //-- (temp) equiv-class@nfa + gfsmStateId dfa_id; //-- (temp) id @ dfa + + //-- sanity check(s) + if (!nfa) return NULL; + else if (nfa->flags.is_deterministic) { + if (dfa) gfsm_automaton_copy(dfa,nfa); + else dfa = gfsm_automaton_clone(nfa); + return dfa; + } + + //-- initialization: dfa + if (!dfa) { + dfa = gfsm_automaton_shadow(nfa); + } else { + gfsm_automaton_clear(dfa); + gfsm_automaton_copy_shallow(dfa,nfa); + } + //-- avoid "smart" arc-insertion + dfa->flags.sort_mode = gfsmASMNone; + + //-- initialization: ec2id + ec2id = gfsm_enum_new_full(NULL /*(gfsmDupFunc)gfsm_stateset_clone*/ , + (GHashFunc)gfsm_stateset_hash, + (GEqualFunc)gfsm_stateset_equal, + (GDestroyNotify)gfsm_stateset_free); + + //-- initialization: nfa_ec + nfa_ec = gfsm_stateset_sized_new(32); + gfsm_stateset_insert(nfa_ec, nfa->root_id); + + //-- set root in dfa + dfa_id = gfsm_automaton_ensure_state(dfa, gfsm_enum_insert(ec2id, nfa_ec)); + gfsm_automaton_set_root(dfa, dfa_id); + + //-- guts: determinize recursively outwards from root node + gfsm_determinize_visit_state_(nfa, dfa, nfa_ec, dfa_id, ec2id); + + //-- set flag in dfa + dfa->flags.is_deterministic = TRUE; + + //-- cleanup + //gfsm_stateset_free(nfa_ec); //-- this ought to be freed by gfsm_enum_free(ec2id) + gfsm_enum_free(ec2id); + + return dfa; +} + + + +/*-------------------------------------------------------------- + * difference() + */ +gfsmAutomaton *gfsm_automaton_difference(gfsmAutomaton *fsm1, gfsmAutomaton *fsm2) +{ + gfsmAutomaton *fsm = gfsm_automaton_difference_full(fsm1,fsm2,NULL); + gfsm_automaton_swap(fsm1,fsm); + gfsm_automaton_free(fsm); + return fsm1; +} + +/*-------------------------------------------------------------- + * difference_full() + */ +gfsmAutomaton *gfsm_automaton_difference_full(gfsmAutomaton *fsm1, + gfsmAutomaton *fsm2, + gfsmAutomaton *diff) +{ + gfsmAutomaton *not_fsm2; + gfsmAlphabet *alph1 = gfsm_identity_alphabet_new(); + + gfsm_automaton_get_alphabet(fsm1, gfsmLSLower, alph1); + not_fsm2 = gfsm_automaton_clone(fsm2); + gfsm_automaton_complement_full(not_fsm2, alph1); + diff = gfsm_automaton_intersect_full(fsm1, not_fsm2, diff, NULL); + + gfsm_automaton_free(not_fsm2); + gfsm_alphabet_free(alph1); + + return diff; +} + + + +/*-------------------------------------------------------------- + * intersect() + */ +gfsmAutomaton *gfsm_automaton_intersect(gfsmAutomaton *fsm1, gfsmAutomaton *fsm2) +{ + gfsmAutomaton *fsm = gfsm_automaton_intersect_full(fsm1,fsm2,NULL,NULL); + gfsm_automaton_swap(fsm1,fsm); + gfsm_automaton_free(fsm); + return fsm1; +} + +/*-------------------------------------------------------------- + * intersect_full() + */ +gfsmAutomaton *gfsm_automaton_intersect_full(gfsmAutomaton *fsm1, + gfsmAutomaton *fsm2, + gfsmAutomaton *intersect, + gfsmStatePairEnum *spenum) +{ + gboolean spenum_is_temp; + gfsmStatePair rootpair; + gfsmStateId rootid; + gfsmComposeFlags flags = 0; + + //-- setup: output fsm + if (!intersect) { + intersect=gfsm_automaton_shadow(fsm1); + } else { + gfsm_automaton_clear(intersect); + gfsm_automaton_copy_shallow(intersect,fsm1); + } + //-- avoid "smart" arc-insertion + intersect->flags.sort_mode = gfsmASMNone; + intersect->flags.is_transducer = 0; + + //-- setup: StatePairEnum + if (spenum==NULL) { + spenum_is_temp=TRUE; + spenum = gfsm_statepair_enum_new(); + } else { + spenum_is_temp=FALSE; + gfsm_enum_clear(spenum); + } + + //-- setup: flags + if (gfsm_acmask_nth(fsm1->flags.sort_mode,0) != gfsmACLower) flags |= gfsmCFEfsm1NeedsArcSort; + if (gfsm_acmask_nth(fsm2->flags.sort_mode,0) != gfsmACLower) flags |= gfsmCFEfsm2NeedsArcSort; + + //-- guts + rootpair.id1 = fsm1->root_id; + rootpair.id2 = fsm2->root_id; + rootid = gfsm_automaton_intersect_visit_(rootpair, fsm1, fsm2, intersect, spenum,flags); + + //-- finalize: set root state + if (rootid != gfsmNoState) { + gfsm_automaton_set_root(intersect, rootid); + } else { + intersect->root_id = gfsmNoState; + } + + //-- cleanup + if (spenum_is_temp) gfsm_enum_free(spenum); + + return intersect; +} + +/*-------------------------------------------------------------- + * intersect_visit() + */ +gfsmStateId gfsm_automaton_intersect_visit_(gfsmStatePair sp, + gfsmAutomaton *fsm1, + gfsmAutomaton *fsm2, + gfsmAutomaton *fsm, + gfsmStatePairEnum *spenum, + gfsmComposeFlags flags) +{ + gfsmState *q1, *q2; + gfsmStateId qid = gfsm_enum_lookup(spenum,&sp); + gfsmStateId qid2; + gfsmArcList *al1, *al2, *ai1, *ai2, *ai2eps; + gfsmArc *a1,*a2; + + //-- ignore already-visited states + if (qid != gfsmEnumNone) return qid; + + //-- get state pointers for input automata + q1 = gfsm_automaton_find_state(fsm1,sp.id1); + q2 = gfsm_automaton_find_state(fsm2,sp.id2); + + //-- sanity check + if ( !(q1 && q2 && q1->is_valid && q2->is_valid) ) return gfsmNoState; + + //-- insert new state into output automaton + qid = gfsm_automaton_add_state(fsm); + gfsm_enum_insert_full(spenum,&sp,qid); + //q = gfsm_automaton_get_state(fsm,qid); + + //-- check for final states + if (q1->is_final && q2->is_final) { + gfsm_automaton_set_final_state_full(fsm,qid,TRUE, + gfsm_sr_times(fsm->sr, + gfsm_automaton_get_final_weight(fsm1,sp.id1), + gfsm_automaton_get_final_weight(fsm2,sp.id2))); + } + + //------------------------------------------- + // recurse on outgoing arcs + + //-------------------------------- + // recurse: arcs: sort + + //-- arcs: sort arclists: fsm1 + if (flags&gfsmCFEfsm1NeedsArcSort) { + gfsmArcCompData sortdata = { (gfsmACLower|(gfsmACUpper<<gfsmACShift)),NULL,NULL,NULL }; + al1 = gfsm_arclist_sort(gfsm_arclist_clone(q1->arcs), &sortdata); + } + else { al1 = q1->arcs; } + + //-- arcs: sort arclists: fsm2 + if (flags&gfsmCFEfsm2NeedsArcSort) { + gfsmArcCompData sortdata = { (gfsmACLower|(gfsmACUpper<<gfsmACShift)),NULL,NULL,NULL }; + al2 = gfsm_arclist_sort(gfsm_arclist_clone(q2->arcs), &sortdata); + } + else { al2 = q2->arcs; } + + //-------------------------------- + // recurse: arcs: iterate + for (ai1=al1, ai2=al2; ai1 != NULL; ai1=ai1->next) { + a1 = &(ai1->arc); + if (a1->lower == gfsmEpsilon) { + //-- handle epsilon arcs + + //-- eps: case fsm1:(q1 --eps--> q1'), fsm2:(q2) + qid2 = gfsm_automaton_intersect_visit_((gfsmStatePair){a1->target,sp.id2}, + fsm1, fsm2, fsm, spenum, flags); + if (qid2 != gfsmNoState) + gfsm_automaton_add_arc(fsm, qid, qid2, gfsmEpsilon, gfsmEpsilon, a1->weight); + + //-- eps: case fsm1:(q1 --eps--> q1'), fsm2:(q2 --eps--> q2') + for (ai2eps=al2; ai2eps != NULL; ai2eps=ai2eps->next) { + a2 = &(ai2eps->arc); + if (a2->lower != gfsmEpsilon) break; + + qid2 = gfsm_automaton_intersect_visit_((gfsmStatePair){a1->target,a2->target}, + fsm1, fsm2, fsm, spenum, flags); + if (qid2 != gfsmNoState) + gfsm_automaton_add_arc(fsm, qid, qid2, gfsmEpsilon, gfsmEpsilon, + gfsm_sr_times(fsm1->sr, a1->weight, a2->weight)); + } + } + else { + //-- handle non-epsilon arcs + for ( ; ai2 != NULL; ai2=ai2->next) { + a2 = &(ai2->arc); + + if (a2->lower < a1->lower) continue; + else if (a2->lower > a1->lower) break; + + qid2 = gfsm_automaton_intersect_visit_((gfsmStatePair){a1->target,a2->target}, + fsm1, fsm2, fsm, spenum, flags); + if (qid2 != gfsmNoState) + gfsm_automaton_add_arc(fsm, qid, qid2, a1->lower, a1->lower, + gfsm_sr_times(fsm1->sr, a1->weight, a2->weight)); + } + } + } + + //-- handle epsilon-arcs on fsm2 + for (ai2=al2 ; ai2 != NULL; ai2=ai2->next) { + a2 = &(ai2->arc); + if (a2->lower != gfsmEpsilon) break; + + //-- eps: case fsm1:(q1), fsm2:(q2 --eps--> q2') + qid2 = gfsm_automaton_intersect_visit_((gfsmStatePair){sp.id1,a2->target}, + fsm1, fsm2, fsm, spenum, flags); + if (qid2 != gfsmNoState) + gfsm_automaton_add_arc(fsm, qid, qid2, gfsmEpsilon, gfsmEpsilon, a2->weight); + } + + //-- cleanup + if (flags&gfsmCFEfsm1NeedsArcSort) gfsm_arclist_free(al1); + if (flags&gfsmCFEfsm2NeedsArcSort) gfsm_arclist_free(al2); + + return qid; +} + + +/*-------------------------------------------------------------- + * invert() + */ +gfsmAutomaton *gfsm_automaton_invert(gfsmAutomaton *fsm) +{ + gfsmStateId id; + gfsmArcIter ai; + gfsmArcCompMask acmask_old=fsm->flags.sort_mode, acmask_new=gfsmACNone;; + gint aci; + + //-- invert arcs + for (id=0; id < fsm->states->len; id++) { + for (gfsm_arciter_open(&ai,fsm,id); gfsm_arciter_ok(&ai); gfsm_arciter_next(&ai)) { + gfsmArc *a = gfsm_arciter_arc(&ai); + gfsmLabelId tmp = a->lower; + a->lower = a->upper; + a->upper = tmp; + } + } + + //-- adjust sort mask (translate "lower"<->"upper") + for (aci=0; aci < gfsmACMaxN; aci++) { + gfsmArcCompMask cmp = gfsm_acmask_nth(acmask_old,aci); + switch (cmp) { + case gfsmACLower: cmp=gfsmACUpper; break; + case gfsmACUpper: cmp=gfsmACLower; break; + case gfsmACLowerR: cmp=gfsmACUpperR; break; + case gfsmACUpperR: cmp=gfsmACLowerR; break; + default: break; + } + acmask_new |= gfsm_acmask_new(cmp,aci); + } + fsm->flags.sort_mode = acmask_new; + + return fsm; +} + +/*-------------------------------------------------------------- + * optional() + */ +gfsmAutomaton *gfsm_automaton_optional(gfsmAutomaton *fsm) +{ + if (!gfsm_automaton_is_final_state(fsm,fsm->root_id)) + gfsm_automaton_set_final_state_full(fsm,fsm->root_id,TRUE,fsm->sr->one); + return fsm; +} + +/*-------------------------------------------------------------- + * product() (single-destructive) + */ +gfsmAutomaton *gfsm_automaton_product(gfsmAutomaton *fsm1, gfsmAutomaton *fsm2) +{ + gfsmAutomaton *fsm2_tmp = gfsm_automaton_clone(fsm2); + gfsm_automaton_product2(fsm1,fsm2_tmp); + gfsm_automaton_free(fsm2_tmp); + return fsm1; +} + +/*-------------------------------------------------------------- + * _product() (dual-destructive) + */ +gfsmAutomaton *gfsm_automaton_product2(gfsmAutomaton *fsm1, gfsmAutomaton *fsm2) +{ + gfsmStateId qid; + gfsmState *qp; + gfsmArcIter ai; + gfsmArc *a; + + //-- chuck out all upper-labels from fsm1 + for (qid=0; qid < fsm1->states->len; qid++) { + qp = gfsm_automaton_find_state(fsm1,qid); + if (!qp || !qp->is_valid) continue; + for (gfsm_arciter_open_ptr(&ai,fsm1,qp); gfsm_arciter_ok(&ai); gfsm_arciter_next(&ai)) { + a = gfsm_arciter_arc(&ai); + a->upper = gfsmEpsilon; + } + } + + //-- chuck out all upper-labels from fsm2 + for (qid=0; qid < fsm2->states->len; qid++) { + qp = gfsm_automaton_find_state(fsm2,qid); + if (!qp || !qp->is_valid) continue; + for (gfsm_arciter_open_ptr(&ai,fsm2,qp); gfsm_arciter_ok(&ai); gfsm_arciter_next(&ai)) { + a = gfsm_arciter_arc(&ai); + a->lower = gfsmEpsilon; + } + } + + //-- concatenate + gfsm_automaton_concat(fsm1,fsm2); + + //-- mark output fsm as transducer + fsm1->flags.is_transducer = 1; + + return fsm1; +} + +/*-------------------------------------------------------------- + * project() + */ +gfsmAutomaton *gfsm_automaton_project(gfsmAutomaton *fsm, gfsmLabelSide which) +{ + gfsmStateId id; + gfsmArcIter ai; + if (which==gfsmLSBoth) return fsm; + + for (id=0; id < fsm->states->len; id++) { + for (gfsm_arciter_open(&ai,fsm,id); gfsm_arciter_ok(&ai); gfsm_arciter_next(&ai)) { + gfsmArc *a = gfsm_arciter_arc(&ai); + if (which==gfsmLSLower) a->upper = a->lower; + else a->lower = a->upper; + } + } + fsm->flags.is_transducer = FALSE; + return fsm; +} + +/*-------------------------------------------------------------- + * replace() + */ +gfsmAutomaton *gfsm_automaton_replace(gfsmAutomaton *fsm1, gfsmLabelVal lo, gfsmLabelVal hi, gfsmAutomaton *fsm2) +{ + gfsmStateId id; + gfsmArcIter ai; + gfsmStateId nstates = fsm1->states->len; + + for (id=0; id < nstates; id++) { + if (!gfsm_automaton_has_state(fsm1,id)) continue; + for (gfsm_arciter_open(&ai,fsm1,id), gfsm_arciter_seek_both(&ai,lo,hi); + gfsm_arciter_ok(&ai); + gfsm_arciter_seek_both(&ai,lo,hi)) + { + gfsmArc *a = gfsm_arciter_arc(&ai); + gfsm_automaton_insert_automaton(fsm1, id, a->target, fsm2, a->weight); + gfsm_arciter_remove(&ai); //-- implies gfsm_arciter_next() + } + //gfsm_arciter_close(&ai); + } + + return fsm1; +} + +/*-------------------------------------------------------------- + * insert_automaton() + */ +gfsmAutomaton *gfsm_automaton_insert_automaton(gfsmAutomaton *fsm1, + gfsmStateId q1from, + gfsmStateId q1to, + gfsmAutomaton *fsm2, + gfsmWeight w) +{ + gfsmStateId offset; + gfsmStateId size2; + gfsmStateId id2; + gfsmStateId id1; + const gfsmState *s2; + gfsmState *s1; + gfsmArcIter ai; + gfsmWeight s2fw; + + //-- reserve size + offset = fsm1->states->len; + size2 = fsm2->states->len; + gfsm_automaton_reserve(fsm1, offset + size2); + + //-- avoid "smart" arc-insertion + fsm1->flags.sort_mode = gfsmASMNone; + + //-- adopt states from fsm2 into fsm1 + for (id2 = 0; id2 < size2; id2++) { + + s2 = gfsm_automaton_find_state_const(fsm2,id2); + id1 = id2+offset; + s1 = gfsm_automaton_find_state(fsm1, id1); + + //-- sanity check(s) + if (!s1 || !s2 || !s2->is_valid) continue; + + //-- copy state + gfsm_state_copy(s1,s2); + + //-- translate targets for adopted arcs + for (gfsm_arciter_open_ptr(&ai,fsm1,s1); gfsm_arciter_ok(&ai); gfsm_arciter_next(&ai)) + { + gfsmArc *a = gfsm_arciter_arc(&ai); + a->target += offset; + } + + //-- check for fsm2-final states: get weight & add arc to our sink state + if (gfsm_weightmap_lookup(fsm2->finals, GUINT_TO_POINTER(id2), &s2fw)) { + s1->is_final = FALSE; + gfsm_automaton_add_arc(fsm1,id1,q1to,gfsmEpsilon,gfsmEpsilon, s2fw); + } + } + + //-- add arc to new state + gfsm_automaton_add_arc(fsm1, q1from, fsm2->root_id+offset, gfsmEpsilon, gfsmEpsilon, w); + + return fsm1; +} + +/*-------------------------------------------------------------- + * rmepsilon_foreach_func() + */ +static +void gfsm_automaton_rmeps_pass2_foreach_func_(gfsmStatePair *sp, gpointer pw, gfsmAutomaton *fsm) +{ + gfsmWeight w = gfsm_ptr2weight(pw); + gfsmWeight fw2; + gfsmArcIter ai; + gfsmArc *a; + if (sp->id1==sp->id2) return; //-- sanity check + + //-- adopt final weights (plus) + if (gfsm_automaton_lookup_final(fsm, sp->id2, &fw2)) { + gfsm_automaton_set_final_state_full(fsm, sp->id1, TRUE, + gfsm_sr_plus(fsm->sr, + gfsm_automaton_get_final_weight(fsm, sp->id1), + gfsm_sr_times(fsm->sr, w, fw2))); + } + + //-- adopt non-epsilon arcs + for (gfsm_arciter_open(&ai,fsm,sp->id2); gfsm_arciter_ok(&ai); gfsm_arciter_next(&ai)) { + a = gfsm_arciter_arc(&ai); + if (a->lower != gfsmEpsilon || a->upper != gfsmEpsilon) { + gfsm_automaton_add_arc(fsm, sp->id1, a->target, a->lower, a->upper, + gfsm_sr_times(fsm->sr, a->weight, w)); + } + } +} + +/*-------------------------------------------------------------- + * rmepsilon() + */ +gfsmAutomaton *gfsm_automaton_rmepsilon(gfsmAutomaton *fsm) +{ + gfsmStatePair2WeightHash *sp2wh = gfsm_statepair2weighthash_new(); + gfsmArcIter ai; + gfsmStateId qid; + gfsmArc *a; + + //-- pass-1: populate sp2wh with epsilon-reachable states + for (qid=0; qid < fsm->states->len; qid++) { + gfsm_automaton_rmeps_visit_state_(fsm, qid, qid, fsm->sr->one, sp2wh); + } + + //-- pass-2: adopt non-epsilon arcs & final weights from eps-reachable states + gfsm_weighthash_foreach(sp2wh, (GHFunc)gfsm_automaton_rmeps_pass2_foreach_func_, fsm); + + //-- pass-3: actual removal of now-redundant epsilon arcs + for (qid=0; qid < fsm->states->len; qid++) { + for (gfsm_arciter_open(&ai,fsm,qid); gfsm_arciter_ok(&ai); ) { + a = gfsm_arciter_arc(&ai); + if (a->lower==gfsmEpsilon && a->upper==gfsmEpsilon) { + gfsm_arciter_remove(&ai); + } else { + gfsm_arciter_next(&ai); + } + } + } + + //-- cleanup + gfsm_weighthash_free(sp2wh); + + return fsm; +} + +/*-------------------------------------------------------------- + * rmepsilon_visit_state() + */ +void gfsm_automaton_rmeps_visit_state_(gfsmAutomaton *fsm, + gfsmStateId qid_noeps, //-- state reachable by non-eps arcs + gfsmStateId qid_eps, //-- eps-reachable state from qid_noeps + gfsmWeight weight_eps, //-- total weight of followed eps-arcs + gfsmStatePair2WeightHash *sp2wh //-- maps (qid_noeps,qid_noeps)=>sum_weight_eps + ) +{ + gfsmState *q_noeps, *q_eps; + gfsmStatePair sp = {qid_noeps,qid_eps}; + gfsmArcIter ai; + gfsmArc *a; + + //-- visited check, mark + if (!gfsm_weighthash_insert_sum_if_less(sp2wh, &sp, weight_eps, fsm->sr)) + return; //-- no update required + + //-- sanity check + q_noeps = gfsm_automaton_find_state(fsm,qid_noeps); + q_eps = gfsm_automaton_find_state(fsm,qid_eps); + if (!q_noeps || !q_noeps->is_valid || !q_eps || !q_eps->is_valid) return; + + //-- visit epsilon-reachable states from q_eps + for (gfsm_arciter_open_ptr(&ai, fsm, q_eps), gfsm_arciter_seek_both(&ai,gfsmEpsilon,gfsmEpsilon); + gfsm_arciter_ok(&ai); + gfsm_arciter_next(&ai), gfsm_arciter_seek_both(&ai,gfsmEpsilon,gfsmEpsilon)) + { + a = gfsm_arciter_arc(&ai); + gfsm_automaton_rmeps_visit_state_(fsm, qid_noeps, a->target, + gfsm_sr_times(fsm->sr, weight_eps, a->weight), + sp2wh); + } +} + + +/*-------------------------------------------------------------- + * reverse() + */ +gfsmAutomaton *gfsm_automaton_reverse(gfsmAutomaton *fsm) +{ + gfsmStateId new_root = gfsm_automaton_add_state(fsm); + gfsmStateId id; + gfsmState *s, *ts; + gfsmArcList *al, *al_next, *al_prev; + gfsmWeight w; + //gfsmArcSortMode sm = gfsm_automaton_sortmode(fsm); + + //-- mark automaton as unsorted (avoid "smart" arc-insertion) + fsm->flags.sort_mode = gfsmASMNone; + + //-- reverse arc directions, keeping old "source" and "target" values + // intact as sentinels + for (id = 0; id < new_root; id++) { + s = gfsm_automaton_find_state(fsm,id); + if (!s || !s->is_valid) continue; + + //-- check for old final states + if (gfsm_automaton_lookup_final(fsm,id,&w)) { + s->is_final = FALSE; + gfsm_weightmap_remove(fsm->finals, GUINT_TO_POINTER(id)); + gfsm_automaton_add_arc(fsm, new_root, id, gfsmEpsilon, gfsmEpsilon, w); + } + + //-- reverse arcs + for (al_prev=NULL, al=s->arcs; al != NULL; al=al_next) { + gfsmArc *a = gfsm_arclist_arc(al); + al_next = al->next; + if (a->target==id) { + //-- already reversed (or a single-arc loop, which doesn't need reversal) + al_prev = al; + continue; + } + + //-- steal arc + if (al_prev) al_prev->next = al->next; + else s->arcs = al->next; + al->next = NULL; + + //-- move arc + ts = gfsm_automaton_find_state(fsm,a->target); + gfsm_automaton_add_arc_node(fsm, ts, al); + } + } + + //-- sanitize: swap 'source' and 'target' fields + for (id=0; id < new_root; id++) { + gfsmArcIter ai; + for (gfsm_arciter_open(&ai,fsm,id); gfsm_arciter_ok(&ai); gfsm_arciter_next(&ai)) { + gfsmArc *a = gfsm_arciter_arc(&ai); + gfsmStateId tmp = a->target; + a->target = a->source; + a->source = tmp; + } + } + + //-- root flop + gfsm_automaton_set_final_state_full(fsm,fsm->root_id,TRUE,fsm->sr->one); + fsm->root_id = new_root; + + return fsm; +} + +#if 0 +gfsmAutomaton *gfsm_automaton_reverse_old(gfsmAutomaton *fsm) +{ + gfsmStateId new_root = gfsm_automaton_add_state(fsm); + gfsmStateId id; + gfsmState *s, *ts; + gfsmArcList *al, *al_next, *al_prev; + gfsmWeight w; + //gfsmArcSortMode sm = gfsm_automaton_sortmode(fsm); + + //-- mark automaton as unsorted (avoid "smart" arc-insertion) + fsm->flags.sort_mode = gfsmASMNone; + + //-- reverse arc directions, assigning reversed arcs 'target' values as 'old_src+new_root' + for (id = 0; id < new_root; id++) { + s = gfsm_automaton_find_state(fsm,id); + if (!s || !s->is_valid) continue; + + //-- check for old final states + if (gfsm_automaton_lookup_final(fsm,id,&w)) { + s->is_final = FALSE; + gfsm_weightmap_remove(fsm->finals, GUINT_TO_POINTER(id)); + gfsm_automaton_add_arc(fsm, new_root, id, gfsmEpsilon, gfsmEpsilon, w); + } + + //-- reverse arcs + for (al_prev=NULL, al=s->arcs; al != NULL; al=al_next) { + gfsmArc *a = gfsm_arclist_arc(al); + al_next = al->next; + if (a->target >= new_root) { + //-- already moved + al_prev = al; + continue; + } + + //-- steal arc + if (al_prev) al_prev->next = al->next; + else s->arcs = al->next; + al->next = NULL; + + //-- move arc + ts = gfsm_automaton_find_state(fsm,a->target); + gfsm_automaton_add_arc_link(fsm, ts, al); + + //-- flag as reversed + a->target = id + new_root; + } + } + + //-- sanitize + for (id=0; id < new_root; id++) { + gfsmArcIter ai; + for (gfsm_arciter_open(&ai,fsm,id); gfsm_arciter_ok(&ai); gfsm_arciter_next(&ai)) { + gfsmArc *a = gfsm_arciter_arc(&ai); + a->target -= new_root; + } + } + + //-- root flop + gfsm_automaton_set_final_state_full(fsm,fsm->root_id,TRUE,fsm->sr->one); + fsm->root_id = new_root; + + return fsm; +} +#endif + +/*-------------------------------------------------------------- + * sigma() + */ +gboolean gfsm_automaton_sigma_foreach_func_(gfsmAlphabet *abet, gpointer key, gfsmLabelVal lab, gfsmAutomaton *fsm) +{ + gfsm_automaton_add_arc(fsm,0,1,lab,lab,fsm->sr->one); + return FALSE; +} + +gfsmAutomaton *gfsm_automaton_sigma(gfsmAutomaton *fsm, gfsmAlphabet *abet) +{ + gfsm_automaton_clear(fsm); + fsm->flags.sort_mode = gfsmASMNone; //-- avoid "smart" arc-insertion + fsm->root_id = gfsm_automaton_add_state_full(fsm,0); + gfsm_automaton_add_state_full(fsm,1); + gfsm_automaton_set_final_state_full(fsm,1,TRUE,fsm->sr->one); + gfsm_alphabet_foreach(abet, (gfsmAlphabetForeachFunc)gfsm_automaton_sigma_foreach_func_, fsm); + return fsm; +} + +/*-------------------------------------------------------------- + * union_() + */ +gfsmAutomaton *gfsm_automaton_union(gfsmAutomaton *fsm1, gfsmAutomaton *fsm2) +{ + gfsmStateId offset; + gfsmStateId id2; + gfsmStateId oldroot1; + gfsmArcCompData sortdata = {0,0,0,0}; + + //-- sanity check + if (!fsm2 || fsm2->root_id==gfsmNoState) return fsm1; + + offset = fsm1->states->len + 1; + gfsm_automaton_reserve(fsm1, offset + fsm2->states->len); + + + //-- add new root and eps-arc to old root for fsm1 + oldroot1 = fsm1->root_id; + fsm1->root_id = gfsm_automaton_add_state(fsm1); + if (oldroot1 != gfsmNoState) { + gfsm_automaton_add_arc(fsm1, fsm1->root_id, oldroot1, gfsmEpsilon, gfsmEpsilon, fsm1->sr->one); + } + + //-- avoid "smart" arc-insertion (temporary) + sortdata.mask = fsm1->flags.sort_mode; + sortdata.sr = fsm1->sr; + fsm1->flags.sort_mode = gfsmASMNone; + + //-- adopt states from fsm2 into fsm1 + for (id2 = 0; id2 < fsm2->states->len; id2++) { + const gfsmState *s2 = gfsm_automaton_find_state_const(fsm2,id2); + gfsmState *s1 = gfsm_automaton_find_state(fsm1,id2+offset); + gfsmArcIter ai; + gfsm_state_copy(s1,s2); + for (gfsm_arciter_open_ptr(&ai, fsm1, s1); gfsm_arciter_ok(&ai); gfsm_arciter_next(&ai)) { + gfsmArc *a = gfsm_arciter_arc(&ai); + a->target += offset; + } + //-- index final states from @fsm2 + if (s2->is_final) { + gfsm_automaton_set_final_state_full(fsm1, id2+offset, TRUE, gfsm_automaton_get_final_weight(fsm2, id2)); + } + //-- maybe sort new arcs + if (sortdata.mask != gfsmASMNone + && (fsm2->flags.sort_mode != sortdata.mask + || (sortdata.mask == gfsmASMWeight && fsm2->sr->type != fsm1->sr->type))) + { + s1->arcs = gfsm_arclist_sort(s1->arcs, &sortdata); + } + } + + //-- re-instate "smart" arc-insertion + fsm1->flags.sort_mode = sortdata.mask; + + //-- add epsilon arc to translated root(fsm2) in fsm1 + gfsm_automaton_add_arc(fsm1, + fsm1->root_id, + offset + fsm2->root_id, + gfsmEpsilon, + gfsmEpsilon, + fsm1->sr->one); + + return fsm1; +} + + +/*-------------------------------------------------------------- + * dummy() + */ +gfsmAutomaton *gfsm_automaton_dummy(gfsmAutomaton *fsm) +{ + g_assert_not_reached(); /*-- NOT gfsm_assert_not_reached() ! */ + return fsm; +} |