diff options
Diffstat (limited to 'gfsm/gfsm/src/libgfsm/gfsmArcIndex.c')
-rw-r--r-- | gfsm/gfsm/src/libgfsm/gfsmArcIndex.c | 498 |
1 files changed, 498 insertions, 0 deletions
diff --git a/gfsm/gfsm/src/libgfsm/gfsmArcIndex.c b/gfsm/gfsm/src/libgfsm/gfsmArcIndex.c new file mode 100644 index 0000000..3df0760 --- /dev/null +++ b/gfsm/gfsm/src/libgfsm/gfsmArcIndex.c @@ -0,0 +1,498 @@ + +/*=============================================================================*\ + * File: gfsmArcIndex.c + * Author: Bryan Jurish <moocow@ling.uni-potsdam.de> + * Description: finite state machine library: arc indices + * + * Copyright (c) 2006-2007 Bryan Jurish. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + *=============================================================================*/ + +#include <gfsmArcIndex.h> +#include <gfsmArcIter.h> + +//-- no-inline definitions +#ifndef GFSM_INLINE_ENABLED +# include <gfsmArcIndex.hi> +#endif + +/*====================================================================== + * gfsmReverseArcIndex + */ + +/*-------------------------------------------------------------- + * automaton_reverse_arc_index() + */ +gfsmReverseArcIndex *gfsm_automaton_to_reverse_arc_index(gfsmAutomaton *fsm, gfsmReverseArcIndex *rarcs) +{ + gfsmStateId idfrom; + gfsmArcIter ai; + gfsmArc *arc; + + if (!rarcs) { + rarcs = gfsm_reverse_arc_index_sized_new(fsm->states->len); + } + g_ptr_array_set_size(rarcs,fsm->states->len); + + for (idfrom=0; idfrom < fsm->states->len; idfrom++) { + for (gfsm_arciter_open(&ai,fsm,idfrom); gfsm_arciter_ok(&ai); gfsm_arciter_next(&ai)) { + arc = gfsm_arciter_arc(&ai); + g_ptr_array_index(rarcs,arc->target) + //= gfsm_arclist_prepend(g_ptr_array_index(rarcs,arc->target), arc); + = g_slist_prepend(g_ptr_array_index(rarcs,arc->target),arc); + } + gfsm_arciter_close(&ai); + } + + return rarcs; +} + +/*-------------------------------------------------------------- + * reverse_arc_index_free() + */ +void gfsm_reverse_arc_index_free(gfsmReverseArcIndex *rarcs, gboolean free_lists) +{ + guint i; + if (free_lists) { + //-- +free_lists, -free_arcs + for (i=0; i < rarcs->len; i++) { g_slist_free(g_ptr_array_index(rarcs,i)); } + } + + //-- free index array + g_ptr_array_free(rarcs,TRUE); +} + + + +/*====================================================================== + * gfsmWeightVector + */ + +/*-------------------------------------------------------------- + * automaton_to_weight_vector() + */ +gfsmWeightVector *gfsm_automaton_to_final_weight_vector(gfsmAutomaton *fsm, gfsmWeightVector *wv) +{ + gfsmStateId qid; + guint n_states = gfsm_automaton_n_states(fsm); + gfsmWeight *wp; + + if (wv==NULL) { + wv = gfsm_weight_vector_sized_new(n_states); + } else { + gfsm_weight_vector_resize(wv,n_states); + } + wv->len = n_states; + + for (qid=0,wp=(gfsmWeight*)wv->data; qid < n_states; qid++,wp++) { + gfsm_automaton_lookup_final(fsm,qid,wp); + } + + return wv; +} + +/*-------------------------------------------------------------- + * weight_vector_write_bin_handle() + */ +gboolean gfsm_weight_vector_write_bin_handle(gfsmWeightVector *wv, gfsmIOHandle *ioh, gfsmError **errp) +{ + guint32 len = wv->len; + if (!gfsmio_write(ioh,&len,sizeof(guint32))) { + g_set_error(errp, g_quark_from_static_string("gfsm"), //-- domain + g_quark_from_static_string("weight_vector_write_bin_handle:len"), //-- code + "could not store weight vector length"); + return FALSE; + } + if (!gfsmio_write(ioh,wv->data,wv->len*sizeof(gfsmWeight))) { + g_set_error(errp, g_quark_from_static_string("gfsm"), //-- domain + g_quark_from_static_string("weight_vector_write_bin_handle:weights"), //-- code + "could not store weight vector data"); + return FALSE; + } + return TRUE; +} + +/*-------------------------------------------------------------- + * weight_vector_read_bin_handle() + */ +gboolean gfsm_weight_vector_read_bin_handle(gfsmWeightVector *wv, gfsmIOHandle *ioh, gfsmError **errp) +{ + guint32 len; + if (!gfsmio_read(ioh, &len, sizeof(guint32))) { + g_set_error(errp, + g_quark_from_static_string("gfsm"), //-- domain + g_quark_from_static_string("weight_vector_read_bin_handle:len"), //-- code + "could not read weight vector length"); + return FALSE; + } + gfsm_weight_vector_resize(wv,len); + if (!gfsmio_read(ioh, wv->data, len*sizeof(gfsmWeight))) { + g_set_error(errp, + g_quark_from_static_string("gfsm"), //-- domain + g_quark_from_static_string("weight_vector_read_bin_handle:data"), //-- code + "could not read weight vector data"); + return FALSE; + } + return TRUE; +} + +/*====================================================================== + * gfsmArcTable + */ + +/*-------------------------------------------------------------- + * automaton_to_arc_table() + */ +gfsmArcTable *gfsm_automaton_to_arc_table(gfsmAutomaton *fsm, gfsmArcTable *tab) +{ + gfsmStateId qid, n_states=gfsm_automaton_n_states(fsm); + guint n_arcs=gfsm_automaton_n_arcs(fsm); + gfsmArcIter ai; + gfsmArc *arcp; + + //-- maybe allocate + if (!tab) { + tab = gfsm_arc_table_sized_new(n_arcs); + } else { + gfsm_arc_table_resize(tab, n_arcs); + } + tab->len = n_arcs; + + //-- populate arcs + for (qid=0,arcp=(gfsmArc*)tab->data; qid < n_states; qid++) { + if (!gfsm_automaton_has_state(fsm,qid)) continue; + for (gfsm_arciter_open(&ai,fsm,qid); gfsm_arciter_ok(&ai); gfsm_arciter_next(&ai)) { + gfsmArc *a = gfsm_arciter_arc(&ai); + *(arcp++) = *a; + } + gfsm_arciter_close(&ai); + } + + //-- return + return tab; +} + +/*-------------------------------------------------------------- + * arc_table_write_bin_handle() + */ +gboolean gfsm_arc_table_write_bin_handle(gfsmArcTable *tab, gfsmIOHandle *ioh, gfsmError **errp) +{ + guint32 len = tab->len; + if (!gfsmio_write(ioh, &len, sizeof(guint32))) { + g_set_error(errp, + g_quark_from_static_string("gfsm"), + g_quark_from_static_string("arc_table_write_bin_handle:len"), + "could not write arc table length"); + return FALSE; + } + if (!gfsmio_write(ioh, tab->data, len*sizeof(gfsmArc))) { + g_set_error(errp, + g_quark_from_static_string("gfsm"), + g_quark_from_static_string("arc_table_write_bin_handle:data"), + "could not write arc table data"); + return FALSE; + } + return TRUE; +} + +/*-------------------------------------------------------------- + * arc_table_read_bin_handle() + */ +gboolean gfsm_arc_table_read_bin_handle(gfsmArcTable *tab, gfsmIOHandle *ioh, gfsmError **errp) +{ + guint32 len; + if (!gfsmio_read(ioh, &len, sizeof(guint32))) { + g_set_error(errp, + g_quark_from_static_string("gfsm"), + g_quark_from_static_string("arc_table_read_bin_handle:len"), + "could not read arc table length"); + return FALSE; + } + gfsm_arc_table_resize(tab,len); + if (!gfsmio_read(ioh, tab->data, len*sizeof(gfsmArc))) { + g_set_error(errp, + g_quark_from_static_string("gfsm"), + g_quark_from_static_string("arc_table_read_bin_handle:data"), + "could not read arc table data"); + return FALSE; + } + return TRUE; +} + + + +/*====================================================================== + * gfsmArcTableIndex + */ + +/*-------------------------------------------------------------- + * arc_table_index_copy() + */ +gfsmArcTableIndex *gfsm_arc_table_index_copy(gfsmArcTableIndex *dst, gfsmArcTableIndex *src) +{ + gfsmStateId i; + gfsm_arc_table_copy (dst->tab, src->tab); + g_ptr_array_set_size(dst->first, src->first->len); + + for (i=0; i < src->first->len; i++) { + gint offset = (gfsmArc*)g_ptr_array_index(src->first,i) - (gfsmArc*)src->tab->data; + g_ptr_array_index(dst->first,i) = (gfsmArc*)dst->tab->data + offset; + } + + return dst; +} + + +/*-------------------------------------------------------------- + * automaton_to_arc_table_index() + */ +gfsmArcTableIndex *gfsm_automaton_to_arc_table_index(gfsmAutomaton *fsm, gfsmArcTableIndex *tabx) +{ + gfsmStateId qid, n_states=gfsm_automaton_n_states(fsm); + guint n_arcs=gfsm_automaton_n_arcs(fsm); + gfsmArc *arcp, *arcp_max; + gfsmArc **firstp; + + //-- maybe allocate + if (!tabx) { + tabx = gfsm_arc_table_index_sized_new(n_states, n_arcs); + } else { + gfsm_arc_table_index_resize(tabx, n_states, n_arcs); + } + + //-- populate tabx->arcs + gfsm_automaton_to_arc_table(fsm,tabx->tab); + + //-- populate tabx->first + arcp = (gfsmArc*)tabx->tab->data; + arcp_max = arcp + n_arcs; + for (qid=0,firstp=(gfsmArc**)tabx->first->pdata; qid<n_states; qid++,firstp++) { + *firstp = arcp; + for ( ; arcp<arcp_max && arcp->source==qid; arcp++) { ; } + } + *firstp = arcp_max; + + //-- return + return tabx; +} + +/*-------------------------------------------------------------- + * arc_table_index_sort_with_data() + */ +void gfsm_arc_table_index_sort_with_data(gfsmArcTableIndex *tabx, GCompareDataFunc compare_func, gpointer data) +{ + gfsmArc **firstp = (gfsmArc**)tabx->first->pdata; + gfsmArc **firstp_max = firstp + tabx->first->len - 1; + for ( ; firstp < firstp_max; firstp++) { + gfsmArc *min = *firstp; + gfsmArc *max = *(firstp+1); + g_qsort_with_data(min, max-min, sizeof(gfsmArc), compare_func, data); + } +} + +/*-------------------------------------------------------------- + * arc_table_index_write_bin_handle() + */ +gboolean gfsm_arc_table_index_write_bin_handle(gfsmArcTableIndex *tabx, gfsmIOHandle *ioh, gfsmError **errp) +{ + gfsmStateId first_len=tabx->first->len, qid; + if (!gfsm_arc_table_write_bin_handle(tabx->tab, ioh, errp)) return FALSE; + + if (!gfsmio_write(ioh, &first_len, sizeof(gfsmStateId))) { + g_set_error(errp, + g_quark_from_static_string("gfsm"), + g_quark_from_static_string("arc_table_index_write_bin_handle:len"), + "could not write arc table index 'first' length"); + return FALSE; + } + for (qid=0; qid < first_len; qid++) { + gfsmArc *a = (gfsmArc*)g_ptr_array_index(tabx->first,qid); + guint32 offset = a - ((gfsmArc*)tabx->tab->data); + if (!gfsmio_write(ioh, &offset, sizeof(guint32))) { + g_set_error(errp, + g_quark_from_static_string("gfsm"), + g_quark_from_static_string("arc_table_index_write_bin_handle:data"), + "could not write state arc offset for state '%u'", qid); + return FALSE; + } + } + return TRUE; +} + +/*-------------------------------------------------------------- + * arc_table_index_read_bin_handle() + */ +gboolean gfsm_arc_table_index_read_bin_handle(gfsmArcTableIndex *tabx, gfsmIOHandle *ioh, gfsmError **errp) +{ + gfsmStateId first_len, qid; + if (!gfsm_arc_table_read_bin_handle(tabx->tab, ioh, errp)) return FALSE; + + if (!gfsmio_read(ioh, &first_len, sizeof(gfsmStateId))) { + g_set_error(errp, + g_quark_from_static_string("gfsm"), + g_quark_from_static_string("arc_table_index_read_bin_handle:len"), + "could not read arc table index 'first' length"); + return FALSE; + } + g_ptr_array_set_size(tabx->first,first_len); + for (qid=0; qid < first_len; qid++) { + guint32 offset; + if (!gfsmio_read(ioh, &offset, sizeof(guint32))) { + g_set_error(errp, + g_quark_from_static_string("gfsm"), + g_quark_from_static_string("arc_table_index_write_bin_handle:data"), + "could not read state arc offset for state '%u'", qid); + return FALSE; + } + g_ptr_array_index(tabx->first,qid) = &g_array_index(tabx->tab,gfsmArc,offset); + } + return TRUE; +} + + +/*====================================================================== + * gfsmArcLabelIndex [GONE] + */ +//-------------------------------------------------------------- +// arc_label_index_compare_arcs() +/* +gint gfsm_arc_label_index_compare_arcs(gfsmArc *a1, gfsmArc *a2, gfsmArcLabelIndexSortData *sdata) +{ return gfsm_arc_label_index_compare_arcs_inline(a1,a2,sdata); } +*/ + + +/*====================================================================== + * gfsmArcRange + */ + +#undef GFSM_ARCRANGE_ENABLE_BSEARCH +#undef GFSM_ARCRANGE_ENABLE_SEEK + +#ifdef GFSM_ARCRANGE_ENABLE_BSEARCH +/*-------------------------------------------------------------- + * arc_range_bsearch_*() + * + NOT WORTH IT (tested for out_degree {1,2,4,8,16,32,64,128,256,512}) + */ +void gfsm_arcrange_bsearch_source(gfsmArcRange *range, gfsmStateId find) +{ + gfsmArc *min=range->min, *max=range->max; + while (min < max) { + gfsmArc *mid = min + (max-min)/2; + if (mid->source < find) { min = mid+1; } + else { max = mid; } + } + range->min = min; +} + +//-------------------------------------------------------------- +void gfsm_arcrange_bsearch_target(gfsmArcRange *range, gfsmStateId find) +{ + gfsmArc *min=range->min, *max=range->max; + while (min < max) { + gfsmArc *mid = min + (max-min)/2; + if (mid->target < find) { min = mid+1; } + else { max = mid; } + } + range->min = min; +} + +//-------------------------------------------------------------- +void gfsm_arcrange_bsearch_lower(gfsmArcRange *range, gfsmLabelId find) +{ + gfsmArc *min=range->min, *max=range->max; + while (min < max) { + gfsmArc *mid = min + (max-min)/2; + if (mid->lower < find) { min = mid+1; } + else { max = mid; } + } + range->min = min; +} + +//-------------------------------------------------------------- +void gfsm_arcrange_bsearch_upper(gfsmArcRange *range, gfsmLabelId find) +{ + gfsmArc *min=range->min, *max=range->max; + while (min < max) { + gfsmArc *mid = min + (max-min)/2; + if (mid->upper < find) { min = mid+1; } + else { max = mid; } + } + range->min = min; +} + +//-------------------------------------------------------------- +void gfsm_arcrange_bsearch_weight(gfsmArcRange *range, gfsmWeight find, gfsmSemiring *sr) +{ + gfsmArc *min=range->min, *max=range->max; + while (min < max) { + gfsmArc *mid = min + (max-min)/2; + if (gfsm_sr_compare(sr,mid->weight,find) < 0) { range->min = mid+1; } + else { range->max = mid; } + } + range->min = min; +} +#endif /* GFSM_ARCRANGE_ENABLE_BSEARCH */ + +#ifdef GFSM_ARCRANGE_ENABLE_SEEK +//-------------------------------------------------------------- +// arcrange_seek_X() +// -- also not worth it + +//---------------------------------------------- +GFSM_INLINE +void gfsm_arcrange_seek_source(gfsmArcRange *range, gfsmStateId find) +{ + gfsm_assert(range != NULL); + while (gfsm_arcrange_ok(range) && gfsm_arcrange_arc(range)->source < find) + gfsm_arcrange_next(range); +} + +//---------------------------------------------- +GFSM_INLINE +void gfsm_arcrange_seek_target(gfsmArcRange *range, gfsmStateId find) +{ + gfsm_assert(range != NULL); + while (gfsm_arcrange_ok(range) && gfsm_arcrange_arc(range)->target < find) + gfsm_arcrange_next(range); +} + +//---------------------------------------------- +GFSM_INLINE +void gfsm_arcrange_seek_lower(gfsmArcRange *range, gfsmLabelId find) +{ + gfsm_assert(range != NULL); + while (gfsm_arcrange_ok(range) && gfsm_arcrange_arc(range)->lower < find) + gfsm_arcrange_next(range); +} + +//---------------------------------------------- +GFSM_INLINE +void gfsm_arcrange_seek_upper(gfsmArcRange *range, gfsmLabelId find) +{ + gfsm_assert(range != NULL); + while (gfsm_arcrange_ok(range) && gfsm_arcrange_arc(range)->upper < find) + gfsm_arcrange_next(range); +} + +//---------------------------------------------- +GFSM_INLINE +void gfsm_arcrange_seek_weight(gfsmArcRange *range, gfsmWeight find, gfsmSemiring *sr) +{ + gfsm_assert(range != NULL); + while (gfsm_arcrange_ok(range) && gfsm_sr_compare(sr,gfsm_arcrange_arc(range)->weight,find) < 0) + gfsm_arcrange_next(range); +} +#endif /* GFSM_ARCRANGE_ENABLE_SEEK */ |