diff options
author | Bryan Jurish <mukau@users.sourceforge.net> | 2008-11-29 23:22:39 +0000 |
---|---|---|
committer | Bryan Jurish <mukau@users.sourceforge.net> | 2008-11-29 23:22:39 +0000 |
commit | d13da71edce4b913736c1e752a211ae20c193292 (patch) | |
tree | 57b0c3884b5d465259d21906f89e4ec5635da9d3 /gfsm/gfsm/src | |
parent | 2072ea2ef54b92775efc83c82d9a4b7a8ac4d616 (diff) |
+ added local copy of gfsm source tree in gfsm/ subdir (for pd-extended auto-builds)
+ external builds now use static local libgfsm by default (insulates vs. API change, etc.)
svn path=/trunk/externals/moocow/; revision=10403
Diffstat (limited to 'gfsm/gfsm/src')
391 files changed, 55890 insertions, 0 deletions
diff --git a/gfsm/gfsm/src/.cvsignore b/gfsm/gfsm/src/.cvsignore new file mode 100644 index 0000000..45bc517 --- /dev/null +++ b/gfsm/gfsm/src/.cvsignore @@ -0,0 +1,20 @@ +*~ +.*~ +*.o +*.lo +*.la +.libs +*.a +*.so +.deps +*.fst +*.tfst +*.lab +stamp-h* +config.h +config.h.in +aclocal.m4 +Makefile +Makefile.in +README.txt +configure diff --git a/gfsm/gfsm/src/Makefile.am b/gfsm/gfsm/src/Makefile.am new file mode 100644 index 0000000..486aedb --- /dev/null +++ b/gfsm/gfsm/src/Makefile.am @@ -0,0 +1,69 @@ +# File: ./src/Makefile.am +# Package: * +# Description: +# + source-level automake file +# +# Process this file with Automake to create Makefile.in. +#----------------------------------------------------------------------- + +#----------------------------------------------------------------------- +# Options & Subdirectories +#----------------------------------------------------------------------- + +## --- recursion subdirectories +#SUBDIRS = libgfsm programs +SUBDIRS = libgfsm $(GFSM_SRC_SUBDIRS) + +#----------------------------------------------------------------------- +# Variables: cleanup +#----------------------------------------------------------------------- + +## --- mostlyclean: built by 'make' & commonly rebuilt +#MOSTLYCLEANFILES = + +## --- clean: built by 'make' +#CLEANFILES = + +## --- distclean: built by 'configure' +DISTCLEANFILES = \ + autom4te.cache \ + config.log \ + config.status \ + config.cache + +## -- maintainerclean: built by maintainer / by hand +MAINTAINERCLEANFILES = *~ \ + $(PODS:.pod=.txt) \ + Makefile Makefile.in \ + aclocal.m4 \ + configure + +#----------------------------------------------------------------------- +# Variables: distribution +#----------------------------------------------------------------------- + +## --- extra distribution files +#EXTRA_DIST = + +## --- recursion subdirectories for 'make dist' +DIST_SUBDIRS = $(SUBDIRS) + +## --- dist-hook: when another 'Makefile.am' is overkill +#DISTHOOK_DIRS = subdir 
+#DISTHOOK_FILES = subdir/file1 subdir/file2 +# +#dist-hook: +# for d in $(DISTHOOK_DIRS); do\ +# mkdir -p $(distdir)/$$d ;\ +# done +# for f in $(DISTHOOK_FILES); do\ +# cp -p $(srcdir)/$$f $(distdir)/$$f ;\ +# done + +#----------------------------------------------------------------------- +# Rules: cleanup +#----------------------------------------------------------------------- +.PHONY: cvsclean cvsclean-hook + +cvsclean: maintainer-clean ; + diff --git a/gfsm/gfsm/src/libgfsm/.cvsignore b/gfsm/gfsm/src/libgfsm/.cvsignore new file mode 100644 index 0000000..fcae916 --- /dev/null +++ b/gfsm/gfsm/src/libgfsm/.cvsignore @@ -0,0 +1,43 @@ +*~ +.*~ +*.o +*.lo +*.la +*.a +*.so +.libs +.deps +*.fst +*.tfst +*.lab +*.loT +gfsmConfigAuto.* +gfsmConfigNoAuto.h +gmon.out +*.output +stamp-h* +###config.h +config.h.in +aclocal.m4 +Makefile +Makefile.in +README.txt +configure +graveyard +*.tfst +*.afst +*.thfst +*.gfst +*.lab +*.scl +*.sym +*.dot +*.vcg +*.ps +*.tfst +fsmlib +SFST.moocow +fsa6-src +pd-fst +save +libgfsm.* diff --git a/gfsm/gfsm/src/libgfsm/Makefile.am b/gfsm/gfsm/src/libgfsm/Makefile.am new file mode 100644 index 0000000..aba4734 --- /dev/null +++ b/gfsm/gfsm/src/libgfsm/Makefile.am @@ -0,0 +1,288 @@ +## File: src/libgfsm/Makefile.am +## Package: gfsm +## Description: +## + source-level automake file +## +## Process this file with Automake to create Makefile.in. +##----------------------------------------------------------------------- + +##----------------------------------------------------------------------- +## subdirectories +##----------------------------------------------------------------------- +SUBDIRS = gnulib + +##----------------------------------------------------------------------- +## primary targets +##----------------------------------------------------------------------- + +## --- executable programs +#bin_PROGRAMS = ${prog_1} ... 
${prog_N} +#bin_PROGRAMS = dwdspp moot moot-fstgen moot-pargen + +## --- libtool libraries +#lib_LTLIBRARIES = ${lib_1}.la ... ${lib_N}.la +lib_LTLIBRARIES = libgfsm.la + +## --- shared data in @pkgdatadir@ +#pkgdata_DATA = ${data_1} ... ${data_N} + +## --- manpages -- section is auto-detected +#man_MANS = ${man_1} ... ${man_N} + +##----------------------------------------------------------------------- +## sources +##----------------------------------------------------------------------- + +## --- Required sources +#${prog_i}_SOURCES = +#${lib_i}_la_SOURCES = +libgfsm_la_SOURCES = \ + config.h \ + gfsmCommon.c \ + gfsmCompound.c \ + gfsmDebug.c \ + gfsmError.c \ + gfsmIO.c \ + gfsmMem.c \ + gfsmVersion.c \ + gfsmUtils.c \ + gfsmEnum.c \ + gfsmSet.c \ + gfsmWeightMap.c \ + gfsmBitVector.c \ + gfsmAlphabet.c \ + gfsmSemiring.c \ + gfsmArc.c \ + gfsmArcList.c \ + gfsmArcIter.c \ + gfsmArcIndex.c \ + gfsmState.c \ + gfsmStateSet.c \ + gfsmAutomaton.c \ + gfsmAutomatonIO.c \ + gfsmDraw.c \ + gfsmAlgebra.c \ + gfsmArith.c \ + gfsmLookup.c \ + gfsmPaths.c \ + gfsmTrie.c \ + gfsmScanner.c \ + gfsmRegex.lex.l \ + gfsmRegex.tab.y \ + gfsmRegexCompiler.c \ + gfsmIndexed.c \ + gfsmIndexedIO.c + +sources_argh = \ + gfsmIndexed.c \ + gfsmIndexed2.c \ + gfsmIndexedIO.c + +## --- Extra (possible) sources +#EXTRA_${prog_i}_SOURCES = +#EXTRA_${lib_i}_la_SOURCES = + +## --- Additional dependencies + +##---------------------------------------------------- +## Additional rules + +#gfsmConfigNoAuto.h: gfsmConfigAuto.h.in +# rm -f "$@" +# cp "$<" "$@" + +##----------------------------------------------------------------------- +## headers +##----------------------------------------------------------------------- + +## --- installable headers: @includedir@/@pkgname@ +#pkginclude_HEADERS = ${hfile_1} ... 
${hfile_N} +pkginclude_HEADERS = \ + gfsmAssert.h \ + gfsmConfig.h \ + gfsmConfigNoAuto.h \ + gfsmConfigAuto.h \ + gfsmCommon.h \ + gfsmCompound.h \ + gfsmCompound.hi \ + gfsmDebug.h \ + gfsmError.h \ + gfsmIO.h \ + gfsmMem.h \ + gfsmMem.hi \ + gfsmVersion.h \ + gfsmUtils.h \ + gfsmEnum.h \ + gfsmEnum.hi \ + gfsmSet.h \ + gfsmSet.hi \ + gfsmWeightMap.h \ + gfsmWeightMap.hi \ + gfsmBitVector.h \ + gfsmBitVector.hi \ + gfsmAlphabet.h \ + gfsmSemiring.h \ + gfsmSemiring.hi \ + gfsmArc.h \ + gfsmArc.hi \ + gfsmArcList.h \ + gfsmArcList.hi \ + gfsmArcIter.h \ + gfsmArcIter.hi \ + gfsmArcIndex.h \ + gfsmArcIndex.hi \ + gfsmState.h \ + gfsmState.hi \ + gfsmStateSet.h \ + gfsmStateSet.hi \ + gfsmAutomaton.h \ + gfsmAutomaton.hi \ + gfsmAutomatonIO.h \ + gfsmDraw.h \ + gfsmAlgebra.h \ + gfsmArith.h \ + gfsmLookup.h \ + gfsmPaths.h \ + gfsmTrie.h \ + gfsmScanner.h \ + gfsmRegexCompiler.h \ + gfsmIndexed.h \ + gfsmIndexed.hi \ + gfsmIndexedIO.h \ + gfsm.h + +headers_argh = \ + gfsmIndexed.h \ + gfsmIndexed2.h \ + gfsmIndexedIO.h + +## --- no-install headers +#noinst_HEADERS = nopackage.h + +##----------------------------------------------------------------------- +## pre-compile rules: flex ; bison +##----------------------------------------------------------------------- +SRCDIR = @srcdir@ + +if HAVE_FLEX +MY_LEX = @FLEX@ +MY_LFLAGS = +else +MY_LEX = sh $(SRCDIR)/dummy-flex.sh +MY_LFLAGS = +endif +LEX = $(MY_LEX) +AM_LFLAGS = $(MY_LFLAGS) + +if HAVE_BISON +MY_YACC = @BISON@ +MY_YFLAGS = --defines --fixed-output-files --name-prefix="$(basename $*)_yy" +else +MY_YACC = sh $(SRCDIR)/dummy-bison.sh +MY_YFLAGS = +endif +YACC = $(MY_YACC) +AM_YFLAGS = $(MY_YFLAGS) + +SUFFIXES = .l .lex.l .y .tab.y .tab.c .tab.h .lex.c .lex.h + +##---------------------------------------- +gfsmRegex.lex.o: gfsmRegex.lex.c gfsmRegex.tab.c gfsmRegexCompiler.h +gfsmRegex.tab.o: gfsmRegex.tab.c gfsmRegex.lex.c gfsmRegexCompiler.h +gfsmRegexCompiler.o: gfsmRegexCompiler.c gfsmRegexCompiler.h 
gfsmRegex.lex.c gfsmRegex.tab.c + +##----------------------------------------------------------------------- +## compile flags +##----------------------------------------------------------------------- + +## --- preprocessor flags +#AM_CPPFLAGS = +AM_CPPFLAGS = -I. -I$(SRCDIR)/gnulib + +## --- compiler flags (cc) +AM_CFLAGS = $(gfsm_WFLAGS) $(gfsm_OFLAGS) + +##----------------------------------------------------------------------- +## Additional variable: library versioning +## + for -version-info $(CUR):$(REV):$(AGE) : +## + base lib is created (on GNU/Linux) as libMYLIB.so.(CUR-AGE).$(AGE).$(REV) +##----------------------------------------------------------------------- + +## --- The most recent interface number that this library implements. +LIBCUR = $(GFSM_VERSION_MAJOR) + +## --- The difference between the newest and oldest interfaces that this +## library implements. In other words, the library implements all the +## interface numbers in the range from number `CURRENT - AGE' to +## `CURRENT'. +LIBAGE = 0 + +## --- The implementation number of the CURRENT interface. +LIBREV = $(GFSM_VERSION_MINOR) + +##----------------------------------------------------------------------- +## linker flags +## + library special handling of configure.in vars: +##----------------------------------------------------------------------- +#${prog_i}_LDFLAGS = -L. -static +#${prog_i}_LDADD = ${non_src_file}.o -lsomelib + +#${lib_i}_la_LDFLAGS = -L. 
-version-info ${lib_i_current}:${lib_i_rev}:${lib_i_age} +#${lib_i}_la_LIBADD = -lotherlib + +libgfsm_la_LDFLAGS = -version-info $(LIBCUR):$(LIBREV):$(LIBAGE) +libgfsm_la_LIBADD = gnulib/libgnu.la @gfsm_LIBS@ $(LIBS) + +##----------------------------------------------------------------------- +## Variables: cleanup +##----------------------------------------------------------------------- + +## --- mostlyclean: built by 'make' & commonly rebuilt +#MOSTLYCLEANFILES = + +## --- clean: built by 'make' +#CLEANFILES = + +## --- distclean: built by 'configure' +#DISTCLEANFILES = + +## -- maintainerclean: built by maintainer / by hand +MAINTAINERCLEANFILES = \ + *~ .*~ Makefile Makefile.in \ + gmon.out + +##----------------------------------------------------------------------- +## Variables: distribution +##----------------------------------------------------------------------- + +## --- extra distribution files +EXTRA_DIST = \ + config.h \ + gfsmConfigNoAuto.h \ + gfsmRegex.lex.c \ + gfsmRegex.lex.h \ + gfsmRegex.tab.c \ + gfsmRegex.tab.h \ + dummy-flex.sh \ + dummy-bison.sh + +## --- recursion subdirectories for 'make dist' +#DIST_SUBDIRS = $(SUBDIRS) + +## --- dist-hook: when another 'Makefile.am' is overkill +#DISTHOOK_DIRS = foo +#DISTHOOK_FILES = foo/bar.txt foo/baz.txt +#dist-hook: +# for d in $(DISTHOOK_DIRS); do\ +# mkdir -p $(distdir)/$$d ;\ +# done +# for f in $(DISTHOOK_FILES); do\ +# cp -p $(srcdir)/$$f $(distdir)/$$f ;\ +# done + +##----------------------------------------------------------------------- +## Rules: cleanup +##----------------------------------------------------------------------- +.PHONY: cvsclean + +cvsclean: maintainer-clean ; diff --git a/gfsm/gfsm/src/libgfsm/config.h b/gfsm/gfsm/src/libgfsm/config.h new file mode 100644 index 0000000..9f1ffb0 --- /dev/null +++ b/gfsm/gfsm/src/libgfsm/config.h @@ -0,0 +1 @@ +#include "gfsmConfigAuto.h" diff --git a/gfsm/gfsm/src/libgfsm/dummy-bison.sh b/gfsm/gfsm/src/libgfsm/dummy-bison.sh new file mode 
100755 index 0000000..ad2995f --- /dev/null +++ b/gfsm/gfsm/src/libgfsm/dummy-bison.sh @@ -0,0 +1,6 @@ +#!/bin/sh + +base=`basename $1 .y` +cp -v -f "$base.c" y.tab.c +cp -v -f "$base.h" y.tab.h +if test -f "$base.output"; then cp -v -f "$base.output" y.output ; fi diff --git a/gfsm/gfsm/src/libgfsm/dummy-flex.sh b/gfsm/gfsm/src/libgfsm/dummy-flex.sh new file mode 100755 index 0000000..1b27ea2 --- /dev/null +++ b/gfsm/gfsm/src/libgfsm/dummy-flex.sh @@ -0,0 +1,3 @@ +#!/bin/sh +cp -v -f "`basename $1 .l`.c" lex.yy.c +touch --no-create "`basename $1 .l`.h" diff --git a/gfsm/gfsm/src/libgfsm/gfsm.c b/gfsm/gfsm/src/libgfsm/gfsm.c new file mode 100644 index 0000000..10a3be7 --- /dev/null +++ b/gfsm/gfsm/src/libgfsm/gfsm.c @@ -0,0 +1,27 @@ +/*=============================================================================*\ + * File: gfsm.c + * Author: Bryan Jurish <moocow@ling.uni-potsdam.de> + * Description: finite state machine library: top-level + * + * Copyright (c) 2004 Bryan Jurish. + * + * For information on usage and redistribution, and for a DISCLAIMER OF ALL + * WARRANTIES, see the file "COPYING" in this distribution. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + *=============================================================================*/ + +#include <gfsm.h> + diff --git a/gfsm/gfsm/src/libgfsm/gfsm.h b/gfsm/gfsm/src/libgfsm/gfsm.h new file mode 100644 index 0000000..ae50be3 --- /dev/null +++ b/gfsm/gfsm/src/libgfsm/gfsm.h @@ -0,0 +1,78 @@ + +/*=============================================================================*\ + * File: gfsm.h + * Author: Bryan Jurish <moocow@ling.uni-potsdam.de> + * Description: finite state machine library: top-level + * + * Copyright (c) 2004-2007 Bryan Jurish. + * + * For information on usage and redistribution, and for a DISCLAIMER + * OF ALL WARRANTIES, see the file "COPYING" in this distribution. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + *=============================================================================*/ + +/** \file gfsm.h + * \brief Top-level header. 
+ */ + +#ifndef _GFSM_H +#define _GFSM_H + +/* +#ifdef __cplusplus +extern "C" { +#endif +*/ + +#include <glib.h> +#include <gfsmAssert.h> +#include <gfsmCommon.h> +#include <gfsmCompound.h> +#include <gfsmVersion.h> +#include <gfsmError.h> +#include <gfsmUtils.h> +#include <gfsmEnum.h> +#include <gfsmSet.h> +#include <gfsmWeightMap.h> +#include <gfsmBitVector.h> +#include <gfsmAlphabet.h> +#include <gfsmSemiring.h> +#include <gfsmArc.h> +#include <gfsmState.h> +#include <gfsmAutomaton.h> +#include <gfsmArcIter.h> +#include <gfsmArcIndex.h> +#include <gfsmStateSet.h> +#include <gfsmIO.h> +#include <gfsmAutomatonIO.h> +#include <gfsmDraw.h> +#include <gfsmAlgebra.h> +#include <gfsmArith.h> +#include <gfsmLookup.h> +#include <gfsmPaths.h> +#include <gfsmTrie.h> +#include <gfsmScanner.h> +#include <gfsmRegexCompiler.h> +#include <gfsmIndexed.h> +#include <gfsmIndexedIO.h> + +/* +#ifdef __cplusplus +} +#endif +*/ + +#endif /* _GFSM_H */ diff --git a/gfsm/gfsm/src/libgfsm/gfsmAlgebra.c b/gfsm/gfsm/src/libgfsm/gfsmAlgebra.c new file mode 100644 index 0000000..4e9effe --- /dev/null +++ b/gfsm/gfsm/src/libgfsm/gfsmAlgebra.c @@ -0,0 +1,1715 @@ + +/*=============================================================================*\ + * File: gfsmAlgebra.c + * Author: Bryan Jurish <moocow@ling.uni-potsdam.de> + * Description: finite state machine library + * + * Copyright (c) 2004-2007 Bryan Jurish. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + *=============================================================================*/ + +#include <glib.h> +#include <gfsmAlgebra.h> +#include <gfsmAssert.h> +#include <gfsmArcIter.h> +#include <gfsmStateSet.h> +#include <gfsmEnum.h> +#include <gfsmUtils.h> +#include <gfsmCompound.h> + +/*====================================================================== + * Methods: algebra + */ + +/*-------------------------------------------------------------- + * closure_final_func() + * + called for each final @id of @fsm during closure(@fsm) + */ +static +gboolean gfsm_automaton_closure_final_func_(gfsmStateId id, gpointer pw, gfsmAutomaton *fsm) +{ + gfsmWeight w = gfsm_ptr2weight(pw); + if (id != fsm->root_id) + gfsm_automaton_add_arc(fsm, id, fsm->root_id, gfsmEpsilon, gfsmEpsilon, w); + return FALSE; +} + + +/*-------------------------------------------------------------- + * closure() + */ +gfsmAutomaton *gfsm_automaton_closure(gfsmAutomaton *fsm, gboolean is_plus) +{ + //-- sanity check(s) + if (!fsm || fsm->root_id == gfsmNoState) return fsm; + + //-- add epsilon arcs from old final states to translated new root + gfsm_automaton_finals_foreach(fsm, (GTraverseFunc)gfsm_automaton_closure_final_func_, fsm); + + //-- reflexive+transitive or reflexive? 
+ if (!is_plus) gfsm_automaton_optional(fsm); + + return fsm; +} + + +/*-------------------------------------------------------------- + * n_closure() + */ +gfsmAutomaton *gfsm_automaton_n_closure(gfsmAutomaton *fsm, guint n) +{ + //-- sanity check(s) + if (!fsm || fsm->root_id == gfsmNoState) return fsm; + + //-- check for simple closures + if (n == 0) return gfsm_automaton_closure(fsm, FALSE); + else if (n == 1) return gfsm_automaton_closure(fsm, TRUE); + else { + gfsm_automaton_n_concat(fsm, fsm, n-1); + } + + return gfsm_automaton_closure(fsm, TRUE); +} + + +/*-------------------------------------------------------------- + * complement() + */ +gfsmAutomaton *gfsm_automaton_complement(gfsmAutomaton *fsm) +{ + gfsmAlphabet *alph = gfsm_identity_alphabet_new(); + gfsm_automaton_get_alphabet(fsm, gfsmLSLower, alph); + gfsm_automaton_complement_full(fsm,alph); + gfsm_alphabet_free(alph); + return fsm; +} + +/*-------------------------------------------------------------- + * complement_full() + */ +gfsmAutomaton *gfsm_automaton_complement_full(gfsmAutomaton *fsm, gfsmAlphabet *alph) +{ + gfsmStateId id, sink_id; + gfsm_automaton_complete(fsm, alph, &sink_id); + + //-- flip final states (no weights here) + for (id = 0; id < fsm->states->len; id++) { + gfsmState *s = gfsm_automaton_find_state(fsm,id); + if (!s || !s->is_valid) continue; + gfsm_automaton_set_final_state(fsm, id, !s->is_final); + } + + return fsm; +} + +/*-------------------------------------------------------------- + * complete() + */ +gfsmAutomaton *gfsm_automaton_complete(gfsmAutomaton *fsm, gfsmAlphabet *alph, gfsmStateId *sinkp) +{ + gfsmStateId id, sinkid; + GPtrArray *alabels; + + if (!fsm->flags.is_deterministic) fsm = gfsm_automaton_determinize(fsm); + if (gfsm_acmask_nth(fsm->flags.sort_mode,0) != gfsmACLower) { + gfsm_automaton_arcsort(fsm,gfsmACLower); + } + //-- avoid "smart" arc insertion + fsm->flags.sort_mode = gfsmASMNone; + + //-- add sink-id + sinkid = gfsm_automaton_add_state(fsm); 
+ if (sinkp) *sinkp = sinkid; + + //-- get alphabet label-vector + alabels = g_ptr_array_sized_new(gfsm_alphabet_size(alph)); + gfsm_alphabet_labels_to_array(alph,alabels); + + for (id = 0; id < fsm->states->len; id++) { + gfsmState *s = gfsm_automaton_find_state(fsm,id); + gfsmArcList *al; + gfsmArc *a; + guint labi; + if (!s || !s->is_valid) continue; + + al = s->arcs; + a = gfsm_arclist_arc(al); + for (labi=0; labi < alabels->len; ) { + gfsmLabelVal lab = (gfsmLabelVal)GPOINTER_TO_UINT(g_ptr_array_index(alabels,labi)); + + if (lab==gfsmEpsilon) { + ++labi; + } + else if (!a || a->lower > lab) { + //-- no arc for this label: route it to sink + gfsm_automaton_add_arc(fsm, id, sinkid, lab, lab, fsm->sr->one); + ++labi; + } + else if (a->lower == lab) { + ++labi; + } + else { + while (al != NULL && a->lower < lab) { + al = al->next; + a = gfsm_arclist_arc(al); + } + } + } + } + + //-- mark fsm as (still) deterministic + fsm->flags.is_deterministic = TRUE; + + //-- cleanup + //g_array_free(alabels,TRUE); + g_ptr_array_free(alabels,TRUE); + + return fsm; +} + + +/*-------------------------------------------------------------- + * compose() + */ +gfsmAutomaton *gfsm_automaton_compose(gfsmAutomaton *fsm1, gfsmAutomaton *fsm2) +{ + gfsmAutomaton *fsm = gfsm_automaton_compose_full(fsm1,fsm2, NULL,NULL); + gfsm_automaton_swap(fsm1,fsm); + gfsm_automaton_free(fsm); + return fsm1; +} + +/*-------------------------------------------------------------- + * compose_visit_() + */ +//#define GFSM_DEBUG_COMPOSE_VISIT 1 +#ifdef GFSM_DEBUG_COMPOSE_VISIT +# include <stdio.h> +#endif +gfsmStateId gfsm_automaton_compose_visit_(gfsmComposeState sp, + gfsmAutomaton *fsm1, + gfsmAutomaton *fsm2, + gfsmAutomaton *fsm, + gfsmComposeStateEnum *spenum, + gfsmComposeFlags flags) +{ + gfsmState *q1, *q2; + gfsmStateId qid = gfsm_enum_lookup(spenum,&sp); + gfsmStateId qid2; + gfsmArcList *al1, *al2, *ai1, *ai2; + gfsmArcList *ai1_noneps, *ai2_noneps, *ai2_continue; + gfsmArc *a1,*a2; + +#ifdef 
GFSM_DEBUG_COMPOSE_VISIT + fprintf(stderr, "compose(): visit : (q%u,f%u,q%u) => q%d\n", sp.id1, sp.idf, sp.id2, + (int)(qid==gfsmEnumNone ? -1 : qid)); +#endif + + //-- ignore already-visited states + if (qid != gfsmEnumNone) return qid; + + //-- get state pointers for input automata + q1 = gfsm_automaton_find_state(fsm1,sp.id1); + q2 = gfsm_automaton_find_state(fsm2,sp.id2); + + //-- sanity check + if ( !(q1 && q2 && q1->is_valid && q2->is_valid) ) { +#ifdef GFSM_DEBUG_COMPOSE_VISIT + fprintf(stderr, "compose(): BAD : (q%u,f%u,q%u) XXXXX\n", sp.id1, sp.idf, sp.id2); +#endif + return gfsmNoState; + } + + //-- insert new state into output automaton + qid = gfsm_automaton_add_state(fsm); + gfsm_enum_insert_full(spenum,&sp,qid); + +#ifdef GFSM_DEBUG_COMPOSE_VISIT + fprintf(stderr, "compose(): CREATE: (q%u,f%u,q%u) => q%u ***\n", sp.id1, sp.idf, sp.id2, qid); +#endif + + //-- check for final states + if (q1->is_final && q2->is_final) { + gfsm_automaton_set_final_state_full(fsm,qid,TRUE, + gfsm_sr_times(fsm->sr, + gfsm_automaton_get_final_weight(fsm1,sp.id1), + gfsm_automaton_get_final_weight(fsm2,sp.id2))); + } + + //------------------------------------------- + // recurse on outgoing arcs + + //-------------------------------- + // recurse: arcs: sort + + //-- arcs: sort arclists: fsm1 + if (flags&gfsmCFEfsm1NeedsArcSort) { + gfsmArcCompData sortdata = { gfsmACUpper,NULL,NULL,NULL }; + al1 = gfsm_arclist_sort(gfsm_arclist_clone(q1->arcs), &sortdata); + } + else { al1 = q1->arcs; } + + //-- arcs: sort arclists: fsm2 + if (flags&gfsmCFEfsm2NeedsArcSort) { + gfsmArcCompData sortdata = { gfsmACLower,NULL,NULL,NULL }; + al2 = gfsm_arclist_sort(gfsm_arclist_clone(q2->arcs), &sortdata); + } + else { al2 = q2->arcs; } + + //-------------------------------- + // recurse: arcs: handle epsilons + for (ai1_noneps=al1; ai1_noneps!=NULL && ai1_noneps->arc.upper==gfsmEpsilon; ai1_noneps=ai1_noneps->next) {;} + for (ai2_noneps=al2; ai2_noneps!=NULL && 
ai2_noneps->arc.lower==gfsmEpsilon; ai2_noneps=ai2_noneps->next) {;} + + //-- (eps,NULL): case fsm1(q1 --a:eps(~eps2)--> q1b), filter:({0,2} --eps2:eps2--> 2), fsm2(q2 --(NULL~eps2:eps)--> q2) + if (sp.idf != 1) { + for (ai1=al1; ai1!=ai1_noneps; ai1=ai1->next) { + a1 = &(ai1->arc); +#ifdef GFSM_DEBUG_COMPOSE_VISIT + fprintf(stderr, + "compose(): MATCH[e,NULL]: (q%u --%d:eps(e2)--> q%u) ~ ({0,2}--(e2:e2)-->2) ~ (q%u --(NULL~e2:eps)--> q%u) ***\n", + sp.id1, a1->lower, a1->target, + sp.id2, sp.id2); +#endif + qid2 = gfsm_automaton_compose_visit_((gfsmComposeState){a1->target, sp.id2, 2}, fsm1,fsm2,fsm, spenum,flags); + if (qid2 != gfsmNoState) + gfsm_automaton_add_arc(fsm, qid, qid2, a1->lower, gfsmEpsilon, a1->weight); + } + } + //-- (NULL,eps): case fsm1(q1 --(NULL~eps:eps1)--> q1), filter:({0,1} --eps1:eps1--> 1), fsm2(q2 --eps(~eps1):b--> q2b) + if (sp.idf != 2) { + for (ai2=al2; ai2!=ai2_noneps; ai2=ai2->next) { + a2 = &(ai2->arc); +#ifdef GFSM_DEBUG_COMPOSE_VISIT + fprintf(stderr, + "compose(): MATHC[NULL,e]: (q%u --(NULL~eps:e1)--> q%u) ~ ({0,1}--(e1:e1)-->1) ~ (q%u --eps(e1):%d--> q%u) ***\n", + sp.id1, sp.id1, + sp.id2, a2->upper, a2->target); +#endif + qid2 = gfsm_automaton_compose_visit_((gfsmComposeState){sp.id1, a2->target, 1}, fsm1,fsm2,fsm, spenum,flags); + if (qid2 != gfsmNoState) + gfsm_automaton_add_arc(fsm, qid, qid2, gfsmEpsilon, a2->upper, a2->weight); + } + } + //-- (eps,eps): case fsm1(q1 --a:eps(~eps2)--> q1b), filter:({0} --eps2:eps1--> 0), fsm2(q2 --eps:b--> q2b) + if (sp.idf == 0) { + for (ai1=al1; ai1!=ai1_noneps; ai1=ai1->next) { + a1 = &(ai1->arc); + for (ai2=al2; ai2!=ai2_noneps; ai2=ai2->next) { + a2 = &(ai2->arc); +#ifdef GFSM_DEBUG_COMPOSE_VISIT + fprintf(stderr, + "compose(): MATCH[e,e]: (q%u --%d:eps(e2)--> q%u) ~ ({0}--(e2:e1)-->0) ~ (q%u --eps(e1):%d--> q%u) ***\n", + sp.id1, a1->lower, a1->target, + sp.id2, a2->upper, a2->target); +#endif + qid2 = gfsm_automaton_compose_visit_((gfsmComposeState){a1->target, a2->target, 0}, + 
fsm1,fsm2,fsm, spenum,flags); + if (qid2 != gfsmNoState) + gfsm_automaton_add_arc(fsm, qid, qid2, a1->lower, a2->upper, + gfsm_sr_times(fsm->sr, a1->weight, a2->weight)); + } + } + } + + //-------------------------------- + // recurse: arcs: non-eps: iterate + for (ai1=ai1_noneps, ai2_continue=ai2_noneps; ai1!=NULL; ai1=ai1->next) { + a1 = &(ai1->arc); + + for (ai2=ai2_continue; ai2!=NULL; ai2=ai2->next) { + a2 = &(ai2->arc); + +#ifdef GFSM_DEBUG_COMPOSE_VISIT + fprintf(stderr, + "compose(): check[x,x]: (q%u --%d:%d--> q%u) ~ ({0,1,2}--(x:x)-->0) ~ (q%u --%d:%d--> q%u)\n", + sp.id1, a1->lower, a1->upper, a1->target, + sp.id2, a2->lower, a2->upper, a2->target); +#endif + + if (a2->lower < a1->upper) { ai2_continue=ai2->next; continue; } + else if (a2->lower > a1->upper) { break; } + +#ifdef GFSM_DEBUG_COMPOSE_VISIT + fprintf(stderr, + "compose(): MATCH[x,x]: (q%u --%d:%d--> q%u) ~ ({0,1,2}--(x:x)-->0) ~ (q%u --%d:%d--> q%u) ***\n", + sp.id1, a1->lower, a1->upper, a1->target, + sp.id2, a2->lower, a2->upper, a2->target); +#endif + + //-- non-eps: case fsm1:(q1 --a:b--> q1'), fsm2:(q2 --b:c--> q2') + qid2 = gfsm_automaton_compose_visit_((gfsmComposeState){a1->target,a2->target,0}, + fsm1,fsm2,fsm, spenum,flags); + if (qid2 != gfsmNoState) + gfsm_automaton_add_arc(fsm, qid, qid2, a1->lower, a2->upper, + gfsm_sr_times(fsm1->sr, a1->weight, a2->weight)); + } + } + + //-- maybe cleanup temporary arc-lists + if (flags&gfsmCFEfsm1NeedsArcSort) gfsm_arclist_free(al1); + if (flags&gfsmCFEfsm2NeedsArcSort) gfsm_arclist_free(al2); + + return qid; +} + +/*-------------------------------------------------------------- + * compose_full() + */ +//#define GFSM_DEBUG_COMPOSE +#ifdef GFSM_DEBUG_COMPOSE +# include <gfsmAutomatonIO.h> +#endif +gfsmAutomaton *gfsm_automaton_compose_full(gfsmAutomaton *fsm1, + gfsmAutomaton *fsm2, + gfsmAutomaton *composition, + gfsmComposeStateEnum *spenum + ) +{ + gboolean spenum_is_temp; + gfsmComposeState rootpair; + gfsmStateId rootid; + 
gfsmComposeFlags flags = 0; +#ifdef GFSM_DEBUG_COMPOSE + gfsmError *err =NULL; +#endif + + //-- setup: output fsm + if (!composition) { + composition=gfsm_automaton_shadow(fsm1); + } else { + gfsm_automaton_clear(composition); + gfsm_automaton_copy_shallow(composition,fsm1); + } + composition->flags.sort_mode = gfsmASMNone; + composition->flags.is_transducer = 1; + + //-- setup: ComposeStateEnum + if (spenum==NULL) { + spenum_is_temp=TRUE; + spenum = gfsm_compose_state_enum_new(); + } else { + spenum_is_temp=FALSE; + gfsm_enum_clear(spenum); + } + + //-- setup: flags + if (gfsm_acmask_nth(fsm1->flags.sort_mode,0) != gfsmACUpper) flags |= gfsmCFEfsm1NeedsArcSort; + if (gfsm_acmask_nth(fsm2->flags.sort_mode,0) != gfsmACLower) flags |= gfsmCFEfsm2NeedsArcSort; + + //-- guts: recursively visit states depth-first from root + rootpair.id1 = fsm1->root_id; + rootpair.id2 = fsm2->root_id; + rootpair.idf = 0; + rootid = gfsm_automaton_compose_visit_(rootpair, fsm1, fsm2, composition, spenum, flags); + + //-- finalize: set new root state + if (rootid != gfsmNoState) { + gfsm_automaton_set_root(composition, rootid); + } else { + composition->root_id = gfsmNoState; + } + //-- cleanup + if (spenum_is_temp) gfsm_enum_free(spenum); + + return composition; +} + + + +/*-------------------------------------------------------------- + * concat_final_func() + * + called for each final @id of @fsm during concat(@fsm,@fsm2) + * + during the call @fsm1->root_id should be set to the translated root of @fsm2 + */ +static +gboolean gfsm_automaton_concat_final_func_(gfsmStateId id, gpointer pw, gfsmAutomaton *fsm) +{ + gfsmWeight w = gfsm_ptr2weight(pw); + gfsm_automaton_add_arc(fsm, id, fsm->root_id, gfsmEpsilon, gfsmEpsilon, w); + gfsm_automaton_find_state(fsm,id)->is_final = FALSE; + return FALSE; +} + +/*-------------------------------------------------------------- + * concat_final_func_1() + * + called for singleton final @id of @fsm during concat(@fsm,@fsm2) + * + BAD if singleton 
final of @fsm has outgoing arcs! + */ +#if 0 +struct gfsm_automaton_concat_1_final_data_ { + gfsmStateId *rootxp; + gfsmWeight *weightp; +}; +static +gboolean gfsm_automaton_concat_final_func_1_(gfsmStateId id, + gpointer pw, + struct gfsm_automaton_concat_1_final_data_ *data) +{ + *(data->rootxp) = id; + *(data->weightp) = gfsm_ptr2weight(pw); + return TRUE; +} +#endif + +/*-------------------------------------------------------------- + * concat() + */ +gfsmAutomaton *gfsm_automaton_concat(gfsmAutomaton *fsm1, gfsmAutomaton *_fsm2) +{ + gfsmAutomaton *fsm2; + gfsmStateId offset; + gfsmStateId id2; + gfsmStateId size2; + gfsmStateId rootx; + gfsmWeightMap *finals2 = NULL; + + //-- sanity check(s) + if (!_fsm2 || _fsm2->root_id == gfsmNoState) return fsm1; + if (_fsm2==fsm1) fsm2 = gfsm_automaton_clone(fsm1); + else fsm2 = _fsm2; + + if (fsm1->finals == fsm2->finals) { + finals2 = gfsm_weightmap_new(gfsm_uint_compare); + gfsm_weightmap_copy(finals2, fsm2->finals); + } + + offset = fsm1->states->len; + size2 = fsm2->states->len; + gfsm_automaton_reserve(fsm1, offset + size2); + + //-- concatenative arcs + if (fsm1->root_id != gfsmNoState) { + //-- multiple final states: add epsilon arcs from old finals to mapped root2 + gfsmStateId root_tmp = fsm1->root_id; + rootx = fsm2->root_id+offset; + fsm1->root_id = rootx; + gfsm_automaton_finals_foreach(fsm1, (GTraverseFunc)gfsm_automaton_concat_final_func_, fsm1); + fsm1->root_id = root_tmp; + } else /*if (fsm2->root_id != gfsmNoState)*/ { + fsm1->root_id = rootx = fsm2->root_id + offset; + } + gfsm_weightmap_clear(fsm1->finals); + + //-- adopt states from fsm2 into fsm1 + for (id2 = 0; id2 < size2; id2++) { + gfsmStateId id1; + const gfsmState *s2; + gfsmState *s1; + gfsmArcIter ai; + gfsmWeight s2fw; + + s2 = gfsm_automaton_find_state_const(fsm2,id2); + id1 = id2+offset; + s1 = gfsm_automaton_find_state(fsm1, id1); + + //-- sanity check(s) + if (!s1 || !s2 || !s2->is_valid) continue; + + //-- copy state + 
gfsm_state_copy(s1,s2); + + //-- translate targets for adopted arcs + for (gfsm_arciter_open_ptr(&ai,fsm1,s1); gfsm_arciter_ok(&ai); gfsm_arciter_next(&ai)) + { + gfsmArc *a = gfsm_arciter_arc(&ai); + a->target += offset; + } + + //-- check for new final states: get weight & mark state is_final flag + if ( (finals2 && gfsm_weightmap_lookup(finals2, GUINT_TO_POINTER(id2), &s2fw)) + || + (!finals2 && gfsm_weightmap_lookup(fsm2->finals, GUINT_TO_POINTER(id2), &s2fw)) ) + { + s1->is_final = TRUE; + gfsm_weightmap_insert(fsm1->finals, GUINT_TO_POINTER(id1), s2fw); + } + } + + //-- mark as unsorted + fsm1->flags.sort_mode = gfsmASMNone; + + //-- cleanup + if (finals2) gfsm_weightmap_free(finals2); + if (fsm2 != _fsm2) gfsm_automaton_free(fsm2); + + return fsm1; +} + +/*-------------------------------------------------------------- + * n_concat() + */ +gfsmAutomaton *gfsm_automaton_n_concat(gfsmAutomaton *fsm1, gfsmAutomaton *_fsm2, guint n) +{ + gfsmAutomaton *fsm2 = _fsm2; + + //-- sanity check(s) + if (!_fsm2 || _fsm2->root_id == gfsmNoState) return fsm1; + if (_fsm2==fsm1) fsm2 = gfsm_automaton_clone(fsm1); + + for ( ; n > 0; n--) { gfsm_automaton_concat(fsm1, fsm2); } + + if (fsm2 != _fsm2) gfsm_automaton_free(fsm2); + + return fsm1; +} + + +/*-------------------------------------------------------------- + * connect() + */ +gfsmAutomaton *gfsm_automaton_connect(gfsmAutomaton *fsm) +{ + gfsmBitVector *wanted; + + //-- sanity check + if (!fsm) return fsm; + + wanted = gfsm_bitvector_sized_new(fsm->states->len); + gfsm_automaton_connect_fw(fsm, wanted); + + gfsm_bitvector_zero(wanted); + gfsm_automaton_connect_bw(fsm, NULL, wanted); + + gfsm_bitvector_free(wanted); + return fsm; +} + + +/*-------------------------------------------------------------- + * connect_fw_visit_state() + * + marks all states on a path from (id) in (visited) + */ +void gfsm_connect_fw_visit_state(gfsmAutomaton *fsm, + gfsmStateId id, + gfsmBitVector *visited) +{ + gfsmState *s; + gfsmArcIter 
ai; + + //-- already visited + if (gfsm_bitvector_get(visited,id)) return; + + s = gfsm_automaton_find_state(fsm,id); + if (!s || !s->is_valid) return; //-- ignore invalid states + + //-- mark node as visited on this path + gfsm_bitvector_set(visited,id,1); + + //-- visit targets of outgoing arcs + for (gfsm_arciter_open_ptr(&ai,fsm,s); gfsm_arciter_ok(&ai); gfsm_arciter_next(&ai)) { + gfsm_connect_fw_visit_state(fsm, gfsm_arciter_arc(&ai)->target, visited); + } + + return; +} + +/*-------------------------------------------------------------- + * connect_fw() + */ +gfsmAutomaton *gfsm_automaton_connect_fw(gfsmAutomaton *fsm, gfsmBitVector *visited) +{ + gboolean visited_is_temp = FALSE; + + //-- sanity check + if (!fsm || fsm->root_id == gfsmNoState) + return gfsm_automaton_prune_states(fsm,NULL); + + //-- traversal record + if (visited==NULL) { + visited = gfsm_bitvector_sized_new(fsm->states->len); + visited_is_temp = TRUE; + } + + //-- traverse + gfsm_connect_fw_visit_state(fsm, fsm->root_id, visited); + gfsm_automaton_prune_states(fsm, visited); + + //-- cleanup + if (visited_is_temp) gfsm_bitvector_free(visited); + + return fsm; +} + +/*-------------------------------------------------------------- + * connect_bw(): final_foreach() + */ +struct gfsm_connect_bw_data_ { + gfsmAutomaton *fsm; + GPtrArray *rarcs; + gfsmBitVector *finalizable; +}; + +gboolean gfsm_connect_bw_visit_state(gfsmStateId id, + gpointer pw, + struct gfsm_connect_bw_data_ *data) +{ + GSList *rl; + + //-- already visited + if (gfsm_bitvector_get(data->finalizable,id) //-- already visited? + || !gfsm_automaton_has_state(data->fsm, id)) //-- bad state? 
+ return FALSE; //-----> continue traversal + + //-- mark state as finalizable + gfsm_bitvector_set(data->finalizable,id,1); + + //-- visit sources of incoming arcs + for (rl=g_ptr_array_index(data->rarcs,id); rl != NULL; rl=rl->next) { + gfsmArc *arc = (gfsmArc*)rl->data; + gfsm_connect_bw_visit_state(arc->source,pw,data); + } + + return FALSE; //-- continue traversal +} + +/*-------------------------------------------------------------- + * connect_bw() + */ +gfsmAutomaton *gfsm_automaton_connect_bw(gfsmAutomaton *fsm, + gfsmReverseArcIndex *rarcs, + gfsmBitVector *finalizable) +{ + gboolean rarcs_is_temp = FALSE; + gboolean finalizable_is_temp = FALSE; + struct gfsm_connect_bw_data_ data = {fsm,rarcs,finalizable}; + + //-- sanity check(s) + if (!fsm || gfsm_automaton_n_final_states(fsm)==0) + return gfsm_automaton_prune_states(fsm,NULL); + + //-- reverse arc-index + if (rarcs==NULL) { + rarcs = data.rarcs = gfsm_automaton_reverse_arc_index(fsm,NULL); + rarcs_is_temp = TRUE; + } + + //-- traversal record + if (finalizable==NULL) { + finalizable = data.finalizable = gfsm_bitvector_sized_new(fsm->states->len); + finalizable_is_temp = TRUE; + } + + //-- traverse + gfsm_automaton_finals_foreach(fsm, (GTraverseFunc)gfsm_connect_bw_visit_state, &data); + gfsm_automaton_prune_states(fsm, finalizable); + + //-- cleanup + if (finalizable_is_temp) gfsm_bitvector_free(finalizable); + if (rarcs_is_temp) gfsm_reverse_arc_index_free(rarcs,TRUE); + + return fsm; +} + + +/*-------------------------------------------------------------- + * prune_states() + */ +gfsmAutomaton *gfsm_automaton_prune_states(gfsmAutomaton *fsm, gfsmBitVector *wanted) +{ + gfsmStateId id, maxwanted=gfsmNoState; + gfsmArcIter ai; + + for (id=0; id < fsm->states->len; id++) { + if (!wanted || !gfsm_bitvector_get(wanted,id)) { + //-- unwanted state: chuck it + gfsm_automaton_remove_state(fsm,id); + } + else { + maxwanted = id; + //-- prune outgoing arcs to any unwanted states, too + for 
(gfsm_arciter_open(&ai, fsm, id); gfsm_arciter_ok(&ai); ) { + gfsmArc *arc = gfsm_arciter_arc(&ai); + if (!wanted || !gfsm_bitvector_get(wanted,arc->target)) { + gfsm_arciter_remove(&ai); + } else { + gfsm_arciter_next(&ai); + } + } + } + } + + //-- update number of states + if (maxwanted != gfsmNoState) { + g_array_set_size(fsm->states, maxwanted+1); + } else { + g_array_set_size(fsm->states, 0); + } + + return fsm; +} + +/*-------------------------------------------------------------- + * determinize_lp2ec_foreach_func_() + */ +typedef struct { + gfsmAutomaton *nfa; + gfsmAutomaton *dfa; + gfsmStateId dfa_src_id; + gfsmEnum *ec2id; +} gfsmLp2EcForeachData; + +static +gboolean gfsm_determinize_lp2ec_foreach_func_(gfsmLabelPair lp, + gfsmWeightedStateSet *wss, + gfsmLp2EcForeachData *data) +{ + gfsmStateId ec2id_val; + gpointer ec2id_val_as_ptr; + gfsmStateSet *ec2id_key; + + if ( gfsm_enum_lookup_extended(data->ec2id, + wss->set, + (gpointer)(&ec2id_key), + (gpointer)(&ec2id_val_as_ptr)) ) + { + //-- target node-set is already present: just add an arc in @dfa + ec2id_val = GPOINTER_TO_UINT(ec2id_val_as_ptr); + gfsm_automaton_add_arc(data->dfa, + data->dfa_src_id, + ec2id_val, + gfsm_labelpair_lower(lp), + gfsm_labelpair_upper(lp), + wss->weight); + + //-- ... and maybe free the embedded state set + if (wss->set != ec2id_key) gfsm_stateset_free(wss->set); + wss->set = NULL; + } + else + { + //-- image of equiv-class (wss->set) was not yet present: make a new one + ec2id_val = gfsm_automaton_ensure_state(data->dfa, + gfsm_enum_insert(data->ec2id, wss->set)); + + //-- ... add @dfa arc + gfsm_automaton_add_arc(data->dfa, + data->dfa_src_id, + ec2id_val, + gfsm_labelpair_lower(lp), + gfsm_labelpair_upper(lp), + wss->weight); + + //-- ... 
and recurse + gfsm_determinize_visit_state_(data->nfa, data->dfa, + wss->set, ec2id_val, + data->ec2id); + } + return FALSE; +} + +/*-------------------------------------------------------------- + * determinize_visit_state_() + */ +void gfsm_determinize_visit_state_(gfsmAutomaton *nfa, gfsmAutomaton *dfa, + gfsmStateSet *nfa_ec, gfsmStateId dfa_id, + gfsmEnum *ec2id) +{ + GTree *lp2ecw; //-- maps label-pairs@nfa.src.ec => (eq-class@nfa.sink, sum(weight)) + gfsmStateSetIter eci; + gfsmStateId ecid; + gfsmLp2EcForeachData lp2ec_foreach_data; + gfsmWeight fw; + + //-- check for final state + if (gfsm_stateset_lookup_final_weight(nfa_ec,nfa,&fw)) { + gfsm_automaton_set_final_state_full(dfa, dfa_id, TRUE, fw); + } + + //-- build label-pair => (sink-eqc, sum(weight)) mapping 'lp2ecw' for node-set nfa_ec + lp2ecw = g_tree_new_full(((GCompareDataFunc) + gfsm_labelpair_compare_with_data), //-- key_comp_func + NULL, //-- key_comp_data + NULL, //-- key_free_func + (GDestroyNotify)g_free); //-- val_free_func + + for (eci=gfsm_stateset_iter_begin(nfa_ec); + (ecid=gfsm_stateset_iter_id(eci)) != gfsmNoState; + eci=gfsm_stateset_iter_next(nfa_ec,eci)) + { + gfsmArcIter ai; + for (gfsm_arciter_open(&ai, nfa, ecid); gfsm_arciter_ok(&ai); gfsm_arciter_next(&ai)) { + gfsmArc *a = gfsm_arciter_arc(&ai); + gfsmLabelPair lp; + gfsmLabelPair *lp2ec_key; + gfsmWeightedStateSet *lp2ec_val; + + //if (a->lower==gfsmEpsilon && a->upper==gfsmEpsilon) continue; //-- ignore eps arcs + lp = gfsm_labelpair_new(a->lower, a->upper); + + //-- add equivalence class to local mapping + if ( g_tree_lookup_extended(lp2ecw, + GUINT_TO_POINTER(lp), + (gpointer)(&lp2ec_key), + (gpointer)(&lp2ec_val)) ) + { + //-- already present: compute union and add new arc's weight + gfsm_stateset_insert(lp2ec_val->set, a->target); + lp2ec_val->weight = gfsm_sr_plus(nfa->sr, lp2ec_val->weight, a->weight); + } + else + { + //-- not yet present: insert new value + lp2ec_val = g_new(gfsmWeightedStateSet,1); + lp2ec_val->set 
= gfsm_stateset_new_singleton(a->target); + lp2ec_val->weight = a->weight; + g_tree_insert(lp2ecw, GUINT_TO_POINTER(lp), lp2ec_val); + } + } + + //-- tmp-cleanup + gfsm_arciter_close(&ai); + } + + //-- stateset-iter (eci) cleanup + //(none) + + //-- insert computed arcs into @dfa + lp2ec_foreach_data.nfa = nfa; + lp2ec_foreach_data.dfa = dfa; + lp2ec_foreach_data.dfa_src_id = dfa_id; + lp2ec_foreach_data.ec2id = ec2id; + g_tree_foreach(lp2ecw, + (GTraverseFunc)gfsm_determinize_lp2ec_foreach_func_, + (gpointer)(&lp2ec_foreach_data)); + + //-- cleanup + g_tree_destroy(lp2ecw); +} + + +/*-------------------------------------------------------------- + * determinize() + */ +gfsmAutomaton *gfsm_automaton_determinize(gfsmAutomaton *nfa) +{ + if (!nfa->flags.is_deterministic) { + gfsmAutomaton *dfa = gfsm_automaton_determinize_full(nfa,NULL); + gfsm_automaton_swap(nfa,dfa); + gfsm_automaton_free(dfa); + } + return nfa; +} + +/*-------------------------------------------------------------- + * determinize_full() + */ +gfsmAutomaton *gfsm_automaton_determinize_full(gfsmAutomaton *nfa, gfsmAutomaton *dfa) +{ + gfsmEnum *ec2id; //-- (global) maps literal(equiv-class@nfa) => node-id@dfa + gfsmStateSet *nfa_ec; //-- (temp) equiv-class@nfa + gfsmStateId dfa_id; //-- (temp) id @ dfa + + //-- sanity check(s) + if (!nfa) return NULL; + else if (nfa->flags.is_deterministic) { + if (dfa) gfsm_automaton_copy(dfa,nfa); + else dfa = gfsm_automaton_clone(nfa); + return dfa; + } + + //-- initialization: dfa + if (!dfa) { + dfa = gfsm_automaton_shadow(nfa); + } else { + gfsm_automaton_clear(dfa); + gfsm_automaton_copy_shallow(dfa,nfa); + } + //-- avoid "smart" arc-insertion + dfa->flags.sort_mode = gfsmASMNone; + + //-- initialization: ec2id + ec2id = gfsm_enum_new_full(NULL /*(gfsmDupFunc)gfsm_stateset_clone*/ , + (GHashFunc)gfsm_stateset_hash, + (GEqualFunc)gfsm_stateset_equal, + (GDestroyNotify)gfsm_stateset_free); + + //-- initialization: nfa_ec + nfa_ec = gfsm_stateset_sized_new(32); 
+ gfsm_stateset_insert(nfa_ec, nfa->root_id); + + //-- set root in dfa + dfa_id = gfsm_automaton_ensure_state(dfa, gfsm_enum_insert(ec2id, nfa_ec)); + gfsm_automaton_set_root(dfa, dfa_id); + + //-- guts: determinize recursively outwards from root node + gfsm_determinize_visit_state_(nfa, dfa, nfa_ec, dfa_id, ec2id); + + //-- set flag in dfa + dfa->flags.is_deterministic = TRUE; + + //-- cleanup + //gfsm_stateset_free(nfa_ec); //-- this ought to be freed by gfsm_enum_free(ec2id) + gfsm_enum_free(ec2id); + + return dfa; +} + + + +/*-------------------------------------------------------------- + * difference() + */ +gfsmAutomaton *gfsm_automaton_difference(gfsmAutomaton *fsm1, gfsmAutomaton *fsm2) +{ + gfsmAutomaton *fsm = gfsm_automaton_difference_full(fsm1,fsm2,NULL); + gfsm_automaton_swap(fsm1,fsm); + gfsm_automaton_free(fsm); + return fsm1; +} + +/*-------------------------------------------------------------- + * difference_full() + */ +gfsmAutomaton *gfsm_automaton_difference_full(gfsmAutomaton *fsm1, + gfsmAutomaton *fsm2, + gfsmAutomaton *diff) +{ + gfsmAutomaton *not_fsm2; + gfsmAlphabet *alph1 = gfsm_identity_alphabet_new(); + + gfsm_automaton_get_alphabet(fsm1, gfsmLSLower, alph1); + not_fsm2 = gfsm_automaton_clone(fsm2); + gfsm_automaton_complement_full(not_fsm2, alph1); + diff = gfsm_automaton_intersect_full(fsm1, not_fsm2, diff, NULL); + + gfsm_automaton_free(not_fsm2); + gfsm_alphabet_free(alph1); + + return diff; +} + + + +/*-------------------------------------------------------------- + * intersect() + */ +gfsmAutomaton *gfsm_automaton_intersect(gfsmAutomaton *fsm1, gfsmAutomaton *fsm2) +{ + gfsmAutomaton *fsm = gfsm_automaton_intersect_full(fsm1,fsm2,NULL,NULL); + gfsm_automaton_swap(fsm1,fsm); + gfsm_automaton_free(fsm); + return fsm1; +} + +/*-------------------------------------------------------------- + * intersect_full() + */ +gfsmAutomaton *gfsm_automaton_intersect_full(gfsmAutomaton *fsm1, + gfsmAutomaton *fsm2, + gfsmAutomaton *intersect, 
+ gfsmStatePairEnum *spenum) +{ + gboolean spenum_is_temp; + gfsmStatePair rootpair; + gfsmStateId rootid; + gfsmComposeFlags flags = 0; + + //-- setup: output fsm + if (!intersect) { + intersect=gfsm_automaton_shadow(fsm1); + } else { + gfsm_automaton_clear(intersect); + gfsm_automaton_copy_shallow(intersect,fsm1); + } + //-- avoid "smart" arc-insertion + intersect->flags.sort_mode = gfsmASMNone; + intersect->flags.is_transducer = 0; + + //-- setup: StatePairEnum + if (spenum==NULL) { + spenum_is_temp=TRUE; + spenum = gfsm_statepair_enum_new(); + } else { + spenum_is_temp=FALSE; + gfsm_enum_clear(spenum); + } + + //-- setup: flags + if (gfsm_acmask_nth(fsm1->flags.sort_mode,0) != gfsmACLower) flags |= gfsmCFEfsm1NeedsArcSort; + if (gfsm_acmask_nth(fsm2->flags.sort_mode,0) != gfsmACLower) flags |= gfsmCFEfsm2NeedsArcSort; + + //-- guts + rootpair.id1 = fsm1->root_id; + rootpair.id2 = fsm2->root_id; + rootid = gfsm_automaton_intersect_visit_(rootpair, fsm1, fsm2, intersect, spenum,flags); + + //-- finalize: set root state + if (rootid != gfsmNoState) { + gfsm_automaton_set_root(intersect, rootid); + } else { + intersect->root_id = gfsmNoState; + } + + //-- cleanup + if (spenum_is_temp) gfsm_enum_free(spenum); + + return intersect; +} + +/*-------------------------------------------------------------- + * intersect_visit() + */ +gfsmStateId gfsm_automaton_intersect_visit_(gfsmStatePair sp, + gfsmAutomaton *fsm1, + gfsmAutomaton *fsm2, + gfsmAutomaton *fsm, + gfsmStatePairEnum *spenum, + gfsmComposeFlags flags) +{ + gfsmState *q1, *q2; + gfsmStateId qid = gfsm_enum_lookup(spenum,&sp); + gfsmStateId qid2; + gfsmArcList *al1, *al2, *ai1, *ai2, *ai2eps; + gfsmArc *a1,*a2; + + //-- ignore already-visited states + if (qid != gfsmEnumNone) return qid; + + //-- get state pointers for input automata + q1 = gfsm_automaton_find_state(fsm1,sp.id1); + q2 = gfsm_automaton_find_state(fsm2,sp.id2); + + //-- sanity check + if ( !(q1 && q2 && q1->is_valid && q2->is_valid) ) return 
gfsmNoState; + + //-- insert new state into output automaton + qid = gfsm_automaton_add_state(fsm); + gfsm_enum_insert_full(spenum,&sp,qid); + //q = gfsm_automaton_get_state(fsm,qid); + + //-- check for final states + if (q1->is_final && q2->is_final) { + gfsm_automaton_set_final_state_full(fsm,qid,TRUE, + gfsm_sr_times(fsm->sr, + gfsm_automaton_get_final_weight(fsm1,sp.id1), + gfsm_automaton_get_final_weight(fsm2,sp.id2))); + } + + //------------------------------------------- + // recurse on outgoing arcs + + //-------------------------------- + // recurse: arcs: sort + + //-- arcs: sort arclists: fsm1 + if (flags&gfsmCFEfsm1NeedsArcSort) { + gfsmArcCompData sortdata = { (gfsmACLower|(gfsmACUpper<<gfsmACShift)),NULL,NULL,NULL }; + al1 = gfsm_arclist_sort(gfsm_arclist_clone(q1->arcs), &sortdata); + } + else { al1 = q1->arcs; } + + //-- arcs: sort arclists: fsm2 + if (flags&gfsmCFEfsm2NeedsArcSort) { + gfsmArcCompData sortdata = { (gfsmACLower|(gfsmACUpper<<gfsmACShift)),NULL,NULL,NULL }; + al2 = gfsm_arclist_sort(gfsm_arclist_clone(q2->arcs), &sortdata); + } + else { al2 = q2->arcs; } + + //-------------------------------- + // recurse: arcs: iterate + for (ai1=al1, ai2=al2; ai1 != NULL; ai1=ai1->next) { + a1 = &(ai1->arc); + if (a1->lower == gfsmEpsilon) { + //-- handle epsilon arcs + + //-- eps: case fsm1:(q1 --eps--> q1'), fsm2:(q2) + qid2 = gfsm_automaton_intersect_visit_((gfsmStatePair){a1->target,sp.id2}, + fsm1, fsm2, fsm, spenum, flags); + if (qid2 != gfsmNoState) + gfsm_automaton_add_arc(fsm, qid, qid2, gfsmEpsilon, gfsmEpsilon, a1->weight); + + //-- eps: case fsm1:(q1 --eps--> q1'), fsm2:(q2 --eps--> q2') + for (ai2eps=al2; ai2eps != NULL; ai2eps=ai2eps->next) { + a2 = &(ai2eps->arc); + if (a2->lower != gfsmEpsilon) break; + + qid2 = gfsm_automaton_intersect_visit_((gfsmStatePair){a1->target,a2->target}, + fsm1, fsm2, fsm, spenum, flags); + if (qid2 != gfsmNoState) + gfsm_automaton_add_arc(fsm, qid, qid2, gfsmEpsilon, gfsmEpsilon, + 
gfsm_sr_times(fsm1->sr, a1->weight, a2->weight)); + } + } + else { + //-- handle non-epsilon arcs + for ( ; ai2 != NULL; ai2=ai2->next) { + a2 = &(ai2->arc); + + if (a2->lower < a1->lower) continue; + else if (a2->lower > a1->lower) break; + + qid2 = gfsm_automaton_intersect_visit_((gfsmStatePair){a1->target,a2->target}, + fsm1, fsm2, fsm, spenum, flags); + if (qid2 != gfsmNoState) + gfsm_automaton_add_arc(fsm, qid, qid2, a1->lower, a1->lower, + gfsm_sr_times(fsm1->sr, a1->weight, a2->weight)); + } + } + } + + //-- handle epsilon-arcs on fsm2 + for (ai2=al2 ; ai2 != NULL; ai2=ai2->next) { + a2 = &(ai2->arc); + if (a2->lower != gfsmEpsilon) break; + + //-- eps: case fsm1:(q1), fsm2:(q2 --eps--> q2') + qid2 = gfsm_automaton_intersect_visit_((gfsmStatePair){sp.id1,a2->target}, + fsm1, fsm2, fsm, spenum, flags); + if (qid2 != gfsmNoState) + gfsm_automaton_add_arc(fsm, qid, qid2, gfsmEpsilon, gfsmEpsilon, a2->weight); + } + + //-- cleanup + if (flags&gfsmCFEfsm1NeedsArcSort) gfsm_arclist_free(al1); + if (flags&gfsmCFEfsm2NeedsArcSort) gfsm_arclist_free(al2); + + return qid; +} + + +/*-------------------------------------------------------------- + * invert() + */ +gfsmAutomaton *gfsm_automaton_invert(gfsmAutomaton *fsm) +{ + gfsmStateId id; + gfsmArcIter ai; + gfsmArcCompMask acmask_old=fsm->flags.sort_mode, acmask_new=gfsmACNone;; + gint aci; + + //-- invert arcs + for (id=0; id < fsm->states->len; id++) { + for (gfsm_arciter_open(&ai,fsm,id); gfsm_arciter_ok(&ai); gfsm_arciter_next(&ai)) { + gfsmArc *a = gfsm_arciter_arc(&ai); + gfsmLabelId tmp = a->lower; + a->lower = a->upper; + a->upper = tmp; + } + } + + //-- adjust sort mask (translate "lower"<->"upper") + for (aci=0; aci < gfsmACMaxN; aci++) { + gfsmArcCompMask cmp = gfsm_acmask_nth(acmask_old,aci); + switch (cmp) { + case gfsmACLower: cmp=gfsmACUpper; break; + case gfsmACUpper: cmp=gfsmACLower; break; + case gfsmACLowerR: cmp=gfsmACUpperR; break; + case gfsmACUpperR: cmp=gfsmACLowerR; break; + default: break; + 
} + acmask_new |= gfsm_acmask_new(cmp,aci); + } + fsm->flags.sort_mode = acmask_new; + + return fsm; +} + +/*-------------------------------------------------------------- + * optional() + */ +gfsmAutomaton *gfsm_automaton_optional(gfsmAutomaton *fsm) +{ + if (!gfsm_automaton_is_final_state(fsm,fsm->root_id)) + gfsm_automaton_set_final_state_full(fsm,fsm->root_id,TRUE,fsm->sr->one); + return fsm; +} + +/*-------------------------------------------------------------- + * product() (single-destructive) + */ +gfsmAutomaton *gfsm_automaton_product(gfsmAutomaton *fsm1, gfsmAutomaton *fsm2) +{ + gfsmAutomaton *fsm2_tmp = gfsm_automaton_clone(fsm2); + gfsm_automaton_product2(fsm1,fsm2_tmp); + gfsm_automaton_free(fsm2_tmp); + return fsm1; +} + +/*-------------------------------------------------------------- + * _product() (dual-destructive) + */ +gfsmAutomaton *gfsm_automaton_product2(gfsmAutomaton *fsm1, gfsmAutomaton *fsm2) +{ + gfsmStateId qid; + gfsmState *qp; + gfsmArcIter ai; + gfsmArc *a; + + //-- chuck out all upper-labels from fsm1 + for (qid=0; qid < fsm1->states->len; qid++) { + qp = gfsm_automaton_find_state(fsm1,qid); + if (!qp || !qp->is_valid) continue; + for (gfsm_arciter_open_ptr(&ai,fsm1,qp); gfsm_arciter_ok(&ai); gfsm_arciter_next(&ai)) { + a = gfsm_arciter_arc(&ai); + a->upper = gfsmEpsilon; + } + } + + //-- chuck out all upper-labels from fsm2 + for (qid=0; qid < fsm2->states->len; qid++) { + qp = gfsm_automaton_find_state(fsm2,qid); + if (!qp || !qp->is_valid) continue; + for (gfsm_arciter_open_ptr(&ai,fsm2,qp); gfsm_arciter_ok(&ai); gfsm_arciter_next(&ai)) { + a = gfsm_arciter_arc(&ai); + a->lower = gfsmEpsilon; + } + } + + //-- concatenate + gfsm_automaton_concat(fsm1,fsm2); + + //-- mark output fsm as transducer + fsm1->flags.is_transducer = 1; + + return fsm1; +} + +/*-------------------------------------------------------------- + * project() + */ +gfsmAutomaton *gfsm_automaton_project(gfsmAutomaton *fsm, gfsmLabelSide which) +{ + gfsmStateId 
id; + gfsmArcIter ai; + if (which==gfsmLSBoth) return fsm; + + for (id=0; id < fsm->states->len; id++) { + for (gfsm_arciter_open(&ai,fsm,id); gfsm_arciter_ok(&ai); gfsm_arciter_next(&ai)) { + gfsmArc *a = gfsm_arciter_arc(&ai); + if (which==gfsmLSLower) a->upper = a->lower; + else a->lower = a->upper; + } + } + fsm->flags.is_transducer = FALSE; + return fsm; +} + +/*-------------------------------------------------------------- + * replace() + */ +gfsmAutomaton *gfsm_automaton_replace(gfsmAutomaton *fsm1, gfsmLabelVal lo, gfsmLabelVal hi, gfsmAutomaton *fsm2) +{ + gfsmStateId id; + gfsmArcIter ai; + gfsmStateId nstates = fsm1->states->len; + + for (id=0; id < nstates; id++) { + if (!gfsm_automaton_has_state(fsm1,id)) continue; + for (gfsm_arciter_open(&ai,fsm1,id), gfsm_arciter_seek_both(&ai,lo,hi); + gfsm_arciter_ok(&ai); + gfsm_arciter_seek_both(&ai,lo,hi)) + { + gfsmArc *a = gfsm_arciter_arc(&ai); + gfsm_automaton_insert_automaton(fsm1, id, a->target, fsm2, a->weight); + gfsm_arciter_remove(&ai); //-- implies gfsm_arciter_next() + } + //gfsm_arciter_close(&ai); + } + + return fsm1; +} + +/*-------------------------------------------------------------- + * insert_automaton() + */ +gfsmAutomaton *gfsm_automaton_insert_automaton(gfsmAutomaton *fsm1, + gfsmStateId q1from, + gfsmStateId q1to, + gfsmAutomaton *fsm2, + gfsmWeight w) +{ + gfsmStateId offset; + gfsmStateId size2; + gfsmStateId id2; + gfsmStateId id1; + const gfsmState *s2; + gfsmState *s1; + gfsmArcIter ai; + gfsmWeight s2fw; + + //-- reserve size + offset = fsm1->states->len; + size2 = fsm2->states->len; + gfsm_automaton_reserve(fsm1, offset + size2); + + //-- avoid "smart" arc-insertion + fsm1->flags.sort_mode = gfsmASMNone; + + //-- adopt states from fsm2 into fsm1 + for (id2 = 0; id2 < size2; id2++) { + + s2 = gfsm_automaton_find_state_const(fsm2,id2); + id1 = id2+offset; + s1 = gfsm_automaton_find_state(fsm1, id1); + + //-- sanity check(s) + if (!s1 || !s2 || !s2->is_valid) continue; + + //-- copy 
state + gfsm_state_copy(s1,s2); + + //-- translate targets for adopted arcs + for (gfsm_arciter_open_ptr(&ai,fsm1,s1); gfsm_arciter_ok(&ai); gfsm_arciter_next(&ai)) + { + gfsmArc *a = gfsm_arciter_arc(&ai); + a->target += offset; + } + + //-- check for fsm2-final states: get weight & add arc to our sink state + if (gfsm_weightmap_lookup(fsm2->finals, GUINT_TO_POINTER(id2), &s2fw)) { + s1->is_final = FALSE; + gfsm_automaton_add_arc(fsm1,id1,q1to,gfsmEpsilon,gfsmEpsilon, s2fw); + } + } + + //-- add arc to new state + gfsm_automaton_add_arc(fsm1, q1from, fsm2->root_id+offset, gfsmEpsilon, gfsmEpsilon, w); + + return fsm1; +} + +/*-------------------------------------------------------------- + * rmepsilon_foreach_func() + */ +static +void gfsm_automaton_rmeps_pass2_foreach_func_(gfsmStatePair *sp, gpointer pw, gfsmAutomaton *fsm) +{ + gfsmWeight w = gfsm_ptr2weight(pw); + gfsmWeight fw2; + gfsmArcIter ai; + gfsmArc *a; + if (sp->id1==sp->id2) return; //-- sanity check + + //-- adopt final weights (plus) + if (gfsm_automaton_lookup_final(fsm, sp->id2, &fw2)) { + gfsm_automaton_set_final_state_full(fsm, sp->id1, TRUE, + gfsm_sr_plus(fsm->sr, + gfsm_automaton_get_final_weight(fsm, sp->id1), + gfsm_sr_times(fsm->sr, w, fw2))); + } + + //-- adopt non-epsilon arcs + for (gfsm_arciter_open(&ai,fsm,sp->id2); gfsm_arciter_ok(&ai); gfsm_arciter_next(&ai)) { + a = gfsm_arciter_arc(&ai); + if (a->lower != gfsmEpsilon || a->upper != gfsmEpsilon) { + gfsm_automaton_add_arc(fsm, sp->id1, a->target, a->lower, a->upper, + gfsm_sr_times(fsm->sr, a->weight, w)); + } + } +} + +/*-------------------------------------------------------------- + * rmepsilon() + */ +gfsmAutomaton *gfsm_automaton_rmepsilon(gfsmAutomaton *fsm) +{ + gfsmStatePair2WeightHash *sp2wh = gfsm_statepair2weighthash_new(); + gfsmArcIter ai; + gfsmStateId qid; + gfsmArc *a; + + //-- pass-1: populate sp2wh with epsilon-reachable states + for (qid=0; qid < fsm->states->len; qid++) { + 
gfsm_automaton_rmeps_visit_state_(fsm, qid, qid, fsm->sr->one, sp2wh); + } + + //-- pass-2: adopt non-epsilon arcs & final weights from eps-reachable states + gfsm_weighthash_foreach(sp2wh, (GHFunc)gfsm_automaton_rmeps_pass2_foreach_func_, fsm); + + //-- pass-3: actual removal of now-redundant epsilon arcs + for (qid=0; qid < fsm->states->len; qid++) { + for (gfsm_arciter_open(&ai,fsm,qid); gfsm_arciter_ok(&ai); ) { + a = gfsm_arciter_arc(&ai); + if (a->lower==gfsmEpsilon && a->upper==gfsmEpsilon) { + gfsm_arciter_remove(&ai); + } else { + gfsm_arciter_next(&ai); + } + } + } + + //-- cleanup + gfsm_weighthash_free(sp2wh); + + return fsm; +} + +/*-------------------------------------------------------------- + * rmepsilon_visit_state() + */ +void gfsm_automaton_rmeps_visit_state_(gfsmAutomaton *fsm, + gfsmStateId qid_noeps, //-- state reachable by non-eps arcs + gfsmStateId qid_eps, //-- eps-reachable state from qid_noeps + gfsmWeight weight_eps, //-- total weight of followed eps-arcs + gfsmStatePair2WeightHash *sp2wh //-- maps (qid_noeps,qid_noeps)=>sum_weight_eps + ) +{ + gfsmState *q_noeps, *q_eps; + gfsmStatePair sp = {qid_noeps,qid_eps}; + gfsmArcIter ai; + gfsmArc *a; + + //-- visited check, mark + if (!gfsm_weighthash_insert_sum_if_less(sp2wh, &sp, weight_eps, fsm->sr)) + return; //-- no update required + + //-- sanity check + q_noeps = gfsm_automaton_find_state(fsm,qid_noeps); + q_eps = gfsm_automaton_find_state(fsm,qid_eps); + if (!q_noeps || !q_noeps->is_valid || !q_eps || !q_eps->is_valid) return; + + //-- visit epsilon-reachable states from q_eps + for (gfsm_arciter_open_ptr(&ai, fsm, q_eps), gfsm_arciter_seek_both(&ai,gfsmEpsilon,gfsmEpsilon); + gfsm_arciter_ok(&ai); + gfsm_arciter_next(&ai), gfsm_arciter_seek_both(&ai,gfsmEpsilon,gfsmEpsilon)) + { + a = gfsm_arciter_arc(&ai); + gfsm_automaton_rmeps_visit_state_(fsm, qid_noeps, a->target, + gfsm_sr_times(fsm->sr, weight_eps, a->weight), + sp2wh); + } +} + + 
+/*-------------------------------------------------------------- + * reverse() + */ +gfsmAutomaton *gfsm_automaton_reverse(gfsmAutomaton *fsm) +{ + gfsmStateId new_root = gfsm_automaton_add_state(fsm); + gfsmStateId id; + gfsmState *s, *ts; + gfsmArcList *al, *al_next, *al_prev; + gfsmWeight w; + //gfsmArcSortMode sm = gfsm_automaton_sortmode(fsm); + + //-- mark automaton as unsorted (avoid "smart" arc-insertion) + fsm->flags.sort_mode = gfsmASMNone; + + //-- reverse arc directions, keeping old "source" and "target" values + // intact as sentinels + for (id = 0; id < new_root; id++) { + s = gfsm_automaton_find_state(fsm,id); + if (!s || !s->is_valid) continue; + + //-- check for old final states + if (gfsm_automaton_lookup_final(fsm,id,&w)) { + s->is_final = FALSE; + gfsm_weightmap_remove(fsm->finals, GUINT_TO_POINTER(id)); + gfsm_automaton_add_arc(fsm, new_root, id, gfsmEpsilon, gfsmEpsilon, w); + } + + //-- reverse arcs + for (al_prev=NULL, al=s->arcs; al != NULL; al=al_next) { + gfsmArc *a = gfsm_arclist_arc(al); + al_next = al->next; + if (a->target==id) { + //-- already reversed (or a single-arc loop, which doesn't need reversal) + al_prev = al; + continue; + } + + //-- steal arc + if (al_prev) al_prev->next = al->next; + else s->arcs = al->next; + al->next = NULL; + + //-- move arc + ts = gfsm_automaton_find_state(fsm,a->target); + gfsm_automaton_add_arc_node(fsm, ts, al); + } + } + + //-- sanitize: swap 'source' and 'target' fields + for (id=0; id < new_root; id++) { + gfsmArcIter ai; + for (gfsm_arciter_open(&ai,fsm,id); gfsm_arciter_ok(&ai); gfsm_arciter_next(&ai)) { + gfsmArc *a = gfsm_arciter_arc(&ai); + gfsmStateId tmp = a->target; + a->target = a->source; + a->source = tmp; + } + } + + //-- root flop + gfsm_automaton_set_final_state_full(fsm,fsm->root_id,TRUE,fsm->sr->one); + fsm->root_id = new_root; + + return fsm; +} + +#if 0 +gfsmAutomaton *gfsm_automaton_reverse_old(gfsmAutomaton *fsm) +{ + gfsmStateId new_root = gfsm_automaton_add_state(fsm); + 
gfsmStateId id; + gfsmState *s, *ts; + gfsmArcList *al, *al_next, *al_prev; + gfsmWeight w; + //gfsmArcSortMode sm = gfsm_automaton_sortmode(fsm); + + //-- mark automaton as unsorted (avoid "smart" arc-insertion) + fsm->flags.sort_mode = gfsmASMNone; + + //-- reverse arc directions, assigning reversed arcs 'target' values as 'old_src+new_root' + for (id = 0; id < new_root; id++) { + s = gfsm_automaton_find_state(fsm,id); + if (!s || !s->is_valid) continue; + + //-- check for old final states + if (gfsm_automaton_lookup_final(fsm,id,&w)) { + s->is_final = FALSE; + gfsm_weightmap_remove(fsm->finals, GUINT_TO_POINTER(id)); + gfsm_automaton_add_arc(fsm, new_root, id, gfsmEpsilon, gfsmEpsilon, w); + } + + //-- reverse arcs + for (al_prev=NULL, al=s->arcs; al != NULL; al=al_next) { + gfsmArc *a = gfsm_arclist_arc(al); + al_next = al->next; + if (a->target >= new_root) { + //-- already moved + al_prev = al; + continue; + } + + //-- steal arc + if (al_prev) al_prev->next = al->next; + else s->arcs = al->next; + al->next = NULL; + + //-- move arc + ts = gfsm_automaton_find_state(fsm,a->target); + gfsm_automaton_add_arc_link(fsm, ts, al); + + //-- flag as reversed + a->target = id + new_root; + } + } + + //-- sanitize + for (id=0; id < new_root; id++) { + gfsmArcIter ai; + for (gfsm_arciter_open(&ai,fsm,id); gfsm_arciter_ok(&ai); gfsm_arciter_next(&ai)) { + gfsmArc *a = gfsm_arciter_arc(&ai); + a->target -= new_root; + } + } + + //-- root flop + gfsm_automaton_set_final_state_full(fsm,fsm->root_id,TRUE,fsm->sr->one); + fsm->root_id = new_root; + + return fsm; +} +#endif + +/*-------------------------------------------------------------- + * sigma() + */ +gboolean gfsm_automaton_sigma_foreach_func_(gfsmAlphabet *abet, gpointer key, gfsmLabelVal lab, gfsmAutomaton *fsm) +{ + gfsm_automaton_add_arc(fsm,0,1,lab,lab,fsm->sr->one); + return FALSE; +} + +gfsmAutomaton *gfsm_automaton_sigma(gfsmAutomaton *fsm, gfsmAlphabet *abet) +{ + gfsm_automaton_clear(fsm); + 
fsm->flags.sort_mode = gfsmASMNone; //-- avoid "smart" arc-insertion + fsm->root_id = gfsm_automaton_add_state_full(fsm,0); + gfsm_automaton_add_state_full(fsm,1); + gfsm_automaton_set_final_state_full(fsm,1,TRUE,fsm->sr->one); + gfsm_alphabet_foreach(abet, (gfsmAlphabetForeachFunc)gfsm_automaton_sigma_foreach_func_, fsm); + return fsm; +} + +/*-------------------------------------------------------------- + * union_() + */ +gfsmAutomaton *gfsm_automaton_union(gfsmAutomaton *fsm1, gfsmAutomaton *fsm2) +{ + gfsmStateId offset; + gfsmStateId id2; + gfsmStateId oldroot1; + gfsmArcCompData sortdata = {0,0,0,0}; + + //-- sanity check + if (!fsm2 || fsm2->root_id==gfsmNoState) return fsm1; + + offset = fsm1->states->len + 1; + gfsm_automaton_reserve(fsm1, offset + fsm2->states->len); + + + //-- add new root and eps-arc to old root for fsm1 + oldroot1 = fsm1->root_id; + fsm1->root_id = gfsm_automaton_add_state(fsm1); + if (oldroot1 != gfsmNoState) { + gfsm_automaton_add_arc(fsm1, fsm1->root_id, oldroot1, gfsmEpsilon, gfsmEpsilon, fsm1->sr->one); + } + + //-- avoid "smart" arc-insertion (temporary) + sortdata.mask = fsm1->flags.sort_mode; + sortdata.sr = fsm1->sr; + fsm1->flags.sort_mode = gfsmASMNone; + + //-- adopt states from fsm2 into fsm1 + for (id2 = 0; id2 < fsm2->states->len; id2++) { + const gfsmState *s2 = gfsm_automaton_find_state_const(fsm2,id2); + gfsmState *s1 = gfsm_automaton_find_state(fsm1,id2+offset); + gfsmArcIter ai; + gfsm_state_copy(s1,s2); + for (gfsm_arciter_open_ptr(&ai, fsm1, s1); gfsm_arciter_ok(&ai); gfsm_arciter_next(&ai)) { + gfsmArc *a = gfsm_arciter_arc(&ai); + a->target += offset; + } + //-- index final states from @fsm2 + if (s2->is_final) { + gfsm_automaton_set_final_state_full(fsm1, id2+offset, TRUE, gfsm_automaton_get_final_weight(fsm2, id2)); + } + //-- maybe sort new arcs + if (sortdata.mask != gfsmASMNone + && (fsm2->flags.sort_mode != sortdata.mask + || (sortdata.mask == gfsmASMWeight && fsm2->sr->type != fsm1->sr->type))) + { + 
s1->arcs = gfsm_arclist_sort(s1->arcs, &sortdata); + } + } + + //-- re-instate "smart" arc-insertion + fsm1->flags.sort_mode = sortdata.mask; + + //-- add epsilon arc to translated root(fsm2) in fsm1 + gfsm_automaton_add_arc(fsm1, + fsm1->root_id, + offset + fsm2->root_id, + gfsmEpsilon, + gfsmEpsilon, + fsm1->sr->one); + + return fsm1; +} + + +/*-------------------------------------------------------------- + * dummy() + */ +gfsmAutomaton *gfsm_automaton_dummy(gfsmAutomaton *fsm) +{ + g_assert_not_reached(); /*-- NOT gfsm_assert_not_reached() ! */ + return fsm; +} diff --git a/gfsm/gfsm/src/libgfsm/gfsmAlgebra.h b/gfsm/gfsm/src/libgfsm/gfsmAlgebra.h new file mode 100644 index 0000000..301bec1 --- /dev/null +++ b/gfsm/gfsm/src/libgfsm/gfsmAlgebra.h @@ -0,0 +1,559 @@ + +/*=============================================================================*\ + * File: gfsmAlgebra.h + * Author: Bryan Jurish <moocow@ling.uni-potsdam.de> + * Description: finite state machine library + * + * Copyright (c) 2004-2007 Bryan Jurish. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + *=============================================================================*/ + +/** \file gfsmAlgebra.h + * \brief Algebraic operations on automata + */ + +#ifndef _GFSM_ALGEBRA_H +#define _GFSM_ALGEBRA_H + +#include <gfsmStateSet.h> +#include <gfsmCompound.h> +#include <gfsmArcIter.h> +#include <gfsmArcIndex.h> + +/*====================================================================== + * Methods: algebra + */ + +//------------------------------ +///\name Closure (self-concatenation) +//@{ +/** Compute transitive or reflexive-\&-transitive + * closure of \a fsm. + * \note Destructively alters \a fsm . + * + * \param fsm Automaton + * \param is_plus Which type of closure to compute: + * \arg TRUE transitive closure + * \arg FALSE reflexive \& transitive closure + * \returns modified \a fsm + */ +gfsmAutomaton *gfsm_automaton_closure(gfsmAutomaton *fsm, gboolean is_plus); + +/* Final-state pre-traversal utility for \c closure(fsm). */ +//gboolean gfsm_automaton_closure_final_func_(gfsmStateId id, gpointer pw, gfsmAutomaton *fsm); + +/** Compute \a n-ary closure of \a fsm. + * \note Destructively alters \a fsm. + * + * \param fsm Automaton + * \param n Number of repetitions + * \returns modified \a fsm + */ +gfsmAutomaton *gfsm_automaton_n_closure(gfsmAutomaton *fsm, guint n); +//@} + + +//------------------------------ +///\name Complementation and Completion +//@{ +/** + * Compute the lower-side complement of \a fsm with respect to its own lower alphabet. + * \note Destructively alters \a fsm + * + * \param fsm Acceptor + * \returns \a fsm. 
+ */ +gfsmAutomaton *gfsm_automaton_complement(gfsmAutomaton *fsm); + +/** + * Compute the lower-side complement of \a fsm with respect to the alphabet \a alph, + * which should contain all of the lower-labels from \a fsm. + * \note Destructively alters \a fsm. + * + * \param fsm Acceptor + * \param alph Alphabet with respect to which to compute complement. + * \returns \a fsm + */ +gfsmAutomaton *gfsm_automaton_complement_full(gfsmAutomaton *fsm, gfsmAlphabet *alph); + +/** + * Complete the lower side of automaton \a fsm with respect to the alphabet \a alph + * by directing "missing" arcs to the (new) state with id \a *sinkp. + * \note Destructively alters \a fsm. + * + * \param fsm Acceptor + * \param alph Alphabet with respect to which \a fsm is to be completed + * \param sinkp Pointer to a variable which on completion contains the Id of a (new) non-final sink state + * \returns \a fsm + */ +gfsmAutomaton *gfsm_automaton_complete(gfsmAutomaton *fsm, + gfsmAlphabet *alph, + gfsmStateId *sinkp); +//@} + +//------------------------------ +///\name Composition +//@{ + +/** Compute the composition of \a fsm1 with \a fsm2 + * (upper-side of \a fsm1 intersection with lower-side of \a fsm2). + * + * \param fsm1 Lower-middle transducer + * \param fsm2 Middle-upper transducer + * \returns Altered \a fsm1 + * + * \note + * \li Pseudo-destructive on \a fsm1. + * \li Runtime efficiency can be greatly improved if + * \a fsm1 is sorted on upper labels (::gfsmASMUpper) + * and \a fsm2 is sorted on lower labels (::gfsmASMLower). + * + * \sa gfsm_automaton_compose_full() + */ +gfsmAutomaton *gfsm_automaton_compose(gfsmAutomaton *fsm1, gfsmAutomaton *fsm2); + + +/** Compute the composition of two transducers \a fsm1 and \a fsm2 + * into the transducer \a composition. + * + * \param fsm1 Lower-middle transducer + * \param fsm2 Middle-upper transducer + * \param composition Lower-upper transducer. May be passed as NULL to create a new automaton. 
+ * \param spenum + * Mapping from (\a fsm1,\a fsm2,\a filter) ::gfsmComposeState s to \a composition (::gfsmStateId)s, + * if it is passed as \a NULL, a temporary enum will be created and freed. + * + * \sa Mohri, Pereira, and Riley (1996) "Weighted Automata in Text and Speech Processing", + * Proc. ECAI '96, John Wiley & Sons, Ltd. + * + * \returns \a composition + */ +gfsmAutomaton *gfsm_automaton_compose_full(gfsmAutomaton *fsm1, + gfsmAutomaton *fsm2, + gfsmAutomaton *composition, + gfsmComposeStateEnum *spenum); + +typedef guint32 gfsmComposeFlags; /**< flags for gfsm_automaton_compose_visit_() */ + +/** \brief Enum type for low-level flags to gfsm_automaton_compose_visit_() */ +typedef enum { + gfsmCFEfsm1NeedsArcSort = 0x1, + gfsmCFEfsm2NeedsArcSort = 0x2 +} gfsmComposeFlagsE; + +/** Guts for gfsm_automaton_compose() \returns (new) StateId for \a sp */ +gfsmStateId gfsm_automaton_compose_visit_(gfsmComposeState sp, + gfsmAutomaton *fsm1, + gfsmAutomaton *fsm2, + gfsmAutomaton *fsm, + gfsmComposeStateEnum *spenum, + gfsmComposeFlags flags); +//@} + +//------------------------------ +///\name Concatenation +//@{ + +/** Append \a fsm2 onto the end of \a fsm1 \a n times. + * \note Destructively alters \a fsm1. + * + * \param fsm1 Automaton + * \param fsm2 Automaton + * \param n Number of times to append \a fsm2 + * \returns \a fsm1 + */ +gfsmAutomaton *gfsm_automaton_n_concat(gfsmAutomaton *fsm1, gfsmAutomaton *fsm2, guint n); + +/** Append \a _fsm2 onto the end of \a fsm1. + * \note Destructively alters \a fsm1. + * + * \param fsm1 Automaton + * \param _fsm2 Automaton + * \returns \a fsm1 + */ +gfsmAutomaton *gfsm_automaton_concat(gfsmAutomaton *fsm1, gfsmAutomaton *_fsm2); + +/* Final-state pre-traversal utility for \a concat(fsm,fsm2). + * + * \note Assumes \a fsm->root_id has been temporarily set to the translated gfsmStateId + * of \a fsm2->root_id. + * + * \param pw final ::gfsmWeight encoded as a gpointer (e.g. 
with gfsm_weight2ptr()) + * \param fsm concatenation first argument / return value + * \returns FALSE + */ +//gboolean gfsm_automaton_concat_final_func_(gfsmStateId id, gpointer pw, gfsmAutomaton *fsm); + +//@} + +//------------------------------ +///\name Co-Accessibility & Pruning +//@{ + +/** + * Remove non-coaccessible states from \a fsm. + * Calls gfsm_automaton_connect_fw() and gfsm_automaton_connect_bw() + * \note Destructively alters \a fsm + * + * \param fsm Automaton + * \returns modified \a fsm + */ +gfsmAutomaton *gfsm_automaton_connect(gfsmAutomaton *fsm); + +//------------------------------ +/** + * Remove non root-accessible states from \a fsm. + * Called by gfsm_automaton_connect() + * + * \note Destructively alters \a fsm + * + * \param fsm Automaton + * \param visited + * Bit-vector for traversal. Should have all bits set to zero. + * If passed as NULL, a new bit-vector will be created and freed. + * \returns modified \a fsm + */ +gfsmAutomaton *gfsm_automaton_connect_fw(gfsmAutomaton *fsm, gfsmBitVector *visited); + +//------------------------------ +/** + * Remove non-finalizable states from \a fsm. + * Called by connect(). + * \note Destructively alters \a fsm + * + * \param fsm Automaton + * \param rarcs + * Reverse arc-index as returned by gfsm_automaton_reverse_arc_index(). + * If passed as NULL, gfsm_automaton_reverse_arc_index() will be called + * on a temporary arc index. + * \param finalizable + * Bit-vector for traversal. Should have all bits set to zero. + * If passed as NULL, a new bit-vector will be created and freed. + * \returns modified \a fsm + */ +gfsmAutomaton *gfsm_automaton_connect_bw(gfsmAutomaton *fsm, + gfsmReverseArcIndex *rarcs, + gfsmBitVector *finalizable); + +//------------------------------ +/** + * Utility for gfsm_automaton_connect_fw() and gfsm_automaton_connect_bw(). 
+ * Prunes states from \a fsm whose id bit is not set in \a wanted. + * + * \param fsm Automaton + * \param wanted + * Bit-vector indexed by state id: bit \a id should be unset + * iff state \a id is to be removed. + * \returns modified \a fsm + */ +gfsmAutomaton *gfsm_automaton_prune_states(gfsmAutomaton *fsm, gfsmBitVector *wanted); + +//@} + +//------------------------------ +///\name Determinization +//@{ + +/** Utility for \a gfsm_automaton_determinize(). */ +void gfsm_determinize_visit_state_(gfsmAutomaton *nfa, gfsmAutomaton *dfa, + gfsmStateSet *nfa_ec, gfsmStateId dfa_id, + gfsmEnum *ec2id); + +/** Determinize acceptor \a fsm + * + * - Pseudo-destructive on \a fsm + * - Epsilon is treated like any other symbol. + * - Arc labels are treated as (input,output) pairs for purposes + * of state-equivalence-class construction: this may not be what you want. + * . + * + * \param fsm Automaton + * \returns altered \a fsm + * + * \sa gfsm_automaton_determinize_full() + */ +gfsmAutomaton *gfsm_automaton_determinize(gfsmAutomaton *fsm); + +/** Alias for gfsm_automaton_determinize() */ +#define gfsm_automaton_determinise(fsm) gfsm_automaton_determinize(fsm) + +/** Alias for gfsm_automaton_determinize_full() */ +#define gfsm_automaton_determinise_full(nfa,dfa) gfsm_automaton_determinize_full((nfa),(dfa)) + +/** Determinize automaton \a nfa to \a dfa. + * - Epsilon is treated like any other symbol. + * - Arc labels are treated as (input,output) pairs. + * . + * + * \param nfa non-deterministic acceptor + * \param dfa deterministic acceptor to be constructed + * May be passed as \c NULL to create a new automaton. + * \returns \a dfa + */ +gfsmAutomaton *gfsm_automaton_determinize_full(gfsmAutomaton *nfa, gfsmAutomaton *dfa); + +//@} + +//------------------------------ +///\name Difference +//@{ + +/** Remove language of acceptor \a fsm2 from acceptor \a fsm1. + * + * - Pseudo-destructively alters \a fsm1. + * - Really just an alias for intersect_full(fsm1,fsm2,NULL) + * . 
+ * + * \param fsm1 Acceptor + * \param fsm2 Acceptor + * \returns \a fsm1 + */ +gfsmAutomaton *gfsm_automaton_difference(gfsmAutomaton *fsm1, gfsmAutomaton *fsm2); + +/** Compute difference of acceptors (\a fsm1 - \a fsm2) into acceptor \a diff- + * + * \note Really just an alias for intersect_full(fsm1,complement(clone(fsm2)),diff). + * + * \param fsm1 Acceptor + * \param fsm2 Acceptor + * \param diff Output (difference) acceptor, + * may be passed as \c NULL to implicitly create a new automaton. + * + * \returns (possibly new) difference automaton \a diff + */ +gfsmAutomaton *gfsm_automaton_difference_full(gfsmAutomaton *fsm1, + gfsmAutomaton *fsm2, + gfsmAutomaton *diff); + +//@} + +//------------------------------ +///\name Intersection +//@{ +/** Compute the intersection of two acceptors \a fsm1 and \a fsm2 (lower-side intersection). + * \note Pseudo-destructive on \a fsm1. + * + * \param fsm1 Acceptor + * \param fsm2 Acceptor + * \returns \a fsm1. + */ +gfsmAutomaton *gfsm_automaton_intersect(gfsmAutomaton *fsm1, gfsmAutomaton *fsm2); + +/** Compute the intersection of two acceptors \a fsm1 and \a fsm2 + * into the acceptor \a intersect. + * + * \param fsm1 Acceptor + * \param fsm2 Acceptor + * \param intersect Output acceptor, may be \c NULL to create a new automaton. + * \param spenum Mapping from (\a fsm1,\a fsm2) (::gfsmStatePair)s to \a intersect (::gfsmStateId)s, + * may be NULL to use a temporary mapping. + * \returns \a intersect. 
+ */ +gfsmAutomaton *gfsm_automaton_intersect_full(gfsmAutomaton *fsm1, + gfsmAutomaton *fsm2, + gfsmAutomaton *intersect, + gfsmStatePairEnum *spenum); + +/** Guts for gfsm_automaton_intersect() + * \returns (new) ::gfsmStateId for \a sp + */ +gfsmStateId gfsm_automaton_intersect_visit_(gfsmStatePair sp, + gfsmAutomaton *fsm1, + gfsmAutomaton *fsm2, + gfsmAutomaton *fsm, + gfsmStatePairEnum *spenum, + gfsmComposeFlags flags); +//@} + + +//------------------------------ +///\name Inversion & Projection +//@{ +/** Invert upper and lower labels of an automaton. + * \note Destructively alters \a fsm. + * + * \param fsm Transducer + * \returns \a fsm + */ +gfsmAutomaton *gfsm_automaton_invert(gfsmAutomaton *fsm); + +//------------------------------ +/** Project one "side" (lower or upper) of \a fsm + * \note Destructively alters \a fsm + * + * \param fsm Transducer + * \param which Which label side to project. + * \arg gfsmLSLower project lower side + * \arg gfsmLSUpper project upper side (default) + * \returns modified \a fsm + */ +gfsmAutomaton *gfsm_automaton_project(gfsmAutomaton *fsm, gfsmLabelSide which); + +//@} + +//------------------------------ +///\name Optionality +//@{ +/** Make \a fsm optional. + * \note Destructively alters \a fsm + * + * \param fsm Automaton + * \returns \a modified fsm + */ +gfsmAutomaton *gfsm_automaton_optional(gfsmAutomaton *fsm); + +//@} + +//------------------------------ +///\name Cartesian Product +//@{ +/** Compute Cartesian product of acceptors \a fsm1 and \a fsm2. + * \note Destructively alters \a fsm1. + * + * \param fsm1 Acceptor (lower) + * \param fsm2 Acceptor (upper) + * \returns \a fsm1 (transducer) + * + * \sa gfsm_automaton_product2() + */ +gfsmAutomaton *gfsm_automaton_product(gfsmAutomaton *fsm1, gfsmAutomaton *fsm2); + +/** Compute Cartesian product of acceptors \a fsm1 and \a fsm2. + * \note Destructively alters \a fsm1 \b and \a fsm2. 
+ * + * \param fsm1 Acceptor (lower) + * \param fsm2 Acceptor (upper) + * \returns \a fsm1 + */ +gfsmAutomaton *gfsm_automaton_product2(gfsmAutomaton *fsm1, gfsmAutomaton *fsm2); + +/* Backwards-compatible alias for gfsm_automaton_product2() [DISABLED] */ +//#define _gfsm_automaton_product gfsm_automaton_product2 + +//@} + + +//------------------------------ +///\name Replacement +//@{ +/** Replace label-pair \a (lo,hi) with \a fsm2 in \a fsm1 . + * \note Destructively alters \a fsm1. + * + * \param fsm1 Automaton + * \param lo lower label or gfsmNoLabel to ignore lower labels + * \param hi upper label or gfsmNoLabel to ignore upper labels + * \param fsm2 Automaton substituted for each matching label-pair + * \returns modified \a fsm1 + */ +gfsmAutomaton *gfsm_automaton_replace(gfsmAutomaton *fsm1, gfsmLabelVal lo, gfsmLabelVal hi, gfsmAutomaton *fsm2); + +/** Insert automaton \a fsm2 into \a fsm1 between states \a q1from and \a q1to with weight \a w. + * \note Destructively alters \a fsm1. + * + * \param fsm1 Automaton into which \a fsm2 is inserted + * \param fsm2 Automaton to be inserted + * \param q1from Source state for inserted automaton in \a fsm1 + * \param q1to Final state for inserted automaton in \a fsm1 + * \param w Weight to add to final arcs for translated \a fsm2 in \a fsm1 + * \returns modified \a fsm1 + */ +gfsmAutomaton *gfsm_automaton_insert_automaton(gfsmAutomaton *fsm1, + gfsmStateId q1from, + gfsmStateId q1to, + gfsmAutomaton *fsm2, + gfsmWeight w); + +//@} + + +//------------------------------ +///\name Reversal +//@{ +/** Reverse an automaton \a fsm. + * \note Destructively alters \a fsm. + * + * \param fsm Automaton + * \returns \a fsm + */ +gfsmAutomaton *gfsm_automaton_reverse(gfsmAutomaton *fsm); + +//@} + +//------------------------------ +///\name Epsilon Removal +//@{ +/** + * Remove epsilon arcs from \a fsm. + * - Destructively alters \a fsm. + * - Multiple epsilon-paths between two states may not be weighted correctly in the output automaton. + * . 
+ * + * \warning negative-cost epsilon cycles in \a fsm will cause infinite recursion! + * + * \param fsm Automaton + * \returns \a fsm + */ +gfsmAutomaton *gfsm_automaton_rmepsilon(gfsmAutomaton *fsm); + +/** Pass-1 guts for gfsm_automaton_rmepsilon(): populates the mapping \a sp2wh + * with state-pairs (qid_noeps,qid_eps)=>weight for all + * \a qid_eps epsilon-reachable from \a qid_noeps in \a fsm + */ +void gfsm_automaton_rmeps_visit_state_(gfsmAutomaton *fsm, + gfsmStateId qid_noeps, + gfsmStateId qid_eps, + gfsmWeight weight_eps, + gfsmStatePair2WeightHash *sp2wh + ); + +/* Pass-2 for gfsm_automaton_rmepsilon(): arc-adoption iterator */ +//void gfsm_automaton_rmeps_pass2_foreach_func_(gfsmStatePair *sp, gpointer pw, gfsmAutomaton *fsm); +//@} + +//------------------------------ +///\name Alphabet Recognizer +//@{ +/** + * Make \a fsm an identity-transducer for alphabet \a abet + * \note Destructively alters \a fsm. + * + * \param fsm Automaton + * \returns \a fsm + */ +gfsmAutomaton *gfsm_automaton_sigma(gfsmAutomaton *fsm, gfsmAlphabet *abet); +//@} + +//------------------------------ +///\name Union +//@{ +/** Add the language or relation of \a fsm2 to \a fsm1. + * \note Destructively alters \a fsm1 + * + * \param fsm1 Automaton + * \param fsm2 Automaton + * \returns \a fsm1 + */ +gfsmAutomaton *gfsm_automaton_union(gfsmAutomaton *fsm1, gfsmAutomaton *fsm2); +//@} + +/** \file gfsmAlgebra.h + * \todo sigma() + * \todo bestpath() + * \todo encode() ? + * \todo equiv() ? + * \todo minimize() ? 
+ * \todo Regex compiler + * \todo deterministic union, tries + */ + +#endif /* _GFSM_ALGEBRA_H */ diff --git a/gfsm/gfsm/src/libgfsm/gfsmAlphabet.c b/gfsm/gfsm/src/libgfsm/gfsmAlphabet.c new file mode 100644 index 0000000..078f176 --- /dev/null +++ b/gfsm/gfsm/src/libgfsm/gfsmAlphabet.c @@ -0,0 +1,1026 @@ +/*=============================================================================*\ + * File: gfsmAlphabet.c + * Author: Bryan Jurish <moocow@ling.uni-potsdam.de> + * Description: finite state machine library: alphabet + * + * Copyright (c) 2004-2008 Bryan Jurish. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + *=============================================================================*/ + +#include <gfsmAlphabet.h> +#include <gfsmSet.h> +#include <gfsmUtils.h> +#include <gfsmError.h> + +#include <string.h> +#include <errno.h> +#include <ctype.h> +#include <stdlib.h> + +/*====================================================================== + * Constants + */ +gfsmUserAlphabetMethods gfsmUserAlphabetDefaultMethods = + { + NULL, //-- key_lookup + NULL, //-- lab_lookup + NULL, //-- insert + //NULL, //-- key_remove + NULL, //-- lab_remove + NULL, //-- key_read + NULL //-- key_write + }; + +/*====================================================================== + * Methods: Constructors + */ + +/*-------------------------------------------------------------- + * new() + */ +gfsmAlphabet *gfsm_alphabet_new(gfsmAType type) +{ + gfsmAlphabet *a=NULL; + switch (type) { + case gfsmATIdentity: + a = (gfsmAlphabet*)g_new0(gfsmIdentityAlphabet,1); + break; + case gfsmATPointer: + a = (gfsmAlphabet*)g_new0(gfsmPointerAlphabet,1); + break; + case gfsmATUser: + a = (gfsmAlphabet*)g_new0(gfsmUserAlphabet,1); + break; + case gfsmATString: + a = (gfsmAlphabet*)g_new0(gfsmStringAlphabet,1); + break; + case gfsmATUnknown: + case gfsmATRange: + default: + a = (gfsmAlphabet*)g_new0(gfsmRangeAlphabet,1); + break; + } + a->type = type; + a->lab_min = gfsmNoLabel; + a->lab_max = gfsmNoLabel; + return a; +} + +/*-------------------------------------------------------------- + * string_new() + */ +/*gfsmAlphabet *gfsm_string_alphabet_new(void) +{ + return gfsm_string_alphabet_init(g_new(gfsmStringAlphabet,1)); + } +*/ + +/*-------------------------------------------------------------- + * init() + */ +gfsmAlphabet *gfsm_alphabet_init(gfsmAlphabet *a) +{ + if (!a) return 
NULL; + + a->lab_min = gfsmNoLabel; + a->lab_max = gfsmNoLabel; + + switch (a->type) { + case gfsmATIdentity: + return gfsm_identity_alphabet_init((gfsmIdentityAlphabet*)a); + case gfsmATPointer: + return gfsm_pointer_alphabet_init((gfsmPointerAlphabet*)a,NULL,NULL,NULL,NULL); + case gfsmATUser: + return gfsm_user_alphabet_init((gfsmUserAlphabet*)a,NULL,NULL,NULL,NULL, + NULL,&gfsmUserAlphabetDefaultMethods); + case gfsmATString: + return gfsm_string_alphabet_init((gfsmStringAlphabet*)a,FALSE); + case gfsmATUnknown: + case gfsmATRange: + default: + break; + } + return a; +} + +/*-------------------------------------------------------------- + * range_init() + */ +gfsmAlphabet *gfsm_range_alphabet_init (gfsmRangeAlphabet *a, gfsmLabelVal min, gfsmLabelVal max) +{ + a->lab_min = min; + a->lab_max = max; + return a; +} + +/*-------------------------------------------------------------- + * identity_init() + */ +gfsmAlphabet *gfsm_identity_alphabet_init (gfsmIdentityAlphabet *a) +{ + gfsm_range_alphabet_init((gfsmRangeAlphabet*)a, gfsmNoLabel, gfsmNoLabel); + if (!a->labels) a->labels = gfsm_set_new(gfsm_uint_compare); + gfsm_set_clear(a->labels); + return (gfsmAlphabet*)a; +} + +/*-------------------------------------------------------------- + * pointer_init() + */ +gfsmAlphabet *gfsm_pointer_alphabet_init(gfsmPointerAlphabet *a, + gfsmAlphabetKeyDupFunc key_dup_func, + GHashFunc key_hash_func, + GEqualFunc key_equal_func, + GDestroyNotify key_free_func) +{ + gfsm_range_alphabet_init((gfsmRangeAlphabet*)a,gfsmNoLabel,gfsmNoLabel); + + if (a->keys2labels) g_hash_table_destroy(a->keys2labels); + if (a->labels2keys) g_ptr_array_free(a->labels2keys,TRUE); + + a->keys2labels = g_hash_table_new_full(key_hash_func, key_equal_func, key_free_func, NULL); + a->labels2keys = g_ptr_array_new(); + a->key_dup_func = key_dup_func; + + return (gfsmAlphabet*)a; +} + +/*-------------------------------------------------------------- + * string_init() + */ +gfsmAlphabet 
*gfsm_string_alphabet_init(gfsmStringAlphabet *a, gboolean do_copy) +{ + if (do_copy) + return gfsm_pointer_alphabet_init(a, + (gfsmAlphabetKeyDupFunc)gfsm_alphabet_strdup, + g_str_hash, g_str_equal, g_free); + return gfsm_pointer_alphabet_init(a, NULL, g_str_hash, g_str_equal, NULL); +} + +/*-------------------------------------------------------------- + * user_init() + */ +gfsmAlphabet *gfsm_user_alphabet_init(gfsmUserAlphabet *a, + gfsmAlphabetKeyDupFunc key_dup_func, + GHashFunc key_hash_func, + GEqualFunc key_equal_func, + GDestroyNotify key_destroy_func, + gpointer user_data, + gfsmUserAlphabetMethods *methods) +{ + gfsm_pointer_alphabet_init((gfsmPointerAlphabet*)a, + key_dup_func, + key_hash_func, + key_equal_func, + key_destroy_func); + a->data = user_data; + a->methods = methods ? (*methods) : gfsmUserAlphabetDefaultMethods; + return (gfsmAlphabet*)a; +} + +/*-------------------------------------------------------------- + * clear() + */ +void gfsm_alphabet_clear(gfsmAlphabet *a) +{ + switch (a->type) { + case gfsmATUnknown: + case gfsmATRange: + break; + case gfsmATIdentity: + gfsm_set_clear(((gfsmIdentityAlphabet*)a)->labels); + break; + case gfsmATPointer: + case gfsmATString: + g_ptr_array_set_size(((gfsmPointerAlphabet*)a)->labels2keys,0); + g_hash_table_foreach_remove(((gfsmPointerAlphabet*)a)->keys2labels, gfsm_hash_clear_func, NULL); + break; + case gfsmATUser: + default: + gfsm_alphabet_foreach(a, gfsm_alphabet_foreach_remove_func, NULL); + g_ptr_array_set_size(((gfsmPointerAlphabet*)a)->labels2keys,0); + break; + } + + a->lab_min = gfsmNoLabel; + a->lab_max = gfsmNoLabel; +} + +/*-------------------------------------------------------------- + * gfsm_alphabet_foreach_remove_func() + */ +gboolean gfsm_alphabet_foreach_remove_func(gfsmAlphabet *a, + gpointer key, + gfsmLabelVal lab, + gpointer data) +{ + gfsm_alphabet_remove_label(a,lab); + return FALSE; +} + +/*-------------------------------------------------------------- + * free() + */ +void 
gfsm_alphabet_free(gfsmAlphabet *a) +{ + /*-- tolerate NULL, as gfsm_alphabet_init() and g_free() do */ + if (!a) return; + + switch (a->type) { + case gfsmATIdentity: + /*-- identity alphabets carry a label set which must be released first */ + gfsm_set_free(((gfsmIdentityAlphabet*)a)->labels); + g_free((gfsmIdentityAlphabet*)a); + return; + case gfsmATUser: + case gfsmATPointer: + case gfsmATString: + /*-- pointer-based alphabets: release both lookup structures, then the struct itself */ + g_ptr_array_free(((gfsmPointerAlphabet*)a)->labels2keys,TRUE); + g_hash_table_destroy(((gfsmPointerAlphabet*)a)->keys2labels); + g_free((gfsmPointerAlphabet*)a); + return; + case gfsmATUnknown: + case gfsmATRange: + default: + /*-- no auxiliary data to release */ + break; + } + g_free(a); +} + +/*====================================================================== + * Methods: Utilities + */ + +/*-------------------------------------------------------------- + * gfsm_alphabet_foreach() + */ +gboolean gfsm_alphabet_foreach (gfsmAlphabet *a, + gfsmAlphabetForeachFunc func, + gpointer data) +{ + gfsmLabelVal lab; + gpointer key; + gboolean rc=FALSE; + for (lab = a->lab_min; !rc && lab <= a->lab_max && lab < gfsmNoLabel; lab++) { + if ((key=gfsm_alphabet_find_key(a,lab))==gfsmNoKey) continue; + rc = (*func)(a,key,lab,data); + } + return rc; +} + +/*-------------------------------------------------------------- + * strdup() + */ +gpointer gfsm_alphabet_strdup(gfsmAlphabet *a, const gchar *str) +{ return g_strdup(str); } + +/*====================================================================== + * Methods: Accessors + */ + +/*-------------------------------------------------------------- + * size() + */ +gfsmLabelVal gfsm_alphabet_size(gfsmAlphabet *a) +{ + guint n=0; + switch (a->type) { + case gfsmATIdentity: + return gfsm_set_size(((gfsmIdentityAlphabet*)a)->labels); + case gfsmATUser: + gfsm_alphabet_foreach(a, 
(gfsmAlphabetForeachFunc)gfsm_alphabet_foreach_size_func, &n); + return (gfsmLabelVal)n; + case gfsmATPointer: + case gfsmATString: + return ((gfsmPointerAlphabet*)a)->labels2keys->len; + case gfsmATUnknown: + case gfsmATRange: + default: + return (a->lab_min != gfsmNoLabel && a->lab_max != gfsmNoLabel + ? 
(a->lab_max - a->lab_min + 1) /*-- range [lab_min,lab_max] is inclusive, cf. find_label() and foreach() */ + : 0); + } + return n; +} + +/*-------------------------------------------------------------- + * foreach_size_func() + */ +gboolean gfsm_alphabet_foreach_size_func(gfsmAlphabet *a, + gpointer key, + gfsmLabelVal lab, + guint *np) +{ + /*-- count only fully defined (key,label) pairs; returning FALSE continues the iteration */ + if (key != gfsmNoKey && lab != gfsmNoLabel) ++(*np); + return FALSE; +} + +/*-------------------------------------------------------------- + * insert() + */ +gfsmLabelVal gfsm_alphabet_insert(gfsmAlphabet *a, gpointer key, gfsmLabelVal label) +{ + switch (a->type) { + + case gfsmATIdentity: + /*-- identity alphabets: the key itself is the label */ + gfsm_set_insert(((gfsmIdentityAlphabet*)a)->labels, key); + label = (gfsmLabelVal) GPOINTER_TO_UINT(key); + break; + + case gfsmATUser: + if (((gfsmUserAlphabet*)a)->methods.insert) { + if (((gfsmPointerAlphabet*)a)->key_dup_func) { + key = (*(((gfsmPointerAlphabet*)a)->key_dup_func))(((gfsmPointerAlphabet*)a),key); + } + label = (*(((gfsmUserAlphabet*)a)->methods.insert))((gfsmUserAlphabet*)a, key, label); + break; + } + /*-- no user insert() method: fall through to the pointer-alphabet default */ + + case gfsmATPointer: + case gfsmATString: + /*-- no label requested: use the next free index */ + if (label==gfsmNoLabel) + label = ((gfsmPointerAlphabet*)a)->labels2keys->len; + + if (label >= ((gfsmPointerAlphabet*)a)->labels2keys->len) + g_ptr_array_set_size(((gfsmPointerAlphabet*)a)->labels2keys, label+1); + + if (((gfsmPointerAlphabet*)a)->key_dup_func) + key = (*(((gfsmPointerAlphabet*)a)->key_dup_func))(((gfsmPointerAlphabet*)a),key); + + g_ptr_array_index(((gfsmPointerAlphabet*)a)->labels2keys, label) = key; + g_hash_table_insert(((gfsmPointerAlphabet*)a)->keys2labels, key, GUINT_TO_POINTER(label)); + break; + + case gfsmATUnknown: + case gfsmATRange: + default: + break; + } + + //-- 
range + if (label != gfsmNoLabel) { + if (a->lab_min==gfsmNoLabel || label < a->lab_min) a->lab_min = label; + if (a->lab_max==gfsmNoLabel || label > a->lab_max) a->lab_max = label; + } + + return label; +} + +/*-------------------------------------------------------------- + * get_full() + */ +gfsmLabelVal gfsm_alphabet_get_full(gfsmAlphabet *a, gpointer key, 
gfsmLabelVal label) +{ + gfsmLabelVal l = gfsm_alphabet_find_label(a,key); + if (l != gfsmNoLabel) { + //-- old mapping exists + if (label == gfsmNoLabel) return l; //-- ... but no new mapping was requested + gfsm_alphabet_remove_label(a,l); + } + return gfsm_alphabet_insert(a,key,label); +} + +/*-------------------------------------------------------------- + * find_label() + */ +gfsmLabelVal gfsm_alphabet_find_label (gfsmAlphabet *a, gconstpointer key) +{ + gpointer k, l; + + switch (a->type) { + + case gfsmATIdentity: + if (gfsm_set_contains(((gfsmIdentityAlphabet*)a)->labels, key)) + return (gfsmLabelVal)GPOINTER_TO_UINT(key); + break; + + case gfsmATUser: + if (((gfsmUserAlphabet*)a)->methods.key_lookup) + return (*(((gfsmUserAlphabet*)a)->methods.key_lookup))((gfsmUserAlphabet*)a, key); + + case gfsmATPointer: + case gfsmATString: + if ( g_hash_table_lookup_extended(((gfsmPointerAlphabet*)a)->keys2labels, key, &k, &l) ) + return (gfsmLabelVal)GPOINTER_TO_UINT(l); + break; + + case gfsmATUnknown: + case gfsmATRange: + default: + return ( ((gfsmLabelVal)GPOINTER_TO_UINT(key)) >= a->lab_min + && + ((gfsmLabelVal)GPOINTER_TO_UINT(key)) <= a->lab_max + ? 
((gfsmLabelVal)GPOINTER_TO_UINT(key)) + : gfsmNoLabel ); + } + + return gfsmNoLabel; +} + +/*-------------------------------------------------------------- + * find_key + */ +gpointer gfsm_alphabet_find_key(gfsmAlphabet *a, gfsmLabelVal label) +{ + switch (a->type) { + + case gfsmATIdentity: + if ( gfsm_set_contains(((gfsmIdentityAlphabet*)a)->labels, GUINT_TO_POINTER(label)) ) + return GUINT_TO_POINTER(label); + break; + + case gfsmATUser: + if (((gfsmUserAlphabet*)a)->methods.lab_lookup) + return (*(((gfsmUserAlphabet*)a)->methods.lab_lookup))((gfsmUserAlphabet*)a, label); + + case gfsmATPointer: + case gfsmATString: + if (label < ((gfsmPointerAlphabet*)a)->labels2keys->len) + return g_ptr_array_index(((gfsmPointerAlphabet*)a)->labels2keys,label); + break; + + case gfsmATUnknown: + case gfsmATRange: + default: + if (label >= a->lab_min && label <= a->lab_max) + return GUINT_TO_POINTER(label); + } + + return gfsmNoKey; +} + +/*-------------------------------------------------------------- + * get_key() + */ +gpointer gfsm_alphabet_get_key(gfsmAlphabet *a, gfsmLabelVal label) +{ + gpointer key; + if (label == gfsmNoLabel) return gfsmNoKey; + + key = gfsm_alphabet_find_key(a,label); + if (key != gfsmNoKey) return key; + gfsm_alphabet_get_full(a, gfsmNoKey, label); + + return gfsmNoKey; +} + +/*-------------------------------------------------------------- + * remove_key() + */ +void gfsm_alphabet_remove_key(gfsmAlphabet *a, gconstpointer key) +{ + gfsmLabelVal label; + + switch (a->type) { + + case gfsmATIdentity: + gfsm_set_remove(((gfsmIdentityAlphabet*)a)->labels, key); + break; + + case gfsmATUser: + if (((gfsmUserAlphabet*)a)->methods.lab_remove) { + label = gfsm_alphabet_find_label(a,key); + (*(((gfsmUserAlphabet*)a)->methods.lab_remove))((gfsmUserAlphabet*)a, label); + break; + } + + case gfsmATPointer: + case gfsmATString: + label = gfsm_alphabet_find_label(a,key); + g_hash_table_remove(((gfsmPointerAlphabet*)a)->keys2labels,key); + if (label != gfsmNoLabel 
&& label < ((gfsmPointerAlphabet*)a)->labels2keys->len) { + g_ptr_array_index(((gfsmPointerAlphabet*)a)->labels2keys, label) = NULL; + } + break; + + case gfsmATUnknown: + case gfsmATRange: + default: + break; + } + + //-- ranges + //(missing) +} + +/*-------------------------------------------------------------- + * remove_label() + */ +void gfsm_alphabet_remove_label(gfsmAlphabet *a, gfsmLabelVal label) +{ + gpointer key; + + switch (a->type) { + case gfsmATIdentity: + gfsm_set_remove(((gfsmIdentityAlphabet*)a)->labels, GUINT_TO_POINTER(label)); + break; + + case gfsmATUser: + if (((gfsmUserAlphabet*)a)->methods.lab_remove) { + (*(((gfsmUserAlphabet*)a)->methods.lab_remove))((gfsmUserAlphabet*)a, label); + break; + } + + case gfsmATPointer: + case gfsmATString: + if (label < ((gfsmPointerAlphabet*)a)->labels2keys->len) { + key = g_ptr_array_index(((gfsmPointerAlphabet*)a)->labels2keys,label); + g_ptr_array_index(((gfsmPointerAlphabet*)a)->labels2keys,label) = NULL; + g_hash_table_remove(((gfsmPointerAlphabet*)a)->keys2labels,key); + } + break; + + case gfsmATUnknown: + case gfsmATRange: + default: + break; + } +} + +/*-------------------------------------------------------------- + * union() + */ +gfsmAlphabet *gfsm_alphabet_union(gfsmAlphabet *a1, gfsmAlphabet *a2) +{ + gfsm_alphabet_foreach(a2, (gfsmAlphabetForeachFunc)gfsm_alphabet_foreach_union_func, a1); + return a1; +} + +/*-------------------------------------------------------------- + * union_func() + */ +gboolean gfsm_alphabet_foreach_union_func(gfsmAlphabet *src, + gpointer src_key, + gfsmLabelVal src_id, + gfsmAlphabet *dst) +{ + gfsm_alphabet_get_label(dst,src_key); + return FALSE; +} + +/*-------------------------------------------------------------- + * gfsm_alphabet_labels_to_array_func() + */ +gboolean gfsm_alphabet_labels_to_array_func(gfsmAlphabet *alph, + gpointer key, + gfsmLabelVal lab, + GPtrArray *ary) +{ + //g_array_append_val(ary, lab); + g_ptr_array_add(ary, GUINT_TO_POINTER(lab)); + 
return FALSE; +} + +/*-------------------------------------------------------------- + * gfsm_alphabet_labels_to_array() + */ +void gfsm_alphabet_labels_to_array(gfsmAlphabet *alph, GPtrArray *ary) +{ + gfsm_alphabet_foreach(alph, + (gfsmAlphabetForeachFunc)gfsm_alphabet_labels_to_array_func, + ary); + //return ary; +} + +/*====================================================================== + * Methods: I/O + */ + +/*-------------------------------------------------------------- + * string2key() + */ +gpointer gfsm_alphabet_string2key(gfsmAlphabet *a, GString *gstr) +{ + gpointer key=NULL; + + switch (a->type) { + + case gfsmATUser: + if (((gfsmUserAlphabet*)a)->methods.key_read) { + key = (*(((gfsmUserAlphabet*)a)->methods.key_read))((gfsmUserAlphabet*)a, gstr); + break; + } + + case gfsmATPointer: + case gfsmATString: + key = gstr->str; + break; + + case gfsmATUnknown: + case gfsmATRange: + case gfsmATIdentity: + default: + key = (gpointer)strtol(gstr->str,NULL,10); + break; + } + return key; +} + +/*-------------------------------------------------------------- + * key2string() + */ +void gfsm_alphabet_key2string(gfsmAlphabet *a, gpointer key, GString *gstr) +{ + switch (a->type) { + + case gfsmATUser: + if (((gfsmUserAlphabet*)a)->methods.key_write) { + (*(((gfsmUserAlphabet*)a)->methods.key_write))((gfsmUserAlphabet*)a, key, gstr); + break; + } + + case gfsmATPointer: + //-- ? 
+ case gfsmATString: + g_string_assign(gstr,key); + break; + + case gfsmATUnknown: + case gfsmATRange: + case gfsmATIdentity: + default: + g_string_printf(gstr,"%u", GPOINTER_TO_UINT(key)); + break; + } +} + +/*-------------------------------------------------------------- + * load_handle() + */ +gboolean gfsm_alphabet_load_handle (gfsmAlphabet *a, gfsmIOHandle *ioh, gfsmError **errp) +{ + int c; + gpointer key; + gfsmLabelVal label; + GString *s_key = g_string_new(""); + GString *s_lab = g_string_new(""); + + //if (!myname) myname = "gfsm_string_alphabet_load_file()"; + + do { + g_string_truncate(s_key,0); + g_string_truncate(s_lab,0); + + //-- read data fields into temp strings + for (c=gfsmio_getc(ioh); !gfsmio_eof(ioh) && isspace((char)c); c=gfsmio_getc(ioh)) ; + if (gfsmio_eof(ioh)) break; + + for (g_string_append_c(s_key,c), c=gfsmio_getc(ioh); + !gfsmio_eof(ioh) && !isspace((char)c); + c=gfsmio_getc(ioh)) + { + g_string_append_c(s_key,c); + } + + for ( ; !gfsmio_eof(ioh) && isspace((char)c); c=gfsmio_getc(ioh)) ; + if (gfsmio_eof(ioh)) break; + + for (g_string_append_c(s_lab,c), c=gfsmio_getc(ioh); + !gfsmio_eof(ioh) && !isspace((char)c); + c=gfsmio_getc(ioh)) + { + g_string_append_c(s_lab,c); + } + + for ( ; (char)c != '\n' && !gfsmio_eof(ioh); c=gfsmio_getc(ioh) ) ; + + //-- get actual key and label + key = gfsm_alphabet_string2key(a,s_key); + label = strtol(s_lab->str, NULL, 10); + if (gfsm_alphabet_find_label(a,key) != label) { + gfsm_alphabet_remove_key(a, key); + gfsm_alphabet_insert(a, key, label); + } + } while (!gfsmio_eof(ioh)); + + //-- cleanup + g_string_free(s_key,TRUE); + g_string_free(s_lab,TRUE); + return TRUE; +} + + +/*-------------------------------------------------------------- + * load_file() + */ +gboolean gfsm_alphabet_load_file (gfsmAlphabet *a, FILE *f, gfsmError **errp) +{ + gfsmIOHandle *ioh = gfsmio_new_zfile(f,"rb",-1); + gboolean rc = gfsm_alphabet_load_handle(a, ioh, errp); + if (ioh) { + gfsmio_close(ioh); + 
gfsmio_handle_free(ioh); + } + return rc; +} + +/*-------------------------------------------------------------- + * load_filename() + */ +gboolean gfsm_alphabet_load_filename (gfsmAlphabet *a, const gchar *filename, gfsmError **errp) +{ + gfsmIOHandle *ioh = gfsmio_new_filename(filename, "rb", -1, errp); + gboolean rc = ioh && !(*errp) && gfsm_alphabet_load_handle(a, ioh, errp); + if (ioh) { + gfsmio_close(ioh); + gfsmio_handle_free(ioh); + } + return rc; +} + + + +/*-------------------------------------------------------------- + * save_handle() + */ +gboolean gfsm_alphabet_save_handle(gfsmAlphabet *a, gfsmIOHandle *ioh, gfsmError **errp) +{ + gfsmAlphabetSaveFileData sfdata; + gboolean rc; + sfdata.ioh = ioh; + sfdata.errp = errp; + sfdata.gstr = g_string_new(""); + sfdata.field_sep = "\t"; + sfdata.record_sep = "\n"; + + //-- guts + rc = gfsm_alphabet_foreach(a, (gfsmAlphabetForeachFunc)gfsm_alphabet_save_file_func, &sfdata); + + //-- cleanup + g_string_free(sfdata.gstr,TRUE); + + return !rc; +} + + +/*-------------------------------------------------------------- + * save_file_full() + */ +gboolean gfsm_alphabet_save_file_full(gfsmAlphabet *a, FILE *f, int zlevel, gfsmError **errp) +{ + gfsmIOHandle *ioh = gfsmio_new_zfile(f,"wb",zlevel); + gboolean rc = ioh && !(*errp) && gfsm_alphabet_save_handle(a, ioh, errp); + if (ioh) { + gfsmio_close(ioh); + gfsmio_handle_free(ioh); + } + return rc; +} + +/*-------------------------------------------------------------- + * save_file() + */ +gboolean gfsm_alphabet_save_file(gfsmAlphabet *a, FILE *f, gfsmError **errp) +{ + return gfsm_alphabet_save_file_full(a,f,0,errp); +} + + +/*-------------------------------------------------------------- + * save_filename_full() + */ +gboolean gfsm_alphabet_save_filename_full (gfsmAlphabet *a, const gchar *filename, int zlevel, gfsmError **errp) +{ + gfsmIOHandle *ioh = gfsmio_new_filename(filename,"wb",zlevel,errp); + gboolean rc = ioh && !(*errp) && gfsm_alphabet_save_handle(a, 
ioh, errp); + if (ioh) { + gfsmio_close(ioh); + gfsmio_handle_free(ioh); + } + return rc; +} + +/*-------------------------------------------------------------- + * save_filename() + */ +gboolean gfsm_alphabet_save_filename (gfsmAlphabet *a, const gchar *filename, gfsmError **errp) +{ + return gfsm_alphabet_save_filename_full(a,filename,0,errp); +} + + +/*-------------------------------------------------------------- + * save_file_func() + */ +gboolean gfsm_alphabet_save_file_func(gfsmAlphabet *a, + gpointer key, + gfsmLabelVal lab, + gfsmAlphabetSaveFileData *sfdata) +{ + gfsm_alphabet_key2string(a,key,sfdata->gstr); + gfsmio_printf(sfdata->ioh, + "%s%s%u%s", + sfdata->gstr->str, sfdata->field_sep, lab, sfdata->record_sep); + return (sfdata->errp && *(sfdata->errp)); +} + + + +/*====================================================================== + * Methods: String Alphabet Utilities + */ + +/*-------------------------------------------------------------- + * gfsm_alphabet_string_to_labels() + */ +gfsmLabelVector *gfsm_alphabet_string_to_labels(gfsmAlphabet *abet, + const gchar *str, + gfsmLabelVector *vec, + gboolean warn_on_undefined) +{ + gfsmLabelVal lab; + const gchar *s = str; + gchar cs[2] = {0,0}; + + //-- setup vector + if (vec==NULL) { + vec = g_ptr_array_sized_new(strlen(str)); + } else { + g_ptr_array_set_size(vec, 0); + } + + for (; *s; s++) { + cs[0] = *s; + lab = gfsm_alphabet_find_label(abet, cs); + + //-- check for non-existant labels + if (lab==gfsmNoLabel) { + if (warn_on_undefined) { + gfsm_carp(g_error_new(g_quark_from_static_string("gfsm"), //--domain + g_quark_from_static_string("gfsm_alphabet_string_to_labels"), //-- code + "Warning: unknown character '%c' in string '%s' -- skipping.", + *s, str)); + } + continue; + } + + g_ptr_array_add(vec, GUINT_TO_POINTER(lab)); + } + + return vec; +} + +/*-------------------------------------------------------------- + * gfsm_alphabet_att_string_to_labels() + */ +gfsmLabelVector 
*gfsm_alphabet_att_string_to_labels(gfsmAlphabet *abet, + const gchar *str, + gfsmLabelVector *vec, + gboolean warn_on_undefined) +{ + gfsmLabelVal lab; + const gchar *s = str; + GString *gs = g_string_sized_new(4); + gchar mode = 0; + + //-- setup vector + if (vec==NULL) { + vec = g_ptr_array_sized_new(strlen(str)); + } else { + g_ptr_array_set_size(vec, 0); + } + + //-- loop(str): beginning of next symbol + for (; *s; s++) { + switch (mode) { + case '[': + //-- bracket-escape mode + if (*s==']') { mode=0; } + else { g_string_append_c(gs,*s); continue; } + break; + + case '\\': + //-- backslash-escape mode + g_string_append_c(gs,*s); + mode = 0; + break; + + default: + case 0: + //-- outer (unescaped) mode + if (*s == '[') { mode='['; continue; } + else if (*s == '\\') { mode='\\'; continue; } + else if (isspace(*s)) { continue; } //-- ignore spaces + //-- plain single-character symbol: set key-string + g_string_append_c(gs,*s); + break; + } + + //-- lookup key + lab = gfsm_alphabet_find_label(abet, gs->str); + + //-- check for non-existant labels + if (lab==gfsmNoLabel) { + if (warn_on_undefined) { + gfsm_carp(g_error_new(g_quark_from_static_string("gfsm"), //--domain + g_quark_from_static_string("gfsm_alphabet_att_string_to_labels"), //-- code + "Warning: unknown symbol [%s] in string '%s' -- skipping.", + gs->str, str)); + } + g_string_truncate(gs,0); + continue; + } + + //-- add to vector + g_ptr_array_add(vec, GUINT_TO_POINTER(lab)); + g_string_truncate(gs,0); + } + + //-- cleanup + g_string_free(gs,TRUE); + + return vec; +} + +/*-------------------------------------------------------------- + * gfsm_alphabet_generic_string_to_labels() + */ +gfsmLabelVector *gfsm_alphabet_generic_string_to_labels(gfsmAlphabet *abet, + const gchar *str, + gfsmLabelVector *vec, + gboolean warn_on_undefined, + gboolean att_mode) +{ + return (att_mode + ? 
gfsm_alphabet_att_string_to_labels(abet,str,vec,warn_on_undefined) + : gfsm_alphabet_string_to_labels(abet,str,vec,warn_on_undefined)); +} + + + +/*-------------------------------------------------------------- + * gfsm_alphabet_labels_to_gstring() + */ +GString *gfsm_alphabet_labels_to_gstring(gfsmAlphabet *abet, + gfsmLabelVector *vec, + GString *gstr, + gboolean warn_on_undefined, + gboolean att_style) +{ + gfsmLabelVal lab; + const gchar *sym; + int i; + + //-- setup GString + if (gstr==NULL) { + gstr = g_string_new_len("",vec->len); + } + + //-- lookup & append symbols + for (i=0; i < vec->len; i++) { + lab = (gfsmLabelVal)GPOINTER_TO_UINT(g_ptr_array_index(vec,i)); + sym = (const gchar*)gfsm_alphabet_find_key(abet,lab); + + //-- check for unknown labels + if (sym==NULL) { + if (warn_on_undefined) { + gfsm_carp(g_error_new(g_quark_from_static_string("gfsm"), //--domain + g_quark_from_static_string("gfsm_alphabet_labels_to_gstring"), //-- code + "Warning: unknown label '%d' -- skipping.", + lab)); + } + continue; + } + + //-- append the symbol to the output string + if (att_style) { + if (strlen(sym)==1) { + g_string_append_c(gstr,sym[0]); + } + else { + g_string_append_c(gstr,'['); + g_string_append(gstr,sym); + g_string_append_c(gstr,']'); + } + } else { //-- !att_style + if (i != 0) g_string_append_c(gstr,' '); + g_string_append(gstr, sym); + } + } + + return gstr; +} + +/*-------------------------------------------------------------- + * gfsm_alphabet_labels_to_string() + */ +char *gfsm_alphabet_labels_to_string(gfsmAlphabet *abet, + gfsmLabelVector *vec, + gboolean warn_on_undefined, + gboolean att_style) +{ + GString *gstr = g_string_new(""); + gfsm_alphabet_labels_to_gstring(abet,vec,gstr,warn_on_undefined,att_style); + char *str = gstr->str; + g_string_free(gstr,FALSE); + return str; +} diff --git a/gfsm/gfsm/src/libgfsm/gfsmAlphabet.h b/gfsm/gfsm/src/libgfsm/gfsmAlphabet.h new file mode 100644 index 0000000..5ae80cd --- /dev/null +++ 
b/gfsm/gfsm/src/libgfsm/gfsmAlphabet.h @@ -0,0 +1,450 @@ + +/*=============================================================================*\ + * File: gfsmAlphabet.h + * Author: Bryan Jurish <moocow@ling.uni-potsdam.de> + * Description: finite state machine library: alphabet + * + * Copyright (c) 2004-2008 Bryan Jurish. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + *=============================================================================*/ + +/** \file gfsmAlphabet.h + * \brief Map between gfsmLabelIds and external objects + */ + +#ifndef _GFSM_ALPHABET_H +#define _GFSM_ALPHABET_H + +#include <gfsmCommon.h> +#include <gfsmSet.h> +#include <gfsmIO.h> + +/*====================================================================== + * Alphabet: Flags + */ +/** Enumeration of builtin alphabet types */ +typedef enum { + gfsmATUnknown = 0, ///< unknown alphabet type + gfsmATRange = 1, ///< alphabet type for label ranges + gfsmATIdentity = 2, ///< alphabet type for sparse identity alphabets + gfsmATPointer = 3, ///< pointer-hashing alphabet + gfsmATString = 4, ///< string alphabet + gfsmATUser = 256 ///< user-defined alphabet +} gfsmAType; + +/*====================================================================== + * Alphabet: Basic Types + */ +/// Generic alphabet structure 
+typedef struct { + gfsmAType type; /**< alphabet type */ + gfsmLabelVal lab_min; /**< minimum label */ + gfsmLabelVal lab_max; /**< maximum label */ +} gfsmAlphabet; + +/// Ranged alphabet structure +typedef gfsmAlphabet gfsmRangeAlphabet; + +/// Sparse identity alphabet structure +typedef struct { + gfsmAlphabet a; /**< inheritance magic */ + gfsmSet *labels; /**< known labels */ +} gfsmIdentityAlphabet; + +// Pointer-hashing alphabet structure (forward decl) +struct gfsmPointerAlphabet_; + +// User-extendable alphabet structure (forward decl) +struct gfsmUserAlphabet_; + +//@} + +/*====================================================================== + * Alphabet: Function types + */ +///\name Alphabet: Function Types +//@{ +/** Type for key-duplication functions */ +typedef gpointer (*gfsmAlphabetKeyDupFunc) (struct gfsmPointerAlphabet_ *a, gpointer key); + +/** Type for alphabet (key->label) lookup functions */ +typedef gfsmLabelVal (*gfsmAlphabetKeyLookupFunc) (struct gfsmUserAlphabet_ *a, gconstpointer key); + +/** Type for alphabet (label->key) lookup functions */ +typedef gpointer (*gfsmAlphabetLabLookupFunc) (struct gfsmUserAlphabet_ *a, gfsmLabelVal lab); + +/** Type for alphabet insertion functions */ +typedef gfsmLabelVal (*gfsmAlphabetInsertFunc) (struct gfsmUserAlphabet_ *a, gpointer key, gfsmLabelVal lab); + +/** Type for alphabet key removal functions (unused) */ +typedef void (*gfsmAlphabetKeyRemoveFunc) (struct gfsmUserAlphabet_ *a, gpointer key); + +/** Type for alphabet label removal functions */ +typedef void (*gfsmAlphabetLabRemoveFunc) (struct gfsmUserAlphabet_ *a, gfsmLabelVal lab); + +/** Type for alphabet string input functions (should return a static key) */ +typedef gpointer (*gfsmAlphabetKeyReadFunc) (struct gfsmUserAlphabet_ *a, GString *gstr); + +/** Type for alphabet string output functions (should write to @str) */ +typedef void (*gfsmAlphabetKeyWriteFunc) (struct gfsmUserAlphabet_ *a, gconstpointer key, GString *str); + +/// 
/// method table for user-defined alphabets
/// (the alphabet code checks \c lab_lookup, \c lab_remove, \c key_read and \c key_write
///  for NULL before calling them and otherwise falls back to the builtin pointer-alphabet behaviour)
typedef struct {
  gfsmAlphabetKeyLookupFunc key_lookup; /**< key->label lookup function */
  gfsmAlphabetLabLookupFunc lab_lookup; /**< label->key lookup function; if NULL, the label->key table of the underlying pointer alphabet is used */
  gfsmAlphabetInsertFunc    insert;     /**< insertion function: receives a newly copied key! */
  gfsmAlphabetLabRemoveFunc lab_remove; /**< label removal function; if NULL, the entry is removed from the tables of the underlying pointer alphabet */
  gfsmAlphabetKeyReadFunc   key_read;   /**< key input function; if NULL, the input string buffer itself is used as the key */
  gfsmAlphabetKeyWriteFunc  key_write;  /**< key output function; if NULL, the key is treated as a C string and copied to the output */
} gfsmUserAlphabetMethods;
+ */ +#define gfsm_identity_alphabet_new() \ + gfsm_identity_alphabet_init((gfsmIdentityAlphabet*)gfsm_alphabet_new(gfsmATIdentity)) + +/** Create and initialize a new string alphabet. + * You do not need to call an init() function for the returned alphabet. + */ +#define gfsm_string_alphabet_new_full(docopy) \ + gfsm_string_alphabet_init((gfsmStringAlphabet*)gfsm_alphabet_new(gfsmATString),(docopy)) + +/** Create and initialize a new string alphabet which copies keys. + * You do not need to call an init() function for the returned alphabet. + */ +#define gfsm_string_alphabet_new() gfsm_string_alphabet_new_full(TRUE) + +/** Create and initialize a new range alphabet. + * You do not need to call an init() function for the returned alphabet. */ +#define gfsm_range_alphabet_new() \ + gfsm_range_alphabet_init((gfsmRangeAlphabet*)gfsm_alphabet_new(gfsmATRange), \ + gfsmNoLabel, gfsmNoLabel) + +/** Create and initialize a new pointer alphabet. + * You do not need to call an init() function for the returned alphabet. */ +#define gfsm_pointer_alphabet_new(key_dup_f, key_hash_f, key_eq_f, key_free_f) \ + gfsm_pointer_alphabet_init((gfsmPointerAlphabet*)gfsm_alphabet_new(gfsmATPointer),\ + key_dup_f, key_hash_f, key_eq_f, key_free_f) + + +/** Initialize a builtin alphabet (depending on \a a->type) + * This really only works well identity, range, and string alphabets, + * as well as for literal pointer alphabets (without copy and/or free) + * and for user alphabets using literal pointers. 
+ */ +gfsmAlphabet *gfsm_alphabet_init(gfsmAlphabet *a); + + +/** Initialize a range alphabet */ +gfsmAlphabet *gfsm_range_alphabet_init (gfsmRangeAlphabet *a, gfsmLabelVal min, gfsmLabelVal max); + +/** Initialize a sparse identity alphabet */ +gfsmAlphabet *gfsm_identity_alphabet_init (gfsmIdentityAlphabet *a); + +/** Initialize a pointer alphabet */ +gfsmAlphabet *gfsm_pointer_alphabet_init (gfsmPointerAlphabet *a, + gfsmAlphabetKeyDupFunc key_dup_func, + GHashFunc key_hash_func, + GEqualFunc key_equal_func, + GDestroyNotify key_destroy_func); + +/** Initialize a string alphabet */ +gfsmAlphabet *gfsm_string_alphabet_init (gfsmStringAlphabet *a, gboolean do_copy); + + +/** Initialize a user alphabet */ +gfsmAlphabet *gfsm_user_alphabet_init(gfsmUserAlphabet *a, + gfsmAlphabetKeyDupFunc key_dup_func, + GHashFunc key_hash_func, + GEqualFunc key_equal_func, + GDestroyNotify key_destroy_func, + gpointer user_data, + gfsmUserAlphabetMethods *methods); + +/** Clear all labels and keys from a gfsmAlphabet */ +void gfsm_alphabet_clear(gfsmAlphabet *a); + +/** foreach utility function to clear user alphabets */ +gboolean gfsm_alphabet_foreach_remove_func (gfsmAlphabet *a, + gpointer key, + gfsmLabelVal lab, + gpointer data); + +/** Free all memory allocated by a gfsmAlphabet */ +void gfsm_alphabet_free(gfsmAlphabet *a); +//@} + +/*====================================================================== + * Methods: Utilties + */ +///\name Utilities +//@{ +/** Type for alphabet iterator functions. 
+ * Functions should return \a TRUE to stop the traversal + */ +typedef gboolean (*gfsmAlphabetForeachFunc) (gfsmAlphabet *a, + gpointer key, + gfsmLabelVal lab, + gpointer data); + +/** General iteration utility */ +gboolean gfsm_alphabet_foreach (gfsmAlphabet *a, + gfsmAlphabetForeachFunc func, + gpointer data); + +/** dup function for string alphabets */ +gpointer gfsm_alphabet_strdup(gfsmAlphabet *a, const gchar *str); +//@} + +/*====================================================================== + * Methods: Accessors + */ +/// \name Accessors +///@{ +/** Get number of elements in the alphabet */ +gfsmLabelVal gfsm_alphabet_size(gfsmAlphabet *a); + +/** Utility for counting size of user alphabets (linear time) */ +gboolean gfsm_alphabet_foreach_size_func(gfsmAlphabet *a, + gpointer key, + gfsmLabelVal lab, + guint *np); + +/** + * Insert a \a (key,label) pair into the alphabet. + * If \a label is \a gfsmNoLabel, a new label will be assigned. + * \note No sanity checks are performed. + * + * \returns the new label for \a key + */ +gfsmLabelVal gfsm_alphabet_insert(gfsmAlphabet *a, gpointer key, gfsmLabelVal label); + +/** Get or assign a label for \a key. + * If \a label is gfsmNoLabel, a new label will be assigned for \a key if none exists. + * \returns label for \a key + */ +gfsmLabelVal gfsm_alphabet_get_full(gfsmAlphabet *a, gpointer key, gfsmLabelVal label); + +/** Get label for \a key or assign a new one if none exists. + * \returns label for \a key + */ +#define gfsm_alphabet_get_label(a,key) gfsm_alphabet_get_full(a,key,gfsmNoLabel) + +/** Lookup label for \a key. + * \returns label for \a key, or gfsmNoLabel if none is defined. + */ +gfsmLabelVal gfsm_alphabet_find_label(gfsmAlphabet *a, gconstpointer key); + +/** Lookup key for \a label + * \returns pointer to key for \a label, or \a NULL if no key is defined. + */ +gpointer gfsm_alphabet_find_key(gfsmAlphabet *a, gfsmLabelVal label); + +/** Get key for \a label or assign gfsmNoKey if none exists. 
+ * \returns key for \a label + */ +gpointer gfsm_alphabet_get_key(gfsmAlphabet *a, gfsmLabelVal label); + +/** Remove mapping for \a key (and associated label, if any) */ +void gfsm_alphabet_remove_key(gfsmAlphabet *a, gconstpointer key); + +/** Remove mapping for \a label (and associated key, if any) */ +void gfsm_alphabet_remove_label(gfsmAlphabet *a, gfsmLabelVal label); + +/** Add all keys from alphabet \a a2 to \a a1. \returns \a a1 */ +gfsmAlphabet *gfsm_alphabet_union(gfsmAlphabet *a1, gfsmAlphabet *a2); + +/** foreach utility func for union() */ +gboolean gfsm_alphabet_foreach_union_func(gfsmAlphabet *src, + gpointer src_key, + gfsmLabelVal src_id, + gfsmAlphabet *dst); + +/** Append all defined labels to a GPtrArray of (gfsmLabelVal)s */ +void gfsm_alphabet_labels_to_array(gfsmAlphabet *alph, GPtrArray *ary); + +//@} + +/*====================================================================== + * Methods: I/O + */ +///\name I/O +//@{ + +/** Convert a string to a temporary key, used by load(). + * If you allocate anything here, you need to free it yourself. + */ +gpointer gfsm_alphabet_string2key(gfsmAlphabet *a, GString *gstr); + +/** Convert a key to a constant string, used by save() */ +void gfsm_alphabet_key2string(gfsmAlphabet *a, gpointer key, GString *gstr); + + +/** Load a string alphabet from a stream. Returns true on success */ +gboolean gfsm_alphabet_load_handle (gfsmAlphabet *a, gfsmIOHandle *ioh, gfsmError **errp); + +/** Load a string alphabet from a stream. 
Returns true on success */ +gboolean gfsm_alphabet_load_file (gfsmAlphabet *a, FILE *f, gfsmError **errp); + +/** Load a string alphabet from a named file */ +gboolean gfsm_alphabet_load_filename (gfsmAlphabet *a, const gchar *filename, gfsmError **errp); + + +/** Save a string alphabet to a gfsmIOHandle* */ +gboolean gfsm_alphabet_save_handle(gfsmAlphabet *a, gfsmIOHandle *ioh, gfsmError **errp); + +/** Save a string alphabet to a stream (uncompressed) */ +gboolean gfsm_alphabet_save_file (gfsmAlphabet *a, FILE *f, gfsmError **errp); + +/** Save a string alphabet to a (compressed) stream */ +gboolean gfsm_alphabet_save_file_full (gfsmAlphabet *a, FILE *f, int zlevel, gfsmError **errp); + +/** Save a string alphabet to a named file (uncompressed) */ +gboolean gfsm_alphabet_save_filename (gfsmAlphabet *a, const gchar *filename, gfsmError **errp); + +/** Save a string alphabet to a (compressed) named file */ +gboolean gfsm_alphabet_save_filename_full (gfsmAlphabet *a, const gchar *filename, int zlevel, gfsmError **errp); + +/// datatype used for save_file() iteration +typedef struct { + gfsmIOHandle *ioh; + gfsmError **errp; + GString *gstr; + gchar *field_sep; + gchar *record_sep; +} gfsmAlphabetSaveFileData; + +/** save_file iterator func */ +gboolean gfsm_alphabet_save_file_func(gfsmAlphabet *a, + gpointer key, + gfsmLabelVal lab, + gfsmAlphabetSaveFileData *sfdata); +//@} + +/*====================================================================== + * String Alphabet Utilties + */ +///\name String Alphabet Utilities +//@{ + +/** Convert an ASCII string character-wise to a vector of (gfsmLabel)s. + * \a vec is not cleared -- use g_ptr_array_set_size() for that. + * \returns \a vec if non-\a NULL, otherwise a new gfsmLabelVector. + * \a abet should be a gfsmStringAlphabet. 
+ */ +gfsmLabelVector *gfsm_alphabet_string_to_labels(gfsmAlphabet *abet, + const gchar *str, + gfsmLabelVector *vec, + gboolean warn_on_undefined); + +/** Convert an ASCII GString character-wise to a vector of (gfsmLabel)s. + * \a vec is not cleared -- use g_ptr_array_set_size() for that. + * \returns \a vec if non-\a NULL, otherwise a new gfsmLabelVector. + * \a abet should be a gfsmStringAlphabet. + */ +#define gfsm_alphabet_gstring_to_labels(abet,gstr,vec,warn) \ + gfsm_alphabet_string_to_labels((abet),(gstr)->str,(vec),(warn)) + + +/** Convert an ASCII string in AT&T syntax to a vector of (gfsmLabel)s. + * \a vec is not cleared -- use g_ptr_array_set_size() for that. + * \returns \a vec if non-\a NULL, otherwise a new gfsmLabelVector. + * \a abet should be a gfsmStringAlphabet. + */ +gfsmLabelVector *gfsm_alphabet_att_string_to_labels(gfsmAlphabet *abet, + const gchar *str, + gfsmLabelVector *vec, + gboolean warn_on_undefined); + +/** Convert an ASCII string to a vector of (gfsmLabel)s, + * using either ::gfsm_alphabet_string_to_labels() or ::gfsm_alphabet_att_string_to_labels(). + * \param abet,str,vec,warn_undef as for ::gfsm_alphabet_string_to_labels(). + * \param att_mode if true, \c str is parsed as att-syntax, otherwise character-wise + * \returns as for ::gfsm_alphabet_string_to_labels() + */ +gfsmLabelVector *gfsm_alphabet_generic_string_to_labels(gfsmAlphabet *abet, + const gchar *str, + gfsmLabelVector *vec, + gboolean warn_on_undefined, + gboolean att_mode); + +/** Convert a gfsmLabelVector to a GString. + * \a gstr is not cleared. + * \returns \a gstr if non-\a NULL, otherwise a new GString*. + * \a abet should be a gfsmStringAlphabet. + */ +GString *gfsm_alphabet_labels_to_gstring(gfsmAlphabet *abet, + gfsmLabelVector *vec, + GString *gstr, + gboolean warn_on_undefined, + gboolean att_style); + +/** Convert a gfsmLabelVector to a new string. + * \a gstr is not cleared. + * \returns \a gstr if non-\a NULL, otherwise a new GString*. 
+ * \a abet should be a gfsmStringAlphabet. + */ +char *gfsm_alphabet_labels_to_string(gfsmAlphabet *abet, + gfsmLabelVector *vec, + gboolean warn_on_undefined, + gboolean att_style); +//@} + +#endif /*_GFSM_ALPHABET_H */ diff --git a/gfsm/gfsm/src/libgfsm/gfsmArc.c b/gfsm/gfsm/src/libgfsm/gfsmArc.c new file mode 100644 index 0000000..6d88072 --- /dev/null +++ b/gfsm/gfsm/src/libgfsm/gfsmArc.c @@ -0,0 +1,192 @@ +/*=============================================================================*\ + * File: gfsmArc.c + * Author: Bryan Jurish <moocow@ling.uni-potsdam.de> + * Description: finite state machine library: arcs + * + * Copyright (c) 2004-2008 Bryan Jurish. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + *=============================================================================*/ + +#include <gfsmArc.h> +#include <stdlib.h> + +#ifndef GFSM_INLINE_ENABLED +//-- no inline definitions +# include <gfsmArc.hi> +#endif + +/*====================================================================== + * Constants (none) + */ + +/*====================================================================== + * Methods: Arcs: Constructors etc. 
+ */ +//--inline + +/*====================================================================== + * Arc Comparisons (old) + */ + + +/*-------------------------------------------------------------- + * sortmode_to_name() + */ +#if 0 +const gchar *gfsm_arc_sortmode_to_name(gfsmArcSortMode mode) +{ + switch (mode) { + case gfsmASMNone: return "none"; + case gfsmASMLower: return "lower"; + case gfsmASMUpper: return "upper"; + case gfsmASMWeight: return "weight"; + default: return "unknown"; + } +} +#endif + + +/*====================================================================== + * Arc Comparisons (new) + */ + +/*-------------------------------------------------------------- + * acmask_from_chars() + */ +gfsmArcCompMask gfsm_acmask_from_chars(const char *maskchars) +{ + gfsmArcCompMask m = 0; + gint i; + gint nth=0; + for (i=0; maskchars && maskchars[i] && nth < gfsmACMaxN; i++) { + switch (maskchars[i]) { + case 'l' : m |= gfsm_acmask_new(gfsmACLower, nth++); break; + case 'L' : m |= gfsm_acmask_new(gfsmACLowerR,nth++); break; + + case 'u' : m |= gfsm_acmask_new(gfsmACUpper, nth++); break; + case 'U' : m |= gfsm_acmask_new(gfsmACUpperR,nth++); break; + + case 'w' : m |= gfsm_acmask_new(gfsmACWeight, nth++); break; + case 'W' : m |= gfsm_acmask_new(gfsmACWeightR,nth++); break; + + case 's' : m |= gfsm_acmask_new(gfsmACSource, nth++); break; + case 'S' : m |= gfsm_acmask_new(gfsmACSourceR,nth++); break; + + case 't' : m |= gfsm_acmask_new(gfsmACTarget, nth++); break; + case 'T' : m |= gfsm_acmask_new(gfsmACTargetR,nth++); break; + + case 'x' : m |= gfsm_acmask_new(gfsmACUser, nth++); break; + case 'X' : m |= gfsm_acmask_new(gfsmACUserR,nth++); break; + + //-- silently ignore these + case '_': + case '-': + case ',': + case ' ': + case '\t': + case '\n': + break; + + default: + g_printerr("libgfsm: character '%c' is not in [stluwxSTLUWX_] in mode string '%s' - skipping\n", + maskchars[i], maskchars); + break; + } + } + return m; +} + 
+/*-------------------------------------------------------------- + * acmask_from_args() + */ +gfsmArcCompMask gfsm_acmask_from_args(gfsmArcComp cmp0, ...) +{ + gfsmArcCompMask m=0; + gfsmArcComp cmp; + gint nth=0; + va_list ap; + + va_start(ap,cmp0); + for (cmp=cmp0; cmp!=0 && nth < gfsmACMaxN; nth++, cmp=va_arg(ap,gfsmArcComp)) { + m |= gfsm_acmask_new(cmp,nth); + } + va_end(ap); + + return m; +} + +/*-------------------------------------------------------------- + * compare_bymask() + */ +gint gfsm_arc_compare_bymask(gfsmArc *a1, gfsmArc *a2, gfsmArcCompData *acdata) +{ return gfsm_arc_compare_bymask_inline(a1,a2,acdata); } + + +/*-------------------------------------------------------------- + * acmask_to_chars() + */ +gchar *gfsm_acmask_to_chars(gfsmArcCompMask m, gchar *chars) +{ + gint nth; + if (!chars) { chars = g_new0(gchar,gfsmACMaxN+1); } + for (nth=0; nth < gfsmACMaxN; nth++) { + chars[nth] = gfsm_acmask_nth_char(m,nth); + } + chars[gfsmACMaxN] = '\0'; + return chars; +} + +/*-------------------------------------------------------------- + * acmask_nth_string() + */ +const gchar *gfsm_acmask_nth_string(gfsmArcCompMask m, gint nth) +{ + switch (gfsm_acmask_nth(m,nth)) { + case gfsmACLower: return "lower"; + case gfsmACUpper: return "upper"; + case gfsmACWeight: return "weight"; + case gfsmACSource: return "source"; + case gfsmACTarget: return "target"; + // + case gfsmACLowerR: return "reverse_lower"; + case gfsmACUpperR: return "reverse_upper"; + case gfsmACWeightR: return "reverse_weight"; + case gfsmACSourceR: return "reverse_source"; + case gfsmACTargetR: return "reverse_target"; + // + case gfsmACUser: return "user"; + case gfsmACUserR: return "reverse_user"; + case gfsmACNone: return "none"; + case gfsmACReverse: return "reverse_none"; + default: return "?"; + } + return "?"; +} + +/*-------------------------------------------------------------- + * acmask_to_gstring() + */ +GString *gfsm_acmask_to_gstring(gfsmArcCompMask m, GString *gstr) +{ + 
gint nth; + if (!gstr) { gstr = g_string_sized_new(96); } + else { g_string_truncate(gstr,0); } + for (nth=0; nth < gfsmACMaxN; nth++) { + if (nth) { g_string_append(gstr, ", "); } + g_string_append(gstr, gfsm_acmask_nth_string(m,nth)); + } + return gstr; +} diff --git a/gfsm/gfsm/src/libgfsm/gfsmArc.h b/gfsm/gfsm/src/libgfsm/gfsmArc.h new file mode 100644 index 0000000..3ea6e22 --- /dev/null +++ b/gfsm/gfsm/src/libgfsm/gfsmArc.h @@ -0,0 +1,305 @@ +/*=============================================================================*\ + * File: gfsmArc.h + * Author: Bryan Jurish <moocow@ling.uni-potsdam.de> + * Description: finite state machine library: arcs + * + * Copyright (c) 2004-2008 Bryan Jurish. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + *=============================================================================*/ + +/** \file gfsmArc.h + * \brief Arc (transition) definitions & utilities + */ + +#ifndef _GFSM_ARC_H +#define _GFSM_ARC_H + +#include <gfsmSemiring.h> +#include <stdarg.h> + +/// "Heavy" arc structure +typedef struct { + gfsmStateId source; /**< ID of source node */ + gfsmStateId target; /**< ID of target node */ + gfsmLabelId lower; /**< Lower label */ + gfsmLabelId upper; /**< Upper label */ + gfsmWeight weight; /**< arc weight */ +} gfsmArc; + +/// Type for identifying arc-label "sides" in a transducer (lower vs. upper) +typedef enum { + gfsmLSBoth = 0, ///< Both sides (lower and upper) + gfsmLSLower = 1, ///< Lower side only + gfsmLSUpper = 2 ///< Upper side only +} gfsmLabelSide; + +//---------------------------------------------------------------------- +// arc sorting (new) + +/// Enum type for elementary builtin comparisons on ::gfsmArc +/** \since v0.0.10 */ +typedef enum { + gfsmACNone = 0x0, /**< '_': no comparison at all */ + // + //-- Forward (ascending) + gfsmACLower = 0x1, /**< 'l': compare lower labels (ascending order) */ + gfsmACUpper = 0x2, /**< 'u': compare upper labels (ascending order) */ + gfsmACWeight = 0x3, /**< 'w': compare semiring weights (ascending order) */ + gfsmACSource = 0x4, /**< 's': compare source states (if supported and meaningful, ascending order) */ + gfsmACTarget = 0x5, /**< 't': compare target states (if supported and meaningful, ascending order) */ + gfsmACUser = 0x6, /**< 'x': pseudo-field indicating a user-defined comparison */ + gfsmACUnused1 = 0x7, /**< unused */ + // + //-- Reverse (descending) + gfsmACReverse = 0x8, /**< not really a comparison: the bit 0x8 is set for all "reverse" comparisons */ + gfsmACLowerR = 
0x9, /**< 'L': compare lower labels (descending order) */ + gfsmACUpperR = 0xa, /**< 'U': compare upper labels (descending order) */ + gfsmACWeightR = 0xb, /**< 'W': compare semiring weights (descending order) */ + gfsmACSourceR = 0xc, /**< 'S': compare source states (if supported and meaningful, descending order) */ + gfsmACTargetR = 0xd, /**< 'T': compare target states (if supported and meaningful, descending order) */ + gfsmACUserR = 0xe, /**< 'X': pseudo-field for reversed user-defined comparisons, for symmetry */ + // + //-- Pseudo-comparisons + gfsmACAll = 0xf /**< '*': pseudo-field for mask of all known elementary comparisons */ +} gfsmArcComp; + +/** \brief Prioritized list of up to ::gfsmACMaxN (6) elementary arc comparisons packed as a guint32 + * \details + * The primary comparison is encoded as a ::gfsmArcComp in the least significant + * 4-bit nybble of the integer, the secondary comparison is left-shifted by 4 bits + * (::gfsmACShift), and so on. + */ +typedef guint32 gfsmArcCompMask; + +/** Number of bits to left-shift ::gfsmArcCompMask for each arc sub-comparison using ::gfsmArcComp encoding */ +#define gfsmACShift 4 + +/** Maximum number of ::gfsmArcComp fields supported by ::gfsmArcCompMask (pragmatic definition) */ +#define gfsmACMaxN 6 + +/** Useful aliases for builtin arcsort modes, including backwards-compatible ::gfsmASMLower etc. 
*/ +typedef enum { + gfsmASMNone = 0x0, /**< no sort */ + gfsmASMLower = (gfsmACLower|(gfsmACUpper<<gfsmACShift)|(gfsmACTarget<<(2*gfsmACShift))), /**< (lower,upper,target) */ + gfsmASMUpper = (gfsmACUpper|(gfsmACLower<<gfsmACShift)|(gfsmACTarget<<(2*gfsmACShift))), /**< (upper,lower,target) */ + gfsmASMWeight = gfsmACWeight, /**< (weight) */ + gfsmASMLowerWeight = (gfsmACLower|(gfsmACWeight<<gfsmACShift)), /**< (lower,weight) */ + gfsmASMUpperWeight = (gfsmACUpper|(gfsmACWeight<<gfsmACShift)), /**< (upper,weight) */ + gfsmASMUser = gfsmACUser, /**< user-defined sort */ +} gfsmArcSortModeE; + +/// Semi-compatible typedef for arc sort mode +typedef gfsmArcCompMask gfsmArcSortMode; + +/// Data for new-style generic arc comparison +typedef struct { + gfsmArcCompMask mask; /**< Comparison precedence */ + gfsmSemiring *sr; /**< Semiring to use for weight comparisons (if any) */ + GCompareDataFunc user_compare_func; /**< User comparison function (if any) */ + gpointer user_data; /**< User data (for \c user_compare_func) */ +} gfsmArcCompData; + +/*====================================================================== + * Methods: Arcs: Constructors etc. + */ + +/// \name Arcs: Constructors etc. 
+//@{ +/** Create and return a new (empty) ::gfsmArc */ +GFSM_INLINE +gfsmArc *gfsm_arc_new(void); + +/** Initialize a ::gfsmArc + * \param a arc to initialize + * \param src ID of source state + * \param dst ID of target state + * \param lo ID of lower label + * \param hi ID of upper label + * \param wt arc weight + * \returns initialized arc \a a + */ +GFSM_INLINE +gfsmArc *gfsm_arc_init(gfsmArc *a, + gfsmStateId src, + gfsmStateId dst, + gfsmLabelId lo, + gfsmLabelId hi, + gfsmWeight wt); + +/** Convenience function to simultaneously create and initialize a ::gfsmArc + * \param src ID of source state + * \param dst ID of target state + * \param lo ID of lower label + * \param hi ID of upper label + * \param wt arc weight + * \returns newly allocated and initialized ::gfsmArc + */ +GFSM_INLINE +gfsmArc *gfsm_arc_new_full(gfsmStateId src, gfsmStateId dst, gfsmLabelVal lo, gfsmLabelVal hi, gfsmWeight wt); + +/** Create an exact copy of the ::gfsmArc \a src */ +GFSM_INLINE +gfsmArc *gfsm_arc_clone(gfsmArc *src); + +/** Backwards-compatible convenience alias for gfsm_arc_clone() */ +#define gfsm_arc_copy(src) gfsm_arc_clone(src) + +/** Destroy a ::gfsmArc \a a */ +GFSM_INLINE +void gfsm_arc_free(gfsmArc *a); +//@} + +/*====================================================================== + * Methods: Arc: Accessors + */ +///\name Arc Accessors +//@{ + +/** Get source node of an arc -- may be gfsmNoState */ +#define gfsm_arc_source(arcptr) ((arcptr)->source) + +/** Get target node of an arc -- may be gfsmNoState */ +#define gfsm_arc_target(arcptr) ((arcptr)->target) + +/** Get lower label of an arc -- may be gfsmNoLabel */ +#define gfsm_arc_lower(arcptr) ((arcptr)->lower) + +/** Get upper label of an arc -- may be gfsmNoLabel */ +#define gfsm_arc_upper(arcptr) ((arcptr)->upper) + +/** Get weight of an arc -- may be gfsmNoWeight */ +#define gfsm_arc_weight(arcptr) ((arcptr)->weight) + +//@} + + +/*====================================================================== + * 
Arc Comparison Utilities + */ +///\name Arc Comparison Utilities +//@{ + +/** Generic 3-way comparison on arcs (inline version) + * \param a1 first arc to compare + * \param a2 second arc to compare + * \param acdata specifies comparison priorities + * \returns + * negative, zero, or positive integer depending on whether + * \a a1 is less-than, equal-to, or greater-than \a a2 according to \a acdata. + */ +GFSM_INLINE +gint gfsm_arc_compare_bymask_inline(gfsmArc *a1, gfsmArc *a2, gfsmArcCompData *acdata); + +/** Generic 3-way comparison on arcs (extern version) + * Really just a wrapper for gfsm_arc_compare_bymask_inline() + */ +gint gfsm_arc_compare_bymask(gfsmArc *a1, gfsmArc *a2, gfsmArcCompData *acdata); + +/** Guts for gfsm_arc_compare_bymask_inline(): compare arcs w.r.t. a single attribute \a cmp. + * \note gcc deems these calls "unlikely" and refuses to inline... + */ +GFSM_INLINE +gint gfsm_arc_compare_bymask_1_(gfsmArc *a1, gfsmArc *a2, gfsmArcComp cmp, gfsmArcCompData *acdata); + +/** Parse a NUL-terminated string into a ::gfsmArcCompMask + * \param maskchars + * A NUL-terminated string representing the precedence among elementary comparisons. + * Each character represents a single elementary comparison. + * The primary comparison is the first character of the string. + * At most ::gfsmACMaxN comparisons are read. + * The characters '_', '-', ',', space, tab and newline are silently ignored; + * any other unknown character is reported on stderr and skipped. + * Correspondence of characters to comparisons is: + * \li 'l' ::gfsmACLower + * \li 'u' ::gfsmACUpper + * \li 'w' ::gfsmACWeight + * \li 's' ::gfsmACSource + * \li 't' ::gfsmACTarget + * \li 'x' ::gfsmACUser + * \li 'L' ::gfsmACLowerR + * \li 'U' ::gfsmACUpperR + * \li 'W' ::gfsmACWeightR + * \li 'S' ::gfsmACSourceR + * \li 'T' ::gfsmACTargetR + * \li 'X' ::gfsmACUserR + * \returns a ::gfsmArcCompMask for \a maskchars + */ +gfsmArcCompMask gfsm_acmask_from_chars(const char *maskchars); + +/** Create and return a ::gfsmArcCompMask from a variable argument list of ::gfsmArcComp + * The argument list must be terminated with a zero (e.g. 
::gfsmACNone) + * \param cmp0 primary comparison + * \param ... secondary, tertiary, ... comparisons + * \returns ::gfsmArcCompMask for specified comparisons + */ +gfsmArcCompMask gfsm_acmask_from_args(gfsmArcComp cmp0, ...); + +/** Create a partial ::gfsmArcCompMask for \a cmp as the \a nth (sub-)comparison */ +GFSM_INLINE +gfsmArcCompMask gfsm_acmask_new(gfsmArcComp cmp, gint nth); + +/** Get \a nth (sub-)comparison from a ::gfsmArcCompMask */ +GFSM_INLINE +gfsmArcComp gfsm_acmask_nth(gfsmArcCompMask m, gint nth); + +/** Get the basic \a nth (sub-)comparison from a ::gfsmArcCompMask, disregarding sort order */ +GFSM_INLINE +gfsmArcComp gfsm_acmask_nth_comp(gfsmArcCompMask m, gint nth); + +/** Get sort order for \a nth (sub-)comparison of a ::gfsmArcCompMask. + * \returns a true value if \a nth sub-comparison is reversed (descending order), otherwise FALSE. + */ +GFSM_INLINE +gboolean gfsm_acmask_nth_reverse(gfsmArcCompMask m, gint nth); + +/** Get single character representing the \a nth field of arc comparison mask \a m, as for gfsm_acmask_from_chars() */ +GFSM_INLINE +gchar gfsm_acmask_nth_char(gfsmArcCompMask m, gint nth); + +/** Get a static human-readable string representing the \a nth field of arc comparison mask \a m */ +const gchar *gfsm_acmask_nth_string(gfsmArcCompMask m, gint nth); + +/** Populate a character string representing a ::gfsmArcCompMask. + * \param[in] m mask to convert to a string + * \param[out] chars character string representing \a m, in the format accepted by gfsm_acmask_from_chars(), + * or NULL to allocate a new string. If specified and non-NULL, \a chars should be + * long enough to hold ::gfsmACMaxN+1 characters, since a terminating NUL + * is implicitly added as the final character. + * \returns \a chars if specified, otherwise a newly allocated string. + * \note User is responsible for freeing the returned string with g_free() when it is no longer needed. 
+ */ +gchar *gfsm_acmask_to_chars(gfsmArcCompMask m, gchar *chars); + +/** Populate a GString* with a human-readable representation of a ::gfsmArcCompMask. + * \param mask[in] mask to convert to a string + * \param gstr[out] GString representing \a mask, in human-readable format, + * or NULL to allocate a new GString*. +* \returns \a gstr if specified, otherwise a newly allocated GString*. + * \note User is responsible for freeing the returned GString* with g_string_free() when it is no longer needed. + */ +GString *gfsm_acmask_to_gstring(gfsmArcCompMask m, GString *gstr); + +/** Backwards-compatible arc sort mode name resolution function + * \deprecated in favor of gfsm_acmask_nth_string(), gfsm_acmask_to_gstring(), gfsm_acmask_to_chars() + */ +GFSM_INLINE +const gchar *gfsm_arc_sortmode_to_name(gfsmArcCompMask m); + +//@} + +//-- inline definitions +#ifdef GFSM_INLINE_ENABLED +# include <gfsmArc.hi> +#endif + +#endif /* _GFSM_ARC_H */ diff --git a/gfsm/gfsm/src/libgfsm/gfsmArc.hi b/gfsm/gfsm/src/libgfsm/gfsmArc.hi new file mode 100644 index 0000000..fcd9d79 --- /dev/null +++ b/gfsm/gfsm/src/libgfsm/gfsmArc.hi @@ -0,0 +1,262 @@ + +/*=============================================================================*\ + * File: gfsmArc.hi + * Author: Bryan Jurish <moocow@ling.uni-potsdam.de> + * Description: finite state machine library: arcs: inline definitions + * + * Copyright (c) 2004-2008 Bryan Jurish. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + *=============================================================================*/ + +#include <stdlib.h> + +/*====================================================================== + * Methods: Arcs: Constructors etc. + */ + +/*-------------------------------------------------------------- + * arc_new() + */ +GFSM_INLINE +gfsmArc *gfsm_arc_new(void) +{ + return g_new0(gfsmArc,1); +} + +/*-------------------------------------------------------------- + * arc_init() + */ +GFSM_INLINE +gfsmArc *gfsm_arc_init(gfsmArc *a, + gfsmStateId src, + gfsmStateId dst, + gfsmLabelId lo, + gfsmLabelId hi, + gfsmWeight wt) +{ + if (a) { + a->source = src; + a->target = dst; + a->lower = lo; + a->upper = hi; + a->weight = wt; + } + return a; +} + +/*-------------------------------------------------------------- + * arc_new_full() + */ +GFSM_INLINE +gfsmArc *gfsm_arc_new_full(gfsmStateId src, gfsmStateId dst, gfsmLabelVal lo, gfsmLabelVal hi, gfsmWeight wt) +{ + return gfsm_arc_init(g_new(gfsmArc,1),src,dst,lo,hi,wt); +} + +/*-------------------------------------------------------------- + * arc_clone() + */ +GFSM_INLINE +gfsmArc *gfsm_arc_clone(gfsmArc *src) +{ + gfsmArc *dst = g_new(gfsmArc,1); + *dst = *src; + return dst; +} + +/*-------------------------------------------------------------- + * arc_free() + */ +GFSM_INLINE +void gfsm_arc_free(gfsmArc *a) +{ g_free(a); } + + +/*====================================================================== + * Methods: Arc Comparison (backwards-compatible) + */ +//-- none + +/*====================================================================== + * Methods: arc comparison mask utilties + */ + +//-------------------------------------------------------------- +GFSM_INLINE +gfsmArcCompMask gfsm_acmask_new(gfsmArcComp cmp, 
gint nth) +{ + return +#ifdef __cplusplus + //-- this kind of crap is the reason i hate c++ --moo + static_cast<gfsmArcCompMask> +#endif + ( (cmp&gfsmACAll)<<(nth*gfsmACShift) ); +} + +//-------------------------------------------------------------- +GFSM_INLINE +gfsmArcComp gfsm_acmask_nth(gfsmArcCompMask m, gint nth) +{ + return +#ifdef __cplusplus + //-- this kind of crap is the reason i hate c++ --moo + static_cast<gfsmArcComp> +#endif + ( (m>>(nth*gfsmACShift))&gfsmACAll ); +} + +//-------------------------------------------------------------- +GFSM_INLINE +gfsmArcComp gfsm_acmask_nth_comp(gfsmArcCompMask m, gint nth) +{ + return +#ifdef __cplusplus + //-- this kind of crap is the reason i hate c++ --moo + static_cast<gfsmArcComp>( + static_cast<gfsmArcCompMask>(gfsm_acmask_nth(m,nth)) + & + ~static_cast<gfsmArcCompMask>(gfsmACReverse) + ) +#else + gfsm_acmask_nth(m,nth) & (~gfsmACReverse) +#endif + ; +} + +//-------------------------------------------------------------- +GFSM_INLINE +gboolean gfsm_acmask_nth_reverse(gfsmArcCompMask m, gint nth) +{ return (gfsm_acmask_nth(m,nth)&gfsmACReverse) ? TRUE : FALSE; } + + +/*====================================================================== + * Methods: Arc comparison: prioritized + */ + +//-------------------------------------------------------------- +GFSM_INLINE +gint gfsm_arc_compare_bymask_1_(gfsmArc *a1, gfsmArc *a2, gfsmArcComp cmp, gfsmArcCompData *acdata) +{ + switch (cmp) { + // + //-- forward (ascending) + case gfsmACLower: + if (a1->lower < a2->lower) return -1; + if (a1->lower > a2->lower) return 1; + return 0; + case gfsmACUpper: + if (a1->upper < a2->upper) return -1; + if (a1->upper > a2->upper) return 1; + return 0; + case gfsmACWeight: + return acdata->sr ? 
gfsm_sr_compare(acdata->sr, a1->weight, a2->weight) : 0; + case gfsmACSource: + if (a1->source < a2->source) return -1; + if (a1->source > a2->source) return 1; + return 0; + case gfsmACTarget: + if (a1->target < a2->target) return -1; + if (a1->target > a2->target) return 1; + return 0; + // + //-- reverse (descending) + case gfsmACLowerR: + if (a1->lower < a2->lower) return 1; + if (a1->lower > a2->lower) return -1; + return 0; + case gfsmACUpperR: + if (a1->upper < a2->upper) return 1; + if (a1->upper > a2->upper) return -1; + return 0; + case gfsmACWeightR: + return acdata->sr ? gfsm_sr_compare(acdata->sr, a2->weight, a1->weight) : 0; + case gfsmACSourceR: + if (a1->source < a2->source) return 1; + if (a1->source > a2->source) return -1; + return 0; + case gfsmACTargetR: + if (a1->target < a2->target) return 1; + if (a1->target > a2->target) return -1; + return 0; + // + //-- user + case gfsmACUser: + if (acdata->user_compare_func) { + return (*(acdata->user_compare_func))(a1,a2,acdata->user_data); + } + // + //-- default + case gfsmACNone: + default: + return 0; + } + return 0; +} + +//-------------------------------------------------------------- +GFSM_INLINE +gint gfsm_arc_compare_bymask_inline(gfsmArc *a1, gfsmArc *a2, gfsmArcCompData *acdata) +{ + gint rc=0; + //-- NULL check + if (!a1) { + if (!a2) return 0; + return 1; + } + if (!a2) return -1; + // + if ( (rc=gfsm_arc_compare_bymask_1_(a1,a2,gfsm_acmask_nth(acdata->mask,0),acdata)) ) return rc; + if ( (rc=gfsm_arc_compare_bymask_1_(a1,a2,gfsm_acmask_nth(acdata->mask,1),acdata)) ) return rc; + if ( (rc=gfsm_arc_compare_bymask_1_(a1,a2,gfsm_acmask_nth(acdata->mask,2),acdata)) ) return rc; + if ( (rc=gfsm_arc_compare_bymask_1_(a1,a2,gfsm_acmask_nth(acdata->mask,3),acdata)) ) return rc; + if ( (rc=gfsm_arc_compare_bymask_1_(a1,a2,gfsm_acmask_nth(acdata->mask,4),acdata)) ) return rc; + if ( (rc=gfsm_arc_compare_bymask_1_(a1,a2,gfsm_acmask_nth(acdata->mask,5),acdata)) ) return rc; + return 0; +} + + 
+/*====================================================================== + * Methods: String utilities + */ + +//-------------------------------------------------------------- +GFSM_INLINE +gchar gfsm_acmask_nth_char(gfsmArcCompMask m, gint nth) +{ + switch (gfsm_acmask_nth(m,nth)) { + case gfsmACLower: return 'l'; + case gfsmACUpper: return 'u'; + case gfsmACWeight: return 'w'; + case gfsmACSource: return 's'; + case gfsmACTarget: return 't'; + // + case gfsmACLowerR: return 'L'; + case gfsmACUpperR: return 'U'; + case gfsmACWeightR: return 'W'; + case gfsmACSourceR: return 'S'; + case gfsmACTargetR: return 'T'; + // + case gfsmACUser: return 'x'; + case gfsmACNone: return '_'; + default: return '?'; + } + return '?'; +} + +//-------------------------------------------------------------- +GFSM_INLINE +const gchar *gfsm_arc_sortmode_to_name(gfsmArcCompMask m) +{ return gfsm_acmask_nth_string(m,0); } diff --git a/gfsm/gfsm/src/libgfsm/gfsmArcIndex.c b/gfsm/gfsm/src/libgfsm/gfsmArcIndex.c new file mode 100644 index 0000000..3df0760 --- /dev/null +++ b/gfsm/gfsm/src/libgfsm/gfsmArcIndex.c @@ -0,0 +1,498 @@ + +/*=============================================================================*\ + * File: gfsmArcIndex.c + * Author: Bryan Jurish <moocow@ling.uni-potsdam.de> + * Description: finite state machine library: arc indices + * + * Copyright (c) 2006-2007 Bryan Jurish. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + *=============================================================================*/ + +#include <gfsmArcIndex.h> +#include <gfsmArcIter.h> + +//-- no-inline definitions +#ifndef GFSM_INLINE_ENABLED +# include <gfsmArcIndex.hi> +#endif + +/*====================================================================== + * gfsmReverseArcIndex + */ + +/*-------------------------------------------------------------- + * automaton_reverse_arc_index() + */ +gfsmReverseArcIndex *gfsm_automaton_to_reverse_arc_index(gfsmAutomaton *fsm, gfsmReverseArcIndex *rarcs) +{ + gfsmStateId idfrom; + gfsmArcIter ai; + gfsmArc *arc; + + if (!rarcs) { + rarcs = gfsm_reverse_arc_index_sized_new(fsm->states->len); + } + g_ptr_array_set_size(rarcs,fsm->states->len); + + for (idfrom=0; idfrom < fsm->states->len; idfrom++) { + for (gfsm_arciter_open(&ai,fsm,idfrom); gfsm_arciter_ok(&ai); gfsm_arciter_next(&ai)) { + arc = gfsm_arciter_arc(&ai); + g_ptr_array_index(rarcs,arc->target) + //= gfsm_arclist_prepend(g_ptr_array_index(rarcs,arc->target), arc); + = g_slist_prepend(g_ptr_array_index(rarcs,arc->target),arc); + } + gfsm_arciter_close(&ai); + } + + return rarcs; +} + +/*-------------------------------------------------------------- + * reverse_arc_index_free() + */ +void gfsm_reverse_arc_index_free(gfsmReverseArcIndex *rarcs, gboolean free_lists) +{ + guint i; + if (free_lists) { + //-- +free_lists, -free_arcs + for (i=0; i < rarcs->len; i++) { g_slist_free(g_ptr_array_index(rarcs,i)); } + } + + //-- free index array + g_ptr_array_free(rarcs,TRUE); +} + + + +/*====================================================================== + * gfsmWeightVector + */ + +/*-------------------------------------------------------------- + * automaton_to_weight_vector() + */ +gfsmWeightVector 
*gfsm_automaton_to_final_weight_vector(gfsmAutomaton *fsm, gfsmWeightVector *wv) +{ + gfsmStateId qid; + guint n_states = gfsm_automaton_n_states(fsm); + gfsmWeight *wp; + + if (wv==NULL) { + wv = gfsm_weight_vector_sized_new(n_states); + } else { + gfsm_weight_vector_resize(wv,n_states); + } + wv->len = n_states; + + for (qid=0,wp=(gfsmWeight*)wv->data; qid < n_states; qid++,wp++) { + gfsm_automaton_lookup_final(fsm,qid,wp); + } + + return wv; +} + +/*-------------------------------------------------------------- + * weight_vector_write_bin_handle() + */ +gboolean gfsm_weight_vector_write_bin_handle(gfsmWeightVector *wv, gfsmIOHandle *ioh, gfsmError **errp) +{ + guint32 len = wv->len; + if (!gfsmio_write(ioh,&len,sizeof(guint32))) { + g_set_error(errp, g_quark_from_static_string("gfsm"), //-- domain + g_quark_from_static_string("weight_vector_write_bin_handle:len"), //-- code + "could not store weight vector length"); + return FALSE; + } + if (!gfsmio_write(ioh,wv->data,wv->len*sizeof(gfsmWeight))) { + g_set_error(errp, g_quark_from_static_string("gfsm"), //-- domain + g_quark_from_static_string("weight_vector_write_bin_handle:weights"), //-- code + "could not store weight vector data"); + return FALSE; + } + return TRUE; +} + +/*-------------------------------------------------------------- + * weight_vector_read_bin_handle() + */ +gboolean gfsm_weight_vector_read_bin_handle(gfsmWeightVector *wv, gfsmIOHandle *ioh, gfsmError **errp) +{ + guint32 len; + if (!gfsmio_read(ioh, &len, sizeof(guint32))) { + g_set_error(errp, + g_quark_from_static_string("gfsm"), //-- domain + g_quark_from_static_string("weight_vector_read_bin_handle:len"), //-- code + "could not read weight vector length"); + return FALSE; + } + gfsm_weight_vector_resize(wv,len); + if (!gfsmio_read(ioh, wv->data, len*sizeof(gfsmWeight))) { + g_set_error(errp, + g_quark_from_static_string("gfsm"), //-- domain + g_quark_from_static_string("weight_vector_read_bin_handle:data"), //-- code + "could not read 
weight vector data"); + return FALSE; + } + return TRUE; +} + +/*====================================================================== + * gfsmArcTable + */ + +/*-------------------------------------------------------------- + * automaton_to_arc_table() + */ +gfsmArcTable *gfsm_automaton_to_arc_table(gfsmAutomaton *fsm, gfsmArcTable *tab) +{ + gfsmStateId qid, n_states=gfsm_automaton_n_states(fsm); + guint n_arcs=gfsm_automaton_n_arcs(fsm); + gfsmArcIter ai; + gfsmArc *arcp; + + //-- maybe allocate + if (!tab) { + tab = gfsm_arc_table_sized_new(n_arcs); + } else { + gfsm_arc_table_resize(tab, n_arcs); + } + tab->len = n_arcs; + + //-- populate arcs + for (qid=0,arcp=(gfsmArc*)tab->data; qid < n_states; qid++) { + if (!gfsm_automaton_has_state(fsm,qid)) continue; + for (gfsm_arciter_open(&ai,fsm,qid); gfsm_arciter_ok(&ai); gfsm_arciter_next(&ai)) { + gfsmArc *a = gfsm_arciter_arc(&ai); + *(arcp++) = *a; + } + gfsm_arciter_close(&ai); + } + + //-- return + return tab; +} + +/*-------------------------------------------------------------- + * arc_table_write_bin_handle() + */ +gboolean gfsm_arc_table_write_bin_handle(gfsmArcTable *tab, gfsmIOHandle *ioh, gfsmError **errp) +{ + guint32 len = tab->len; + if (!gfsmio_write(ioh, &len, sizeof(guint32))) { + g_set_error(errp, + g_quark_from_static_string("gfsm"), + g_quark_from_static_string("arc_table_write_bin_handle:len"), + "could not write arc table length"); + return FALSE; + } + if (!gfsmio_write(ioh, tab->data, len*sizeof(gfsmArc))) { + g_set_error(errp, + g_quark_from_static_string("gfsm"), + g_quark_from_static_string("arc_table_write_bin_handle:data"), + "could not write arc table data"); + return FALSE; + } + return TRUE; +} + +/*-------------------------------------------------------------- + * arc_table_read_bin_handle() + */ +gboolean gfsm_arc_table_read_bin_handle(gfsmArcTable *tab, gfsmIOHandle *ioh, gfsmError **errp) +{ + guint32 len; + if (!gfsmio_read(ioh, &len, sizeof(guint32))) { + g_set_error(errp, + 
g_quark_from_static_string("gfsm"), + g_quark_from_static_string("arc_table_read_bin_handle:len"), + "could not read arc table length"); + return FALSE; + } + gfsm_arc_table_resize(tab,len); + if (!gfsmio_read(ioh, tab->data, len*sizeof(gfsmArc))) { + g_set_error(errp, + g_quark_from_static_string("gfsm"), + g_quark_from_static_string("arc_table_read_bin_handle:data"), + "could not read arc table data"); + return FALSE; + } + return TRUE; +} + + + +/*====================================================================== + * gfsmArcTableIndex + */ + +/*-------------------------------------------------------------- + * arc_table_index_copy() + */ +gfsmArcTableIndex *gfsm_arc_table_index_copy(gfsmArcTableIndex *dst, gfsmArcTableIndex *src) +{ + gfsmStateId i; + gfsm_arc_table_copy (dst->tab, src->tab); + g_ptr_array_set_size(dst->first, src->first->len); + + for (i=0; i < src->first->len; i++) { + gint offset = (gfsmArc*)g_ptr_array_index(src->first,i) - (gfsmArc*)src->tab->data; + g_ptr_array_index(dst->first,i) = (gfsmArc*)dst->tab->data + offset; + } + + return dst; +} + + +/*-------------------------------------------------------------- + * automaton_to_arc_table_index() + */ +gfsmArcTableIndex *gfsm_automaton_to_arc_table_index(gfsmAutomaton *fsm, gfsmArcTableIndex *tabx) +{ + gfsmStateId qid, n_states=gfsm_automaton_n_states(fsm); + guint n_arcs=gfsm_automaton_n_arcs(fsm); + gfsmArc *arcp, *arcp_max; + gfsmArc **firstp; + + //-- maybe allocate + if (!tabx) { + tabx = gfsm_arc_table_index_sized_new(n_states, n_arcs); + } else { + gfsm_arc_table_index_resize(tabx, n_states, n_arcs); + } + + //-- populate tabx->arcs + gfsm_automaton_to_arc_table(fsm,tabx->tab); + + //-- populate tabx->first + arcp = (gfsmArc*)tabx->tab->data; + arcp_max = arcp + n_arcs; + for (qid=0,firstp=(gfsmArc**)tabx->first->pdata; qid<n_states; qid++,firstp++) { + *firstp = arcp; + for ( ; arcp<arcp_max && arcp->source==qid; arcp++) { ; } + } + *firstp = arcp_max; + + //-- return + return 
tabx; +} + +/*-------------------------------------------------------------- + * arc_table_index_sort_with_data() + */ +void gfsm_arc_table_index_sort_with_data(gfsmArcTableIndex *tabx, GCompareDataFunc compare_func, gpointer data) +{ + gfsmArc **firstp = (gfsmArc**)tabx->first->pdata; + gfsmArc **firstp_max = firstp + tabx->first->len - 1; + for ( ; firstp < firstp_max; firstp++) { + gfsmArc *min = *firstp; + gfsmArc *max = *(firstp+1); + g_qsort_with_data(min, max-min, sizeof(gfsmArc), compare_func, data); + } +} + +/*-------------------------------------------------------------- + * arc_table_index_write_bin_handle() + */ +gboolean gfsm_arc_table_index_write_bin_handle(gfsmArcTableIndex *tabx, gfsmIOHandle *ioh, gfsmError **errp) +{ + gfsmStateId first_len=tabx->first->len, qid; + if (!gfsm_arc_table_write_bin_handle(tabx->tab, ioh, errp)) return FALSE; + + if (!gfsmio_write(ioh, &first_len, sizeof(gfsmStateId))) { + g_set_error(errp, + g_quark_from_static_string("gfsm"), + g_quark_from_static_string("arc_table_index_write_bin_handle:len"), + "could not write arc table index 'first' length"); + return FALSE; + } + for (qid=0; qid < first_len; qid++) { + gfsmArc *a = (gfsmArc*)g_ptr_array_index(tabx->first,qid); + guint32 offset = a - ((gfsmArc*)tabx->tab->data); + if (!gfsmio_write(ioh, &offset, sizeof(guint32))) { + g_set_error(errp, + g_quark_from_static_string("gfsm"), + g_quark_from_static_string("arc_table_index_write_bin_handle:data"), + "could not write state arc offset for state '%u'", qid); + return FALSE; + } + } + return TRUE; +} + +/*-------------------------------------------------------------- + * arc_table_index_read_bin_handle() + */ +gboolean gfsm_arc_table_index_read_bin_handle(gfsmArcTableIndex *tabx, gfsmIOHandle *ioh, gfsmError **errp) +{ + gfsmStateId first_len, qid; + if (!gfsm_arc_table_read_bin_handle(tabx->tab, ioh, errp)) return FALSE; + + if (!gfsmio_read(ioh, &first_len, sizeof(gfsmStateId))) { + g_set_error(errp, + 
g_quark_from_static_string("gfsm"), + g_quark_from_static_string("arc_table_index_read_bin_handle:len"), + "could not read arc table index 'first' length"); + return FALSE; + } + g_ptr_array_set_size(tabx->first,first_len); + for (qid=0; qid < first_len; qid++) { + guint32 offset; + if (!gfsmio_read(ioh, &offset, sizeof(guint32))) { + g_set_error(errp, + g_quark_from_static_string("gfsm"), + g_quark_from_static_string("arc_table_index_read_bin_handle:data"), + "could not read state arc offset for state '%u'", qid); + return FALSE; + } + //-- offsets are arc counts relative to tab->data; tab->len itself is valid (end sentinel) + if (offset > tabx->tab->len) { + g_set_error(errp, + g_quark_from_static_string("gfsm"), + g_quark_from_static_string("arc_table_index_read_bin_handle:offset"), + "arc offset '%u' out of range for state '%u'", offset, qid); + return FALSE; + } + g_ptr_array_index(tabx->first,qid) = &g_array_index(tabx->tab,gfsmArc,offset); + } + return TRUE; +} + + +/*====================================================================== + * gfsmArcLabelIndex [GONE] + */ +//-------------------------------------------------------------- +// arc_label_index_compare_arcs() +/* +gint gfsm_arc_label_index_compare_arcs(gfsmArc *a1, gfsmArc *a2, gfsmArcLabelIndexSortData *sdata) +{ return gfsm_arc_label_index_compare_arcs_inline(a1,a2,sdata); } +*/ + + +/*====================================================================== + * gfsmArcRange + */ + +#undef GFSM_ARCRANGE_ENABLE_BSEARCH +#undef GFSM_ARCRANGE_ENABLE_SEEK + +#ifdef GFSM_ARCRANGE_ENABLE_BSEARCH +/*-------------------------------------------------------------- + * arc_range_bsearch_*() + * + NOT WORTH IT (tested for out_degree {1,2,4,8,16,32,64,128,256,512}) + */ +void gfsm_arcrange_bsearch_source(gfsmArcRange *range, gfsmStateId find) +{ + gfsmArc *min=range->min, *max=range->max; + while (min < max) { + gfsmArc *mid = min + (max-min)/2; + if (mid->source < find) { min = mid+1; } + else { max = mid; } + } + range->min = min; +} + +//-------------------------------------------------------------- +void gfsm_arcrange_bsearch_target(gfsmArcRange *range, gfsmStateId find) +{ + gfsmArc *min=range->min, *max=range->max; + while (min < max) { + gfsmArc *mid = min + (max-min)/2; + if (mid->target < find) { min = mid+1; } + else { max = mid; } 
+ } + range->min = min; +} + +//-------------------------------------------------------------- +void gfsm_arcrange_bsearch_lower(gfsmArcRange *range, gfsmLabelId find) +{ + gfsmArc *min=range->min, *max=range->max; + while (min < max) { + gfsmArc *mid = min + (max-min)/2; + if (mid->lower < find) { min = mid+1; } + else { max = mid; } + } + range->min = min; +} + +//-------------------------------------------------------------- +void gfsm_arcrange_bsearch_upper(gfsmArcRange *range, gfsmLabelId find) +{ + gfsmArc *min=range->min, *max=range->max; + while (min < max) { + gfsmArc *mid = min + (max-min)/2; + if (mid->upper < find) { min = mid+1; } + else { max = mid; } + } + range->min = min; +} + +//-------------------------------------------------------------- +// arcrange_bsearch_weight() +// + advances range->min to the first arc whose weight does not compare below 'find' under sr +// + range->max is left untouched, as in the other bsearch_*() variants +void gfsm_arcrange_bsearch_weight(gfsmArcRange *range, gfsmWeight find, gfsmSemiring *sr) +{ + gfsmArc *min=range->min, *max=range->max; + while (min < max) { + gfsmArc *mid = min + (max-min)/2; + //-- narrow the LOCAL bounds: writing to range->min/max here would never change the loop condition + if (gfsm_sr_compare(sr,mid->weight,find) < 0) { min = mid+1; } + else { max = mid; } + } + range->min = min; +} +#endif /* GFSM_ARCRANGE_ENABLE_BSEARCH */ + +#ifdef GFSM_ARCRANGE_ENABLE_SEEK +//-------------------------------------------------------------- +// arcrange_seek_X() +// -- also not worth it + +//---------------------------------------------- +GFSM_INLINE +void gfsm_arcrange_seek_source(gfsmArcRange *range, gfsmStateId find) +{ + gfsm_assert(range != NULL); + while (gfsm_arcrange_ok(range) && gfsm_arcrange_arc(range)->source < find) + gfsm_arcrange_next(range); +} + +//---------------------------------------------- +GFSM_INLINE +void gfsm_arcrange_seek_target(gfsmArcRange *range, gfsmStateId find) +{ + gfsm_assert(range != NULL); + while (gfsm_arcrange_ok(range) && gfsm_arcrange_arc(range)->target < find) + gfsm_arcrange_next(range); +} + +//---------------------------------------------- +GFSM_INLINE +void gfsm_arcrange_seek_lower(gfsmArcRange *range, gfsmLabelId find) +{ + gfsm_assert(range 
!= NULL); + while (gfsm_arcrange_ok(range) && gfsm_arcrange_arc(range)->lower < find) + gfsm_arcrange_next(range); +} + +//---------------------------------------------- +GFSM_INLINE +void gfsm_arcrange_seek_upper(gfsmArcRange *range, gfsmLabelId find) +{ + gfsm_assert(range != NULL); + while (gfsm_arcrange_ok(range) && gfsm_arcrange_arc(range)->upper < find) + gfsm_arcrange_next(range); +} + +//---------------------------------------------- +GFSM_INLINE +void gfsm_arcrange_seek_weight(gfsmArcRange *range, gfsmWeight find, gfsmSemiring *sr) +{ + gfsm_assert(range != NULL); + while (gfsm_arcrange_ok(range) && gfsm_sr_compare(sr,gfsm_arcrange_arc(range)->weight,find) < 0) + gfsm_arcrange_next(range); +} +#endif /* GFSM_ARCRANGE_ENABLE_SEEK */ diff --git a/gfsm/gfsm/src/libgfsm/gfsmArcIndex.h b/gfsm/gfsm/src/libgfsm/gfsmArcIndex.h new file mode 100644 index 0000000..db3f19b --- /dev/null +++ b/gfsm/gfsm/src/libgfsm/gfsmArcIndex.h @@ -0,0 +1,360 @@ + +/*=============================================================================*\ + * File: gfsmArcIndex.h + * Author: Bryan Jurish <moocow@ling.uni-potsdam.de> + * Description: finite state machine library: arc indices + * + * Copyright (c) 2006-2007 Bryan Jurish. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + *=============================================================================*/ + +/** \file gfsmArcIndex.h + * \brief Arc (transition) index utilities + */ + +#ifndef _GFSM_ARCINDEX_H +#define _GFSM_ARCINDEX_H + +#include <gfsmAutomaton.h> +#include <gfsmIO.h> + +/*====================================================================== + * ReverseArcIndex + */ +///\name gfsmReverseArcIndex +//@{ + +/// Reverse arc-index type +/** \a element at \a qto is a GSList* + * which contains a data element \a gfsmArc* \a arc={qfrom,qto,lo,hi,w} + * whenever source \a fsm contains an arc \a arc={qfrom,qto,lo,hi,w} + * from \a qfrom. + * + * \note + * arc data pointed to is shared by source automaton + * and the ::gfsmReverseArcIndex! + */ +typedef GPtrArray gfsmReverseArcIndex; + +/** Create and return a new ::gfsmReverseArcIndex + * \note + * Caller is responsible for freeing the returned index when it is no longer needed. + */ +GFSM_INLINE +gfsmReverseArcIndex *gfsm_reverse_arc_index_new(void); + +/** Create a new ::gfsmReverseArcIndex, given number of states to be indexed + * \note + * Caller is responsible for freeing the returned index when it is no longer needed. + */ +GFSM_INLINE +gfsmReverseArcIndex *gfsm_reverse_arc_index_sized_new(gfsmStateId n_states); + +/** Populate a reversed arc index for \a fsm. + * \param fsm source automaton + * \param rarcs + * Reverse arc index. + * May be passed as NULL to create a new arc index. + * \returns + * \a rarcs if non-NULL, otherwise a new reverse arc index for \a fsm. + * \note + * Caller is responsible for freeing the returned index when it is no longer needed. 
+ */ +gfsmReverseArcIndex *gfsm_automaton_to_reverse_arc_index(gfsmAutomaton *fsm, gfsmReverseArcIndex *rarcs); + +/** Backwards-compatible alias for gfsm_automaton_to_reverse_arc_index() */ +#define gfsm_automaton_reverse_arc_index gfsm_automaton_to_reverse_arc_index + +/** Free a ::gfsmReverseArcIndex + * \param rarcs + * reverse arc-index to be freed + * \param free_lists + * If true, associated arc-lists will be freed. + */ +void gfsm_reverse_arc_index_free(gfsmReverseArcIndex *rarcs, gboolean free_lists); + +//@} + + +/*====================================================================== + * gfsmFinalWeightIndex + */ +///\name gfsmFinalWeightIndex +//@{ + +/** GArray of ::gfsmWeight, indexed e.g. by ::gfsmStateId */ +typedef GArray gfsmWeightVector; + +/** Create a new (empty) ::gfsmWeightVector + * \note + * Caller is responsible for freeing the returned vector when it is no longer needed. + */ +GFSM_INLINE +gfsmWeightVector *gfsm_weight_vector_new(void); + +/** Create a new (empty) ::gfsmWeightVector, specifying initial size + * \note + * Caller is responsible for freeing the returned vector when it is no longer needed. + */ +GFSM_INLINE +gfsmWeightVector *gfsm_weight_vector_sized_new(guint size); + +/** Copy a ::gfsmWeightVector \a src to \a dst. \returns \a dst */ +GFSM_INLINE +gfsmWeightVector *gfsm_weight_vector_copy(gfsmWeightVector *dst, gfsmWeightVector *src); + +/** Create and return an exact clone of a ::gfsmWeightVector */ +GFSM_INLINE +gfsmWeightVector *gfsm_weight_vector_clone(gfsmWeightVector *src); + + +/** Set size of a ::gfsmWeightVector */ +GFSM_INLINE +void gfsm_weight_vector_resize(gfsmWeightVector *wv, guint size); + +/** Populate a ::gfsmWeightVector of state final weights in a ::gfsmAutomaton + * \param fsm source automaton + * \param wv + * Final weight index + * May be passed as NULL to create a new index. + * \returns \a wv if non-NULL, otherwise a new final weight index for \a fsm. 
+ */ +gfsmWeightVector *gfsm_automaton_to_final_weight_vector(gfsmAutomaton *fsm, gfsmWeightVector *wv); + +/** Free a ::gfsmWeightVector */ +GFSM_INLINE +void gfsm_weight_vector_free(gfsmWeightVector *wv); + +/** Write the contents of a ::gfsmWeightVector to a (binary) ::gfsmIOHandle. + * \param wv weight vector to write + * \param ioh handle to which data is to be written + * \param errp if an error occurs, \a *errp will hold an error message + * \returns true on success + */ +gboolean gfsm_weight_vector_write_bin_handle(gfsmWeightVector *wv, gfsmIOHandle *ioh, gfsmError **errp); + +/** Read the contents of a ::gfsmWeightVector from a (binary) ::gfsmIOHandle. + * \param wv weight vector into which data is to be read + * \param ioh handle from which data is to be read + * \param errp if an error occurs, \a *errp will hold an error message + * \returns true on success + */ +gboolean gfsm_weight_vector_read_bin_handle(gfsmWeightVector *wv, gfsmIOHandle *ioh, gfsmError **errp); + +//@} + +/*====================================================================== + * gfsmArcTable + */ +///\name gfsmArcTable +//@{ + +/// Type for dedicated block-wise storage of ::gfsmArc data: GArray of ::gfsmArc +typedef GArray gfsmArcTable; + +/** Create and return a new (empty) ::gfsmArcTable */ +GFSM_INLINE +gfsmArcTable *gfsm_arc_table_new(void); + +/** Create and return a new (empty) ::gfsmArcTable, specifying size */ +GFSM_INLINE +gfsmArcTable *gfsm_arc_table_sized_new(guint n_arcs); + +/** Resize a ::gfsmArcTable */ +GFSM_INLINE +void gfsm_arc_table_resize(gfsmArcTable *tab, guint n_arcs); + +/** Copy a ::gfsmArcTable \a src to \a dst. \returns \a dst. 
*/ +GFSM_INLINE +gfsmArcTable *gfsm_arc_table_copy(gfsmArcTable *dst, gfsmArcTable *src); + +/** Create and return an exact copy of a ::gfsmArcTable \a src */ +GFSM_INLINE +gfsmArcTable *gfsm_arc_table_clone(gfsmArcTable *src); + +/** Free a ::gfsmArcTable */ +GFSM_INLINE +void gfsm_arc_table_free(gfsmArcTable *tab); + +/** Populate a ::gfsmArcTable by copying arcs from \a fsm + * \param fsm source automaton + * \param tab + * arc table to populate. + * May be passed as NULL to create a new arc table. + * \returns + * \a tab if non-NULL, otherwise a new ::gfsmArcTable for \a fsm. + * \note + * Caller is responsible for freeing \a tab when it is no longer needed. + */ +gfsmArcTable *gfsm_automaton_to_arc_table(gfsmAutomaton *fsm, gfsmArcTable *tab); + +/** Sort all arcs in a ::gfsmArcTable using a user-specified comparison function */ +GFSM_INLINE +void gfsm_arc_table_sort_with_data(gfsmArcTable *tab, GCompareDataFunc compare_func, gpointer data); + +/** Sort arcs by comparison priority in a ::gfsmArcTable */ +GFSM_INLINE +void gfsm_arc_table_sort_bymask(gfsmArcTable *tab, gfsmArcCompMask m, gfsmSemiring *sr); + +/** Write the contents of a ::gfsmArcTable to a (binary) ::gfsmIOHandle. + * \param tab table to write + * \param ioh handle to which data is to be written + * \param errp if an error occurs, \a *errp will hold an error message + * \returns true on success + */ +gboolean gfsm_arc_table_write_bin_handle(gfsmArcTable *tab, gfsmIOHandle *ioh, gfsmError **errp); + +/** Read the contents of a ::gfsmArcTable from a (binary) ::gfsmIOHandle. 
+ * \param tab table into which data is to be read + * \param ioh handle from which data is to be read + * \param errp if an error occurs, \a *errp will hold an error message + * \returns true on success + */ +gboolean gfsm_arc_table_read_bin_handle(gfsmArcTable *tab, gfsmIOHandle *ioh, gfsmError **errp); + +//@} + +/*====================================================================== + * gfsmArcTableIndex + */ +///\name gfsmArcTableIndex +//@{ + +/// Basic type for dedicated arc storage state-based arc index +typedef struct { + gfsmArcTable *tab; /**< arc table, sorted by (source,...) */ + GPtrArray *first; /**< \a first[q] is address of first element of \a arcs->data for state \a q (a ::gfsmArc*) */ +} gfsmArcTableIndex; + +/** Create and return a new (empty) ::gfsmArcTableIndex */ +GFSM_INLINE +gfsmArcTableIndex *gfsm_arc_table_index_new(void); + +/** Create and return a new (empty) ::gfsmArcTableIndex, specifying sizes */ +GFSM_INLINE +gfsmArcTableIndex *gfsm_arc_table_index_sized_new(gfsmStateId n_states, guint n_arcs); + +/** Resize a ::gfsmArcTableIndex */ +GFSM_INLINE +void gfsm_arc_table_index_resize(gfsmArcTableIndex *tab, gfsmStateId n_states, guint n_arcs); + +/** Get number of states allocated for a ::gfsmArcTableIndex */ +GFSM_INLINE +gfsmStateId gfsm_arc_table_index_n_states(gfsmArcTableIndex *tabx); + +/** Get number of arcs allocated for a ::gfsmArcTableIndex */ +GFSM_INLINE +guint gfsm_arc_table_index_n_arcs(gfsmArcTableIndex *tabx); + +/** Copy a ::gfsmArcTableIndex \a src to \a dst. \returns \a dst. 
*/ +gfsmArcTableIndex *gfsm_arc_table_index_copy(gfsmArcTableIndex *dst, gfsmArcTableIndex *src); + +/** Create and return an exact copy of a ::gfsmArcTableIndex \a src */ +GFSM_INLINE +gfsmArcTableIndex *gfsm_arc_table_index_clone(gfsmArcTableIndex *src); + +/** Free a ::gfsmArcTableIndex */ +GFSM_INLINE +void gfsm_arc_table_index_free(gfsmArcTableIndex *tabx); + +/** Populate a ::gfsmArcTableIndex by indexing outgoing arcs from each state in \a fsm. + * \param fsm source automaton + * \param tabx + * Indexed arc table to populate. + * May be passed as NULL to create a new indexed arc table. + * \returns + * \a tabx if non-NULL, otherwise a new index for \a fsm. + * \note + * \li Caller is responsible for freeing \a tabx when it is no longer needed. + */ +gfsmArcTableIndex *gfsm_automaton_to_arc_table_index(gfsmAutomaton *fsm, gfsmArcTableIndex *tabx); + +/** Sort arcs state-wise in a ::gfsmArcTableIndex */ +void gfsm_arc_table_index_sort_with_data(gfsmArcTableIndex *tabx, GCompareDataFunc compare_func, gpointer data); + +/** Sort arcs state-wise by field priority in a ::gfsmArcTableIndex. + * Really just a wrapper for gfsm_arc_table_index_sort_with_data() + */ +GFSM_INLINE +void gfsm_arc_table_index_sort_bymask(gfsmArcTableIndex *tabx, gfsmArcCompMask m, gfsmSemiring *sr); + +/** Get number of outgoing arcs from state \a qid in \a tabx */ +GFSM_INLINE +guint gfsm_arc_table_index_out_degree(gfsmArcTableIndex *tabx, gfsmStateId qid); + +/** Write the contents of a ::gfsmArcTableIndex to a (binary) ::gfsmIOHandle. + * \param tabx index to write + * \param ioh handle to which data is to be written + * \param errp if an error occurs, \a *errp will hold an error message + * \returns true on success + */ +gboolean gfsm_arc_table_index_write_bin_handle(gfsmArcTableIndex *tabx, gfsmIOHandle *ioh, gfsmError **errp); + +/** Read the contents of a ::gfsmArcTableIndex from a (binary) ::gfsmIOHandle. 
+ * \param tabx table into which data is to be read + * \param ioh handle from which data is to be read + * \param errp if an error occurs, \a *errp will hold an error message + * \returns true on success + */ +gboolean gfsm_arc_table_index_read_bin_handle(gfsmArcTableIndex *tabx, gfsmIOHandle *ioh, gfsmError **errp); + +//@} + +/*====================================================================== + * gfsmArcRange + */ +///\name gfsmArcRange +//@{ + +/// Type for searching and iterating over arcs in a ::gfsmArcTable +typedef struct { + gfsmArc *min; /**< First (current) arc in range */ + gfsmArc *max; /**< First arc \b not in range */ +} gfsmArcRange; + +/** Open a ::gfsmArcRange for all outgoing arcs from state \a qid in \a tabx */ +GFSM_INLINE +void gfsm_arcrange_open_table_index(gfsmArcRange *range, gfsmArcTableIndex *tabx, gfsmStateId qid); + +/** Close a ::gfsmArcRange (currently does nothing really useful) */ +GFSM_INLINE +void gfsm_arcrange_close(gfsmArcRange *range); + +/** Check validity of a ::gfsmArcRange */ +GFSM_INLINE +gboolean gfsm_arcrange_ok(gfsmArcRange *range); + +/** Get current arc from a ::gfsmArcRange, which is assumed to be valid */ +GFSM_INLINE +gfsmArc *gfsm_arcrange_arc(gfsmArcRange *range); + +/** Increment current arc of a ::gfsmArcRange */ +GFSM_INLINE +void gfsm_arcrange_next(gfsmArcRange *range); + +//@} + +/*====================================================================== + * inline definitions + */ +#ifdef GFSM_INLINE_ENABLED +# include <gfsmArcIndex.hi> +#endif + +/*====================================================================== + * END + */ +#endif /* _GFSM_ARCINDEX_H */ diff --git a/gfsm/gfsm/src/libgfsm/gfsmArcIndex.hi b/gfsm/gfsm/src/libgfsm/gfsmArcIndex.hi new file mode 100644 index 0000000..3cd7b49 --- /dev/null +++ b/gfsm/gfsm/src/libgfsm/gfsmArcIndex.hi @@ -0,0 +1,325 @@ + +/*=============================================================================*\ + * File: gfsmArcIndex.hi + * Author: Bryan Jurish 
<moocow@ling.uni-potsdam.de> + * Description: finite state machine library: arc indices: inline definitions + * + * Copyright (c) 2006-2007 Bryan Jurish. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + *=============================================================================*/ + +#include <gfsmAssert.h> + +/*====================================================================== + * ReverseArcIndex + */ + +//-------------------------------------------------------------- +// reverse_arc_index_new() +GFSM_INLINE +gfsmReverseArcIndex *gfsm_reverse_arc_index_new(void) +{ return g_ptr_array_new(); } + +//-------------------------------------------------------------- +// reverse_arc_index_sized_new() +GFSM_INLINE +gfsmReverseArcIndex *gfsm_reverse_arc_index_sized_new(gfsmStateId n_states) +{ return g_ptr_array_sized_new(n_states); } + +//-------------------------------------------------------------- +// automaton_to_reverse_arc_index() +//--extern + +//-------------------------------------------------------------- +// reverse_arc_index_free() +//--extern + +//@} + +/*====================================================================== + * gfsmWeightVector + */ + +//-------------------------------------------------------------- +// weight_vector_new() +GFSM_INLINE 
+gfsmWeightVector *gfsm_weight_vector_new(void) +{ return g_array_new(FALSE,FALSE,sizeof(gfsmWeight)); } + +//-------------------------------------------------------------- +// weight_vector_sized_new() +GFSM_INLINE +gfsmWeightVector *gfsm_weight_vector_sized_new(gfsmStateId n_states) +{ return g_array_sized_new(FALSE,FALSE,sizeof(gfsmWeight),n_states); } + +//-------------------------------------------------------------- +// weight_vector_copy() +GFSM_INLINE +gfsmWeightVector *gfsm_weight_vector_copy(gfsmWeightVector *dst, gfsmWeightVector *src) +{ + g_array_set_size(dst, src->len); + dst->len = 0; + g_array_append_vals(dst, src->data, src->len); + return dst; +} + +//-------------------------------------------------------------- +// weight_vector_clone() +GFSM_INLINE +gfsmWeightVector *gfsm_weight_vector_clone(gfsmWeightVector *src) +{ + return gfsm_weight_vector_copy(gfsm_weight_vector_sized_new(src->len), src); +} + + +//-------------------------------------------------------------- +// weight_vector_resize() +GFSM_INLINE +void gfsm_weight_vector_resize(gfsmWeightVector *wv, gfsmStateId n_states) +{ + g_array_set_size(wv,n_states); + wv->len = n_states; +} + +//-------------------------------------------------------------- +// weight_vector_free() +GFSM_INLINE +void gfsm_weight_vector_free(gfsmWeightVector *wv) +{ g_array_free(wv,TRUE); } + +//-------------------------------------------------------------- +// automaton_weight_vector() +//-- extern + + +/*====================================================================== + * gfsmArcTable + */ + +//-------------------------------------------------------------- +// arc_table_new() +GFSM_INLINE +gfsmArcTable *gfsm_arc_table_new(void) +{ return g_array_new(FALSE,FALSE,sizeof(gfsmArc)); } + +//-------------------------------------------------------------- +// arc_table_sized_new() +GFSM_INLINE +gfsmArcTable *gfsm_arc_table_sized_new(guint n_arcs) +{ return g_array_sized_new(FALSE,FALSE,sizeof(gfsmArc),n_arcs); } 
+ +//-------------------------------------------------------------- +// arc_table_resize() +GFSM_INLINE +void gfsm_arc_table_resize(gfsmArcTable *tab, guint n_arcs) +{ + g_array_set_size(tab,n_arcs); + tab->len = n_arcs; +} + +//-------------------------------------------------------------- +// arc_table_copy() +GFSM_INLINE +gfsmArcTable *gfsm_arc_table_copy(gfsmArcTable *dst, gfsmArcTable *src) +{ + g_array_set_size(dst, src->len); + dst->len = 0; + g_array_append_vals(dst, src->data, src->len); + return dst; +} + +//-------------------------------------------------------------- +// arc_table_clone() +GFSM_INLINE +gfsmArcTable *gfsm_arc_table_clone(gfsmArcTable *src) +{ + return gfsm_arc_table_copy(gfsm_arc_table_sized_new(src->len), src); +} + +//-------------------------------------------------------------- +// arc_table_free() +GFSM_INLINE +void gfsm_arc_table_free(gfsmArcTable *tab) +{ g_array_free(tab,TRUE); } + +//-------------------------------------------------------------- +// arc_table_sort_with_data() +GFSM_INLINE +void gfsm_arc_table_sort_with_data(gfsmArcTable *tab, GCompareDataFunc compare_func, gpointer data) +{ g_array_sort_with_data(tab,compare_func,data); } + +//-------------------------------------------------------------- +// arc_table_sort_bymask() +GFSM_INLINE +void gfsm_arc_table_sort_bymask(gfsmArcTable *tab, gfsmArcCompMask m, gfsmSemiring *sr) +{ + gfsmArcCompData acdata = { m,sr,NULL,NULL }; + gfsm_arc_table_sort_with_data(tab, (GCompareDataFunc)gfsm_arc_compare_bymask, &acdata); +} + + +/*====================================================================== + * gfsmArcTableIndex + */ + +//-------------------------------------------------------------- +// arc_table_index_new() +GFSM_INLINE +gfsmArcTableIndex *gfsm_arc_table_index_new(void) +{ + gfsmArcTableIndex *tabx = g_new(gfsmArcTableIndex,1); + tabx->tab = gfsm_arc_table_new(); + tabx->first = g_ptr_array_new(); + return tabx; +} + 
+//-------------------------------------------------------------- +// arc_table_index_init() [UNDECLARED] +GFSM_INLINE +void gfsm_arc_table_index_init(gfsmArcTableIndex *tabx, gfsmStateId n_states, guint n_arcs) +{ + tabx->tab = gfsm_arc_table_sized_new(n_arcs); + tabx->first = g_ptr_array_sized_new(n_states+1); +} + +//-------------------------------------------------------------- +// arc_table_index_sized_new() +GFSM_INLINE +gfsmArcTableIndex *gfsm_arc_table_index_sized_new(gfsmStateId n_states, guint n_arcs) +{ + gfsmArcTableIndex *tabx = g_new(gfsmArcTableIndex,1); + gfsm_arc_table_index_init(tabx, n_states, n_arcs); + return tabx; +} + +//-------------------------------------------------------------- +// arc_table_index_resize() +GFSM_INLINE +void gfsm_arc_table_index_resize(gfsmArcTableIndex *tabx, gfsmStateId n_states, guint n_arcs) +{ + gfsm_arc_table_resize(tabx->tab, n_arcs); + g_ptr_array_set_size(tabx->first, n_states+1); + tabx->first->len = n_states+1; +} + +//-------------------------------------------------------------- +GFSM_INLINE +gfsmStateId gfsm_arc_table_index_n_states(gfsmArcTableIndex *tabx) +{ return tabx->first->len - 1; } + +//-------------------------------------------------------------- +GFSM_INLINE +guint gfsm_arc_table_index_n_arcs(gfsmArcTableIndex *tabx) +{ return tabx->tab->len; } + + +//-------------------------------------------------------------- +// arc_table_index_clone() +GFSM_INLINE +gfsmArcTableIndex *gfsm_arc_table_index_clone(gfsmArcTableIndex *src) +{ + return gfsm_arc_table_index_copy(gfsm_arc_table_index_sized_new(gfsm_arc_table_index_n_states(src), + gfsm_arc_table_index_n_arcs(src)), + src); +} + + +//-------------------------------------------------------------- +// arc_table_index_free() +GFSM_INLINE +void gfsm_arc_table_index_free(gfsmArcTableIndex *tabx) +{ + if (!tabx) return; + if (tabx->tab) gfsm_arc_table_free(tabx->tab); + if (tabx->first) g_ptr_array_free(tabx->first, TRUE); + g_free(tabx); +} + 
+//-------------------------------------------------------------- +// arc_table_index_sort_bymask() +GFSM_INLINE +void gfsm_arc_table_index_sort_bymask(gfsmArcTableIndex *tabx, gfsmArcCompMask m, gfsmSemiring *sr) +{ + gfsmArcCompData acdata = { m,sr,NULL,NULL }; + gfsm_arc_table_index_sort_with_data(tabx, (GCompareDataFunc)gfsm_arc_compare_bymask, &acdata); +} + +//-------------------------------------------------------------- +// arc_table_index_out_degree() +GFSM_INLINE +guint gfsm_arc_table_index_out_degree(gfsmArcTableIndex *tabx, gfsmStateId qid) +{ + //if (qid+1 >= tabx->first->len) return 0; + gfsm_assert( (qid+1) < tabx->first->len ); + return ((gfsmArc*)g_ptr_array_index(tabx->first,qid+1)) - ((gfsmArc*)g_ptr_array_index(tabx->first,qid)); +} + + + +/*====================================================================== + * gfsmArcRange + */ + +//-------------------------------------------------------------- +// arcrange_open_table_index() +GFSM_INLINE +void gfsm_arcrange_open_table_index(gfsmArcRange *range, gfsmArcTableIndex *tabx, gfsmStateId qid) +{ + gfsm_assert(range!=NULL); + gfsm_assert(qid < tabx->first->len-1); + range->min = (gfsmArc*)g_ptr_array_index(tabx->first, qid ); + range->max = (gfsmArc*)g_ptr_array_index(tabx->first, qid+1); +} + +//-------------------------------------------------------------- +// arcrange_close() +GFSM_INLINE +void gfsm_arcrange_close(gfsmArcRange *range) +{ + gfsm_assert(range!=NULL); + range->min = range->max = NULL; +} + +//-------------------------------------------------------------- +// arcrange_ok() +GFSM_INLINE +gboolean gfsm_arcrange_ok(gfsmArcRange *range) +{ + gfsm_assert(range!=NULL); + return range->min < range->max; +} + +//-------------------------------------------------------------- +// arcrange_arc() +GFSM_INLINE +gfsmArc *gfsm_arcrange_arc(gfsmArcRange *range) +{ + gfsm_assert(range!=NULL); + return range->min; +} + +//-------------------------------------------------------------- +// 
arcrange_next() +GFSM_INLINE +void gfsm_arcrange_next(gfsmArcRange *range) +{ + gfsm_assert(range!=NULL); + range->min++; +} + +/*====================================================================== + * END + */ diff --git a/gfsm/gfsm/src/libgfsm/gfsmArcIter.c b/gfsm/gfsm/src/libgfsm/gfsmArcIter.c new file mode 100644 index 0000000..6a6d51e --- /dev/null +++ b/gfsm/gfsm/src/libgfsm/gfsmArcIter.c @@ -0,0 +1,78 @@ + +/*=============================================================================*\ + * File: gfsmArcIter.c + * Author: Bryan Jurish <moocow@ling.uni-potsdam.de> + * Description: finite state machine library: arc iterators + * + * Copyright (c) 2004-2007 Bryan Jurish. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + *=============================================================================*/ + +#include <gfsmArcIter.h> + +//-- no-inline definitions +#ifndef GFSM_INLINE_ENABLED +# include <gfsmArcIter.hi> +#endif + +/*====================================================================== + * Methods: Arc iterators: open/close + */ + +/*====================================================================== + * Methods: Arc iterators: Accessors + */ + + +//-------------------------------------------------------------- +// seek_lower() +void gfsm_arciter_seek_lower(gfsmArcIter *aip, gfsmLabelVal lo) +{ + for ( ; gfsm_arciter_ok(aip); gfsm_arciter_next(aip)) { + if (gfsm_arciter_arc(aip)->lower == lo) break; + } +} + +//-------------------------------------------------------------- +// seek_upper() +void gfsm_arciter_seek_upper(gfsmArcIter *aip, gfsmLabelVal hi) +{ + for ( ; gfsm_arciter_ok(aip); gfsm_arciter_next(aip)) { + if (gfsm_arciter_arc(aip)->upper == hi) break; + } +} + +//-------------------------------------------------------------- +// seek_both() +void gfsm_arciter_seek_both(gfsmArcIter *aip, gfsmLabelVal lo, gfsmLabelVal hi) +{ + for ( ; gfsm_arciter_ok(aip); gfsm_arciter_next(aip)) { + gfsmArc *a = gfsm_arciter_arc(aip); + if ((lo==gfsmNoLabel || a->lower==lo) && (hi==gfsmNoLabel || a->upper==hi)) break; + } +} + + +//-------------------------------------------------------------- +// seek_user() +void gfsm_arciter_seek_user(gfsmArcIter *aip, + gfsmArcIterSeekFunc seekfunc, + gpointer data) +{ + for ( ; gfsm_arciter_ok(aip); gfsm_arciter_next(aip)) { + if ((*seekfunc)(aip,data)) break; + } +} diff --git a/gfsm/gfsm/src/libgfsm/gfsmArcIter.h b/gfsm/gfsm/src/libgfsm/gfsmArcIter.h new file mode 100644 index 0000000..ff96994 --- /dev/null +++ 
b/gfsm/gfsm/src/libgfsm/gfsmArcIter.h @@ -0,0 +1,202 @@ + +/*=============================================================================*\ + * File: gfsmArcIter.h + * Author: Bryan Jurish <moocow@ling.uni-potsdam.de> + * Description: finite state machine library: arc iterators + * + * Copyright (c) 2004-2007 Bryan Jurish. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + *=============================================================================*/ + +/** \file gfsmArcIter.h + * \brief Iterate over outgoing arcs of an automaton state. + */ + +#ifndef _GFSM_ARCITER_H +#define _GFSM_ARCITER_H + +#include <gfsmAutomaton.h> + +/*====================================================================== + * Types: Arc iterators + */ +/// Abstract type for arc iterators +typedef struct { + gfsmAutomaton *fsm; /**< fsm holding these arcs */ + gfsmState *state; /**< state holding these arcs */ + gfsmArcList *arcs; /**< pointer to node for current arc */ +} gfsmArcIter; + +/*====================================================================== + * Methods: Arc iterators: open/close + */ +///\name Arc Iterators: Constructors etc. +//@{ +/** Open a ::gfsmArcIter \a aip for the outgoing arcs from state with ID \a qid in the automaton \a fsm. 
+ * \param aip Pointer to the ::gfsmArcIter to be opened; assumed to be already allocated + * \param fsm Automaton containing the state whose outgoing arcs are to be opened + * \param stateid ID of the state whose outgoing arcs are to be opened + * + * \note + * \li Arc iterators may be silently invalidated by destructive operations + * \li The arc iterator should be closed with gfsm_arciter_close() when it is no longer needed. + * \li Caller is responsible for allocation and freeing of \a *aip. + */ +GFSM_INLINE +void gfsm_arciter_open(gfsmArcIter *aip, gfsmAutomaton *fsm, gfsmStateId stateid); + +/** "Open" an arc iterator for the outgoing arcs from a state pointer into \a fsm + * \deprecated prefer gfsm_arciter_open() + */ +GFSM_INLINE +void gfsm_arciter_open_ptr(gfsmArcIter *aip, gfsmAutomaton *fsm, gfsmState *stateptr); + +/** Close a ::gfsmArcIter \a aip if already opened, otherwise does nothing. + * \param aip The ::gfsmArcIter to be closed. + * \note + * \li If multiple copies of a ::gfsmArcIter exist, only one needs to be closed. + * \li Currently does nothing useful; in future versions this function may + * be required to free temporary allocations, etc. + */ +GFSM_INLINE +void gfsm_arciter_close(gfsmArcIter *aip); + +/** Copy positional data from \a src to \a dst. + * \param src The ::gfsmArcIter from which to copy positional data + * \param dst The ::gfsmArcIter to which positional data is to be written + * \returns \a dst + * \note + * \li Only the position pointed to should be copied by this method, + * and not the underlying data. + * \li If you use this method to copy ::gfsmArcIter positions, + * you should subsequently call gfsm_arciter_close() on only + * \e one of them! + */ +GFSM_INLINE +gfsmArcIter *gfsm_arciter_copy(gfsmArcIter *dst, const gfsmArcIter *src); + +/** Create and return a new (shallow) copy of a ::gfsmArcIter. + * \param src The ::gfsmArcIter whose positional data is to be duplicated. 
+ * \note + * \li Only the position pointed to should be copied by this method, + * and not the underlying data. + * \li If you use this method to copy ::gfsmArcIter positions, + * you should subsequently call gfsm_arciter_close() on only + * \e one of them! + */ +GFSM_INLINE +gfsmArcIter *gfsm_arciter_clone(const gfsmArcIter *src); + +//@} + +/*====================================================================== + * Methods: Arc iterators: Accessors + */ +///\name Arc Iterators: Accessors +//@{ + +/** Check validity of a ::gfsmArcIter* \a aip. + * \param aip The ::gfsmArcIter whose status is to be queried. + * \returns a true value if \a aip is considered valid, FALSE otherwise. + */ +GFSM_INLINE +gboolean gfsm_arciter_ok(const gfsmArcIter *aip); + +/** Position the ::gfsmArcIter \a aip to the next available outgoing arc for which it was opened. + * \param aip The ::gfsmArcIter to be incremented. + */ +GFSM_INLINE +void gfsm_arciter_next(gfsmArcIter *aip); + +/** Reset an arc iterator to the first outgoing arc for which it was initially opened. + * \param aip the ::gfsmArcIter to be reset + */ +GFSM_INLINE +void gfsm_arciter_reset(gfsmArcIter *aip); + +/** Get current arc associated with a ::gfsmArcIter, or NULL if none is available. + * \param aip The ::gfsmArcIter to be 'dereferenced'. + * \returns A pointer to the current ::gfsmArc 'pointed to' by \a aip, or NULL if + * no more arcs are available. + * \note + * \li In future versions, a ::gfsmAutomaton implementation will be free to return + * a dynamically generated arc here: there is no general + * guarantee that modifications to the ::gfsmArc returned by this + * function will be propagated to the underlying ::gfsmAutomaton. + * \li It is expected to remain the case that for the default automaton implementation class, + * the arcs returned by this function should be modifiable in-place. 
+ */ +GFSM_INLINE +gfsmArc *gfsm_arciter_arc(const gfsmArcIter *aip); + +/** Remove the arc referred to by a ::gfsmArcIter \a aip from the associated ::gfsmAutomaton, + * and position \a aip to the next available arc, if any. + * \param aip The ::gfsmArcIter whose 'current' arc is to be removed. + */ +GFSM_INLINE +void gfsm_arciter_remove(gfsmArcIter *aip); + + +/** Position an arc-iterator to the current or next arc with lower label \a lo. + * \param aip The ::gfsmArcIter to reposition + * \param lo Lower arc label to seek + * \note + * Currently just wraps gfsm_arciter_ok(), gfsm_arciter_next() and gfsm_arciter_arc() + * in a linear search from the current position. + */ +void gfsm_arciter_seek_lower(gfsmArcIter *aip, gfsmLabelVal lo); + +/** Position an arc-iterator to the current or next arc with upper label \a hi. + * \param aip The ::gfsmArcIter to reposition + * \param hi Upper arc label to seek + * \note + * Currently just wraps gfsm_arciter_ok(), gfsm_arciter_next() and gfsm_arciter_arc() + * in a linear search from the current position. + */ +void gfsm_arciter_seek_upper(gfsmArcIter *aip, gfsmLabelVal hi); + +/** Position an arc-iterator to the current or next arc with lower label \a lo and upper label \a hi. + * If either \a lo or \a hi is ::gfsmNoLabel, no matching will be performed on the corresponding arc label(s). + * \param aip The ::gfsmArcIter to reposition + * \param lo Lower arc label to seek, or ::gfsmNoLabel to ignore lower labels + * \param hi Upper arc label to seek, or ::gfsmNoLabel to ignore upper labels + * \note + * Default implementation wraps gfsm_arciter_ok(), gfsm_arciter_next() and gfsm_arciter_arc() + * in a linear search from the current position. 
+ */ +void gfsm_arciter_seek_both(gfsmArcIter *aip, gfsmLabelVal lo, gfsmLabelVal hi); + +/// Typedef for user-seek functions +typedef gboolean (*gfsmArcIterSeekFunc) (gfsmArcIter *aip, gpointer data); + +/** Position an arc-iterator to the current or next arc for which (*seekfunc)(arciter,data) returns TRUE. + * \param aip The ::gfsmArcIter to reposition + * \param seekfunc User predicate called with the iterator at each candidate position + * \param data User data passed through unchanged to \a seekfunc + * \note + * Just wraps gfsm_arciter_ok() and gfsm_arciter_next() + * in a linear search from the current position. + */ +void gfsm_arciter_seek_user(gfsmArcIter *aip, + gfsmArcIterSeekFunc seekfunc, + gpointer data); + + +//@} + +//-- inline definitions +#ifdef GFSM_INLINE_ENABLED +# include <gfsmArcIter.hi> +#endif + +#endif /* _GFSM_ARCITER_H */ + diff --git a/gfsm/gfsm/src/libgfsm/gfsmArcIter.hi b/gfsm/gfsm/src/libgfsm/gfsmArcIter.hi new file mode 100644 index 0000000..bb8268e --- /dev/null +++ b/gfsm/gfsm/src/libgfsm/gfsmArcIter.hi @@ -0,0 +1,126 @@ + +/*=============================================================================*\ + * File: gfsmArcIter.hi + * Author: Bryan Jurish <moocow@ling.uni-potsdam.de> + * Description: finite state machine library: arc iterators: inline definitions + * + * Copyright (c) 2004-2007 Bryan Jurish. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + *=============================================================================*/ + +/*====================================================================== + * Methods: Arc iterators: open/close + */ + +//-------------------------------------------------------------- +// open() +GFSM_INLINE +void gfsm_arciter_open(gfsmArcIter *aip, gfsmAutomaton *fsm, gfsmStateId stateid) +{ + aip->fsm = fsm; + aip->state = gfsm_automaton_find_state(fsm,stateid); + aip->arcs = NULL; + gfsm_arciter_reset(aip); +} + +//-------------------------------------------------------------- +// open_ptr() +GFSM_INLINE +void gfsm_arciter_open_ptr(gfsmArcIter *aip, gfsmAutomaton *fsm, gfsmState *stateptr) +{ + aip->fsm = fsm; + aip->state = stateptr; + aip->arcs = NULL; + gfsm_arciter_reset(aip); +} + +//-------------------------------------------------------------- +// reset() +GFSM_INLINE +void gfsm_arciter_reset(gfsmArcIter *aip) { + if (aip->state && gfsm_state_is_ok(aip->state)) { + aip->arcs = aip->state->arcs; + } else { + aip->arcs = NULL; + } +} + +//-------------------------------------------------------------- +// close() +GFSM_INLINE +void gfsm_arciter_close(gfsmArcIter *aip) { + if (!aip) return; + aip->fsm = NULL; + aip->state = NULL; + aip->arcs = NULL; +} + +//-------------------------------------------------------------- +// copy() +GFSM_INLINE +gfsmArcIter *gfsm_arciter_copy(gfsmArcIter *dst, const gfsmArcIter *src) { + *dst = *src; + return dst; +} + +//-------------------------------------------------------------- +// clone() +GFSM_INLINE +gfsmArcIter *gfsm_arciter_clone(const gfsmArcIter *src) { + return (gfsmArcIter*)gfsm_mem_dup_n(src,sizeof(gfsmArcIter)); +} + +/*====================================================================== + * Methods: Arc 
iterators: Accessors + */ + +//-------------------------------------------------------------- +// arc() +GFSM_INLINE +gfsmArc *gfsm_arciter_arc(const gfsmArcIter *aip) +{ + //return aip->arcs ? ((gfsmArc*)aip->arcs->data) : NULL; + return aip->arcs ? (&(aip->arcs->arc)) : NULL; +} + +//-------------------------------------------------------------- +// ok() +GFSM_INLINE +gboolean gfsm_arciter_ok(const gfsmArcIter *aip) +{ return (aip != NULL && aip->arcs != NULL); } + +//-------------------------------------------------------------- +// next() +GFSM_INLINE +void gfsm_arciter_next(gfsmArcIter *aip) +{ if (aip && aip->arcs) aip->arcs = aip->arcs->next; } + + +//-------------------------------------------------------------- +// seek_X() +//--EXTERN + +//-------------------------------------------------------------- +// remove() +GFSM_INLINE +void gfsm_arciter_remove(gfsmArcIter *aip) +{ + if (aip && aip->arcs) { + gfsmArcList *next = aip->arcs->next; + aip->state->arcs = gfsm_arclist_delete_node(aip->state->arcs, aip->arcs); + aip->arcs = next; + } +} diff --git a/gfsm/gfsm/src/libgfsm/gfsmArcList.c b/gfsm/gfsm/src/libgfsm/gfsmArcList.c new file mode 100644 index 0000000..8a2e381 --- /dev/null +++ b/gfsm/gfsm/src/libgfsm/gfsmArcList.c @@ -0,0 +1,226 @@ + +/*=============================================================================*\ + * File: gfsmArclist.c + * Author: Bryan Jurish <moocow@ling.uni-potsdam.de> + * Description: finite state machine library: arc lists + * + * Copyright (c) 2004-2007 Bryan Jurish. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + *=============================================================================*/ + +#include <gfsmArcList.h> + +//-- no-inline definitions +#ifndef GFSM_INLINE_ENABLED +# include <gfsmArcList.hi> +#endif + +/*====================================================================== + * Methods: Arc lists + */ + +/*-------------------------------------------------------------- + * arclist_insert_node_sorted() + */ +gfsmArcList *gfsm_arclist_insert_node_sorted(gfsmArcList *al, gfsmArcList *nod, gfsmArcCompData *acdata) +{ + gfsmArcList *al_first=al; + gfsmArcList *al_prev=NULL; + + for (; al != NULL; al_prev=al, al=al->next) { + if (gfsm_arc_compare_bymask_inline(&(nod->arc), &(al->arc), acdata) <= 0) break; + } + if (al_prev == NULL) return gfsm_arclist_prepend_node(al,nod); + al_prev->next = gfsm_arclist_prepend_node(al,nod); + + return al_first; +} + +/*-------------------------------------------------------------- + * arclist_clone() + */ +gfsmArcList *gfsm_arclist_clone(gfsmArcList *src) +{ + gfsmArcList *dst = NULL, *prev=NULL; + while (src != NULL) { + gfsmArcList *nod = gfsm_arclist_new_full(src->arc.source, + src->arc.target, + src->arc.lower, + src->arc.upper, + src->arc.weight, + NULL); + if (prev==NULL) { + dst = nod; + } else { + prev->next = nod; + } + prev = nod; + src = src->next; + } + return dst; +} + +/*-------------------------------------------------------------- + * arclist_concat() + */ +gfsmArcList *gfsm_arclist_concat(gfsmArcList *al1, gfsmArcList *al2) +{ + if (al1==NULL) { return al2; } + else { + gfsmArcList *nod=al1; + while (nod->next != NULL) { nod=nod->next; } + nod->next = al2; + } + return al1; +} + 
+/*-------------------------------------------------------------- + * arclist_length() + */ +guint gfsm_arclist_length(gfsmArcList *al) +{ + guint len=0; + while (al != NULL) { ++len; al=al->next; } + return len; +} + +/*-------------------------------------------------------------- + * arclist_free() + */ +void gfsm_arclist_free(gfsmArcList *al) +{ + while (al != NULL) { + gfsmArcList *nxt = al->next; + g_free(al); + al = nxt; + } +} + +/*-------------------------------------------------------------- + * arclist_sort_with_data() & friends + * + adapted from code in GLib glib/gslist.c: + * Copyright (C) 1995-1997 Peter Mattis, Spencer Kimball and Josh MacDonald, + * Modified by the GLib Team and others 1997-2000. + */ +static +gfsmArcList *gfsm_arclist_sort_merge (gfsmArcList *l1, + gfsmArcList *l2, + GFunc compare_func, + gpointer user_data) +{ + gfsmArcList list, *l; + gint cmp; + + l=&list; + + while (l1 && l2) + { + cmp = ((GCompareDataFunc) compare_func) (&(l1->arc), (&l2->arc), user_data); + + if (cmp <= 0) + { + l=l->next=l1; + l1=l1->next; + } + else + { + l=l->next=l2; + l2=l2->next; + } + } + l->next= l1 ? 
l1 : l2; + + return list.next; +} + + +gfsmArcList *gfsm_arclist_sort_real (gfsmArcList *list, + GFunc compare_func, + gpointer user_data) +{ + gfsmArcList *l1, *l2; + + if (!list) + return NULL; + if (!list->next) + return list; + + l1 = list; + l2 = list->next; + + while ((l2 = l2->next) != NULL) + { + if ((l2 = l2->next) == NULL) + break; + l1=l1->next; + } + l2 = l1->next; + l1->next = NULL; + + return gfsm_arclist_sort_merge (gfsm_arclist_sort_real (list, compare_func, user_data), + gfsm_arclist_sort_real (l2, compare_func, user_data), + compare_func, + user_data); +} + + +/*-------------------------------------------------------------- + * arclist_remove_node() + * + adapted from _g_slist_remove_link() + */ +gfsmArcList *gfsm_arclist_remove_node(gfsmArcList *al, gfsmArcList *nod) +{ + gfsmArcList *tmp; + gfsmArcList *prev; + + prev = NULL; + tmp = al; + + while (tmp) + { + if (tmp == nod) + { + if (prev) + prev->next = tmp->next; + if (al == tmp) + al = al->next; + + tmp->next = NULL; + break; + } + + prev = tmp; + tmp = tmp->next; + } + + return al; +} + + +/*-------------------------------------------------------------- + * arclist_reverse() + */ +gfsmArcList* gfsm_arclist_reverse(gfsmArcList *al) +{ + gfsmArcList *prev=NULL; + while (al) { + gfsmArcList *next = al->next; + al->next = prev; + prev = al; + al = next; + } + return prev; +} diff --git a/gfsm/gfsm/src/libgfsm/gfsmArcList.h b/gfsm/gfsm/src/libgfsm/gfsmArcList.h new file mode 100644 index 0000000..23ae0ad --- /dev/null +++ b/gfsm/gfsm/src/libgfsm/gfsmArcList.h @@ -0,0 +1,207 @@ + +/*=============================================================================*\ + * File: gfsmArcList.h + * Author: Bryan Jurish <moocow@ling.uni-potsdam.de> + * Description: finite state machine library: arc lists + * + formerly defined in gfsmArc.h + * + * Copyright (c) 2004-2007 Bryan Jurish. 
+ * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + *=============================================================================*/ + +/** \file gfsmArcList.h + * \brief Definitions & utilities for arc lists, <b>Deprecated</b> + * \details + * \deprecated + * in favor of gfsm_automaton_add_arc() and ::gfsmArcIter interface + * \see gfsmAutomaton.h, gfsmArcIter.h + */ + +#ifndef _GFSM_ARC_LIST_H +#define _GFSM_ARC_LIST_H + +#include <gfsmArc.h> + +/// "Heavy" arc-list structure, no longer using GSList +typedef struct gfsmArcListNode_ { + gfsmArc arc; /**< current arc */ + struct gfsmArcListNode_ *next; /**< next node in the list */ +} gfsmArcListNode; + +/// Alias for gfsmArcListNode +typedef gfsmArcListNode gfsmArcList; + + +/*====================================================================== + * Methods: Arc List: Constructors etc. + */ +/// \name Arc List: Constructors etc. 
+//@{ + +/** Prepend the node \a nod to the ::gfsmArcList \a al + * \returns a pointer to the new 1st element of the arclist + * \deprecated in favor of gfsm_automaton_add_arc(), gfsm_arciter_insert() + */ +GFSM_INLINE +gfsmArcList *gfsm_arclist_prepend_node(gfsmArcList *al, gfsmArcList *nod); + +/** Allocate and return a new arc-list node */ +GFSM_INLINE +gfsmArcList *gfsm_arclist_new_full(gfsmStateId src, + gfsmStateId dst, + gfsmLabelVal lo, + gfsmLabelVal hi, + gfsmWeight wt, + gfsmArcList *nxt); + + +/** Insert an arc into a (possibly sorted) arclist. + * \param al ::gfsmArcList into which a new arc is to be inserted + * \param src source state id for the new arc + * \param dst target state id for the new arc + * \param lo lower label for the new arc + * \param hi upper label for the new arc + * \param wt weight for the new arc + * \param acdata comparison data for 'smart' sorted insertion + * \returns a pointer to the (possibly new) 1st node of the arc list + * \deprecated in favor of gfsm_automaton_add_arc(), gfsm_arciter_insert() + */ +GFSM_INLINE +gfsmArcList *gfsm_arclist_insert(gfsmArcList *al, + gfsmStateId src, + gfsmStateId dst, + gfsmLabelVal lo, + gfsmLabelVal hi, + gfsmWeight wt, + gfsmArcCompData *acdata); + + +/** Insert a single arc-link into a (possibly sorted) arclist. 
+ * \param al arc list into which \a link is to be inserted + * \param link arc list node to insert + * \param acdata sort data for 'smart' sorted insertion + * \returns a pointer to the (possibly new) 1st element of the arclist + * \deprecated in favor of gfsm_automaton_add_arc(), gfsm_arciter_insert() + */ +GFSM_INLINE +gfsmArcList *gfsm_arclist_insert_node(gfsmArcList *al, gfsmArcList *nod, gfsmArcCompData *acdata); + +/** Low-level guts for gfsm_arclist_insert(), gfsm_arclist_insert_node() */ +gfsmArcList *gfsm_arclist_insert_node_sorted(gfsmArcList *al, gfsmArcList *link, gfsmArcCompData *acdata); + +/** Create and return a (deep) copy of an existing arc-list */ +gfsmArcList *gfsm_arclist_clone(gfsmArcList *src); + + +/** Destroy an arc-list node and all subsequent nodes */ +void gfsm_arclist_free(gfsmArcList *al); + +/* Free a single node of an arc-list */ +//void gfsm_arclist_free_node(gfsmArcList *nod); + +//@} + +/*====================================================================== + * Methods: Arc List: Accessors + */ +///\name Arc List: Access & Manipulation +//@{ + +/** Get the arc pointer for an arclist -- may be \c NULL + * \deprecated in favor of ::gfsmArcIter interface + * \see gfsmArcIter.h + */ +#define gfsm_arclist_arc(al) \ + ((al) ? (&(al->arc)) : NULL) + +// ((al) ? ((gfsmArc*)((al)->data)) : NULL) + +/** Concatenate 2 arc-lists + * \param al1 initial sublist + * \param al2 final sublist + * \returns pointer to head of the concatenated list + */ +gfsmArcList *gfsm_arclist_concat(gfsmArcList *al1, gfsmArcList *al2); + +/** Splice a single node out from a ::gfsmArcList. + * \param al arc list + * \param nod node to extract + * \returns pointer to head of the new arc list, without \a nod + * \warning removed \a nod is not freed! + * \see gfsm_arclist_delte_node() + */ +gfsmArcList *gfsm_arclist_remove_node(gfsmArcList *al, gfsmArcList *nod); + +/** Remove and free a single node from a ::gfsmArcList. 
+ * \param al arc list + * \param nod node to remove and free + * \returns pointer to head of the new arc list, without \a nod + */ +GFSM_INLINE +gfsmArcList *gfsm_arclist_delete_node(gfsmArcList *al, gfsmArcList *nod); + +/** Reverse a ::gfsmArcList + * \param al arc list to reverse + * \returns pointer to head of the reversed arc list + */ +gfsmArcList *gfsm_arclist_reverse(gfsmArcList *al); + +//@} + + +/*====================================================================== + * Methods: Arc List: Utilities + */ +///\name Arc List: Utilities +//@{ + +/** Get length of an arc-list \a al (linear time) */ +guint gfsm_arclist_length(gfsmArcList *al); + // Signature: <tt>guint gfsm_arclist_length(gfsmArcList *al)</tt> +//#define gfsm_arclist_length g_slist_length + +/** Sort an arclist \a al using one of the builtin sort modes as specified by \a acdata. + * \param al arc list to sort + * \param acdata sort data for builtin comparison + * \returns pointer to the new head of the sorted arc list + */ +GFSM_INLINE +gfsmArcList *gfsm_arclist_sort(gfsmArcList *al, gfsmArcCompData *acdata); + +/** Sort an arclist \a al using a user-defined arc comparison function. 
+ * \param al arc list to sort + * \param cmpfunc 3-way comparison function on ::gfsmArc* for sorting + * \param data additional data for \a cmpfunc + * \returns pointer to the new head of the sorted arc list + */ +GFSM_INLINE +gfsmArcList *gfsm_arclist_sort_with_data(gfsmArcList *al, GCompareDataFunc cmpfunc, gpointer data); + +/** Alias for gfsm_arclist_sort_with_data() */ +#define gfsm_arclist_sort_full gfsm_arclist_sort_with_data + +/** Low-level guts for gfsm_arclist_sort() and gfsm_arclist_sort_with_data(): + * \a compare_func is called as a GCompareDataFunc on ::gfsmArc pointers with \a user_data */ +gfsmArcList *gfsm_arclist_sort_real (gfsmArcList *list, GFunc compare_func, gpointer user_data); + + +//@} + +//-- inline definitions +#ifdef GFSM_INLINE_ENABLED +# include <gfsmArcList.hi> +#endif + +#endif /* _GFSM_ARC_LIST_H */ diff --git a/gfsm/gfsm/src/libgfsm/gfsmArcList.hi b/gfsm/gfsm/src/libgfsm/gfsmArcList.hi new file mode 100644 index 0000000..9426f26 --- /dev/null +++ b/gfsm/gfsm/src/libgfsm/gfsmArcList.hi @@ -0,0 +1,124 @@ + +/*=============================================================================*\ + * File: gfsmArclist.hi + * Author: Bryan Jurish <moocow@ling.uni-potsdam.de> + * Description: finite state machine library: arc lists: inline definitions + * + * Copyright (c) 2004-2007 Bryan Jurish. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + *=============================================================================*/ + +#include <stdlib.h> + +/*====================================================================== + * Methods: Arc lists + */ + +/*-------------------------------------------------------------- + * arclist_prepend_node() + */ +GFSM_INLINE +gfsmArcList *gfsm_arclist_prepend_node(gfsmArcList *al, gfsmArcList *nod) +{ + nod->next = al; + return nod; +} + +/*-------------------------------------------------------------- + * arclist_new_full() + */ +GFSM_INLINE +gfsmArcList *gfsm_arclist_new_full(gfsmStateId src, + gfsmStateId dst, + gfsmLabelVal lo, + gfsmLabelVal hi, + gfsmWeight wt, + gfsmArcList *nxt) +{ + gfsmArcList *nod = g_new(gfsmArcList,1); + nod->arc.source = src; + nod->arc.target = dst; + nod->arc.lower = lo; + nod->arc.upper = hi; + nod->arc.weight = wt; + nod->next = nxt; + return nod; +} + +/*-------------------------------------------------------------- + * arclist_delete_node() + */ +GFSM_INLINE +gfsmArcList *gfsm_arclist_delete_node(gfsmArcList *al, gfsmArcList *nod) +{ + al = gfsm_arclist_remove_node(al,nod); + g_free(nod); + return al; +} + +/*-------------------------------------------------------------- + * arclist_insert() + */ +GFSM_INLINE +gfsmArcList *gfsm_arclist_insert(gfsmArcList *al, + gfsmStateId src, + gfsmStateId dst, + gfsmLabelVal lo, + gfsmLabelVal hi, + gfsmWeight wt, + gfsmArcCompData *acdata) +{ + gfsmArcList *nod = gfsm_arclist_new_full(src,dst,lo,hi,wt,NULL); + if (!acdata || acdata->mask == gfsmASMNone) { + nod->next = al; + return nod; + } + return gfsm_arclist_insert_node_sorted(al,nod,acdata); +} + +/*-------------------------------------------------------------- + * arclist_insert_node() + */ +GFSM_INLINE +gfsmArcList 
*gfsm_arclist_insert_node(gfsmArcList *al, + gfsmArcList *nod, + gfsmArcCompData *acdata) +{ + if (!acdata || acdata->mask == gfsmASMNone) return gfsm_arclist_prepend_node(al,nod); + return gfsm_arclist_insert_node_sorted(al,nod,acdata); +} + +/*-------------------------------------------------------------- + * arclist_sort_with_data() + */ +GFSM_INLINE +gfsmArcList *gfsm_arclist_sort_with_data (gfsmArcList *al, + GCompareDataFunc compare_func, + gpointer user_data) +{ + return gfsm_arclist_sort_real (al, (GFunc)compare_func, user_data); +} + + + +/*-------------------------------------------------------------- + * arclist_sort() + */ +GFSM_INLINE +gfsmArcList *gfsm_arclist_sort(gfsmArcList *al, gfsmArcCompData *acdata) +{ + return gfsm_arclist_sort_real(al, (GFunc)gfsm_arc_compare_bymask, acdata); +} diff --git a/gfsm/gfsm/src/libgfsm/gfsmArith.c b/gfsm/gfsm/src/libgfsm/gfsmArith.c new file mode 100644 index 0000000..f9ccf11 --- /dev/null +++ b/gfsm/gfsm/src/libgfsm/gfsmArith.c @@ -0,0 +1,199 @@ +/*=============================================================================*\ + * File: gfsmArith.c + * Author: Bryan Jurish <moocow@ling.uni-potsdam.de> + * Description: finite state machine library + * + * Copyright (c) 2005-2007 Bryan Jurish. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + *=============================================================================*/ + +#include <math.h> +#include <glib.h> +#include <gfsmArith.h> +#include <gfsmArcIter.h> + + +/*====================================================================== + * Methods: arithmetic: Generic + */ + +//-------------------------------------------------------------- +gfsmAutomaton *gfsm_automaton_arith(gfsmAutomaton *fsm, + gfsmArithOp op, + gfsmWeight arg, + gfsmLabelVal lo, + gfsmLabelVal hi, + gboolean do_arcs, + gboolean do_final, + gboolean do_zero) +{ + if (op == gfsmAONone) return fsm; //-- dummy operation + + //-- arc weights + if (do_arcs) { + gfsmStateId qid; + + if (fsm->flags.sort_mode == gfsmASMWeight) + fsm->flags.sort_mode = gfsmASMNone; //-- arc-weights may be destructively altered + + for (qid=0; qid < fsm->states->len; qid++) { + gfsmArcIter ai; + for (gfsm_arciter_open(&ai,fsm,qid), gfsm_arciter_seek_both(&ai,lo,hi); + gfsm_arciter_ok(&ai); + gfsm_arciter_next(&ai), gfsm_arciter_seek_both(&ai,lo,hi)) + { + gfsmArc *arc = gfsm_arciter_arc(&ai); + arc->weight = gfsm_weight_arith(fsm->sr, op, arc->weight, arg, do_zero); + } + } + } + + //-- final weights + if (do_final) gfsm_automaton_arith_final(fsm, op, arg, do_zero); + + return fsm; +} + +//-------------------------------------------------------------- +gfsmAutomaton *gfsm_automaton_arith_state(gfsmAutomaton *fsm, + gfsmStateId qid, + gfsmArithOp op, + gfsmWeight arg, + gfsmLabelVal lo, + gfsmLabelVal hi, + gboolean do_arcs, + gboolean do_final, + gboolean do_zero) +{ + if (qid==gfsmNoState) return gfsm_automaton_arith(fsm,op,arg,lo,hi,do_arcs,do_final,do_zero); + + //-- arc weights + if (do_arcs) { + gfsmArcIter ai; + + if (fsm->flags.sort_mode == gfsmASMWeight) + fsm->flags.sort_mode = 
gfsmASMNone; //-- arc-weights may be destructively altered + + for (gfsm_arciter_open(&ai,fsm,qid), gfsm_arciter_seek_both(&ai,lo,hi); + gfsm_arciter_ok(&ai); + gfsm_arciter_next(&ai), gfsm_arciter_seek_both(&ai,lo,hi)) + { + gfsmArc *arc = gfsm_arciter_arc(&ai); + arc->weight = gfsm_weight_arith(fsm->sr, op, arc->weight, arg, do_zero); + } + } + + //-- final weight + if (do_final) { + gfsm_automaton_set_final_state_full(fsm, + qid, + gfsm_automaton_is_final_state(fsm, qid), + gfsm_weight_arith(fsm->sr, + op, + gfsm_automaton_get_final_weight(fsm, qid), + arg, + do_zero)); + } + + return fsm; +} + +//-------------------------------------------------------------- +gfsmWeight gfsm_weight_arith(gfsmSemiring *sr, + gfsmArithOp op, + gfsmWeight w1, + gfsmWeight w2, + gboolean do_zero) +{ + if (!do_zero && w1==sr->zero) return w1; + + switch (op) { + + case gfsmAOExp: ///< Exponentiate + return expf(w1); + break; + + case gfsmAOLog: ///< Logarithm + return logf(w1); + break; + + case gfsmAONoNeg: ///< Real force-positive + return (w1 < 0 ? (-w1) : w1); + break; + + case gfsmAOAdd: ///< Real Addition + return w1+w2; + break; + + case gfsmAOMult: ///< Real Multiplication + return w1*w2; + break; + + case gfsmAOSRNoNeg: ///< Semiring Force positive + return (gfsm_sr_less(sr,sr->zero,w1) ? 
sr->zero : w1); + break; + + case gfsmAOSRPlus: ///< Semiring Addition + return gfsm_sr_plus(sr,w1,w2); + break; + + case gfsmAOSRTimes: ///< Semiring Multiplication + return gfsm_sr_times(sr,w1,w2); + break; + + case gfsmAONone: ///< No operation + default: + return w1; + } + return w1; //-- should never happen +} + + + +/*====================================================================== + * Methods: arithmetic: final + */ + +//-------------------------------------------------------------- +gboolean _gfsm_automaton_arith_final_foreach_func(gfsmStateId id, + gpointer pw, + gfsmArithParams *params) +{ + gfsmWeight w = gfsm_ptr2weight(pw); + gfsm_weightmap_insert(params->fsm->finals, + GUINT_TO_POINTER(id), + gfsm_weight_arith(params->fsm->sr, + params->op, + w, + params->arg, + params->do_zero)); + return FALSE; +} + + +//-------------------------------------------------------------- +gfsmAutomaton *gfsm_automaton_arith_final(gfsmAutomaton *fsm, + gfsmArithOp op, + gfsmWeight arg, + gboolean do_zero) +{ + gfsmArithParams params = { fsm, op, arg, do_zero }; + g_tree_foreach(fsm->finals, + (GTraverseFunc)_gfsm_automaton_arith_final_foreach_func, + ¶ms); + return fsm; +} + diff --git a/gfsm/gfsm/src/libgfsm/gfsmArith.h b/gfsm/gfsm/src/libgfsm/gfsmArith.h new file mode 100644 index 0000000..c9ee40b --- /dev/null +++ b/gfsm/gfsm/src/libgfsm/gfsmArith.h @@ -0,0 +1,137 @@ + +/*=============================================================================*\ + * File: gfsmArith.h + * Author: Bryan Jurish <moocow@ling.uni-potsdam.de> + * Description: finite state machine library + * + * Copyright (c) 2005-2007 Bryan Jurish. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. 
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 *=============================================================================*/

#ifndef _GFSM_ARITH_H
#define _GFSM_ARITH_H

#include <gfsmAutomaton.h>

/** \file gfsmArith.h
 *  \brief Arithmetic operations on automata
 */

/*======================================================================
 * Types
 */
/** Type representing an elementary arithmetic operation.
 *  In the formulas below, \a w is the weight operated on and \a arg the
 *  (optional) second operand passed to the arithmetic functions.
 */
typedef enum {
  //-- real ops
  gfsmAONone,    ///< No operation: \a w=w
  gfsmAOExp,     ///< Real Exponentiation: \a w=exp(w)
  gfsmAOLog,     ///< Real Logarithm: \a w=log(w)
  gfsmAONoNeg,   ///< Real Force-positive: \a w=(w < 0 ? -w : w)
  gfsmAOAdd,     ///< Real Addition: \a w=w+arg
  gfsmAOMult,    ///< Real Multiplication: \a w=w*arg
  //-- semiring ops
  gfsmAOSRNoNeg, ///< Semiring Force-positive: \a w=(sr_less(sr_zero,w) ? sr_zero : w)
  gfsmAOSRPlus,  ///< Semiring Addition: \a w=sr_plus(w,arg)
  gfsmAOSRTimes  ///< Semiring Multiplication: \a w=sr_times(w,arg)
} gfsmArithOp;

/** \brief Type representing all parameters for a generic automaton arithmetic operation */
typedef struct {
  gfsmAutomaton *fsm;     ///< Automaton
  gfsmArithOp    op;      ///< Operation
  gfsmWeight     arg;     ///< 2nd operation argument (if any)
  gboolean       do_zero; ///< operate on semiring-zeroes?
} gfsmArithParams;


/*======================================================================
 * Methods: arithmetic: generic
 */
///\name Arithmetic (Generic)
//@{

//------------------------------
/** Perform a generic arithmetic operation on \a fsm.
 * \note Destructively alters \a fsm.
 *
 * \param fsm Automaton
 * \param op Operation
 * \param arg Argument of operation (if any)
 * \param lo Perform only for arcs with lower label \a lo (gfsmNoLabel for any label)
 * \param hi Perform only for arcs with upper label \a hi (gfsmNoLabel for any label)
 * \param do_arcs Perform operation on arc weights
 * \param do_final Perform operation on final weights
 * \param do_zero Perform operation on zero weights
 *
 * \returns modified \a fsm
 */
gfsmAutomaton *gfsm_automaton_arith(gfsmAutomaton *fsm,
                                    gfsmArithOp op,
                                    gfsmWeight arg,
                                    gfsmLabelVal lo,
                                    gfsmLabelVal hi,
                                    gboolean do_arcs,
                                    gboolean do_final,
                                    gboolean do_zero);

//------------------------------
/** Perform a generic arithmetic operation on state \a qid in \a fsm.
 * \note Destructively alters \a fsm.
 *
 * \param fsm Automaton
 * \param qid State-id in \a fsm, or gfsmNoState for all states
 * \param op Operation
 * \param arg Argument of operation (if any)
 * \param lo Perform only for arcs with lower label \a lo (gfsmNoLabel for any label)
 * \param hi Perform only for arcs with upper label \a hi (gfsmNoLabel for any label)
 * \param do_arcs Perform operation on arc weights
 * \param do_final Perform operation on final weights
 * \param do_zero Perform operation on zero weights
 *
 * \returns modified \a fsm
 */
gfsmAutomaton *gfsm_automaton_arith_state(gfsmAutomaton *fsm,
                                          gfsmStateId qid,
                                          gfsmArithOp op,
                                          gfsmWeight arg,
                                          gfsmLabelVal lo,
                                          gfsmLabelVal hi,
                                          gboolean do_arcs,
                                          gboolean do_final,
                                          gboolean do_zero);


//------------------------------
/** Perform a generic arithmetic operation on final weights
 * \note Destructively alters \a fsm.
 *
 * \param fsm Automaton
 * \param op Operation
 * \param arg Argument of operation (if any)
 * \param do_zero Perform operation on zero weights
 *
 * \returns modified \a fsm
 */
gfsmAutomaton *gfsm_automaton_arith_final(gfsmAutomaton *fsm,
                                          gfsmArithOp op,
                                          gfsmWeight arg,
                                          gboolean do_zero);

//------------------------------
/** Perform a generic arithmetic operation on a gfsmWeight.
 * \param sr Semiring supplying the zero element and the semiring operations
 * \param op Operation
 * \param w1 Weight to operate on
 * \param w2 Second operand; only used by the addition and multiplication operations
 * \param do_zero If false, a \a w1 equal to the zero of \a sr is returned unchanged
 * \returns result of operation
 */
gfsmWeight gfsm_weight_arith(gfsmSemiring *sr,
                             gfsmArithOp op,
                             gfsmWeight w1,
                             gfsmWeight w2,
                             gboolean do_zero);

//@}
#endif /* _GFSM_ARITH_H */
diff --git a/gfsm/gfsm/src/libgfsm/gfsmAssert.h b/gfsm/gfsm/src/libgfsm/gfsmAssert.h
new file mode 100644
index 0000000..baae9d4
--- /dev/null
+++ b/gfsm/gfsm/src/libgfsm/gfsmAssert.h
@@ -0,0 +1,51 @@

/*=============================================================================*\
 * File: gfsmAssert.h
 * Author: Bryan Jurish <moocow@ling.uni-potsdam.de>
 * Description: finite state machine library: assertions
 *
 * Copyright (c) 2007 Bryan Jurish.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 *=============================================================================*/

/** \file gfsmAssert.h
 *  \brief Assertions
 */

#ifndef _GFSM_ASSERT_H
#define _GFSM_ASSERT_H

#include <glib.h>

/**
 * \def gfsm_assert_not_reached()
 * \details If ever actually evaluated, aborts with an error message.
 * Only available if libgfsm was configured with debugging enabled.
 *
 * \def gfsm_assert(expr)
 * \details If \a expr does not evaluate to a true value, aborts with an error message.
 * Only available if libgfsm was configured with debugging enabled.
 */

#ifdef GFSM_DEBUG_ENABLED
//-- debugging build: forward to the corresponding GLib assertion macros
# define gfsm_assert_not_reached() g_assert_not_reached()
# define gfsm_assert(expr) g_assert(expr)
#else
//-- non-debugging build: both macros expand to nothing, so \a expr is never evaluated
# define gfsm_assert_not_reached()
# define gfsm_assert(expr)
#endif

#endif /* _GFSM_ASSERT_H */
diff --git a/gfsm/gfsm/src/libgfsm/gfsmAutomaton.c b/gfsm/gfsm/src/libgfsm/gfsmAutomaton.c
new file mode 100644
index 0000000..17f7a37
--- /dev/null
+++ b/gfsm/gfsm/src/libgfsm/gfsmAutomaton.c
@@ -0,0 +1,338 @@
/*=============================================================================*\
 * File: gfsmAutomaton.c
 * Author: Bryan Jurish <moocow@ling.uni-potsdam.de>
 * Description: finite state machine library: automata
 *
 * Copyright (c) 2004-2007 Bryan Jurish.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + *=============================================================================*/ + +#include <gfsmAutomaton.h> +#include <gfsmArcIter.h> +#include <gfsmUtils.h> +#include <gfsmBitVector.h> +#include <stdlib.h> + +#ifndef GFSM_INLINE_ENABLED +# include <gfsmAutomaton.hi> +#endif + +/*====================================================================== + * Constants + */ +const gfsmStateId gfsmAutomatonDefaultSize = 128; + +const gfsmAutomatonFlags gfsmAutomatonDefaultFlags = + { + TRUE, //-- is_transducer:1 + TRUE, //-- is_weighted:1 + //0, //-- sort_mode_old__:4 + FALSE, //-- is_deterministic:1 + 0, //-- unused:29 (was: 25) + gfsmASMNone //-- sort_mode + }; + +//const gfsmSRType gfsmAutomatonDefaultSRType = gfsmSRTReal; +const gfsmSRType gfsmAutomatonDefaultSRType = gfsmSRTTropical; + +//====================================================================== +// API: Constructors etc. + +/*-------------------------------------------------------------- + * copy() + */ +gfsmAutomaton *gfsm_automaton_copy(gfsmAutomaton *dst, gfsmAutomaton *src) +{ + gfsmStateId qid; + gfsm_automaton_clear(dst); + gfsm_automaton_copy_shallow(dst,src); + dst->root_id = src->root_id; //-- since copy_shallow() no longer does this! 
+ gfsm_automaton_reserve(dst,src->states->len); + gfsm_weightmap_copy(dst->finals, src->finals); + // + for (qid=0; qid < src->states->len; qid++) { + const gfsmState *src_s = gfsm_automaton_find_state_const(src,qid); + gfsmState *dst_s = gfsm_automaton_find_state(dst,qid); + gfsm_state_copy(dst_s, src_s); + } + return dst; +} + +/*-------------------------------------------------------------- + * clear() + */ +void gfsm_automaton_clear(gfsmAutomaton *fsm) +{ + gfsmStateId i; + if (!fsm) return; + for (i=0; fsm->states && i < fsm->states->len; i++) { + gfsmState *st = gfsm_automaton_find_state(fsm,i); + if (!st || !st->is_valid) continue; + gfsm_state_clear(st); + } + if (fsm->states) g_array_set_size(fsm->states,0); + if (fsm->finals) gfsm_set_clear(fsm->finals); + fsm->root_id = gfsmNoState; + return; +} + + +//====================================================================== +// API: Automaton Semiring + +//====================================================================== +// API: Automaton Properties + +/*-------------------------------------------------------------- + * n_arcs_full() + */ +guint gfsm_automaton_n_arcs_full(gfsmAutomaton *fsm, + guint *n_lo_epsilon, + guint *n_hi_epsilon, + guint *n_both_epsilon) +{ + guint i, total=0; + guint n_lo_eps=0, n_hi_eps=0, n_both_eps=0; + gfsmStateId n_states = gfsm_automaton_n_states(fsm); + + for (i=0; i < n_states; i++) { + gfsmArcIter ai; + for (gfsm_arciter_open(&ai, fsm, i); gfsm_arciter_ok(&ai); gfsm_arciter_next(&ai)) { + gfsmArc *a = gfsm_arciter_arc(&ai); + ++total; + if (a->lower==gfsmEpsilon) { + ++n_lo_eps; + if (a->upper==gfsmEpsilon) { + ++n_hi_eps; + ++n_both_eps; + } + } + else if (a->upper==gfsmEpsilon) { + ++n_hi_eps; + } + } + gfsm_arciter_close(&ai); + } + if (n_lo_epsilon) *n_lo_epsilon = n_lo_eps; + if (n_hi_epsilon) *n_hi_epsilon = n_hi_eps; + if (n_both_epsilon) *n_both_epsilon = n_both_eps; + return total; +} + + +/*-------------------------------------------------------------- + * 
is_cyclic_state() + */ +gboolean gfsm_automaton_is_cyclic_state(gfsmAutomaton *fsm, + gfsmStateId id, + gfsmBitVector *visited, + gfsmBitVector *completed) +{ + gfsmState *s; + gfsmArcIter ai; + // + if (gfsm_bitvector_get(visited,id)) { + if (gfsm_bitvector_get(completed,id)) return FALSE; + return TRUE; + } + // + s = gfsm_automaton_find_state(fsm,id); + if (!s || !s->is_valid) return FALSE; //-- invalid states don't count as cyclic + // + //-- mark node as visited (& not completed) + gfsm_bitvector_set(visited,id,1); + gfsm_bitvector_set(completed,id,0); + // + //-- visit outgoing arcs + for (gfsm_arciter_open_ptr(&ai,fsm,s); gfsm_arciter_ok(&ai); gfsm_arciter_next(&ai)) { + if (gfsm_automaton_is_cyclic_state(fsm, gfsm_arciter_arc(&ai)->target, visited, completed)) { + gfsm_arciter_close(&ai); + return TRUE; + } + } + gfsm_arciter_close(&ai); + // + //-- mark node as completed + gfsm_bitvector_set(completed,id,1); + // + //-- finished traversal; this state isn't cyclic + return FALSE; +} + +/*-------------------------------------------------------------- + * is_cyclic() + */ +gboolean gfsm_automaton_is_cyclic(gfsmAutomaton *fsm) +{ + gfsmBitVector *visited; //-- records which states we've visited + gfsmBitVector *completed; //-- records which states we've completed + gboolean rc; //-- return value + + if (fsm->root_id==gfsmNoState || fsm->states->len==0) return FALSE; //-- sanity check(s) + + visited = gfsm_bitvector_sized_new(fsm->states->len); + completed = gfsm_bitvector_sized_new(fsm->states->len); + rc = gfsm_automaton_is_cyclic_state(fsm, fsm->root_id, visited, completed); + + //-- cleanup + gfsm_bitvector_free(visited); + gfsm_bitvector_free(completed); + + return rc; +} + + +/*-------------------------------------------------------------- + * get_alphabet() + */ +gfsmAlphabet *gfsm_automaton_get_alphabet(gfsmAutomaton *fsm, gfsmLabelSide which, gfsmAlphabet *alph) +{ + gfsmStateId id; + //-- ensure alphabet + if (!alph) alph = gfsm_range_alphabet_new(); 
+ + for (id=0; id < fsm->states->len; id++) { + gfsmArcIter ai; + for (gfsm_arciter_open(&ai,fsm,id); gfsm_arciter_ok(&ai); gfsm_arciter_next(&ai)) { + gfsmArc *a = gfsm_arciter_arc(&ai); + + if (which != gfsmLSUpper) + gfsm_alphabet_insert(alph, GUINT_TO_POINTER((guint)(a->lower)), a->lower); + + if (which != gfsmLSLower) + gfsm_alphabet_insert(alph, GUINT_TO_POINTER((guint)(a->upper)), a->upper); + } + gfsm_arciter_close(&ai); + } + return alph; +} + +/*====================================================================== + * Methods: Accessors: Automaton States + */ + +/*-------------------------------------------------------------- + * renumber_states() + */ +void gfsm_automaton_renumber_states(gfsmAutomaton *fsm) +{ + gfsmStateId oldid, newid; + GArray *old2new = NULL; + + //-- always set root state to zero -- even add one + if (fsm->root_id == gfsmNoState) fsm->root_id = gfsm_automaton_add_state(fsm); + + //-- get old-to-new id map + old2new = g_array_sized_new(FALSE,FALSE,sizeof(gfsmStateId),fsm->states->len); + g_array_index(old2new,gfsmStateId,fsm->root_id) = 0; + for (oldid=0, newid=0; oldid < fsm->states->len; oldid++) { + if (oldid==fsm->root_id) continue; + if (gfsm_automaton_has_state(fsm,oldid)) { + g_array_index(old2new,gfsmStateId,oldid) = ++newid; + } else { + g_array_index(old2new,gfsmStateId,oldid) = gfsmNoState; + } + } + + //-- perform actual renumbering + gfsm_automaton_renumber_states_full(fsm, old2new, newid+1); + + //-- cleanup + g_array_free(old2new,TRUE); +} + +/*-------------------------------------------------------------- + * renumber_states_full() + */ +void gfsm_automaton_renumber_states_full(gfsmAutomaton *fsm, GArray *old2new, gfsmStateId n_new_states) +{ + gfsmStateId oldid, newid; + gfsmState *s_old, *s_new; + gfsmWeightMap *new_finals = gfsm_weightmap_new(gfsm_uint_compare); + GArray *new_states = NULL; + + //-- get new number of states + if (n_new_states==0) { + for (oldid=0; oldid < fsm->states->len; oldid++) { + if 
(!gfsm_automaton_has_state(fsm,oldid)) continue; + newid = g_array_index(old2new,gfsmStateId,oldid); + if (newid != gfsmNoState && newid >= n_new_states) { n_new_states=newid+1; } + } + } + + //-- allocate new state-vector + new_states = g_array_sized_new(FALSE,TRUE,sizeof(gfsmState),n_new_states); + + //-- renumber states + for (oldid=0; oldid < fsm->states->len; oldid++) { + gfsmArcIter ai; + newid = g_array_index(old2new,gfsmStateId,oldid); + + if (newid==gfsmNoState || !gfsm_automaton_has_state(fsm,oldid)) continue; //-- ignore bad states + + //-- copy state data + s_old = gfsm_automaton_find_state(fsm, oldid); + s_new = &(g_array_index(new_states,gfsmState,newid)); + *s_new = *s_old; + + //-- check for final state + if (s_new->is_final) { + gfsmWeight fw =0; //-- hack to convince gcc not to complain about unitialized fw + gfsm_weightmap_lookup(fsm->finals, GUINT_TO_POINTER(oldid), &fw); + gfsm_weightmap_insert(new_finals, GUINT_TO_POINTER(newid), fw); + } + + //-- renumber sources & targets of outgoing arcs + for (gfsm_arciter_open_ptr(&ai, fsm, s_new); gfsm_arciter_ok(&ai); gfsm_arciter_next(&ai)) + { + gfsmArc *a = gfsm_arciter_arc(&ai); + a->source = newid; + a->target = g_array_index(old2new,gfsmStateId,a->target); + } + gfsm_arciter_close(&ai); + } + + //-- set new root-id + fsm->root_id = g_array_index(old2new,gfsmStateId,fsm->root_id); + + //-- set new final weights + gfsm_weightmap_free(fsm->finals); + fsm->finals = new_finals; + + //-- set new state vector + g_array_free(fsm->states,TRUE); + fsm->states = new_states; + fsm->states->len = n_new_states; +} + + +/*====================================================================== + * Methods: Accessors: Automaton Arcs + */ + +/*-------------------------------------------------------------- + * arcsort_full() + */ +void gfsm_automaton_arcsort_full(gfsmAutomaton *fsm, GCompareDataFunc cmpfunc, gpointer data) +{ + gfsmStateId qid; + for (qid=0; qid < fsm->states->len; qid++) { + gfsmState *qp = 
gfsm_automaton_find_state(fsm,qid); + if (!qp || !qp->is_valid) continue; + qp->arcs = gfsm_arclist_sort_full(qp->arcs, cmpfunc, data); + } + fsm->flags.sort_mode = gfsmACUser; +} diff --git a/gfsm/gfsm/src/libgfsm/gfsmAutomaton.h b/gfsm/gfsm/src/libgfsm/gfsmAutomaton.h new file mode 100644 index 0000000..6cb4927 --- /dev/null +++ b/gfsm/gfsm/src/libgfsm/gfsmAutomaton.h @@ -0,0 +1,557 @@ + +/*=============================================================================*\ + * File: gfsmAutomaton.h + * Author: Bryan Jurish <moocow@ling.uni-potsdam.de> + * Description: finite state machine library: automata + * + * Copyright (c) 2004-2007 Bryan Jurish. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 *=============================================================================*/

/** \file gfsmAutomaton.h
 *  \brief Automaton definitions and low-level access
 */

#ifndef _GFSM_AUTOMATON_H
#define _GFSM_AUTOMATON_H

#include <gfsmAlphabet.h>
#include <gfsmState.h>
#include <gfsmWeightMap.h>
#include <gfsmBitVector.h>

/*======================================================================
 * Types
 */

/** \brief Automaton status flags
 *  \details
 *   The bit-fields below add up to 32 bits. Positional initializers such as
 *   ::gfsmAutomatonDefaultFlags must list their values in this declaration order.
 *  \todo convert ::gfsmAutomatonFlags flags to 3-valued domain: unknown,true,false
 *  \todo add better checking & access for automaton status flags
 */
typedef struct {
  guint32 is_transducer : 1;     /**< whether this automaton is a transducer */
  guint32 is_weighted : 1;       /**< whether this automaton is weighted */
  guint32 is_deterministic : 1;  /**< whether fsm is known to be deterministic */
  guint32 sort_mode : 24;        /**< new-style sort mode (a ::gfsmArcCompMask) */
  guint32 unused : 5;            /**< reserved */
} gfsmAutomatonFlags;

/** \brief "Heavy" automaton type
 *
 *  All automata are stored as weighted transducers.
 */
typedef struct {
  //-- basic data
  gfsmAutomatonFlags flags;     /**< automaton flags */
  gfsmSemiring *sr;             /**< semiring used for arc weight computations */
  GArray *states;               /**< vector of automaton states */
  gfsmWeightMap *finals;        /**< map from final state-Ids to final weights */
  gfsmStateId root_id;          /**< ID of root node, or gfsmNoState if not defined */
} gfsmAutomaton;

/*======================================================================
 * Constants
 */
/** Default initial automaton size (number of states) */
extern const gfsmStateId gfsmAutomatonDefaultSize;

/** Default initial automaton flags */
extern const gfsmAutomatonFlags gfsmAutomatonDefaultFlags;

/** Default semiring for automaton arc weights */
extern const gfsmSRType gfsmAutomatonDefaultSRType;

/*======================================================================*/
/// \name API: Constructors etc.
//@{

/** Create a new ::gfsmAutomaton, preallocating \a n_states states */
GFSM_INLINE
gfsmAutomaton *gfsm_automaton_new_full(gfsmAutomatonFlags flags, gfsmSRType srtype, gfsmStateId n_states);

/** Create and return a new ::gfsmAutomaton, using default flags, semiring type and size */
GFSM_INLINE
gfsmAutomaton *gfsm_automaton_new(void);

/** Create a new gfsmAutomaton as a deep exact copy of \a fsm.
 * \param fsm automaton to be cloned
 * \returns new deep copy of \a fsm
 */
GFSM_INLINE
gfsmAutomaton *gfsm_automaton_clone(gfsmAutomaton *fsm);

/** Assign non-structural contents (flags, semiring) of \a src to \a dst,
 * without altering \a dst's topology.
 * \param dst target automaton
 * \param src source automaton
 * \returns modified \a dst
 * \warning Earlier versions of this function also set the root state of \a dst
 * to that of \a src, but you should no longer rely on this being the case!
 */
GFSM_INLINE
gfsmAutomaton *gfsm_automaton_copy_shallow(gfsmAutomaton *dst, gfsmAutomaton *src);

/** Assign the contents of fsm \a src to fsm \a dst \returns \a dst
 * \note \a dst is cleared before the states, final weights and root ID of \a src are copied into it.
 */
gfsmAutomaton *gfsm_automaton_copy(gfsmAutomaton *dst, gfsmAutomaton *src);

/** Create a new ::gfsmAutomaton whose non-structural contents match those of \a fsm.
 * \param fsm source automaton
 * \returns new automaton whose non-structural fields match those of \a fsm
 */
GFSM_INLINE
gfsmAutomaton *gfsm_automaton_shadow(gfsmAutomaton *fsm);

/** Swap the contents of automata \a fsm1 and \a fsm2 */
GFSM_INLINE
void gfsm_automaton_swap(gfsmAutomaton *fsm1, gfsmAutomaton *fsm2);

/** Clear an automaton: all states and final weights are removed and the
 * automaton is marked as unrooted. A NULL \a fsm is ignored.
 */
void gfsm_automaton_clear(gfsmAutomaton *fsm);

/** Destroy an automaton: all associated states and arcs will be freed. */
GFSM_INLINE
void gfsm_automaton_free(gfsmAutomaton *fsm);
//@}


/*======================================================================*/
/// \name API: Automaton Semiring
//@{

/** Get pointer to the semiring associated with this automaton */
GFSM_INLINE
gfsmSemiring *gfsm_automaton_get_semiring(gfsmAutomaton *fsm);

/** Set the semiring associated with this automaton
 * \param fsm automaton to modify
 * \param sr semiring to be copied into \a fsm->sr
 * \returns pointer to the (new) semiring for \a fsm
 * \note
 * \li Implicitly frees the semiring previously associated with \a fsm, if any.
 * \warning
 * Prior to libgfsm-v0.0.9 this function returned the parameter \a sr itself
 */
GFSM_INLINE
gfsmSemiring *gfsm_automaton_set_semiring(gfsmAutomaton *fsm, gfsmSemiring *sr);

/** Set the semiring associated with this automaton by type.
 * \param fsm automaton whose semiring is to be set
 * \param srtype type of new semiring
 * \note
 * \li If \a fsm's semiring is already of type \a srtype, this function does nothing.
 * \li If \a srtype is ::gfsmSRTUser, \a fsm's new semiring will be uninitialized
 * \li Implicitly frees the semiring previously associated with \a fsm, if any.
 */
GFSM_INLINE
void gfsm_automaton_set_semiring_type(gfsmAutomaton *fsm, gfsmSRType srtype);

//@}

/*======================================================================*/
///\name API: Automaton Structure
//@{

/** Reserve space for at least \a n_states states (may do nothing)
 * \param fsm automaton to modify
 * \param n_states number of states to reserve, if supported by implementation
 */
GFSM_INLINE
void gfsm_automaton_reserve_states(gfsmAutomaton *fsm, gfsmStateId n_states);

/** Backwards-compatible alias for gfsm_automaton_reserve_states()
 * \deprecated in favor of gfsm_automaton_reserve_states()
 */
#define gfsm_automaton_reserve(fsm,n_states) gfsm_automaton_reserve_states((fsm),(n_states))


/** Reserve space for at least \a n_arcs arcs
 * \param fsm automaton to modify
 * \param n_arcs number of arcs to reserve
 * \note
 * Currently does nothing.
 */
GFSM_INLINE
void gfsm_automaton_reserve_arcs(gfsmAutomaton *fsm, guint n_arcs);

/** Get the number of states in an automaton (modulo 'gaps' in state ID numbering).
 * \param fsm automaton to examine
 * \returns
 * The least ::gfsmStateId \b q such that for all <b>r >= q</b>,
 * \a fsm has no state with ID \b r.
 */
GFSM_INLINE
gfsmStateId gfsm_automaton_n_states(gfsmAutomaton *fsm);

/** Get number of final states in \a fsm */
GFSM_INLINE
gfsmStateId gfsm_automaton_n_final_states(gfsmAutomaton *fsm);

/** Get total number of arcs in \a fsm.
 * \note
 * Currently just a wrapper for gfsm_automaton_n_arcs_full(), so time is <em>O(n_arcs)</em>
 */
GFSM_INLINE
guint gfsm_automaton_n_arcs(gfsmAutomaton *fsm);

/** Get ID of root state, or ::gfsmNoState if undefined */
GFSM_INLINE
gfsmStateId gfsm_automaton_get_root(gfsmAutomaton *fsm);

/** Set ID of root state, creating state if necessary.
 * \param fsm automaton whose root state is to be set
 * \param new_root_id ID of new root state
 * \li If \a new_root_id is ::gfsmNoState, \a fsm is marked as 'unrooted'
 * \li otherwise, a new state with ID \a new_root_id is implicitly created if none already existed
 */
GFSM_INLINE
void gfsm_automaton_set_root(gfsmAutomaton *fsm, gfsmStateId new_root_id);

/** Call a user-defined function \a func for each final state of \a fsm.
 * \param fsm automaton whose final states are to be traversed
 * \param func \c GTraverseFunc for final states, as for \c g_tree_foreach()
 * \param data user data for \a func
 * \warning
 * \a func may \e not directly alter any final weights of \a fsm.
 * See GLib documentation of g_tree_foreach() for details and a workaround.
 * \note
 * \a func will be called as <tt>(*func)(gpointer final_stateid, gpointer final_weight, gpointer data)</tt>;
 * that is, both the ::gfsmStateId \a final_stateid and the final weight \a final_weight will be encoded
 * as (gpointers). They can be decoded with GPOINTER_TO_UINT() and gfsm_ptr2weight(), respectively, e.g.
\code
gboolean my_final_func(gpointer id_p, gpointer fw_p, gpointer data) {
  gfsmStateId final_id = GPOINTER_TO_UINT(id_p); //-- decode state id
  gfsmWeight final_weight = gfsm_ptr2weight(fw_p); //-- decode final weight
  do_something_interesting(); //-- ... whatever ...
  return FALSE; //-- continue traversal
}
\endcode
* \see gfsm_automaton_finals_to_array()
 */
GFSM_INLINE
void gfsm_automaton_finals_foreach(gfsmAutomaton *fsm, GTraverseFunc func, gpointer data);

/** Get a GArray of ::gfsmStateWeightPair values for final states of \a fsm.
 * \param fsm automaton from which to extract final states
 * \param array array to be populated, or NULL to allocate a new array
 * \returns new \a array, or a newly allocated ::gfsmStateWeightPairArray
 * \note
 * Caller is responsible for freeing the array returned when it is no longer needed.
 */
GFSM_INLINE
gfsmStateWeightPairArray* gfsm_automaton_finals_to_array(gfsmAutomaton *fsm, gfsmStateWeightPairArray *array);

//@}

/*======================================================================*/
/** \name API: Automaton Properties
 * Currently quite sketchy; better tracking and checking of automaton flags should be implemented.
 */
//@{

/** True iff automaton is a transducer */
#define gfsm_automaton_is_transducer(fsm) ((fsm)->flags.is_transducer)

/** True iff automaton is weighted */
#define gfsm_automaton_is_weighted(fsm) ((fsm)->flags.is_weighted)

/** Get current automaton arc-sort mode (primary sort) */
#define gfsm_automaton_sortmode(fsm) \
  ((gfsmArcSortMode)(gfsm_acmask_nth((fsm)->flags.sort_mode,0)))

//  ((gfsmArcSortMode)((fsm)->flags.sort_mode))

/** Get verbose summary arc information (linear time w/ number of arcs)
 * \param[in] fsm automaton to examine
 * \param[out] n_lo_epsilon on return holds number of arcs with lower label ::gfsmEpsilon, or NULL
 * \param[out] n_hi_epsilon on return holds number of arcs with upper label ::gfsmEpsilon, or NULL
 * \param[out] n_both_epsilon on return holds number of arcs with both lower and upper labels ::gfsmEpsilon, or NULL
 * \returns total number of arcs
 */
guint gfsm_automaton_n_arcs_full(gfsmAutomaton *fsm,
                                 guint *n_lo_epsilon,
                                 guint *n_hi_epsilon,
                                 guint *n_both_epsilon);

/** Low-level utility function for gfsm_automaton_is_cyclic() */
gboolean gfsm_automaton_is_cyclic_state(gfsmAutomaton *fsm,
                                        gfsmStateId qid,
                                        gfsmBitVector *visited,
                                        gfsmBitVector *completed);

/** Test whether automaton is cyclic.
 * \note The search starts at the root state; an unrooted or empty automaton is reported as acyclic.
 */
gboolean gfsm_automaton_is_cyclic(gfsmAutomaton *fsm);

/** Test whether automaton is acyclic */
#define gfsm_automaton_is_acyclic(fsm) (!gfsm_automaton_is_cyclic(fsm))

/** Extract automaton-internal labels to \a alph. If \a alph is NULL,
 * a new default alphabet will be created and returned (you will need to
 * free it yourself).
 *
 * The alphabet should be able to match literal label values to themselves
 * (so don't pass a string alphabet)
 *
 * \param[in] fsm automaton to examine
 * \param[in] which determines which label side(s) to extract
 * \param[out] alph alphabet to which labels are extracted, or NULL to create a new alphabet
 *
 * \returns \a alph, or a newly allocated and populated alphabet
 */
gfsmAlphabet *gfsm_automaton_get_alphabet(gfsmAutomaton *fsm,
                                          gfsmLabelSide which,
                                          gfsmAlphabet *alph);


/** Renumber states of the automaton \a fsm.
 * Destructively alters \c fsm.
 * On return, \a fsm should have no 'gaps' in its state enumeration function, and its
 * root state should have the ID 0 (zero).
 */
void gfsm_automaton_renumber_states(gfsmAutomaton *fsm);

/** Renumber states of an FSM using user-specified state-ID map \a old2new.
 * Destructively alters \c fsm.
 * \param fsm
 * Automaton whose states are to be renumbered
 * \param old2new
 * GArray of ::gfsmStateId such that <tt>qid_new=old2new[qid_old]</tt>.
 * \a qid_new may be ::gfsmNoState to ignore the corresponding \a qid_old
 * \param n_new_states
 * Number of states of the renumbered automaton, i.e. one more than the maximum
 * \a qid_new ::gfsmStateId value in \a old2new, or 0 (zero) to auto-compute.
 */
void gfsm_automaton_renumber_states_full(gfsmAutomaton *fsm, GArray *old2new, gfsmStateId n_new_states);

//@}

/*======================================================================*/
///\name API: gfsmState
//@{

/** Open and return a pointer to a ::gfsmState struct for ::gfsmStateId \a qid in \a fsm.
 * \warning
 * The pointer returned should be closed with gfsm_automaton_close_state()
 * \param fsm automaton from which to draw state information
 * \param qid ID of state to be opened
 * \returns ::gfsmState* for state with ID \a qid in \a fsm, or NULL if no such state exists.
 * \deprecated
 * prefer gfsm_automaton_has_state(), gfsm_automaton_state_is_final(), gfsm_arciter_open() etc.
 */
GFSM_INLINE
gfsmState *gfsm_automaton_open_state(gfsmAutomaton *fsm, gfsmStateId qid);

/** Open and return a pointer to a ::gfsmState struct for ::gfsmStateId \a qid in \a fsm,
 * creating state if it does not already exist.
 * \warning
 * The pointer returned should be closed with gfsm_automaton_close_state().
 * \param fsm automaton from which to draw state information
 * \param qid ID of state to be opened
 * \returns ::gfsmState* for (possibly new) state with ID \a qid in \a fsm
 * \deprecated
 * prefer gfsm_automaton_has_state(), gfsm_automaton_state_is_final(), gfsm_arciter_open() etc.
 */
GFSM_INLINE
gfsmState *gfsm_automaton_open_state_force(gfsmAutomaton *fsm, gfsmStateId qid);

/** Close a pointer to a ::gfsmState opened with gfsm_automaton_open_state() for \a fsm.
 * \param fsm automaton from which state was opened.
 * \param qp pointer as returned by gfsm_automaton_open_state()
 * \note Currently does nothing.
 */
GFSM_INLINE
void gfsm_automaton_close_state(gfsmAutomaton *fsm, gfsmState *qp);

/** Backwards-compatible alias for gfsm_automaton_open_state().
 * This alias is expected to disappear when calling gfsm_automaton_close_state() becomes mandatory.
 */
#define gfsm_automaton_find_state(fsm,qid) gfsm_automaton_open_state((fsm),(qid))

/** Backwards-compatible alias for gfsm_automaton_open_state(), with the result cast to a const pointer.
 * This alias is expected to disappear when calling gfsm_automaton_close_state() becomes mandatory.
 */
#define gfsm_automaton_find_state_const(fsm,qid) ((const gfsmState*)(gfsm_automaton_open_state((fsm),(qid))))

/** Backwards-compatible alias for gfsm_automaton_open_state_force()
 * This alias is expected to disappear when calling gfsm_automaton_close_state() becomes mandatory.
+ */ +#define gfsm_automaton_get_state(fsm,qid) gfsm_automaton_open_state_force((fsm),(qid)) + +//@} + +/*======================================================================*/ +/// \name API: Automaton States +//@{ + +/*-------------------------------------------------------------- + * has_state() + * + TRUE iff \a qid lies inside the state array of \a fsm and that state is flagged valid + */ +GFSM_INLINE +gboolean gfsm_automaton_has_state(gfsmAutomaton *fsm, gfsmStateId qid); + +/** Add a new state, specifying state ID. + * \param fsm automaton to modify + * \param qid ID of new state, or ::gfsmNoState to use the first available state ID. + * \returns Id of the (new) state + * \note + * \li Implicitly sets \a fsm's root state if \a fsm was previously unrooted. + * (NOTE(review): the inline definition in gfsmAutomaton.hi does not assign the root itself -- confirm whether this still holds.) + * \li Does nothing if \a fsm already has a state with ID \a qid. + */ +GFSM_INLINE +gfsmStateId gfsm_automaton_add_state_full(gfsmAutomaton *fsm, gfsmStateId qid); + +/** Ensures that state \a qid exists. + * Really just an alias for gfsm_automaton_add_state_full(). + * \returns \a qid + */ +GFSM_INLINE +gfsmStateId gfsm_automaton_ensure_state(gfsmAutomaton *fsm, gfsmStateId qid); + +/** Add a new state to \a fsm. + * Really just an alias for \code gfsm_automaton_add_state_full(fsm,gfsmNoState) \endcode + */ +GFSM_INLINE +gfsmStateId gfsm_automaton_add_state(gfsmAutomaton *fsm); + +/** Remove the state with id \a qid, if any. + * \param fsm automaton from which to remove a state + * \param qid ID of the state to be removed + * \note + * Any incoming arcs for state \a qid are NOT removed, + * although any outgoing arcs are removed and freed. + */ +GFSM_INLINE +void gfsm_automaton_remove_state(gfsmAutomaton *fsm, gfsmStateId qid); + +/** Lookup final weight for state with ID \a qid in automaton \a fsm. 
+ * \param fsm automaton to examine + * \param qid ID of state to examine + * \param wp output parameter for final weight; must not be NULL, + * receives <tt>fsm->sr->zero</tt> if state \a qid is missing, invalid, or not final + * \returns + * TRUE if state \a qid is final, FALSE otherwise + */ +GFSM_INLINE +gboolean gfsm_automaton_lookup_final(gfsmAutomaton *fsm, gfsmStateId qid, gfsmWeight *wp); + +/** Check whether the state with ID \a qid is final in \a fsm. + * Tests the state's validity and finality flags directly; no weight lookup is performed. + * \param fsm automaton to examine + * \param qid ID of state to check for finality + * \returns TRUE if \a qid is final in \a fsm, FALSE otherwise. + */ +GFSM_INLINE +gboolean gfsm_automaton_state_is_final(gfsmAutomaton *fsm, gfsmStateId qid); + +/** Backwards-compatible alias for gfsm_automaton_state_is_final() */ +#define gfsm_automaton_is_final_state(fsm,qid) gfsm_automaton_state_is_final((fsm),(qid)) + +/** Get final weight. \returns final weight if state \a qid is final, else \a fsm->sr->zero */ +GFSM_INLINE +gfsmWeight gfsm_automaton_get_final_weight(gfsmAutomaton *fsm, gfsmStateId qid); + +/** Set final-weight and/or final-states membership flag for state with ID \a qid in \a fsm. + * \param fsm automaton to modify + * \param qid ID of state to be modified + * \param is_final whether state should be considered final + * \param final_weight + * If \a is_final is true, final weight for state. 
+ * Otherwise, final weight is implicitly <tt>fsm->sr->zero</tt> + */ +GFSM_INLINE +void gfsm_automaton_set_final_state_full(gfsmAutomaton *fsm, + gfsmStateId qid, + gboolean is_final, + gfsmWeight final_weight); + +/** Backwards-compatible wrapper for <code>gfsm_automaton_set_final_state_full(fsm,qid,is_final,fsm->sr->one)</code> + * \see gfsm_automaton_set_final_state_full() + */ +GFSM_INLINE +void gfsm_automaton_set_final_state(gfsmAutomaton *fsm, gfsmStateId qid, gboolean is_final); + +/** Get number of outgoing arcs from \a qid in \a fsm; 0 (zero) if \a fsm has no valid state \a qid */ +GFSM_INLINE +guint gfsm_automaton_out_degree(gfsmAutomaton *fsm, gfsmStateId qid); + +//@} + +/*======================================================================*/ +/// \name Accessors: Automaton Arcs +//@{ + +/** Add an arc from state with ID \a qid1 to state with ID \a qid2 + * on labels (\a lo,\a hi) with weight \a w. + * Missing states are implicitly created. + * \param fsm Automaton to modify + * \param qid1 ID of source state + * \param qid2 ID of target state + * \param lo Lower label + * \param hi Upper label + * \param w Arc weight + */ +GFSM_INLINE +void gfsm_automaton_add_arc(gfsmAutomaton *fsm, + gfsmStateId qid1, + gfsmStateId qid2, + gfsmLabelId lo, + gfsmLabelId hi, + gfsmWeight w); + +/** Add an arc given pointers \a sp to the state and \a node to a + * single-element arclist to be added. + * No states are implicitly created. + * + * \deprecated prefer gfsm_automaton_add_arc() + */ +GFSM_INLINE +void gfsm_automaton_add_arc_node(gfsmAutomaton *fsm, + gfsmState *sp, + gfsmArcList *node); + + +/** Sort all arcs in an automaton by one of the built-in comparison functions. 
+ * \param fsm Automaton to modify + * \param mode Specifies built-in arc comparison priorities + * \returns modified \a fsm + * \note + * \li Does nothing if \code (mode==gfsmASMNone || mode==fsm->flags.sort_mode) \endcode + * \li Really just a wrapper for gfsm_automaton_arcsort_full() + */ +GFSM_INLINE +gfsmAutomaton *gfsm_automaton_arcsort(gfsmAutomaton *fsm, gfsmArcCompMask mode); + +/** Sort all arcs in an automaton by a user-specified comparison function. + * \param fsm + * Automaton to modify + * \param cmpfunc + * 3-way comparison function, called as \a (*cmpfunc)(gfsmArc *a1, gfsmArc *a2, gpointer data) + * to compare arcs \a a1 and \a a2. + * \param data + * User data for \a cmpfunc + * \returns + * Modified \a fsm + */ +void gfsm_automaton_arcsort_full(gfsmAutomaton *fsm, GCompareDataFunc cmpfunc, gpointer data); + +/** Alias for gfsm_automaton_arcsort_full() */ +#define gfsm_automaton_arcsort_with_data(fsm,cmpfunc,data) gfsm_automaton_arcsort_full((fsm),(cmpfunc),(data)) + +//@} + +//-- inline definitions +#ifdef GFSM_INLINE_ENABLED +# include <gfsmAutomaton.hi> +#endif + +#endif /* _GFSM_AUTOMATON_H */ diff --git a/gfsm/gfsm/src/libgfsm/gfsmAutomaton.hi b/gfsm/gfsm/src/libgfsm/gfsmAutomaton.hi new file mode 100644 index 0000000..b99f441 --- /dev/null +++ b/gfsm/gfsm/src/libgfsm/gfsmAutomaton.hi @@ -0,0 +1,530 @@ +/*=============================================================================*\ + * File: gfsmAutomaton.hi + * Author: Bryan Jurish <moocow@ling.uni-potsdam.de> + * Description: finite state machine library: automata: inline definitions + * + * Copyright (c) 2004-2007 Bryan Jurish. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. 
+ * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + *=============================================================================*/ + +#include <gfsmUtils.h> +#include <gfsmBitVector.h> +#include <stdlib.h> + +/*====================================================================== + * Methods: Constructors etc. + */ + +/*-------------------------------------------------------------- + * new_full() + */ +GFSM_INLINE +gfsmAutomaton *gfsm_automaton_new_full(gfsmAutomatonFlags flags, gfsmSRType srtype, gfsmStateId size) +{ + gfsmAutomaton *fsm = (gfsmAutomaton*)g_new0(gfsmAutomaton,1); + fsm->flags = flags; + fsm->sr = gfsm_semiring_new(srtype); + fsm->states = g_array_sized_new(FALSE, TRUE, sizeof(gfsmState), size); + fsm->finals = gfsm_set_new(gfsm_uint_compare); + fsm->root_id = gfsmNoState; + return fsm; +} + +/*-------------------------------------------------------------- + * new() + */ +GFSM_INLINE +gfsmAutomaton *gfsm_automaton_new(void) +{ + return gfsm_automaton_new_full(gfsmAutomatonDefaultFlags, + gfsmAutomatonDefaultSRType, + gfsmAutomatonDefaultSize); +} + + +/*-------------------------------------------------------------- + * copy_shallow() + */ +//#define GFSM_SHALLOW_ROOT 1 +#undef GFSM_SHALLOW_ROOT //-- for local testing +GFSM_INLINE +gfsmAutomaton *gfsm_automaton_copy_shallow(gfsmAutomaton *dst, gfsmAutomaton *src) +{ + dst->flags = src->flags; +#ifdef GFSM_SHALLOW_ROOT + dst->root_id = src->root_id; //-- pre v0.0.9: DANGEROUS! (assumed by clone() ?!) 
+#endif
+  //
+  //-- copy semiring: also covers a destination which has no semiring at all yet
+  if (!dst->sr || dst->sr->type != src->sr->type) {
+    if (dst->sr) gfsm_semiring_free(dst->sr);
+    dst->sr = gfsm_semiring_copy(src->sr);
+  }
+  return dst;
+}
+
+/*--------------------------------------------------------------
+ * copy()
+ */
+//--extern
+
+/*--------------------------------------------------------------
+ * clone()
+ *  + copies \a fsm (via gfsm_automaton_copy()) into a freshly allocated automaton
+ *    created with default flags, \a fsm's semiring type, and no pre-reserved states
+ */
+GFSM_INLINE
+gfsmAutomaton *gfsm_automaton_clone(gfsmAutomaton *fsm)
+{
+  return gfsm_automaton_copy(gfsm_automaton_new_full(gfsmAutomatonDefaultFlags,fsm->sr->type,0),fsm);
+}
+
+/*--------------------------------------------------------------
+ * shadow()
+ *  + new default automaton onto which \a fsm is shallow-copied (see copy_shallow())
+ */
+GFSM_INLINE
+gfsmAutomaton *gfsm_automaton_shadow(gfsmAutomaton *fsm)
+{
+  return gfsm_automaton_copy_shallow(gfsm_automaton_new(), fsm);
+}
+
+/*--------------------------------------------------------------
+ * swap()
+ *  + exchanges the complete contents of the two structs (member-wise struct copy);
+ *    no-op if both arguments are the same automaton
+ */
+GFSM_INLINE
+void gfsm_automaton_swap(gfsmAutomaton *fsm1, gfsmAutomaton *fsm2)
+{
+  if (fsm1 != fsm2) {
+    //-- a stack temporary suffices: no need to allocate & free a heap struct per swap
+    gfsmAutomaton tmp = *fsm2;
+    *fsm2 = *fsm1;
+    *fsm1 = tmp;
+  }
+}
+
+/*--------------------------------------------------------------
+ * clear()
+ */
+//--extern
+
+/*--------------------------------------------------------------
+ * free()
+ *  + accepts NULL; clears the automaton, then releases semiring, state array,
+ *    final-weight map and finally the struct itself
+ */
+GFSM_INLINE
+void gfsm_automaton_free(gfsmAutomaton *fsm)
+{
+  if (!fsm) return;
+  gfsm_automaton_clear(fsm);                         //-- implicitly frees indices
+  if (fsm->sr)     gfsm_semiring_free(fsm->sr);
+  if (fsm->states) g_array_free(fsm->states,TRUE);
+  if (fsm->finals) gfsm_weightmap_free(fsm->finals);
+  g_free(fsm);
+}
+
+/*======================================================================
+ * Methods: Accessors: Semiring
+ */
+
+/*--------------------------------------------------------------
+ * get_semiring()
+ */
+GFSM_INLINE
+gfsmSemiring *gfsm_automaton_get_semiring(gfsmAutomaton *fsm) { return fsm->sr; }
+
+/*--------------------------------------------------------------
+ * set_semiring()
+ */
+GFSM_INLINE
+gfsmSemiring 
*gfsm_automaton_set_semiring(gfsmAutomaton *fsm, gfsmSemiring *sr) +{ + if (fsm->sr) gfsm_semiring_free(fsm->sr); + fsm->sr = gfsm_semiring_copy(sr); + return fsm->sr; //-- WARNING: in gfsm < v0.0.9, returned literal 'sr' parameter! +} + +/*-------------------------------------------------------------- + * set_semiring_type() + */ +GFSM_INLINE +void gfsm_automaton_set_semiring_type(gfsmAutomaton *fsm, gfsmSRType srtype) +{ + if (!fsm->sr) fsm->sr = gfsm_semiring_new(srtype); + else if (fsm->sr->type != srtype) { + gfsm_semiring_free(fsm->sr); + fsm->sr = gfsm_semiring_new(srtype); + } +} + +/*======================================================================*/ +///\name API: Automaton Structure + +/*-------------------------------------------------------------- + * reserve_states() + */ +GFSM_INLINE +void gfsm_automaton_reserve_states(gfsmAutomaton *fsm, gfsmStateId n_states) +{ + if (n_states != gfsmNoState && n_states > fsm->states->len) + g_array_set_size(fsm->states, n_states); +} + +/*-------------------------------------------------------------- + * reserve_arcs() + */ +GFSM_INLINE +void gfsm_automaton_reserve_arcs(gfsmAutomaton *fsm, guint n_arcs) +{ + return; +} + +/*-------------------------------------------------------------- + * n_states() + */ +GFSM_INLINE +gfsmStateId gfsm_automaton_n_states(gfsmAutomaton *fsm) +{ + return fsm->states->len; +} + +/*-------------------------------------------------------------- + * n_arcs() + */ +GFSM_INLINE +guint gfsm_automaton_n_arcs(gfsmAutomaton *fsm) +{ + return gfsm_automaton_n_arcs_full(fsm,NULL,NULL,NULL); +} + +/*-------------------------------------------------------------- + * n_final_states() + */ +GFSM_INLINE +gfsmStateId gfsm_automaton_n_final_states(gfsmAutomaton *fsm) +{ + return gfsm_weightmap_size(fsm->finals); +} + +/*-------------------------------------------------------------- + * finals_foreach + */ +GFSM_INLINE +void gfsm_automaton_finals_foreach(gfsmAutomaton *fsm, GTraverseFunc func, 
gpointer data) +{ + gfsm_weightmap_foreach(fsm->finals,func,data); + return; +} + +/*-------------------------------------------------------------- + * finals_to_array + */ +GFSM_INLINE +gfsmStateWeightPairArray* gfsm_automaton_finals_to_array(gfsmAutomaton *fsm, gfsmStateWeightPairArray *array) +{ + return gfsm_weightmap_to_array(fsm->finals,array); +} + +/*-------------------------------------------------------------- + * get_root() + */ +GFSM_INLINE +gfsmStateId gfsm_automaton_get_root(gfsmAutomaton *fsm) +{ + return fsm->root_id; +} + +/*-------------------------------------------------------------- + * set_root() + */ +GFSM_INLINE +void gfsm_automaton_set_root(gfsmAutomaton *fsm, gfsmStateId qid) +{ + if (qid!=gfsmNoState) qid = gfsm_automaton_ensure_state(fsm,qid); + fsm->root_id = qid; +} + + +/*====================================================================== + * API: Automaton Properties + */ + +/*-------------------------------------------------------------- + * is_cyclic_state() + */ +//--EXTERN + +/*-------------------------------------------------------------- + * is_cyclic() + */ +//--extern + +/*-------------------------------------------------------------- + * get_alphabet() + */ +//--EXTERN + +/*====================================================================== + * API: Automaton States: gfsmState* + */ + +/*-------------------------------------------------------------- + * open_state() [aka find_state()] + */ +GFSM_INLINE +gfsmState *gfsm_automaton_open_state(gfsmAutomaton *fsm, gfsmStateId qid) +{ + return (qid < fsm->states->len + ? 
(((gfsmState*)fsm->states->data)+qid)
+	  : NULL);
+}
+
+/*--------------------------------------------------------------
+ * add_state_full()
+ *  + ::gfsmNoState selects the ID one past the current end of the state array
+ *  + reserves room up to and including \a qid if required, then flags the state valid
+ */
+GFSM_INLINE
+gfsmStateId gfsm_automaton_add_state_full(gfsmAutomaton *fsm, gfsmStateId qid)
+{
+  gfsmState *st;
+  if (qid == gfsmNoState) qid = fsm->states->len;
+  if (qid >= fsm->states->len) gfsm_automaton_reserve(fsm,qid+1);
+  st = gfsm_automaton_open_state(fsm,qid);
+  st->is_valid = TRUE;
+  return qid;
+}
+
+/*--------------------------------------------------------------
+ * ensure_state()
+ */
+GFSM_INLINE
+gfsmStateId gfsm_automaton_ensure_state(gfsmAutomaton *fsm, gfsmStateId qid)
+{ return gfsm_automaton_add_state_full(fsm,qid); }
+
+/*--------------------------------------------------------------
+ * open_state_force() [aka get_state()]
+ *  + returns a pointer into fsm->states for the (possibly new) state \a qid
+ */
+GFSM_INLINE
+gfsmState *gfsm_automaton_open_state_force(gfsmAutomaton *fsm, gfsmStateId qid)
+{
+  //-- ensure the state in a statement of its own: the operands of '+' are evaluated
+  //   in unspecified order, and ensure_state() may call gfsm_automaton_reserve() on
+  //   fsm->states, so fsm->states->data must only be read after that call has returned
+  qid = gfsm_automaton_ensure_state(fsm,qid);
+  return ((gfsmState*)fsm->states->data) + qid;
+}
+
+/*--------------------------------------------------------------
+ * close_state()
+ *  + currently a no-op
+ */
+GFSM_INLINE
+void gfsm_automaton_close_state(gfsmAutomaton *fsm, gfsmState *qp)
+{
+  //gfsm_state_close(qp);
+  return;
+}
+
+/*======================================================================
+ * API: Automaton States
+ */
+
+/*--------------------------------------------------------------
+ * has_state()
+ */
+GFSM_INLINE
+gboolean gfsm_automaton_has_state(gfsmAutomaton *fsm, gfsmStateId qid)
+{
+  return qid < fsm->states->len && ((gfsmState*)fsm->states->data)[qid].is_valid;
+}
+
+/*--------------------------------------------------------------
+ * state_is_final()
+ */
+GFSM_INLINE
+gboolean gfsm_automaton_state_is_final(gfsmAutomaton *fsm, gfsmStateId qid)
+{
+  gfsmState *qp = gfsm_automaton_find_state(fsm,qid);
+  return qp!=NULL && qp->is_valid && qp->is_final;
+}
+
+/*--------------------------------------------------------------
+ * add_state()
+ */
+GFSM_INLINE
+gfsmStateId 
gfsm_automaton_add_state(gfsmAutomaton *fsm)
+{
+  return gfsm_automaton_add_state_full(fsm,gfsmNoState);
+}
+
+
+/*--------------------------------------------------------------
+ * remove_state()
+ *  + no-op if \a qid is missing or already invalid
+ *  + drops the state's final weight, unroots \a fsm if \a qid was its root,
+ *    frees the outgoing arcs and flags the slot invalid
+ */
+GFSM_INLINE
+void gfsm_automaton_remove_state(gfsmAutomaton *fsm, gfsmStateId qid)
+{
+  gfsmState *s = gfsm_automaton_find_state(fsm,qid);
+  if (!s || !s->is_valid) return;
+  //
+  if (s->is_final) gfsm_weightmap_remove(fsm->finals,GUINT_TO_POINTER(qid));
+  if (qid==fsm->root_id) fsm->root_id = gfsmNoState;
+  //
+  gfsm_arclist_free(s->arcs);
+  s->arcs     = NULL;
+  s->is_final = FALSE; //-- keep flag & weight map in sync: add_state_full() re-validates the slot without touching is_final
+  s->is_valid = FALSE;
+}
+
+/*--------------------------------------------------------------
+ * lookup_final()
+ *  + *wp always receives a defined value: the stored final weight on TRUE,
+ *    fsm->sr->zero on FALSE
+ */
+GFSM_INLINE
+gboolean gfsm_automaton_lookup_final(gfsmAutomaton *fsm, gfsmStateId qid, gfsmWeight *wp)
+{
+  gfsmState *qp = gfsm_automaton_find_state(fsm,qid);
+  if (qp && qp->is_valid && qp->is_final
+      && gfsm_weightmap_lookup(fsm->finals, GUINT_TO_POINTER(qid), wp))
+    {
+      return TRUE;
+    }
+  //-- not final, or flagged final without a stored weight: report semiring zero
+  *wp = fsm->sr->zero;
+  return FALSE;
+}
+
+/*--------------------------------------------------------------
+ * get_final_weight
+ *  + thin wrapper around lookup_final(): final weight of \a qid, else fsm->sr->zero
+ */
+GFSM_INLINE
+gfsmWeight gfsm_automaton_get_final_weight(gfsmAutomaton *fsm, gfsmStateId qid)
+{
+  gfsmWeight w =0; //-- convince gcc not to complain about uninitialized 'w'
+  gfsm_automaton_lookup_final(fsm,qid,&w);
+  return w;
+}
+
+/*--------------------------------------------------------------
+ * set_final_state_full
+ *  + implicitly creates state \a qid (via get_state()), sets its final flag,
+ *    and inserts resp. removes its entry in the final-weight map
+ */
+GFSM_INLINE
+void gfsm_automaton_set_final_state_full(gfsmAutomaton *fsm,
+					 gfsmStateId    qid,
+					 gboolean       is_final,
+					 gfsmWeight     final_weight)
+{
+  gfsm_state_set_final(gfsm_automaton_get_state(fsm,qid),is_final);
+  if (is_final) {
+    gfsm_weightmap_insert(fsm->finals, GUINT_TO_POINTER(qid), final_weight);
+  } else {
+    gfsm_weightmap_remove(fsm->finals, GUINT_TO_POINTER(qid));
+  }
+}
+
+/*-------------------------------------------------------------- + * set_final_state() + */ +GFSM_INLINE +void gfsm_automaton_set_final_state(gfsmAutomaton *fsm, gfsmStateId qid, gboolean is_final) +{ + gfsm_automaton_set_final_state_full(fsm,qid,is_final,fsm->sr->one); +} + + +/*-------------------------------------------------------------- + * out_degree() + */ +GFSM_INLINE +guint gfsm_automaton_out_degree(gfsmAutomaton *fsm, gfsmStateId qid) +{ + gfsmState *qp = gfsm_automaton_open_state(fsm,qid); + if (!qp || !qp->is_valid) return 0; + return gfsm_state_out_degree(qp); +} + +/*-------------------------------------------------------------- + * renumber_states() + */ +//--EXTERN + +/*-------------------------------------------------------------- + * renumber_states_full() + */ +//--EXTERN + + +/*====================================================================== + * Methods: Accessors: Automaton Arcs + */ + +/*-------------------------------------------------------------- + * add_arc_link() + */ +GFSM_INLINE +void gfsm_automaton_add_arc_node(gfsmAutomaton *fsm, + gfsmState *sp, + gfsmArcList *node) +{ + //-- possibly sorted + gfsmArcCompData acdata = { fsm->flags.sort_mode, fsm->sr, NULL, NULL }; + sp->arcs = gfsm_arclist_insert_node(sp->arcs, node, &acdata); + + //-- always unmark 'deterministic' flag -- better: check + fsm->flags.is_deterministic = FALSE; + + //-- always mark arcs "dirty" + /*fsm->flags.arcs_dirty=1; + */ + + //- always mark 'unsorted' + /* + alp->next = sp->arcs; + sp->arcs = alp; + fsm->flags.sort_mode = gfsmASMNone; + */ +} + +/*-------------------------------------------------------------- + * add_arc() + */ +GFSM_INLINE +void gfsm_automaton_add_arc(gfsmAutomaton *fsm, + gfsmStateId qid1, + gfsmStateId qid2, + gfsmLabelId lo, + gfsmLabelId hi, + gfsmWeight w) +{ + gfsmState *qp1; + gfsm_automaton_ensure_state(fsm,qid2); + qp1 = gfsm_automaton_get_state(fsm,qid1); + gfsm_automaton_add_arc_node(fsm, + qp1, + 
gfsm_arclist_new_full(qid1,qid2,lo,hi,w,NULL)); +} + + +/*-------------------------------------------------------------- + * arcsort_full() + */ +//-- EXTERN + +/*-------------------------------------------------------------- + * arcsort() + */ +GFSM_INLINE +gfsmAutomaton *gfsm_automaton_arcsort(gfsmAutomaton *fsm, gfsmArcCompMask mode) +{ + if (mode != fsm->flags.sort_mode && mode != gfsmASMNone) { + gfsmArcCompData acdata = { mode, fsm->sr, NULL, NULL }; + gfsm_automaton_arcsort_full(fsm, (GCompareDataFunc)gfsm_arc_compare_bymask, &acdata); + } + fsm->flags.sort_mode = mode; + return fsm; +} diff --git a/gfsm/gfsm/src/libgfsm/gfsmAutomatonIO.c b/gfsm/gfsm/src/libgfsm/gfsmAutomatonIO.c new file mode 100644 index 0000000..234dec1 --- /dev/null +++ b/gfsm/gfsm/src/libgfsm/gfsmAutomatonIO.c @@ -0,0 +1,897 @@ + +/*=============================================================================*\ + * File: gfsmAutomatonIO.c + * Author: Bryan Jurish <moocow@ling.uni-potsdam.de> + * Description: finite state machine library: automata: I/O + * + * Copyright (c) 2004-2007 Bryan Jurish. + * + * For information on usage and redistribution, and for a DISCLAIMER + * OF ALL WARRANTIES, see the file "COPYING" in this distribution. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + *=============================================================================*/ + +#include <gfsmAutomatonIO.h> +#include <gfsmArcIter.h> +#include <gfsmUtils.h> +//#include <gfsmCompat.h> + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <ctype.h> +#include <errno.h> + + + +/*====================================================================== + * Constants: Binary I/O + */ +const gfsmVersionInfo gfsm_version_bincompat_min_store = + { + 0, // major + 0, // minor + 10 // micro //--8 + }; + +const gfsmVersionInfo gfsm_version_bincompat_min_check = + { + 0, // major + 0, // minor + 2 // micro + }; + +const gchar gfsm_header_magic[16] = "gfsm_automaton\0"; + +/*====================================================================== + * Methods: Binary I/O: load() + */ + +/*-------------------------------------------------------------- + * gfsmAutomatonFlags_009 + */ +typedef struct { + guint32 is_transducer : 1; /**< whether this automaton is a transducer */ + guint32 is_weighted : 1; /**< whether this automaton is weighted */ + guint32 sort_mode_009 : 4; /**< old-style sort-mode (cast to ::gfsmArcSortMode_009) */ + guint32 is_deterministic_009 : 1; /**< whether fsm is known to be deterministic */ + guint32 unused_009 : 25; /**< reserved */ +} gfsmAutomatonFlags_009; + +/*-------------------------------------------------------------- + * gfsmArcSortMode_009 + */ +typedef enum { + gfsmASMNone_009, + gfsmASMLower_009, + gfsmASMUpper_009, + gfsmASMWeight_009 +} gfsmArcSortMode_009; + +/*-------------------------------------------------------------- + * load_bin_header() + */ +gboolean gfsm_automaton_load_bin_header(gfsmAutomatonHeader *hdr, gfsmIOHandle *ioh, gfsmError **errp) +{ + if (!gfsmio_read(ioh, hdr, 
sizeof(gfsmAutomatonHeader))) { + g_set_error(errp, + g_quark_from_static_string("gfsm"), //-- domain + g_quark_from_static_string("automaton_load_bin_header:size"), //-- code + "could not read header"); + return FALSE; + } + else if (strcmp(hdr->magic, gfsm_header_magic) != 0) { + g_set_error(errp, + g_quark_from_static_string("gfsm"), //-- domain + g_quark_from_static_string("automaton_load_bin_header:magic"), //-- code + "bad magic"); + return FALSE; + } + else if (gfsm_version_compare(hdr->version, gfsm_version_bincompat_min_check) < 0) { + g_set_error(errp, + g_quark_from_static_string("gfsm"), //-- domain + g_quark_from_static_string("automaton_load_bin_header:version"), //-- code + "stored format v%u.%u.%u is obsolete - need at least v%u.%u.%u", + hdr->version.major, + hdr->version.minor, + hdr->version.micro, + gfsm_version_bincompat_min_check.major, + gfsm_version_bincompat_min_check.minor, + gfsm_version_bincompat_min_check.micro); + return FALSE; + } + else if (gfsm_version_compare(gfsm_version, hdr->version_min) < 0) { + g_set_error(errp, + g_quark_from_static_string("gfsm"), //-- domain + g_quark_from_static_string("automaton_load_bin_header:version"), //-- code + "libgfsm v%u.%u.%u is obsolete - stored automaton needs at least v%u.%u.%u", + gfsm_version.major, + gfsm_version.minor, + gfsm_version.micro, + hdr->version_min.major, + hdr->version_min.minor, + hdr->version_min.micro); + return FALSE; + } + if (gfsm_version_less(hdr->version, ((gfsmVersionInfo){0,0,10}))) { + //-- flags compatibility hack + gpointer flags_009p = &(hdr->flags); + gfsmAutomatonFlags_009 flags_009 = *((gfsmAutomatonFlags_009*)flags_009p); + hdr->flags.is_transducer = flags_009.is_transducer; + hdr->flags.is_weighted = flags_009.is_weighted; + hdr->flags.is_deterministic = flags_009.is_deterministic_009; + hdr->flags.unused = flags_009.unused_009; + switch (flags_009.sort_mode_009) { + case gfsmASMLower_009: hdr->flags.sort_mode = gfsmASMLower; break; + case gfsmASMUpper_009: 
hdr->flags.sort_mode = gfsmASMUpper; break; + case gfsmASMWeight_009: hdr->flags.sort_mode = gfsmASMWeight; break; + default: + hdr->flags.sort_mode = flags_009.sort_mode_009; + } + } + if (hdr->srtype == gfsmSRTUnknown || hdr->srtype >= gfsmSRTUser) { + //-- compatibility hack + hdr->srtype = gfsmAutomatonDefaultSRType; + } + + return TRUE; +} + +/*-------------------------------------------------------------- + * load_bin_handle_0_0_8() + * + supports stored file versions v0.0.8 -- CURRENT + */ +gboolean gfsm_automaton_load_bin_handle_0_0_8(gfsmAutomatonHeader *hdr, gfsmAutomaton *fsm, gfsmIOHandle *ioh, gfsmError **errp) +{ + gfsmStateId id; + guint arci; + gfsmStoredArc s_arc; + gfsmStoredState s_state; + gfsmState *st; + gboolean rc = TRUE; + gfsmWeight w; + + //-- allocate states + gfsm_automaton_reserve(fsm, hdr->n_states); + + //-- set automaton-global properties + fsm->flags = hdr->flags; + gfsm_automaton_set_semiring_type(fsm, hdr->srtype); + fsm->root_id = hdr->root_id; + + //------ load states (one-by-one) + for (id=0; rc && id < hdr->n_states; id++) { + if (!gfsmio_read(ioh, &s_state, sizeof(gfsmStoredState))) { + g_set_error(errp, + g_quark_from_static_string("gfsm"), //-- domain + g_quark_from_static_string("automaton_load_bin:state"), //-- code + "could not read stored state %d", id); + rc = FALSE; + break; + } + + if (!s_state.is_valid) continue; + + st = gfsm_automaton_find_state(fsm,id); + st->is_valid = TRUE; + + if (s_state.is_final) { + //-- read final weight + if (!gfsmio_read(ioh, &w, sizeof(gfsmWeight))) { + g_set_error(errp, + g_quark_from_static_string("gfsm"), //-- domain + g_quark_from_static_string("automaton_load_bin:state:final_weight"), //-- code + "could not read final weight for stored state %d", id); + rc = FALSE; + break; + } + + //-- set final weight + st->is_final = TRUE; + gfsm_weightmap_insert(fsm->finals,GINT_TO_POINTER(id),w); + } else { + st->is_final = FALSE; + } + + + //-- read arcs (one-by-one) + st->arcs = NULL; + for 
(arci=0; arci < s_state.n_arcs; arci++) { + if (!gfsmio_read(ioh, &s_arc, sizeof(gfsmStoredArc))) { + g_set_error(errp, g_quark_from_static_string("gfsm"), //-- domain + g_quark_from_static_string("automaton_load_bin:state:arc"), //-- code + "could not read stored arcs for state %d", id); + rc=FALSE; + break; + } + if (!rc) break; + + st->arcs = gfsm_arclist_new_full(id, + s_arc.target, + s_arc.lower, + s_arc.upper, + s_arc.weight, + st->arcs); + } + + //-- reverse arc-list for sorted automata + if (fsm->flags.sort_mode != gfsmASMNone) st->arcs = gfsm_arclist_reverse(st->arcs); + } + + return rc; +} + + +/*-------------------------------------------------------------- + * load_bin_handle_0_0_7() + * + supports stored file versions 0.0.2 -- 0.0.7 + */ +/// Type for a stored state (v0.0.2 .. v0.0.7) +typedef struct { + gboolean is_valid : 1; /**< valid flag */ + gboolean is_final : 1; /**< final flag */ + guint n_arcs; /**< number of stored arcs */ + guint min_arc; /**< index of stored minimum arc (not really necessary) */ +} gfsmStoredState_007; + +gboolean gfsm_automaton_load_bin_handle_0_0_7(gfsmAutomatonHeader *hdr, gfsmAutomaton *fsm, gfsmIOHandle *ioh, gfsmError **errp) +{ + gfsmStateId id; + guint arci, n_arcs; + gfsmStoredArc s_arc; + gfsmStoredState_007 s_state; + gfsmState *st; + gboolean rc = TRUE; + + //-- allocate states + gfsm_automaton_reserve(fsm, hdr->n_states); + + //-- set automaton-global properties + fsm->flags = hdr->flags; + gfsm_semiring_init(fsm->sr, hdr->srtype); + fsm->root_id = hdr->root_id; + + //------ load states (one-by-one) + for (id=0; rc && id < hdr->n_states; id++) { + if (!gfsmio_read(ioh, &s_state, sizeof(gfsmStoredState_007))) { + g_set_error(errp, + g_quark_from_static_string("gfsm"), //-- domain + g_quark_from_static_string("automaton_load_bin:state"), //-- code + "could not read stored state %d", id); + rc = FALSE; + break; + } + + if (!s_state.is_valid) continue; + + st = gfsm_automaton_find_state(fsm,id); + st->is_valid = 
TRUE; + + if (s_state.is_final) { + st->is_final = TRUE; + gfsm_weightmap_insert(fsm->finals, GUINT_TO_POINTER(id), fsm->sr->one); + } else { + st->is_final = FALSE; + } + + //-- HACK: remember number of arcs! + st->arcs = (gfsmArcList*) GUINT_TO_POINTER(s_state.n_arcs); + } + + //------ load arcs (state-by-state) + for (id=0; rc && id < hdr->n_states; id++) { + //-- get state + st = gfsm_automaton_find_state(fsm,id); + if (!st || !st->is_valid) continue; + + //-- read in arcs (one-by-one) + n_arcs = GPOINTER_TO_UINT(st->arcs); + st->arcs = NULL; + for (arci=0; arci < n_arcs; arci++) { + if (!gfsmio_read(ioh, &s_arc, sizeof(gfsmStoredArc))) { + g_set_error(errp, g_quark_from_static_string("gfsm"), //-- domain + g_quark_from_static_string("automaton_load_bin:arc"), //-- code + "could not read stored arcs for state %d", id); + rc=FALSE; + break; + } + + st->arcs = gfsm_arclist_new_full(id, + s_arc.target, + s_arc.lower, + s_arc.upper, + s_arc.weight, + st->arcs); + } + + //-- reverse arc-list for sorted automata + if (fsm->flags.sort_mode != gfsmASMNone) st->arcs = gfsm_arclist_reverse(st->arcs); + } + + return rc; +} + +/*-------------------------------------------------------------- + * load_bin_handle() + * + dispatch + */ +gboolean gfsm_automaton_load_bin_handle(gfsmAutomaton *fsm, gfsmIOHandle *ioh, gfsmError **errp) +{ + gfsmAutomatonHeader hdr; + + gfsm_automaton_clear(fsm); + + //-- load header + if (!gfsm_automaton_load_bin_header(&hdr,ioh,errp)) return FALSE; + + if (gfsm_version_ge(hdr.version,((gfsmVersionInfo){0,0,8}))) { + //-- v0.0.8 .. CURRENT + return gfsm_automaton_load_bin_handle_0_0_8(&hdr,fsm,ioh,errp); + } + else { + //-- v0.0.2 .. 
v0.0.7
+    return gfsm_automaton_load_bin_handle_0_0_7(&hdr,fsm,ioh,errp);
+  }
+}
+
+/*--------------------------------------------------------------
+ * load_bin_file()
+ *  + loads \a fsm from the already-open stream \a f
+ *  + returns FALSE if no I/O handle could be created for \a f
+ */
+gboolean gfsm_automaton_load_bin_file(gfsmAutomaton *fsm, FILE *f, gfsmError **errp)
+{
+  gfsmIOHandle *ioh = gfsmio_new_zfile(f,"rb",-1);
+  //-- check the handle before use, as the other load_bin_*() wrappers do
+  gboolean rc = ioh && gfsm_automaton_load_bin_handle(fsm, ioh, errp);
+  if (ioh) {
+    gfsmio_close(ioh);
+    gfsmio_handle_free(ioh);
+  }
+  return rc;
+}
+
+/*--------------------------------------------------------------
+ * load_bin_filename()
+ *  + \a errp may be NULL (caller not interested in error details)
+ */
+gboolean gfsm_automaton_load_bin_filename(gfsmAutomaton *fsm, const gchar *filename, gfsmError **errp)
+{
+  gfsmIOHandle *ioh = gfsmio_new_filename(filename, "rb", -1, errp);
+  //-- only look at *errp if the caller actually passed an error location
+  gboolean rc = ioh && !(errp && *errp) && gfsm_automaton_load_bin_handle(fsm, ioh, errp);
+  if (ioh) {
+    gfsmio_close(ioh);
+    gfsmio_handle_free(ioh);
+  }
+  return rc;
+}
+
+/*--------------------------------------------------------------
+ * load_bin_gstring()
+ *  + reads from the start of \a gs
+ *  + \a errp may be NULL (caller not interested in error details)
+ */
+gboolean gfsm_automaton_load_bin_gstring(gfsmAutomaton *fsm, GString *gs, gfsmError **errp)
+{
+  gfsmPosGString pgs = { gs, 0 };
+  gfsmIOHandle *ioh = gfsmio_new_gstring(&pgs);
+  //-- only look at *errp if the caller actually passed an error location
+  gboolean rc = ioh && !(errp && *errp) && gfsm_automaton_load_bin_handle(fsm, ioh, errp);
+  if (ioh) {
+    gfsmio_close(ioh);
+    gfsmio_handle_free(ioh);
+  }
+  return rc;
+}
+
+
+/*======================================================================
+ * Methods: Binary I/O: save()
+ */
+
+/*--------------------------------------------------------------
+ * save_bin_handle()
+ */
+gboolean gfsm_automaton_save_bin_handle(gfsmAutomaton *fsm, gfsmIOHandle *ioh, gfsmError **errp)
+{
+  gfsmAutomatonHeader hdr;
+  gfsmStateId id;
+  gfsmState *st;
+  gfsmStoredState sst;
+  gfsmStoredArc sa;
+  gfsmWeight w;
+  gfsmArcIter ai;
+  gboolean rc = TRUE;
+
+  //-- create header
+  memset(&hdr, 0, sizeof(gfsmAutomatonHeader));
+  strcpy(hdr.magic, gfsm_header_magic);
+  hdr.version = gfsm_version;
+  hdr.version_min = gfsm_version_bincompat_min_store;
+ hdr.flags = fsm->flags; + hdr.root_id = fsm->root_id; + hdr.n_states = gfsm_automaton_n_states(fsm); + //hdr.n_arcs_007= gfsm_automaton_n_arcs(fsm); + hdr.srtype = gfsm_automaton_get_semiring(fsm)->type; + + //-- write header + if (!gfsmio_write(ioh, &hdr, sizeof(gfsmAutomatonHeader))) { + g_set_error(errp, g_quark_from_static_string("gfsm"), //-- domain + g_quark_from_static_string("automaton_save_bin:header"), //-- code + "could not store header"); + return FALSE; + } + + //-- zero stored state (allow zlib compression to work better for any 'unused' members) + memset(&sst, 0, sizeof(gfsmStoredState)); + + //-- write states + for (id=0; rc && id < hdr.n_states; id++) { + //-- store basic state information + st = &g_array_index(fsm->states, gfsmState, id); + sst.is_valid = st->is_valid; + sst.is_final = sst.is_valid ? st->is_final : FALSE; + sst.n_arcs = sst.is_valid ? gfsm_state_out_degree(st) : 0; + if (!gfsmio_write(ioh, &sst, sizeof(sst))) { + g_set_error(errp, g_quark_from_static_string("gfsm"), //-- domain + g_quark_from_static_string("automaton_save_bin:state"), //-- code + "could not store state %d", id); + rc = FALSE; + } + + //-- store final weight (maybe) + if (rc && sst.is_final) { + w = gfsm_automaton_get_final_weight(fsm,id); + if (!gfsmio_write(ioh, &w, sizeof(gfsmWeight))) { + g_set_error(errp, g_quark_from_static_string("gfsm"), //-- domain + g_quark_from_static_string("automaton_save_bin:state:final_weight"), //-- code + "could not store final weight for state %d", id); + rc = FALSE; + } + } + + //-- store arcs + if (sst.is_valid) { + for (gfsm_arciter_open_ptr(&ai,fsm,st); rc && gfsm_arciter_ok(&ai); gfsm_arciter_next(&ai)) { + gfsmArc *a = gfsm_arciter_arc(&ai); + sa.target = a->target; + sa.lower = a->lower; + sa.upper = a->upper; + sa.weight = a->weight; + if (!gfsmio_write(ioh, &sa, sizeof(sa))) { + g_set_error(errp, g_quark_from_static_string("gfsm"), //-- domain + g_quark_from_static_string("automaton_save_bin:state:arc"), //-- code + 
"could not store arcs for state %d", id); + rc = FALSE; + } + } + } + } + + return rc; +} + +/*-------------------------------------------------------------- + * save_bin_file() + */ +gboolean gfsm_automaton_save_bin_file(gfsmAutomaton *fsm, FILE *f, gfsmError **errp) +{ + gfsmIOHandle *ioh = gfsmio_new_file(f); + gboolean rc = ioh && !(*errp) && gfsm_automaton_save_bin_handle(fsm, ioh, errp); + if (ioh) { + //gfsmio_close(ioh); + gfsmio_handle_free(ioh); + } + return rc; +} + +/*-------------------------------------------------------------- + * save_bin_filename_nc() + */ +gboolean gfsm_automaton_save_bin_filename_nc(gfsmAutomaton *fsm, const gchar *filename, gfsmError **errp) +{ + FILE *f; + gboolean rc; + if (!(f=gfsm_open_filename(filename,"wb",errp))) return FALSE; + rc = gfsm_automaton_save_bin_file(fsm, f, errp); + if (f != stdout) fclose(f); + return rc; +} + +/*-------------------------------------------------------------- + * save_bin_filename() + */ +gboolean gfsm_automaton_save_bin_filename(gfsmAutomaton *fsm, const gchar *filename, int zlevel, gfsmError **errp) +{ + gfsmIOHandle *ioh = gfsmio_new_filename(filename, "wb", zlevel, errp); + gboolean rc = ioh && !(*errp) && gfsm_automaton_save_bin_handle(fsm, ioh, errp); + if (ioh) { + gfsmio_close(ioh); + gfsmio_handle_free(ioh); + } + return rc; +} + +/*-------------------------------------------------------------- + * save_bin_gstring() + */ +gboolean gfsm_automaton_save_bin_gstring(gfsmAutomaton *fsm, GString *gs, gfsmError **errp) +{ + gfsmPosGString pgs = { gs, gs->len }; + gfsmIOHandle *ioh = gfsmio_new_gstring(&pgs); + gboolean rc = ioh && !(*errp) && gfsm_automaton_save_bin_handle(fsm, ioh, errp); + if (ioh) { + gfsmio_close(ioh); + gfsmio_handle_free(ioh); + } + return rc; +} + + +/*====================================================================== + * Methods: Text I/O: compile() + */ + +/*-------------------------------------------------------------- + * compile_handle() + */ +gboolean 
gfsm_automaton_compile_handle (gfsmAutomaton *fsm, + gfsmIOHandle *ioh, + gfsmAlphabet *lo_alphabet, + gfsmAlphabet *hi_alphabet, + gfsmAlphabet *state_alphabet, + gfsmError **errp) +{ + gfsmStateId q1, q2; + gfsmLabelId lo, hi; + gfsmWeight w; + char *buf = NULL; + size_t buflen = 0; + guint lineno = 1; + char *b1, *b2, *b3, *b4, *b5, *end; + GString *gs = g_string_new(""); + gpointer key; + gboolean rc = TRUE; + + /*extern int getline(char **, size_t *, FILE *);*/ + for (lineno=1; rc && gfsmio_getline(ioh,&buf,&buflen) > 0; ++lineno) { + b1 = b2 = b3 = b4 = b5 = end = NULL; + short int nfields = 0; + + //-- get pointers to initial elements of the line + for (b1 = buf; b1 && *b1 && isspace(*b1); b1++) { *b1 = '\0'; } + + for (b2 = b1 ; b2 && *b2 && !isspace(*b2); b2++) ; + for ( ; b2 && *b2 && isspace(*b2); b2++) { *b2 = '\0'; } + if (b2 != b1) nfields = 1; + else { + //-- empty line + continue; + } + + for (b3 = b2 ; b3 && *b3 && !isspace(*b3); b3++) ; + for ( ; b3 && *b3 && isspace(*b3); b3++) { *b3 = '\0'; } + if (b3 != b2) nfields = 2; + + for (b4 = b3 ; b4 && *b4 && !isspace(*b4); b4++) ; + for ( ; b4 && *b4 && isspace(*b4); b4++) { *b4 = '\0'; } + if (b4 != b3) nfields = 3; + + for (b5 = b4 ; b5 && *b5 && !isspace(*b5); b5++) ; + for ( ; b5 && *b5 && isspace(*b5); b5++) { *b5 = '\0'; } + if (b5 != b4) nfields = 4; + + for (end = b5; end && *end && !isspace(*end); end++) ; + if (end) *end = '\0'; + if (end != b5) nfields = 5; + + //---- q1: source state + if (state_alphabet) { + g_string_assign(gs,b1); + key = gfsm_alphabet_string2key(state_alphabet, gs); + if ((q1 = gfsm_alphabet_find_label(state_alphabet,key)) == gfsmNoLabel) + q1 = gfsm_alphabet_get_label(state_alphabet, key); + } else q1 = strtol(b1,NULL,10); + if (fsm->root_id == gfsmNoState) fsm->root_id = q1; + + //-- weightless final state? + if (nfields == 1) { + gfsm_automaton_set_final_state(fsm,q1,TRUE); + continue; + } + //-- weighted final state? 
+ else if (nfields == 2) { + w = strtod(b2,NULL); + gfsm_automaton_set_final_state_full(fsm,q1,TRUE,w); + continue; + } + + //---- q2: sink state + if (state_alphabet) { + g_string_assign(gs,b2); + key = gfsm_alphabet_string2key(state_alphabet, gs); + if ((q2 = gfsm_alphabet_find_label(state_alphabet,key)) == gfsmNoLabel) + q2 = gfsm_alphabet_get_label(state_alphabet, key); + } else q2 = strtol(b2,NULL,10); + + //---- lo: lower label + if (lo_alphabet) { + g_string_assign(gs,b3); + key = gfsm_alphabet_string2key(lo_alphabet, gs); + if ((lo = gfsm_alphabet_find_label(lo_alphabet,key)) == gfsmNoLabel) + lo = gfsm_alphabet_get_label(lo_alphabet, key); + } else lo = strtol(b3,NULL,10); + + //---- hi: upper label + if (fsm->flags.is_transducer) { + if (nfields > 3) { + if (hi_alphabet) { + g_string_assign(gs,b4); + key = gfsm_alphabet_string2key(hi_alphabet, gs); + if ((hi = gfsm_alphabet_find_label(hi_alphabet,key)) == gfsmNoLabel) + hi = gfsm_alphabet_get_label(hi_alphabet, key); + } + else hi = strtol(b4,NULL,10); + } + else { + g_printerr("gfsm: Warning: no upper label given for transducer at line %u - using lower label\n", + lineno); + hi = lo; + } + } + else { + //-- not a transducer + hi = lo; + if (nfields > 4) { + g_printerr("gfsm: Warning: ignoring extra fields in acceptor file at line %u\n", lineno); + } + } + + //-- w: arc weight + if ( fsm->flags.is_transducer && nfields >= 5) { w = strtod(b5,NULL); } + else if (!fsm->flags.is_transducer && nfields >= 4) { w = strtod(b4,NULL); } + else { w = fsm->sr->one; } + + gfsm_automaton_add_arc(fsm,q1,q2,lo,hi,w); + } + + if (buf) free(buf); + g_string_free(gs,TRUE); + + return rc; +} + +/*-------------------------------------------------------------- + * compile_file_full() + */ +gboolean gfsm_automaton_compile_file_full (gfsmAutomaton *fsm, + FILE *f, + gfsmAlphabet *lo_alphabet, + gfsmAlphabet *hi_alphabet, + gfsmAlphabet *state_alphabet, + gfsmError **errp) +{ + gfsmIOHandle *ioh = gfsmio_new_zfile(f,"rb",-1); + 
gboolean rc = (ioh && !(*errp) && + gfsm_automaton_compile_handle(fsm,ioh,lo_alphabet,hi_alphabet,state_alphabet,errp)); + if (ioh) { + gfsmio_close(ioh); + gfsmio_handle_free(ioh); + } + return rc; +} + +/*-------------------------------------------------------------- + * compile_filename_full() + */ +gboolean gfsm_automaton_compile_filename_full (gfsmAutomaton *fsm, + const gchar *filename, + gfsmAlphabet *lo_alphabet, + gfsmAlphabet *hi_alphabet, + gfsmAlphabet *state_alphabet, + gfsmError **errp) +{ + gfsmIOHandle *ioh = gfsmio_new_filename(filename,"rb",-1,errp); + gboolean rc = (ioh && !(*errp) && + gfsm_automaton_compile_handle(fsm,ioh,lo_alphabet,hi_alphabet,state_alphabet,errp)); + if (ioh) { + gfsmio_close(ioh); + gfsmio_handle_free(ioh); + } + return rc; +} + +/*-------------------------------------------------------------- + * compile_gstring_full() + */ +gboolean gfsm_automaton_compile_gstring_full (gfsmAutomaton *fsm, + GString *gs, + gfsmAlphabet *lo_alphabet, + gfsmAlphabet *hi_alphabet, + gfsmAlphabet *state_alphabet, + gfsmError **errp) +{ + gfsmPosGString pgs = { gs, 0 }; + gfsmIOHandle *ioh = gfsmio_new_gstring(&pgs); + gboolean rc = (ioh && !(*errp) && + gfsm_automaton_compile_handle(fsm,ioh,lo_alphabet,hi_alphabet,state_alphabet,errp)); + if (ioh) { + gfsmio_close(ioh); + gfsmio_handle_free(ioh); + } + return rc; +} + + +/*====================================================================== + * Methods: Text I/O: print() + */ + +/*-------------------------------------------------------------- + * print_handle() + */ +gboolean gfsm_automaton_print_handle (gfsmAutomaton *fsm, + gfsmIOHandle *ioh, + gfsmAlphabet *lo_alphabet, + gfsmAlphabet *hi_alphabet, + gfsmAlphabet *state_alphabet, + gfsmError **errp) +{ + guint i; + GString *gs = g_string_new(""); + gboolean rc = TRUE; + + //-- sanity check + if (fsm->root_id == gfsmNoState) { + g_printerr("gfsm: Warning: cowardly refusing to print() unrooted automaton\n"); + return TRUE; + } + + for (i=0; 
rc && i < fsm->states->len; i++) { + guint id = (fsm->root_id + i) % fsm->states->len; + gfsmState *st = gfsm_automaton_find_state(fsm,id); + gfsmArcIter ai; + gpointer key; + if (!st || !st->is_valid) continue; + + for (gfsm_arciter_open_ptr(&ai,fsm,st); gfsm_arciter_ok(&ai); gfsm_arciter_next(&ai)) { + gfsmArc *a = gfsm_arciter_arc(&ai); + + //-- source state + if (state_alphabet && (key=gfsm_alphabet_find_key(state_alphabet,id)) != gfsmNoKey) { + gfsm_alphabet_key2string(state_alphabet,key,gs); + gfsmio_puts(ioh, gs->str); + } else { + if (state_alphabet) g_printerr("Warning: no label defined for state '%u'!\n", id); + gfsmio_printf(ioh, "%u", id); + } + gfsmio_putc(ioh, '\t'); + + //-- sink state + if (state_alphabet && (key=gfsm_alphabet_find_key(state_alphabet,a->target)) != gfsmNoKey) { + gfsm_alphabet_key2string(state_alphabet,key,gs); + gfsmio_puts(ioh,gs->str); + } else { + if (state_alphabet) g_printerr("Warning: no label defined for state '%u'!\n", a->target); + gfsmio_printf(ioh, "%u", a->target); + } + gfsmio_putc(ioh,'\t'); + + //-- lower label + if (lo_alphabet && (key=gfsm_alphabet_find_key(lo_alphabet,a->lower)) != gfsmNoKey) { + gfsm_alphabet_key2string(lo_alphabet,key,gs); + gfsmio_puts(ioh, gs->str); + } else { + if (lo_alphabet) g_printerr("Warning: no lower label defined for Id '%u'!\n", a->lower); + gfsmio_printf(ioh, "%u", a->lower); + } + + //-- upper label + if (fsm->flags.is_transducer) { + gfsmio_putc(ioh, '\t'); + if (hi_alphabet && (key=gfsm_alphabet_find_key(hi_alphabet,a->upper)) != gfsmNoKey) { + gfsm_alphabet_key2string(hi_alphabet,key,gs); + gfsmio_puts(ioh, gs->str); + } else { + if (hi_alphabet) g_printerr("Warning: no upper label defined for Id '%u'!\n", a->upper); + gfsmio_printf(ioh, "%u", a->upper); + } + } + + //-- weight + if (fsm->flags.is_weighted) { // && a->weight != fsm->sr->one + gfsmio_printf(ioh, "\t%g", a->weight); + } + + gfsmio_putc(ioh, '\n'); + } + + //-- final? 
+ if (gfsm_state_is_final(st)) { + if (state_alphabet && (key=gfsm_alphabet_find_key(state_alphabet,id)) != NULL) { + gfsm_alphabet_key2string(state_alphabet,key,gs); + gfsmio_puts(ioh, gs->str); + } else { + gfsmio_printf(ioh, "%u", id); + } + if (fsm->flags.is_weighted) { + gfsmio_printf(ioh, "\t%g", gfsm_automaton_get_final_weight(fsm,id)); + } + gfsmio_putc(ioh, '\n'); + } + } + + //-- cleanup + g_string_free(gs,TRUE); + + return rc; +} + +/*-------------------------------------------------------------- + * print_file_full() + */ +gboolean gfsm_automaton_print_file_full (gfsmAutomaton *fsm, + FILE *f, + gfsmAlphabet *lo_alphabet, + gfsmAlphabet *hi_alphabet, + gfsmAlphabet *state_alphabet, + int zlevel, + gfsmError **errp) +{ + gfsmIOHandle *ioh = gfsmio_new_zfile(f,"wb",zlevel); + gboolean rc = (ioh && !(*errp) && + gfsm_automaton_print_handle(fsm,ioh,lo_alphabet,hi_alphabet,state_alphabet,errp)); + if (ioh) { + gfsmio_close(ioh); + gfsmio_handle_free(ioh); + } + return rc; +} + +/*-------------------------------------------------------------- + * print_filename() + */ +gboolean gfsm_automaton_print_filename_full (gfsmAutomaton *fsm, + const gchar *filename, + gfsmAlphabet *lo_alphabet, + gfsmAlphabet *hi_alphabet, + gfsmAlphabet *state_alphabet, + int zlevel, + gfsmError **errp) +{ + gfsmIOHandle *ioh = gfsmio_new_filename(filename,"wb",zlevel,errp); + gboolean rc = (ioh && !(*errp) && + gfsm_automaton_print_handle(fsm,ioh,lo_alphabet,hi_alphabet,state_alphabet,errp)); + if (ioh) { + gfsmio_close(ioh); + gfsmio_handle_free(ioh); + } + return rc; +} +/*-------------------------------------------------------------- + * print_gstring_full() + */ +gboolean gfsm_automaton_print_gstring_full (gfsmAutomaton *fsm, + GString *gs, + gfsmAlphabet *lo_alphabet, + gfsmAlphabet *hi_alphabet, + gfsmAlphabet *state_alphabet, + gfsmError **errp) +{ + gfsmPosGString pgs = { gs, gs->len }; + gfsmIOHandle *ioh = gfsmio_new_gstring(&pgs); + gboolean rc = (ioh && !(*errp) && + 
gfsm_automaton_print_handle(fsm,ioh,lo_alphabet,hi_alphabet,state_alphabet,errp)); + if (ioh) { + gfsmio_close(ioh); + gfsmio_handle_free(ioh); + } + return rc; +} + diff --git a/gfsm/gfsm/src/libgfsm/gfsmAutomatonIO.h b/gfsm/gfsm/src/libgfsm/gfsmAutomatonIO.h new file mode 100644 index 0000000..be9848c --- /dev/null +++ b/gfsm/gfsm/src/libgfsm/gfsmAutomatonIO.h @@ -0,0 +1,225 @@ + +/*=============================================================================*\ + * File: gfsmAutomatonIO.h + * Author: Bryan Jurish <moocow@ling.uni-potsdam.de> + * Description: finite state machine library: automata: I/O + * + * Copyright (c) 2004-2007 Bryan Jurish. + * + * For information on usage and redistribution, and for a DISCLAIMER + * OF ALL WARRANTIES, see the file "COPYING" in this distribution. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + *=============================================================================*/ + +/** \file gfsmAutomatonIO.h + * \brief Librarian routines for automata. 
+ */ + +#ifndef _GFSM_AUTOMATON_IO_H +#define _GFSM_AUTOMATON_IO_H + +#include <gfsmAutomaton.h> +#include <gfsmVersion.h> + +/*====================================================================== + * Types + */ +/// Header info for binary files +typedef struct { + gchar magic[16]; /**< magic header string "gfsm_automaton" */ + gfsmVersionInfo version; /**< gfsm version which created the stored file */ + gfsmVersionInfo version_min; /**< minimum gfsm version required to load the file */ + gfsmAutomatonFlags flags; /**< automaton flags */ + gfsmStateId root_id; /**< Id of root node */ + gfsmStateId n_states; /**< number of stored states */ + gfsmStateId n_arcs_007; /**< number of stored arcs (v0.0.2 .. v0.0.7) */ + guint32 srtype; /**< semiring type (cast to gfsmSRType) */ + guint32 unused1; /**< reserved */ + guint32 unused2; /**< reserved */ + guint32 unused3; /**< reserved */ +} gfsmAutomatonHeader; + +/// Type for a stored state +typedef struct { + guint32 is_valid : 1; /**< valid flag */ + guint32 is_final : 1; /**< final flag */ + guint32 unused : 30; /**< reserved */ + guint32 n_arcs; /**< number of stored arcs for this state */ +} gfsmStoredState; + + +/// Type for a stored arc (no 'source' field) +//typedef gfsmArc gfsmStoredArc; +typedef struct { + gfsmStateId target; /**< ID of target node */ + gfsmLabelId lower; /**< Lower label */ + gfsmLabelId upper; /**< Upper label */ + gfsmWeight weight; /**< arc weight */ +} gfsmStoredArc; + +/*====================================================================== + * Constants + */ +/* Scanner config for gfsm_automaton_compile() */ +//extern const GScannerConfig gfsm_automaton_scanner_config; + +/** Magic header string for stored gfsm files */ +extern const gchar gfsm_header_magic[16]; + +/** Minimum libgfsm version required for loading files stored by this version of libgfsm */ +extern const gfsmVersionInfo gfsm_version_bincompat_min_store; + +/** Minimum libgfsm version whose binary files this version of 
libgfsm can read */ +extern const gfsmVersionInfo gfsm_version_bincompat_min_check; + +/*====================================================================== + * Methods: Binary I/O + */ +/// \name Automaton Methods: Binary I/O +//@{ +/** Load an automaton header from a stored binary file. + * Returns TRUE iff the header looks valid. */ +gboolean gfsm_automaton_load_header(gfsmAutomatonHeader *hdr, gfsmIOHandle *ioh, gfsmError **errp); + +/** Load an automaton in binary form from a gfsmIOHandle* (implicitly clear()s \a fsm) */ +gboolean gfsm_automaton_load_bin_handle(gfsmAutomaton *fsm, gfsmIOHandle *ioh, gfsmError **errp); + +/** Load an automaton from a stored binary file (implicitly clear()s \a fsm) */ +gboolean gfsm_automaton_load_bin_file(gfsmAutomaton *fsm, FILE *f, gfsmError **errp); + +/** Load an automaton from a named binary file (implicitly clear()s \a fsm) */ +gboolean gfsm_automaton_load_bin_filename(gfsmAutomaton *fsm, const gchar *filename, gfsmError **errp); + +/** Load an automaton from an in-memory buffer */ +gboolean gfsm_automaton_load_bin_gstring(gfsmAutomaton *fsm, GString *gs, gfsmError **errp); + + +/** Store an automaton in binary form to a gfsmIOHandle* */ +gboolean gfsm_automaton_save_bin_handle(gfsmAutomaton *fsm, gfsmIOHandle *ioh, gfsmError **errp); + +/** Store an automaton in binary form to a file */ +gboolean gfsm_automaton_save_bin_file(gfsmAutomaton *fsm, FILE *f, gfsmError **errp); + +/** Store an automaton to a named binary file (no compression) */ +gboolean gfsm_automaton_save_bin_filename_nc(gfsmAutomaton *fsm, const gchar *filename, gfsmError **errp); + +/** Store an automaton to a named binary file, possibly compressing. 
+ * Set \a zlevel=-1 for default compression, and + * set \a zlevel=0 for no compression, otherwise should be as for zlib (1 <= zlevel <= 9) + */ +gboolean gfsm_automaton_save_bin_filename(gfsmAutomaton *fsm, const gchar *filename, int zlevel, gfsmError **errp); + +/** Append an uncompressed binary automaton to an in-memory buffer */ +gboolean gfsm_automaton_save_bin_gstring(gfsmAutomaton *fsm, GString *gs, gfsmError **errp); + +//@} + +/*====================================================================== + * Automaton Methods: Text I/O + */ +/// \name Automaton Methods: Text I/O +//@{ + +/** Load an automaton in Ma-Bell-compatible text-format from a gfsmIOHandle* */ +gboolean gfsm_automaton_compile_handle (gfsmAutomaton *fsm, + gfsmIOHandle *ioh, + gfsmAlphabet *lo_alphabet, + gfsmAlphabet *hi_alphabet, + gfsmAlphabet *state_alphabet, + gfsmError **errp); + + +/** Load an automaton in Ma-Bell-compatible text-format from a FILE* */ +gboolean gfsm_automaton_compile_file_full (gfsmAutomaton *fsm, + FILE *f, + gfsmAlphabet *lo_alphabet, + gfsmAlphabet *hi_alphabet, + gfsmAlphabet *state_alphabet, + gfsmError **errp); + +/** Convenience macro for compiling all-numeric-id text streams */ +#define gfsm_automaton_compile_file(fsm,filep,errp) \ + gfsm_automaton_compile_file_full((fsm),(filep),NULL,NULL,NULL,(errp)) + +/** Load an automaton in Ma-Bell-compatible text-format from a named file, possibly compressed. 
*/ +gboolean gfsm_automaton_compile_filename_full (gfsmAutomaton *fsm, + const gchar *filename, + gfsmAlphabet *lo_alphabet, + gfsmAlphabet *hi_alphabet, + gfsmAlphabet *state_alphabet, + gfsmError **errp); + +/** Convenience macro for compiling all-numeric-id named text files */ +#define gfsm_automaton_compile_filename(fsm,filename,errp) \ + gfsm_automaton_compile_filename_full((fsm),(filename),NULL,NULL,NULL,(errp)) + +/** Load an automaton in Ma-Bell-compatible text-format from an in-memory buffer */ +gboolean gfsm_automaton_compile_gstring_full (gfsmAutomaton *fsm, + GString *gs, + gfsmAlphabet *lo_alphabet, + gfsmAlphabet *hi_alphabet, + gfsmAlphabet *state_alphabet, + gfsmError **errp); + + +/*-----------------------*/ + +/** Print an automaton in Ma-Bell-compatible text-format to a gfsmIOHandle* */ +gboolean gfsm_automaton_print_handle (gfsmAutomaton *fsm, + gfsmIOHandle *ioh, + gfsmAlphabet *lo_alphabet, + gfsmAlphabet *hi_alphabet, + gfsmAlphabet *state_alphabet, + gfsmError **errp); + + +/** Print an automaton in Ma-Bell-compatible text-format to a FILE* */ +gboolean gfsm_automaton_print_file_full (gfsmAutomaton *fsm, + FILE *f, + gfsmAlphabet *lo_alphabet, + gfsmAlphabet *hi_alphabet, + gfsmAlphabet *state_alphabet, + int zlevel, + gfsmError **errp); + +/** Convenience macro for printing to uncompressed all-numeric-id text streams */ +#define gfsm_automaton_print_file(fsm,filep,errp) \ + gfsm_automaton_print_file_full((fsm),(filep),NULL,NULL,NULL,0,(errp)) + +/** Print an automaton in Ma-Bell-compatible text-format to a named file */ +gboolean gfsm_automaton_print_filename_full (gfsmAutomaton *fsm, + const gchar *filename, + gfsmAlphabet *lo_alphabet, + gfsmAlphabet *hi_alphabet, + gfsmAlphabet *state_alphabet, + int zlevel, + gfsmError **errp); + +/** Convenience macro for printing to uncompressed all-numeric-id named text files */ +#define gfsm_automaton_print_filename(fsm,filep,errp) \ + 
gfsm_automaton_print_filename_full((fsm),(filep),NULL,NULL,NULL,0,(errp)) + +/** Print an automaton in Ma-Bell-compatible text-format to an in-memory buffer */ +gboolean gfsm_automaton_print_gstring_full (gfsmAutomaton *fsm, + GString *gs, + gfsmAlphabet *lo_alphabet, + gfsmAlphabet *hi_alphabet, + gfsmAlphabet *state_alphabet, + gfsmError **errp); + +//@} + +#endif /* _GFSM_AUTOMATON_IO_H */ diff --git a/gfsm/gfsm/src/libgfsm/gfsmBitVector.c b/gfsm/gfsm/src/libgfsm/gfsmBitVector.c new file mode 100644 index 0000000..e7ab34d --- /dev/null +++ b/gfsm/gfsm/src/libgfsm/gfsmBitVector.c @@ -0,0 +1,80 @@ + +/*=============================================================================*\ + * File: gfsmBitVector.c + * Author: Bryan Jurish <moocow@ling.uni-potsdam.de> + * Description: finite state machine library: bit vectors: extern functions + * + * Copyright (c) 2004-2007 Bryan Jurish. + * + * For information on usage and redistribution, and for a DISCLAIMER OF ALL + * WARRANTIES, see the file, "LICENSE.txt," in this distribution. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + *=============================================================================*/ + +#include <gfsmConfig.h> +#include <gfsmBitVector.h> + +//-- no-inline definitions +#ifndef GFSM_INLINE_ENABLED +# include <gfsmBitVector.hi> +#endif + +/*====================================================================== + * I/O + */ + +//-------------------------------------------------------------- +// bitvector_write_bin_handle() +gboolean gfsm_bitvector_write_bin_handle(gfsmBitVector *bv, gfsmIOHandle *ioh, gfsmError **errp) +{ + guint32 len = bv->len; + if (!gfsmio_write(ioh,&len,sizeof(guint32))) { + g_set_error(errp, g_quark_from_static_string("gfsm"), + g_quark_from_static_string("bitvector_write_bin_handle:len"), + "could not store bit vector length "); + return FALSE; + } + if (!gfsmio_write(ioh, bv->data, bv->len)) { + g_set_error(errp, g_quark_from_static_string("gfsm"), + g_quark_from_static_string("bitvector_write_bin_handle:weights"), + "could not store bit vector data"); + return FALSE; + } + return TRUE; +} + +//-------------------------------------------------------------- +// bitvector_read_bin_handle() +gboolean gfsm_bitvector_read_bin_handle(gfsmBitVector *bv, gfsmIOHandle *ioh, gfsmError **errp) +{ + guint32 len; + if (!gfsmio_read(ioh, &len, sizeof(guint32))) { + g_set_error(errp, + g_quark_from_static_string("gfsm"), + g_quark_from_static_string("bitvector_read_bin_handle:len"), + "could not read bit vector length"); + return FALSE; + } + gfsm_bitvector_resize(bv,len); + if (!gfsmio_read(ioh, bv->data, len)) { + g_set_error(errp, + g_quark_from_static_string("gfsm"), + g_quark_from_static_string("bitvector_read_bin_handle:data"), + "could not read bit vector data"); + return FALSE; + } + return TRUE; +} diff --git 
a/gfsm/gfsm/src/libgfsm/gfsmBitVector.h b/gfsm/gfsm/src/libgfsm/gfsmBitVector.h new file mode 100644 index 0000000..6c9b999 --- /dev/null +++ b/gfsm/gfsm/src/libgfsm/gfsmBitVector.h @@ -0,0 +1,141 @@ + +/*=============================================================================*\ + * File: gfsmBitVector.h + * Author: Bryan Jurish <moocow@ling.uni-potsdam.de> + * Description: finite state machine library + * + * Copyright (c) 2004-2007 Bryan Jurish. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + *=============================================================================*/ + +/** \file gfsmBitVector.h + * \brief Bit vector utilities using GArray + */ + +#ifndef _GFSM_BITVECTOR_H +#define _GFSM_BITVECTOR_H + +#include <gfsmIO.h> + +/*====================================================================== + * Types + */ +/// bit vector type: really just a wrapper for GArray +typedef GArray gfsmBitVector; + +/*====================================================================== + * Utilities + */ +///\name Utilities +//@{ +/** Low-level utility to convert bit-indices to byte-indices (starting from 0) */ +GFSM_INLINE +guint gfsm_bitvector_bits2bytes_(guint nbits); + +/** Low-level utility to convert byte-sizes to bit-sizes */ +GFSM_INLINE +guint 
gfsm_bitvector_bytes2bits_(guint nbytes); + +//@} + +/*====================================================================== + * Constructors etc. + */ +///\name Constructors etc. +//@{ + +/** Create a new bit vector of length 0 */ +GFSM_INLINE +gfsmBitVector *gfsm_bitvector_new(void); + +/** Create a new bit vector of length 0 with reserved space for at least \a nbits bits */ +GFSM_INLINE +gfsmBitVector *gfsm_bitvector_sized_new(guint nbits); + +/** Resize a bit vector \a bv to at least \a nbits bits (rounded to next byte boundary) */ +GFSM_INLINE +void gfsm_bitvector_resize(gfsmBitVector *bv, guint nbits); + +/** Get current size (in bits) of a bit vector \a bv */ +GFSM_INLINE +guint gfsm_bitvector_size(gfsmBitVector *bv); + +/** Clear a bit vector */ +GFSM_INLINE +void gfsm_bitvector_clear(gfsmBitVector *bv); + +/** Set all bits to zero. \returns altered bv */ +GFSM_INLINE +gfsmBitVector *gfsm_bitvector_zero(gfsmBitVector *bv); + +/** Set all bits to one. \returns altered bv */ +GFSM_INLINE +gfsmBitVector *gfsm_bitvector_one(gfsmBitVector *bv); + +/** Destroy a bit vector */ +GFSM_INLINE +void gfsm_bitvector_free(gfsmBitVector *bv); +//@} + +/*====================================================================== + * Accessors + */ +///\name Accessors +//@{ + +/** Get the value (0 or 1) of the bit at index \a i in vector \a bv. + */ +GFSM_INLINE +gboolean gfsm_bitvector_get(gfsmBitVector *bv, guint i); + +/** Set the value (0 or 1) of the bit at index \a i to boolean value \a v in vector \a bv. + * Formerly implemented as a macro which evaluates its arguments multiple times. + */ +GFSM_INLINE +void gfsm_bitvector_set(gfsmBitVector *bv, guint i, gboolean v); + +//@} + +/*====================================================================== + * I/O + */ +///\name I/O +//@{ + +/** Write the contents of a ::gfsmBitVector to a (binary) ::gfsmIOHandle. 
+ * \param bv bit-vector to write + * \param ioh handle to which data is to be written + * \param errp if an error occurs, \a *errp will hold an error message + * \returns true on success + */ +gboolean gfsm_bitvector_write_bin_handle(gfsmBitVector *bv, gfsmIOHandle *ioh, gfsmError **errp); + +/** Read the contents of a ::gfsmBitVector from a (binary) ::gfsmIOHandle. + * \param bv bit-vector into which data is to be read + * \param ioh handle from which data is to be read + * \param errp if an error occurs, \a *errp will hold an error message + * \returns true on success + */ +gboolean gfsm_bitvector_read_bin_handle(gfsmBitVector *bv, gfsmIOHandle *ioh, gfsmError **errp); + +//@} + +//-- inline definitions +#ifdef GFSM_INLINE_ENABLED +# include <gfsmBitVector.hi> +#endif + +#endif /* _GFSM_BITVECTOR_H */ diff --git a/gfsm/gfsm/src/libgfsm/gfsmBitVector.hi b/gfsm/gfsm/src/libgfsm/gfsmBitVector.hi new file mode 100644 index 0000000..cfcc4dd --- /dev/null +++ b/gfsm/gfsm/src/libgfsm/gfsmBitVector.hi @@ -0,0 +1,133 @@ + +/*=============================================================================*\ + * File: gfsmBitVector.def + * Author: Bryan Jurish <moocow@ling.uni-potsdam.de> + * Description: finite state machine library: bit vectors: inline definitions + * + * Copyright (c) 2004-2007 Bryan Jurish. + * + * For information on usage and redistribution, and for a DISCLAIMER OF ALL + * WARRANTIES, see the file, "LICENSE.txt," in this distribution. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *=============================================================================*/
+
+#include <stdlib.h>
+#include <string.h>
+
+/*======================================================================
+ * Utilities
+ */
+
+//--------------------------------------------------------------
+// bits2bytes(): offset of the byte that contains bit number nbits (rounds down)
+GFSM_INLINE
+guint gfsm_bitvector_bits2bytes_(guint nbits)
+{ return nbits/8; }
+
+//--------------------------------------------------------------
+// bytes2bits(): number of bits held by nbytes bytes
+GFSM_INLINE
+guint gfsm_bitvector_bytes2bits_(guint nbytes)
+{ return nbytes*8; }
+
+/*--------------------------------------------------------------
+ * size(): number of addressable bits (8 * current byte length)
+ */
+GFSM_INLINE
+guint gfsm_bitvector_size(gfsmBitVector *bv)
+{ return gfsm_bitvector_bytes2bits_(bv->len); }
+
+/*--------------------------------------------------------------
+ * get(): value of bit i; indices at or past size() read as FALSE
+ */
+GFSM_INLINE
+gboolean gfsm_bitvector_get(gfsmBitVector *bv, guint i)
+{
+  if (i < gfsm_bitvector_size(bv)) {
+    return g_array_index(bv, guint8, gfsm_bitvector_bits2bytes_(i)) & (1<<(i%8)) ? TRUE : FALSE;
+  }
+  return FALSE;
+}
+
+/*--------------------------------------------------------------
+ * resize(): set the length to (nbits/8)+1 bytes, so bit index nbits itself is addressable
+ */
+GFSM_INLINE
+void gfsm_bitvector_resize(gfsmBitVector *bv, guint nbits)
+{ g_array_set_size(bv, gfsm_bitvector_bits2bytes_(nbits)+1); }
+
+/*--------------------------------------------------------------
+ * set(): assign bit i (non-zero v sets, zero v clears), growing the vector first if i is out of range
+ */
+GFSM_INLINE
+void gfsm_bitvector_set(gfsmBitVector *bv, guint i, gboolean v)
+{
+  if (i >= gfsm_bitvector_size(bv)) { gfsm_bitvector_resize(bv,i); }
+  if (v) { g_array_index(bv, guint8, gfsm_bitvector_bits2bytes_(i)) |=  (1<<(i%8)); }
+  else   { g_array_index(bv, guint8, gfsm_bitvector_bits2bytes_(i)) &= ~(1<<(i%8)); }
+}
+
+/*--------------------------------------------------------------
+ * new(): empty vector backed by a GArray of 1-byte elements
+ */
+GFSM_INLINE
+gfsmBitVector *gfsm_bitvector_new(void)
+{ return g_array_new(FALSE,TRUE,1); }
+
+/*--------------------------------------------------------------
+ * sized_new(): vector able to hold nbits bits; nbits==0 yields an empty vector (nbits-1 would wrap around)
+ */
+GFSM_INLINE
+gfsmBitVector *gfsm_bitvector_sized_new(guint nbits)
+{
+  gfsmBitVector *bv = g_array_sized_new(FALSE,TRUE,1, gfsm_bitvector_bits2bytes_(nbits)+1);
+  if (nbits > 0) { gfsm_bitvector_set(bv, nbits-1, 0); }
+  return bv;
+}
+
+/*--------------------------------------------------------------
+ * free(): release the vector together with its data segment
+ */
+GFSM_INLINE
+void gfsm_bitvector_free(gfsmBitVector *bv)
+{ g_array_free(bv,TRUE); }
+
+/*--------------------------------------------------------------
+ * clear(): truncate the vector to zero bytes
+ */
+GFSM_INLINE
+void gfsm_bitvector_clear(gfsmBitVector *bv)
+{ g_array_set_size(bv,0); }
+
+/*--------------------------------------------------------------
+ * zero(): clear every stored bit; skips memset() on an empty vector, whose data pointer may be NULL
+ */
+GFSM_INLINE
+gfsmBitVector *gfsm_bitvector_zero(gfsmBitVector *bv)
+{
+  if (bv->len > 0) { memset(bv->data, 0, bv->len); }
+  return bv;
+}
+
+/*--------------------------------------------------------------
+ * one(): set every stored bit; skips memset() on an empty vector, whose data pointer may be NULL
+ */
+GFSM_INLINE
+gfsmBitVector *gfsm_bitvector_one(gfsmBitVector *bv)
+{
+  if (bv->len > 0) { memset(bv->data, 255, bv->len); }
+  return bv;
+}
diff --git a/gfsm/gfsm/src/libgfsm/gfsmCommon.c b/gfsm/gfsm/src/libgfsm/gfsmCommon.c
new file mode
100644 index 0000000..1135277 --- /dev/null +++ b/gfsm/gfsm/src/libgfsm/gfsmCommon.c @@ -0,0 +1,42 @@ +/*=============================================================================*\ + * File: gfsmCommon.c + * Author: Bryan Jurish <moocow@ling.uni-potsdam.de> + * Description: finite state machine library: common definitions + * + * Copyright (c) 2004 Bryan Jurish. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + *=============================================================================*/ + +#include <gfsmCommon.h> +#include <stdlib.h> + +/*====================================================================== + * Constants + */ +const gfsmLabelId gfsmEpsilon = 0; + +const gpointer gfsmNoKey = NULL; + +const gfsmLabelId gfsmNoLabel = (gfsmLabelId)-1; + +const gfsmLabelId gfsmEpsilon1 = (gfsmLabelId)-2; + +const gfsmLabelId gfsmEpsilon2 = (gfsmLabelId)-3; + +const gfsmStateId gfsmNoState = (gfsmStateId)-1; + +const gfsmWeight gfsmNoWeight = 0; + diff --git a/gfsm/gfsm/src/libgfsm/gfsmCommon.h b/gfsm/gfsm/src/libgfsm/gfsmCommon.h new file mode 100644 index 0000000..b27910e --- /dev/null +++ b/gfsm/gfsm/src/libgfsm/gfsmCommon.h @@ -0,0 +1,88 @@ + +/*=============================================================================*\ + * File: gfsmCommon.h + * Author: Bryan 
Jurish <moocow@ling.uni-potsdam.de> + * Description: finite state machine library: common definitions + * + * Copyright (c) 2004-2007 Bryan Jurish. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + *=============================================================================*/ + +/** \file gfsmCommon.h + * \brief Commonly used typedefs and constants + */ + +#ifndef _GFSM_COMMON_H +#define _GFSM_COMMON_H + +#include <gfsmConfig.h> +#include <glib.h> + +/*====================================================================== + * Basic Types + */ +/** Type for elementary arc-labels */ +typedef guint16 gfsmLabelId; + +/** Type for extended arc-labels (parameters and return values) */ +typedef guint32 gfsmLabelVal; + +/** Type for elementary state identifiers */ +typedef guint32 gfsmStateId; + +/** Alias for state identifiers */ +typedef gfsmStateId gfsmNodeId; + +/** Type for arc weights */ +typedef gfloat gfsmWeight; + +/*====================================================================== + * Vector types + */ +/** Type for a sequence of (gfsmLabelVal)s */ +typedef GPtrArray gfsmLabelVector; + +/** Type for a sequence of (gfsmStateId)s */ +typedef GPtrArray gfsmStateIdVector; + + +/*====================================================================== + * Constants + */ +/** 
Constant epsilon label */ +extern const gfsmLabelId gfsmEpsilon; + +/** Constant label for pseudo-epsilon moves in 1st argument to compose() */ +extern const gfsmLabelId gfsmEpsilon1; + +/** Constant label for pseudo-epsilon moves in 2nd argument to compose() */ +extern const gfsmLabelId gfsmEpsilon2; + +/** Constant indicating missing alphabet key */ +extern const gpointer gfsmNoKey; + +/** Constant indicating missing label */ +extern const gfsmLabelId gfsmNoLabel; + +/** Constant indicating missing state */ +extern const gfsmStateId gfsmNoState; + +/** Constant indicating missing weight + * \warning Deprecated: prefer gfsm_sr_one() + */ +extern const gfsmWeight gfsmNoWeight; + +#endif /* _GFSM_COMMON_H */ diff --git a/gfsm/gfsm/src/libgfsm/gfsmCompound.c b/gfsm/gfsm/src/libgfsm/gfsmCompound.c new file mode 100644 index 0000000..ec54502 --- /dev/null +++ b/gfsm/gfsm/src/libgfsm/gfsmCompound.c @@ -0,0 +1,154 @@ +/*=============================================================================*\ + * File: gfsmCompound.c + * Author: Bryan Jurish <moocow@ling.uni-potsdam.de> + * Description: finite state machine library: compound states + * + * Copyright (c) 2004-2007 Bryan Jurish. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *=============================================================================*/
+
+#include <gfsmCompound.h>
+
+//-- no-inline definitions
+#ifndef GFSM_INLINE_ENABLED
+# include <gfsmCompound.hi>
+#endif
+
+/*======================================================================
+ * Label Pair: Methods
+ */
+
+/*--------------------------------------------------------------
+ * labelpair_compare(): out-of-line wrapper around the inline comparison
+ */
+gint gfsm_labelpair_compare(gfsmLabelPair lp1, gfsmLabelPair lp2)
+{ return gfsm_labelpair_compare_inline(lp1,lp2); }
+
+/*--------------------------------------------------------------
+ * labelpair_compare_with_data(): same comparison; the data argument is not consulted
+ */
+gint gfsm_labelpair_compare_with_data(gfsmLabelPair lp1, gfsmLabelPair lp2, gpointer data)
+{ return gfsm_labelpair_compare_inline(lp1,lp2); }
+
+
+/*======================================================================
+ * Methods: gfsmStatePair
+ */
+
+/*--------------------------------------------------------------
+ * statepair_hash(): id1 scaled by 7949, plus id2
+ */
+guint gfsm_statepair_hash(gfsmStatePair *sp)
+{
+  //-- multipliers tried earlier: 7
+  //-- multipliers tried earlier: 5039
+  return sp->id2 + 7949*sp->id1;
+}
+
+
+/*--------------------------------------------------------------
+ * statepair_compare(): lexicographic order on (id1, id2); returns -1, 0 or 1
+ */
+gint gfsm_statepair_compare(const gfsmStatePair *sp1, const gfsmStatePair *sp2)
+{
+  if (sp1->id1 < sp2->id1) { return -1; }
+  if (sp1->id1 > sp2->id1) { return  1; }
+  if (sp1->id2 < sp2->id2) { return -1; }
+  if (sp1->id2 > sp2->id2) { return  1; }
+  return 0;
+}
+
+/*--------------------------------------------------------------
+ * statepair_equal(): true iff both components match
+ */
+gboolean gfsm_statepair_equal(const gfsmStatePair *sp1, const gfsmStatePair *sp2)
+{
+  return (sp1->id1 == sp2->id1) && (sp1->id2 == sp2->id2);
+}
+
+
+
+/*======================================================================
+ * Methods: gfsmComposeState
+ */
+
+/*--------------------------------------------------------------
+ * compose_state_hash(): id1 scaled by 7949, plus id2, plus idf scaled by 7963
+ */
+guint gfsm_compose_state_hash(gfsmComposeState *sp)
+{
+  return sp->id2 + 7949*sp->id1 + 7963*sp->idf;
+}
+
+
+/*--------------------------------------------------------------
+ * compose_state_compare(): lexicographic order on (id1, id2, idf); returns -1, 0 or 1
+ */
+gint gfsm_compose_state_compare(const gfsmComposeState *sp1, const gfsmComposeState *sp2)
+{
+  if (sp1->id1 < sp2->id1) { return -1; }
+  if (sp1->id1 > sp2->id1) { return  1; }
+  if (sp1->id2 < sp2->id2) { return -1; }
+  if (sp1->id2 > sp2->id2) { return  1; }
+  if (sp1->idf < sp2->idf) { return -1; }
+  if (sp1->idf > sp2->idf) { return  1; }
+  return 0;
+}
+
+/*--------------------------------------------------------------
+ * compose_state_equal(): true iff all three components match
+ */
+gboolean gfsm_compose_state_equal(const gfsmComposeState *sp1, const gfsmComposeState *sp2)
+{
+  return (sp1->id1 == sp2->id1) && (sp1->id2 == sp2->id2) && (sp1->idf == sp2->idf);
+}
+
+
+
+/*======================================================================
+ * Methods: gfsmStateWeightPair
+ */
+
+//-------------------------------------------------------------- hash: the state id alone
+guint gfsm_state_weight_pair_hash(gfsmStateWeightPair *swp)
+{
+  return swp->id;
+}
+
+//-------------------------------------------------------------- compare: by id, ties broken by gfsm_sr_compare() on the weights
+gint gfsm_state_weight_pair_compare(const gfsmStateWeightPair *swp1, const gfsmStateWeightPair *swp2, gfsmSemiring *sr)
+{
+  if (swp1->id < swp2->id) { return -1; }
+  if (swp1->id > swp2->id) { return  1; }
+  return gfsm_sr_compare(sr,swp1->w,swp2->w);
+}
+
+//-------------------------------------------------------------- equal: same id and weights that compare equal with ==
+gboolean gfsm_state_weight_pair_equal(const gfsmStateWeightPair *swp1, const gfsmStateWeightPair *swp2)
+{
+  return (swp1->id == swp2->id) && (swp1->w == swp2->w);
+}
+
+
+/*======================================================================
+ * Methods: gfsmStatePairEnum
+ */
+
+
+/*======================================================================
+ * Methods: gfsmComposeStateEnum
+ */
+
diff --git a/gfsm/gfsm/src/libgfsm/gfsmCompound.h b/gfsm/gfsm/src/libgfsm/gfsmCompound.h
new file mode 100644
index 0000000..d492cc7
--- /dev/null
+++ b/gfsm/gfsm/src/libgfsm/gfsmCompound.h
@@ -0,0 +1,266 @@
+
+/*=============================================================================*\
+ * File: gfsmCompound.h
+ * Author: Bryan Jurish <moocow@ling.uni-potsdam.de>
+ * Description: finite state machine library: basic compound types
+ *
+ * Copyright (c) 2004-2007 Bryan Jurish.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + *=============================================================================*/ + +/** \file gfsmCompound.h + * \brief Utilities for "compound" states and labels + */ + +#ifndef _GFSM_COMPOUND_H +#define _GFSM_COMPOUND_H + +#include <gfsmEnum.h> +#include <gfsmWeightMap.h> + +/*====================================================================== + * Compound Types + */ +/// Typedef for (lower,upper) label pairs: really just a wrapper for some bit operations +typedef guint32 gfsmLabelPair; + +/// Matched pair of state-ids (used for automaton intersection) +typedef struct { + gfsmStateId id1; /**< Id of first component */ + gfsmStateId id2; /**< Id of second component */ +} gfsmStatePair; + +/// Typedef for composition filter states +/** + * as described in Mohri, Pereira, & Riley (1996) "Weighted Automata in Text and Speech Processing", + * Proc. ECAI 96, John Wiley & Sons. + */ +typedef guint8 gfsmComposeFilterState; + +/// Matched pair of (::gfsmStateId)s with an additional filter state (used for transducer composition) +/** + * for details, see + * Mohri, Pereira, & Riley (1996) "Weighted Automata in Text and Speech Processing", + * Proc. ECAI 96, John Wiley & Sons. 
+ */ +typedef struct { + gfsmStateId id1; /**< Id of first component */ + gfsmStateId id2; /**< Id of second component */ + gfsmComposeFilterState idf; /**< Filter state (0|1|2) */ +} gfsmComposeState; + +/// Type for a (::gfsmStateId,::gfsmWeight) pair (used by algebraic operations) +typedef struct { + gfsmStateId id; /**< state-id */ + gfsmWeight w; /**< weight */ +} gfsmStateWeightPair; + +/// Typedef for mapping (::gfsmStatePair)s to single (::gfsmStateId)s, +/// used by gfsm_automaton_intersection() +typedef gfsmEnum gfsmStatePairEnum; + +/// Typedef for mapping (::gfsmComposeState)s to single (::gfsmStateId)s, +/// used by gfsm_automaton_compose() +typedef gfsmEnum gfsmComposeStateEnum; + +/// Typedef for mapping (::gfsmStatePair)s to single (::gfsmWeight)s, +/// used by gfsm_automaton_rmepsilon() +typedef gfsmWeightHash gfsmStatePair2WeightHash; + + +/*====================================================================== + * LabelPair: Methods + */ +///\name gfsmLabelPair Methods +//@{ + +/** Create a "new" gfsmLabelPair */ +GFSM_INLINE +gfsmLabelPair gfsm_labelpair_new(guint32 lo, guint32 hi); + +/** Get lower label of a label-pair */ +GFSM_INLINE +gfsmLabelId gfsm_labelpair_lower(gfsmLabelPair lp); + +/** Get upper label of a label-pair */ +GFSM_INLINE +gfsmLabelId gfsm_labelpair_upper(gfsmLabelPair lp); + +/** Hash function alias for ::gfsmLabelPair */ +#define gfsm_labelpair_hash g_direct_hash + +/** Equal function alias for ::gfsmLabelPair */ +#define gfsm_labelpair_equal g_direct_equal + +/** Comparison function for ::gfsmLabelPair */ +gint gfsm_labelpair_compare(gfsmLabelPair lp1, gfsmLabelPair lp2); + +/** Dummy data-comparison function for gfsmLabelPair */ +gint gfsm_labelpair_compare_with_data(gfsmLabelPair lp1, gfsmLabelPair lp2, gpointer data); +//@} + +/*====================================================================== + * Methods: gfsmStatePair + */ +///\name gfsmStatePair Methods +//@{ + +/** Create a new ::gfsmStatePair */ 
+GFSM_INLINE +gfsmStatePair *gfsm_statepair_new(gfsmStateId id1, gfsmStateId id2); + +/** Clone an existing ::gfsmStatePair */ +GFSM_INLINE +gfsmStatePair *gfsm_statepair_clone(gfsmStatePair *sp); + +/** Free a ::gfsmStatePair: + * \code void gfsm_statepair_free(gfsmStatePair *sp); \endcode + */ +#define gfsm_statepair_free g_free + +/** Get a more or less sensible hash value from a state pair */ +guint gfsm_statepair_hash(gfsmStatePair *sp); + +/** Comparison function for ::gfsmStatePair */ +gint gfsm_statepair_compare(const gfsmStatePair *sp1, const gfsmStatePair *sp2); + +/** Equality predicate for ::gfsmStatePair */ +gboolean gfsm_statepair_equal(const gfsmStatePair *sp1, const gfsmStatePair *sp2); + +//@} + +/*====================================================================== + * Methods: gfsmComposeState + */ +///\name gfsmComposeState Methods +//@{ + +/** Create a new ::gfsmComposeState */ +GFSM_INLINE +gfsmComposeState *gfsm_compose_state_new(gfsmStateId id1, gfsmStateId id2, gfsmComposeFilterState idf); + +/** Clone an existing ::gfsmComposeState */ +GFSM_INLINE +gfsmComposeState *gfsm_compose_state_clone(gfsmComposeState *sp); + +/** Free a ::gfsmComposeState: + * \code void gfsm_compose_state_free(gfsmComposeState *csp); \endcode +*/ +#define gfsm_compose_state_free g_free + +/** Get a (more or less sensible) hash value from a ::gfsmComposeState */ +guint gfsm_compose_state_hash(gfsmComposeState *sp); + +/** Comparison function for gfsmComposeState */ +gint gfsm_compose_state_compare(const gfsmComposeState *sp1, const gfsmComposeState *sp2); + +/** Equality predicate for ::gfsmComposeState */ +gboolean gfsm_compose_state_equal(const gfsmComposeState *sp1, const gfsmComposeState *sp2); + +//@} + + +/*====================================================================== + * Methods: gfsmStateWeightPair + */ +///\name gfsmStateWeightPair Methods +//@{ + +/** Create a new ::gfsmStateWeightPair */ +GFSM_INLINE +gfsmStateWeightPair 
*gfsm_state_weight_pair_new(gfsmStateId id, gfsmWeight w); + +/** Clone an existing ::gfsmStateWeightPair */ +GFSM_INLINE +gfsmStateWeightPair *gfsm_state_weight_pair_clone(const gfsmStateWeightPair *swp); + +/** Free a ::gfsmStateWeightPair + * \code void gfsm_state_weight_pair_free(gfsmStateWeightPair *swp); \endcode + */ +#define gfsm_state_weight_pair_free g_free + +/** Get a (more or less sensible) hash value from a ::gfsmStateWeightPair (really just hashes id) */ +guint gfsm_state_weight_pair_hash(gfsmStateWeightPair *sp); + +/** Comparison function for ::gfsmStateWeightPair (id << w) */ +gint gfsm_state_weight_pair_compare(const gfsmStateWeightPair *swp1, const gfsmStateWeightPair *swp2, gfsmSemiring *sr); + +/** Equality predicate for ::gfsmStateWeightPair */ +gboolean gfsm_state_weight_pair_equal(const gfsmStateWeightPair *swp1, const gfsmStateWeightPair *swp2); + +//@} + +/*====================================================================== + * Methods: gfsmStatePairEnum + */ +///\name gfsmStatePairEnum Methods +//@{ + +/** create a new ::gfsmStatePairEnum (copies & frees keys) + * \see gfsmEnum + */ +GFSM_INLINE +gfsmStatePairEnum *gfsm_statepair_enum_new(void); + +/** Alias; \see gfsm_enum_clear() */ +#define gfsm_statepair_enum_clear gfsm_enum_clear + +/** Alias; \see gfsm_enum_free() */ +#define gfsm_statepair_enum_free gfsm_enum_free + +//@} + +/*====================================================================== + * Methods: gfsmComposeStateEnum + */ +///\name gfsmComposeStateEnum Methods +//@{ + +/** create a new ::gfsmComposeStateEnum (copies & frees keys) + * \see gfsmEnum + */ +GFSM_INLINE +gfsmComposeStateEnum *gfsm_compose_state_enum_new(void); + +/** Alias; \see gfsm_enum_clear() */ +#define gfsm_compose_state_enum_clear gfsm_enum_clear + +/** Alias; \see gfsm_enum_free() */ +#define gfsm_compose_state_enum_free gfsm_enum_free + +//@} + + +/*====================================================================== + * Methods: 
gfsmStatePair2WeightHash + */ +///\name gfsmStatePair2WeightHash Methods +//@{ + +/** create a new ::gfsmStatePair2WeightHash (copies & frees keys) + * \see gfsmWeightHash + */ +GFSM_INLINE +gfsmStatePair2WeightHash *gfsm_statepair2weighthash_new(void); + +//@} + +//-- inline definitions +#ifdef GFSM_INLINE_ENABLED +# include <gfsmCompound.hi> +#endif + +#endif /* _GFSM_COMPOUND_H */ diff --git a/gfsm/gfsm/src/libgfsm/gfsmCompound.hi b/gfsm/gfsm/src/libgfsm/gfsmCompound.hi new file mode 100644 index 0000000..cf0f006 --- /dev/null +++ b/gfsm/gfsm/src/libgfsm/gfsmCompound.hi @@ -0,0 +1,217 @@ +/*=============================================================================*\ + * File: gfsmCompound.hi + * Author: Bryan Jurish <moocow@ling.uni-potsdam.de> + * Description: finite state machine library: compound states: inline definitions + * + * Copyright (c) 2004-2007 Bryan Jurish. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *=============================================================================*/
+
+#include <gfsmMem.h>
+#include <stdlib.h>
+#include <gfsmUtils.h>
+
+
+/*======================================================================
+ * Label Pair: Methods
+ */
+
+//--------------------------------------------------------------
+// labelpair_new(): pack hi into bits 16-31 and lo into bits 0-15; both are masked so a wide lo cannot leak into hi
+GFSM_INLINE
+gfsmLabelPair gfsm_labelpair_new(guint32 lo, guint32 hi)
+{
+  return (((hi & 0xffff)<<16) | (lo & 0xffff));
+}
+
+//--------------------------------------------------------------
+// labelpair_lower(): low 16 bits of the pair
+GFSM_INLINE
+gfsmLabelId gfsm_labelpair_lower(gfsmLabelPair lp)
+{ return (lp & 0xffff); }
+
+//--------------------------------------------------------------
+// labelpair_upper(): high 16 bits of the pair
+GFSM_INLINE
+gfsmLabelId gfsm_labelpair_upper(gfsmLabelPair lp)
+{ return ((lp>>16) & 0xffff); }
+
+
+/*--------------------------------------------------------------
+ * labelpair_compare(): orders by lower label first, then by upper label; returns -1, 0 or 1
+ */
+GFSM_INLINE
+gint gfsm_labelpair_compare_inline(gfsmLabelPair lp1, gfsmLabelPair lp2)
+{
+  gfsmLabelId
+    lo1 = gfsm_labelpair_lower(lp1),
+    lo2 = gfsm_labelpair_lower(lp2),
+    hi1 = gfsm_labelpair_upper(lp1),
+    hi2 = gfsm_labelpair_upper(lp2);
+  return (lo1 < lo2 ? -1
+          : (lo1 > lo2 ? 1
+             : (hi1 < hi2 ? -1
+                : (hi1 > hi2 ? 1
+                   : 0))));
+}
+
+
+/*======================================================================
+ * Methods: gfsmStatePair
+ */
+
+/*--------------------------------------------------------------
+ * statepair_new(): allocate a pair with g_new() and fill in both ids
+ */
+GFSM_INLINE
+gfsmStatePair *gfsm_statepair_new(gfsmStateId id1, gfsmStateId id2)
+{
+  gfsmStatePair *sp = g_new(gfsmStatePair,1);
+  sp->id1 = id1;
+  sp->id2 = id2;
+  return sp;
+}
+
+/*--------------------------------------------------------------
+ * statepair_clone(): copy made with gfsm_mem_dup_n() over sizeof(gfsmStatePair)
+ */
+GFSM_INLINE
+gfsmStatePair *gfsm_statepair_clone(gfsmStatePair *sp)
+{
+  return (gfsmStatePair*)gfsm_mem_dup_n(sp, sizeof(gfsmStatePair));
+}
+
+/*--------------------------------------------------------------
+ * statepair_free(): disabled here (#if 0)
+ */
+#if 0
+GFSM_INLINE
+void gfsm_statepair_free(gfsmStatePair *sp)
+{ g_free(sp); }
+#endif
+
+
+
+/*======================================================================
+ * Methods: gfsmComposeState
+ */
+
+/*--------------------------------------------------------------
+ * compose_state_new(): allocate with g_new() and fill in both ids and the filter state
+ */
+GFSM_INLINE
+gfsmComposeState *gfsm_compose_state_new(gfsmStateId id1, gfsmStateId id2, gfsmComposeFilterState idf)
+{
+  gfsmComposeState *sp = g_new(gfsmComposeState,1);
+  sp->id1 = id1;
+  sp->id2 = id2;
+  sp->idf = idf;
+  return sp;
+}
+
+/*--------------------------------------------------------------
+ * compose_state_clone(): copy made with gfsm_mem_dup_n() over sizeof(gfsmComposeState)
+ */
+GFSM_INLINE
+gfsmComposeState *gfsm_compose_state_clone(gfsmComposeState *sp)
+{
+  return (gfsmComposeState*)gfsm_mem_dup_n(sp, sizeof(gfsmComposeState));
+}
+
+/*--------------------------------------------------------------
+ * compose_state_free(): disabled here (#if 0)
+ */
+#if 0
+GFSM_INLINE
+void gfsm_compose_state_free(gfsmComposeState *sp)
+{ g_free(sp); }
+#endif
+
+
+
+/*======================================================================
+ * Methods: gfsmStateWeightPair
+ */
+
+//-------------------------------------------------------------- new: allocate with g_new() and fill in id and weight
+GFSM_INLINE
+gfsmStateWeightPair *gfsm_state_weight_pair_new(gfsmStateId id, gfsmWeight w)
+{
+  gfsmStateWeightPair *swp = g_new(gfsmStateWeightPair,1);
+  swp->id = id;
+  swp->w  = w;
+  return swp;
+}
+
+//-------------------------------------------------------------- clone: new pair built from the source's id and weight
+GFSM_INLINE
+gfsmStateWeightPair *gfsm_state_weight_pair_clone(const gfsmStateWeightPair *swp)
+{
+  return gfsm_state_weight_pair_new(swp->id,swp->w);
+}
+
+//-------------------------------------------------------------- free: disabled here (#if 0)
+#if 0
+GFSM_INLINE
+void gfsm_state_weight_pair_free(gfsmStateWeightPair *swp)
+{ g_free(swp); }
+#endif
+
+
+/*======================================================================
+ * Methods: gfsmStatePairEnum
+ */
+
+/*--------------------------------------------------------------
+ * statepair_enum_new(): enum keyed by gfsmStatePair; keys are cloned on insert and released with g_free
+ */
+GFSM_INLINE
+gfsmStatePairEnum *gfsm_statepair_enum_new(void)
+{
+  return gfsm_enum_new_full((gfsmDupFunc)gfsm_statepair_clone,
+                            (GHashFunc)gfsm_statepair_hash,
+                            (GEqualFunc)gfsm_statepair_equal,
+                            (GDestroyNotify)g_free);
+}
+
+
+/*======================================================================
+ * Methods: gfsmComposeStateEnum
+ */
+
+/*--------------------------------------------------------------
+ * compose_state_enum_new(): enum keyed by gfsmComposeState; keys are cloned on insert and released with g_free
+ */
+GFSM_INLINE
+gfsmComposeStateEnum *gfsm_compose_state_enum_new(void)
+{
+  return gfsm_enum_new_full((gfsmDupFunc)gfsm_compose_state_clone,
+                            (GHashFunc)gfsm_compose_state_hash,
+                            (GEqualFunc)gfsm_compose_state_equal,
+                            (GDestroyNotify)g_free);
+}
+
+/*======================================================================
+ * Methods: StatePair2WeightHash
+ */
+GFSM_INLINE
+gfsmStatePair2WeightHash *gfsm_statepair2weighthash_new(void)
+{
+  return gfsm_weighthash_new_full((gfsmDupFunc)gfsm_statepair_clone,
+                                  (GHashFunc)gfsm_statepair_hash,
+                                  (GEqualFunc)gfsm_statepair_equal,
+                                  (GDestroyNotify)g_free);
+}
diff --git a/gfsm/gfsm/src/libgfsm/gfsmConfig.h b/gfsm/gfsm/src/libgfsm/gfsmConfig.h
new file mode 100644
index 0000000..ccb9cb4
--- /dev/null
+++ b/gfsm/gfsm/src/libgfsm/gfsmConfig.h
@@ -0,0 +1,64 @@
+/*=============================================================================*\ + * File: gfsmConfig.h + * Author: Bryan Jurish <moocow@ling.uni-potsdam.de> + * Description: finite state machine library: autoconf configuration hack + * + * Copyright (c) 2004-2007 Bryan Jurish. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + *=============================================================================*/ + +/** + * \file gfsmConfig.h + * \brief safely include autoheader preprocessor defines + * + * \file gfsmConfigNoAuto.h + * \brief Undefine any autoheader preprocessor defines + * + * \file gfsmConfigAuto.h + * \brief autoheader-generated preprocessor defines + */ + +/* + * Putting autoheader files within the #ifndef/#endif idiom (below) + * is potentially a BAD IDEA, since we might need to (re-)define + * the autoheader-generated preprocessor symbols (e.g. after + * (#include)ing in some config.h from another autoheader package + */ +#include <gfsmConfigNoAuto.h> +#include <gfsmConfigAuto.h> + +/* + * Define a sentinel preprocessor symbol _GFSM_CONFIG_H, just + * in case someone wants to check whether we've already + * (#include)d this file .... 
+ */ +#ifndef _GFSM_CONFIG_H +#define _GFSM_CONFIG_H + +#ifdef GFSM_INLINE_ENABLED +# ifdef __cplusplus +/** Macro for declaring small functions inline (inlining enabled, C++-style) */ +# define GFSM_INLINE inline +# else +/** Macro for declaring small functions inline (inlining enabled, C99-style) */ +# define GFSM_INLINE static inline +# endif /* __cplusplus */ +#else +/** Macro for declaring small functions inline (inlining disabled) */ +# define GFSM_INLINE +#endif /* GFSM_INLINE_ENABLED */ + +#endif /* _GFSM_CONFIG_H */ diff --git a/gfsm/gfsm/src/libgfsm/gfsmConfigAuto.h.in b/gfsm/gfsm/src/libgfsm/gfsmConfigAuto.h.in new file mode 100644 index 0000000..ba4b089 --- /dev/null +++ b/gfsm/gfsm/src/libgfsm/gfsmConfigAuto.h.in @@ -0,0 +1,185 @@ +/* src/libgfsm/gfsmConfigAuto.h.in. Generated from configure.ac by autoheader. */ + +/* Define to one of `_getb67', `GETB67', `getb67' for Cray-2 and Cray-YMP + systems. This function is required for `alloca.c' support on those systems. + */ +#undef CRAY_STACKSEG_END + +/* Define to 1 if using `alloca.c'. */ +#undef C_ALLOCA + +/* Define as good substitute value for EOVERFLOW. */ +#undef EOVERFLOW + +/* Define this to enable debugging code */ +#undef GFSM_DEBUG_ENABLED + +/* Define this to enable inline functions */ +#undef GFSM_INLINE_ENABLED + +/* Major gfsm version */ +#undef GFSM_VERSION_MAJOR + +/* Micro gfsm version */ +#undef GFSM_VERSION_MICRO + +/* Minor gfsm version */ +#undef GFSM_VERSION_MINOR + +/* Define this to enable zlib compression support */ +#undef GFSM_ZLIB_ENABLED + +/* Define to 1 if you have `alloca' after including <alloca.h>, a header that + may be supplied by this distribution. */ +#undef HAVE_ALLOCA + +/* Define HAVE_ALLOCA_H for backward compatibility with older code that + includes <alloca.h> only if HAVE_ALLOCA_H is defined. */ +#undef HAVE_ALLOCA_H + +/* Define to 1 if you have the declaration of `getdelim', and to 0 if you + don't. 
*/ +#undef HAVE_DECL_GETDELIM + +/* Define to 1 if you have the <dlfcn.h> header file. */ +#undef HAVE_DLFCN_H + +/* Define to 1 if you have the `flockfile' function. */ +#undef HAVE_FLOCKFILE + +/* Define to 1 if you have the `funlockfile' function. */ +#undef HAVE_FUNLOCKFILE + +/* Define to 1 if you have the `getdelim' function. */ +#undef HAVE_GETDELIM + +/* Define if you have the 'intmax_t' type in <stdint.h> or <inttypes.h>. */ +#undef HAVE_INTMAX_T + +/* Define to 1 if you have the <inttypes.h> header file. */ +#undef HAVE_INTTYPES_H + +/* Define if <inttypes.h> exists, doesn't clash with <sys/types.h>, and + declares uintmax_t. */ +#undef HAVE_INTTYPES_H_WITH_UINTMAX + +/* Define if you have the 'long double' type. */ +#undef HAVE_LONG_DOUBLE + +/* Define if you have the 'long long' type. */ +#undef HAVE_LONG_LONG + +/* Define to 1 if you have the <memory.h> header file. */ +#undef HAVE_MEMORY_H + +/* Define to 1 if the system has the type `ptrdiff_t'. */ +#undef HAVE_PTRDIFF_T + +/* Define to 1 if you have the `snprintf' function. */ +#undef HAVE_SNPRINTF + +/* Define to 1 if you have the <stdint.h> header file. */ +#undef HAVE_STDINT_H + +/* Define if <stdint.h> exists, doesn't clash with <sys/types.h>, and declares + uintmax_t. */ +#undef HAVE_STDINT_H_WITH_UINTMAX + +/* Define to 1 if you have the <stdlib.h> header file. */ +#undef HAVE_STDLIB_H + +/* Define this if you have the strdup() function */ +#undef HAVE_STRDUP + +/* Define to 1 if you have the <strings.h> header file. */ +#undef HAVE_STRINGS_H + +/* Define to 1 if you have the <string.h> header file. */ +#undef HAVE_STRING_H + +/* Define to 1 if you have the <sys/stat.h> header file. */ +#undef HAVE_SYS_STAT_H + +/* Define to 1 if you have the <sys/types.h> header file. */ +#undef HAVE_SYS_TYPES_H + +/* Define to 1 if you have the <unistd.h> header file. */ +#undef HAVE_UNISTD_H + +/* Define to 1 if you have the `vasnprintf' function. 
*/ +#undef HAVE_VASNPRINTF + +/* Define to 1 if you have the `vasprintf' function. */ +#undef HAVE_VASPRINTF + +/* Define to 1 if you have the `vfprintf' function. */ +#undef HAVE_VFPRINTF + +/* Define if you have the 'wchar_t' type. */ +#undef HAVE_WCHAR_T + +/* Define to 1 if you have the `wcslen' function. */ +#undef HAVE_WCSLEN + +/* Define if you have the 'wint_t' type. */ +#undef HAVE_WINT_T + +/* Name of package */ +#undef PACKAGE + +/* Define to the address where bug reports for this package should be sent. */ +#undef PACKAGE_BUGREPORT + +/* Define to the full name of this package. */ +#undef PACKAGE_NAME + +/* Define to the full name and version of this package. */ +#undef PACKAGE_STRING + +/* Define to the one symbol short name of this package. */ +#undef PACKAGE_TARNAME + +/* Define to the version of this package. */ +#undef PACKAGE_VERSION + +/* Define as the maximum value of type 'size_t', if the system doesn't define + it. */ +#undef SIZE_MAX + +/* If using the C implementation of alloca, define if you know the + direction of stack growth for your system; otherwise it will be + automatically deduced at runtime. + STACK_DIRECTION > 0 => grows toward higher addresses + STACK_DIRECTION < 0 => grows toward lower addresses + STACK_DIRECTION = 0 => direction of growth unknown */ +#undef STACK_DIRECTION + +/* Define to 1 if you have the ANSI C header files. */ +#undef STDC_HEADERS + +/* Version number of package */ +#undef VERSION + +/* Define to 1 if `lex' declares `yytext' as a `char *' by default, not a + `char[]'. */ +#undef YYTEXT_POINTER + +/* Enable GNU extensions on systems that have them. */ +#ifndef _GNU_SOURCE +# undef _GNU_SOURCE +#endif + +/* Define to `__inline__' or `__inline' if that's what the C compiler + calls it, or to nothing if 'inline' is not supported under any name. */ +#ifndef __cplusplus +#undef inline +#endif + +/* Define to long or long long if <stdint.h> and <inttypes.h> don't define. 
*/ +#undef intmax_t + +/* Define to empty if the C compiler doesn't support this keyword. */ +#undef signed + +/* Define to `unsigned int' if <sys/types.h> does not define. */ +#undef size_t diff --git a/gfsm/gfsm/src/libgfsm/gfsmDebug.c b/gfsm/gfsm/src/libgfsm/gfsmDebug.c new file mode 100644 index 0000000..463e28a --- /dev/null +++ b/gfsm/gfsm/src/libgfsm/gfsmDebug.c @@ -0,0 +1,35 @@ +#include <gfsmDebug.h> +#include <gfsmMem.h> +#include <gfsmConfig.h> + +#ifdef GFSM_DEBUG_ENABLED +# define GFSM_MEM_DEBUG +# define GFSM_ALLOC_DEBUG +#endif /* GFSM_DEBUG_ENABLED */ + +void gfsm_debug_init(void) { +#if defined(GFSM_MEM_DEBUG) + g_mem_set_vtable(glib_mem_profiler_table); +# if defined(GFSM_ALLOC_DEBUG) + gfsm_allocators_enable(); +# endif /* GFSM_ALLOC_DEBUG */ +#endif /* GFSM_MEM_DEBUG */ + return; +} + +void gfsm_debug_finish(void) { +#if defined(GFSM_MEM_DEBUG) + g_blow_chunks(); +# if defined(GFSM_ALLOC_DEBUG) + gfsm_allocators_free(); +# endif /* GFSM_ALLOC_DEBUG */ +#endif /* GFSM_MEM_DEBUG */ + return; +} + +void gfsm_debug_print(void) { +#if defined(GFSM_MEM_DEBUG) + g_mem_profile(); +#endif /* GFSM_MEM_DEBUG */ + return; +} diff --git a/gfsm/gfsm/src/libgfsm/gfsmDebug.h b/gfsm/gfsm/src/libgfsm/gfsmDebug.h new file mode 100644 index 0000000..73fa741 --- /dev/null +++ b/gfsm/gfsm/src/libgfsm/gfsmDebug.h @@ -0,0 +1,46 @@ + +/*=============================================================================*\ + * File: gfsmDebug.h + * Author: Bryan Jurish <moocow@ling.uni-potsdam.de> + * Description: finite state machine library: debugging + * + * Copyright (c) 2004-2007 Bryan Jurish. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. 
+ * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + *=============================================================================*/ + +/** \file gfsmDebug.h + * \brief Debugging utilities + */ + +#ifndef _GFSM_DEBUG_H +#define _GFSM_DEBUG_H + +#include <glib.h> + +/* Define these to enable verbose memory debugging */ +/*#define GFSM_MEM_DEBUG*/ +/*#define GFSM_ALLOC_DEBUG*/ + +/** Initialize debugging -- should be called before any other gfsm operations */ +void gfsm_debug_init(void); + +/** Finish debugging -- be nice and clean up after ourselves */ +void gfsm_debug_finish(void); + +/** Print memory debugging trace information */ +void gfsm_debug_print(void); + +#endif /* _GFSM_DEBUG_H */ diff --git a/gfsm/gfsm/src/libgfsm/gfsmDraw.c b/gfsm/gfsm/src/libgfsm/gfsmDraw.c new file mode 100644 index 0000000..3c10991 --- /dev/null +++ b/gfsm/gfsm/src/libgfsm/gfsmDraw.c @@ -0,0 +1,293 @@ + +/*=============================================================================*\ + * File: gfsmDraw.c + * Author: Bryan Jurish <moocow@ling.uni-potsdam.de> + * Description: finite state machine library: automata: visualization + * + * Copyright (c) 2004 Bryan Jurish. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. 
+ * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + *=============================================================================*/ + +#include <gfsmDraw.h> +#include <gfsmArcIter.h> +#include <gfsmUtils.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +/*====================================================================== + * Methods: Text I/O: vcg + */ + +/*-------------------------------------------------------------- + * draw_vcg_file() + */ +gboolean gfsm_automaton_draw_vcg_file_full (gfsmAutomaton *fsm, + FILE *f, + gfsmAlphabet *lo_alphabet, + gfsmAlphabet *hi_alphabet, + gfsmAlphabet *state_alphabet, + const gchar *title, + int xspace, // ? + int yspace, // ? + const gchar *orientation, // "(top|bottom|left|right)_to_(ditto)" + const gchar *state_shape, + const gchar *state_color, + const gchar *final_color, + gfsmError **errp) +{ + gfsmStateId id; + GString *gstr = g_string_new(""); + + fprintf(f, "graph: {\n"); + fprintf(f, " title: \"%s\"\n", (title ? title : "(gfsm)")); + fprintf(f, " display_edge_labels:yes\n"); + fprintf(f, " splines:yes\n"); + fprintf(f, " color:white\n"); + fprintf(f, " xspace:%d\n", (xspace ? xspace : 40)); + fprintf(f, " yspace:%d\n", (yspace ? yspace : 20)); + fprintf(f, " orientation:%s\n", (orientation ? orientation : "left_to_right")); + fprintf(f, " node.shape:%s\n", (state_shape ? state_shape : "ellipse")); + fprintf(f, " node.color:%s\n", (state_color ? 
state_color : "white")); + fprintf(f, " node.borderwidth:1\n"); + + //-- ye olde iterationne + for (id = 0; id < fsm->states->len; id++) { + gfsmState *s = gfsm_automaton_find_state(fsm,id); + gfsmArcIter ai; + gchar *sym; + if (!s || !s->is_valid) continue; + + //-- source state + fprintf(f, " node: {title:\"%u\" label:\"", id); + if (state_alphabet && (sym=gfsm_alphabet_find_key(state_alphabet,id)) != NULL) { + gfsm_alphabet_key2string(state_alphabet, sym, gstr); + fprintf(f, "%s", gstr->str); + } else { + if (state_alphabet) g_printerr("Warning: no label defined for state '%u'!\n", id); + fprintf(f, "%u", id); + } + if (fsm->flags.is_weighted) { + fprintf(f, "/%g", gfsm_automaton_get_final_weight(fsm,id)); + } + fprintf(f, "\""); + + if (s->is_final) { + fprintf(f, " color:%s", (final_color ? final_color : "lightgrey")); + } + if (id == fsm->root_id) fputs(" borderwidth:3", f); + fputs("}\n", f); + + for (gfsm_arciter_open_ptr(&ai,fsm,s); gfsm_arciter_ok(&ai); gfsm_arciter_next(&ai)) { + gfsmArc *a = gfsm_arciter_arc(&ai); + fprintf(f, " edge: {sourcename:\"%u\" targetname:\"%u\" label:\"", id, a->target); + + if (lo_alphabet && (sym=gfsm_alphabet_find_key(lo_alphabet,a->lower)) != NULL) { + gfsm_alphabet_key2string(lo_alphabet, sym, gstr); + fputs(gstr->str, f); + } else { + if (lo_alphabet) + g_printerr("Warning: no label defined for lower label '%u'!\n", a->lower); + fprintf(f, "%u", a->lower); + } + + if (fsm->flags.is_transducer) { + fputc(':', f); + if (hi_alphabet && (sym=gfsm_alphabet_find_key(hi_alphabet,a->upper)) != NULL) { + gfsm_alphabet_key2string(hi_alphabet, sym, gstr); + fputs(gstr->str, f); + } else { + if (hi_alphabet) + g_printerr("Warning: no label defined for upper label '%u'!\n", a->upper); + fprintf(f, "%u", a->upper); + } + } + + if (fsm->flags.is_weighted) fprintf(f, "/%g", a->weight); + fprintf(f, "\"}\n"); + } + } + fputs("}\n", f); + + return TRUE; +} + + +/*-------------------------------------------------------------- + * 
draw_vcg_filename() + */ +gboolean gfsm_automaton_draw_vcg_filename_full (gfsmAutomaton *fsm, + const gchar *filename, + gfsmAlphabet *lo_alphabet, + gfsmAlphabet *hi_alphabet, + gfsmAlphabet *state_alphabet, + const gchar *title, + int xspace, + int yspace, + const gchar *orientation, + const gchar *state_shape, + const gchar *state_color, + const gchar *final_color, + gfsmError **errp) +{ + FILE *f; + gboolean rc; + if (!(f=gfsm_open_filename(filename, "w", errp))) return FALSE; + rc = gfsm_automaton_draw_vcg_file_full(fsm, f, lo_alphabet, hi_alphabet, state_alphabet, + title, xspace, yspace, orientation, + state_shape, state_color, final_color, + errp); + if (f != stdout) fclose(f); + return rc; +} + + +/*====================================================================== + * Methods: Draw: dot + */ + +/*-------------------------------------------------------------- + * draw_dot_file() + */ +gboolean gfsm_automaton_draw_dot_file_full (gfsmAutomaton *fsm, + FILE *f, + gfsmAlphabet *lo_alphabet, + gfsmAlphabet *hi_alphabet, + gfsmAlphabet *state_alphabet, + const gchar *title, + float width, + float height, + int fontsize, + const gchar *fontname, + gboolean portrait, + gboolean vertical, + float nodesep, + float ranksep, + gfsmError **errp) +{ + gfsmStateId id; + GString *gstr = g_string_new(""); + + fprintf(f, "digraph GFSM {\n"); + fprintf(f, " rankdir = %s;\n", vertical ? "TB" : "LR"); + if (width>0 && height>0) { + fprintf(f, " size = \"%g,\%g\";\n", (width ? width : 8.5), (height ? height : 11)); + } + fprintf(f, " label = \"%s\";\n", (title ? title : "(gfsm)")); + fprintf(f, " center = 1;\n"); + fprintf(f, " nodesep = \"%f\";\n", (nodesep ? nodesep : 0.25)); + fprintf(f, " ranksep = \"%f\";\n", (ranksep ? 
ranksep : 0.4)); + if (!portrait) { + //fprintf(f, " orientation = \"Landscape\";\n"); + fprintf(f, " rotate = 90;\n"); + } + + //-- ye olde iterationne + for (id = 0; id < fsm->states->len; id++) { + gfsmState *s = gfsm_automaton_find_state(fsm,id); + gfsmArcIter ai; + gchar *sym; + if (!s || !s->is_valid) continue; + + //-- source state + fprintf(f, "%u [label=\"", id); + if (state_alphabet && (sym=gfsm_alphabet_find_key(state_alphabet,id)) != NULL) { + gfsm_alphabet_key2string(state_alphabet, sym, gstr); + fputs(gstr->str, f); + } else { + if (state_alphabet) g_printerr("Warning: no label defined for state '%u'!\n", id); + fprintf(f, "%u", id); + } + if (fsm->flags.is_weighted && s->is_final) { + fprintf(f, "/%g", gfsm_automaton_get_final_weight(fsm,id)); + } + fprintf(f, "\", shape=%s, style=%s, fontsize=%d", + (s->is_final ? "doublecircle" : "circle"), + (id == fsm->root_id ? "bold" : "solid"), + (fontsize ? fontsize : 14)); + if (fontname && *fontname) fprintf(f, ", fontname=\"%s\"", fontname); + fprintf(f, "]\n"); + + for (gfsm_arciter_open_ptr(&ai,fsm,s); gfsm_arciter_ok(&ai); gfsm_arciter_next(&ai)) { + gfsmArc *a = gfsm_arciter_arc(&ai); + fprintf(f, " %u -> %u \[label=\"", id, a->target); + + if (lo_alphabet && (sym=gfsm_alphabet_find_key(lo_alphabet,a->lower)) != NULL) { + gfsm_alphabet_key2string(lo_alphabet, sym, gstr); + fputs(gstr->str, f); + } else { + if (lo_alphabet) + g_printerr("Warning: no label defined for lower label '%u'!\n", a->lower); + fprintf(f, "%u", a->lower); + } + + if (fsm->flags.is_transducer) { + fputc(':', f); + if (hi_alphabet && (sym=gfsm_alphabet_find_key(hi_alphabet,a->upper)) != NULL) { + gfsm_alphabet_key2string(hi_alphabet, sym, gstr); + fputs(gstr->str, f); + } else { + if (hi_alphabet) + g_printerr("Warning: no label defined for upper label '%u'!\n", a->upper); + fprintf(f, "%u", a->upper); + } + } + + if (fsm->flags.is_weighted) fprintf(f, "/%g", a->weight); + + fprintf(f, "\", fontsize=%d", (fontsize ? 
fontsize : 14)); + if (fontname && *fontname) fprintf(f, ", fontname=\"%s\"", fontname); + fprintf(f, "];\n"); + } + } + fputs("}\n", f); + + //--cleanup + g_string_free(gstr,TRUE); + + return TRUE; +} + + +/*-------------------------------------------------------------- + * draw_dot_filename() + */ +gboolean gfsm_automaton_draw_dot_filename_full (gfsmAutomaton *fsm, + const gchar *filename, + gfsmAlphabet *lo_alphabet, + gfsmAlphabet *hi_alphabet, + gfsmAlphabet *state_alphabet, + const gchar *title, + float width, + float height, + int fontsize, + const gchar *fontname, + gboolean portrait, + gboolean vertical, + float nodesep, + float ranksep, + gfsmError **errp) +{ + FILE *f; + gboolean rc; + if (!(f=gfsm_open_filename(filename, "w", errp))) return FALSE; + rc = gfsm_automaton_draw_dot_file_full(fsm, f, lo_alphabet, hi_alphabet, state_alphabet, + title, width, height, fontsize, fontname, + portrait, vertical, nodesep, ranksep, + errp); + if (f != stdout) fclose(f); + return rc; +} diff --git a/gfsm/gfsm/src/libgfsm/gfsmDraw.h b/gfsm/gfsm/src/libgfsm/gfsmDraw.h new file mode 100644 index 0000000..793c821 --- /dev/null +++ b/gfsm/gfsm/src/libgfsm/gfsmDraw.h @@ -0,0 +1,113 @@ + +/*=============================================================================*\ + * File: gfsmDraw.h + * Author: Bryan Jurish <moocow@ling.uni-potsdam.de> + * Description: finite state machine library: automata: visualization + * + * Copyright (c) 2004-2007 Bryan Jurish. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + *=============================================================================*/ + +/** \file gfsmDraw.h + * \brief Automaton visualization utilities + */ + +#ifndef _GFSM_DRAW_H +#define _GFSM_DRAW_H + +#include <gfsmAutomaton.h> + +/*====================================================================== + * Automaton Methods: Visualization: vcg + */ +/// \name Automaton Methods: Visualization: vcg +//@{ + +/** Draw an automaton in VCG format to a FILE* */ +gboolean gfsm_automaton_draw_vcg_file_full (gfsmAutomaton *fsm, + FILE *f, + gfsmAlphabet *lo_alphabet, + gfsmAlphabet *hi_alphabet, + gfsmAlphabet *state_alphabet, + const gchar *title, + int xspace, // ? + int yspace, // ? + const gchar *orientation, // "(top|bottom|left|right)_to_(ditto)" + const gchar *state_shape, + const gchar *state_color, + const gchar *final_color, + gfsmError **errp); + +/** Draw an automaton in VCG format to a named file */ +gboolean gfsm_automaton_draw_vcg_filename_full (gfsmAutomaton *fsm, + const gchar *filename, + gfsmAlphabet *lo_alphabet, + gfsmAlphabet *hi_alphabet, + gfsmAlphabet *state_alphabet, + const gchar *title, + int xspace, // ? + int yspace, // ? 
+ const gchar *orientation, // "(top|bottom|left|right)_to_(ditto)" + const gchar *state_shape, + const gchar *state_color, + const gchar *final_color, + gfsmError **errp); +//@} + + +/*====================================================================== + * Automaton Methods: Visualization: dot + */ +///\name Automaton Methods: Visualization: dot +//@{ + +/** Draw an automaton in Ma-Bell .dot format to a FILE* */ +gboolean gfsm_automaton_draw_dot_file_full (gfsmAutomaton *fsm, + FILE *f, + gfsmAlphabet *lo_alphabet, + gfsmAlphabet *hi_alphabet, + gfsmAlphabet *state_alphabet, + const gchar *title, + float width, + float height, + int fontsize, + const gchar *fontname, + gboolean portrait, + gboolean vertical, + float nodesep, + float ranksep, + gfsmError **errp); + +/** Draw an automaton in Ma-Bell .dot format to a named file */ +gboolean gfsm_automaton_draw_dot_filename_full (gfsmAutomaton *fsm, + const gchar *filename, + gfsmAlphabet *lo_alphabet, + gfsmAlphabet *hi_alphabet, + gfsmAlphabet *state_alphabet, + const gchar *title, + float width, + float height, + int fontsize, + const gchar *fontname, + gboolean portrait, + gboolean vertical, + float nodesep, + float ranksep, + gfsmError **errp); +//@} + + +#endif /* _GFSM_DRAW_H */ diff --git a/gfsm/gfsm/src/libgfsm/gfsmEnum.c b/gfsm/gfsm/src/libgfsm/gfsmEnum.c new file mode 100644 index 0000000..828aeb5 --- /dev/null +++ b/gfsm/gfsm/src/libgfsm/gfsmEnum.c @@ -0,0 +1,39 @@ +/*=============================================================================*\ + * File: gfsmEnum.c + * Author: Bryan Jurish <moocow@ling.uni-potsdam.de> + * Description: finite state machine library + * + * Copyright (c) 2004 Bryan Jurish. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. 
+ * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + *=============================================================================*/ + +#include <gfsmEnum.h> + +//-- no-inline definitions +#ifndef GFSM_INLINE_ENABLED +# include <gfsmEnum.hi> +#endif + +/*====================================================================== + * Constants + */ +const guint gfsmEnumNone = ((guint)(-1)); + +/*====================================================================== + * Methods: + */ +//-- all inlined + diff --git a/gfsm/gfsm/src/libgfsm/gfsmEnum.h b/gfsm/gfsm/src/libgfsm/gfsmEnum.h new file mode 100644 index 0000000..96ccdb7 --- /dev/null +++ b/gfsm/gfsm/src/libgfsm/gfsmEnum.h @@ -0,0 +1,129 @@ + +/*=============================================================================*\ + * File: gfsmEnum.h + * Author: Bryan Jurish <moocow@ling.uni-potsdam.de> + * Description: finite state machine library + * + * Copyright (c) 2004-2007 Bryan Jurish. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + *=============================================================================*/ + +/** \file gfsmEnum.h + * \brief Abstract utilities for run-time value enumerations + */ + +#ifndef _GFSM_ENUM_H +#define _GFSM_ENUM_H + +#include <gfsmMem.h> +#include <gfsmCommon.h> + +/*====================================================================== + * Types + */ +///\name Types and constants +//@{ + +/// Structure for mapping symbolic names to numeric IDs +typedef struct { + GHashTable *table; ///< hash table which does the dirty work + guint nxtval; ///< next id to assign + gfsmDupFunc key_dup; ///< key copying function +} gfsmEnum; + +/// Enumeration of StateIds +typedef gfsmEnum gfsmDirectEnum; + +/** Constant indicating failed gfsmEnum value-lookup */ +extern const guint gfsmEnumNone; + +//@} + +/*====================================================================== + * Methods: Constructors etc. + */ +/// \name Constructors etc. 
+//@{ + +/** create a new gfsmEnum (full version) */ +GFSM_INLINE +gfsmEnum *gfsm_enum_new_full(gfsmDupFunc key_dup_func, + GHashFunc key_hash_func, + GEqualFunc key_equal_func, + GDestroyNotify key_destroy_func); + +/** create a new gfsmEnum (no copying) */ +#define gfsm_enum_new(key_hash_f) gfsm_enum_new_full(NULL,key_hash_f,NULL,NULL) + +/** create a new gfsmDirectEnum */ +#define gfsm_direct_enum_new() gfsm_enum_new(g_direct_hash) + +/** Clear a gfsmEnum */ +GFSM_INLINE +void gfsm_enum_clear(gfsmEnum *en); + +/** Free a gfsmEnum */ +GFSM_INLINE +void gfsm_enum_free(gfsmEnum *en); +//@} + +/*====================================================================== + * Methods: Accessors + */ +/// \name Accessors +//@{ + +/** Get next available value */ +#define gfsm_enum_next_value(en) ((en)->nxtval) + +/** Lookup the numeric value associated with \a lookup_key. + * On return, *\a stored_key points to the original (stored) key, if any; + * and *\a stored_value points to the stored value. + * \returns true iff a value for \a key was already stored + */ +GFSM_INLINE +gboolean gfsm_enum_lookup_extended(gfsmEnum *en, + gconstpointer lookup_key, + gpointer *stored_key, + gpointer *stored_val); + +/** Lookup the numeric value associated with \a key. + * \returns the value associated with \a key, or gfsmEnumNone + * if no such value was found + */ +GFSM_INLINE +guint gfsm_enum_lookup(gfsmEnum *en, gconstpointer key); + + +/** Insert or overwrite new value \a val for \a key. + * If \a val is gfsmEnumNone, the next available value will be used. 
+ * \returns new value for \a key + */ +GFSM_INLINE +guint gfsm_enum_insert_full(gfsmEnum *en, gpointer key, guint val); + +/** Insert a (possibly new) value \a val for \a key */ +#define gfsm_enum_insert(en,key) gfsm_enum_insert_full(en,key,gfsmEnumNone) + +/** Really just an alias for gfsm_enum_insert(en,key) */ +#define gfsm_enum_get(en,key) gfsm_enum_insert(en,key) + +//-- inline definitions +#ifdef GFSM_INLINE_ENABLED +# include <gfsmEnum.hi> +#endif + +#endif /* _GFSM_ENUM_H */ diff --git a/gfsm/gfsm/src/libgfsm/gfsmEnum.hi b/gfsm/gfsm/src/libgfsm/gfsmEnum.hi new file mode 100644 index 0000000..f7ae20e --- /dev/null +++ b/gfsm/gfsm/src/libgfsm/gfsmEnum.hi @@ -0,0 +1,126 @@ +/*=============================================================================*\ + * File: gfsmEnum.def + * Author: Bryan Jurish <moocow@ling.uni-potsdam.de> + * Description: finite state machine library: enumerations: inline definitions + * + * Copyright (c) 2004 Bryan Jurish. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + *=============================================================================*/ + +#include <glib.h> +#include <gfsmUtils.h> + +/*====================================================================== + * Methods: Constructors etc. 
+ */ + +/*-------------------------------------------------------------- + * new_full() + */ +GFSM_INLINE +gfsmEnum *gfsm_enum_new_full(gfsmDupFunc key_dup_func, + GHashFunc key_hash_func, + GEqualFunc key_equal_func, + GDestroyNotify key_destroy_func) +{ + gfsmEnum *en = g_new(gfsmEnum,1); + en->table = g_hash_table_new_full(key_hash_func, + key_equal_func, + key_destroy_func, + NULL); + en->nxtval = 0; + en->key_dup = key_dup_func; + return en; +} + +/*-------------------------------------------------------------- + * clear() + */ +GFSM_INLINE +void gfsm_enum_clear(gfsmEnum *en) +{ + g_hash_table_foreach_remove(en->table, gfsm_hash_clear_func, NULL); + en->nxtval = 0; +} + +/*-------------------------------------------------------------- + * free() + */ +GFSM_INLINE +void gfsm_enum_free(gfsmEnum *en) +{ + g_hash_table_destroy(en->table); + g_free(en); +} + + +/*====================================================================== + * Methods: Accessors + */ + +/*-------------------------------------------------------------- + * lookup_extended() + */ +GFSM_INLINE +gboolean gfsm_enum_lookup_extended(gfsmEnum *en, + gconstpointer lookup_key, + gpointer *stored_key, + gpointer *stored_val) +{ + return g_hash_table_lookup_extended(en->table, lookup_key, stored_key, stored_val); +} + +/*-------------------------------------------------------------- + * lookup() + */ +GFSM_INLINE +guint gfsm_enum_lookup(gfsmEnum *en, gconstpointer key) +{ + gpointer s_val; + if (g_hash_table_lookup_extended(en->table, key, NULL, &s_val)) return GPOINTER_TO_UINT(s_val); + return gfsmEnumNone; +} + + +/*-------------------------------------------------------------- + * insert_full() + */ +GFSM_INLINE +guint gfsm_enum_insert_full(gfsmEnum *en, gpointer key, guint val) +{ + gpointer s_key, s_val; + if (g_hash_table_lookup_extended(en->table, key, &s_key, &s_val)) { + //-- already present: steal it & replace + if (val == gfsmEnumNone) { + val = GPOINTER_TO_UINT(s_val); + } else { + 
g_hash_table_steal(en->table, s_key); + g_hash_table_insert(en->table, s_key, GUINT_TO_POINTER(val)); + } + } + else { + //-- no key already present: insert a new one + if (en->key_dup) s_key = (*(en->key_dup))(key); + else s_key = key; + if (val == gfsmEnumNone) val = en->nxtval; + g_hash_table_insert(en->table, s_key, GUINT_TO_POINTER(val)); + } + + //-- keep track of available values + if (val >= en->nxtval) en->nxtval = val+1; + + return val; +} diff --git a/gfsm/gfsm/src/libgfsm/gfsmError.c b/gfsm/gfsm/src/libgfsm/gfsmError.c new file mode 100644 index 0000000..88b8ac0 --- /dev/null +++ b/gfsm/gfsm/src/libgfsm/gfsmError.c @@ -0,0 +1,46 @@ +/*=============================================================================*\ + * File: gfsmError.c + * Author: Bryan Jurish <moocow@ling.uni-potsdam.de> + * Description: finite state machine library + * + * Copyright (c) 2004 Bryan Jurish. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + *=============================================================================*/ + +#include <gfsmError.h> +#include <stdlib.h> + +/*====================================================================== + * Error Utilties + */ +/*-------------------------------------------------------------- + * carp_named() + */ +void gfsm_carp_named(gfsmError *error, const gchar *myname) +{ + if (!error) return; + g_printerr("%s: %s\n", (myname ? myname : "gfsm"), error->message); +} + +/*-------------------------------------------------------------- + * die_named() + */ +void gfsm_die_named(gfsmError *error, const gchar *myname) +{ + if (!error) return; + g_printerr("%s: %s\n", (myname ? myname : "gfsm"), error->message); + exit(error->code); +} diff --git a/gfsm/gfsm/src/libgfsm/gfsmError.h b/gfsm/gfsm/src/libgfsm/gfsmError.h new file mode 100644 index 0000000..d1b2a5f --- /dev/null +++ b/gfsm/gfsm/src/libgfsm/gfsmError.h @@ -0,0 +1,63 @@ + +/*=============================================================================*\ + * File: gfsmError.h + * Author: Bryan Jurish <moocow@ling.uni-potsdam.de> + * Description: finite state machine library + * + * Copyright (c) 2004 Bryan Jurish. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + *=============================================================================*/ + +/** \file gfsmError.h + * \brief Error utilities + */ + +#ifndef _GFSM_ERROR_H +#define _GFSM_ERROR_H + +#include <glib.h> + +/*====================================================================== + * Types + */ +/// struct for errors +typedef GError gfsmError; + +/*====================================================================== + * Constants + */ +/** Constant indicating no error condition */ +#define gfsmErrorNone NULL + +/*====================================================================== + * Error Methods + */ +/** Report a warning and continue; does nothing if error==NULL */ +void gfsm_carp_named(gfsmError *error, const gchar *myname); + +/** Report a warning and continue; does nothing if error==NULL */ +#define gfsm_carp(e) gfsm_carp_named(e,NULL) + +/** carp and exit with code error->code ; does nothing if error==NULL */ +void gfsm_die_named(gfsmError *error, const gchar *myname); + +/** die using default diagnostic name "gfsm"; does nothing if error==NULL */ +#define gfsm_die(e) gfsm_die_named(e,NULL) + +/** do or die */ +#define gfsm_do_or_die(e) gfsm_die_named(e,NULL) + +#endif /* _GFSM_UTILS_H */ diff --git a/gfsm/gfsm/src/libgfsm/gfsmIO.c b/gfsm/gfsm/src/libgfsm/gfsmIO.c new file mode 100644 index 0000000..f5e637d --- /dev/null +++ b/gfsm/gfsm/src/libgfsm/gfsmIO.c @@ -0,0 +1,558 @@ +/*=============================================================================*\ + * File: gfsmIO.c + * Author: Bryan Jurish <moocow@ling.uni-potsdam.de> + * Description: finite state machine library: I/O + * + * Copyright (c) 2006-2008 Bryan Jurish. 
+ * + * For information on usage and redistribution, and for a DISCLAIMER + * OF ALL WARRANTIES, see the file "COPYING" in this distribution. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + *=============================================================================*/ + +#include <gfsmConfig.h> + +#include <glib.h> +#include <gfsmIO.h> +//#include <gfsmCompat.h> +#include <gfsmUtils.h> + +#include <string.h> +#include <stdlib.h> +#include <stdio.h> +#if 0 +#include <fcntl.h> +#endif +#include <errno.h> + +#ifdef GFSM_ZLIB_ENABLED +# include <zlib.h> +# define GFSM_DEFAULT_COMPRESSION Z_DEFAULT_COMPRESSION +#endif + +#include "vasprintf.h" +#include "getdelim.h" + +/*====================================================================== + * Protos: I/O: Handles: Methods: Instantiations: C FILE* + * (method implementations installed by gfsmio_handle_new() for FILE*-backed handles) + */ +void gfsmio_close_cfile(FILE *f); +void gfsmio_flush_cfile(FILE *f); +gboolean gfsmio_eof_cfile(FILE *f); +gboolean gfsmio_read_cfile(FILE *f, void *buf, size_t nbytes); +ssize_t gfsmio_getdelim_cfile(FILE *f, char **lineptr, size_t *n, int delim); +gboolean gfsmio_write_cfile(FILE *f, const void *buf, size_t nbytes); +#ifdef HAVE_VFPRINTF +int gfsmio_vprintf_cfile(FILE *f, const char *fmt, va_list *app); +#endif +
+/*====================================================================== + * Protos: I/O: Handles: Methods: Instantiations: gzFile + */ +#ifdef GFSM_ZLIB_ENABLED +void gfsmio_close_zfile(gzFile zf); +void gfsmio_flush_zfile(gzFile zf); +gboolean gfsmio_eof_zfile(gzFile zf); +gboolean gfsmio_read_zfile(gzFile zf, void *buf, size_t nbytes); +gboolean gfsmio_write_zfile(gzFile zf, const void *buf, size_t nbytes); +#endif + +/*====================================================================== + * Protos: I/O: Handles: Methods: Instantiations: GString* + */ +void gfsmio_close_gstring(gfsmPosGString *pgs); +gboolean gfsmio_eof_gstring(gfsmPosGString *pgs); +gboolean gfsmio_read_gstring(gfsmPosGString *pgs, void *buf, size_t nbytes); +gboolean gfsmio_write_gstring(gfsmPosGString *pgs, const void *buf, size_t nbytes); + +/*====================================================================== + * I/O: Handles: Constructors etc. + */ + +/*--------------------------------------------------------------*/ +/** create a zero-initialized handle of type \a typ wrapping \a handle_data and install the + * builtin methods for that type; gfsmIOTUser (and any unknown type) gets no methods at all. + * Without GFSM_ZLIB_ENABLED, gfsmIOTZFile handles use the plain C FILE* methods. + */ +gfsmIOHandle *gfsmio_handle_new(gfsmIOHandleType typ, void *handle_data) +{ + gfsmIOHandle *ioh = g_new0(gfsmIOHandle,1); + ioh->iotype = typ; + ioh->handle = handle_data; + + switch (typ) { + //-------------------------------- + case gfsmIOTCFile: +#ifndef GFSM_ZLIB_ENABLED + case gfsmIOTZFile: +#endif + ioh->read_func = (gfsmIOReadFunc)gfsmio_read_cfile; + ioh->getdelim_func= (gfsmIOGetdelimFunc)gfsmio_getdelim_cfile; + + ioh->write_func = (gfsmIOWriteFunc)gfsmio_write_cfile; +#ifdef HAVE_VFPRINTF + ioh->vprintf_func = (gfsmIOVprintfFunc)gfsmio_vprintf_cfile; +#endif + + ioh->flush_func = (gfsmIOFlushFunc)gfsmio_flush_cfile; + ioh->close_func = (gfsmIOCloseFunc)gfsmio_close_cfile; + ioh->eof_func = (gfsmIOEofFunc)gfsmio_eof_cfile; + break; + +#ifdef GFSM_ZLIB_ENABLED + //-------------------------------- + case gfsmIOTZFile: + ioh->read_func = (gfsmIOReadFunc)gfsmio_read_zfile; + //ioh->getdelim_func= (gfsmIOReadFunc)gfsmio_getdelim_zfile; + + ioh->write_func =
(gfsmIOWriteFunc)gfsmio_write_zfile; + //ioh->vprintf_func = (gfsmIOReadFunc)gfsmio_vprintf_zfile; + + ioh->flush_func = (gfsmIOFlushFunc)gfsmio_flush_zfile; + ioh->close_func = (gfsmIOCloseFunc)gfsmio_close_zfile; + ioh->eof_func = (gfsmIOEofFunc)gfsmio_eof_zfile; + break; +#endif + + //-------------------------------- + case gfsmIOTGString: + ioh->read_func = (gfsmIOReadFunc)gfsmio_read_gstring; + //ioh->getdelim_func= gfsmio_getdelim_gstring; + + ioh->write_func = (gfsmIOWriteFunc)gfsmio_write_gstring; + //ioh->vprintf_func = gfsmio_vprintf_gstring; + + //ioh->flush_func = gfsmio_flush_gstring; + ioh->close_func = (gfsmIOCloseFunc)gfsmio_close_gstring; + ioh->eof_func = (gfsmIOEofFunc)gfsmio_eof_gstring; + break; + + //-------------------------------- + case gfsmIOTUser: + default: + break; + } + + return ioh; +} + +/*--------------------------------------------------------------*/ +/** free the handle struct only; the wrapped handle data is neither closed nor freed */ +void gfsmio_handle_free(gfsmIOHandle *ioh) +{ + g_free(ioh); +} + +/*--------------------------------------------------------------*/ +/** wrap an existing C FILE* (may be NULL: all C FILE* methods check for that) */ +gfsmIOHandle *gfsmio_new_file(FILE *f) +{ + return gfsmio_handle_new(gfsmIOTCFile, f); +} + +/*--------------------------------------------------------------*/ +/** wrap \a f for gzip I/O; with \a compress_level == 0 or without zlib the data stays uncompressed. + * Every branch that is actually compiled works on a dup() of \a f 's descriptor, + * so closing the returned handle never closes \a f itself. + */ +#undef GFSM_ZFILE_USE_FCNTL +gfsmIOHandle *gfsmio_new_zfile(FILE *f, const char *mode, int compress_level) +{ +#ifdef GFSM_ZLIB_ENABLED +# ifdef GFSM_ZFILE_USE_FCNTL + int fd = fileno(f); + int flags = fcntl(fd, F_GETFL); + gzFile zf; +# if 0 /* DEBUG */ + //-- DEBUG + const int o_rdwr = O_RDWR; + const int o_rdonly = O_RDONLY; + const int o_wronly = O_WRONLY; + //-- /DEBUG +# endif /* DEBUG */ + if ( (flags&O_RDWR) == O_RDWR ) { + zf = gzdopen(dup(fd),"rwb"); + gzsetparams(zf, compress_level, Z_DEFAULT_STRATEGY); + } + else + if ( (flags&O_WRONLY) == O_WRONLY ) { + zf = gzdopen(fd,"wb"); + gzsetparams(zf, compress_level, Z_DEFAULT_STRATEGY); + } + else { // if ( (flags&O_RDONLY) == O_RDONLY ) + zf = gzdopen(fd,"rb"); + gzsetparams(zf, compress_level, Z_DEFAULT_STRATEGY); + } + return
gfsmio_handle_new(gfsmIOTZFile, zf); + +# else /* !defined(GFSM_ZFILE_USE_FCNTL) */ + + if (compress_level != 0) { + //-- use compression + //-- dup(): gzclose() closes the descriptor it was given, and closing the handle must leave f usable + gzFile zf = gzdopen(dup(fileno(f)), mode); + if (strchr(mode,'w')) gzsetparams(zf, compress_level, Z_DEFAULT_STRATEGY); + return gfsmio_handle_new(gfsmIOTZFile, zf); + } else { + return gfsmio_new_file( fdopen(dup(fileno(f)), mode) ); + } + +# endif /* GFSM_ZFILE_USE_FCNTL */ + +#else /* !defined(GFSM_ZLIB_ENABLED) */ + + return gfsmio_new_file( fdopen(dup(fileno(f)), mode) ); +#endif +} + +/*--------------------------------------------------------------*/ +/** open \a filename with \a mode and wrap it in a new handle. + * "-" selects stdout if \a mode contains 'w', stdin otherwise. + * A gzFile is used if zlib is enabled and \a compress_level != 0 (negative: default level), else a C FILE*. + * On failure to open the file, sets \a *errp and returns NULL. + */ +gfsmIOHandle *gfsmio_new_filename(const char *filename, const char *mode, int compress_level, gfsmError **errp) +{ +#ifdef GFSM_ZLIB_ENABLED + if (compress_level != 0) { + gzFile zf; + if (strcmp(filename,"-")==0) { + if (strchr(mode,'w')) zf = gzdopen(dup(fileno(stdout)), mode); + else zf = gzdopen(dup(fileno(stdin)), mode); + } + else { + errno = 0; + zf = gzopen(filename,mode); + if (!zf) { + //-- no gzFile exists here, so gzerror() must not be called: report via errno instead + //-- (errno is still 0 if gzopen() failed for a reason other than opening the file) + g_set_error(errp, + g_quark_from_static_string("gfsm"), //-- domain + g_quark_from_static_string("gzopen"), //-- code + "gzopen() failed for file '%s': %s", + filename, + errno ?
strerror(errno) : "insufficient memory or invalid mode"); + return NULL; + } + } + + //-- set compression level + if (compress_level < 0) compress_level = GFSM_DEFAULT_COMPRESSION; + if (strchr(mode,'w')) { + gzsetparams(zf, compress_level, Z_DEFAULT_STRATEGY); + } + + return gfsmio_handle_new(gfsmIOTZFile,zf); + } + else { +#endif + FILE *f; + if (strcmp(filename,"-")==0) { + if (strchr(mode,'w')) f = stdout; + else f = stdin; + } + else if (!(f = fopen(filename,mode))) { + g_set_error(errp, + g_quark_from_static_string("gfsm"), //-- domain + g_quark_from_static_string("fopen"), //-- code + "open failed for file '%s': %s", + filename, strerror(errno)); + //-- fail like the gzopen() branch does, instead of returning a handle around a NULL FILE* + return NULL; + } + return gfsmio_new_file(f); +#ifdef GFSM_ZLIB_ENABLED + } +#endif +} + +/*--------------------------------------------------------------*/ +/** wrap a caller-owned gfsmPosGString* */ +gfsmIOHandle *gfsmio_new_gstring(gfsmPosGString *pgs) +{ + return gfsmio_handle_new(gfsmIOTGString, pgs); +} + + +/*====================================================================== + * I/O: Handles: Methods: Basic + */ + +/*--------------------------------------------------------------*/ +/** close an open I/O handle (calls \a close_func, if defined); does not free \a ioh */ +void gfsmio_close(gfsmIOHandle *ioh) +{ + if (ioh->close_func) (*ioh->close_func)(ioh->handle); +} + +/*--------------------------------------------------------------*/ +/** flush all data to an output handle (calls \a flush_func) */ +void gfsmio_flush(gfsmIOHandle *ioh) +{ + if (ioh->flush_func) (*ioh->flush_func)(ioh->handle); +} + +/*--------------------------------------------------------------*/ +/** returns true if \a ioh is at EOF, false otherwise or if no \a eof_func is defined */ +gboolean gfsmio_eof(gfsmIOHandle *ioh) +{ + if (ioh->eof_func) return (*ioh->eof_func)(ioh->handle); + return FALSE; +} + + + +/*====================================================================== + * I/O: Handles: Methods: Read + */ + +/*--------------------------------------------------------------*/ +/** read one byte; returns it as a non-negative int, or GFSMIO_EOF at EOF or if the read fails */ +int gfsmio_getc(gfsmIOHandle *ioh) +{ + if (gfsmio_eof(ioh))
return GFSMIO_EOF; + else { + //-- getc() --> read() + unsigned char c = 0; + if (gfsmio_read(ioh, &c, 1)) return (int)c; + } + return GFSMIO_EOF; +} + +/*--------------------------------------------------------------*/ +/** read \a nbytes bytes into \a buf via \a read_func; complains on stderr and returns FALSE if the handle has none */ +gboolean gfsmio_read(gfsmIOHandle *ioh, void *buf, size_t nbytes) +{ + if (ioh->read_func) return (*ioh->read_func)(ioh->handle, buf, nbytes); + + g_printerr("gfsmio_read(): no method defined for handle of type %d\n", ioh->iotype); + return FALSE; +} + +/*--------------------------------------------------------------*/ +/** gfsmio_getdelim() with a newline delimiter */ +ssize_t gfsmio_getline(gfsmIOHandle *ioh, char **lineptr, size_t *n) +{ + return gfsmio_getdelim(ioh, lineptr, n, '\n'); +} + +/*--------------------------------------------------------------*/ +/** read up to and including the next \a delim into \a *lineptr (NUL-terminated). + * Uses the handle's \a getdelim_func if there is one; otherwise reads byte-wise via gfsmio_getc() and, + * if the text does not fit into the caller's \a *n bytes, replaces \a *lineptr by a fresh malloc()ed + * buffer (freeing the old one) and updates \a *n. + * The fallback returns the number of bytes read, or GFSMIO_EOF if EOF is hit before any byte was read. + */ +ssize_t gfsmio_getdelim(gfsmIOHandle *ioh, char **lineptr, size_t *n, int delim) +{ + if (ioh->getdelim_func) { + return (*ioh->getdelim_func)(ioh->handle, lineptr, n, delim); + } + else { + //-- getdelim() --> getc() + ssize_t i = 0; + int c = -2; + GString *gs=NULL; + + while ( *n > 0 && i < (*n-1) && (c=gfsmio_getc(ioh)) != GFSMIO_EOF ) { + (*lineptr)[i++] = c; +#ifdef GFSM_DEBUG_GETDELIM + fprintf(stderr, "---> getdelim(i=%d) got char %d ~ '%c' to linebuf\n", i, (char)c, c);//--DEBUG +#endif + if ((char)c == (char)delim) { + (*lineptr)[i] = '\0'; + return i; + } + } + if (c == GFSMIO_EOF) { +#ifdef GFSM_DEBUG_GETDELIM + fprintf(stderr, "---> getdelim(i=%d) got EOF reading to linebuf\n", i);//--DEBUG +#endif + (*lineptr)[i] = '\0'; + return i == 0 ? GFSMIO_EOF : i; + } + + //-- oops: buffer overflow + gs = g_string_new_len((i>0 ?
*lineptr : ""), i); + while ( (c=gfsmio_getc(ioh)) != GFSMIO_EOF ) { + g_string_append_c(gs,c); + i++; +#ifdef GFSM_DEBUG_GETDELIM + fprintf(stderr, "---> getdelim(i=%d) got char %d ~ '%c' to GString*\n", i, (char)c, c);//--DEBUG +#endif + if ((char)c == (char)delim) break; + } + +#ifdef GFSM_DEBUG_GETDELIM + if (c==GFSMIO_EOF) { fprintf(stderr, "---> getdelim(i=%d) got EOF reading to GString*\n", i); }//--DEBUG +#endif + + //-- maybe free old line buffer + if (*lineptr) free(*lineptr); + + //-- set up new buffer + g_string_append_c(gs,0); //-- this shouldn't be necessary, but weird things happen otherwise (bug?) + + //-- the following code breaks in Perl on OpenSuSE 11.0 [maybe GString doesn't use malloc ?!], --moocow 2008-10-31 + /* + *lineptr = gs->str; //-- copy literal GString data buffer + *n = gs->allocated_len; //-- ... and its length + g_string_free(gs,FALSE); //-- ... and only free GString wrapper struct; not the data buffer + */ + //-- ...so we do this instead (ugly but functional): + *lineptr = (char *)malloc(gs->allocated_len); //-- malloc a copy of GString data buffer + memcpy(*lineptr, gs->str, gs->allocated_len); //-- ... and copy the data + *n = gs->allocated_len; //-- ... and its length + g_string_free(gs,TRUE); //-- ... and free the whole GString and its data buffer + + return i==0 && c==GFSMIO_EOF ?
GFSMIO_EOF : i; + } + return GFSMIO_EOF; +} + +/*====================================================================== + * I/O: Handles: Methods: Write + */ + +/*--------------------------------------------------------------*/ +/** write the single byte (unsigned char)\a c */ +gboolean gfsmio_putc(gfsmIOHandle *ioh, int c) +{ + //-- pass the character value itself: the first byte of an int is its low-order byte only on little-endian hosts + unsigned char byte = (unsigned char)c; + return gfsmio_write(ioh, &byte, 1); +} + +/*--------------------------------------------------------------*/ +/** write the NUL-terminated string \a s (without the terminator; no newline is appended) */ +gboolean gfsmio_puts(gfsmIOHandle *ioh, const char *s) +{ + return gfsmio_write(ioh, s, strlen(s)); +} + + +/*--------------------------------------------------------------*/ +/** write \a nbytes bytes from \a buf via \a write_func; complains on stderr and returns FALSE if the handle has none */ +gboolean gfsmio_write(gfsmIOHandle *ioh, const void *buf, size_t nbytes) +{ + if (ioh->write_func) return (*ioh->write_func)(ioh->handle, buf, nbytes); + + g_printerr("gfsmio_write(): no method defined for handle of type %d\n", ioh->iotype); + return FALSE; +} + +/*--------------------------------------------------------------*/ +/** printf() to a handle; returns the result of gfsmio_vprintf() */ +int gfsmio_printf(gfsmIOHandle *io, const char *fmt, ...) +{ + int len; + va_list ap; + + va_start(ap,fmt); + len = gfsmio_vprintf(io, fmt, &ap); + va_end(ap); + + return len; +} + +/*--------------------------------------------------------------*/ +/** format into a temporary buffer with vasprintf() and pass the result to gfsmio_write(); + * returns the number of bytes written, or 0 if formatting or writing failed + */ +int gfsmio_vprintf(gfsmIOHandle *io, const char *fmt, va_list *app) +{ + char *obuf = NULL; + int len; + gboolean rc; + len = vasprintf(&obuf, fmt, *app); + //-- vasprintf() reports failure as a negative count: never turn that into a (huge) write length, + //-- and do not touch obuf, whose value is not guaranteed after a failure + if (len < 0) return 0; + rc = gfsmio_write(io, obuf, (size_t)len); + if (obuf) free(obuf); + return rc ? len : 0; +} + + +/*====================================================================== + * I/O: Handles: Methods: FILE* + */ + +/*-------------------------------------------------------------- + * FILE*: Basic Methods + */ +void gfsmio_flush_cfile(FILE *f) +{ if (f) fflush(f); } + +void gfsmio_close_cfile(FILE *f) +{ + if (f && f != stdin && f != stdout && f != stderr) fclose(f); +} + +gboolean gfsmio_eof_cfile(FILE *f) +{ return f ?
feof(f) : FALSE; } + +/*-------------------------------------------------------------- + * FILE*: Read Methods + */ +gboolean gfsmio_read_cfile(FILE *f, void *buf, size_t nbytes) +{ return f ? (fread(buf,nbytes,1,f)==1) : FALSE; } + +ssize_t gfsmio_getdelim_cfile(FILE *f, char **lineptr, size_t *n, int delim) +{ return f ? getdelim(lineptr, n, delim, f) : 0; } + +/*-------------------------------------------------------------- + * FILE*: Write Methods + */ +gboolean gfsmio_write_cfile(FILE *f, const void *buf, size_t nbytes) +{ return f ? (fwrite(buf, nbytes, 1, f)==1) : FALSE; } + +#ifdef HAVE_VFPRINTF +int gfsmio_vprintf_cfile(FILE *f, const char *fmt, va_list *app) +{ return f ? vfprintf(f, fmt, *app) : 0; } +#endif + +/*====================================================================== + * I/O: Handles: Methods: gzFile + */ +#ifdef GFSM_ZLIB_ENABLED + +/*-------------------------------------------------------------- + * gzFile: Basic Methods + */ +void gfsmio_close_zfile(gzFile zf) +{ if (zf) gzclose(zf); } + +void gfsmio_flush_zfile(gzFile zf) +{ if (zf) gzflush(zf,Z_SYNC_FLUSH); } + +gboolean gfsmio_eof_zfile(gzFile zf) +{ return zf ? gzeof(zf) : FALSE; } + +/*-------------------------------------------------------------- + * gzFile: Read Methods + */ +gboolean gfsmio_read_zfile(gzFile zf, void *buf, size_t nbytes) +{ return zf ? (gzread(zf,buf,nbytes)==nbytes) : FALSE; } + +/*-------------------------------------------------------------- + * gzFile: Write Methods + */ +gboolean gfsmio_write_zfile(gzFile zf, const void *buf, size_t nbytes) +{ return zf ?
(gzwrite(zf, buf, nbytes)==nbytes) : FALSE; } + +#endif /* GFSM_ZLIB_ENABLED */ + + +/*====================================================================== + * I/O: Handles: Methods: GString* + */ + +/*-------------------------------------------------------------- + * GString*: Basic Methods + */ + +void gfsmio_close_gstring(gfsmPosGString *pgs) +{ if (pgs) pgs->pos = 0; } + +gboolean gfsmio_eof_gstring(gfsmPosGString *pgs) +{ return pgs && pgs->gs ? (pgs->pos >= pgs->gs->len) : TRUE; } + +/*-------------------------------------------------------------- + * GString*: Read Methods + */ +gboolean gfsmio_read_gstring(gfsmPosGString *pgs, void *buf, size_t nbytes) +{ + if (!pgs || !pgs->gs || pgs->pos > pgs->gs->len) return FALSE; + if (pgs->pos+nbytes <= pgs->gs->len) { + //-- normal case: read it in + memcpy(buf, pgs->gs->str + pgs->pos, nbytes); + pgs->pos += nbytes; + return TRUE; + } + //-- overflow: grab what we can + memcpy(buf, pgs->gs->str + pgs->pos, pgs->gs->len-pgs->pos); + pgs->pos = pgs->gs->len; + return FALSE; +} + +/*-------------------------------------------------------------- + * GString*: Write Methods + */ +gboolean gfsmio_write_gstring(gfsmPosGString *pgs, const void *buf, size_t nbytes) +{ + if (pgs && pgs->gs) { + g_string_append_len(pgs->gs, buf, nbytes); + return TRUE; + } + return FALSE; +} diff --git a/gfsm/gfsm/src/libgfsm/gfsmIO.h b/gfsm/gfsm/src/libgfsm/gfsmIO.h new file mode 100644 index 0000000..6931699 --- /dev/null +++ b/gfsm/gfsm/src/libgfsm/gfsmIO.h @@ -0,0 +1,208 @@ +/*=============================================================================*\ + * File: gfsmIO.h + * Author: Bryan Jurish <moocow@ling.uni-potsdam.de> + * Description: finite state machine library: I/O + * + * Copyright (c) 2006-2008 Bryan Jurish. + * + * For information on usage and redistribution, and for a DISCLAIMER + * OF ALL WARRANTIES, see the file "COPYING" in this distribution.
+ * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + *=============================================================================*/ + +/** \file gfsmIO.h + * \brief Abstract I/O routines + */ + +#ifndef _GFSM_IO_H +#define _GFSM_IO_H + +#include <gfsmConfig.h> +#include <gfsmError.h> + +#include <stdarg.h> +#include <string.h> +#include <unistd.h> +#include <stdio.h> + +#define GFSMIO_EOF ((int)-1) + +/*====================================================================== + * I/O: types + */ + +/** Builtin I/O types */ +typedef enum { + gfsmIOTCFile, ///< I/O on a C FILE* + gfsmIOTZFile, ///< I/O on a zlib gzFile (treated like a C FILE* if GFSM_ZLIB_ENABLED is not defined) + gfsmIOTGString, ///< I/O on a gfsmPosGString* + gfsmIOTUser = 255 ///< user I/O +} gfsmIOHandleType; + +/*====================================================================== + * I/O: Handles: Function types + */ + +/** Generic I/O Handle function type: fflush() and friends */ +typedef void (*gfsmIOFlushFunc) (void *handle); + +/** Generic I/O Handle function type: fclose() and friends */ +typedef void (*gfsmIOCloseFunc) (void *handle); + +/** Generic I/O Handle function type: feof() and friends */ +typedef gboolean (*gfsmIOEofFunc) (void *handle); + + +/** Generic I/O Handle function type: fread() and friends */ +typedef gboolean
(*gfsmIOReadFunc) (void *handle, void *buf, size_t nbytes); + +/** Generic I/O Handle function type: getdelim() and friends */ +typedef ssize_t (*gfsmIOGetdelimFunc) (void *handle, char **lineptr, size_t *n, int delim); + + +/** Generic I/O Handle function type: fwrite() and friends */ +typedef gboolean (*gfsmIOWriteFunc) (void *handle, const void *buf, size_t nbytes); + +/** Generic I/O Handle function type: vprintf() and friends */ +typedef int (*gfsmIOVprintfFunc) (void *handle, const char *fmt, va_list *app); + + + +/*====================================================================== + * I/O: Handles: structs + */ + +/** \brief Generic I/O handle struct */ +typedef struct { + gfsmIOHandleType iotype; ///< I/O class of this handle + void *handle; ///< underlying handle data + + gfsmIOReadFunc read_func; /**< fread() and friends (needed by gfsmio_read() and everything built on it) */ + gfsmIOGetdelimFunc getdelim_func; /**< getdelim() and friends (optional) */ + + gfsmIOWriteFunc write_func; /**< fwrite() and friends (needed by gfsmio_write() and everything built on it) */ + gfsmIOVprintfFunc vprintf_func; /**< vprintf() and friends (optional) */ + + gfsmIOFlushFunc flush_func; /**< fflush() and friends (optional) */ + gfsmIOCloseFunc close_func; /**< fclose() and friends (optional) */ + gfsmIOEofFunc eof_func; /**< eof() and friends (optional) */ +} gfsmIOHandle; + + +/** \brief GString with an associated index (read head) */ +typedef struct { + GString *gs; ///< associated string + size_t pos; ///< (read-)position +} gfsmPosGString; + +/*====================================================================== + * I/O: Handles: Constructors etc.
+ */ + +/** create, initialize, and return a new gfsmIOHandle + * \param typ type of this handle + * \param handle_data value of the \a handle structure datum: + * \li for \a typ==gfsmIOTCFile , \a handle_data should be a FILE* + * \li for \a typ==gfsmIOTGString , \a handle_data should be a gfsmPosGString* + * \li for \a typ==gfsmIOTZFile , \a handle_data should be a gzFile + * \li for \a typ==gfsmIOTUser , \a handle_data is whatever you want + * + * \returns new gfsmIOHandle + */ +gfsmIOHandle *gfsmio_handle_new(gfsmIOHandleType typ, void *handle_data); + +/** destroy a gfsmIOHandle: does NOT implicitly call \a close or anything else */ +void gfsmio_handle_free(gfsmIOHandle *ioh); + +/* TODO: utilities ? file_handle_new, zfile_handle_new, gstring_handle_new, user_handle_new ? */ + +/** Create and return a new gfsmIOHandle to an uncompressed C FILE* + * Caller is responsible for closing the handle. + */ +gfsmIOHandle *gfsmio_new_file(FILE *f); + +/** Create and return a new gfsmIOHandle to a C FILE* using compression (if available) + * Caller is responsible for closing the handle. + * The handle returned can always be closed without closing \a f itself. + */ +gfsmIOHandle *gfsmio_new_zfile(FILE *f, const char *mode, int compress_level); + +/** Create and return a new gfsmIOHandle to a named file. + * Uses gzFile if zlib support was enabled and \a compress_level is nonzero, + * otherwise C FILE* (uncompressed). + * The filename "-" selects stdout if \a mode contains 'w', and stdin otherwise. + * Returns NULL and sets \a *errp if a compressed file cannot be opened. + * Caller is responsible for closing the handle. + */ +gfsmIOHandle *gfsmio_new_filename(const char *filename, const char *mode, int compress_level, gfsmError **errp); + +/** Create and return a new gfsmIOHandle for a PosGString* + * Caller is responsible for allocation and de-allocation of the PosGString*.
+ */ +gfsmIOHandle *gfsmio_new_gstring(gfsmPosGString *pgs); + +/*====================================================================== + * I/O: Handles: Methods: Basic + */ + +/** close an open I/O handle (calls \a close_func) */ +void gfsmio_close(gfsmIOHandle *ioh); + +/** flush all data to an output handle (calls \a flush_func) */ +void gfsmio_flush(gfsmIOHandle *ioh); + +/** returns true if \a ioh is at EOF, false otherwise (or if no \a eof_func is defined) */ +gboolean gfsmio_eof(gfsmIOHandle *ioh); + + + +/*====================================================================== + * I/O: Handles: Methods: Read + */ + +/** read a single byte of data from \a ioh, returns GFSMIO_EOF on EOF or if the read fails */ +int gfsmio_getc(gfsmIOHandle *ioh); + +/** read \a nbytes of data from \a ioh into \a buf, as \a fread() */ +gboolean gfsmio_read(gfsmIOHandle *ioh, void *buf, size_t nbytes); + +/** wrapper for getline(): same as gfsmio_getdelim() with a newline delimiter */ +ssize_t gfsmio_getline(gfsmIOHandle *ioh, char **lineptr, size_t *n); + + +/** wrapper for getdelim(). + * If the handle defines \a getdelim_func, its return value is passed through unchanged. + * Otherwise returns the number of bytes read (including the delimiter, if one was read), + * or GFSMIO_EOF if end-of-input is reached before any byte was read. + */ +ssize_t gfsmio_getdelim(gfsmIOHandle *io, char **lineptr, size_t *n, int delim); + + + +/*====================================================================== + * I/O: Handles: Methods: Write + */ + +/** write a single byte to handle \a ioh, as \a fputc() */ +gboolean gfsmio_putc(gfsmIOHandle *ioh, int c); + +/** write the NUL-terminated string \a s to \a io; unlike puts(), no newline is appended */ +gboolean gfsmio_puts(gfsmIOHandle *io, const char *s); + +/** write \a nbytes of data from \a buf into \a io, as \a fwrite() */ +gboolean gfsmio_write(gfsmIOHandle *io, const void *buf, size_t nbytes); + +/** wrapper for printf(): calls \a gfsmio_vprintf() */ +int gfsmio_printf(gfsmIOHandle *io, const char *fmt, ...); + +/** wrapper for vprintf(): formats into a temporary buffer and writes it with \a gfsmio_write(); + * returns the number of bytes written, or 0 if the write fails + */ +int gfsmio_vprintf(gfsmIOHandle *io, const char *fmt, va_list *app); + + +#endif /* _GFSM_IO_H */ diff --git a/gfsm/gfsm/src/libgfsm/gfsmIndexed.c
b/gfsm/gfsm/src/libgfsm/gfsmIndexed.c new file mode 100644 index 0000000..ba36fc6 --- /dev/null +++ b/gfsm/gfsm/src/libgfsm/gfsmIndexed.c @@ -0,0 +1,157 @@ + +/*=============================================================================*\ + * File: gfsmIndexed.c + * Author: Bryan Jurish <moocow@ling.uni-potsdam.de> + * Description: finite state machine library: arc indices + * + * Copyright (c) 2007 Bryan Jurish. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + *=============================================================================*/ + +#include <gfsmIndexed.h> +#include <gfsmArcIter.h> + +//-- no-inline definitions +#ifndef GFSM_INLINE_ENABLED +# include <gfsmIndexed.hi> +#endif + +/*====================================================================== + * Constructors etc. 
+ */ + +//---------------------------------------- +// Copy src into dst and return dst. A NULL dst is allocated here (sized for src's states and arcs); +// an existing dst is cleared and re-reserved first. Copies flags, semiring, root id, final weights and arc index. +gfsmIndexedAutomaton *gfsm_indexed_automaton_copy(gfsmIndexedAutomaton *dst, gfsmIndexedAutomaton *src) +{ + if (!dst) { + dst = gfsm_indexed_automaton_new_full(src->flags, + src->sr->type, + gfsm_indexed_automaton_n_states(src), + gfsm_indexed_automaton_n_arcs(src)); + } + else { + gfsm_indexed_automaton_clear(dst); + gfsm_indexed_automaton_reserve_states(dst, gfsm_indexed_automaton_n_states(src)); + gfsm_indexed_automaton_reserve_arcs (dst, gfsm_indexed_automaton_n_arcs(src) ); + } + + //-- copy: flags, semiring, root + dst->flags = src->flags; + gfsm_indexed_automaton_set_semiring(dst, src->sr); + dst->root_id = src->root_id; + + //-- copy: tables + gfsm_weight_vector_copy (dst->state_final_weight, src->state_final_weight); + gfsm_arc_table_index_copy(dst->arcs, src->arcs); + + return dst; +} + +/*====================================================================== + * Methods: Import & Export + */ + +//---------------------------------------- +// Build the indexed form of fsm in xfsm and return it. A NULL xfsm is allocated here; +// an existing one is cleared and re-reserved. Arcs are deliberately left unsorted (see end of function). +gfsmIndexedAutomaton *gfsm_automaton_to_indexed(gfsmAutomaton *fsm, gfsmIndexedAutomaton *xfsm) +{ + //-- maybe allocate new indexed automaton + if (xfsm==NULL) { + xfsm = gfsm_indexed_automaton_new_full(fsm->flags, + fsm->sr->type, + gfsm_automaton_n_states(fsm), + gfsm_automaton_n_arcs(fsm) + ); + } else { + gfsm_indexed_automaton_clear(xfsm); + xfsm->flags = fsm->flags; + gfsm_indexed_automaton_reserve_states(xfsm,gfsm_automaton_n_states(fsm)); + gfsm_indexed_automaton_reserve_arcs(xfsm,gfsm_automaton_n_arcs(fsm)); + } + gfsm_indexed_automaton_set_semiring(xfsm,fsm->sr); //-- copy semiring + + //-- set root id + xfsm->root_id = fsm->root_id; + + //-- index final weights + gfsm_automaton_to_final_weight_vector(fsm, xfsm->state_final_weight); + gfsm_automaton_to_arc_table_index(fsm, xfsm->arcs); + + //-- sort arcs (no!)
+ //gfsm_indexed_automaton_sort(xfsm, xfsm->flags.sort_mode); + + + return xfsm; +} + + + +//---------------------------------------- +// Expand xfsm back into a plain automaton fsm and return it. A NULL fsm is allocated here; +// an existing one is cleared first. A state is marked final iff its stored final weight differs +// from the semiring's zero; every indexed arc is re-added one by one. +gfsmAutomaton *gfsm_indexed_to_automaton(gfsmIndexedAutomaton *xfsm, gfsmAutomaton *fsm) +{ + gfsmStateId qid; + gfsmWeight srzero; + + //-- maybe allocate new automaton + if (fsm==NULL) { + //-- NOTE(review): only xfsm->sr->type is handed over here, not xfsm->sr itself -- confirm that is enough for user-defined semirings + fsm = gfsm_automaton_new_full(xfsm->flags, xfsm->sr->type, gfsm_indexed_automaton_n_states(xfsm)); + } else { + gfsm_automaton_clear(fsm); + fsm->flags = xfsm->flags; + //-- NOTE(review): if gfsm_automaton_set_semiring() copies its argument itself, this extra copy is leaked -- TODO confirm + gfsm_automaton_set_semiring(fsm, gfsm_semiring_copy(xfsm->sr)); + gfsm_automaton_reserve(fsm, gfsm_indexed_automaton_n_states(xfsm)); + } + + //-- set root id + fsm->root_id = xfsm->root_id; + + //-- update state-wise + srzero = xfsm->sr->zero; + for (qid=0; qid < xfsm->state_final_weight->len; qid++) { + gfsmArcRange range; + + //-- state_final_weight + gfsmWeight fw = g_array_index(xfsm->state_final_weight,gfsmWeight,qid); + if (fw != srzero) { gfsm_automaton_set_final_state_full(fsm,qid,TRUE,fw); } + + //-- arcs + for (gfsm_arcrange_open_indexed(&range, xfsm, qid); gfsm_arcrange_ok(&range); gfsm_arcrange_next(&range)) { + gfsmArc *a = gfsm_arcrange_arc(&range); + gfsm_automaton_add_arc(fsm,a->source,a->target,a->lower,a->upper,a->weight); + } + gfsm_arcrange_close(&range); + } + + return fsm; +} + +/*====================================================================== + * Methods: Accessors: gfsmIndexedAutomaton + */ +//-- inlined + +/*====================================================================== + * Methods: Accessors: gfsmAutomaton API: Automaton + */ +//-- inlined + +/*====================================================================== + * Methods: Accessors: gfsmAutomaton API: States + */ +//-- inlined + +/*====================================================================== + * I/O + */ diff --git a/gfsm/gfsm/src/libgfsm/gfsmIndexed.h b/gfsm/gfsm/src/libgfsm/gfsmIndexed.h new file mode 100644 index 0000000..2313c56 --- /dev/null +++
b/gfsm/gfsm/src/libgfsm/gfsmIndexed.h @@ -0,0 +1,231 @@ + +/*=============================================================================*\ + * File: gfsmIndexed.h + * Author: Bryan Jurish <moocow@ling.uni-potsdam.de> + * Description: finite state machine library: arc indices + * + * Copyright (c) 2007 Bryan Jurish. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + *=============================================================================*/ + +/** \file gfsmIndexed.h + * \brief First stab at indexed automata + */ + +#ifndef _GFSM_INDEXED_H +#define _GFSM_INDEXED_H + +#include <gfsmArcIndex.h> + +/*====================================================================== + * Types + */ + +/// Type for an indexed automaton. 
+typedef struct { + //-- gfsmAutomaton compatibility + gfsmAutomatonFlags flags; /**< automaton flags, for ::gfsmAutomaton compatibility */ + gfsmSemiring *sr; /**< semiring used for arc weight computations */ + gfsmStateId root_id; /**< id of root state, or gfsmNoState if not defined */ + // + //-- Basic data + //gfsmBitVector *state_is_valid; /* per-state validity flags */ + gfsmWeightVector *state_final_weight; /**< State final weight, or sr->zero */ + gfsmArcTableIndex *arcs; /**< Arc storage (sorted primarily by source state) */ +} gfsmIndexedAutomaton; + +/*====================================================================== + * Methods: gfsmIndexedAutomaton: constructors, etc. + */ +/// \name Constructors etc. +//@{ + +/** Create a new ::gfsmIndexedAutomaton, specifying some basic automaton & index structure */ +GFSM_INLINE +gfsmIndexedAutomaton *gfsm_indexed_automaton_new_full(gfsmAutomatonFlags flags, + gfsmSRType srtype, + gfsmStateId n_states, + guint n_arcs); + +/** Create a new indexed automaton, using some default values */ +GFSM_INLINE +gfsmIndexedAutomaton *gfsm_indexed_automaton_new(void); + +/** Copy a ::gfsmIndexedAutomaton \a src to \a dst. 
\returns \a dst */ +gfsmIndexedAutomaton *gfsm_indexed_automaton_copy(gfsmIndexedAutomaton *dst, gfsmIndexedAutomaton *src); + +/** Create and return an exact clone of a ::gfsmIndexedAutomaton */ +GFSM_INLINE +gfsmIndexedAutomaton *gfsm_indexed_automaton_clone(gfsmIndexedAutomaton *xfsm); + +/** Clear a ::gfsmIndexedAutomaton */ +GFSM_INLINE +void gfsm_indexed_automaton_clear(gfsmIndexedAutomaton *xfsm); + +/** Free a ::gfsmIndexedAutomaton */ +GFSM_INLINE +void gfsm_indexed_automaton_free(gfsmIndexedAutomaton *xfsm); + +//@} + +/*====================================================================== + * Methods: Import & Export + */ +/// \name Import & Export +//@{ + +/** Populate a ::gfsmIndexedAutomaton from a ::gfsmAutomaton + * \param fsm source automaton + * \param xfsm destination indexed automaton, + * may be passed as NULL to create a new ::gfsmIndexedAutomaton + * \returns (new) indexed automaton \a xfsm + * \note implicitly clears \a xfsm + */ +gfsmIndexedAutomaton *gfsm_automaton_to_indexed(gfsmAutomaton *fsm, gfsmIndexedAutomaton *xfsm); + +/** Export a ::gfsmIndexedAutomaton to a ::gfsmAutomaton + * \param xfsm source indexed automaton + * \param fsm destination ::gfsmAutomaton, + * may be passed as NULL to create a new ::gfsmAutomaton + * \returns (new) automaton \a fsm + * \note implicitly clears \a fsm + */ +gfsmAutomaton *gfsm_indexed_to_automaton(gfsmIndexedAutomaton *xfsm, gfsmAutomaton *fsm); + +//@} + +/*====================================================================== + * Methods: Accessors: gfsmIndexedAutomaton + */ +/// \name Accessors: gfsmIndexedAutomaton +//@{ + +/** Reserve space for at least \a n_states states */ +GFSM_INLINE +void gfsm_indexed_automaton_reserve_states(gfsmIndexedAutomaton *xfsm, gfsmStateId n_states); + +/** Reserve space for at least \a n_arcs arcs */ +GFSM_INLINE +void gfsm_indexed_automaton_reserve_arcs(gfsmIndexedAutomaton *xfsm, guint n_arcs); + +/** (re-)sort arcs in a ::gfsmIndexedAutomaton */ +GFSM_INLINE 
+void gfsm_indexed_automaton_sort(gfsmIndexedAutomaton *xfsm, gfsmArcCompMask sort_mask); + +//@} + +/*====================================================================== + * gfsmAutomaton API: Automaton properties + */ +/// \name gfsmAutomaton API: automaton properties +//@{ + +/** Get pointer to the semiring associated with this automaton */ +#define gfsm_indexed_automaton_get_semiring(xfsm) (xfsm->sr) + +/** Set the semiring associated with this automaton */ +GFSM_INLINE +gfsmSemiring *gfsm_indexed_automaton_set_semiring(gfsmIndexedAutomaton *xfsm, gfsmSemiring *sr); + +/** Set the semiring associated with this automaton by semiring-type */ +GFSM_INLINE +void gfsm_indexed_automaton_set_semiring_type(gfsmIndexedAutomaton *xfsm, gfsmSRType srtype); + +/** Get number of states (constant time) */ +GFSM_INLINE +gfsmStateId gfsm_indexed_automaton_n_states(gfsmIndexedAutomaton *xfsm); + +/** Get total number of arcs (constant time) */ +GFSM_INLINE +guint gfsm_indexed_automaton_n_arcs(gfsmIndexedAutomaton *xfsm); + +/** Get Id of root node, or gfsmNoState if undefined */ +GFSM_INLINE +gfsmStateId gfsm_indexed_automaton_get_root(gfsmIndexedAutomaton *xfsm); + +/** Set Id of root node, creating state if necessary */ +GFSM_INLINE +void gfsm_indexed_automaton_set_root(gfsmIndexedAutomaton *xfsm, gfsmStateId qid); + +//@} + +/*====================================================================== + * Methods: Accessors: gfsmAutomaton API: States + */ +/// \name gfsmAutomaton API: States +//@{ + +/** Check whether automaton has a state with ID \a qid. */ +GFSM_INLINE +gboolean gfsm_indexed_automaton_has_state(gfsmIndexedAutomaton *xfsm, gfsmStateId qid); + +/** Ensures that state \a id exists \returns \a qid */ +GFSM_INLINE +gfsmStateId gfsm_indexed_automaton_ensure_state(gfsmIndexedAutomaton *xfsm, gfsmStateId qid); + +/* Remove the state with id \a qid, if any. + * Currently does nothing. 
+ */ +GFSM_INLINE +void gfsm_indexed_automaton_remove_state(gfsmIndexedAutomaton *fsm, gfsmStateId qid); + +/** Set boolean final-state flag. \returns (void) */ +#define gfsm_indexed_automaton_set_final_state(xfsm,qid,is_final) \ + gfsm_indexed_automaton_set_final_state_full((xfsm),(qid),(is_final),(xfsm)->sr->one) + +/** Set final weight. \returns (void) */ +GFSM_INLINE +void gfsm_indexed_automaton_set_final_state_full(gfsmIndexedAutomaton *fsm, + gfsmStateId qid, + gboolean is_final, + gfsmWeight final_weight); + +/** Lookup final weight. \returns TRUE iff state \a id is final, and sets \a *wp to its final weight. */ +GFSM_INLINE +gboolean gfsm_indexed_automaton_lookup_final(gfsmIndexedAutomaton *fsm, gfsmStateId id, gfsmWeight *wp); + +/** Is \a qid final in \a xfsm? Really just wraps gfsm_indexed_automaton_lookup_final() */ +GFSM_INLINE +gboolean gfsm_indexed_automaton_state_is_final(gfsmIndexedAutomaton *xfsm, gfsmStateId qid); + +/** Get the final weight stored for state \a qid in \a xfsm, or the semiring zero if \a xfsm has no state \a qid */ +GFSM_INLINE +gfsmWeight gfsm_indexed_automaton_get_final_weight(gfsmIndexedAutomaton *xfsm, gfsmStateId qid); + +/** Get number of outgoing arcs. 
\returns guint */ +GFSM_INLINE +guint gfsm_indexed_automaton_out_degree(gfsmIndexedAutomaton *fsm, gfsmStateId qid); + +//@} + +/*====================================================================== + * ArcRange + */ +///\name gfsmArcRange interface +//@{ + +/** Open a ::gfsmArcRange for outgoing arcs from state \a qid in \a xfsm */ +GFSM_INLINE +void gfsm_arcrange_open_indexed(gfsmArcRange *range, gfsmIndexedAutomaton *xfsm, gfsmStateId qid); + +//@} + +//-- inline definitions +#ifdef GFSM_INLINE_ENABLED +# include <gfsmIndexed.hi> +#endif + +#endif /* _GFSM_INDEXED_H */ diff --git a/gfsm/gfsm/src/libgfsm/gfsmIndexed.hi b/gfsm/gfsm/src/libgfsm/gfsmIndexed.hi new file mode 100644 index 0000000..670f03d --- /dev/null +++ b/gfsm/gfsm/src/libgfsm/gfsmIndexed.hi @@ -0,0 +1,294 @@ + +/*=============================================================================*\ + * File: gfsmIndexed.hi + * Author: Bryan Jurish <moocow@ling.uni-potsdam.de> + * Description: finite state machine library: indexed automaton: inline definitions + * + * Copyright (c) 2007 Bryan Jurish. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + *=============================================================================*/ + +/*====================================================================== + * Constructors etc. + */ + +//---------------------------------------- +/*-- allocate a new indexed automaton: fresh semiring of type srtype, no root, vectors sized for n_states / n_arcs */ +GFSM_INLINE +gfsmIndexedAutomaton *gfsm_indexed_automaton_new_full(gfsmAutomatonFlags flags, + gfsmSRType srtype, + gfsmStateId n_states, + guint n_arcs) + +{ + gfsmIndexedAutomaton *xfsm = g_new0(gfsmIndexedAutomaton,1); +#if 0 + gfsmStateId qid; + gfsmWeight srzero; +#endif + + xfsm->flags = flags; + xfsm->sr = gfsm_semiring_new(srtype); + xfsm->root_id = gfsmNoState; + + //xfsm->state_is_valid = gfsm_bitvector_sized_new(n_states); + xfsm->state_final_weight = gfsm_weight_vector_sized_new(n_states); + xfsm->arcs = gfsm_arc_table_index_sized_new(n_states, n_arcs); + +#if 0 + //-- initialize: states + for (qid=0; qid < n_states; qid++) { + g_array_index(xfsm->state_final_weight,gfsmWeight,qid) = xfsm->sr->zero; + } +#endif + + return xfsm; +} + +//---------------------------------------- +GFSM_INLINE +gfsmIndexedAutomaton *gfsm_indexed_automaton_new(void) +{ + return gfsm_indexed_automaton_new_full(gfsmAutomatonDefaultFlags, + gfsmAutomatonDefaultSRType, + gfsmAutomatonDefaultSize, + gfsmAutomatonDefaultSize + ); +} + + +//---------------------------------------- +GFSM_INLINE +gfsmIndexedAutomaton *gfsm_indexed_automaton_clone(gfsmIndexedAutomaton *src) +{ + return gfsm_indexed_automaton_copy(gfsm_indexed_automaton_new_full(src->flags, + src->sr->type, + gfsm_indexed_automaton_n_states(src), + gfsm_indexed_automaton_n_arcs(src)), + src); +} + +//---------------------------------------- +GFSM_INLINE +void gfsm_indexed_automaton_clear(gfsmIndexedAutomaton *xfsm) +{ + //gfsm_bitvector_clear(xfsm->state_is_valid); + 
gfsm_weight_vector_resize(xfsm->state_final_weight,0); + gfsm_arc_table_index_resize(xfsm->arcs,0,0); + xfsm->root_id = gfsmNoState; + return; + } + +//---------------------------------------- +GFSM_INLINE +void gfsm_indexed_automaton_free(gfsmIndexedAutomaton *xfsm) +{ + if (!xfsm) return; + if (xfsm->sr) gfsm_semiring_free(xfsm->sr); + //if (xfsm->state_is_valid) gfsm_bitvector_free(xfsm->state_is_valid); + if (xfsm->state_final_weight) gfsm_weight_vector_free(xfsm->state_final_weight); + if (xfsm->arcs) gfsm_arc_table_index_free(xfsm->arcs); + g_free(xfsm); +} + +/*====================================================================== + * Methods: Import & Export + */ +//-- EXTERN + + +/*====================================================================== + * Methods: Accessors: gfsmIndexedAutomaton + */ + +//---------------------------------------- +GFSM_INLINE +void gfsm_indexed_automaton_reserve_states(gfsmIndexedAutomaton *xfsm, gfsmStateId n_states) +{ +#if 0 + gfsmStateId n_states_old = gfsm_indexed_automaton_n_states(xfsm); + gfsmStateId qid; + gfsmWeight srzero; +#endif + + //-- resize state-indexed arrays + //gfsm_bitvector_resize(xfsm->state_is_valid, n_states); + gfsm_weight_vector_resize(xfsm->state_final_weight, n_states); + gfsm_arc_table_index_resize(xfsm->arcs, n_states, xfsm->arcs->tab->len); + +#if 0 + //-- ... 
adjust final weights + srzero = xfsm->sr->zero; + for (qid=n_states_old; qid < n_states; qid++) { + g_array_index(xfsm->state_final_weight,gfsmWeight,qid) = srzero; + } +#endif +} + +//---------------------------------------- +GFSM_INLINE +void gfsm_indexed_automaton_reserve_arcs(gfsmIndexedAutomaton *xfsm, guint n_arcs) +{ + gfsm_arc_table_index_resize(xfsm->arcs, xfsm->arcs->first->len-1, n_arcs); +} + +//---------------------------------------- +GFSM_INLINE +void gfsm_indexed_automaton_sort(gfsmIndexedAutomaton *xfsm, gfsmArcCompMask sort_mask) +{ + if (xfsm->flags.sort_mode != sort_mask && sort_mask != gfsmASMNone) { + gfsm_arc_table_index_sort_bymask(xfsm->arcs, sort_mask, xfsm->sr); + } + xfsm->flags.sort_mode = sort_mask; +} + + +/*====================================================================== + * Methods: Accessors: gfsmAutomaton API: Automaton + */ + +//---------------------------------------- +/*-- replace the automaton's semiring by a private copy of sr; returns the caller's sr, not the stored copy */ +GFSM_INLINE +gfsmSemiring *gfsm_indexed_automaton_set_semiring(gfsmIndexedAutomaton *xfsm, gfsmSemiring *sr) +{ + if (sr == xfsm->sr) return sr; /*-- self-assignment: freeing first would leave nothing valid to copy */ + if (xfsm->sr) gfsm_semiring_free(xfsm->sr); + xfsm->sr = gfsm_semiring_copy(sr); + return sr; +} + +//---------------------------------------- +GFSM_INLINE +void gfsm_indexed_automaton_set_semiring_type(gfsmIndexedAutomaton *xfsm, gfsmSRType srtype) +{ + if (!xfsm->sr) xfsm->sr = gfsm_semiring_new(srtype); + else if (xfsm->sr->type != srtype) { + gfsm_semiring_free(xfsm->sr); + xfsm->sr = gfsm_semiring_new(srtype); + } +} + +//---------------------------------------- +GFSM_INLINE +gfsmStateId gfsm_indexed_automaton_n_states(gfsmIndexedAutomaton *xfsm) +{ return xfsm->state_final_weight->len; } + +//---------------------------------------- +GFSM_INLINE +guint gfsm_indexed_automaton_n_arcs(gfsmIndexedAutomaton *xfsm) +{ return xfsm->arcs->tab->len; } + +//---------------------------------------- +/*-- return type matches the gfsmStateId prototype in gfsmIndexed.h */ +GFSM_INLINE +gfsmStateId gfsm_indexed_automaton_get_root(gfsmIndexedAutomaton *xfsm) +{ return xfsm->root_id; } + +//---------------------------------------- 
+GFSM_INLINE +void gfsm_indexed_automaton_set_root(gfsmIndexedAutomaton *xfsm, gfsmStateId qid) +{ + if (qid >= gfsm_indexed_automaton_n_states(xfsm) && qid != gfsmNoState) { + gfsm_indexed_automaton_reserve_states(xfsm,qid+1); + } + xfsm->root_id = qid; +} + +/*====================================================================== + * Methods: Accessors: gfsmAutomaton API: States + */ + +//---------------------------------------- +GFSM_INLINE +gboolean gfsm_indexed_automaton_has_state(gfsmIndexedAutomaton *xfsm, gfsmStateId qid) +{ return qid < gfsm_indexed_automaton_n_states(xfsm); } + +//---------------------------------------- +GFSM_INLINE +gfsmStateId gfsm_indexed_automaton_ensure_state(gfsmIndexedAutomaton *xfsm, gfsmStateId qid) +{ + if (qid >= gfsm_indexed_automaton_n_states(xfsm) && qid != gfsmNoState) { + gfsm_indexed_automaton_reserve_states(xfsm,qid+1); + } + return qid; +} + +//---------------------------------------- +GFSM_INLINE +void gfsm_indexed_automaton_remove_state(gfsmIndexedAutomaton *fsm, gfsmStateId qid) +{ return; } + + +//---------------------------------------- +/*-- store final_weight for qid (or the semiring zero if !is_final), creating qid if needed; no-op for gfsmNoState */ +GFSM_INLINE +void gfsm_indexed_automaton_set_final_state_full(gfsmIndexedAutomaton *xfsm, + gfsmStateId qid, + gboolean is_final, + gfsmWeight final_weight) +{ + if (qid == gfsmNoState) return; /*-- ensure_state() never creates gfsmNoState: storing to it would index past the vector */ + gfsm_indexed_automaton_ensure_state(xfsm,qid); + if (!is_final) final_weight = xfsm->sr->zero; + g_array_index(xfsm->state_final_weight,gfsmWeight,qid) = final_weight; +} + +//---------------------------------------- +GFSM_INLINE +gboolean gfsm_indexed_automaton_lookup_final(gfsmIndexedAutomaton *xfsm, gfsmStateId qid, gfsmWeight *wp) +{ + if (!gfsm_indexed_automaton_has_state(xfsm,qid)) { + *wp = xfsm->sr->zero; + return FALSE; + } + *wp = g_array_index(xfsm->state_final_weight,gfsmWeight,qid); + return ((*wp)!=xfsm->sr->zero); +} + +//---------------------------------------- +GFSM_INLINE +gboolean gfsm_indexed_automaton_state_is_final(gfsmIndexedAutomaton *xfsm, gfsmStateId qid) +{ + gfsmWeight fw; + return 
gfsm_indexed_automaton_lookup_final(xfsm,qid,&fw); +} + +//---------------------------------------- +GFSM_INLINE +gfsmWeight gfsm_indexed_automaton_get_final_weight(gfsmIndexedAutomaton *xfsm, gfsmStateId qid) +{ + if (!gfsm_indexed_automaton_has_state(xfsm,qid)) return xfsm->sr->zero; + return g_array_index(xfsm->state_final_weight,gfsmWeight,qid); +} + + +//---------------------------------------- +GFSM_INLINE +guint gfsm_indexed_automaton_out_degree(gfsmIndexedAutomaton *fsm, gfsmStateId qid) +{ + if (!gfsm_indexed_automaton_has_state(fsm,qid)) return 0; + return gfsm_arc_table_index_out_degree(fsm->arcs,qid); +} + + +/*====================================================================== + * gfsmArcRange + */ + +//---------------------------------------- +GFSM_INLINE +void gfsm_arcrange_open_indexed(gfsmArcRange *range, gfsmIndexedAutomaton *xfsm, gfsmStateId qid) +{ + if (gfsm_indexed_automaton_has_state(xfsm,qid)) { + gfsm_arcrange_open_table_index(range,xfsm->arcs,qid); + } else { + gfsm_arcrange_close(range); + } +} diff --git a/gfsm/gfsm/src/libgfsm/gfsmIndexedIO.c b/gfsm/gfsm/src/libgfsm/gfsmIndexedIO.c new file mode 100644 index 0000000..9e2f297 --- /dev/null +++ b/gfsm/gfsm/src/libgfsm/gfsmIndexedIO.c @@ -0,0 +1,464 @@ + +/*=============================================================================*\ + * File: gfsmIndexedIO.c + * Author: Bryan Jurish <moocow@ling.uni-potsdam.de> + * Description: finite state machine library: indexed automata: I/O + * + * Copyright (c) 2007 Bryan Jurish. + * + * For information on usage and redistribution, and for a DISCLAIMER + * OF ALL WARRANTIES, see the file "COPYING" in this distribution. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. 
+ * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + *=============================================================================*/ + +#include <gfsmIndexedIO.h> +#include <gfsmArcIter.h> +#include <gfsmUtils.h> + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <ctype.h> +#include <errno.h> + + + +/*====================================================================== + * Constants: Binary I/O + */ +const gfsmVersionInfo gfsm_indexed_version_bincompat_min_store = + { + 0, // major + 0, // minor + 10 // micro + }; + +const gfsmVersionInfo gfsm_indexed_version_bincompat_min_check = + { + 0, // major + 0, // minor + 10 // micro + }; + +const gchar gfsm_indexed_header_magic[16] = "gfsm_indexed\0"; + +/*====================================================================== + * Methods: Binary I/O: load() + */ + +/*-------------------------------------------------------------- + * load_bin_header() + */ +gboolean gfsm_indexed_automaton_load_bin_header(gfsmIndexedAutomatonHeader *hdr, gfsmIOHandle *ioh, gfsmError **errp) +{ + if (!gfsmio_read(ioh, hdr, sizeof(gfsmIndexedAutomatonHeader))) { + g_set_error(errp, + g_quark_from_static_string("gfsm"), + g_quark_from_static_string("indexed_automaton_load_bin_header:size"), + "could not read header"); + return FALSE; + } + else if (strcmp(hdr->magic, gfsm_indexed_header_magic) != 0) { + g_set_error(errp, + g_quark_from_static_string("gfsm"), + g_quark_from_static_string("indexed_automaton_load_bin_header:magic"), + "bad magic"); + return FALSE; + } + else if 
(gfsm_version_compare(hdr->version, gfsm_indexed_version_bincompat_min_check) < 0) { /*-- check against the indexed-format minimum, the same constant the message reports */ + g_set_error(errp, + g_quark_from_static_string("gfsm"), + g_quark_from_static_string("indexed_automaton_load_bin_header:version"), + "stored format v%u.%u.%u is obsolete - need at least v%u.%u.%u", + hdr->version.major, + hdr->version.minor, + hdr->version.micro, + gfsm_indexed_version_bincompat_min_check.major, + gfsm_indexed_version_bincompat_min_check.minor, + gfsm_indexed_version_bincompat_min_check.micro); + return FALSE; + } + else if (gfsm_version_compare(gfsm_version, hdr->version_min) < 0) { + g_set_error(errp, + g_quark_from_static_string("gfsm"), + g_quark_from_static_string("indexed_automaton_load_bin_header:version"), + "libgfsm v%u.%u.%u is obsolete - stored automaton needs at least v%u.%u.%u", + gfsm_version.major, + gfsm_version.minor, + gfsm_version.micro, + hdr->version_min.major, + hdr->version_min.minor, + hdr->version_min.micro); + return FALSE; + } + if (hdr->srtype == gfsmSRTUnknown || hdr->srtype >= gfsmSRTUser) { + //-- compatibility hack + hdr->srtype = gfsmAutomatonDefaultSRType; + } + return TRUE; +} + +/*-------------------------------------------------------------- + * load_bin_handle() + * + supports stored file versions v0.0.9 -- CURRENT + */ +gboolean gfsm_indexed_automaton_load_bin_handle_0_0_9(gfsmIndexedAutomatonHeader *hdr, + gfsmIndexedAutomaton *xfsm, + gfsmIOHandle *ioh, + gfsmError **errp) +{ + //-- reserve states & arcs + gfsm_indexed_automaton_reserve_states(xfsm, hdr->n_states); + gfsm_indexed_automaton_reserve_arcs(xfsm, hdr->n_arcs); + + //-- set automaton-global properties + xfsm->flags = hdr->flags; + gfsm_indexed_automaton_set_semiring_type(xfsm, hdr->srtype); + xfsm->root_id = hdr->root_id; + + //------ load: state_final_weight + if (!gfsm_weight_vector_read_bin_handle(xfsm->state_final_weight, ioh, errp)) { return FALSE; } + + //------ load: arcs + if (!gfsm_arc_table_index_read_bin_handle(xfsm->arcs, ioh, errp)) { return FALSE; } + + 
return TRUE; +} + + +/*-------------------------------------------------------------- + * load_bin_handle() + * + dispatch + */ +gboolean gfsm_indexed_automaton_load_bin_handle(gfsmIndexedAutomaton *fsm, gfsmIOHandle *ioh, gfsmError **errp) +{ + gfsmIndexedAutomatonHeader hdr; + gfsm_indexed_automaton_clear(fsm); + + //-- load header + if (!gfsm_indexed_automaton_load_bin_header(&hdr,ioh,errp)) return FALSE; + + //-- guts + return gfsm_indexed_automaton_load_bin_handle_0_0_9(&hdr,fsm,ioh,errp); +} + +/*-------------------------------------------------------------- + * load_bin_file() + */ +gboolean gfsm_indexed_automaton_load_bin_file(gfsmIndexedAutomaton *fsm, FILE *f, gfsmError **errp) +{ + gfsmIOHandle *ioh = gfsmio_new_zfile(f,"rb",-1); + gboolean rc = gfsm_indexed_automaton_load_bin_handle(fsm, ioh, errp); + if (ioh) { + gfsmio_close(ioh); + gfsmio_handle_free(ioh); + } + return rc; +} + +/*-------------------------------------------------------------- + * load_bin_filename() + */ +gboolean gfsm_indexed_automaton_load_bin_filename(gfsmIndexedAutomaton *fsm, const gchar *filename, gfsmError **errp) +{ + gfsmIOHandle *ioh = gfsmio_new_filename(filename, "rb", -1, errp); + gboolean rc = ioh && !(*errp) && gfsm_indexed_automaton_load_bin_handle(fsm, ioh, errp); + if (ioh) { + gfsmio_close(ioh); + gfsmio_handle_free(ioh); + } + return rc; +} + +/*-------------------------------------------------------------- + * load_bin_gstring() + */ +gboolean gfsm_indexed_automaton_load_bin_gstring(gfsmIndexedAutomaton *fsm, GString *gs, gfsmError **errp) +{ + gfsmPosGString pgs = { gs, 0 }; + gfsmIOHandle *ioh = gfsmio_new_gstring(&pgs); + gboolean rc = ioh && !(*errp) && gfsm_indexed_automaton_load_bin_handle(fsm, ioh, errp); + if (ioh) { + gfsmio_close(ioh); + gfsmio_handle_free(ioh); + } + return rc; +} + + +/*====================================================================== + * Methods: Binary I/O: save() + */ + 
+/*-------------------------------------------------------------- + * save_bin_handle() + */ +gboolean gfsm_indexed_automaton_save_bin_handle(gfsmIndexedAutomaton *xfsm, gfsmIOHandle *ioh, gfsmError **errp) +{ + gfsmIndexedAutomatonHeader hdr; + + //-- create header + memset(&hdr, 0, sizeof(gfsmIndexedAutomatonHeader)); + strcpy(hdr.magic, gfsm_indexed_header_magic); + hdr.version = gfsm_version; + hdr.version_min = gfsm_indexed_version_bincompat_min_store; + hdr.flags = xfsm->flags; + hdr.root_id = xfsm->root_id; + hdr.n_states = gfsm_indexed_automaton_n_states(xfsm); + hdr.n_arcs = gfsm_indexed_automaton_n_arcs(xfsm); + hdr.srtype = gfsm_indexed_automaton_get_semiring(xfsm)->type; + + //-- write header + if (!gfsmio_write(ioh, &hdr, sizeof(gfsmIndexedAutomatonHeader))) { + g_set_error(errp, g_quark_from_static_string("gfsm"), + g_quark_from_static_string("indexed_automaton_save_bin:header"), + "could not store header"); + return FALSE; + } + + //------ save: state_final_weight + if (!gfsm_weight_vector_write_bin_handle(xfsm->state_final_weight, ioh, errp)) { return FALSE; } + + //------ save: arcs + if (!gfsm_arc_table_index_write_bin_handle(xfsm->arcs, ioh, errp)) { return FALSE; } + + return TRUE; +} + +/*-------------------------------------------------------------- + * save_bin_file() + */ +gboolean gfsm_indexed_automaton_save_bin_file(gfsmIndexedAutomaton *fsm, FILE *f, gfsmError **errp) +{ + gfsmIOHandle *ioh = gfsmio_new_file(f); + gboolean rc = ioh && !(*errp) && gfsm_indexed_automaton_save_bin_handle(fsm, ioh, errp); + if (ioh) { + //gfsmio_close(ioh); + gfsmio_handle_free(ioh); + } + return rc; +} + +/*-------------------------------------------------------------- + * save_bin_filename_nc() + */ +gboolean gfsm_indexed_automaton_save_bin_filename_nc(gfsmIndexedAutomaton *fsm, const gchar *filename, gfsmError **errp) +{ + FILE *f; + gboolean rc; + if (!(f=gfsm_open_filename(filename,"wb",errp))) return FALSE; + rc = 
gfsm_indexed_automaton_save_bin_file(fsm, f, errp); + if (f != stdout) fclose(f); + return rc; +} + +/*-------------------------------------------------------------- + * save_bin_filename() + */ +gboolean gfsm_indexed_automaton_save_bin_filename(gfsmIndexedAutomaton *fsm, const gchar *filename, int zlevel, gfsmError **errp) +{ + gfsmIOHandle *ioh = gfsmio_new_filename(filename, "wb", zlevel, errp); + gboolean rc = ioh && !(*errp) && gfsm_indexed_automaton_save_bin_handle(fsm, ioh, errp); + if (ioh) { + gfsmio_close(ioh); + gfsmio_handle_free(ioh); + } + return rc; +} + +/*-------------------------------------------------------------- + * save_bin_gstring() + */ +gboolean gfsm_indexed_automaton_save_bin_gstring(gfsmIndexedAutomaton *fsm, GString *gs, gfsmError **errp) +{ + gfsmPosGString pgs = { gs, gs->len }; + gfsmIOHandle *ioh = gfsmio_new_gstring(&pgs); + gboolean rc = ioh && !(*errp) && gfsm_indexed_automaton_save_bin_handle(fsm, ioh, errp); + if (ioh) { + gfsmio_close(ioh); + gfsmio_handle_free(ioh); + } + return rc; +} + + +/*====================================================================== + * Methods: Text I/O: compile() : NOT IMPLEMNENTED + */ + + +/*====================================================================== + * Methods: Text I/O: print() + */ + +/*-------------------------------------------------------------- + * print_handle() + */ +gboolean gfsm_indexed_automaton_print_handle (gfsmIndexedAutomaton *xfsm, + gfsmIOHandle *ioh, + gfsmAlphabet *lo_alphabet, + gfsmAlphabet *hi_alphabet, + gfsmAlphabet *state_alphabet, + gfsmError **errp) +{ + gfsmStateId qid; + gfsmArcRange range; + GString *gs = g_string_new(""); + gboolean rc = TRUE; + gpointer key; + + if (xfsm->root_id == gfsmNoState) rc = FALSE; //-- sanity check + + for (qid=0; rc && qid < gfsm_indexed_automaton_n_states(xfsm); qid++) { + if (!gfsm_indexed_automaton_has_state(xfsm,qid)) continue; + + for (gfsm_arcrange_open_indexed(&range,xfsm,qid); gfsm_arcrange_ok(&range); 
gfsm_arcrange_next(&range)) + { + gfsmArc *a = gfsm_arcrange_arc(&range); + + //-- source state + if (state_alphabet && (key=gfsm_alphabet_find_key(state_alphabet,qid)) != gfsmNoKey) { + gfsm_alphabet_key2string(state_alphabet,key,gs); + gfsmio_puts(ioh, gs->str); + } else { + if (state_alphabet) g_printerr("Warning: no label defined for state '%u'!\n", qid); + gfsmio_printf(ioh, "%u", qid); + } + gfsmio_putc(ioh, '\t'); + + //-- sink state + if (state_alphabet && (key=gfsm_alphabet_find_key(state_alphabet,a->target)) != gfsmNoKey) { + gfsm_alphabet_key2string(state_alphabet,key,gs); + gfsmio_puts(ioh,gs->str); + } else { + if (state_alphabet) g_printerr("Warning: no label defined for state '%u'!\n", a->target); + gfsmio_printf(ioh, "%u", a->target); + } + gfsmio_putc(ioh,'\t'); + + //-- lower label + if (lo_alphabet && (key=gfsm_alphabet_find_key(lo_alphabet,a->lower)) != gfsmNoKey) { + gfsm_alphabet_key2string(lo_alphabet,key,gs); + gfsmio_puts(ioh, gs->str); + } else { + if (lo_alphabet) g_printerr("Warning: no lower label defined for Id '%u'!\n", a->lower); + gfsmio_printf(ioh, "%u", a->lower); + } + + //-- upper label + if (xfsm->flags.is_transducer) { + gfsmio_putc(ioh, '\t'); + if (hi_alphabet && (key=gfsm_alphabet_find_key(hi_alphabet,a->upper)) != gfsmNoKey) { + gfsm_alphabet_key2string(hi_alphabet,key,gs); + gfsmio_puts(ioh, gs->str); + } else { + if (hi_alphabet) g_printerr("Warning: no upper label defined for Id '%u'!\n", a->upper); + gfsmio_printf(ioh, "%u", a->upper); + } + } + + //-- weight + if (xfsm->flags.is_weighted) { // && a->weight != fsm->sr->one + gfsmio_printf(ioh, "\t%g", a->weight); + } + + gfsmio_putc(ioh, '\n'); + } + gfsm_arcrange_close(&range); + + //-- final? 
+ if (gfsm_indexed_automaton_state_is_final(xfsm,qid)) { + if (state_alphabet && (key=gfsm_alphabet_find_key(state_alphabet,qid)) != NULL) { + gfsm_alphabet_key2string(state_alphabet,key,gs); + gfsmio_puts(ioh, gs->str); + } else { + gfsmio_printf(ioh, "%u", qid); + } + if (xfsm->flags.is_weighted) { + gfsmio_printf(ioh, "\t%g", gfsm_indexed_automaton_get_final_weight(xfsm,qid)); + } + gfsmio_putc(ioh, '\n'); + } + } + + //-- cleanup + g_string_free(gs,TRUE); + + return rc; +} + +/*-------------------------------------------------------------- + * print_file_full() + */ +gboolean gfsm_indexed_automaton_print_file_full (gfsmIndexedAutomaton *fsm, + FILE *f, + gfsmAlphabet *lo_alphabet, + gfsmAlphabet *hi_alphabet, + gfsmAlphabet *state_alphabet, + int zlevel, + gfsmError **errp) +{ + gfsmIOHandle *ioh = gfsmio_new_zfile(f,"wb",zlevel); + gboolean rc = (ioh && !(*errp) && + gfsm_indexed_automaton_print_handle(fsm,ioh,lo_alphabet,hi_alphabet,state_alphabet,errp)); + if (ioh) { + gfsmio_close(ioh); + gfsmio_handle_free(ioh); + } + return rc; +} + +/*-------------------------------------------------------------- + * print_filename() + */ +gboolean gfsm_indexed_automaton_print_filename_full (gfsmIndexedAutomaton *fsm, + const gchar *filename, + gfsmAlphabet *lo_alphabet, + gfsmAlphabet *hi_alphabet, + gfsmAlphabet *state_alphabet, + int zlevel, + gfsmError **errp) +{ + gfsmIOHandle *ioh = gfsmio_new_filename(filename,"wb",zlevel,errp); + gboolean rc = (ioh && !(*errp) && + gfsm_indexed_automaton_print_handle(fsm,ioh,lo_alphabet,hi_alphabet,state_alphabet,errp)); + if (ioh) { + gfsmio_close(ioh); + gfsmio_handle_free(ioh); + } + return rc; +} +/*-------------------------------------------------------------- + * print_gstring_full() + */ +gboolean gfsm_indexed_automaton_print_gstring_full (gfsmIndexedAutomaton *fsm, + GString *gs, + gfsmAlphabet *lo_alphabet, + gfsmAlphabet *hi_alphabet, + gfsmAlphabet *state_alphabet, + gfsmError **errp) +{ + gfsmPosGString pgs = { gs, 
gs->len }; + gfsmIOHandle *ioh = gfsmio_new_gstring(&pgs); + gboolean rc = (ioh && !(*errp) && + gfsm_indexed_automaton_print_handle(fsm,ioh,lo_alphabet,hi_alphabet,state_alphabet,errp)); + if (ioh) { + gfsmio_close(ioh); + gfsmio_handle_free(ioh); + } + return rc; +} + diff --git a/gfsm/gfsm/src/libgfsm/gfsmIndexedIO.h b/gfsm/gfsm/src/libgfsm/gfsmIndexedIO.h new file mode 100644 index 0000000..f736645 --- /dev/null +++ b/gfsm/gfsm/src/libgfsm/gfsmIndexedIO.h @@ -0,0 +1,163 @@ + +/*=============================================================================*\ + * File: gfsmIndexedIO.h + * Author: Bryan Jurish <moocow@ling.uni-potsdam.de> + * Description: finite state machine library: indexed automata: I/O + * + * Copyright (c) 2007 Bryan Jurish. + * + * For information on usage and redistribution, and for a DISCLAIMER + * OF ALL WARRANTIES, see the file "COPYING" in this distribution. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + *=============================================================================*/ + +/** \file gfsmIndexedIO.h + * \brief Librarian routines for indexed automata. 
+ */ + +#ifndef _GFSM_INDEXED_IO_H +#define _GFSM_INDEXED_IO_H + +#include <gfsmAutomatonIO.h> +#include <gfsmIndexed.h> + +/*====================================================================== + * Types + */ +/// Header info for binary files +typedef struct { + gchar magic[16]; /**< magic header string "gfsm_indexed" */ + gfsmVersionInfo version; /**< gfsm version which created the stored file */ + gfsmVersionInfo version_min; /**< minimum gfsm version required to load the file */ + gfsmAutomatonFlags flags; /**< automaton flags */ + gfsmStateId root_id; /**< Id of root node */ + gfsmStateId n_states; /**< number of stored states */ + gfsmStateId n_arcs; /**< number of stored arcs */ + guint32 srtype; /**< semiring type (cast to ::gfsmSRType) */ + guint32 sort_mask; /**< arc-sort priorities (a ::gfsmArcCompMask) */ + guint32 reserved2; /**< reserved */ + guint32 reserved3; /**< reserved */ +} gfsmIndexedAutomatonHeader; + +/*====================================================================== + * Constants + */ + +/** Magic header string for stored ::gfsmIndexedAutomaton files */ +extern const gchar gfsm_indexed_header_magic[16]; + +/** Minimum libgfsm version required for loading files stored by this version of libgfsm */ +extern const gfsmVersionInfo gfsm_indexed_version_bincompat_min_store; + +/** Minimum libgfsm version whose binary files this version of libgfsm can read */ +extern const gfsmVersionInfo gfsm_indexed_version_bincompat_min_check; + +/*====================================================================== + * Methods: Binary I/O + */ +/// \name Indexed Automaton Methods: Binary I/O +//@{ + +/** Load an automaton header from a stored binary file. + * Returns TRUE iff the header looks valid. 
*/ +gboolean gfsm_indexed_automaton_load_header(gfsmIndexedAutomatonHeader *hdr, gfsmIOHandle *ioh, gfsmError **errp); + +/** Load an automaton in binary form from a gfsmIOHandle* (implicitly clear()s \a fsm) */ +gboolean gfsm_indexed_automaton_load_bin_handle(gfsmIndexedAutomaton *fsm, gfsmIOHandle *ioh, gfsmError **errp); + +/** Load an automaton from a stored binary file (implicitly clear()s \a fsm) */ +gboolean gfsm_indexed_automaton_load_bin_file(gfsmIndexedAutomaton *fsm, FILE *f, gfsmError **errp); + +/** Load an automaton from a named binary file (implicitly clear()s \a fsm) */ +gboolean gfsm_indexed_automaton_load_bin_filename(gfsmIndexedAutomaton *fsm, const gchar *filename, gfsmError **errp); + +/** Load an automaton from an in-memory buffer */ +gboolean gfsm_indexed_automaton_load_bin_gstring(gfsmIndexedAutomaton *fsm, GString *gs, gfsmError **errp); + +/*--------------------------------------------------------------*/ + +/** Store an automaton in binary form to a gfsmIOHandle* */ +gboolean gfsm_indexed_automaton_save_bin_handle(gfsmIndexedAutomaton *fsm, gfsmIOHandle *ioh, gfsmError **errp); + +/** Store an automaton in binary form to a file */ +gboolean gfsm_indexed_automaton_save_bin_file(gfsmIndexedAutomaton *fsm, FILE *f, gfsmError **errp); + +/** Store an automaton to a named binary file (no compression) */ +gboolean gfsm_indexed_automaton_save_bin_filename_nc(gfsmIndexedAutomaton *fsm, const gchar *filename, gfsmError **errp); + +/** Store an automaton to a named binary file, possibly compressing.
+ * Set \a zlevel=-1 for default compression, and + * set \a zlevel=0 for no compression, otherwise should be as for zlib (1 <= zlevel <= 9) + */ +gboolean gfsm_indexed_automaton_save_bin_filename(gfsmIndexedAutomaton *fsm, const gchar *filename, int zlevel, gfsmError **errp); + +/** Append an uncompressed binary automaton to an in-memory buffer */ +gboolean gfsm_indexed_automaton_save_bin_gstring(gfsmIndexedAutomaton *fsm, GString *gs, gfsmError **errp); + +//@} + +/*====================================================================== + * Automaton Methods: Text I/O + */ +/// \name Automaton Methods: Text I/O (output only) +//@{ + +/** Print a ::gfsmIndexedAutomaton in Ma-Bell-compatible text-format to a ::gfsmIOHandle* */ +gboolean gfsm_indexed_automaton_print_handle (gfsmIndexedAutomaton *fsm, + gfsmIOHandle *ioh, + gfsmAlphabet *lo_alphabet, + gfsmAlphabet *hi_alphabet, + gfsmAlphabet *state_alphabet, + gfsmError **errp); + + +/** Print an automaton in Ma-Bell-compatible text-format to a FILE* */ +gboolean gfsm_indexed_automaton_print_file_full (gfsmIndexedAutomaton *fsm, + FILE *f, + gfsmAlphabet *lo_alphabet, + gfsmAlphabet *hi_alphabet, + gfsmAlphabet *state_alphabet, + int zlevel, + gfsmError **errp); + +/** Convenience macro for printing to uncompressed all-numeric-id text streams */ +#define gfsm_indexed_automaton_print_file(fsm,f,errp) \ + gfsm_indexed_automaton_print_file_full(fsm,f,NULL,NULL,NULL,0,errp) + +/** Print an automaton in Ma-Bell-compatible text-format to a named file */ +gboolean gfsm_indexed_automaton_print_filename_full (gfsmIndexedAutomaton *fsm, + const gchar *filename, + gfsmAlphabet *lo_alphabet, + gfsmAlphabet *hi_alphabet, + gfsmAlphabet *state_alphabet, + int zlevel, + gfsmError **errp); + +/** Convenience macro for printing to uncompressed all-numeric-id named text files */ +#define gfsm_indexed_automaton_print_filename(fsm,f,errp) \ + gfsm_indexed_automaton_print_filename_full(fsm,f,NULL,NULL,NULL,0,errp) + +/** Print an
automaton in Ma-Bell-compatible text-format to an in-memory buffer */ +gboolean gfsm_indexed_automaton_print_gstring_full (gfsmIndexedAutomaton *fsm, + GString *gs, + gfsmAlphabet *lo_alphabet, + gfsmAlphabet *hi_alphabet, + gfsmAlphabet *state_alphabet, + gfsmError **errp); + +//@} + +#endif /* _GFSM_INDEXED_IO_H */ diff --git a/gfsm/gfsm/src/libgfsm/gfsmLookup.c b/gfsm/gfsm/src/libgfsm/gfsmLookup.c new file mode 100644 index 0000000..9b99d8f --- /dev/null +++ b/gfsm/gfsm/src/libgfsm/gfsmLookup.c @@ -0,0 +1,481 @@ + +/*=============================================================================*\ + * File: gfsmLookup.c + * Author: Bryan Jurish <moocow@ling.uni-potsdam.de> + * Description: finite state machine library + * + * Copyright (c) 2005-2007 Bryan Jurish. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + *=============================================================================*/ + +#include <gfsmLookup.h> + +#include <gfsmAlphabet.h> +#include <gfsmState.h> +#include <gfsmArc.h> +#include <gfsmArcIter.h> + +#include <string.h> + +/*====================================================================== + * Constants + */ +const gfsmStateId gfsmLookupStateMapGet = 16; + +/*====================================================================== + * Methods: lookup + */ + +//-------------------------------------------------------------- +gfsmAutomaton *gfsm_automaton_lookup_full(gfsmAutomaton *fst, + gfsmLabelVector *input, + gfsmAutomaton *result, + gfsmStateIdVector *statemap) +{ + GSList *stack = NULL; + gfsmLookupConfig *cfg = (gfsmLookupConfig*)g_new(gfsmLookupConfig,1); + gfsmLookupConfig *cfg_new; + const gfsmState *qt; + gfsmState *qr; + gfsmLabelVal a; + gfsmArcIter ai; + + //-- ensure result automaton exists and is clear + if (result==NULL) { + result = gfsm_automaton_shadow(fst); + } else { + gfsm_automaton_clear(result); + } + result->flags.is_transducer = TRUE; + + //-- initialization + result->root_id = gfsm_automaton_add_state(result); + cfg->qt = fst->root_id; + cfg->qr = result->root_id; + cfg->i = 0; + stack = g_slist_prepend(stack, cfg); + + //-- ye olde loope + while (stack != NULL) { + //-- pop the top element off the stack + cfg = (gfsmLookupConfig*)(stack->data); + stack = g_slist_delete_link(stack, stack); + + //-- add config to the state-map, if non-NULL + if (statemap) { + if (cfg->qr >= statemap->len) { + g_ptr_array_set_size(statemap, cfg->qr + gfsmLookupStateMapGet); + } + g_ptr_array_index(statemap, cfg->qr) = GUINT_TO_POINTER(cfg->qt); + } + + //-- get states + qt = gfsm_automaton_find_state_const(fst, cfg->qt); + 
qr = gfsm_automaton_find_state (result, cfg->qr); + a = (cfg->i < input->len + ? (gfsmLabelVal)GPOINTER_TO_UINT(g_ptr_array_index(input, cfg->i)) + : gfsmNoLabel); + + //-- check for final states + if (cfg->i >= input->len && gfsm_state_is_final(qt)) { + gfsm_automaton_set_final_state_full(result, cfg->qr, TRUE, + gfsm_automaton_get_final_weight(fst, cfg->qt)); + } + + //-- handle outgoing arcs + for (gfsm_arciter_open_ptr(&ai, fst, (gfsmState*)qt); gfsm_arciter_ok(&ai); gfsm_arciter_next(&ai)) + { + gfsmArc *arc = gfsm_arciter_arc(&ai); + + //-- epsilon arcs + if (arc->lower == gfsmEpsilon) { + cfg_new = (gfsmLookupConfig*)g_new(gfsmLookupConfig,1); + cfg_new->qt = arc->target; + cfg_new->qr = gfsm_automaton_add_state(result); + cfg_new->i = cfg->i; + gfsm_automaton_add_arc(result, cfg->qr, cfg_new->qr, arc->lower, arc->upper, arc->weight); + stack = g_slist_prepend(stack, cfg_new); + } + //-- input-matching arcs + else if (a != gfsmNoLabel && arc->lower == a) { + cfg_new = (gfsmLookupConfig*)g_new(gfsmLookupConfig,1); + cfg_new->qt = arc->target; + cfg_new->qr = gfsm_automaton_add_state(result); + cfg_new->i = cfg->i+1; + gfsm_automaton_add_arc(result, cfg->qr, cfg_new->qr, arc->lower, arc->upper, arc->weight); + stack = g_slist_prepend(stack, cfg_new); + } + } + + //-- we're done with this config + g_free(cfg); + } + + //-- set final size of the state-map + if (statemap) { statemap->len = result->states->len; } + + return result; +} + + +/*====================================================================== + * Methods: Viterbi + */ + + +//-------------------------------------------------------------- +gfsmAutomaton *gfsm_automaton_lookup_viterbi_full(gfsmAutomaton *fst, + gfsmLabelVector *input, + gfsmAutomaton *trellis, + gfsmStateIdVector *trellis2fst) +{ + //-- cols: array of (GSList <gfsmViterbiConfig*> *) + gfsmViterbiTable *cols = g_ptr_array_sized_new(input->len+1); + GSList *col, *prevcoli; + gfsmViterbiMap *fst2trellis = gfsm_viterbi_map_new(); + 
guint i; + gboolean trellis2fst_is_tmp = FALSE; + gfsmStateId qid_trellis, qid_trellis_nxt, qid_fst; + gpointer ptr_qid_trellis_nxt; + gfsmState *q_trellis, *q_trellis_nxt, *q_fst; + gfsmArcIter ai; + gfsmWeight w_trellis; + + //-- ensure trellis automaton exists and is clear + if (trellis==NULL) { + trellis = gfsm_automaton_shadow(fst); + } else { + gfsm_automaton_clear(trellis); + } + trellis->flags.is_transducer = TRUE; + + //-- ensure trellis->fst stateid-map exists and is clear + if (!trellis2fst) { + trellis2fst = g_ptr_array_sized_new(input->len+1); + trellis2fst_is_tmp = TRUE; + } else if (trellis2fst->len < 2) { + g_ptr_array_set_size(trellis2fst, input->len+1); + } + + //-- initial config: trellis structure + qid_trellis = trellis->root_id = gfsm_automaton_add_state(trellis); + q_trellis = gfsm_automaton_find_state(trellis, qid_trellis); + gfsm_automaton_set_final_state_full(trellis, qid_trellis, TRUE, fst->sr->one); + gfsm_automaton_add_arc(trellis, qid_trellis, qid_trellis, gfsmNoLabel, gfsmNoLabel, fst->sr->one); + + //-- initial config: stateid-mappings + g_ptr_array_index(trellis2fst, qid_trellis) = GUINT_TO_POINTER(fst->root_id); + g_tree_insert(fst2trellis, GUINT_TO_POINTER(fst->root_id), GUINT_TO_POINTER(qid_trellis)); + + //-- initial config: epsilon-expansion on column + g_ptr_array_index(cols,0) = col = g_slist_prepend(NULL, GUINT_TO_POINTER(qid_trellis)); + _gfsm_viterbi_expand_column(fst, trellis, col, trellis2fst, fst2trellis); + + //-- initial config: cleanup + gfsm_viterbi_map_free(fst2trellis); + + + //-- ye olde loope: for each input character (i) + for (i=0; i < input->len; i++) { + gfsmLabelVal a = (gfsmLabelVal)GPOINTER_TO_UINT(g_ptr_array_index(input, i)); + + fst2trellis = gfsm_viterbi_map_new(); + col = NULL; + + //-- get possible successors + for (prevcoli=(GSList*)g_ptr_array_index(cols,i); prevcoli != NULL; prevcoli=prevcoli->next) { + + //-- get the top element of the queue + qid_trellis = 
(gfsmStateId)GPOINTER_TO_UINT(prevcoli->data); + qid_fst = (gfsmStateId)GPOINTER_TO_UINT(g_ptr_array_index(trellis2fst, qid_trellis)); + + //-- get state pointers + q_trellis = gfsm_automaton_find_state(trellis, qid_trellis); + q_fst = gfsm_automaton_find_state(fst, qid_fst); + + //-- get Viterbi properties + w_trellis = gfsm_viterbi_node_best_weight(q_trellis); + + + //-- search for input-matching arcs & add them to the successor map for next column + for (gfsm_arciter_open_ptr(&ai, fst, q_fst), gfsm_arciter_seek_lower(&ai,a); + gfsm_arciter_ok(&ai); + gfsm_arciter_next(&ai), gfsm_arciter_seek_lower(&ai,a)) + { + gfsmArc *arc_fst = gfsm_arciter_arc(&ai); + gfsmWeight w_trellis_nxt; + gpointer orig_key; + + //-- found a matching arc: is its target state already marked as a successor? + if (g_tree_lookup_extended(fst2trellis, + GUINT_TO_POINTER(arc_fst->target), + &orig_key, + &ptr_qid_trellis_nxt)) + { + //-- yep: known successor: get old ("*_nxt") & new ("*_nxt_new") weights + gfsmWeight w_trellis_nxt_new = gfsm_sr_times(fst->sr, w_trellis, arc_fst->weight); + qid_trellis_nxt = GPOINTER_TO_UINT(ptr_qid_trellis_nxt); + q_trellis_nxt = gfsm_automaton_find_state(trellis, qid_trellis_nxt); + w_trellis_nxt = gfsm_viterbi_node_best_weight(q_trellis_nxt); + + //-- is the new path better than the stored path? 
+ if (gfsm_sr_less(fst->sr, w_trellis_nxt_new, w_trellis_nxt)) { + //-- yep: update mappings: trellis automaton + gfsmArc *arc_trellis_nxt = gfsm_viterbi_node_arc(q_trellis_nxt); + arc_trellis_nxt->target = qid_trellis; + arc_trellis_nxt->lower = a; + arc_trellis_nxt->upper = arc_fst->upper; + arc_trellis_nxt->weight = w_trellis_nxt_new; + + //-- update mappings: trellis->fst stateid-map + g_ptr_array_index(trellis2fst, qid_trellis_nxt) = GUINT_TO_POINTER(arc_fst->target); + + //-- update mappings: fst->trellis stateid-map + g_tree_insert(fst2trellis, GUINT_TO_POINTER(arc_fst->target), GUINT_TO_POINTER(qid_trellis_nxt)); + } + } + else + { + //-- target state not already marked as a successor: mark it + qid_trellis_nxt = gfsm_automaton_add_state(trellis); + q_trellis_nxt = gfsm_automaton_find_state(trellis,qid_trellis_nxt); + gfsm_automaton_add_arc(trellis, + qid_trellis_nxt, qid_trellis, + a, arc_fst->upper, + gfsm_sr_times(fst->sr, w_trellis, arc_fst->weight)); + + //-- save trellis->fst stateid-map + if (qid_trellis_nxt >= trellis2fst->len) { + g_ptr_array_set_size(trellis2fst, qid_trellis_nxt + gfsmLookupStateMapGet); + } + g_ptr_array_index(trellis2fst,qid_trellis_nxt) = GUINT_TO_POINTER(arc_fst->target); + + //-- save fst->trellis stateid-map + g_tree_insert(fst2trellis, GUINT_TO_POINTER(arc_fst->target), GUINT_TO_POINTER(qid_trellis_nxt)); + + //-- add new trellis state to the column + col = g_slist_prepend(col, GUINT_TO_POINTER(qid_trellis_nxt)); + } + + } //-- END: seek input-matching arcs + } //-- END: previous column iteration (prevcoli) + + //-- expand epsilons in current column + _gfsm_viterbi_expand_column(fst, trellis, col, trellis2fst, fst2trellis); + + //-- update column table + g_ptr_array_index(cols,i+1) = col; + + //-- per-input-index cleanup + gfsm_viterbi_map_free(fst2trellis); + } + + //-- final iteration (EOS): get possible "final" states + qid_trellis_nxt = gfsm_automaton_add_state(trellis); //-- qid_trellis_nxt: new root + for 
(prevcoli=(GSList*)g_ptr_array_index(cols,input->len); prevcoli != NULL; prevcoli=prevcoli->next) { + + //-- get the top element of the queue + qid_trellis = (gfsmStateId)GPOINTER_TO_UINT(prevcoli->data); + qid_fst = (gfsmStateId)GPOINTER_TO_UINT(g_ptr_array_index(trellis2fst, qid_trellis)); + + //-- get state pointers + q_trellis = gfsm_automaton_find_state(trellis, qid_trellis); + q_fst = gfsm_automaton_find_state(fst, qid_fst); + + //-- get Viterbi properties + w_trellis = gfsm_viterbi_node_best_weight(q_trellis); + + //-- check for finality + if (q_fst->is_final) { + gfsm_automaton_add_arc(trellis, qid_trellis_nxt, qid_trellis, + gfsmEpsilon, gfsmEpsilon, + gfsm_sr_times(fst->sr, + w_trellis, + gfsm_automaton_get_final_weight(fst,qid_fst))); + } + } + + //-- mark single best path from new root + qid_trellis = qid_trellis_nxt; + q_trellis = gfsm_automaton_find_state(trellis,qid_trellis); + q_trellis->arcs = gfsm_arclist_sort(q_trellis->arcs, + &((gfsmArcCompData){gfsmASMWeight,fst->sr,NULL,NULL})); + + //-- break dummy arc on trellis final state (old root) + q_trellis = gfsm_automaton_find_state(trellis,trellis->root_id); + gfsm_arclist_free(q_trellis->arcs); + q_trellis->arcs = NULL; + + //-- mark new root + trellis->root_id = qid_trellis; + + + //-- cleanup: columns + for (i=0; i < cols->len; i++) { + g_slist_free((GSList*)g_ptr_array_index(cols,i)); + } + + //-- cleanup: column array + g_ptr_array_free(cols,TRUE); + if (trellis2fst_is_tmp) g_ptr_array_free(trellis2fst,TRUE); + else { + //-- just set length + trellis2fst->len = trellis->states->len; + } + + return trellis; +} + + +/*====================================================================== + * Methods: Viterbi: expand_column + */ + +//-------------------------------------------------------------- +void _gfsm_viterbi_expand_column(gfsmAutomaton *fst, + gfsmAutomaton *trellis, + gfsmViterbiColumn *col, + gfsmStateIdVector *trellis2fst, + gfsmViterbiMap *fst2trellis) +{ + gfsmArcIter ai; + 
gfsmViterbiColumn *coli; + gfsmStateId qid_trellis, qid_fst; + gfsmState *q_trellis; + gfsmArc *arc_trellis; + gfsmWeight w_trellis; + + //-- pass-1: add everything already in the column as a literal + /* + for (coli=col; coli != NULL; coli=coli->next) { + node = (gfsmViterbiNode*)(coli->data); + if (!g_tree_lookup(fst2trellis,node->key)) { + g_tree_insert(cmap,node->key,node->val); + } + } + */ + + //-- pass-2: add epsilon arcs from every literal in the column + for (coli=col; coli != NULL; coli=coli->next) { + //-- get node + qid_trellis = (gfsmStateId)GPOINTER_TO_UINT(coli->data); + q_trellis = gfsm_automaton_find_state(trellis,qid_trellis); + arc_trellis = gfsm_viterbi_node_arc(q_trellis); + w_trellis = gfsm_viterbi_node_best_weight(q_trellis); + qid_fst = (gfsmStateId)GPOINTER_TO_UINT(g_ptr_array_index(trellis2fst,qid_trellis)); + + //-- search for input-epsilon arcs & add them to this column + for (gfsm_arciter_open(&ai,fst,qid_fst), gfsm_arciter_seek_lower(&ai,gfsmEpsilon); + gfsm_arciter_ok(&ai); + gfsm_arciter_next(&ai), gfsm_arciter_seek_lower(&ai,gfsmEpsilon)) + { + gfsmArc *arc_fst = gfsm_arciter_arc(&ai); + gfsmStateId qid_trellis_nxt = gfsmNoState; + gpointer ptr_qid_trellis_nxt; + gfsmState *q_trellis_nxt; + gfsmWeight w_trellis_nxt; + gpointer orig_key; + + //-- found an eps-arc: is its target state already marked as a successor? + if (g_tree_lookup_extended(fst2trellis, + GUINT_TO_POINTER(arc_fst->target), + &orig_key, + &ptr_qid_trellis_nxt)) + { + //-- yep: get the old ("*_eps") & new ("*_nxt") weights + gfsmWeight w_trellis_eps = gfsm_sr_times(fst->sr, w_trellis, arc_fst->weight); + qid_trellis_nxt = GPOINTER_TO_UINT(ptr_qid_trellis_nxt); + q_trellis_nxt = gfsm_automaton_find_state(trellis,qid_trellis_nxt); + w_trellis_nxt = gfsm_viterbi_node_best_weight(q_trellis_nxt); + + //-- is the new eps-path better than the stored path? 
+ if (gfsm_sr_less(fst->sr,w_trellis_eps,w_trellis_nxt)) { + //-- yep: update mappings: trellis automaton + gfsmArc *arc_trellis_nxt = gfsm_viterbi_node_arc(q_trellis_nxt); + arc_trellis_nxt->target = qid_trellis; + arc_trellis_nxt->lower = gfsmEpsilon; + arc_trellis_nxt->upper = arc_fst->upper; + arc_trellis_nxt->weight = w_trellis_eps; + + //-- update mappings: trellis->fst stateid-map + g_ptr_array_index(trellis2fst, qid_trellis_nxt) = GUINT_TO_POINTER(arc_fst->target); + + //-- update mappings: fst->trellis stateid-map + g_tree_insert(fst2trellis, GUINT_TO_POINTER(arc_fst->target), GUINT_TO_POINTER(qid_trellis_nxt)); + } + else { + //-- eps-path is worse than the existing path: forget about it + ; + } + } + else + { + //-- eps-target state not already marked as a successor: mark it + qid_trellis_nxt = gfsm_automaton_add_state(trellis); + q_trellis_nxt = gfsm_automaton_find_state(trellis,qid_trellis_nxt); + gfsm_automaton_add_arc(trellis, + qid_trellis_nxt, qid_trellis, + gfsmEpsilon, arc_fst->upper, + gfsm_sr_times(fst->sr, w_trellis, arc_fst->weight)); + + //-- save trellis->fst stateid-map + if (qid_trellis_nxt >= trellis2fst->len) { + g_ptr_array_set_size(trellis2fst, qid_trellis_nxt + gfsmLookupStateMapGet); + } + g_ptr_array_index(trellis2fst,qid_trellis_nxt) = GUINT_TO_POINTER(arc_fst->target); + + //-- save fst->trellis stateid-map + g_tree_insert(fst2trellis, GUINT_TO_POINTER(arc_fst->target), GUINT_TO_POINTER(qid_trellis_nxt)); + + //-- queue-up new trellis state for eps-seek + coli->next = g_slist_prepend(coli->next, GUINT_TO_POINTER(qid_trellis_nxt)); + } + + } //-- END: seek epsilon arcs + + } //-- END column iteration + +} + + +/*====================================================================== + * Methods: Viterbi: Map + */ + +//-------------------------------------------------------------- +#if 0 +gfsmViterbiNodeValue *gfsm_viterbi_column_map_insert_if_less(gfsmViterbiMap *vmap, + gfsmViterbiNodeKey key, + gfsmWeight w, + gfsmSemiring *sr) 
+{ + gpointer s_val; + if (s_val = gfsm_viterbi_map_lookup(vmap,key)) { + //-- already present + if (!gfsm_sr_less(sr,w,s_val->w)) return NULL; //-- (s_val->w) <= (w) + s_val->w = w; + } else { + //-- not already present: copy & insert + s_val = g_new(gfsmViterbiNodeValue,1); + s_val->qtrellis = gfsmNoState; + a_val->pqtrellis = gfsmNoState; + s_val->w = w; + g_tree_insert(col,key,s_val); + } + return s_val; //-- update occurred +} +#endif + diff --git a/gfsm/gfsm/src/libgfsm/gfsmLookup.h b/gfsm/gfsm/src/libgfsm/gfsmLookup.h new file mode 100644 index 0000000..7157047 --- /dev/null +++ b/gfsm/gfsm/src/libgfsm/gfsmLookup.h @@ -0,0 +1,212 @@ + +/*=============================================================================*\ + * File: gfsmLookup.h + * Author: Bryan Jurish <moocow@ling.uni-potsdam.de> + * Description: finite state machine library + * + * Copyright (c) 2005-2007 Bryan Jurish. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + *=============================================================================*/ + +/** \file gfsmLookup.h + * \brief Linear composition + */ + +#ifndef _GFSM_LOOKUP_H +#define _GFSM_LOOKUP_H + +#include <gfsmAutomaton.h> +#include <gfsmUtils.h> + +/*====================================================================== + * Types: lookup + */ +/** \brief Type for gfsm_automaton_lookup() computation state */ +typedef struct { + gfsmStateId qt; /**< current state in transducer */ + gfsmStateId qr; /**< current state in result acceptor */ + guint32 i; /**< current position in input vector */ +} gfsmLookupConfig; + +//------------------------------ + +/** Type for gfsm_automaton_lookup_viterbi(): Trellis (1 per call) */ +typedef GPtrArray gfsmViterbiTable; + +/** Viterbi algorithm best-successor accumulator + * \arg key is a gfsmStateId (state in fst) + * \arg value is a gfsmStateId (state in trellis) + */ +typedef GTree gfsmViterbiMap; + +/** Key type for gfsmViterbiMap (state-id in fst) */ +typedef gfsmStateId gfsmViterbiMapKey; + +/** Value type for gfsmViterbiMap (state-id in trellis) */ +typedef gfsmStateId gfsmViterbiMapValue; + +/** Type for Viterbi trellis column (1 per input index) + * \arg data is a gfsmStateId in trellis automaton + */ +typedef GSList gfsmViterbiColumn; + +/** Type for Viterbi trellis nodes: state in trellis automaton + * \arg state \a q has exactly one outgoing arc \a arc=((gfsmArc*)q->arcs->data) + * \arg best preceding state in trellis is \a arc->target + * \arg label of best arc from best preceding state in trellis is \a arc->lower + * \arg total weight of best path to this state is \a arc->weight + */ +typedef gfsmState gfsmViterbiNode; + + +/*====================================================================== + *
Constants + */ + +/** Number of states to pre-allocate when extending state-map vector on lookup_full() (>= 1) */ +extern const gfsmStateId gfsmLookupStateMapGet; + + +/*====================================================================== + * Methods: lookup + */ +///\name Lookup +//@{ + +//------------------------------ +/** Compose linear automaton specified by \a input with the transducer + * \a fst , storing result in \a result. + * \param fst transducer (lower-upper) + * \param input input labels (lower) + * \param result output transducer or NULL + * \returns \a result if non-NULL, otherwise a new automaton. + */ +#define gfsm_automaton_lookup(fst,input,result) \ + gfsm_automaton_lookup_full((fst),(input),(result),NULL) + +//------------------------------ +/** Compose linear automaton specified by \a input with the transducer + * \a fst , storing result in \a result , and storing state-translation map \a statemap. + * \param fst transducer (lower-upper) + * \param input input labels (lower) + * \param result output transducer or NULL + * \param statemap if non-NULL, maps \a result StateIds (indices) to \a fst StateIds (values) on return. + * Not implicitly created or cleared. + * \returns \a result if non-NULL, otherwise a new automaton. + */ +gfsmAutomaton *gfsm_automaton_lookup_full(gfsmAutomaton *fst, + gfsmLabelVector *input, + gfsmAutomaton *result, + gfsmStateIdVector *statemap); + +//@} + + +/*====================================================================== + * Methods: Viterbi + */ +///\name Viterbi Lookup +//@{ + +//------------------------------ +/** Get the best path for input \a input in the transducer \a fst using the Viterbi algorithm. + * \param fst transducer (lower-upper) + * \param input input labels (lower) + * \param trellis output fsm or NULL + * \returns \a trellis if non-NULL, otherwise a new automaton representing the (reversed) Viterbi trellis. 
+ * \arg labels (lower & upper) in \a trellis represent upper labels of \a fst + * \arg arc-weights in \a trellis represent Viterbi algorithm weights (gamma) + * \arg arc-targets in \a trellis represent the best preceding state (psi) + */ +#define gfsm_automaton_lookup_viterbi(fst,input,trellis) \ + gfsm_automaton_lookup_viterbi_full((fst),(input),(trellis),NULL) + +//------------------------------ +/** Get the best path for input \a input in the transducer \a fst using the Viterbi algorithm. + * \param fst transducer (lower-upper) + * \param input input labels (lower) + * \param trellis output fsm or NULL + * \param trellis2fst if non-NULL, maps \a trellis StateIds (indices) to \a fst StateIds (values) on return. + * If NULL, a temporary vector will be created & freed. + * \returns \a trellis if non-NULL, otherwise a new automaton representing the (reversed) Viterbi trellis. + * \arg lower-labels in \a trellis represent \a input labels + * \arg upper-labels of \a trellis represent upper labels of \a fst + * \arg arc-weights in \a trellis represent Viterbi algorithm weights (gamma) + * \arg arc-targets in \a trellis represent the best preceding state (psi) + * \arg root state of \a trellis has arcs sorted by total path weight (best-first) + */ +gfsmAutomaton *gfsm_automaton_lookup_viterbi_full(gfsmAutomaton *fst, + gfsmLabelVector *input, + gfsmAutomaton *trellis, + gfsmStateIdVector *trellis2fst); + +//@} + +/*====================================================================== + * Viterbi: Utilities + */ +///\name Viterbi Low-level Utilities +//@{ + + +//------------------------------ +// expand_column() + +/** Expand lower-epsilon arcs from \a fst into \a col.
*/ +void _gfsm_viterbi_expand_column(gfsmAutomaton *fst, + gfsmAutomaton *trellis, + gfsmViterbiColumn *col, + gfsmStateIdVector *trellis2fst, + gfsmViterbiMap *fst2trellis); + + +//------------------------------ +// gfsmViterbiMap + +/** Create a new gfsmViterbiMap */ +#define gfsm_viterbi_map_new() \ + g_tree_new_full((GCompareDataFunc)gfsm_uint_compare, NULL, NULL, NULL) + + +/** Free a gfsmViterbiMap */ +#define gfsm_viterbi_map_free(vmap) g_tree_destroy(vmap) + + +/** Lookup stored value in a gfsmViterbiMap + * \returns gpointer to the stored value for \a key in \a vmap + */ +#define gfsm_viterbi_map_lookup(vmap,key) g_tree_lookup((vmap),(key)) + +/** Insert a literal value into a gfsmViterbiMap */ +#define gfsm_viterbi_map_insert(vmap,key,val) g_tree_insert((vmap),(gpointer)(key),(gpointer)(val)) + + +//------------------------------ +// gfsmViterbiNode + +/** gfsmViterbiNode: Accessor: unique outgoing arc for \a nod */ +//#define gfsm_viterbi_node_arc(nod) ((gfsmArc*)((nod)->arcs->data)) +#define gfsm_viterbi_node_arc(nod) gfsm_arclist_arc((nod)->arcs) + +/** gfsmViterbiNode: Accessor: Best preceding state accessor for \a nod */ +#define gfsm_viterbi_node_best_prevstate(nod) gfsm_viterbi_node_arc(nod)->target + +/** gfsmViterbiNode: Accessor: Total weight of best path to \a nod */ +#define gfsm_viterbi_node_best_weight(nod) gfsm_viterbi_node_arc(nod)->weight + +//@} + +#endif /* _GFSM_LOOKUP_H */ diff --git a/gfsm/gfsm/src/libgfsm/gfsmMem.c b/gfsm/gfsm/src/libgfsm/gfsmMem.c new file mode 100644 index 0000000..57198d7 --- /dev/null +++ b/gfsm/gfsm/src/libgfsm/gfsmMem.c @@ -0,0 +1,36 @@ + +/*=============================================================================*\ + * File: gfsmMem.c + * Author: Bryan Jurish <moocow@ling.uni-potsdam.de> + * Description: finite state machine library: memory utilities + * + * Copyright (c) 2004-2007 Bryan Jurish.
+ * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + *=============================================================================*/ + +#include <gfsmMem.h> + +#ifndef GFSM_INLINE_ENABLED +# include <gfsmMem.hi> +#endif + +/*---------------------------------------------------------------------- + * Allocators + */ +GAllocator *gfsm_node_allocator = NULL; +GAllocator *gfsm_slist_allocator = NULL; +GAllocator *gfsm_list_allocator = NULL; +gboolean gfsm_allocators_enabled = FALSE; diff --git a/gfsm/gfsm/src/libgfsm/gfsmMem.h b/gfsm/gfsm/src/libgfsm/gfsmMem.h new file mode 100644 index 0000000..4791d26 --- /dev/null +++ b/gfsm/gfsm/src/libgfsm/gfsmMem.h @@ -0,0 +1,110 @@ + +/*=============================================================================*\ + * File: gfsmMem.h + * Author: Bryan Jurish <moocow@ling.uni-potsdam.de> + * Description: finite state machine library: memory utilities (currently unused) + * + * Copyright (c) 2004 Bryan Jurish. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. 
+ * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + *=============================================================================*/ + +/** \file gfsmMem.h + * \brief Memory utilities + */ + +#ifndef _GFSM_MEM_H +#define _GFSM_MEM_H + +#include <glib.h> +#include <gfsmConfig.h> + +/*---------------------------------------------------------------------- + * Allocators: variables + */ + +/** Default GNode allocator */ +extern GAllocator *gfsm_node_allocator; + +/** Default GSList allocator */ +extern GAllocator *gfsm_slist_allocator; + +/** Default GList allocator */ +extern GAllocator *gfsm_list_allocator; + +/** Whether gfsm allocators are currently enabled */ +extern gboolean gfsm_allocators_enabled; + + +/*---------------------------------------------------------------------- + * Allocators + * - these aren't used by default! 
+ */ + +/** Ensure that gfsm allocators are defined and non-NULL */ +GFSM_INLINE +void gfsm_allocators_init(void); + +/** Push gfsm allocators to the stack */ +GFSM_INLINE +void gfsm_allocators_enable(void); + +/** Pop gfsm allocators from the stack */ +GFSM_INLINE +void gfsm_allocators_disable(void); + +/** Free all memory allocated by the gfsm allocators */ +GFSM_INLINE +void gfsm_allocators_free(void); + + +/*---------------------------------------------------------------------- + * Copying + */ +/** Abstract copy function */ +typedef gpointer (*gfsmDupNFunc) (gconstpointer src, gsize size); + +/** Abstract duplication function */ +typedef gpointer (*gfsmDupFunc) (gconstpointer src); + +/** String copy function for NUL-terminated strings */ +GFSM_INLINE +gpointer gfsm_string_dup_n (gconstpointer src, gsize size); + +/** size-based copy function */ +GFSM_INLINE +gpointer gfsm_mem_dup_n (gconstpointer src, gsize size); + +/** String duplication function for NUL-terminated strings */ +#define gfsm_string_dup g_strdup + +/** String duplication function for GString*s */ +GFSM_INLINE +GString *gfsm_gstring_dup (GString *gstr); + +/** Byte-assignment for GString*s */ +GFSM_INLINE +void gfsm_gstring_assign_bytes (GString *gstr, const gchar *src, gsize len); + +/** Byte-vector creation for GString*s */ +GFSM_INLINE +GString *gfsm_gstring_new_bytes (const gchar *src, gsize len); + +//-- inline definitions +#ifdef GFSM_INLINE_ENABLED +# include <gfsmMem.hi> +#endif + +#endif /* _GFSM_MEM_H */ diff --git a/gfsm/gfsm/src/libgfsm/gfsmMem.hi b/gfsm/gfsm/src/libgfsm/gfsmMem.hi new file mode 100644 index 0000000..4b9819d --- /dev/null +++ b/gfsm/gfsm/src/libgfsm/gfsmMem.hi @@ -0,0 +1,135 @@ + +/*=============================================================================*\ + * File: gfsmMem.def + * Author: Bryan Jurish <moocow@ling.uni-potsdam.de> + * Description: finite state machine library: memory utilities: inline definitions + * + * Copyright (c) 2004 Bryan Jurish. 
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *=============================================================================*/
+
+#include <string.h>
+
+/*----------------------------------------------------------------------
+ * allocators_init()
+ *  Lazily create each of the three gfsm allocators (node, slist, list).
+ *  An allocator that is already non-NULL is left untouched, so the call
+ *  may be repeated.
+ */
+GFSM_INLINE
+void gfsm_allocators_init(void)
+{
+  if (!gfsm_node_allocator) gfsm_node_allocator = g_allocator_new("gfsm_node_allocator",128);
+  if (!gfsm_slist_allocator) gfsm_slist_allocator = g_allocator_new("gfsm_slist_allocator",128);
+  if (!gfsm_list_allocator) gfsm_list_allocator = g_allocator_new("gfsm_list_allocator",128);
+}
+
+/*----------------------------------------------------------------------
+ * allocators_enable()
+ *  Create the gfsm allocators if necessary, push them for GNode, GSList
+ *  and GList, and set gfsm_allocators_enabled.  Does nothing when the
+ *  flag is already set, so pushes and pops stay balanced.
+ */
+GFSM_INLINE
+void gfsm_allocators_enable(void)
+{
+  if (!gfsm_allocators_enabled) {
+    gfsm_allocators_init();
+    g_node_push_allocator(gfsm_node_allocator);
+    g_slist_push_allocator(gfsm_slist_allocator);
+    g_list_push_allocator(gfsm_list_allocator);
+    gfsm_allocators_enabled = TRUE;
+  }
+}
+
+/*----------------------------------------------------------------------
+ * allocators_disable()
+ *  Pop the GNode, GSList and GList allocators and clear
+ *  gfsm_allocators_enabled.  Does nothing when the flag is not set.
+ */
+GFSM_INLINE
+void gfsm_allocators_disable(void)
+{
+  if (gfsm_allocators_enabled) {
+    g_node_pop_allocator();
+    g_slist_pop_allocator();
+    g_list_pop_allocator();
+    gfsm_allocators_enabled = FALSE;
+  }
+}
+
+
+/*----------------------------------------------------------------------
+ * allocators_free()
+ *  Disable the gfsm allocators, free each one that exists, and reset the
+ *  three globals to NULL so that a later allocators_init() recreates them.
+ */
+GFSM_INLINE
+void gfsm_allocators_free(void)
+{
+  /* pop first: the allocators must not be freed while still pushed */
+  gfsm_allocators_disable();
+
+  if (gfsm_node_allocator) g_allocator_free(gfsm_node_allocator);
+  if (gfsm_slist_allocator) g_allocator_free(gfsm_slist_allocator);
+  if (gfsm_list_allocator) g_allocator_free(gfsm_list_allocator);
+
+  gfsm_node_allocator=NULL;
+  gfsm_slist_allocator=NULL;
+  gfsm_list_allocator=NULL;
+}
+
+
+/*----------------------------------------------------------------------
+ * string_dup_n()
+ *  gfsmDupNFunc-compatible wrapper around g_strndup(): duplicates \a src
+ *  with \a size passed as the length limit.  The explicit cast is only
+ *  needed when compiled as C++, where (const void*) does not convert
+ *  implicitly to (const gchar*).
+ */
+GFSM_INLINE
+gpointer gfsm_string_dup_n(gconstpointer src, gsize size)
+{
+#ifndef __cplusplus
+  return g_strndup(src,size);
+#else
+  return g_strndup(static_cast<const gchar *>(src), size);
+#endif
+}
+
+/*----------------------------------------------------------------------
+ * mem_dup_n()
+ *  gfsmDupNFunc-compatible wrapper around g_memdup(): duplicates \a size
+ *  bytes starting at \a src.
+ *  NOTE(review): g_memdup() takes its size as a guint, so a \a size above
+ *  G_MAXUINT would be truncated -- confirm that callers never pass one.
+ */
+GFSM_INLINE
+gpointer gfsm_mem_dup_n(gconstpointer src, gsize size)
+{ return g_memdup(src,size); }
+
+/*----------------------------------------------------------------------
+ * gstring_dup()
+ *  Return a new GString holding a copy of the gstr->len bytes of \a gstr
+ *  (embedded NULs included).  \a gstr must be non-NULL.
+ */
+GFSM_INLINE
+GString *gfsm_gstring_dup (GString *gstr)
+{
+  GString *dst = g_string_sized_new(gstr->len);
+  g_string_append_len(dst, gstr->str, gstr->len);
+  return dst;
+}
+
+/*----------------------------------------------------------------------
+ * gstring_assign_bytes()
+ *  Replace the contents of \a gstr with the \a len bytes at \a src.
+ */
+GFSM_INLINE
+void gfsm_gstring_assign_bytes (GString *gstr, const gchar *src, gsize len)
+{
+  g_string_truncate(gstr, 0);
+  /* append the caller's byte count: gstr->len is 0 after the truncate,
+   * so using it here (as the code formerly did) copied nothing at all */
+  g_string_append_len(gstr, src, len);
+}
+
+/*----------------------------------------------------------------------
+ * gstring_new_bytes()
+ *  Return a new GString holding a copy of the \a len bytes at \a src.
+ */
+GFSM_INLINE
+GString *gfsm_gstring_new_bytes (const gchar *src, gsize len)
+{
+  /* Size hint only.  Do not inspect src[len]: it lies one byte past the
+   * vector the caller handed us, and the GString grows on demand anyway. */
+  GString *dst = g_string_sized_new(len);
+  g_string_append_len(dst,src,len);
+  return dst;
+}
diff --git a/gfsm/gfsm/src/libgfsm/gfsmPaths.c b/gfsm/gfsm/src/libgfsm/gfsmPaths.c
new file mode 100644
index 0000000..7b5faa5
--- /dev/null
+++ b/gfsm/gfsm/src/libgfsm/gfsmPaths.c
@@ -0,0 +1,431 @@
+
+/*=============================================================================*\
+ * File: gfsmPaths.c
+ * Author: Bryan Jurish <moocow@ling.uni-potsdam.de>
+ * Description: finite state machine library
+ *
+ * Copyright (c) 2005-2007 Bryan Jurish.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *=============================================================================*/
+
+#include <gfsmPaths.h>
+#include <gfsmArc.h>
+#include <gfsmArcIter.h>
+
+
+/*======================================================================
+ * Methods: Path Utilities: gfsmLabelVector
+ */
+
+//--------------------------------------------------------------
+// Resize dst to src->len and copy every element of src into it.
+// Returns dst.  Both vectors must be non-NULL.
+gfsmLabelVector *gfsm_label_vector_copy(gfsmLabelVector *dst, gfsmLabelVector *src)
+{
+  guint i; //-- unsigned, like src->len: avoids a signed/unsigned comparison
+  g_ptr_array_set_size(dst, src->len);
+  for (i=0; i < src->len; i++) {
+    g_ptr_array_index(dst,i) = g_ptr_array_index(src,i);
+  }
+  return dst;
+}
+
+//--------------------------------------------------------------
+// Reverse v in place by swapping elements pairwise from both ends.
+// Returns v.
+gfsmLabelVector *gfsm_label_vector_reverse(gfsmLabelVector *v)
+{
+  guint i, mid;
+  gpointer tmp;
+  mid = v->len/2; //-- for odd lengths the middle element stays put
+  for (i=0; i < mid; i++) {
+    tmp = g_ptr_array_index(v,i);
+    g_ptr_array_index(v,i) = g_ptr_array_index(v,v->len-i-1);
+    g_ptr_array_index(v,v->len-i-1) = tmp;
+  }
+  return v;
+}
+
+/*======================================================================
+ * Methods: Path Utilities: gfsmPath
+ */
+
+//--------------------------------------------------------------
+// Allocate a gfsmPath with weight w.  The given lo / hi vectors are
+// stored as-is (not copied); a NULL vector is replaced by a new empty one.
+gfsmPath *gfsm_path_new_full(gfsmLabelVector *lo, gfsmLabelVector *hi, gfsmWeight w)
+{
+  gfsmPath *p = g_new(gfsmPath,1);
+  p->lo = lo ? lo : g_ptr_array_new();
+  p->hi = hi ? hi : g_ptr_array_new();
+  p->w = w;
+  return p;
+}
+
+//--------------------------------------------------------------
+// Allocate a gfsmPath whose label vectors are element-wise copies of
+// those of p1 and whose weight equals p1->w.
+gfsmPath *gfsm_path_new_copy(gfsmPath *p1)
+{
+  gfsmPath *p = g_new(gfsmPath,1);
+
+  p->lo = g_ptr_array_sized_new(p1->lo->len);
+  p->hi = g_ptr_array_sized_new(p1->hi->len);
+
+  gfsm_label_vector_copy(p->lo, p1->lo);
+  gfsm_label_vector_copy(p->hi, p1->hi);
+
+  p->w = p1->w;
+
+  return p;
+}
+
+//--------------------------------------------------------------
+// Allocate a copy of p1 extended by one arc: lo / hi are appended to the
+// respective vector unless equal to gfsmEpsilon, and the new weight is
+// gfsm_sr_times(sr, p1->w, w).  p1 itself is not modified.
+gfsmPath *gfsm_path_new_append(gfsmPath *p1, gfsmLabelVal lo, gfsmLabelVal hi, gfsmWeight w, gfsmSemiring *sr)
+{
+  gfsmPath *p = g_new(gfsmPath,1);
+
+  if (lo != gfsmEpsilon) {
+    //-- reserve room for the extra label up front
+    p->lo = g_ptr_array_sized_new(p1->lo->len+1);
+    gfsm_label_vector_copy(p->lo, p1->lo);
+    g_ptr_array_add(p->lo, GUINT_TO_POINTER(lo));
+  } else {
+    p->lo = g_ptr_array_sized_new(p1->lo->len);
+    gfsm_label_vector_copy(p->lo, p1->lo);
+  }
+
+  if (hi != gfsmEpsilon) {
+    p->hi = g_ptr_array_sized_new(p1->hi->len+1);
+    gfsm_label_vector_copy(p->hi, p1->hi);
+    g_ptr_array_add(p->hi, GUINT_TO_POINTER(hi));
+  } else {
+    p->hi = g_ptr_array_sized_new(p1->hi->len);
+    gfsm_label_vector_copy(p->hi, p1->hi);
+  }
+
+  p->w = gfsm_sr_times(sr, p1->w, w);
+
+  return p;
+}
+
+//--------------------------------------------------------------
+// Allocate a copy of p1 whose weight is gfsm_sr_times(sr, p1->w, w);
+// the label vectors are copied unchanged.
+gfsmPath *gfsm_path_new_times_w(gfsmPath *p1, gfsmWeight w, gfsmSemiring *sr)
+{
+  gfsmPath *p = g_new(gfsmPath,1);
+
+  p->lo = g_ptr_array_sized_new(p1->lo->len);
+  gfsm_label_vector_copy(p->lo, p1->lo);
+
+  p->hi = g_ptr_array_sized_new(p1->hi->len);
+  gfsm_label_vector_copy(p->hi, p1->hi);
+
+  p->w = gfsm_sr_times(sr, p1->w, w);
+
+  return p;
+}
+
+//--------------------------------------------------------------
+// Extend p in place by one arc: append lo / hi to the respective vector
+// unless equal to gfsmEpsilon, and multiply p->w by w in semiring sr.
+void gfsm_path_push(gfsmPath *p, gfsmLabelVal lo, gfsmLabelVal hi, gfsmWeight w, gfsmSemiring *sr)
+{
+  if (lo != gfsmEpsilon) g_ptr_array_add(p->lo, GUINT_TO_POINTER(lo));
+  if (hi != gfsmEpsilon) g_ptr_array_add(p->hi, GUINT_TO_POINTER(hi));
+  p->w = gfsm_sr_times(sr, p->w, w);
+}
+
+
+//--------------------------------------------------------------
+// Undo the label part of a gfsm_path_push(): drop the last element of
+// p->lo / p->hi for each of lo / hi that is not gfsmEpsilon.  The path
+// weight is NOT restored; callers save and reset p->w themselves.
+void gfsm_path_pop(gfsmPath *p, gfsmLabelVal lo, gfsmLabelVal hi)
+{
+  //-- guard against empty vectors: len-1 would wrap around for len==0
+  if (lo != gfsmEpsilon && p->lo->len > 0) g_ptr_array_remove_index_fast(p->lo, p->lo->len-1);
+  if (hi != gfsmEpsilon && p->hi->len > 0) g_ptr_array_remove_index_fast(p->hi, p->hi->len-1);
+}
+
+//--------------------------------------------------------------
+// 3-way lexicographic comparison of two label vectors: the first
+// differing label decides; if one vector is a prefix of the other,
+// the shorter one sorts first.  Returns -1, 0 or 1.
+int gfsm_label_vector_compare(const gfsmLabelVector *v1, const gfsmLabelVector *v2)
+{
+  guint i; //-- unsigned, like the vector lengths it is compared against
+  gfsmLabelVal lab1, lab2;
+  if (v1==v2) return 0;
+
+  for (i=0; i < v1->len && i < v2->len; i++) {
+    lab1 = (gfsmLabelVal)GPOINTER_TO_UINT(g_ptr_array_index(v1,i));
+    lab2 = (gfsmLabelVal)GPOINTER_TO_UINT(g_ptr_array_index(v2,i));
+    if (lab1 < lab2) return -1;
+    if (lab1 > lab2) return 1;
+  }
+  if (v1->len < v2->len) return -1;
+  if (v1->len > v2->len) return 1;
+  return 0;
+}
+
+//--------------------------------------------------------------
+// 3-way path comparison, usable as a GCompareDataFunc with the semiring
+// as user data: compares weights via gfsm_sr_compare() first, then the
+// lower label vectors, then the upper label vectors.
+int gfsm_path_compare_data(const gfsmPath *p1, const gfsmPath *p2, gfsmSemiring *sr)
+{
+  int cmp;
+  if (p1==p2) return 0;
+  if ((cmp=gfsm_sr_compare(sr, p1->w, p2->w))!=0) return cmp;
+  if ((cmp=gfsm_label_vector_compare(p1->lo,p2->lo))!=0) return cmp;
+  if ((cmp=gfsm_label_vector_compare(p1->hi,p2->hi))!=0) return cmp;
+  return 0;
+}
+
+//--------------------------------------------------------------
+// Reverse both label vectors of p in place (NULL vectors are skipped).
+// Returns p.
+gfsmPath *gfsm_path_reverse(gfsmPath *p)
+{
+  if (p->lo) gfsm_label_vector_reverse(p->lo);
+  if (p->hi) gfsm_label_vector_reverse(p->hi);
+  return p;
+}
+
+//--------------------------------------------------------------
+// Free p together with both of its label vectors.  NULL is accepted.
+void gfsm_path_free(gfsmPath *p)
+{
+  if (!p) return;
+  if (p->lo) g_ptr_array_free(p->lo,TRUE);
+  if (p->hi) g_ptr_array_free(p->hi,TRUE);
+  g_free(p);
+}
+
+/*======================================================================
+ * Methods: Automaton Serialization: paths()
+ */
+
+//--------------------------------------------------------------
+// Wrapper for gfsm_automaton_paths_full(): serializes both label sides
+// if fsm is flagged as a transducer, otherwise the lower side only.
+gfsmSet *gfsm_automaton_paths(gfsmAutomaton *fsm, gfsmSet *paths)
+{
+  return gfsm_automaton_paths_full(fsm, paths, (fsm->flags.is_transducer ? gfsmLSBoth : gfsmLSLower));
+}
+
+//--------------------------------------------------------------
+// Collect the paths of fsm, starting at its root state, into paths.
+// A NULL paths argument is replaced by a new set ordered by
+// gfsm_path_compare_data() which frees its elements with gfsm_path_free().
+// Returns the set used.
+gfsmSet *gfsm_automaton_paths_full(gfsmAutomaton *fsm, gfsmSet *paths, gfsmLabelSide which)
+{
+  //-- scratch path: extended and shrunk during recursion, copied on insert
+  gfsmPath *tmp = gfsm_path_new(fsm->sr);
+  if (paths==NULL) {
+    paths = gfsm_set_new_full((GCompareDataFunc)gfsm_path_compare_data,
+                              (gpointer)fsm->sr,
+                              (GDestroyNotify)gfsm_path_free);
+  }
+  _gfsm_automaton_paths_r(fsm, paths, which, fsm->root_id, tmp);
+  gfsm_path_free(tmp);
+  return paths;
+}
+
+//--------------------------------------------------------------
+// Recursive guts of gfsm_automaton_paths_full(): depth-first traversal
+// from state q.  path holds the labels and weight accumulated so far and
+// is restored to its entry state before returning.  There is no cycle
+// detection: every outgoing arc is followed unconditionally.
+gfsmSet *_gfsm_automaton_paths_r(gfsmAutomaton *fsm,
+                                 gfsmSet *paths,
+                                 gfsmLabelSide which,
+                                 gfsmStateId q,
+                                 gfsmPath *path)
+{
+  gfsmArcIter ai;
+  gfsmWeight fw;
+
+  //-- if final state, add to set of full paths
+  if (gfsm_automaton_lookup_final(fsm,q,&fw)) {
+    //-- temporarily fold the final weight into the path weight
+    gfsmWeight path_w = path->w;
+    path->w = gfsm_sr_times(fsm->sr, fw, path_w);
+
+    if (!gfsm_set_contains(paths,path)) {
+      gfsm_set_insert(paths, gfsm_path_new_copy(path));
+    }
+    path->w = path_w;
+  }
+
+  //-- investigate all outgoing arcs
+  for (gfsm_arciter_open(&ai, fsm, q); gfsm_arciter_ok(&ai); gfsm_arciter_next(&ai)) {
+    gfsmArc *arc = gfsm_arciter_arc(&ai);
+    gfsmWeight w = path->w;
+    gfsmLabelVal lo,hi;
+
+    //-- a side that is not requested is treated as epsilon (never pushed)
+    if (which==gfsmLSLower) {
+      lo = arc->lower;
+      hi = gfsmEpsilon;
+    } else if (which==gfsmLSUpper) {
+      lo = gfsmEpsilon;
+      hi = arc->upper;
+    } else {
+      lo = arc->lower;
+      hi = arc->upper;
+    }
+
+    gfsm_path_push(path, lo, hi, arc->weight, fsm->sr);
+    _gfsm_automaton_paths_r(fsm, paths, which, arc->target, path);
+
+    //-- backtrack: drop the labels and restore the saved weight
+    gfsm_path_pop(path, lo, hi);
+    path->w = w;
+  }
+  gfsm_arciter_close(&ai);
+
+  return paths;
+}
+
+/*======================================================================
+ * Methods: Automaton Serialization: paths_to_strings()
+ */
+
+//--------------------------------------------------------------
+// Convert every path in paths to a newly allocated (char*) and return
+// them as a list in set traversal order.  If strings is non-NULL the new
+// elements are appended to it and strings is returned; formerly the
+// argument was silently ignored.
+GSList *gfsm_paths_to_strings(gfsmSet *paths,
+                              gfsmAlphabet *abet_lo,
+                              gfsmAlphabet *abet_hi,
+                              gfsmSemiring *sr,
+                              gboolean warn_on_undefined,
+                              gboolean att_style,
+                              GSList *strings)
+{
+  gfsmPathsToStringsOptions opts =
+    {
+      abet_lo,
+      abet_hi,
+      sr,
+      warn_on_undefined,
+      att_style,
+      NULL
+    };
+
+  gfsm_set_foreach(paths, (GTraverseFunc)_gfsm_paths_to_strings_foreach_func, &opts);
+
+  //-- the callback prepends, so reverse only the NEW elements before
+  //   joining them onto the caller's list (a NULL list yields just them)
+  return g_slist_concat(strings, g_slist_reverse(opts.strings));
+}
+
+//--------------------------------------------------------------
+// Set traversal callback for gfsm_paths_to_strings(): stringifies path
+// and prepends the resulting (char*) to opts->strings.  Always returns
+// FALSE; value_dummy is unused.
+gboolean _gfsm_paths_to_strings_foreach_func(gfsmPath *path,
+                                             gpointer value_dummy,
+                                             gfsmPathsToStringsOptions *opts)
+{
+  GString *gs = gfsm_path_to_gstring(path, NULL,
+                                     opts->abet_lo, opts->abet_hi, opts->sr,
+                                     opts->warn_on_undefined, opts->att_style);
+  opts->strings = g_slist_prepend(opts->strings, gs->str);
+  //-- free only the GString wrapper: the character data now lives in the list
+  g_string_free(gs,FALSE);
+
+  return FALSE;
+}
+
+//--------------------------------------------------------------
+// Append a textual form of path to gs (allocated if NULL) and return gs:
+// the lower labels if abet_lo is given and the lower side is non-empty,
+// then " : " plus the upper labels if abet_hi is given and the upper side
+// is non-empty, then " <weight>" unless the weight compares equal to
+// sr->one.
+GString *gfsm_path_to_gstring(gfsmPath *path,
+                              GString *gs,
+                              gfsmAlphabet *abet_lo,
+                              gfsmAlphabet *abet_hi,
+                              gfsmSemiring *sr,
+                              gboolean warn_on_undefined,
+                              gboolean att_style)
+{
+  if (!gs) gs = g_string_new("");
+  if (abet_lo && path->lo->len > 0) {
+    gfsm_alphabet_labels_to_gstring(abet_lo, path->lo, gs, warn_on_undefined, att_style);
+  }
+  if (abet_hi && path->hi->len > 0) {
+    g_string_append(gs," : ");
+    gfsm_alphabet_labels_to_gstring(abet_hi, path->hi, gs, warn_on_undefined, att_style);
+  }
+  if (gfsm_sr_compare(sr, path->w, sr->one) != 0) {
+    g_string_append_printf(gs," <%g>",path->w);
+  }
+  return gs;
+}
+
+//--------------------------------------------------------------
+// Like gfsm_path_to_gstring(), but returns a newly allocated (char*)
+// which the caller owns.
+char *gfsm_path_to_string(gfsmPath *path,
+                          gfsmAlphabet *abet_lo,
+                          gfsmAlphabet *abet_hi,
+                          gfsmSemiring *sr,
+                          gboolean warn_on_undefined,
+                          gboolean att_style)
+{
+  GString *gs = gfsm_path_to_gstring(path,NULL,abet_lo,abet_hi,sr,warn_on_undefined,att_style);
+  char *s = gs->str;
+  //-- release the wrapper only; s stays valid
+  g_string_free(gs,FALSE);
+  return s;
+}
+
+
+/*======================================================================
+ * Methods: Viterbi trellis: paths
+ */
+
+//--------------------------------------------------------------
+// For each arc leaving the trellis root, follow the chain of first arcs
+// from the arc's target, reverse the collected labels and insert the
+// resulting path (weighted with the root arc's weight) into paths unless
+// an equal path is already present.  A NULL paths argument is replaced by
+// a new set.  Returns the set used.
+gfsmSet *gfsm_viterbi_trellis_paths_full(gfsmAutomaton *trellis, gfsmSet *paths, gfsmLabelSide which)
+{
+  gfsmArcIter ai;
+
+  //-- sanity check: create path-set if given as NULL
+  if (!paths) {
+    paths = gfsm_set_new_full((GCompareDataFunc)gfsm_path_compare_data,
+                              (gpointer)trellis->sr,
+                              (GDestroyNotify)gfsm_path_free);
+  }
+
+  //-- get & follow pseudo-root of all paths
+  for (gfsm_arciter_open(&ai, trellis, trellis->root_id); gfsm_arciter_ok(&ai); gfsm_arciter_next(&ai)) {
+    gfsmArc *arc = gfsm_arciter_arc(&ai);
+    gfsmPath *path = gfsm_path_new(trellis->sr);
+
+    _gfsm_viterbi_trellis_bestpath_r(trellis, path, which, arc->target);
+    //-- the root arc's weight replaces whatever weight was accumulated
+    path->w = arc->weight;
+
+    //-- reverse the path we've created
+    gfsm_path_reverse(path);
+
+    //-- ... and maybe insert it
+    if (gfsm_set_contains(paths,path)) {
+      //-- oops: we've already got this one: free it
+      gfsm_path_free(path);
+    } else {
+      //-- it's a bona-fide new path: insert it
+      gfsm_set_insert(paths,path);
+    }
+  }
+  //-- release the iterator, as _gfsm_automaton_paths_r() does
+  gfsm_arciter_close(&ai);
+
+  return paths;
+}
+
+//--------------------------------------------------------------
+// Extract the path reached via the FIRST arc leaving the trellis root
+// into path (allocated if NULL).  If the root has no outgoing arc, the
+// path weight is set to the semiring zero.  Returns path.
+gfsmPath *gfsm_viterbi_trellis_bestpath_full(gfsmAutomaton *trellis, gfsmPath *path, gfsmLabelSide which)
+{
+  gfsmArcIter ai;
+
+  //-- sanity check: create path if NULL
+  if (!path) { path = gfsm_path_new(trellis->sr); }
+
+  //-- get & follow pseudo-root of best path
+  gfsm_arciter_open(&ai, trellis, trellis->root_id);
+  if (gfsm_arciter_ok(&ai)) {
+    gfsmArc *arc = gfsm_arciter_arc(&ai);
+    _gfsm_viterbi_trellis_bestpath_r(trellis, path, which, arc->target);
+    path->w = arc->weight;
+  } else {
+    path->w = trellis->sr->zero;
+  }
+  //-- done with arc: release the iterator
+  gfsm_arciter_close(&ai);
+
+  //-- reverse the path we've created
+  gfsm_path_reverse(path);
+
+  return path;
+}
+
+//--------------------------------------------------------------
+// Guts for the trellis path extractors: starting at qid, repeatedly take
+// the first outgoing arc of the current state, push its label(s) for the
+// requested side(s) onto path with weight sr->one, and move on to the
+// arc's target; stops at the first state without outgoing arcs.
+void _gfsm_viterbi_trellis_bestpath_r(gfsmAutomaton *trellis,
+                                      gfsmPath *path,
+                                      gfsmLabelSide which,
+                                      gfsmStateId qid)
+{
+  while (TRUE) {
+    gfsmArcIter ai;
+    gboolean have_arc;
+
+    gfsm_arciter_open(&ai, trellis, qid);
+    have_arc = gfsm_arciter_ok(&ai);
+
+    if (have_arc) {
+      gfsmArc *arc = gfsm_arciter_arc(&ai);
+      gfsm_path_push(path,
+                     (which!=gfsmLSUpper ? arc->lower : gfsmEpsilon),
+                     (which!=gfsmLSLower ? arc->upper : gfsmEpsilon),
+                     trellis->sr->one, trellis->sr);
+      qid = arc->target;
+    }
+
+    //-- one iterator is opened per step: close it only after everything
+    //   needed from arc has been read
+    gfsm_arciter_close(&ai);
+    if (!have_arc) break;
+  }
+}
diff --git a/gfsm/gfsm/src/libgfsm/gfsmPaths.h b/gfsm/gfsm/src/libgfsm/gfsmPaths.h
new file mode 100644
index 0000000..27d83cf
--- /dev/null
+++ b/gfsm/gfsm/src/libgfsm/gfsmPaths.h
@@ -0,0 +1,240 @@
+/*=============================================================================*\
+ * File: gfsmPaths.h
+ * Author: Bryan Jurish <moocow@ling.uni-potsdam.de>
+ * Description: finite state machine library
+ *
+ * Copyright (c) 2005-2007 Bryan Jurish.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *=============================================================================*/
+
+/** \file gfsmPaths.h
+ *  \brief Path discovery & enumeration
+ */
+
+#ifndef _GFSM_PATHS_H
+#define _GFSM_PATHS_H
+
+#include <gfsmAutomaton.h>
+
+/*======================================================================
+ * Types: paths
+ */
+
+/// Type for an automaton path
+typedef struct {
+  gfsmLabelVector *lo; /**< lower label sequence */
+  gfsmLabelVector *hi; /**< upper label sequence */
+  gfsmWeight w;        /**< weight attached to this path */
+} gfsmPath;
+
+
+
+/*======================================================================
+ * Methods: Path Utilities
+ */
+
+///\name Path Utilities
+//@{
+
+//------------------------------
+/** Copy gfsmLabelVector. \returns \a dst */
+gfsmLabelVector *gfsm_label_vector_copy(gfsmLabelVector *dst, gfsmLabelVector *src);
+
+/** Duplicate a gfsmLabelVector. \returns a newly allocated copy of \a src.
+ *  \warning macro: \a src is evaluated more than once.
+ */
+#define gfsm_label_vector_dup(src) \
+  gfsm_label_vector_copy(g_ptr_array_sized_new((src)->len), (src))
+
+/** Reverse a gfsmLabelVector. \returns \a v */
+gfsmLabelVector *gfsm_label_vector_reverse(gfsmLabelVector *v);
+
+//------------------------------
+/** Create and return a new gfsmPath, specifying components.
+ *  If either of \a lo or \a hi is NULL, a new vector will be created.
+ */
+gfsmPath *gfsm_path_new_full(gfsmLabelVector *lo, gfsmLabelVector *hi, gfsmWeight w);
+
+/** Create and return a new empty gfsmPath, specifying semiring. */
+#define gfsm_path_new(sr) \
+  gfsm_path_new_full(NULL,NULL,gfsm_sr_one(sr))
+
+/** Create and return a new gfsmPath as a copy of an existing gfsmPath */
+gfsmPath *gfsm_path_new_copy(gfsmPath *p1);
+
+/** Create and return a new gfsmPath, appending to an existing path */
+gfsmPath *gfsm_path_new_append(gfsmPath *p1, gfsmLabelVal lo, gfsmLabelVal hi, gfsmWeight w, gfsmSemiring *sr);
+
+/** Create and return a new gfsmPath as a copy of an existing gfsmPath with weight multiplied by \a w */
+gfsmPath *gfsm_path_new_times_w(gfsmPath *p1, gfsmWeight w, gfsmSemiring *sr);
+
+/** Append an arc to a gfsmPath */
+void gfsm_path_push(gfsmPath *p, gfsmLabelVal lo, gfsmLabelVal hi, gfsmWeight w, gfsmSemiring *sr);
+
+/** Pop an arc from a gfsmPath */
+void gfsm_path_pop(gfsmPath *p, gfsmLabelVal lo, gfsmLabelVal hi);
+
+/** 3-way path comparison function. */
+gint gfsm_path_compare_data(const gfsmPath *p1, const gfsmPath *p2, gfsmSemiring *sr);
+
+/** Reverse a gfsmPath */
+gfsmPath *gfsm_path_reverse(gfsmPath *p);
+
+/** Destroy a gfsmPath */
+void gfsm_path_free(gfsmPath *p);
+//@}
+
+/*======================================================================
+ * Methods: Automaton Serialization
+ */
+
+///\name Automaton Serialization
+//@{
+
+//------------------------------
+/** Serialize a gfsmAutomaton to a set of (gfsmPath*)s.
+ *  Really just a wrapper for gfsm_automaton_paths_full()
+ *
+ *  \param fsm Acyclic automaton to be serialized
+ *  \param paths output set or NULL
+ *
+ *  \returns \a paths if non-NULL, otherwise a new gfsmSet*.
+ */
+gfsmSet *gfsm_automaton_paths(gfsmAutomaton *fsm, gfsmSet *paths);
+
+/** Serialize a gfsmAutomaton to a set of (gfsmPath*)s.
+ *
+ *  Causes deep recursion for cyclic automata.
+ *  Returns a gfsmSet whose elements are (gfsmPath*)s
+ *  allocated with g_new(). It is the caller's responsibility to free the
+ *  returned objects.
+ *
+ *  \param fsm Acyclic automaton to be serialized
+ *  \param which Which side of arc-labels to serialize
+ *  \param paths output set or NULL
+ *
+ *  \returns \a paths if non-NULL, otherwise a new gfsmSet*.
+ */
+gfsmSet *gfsm_automaton_paths_full(gfsmAutomaton *fsm, gfsmSet *paths, gfsmLabelSide which);
+
+
+/** Recursive guts for gfsm_automaton_paths() */
+gfsmSet *_gfsm_automaton_paths_r(gfsmAutomaton *fsm,
+                                 gfsmSet *paths,
+                                 gfsmLabelSide which,
+                                 gfsmStateId q,
+                                 gfsmPath *path);
+
+
+//------------------------------
+/** Convert a gfsmPathSet to a list of (char*)s.
+ *  \a abet_lo and \a abet_hi should be (gfsmStringAlphabet*)s.
+ */
+GSList *gfsm_paths_to_strings(gfsmSet *paths,
+                              gfsmAlphabet *abet_lo,
+                              gfsmAlphabet *abet_hi,
+                              gfsmSemiring *sr,
+                              gboolean warn_on_undefined,
+                              gboolean att_style,
+                              GSList *strings);
+
+/** \brief Utility struct for gfsm_paths_to_strings() */
+typedef struct gfsmPathsToStringsOptions_ {
+  gfsmAlphabet *abet_lo;      ///< should be a gfsmStringAlphabet*
+  gfsmAlphabet *abet_hi;      ///< should be a gfsmStringAlphabet*
+  gfsmSemiring *sr;           ///< semiring for weight-based set sorting
+  gboolean warn_on_undefined; ///< warn on undefined symbols?
+  gboolean att_style;         ///< use ATT-style output?
+  GSList *strings;            ///< output list
+} gfsmPathsToStringsOptions;
+
+/** backwards compatible type alias */
+#define _gfsm_paths_to_strings_options gfsmPathsToStringsOptions_
+
+/** Utility for gfsm_paths_to_strings() */
+gboolean _gfsm_paths_to_strings_foreach_func(gfsmPath *path,
+                                             gpointer value_dummy,
+                                             gfsmPathsToStringsOptions *opts);
+
+/** Append string for a single gfsmPath* to a GString,
+ *  which may be NULL to allocate a new string.
+ *  \returns \a gs if non-NULL, otherwise a new GString*.
+ *  \warning it is the caller's responsibility to free the returned GString*.
+ */
+GString *gfsm_path_to_gstring(gfsmPath *path,
+                              GString *gs,
+                              gfsmAlphabet *abet_lo,
+                              gfsmAlphabet *abet_hi,
+                              gfsmSemiring *sr,
+                              gboolean warn_on_undefined,
+                              gboolean att_style);
+
+/** Allocate and return a new string (char*) for a single gfsmPath*.
+ *  \returns new (char*) representing \a path.
+ *  \warning it is the caller's responsibility to free the returned \a char*.
+ */
+char *gfsm_path_to_string(gfsmPath *path,
+                          gfsmAlphabet *abet_lo,
+                          gfsmAlphabet *abet_hi,
+                          gfsmSemiring *sr,
+                          gboolean warn_on_undefined,
+                          gboolean att_style);
+
+
+//------------------------------
+
+/** Extract upper side of all paths from a Viterbi trellis.
+ *
+ *  Returns a gfsmSet whose elements are (gfsmPath*)s
+ *  allocated with g_new(). It is the caller's responsibility to free the
+ *  returned objects.
+ *
+ *  \returns \a paths if non-NULL, otherwise a new gfsmSet*.
+ */
+#define gfsm_viterbi_trellis_paths(trellis,paths) \
+  gfsm_viterbi_trellis_paths_full((trellis),(paths),gfsmLSUpper)
+
+
+/** Extract all paths from a Viterbi trellis.
+ *
+ *  Returns a gfsmSet whose elements are (gfsmPath*)s
+ *  allocated with g_new(). It is the caller's responsibility to free the
+ *  returned objects.
+ *
+ *  \returns \a paths if non-NULL, otherwise a new gfsmSet*.
+ */
+gfsmSet *gfsm_viterbi_trellis_paths_full(gfsmAutomaton *trellis, gfsmSet *paths, gfsmLabelSide which);
+
+
+/** Extract the upper-side of the best path from a Viterbi trellis.
+ *  \returns \a path if non-NULL, otherwise a new gfsmPath*.
+ */
+#define gfsm_viterbi_trellis_bestpath(trellis,path) \
+  gfsm_viterbi_trellis_bestpath_full((trellis),(path),gfsmLSUpper)
+
+/** Extract the best path from a Viterbi trellis.
+ *  \returns \a path if non-NULL, otherwise a new gfsmPath*.
+ */
+gfsmPath *gfsm_viterbi_trellis_bestpath_full(gfsmAutomaton *trellis, gfsmPath *path, gfsmLabelSide which);
+
+/** Guts for gfsm_viterbi_trellis_*path*() */
+void _gfsm_viterbi_trellis_bestpath_r(gfsmAutomaton *trellis,
+                                      gfsmPath *path,
+                                      gfsmLabelSide which,
+                                      gfsmStateId qid);
+
+//@}
+
+
+#endif /* _GFSM_PATHS_H */
diff --git a/gfsm/gfsm/src/libgfsm/gfsmRegex.lex.h b/gfsm/gfsm/src/libgfsm/gfsmRegex.lex.h
new file mode 100644
index 0000000..209d7de
--- /dev/null
+++ b/gfsm/gfsm/src/libgfsm/gfsmRegex.lex.h
@@ -0,0 +1,342 @@
+#ifndef gfsmRegex_yyHEADER_H
+#define gfsmRegex_yyHEADER_H 1
+#define gfsmRegex_yyIN_HEADER 1
+
+#line 6 "gfsmRegex.lex.h"
+
+#line 8 "gfsmRegex.lex.h"
+
+#define YY_INT_ALIGNED short int
+
+/* A lexical scanner generated by flex */
+
+#define FLEX_SCANNER
+#define YY_FLEX_MAJOR_VERSION 2
+#define YY_FLEX_MINOR_VERSION 5
+#define YY_FLEX_SUBMINOR_VERSION 33
+#if YY_FLEX_SUBMINOR_VERSION > 0
+#define FLEX_BETA
+#endif
+
+/* First, we deal with platform-specific or compiler-specific issues. */
+
+/* begin standard C headers. */
+#include <stdio.h>
+#include <string.h>
+#include <errno.h>
+#include <stdlib.h>
+
+/* end standard C headers. */
+
+/* flex integer type definitions */
+
+#ifndef FLEXINT_H
+#define FLEXINT_H
+
+/* C99 systems have <inttypes.h>. Non-C99 systems may or may not. */
+
+#if __STDC_VERSION__ >= 199901L
+
+/* C99 says to define __STDC_LIMIT_MACROS before including stdint.h,
+ * if you want the limit (max/min) macros for int types.
+ */ +#ifndef __STDC_LIMIT_MACROS +#define __STDC_LIMIT_MACROS 1 +#endif + +#include <inttypes.h> +typedef int8_t flex_int8_t; +typedef uint8_t flex_uint8_t; +typedef int16_t flex_int16_t; +typedef uint16_t flex_uint16_t; +typedef int32_t flex_int32_t; +typedef uint32_t flex_uint32_t; +#else +typedef signed char flex_int8_t; +typedef short int flex_int16_t; +typedef int flex_int32_t; +typedef unsigned char flex_uint8_t; +typedef unsigned short int flex_uint16_t; +typedef unsigned int flex_uint32_t; +#endif /* ! C99 */ + +/* Limits of integral types. */ +#ifndef INT8_MIN +#define INT8_MIN (-128) +#endif +#ifndef INT16_MIN +#define INT16_MIN (-32767-1) +#endif +#ifndef INT32_MIN +#define INT32_MIN (-2147483647-1) +#endif +#ifndef INT8_MAX +#define INT8_MAX (127) +#endif +#ifndef INT16_MAX +#define INT16_MAX (32767) +#endif +#ifndef INT32_MAX +#define INT32_MAX (2147483647) +#endif +#ifndef UINT8_MAX +#define UINT8_MAX (255U) +#endif +#ifndef UINT16_MAX +#define UINT16_MAX (65535U) +#endif +#ifndef UINT32_MAX +#define UINT32_MAX (4294967295U) +#endif + +#endif /* ! FLEXINT_H */ + +#ifdef __cplusplus + +/* The "const" storage-class-modifier is valid. */ +#define YY_USE_CONST + +#else /* ! __cplusplus */ + +#if __STDC__ + +#define YY_USE_CONST + +#endif /* __STDC__ */ +#endif /* ! __cplusplus */ + +#ifdef YY_USE_CONST +#define yyconst const +#else +#define yyconst +#endif + +/* An opaque pointer. */ +#ifndef YY_TYPEDEF_YY_SCANNER_T +#define YY_TYPEDEF_YY_SCANNER_T +typedef void* yyscan_t; +#endif + +/* For convenience, these vars (plus the bison vars far below) + are macros in the reentrant scanner. 
*/ +#define yyin yyg->yyin_r +#define yyout yyg->yyout_r +#define yyextra yyg->yyextra_r +#define yyleng yyg->yyleng_r +#define yytext yyg->yytext_r +#define yylineno (YY_CURRENT_BUFFER_LVALUE->yy_bs_lineno) +#define yycolumn (YY_CURRENT_BUFFER_LVALUE->yy_bs_column) +#define yy_flex_debug yyg->yy_flex_debug_r + +int gfsmRegex_yylex_init (yyscan_t* scanner); + +#ifndef YY_TYPEDEF_YY_BUFFER_STATE +#define YY_TYPEDEF_YY_BUFFER_STATE +typedef struct yy_buffer_state *YY_BUFFER_STATE; +#endif + +/* The following is because we cannot portably get our hands on size_t + * (without autoconf's help, which isn't available because we want + * flex-generated scanners to compile on their own). + */ + +#ifndef YY_TYPEDEF_YY_SIZE_T +#define YY_TYPEDEF_YY_SIZE_T +typedef unsigned int yy_size_t; +#endif + +#ifndef YY_STRUCT_YY_BUFFER_STATE +#define YY_STRUCT_YY_BUFFER_STATE +struct yy_buffer_state + { + FILE *yy_input_file; + + char *yy_ch_buf; /* input buffer */ + char *yy_buf_pos; /* current position in input buffer */ + + /* Size of input buffer in bytes, not including room for EOB + * characters. + */ + yy_size_t yy_buf_size; + + /* Number of characters read into yy_ch_buf, not including EOB + * characters. + */ + int yy_n_chars; + + /* Whether we "own" the buffer - i.e., we know we created it, + * and can realloc() it to grow it, and should free() it to + * delete it. + */ + int yy_is_our_buffer; + + /* Whether this is an "interactive" input source; if so, and + * if we're using stdio for input, then we want to use getc() + * instead of fread(), to make sure we stop fetching input after + * each newline. + */ + int yy_is_interactive; + + /* Whether we're considered to be at the beginning of a line. + * If so, '^' rules will be active on the next match, otherwise + * not. + */ + int yy_at_bol; + + int yy_bs_lineno; /**< The line count. */ + int yy_bs_column; /**< The column count. */ + + /* Whether to try to fill the input buffer when we reach the + * end of it. 
+ */ + int yy_fill_buffer; + + int yy_buffer_status; + + }; +#endif /* !YY_STRUCT_YY_BUFFER_STATE */ + +void gfsmRegex_yyrestart (FILE *input_file ,yyscan_t yyscanner ); +void gfsmRegex_yy_switch_to_buffer (YY_BUFFER_STATE new_buffer ,yyscan_t yyscanner ); +YY_BUFFER_STATE gfsmRegex_yy_create_buffer (FILE *file,int size ,yyscan_t yyscanner ); +void gfsmRegex_yy_delete_buffer (YY_BUFFER_STATE b ,yyscan_t yyscanner ); +void gfsmRegex_yy_flush_buffer (YY_BUFFER_STATE b ,yyscan_t yyscanner ); +void gfsmRegex_yypush_buffer_state (YY_BUFFER_STATE new_buffer ,yyscan_t yyscanner ); +void gfsmRegex_yypop_buffer_state (yyscan_t yyscanner ); + +YY_BUFFER_STATE gfsmRegex_yy_scan_buffer (char *base,yy_size_t size ,yyscan_t yyscanner ); +YY_BUFFER_STATE gfsmRegex_yy_scan_string (yyconst char *yy_str ,yyscan_t yyscanner ); +YY_BUFFER_STATE gfsmRegex_yy_scan_bytes (yyconst char *bytes,int len ,yyscan_t yyscanner ); + +void *gfsmRegex_yyalloc (yy_size_t ,yyscan_t yyscanner ); +void *gfsmRegex_yyrealloc (void *,yy_size_t ,yyscan_t yyscanner ); +void gfsmRegex_yyfree (void * ,yyscan_t yyscanner ); + +/* Begin user sect3 */ + +#define yytext_ptr yytext_r + +#ifdef YY_HEADER_EXPORT_START_CONDITIONS +#define INITIAL 0 +#define STATE_ESCAPE 1 +#define STATE_BRACKETED 2 +#define STATE_BRACKETED_SEP 3 +#define STATE_BRACKETED_ESCAPE 4 +#define STATE_COMMENT 5 +#define STATE_WEIGHT 6 +#define STATE_UINT 7 + +#endif + +#ifndef YY_NO_UNISTD_H +/* Special case for "unistd.h", since it is non-ANSI. We include it way + * down here because we want the user's section 1 to have been scanned first. + * The user has a chance to override it with an option. + */ +#include <unistd.h> +#endif + +#ifndef YY_EXTRA_TYPE +#define YY_EXTRA_TYPE void * +#endif + +/* Accessor methods to globals. + These are made visible to non-reentrant scanners for convenience. 
*/ + +int gfsmRegex_yylex_destroy (yyscan_t yyscanner ); + +int gfsmRegex_yyget_debug (yyscan_t yyscanner ); + +void gfsmRegex_yyset_debug (int debug_flag ,yyscan_t yyscanner ); + +YY_EXTRA_TYPE gfsmRegex_yyget_extra (yyscan_t yyscanner ); + +void gfsmRegex_yyset_extra (YY_EXTRA_TYPE user_defined ,yyscan_t yyscanner ); + +FILE *gfsmRegex_yyget_in (yyscan_t yyscanner ); + +void gfsmRegex_yyset_in (FILE * in_str ,yyscan_t yyscanner ); + +FILE *gfsmRegex_yyget_out (yyscan_t yyscanner ); + +void gfsmRegex_yyset_out (FILE * out_str ,yyscan_t yyscanner ); + +int gfsmRegex_yyget_leng (yyscan_t yyscanner ); + +char *gfsmRegex_yyget_text (yyscan_t yyscanner ); + +int gfsmRegex_yyget_lineno (yyscan_t yyscanner ); + +void gfsmRegex_yyset_lineno (int line_number ,yyscan_t yyscanner ); + +YYSTYPE * gfsmRegex_yyget_lval (yyscan_t yyscanner ); + +void gfsmRegex_yyset_lval (YYSTYPE * yylval_param ,yyscan_t yyscanner ); + +/* Macros after this point can all be overridden by user definitions in + * section 1. + */ + +#ifndef YY_SKIP_YYWRAP +#ifdef __cplusplus +extern "C" int gfsmRegex_yywrap (yyscan_t yyscanner ); +#else +extern int gfsmRegex_yywrap (yyscan_t yyscanner ); +#endif +#endif + +#ifndef yytext_ptr +static void yy_flex_strncpy (char *,yyconst char *,int ,yyscan_t yyscanner); +#endif + +#ifdef YY_NEED_STRLEN +static int yy_flex_strlen (yyconst char * ,yyscan_t yyscanner); +#endif + +#ifndef YY_NO_INPUT + +#endif + +/* Amount of stuff to slurp up with each read. */ +#ifndef YY_READ_BUF_SIZE +#define YY_READ_BUF_SIZE 8192 +#endif + +/* Number of entries by which start-condition stack grows. */ +#ifndef YY_START_STACK_INCR +#define YY_START_STACK_INCR 25 +#endif + +/* Default declaration of generated scanner - a define so the user can + * easily add parameters. 
+ */ +#ifndef YY_DECL +#define YY_DECL_IS_OURS 1 + +extern int gfsmRegex_yylex \ + (YYSTYPE * yylval_param ,yyscan_t yyscanner); + +#define YY_DECL int gfsmRegex_yylex \ + (YYSTYPE * yylval_param , yyscan_t yyscanner) +#endif /* !YY_DECL */ + +/* yy_get_previous_state - get the state just before the EOB char was reached */ + +#undef YY_NEW_FILE +#undef YY_FLUSH_BUFFER +#undef yy_set_bol +#undef yy_new_buffer +#undef yy_set_interactive +#undef YY_DO_BEFORE_ACTION + +#ifdef YY_DECL_IS_OURS +#undef YY_DECL_IS_OURS +#undef YY_DECL +#endif + +#line 195 "gfsmRegex.lex.l" + + +#line 341 "gfsmRegex.lex.h" +#undef gfsmRegex_yyIN_HEADER +#endif /* gfsmRegex_yyHEADER_H */ diff --git a/gfsm/gfsm/src/libgfsm/gfsmRegex.lex.l b/gfsm/gfsm/src/libgfsm/gfsmRegex.lex.l new file mode 100644 index 0000000..d8b3e95 --- /dev/null +++ b/gfsm/gfsm/src/libgfsm/gfsmRegex.lex.l @@ -0,0 +1,201 @@ +/*====================================================================== + * Flex Options + */ +/*%option outfile="gfsmRegex.lex.c"*/ +%option header-file="gfsmRegex.lex.h" +%option outfile="lex.yy.c" +%option prefix="gfsmRegex_yy" +%option reentrant +%option 8bit +%option yylineno + +%option bison-bridge + +%{ +/*====================================================================== + * User C Header + */ + +#include <gfsmMem.h> +#include <gfsmRegexCompiler.h> + +#include "gfsmRegex.tab.h" + +#define my_compiler ((gfsmRegexCompiler*)yyextra) +#define my_gstr (my_compiler->gstr) +#define my_abet (my_compiler->abet) + +%} + +/*====================================================================== + * Flex Definitions + */ + +DIGIT [0-9] +SPACE [[:space:]] +WCHAR [^\<\[\\ \t\n\r\#] +BCHAR [^\\\]\ \t\n\r\=] +UTF8PREFIX "Ã" + +%x STATE_ESCAPE +%x STATE_BRACKETED +%x STATE_BRACKETED_SEP +%x STATE_BRACKETED_ESCAPE +%x STATE_COMMENT +%x STATE_WEIGHT +%x STATE_UINT + +/*====================================================================== + * Rules + */ +%% + +{UTF8PREFIX}+. 
{ + yylval->gs = gfsm_gstring_new_bytes(yytext,yyleng); + return TOK_STRING; +} + +"\\" { BEGIN(STATE_ESCAPE); } + +"#" { BEGIN(STATE_COMMENT); } + +"[" { + BEGIN(STATE_BRACKETED); + return '['; +} + +"<" { BEGIN(STATE_WEIGHT); return '<'; } + +"(" { return '('; } +")" { return ')'; } + +"*" { return '*'; } +"+" { return '+'; } +"^" { BEGIN(STATE_UINT); return '^'; /* power operator: the grammar expects regex '^' TOK_UINT, so '^' (not '|') must be returned */ } +"?" { return '?'; } +"!" { return '!'; } +"|" { return '|'; } +"&" { return '&'; } +":" { return ':'; } +"@" { return '@'; } +"-" { return '-'; /*"/1"{return PROJ1;}*/ /*"/2"{return PROJ2;}*/ } + +"$" { return '$'; /* non-AT&T: determinize */ } +"%" { return '%'; /* non-AT&T: rmepsilon */ } +"~" { return '~'; /* non-AT&T: connect */ } + +{SPACE} { /* ignore */ } + +{WCHAR} { + yylval->c = yytext[0]; + return TOK_CHAR; +} + + + +<STATE_WEIGHT>([\+\-])?({DIGIT}*\.)?({DIGIT}+)([Ee]([\+\-]?){DIGIT}+)? { + yylval->w = strtod(yytext,NULL); + return TOK_WEIGHT; +} + +<STATE_WEIGHT>\> { + BEGIN(INITIAL); + return '>'; +} + + + +<STATE_UINT>{DIGIT}+ { + BEGIN(INITIAL); + yylval->u = strtol(yytext,NULL,0); + return TOK_UINT; +} + + +<STATE_COMMENT>[^\n]*\n { BEGIN(INITIAL); /* ignore comments */ } + + + + +<STATE_ESCAPE>. { + BEGIN(INITIAL); + yylval->c = yytext[0]; + return TOK_CHAR; +} + + +<STATE_BRACKETED>{UTF8PREFIX}+. 
{ + g_string_append_len(my_gstr,yytext,yyleng); +} +<STATE_BRACKETED>{SPACE}*"]" { + unput(']'); + BEGIN(STATE_BRACKETED_SEP); + if (my_gstr->len) { + yylval->gs = gfsm_gstring_dup(my_gstr); + return TOK_STRING; + } +} +<STATE_BRACKETED>{BCHAR}+ { + g_string_append(my_gstr, yytext); +} +<STATE_BRACKETED>({SPACE}*)"="({SPACE}*) { + unput('='); + BEGIN(STATE_BRACKETED_SEP); + if (my_gstr->len) { + yylval->gs = gfsm_gstring_dup(my_gstr); + return TOK_STRING; + } +} +<STATE_BRACKETED>{SPACE}+ { + unput(' '); + BEGIN(STATE_BRACKETED_SEP); + if (my_gstr->len) { + yylval->gs = gfsm_gstring_dup(my_gstr); + return TOK_STRING; + } +} +<STATE_BRACKETED>\\ { + BEGIN(STATE_BRACKETED_ESCAPE); +} + + +<STATE_BRACKETED_SEP>"]" { + BEGIN(INITIAL); + if (my_gstr->len) { + g_string_truncate(my_gstr,0); + return ']'; + } +} +<STATE_BRACKETED_SEP>"=" { + BEGIN(STATE_BRACKETED); + g_string_truncate(my_gstr,0); + return '='; +} +<STATE_BRACKETED_SEP>" " { + BEGIN(STATE_BRACKETED); + if (my_gstr->len) { + g_string_truncate(my_gstr,0); + return ' '; + } +} + + +<STATE_BRACKETED_ESCAPE>. { + BEGIN(STATE_BRACKETED); + g_string_append_c(my_gstr, yytext[0]); +} + + + +<*>. { + gfsm_scanner_carp(yyextra, "bad character '%s'", yytext); + return TOK_UNKNOWN; +} + +%% + +/*====================================================================== + * User C Code + */ + +GFSM_SCANNER_YYWRAP(gfsmRegex_yy) diff --git a/gfsm/gfsm/src/libgfsm/gfsmRegex.tab.h b/gfsm/gfsm/src/libgfsm/gfsmRegex.tab.h new file mode 100644 index 0000000..deebb3e --- /dev/null +++ b/gfsm/gfsm/src/libgfsm/gfsmRegex.tab.h @@ -0,0 +1,84 @@ +/* A Bison parser, made by GNU Bison 2.3. */ + +/* Skeleton interface for Bison's Yacc-like parsers in C + + Copyright (C) 1984, 1989, 1990, 2000, 2001, 2002, 2003, 2004, 2005, 2006 + Free Software Foundation, Inc. 
+ + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, + Boston, MA 02110-1301, USA. */ + +/* As a special exception, you may create a larger work that contains + part or all of the Bison parser skeleton and distribute that work + under terms of your choice, so long as that work isn't itself a + parser generator using the skeleton or a modified version thereof + as a parser skeleton. Alternatively, if you modify or redistribute + the parser skeleton itself, you may (at your option) remove this + special exception, which will cause the skeleton and the resulting + Bison output files to be licensed under the GNU General Public + License without this special exception. + + This special exception was added by the Free Software Foundation in + version 2.2 of Bison. */ + +/* Tokens. */ +#ifndef YYTOKENTYPE +# define YYTOKENTYPE + /* Put the tokens into the symbol table, so that GDB and other debuggers + know about them. */ + enum yytokentype { + TOK_UNKNOWN = 258, + TOK_CHAR = 259, + TOK_UINT = 260, + TOK_STRING = 261, + TOK_WEIGHT = 262, + CONCAT = 263, + LABCONCAT = 264, + WEIGHT = 265 + }; +#endif +/* Tokens. */ +#define TOK_UNKNOWN 258 +#define TOK_CHAR 259 +#define TOK_UINT 260 +#define TOK_STRING 261 +#define TOK_WEIGHT 262 +#define CONCAT 263 +#define LABCONCAT 264 +#define WEIGHT 265 + + + + +#if ! defined YYSTYPE && ! 
defined YYSTYPE_IS_DECLARED +typedef union YYSTYPE +#line 31 "gfsmRegex.tab.y" +{ + gfsmAutomaton *fsm; //-- automaton + GString *gs; //-- needs to be freed by hand + gchar c; + guint32 u; + gfsmWeight w; +} +/* Line 1489 of yacc.c. */ +#line 77 "gfsmRegex.tab.h" + YYSTYPE; +# define yystype YYSTYPE /* obsolescent; will be withdrawn */ +# define YYSTYPE_IS_DECLARED 1 +# define YYSTYPE_IS_TRIVIAL 1 +#endif + + + diff --git a/gfsm/gfsm/src/libgfsm/gfsmRegex.tab.y b/gfsm/gfsm/src/libgfsm/gfsmRegex.tab.y new file mode 100644 index 0000000..5bbb4aa --- /dev/null +++ b/gfsm/gfsm/src/libgfsm/gfsmRegex.tab.y @@ -0,0 +1,149 @@ +/*====================================================================== + * Bison Options + */ +%pure_parser + +%{ +/*====================================================================== + * Bison C Header + */ +#include <gfsmRegexCompiler.h> +#include <gfsmAutomatonIO.h> +#include <string.h> + +#include "gfsmRegex.tab.h" +#include "gfsmRegex.lex.h" + +#define my_compiler ((gfsmRegexCompiler*)reparser) + +#define YYLEX_PARAM ((gfsmRegexCompiler*)reparser)->scanner.yyscanner +#define YYPARSE_PARAM reparser + +#define YYERROR_VERBOSE 1 +#define gfsmRegex_yyerror(msg) \ + gfsm_scanner_carp((gfsmScanner*)reparser, "%s", (msg)); /* msg is parser-generated text (it may name the '%' token): pass it as data, never as the format */ + +%} + +/*====================================================================== + * Bison Definitions + */ +%union { + gfsmAutomaton *fsm; //-- automaton + GString *gs; //-- needs to be freed by hand + gchar c; + guint32 u; + gfsmWeight w; +} + +%token <c> TOK_UNKNOWN TOK_CHAR +%token <u> TOK_UINT +%token <gs> TOK_STRING +%token <w> TOK_WEIGHT + +%type <u> label +%type <w> weight +%type <fsm> regex + +/* +empty { $$=gfsm_regex_automaton_epsilon(my_compiler); } +*/ + +/* + | gfsmRETChar %prec LAB + { $$=gfsm_regex_automaton_lab(my_compiler, $1); } +*/ + +// -- Operator precedence and associativity +%left CONCAT +%left LABCONCAT +%left WEIGHT +%right '%' //-- non-AT&T: rmepsilon: % REGEX +%right '$' //-- non-AT&T: determinize: $ 
REGEX +%right '~' //-- non-AT&T: connect: ~ REGEX +%left '*' '+' '?' '^' +%right '!' +%left '@' +%left ':' +%left '-' +%left '&' +%left '|' + +/*====================================================================== + * Bison Rules + */ +%% + +regex: '('regex ')' + { $$=$2; } + + | label + { $$=gfsm_regex_compiler_label_fsm(my_compiler, $1); } + + | label regex %prec LABCONCAT + { $$=gfsm_regex_compiler_prepend_lab(my_compiler, $1, $2); } + + | regex regex %prec CONCAT + { $$=gfsm_regex_compiler_concat(my_compiler, $1, $2); } + + | '%' regex + { $$=gfsm_regex_compiler_rmepsilon(my_compiler, $2); /* non-ATT */ } + + | '$' regex + { $$=gfsm_regex_compiler_determinize(my_compiler, $2); /* non-ATT */ } + + | '~' regex + { $$=gfsm_regex_compiler_connect(my_compiler, $2); /* non-ATT */ } + + | regex '*' + { $$=gfsm_regex_compiler_closure(my_compiler,$1,FALSE); } + + | regex '+' + { $$=gfsm_regex_compiler_closure(my_compiler,$1,TRUE); } + + | regex '^' TOK_UINT + { $$=gfsm_regex_compiler_power(my_compiler,$1,$3); } + + | regex '?' + { $$=gfsm_regex_compiler_optional(my_compiler,$1); } + + | '!' 
regex + { $$=gfsm_regex_compiler_complement(my_compiler,$2); } + + | regex '|' regex + { $$=gfsm_regex_compiler_union(my_compiler,$1,$3); } + + | regex '&' regex + { $$=gfsm_regex_compiler_intersect(my_compiler,$1,$3); } + + | regex ':' regex + { $$=gfsm_regex_compiler_product(my_compiler,$1,$3); } + + | regex '@' regex + { $$=gfsm_regex_compiler_compose(my_compiler,$1,$3); } + + | regex '-' regex + { $$=gfsm_regex_compiler_difference(my_compiler,$1,$3); } + + | regex weight %prec WEIGHT + { $$=gfsm_regex_compiler_weight(my_compiler,$1,$2); } + ; + +label: TOK_CHAR + { $$=gfsm_regex_compiler_char2label(my_compiler, $1); } + + | TOK_STRING + { $$=gfsm_regex_compiler_gstring2label(my_compiler, $1); } + + | '[' TOK_STRING ']' + { $$=gfsm_regex_compiler_gstring2label(my_compiler, $2); } + ; + +weight: '<' TOK_WEIGHT '>' { $$=$2; } + ; + +%% + +/*====================================================================== + * User C Code + */ diff --git a/gfsm/gfsm/src/libgfsm/gfsmRegexCompiler.c b/gfsm/gfsm/src/libgfsm/gfsmRegexCompiler.c new file mode 100644 index 0000000..c25ba2d --- /dev/null +++ b/gfsm/gfsm/src/libgfsm/gfsmRegexCompiler.c @@ -0,0 +1,315 @@ + +/*=============================================================================*\ + * File: gfsmRegexCompiler.c + * Author: Bryan Jurish <moocow@ling.uni-potsdam.de> + * Description: finite state machine library + * + * Copyright (c) 2005 Bryan Jurish. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + *=============================================================================*/ + +#include <gfsmRegexCompiler.h> +#include <gfsmArith.h> +#include <gfsmUtils.h> + +#include "gfsmRegex.tab.h" +#include "gfsmRegex.lex.h" + +extern int gfsmRegex_yyparse(gfsmRegexCompiler *rec); + +/*====================================================================== + * Regex Compiler: Constructors etc. + */ + +//-------------------------------------------------------------- +gfsmRegexCompiler *gfsm_regex_compiler_new_full(const gchar *name, + gfsmAlphabet *abet, + gfsmSRType srtype, + gboolean emit_warnings) +{ + gfsmRegexCompiler *rec = g_new0(gfsmRegexCompiler,1); + char *myname = (name ? ((char*)name) : "gfsmRegexCompiler"); + gfsm_scanner_init(&(rec->scanner), myname, gfsmRegex_yy); + rec->fsm = NULL; + rec->abet = abet; + rec->srtype = srtype; + rec->scanner.emit_warnings = emit_warnings; + rec->gstr = g_string_new(""); + return rec; +} + +//-------------------------------------------------------------- +void gfsm_regex_compiler_free(gfsmRegexCompiler *rec, gboolean free_alphabet, gboolean free_automaton) +{ + if (free_alphabet && rec->abet) gfsm_alphabet_free(rec->abet); + if (free_automaton && rec->fsm) gfsm_automaton_free(rec->fsm); + g_string_free(rec->gstr,TRUE); + gfsm_scanner_free(&(rec->scanner)); //-- ought to free the rest +} + +//-------------------------------------------------------------- +void gfsm_regex_compiler_reset(gfsmRegexCompiler *rec, gboolean free_automaton) +{ + if (free_automaton && rec->fsm) gfsm_automaton_free(rec->fsm); + g_string_truncate(rec->gstr,0); + rec->fsm = NULL; + g_clear_error(&(rec->scanner.err)); +} + +/*====================================================================== + * Regex Compiler: Methods + */ + 
+//-------------------------------------------------------------- +gfsmAutomaton *gfsm_regex_compiler_parse(gfsmRegexCompiler *rec) +{ + gfsmRegex_yyparse(rec); + if (rec->scanner.err) { + if (rec->fsm) gfsm_automaton_free(rec->fsm); + rec->fsm = NULL; + } + return rec->fsm; +} + + + +/*====================================================================== + * Regex Compiler: Alphabet Utilities + */ + +//-------------------------------------------------------------- +gfsmLabelVal gfsm_regex_compiler_char2label(gfsmRegexCompiler *rec, gchar c) +{ + gchar cs[2] = {c,'\0'}; + gfsmLabelVal lab = gfsm_alphabet_find_label(rec->abet, cs); + if (lab==gfsmNoLabel) { + gfsm_scanner_carp(&(rec->scanner), + "Warning: no label for character '%c' in alphabet: using gfsmNoLabel", c); + g_clear_error(&(rec->scanner.err)); + } + return lab; +} + +//-------------------------------------------------------------- +gfsmLabelVal gfsm_regex_compiler_gstring2label(gfsmRegexCompiler *rec, GString *gs) +{ + gfsmLabelVal lab = gfsm_alphabet_find_label(rec->abet, gs->str); + if (lab==gfsmNoLabel) { + gfsm_scanner_carp(&(rec->scanner), + "Warning: no label for string '%s' in alphabet: using gfsmNoLabel", gs->str); + g_clear_error(&(rec->scanner.err)); + } + g_string_free(gs,TRUE); + return lab; +} + + +/*====================================================================== + * Regex Compiler: Automaton Utilities + */ +#define RETURN(rec,_fsm) (rec)->fsm=(_fsm); return (_fsm); + +//-------------------------------------------------------------- +gfsmAutomaton *gfsm_regex_compiler_new_fsm(gfsmRegexCompiler *rec) +{ + gfsmAutomaton *fsm = gfsm_automaton_new_full(gfsmAutomatonDefaultFlags, + rec->srtype, + gfsmAutomatonDefaultSize); + fsm->flags.is_transducer = FALSE; + return fsm; +} + +//-------------------------------------------------------------- +gfsmAutomaton *gfsm_regex_compiler_epsilon_fsm(gfsmRegexCompiler *rec) +{ + gfsmAutomaton *fsm = gfsm_regex_compiler_new_fsm(rec); + fsm->root_id = 
gfsm_automaton_add_state(fsm); + gfsm_automaton_set_final_state(fsm,fsm->root_id,TRUE); + RETURN(rec,fsm); +} + +//-------------------------------------------------------------- +gfsmAutomaton *gfsm_regex_compiler_label_fsm(gfsmRegexCompiler *rec, gfsmLabelVal lab) +{ + gfsmAutomaton *fsm = gfsm_regex_compiler_new_fsm(rec); + gfsmStateId labid; + fsm->root_id = gfsm_automaton_add_state(fsm); + labid = gfsm_automaton_add_state(fsm); + gfsm_automaton_add_arc(fsm, fsm->root_id, labid, lab, lab, fsm->sr->one); + gfsm_automaton_set_final_state(fsm,labid,TRUE); + RETURN(rec,fsm); +} + +//-------------------------------------------------------------- +gfsmAutomaton *gfsm_regex_compiler_concat(gfsmRegexCompiler *rec, + gfsmAutomaton *fsm1, + gfsmAutomaton *fsm2) +{ + gfsm_automaton_concat(fsm1, fsm2); + gfsm_automaton_free(fsm2); + RETURN(rec,fsm1); +} + + +//-------------------------------------------------------------- +struct gfsm_regex_append_lab_data_ { + gfsmAutomaton *fsm; + gfsmLabelVal lab; + gfsmStateId newid; +}; + +gboolean _gfsm_regex_append_lab_foreach_func(gfsmStateId qid, gpointer pw, + struct gfsm_regex_append_lab_data_ *data) +{ + gfsm_automaton_get_state(data->fsm,qid)->is_final = FALSE; + gfsm_automaton_add_arc(data->fsm, qid, data->newid, data->lab, data->lab, gfsm_ptr2weight(pw)); + return FALSE; +} + +gfsmAutomaton *gfsm_regex_compiler_append_lab(gfsmRegexCompiler *rec, gfsmAutomaton *fsm, gfsmLabelVal lab) +{ + struct gfsm_regex_append_lab_data_ data = { fsm, lab, gfsm_automaton_add_state(fsm) }; + gfsm_weightmap_foreach(fsm->finals, + (GTraverseFunc)_gfsm_regex_append_lab_foreach_func, + &data); + gfsm_weightmap_clear(fsm->finals); + gfsm_automaton_set_final_state(fsm, data.newid, TRUE); + RETURN(rec,fsm); +} + +//-------------------------------------------------------------- +gfsmAutomaton *gfsm_regex_compiler_prepend_lab(gfsmRegexCompiler *rec, gfsmLabelVal lab, gfsmAutomaton *fsm) +{ + gfsmStateId qid = gfsm_automaton_add_state(fsm); + 
gfsm_automaton_add_arc(fsm, qid, fsm->root_id, lab, lab, fsm->sr->one); + fsm->root_id = qid; + RETURN(rec,fsm); +} + + +//-------------------------------------------------------------- +gfsmAutomaton *gfsm_regex_compiler_closure(gfsmRegexCompiler *rec, gfsmAutomaton *fsm, gboolean is_plus) +{ + gfsm_automaton_closure(fsm,is_plus); + RETURN(rec,fsm); +} + +//-------------------------------------------------------------- +gfsmAutomaton *gfsm_regex_compiler_power(gfsmRegexCompiler *rec, gfsmAutomaton *fsm, guint32 n) +{ + gfsm_automaton_n_closure(fsm,n); + RETURN(rec,fsm); +} + +//-------------------------------------------------------------- +gfsmAutomaton *gfsm_regex_compiler_project(gfsmRegexCompiler *rec, + gfsmAutomaton *fsm, + gfsmLabelSide which) +{ + gfsm_automaton_project(fsm,which); + RETURN(rec,fsm); +} + +//-------------------------------------------------------------- +gfsmAutomaton *gfsm_regex_compiler_optional(gfsmRegexCompiler *rec, gfsmAutomaton *fsm) +{ + gfsm_automaton_optional(fsm); + RETURN(rec,fsm); +} + +//-------------------------------------------------------------- +gfsmAutomaton *gfsm_regex_compiler_complement(gfsmRegexCompiler *rec, gfsmAutomaton *fsm) +{ + gfsm_automaton_complement_full(fsm,rec->abet); + RETURN(rec,fsm); +} + +//-------------------------------------------------------------- +gfsmAutomaton *gfsm_regex_compiler_union(gfsmRegexCompiler *rec, gfsmAutomaton *fsm1, gfsmAutomaton *fsm2) +{ + gfsm_automaton_union(fsm1,fsm2); + gfsm_automaton_free(fsm2); + RETURN(rec,fsm1); +} + +//-------------------------------------------------------------- +gfsmAutomaton *gfsm_regex_compiler_intersect(gfsmRegexCompiler *rec, gfsmAutomaton *fsm1, gfsmAutomaton *fsm2) +{ + gfsm_automaton_intersect(fsm1,fsm2); + gfsm_automaton_free(fsm2); + RETURN(rec,fsm1); +} + +//-------------------------------------------------------------- +gfsmAutomaton *gfsm_regex_compiler_product(gfsmRegexCompiler *rec, gfsmAutomaton *fsm1, gfsmAutomaton *fsm2) +{ + 
gfsm_automaton_product2(fsm1,fsm2); + gfsm_automaton_free(fsm2); + RETURN(rec,fsm1); +} + +//-------------------------------------------------------------- +gfsmAutomaton *gfsm_regex_compiler_compose(gfsmRegexCompiler *rec, gfsmAutomaton *fsm1, gfsmAutomaton *fsm2) +{ + gfsm_automaton_compose(fsm1,fsm2); + gfsm_automaton_free(fsm2); + RETURN(rec,fsm1); +} + +//-------------------------------------------------------------- +gfsmAutomaton *gfsm_regex_compiler_difference(gfsmRegexCompiler *rec, gfsmAutomaton *fsm1, gfsmAutomaton *fsm2) +{ + gfsm_automaton_difference(fsm1,fsm2); + gfsm_automaton_free(fsm2); + RETURN(rec,fsm1); +} + +//-------------------------------------------------------------- +/** Weight */ +gfsmAutomaton *gfsm_regex_compiler_weight(gfsmRegexCompiler *rec, gfsmAutomaton *fsm, gfsmWeight w) +{ + gfsm_automaton_arith_final(fsm, gfsmAOSRTimes, w, FALSE); + RETURN(rec,fsm); +} + + +//-------------------------------------------------------------- +gfsmAutomaton *gfsm_regex_compiler_rmepsilon(gfsmRegexCompiler *rec, gfsmAutomaton *fsm) +{ + gfsm_automaton_rmepsilon(fsm); + //gfsm_automaton_connect(fsm); + //gfsm_automaton_renumber_states(fsm); + RETURN(rec,fsm); +} + + +//-------------------------------------------------------------- +gfsmAutomaton *gfsm_regex_compiler_determinize(gfsmRegexCompiler *rec, gfsmAutomaton *fsm) +{ + gfsm_automaton_determinize(fsm); + RETURN(rec,fsm); +} + +//-------------------------------------------------------------- +gfsmAutomaton *gfsm_regex_compiler_connect(gfsmRegexCompiler *rec, gfsmAutomaton *fsm) +{ + gfsm_automaton_connect(fsm); + gfsm_automaton_renumber_states(fsm); + RETURN(rec,fsm); +} + +#undef RETURN + diff --git a/gfsm/gfsm/src/libgfsm/gfsmRegexCompiler.h b/gfsm/gfsm/src/libgfsm/gfsmRegexCompiler.h new file mode 100644 index 0000000..f88c6fc --- /dev/null +++ b/gfsm/gfsm/src/libgfsm/gfsmRegexCompiler.h @@ -0,0 +1,223 @@ + +/*=============================================================================*\ + * 
File: gfsmRegexCompiler.h + * Author: Bryan Jurish <moocow@ling.uni-potsdam.de> + * Description: finite state machine library + * + * Copyright (c) 2005-2007 Bryan Jurish. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + *=============================================================================*/ + +/** \file gfsmRegexCompiler.h + * \brief Regular expression compiler + * + * \file gfsmRegex.lex.h + * \brief flex-generated lexer headers for gfsmRegexCompiler + * + * \file gfsmRegex.tab.h + * \brief bison-generated parser headers for gfsmRegexCompiler + * + * \union YYSTYPE + * \brief bison-generated parser rule-value union + * + * \struct yy_buffer_state + * \brief flex-generated lexer input buffer state struct + */ + +#ifndef _GFSM_REGEX_COMPILER_H +#define _GFSM_REGEX_COMPILER_H + +#include <gfsmScanner.h> +#include <gfsmAlgebra.h> + +/*====================================================================== + * Types + */ + +/** \brief Type for a regular expression compiler */ +typedef struct { + gfsmScanner scanner; ///< underlying scanner + gfsmSRType srtype; ///< semiring type + gfsmAutomaton *fsm; ///< regex automaton under construction + gfsmAlphabet *abet; ///< alphabet + GString *gstr; ///< string buffer +} gfsmRegexCompiler; + 
+/*====================================================================== + * Regex Compiler: Constructors etc. + */ +///\name Regex Compiler: Constructors etc. +//@{ + +/** Create and return a new gfsmRegexCompiler */ +gfsmRegexCompiler *gfsm_regex_compiler_new_full(const gchar *name, + gfsmAlphabet *abet, + gfsmSRType srtype, + gboolean emit_warnings); + +/** Create and return a new gfsmRegexCompiler, no alphabet */ +#define gfsm_regex_compiler_new() \ + gfsm_regex_compiler_new_full("gfsmRegexCompiler", NULL, gfsmAutomatonDefaultSRType, TRUE); + +/** Destroy a gfsmRegexCompiler. + * \param free_automaton whether to free the stored alphabet, if present + * \param free_automaton whether to free the parsed automaton, if present + */ +void gfsm_regex_compiler_free(gfsmRegexCompiler *rec, gboolean free_alphabet, gboolean free_automaton); + +/** Reset regex compiler; possibly freeing associated automaton */ +void gfsm_regex_compiler_reset(gfsmRegexCompiler *rec, gboolean free_automaton); + +//@} + +/*====================================================================== + * Regex Compiler: Methods + */ +//@{ + +/** + * Parse an automaton from the currently selected input source. 
+ * \returns parsed automaton, or \a NULL on error + */ +gfsmAutomaton *gfsm_regex_compiler_parse(gfsmRegexCompiler *rec); + +//@} + +/*====================================================================== + * Regex Compiler: Alphabet Utilities + */ +///\name Regex Compiler: Alphabet Utilities +//@{ + +/** Get a label value for a single character (warns and returns gfsmNoLabel if the alphabet has no such label) */ +gfsmLabelVal gfsm_regex_compiler_char2label(gfsmRegexCompiler *rec, gchar c); + +/** Get a label value for a GString* (implicitly frees \a gs; warns and returns gfsmNoLabel if the alphabet has no such label) */ +gfsmLabelVal gfsm_regex_compiler_gstring2label(gfsmRegexCompiler *rec, GString *gs); + +//@} + +/*====================================================================== + * Regex Compiler: Automaton Utilities + */ +///\name Regex Compiler: Automaton Utilities +//@{ + +//-------------------------------------------------------------- +/** New full-fledged automaton (an acceptor: is_transducer is set to FALSE) using the compiler's semiring type */ +gfsmAutomaton *gfsm_regex_compiler_new_fsm(gfsmRegexCompiler *rec); + +/** New epsilon recognizer: a single state which is both root and final */ +gfsmAutomaton *gfsm_regex_compiler_epsilon_fsm(gfsmRegexCompiler *rec); + +/** New single-label recognizer: one arc labelled \a lab on both sides from the root to a final state */ +gfsmAutomaton *gfsm_regex_compiler_label_fsm(gfsmRegexCompiler *rec, gfsmLabelVal lab); + + +//-------------------------------------------------------------- +/** Single-label concatenation (low-level): append */ +gfsmAutomaton *gfsm_regex_compiler_append_lab(gfsmRegexCompiler *rec, + gfsmAutomaton *fsm, + gfsmLabelVal lab); + +/** Single-label concatenation (low-level): prepend */ +gfsmAutomaton *gfsm_regex_compiler_prepend_lab(gfsmRegexCompiler *rec, + gfsmLabelVal lab, + gfsmAutomaton *fsm); + +/** General concatenation via gfsm_automaton_concat(fsm1,fsm2): frees \a fsm2 and returns \a fsm1 */ +gfsmAutomaton *gfsm_regex_compiler_concat(gfsmRegexCompiler *rec, + gfsmAutomaton *fsm1, + gfsmAutomaton *fsm2); + +//-------------------------------------------------------------- +/** Closure: the grammar passes \a is_plus = TRUE for '+' and FALSE for '*' */ +gfsmAutomaton *gfsm_regex_compiler_closure(gfsmRegexCompiler *rec, + gfsmAutomaton *fsm, + gboolean is_plus); + +/** Power (n-ary closure) */ +gfsmAutomaton 
*gfsm_regex_compiler_power(gfsmRegexCompiler *rec, + gfsmAutomaton *fsm, + guint32 n); + +/** Optionality */ +gfsmAutomaton *gfsm_regex_compiler_optional(gfsmRegexCompiler *rec, gfsmAutomaton *fsm); + +//-------------------------------------------------------------- +/** Projection */ +gfsmAutomaton *gfsm_regex_compiler_project(gfsmRegexCompiler *rec, + gfsmAutomaton *fsm, + gfsmLabelSide which); + +//-------------------------------------------------------------- +/** Complement */ +gfsmAutomaton *gfsm_regex_compiler_complement(gfsmRegexCompiler *rec, + gfsmAutomaton *fsm); + +//-------------------------------------------------------------- +/** Union */ +gfsmAutomaton *gfsm_regex_compiler_union(gfsmRegexCompiler *rec, + gfsmAutomaton *fsm1, + gfsmAutomaton *fsm2); + +//-------------------------------------------------------------- +/** Intersection */ +gfsmAutomaton *gfsm_regex_compiler_intersect(gfsmRegexCompiler *rec, + gfsmAutomaton *fsm1, + gfsmAutomaton *fsm2); + +//-------------------------------------------------------------- +/** Product */ +gfsmAutomaton *gfsm_regex_compiler_product(gfsmRegexCompiler *rec, + gfsmAutomaton *fsm1, + gfsmAutomaton *fsm2); + +//-------------------------------------------------------------- +/** Composition */ +gfsmAutomaton *gfsm_regex_compiler_compose(gfsmRegexCompiler *rec, + gfsmAutomaton *fsm1, + gfsmAutomaton *fsm2); + +//-------------------------------------------------------------- +/** Difference */ +gfsmAutomaton *gfsm_regex_compiler_difference(gfsmRegexCompiler *rec, + gfsmAutomaton *fsm1, + gfsmAutomaton *fsm2); + +//-------------------------------------------------------------- +/** Weight (final) */ +gfsmAutomaton *gfsm_regex_compiler_weight(gfsmRegexCompiler *rec, + gfsmAutomaton *fsm1, + gfsmWeight w); + +//-------------------------------------------------------------- +/** Remove epsilons */ +gfsmAutomaton *gfsm_regex_compiler_rmepsilon(gfsmRegexCompiler *rec, gfsmAutomaton *fsm); + + 
+//-------------------------------------------------------------- +/** Determinize */ +gfsmAutomaton *gfsm_regex_compiler_determinize(gfsmRegexCompiler *rec, gfsmAutomaton *fsm); + +//-------------------------------------------------------------- +/** Connect */ +gfsmAutomaton *gfsm_regex_compiler_connect(gfsmRegexCompiler *rec, gfsmAutomaton *fsm); + +//@} + + +#endif /* _GFSM_REGEX_COMPILER_H */ diff --git a/gfsm/gfsm/src/libgfsm/gfsmScanner.c b/gfsm/gfsm/src/libgfsm/gfsmScanner.c new file mode 100644 index 0000000..afa2632 --- /dev/null +++ b/gfsm/gfsm/src/libgfsm/gfsmScanner.c @@ -0,0 +1,246 @@ + +/*=============================================================================*\ + * File: gfsmScanner.c + * Author: Bryan Jurish <moocow@ling.uni-potsdam.de> + * Description: finite state machine library + * + * Copyright (c) 2005 Bryan Jurish. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + *=============================================================================*/ + +#include <gfsmScanner.h> +#include <gfsmUtils.h> +#include <string.h> + +/*====================================================================== + * Constants + */ +const char *gfsmScannerDefaultName = "gfsmScanner"; + +const char *gfsmScannerDefaultFilename = "input"; + +/*====================================================================== + * gfsmScanner: Constructors etc. + */ + +//-------------------------------------------------------------- +gfsmScanner *gfsm_scanner_new_full(const char *name, + gfsmFlexScannerInitFunc yyinit_func, + gfsmFlexScannerFreeFunc yyfree_func, + gfsmFlexScannerScanFileFunc yyscan_file_func, + gfsmFlexScannerScanBytesFunc yyscan_bytes_func, + gfsmFlexScannerScanPopFunc yyscan_pop_func, + gfsmFlexScannerGetTextFunc yyget_text_func, + gfsmFlexScannerGetPosFunc yyget_lineno_func, + gfsmFlexScannerSetPosFunc yyset_lineno_func, + gfsmFlexScannerSetExtraFunc yyset_extra_func) +{ + return gfsm_scanner_init_full(g_new0(gfsmScanner,1), + name, + yyinit_func, + yyfree_func, + yyscan_file_func, + yyscan_bytes_func, + yyscan_pop_func, + yyget_text_func, + yyget_lineno_func, + yyset_lineno_func, + yyset_extra_func); +} + +//-------------------------------------------------------------- +gfsmScanner *gfsm_scanner_init_full(gfsmScanner *scanner, + const char *name, + gfsmFlexScannerInitFunc yyinit_func, + gfsmFlexScannerFreeFunc yyfree_func, + gfsmFlexScannerScanFileFunc yyscan_file_func, + gfsmFlexScannerScanBytesFunc yyscan_bytes_func, + gfsmFlexScannerScanPopFunc yyscan_pop_func, + gfsmFlexScannerGetTextFunc yyget_text_func, + gfsmFlexScannerGetPosFunc yyget_lineno_func, + gfsmFlexScannerSetPosFunc yyset_lineno_func, + 
gfsmFlexScannerSetExtraFunc yyset_extra_func) +{ + scanner->data = NULL; + scanner->name = (char *)name; + scanner->filename = NULL; + scanner->err = NULL; + + //-- funcs: init, free + scanner->yyfree_func = yyfree_func; + + //-- data & funcs: buffer switching + scanner->infile = NULL; + scanner->yyscan_file_func = yyscan_file_func; + scanner->yyscan_bytes_func = yyscan_bytes_func; + scanner->yyscan_pop_func = yyscan_pop_func; + + //-- funcs: text + scanner->yyget_text_func = yyget_text_func; + + //-- funcs: position + scanner->yyget_lineno_func = yyget_lineno_func; + scanner->yyset_lineno_func = yyset_lineno_func; + + //-- initialize underlying scanner + if (yyinit_func) (*yyinit_func)(&(scanner->yyscanner)); + + //-- set extra data + if (yyset_extra_func) (*yyset_extra_func)(scanner, scanner->yyscanner); + + return scanner; +} + +//-------------------------------------------------------------- +void gfsm_scanner_free(gfsmScanner *scanner) +{ + gfsm_scanner_close(scanner); + if (scanner->yyfree_func) (*(scanner->yyfree_func))(scanner->yyscanner); + g_clear_error(&(scanner->err)); + g_free(scanner); +} + + +/*====================================================================== + * gfsmScanner: I/O Selection + */ + +//-------------------------------------------------------------- +void gfsm_scanner_close(gfsmScanner *scanner) +{ + (*(scanner->yyscan_pop_func))(scanner->yyscanner); + if (scanner->infile) { + fclose(scanner->infile); + scanner->infile = NULL; + } + if (scanner->filename) { + g_free(scanner->filename); + scanner->filename = NULL; + } +} + +//-------------------------------------------------------------- +void gfsm_scanner_scan_file(gfsmScanner *scanner, FILE *f) +{ + gfsm_scanner_close(scanner); + (*(scanner->yyscan_file_func))(f, scanner->yyscanner); + (*(scanner->yyset_lineno_func))(1, scanner->yyscanner); +} + +//-------------------------------------------------------------- +void gfsm_scanner_scan_filename(gfsmScanner *scanner, const char 
*filename) +{ + g_clear_error(&(scanner->err)); + FILE *f = gfsm_open_filename(filename, "r", &(scanner->err)); + gfsm_scanner_scan_file(scanner,f); + scanner->filename = g_strdup(filename); + scanner->infile = f; +} + +//-------------------------------------------------------------- +void gfsm_scanner_scan_bytes(gfsmScanner *scanner, const char *bytes, int len) +{ + gfsm_scanner_close(scanner); + (*(scanner->yyscan_bytes_func))(bytes, len, scanner->yyscanner); + (*(scanner->yyset_lineno_func))(1, scanner->yyscanner); +} + +//-------------------------------------------------------------- +void gfsm_scanner_scan_gstring(gfsmScanner *scanner, GString *gstr) +{ + gfsm_scanner_scan_bytes(scanner, gstr->str, gstr->len); +} + +//-------------------------------------------------------------- +void gfsm_scanner_scan_string(gfsmScanner *scanner, const char *str) +{ + gfsm_scanner_scan_bytes(scanner, str, strlen(str)); +} + + + +/*====================================================================== + * gfsmScanner: Flex utilities + */ + +//-------------------------------------------------------------- +int gfsm_scanner_yywrap(gfsmScanner *scanner) +{ return 1; } + + +/*====================================================================== + * gfsmScanner: error reporting + */ + +//-------------------------------------------------------------- +void gfsm_scanner_carp(gfsmScanner *scanner, const char *fmt, ...) +{ + va_list ap; + va_start(ap,fmt); + gfsm_scanner_carp_full_v(scanner, + g_quark_from_static_string("gfsm"), + g_quark_from_static_string("scanner_error"), + fmt, ap); + va_end(ap); +} + +//-------------------------------------------------------------- +void gfsm_scanner_carp_full(gfsmScanner *scanner, + GQuark domain, + gint code, + const char *fmt, + ...) 
+{ + va_list ap; + va_start(ap,fmt); + gfsm_scanner_carp_full_v(scanner,domain,code,fmt,ap); + va_end(ap); +} + +//-------------------------------------------------------------- +void gfsm_scanner_carp_full_v(gfsmScanner *scanner, + GQuark domain, + gint code, + const char *fmt, + va_list ap) +{ + char *msg = g_strdup_vprintf(fmt, ap); + + if (scanner->err) { + g_error_free(scanner->err); + scanner->err = NULL; + } + + g_set_error(&(scanner->err), domain, code, + "%s: %s in %s%s%s at line %u%s%s%s", + (scanner->name ? scanner->name : gfsmScannerDefaultName), + msg, + (scanner->filename ? "file \"" : ""), + (scanner->filename ? scanner->filename : gfsmScannerDefaultFilename), + (scanner->filename ? "\"" : ""), + (scanner->yyget_lineno_func ? (*(scanner->yyget_lineno_func))(scanner->yyscanner) : 0), + //-- + (scanner->yyget_text_func ? ", near \"" : ""), + (scanner->yyget_text_func ? (*(scanner->yyget_text_func))(scanner->yyscanner) : ""), + (scanner->yyget_text_func ? "\"" : "") + ); + + if (scanner->emit_warnings) { + fprintf(stderr, "%s\n", scanner->err->message); + } + + g_free(msg); +} diff --git a/gfsm/gfsm/src/libgfsm/gfsmScanner.h b/gfsm/gfsm/src/libgfsm/gfsmScanner.h new file mode 100644 index 0000000..d587486 --- /dev/null +++ b/gfsm/gfsm/src/libgfsm/gfsmScanner.h @@ -0,0 +1,262 @@ +/*=============================================================================*\ + * File: gfsmScanner.h + * Author: Bryan Jurish <moocow@ling.uni-potsdam.de> + * Description: finite state machine library + * + * Copyright (c) 2005-2007 Bryan Jurish. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. 
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *=============================================================================*/
+
+/** \file gfsmScanner.h
+ *  \brief flex scanner utilities
+ */
+
+#ifndef _GFSM_SCANNER_H
+#define _GFSM_SCANNER_H
+
+#include <stdio.h>
+#include <stdarg.h>
+
+#include <gfsmError.h>
+
+/*======================================================================
+ * Types
+ */
+
+//------------------------------------------------------
+/// Opaque type for generic flex lexers
+typedef void* gfsmFlexScanner;
+
+//------------------------------------------------------
+/// Opaque type for generic flex lexer buffers
+typedef void* gfsmFlexBufferState;
+
+//------------------------------------------------------
+/// typedef for flex scanner init() functions.
+/// NOTE(review): gfsm_scanner_init_full() calls this with the *address* of
+/// the scanner handle, which only fits this signature because
+/// gfsmFlexScanner is a void* -- confirm against the generated lex_init().
+typedef int (*gfsmFlexScannerInitFunc) (gfsmFlexScanner yyscanner);
+
+/// typedef for flex scanner destroy() functions
+typedef int (*gfsmFlexScannerFreeFunc) (gfsmFlexScanner yyscanner);
+
+//------------------------------------------------------
+/// typedef for flex scanner lineno() and column() accessors
+typedef int (*gfsmFlexScannerGetPosFunc) (gfsmFlexScanner yyscanner);
+
+/// typedef for flex scanner lineno() and column() manipulators
+typedef void (*gfsmFlexScannerSetPosFunc) (int pos, gfsmFlexScanner yyscanner);
+
+//------------------------------------------------------
+/// typedef for flex scanner set_extra() function
+typedef void (*gfsmFlexScannerSetExtraFunc) (void *extra, gfsmFlexScanner yyscanner);
+
+/// typedef for flex scanner get_extra() function
+typedef void* (*gfsmFlexScannerGetExtraFunc) (gfsmFlexScanner yyscanner);
+
+//------------------------------------------------------
+/// typedef for flex scanner get_text() function
+typedef char* (*gfsmFlexScannerGetTextFunc) (gfsmFlexScanner yyscanner);
+
+//------------------------------------------------------
+/// typedef for flex scanner restart() function
+typedef void (*gfsmFlexScannerScanFileFunc) (FILE *in, gfsmFlexScanner yyscanner);
+
+/// typedef for flex scanner scan_bytes() function
+typedef void (*gfsmFlexScannerScanBytesFunc) (const char *str, int len, gfsmFlexScanner yyscanner);
+
+/// typedef for flex scanner pop_buffer_state() function
+typedef void (*gfsmFlexScannerScanPopFunc) (gfsmFlexScanner yyscanner);
+
+
+//------------------------------------------------------
+
+/** \brief Extra data struct for generic flex scanners.
+ *  \details To use, add a line of the form
+ *  \code #define YY_EXTRA_TYPE gfsmScanner* \endcode
+ *  to scanner.l: the extra pointer installed by gfsm_scanner_init_full()
+ *  is the gfsmScanner itself (or a struct which begins with one).
+ */
+typedef struct {
+  gfsmFlexScanner yyscanner;  ///< underlying flex scanner
+  void *data;                 ///< user data
+  char *name;                 ///< name of this scanner, for errors & warnings; may be NULL
+  FILE *infile;               ///< current input file if we opened it ourselves, else NULL
+  char *filename;             ///< name of input file or NULL
+  gfsmError *err;             ///< Holds scanner error if something goes wrong
+  gboolean emit_warnings;     ///< write warnings to stderr
+
+  //-- funcs: init, free
+  gfsmFlexScannerFreeFunc yyfree_func;            ///< ${PREFIX}lex_destroy(yyscanner)
+
+  //-- funcs & data: buffer switching
+  gfsmFlexScannerScanFileFunc yyscan_file_func;   ///< ${PREFIX}restart(FILE*,yyscanner)
+  gfsmFlexScannerScanBytesFunc yyscan_bytes_func; ///< ${PREFIX}_scan_bytes(bytes,len,yyscanner)
+  gfsmFlexScannerScanPopFunc yyscan_pop_func;     ///< ${PREFIX}pop_buffer_state(yyscanner)
+
+  //-- funcs: text
+  gfsmFlexScannerGetTextFunc yyget_text_func;     ///< ${PREFIX}get_text(yyscanner)
+
+  //-- funcs: position
+  gfsmFlexScannerGetPosFunc yyget_lineno_func;    ///< ${PREFIX}get_lineno(yyscanner)
+  gfsmFlexScannerSetPosFunc yyset_lineno_func;    ///< ${PREFIX}set_lineno(lineno,yyscanner)
+
+  //-- funcs: extra data
+  gfsmFlexScannerSetExtraFunc yyset_extra_func;   ///< ${PREFIX}set_extra(extra,yyscanner)
+
+} gfsmScanner;
+
+
+/*======================================================================
+ * Constants
+ */
+/** Default scanner name */
+extern const char *gfsmScannerDefaultName;
+
+/** Default scanner filename */
+extern const char *gfsmScannerDefaultFilename;
+
+/*======================================================================
+ * gfsmScanner: Constructors etc.
+ */
+
+/// \name gfsmScanner: Constructors etc.
+//@{
+
+
+/** Initialize and return a gfsmScanner given the flex prefix.
+ *  Scanner is initialized for a reentrant flex scanner with prefix PREFIX:
+ *  the macro pastes PREFIX onto the standard flex entry-point names and
+ *  casts each resulting function to the matching gfsmFlexScanner*Func type.
+ */
+#define gfsm_scanner_init(scanner,name,PREFIX) \
+  gfsm_scanner_init_full((scanner), \
+                         (name), \
+                         (gfsmFlexScannerInitFunc) ( PREFIX ## lex_init ), \
+                         (gfsmFlexScannerFreeFunc) ( PREFIX ## lex_destroy ), \
+                         (gfsmFlexScannerScanFileFunc) ( PREFIX ## restart ), \
+                         (gfsmFlexScannerScanBytesFunc) ( PREFIX ## _scan_bytes ), \
+                         (gfsmFlexScannerScanPopFunc) ( PREFIX ## pop_buffer_state ), \
+                         (gfsmFlexScannerGetTextFunc) ( PREFIX ## get_text ), \
+                         (gfsmFlexScannerGetPosFunc) ( PREFIX ## get_lineno ), \
+                         (gfsmFlexScannerSetPosFunc) ( PREFIX ## set_lineno ), \
+                         (gfsmFlexScannerSetExtraFunc) ( PREFIX ## set_extra ) )
+
+/** Create, initialize, and return a new gfsmScanner given a flex prefix.
+ *  Scanner is initialized for a reentrant flex scanner with prefix PREFIX.
+ */
+#define gfsm_scanner_new(name,PREFIX) \
+  gfsm_scanner_init( g_new0(gfsmScanner,1), (name), PREFIX )
+
+
+/** Create, initialize, and return a new gfsmScanner \a scanner.
+ *  Underlying flex scanner will be available as \a scanner->yyscanner.
+ */ +gfsmScanner *gfsm_scanner_new_full(const char *name, + gfsmFlexScannerInitFunc yyinit_func, + gfsmFlexScannerFreeFunc yyfree_func, + gfsmFlexScannerScanFileFunc yyscan_file_func, + gfsmFlexScannerScanBytesFunc yyscan_bytes_func, + gfsmFlexScannerScanPopFunc yyscan_pop_func, + gfsmFlexScannerGetTextFunc yyget_text_func, + gfsmFlexScannerGetPosFunc yyget_lineno_func, + gfsmFlexScannerSetPosFunc yyset_lineno_func, + gfsmFlexScannerSetExtraFunc yyset_extra_func); + + + +/** Initialize a gfsmScanner. */ +gfsmScanner *gfsm_scanner_init_full(gfsmScanner *scanner, + const char *name, + gfsmFlexScannerInitFunc yyinit_func, + gfsmFlexScannerFreeFunc yyfree_func, + gfsmFlexScannerScanFileFunc yyscan_file_func, + gfsmFlexScannerScanBytesFunc yyscan_bytes_func, + gfsmFlexScannerScanPopFunc yyscan_pop_func, + gfsmFlexScannerGetTextFunc yyget_text_func, + gfsmFlexScannerGetPosFunc yyget_lineno_func, + gfsmFlexScannerSetPosFunc yyset_lineno_func, + gfsmFlexScannerSetExtraFunc yyset_extra_func); + +/** Frees memory associated with a gfsmScanner. + * Calls \a scanner->yyfree_func() to destroy the underlying flex scanner. 
+ */ +void gfsm_scanner_free(gfsmScanner *scanner); + + +/*====================================================================== + * gfsmScanner: I/O Selection + */ + +/// \name gfsmScanner: I/O Selection +//@{ + +/** Close any file associated with a scanner */ +void gfsm_scanner_close(gfsmScanner *scanner); + +/** Scan from an open FILE* */ +void gfsm_scanner_scan_file(gfsmScanner *scanner, FILE *f); + +/** Scan from a named file */ +void gfsm_scanner_scan_filename(gfsmScanner *scanner, const char *filename); + +/** Scan an in-memory buffer */ +void gfsm_scanner_scan_bytes(gfsmScanner *scanner, const char *bytes, int len); + +/** Scan from a GString* */ +void gfsm_scanner_scan_gstring(gfsmScanner *scanner, GString *gstr); + +/** Scan a NUL-terminated string */ +void gfsm_scanner_scan_string(gfsmScanner *scanner, const char *str); + +//@} + +/*====================================================================== + * Scanner Methods: Flex scanner utilities + */ +/// \name gfsmScanner: Flex Scanner Utilities +//@{ + +/** Can be used as an input wrapper; doesn't wrap input at all */ +int gfsm_scanner_yywrap(gfsmScanner *scanner); + +/** Use default yywrap */ +#define GFSM_SCANNER_YYWRAP(PREFIX) \ + int PREFIX ## wrap(gfsmFlexScanner yyscanner) \ + { return gfsm_scanner_yywrap( yyscanner ); } + +//@} + +/*====================================================================== + * Scanner Methods: Errors + */ +/// \name gfsmScanner: Error Reporting +//@{ + +/** Wrapper for gfsm_scanner_carp_full_v(); sets \a scanner->err */ +void gfsm_scanner_carp(gfsmScanner *scanner, const char *fmt, ...); + +/** Wrapper for gfsm_scanner_carp_full_v(); sets \a scanner->err */ +void gfsm_scanner_carp_full(gfsmScanner *scanner, + GQuark domain, + gint code, + const char *fmt, + ...); + +/** Warning function - sets \a scanner->err */ +void gfsm_scanner_carp_full_v(gfsmScanner *scanner, + GQuark domain, + gint code, + const char *fmt, + va_list ap); +//@} + +#endif /* _GFSM_SCANNER_H */ 
diff --git a/gfsm/gfsm/src/libgfsm/gfsmSemiring.c b/gfsm/gfsm/src/libgfsm/gfsmSemiring.c new file mode 100644 index 0000000..a1620dc --- /dev/null +++ b/gfsm/gfsm/src/libgfsm/gfsmSemiring.c @@ -0,0 +1,104 @@ +/*=============================================================================*\ + * File: gfsmSemiring.c + * Author: Bryan Jurish <moocow@ling.uni-potsdam.de> + * Description: finite state machine library + * + * Copyright (c) 2004-2007 Bryan Jurish. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + *=============================================================================*/ + +#include <gfsmSemiring.h> + +//-- no-inline definitions +#ifndef GFSM_INLINE_ENABLED +# include <gfsmSemiring.hi> +#endif + +/*====================================================================== + * Semiring: methods: constructors etc. + */ + + + +/*====================================================================== + * Semiring: methods: general accessors + */ + +/*-------------------------------------------------------------- + * compare() + */ +gint gfsm_sr_compare(gfsmSemiring *sr, gfsmWeight x, gfsmWeight y) +{ + switch (sr->type) { + case gfsmSRTLog: + case gfsmSRTTropical: return (x < y ? -1 : (x > y ? 
1 : 0)); + case gfsmSRTTrivial: return 0; + + case gfsmSRTPLog: return (x < y ? 1 : (x > y ? -1 : 0)); + + case gfsmSRTUser: + return (gfsm_sr_compare(sr,x,y) ? -1 : (gfsm_sr_compare(sr,y,x) ? 1 : 0)); + if (((gfsmUserSemiring*)sr)->less_func) { + if ((*((gfsmUserSemiring*)sr)->less_func)(sr,x,y)) return -1; + if ((*((gfsmUserSemiring*)sr)->less_func)(sr,y,x)) return 1; + return 0; + } + + case gfsmSRTBoolean: + case gfsmSRTReal: + default: return (x > y ? -1 : (x < y ? 1 : 0)); + } + return 0; //-- should never happen +} + +/*====================================================================== + * Semiring: string utilities + */ + +/*-------------------------------------------------------------- + * name_to_type() + */ +gfsmSRType gfsm_sr_name_to_type(const char *name) +{ + if (strcmp(name,"boolean") ==0) return gfsmSRTBoolean; + else if (strcmp(name,"log") ==0) return gfsmSRTLog; + else if (strcmp(name,"plog") ==0) return gfsmSRTPLog; + else if (strcmp(name,"real") ==0) return gfsmSRTReal; + else if (strcmp(name,"trivial") ==0) return gfsmSRTTrivial; + else if (strcmp(name,"tropical") ==0) return gfsmSRTTropical; + else if (strcmp(name,"user") ==0) return gfsmSRTUser; + return gfsmSRTUnknown; +} + +/*-------------------------------------------------------------- + * type_to_name() + */ +gchar *gfsm_sr_type_to_name(gfsmSRType type) +{ + switch (type) { + case gfsmSRTBoolean: return "boolean"; + case gfsmSRTLog: return "log"; + case gfsmSRTPLog: return "plog"; + case gfsmSRTTrivial: return "trivial"; + case gfsmSRTTropical: return "tropical"; + case gfsmSRTReal: return "real"; + default: return "unknown"; + } +} + +/*====================================================================== + * Semiring: general utilities + */ diff --git a/gfsm/gfsm/src/libgfsm/gfsmSemiring.h b/gfsm/gfsm/src/libgfsm/gfsmSemiring.h new file mode 100644 index 0000000..e03b5ce --- /dev/null +++ b/gfsm/gfsm/src/libgfsm/gfsmSemiring.h @@ -0,0 +1,183 @@ 
+/*=============================================================================*\
+ * File: gfsmSemiring.h
+ * Author: Bryan Jurish <moocow@ling.uni-potsdam.de>
+ * Description: finite state machine library
+ *
+ * Copyright (c) 2004-2007 Bryan Jurish.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *=============================================================================*/
+
+/** \file gfsmSemiring.h
+ *  \brief semiring types & operations
+ */
+
+#ifndef _GFSM_SEMIRING_H
+#define _GFSM_SEMIRING_H
+
+#include <gfsmCommon.h>
+#include <float.h>
+
+/*======================================================================
+ * Semiring: types
+ */
+/** Builtin semiring types
+ *  \see fsmcost(3)
+ */
+typedef enum {
+  gfsmSRTUnknown  = 0,  ///< unknown semiring (should never happen)
+  gfsmSRTBoolean  = 1,  ///< boolean semiring <set:{0,1}, plus:||, times:&&, less:>, zero:0, one:1>
+  gfsmSRTLog      = 2,  ///< negative log semiring <set:[-inf,inf], plus:-log(e^-x+e^-y), times:+, less:<, zero:inf, one:0>
+  gfsmSRTReal     = 3,  ///< real semiring: <set:[0,inf], plus:+, times:*, less:> (as implemented by gfsm_sr_less()), zero:0, one:1>
+  gfsmSRTTrivial  = 4,  ///< trivial semiring <set:{0}, plus:+, times:+, less:!=, zero:0, one:0>
+  gfsmSRTTropical = 5,  ///< tropical semiring: <set:[-inf,inf], plus:min, times:+, less:<, zero:inf, one:0>
+  gfsmSRTPLog     = 6,  ///< positive log semiring <set:[-inf,inf], plus:log(e^x+e^y), times:+, less:>, zero:-inf, one:0>
+  gfsmSRTUser     = 256 ///< user-defined semiring
+} gfsmSRType;
+
+/*======================================================================
+ * Semiring: types: structs
+ */
+/// struct to represent a builtin semi-ring for gfsm arc weights
+typedef struct {
+  gfsmSRType type; /**< type of this semiring */
+  gfsmWeight zero; /**< nil element of this semiring (identity for '+') */
+  gfsmWeight one;  /**< unity element of this semiring (identity for '*') */
+} gfsmSemiring;
+
+/*======================================================================
+ * Semiring: types: functions
+ */
+/** Type for user-defined semiring unary predicates (i.e. member) */
+typedef gboolean (*gfsmSRUnaryPredicate) (gfsmSemiring *sr, gfsmWeight x);
+
+/// Type for user-defined semiring binary predicates (i.e. equal)
+typedef gboolean (*gfsmSRBinaryPredicate) (gfsmSemiring *sr, gfsmWeight x, gfsmWeight y);
+
+/// Type for user-defined semiring unary operations
+typedef gfsmWeight (*gfsmSRUnaryOp) (gfsmSemiring *sr, gfsmWeight x);
+
+/// Type for user-defined semiring binary operations
+typedef gfsmWeight (*gfsmSRBinaryOp) (gfsmSemiring *sr, gfsmWeight x, gfsmWeight y);
+
+
+/*======================================================================
+ * Semiring: types: user structs
+ */
+/// User-defined semirings for gfsm operations.
+/// The embedded gfsmSemiring must stay the first member: code casts freely
+/// between gfsmSemiring* and gfsmUserSemiring*.
+typedef struct {
+  gfsmSemiring sr; /**< inheritance magic */
+
+  //-- user-defined semirings *must* set these functions
+  gfsmSRBinaryPredicate equal_func; /**< equality predicate */
+  gfsmSRBinaryPredicate less_func;  /**< order predicate */
+  gfsmSRBinaryOp plus_func;         /**< addition operation */
+  gfsmSRBinaryOp times_func;        /**< multiplication operation */
+} gfsmUserSemiring;
+
+/*======================================================================
+ * Semiring: methods: constructors etc.
+ */
+///\name Constructors etc.
+//@{ + +/** Create, initialize (for builtin types), and return new semiring of type \a type */ +GFSM_INLINE +gfsmSemiring *gfsm_semiring_new(gfsmSRType type); + +/** Initialize and return a builtin semiring */ +GFSM_INLINE +void gfsm_semiring_init(gfsmSemiring *sr, gfsmSRType type); + +/** Initialize and return a semiring */ +GFSM_INLINE +gfsmUserSemiring *gfsm_user_semiring_new(gfsmSRBinaryPredicate equal_func, + gfsmSRBinaryPredicate less_func, + gfsmSRBinaryOp plus_func, + gfsmSRBinaryOp times_func); + +/** Copy a semiring */ +GFSM_INLINE +gfsmSemiring *gfsm_semiring_copy(gfsmSemiring *sr); + +/** Destroy a gfsmSemiring */ +GFSM_INLINE +void gfsm_semiring_free(gfsmSemiring *sr); +//@} + +/*====================================================================== + * Semiring: general accessors + */ +///\name General Accessors +//@{ + +/** Get 'zero' element of the ::gfsmSemiring* \a sr */ +GFSM_INLINE +gfsmWeight gfsm_sr_zero(gfsmSemiring *sr); + +/** Get 'one' element of the ::gfsmSemiring* \a sr */ +GFSM_INLINE +gfsmWeight gfsm_sr_one(gfsmSemiring *sr); + +/** Check equality of elements \a x and \a y with respect to ::gfsmSemiring* \a sr */ +GFSM_INLINE +gboolean gfsm_sr_equal(gfsmSemiring *sr, gfsmWeight x, gfsmWeight y); + +/** Check semiring element order */ +GFSM_INLINE +gboolean gfsm_sr_less(gfsmSemiring *sr, gfsmWeight x, gfsmWeight y); + +/** 3-way comparison for semiring values */ +gint gfsm_sr_compare(gfsmSemiring *sr, gfsmWeight x, gfsmWeight y); + +/** Semiring addition */ +GFSM_INLINE +gfsmWeight gfsm_sr_plus(gfsmSemiring *sr, gfsmWeight x, gfsmWeight y); + +/** Semiring multiplication */ +GFSM_INLINE +gfsmWeight gfsm_sr_times(gfsmSemiring *sr, gfsmWeight x, gfsmWeight y); +//@} + +/*====================================================================== + * Semiring: methods: string utilities + */ +///\name String utilities +//@{ +/** Convert symbolic name of a semiring to a gfsmSRType */ +gfsmSRType gfsm_sr_name_to_type(const char *name); + +/** 
Convert a gfsmSRType to a (constant) symbolic name */ +gchar *gfsm_sr_type_to_name(gfsmSRType type); +//@} + +/*====================================================================== + * Semiring: methods: general utilities + */ +///\name General utilities +//@{ +/** stable log addition. + * \returns log(exp(x)+exp(y)) + */ +GFSM_INLINE +gfsmWeight gfsm_log_add(gfsmWeight x, gfsmWeight y); +//@} + +//-- inline definitions +#ifdef GFSM_INLINE_ENABLED +# include <gfsmSemiring.hi> +#endif + +#endif /* _GFSM_SEMIRING_H */ diff --git a/gfsm/gfsm/src/libgfsm/gfsmSemiring.hi b/gfsm/gfsm/src/libgfsm/gfsmSemiring.hi new file mode 100644 index 0000000..cff434b --- /dev/null +++ b/gfsm/gfsm/src/libgfsm/gfsmSemiring.hi @@ -0,0 +1,257 @@ + +/*=============================================================================*\ + * File: gfsmSemiring.def + * Author: Bryan Jurish <moocow@ling.uni-potsdam.de> + * Description: finite state machine library: semirings: inline definitions + * + * Copyright (c) 2004-2007 Bryan Jurish. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + *=============================================================================*/ + +#include <float.h> +#include <math.h> +#include <string.h> + +/*====================================================================== + * Semiring: general utilities + */ + +// LOG_BIG = log(1E31) +#define LOG_BIG 71.3801378828154 +/* +//#define LOG_ZERO -FLT_MAX +#define LOG_ZERO -1E+38F +#define LOG_ONE 0 +#define LOG_NONE 1 +*/ +GFSM_INLINE +gfsmWeight gfsm_log_add(gfsmWeight x, gfsmWeight y) +{ + if (y-x > LOG_BIG) return y; + else if (x-y > LOG_BIG) return x; + /*else return min(x,y) + log(exp(x-min(x,y)) + exp(y-min(x,y))); */ + else if (x<y) return x + log( 1 + exp(y-x)); + else return y + log(exp(x-y) + 1); +} +#undef LOG_BIG + + +/*====================================================================== + * Semiring: methods: constructors etc. + */ + +/*-------------------------------------------------------------- + * init() + */ +GFSM_INLINE +void gfsm_semiring_init(gfsmSemiring *sr, gfsmSRType type) +{ + memset(sr, 0, type==gfsmSRTUser ? 
sizeof(gfsmUserSemiring) : sizeof(gfsmSemiring)); + sr->type = type; + switch (type) { + case gfsmSRTBoolean: + sr->zero = 0; + sr->one = 1; + break; + case gfsmSRTLog: + sr->zero = FLT_MAX; + sr->one = 0; + break; + case gfsmSRTPLog: + sr->zero = -FLT_MAX; + sr->one = 0; + break; + case gfsmSRTTrivial: + sr->zero = 0; + sr->one = 0; + break; + case gfsmSRTTropical: + sr->zero = FLT_MAX; + sr->one = 0; + break; + case gfsmSRTReal: + default: + sr->zero = 0; + sr->one = 1; + break; + } +} + +/*-------------------------------------------------------------- + * new() + */ +GFSM_INLINE +gfsmSemiring *gfsm_semiring_new(gfsmSRType type) +{ + gfsmSemiring *sr = (gfsmSemiring*)g_new(gfsmSemiring,1); + gfsm_semiring_init(sr,type); + return sr; +} + +/*-------------------------------------------------------------- + * user_new() + */ +GFSM_INLINE +gfsmUserSemiring *gfsm_user_semiring_new(gfsmSRBinaryPredicate equal_func, + gfsmSRBinaryPredicate less_func, + gfsmSRBinaryOp plus_func, + gfsmSRBinaryOp times_func) +{ + gfsmUserSemiring *sr = g_new(gfsmUserSemiring, 1); + gfsm_semiring_init((gfsmSemiring*)sr,gfsmSRTUser); + sr->equal_func = equal_func; + sr->less_func = less_func; + sr->plus_func = plus_func; + sr->times_func = times_func; + return sr; +} + +/*-------------------------------------------------------------- + * copy() + */ +GFSM_INLINE +gfsmSemiring *gfsm_semiring_copy(gfsmSemiring *sr) +{ + if (sr->type==gfsmSRTUser) + return (gfsmSemiring*)gfsm_user_semiring_new(((gfsmUserSemiring*)sr)->equal_func, + ((gfsmUserSemiring*)sr)->less_func, + ((gfsmUserSemiring*)sr)->plus_func, + ((gfsmUserSemiring*)sr)->times_func); + return gfsm_semiring_new(sr->type); +} + +/*-------------------------------------------------------------- + * free() + */ +GFSM_INLINE +void gfsm_semiring_free(gfsmSemiring *sr) +{ + if (sr->type==gfsmSRTUser) g_free((gfsmUserSemiring*)sr); + else g_free(sr); +} + + +/*====================================================================== + * 
Semiring: methods: general accessors + */ + +/*-------------------------------------------------------------- + * zero() + */ +GFSM_INLINE +gfsmWeight gfsm_sr_zero(gfsmSemiring *sr) { return sr ? sr->zero : 0; } + +/*-------------------------------------------------------------- + * one() + */ +GFSM_INLINE +gfsmWeight gfsm_sr_one(gfsmSemiring *sr) { return sr ? sr->one : 1; } + +/*-------------------------------------------------------------- + * equal() + */ +GFSM_INLINE +gboolean gfsm_sr_equal(gfsmSemiring *sr, gfsmWeight x, gfsmWeight y) +{ + return (sr->type == gfsmSRTUser && ((gfsmUserSemiring*)sr)->equal_func + ? ((*((gfsmUserSemiring*)sr)->equal_func)(sr,x,y)) \ + : (x==y)); +} + +/*-------------------------------------------------------------- + * less() + */ +GFSM_INLINE +gboolean gfsm_sr_less(gfsmSemiring *sr, gfsmWeight x, gfsmWeight y) +{ + switch (sr->type) { + case gfsmSRTLog: + case gfsmSRTTropical: return (x < y); + + case gfsmSRTPLog: return (x > y); + + case gfsmSRTTrivial: return 0; + + case gfsmSRTUser: + if (((gfsmUserSemiring*)sr)->less_func) + return (*((gfsmUserSemiring*)sr)->less_func)(sr,x,y); + + case gfsmSRTBoolean: + case gfsmSRTReal: + default: return (x > y); + } + return FALSE; //-- should never happen +} + +/*-------------------------------------------------------------- + * plus() + */ +GFSM_INLINE +gfsmWeight gfsm_sr_plus(gfsmSemiring *sr, gfsmWeight x, gfsmWeight y) +{ + switch (sr->type) { + case gfsmSRTBoolean: return x || y; + //case gfsmSRTLog: return -log(exp(-x) + exp(-y)); + case gfsmSRTLog: return -gfsm_log_add(-x,-y); + case gfsmSRTPLog: return gfsm_log_add( x, y); + case gfsmSRTTropical: return (x < y ? 
x : y); + case gfsmSRTTrivial: return 0; + + case gfsmSRTUser: + if (((gfsmUserSemiring*)sr)->plus_func) + return (*((gfsmUserSemiring*)sr)->plus_func)(sr,x,y); + + case gfsmSRTReal: + default: return x + y; + } + return sr->zero; //-- should never happen +} + +/*-------------------------------------------------------------- + * times() + */ +GFSM_INLINE +gfsmWeight gfsm_sr_times(gfsmSemiring *sr, gfsmWeight x, gfsmWeight y) +{ + switch (sr->type) { + case gfsmSRTBoolean: return x && y; + case gfsmSRTLog: + case gfsmSRTPLog: + case gfsmSRTTropical: return x + y; + case gfsmSRTTrivial: return 0; + + case gfsmSRTUser: + if (((gfsmUserSemiring*)sr)->times_func) + return (*((gfsmUserSemiring*)sr)->times_func)(sr,x,y); + + case gfsmSRTReal: + default: return x * y; + } + return sr->zero; //-- should never happen +} + +/*-------------------------------------------------------------- + * div() + */ +/*gboolean gfsm_sr_div(gfsmSemiring *sr, gfsmWeight x, gfsmWeight y) + { return (sr->div_func ? ((*sr->div_func)(sr,x,y)) : (x/y)); } +*/ +//@} + + +/*====================================================================== + * Semiring: string utilities + */ diff --git a/gfsm/gfsm/src/libgfsm/gfsmSet.c b/gfsm/gfsm/src/libgfsm/gfsmSet.c new file mode 100644 index 0000000..1083caa --- /dev/null +++ b/gfsm/gfsm/src/libgfsm/gfsmSet.c @@ -0,0 +1,143 @@ +/*=============================================================================*\ + * File: gfsmSet.c + * Author: Bryan Jurish <moocow@ling.uni-potsdam.de> + * Description: finite state machine library + * + * Copyright (c) 2004-2007 Bryan Jurish. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. 
+ * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + *=============================================================================*/ + +#include <gfsmSet.h> + +//-- no-inline definitions +#ifndef GFSM_INLINE_ENABLED +# include <gfsmSet.hi> +#endif + +/*====================================================================== + * Constructors etc. + */ + +/*-------------------------------------------------------------- + * copy_foreach_func() + */ +gboolean gfsm_set_copy_foreach_func(gpointer key, gpointer value, gfsmSet *data) +{ + if (data) g_tree_insert(data,key,value); + return FALSE; // don't stop iterating +} + +/*-------------------------------------------------------------- + * clear() + */ +void gfsm_set_clear(gfsmSet *set) +{ + guint i; + GPtrArray *keys = g_ptr_array_sized_new(gfsm_set_size(set)); + gfsm_set_to_ptr_array(set,keys); + for (i=0; i < keys->len; i++) { + g_tree_remove(set, g_ptr_array_index(keys,i)); + } + g_ptr_array_free(keys,TRUE); +} + + +/*====================================================================== + * Algebra + */ + +/*-------------------------------------------------------------- + * union_func() + */ +gboolean gfsm_set_union_func(gpointer key, gpointer value, gfsmSetUnionData *data) +{ + if (!data->dupfunc) { + //-- no memory hairiness: just insert + gfsm_set_insert(data->dst, key); + } else { + if (!gfsm_set_contains(data->dst, key)) + gfsm_set_insert(data->dst, (*data->dupfunc)(key)); + } + return FALSE; +} + +/*-------------------------------------------------------------- + * difference_func() + */ 
+gboolean gfsm_set_difference_func(gpointer key, gpointer value, gfsmSet *set1) +{ + gfsm_set_remove(set1,key); + return FALSE; +} + +/*-------------------------------------------------------------- + * intersection() + */ +gfsmSet *gfsm_set_intersection(gfsmSet *set1, gfsmSet *set2) +{ + guint i; + GPtrArray *elts1 = g_ptr_array_sized_new(gfsm_set_size(set1)); + gfsm_set_to_ptr_array(set1,elts1); + for (i=0; i < elts1->len; i++) { + gpointer elt = g_ptr_array_index(elts1,i); + if (!gfsm_set_contains(set2,elt)) gfsm_set_remove(set1,elt); + } + g_ptr_array_free(elts1,TRUE); + return set1; +} + + +/*====================================================================== + * Converters + */ + +/*-------------------------------------------------------------- + * to_slist_foreach_func() + */ +gboolean gfsm_set_to_slist_foreach_func(gpointer key, gpointer value, GSList **dst) +{ + *dst = g_slist_prepend(*dst, key); + return FALSE; //-- don't stop iterating +} + +/*-------------------------------------------------------------- + * to_ptr_array_foreach_func() + */ +gboolean gfsm_set_to_ptr_array_foreach_func(gpointer key, gpointer value, GPtrArray *dst) +{ + g_ptr_array_add(dst,key); + return FALSE; +} + +/*====================================================================== + * Debugging + */ +#ifdef GFSM_DEBUG_ENABLED + +gboolean gfsm_set_print_foreach_func(gpointer key, gpointer data, FILE *f) +{ + fprintf(f, " %u", GPOINTER_TO_UINT(key)); + return FALSE; +} + +void gfsm_set_print_uint(gfsmSet *set, FILE *f) +{ + fputc('{',f); + g_tree_foreach(set, (GTraverseFunc)gfsm_set_print_foreach_func, f); + fputs(" }", f); +} + +#endif /*GFSM_DEBUG_ENABLED*/ diff --git a/gfsm/gfsm/src/libgfsm/gfsmSet.h b/gfsm/gfsm/src/libgfsm/gfsmSet.h new file mode 100644 index 0000000..c95c26e --- /dev/null +++ b/gfsm/gfsm/src/libgfsm/gfsmSet.h @@ -0,0 +1,163 @@ + +/*=============================================================================*\ + * File: gfsmSet.h + * Author: Bryan Jurish 
<moocow@ling.uni-potsdam.de>
+ * Description: finite state machine library
+ *
+ * Copyright (c) 2004-2007 Bryan Jurish.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *=============================================================================*/
+
+/** \file gfsmSet.h
+ * \brief Abstract set type using GTree
+ */
+
+#ifndef _GFSM_SET_H
+#define _GFSM_SET_H
+
+#include <gfsmMem.h>
+
+/*======================================================================
+ * Types
+ */
+/** \brief Type for sets of pointers
+ * \details Really just a thin wrapper for GTree: set elements are the
+ *          tree keys, and gfsm_set_insert() stores the dummy value (gpointer)1.
+ */
+typedef GTree gfsmSet;
+
+
+/*======================================================================
+ * Constructors etc.
+ */
+///\name gfsmSet: Constructors etc.
+//@{
+/** Create and return a new set.
+ * Wraps g_tree_new_full(); \a key_free_func is installed as the key
+ * destroy function, no value destroy function is used.
+ */
+GFSM_INLINE
+gfsmSet *gfsm_set_new_full(GCompareDataFunc key_cmp_func, gpointer key_cmp_data, GDestroyNotify key_free_func);
+
+/** gfsm_set_new(key_compare_func): create and return a new set
+ * (returned set will not free elements)
+ */
+GFSM_INLINE
+gfsmSet *gfsm_set_new(GCompareFunc key_cmp_func);
+
+/** Copy set \a src to \a dst. \a dst is cleared first.
+ * \returns \a dst */
+GFSM_INLINE
+gfsmSet *gfsm_set_copy(gfsmSet *dst, gfsmSet *src);
+
+/** Utility for gfsm_set_copy(): traversal callback which inserts
+ * (\a key, \a value) into \a dst
+ */
+gboolean gfsm_set_copy_foreach_func(gpointer key, gpointer value, gfsmSet *dst);
+
+/** Clear a set: remove all of its elements */
+void gfsm_set_clear(gfsmSet *set);
+
+/** Destroy a set
+ * \code void gfsm_set_free(gfsmSet *set); \endcode
+ */
+#define gfsm_set_free g_tree_destroy
+
+//@}
+
+
+/*======================================================================
+ * Accessors
+ */
+///\name gfsmSet: Accessors
+//@{
+/** Check set membership.
+ * Expands to g_tree_lookup(), i.e. evaluates to the value stored for
+ * \a key (non-NULL for keys added with gfsm_set_insert()), or NULL
+ * if \a key is not in the set.
+ */
+#define gfsm_set_contains(set,key) g_tree_lookup(set,key)
+
+/** Insert a new key into the set (stored with the dummy value (gpointer)1) */
+#define gfsm_set_insert(set,key) g_tree_insert(set,key,(gpointer)1)
+
+/** Get size of set (number of tree nodes) */
+#define gfsm_set_size(set) g_tree_nnodes(set)
+
+/** Remove an element from a set */
+#define gfsm_set_remove(set,key) g_tree_remove(set,key)
+
+/** Traversal (see g_tree_foreach) */
+#define gfsm_set_foreach(set,func,data) g_tree_foreach(set,func,data)
+//@}
+
+/*======================================================================
+ * set: Algebra
+ */
+///\name gfsmSet: Algebra
+//@{
+
+/** Add all elements of set \a set2 to \a set1.
+ * If \a dupfunc is non-NULL, it will be used to copy elements from \a set2,
+ * otherwise elements will be copied as literal gpointer values.
+ * \returns altered \a set1
+ */
+GFSM_INLINE
+gfsmSet *gfsm_set_union(gfsmSet *set1, gfsmSet *set2, gfsmDupFunc dupfunc);
+
+/** Remove all elements in \a set2 from \a set1.
+ * \returns altered \a set1 */
+GFSM_INLINE
+gfsmSet *gfsm_set_difference(gfsmSet *set1, gfsmSet *set2);
+
+/** Remove all elements from \a set1 which are not also in \a set2.
+ * \returns altered \a set1 */
+gfsmSet *gfsm_set_intersection(gfsmSet *set1, gfsmSet *set2);
+
+//@}
+
+
+/*======================================================================
+ * Converters
+ */
+///\name gfsmSet: converters
+//@{
+
+/** Get a GSList of a set's elements.
+ * Elements are prepended during traversal, so the list is in
+ * reverse traversal order.
+ */
+GFSM_INLINE
+GSList *gfsm_set_to_slist(gfsmSet *set);
+
+/** Low-level utility for gfsm_set_to_slist() */
+gboolean gfsm_set_to_slist_foreach_func(gpointer key, gpointer value, GSList **dst);
+
+/** Append a set's elements to a GPtrArray */
+GFSM_INLINE
+void gfsm_set_to_ptr_array(gfsmSet *set, GPtrArray *array);
+
+/** Low-level foreach utility for gfsm_set_to_ptr_array() */
+gboolean gfsm_set_to_ptr_array_foreach_func(gpointer key, gpointer value, GPtrArray *dst);
+//@}
+
+/*======================================================================
+ * Debugging
+ */
+#ifdef GFSM_DEBUG_ENABLED
+#include <stdio.h>
+
+///\name gfsmSet: debugging
+//@{
+
+/** Dump contents of a gfsmSet using '%u' to a FILE* */
+void gfsm_set_print_uint(gfsmSet *set, FILE *f);
+
+//@}
+#endif /* GFSM_DEBUG_ENABLED */
+
+//-- inline definitions
+#ifdef GFSM_INLINE_ENABLED
+# include <gfsmSet.hi>
+#endif
+
+#endif /* _GFSM_SET_H */
diff --git a/gfsm/gfsm/src/libgfsm/gfsmSet.hi b/gfsm/gfsm/src/libgfsm/gfsmSet.hi
new file mode 100644
index 0000000..6ff98af
--- /dev/null
+++ b/gfsm/gfsm/src/libgfsm/gfsmSet.hi
@@ -0,0 +1,136 @@
+/*=============================================================================*\
+ * File: gfsmSet.hi
+ * Author: Bryan Jurish <moocow@ling.uni-potsdam.de>
+ * Description: finite state machine library
+ *
+ * Copyright (c) 2004-2007 Bryan Jurish.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + *=============================================================================*/ + +/*====================================================================== + * Converters + */ + +/*-------------------------------------------------------------- + * to_slist() + */ +GFSM_INLINE +GSList *gfsm_set_to_slist(gfsmSet *set) +{ + GSList *l = NULL; + gfsm_set_foreach(set,(GTraverseFunc)gfsm_set_to_slist_foreach_func, &l); + return l; +} + +/*-------------------------------------------------------------- + * to_ptr_array() + */ +GFSM_INLINE +void gfsm_set_to_ptr_array(gfsmSet *set, GPtrArray *array) +{ + gfsm_set_foreach(set,(GTraverseFunc)gfsm_set_to_ptr_array_foreach_func, array); +} + + +/*====================================================================== + * Constructors etc. 
+ */ + +/*-------------------------------------------------------------- + * new_full() + */ +GFSM_INLINE +gfsmSet *gfsm_set_new_full(GCompareDataFunc key_cmp_func, gpointer key_cmp_data, GDestroyNotify key_free_func) +{ + return g_tree_new_full(key_cmp_func, key_cmp_data, key_free_func, NULL); +} + +/*-------------------------------------------------------------- + * new() + */ +GFSM_INLINE +gfsmSet *gfsm_set_new(GCompareFunc key_cmp_func) +{ + return g_tree_new(key_cmp_func); +} + +/*-------------------------------------------------------------- + * copy() + */ +GFSM_INLINE +gfsmSet *gfsm_set_copy(gfsmSet *dst, gfsmSet *src) +{ + gfsm_set_clear(dst); + g_tree_foreach(src, (GTraverseFunc)gfsm_set_copy_foreach_func, dst); + return dst; +} + + +/*-------------------------------------------------------------- + * clear() + */ +//-- extern + +/*-------------------------------------------------------------- + * free() + */ +#if 0 +GFSM_INLINE +void gfsm_set_free(gfsmSet *set) +{ + g_tree_destroy(set); +} +#endif + + +/*====================================================================== + * Set Algebra + */ + +/*-------------------------------------------------------------- + * union(): data + */ +typedef struct { + gfsmSet *dst; + gfsmDupFunc dupfunc; +} gfsmSetUnionData; + + +/*-------------------------------------------------------------- + * union() + */ +gboolean gfsm_set_union_func(gpointer key, gpointer value, gfsmSetUnionData *data); + +GFSM_INLINE +gfsmSet *gfsm_set_union(gfsmSet *set1, gfsmSet *set2, gfsmDupFunc dupfunc) +{ + gfsmSetUnionData data = { set1, dupfunc }; + g_tree_foreach(set2, (GTraverseFunc)gfsm_set_union_func, &data); + return set1; +} + +/*-------------------------------------------------------------- + * difference() + */ +gboolean gfsm_set_difference_func(gpointer key, gpointer value, gfsmSet *set1); + +GFSM_INLINE +gfsmSet *gfsm_set_difference(gfsmSet *set1, gfsmSet *set2) +{ + g_tree_foreach(set2, 
(GTraverseFunc)gfsm_set_difference_func, set1); + return set1; +} + diff --git a/gfsm/gfsm/src/libgfsm/gfsmState.c b/gfsm/gfsm/src/libgfsm/gfsmState.c new file mode 100644 index 0000000..c7bb5f6 --- /dev/null +++ b/gfsm/gfsm/src/libgfsm/gfsmState.c @@ -0,0 +1,30 @@ + +/*=============================================================================*\ + * File: gfsmState.c + * Author: Bryan Jurish <moocow@ling.uni-potsdam.de> + * Description: finite state machine library: states: extern + * + * Copyright (c) 2004-2007 Bryan Jurish. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + *=============================================================================*/ + +#include <gfsmConfig.h> + +//-- no-inline definitions +#ifndef GFSM_INLINE_ENABLED +# include <gfsmState.h> +# include <gfsmState.hi> +#endif diff --git a/gfsm/gfsm/src/libgfsm/gfsmState.h b/gfsm/gfsm/src/libgfsm/gfsmState.h new file mode 100644 index 0000000..a3fd363 --- /dev/null +++ b/gfsm/gfsm/src/libgfsm/gfsmState.h @@ -0,0 +1,109 @@ + +/*=============================================================================*\ + * File: gfsmState.h + * Author: Bryan Jurish <moocow@ling.uni-potsdam.de> + * Description: finite state machine library: states + * + * Copyright (c) 2004-2007 Bryan Jurish. 
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *=============================================================================*/
+
+/** \file gfsmState.h
+ * \brief Automaton state definitions & utilities
+ */
+
+#ifndef _GFSM_STATE_H
+#define _GFSM_STATE_H
+
+#include <gfsmArcList.h>
+
+/*======================================================================
+ * Types
+ */
+
+/// Automaton state structure: a 32-bit flag word plus the outgoing arc list
+typedef struct {
+  guint32 is_valid : 1; /**< whether this is a valid state */
+  guint32 is_final : 1; /**< whether this is a final state */
+  guint32 is_temp : 1; /**< whether this state should be freed on gfsm_state_close() */
+  guint32 arc_list_temp : 1; /**< whether arc list should be freed on gfsm_state_close() */
+  guint32 arc_data_temp : 1; /**< whether arc data should be freed on gfsm_state_close(): implies arc_list_temp=1 */
+  guint32 unused : 27; /**< reserved */
+  gfsmArcList *arcs; /**< list of outgoing arcs */
+} gfsmState;
+
+
+/*======================================================================
+ * Methods: gfsmState: Constructors etc.
+ */
+/// \name gfsmState: Constructors etc.
+//@{
+/** Create a new state (generic).
+ * The returned state is marked valid, takes its finality from
+ * \a is_final and adopts \a arcs as its arc list (the list is not copied).
+ */
+GFSM_INLINE
+gfsmState *gfsm_state_new_full(gboolean is_final, gfsmArcList *arcs);
+
+/** Create a new state (empty): valid, non-final, no arcs */
+GFSM_INLINE
+gfsmState *gfsm_state_new(void);
+
+/** Copy an existing state.
+ * \a dst is cleared first (its arc list is freed); if \a src is valid,
+ * its is_valid and is_final flags are copied and its arc list is cloned.
+ * \returns \a dst
+ */
+GFSM_INLINE
+gfsmState *gfsm_state_copy(gfsmState *dst, const gfsmState *src);
+
+/** Clear an existing state: free its arc list and mark it invalid and non-final */
+GFSM_INLINE
+void gfsm_state_clear(gfsmState *s);
+
+/** Destroy a state.
+ * The arc list is freed as well only if \a free_arcs is true.
+ */
+GFSM_INLINE
+void gfsm_state_free(gfsmState *s, gboolean free_arcs);
+
+/** Close a state (generic).
+ * Frees the arc list if the arc_data_temp flag is set, and frees
+ * the state itself if the is_temp flag is set.
+ */
+GFSM_INLINE
+void gfsm_state_close(gfsmState *s);
+
+//@}
+
+/*======================================================================
+ * Methods: gfsmState: Accessors
+ */
+///\name gfsmState: Accessors
+//@{
+
+/** Check if state is valid (a NULL state is not) */
+GFSM_INLINE
+gboolean gfsm_state_is_ok(const gfsmState *s);
+
+/** Check for final state (a NULL state is not final) */
+GFSM_INLINE
+gboolean gfsm_state_is_final(const gfsmState *s);
+
+/** Set final state flag */
+GFSM_INLINE
+void gfsm_state_set_final(gfsmState *s, gboolean is_final);
+
+/** Get number of outgoing arcs */
+GFSM_INLINE
+guint gfsm_state_out_degree(const gfsmState *s);
+
+//@}
+
+//-- inline definitions
+#ifdef GFSM_INLINE_ENABLED
+# include <gfsmState.hi>
+#endif
+
+#endif /* _GFSM_STATE_H */
diff --git a/gfsm/gfsm/src/libgfsm/gfsmState.hi b/gfsm/gfsm/src/libgfsm/gfsmState.hi
new file mode 100644
index 0000000..15b2669
--- /dev/null
+++ b/gfsm/gfsm/src/libgfsm/gfsmState.hi
@@ -0,0 +1,144 @@
+
+/*=============================================================================*\
+ * File: gfsmState.hi
+ * Author: Bryan Jurish <moocow@ling.uni-potsdam.de>
+ * Description: finite state machine library: states: inline definitions
+ *
+ * Copyright (c) 2004-2007 Bryan Jurish.
+ * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + *=============================================================================*/ + +#include <gfsmMem.h> + +/*====================================================================== + * Methods: Constructors etc. + */ + +/*-------------------------------------------------------------- + * new_full() + */ +GFSM_INLINE +gfsmState *gfsm_state_new_full(gboolean is_final, gfsmArcList *arcs) +{ + gfsmState *s = g_new(gfsmState,1); + s->is_valid = TRUE; + s->is_final = is_final; + s->arcs = arcs; + return s; +} + +/*-------------------------------------------------------------- + * new() + */ +GFSM_INLINE +gfsmState *gfsm_state_new(void) +{ + return gfsm_state_new_full(FALSE, NULL); +} + +/*-------------------------------------------------------------- + * clear() + */ +GFSM_INLINE +void gfsm_state_clear(gfsmState *s) +{ + gfsm_arclist_free(s->arcs); + s->is_valid = FALSE; + s->is_final = FALSE; + s->arcs = NULL; +} + +/*-------------------------------------------------------------- + * clone() + */ +GFSM_INLINE +gfsmState *gfsm_state_copy(gfsmState *dst, const gfsmState *src) +{ + gfsm_state_clear(dst); + if (!src->is_valid) return dst; + dst->is_valid = src->is_valid; + dst->is_final = src->is_final; + //dst->arcs = 
g_slist_concat(gfsm_arclist_clone(src->arcs), dst->arcs); + dst->arcs = gfsm_arclist_clone(src->arcs); + return dst; +} + + +/*-------------------------------------------------------------- + * free() + */ +GFSM_INLINE +void gfsm_state_free(gfsmState *s, gboolean free_arcs) +{ + if (free_arcs && s->arcs) gfsm_arclist_free(s->arcs); + g_free(s); +} + +/*-------------------------------------------------------------- + * close() + */ +GFSM_INLINE +void gfsm_state_close(gfsmState *s) +{ + if (s->arc_data_temp) { + //-- data=temp, list=temp + gfsm_arclist_free(s->arcs); + s->arcs = NULL; + } +#if 0 //-- only sensible for GSList arclists + else if (s->arc_list_temp) { + //-- data=shared, list=temp + g_slist_free(s->arcs); + s->arcs = NULL; + } +#endif + if (s->is_temp) { g_free(s); } +} + +/*====================================================================== + * Methods: Accessors + */ + +/*-------------------------------------------------------------- + * is_ok() + */ +GFSM_INLINE +gboolean gfsm_state_is_ok(const gfsmState *s) +{ return s && s->is_valid; } + + +/*-------------------------------------------------------------- + * is_final() + */ +GFSM_INLINE +gboolean gfsm_state_is_final(const gfsmState *s) +{ return s && s->is_final; } + +/*-------------------------------------------------------------- + * set_final() + */ +GFSM_INLINE +void gfsm_state_set_final(gfsmState *s, gboolean is_final) +{ s->is_final=is_final; } + +/*-------------------------------------------------------------- + * out_degree() + */ +GFSM_INLINE +guint gfsm_state_out_degree(const gfsmState *s) +{ + return gfsm_arclist_length(s->arcs); +} diff --git a/gfsm/gfsm/src/libgfsm/gfsmStateSet.c b/gfsm/gfsm/src/libgfsm/gfsmStateSet.c new file mode 100644 index 0000000..0afc852 --- /dev/null +++ b/gfsm/gfsm/src/libgfsm/gfsmStateSet.c @@ -0,0 +1,222 @@ + +/*=============================================================================*\ + * File: gfsmStateSet.c + * Author: Bryan Jurish 
<moocow@ling.uni-potsdam.de> + * Description: finite state machine library + * + * Copyright (c) 2004-2007 Bryan Jurish. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + *=============================================================================*/ + +#include <gfsmStateSet.h> +#include <gfsmArcIter.h> + +//-- no-inline definitions +#ifndef GFSM_INLINE_ENABLED +# include <gfsmStateSet.hi> +#endif + +/*====================================================================== + * Constants + */ +const guint gfsmStateSetDefaultSize = 2; + +/*====================================================================== + * Methods: Constructors etc. 
+ */ +//-- inlined + +/*====================================================================== + * Methods: Accessors + */ + +//-------------------------------------------------------------- +// find() +gfsmStateSetIter gfsm_stateset_find(gfsmStateSet *sset, gfsmStateId id) +{ + gfsmStateSetIter sseti; + gfsmStateId iid; + for (sseti = gfsm_stateset_iter_begin(sset); + (iid=gfsm_stateset_iter_id(sseti)) != gfsmNoState; + sseti = gfsm_stateset_iter_next(sset,sseti)) + { + if (id == iid) return sseti; + else if (id < iid) return NULL; + } + return NULL; +} + +//-------------------------------------------------------------- +// insert() +gboolean gfsm_stateset_insert(gfsmStateSet *sset, gfsmStateId id) +{ + guint i; + for (i = 0; i < sset->len && id > g_array_index(sset,gfsmStateId,i); i++) ; + + if (i == sset->len) { + g_array_append_val(sset,id); + } + else if (id == g_array_index(sset,gfsmStateId,i)) { + return TRUE; + } + else { + g_array_insert_val(sset,i,id); + } + return FALSE; +} + +//-------------------------------------------------------------- +// union() +gfsmStateSet *gfsm_stateset_union(gfsmStateSet *sset1, gfsmStateSet *sset2) +{ + guint i1=0, i2; + for (i2=0; i2 < sset2->len; i2++) { + gfsmStateId id = g_array_index(sset2,gfsmStateId,i2); + for (; i1 < sset1->len && id > g_array_index(sset1,gfsmStateId,i1); i1++) ; + + if (i1 == sset1->len) g_array_append_val(sset1,id); + else if (id == g_array_index(sset1,gfsmStateId,i1)) continue; + else g_array_insert_val(sset1,i1,id); + } + return sset1; +} + +//-------------------------------------------------------------- +// remove() +gboolean gfsm_stateset_remove(gfsmStateSet *sset, gfsmStateId id) { + guint i; + for (i = 0; i < sset->len && id > g_array_index(sset,gfsmStateId,i); i++) ; + if (i != sset->len && id == g_array_index(sset,gfsmStateId,i)) { + g_array_remove_index(sset,i); + return TRUE; + } + return FALSE; +} + + +//-------------------------------------------------------------- +// equal() 
+gboolean gfsm_stateset_equal(gfsmStateSet *sset1, gfsmStateSet *sset2) +{ + guint i; + if (sset1->len != sset2->len) return FALSE; + for (i=0; i < sset1->len; i++) { + if (g_array_index(sset1,gfsmStateId,i) != g_array_index(sset2,gfsmStateId,i)) return FALSE; + } + return TRUE; +} + +//-------------------------------------------------------------- +// foreach() +void gfsm_stateset_foreach(gfsmStateSet *sset, gfsmStateSetForeachFunc func, gpointer data) +{ + guint i; + for (i = 0; i < sset->len; i++) { + if ((*func)(g_array_index(sset,gfsmStateId,i), data)) break; + } +} + + +/*====================================================================== + * Methods: iterators + */ +//-- inlined + +/*====================================================================== + * Methods: Utilities + */ + +/*-------------------------------------------------------------- + * hash() + */ +guint gfsm_stateset_hash(gfsmStateSet *sset) +{ + guint hv = 0; + gfsmStateSetIter sseti; + gfsmStateId iid; + for (sseti = gfsm_stateset_iter_begin(sset); + (iid=gfsm_stateset_iter_id(sseti)) != gfsmNoState; + sseti = gfsm_stateset_iter_next(sset,sseti)) + { + hv += 5*iid; + } + return hv; +} + + +/*====================================================================== + * Methods: Automaton access + */ + +//-------------------------------------------------------------- +// has_final_state() +gboolean gfsm_stateset_has_final_state(gfsmStateSet *sset, gfsmAutomaton *fsm) +{ + guint i; + for (i = 0; i < sset->len; i++) { + if (gfsm_automaton_is_final_state(fsm, g_array_index(sset,gfsmStateId,i))) return TRUE; + } + return FALSE; +} + +//-------------------------------------------------------------- +// lookup_final_weight() +gboolean gfsm_stateset_lookup_final_weight(gfsmStateSet *sset, gfsmAutomaton *fsm, gfsmWeight *wp) +{ + guint i; + gboolean rc=FALSE; + *wp = fsm->sr->one; + gfsmWeight w; + for (i = 0; i < sset->len; i++) { + gfsmStateId id = g_array_index(sset,gfsmStateId,i); + if 
(gfsm_automaton_lookup_final(fsm,id,&w)) { + *wp = gfsm_sr_plus(fsm->sr, *wp, w); + rc = TRUE; + } + } + return rc; +} + +/*-------------------------------------------------------------- + * populate() + */ +void gfsm_stateset_populate(gfsmStateSet *sset, + gfsmAutomaton *fsm, + gfsmStateId id, + gfsmLabelVal lo, + gfsmLabelVal hi) +{ + gfsmArcIter ai; + if (gfsm_stateset_insert(sset,id)) return; + + for (gfsm_arciter_open(&ai,fsm,id), gfsm_arciter_seek_both(&ai,lo,hi); + gfsm_arciter_ok(&ai); + gfsm_arciter_next(&ai), gfsm_arciter_seek_both(&ai,lo,hi)) + { + gfsm_stateset_populate(sset,fsm,gfsm_arciter_arc(&ai)->target,lo,hi); + } + gfsm_arciter_close(&ai); +} + +/*-------------------------------------------------------------- + * has_final_state() + */ +//--inlined + +/*-------------------------------------------------------------- + * lookup_final_weight() + */ +//--inlined diff --git a/gfsm/gfsm/src/libgfsm/gfsmStateSet.h b/gfsm/gfsm/src/libgfsm/gfsmStateSet.h new file mode 100644 index 0000000..e520df2 --- /dev/null +++ b/gfsm/gfsm/src/libgfsm/gfsmStateSet.h @@ -0,0 +1,219 @@ + +/*=============================================================================*\ + * File: gfsmStateSet.h + * Author: Bryan Jurish <moocow@ling.uni-potsdam.de> + * Description: finite state machine library + * + * Copyright (c) 2004-2007 Bryan Jurish. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + *=============================================================================*/ + +/** \file gfsmStateSet.h + * \brief Sets of (gfsmStateId)s + */ + +#ifndef _GFSM_STATESET_H +#define _GFSM_STATESET_H + +#include <gfsmAutomaton.h> + +/*====================================================================== + * Types + */ + +/** \brief typedef for sets of automaton state-Ids + * \detail current implementation uses a sorted array +*/ +typedef GArray gfsmStateSet; + +/** \brief typedef for weighted sets of automaton state-Ids */ +typedef struct { + gfsmStateSet *set; /**< Set of gfsmStateIds */ + gfsmWeight weight; /**< Weight of this set */ +} gfsmWeightedStateSet; + + +/*====================================================================== + * Constants + */ +///\name Constants +//@{ +/** Default initial size of state-sets */ +extern const guint gfsmStateSetDefaultSize; +//@} + +/*====================================================================== + * Methods: Constructors etc. + */ +/// \name Constructors etc. 
+//@{ + +/** Create and return a new state-set, giving initial reserved size */ +GFSM_INLINE +gfsmStateSet *gfsm_stateset_sized_new(guint isize); + +/** Create and return a new state-set */ +GFSM_INLINE +gfsmStateSet *gfsm_stateset_new(void); + +/** Create and return a new singleton state-set */ +GFSM_INLINE +gfsmStateSet *gfsm_stateset_new_singleton(gfsmStateId id); + +/** clear a state-set */ +GFSM_INLINE +void gfsm_stateset_clear(gfsmStateSet *sset); + +/** Create and return an exact copy of a state-set */ +GFSM_INLINE +gfsmStateSet *gfsm_stateset_clone(gfsmStateSet *src); + +/** destroy a state-set */ +GFSM_INLINE +void gfsm_stateset_free(gfsmStateSet *sset); + +//@} + +/*====================================================================== + * Methods: Accessors + */ +///\name Accessors +//@{ + +/** Get minimum element of a state-set */ +GFSM_INLINE +gfsmStateId gfsm_stateset_min(gfsmStateSet *sset); + +/** Get number of elements in a state-set */ +GFSM_INLINE +guint gfsm_stateset_size(gfsmStateSet *sset); + +/** Check whether a state-id is contained in a state-set */ +GFSM_INLINE +gboolean gfsm_stateset_contains(gfsmStateSet *sset, gfsmStateId id); + +/** Insert a single state-id into a state-set. 
+ * \returns true iff \a sset already contained \a id + */ +gboolean gfsm_stateset_insert(gfsmStateSet *sset, gfsmStateId id); + +/** Assign \a sset1 to be the union of itself with \a sset2 */ +gfsmStateSet *gfsm_stateset_union(gfsmStateSet *sset1, gfsmStateSet *sset2); + +/** Remove a state-id from a state-set + * \returns true iff \a sset contained \a id + */ +gboolean gfsm_stateset_remove(gfsmStateSet *sset, gfsmStateId id); + +/** Equality test */ +gboolean gfsm_stateset_equal(gfsmStateSet *sset1, gfsmStateSet *sset2); +//@} + +/*====================================================================== + * Methods: Iteration + */ +///\name Iteration +//@{ + +/// state-set iterator type +typedef gfsmStateId* gfsmStateSetIter; + +/** Iterator access: get current state-id, or gfsmNoState if none defined */ +#define gfsm_stateset_iter_id(sseti) \ + ((sseti) ? *(sseti) : gfsmNoState) + +/** Check validity of a state-set iterator */ +#define gfsm_stateset_iter_ok(sseti) \ + ((sseti)!=NULL) + +/** Get first state in the state-set. + * \returns iterator (value) pointing to the first state in the + * state-set, or a bad (NULL) iterator if the set is empty, so that + * gfsm_stateset_iter_ok() fails at once instead of exposing an + * unused array slot. + * \note evaluates \a sset more than once. + */ +#define gfsm_stateset_iter_begin(sset) \ + ((sset)->len > 0 ? ((gfsmStateId*)((sset)->data)) : NULL) + +/** Increment a state-set iterator one position. + * \note increments \a sseti in place and evaluates to the new iterator, + * or NULL once the end of \a sset is reached; assign the result back + * to the iterator variable. + */ +#define gfsm_stateset_iter_next(sset,sseti) \ + ((++(sseti)) < (((gfsmStateId*)((sset)->data))+((sset)->len)) \ + ? 
(sseti) \ + : NULL) + +/** Find an iterator pointing to the element for \a id in \a sset, + * or a bad iterator if no such element exists */ +gfsmStateSetIter gfsm_stateset_find(gfsmStateSet *sset, gfsmStateId id); + +//@} + +/*====================================================================== + * Methods: Utilities + */ +///\name Utilities +//@{ + +/// typedef for iteration functions: return TRUE to stop iteration +typedef gboolean (*gfsmStateSetForeachFunc) (gfsmStateId id, gpointer data); + +/** General iteration utility for state-sets */ +void gfsm_stateset_foreach(gfsmStateSet *sset, gfsmStateSetForeachFunc func, gpointer data); + +/** Hashing function for state-sets */ +guint gfsm_stateset_hash(gfsmStateSet *sset); + +//@} + +/*====================================================================== + * Methods: Automaton + */ +///\name Methods: Automaton access +//@{ + +/** Populate a state-set with the state \a id itself and, recursively, + * the targets of all arcs with + * lower label \a lo and upper label \a hi leaving state with id \a id + * in automaton \a fsm. + * + * If either \a lo or \a hi is gfsmNoLabel, the corresponding labels + * will be ignored. + * + * Recursion stops at any state which is already contained in \a sset. + * + * Note that this method does not clear \a sset. + */ +void gfsm_stateset_populate(gfsmStateSet *sset, + gfsmAutomaton *fsm, + gfsmStateId id, + gfsmLabelVal lo, + gfsmLabelVal hi); + +/** Convenience macro to populate a state-set from I/O-epsilon arcs */ +#define gfsm_stateset_populate_eps(sset,fsm,id) \ + gfsm_stateset_populate((sset),(fsm),(id),gfsmEpsilon,gfsmEpsilon) + +/** Returns true iff some \a id in \a sset is a final state in \a fsm */ +gboolean gfsm_stateset_has_final_state(gfsmStateSet *sset, gfsmAutomaton *fsm); + +/** Lookup sum of final weights in \a fsm of states \a id in \a sset + * Returns TRUE iff at least one state in \a sset is final, and + * sets \a *wp to the sum of final weights. 
+ */ +gboolean gfsm_stateset_lookup_final_weight(gfsmStateSet *sset, gfsmAutomaton *fsm, gfsmWeight *wp); + +//@} + +//-- inline definitions +#ifdef GFSM_INLINE_ENABLED +# include <gfsmStateSet.hi> +#endif + +#endif /* _GFSM_STATESET_H */ diff --git a/gfsm/gfsm/src/libgfsm/gfsmStateSet.hi b/gfsm/gfsm/src/libgfsm/gfsmStateSet.hi new file mode 100644 index 0000000..c2d1248 --- /dev/null +++ b/gfsm/gfsm/src/libgfsm/gfsmStateSet.hi @@ -0,0 +1,154 @@ + +/*=============================================================================*\ + * File: gfsmStateSet.hi + * Author: Bryan Jurish <moocow@ling.uni-potsdam.de> + * Description: finite state machine library: state sets: inlined definitions + * + * Copyright (c) 2004-2007 Bryan Jurish. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + *=============================================================================*/ + +/*====================================================================== + * Methods: Constructors etc. 
+ */ + +//-------------------------------------------------------------- +// sized_new() +GFSM_INLINE +gfsmStateSet *gfsm_stateset_sized_new(guint isize) +{ + return g_array_sized_new(FALSE,TRUE,sizeof(gfsmStateId),isize); +} + +//-------------------------------------------------------------- +// new() +GFSM_INLINE +gfsmStateSet *gfsm_stateset_new(void) +{ + return gfsm_stateset_sized_new(gfsmStateSetDefaultSize); +} + +//-------------------------------------------------------------- +// new_singleton() +GFSM_INLINE +gfsmStateSet *gfsm_stateset_new_singleton(gfsmStateId id) +{ + gfsmStateSet *sset = gfsm_stateset_new(); + g_array_insert_val(sset,0,id); + return sset; +} + +//-------------------------------------------------------------- +// clear() +GFSM_INLINE +void gfsm_stateset_clear(gfsmStateSet *sset) +{ + g_array_set_size(sset,0); +} + +//-------------------------------------------------------------- +// clone() +GFSM_INLINE +gfsmStateSet *gfsm_stateset_clone(gfsmStateSet *src) +{ + return g_array_append_vals(gfsm_stateset_sized_new(src->len), src->data, src->len); +} + +//-------------------------------------------------------------- +// free() +GFSM_INLINE +void gfsm_stateset_free(gfsmStateSet *sset) +{ + g_array_free(sset,TRUE); +} + +/*====================================================================== + * Methods: Accessors + */ + +//-------------------------------------------------------------- +// min() +GFSM_INLINE +gfsmStateId gfsm_stateset_min(gfsmStateSet *sset) +{ + return sset->len > 0 ? 
(*((gfsmStateId*)sset->data)) : gfsmNoState; +} + +//-------------------------------------------------------------- +// size() +GFSM_INLINE +guint gfsm_stateset_size(gfsmStateSet *sset) +{ + return sset->len; +} + +//-------------------------------------------------------------- +// find() +//--EXTERN + +//-------------------------------------------------------------- +// contains() +GFSM_INLINE +gboolean gfsm_stateset_contains(gfsmStateSet *sset, gfsmStateId id) +{ + gfsmStateSetIter sseti = gfsm_stateset_find(sset,id); + return sseti != NULL && *sseti != gfsmNoState; +} + +//-------------------------------------------------------------- +// insert() +//--EXTERN + +//-------------------------------------------------------------- +// union() +//--EXTERN + +//-------------------------------------------------------------- +// remove() +//--EXTERN + +//-------------------------------------------------------------- +// equal() +//--EXTERN + +/*====================================================================== + * Methods: Utilities + */ + +//-------------------------------------------------------------- +// hash() +//--EXTERN + + +//-------------------------------------------------------------- +// foreach() +//--EXTERN + +/*====================================================================== + * Methods: Automaton access + */ + +//-------------------------------------------------------------- +// populate() +//--EXTERN + +//-------------------------------------------------------------- +// has_final_state() +//--EXTERN + +//-------------------------------------------------------------- +// lookup_final_weight() +//--EXTERN diff --git a/gfsm/gfsm/src/libgfsm/gfsmTrie.c b/gfsm/gfsm/src/libgfsm/gfsmTrie.c new file mode 100644 index 0000000..952e55d --- /dev/null +++ b/gfsm/gfsm/src/libgfsm/gfsmTrie.c @@ -0,0 +1,282 @@ + +/*=============================================================================*\ + * File: gfsmTrie.c + * Author: Bryan Jurish 
<moocow@ling.uni-potsdam.de> + * Description: finite state machine library + * + * Copyright (c) 2005-2006 Bryan Jurish. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + *=============================================================================*/ + +#include <gfsmTrie.h> +#include <gfsmArcIter.h> + +/*====================================================================== + * Constants + */ + +const gfsmAutomatonFlags gfsmTrieDefaultFlags = + { + TRUE, //-- is_transducer:1 + TRUE, //-- is_weighted:1 + TRUE, //-- is_deterministic:1 + gfsmASMLower, //-- sort_mode:24 + 0 //-- unused:5 + }; + +const gfsmSRType gfsmTrieDefaultSRType = gfsmSRTReal; + + +/*====================================================================== + * Methods: ensure path (adding weight) + */ + +//-------------------------------------------------------------- +gfsmStateId gfsm_trie_add_path(gfsmTrie *trie, + gfsmLabelVector *lo, + gfsmLabelVector *hi, + gfsmWeight w) +{ + return gfsm_trie_add_path_full(trie,lo,hi,w,TRUE,TRUE,TRUE,NULL); +} + + +//-------------------------------------------------------------- +gfsmStateId gfsm_trie_add_path_full(gfsmTrie *trie, + gfsmLabelVector *lo, + gfsmLabelVector *hi, + gfsmWeight w, + gboolean add_to_arcs, + gboolean add_to_state_final, + gboolean 
add_to_path_final, + gfsmStateIdVector *path_states + ) +{ + gfsmStateId qid; + guint i; + + //-- ensure trie has a root state + if (!gfsm_automaton_has_state(trie,trie->root_id)) { + trie->root_id = gfsm_automaton_add_state(trie); + } + qid = trie->root_id; + + //-- initialize state-path, if specified + if (path_states) { + g_ptr_array_set_size(path_states, (lo ? lo->len : 0) + (hi ? hi->len : 0)); + path_states->len = 0; + g_ptr_array_add(path_states, GUINT_TO_POINTER(qid)); + } + + //-- add lower path + for (i=0; lo && i < lo->len; i++) { + if (add_to_state_final) { + gfsm_automaton_set_final_state_full(trie, qid, TRUE, + gfsm_sr_plus(trie->sr, w, gfsm_automaton_get_final_weight(trie, qid))); + } + qid = gfsm_trie_get_arc_lower(trie, qid, ((gfsmLabelVal)GPOINTER_TO_UINT(g_ptr_array_index(lo,i))), w, add_to_arcs); + if (path_states) g_ptr_array_add(path_states, GUINT_TO_POINTER(qid)); + } + + //-- add upper path + for (i=0; hi && i < hi->len; i++) { + if (add_to_state_final) { + gfsm_automaton_set_final_state_full(trie, qid, TRUE, + gfsm_sr_plus(trie->sr, w, gfsm_automaton_get_final_weight(trie, qid))); + } + qid = gfsm_trie_get_arc_upper(trie, qid, ((gfsmLabelVal)GPOINTER_TO_UINT(g_ptr_array_index(hi,i))), w, add_to_arcs); + if (path_states) g_ptr_array_add(path_states, GUINT_TO_POINTER(qid)); + } + + //-- add final epsilon-arc + //qid = gfsm_trie_get_arc_both(trie, qid, gfsmEpsilon, gfsmEpsilon, w, add_to_arcs); + //if (path_states) g_ptr_array_add(path_states,qid); + + if (add_to_state_final || add_to_path_final) { + gfsm_automaton_set_final_state_full(trie, qid, TRUE, + gfsm_sr_plus(trie->sr, w, gfsm_automaton_get_final_weight(trie, qid))); + } else { + gfsm_automaton_set_final_state(trie,qid,TRUE); + } + + return qid; +} + +/*====================================================================== + * Methods: find prefix + */ +gfsmStateId gfsm_trie_find_prefix(gfsmTrie *trie, + gfsmLabelVector *lo, + gfsmLabelVector *hi, + guint *lo_i, + guint *hi_i, + 
gfsmWeight *w_last, + gfsmStateIdVector *path_states + ) +{ + gfsmStateId qid = trie->root_id; + gfsmWeight fw, w = gfsm_sr_zero(trie->sr); + guint i, j=0; + gfsmArc *a; + + //-- initialize state-path, if specified + if (path_states) { + g_ptr_array_set_size(path_states, (lo ? lo->len : 0) + (hi ? hi->len : 0)); + path_states->len = 0; + g_ptr_array_add(path_states, GUINT_TO_POINTER(qid)); + } + + //-- find lower path + for (i=0; lo && i < lo->len; i++) { + if ( !(a=gfsm_trie_find_arc_lower(trie, qid, ((gfsmLabelVal)GPOINTER_TO_UINT(g_ptr_array_index(lo,i))))) ) + break; + + qid = a->target; + w = a->weight; + if (path_states) g_ptr_array_add(path_states, GUINT_TO_POINTER(qid)); + } + + //-- find upper path: only if all of lo was matched (a NULL lo is the empty string and matches trivially) + if (!lo || i==lo->len) { + for (j=0; hi && j < hi->len; j++) { + if ( !(a = gfsm_trie_find_arc_upper(trie, qid, ((gfsmLabelVal)GPOINTER_TO_UINT(g_ptr_array_index(hi,j))))) ) + break; + + qid = a->target; + w = a->weight; + if (path_states) g_ptr_array_add(path_states, GUINT_TO_POINTER(qid)); + } + + //-- final state? (a NULL hi is the empty string and counts as fully matched) 
+ if ((!hi || j==hi->len) && gfsm_automaton_lookup_final(trie, qid, &fw)) + w = fw; + } + + //-- output variables + if (lo_i) *lo_i = i; + if (hi_i) *hi_i = j; + if (w_last) *w_last = w; + + return qid; +} + +/*====================================================================== + * Methods: find arcs + */ + +//-------------------------------------------------------------- +gfsmArc* gfsm_trie_find_arc_lower(gfsmTrie *trie, gfsmStateId qid, gfsmLabelVal lab) +{ + gfsmArcIter ai; + gfsmArc *a=NULL; + for (gfsm_arciter_open(&ai, trie, qid), gfsm_arciter_seek_lower(&ai, lab); + gfsm_arciter_ok(&ai); + gfsm_arciter_next(&ai), gfsm_arciter_seek_lower(&ai,lab)) + { + a = gfsm_arciter_arc(&ai); + break; + } + gfsm_arciter_close(&ai); + return a; +} + +//-------------------------------------------------------------- +gfsmArc* gfsm_trie_find_arc_upper(gfsmTrie *trie, gfsmStateId qid, gfsmLabelVal lab) +{ + gfsmArcIter ai; + gfsmArc *a=NULL; + for (gfsm_arciter_open(&ai, trie, qid), gfsm_arciter_seek_upper(&ai, lab); + gfsm_arciter_ok(&ai); + gfsm_arciter_next(&ai), gfsm_arciter_seek_upper(&ai,lab)) + { + a = gfsm_arciter_arc(&ai); + break; + } + gfsm_arciter_close(&ai); + return a; +} + +//-------------------------------------------------------------- +gfsmArc* gfsm_trie_find_arc_both(gfsmTrie *trie, gfsmStateId qid, gfsmLabelVal lo, gfsmLabelVal hi) +{ + gfsmArcIter ai; + gfsmArc *a=NULL; + for (gfsm_arciter_open(&ai,trie,qid), gfsm_arciter_seek_both(&ai,lo,hi); + gfsm_arciter_ok(&ai); + gfsm_arciter_next(&ai), gfsm_arciter_seek_both(&ai,lo,hi)) + { + a = gfsm_arciter_arc(&ai); + break; + } + gfsm_arciter_close(&ai); + return a; +} + + +/*====================================================================== + * Methods: find or insert arcs + */ + +//-------------------------------------------------------------- +gfsmStateId gfsm_trie_get_arc_lower(gfsmTrie *trie, gfsmStateId qid, gfsmLabelVal lab, gfsmWeight w, gboolean add_weight) +{ + gfsmArc 
*a=gfsm_trie_find_arc_lower(trie,qid,lab); + + if (a==NULL) { + gfsmStateId qid2 = gfsm_automaton_add_state(trie); + gfsm_automaton_add_arc(trie,qid,qid2,lab,gfsmEpsilon, add_weight ? w : trie->sr->zero); + return qid2; + } + + //-- found an existing arc + if (add_weight) a->weight = gfsm_sr_plus(trie->sr, a->weight, w); + return a->target; +} + +//-------------------------------------------------------------- +gfsmStateId gfsm_trie_get_arc_upper(gfsmTrie *trie, gfsmStateId qid, gfsmLabelVal lab, gfsmWeight w, gboolean add_weight) +{ + gfsmArc *a=gfsm_trie_find_arc_upper(trie,qid,lab); + + if (a==NULL) { + gfsmStateId qid2 = gfsm_automaton_add_state(trie); + gfsm_automaton_add_arc(trie,qid,qid2,gfsmEpsilon,lab, add_weight ? w : trie->sr->zero); + //trie->flags.is_deterministic = TRUE; //-- HACK + return qid2; + } + + //-- found an existing arc + if (add_weight) a->weight = gfsm_sr_plus(trie->sr, a->weight, w); + return a->target; +} + +//-------------------------------------------------------------- +gfsmStateId gfsm_trie_get_arc_both(gfsmTrie *trie, gfsmStateId qid, gfsmLabelVal lo, gfsmLabelVal hi, gfsmWeight w, gboolean add_weight) +{ + gfsmArc *a=gfsm_trie_find_arc_both(trie,qid,lo,hi); + + if (a==NULL) { + gfsmStateId qid2 = gfsm_automaton_add_state(trie); + gfsm_automaton_add_arc(trie,qid,qid2,lo,hi, add_weight ? 
w : trie->sr->zero); + //trie->flags.is_deterministic = TRUE; //-- HACK + return qid2; + } + + //-- found an existing arc + if (add_weight) a->weight = gfsm_sr_plus(trie->sr, a->weight, w); + return a->target; +} + diff --git a/gfsm/gfsm/src/libgfsm/gfsmTrie.h b/gfsm/gfsm/src/libgfsm/gfsmTrie.h new file mode 100644 index 0000000..430e1c1 --- /dev/null +++ b/gfsm/gfsm/src/libgfsm/gfsmTrie.h @@ -0,0 +1,201 @@ + +/*=============================================================================*\ + * File: gfsmTrie.h + * Author: Bryan Jurish <moocow@ling.uni-potsdam.de> + * Description: finite state machine library + * + * Copyright (c) 2005-2007 Bryan Jurish. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + *=============================================================================*/ + +/** \file gfsmTrie.h + * \brief Deterministic prefix tree automata + */ + +#ifndef _GFSM_TRIE_H +#define _GFSM_TRIE_H + +#include <gfsmAutomaton.h> + +/*====================================================================== + * Types: Trie + */ +/** Alias for gfsmAutomaton */ +typedef gfsmAutomaton gfsmTrie; + +/*====================================================================== + * Constants + */ +/** Default initial Trie flags */ +extern const gfsmAutomatonFlags gfsmTrieDefaultFlags; + +/** Default initial Trie semiring */ +extern const gfsmSRType gfsmTrieDefaultSRType; + + +/*====================================================================== + * Methods: Constructors etc. + */ +///\name Constructors etc. +//@{ + +//------------------------------ +/** Create a new ::gfsmTrie automaton */ +#define gfsm_trie_new() \ + gfsm_automaton_new_full(gfsmTrieDefaultFlags, gfsmTrieDefaultSRType, gfsmAutomatonDefaultSize) + +//@} + + +/*====================================================================== + * Methods: Manipulation + */ +///\name Manipulation +//@{ + +//------------------------------ +/** Add a string-pair \a (lo,hi) to the trie with weight \a w + * \param trie Trie + * \param lo lower string (NULL for epsilon) + * \param hi upper string (NULL for epsilon) + * \param w weight which is added (gfsm_sr_plus) to all arcs for this pair + * \returns Id of the final state of the added path + * + * \note really just a wrapper for \a gfsm_trie_add_path_full() with + * \a add_to_arcs=true , \a add_to_state_final=true, \a add_to_path_final=true. 
+ */ +gfsmStateId gfsm_trie_add_path(gfsmTrie *trie, + gfsmLabelVector *lo, + gfsmLabelVector *hi, + gfsmWeight w); + +//------------------------------ +/** Add a string-pair \a (lo,hi) to the trie with weight \a w + * \param trie Trie + * \param lo lower string (NULL for epsilon) + * \param hi upper string (NULL for epsilon) + * \param w weight associated with this pair + * \param add_to_arcs whether to add (gfsm_sr_plus) \a w to all arc-weights + * \param add_to_state_final whether to add (gfsm_sr_plus) \a w to all intermediate state final-weights; + * implies that all states will be marked as final in the resulting automaton + * \param add_to_path_final whether to add (gfsm_sr_plus) \a w to the final weight for the last node + * in the path + * \param path_states If non-NULL, contains the state-path corresponding to \a (lo,hi) on return + * \returns Id of the final state of the added path + */ +gfsmStateId gfsm_trie_add_path_full(gfsmTrie *trie, + gfsmLabelVector *lo, + gfsmLabelVector *hi, + gfsmWeight w, + gboolean add_to_arcs, + gboolean add_to_state_final, + gboolean add_to_path_final, + gfsmStateIdVector *path_states + ); + +/*====================================================================== + * Methods: find path + */ +/** Find state of longest prefix for a string-pair \a (lo,hi) in the trie. 
+ * \param trie Trie + * \param lo lower string (NULL for epsilon) + * \param hi upper string (NULL for epsilon) + * \param lo_i on return holds number of labels in \a lo which were matched + * \param hi_i on return holds number of labels in \a hi which were matched + * \param w_last pointer to weight of last arc followed or final weight + * \param path_states if non-NULL, contains the state-path corresponding to the prefix on return + * \returns Id of the state matching the longest prefix of \a (lo,hi) + */ +gfsmStateId gfsm_trie_find_prefix(gfsmTrie *trie, + gfsmLabelVector *lo, + gfsmLabelVector *hi, + guint *lo_i, + guint *hi_i, + gfsmWeight *w_last, + gfsmStateIdVector *path_states + ); + + +/*====================================================================== + * Methods: find arcs + */ +/** Find an arc from state \a qid with lower label \a lab in trie \a trie. + * \param trie Trie + * \param qid outgoing state qid + * \param lab lower label + * \returns gfsmArc* or NULL on failure + */ +gfsmArc* gfsm_trie_find_arc_lower(gfsmTrie *trie, gfsmStateId qid, gfsmLabelVal lab); + +/** Find an arc from state \a qid with upper label \a lab in trie \a trie. + * \param trie Trie + * \param qid outgoing state qid + * \param lab upper label id + * \returns gfsmArc* or NULL on failure + */ +gfsmArc* gfsm_trie_find_arc_upper(gfsmTrie *trie, gfsmStateId qid, gfsmLabelVal lab); + +/** Find an arc from state \a qid with lower label \a lo and upper label \a hi in trie \a trie. + * \param trie Trie + * \param qid outgoing state qid + * \param lo lower label id + * \param hi upper label id + * \returns gfsmArc* or NULL on failure + */ +gfsmArc* gfsm_trie_find_arc_both(gfsmTrie *trie, gfsmStateId qid, gfsmLabelVal lo, gfsmLabelVal hi); + + +/*====================================================================== + * Methods: find or insert arcs + */ +/** Find or insert an arc from state \a qid with lower label \a lab in trie \a trie; + * adding weight \a w. 
+ * \param trie Trie + * \param qid outgoing state qid + * \param lab lower label + * \param w arc weight + * \param add_weight if true, \a w is added (gfsm_sr_plus) to the weight of an existing arc or used as the weight of a newly inserted arc; if false, an existing arc is left unchanged and a new arc gets the semiring zero weight + * \returns gfsmStateId of the (unique) destination state + */ +gfsmStateId gfsm_trie_get_arc_lower(gfsmTrie *trie, gfsmStateId qid, gfsmLabelVal lab, gfsmWeight w, gboolean add_weight); + +/** Find or insert an arc from state \a qid with upper label \a lab in trie \a trie; + * adding weight \a w. + * \param trie Trie + * \param qid outgoing state qid + * \param lab upper label + * \param w arc weight + * \param add_weight if true, \a w is added (gfsm_sr_plus) to the weight of an existing arc or used as the weight of a newly inserted arc; if false, an existing arc is left unchanged and a new arc gets the semiring zero weight + * \returns gfsmStateId of the (unique) destination state + */ +gfsmStateId gfsm_trie_get_arc_upper(gfsmTrie *trie, gfsmStateId qid, gfsmLabelVal lab, gfsmWeight w, gboolean add_weight); + +/** Find or insert an arc from state \a qid with lower label \a lo and upper label \a hi + * with weight \a w in trie \a trie. 
+ * \param trie Trie + * \param qid outgoing state qid + * \param lo lower label id + * \param hi upper label id + * \param w arc weight + * \param add_weight whether to add weight to the arc + * \returns gfsmStateId of the (unique) destination state + */ +gfsmStateId gfsm_trie_get_arc_both(gfsmTrie *trie, gfsmStateId qid, gfsmLabelVal lo, gfsmLabelVal hi, gfsmWeight w, gboolean add_weight); + +//@} + + +#endif /* _GFSM_TRIE_H */ diff --git a/gfsm/gfsm/src/libgfsm/gfsmUtils.c b/gfsm/gfsm/src/libgfsm/gfsmUtils.c new file mode 100644 index 0000000..2fe8e1e --- /dev/null +++ b/gfsm/gfsm/src/libgfsm/gfsmUtils.c @@ -0,0 +1,101 @@ +/*=============================================================================*\ + * File: gfsmUtils.c + * Author: Bryan Jurish <moocow@ling.uni-potsdam.de> + * Description: finite state machine library + * + * Copyright (c) 2004 Bryan Jurish. 
+ * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + *=============================================================================*/ + +#include <glib.h> +#include <gfsmUtils.h> +#include <gfsmError.h> +#include <string.h> +#include <errno.h> + +/*====================================================================== + * glib utility functions + */ + +/*-------------------------------------------------------------- + * int_compare() + * Compares the integers packed into the pointers \a a and \a b. + * Returns 1 if a < b, 0 if a == b, -1 if a > b: the sign of (b - a), + * computed without the subtraction so that it cannot overflow. + * NOTE(review): this is the opposite sign convention from + * gfsm_uint_compare(); kept as-is for compatibility -- confirm intent. 
+ */ +gint gfsm_int_compare(gconstpointer a, gconstpointer b) +{ + const gint ia = GPOINTER_TO_INT(a); + const gint ib = GPOINTER_TO_INT(b); + return (ib > ia) - (ib < ia); +} + +/*-------------------------------------------------------------- + * int_compare_data() + * Same as gfsm_int_compare(); the \a data argument is ignored. + */ +gint gfsm_int_compare_data(gconstpointer a, gconstpointer b, gpointer data) +{ + return gfsm_int_compare(a,b); +} + +/*-------------------------------------------------------------- + * uint_compare() + */ +#define gfsm_uint_compare_code(a,b) \ + ( GPOINTER_TO_UINT(a) < GPOINTER_TO_UINT(b) ? (-1) : (GPOINTER_TO_UINT(a) == GPOINTER_TO_UINT(b) ? 
0 : 1) ) + //( (gint) ((glong((guint)b)) - (glong((guint)a))) ); + +gint gfsm_uint_compare(gconstpointer a, gconstpointer b) +{ + return gfsm_uint_compare_code(GPOINTER_TO_UINT(a),GPOINTER_TO_UINT(b)); +} + +/*-------------------------------------------------------------- + * uint_compare_data() + */ +gint gfsm_uint_compare_data(gconstpointer a, gconstpointer b, gpointer data) +{ + return gfsm_uint_compare_code(a,b); +} + +/*====================================================================== + * Hash Utilties + */ +/*-------------------------------------------------------------- + * gfsm_hash_clear_func() + */ +gboolean gfsm_hash_clear_func (gpointer key, gpointer value, gpointer user_data) +{ return TRUE; }; + + +/*====================================================================== + * File Utilties + */ + +/*-------------------------------------------------------------- + * open_filename() + */ +FILE *gfsm_open_filename(const char *filename, const char *mode, gfsmError **errp) +{ + FILE *f; + if (strcmp(filename,"-")==0) { + if (strchr(mode,'w')) f = stdout; + else f = stdin; + } + else if (!(f = fopen(filename,mode))) { + g_set_error(errp, + g_quark_from_static_string("gfsm"), //-- domain + g_quark_from_static_string("fopen"), //-- code + "open failed for file '%s': %s", + filename, strerror(errno)); + } + return f; +} diff --git a/gfsm/gfsm/src/libgfsm/gfsmUtils.h b/gfsm/gfsm/src/libgfsm/gfsmUtils.h new file mode 100644 index 0000000..3734197 --- /dev/null +++ b/gfsm/gfsm/src/libgfsm/gfsmUtils.h @@ -0,0 +1,72 @@ + +/*=============================================================================*\ + * File: gfsmUtils.h + * Author: Bryan Jurish <moocow@ling.uni-potsdam.de> + * Description: finite state machine library + * + * Copyright (c) 2004-2007 Bryan Jurish. 
+ * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + *=============================================================================*/ + +/** \file gfsmUtils.h + * \brief Miscellaneous utilities + */ + +#ifndef _GFSM_UTILS_H +#define _GFSM_UTILS_H + +#include <stdio.h> +#include <gfsmError.h> + +/*====================================================================== + * Constants + */ +/*(none)*/ + +/*====================================================================== + * glib utility functions + */ +/** 3-way comparison predicate for integers. + * \note the result has the sign of (b - a), i.e. it is positive when \a a < \a b. 
+ */ +gint gfsm_int_compare(gconstpointer a, gconstpointer b); + +/** 3-way comparison predicate for integers, with user-data slot (\a data is ignored) */ +gint gfsm_int_compare_data(gconstpointer a, gconstpointer b, gpointer data); + +/** 3-way comparison predicate for unsigned integers: negative when \a a < \a b */ +gint gfsm_uint_compare(gconstpointer a, gconstpointer b); + +/** 3-way comparison predicate for unsigned integers, with user-data slot (\a data is ignored) */ +gint gfsm_uint_compare_data(gconstpointer a, gconstpointer b, gpointer data); + +/*====================================================================== + * Hash Utilities + */ +/** Utility function to clear hash tables */ +gboolean gfsm_hash_clear_func (gpointer key, gpointer value, gpointer user_data); + 
+/*====================================================================== + * File Utilities + */ +/** Open a named file. + * The filename \a "-" may be used to indicate stdin or stdout, + * depending on \a mode. + * + * If the file cannot be opened, \a *errp is set (if \a errp is non-NULL) and NULL is returned. + */ +FILE *gfsm_open_filename(const char *filename, const char *mode, gfsmError **errp); + + +#endif /* _GFSM_UTILS_H */ diff --git a/gfsm/gfsm/src/libgfsm/gfsmVersion.c b/gfsm/gfsm/src/libgfsm/gfsmVersion.c new file mode 100644 index 0000000..86f55b3 --- /dev/null +++ b/gfsm/gfsm/src/libgfsm/gfsmVersion.c @@ -0,0 +1,52 @@ + +/*=============================================================================*\ + * File: gfsmVersion.c + * Author: Bryan Jurish <moocow@ling.uni-potsdam.de> + * Description: finite state machine library + * + * Copyright (c) 2004-2007 Bryan Jurish. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + *=============================================================================*/ + +#include <gfsmVersion.h> +#include <gfsmUtils.h> + +#ifdef HAVE_CONFIG_H +#include <gfsmConfig.h> +#endif + +/*====================================================================== + * Constants + */ +const gfsmVersionInfo gfsm_version = + { + GFSM_VERSION_MAJOR, + GFSM_VERSION_MINOR, + GFSM_VERSION_MICRO + }; + +const char *gfsm_version_string = PACKAGE_VERSION; + +/*====================================================================== + * Comparison + */ +int gfsm_version_compare(gfsmVersionInfo v1, gfsmVersionInfo v2) +{ + int rc; + if ((rc=gfsm_uint_compare(GUINT_TO_POINTER(v1.major),GUINT_TO_POINTER(v2.major)))) return rc; + else if ((rc=gfsm_uint_compare(GUINT_TO_POINTER(v1.minor),GUINT_TO_POINTER(v2.minor)))) return rc; + return gfsm_uint_compare(GUINT_TO_POINTER(v1.micro),GUINT_TO_POINTER(v2.micro)); +} diff --git a/gfsm/gfsm/src/libgfsm/gfsmVersion.h b/gfsm/gfsm/src/libgfsm/gfsmVersion.h new file mode 100644 index 0000000..7e0a530 --- /dev/null +++ b/gfsm/gfsm/src/libgfsm/gfsmVersion.h @@ -0,0 +1,73 @@ + +/*=============================================================================*\ + * File: gfsmVersion.h + * Author: Bryan Jurish <moocow@ling.uni-potsdam.de> + * Description: finite state machine library + * + * Copyright (c) 2004-2007 Bryan Jurish. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. 
+ * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + *=============================================================================*/ + +/** \file gfsmVersion.h + * \brief Library version information + */ + +#ifndef _GFSM_VERSION_H +#define _GFSM_VERSION_H + +#include <glib.h> + +/*====================================================================== + * Types + */ +/// Library Version information +typedef struct { + guint32 major; /**< major version */ + guint32 minor; /**< minor version */ + guint32 micro; /**< micro version */ +} gfsmVersionInfo; + +/*====================================================================== + * Constants + */ +/** Current version information */ +extern const gfsmVersionInfo gfsm_version; + +/** Current version string */ +extern const char *gfsm_version_string; + +/*====================================================================== + * Comparison + */ +/** 3-way comparison two gfsmVersionInfo structures */ +int gfsm_version_compare(gfsmVersionInfo v1, gfsmVersionInfo v2); + +/** Equality check for gfsmVersionInfo structures */ +#define gfsm_version_eq(v1,v2) (gfsm_version_compare((gfsmVersionInfo)(v1),(gfsmVersionInfo)(v2))==0) + +/** Less-than comparison for gfsmVersionInfo structures: v1 < v2 */ +#define gfsm_version_less(v1,v2) (gfsm_version_compare((gfsmVersionInfo)(v1),(gfsmVersionInfo)(v2))<0) + +/** Less-than-or-equal comparison for gfsmVersionInfo structures: v1 <= v2 */ +#define gfsm_version_le(v1,v2) (gfsm_version_compare((gfsmVersionInfo)(v1),(gfsmVersionInfo)(v2))<=0) + +/** Greater-than 
comparison for gfsmVersionInfo structures: v1 > v2 */ +#define gfsm_version_greater(v1,v2) (gfsm_version_compare((gfsmVersionInfo)(v1),(gfsmVersionInfo)(v2))>0) + +/** Greater-than-or-equal comparison for gfsmVersionInfo structures: v1 >= v2 */ +#define gfsm_version_ge(v1,v2) (gfsm_version_compare((gfsmVersionInfo)(v1),(gfsmVersionInfo)(v2))>=0) + +#endif /* _GFSM_VERSION_H */ diff --git a/gfsm/gfsm/src/libgfsm/gfsmWeightMap.c b/gfsm/gfsm/src/libgfsm/gfsmWeightMap.c new file mode 100644 index 0000000..090b0d3 --- /dev/null +++ b/gfsm/gfsm/src/libgfsm/gfsmWeightMap.c @@ -0,0 +1,54 @@ +/*=============================================================================*\ + * File: gfsmWeightmap.c + * Author: Bryan Jurish <moocow@ling.uni-potsdam.de> + * Description: finite state machine library: weight maps: extern definitions + * + * Copyright (c) 2004-2007 Bryan Jurish. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + *=============================================================================*/ + +#include <gfsmWeightMap.h> +#include <gfsmCompound.h> + +//-- (no-)inline definitions +#ifndef GFSM_INLINE_ENABLED +# include <gfsmWeightMap.hi> +#endif + +//-------------------------------------------------------------- +// weightmap_to_array_foreach_func_() +static +gboolean gfsm_weightmap_to_array_foreach_func_(gpointer stateid_p, gpointer weight_p, gfsmStateWeightPairArray *array) +{ + gfsmStateWeightPair wp; + wp.id = GPOINTER_TO_UINT(stateid_p); + wp.w = gfsm_ptr2weight(weight_p); + g_array_append_val(array,wp); + return FALSE; //-- continue traversal +} + +//-------------------------------------------------------------- +gfsmStateWeightPairArray *gfsm_weightmap_to_array(gfsmWeightMap *weightmap, gfsmStateWeightPairArray *array) +{ + if (!array) { + array = g_array_sized_new(FALSE,FALSE,sizeof(gfsmStateWeightPair),gfsm_weightmap_size(weightmap)); + } else { + g_array_set_size(array,gfsm_weightmap_size(weightmap)); + array->len = 0; + } + gfsm_weightmap_foreach(weightmap, (GTraverseFunc)gfsm_weightmap_to_array_foreach_func_, array); + return array; +} diff --git a/gfsm/gfsm/src/libgfsm/gfsmWeightMap.h b/gfsm/gfsm/src/libgfsm/gfsmWeightMap.h new file mode 100644 index 0000000..ef7a5d7 --- /dev/null +++ b/gfsm/gfsm/src/libgfsm/gfsmWeightMap.h @@ -0,0 +1,222 @@ + +/*=============================================================================*\ + * File: gfsmWeightMap.h + * Author: Bryan Jurish <moocow@ling.uni-potsdam.de> + * Description: finite state machine library + * + * Copyright (c) 2005-2007 Bryan Jurish. 
+ * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + *=============================================================================*/ + +/** \file gfsmWeightMap.h + * \brief Abstract map from gpointers to gfsmWeights using GTree + */ + +#ifndef _GFSM_WEIGHTMAP_H +#define _GFSM_WEIGHTMAP_H + +#include <gfsmSet.h> +#include <gfsmSemiring.h> + +/*====================================================================== + * Types + */ +/** \brief Type for maps from arbitrary data to gfsmWeights with a balanced binary tree. + * \details really just an ugly wrapper for GTree + */ +typedef GTree gfsmWeightMap; + +/** \brief Structure for mapping arbitrary data to gfsmWeights with a hash. + * \details really just an ugly wrapper for GHashTable + */ +typedef struct { + GHashTable *table; ///< hash table which does the dirty work + gfsmDupFunc key_dup; ///< key copying function, or NULL if keys are stored as passed +} gfsmWeightHash; + +/** \brief Union type for converting between gfsmWeight and gpointer. + * \details Requires that sizeof(gpointer)>=sizeof(gfsmWeight) in order to work properly. 
+ */ +typedef union { + gfsmWeight w; /**< Interpret underlying binary data as a gfsmWeight */ + gpointer p; /**< Interpret underlying binary data as a gpointer */ +} gfsmWeightOrPointer; + +/** \brief Type for a GArray of ::gfsmStateWeightPair */ +typedef GArray gfsmStateWeightPairArray; + +/*====================================================================== + * gfsmWeight <-> gpointer conversions + */ + +///\name gfsmWeight <-> gpointer Conversions +//@{ + +/** Convert a gpointer to a gfsmWeight */ +GFSM_INLINE +gfsmWeight gfsm_ptr2weight(const gpointer p); + +/** Convert a gfsmWeight to a gpointer */ +GFSM_INLINE +gpointer gfsm_weight2ptr(const gfsmWeight w); + +//@} + +/*====================================================================== + * gfsmWeightMap: Constructors etc. + */ + +///\name gfsmWeightMap: Constructors etc. +//@{ + +/** Create and return a new ::gfsmWeightMap + */ +GFSM_INLINE +gfsmWeightMap *gfsm_weightmap_new_full(GCompareDataFunc key_cmp_func, + gpointer key_cmp_data, + GDestroyNotify key_free_func); + +/** Create and return a new weightmap which does not free its keys (no key destructor is installed). */ +GFSM_INLINE +gfsmWeightMap *gfsm_weightmap_new(GCompareFunc key_cmp_func); + +/** Copy weightmap \a src to \a dst. 
\returns \a dst */ +GFSM_INLINE +gfsmWeightMap *gfsm_weightmap_copy(gfsmWeightMap *dst, gfsmWeightMap *src); + +/** Clear a ::gfsmWeightMap */ +GFSM_INLINE +void gfsm_weightmap_clear(gfsmWeightMap *wm); + +/** Destroy a weightmap */ +GFSM_INLINE +void gfsm_weightmap_free(gfsmWeightMap *wm); +//@} + + +/*====================================================================== + * Accessors + */ +///\name gfsmWeightMap: Accessors +//@{ + +/** lookup: check weightmap membership */ +GFSM_INLINE +gboolean gfsm_weightmap_contains(gfsmWeightMap *weightmap, gconstpointer key); + +/** extended lookup: get weight associated with key; on success the weight is stored in \a *wp and TRUE is returned, otherwise FALSE is returned */ +GFSM_INLINE +gboolean gfsm_weightmap_lookup(gfsmWeightMap *weightmap, gconstpointer key, gfsmWeight *wp); + +/* (disabled macro variant) insert a new key->weight mapping into the weightmap */ +//#define _gfsm_weightmap_insert(weightmap,key,w) g_tree_insert((weightmap),((gpointer)(key)),gfsm_weight2ptr(w)) + +/** insert a new key->weight mapping into the weightmap */ +GFSM_INLINE +void gfsm_weightmap_insert(gfsmWeightMap *weightmap, gconstpointer key, gfsmWeight w); + +/** Get size of weightmap */ +#define gfsm_weightmap_size(weightmap) g_tree_nnodes(weightmap) + +/** Remove an element from a weightmap */ +#define gfsm_weightmap_remove(weightmap,key) g_tree_remove((weightmap),((gpointer)(key))) + +/** Traversal (see g_tree_foreach) */ +#define gfsm_weightmap_foreach(weightmap,func,data) g_tree_foreach((weightmap),(func),(data)) + +/** Copy contents of a ::gfsmWeightMap into a ::gfsmStateWeightPairArray + * \param weightmap weightmap to examine + * \param array array to be populated, or NULL to allocate a new array + * \returns \a array, or a newly allocated ::gfsmStateWeightPairArray + * \note Caller is responsible for freeing \a array when it is no longer needed. 
+ */ +gfsmStateWeightPairArray *gfsm_weightmap_to_array(gfsmWeightMap *weightmap, gfsmStateWeightPairArray *array); + +//@} + + + +/*====================================================================== + * gfsmWeightHash: Constructors etc. + */ +///\name gfsmWeightHash: Constructors etc. +//@{ +/** Create and return a new hashing weight-map; \a key_dup_func copies keys on insertion and may be NULL to store keys as passed */ +GFSM_INLINE +gfsmWeightHash *gfsm_weighthash_new_full(gfsmDupFunc key_dup_func, + GHashFunc key_hash_func, + GEqualFunc key_equal_func, + GDestroyNotify key_destroy_func); + +/** create & return a new hashing weightmap (returned map will not copy or free keys) */ +#define gfsm_weighthash_new(key_hash_f,key_equal_f) \ + gfsm_weighthash_new_full(NULL,(key_hash_f),(key_equal_f),NULL) + +/** clear a weight-hash */ +GFSM_INLINE +void gfsm_weighthash_clear(gfsmWeightHash *wh); + +/** destroy a weight-hash */ +GFSM_INLINE +void gfsm_weighthash_free(gfsmWeightHash *wh); +//@} + + +/*====================================================================== + * gfsmWeightHash: Accessors + */ +///\name gfsmWeightHash: Accessors +//@{ + +/** extended lookup: get weight associated with key */ +GFSM_INLINE +gboolean gfsm_weighthash_lookup(gfsmWeightHash *wh, gconstpointer key, gfsmWeight *wp); + +/** insert a key->weight mapping into the weighthash */ +GFSM_INLINE +void gfsm_weighthash_insert(gfsmWeightHash *wh, gconstpointer key, gfsmWeight w); + +/** Possibly insert a key->weight mapping into the weighthash + * The mapping \a (key=>w) is inserted if either no mapping for \a key exists in \a wh, + * or if \a w is strictly less-than the stored weight for \a key according to \a sr. + * + * \returns TRUE if the mapping was updated, otherwise FALSE. + */ +GFSM_INLINE +gboolean gfsm_weighthash_insert_if_less(gfsmWeightHash *wh, gconstpointer key, gfsmWeight w, gfsmSemiring *sr); + +/** Possibly insert a key->weight mapping into the weighthash + * The mapping \a (key=>w) is inserted if no mapping for \a key exists in \a wh. 
+ * Otherwise, the stored weight \a (stored_w) for \a key is set to \a (w+stored_w) + * just in case \a (w+stored_w) is strictly less than \a stored_w for \a key according to \a sr. + * + * \returns TRUE if the mapping was updated, otherwise FALSE. + */ +GFSM_INLINE +gboolean gfsm_weighthash_insert_sum_if_less(gfsmWeightHash *wh, gconstpointer key, gfsmWeight w, gfsmSemiring *sr); + +/** Traversal (see g_hash_table_foreach) */ +#define gfsm_weighthash_foreach(wh,func,data) \ + g_hash_table_foreach((wh)->table,(func),(data)) + +//@} + +//-- inline definitions (pulled in here only when inlining is enabled; otherwise gfsmWeightMap.c includes the .hi file) +#ifdef GFSM_INLINE_ENABLED +# include <gfsmWeightMap.hi> +#endif + +#endif /* _GFSM_WEIGHTMAP_H */ diff --git a/gfsm/gfsm/src/libgfsm/gfsmWeightMap.hi b/gfsm/gfsm/src/libgfsm/gfsmWeightMap.hi new file mode 100644 index 0000000..1361bb8 --- /dev/null +++ b/gfsm/gfsm/src/libgfsm/gfsmWeightMap.hi @@ -0,0 +1,232 @@ +/*=============================================================================*\ + * File: gfsmWeightMap.hi + * Author: Bryan Jurish <moocow@ling.uni-potsdam.de> + * Description: finite state machine library: weight maps: inlined definitions + * + * Copyright (c) 2004-2007 Bryan Jurish. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + *=============================================================================*/ + +#include <gfsmUtils.h> + +/*====================================================================== + * gfsmWeight <-> gpointer conversions + */ +GFSM_INLINE +gfsmWeight gfsm_ptr2weight(const gpointer p) +{ + gfsmWeightOrPointer wp; + wp.p=p; + return wp.w; +} + +GFSM_INLINE +gpointer gfsm_weight2ptr(const gfsmWeight w) +{ + gfsmWeightOrPointer wp; + wp.p=0; + wp.w=w; + return wp.p; +} + + +/*====================================================================== + * gfsmWeightMap: Constructors etc. + */ + +//-------------------------------------------------------------- +GFSM_INLINE +gfsmWeightMap *gfsm_weightmap_new_full(GCompareDataFunc key_cmp_func, + gpointer key_cmp_data, + GDestroyNotify key_free_func) +{ + return g_tree_new_full(key_cmp_func,key_cmp_data,key_free_func,NULL); +} + +//-------------------------------------------------------------- +GFSM_INLINE +gfsmWeightMap *gfsm_weightmap_new(GCompareFunc key_cmp_func) +{ + return g_tree_new(key_cmp_func); +} + +//-------------------------------------------------------------- +GFSM_INLINE +gfsmWeightMap *gfsm_weightmap_copy(gfsmWeightMap *dst, gfsmWeightMap *src) +{ + return gfsm_set_copy(dst,src); +} + +//-------------------------------------------------------------- +GFSM_INLINE +void gfsm_weightmap_clear(gfsmWeightMap *wm) +{ gfsm_set_clear(wm); } + +//-------------------------------------------------------------- +GFSM_INLINE +void gfsm_weightmap_free(gfsmWeightMap *wm) +{ + gfsm_set_free(wm); +} + +/*====================================================================== + * gfsmWeightMap: Accessors + */ + + +//-------------------------------------------------------------- +GFSM_INLINE +gboolean 
gfsm_weightmap_contains(gfsmWeightMap *weightmap, gconstpointer key) +{ + gfsmWeight w; + return gfsm_weightmap_lookup(weightmap, key, &w); +} + +//-------------------------------------------------------------- +GFSM_INLINE +gboolean gfsm_weightmap_lookup(gfsmWeightMap *weightmap, gconstpointer key, gfsmWeight *wp) +{ + gpointer orig_key, orig_value; + if (g_tree_lookup_extended(weightmap, key, &orig_key, &orig_value)) { + *wp = gfsm_ptr2weight(orig_value); + return TRUE; + } + return FALSE; +} + +//-------------------------------------------------------------- +GFSM_INLINE +void gfsm_weightmap_insert(gfsmWeightMap *weightmap, gconstpointer key, gfsmWeight w) +{ + g_tree_insert(weightmap, (gpointer)key, gfsm_weight2ptr(w)); +} + +/*====================================================================== + * gfsmWeighHash: Constructors etc. + */ +//-------------------------------------------------------------- +GFSM_INLINE +gfsmWeightHash *gfsm_weighthash_new_full(gfsmDupFunc key_dup_func, + GHashFunc key_hash_func, + GEqualFunc key_equal_func, + GDestroyNotify key_destroy_func) +{ + gfsmWeightHash *wh = g_new(gfsmWeightHash,1); + wh->table = g_hash_table_new_full(key_hash_func, key_equal_func, key_destroy_func, NULL); + wh->key_dup = key_dup_func; + return wh; +} + + +//-------------------------------------------------------------- +GFSM_INLINE +void gfsm_weighthash_clear(gfsmWeightHash *wh) +{ + g_hash_table_foreach_remove(wh->table, gfsm_hash_clear_func,NULL); +} + +//-------------------------------------------------------------- +GFSM_INLINE +void gfsm_weighthash_free(gfsmWeightHash *wh) +{ + g_hash_table_destroy(wh->table); + g_free(wh); +} + + +/*====================================================================== + * gfsmWeightHash: Accessors + */ + +//-------------------------------------------------------------- +GFSM_INLINE +gboolean gfsm_weighthash_lookup(gfsmWeightHash *wh, gconstpointer key, gfsmWeight *wp) +{ + gpointer s_key, s_val; + if 
(g_hash_table_lookup_extended(wh->table, key, &s_key, &s_val)) { + *wp = gfsm_ptr2weight(s_val); + return TRUE; + } + return FALSE; +} + +//-------------------------------------------------------------- +GFSM_INLINE +void gfsm_weighthash_insert(gfsmWeightHash *wh, gconstpointer key, gfsmWeight w) +{ + gpointer s_key, s_val; + if (wh->key_dup && g_hash_table_lookup_extended(wh->table, key, &s_key, &s_val)) { + //-- already present: steal & replace + g_hash_table_steal(wh->table, s_key); + g_hash_table_insert(wh->table, s_key, gfsm_weight2ptr(w)); + } + else { + //-- not yet present: insert new mapping + if (wh->key_dup) s_key = (*(wh->key_dup))(key); + else s_key = (gpointer)key; + g_hash_table_insert(wh->table, s_key, gfsm_weight2ptr(w)); + } +} + +//-------------------------------------------------------------- +GFSM_INLINE +gboolean gfsm_weighthash_insert_if_less(gfsmWeightHash *wh, gconstpointer key, gfsmWeight w, gfsmSemiring *sr) +{ + gpointer s_key, s_val; + if (wh->key_dup && g_hash_table_lookup_extended(wh->table, key, &s_key, &s_val)) { + //-- already present + gfsmWeight s_w = gfsm_ptr2weight(s_val); + if (!gfsm_sr_less(sr, w, s_w)) return FALSE; //-- (s_w) <= (w) : no update required + + //-- steal & update + g_hash_table_steal(wh->table, s_key); + g_hash_table_insert(wh->table, s_key, gfsm_weight2ptr(w)); + } + else { + //-- not yet present: insert new mapping + if (wh->key_dup) s_key = (*(wh->key_dup))(key); + else s_key = (gpointer)key; + g_hash_table_insert(wh->table, s_key, gfsm_weight2ptr(w)); + } + + return TRUE; //-- update performed +} + +//-------------------------------------------------------------- +GFSM_INLINE +gboolean gfsm_weighthash_insert_sum_if_less(gfsmWeightHash *wh, gconstpointer key, gfsmWeight w, gfsmSemiring *sr) +{ + gpointer s_key, s_val; + if (wh->key_dup && g_hash_table_lookup_extended(wh->table, key, &s_key, &s_val)) { + //-- already present + gfsmWeight s_w = gfsm_ptr2weight(s_val); + w = gfsm_sr_plus(sr,w,s_w); + if 
(!gfsm_sr_less(sr,w,s_w)) return FALSE; //-- (s_w) <= (w+s_w) : no update required + + //-- steal & update + g_hash_table_steal(wh->table, s_key); + g_hash_table_insert(wh->table, s_key, gfsm_weight2ptr(w)); + } + else { + //-- not yet present: insert new mapping + if (wh->key_dup) s_key = (*(wh->key_dup))(key); + else s_key = (gpointer)key; + g_hash_table_insert(wh->table, s_key, gfsm_weight2ptr(w)); + } + + return TRUE; //-- update performed +} + diff --git a/gfsm/gfsm/src/libgfsm/gnulib/.cvsignore b/gfsm/gfsm/src/libgfsm/gnulib/.cvsignore new file mode 100644 index 0000000..8ef01bb --- /dev/null +++ b/gfsm/gfsm/src/libgfsm/gnulib/.cvsignore @@ -0,0 +1,13 @@ +.*~ +*~ +*.o +*.lo +*.a +*.la +*.so + +Makefile +Makefile.in + +orig +.libs diff --git a/gfsm/gfsm/src/libgfsm/gnulib/Makefile.am b/gfsm/gfsm/src/libgfsm/gnulib/Makefile.am new file mode 100644 index 0000000..e879251 --- /dev/null +++ b/gfsm/gfsm/src/libgfsm/gnulib/Makefile.am @@ -0,0 +1,70 @@ +## Process this file with automake to produce Makefile.in. +# Copyright (C) 2004 Free Software Foundation, Inc. +# +# This file is free software, distributed under the terms of the GNU +# General Public License. As a special exception to the GNU General +# Public License, this file may be distributed as part of a program +# that contains a configuration script generated by Automake, under +# the same distribution terms as the rest of that program. +# +# Generated by gnulib-tool. +# Reproduce by: gnulib-tool --import --dir=. --lib=libgnu --source-base=src/libgfsm/gnulib --m4-base=m4 --aux-dir=config --lgpl --libtool --macro-prefix=gl getdelim vasprintf + +##-- moo: cpp flags +AM_CPPFLAGS = -I. -I.. -I@srcdir@ -I@srcdir@/.. 
+AM_CFLAGS = @gfsm_OFLAGS@ @gfsm_WFLAGS@ + +AUTOMAKE_OPTIONS = 1.5 gnits no-dependencies + +noinst_LTLIBRARIES = libgnu.la + +libgnu_la_SOURCES = +libgnu_la_LIBADD = @LTLIBOBJS@ +EXTRA_DIST = +BUILT_SOURCES = +SUFFIXES = +MOSTLYCLEANFILES = +CLEANFILES = +DISTCLEANFILES = +MAINTAINERCLEANFILES = + +## begin gnulib module alloca-opt + +BUILT_SOURCES += $(ALLOCA_H) +EXTRA_DIST += alloca_.h + +# We need the following in order to create <alloca.h> when the system +# doesn't have one that works with the given compiler. +alloca.h: alloca_.h + cp $(srcdir)/alloca_.h $@-t + mv $@-t $@ +MOSTLYCLEANFILES += alloca.h alloca.h-t + +## end gnulib module alloca-opt + +## begin gnulib module size_max + +libgnu_la_SOURCES += size_max.h + +## end gnulib module size_max + +## begin gnulib module vasnprintf + +libgnu_la_SOURCES += printf-args.h printf-parse.h vasnprintf.h + +## end gnulib module vasnprintf + +## begin gnulib module vasprintf + +libgnu_la_SOURCES += vasprintf.h + +## end gnulib module vasprintf + +## begin gnulib module xsize + +libgnu_la_SOURCES += xsize.h + +## end gnulib module xsize + + +# Makefile.am ends here diff --git a/gfsm/gfsm/src/libgfsm/gnulib/Makefile.in b/gfsm/gfsm/src/libgfsm/gnulib/Makefile.in new file mode 100644 index 0000000..9b296a8 --- /dev/null +++ b/gfsm/gfsm/src/libgfsm/gnulib/Makefile.in @@ -0,0 +1,555 @@ +# Makefile.in generated by automake 1.9.6 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, +# 2003, 2004, 2005 Free Software Foundation, Inc. +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. 
+ +@SET_MAKE@ + +# Copyright (C) 2004 Free Software Foundation, Inc. +# +# This file is free software, distributed under the terms of the GNU +# General Public License. As a special exception to the GNU General +# Public License, this file may be distributed as part of a program +# that contains a configuration script generated by Automake, under +# the same distribution terms as the rest of that program. +# +# Generated by gnulib-tool. +# Reproduce by: gnulib-tool --import --dir=. --lib=libgnu --source-base=src/libgfsm/gnulib --m4-base=m4 --aux-dir=config --lgpl --libtool --macro-prefix=gl getdelim vasprintf + +srcdir = @srcdir@ +top_srcdir = @top_srcdir@ +VPATH = @srcdir@ +pkgdatadir = $(datadir)/@PACKAGE@ +pkglibdir = $(libdir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +top_builddir = ../../.. +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +INSTALL = @INSTALL@ +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +target_triplet = @target@ +subdir = src/libgfsm/gnulib +DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in asnprintf.c \ + asprintf.c getdelim.c getdelim.h printf-args.c printf-parse.c \ + vasnprintf.c vasprintf.c +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/m4/alloca.m4 \ + $(top_srcdir)/m4/eoverflow.m4 $(top_srcdir)/m4/getdelim.m4 \ + $(top_srcdir)/m4/gnulib-comp.m4 $(top_srcdir)/m4/intmax_t.m4 \ + $(top_srcdir)/m4/inttypes_h.m4 $(top_srcdir)/m4/longdouble.m4 \ + $(top_srcdir)/m4/longlong.m4 $(top_srcdir)/m4/onceonly_2_57.m4 \ + $(top_srcdir)/m4/signed.m4 $(top_srcdir)/m4/size_max.m4 \ + $(top_srcdir)/m4/stdint_h.m4 $(top_srcdir)/m4/vasnprintf.m4 \ + $(top_srcdir)/m4/vasprintf.m4 
$(top_srcdir)/m4/wchar_t.m4 \ + $(top_srcdir)/m4/wint_t.m4 $(top_srcdir)/m4/xsize.m4 \ + $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = $(top_builddir)/src/libgfsm/gfsmConfigAuto.h +CONFIG_CLEAN_FILES = +LTLIBRARIES = $(noinst_LTLIBRARIES) +libgnu_la_DEPENDENCIES = @LTLIBOBJS@ +am_libgnu_la_OBJECTS = +libgnu_la_OBJECTS = $(am_libgnu_la_OBJECTS) +DEFAULT_INCLUDES = -I. -I$(srcdir) -I$(top_builddir)/src/libgfsm +depcomp = +am__depfiles_maybe = +COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ + $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) +LTCOMPILE = $(LIBTOOL) --tag=CC --mode=compile $(CC) $(DEFS) \ + $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \ + $(AM_CFLAGS) $(CFLAGS) +CCLD = $(CC) +LINK = $(LIBTOOL) --tag=CC --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ + $(AM_LDFLAGS) $(LDFLAGS) -o $@ +SOURCES = $(libgnu_la_SOURCES) +DIST_SOURCES = $(libgnu_la_SOURCES) +ETAGS = etags +CTAGS = ctags +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +ACLOCAL = @ACLOCAL@ +ALLOCA = @ALLOCA@ +ALLOCA_H = @ALLOCA_H@ +AMDEP_FALSE = @AMDEP_FALSE@ +AMDEP_TRUE = @AMDEP_TRUE@ +AMTAR = @AMTAR@ +AR = @AR@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AWK = @AWK@ +BINDIST_CPU = @BINDIST_CPU@ +BINDIST_OS = @BINDIST_OS@ +BINDIST_PKGNAME = @BINDIST_PKGNAME@ +BINDIST_RELEASE = @BINDIST_RELEASE@ +BISON = @BISON@ +CC = @CC@ +CCDEPMODE = @CCDEPMODE@ +CFLAGS = @CFLAGS@ +CONFIG_DOC_WANT_DVI = @CONFIG_DOC_WANT_DVI@ +CONFIG_DOC_WANT_HTML = @CONFIG_DOC_WANT_HTML@ +CONFIG_DOC_WANT_LATEX = @CONFIG_DOC_WANT_LATEX@ +CONFIG_DOC_WANT_MAN = @CONFIG_DOC_WANT_MAN@ +CONFIG_DOC_WANT_PDF = @CONFIG_DOC_WANT_PDF@ +CONFIG_DOC_WANT_PS = @CONFIG_DOC_WANT_PS@ +CONFIG_DOC_WANT_TXT = @CONFIG_DOC_WANT_TXT@ +CONFIG_OPTIONS = @CONFIG_OPTIONS@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CXX = @CXX@ +CXXCPP = @CXXCPP@ +CXXDEPMODE = 
@CXXDEPMODE@ +CXXFLAGS = @CXXFLAGS@ +CYGPATH_W = @CYGPATH_W@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +DISTCHECK = @DISTCHECK@ +DOC_GOGS = @DOC_GOGS@ +DOC_MAN1_GOGS = @DOC_MAN1_GOGS@ +DOC_MAN5_PODS = @DOC_MAN5_PODS@ +DOC_PODS = @DOC_PODS@ +DOC_PROG_INDEX_SKEL = @DOC_PROG_INDEX_SKEL@ +DOC_WANT_DVI_FALSE = @DOC_WANT_DVI_FALSE@ +DOC_WANT_DVI_TRUE = @DOC_WANT_DVI_TRUE@ +DOC_WANT_HTML_FALSE = @DOC_WANT_HTML_FALSE@ +DOC_WANT_HTML_TRUE = @DOC_WANT_HTML_TRUE@ +DOC_WANT_LATEX_FALSE = @DOC_WANT_LATEX_FALSE@ +DOC_WANT_LATEX_TRUE = @DOC_WANT_LATEX_TRUE@ +DOC_WANT_MAN_FALSE = @DOC_WANT_MAN_FALSE@ +DOC_WANT_MAN_TRUE = @DOC_WANT_MAN_TRUE@ +DOC_WANT_PDF_FALSE = @DOC_WANT_PDF_FALSE@ +DOC_WANT_PDF_TRUE = @DOC_WANT_PDF_TRUE@ +DOC_WANT_PS_FALSE = @DOC_WANT_PS_FALSE@ +DOC_WANT_PS_TRUE = @DOC_WANT_PS_TRUE@ +DOC_WANT_TXT_FALSE = @DOC_WANT_TXT_FALSE@ +DOC_WANT_TXT_TRUE = @DOC_WANT_TXT_TRUE@ +DOXYGEN = @DOXYGEN@ +DOXYGEN_SOURCES = @DOXYGEN_SOURCES@ +DOXY_DEFINES = @DOXY_DEFINES@ +DOXY_FILTER = @DOXY_FILTER@ +DOXY_INPUT_FILTER = @DOXY_INPUT_FILTER@ +DOXY_TAGFILES = @DOXY_TAGFILES@ +DOXY_WANT_HTML = @DOXY_WANT_HTML@ +DOXY_WANT_LATEX = @DOXY_WANT_LATEX@ +DOXY_WANT_MAN = @DOXY_WANT_MAN@ +ECHO = @ECHO@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +EGREP = @EGREP@ +EOVERFLOW = @EOVERFLOW@ +EXEEXT = @EXEEXT@ +F77 = @F77@ +FFLAGS = @FFLAGS@ +FLEX = @FLEX@ +GFSM_SRC_SUBDIRS = @GFSM_SRC_SUBDIRS@ +GFSM_SUBDIRS = @GFSM_SUBDIRS@ +GFSM_VERSION_MAJOR = @GFSM_VERSION_MAJOR@ +GFSM_VERSION_MICRO = @GFSM_VERSION_MICRO@ +GFSM_VERSION_MINOR = @GFSM_VERSION_MINOR@ +GLIB_CFLAGS = @GLIB_CFLAGS@ +GLIB_LIBS = @GLIB_LIBS@ +GREP = @GREP@ +HAVE_BISON_FALSE = @HAVE_BISON_FALSE@ +HAVE_BISON_TRUE = @HAVE_BISON_TRUE@ +HAVE_DOXYGEN_FALSE = @HAVE_DOXYGEN_FALSE@ +HAVE_DOXYGEN_TRUE = @HAVE_DOXYGEN_TRUE@ +HAVE_DOXY_FILTER_FALSE = @HAVE_DOXY_FILTER_FALSE@ +HAVE_DOXY_FILTER_TRUE = @HAVE_DOXY_FILTER_TRUE@ +HAVE_FLEX_FALSE = @HAVE_FLEX_FALSE@ +HAVE_FLEX_TRUE = @HAVE_FLEX_TRUE@ +HAVE_OPTGEN_FALSE = @HAVE_OPTGEN_FALSE@ 
+HAVE_OPTGEN_TRUE = @HAVE_OPTGEN_TRUE@ +HAVE_POD2HTML_FALSE = @HAVE_POD2HTML_FALSE@ +HAVE_POD2HTML_TRUE = @HAVE_POD2HTML_TRUE@ +HAVE_POD2LATEX_FALSE = @HAVE_POD2LATEX_FALSE@ +HAVE_POD2LATEX_TRUE = @HAVE_POD2LATEX_TRUE@ +HAVE_POD2MAN_FALSE = @HAVE_POD2MAN_FALSE@ +HAVE_POD2MAN_TRUE = @HAVE_POD2MAN_TRUE@ +HAVE_POD2TEXT_FALSE = @HAVE_POD2TEXT_FALSE@ +HAVE_POD2TEXT_TRUE = @HAVE_POD2TEXT_TRUE@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +LDFLAGS = @LDFLAGS@ +LEX = @LEX@ +LEXLIB = @LEXLIB@ +LEX_OUTPUT_ROOT = @LEX_OUTPUT_ROOT@ +LIBOBJS = @LIBOBJS@ +LIBS = @LIBS@ +LIBTOOL = @LIBTOOL@ +LN_S = @LN_S@ +LTLIBOBJS = @LTLIBOBJS@ +MAKEINFO = @MAKEINFO@ +OBJEXT = @OBJEXT@ +OPTGEN_PERL = @OPTGEN_PERL@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +PERL = @PERL@ +PKG_CONFIG = @PKG_CONFIG@ +PKG_CONFIG_PATH = @PKG_CONFIG_PATH@ +POD2HTML = @POD2HTML@ +POD2LATEX = @POD2LATEX@ +POD2MAN = @POD2MAN@ +POD2TEXT = @POD2TEXT@ +RANLIB = @RANLIB@ +SET_MAKE = @SET_MAKE@ +SHELL = @SHELL@ +STRIP = @STRIP@ +VERSION = @VERSION@ +YACC = @YACC@ +YFLAGS = @YFLAGS@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_CXX = @ac_ct_CXX@ +ac_ct_F77 = @ac_ct_F77@ +am__fastdepCC_FALSE = @am__fastdepCC_FALSE@ +am__fastdepCC_TRUE = @am__fastdepCC_TRUE@ +am__fastdepCXX_FALSE = @am__fastdepCXX_FALSE@ +am__fastdepCXX_TRUE = @am__fastdepCXX_TRUE@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ +build_cpu = @build_cpu@ +build_os = @build_os@ +build_vendor = @build_vendor@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +dvidir = @dvidir@ +exec_prefix = @exec_prefix@ 
+gfsm_LIBS = @gfsm_LIBS@ +gfsm_OFLAGS = @gfsm_OFLAGS@ +gfsm_WFLAGS = @gfsm_WFLAGS@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localstatedir = @localstatedir@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +pkgconfigdir = @pkgconfigdir@ +pkgdocdir = @pkgdocdir@ +pkgdoclibdir = @pkgdoclibdir@ +pkgdocprogdir = @pkgdocprogdir@ +pkgdoctutdir = @pkgdoctutdir@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +spkgincludedir = @spkgincludedir@ +sysconfdir = @sysconfdir@ +target = @target@ +target_alias = @target_alias@ +target_cpu = @target_cpu@ +target_os = @target_os@ +target_vendor = @target_vendor@ +AM_CPPFLAGS = -I. -I.. -I@srcdir@ -I@srcdir@/.. 
+AM_CFLAGS = @gfsm_OFLAGS@ @gfsm_WFLAGS@ +AUTOMAKE_OPTIONS = 1.5 gnits no-dependencies +noinst_LTLIBRARIES = libgnu.la +libgnu_la_SOURCES = size_max.h printf-args.h printf-parse.h \ + vasnprintf.h vasprintf.h xsize.h +libgnu_la_LIBADD = @LTLIBOBJS@ +EXTRA_DIST = alloca_.h +BUILT_SOURCES = $(ALLOCA_H) +SUFFIXES = +MOSTLYCLEANFILES = alloca.h alloca.h-t +CLEANFILES = +DISTCLEANFILES = +MAINTAINERCLEANFILES = +all: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) all-am + +.SUFFIXES: +.SUFFIXES: .c .lo .o .obj +$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh \ + && exit 0; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnits src/libgfsm/gnulib/Makefile'; \ + cd $(top_srcdir) && \ + $(AUTOMAKE) --gnits src/libgfsm/gnulib/Makefile +.PRECIOUS: Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' 
in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \ + esac; + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +clean-noinstLTLIBRARIES: + -test -z "$(noinst_LTLIBRARIES)" || rm -f $(noinst_LTLIBRARIES) + @list='$(noinst_LTLIBRARIES)'; for p in $$list; do \ + dir="`echo $$p | sed -e 's|/[^/]*$$||'`"; \ + test "$$dir" != "$$p" || dir=.; \ + echo "rm -f \"$${dir}/so_locations\""; \ + rm -f "$${dir}/so_locations"; \ + done +libgnu.la: $(libgnu_la_OBJECTS) $(libgnu_la_DEPENDENCIES) + $(LINK) $(libgnu_la_LDFLAGS) $(libgnu_la_OBJECTS) $(libgnu_la_LIBADD) $(LIBS) + +mostlyclean-compile: + -rm -f *.$(OBJEXT) + +distclean-compile: + -rm -f *.tab.c + +.c.o: + $(COMPILE) -c $< + +.c.obj: + $(COMPILE) -c `$(CYGPATH_W) '$<'` + +.c.lo: + $(LTCOMPILE) -c -o $@ $< + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs + +distclean-libtool: + -rm -f libtool +uninstall-info-am: + +ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES) + list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | \ + $(AWK) ' { files[$$0] = 1; } \ + END { for (i in files) print i; }'`; \ + mkid -fID $$unique +tags: TAGS + +TAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \ + $(TAGS_FILES) $(LISP) + tags=; \ + here=`pwd`; \ + list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo 
$(srcdir)/$$i; fi; \ + done | \ + $(AWK) ' { files[$$0] = 1; } \ + END { for (i in files) print i; }'`; \ + if test -z "$(ETAGS_ARGS)$$tags$$unique"; then :; else \ + test -n "$$unique" || unique=$$empty_fix; \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + $$tags $$unique; \ + fi +ctags: CTAGS +CTAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \ + $(TAGS_FILES) $(LISP) + tags=; \ + here=`pwd`; \ + list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | \ + $(AWK) ' { files[$$0] = 1; } \ + END { for (i in files) print i; }'`; \ + test -z "$(CTAGS_ARGS)$$tags$$unique" \ + || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ + $$tags $$unique + +GTAGS: + here=`$(am__cd) $(top_builddir) && pwd` \ + && cd $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) $$here + +distclean-tags: + -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags + +distdir: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's|.|.|g'`; \ + list='$(DISTFILES)'; for file in $$list; do \ + case $$file in \ + $(srcdir)/*) file=`echo "$$file" | sed "s|^$$srcdirstrip/||"`;; \ + $(top_srcdir)/*) file=`echo "$$file" | sed "s|^$$topsrcdirstrip/|$(top_builddir)/|"`;; \ + esac; \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + dir=`echo "$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test "$$dir" != "$$file" && test "$$dir" != "."; then \ + dir="/$$dir"; \ + $(mkdir_p) "$(distdir)$$dir"; \ + else \ + dir=''; \ + fi; \ + if test -d $$d/$$file; then \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -pR $(srcdir)/$$file $(distdir)$$dir || exit 1; \ + fi; \ + cp -pR $$d/$$file $(distdir)$$dir || exit 1; \ + else \ + test -f $(distdir)/$$file \ + || cp -p $$d/$$file $(distdir)/$$file \ + || exit 1; \ + fi; \ + done +check-am: all-am +check: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) check-am +all-am: 
Makefile $(LTLIBRARIES) +installdirs: +install: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) install-am +install-exec: install-exec-am +install-data: install-data-am +uninstall: uninstall-am + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-am +install-strip: + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + `test -z '$(STRIP)' || \ + echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install +mostlyclean-generic: + -test -z "$(MOSTLYCLEANFILES)" || rm -f $(MOSTLYCLEANFILES) + +clean-generic: + -test -z "$(CLEANFILES)" || rm -f $(CLEANFILES) + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test -z "$(DISTCLEANFILES)" || rm -f $(DISTCLEANFILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." 
+ -test -z "$(BUILT_SOURCES)" || rm -f $(BUILT_SOURCES) + -test -z "$(MAINTAINERCLEANFILES)" || rm -f $(MAINTAINERCLEANFILES) +clean: clean-am + +clean-am: clean-generic clean-libtool clean-noinstLTLIBRARIES \ + mostlyclean-am + +distclean: distclean-am + -rm -f Makefile +distclean-am: clean-am distclean-compile distclean-generic \ + distclean-libtool distclean-tags + +dvi: dvi-am + +dvi-am: + +html: html-am + +info: info-am + +info-am: + +install-data-am: + +install-exec-am: + +install-info: install-info-am + +install-man: + +installcheck-am: + +maintainer-clean: maintainer-clean-am + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-am + +mostlyclean-am: mostlyclean-compile mostlyclean-generic \ + mostlyclean-libtool + +pdf: pdf-am + +pdf-am: + +ps: ps-am + +ps-am: + +uninstall-am: uninstall-info-am + +.PHONY: CTAGS GTAGS all all-am check check-am clean clean-generic \ + clean-libtool clean-noinstLTLIBRARIES ctags distclean \ + distclean-compile distclean-generic distclean-libtool \ + distclean-tags distdir dvi dvi-am html html-am info info-am \ + install install-am install-data install-data-am install-exec \ + install-exec-am install-info install-info-am install-man \ + install-strip installcheck installcheck-am installdirs \ + maintainer-clean maintainer-clean-generic mostlyclean \ + mostlyclean-compile mostlyclean-generic mostlyclean-libtool \ + pdf pdf-am ps ps-am tags uninstall uninstall-am \ + uninstall-info-am + + +# We need the following in order to create <alloca.h> when the system +# doesn't have one that works with the given compiler. +alloca.h: alloca_.h + cp $(srcdir)/alloca_.h $@-t + mv $@-t $@ + +# Makefile.am ends here +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. 
+.NOEXPORT: diff --git a/gfsm/gfsm/src/libgfsm/gnulib/alloca_.h b/gfsm/gfsm/src/libgfsm/gnulib/alloca_.h new file mode 100644 index 0000000..65a40ff --- /dev/null +++ b/gfsm/gfsm/src/libgfsm/gnulib/alloca_.h @@ -0,0 +1,52 @@ +/* Memory allocation on the stack. + + Copyright (C) 1995, 1999, 2001, 2002, 2003, 2004 Free Software + Foundation, Inc. + + This program is free software; you can redistribute it and/or modify it + under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 2.1, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, + USA. */ + +/* Avoid using the symbol _ALLOCA_H here, as Bison assumes _ALLOCA_H + means there is a real alloca function. */ +#ifndef _GNULIB_ALLOCA_H +# define _GNULIB_ALLOCA_H + +/* alloca (N) returns a pointer to N bytes of memory + allocated on the stack, which will last until the function returns. + Use of alloca should be avoided: + - inside arguments of function calls - undefined behaviour, + - in inline functions - the allocation may actually last until the + calling function returns, + - for huge N (say, N >= 65536) - you never know how large (or small) + the stack is, and when the stack cannot fulfill the memory allocation + request, the program just crashes. 
+ */ + +#ifdef __GNUC__ +# define alloca __builtin_alloca +#elif defined _AIX +# define alloca __alloca +#elif defined _MSC_VER +# include <malloc.h> +# define alloca _alloca +#else +# include <stddef.h> +# ifdef __cplusplus +extern "C" +# endif +void *alloca (size_t); +#endif + +#endif /* _GNULIB_ALLOCA_H */ diff --git a/gfsm/gfsm/src/libgfsm/gnulib/asnprintf.c b/gfsm/gfsm/src/libgfsm/gnulib/asnprintf.c new file mode 100644 index 0000000..ddc2cd3 --- /dev/null +++ b/gfsm/gfsm/src/libgfsm/gnulib/asnprintf.c @@ -0,0 +1,37 @@ +/* Formatted output to strings. + Copyright (C) 1999, 2002 Free Software Foundation, Inc. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation; either version 2.1, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License along + with this program; if not, write to the Free Software Foundation, + Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ + +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif + +/* Specification. */ +#include "vasnprintf.h" + +#include <stdarg.h> +/* Variadic front end to vasnprintf: collects the arguments following FORMAT into a va_list, passes RESULTBUF, LENGTHP, FORMAT and that list to vasnprintf(), and returns vasnprintf's result unchanged. */ +char * +asnprintf (char *resultbuf, size_t *lengthp, const char *format, ...) +{ + va_list args; + char *result; + + va_start (args, format); + result = vasnprintf (resultbuf, lengthp, format, args); + va_end (args); + return result; +} diff --git a/gfsm/gfsm/src/libgfsm/gnulib/asprintf.c b/gfsm/gfsm/src/libgfsm/gnulib/asprintf.c new file mode 100644 index 0000000..71466b3 --- /dev/null +++ b/gfsm/gfsm/src/libgfsm/gnulib/asprintf.c @@ -0,0 +1,37 @@ +/* Formatted output to strings.
+ Copyright (C) 1999, 2002 Free Software Foundation, Inc. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation; either version 2.1, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License along + with this program; if not, write to the Free Software Foundation, + Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ + +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif + +/* Specification. */ +#include "vasprintf.h" + +#include <stdarg.h> +/* Variadic front end to vasprintf: collects the arguments following FORMAT into a va_list, passes RESULTP, FORMAT and that list to vasprintf(), and returns vasprintf's int result unchanged. */ +int +asprintf (char **resultp, const char *format, ...) +{ + va_list args; + int result; + + va_start (args, format); + result = vasprintf (resultp, format, args); + va_end (args); + return result; +} diff --git a/gfsm/gfsm/src/libgfsm/gnulib/getdelim.c b/gfsm/gfsm/src/libgfsm/gnulib/getdelim.c new file mode 100644 index 0000000..ab9a4eb --- /dev/null +++ b/gfsm/gfsm/src/libgfsm/gnulib/getdelim.c @@ -0,0 +1,128 @@ +/* getdelim.c --- Implementation of replacement getdelim function. + Copyright (C) 1994, 1996, 1997, 1998, 2001, 2003, 2005 Free + Software Foundation, Inc. + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public License as + published by the Free Software Foundation; either version 2.1, or (at + your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details.
+ + You should have received a copy of the GNU Lesser General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. */ + +/* Ported from glibc by Simon Josefsson. */ + +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif + +#include "getdelim.h" + +#include <limits.h> +#include <stdlib.h> +#include <errno.h> + +#ifndef SIZE_MAX +# define SIZE_MAX ((size_t) -1) +#endif +#ifndef SSIZE_MAX +# define SSIZE_MAX ((ssize_t) (SIZE_MAX / 2)) +#endif +#if !HAVE_FLOCKFILE +# undef flockfile +# define flockfile(x) ((void) 0) +#endif +#if !HAVE_FUNLOCKFILE +# undef funlockfile +# define funlockfile(x) ((void) 0) +#endif + +/* Read up to (and including) a DELIMITER from FP into *LINEPTR (and + NUL-terminate it). *LINEPTR is a pointer returned from malloc (or + NULL), pointing to *N characters of space. It is realloc'ed as + necessary. Returns the number of characters read (not including + the null terminator), or -1 on error or EOF. */ + +ssize_t +getdelim (char **lineptr, size_t *n, int delimiter, FILE *fp) +{ + ssize_t result; + size_t cur_len = 0; + + if (lineptr == NULL || n == NULL || fp == NULL) + { + errno = EINVAL; + return -1; + } + + flockfile (fp); + + if (*lineptr == NULL || *n == 0) + { + *n = 120; + *lineptr = (char *) malloc (*n); + if (*lineptr == NULL) + { + result = -1; + goto unlock_return; + } + } + + for (;;) + { + int i; + + i = getc (fp); + if (i == EOF) + { + result = -1; + break; + } + + /* Make enough space for len+1 (for final NUL) bytes. */ + if (cur_len + 1 >= *n) + { + size_t needed_max = + SSIZE_MAX < SIZE_MAX ? (size_t) SSIZE_MAX + 1 : SIZE_MAX; + size_t needed = 2 * *n + 1; /* Be generous. 
*/ + char *new_lineptr; + + if (needed_max < needed) + needed = needed_max; + if (cur_len + 1 >= needed) + { + result = -1; + goto unlock_return; + } + + new_lineptr = (char *) realloc (*lineptr, needed); + if (new_lineptr == NULL) + { + result = -1; + goto unlock_return; + } + + *lineptr = new_lineptr; + *n = needed; + } + + (*lineptr)[cur_len] = i; + cur_len++; + + if (i == delimiter) + break; + } + (*lineptr)[cur_len] = '\0'; + result = cur_len ? cur_len : result; + + unlock_return: + funlockfile (fp); + return result; +} diff --git a/gfsm/gfsm/src/libgfsm/gnulib/getdelim.h b/gfsm/gfsm/src/libgfsm/gnulib/getdelim.h new file mode 100644 index 0000000..9e6c931 --- /dev/null +++ b/gfsm/gfsm/src/libgfsm/gnulib/getdelim.h @@ -0,0 +1,28 @@ +/* getdelim.h --- Prototype for replacement getdelim function. + Copyright (C) 2005 Free Software Foundation, Inc. + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public License as + published by the Free Software Foundation; either version 2.1, or (at + your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. */ + +/* Written by Simon Josefsson. */ + +/* Get size_t, FILE, ssize_t. And getdelim, if available. 
*/ +# include <stddef.h> +# include <stdio.h> +# include <sys/types.h> +/* Prototype for the replacement function; emitted only when HAVE_DECL_GETDELIM is false. */ +#if !HAVE_DECL_GETDELIM +ssize_t getdelim (char **lineptr, size_t *n, int delimiter, FILE *stream); +#endif /* !HAVE_DECL_GETDELIM */ diff --git a/gfsm/gfsm/src/libgfsm/gnulib/printf-args.c b/gfsm/gfsm/src/libgfsm/gnulib/printf-args.c new file mode 100644 index 0000000..ba4b8b3 --- /dev/null +++ b/gfsm/gfsm/src/libgfsm/gnulib/printf-args.c @@ -0,0 +1,118 @@ +/* Decomposed printf argument list. + Copyright (C) 1999, 2002-2003 Free Software Foundation, Inc. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation; either version 2.1, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License along + with this program; if not, write to the Free Software Foundation, + Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ + +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif + +/* Specification.
*/ +#include "printf-args.h" +/* Walk the a->count entries of a->arg in order and, for each one, pull the next value from ARGS with va_arg according to the entry's type tag, storing it in the matching member of the entry's union. Character and short types are fetched as int, as the inline comments in the va_arg calls indicate. Returns 0 when every entry was fetched, or -1 as soon as an entry carries a type that has no case below. */ +#ifdef STATIC +STATIC +#endif +int +printf_fetchargs (va_list args, arguments *a) +{ + size_t i; + argument *ap; + + for (i = 0, ap = &a->arg[0]; i < a->count; i++, ap++) + switch (ap->type) + { + case TYPE_SCHAR: + ap->a.a_schar = va_arg (args, /*signed char*/ int); + break; + case TYPE_UCHAR: + ap->a.a_uchar = va_arg (args, /*unsigned char*/ int); + break; + case TYPE_SHORT: + ap->a.a_short = va_arg (args, /*short*/ int); + break; + case TYPE_USHORT: + ap->a.a_ushort = va_arg (args, /*unsigned short*/ int); + break; + case TYPE_INT: + ap->a.a_int = va_arg (args, int); + break; + case TYPE_UINT: + ap->a.a_uint = va_arg (args, unsigned int); + break; + case TYPE_LONGINT: + ap->a.a_longint = va_arg (args, long int); + break; + case TYPE_ULONGINT: + ap->a.a_ulongint = va_arg (args, unsigned long int); + break; +#ifdef HAVE_LONG_LONG + case TYPE_LONGLONGINT: + ap->a.a_longlongint = va_arg (args, long long int); + break; + case TYPE_ULONGLONGINT: + ap->a.a_ulonglongint = va_arg (args, unsigned long long int); + break; +#endif + case TYPE_DOUBLE: + ap->a.a_double = va_arg (args, double); + break; +#ifdef HAVE_LONG_DOUBLE + case TYPE_LONGDOUBLE: + ap->a.a_longdouble = va_arg (args, long double); + break; +#endif + case TYPE_CHAR: + ap->a.a_char = va_arg (args, int); + break; +#ifdef HAVE_WINT_T + case TYPE_WIDE_CHAR: + ap->a.a_wide_char = va_arg (args, wint_t); + break; +#endif + case TYPE_STRING: + ap->a.a_string = va_arg (args, const char *); + break; +#ifdef HAVE_WCHAR_T + case TYPE_WIDE_STRING: + ap->a.a_wide_string = va_arg (args, const wchar_t *); + break; +#endif + case TYPE_POINTER: + ap->a.a_pointer = va_arg (args, void *); + break; + case TYPE_COUNT_SCHAR_POINTER: + ap->a.a_count_schar_pointer = va_arg (args, signed char *); + break; + case TYPE_COUNT_SHORT_POINTER: + ap->a.a_count_short_pointer = va_arg (args, short *); + break; + case TYPE_COUNT_INT_POINTER: + ap->a.a_count_int_pointer = va_arg (args, int *); + break; + case
TYPE_COUNT_LONGINT_POINTER: + ap->a.a_count_longint_pointer = va_arg (args, long int *); + break; +#ifdef HAVE_LONG_LONG + case TYPE_COUNT_LONGLONGINT_POINTER: + ap->a.a_count_longlongint_pointer = va_arg (args, long long int *); + break; +#endif + default: + /* Unknown type (this includes TYPE_NONE, which has no case above). */ + return -1; + } + return 0; +} diff --git a/gfsm/gfsm/src/libgfsm/gnulib/printf-args.h b/gfsm/gfsm/src/libgfsm/gnulib/printf-args.h new file mode 100644 index 0000000..3806246 --- /dev/null +++ b/gfsm/gfsm/src/libgfsm/gnulib/printf-args.h @@ -0,0 +1,136 @@ +/* Decomposed printf argument list. + Copyright (C) 1999, 2002-2003 Free Software Foundation, Inc. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation; either version 2.1, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License along + with this program; if not, write to the Free Software Foundation, + Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ + +#ifndef _PRINTF_ARGS_H +#define _PRINTF_ARGS_H + +/* Get size_t. */ +#include <stddef.h> + +/* Get wchar_t. */ +#ifdef HAVE_WCHAR_T +# include <stddef.h> +#endif + +/* Get wint_t. */ +#ifdef HAVE_WINT_T +# include <wchar.h> +#endif + +/* Get va_list.
*/ +#include <stdarg.h> + + +/* Argument types: the tag stored in argument.type, naming which member of the argument union is in use. Enumerators for long long, long double, wint_t and wchar_t exist only when the corresponding HAVE_* macro is defined. */ +typedef enum +{ + TYPE_NONE, + TYPE_SCHAR, + TYPE_UCHAR, + TYPE_SHORT, + TYPE_USHORT, + TYPE_INT, + TYPE_UINT, + TYPE_LONGINT, + TYPE_ULONGINT, +#ifdef HAVE_LONG_LONG + TYPE_LONGLONGINT, + TYPE_ULONGLONGINT, +#endif + TYPE_DOUBLE, +#ifdef HAVE_LONG_DOUBLE + TYPE_LONGDOUBLE, +#endif + TYPE_CHAR, +#ifdef HAVE_WINT_T + TYPE_WIDE_CHAR, +#endif + TYPE_STRING, +#ifdef HAVE_WCHAR_T + TYPE_WIDE_STRING, +#endif + TYPE_POINTER, + TYPE_COUNT_SCHAR_POINTER, + TYPE_COUNT_SHORT_POINTER, + TYPE_COUNT_INT_POINTER, + TYPE_COUNT_LONGINT_POINTER +#ifdef HAVE_LONG_LONG +, TYPE_COUNT_LONGLONGINT_POINTER +#endif +} arg_type; + +/* Polymorphic argument: a type tag plus a union with one member per representable value type; printf_fetchargs stores each fetched value in the member that corresponds to TYPE. */ +typedef struct +{ + arg_type type; + union + { + signed char a_schar; + unsigned char a_uchar; + short a_short; + unsigned short a_ushort; + int a_int; + unsigned int a_uint; + long int a_longint; + unsigned long int a_ulongint; +#ifdef HAVE_LONG_LONG + long long int a_longlongint; + unsigned long long int a_ulonglongint; +#endif + float a_float; + double a_double; +#ifdef HAVE_LONG_DOUBLE + long double a_longdouble; +#endif + int a_char; +#ifdef HAVE_WINT_T + wint_t a_wide_char; +#endif + const char* a_string; +#ifdef HAVE_WCHAR_T + const wchar_t* a_wide_string; +#endif + void* a_pointer; + signed char * a_count_schar_pointer; + short * a_count_short_pointer; + int * a_count_int_pointer; + long int * a_count_longint_pointer; +#ifdef HAVE_LONG_LONG + long long int * a_count_longlongint_pointer; +#endif + } + a; +} +argument; +/* Argument list: ARG points to an array of which the first COUNT entries are in use. */ +typedef struct +{ + size_t count; + argument *arg; +} +arguments; + + +/* Fetch the arguments, putting them into a.
*/ +#ifdef STATIC +STATIC +#else +extern +#endif +int printf_fetchargs (va_list args, arguments *a); + +#endif /* _PRINTF_ARGS_H */ diff --git a/gfsm/gfsm/src/libgfsm/gnulib/printf-parse.c b/gfsm/gfsm/src/libgfsm/gnulib/printf-parse.c new file mode 100644 index 0000000..afd6247 --- /dev/null +++ b/gfsm/gfsm/src/libgfsm/gnulib/printf-parse.c @@ -0,0 +1,536 @@ +/* Formatted output to strings. + Copyright (C) 1999-2000, 2002-2003 Free Software Foundation, Inc. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation; either version 2.1, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License along + with this program; if not, write to the Free Software Foundation, + Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ + +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif + +/* Specification. */ +#if WIDE_CHAR_VERSION +# include "wprintf-parse.h" +#else +# include "printf-parse.h" +#endif + +/* Get size_t, NULL. */ +#include <stddef.h> + +/* Get intmax_t. */ +#if HAVE_STDINT_H_WITH_UINTMAX +# include <stdint.h> +#endif +#if HAVE_INTTYPES_H_WITH_UINTMAX +# include <inttypes.h> +#endif + +/* malloc(), realloc(), free(). */ +#include <stdlib.h> + +/* Checked size_t computations. 
*/ +#include "xsize.h" + +#if WIDE_CHAR_VERSION +# define PRINTF_PARSE wprintf_parse +# define CHAR_T wchar_t +# define DIRECTIVE wchar_t_directive +# define DIRECTIVES wchar_t_directives +#else +# define PRINTF_PARSE printf_parse +# define CHAR_T char +# define DIRECTIVE char_directive +# define DIRECTIVES char_directives +#endif + +#ifdef STATIC +STATIC +#endif /* Parses FORMAT: stores the directives found in D and the type of every argument they consume in A. Returns 0 on success, -1 on a malformed format or allocation failure; the error exit frees d->dir and a->arg. */ +int +PRINTF_PARSE (const CHAR_T *format, DIRECTIVES *d, arguments *a) +{ + const CHAR_T *cp = format; /* pointer into format */ + size_t arg_posn = 0; /* number of regular arguments consumed */ + size_t d_allocated; /* allocated elements of d->dir */ + size_t a_allocated; /* allocated elements of a->arg */ + size_t max_width_length = 0; /* longest width field seen, in characters (1 for a '*' width) */ + size_t max_precision_length = 0; /* longest precision field seen, counting the '.' (2 for ".*") */ + + d->count = 0; + d_allocated = 1; + d->dir = malloc (d_allocated * sizeof (DIRECTIVE)); + if (d->dir == NULL) + /* Out of memory. */ + return -1; + + a->count = 0; + a_allocated = 0; + a->arg = NULL; + +#define REGISTER_ARG(_index_,_type_) \ + { /* Records _type_ for argument number _index_, growing a->arg as needed; jumps to error on size overflow, out of memory, or a type that conflicts with an earlier use. */ \ + size_t n = (_index_); \ + if (n >= a_allocated) \ + { \ + size_t memory_size; \ + argument *memory; \ + \ + a_allocated = xtimes (a_allocated, 2); \ + if (a_allocated <= n) \ + a_allocated = xsum (n, 1); \ + memory_size = xtimes (a_allocated, sizeof (argument)); \ + if (size_overflow_p (memory_size)) \ + /* Overflow, would lead to out of memory. */ \ + goto error; \ + memory = (a->arg \ + ? realloc (a->arg, memory_size) \ + : malloc (memory_size)); \ + if (memory == NULL) \ + /* Out of memory. */ \ + goto error; \ + a->arg = memory; \ + } \ + while (a->count <= n) \ + a->arg[a->count++].type = TYPE_NONE; \ + if (a->arg[n].type == TYPE_NONE) \ + a->arg[n].type = (_type_); \ + else if (a->arg[n].type != (_type_)) \ + /* Ambiguous type for positional argument. */ \ + goto error; \ + } + + while (*cp != '\0') + { + CHAR_T c = *cp++; + if (c == '%') + { + size_t arg_index = ARG_NONE; + DIRECTIVE *dp = &d->dir[d->count];/* pointer to next directive */ + + /* Initialize the next directive. 
*/ + dp->dir_start = cp - 1; + dp->flags = 0; + dp->width_start = NULL; + dp->width_end = NULL; + dp->width_arg_index = ARG_NONE; + dp->precision_start = NULL; + dp->precision_end = NULL; + dp->precision_arg_index = ARG_NONE; + dp->arg_index = ARG_NONE; + + /* Test for positional argument. */ + if (*cp >= '0' && *cp <= '9') + { + const CHAR_T *np; + + for (np = cp; *np >= '0' && *np <= '9'; np++) + ; + if (*np == '$') + { + size_t n = 0; + + for (np = cp; *np >= '0' && *np <= '9'; np++) + n = xsum (xtimes (n, 10), *np - '0'); + if (n == 0) + /* Positional argument 0. */ + goto error; + if (size_overflow_p (n)) + /* n too large, would lead to out of memory later. */ + goto error; + arg_index = n - 1; + cp = np + 1; + } + } + + /* Read the flags. */ + for (;;) + { + if (*cp == '\'') + { + dp->flags |= FLAG_GROUP; + cp++; + } + else if (*cp == '-') + { + dp->flags |= FLAG_LEFT; + cp++; + } + else if (*cp == '+') + { + dp->flags |= FLAG_SHOWSIGN; + cp++; + } + else if (*cp == ' ') + { + dp->flags |= FLAG_SPACE; + cp++; + } + else if (*cp == '#') + { + dp->flags |= FLAG_ALT; + cp++; + } + else if (*cp == '0') + { + dp->flags |= FLAG_ZERO; + cp++; + } + else + break; + } + + /* Parse the field width. */ + if (*cp == '*') + { + dp->width_start = cp; + cp++; + dp->width_end = cp; + if (max_width_length < 1) + max_width_length = 1; + + /* Test for positional argument. */ + if (*cp >= '0' && *cp <= '9') + { + const CHAR_T *np; + + for (np = cp; *np >= '0' && *np <= '9'; np++) + ; + if (*np == '$') + { + size_t n = 0; + + for (np = cp; *np >= '0' && *np <= '9'; np++) + n = xsum (xtimes (n, 10), *np - '0'); + if (n == 0) + /* Positional argument 0. */ + goto error; + if (size_overflow_p (n)) + /* n too large, would lead to out of memory later. */ + goto error; + dp->width_arg_index = n - 1; + cp = np + 1; + } + } + if (dp->width_arg_index == ARG_NONE) + { + dp->width_arg_index = arg_posn++; + if (dp->width_arg_index == ARG_NONE) + /* arg_posn wrapped around. 
*/ + goto error; + } + REGISTER_ARG (dp->width_arg_index, TYPE_INT); + } + else if (*cp >= '0' && *cp <= '9') + { + size_t width_length; + + dp->width_start = cp; + for (; *cp >= '0' && *cp <= '9'; cp++) + ; + dp->width_end = cp; + width_length = dp->width_end - dp->width_start; + if (max_width_length < width_length) + max_width_length = width_length; + } + + /* Parse the precision. */ + if (*cp == '.') + { + cp++; + if (*cp == '*') + { + dp->precision_start = cp - 1; + cp++; + dp->precision_end = cp; + if (max_precision_length < 2) + max_precision_length = 2; + + /* Test for positional argument. */ + if (*cp >= '0' && *cp <= '9') + { + const CHAR_T *np; + + for (np = cp; *np >= '0' && *np <= '9'; np++) + ; + if (*np == '$') + { + size_t n = 0; + + for (np = cp; *np >= '0' && *np <= '9'; np++) + n = xsum (xtimes (n, 10), *np - '0'); + if (n == 0) + /* Positional argument 0. */ + goto error; + if (size_overflow_p (n)) + /* n too large, would lead to out of memory + later. */ + goto error; + dp->precision_arg_index = n - 1; + cp = np + 1; + } + } + if (dp->precision_arg_index == ARG_NONE) + { + dp->precision_arg_index = arg_posn++; + if (dp->precision_arg_index == ARG_NONE) + /* arg_posn wrapped around. */ + goto error; + } + REGISTER_ARG (dp->precision_arg_index, TYPE_INT); + } + else + { + size_t precision_length; + + dp->precision_start = cp - 1; + for (; *cp >= '0' && *cp <= '9'; cp++) + ; + dp->precision_end = cp; + precision_length = dp->precision_end - dp->precision_start; + if (max_precision_length < precision_length) + max_precision_length = precision_length; + } + } + + { + arg_type type; + + /* Parse argument type/size specifiers. 
*/ + { + int flags = 0; /* size modifiers seen: bit 0 = 'h', bit 1 = a second 'h', bit 2 = 'L'; every 'l' adds 8, and 'j'/'z'/'t' add 8 or 16 depending on the type's size, so 8 and up selects long and 16 and up selects long long */ + + for (;;) + { + if (*cp == 'h') + { + flags |= (1 << (flags & 1)); + cp++; + } + else if (*cp == 'L') + { + flags |= 4; + cp++; + } + else if (*cp == 'l') + { + flags += 8; + cp++; + } +#ifdef HAVE_INTMAX_T + else if (*cp == 'j') + { + if (sizeof (intmax_t) > sizeof (long)) + { + /* intmax_t = long long */ + flags += 16; + } + else if (sizeof (intmax_t) > sizeof (int)) + { + /* intmax_t = long */ + flags += 8; + } + cp++; + } +#endif + else if (*cp == 'z' || *cp == 'Z') + { + /* 'z' is standardized in ISO C 99, but glibc uses 'Z' + because the warning facility in gcc-2.95.2 understands + only 'Z' (see gcc-2.95.2/gcc/c-common.c:1784). */ + if (sizeof (size_t) > sizeof (long)) + { + /* size_t = long long */ + flags += 16; + } + else if (sizeof (size_t) > sizeof (int)) + { + /* size_t = long */ + flags += 8; + } + cp++; + } + else if (*cp == 't') + { + if (sizeof (ptrdiff_t) > sizeof (long)) + { + /* ptrdiff_t = long long */ + flags += 16; + } + else if (sizeof (ptrdiff_t) > sizeof (int)) + { + /* ptrdiff_t = long */ + flags += 8; + } + cp++; + } + else + break; + } + + /* Read the conversion character. 
*/ + c = *cp++; + switch (c) + { + case 'd': case 'i': +#ifdef HAVE_LONG_LONG + if (flags >= 16 || (flags & 4)) + type = TYPE_LONGLONGINT; + else +#endif + if (flags >= 8) + type = TYPE_LONGINT; + else if (flags & 2) + type = TYPE_SCHAR; + else if (flags & 1) + type = TYPE_SHORT; + else + type = TYPE_INT; + break; + case 'o': case 'u': case 'x': case 'X': +#ifdef HAVE_LONG_LONG + if (flags >= 16 || (flags & 4)) + type = TYPE_ULONGLONGINT; + else +#endif + if (flags >= 8) + type = TYPE_ULONGINT; + else if (flags & 2) + type = TYPE_UCHAR; + else if (flags & 1) + type = TYPE_USHORT; + else + type = TYPE_UINT; + break; + case 'f': case 'F': case 'e': case 'E': case 'g': case 'G': + case 'a': case 'A': +#ifdef HAVE_LONG_DOUBLE + if (flags >= 16 || (flags & 4)) + type = TYPE_LONGDOUBLE; + else +#endif + type = TYPE_DOUBLE; + break; + case 'c': + if (flags >= 8) +#ifdef HAVE_WINT_T + type = TYPE_WIDE_CHAR; +#else + goto error; +#endif + else + type = TYPE_CHAR; + break; +#ifdef HAVE_WINT_T + case 'C': + type = TYPE_WIDE_CHAR; + c = 'c'; + break; +#endif + case 's': + if (flags >= 8) +#ifdef HAVE_WCHAR_T + type = TYPE_WIDE_STRING; +#else + goto error; +#endif + else + type = TYPE_STRING; + break; +#ifdef HAVE_WCHAR_T + case 'S': + type = TYPE_WIDE_STRING; + c = 's'; + break; +#endif + case 'p': + type = TYPE_POINTER; + break; + case 'n': +#ifdef HAVE_LONG_LONG + if (flags >= 16 || (flags & 4)) + type = TYPE_COUNT_LONGLONGINT_POINTER; + else +#endif + if (flags >= 8) + type = TYPE_COUNT_LONGINT_POINTER; + else if (flags & 2) + type = TYPE_COUNT_SCHAR_POINTER; + else if (flags & 1) + type = TYPE_COUNT_SHORT_POINTER; + else + type = TYPE_COUNT_INT_POINTER; + break; + case '%': + type = TYPE_NONE; + break; + default: + /* Unknown conversion character. */ + goto error; + } + } + + if (type != TYPE_NONE) + { + dp->arg_index = arg_index; + if (dp->arg_index == ARG_NONE) + { + dp->arg_index = arg_posn++; + if (dp->arg_index == ARG_NONE) + /* arg_posn wrapped around. 
*/ + goto error; + } + REGISTER_ARG (dp->arg_index, type); + } + dp->conversion = c; + dp->dir_end = cp; + } + + d->count++; /* the reallocation below keeps slot d->count available for the next directive or the terminating entry */ + if (d->count >= d_allocated) + { + size_t memory_size; + DIRECTIVE *memory; + + d_allocated = xtimes (d_allocated, 2); + memory_size = xtimes (d_allocated, sizeof (DIRECTIVE)); + if (size_overflow_p (memory_size)) + /* Overflow, would lead to out of memory. */ + goto error; + memory = realloc (d->dir, memory_size); + if (memory == NULL) + /* Out of memory. */ + goto error; + d->dir = memory; + } + } + } + d->dir[d->count].dir_start = cp; /* terminating entry: cp now points at the NUL that ends the format string */ + + d->max_width_length = max_width_length; + d->max_precision_length = max_precision_length; + return 0; + +error: + if (a->arg) + free (a->arg); + if (d->dir) + free (d->dir); + return -1; +} + +#undef DIRECTIVES +#undef DIRECTIVE +#undef CHAR_T +#undef PRINTF_PARSE diff --git a/gfsm/gfsm/src/libgfsm/gnulib/printf-parse.h b/gfsm/gfsm/src/libgfsm/gnulib/printf-parse.h new file mode 100644 index 0000000..6d254e4 --- /dev/null +++ b/gfsm/gfsm/src/libgfsm/gnulib/printf-parse.h @@ -0,0 +1,74 @@ +/* Parse printf format string. + Copyright (C) 1999, 2002-2003 Free Software Foundation, Inc. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation; either version 2.1, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License along + with this program; if not, write to the Free Software Foundation, + Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
*/ + +#ifndef _PRINTF_PARSE_H +#define _PRINTF_PARSE_H + +#include "printf-args.h" + + +/* Flags */ +#define FLAG_GROUP 1 /* ' flag */ +#define FLAG_LEFT 2 /* - flag */ +#define FLAG_SHOWSIGN 4 /* + flag */ +#define FLAG_SPACE 8 /* space flag */ +#define FLAG_ALT 16 /* # flag */ +#define FLAG_ZERO 32 /* 0 flag */ + +/* arg_index value indicating that no argument is consumed. */ +#define ARG_NONE (~(size_t)0) + +/* A parsed directive. */ +typedef struct +{ + const char* dir_start; /* points at the '%' that starts the directive */ + const char* dir_end; /* points just past the conversion character */ + int flags; /* bitwise OR of the FLAG_* values above */ + const char* width_start; /* width field, or NULL if there is none */ + const char* width_end; + size_t width_arg_index; /* argument supplying a '*' width, else ARG_NONE */ + const char* precision_start; /* points at the '.', or NULL if there is no precision */ + const char* precision_end; + size_t precision_arg_index; /* argument supplying a ".*" precision, else ARG_NONE */ + char conversion; /* d i o u x X f F e E g G a A c s p n % but not C S (the parser stores those as c and s) */ + size_t arg_index; /* argument being converted; ARG_NONE for %% */ +} +char_directive; + +/* A parsed format string. */ +typedef struct +{ + size_t count; /* number of directives in dir */ + char_directive *dir; /* the directives, followed by one terminating entry */ + size_t max_width_length; /* longest width field, in characters */ + size_t max_precision_length; /* longest precision field, counting the '.' */ +} +char_directives; + + +/* Parses the format string. Fills in the number N of directives, and fills + in directives[0], ..., directives[N-1], and sets directives[N].dir_start + to the end of the format string. Also fills in the arg_type fields of the + arguments and the needed count of arguments. */ +#ifdef STATIC +STATIC +#else +extern +#endif +int printf_parse (const char *format, char_directives *d, arguments *a); + +#endif /* _PRINTF_PARSE_H */ diff --git a/gfsm/gfsm/src/libgfsm/gnulib/size_max.h b/gfsm/gfsm/src/libgfsm/gnulib/size_max.h new file mode 100644 index 0000000..3bb87ae --- /dev/null +++ b/gfsm/gfsm/src/libgfsm/gnulib/size_max.h @@ -0,0 +1,27 @@ +/* size_max.h -- declare SIZE_MAX through system headers + Copyright (C) 2005 Free Software Foundation, Inc. + Written by Simon Josefsson. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation; either version 2.1, or (at your option) + any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with this program; if not, write to the Free Software Foundation, + Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ + +#ifndef GNULIB_SIZE_MAX_H +#define GNULIB_SIZE_MAX_H + +# include <limits.h> +# if HAVE_STDINT_H +# include <stdint.h> +# endif + +/* Neither header above is guaranteed to provide SIZE_MAX: <limits.h> need + not define it, and <stdint.h> is only pulled in when HAVE_STDINT_H is set. + Fall back to the largest value of the unsigned type size_t so that code + saturating its size computations at SIZE_MAX still compiles. Because of + the cast, this fallback cannot be used in #if expressions. */ +# ifndef SIZE_MAX +# include <stddef.h> /* size_t */ +# define SIZE_MAX ((size_t) -1) +# endif + +#endif /* GNULIB_SIZE_MAX_H */ diff --git a/gfsm/gfsm/src/libgfsm/gnulib/vasnprintf.c b/gfsm/gfsm/src/libgfsm/gnulib/vasnprintf.c new file mode 100644 index 0000000..1e5ecd5 --- /dev/null +++ b/gfsm/gfsm/src/libgfsm/gnulib/vasnprintf.c @@ -0,0 +1,901 @@ +/* vsprintf with automatic memory allocation. + Copyright (C) 1999, 2002-2005 Free Software Foundation, Inc. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation; either version 2.1, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License along + with this program; if not, write to the Free Software Foundation, + Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ + +/* Tell glibc's <stdio.h> to provide a prototype for snprintf(). + This must come before <config.h> because <config.h> may include + <features.h>, and once <features.h> has been included, it's too late. 
*/ +#ifndef _GNU_SOURCE +# define _GNU_SOURCE 1 +#endif + +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif +#ifndef IN_LIBINTL +# include <alloca.h> +#endif + +/* Specification. */ +#if WIDE_CHAR_VERSION +# include "vasnwprintf.h" +#else +# include "vasnprintf.h" +#endif + +#include <stdio.h> /* snprintf(), sprintf() */ +#include <stdlib.h> /* abort(), malloc(), realloc(), free() */ +#include <string.h> /* memcpy(), strlen() */ +#include <errno.h> /* errno */ +#include <limits.h> /* CHAR_BIT, INT_MAX */ +#include <float.h> /* DBL_MAX_EXP, LDBL_MAX_EXP, DBL_DIG, LDBL_DIG */ +#if WIDE_CHAR_VERSION +# include "wprintf-parse.h" +#else +# include "printf-parse.h" +#endif + +/* Checked size_t computations. */ +#include "xsize.h" + +/* Some systems, like OSF/1 4.0 and Woe32, don't have EOVERFLOW. */ +#ifndef EOVERFLOW +# define EOVERFLOW E2BIG +#endif + +#ifdef HAVE_WCHAR_T +# ifdef HAVE_WCSLEN +# define local_wcslen wcslen +# else + /* Solaris 2.5.1 has wcslen() in a separate library libw.so. To avoid + a dependency towards this library, here is a local substitute. + Define this substitute only once, even if this file is included + twice in the same compilation unit. */ +# ifndef local_wcslen_defined +# define local_wcslen_defined 1 +static size_t +local_wcslen (const wchar_t *s) +{ + const wchar_t *ptr; + + for (ptr = s; *ptr != (wchar_t) 0; ptr++) + ; + return ptr - s; +} +# endif +# endif +#endif + +#if WIDE_CHAR_VERSION +# define VASNPRINTF vasnwprintf +# define CHAR_T wchar_t +# define DIRECTIVE wchar_t_directive +# define DIRECTIVES wchar_t_directives +# define PRINTF_PARSE wprintf_parse +# define USE_SNPRINTF 1 +# if HAVE_DECL__SNWPRINTF + /* On Windows, the function swprintf() has a different signature than + on Unix; we use the _snwprintf() function instead. */ +# define SNPRINTF _snwprintf +# else + /* Unix. 
*/ +# define SNPRINTF swprintf +# endif +#else +# define VASNPRINTF vasnprintf +# define CHAR_T char +# define DIRECTIVE char_directive +# define DIRECTIVES char_directives +# define PRINTF_PARSE printf_parse +# define USE_SNPRINTF (HAVE_DECL__SNPRINTF || HAVE_SNPRINTF) +# if HAVE_DECL__SNPRINTF + /* Windows. */ +# define SNPRINTF _snprintf +# else + /* Unix. */ +# define SNPRINTF snprintf +# endif +#endif + +/* Formats FORMAT with ARGS into RESULTBUF (capacity *LENGTHP) when the + output fits there, otherwise into malloc'ed memory. Returns the string + and stores its length, excluding the trailing NUL, in *LENGTHP. On + failure returns NULL with errno set to EINVAL (bad format or failed + conversion), ENOMEM, or EOVERFLOW (length exceeds INT_MAX). */ +CHAR_T * +VASNPRINTF (CHAR_T *resultbuf, size_t *lengthp, const CHAR_T *format, va_list args) +{ + DIRECTIVES d; + arguments a; + + if (PRINTF_PARSE (format, &d, &a) < 0) + { + errno = EINVAL; + return NULL; + } + +#define CLEANUP() \ + free (d.dir); \ + if (a.arg) \ + free (a.arg); + + if (printf_fetchargs (args, &a) < 0) + { + CLEANUP (); + errno = EINVAL; + return NULL; + } + + { + size_t buf_neededlength; + CHAR_T *buf; + CHAR_T *buf_malloced; + const CHAR_T *cp; + size_t i; + DIRECTIVE *dp; + /* Output string accumulator. */ + CHAR_T *result; + size_t allocated; + size_t length; +#if !USE_SNPRINTF + /* Heap-allocated sprintf() scratch buffer of the directive currently + being formatted, or NULL. Kept at this scope so that the error + exits below can release it. */ + CHAR_T *tmp_malloced = NULL; +#endif + + /* Allocate a small buffer that will hold a directive passed to + sprintf or snprintf. */ + buf_neededlength = + xsum4 (7, d.max_width_length, d.max_precision_length, 6); +#if HAVE_ALLOCA + if (buf_neededlength < 4000 / sizeof (CHAR_T)) + { + buf = (CHAR_T *) alloca (buf_neededlength * sizeof (CHAR_T)); + buf_malloced = NULL; + } + else +#endif + { + size_t buf_memsize = xtimes (buf_neededlength, sizeof (CHAR_T)); + if (size_overflow_p (buf_memsize)) + goto out_of_memory_1; + buf = (CHAR_T *) malloc (buf_memsize); + if (buf == NULL) + goto out_of_memory_1; + buf_malloced = buf; + } + + if (resultbuf != NULL) + { + result = resultbuf; + allocated = *lengthp; + } + else + { + result = NULL; + allocated = 0; + } + length = 0; + /* Invariants: + result is either == resultbuf or == NULL or malloc-allocated. + If length > 0, then result != NULL. */ + + /* Ensures that allocated >= needed. Aborts through a jump to + out_of_memory if needed is SIZE_MAX or otherwise too big. 
*/ +#define ENSURE_ALLOCATION(needed) \ + if ((needed) > allocated) \ + { \ + size_t memory_size; \ + CHAR_T *memory; \ + \ + allocated = (allocated > 0 ? xtimes (allocated, 2) : 12); \ + if ((needed) > allocated) \ + allocated = (needed); \ + memory_size = xtimes (allocated, sizeof (CHAR_T)); \ + if (size_overflow_p (memory_size)) \ + goto out_of_memory; \ + if (result == resultbuf || result == NULL) \ + memory = (CHAR_T *) malloc (memory_size); \ + else \ + memory = (CHAR_T *) realloc (result, memory_size); \ + if (memory == NULL) \ + goto out_of_memory; \ + if (result == resultbuf && length > 0) \ + memcpy (memory, result, length * sizeof (CHAR_T)); \ + result = memory; \ + } + + for (cp = format, i = 0, dp = &d.dir[0]; ; cp = dp->dir_end, i++, dp++) + { + if (cp != dp->dir_start) + { + size_t n = dp->dir_start - cp; + size_t augmented_length = xsum (length, n); + + ENSURE_ALLOCATION (augmented_length); + memcpy (result + length, cp, n * sizeof (CHAR_T)); + length = augmented_length; + } + if (i == d.count) + break; + + /* Execute a single directive. 
*/ + if (dp->conversion == '%') + { + size_t augmented_length; + + if (!(dp->arg_index == ARG_NONE)) + abort (); + augmented_length = xsum (length, 1); + ENSURE_ALLOCATION (augmented_length); + result[length] = '%'; + length = augmented_length; + } + else + { + if (!(dp->arg_index != ARG_NONE)) + abort (); + + if (dp->conversion == 'n') + { + switch (a.arg[dp->arg_index].type) + { + case TYPE_COUNT_SCHAR_POINTER: + *a.arg[dp->arg_index].a.a_count_schar_pointer = length; + break; + case TYPE_COUNT_SHORT_POINTER: + *a.arg[dp->arg_index].a.a_count_short_pointer = length; + break; + case TYPE_COUNT_INT_POINTER: + *a.arg[dp->arg_index].a.a_count_int_pointer = length; + break; + case TYPE_COUNT_LONGINT_POINTER: + *a.arg[dp->arg_index].a.a_count_longint_pointer = length; + break; +#ifdef HAVE_LONG_LONG + case TYPE_COUNT_LONGLONGINT_POINTER: + *a.arg[dp->arg_index].a.a_count_longlongint_pointer = length; + break; +#endif + default: + abort (); + } + } + else + { + arg_type type = a.arg[dp->arg_index].type; + CHAR_T *p; + unsigned int prefix_count; + int prefixes[2]; +#if !USE_SNPRINTF + size_t tmp_length; + CHAR_T tmpbuf[700]; + CHAR_T *tmp; + + /* Allocate a temporary buffer of sufficient size for calling + sprintf. */ + { + size_t width; + size_t precision; + + width = 0; + if (dp->width_start != dp->width_end) + { + if (dp->width_arg_index != ARG_NONE) + { + int arg; + + if (!(a.arg[dp->width_arg_index].type == TYPE_INT)) + abort (); + arg = a.arg[dp->width_arg_index].a.a_int; + /* Take the magnitude in unsigned arithmetic: + -arg would overflow for INT_MIN. */ + width = (arg < 0 ? -(unsigned int) arg : (unsigned int) arg); + } + else + { + const CHAR_T *digitp = dp->width_start; + + do + width = xsum (xtimes (width, 10), *digitp++ - '0'); + while (digitp != dp->width_end); + } + } + + precision = 6; + if (dp->precision_start != dp->precision_end) + { + if (dp->precision_arg_index != ARG_NONE) + { + int arg; + + if (!(a.arg[dp->precision_arg_index].type == TYPE_INT)) + abort (); + arg = a.arg[dp->precision_arg_index].a.a_int; + precision = (arg < 0 ? 
0 : arg); + } + else + { + const CHAR_T *digitp = dp->precision_start + 1; + + precision = 0; + while (digitp != dp->precision_end) + precision = xsum (xtimes (precision, 10), *digitp++ - '0'); + } + } + + switch (dp->conversion) + { + + case 'd': case 'i': case 'u': +# ifdef HAVE_LONG_LONG + if (type == TYPE_LONGLONGINT || type == TYPE_ULONGLONGINT) + tmp_length = + (unsigned int) (sizeof (unsigned long long) * CHAR_BIT + * 0.30103 /* binary -> decimal */ + * 2 /* estimate for FLAG_GROUP */ + ) + + 1 /* turn floor into ceil */ + + 1; /* account for leading sign */ + else +# endif + if (type == TYPE_LONGINT || type == TYPE_ULONGINT) + tmp_length = + (unsigned int) (sizeof (unsigned long) * CHAR_BIT + * 0.30103 /* binary -> decimal */ + * 2 /* estimate for FLAG_GROUP */ + ) + + 1 /* turn floor into ceil */ + + 1; /* account for leading sign */ + else + tmp_length = + (unsigned int) (sizeof (unsigned int) * CHAR_BIT + * 0.30103 /* binary -> decimal */ + * 2 /* estimate for FLAG_GROUP */ + ) + + 1 /* turn floor into ceil */ + + 1; /* account for leading sign */ + break; + + case 'o': +# ifdef HAVE_LONG_LONG + if (type == TYPE_LONGLONGINT || type == TYPE_ULONGLONGINT) + tmp_length = + (unsigned int) (sizeof (unsigned long long) * CHAR_BIT + * 0.333334 /* binary -> octal */ + ) + + 1 /* turn floor into ceil */ + + 1; /* account for leading sign */ + else +# endif + if (type == TYPE_LONGINT || type == TYPE_ULONGINT) + tmp_length = + (unsigned int) (sizeof (unsigned long) * CHAR_BIT + * 0.333334 /* binary -> octal */ + ) + + 1 /* turn floor into ceil */ + + 1; /* account for leading sign */ + else + tmp_length = + (unsigned int) (sizeof (unsigned int) * CHAR_BIT + * 0.333334 /* binary -> octal */ + ) + + 1 /* turn floor into ceil */ + + 1; /* account for leading sign */ + break; + + case 'x': case 'X': +# ifdef HAVE_LONG_LONG + if (type == TYPE_LONGLONGINT || type == TYPE_ULONGLONGINT) + tmp_length = + (unsigned int) (sizeof (unsigned long long) * CHAR_BIT + * 0.25 /* 
binary -> hexadecimal */ + ) + + 1 /* turn floor into ceil */ + + 2; /* account for leading sign or alternate form */ + else +# endif + if (type == TYPE_LONGINT || type == TYPE_ULONGINT) + tmp_length = + (unsigned int) (sizeof (unsigned long) * CHAR_BIT + * 0.25 /* binary -> hexadecimal */ + ) + + 1 /* turn floor into ceil */ + + 2; /* account for leading sign or alternate form */ + else + tmp_length = + (unsigned int) (sizeof (unsigned int) * CHAR_BIT + * 0.25 /* binary -> hexadecimal */ + ) + + 1 /* turn floor into ceil */ + + 2; /* account for leading sign or alternate form */ + break; + + case 'f': case 'F': +# ifdef HAVE_LONG_DOUBLE + if (type == TYPE_LONGDOUBLE) + tmp_length = + (unsigned int) (LDBL_MAX_EXP + * 0.30103 /* binary -> decimal */ + * 2 /* estimate for FLAG_GROUP */ + ) + + 1 /* turn floor into ceil */ + + 10; /* sign, decimal point etc. */ + else +# endif + tmp_length = + (unsigned int) (DBL_MAX_EXP + * 0.30103 /* binary -> decimal */ + * 2 /* estimate for FLAG_GROUP */ + ) + + 1 /* turn floor into ceil */ + + 10; /* sign, decimal point etc. */ + tmp_length = xsum (tmp_length, precision); + break; + + case 'e': case 'E': case 'g': case 'G': + tmp_length = + 12; /* sign, decimal point, exponent etc. 
*/ + tmp_length = xsum (tmp_length, precision); + break; + + case 'a': case 'A': + /* Without an explicit precision, %a prints as many hexadecimal + digits as the mantissa needs, which can exceed the default + precision of 6 assumed above. */ +# ifdef HAVE_LONG_DOUBLE + if (type == TYPE_LONGDOUBLE) + tmp_length = + (unsigned int) (LDBL_DIG + * 0.831 /* decimal -> hexadecimal */ + ) + + 1; /* turn floor into ceil */ + else +# endif + tmp_length = + (unsigned int) (DBL_DIG + * 0.831 /* decimal -> hexadecimal */ + ) + + 1; /* turn floor into ceil */ + if (tmp_length < precision) + tmp_length = precision; + /* sign, "0x", decimal point, exponent etc. */ + tmp_length = xsum (tmp_length, 12); + break; + + case 'c': +# if defined HAVE_WINT_T && !WIDE_CHAR_VERSION + if (type == TYPE_WIDE_CHAR) + tmp_length = MB_CUR_MAX; + else +# endif + tmp_length = 1; + break; + + case 's': +# ifdef HAVE_WCHAR_T + if (type == TYPE_WIDE_STRING) + { + tmp_length = + local_wcslen (a.arg[dp->arg_index].a.a_wide_string); + +# if !WIDE_CHAR_VERSION + tmp_length = xtimes (tmp_length, MB_CUR_MAX); +# endif + } + else +# endif + tmp_length = strlen (a.arg[dp->arg_index].a.a_string); + break; + + case 'p': + tmp_length = + (unsigned int) (sizeof (void *) * CHAR_BIT + * 0.25 /* binary -> hexadecimal */ + ) + + 1 /* turn floor into ceil */ + + 2; /* account for leading 0x */ + break; + + default: + abort (); + } + + /* An explicit precision forces at least that many digits for the + integer conversions, so the type-based estimates above can be + too small (e.g. "%.800d" would overrun tmpbuf). Allow for + twice the precision (FLAG_GROUP separators) plus a sign or + "0x" prefix. */ + if (dp->conversion == 'd' || dp->conversion == 'i' + || dp->conversion == 'u' || dp->conversion == 'o' + || dp->conversion == 'x' || dp->conversion == 'X') + { + size_t digits_length = xsum (xtimes (precision, 2), 2); + + if (tmp_length < digits_length) + tmp_length = digits_length; + } + + if (tmp_length < width) + tmp_length = width; + + tmp_length = xsum (tmp_length, 1); /* account for trailing NUL */ + } + + if (tmp_length <= sizeof (tmpbuf) / sizeof (CHAR_T)) + tmp = tmpbuf; + else + { + size_t tmp_memsize = xtimes (tmp_length, sizeof (CHAR_T)); + + if (size_overflow_p (tmp_memsize)) + /* Overflow, would lead to out of memory. */ + goto out_of_memory; + tmp = (CHAR_T *) malloc (tmp_memsize); + if (tmp == NULL) + /* Out of memory. */ + goto out_of_memory; + /* Remember the allocation so every exit path can free it. */ + tmp_malloced = tmp; + } +#endif + + /* Construct the format string for calling snprintf or + sprintf. 
*/ + p = buf; + *p++ = '%'; + if (dp->flags & FLAG_GROUP) + *p++ = '\''; + if (dp->flags & FLAG_LEFT) + *p++ = '-'; + if (dp->flags & FLAG_SHOWSIGN) + *p++ = '+'; + if (dp->flags & FLAG_SPACE) + *p++ = ' '; + if (dp->flags & FLAG_ALT) + *p++ = '#'; + if (dp->flags & FLAG_ZERO) + *p++ = '0'; + if (dp->width_start != dp->width_end) + { + size_t n = dp->width_end - dp->width_start; + memcpy (p, dp->width_start, n * sizeof (CHAR_T)); + p += n; + } + if (dp->precision_start != dp->precision_end) + { + size_t n = dp->precision_end - dp->precision_start; + memcpy (p, dp->precision_start, n * sizeof (CHAR_T)); + p += n; + } + + switch (type) + { +#ifdef HAVE_LONG_LONG + case TYPE_LONGLONGINT: + case TYPE_ULONGLONGINT: + *p++ = 'l'; + /*FALLTHROUGH*/ +#endif + case TYPE_LONGINT: + case TYPE_ULONGINT: +#ifdef HAVE_WINT_T + case TYPE_WIDE_CHAR: +#endif +#ifdef HAVE_WCHAR_T + case TYPE_WIDE_STRING: +#endif + *p++ = 'l'; + break; +#ifdef HAVE_LONG_DOUBLE + case TYPE_LONGDOUBLE: + *p++ = 'L'; + break; +#endif + default: + break; + } + *p = dp->conversion; +#if USE_SNPRINTF + p[1] = '%'; + p[2] = 'n'; + p[3] = '\0'; +#else + p[1] = '\0'; +#endif + + /* Construct the arguments for calling snprintf or sprintf. */ + prefix_count = 0; + if (dp->width_arg_index != ARG_NONE) + { + if (!(a.arg[dp->width_arg_index].type == TYPE_INT)) + abort (); + prefixes[prefix_count++] = a.arg[dp->width_arg_index].a.a_int; + } + if (dp->precision_arg_index != ARG_NONE) + { + if (!(a.arg[dp->precision_arg_index].type == TYPE_INT)) + abort (); + prefixes[prefix_count++] = a.arg[dp->precision_arg_index].a.a_int; + } + +#if USE_SNPRINTF + /* Prepare checking whether snprintf returns the count + via %n. 
*/ + ENSURE_ALLOCATION (xsum (length, 1)); + result[length] = '\0'; +#endif + + for (;;) + { + size_t maxlen; + int count; + int retcount; + + maxlen = allocated - length; + count = -1; + retcount = 0; + +#if USE_SNPRINTF +# define SNPRINTF_BUF(arg) \ + switch (prefix_count) \ + { \ + case 0: \ + retcount = SNPRINTF (result + length, maxlen, buf, \ + arg, &count); \ + break; \ + case 1: \ + retcount = SNPRINTF (result + length, maxlen, buf, \ + prefixes[0], arg, &count); \ + break; \ + case 2: \ + retcount = SNPRINTF (result + length, maxlen, buf, \ + prefixes[0], prefixes[1], arg, \ + &count); \ + break; \ + default: \ + abort (); \ + } +#else +# define SNPRINTF_BUF(arg) \ + switch (prefix_count) \ + { \ + case 0: \ + count = sprintf (tmp, buf, arg); \ + break; \ + case 1: \ + count = sprintf (tmp, buf, prefixes[0], arg); \ + break; \ + case 2: \ + count = sprintf (tmp, buf, prefixes[0], prefixes[1],\ + arg); \ + break; \ + default: \ + abort (); \ + } +#endif + + switch (type) + { + case TYPE_SCHAR: + { + int arg = a.arg[dp->arg_index].a.a_schar; + SNPRINTF_BUF (arg); + } + break; + case TYPE_UCHAR: + { + unsigned int arg = a.arg[dp->arg_index].a.a_uchar; + SNPRINTF_BUF (arg); + } + break; + case TYPE_SHORT: + { + int arg = a.arg[dp->arg_index].a.a_short; + SNPRINTF_BUF (arg); + } + break; + case TYPE_USHORT: + { + unsigned int arg = a.arg[dp->arg_index].a.a_ushort; + SNPRINTF_BUF (arg); + } + break; + case TYPE_INT: + { + int arg = a.arg[dp->arg_index].a.a_int; + SNPRINTF_BUF (arg); + } + break; + case TYPE_UINT: + { + unsigned int arg = a.arg[dp->arg_index].a.a_uint; + SNPRINTF_BUF (arg); + } + break; + case TYPE_LONGINT: + { + long int arg = a.arg[dp->arg_index].a.a_longint; + SNPRINTF_BUF (arg); + } + break; + case TYPE_ULONGINT: + { + unsigned long int arg = a.arg[dp->arg_index].a.a_ulongint; + SNPRINTF_BUF (arg); + } + break; +#ifdef HAVE_LONG_LONG + case TYPE_LONGLONGINT: + { + long long int arg = a.arg[dp->arg_index].a.a_longlongint; + SNPRINTF_BUF (arg); 
+ } + break; + case TYPE_ULONGLONGINT: + { + unsigned long long int arg = a.arg[dp->arg_index].a.a_ulonglongint; + SNPRINTF_BUF (arg); + } + break; +#endif + case TYPE_DOUBLE: + { + double arg = a.arg[dp->arg_index].a.a_double; + SNPRINTF_BUF (arg); + } + break; +#ifdef HAVE_LONG_DOUBLE + case TYPE_LONGDOUBLE: + { + long double arg = a.arg[dp->arg_index].a.a_longdouble; + SNPRINTF_BUF (arg); + } + break; +#endif + case TYPE_CHAR: + { + int arg = a.arg[dp->arg_index].a.a_char; + SNPRINTF_BUF (arg); + } + break; +#ifdef HAVE_WINT_T + case TYPE_WIDE_CHAR: + { + wint_t arg = a.arg[dp->arg_index].a.a_wide_char; + SNPRINTF_BUF (arg); + } + break; +#endif + case TYPE_STRING: + { + const char *arg = a.arg[dp->arg_index].a.a_string; + SNPRINTF_BUF (arg); + } + break; +#ifdef HAVE_WCHAR_T + case TYPE_WIDE_STRING: + { + const wchar_t *arg = a.arg[dp->arg_index].a.a_wide_string; + SNPRINTF_BUF (arg); + } + break; +#endif + case TYPE_POINTER: + { + void *arg = a.arg[dp->arg_index].a.a_pointer; + SNPRINTF_BUF (arg); + } + break; + default: + abort (); + } + +#if USE_SNPRINTF + /* Portability: Not all implementations of snprintf() + are ISO C 99 compliant. Determine the number of + bytes that snprintf() has produced or would have + produced. */ + if (count >= 0) + { + /* Verify that snprintf() has NUL-terminated its + result. */ + if (count < maxlen && result[length + count] != '\0') + abort (); + /* Portability hack. */ + if (retcount > count) + count = retcount; + } + else + { + /* snprintf() doesn't understand the '%n' + directive. */ + if (p[1] != '\0') + { + /* Don't use the '%n' directive; instead, look + at the snprintf() return value. */ + p[1] = '\0'; + continue; + } + else + { + /* Look at the snprintf() return value. */ + if (retcount < 0) + { + /* HP-UX 10.20 snprintf() is doubly deficient: + It doesn't understand the '%n' directive, + *and* it returns -1 (rather than the length + that would have been required) when the + buffer is too small. 
*/ + size_t bigger_need = + xsum (xtimes (allocated, 2), 12); + ENSURE_ALLOCATION (bigger_need); + continue; + } + else + count = retcount; + } + } +#endif + + /* Attempt to handle failure. */ + if (count < 0) + { +#if !USE_SNPRINTF + /* Do not leak the sprintf() scratch buffer. */ + if (tmp_malloced != NULL) + free (tmp_malloced); +#endif + if (!(result == resultbuf || result == NULL)) + free (result); + if (buf_malloced != NULL) + free (buf_malloced); + CLEANUP (); + errno = EINVAL; + return NULL; + } + +#if !USE_SNPRINTF + if (count >= tmp_length) + /* tmp_length was incorrectly calculated - fix the + code above! */ + abort (); +#endif + + /* Make room for the result. */ + if (count >= maxlen) + { + /* Need at least count bytes. But allocate + proportionally, to avoid looping eternally if + snprintf() reports a too small count. */ + size_t n = + xmax (xsum (length, count), xtimes (allocated, 2)); + + ENSURE_ALLOCATION (n); +#if USE_SNPRINTF + continue; +#endif + } + +#if USE_SNPRINTF + /* The snprintf() result did fit. */ +#else + /* Append the sprintf() result. */ + memcpy (result + length, tmp, count * sizeof (CHAR_T)); + if (tmp_malloced != NULL) + { + free (tmp_malloced); + tmp_malloced = NULL; + } +#endif + + length += count; + break; + } + } + } + } + + /* Add the final NUL. */ + ENSURE_ALLOCATION (xsum (length, 1)); + result[length] = '\0'; + + if (result != resultbuf && length + 1 < allocated) + { + /* Shrink the allocated memory if possible. */ + CHAR_T *memory; + + memory = (CHAR_T *) realloc (result, (length + 1) * sizeof (CHAR_T)); + if (memory != NULL) + result = memory; + } + + if (buf_malloced != NULL) + free (buf_malloced); + CLEANUP (); + *lengthp = length; + if (length > INT_MAX) + goto length_overflow; + return result; + + length_overflow: + /* We could produce such a big string, but its length doesn't fit into + an 'int'. POSIX says that snprintf() fails with errno = EOVERFLOW in + this case. 
*/ + if (result != resultbuf) + free (result); + errno = EOVERFLOW; + return NULL; + + out_of_memory: +#if !USE_SNPRINTF + /* Release the scratch buffer of the directive that was in progress. */ + if (tmp_malloced != NULL) + free (tmp_malloced); +#endif + if (!(result == resultbuf || result == NULL)) + free (result); + if (buf_malloced != NULL) + free (buf_malloced); + out_of_memory_1: + CLEANUP (); + errno = ENOMEM; + return NULL; + } +} + +#undef SNPRINTF +#undef USE_SNPRINTF +#undef PRINTF_PARSE +#undef DIRECTIVES +#undef DIRECTIVE +#undef CHAR_T +#undef VASNPRINTF diff --git a/gfsm/gfsm/src/libgfsm/gnulib/vasnprintf.h b/gfsm/gfsm/src/libgfsm/gnulib/vasnprintf.h new file mode 100644 index 0000000..3ed5714 --- /dev/null +++ b/gfsm/gfsm/src/libgfsm/gnulib/vasnprintf.h @@ -0,0 +1,77 @@ +/* vsprintf with automatic memory allocation. + Copyright (C) 2002-2004 Free Software Foundation, Inc. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation; either version 2.1, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License along + with this program; if not, write to the Free Software Foundation, + Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ + +#ifndef _VASNPRINTF_H +#define _VASNPRINTF_H + +/* Get va_list. */ +#include <stdarg.h> + +/* Get size_t. */ +#include <stddef.h> + +#ifndef __attribute__ +/* This feature is available in gcc versions 2.5 and later. */ +# if __GNUC__ < 2 || (__GNUC__ == 2 && __GNUC_MINOR__ < 5) || __STRICT_ANSI__ +# define __attribute__(Spec) /* empty */ +# endif +/* The __-protected variants of `format' and `printf' attributes + are accepted by gcc versions 2.6.4 (effectively 2.7) and later. 
*/ +# if __GNUC__ < 2 || (__GNUC__ == 2 && __GNUC_MINOR__ < 7) +# define __format__ format +# define __printf__ printf +# endif +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +/* Write formatted output to a string dynamically allocated with malloc(). + You can pass a preallocated buffer for the result in RESULTBUF and its + size in *LENGTHP; otherwise you pass RESULTBUF = NULL. + If successful, return the address of the string (this may be = RESULTBUF + if no dynamic memory allocation was necessary) and set *LENGTHP to the + number of resulting bytes, excluding the trailing NUL. Upon error, set + errno and return NULL. + + When dynamic memory allocation occurs, the preallocated buffer is left + alone (with possibly modified contents). This makes it possible to use + a statically allocated or stack-allocated buffer, like this: + + char buf[100]; + size_t len = sizeof (buf); + char *output = vasnprintf (buf, &len, format, args); + if (output == NULL) + ... error handling ...; + else + { + ... use the output string ...; + if (output != buf) + free (output); + } + */ +extern char * asnprintf (char *resultbuf, size_t *lengthp, const char *format, ...) + __attribute__ ((__format__ (__printf__, 3, 4))); +extern char * vasnprintf (char *resultbuf, size_t *lengthp, const char *format, va_list args) + __attribute__ ((__format__ (__printf__, 3, 0))); + +#ifdef __cplusplus +} +#endif + +#endif /* _VASNPRINTF_H */ diff --git a/gfsm/gfsm/src/libgfsm/gnulib/vasprintf.c b/gfsm/gfsm/src/libgfsm/gnulib/vasprintf.c new file mode 100644 index 0000000..3ee1721 --- /dev/null +++ b/gfsm/gfsm/src/libgfsm/gnulib/vasprintf.c @@ -0,0 +1,42 @@ +/* Formatted output to strings. + Copyright (C) 1999, 2002 Free Software Foundation, Inc. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation; either version 2.1, or (at your option) + any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License along + with this program; if not, write to the Free Software Foundation, + Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ + +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif + +/* Specification. */ +#include "vasprintf.h" + +#include <stdlib.h> + +#include "vasnprintf.h" + +int +vasprintf (char **resultp, const char *format, va_list args) +{ + size_t length; + char *result = vasnprintf (NULL, &length, format, args); + if (result == NULL) + return -1; + + *resultp = result; + /* Return the number of resulting bytes, excluding the trailing NUL. + If it wouldn't fit in an 'int', vasnprintf() would have returned NULL + and set errno to EOVERFLOW. */ + return length; +} diff --git a/gfsm/gfsm/src/libgfsm/gnulib/vasprintf.h b/gfsm/gfsm/src/libgfsm/gnulib/vasprintf.h new file mode 100644 index 0000000..d12514b --- /dev/null +++ b/gfsm/gfsm/src/libgfsm/gnulib/vasprintf.h @@ -0,0 +1,63 @@ +/* vsprintf with automatic memory allocation. + Copyright (C) 2002-2003 Free Software Foundation, Inc. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation; either version 2.1, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public License along + with this program; if not, write to the Free Software Foundation, + Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ + +#ifndef _VASPRINTF_H +#define _VASPRINTF_H + +#if HAVE_VASPRINTF + +/* Get asprintf(), vasprintf() declarations. */ +#include <stdio.h> + +#else + +/* Get va_list. */ +#include <stdarg.h> + +#ifndef __attribute__ +/* This feature is available in gcc versions 2.5 and later. */ +# if __GNUC__ < 2 || (__GNUC__ == 2 && __GNUC_MINOR__ < 5) || __STRICT_ANSI__ +# define __attribute__(Spec) /* empty */ +# endif +/* The __-protected variants of `format' and `printf' attributes + are accepted by gcc versions 2.6.4 (effectively 2.7) and later. */ +# if __GNUC__ < 2 || (__GNUC__ == 2 && __GNUC_MINOR__ < 7) +# define __format__ format +# define __printf__ printf +# endif +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +/* Write formatted output to a string dynamically allocated with malloc(). + If the memory allocation succeeds, store the address of the string in + *RESULT and return the number of resulting bytes, excluding the trailing + NUL. Upon memory allocation error, or some other error, return -1. */ +extern int asprintf (char **result, const char *format, ...) + __attribute__ ((__format__ (__printf__, 2, 3))); +extern int vasprintf (char **result, const char *format, va_list args) + __attribute__ ((__format__ (__printf__, 2, 0))); + +#ifdef __cplusplus +} +#endif + +#endif + +#endif /* _VASPRINTF_H */ diff --git a/gfsm/gfsm/src/libgfsm/gnulib/xsize.h b/gfsm/gfsm/src/libgfsm/gnulib/xsize.h new file mode 100644 index 0000000..65356bb --- /dev/null +++ b/gfsm/gfsm/src/libgfsm/gnulib/xsize.h @@ -0,0 +1,108 @@ +/* xsize.h -- Checked size_t computations. + + Copyright (C) 2003 Free Software Foundation, Inc. 
+ + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation; either version 2.1, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with this program; if not, write to the Free Software Foundation, + Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ + +#ifndef _XSIZE_H +#define _XSIZE_H + +/* Get size_t. */ +#include <stddef.h> + +/* Get SIZE_MAX. */ +#include <limits.h> +#if HAVE_STDINT_H +# include <stdint.h> +#endif + +/* The size of memory objects is often computed through expressions of + type size_t. Example: + void* p = malloc (header_size + n * element_size). + These computations can lead to overflow. When this happens, malloc() + returns a piece of memory that is way too small, and the program then + crashes while attempting to fill the memory. + To avoid this, the functions and macros in this file check for overflow. + The convention is that SIZE_MAX represents overflow. + malloc (SIZE_MAX) is not guaranteed to fail -- think of a malloc + implementation that uses mmap --, it's recommended to use size_overflow_p() + or size_in_bounds_p() before invoking malloc(). + The example thus becomes: + size_t size = xsum (header_size, xtimes (n, element_size)); + void *p = (size_in_bounds_p (size) ? malloc (size) : NULL); +*/ + +/* Convert an arbitrary value >= 0 to type size_t. */ +#define xcast_size_t(N) \ + ((N) <= SIZE_MAX ? (size_t) (N) : SIZE_MAX) + +/* Sum of two sizes, with overflow check. 
*/ +static inline size_t +#if __GNUC__ >= 3 +__attribute__ ((__pure__)) +#endif +xsum (size_t size1, size_t size2) +{ + size_t sum = size1 + size2; + return (sum >= size1 ? sum : SIZE_MAX); +} + +/* Sum of three sizes, with overflow check. */ +static inline size_t +#if __GNUC__ >= 3 +__attribute__ ((__pure__)) +#endif +xsum3 (size_t size1, size_t size2, size_t size3) +{ + return xsum (xsum (size1, size2), size3); +} + +/* Sum of four sizes, with overflow check. */ +static inline size_t +#if __GNUC__ >= 3 +__attribute__ ((__pure__)) +#endif +xsum4 (size_t size1, size_t size2, size_t size3, size_t size4) +{ + return xsum (xsum (xsum (size1, size2), size3), size4); +} + +/* Maximum of two sizes, with overflow check. */ +static inline size_t +#if __GNUC__ >= 3 +__attribute__ ((__pure__)) +#endif +xmax (size_t size1, size_t size2) +{ + /* No explicit check is needed here, because for any n: + max (SIZE_MAX, n) == SIZE_MAX and max (n, SIZE_MAX) == SIZE_MAX. */ + return (size1 >= size2 ? size1 : size2); +} + +/* Multiplication of a count with an element size, with overflow check. + The count must be >= 0 and the element size must be > 0. + This is a macro, not an inline function, so that it works correctly even + when N is of a wider type and N > SIZE_MAX. */ +#define xtimes(N, ELSIZE) \ + ((N) <= SIZE_MAX / (ELSIZE) ? (size_t) (N) * (ELSIZE) : SIZE_MAX) + +/* Check for overflow. */ +#define size_overflow_p(SIZE) \ + ((SIZE) == SIZE_MAX) +/* Check against overflow. 
*/ +#define size_in_bounds_p(SIZE) \ + ((SIZE) != SIZE_MAX) + +#endif /* _XSIZE_H */ diff --git a/gfsm/gfsm/src/libgfsm/graveyard.c b/gfsm/gfsm/src/libgfsm/graveyard.c new file mode 100644 index 0000000..8df1638 --- /dev/null +++ b/gfsm/gfsm/src/libgfsm/graveyard.c @@ -0,0 +1,631 @@ +//================================================================================= +// arc sorting (old) +//================================================================================= + +/// Typedef for mode-dependent arc-sorting parameters +/** \see gfsm_arc_compare(), gfsm_arclist_sort(), gfsm_automaton_arcsort() */ +typedef struct { + gfsmArcSortMode mode; /**< sort mode */ + gfsmSemiring *sr; /**< semiring for weight-mode, otherwise ignored */ +} gfsmArcSortDataOLD; + +/** Old-style default 3-way comparison on arcs (inline version) + * \param a1 first arc to compare + * \param a2 second arc to compare + * \param sdata specifies comparison mode + * \returns + * negative, zero, or positive integer depending on whether + * \a a1 is less-than, equal-to, or greater-than \a a2 according to \a sdata. + * \note + * \li Prefer gfsm_arc_compare_inline() for literal direct arc comparisons in new code + * \li Prefer gfsm_arc_compare() if you're passing function pointers around, since + * its address is guaranteed not to change between compilation units. 
+ */ +GFSM_INLINE +gint gfsm_arc_compare_inline(gfsmArc *a1, gfsmArc *a2, gfsmArcSortDataOLD *sdata); + +/** Old-style default 3-way comparison on arcs (extern version) + * Really just a wrapper for gfsm_arc_compare_inline() + */ +gint gfsm_arc_compare(gfsmArc *a1, gfsmArc *a2, gfsmArcSortDataOLD *sdata); + +GFSM_INLINE +gint gfsm_arc_compare_inline(gfsmArc *a1, gfsmArc *a2, gfsmArcSortData *sdata) +{ + if (!a1) { + if (!a2) return 0; + return 1; + } + if (!a2) return -1; + switch (sdata->mode) { + case gfsmASMLower: + if (a1->lower < a2->lower) return -1; + if (a1->lower > a2->lower) return 1; + if (a1->upper < a2->upper) return -1; + if (a1->upper > a2->upper) return 1; + //if (a1->source < a2->source) return -1; + //if (a1->source > a2->source) return 1; + if (a1->target < a2->target) return -1; + if (a1->target > a2->target) return 1; + return 0; + case gfsmASMUpper: + if (a1->upper < a2->upper) return -1; + if (a1->upper > a2->upper) return 1; + if (a1->lower < a2->lower) return -1; + if (a1->lower > a2->lower) return 1; + //if (a1->source < a2->source) return -1; + //if (a1->source > a2->source) return 1; + if (a1->target < a2->target) return -1; + if (a1->target > a2->target) return 1; + return 0; + case gfsmASMWeight: + return gfsm_sr_compare(sdata->sr, a1->weight, a2->weight); + case gfsmASMNone: + default: + return (GPOINTER_TO_INT(a2)-GPOINTER_TO_INT(a1)); + } + return 0; +} + +/** Backwards-compatible wrapper for gfsm_arc_compare_bymask() */ +gint gfsm_arc_compare(gfsmArc *a1, gfsmArc *a2, gfsmArcSortDataOLD *sdata); + +/** Return symbolic name of an (old-style) arc-sort mode */ +const gchar *gfsm_arc_sortmode_to_name(gfsmArcSortModeOLD mode); + +/*-------------------------------------------------------------- + * compare() + */ +gint gfsm_arc_compare(gfsmArc *a1, gfsmArc *a2, gfsmArcSortDataOLD *sdata) +{ + gfsmArcCompData acdata = { 0, sdata->sr, NULL,NULL }; + switch (sdata->mode) { + case gfsmASMLowerOLD: acdata.mask = gfsmASMLower; break; + case 
gfsmASMUpperOLD: acdata.mask = gfsmASMUpper; break; + case gfsmASMWeightOLD: acdata.mask = gfsmASMWeight; break; + default: + break; + } + return gfsm_arc_compare_bymask_inline(a1,a2,&acdata); +} + +//================================================================================= +// statepair2weightXXX +//================================================================================= + +/// Typedef for mapping (gfsmStatePair)s to (gfsmWeight)s +/// used by gfsm_automaton_rmepsilon() +typedef struct { + gfsmWeightMap *wm; /**< underlying weight-map */ + gfsmSemiring *sr; /**< semiring used for comparison */ +} gfsmStatePair2WeightMap; + +/// Typedef for mapping (gfsmStatePair)s to (gfsmWeight)s +/// used by gfsm_automaton_rmepsilon() +typedef struct { + GHashTable *h; /**< underlying hash */ + gfsmSemiring *sr; /**< semiring used for comparison */ +} gfsmStatePair2WeightHash; + +/*====================================================================== + * Methods: gfsmStatePair2WeightHash + */ +///\name gfsmStatePair2WeightHash Methods +//@{ + + +/** create a new gfsmStatePair2WeightHash (copies & frees keys) + * \see gfsmWeightHash + */ +gfsmStatePair2WeightHash *gfsm_statepair2weighthash_new(gfsmSemiring *sr); + +/** Hash \a sp to \a w in \a spw. + * \returns TRUE if \a sp was already present in \a spw with a less-than-or-equal weight */ +gboolean gfsm_statepair2weighthash_insert(gfsmStatePair2WeightHash *sp2wh, gfsmStatePair *sp, gfsmWeight w); + +/** Lookup weight for \a sp in \a sp2wh . 
+ * \returns TRUE if \a sp was already present in \a sp2wh and sets \a *wp to its stored weight */ +gboolean gfsm_statepair2weighthash_lookup(gfsmStatePair2WeightHash *sp2wh, gfsmStatePair *sp, gfsmWeight *wp); + +/** Clear a gfsmStatePair2WeightHash */ +#define gfsm_statepair2weighthash_clear(sp2wh) \ + g_hash_free((sp2wh)->h) + +/** Free all memory allocated by a gfsmStatePair2WeightHash */ +void gfsm_statepair2weighthash_free(gfsmStatePair2WeightHash *sp2wh); + +/** Alias; \sa gfsm_weighthash_foreach() */ +#define gfsm_statepair2weighthash_foreach(sp2wh,func,data) \ + gfsm_weighthash_foreach((sp2wh)->h,(func),(data)) + +//@} + + +/*====================================================================== + * Methods: gfsmStatePair2WeightMap + */ + +/*-------------------------------------------------------------- + * statepair2weightmap_new() + */ +gfsmStatePair2WeightMap *gfsm_statepair2weightmap_new(gfsmSemiring *sr) +{ + gfsmStatePair2WeightMap *sp2wm = g_new(gfsmStatePair2WeightMap,1); + sp2wm->wm = gfsm_weightmap_new_full((GCompareDataFunc)gfsm_statepair_compare, + sr, + (GDestroyNotify)g_free); + sp2wm->sr = sr; + return sp2wm; +} + +/*-------------------------------------------------------------- + * statepair2weightmap_insert() + */ +gboolean gfsm_statepair2weightmap_insert(gfsmStatePair2WeightMap *spw, gfsmStatePair *sp, gfsmWeight w) +{ + gpointer orig_key; + gpointer orig_val; + if (g_tree_lookup_extended(spw->wm, sp, &orig_key, &orig_val)) { + //-- already present: is the stored weight better? 
+ if (gfsm_sr_less(spw->sr, gfsm_ptr2weight(orig_val), w)) { + return TRUE; + } + } + //-- either not present or we need to update the stored weight because (w) is better + gfsm_weightmap_insert(spw->wm, gfsm_statepair_clone(sp), w); + return FALSE; +} + +/*-------------------------------------------------------------- + * statepair2weightmap_lookup() + */ +gboolean gfsm_statepair2weightmap_lookup(gfsmStatePair2WeightMap *spw, gfsmStatePair *sp, gfsmWeight *wp) +{ + gpointer orig_key; + return g_tree_lookup_extended(spw->wm, sp, &orig_key, &((gpointer*)wp)); +} + +/*-------------------------------------------------------------- + * statepair2weightmap_free() + */ +void gfsm_statepair2weightmap_free(gfsmStatePair2WeightMap *sp2wm) +{ + gfsm_weightmap_free(sp2wm->wm); + g_free(sp2wm); +} + +/*====================================================================== + * Methods: gfsmStatePair2WeightHash + */ + +/*-------------------------------------------------------------- + * statepair2weighthash_new() + */ +gfsmStatePair2WeightHash *gfsm_statepair2weighthash_new(gfsmSemiring *sr) +{ + gfsmStatePair2WeightHash *sp2wh = g_new(gfsmStatePair2WeightHash,1); + sp2wh->h = g_hash_table_new_full((GHashFunc)gfsm_statepair_hash, + (GEqualFunc)gfsm_statepair_equal, + (GDestroyNotify)gfsm_statepair_free, + NULL); + sp2wh->sr = sr; + return sp2wh; +} + +/*-------------------------------------------------------------- + * statepair2weighthash_insert() + */ +gboolean gfsm_statepair2weighthash_insert(gfsmStatePair2WeightHash *sp2wh, gfsmStatePair *sp, gfsmWeight w) +{ + gpointer orig_key; + gpointer orig_val; + if (g_tree_lookup_extended(spw->h, sp, &orig_key, &orig_val)) { + //-- already present: is (stored_weight+w) better than (stored_weight)? 
+ gfsmWeight ow = gfsm_ptr2weight(orig_val); + w = gfsm_sr_plus(sp2wh->sr, ow, w); + if (gfsm_sr_less(sp2wh->sr, ow, w)) { + return TRUE; + } + //-- adding new weight is better + g_hash_table_steal(sp2wh->h, orig_key); + g_hash_table_insert(sp2wh->h, orig_key, gfsm_weight2ptr(w)); + } + else { + //-- pair (sp) was not present: copy & insert + g_hash_table_insert(sp2wh->h, gfsm_statepair_clone(sp), gfsm_weight2ptr(w)); + } + return FALSE; //-- either not present or updated +} + +/*-------------------------------------------------------------- + * statepair2weighthash_lookup() + */ +gboolean gfsm_statepair2weighthash_lookup(gfsmStatePair2WeightHash *sp2wh, gfsmStatePair *sp, gfsmWeight *wp) +{ + gpointer orig_key; + return g_hash_table_lookup_extended(spw->wm, sp, &orig_key, &((gpointer*)wp)); +} + +/*-------------------------------------------------------------- + * statepair2weighthash_free() + */ +void gfsm_statepair2weighthash_free(gfsmStatePair2WeightHash *sp2wh) +{ + gfsm_weighthash_free(sp2wh->wm); + g_free(sp2wh); +} + + +//================================================================================= +// DETERMINIZE (v1) +//================================================================================= + +/*-------------------------------------------------------------- + * _determinize_lp2ec_foreach_func() + */ +typedef struct { + gfsmAutomaton *nfa; + gfsmAutomaton *dfa; + gfsmStateId dfa_src_id; + gfsmEnum *ec2id; + gfsmStateSet *ec_tmp; +} gfsmLp2EcForeachData; + +gboolean _gfsm_determinize_lp2ec_foreach_func(gfsmLabelPair lp, + gfsmWeightedStateSet *wss, + gfsmLp2EcForeachData *data) +{ + gfsmStateId ec2id_val; + gfsmStateSet *ec2id_key; + + if ( gfsm_enum_lookup_extended(data->ec2id, + wss->set, + (gpointer)(&ec2id_key), + (gpointer)(&ec2id_val)) ) + { + //-- target node-set is already present: just add an arc in @dfa + gfsm_automaton_add_arc(data->dfa, + data->dfa_src_id, + ec2id_val, + gfsm_labelpair_lower(lp), + gfsm_labelpair_upper(lp), + 
wss->weight); + + //-- ... and maybe free the embedded state set + if (wss->set != ec2id_key) gfsm_stateset_free(wss->set); + wss->set = NULL; + } + else + { + //-- image of equiv-class (wss->set) was not yet present: make a new one + ec2id_val = gfsm_automaton_ensure_state(data->dfa, + gfsm_enum_insert(data->ec2id, wss->set)); + + //-- ... add @dfa arc + gfsm_automaton_add_arc(data->dfa, + data->dfa_src_id, + ec2id_val, + gfsm_labelpair_lower(lp), + gfsm_labelpair_upper(lp), + wss->weight); + + //-- ... and recurse + _gfsm_determinize_visit_state(data->nfa, data->dfa, + wss->set, ec2id_val, + data->ec2id, data->ec_tmp); + } + return FALSE; +} + + +/*-------------------------------------------------------------- + * _determinize_visit_state() + */ +void _gfsm_determinize_visit_state(gfsmAutomaton *nfa, gfsmAutomaton *dfa, + gfsmStateSet *nfa_ec, gfsmStateId dfa_id, + gfsmEnum *ec2id, gfsmStateSet *ec_tmp) +{ + GTree *lp2ecw; //-- maps label-pairs@nfa.src.ec => (eq-class@nfa.sink, sum(weight)) + gfsmStateSetIter eci; + gfsmStateId ecid; + gfsmLp2EcForeachData lp2ec_foreach_data; + gfsmWeight fw; + + //-- check for final state + if (gfsm_stateset_lookup_final_weight(nfa_ec,nfa,&fw)) { + gfsm_automaton_set_final_state_full(dfa, dfa_id, TRUE, fw); + } + + //-- build label-pair => (sink-eqc, sum(weight)) mapping 'lp2ecw' for node-set nfa_ec + lp2ecw = g_tree_new_full(((GCompareDataFunc) + gfsm_labelpair_compare_with_data), //-- key_comp_func + NULL, //-- key_comp_data + NULL, //-- key_free_func + (GDestroyNotify)g_free); //-- val_free_func + + for (eci=gfsm_stateset_iter_begin(nfa_ec); + (ecid=gfsm_stateset_iter_id(eci)) != gfsmNoState; + eci=gfsm_stateset_iter_next(nfa_ec,eci)) + { + gfsmArcIter ai; + for (gfsm_arciter_open(&ai, nfa, ecid); gfsm_arciter_ok(&ai); gfsm_arciter_next(&ai)) { + gfsmArc *a = gfsm_arciter_arc(&ai); + gfsmLabelPair lp; + gfsmLabelPair *lp2ec_key; + gfsmWeightedStateSet *lp2ec_val; + + if (a->lower==gfsmEpsilon && a->upper==gfsmEpsilon) 
continue; //-- ignore eps arcs + lp = gfsm_labelpair_new(a->lower, a->upper); + + //-- populate state-set with all nodes eps-reachable from this arc's target + gfsm_stateset_clear(ec_tmp); + gfsm_stateset_populate_eps(ec_tmp, nfa, a->target); + + //-- add equivalence class to local mapping + if ( g_tree_lookup_extended(lp2ecw, + (gpointer)lp, + (gpointer)(&lp2ec_key), + (gpointer)(&lp2ec_val)) ) + { + //-- already present: compute union and add new arc's weight + gfsm_stateset_union(lp2ec_val->set, ec_tmp); + lp2ec_val->weight = gfsm_sr_plus(nfa->sr, lp2ec_val->weight, a->weight); + } + else + { + //-- not yet present: insert new value + lp2ec_val = g_new(gfsmWeightedStateSet,1); + lp2ec_val->set = gfsm_stateset_clone(ec_tmp); + lp2ec_val->weight = a->weight; + g_tree_insert(lp2ecw, (gpointer)lp, lp2ec_val); + } + } + + //-- tmp-cleanup + gfsm_arciter_close(&ai); + } + + //-- stateset-iter (eci) cleanup + //(none) + + //-- insert computed arcs into @dfa + lp2ec_foreach_data.nfa = nfa; + lp2ec_foreach_data.dfa = dfa; + lp2ec_foreach_data.dfa_src_id = dfa_id; + lp2ec_foreach_data.ec2id = ec2id; + lp2ec_foreach_data.ec_tmp = ec_tmp; + g_tree_foreach(lp2ecw, + (GTraverseFunc)_gfsm_determinize_lp2ec_foreach_func, + (gpointer)(&lp2ec_foreach_data)); + + //-- cleanup + g_tree_destroy(lp2ecw); + + /* + g_printerr("_gfsm_automaton_determinize_visit_state(): not yet implemented!"); + g_assert_not_reached(); + */ +} + +/*-------------------------------------------------------------- + * determinize() + */ +gfsmAutomaton *gfsm_automaton_determinize(gfsmAutomaton *nfa) +{ + if (!nfa->flags.is_deterministic) { + gfsmAutomaton *dfa = gfsm_automaton_determinize_full(nfa,NULL); + gfsm_automaton_swap(nfa,dfa); + gfsm_automaton_free(dfa); + } + return nfa; +} + +/*-------------------------------------------------------------- + * determinize_full() + */ +gfsmAutomaton *gfsm_automaton_determinize_full(gfsmAutomaton *nfa, gfsmAutomaton *dfa) +{ + gfsmEnum *ec2id; //-- (global) maps 
literal(equiv-class@nfa) => node-id@dfa + gfsmStateSet *nfa_ec; //-- (temp) equiv-class@nfa + gfsmStateId dfa_id; //-- (temp) id @ dfa + gfsmStateSet *ec_tmp; //-- (temp) equiv-class@nfa + + //-- sanity check(s) + if (!nfa) return NULL; + else if (nfa->flags.is_deterministic) { + if (dfa) gfsm_automaton_copy(dfa,nfa); + else dfa = gfsm_automaton_clone(nfa); + return dfa; + } + + //-- initialization: nfa + //gfsm_automaton_arcsort(nfa,gfsmASMLower); + + //-- initialization: dfa + if (!dfa) { + dfa = gfsm_automaton_shadow(nfa); + } else { + gfsm_automaton_clear(dfa); + gfsm_automaton_copy_shallow(dfa,nfa); + } + dfa->flags.sort_mode = gfsmASMNone; //-- avoid "smart" arc-insertion + + //-- initialization: ec2id + ec2id = gfsm_enum_new_full(NULL /*(gfsmDupFunc)gfsm_stateset_clone*/ , + (GHashFunc)gfsm_stateset_hash, + (GEqualFunc)gfsm_stateset_equal, + (GDestroyNotify)gfsm_stateset_free); + + //-- initialization: nfa_ec + nfa_ec = gfsm_stateset_sized_new(32); + ec_tmp = gfsm_stateset_sized_new(32); + gfsm_stateset_populate_eps(nfa_ec, nfa, nfa->root_id); + + //-- set root in dfa + dfa_id = gfsm_automaton_ensure_state(dfa, gfsm_enum_insert(ec2id, nfa_ec)); + gfsm_automaton_set_root(dfa, dfa_id); + + //-- guts: determinize recursively outwards from root node + _gfsm_determinize_visit_state(nfa, dfa, nfa_ec, dfa_id, ec2id, ec_tmp); + + //-- set flag in dfa + dfa->flags.is_deterministic = TRUE; + + //-- cleanup + //gfsm_stateset_free(nfa_ec); //-- this ought to be freed by gfsm_enum_free(ec2id) + gfsm_stateset_free(ec_tmp); //-- ... 
but not this + gfsm_enum_free(ec2id); + + return dfa; +} + +//================================================================================= +// END DETERMINIZE (v1) +//================================================================================= + + + +//------------------------------------------------------------------------------- +#define GFSM_ALPHABET_CSET \ + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz!\"#$%&'()*+,-./0123456789:;<=>?" \ + "\241\242\243\244\245\246\247\250\251\252\253\254\255\256\257\260\261\262\263\264" \ + "\265\266\267\270\271\272\273\274\275\276\277\300\301\302\303\304\305\306\307\310" \ + "\311\312\313\314\315\316\317\320\321\322\323\324\325\326\327\330\331\332\333\334" \ + "\335\336\337\340\341\342\343\344\345\346\347\350\351\352\353\354\355\356\357\360" + +const GScannerConfig gfsm_alphabet_scanner_config = { + /* Character sets */ + ( " \t\r\n" ) /* skip chars */, + ( GFSM_ALPHABET_CSET ) /* identifier_first */, + ( GFSM_ALPHABET_CSET ) /* identifier_nth */, + "" /* comment_single */, + + FALSE /* case_sensitive */, + FALSE /* skip_comment_multi */, + FALSE /* skip_comment_single */, + FALSE /* scan_comment_multi */, + FALSE /* scan_comment_single */, + TRUE /* scan_identifier */, + TRUE /* scan_identifier_1char */, + FALSE /* scan_identifier_NULL */, + FALSE /* scan_symbols */, + + FALSE /* scan_binary */, + TRUE /* scan_octal */, + FALSE /* scan_float */, + TRUE /* scan_hex */, + FALSE /* scan_hex_dollar */, + + FALSE /* scan_string_sq : string: 'anything' */, + FALSE /* scan_string_dq : string: "\\-escapes!\n" */, + TRUE /* numbers_2_int : bin, octal, hex => int */, + FALSE /* int_2_float : int => G_TOKEN_FLOAT? */, + FALSE /* identifier_2_string : 1 */, + FALSE /* 2_token : 1 : return G_TOKEN_CHAR? */, + FALSE /* symbol_2_token : 1 */, + FALSE /* scope_0_fallback : try scope 0 on lookups? 
*/, + //FALSE /* store_int64 : use value.v_int64 rather than v_int */, + //0 /* padding_dummy; */ +}; + +gboolean gfsm_automaton_compile_file (gfsmAutomaton *fsm, FILE *f, const gchar *filename) +{ + GScanner *scanner = g_scanner_new(&gfsm_automaton_scanner_config); + GTokenType typ; + + g_scanner_input_file(scanner, fileno(f)); + while ((typ = g_scanner_get_next_token(scanner)) != G_TOKEN_EOF) { + g_printerr("<DEBUG:SCANNER:%u.%u> typ=%d : ", + g_scanner_cur_line(scanner), g_scanner_cur_position(scanner), typ); + + switch (typ) { + case G_TOKEN_INT: + g_printerr("(INT) value=%ld\n", g_scanner_cur_value(scanner).v_int); + break; + + case G_TOKEN_FLOAT: + g_printerr("(FLOAT) value=%g\n", g_scanner_cur_value(scanner).v_float); + break; + + case G_TOKEN_CHAR: + g_printerr("(CHAR) value=%d\n", g_scanner_cur_value(scanner).v_char); + break; + + default: + g_printerr("(?) text='%s'\n", scanner->text); //'text' is private: debugging use only! + break; + } + } + return TRUE; +} + + +/*-------------------------------------------------------------- + * scanner template for compile_file() + */ +const GScannerConfig gfsm_automaton_scanner_config = { + /* Character sets */ + ( " \t\r" ) /* skip chars */, + ( "" ) /* identifier_first */, + ( "" ) /* identifier_nth */, + "#\n" /* comment_single */, + + FALSE /* case_sensitive */, + + FALSE /* skip_comment_multi */, + FALSE /* skip_comment_single */, + FALSE /* scan_comment_multi */, + + FALSE /* scan_identifier */, + FALSE /* scan_identifier_1char */, + FALSE /* scan_identifier_NULL */, + FALSE /* scan_symbols */, + + FALSE /* scan_binary */, + TRUE /* scan_octal */, + TRUE /* scan_float */, + TRUE /* scan_hex */, + FALSE /* scan_hex_dollar */, + + FALSE /* scan_string_sq : string: 'anything' */, + FALSE /* scan_string_dq : string: "\\-escapes!\n" */, + TRUE /* numbers_2_int : bin, octal, hex => int */, + FALSE /* int_2_float : int => G_TOKEN_FLOAT? 
*/, + FALSE /* identifier_2_string : 1 */, + FALSE /* 2_token : 1 : return G_TOKEN_CHAR? */, + FALSE /* symbol_2_token : 1 */, + FALSE /* scope_0_fallback : try scope 0 on lookups? */, + FALSE /* store_int64 : use value.v_int64 rather than v_int */, + //0 /* padding_dummy; */ +}; + +/*-------------------------------------------------------------- + * gfsm_automaton_scan_field() + */ +gboolean gfsm_automaton_scan_field(GScanner *scanner, + GTokenType expect1, + GTokenType expect2, + gboolean complain_on_newline) +{ + GTokenType typ; + while (1) { + typ = g_scanner_get_next_token(scanner); + if (expect1 != typ + && expect2 != typ + && (typ == G_TOKEN_CHAR || typ == G_TOKEN_EOF) + && g_scanner_cur_value(scanner).v_char == '\n') + { + //-- return but don't complain on unexpected newlines + if (complain_on_newline) + g_scanner_unexp_token(scanner, expect1, NULL, NULL, NULL, NULL, TRUE); + return FALSE; + } + + if (expect1 != typ && expect2 != typ) { + g_scanner_unexp_token(scanner, expect1, NULL, NULL, NULL, NULL, TRUE); + return FALSE; + } + return TRUE; + } + return FALSE; +} diff --git a/gfsm/gfsm/src/libgfsm/inc2dot.perl b/gfsm/gfsm/src/libgfsm/inc2dot.perl new file mode 100755 index 0000000..f406262 --- /dev/null +++ b/gfsm/gfsm/src/libgfsm/inc2dot.perl @@ -0,0 +1,48 @@ +#!/usr/bin/perl -w + +%inc = qw(); +%nod = qw(); +foreach $file (@ARGV) { + open(F,"<$file") or die("$0: open failed for '$file': $!"); + @incs = qw(); + while (<F>) { + chomp; + if ($_ =~ m/^\s*\#\s*include\s*[\<\"\']\s*(\S+)[\>\"\']/) { push(@incs,$1); } + } + close(F); + $inc{$file} = [@incs]; + @nod{$file,@incs} = undef; +} + +##-- subs +sub safestr { + my $str = shift; + $str =~ s/[\.\,\+\-\=]/_/g; + return $str; +} + +##-- write dot file +print + ("digraph include {\n", + " rankdir = LR;\n", + " rotate = 90;\n", + ); + +##-- all nodes +foreach $f (sort keys(%nod)) { + $f_str = safestr($f); + if (exists($inc{$f})) { $attrs = "[ label=\"$f\", shape=box ]"; } + else { $attrs = "[ label=\"$f\", 
shape=box, style=filled, fill=gray ]"; } + print " $f_str $attrs;\n"; + + ##-- arcs + if (defined($incs=$inc{$f})) { + foreach $i (@$incs) { + $i_str = safestr($i); + if (exists($inc{$i})) { $e_attrs = "[ color=black ]"; } + else { $e_attrs = "[ color=red, style=dashed ]" } + print "\t$f_str -> $i_str $e_attrs;\n" + } + } +} +print "}\n"; diff --git a/gfsm/gfsm/src/libgfsm/tests/.cvsignore b/gfsm/gfsm/src/libgfsm/tests/.cvsignore new file mode 100644 index 0000000..ef3800b --- /dev/null +++ b/gfsm/gfsm/src/libgfsm/tests/.cvsignore @@ -0,0 +1,38 @@ +*~ +.*~ + +*.gfst +*.output +*.lex.c +*.lex.h +*.tab.c +*.tab.h +*.gz + +tagh* +*.dat +negra* +lts-grimm.* + +offsettest +priotest +seek2test +seek3test +seektest +gfsmlabdump +iotest +pathtest +ftest +memtest-general +calctest +calc2test +flextest +flex2test +flex3test +compretest +comprelex +compre2test +structtest + +flextest.c +flextest.h diff --git a/gfsm/gfsm/src/libgfsm/tests/Makefile b/gfsm/gfsm/src/libgfsm/tests/Makefile new file mode 100644 index 0000000..f4ec863 --- /dev/null +++ b/gfsm/gfsm/src/libgfsm/tests/Makefile @@ -0,0 +1,116 @@ +TARGETS = priotest + +KNOWN_TARGETS = \ + priotest \ + seek3test \ + seek2test \ + offsettest \ + arctab-1test \ + seektest \ + gfsmlabdump \ + iotest \ + structtest \ + pathtest \ + memtest \ + memtest-general \ + ftest \ + flextest \ + calctest \ + flex2test \ + calc2test \ + flex3test \ + compretest \ + comprelex \ + compre2test + + +CC = gcc +#CC = ccmalloc --no-wrapper gcc +LD = $(CC) + +CPPFLAGS = -I. -I.. 
-I/usr/include/glib-2.0 -I/usr/lib/glib-2.0/include
+CFLAGS ?= -O2 -pipe
+#CFLAGS ?= -Wall -g
+#CFLAGS += -Wall -g
+#CFLAGS = -Wall -g
+CFLAGS += -Wall -Winline
+
+LDFLAGS = -L../libgfsm/.libs -L/usr/local/lib -L/usr/lib
+LIBS = -lz -lglib-2.0 -lm
+GFSMLIBS = ../.libs/libgfsm.a
+
+LEX ?= flex
+FLEX ?= flex
+FLEXFLAGS ?=
+BISON ?= bison
+BISONFLAGS ?= --verbose
+
+##-- 'all' and 'clean' are commands, not files: never treat them as up to date
+.PHONY: all clean
+
+all: $(TARGETS)
+
+##-- keep intermediate files
+.SECONDARY:
+
+seektest.o: seektest.c labprobs.h
+seektest: seektest.o
+
+##-- flex: one run writes both the scanner source and its header
+%.lex.c %.lex.h: %.l
+	$(FLEX) $(FLEXFLAGS) --outfile="$*.lex.c" --header-file="$*.lex.h" $<
+
+calctest.lex.o: calctest.tab.h
+
+calc2test.lex.o: calc2test.tab.h ../gfsmRegexCompiler.h
+
+compretest.lex.o: compretest.tab.h ../gfsmRegexCompiler.h
+
+comprelex.lex.o: compretest.lex.c compretest.lex.h ../gfsmRegexCompiler.h
+
+##-- gfsmlabdump: link only the object ($<); the archive comes from $(GFSMLIBS)
+gfsmlabdump: gfsmlabdump.o $(GFSMLIBS)
+	$(LD) $(LDFLAGS) -o $@ $< $(GFSMLIBS) $(LIBS)
+
+##-- bison: one run writes both the parser source and its header
+%.tab.c %.tab.h: %.y
+	$(BISON) $(BISONFLAGS) --defines --file-prefix="$*" --name-prefix="$*_yy" $<
+
+##-- flex+bison
+calctest: calctest.lex.o calctest.tab.o $(GFSMLIBS)
+	$(LD) $(LDFLAGS) -o $@ $^ $(GFSMLIBS) $(LIBS)
+
+calc2test: calc2test.lex.o calc2test.tab.o $(GFSMLIBS)
+	$(LD) $(LDFLAGS) -o $@ $^ $(GFSMLIBS) $(LIBS)
+
+flex2test: flex2test.lex.o $(GFSMLIBS)
+	$(LD) $(LDFLAGS) -o $@ $^ $(GFSMLIBS) $(LIBS)
+
+flex3test: flex3test.lex.o $(GFSMLIBS)
+	$(LD) $(LDFLAGS) -o $@ $^ $(GFSMLIBS) $(LIBS)
+
+compretest: compretest.lex.o compretest.tab.o $(GFSMLIBS)
+	$(LD) $(LDFLAGS) -o $@ $^ $(GFSMLIBS) $(LIBS)
+
+comprelex: comprelex.lex.o $(GFSMLIBS)
+	$(LD) $(LDFLAGS) -o $@ $^ $(GFSMLIBS) $(LIBS)
+
+compre2test: compre2test.o $(GFSMLIBS)
+	$(LD) $(LDFLAGS) -o $@ $^ $(GFSMLIBS) $(LIBS)
+
+##-- io
+iotest: iotest.o
+	$(LD) $(LDFLAGS) -o $@ $^ $(GFSMLIBS) $(LIBS)
+
+##-- .c -> .o
+%.o: %.c
+	$(CC) $(CPPFLAGS) $(CFLAGS) -o $@ -c $<
+
+##-- clean: remove objects, generated scanner/parser sources and all known test binaries
+clean:
+	$(RM) *.o *.lo *.tab.[ch] *.lex.[ch] $(KNOWN_TARGETS)
+
+
+##-- executables
+%test: %test.o + $(LD) $(LDFLAGS) -o $@ $^ $(GFSMLIBS) $(LIBS) + +%-general: %-general.o + $(LD) $(LDFLAGS) -o $@ $^ $(GFSMLIBS) $(LIBS) diff --git a/gfsm/gfsm/src/libgfsm/tests/NOTES.lookup b/gfsm/gfsm/src/libgfsm/tests/NOTES.lookup new file mode 100644 index 0000000..538feaf --- /dev/null +++ b/gfsm/gfsm/src/libgfsm/tests/NOTES.lookup @@ -0,0 +1,26 @@ +lookup (NITERS=524288) + real 0m3.410s + user 0m3.403s + sys 0m0.002s + + -> iters/s = 153750.147 = 153.75 K + + : lookup @ 153.75 K iters/s + +lookup + paths (NITERS=524288) + real 0m5.880s + user 0m5.873s + sys 0m0.002s + + -> iters/s = 89164.626 = 89.16 K + + : paths @ (52488/(5.88-3.41)) = 21250.2 = 21.25 K iters/s + +lookup + paths + strings (NITERS=524288) + real 0m9.013s + user 0m9.005s + sys 0m0.002s + + -> iters/s = 58170.199 = 58.17 K + + : strings @ (52488/(9.013-5.88)) = 16753.27 = 16.75 K iters/s diff --git a/gfsm/gfsm/src/libgfsm/tests/alphachurn.c b/gfsm/gfsm/src/libgfsm/tests/alphachurn.c new file mode 100644 index 0000000..8ea47a8 --- /dev/null +++ b/gfsm/gfsm/src/libgfsm/tests/alphachurn.c @@ -0,0 +1,11 @@ +#include <gfsmAlphabet.h> +#include <stdlib.h> + +int main (void) { + gfsmStringAlphabet *a = gfsm_string_alphabet_new(); + + gfsm_string_alphabet_load_filename(a,"test.lab"); + gfsm_string_alphabet_save_filename(a,"-"); + + return 0; +} diff --git a/gfsm/gfsm/src/libgfsm/tests/arctab-1test.c b/gfsm/gfsm/src/libgfsm/tests/arctab-1test.c new file mode 100644 index 0000000..d4cdb4b --- /dev/null +++ b/gfsm/gfsm/src/libgfsm/tests/arctab-1test.c @@ -0,0 +1,54 @@ +#include <gfsm.h> +#include <stdio.h> +#include <stdlib.h> + +int main (int argc, char **argv) +{ + const char *infilename = "-"; + const char *prog = argv[0]; + gfsmAutomaton *fsm=NULL; + gfsmError *err=NULL; + gfsmArcTableIndex *ix=NULL; + gfsmArcLabelIndex *lx=NULL; + + if (argc > 1) { infilename = argv[1]; } + + //-- load automaton + fprintf(stderr, "%s: loading file: '%s'... 
", prog,infilename); fflush(stderr); + fsm = gfsm_automaton_new(); + if ( !(gfsm_automaton_load_bin_filename(fsm,infilename,&err)) ) { + fprintf(stderr, "FAILED: %s\n", (err ? err->message : "?")); + exit(1); + } + fprintf(stderr, "loaded.\n"); fflush(stderr); + + //-- ArcTableIndex + fprintf(stderr, "%s: creating ArcTableIndex... ", prog); fflush(stderr); + if ( !(ix = gfsm_automaton_to_arc_table_index(fsm,ix)) ) { + fprintf(stderr, "FAILED\n"); + exit(2); + } + fprintf(stderr, "created.\n"); fflush(stderr); + // + //-- sort table (todo: check for existing sort mode?!) + fprintf(stderr, "%s: sorting ArcTableIndex (priority sort)... ", prog); fflush(stderr); + gfsm_arc_table_index_priority_sort(ix,gfsmASP_LW,fsm->sr); + fprintf(stderr, "sorted.\n"); fflush(stderr); + + //-- ArcLabelIndex + fprintf(stderr, "%s: creating ArcLabelIndex... ", prog); fflush(stderr); + if ( !(lx = gfsm_automaton_to_arc_label_index_lower(fsm,lx)) ) { + fprintf(stderr, "FAILED\n"); + exit(3); + } + fprintf(stderr, "created.\n"); fflush(stderr); + + //-- cleanup + fprintf(stderr, "%s: cleanup... 
", prog); fflush(stderr); + gfsm_automaton_free(fsm); + gfsm_arc_table_index_free(ix); + gfsm_arc_label_index_free(lx); + fprintf(stderr, "done.\n"); fflush(stderr); + + return 0; +} diff --git a/gfsm/gfsm/src/libgfsm/tests/arytest.c b/gfsm/gfsm/src/libgfsm/tests/arytest.c new file mode 100644 index 0000000..d766b89 --- /dev/null +++ b/gfsm/gfsm/src/libgfsm/tests/arytest.c @@ -0,0 +1,27 @@ +#include <glib.h> +#include <stdio.h> + +int main (void) { + GPtrArray *a; + gpointer p; + g_mem_set_vtable(glib_mem_profiler_table); + + printf("<PROF:1>--------\n"); + + + a = g_ptr_array_sized_new(128); + g_ptr_array_set_size(a,1024); + p = g_ptr_array_free(a,TRUE); + printf("p=ptr_array_free()=%p\n", p); + + printf("<CHUNKS:1>--------\n"); + //g_mem_chunk_info(); + + //printf("<CHUNKS:2>--------\n"); + g_blow_chunks(); + //g_mem_chunk_info(); + + printf("<PROF:2>--------\n"); + g_mem_profile(); + return 0; +} diff --git a/gfsm/gfsm/src/libgfsm/tests/arytest2.c b/gfsm/gfsm/src/libgfsm/tests/arytest2.c new file mode 100644 index 0000000..ebef7f5 --- /dev/null +++ b/gfsm/gfsm/src/libgfsm/tests/arytest2.c @@ -0,0 +1,63 @@ +#include <glib.h> +#include <stdio.h> +#include <stdlib.h> + +void print_array(const char *label, GArray *a) { + int i; + printf("Array %s: {", label); + for (i=0; i < a->len; i++) { + printf(" %d", g_array_index(a,int,i)); + } + printf(" }\n"); +} + +gint compare_int(int *x, int *y) { + return (*x)-(*y); + //return (*x < *y ? -1 : (*x > *y ? 
1 : 0)); +} + +int main (int argc, char **argv) { + GArray *a1, *a2; + int i, val; + + a1 = g_array_new(FALSE, TRUE, sizeof(int)); + a2 = g_array_new(FALSE, TRUE, sizeof(int)); + for (i=1; i < argc; i++) { + val = strtol(argv[i],NULL,10); + g_array_append_val(a1,val); + } + print_array("a1", a1); + + //-- insert(0,0) + val = 0; + g_array_insert_val(a1,0,val); + printf("--\n"); + print_array("a1/insert(0,0)",a1); + + //-- copy a1 into a2 + g_array_append_vals(a2,a1->data,a1->len); + print_array("a2", a2); + printf("--\n"); + + //-- move elements of a1 up one notch + /*-- NOT ok + g_array_insert_vals(a1, 1, a1->data, a1->len-1); + g_array_index(a1,int,0) = 0; + */ + + /*-- ok */ + i = 0; + g_array_insert_val(a1,0,i); + print_array("a1/moved", a1); + printf("--\n"); + + //-- sort + //g_array_sort(a1,NULL); //-- NOT ok + g_array_sort(a1,(GCompareFunc)compare_int); //-- ok + print_array("a1/sorted", a1); + + g_array_free(a1,TRUE); + g_array_free(a2,TRUE); + + return 0; +} diff --git a/gfsm/gfsm/src/libgfsm/tests/asciigen.perl b/gfsm/gfsm/src/libgfsm/tests/asciigen.perl new file mode 100755 index 0000000..1cabeb9 --- /dev/null +++ b/gfsm/gfsm/src/libgfsm/tests/asciigen.perl @@ -0,0 +1,17 @@ +#!/usr/bin/perl -w + +@ascii = (ord('A')..ord('Z'), + ord('a')..ord('z'), + ord('!')..ord('/'), + ord('0')..ord('9'), + ord(':')..ord('?')); + +foreach $c (@ascii) { + print chr($c); +} +print "\n"; + +foreach $c (161..255) { + printf("\\%o", $c); +} +print "\n"; diff --git a/gfsm/gfsm/src/libgfsm/tests/bvtest.c b/gfsm/gfsm/src/libgfsm/tests/bvtest.c new file mode 100644 index 0000000..3cf190a --- /dev/null +++ b/gfsm/gfsm/src/libgfsm/tests/bvtest.c @@ -0,0 +1,30 @@ +#include <gfsm.h> + +int main (int argc, char **argv) { + int i; + guint bit; + gfsmBitVector *bv = gfsm_bitvector_new(); + + for (i = 0; i <= 16 ; i++) { + printf("\t%d bits ~= %d bytes @ %d\n", + i, _gfsm_bitvector_bits2bytes(i), i%8); + } + + for (i = 1; i < argc; i++) { + sscanf(argv[i], "%u", &bit); + printf("%s: 
setting bit=%u : bit2byte=%u\n", *argv, bit, _gfsm_bitvector_bits2bytes(bit)); + gfsm_bitvector_set(bv,bit,1); + } + + printf("%s: vector [bytes=%u ; bits=%u] =\n", + *argv, bv->len, gfsm_bitvector_size(bv)); + for (bit = 0; bit < gfsm_bitvector_size(bv); bit++) { + if ((bit%8)==0) fputc(' ', stdout); + fputc((gfsm_bitvector_get(bv,bit) ? '1' : '0'), stdout); + } + printf("\n"); + + gfsm_bitvector_free(bv); + + return 0; +} diff --git a/gfsm/gfsm/src/libgfsm/tests/calc2test.l b/gfsm/gfsm/src/libgfsm/tests/calc2test.l new file mode 100644 index 0000000..7072a4c --- /dev/null +++ b/gfsm/gfsm/src/libgfsm/tests/calc2test.l @@ -0,0 +1,56 @@ +/*====================================================================== + * Flex Options + */ +%option outfile="calc2test.lex.c" +%option header-file="calc2test.lex.h" +%option prefix="calc2test_yy" +%option reentrant +%option 8bit +%option yylineno + +%option bison-bridge + +%{ +/*====================================================================== + * User C Header + */ + +/* bison stuff */ +#include <gfsmScanner.h> +#include "calc2test.tab.h" + +%} + +/*====================================================================== + * Flex Definitions + */ + + +/*====================================================================== + * Flex Rules + */ +%% + +([[:digit:]]*\.?)[[:digit:]]+ { yylval->dbl = strtod(yytext,NULL); return NUMBER;} + +\( { return LPAREN; } +\) { return RPAREN; } +\+ { return PLUS; } +\- { return MINUS; } +\* { return TIMES; } +\/ { return DIV; } + +\n { return NEWLINE; } + +[[:space:]] { /* ignore */ } + +. 
{ return OTHER; /* ignore */ }
+
+%%
+
+/*======================================================================
+ * User C Code
+ */
+
+//-- wrapper: use default
+GFSM_SCANNER_YYWRAP(calc2test_yy)
diff --git a/gfsm/gfsm/src/libgfsm/tests/calc2test.y b/gfsm/gfsm/src/libgfsm/tests/calc2test.y
new file mode 100644
index 0000000..4754613
--- /dev/null
+++ b/gfsm/gfsm/src/libgfsm/tests/calc2test.y
@@ -0,0 +1,89 @@
+/*======================================================================
+ * Bison Options
+ */
+%pure_parser
+
+%{
+/*======================================================================
+ * Bison C Header
+ */
+#include <stdio.h>
+#include <gfsmScanner.h>
+#include "calc2test.tab.h"
+#include "calc2test.lex.h"
+
+typedef struct calc2testDataS {
+  gfsmScanner scanner;
+  double val;
+} calc2testData;
+
+#define YYLEX_PARAM ((calc2testData*)pparam)->scanner.yyscanner
+#define YYPARSE_PARAM pparam
+
+#define YYERROR_VERBOSE 1
+#define calc2test_yyerror(msg) \
+  gfsm_scanner_carp((gfsmScanner*)pparam, (msg));
+
+%}
+
+/*======================================================================
+ * Bison Definitions
+ */
+%union {
+  double dbl;
+}
+
+%token <dbl> NUMBER PLUS MINUS TIMES DIV LPAREN RPAREN NEWLINE OTHER
+%type <dbl> expr exprs
+
+%left PLUS MINUS
+%left TIMES DIV
+%nonassoc UMINUS  /* precedence-only symbol, declared last (= highest): used via %prec for unary minus */
+
+/*======================================================================
+ * Bison Rules
+ */
+%%
+
+exprs: /* empty */
+         { $$=0; }
+     | exprs expr NEWLINE
+         { printf("%g\n", $2); ((calc2testData*)pparam)->val=$$=$2; }
+     ;
+
+expr: LPAREN expr RPAREN       { $$=$2; }
+    | MINUS expr %prec UMINUS  { $$=-$2; }    /* unary minus binds tighter than TIMES/DIV */
+    | expr TIMES expr          { $$=$1*$3; }
+    | expr DIV expr            { $$=$1/$3; }
+    | expr PLUS expr           { $$=$1+$3; }
+    | expr MINUS expr          { $$=$1-$3; }  /* binary minus: precedence of MINUS, so 1-2*3 == 1-(2*3) */
+    | NUMBER                   { $$=$1; }
+    ;
+
+%%
+
+/*======================================================================
+ * User C Code
+ */
+
+void calc2test_yyerror_func(const char *msg)
+{
+  fprintf(stderr, "yyerror: %s\n", msg);
+}
+
+
+int main (void) 
{ + calc2testData *pparams = g_new0(calc2testData,1); + gfsm_scanner_init((gfsmScanner*)pparams, "calctest2Scanner", calc2test_yy); + + calc2test_yyparse(pparams); + if (pparams->scanner.err) { + fprintf(stderr, "Error: %s\n", pparams->scanner.err->message); + } + + printf("Final calctest value=%g\n", pparams->val); + + gfsm_scanner_free((gfsmScanner*)pparams); + + return 0; +} diff --git a/gfsm/gfsm/src/libgfsm/tests/calctest.l b/gfsm/gfsm/src/libgfsm/tests/calctest.l new file mode 100644 index 0000000..1a03bc1 --- /dev/null +++ b/gfsm/gfsm/src/libgfsm/tests/calctest.l @@ -0,0 +1,52 @@ +/*====================================================================== + * Flex Options + */ +%option outfile="calctest.lex.c" +%option header-file="calctest.lex.h" +%option prefix="calctest_yy" +%option reentrant +%option 8bit + +%option bison-bridge + +%{ +/*====================================================================== + * User C Header + */ + +/* bison stuff */ +#include "calctest.tab.h" + +%} + +/*====================================================================== + * Flex Definitions + */ + + +/*====================================================================== + * Flex Rules + */ +%% + +([[:digit:]]*\.?)[[:digit:]]+ { yylval->dbl = strtod(yytext,NULL); return NUMBER;} + +\( { return LPAREN; } +\) { return RPAREN; } +\+ { return PLUS; } +\- { return MINUS; } +\* { return TIMES; } +\/ { return DIV; } + +\n { return NEWLINE; } + +. 
{ return OTHER; /* ignore */ }
+
+%%
+
+/*======================================================================
+ * User C Code
+ */
+
+//-- wrapper: don't
+int calctest_yywrap(yyscan_t yyscanner) { return 1; }
diff --git a/gfsm/gfsm/src/libgfsm/tests/calctest.y b/gfsm/gfsm/src/libgfsm/tests/calctest.y
new file mode 100644
index 0000000..befec0e
--- /dev/null
+++ b/gfsm/gfsm/src/libgfsm/tests/calctest.y
@@ -0,0 +1,118 @@
+/*======================================================================
+ * Bison Options
+ */
+%pure_parser
+
+%{
+/*======================================================================
+ * Bison C Header
+ */
+#include <stdio.h>
+#include <stdarg.h>
+#include "calctest.tab.h"
+#include "calctest.lex.h"
+
+typedef struct _yyparse_param {
+  char *name;
+  char *filename;
+  yyscan_t scanner;
+  float val;
+} yyparseParam;
+
+#define YYLEX_PARAM ((yyparseParam*)pparam)->scanner
+#define YYPARSE_PARAM pparam
+
+extern void calctest_yyerror(const char *msg);
+extern void calctest_yycarp(yyparseParam *pparams, const char *fmt, ...);
+
+%}
+
+/*======================================================================
+ * Bison Definitions
+ */
+%union {
+  double dbl;
+}
+
+%token <dbl> NUMBER PLUS MINUS TIMES DIV LPAREN RPAREN NEWLINE OTHER
+%type <dbl> expr exprs
+
+%left PLUS MINUS
+%left TIMES DIV
+%nonassoc UMINUS  /* precedence-only symbol, declared last (= highest): used via %prec for unary minus */
+
+/*======================================================================
+ * Bison Rules
+ */
+%%
+
+exprs: /* empty */
+         { $$=0; }
+     | exprs expr NEWLINE
+         { printf("%g\n", $2); ((yyparseParam*)pparam)->val=$$=$2; }
+     ;
+
+expr: LPAREN expr RPAREN       { $$=$2; }
+    | MINUS expr %prec UMINUS  { $$=-$2; }    /* unary minus binds tighter than TIMES/DIV */
+    | expr TIMES expr          { $$=$1*$3; }
+    | expr DIV expr            { $$=$1/$3; }
+    | expr PLUS expr           { $$=$1+$3; }
+    | expr MINUS expr          { $$=$1-$3; }  /* binary minus: precedence of MINUS, so 1-2*3 == 1-(2*3) */
+    | NUMBER                   { $$=$1; }
+    | OTHER
+        {
+          calctest_yycarp((yyparseParam*)pparam, "Failed to parse expression");
+          YYABORT;
+        }
+    ;
+
+%%
+
+/*======================================================================
+ * 
User C Code + */ + +void calctest_yyerror(const char *msg) +{ + fprintf(stderr, "yyerror: %s\n", msg); +} + +void calctest_yycarp(yyparseParam *pparams, const char *fmt, ...) +{ + va_list ap; + fprintf(stderr, "%s: ", (pparams->name ? pparams->name : "calctest_parser")); + + va_start(ap,fmt); + vfprintf(stderr, fmt, ap); + va_end(ap); + + fprintf(stderr, " in %s%s%s at line %u, column %u.\n", + (pparams->filename ? "file \"" : ""), + (pparams->filename ? pparams->filename : "input"), + (pparams->filename ? "\"" : ""), + calctest_yyget_lineno(pparams->scanner), + calctest_yyget_column(pparams->scanner)); +} + +int main (void) { + yyscan_t scanner; + yyparseParam pparams; + + calctest_yylex_init(&scanner); //-- initialize reentrant flex scanner + + pparams.name = NULL; + pparams.filename = NULL; + //-- + //pparams.name = "myParser"; + //pparams.filename = "(stdin)"; + + pparams.scanner = scanner; + + calctest_yyparse(&pparams); + + calctest_yylex_destroy(pparams.scanner); //-- cleanup reentrant flex scanner + + printf("Final calctest value=%g\n", pparams.val); + + return 0; +} diff --git a/gfsm/gfsm/src/libgfsm/tests/commatest.c b/gfsm/gfsm/src/libgfsm/tests/commatest.c new file mode 100644 index 0000000..b704970 --- /dev/null +++ b/gfsm/gfsm/src/libgfsm/tests/commatest.c @@ -0,0 +1,8 @@ +#include <stdio.h> + +int main (void) { + int x = 0; + + x = 1, printf("foo\n"), printf("bar\n"), x=2, printf("%d\n", x); + return 0; +} diff --git a/gfsm/gfsm/src/libgfsm/tests/compre2test.c b/gfsm/gfsm/src/libgfsm/tests/compre2test.c new file mode 100644 index 0000000..07d2b07 --- /dev/null +++ b/gfsm/gfsm/src/libgfsm/tests/compre2test.c @@ -0,0 +1,41 @@ +#include <gfsmRegexCompiler.h> +#include <gfsmAutomatonIO.h> + + +/*====================================================================== + * User C Code + */ + +int main (int argc, char **argv) { + gfsmRegexCompiler *rec = gfsm_regex_compiler_new(); + gfsmAutomaton *fsm = NULL; + + //-- initialization + rec->abet = 
gfsm_string_alphabet_new(); + if (!gfsm_alphabet_load_filename(rec->abet, "test.lab", &(rec->scanner.err))) { + g_printerr("%s: load failed for labels file '%s': %s\n", + *argv, "test.lab", (rec->scanner.err ? rec->scanner.err->message : "?")); + exit(2); + } + + //-- debug: lexer + rec->scanner.emit_warnings = TRUE; + + //-- parse + fsm = gfsm_regex_compiler_parse(rec); + + //-- sanity check + if (rec->scanner.err) { + fprintf(stderr, "%s: %s\n", *argv, rec->scanner.err->message); + } + + if (fsm) { + gfsm_automaton_save_bin_file(fsm, stdout, NULL); + } else { + fprintf(stderr, "%s: Error: no fsm!\n", *argv); + } + + gfsm_regex_compiler_free(rec,TRUE,TRUE); + + return 0; +} diff --git a/gfsm/gfsm/src/libgfsm/tests/comprelex.l b/gfsm/gfsm/src/libgfsm/tests/comprelex.l new file mode 100644 index 0000000..b82178d --- /dev/null +++ b/gfsm/gfsm/src/libgfsm/tests/comprelex.l @@ -0,0 +1,297 @@ +/*====================================================================== + * Flex Options + */ +%option outfile="comprelex.lex.c" +%option header-file="comprelex.lex.h" +%option prefix="compretest_yy" +%option reentrant +%option 8bit +%option yylineno + +%{ +/*====================================================================== + * User C Header + */ + +#include <gfsmRegexCompiler.h> +//#include "compretest.tab.h" + +#define my_compiler ((gfsmRegexCompiler*)yyextra) +#define my_gstr (my_compiler->gstr) +#define my_abet (my_compiler->abet) + +typedef enum { + TOK_UNKNOWN =256, + TOK_CHAR =257, + TOK_STRING =258, + TOK_WEIGHT =259, + TOK_UINT =260, + TOK_BRACKETED =261, + TOK_EOF =262 +} TokenType; + +%} + +/*====================================================================== + * Flex Definitions + */ + +DIGIT [0-9] +SPACE [[:space:]] +WCHAR [^\<\[\\ \t\n\r\#] +BCHAR [^\\\]\ \t\n\r\=] +UTF8PREFIX "Ã" + +%x STATE_ESCAPE +%x STATE_BRACKETED +%x STATE_BRACKETED_SEP +%x STATE_BRACKETED_ESCAPE +%x STATE_COMMENT +%x STATE_WEIGHT +%x STATE_UINT + 
+/*======================================================================
+ * Rules
+ */
+%%
+
+{UTF8PREFIX}+.   {
+  /* prefix byte(s) plus one following character: the buffer is reset and holds only this match */
+  g_string_truncate(my_gstr,0);
+  g_string_append_len(my_gstr,yytext,yyleng);
+  return TOK_STRING;
+}
+
+"\\"    { BEGIN(STATE_ESCAPE); }
+
+"#"     { BEGIN(STATE_COMMENT); }
+
+"["     {
+  BEGIN(STATE_BRACKETED);
+  return '[';
+}
+
+"<"     { BEGIN(STATE_WEIGHT); return '<'; }
+
+"("     { return '('; }
+")"     { return ')'; }
+
+"*"     { return '*'; }
+"+"     { return '+'; }
+"^"     { BEGIN(STATE_UINT); return '^'; /* power operator: the exponent is scanned in STATE_UINT */ }
+"?"     { return '?'; }
+"!"     { return '!'; }
+"|"     { return '|'; }
+"&"     { return '&'; }
+":"     { return ':'; }
+"@"     { return '@'; }
+"-"     { return '-'; /*"/1"{return PROJ1;}*/ /*"/2"{return PROJ2;}*/ }
+
+"$"     { return '$'; /* non-AT&T: determinize */ }
+"%"     { return '%'; /* non-AT&T: rmepsilon */ }
+"~"     { return '~'; /* non-AT&T: connect */ }
+
+{SPACE} { /* ignore */ }
+
+{WCHAR} {
+  //yylval->u = gfsm_alphabet_find_label(my_abet, yytext);
+  return TOK_CHAR;
+}
+
+
+
+<STATE_WEIGHT>([\+\-])?({DIGIT}*\.)?({DIGIT}+)([Ee]([\+\-]?){DIGIT}+)? {
+  //yylval->w = strtod(yytext,NULL);
+  return TOK_WEIGHT;
+}
+
+<STATE_WEIGHT>\> {
+  BEGIN(INITIAL);
+  return '>';
+}
+
+
+
+<STATE_UINT>{DIGIT}+ {
+  BEGIN(INITIAL);
+  //yylval->u = strtol(yytext,NULL,0);
+  return TOK_UINT;
+}
+
+
+<STATE_COMMENT>[^\n]*\n { BEGIN(INITIAL); /* ignore comments */ }
+
+
+
+
+<STATE_ESCAPE>. {
+  /* any single escaped character is reported as an ordinary TOK_CHAR */
+  BEGIN(INITIAL);
+  return TOK_CHAR;
+}
+
+
+
+<STATE_BRACKETED>{UTF8PREFIX}+. 
{ + g_string_append_len(my_gstr,yytext,yyleng); +} +<STATE_BRACKETED>{SPACE}*"]" { + unput(']'); + BEGIN(STATE_BRACKETED_SEP); + if (my_gstr->len) return TOK_STRING; +} +<STATE_BRACKETED>{BCHAR}+ { + g_string_append(my_gstr, yytext); + //return TOK_STRING; +} +<STATE_BRACKETED>({SPACE}*)"="({SPACE}*) { + unput('='); + BEGIN(STATE_BRACKETED_SEP); + if (my_gstr->len) return TOK_STRING; +} +<STATE_BRACKETED>{SPACE}+ { + unput(' '); + BEGIN(STATE_BRACKETED_SEP); + if (my_gstr->len) return TOK_STRING; +} +<STATE_BRACKETED>\\ { + BEGIN(STATE_BRACKETED_ESCAPE); +} + + +<STATE_BRACKETED_SEP>"]" { + BEGIN(INITIAL); + if (my_gstr->len) { + g_string_truncate(my_gstr,0); + return ']'; + } +} +<STATE_BRACKETED_SEP>"=" { + BEGIN(STATE_BRACKETED); + g_string_truncate(my_gstr,0); + return '='; +} +<STATE_BRACKETED_SEP>" " { + BEGIN(STATE_BRACKETED); + if (my_gstr->len) { + g_string_truncate(my_gstr,0); + return ' '; + } +} + + + +<STATE_BRACKETED_ESCAPE>. { + BEGIN(STATE_BRACKETED); + g_string_append_c(my_gstr, yytext[0]); + //return TOK_STRING; +} + + + +<*>. 
{ + gfsm_scanner_carp(yyextra, "bad character '%s'", yytext); + return TOK_UNKNOWN; +} + +<<EOF>> { return TOK_EOF; } + +%% + +/*====================================================================== + * User C Code + */ + +void testme(gfsmScanner *scanner) { + TokenType tok; + double weight; + unsigned int uint; + + while ((tok=compretest_yylex(scanner->yyscanner)) != TOK_EOF) { + switch (tok) { + case TOK_CHAR: + printf("(char) '%s'\n", compretest_yyget_text(scanner->yyscanner)); + break; + + case TOK_BRACKETED: + printf("(bracketed) text=\"%s\" gstr=\"%s\"\n", + compretest_yyget_text(scanner->yyscanner), + ((gfsmRegexCompiler*)scanner)->gstr->str); + break; + + case TOK_STRING: + printf("(string) gstr=\"%s\"\n", + //compretest_yyget_text(scanner->yyscanner), + ((gfsmRegexCompiler*)scanner)->gstr->str + ); + break; + + case '[': + printf("(left-bracket) text=\"%s\"\n", compretest_yyget_text(scanner->yyscanner)); + break; + + case ']': + printf("(right-bracket) text=\"%s\"\n", compretest_yyget_text(scanner->yyscanner)); + break; + + case ' ': + printf("(space) text=\"%s\"\n", compretest_yyget_text(scanner->yyscanner)); + break; + + case '=': + printf("(equal) text=\"%s\"\n", compretest_yyget_text(scanner->yyscanner)); + break; + + case TOK_WEIGHT: + weight = strtod(compretest_yyget_text(scanner->yyscanner),NULL); + printf("(weight) =%g\n", weight); + break; + + case TOK_UINT: + uint = strtol(compretest_yyget_text(scanner->yyscanner),NULL,0); + printf("(uint) =%d\n", uint); + break; + + default: + printf("(other=%d~'%c'): (%s)\n", tok, tok, compretest_yyget_text(scanner->yyscanner)); + break; + } + + if (scanner->err) { + fprintf(stderr, "Error: %s\n", scanner->err->message); + g_clear_error(&(scanner->err)); + break; + } + } +} + +int main(int argc,char **argv) { + gfsmRegexCompiler *reparser = g_new0(gfsmRegexCompiler,1); + gfsm_scanner_init((gfsmScanner*)reparser, "gfsmRegexCompiler", compretest_yy); + + //-- initialization + reparser->srtype = gfsmSRTTropical; 
+ reparser->gstr = g_string_new(""); + reparser->abet = gfsm_string_alphabet_new(); + if (!gfsm_alphabet_load_filename(reparser->abet, "test.lab", &(reparser->scanner.err))) { + g_printerr("%s: load failed for labels file '%s': %s\n", + *argv, "test.lab", (reparser->scanner.err ? reparser->scanner.err->message : "?")); + exit(2); + } + + //-- debug: lexer + reparser->scanner.emit_warnings = TRUE; + + //-- lex + testme((gfsmScanner*)reparser); + + //-- sanity check + if (reparser->scanner.err) { + fprintf(stderr, "Error: %s\n", reparser->scanner.err->message); + } + + gfsm_scanner_free((gfsmScanner*)reparser); + + return 0; +} + + +GFSM_SCANNER_YYWRAP(compretest_yy) diff --git a/gfsm/gfsm/src/libgfsm/tests/compretest.l b/gfsm/gfsm/src/libgfsm/tests/compretest.l new file mode 100644 index 0000000..731655c --- /dev/null +++ b/gfsm/gfsm/src/libgfsm/tests/compretest.l @@ -0,0 +1,199 @@ +/*====================================================================== + * Flex Options + */ +%option outfile="compretest.lex.c" +%option header-file="compretest.lex.h" +%option prefix="compretest_yy" +%option reentrant +%option 8bit +%option yylineno + +%option bison-bridge + +%{ +/*====================================================================== + * User C Header + */ + +#include <gfsmMem.h> +#include <gfsmRegexCompiler.h> +#include "compretest.tab.h" + +#define my_compiler ((gfsmRegexCompiler*)yyextra) +#define my_gstr (my_compiler->gstr) +#define my_abet (my_compiler->abet) + +%} + +/*====================================================================== + * Flex Definitions + */ + +DIGIT [0-9] +SPACE [[:space:]] +WCHAR [^\<\[\\ \t\n\r\#] +BCHAR [^\\\]\ \t\n\r\=] +UTF8PREFIX "Ã" + +%x STATE_ESCAPE +%x STATE_BRACKETED +%x STATE_BRACKETED_SEP +%x STATE_BRACKETED_ESCAPE +%x STATE_COMMENT +%x STATE_WEIGHT +%x STATE_UINT + +/*====================================================================== + * Rules + */ +%% + +{UTF8PREFIX}+. 
{
+  /* prefix byte(s) plus one following character, passed to the parser as a new GString */
+  yylval->gs = gfsm_gstring_new_bytes(yytext,yyleng);
+  return TOK_STRING;
+}
+
+"\\"    { BEGIN(STATE_ESCAPE); }
+
+"#"     { BEGIN(STATE_COMMENT); }
+
+"["     {
+  BEGIN(STATE_BRACKETED);
+  return '[';
+}
+
+"<"     { BEGIN(STATE_WEIGHT); return '<'; }
+
+"("     { return '('; }
+")"     { return ')'; }
+
+"*"     { return '*'; }
+"+"     { return '+'; }
+"^"     { BEGIN(STATE_UINT); return '^'; /* power operator: the exponent is scanned in STATE_UINT */ }
+"?"     { return '?'; }
+"!"     { return '!'; }
+"|"     { return '|'; }
+"&"     { return '&'; }
+":"     { return ':'; }
+"@"     { return '@'; }
+"-"     { return '-'; /*"/1"{return PROJ1;}*/ /*"/2"{return PROJ2;}*/ }
+
+"$"     { return '$'; /* non-AT&T: determinize */ }
+"%"     { return '%'; /* non-AT&T: rmepsilon */ }
+"~"     { return '~'; /* non-AT&T: connect */ }
+
+{SPACE} { /* ignore */ }
+
+{WCHAR} {
+  yylval->c = yytext[0];
+  return TOK_CHAR;
+}
+
+
+
+<STATE_WEIGHT>([\+\-])?({DIGIT}*\.)?({DIGIT}+)([Ee]([\+\-]?){DIGIT}+)? {
+  yylval->w = strtod(yytext,NULL);
+  return TOK_WEIGHT;
+}
+
+<STATE_WEIGHT>\> {
+  BEGIN(INITIAL);
+  return '>';
+}
+
+
+
+<STATE_UINT>{DIGIT}+ {
+  BEGIN(INITIAL);
+  yylval->u = strtol(yytext,NULL,0);
+  return TOK_UINT;
+}
+
+
+<STATE_COMMENT>[^\n]*\n { BEGIN(INITIAL); /* ignore comments */ }
+
+
+
+
+<STATE_ESCAPE>. {
+  /* any single escaped character is passed on as an ordinary TOK_CHAR */
+  BEGIN(INITIAL);
+  yylval->c = yytext[0];
+  return TOK_CHAR;
+}
+
+
+<STATE_BRACKETED>{UTF8PREFIX}+. 
{ + g_string_append_len(my_gstr,yytext,yyleng); +} +<STATE_BRACKETED>{SPACE}*"]" { + unput(']'); + BEGIN(STATE_BRACKETED_SEP); + if (my_gstr->len) { + yylval->gs = gfsm_gstring_dup(my_gstr); + return TOK_STRING; + } +} +<STATE_BRACKETED>{BCHAR}+ { + g_string_append(my_gstr, yytext); +} +<STATE_BRACKETED>({SPACE}*)"="({SPACE}*) { + unput('='); + BEGIN(STATE_BRACKETED_SEP); + if (my_gstr->len) { + yylval->gs = gfsm_gstring_dup(my_gstr); + return TOK_STRING; + } +} +<STATE_BRACKETED>{SPACE}+ { + unput(' '); + BEGIN(STATE_BRACKETED_SEP); + if (my_gstr->len) { + yylval->gs = gfsm_gstring_dup(my_gstr); + return TOK_STRING; + } +} +<STATE_BRACKETED>\\ { + BEGIN(STATE_BRACKETED_ESCAPE); +} + + +<STATE_BRACKETED_SEP>"]" { + BEGIN(INITIAL); + if (my_gstr->len) { + g_string_truncate(my_gstr,0); + return ']'; + } +} +<STATE_BRACKETED_SEP>"=" { + BEGIN(STATE_BRACKETED); + g_string_truncate(my_gstr,0); + return '='; +} +<STATE_BRACKETED_SEP>" " { + BEGIN(STATE_BRACKETED); + if (my_gstr->len) { + g_string_truncate(my_gstr,0); + return ' '; + } +} + + +<STATE_BRACKETED_ESCAPE>. { + BEGIN(STATE_BRACKETED); + g_string_append_c(my_gstr, yytext[0]); +} + + + +<*>. 
{ + gfsm_scanner_carp(yyextra, "bad character '%s'", yytext); + return TOK_UNKNOWN; +} + +%% + +/*====================================================================== + * User C Code + */ + +GFSM_SCANNER_YYWRAP(compretest_yy) diff --git a/gfsm/gfsm/src/libgfsm/tests/compretest.y b/gfsm/gfsm/src/libgfsm/tests/compretest.y new file mode 100644 index 0000000..d55f488 --- /dev/null +++ b/gfsm/gfsm/src/libgfsm/tests/compretest.y @@ -0,0 +1,185 @@ +/*====================================================================== + * Bison Options + */ +%pure_parser + +%{ +/*====================================================================== + * Bison C Header + */ +#include <gfsmRegexCompiler.h> +#include <gfsmAutomatonIO.h> + +#include "compretest.tab.h" +#include "compretest.lex.h" + +#define my_compiler ((gfsmRegexCompiler*)reparser) + +#define YYLEX_PARAM ((gfsmRegexCompiler*)reparser)->scanner.yyscanner +#define YYPARSE_PARAM reparser + +#define YYERROR_VERBOSE 1 +#define compretest_yyerror(msg) \ + gfsm_scanner_carp((gfsmScanner*)reparser, (msg)); + +%} + +/*====================================================================== + * Bison Definitions + */ +%union { + gfsmAutomaton *fsm; //-- automaton + GString *gs; //-- needs to be freed by hand + gchar c; + guint32 u; + gfsmWeight w; +} + +%token <c> TOK_UNKNOWN TOK_CHAR +%token <u> TOK_UINT +%token <gs> TOK_STRING +%token <w> TOK_WEIGHT + +%type <u> label +%type <w> weight +%type <fsm> regex + +/* +empty { $$=gfsm_regex_automaton_epsilon(my_compiler); } +*/ + +/* + | gfsmRETChar %prec LAB + { $$=gfsm_regex_automaton_lab(my_compiler, $1); } +*/ + +// -- Operator precedence and associativity +%left CONCAT +%left LABCONCAT +%left WEIGHT +%right '%' //-- non-AT&T: rmepsilon: % REGEX +%right '$' //-- non-AT&T: determinize: $ REGEX +%right '~' //-- non-AT&T: connect: ~ REGEX +%left '*' '+' '?' '^' +%right '!' 
+%left '@' +%left ':' +%left '-' +%left '&' +%left '|' + +/*====================================================================== + * Bison Rules + */ +%% + +regex: '('regex ')' + { $$=$2; } + + | label + { $$=gfsm_regex_compiler_label_fsm(my_compiler, $1); } + + | label regex %prec LABCONCAT + { $$=gfsm_regex_compiler_prepend_lab(my_compiler, $1, $2); } + + | regex regex %prec CONCAT + { $$=gfsm_regex_compiler_concat(my_compiler, $1, $2); } + + | '%' regex + { $$=gfsm_regex_compiler_rmepsilon(my_compiler, $2); /* non-ATT */ } + + | '$' regex + { $$=gfsm_regex_compiler_determinize(my_compiler, $2); /* non-ATT */ } + + | '~' regex + { $$=gfsm_regex_compiler_connect(my_compiler, $2); /* non-ATT */ } + + | regex '*' + { $$=gfsm_regex_compiler_closure(my_compiler,$1,FALSE); } + + | regex '+' + { $$=gfsm_regex_compiler_closure(my_compiler,$1,TRUE); } + + | regex '^' TOK_UINT + { $$=gfsm_regex_compiler_power(my_compiler,$1,$3); } + + | regex '?' + { $$=gfsm_regex_compiler_optional(my_compiler,$1); } + + | '!' 
regex + { $$=gfsm_regex_compiler_complement(my_compiler,$2); } + + | regex '|' regex + { $$=gfsm_regex_compiler_union(my_compiler,$1,$3); } + + | regex '&' regex + { $$=gfsm_regex_compiler_intersect(my_compiler,$1,$3); } + + | regex ':' regex + { $$=gfsm_regex_compiler_product(my_compiler,$1,$3); } + + | regex '@' regex + { $$=gfsm_regex_compiler_compose(my_compiler,$1,$3); } + + | regex '-' regex + { $$=gfsm_regex_compiler_difference(my_compiler,$1,$3); } + + | regex weight %prec WEIGHT + { $$=gfsm_regex_compiler_weight(my_compiler,$1,$2); } + ; + +label: TOK_CHAR + { $$=gfsm_regex_compiler_char2label(my_compiler, $1); } + + | TOK_STRING + { $$=gfsm_regex_compiler_gstring2label(my_compiler, $1); } + + | '[' TOK_STRING ']' + { $$=gfsm_regex_compiler_gstring2label(my_compiler, $2); } + ; + +weight: '<' TOK_WEIGHT '>' { $$=$2; } + ; + +%% + +/*====================================================================== + * User C Code + */ + +int main (int argc, char **argv) { + gfsmRegexCompiler *reparser = g_new0(gfsmRegexCompiler,1); + gfsm_scanner_init((gfsmScanner*)reparser, "gfsmRegexCompiler", compretest_yy); + + + //-- initialization + reparser->srtype = gfsmSRTTropical; + reparser->gstr = g_string_new(""); + reparser->abet = gfsm_string_alphabet_new(); + if (!gfsm_alphabet_load_filename(reparser->abet, "test.lab", &(reparser->scanner.err))) { + g_printerr("%s: load failed for labels file '%s': %s\n", + *argv, "test.lab", (reparser->scanner.err ? 
reparser->scanner.err->message : "?")); + exit(2); + } + + //-- debug: lexer + reparser->scanner.emit_warnings = TRUE; + + //-- parse + compretest_yyparse(reparser); + + //-- sanity check + if (reparser->scanner.err) { + fprintf(stderr, "%s: %s\n", *argv, reparser->scanner.err->message); + } + + if (reparser->fsm) { + gfsm_automaton_save_bin_file(reparser->fsm, stdout, NULL); + } else { + fprintf(stderr, "%s: Error: no fsm!\n", *argv); + } + + gfsm_scanner_free((gfsmScanner*)reparser); + + return 0; +} diff --git a/gfsm/gfsm/src/libgfsm/tests/ctest.c b/gfsm/gfsm/src/libgfsm/tests/ctest.c new file mode 100644 index 0000000..78d9002 --- /dev/null +++ b/gfsm/gfsm/src/libgfsm/tests/ctest.c @@ -0,0 +1,18 @@ +#include <stdio.h> + +typedef struct testme_s { + int i : 1; + int j : 1; + int k : 1; +} testme_t; + +int main (void) { + int x,y,z; + float f = +inf; + printf("sizeof(testme_t)=%u\n", sizeof(testme_t)); + + z = (x=42,y=24,17); + printf ("z = (x=42,y=24) = %d\n", z); + printf("f = %g\n", f); + return 0; +} diff --git a/gfsm/gfsm/src/libgfsm/tests/dettest.c b/gfsm/gfsm/src/libgfsm/tests/dettest.c new file mode 100644 index 0000000..278535c --- /dev/null +++ b/gfsm/gfsm/src/libgfsm/tests/dettest.c @@ -0,0 +1,21 @@ +#include <gfsm.h> +#include <stdio.h> + +int main (void) { + gfsmAutomaton *fsm = gfsm_automaton_new(); + + printf("##-- determinize()-1...\n"); + gfsm_automaton_determinize(fsm); + gfsm_automaton_print_file(fsm,stdout,NULL); + + printf("##-- determinize()-2...\n"); + gfsm_automaton_determinize(fsm); + gfsm_automaton_print_file(fsm,stdout,NULL); + + printf("##-- determinize()-3...\n"); + gfsm_automaton_determinize(fsm); + gfsm_automaton_print_file(fsm,stdout,NULL); + + printf("done.\n"); + return 0; +} diff --git a/gfsm/gfsm/src/libgfsm/tests/flex2test.l b/gfsm/gfsm/src/libgfsm/tests/flex2test.l new file mode 100644 index 0000000..b81e7f7 --- /dev/null +++ b/gfsm/gfsm/src/libgfsm/tests/flex2test.l @@ -0,0 +1,174 @@ 
+/*====================================================================== + * Flex Options + */ +%option outfile="flex2test.lex.c" +%option header-file="flex2test.lex.h" +%option prefix="testme_yy" +%option reentrant +%option 8bit +%option yylineno + +%{ +/*====================================================================== + * User C Header + */ + +#include <gfsm.h> + +typedef enum { + T_EOF, + T_CHAR, + T_STRING, + T_WEIGHT, + + T_OP_PROJ_1, + T_OP_PROJ_2, + T_OP_STAR, + T_OP_PLUS, + T_OP_POWER, //-- "^n" + T_OP_MINUS, + T_OP_OPTIONAL, + T_OP_UNION, + T_OP_COMPLEMENT, + T_OP_INTERSECT, + T_OP_PRODUCT, + T_OP_COMPOSE, + + T_LPAREN, + T_RPAREN, + T_LBRACKET, + T_RBRACKET, + + T_EQUAL, + T_SEMICOLON, //-- semicolon + + T_OTHER +} TokenType; + +#define GSTR_BUF ((GString*)((gfsmScanner*)yyextra)->data) + +%} + +/*====================================================================== + * Flex Definitions + */ + +DIGIT [0-9] +SPACE [[:space:]] +WCHAR [^\\\[ \t\n\r\#\<] +SCHAR [^\\\]] + +%x ESCAPE STRING SESCAPE COMMENT WEIGHT + +/*====================================================================== + * Rules + */ +%% + +<INITIAL>{WCHAR} { return T_CHAR; } + +<INITIAL>{SPACE} { /* ignore */ } + +<INITIAL># { BEGIN(COMMENT); } + +<INITIAL>\\ { BEGIN(ESCAPE); } + +<INITIAL>\[ { BEGIN(STRING); GSTR_BUF->len = 0; } + +<INITIAL>\< { BEGIN(WEIGHT); } + + + +<WEIGHT>[^\>]+ { return WEIGHT; /* hack */ } + +<WEIGHT>\> { BEGIN(INITIAL); } + + + +<COMMENT>[^\n]*\n { BEGIN(INITIAL); } + + + + +<ESCAPE>. { BEGIN(INITIAL); return T_CHAR; } + + + + +<STRING>\] { BEGIN(INITIAL); return T_STRING; } + +<STRING>{SCHAR} { g_string_append_c(GSTR_BUF, yytext[0]); } + +<STRING>\\ { BEGIN(SESCAPE); } + + + +<SESCAPE>. { BEGIN(STRING); g_string_append_c(GSTR_BUF, yytext[0]); } + + + +<<EOF>> { return T_EOF; } + + +<*>. 
{ + gfsm_scanner_carp(yyextra, "bad character '%s'", yytext); + return T_OTHER; +} + +%% + +/*====================================================================== + * User C Code + */ + +void testme(gfsmScanner *scanner) { + TokenType tok; + double weight; + + while ((tok=testme_yylex(scanner->yyscanner)) != T_EOF) { + switch (tok) { + case T_CHAR: + printf("(char) '%s'\n", testme_yyget_text(scanner->yyscanner)); + break; + case STRING: + printf("(string) \"%s\"\n", ((GString*)scanner->data)->str); + break; + case WEIGHT: + weight = strtod(testme_yyget_text(scanner->yyscanner),NULL); + printf("(weight) %g\n", weight); + break; + default: + printf("(other=%d): (%s)\n", tok, testme_yyget_text(scanner->yyscanner)); + break; + } + + if (scanner->err) { + fprintf(stderr, "Error: %s\n", scanner->err->message); + g_clear_error(&(scanner->err)); + break; + } + } +} + +int main(void) { + gfsmScanner *scanner = gfsm_scanner_new("myScanner",testme_yy); + scanner->data = g_string_new(""); + + //-- first, scan a string + /* + gfsm_scanner_scan_string(scanner, "line 1\nline 2\nline 3."); + scanner->filename = g_strdup("string"); + testme(scanner); + */ + + //-- now scan stdin + gfsm_scanner_scan_filename(scanner, "-"); + testme(scanner); + + gfsm_scanner_free(scanner); + + return 0; +} + +//int testme_yywrap(yyscan_t yyscanner) { return 1; } +GFSM_SCANNER_YYWRAP(testme_yy) diff --git a/gfsm/gfsm/src/libgfsm/tests/flex3test.l b/gfsm/gfsm/src/libgfsm/tests/flex3test.l new file mode 100644 index 0000000..27c1cc1 --- /dev/null +++ b/gfsm/gfsm/src/libgfsm/tests/flex3test.l @@ -0,0 +1,232 @@ +/*====================================================================== + * Flex Options + */ +%option outfile="flex3test.lex.c" +%option header-file="flex3test.lex.h" +%option prefix="testme_yy" +%option reentrant +%option 8bit +%option yylineno + +%{ +/*====================================================================== + * User C Header + */ + +#include <gfsm.h> + +typedef enum { + T_EOF 
= 256, + T_CHAR = 257, + T_BRACKETED = 258, + T_BSPACE = 259, + T_WEIGHT = 260, + T_UINT = 261, + T_UTF8 = 262, + + T_LPAREN = '(', + T_RPAREN = ')', + T_LBRACKET = '[', + T_RBRACKET = ']', + T_LANGLE = '<', + T_RANGLE = '>', + + T_BEQUAL = '=', + + T_OTHER = 65535 +} TokenType; + +#define GSTR_BUF ((GString*)((gfsmScanner*)yyextra)->data) + +%} + +/*====================================================================== + * Flex Definitions + */ + +DIGIT [0-9] +SPACE [[:space:]] +WCHAR [^\<\>\[\]\\ \t\n\r\#] +BCHAR [^\\\]\ \t\n\r\=] +UTF8PREFIX "Ã" + +%x ESCAPE BRACKETED BESCAPE COMMENT WEIGHT UINT UTF8 BUTF8 + +/*====================================================================== + * Rules: single-character operators are returned as their own character code + */ +%% + +{UTF8PREFIX} { BEGIN(UTF8); g_string_set_size(GSTR_BUF,1); GSTR_BUF->str[0] = yytext[0]; } + +"\\" { BEGIN(ESCAPE); } + +"#" { BEGIN(COMMENT); } + +"[" { BEGIN(BRACKETED); g_string_truncate(GSTR_BUF,0); return T_LBRACKET; } + +"<" { BEGIN(WEIGHT); return T_LANGLE; } + + +"(" { return '('; } +")" { return ')'; } + +"*" { return '*'; } +"+" { return '+'; } +"^" { BEGIN(UINT); return '^'; /* power operator; its exponent is scanned in <UINT> */ } +"?" { return '?'; } +"!" { return '!'; } +"|" { return '|'; } +"&" { return '&'; } +":" { return ':'; } +"@" { return '@'; } +"-" { return '-'; } + +{SPACE} { /* ignore */ } + +{WCHAR} { return T_CHAR; } + + + +<UTF8>{UTF8PREFIX}+ { g_string_append(GSTR_BUF, yytext); } + +<UTF8>. { BEGIN(INITIAL); g_string_append_c(GSTR_BUF, yytext[0]); return T_UTF8; } + + +<WEIGHT>([\+\-])?({DIGIT}*\.)?({DIGIT}+)([Ee]([\+\-]?){DIGIT}+)? { return T_WEIGHT; /* hack */ } + +<WEIGHT>\> { BEGIN(INITIAL); return T_RANGLE; } + + + +<UINT>{DIGIT}+ { BEGIN(INITIAL); return T_UINT; } + + +<COMMENT>[^\n]*\n { BEGIN(INITIAL); /* ignore comments */ } + + + + +<ESCAPE>. 
{ BEGIN(INITIAL); return T_CHAR; } + + + +<BRACKETED>{UTF8PREFIX} { BEGIN(BUTF8); g_string_append_c(GSTR_BUF, yytext[0]); } + +<BRACKETED>\] { BEGIN(INITIAL); return T_RBRACKET; } + +<BRACKETED>{BCHAR} { g_string_append_c(GSTR_BUF, yytext[0]); } + +<BRACKETED>{SPACE} { return T_BSPACE; } + +<BRACKETED>"=" { return T_BEQUAL; } + +<BRACKETED>\\ { BEGIN(BESCAPE); } + + +<BUTF8>{UTF8PREFIX}+ { g_string_append(GSTR_BUF, yytext); } + +<BUTF8>. { BEGIN(BRACKETED); g_string_append_c(GSTR_BUF, yytext[0]); } + + +<BESCAPE>. { BEGIN(BRACKETED); g_string_append_c(GSTR_BUF, yytext[0]); } + + + +<<EOF>> { return T_EOF; } + + +<*>. { + gfsm_scanner_carp(yyextra, "bad character '%s'", yytext); + return T_OTHER; +} + +%% + +/*====================================================================== + * User C Code: testme() prints one line per token until T_EOF or a scanner error + */ + +void testme(gfsmScanner *scanner) { + TokenType tok; + double weight; + unsigned int uint; + + while ((tok=testme_yylex(scanner->yyscanner)) != T_EOF) { + switch (tok) { + case T_CHAR: + printf("(char) '%s'\n", testme_yyget_text(scanner->yyscanner)); + break; + + case T_UTF8: + printf("(utf8) '%s'\n", ((GString*)scanner->data)->str); + break; + + case T_LBRACKET: + printf("(lbracket)\n"); + break; + + case T_RBRACKET: + printf("(rbracket) gstr=\"%s\"\n", ((GString*)scanner->data)->str); + g_string_truncate(((GString*)scanner->data),0); + break; + case T_BSPACE: + printf("(bspace) gstr=\"%s\"\n", ((GString*)scanner->data)->str); + g_string_truncate(((GString*)scanner->data),0); + break; + case T_BEQUAL: + printf("(bequal) gstr=\"%s\"\n", ((GString*)scanner->data)->str); + g_string_truncate(((GString*)scanner->data),0); + break; + + case T_LANGLE: + printf("(langle)\n"); + break; + case T_RANGLE: + printf("(rangle)\n"); + break; + case T_WEIGHT: + weight = strtod(testme_yyget_text(scanner->yyscanner),NULL); + printf("(weight) =%g\n", weight); + break; + + case T_UINT: + uint = strtoul(testme_yyget_text(scanner->yyscanner),NULL,10); /* <UINT> matches decimal digits only; base 0 would read a leading '0' as octal */ + printf("(uint) =%u\n", uint); + 
break; + + default: + printf("(other=%d~'%c'): (%s)\n", tok, tok, testme_yyget_text(scanner->yyscanner)); + break; + } + + if (scanner->err) { + fprintf(stderr, "Error: %s\n", scanner->err->message); + g_clear_error(&(scanner->err)); + break; + } + } +} + +int main(void) { + gfsmScanner *scanner = gfsm_scanner_new("myScanner",testme_yy); + scanner->data = g_string_new(""); + + //-- first, scan a string + /* + gfsm_scanner_scan_string(scanner, "line 1\nline 2\nline 3."); + scanner->filename = g_strdup("string"); + testme(scanner); + */ + + //-- now scan stdin + gfsm_scanner_scan_filename(scanner, "-"); + testme(scanner); + + gfsm_scanner_free(scanner); + + return 0; +} + +//int testme_yywrap(yyscan_t yyscanner) { return 1; } +GFSM_SCANNER_YYWRAP(testme_yy) diff --git a/gfsm/gfsm/src/libgfsm/tests/flextest.l b/gfsm/gfsm/src/libgfsm/tests/flextest.l new file mode 100644 index 0000000..c0d103d --- /dev/null +++ b/gfsm/gfsm/src/libgfsm/tests/flextest.l @@ -0,0 +1,59 @@ +/*====================================================================== + * Flex Options + */ +%option outfile="flextest.c" +%option header-file="flextest.h" +%option prefix="testme_yy" +%option reentrant +%option 8bit + +%{ +/*====================================================================== + * User C Header + */ + +int nlines=0; + +%} + +/*====================================================================== + * Flex Definitions + */ + + +/*====================================================================== + * Rules + */ +%% + +\n { return 1; } + +<<EOF>> { return 0; } + +. 
{ /* do nothing */; } + +%% + +/*====================================================================== + * User C Code + */ + +int main(void) { + yyscan_t scanner; + int nlines=0; + int tok; + + testme_yylex_init(&scanner); //-- initialize reentrant flex scanner + + while ((tok=testme_yylex(scanner))) { + //printf("tok=%d\n", tok); + nlines++; + } + + testme_yylex_destroy(scanner); //-- cleanup reentrant flex scanner + + printf("%d\n", nlines); + return 0; +} + +int testme_yywrap(yyscan_t yyscanner) { return 1; } diff --git a/gfsm/gfsm/src/libgfsm/tests/ftest.c b/gfsm/gfsm/src/libgfsm/tests/ftest.c new file mode 100644 index 0000000..e571f31 --- /dev/null +++ b/gfsm/gfsm/src/libgfsm/tests/ftest.c @@ -0,0 +1,17 @@ +#include <stdio.h> +#include <glib.h> + +int main (int argc, char **argv) { + float f, f2; + gpointer p; + int i; + + for (i=1; i<argc; i++) { + sscanf(argv[i], "%f", &f); + p = (gpointer)(*((int*)(&f))); + f2 = *((float*)(&p)); + printf("argv[i=%d]='%s' ; f=%g ; f->p=%p ; p->f=%g\n", i, argv[i], f, p, f2); + } + + return 0; +} diff --git a/gfsm/gfsm/src/libgfsm/tests/gbtest.c b/gfsm/gfsm/src/libgfsm/tests/gbtest.c new file mode 100644 index 0000000..e0e406d --- /dev/null +++ b/gfsm/gfsm/src/libgfsm/tests/gbtest.c @@ -0,0 +1,27 @@ +#include <glib.h> +#include <stdio.h> + +GArray *ary1; +GArray *ary2; +GArray *ary3; +guint esize = 8; +guint nelts = 128; + +int main (void) { + int i; + + ary1 = g_array_sized_new(FALSE, TRUE, esize, nelts); + ary2 = g_array_sized_new(FALSE, TRUE, esize, nelts); + ary3 = g_array_sized_new(FALSE, TRUE, esize, nelts); + + for (i=0; i < 128; i++) { + g_array_free(ary2,TRUE); + ary2 = g_array_sized_new(FALSE,TRUE,esize,nelts*i); + } + + g_array_free(ary1,TRUE); + g_array_free(ary2,TRUE); + g_array_free(ary3,TRUE); + + return 0; +} diff --git a/gfsm/gfsm/src/libgfsm/tests/gfsm-chop.perl b/gfsm/gfsm/src/libgfsm/tests/gfsm-chop.perl new file mode 100755 index 0000000..7f81bd8 --- /dev/null +++ 
b/gfsm/gfsm/src/libgfsm/tests/gfsm-chop.perl @@ -0,0 +1,40 @@ +#!/usr/bin/perl -w + +use Gfsm; + +our @want_states = + ( + 5, + 4, + 11, + 98, + 5257, + 45623, + 290132, + 0 + ); + +push(@ARGV,'-') if (!@ARGV); + +$fsmfile = shift(@ARGV); +our $fsm = Gfsm::Automaton->new(); +$fsm->load($fsmfile) or die("$0: load failed for fsm file '$fsmfile': $!"); + +##-- chop it +my %q2i = map { $want_states[$_]=>$_ } (0..$#want_states); +my $qid_dummy = scalar(@want_states); +$fsm2 = $fsm->shadow; +$ai=Gfsm::ArcIter->new; + +foreach $qid_dst (0..$#want_states) { + $fsm2->ensure_state($qid_dst); + $qid_src = $want_states[$qid_dst]; + for ($ai->open($fsm,$qid_src); $ai->ok; $ai->next) { + $fsm2->add_arc($qid_dst, $qid_dummy, $ai->lower, $ai->upper, $ai->weight); + } + $fsm2->add_arc($qid_dummy,$qid_dst,0,0,0); +} +$fsm2->root($qid_dummy); +$fsm2->final_weight($qid_dummy,0); + +$fsm2->save('-'); diff --git a/gfsm/gfsm/src/libgfsm/tests/gfsm-out-degrees.perl b/gfsm/gfsm/src/libgfsm/tests/gfsm-out-degrees.perl new file mode 100755 index 0000000..38caf2d --- /dev/null +++ b/gfsm/gfsm/src/libgfsm/tests/gfsm-out-degrees.perl @@ -0,0 +1,11 @@ +#!/usr/bin/perl -w + +use Gfsm; + +$fsmfile = @ARGV ? 
shift : '-'; +$fsm = Gfsm::Automaton->new(); +die("$0: load failed for '$fsmfile': $!") if (!$fsm->load($fsmfile)); + +foreach $qid (0..($fsm->n_states-1)) { + print $qid, "\t", $fsm->out_degree($qid), "\n"; +} diff --git a/gfsm/gfsm/src/libgfsm/tests/gfsmRegexCompiler-v1.c b/gfsm/gfsm/src/libgfsm/tests/gfsmRegexCompiler-v1.c new file mode 100644 index 0000000..12495fa --- /dev/null +++ b/gfsm/gfsm/src/libgfsm/tests/gfsmRegexCompiler-v1.c @@ -0,0 +1,210 @@ +#include <gfsmRegexCompiler.h> +#include <gfsmArith.h> +#include <gfsmUtils.h> + +#define RETURN(rec,_rea) (rec)->rea=(_rea); return (_rea); + +//-------------------------------------------------------------- +gfsmRegexAutomaton gfsm_regex_automaton_epsilon(gfsmRegexCompiler *rec) +{ + rec->rea.typ = gfsmREATEmpty; + rec->rea.val.lab = 0; + return rec->rea; +} + +//-------------------------------------------------------------- +gfsmAutomaton *gfsm_regex_automaton_new_fsm(gfsmRegexCompiler *rec) +{ + gfsmAutomaton *fsm = gfsm_automaton_new_full(gfsmAutomatonDefaultFlags, + rec->srtype, + gfsmAutomatonDefaultSize); + fsm->flags.is_transducer = FALSE; + return fsm; +} + +//-------------------------------------------------------------- +gfsmAutomaton *gfsm_regex_automaton_epsilon_fsm(gfsmRegexCompiler *rec) +{ + gfsmAutomaton *fsm = gfsm_regex_automaton_new_fsm(rec); + fsm->root_id = gfsm_automaton_add_state(fsm); + gfsm_automaton_set_final_state(fsm,fsm->root_id,TRUE); + return fsm; +} + +//-------------------------------------------------------------- +gfsmAutomaton *gfsm_regex_automaton_label_fsm(gfsmRegexCompiler *rec, gfsmLabelVal lab) +{ + gfsmAutomaton *fsm = gfsm_regex_automaton_new_fsm(rec); + gfsmStateId labid; + fsm->root_id = gfsm_automaton_add_state(fsm); + labid = gfsm_automaton_add_state(fsm); + gfsm_automaton_add_arc(fsm, fsm->root_id, labid, lab, lab, fsm->sr->one); + gfsm_automaton_set_final_state(fsm,labid,TRUE); + return fsm; +} + +//-------------------------------------------------------------- 
+gfsmAutomaton *gfsm_regex_automaton_fsm(gfsmRegexCompiler *rec, gfsmRegexAutomaton rea) +{ + switch (rea.typ) { + case gfsmREATEmpty: + return gfsm_regex_automaton_epsilon_fsm(rec); + break; + case gfsmREATLabel: + return gfsm_regex_automaton_label_fsm(rec, rea.val.lab); + break; + case gfsmREATFull: + default: + return rea.val.fsm; + break; + } +} + +//-------------------------------------------------------------- +gfsmAutomaton *gfsm_regex_automaton_expand(gfsmRegexCompiler *rec, gfsmRegexAutomaton *rea) +{ + rea->val.fsm = gfsm_regex_automaton_fsm(rec,*rea); + rea->typ = gfsmREATFull; + return rea->val.fsm; +} + +//-------------------------------------------------------------- +gfsmRegexAutomaton gfsm_regex_automaton_label(gfsmRegexCompiler *rec, gfsmLabelVal lab) +{ + rec->rea.typ = gfsmREATLabel; + rec->rea.val.lab = lab; + return rec->rea; +} + +//-------------------------------------------------------------- +gfsmRegexAutomaton gfsm_regex_automaton_concat(gfsmRegexCompiler *rec, + gfsmRegexAutomaton rea1, + gfsmRegexAutomaton rea2) +{ + switch (rea2.typ) { + case gfsmREATEmpty: + break; + case gfsmREATLabel: + gfsm_regex_automaton_append_lab(rec, gfsm_regex_automaton_expand(rec,&rea1), rea2.val.lab); + break; + case gfsmREATFull: + default: + gfsm_automaton_concat(gfsm_regex_automaton_expand(rec,&rea1), rea2.val.fsm); + gfsm_automaton_free(rea2.val.fsm); + break; + } + + RETURN(rec,rea1); +} + + +//-------------------------------------------------------------- +struct _gfsm_regex_append_lab_data { + gfsmAutomaton *fsm; + gfsmLabelVal lab; + gfsmStateId newid; +}; + +gboolean _gfsm_regex_append_lab_foreach_func(gfsmStateId qid, gpointer pw, + struct _gfsm_regex_append_lab_data *data) +{ + gfsm_automaton_get_state(data->fsm,qid)->is_final = FALSE; + gfsm_automaton_add_arc(data->fsm, qid, data->newid, data->lab, data->lab, gfsm_ptr2weight(pw)); + return FALSE; +} + +gfsmAutomaton *gfsm_regex_automaton_append_lab(gfsmRegexCompiler *rec, gfsmAutomaton *fsm, 
gfsmLabelVal lab) +{ + struct _gfsm_regex_append_lab_data data = { fsm, lab, gfsm_automaton_add_state(fsm) }; + gfsm_weightmap_foreach(fsm->finals, + (GTraverseFunc)_gfsm_regex_append_lab_foreach_func, + &data); + gfsm_weightmap_clear(fsm->finals); + gfsm_automaton_set_final_state(fsm, data.newid, TRUE); + return fsm; +} + +//-------------------------------------------------------------- +gfsmRegexAutomaton gfsm_regex_automaton_closure(gfsmRegexCompiler *rec, gfsmRegexAutomaton rea, gboolean is_plus) +{ + gfsm_automaton_closure(gfsm_regex_automaton_expand(rec,&rea),is_plus); + RETURN(rec,rea); +} + +//-------------------------------------------------------------- +gfsmRegexAutomaton gfsm_regex_automaton_power(gfsmRegexCompiler *rec, gfsmRegexAutomaton rea, guint32 n) +{ + gfsm_automaton_n_closure(gfsm_regex_automaton_expand(rec,&rea),n); + RETURN(rec,rea); +} + +//-------------------------------------------------------------- +gfsmRegexAutomaton gfsm_regex_automaton_project(gfsmRegexCompiler *rec, + gfsmRegexAutomaton rea, + gfsmLabelSide which) +{ + gfsm_automaton_project(gfsm_regex_automaton_expand(rec,&rea),which); + RETURN(rec,rea); +} + +//-------------------------------------------------------------- +gfsmRegexAutomaton gfsm_regex_automaton_optional(gfsmRegexCompiler *rec, gfsmRegexAutomaton rea) +{ + gfsm_automaton_optional(gfsm_regex_automaton_expand(rec,&rea)); + RETURN(rec,rea); +} + +//-------------------------------------------------------------- +gfsmRegexAutomaton gfsm_regex_automaton_complement(gfsmRegexCompiler *rec, gfsmRegexAutomaton rea) +{ + gfsm_automaton_complement_full(gfsm_regex_automaton_expand(rec,&rea),rec->abet); + RETURN(rec,rea); +} + +//-------------------------------------------------------------- +gfsmRegexAutomaton gfsm_regex_automaton_union(gfsmRegexCompiler *rec, gfsmRegexAutomaton rea1, gfsmRegexAutomaton rea2) +{ + gfsm_automaton_union(gfsm_regex_automaton_expand(rec,&rea1),gfsm_regex_automaton_expand(rec,&rea2)); + 
gfsm_automaton_free(gfsm_regex_automaton_expand(rec,&rea2)); + RETURN(rec,rea1); +} + +//-------------------------------------------------------------- +gfsmRegexAutomaton gfsm_regex_automaton_intersect(gfsmRegexCompiler *rec, gfsmRegexAutomaton rea1, gfsmRegexAutomaton rea2) +{ + gfsm_automaton_intersect(gfsm_regex_automaton_expand(rec,&rea1),gfsm_regex_automaton_expand(rec,&rea2)); + gfsm_automaton_free(gfsm_regex_automaton_expand(rec,&rea2)); + RETURN(rec,rea1); +} + +//-------------------------------------------------------------- +gfsmRegexAutomaton gfsm_regex_automaton_product(gfsmRegexCompiler *rec, gfsmRegexAutomaton rea1, gfsmRegexAutomaton rea2) +{ + _gfsm_automaton_product(gfsm_regex_automaton_expand(rec,&rea1),gfsm_regex_automaton_expand(rec,&rea2)); + gfsm_automaton_free(gfsm_regex_automaton_expand(rec,&rea2)); + RETURN(rec,rea1); +} + +//-------------------------------------------------------------- +gfsmRegexAutomaton gfsm_regex_automaton_compose(gfsmRegexCompiler *rec, gfsmRegexAutomaton rea1, gfsmRegexAutomaton rea2) +{ + gfsm_automaton_compose(gfsm_regex_automaton_expand(rec,&rea1),gfsm_regex_automaton_expand(rec,&rea2)); + gfsm_automaton_free(gfsm_regex_automaton_expand(rec,&rea2)); + RETURN(rec,rea1); +} + +//-------------------------------------------------------------- +gfsmRegexAutomaton gfsm_regex_automaton_difference(gfsmRegexCompiler *rec, gfsmRegexAutomaton rea1, gfsmRegexAutomaton rea2) +{ + gfsm_automaton_difference(gfsm_regex_automaton_expand(rec,&rea1),gfsm_regex_automaton_expand(rec,&rea2)); + gfsm_automaton_free(gfsm_regex_automaton_expand(rec,&rea2)); + RETURN(rec,rea1); +} + +//-------------------------------------------------------------- +/** Weight */ +gfsmRegexAutomaton gfsm_regex_automaton_weight(gfsmRegexCompiler *rec, gfsmRegexAutomaton rea, gfsmWeight w) +{ + gfsm_automaton_arith_final(gfsm_regex_automaton_expand(rec,&rea), gfsmAOSRTimes, w, FALSE); + RETURN(rec,rea); +} diff --git 
a/gfsm/gfsm/src/libgfsm/tests/gfsmRegexCompiler-v1.h b/gfsm/gfsm/src/libgfsm/tests/gfsmRegexCompiler-v1.h new file mode 100644 index 0000000..aefc1cc --- /dev/null +++ b/gfsm/gfsm/src/libgfsm/tests/gfsmRegexCompiler-v1.h @@ -0,0 +1,121 @@ +#ifndef _GFSM_REGEX_COMPILER_H +#define _GFSM_REGEX_COMPILER_H + +#include <gfsmScanner.h> +#include <gfsmAutomaton.h> +#include <gfsmAlphabet.h> +#include <gfsmAlgebra.h> + +/** Regex automaton type */ +typedef enum { + gfsmREATEmpty, ///< empty acceptor + gfsmREATLabel, ///< single label + gfsmREATFull ///< full automaton +} gfsmRegexAutomatonType; + +/** Regex automaton value */ +typedef union { + gfsmLabelVal lab; ///< single label + gfsmAutomaton *fsm; ///< full automaton +} gfsmRegexAutomatonValue; + +/** Regex automaton */ +typedef struct { + gfsmRegexAutomatonType typ; ///< regex type + gfsmRegexAutomatonValue val; ///< regex value +} gfsmRegexAutomaton; + +/** Data structure for regex compiler */ +typedef struct { + gfsmScanner scanner; ///< scanner + gfsmSRType srtype; ///< semiring type + gfsmRegexAutomaton rea; ///< regex automaton under construction + gfsmAlphabet *abet; ///< alphabet + GString *gstr; ///< buffer + gboolean is_label : 1; ///< is this a singleton fsm? 
(if so, *fsm is a gfsmLabelVal) +} gfsmRegexCompiler; + +/** New full-fledged automaton: non-transducer, using the compiler's semiring type (rec->srtype) */ +gfsmAutomaton *gfsm_regex_automaton_new_fsm(gfsmRegexCompiler *rec); + +/** Get full-fledged automaton for rea: built on demand for empty/label values, otherwise rea.val.fsm itself */ +gfsmAutomaton *gfsm_regex_automaton_fsm(gfsmRegexCompiler *rec, gfsmRegexAutomaton rea); + +/** Full Epsilon recognizer */ +gfsmAutomaton *gfsm_regex_automaton_epsilon_fsm(gfsmRegexCompiler *rec); + +/** Full single-character recognizer */ +gfsmAutomaton *gfsm_regex_automaton_label_fsm(gfsmRegexCompiler *rec, gfsmLabelVal lab); + + +/** Single-label recognizer */ +gfsmRegexAutomaton gfsm_regex_automaton_label(gfsmRegexCompiler *rec, gfsmLabelVal lab); + +/** Single-label concatenation (low-level) */ +gfsmAutomaton *gfsm_regex_automaton_append_lab(gfsmRegexCompiler *rec, + gfsmAutomaton *fsm, + gfsmLabelVal lab); + +/** General concatenation */ +gfsmRegexAutomaton gfsm_regex_automaton_concat(gfsmRegexCompiler *rec, + gfsmRegexAutomaton rea1, + gfsmRegexAutomaton rea2); + +/** Closure */ +gfsmRegexAutomaton gfsm_regex_automaton_closure(gfsmRegexCompiler *rec, + gfsmRegexAutomaton rea, + gboolean is_plus); + +/** Power (n-ary closure) */ +gfsmRegexAutomaton gfsm_regex_automaton_power(gfsmRegexCompiler *rec, + gfsmRegexAutomaton rea, + guint32 n); + +/** Projection */ +gfsmRegexAutomaton gfsm_regex_automaton_project(gfsmRegexCompiler *rec, + gfsmRegexAutomaton rea, + gfsmLabelSide which); + + +/** Optionality */ +gfsmRegexAutomaton gfsm_regex_automaton_optional(gfsmRegexCompiler *rec, + gfsmRegexAutomaton rea); + +/** Complement */ +gfsmRegexAutomaton gfsm_regex_automaton_complement(gfsmRegexCompiler *rec, + gfsmRegexAutomaton rea); + +/** Union */ +gfsmRegexAutomaton gfsm_regex_automaton_union(gfsmRegexCompiler *rec, + gfsmRegexAutomaton rea1, + gfsmRegexAutomaton rea2); + +/** Intersection */ +gfsmRegexAutomaton gfsm_regex_automaton_intersect(gfsmRegexCompiler *rec, + gfsmRegexAutomaton rea1, + gfsmRegexAutomaton rea2); + +/** Product */ +gfsmRegexAutomaton 
gfsm_regex_automaton_product(gfsmRegexCompiler *rec, + gfsmRegexAutomaton rea1, + gfsmRegexAutomaton rea2); + +/** Composition */ +gfsmRegexAutomaton gfsm_regex_automaton_compose(gfsmRegexCompiler *rec, + gfsmRegexAutomaton rea1, + gfsmRegexAutomaton rea2); + +/** Difference */ +gfsmRegexAutomaton gfsm_regex_automaton_difference(gfsmRegexCompiler *rec, + gfsmRegexAutomaton rea1, + gfsmRegexAutomaton rea2); + +/** Weight */ +gfsmRegexAutomaton gfsm_regex_automaton_weight(gfsmRegexCompiler *rec, + gfsmRegexAutomaton rea1, + gfsmWeight w); + + + + +#endif /* _GFSM_REGEX_COMPILER_H */ diff --git a/gfsm/gfsm/src/libgfsm/tests/gfsmlabdump.c b/gfsm/gfsm/src/libgfsm/tests/gfsmlabdump.c new file mode 100644 index 0000000..05916ac --- /dev/null +++ b/gfsm/gfsm/src/libgfsm/tests/gfsmlabdump.c @@ -0,0 +1,73 @@ +#include <gfsm.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +int main (int argc, char **argv) { + gfsmStringAlphabet *sa = (gfsmStringAlphabet*)gfsm_string_alphabet_new(); + gfsmAlphabet *a = (gfsmAlphabet*)sa; + gfsmError *err = NULL; + char *filename= NULL; + gfsmLabelVal lv1,lv2; + char *key; + + /*-- test: insert --*/ + printf("testing insert: "); + lv1 = gfsm_alphabet_insert(a,"foo",gfsmNoLabel); + printf("%s\n", lv1 != gfsmNoLabel ? "ok" : "FAILED"); + + /*-- test: find --*/ + printf("testing find_label: "); + lv2 = gfsm_alphabet_find_label(a,"foo"); + printf("%s\n", lv1==lv2 ? "ok" : "FAILED"); + + /*-- test: find key --*/ + printf("testing find_key: "); + key = gfsm_alphabet_find_key(a,lv1); + printf("%s\n", key != NULL && strcmp(key,"foo")==0 ? "ok" : "FAILED"); + + /*-- clear test --*/ + printf("testing clear: "); + gfsm_alphabet_clear(a); + printf("%s\n", gfsm_alphabet_size(a)==0 ? "ok" : "FAILED"); + + /*-- load labels file: argv[1] if given, else "-" (argc counts argv[0], so test for > 1) --*/ + if (argc > 1) { filename=argv[1]; } + else { filename="-"; } + printf("\nLoading alphabet from file %s: ", argc<=1 ? 
"(stdin)" : filename); + if (!a || !gfsm_alphabet_load_filename(a, filename, &err)) { + g_printerr("couldn't load labels from stdin: %s\n", err->message); + exit(1); + } + printf("loaded.\n\n"); + + /*-- get some basic information --*/ + printf("Basic Information:\n"); + printf(" + gfsmAlphabet:\n"); + printf(" type : %u (%s)\n", a->type, + (a->type==gfsmATString ? "string keys" : "weird type: tell moocow")); + printf(" lab_min: %u\n", a->lab_min); + printf(" lab_max: %u\n", a->lab_max); + printf(" + gfsmPointerAlphabet:\n"); + printf(" labels2keys: %p [size=%u]\n", + sa->labels2keys, sa->labels2keys->len); + printf(" keys2labels: %p [size=%u]\n", + sa->keys2labels, g_hash_table_size(sa->keys2labels)); + printf(" keydupfunc : %p (%s)\n", + sa->key_dup_func, (sa->key_dup_func == NULL + ? "no key copying: tell moocow" + : ((void*)sa->key_dup_func == (void*)gfsm_alphabet_strdup + ? "keys are copied: ok" + : "strangeness: tell moocow"))); + + + /*-- dump it --*/ + printf("\n--BEGIN dump--\n"); + if (!gfsm_alphabet_save_file(a,stdout,&err)) { + g_printerr("couldn't save labels to stdout: %s\n", err->message); + exit(1); + } + printf("--END dump--\n\n"); + + return 0; +} diff --git a/gfsm/gfsm/src/libgfsm/tests/gscantest.c b/gfsm/gfsm/src/libgfsm/tests/gscantest.c new file mode 100644 index 0000000..199c414 --- /dev/null +++ b/gfsm/gfsm/src/libgfsm/tests/gscantest.c @@ -0,0 +1,51 @@ +#include <gfsm.h> +#include <glib.h> +#include <stdlib.h> + +#undef VERBOSE + +int main (int argc, char **argv) { + GScanner *scanner = g_scanner_new(&gfsm_automaton_scanner_config); + GTokenType typ; + + scanner->input_name = *argv; + g_scanner_input_file(scanner, fileno(stdin)); + while ((typ = g_scanner_get_next_token(scanner)) != G_TOKEN_EOF) { +#ifdef VERBOSE + switch (typ) { + case G_TOKEN_INT: + printf("INT %ld\n", scanner->value.v_int); + break; + case G_TOKEN_FLOAT: + printf("FLOAT %g\n", scanner->value.v_float); + break; + case G_TOKEN_CHAR: + if (scanner->value.v_char == '\n') 
printf ("CHAR '\\n'\n"); + else printf("CHAR '%c'\n", scanner->value.v_char); + break; + default: + printf("? (typ=%d)\n", typ); + break; + } +#else // !VERBOSE + switch (typ) { + case G_TOKEN_INT: + printf("%ld\t", scanner->value.v_int); + break; + case G_TOKEN_FLOAT: + printf("%g\t", scanner->value.v_float); + break; + case G_TOKEN_CHAR: + if (scanner->value.v_char == '\n') fputc('\n',stdout); + else exit(1); + break; + default: + exit(1); + break; + } +#endif // VERBOSE + } + g_scanner_destroy(scanner); + + return 0; +} diff --git a/gfsm/gfsm/src/libgfsm/tests/intfloat.c b/gfsm/gfsm/src/libgfsm/tests/intfloat.c new file mode 100644 index 0000000..098b860 --- /dev/null +++ b/gfsm/gfsm/src/libgfsm/tests/intfloat.c @@ -0,0 +1,31 @@ +#include <stdio.h> + +#define ptr2int(p) ((int)(p)) +#define int2ptr(i) ((void*)(i)) +#define ptr2float(p) (*((float*)(&(p)))) +#define int2float(i) (*((float*)(&(i)))) + +#define float2int(f) (*((int*)(&(f)))) +#define float2ptr(f) (*((void**)(&(f)))) + +int main(void) { + float f = 42.24; + int fi; + void *fp; + float fif, fpf; + + printf("f=%f\n", f); + fi = float2int(f); + printf("->fi=%d\n", fi); + + fif = int2float(fi); + printf("-->fif=%f\n", fif); + + fp = float2ptr(f); + printf("->fp=%p\n", fp); + + fpf=ptr2float(fp); + printf("-->fpf=%f\n", fpf); + + return 0; +} diff --git a/gfsm/gfsm/src/libgfsm/tests/iotest.c b/gfsm/gfsm/src/libgfsm/tests/iotest.c new file mode 100644 index 0000000..c218627 --- /dev/null +++ b/gfsm/gfsm/src/libgfsm/tests/iotest.c @@ -0,0 +1,180 @@ +#include <glib.h> +#include <gfsmIO.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +/*--------------------------------------------------- + * generic test: output + */ +void test_output_generic(gfsmIOHandle *io, const char *label) +{ + gboolean rc; + fprintf(stderr, "\n-------- I/O: %s: output\n", label); + + fprintf(stderr, "+ write(\"foo\\n\",4):\n"); + rc = gfsmio_write(io, "foo\n", 4); + gfsmio_flush(io); + fprintf(stderr, " --> %d\n", 
rc); + + fprintf(stderr, "+ puts(\"bar\\n\"):\n"); + rc = gfsmio_puts(io, "bar\n"); + gfsmio_flush(io); + fprintf(stderr, " --> %d\n", rc); + + fprintf(stderr, "+ putc('x'); putc('\\n'):\n"); + rc = gfsmio_putc(io, 'x') && gfsmio_putc(io, '\n'); + gfsmio_flush(io); + fprintf(stderr, " --> %d\n", rc); + + fprintf(stderr, "+ printf(\"%%s%%s\\n\",\"foo\",\"bar\"):\n"); + rc = gfsmio_printf(io, "%s%s\n", "foo", "bar"); + gfsmio_flush(io); + fprintf(stderr, " --> %d\n", rc); +} + +/*--------------------------------------------------- + * generic test: input (reads 2 bytes, then reads lines until getline() returns <= 0; traces each result to stderr) + */ +void test_input_generic(gfsmIOHandle *io, const char *label) +{ + char buf[2] = { '?', '?' }; /* defined contents even if the read below fails */ + char *linebuf=NULL; + size_t n=0; + ssize_t nread=0; + gboolean rc; + + fprintf(stderr, "\n-------- I/O: %s: input\n", label); + + fprintf(stderr, "+ read(2)\n"); + rc = gfsmio_read(io, buf, 2); + fprintf(stderr, " --> %d ; buf=\"%c%c\"\n", rc, buf[0], buf[1]); + + fprintf(stderr, "+ getline()\n"); + while ( (nread=gfsmio_getline(io, &linebuf, &n)) > 0) { + fprintf(stderr, " --> %ld ; linebuf=\"%s\"\n", (long)nread, linebuf); + fprintf(stderr, "+ getline()\n"); + } + fprintf(stderr, " --> %ld ; linebuf=\"%s\"\n", (long)nread, (linebuf ? linebuf : "(null)")); /* linebuf is still NULL if nothing was ever read */ + + if (linebuf) free(linebuf); +} + + +/*--------------------------------------------------- + * test: FILE* + */ +void test_io_cfile(void) { + gfsmIOHandle *ioh=NULL; + + //-- I/O to file: output + ioh = gfsmio_new_file(stdout); + test_output_generic(ioh, "FILE* (stdout)"); + gfsmio_handle_free(ioh); + + //-- I/O from file: input + ioh = gfsmio_new_file(stdin); + test_input_generic(ioh, "FILE* (stdin)"); + gfsmio_handle_free(ioh); +} + +/*--------------------------------------------------- + * test: GString* + */ +void test_io_gstring(void) { + GString *gs = g_string_new(""); + gfsmPosGString pgs = { gs, 0 }; + gfsmIOHandle *ioh = NULL; + + //-- I/O to GString*: output + ioh = gfsmio_new_gstring(&pgs); + test_output_generic(ioh, "GString*"); + fprintf(stderr, "+ OUTPUT=\"%s\"\n", gs->str); + + //-- I/O from 
GString*: input + pgs.pos = 0; + /* + g_string_assign(gs, "ab\ncde"); + test_input_generic(ioh, "GString* \"ab\\nc\")"); + */ + /* + g_string_assign(gs, "a b c\nd e f"); + test_input_generic(ioh, "GString* \"a b c\\nd e f\")"); + */ + /* + g_string_assign(gs, "abcde\nfghij\nklmnopqrstuvwxyz"); + test_input_generic(ioh, "GString* \"...\")"); + */ + g_string_assign(gs, "abc\n\ndef\n"); + test_input_generic(ioh, "GString* \"...\")"); + + + gfsmio_handle_free(ioh); + g_string_free(gs,TRUE); +} + +/*--------------------------------------------------- + * test: gzFile + */ +void test_io_zfile(void) { + gfsmIOHandle *ioh=NULL; + gfsmError *err=NULL; + + //-- I/O to gzGile: output + ioh = gfsmio_new_filename("iotest-out.gz", "wb", 0, &err); + test_output_generic(ioh, "gzFile (iotest-out.gz)"); + gfsmio_close(ioh); + gfsmio_handle_free(ioh); + + //-- I/O from gzFile: input + ioh = gfsmio_new_filename("iotest-in.gz", "rb", -1, &err); + test_input_generic(ioh, "gzFile (iotest-in.gz)"); + gfsmio_close(ioh); + gfsmio_handle_free(ioh); +} + +/*--------------------------------------------------- + * test: gzFile from FILE* + */ +void test_io_zcfile(void) { + gfsmIOHandle *ioh=NULL; + int zlevel = -1; + + //-- I/O to gzGile: output + ioh = gfsmio_new_zfile(stdout, "wb", zlevel); + test_output_generic(ioh, "gzFile(fileno(stdout))"); + gfsmio_close(ioh); + gfsmio_handle_free(ioh); + + //-- I/O from gzFile: input + ioh = gfsmio_new_zfile(stdin, "rb", zlevel); + test_input_generic(ioh, "gzFile(fileno(stdin))"); + gfsmio_close(ioh); + gfsmio_handle_free(ioh); +} + + +/*--------------------------------------------------- + * MAIN + */ +int main (void) { + /* + fprintf(stderr, "\n=================================\n"); + test_io_cfile(); + */ + + /* + fprintf(stderr, "\n=================================\n"); + test_io_gstring(); + */ + + /* + fprintf(stderr, "\n=================================\n"); + test_io_zfile(); + */ + + fprintf(stderr, "\n=================================\n"); + 
test_io_zcfile(); + + return 0; +} diff --git a/gfsm/gfsm/src/libgfsm/tests/lab2ary.c b/gfsm/gfsm/src/libgfsm/tests/lab2ary.c new file mode 100644 index 0000000..e6dbee9 --- /dev/null +++ b/gfsm/gfsm/src/libgfsm/tests/lab2ary.c @@ -0,0 +1,62 @@ +#include <gfsm.h> +#include <stdlib.h> + + +gfsmAutomaton *fsm; +gfsmAlphabet *ialph; +const char *tfstname = "lab2ary.tfst"; +gfsmError *err = NULL; + + +int main (int argc, char **argv) { + guint i; + ialph = gfsm_identity_alphabet_new(); + //GArray *ary; + GPtrArray *ary; + + fsm = gfsm_automaton_new(); + if (!gfsm_automaton_compile_filename(fsm,tfstname,&err)) { + g_printerr("%s: compile failed for '%s': %s\n", *argv, tfstname, err->message); + exit(255); + } + printf("%s: compiled test automaton from '%s'\n", *argv, tfstname); + + ialph = gfsm_automaton_get_alphabet(fsm, gfsmLSLower, ialph); + + printf("--\n"); + printf("alphabet size=%u ; min=%u ; max=%u\n", + gfsm_alphabet_size(ialph), ialph->lab_min, ialph->lab_max); + + printf("--\n"); + printf("alphabet array={"); + /*-- ok + ary = g_array_new(FALSE,FALSE,sizeof(gfsmLabelVal)); + gfsm_alphabet_labels_to_array(ialph,ary); + */ + /*-- ok + ary = g_array_sized_new(FALSE,FALSE,sizeof(gfsmLabelVal),gfsm_alphabet_size(ialph)); + gfsm_alphabet_labels_to_array(ialph,ary); + */ + /*-- ok */ + //ary = gfsm_alphabet_labels_to_array(ialph,NULL); + + /*-- ptr_array */ + ary = g_ptr_array_sized_new(gfsm_alphabet_size(ialph)); + gfsm_alphabet_labels_to_array(ialph,ary); + + for (i=0; i < ary->len; i++) { + //printf(" %u", g_array_index(ary,gfsmLabelVal,i)); + printf(" %u", (gfsmLabelVal)g_ptr_array_index(ary,i)); + } + printf(" }\n"); + + //-- cleanup + //g_array_free(ary,TRUE); + g_ptr_array_free(ary,TRUE); + + gfsm_automaton_free(fsm); + gfsm_alphabet_free(ialph); + + return 0; +} + diff --git a/gfsm/gfsm/src/libgfsm/tests/labchurn.c b/gfsm/gfsm/src/libgfsm/tests/labchurn.c new file mode 100644 index 0000000..b8f0a99 --- /dev/null +++ b/gfsm/gfsm/src/libgfsm/tests/labchurn.c 
@@ -0,0 +1,23 @@ +#include <gfsm.h> + +gfsmAlphabet *alph; + +int main (int argc, char **argv) +{ + char *infilename = (argc > 1 ? argv[1] : "-"); + + g_mem_set_vtable(glib_mem_profiler_table); + + alph = gfsm_string_alphabet_new(); + //g_mem_profile(); + + gfsm_alphabet_load_filename(alph,infilename,NULL); + //gfsm_alphabet_save_file(alph,stdout,NULL); + + + gfsm_alphabet_free(alph); + + g_blow_chunks(); + g_mem_profile(); + return 0; +} diff --git a/gfsm/gfsm/src/libgfsm/tests/labcount.perl b/gfsm/gfsm/src/libgfsm/tests/labcount.perl new file mode 100755 index 0000000..4086797 --- /dev/null +++ b/gfsm/gfsm/src/libgfsm/tests/labcount.perl @@ -0,0 +1,28 @@ +#!/usr/bin/perl -w + +use Gfsm; + +if (!@ARGV) { + print STDERR "Usage: $0 LABFILE [DATA_FILE(s)...]\n"; + exit 1; +} + +$labfile = shift; +$labs = Gfsm::Alphabet->new(); +$labs->load($labfile) or die("$0: load failed for labels file '$labfile': $!"); +$sym2id = $labs->asHash; + +##-- read data +%labf = qw(); +$ftotal = 0; +while (defined($line=<>)) { + chomp($line); + @labs = grep {defined($_)} @$sym2id{split(//,$line)}; + $ftotal += scalar(@labs); + foreach (@labs) { ++$labf{$_}; } +} + +##-- write data vector +#print map { pack('d', (defined($_) ? 
$_ : 0)/$ftotal) } @labf; + +print map {pack('Sd',$_,$labf{$_}/$ftotal)} sort(keys(%labf)); diff --git a/gfsm/gfsm/src/libgfsm/tests/labprobs.h b/gfsm/gfsm/src/libgfsm/tests/labprobs.h new file mode 100644 index 0000000..d8589d0 --- /dev/null +++ b/gfsm/gfsm/src/libgfsm/tests/labprobs.h @@ -0,0 +1,71 @@ +#include <gfsm.h> + +guint32 grand_seed = 42; +const char *probfile = "tagh-probs.bin"; + +extern gulong count_test; +GArray *seekus = NULL; /*-- lab = g_array_index(seekus,i); 1<=i<=count_test --*/ + +typedef struct { + gfsmLabelId lab; + double prob; +} seekProb; +GArray *labp=NULL; /*-- g_array_index(probs,seekProb,i) = (lab,p(lab)) --*/ + +GRand *grand=NULL; + +/*====================================================================== + * load_label_probs() + */ +void load_label_probs(void) { + seekProb sp; + double total=0, tmp; + FILE *f = fopen(probfile,"r"); + if (!f) { + fprintf(stderr, "error: open failed for probability file '%s'\n", probfile); + exit(1); + } + labp = g_array_sized_new(FALSE,TRUE,sizeof(seekProb),256); + labp->len = 0; + while ( !feof(f) ) { + if (fread(&(sp.lab), sizeof(gfsmLabelId), 1, f) != 1 + || fread(&(sp.prob), sizeof(double), 1, f) != 1) + { + break; + } + tmp = sp.prob; + sp.prob += total; + total += tmp; + g_array_append_val(labp,sp); + + } + fclose(f); + fprintf(stderr, "[info]: read probability distribution over %d labels from '%s'\n", + labp->len, probfile); +} + +/*====================================================================== + * random_label() + */ +gfsmLabelId random_label(void) { + double w; + int i; + if (!grand) { grand = g_rand_new_with_seed(grand_seed); } + w = g_rand_double(grand); + for (i=0; i < labp->len && w > g_array_index(labp,seekProb,i).prob; i++) { ; } + if (i==labp->len) { --i; } + return g_array_index(labp,seekProb,i).lab; +} + +/*====================================================================== + * populate_seek_labels() + */ +void populate_seek_labels(void) { + int i; + gfsmLabelId lab; + 
/* Number of bytes needed to hold nbits bits (never less than 1). */
#define _gfsm_bitvector_bits2bytes(nbits) ((nbits)>0 ? ((((nbits)-1)/8)+1) : 1)

/*
 * Set (v != 0) or clear (v == 0) bit i of bit vector bv.
 *
 * Bit i lives in byte i/8 at position i%8. The earlier index expression,
 * bits2bytes(i)-1, mapped bit 8 to byte 0 and so collided with bit 0.
 * All macro arguments are parenthesised so that expression arguments
 * expand safely; i is still evaluated more than once.
 *
 * NOTE(review): resize is requested with i itself, as before; whether the
 * callee expects i or i+1 bits cannot be seen from here -- confirm.
 */
#define gfsm_bitvector_set(bv,i,v) \
  ( ((i) >= gfsm_bitvector_size(bv) ? gfsm_bitvector_resize((bv),(i)) : 0), \
    ((v) ? ( (bv)->data[ (i)/8 ] |=  (1<<((i)%8)) ) \
         : ( (bv)->data[ (i)/8 ] &= ~(1<<((i)%8)) ) ) )
--- /dev/null +++ b/gfsm/gfsm/src/libgfsm/tests/memtest-automaton.c @@ -0,0 +1,20 @@ +#include <glib.h> +#include <gfsm.h> + +void dofree (gpointer p) { g_free(p); } + +int main (void) { + gfsmAutomaton *fsm; + g_mem_set_vtable(glib_mem_profiler_table); + + fsm = gfsm_automaton_new(); + gfsm_automaton_free(fsm); + + //printf("<CHUNKS:1>--------\n"); + //g_mem_chunk_info(); + + printf("<PROF:1>--------\n"); + g_blow_chunks(); + g_mem_profile(); + return 0; +} diff --git a/gfsm/gfsm/src/libgfsm/tests/memtest-general.c b/gfsm/gfsm/src/libgfsm/tests/memtest-general.c new file mode 100644 index 0000000..50fe42f --- /dev/null +++ b/gfsm/gfsm/src/libgfsm/tests/memtest-general.c @@ -0,0 +1,168 @@ +#include <gfsm.h> +#include <glib.h> + +#define VTABLE_PROFILE 1 +//#define VTABLE_LOCAL 1 + + +//#define USE_ALLOCATORS 1 +//#define DELAY_ALLOCATOR_FREE 1 + + +//#define NITEMS 0 +//#define NITEMS 1 +//#define NITEMS 10 +#define NITEMS 128 +//#define NITEMS 256 +//#define NITEMS 1024 +//#define NITEMS 65536 +//#define NITEMS 131072 +//#define NITEMS 262144 +//#define NITEMS 524288 +//#define NITEMS 1048576 + + +//#define NITERS +//#define NITERS 1 +//#define NITERS 10 +#define NITERS 128 +//#define NITERS 1024 +//#define NITERS 65536 +//#define NITERS 131072 +//#define NITERS 262144 +//#define NITERS 524288 +//#define NITERS 1048576 + +//#define PRINT_CHUNK_INFO 1 +//#define DO_PROFILE 1 + +#define DO_GMALLOC 1 +#define GMALLOC_SIZE 45 + +#define DO_GNEW 1 +#define GNEW_SIZE 70 + +#define DO_SLIST 1 + +#define DO_PTRARRAY 1 +#define PTRARRAY_SIZE 128 + + +/*-------------------------------------------------------------------- + * mem table + */ +gpointer my_malloc(gsize n_bytes) +{ return (gpointer)malloc(n_bytes); } + +gpointer my_realloc(gpointer mem, gsize n_bytes) +{ return (gpointer)realloc(mem, n_bytes); } + +void my_free(gpointer mem) +{ free(mem); } + +GMemVTable my_vtable = + { + my_malloc, + my_realloc, + my_free, + NULL, + NULL, + NULL + }; + 
+/*-------------------------------------------------------------------- + * operation macro + */ +#define MEMOP(code) \ + printf("%s\n", #code); \ + code; + +#define ITEMOP(code) \ + for (i=0; i<NITEMS; i++) { code; } + +/*-------------------------------------------------------------------- + * variables + */ +gpointer mallocp[NITERS]; +gpointer newp[NITERS]; +GSList *slist[NITERS]; +GPtrArray *ptrarray[NITERS]; + +/*-------------------------------------------------------------------- + * MAIN + */ +int main(int argc, char **argv) { + int i,j; + + + //-- memory debugging +#if defined(VTABLE_PROFILE) + g_mem_set_vtable(glib_mem_profiler_table); +#elif defined(VTABLE_LOCAL) + g_mem_set_vtable(&my_vtable); +#endif + + //-- setup gfsm allocators +#if defined(USE_ALLOCATORS) + MEMOP(gfsm_allocators_enable()); +#endif + + + //--------------- iteration + for (j=0; j < NITERS; j++) { + //-- alloc +# ifdef DO_GMALLOC + ITEMOP(mallocp[i]=g_malloc(GMALLOC_SIZE)); +# endif +# ifdef DO_GNEW + ITEMOP(newp[i]=g_new(char,GNEW_SIZE)); +# endif +# ifdef DO_SLIST + ITEMOP(slist[i]=g_slist_prepend(NULL,NULL)); +# endif +# ifdef DO_PTRARRAY + ITEMOP(ptrarray[i]=g_ptr_array_sized_new(PTRARRAY_SIZE)); +# endif + + //-- free +# ifdef DO_GMALLOC + ITEMOP(g_free(mallocp[i])); +# endif +# ifdef DO_GNEW + ITEMOP(g_free(newp[i])); +# endif +# ifdef DO_SLIST + ITEMOP(g_slist_free(slist[i])); +# endif +# ifdef DO_PTRARRAY + ITEMOP(g_ptr_array_free(ptrarray[i],TRUE)); +# endif + } + + + //-- pop gfsm allocators +#if defined(USE_ALLOCATORS) && !defined(DELAY_ALLOCATOR_FREE) + MEMOP(gfsm_allocators_free()); +#endif + + //-- memory debugging +#if defined(PRINT_CHUNK_INFO) && defined(VTABLE_PROFILE) + printf("\n<CHUNKS:1>--------\n"); + g_blow_chunks(); + g_mem_chunk_info(); +#endif + // +#ifdef VTABLE_PROFILE + printf("\n<PROF:1>--------\n"); + g_blow_chunks(); + g_mem_profile(); +#endif + + g_blow_chunks(); + +#if defined(USE_ALLOCATORS) && defined(DELAY_ALLOCATOR_FREE) + 
##-- Build the GModule loading test (mod1test) and its two plug-in modules.
TARGETS = mod1test libmod1a.so libmod1b.so
KNOWN_TARGETS = mod1test libmod1a.so libmod1b.so

CC = gcc
#CC = ccmalloc --no-wrapper gcc
LD = $(CC)

##-- pkg-config is queried once at parse time (:=), not on every expansion
CPPFLAGS := $(shell pkg-config --cflags-only-I gmodule-2.0)
#CFLAGS ?= -O2 -pipe
#CFLAGS ?= -Wall -g
#CFLAGS += -Wall -g
CFLAGS ?= -g
CFLAGS += -Wall -fPIC -DPIC

LDFLAGS := $(shell pkg-config --libs-only-L gmodule-2.0)
LIBS := $(shell pkg-config --libs-only-l gmodule-2.0) -lm

LDFLAGS_MODULE = -shared $(LDFLAGS)

##-- 'all' and 'clean' are commands, not files
.PHONY: all clean

all: $(TARGETS)

##-- keep intermediate files
.SECONDARY:

##-- PATTERN: .c -> .o
%.o: %.c
	$(CC) $(CPPFLAGS) $(CFLAGS) -o $@ -c $<

##-- clean
clean:
	$(RM) *.o *.lo *.tab.[ch] *.lex.[ch] $(KNOWN_TARGETS)

##-- modules
lib%.so: %.o
	$(LD) $(LDFLAGS_MODULE) -o $@ $^ $(LIBS)

##-- Executables
%test: %test.o
	$(LD) $(LDFLAGS) -o $@ $^ $(LIBS)
module + if (!g_module_symbol(mod,"foo",(gpointer *)&foofunc)) { + g_printerr("%s: could not load symbol 'foo' from module '%s': %s\n", prog, modpath, g_module_error()); + g_module_close(mod); + if (modpath != argv[i]) g_free(modpath); + continue; + } + printf("-> symbol('foo'): %p\n", foofunc); + + //-- call 'foo' as a foofunc + printf("-> calling foo(): "); + foofunc(); + + //-- cleanup + if (modpath != argv[i]) g_free(modpath); + g_module_close(mod); + } + return 0; +} diff --git a/gfsm/gfsm/src/libgfsm/tests/nofinal.tfst b/gfsm/gfsm/src/libgfsm/tests/nofinal.tfst new file mode 100644 index 0000000..ab866fa --- /dev/null +++ b/gfsm/gfsm/src/libgfsm/tests/nofinal.tfst @@ -0,0 +1 @@ +0 0 1 1 diff --git a/gfsm/gfsm/src/libgfsm/tests/offsettest.c b/gfsm/gfsm/src/libgfsm/tests/offsettest.c new file mode 100644 index 0000000..bec8e8f --- /dev/null +++ b/gfsm/gfsm/src/libgfsm/tests/offsettest.c @@ -0,0 +1,101 @@ +#include <gfsm.h> +#include <glib.h> +#include <stdio.h> + +static const gulong count_test +//= 1048576UL //== 2^20 +//= 16777216UL //== 2^24 +//= 33554432UL //==2^25 += 268435456UL //==2^28 +//= 4294967295UL //== 2^32-1 +; + +//====================================================================== +// Basic bench subs + +inline gfsmLabelId get_lower(gfsmArc *a) { return a->lower; } +inline gfsmLabelId get_upper(gfsmArc *a) { return a->upper; } +inline gfsmLabelId get_label_offset(gfsmArc *a, gint offset) { + return *((gfsmLabelId*)G_STRUCT_MEMBER_P(&a,offset)); +} + +//====================================================================== +// Bench: literal: lower + +double bench_literal_lower(gfsmArc *a) { + gfsmLabelId l; + GTimer *timer = g_timer_new(); + gulong i; + double elapsed; + g_timer_start(timer); + for (i=0; i < count_test; i++) { + //-- BEGIN TEST CODE + l = get_lower(a); + //-- END TEST CODE + } + elapsed = g_timer_elapsed(timer,NULL); + g_timer_destroy(timer); + return elapsed; +} + 
+//====================================================================== +// Bench: offset + +double bench_offset(gfsmArc *a, gint lab_offset) { + gfsmLabelId l; + GTimer *timer = g_timer_new(); + gulong i; + double elapsed; + g_timer_start(timer); + for (i=0; i < count_test; i++) { + //-- BEGIN TEST CODE + l = get_label_offset(a,lab_offset); + //-- END TEST CODE + } + elapsed = g_timer_elapsed(timer,NULL); + g_timer_destroy(timer); + return elapsed; +} + +//====================================================================== +// Bench: offset: lower + +#define bench_offset_lower(a) bench_offset((a),G_STRUCT_OFFSET(gfsmArc, lower)) + + +//====================================================================== +// MAIN +int main(int argc, char **argv) { + gfsmArc a = {0,1,2,3,4.5}; + double elapsed_literal, elapsed_offset, count_dbl=count_test; + + printf("G_STRUCT_OFFSET(gfsmArc, lower)=%d: *()=%d\n", + G_STRUCT_OFFSET(gfsmArc, lower), + *((gfsmLabelId*)G_STRUCT_MEMBER_P(&a,G_STRUCT_OFFSET(gfsmArc, lower))) + ); + + printf("G_STRUCT_OFFSET(gfsmArc, upper)=%d: *()=%d\n", + G_STRUCT_OFFSET(gfsmArc, upper), + *((gfsmLabelId*)G_STRUCT_MEMBER_P(&a,G_STRUCT_OFFSET(gfsmArc, upper))) + ); + + printf("G_STRUCT_OFFSET(gfsmArc, weight)=%d: *()=%g\n", + G_STRUCT_OFFSET(gfsmArc, weight), + *((gfsmWeight*)G_STRUCT_MEMBER_P(&a,G_STRUCT_OFFSET(gfsmArc, weight))) + ); + + //-- bench + elapsed_literal = bench_literal_lower(&a); + elapsed_literal = bench_literal_lower(&a); + // + elapsed_offset = bench_offset_lower(&a); + elapsed_offset = bench_offset_lower(&a); + // + // + fprintf(stderr, "%16s: %.2f sec, %ld iters, %.2e iter/sec\n", + "literal", elapsed_literal, count_test, count_dbl/elapsed_literal); + fprintf(stderr, "%16s: %.2f sec, %ld iters, %.2e iter/sec\n", + "offset", elapsed_offset, count_test, count_dbl/elapsed_offset); + + return 0; +} diff --git a/gfsm/gfsm/src/libgfsm/tests/ortest.c b/gfsm/gfsm/src/libgfsm/tests/ortest.c new file mode 100644 index 0000000..8327e0b 
--- /dev/null +++ b/gfsm/gfsm/src/libgfsm/tests/ortest.c @@ -0,0 +1,7 @@ +#include <stdio.h> +int main(void) { + int x=0, y=-1; + int z = x||y; + printf("x||y=%d\n", z); + return 0; +} diff --git a/gfsm/gfsm/src/libgfsm/tests/pathtest.c b/gfsm/gfsm/src/libgfsm/tests/pathtest.c new file mode 100644 index 0000000..86a3ad3 --- /dev/null +++ b/gfsm/gfsm/src/libgfsm/tests/pathtest.c @@ -0,0 +1,289 @@ +#include <gfsm.h> + + +//#define USE_VTABLE 1 +//#define USE_ALLOCATORS 1 +//#define DELAY_ALLOCATOR_FREE 1 + +#define NEW_FST 1 + +//#define MAKE_SLIST 1 +//#define MAKE_LIST 1 +//#define MAKE_FST 1 +//#define COMPILE_FST 1 +#define LOAD_FST 1 +//#define LOAD_EMPTY +//#define LOAD_NOFINAL + +#define LOAD_NITERS 1 +//#define LOAD_NITERS 10 +//#define LOAD_NITERS 32768 +//#define LOAD_NITERS 65535 +//#define LOAD_NITERS 131072 +//#define LOAD_NITERS 262144 +//#define LOAD_NITERS 524288 +//#define LOAD_NITERS 1048576 + + +#define NEW_ABET 1 +#define LOAD_ABET 1 + +//#define DO_INPUT 1 +//#define DO_LOOKUP 1 +//#define DO_PATHS 1 +//#define DO_STRINGS 1 +//#define DO_PTRARRAY 1 + +#define NITERS 0 +//#define NITERS 1 +//#define NITERS 10 +//#define NITERS 1024 +//#define NITERS 65536 +//#define NITERS 131072 +//#define NITERS 262144 +//#define NITERS 524288 +//#define NITERS 1048576 + +//#define PRINT_CHUNK_INFO 1 +//#define DO_PROFILE 1 + +const char *progname = "pathtest"; +const char *labfile = "test.lab"; + +#if defined(LOAD_EMPTY) +const char *fstfile = "empty.gfst"; +const char *tfstfile = "empty.tfst"; +#elif defined(LOAD_NOFINAL) +const char *fstfile = "nofinal.gfst"; +const char *tfstfile = "nofinal.tfst"; +#else +const char *fstfile = "lkptest.gfst"; +const char *tfstfile = "lkptest.tfst"; +#endif + + +gfsmLabelVector *input = NULL; +gfsmAlphabet *abet = NULL; +gfsmAutomaton *fst = NULL; +gfsmAutomaton *result = NULL; +gfsmSet *paths = NULL; +GSList *strings = NULL; +GPtrArray *ptrarray = NULL; +gfsmError *err = NULL; +GSList *sltmp = NULL; +gfsmState *st = NULL; 
+gfsmArc *arc=NULL; +gfsmArcList *al=NULL; +char line[256]; + + +gpointer my_malloc(gsize n_bytes) +{ + return (gpointer)malloc(n_bytes); +} + +gpointer my_realloc(gpointer mem, gsize n_bytes) +{ + return (gpointer)realloc(mem, n_bytes); +} + +void my_free(gpointer mem) +{ + free(mem); +} + +GMemVTable my_vtable = + { + my_malloc, + my_realloc, + my_free, + NULL, + NULL, + NULL + }; + +#define MEMOP(code) \ + fprintf(stderr,"%s\n", #code); \ + code; + +int main(int argc, char **argv) { + int i; + + //-- memory debugging +#if defined(DO_PROFILE) + g_mem_set_vtable(glib_mem_profiler_table); +#elif defined(USE_VTABLE) + g_mem_set_vtable(&my_vtable); +#endif + + //-- setup gfsm allocators +#if defined(USE_ALLOCATORS) + MEMOP(gfsm_allocators_enable()); +#endif + + //-- load or make fst +#ifdef NEW_FST + MEMOP(fst = gfsm_automaton_new();); + +#if defined(MAKE_SLIST) + //-- this is the culprit! + MEMOP(al = g_slist_prepend(NULL,NULL)); + MEMOP(g_slist_free(al)); +#elif defined(MAKE_LIST) + { + GList *l=NULL; + MEMOP(l=g_list_prepend(NULL,NULL)); + MEMOP(g_list_free(l)); + } +#elif defined(MAKE_FST) + MEMOP(st = gfsm_automaton_get_state(fst,0)); //-- ok + MEMOP(gfsm_automaton_set_final_state(fst, 0, TRUE)); //-- ok + + MEMOP(gfsm_automaton_add_arc(fst,0,0,1,1,0)); //-- NOT ok! + + //-- alloc + //MEMOP(arc=gfsm_arc_new_full(0,1,1,0)); //--ok + //MEMOP(st->arcs = g_slist_prepend((gpointer)arc,st->arcs)); //-- ok w/ allocator + + //-- free + //MEMOP(g_slist_free(st->arcs); st->arcs=NULL;); //-- ok w/ allocator + //MEMOP(gfsm_arc_free(arc)); //-- /ok + +#elif defined(COMPILE_FST) + fprintf(stderr,"gfsm_automaton_compile_filename(\"%s\");\n", tfstfile); + if (!gfsm_automaton_compile_filename(fst,tfstfile,&err)) { + fprintf(stderr,"%s: compile failed for '%s': %s\n", progname, tfstfile, (err ? 
err->message : "?")); + exit(3); + } + //g_mem_profile(); +#elif defined(LOAD_FST) + fprintf(stderr,"gfsm_automaton_load_bin_filename(\"%s\"); //---[x %d]---\n", fstfile, LOAD_NITERS); + for (i=0; i < LOAD_NITERS; i++) { + if (fst) gfsm_automaton_free(fst); + fst = gfsm_automaton_new(); + if (!gfsm_automaton_load_bin_filename(fst,fstfile,&err)) { + fprintf(stderr,"%s: load failed for '%s': %s\n", progname, fstfile, (err ? err->message : "?")); + exit(3); + } + g_blow_chunks(); + } + //g_mem_profile(); +#endif // make or load FST +#endif // NEW_FST + + //-- load labels +#ifdef NEW_ABET + MEMOP(abet = gfsm_string_alphabet_new();); +#ifdef LOAD_ABET + fprintf(stderr,"gfsm_alphabet_load_filename(\"%s\");\n", labfile); + if (!gfsm_alphabet_load_filename(abet,labfile,&err)) { + fprintf(stderr,"%s: load failed for labels file '%s': %s\n", + progname, labfile, (err ? err->message : "?")); + exit(2); + } + //g_mem_profile(); +#endif //-- LOAD_ABET +#endif //-- NEW_ABET + + //-- setup input vector +#ifdef DO_INPUT + MEMOP(input = g_ptr_array_new()); + MEMOP(g_ptr_array_add(input,(gpointer)2)); + MEMOP(g_ptr_array_add(input,(gpointer)2)); + MEMOP(g_ptr_array_add(input,(gpointer)3)); +#endif //-- DEFINE_INPUT + + //-- guts + fprintf(stderr, "\n--bench[%d] :lookup=%d, paths=%d, strings=%d, ptrarray=%d--\n\n", + NITERS, +#ifdef DO_LOOKUP + 1 +#else + 0 +#endif + , +#ifdef DO_PATHS + 1 +#else + 0 +#endif + , +#ifdef DO_STRINGS + 1 +#else + 0 +#endif + , +#ifdef DO_PTRARRAY + 1 +#else + 0 +#endif + ); + + for (i=0; i < NITERS; i++) { +#ifdef DO_LOOKUP + result = gfsm_automaton_lookup(fst, input, result); +#endif +#ifdef DO_PATHS + paths = gfsm_automaton_paths(result,paths); +#endif +#ifdef DO_STRINGS + strings = gfsm_paths_to_strings(paths, abet, NULL, fst->sr, TRUE, TRUE, strings); +#endif +#ifdef DO_PTRARRAY + ptrarray=g_ptr_array_sized_new(gfsm_set_size(paths)); + gfsm_set_to_ptr_array(paths, ptrarray); +#endif + + //-- cleanup + for (sltmp=strings; sltmp; sltmp=sltmp->next) { 
g_free(sltmp->data); } + if (ptrarray) g_ptr_array_free(ptrarray,TRUE); + if (strings) g_slist_free(strings); + if (paths) gfsm_set_clear(paths); + g_blow_chunks(); + } + + //-- pop gfsm allocators (too early: segfaults) + /* +#if defined(USE_ALLOCATORS) && !defined(DELAY_ALLOCATOR_FREE) + MEMOP(gfsm_allocators_disable()); +#endif + */ + + //-- cleanup + if (result) { MEMOP(gfsm_automaton_free(result)); } + if (paths) { MEMOP(gfsm_set_free(paths)); } + if (input) { MEMOP(g_ptr_array_free(input,TRUE)); } + if (fst) { MEMOP(gfsm_automaton_free(fst)); } + if (abet) { MEMOP(gfsm_alphabet_free(abet);); } + + //-- pop gfsm allocators +#if defined(USE_ALLOCATORS) && !defined(DELAY_ALLOCATOR_FREE) + MEMOP(gfsm_allocators_free()); +#endif + + //-- memory debugging +#ifdef PRINT_CHUNK_INFO + printf("\n<CHUNKS:1>--------\n"); + g_blow_chunks(); + g_mem_chunk_info(); +#endif + // +#ifdef DO_PROFILE + printf("\n<PROF:1>--------\n"); + g_blow_chunks(); + g_mem_profile(); +#endif + +#if defined(USE_ALLOCATORS) && defined(DELAY_ALLOCATOR_FREE) + MEMOP(gfsm_allocators_free()); +#endif + + { + printf("OK to exit? 
"); + scanf("%s", &line); + } + + return 0; +} diff --git a/gfsm/gfsm/src/libgfsm/tests/pathtest.tfst b/gfsm/gfsm/src/libgfsm/tests/pathtest.tfst new file mode 100644 index 0000000..fd189c5 --- /dev/null +++ b/gfsm/gfsm/src/libgfsm/tests/pathtest.tfst @@ -0,0 +1,9 @@ +0 2 3 3 1 +0 1 2 2 1 +1 6 2 2 1 +2 4 3 3 1 +2 3 2 2 1 +4 5 1 1 1 +5 0 +6 7 3 3 1 +7 0 diff --git a/gfsm/gfsm/src/libgfsm/tests/priotest.c b/gfsm/gfsm/src/libgfsm/tests/priotest.c new file mode 100644 index 0000000..c56a39e --- /dev/null +++ b/gfsm/gfsm/src/libgfsm/tests/priotest.c @@ -0,0 +1,171 @@ +#include <glib.h> +#include <stdio.h> +#include <string.h> + +const char *prog=NULL; + +//====================================================================== +// typedefs +typedef enum { + gfsmAFNone = 0x0, /**< no sort field */ + gfsmAFLower = 0x1, /**< sort by lower label */ + gfsmAFUpper = 0x2, /**< sort by upper label */ + gfsmAFWeight = 0x3, /**< sort by weight (refers to semiring) */ + gfsmAFSource = 0x4, /**< sort by arc source (if supported and meaningful) */ + gfsmAFTarget = 0x5, /**< sort by arc target (if supported and meaningful) */ + gfsmAFUser = 0x6, /**< user-defined sort function */ + gfsmAFAll = 0x7, /**< not really a sort field: mask of all valid sort fields */ + gfsmAFReverse = 0x8, /**< not really a sort field: if set, indicates that arc comparisons should be reversed */ + gfsmAFMask = 0xf /**< not really a sort field: mask of valid sort fields & reverse flag */ +} gfsmArcField; + +typedef enum { + gfsmAFNone = 0x0, /**< '_': no sort field */ + gfsmAFLower = 0x1, /**< 'l': sort by lower label */ + gfsmAFUpper = 0x2, /**< 'u': sort by upper label */ + gfsmAFWeight = 0x3, /**< 'w': sort by weight (refers to semiring) */ + gfsmAFSource = 0x4, /**< 's': sort by arc source (if supported and meaningful) */ + gfsmAFTarget = 0x5, /**< 't': sort by arc target (if supported and meaningful) */ + gfsmAFLowerR = 0x6, /**< 'L': reverse sort by lower label */ + gfsmAFUpperR = 0x7, /**< 'U': reverse 
sort by upper label */ + gfsmAFWeightR = 0x8, /**< 'W': reverse sort semiring weight */ + gfsmAFSourceR = 0x9, /**< 'S': reverse sort source state (if supported and meaningful) */ + gfsmAFTargetR = 0xa, /**< 'T': reverse sort target state (if supported and meaningful) */ + gfsmAFUser = 0xf /**< 'x': pseudo-field for user-defined comparisons */ +} gfsmArcFieldId; + +#define gfsmArcFieldShift 4 //-- number of bits in a single logical ::gfsmArcField element +const guint32 gfsmNArcFields = 5; //-- maximum 'nth' paramter supported by ::gfsmArcFieldMask + +typedef guint32 gfsmArcFieldMask; //-- mask of ::gfsmArcField values, left-shifted by ::gfsmArcFieldShift + + +const guint32 gfsmAFM_L = gfsmAFLower; +const guint32 gfsmAFM_LU = gfsmAFLower|(gfsmAFUpper<<gfsmArcFieldShift); +const guint32 gfsmAFM_LUW = gfsmAFLower|(gfsmAFUpper<<gfsmArcFieldShift)|(gfsmAFWeight<<(2*gfsmArcFieldShift)); + +gfsmArcFieldMask gfsm_arc_field_mask_new(guint nth, gfsmArcField field, gboolean reverse) +{ + gfsmArcFieldMask m = field; + if (reverse) m |= gfsmAFReverse; + return m << (nth*gfsmArcFieldShift); +} + +gfsmArcFieldMask gfsm_arc_field_mask_add(gfsmArcFieldMask m, guint nth, gfsmArcField field, gboolean reverse) +{ return (m | gfsm_arc_field_mask_new(nth,field,reverse)); } + +gfsmArcFieldMask gfsm_arc_field_mask_clear(gfsmArcFieldMask m, guint nth) +{ return m & ((~gfsmAFMask)<<(nth*gfsmArcFieldShift)); } + +gfsmArcField gfsm_arc_field_mask_get_field(gfsmArcFieldMask m, guint nth) +{ return (m>>(nth*gfsmArcFieldShift))&gfsmAFAll; } + +gboolean gfsm_arc_field_mask_get_reverse(gfsmArcFieldMask m, guint nth) +{ return ((m>>(nth*gfsmArcFieldShift))&gfsmAFReverse) ? 
TRUE : FALSE; } + + +//====================================================================== +// parse +gfsmArcFieldMask parse_mask(const char *str) +{ + gfsmArcFieldMask m = 0; + gint i; + guint nth=0; + /* + gint max_tokens = 32; + gchar **toks = g_strsplit(str,",; \n\t",max_tokens); + + //-- parse + for (i=0; toks[i] != NULL; i++) { + gchar *tok = toks[i]; + g_strstrip(tok); + } + */ + for (i=0; str[i] && nth < gfsmNArcFields; i++) { + switch (str[i]) { + case 'l' : m |= gfsm_arc_field_mask_new(nth++,gfsmAFLower,0); break; + case 'L' : m |= gfsm_arc_field_mask_new(nth++,gfsmAFLower,1); break; + + case 'u' : m |= gfsm_arc_field_mask_new(nth++,gfsmAFUpper,0); break; + case 'U' : m |= gfsm_arc_field_mask_new(nth++,gfsmAFUpper,1); break; + + case 'w' : m |= gfsm_arc_field_mask_new(nth++,gfsmAFWeight,0); break; + case 'W' : m |= gfsm_arc_field_mask_new(nth++,gfsmAFWeight,1); break; + + case 's' : m |= gfsm_arc_field_mask_new(nth++,gfsmAFSource,0); break; + case 'S' : m |= gfsm_arc_field_mask_new(nth++,gfsmAFSource,1); break; + + case 't' : m |= gfsm_arc_field_mask_new(nth++,gfsmAFTarget,0); break; + case 'T' : m |= gfsm_arc_field_mask_new(nth++,gfsmAFTarget,1); break; + + //-- silently ignore these + case 'x': + case 'X': + case '-': + case ',': + case ' ': + case '\t': + case '\n': + break; + + default: + g_printerr("%s: character '%c' is not in [sStTlLuUwW] in field string '%s' - skipping\n", prog, str[i], str); + break; + } + } + if (str[i] && nth==gfsmNArcFields) { + g_printerr("%s: ignoring trailing characters '%s' in field string '%s'\n", prog, (str+i), str); + } + + //-- cleanup + //g_strfreev(toks); + + return m; +} + +//====================================================================== +// dump + +const char *mask_field_str(gfsmArcFieldMask afm, guint nth) +{ + switch (gfsm_arc_field_mask_get_field(afm, nth)) { + case gfsmAFNone: return "none"; + case gfsmAFLower: return "lower"; + case gfsmAFUpper: return "upper"; + case gfsmAFWeight: return "weight"; 
+ case gfsmAFSource: return "source"; + case gfsmAFTarget: return "target"; + default: return "?"; + } + return "?"; +} +const char *mask_reverse_str(gfsmArcFieldMask afm, guint nth) +{ + return gfsm_arc_field_mask_get_reverse(afm, nth) ? ">" : "<"; +} + +void dump_mask(gfsmArcFieldMask afm, const char *str) +{ + printf("%s: str='%s': priorities = %u = %#0.6x = { %s%s, %s%s, %s%s, %s%s, %s%s }\n", + prog, str, afm, afm, + mask_field_str(afm,0), mask_reverse_str(afm,0), + mask_field_str(afm,1), mask_reverse_str(afm,1), + mask_field_str(afm,2), mask_reverse_str(afm,2), + mask_field_str(afm,3), mask_reverse_str(afm,3), + mask_field_str(afm,4), mask_reverse_str(afm,4) + ); +} + +//====================================================================== +// MAIN +int main(int argc, char **argv) { + int i; + gfsmArcFieldMask afm = 0; + + prog = argv[0]; + for (i=1; i < argc; i++) { + afm = parse_mask(argv[i]); + dump_mask(afm, argv[i]); + } + return 0; +} diff --git a/gfsm/gfsm/src/libgfsm/tests/prtest.c b/gfsm/gfsm/src/libgfsm/tests/prtest.c new file mode 100644 index 0000000..d93f207 --- /dev/null +++ b/gfsm/gfsm/src/libgfsm/tests/prtest.c @@ -0,0 +1,28 @@ +#include <gfsm.h> +#include <stdlib.h> + +//char *fsmfile = "prtest.tfst"; +char *fsmfile = "prtest.gfst"; +gfsmError *errp = NULL; + +int main (int argc, char **argv) { + gfsmAutomaton *fsm=NULL; + + if (argc > 1) fsmfile = argv[1]; + + fsm = gfsm_automaton_new(); + + //gfsm_automaton_compile_filename(fsm,fsmfile,&errp); + gfsm_automaton_load_bin_filename(fsm,fsmfile,&errp); + if (errp) { g_printerr("error: %s\n", errp->message); exit(1); } + + gfsm_automaton_prune(fsm); + gfsm_automaton_renumber_states(fsm); + + gfsm_automaton_print_file(fsm,stdout,&errp); + if (errp) { g_printerr("error: %s\n", errp->message); exit(1); } + + if (fsm) gfsm_automaton_free(fsm); + + return 0; +} diff --git a/gfsm/gfsm/src/libgfsm/tests/prtest.tfst b/gfsm/gfsm/src/libgfsm/tests/prtest.tfst new file mode 100644 index 0000000..f3ebd16 
--- /dev/null +++ b/gfsm/gfsm/src/libgfsm/tests/prtest.tfst @@ -0,0 +1,8 @@ +0 1 1 1 0 +0 0 +1 0 +2 0 +3 0 +4 0 +5 0 +6 0 diff --git a/gfsm/gfsm/src/libgfsm/tests/ptest.c b/gfsm/gfsm/src/libgfsm/tests/ptest.c new file mode 100644 index 0000000..44911cd --- /dev/null +++ b/gfsm/gfsm/src/libgfsm/tests/ptest.c @@ -0,0 +1,25 @@ +#include <glib.h> +#include <stdio.h> + +typedef struct _xstruc { + int x; + int y; +} xstruc; + +int main (void) { + xstruc xs = {42,24}; + xstruc *xsp = &xs; + void *vp = xsp; + char *s = NULL; + char *s2; + + printf ("&xs =%p ; xsp =%p ; vp =%p\n", &xs, xsp, vp); + printf ("&xs.x =%p ; &xs.y =%p\n", &xs.x, &xs.y); + printf ("&xsp->x=%p ; &xsp->y=%p\n", &xsp->x, &xsp->y); + printf ("(vp)->x=%p ; (vp)->y=%p\n", &((xstruc*)vp)->x, &((xstruc*)vp)->y); + + printf("\n"); + printf("s=%p ; s2=%p\n", s, g_strdup(s)); + + return 0; +} diff --git a/gfsm/gfsm/src/libgfsm/tests/rudtest.c b/gfsm/gfsm/src/libgfsm/tests/rudtest.c new file mode 100644 index 0000000..7329b70 --- /dev/null +++ b/gfsm/gfsm/src/libgfsm/tests/rudtest.c @@ -0,0 +1,100 @@ +#include <gfsm.h> +#include <gfsmDebug.h> +#include <stdlib.h> + +//char *fsm1file = "rudtest.gfst"; +char *fsm1file = "rudtest2.gfst"; +char *fsm2file = "rudtest.gfst"; + +gfsmError *errp = NULL; + +void hackme(gfsmAutomaton *fsm1, gfsmAutomaton *fsm2, char *label) +{ + printf("<%s>.a : reverse(fsm1)\n", label); + gfsm_automaton_reverse(fsm1); + + printf("<%s>.b : union(fsm1,fsm2)\n", label); + gfsm_automaton_union(fsm1, fsm2); + + printf("<%s>.c : determinize(fsm1)\n", label); + gfsm_automaton_determinize(fsm1); + + printf("<%s> : done.\n", label); +} + +void print_sizes(void) +{ + //-- show some memory sizes: + fprintf(stderr, "%-32s | %6s\n", "TYPE", "SIZE"); +#define PRINTSIZE(type) fprintf(stderr, "%-32s | %u\n", #type, sizeof(type)) + PRINTSIZE(gfsmAlphabet); + PRINTSIZE(gfsmIdentityAlphabet); + PRINTSIZE(gfsmRangeAlphabet); + + PRINTSIZE(gfsmArc); + PRINTSIZE(gfsmArcIter); + PRINTSIZE(gfsmArcList); + 
PRINTSIZE(gfsmAutomaton); + PRINTSIZE(gfsmAutomatonFlags); + PRINTSIZE(gfsmAutomatonHeader); + PRINTSIZE(gfsmSemiring); + //PRINTSIZE(gfsmSet); + //PRINTSIZE(gfsmSetUnionData); + PRINTSIZE(gfsmWeightedStateSet); + PRINTSIZE(gfsmState); + PRINTSIZE(gfsmStatePair); + PRINTSIZE(gfsmStateSet); + PRINTSIZE(gfsmStateSetIter); + PRINTSIZE(gfsmStoredState); + PRINTSIZE(gfsmStoredArc); + PRINTSIZE(gfsmVersionInfo); +#undef PRINTSIZE +} + +void rudtest_construct(gfsmAutomaton *fsm) +{ + gfsm_automaton_clear(fsm); + fsm->root_id = 0; + gfsm_automaton_add_state_full(fsm,0); + gfsm_automaton_add_state_full(fsm,1); + gfsm_automaton_add_state_full(fsm,2); + gfsm_automaton_set_final_state(fsm,2,TRUE); + gfsm_automaton_add_arc(fsm,0,1,1,1,0); +} + +int main (int argc, char **argv) { + gfsmAutomaton *fsm1=NULL, *fsm2=NULL; + + //g_thread_init(NULL); + GFSM_DEBUG_INIT(); + + //print_sizes(); + + if (argc > 1) fsm1file = argv[1]; + if (argc > 2) fsm2file = argv[2]; + + fsm1 = gfsm_automaton_new(); + fsm2 = gfsm_automaton_new(); + + gfsm_automaton_load_bin_filename(fsm1,fsm1file,&errp); + //rudtest_construct(fsm1); + if (errp) { g_printerr("error: %s\n", errp->message); exit(1); } + + if (fsm2) gfsm_automaton_load_bin_filename(fsm2,fsm1file,&errp); + if (errp) { g_printerr("error: %s\n", errp->message); exit(1); } + + hackme(fsm1,fsm2,"1"); + hackme(fsm1,fsm2,"2"); + hackme(fsm1,fsm2,"3"); + hackme(fsm1,fsm2,"4"); + /* */ + + if (fsm1) gfsm_automaton_free(fsm1); + if (fsm2) gfsm_automaton_free(fsm2); + + + GFSM_DEBUG_FINISH(); + GFSM_DEBUG_PRINT(); + + return 0; +} diff --git a/gfsm/gfsm/src/libgfsm/tests/rudtest.tfst b/gfsm/gfsm/src/libgfsm/tests/rudtest.tfst new file mode 100644 index 0000000..ac8c248 --- /dev/null +++ b/gfsm/gfsm/src/libgfsm/tests/rudtest.tfst @@ -0,0 +1,3 @@ +0 1 1 1 0 +1 2 2 2 0 +2 0 diff --git a/gfsm/gfsm/src/libgfsm/tests/rudtest2.tfst b/gfsm/gfsm/src/libgfsm/tests/rudtest2.tfst new file mode 100644 index 0000000..65261c9 --- /dev/null +++ 
b/gfsm/gfsm/src/libgfsm/tests/rudtest2.tfst @@ -0,0 +1,5 @@ +0 1 1 1 0 +1 2 2 2 0 +2 3 3 3 0 +3 4 4 4 0 +4 0 diff --git a/gfsm/gfsm/src/libgfsm/tests/seek2test.c b/gfsm/gfsm/src/libgfsm/tests/seek2test.c new file mode 100644 index 0000000..cf12ac8 --- /dev/null +++ b/gfsm/gfsm/src/libgfsm/tests/seek2test.c @@ -0,0 +1,259 @@ +#include <gfsm.h> +#include <stdio.h> +#include <stdlib.h> +#include "labprobs.h" + +/*====================================================================== + * Globals + */ +const char *prog = "seek2test"; + +const char *fsmfile = "tagh-chopped.gfst"; +//const char *xfsmfile = "tagh-lo.gfstx"; + +gfsmStateId qid_test = 0; +guint out_degree_test = 0; +gulong count_test = +//1024 +//1048576 +4194304 +//16777216 +; + +//#define BENCH_SORTED 1 +#undef BENCH_SORTED + +/*====================================================================== + * bench_seek_vanilla() + */ +double bench_seek_vanilla(gfsmAutomaton *fsm) { + guint i; + double elapsed; + GPtrArray *ary = g_ptr_array_sized_new(gfsm_automaton_out_degree(fsm,qid_test)); + GTimer *timer = g_timer_new(); + + g_timer_start(timer); + for (i=0; i < count_test; i++) { + //-- BEGIN TEST CODE + gfsmStateId qid = qid_test; + gfsmLabelId lab = g_array_index(seekus,gfsmLabelId,i); + gfsmArcIter ai; + ary->len=0; + for (gfsm_arciter_open(&ai,fsm,qid), gfsm_arciter_seek_lower(&ai,lab); + gfsm_arciter_ok(&ai); + gfsm_arciter_next(&ai), gfsm_arciter_seek_lower(&ai,lab)) + { + gfsmArc *a = gfsm_arciter_arc(&ai); + if (fsm->flags.sort_mode==gfsmASMLower && a->lower!=lab) break; + g_ptr_array_add(ary, a); + } + //-- END TEST CODE + } + elapsed = g_timer_elapsed(timer,NULL); + + //-- cleanup + g_ptr_array_free(ary,TRUE); + g_timer_destroy(timer); + + return elapsed; +} + +/*====================================================================== + * bench_seek_labx() + */ +#ifndef SEEK_LABX_BINSEARCH_CUTOFF +#define SEEK_LABX_BINSEARCH_CUTOFF 0 +//#define SEEK_LABX_BINSEARCH_CUTOFF 4 +//#define 
SEEK_LABX_BINSEARCH_CUTOFF 8 +//#define SEEK_LABX_BINSEARCH_CUTOFF 16 +//#define SEEK_LABX_BINSEARCH_CUTOFF 32 +//#define SEEK_LABX_BINSEARCH_CUTOFF 64 +//#define SEEK_LABX_BINSEARCH_CUTOFF 128 +//#define SEEK_LABX_BINSEARCH_CUTOFF 256 +#endif + +double bench_seek_labx(gfsmArcLabelIndex *labx) { + guint i; + double elapsed; + GPtrArray *ary = g_ptr_array_sized_new(gfsm_arc_label_index_out_degree(labx,qid_test)); + GTimer *timer = g_timer_new(); + + g_timer_start(timer); + for (i=0; i < count_test; i++) { + //-- BEGIN TEST CODE + gfsmStateId qid = qid_test; + gfsmLabelId lab = g_array_index(seekus,gfsmLabelId,i); + gfsmArcRange range; + gfsmArc *a; + ary->len=0; + gfsm_arcrange_open_label_index(&range,labx,qid); + if ((range.max - range.min) <= SEEK_LABX_BINSEARCH_CUTOFF) { + for ( ; gfsm_arcrange_ok(&range); gfsm_arcrange_next(&range) ) { + a = gfsm_arcrange_arc(&range); + if (a->lower<lab) continue; + if (a->lower>lab) break; + g_ptr_array_add(ary, a); + } + } else { + for (gfsm_arcrange_seek_lower(&range,lab); gfsm_arcrange_ok(&range); gfsm_arcrange_next(&range)) { + a = gfsm_arcrange_arc(&range); + if (a->lower!=lab) break; + g_ptr_array_add(ary, a); + } + } + //gfsm_arcrange_close(&range); + //-- END TEST CODE + } + elapsed = g_timer_elapsed(timer,NULL); + + //-- cleanup + g_ptr_array_free(ary,TRUE); + g_timer_destroy(timer); + + return elapsed; +} + + + +/*====================================================================== + * Report + */ +GString *dat_header=NULL; +GString *dat_data=NULL; +gint dat_row=0; +gint dat_col=0; + +void report_new_row(void) { + fprintf(stderr, "%s: qid=%u, out_degree=%u\n", prog, qid_test, out_degree_test); + // + //-- save data for gnuplot output + dat_col=0; + if (!dat_header) dat_header = g_string_new(""); + if (!dat_data) dat_data = g_string_new(""); + if (dat_row==0) { + g_string_append(dat_header,"#1:out_deg"); + } + g_string_append_printf(dat_data, "%u", out_degree_test); +} + +void report_column(char *label, double 
elapsed) { + double iters_per_sec = ((double)count_test)/elapsed; + // + //-- to stderr + fprintf(stderr, "BENCH[%16s]: %ld iters in %.2g sec: %.2g iters/sec\n", + label, count_test, elapsed, iters_per_sec); + fflush(stderr); + // + //-- to data strings + if (dat_row==0) { + g_string_append_printf(dat_header, "\t%d:%s_secs\t%d:%s_ips", (2*dat_col+2),label, (2*dat_col+3),label); + } + g_string_append_c(dat_data,'\t'); + g_string_append_printf(dat_data,"\t%g\t%g", elapsed,iters_per_sec); + ++dat_col; +} + +void report_end_row(void) { + ++dat_row; + g_string_append(dat_data,"\n"); +} + +void report_gnuplot(void) { + fflush(stderr); + printf("%s\n%s", dat_header->str, dat_data->str); +} + + +/*====================================================================== + * Main + */ +int main(int argc, char **argv) +{ + char *qid_str="0"; + gfsmError *err=NULL; + int argi; + // + gfsmAutomaton *fsm=NULL; + double elapsed_vanilla; +#ifdef BENCH_SORTED + gfsmAutomaton *fsm_sorted=NULL; + double elapsed_sorted; +#endif + gfsmArcLabelIndex *labx=NULL; + double elapsed_labx; + + //-- sanity check + if (argc < 2) { + fprintf(stderr, "Usage: %s [QID(s)...]\n", prog); + exit(1); + } + + //-- load probabilities & initialize + load_label_probs(); + populate_seek_labels(); + + //-- report + fprintf(stderr, "%s: count=%lu\n", prog, count_test); + fflush(stderr); + + //-- load/create: vanilla + fprintf(stderr, "%s: loading vanilla automaton '%s'... ", prog, fsmfile); fflush(stderr); + fsm = gfsm_automaton_new(); + if (!gfsm_automaton_load_bin_filename(fsm,fsmfile,&err)) { + fprintf(stderr,"%s: load failed for '%s': %s\n", prog, fsmfile, (err ? err->message : "?")); + exit(3); + } + fsm->flags.sort_mode = gfsmASMNone; + fprintf(stderr, "loaded.\n"); fflush(stderr); + +#ifdef BENCH_SORTED + //-- load/create: sorted + fprintf(stderr, "%s: sorting... 
", prog); fflush(stderr); + fsm_sorted = gfsm_automaton_clone(fsm); + gfsm_automaton_arcsort(fsm_sorted,gfsmASMLower); + fprintf(stderr, "sorted.\n"); fflush(stderr); +#endif + + //-- load/create: labx + fprintf(stderr, "%s: indexing to gfsmArcLabelIndex... ", prog); fflush(stderr); + labx = gfsm_automaton_to_arc_label_index_lower(fsm,NULL); + fprintf(stderr, "indexed.\n"); fflush(stderr); + + //-- main loop + for (argi=1; argi < argc; argi++) { + qid_str = argv[argi]; + qid_test = strtol(qid_str,NULL,0); + out_degree_test = gfsm_automaton_out_degree(fsm,qid_test); + + report_new_row(); + + //-- benchmark: vanilla + elapsed_vanilla = bench_seek_vanilla(fsm); + report_column("vanilla", elapsed_vanilla); + +#ifdef BENCH_SORTED + //-- benchmark: vanilla+sorted + gfsm_automaton_arcsort(fsm,gfsmASMLower); + elapsed_sorted = bench_seek_vanilla(fsm); + report_column("sorted", elapsed_sorted); +#endif + + //-- benchmark: indexed + elapsed_labx = bench_seek_labx(labx); + report_column("labx", elapsed_labx); + + report_end_row(); + } + + //-- gnuplot output + report_gnuplot(); + + //-- cleanup + if (fsm) gfsm_automaton_free(fsm); +#ifdef BENCH_SORTED + if (fsm_sorted) gfsm_automaton_free(fsm_sorted); +#endif + if (labx) gfsm_arc_label_index_free(labx); + + return 0; +} diff --git a/gfsm/gfsm/src/libgfsm/tests/seek3test.c b/gfsm/gfsm/src/libgfsm/tests/seek3test.c new file mode 100644 index 0000000..8c3e8af --- /dev/null +++ b/gfsm/gfsm/src/libgfsm/tests/seek3test.c @@ -0,0 +1,611 @@ +#include <gfsm.h> +#include <stdio.h> +#include <stdlib.h> + +/*====================================================================== + * Globals + */ +const char *prog = "seek2test"; + +gfsmStateId qid_test = 0; +guint out_degree_test; +gulong count_test_max = +//1024UL //==2^10 +//1048576UL //==2^20 +2097152UL //==2^21 +//4194304UL //==2^22 +//16777216UL //==2^24 +//67108864UL //==2^26 +//268435456UL //==2^28 +; +gulong count_test=0; //-- count_test_max/out_degree + 
+/*====================================================================== + * Label population + */ + +//-------------------------------------------------------------- +// globals +GRand *grand = NULL; +const guint32 grand_seed = 42; +#define GRAND_USE_SEED 1 +//#undef GRAND_USE_SEED + +const guint32 n_labels = 128; +const guint32 n_states = 8192; + +GArray *seekus = NULL; /*-- lab = g_array_index(seekus,i); 1<=i<=count_test --*/ +GArray *seekfrom = NULL; /*-- qid = g_array_index(seekus,i); 1<=i<=count_test --*/ + +//-------------------------------------------------------------- +// random_label() +gfsmLabelId random_label(void) { + if (!grand) { + grand = g_rand_new(); +#ifdef GRAND_USE_SEED + g_rand_set_seed(grand,grand_seed); +#endif + } + return g_rand_int_range(grand,0,n_labels); +} + +//-------------------------------------------------------------- +// populate_seek_labels() +void populate_seek_labels(void) { + int i; + gfsmLabelId lab; + seekus = g_array_sized_new(FALSE,TRUE,sizeof(gfsmLabelId),count_test_max); + for (i=0; i < count_test_max; i++) { + lab = random_label(); + g_array_append_val(seekus,lab); + } +} + +//-------------------------------------------------------------- +// random_state() +gfsmStateId random_state(void) { + if (!grand) { grand = g_rand_new_with_seed(grand_seed); } + return g_rand_int_range(grand,0,n_states); +} + +//-------------------------------------------------------------- +// populate_seek_states() +void populate_seek_states(void) { + int i; + seekfrom = g_array_sized_new(FALSE,TRUE,sizeof(gfsmStateId),count_test_max); + for (i=0; i < count_test_max; i++) { + gfsmStateId qid = random_state(); + g_array_append_val(seekfrom,qid); + } +} + + +/*====================================================================== + * bench_seek_vanilla() + */ +double bench_seek_vanilla(gfsmAutomaton *fsm) { + guint i; + double elapsed; + GPtrArray *ary = g_ptr_array_sized_new(out_degree_test); + GTimer *timer = g_timer_new(); + + 
g_timer_start(timer); + for (i=0; i < count_test; i++) { + //-- BEGIN TEST CODE + gfsmStateId qid = g_array_index(seekfrom,gfsmStateId, i); + gfsmLabelId lab = g_array_index(seekus, gfsmLabelId, i); + gfsmArcIter ai; + ary->len=0; + for (gfsm_arciter_open(&ai,fsm,qid); gfsm_arciter_ok(&ai); gfsm_arciter_next(&ai)) { + gfsmArc *a = gfsm_arciter_arc(&ai); + if (a->lower != lab) continue; + g_ptr_array_add(ary, a); + } + gfsm_arciter_close(&ai); + //-- END TEST CODE + } + elapsed = g_timer_elapsed(timer,NULL); + + //-- cleanup + g_ptr_array_free(ary,TRUE); + g_timer_destroy(timer); + + return elapsed; +} + +/*====================================================================== + * bench_seek_sorted() + */ +double bench_seek_sorted(gfsmAutomaton *fsm) { + guint i; + double elapsed; + GPtrArray *ary = g_ptr_array_sized_new(out_degree_test); + GTimer *timer = g_timer_new(); + + g_timer_start(timer); + for (i=0; i < count_test; i++) { + //-- BEGIN TEST CODE + gfsmStateId qid = g_array_index(seekfrom,gfsmStateId,i); + gfsmLabelId lab = g_array_index(seekus,gfsmLabelId,i); + gfsmArcIter ai; + ary->len=0; + for (gfsm_arciter_open(&ai,fsm,qid); gfsm_arciter_ok(&ai); gfsm_arciter_next(&ai)) { + gfsmArc *a = gfsm_arciter_arc(&ai); + if (a->lower < lab) continue; + if (a->lower > lab) break; + g_ptr_array_add(ary, a); + } + gfsm_arciter_close(&ai); + //-- END TEST CODE + } + elapsed = g_timer_elapsed(timer,NULL); + + //-- cleanup + g_ptr_array_free(ary,TRUE); + g_timer_destroy(timer); + + return elapsed; +} + +/*====================================================================== + * bench_seek_tabx_vanilla() + */ +double bench_seek_tabx_vanilla(gfsmArcTableIndex *tabx) { + guint i; + double elapsed; + GPtrArray *ary = g_ptr_array_sized_new(out_degree_test); + GTimer *timer = g_timer_new(); + + g_timer_start(timer); + for (i=0; i < count_test; i++) { + //-- BEGIN TEST CODE + gfsmStateId qid = g_array_index(seekfrom,gfsmStateId,i); + gfsmLabelId lab = 
g_array_index(seekus,gfsmLabelId,i); + gfsmArcRange range; + ary->len=0; + for (gfsm_arcrange_open_table_index(&range,tabx,qid); gfsm_arcrange_ok(&range); gfsm_arcrange_next(&range)) { + gfsmArc *a = gfsm_arcrange_arc(&range); + if (a->lower != lab) continue; + g_ptr_array_add(ary, a); + } + gfsm_arcrange_close(&range); + //-- END TEST CODE + } + elapsed = g_timer_elapsed(timer,NULL); + + //-- cleanup + g_ptr_array_free(ary,TRUE); + g_timer_destroy(timer); + + return elapsed; +} + +/*====================================================================== + * bench_seek_tabx_sorted() : linear search + */ +double bench_seek_tabx_sorted(gfsmArcTableIndex *tabx) { + guint i; + double elapsed; + GPtrArray *ary = g_ptr_array_sized_new(out_degree_test); + GTimer *timer = g_timer_new(); + + g_timer_start(timer); + for (i=0; i < count_test; i++) { + //-- BEGIN TEST CODE + gfsmStateId qid = g_array_index(seekfrom,gfsmStateId,i); + gfsmLabelId lab = g_array_index(seekus,gfsmLabelId,i); + gfsmArcRange range; + ary->len=0; + for (gfsm_arcrange_open_table_index(&range,tabx,qid); gfsm_arcrange_ok(&range); gfsm_arcrange_next(&range)) { + gfsmArc *a = gfsm_arcrange_arc(&range); + if (a->lower < lab) continue; + if (a->lower > lab) break; + g_ptr_array_add(ary, a); + } + gfsm_arcrange_close(&range); + //-- END TEST CODE + } + elapsed = g_timer_elapsed(timer,NULL); + + //-- cleanup + g_ptr_array_free(ary,TRUE); + g_timer_destroy(timer); + + return elapsed; +} + +/*====================================================================== + * bench_seek_tabx_sorted_2() : linear search (v2) [identical to v1] + */ +double bench_seek_tabx_sorted_2(gfsmArcTableIndex *tabx) { + guint i; + double elapsed; + GPtrArray *ary = g_ptr_array_sized_new(out_degree_test); + GTimer *timer = g_timer_new(); + + g_timer_start(timer); + for (i=0; i < count_test; i++) { + //-- BEGIN TEST CODE + gfsmStateId qid = g_array_index(seekfrom,gfsmStateId,i); + gfsmLabelId lab = g_array_index(seekus,gfsmLabelId,i); + 
gfsmArcRange range; + ary->len=0; + for (gfsm_arcrange_open_table_index(&range,tabx,qid); range.min<range.max; ++range.min) { + if (range.min->lower < lab) continue; + if (range.min->lower > lab) break; + g_ptr_array_add(ary, range.min); + } + gfsm_arcrange_close(&range); + //-- END TEST CODE + } + elapsed = g_timer_elapsed(timer,NULL); + + //-- cleanup + g_ptr_array_free(ary,TRUE); + g_timer_destroy(timer); + + return elapsed; +} + +/*====================================================================== + * bench_seek_tabx_seek_lib() : binary search: library function + */ +inline void gfsm_arcrange_seek_lower(gfsmArcRange *range, gfsmLabelId find) +{ + g_assert(range != NULL); + while (gfsm_arcrange_ok(range) && gfsm_arcrange_arc(range)->lower < find) + gfsm_arcrange_next(range); +} + +double bench_seek_tabx_seek_lib(gfsmArcTableIndex *tabx) { + guint i; + double elapsed; + GPtrArray *ary = g_ptr_array_sized_new(out_degree_test); + GTimer *timer = g_timer_new(); + + g_timer_start(timer); + for (i=0; i < count_test; i++) { + //-- BEGIN TEST CODE + gfsmStateId qid = g_array_index(seekfrom,gfsmStateId,i); + gfsmLabelId lab = g_array_index(seekus,gfsmLabelId,i); + gfsmArcRange range; + ary->len=0; + for (gfsm_arcrange_open_table_index(&range,tabx,qid), gfsm_arcrange_seek_lower(&range,lab); + gfsm_arcrange_ok(&range); + gfsm_arcrange_next(&range)) + { + gfsmArc *a = gfsm_arcrange_arc(&range); + if (a->lower > lab) break; + g_ptr_array_add(ary, a); + } + gfsm_arcrange_close(&range); + //-- END TEST CODE + } + elapsed = g_timer_elapsed(timer,NULL); + + //-- cleanup + g_ptr_array_free(ary,TRUE); + g_timer_destroy(timer); + + return elapsed; +} + +/*====================================================================== + * bench_seek_tabx_bsearch_inl() : binary search: inline function + */ +#define BSEARCH_CUTOFF 16 +static inline gfsmArc *bsearch_lower(gfsmArc *min, gfsmArc *max, gfsmLabelId find) +{ + while (min < max) { + gint diff = max-min; + if (diff < 
BSEARCH_CUTOFF) { + do { + if (min->lower >= find) break; + min++; + } while (min < max); + return min; + } + else { + gfsmArc *mid = min + diff/2; + if (mid->lower < find) min = mid+1; + else max = mid; + } + } + return min; +} + +double bench_seek_tabx_bsearch_inl(gfsmArcTableIndex *tabx) { + guint i; + double elapsed; + GPtrArray *ary = g_ptr_array_sized_new(out_degree_test); + GTimer *timer = g_timer_new(); + + g_timer_start(timer); + for (i=0; i < count_test; i++) { + //-- BEGIN TEST CODE + gfsmStateId qid = g_array_index(seekfrom,gfsmStateId,i); + gfsmLabelId lab = g_array_index(seekus,gfsmLabelId,i); + gfsmArcRange range; + ary->len=0; + for (gfsm_arcrange_open_table_index(&range,tabx,qid), range.min=bsearch_lower(range.min,range.max,lab); + gfsm_arcrange_ok(&range); + gfsm_arcrange_next(&range)) + { + gfsmArc *a = gfsm_arcrange_arc(&range); + if (a->lower > lab) break; + g_ptr_array_add(ary, a); + } + gfsm_arcrange_close(&range); + //-- END TEST CODE + } + elapsed = g_timer_elapsed(timer,NULL); + + //-- cleanup + g_ptr_array_free(ary,TRUE); + g_timer_destroy(timer); + + return elapsed; +} + +/*====================================================================== + * bench_seek_tabx_bsearch_func() : binary search: inline function + */ +static void bsearch_range_func(gfsmArcRange *range, gfsmLabelId find) +{ + gfsmArc *min=range->min, *max=range->max; + while (min < max) { + gfsmArc *mid = min + (max-min)/2; + if (mid->lower < find) min = mid+1; + else max = mid; + } + range->min = min; +} + +double bench_seek_tabx_bsearch_func(gfsmArcTableIndex *tabx) { + guint i; + double elapsed; + GPtrArray *ary = g_ptr_array_sized_new(out_degree_test); + GTimer *timer = g_timer_new(); + + g_timer_start(timer); + for (i=0; i < count_test; i++) { + //-- BEGIN TEST CODE + gfsmStateId qid = g_array_index(seekfrom,gfsmStateId,i); + gfsmLabelId lab = g_array_index(seekus,gfsmLabelId,i); + gfsmArcRange range; + ary->len=0; + for 
(gfsm_arcrange_open_table_index(&range,tabx,qid), bsearch_range_func(&range,lab); + gfsm_arcrange_ok(&range); + gfsm_arcrange_next(&range)) + { + gfsmArc *a = gfsm_arcrange_arc(&range); + if (a->lower > lab) break; + g_ptr_array_add(ary, a); + } + gfsm_arcrange_close(&range); + //-- END TEST CODE + } + elapsed = g_timer_elapsed(timer,NULL); + + //-- cleanup + g_ptr_array_free(ary,TRUE); + g_timer_destroy(timer); + + return elapsed; +} + + +/*====================================================================== + * Report + */ +GString *dat_header=NULL; +GString *dat_data=NULL; +gint dat_row=0; +gint dat_col=0; + +void report_new_row(void) { + fprintf(stderr, "%s: n_states=%u, n_labels=%u, out_degree=%u\n", prog, n_states, n_labels, out_degree_test); + // + //-- save data for gnuplot output + dat_col=0; + if (!dat_header) dat_header = g_string_new(""); + if (!dat_data) dat_data = g_string_new(""); + if (dat_row==0) { + g_string_append(dat_header,"#1:out_deg"); + } + g_string_append_printf(dat_data, "%u", out_degree_test); +} + +void report_column(char *label, double elapsed) { + double iters_per_sec = ((double)count_test)/elapsed; + // + //-- to stderr + fprintf(stderr, "BENCH[%24s]: %ld iters in %5.3f sec: %7.2e iters/sec\n", + label, count_test, elapsed, iters_per_sec); + fflush(stderr); + // + //-- to data strings + if (dat_row==0) { + g_string_append_printf(dat_header, "\t%d:%s_secs\t%d:%s_ips", (2*dat_col+2),label, (2*dat_col+3),label); + } + g_string_append_c(dat_data,'\t'); + g_string_append_printf(dat_data,"\t%g\t%g", elapsed,iters_per_sec); + ++dat_col; +} + +void report_end_row(void) { + ++dat_row; + g_string_append(dat_data,"\n"); +} + +void report_gnuplot(void) { + fflush(stderr); + printf("%s\n%s", dat_header->str, dat_data->str); +} + + +/*====================================================================== + * Main + */ +//#define BENCH_VANILLA 1 +//#define BENCH_SORTED 1 +//#define BENCH_TABX_VANILLA 1 +#define BENCH_TABX_SORTED 1 +//#define 
BENCH_TABX_SORTED_2 1 +//#define BENCH_TABX_SEEK_LIB 1 +#define BENCH_TABX_BSEARCH_FUNC 1 +//#define BENCH_TABX_BSEARCH_INL 1 +int main(int argc, char **argv) +{ + char *out_degree_str="32"; + int argi, arci, qi; + // + gfsmAutomaton *fsm=NULL; + double elapsed_vanilla=0; + // + gfsmAutomaton *fsm_sorted=NULL; + double elapsed_sorted=0; + // + gfsmArcTableIndex *tabx=NULL; + double elapsed_tabx_vanilla=0; + // + gfsmArcTableIndex *tabx_sorted=NULL; + double elapsed_tabx_sorted=0; + // + gfsmArcTableIndex *tabx_sorted_2=NULL; + double elapsed_tabx_sorted_2=0; + // + gfsmArcTableIndex *tabx_seek_lib=NULL; + double elapsed_tabx_seek_lib=0; + // + gfsmArcTableIndex *tabx_bsearch_func=NULL; + double elapsed_tabx_bsearch_func=0; + // + gfsmArcTableIndex *tabx_bsearch_inl=NULL; + double elapsed_tabx_bsearch_inl=0; + + //-- sanity check + if (argc < 2) { + fprintf(stderr, "Usage: %s [OUT_DEGREE(s)...]\n", prog); + exit(1); + } + + //-- initialize labels to seek + populate_seek_labels(); + populate_seek_states(); + + //-- report + fprintf(stderr, "%s: count_test_max=%lu\n", prog, count_test_max); + fflush(stderr); + + //-- create: vanilla + fsm = gfsm_automaton_new(); + + //-- main loop + for (argi=1; argi < argc; argi++) { + out_degree_str = argv[argi]; + out_degree_test = strtol(out_degree_str,NULL,0); + //count_test = count_test_max / out_degree_test; + count_test = count_test_max; + + //-- populate: vanilla + gfsm_automaton_clear(fsm); + gfsm_automaton_set_root(fsm,gfsm_automaton_ensure_state(fsm,0)); + gfsm_automaton_set_final_state_full(fsm,0,TRUE,fsm->sr->one); + for (qi=1; qi < n_states; qi++) { + gfsm_automaton_ensure_state(fsm,qi); + for (arci=0; arci < out_degree_test; arci++) { + gfsmLabelId lo = random_label(); + gfsmLabelId hi = random_label(); + gfsmWeight w = arci + 1.0; + gfsm_automaton_add_arc(fsm,qi,qi, lo,hi, w); + } + } + + //-------- bench + report_new_row(); + + //-- benchmark: vanilla (twice for cache optimization) +#ifdef BENCH_VANILLA + 
elapsed_vanilla = bench_seek_vanilla(fsm); + elapsed_vanilla = bench_seek_vanilla(fsm); + report_column("vanilla", elapsed_vanilla); +#endif + +#ifdef BENCH_SORTED + //-- benchmark: vanilla+sorted + fsm_sorted = gfsm_automaton_clone(fsm); + gfsm_automaton_arcsort(fsm_sorted,gfsmASMLower); + elapsed_sorted = bench_seek_sorted(fsm_sorted); + elapsed_sorted = bench_seek_sorted(fsm_sorted); + report_column("sorted", elapsed_sorted); +#endif + +#ifdef BENCH_TABX_VANILLA + //-- benchmark: table: vanilla + tabx = gfsm_automaton_to_arc_table_index(fsm,tabx); + elapsed_tabx_vanilla = bench_seek_tabx_vanilla(tabx); + elapsed_tabx_vanilla = bench_seek_tabx_vanilla(tabx); + report_column("tabx_vanilla", elapsed_tabx_vanilla); +#endif + +#ifdef BENCH_TABX_SORTED + //-- benchmark: table: sorted linear + tabx_sorted = gfsm_automaton_to_arc_table_index(fsm,tabx_sorted); + gfsm_arc_table_index_priority_sort(tabx_sorted, gfsmASP_LU, fsm->sr); + elapsed_tabx_sorted = bench_seek_tabx_sorted(tabx_sorted); + elapsed_tabx_sorted = bench_seek_tabx_sorted(tabx_sorted); + report_column("tabx_sorted", elapsed_tabx_sorted); +#endif + +#ifdef BENCH_TABX_SORTED_2 + //-- benchmark: table: sorted linear (v2) + tabx_sorted_2 = gfsm_automaton_to_arc_table_index(fsm,tabx_sorted_2); + gfsm_arc_table_index_priority_sort(tabx_sorted_2, gfsmASP_LU, fsm->sr); + elapsed_tabx_sorted_2 = bench_seek_tabx_sorted_2(tabx_sorted_2); + elapsed_tabx_sorted_2 = bench_seek_tabx_sorted_2(tabx_sorted_2); + report_column("tabx_sorted_2", elapsed_tabx_sorted_2); +#endif + +#ifdef BENCH_TABX_SEEK_LIB + //-- benchmark: table: binary search: lib + tabx_seek_lib = gfsm_automaton_to_arc_table_index(fsm,tabx_seek_lib); + gfsm_arc_table_index_priority_sort(tabx_seek_lib, gfsmASP_LU, fsm->sr); + elapsed_tabx_seek_lib = bench_seek_tabx_seek_lib(tabx_seek_lib); + elapsed_tabx_seek_lib = bench_seek_tabx_seek_lib(tabx_seek_lib); + report_column("tabx_seek_lib", elapsed_tabx_seek_lib); +#endif + +#ifdef BENCH_TABX_BSEARCH_FUNC + 
//-- benchmark: table: binary search: func + tabx_bsearch_func = gfsm_automaton_to_arc_table_index(fsm,tabx_bsearch_func); + gfsm_arc_table_index_priority_sort(tabx_bsearch_func, gfsmASP_LU, fsm->sr); + elapsed_tabx_bsearch_func = bench_seek_tabx_bsearch_func(tabx_bsearch_func); + elapsed_tabx_bsearch_func = bench_seek_tabx_bsearch_func(tabx_bsearch_func); + report_column("tabx_bsearch_func", elapsed_tabx_bsearch_func); +#endif + +#ifdef BENCH_TABX_BSEARCH_INL + //-- benchmark: table: binary search: inline + tabx_bsearch_inl = gfsm_automaton_to_arc_table_index(fsm,tabx_bsearch_inl); + gfsm_arc_table_index_priority_sort(tabx_bsearch_inl, gfsmASP_LU, fsm->sr); + elapsed_tabx_bsearch_inl = bench_seek_tabx_bsearch_inl(tabx_bsearch_inl); + elapsed_tabx_bsearch_inl = bench_seek_tabx_bsearch_inl(tabx_bsearch_inl); + report_column("tabx_bsearch_inl", elapsed_tabx_bsearch_inl); +#endif + + report_end_row(); + } + + //-- gnuplot output + report_gnuplot(); + + //-- cleanup: release every automaton / arc-table index that may have been allocated above + //-- (each pointer starts out NULL, so benchmarks that were compiled out are skipped) + if (fsm) gfsm_automaton_free(fsm); + if (fsm_sorted) gfsm_automaton_free(fsm_sorted); + if (tabx) gfsm_arc_table_index_free(tabx); + if (tabx_sorted) gfsm_arc_table_index_free(tabx_sorted); + if (tabx_sorted_2) gfsm_arc_table_index_free(tabx_sorted_2); + if (tabx_seek_lib) gfsm_arc_table_index_free(tabx_seek_lib); + if (tabx_bsearch_func) gfsm_arc_table_index_free(tabx_bsearch_func); + if (tabx_bsearch_inl) gfsm_arc_table_index_free(tabx_bsearch_inl); + + return 0; +} diff --git a/gfsm/gfsm/src/libgfsm/tests/seektest.c b/gfsm/gfsm/src/libgfsm/tests/seektest.c new file mode 100644 index 0000000..cc59874 --- /dev/null +++ b/gfsm/gfsm/src/libgfsm/tests/seektest.c @@ -0,0 +1,365 @@ +#include <gfsm.h> +#include <gfsmIndexed2.h> +#include <stdio.h> +#include <stdlib.h> +#include "labprobs.h" + +/*====================================================================== + * Globals + */ +const char *prog = "seektest"; + +const char *fsmfile = "tagh-chopped.gfst"; +//const char *xfsmfile = "tagh-lo.gfstx"; + +gfsmStateId qid_test = 0; +guint out_degree_test = 0; +gulong count_test = +//1024 +//1048576 +4194304 +//16777216 +; + +//#define BENCH_SORTED 1 +#undef BENCH_SORTED + 
+/*====================================================================== + * bench_seek_vanilla() + */ +double bench_seek_vanilla(gfsmAutomaton *fsm) { + guint i; + double elapsed; + GPtrArray *ary = g_ptr_array_sized_new(gfsm_automaton_out_degree(fsm,qid_test)); + GTimer *timer = g_timer_new(); + + g_timer_start(timer); + for (i=0; i < count_test; i++) { + //-- BEGIN TEST CODE + gfsmStateId qid = qid_test; + gfsmLabelId lab = g_array_index(seekus,gfsmLabelId,i); + gfsmArcIter ai; + ary->len=0; + for (gfsm_arciter_open(&ai,fsm,qid), gfsm_arciter_seek_lower(&ai,lab); + gfsm_arciter_ok(&ai); + gfsm_arciter_next(&ai), gfsm_arciter_seek_lower(&ai,lab)) + { + gfsmArc *a = gfsm_arciter_arc(&ai); + if (fsm->flags.sort_mode==gfsmASMLower && a->lower!=lab) break; + g_ptr_array_add(ary, a); + } + //-- END TEST CODE + } + elapsed = g_timer_elapsed(timer,NULL); + + //-- cleanup + g_ptr_array_free(ary,TRUE); + g_timer_destroy(timer); + + return elapsed; +} + +/*====================================================================== + * indexed_lower_lb() + */ +gfsmArcId indexed_lower_lb(gfsmIndexedAutomaton *fsm, + gfsmLabelId lab, + gfsmArcId aid_lo, + gfsmArcId aid_hi) + +{ + /* + gfsmArcId aid_mid; + gfsmArc *a; + */ + + //if (!gfsm_indexed_automaton_has_state(fsm,qid)) return gfsmNoArc; + /* + aid_lo = g_array_index(fsm->state_first_arc, gfsmArcId, qid); + aid_hi = g_array_index(fsm->state_first_arc, gfsmArcId, qid+1); + */ + + while (aid_lo < aid_hi) { + gfsmArcId aid_mid = (aid_lo+aid_hi)/2; + gfsmArc *a = &g_array_index(fsm->arcs, gfsmArc, g_array_index(fsm->arcix_lower, gfsmArcId, aid_mid)); + if (a->lower < lab) { aid_lo = aid_mid+1; } + else { aid_hi = aid_mid; } + } + //return aid_lo <= aid_hi ? 
aid_lo : gfsmNoArc; + return aid_lo; +} + + +/*====================================================================== + * bench_seek_indexed() + */ +#ifndef SEEK_INDEXED_BINSEARCH_CUTOFF +//#define SEEK_INDEXED_BINSEARCH_CUTOFF 0 +//#define SEEK_INDEXED_BINSEARCH_CUTOFF 4 +//#define SEEK_INDEXED_BINSEARCH_CUTOFF 8 +//#define SEEK_INDEXED_BINSEARCH_CUTOFF 16 +//#define SEEK_INDEXED_BINSEARCH_CUTOFF 32 +#define SEEK_INDEXED_BINSEARCH_CUTOFF 64 +#endif +double bench_seek_indexed(gfsmIndexedAutomaton *fsm) { +#if 1 + guint i; + double elapsed; + GPtrArray *ary = g_ptr_array_sized_new(gfsm_indexed_automaton_out_degree(fsm,qid_test)); + GTimer *timer = g_timer_new(); + + g_timer_start(timer); + for (i=0; i < count_test; i++) { + //-- BEGIN TEST CODE + gfsmStateId qid = qid_test; + gfsmLabelId lab = g_array_index(seekus,gfsmLabelId,i); + gfsmArcId aid_lo = g_array_index(fsm->state_first_arc, gfsmArcId, qid); + gfsmArcId aid_hi = g_array_index(fsm->state_first_arc, gfsmArcId, qid+1); + gfsmArcId aid; + gfsmArc *a; + ary->len=0; + if (aid_hi-aid_lo >= SEEK_INDEXED_BINSEARCH_CUTOFF) { + for (aid=indexed_lower_lb(fsm,lab,aid_lo,aid_hi); aid<aid_hi; aid++) { + a = &g_array_index(fsm->arcs, gfsmArc, g_array_index(fsm->arcix_lower, gfsmArcId, aid)); + if (a->lower!=lab) break; + g_ptr_array_add(ary, a); + } + } else { + for (a=((gfsmArc*)fsm->arcs->data)+aid_lo; a < ((gfsmArc*)fsm->arcs->data)+aid_hi; a++) { + if (a->lower==lab) g_ptr_array_add(ary,a); + } + } + //-- END TEST CODE + } + elapsed = g_timer_elapsed(timer,NULL); + + //-- cleanup + g_ptr_array_free(ary,TRUE); + g_timer_destroy(timer); + + return elapsed; +#else + return 1e38; //-- dummy +#endif +} + + +/*====================================================================== + * indexed2_lower_lb() + */ +gfsmArcId indexed2_lower_lb(gfsmIndexedAutomaton2 *fsm, + gfsmLabelId lab, + gfsmArcId aid_lo, + gfsmArcId aid_hi) + +{ + while (aid_lo < aid_hi) { + gfsmArcId aid_mid = (aid_lo+aid_hi)/2; + gfsmArc *a = 
g_ptr_array_index(fsm->arcix_lower, aid_mid); + if (a->lower < lab) { aid_lo = aid_mid+1; } + else { aid_hi = aid_mid; } + } + //return aid_lo <= aid_hi ? aid_lo : gfsmNoArc; + return aid_lo; +} + + +/*====================================================================== + * bench_seek_indexed2() + */ +double bench_seek_indexed2(gfsmIndexedAutomaton2 *fsm) { + guint i; + double elapsed; + GPtrArray *ary = g_ptr_array_sized_new(gfsm_indexed_automaton2_out_degree(fsm,qid_test)); + GTimer *timer = g_timer_new(); + + g_timer_start(timer); + for (i=0; i < count_test; i++) { + //-- BEGIN TEST CODE + gfsmStateId qid = qid_test; + gfsmLabelId lab = g_array_index(seekus,gfsmLabelId,i); + gfsmArcId aid_lo = g_array_index(fsm->state_first_arc, gfsmArcId, qid); + gfsmArcId aid_hi = g_array_index(fsm->state_first_arc, gfsmArcId, qid+1); + gfsmArcId aid; + gfsmArc **app; + ary->len=0; + if (aid_hi-aid_lo >= SEEK_INDEXED_BINSEARCH_CUTOFF) { + for (app = ((gfsmArc**)(fsm->arcix_lower->pdata)) + indexed2_lower_lb(fsm,lab,aid_lo,aid_hi); + app < ((gfsmArc**)(fsm->arcix_lower->pdata)) + aid_hi && (*app)->lower==lab; + app++) + { + g_ptr_array_add(ary, (*app)); + } + } else { + for (app = ((gfsmArc**)(fsm->arcix_lower->pdata)) + aid_lo; + app < ((gfsmArc**)(fsm->arcix_lower->pdata)) + aid_hi; + app++) + { + if ((*app)->lower==lab) g_ptr_array_add(ary,(*app)); + } + } + //-- END TEST CODE + } + elapsed = g_timer_elapsed(timer,NULL); + + //-- cleanup + g_ptr_array_free(ary,TRUE); + g_timer_destroy(timer); + + return elapsed; +} + + +/*====================================================================== + * Report + */ +GString *dat_header=NULL; +GString *dat_data=NULL; +gint dat_row=0; +gint dat_col=0; + +void report_new_row(void) { + fprintf(stderr, "%s: qid=%u, out_degree=%u\n", prog, qid_test, out_degree_test); + // + //-- save data for gnuplot output + dat_col=0; + if (!dat_header) dat_header = g_string_new(""); + if (!dat_data) dat_data = g_string_new(""); + if (dat_row==0) { + 
g_string_append(dat_header,"#1:out_deg"); + } + g_string_append_printf(dat_data, "%u", out_degree_test); +} + +void report_column(char *label, double elapsed) { + double iters_per_sec = ((double)count_test)/elapsed; + // + //-- to stderr (count_test is a gulong, hence %lu) + fprintf(stderr, "BENCH[%16s]: %lu iters in %.2g sec: %.2g iters/sec\n", + label, count_test, elapsed, iters_per_sec); + fflush(stderr); + // + //-- to data strings: the header gets two tab-prefixed column names per benchmark (first row only) + if (dat_row==0) { + g_string_append_printf(dat_header, "\t%d:%s_secs\t%d:%s_ips", (2*dat_col+2),label, (2*dat_col+3),label); + } + //-- exactly one tab before each value, so data fields line up with the header fields above + g_string_append_printf(dat_data,"\t%g\t%g", elapsed,iters_per_sec); + ++dat_col; +} + +void report_end_row(void) { + ++dat_row; + g_string_append(dat_data,"\n"); +} + +void report_gnuplot(void) { + fflush(stderr); + printf("%s\n%s", dat_header->str, dat_data->str); +} + + +/*====================================================================== + * Main + */ +int main(int argc, char **argv) +{ + char *qid_str="0"; + gfsmError *err=NULL; + int argi; + // + gfsmAutomaton *fsm=NULL; + double elapsed_vanilla; +#ifdef BENCH_SORTED + gfsmAutomaton *fsm_sorted=NULL; + double elapsed_sorted; +#endif + gfsmIndexedAutomaton *xfsm=NULL; + double elapsed_indexed; + gfsmIndexedAutomaton2 *xfsm2=NULL; + double elapsed_indexed2; + + //-- sanity check + if (argc < 2) { + fprintf(stderr, "Usage: %s [QID(s)...]\n", prog); + exit(1); + } + + //-- load probabilities & initialize + load_label_probs(); + populate_seek_labels(); + + //-- report + fprintf(stderr, "%s: count=%lu\n", prog, count_test); + fflush(stderr); + + //-- load/create: vanilla + fprintf(stderr, "%s: loading vanilla automaton '%s'... ", prog, fsmfile); fflush(stderr); + fsm = gfsm_automaton_new(); + if (!gfsm_automaton_load_bin_filename(fsm,fsmfile,&err)) { + fprintf(stderr,"%s: load failed for '%s': %s\n", prog, fsmfile, (err ? 
err->message : "?")); + exit(3); + } + fsm->flags.sort_mode = gfsmASMNone; + fprintf(stderr, "loaded.\n"); fflush(stderr); + +#ifdef BENCH_SORTED + //-- load/create: sorted + fprintf(stderr, "%s: sorting... ", prog); fflush(stderr); + fsm_sorted = gfsm_automaton_clone(fsm); + gfsm_automaton_arcsort(fsm_sorted,gfsmASMLower); + fprintf(stderr, "sorted.\n"); fflush(stderr); +#endif + + //-- load/create: indexed + fprintf(stderr, "%s: indexing... ", prog); fflush(stderr); + xfsm = gfsm_automaton_to_indexed(fsm,NULL); + fprintf(stderr, "indexed.\n"); fflush(stderr); + + //-- load/create: indexed2 + fprintf(stderr, "%s: indexing(2)... ", prog); fflush(stderr); + xfsm2 = gfsm_automaton_to_indexed2(fsm,NULL); + fprintf(stderr, "indexed.\n"); fflush(stderr); + + //-- main loop: one report row per state id given on the command line + for (argi=1; argi < argc; argi++) { + qid_str = argv[argi]; + qid_test = strtol(qid_str,NULL,0); + out_degree_test = gfsm_automaton_out_degree(fsm,qid_test); + + report_new_row(); + + //-- benchmark: vanilla + elapsed_vanilla = bench_seek_vanilla(fsm); + report_column("vanilla", elapsed_vanilla); + +#ifdef BENCH_SORTED + //-- benchmark: vanilla+sorted + //-- time the pre-sorted clone: sorting 'fsm' in place here would make every later "vanilla" row a sorted run + elapsed_sorted = bench_seek_vanilla(fsm_sorted); + report_column("sorted", elapsed_sorted); +#endif + + //-- benchmark: indexed + elapsed_indexed = bench_seek_indexed(xfsm); + report_column("indexed", elapsed_indexed); + + //-- benchmark: indexed2 + elapsed_indexed2 = bench_seek_indexed2(xfsm2); + report_column("indexed2", elapsed_indexed2); + + report_end_row(); + } + + //-- gnuplot output + report_gnuplot(); + + //-- cleanup (NOTE(review): xfsm2 is never released; its destructor is not visible in this file -- confirm against gfsmIndexed2.h) + if (fsm) gfsm_automaton_free(fsm); +#ifdef BENCH_SORTED + if (fsm_sorted) gfsm_automaton_free(fsm_sorted); +#endif + if (xfsm) gfsm_indexed_automaton_free(xfsm); + + return 0; +} diff --git a/gfsm/gfsm/src/libgfsm/tests/seektest.dat b/gfsm/gfsm/src/libgfsm/tests/seektest.dat new file mode 100644 index 0000000..2f05a4b --- /dev/null +++ b/gfsm/gfsm/src/libgfsm/tests/seektest.dat @@ -0,0 
+1,9 @@ +#1:out_deg 2:vanilla_secs 3:vanilla_ips 4:indexed_secs 5:indexed_ips +1 0.100445 4.17572e+07 0.057384 7.30919e+07 +2 0.108812 3.85463e+07 0.139554 3.00551e+07 +4 0.141588 2.96233e+07 0.148173 2.83068e+07 +8 0.195003 2.15089e+07 0.147859 2.83669e+07 +16 0.292195 1.43545e+07 0.21333 1.96611e+07 +32 0.539624 7.77264e+06 0.328515 1.27675e+07 +64 0.889506 4.71532e+06 0.394453 1.06332e+07 +148 1.79711 2.33392e+06 0.544203 7.70724e+06 diff --git a/gfsm/gfsm/src/libgfsm/tests/settest.c b/gfsm/gfsm/src/libgfsm/tests/settest.c new file mode 100644 index 0000000..443f4d1 --- /dev/null +++ b/gfsm/gfsm/src/libgfsm/tests/settest.c @@ -0,0 +1,36 @@ +#include <glib.h> +#include <gfsm.h> + +int main (void) +{ + gfsmSet *set; + GSList *setl; + GPtrArray *setary; + g_mem_set_vtable(glib_mem_profiler_table); + + set = gfsm_set_new(gfsm_uint_compare); + gfsm_set_insert(set,(gpointer)2); + + //gfsm_set_clear(set); + + /* + fprintf(stderr,"**** set="); + gfsm_set_print_uint(set,stderr); + fprintf(stderr,"\n"); + */ + //setl = gfsm_set_to_slist(set); + //g_slist_free(setl); + /* + setl = g_slist_prepend(NULL,(gpointer)2); + g_slist_free(setl); + */ + setary = g_ptr_array_sized_new(gfsm_set_size(set)); + gfsm_set_to_ptr_array(set,setary); + g_ptr_array_free(setary,TRUE); + + gfsm_set_free(set); + + g_blow_chunks(); + g_mem_profile(); + return 0; +} diff --git a/gfsm/gfsm/src/libgfsm/tests/sizes.bc b/gfsm/gfsm/src/libgfsm/tests/sizes.bc new file mode 100644 index 0000000..28632e4 --- /dev/null +++ b/gfsm/gfsm/src/libgfsm/tests/sizes.bc @@ -0,0 +1,70 @@ +##-*- Mode: Shell-Script -*- + +##-- tagh size (carrot) +tagh_carrot_n_states = 5345565; +tagh_carrot_n_finals = 1; +tagh_carrot_n_arcs = 9407741; + +##-- test constants (tagh, carrot) +n_states = tagh_carrot_n_states; +n_finals = tagh_carrot_n_finals; +n_arcs = tagh_carrot_n_arcs; + +##-- basic type sizes +size_ptr = 4; +size_int32 = 4; +size_float = 4; +size_arc = 12; +size_arc_src = 16; +size_weight = size_float; + +##-- vanilla 
automaton sizes +define size_vanilla_arcs(n_arcs) { return n_arcs*(size_arc+size_ptr); } +define size_vanilla_states(n_states,n_finals) { + return (n_states*(size_int32+size_ptr)) + (n_finals*(size_float+size_int32)); +} +define size_vanilla_full(n_states,n_finals,n_arcs) { + return size_vanilla_states(n_states,n_finals) + size_vanilla_arcs(n_arcs); +} +size_vanilla = size_vanilla_full(n_states,n_finals,n_arcs) / 2^20; ##== 184MB + +##-- basic index type sizes +define size_arctab_ptr(n_arcs) { return (n_arcs*size_ptr); } +define size_arctab(n_arcs) { return (n_arcs*size_arc); } +define size_arcfirst(n_states) { return (n_states*size_ptr); } +define size_finaltab(n_states) { return (n_states*size_weight); } +define size_bitvec(n_bits) { return (n_bits/8); } + +##-- arcindex_ptr: auxilliary index: { arc*[] arc_ptrs_sorted; arc** first; } +define size_arcindex_ptr_full(n_states,n_finals,n_arcs) { + return size_arctab_ptr(n_arcs) + size_arcfirst(n_states); +} +size_arcindex_ptr = size_arcindex_ptr_full(n_states,n_finals,n_arcs)/2^20; ##== 56MB + +##-- arcindex_tab: copy index: { arc[] arcs_sorted; arc* first; } +define size_arcindex_tab_full(n_states,n_finals,n_arcs) { + return size_arctab(n_arcs) + size_arcfirst(n_states); +} +size_arcindex_tab = size_arcindex_tab_full(n_states,n_finals,n_arcs) / 2^20; + +##-- fsmtab: copy index: { arc[] arcs_sorted; arc* first; weight[] finals; } +## --> SMALLEST, likely also MOST EFFICIENT! +## + w/ finals: 149MB (vs. 
vanilla 184MB) +## + w/o finals: 129MB +define size_fsmtab_full(n_states,n_finals,n_arcs) { + return size_arctab(n_arcs) + size_arcfirst(n_states) + size_finaltab(n_states) + size_bitvec(n_states); +} +size_fsmtab = size_fsmtab_full(n_states,n_finals,n_arcs) / 2^20; ##== 149MB + +##-- fsmbitab: bi-indexed: { arc[] arcs; int32~arc*[] first; weight[] finals; int32[] ix_lo; int32[] ix_hi; } +define size_ixtab(n_arcs) { return n_arcs*size_int32; } +define size_fsmbitab_full(n_states,n_finals,n_arcs) { + return size_fsmtab_full(n_states,n_finals,n_arcs) + 2*size_ixtab(n_arcs); +} +size_fsmbitab = size_fsmbitab_full(n_states,n_finals,n_arcs) / 2^20; ##== 220MB + +##-- fsmbitab2: bi-indexed: { arc[] arcs~ix_lo; int32~arc*[] first; weight[] finals; int32[] ix_hi; } +define size_fsmbitab2_full(n_states,n_finals,n_arcs) { + return size_fsmtab_full(n_states,n_finals,n_arcs) + size_ixtab(n_arcs); +} +size_fsmbitab2 = size_fsmbitab2_full(n_states,n_finals,n_arcs) / 2^20; ##== 185MB diff --git a/gfsm/gfsm/src/libgfsm/tests/sizetest.c b/gfsm/gfsm/src/libgfsm/tests/sizetest.c new file mode 100644 index 0000000..5e09070 --- /dev/null +++ b/gfsm/gfsm/src/libgfsm/tests/sizetest.c @@ -0,0 +1,16 @@ +#include <stdio.h> + +typedef struct { + int x1 : 1; + int x2 : 1; + int x3 : 30; +} tstruc; + +int main (void) { + //-- print the sizes of some basic types and of the bit-field struct above; + //-- sizeof() yields a size_t, so cast to unsigned long to match the %lu conversions on every ABI + + printf("sizeof(int)=%lu ; sizeof(tstruc)=%lu\n", (unsigned long)sizeof(int), (unsigned long)sizeof(tstruc)); + printf("sizeof(float)=%lu, sizeof(void*)=%lu\n", (unsigned long)sizeof(float), (unsigned long)sizeof(void*)); + return 0; +} diff --git a/gfsm/gfsm/src/libgfsm/tests/sltest.c b/gfsm/gfsm/src/libgfsm/tests/sltest.c new file mode 100644 index 0000000..8567e6a --- /dev/null +++ b/gfsm/gfsm/src/libgfsm/tests/sltest.c @@ -0,0 +1,25 @@ +#include <glib.h> +#include <stdio.h> + +int main (void) { + GSList *sl=NULL; + GAllocator *myalloc=NULL; + + g_mem_set_vtable(glib_mem_profiler_table); + + //-- allocator hack + myalloc = g_allocator_new("myAllocator", 128); + g_slist_push_allocator(myalloc); + + sl = 
g_slist_prepend(NULL,(gpointer)2); + g_slist_free(sl); + + //-- allocator hack + g_slist_pop_allocator(); + g_allocator_free(myalloc); + + g_blow_chunks(); + g_mem_profile(); + + return 0; +} diff --git a/gfsm/gfsm/src/libgfsm/tests/statetest.tfst b/gfsm/gfsm/src/libgfsm/tests/statetest.tfst new file mode 100644 index 0000000..175575f --- /dev/null +++ b/gfsm/gfsm/src/libgfsm/tests/statetest.tfst @@ -0,0 +1,8 @@ +0 1 0 0 0 +0 2 0 0 0 +0 3 0 0 0 +3 4 0 0 0 +0 99 99 99 0 +1 +2 +3 diff --git a/gfsm/gfsm/src/libgfsm/tests/structtest.c b/gfsm/gfsm/src/libgfsm/tests/structtest.c new file mode 100644 index 0000000..6bb0558 --- /dev/null +++ b/gfsm/gfsm/src/libgfsm/tests/structtest.c @@ -0,0 +1,14 @@ +#include <glib.h> +#include <stdio.h> + +typedef struct { + guint32 i1 : 1; + guint32 i2 : 31; +} testme; + +int main (void) { + //-- compare sizeof(guint32) with the bit-field struct; sizeof() yields a size_t, so cast it to match %lu + printf("guint32:%lu ; testme=%lu\n", (unsigned long)sizeof(guint32), (unsigned long)sizeof(testme)); + + return 0; +} diff --git a/gfsm/gfsm/src/libgfsm/tests/tagh-lo.testus b/gfsm/gfsm/src/libgfsm/tests/tagh-lo.testus new file mode 100644 index 0000000..b8eeec7 --- /dev/null +++ b/gfsm/gfsm/src/libgfsm/tests/tagh-lo.testus @@ -0,0 +1,8 @@ +5 out_deg=1 +4 out_deg=2 +11 out_deg=4 +98 out_deg=8 +5257 out_deg=16 +45623 out_deg=32 +290132 out_deg=64 +0 out_deg=148 diff --git a/gfsm/gfsm/src/libgfsm/tests/test-255.lab b/gfsm/gfsm/src/libgfsm/tests/test-255.lab new file mode 100644 index 0000000..d2ad9fa --- /dev/null +++ b/gfsm/gfsm/src/libgfsm/tests/test-255.lab @@ -0,0 +1,3 @@ +a 1 +ÿ 2 +b 3 diff --git a/gfsm/gfsm/src/libgfsm/tests/test-stateset.c b/gfsm/gfsm/src/libgfsm/tests/test-stateset.c new file mode 100644 index 0000000..4e56112 --- /dev/null +++ b/gfsm/gfsm/src/libgfsm/tests/test-stateset.c @@ -0,0 +1,61 @@ +#include <gfsm.h> +#include <stdio.h> +#include <stdlib.h> + +const char *progname = "test-stateset"; +const char *infilename = "statetest.tfst"; + +gfsmAutomaton *fsm; +gfsmError *err = NULL; + +gboolean stateset_print_func(gfsmStateId id, gpointer data) +{ + printf(" 
%u", id); + return FALSE; +} + +void stateset_print(const char *label, gfsmStateSet *sset) +{ + gfsmStateSetIter ssi; + gfsmStateId ssid; + + printf("%s: {", label); + + //gfsm_stateset_foreach(sset, stateset_print_func, NULL); + for (ssi = gfsm_stateset_iter_begin(sset); + (ssid=gfsm_stateset_iter_id(ssi)) != gfsmNoState; + ssi = gfsm_stateset_iter_next(sset,ssi)) + { + printf(" %u", ssid); + } + + printf(" }\n"); +} + +int main (int argc, char **argv) { + int i; + gfsmStateId id; + gfsmStateSet *sset; + + fsm = gfsm_automaton_new(); + sset = gfsm_stateset_new(); + + if (!gfsm_automaton_compile_filename(fsm,infilename,&err)) { + g_printerr("%s: compile failed for '%s': %s\n", progname, infilename, (err ? err->message : "?")); //-- err may still be NULL on failure + exit(255); + } + printf("%s: compiled test automaton from '%s'\n", *argv, infilename); + + for (i=0; i < argc; i++) { //-- NOTE(review): i=0 also passes argv[0] (the program name) to strtol(), so seed 0 is always tried first -- confirm intended + id = strtol(argv[i],NULL,10); + gfsm_stateset_clear(sset); + gfsm_stateset_populate(sset,fsm,id, gfsmEpsilon, gfsmEpsilon); + + printf("--\nseed=%u\n", id); + stateset_print("equiv", sset); + } + + gfsm_stateset_free(sset); + gfsm_automaton_free(fsm); + return 0; +} diff --git a/gfsm/gfsm/src/libgfsm/tests/test.lab b/gfsm/gfsm/src/libgfsm/tests/test.lab new file mode 100644 index 0000000..ff5af08 --- /dev/null +++ b/gfsm/gfsm/src/libgfsm/tests/test.lab @@ -0,0 +1,10 @@ +<epsilon> 0 +a 1 +b 2 +c 3 +d 4 +e 5 +f 6 +foo 42 +bar 43 +baz 44 diff --git a/gfsm/gfsm/src/libgfsm/tests/test.txt b/gfsm/gfsm/src/libgfsm/tests/test.txt new file mode 100644 index 0000000..c89f586 --- /dev/null +++ b/gfsm/gfsm/src/libgfsm/tests/test.txt @@ -0,0 +1,2 @@ +This is a test. +This is ONLY a test. 
diff --git a/gfsm/gfsm/src/libgfsm/tests/test2.lab b/gfsm/gfsm/src/libgfsm/tests/test2.lab new file mode 100644 index 0000000..2398dad --- /dev/null +++ b/gfsm/gfsm/src/libgfsm/tests/test2.lab @@ -0,0 +1,2 @@ +a 1 +b 2 diff --git a/gfsm/gfsm/src/libgfsm/tests/test2.txt b/gfsm/gfsm/src/libgfsm/tests/test2.txt new file mode 100644 index 0000000..ed2c580 --- /dev/null +++ b/gfsm/gfsm/src/libgfsm/tests/test2.txt @@ -0,0 +1 @@ +a b c
\ No newline at end of file diff --git a/gfsm/gfsm/src/libgfsm/tests/testme.lab b/gfsm/gfsm/src/libgfsm/tests/testme.lab new file mode 100644 index 0000000..78226ef --- /dev/null +++ b/gfsm/gfsm/src/libgfsm/tests/testme.lab @@ -0,0 +1,5 @@ +<eps> 0 +a 1 +b 2 +c 3 +answer 42 diff --git a/gfsm/gfsm/src/programs/.cvsignore b/gfsm/gfsm/src/programs/.cvsignore new file mode 100644 index 0000000..ccc326d --- /dev/null +++ b/gfsm/gfsm/src/programs/.cvsignore @@ -0,0 +1,79 @@ +gfsmindex +gfsmarith +gfsmheader +gfsmoptional +gfsmreplace +gfsmsigma +gfsmarcsort +gfsmclosure +gfsmcompile +gfsmcomplement +gfsmcompose +gfsmcompre +gfsmconcat +gfsmconnect +gfsmconvert +gfsmdeterminize +gfsmdifference +gfsmdraw +gfsminfo +gfsmintersect +gfsminvert +gfsmlabels +gfsmlookup +gfsmprint +gfsmproduct +gfsmproject +gfsmprune +gfsmrenumber +gfsmreverse +gfsmrmepsilon +gfsmstrings +gfsmunion +gfsmviterbi + +*.out +*.tmp +tmp.* +tmp +SFST* + +*~ +.*~ +*.o +*.lo +*.la +.libs +*.a +*.so +.deps +*.ps +*.lfsa +*.fst +#*.tfst +*.fsa +#*.tfsa +*.thfst +*.afst +*.afsa +*.gfsa +*.gfst +*.dot +#*.lab +*.scl + +#*_cmdparser.c +#*_cmdparser.h + +argh +save +tdata + +stamp-h* +aclocal.m4 +Makefile +Makefile.in +README.txt +configure + +ma-dwds-hacks diff --git a/gfsm/gfsm/src/programs/Makefile.am b/gfsm/gfsm/src/programs/Makefile.am new file mode 100644 index 0000000..b59e087 --- /dev/null +++ b/gfsm/gfsm/src/programs/Makefile.am @@ -0,0 +1,613 @@ +## File: src/programs/Makefile.am +## Package: gfsm +## Description: +## + src-level automake file +## +## Process this file with Automake to create Makefile.in. 
+##----------------------------------------------------------------------- + +##----------------------------------------------------------------------- +## subdirectories +##----------------------------------------------------------------------- +#SUBDIRS = + +##----------------------------------------------------------------------- +## primary targets +##----------------------------------------------------------------------- + +## --- executable programs +#bin_PROGRAMS = ${prog_1} ... ${prog_N} +bin_PROGRAMS = \ + gfsmarcsort \ + gfsmarith \ + gfsmclosure \ + gfsmcompile \ + gfsmcomplement \ + gfsmcompose \ + gfsmcompre \ + gfsmconcat \ + gfsmconnect \ + gfsmconvert \ + gfsmdeterminize \ + gfsmdifference \ + gfsmdraw \ + gfsmheader \ + gfsminfo \ + gfsmintersect \ + gfsminvert \ + gfsmlabels \ + gfsmlookup \ + gfsmoptional \ + gfsmprint \ + gfsmproduct \ + gfsmproject \ + gfsmrenumber \ + gfsmreplace \ + gfsmreverse \ + gfsmrmepsilon \ + gfsmsigma \ + gfsmstrings \ + gfsmunion \ + gfsmviterbi + +EXTRA_PROGRAMS = gfsmindex +bin_PROGRAMS += gfsmindex + +EXTRA_DIST = + +#bin_SCRIPTS = ${script1} ... ${scriptN} + +## --- libtool libraries +#lib_LTLIBRARIES = ${lib_1}.la ... ${lib_N}.la + +## --- shared data in @pkgdatadir@ +#pkgdata_DATA = ${data_1} ... ${data_N} + +## --- manpages -- section is auto-detected +#man_MANS = ${man_1} ... 
${man_N} + +##----------------------------------------------------------------------- +## sources +##----------------------------------------------------------------------- + +SUFFIXES = \ + .gog _cmdparser.c _cmdparser.cc _cmdparser.h \ + .pod .txt .1 .html .tex + + +##----------------------------------------------------------------------- +## individual programs +##----------------------------------------------------------------------- + +##~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +## --- Required sources +#${prog_i}_SOURCES = +## +## --- Extra (possible)sources +#EXTRA_${prog_i}_SOURCES = +#EXTRA_${lib_i}_la_SOURCES = +## +## --- leftovers +#mootconfig_SOURCES = mootconfig.cc +## +## --- Additional dependencies +#gfsmcompile_main.o: gfsmcompile_cmdparser.h +## +## --- Linker flags +#${prog_i}_LDFLAGS = -L. -static +#${prog_i}_LDADD = ${non_src_file}.o -lsomelib +##~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +##~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +#gfsmX_SOURCES = \ +# gfsmX_main.c \ +# gfsmX_cmdparser.c gfsmX_cmdparser.h +# +#gfsmX_main.o: gfsmX_cmdparser.h +# +#gfsmX_LDFLAGS = $(LDFLAGS_COMMON) +#gfsmX_LDADD = $(LDADD_COMMON) +# +#EXTRA_DIST += gfsmX.gog +##~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +##~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +gfsmarith_SOURCES = \ + gfsmarith_main.c \ + gfsmarith_cmdparser.c gfsmarith_cmdparser.h + +gfsmarith_main.o: gfsmarith_cmdparser.h + +gfsmarith_LDFLAGS = $(LDFLAGS_COMMON) +gfsmarith_LDADD = $(LDADD_COMMON) + +EXTRA_DIST += gfsmarith.gog + +##~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +gfsmcompile_SOURCES = \ + gfsmcompile_main.c \ + gfsmcompile_cmdparser.c gfsmcompile_cmdparser.h + +gfsmcompile_main.o: gfsmcompile_cmdparser.h + +gfsmcompile_LDFLAGS = $(LDFLAGS_COMMON) +gfsmcompile_LDADD = $(LDADD_COMMON) + +EXTRA_DIST += gfsmcompile.gog + +##~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +gfsmconvert_SOURCES = \ + 
gfsmconvert_main.c \ + gfsmconvert_cmdparser.c gfsmconvert_cmdparser.h + +gfsmconvert_main.o: gfsmconvert_cmdparser.h + +gfsmconvert_LDFLAGS = $(LDFLAGS_COMMON) +gfsmconvert_LDADD = $(LDADD_COMMON) + +EXTRA_DIST += gfsmconvert.gog + +##~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +gfsmprint_SOURCES = \ + gfsmprint_main.c \ + gfsmprint_cmdparser.c gfsmprint_cmdparser.h + +gfsmprint_main.o: gfsmprint_cmdparser.h + +gfsmprint_LDFLAGS = $(LDFLAGS_COMMON) +gfsmprint_LDADD = $(LDADD_COMMON) + +EXTRA_DIST += gfsmprint.gog + +##~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +gfsmindex_SOURCES = \ + gfsmindex_main.c \ + gfsmindex_cmdparser.c gfsmindex_cmdparser.h + +gfsmindex_main.o: gfsmindex_cmdparser.h + +gfsmindex_LDFLAGS = $(LDFLAGS_COMMON) +gfsmindex_LDADD = $(LDADD_COMMON) + +EXTRA_DIST += gfsmindex.gog + +##~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +gfsminfo_SOURCES = \ + gfsminfo_main.c \ + gfsminfo_cmdparser.c gfsminfo_cmdparser.h + +gfsminfo_main.o: gfsminfo_cmdparser.h + +gfsminfo_LDFLAGS = $(LDFLAGS_COMMON) +gfsminfo_LDADD = $(LDADD_COMMON) + +EXTRA_DIST += gfsminfo.gog + +##~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +gfsmarcsort_SOURCES = \ + gfsmarcsort_main.c \ + gfsmarcsort_cmdparser.c gfsmarcsort_cmdparser.h + +gfsmarcsort_main.o: gfsmarcsort_cmdparser.h + +gfsmarcsort_LDFLAGS = $(LDFLAGS_COMMON) +gfsmarcsort_LDADD = $(LDADD_COMMON) + +EXTRA_DIST += gfsmarcsort.gog +##~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +##~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +gfsmcomplement_SOURCES = \ + gfsmcomplement_main.c \ + gfsmcomplement_cmdparser.c gfsmcomplement_cmdparser.h + +gfsmcomplement_main.o: gfsmcomplement_cmdparser.h + +gfsmcomplement_LDFLAGS = $(LDFLAGS_COMMON) +gfsmcomplement_LDADD = $(LDADD_COMMON) + +EXTRA_DIST += gfsmcomplement.gog + +##~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +gfsmcompose_SOURCES = \ + gfsmcompose_main.c \ + gfsmcompose_cmdparser.c 
gfsmcompose_cmdparser.h + +gfsmcompose_main.o: gfsmcompose_cmdparser.h + +gfsmcompose_LDFLAGS = $(LDFLAGS_COMMON) +gfsmcompose_LDADD = $(LDADD_COMMON) + +EXTRA_DIST += gfsmcompose.gog + +##~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +gfsmcompre_SOURCES = \ + gfsmcompre_main.c \ + gfsmcompre_cmdparser.c gfsmcompre_cmdparser.h + +gfsmcompre_main.o: gfsmcompre_cmdparser.h + +gfsmcompre_LDFLAGS = $(LDFLAGS_COMMON) +gfsmcompre_LDADD = $(LDADD_COMMON) + +EXTRA_DIST += gfsmcompre.gog + +##~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +gfsmconcat_SOURCES = \ + gfsmconcat_main.c \ + gfsmconcat_cmdparser.c gfsmconcat_cmdparser.h + +gfsmconcat_main.o: gfsmconcat_cmdparser.h + +gfsmconcat_LDFLAGS = $(LDFLAGS_COMMON) +gfsmconcat_LDADD = $(LDADD_COMMON) + +EXTRA_DIST += gfsmconcat.gog + +##~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +gfsmclosure_SOURCES = \ + gfsmclosure_main.c \ + gfsmclosure_cmdparser.c gfsmclosure_cmdparser.h + +gfsmclosure_main.o: gfsmclosure_cmdparser.h + +gfsmclosure_LDFLAGS = $(LDFLAGS_COMMON) +gfsmclosure_LDADD = $(LDADD_COMMON) + +EXTRA_DIST += gfsmclosure.gog + +##~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +gfsmconnect_SOURCES = \ + gfsmconnect_main.c \ + gfsmconnect_cmdparser.c gfsmconnect_cmdparser.h + +gfsmconnect_main.o: gfsmconnect_cmdparser.h + +gfsmconnect_LDFLAGS = $(LDFLAGS_COMMON) +gfsmconnect_LDADD = $(LDADD_COMMON) + +EXTRA_DIST += gfsmconnect.gog + +##~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +gfsmdeterminize_SOURCES = \ + gfsmdeterminize_main.c \ + gfsmdeterminize_cmdparser.c gfsmdeterminize_cmdparser.h + +gfsmdeterminize_main.o: gfsmdeterminize_cmdparser.h + +gfsmdeterminize_LDFLAGS = $(LDFLAGS_COMMON) +gfsmdeterminize_LDADD = $(LDADD_COMMON) + +EXTRA_DIST += gfsmdeterminize.gog + +##~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +gfsmdifference_SOURCES = \ + gfsmdifference_main.c \ + gfsmdifference_cmdparser.c gfsmdifference_cmdparser.h + +gfsmdifference_main.o: 
gfsmdifference_cmdparser.h + +gfsmdifference_LDFLAGS = $(LDFLAGS_COMMON) +gfsmdifference_LDADD = $(LDADD_COMMON) + +EXTRA_DIST += gfsmdifference.gog + + +##~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +gfsmdraw_SOURCES = \ + gfsmdraw_main.c \ + gfsmdraw_cmdparser.c gfsmdraw_cmdparser.h + +gfsmdraw_main.o: gfsmdraw_cmdparser.h + +gfsmdraw_LDFLAGS = $(LDFLAGS_COMMON) +gfsmdraw_LDADD = $(LDADD_COMMON) + +EXTRA_DIST += gfsmdraw.gog + +##~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +gfsmheader_SOURCES = \ + gfsmheader_main.c \ + gfsmheader_cmdparser.c gfsmheader_cmdparser.h + +gfsmheader_main.o: gfsmheader_cmdparser.h + +gfsmheader_LDFLAGS = $(LDFLAGS_COMMON) +gfsmheader_LDADD = $(LDADD_COMMON) + +EXTRA_DIST += gfsmheader.gog + +##~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +gfsmintersect_SOURCES = \ + gfsmintersect_main.c \ + gfsmintersect_cmdparser.c gfsmintersect_cmdparser.h + +gfsmintersect_main.o: gfsmintersect_cmdparser.h + +gfsmintersect_LDFLAGS = $(LDFLAGS_COMMON) +gfsmintersect_LDADD = $(LDADD_COMMON) + +EXTRA_DIST += gfsmintersect.gog + +##~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +gfsminvert_SOURCES = \ + gfsminvert_main.c \ + gfsminvert_cmdparser.c gfsminvert_cmdparser.h + +gfsminvert_main.o: gfsminvert_cmdparser.h + +gfsminvert_LDFLAGS = $(LDFLAGS_COMMON) +gfsminvert_LDADD = $(LDADD_COMMON) + +EXTRA_DIST += gfsminvert.gog + +##~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +gfsmlabels_SOURCES = \ + gfsmlabels_main.c \ + gfsmlabels_cmdparser.c gfsmlabels_cmdparser.h + +gfsmlabels_main.o: gfsmlabels_cmdparser.h + +gfsmlabels_LDFLAGS = $(LDFLAGS_COMMON) +gfsmlabels_LDADD = $(LDADD_COMMON) + +EXTRA_DIST += gfsmlabels.gog + +##~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +gfsmlookup_SOURCES = \ + gfsmlookup_main.c \ + gfsmlookup_cmdparser.c gfsmlookup_cmdparser.h + +gfsmlookup_main.o: gfsmlookup_cmdparser.h + +gfsmlookup_LDFLAGS = $(LDFLAGS_COMMON) +gfsmlookup_LDADD = $(LDADD_COMMON) + +EXTRA_DIST += 
gfsmlookup.gog + +##~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +gfsmoptional_SOURCES = \ + gfsmoptional_main.c \ + gfsmoptional_cmdparser.c gfsmoptional_cmdparser.h + +gfsmoptional_main.o: gfsmoptional_cmdparser.h + +gfsmoptional_LDFLAGS = $(LDFLAGS_COMMON) +gfsmoptional_LDADD = $(LDADD_COMMON) + +EXTRA_DIST += gfsmoptional.gog + +##~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +gfsmproduct_SOURCES = \ + gfsmproduct_main.c \ + gfsmproduct_cmdparser.c gfsmproduct_cmdparser.h + +gfsmproduct_main.o: gfsmproduct_cmdparser.h + +gfsmproduct_LDFLAGS = $(LDFLAGS_COMMON) +gfsmproduct_LDADD = $(LDADD_COMMON) + +EXTRA_DIST += gfsmproduct.gog + +##~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +gfsmproject_SOURCES = \ + gfsmproject_main.c \ + gfsmproject_cmdparser.c gfsmproject_cmdparser.h + +gfsmproject_main.o: gfsmproject_cmdparser.h + +gfsmproject_LDFLAGS = $(LDFLAGS_COMMON) +gfsmproject_LDADD = $(LDADD_COMMON) + +EXTRA_DIST += gfsmproject.gog + +##~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +gfsmrenumber_SOURCES = \ + gfsmrenumber_main.c \ + gfsmrenumber_cmdparser.c gfsmrenumber_cmdparser.h + +gfsmrenumber_main.o: gfsmrenumber_cmdparser.h + +gfsmrenumber_LDFLAGS = $(LDFLAGS_COMMON) +gfsmrenumber_LDADD = $(LDADD_COMMON) + +EXTRA_DIST += gfsmrenumber.gog + +##~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +gfsmreplace_SOURCES = \ + gfsmreplace_main.c \ + gfsmreplace_cmdparser.c gfsmreplace_cmdparser.h + +gfsmreplace_main.o: gfsmreplace_cmdparser.h + +gfsmreplace_LDFLAGS = $(LDFLAGS_COMMON) +gfsmreplace_LDADD = $(LDADD_COMMON) + +EXTRA_DIST += gfsmreplace.gog + +##~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +gfsmreverse_SOURCES = \ + gfsmreverse_main.c \ + gfsmreverse_cmdparser.c gfsmreverse_cmdparser.h + +gfsmreverse_main.o: gfsmreverse_cmdparser.h + +gfsmreverse_LDFLAGS = $(LDFLAGS_COMMON) +gfsmreverse_LDADD = $(LDADD_COMMON) + +EXTRA_DIST += gfsmreverse.gog + 
+##~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +gfsmrmepsilon_SOURCES = \ + gfsmrmepsilon_main.c \ + gfsmrmepsilon_cmdparser.c gfsmrmepsilon_cmdparser.h + +gfsmrmepsilon_main.o: gfsmrmepsilon_cmdparser.h + +gfsmrmepsilon_LDFLAGS = $(LDFLAGS_COMMON) +gfsmrmepsilon_LDADD = $(LDADD_COMMON) + +EXTRA_DIST += gfsmrmepsilon.gog + +##~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +gfsmsigma_SOURCES = \ + gfsmsigma_main.c \ + gfsmsigma_cmdparser.c gfsmsigma_cmdparser.h + +gfsmsigma_main.o: gfsmsigma_cmdparser.h + +gfsmsigma_LDFLAGS = $(LDFLAGS_COMMON) +gfsmsigma_LDADD = $(LDADD_COMMON) + +EXTRA_DIST += gfsmsigma.gog + +##~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +gfsmstrings_SOURCES = \ + gfsmstrings_main.c \ + gfsmstrings_cmdparser.c gfsmstrings_cmdparser.h + +gfsmstrings_main.o: gfsmstrings_cmdparser.h + +gfsmstrings_LDFLAGS = $(LDFLAGS_COMMON) +gfsmstrings_LDADD = $(LDADD_COMMON) + +EXTRA_DIST += gfsmstrings.gog + + +##~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +gfsmunion_SOURCES = \ + gfsmunion_main.c \ + gfsmunion_cmdparser.c gfsmunion_cmdparser.h + +gfsmunion_main.o: gfsmunion_cmdparser.h + +gfsmunion_LDFLAGS = $(LDFLAGS_COMMON) +gfsmunion_LDADD = $(LDADD_COMMON) + +EXTRA_DIST += gfsmunion.gog + +##~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +gfsmviterbi_SOURCES = \ + gfsmviterbi_main.c \ + gfsmviterbi_cmdparser.c gfsmviterbi_cmdparser.h + +gfsmviterbi_main.o: gfsmviterbi_cmdparser.h + +gfsmviterbi_LDFLAGS = $(LDFLAGS_COMMON) +gfsmviterbi_LDADD = $(LDADD_COMMON) + +EXTRA_DIST += gfsmviterbi.gog + +##----------------------------------------------------------------------- +## headers +##----------------------------------------------------------------------- + +## --- installable headers: @includedir@/@pkgname@ +#pkginclude_HEADERS = ${hfile_1} ... 
${hfile_N} + +## --- no-install headers +noinst_HEADERS = config.h + +##----------------------------------------------------------------------- +## pre-compile rules: gengetopt +##----------------------------------------------------------------------- + +##----------------------------------------------------------------------- +## pre-compile rules: Getopt::Gen (optgen.perl turns each .gog spec into *_cmdparser.[ch] and a .pod) +##----------------------------------------------------------------------- +.SECONDARY: + +OPTGENFLAGS = -u -l --no-handle-rcfile + +if HAVE_OPTGEN + +.gog_cmdparser.h: + optgen.perl $(OPTGENFLAGS) --nopod -F $*_cmdparser $< + +.gog_cmdparser.c: + optgen.perl $(OPTGENFLAGS) --nopod -F $*_cmdparser $< + +.gog.pod: + optgen.perl $(OPTGENFLAGS) --nocfile --nohfile -F $* $< + +endif + +##----------------------------------------------------------------------- +## compile flags +##----------------------------------------------------------------------- + +## --- preprocessor flags (libgfsm headers: source tree first, then build tree) +AM_CPPFLAGS = -I@srcdir@/../libgfsm -I../libgfsm + +## --- compiler flags (C: warning and optimization flags from configure) +AM_CFLAGS = $(gfsm_WFLAGS) $(gfsm_OFLAGS) + +##----------------------------------------------------------------------- +## linker flags +## + library special handling of configure.in vars: +##----------------------------------------------------------------------- +#${prog_i}_LDFLAGS = -L. -static +#${prog_i}_LDADD = ${non_src_file}.o -lsomelib + +LDFLAGS_COMMON = -L../libgfsm +LDADD_COMMON = ../libgfsm/libgfsm.la @gfsm_LIBS@ + +#${lib_i}_la_LDFLAGS = -L. 
-version-info ${lib_i_current}:${lib_i_rev}:${lib_i_age} +#${lib_i}_LIBADD = -lotherlib + +##----------------------------------------------------------------------- +## cleanup: fst (phony 'fsmclean' target: removes generated automaton files) +##----------------------------------------------------------------------- + +fsmclean: + rm -f *.afsa *.afst *.gfsa *.gfst *.thfsa *.thfst + +##----------------------------------------------------------------------- +## Variables: cleanup +##----------------------------------------------------------------------- + +## --- mostlyclean: built by 'make' & commonly rebuilt +#MOSTLYCLEANFILES = + +## --- clean: built by 'make' +#CLEANFILES = + +## --- distclean: built by 'configure' +#DISTCLEANFILES = + +## -- maintainerclean: built by maintainer / by hand (automaton globs kept in sync with 'fsmclean') +MAINTAINERCLEANFILES = \ + *~ .*~ Makefile Makefile.in \ + *.man *.1 *.html *.txt *.tex \ + *.afsa *.afst *.gfsa *.gfst *.thfsa *.thfst \ + *.dot *.ps *.vcg + +# *_cmdparser.c *_cmdparser.h + +##----------------------------------------------------------------------- +## Variables: distribution +##----------------------------------------------------------------------- + +## --- extra distribution files +#EXTRA_DIST = + +## --- recursion subdirectories for 'make dist' +#DIST_SUBDIRS = $(SUBDIRS) + +## --- dist-hook: when another 'Makefile.am' is overkill +#DISTHOOK_DIRS = foo +#DISTHOOK_FILES = foo/bar.txt foo/baz.txt +#dist-hook: +# for d in $(DISTHOOK_DIRS); do\ +# mkdir -p $(distdir)/$$d ;\ +# done +# for f in $(DISTHOOK_FILES); do\ +# cp -p $(srcdir)/$$f $(distdir)/$$f ;\ +# done + +##----------------------------------------------------------------------- +## Rules: cleanup (both targets are commands, not files, so both are declared phony) +##----------------------------------------------------------------------- +.PHONY: cvsclean fsmclean + +cvsclean: maintainer-clean ; diff --git a/gfsm/gfsm/src/programs/arith.tfst b/gfsm/gfsm/src/programs/arith.tfst new file mode 100644 index 0000000..d7e7b72 --- /dev/null +++ b/gfsm/gfsm/src/programs/arith.tfst @@ -0,0 +1,9 @@ +0 1 1 1 1 +0 2 2 2 2 +0 3 3 3 -3 +0 4 4 4 inf +0 0 +1 
1 +2 2 +3 -3 +4 inf diff --git a/gfsm/gfsm/src/programs/c1.tfst b/gfsm/gfsm/src/programs/c1.tfst new file mode 100644 index 0000000..fef3d10 --- /dev/null +++ b/gfsm/gfsm/src/programs/c1.tfst @@ -0,0 +1,5 @@ +0 0 1 2 +0 1 0 0 +1 1 1 3 +0 +1 diff --git a/gfsm/gfsm/src/programs/c2.lab b/gfsm/gfsm/src/programs/c2.lab new file mode 100644 index 0000000..5288861 --- /dev/null +++ b/gfsm/gfsm/src/programs/c2.lab @@ -0,0 +1,11 @@ +<eps> 0 +a 1 +b 2 +A 3 +B 4 +AA 5 +{A|B} 6 +{AA|B} 7 +{} 8 +[e2] 65533 +[e1] 65534 diff --git a/gfsm/gfsm/src/programs/c2.tfst b/gfsm/gfsm/src/programs/c2.tfst new file mode 100644 index 0000000..c437b71 --- /dev/null +++ b/gfsm/gfsm/src/programs/c2.tfst @@ -0,0 +1,4 @@ +0 0 2 3 +0 1 0 0 +1 1 3 1 +0 diff --git a/gfsm/gfsm/src/programs/c2a.tfst b/gfsm/gfsm/src/programs/c2a.tfst new file mode 100644 index 0000000..d6c5236 --- /dev/null +++ b/gfsm/gfsm/src/programs/c2a.tfst @@ -0,0 +1,10 @@ +0 1 a {A|B} +1 2 <eps> a +2 3 a {AA|B} +3 2 <eps> a +2 4 b {A|B} +4 0 <eps> b +0 5 b {A|B} +5 0 <eps> b +0 +2 diff --git a/gfsm/gfsm/src/programs/c2b.tfst b/gfsm/gfsm/src/programs/c2b.tfst new file mode 100644 index 0000000..b16d00e --- /dev/null +++ b/gfsm/gfsm/src/programs/c2b.tfst @@ -0,0 +1,22 @@ +0 1 <eps> <eps> +0 2 <eps> <eps> +0 3 <eps> <eps> +3 4 {A|B} B +3 4 {AA|B} B +4 1 b <eps> +2 5 {A|B} A +2 5 {AA|B} AA +5 1 a <eps> +3 6 {A|B} B +3 6 {AA|B} B +6 2 b <eps> +2 7 {A|B} A +2 7 {AA|B} AA +7 2 a <eps> +3 8 {A|B} B +3 8 {AA|B} B +8 3 b <eps> +2 9 {A|B} A +2 9 {AA|B} AA +9 3 a <eps> +1 diff --git a/gfsm/gfsm/src/programs/c3a.tfst b/gfsm/gfsm/src/programs/c3a.tfst new file mode 100644 index 0000000..d82ca52 --- /dev/null +++ b/gfsm/gfsm/src/programs/c3a.tfst @@ -0,0 +1,8 @@ +0 1 a a +1 0 <eps> A +0 2 b b +2 0 <eps> B +0 +0 0 <eps> <eps1> +1 1 <eps> <eps1> +2 2 <eps> <eps1> diff --git a/gfsm/gfsm/src/programs/c3b.tfst b/gfsm/gfsm/src/programs/c3b.tfst new file mode 100644 index 0000000..4a4133f --- /dev/null +++ b/gfsm/gfsm/src/programs/c3b.tfst @@ -0,0 
+1,26 @@ +0 1 <eps1> <eps> +1 +0 2 <eps1> <eps> +2 4 a A +4 1 A <eps> +2 5 a A +5 2 A <eps> +2 6 a A +6 3 A <eps> +0 3 <eps1> <eps> +3 7 b B +7 1 B <eps> +3 8 b B +8 3 B <eps> +3 9 b B +9 2 B <eps> +0 0 <eps2> <eps> +1 1 <eps2> <eps> +2 2 <eps2> <eps> +3 3 <eps2> <eps> +4 4 <eps2> <eps> +5 5 <eps2> <eps> +6 6 <eps2> <eps> +7 7 <eps2> <eps> +8 8 <eps2> <eps> +9 9 <eps2> <eps> diff --git a/gfsm/gfsm/src/programs/c3filt.tfst b/gfsm/gfsm/src/programs/c3filt.tfst new file mode 100644 index 0000000..34cc982 --- /dev/null +++ b/gfsm/gfsm/src/programs/c3filt.tfst @@ -0,0 +1,20 @@ +0 0 <eps2> <eps1> +0 0 a a +0 0 b b +0 0 A A +0 0 B B +0 +0 1 <eps1> <eps1> +1 1 <eps1> <eps1> +1 0 a a +1 0 b b +1 0 A A +1 0 B B +1 +0 2 <eps2> <eps2> +2 2 <eps2> <eps2> +2 0 a a +2 0 b b +2 0 A A +2 0 B B +2 diff --git a/gfsm/gfsm/src/programs/cat1.tfst b/gfsm/gfsm/src/programs/cat1.tfst new file mode 100644 index 0000000..9a51daf --- /dev/null +++ b/gfsm/gfsm/src/programs/cat1.tfst @@ -0,0 +1,3 @@ +0 1 1 1 1.0 +1 2 2 2 2.0 +2 20 diff --git a/gfsm/gfsm/src/programs/cat2.tfst b/gfsm/gfsm/src/programs/cat2.tfst new file mode 100644 index 0000000..5e9e740 --- /dev/null +++ b/gfsm/gfsm/src/programs/cat2.tfst @@ -0,0 +1,4 @@ +0 1 3 3 30 +0 2 4 4 40 +1 100 +2 200 diff --git a/gfsm/gfsm/src/programs/comp1a.tfst b/gfsm/gfsm/src/programs/comp1a.tfst new file mode 100644 index 0000000..c98130b --- /dev/null +++ b/gfsm/gfsm/src/programs/comp1a.tfst @@ -0,0 +1,4 @@ +0 1 1 0 0 +1 2 0 0 0 +2 3 0 2 0 +3 0 diff --git a/gfsm/gfsm/src/programs/comp1b.tfst b/gfsm/gfsm/src/programs/comp1b.tfst new file mode 100644 index 0000000..7818495 --- /dev/null +++ b/gfsm/gfsm/src/programs/comp1b.tfst @@ -0,0 +1,4 @@ +0 1 2 0 0 +1 2 0 0 0 +2 3 0 3 0 +3 0 diff --git a/gfsm/gfsm/src/programs/comp2.tfst b/gfsm/gfsm/src/programs/comp2.tfst new file mode 100644 index 0000000..5eb2b86 --- /dev/null +++ b/gfsm/gfsm/src/programs/comp2.tfst @@ -0,0 +1,6 @@ +0 1 1 0 0 +1 2 0 0 0 +2 3 0 0 0 +3 4 0 0 0 +4 5 0 3 0 +5 0 diff --git 
a/gfsm/gfsm/src/programs/comp2a.tfst b/gfsm/gfsm/src/programs/comp2a.tfst new file mode 100644 index 0000000..ed416e9 --- /dev/null +++ b/gfsm/gfsm/src/programs/comp2a.tfst @@ -0,0 +1,2 @@ +0 1 1 2 0 +1 0 diff --git a/gfsm/gfsm/src/programs/comp2b.tfst b/gfsm/gfsm/src/programs/comp2b.tfst new file mode 100644 index 0000000..dbf6c01 --- /dev/null +++ b/gfsm/gfsm/src/programs/comp2b.tfst @@ -0,0 +1,3 @@ +0 1 2 0 0 +1 2 0 3 0 +2 0 diff --git a/gfsm/gfsm/src/programs/complement-in.tfst b/gfsm/gfsm/src/programs/complement-in.tfst new file mode 100644 index 0000000..45e1e1b --- /dev/null +++ b/gfsm/gfsm/src/programs/complement-in.tfst @@ -0,0 +1,4 @@ +0 1 1 1 +1 2 2 2 +2 2 2 2 +2 0 diff --git a/gfsm/gfsm/src/programs/compose-mohri-1.tfst b/gfsm/gfsm/src/programs/compose-mohri-1.tfst new file mode 100644 index 0000000..ebf0277 --- /dev/null +++ b/gfsm/gfsm/src/programs/compose-mohri-1.tfst @@ -0,0 +1,5 @@ +0 1 1 1 +1 2 2 0 +2 3 3 0 +3 4 4 4 +4 diff --git a/gfsm/gfsm/src/programs/compose-mohri-2.tfst b/gfsm/gfsm/src/programs/compose-mohri-2.tfst new file mode 100644 index 0000000..154e266 --- /dev/null +++ b/gfsm/gfsm/src/programs/compose-mohri-2.tfst @@ -0,0 +1,4 @@ +0 1 1 4 +1 2 0 5 +2 3 4 1 +3 diff --git a/gfsm/gfsm/src/programs/config.h b/gfsm/gfsm/src/programs/config.h new file mode 100644 index 0000000..2b1cb3c --- /dev/null +++ b/gfsm/gfsm/src/programs/config.h @@ -0,0 +1,13 @@ +#include <gfsmConfig.h> + +/* Define this for verbose memory debugging */ +//#define GFSM_DEBUG_VERBOSE + +#ifdef GFSM_DEBUG_VERBOSE +# include <gfsmDebug.h> +# define GFSM_INIT gfsm_debug_init(); +# define GFSM_FINISH gfsm_debug_finish(); gfsm_debug_print(); +#else +# define GFSM_INIT +# define GFSM_FINISH +#endif diff --git a/gfsm/gfsm/src/programs/connect-in.tfst b/gfsm/gfsm/src/programs/connect-in.tfst new file mode 100644 index 0000000..f160c6c --- /dev/null +++ b/gfsm/gfsm/src/programs/connect-in.tfst @@ -0,0 +1,7 @@ +0 1 0 1 +0 2 0 2 +1 3 1 3 +1 4 1 4 +2 4 2 4 +2 3 2 3 +3 diff --git 
a/gfsm/gfsm/src/programs/ctest.tfst b/gfsm/gfsm/src/programs/ctest.tfst new file mode 100644 index 0000000..f160c6c --- /dev/null +++ b/gfsm/gfsm/src/programs/ctest.tfst @@ -0,0 +1,7 @@ +0 1 0 1 +0 2 0 2 +1 3 1 3 +1 4 1 4 +2 4 2 4 +2 3 2 3 +3 diff --git a/gfsm/gfsm/src/programs/ctest2.tfst b/gfsm/gfsm/src/programs/ctest2.tfst new file mode 100644 index 0000000..b27f50a --- /dev/null +++ b/gfsm/gfsm/src/programs/ctest2.tfst @@ -0,0 +1,6 @@ +0 1 0 1 +0 2 0 2 +1 3 1 3 +1 4 1 4 +2 4 2 4 +2 3 2 3 diff --git a/gfsm/gfsm/src/programs/det.tfst b/gfsm/gfsm/src/programs/det.tfst new file mode 100644 index 0000000..f93e247 --- /dev/null +++ b/gfsm/gfsm/src/programs/det.tfst @@ -0,0 +1,5 @@ +0 1 1 1 +0 2 1 2 +2 3 3 3 +1 +3 diff --git a/gfsm/gfsm/src/programs/determinize-in.tfst b/gfsm/gfsm/src/programs/determinize-in.tfst new file mode 100644 index 0000000..8a6d556 --- /dev/null +++ b/gfsm/gfsm/src/programs/determinize-in.tfst @@ -0,0 +1,7 @@ +0 1 1 1 +0 2 2 2 +0 2 1 1 +2 1 1 1 +2 3 3 3 +1 10 +3 30 diff --git a/gfsm/gfsm/src/programs/elookup.tfst b/gfsm/gfsm/src/programs/elookup.tfst new file mode 100644 index 0000000..a7a0db1 --- /dev/null +++ b/gfsm/gfsm/src/programs/elookup.tfst @@ -0,0 +1,2 @@ +0 0 0 1 0 +0 0 diff --git a/gfsm/gfsm/src/programs/gfsmarcsort.gog b/gfsm/gfsm/src/programs/gfsmarcsort.gog new file mode 100644 index 0000000..ab827d6 --- /dev/null +++ b/gfsm/gfsm/src/programs/gfsmarcsort.gog @@ -0,0 +1,107 @@ +# -*- Mode: Shell-Script -*- +# +# Getopt::Gen specification +#----------------------------------------------------------------------------- +program "gfsmarcsort" +#program_version "0.01" + +purpose "Sort arcs of a finite state machine" +author "Bryan Jurish <moocow@ling.uni-potsdam.de>" +on_reparse "warn" + +#----------------------------------------------------------------------------- +# Details +#----------------------------------------------------------------------------- +details "" + 
+#----------------------------------------------------------------------------- +# Files +#----------------------------------------------------------------------------- +#rcfile "/etc/gfsmrc" +#rcfile "~/.gfsmrc" + +#----------------------------------------------------------------------------- +# Arguments +#----------------------------------------------------------------------------- +argument "BINFILE" "Stored binary gfsm file" \ + details=" +If unspecified, standard input will be read +" + +#----------------------------------------------------------------------------- +# Options +#----------------------------------------------------------------------------- +#group "Basic Options" + +flag "lower" l "Sort by (lower,upper,target) [default]." +flag - i "Alias for -l" + +flag "upper" u "Sort by (upper,lower,target)." +flag - o "Alias for '-u'" + +flag "weight" w "Sort by weight (ascending)." +flag "cost" C "Alias for '-w'" + +string "mode" m "Sort by explicit mode string (overrides -l, -u, etc.)" \ + arg="MODE" \ + default="" \ + details=" +Sort by a mode string over the characters \"luwstLUWST_\", as accepted +by gfsm_acmask_from_chars(). Each character in the string represents +a single elementary arc field comparison, and the comparison with the +highest priority should occur leftmost in the string. 
Correspondence +between characters in MODE and arc comparisons is: + + CHAR : COMPARISON + l : sort by lower labels in ascending order + u : sort by upper labels in ascending order + w : sort by arc weight in semiring-ascending order + s : sort by source state in ascending order (useless here) + t : sort by target state in ascending order + + L : sort by lower labels in descending order + U : sort by upper labels in descending order + W : sort by arc weight in semiring-descending order + S : sort by source state in descending order (useless here) + T : sort by target state in descending order + + _ : no comparison at the corresponding priority level + x : user-specified comparison (useless here) + +" + +int "compress" z "Specify compression level of output file." \ + arg="LEVEL" \ + default="-1" \ + details=" +Specify zlib compression level of output file. -1 (default) indicates +the default compression level, 0 (zero) indicates no zlib compression at all, +and 9 indicates the best possible compression. +" + +string "output" F "Specifiy output file (default=stdout)." \ + arg="FILE" \ + default="-" + +#----------------------------------------------------------------------------- +# Addenda +#----------------------------------------------------------------------------- +#addenda "" + +#----------------------------------------------------------------------------- +# Bugs +#----------------------------------------------------------------------------- +bugs " + +None known. 
+ +" + +#----------------------------------------------------------------------------- +# Footer +#----------------------------------------------------------------------------- +#acknowledge `cat acknowledge.pod` + +seealso " +L<gfsmutils> +" diff --git a/gfsm/gfsm/src/programs/gfsmarcsort_cmdparser.c b/gfsm/gfsm/src/programs/gfsmarcsort_cmdparser.c new file mode 100644 index 0000000..94f4f16 --- /dev/null +++ b/gfsm/gfsm/src/programs/gfsmarcsort_cmdparser.c @@ -0,0 +1,595 @@ +/* -*- Mode: C -*- + * + * File: gfsmarcsort_cmdparser.c + * Description: Code for command-line parser struct gengetopt_args_info. + * + * File autogenerated by optgen.perl version 0.06 + * generated with the following command: + * /usr/local/bin/optgen.perl -u -l --no-handle-rcfile --nopod -F gfsmarcsort_cmdparser gfsmarcsort.gog + * + * The developers of optgen.perl consider the fixed text that goes in all + * optgen.perl output files to be in the public domain: + * we make no copyright claims on it. + * + */ + +#include <stdio.h> +#include <stdlib.h> +#include <errno.h> +#include <string.h> +#include <unistd.h> +#include <ctype.h> + +/* If we use autoconf/autoheader. */ +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#ifdef HAVE_PWD_H +# include <pwd.h> +#endif + +/* Allow user-overrides for PACKAGE and VERSION */ +#ifndef PACKAGE +# define PACKAGE "PACKAGE" +#endif + +#ifndef VERSION +# define VERSION "VERSION" +#endif + + +#ifndef PROGRAM +# define PROGRAM "gfsmarcsort" +#endif + +/* #define cmdline_parser_DEBUG */ + +/* Check for "configure's" getopt check result. 
*/ +#ifndef HAVE_GETOPT_LONG +# include "getopt.h" +#else +# include <getopt.h> +#endif + +#if !defined(HAVE_STRDUP) && !defined(strdup) +# define strdup gengetopt_strdup +#endif /* HAVE_STRDUP */ + +#include "gfsmarcsort_cmdparser.h" + + +/* user code section */ + +/* end user code section */ + + +void +cmdline_parser_print_version (void) +{ + printf("gfsmarcsort (%s %s) by Bryan Jurish <moocow@ling.uni-potsdam.de>\n", PACKAGE, VERSION); +} + +void +cmdline_parser_print_help (void) +{ + cmdline_parser_print_version (); + printf("\n"); + printf("Purpose:\n"); + printf(" Sort arcs of a finite state machine\n"); + printf("\n"); + + printf("Usage: %s [OPTIONS]... BINFILE\n", "gfsmarcsort"); + + printf("\n"); + printf(" Arguments:\n"); + printf(" BINFILE Stored binary gfsm file\n"); + + printf("\n"); + printf(" Options:\n"); + printf(" -h --help Print help and exit.\n"); + printf(" -V --version Print version and exit.\n"); + printf(" -l --lower Sort by (lower,upper,target) [default].\n"); + printf(" -i Alias for -l\n"); + printf(" -u --upper Sort by (upper,lower,target).\n"); + printf(" -o Alias for '-u'\n"); + printf(" -w --weight Sort by weight (ascending).\n"); + printf(" -C --cost Alias for '-w'\n"); + printf(" -mMODE --mode=MODE Sort by explicit mode string (overrides -l, -u, etc.)\n"); + printf(" -zLEVEL --compress=LEVEL Specify compression level of output file.\n"); + printf(" -FFILE --output=FILE Specifiy output file (default=stdout).\n"); +} + +#if !defined(HAVE_STRDUP) && !defined(strdup) +/* gengetopt_strdup(): automatically generated from strdup.c. 
*/ +/* strdup.c replacement of strdup, which is not standard */ +static char * +gengetopt_strdup (const char *s) +{ + char *result = (char*)malloc(strlen(s) + 1); + if (result == (char*)0) + return (char*)0; + strcpy(result, s); + return result; +} +#endif /* HAVE_STRDUP */ + +/* clear_args(args_info): clears all args & resets to defaults */ +static void +clear_args(struct gengetopt_args_info *args_info) +{ + args_info->lower_flag = 0; + args_info->i_flag = 0; + args_info->upper_flag = 0; + args_info->o_flag = 0; + args_info->weight_flag = 0; + args_info->cost_flag = 0; + args_info->mode_arg = strdup(""); + args_info->compress_arg = -1; + args_info->output_arg = strdup("-"); +} + + +int +cmdline_parser (int argc, char * const *argv, struct gengetopt_args_info *args_info) +{ + int c; /* Character of the parsed option. */ + int missing_required_options = 0; + + args_info->help_given = 0; + args_info->version_given = 0; + args_info->lower_given = 0; + args_info->i_given = 0; + args_info->upper_given = 0; + args_info->o_given = 0; + args_info->weight_given = 0; + args_info->cost_given = 0; + args_info->mode_given = 0; + args_info->compress_given = 0; + args_info->output_given = 0; + + clear_args(args_info); + + /* rcfile handling */ + + /* end rcfile handling */ + + optarg = 0; + optind = 1; + opterr = 1; + optopt = '?'; + + while (1) + { + int option_index = 0; + static struct option long_options[] = { + { "help", 0, NULL, 'h' }, + { "version", 0, NULL, 'V' }, + { "lower", 0, NULL, 'l' }, + { "upper", 0, NULL, 'u' }, + { "weight", 0, NULL, 'w' }, + { "cost", 0, NULL, 'C' }, + { "mode", 1, NULL, 'm' }, + { "compress", 1, NULL, 'z' }, + { "output", 1, NULL, 'F' }, + { NULL, 0, NULL, 0 } + }; + static char short_options[] = { + 'h', + 'V', + 'l', + 'i', + 'u', + 'o', + 'w', + 'C', + 'm', ':', + 'z', ':', + 'F', ':', + '\0' + }; + + c = getopt_long (argc, argv, short_options, long_options, &option_index); + + if (c == -1) break; /* Exit from 'while (1)' loop. 
*/ + + if (cmdline_parser_parse_option(c, long_options[option_index].name, optarg, args_info) != 0) { + exit (EXIT_FAILURE); + } + } /* while */ + + + + if ( missing_required_options ) + exit (EXIT_FAILURE); + + + if (optind < argc) { + int i = 0 ; + args_info->inputs_num = argc - optind ; + args_info->inputs = (char **)(malloc ((args_info->inputs_num)*sizeof(char *))) ; + while (optind < argc) + args_info->inputs[ i++ ] = strdup (argv[optind++]) ; + } + + return 0; +} + + +/* Parse a single option */ +int +cmdline_parser_parse_option(char oshort, const char *olong, const char *val, + struct gengetopt_args_info *args_info) +{ + if (!oshort && !(olong && *olong)) return 1; /* ignore null options */ + +#ifdef cmdline_parser_DEBUG + fprintf(stderr, "parse_option(): oshort='%c', olong='%s', val='%s'\n", oshort, olong, val);*/ +#endif + + switch (oshort) + { + case 'h': /* Print help and exit. */ + if (args_info->help_given) { + fprintf(stderr, "%s: `--help' (`-h') option given more than once\n", PROGRAM); + } + clear_args(args_info); + cmdline_parser_print_help(); + exit(EXIT_SUCCESS); + + break; + + case 'V': /* Print version and exit. */ + if (args_info->version_given) { + fprintf(stderr, "%s: `--version' (`-V') option given more than once\n", PROGRAM); + } + clear_args(args_info); + cmdline_parser_print_version(); + exit(EXIT_SUCCESS); + + break; + + case 'l': /* Sort by (lower,upper,target) [default]. */ + if (args_info->lower_given) { + fprintf(stderr, "%s: `--lower' (`-l') option given more than once\n", PROGRAM); + } + args_info->lower_given++; + if (args_info->lower_given <= 1) + args_info->lower_flag = !(args_info->lower_flag); + break; + + case 'i': /* Alias for -l */ + if (args_info->i_given) { + fprintf(stderr, "%s: (`-i') option given more than once\n", PROGRAM); + } + args_info->i_given++; + if (args_info->i_given <= 1) + args_info->i_flag = !(args_info->i_flag); + break; + + case 'u': /* Sort by (upper,lower,target). 
*/ + if (args_info->upper_given) { + fprintf(stderr, "%s: `--upper' (`-u') option given more than once\n", PROGRAM); + } + args_info->upper_given++; + if (args_info->upper_given <= 1) + args_info->upper_flag = !(args_info->upper_flag); + break; + + case 'o': /* Alias for '-u' */ + if (args_info->o_given) { + fprintf(stderr, "%s: (`-o') option given more than once\n", PROGRAM); + } + args_info->o_given++; + if (args_info->o_given <= 1) + args_info->o_flag = !(args_info->o_flag); + break; + + case 'w': /* Sort by weight (ascending). */ + if (args_info->weight_given) { + fprintf(stderr, "%s: `--weight' (`-w') option given more than once\n", PROGRAM); + } + args_info->weight_given++; + if (args_info->weight_given <= 1) + args_info->weight_flag = !(args_info->weight_flag); + break; + + case 'C': /* Alias for '-w' */ + if (args_info->cost_given) { + fprintf(stderr, "%s: `--cost' (`-C') option given more than once\n", PROGRAM); + } + args_info->cost_given++; + if (args_info->cost_given <= 1) + args_info->cost_flag = !(args_info->cost_flag); + break; + + case 'm': /* Sort by explicit mode string (overrides -l, -u, etc.) */ + if (args_info->mode_given) { + fprintf(stderr, "%s: `--mode' (`-m') option given more than once\n", PROGRAM); + } + args_info->mode_given++; + if (args_info->mode_arg) free(args_info->mode_arg); + args_info->mode_arg = strdup(val); + break; + + case 'z': /* Specify compression level of output file. */ + if (args_info->compress_given) { + fprintf(stderr, "%s: `--compress' (`-z') option given more than once\n", PROGRAM); + } + args_info->compress_given++; + args_info->compress_arg = (int)atoi(val); + break; + + case 'F': /* Specifiy output file (default=stdout). 
*/ + if (args_info->output_given) { + fprintf(stderr, "%s: `--output' (`-F') option given more than once\n", PROGRAM); + } + args_info->output_given++; + if (args_info->output_arg) free(args_info->output_arg); + args_info->output_arg = strdup(val); + break; + + case 0: /* Long option(s) with no short form */ + /* Print help and exit. */ + if (strcmp(olong, "help") == 0) { + if (args_info->help_given) { + fprintf(stderr, "%s: `--help' (`-h') option given more than once\n", PROGRAM); + } + clear_args(args_info); + cmdline_parser_print_help(); + exit(EXIT_SUCCESS); + + } + + /* Print version and exit. */ + else if (strcmp(olong, "version") == 0) { + if (args_info->version_given) { + fprintf(stderr, "%s: `--version' (`-V') option given more than once\n", PROGRAM); + } + clear_args(args_info); + cmdline_parser_print_version(); + exit(EXIT_SUCCESS); + + } + + /* Sort by (lower,upper,target) [default]. */ + else if (strcmp(olong, "lower") == 0) { + if (args_info->lower_given) { + fprintf(stderr, "%s: `--lower' (`-l') option given more than once\n", PROGRAM); + } + args_info->lower_given++; + if (args_info->lower_given <= 1) + args_info->lower_flag = !(args_info->lower_flag); + } + + /* Sort by (upper,lower,target). */ + else if (strcmp(olong, "upper") == 0) { + if (args_info->upper_given) { + fprintf(stderr, "%s: `--upper' (`-u') option given more than once\n", PROGRAM); + } + args_info->upper_given++; + if (args_info->upper_given <= 1) + args_info->upper_flag = !(args_info->upper_flag); + } + + /* Sort by weight (ascending). 
*/ + else if (strcmp(olong, "weight") == 0) { + if (args_info->weight_given) { + fprintf(stderr, "%s: `--weight' (`-w') option given more than once\n", PROGRAM); + } + args_info->weight_given++; + if (args_info->weight_given <= 1) + args_info->weight_flag = !(args_info->weight_flag); + } + + /* Alias for '-w' */ + else if (strcmp(olong, "cost") == 0) { + if (args_info->cost_given) { + fprintf(stderr, "%s: `--cost' (`-C') option given more than once\n", PROGRAM); + } + args_info->cost_given++; + if (args_info->cost_given <= 1) + args_info->cost_flag = !(args_info->cost_flag); + } + + /* Sort by explicit mode string (overrides -l, -u, etc.) */ + else if (strcmp(olong, "mode") == 0) { + if (args_info->mode_given) { + fprintf(stderr, "%s: `--mode' (`-m') option given more than once\n", PROGRAM); + } + args_info->mode_given++; + if (args_info->mode_arg) free(args_info->mode_arg); + args_info->mode_arg = strdup(val); + } + + /* Specify compression level of output file. */ + else if (strcmp(olong, "compress") == 0) { + if (args_info->compress_given) { + fprintf(stderr, "%s: `--compress' (`-z') option given more than once\n", PROGRAM); + } + args_info->compress_given++; + args_info->compress_arg = (int)atoi(val); + } + + /* Specifiy output file (default=stdout). */ + else if (strcmp(olong, "output") == 0) { + if (args_info->output_given) { + fprintf(stderr, "%s: `--output' (`-F') option given more than once\n", PROGRAM); + } + args_info->output_given++; + if (args_info->output_arg) free(args_info->output_arg); + args_info->output_arg = strdup(val); + } + + else { + fprintf(stderr, "%s: unknown long option '%s'.\n", PROGRAM, olong); + return (EXIT_FAILURE); + } + break; + + case '?': /* Invalid Option */ + fprintf(stderr, "%s: unknown option '%s'.\n", PROGRAM, olong); + return (EXIT_FAILURE); + + + default: /* bug: options not considered. 
*/ + fprintf (stderr, "%s: option unknown: %c\n", PROGRAM, oshort); + abort (); + } /* switch */ + return 0; +} + + +/* Initialize options not yet given from environmental defaults */ +void +cmdline_parser_envdefaults(struct gengetopt_args_info *args_info) +{ + + + return; +} + + +/* Load option values from an .rc file */ +void +cmdline_parser_read_rcfile(const char *filename, + struct gengetopt_args_info *args_info, + int user_specified) +{ + char *fullname; + FILE *rcfile; + + if (!filename) return; /* ignore NULL filenames */ + +#if defined(HAVE_GETUID) && defined(HAVE_GETPWUID) + if (*filename == '~') { + /* tilde-expansion hack */ + struct passwd *pwent = getpwuid(getuid()); + if (!pwent) { + fprintf(stderr, "%s: user-id %d not found!\n", PROGRAM, getuid()); + return; + } + if (!pwent->pw_dir) { + fprintf(stderr, "%s: home directory for user-id %d not found!\n", PROGRAM, getuid()); + return; + } + fullname = (char *)malloc(strlen(pwent->pw_dir)+strlen(filename)); + strcpy(fullname, pwent->pw_dir); + strcat(fullname, filename+1); + } else { + fullname = strdup(filename); + } +#else /* !(defined(HAVE_GETUID) && defined(HAVE_GETPWUID)) */ + fullname = strdup(filename); +#endif /* defined(HAVE_GETUID) && defined(HAVE_GETPWUID) */ + + /* try to open */ + rcfile = fopen(fullname,"r"); + if (!rcfile) { + if (user_specified) { + fprintf(stderr, "%s: warning: open failed for rc-file '%s': %s\n", + PROGRAM, fullname, strerror(errno)); + } + } + else { + cmdline_parser_read_rc_stream(rcfile, fullname, args_info); + } + + /* cleanup */ + if (fullname != filename) free(fullname); + if (rcfile) fclose(rcfile); + + return; +} + + +/* Parse option values from an .rc file : guts */ +#define OPTPARSE_GET 32 +void +cmdline_parser_read_rc_stream(FILE *rcfile, + const char *filename, + struct gengetopt_args_info *args_info) +{ + char *optname = (char *)malloc(OPTPARSE_GET); + char *optval = (char *)malloc(OPTPARSE_GET); + size_t onsize = OPTPARSE_GET; + size_t ovsize = 
OPTPARSE_GET; + size_t onlen = 0; + size_t ovlen = 0; + int lineno = 0; + char c; + +#ifdef cmdline_parser_DEBUG + fprintf(stderr, "cmdline_parser_read_rc_stream('%s'):\n", filename); +#endif + + while ((c = fgetc(rcfile)) != EOF) { + onlen = 0; + ovlen = 0; + lineno++; + + /* -- get next option-name */ + /* skip leading space and comments */ + if (isspace(c)) continue; + if (c == '#') { + while ((c = fgetc(rcfile)) != EOF) { + if (c == '\n') break; + } + continue; + } + + /* parse option-name */ + while (c != EOF && c != '=' && !isspace(c)) { + /* re-allocate if necessary */ + if (onlen >= onsize-1) { + char *tmp = (char *)malloc(onsize+OPTPARSE_GET); + strcpy(tmp,optname); + free(optname); + + onsize += OPTPARSE_GET; + optname = tmp; + } + optname[onlen++] = c; + c = fgetc(rcfile); + } + optname[onlen++] = '\0'; + +#ifdef cmdline_parser_DEBUG + fprintf(stderr, "cmdline_parser_read_rc_stream('%s'): line %d: optname='%s'\n", + filename, lineno, optname); +#endif + + /* -- get next option-value */ + /* skip leading space */ + while ((c = fgetc(rcfile)) != EOF && isspace(c)) { + ; + } + + /* parse option-value */ + while (c != EOF && c != '\n') { + /* re-allocate if necessary */ + if (ovlen >= ovsize-1) { + char *tmp = (char *)malloc(ovsize+OPTPARSE_GET); + strcpy(tmp,optval); + free(optval); + ovsize += OPTPARSE_GET; + optval = tmp; + } + optval[ovlen++] = c; + c = fgetc(rcfile); + } + optval[ovlen++] = '\0'; + + /* now do the action for the option */ + if (cmdline_parser_parse_option('\0',optname,optval,args_info) != 0) { + fprintf(stderr, "%s: error in file '%s' at line %d.\n", PROGRAM, filename, lineno); + + } + } + + /* cleanup */ + free(optname); + free(optval); + + return; +} diff --git a/gfsm/gfsm/src/programs/gfsmarcsort_cmdparser.h b/gfsm/gfsm/src/programs/gfsmarcsort_cmdparser.h new file mode 100644 index 0000000..29f5f6a --- /dev/null +++ b/gfsm/gfsm/src/programs/gfsmarcsort_cmdparser.h @@ -0,0 +1,77 @@ +/* -*- Mode: C -*- + * + * File: 
gfsmarcsort_cmdparser.h + * Description: Headers for command-line parser struct gengetopt_args_info. + * + * File autogenerated by optgen.perl version 0.06. + * + */ + +#ifndef gfsmarcsort_cmdparser_h +#define gfsmarcsort_cmdparser_h + +#ifdef __cplusplus +extern "C" { +#endif /* __cplusplus */ + +/* + * moocow: Never set PACKAGE and VERSION here. + */ + +struct gengetopt_args_info { + int lower_flag; /* Sort by (lower,upper,target) [default]. (default=0). */ + int i_flag; /* Alias for -l (default=0). */ + int upper_flag; /* Sort by (upper,lower,target). (default=0). */ + int o_flag; /* Alias for '-u' (default=0). */ + int weight_flag; /* Sort by weight (ascending). (default=0). */ + int cost_flag; /* Alias for '-w' (default=0). */ + char * mode_arg; /* Sort by explicit mode string (overrides -l, -u, etc.) (default=). */ + int compress_arg; /* Specify compression level of output file. (default=-1). */ + char * output_arg; /* Specifiy output file (default=stdout). (default=-). */ + + int help_given; /* Whether help was given */ + int version_given; /* Whether version was given */ + int lower_given; /* Whether lower was given */ + int i_given; /* Whether - was given */ + int upper_given; /* Whether upper was given */ + int o_given; /* Whether - was given */ + int weight_given; /* Whether weight was given */ + int cost_given; /* Whether cost was given */ + int mode_given; /* Whether mode was given */ + int compress_given; /* Whether compress was given */ + int output_given; /* Whether output was given */ + + char **inputs; /* unnamed arguments */ + unsigned inputs_num; /* number of unnamed arguments */ +}; + +/* read rc files (if any) and parse all command-line options in one swell foop */ +int cmdline_parser (int argc, char *const *argv, struct gengetopt_args_info *args_info); + +/* instantiate defaults from environment variables: you must call this yourself! 
*/ +void cmdline_parser_envdefaults (struct gengetopt_args_info *args_info); + +/* read a single rc-file */ +void cmdline_parser_read_rcfile (const char *filename, + struct gengetopt_args_info *args_info, + int user_specified); + +/* read a single rc-file (stream) */ +void cmdline_parser_read_rc_stream (FILE *rcfile, + const char *filename, + struct gengetopt_args_info *args_info); + +/* parse a single option */ +int cmdline_parser_parse_option (char oshort, const char *olong, const char *val, + struct gengetopt_args_info *args_info); + +/* print help message */ +void cmdline_parser_print_help(void); + +/* print version */ +void cmdline_parser_print_version(void); + +#ifdef __cplusplus +} +#endif /* __cplusplus */ +#endif /* gfsmarcsort_cmdparser_h */ diff --git a/gfsm/gfsm/src/programs/gfsmarcsort_main.c b/gfsm/gfsm/src/programs/gfsmarcsort_main.c new file mode 100644 index 0000000..ac3e48f --- /dev/null +++ b/gfsm/gfsm/src/programs/gfsmarcsort_main.c @@ -0,0 +1,102 @@ +/* + gfsm-utils : finite state automaton utilities + Copyright (C) 2004-2007 by Bryan Jurish <moocow@ling.uni-potsdam.de> + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif + +#include <stdio.h> +#include <errno.h> +#include <string.h> +#include <stdlib.h> + +#include <gfsm.h> + +#include "gfsmarcsort_cmdparser.h" + +/*-------------------------------------------------------------------------- + * Globals + *--------------------------------------------------------------------------*/ +char *progname = "gfsmarcsort"; + +//-- options +struct gengetopt_args_info args; + +//-- files +const char *infilename = "-"; +const char *outfilename = "-"; + +//-- global structs & vars +gfsmAutomaton *fsm; +gfsmArcCompMask mode = gfsmASMLower; + +/*-------------------------------------------------------------------------- + * Option Processing + *--------------------------------------------------------------------------*/ +void get_my_options(int argc, char **argv) +{ + if (cmdline_parser(argc, argv, &args) != 0) + exit(1); + + //-- output + if (args.inputs_num) infilename = args.inputs[0]; + if (args.output_arg) outfilename = args.output_arg; + + //-- load environmental defaults + //cmdline_parser_envdefaults(&args); + + //-- mode? 
+ if (args.mode_given) { mode = gfsm_acmask_from_chars(args.mode_arg); } + else if (args.lower_given || args.i_given) mode = gfsmASMLower; + else if (args.upper_given || args.o_given) mode = gfsmASMUpper; + else if (args.weight_given || args.cost_given) mode = gfsmASMWeight; + + //-- initialize automaton + fsm = gfsm_automaton_new(); +} + + +/*-------------------------------------------------------------------------- + * MAIN + *--------------------------------------------------------------------------*/ +int main (int argc, char **argv) +{ + gfsmError *err = NULL; + get_my_options(argc,argv); + + //-- load automaton + if (!gfsm_automaton_load_bin_filename(fsm,infilename,&err)) { + g_printerr("%s: load failed for '%s': %s\n", progname, infilename, err->message); + exit(255); + } + + //-- sort + gfsm_automaton_arcsort(fsm,mode); + + //-- spew automaton + if (!gfsm_automaton_save_bin_filename(fsm,outfilename,args.compress_arg,&err)) { + g_printerr("%s: store failed to '%s': %s\n", progname, outfilename, err->message); + exit(4); + } + + //-- cleanup + if (fsm) gfsm_automaton_free(fsm); + + return 0; +} diff --git a/gfsm/gfsm/src/programs/gfsmarith.gog b/gfsm/gfsm/src/programs/gfsmarith.gog new file mode 100644 index 0000000..b2f14b9 --- /dev/null +++ b/gfsm/gfsm/src/programs/gfsmarith.gog @@ -0,0 +1,132 @@ +# -*- Mode: Shell-Script -*- +# +# Getopt::Gen specification +#----------------------------------------------------------------------------- +program "gfsmarith" +#program_version "0.01" + +purpose "Perform an elementary arithmetic operation on automaton weights" +author "Bryan Jurish <moocow@ling.uni-potsdam.de>" +on_reparse "warn" + +#----------------------------------------------------------------------------- +# Details +#----------------------------------------------------------------------------- +details "" + +#----------------------------------------------------------------------------- +# Files 
+#----------------------------------------------------------------------------- +#rcfile "/etc/gfsmrc" +#rcfile "~/.gfsmrc" + +#----------------------------------------------------------------------------- +# Arguments +#----------------------------------------------------------------------------- +argument "FSMFILE" "Stored binary gfsm file" \ + details=" +If unspecified, standard input will be read +" + +#----------------------------------------------------------------------------- +# Options +#----------------------------------------------------------------------------- + +#------------------------------------------------------- +# Basic Options + +int "compress" z "Specify compression level of output file." \ + arg="LEVEL" \ + default="-1" \ + details=" +Specify zlib compression level of output file. -1 (default) indicates +the default compression level, 0 (zero) indicates no zlib compression at all, +and 9 indicates the best possible compression. +" + +string "output" F "Specifiy output file (default=stdout)." \ + arg="FILE" \ + default="-" + +#------------------------------------------------------- +# Operator Selection +group "Operator Selection" + +flag "exp" E "Real exponentiation" + +flag "log" L "Real logarithm" + +float "multiply" m "Real multiplication by WEIGHT" \ + arg="WEIGHT" + +float "add" a "Real addition of WEIGHT" \ + arg="WEIGHT" + +flag "positive" p "Set real negative weights to zero" + + +float "times" - "Semiring multiplication by WEIGHT" \ + arg="WEIGHT" + +float "plus" - "Semiring addition of WEIGHT" \ + arg="WEIGHT" + +flag "sr-positive" - "Set semiring-negative weights to semiring-zero" + +#------------------------------------------------------- +# Weight Selection +group "Weight Selection" + +flag "no-arcs" A "Exclude arc weights (default: include)" + +flag "no-finals" f "Exclude final weights (default: include)" + +#flag "potentials" P "Include potentials (default: include)" ##-- what the heck are 'potentials'? 
+ +flag "zero" Z "Include semiring-zero weights (default: exclude)" + +#------------------------------------------------------- +# State & Label Selection +group "State & Label Selection" + +flag "initial" i "Select only initial state and its arcs/finality" + +int "state" s "Select only state ID and its arcs/finality (default=all)" \ + arg="ID" \ + default="-1" + +int "lower" l "Select only arcs with lower-label LO (default=any)" \ + arg="LO" \ + default="-1" + +int "upper" u "Select only arcs with upper-label HI (defualt=any)" \ + arg="HI" \ + default="-1" + + + +#----------------------------------------------------------------------------- +# Addenda +#----------------------------------------------------------------------------- +#addenda "" + +#----------------------------------------------------------------------------- +# Bugs +#----------------------------------------------------------------------------- +bugs " + +Note that the --plus and --times operations are +sensitive to the semiring associated with the stored FSM file, while +the --add and --multiply operations have the usual real-number interpretations, +regardless of the automaton semiring. + +" + +#----------------------------------------------------------------------------- +# Footer +#----------------------------------------------------------------------------- +#acknowledge `cat acknowledge.pod` + +seealso " +L<gfsmutils> +" diff --git a/gfsm/gfsm/src/programs/gfsmarith_cmdparser.c b/gfsm/gfsm/src/programs/gfsmarith_cmdparser.c new file mode 100644 index 0000000..7aade2f --- /dev/null +++ b/gfsm/gfsm/src/programs/gfsmarith_cmdparser.c @@ -0,0 +1,773 @@ +/* -*- Mode: C -*- + * + * File: gfsmarith_cmdparser.c + * Description: Code for command-line parser struct gengetopt_args_info. 
+ * + * File autogenerated by optgen.perl version 0.05 + * generated with the following command: + * /usr/local/bin/optgen.perl -u -l --no-handle-rcfile --nopod -F gfsmarith_cmdparser gfsmarith.gog + * + * The developers of optgen.perl consider the fixed text that goes in all + * optgen.perl output files to be in the public domain: + * we make no copyright claims on it. + * + */ + +#include <stdio.h> +#include <stdlib.h> +#include <errno.h> +#include <string.h> +#include <unistd.h> +#include <ctype.h> + +/* If we use autoconf/autoheader. */ +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#ifdef HAVE_PWD_H +# include <pwd.h> +#endif + +/* Allow user-overrides for PACKAGE and VERSION */ +#ifndef PACKAGE +# define PACKAGE "PACKAGE" +#endif + +#ifndef VERSION +# define VERSION "VERSION" +#endif + + +#ifndef PROGRAM +# define PROGRAM "gfsmarith" +#endif + +/* #define cmdline_parser_DEBUG */ + +/* Check for "configure's" getopt check result. */ +#ifndef HAVE_GETOPT_LONG +# include "getopt.h" +#else +# include <getopt.h> +#endif + +#if !defined(HAVE_STRDUP) && !defined(strdup) +# define strdup gengetopt_strdup +#endif /* HAVE_STRDUP */ + +#include "gfsmarith_cmdparser.h" + + +/* user code section */ + +/* end user code section */ + + +void +cmdline_parser_print_version (void) +{ + printf("gfsmarith (%s %s) by Bryan Jurish <moocow@ling.uni-potsdam.de>\n", PACKAGE, VERSION); +} + +void +cmdline_parser_print_help (void) +{ + cmdline_parser_print_version (); + printf("\n"); + printf("Purpose:\n"); + printf(" Perform an elementary arithmetic operation on automaton weights\n"); + printf("\n"); + + printf("Usage: %s [OPTIONS]... 
FSMFILE\n", "gfsmarith"); + + printf("\n"); + printf(" Arguments:\n"); + printf(" FSMFILE Stored binary gfsm file\n"); + + printf("\n"); + printf(" Options:\n"); + printf(" -h --help Print help and exit.\n"); + printf(" -V --version Print version and exit.\n"); + printf(" -zLEVEL --compress=LEVEL Specify compression level of output file.\n"); + printf(" -FFILE --output=FILE Specifiy output file (default=stdout).\n"); + printf("\n"); + printf(" Operator Selection:\n"); + printf(" -E --exp Real exponentiation\n"); + printf(" -L --log Real logarithm\n"); + printf(" -mFLOAT --multiply=FLOAT Real multiplication by WEIGHT\n"); + printf(" -aFLOAT --add=FLOAT Real addition of WEIGHT\n"); + printf(" -p --positive Set real negative weights to zero\n"); + printf(" --times=FLOAT Semiring multiplication by WEIGHT\n"); + printf(" --plus=FLOAT Semiring addition of WEIGHT\n"); + printf(" --sr-positive Set semiring-negative weights to semiring-zero\n"); + printf("\n"); + printf(" Weight Selection:\n"); + printf(" -A --no-arcs Exclude arc weights (default: include)\n"); + printf(" -f --no-finals Exclude final weights (default: include)\n"); + printf(" -Z --zero Include semiring-zero weights (default: exclude)\n"); + printf("\n"); + printf(" State & Label Selection:\n"); + printf(" -i --initial Select only initial state and its arcs/finality\n"); + printf(" -sID --state=ID Select only state ID and its arcs/finality (default=all)\n"); + printf(" -lLO --lower=LO Select only arcs with lower-label LO (default=any)\n"); + printf(" -uHI --upper=HI Select only arcs with upper-label HI (defualt=any)\n"); +} + +#if !defined(HAVE_STRDUP) && !defined(strdup) +/* gengetopt_strdup(): automatically generated from strdup.c. 
*/ +/* strdup.c replacement of strdup, which is not standard */ +static char * +gengetopt_strdup (const char *s) +{ + char *result = (char*)malloc(strlen(s) + 1); + if (result == (char*)0) + return (char*)0; + strcpy(result, s); + return result; +} +#endif /* HAVE_STRDUP */ + +/* clear_args(args_info): clears all args & resets to defaults */ +static void +clear_args(struct gengetopt_args_info *args_info) +{ + args_info->compress_arg = -1; + args_info->output_arg = strdup("-"); + args_info->exp_flag = 0; + args_info->log_flag = 0; + args_info->multiply_arg = 0; + args_info->add_arg = 0; + args_info->positive_flag = 0; + args_info->times_arg = 0; + args_info->plus_arg = 0; + args_info->sr_positive_flag = 0; + args_info->no_arcs_flag = 0; + args_info->no_finals_flag = 0; + args_info->zero_flag = 0; + args_info->initial_flag = 0; + args_info->state_arg = -1; + args_info->lower_arg = -1; + args_info->upper_arg = -1; +} + + +int +cmdline_parser (int argc, char * const *argv, struct gengetopt_args_info *args_info) +{ + int c; /* Character of the parsed option. 
*/ + int missing_required_options = 0; + + args_info->help_given = 0; + args_info->version_given = 0; + args_info->compress_given = 0; + args_info->output_given = 0; + args_info->exp_given = 0; + args_info->log_given = 0; + args_info->multiply_given = 0; + args_info->add_given = 0; + args_info->positive_given = 0; + args_info->times_given = 0; + args_info->plus_given = 0; + args_info->sr_positive_given = 0; + args_info->no_arcs_given = 0; + args_info->no_finals_given = 0; + args_info->zero_given = 0; + args_info->initial_given = 0; + args_info->state_given = 0; + args_info->lower_given = 0; + args_info->upper_given = 0; + + clear_args(args_info); + + /* rcfile handling */ + + /* end rcfile handling */ + + optarg = 0; + optind = 1; + opterr = 1; + optopt = '?'; + + while (1) + { + int option_index = 0; + static struct option long_options[] = { + { "help", 0, NULL, 'h' }, + { "version", 0, NULL, 'V' }, + { "compress", 1, NULL, 'z' }, + { "output", 1, NULL, 'F' }, + { "exp", 0, NULL, 'E' }, + { "log", 0, NULL, 'L' }, + { "multiply", 1, NULL, 'm' }, + { "add", 1, NULL, 'a' }, + { "positive", 0, NULL, 'p' }, + { "times", 1, NULL, 0 }, + { "plus", 1, NULL, 0 }, + { "sr-positive", 0, NULL, 0 }, + { "no-arcs", 0, NULL, 'A' }, + { "no-finals", 0, NULL, 'f' }, + { "zero", 0, NULL, 'Z' }, + { "initial", 0, NULL, 'i' }, + { "state", 1, NULL, 's' }, + { "lower", 1, NULL, 'l' }, + { "upper", 1, NULL, 'u' }, + { NULL, 0, NULL, 0 } + }; + static char short_options[] = { + 'h', + 'V', + 'z', ':', + 'F', ':', + 'E', + 'L', + 'm', ':', + 'a', ':', + 'p', + 'A', + 'f', + 'Z', + 'i', + 's', ':', + 'l', ':', + 'u', ':', + '\0' + }; + + c = getopt_long (argc, argv, short_options, long_options, &option_index); + + if (c == -1) break; /* Exit from 'while (1)' loop. 
*/ + + if (cmdline_parser_parse_option(c, long_options[option_index].name, optarg, args_info) != 0) { + exit (EXIT_FAILURE); + } + } /* while */ + + + + if ( missing_required_options ) + exit (EXIT_FAILURE); + + + if (optind < argc) { + int i = 0 ; + args_info->inputs_num = argc - optind ; + args_info->inputs = (char **)(malloc ((args_info->inputs_num)*sizeof(char *))) ; + while (optind < argc) + args_info->inputs[ i++ ] = strdup (argv[optind++]) ; + } + + return 0; +} + + +/* Parse a single option */ +int +cmdline_parser_parse_option(char oshort, const char *olong, const char *val, + struct gengetopt_args_info *args_info) +{ + if (!oshort && !(olong && *olong)) return 1; /* ignore null options */ + +#ifdef cmdline_parser_DEBUG + fprintf(stderr, "parse_option(): oshort='%c', olong='%s', val='%s'\n", oshort, olong, val);*/ +#endif + + switch (oshort) + { + case 'h': /* Print help and exit. */ + if (args_info->help_given) { + fprintf(stderr, "%s: `--help' (`-h') option given more than once\n", PROGRAM); + } + clear_args(args_info); + cmdline_parser_print_help(); + exit(EXIT_SUCCESS); + + break; + + case 'V': /* Print version and exit. */ + if (args_info->version_given) { + fprintf(stderr, "%s: `--version' (`-V') option given more than once\n", PROGRAM); + } + clear_args(args_info); + cmdline_parser_print_version(); + exit(EXIT_SUCCESS); + + break; + + case 'z': /* Specify compression level of output file. */ + if (args_info->compress_given) { + fprintf(stderr, "%s: `--compress' (`-z') option given more than once\n", PROGRAM); + } + args_info->compress_given++; + args_info->compress_arg = (int)atoi(val); + break; + + case 'F': /* Specifiy output file (default=stdout). 
*/ + if (args_info->output_given) { + fprintf(stderr, "%s: `--output' (`-F') option given more than once\n", PROGRAM); + } + args_info->output_given++; + if (args_info->output_arg) free(args_info->output_arg); + args_info->output_arg = strdup(val); + break; + + case 'E': /* Real exponentiation */ + if (args_info->exp_given) { + fprintf(stderr, "%s: `--exp' (`-E') option given more than once\n", PROGRAM); + } + args_info->exp_given++; + if (args_info->exp_given <= 1) + args_info->exp_flag = !(args_info->exp_flag); + break; + + case 'L': /* Real logarithm */ + if (args_info->log_given) { + fprintf(stderr, "%s: `--log' (`-L') option given more than once\n", PROGRAM); + } + args_info->log_given++; + if (args_info->log_given <= 1) + args_info->log_flag = !(args_info->log_flag); + break; + + case 'm': /* Real multiplication by WEIGHT */ + if (args_info->multiply_given) { + fprintf(stderr, "%s: `--multiply' (`-m') option given more than once\n", PROGRAM); + } + args_info->multiply_given++; + args_info->multiply_arg = (float)strtod(val, NULL); + break; + + case 'a': /* Real addition of WEIGHT */ + if (args_info->add_given) { + fprintf(stderr, "%s: `--add' (`-a') option given more than once\n", PROGRAM); + } + args_info->add_given++; + args_info->add_arg = (float)strtod(val, NULL); + break; + + case 'p': /* Set real negative weights to zero */ + if (args_info->positive_given) { + fprintf(stderr, "%s: `--positive' (`-p') option given more than once\n", PROGRAM); + } + args_info->positive_given++; + if (args_info->positive_given <= 1) + args_info->positive_flag = !(args_info->positive_flag); + break; + + case 'A': /* Exclude arc weights (default: include) */ + if (args_info->no_arcs_given) { + fprintf(stderr, "%s: `--no-arcs' (`-A') option given more than once\n", PROGRAM); + } + args_info->no_arcs_given++; + if (args_info->no_arcs_given <= 1) + args_info->no_arcs_flag = !(args_info->no_arcs_flag); + break; + + case 'f': /* Exclude final weights (default: include) */ + if 
(args_info->no_finals_given) { + fprintf(stderr, "%s: `--no-finals' (`-f') option given more than once\n", PROGRAM); + } + args_info->no_finals_given++; + if (args_info->no_finals_given <= 1) + args_info->no_finals_flag = !(args_info->no_finals_flag); + break; + + case 'Z': /* Include semiring-zero weights (default: exclude) */ + if (args_info->zero_given) { + fprintf(stderr, "%s: `--zero' (`-Z') option given more than once\n", PROGRAM); + } + args_info->zero_given++; + if (args_info->zero_given <= 1) + args_info->zero_flag = !(args_info->zero_flag); + break; + + case 'i': /* Select only initial state and its arcs/finality */ + if (args_info->initial_given) { + fprintf(stderr, "%s: `--initial' (`-i') option given more than once\n", PROGRAM); + } + args_info->initial_given++; + if (args_info->initial_given <= 1) + args_info->initial_flag = !(args_info->initial_flag); + break; + + case 's': /* Select only state ID and its arcs/finality (default=all) */ + if (args_info->state_given) { + fprintf(stderr, "%s: `--state' (`-s') option given more than once\n", PROGRAM); + } + args_info->state_given++; + args_info->state_arg = (int)atoi(val); + break; + + case 'l': /* Select only arcs with lower-label LO (default=any) */ + if (args_info->lower_given) { + fprintf(stderr, "%s: `--lower' (`-l') option given more than once\n", PROGRAM); + } + args_info->lower_given++; + args_info->lower_arg = (int)atoi(val); + break; + + case 'u': /* Select only arcs with upper-label HI (defualt=any) */ + if (args_info->upper_given) { + fprintf(stderr, "%s: `--upper' (`-u') option given more than once\n", PROGRAM); + } + args_info->upper_given++; + args_info->upper_arg = (int)atoi(val); + break; + + case 0: /* Long option(s) with no short form */ + /* Print help and exit. 
*/ + if (strcmp(olong, "help") == 0) { + if (args_info->help_given) { + fprintf(stderr, "%s: `--help' (`-h') option given more than once\n", PROGRAM); + } + clear_args(args_info); + cmdline_parser_print_help(); + exit(EXIT_SUCCESS); + + } + + /* Print version and exit. */ + else if (strcmp(olong, "version") == 0) { + if (args_info->version_given) { + fprintf(stderr, "%s: `--version' (`-V') option given more than once\n", PROGRAM); + } + clear_args(args_info); + cmdline_parser_print_version(); + exit(EXIT_SUCCESS); + + } + + /* Specify compression level of output file. */ + else if (strcmp(olong, "compress") == 0) { + if (args_info->compress_given) { + fprintf(stderr, "%s: `--compress' (`-z') option given more than once\n", PROGRAM); + } + args_info->compress_given++; + args_info->compress_arg = (int)atoi(val); + } + + /* Specifiy output file (default=stdout). */ + else if (strcmp(olong, "output") == 0) { + if (args_info->output_given) { + fprintf(stderr, "%s: `--output' (`-F') option given more than once\n", PROGRAM); + } + args_info->output_given++; + if (args_info->output_arg) free(args_info->output_arg); + args_info->output_arg = strdup(val); + } + + /* Real exponentiation */ + else if (strcmp(olong, "exp") == 0) { + if (args_info->exp_given) { + fprintf(stderr, "%s: `--exp' (`-E') option given more than once\n", PROGRAM); + } + args_info->exp_given++; + if (args_info->exp_given <= 1) + args_info->exp_flag = !(args_info->exp_flag); + } + + /* Real logarithm */ + else if (strcmp(olong, "log") == 0) { + if (args_info->log_given) { + fprintf(stderr, "%s: `--log' (`-L') option given more than once\n", PROGRAM); + } + args_info->log_given++; + if (args_info->log_given <= 1) + args_info->log_flag = !(args_info->log_flag); + } + + /* Real multiplication by WEIGHT */ + else if (strcmp(olong, "multiply") == 0) { + if (args_info->multiply_given) { + fprintf(stderr, "%s: `--multiply' (`-m') option given more than once\n", PROGRAM); + } + args_info->multiply_given++; + 
args_info->multiply_arg = (float)strtod(val, NULL); + } + + /* Real addition of WEIGHT */ + else if (strcmp(olong, "add") == 0) { + if (args_info->add_given) { + fprintf(stderr, "%s: `--add' (`-a') option given more than once\n", PROGRAM); + } + args_info->add_given++; + args_info->add_arg = (float)strtod(val, NULL); + } + + /* Set real negative weights to zero */ + else if (strcmp(olong, "positive") == 0) { + if (args_info->positive_given) { + fprintf(stderr, "%s: `--positive' (`-p') option given more than once\n", PROGRAM); + } + args_info->positive_given++; + if (args_info->positive_given <= 1) + args_info->positive_flag = !(args_info->positive_flag); + } + + /* Semiring multiplication by WEIGHT */ + else if (strcmp(olong, "times") == 0) { + if (args_info->times_given) { + fprintf(stderr, "%s: `--times' option given more than once\n", PROGRAM); + } + args_info->times_given++; + args_info->times_arg = (float)strtod(val, NULL); + } + + /* Semiring addition of WEIGHT */ + else if (strcmp(olong, "plus") == 0) { + if (args_info->plus_given) { + fprintf(stderr, "%s: `--plus' option given more than once\n", PROGRAM); + } + args_info->plus_given++; + args_info->plus_arg = (float)strtod(val, NULL); + } + + /* Set semiring-negative weights to semiring-zero */ + else if (strcmp(olong, "sr-positive") == 0) { + if (args_info->sr_positive_given) { + fprintf(stderr, "%s: `--sr-positive' option given more than once\n", PROGRAM); + } + args_info->sr_positive_given++; + if (args_info->sr_positive_given <= 1) + args_info->sr_positive_flag = !(args_info->sr_positive_flag); + } + + /* Exclude arc weights (default: include) */ + else if (strcmp(olong, "no-arcs") == 0) { + if (args_info->no_arcs_given) { + fprintf(stderr, "%s: `--no-arcs' (`-A') option given more than once\n", PROGRAM); + } + args_info->no_arcs_given++; + if (args_info->no_arcs_given <= 1) + args_info->no_arcs_flag = !(args_info->no_arcs_flag); + } + + /* Exclude final weights (default: include) */ + else if 
(strcmp(olong, "no-finals") == 0) { + if (args_info->no_finals_given) { + fprintf(stderr, "%s: `--no-finals' (`-f') option given more than once\n", PROGRAM); + } + args_info->no_finals_given++; + if (args_info->no_finals_given <= 1) + args_info->no_finals_flag = !(args_info->no_finals_flag); + } + + /* Include semiring-zero weights (default: exclude) */ + else if (strcmp(olong, "zero") == 0) { + if (args_info->zero_given) { + fprintf(stderr, "%s: `--zero' (`-Z') option given more than once\n", PROGRAM); + } + args_info->zero_given++; + if (args_info->zero_given <= 1) + args_info->zero_flag = !(args_info->zero_flag); + } + + /* Select only initial state and its arcs/finality */ + else if (strcmp(olong, "initial") == 0) { + if (args_info->initial_given) { + fprintf(stderr, "%s: `--initial' (`-i') option given more than once\n", PROGRAM); + } + args_info->initial_given++; + if (args_info->initial_given <= 1) + args_info->initial_flag = !(args_info->initial_flag); + } + + /* Select only state ID and its arcs/finality (default=all) */ + else if (strcmp(olong, "state") == 0) { + if (args_info->state_given) { + fprintf(stderr, "%s: `--state' (`-s') option given more than once\n", PROGRAM); + } + args_info->state_given++; + args_info->state_arg = (int)atoi(val); + } + + /* Select only arcs with lower-label LO (default=any) */ + else if (strcmp(olong, "lower") == 0) { + if (args_info->lower_given) { + fprintf(stderr, "%s: `--lower' (`-l') option given more than once\n", PROGRAM); + } + args_info->lower_given++; + args_info->lower_arg = (int)atoi(val); + } + + /* Select only arcs with upper-label HI (defualt=any) */ + else if (strcmp(olong, "upper") == 0) { + if (args_info->upper_given) { + fprintf(stderr, "%s: `--upper' (`-u') option given more than once\n", PROGRAM); + } + args_info->upper_given++; + args_info->upper_arg = (int)atoi(val); + } + + else { + fprintf(stderr, "%s: unknown long option '%s'.\n", PROGRAM, olong); + return (EXIT_FAILURE); + } + break; + + case '?': 
/* Invalid Option */ + fprintf(stderr, "%s: unknown option '%s'.\n", PROGRAM, olong); + return (EXIT_FAILURE); + + + default: /* bug: options not considered. */ + fprintf (stderr, "%s: option unknown: %c\n", PROGRAM, oshort); + abort (); + } /* switch */ + return 0; +} + + +/* Initialize options not yet given from environmental defaults */ +void +cmdline_parser_envdefaults(struct gengetopt_args_info *args_info) +{ + + + return; +} + + +/* Load option values from an .rc file */ +void +cmdline_parser_read_rcfile(const char *filename, + struct gengetopt_args_info *args_info, + int user_specified) +{ + char *fullname; + FILE *rcfile; + + if (!filename) return; /* ignore NULL filenames */ + +#if defined(HAVE_GETUID) && defined(HAVE_GETPWUID) + if (*filename == '~') { + /* tilde-expansion hack */ + struct passwd *pwent = getpwuid(getuid()); + if (!pwent) { + fprintf(stderr, "%s: user-id %d not found!\n", PROGRAM, getuid()); + return; + } + if (!pwent->pw_dir) { + fprintf(stderr, "%s: home directory for user-id %d not found!\n", PROGRAM, getuid()); + return; + } + fullname = (char *)malloc(strlen(pwent->pw_dir)+strlen(filename)); + strcpy(fullname, pwent->pw_dir); + strcat(fullname, filename+1); + } else { + fullname = strdup(filename); + } +#else /* !(defined(HAVE_GETUID) && defined(HAVE_GETPWUID)) */ + fullname = strdup(filename); +#endif /* defined(HAVE_GETUID) && defined(HAVE_GETPWUID) */ + + /* try to open */ + rcfile = fopen(fullname,"r"); + if (!rcfile) { + if (user_specified) { + fprintf(stderr, "%s: warning: open failed for rc-file '%s': %s\n", + PROGRAM, fullname, strerror(errno)); + } + } + else { + cmdline_parser_read_rc_stream(rcfile, fullname, args_info); + } + + /* cleanup */ + if (fullname != filename) free(fullname); + if (rcfile) fclose(rcfile); + + return; +} + + +/* Parse option values from an .rc file : guts */ +#define OPTPARSE_GET 32 +void +cmdline_parser_read_rc_stream(FILE *rcfile, + const char *filename, + struct gengetopt_args_info *args_info) 
+{ + char *optname = (char *)malloc(OPTPARSE_GET); + char *optval = (char *)malloc(OPTPARSE_GET); + size_t onsize = OPTPARSE_GET; + size_t ovsize = OPTPARSE_GET; + size_t onlen = 0; + size_t ovlen = 0; + int lineno = 0; + char c; + +#ifdef cmdline_parser_DEBUG + fprintf(stderr, "cmdline_parser_read_rc_stream('%s'):\n", filename); +#endif + + while ((c = fgetc(rcfile)) != EOF) { + onlen = 0; + ovlen = 0; + lineno++; + + /* -- get next option-name */ + /* skip leading space and comments */ + if (isspace(c)) continue; + if (c == '#') { + while ((c = fgetc(rcfile)) != EOF) { + if (c == '\n') break; + } + continue; + } + + /* parse option-name */ + while (c != EOF && c != '=' && !isspace(c)) { + /* re-allocate if necessary */ + if (onlen >= onsize-1) { + char *tmp = (char *)malloc(onsize+OPTPARSE_GET); + strcpy(tmp,optname); + free(optname); + + onsize += OPTPARSE_GET; + optname = tmp; + } + optname[onlen++] = c; + c = fgetc(rcfile); + } + optname[onlen++] = '\0'; + +#ifdef cmdline_parser_DEBUG + fprintf(stderr, "cmdline_parser_read_rc_stream('%s'): line %d: optname='%s'\n", + filename, lineno, optname); +#endif + + /* -- get next option-value */ + /* skip leading space */ + while ((c = fgetc(rcfile)) != EOF && isspace(c)) { + ; + } + + /* parse option-value */ + while (c != EOF && c != '\n') { + /* re-allocate if necessary */ + if (ovlen >= ovsize-1) { + char *tmp = (char *)malloc(ovsize+OPTPARSE_GET); + strcpy(tmp,optval); + free(optval); + ovsize += OPTPARSE_GET; + optval = tmp; + } + optval[ovlen++] = c; + c = fgetc(rcfile); + } + optval[ovlen++] = '\0'; + + /* now do the action for the option */ + if (cmdline_parser_parse_option('\0',optname,optval,args_info) != 0) { + fprintf(stderr, "%s: error in file '%s' at line %d.\n", PROGRAM, filename, lineno); + + } + } + + /* cleanup */ + free(optname); + free(optval); + + return; +} diff --git a/gfsm/gfsm/src/programs/gfsmarith_cmdparser.h b/gfsm/gfsm/src/programs/gfsmarith_cmdparser.h new file mode 100644 index 
0000000..0f8b5c4 --- /dev/null +++ b/gfsm/gfsm/src/programs/gfsmarith_cmdparser.h @@ -0,0 +1,93 @@ +/* -*- Mode: C -*- + * + * File: gfsmarith_cmdparser.h + * Description: Headers for command-line parser struct gengetopt_args_info. + * + * File autogenerated by optgen.perl version 0.05. + * + */ + +#ifndef gfsmarith_cmdparser_h +#define gfsmarith_cmdparser_h + +#ifdef __cplusplus +extern "C" { +#endif /* __cplusplus */ + +/* + * moocow: Never set PACKAGE and VERSION here. + */ + +struct gengetopt_args_info { + int compress_arg; /* Specify compression level of output file. (default=-1). */ + char * output_arg; /* Specifiy output file (default=stdout). (default=-). */ + int exp_flag; /* Real exponentiation (default=0). */ + int log_flag; /* Real logarithm (default=0). */ + float multiply_arg; /* Real multiplication by WEIGHT (default=0). */ + float add_arg; /* Real addition of WEIGHT (default=0). */ + int positive_flag; /* Set real negative weights to zero (default=0). */ + float times_arg; /* Semiring multiplication by WEIGHT (default=0). */ + float plus_arg; /* Semiring addition of WEIGHT (default=0). */ + int sr_positive_flag; /* Set semiring-negative weights to semiring-zero (default=0). */ + int no_arcs_flag; /* Exclude arc weights (default: include) (default=0). */ + int no_finals_flag; /* Exclude final weights (default: include) (default=0). */ + int zero_flag; /* Include semiring-zero weights (default: exclude) (default=0). */ + int initial_flag; /* Select only initial state and its arcs/finality (default=0). */ + int state_arg; /* Select only state ID and its arcs/finality (default=all) (default=-1). */ + int lower_arg; /* Select only arcs with lower-label LO (default=any) (default=-1). */ + int upper_arg; /* Select only arcs with upper-label HI (defualt=any) (default=-1). 
*/ + + int help_given; /* Whether help was given */ + int version_given; /* Whether version was given */ + int compress_given; /* Whether compress was given */ + int output_given; /* Whether output was given */ + int exp_given; /* Whether exp was given */ + int log_given; /* Whether log was given */ + int multiply_given; /* Whether multiply was given */ + int add_given; /* Whether add was given */ + int positive_given; /* Whether positive was given */ + int times_given; /* Whether times was given */ + int plus_given; /* Whether plus was given */ + int sr_positive_given; /* Whether sr-positive was given */ + int no_arcs_given; /* Whether no-arcs was given */ + int no_finals_given; /* Whether no-finals was given */ + int zero_given; /* Whether zero was given */ + int initial_given; /* Whether initial was given */ + int state_given; /* Whether state was given */ + int lower_given; /* Whether lower was given */ + int upper_given; /* Whether upper was given */ + + char **inputs; /* unnamed arguments */ + unsigned inputs_num; /* number of unnamed arguments */ +}; + +/* read rc files (if any) and parse all command-line options in one swell foop */ +int cmdline_parser (int argc, char *const *argv, struct gengetopt_args_info *args_info); + +/* instantiate defaults from environment variables: you must call this yourself! 
*/ +void cmdline_parser_envdefaults (struct gengetopt_args_info *args_info); + +/* read a single rc-file */ +void cmdline_parser_read_rcfile (const char *filename, + struct gengetopt_args_info *args_info, + int user_specified); + +/* read a single rc-file (stream) */ +void cmdline_parser_read_rc_stream (FILE *rcfile, + const char *filename, + struct gengetopt_args_info *args_info); + +/* parse a single option */ +int cmdline_parser_parse_option (char oshort, const char *olong, const char *val, + struct gengetopt_args_info *args_info); + +/* print help message */ +void cmdline_parser_print_help(void); + +/* print version */ +void cmdline_parser_print_version(void); + +#ifdef __cplusplus +} +#endif /* __cplusplus */ +#endif /* gfsmarith_cmdparser_h */ diff --git a/gfsm/gfsm/src/programs/gfsmarith_main.c b/gfsm/gfsm/src/programs/gfsmarith_main.c new file mode 100644 index 0000000..53f5708 --- /dev/null +++ b/gfsm/gfsm/src/programs/gfsmarith_main.c @@ -0,0 +1,132 @@ +/* + gfsm-utils : finite state automaton utilities + Copyright (C) 2004 by Bryan Jurish <moocow@ling.uni-potsdam.de> + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include <gfsm.h> + +#include "gfsmarith_cmdparser.h" + +/*-------------------------------------------------------------------------- + * Globals + *--------------------------------------------------------------------------*/ +char *progname = "gfsmarith"; + +//-- options +struct gengetopt_args_info args; + +//-- files +const char *infilename = "-"; +const char *outfilename = "-"; + +//-- global structs +gfsmAutomaton *fsm; +gfsmError *err = NULL; + +//-- arithmetic operation +gfsmArithOp op=gfsmAONone; +gfsmWeight arg=0; + +//-- weight selection +gboolean do_arcs; +gboolean do_finals; +gboolean do_zero; + +//-- state & label selection +gfsmStateId qid; +gfsmLabelVal lo; +gfsmLabelVal hi; + +/*-------------------------------------------------------------------------- + * Option Processing + *--------------------------------------------------------------------------*/ +void get_my_options(int argc, char **argv) +{ + if (cmdline_parser(argc, argv, &args) != 0) + exit(1); + + //-- load environmental defaults + //cmdline_parser_envdefaults(&args); + + //-- filenames + if (args.inputs_num > 0) infilename = args.inputs[0]; + outfilename = args.output_arg; + + //-- operator selection + if (args.exp_given) { op = gfsmAOExp; } + else if (args.log_given) { op = gfsmAOLog; } + else if (args.multiply_given) { op = gfsmAOMult; arg=args.multiply_arg; } + else if (args.add_given) { op = gfsmAOAdd; arg=args.add_arg; } + else if (args.positive_given) { op = gfsmAONoNeg; } + else if (args.times_given) { op = gfsmAOSRTimes; arg=args.times_arg; } + else if (args.plus_given) { op = gfsmAOSRPlus; arg=args.plus_arg; } + else if (args.sr_positive_given) { 
op = gfsmAOSRNoNeg; } + + //-- weight selection + do_arcs = !args.no_arcs_given; + do_finals = !args.no_finals_given; + do_zero = args.zero_given; + + //-- state & label selection + qid = args.state_given ? args.state_arg : gfsmNoState; + lo = args.lower_given ? args.lower_arg : gfsmNoLabel; + hi = args.upper_given ? args.upper_arg : gfsmNoLabel; + + //-- initialize fsm + fsm = gfsm_automaton_new(); +} + +/*-------------------------------------------------------------------------- + * MAIN + *--------------------------------------------------------------------------*/ +int main (int argc, char **argv) +{ + GFSM_INIT + get_my_options(argc,argv); + + //-- load automaton + if (!gfsm_automaton_load_bin_filename(fsm,infilename,&err)) { + g_printerr("%s: load failed for '%s': %s\n", progname, infilename, err->message); + exit(3); + } + + //-- hack: initial-state selection + if (args.initial_flag) qid=fsm->root_id; + + //-- perform weight aritmetic + gfsm_automaton_arith_state(fsm, qid, op, arg, lo, hi, do_arcs, do_finals, do_zero); + + //-- store automaton + if (!gfsm_automaton_save_bin_filename(fsm,outfilename,args.compress_arg,&err)) { + g_printerr("%s: store failed to '%s': %s\n", progname, outfilename, err->message); + exit(4); + } + + //-- cleanup + gfsm_automaton_free(fsm); + + GFSM_FINISH + return 0; +} diff --git a/gfsm/gfsm/src/programs/gfsmclosure.gog b/gfsm/gfsm/src/programs/gfsmclosure.gog new file mode 100644 index 0000000..77b5884 --- /dev/null +++ b/gfsm/gfsm/src/programs/gfsmclosure.gog @@ -0,0 +1,79 @@ +# -*- Mode: Shell-Script -*- +# +# Getopt::Gen specification +#----------------------------------------------------------------------------- +program "gfsmclosure" +#program_version "0.01" + +purpose "Compute Kleene closure of a finite state machine" +author "Bryan Jurish <moocow@ling.uni-potsdam.de>" +on_reparse "warn" + +#----------------------------------------------------------------------------- +# Details 
+#----------------------------------------------------------------------------- +details "" + +#----------------------------------------------------------------------------- +# Files +#----------------------------------------------------------------------------- +#rcfile "/etc/gfsmrc" +#rcfile "~/.gfsmrc" + +#----------------------------------------------------------------------------- +# Arguments +#----------------------------------------------------------------------------- +argument "BINFILE" "Stored binary gfsm file" \ + details=" +If unspecified, standard input will be read +" + +#----------------------------------------------------------------------------- +# Options +#----------------------------------------------------------------------------- +#group "Basic Options" + +int "times" n "Compute n-ary (^n) closure." \ + arg="N" \ + default="0" + +flag "plus" p "Compute transitive (+) closure." \ + details=" +Alias for '-n 1'. +" + +int "compress" z "Specify compression level of output file." \ + arg="LEVEL" \ + default="-1" \ + details=" +Specify zlib compression level of output file. -1 (default) indicates +the default compression level, 0 (zero) indicates no zlib compression at all, +and 9 indicates the best possible compression. +" + +string "output" F "Specifiy output file (default=stdout)." \ + arg="FILE" \ + default="-" + +#----------------------------------------------------------------------------- +# Addenda +#----------------------------------------------------------------------------- +#addenda "" + +#----------------------------------------------------------------------------- +# Bugs +#----------------------------------------------------------------------------- +bugs " + +None known. 
+ +" + +#----------------------------------------------------------------------------- +# Footer +#----------------------------------------------------------------------------- +#acknowledge `cat acknowledge.pod` + +seealso " +L<gfsmutils> +" diff --git a/gfsm/gfsm/src/programs/gfsmclosure_cmdparser.c b/gfsm/gfsm/src/programs/gfsmclosure_cmdparser.c new file mode 100644 index 0000000..11f7bfd --- /dev/null +++ b/gfsm/gfsm/src/programs/gfsmclosure_cmdparser.c @@ -0,0 +1,495 @@ +/* -*- Mode: C -*- + * + * File: gfsmclosure_cmdparser.c + * Description: Code for command-line parser struct gengetopt_args_info. + * + * File autogenerated by optgen.perl version 0.05 + * generated with the following command: + * /usr/local/bin/optgen.perl -u -l --no-handle-rcfile --nopod -F gfsmclosure_cmdparser gfsmclosure.gog + * + * The developers of optgen.perl consider the fixed text that goes in all + * optgen.perl output files to be in the public domain: + * we make no copyright claims on it. + * + */ + +#include <stdio.h> +#include <stdlib.h> +#include <errno.h> +#include <string.h> +#include <unistd.h> +#include <ctype.h> + +/* If we use autoconf/autoheader. */ +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#ifdef HAVE_PWD_H +# include <pwd.h> +#endif + +/* Allow user-overrides for PACKAGE and VERSION */ +#ifndef PACKAGE +# define PACKAGE "PACKAGE" +#endif + +#ifndef VERSION +# define VERSION "VERSION" +#endif + + +#ifndef PROGRAM +# define PROGRAM "gfsmclosure" +#endif + +/* #define cmdline_parser_DEBUG */ + +/* Check for "configure's" getopt check result. 
*/ +#ifndef HAVE_GETOPT_LONG +# include "getopt.h" +#else +# include <getopt.h> +#endif + +#if !defined(HAVE_STRDUP) && !defined(strdup) +# define strdup gengetopt_strdup +#endif /* HAVE_STRDUP */ + +#include "gfsmclosure_cmdparser.h" + + +/* user code section */ + +/* end user code section */ + + +void +cmdline_parser_print_version (void) +{ + printf("gfsmclosure (%s %s) by Bryan Jurish <moocow@ling.uni-potsdam.de>\n", PACKAGE, VERSION); +} + +void +cmdline_parser_print_help (void) +{ + cmdline_parser_print_version (); + printf("\n"); + printf("Purpose:\n"); + printf(" Compute Kleene closure of a finite state machine\n"); + printf("\n"); + + printf("Usage: %s [OPTIONS]... BINFILE\n", "gfsmclosure"); + + printf("\n"); + printf(" Arguments:\n"); + printf(" BINFILE Stored binary gfsm file\n"); + + printf("\n"); + printf(" Options:\n"); + printf(" -h --help Print help and exit.\n"); + printf(" -V --version Print version and exit.\n"); + printf(" -nN --times=N Compute n-ary (^n) closure.\n"); + printf(" -p --plus Compute transitive (+) closure.\n"); + printf(" -zLEVEL --compress=LEVEL Specify compression level of output file.\n"); + printf(" -FFILE --output=FILE Specifiy output file (default=stdout).\n"); +} + +#if !defined(HAVE_STRDUP) && !defined(strdup) +/* gengetopt_strdup(): automatically generated from strdup.c. 
*/ +/* strdup.c replacement of strdup, which is not standard */ +static char * +gengetopt_strdup (const char *s) +{ + char *result = (char*)malloc(strlen(s) + 1); + if (result == (char*)0) + return (char*)0; + strcpy(result, s); + return result; +} +#endif /* HAVE_STRDUP */ + +/* clear_args(args_info): clears all args & resets to defaults */ +static void +clear_args(struct gengetopt_args_info *args_info) +{ + args_info->times_arg = 0; + args_info->plus_flag = 0; + args_info->compress_arg = -1; + args_info->output_arg = strdup("-"); +} + + +int +cmdline_parser (int argc, char * const *argv, struct gengetopt_args_info *args_info) +{ + int c; /* Character of the parsed option. */ + int missing_required_options = 0; + + args_info->help_given = 0; + args_info->version_given = 0; + args_info->times_given = 0; + args_info->plus_given = 0; + args_info->compress_given = 0; + args_info->output_given = 0; + + clear_args(args_info); + + /* rcfile handling */ + + /* end rcfile handling */ + + optarg = 0; + optind = 1; + opterr = 1; + optopt = '?'; + + while (1) + { + int option_index = 0; + static struct option long_options[] = { + { "help", 0, NULL, 'h' }, + { "version", 0, NULL, 'V' }, + { "times", 1, NULL, 'n' }, + { "plus", 0, NULL, 'p' }, + { "compress", 1, NULL, 'z' }, + { "output", 1, NULL, 'F' }, + { NULL, 0, NULL, 0 } + }; + static char short_options[] = { + 'h', + 'V', + 'n', ':', + 'p', + 'z', ':', + 'F', ':', + '\0' + }; + + c = getopt_long (argc, argv, short_options, long_options, &option_index); + + if (c == -1) break; /* Exit from 'while (1)' loop. 
*/ + + if (cmdline_parser_parse_option(c, long_options[option_index].name, optarg, args_info) != 0) { + exit (EXIT_FAILURE); + } + } /* while */ + + + + if ( missing_required_options ) + exit (EXIT_FAILURE); + + + if (optind < argc) { + int i = 0 ; + args_info->inputs_num = argc - optind ; + args_info->inputs = (char **)(malloc ((args_info->inputs_num)*sizeof(char *))) ; + while (optind < argc) + args_info->inputs[ i++ ] = strdup (argv[optind++]) ; + } + + return 0; +} + + +/* Parse a single option */ +int +cmdline_parser_parse_option(char oshort, const char *olong, const char *val, + struct gengetopt_args_info *args_info) +{ + if (!oshort && !(olong && *olong)) return 1; /* ignore null options */ + +#ifdef cmdline_parser_DEBUG + fprintf(stderr, "parse_option(): oshort='%c', olong='%s', val='%s'\n", oshort, olong, val);*/ +#endif + + switch (oshort) + { + case 'h': /* Print help and exit. */ + if (args_info->help_given) { + fprintf(stderr, "%s: `--help' (`-h') option given more than once\n", PROGRAM); + } + clear_args(args_info); + cmdline_parser_print_help(); + exit(EXIT_SUCCESS); + + break; + + case 'V': /* Print version and exit. */ + if (args_info->version_given) { + fprintf(stderr, "%s: `--version' (`-V') option given more than once\n", PROGRAM); + } + clear_args(args_info); + cmdline_parser_print_version(); + exit(EXIT_SUCCESS); + + break; + + case 'n': /* Compute n-ary (^n) closure. */ + if (args_info->times_given) { + fprintf(stderr, "%s: `--times' (`-n') option given more than once\n", PROGRAM); + } + args_info->times_given++; + args_info->times_arg = (int)atoi(val); + break; + + case 'p': /* Compute transitive (+) closure. */ + if (args_info->plus_given) { + fprintf(stderr, "%s: `--plus' (`-p') option given more than once\n", PROGRAM); + } + args_info->plus_given++; + if (args_info->plus_given <= 1) + args_info->plus_flag = !(args_info->plus_flag); + break; + + case 'z': /* Specify compression level of output file. 
*/ + if (args_info->compress_given) { + fprintf(stderr, "%s: `--compress' (`-z') option given more than once\n", PROGRAM); + } + args_info->compress_given++; + args_info->compress_arg = (int)atoi(val); + break; + + case 'F': /* Specifiy output file (default=stdout). */ + if (args_info->output_given) { + fprintf(stderr, "%s: `--output' (`-F') option given more than once\n", PROGRAM); + } + args_info->output_given++; + if (args_info->output_arg) free(args_info->output_arg); + args_info->output_arg = strdup(val); + break; + + case 0: /* Long option(s) with no short form */ + /* Print help and exit. */ + if (strcmp(olong, "help") == 0) { + if (args_info->help_given) { + fprintf(stderr, "%s: `--help' (`-h') option given more than once\n", PROGRAM); + } + clear_args(args_info); + cmdline_parser_print_help(); + exit(EXIT_SUCCESS); + + } + + /* Print version and exit. */ + else if (strcmp(olong, "version") == 0) { + if (args_info->version_given) { + fprintf(stderr, "%s: `--version' (`-V') option given more than once\n", PROGRAM); + } + clear_args(args_info); + cmdline_parser_print_version(); + exit(EXIT_SUCCESS); + + } + + /* Compute n-ary (^n) closure. */ + else if (strcmp(olong, "times") == 0) { + if (args_info->times_given) { + fprintf(stderr, "%s: `--times' (`-n') option given more than once\n", PROGRAM); + } + args_info->times_given++; + args_info->times_arg = (int)atoi(val); + } + + /* Compute transitive (+) closure. */ + else if (strcmp(olong, "plus") == 0) { + if (args_info->plus_given) { + fprintf(stderr, "%s: `--plus' (`-p') option given more than once\n", PROGRAM); + } + args_info->plus_given++; + if (args_info->plus_given <= 1) + args_info->plus_flag = !(args_info->plus_flag); + } + + /* Specify compression level of output file. 
*/ + else if (strcmp(olong, "compress") == 0) { + if (args_info->compress_given) { + fprintf(stderr, "%s: `--compress' (`-z') option given more than once\n", PROGRAM); + } + args_info->compress_given++; + args_info->compress_arg = (int)atoi(val); + } + + /* Specifiy output file (default=stdout). */ + else if (strcmp(olong, "output") == 0) { + if (args_info->output_given) { + fprintf(stderr, "%s: `--output' (`-F') option given more than once\n", PROGRAM); + } + args_info->output_given++; + if (args_info->output_arg) free(args_info->output_arg); + args_info->output_arg = strdup(val); + } + + else { + fprintf(stderr, "%s: unknown long option '%s'.\n", PROGRAM, olong); + return (EXIT_FAILURE); + } + break; + + case '?': /* Invalid Option */ + fprintf(stderr, "%s: unknown option '%s'.\n", PROGRAM, olong); + return (EXIT_FAILURE); + + + default: /* bug: options not considered. */ + fprintf (stderr, "%s: option unknown: %c\n", PROGRAM, oshort); + abort (); + } /* switch */ + return 0; +}


/* Initialize options not yet given from environmental defaults.
 *
 * Intentionally empty in this generated parser: nothing is read from the
 * environment here, so `args_info` is left exactly as the caller passed it.
 */
void
cmdline_parser_envdefaults(struct gengetopt_args_info *args_info)
{
  (void)args_info;
  return;
}


/* Load option values from an .rc file.
 *
 *  filename       : rc-file to read; NULL is silently ignored.  When
 *                   getuid()/getpwuid() are available a leading '~' is
 *                   replaced by the calling user's home directory.
 *  args_info      : option structure updated by the options found.
 *  user_specified : non-zero if a failure to open the file should be
 *                   reported on stderr; zero keeps a missing file silent.
 *
 * All failures are reported on stderr (or ignored); nothing is returned.
 */
void
cmdline_parser_read_rcfile(const char *filename,
                           struct gengetopt_args_info *args_info,
                           int user_specified)
{
  char *fullname = NULL;
  FILE *rcfile;

  if (!filename) return; /* ignore NULL filenames */

#if defined(HAVE_GETUID) && defined(HAVE_GETPWUID)
  if (*filename == '~') {
    /* tilde-expansion hack */
    struct passwd *pwent = getpwuid(getuid());
    if (!pwent) {
      fprintf(stderr, "%s: user-id %d not found!\n", PROGRAM, (int)getuid());
      return;
    }
    if (!pwent->pw_dir) {
      fprintf(stderr, "%s: home directory for user-id %d not found!\n", PROGRAM, (int)getuid());
      return;
    }
    /* the '~' itself is dropped, so strlen(filename) already includes
     * the byte needed for the terminating NUL */
    fullname = (char *)malloc(strlen(pwent->pw_dir)+strlen(filename));
    if (fullname) {
      strcpy(fullname, pwent->pw_dir);
      strcat(fullname, filename+1);
    }
  } else {
    fullname = strdup(filename);
  }
#else /* !(defined(HAVE_GETUID) && defined(HAVE_GETPWUID)) */
  fullname = strdup(filename);
#endif /* defined(HAVE_GETUID) && defined(HAVE_GETPWUID) */

  /* both branches above allocate: bail out instead of passing NULL to fopen() */
  if (!fullname) {
    fprintf(stderr, "%s: out of memory while preparing rc-file name '%s'\n", PROGRAM, filename);
    return;
  }

  /* try to open */
  rcfile = fopen(fullname,"r");
  if (!rcfile) {
    if (user_specified) {
      fprintf(stderr, "%s: warning: open failed for rc-file '%s': %s\n",
              PROGRAM, fullname, strerror(errno));
    }
  }
  else {
    cmdline_parser_read_rc_stream(rcfile, fullname, args_info);
    fclose(rcfile);
  }

  /* cleanup: fullname is always a private heap copy at this point */
  free(fullname);

  return;
}


/* Parse option values from an .rc file : guts */
#define OPTPARSE_GET 32

/* Enlarge *buf by OPTPARSE_GET bytes, preserving its contents.
 * Returns 0 on success; on failure returns -1 and leaves *buf / *size untouched.
 */
static int
cmdline_parser_rc_grow(char **buf, size_t *size)
{
  char *tmp = (char *)realloc(*buf, *size + OPTPARSE_GET);
  if (!tmp) return -1;
  *buf   = tmp;
  *size += OPTPARSE_GET;
  return 0;
}

/* Read "name[=]value" pairs from an already opened rc-file.
 *
 *  rcfile    : stream to read until EOF (not closed here).
 *  filename  : name used in diagnostics only.
 *  args_info : option structure updated through cmdline_parser_parse_option().
 *
 * Lines whose first non-blank character is '#' are comments.  An option name
 * ends at '=' or whitespace; the value is the rest of the same line after
 * optional blanks.  A name that ends its line has an empty value (it no
 * longer swallows the following line).  Options rejected by
 * cmdline_parser_parse_option() are reported with their line number and
 * parsing continues.
 */
void
cmdline_parser_read_rc_stream(FILE *rcfile,
                              const char *filename,
                              struct gengetopt_args_info *args_info)
{
  char *optname = (char *)malloc(OPTPARSE_GET);
  char *optval  = (char *)malloc(OPTPARSE_GET);
  size_t onsize = OPTPARSE_GET;
  size_t ovsize = OPTPARSE_GET;
  int lineno = 1;   /* 1-based number of the line currently being read */
  int c;            /* int, not char: must be able to hold EOF distinctly */

  if (!optname || !optval) goto nomem;

#ifdef cmdline_parser_DEBUG
  fprintf(stderr, "cmdline_parser_read_rc_stream('%s'):\n", filename);
#endif

  while ((c = fgetc(rcfile)) != EOF) {
    size_t onlen = 0;
    size_t ovlen = 0;
    int optline;

    /* -- get next option-name */
    /* skip leading space and comments */
    if (c == '\n') { lineno++; continue; }
    if (isspace(c)) continue;
    if (c == '#') {
      while ((c = fgetc(rcfile)) != EOF && c != '\n') {
        ;
      }
      if (c == '\n') lineno++;
      continue;
    }
    optline = lineno;  /* remember where this option started for diagnostics */

    /* parse option-name */
    while (c != EOF && c != '=' && !isspace(c)) {
      /* keep one byte free for the terminating NUL */
      if (onlen + 1 >= onsize && cmdline_parser_rc_grow(&optname, &onsize) != 0) goto nomem;
      optname[onlen++] = (char)c;
      c = fgetc(rcfile);
    }
    optname[onlen] = '\0';

#ifdef cmdline_parser_DEBUG
    fprintf(stderr, "cmdline_parser_read_rc_stream('%s'): line %d: optname='%s'\n",
            filename, optline, optname);
#endif

    /* -- get next option-value */
    /* skip the separator and leading blanks, but never cross the end of line */
    if (c != EOF && c != '\n') {
      do {
        c = fgetc(rcfile);
      } while (c != EOF && c != '\n' && isspace(c));
    }

    /* parse option-value */
    while (c != EOF && c != '\n') {
      if (ovlen + 1 >= ovsize && cmdline_parser_rc_grow(&optval, &ovsize) != 0) goto nomem;
      optval[ovlen++] = (char)c;
      c = fgetc(rcfile);
    }
    optval[ovlen] = '\0';
    if (c == '\n') lineno++;

    /* now do the action for the option */
    if (cmdline_parser_parse_option('\0',optname,optval,args_info) != 0) {
      fprintf(stderr, "%s: error in file '%s' at line %d.\n", PROGRAM, filename, optline);
    }
  }

  /* cleanup */
  free(optname);
  free(optval);
  return;

 nomem:
  fprintf(stderr, "%s: out of memory while reading rc-file '%s'\n", PROGRAM, filename);
  free(optname);
  free(optval);
  return;
}
diff --git a/gfsm/gfsm/src/programs/gfsmclosure_cmdparser.h b/gfsm/gfsm/src/programs/gfsmclosure_cmdparser.h new file mode 100644 index 0000000..0837b6a --- /dev/null +++ b/gfsm/gfsm/src/programs/gfsmclosure_cmdparser.h @@ -0,0 +1,67 @@ +/* -*- Mode: C -*- + * + * File: gfsmclosure_cmdparser.h + * Description: Headers for command-line parser struct gengetopt_args_info. + * + * File autogenerated by optgen.perl version 0.05. + * + */ + +#ifndef gfsmclosure_cmdparser_h +#define gfsmclosure_cmdparser_h + +#ifdef __cplusplus +extern "C" { +#endif /* __cplusplus */ + +/* + * moocow: Never set PACKAGE and VERSION here. + */ + +struct gengetopt_args_info { + int times_arg; /* Compute n-ary (^n) closure. (default=0). */ + int plus_flag; /* Compute transitive (+) closure. (default=0). */ + int compress_arg; /* Specify compression level of output file. (default=-1). */ + char * output_arg; /* Specifiy output file (default=stdout). (default=-).
*/ + + int help_given; /* Whether help was given */ + int version_given; /* Whether version was given */ + int times_given; /* Whether times was given */ + int plus_given; /* Whether plus was given */ + int compress_given; /* Whether compress was given */ + int output_given; /* Whether output was given */ + + char **inputs; /* unnamed arguments */ + unsigned inputs_num; /* number of unnamed arguments */ +}; + +/* read rc files (if any) and parse all command-line options in one swell foop */ +int cmdline_parser (int argc, char *const *argv, struct gengetopt_args_info *args_info); + +/* instantiate defaults from environment variables: you must call this yourself! */ +void cmdline_parser_envdefaults (struct gengetopt_args_info *args_info); + +/* read a single rc-file */ +void cmdline_parser_read_rcfile (const char *filename, + struct gengetopt_args_info *args_info, + int user_specified); + +/* read a single rc-file (stream) */ +void cmdline_parser_read_rc_stream (FILE *rcfile, + const char *filename, + struct gengetopt_args_info *args_info); + +/* parse a single option */ +int cmdline_parser_parse_option (char oshort, const char *olong, const char *val, + struct gengetopt_args_info *args_info); + +/* print help message */ +void cmdline_parser_print_help(void); + +/* print version */ +void cmdline_parser_print_version(void); + +#ifdef __cplusplus +} +#endif /* __cplusplus */ +#endif /* gfsmclosure_cmdparser_h */ diff --git a/gfsm/gfsm/src/programs/gfsmclosure_main.c b/gfsm/gfsm/src/programs/gfsmclosure_main.c new file mode 100644 index 0000000..8b8d029 --- /dev/null +++ b/gfsm/gfsm/src/programs/gfsmclosure_main.c @@ -0,0 +1,100 @@ +/* + gfsm-utils : finite state automaton utilities + Copyright (C) 2004 by Bryan Jurish <moocow@ling.uni-potsdam.de> + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at 
your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif + +#include <stdio.h> +#include <errno.h> +#include <string.h> +#include <stdlib.h> + +#include <gfsm.h> + +#include "gfsmclosure_cmdparser.h" + +/*-------------------------------------------------------------------------- + * Globals + *--------------------------------------------------------------------------*/ +char *progname = "gfsmclosure"; + +//-- options +struct gengetopt_args_info args; + +//-- files +const char *infilename = "-"; +const char *outfilename = "-"; + +//-- global structs etc. 
+gfsmError *err = NULL; +gfsmAutomaton *fsm=NULL; +guint times = 0; + +/*-------------------------------------------------------------------------- + * Option Processing + *--------------------------------------------------------------------------*/ +void get_my_options(int argc, char **argv) +{ + if (cmdline_parser(argc, argv, &args) != 0) + exit(1); + + //-- output + if (args.inputs_num) infilename = args.inputs[0]; + if (args.output_arg) outfilename = args.output_arg; + + //-- load environmental defaults + //cmdline_parser_envdefaults(&args); + + //-- get closure length + if (args.plus_given) times = 1; + else times = args.times_arg; + + //-- initialize automaton + fsm = gfsm_automaton_new(); +} + + +/*-------------------------------------------------------------------------- + * MAIN + *--------------------------------------------------------------------------*/ +int main (int argc, char **argv) +{ + get_my_options(argc,argv); + + //-- load automaton + if (!gfsm_automaton_load_bin_filename(fsm,infilename,&err)) { + g_printerr("%s: load failed for '%s': %s\n", progname, infilename, err->message); + exit(255); + } + + //-- compute closure + gfsm_automaton_n_closure(fsm, times); + + //-- spew automaton + if (!gfsm_automaton_save_bin_filename(fsm,outfilename,args.compress_arg,&err)) { + g_printerr("%s: store failed to '%s': %s\n", progname, outfilename, err->message); + exit(4); + } + + //-- cleanup + if (fsm) gfsm_automaton_free(fsm); + + return 0; +} diff --git a/gfsm/gfsm/src/programs/gfsmcompile.gog b/gfsm/gfsm/src/programs/gfsmcompile.gog new file mode 100644 index 0000000..e53307b --- /dev/null +++ b/gfsm/gfsm/src/programs/gfsmcompile.gog @@ -0,0 +1,102 @@ +# -*- Mode: Shell-Script -*- +# +# Getopt::Gen specification +#----------------------------------------------------------------------------- +program "gfsmcompile" +#program_version "0.01" + +purpose "Compile text format gfsm files to binary" +author "Bryan Jurish <moocow@ling.uni-potsdam.de>" +on_reparse 
"warn" + +#----------------------------------------------------------------------------- +# Details +#----------------------------------------------------------------------------- +details "" + +#----------------------------------------------------------------------------- +# Files +#----------------------------------------------------------------------------- +#rcfile "/etc/mootrc" +#rcfile "~/.mootrc" + +#----------------------------------------------------------------------------- +# Arguments +#----------------------------------------------------------------------------- +argument "TXTFILE" "Input text file" \ + details=" +If unspecified, standard input will be read +" + +#argument "BINFILE" "Output binary gfsm file" \ +# details=" +#If unspecified, output will be written to standard output. +#" + +#----------------------------------------------------------------------------- +# Options +#----------------------------------------------------------------------------- +#group "Basic Options" + +flag "acceptor" a "Compile as acceptor (default=transducer)" \ + default=0 + +string "ilabels" i "Specify input (lower) labels file." \ + arg="LABELS" + +string "olabels" o "Specify output (upper) labels file." \ + arg="LABELS" + +string "slabels" S "Specify state labels file." \ + arg="LABELS" + +string "semiring" s "Specify semiring type." \ + arg="SRTYPE" \ + default="tropical" \ + details=" +Specify one of the following: + + boolean + log + plog + real + trivial + tropical + +" + +int "compress" z "Specify compression level of output file." \ + arg="LEVEL" \ + default="-1" \ + details=" +Specify zlib compression level of output file. -1 (default) indicates +the default compression level, 0 (zero) indicates no zlib compression at all, +and 9 indicates the best possible compression. +" + +string "output" F "Specifiy output file (default=stdout)." 
\ + arg="FILE" \ + default="-" + +#----------------------------------------------------------------------------- +# Addenda +#----------------------------------------------------------------------------- +#addenda "" + +#----------------------------------------------------------------------------- +# Bugs +#----------------------------------------------------------------------------- +bugs " + +None known. + +" + +#----------------------------------------------------------------------------- +# Footer +#----------------------------------------------------------------------------- +#acknowledge `cat acknowledge.pod` + +seealso " +L<gfsmutils> +" diff --git a/gfsm/gfsm/src/programs/gfsmcompile_cmdparser.c b/gfsm/gfsm/src/programs/gfsmcompile_cmdparser.c new file mode 100644 index 0000000..ea17d5d --- /dev/null +++ b/gfsm/gfsm/src/programs/gfsmcompile_cmdparser.c @@ -0,0 +1,569 @@ +/* -*- Mode: C -*- + * + * File: gfsmcompile_cmdparser.c + * Description: Code for command-line parser struct gengetopt_args_info. + * + * File autogenerated by optgen.perl version 0.05 + * generated with the following command: + * /usr/local/bin/optgen.perl -u -l --no-handle-rcfile --nopod -F gfsmcompile_cmdparser gfsmcompile.gog + * + * The developers of optgen.perl consider the fixed text that goes in all + * optgen.perl output files to be in the public domain: + * we make no copyright claims on it. + * + */ + +#include <stdio.h> +#include <stdlib.h> +#include <errno.h> +#include <string.h> +#include <unistd.h> +#include <ctype.h> + +/* If we use autoconf/autoheader. */ +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#ifdef HAVE_PWD_H +# include <pwd.h> +#endif + +/* Allow user-overrides for PACKAGE and VERSION */ +#ifndef PACKAGE +# define PACKAGE "PACKAGE" +#endif + +#ifndef VERSION +# define VERSION "VERSION" +#endif + + +#ifndef PROGRAM +# define PROGRAM "gfsmcompile" +#endif + +/* #define cmdline_parser_DEBUG */ + +/* Check for "configure's" getopt check result. 
*/ +#ifndef HAVE_GETOPT_LONG +# include "getopt.h" +#else +# include <getopt.h> +#endif + +#if !defined(HAVE_STRDUP) && !defined(strdup) +# define strdup gengetopt_strdup +#endif /* HAVE_STRDUP */ + +#include "gfsmcompile_cmdparser.h" + + +/* user code section */ + +/* end user code section */ + + +void +cmdline_parser_print_version (void) +{ + printf("gfsmcompile (%s %s) by Bryan Jurish <moocow@ling.uni-potsdam.de>\n", PACKAGE, VERSION); +} + +void +cmdline_parser_print_help (void) +{ + cmdline_parser_print_version (); + printf("\n"); + printf("Purpose:\n"); + printf(" Compile text format gfsm files to binary\n"); + printf("\n"); + + printf("Usage: %s [OPTIONS]... TXTFILE\n", "gfsmcompile"); + + printf("\n"); + printf(" Arguments:\n"); + printf(" TXTFILE Input text file\n"); + + printf("\n"); + printf(" Options:\n"); + printf(" -h --help Print help and exit.\n"); + printf(" -V --version Print version and exit.\n"); + printf(" -a --acceptor Compile as acceptor (default=transducer)\n"); + printf(" -iLABELS --ilabels=LABELS Specify input (lower) labels file.\n"); + printf(" -oLABELS --olabels=LABELS Specify output (upper) labels file.\n"); + printf(" -SLABELS --slabels=LABELS Specify state labels file.\n"); + printf(" -sSRTYPE --semiring=SRTYPE Specify semiring type.\n"); + printf(" -zLEVEL --compress=LEVEL Specify compression level of output file.\n"); + printf(" -FFILE --output=FILE Specifiy output file (default=stdout).\n"); +} + +#if !defined(HAVE_STRDUP) && !defined(strdup) +/* gengetopt_strdup(): automatically generated from strdup.c. 
*/ +/* strdup.c replacement of strdup, which is not standard */ +static char * +gengetopt_strdup (const char *s) +{ + char *result = (char*)malloc(strlen(s) + 1); + if (result == (char*)0) + return (char*)0; + strcpy(result, s); + return result; +} +#endif /* HAVE_STRDUP */ + +/* clear_args(args_info): clears all args & resets to defaults */ +static void +clear_args(struct gengetopt_args_info *args_info) +{ + args_info->acceptor_flag = 0; + args_info->ilabels_arg = NULL; + args_info->olabels_arg = NULL; + args_info->slabels_arg = NULL; + args_info->semiring_arg = strdup("tropical"); + args_info->compress_arg = -1; + args_info->output_arg = strdup("-"); +} + + +int +cmdline_parser (int argc, char * const *argv, struct gengetopt_args_info *args_info) +{ + int c; /* Character of the parsed option. */ + int missing_required_options = 0; + + args_info->help_given = 0; + args_info->version_given = 0; + args_info->acceptor_given = 0; + args_info->ilabels_given = 0; + args_info->olabels_given = 0; + args_info->slabels_given = 0; + args_info->semiring_given = 0; + args_info->compress_given = 0; + args_info->output_given = 0; + + clear_args(args_info); + + /* rcfile handling */ + + /* end rcfile handling */ + + optarg = 0; + optind = 1; + opterr = 1; + optopt = '?'; + + while (1) + { + int option_index = 0; + static struct option long_options[] = { + { "help", 0, NULL, 'h' }, + { "version", 0, NULL, 'V' }, + { "acceptor", 0, NULL, 'a' }, + { "ilabels", 1, NULL, 'i' }, + { "olabels", 1, NULL, 'o' }, + { "slabels", 1, NULL, 'S' }, + { "semiring", 1, NULL, 's' }, + { "compress", 1, NULL, 'z' }, + { "output", 1, NULL, 'F' }, + { NULL, 0, NULL, 0 } + }; + static char short_options[] = { + 'h', + 'V', + 'a', + 'i', ':', + 'o', ':', + 'S', ':', + 's', ':', + 'z', ':', + 'F', ':', + '\0' + }; + + c = getopt_long (argc, argv, short_options, long_options, &option_index); + + if (c == -1) break; /* Exit from 'while (1)' loop. 
*/ + + if (cmdline_parser_parse_option(c, long_options[option_index].name, optarg, args_info) != 0) { + exit (EXIT_FAILURE); + } + } /* while */ + + + + if ( missing_required_options ) + exit (EXIT_FAILURE); + + + if (optind < argc) { + int i = 0 ; + args_info->inputs_num = argc - optind ; + args_info->inputs = (char **)(malloc ((args_info->inputs_num)*sizeof(char *))) ; + while (optind < argc) + args_info->inputs[ i++ ] = strdup (argv[optind++]) ; + } + + return 0; +} + + +/* Parse a single option */ +int +cmdline_parser_parse_option(char oshort, const char *olong, const char *val, + struct gengetopt_args_info *args_info) +{ + if (!oshort && !(olong && *olong)) return 1; /* ignore null options */ + +#ifdef cmdline_parser_DEBUG + fprintf(stderr, "parse_option(): oshort='%c', olong='%s', val='%s'\n", oshort, olong, val);*/ +#endif + + switch (oshort) + { + case 'h': /* Print help and exit. */ + if (args_info->help_given) { + fprintf(stderr, "%s: `--help' (`-h') option given more than once\n", PROGRAM); + } + clear_args(args_info); + cmdline_parser_print_help(); + exit(EXIT_SUCCESS); + + break; + + case 'V': /* Print version and exit. */ + if (args_info->version_given) { + fprintf(stderr, "%s: `--version' (`-V') option given more than once\n", PROGRAM); + } + clear_args(args_info); + cmdline_parser_print_version(); + exit(EXIT_SUCCESS); + + break; + + case 'a': /* Compile as acceptor (default=transducer) */ + if (args_info->acceptor_given) { + fprintf(stderr, "%s: `--acceptor' (`-a') option given more than once\n", PROGRAM); + } + args_info->acceptor_given++; + if (args_info->acceptor_given <= 1) + args_info->acceptor_flag = !(args_info->acceptor_flag); + break; + + case 'i': /* Specify input (lower) labels file. 
*/ + if (args_info->ilabels_given) { + fprintf(stderr, "%s: `--ilabels' (`-i') option given more than once\n", PROGRAM); + } + args_info->ilabels_given++; + if (args_info->ilabels_arg) free(args_info->ilabels_arg); + args_info->ilabels_arg = strdup(val); + break; + + case 'o': /* Specify output (upper) labels file. */ + if (args_info->olabels_given) { + fprintf(stderr, "%s: `--olabels' (`-o') option given more than once\n", PROGRAM); + } + args_info->olabels_given++; + if (args_info->olabels_arg) free(args_info->olabels_arg); + args_info->olabels_arg = strdup(val); + break; + + case 'S': /* Specify state labels file. */ + if (args_info->slabels_given) { + fprintf(stderr, "%s: `--slabels' (`-S') option given more than once\n", PROGRAM); + } + args_info->slabels_given++; + if (args_info->slabels_arg) free(args_info->slabels_arg); + args_info->slabels_arg = strdup(val); + break; + + case 's': /* Specify semiring type. */ + if (args_info->semiring_given) { + fprintf(stderr, "%s: `--semiring' (`-s') option given more than once\n", PROGRAM); + } + args_info->semiring_given++; + if (args_info->semiring_arg) free(args_info->semiring_arg); + args_info->semiring_arg = strdup(val); + break; + + case 'z': /* Specify compression level of output file. */ + if (args_info->compress_given) { + fprintf(stderr, "%s: `--compress' (`-z') option given more than once\n", PROGRAM); + } + args_info->compress_given++; + args_info->compress_arg = (int)atoi(val); + break; + + case 'F': /* Specifiy output file (default=stdout). */ + if (args_info->output_given) { + fprintf(stderr, "%s: `--output' (`-F') option given more than once\n", PROGRAM); + } + args_info->output_given++; + if (args_info->output_arg) free(args_info->output_arg); + args_info->output_arg = strdup(val); + break; + + case 0: /* Long option(s) with no short form */ + /* Print help and exit. 
*/ + if (strcmp(olong, "help") == 0) { + if (args_info->help_given) { + fprintf(stderr, "%s: `--help' (`-h') option given more than once\n", PROGRAM); + } + clear_args(args_info); + cmdline_parser_print_help(); + exit(EXIT_SUCCESS); + + } + + /* Print version and exit. */ + else if (strcmp(olong, "version") == 0) { + if (args_info->version_given) { + fprintf(stderr, "%s: `--version' (`-V') option given more than once\n", PROGRAM); + } + clear_args(args_info); + cmdline_parser_print_version(); + exit(EXIT_SUCCESS); + + } + + /* Compile as acceptor (default=transducer) */ + else if (strcmp(olong, "acceptor") == 0) { + if (args_info->acceptor_given) { + fprintf(stderr, "%s: `--acceptor' (`-a') option given more than once\n", PROGRAM); + } + args_info->acceptor_given++; + if (args_info->acceptor_given <= 1) + args_info->acceptor_flag = !(args_info->acceptor_flag); + } + + /* Specify input (lower) labels file. */ + else if (strcmp(olong, "ilabels") == 0) { + if (args_info->ilabels_given) { + fprintf(stderr, "%s: `--ilabels' (`-i') option given more than once\n", PROGRAM); + } + args_info->ilabels_given++; + if (args_info->ilabels_arg) free(args_info->ilabels_arg); + args_info->ilabels_arg = strdup(val); + } + + /* Specify output (upper) labels file. */ + else if (strcmp(olong, "olabels") == 0) { + if (args_info->olabels_given) { + fprintf(stderr, "%s: `--olabels' (`-o') option given more than once\n", PROGRAM); + } + args_info->olabels_given++; + if (args_info->olabels_arg) free(args_info->olabels_arg); + args_info->olabels_arg = strdup(val); + } + + /* Specify state labels file. */ + else if (strcmp(olong, "slabels") == 0) { + if (args_info->slabels_given) { + fprintf(stderr, "%s: `--slabels' (`-S') option given more than once\n", PROGRAM); + } + args_info->slabels_given++; + if (args_info->slabels_arg) free(args_info->slabels_arg); + args_info->slabels_arg = strdup(val); + } + + /* Specify semiring type. 
*/ + else if (strcmp(olong, "semiring") == 0) { + if (args_info->semiring_given) { + fprintf(stderr, "%s: `--semiring' (`-s') option given more than once\n", PROGRAM); + } + args_info->semiring_given++; + if (args_info->semiring_arg) free(args_info->semiring_arg); + args_info->semiring_arg = strdup(val); + } + + /* Specify compression level of output file. */ + else if (strcmp(olong, "compress") == 0) { + if (args_info->compress_given) { + fprintf(stderr, "%s: `--compress' (`-z') option given more than once\n", PROGRAM); + } + args_info->compress_given++; + args_info->compress_arg = (int)atoi(val); + } + + /* Specifiy output file (default=stdout). */ + else if (strcmp(olong, "output") == 0) { + if (args_info->output_given) { + fprintf(stderr, "%s: `--output' (`-F') option given more than once\n", PROGRAM); + } + args_info->output_given++; + if (args_info->output_arg) free(args_info->output_arg); + args_info->output_arg = strdup(val); + } + + else { + fprintf(stderr, "%s: unknown long option '%s'.\n", PROGRAM, olong); + return (EXIT_FAILURE); + } + break; + + case '?': /* Invalid Option */ + fprintf(stderr, "%s: unknown option '%s'.\n", PROGRAM, olong); + return (EXIT_FAILURE); + + + default: /* bug: options not considered. 
*/ + fprintf (stderr, "%s: option unknown: %c\n", PROGRAM, oshort); + abort (); + } /* switch */ + return 0; +} + + +/* Initialize options not yet given from environmental defaults */ +void +cmdline_parser_envdefaults(struct gengetopt_args_info *args_info) +{ + + + return; +} + + +/* Load option values from an .rc file */ +void +cmdline_parser_read_rcfile(const char *filename, + struct gengetopt_args_info *args_info, + int user_specified) +{ + char *fullname; + FILE *rcfile; + + if (!filename) return; /* ignore NULL filenames */ + +#if defined(HAVE_GETUID) && defined(HAVE_GETPWUID) + if (*filename == '~') { + /* tilde-expansion hack */ + struct passwd *pwent = getpwuid(getuid()); + if (!pwent) { + fprintf(stderr, "%s: user-id %d not found!\n", PROGRAM, getuid()); + return; + } + if (!pwent->pw_dir) { + fprintf(stderr, "%s: home directory for user-id %d not found!\n", PROGRAM, getuid()); + return; + } + fullname = (char *)malloc(strlen(pwent->pw_dir)+strlen(filename)); + strcpy(fullname, pwent->pw_dir); + strcat(fullname, filename+1); + } else { + fullname = strdup(filename); + } +#else /* !(defined(HAVE_GETUID) && defined(HAVE_GETPWUID)) */ + fullname = strdup(filename); +#endif /* defined(HAVE_GETUID) && defined(HAVE_GETPWUID) */ + + /* try to open */ + rcfile = fopen(fullname,"r"); + if (!rcfile) { + if (user_specified) { + fprintf(stderr, "%s: warning: open failed for rc-file '%s': %s\n", + PROGRAM, fullname, strerror(errno)); + } + } + else { + cmdline_parser_read_rc_stream(rcfile, fullname, args_info); + } + + /* cleanup */ + if (fullname != filename) free(fullname); + if (rcfile) fclose(rcfile); + + return; +} + + +/* Parse option values from an .rc file : guts */ +#define OPTPARSE_GET 32 +void +cmdline_parser_read_rc_stream(FILE *rcfile, + const char *filename, + struct gengetopt_args_info *args_info) +{ + char *optname = (char *)malloc(OPTPARSE_GET); + char *optval = (char *)malloc(OPTPARSE_GET); + size_t onsize = OPTPARSE_GET; + size_t ovsize = 
OPTPARSE_GET; + size_t onlen = 0; + size_t ovlen = 0; + int lineno = 0; + char c; + +#ifdef cmdline_parser_DEBUG + fprintf(stderr, "cmdline_parser_read_rc_stream('%s'):\n", filename); +#endif + + while ((c = fgetc(rcfile)) != EOF) { + onlen = 0; + ovlen = 0; + lineno++; + + /* -- get next option-name */ + /* skip leading space and comments */ + if (isspace(c)) continue; + if (c == '#') { + while ((c = fgetc(rcfile)) != EOF) { + if (c == '\n') break; + } + continue; + } + + /* parse option-name */ + while (c != EOF && c != '=' && !isspace(c)) { + /* re-allocate if necessary */ + if (onlen >= onsize-1) { + char *tmp = (char *)malloc(onsize+OPTPARSE_GET); + strcpy(tmp,optname); + free(optname); + + onsize += OPTPARSE_GET; + optname = tmp; + } + optname[onlen++] = c; + c = fgetc(rcfile); + } + optname[onlen++] = '\0'; + +#ifdef cmdline_parser_DEBUG + fprintf(stderr, "cmdline_parser_read_rc_stream('%s'): line %d: optname='%s'\n", + filename, lineno, optname); +#endif + + /* -- get next option-value */ + /* skip leading space */ + while ((c = fgetc(rcfile)) != EOF && isspace(c)) { + ; + } + + /* parse option-value */ + while (c != EOF && c != '\n') { + /* re-allocate if necessary */ + if (ovlen >= ovsize-1) { + char *tmp = (char *)malloc(ovsize+OPTPARSE_GET); + strcpy(tmp,optval); + free(optval); + ovsize += OPTPARSE_GET; + optval = tmp; + } + optval[ovlen++] = c; + c = fgetc(rcfile); + } + optval[ovlen++] = '\0'; + + /* now do the action for the option */ + if (cmdline_parser_parse_option('\0',optname,optval,args_info) != 0) { + fprintf(stderr, "%s: error in file '%s' at line %d.\n", PROGRAM, filename, lineno); + + } + } + + /* cleanup */ + free(optname); + free(optval); + + return; +} diff --git a/gfsm/gfsm/src/programs/gfsmcompile_cmdparser.h b/gfsm/gfsm/src/programs/gfsmcompile_cmdparser.h new file mode 100644 index 0000000..53c4d59 --- /dev/null +++ b/gfsm/gfsm/src/programs/gfsmcompile_cmdparser.h @@ -0,0 +1,73 @@ +/* -*- Mode: C -*- + * + * File: 
gfsmcompile_cmdparser.h + * Description: Headers for command-line parser struct gengetopt_args_info. + * + * File autogenerated by optgen.perl version 0.05. + * + */ + +#ifndef gfsmcompile_cmdparser_h +#define gfsmcompile_cmdparser_h + +#ifdef __cplusplus +extern "C" { +#endif /* __cplusplus */ + +/* + * moocow: Never set PACKAGE and VERSION here. + */ + +struct gengetopt_args_info { + int acceptor_flag; /* Compile as acceptor (default=transducer) (default=0). */ + char * ilabels_arg; /* Specify input (lower) labels file. (default=NULL). */ + char * olabels_arg; /* Specify output (upper) labels file. (default=NULL). */ + char * slabels_arg; /* Specify state labels file. (default=NULL). */ + char * semiring_arg; /* Specify semiring type. (default=tropical). */ + int compress_arg; /* Specify compression level of output file. (default=-1). */ + char * output_arg; /* Specifiy output file (default=stdout). (default=-). */ + + int help_given; /* Whether help was given */ + int version_given; /* Whether version was given */ + int acceptor_given; /* Whether acceptor was given */ + int ilabels_given; /* Whether ilabels was given */ + int olabels_given; /* Whether olabels was given */ + int slabels_given; /* Whether slabels was given */ + int semiring_given; /* Whether semiring was given */ + int compress_given; /* Whether compress was given */ + int output_given; /* Whether output was given */ + + char **inputs; /* unnamed arguments */ + unsigned inputs_num; /* number of unnamed arguments */ +}; + +/* read rc files (if any) and parse all command-line options in one swell foop */ +int cmdline_parser (int argc, char *const *argv, struct gengetopt_args_info *args_info); + +/* instantiate defaults from environment variables: you must call this yourself! 
*/ +void cmdline_parser_envdefaults (struct gengetopt_args_info *args_info); + +/* read a single rc-file */ +void cmdline_parser_read_rcfile (const char *filename, + struct gengetopt_args_info *args_info, + int user_specified); + +/* read a single rc-file (stream) */ +void cmdline_parser_read_rc_stream (FILE *rcfile, + const char *filename, + struct gengetopt_args_info *args_info); + +/* parse a single option */ +int cmdline_parser_parse_option (char oshort, const char *olong, const char *val, + struct gengetopt_args_info *args_info); + +/* print help message */ +void cmdline_parser_print_help(void); + +/* print version */ +void cmdline_parser_print_version(void); + +#ifdef __cplusplus +} +#endif /* __cplusplus */ +#endif /* gfsmcompile_cmdparser_h */ diff --git a/gfsm/gfsm/src/programs/gfsmcompile_main.c b/gfsm/gfsm/src/programs/gfsmcompile_main.c new file mode 100644 index 0000000..feeba69 --- /dev/null +++ b/gfsm/gfsm/src/programs/gfsmcompile_main.c @@ -0,0 +1,132 @@ +/* + gfsm-utils : finite state automaton utilities + Copyright (C) 2004 by Bryan Jurish <moocow@ling.uni-potsdam.de> + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include <gfsm.h> + +#include "gfsmcompile_cmdparser.h" + +/*-------------------------------------------------------------------------- + * Globals + *--------------------------------------------------------------------------*/ +char *progname = "gfsmcompile"; + +//-- options +struct gengetopt_args_info args; + +//-- files +const char *infilename = "-"; +const char *outfilename = "-"; + +//-- global structs +gfsmAutomaton *fsm; +gfsmAlphabet *ilabels=NULL, *olabels=NULL, *slabels=NULL; +gfsmError *err = NULL; +gfsmSRType srtype = gfsmSRTUnknown; + +/*-------------------------------------------------------------------------- + * Option Processing + *--------------------------------------------------------------------------*/ +void get_my_options(int argc, char **argv) +{ + if (cmdline_parser(argc, argv, &args) != 0) + exit(1); + + //-- load environmental defaults + //cmdline_parser_envdefaults(&args); + + //-- filenames + if (args.inputs_num > 0) infilename = args.inputs[0]; + outfilename = args.output_arg; + + //-- labels: input + if (args.ilabels_given) { + ilabels = gfsm_string_alphabet_new(); + if (!gfsm_alphabet_load_filename(ilabels,args.ilabels_arg,&err)) { + g_printerr("%s: load failed for input-labels file '%s': %s\n", + progname, args.ilabels_arg, err->message); + exit(2); + } + } + //-- labels: output + if (args.olabels_given) { + olabels = gfsm_string_alphabet_new(); + if (!gfsm_alphabet_load_filename(olabels,args.olabels_arg,&err)) { + g_printerr("%s: load failed for output-labels file '%s': %s\n", + progname, args.olabels_arg, err->message); + exit(2); + } + } + //-- labels: state + if (args.slabels_given) { + slabels = 
gfsm_string_alphabet_new(); + if (!gfsm_alphabet_load_filename(slabels,args.slabels_arg,&err)) { + g_printerr("%s: load failed for state-labels file '%s': %s\n", + progname, args.slabels_arg, err->message); + exit(2); + } + } + + //-- initialize fsm + fsm = gfsm_automaton_new(); + if (args.acceptor_given) fsm->flags.is_transducer = FALSE; + + //-- set semiring + srtype = gfsm_sr_name_to_type(args.semiring_arg); + if (srtype != fsm->sr->type) { + gfsm_automaton_set_semiring(fsm, gfsm_semiring_new(srtype)); + } +} + +/*-------------------------------------------------------------------------- + * MAIN + *--------------------------------------------------------------------------*/ +int main (int argc, char **argv) +{ + GFSM_INIT + get_my_options(argc,argv); + + //-- compile automaton + if (!gfsm_automaton_compile_filename_full(fsm,infilename,ilabels,olabels,slabels,&err)) { + g_printerr("%s: compile failed for '%s': %s\n", progname, infilename, err->message); + exit(3); + } + + //-- store automaton + if (!gfsm_automaton_save_bin_filename(fsm,outfilename,args.compress_arg,&err)) { + g_printerr("%s: store failed to '%s': %s\n", progname, outfilename, err->message); + exit(4); + } + + //-- cleanup + if (ilabels) gfsm_alphabet_free(ilabels); + if (olabels) gfsm_alphabet_free(olabels); + if (slabels) gfsm_alphabet_free(slabels); + gfsm_automaton_free(fsm); + + GFSM_FINISH + return 0; +} diff --git a/gfsm/gfsm/src/programs/gfsmcomplement.gog b/gfsm/gfsm/src/programs/gfsmcomplement.gog new file mode 100644 index 0000000..7712b3c --- /dev/null +++ b/gfsm/gfsm/src/programs/gfsmcomplement.gog @@ -0,0 +1,73 @@ +# -*- Mode: Shell-Script -*- +# +# Getopt::Gen specification +#----------------------------------------------------------------------------- +program "gfsmcomplement" +#program_version "0.01" + +purpose "Compute complement of finite state acceptors" +author "Bryan Jurish <moocow@ling.uni-potsdam.de>" +on_reparse "warn" + 
+#----------------------------------------------------------------------------- +# Details +#----------------------------------------------------------------------------- +details "" + +#----------------------------------------------------------------------------- +# Files +#----------------------------------------------------------------------------- +#rcfile "/etc/gfsmrc" +#rcfile "~/.gfsmrc" + +#----------------------------------------------------------------------------- +# Arguments +#----------------------------------------------------------------------------- +argument "BINFILE" "Stored binary gfsm file" \ + details=" +If unspecified, standard input will be read +" + +#----------------------------------------------------------------------------- +# Options +#----------------------------------------------------------------------------- +#group "Basic Options" + +string "ilabels" i "Specify input (lower) labels file for alphabet." \ + arg="LABELS" + +int "compress" z "Specify compression level of output file." \ + arg="LEVEL" \ + default="-1" \ + details=" +Specify zlib compression level of output file. -1 (default) indicates +the default compression level, 0 (zero) indicates no zlib compression at all, +and 9 indicates the best possible compression. +" + +string "output" F "Specifiy output file (default=stdout)." \ + arg="FILE" \ + default="-" + +#----------------------------------------------------------------------------- +# Addenda +#----------------------------------------------------------------------------- +#addenda "" + +#----------------------------------------------------------------------------- +# Bugs +#----------------------------------------------------------------------------- +bugs " + +Epsilon arcs are not handled correctly. 
+ +" + +#----------------------------------------------------------------------------- +# Footer +#----------------------------------------------------------------------------- +#acknowledge `cat acknowledge.pod` + +seealso " +L<gfsmutils> +" diff --git a/gfsm/gfsm/src/programs/gfsmcomplement_cmdparser.c b/gfsm/gfsm/src/programs/gfsmcomplement_cmdparser.c new file mode 100644 index 0000000..d697579 --- /dev/null +++ b/gfsm/gfsm/src/programs/gfsmcomplement_cmdparser.c @@ -0,0 +1,473 @@ +/* -*- Mode: C -*- + * + * File: gfsmcomplement_cmdparser.c + * Description: Code for command-line parser struct gengetopt_args_info. + * + * File autogenerated by optgen.perl version 0.05 + * generated with the following command: + * /usr/local/bin/optgen.perl -u -l --no-handle-rcfile --nopod -F gfsmcomplement_cmdparser gfsmcomplement.gog + * + * The developers of optgen.perl consider the fixed text that goes in all + * optgen.perl output files to be in the public domain: + * we make no copyright claims on it. + * + */ + +#include <stdio.h> +#include <stdlib.h> +#include <errno.h> +#include <string.h> +#include <unistd.h> +#include <ctype.h> + +/* If we use autoconf/autoheader. */ +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#ifdef HAVE_PWD_H +# include <pwd.h> +#endif + +/* Allow user-overrides for PACKAGE and VERSION */ +#ifndef PACKAGE +# define PACKAGE "PACKAGE" +#endif + +#ifndef VERSION +# define VERSION "VERSION" +#endif + + +#ifndef PROGRAM +# define PROGRAM "gfsmcomplement" +#endif + +/* #define cmdline_parser_DEBUG */ + +/* Check for "configure's" getopt check result. 
*/ +#ifndef HAVE_GETOPT_LONG +# include "getopt.h" +#else +# include <getopt.h> +#endif + +#if !defined(HAVE_STRDUP) && !defined(strdup) +# define strdup gengetopt_strdup +#endif /* HAVE_STRDUP */ + +#include "gfsmcomplement_cmdparser.h" + + +/* user code section */ + +/* end user code section */ + + +void +cmdline_parser_print_version (void) +{ + printf("gfsmcomplement (%s %s) by Bryan Jurish <moocow@ling.uni-potsdam.de>\n", PACKAGE, VERSION); +} + +void +cmdline_parser_print_help (void) +{ + cmdline_parser_print_version (); + printf("\n"); + printf("Purpose:\n"); + printf(" Compute complement of finite state acceptors\n"); + printf("\n"); + + printf("Usage: %s [OPTIONS]... BINFILE\n", "gfsmcomplement"); + + printf("\n"); + printf(" Arguments:\n"); + printf(" BINFILE Stored binary gfsm file\n"); + + printf("\n"); + printf(" Options:\n"); + printf(" -h --help Print help and exit.\n"); + printf(" -V --version Print version and exit.\n"); + printf(" -iLABELS --ilabels=LABELS Specify input (lower) labels file for alphabet.\n"); + printf(" -zLEVEL --compress=LEVEL Specify compression level of output file.\n"); + printf(" -FFILE --output=FILE Specifiy output file (default=stdout).\n"); +} + +#if !defined(HAVE_STRDUP) && !defined(strdup) +/* gengetopt_strdup(): automatically generated from strdup.c. */ +/* strdup.c replacement of strdup, which is not standard */ +static char * +gengetopt_strdup (const char *s) +{ + char *result = (char*)malloc(strlen(s) + 1); + if (result == (char*)0) + return (char*)0; + strcpy(result, s); + return result; +} +#endif /* HAVE_STRDUP */ + +/* clear_args(args_info): clears all args & resets to defaults */ +static void +clear_args(struct gengetopt_args_info *args_info) +{ + args_info->ilabels_arg = NULL; + args_info->compress_arg = -1; + args_info->output_arg = strdup("-"); +} + + +int +cmdline_parser (int argc, char * const *argv, struct gengetopt_args_info *args_info) +{ + int c; /* Character of the parsed option. 
*/ + int missing_required_options = 0; + + args_info->help_given = 0; + args_info->version_given = 0; + args_info->ilabels_given = 0; + args_info->compress_given = 0; + args_info->output_given = 0; + + clear_args(args_info); + + /* rcfile handling */ + + /* end rcfile handling */ + + optarg = 0; + optind = 1; + opterr = 1; + optopt = '?'; + + while (1) + { + int option_index = 0; + static struct option long_options[] = { + { "help", 0, NULL, 'h' }, + { "version", 0, NULL, 'V' }, + { "ilabels", 1, NULL, 'i' }, + { "compress", 1, NULL, 'z' }, + { "output", 1, NULL, 'F' }, + { NULL, 0, NULL, 0 } + }; + static char short_options[] = { + 'h', + 'V', + 'i', ':', + 'z', ':', + 'F', ':', + '\0' + }; + + c = getopt_long (argc, argv, short_options, long_options, &option_index); + + if (c == -1) break; /* Exit from 'while (1)' loop. */ + + if (cmdline_parser_parse_option(c, long_options[option_index].name, optarg, args_info) != 0) { + exit (EXIT_FAILURE); + } + } /* while */ + + + + if ( missing_required_options ) + exit (EXIT_FAILURE); + + + if (optind < argc) { + int i = 0 ; + args_info->inputs_num = argc - optind ; + args_info->inputs = (char **)(malloc ((args_info->inputs_num)*sizeof(char *))) ; + while (optind < argc) + args_info->inputs[ i++ ] = strdup (argv[optind++]) ; + } + + return 0; +} + + +/* Parse a single option */ +int +cmdline_parser_parse_option(char oshort, const char *olong, const char *val, + struct gengetopt_args_info *args_info) +{ + if (!oshort && !(olong && *olong)) return 1; /* ignore null options */ + +#ifdef cmdline_parser_DEBUG + fprintf(stderr, "parse_option(): oshort='%c', olong='%s', val='%s'\n", oshort, olong, val);*/ +#endif + + switch (oshort) + { + case 'h': /* Print help and exit. */ + if (args_info->help_given) { + fprintf(stderr, "%s: `--help' (`-h') option given more than once\n", PROGRAM); + } + clear_args(args_info); + cmdline_parser_print_help(); + exit(EXIT_SUCCESS); + + break; + + case 'V': /* Print version and exit. 
*/ + if (args_info->version_given) { + fprintf(stderr, "%s: `--version' (`-V') option given more than once\n", PROGRAM); + } + clear_args(args_info); + cmdline_parser_print_version(); + exit(EXIT_SUCCESS); + + break; + + case 'i': /* Specify input (lower) labels file for alphabet. */ + if (args_info->ilabels_given) { + fprintf(stderr, "%s: `--ilabels' (`-i') option given more than once\n", PROGRAM); + } + args_info->ilabels_given++; + if (args_info->ilabels_arg) free(args_info->ilabels_arg); + args_info->ilabels_arg = strdup(val); + break; + + case 'z': /* Specify compression level of output file. */ + if (args_info->compress_given) { + fprintf(stderr, "%s: `--compress' (`-z') option given more than once\n", PROGRAM); + } + args_info->compress_given++; + args_info->compress_arg = (int)atoi(val); + break; + + case 'F': /* Specifiy output file (default=stdout). */ + if (args_info->output_given) { + fprintf(stderr, "%s: `--output' (`-F') option given more than once\n", PROGRAM); + } + args_info->output_given++; + if (args_info->output_arg) free(args_info->output_arg); + args_info->output_arg = strdup(val); + break; + + case 0: /* Long option(s) with no short form */ + /* Print help and exit. */ + if (strcmp(olong, "help") == 0) { + if (args_info->help_given) { + fprintf(stderr, "%s: `--help' (`-h') option given more than once\n", PROGRAM); + } + clear_args(args_info); + cmdline_parser_print_help(); + exit(EXIT_SUCCESS); + + } + + /* Print version and exit. */ + else if (strcmp(olong, "version") == 0) { + if (args_info->version_given) { + fprintf(stderr, "%s: `--version' (`-V') option given more than once\n", PROGRAM); + } + clear_args(args_info); + cmdline_parser_print_version(); + exit(EXIT_SUCCESS); + + } + + /* Specify input (lower) labels file for alphabet. 
*/ + else if (strcmp(olong, "ilabels") == 0) { + if (args_info->ilabels_given) { + fprintf(stderr, "%s: `--ilabels' (`-i') option given more than once\n", PROGRAM); + } + args_info->ilabels_given++; + if (args_info->ilabels_arg) free(args_info->ilabels_arg); + args_info->ilabels_arg = strdup(val); + } + + /* Specify compression level of output file. */ + else if (strcmp(olong, "compress") == 0) { + if (args_info->compress_given) { + fprintf(stderr, "%s: `--compress' (`-z') option given more than once\n", PROGRAM); + } + args_info->compress_given++; + args_info->compress_arg = (int)atoi(val); + } + + /* Specifiy output file (default=stdout). */ + else if (strcmp(olong, "output") == 0) { + if (args_info->output_given) { + fprintf(stderr, "%s: `--output' (`-F') option given more than once\n", PROGRAM); + } + args_info->output_given++; + if (args_info->output_arg) free(args_info->output_arg); + args_info->output_arg = strdup(val); + } + + else { + fprintf(stderr, "%s: unknown long option '%s'.\n", PROGRAM, olong); + return (EXIT_FAILURE); + } + break; + + case '?': /* Invalid Option */ + fprintf(stderr, "%s: unknown option '%s'.\n", PROGRAM, olong); + return (EXIT_FAILURE); + + + default: /* bug: options not considered. 
*/ + fprintf (stderr, "%s: option unknown: %c\n", PROGRAM, oshort); + abort (); + } /* switch */ + return 0; +} + + +/* Initialize options not yet given from environmental defaults */ +void +cmdline_parser_envdefaults(struct gengetopt_args_info *args_info) +{ + + + return; +} + + +/* Load option values from an .rc file */ +void +cmdline_parser_read_rcfile(const char *filename, + struct gengetopt_args_info *args_info, + int user_specified) +{ + char *fullname; + FILE *rcfile; + + if (!filename) return; /* ignore NULL filenames */ + +#if defined(HAVE_GETUID) && defined(HAVE_GETPWUID) + if (*filename == '~') { + /* tilde-expansion hack */ + struct passwd *pwent = getpwuid(getuid()); + if (!pwent) { + fprintf(stderr, "%s: user-id %d not found!\n", PROGRAM, getuid()); + return; + } + if (!pwent->pw_dir) { + fprintf(stderr, "%s: home directory for user-id %d not found!\n", PROGRAM, getuid()); + return; + } + fullname = (char *)malloc(strlen(pwent->pw_dir)+strlen(filename)); + strcpy(fullname, pwent->pw_dir); + strcat(fullname, filename+1); + } else { + fullname = strdup(filename); + } +#else /* !(defined(HAVE_GETUID) && defined(HAVE_GETPWUID)) */ + fullname = strdup(filename); +#endif /* defined(HAVE_GETUID) && defined(HAVE_GETPWUID) */ + + /* try to open */ + rcfile = fopen(fullname,"r"); + if (!rcfile) { + if (user_specified) { + fprintf(stderr, "%s: warning: open failed for rc-file '%s': %s\n", + PROGRAM, fullname, strerror(errno)); + } + } + else { + cmdline_parser_read_rc_stream(rcfile, fullname, args_info); + } + + /* cleanup */ + if (fullname != filename) free(fullname); + if (rcfile) fclose(rcfile); + + return; +} + + +/* Parse option values from an .rc file : guts */ +#define OPTPARSE_GET 32 +void +cmdline_parser_read_rc_stream(FILE *rcfile, + const char *filename, + struct gengetopt_args_info *args_info) +{ + char *optname = (char *)malloc(OPTPARSE_GET); + char *optval = (char *)malloc(OPTPARSE_GET); + size_t onsize = OPTPARSE_GET; + size_t ovsize = 
OPTPARSE_GET; + size_t onlen = 0; + size_t ovlen = 0; + int lineno = 0; + char c; + +#ifdef cmdline_parser_DEBUG + fprintf(stderr, "cmdline_parser_read_rc_stream('%s'):\n", filename); +#endif + + while ((c = fgetc(rcfile)) != EOF) { + onlen = 0; + ovlen = 0; + lineno++; + + /* -- get next option-name */ + /* skip leading space and comments */ + if (isspace(c)) continue; + if (c == '#') { + while ((c = fgetc(rcfile)) != EOF) { + if (c == '\n') break; + } + continue; + } + + /* parse option-name */ + while (c != EOF && c != '=' && !isspace(c)) { + /* re-allocate if necessary */ + if (onlen >= onsize-1) { + char *tmp = (char *)malloc(onsize+OPTPARSE_GET); + strcpy(tmp,optname); + free(optname); + + onsize += OPTPARSE_GET; + optname = tmp; + } + optname[onlen++] = c; + c = fgetc(rcfile); + } + optname[onlen++] = '\0'; + +#ifdef cmdline_parser_DEBUG + fprintf(stderr, "cmdline_parser_read_rc_stream('%s'): line %d: optname='%s'\n", + filename, lineno, optname); +#endif + + /* -- get next option-value */ + /* skip leading space */ + while ((c = fgetc(rcfile)) != EOF && isspace(c)) { + ; + } + + /* parse option-value */ + while (c != EOF && c != '\n') { + /* re-allocate if necessary */ + if (ovlen >= ovsize-1) { + char *tmp = (char *)malloc(ovsize+OPTPARSE_GET); + strcpy(tmp,optval); + free(optval); + ovsize += OPTPARSE_GET; + optval = tmp; + } + optval[ovlen++] = c; + c = fgetc(rcfile); + } + optval[ovlen++] = '\0'; + + /* now do the action for the option */ + if (cmdline_parser_parse_option('\0',optname,optval,args_info) != 0) { + fprintf(stderr, "%s: error in file '%s' at line %d.\n", PROGRAM, filename, lineno); + + } + } + + /* cleanup */ + free(optname); + free(optval); + + return; +} diff --git a/gfsm/gfsm/src/programs/gfsmcomplement_cmdparser.h b/gfsm/gfsm/src/programs/gfsmcomplement_cmdparser.h new file mode 100644 index 0000000..4b3d57d --- /dev/null +++ b/gfsm/gfsm/src/programs/gfsmcomplement_cmdparser.h @@ -0,0 +1,65 @@ +/* -*- Mode: C -*- + * + * File: 
gfsmcomplement_cmdparser.h
+ * Description: Headers for command-line parser struct gengetopt_args_info.
+ *
+ * File autogenerated by optgen.perl version 0.05.
+ *
+ */
+
+#ifndef gfsmcomplement_cmdparser_h
+#define gfsmcomplement_cmdparser_h
+
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+/*
+ * moocow: Never set PACKAGE and VERSION here.
+ */
+
+/*
+ * Parsed command line of gfsmcomplement.
+ *  - *_arg   : value of an option; string values are heap copies made with strdup()
+ *  - *_given : incremented each time the option is seen, so 0 means "not given"
+ */
+struct gengetopt_args_info {
+  char * ilabels_arg;	 /* Specify input (lower) labels file for alphabet. (default=NULL). */
+  int compress_arg;	 /* Specify compression level of output file. (default=-1). */
+  char * output_arg;	 /* Specify output file (default=stdout). (default=-). */
+
+  int help_given;	 /* Whether help was given */
+  int version_given;	 /* Whether version was given */
+  int ilabels_given;	 /* Whether ilabels was given */
+  int compress_given;	 /* Whether compress was given */
+  int output_given;	 /* Whether output was given */
+
+  char **inputs;	 /* unnamed arguments */
+  unsigned inputs_num;	 /* number of unnamed arguments */
+};
+
+/* read rc files (if any) and parse all command-line options in one swell foop */
+int cmdline_parser (int argc, char *const *argv, struct gengetopt_args_info *args_info);
+
+/* instantiate defaults from environment variables: you must call this yourself! 
*/ +void cmdline_parser_envdefaults (struct gengetopt_args_info *args_info); + +/* read a single rc-file */ +void cmdline_parser_read_rcfile (const char *filename, + struct gengetopt_args_info *args_info, + int user_specified); + +/* read a single rc-file (stream) */ +void cmdline_parser_read_rc_stream (FILE *rcfile, + const char *filename, + struct gengetopt_args_info *args_info); + +/* parse a single option */ +int cmdline_parser_parse_option (char oshort, const char *olong, const char *val, + struct gengetopt_args_info *args_info); + +/* print help message */ +void cmdline_parser_print_help(void); + +/* print version */ +void cmdline_parser_print_version(void); + +#ifdef __cplusplus +} +#endif /* __cplusplus */ +#endif /* gfsmcomplement_cmdparser_h */ diff --git a/gfsm/gfsm/src/programs/gfsmcomplement_main.c b/gfsm/gfsm/src/programs/gfsmcomplement_main.c new file mode 100644 index 0000000..227b963 --- /dev/null +++ b/gfsm/gfsm/src/programs/gfsmcomplement_main.c @@ -0,0 +1,105 @@ +/* + gfsm-utils : finite state automaton utilities + Copyright (C) 2004 by Bryan Jurish <moocow@ling.uni-potsdam.de> + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif + +#include <stdio.h> +#include <errno.h> +#include <string.h> +#include <stdlib.h> + +#include <gfsm.h> + +#include "gfsmcomplement_cmdparser.h" + +/*-------------------------------------------------------------------------- + * Globals + *--------------------------------------------------------------------------*/ +char *progname = "gfsmcomplement"; + +//-- options +struct gengetopt_args_info args; + +//-- files +const char *infilename = "-"; +const char *outfilename = "-"; + +//-- global structs +gfsmAutomaton *fsm; +gfsmAlphabet *ilabels=NULL; +gfsmError *err=NULL; + +/*-------------------------------------------------------------------------- + * Option Processing + *--------------------------------------------------------------------------*/ +void get_my_options(int argc, char **argv) +{ + if (cmdline_parser(argc, argv, &args) != 0) + exit(1); + + //-- output + if (args.inputs_num) infilename = args.inputs[0]; + if (args.output_arg) outfilename = args.output_arg; + + //-- labels: input + if (args.ilabels_given) { + ilabels = gfsm_string_alphabet_new(); + if (!gfsm_alphabet_load_filename(ilabels,args.ilabels_arg,&err)) { + g_printerr("%s: load failed for input-labels file '%s': %s\n", + progname, args.ilabels_arg, err->message); + exit(2); + } + } + + //-- initialize automaton + fsm = gfsm_automaton_new(); +} + + +/*-------------------------------------------------------------------------- + * MAIN + *--------------------------------------------------------------------------*/ +int main (int argc, char **argv) +{ + get_my_options(argc,argv); + + //-- load automaton + if (!gfsm_automaton_load_bin_filename(fsm,infilename,&err)) { + g_printerr("%s: load failed for '%s': %s\n", progname, 
infilename, err->message); + exit(255); + } + + //-- complement + if (ilabels) gfsm_automaton_complement_full(fsm,ilabels); + else gfsm_automaton_complement(fsm); + + //-- spew automaton + if (!gfsm_automaton_save_bin_filename(fsm,outfilename,args.compress_arg,&err)) { + g_printerr("%s: store failed to '%s': %s\n", progname, outfilename, err->message); + exit(4); + } + + //-- cleanup + if (ilabels) gfsm_alphabet_free(ilabels); + if (fsm) gfsm_automaton_free(fsm); + + return 0; +} diff --git a/gfsm/gfsm/src/programs/gfsmcompose.gog b/gfsm/gfsm/src/programs/gfsmcompose.gog new file mode 100644 index 0000000..c4e34f3 --- /dev/null +++ b/gfsm/gfsm/src/programs/gfsmcompose.gog @@ -0,0 +1,70 @@ +# -*- Mode: Shell-Script -*- +# +# Getopt::Gen specification +#----------------------------------------------------------------------------- +program "gfsmcompose" +#program_version "0.01" + +purpose "Compute composition of finite-state machines" +author "Bryan Jurish <moocow@ling.uni-potsdam.de>" +on_reparse "warn" + +#----------------------------------------------------------------------------- +# Details +#----------------------------------------------------------------------------- +details "" + +#----------------------------------------------------------------------------- +# Files +#----------------------------------------------------------------------------- +#rcfile "/etc/mootrc" +#rcfile "~/.mootrc" + +#----------------------------------------------------------------------------- +# Arguments +#----------------------------------------------------------------------------- +argument "BINFILE(s)" "Stored binary gfsm file(s)" \ + details=" +If unspecified, standard input will be read +" + +#----------------------------------------------------------------------------- +# Options +#----------------------------------------------------------------------------- +#group "Basic Options" + +int "compress" z "Specify compression level of output file." 
\ + arg="LEVEL" \ + default="-1" \ + details=" +Specify zlib compression level of output file. -1 (default) indicates +the default compression level, 0 (zero) indicates no zlib compression at all, +and 9 indicates the best possible compression. +" + +string "output" F "Specifiy output file (default=stdout)." \ + arg="FILE" \ + default="-" + +#----------------------------------------------------------------------------- +# Addenda +#----------------------------------------------------------------------------- +#addenda "" + +#----------------------------------------------------------------------------- +# Bugs +#----------------------------------------------------------------------------- +bugs " + +Probably many. + +" + +#----------------------------------------------------------------------------- +# Footer +#----------------------------------------------------------------------------- +#acknowledge `cat acknowledge.pod` + +seealso " +L<gfsmutils> +" diff --git a/gfsm/gfsm/src/programs/gfsmcompose_cmdparser.c b/gfsm/gfsm/src/programs/gfsmcompose_cmdparser.c new file mode 100644 index 0000000..5a82199 --- /dev/null +++ b/gfsm/gfsm/src/programs/gfsmcompose_cmdparser.c @@ -0,0 +1,449 @@ +/* -*- Mode: C -*- + * + * File: gfsmcompose_cmdparser.c + * Description: Code for command-line parser struct gengetopt_args_info. + * + * File autogenerated by optgen.perl version 0.05 + * generated with the following command: + * /usr/local/bin/optgen.perl -u -l --no-handle-rcfile --nopod -F gfsmcompose_cmdparser gfsmcompose.gog + * + * The developers of optgen.perl consider the fixed text that goes in all + * optgen.perl output files to be in the public domain: + * we make no copyright claims on it. + * + */ + +#include <stdio.h> +#include <stdlib.h> +#include <errno.h> +#include <string.h> +#include <unistd.h> +#include <ctype.h> + +/* If we use autoconf/autoheader. 
*/ +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#ifdef HAVE_PWD_H +# include <pwd.h> +#endif + +/* Allow user-overrides for PACKAGE and VERSION */ +#ifndef PACKAGE +# define PACKAGE "PACKAGE" +#endif + +#ifndef VERSION +# define VERSION "VERSION" +#endif + + +#ifndef PROGRAM +# define PROGRAM "gfsmcompose" +#endif + +/* #define cmdline_parser_DEBUG */ + +/* Check for "configure's" getopt check result. */ +#ifndef HAVE_GETOPT_LONG +# include "getopt.h" +#else +# include <getopt.h> +#endif + +#if !defined(HAVE_STRDUP) && !defined(strdup) +# define strdup gengetopt_strdup +#endif /* HAVE_STRDUP */ + +#include "gfsmcompose_cmdparser.h" + + +/* user code section */ + +/* end user code section */ + + +void +cmdline_parser_print_version (void) +{ + printf("gfsmcompose (%s %s) by Bryan Jurish <moocow@ling.uni-potsdam.de>\n", PACKAGE, VERSION); +} + +void +cmdline_parser_print_help (void) +{ + cmdline_parser_print_version (); + printf("\n"); + printf("Purpose:\n"); + printf(" Compute composition of finite-state machines\n"); + printf("\n"); + + printf("Usage: %s [OPTIONS]... BINFILE(s)\n", "gfsmcompose"); + + printf("\n"); + printf(" Arguments:\n"); + printf(" BINFILE(s) Stored binary gfsm file(s)\n"); + + printf("\n"); + printf(" Options:\n"); + printf(" -h --help Print help and exit.\n"); + printf(" -V --version Print version and exit.\n"); + printf(" -zLEVEL --compress=LEVEL Specify compression level of output file.\n"); + printf(" -FFILE --output=FILE Specifiy output file (default=stdout).\n"); +} + +#if !defined(HAVE_STRDUP) && !defined(strdup) +/* gengetopt_strdup(): automatically generated from strdup.c. 
*/ +/* strdup.c replacement of strdup, which is not standard */ +static char * +gengetopt_strdup (const char *s) +{ + char *result = (char*)malloc(strlen(s) + 1); + if (result == (char*)0) + return (char*)0; + strcpy(result, s); + return result; +} +#endif /* HAVE_STRDUP */ + +/* clear_args(args_info): clears all args & resets to defaults */ +static void +clear_args(struct gengetopt_args_info *args_info) +{ + args_info->compress_arg = -1; + args_info->output_arg = strdup("-"); +} + + +int +cmdline_parser (int argc, char * const *argv, struct gengetopt_args_info *args_info) +{ + int c; /* Character of the parsed option. */ + int missing_required_options = 0; + + args_info->help_given = 0; + args_info->version_given = 0; + args_info->compress_given = 0; + args_info->output_given = 0; + + clear_args(args_info); + + /* rcfile handling */ + + /* end rcfile handling */ + + optarg = 0; + optind = 1; + opterr = 1; + optopt = '?'; + + while (1) + { + int option_index = 0; + static struct option long_options[] = { + { "help", 0, NULL, 'h' }, + { "version", 0, NULL, 'V' }, + { "compress", 1, NULL, 'z' }, + { "output", 1, NULL, 'F' }, + { NULL, 0, NULL, 0 } + }; + static char short_options[] = { + 'h', + 'V', + 'z', ':', + 'F', ':', + '\0' + }; + + c = getopt_long (argc, argv, short_options, long_options, &option_index); + + if (c == -1) break; /* Exit from 'while (1)' loop. 
*/ + + if (cmdline_parser_parse_option(c, long_options[option_index].name, optarg, args_info) != 0) { + exit (EXIT_FAILURE); + } + } /* while */ + + + + if ( missing_required_options ) + exit (EXIT_FAILURE); + + + if (optind < argc) { + int i = 0 ; + args_info->inputs_num = argc - optind ; + args_info->inputs = (char **)(malloc ((args_info->inputs_num)*sizeof(char *))) ; + while (optind < argc) + args_info->inputs[ i++ ] = strdup (argv[optind++]) ; + } + + return 0; +} + + +/* Parse a single option */ +int +cmdline_parser_parse_option(char oshort, const char *olong, const char *val, + struct gengetopt_args_info *args_info) +{ + if (!oshort && !(olong && *olong)) return 1; /* ignore null options */ + +#ifdef cmdline_parser_DEBUG + fprintf(stderr, "parse_option(): oshort='%c', olong='%s', val='%s'\n", oshort, olong, val);*/ +#endif + + switch (oshort) + { + case 'h': /* Print help and exit. */ + if (args_info->help_given) { + fprintf(stderr, "%s: `--help' (`-h') option given more than once\n", PROGRAM); + } + clear_args(args_info); + cmdline_parser_print_help(); + exit(EXIT_SUCCESS); + + break; + + case 'V': /* Print version and exit. */ + if (args_info->version_given) { + fprintf(stderr, "%s: `--version' (`-V') option given more than once\n", PROGRAM); + } + clear_args(args_info); + cmdline_parser_print_version(); + exit(EXIT_SUCCESS); + + break; + + case 'z': /* Specify compression level of output file. */ + if (args_info->compress_given) { + fprintf(stderr, "%s: `--compress' (`-z') option given more than once\n", PROGRAM); + } + args_info->compress_given++; + args_info->compress_arg = (int)atoi(val); + break; + + case 'F': /* Specifiy output file (default=stdout). 
*/ + if (args_info->output_given) { + fprintf(stderr, "%s: `--output' (`-F') option given more than once\n", PROGRAM); + } + args_info->output_given++; + if (args_info->output_arg) free(args_info->output_arg); + args_info->output_arg = strdup(val); + break; + + case 0: /* Long option(s) with no short form */ + /* Print help and exit. */ + if (strcmp(olong, "help") == 0) { + if (args_info->help_given) { + fprintf(stderr, "%s: `--help' (`-h') option given more than once\n", PROGRAM); + } + clear_args(args_info); + cmdline_parser_print_help(); + exit(EXIT_SUCCESS); + + } + + /* Print version and exit. */ + else if (strcmp(olong, "version") == 0) { + if (args_info->version_given) { + fprintf(stderr, "%s: `--version' (`-V') option given more than once\n", PROGRAM); + } + clear_args(args_info); + cmdline_parser_print_version(); + exit(EXIT_SUCCESS); + + } + + /* Specify compression level of output file. */ + else if (strcmp(olong, "compress") == 0) { + if (args_info->compress_given) { + fprintf(stderr, "%s: `--compress' (`-z') option given more than once\n", PROGRAM); + } + args_info->compress_given++; + args_info->compress_arg = (int)atoi(val); + } + + /* Specifiy output file (default=stdout). */ + else if (strcmp(olong, "output") == 0) { + if (args_info->output_given) { + fprintf(stderr, "%s: `--output' (`-F') option given more than once\n", PROGRAM); + } + args_info->output_given++; + if (args_info->output_arg) free(args_info->output_arg); + args_info->output_arg = strdup(val); + } + + else { + fprintf(stderr, "%s: unknown long option '%s'.\n", PROGRAM, olong); + return (EXIT_FAILURE); + } + break; + + case '?': /* Invalid Option */ + fprintf(stderr, "%s: unknown option '%s'.\n", PROGRAM, olong); + return (EXIT_FAILURE); + + + default: /* bug: options not considered. 
*/ + fprintf (stderr, "%s: option unknown: %c\n", PROGRAM, oshort); + abort (); + } /* switch */ + return 0; +} + + +/* Initialize options not yet given from environmental defaults */ +void +cmdline_parser_envdefaults(struct gengetopt_args_info *args_info) +{ + + + return; +} + + +/* Load option values from an .rc file */ +void +cmdline_parser_read_rcfile(const char *filename, + struct gengetopt_args_info *args_info, + int user_specified) +{ + char *fullname; + FILE *rcfile; + + if (!filename) return; /* ignore NULL filenames */ + +#if defined(HAVE_GETUID) && defined(HAVE_GETPWUID) + if (*filename == '~') { + /* tilde-expansion hack */ + struct passwd *pwent = getpwuid(getuid()); + if (!pwent) { + fprintf(stderr, "%s: user-id %d not found!\n", PROGRAM, getuid()); + return; + } + if (!pwent->pw_dir) { + fprintf(stderr, "%s: home directory for user-id %d not found!\n", PROGRAM, getuid()); + return; + } + fullname = (char *)malloc(strlen(pwent->pw_dir)+strlen(filename)); + strcpy(fullname, pwent->pw_dir); + strcat(fullname, filename+1); + } else { + fullname = strdup(filename); + } +#else /* !(defined(HAVE_GETUID) && defined(HAVE_GETPWUID)) */ + fullname = strdup(filename); +#endif /* defined(HAVE_GETUID) && defined(HAVE_GETPWUID) */ + + /* try to open */ + rcfile = fopen(fullname,"r"); + if (!rcfile) { + if (user_specified) { + fprintf(stderr, "%s: warning: open failed for rc-file '%s': %s\n", + PROGRAM, fullname, strerror(errno)); + } + } + else { + cmdline_parser_read_rc_stream(rcfile, fullname, args_info); + } + + /* cleanup */ + if (fullname != filename) free(fullname); + if (rcfile) fclose(rcfile); + + return; +} + + +/* Parse option values from an .rc file : guts */ +#define OPTPARSE_GET 32 +void +cmdline_parser_read_rc_stream(FILE *rcfile, + const char *filename, + struct gengetopt_args_info *args_info) +{ + char *optname = (char *)malloc(OPTPARSE_GET); + char *optval = (char *)malloc(OPTPARSE_GET); + size_t onsize = OPTPARSE_GET; + size_t ovsize = 
OPTPARSE_GET; + size_t onlen = 0; + size_t ovlen = 0; + int lineno = 0; + char c; + +#ifdef cmdline_parser_DEBUG + fprintf(stderr, "cmdline_parser_read_rc_stream('%s'):\n", filename); +#endif + + while ((c = fgetc(rcfile)) != EOF) { + onlen = 0; + ovlen = 0; + lineno++; + + /* -- get next option-name */ + /* skip leading space and comments */ + if (isspace(c)) continue; + if (c == '#') { + while ((c = fgetc(rcfile)) != EOF) { + if (c == '\n') break; + } + continue; + } + + /* parse option-name */ + while (c != EOF && c != '=' && !isspace(c)) { + /* re-allocate if necessary */ + if (onlen >= onsize-1) { + char *tmp = (char *)malloc(onsize+OPTPARSE_GET); + strcpy(tmp,optname); + free(optname); + + onsize += OPTPARSE_GET; + optname = tmp; + } + optname[onlen++] = c; + c = fgetc(rcfile); + } + optname[onlen++] = '\0'; + +#ifdef cmdline_parser_DEBUG + fprintf(stderr, "cmdline_parser_read_rc_stream('%s'): line %d: optname='%s'\n", + filename, lineno, optname); +#endif + + /* -- get next option-value */ + /* skip leading space */ + while ((c = fgetc(rcfile)) != EOF && isspace(c)) { + ; + } + + /* parse option-value */ + while (c != EOF && c != '\n') { + /* re-allocate if necessary */ + if (ovlen >= ovsize-1) { + char *tmp = (char *)malloc(ovsize+OPTPARSE_GET); + strcpy(tmp,optval); + free(optval); + ovsize += OPTPARSE_GET; + optval = tmp; + } + optval[ovlen++] = c; + c = fgetc(rcfile); + } + optval[ovlen++] = '\0'; + + /* now do the action for the option */ + if (cmdline_parser_parse_option('\0',optname,optval,args_info) != 0) { + fprintf(stderr, "%s: error in file '%s' at line %d.\n", PROGRAM, filename, lineno); + + } + } + + /* cleanup */ + free(optname); + free(optval); + + return; +} diff --git a/gfsm/gfsm/src/programs/gfsmcompose_cmdparser.h b/gfsm/gfsm/src/programs/gfsmcompose_cmdparser.h new file mode 100644 index 0000000..a99342e --- /dev/null +++ b/gfsm/gfsm/src/programs/gfsmcompose_cmdparser.h @@ -0,0 +1,63 @@ +/* -*- Mode: C -*- + * + * File: 
gfsmcompose_cmdparser.h
+ * Description: Headers for command-line parser struct gengetopt_args_info.
+ *
+ * File autogenerated by optgen.perl version 0.05.
+ *
+ */
+
+#ifndef gfsmcompose_cmdparser_h
+#define gfsmcompose_cmdparser_h
+
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+/*
+ * moocow: Never set PACKAGE and VERSION here.
+ */
+
+/*
+ * Parsed command line of gfsmcompose.
+ *  - *_arg   : value of an option; string values are heap copies made with strdup()
+ *  - *_given : incremented each time the option is seen, so 0 means "not given"
+ *  - inputs  : strdup() copies of the arguments left over after option parsing
+ */
+struct gengetopt_args_info {
+  int compress_arg;	 /* Specify compression level of output file. (default=-1). */
+  char * output_arg;	 /* Specify output file (default=stdout). (default=-). */
+
+  int help_given;	 /* Whether help was given */
+  int version_given;	 /* Whether version was given */
+  int compress_given;	 /* Whether compress was given */
+  int output_given;	 /* Whether output was given */
+
+  char **inputs;	 /* unnamed arguments */
+  unsigned inputs_num;	 /* number of unnamed arguments */
+};
+
+/* read rc files (if any) and parse all command-line options in one swell foop */
+int cmdline_parser (int argc, char *const *argv, struct gengetopt_args_info *args_info);
+
+/* instantiate defaults from environment variables: you must call this yourself! 
*/ +void cmdline_parser_envdefaults (struct gengetopt_args_info *args_info); + +/* read a single rc-file */ +void cmdline_parser_read_rcfile (const char *filename, + struct gengetopt_args_info *args_info, + int user_specified); + +/* read a single rc-file (stream) */ +void cmdline_parser_read_rc_stream (FILE *rcfile, + const char *filename, + struct gengetopt_args_info *args_info); + +/* parse a single option */ +int cmdline_parser_parse_option (char oshort, const char *olong, const char *val, + struct gengetopt_args_info *args_info); + +/* print help message */ +void cmdline_parser_print_help(void); + +/* print version */ +void cmdline_parser_print_version(void); + +#ifdef __cplusplus +} +#endif /* __cplusplus */ +#endif /* gfsmcompose_cmdparser_h */ diff --git a/gfsm/gfsm/src/programs/gfsmcompose_main.c b/gfsm/gfsm/src/programs/gfsmcompose_main.c new file mode 100644 index 0000000..232c5e6 --- /dev/null +++ b/gfsm/gfsm/src/programs/gfsmcompose_main.c @@ -0,0 +1,123 @@ +/* + gfsm-utils : finite state automaton utilities + Copyright (C) 2005-2007 by Bryan Jurish <moocow@ling.uni-potsdam.de> + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+
+    You should have received a copy of the GNU Lesser General Public
+    License along with this library; if not, write to the Free Software
+    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+
+#ifdef HAVE_CONFIG_H
+#  include <config.h>
+#endif
+
+#include <stdio.h>
+#include <errno.h>
+#include <string.h>
+#include <stdlib.h>
+
+#include <gfsm.h>
+
+#include "gfsmcompose_cmdparser.h"
+
+/*--------------------------------------------------------------------------
+ * Globals
+ *--------------------------------------------------------------------------*/
+char *progname = "gfsmcompose";
+
+//-- options
+struct gengetopt_args_info args;
+
+//-- files
+//   (infilename is hidden by the parameter of the same name in compute_operation())
+const char *infilename;
+const char *outfilename = "-";
+
+//-- global structs etc.
+//   fsmOut : running result, NULL until the first automaton has been loaded
+//   fsmIn  : scratch automaton that each input file is loaded into
+gfsmError *err = NULL;
+gfsmAutomaton *fsmOut=NULL, *fsmIn=NULL;
+
+/*--------------------------------------------------------------------------
+ * Option Processing
+ *  + parses the command line into the global 'args'
+ *  + exits with status 1 if parsing fails, and with status 2 (after
+ *    printing the help text) if no file argument was given
+ *  + sets outfilename and allocates fsmIn
+ *--------------------------------------------------------------------------*/
+void get_my_options(int argc, char **argv)
+{
+  if (cmdline_parser(argc, argv, &args) != 0)
+    exit(1);
+
+  //-- require at least one file argument
+  if (args.inputs_num < 1) {
+    cmdline_parser_print_help();
+    exit(2);
+  }
+
+  //-- output
+  if (args.output_arg) outfilename = args.output_arg;
+
+  //-- load environmental defaults
+  //cmdline_parser_envdefaults(&args);
+
+  //-- initialize automaton
+  fsmIn = gfsm_automaton_new();
+}
+
+/*--------------------------------------------------------------------------
+ * compute_operation()
+ *  + utility routine
+ *  + loads the binary automaton file 'infilename' into fsmIn; exits with
+ *    status 255 if the load fails
+ *  + first call: the loaded automaton becomes fsmOut and a fresh fsmIn is
+ *    allocated; later calls pass (fsmOut,fsmIn) to gfsm_automaton_compose()
+ *  + NOTE(review): presumably the composition is stored in fsmOut -- confirm
+ *    against the gfsm API
+ */
+void compute_operation(const char *infilename)
+{
+  //-- load automaton
+  if (!gfsm_automaton_load_bin_filename(fsmIn,infilename,&err)) {
+    g_printerr("%s: load failed for '%s': %s\n", progname, infilename, err->message);
+    exit(255);
+  }
+
+  //-- compute underlying FSM operation
+  if (fsmOut == NULL) {
+    fsmOut = fsmIn;
+    fsmIn  = gfsm_automaton_new();
+  } else {
+    gfsm_automaton_compose(fsmOut,fsmIn);
+  }
+}
+
+/*-------------------------------------------------------------------------- + * MAIN + *--------------------------------------------------------------------------*/ +int main (int argc, char **argv) +{ + int i; + + GFSM_INIT + + get_my_options(argc,argv); + + for (i = 0; i < args.inputs_num; i++) { + compute_operation(args.inputs[i]); + } + if (args.inputs_num == 1) compute_operation("-"); + + //-- spew automaton + if (!gfsm_automaton_save_bin_filename(fsmOut,outfilename,args.compress_arg,&err)) { + g_printerr("%s: store failed to '%s': %s\n", progname, outfilename, err->message); + exit(4); + } + + //-- cleanup + if (fsmIn) gfsm_automaton_free(fsmIn); + if (fsmOut) gfsm_automaton_free(fsmOut); + + GFSM_FINISH + + return 0; +} diff --git a/gfsm/gfsm/src/programs/gfsmcompre.gog b/gfsm/gfsm/src/programs/gfsmcompre.gog new file mode 100644 index 0000000..7e4058f --- /dev/null +++ b/gfsm/gfsm/src/programs/gfsmcompre.gog @@ -0,0 +1,94 @@ +# -*- Mode: Shell-Script -*- +# +# Getopt::Gen specification +#----------------------------------------------------------------------------- +program "gfsmcompre" +#program_version "0.01" + +purpose "Compile an automaton from a regular expression" +author "Bryan Jurish <moocow@ling.uni-potsdam.de>" +on_reparse "warn" + +#----------------------------------------------------------------------------- +# Details +#----------------------------------------------------------------------------- +details "" + +#----------------------------------------------------------------------------- +# Files +#----------------------------------------------------------------------------- +#rcfile "/etc/mootrc" +#rcfile "~/.mootrc" + +#----------------------------------------------------------------------------- +# Arguments +#----------------------------------------------------------------------------- +#argument "LABELID..." "Input label IDs" \ +# details=" +#In ASCII decimal notation. 
+#" + +#----------------------------------------------------------------------------- +# Options +#----------------------------------------------------------------------------- +#group "Basic Options" + +string "labels" l "Basic label alphabet" \ + arg="LABFILE" + +string "regex" s "Regular expression to compile" \ + arg="REGEX" + +flag "string" x "Compile as string, not regex" \ + default=0 + +string "semiring" r "Specify semiring type" \ + arg="SRTYPE" \ + default="tropical" \ + details=" +Specify one of the following: + + boolean + log + plog + real + trivial + tropical + +" + +int "compress" z "Specify compression level of output file." \ + arg="LEVEL" \ + default="-1" \ + details=" +Specify zlib compression level of output file. -1 (default) indicates +the default compression level, 0 (zero) indicates no zlib compression at all, +and 9 indicates the best possible compression. +" + +string "output" F "Specifiy output file (default=stdout)." \ + arg="FILE" \ + default="-" + +#----------------------------------------------------------------------------- +# Addenda +#----------------------------------------------------------------------------- +#addenda "" + +#----------------------------------------------------------------------------- +# Bugs +#----------------------------------------------------------------------------- +bugs " + +None known. + +" + +#----------------------------------------------------------------------------- +# Footer +#----------------------------------------------------------------------------- +#acknowledge `cat acknowledge.pod` + +seealso " +L<gfsmutils> +" diff --git a/gfsm/gfsm/src/programs/gfsmcompre_cmdparser.c b/gfsm/gfsm/src/programs/gfsmcompre_cmdparser.c new file mode 100644 index 0000000..75f5810 --- /dev/null +++ b/gfsm/gfsm/src/programs/gfsmcompre_cmdparser.c @@ -0,0 +1,542 @@ +/* -*- Mode: C -*- + * + * File: gfsmcompre_cmdparser.c + * Description: Code for command-line parser struct gengetopt_args_info. 
+ * + * File autogenerated by optgen.perl version 0.05 + * generated with the following command: + * /usr/local/bin/optgen.perl -u -l --no-handle-rcfile --nopod -F gfsmcompre_cmdparser gfsmcompre.gog + * + * The developers of optgen.perl consider the fixed text that goes in all + * optgen.perl output files to be in the public domain: + * we make no copyright claims on it. + * + */ + +#include <stdio.h> +#include <stdlib.h> +#include <errno.h> +#include <string.h> +#include <unistd.h> +#include <ctype.h> + +/* If we use autoconf/autoheader. */ +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#ifdef HAVE_PWD_H +# include <pwd.h> +#endif + +/* Allow user-overrides for PACKAGE and VERSION */ +#ifndef PACKAGE +# define PACKAGE "PACKAGE" +#endif + +#ifndef VERSION +# define VERSION "VERSION" +#endif + + +#ifndef PROGRAM +# define PROGRAM "gfsmcompre" +#endif + +/* #define cmdline_parser_DEBUG */ + +/* Check for "configure's" getopt check result. */ +#ifndef HAVE_GETOPT_LONG +# include "getopt.h" +#else +# include <getopt.h> +#endif + +#if !defined(HAVE_STRDUP) && !defined(strdup) +# define strdup gengetopt_strdup +#endif /* HAVE_STRDUP */ + +#include "gfsmcompre_cmdparser.h" + + +/* user code section */ + +/* end user code section */ + + +void +cmdline_parser_print_version (void) +{ + printf("gfsmcompre (%s %s) by Bryan Jurish <moocow@ling.uni-potsdam.de>\n", PACKAGE, VERSION); +} + +void +cmdline_parser_print_help (void) +{ + cmdline_parser_print_version (); + printf("\n"); + printf("Purpose:\n"); + printf(" Compile an automaton from a regular expression\n"); + printf("\n"); + + printf("Usage: %s [OPTIONS]... 
[FILES]...\n", "gfsmcompre"); + + + printf("\n"); + printf(" Options:\n"); + printf(" -h --help Print help and exit.\n"); + printf(" -V --version Print version and exit.\n"); + printf(" -lLABFILE --labels=LABFILE Basic label alphabet\n"); + printf(" -sREGEX --regex=REGEX Regular expression to compile\n"); + printf(" -x --string Compile as string, not regex\n"); + printf(" -rSRTYPE --semiring=SRTYPE Specify semiring type\n"); + printf(" -zLEVEL --compress=LEVEL Specify compression level of output file.\n"); + printf(" -FFILE --output=FILE Specifiy output file (default=stdout).\n"); +} + +#if !defined(HAVE_STRDUP) && !defined(strdup) +/* gengetopt_strdup(): automatically generated from strdup.c. */ +/* strdup.c replacement of strdup, which is not standard */ +static char * +gengetopt_strdup (const char *s) +{ + char *result = (char*)malloc(strlen(s) + 1); + if (result == (char*)0) + return (char*)0; + strcpy(result, s); + return result; +} +#endif /* HAVE_STRDUP */ + +/* clear_args(args_info): clears all args & resets to defaults */ +static void +clear_args(struct gengetopt_args_info *args_info) +{ + args_info->labels_arg = NULL; + args_info->regex_arg = NULL; + args_info->string_flag = 0; + args_info->semiring_arg = strdup("tropical"); + args_info->compress_arg = -1; + args_info->output_arg = strdup("-"); +} + + +int +cmdline_parser (int argc, char * const *argv, struct gengetopt_args_info *args_info) +{ + int c; /* Character of the parsed option. 
*/ + int missing_required_options = 0; + + args_info->help_given = 0; + args_info->version_given = 0; + args_info->labels_given = 0; + args_info->regex_given = 0; + args_info->string_given = 0; + args_info->semiring_given = 0; + args_info->compress_given = 0; + args_info->output_given = 0; + + /* no unnamed arguments yet: keep inputs/inputs_num well-defined even if none are given */ + args_info->inputs = NULL; + args_info->inputs_num = 0; + + clear_args(args_info); + + /* rcfile handling */ + + /* end rcfile handling */ + + optarg = 0; + optind = 1; + opterr = 1; + optopt = '?'; + + while (1) + { + int option_index = 0; + static struct option long_options[] = { + { "help", 0, NULL, 'h' }, + { "version", 0, NULL, 'V' }, + { "labels", 1, NULL, 'l' }, + { "regex", 1, NULL, 's' }, + { "string", 0, NULL, 'x' }, + { "semiring", 1, NULL, 'r' }, + { "compress", 1, NULL, 'z' }, + { "output", 1, NULL, 'F' }, + { NULL, 0, NULL, 0 } + }; + static char short_options[] = { + 'h', + 'V', + 'l', ':', + 's', ':', + 'x', + 'r', ':', + 'z', ':', + 'F', ':', + '\0' + }; + + c = getopt_long (argc, argv, short_options, long_options, &option_index); + + if (c == -1) break; /* Exit from 'while (1)' loop. */ + + if (cmdline_parser_parse_option(c, long_options[option_index].name, optarg, args_info) != 0) { + exit (EXIT_FAILURE); + } + } /* while */ + + + + if ( missing_required_options ) + exit (EXIT_FAILURE); + + + /* everything after the last option is copied into args_info->inputs */ + if (optind < argc) { + int i = 0 ; + args_info->inputs_num = argc - optind ; + args_info->inputs = (char **)(malloc ((args_info->inputs_num)*sizeof(char *))) ; + if (!args_info->inputs) { + fprintf(stderr, "%s: out of memory\n", PROGRAM); + exit (EXIT_FAILURE); + } + while (optind < argc) + args_info->inputs[ i++ ] = strdup (argv[optind++]) ; + } + + return 0; +} + + +/* Parse a single option: dispatches on the short option character 'oshort'; + * when 'oshort' is 0 the long option name 'olong' is matched instead. + * Returns 0 on success and non-zero for a null or unknown option. */ +int +cmdline_parser_parse_option(char oshort, const char *olong, const char *val, + struct gengetopt_args_info *args_info) +{ + if (!oshort && !(olong && *olong)) return 1; /* ignore null options */ + +#ifdef cmdline_parser_DEBUG + fprintf(stderr, "parse_option(): oshort='%c', olong='%s', val='%s'\n", oshort, olong, val); +#endif + + switch (oshort) + { + case 'h': /* Print help and exit. 
*/ + if (args_info->help_given) { + fprintf(stderr, "%s: `--help' (`-h') option given more than once\n", PROGRAM); + } + clear_args(args_info); + cmdline_parser_print_help(); + exit(EXIT_SUCCESS); + + break; + + case 'V': /* Print version and exit. */ + if (args_info->version_given) { + fprintf(stderr, "%s: `--version' (`-V') option given more than once\n", PROGRAM); + } + clear_args(args_info); + cmdline_parser_print_version(); + exit(EXIT_SUCCESS); + + break; + + case 'l': /* Basic label alphabet */ + if (args_info->labels_given) { + fprintf(stderr, "%s: `--labels' (`-l') option given more than once\n", PROGRAM); + } + args_info->labels_given++; + if (args_info->labels_arg) free(args_info->labels_arg); + args_info->labels_arg = strdup(val); + break; + + case 's': /* Regular expression to compile */ + if (args_info->regex_given) { + fprintf(stderr, "%s: `--regex' (`-s') option given more than once\n", PROGRAM); + } + args_info->regex_given++; + if (args_info->regex_arg) free(args_info->regex_arg); + args_info->regex_arg = strdup(val); + break; + + case 'x': /* Compile as string, not regex */ + if (args_info->string_given) { + fprintf(stderr, "%s: `--string' (`-x') option given more than once\n", PROGRAM); + } + args_info->string_given++; + if (args_info->string_given <= 1) + args_info->string_flag = !(args_info->string_flag); + break; + + case 'r': /* Specify semiring type */ + if (args_info->semiring_given) { + fprintf(stderr, "%s: `--semiring' (`-r') option given more than once\n", PROGRAM); + } + args_info->semiring_given++; + if (args_info->semiring_arg) free(args_info->semiring_arg); + args_info->semiring_arg = strdup(val); + break; + + case 'z': /* Specify compression level of output file. */ + if (args_info->compress_given) { + fprintf(stderr, "%s: `--compress' (`-z') option given more than once\n", PROGRAM); + } + args_info->compress_given++; + args_info->compress_arg = (int)atoi(val); + break; + + case 'F': /* Specifiy output file (default=stdout). 
*/ + if (args_info->output_given) { + fprintf(stderr, "%s: `--output' (`-F') option given more than once\n", PROGRAM); + } + args_info->output_given++; + if (args_info->output_arg) free(args_info->output_arg); + args_info->output_arg = strdup(val); + break; + + case 0: /* Long option(s) with no short form */ + /* Print help and exit. */ + if (strcmp(olong, "help") == 0) { + if (args_info->help_given) { + fprintf(stderr, "%s: `--help' (`-h') option given more than once\n", PROGRAM); + } + clear_args(args_info); + cmdline_parser_print_help(); + exit(EXIT_SUCCESS); + + } + + /* Print version and exit. */ + else if (strcmp(olong, "version") == 0) { + if (args_info->version_given) { + fprintf(stderr, "%s: `--version' (`-V') option given more than once\n", PROGRAM); + } + clear_args(args_info); + cmdline_parser_print_version(); + exit(EXIT_SUCCESS); + + } + + /* Basic label alphabet */ + else if (strcmp(olong, "labels") == 0) { + if (args_info->labels_given) { + fprintf(stderr, "%s: `--labels' (`-l') option given more than once\n", PROGRAM); + } + args_info->labels_given++; + if (args_info->labels_arg) free(args_info->labels_arg); + args_info->labels_arg = strdup(val); + } + + /* Regular expression to compile */ + else if (strcmp(olong, "regex") == 0) { + if (args_info->regex_given) { + fprintf(stderr, "%s: `--regex' (`-s') option given more than once\n", PROGRAM); + } + args_info->regex_given++; + if (args_info->regex_arg) free(args_info->regex_arg); + args_info->regex_arg = strdup(val); + } + + /* Compile as string, not regex */ + else if (strcmp(olong, "string") == 0) { + if (args_info->string_given) { + fprintf(stderr, "%s: `--string' (`-x') option given more than once\n", PROGRAM); + } + args_info->string_given++; + if (args_info->string_given <= 1) + args_info->string_flag = !(args_info->string_flag); + } + + /* Specify semiring type */ + else if (strcmp(olong, "semiring") == 0) { + if (args_info->semiring_given) { + fprintf(stderr, "%s: `--semiring' (`-r') option 
given more than once\n", PROGRAM); + } + args_info->semiring_given++; + if (args_info->semiring_arg) free(args_info->semiring_arg); + args_info->semiring_arg = strdup(val); + } + + /* Specify compression level of output file. */ + else if (strcmp(olong, "compress") == 0) { + if (args_info->compress_given) { + fprintf(stderr, "%s: `--compress' (`-z') option given more than once\n", PROGRAM); + } + args_info->compress_given++; + args_info->compress_arg = (int)atoi(val); + } + + /* Specifiy output file (default=stdout). */ + else if (strcmp(olong, "output") == 0) { + if (args_info->output_given) { + fprintf(stderr, "%s: `--output' (`-F') option given more than once\n", PROGRAM); + } + args_info->output_given++; + if (args_info->output_arg) free(args_info->output_arg); + args_info->output_arg = strdup(val); + } + + else { + fprintf(stderr, "%s: unknown long option '%s'.\n", PROGRAM, olong); + return (EXIT_FAILURE); + } + break; + + case '?': /* Invalid Option */ + fprintf(stderr, "%s: unknown option '%s'.\n", PROGRAM, olong); + return (EXIT_FAILURE); + + + default: /* bug: options not considered. 
*/ + fprintf (stderr, "%s: option unknown: %c\n", PROGRAM, oshort); + abort (); + } /* switch */ + return 0; +} + + +/* Initialize options not yet given from environmental defaults */ +void +cmdline_parser_envdefaults(struct gengetopt_args_info *args_info) +{ + + + return; +} + + +/* Load option values from an .rc file */ +void +cmdline_parser_read_rcfile(const char *filename, + struct gengetopt_args_info *args_info, + int user_specified) +{ + char *fullname; + FILE *rcfile; + + if (!filename) return; /* ignore NULL filenames */ + +#if defined(HAVE_GETUID) && defined(HAVE_GETPWUID) + if (*filename == '~') { + /* tilde-expansion hack */ + struct passwd *pwent = getpwuid(getuid()); + if (!pwent) { + fprintf(stderr, "%s: user-id %d not found!\n", PROGRAM, getuid()); + return; + } + if (!pwent->pw_dir) { + fprintf(stderr, "%s: home directory for user-id %d not found!\n", PROGRAM, getuid()); + return; + } + fullname = (char *)malloc(strlen(pwent->pw_dir)+strlen(filename)); + strcpy(fullname, pwent->pw_dir); + strcat(fullname, filename+1); + } else { + fullname = strdup(filename); + } +#else /* !(defined(HAVE_GETUID) && defined(HAVE_GETPWUID)) */ + fullname = strdup(filename); +#endif /* defined(HAVE_GETUID) && defined(HAVE_GETPWUID) */ + + /* try to open */ + rcfile = fopen(fullname,"r"); + if (!rcfile) { + if (user_specified) { + fprintf(stderr, "%s: warning: open failed for rc-file '%s': %s\n", + PROGRAM, fullname, strerror(errno)); + } + } + else { + cmdline_parser_read_rc_stream(rcfile, fullname, args_info); + } + + /* cleanup */ + if (fullname != filename) free(fullname); + if (rcfile) fclose(rcfile); + + return; +} + + +/* Parse option values from an .rc file : guts */ +#define OPTPARSE_GET 32 +void +cmdline_parser_read_rc_stream(FILE *rcfile, + const char *filename, + struct gengetopt_args_info *args_info) +{ + char *optname = (char *)malloc(OPTPARSE_GET); + char *optval = (char *)malloc(OPTPARSE_GET); + size_t onsize = OPTPARSE_GET; + size_t ovsize = 
OPTPARSE_GET; + size_t onlen = 0; + size_t ovlen = 0; + int lineno = 0; + char c; + +#ifdef cmdline_parser_DEBUG + fprintf(stderr, "cmdline_parser_read_rc_stream('%s'):\n", filename); +#endif + + while ((c = fgetc(rcfile)) != EOF) { + onlen = 0; + ovlen = 0; + lineno++; + + /* -- get next option-name */ + /* skip leading space and comments */ + if (isspace(c)) continue; + if (c == '#') { + while ((c = fgetc(rcfile)) != EOF) { + if (c == '\n') break; + } + continue; + } + + /* parse option-name */ + while (c != EOF && c != '=' && !isspace(c)) { + /* re-allocate if necessary */ + if (onlen >= onsize-1) { + char *tmp = (char *)malloc(onsize+OPTPARSE_GET); + strcpy(tmp,optname); + free(optname); + + onsize += OPTPARSE_GET; + optname = tmp; + } + optname[onlen++] = c; + c = fgetc(rcfile); + } + optname[onlen++] = '\0'; + +#ifdef cmdline_parser_DEBUG + fprintf(stderr, "cmdline_parser_read_rc_stream('%s'): line %d: optname='%s'\n", + filename, lineno, optname); +#endif + + /* -- get next option-value */ + /* skip leading space */ + while ((c = fgetc(rcfile)) != EOF && isspace(c)) { + ; + } + + /* parse option-value */ + while (c != EOF && c != '\n') { + /* re-allocate if necessary */ + if (ovlen >= ovsize-1) { + char *tmp = (char *)malloc(ovsize+OPTPARSE_GET); + strcpy(tmp,optval); + free(optval); + ovsize += OPTPARSE_GET; + optval = tmp; + } + optval[ovlen++] = c; + c = fgetc(rcfile); + } + optval[ovlen++] = '\0'; + + /* now do the action for the option */ + if (cmdline_parser_parse_option('\0',optname,optval,args_info) != 0) { + fprintf(stderr, "%s: error in file '%s' at line %d.\n", PROGRAM, filename, lineno); + + } + } + + /* cleanup */ + free(optname); + free(optval); + + return; +} diff --git a/gfsm/gfsm/src/programs/gfsmcompre_cmdparser.h b/gfsm/gfsm/src/programs/gfsmcompre_cmdparser.h new file mode 100644 index 0000000..32361ea --- /dev/null +++ b/gfsm/gfsm/src/programs/gfsmcompre_cmdparser.h @@ -0,0 +1,71 @@ +/* -*- Mode: C -*- + * + * File: 
gfsmcompre_cmdparser.h + * Description: Headers for command-line parser struct gengetopt_args_info. + * + * File autogenerated by optgen.perl version 0.05. + * + */ + +#ifndef gfsmcompre_cmdparser_h +#define gfsmcompre_cmdparser_h + +#ifdef __cplusplus +extern "C" { +#endif /* __cplusplus */ + +/* + * moocow: Never set PACKAGE and VERSION here. + */ + +struct gengetopt_args_info { + char * labels_arg; /* Basic label alphabet (default=NULL). */ + char * regex_arg; /* Regular expression to compile (default=NULL). */ + int string_flag; /* Compile as string, not regex (default=0). */ + char * semiring_arg; /* Specify semiring type (default=tropical). */ + int compress_arg; /* Specify compression level of output file. (default=-1). */ + char * output_arg; /* Specifiy output file (default=stdout). (default=-). */ + + int help_given; /* Whether help was given */ + int version_given; /* Whether version was given */ + int labels_given; /* Whether labels was given */ + int regex_given; /* Whether regex was given */ + int string_given; /* Whether string was given */ + int semiring_given; /* Whether semiring was given */ + int compress_given; /* Whether compress was given */ + int output_given; /* Whether output was given */ + + char **inputs; /* unnamed arguments */ + unsigned inputs_num; /* number of unnamed arguments */ +}; + +/* read rc files (if any) and parse all command-line options in one swell foop */ +int cmdline_parser (int argc, char *const *argv, struct gengetopt_args_info *args_info); + +/* instantiate defaults from environment variables: you must call this yourself! 
*/ +void cmdline_parser_envdefaults (struct gengetopt_args_info *args_info); + +/* read a single rc-file */ +void cmdline_parser_read_rcfile (const char *filename, + struct gengetopt_args_info *args_info, + int user_specified); + +/* read a single rc-file (stream) */ +void cmdline_parser_read_rc_stream (FILE *rcfile, + const char *filename, + struct gengetopt_args_info *args_info); + +/* parse a single option */ +int cmdline_parser_parse_option (char oshort, const char *olong, const char *val, + struct gengetopt_args_info *args_info); + +/* print help message */ +void cmdline_parser_print_help(void); + +/* print version */ +void cmdline_parser_print_version(void); + +#ifdef __cplusplus +} +#endif /* __cplusplus */ +#endif /* gfsmcompre_cmdparser_h */ diff --git a/gfsm/gfsm/src/programs/gfsmcompre_main.c b/gfsm/gfsm/src/programs/gfsmcompre_main.c new file mode 100644 index 0000000..78e7721 --- /dev/null +++ b/gfsm/gfsm/src/programs/gfsmcompre_main.c @@ -0,0 +1,143 @@ +/* + gfsm-utils : finite state automaton utilities + Copyright (C) 2005 by Bryan Jurish <moocow@ling.uni-potsdam.de> + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <errno.h> +#include <ctype.h> + +#include <gfsm.h> + +#include "gfsmcompre_cmdparser.h" + +/*-------------------------------------------------------------------------- + * Globals + *--------------------------------------------------------------------------*/ +char *progname = "gfsmcompre"; + +//-- options +struct gengetopt_args_info args; + +//-- files +const char *fstfilename = "-"; +const char *outfilename = "-"; + +//-- global structs +gfsmRegexCompiler *rec = NULL; +gfsmAlphabet *abet = NULL; +gfsmError *err = NULL; +gboolean emit_warnings = TRUE; + +/*-------------------------------------------------------------------------- + * Option Processing + *--------------------------------------------------------------------------*/ +void get_my_options(int argc, char **argv) +{ + if (cmdline_parser(argc, argv, &args) != 0) + exit(1); + + //-- load environmental defaults + //cmdline_parser_envdefaults(&args); + + //-- sanity checks + if (!args.regex_given) { + g_printerr("%s: no regex specified!\n", progname); + cmdline_parser_print_help(); + exit(-1); + } + + //-- filenames + outfilename = args.output_arg; + + //-- alphabet: basic labels + abet = gfsm_string_alphabet_new(); + if (args.labels_given) { + if (!gfsm_alphabet_load_filename(abet, args.labels_arg, &err)) { + g_printerr("%s: load failed for labels file '%s': %s\n", + progname, args.labels_arg, err->message); + exit(2); + } + } + + //-- options for regex compiler + rec = gfsm_regex_compiler_new_full("gfsmRegexCompiler", + abet, + gfsm_sr_name_to_type(args.semiring_arg), + emit_warnings); +} + + 
+/*-------------------------------------------------------------------------- + * MAIN + *--------------------------------------------------------------------------*/ +int main (int argc, char **argv) +{ + gfsmAutomaton *fsm; + char *regex=NULL; + + GFSM_INIT + get_my_options(argc,argv); + + //-- string-compile hack: escape everything + if (args.string_flag) { + GString *tmp = g_string_new(""); + int i; + for (i=0; i < strlen(args.regex_arg); i++) { + g_string_append_c(tmp,'\\'); + g_string_append_c(tmp,args.regex_arg[i]); + } + regex = tmp->str; + g_string_free(tmp,FALSE); + } else { + regex = args.regex_arg; + } + + //-- parse regex string + gfsm_scanner_scan_string(&(rec->scanner), regex); + fsm = gfsm_regex_compiler_parse(rec); + + //-- check for errors + if (rec->scanner.err) { + g_printerr("%s: %s\n", progname, err->message); + exit(3); + } + if (!fsm) { + g_printerr("%s: no automaton!\n", progname); + exit(4); + } + + + //-- save output fsm + if (!gfsm_automaton_save_bin_filename(fsm,outfilename,args.compress_arg,&err)) { + g_printerr("%s: store failed to '%s': %s\n", progname, outfilename, err->message); + exit(5); + } + + //-- cleanup + gfsm_regex_compiler_free(rec,TRUE,TRUE); + + + GFSM_FINISH + return 0; +} diff --git a/gfsm/gfsm/src/programs/gfsmconcat.gog b/gfsm/gfsm/src/programs/gfsmconcat.gog new file mode 100644 index 0000000..f9b1e47 --- /dev/null +++ b/gfsm/gfsm/src/programs/gfsmconcat.gog @@ -0,0 +1,70 @@ +# -*- Mode: Shell-Script -*- +# +# Getopt::Gen specification +#----------------------------------------------------------------------------- +program "gfsmconcat" +#program_version "0.01" + +purpose "Concatenate finite state machines" +author "Bryan Jurish <moocow@ling.uni-potsdam.de>" +on_reparse "warn" + +#----------------------------------------------------------------------------- +# Details +#----------------------------------------------------------------------------- +details "" + 
+#----------------------------------------------------------------------------- +# Files +#----------------------------------------------------------------------------- +#rcfile "/etc/gfsmrc" +#rcfile "~/.gfsmrc" + +#----------------------------------------------------------------------------- +# Arguments +#----------------------------------------------------------------------------- +argument "BINFILE(s)" "Stored binary gfsm file(s)" \ + details=" +If unspecified, standard input will be read +" + +#----------------------------------------------------------------------------- +# Options +#----------------------------------------------------------------------------- +#group "Basic Options" + +int "compress" z "Specify compression level of output file." \ + arg="LEVEL" \ + default="-1" \ + details=" +Specify zlib compression level of output file. -1 (default) indicates +the default compression level, 0 (zero) indicates no zlib compression at all, +and 9 indicates the best possible compression. +" + +string "output" F "Specifiy output file (default=stdout)." \ + arg="FILE" \ + default="-" + +#----------------------------------------------------------------------------- +# Addenda +#----------------------------------------------------------------------------- +#addenda "" + +#----------------------------------------------------------------------------- +# Bugs +#----------------------------------------------------------------------------- +bugs " + +None known. 
+ +" + +#----------------------------------------------------------------------------- +# Footer +#----------------------------------------------------------------------------- +#acknowledge `cat acknowledge.pod` + +seealso " +L<gfsmutils> +" diff --git a/gfsm/gfsm/src/programs/gfsmconcat_cmdparser.c b/gfsm/gfsm/src/programs/gfsmconcat_cmdparser.c new file mode 100644 index 0000000..617a630 --- /dev/null +++ b/gfsm/gfsm/src/programs/gfsmconcat_cmdparser.c @@ -0,0 +1,449 @@ +/* -*- Mode: C -*- + * + * File: gfsmconcat_cmdparser.c + * Description: Code for command-line parser struct gengetopt_args_info. + * + * File autogenerated by optgen.perl version 0.05 + * generated with the following command: + * /usr/local/bin/optgen.perl -u -l --no-handle-rcfile --nopod -F gfsmconcat_cmdparser gfsmconcat.gog + * + * The developers of optgen.perl consider the fixed text that goes in all + * optgen.perl output files to be in the public domain: + * we make no copyright claims on it. + * + */ + +#include <stdio.h> +#include <stdlib.h> +#include <errno.h> +#include <string.h> +#include <unistd.h> +#include <ctype.h> + +/* If we use autoconf/autoheader. */ +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#ifdef HAVE_PWD_H +# include <pwd.h> +#endif + +/* Allow user-overrides for PACKAGE and VERSION */ +#ifndef PACKAGE +# define PACKAGE "PACKAGE" +#endif + +#ifndef VERSION +# define VERSION "VERSION" +#endif + + +#ifndef PROGRAM +# define PROGRAM "gfsmconcat" +#endif + +/* #define cmdline_parser_DEBUG */ + +/* Check for "configure's" getopt check result. 
*/ +#ifndef HAVE_GETOPT_LONG +# include "getopt.h" +#else +# include <getopt.h> +#endif + +#if !defined(HAVE_STRDUP) && !defined(strdup) +# define strdup gengetopt_strdup +#endif /* HAVE_STRDUP */ + +#include "gfsmconcat_cmdparser.h" + + +/* user code section */ + +/* end user code section */ + + +void +cmdline_parser_print_version (void) +{ + printf("gfsmconcat (%s %s) by Bryan Jurish <moocow@ling.uni-potsdam.de>\n", PACKAGE, VERSION); +} + +void +cmdline_parser_print_help (void) +{ + cmdline_parser_print_version (); + printf("\n"); + printf("Purpose:\n"); + printf(" Concatenate finite state machines\n"); + printf("\n"); + + printf("Usage: %s [OPTIONS]... BINFILE(s)\n", "gfsmconcat"); + + printf("\n"); + printf(" Arguments:\n"); + printf(" BINFILE(s) Stored binary gfsm file(s)\n"); + + printf("\n"); + printf(" Options:\n"); + printf(" -h --help Print help and exit.\n"); + printf(" -V --version Print version and exit.\n"); + printf(" -zLEVEL --compress=LEVEL Specify compression level of output file.\n"); + printf(" -FFILE --output=FILE Specifiy output file (default=stdout).\n"); +} + +#if !defined(HAVE_STRDUP) && !defined(strdup) +/* gengetopt_strdup(): automatically generated from strdup.c. */ +/* strdup.c replacement of strdup, which is not standard */ +static char * +gengetopt_strdup (const char *s) +{ + char *result = (char*)malloc(strlen(s) + 1); + if (result == (char*)0) + return (char*)0; + strcpy(result, s); + return result; +} +#endif /* HAVE_STRDUP */ + +/* clear_args(args_info): clears all args & resets to defaults */ +static void +clear_args(struct gengetopt_args_info *args_info) +{ + args_info->compress_arg = -1; + args_info->output_arg = strdup("-"); +} + + +int +cmdline_parser (int argc, char * const *argv, struct gengetopt_args_info *args_info) +{ + int c; /* Character of the parsed option. 
*/ + int missing_required_options = 0; + + args_info->help_given = 0; + args_info->version_given = 0; + args_info->compress_given = 0; + args_info->output_given = 0; + + /* no unnamed arguments yet: keep inputs/inputs_num well-defined even if none are given */ + args_info->inputs = NULL; + args_info->inputs_num = 0; + + clear_args(args_info); + + /* rcfile handling */ + + /* end rcfile handling */ + + optarg = 0; + optind = 1; + opterr = 1; + optopt = '?'; + + while (1) + { + int option_index = 0; + static struct option long_options[] = { + { "help", 0, NULL, 'h' }, + { "version", 0, NULL, 'V' }, + { "compress", 1, NULL, 'z' }, + { "output", 1, NULL, 'F' }, + { NULL, 0, NULL, 0 } + }; + static char short_options[] = { + 'h', + 'V', + 'z', ':', + 'F', ':', + '\0' + }; + + c = getopt_long (argc, argv, short_options, long_options, &option_index); + + if (c == -1) break; /* Exit from 'while (1)' loop. */ + + if (cmdline_parser_parse_option(c, long_options[option_index].name, optarg, args_info) != 0) { + exit (EXIT_FAILURE); + } + } /* while */ + + + + if ( missing_required_options ) + exit (EXIT_FAILURE); + + + /* everything after the last option is copied into args_info->inputs */ + if (optind < argc) { + int i = 0 ; + args_info->inputs_num = argc - optind ; + args_info->inputs = (char **)(malloc ((args_info->inputs_num)*sizeof(char *))) ; + if (!args_info->inputs) { + fprintf(stderr, "%s: out of memory\n", PROGRAM); + exit (EXIT_FAILURE); + } + while (optind < argc) + args_info->inputs[ i++ ] = strdup (argv[optind++]) ; + } + + return 0; +} + + +/* Parse a single option: dispatches on the short option character 'oshort'; + * when 'oshort' is 0 the long option name 'olong' is matched instead. + * Returns 0 on success and non-zero for a null or unknown option. */ +int +cmdline_parser_parse_option(char oshort, const char *olong, const char *val, + struct gengetopt_args_info *args_info) +{ + if (!oshort && !(olong && *olong)) return 1; /* ignore null options */ + +#ifdef cmdline_parser_DEBUG + fprintf(stderr, "parse_option(): oshort='%c', olong='%s', val='%s'\n", oshort, olong, val); +#endif + + switch (oshort) + { + case 'h': /* Print help and exit. */ + if (args_info->help_given) { + fprintf(stderr, "%s: `--help' (`-h') option given more than once\n", PROGRAM); + } + clear_args(args_info); + cmdline_parser_print_help(); + exit(EXIT_SUCCESS); + + break; + + case 'V': /* Print version and exit. 
*/ + if (args_info->version_given) { + fprintf(stderr, "%s: `--version' (`-V') option given more than once\n", PROGRAM); + } + clear_args(args_info); + cmdline_parser_print_version(); + exit(EXIT_SUCCESS); + + break; + + case 'z': /* Specify compression level of output file. */ + if (args_info->compress_given) { + fprintf(stderr, "%s: `--compress' (`-z') option given more than once\n", PROGRAM); + } + args_info->compress_given++; + args_info->compress_arg = (int)atoi(val); + break; + + case 'F': /* Specifiy output file (default=stdout). */ + if (args_info->output_given) { + fprintf(stderr, "%s: `--output' (`-F') option given more than once\n", PROGRAM); + } + args_info->output_given++; + if (args_info->output_arg) free(args_info->output_arg); + args_info->output_arg = strdup(val); + break; + + case 0: /* Long option(s) with no short form */ + /* Print help and exit. */ + if (strcmp(olong, "help") == 0) { + if (args_info->help_given) { + fprintf(stderr, "%s: `--help' (`-h') option given more than once\n", PROGRAM); + } + clear_args(args_info); + cmdline_parser_print_help(); + exit(EXIT_SUCCESS); + + } + + /* Print version and exit. */ + else if (strcmp(olong, "version") == 0) { + if (args_info->version_given) { + fprintf(stderr, "%s: `--version' (`-V') option given more than once\n", PROGRAM); + } + clear_args(args_info); + cmdline_parser_print_version(); + exit(EXIT_SUCCESS); + + } + + /* Specify compression level of output file. */ + else if (strcmp(olong, "compress") == 0) { + if (args_info->compress_given) { + fprintf(stderr, "%s: `--compress' (`-z') option given more than once\n", PROGRAM); + } + args_info->compress_given++; + args_info->compress_arg = (int)atoi(val); + } + + /* Specifiy output file (default=stdout). 
*/ + else if (strcmp(olong, "output") == 0) { + if (args_info->output_given) { + fprintf(stderr, "%s: `--output' (`-F') option given more than once\n", PROGRAM); + } + args_info->output_given++; + if (args_info->output_arg) free(args_info->output_arg); + args_info->output_arg = strdup(val); + } + + else { + fprintf(stderr, "%s: unknown long option '%s'.\n", PROGRAM, olong); + return (EXIT_FAILURE); + } + break; + + case '?': /* Invalid Option */ + fprintf(stderr, "%s: unknown option '%s'.\n", PROGRAM, olong); + return (EXIT_FAILURE); + + + default: /* bug: options not considered. */ + fprintf (stderr, "%s: option unknown: %c\n", PROGRAM, oshort); + abort (); + } /* switch */ + return 0; +} + + +/* Initialize options not yet given from environmental defaults */ +void +cmdline_parser_envdefaults(struct gengetopt_args_info *args_info) +{ + + + return; +} + + +/* Load option values from an .rc file */ +void +cmdline_parser_read_rcfile(const char *filename, + struct gengetopt_args_info *args_info, + int user_specified) +{ + char *fullname; + FILE *rcfile; + + if (!filename) return; /* ignore NULL filenames */ + +#if defined(HAVE_GETUID) && defined(HAVE_GETPWUID) + if (*filename == '~') { + /* tilde-expansion hack */ + struct passwd *pwent = getpwuid(getuid()); + if (!pwent) { + fprintf(stderr, "%s: user-id %d not found!\n", PROGRAM, getuid()); + return; + } + if (!pwent->pw_dir) { + fprintf(stderr, "%s: home directory for user-id %d not found!\n", PROGRAM, getuid()); + return; + } + fullname = (char *)malloc(strlen(pwent->pw_dir)+strlen(filename)); + strcpy(fullname, pwent->pw_dir); + strcat(fullname, filename+1); + } else { + fullname = strdup(filename); + } +#else /* !(defined(HAVE_GETUID) && defined(HAVE_GETPWUID)) */ + fullname = strdup(filename); +#endif /* defined(HAVE_GETUID) && defined(HAVE_GETPWUID) */ + + /* try to open */ + rcfile = fopen(fullname,"r"); + if (!rcfile) { + if (user_specified) { + fprintf(stderr, "%s: warning: open failed for rc-file '%s': 
%s\n", + PROGRAM, fullname, strerror(errno)); + } + } + else { + cmdline_parser_read_rc_stream(rcfile, fullname, args_info); + } + + /* cleanup */ + if (fullname != filename) free(fullname); + if (rcfile) fclose(rcfile); + + return; +} + + +/* Parse option values from an .rc file : guts + * Reads 'rcfile' one entry at a time: blank space and '#' comment lines are skipped, + * an option name runs up to '=' or whitespace, and its value runs to the end of the line. + * Each name/value pair is handed to cmdline_parser_parse_option() as a long option; + * 'filename' is used only in messages. */ +#define OPTPARSE_GET 32 +void +cmdline_parser_read_rc_stream(FILE *rcfile, + const char *filename, + struct gengetopt_args_info *args_info) +{ + char *optname = (char *)malloc(OPTPARSE_GET); + char *optval = (char *)malloc(OPTPARSE_GET); + size_t onsize = OPTPARSE_GET; + size_t ovsize = OPTPARSE_GET; + size_t onlen = 0; + size_t ovlen = 0; + int lineno = 0; + int c; /* int, not char: must hold every byte value as well as EOF from fgetc() */ + +#ifdef cmdline_parser_DEBUG + fprintf(stderr, "cmdline_parser_read_rc_stream('%s'):\n", filename); +#endif + + while ((c = fgetc(rcfile)) != EOF) { + onlen = 0; + ovlen = 0; + lineno++; + + /* -- get next option-name */ + /* skip leading space and comments */ + if (isspace(c)) continue; + if (c == '#') { + while ((c = fgetc(rcfile)) != EOF) { + if (c == '\n') break; + } + continue; + } + + /* parse option-name */ + while (c != EOF && c != '=' && !isspace(c)) { + /* re-allocate if necessary */ + if (onlen >= onsize-1) { + char *tmp = (char *)malloc(onsize+OPTPARSE_GET); + /* copy only the onlen bytes stored so far: the buffer is not NUL-terminated yet */ + memcpy(tmp,optname,onlen); + free(optname); + + onsize += OPTPARSE_GET; + optname = tmp; + } + optname[onlen++] = (char)c; + c = fgetc(rcfile); + } + optname[onlen++] = '\0'; + +#ifdef cmdline_parser_DEBUG + fprintf(stderr, "cmdline_parser_read_rc_stream('%s'): line %d: optname='%s'\n", + filename, lineno, optname); +#endif + + /* -- get next option-value */ + /* skip leading space */ + while ((c = fgetc(rcfile)) != EOF && isspace(c)) { + ; + } + + /* parse option-value */ + while (c != EOF && c != '\n') { + /* re-allocate if necessary */ + if (ovlen >= ovsize-1) { + char *tmp = (char *)malloc(ovsize+OPTPARSE_GET); + /* copy only the ovlen bytes stored so far: the buffer is not NUL-terminated yet */ + memcpy(tmp,optval,ovlen); + free(optval); + ovsize += OPTPARSE_GET; + optval = tmp; + } + optval[ovlen++] = (char)c; + c = fgetc(rcfile); + } + 
optval[ovlen++] = '\0'; + + /* now do the action for the option */ + if (cmdline_parser_parse_option('\0',optname,optval,args_info) != 0) { + fprintf(stderr, "%s: error in file '%s' at line %d.\n", PROGRAM, filename, lineno); + + } + } + + /* cleanup */ + free(optname); + free(optval); + + return; +} diff --git a/gfsm/gfsm/src/programs/gfsmconcat_cmdparser.h b/gfsm/gfsm/src/programs/gfsmconcat_cmdparser.h new file mode 100644 index 0000000..43125dd --- /dev/null +++ b/gfsm/gfsm/src/programs/gfsmconcat_cmdparser.h @@ -0,0 +1,63 @@ +/* -*- Mode: C -*- + * + * File: gfsmconcat_cmdparser.h + * Description: Headers for command-line parser struct gengetopt_args_info. + * + * File autogenerated by optgen.perl version 0.05. + * + */ + +#ifndef gfsmconcat_cmdparser_h +#define gfsmconcat_cmdparser_h + +#ifdef __cplusplus +extern "C" { +#endif /* __cplusplus */ + +/* + * moocow: Never set PACKAGE and VERSION here. + */ + +struct gengetopt_args_info { + int compress_arg; /* Specify compression level of output file. (default=-1). */ + char * output_arg; /* Specifiy output file (default=stdout). (default=-). */ + + int help_given; /* Whether help was given */ + int version_given; /* Whether version was given */ + int compress_given; /* Whether compress was given */ + int output_given; /* Whether output was given */ + + char **inputs; /* unnamed arguments */ + unsigned inputs_num; /* number of unnamed arguments */ +}; + +/* read rc files (if any) and parse all command-line options in one swell foop */ +int cmdline_parser (int argc, char *const *argv, struct gengetopt_args_info *args_info); + +/* instantiate defaults from environment variables: you must call this yourself! 
*/ +void cmdline_parser_envdefaults (struct gengetopt_args_info *args_info); + +/* read a single rc-file */ +void cmdline_parser_read_rcfile (const char *filename, + struct gengetopt_args_info *args_info, + int user_specified); + +/* read a single rc-file (stream) */ +void cmdline_parser_read_rc_stream (FILE *rcfile, + const char *filename, + struct gengetopt_args_info *args_info); + +/* parse a single option */ +int cmdline_parser_parse_option (char oshort, const char *olong, const char *val, + struct gengetopt_args_info *args_info); + +/* print help message */ +void cmdline_parser_print_help(void); + +/* print version */ +void cmdline_parser_print_version(void); + +#ifdef __cplusplus +} +#endif /* __cplusplus */ +#endif /* gfsmconcat_cmdparser_h */ diff --git a/gfsm/gfsm/src/programs/gfsmconcat_main.c b/gfsm/gfsm/src/programs/gfsmconcat_main.c new file mode 100644 index 0000000..9d63792 --- /dev/null +++ b/gfsm/gfsm/src/programs/gfsmconcat_main.c @@ -0,0 +1,118 @@ +/* + gfsm-utils : finite state automaton utilities + Copyright (C) 2004 by Bryan Jurish <moocow@ling.uni-potsdam.de> + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+
+  You should have received a copy of the GNU Lesser General Public
+  License along with this library; if not, write to the Free Software
+  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+*/
+
+#ifdef HAVE_CONFIG_H
+# include <config.h>
+#endif
+
+#include <stdio.h>
+#include <errno.h>
+#include <string.h>
+#include <stdlib.h>
+
+#include <gfsm.h>
+
+#include "gfsmconcat_cmdparser.h"
+
+/*--------------------------------------------------------------------------
+ * Globals
+ *--------------------------------------------------------------------------*/
+char *progname = "gfsmconcat";
+
+//-- options
+struct gengetopt_args_info args;
+
+//-- files
+const char *infilename;
+const char *outfilename = "-";
+
+//-- global structs etc.
+gfsmError *err = NULL;
+gfsmAutomaton *fsmOut=NULL, *fsmIn=NULL;
+
+/*--------------------------------------------------------------------------
+ * Option Processing
+ *  + parses the command line into the global 'args'
+ *  + exits with status 1 if cmdline_parser() reports failure
+ *  + prints help and exits with status 2 if no input file was named
+ *  + on return, 'fsmIn' holds a newly allocated automaton
+ *--------------------------------------------------------------------------*/
+void get_my_options(int argc, char **argv)
+{
+  if (cmdline_parser(argc, argv, &args)) {
+    exit(1);
+  }
+
+  //-- at least one file argument is mandatory
+  if (args.inputs_num == 0) {
+    cmdline_parser_print_help();
+    exit(2);
+  }
+
+  //-- output: keep the "-" default unless the parser supplied a name
+  if (args.output_arg != NULL) {
+    outfilename = args.output_arg;
+  }
+
+  //-- load environmental defaults (call is disabled)
+  //cmdline_parser_envdefaults(&args);
+
+  //-- scratch automaton used by compute_operation() for loading
+  fsmIn = gfsm_automaton_new();
+}
+
+/*--------------------------------------------------------------------------
+ * compute_operation()
+ *  + loads 'infilename' into the scratch automaton 'fsmIn'
+ *    (exits with status 255 if the load fails)
+ *  + first call: the loaded automaton becomes 'fsmOut' and a new scratch
+ *    automaton is allocated
+ *  + later calls: gfsm_automaton_concat(fsmOut,fsmIn)
+ */
+void compute_operation(const char *infilename)
+{
+  //-- read the next operand
+  if (!gfsm_automaton_load_bin_filename(fsmIn,infilename,&err)) {
+    g_printerr("%s: load failed for '%s': %s\n", progname, infilename, err->message);
+    exit(255);
+  }
+
+  //-- a result already exists: combine it with the new operand
+  if (fsmOut != NULL) {
+    gfsm_automaton_concat(fsmOut,fsmIn);
+    return;
+  }
+
+  //-- first operand: it becomes the result; allocate a new scratch automaton
+  fsmOut = fsmIn;
+  fsmIn  = gfsm_automaton_new();
+}
+
+/*--------------------------------------------------------------------------
+ * MAIN
+ *  + runs compute_operation() on each named input, in command-line order
+ *  + when exactly one input was named, "-" is processed as a second operand
+ *    (presumably standard input -- confirm against the gfsm loader)
+ *  + saves 'fsmOut' to 'outfilename'; exits with status 4 if that fails
+ *--------------------------------------------------------------------------*/
+int main (int argc, char **argv)
+{
+  unsigned int n;
+
+  get_my_options(argc,argv);
+
+  //-- process the named inputs
+  for (n = 0; n < args.inputs_num; n++)
+    compute_operation(args.inputs[n]);
+
+  //-- lone input: add "-" as the second operand
+  if (args.inputs_num == 1)
+    compute_operation("-");
+
+  //-- spew automaton
+  if (!gfsm_automaton_save_bin_filename(fsmOut,outfilename,args.compress_arg,&err)) {
+    g_printerr("%s: store failed to '%s': %s\n", progname, outfilename, err->message);
+    exit(4);
+  }
+
+  //-- cleanup
+  if (fsmIn != NULL)
+    gfsm_automaton_free(fsmIn);
+  if (fsmOut != NULL)
+    gfsm_automaton_free(fsmOut);
+
+  return 0;
+}
diff --git a/gfsm/gfsm/src/programs/gfsmconnect.gog b/gfsm/gfsm/src/programs/gfsmconnect.gog
new file mode 100644
index 0000000..2de8230
--- /dev/null
+++ b/gfsm/gfsm/src/programs/gfsmconnect.gog
@@ -0,0 +1,70 @@
+# -*- Mode: Shell-Script -*-
+#
+# Getopt::Gen specification
+#-----------------------------------------------------------------------------
+program "gfsmconnect"
+#program_version "0.01"
+
+purpose "Remove non-coaccessible states from finite state machines"
+author  "Bryan Jurish <moocow@ling.uni-potsdam.de>"
+on_reparse "warn"
+
+#-----------------------------------------------------------------------------
+# Details
+#-----------------------------------------------------------------------------
+details ""
+
+#-----------------------------------------------------------------------------
+# Files
+#-----------------------------------------------------------------------------
+#rcfile "/etc/gfsmrc"
+#rcfile "~/.gfsmrc"
+
+#-----------------------------------------------------------------------------
+# Arguments
+#-----------------------------------------------------------------------------
+argument "BINFILE" "Stored binary gfsm file" \
+    details="
+If unspecified, standard input will be read
+"
+
+#----------------------------------------------------------------------------- +# Options +#----------------------------------------------------------------------------- +#group "Basic Options" + +int "compress" z "Specify compression level of output file." \ + arg="LEVEL" \ + default="-1" \ + details=" +Specify zlib compression level of output file. -1 (default) indicates +the default compression level, 0 (zero) indicates no zlib compression at all, +and 9 indicates the best possible compression. +" + +string "output" F "Specifiy output file (default=stdout)." \ + arg="FILE" \ + default="-" + +#----------------------------------------------------------------------------- +# Addenda +#----------------------------------------------------------------------------- +#addenda "" + +#----------------------------------------------------------------------------- +# Bugs +#----------------------------------------------------------------------------- +bugs " + +None known. + +" + +#----------------------------------------------------------------------------- +# Footer +#----------------------------------------------------------------------------- +#acknowledge `cat acknowledge.pod` + +seealso " +L<gfsmutils> +" diff --git a/gfsm/gfsm/src/programs/gfsmconnect_cmdparser.c b/gfsm/gfsm/src/programs/gfsmconnect_cmdparser.c new file mode 100644 index 0000000..a3f30cb --- /dev/null +++ b/gfsm/gfsm/src/programs/gfsmconnect_cmdparser.c @@ -0,0 +1,449 @@ +/* -*- Mode: C -*- + * + * File: gfsmconnect_cmdparser.c + * Description: Code for command-line parser struct gengetopt_args_info. + * + * File autogenerated by optgen.perl version 0.05 + * generated with the following command: + * /usr/local/bin/optgen.perl -u -l --no-handle-rcfile --nopod -F gfsmconnect_cmdparser gfsmconnect.gog + * + * The developers of optgen.perl consider the fixed text that goes in all + * optgen.perl output files to be in the public domain: + * we make no copyright claims on it. 
+ * + */ + +#include <stdio.h> +#include <stdlib.h> +#include <errno.h> +#include <string.h> +#include <unistd.h> +#include <ctype.h> + +/* If we use autoconf/autoheader. */ +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#ifdef HAVE_PWD_H +# include <pwd.h> +#endif + +/* Allow user-overrides for PACKAGE and VERSION */ +#ifndef PACKAGE +# define PACKAGE "PACKAGE" +#endif + +#ifndef VERSION +# define VERSION "VERSION" +#endif + + +#ifndef PROGRAM +# define PROGRAM "gfsmconnect" +#endif + +/* #define cmdline_parser_DEBUG */ + +/* Check for "configure's" getopt check result. */ +#ifndef HAVE_GETOPT_LONG +# include "getopt.h" +#else +# include <getopt.h> +#endif + +#if !defined(HAVE_STRDUP) && !defined(strdup) +# define strdup gengetopt_strdup +#endif /* HAVE_STRDUP */ + +#include "gfsmconnect_cmdparser.h" + + +/* user code section */ + +/* end user code section */ + + +void +cmdline_parser_print_version (void) +{ + printf("gfsmconnect (%s %s) by Bryan Jurish <moocow@ling.uni-potsdam.de>\n", PACKAGE, VERSION); +} + +void +cmdline_parser_print_help (void) +{ + cmdline_parser_print_version (); + printf("\n"); + printf("Purpose:\n"); + printf(" Remove non-coaccessible states from finite state machines\n"); + printf("\n"); + + printf("Usage: %s [OPTIONS]... BINFILE\n", "gfsmconnect"); + + printf("\n"); + printf(" Arguments:\n"); + printf(" BINFILE Stored binary gfsm file\n"); + + printf("\n"); + printf(" Options:\n"); + printf(" -h --help Print help and exit.\n"); + printf(" -V --version Print version and exit.\n"); + printf(" -zLEVEL --compress=LEVEL Specify compression level of output file.\n"); + printf(" -FFILE --output=FILE Specifiy output file (default=stdout).\n"); +} + +#if !defined(HAVE_STRDUP) && !defined(strdup) +/* gengetopt_strdup(): automatically generated from strdup.c. 
*/ +/* strdup.c replacement of strdup, which is not standard */ +static char * +gengetopt_strdup (const char *s) +{ + char *result = (char*)malloc(strlen(s) + 1); + if (result == (char*)0) + return (char*)0; + strcpy(result, s); + return result; +} +#endif /* HAVE_STRDUP */ + +/* clear_args(args_info): clears all args & resets to defaults */ +static void +clear_args(struct gengetopt_args_info *args_info) +{ + args_info->compress_arg = -1; + args_info->output_arg = strdup("-"); +} + + +int +cmdline_parser (int argc, char * const *argv, struct gengetopt_args_info *args_info) +{ + int c; /* Character of the parsed option. */ + int missing_required_options = 0; + + args_info->help_given = 0; + args_info->version_given = 0; + args_info->compress_given = 0; + args_info->output_given = 0; + + clear_args(args_info); + + /* rcfile handling */ + + /* end rcfile handling */ + + optarg = 0; + optind = 1; + opterr = 1; + optopt = '?'; + + while (1) + { + int option_index = 0; + static struct option long_options[] = { + { "help", 0, NULL, 'h' }, + { "version", 0, NULL, 'V' }, + { "compress", 1, NULL, 'z' }, + { "output", 1, NULL, 'F' }, + { NULL, 0, NULL, 0 } + }; + static char short_options[] = { + 'h', + 'V', + 'z', ':', + 'F', ':', + '\0' + }; + + c = getopt_long (argc, argv, short_options, long_options, &option_index); + + if (c == -1) break; /* Exit from 'while (1)' loop. 
*/ + + if (cmdline_parser_parse_option(c, long_options[option_index].name, optarg, args_info) != 0) { + exit (EXIT_FAILURE); + } + } /* while */ + + + + if ( missing_required_options ) + exit (EXIT_FAILURE); + + + if (optind < argc) { + int i = 0 ; + args_info->inputs_num = argc - optind ; + args_info->inputs = (char **)(malloc ((args_info->inputs_num)*sizeof(char *))) ; + while (optind < argc) + args_info->inputs[ i++ ] = strdup (argv[optind++]) ; + } + + return 0; +} + + +/* Parse a single option */ +int +cmdline_parser_parse_option(char oshort, const char *olong, const char *val, + struct gengetopt_args_info *args_info) +{ + if (!oshort && !(olong && *olong)) return 1; /* ignore null options */ + +#ifdef cmdline_parser_DEBUG + fprintf(stderr, "parse_option(): oshort='%c', olong='%s', val='%s'\n", oshort, olong, val);*/ +#endif + + switch (oshort) + { + case 'h': /* Print help and exit. */ + if (args_info->help_given) { + fprintf(stderr, "%s: `--help' (`-h') option given more than once\n", PROGRAM); + } + clear_args(args_info); + cmdline_parser_print_help(); + exit(EXIT_SUCCESS); + + break; + + case 'V': /* Print version and exit. */ + if (args_info->version_given) { + fprintf(stderr, "%s: `--version' (`-V') option given more than once\n", PROGRAM); + } + clear_args(args_info); + cmdline_parser_print_version(); + exit(EXIT_SUCCESS); + + break; + + case 'z': /* Specify compression level of output file. */ + if (args_info->compress_given) { + fprintf(stderr, "%s: `--compress' (`-z') option given more than once\n", PROGRAM); + } + args_info->compress_given++; + args_info->compress_arg = (int)atoi(val); + break; + + case 'F': /* Specifiy output file (default=stdout). 
*/ + if (args_info->output_given) { + fprintf(stderr, "%s: `--output' (`-F') option given more than once\n", PROGRAM); + } + args_info->output_given++; + if (args_info->output_arg) free(args_info->output_arg); + args_info->output_arg = strdup(val); + break; + + case 0: /* Long option(s) with no short form */ + /* Print help and exit. */ + if (strcmp(olong, "help") == 0) { + if (args_info->help_given) { + fprintf(stderr, "%s: `--help' (`-h') option given more than once\n", PROGRAM); + } + clear_args(args_info); + cmdline_parser_print_help(); + exit(EXIT_SUCCESS); + + } + + /* Print version and exit. */ + else if (strcmp(olong, "version") == 0) { + if (args_info->version_given) { + fprintf(stderr, "%s: `--version' (`-V') option given more than once\n", PROGRAM); + } + clear_args(args_info); + cmdline_parser_print_version(); + exit(EXIT_SUCCESS); + + } + + /* Specify compression level of output file. */ + else if (strcmp(olong, "compress") == 0) { + if (args_info->compress_given) { + fprintf(stderr, "%s: `--compress' (`-z') option given more than once\n", PROGRAM); + } + args_info->compress_given++; + args_info->compress_arg = (int)atoi(val); + } + + /* Specifiy output file (default=stdout). */ + else if (strcmp(olong, "output") == 0) { + if (args_info->output_given) { + fprintf(stderr, "%s: `--output' (`-F') option given more than once\n", PROGRAM); + } + args_info->output_given++; + if (args_info->output_arg) free(args_info->output_arg); + args_info->output_arg = strdup(val); + } + + else { + fprintf(stderr, "%s: unknown long option '%s'.\n", PROGRAM, olong); + return (EXIT_FAILURE); + } + break; + + case '?': /* Invalid Option */ + fprintf(stderr, "%s: unknown option '%s'.\n", PROGRAM, olong); + return (EXIT_FAILURE); + + + default: /* bug: options not considered. 
*/
+      fprintf (stderr, "%s: option unknown: %c\n", PROGRAM, oshort);
+      abort ();
+    } /* switch */
+  return 0;
+}
+
+
+/* Initialize options not yet given from environmental defaults
+ * (no environment-backed options are generated for this program,
+ *  so the body is empty) */
+void
+cmdline_parser_envdefaults(struct gengetopt_args_info *args_info)
+{
+
+
+  return;
+}
+
+
+/* Load option values from an .rc file
+ *
+ *  - a NULL `filename` is ignored;
+ *  - when HAVE_GETUID and HAVE_GETPWUID are defined and `filename` starts
+ *    with '~', that character is replaced by the current user's home
+ *    directory as reported by getpwuid();
+ *  - if the file cannot be opened, a warning is printed only when
+ *    `user_specified` is non-zero;
+ *  - otherwise its contents are handed to cmdline_parser_read_rc_stream().
+ */
+void
+cmdline_parser_read_rcfile(const char *filename,
+                           struct gengetopt_args_info *args_info,
+                           int user_specified)
+{
+  char *fullname;
+  FILE *rcfile;
+
+  if (!filename) return; /* ignore NULL filenames */
+
+#if defined(HAVE_GETUID) && defined(HAVE_GETPWUID)
+  if (*filename == '~') {
+    /* tilde-expansion hack */
+    struct passwd *pwent = getpwuid(getuid());
+    if (!pwent) {
+      fprintf(stderr, "%s: user-id %d not found!\n", PROGRAM, getuid());
+      return;
+    }
+    if (!pwent->pw_dir) {
+      fprintf(stderr, "%s: home directory for user-id %d not found!\n", PROGRAM, getuid());
+      return;
+    }
+    /* strlen(filename) still counts the '~' that is dropped below,
+     * which leaves exactly one byte for the terminating NUL */
+    fullname = (char *)malloc(strlen(pwent->pw_dir)+strlen(filename));
+    strcpy(fullname, pwent->pw_dir);
+    strcat(fullname, filename+1);
+  } else {
+    fullname = strdup(filename);
+  }
+#else /* !(defined(HAVE_GETUID) && defined(HAVE_GETPWUID)) */
+  fullname = strdup(filename);
+#endif /* defined(HAVE_GETUID) && defined(HAVE_GETPWUID) */
+
+  /* try to open */
+  rcfile = fopen(fullname,"r");
+  if (!rcfile) {
+    if (user_specified) {
+      fprintf(stderr, "%s: warning: open failed for rc-file '%s': %s\n",
+              PROGRAM, fullname, strerror(errno));
+    }
+  }
+  else {
+    cmdline_parser_read_rc_stream(rcfile, fullname, args_info);
+  }
+
+  /* cleanup */
+  if (fullname != filename) free(fullname);
+  if (rcfile) fclose(rcfile);
+
+  return;
+}
+
+
+/* Parse option values from an .rc file : guts
+ *
+ * Reads `rcfile` one character at a time and splits it into entries:
+ *   - whitespace in front of an entry is skipped;
+ *   - an entry starting with '#' is a comment that runs to end-of-line;
+ *   - the option name ends at the first '=' or whitespace character;
+ *   - whitespace following that terminator is skipped (newlines included),
+ *     and the value is everything up to the next newline or end-of-file.
+ * Each name/value pair is passed to cmdline_parser_parse_option() as a long
+ * option; a non-zero result is reported on stderr and reading continues.
+ *
+ * `filename` is used for diagnostics only.  The stream is not closed here.
+ *
+ * NOTE: `lineno` is incremented once per pass of the outer loop, which also
+ * happens for every skipped whitespace character, so the line number shown
+ * in the error message is approximate.
+ */
+#define OPTPARSE_GET 32
+void
+cmdline_parser_read_rc_stream(FILE *rcfile,
+                              const char *filename,
+                              struct gengetopt_args_info *args_info)
+{
+  char *optname = (char *)malloc(OPTPARSE_GET);
+  char *optval  = (char *)malloc(OPTPARSE_GET);
+  size_t onsize = OPTPARSE_GET;
+  size_t ovsize = OPTPARSE_GET;
+  size_t onlen  = 0;
+  size_t ovlen  = 0;
+  int    lineno = 0;
+  int    c;  /* int, not char: fgetc() returns an unsigned char value or EOF */
+
+  if (!optname || !optval) goto nomem;
+
+#ifdef cmdline_parser_DEBUG
+  fprintf(stderr, "cmdline_parser_read_rc_stream('%s'):\n", filename);
+#endif
+
+  while ((c = fgetc(rcfile)) != EOF) {
+    onlen = 0;
+    ovlen = 0;
+    lineno++;
+
+    /* -- get next option-name */
+    /* skip leading space and comments */
+    if (isspace(c)) continue;
+    if (c == '#') {
+      while ((c = fgetc(rcfile)) != EOF) {
+        if (c == '\n') break;
+      }
+      continue;
+    }
+
+    /* parse option-name */
+    while (c != EOF && c != '=' && !isspace(c)) {
+      /* re-allocate if necessary (one byte is kept free for the final NUL) */
+      if (onlen >= onsize-1) {
+        /* realloc() carries over the bytes stored so far; the buffer is not
+         * NUL-terminated yet, so a strcpy()-based copy must not be used */
+        char *tmp = (char *)realloc(optname, onsize+OPTPARSE_GET);
+        if (!tmp) goto nomem;
+        onsize += OPTPARSE_GET;
+        optname = tmp;
+      }
+      optname[onlen++] = (char)c;
+      c = fgetc(rcfile);
+    }
+    optname[onlen++] = '\0';
+
+#ifdef cmdline_parser_DEBUG
+    fprintf(stderr, "cmdline_parser_read_rc_stream('%s'): line %d: optname='%s'\n",
+            filename, lineno, optname);
+#endif
+
+    /* -- get next option-value */
+    /* skip leading space */
+    while ((c = fgetc(rcfile)) != EOF && isspace(c)) {
+      ;
+    }
+
+    /* parse option-value */
+    while (c != EOF && c != '\n') {
+      /* re-allocate if necessary (same scheme as for the option name) */
+      if (ovlen >= ovsize-1) {
+        char *tmp = (char *)realloc(optval, ovsize+OPTPARSE_GET);
+        if (!tmp) goto nomem;
+        ovsize += OPTPARSE_GET;
+        optval = tmp;
+      }
+      optval[ovlen++] = (char)c;
+      c = fgetc(rcfile);
+    }
+    optval[ovlen++] = '\0';
+
+    /* now do the action for the option */
+    if (cmdline_parser_parse_option('\0',optname,optval,args_info) != 0) {
+      fprintf(stderr, "%s: error in file '%s' at line %d.\n", PROGRAM, filename, lineno);
+
+    }
+  }
+
+  /* cleanup */
+  free(optname);
+  free(optval);
+
+  return;
+
+ nomem:
+  /* allocation failure: give up on this rc-file, release what we hold */
+  fprintf(stderr, "%s: out of memory while reading rc-file '%s'\n", PROGRAM, filename);
+  free(optname);
+  free(optval);
+  return;
+}
diff --git a/gfsm/gfsm/src/programs/gfsmconnect_cmdparser.h b/gfsm/gfsm/src/programs/gfsmconnect_cmdparser.h
new file mode 100644
index 0000000..08f42e0
--- /dev/null
+++ b/gfsm/gfsm/src/programs/gfsmconnect_cmdparser.h
@@ -0,0 +1,63 @@
+/* -*- Mode: C -*-
+ *
+ * File: 
gfsmconnect_cmdparser.h + * Description: Headers for command-line parser struct gengetopt_args_info. + * + * File autogenerated by optgen.perl version 0.05. + * + */ + +#ifndef gfsmconnect_cmdparser_h +#define gfsmconnect_cmdparser_h + +#ifdef __cplusplus +extern "C" { +#endif /* __cplusplus */ + +/* + * moocow: Never set PACKAGE and VERSION here. + */ + +struct gengetopt_args_info { + int compress_arg; /* Specify compression level of output file. (default=-1). */ + char * output_arg; /* Specifiy output file (default=stdout). (default=-). */ + + int help_given; /* Whether help was given */ + int version_given; /* Whether version was given */ + int compress_given; /* Whether compress was given */ + int output_given; /* Whether output was given */ + + char **inputs; /* unnamed arguments */ + unsigned inputs_num; /* number of unnamed arguments */ +}; + +/* read rc files (if any) and parse all command-line options in one swell foop */ +int cmdline_parser (int argc, char *const *argv, struct gengetopt_args_info *args_info); + +/* instantiate defaults from environment variables: you must call this yourself! 
*/ +void cmdline_parser_envdefaults (struct gengetopt_args_info *args_info); + +/* read a single rc-file */ +void cmdline_parser_read_rcfile (const char *filename, + struct gengetopt_args_info *args_info, + int user_specified); + +/* read a single rc-file (stream) */ +void cmdline_parser_read_rc_stream (FILE *rcfile, + const char *filename, + struct gengetopt_args_info *args_info); + +/* parse a single option */ +int cmdline_parser_parse_option (char oshort, const char *olong, const char *val, + struct gengetopt_args_info *args_info); + +/* print help message */ +void cmdline_parser_print_help(void); + +/* print version */ +void cmdline_parser_print_version(void); + +#ifdef __cplusplus +} +#endif /* __cplusplus */ +#endif /* gfsmconnect_cmdparser_h */ diff --git a/gfsm/gfsm/src/programs/gfsmconnect_main.c b/gfsm/gfsm/src/programs/gfsmconnect_main.c new file mode 100644 index 0000000..82c9c22 --- /dev/null +++ b/gfsm/gfsm/src/programs/gfsmconnect_main.c @@ -0,0 +1,99 @@ +/* + gfsm-utils : finite state automaton utilities + Copyright (C) 2004 by Bryan Jurish <moocow@ling.uni-potsdam.de> + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif + +#include <stdio.h> +#include <errno.h> +#include <string.h> +#include <stdlib.h> + +#include <gfsm.h> + +#include "gfsmconnect_cmdparser.h" + +/*-------------------------------------------------------------------------- + * Globals + *--------------------------------------------------------------------------*/ +char *progname = "gfsmconnect"; + +//-- options +struct gengetopt_args_info args; + +//-- files +const char *infilename = "-"; +const char *outfilename = "-"; + +//-- global structs +gfsmAutomaton *fsm; + +/*-------------------------------------------------------------------------- + * Option Processing + *--------------------------------------------------------------------------*/ +void get_my_options(int argc, char **argv) +{ + if (cmdline_parser(argc, argv, &args) != 0) + exit(1); + + //-- output + if (args.inputs_num) infilename = args.inputs[0]; + if (args.output_arg) outfilename = args.output_arg; + + //-- load environmental defaults + //cmdline_parser_envdefaults(&args); + + //-- initialize automaton + fsm = gfsm_automaton_new(); +} + + +/*-------------------------------------------------------------------------- + * MAIN + *--------------------------------------------------------------------------*/ +int main (int argc, char **argv) +{ + gfsmError *err = NULL; + int rc = 0; + get_my_options(argc,argv); + + //-- load automaton + if (!gfsm_automaton_load_bin_filename(fsm,infilename,&err)) { + g_printerr("%s: load failed for '%s': %s\n", progname, infilename, err->message); + exit(255); + } + + //-- connect + gfsm_automaton_connect(fsm); + + //-- spew automaton + if (!gfsm_automaton_save_bin_filename(fsm,outfilename,args.compress_arg,&err)) { + g_printerr("%s: store failed to '%s': 
%s\n", progname, outfilename, err->message); + exit(4); + } + + //-- get exit status: 255 if no root state + if (fsm->root_id == gfsmNoState) rc=255; + + //-- cleanup + if (fsm) gfsm_automaton_free(fsm); + + return rc; +} diff --git a/gfsm/gfsm/src/programs/gfsmconvert.gog b/gfsm/gfsm/src/programs/gfsmconvert.gog new file mode 100644 index 0000000..4bb24af --- /dev/null +++ b/gfsm/gfsm/src/programs/gfsmconvert.gog @@ -0,0 +1,89 @@ +# -*- Mode: Shell-Script -*- +# +# Getopt::Gen specification +#----------------------------------------------------------------------------- +program "gfsmconvert" +#program_version "0.01" + +purpose "Convert binary format gfsm files" +author "Bryan Jurish <moocow@ling.uni-potsdam.de>" +on_reparse "warn" + +#----------------------------------------------------------------------------- +# Details +#----------------------------------------------------------------------------- +details "" + +#----------------------------------------------------------------------------- +# Files +#----------------------------------------------------------------------------- +#rcfile "/etc/gfsmrc" +#rcfile "~/.gfsmrc" + +#----------------------------------------------------------------------------- +# Arguments +#----------------------------------------------------------------------------- +argument "FSMFILE" "Stored binary gfsm file" \ + details=" +If unspecified, standard input will be read +" + +#----------------------------------------------------------------------------- +# Options +#----------------------------------------------------------------------------- +#group "Basic Options" + +int "transducer" t "Specify whether automaton is a transducer" \ + arg="BOOL" + +int "weighted" w "Specify whether automaton is weighted" \ + arg="BOOL" + +string "semiring" s "Specify semiring type." 
\ + arg="SRTYPE" \ + details=" +Specify one of the following: + + boolean + log + real + trivial + tropical + +" + +int "compress" z "Specify compression level of output file." \ + arg="LEVEL" \ + default="-1" \ + details=" +Specify zlib compression level of output file. -1 (default) indicates +the default compression level, 0 (zero) indicates no zlib compression at all, +and 9 indicates the best possible compression. +" + +string "output" F "Specifiy output file (default=stdout)." \ + arg="FILE" \ + default="-" + +#----------------------------------------------------------------------------- +# Addenda +#----------------------------------------------------------------------------- +#addenda "" + +#----------------------------------------------------------------------------- +# Bugs +#----------------------------------------------------------------------------- +bugs " + +None known. + +" + +#----------------------------------------------------------------------------- +# Footer +#----------------------------------------------------------------------------- +#acknowledge `cat acknowledge.pod` + +seealso " +L<gfsmutils> +" diff --git a/gfsm/gfsm/src/programs/gfsmconvert_cmdparser.c b/gfsm/gfsm/src/programs/gfsmconvert_cmdparser.c new file mode 100644 index 0000000..3e77ce5 --- /dev/null +++ b/gfsm/gfsm/src/programs/gfsmconvert_cmdparser.c @@ -0,0 +1,517 @@ +/* -*- Mode: C -*- + * + * File: gfsmconvert_cmdparser.c + * Description: Code for command-line parser struct gengetopt_args_info. + * + * File autogenerated by optgen.perl version 0.05 + * generated with the following command: + * /usr/local/bin/optgen.perl -u -l --no-handle-rcfile --nopod -F gfsmconvert_cmdparser gfsmconvert.gog + * + * The developers of optgen.perl consider the fixed text that goes in all + * optgen.perl output files to be in the public domain: + * we make no copyright claims on it. 
+ * + */ + +#include <stdio.h> +#include <stdlib.h> +#include <errno.h> +#include <string.h> +#include <unistd.h> +#include <ctype.h> + +/* If we use autoconf/autoheader. */ +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#ifdef HAVE_PWD_H +# include <pwd.h> +#endif + +/* Allow user-overrides for PACKAGE and VERSION */ +#ifndef PACKAGE +# define PACKAGE "PACKAGE" +#endif + +#ifndef VERSION +# define VERSION "VERSION" +#endif + + +#ifndef PROGRAM +# define PROGRAM "gfsmconvert" +#endif + +/* #define cmdline_parser_DEBUG */ + +/* Check for "configure's" getopt check result. */ +#ifndef HAVE_GETOPT_LONG +# include "getopt.h" +#else +# include <getopt.h> +#endif + +#if !defined(HAVE_STRDUP) && !defined(strdup) +# define strdup gengetopt_strdup +#endif /* HAVE_STRDUP */ + +#include "gfsmconvert_cmdparser.h" + + +/* user code section */ + +/* end user code section */ + + +void +cmdline_parser_print_version (void) +{ + printf("gfsmconvert (%s %s) by Bryan Jurish <moocow@ling.uni-potsdam.de>\n", PACKAGE, VERSION); +} + +void +cmdline_parser_print_help (void) +{ + cmdline_parser_print_version (); + printf("\n"); + printf("Purpose:\n"); + printf(" Convert binary format gfsm files\n"); + printf("\n"); + + printf("Usage: %s [OPTIONS]... 
FSMFILE\n", "gfsmconvert"); + + printf("\n"); + printf(" Arguments:\n"); + printf(" FSMFILE Stored binary gfsm file\n"); + + printf("\n"); + printf(" Options:\n"); + printf(" -h --help Print help and exit.\n"); + printf(" -V --version Print version and exit.\n"); + printf(" -tBOOL --transducer=BOOL Specify whether automaton is a transducer\n"); + printf(" -wBOOL --weighted=BOOL Specify whether automaton is weighted\n"); + printf(" -sSRTYPE --semiring=SRTYPE Specify semiring type.\n"); + printf(" -zLEVEL --compress=LEVEL Specify compression level of output file.\n"); + printf(" -FFILE --output=FILE Specifiy output file (default=stdout).\n"); +} + +#if !defined(HAVE_STRDUP) && !defined(strdup) +/* gengetopt_strdup(): automatically generated from strdup.c. */ +/* strdup.c replacement of strdup, which is not standard */ +static char * +gengetopt_strdup (const char *s) +{ + char *result = (char*)malloc(strlen(s) + 1); + if (result == (char*)0) + return (char*)0; + strcpy(result, s); + return result; +} +#endif /* HAVE_STRDUP */ + +/* clear_args(args_info): clears all args & resets to defaults */ +static void +clear_args(struct gengetopt_args_info *args_info) +{ + args_info->transducer_arg = 0; + args_info->weighted_arg = 0; + args_info->semiring_arg = NULL; + args_info->compress_arg = -1; + args_info->output_arg = strdup("-"); +} + + +int +cmdline_parser (int argc, char * const *argv, struct gengetopt_args_info *args_info) +{ + int c; /* Character of the parsed option. 
*/ + int missing_required_options = 0; + + args_info->help_given = 0; + args_info->version_given = 0; + args_info->transducer_given = 0; + args_info->weighted_given = 0; + args_info->semiring_given = 0; + args_info->compress_given = 0; + args_info->output_given = 0; + + clear_args(args_info); + + /* rcfile handling */ + + /* end rcfile handling */ + + optarg = 0; + optind = 1; + opterr = 1; + optopt = '?'; + + while (1) + { + int option_index = 0; + static struct option long_options[] = { + { "help", 0, NULL, 'h' }, + { "version", 0, NULL, 'V' }, + { "transducer", 1, NULL, 't' }, + { "weighted", 1, NULL, 'w' }, + { "semiring", 1, NULL, 's' }, + { "compress", 1, NULL, 'z' }, + { "output", 1, NULL, 'F' }, + { NULL, 0, NULL, 0 } + }; + static char short_options[] = { + 'h', + 'V', + 't', ':', + 'w', ':', + 's', ':', + 'z', ':', + 'F', ':', + '\0' + }; + + c = getopt_long (argc, argv, short_options, long_options, &option_index); + + if (c == -1) break; /* Exit from 'while (1)' loop. */ + + if (cmdline_parser_parse_option(c, long_options[option_index].name, optarg, args_info) != 0) { + exit (EXIT_FAILURE); + } + } /* while */ + + + + if ( missing_required_options ) + exit (EXIT_FAILURE); + + + if (optind < argc) { + int i = 0 ; + args_info->inputs_num = argc - optind ; + args_info->inputs = (char **)(malloc ((args_info->inputs_num)*sizeof(char *))) ; + while (optind < argc) + args_info->inputs[ i++ ] = strdup (argv[optind++]) ; + } + + return 0; +} + + +/* Parse a single option */ +int +cmdline_parser_parse_option(char oshort, const char *olong, const char *val, + struct gengetopt_args_info *args_info) +{ + if (!oshort && !(olong && *olong)) return 1; /* ignore null options */ + +#ifdef cmdline_parser_DEBUG + fprintf(stderr, "parse_option(): oshort='%c', olong='%s', val='%s'\n", oshort, olong, val);*/ +#endif + + switch (oshort) + { + case 'h': /* Print help and exit. 
*/ + if (args_info->help_given) { + fprintf(stderr, "%s: `--help' (`-h') option given more than once\n", PROGRAM); + } + clear_args(args_info); + cmdline_parser_print_help(); + exit(EXIT_SUCCESS); + + break; + + case 'V': /* Print version and exit. */ + if (args_info->version_given) { + fprintf(stderr, "%s: `--version' (`-V') option given more than once\n", PROGRAM); + } + clear_args(args_info); + cmdline_parser_print_version(); + exit(EXIT_SUCCESS); + + break; + + case 't': /* Specify whether automaton is a transducer */ + if (args_info->transducer_given) { + fprintf(stderr, "%s: `--transducer' (`-t') option given more than once\n", PROGRAM); + } + args_info->transducer_given++; + args_info->transducer_arg = (int)atoi(val); + break; + + case 'w': /* Specify whether automaton is weighted */ + if (args_info->weighted_given) { + fprintf(stderr, "%s: `--weighted' (`-w') option given more than once\n", PROGRAM); + } + args_info->weighted_given++; + args_info->weighted_arg = (int)atoi(val); + break; + + case 's': /* Specify semiring type. */ + if (args_info->semiring_given) { + fprintf(stderr, "%s: `--semiring' (`-s') option given more than once\n", PROGRAM); + } + args_info->semiring_given++; + if (args_info->semiring_arg) free(args_info->semiring_arg); + args_info->semiring_arg = strdup(val); + break; + + case 'z': /* Specify compression level of output file. */ + if (args_info->compress_given) { + fprintf(stderr, "%s: `--compress' (`-z') option given more than once\n", PROGRAM); + } + args_info->compress_given++; + args_info->compress_arg = (int)atoi(val); + break; + + case 'F': /* Specifiy output file (default=stdout). */ + if (args_info->output_given) { + fprintf(stderr, "%s: `--output' (`-F') option given more than once\n", PROGRAM); + } + args_info->output_given++; + if (args_info->output_arg) free(args_info->output_arg); + args_info->output_arg = strdup(val); + break; + + case 0: /* Long option(s) with no short form */ + /* Print help and exit. 
*/ + if (strcmp(olong, "help") == 0) { + if (args_info->help_given) { + fprintf(stderr, "%s: `--help' (`-h') option given more than once\n", PROGRAM); + } + clear_args(args_info); + cmdline_parser_print_help(); + exit(EXIT_SUCCESS); + + } + + /* Print version and exit. */ + else if (strcmp(olong, "version") == 0) { + if (args_info->version_given) { + fprintf(stderr, "%s: `--version' (`-V') option given more than once\n", PROGRAM); + } + clear_args(args_info); + cmdline_parser_print_version(); + exit(EXIT_SUCCESS); + + } + + /* Specify whether automaton is a transducer */ + else if (strcmp(olong, "transducer") == 0) { + if (args_info->transducer_given) { + fprintf(stderr, "%s: `--transducer' (`-t') option given more than once\n", PROGRAM); + } + args_info->transducer_given++; + args_info->transducer_arg = (int)atoi(val); + } + + /* Specify whether automaton is weighted */ + else if (strcmp(olong, "weighted") == 0) { + if (args_info->weighted_given) { + fprintf(stderr, "%s: `--weighted' (`-w') option given more than once\n", PROGRAM); + } + args_info->weighted_given++; + args_info->weighted_arg = (int)atoi(val); + } + + /* Specify semiring type. */ + else if (strcmp(olong, "semiring") == 0) { + if (args_info->semiring_given) { + fprintf(stderr, "%s: `--semiring' (`-s') option given more than once\n", PROGRAM); + } + args_info->semiring_given++; + if (args_info->semiring_arg) free(args_info->semiring_arg); + args_info->semiring_arg = strdup(val); + } + + /* Specify compression level of output file. */ + else if (strcmp(olong, "compress") == 0) { + if (args_info->compress_given) { + fprintf(stderr, "%s: `--compress' (`-z') option given more than once\n", PROGRAM); + } + args_info->compress_given++; + args_info->compress_arg = (int)atoi(val); + } + + /* Specifiy output file (default=stdout). 
*/ + else if (strcmp(olong, "output") == 0) { + if (args_info->output_given) { + fprintf(stderr, "%s: `--output' (`-F') option given more than once\n", PROGRAM); + } + args_info->output_given++; + if (args_info->output_arg) free(args_info->output_arg); + args_info->output_arg = strdup(val); + } + + else { + fprintf(stderr, "%s: unknown long option '%s'.\n", PROGRAM, olong); + return (EXIT_FAILURE); + } + break; + + case '?': /* Invalid Option */ + fprintf(stderr, "%s: unknown option '%s'.\n", PROGRAM, olong); + return (EXIT_FAILURE); + + + default: /* bug: options not considered. */ + fprintf (stderr, "%s: option unknown: %c\n", PROGRAM, oshort); + abort (); + } /* switch */ + return 0; +} + + +/* Initialize options not yet given from environmental defaults */ +void +cmdline_parser_envdefaults(struct gengetopt_args_info *args_info) +{ + + + return; +} + + +/* Load option values from an .rc file */ +void +cmdline_parser_read_rcfile(const char *filename, + struct gengetopt_args_info *args_info, + int user_specified) +{ + char *fullname; + FILE *rcfile; + + if (!filename) return; /* ignore NULL filenames */ + +#if defined(HAVE_GETUID) && defined(HAVE_GETPWUID) + if (*filename == '~') { + /* tilde-expansion hack */ + struct passwd *pwent = getpwuid(getuid()); + if (!pwent) { + fprintf(stderr, "%s: user-id %d not found!\n", PROGRAM, getuid()); + return; + } + if (!pwent->pw_dir) { + fprintf(stderr, "%s: home directory for user-id %d not found!\n", PROGRAM, getuid()); + return; + } + fullname = (char *)malloc(strlen(pwent->pw_dir)+strlen(filename)); + strcpy(fullname, pwent->pw_dir); + strcat(fullname, filename+1); + } else { + fullname = strdup(filename); + } +#else /* !(defined(HAVE_GETUID) && defined(HAVE_GETPWUID)) */ + fullname = strdup(filename); +#endif /* defined(HAVE_GETUID) && defined(HAVE_GETPWUID) */ + + /* try to open */ + rcfile = fopen(fullname,"r"); + if (!rcfile) { + if (user_specified) { + fprintf(stderr, "%s: warning: open failed for rc-file '%s': 
%s\n", + PROGRAM, fullname, strerror(errno)); + } + } + else { + cmdline_parser_read_rc_stream(rcfile, fullname, args_info); + } + + /* cleanup */ + if (fullname != filename) free(fullname); + if (rcfile) fclose(rcfile); + + return; +} + + +/* Parse option values from an .rc file : guts */ +#define OPTPARSE_GET 32 +void +cmdline_parser_read_rc_stream(FILE *rcfile, + const char *filename, + struct gengetopt_args_info *args_info) +{ + char *optname = (char *)malloc(OPTPARSE_GET); + char *optval = (char *)malloc(OPTPARSE_GET); + size_t onsize = OPTPARSE_GET; + size_t ovsize = OPTPARSE_GET; + size_t onlen = 0; + size_t ovlen = 0; + int lineno = 0; + char c; + +#ifdef cmdline_parser_DEBUG + fprintf(stderr, "cmdline_parser_read_rc_stream('%s'):\n", filename); +#endif + + while ((c = fgetc(rcfile)) != EOF) { + onlen = 0; + ovlen = 0; + lineno++; + + /* -- get next option-name */ + /* skip leading space and comments */ + if (isspace(c)) continue; + if (c == '#') { + while ((c = fgetc(rcfile)) != EOF) { + if (c == '\n') break; + } + continue; + } + + /* parse option-name */ + while (c != EOF && c != '=' && !isspace(c)) { + /* re-allocate if necessary */ + if (onlen >= onsize-1) { + char *tmp = (char *)malloc(onsize+OPTPARSE_GET); + strcpy(tmp,optname); + free(optname); + + onsize += OPTPARSE_GET; + optname = tmp; + } + optname[onlen++] = c; + c = fgetc(rcfile); + } + optname[onlen++] = '\0'; + +#ifdef cmdline_parser_DEBUG + fprintf(stderr, "cmdline_parser_read_rc_stream('%s'): line %d: optname='%s'\n", + filename, lineno, optname); +#endif + + /* -- get next option-value */ + /* skip leading space */ + while ((c = fgetc(rcfile)) != EOF && isspace(c)) { + ; + } + + /* parse option-value */ + while (c != EOF && c != '\n') { + /* re-allocate if necessary */ + if (ovlen >= ovsize-1) { + char *tmp = (char *)malloc(ovsize+OPTPARSE_GET); + strcpy(tmp,optval); + free(optval); + ovsize += OPTPARSE_GET; + optval = tmp; + } + optval[ovlen++] = c; + c = fgetc(rcfile); + } + 
optval[ovlen++] = '\0'; + + /* now do the action for the option */ + if (cmdline_parser_parse_option('\0',optname,optval,args_info) != 0) { + fprintf(stderr, "%s: error in file '%s' at line %d.\n", PROGRAM, filename, lineno); + + } + } + + /* cleanup */ + free(optname); + free(optval); + + return; +} diff --git a/gfsm/gfsm/src/programs/gfsmconvert_cmdparser.h b/gfsm/gfsm/src/programs/gfsmconvert_cmdparser.h new file mode 100644 index 0000000..93a2291 --- /dev/null +++ b/gfsm/gfsm/src/programs/gfsmconvert_cmdparser.h @@ -0,0 +1,69 @@ +/* -*- Mode: C -*- + * + * File: gfsmconvert_cmdparser.h + * Description: Headers for command-line parser struct gengetopt_args_info. + * + * File autogenerated by optgen.perl version 0.05. + * + */ + +#ifndef gfsmconvert_cmdparser_h +#define gfsmconvert_cmdparser_h + +#ifdef __cplusplus +extern "C" { +#endif /* __cplusplus */ + +/* + * moocow: Never set PACKAGE and VERSION here. + */ + +struct gengetopt_args_info { + int transducer_arg; /* Specify whether automaton is a transducer (default=0). */ + int weighted_arg; /* Specify whether automaton is weighted (default=0). */ + char * semiring_arg; /* Specify semiring type. (default=NULL). */ + int compress_arg; /* Specify compression level of output file. (default=-1). */ + char * output_arg; /* Specifiy output file (default=stdout). (default=-). 
*/ + + int help_given; /* Whether help was given */ + int version_given; /* Whether version was given */ + int transducer_given; /* Whether transducer was given */ + int weighted_given; /* Whether weighted was given */ + int semiring_given; /* Whether semiring was given */ + int compress_given; /* Whether compress was given */ + int output_given; /* Whether output was given */ + + char **inputs; /* unnamed arguments */ + unsigned inputs_num; /* number of unnamed arguments */ +}; + +/* read rc files (if any) and parse all command-line options in one swell foop */ +int cmdline_parser (int argc, char *const *argv, struct gengetopt_args_info *args_info); + +/* instantiate defaults from environment variables: you must call this yourself! */ +void cmdline_parser_envdefaults (struct gengetopt_args_info *args_info); + +/* read a single rc-file */ +void cmdline_parser_read_rcfile (const char *filename, + struct gengetopt_args_info *args_info, + int user_specified); + +/* read a single rc-file (stream) */ +void cmdline_parser_read_rc_stream (FILE *rcfile, + const char *filename, + struct gengetopt_args_info *args_info); + +/* parse a single option */ +int cmdline_parser_parse_option (char oshort, const char *olong, const char *val, + struct gengetopt_args_info *args_info); + +/* print help message */ +void cmdline_parser_print_help(void); + +/* print version */ +void cmdline_parser_print_version(void); + +#ifdef __cplusplus +} +#endif /* __cplusplus */ +#endif /* gfsmconvert_cmdparser_h */ diff --git a/gfsm/gfsm/src/programs/gfsmconvert_main.c b/gfsm/gfsm/src/programs/gfsmconvert_main.c new file mode 100644 index 0000000..7c09f08 --- /dev/null +++ b/gfsm/gfsm/src/programs/gfsmconvert_main.c @@ -0,0 +1,110 @@ +/* + gfsm-utils : finite state automaton utilities + Copyright (C) 2004 by Bryan Jurish <moocow@ling.uni-potsdam.de> + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by 
the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include <gfsm.h> + +#include "gfsmconvert_cmdparser.h" + +/*-------------------------------------------------------------------------- + * Globals + *--------------------------------------------------------------------------*/ +char *progname = "gfsmconvert"; + +//-- options +struct gengetopt_args_info args; + +//-- files +const char *infilename = "-"; +const char *outfilename = "-"; + +//-- global structs +gfsmAutomaton *fsm; +gfsmError *err = NULL; +gfsmSRType srtype = gfsmSRTUnknown; + +/*-------------------------------------------------------------------------- + * Option Processing + *--------------------------------------------------------------------------*/ +void get_my_options(int argc, char **argv) +{ + if (cmdline_parser(argc, argv, &args) != 0) + exit(1); + + //-- load environmental defaults + //cmdline_parser_envdefaults(&args); + + //-- filenames + if (args.inputs_num > 0) infilename = args.inputs[0]; + outfilename = args.output_arg; + + //-- initialize fsm + fsm = gfsm_automaton_new(); +} + +/*-------------------------------------------------------------------------- + * MAIN + *--------------------------------------------------------------------------*/ +int main (int argc, char **argv) +{ + GFSM_INIT + get_my_options(argc,argv); + + //-- load automaton + if 
(!gfsm_automaton_load_bin_filename(fsm,infilename,&err)) { + g_printerr("%s: load failed for '%s': %s\n", progname, infilename, err->message); + exit(3); + } + + //-- set flags + if (args.transducer_given) fsm->flags.is_transducer = args.transducer_arg; + if (args.weighted_given) fsm->flags.is_weighted = args.weighted_arg; + + //-- set semiring + if (args.semiring_given) { + srtype = gfsm_sr_name_to_type(args.semiring_arg); + if (srtype == gfsmSRTUnknown) { + g_printerr("%s: Warning: unknown semiring name '%s' defaults to type 'tropical'.\n", + progname, args.semiring_arg); + srtype = gfsmSRTTropical; + } + if (srtype != fsm->sr->type) { + gfsm_automaton_set_semiring(fsm, gfsm_semiring_new(srtype)); + } + } + + //-- store automaton + if (!gfsm_automaton_save_bin_filename(fsm,outfilename,args.compress_arg,&err)) { + g_printerr("%s: store failed to '%s': %s\n", progname, outfilename, err->message); + exit(4); + } + + //-- cleanup + gfsm_automaton_free(fsm); + + GFSM_FINISH + return 0; +} diff --git a/gfsm/gfsm/src/programs/gfsmdeterminize.gog b/gfsm/gfsm/src/programs/gfsmdeterminize.gog new file mode 100644 index 0000000..1fed7e8 --- /dev/null +++ b/gfsm/gfsm/src/programs/gfsmdeterminize.gog @@ -0,0 +1,70 @@ +# -*- Mode: Shell-Script -*- +# +# Getopt::Gen specification +#----------------------------------------------------------------------------- +program "gfsmdeterminize" +#program_version "0.01" + +purpose "Determinize a finite state machine" +author "Bryan Jurish <moocow@ling.uni-potsdam.de>" +on_reparse "warn" + +#----------------------------------------------------------------------------- +# Details +#----------------------------------------------------------------------------- +details "" + +#----------------------------------------------------------------------------- +# Files +#----------------------------------------------------------------------------- +#rcfile "/etc/gfsmrc" +#rcfile "~/.gfsmrc" + 
+#----------------------------------------------------------------------------- +# Arguments +#----------------------------------------------------------------------------- +argument "BINFILE" "Stored binary gfsm file" \ + details=" +If unspecified, standard input will be read +" + +#----------------------------------------------------------------------------- +# Options +#----------------------------------------------------------------------------- +#group "Basic Options" + +int "compress" z "Specify compression level of output file." \ + arg="LEVEL" \ + default="-1" \ + details=" +Specify zlib compression level of output file. -1 (default) indicates +the default compression level, 0 (zero) indicates no zlib compression at all, +and 9 indicates the best possible compression. +" + +string "output" F "Specifiy output file (default=stdout)." \ + arg="FILE" \ + default="-" + +#----------------------------------------------------------------------------- +# Addenda +#----------------------------------------------------------------------------- +#addenda "" + +#----------------------------------------------------------------------------- +# Bugs +#----------------------------------------------------------------------------- +bugs " + +None known. + +" + +#----------------------------------------------------------------------------- +# Footer +#----------------------------------------------------------------------------- +#acknowledge `cat acknowledge.pod` + +seealso " +L<gfsmutils> +" diff --git a/gfsm/gfsm/src/programs/gfsmdeterminize_cmdparser.c b/gfsm/gfsm/src/programs/gfsmdeterminize_cmdparser.c new file mode 100644 index 0000000..0ce40e9 --- /dev/null +++ b/gfsm/gfsm/src/programs/gfsmdeterminize_cmdparser.c @@ -0,0 +1,449 @@ +/* -*- Mode: C -*- + * + * File: gfsmdeterminize_cmdparser.c + * Description: Code for command-line parser struct gengetopt_args_info. 
+ * + * File autogenerated by optgen.perl version 0.05 + * generated with the following command: + * /usr/local/bin/optgen.perl -u -l --no-handle-rcfile --nopod -F gfsmdeterminize_cmdparser gfsmdeterminize.gog + * + * The developers of optgen.perl consider the fixed text that goes in all + * optgen.perl output files to be in the public domain: + * we make no copyright claims on it. + * + */ + +#include <stdio.h> +#include <stdlib.h> +#include <errno.h> +#include <string.h> +#include <unistd.h> +#include <ctype.h> + +/* If we use autoconf/autoheader. */ +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#ifdef HAVE_PWD_H +# include <pwd.h> +#endif + +/* Allow user-overrides for PACKAGE and VERSION */ +#ifndef PACKAGE +# define PACKAGE "PACKAGE" +#endif + +#ifndef VERSION +# define VERSION "VERSION" +#endif + + +#ifndef PROGRAM +# define PROGRAM "gfsmdeterminize" +#endif + +/* #define cmdline_parser_DEBUG */ + +/* Check for "configure's" getopt check result. */ +#ifndef HAVE_GETOPT_LONG +# include "getopt.h" +#else +# include <getopt.h> +#endif + +#if !defined(HAVE_STRDUP) && !defined(strdup) +# define strdup gengetopt_strdup +#endif /* HAVE_STRDUP */ + +#include "gfsmdeterminize_cmdparser.h" + + +/* user code section */ + +/* end user code section */ + + +void +cmdline_parser_print_version (void) +{ + printf("gfsmdeterminize (%s %s) by Bryan Jurish <moocow@ling.uni-potsdam.de>\n", PACKAGE, VERSION); +} + +void +cmdline_parser_print_help (void) +{ + cmdline_parser_print_version (); + printf("\n"); + printf("Purpose:\n"); + printf(" Determinize a finite state machine\n"); + printf("\n"); + + printf("Usage: %s [OPTIONS]... 
BINFILE\n", "gfsmdeterminize"); + + printf("\n"); + printf(" Arguments:\n"); + printf(" BINFILE Stored binary gfsm file\n"); + + printf("\n"); + printf(" Options:\n"); + printf(" -h --help Print help and exit.\n"); + printf(" -V --version Print version and exit.\n"); + printf(" -zLEVEL --compress=LEVEL Specify compression level of output file.\n"); + printf(" -FFILE --output=FILE Specifiy output file (default=stdout).\n"); +} + +#if !defined(HAVE_STRDUP) && !defined(strdup) +/* gengetopt_strdup(): automatically generated from strdup.c. */ +/* strdup.c replacement of strdup, which is not standard */ +static char * +gengetopt_strdup (const char *s) +{ + char *result = (char*)malloc(strlen(s) + 1); + if (result == (char*)0) + return (char*)0; + strcpy(result, s); + return result; +} +#endif /* HAVE_STRDUP */ + +/* clear_args(args_info): clears all args & resets to defaults */ +static void +clear_args(struct gengetopt_args_info *args_info) +{ + args_info->compress_arg = -1; + args_info->output_arg = strdup("-"); +} + + +int +cmdline_parser (int argc, char * const *argv, struct gengetopt_args_info *args_info) +{ + int c; /* Character of the parsed option. */ + int missing_required_options = 0; + + args_info->help_given = 0; + args_info->version_given = 0; + args_info->compress_given = 0; + args_info->output_given = 0; + + clear_args(args_info); + + /* rcfile handling */ + + /* end rcfile handling */ + + optarg = 0; + optind = 1; + opterr = 1; + optopt = '?'; + + while (1) + { + int option_index = 0; + static struct option long_options[] = { + { "help", 0, NULL, 'h' }, + { "version", 0, NULL, 'V' }, + { "compress", 1, NULL, 'z' }, + { "output", 1, NULL, 'F' }, + { NULL, 0, NULL, 0 } + }; + static char short_options[] = { + 'h', + 'V', + 'z', ':', + 'F', ':', + '\0' + }; + + c = getopt_long (argc, argv, short_options, long_options, &option_index); + + if (c == -1) break; /* Exit from 'while (1)' loop. 
*/ + + if (cmdline_parser_parse_option(c, long_options[option_index].name, optarg, args_info) != 0) { + exit (EXIT_FAILURE); + } + } /* while */ + + + + if ( missing_required_options ) + exit (EXIT_FAILURE); + + + if (optind < argc) { + int i = 0 ; + args_info->inputs_num = argc - optind ; + args_info->inputs = (char **)(malloc ((args_info->inputs_num)*sizeof(char *))) ; + while (optind < argc) + args_info->inputs[ i++ ] = strdup (argv[optind++]) ; + } + + return 0; +} + + +/* Parse a single option */ +int +cmdline_parser_parse_option(char oshort, const char *olong, const char *val, + struct gengetopt_args_info *args_info) +{ + if (!oshort && !(olong && *olong)) return 1; /* ignore null options */ + +#ifdef cmdline_parser_DEBUG + fprintf(stderr, "parse_option(): oshort='%c', olong='%s', val='%s'\n", oshort, olong, val);*/ +#endif + + switch (oshort) + { + case 'h': /* Print help and exit. */ + if (args_info->help_given) { + fprintf(stderr, "%s: `--help' (`-h') option given more than once\n", PROGRAM); + } + clear_args(args_info); + cmdline_parser_print_help(); + exit(EXIT_SUCCESS); + + break; + + case 'V': /* Print version and exit. */ + if (args_info->version_given) { + fprintf(stderr, "%s: `--version' (`-V') option given more than once\n", PROGRAM); + } + clear_args(args_info); + cmdline_parser_print_version(); + exit(EXIT_SUCCESS); + + break; + + case 'z': /* Specify compression level of output file. */ + if (args_info->compress_given) { + fprintf(stderr, "%s: `--compress' (`-z') option given more than once\n", PROGRAM); + } + args_info->compress_given++; + args_info->compress_arg = (int)atoi(val); + break; + + case 'F': /* Specifiy output file (default=stdout). 
*/ + if (args_info->output_given) { + fprintf(stderr, "%s: `--output' (`-F') option given more than once\n", PROGRAM); + } + args_info->output_given++; + if (args_info->output_arg) free(args_info->output_arg); + args_info->output_arg = strdup(val); + break; + + case 0: /* Long option(s) with no short form */ + /* Print help and exit. */ + if (strcmp(olong, "help") == 0) { + if (args_info->help_given) { + fprintf(stderr, "%s: `--help' (`-h') option given more than once\n", PROGRAM); + } + clear_args(args_info); + cmdline_parser_print_help(); + exit(EXIT_SUCCESS); + + } + + /* Print version and exit. */ + else if (strcmp(olong, "version") == 0) { + if (args_info->version_given) { + fprintf(stderr, "%s: `--version' (`-V') option given more than once\n", PROGRAM); + } + clear_args(args_info); + cmdline_parser_print_version(); + exit(EXIT_SUCCESS); + + } + + /* Specify compression level of output file. */ + else if (strcmp(olong, "compress") == 0) { + if (args_info->compress_given) { + fprintf(stderr, "%s: `--compress' (`-z') option given more than once\n", PROGRAM); + } + args_info->compress_given++; + args_info->compress_arg = (int)atoi(val); + } + + /* Specifiy output file (default=stdout). */ + else if (strcmp(olong, "output") == 0) { + if (args_info->output_given) { + fprintf(stderr, "%s: `--output' (`-F') option given more than once\n", PROGRAM); + } + args_info->output_given++; + if (args_info->output_arg) free(args_info->output_arg); + args_info->output_arg = strdup(val); + } + + else { + fprintf(stderr, "%s: unknown long option '%s'.\n", PROGRAM, olong); + return (EXIT_FAILURE); + } + break; + + case '?': /* Invalid Option */ + fprintf(stderr, "%s: unknown option '%s'.\n", PROGRAM, olong); + return (EXIT_FAILURE); + + + default: /* bug: options not considered. 
*/ + fprintf (stderr, "%s: option unknown: %c\n", PROGRAM, oshort); + abort (); + } /* switch */ + return 0; +} + + +/* Initialize options not yet given from environmental defaults */ +void +cmdline_parser_envdefaults(struct gengetopt_args_info *args_info) +{ + + + return; +} + + +/* Load option values from an .rc file */ +void +cmdline_parser_read_rcfile(const char *filename, + struct gengetopt_args_info *args_info, + int user_specified) +{ + char *fullname; + FILE *rcfile; + + if (!filename) return; /* ignore NULL filenames */ + +#if defined(HAVE_GETUID) && defined(HAVE_GETPWUID) + if (*filename == '~') { + /* tilde-expansion hack */ + struct passwd *pwent = getpwuid(getuid()); + if (!pwent) { + fprintf(stderr, "%s: user-id %d not found!\n", PROGRAM, getuid()); + return; + } + if (!pwent->pw_dir) { + fprintf(stderr, "%s: home directory for user-id %d not found!\n", PROGRAM, getuid()); + return; + } + fullname = (char *)malloc(strlen(pwent->pw_dir)+strlen(filename)); + strcpy(fullname, pwent->pw_dir); + strcat(fullname, filename+1); + } else { + fullname = strdup(filename); + } +#else /* !(defined(HAVE_GETUID) && defined(HAVE_GETPWUID)) */ + fullname = strdup(filename); +#endif /* defined(HAVE_GETUID) && defined(HAVE_GETPWUID) */ + + /* try to open */ + rcfile = fopen(fullname,"r"); + if (!rcfile) { + if (user_specified) { + fprintf(stderr, "%s: warning: open failed for rc-file '%s': %s\n", + PROGRAM, fullname, strerror(errno)); + } + } + else { + cmdline_parser_read_rc_stream(rcfile, fullname, args_info); + } + + /* cleanup */ + if (fullname != filename) free(fullname); + if (rcfile) fclose(rcfile); + + return; +} + + +/* Parse option values from an .rc file : guts */ +#define OPTPARSE_GET 32 +void +cmdline_parser_read_rc_stream(FILE *rcfile, + const char *filename, + struct gengetopt_args_info *args_info) +{ + char *optname = (char *)malloc(OPTPARSE_GET); + char *optval = (char *)malloc(OPTPARSE_GET); + size_t onsize = OPTPARSE_GET; + size_t ovsize = 
OPTPARSE_GET; + size_t onlen = 0; + size_t ovlen = 0; + int lineno = 0; + char c; + +#ifdef cmdline_parser_DEBUG + fprintf(stderr, "cmdline_parser_read_rc_stream('%s'):\n", filename); +#endif + + while ((c = fgetc(rcfile)) != EOF) { + onlen = 0; + ovlen = 0; + lineno++; + + /* -- get next option-name */ + /* skip leading space and comments */ + if (isspace(c)) continue; + if (c == '#') { + while ((c = fgetc(rcfile)) != EOF) { + if (c == '\n') break; + } + continue; + } + + /* parse option-name */ + while (c != EOF && c != '=' && !isspace(c)) { + /* re-allocate if necessary */ + if (onlen >= onsize-1) { + char *tmp = (char *)malloc(onsize+OPTPARSE_GET); + strcpy(tmp,optname); + free(optname); + + onsize += OPTPARSE_GET; + optname = tmp; + } + optname[onlen++] = c; + c = fgetc(rcfile); + } + optname[onlen++] = '\0'; + +#ifdef cmdline_parser_DEBUG + fprintf(stderr, "cmdline_parser_read_rc_stream('%s'): line %d: optname='%s'\n", + filename, lineno, optname); +#endif + + /* -- get next option-value */ + /* skip leading space */ + while ((c = fgetc(rcfile)) != EOF && isspace(c)) { + ; + } + + /* parse option-value */ + while (c != EOF && c != '\n') { + /* re-allocate if necessary */ + if (ovlen >= ovsize-1) { + char *tmp = (char *)malloc(ovsize+OPTPARSE_GET); + strcpy(tmp,optval); + free(optval); + ovsize += OPTPARSE_GET; + optval = tmp; + } + optval[ovlen++] = c; + c = fgetc(rcfile); + } + optval[ovlen++] = '\0'; + + /* now do the action for the option */ + if (cmdline_parser_parse_option('\0',optname,optval,args_info) != 0) { + fprintf(stderr, "%s: error in file '%s' at line %d.\n", PROGRAM, filename, lineno); + + } + } + + /* cleanup */ + free(optname); + free(optval); + + return; +} diff --git a/gfsm/gfsm/src/programs/gfsmdeterminize_cmdparser.h b/gfsm/gfsm/src/programs/gfsmdeterminize_cmdparser.h new file mode 100644 index 0000000..a0324f4 --- /dev/null +++ b/gfsm/gfsm/src/programs/gfsmdeterminize_cmdparser.h @@ -0,0 +1,63 @@ +/* -*- Mode: C -*- + * + * File: 
gfsmdeterminize_cmdparser.h + * Description: Headers for command-line parser struct gengetopt_args_info. + * + * File autogenerated by optgen.perl version 0.05. + * + */ + +#ifndef gfsmdeterminize_cmdparser_h +#define gfsmdeterminize_cmdparser_h + +#ifdef __cplusplus +extern "C" { +#endif /* __cplusplus */ + +/* + * moocow: Never set PACKAGE and VERSION here. + */ + +struct gengetopt_args_info { + int compress_arg; /* Specify compression level of output file. (default=-1). */ + char * output_arg; /* Specifiy output file (default=stdout). (default=-). */ + + int help_given; /* Whether help was given */ + int version_given; /* Whether version was given */ + int compress_given; /* Whether compress was given */ + int output_given; /* Whether output was given */ + + char **inputs; /* unnamed arguments */ + unsigned inputs_num; /* number of unnamed arguments */ +}; + +/* read rc files (if any) and parse all command-line options in one swell foop */ +int cmdline_parser (int argc, char *const *argv, struct gengetopt_args_info *args_info); + +/* instantiate defaults from environment variables: you must call this yourself! 
*/ +void cmdline_parser_envdefaults (struct gengetopt_args_info *args_info); + +/* read a single rc-file */ +void cmdline_parser_read_rcfile (const char *filename, + struct gengetopt_args_info *args_info, + int user_specified); + +/* read a single rc-file (stream) */ +void cmdline_parser_read_rc_stream (FILE *rcfile, + const char *filename, + struct gengetopt_args_info *args_info); + +/* parse a single option */ +int cmdline_parser_parse_option (char oshort, const char *olong, const char *val, + struct gengetopt_args_info *args_info); + +/* print help message */ +void cmdline_parser_print_help(void); + +/* print version */ +void cmdline_parser_print_version(void); + +#ifdef __cplusplus +} +#endif /* __cplusplus */ +#endif /* gfsmdeterminize_cmdparser_h */ diff --git a/gfsm/gfsm/src/programs/gfsmdeterminize_main.c b/gfsm/gfsm/src/programs/gfsmdeterminize_main.c new file mode 100644 index 0000000..66ba402 --- /dev/null +++ b/gfsm/gfsm/src/programs/gfsmdeterminize_main.c @@ -0,0 +1,100 @@ +/* + gfsm-utils : finite state automaton utilities + Copyright (C) 2004 by Bryan Jurish <moocow@ling.uni-potsdam.de> + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with this library; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+
+#ifdef HAVE_CONFIG_H
+#  include <config.h>
+#endif
+
+#include <stdio.h>
+#include <errno.h>
+#include <string.h>
+#include <stdlib.h>
+
+#include <gfsm.h>
+
+#include "gfsmdeterminize_cmdparser.h"
+
+/*--------------------------------------------------------------------------
+ * Globals
+ *--------------------------------------------------------------------------*/
+char *progname = "gfsmdeterminize";
+
+//-- options
+struct gengetopt_args_info args;
+
+//-- files
+const char *infilename  = "-";
+const char *outfilename = "-";
+
+//-- global structs
+gfsmAutomaton *fsm;
+
+/*--------------------------------------------------------------------------
+ * Option Processing
+ *
+ * get_my_options(argc, argv)
+ *  - parses the command line into the global 'args'; exits with status 1
+ *    if cmdline_parser() reports failure
+ *  - the first positional argument, if any, replaces the default input
+ *    file name "-"
+ *  - a non-NULL --output value replaces the default output file name "-"
+ *  - allocates the global automaton 'fsm'
+ *--------------------------------------------------------------------------*/
+void get_my_options(int argc, char **argv)
+{
+  if (cmdline_parser(argc, argv, &args) != 0)
+    exit(1);
+
+  //-- output
+  if (args.inputs_num) infilename = args.inputs[0];
+  if (args.output_arg) outfilename = args.output_arg;
+
+  //-- load environmental defaults
+  //cmdline_parser_envdefaults(&args);
+
+  //-- initialize automaton
+  fsm = gfsm_automaton_new();
+}
+
+
+/*--------------------------------------------------------------------------
+ * MAIN
+ *
+ * Loads a stored binary automaton from 'infilename', determinizes it in
+ * place with gfsm_automaton_determinize() and stores the result to
+ * 'outfilename' using the --compress level.
+ * Exit status: 0 on success, 255 if loading fails, 4 if storing fails
+ * (1 for option errors, see get_my_options()).
+ *--------------------------------------------------------------------------*/
+int main (int argc, char **argv)
+{
+  gfsmError *err = NULL;
+
+  GFSM_INIT
+
+  get_my_options(argc,argv);
+
+  //-- load automaton
+  if (!gfsm_automaton_load_bin_filename(fsm,infilename,&err)) {
+    g_printerr("%s: load failed for '%s': %s\n", progname, infilename, err->message);
+    exit(255);
+  }
+
+  //-- determinize
+  gfsm_automaton_determinize(fsm);
+
+  //-- spew automaton
+  if (!gfsm_automaton_save_bin_filename(fsm, outfilename, args.compress_arg, &err)) {
+    g_printerr("%s: store failed to '%s': %s\n", progname, outfilename, err->message);
+    exit(4);
+  }
+
+  //-- cleanup
+  if (fsm) gfsm_automaton_free(fsm);
+
+  GFSM_FINISH
+
+  return 0;
+}
diff --git a/gfsm/gfsm/src/programs/gfsmdifference.gog b/gfsm/gfsm/src/programs/gfsmdifference.gog
new file mode 100644
index 0000000..9bdb612
--- /dev/null
+++ b/gfsm/gfsm/src/programs/gfsmdifference.gog
@@ -0,0 +1,71 @@
+# -*- Mode: Shell-Script -*-
+#
+# Getopt::Gen specification
+#-----------------------------------------------------------------------------
+program "gfsmdifference"
+#program_version "0.01"
+
+purpose "Compute difference of epsilon-free finite state acceptors"
+author "Bryan Jurish <moocow@ling.uni-potsdam.de>"
+on_reparse "warn"
+
+#-----------------------------------------------------------------------------
+# Details
+#-----------------------------------------------------------------------------
+details ""
+
+#-----------------------------------------------------------------------------
+# Files
+#-----------------------------------------------------------------------------
+#rcfile "/etc/mootrc"
+#rcfile "~/.mootrc"
+
+#-----------------------------------------------------------------------------
+# Arguments
+#-----------------------------------------------------------------------------
+argument "BINFILE(s)" "Stored binary gfsm file(s)" \
+    details="
+If unspecified, standard input will be read
+"
+
+#-----------------------------------------------------------------------------
+# Options
+#-----------------------------------------------------------------------------
+#group "Basic Options"
+
+int "compress" z "Specify compression level of output file." \
+    arg="LEVEL" \
+    default="-1" \
+    details="
+Specify zlib compression level of output file. -1 (default) indicates
+the default compression level, 0 (zero) indicates no zlib compression at all,
+and 9 indicates the best possible compression.
+"
+
+
+string "output" F "Specifiy output file (default=stdout)." 
\ + arg="FILE" \ + default="-" + +#----------------------------------------------------------------------------- +# Addenda +#----------------------------------------------------------------------------- +#addenda "" + +#----------------------------------------------------------------------------- +# Bugs +#----------------------------------------------------------------------------- +bugs " + +Epsilon arcs are not handled correctly. + +" + +#----------------------------------------------------------------------------- +# Footer +#----------------------------------------------------------------------------- +#acknowledge `cat acknowledge.pod` + +seealso " +L<gfsmutils> +" diff --git a/gfsm/gfsm/src/programs/gfsmdifference_cmdparser.c b/gfsm/gfsm/src/programs/gfsmdifference_cmdparser.c new file mode 100644 index 0000000..80c51f7 --- /dev/null +++ b/gfsm/gfsm/src/programs/gfsmdifference_cmdparser.c @@ -0,0 +1,449 @@ +/* -*- Mode: C -*- + * + * File: gfsmdifference_cmdparser.c + * Description: Code for command-line parser struct gengetopt_args_info. + * + * File autogenerated by optgen.perl version 0.05 + * generated with the following command: + * /usr/local/bin/optgen.perl -u -l --no-handle-rcfile --nopod -F gfsmdifference_cmdparser gfsmdifference.gog + * + * The developers of optgen.perl consider the fixed text that goes in all + * optgen.perl output files to be in the public domain: + * we make no copyright claims on it. + * + */ + +#include <stdio.h> +#include <stdlib.h> +#include <errno.h> +#include <string.h> +#include <unistd.h> +#include <ctype.h> + +/* If we use autoconf/autoheader. 
*/ +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#ifdef HAVE_PWD_H +# include <pwd.h> +#endif + +/* Allow user-overrides for PACKAGE and VERSION */ +#ifndef PACKAGE +# define PACKAGE "PACKAGE" +#endif + +#ifndef VERSION +# define VERSION "VERSION" +#endif + + +#ifndef PROGRAM +# define PROGRAM "gfsmdifference" +#endif + +/* #define cmdline_parser_DEBUG */ + +/* Check for "configure's" getopt check result. */ +#ifndef HAVE_GETOPT_LONG +# include "getopt.h" +#else +# include <getopt.h> +#endif + +#if !defined(HAVE_STRDUP) && !defined(strdup) +# define strdup gengetopt_strdup +#endif /* HAVE_STRDUP */ + +#include "gfsmdifference_cmdparser.h" + + +/* user code section */ + +/* end user code section */ + + +void +cmdline_parser_print_version (void) +{ + printf("gfsmdifference (%s %s) by Bryan Jurish <moocow@ling.uni-potsdam.de>\n", PACKAGE, VERSION); +} + +void +cmdline_parser_print_help (void) +{ + cmdline_parser_print_version (); + printf("\n"); + printf("Purpose:\n"); + printf(" Compute difference of epsilon-free finite state acceptors\n"); + printf("\n"); + + printf("Usage: %s [OPTIONS]... BINFILE(s)\n", "gfsmdifference"); + + printf("\n"); + printf(" Arguments:\n"); + printf(" BINFILE(s) Stored binary gfsm file(s)\n"); + + printf("\n"); + printf(" Options:\n"); + printf(" -h --help Print help and exit.\n"); + printf(" -V --version Print version and exit.\n"); + printf(" -zLEVEL --compress=LEVEL Specify compression level of output file.\n"); + printf(" -FFILE --output=FILE Specifiy output file (default=stdout).\n"); +} + +#if !defined(HAVE_STRDUP) && !defined(strdup) +/* gengetopt_strdup(): automatically generated from strdup.c. 
*/ +/* strdup.c replacement of strdup, which is not standard */ +static char * +gengetopt_strdup (const char *s) +{ + char *result = (char*)malloc(strlen(s) + 1); + if (result == (char*)0) + return (char*)0; + strcpy(result, s); + return result; +} +#endif /* HAVE_STRDUP */ + +/* clear_args(args_info): clears all args & resets to defaults */ +static void +clear_args(struct gengetopt_args_info *args_info) +{ + args_info->compress_arg = -1; + args_info->output_arg = strdup("-"); +} + + +int +cmdline_parser (int argc, char * const *argv, struct gengetopt_args_info *args_info) +{ + int c; /* Character of the parsed option. */ + int missing_required_options = 0; + + args_info->help_given = 0; + args_info->version_given = 0; + args_info->compress_given = 0; + args_info->output_given = 0; + + clear_args(args_info); + + /* rcfile handling */ + + /* end rcfile handling */ + + optarg = 0; + optind = 1; + opterr = 1; + optopt = '?'; + + while (1) + { + int option_index = 0; + static struct option long_options[] = { + { "help", 0, NULL, 'h' }, + { "version", 0, NULL, 'V' }, + { "compress", 1, NULL, 'z' }, + { "output", 1, NULL, 'F' }, + { NULL, 0, NULL, 0 } + }; + static char short_options[] = { + 'h', + 'V', + 'z', ':', + 'F', ':', + '\0' + }; + + c = getopt_long (argc, argv, short_options, long_options, &option_index); + + if (c == -1) break; /* Exit from 'while (1)' loop. 
*/ + + if (cmdline_parser_parse_option(c, long_options[option_index].name, optarg, args_info) != 0) { + exit (EXIT_FAILURE); + } + } /* while */ + + + + if ( missing_required_options ) + exit (EXIT_FAILURE); + + + if (optind < argc) { + int i = 0 ; + args_info->inputs_num = argc - optind ; + args_info->inputs = (char **)(malloc ((args_info->inputs_num)*sizeof(char *))) ; + while (optind < argc) + args_info->inputs[ i++ ] = strdup (argv[optind++]) ; + } + + return 0; +} + + +/* Parse a single option */ +int +cmdline_parser_parse_option(char oshort, const char *olong, const char *val, + struct gengetopt_args_info *args_info) +{ + if (!oshort && !(olong && *olong)) return 1; /* ignore null options */ + +#ifdef cmdline_parser_DEBUG + fprintf(stderr, "parse_option(): oshort='%c', olong='%s', val='%s'\n", oshort, olong, val);*/ +#endif + + switch (oshort) + { + case 'h': /* Print help and exit. */ + if (args_info->help_given) { + fprintf(stderr, "%s: `--help' (`-h') option given more than once\n", PROGRAM); + } + clear_args(args_info); + cmdline_parser_print_help(); + exit(EXIT_SUCCESS); + + break; + + case 'V': /* Print version and exit. */ + if (args_info->version_given) { + fprintf(stderr, "%s: `--version' (`-V') option given more than once\n", PROGRAM); + } + clear_args(args_info); + cmdline_parser_print_version(); + exit(EXIT_SUCCESS); + + break; + + case 'z': /* Specify compression level of output file. */ + if (args_info->compress_given) { + fprintf(stderr, "%s: `--compress' (`-z') option given more than once\n", PROGRAM); + } + args_info->compress_given++; + args_info->compress_arg = (int)atoi(val); + break; + + case 'F': /* Specifiy output file (default=stdout). 
*/ + if (args_info->output_given) { + fprintf(stderr, "%s: `--output' (`-F') option given more than once\n", PROGRAM); + } + args_info->output_given++; + if (args_info->output_arg) free(args_info->output_arg); + args_info->output_arg = strdup(val); + break; + + case 0: /* Long option(s) with no short form */ + /* Print help and exit. */ + if (strcmp(olong, "help") == 0) { + if (args_info->help_given) { + fprintf(stderr, "%s: `--help' (`-h') option given more than once\n", PROGRAM); + } + clear_args(args_info); + cmdline_parser_print_help(); + exit(EXIT_SUCCESS); + + } + + /* Print version and exit. */ + else if (strcmp(olong, "version") == 0) { + if (args_info->version_given) { + fprintf(stderr, "%s: `--version' (`-V') option given more than once\n", PROGRAM); + } + clear_args(args_info); + cmdline_parser_print_version(); + exit(EXIT_SUCCESS); + + } + + /* Specify compression level of output file. */ + else if (strcmp(olong, "compress") == 0) { + if (args_info->compress_given) { + fprintf(stderr, "%s: `--compress' (`-z') option given more than once\n", PROGRAM); + } + args_info->compress_given++; + args_info->compress_arg = (int)atoi(val); + } + + /* Specifiy output file (default=stdout). */ + else if (strcmp(olong, "output") == 0) { + if (args_info->output_given) { + fprintf(stderr, "%s: `--output' (`-F') option given more than once\n", PROGRAM); + } + args_info->output_given++; + if (args_info->output_arg) free(args_info->output_arg); + args_info->output_arg = strdup(val); + } + + else { + fprintf(stderr, "%s: unknown long option '%s'.\n", PROGRAM, olong); + return (EXIT_FAILURE); + } + break; + + case '?': /* Invalid Option */ + fprintf(stderr, "%s: unknown option '%s'.\n", PROGRAM, olong); + return (EXIT_FAILURE); + + + default: /* bug: options not considered. 
*/ + fprintf (stderr, "%s: option unknown: %c\n", PROGRAM, oshort); + abort (); + } /* switch */ + return 0; +} + + +/* Initialize options not yet given from environmental defaults */ +void +cmdline_parser_envdefaults(struct gengetopt_args_info *args_info) +{ + + + return; +} + + +/* Load option values from an .rc file */ +void +cmdline_parser_read_rcfile(const char *filename, + struct gengetopt_args_info *args_info, + int user_specified) +{ + char *fullname; + FILE *rcfile; + + if (!filename) return; /* ignore NULL filenames */ + +#if defined(HAVE_GETUID) && defined(HAVE_GETPWUID) + if (*filename == '~') { + /* tilde-expansion hack */ + struct passwd *pwent = getpwuid(getuid()); + if (!pwent) { + fprintf(stderr, "%s: user-id %d not found!\n", PROGRAM, getuid()); + return; + } + if (!pwent->pw_dir) { + fprintf(stderr, "%s: home directory for user-id %d not found!\n", PROGRAM, getuid()); + return; + } + fullname = (char *)malloc(strlen(pwent->pw_dir)+strlen(filename)); + strcpy(fullname, pwent->pw_dir); + strcat(fullname, filename+1); + } else { + fullname = strdup(filename); + } +#else /* !(defined(HAVE_GETUID) && defined(HAVE_GETPWUID)) */ + fullname = strdup(filename); +#endif /* defined(HAVE_GETUID) && defined(HAVE_GETPWUID) */ + + /* try to open */ + rcfile = fopen(fullname,"r"); + if (!rcfile) { + if (user_specified) { + fprintf(stderr, "%s: warning: open failed for rc-file '%s': %s\n", + PROGRAM, fullname, strerror(errno)); + } + } + else { + cmdline_parser_read_rc_stream(rcfile, fullname, args_info); + } + + /* cleanup */ + if (fullname != filename) free(fullname); + if (rcfile) fclose(rcfile); + + return; +} + + +/* Parse option values from an .rc file : guts */ +#define OPTPARSE_GET 32 +void +cmdline_parser_read_rc_stream(FILE *rcfile, + const char *filename, + struct gengetopt_args_info *args_info) +{ + char *optname = (char *)malloc(OPTPARSE_GET); + char *optval = (char *)malloc(OPTPARSE_GET); + size_t onsize = OPTPARSE_GET; + size_t ovsize = 
OPTPARSE_GET; + size_t onlen = 0; + size_t ovlen = 0; + int lineno = 0; + char c; + +#ifdef cmdline_parser_DEBUG + fprintf(stderr, "cmdline_parser_read_rc_stream('%s'):\n", filename); +#endif + + while ((c = fgetc(rcfile)) != EOF) { + onlen = 0; + ovlen = 0; + lineno++; + + /* -- get next option-name */ + /* skip leading space and comments */ + if (isspace(c)) continue; + if (c == '#') { + while ((c = fgetc(rcfile)) != EOF) { + if (c == '\n') break; + } + continue; + } + + /* parse option-name */ + while (c != EOF && c != '=' && !isspace(c)) { + /* re-allocate if necessary */ + if (onlen >= onsize-1) { + char *tmp = (char *)malloc(onsize+OPTPARSE_GET); + strcpy(tmp,optname); + free(optname); + + onsize += OPTPARSE_GET; + optname = tmp; + } + optname[onlen++] = c; + c = fgetc(rcfile); + } + optname[onlen++] = '\0'; + +#ifdef cmdline_parser_DEBUG + fprintf(stderr, "cmdline_parser_read_rc_stream('%s'): line %d: optname='%s'\n", + filename, lineno, optname); +#endif + + /* -- get next option-value */ + /* skip leading space */ + while ((c = fgetc(rcfile)) != EOF && isspace(c)) { + ; + } + + /* parse option-value */ + while (c != EOF && c != '\n') { + /* re-allocate if necessary */ + if (ovlen >= ovsize-1) { + char *tmp = (char *)malloc(ovsize+OPTPARSE_GET); + strcpy(tmp,optval); + free(optval); + ovsize += OPTPARSE_GET; + optval = tmp; + } + optval[ovlen++] = c; + c = fgetc(rcfile); + } + optval[ovlen++] = '\0'; + + /* now do the action for the option */ + if (cmdline_parser_parse_option('\0',optname,optval,args_info) != 0) { + fprintf(stderr, "%s: error in file '%s' at line %d.\n", PROGRAM, filename, lineno); + + } + } + + /* cleanup */ + free(optname); + free(optval); + + return; +} diff --git a/gfsm/gfsm/src/programs/gfsmdifference_cmdparser.h b/gfsm/gfsm/src/programs/gfsmdifference_cmdparser.h new file mode 100644 index 0000000..b61aa90 --- /dev/null +++ b/gfsm/gfsm/src/programs/gfsmdifference_cmdparser.h @@ -0,0 +1,63 @@ +/* -*- Mode: C -*- + * + * File: 
gfsmdifference_cmdparser.h + * Description: Headers for command-line parser struct gengetopt_args_info. + * + * File autogenerated by optgen.perl version 0.05. + * + */ + +#ifndef gfsmdifference_cmdparser_h +#define gfsmdifference_cmdparser_h + +#ifdef __cplusplus +extern "C" { +#endif /* __cplusplus */ + +/* + * moocow: Never set PACKAGE and VERSION here. + */ + +struct gengetopt_args_info { + int compress_arg; /* Specify compression level of output file. (default=-1). */ + char * output_arg; /* Specifiy output file (default=stdout). (default=-). */ + + int help_given; /* Whether help was given */ + int version_given; /* Whether version was given */ + int compress_given; /* Whether compress was given */ + int output_given; /* Whether output was given */ + + char **inputs; /* unnamed arguments */ + unsigned inputs_num; /* number of unnamed arguments */ +}; + +/* read rc files (if any) and parse all command-line options in one swell foop */ +int cmdline_parser (int argc, char *const *argv, struct gengetopt_args_info *args_info); + +/* instantiate defaults from environment variables: you must call this yourself! 
*/ +void cmdline_parser_envdefaults (struct gengetopt_args_info *args_info); + +/* read a single rc-file */ +void cmdline_parser_read_rcfile (const char *filename, + struct gengetopt_args_info *args_info, + int user_specified); + +/* read a single rc-file (stream) */ +void cmdline_parser_read_rc_stream (FILE *rcfile, + const char *filename, + struct gengetopt_args_info *args_info); + +/* parse a single option */ +int cmdline_parser_parse_option (char oshort, const char *olong, const char *val, + struct gengetopt_args_info *args_info); + +/* print help message */ +void cmdline_parser_print_help(void); + +/* print version */ +void cmdline_parser_print_version(void); + +#ifdef __cplusplus +} +#endif /* __cplusplus */ +#endif /* gfsmdifference_cmdparser_h */ diff --git a/gfsm/gfsm/src/programs/gfsmdifference_main.c b/gfsm/gfsm/src/programs/gfsmdifference_main.c new file mode 100644 index 0000000..19aaedf --- /dev/null +++ b/gfsm/gfsm/src/programs/gfsmdifference_main.c @@ -0,0 +1,123 @@ +/* + gfsm-utils : finite state automaton utilities + Copyright (C) 2005 by Bryan Jurish <moocow@ling.uni-potsdam.de> + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif + +#include <stdio.h> +#include <errno.h> +#include <string.h> +#include <stdlib.h> + +#include <gfsm.h> + +#include "gfsmdifference_cmdparser.h" + +/*-------------------------------------------------------------------------- + * Globals + *--------------------------------------------------------------------------*/ +char *progname = "gfsmdifference"; + +//-- options +struct gengetopt_args_info args; + +//-- files +const char *infilename; +const char *outfilename = "-"; + +//-- global structs etc. +gfsmError *err = NULL; +gfsmAutomaton *fsmOut=NULL, *fsmIn=NULL; + +/*-------------------------------------------------------------------------- + * Option Processing + *--------------------------------------------------------------------------*/ +void get_my_options(int argc, char **argv) +{ + if (cmdline_parser(argc, argv, &args) != 0) + exit(1); + + //-- require at least one file argument + if (args.inputs_num < 1) { + cmdline_parser_print_help(); + exit(2); + } + + //-- output + if (args.output_arg) outfilename = args.output_arg; + + //-- load environmental defaults + //cmdline_parser_envdefaults(&args); + + //-- initialize automaton + fsmIn = gfsm_automaton_new(); +} + +/*-------------------------------------------------------------------------- + * compute_operation() + * + utility routine + */ +void compute_operation(const char *infilename) +{ + //-- load automaton + if (!gfsm_automaton_load_bin_filename(fsmIn,infilename,&err)) { + g_printerr("%s: load failed for '%s': %s\n", progname, infilename, err->message); + exit(255); + } + + //-- compute difference + if (fsmOut == NULL) { + fsmOut = fsmIn; + fsmIn = gfsm_automaton_new(); + } else { + gfsm_automaton_difference(fsmOut,fsmIn); + } +} + 
+/*-------------------------------------------------------------------------- + * MAIN + *--------------------------------------------------------------------------*/ +int main (int argc, char **argv) +{ + int i; + + GFSM_INIT + + get_my_options(argc,argv); + + for (i = 0; i < args.inputs_num; i++) { + compute_operation(args.inputs[i]); + } + if (args.inputs_num == 1) compute_operation("-"); + + //-- spew automaton + if (!gfsm_automaton_save_bin_filename(fsmOut,outfilename,args.compress_arg,&err)) { + g_printerr("%s: store failed to '%s': %s\n", progname, outfilename, err->message); + exit(4); + } + + //-- cleanup + if (fsmIn) gfsm_automaton_free(fsmIn); + if (fsmOut) gfsm_automaton_free(fsmOut); + + GFSM_FINISH + + return 0; +} diff --git a/gfsm/gfsm/src/programs/gfsmdraw.gog b/gfsm/gfsm/src/programs/gfsmdraw.gog new file mode 100644 index 0000000..63fbb4f --- /dev/null +++ b/gfsm/gfsm/src/programs/gfsmdraw.gog @@ -0,0 +1,138 @@ +# -*- Mode: Shell-Script -*- +# +# Getopt::Gen specification +#----------------------------------------------------------------------------- +program "gfsmdraw" +#program_version "0.01" + +purpose "Generate graph specifications from binary gfsm files" +author "Bryan Jurish <moocow@ling.uni-potsdam.de>" +on_reparse "warn" + +#----------------------------------------------------------------------------- +# Details +#----------------------------------------------------------------------------- +details " +gfsmdraw reads a binary GFSM file file and produces a graph specification +in either dot(1) or vcg(1) format. 
+" + +#----------------------------------------------------------------------------- +# Files +#----------------------------------------------------------------------------- +#rcfile "/etc/gfsmrc" +#rcfile "~/.gfsmrc" + +#----------------------------------------------------------------------------- +# Arguments +#----------------------------------------------------------------------------- +argument "BINFILE" "Stored binary gfsm file" \ + details=" +If unspecified, standard input will be read +" + +#----------------------------------------------------------------------------- +# Common Options +#----------------------------------------------------------------------------- +group "Common Options" + +string "ilabels" i "Specify input (lower) labels file." \ + arg="LABELS" + +string "olabels" o "Specify output (upper) labels file." \ + arg="LABELS" + +string "slabels" s "Specify state labels file." \ + arg="LABELS" + +string "title" t "Specify output title." \ + arg="TITLE" \ + details="Default is input filename." + +flag "vertical" v "Draw vertically." \ + details="Default mode is top-to-bottom" + +string "output" F "Output graph specification." \ + arg="DOTFILE" \ + details=" +Default behavior is to write to stdout. +" + +#----------------------------------------------------------------------------- +# dot Options +#----------------------------------------------------------------------------- +group "dot Options" + +flag "dot" d "Request dot output (default)" + +float "width" W "Specify output width (default=bbox)." \ + arg="WIDTH" \ + default="8.5" +float "height" H "Specify output height (default=bbox)." \ + arg="HEIGHT" \ + default="11" + +int "fontsize" f "Set output font size." \ + arg="POINTS" \ + default="14" +string "font" - "Set output font name." \ + arg="FONT" + +float "nodesep" n "Set min node separation." \ + default="0.25" +float "ranksep" r "Set min rank separation." \ + default="0.40" + +flag "portrait" p "Generate in portrait mode." 
\ + details="Default mode is landscape." + +#----------------------------------------------------------------------------- +# VCG Options +#----------------------------------------------------------------------------- +group "VCG Options" + +flag "vcg" g "Request VCG output." + +int "xspace" x "Set xspace graph parameter." \ + default="40" +int "yspace" y "Set yspace graph parameter." \ + default="20" + +string "state-shape" - "Set shape for state nodes." \ + arg="SHAPE" \ + default="box" + +string "state-color" - "Set default state color." \ + arg="COLOR" \ + default="white" + +string "final-color" - "Set color for final states." \ + arg="COLOR" \ + default="lightgrey" + + +#----------------------------------------------------------------------------- +# Addenda +#----------------------------------------------------------------------------- +#addenda "" + +#----------------------------------------------------------------------------- +# Bugs +#----------------------------------------------------------------------------- +bugs " + +None known. + +" + +#----------------------------------------------------------------------------- +# Footer +#----------------------------------------------------------------------------- +#acknowledge `cat acknowledge.pod` + +seealso " +L<gfsmutils>, +dot(1), +vcg(1), +xvcg(1) +" diff --git a/gfsm/gfsm/src/programs/gfsmdraw_cmdparser.c b/gfsm/gfsm/src/programs/gfsmdraw_cmdparser.c new file mode 100644 index 0000000..40b52e6 --- /dev/null +++ b/gfsm/gfsm/src/programs/gfsmdraw_cmdparser.c @@ -0,0 +1,835 @@ +/* -*- Mode: C -*- + * + * File: gfsmdraw_cmdparser.c + * Description: Code for command-line parser struct gengetopt_args_info. 
+ * + * File autogenerated by optgen.perl version 0.05 + * generated with the following command: + * /usr/local/bin/optgen.perl -u -l --no-handle-rcfile --nopod -F gfsmdraw_cmdparser gfsmdraw.gog + * + * The developers of optgen.perl consider the fixed text that goes in all + * optgen.perl output files to be in the public domain: + * we make no copyright claims on it. + * + */ + +#include <stdio.h> +#include <stdlib.h> +#include <errno.h> +#include <string.h> +#include <unistd.h> +#include <ctype.h> + +/* If we use autoconf/autoheader. */ +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#ifdef HAVE_PWD_H +# include <pwd.h> +#endif + +/* Allow user-overrides for PACKAGE and VERSION */ +#ifndef PACKAGE +# define PACKAGE "PACKAGE" +#endif + +#ifndef VERSION +# define VERSION "VERSION" +#endif + + +#ifndef PROGRAM +# define PROGRAM "gfsmdraw" +#endif + +/* #define cmdline_parser_DEBUG */ + +/* Check for "configure's" getopt check result. */ +#ifndef HAVE_GETOPT_LONG +# include "getopt.h" +#else +# include <getopt.h> +#endif + +#if !defined(HAVE_STRDUP) && !defined(strdup) +# define strdup gengetopt_strdup +#endif /* HAVE_STRDUP */ + +#include "gfsmdraw_cmdparser.h" + + +/* user code section */ + +/* end user code section */ + + +void +cmdline_parser_print_version (void) +{ + printf("gfsmdraw (%s %s) by Bryan Jurish <moocow@ling.uni-potsdam.de>\n", PACKAGE, VERSION); +} + +void +cmdline_parser_print_help (void) +{ + cmdline_parser_print_version (); + printf("\n"); + printf("Purpose:\n"); + printf(" Generate graph specifications from binary gfsm files\n"); + printf("\n"); + + printf("Usage: %s [OPTIONS]... 
BINFILE\n", "gfsmdraw"); + + printf("\n"); + printf(" Arguments:\n"); + printf(" BINFILE Stored binary gfsm file\n"); + + printf("\n"); + printf(" Options:\n"); + printf(" -h --help Print help and exit.\n"); + printf(" -V --version Print version and exit.\n"); + printf("\n"); + printf(" Common Options:\n"); + printf(" -iLABELS --ilabels=LABELS Specify input (lower) labels file.\n"); + printf(" -oLABELS --olabels=LABELS Specify output (upper) labels file.\n"); + printf(" -sLABELS --slabels=LABELS Specify state labels file.\n"); + printf(" -tTITLE --title=TITLE Specify output title.\n"); + printf(" -v --vertical Draw vertically.\n"); + printf(" -FDOTFILE --output=DOTFILE Output graph specification.\n"); + printf("\n"); + printf(" dot Options:\n"); + printf(" -d --dot Request dot output (default)\n"); + printf(" -WFLOAT --width=FLOAT Specify output width (default=bbox).\n"); + printf(" -HFLOAT --height=FLOAT Specify output height (default=bbox).\n"); + printf(" -fPOINTS --fontsize=POINTS Set output font size.\n"); + printf(" --font=FONT Set output font name.\n"); + printf(" -nFLOAT --nodesep=FLOAT Set min node separation.\n"); + printf(" -rFLOAT --ranksep=FLOAT Set min rank separation.\n"); + printf(" -p --portrait Generate in portrait mode.\n"); + printf("\n"); + printf(" VCG Options:\n"); + printf(" -g --vcg Request VCG output.\n"); + printf(" -xINT --xspace=INT Set xspace graph parameter.\n"); + printf(" -yINT --yspace=INT Set yspace graph parameter.\n"); + printf(" --state-shape=SHAPE Set shape for state nodes.\n"); + printf(" --state-color=COLOR Set default state color.\n"); + printf(" --final-color=COLOR Set color for final states.\n"); +} + +#if !defined(HAVE_STRDUP) && !defined(strdup) +/* gengetopt_strdup(): automatically generated from strdup.c. 
*/ +/* strdup.c replacement of strdup, which is not standard */ +static char * +gengetopt_strdup (const char *s) +{ + char *result = (char*)malloc(strlen(s) + 1); + if (result == (char*)0) + return (char*)0; + strcpy(result, s); + return result; +} +#endif /* HAVE_STRDUP */ + +/* clear_args(args_info): clears all args & resets to defaults */ +static void +clear_args(struct gengetopt_args_info *args_info) +{ + args_info->ilabels_arg = NULL; + args_info->olabels_arg = NULL; + args_info->slabels_arg = NULL; + args_info->title_arg = NULL; + args_info->vertical_flag = 0; + args_info->output_arg = NULL; + args_info->dot_flag = 0; + args_info->width_arg = 8.5; + args_info->height_arg = 11; + args_info->fontsize_arg = 14; + args_info->font_arg = NULL; + args_info->nodesep_arg = 0.25; + args_info->ranksep_arg = 0.40; + args_info->portrait_flag = 0; + args_info->vcg_flag = 0; + args_info->xspace_arg = 40; + args_info->yspace_arg = 20; + args_info->state_shape_arg = strdup("box"); + args_info->state_color_arg = strdup("white"); + args_info->final_color_arg = strdup("lightgrey"); +} + + +int +cmdline_parser (int argc, char * const *argv, struct gengetopt_args_info *args_info) +{ + int c; /* Character of the parsed option. 
*/ + int missing_required_options = 0; + + args_info->help_given = 0; + args_info->version_given = 0; + args_info->ilabels_given = 0; + args_info->olabels_given = 0; + args_info->slabels_given = 0; + args_info->title_given = 0; + args_info->vertical_given = 0; + args_info->output_given = 0; + args_info->dot_given = 0; + args_info->width_given = 0; + args_info->height_given = 0; + args_info->fontsize_given = 0; + args_info->font_given = 0; + args_info->nodesep_given = 0; + args_info->ranksep_given = 0; + args_info->portrait_given = 0; + args_info->vcg_given = 0; + args_info->xspace_given = 0; + args_info->yspace_given = 0; + args_info->state_shape_given = 0; + args_info->state_color_given = 0; + args_info->final_color_given = 0; + + clear_args(args_info); + + /* rcfile handling */ + + /* end rcfile handling */ + + optarg = 0; + optind = 1; + opterr = 1; + optopt = '?'; + + while (1) + { + int option_index = 0; + static struct option long_options[] = { + { "help", 0, NULL, 'h' }, + { "version", 0, NULL, 'V' }, + { "ilabels", 1, NULL, 'i' }, + { "olabels", 1, NULL, 'o' }, + { "slabels", 1, NULL, 's' }, + { "title", 1, NULL, 't' }, + { "vertical", 0, NULL, 'v' }, + { "output", 1, NULL, 'F' }, + { "dot", 0, NULL, 'd' }, + { "width", 1, NULL, 'W' }, + { "height", 1, NULL, 'H' }, + { "fontsize", 1, NULL, 'f' }, + { "font", 1, NULL, 0 }, + { "nodesep", 1, NULL, 'n' }, + { "ranksep", 1, NULL, 'r' }, + { "portrait", 0, NULL, 'p' }, + { "vcg", 0, NULL, 'g' }, + { "xspace", 1, NULL, 'x' }, + { "yspace", 1, NULL, 'y' }, + { "state-shape", 1, NULL, 0 }, + { "state-color", 1, NULL, 0 }, + { "final-color", 1, NULL, 0 }, + { NULL, 0, NULL, 0 } + }; + static char short_options[] = { + 'h', + 'V', + 'i', ':', + 'o', ':', + 's', ':', + 't', ':', + 'v', + 'F', ':', + 'd', + 'W', ':', + 'H', ':', + 'f', ':', + 'n', ':', + 'r', ':', + 'p', + 'g', + 'x', ':', + 'y', ':', + '\0' + }; + + c = getopt_long (argc, argv, short_options, long_options, &option_index); + + if (c == -1) break; /* 
Exit from 'while (1)' loop. */ + + if (cmdline_parser_parse_option(c, long_options[option_index].name, optarg, args_info) != 0) { + exit (EXIT_FAILURE); + } + } /* while */ + + + + if ( missing_required_options ) + exit (EXIT_FAILURE); + + + if (optind < argc) { + int i = 0 ; + args_info->inputs_num = argc - optind ; + args_info->inputs = (char **)(malloc ((args_info->inputs_num)*sizeof(char *))) ; + while (optind < argc) + args_info->inputs[ i++ ] = strdup (argv[optind++]) ; + } + + return 0; +} + + +/* Parse a single option */ +int +cmdline_parser_parse_option(char oshort, const char *olong, const char *val, + struct gengetopt_args_info *args_info) +{ + if (!oshort && !(olong && *olong)) return 1; /* ignore null options */ + +#ifdef cmdline_parser_DEBUG + fprintf(stderr, "parse_option(): oshort='%c', olong='%s', val='%s'\n", oshort, olong, val);*/ +#endif + + switch (oshort) + { + case 'h': /* Print help and exit. */ + if (args_info->help_given) { + fprintf(stderr, "%s: `--help' (`-h') option given more than once\n", PROGRAM); + } + clear_args(args_info); + cmdline_parser_print_help(); + exit(EXIT_SUCCESS); + + break; + + case 'V': /* Print version and exit. */ + if (args_info->version_given) { + fprintf(stderr, "%s: `--version' (`-V') option given more than once\n", PROGRAM); + } + clear_args(args_info); + cmdline_parser_print_version(); + exit(EXIT_SUCCESS); + + break; + + case 'i': /* Specify input (lower) labels file. */ + if (args_info->ilabels_given) { + fprintf(stderr, "%s: `--ilabels' (`-i') option given more than once\n", PROGRAM); + } + args_info->ilabels_given++; + if (args_info->ilabels_arg) free(args_info->ilabels_arg); + args_info->ilabels_arg = strdup(val); + break; + + case 'o': /* Specify output (upper) labels file. 
*/ + if (args_info->olabels_given) { + fprintf(stderr, "%s: `--olabels' (`-o') option given more than once\n", PROGRAM); + } + args_info->olabels_given++; + if (args_info->olabels_arg) free(args_info->olabels_arg); + args_info->olabels_arg = strdup(val); + break; + + case 's': /* Specify state labels file. */ + if (args_info->slabels_given) { + fprintf(stderr, "%s: `--slabels' (`-s') option given more than once\n", PROGRAM); + } + args_info->slabels_given++; + if (args_info->slabels_arg) free(args_info->slabels_arg); + args_info->slabels_arg = strdup(val); + break; + + case 't': /* Specify output title. */ + if (args_info->title_given) { + fprintf(stderr, "%s: `--title' (`-t') option given more than once\n", PROGRAM); + } + args_info->title_given++; + if (args_info->title_arg) free(args_info->title_arg); + args_info->title_arg = strdup(val); + break; + + case 'v': /* Draw vertically. */ + if (args_info->vertical_given) { + fprintf(stderr, "%s: `--vertical' (`-v') option given more than once\n", PROGRAM); + } + args_info->vertical_given++; + if (args_info->vertical_given <= 1) + args_info->vertical_flag = !(args_info->vertical_flag); + break; + + case 'F': /* Output graph specification. */ + if (args_info->output_given) { + fprintf(stderr, "%s: `--output' (`-F') option given more than once\n", PROGRAM); + } + args_info->output_given++; + if (args_info->output_arg) free(args_info->output_arg); + args_info->output_arg = strdup(val); + break; + + case 'd': /* Request dot output (default) */ + if (args_info->dot_given) { + fprintf(stderr, "%s: `--dot' (`-d') option given more than once\n", PROGRAM); + } + args_info->dot_given++; + if (args_info->dot_given <= 1) + args_info->dot_flag = !(args_info->dot_flag); + break; + + case 'W': /* Specify output width (default=bbox). 
*/ + if (args_info->width_given) { + fprintf(stderr, "%s: `--width' (`-W') option given more than once\n", PROGRAM); + } + args_info->width_given++; + args_info->width_arg = (float)strtod(val, NULL); + break; + + case 'H': /* Specify output height (default=bbox). */ + if (args_info->height_given) { + fprintf(stderr, "%s: `--height' (`-H') option given more than once\n", PROGRAM); + } + args_info->height_given++; + args_info->height_arg = (float)strtod(val, NULL); + break; + + case 'f': /* Set output font size. */ + if (args_info->fontsize_given) { + fprintf(stderr, "%s: `--fontsize' (`-f') option given more than once\n", PROGRAM); + } + args_info->fontsize_given++; + args_info->fontsize_arg = (int)atoi(val); + break; + + case 'n': /* Set min node separation. */ + if (args_info->nodesep_given) { + fprintf(stderr, "%s: `--nodesep' (`-n') option given more than once\n", PROGRAM); + } + args_info->nodesep_given++; + args_info->nodesep_arg = (float)strtod(val, NULL); + break; + + case 'r': /* Set min rank separation. */ + if (args_info->ranksep_given) { + fprintf(stderr, "%s: `--ranksep' (`-r') option given more than once\n", PROGRAM); + } + args_info->ranksep_given++; + args_info->ranksep_arg = (float)strtod(val, NULL); + break; + + case 'p': /* Generate in portrait mode. */ + if (args_info->portrait_given) { + fprintf(stderr, "%s: `--portrait' (`-p') option given more than once\n", PROGRAM); + } + args_info->portrait_given++; + if (args_info->portrait_given <= 1) + args_info->portrait_flag = !(args_info->portrait_flag); + break; + + case 'g': /* Request VCG output. */ + if (args_info->vcg_given) { + fprintf(stderr, "%s: `--vcg' (`-g') option given more than once\n", PROGRAM); + } + args_info->vcg_given++; + if (args_info->vcg_given <= 1) + args_info->vcg_flag = !(args_info->vcg_flag); + break; + + case 'x': /* Set xspace graph parameter. 
*/ + if (args_info->xspace_given) { + fprintf(stderr, "%s: `--xspace' (`-x') option given more than once\n", PROGRAM); + } + args_info->xspace_given++; + args_info->xspace_arg = (int)atoi(val); + break; + + case 'y': /* Set yspace graph parameter. */ + if (args_info->yspace_given) { + fprintf(stderr, "%s: `--yspace' (`-y') option given more than once\n", PROGRAM); + } + args_info->yspace_given++; + args_info->yspace_arg = (int)atoi(val); + break; + + case 0: /* Long option(s) with no short form */ + /* Print help and exit. */ + if (strcmp(olong, "help") == 0) { + if (args_info->help_given) { + fprintf(stderr, "%s: `--help' (`-h') option given more than once\n", PROGRAM); + } + clear_args(args_info); + cmdline_parser_print_help(); + exit(EXIT_SUCCESS); + + } + + /* Print version and exit. */ + else if (strcmp(olong, "version") == 0) { + if (args_info->version_given) { + fprintf(stderr, "%s: `--version' (`-V') option given more than once\n", PROGRAM); + } + clear_args(args_info); + cmdline_parser_print_version(); + exit(EXIT_SUCCESS); + + } + + /* Specify input (lower) labels file. */ + else if (strcmp(olong, "ilabels") == 0) { + if (args_info->ilabels_given) { + fprintf(stderr, "%s: `--ilabels' (`-i') option given more than once\n", PROGRAM); + } + args_info->ilabels_given++; + if (args_info->ilabels_arg) free(args_info->ilabels_arg); + args_info->ilabels_arg = strdup(val); + } + + /* Specify output (upper) labels file. */ + else if (strcmp(olong, "olabels") == 0) { + if (args_info->olabels_given) { + fprintf(stderr, "%s: `--olabels' (`-o') option given more than once\n", PROGRAM); + } + args_info->olabels_given++; + if (args_info->olabels_arg) free(args_info->olabels_arg); + args_info->olabels_arg = strdup(val); + } + + /* Specify state labels file. 
*/ + else if (strcmp(olong, "slabels") == 0) { + if (args_info->slabels_given) { + fprintf(stderr, "%s: `--slabels' (`-s') option given more than once\n", PROGRAM); + } + args_info->slabels_given++; + if (args_info->slabels_arg) free(args_info->slabels_arg); + args_info->slabels_arg = strdup(val); + } + + /* Specify output title. */ + else if (strcmp(olong, "title") == 0) { + if (args_info->title_given) { + fprintf(stderr, "%s: `--title' (`-t') option given more than once\n", PROGRAM); + } + args_info->title_given++; + if (args_info->title_arg) free(args_info->title_arg); + args_info->title_arg = strdup(val); + } + + /* Draw vertically. */ + else if (strcmp(olong, "vertical") == 0) { + if (args_info->vertical_given) { + fprintf(stderr, "%s: `--vertical' (`-v') option given more than once\n", PROGRAM); + } + args_info->vertical_given++; + if (args_info->vertical_given <= 1) + args_info->vertical_flag = !(args_info->vertical_flag); + } + + /* Output graph specification. */ + else if (strcmp(olong, "output") == 0) { + if (args_info->output_given) { + fprintf(stderr, "%s: `--output' (`-F') option given more than once\n", PROGRAM); + } + args_info->output_given++; + if (args_info->output_arg) free(args_info->output_arg); + args_info->output_arg = strdup(val); + } + + /* Request dot output (default) */ + else if (strcmp(olong, "dot") == 0) { + if (args_info->dot_given) { + fprintf(stderr, "%s: `--dot' (`-d') option given more than once\n", PROGRAM); + } + args_info->dot_given++; + if (args_info->dot_given <= 1) + args_info->dot_flag = !(args_info->dot_flag); + } + + /* Specify output width (default=bbox). */ + else if (strcmp(olong, "width") == 0) { + if (args_info->width_given) { + fprintf(stderr, "%s: `--width' (`-W') option given more than once\n", PROGRAM); + } + args_info->width_given++; + args_info->width_arg = (float)strtod(val, NULL); + } + + /* Specify output height (default=bbox). 
*/ + else if (strcmp(olong, "height") == 0) { + if (args_info->height_given) { + fprintf(stderr, "%s: `--height' (`-H') option given more than once\n", PROGRAM); + } + args_info->height_given++; + args_info->height_arg = (float)strtod(val, NULL); + } + + /* Set output font size. */ + else if (strcmp(olong, "fontsize") == 0) { + if (args_info->fontsize_given) { + fprintf(stderr, "%s: `--fontsize' (`-f') option given more than once\n", PROGRAM); + } + args_info->fontsize_given++; + args_info->fontsize_arg = (int)atoi(val); + } + + /* Set output font name. */ + else if (strcmp(olong, "font") == 0) { + if (args_info->font_given) { + fprintf(stderr, "%s: `--font' option given more than once\n", PROGRAM); + } + args_info->font_given++; + if (args_info->font_arg) free(args_info->font_arg); + args_info->font_arg = strdup(val); + } + + /* Set min node separation. */ + else if (strcmp(olong, "nodesep") == 0) { + if (args_info->nodesep_given) { + fprintf(stderr, "%s: `--nodesep' (`-n') option given more than once\n", PROGRAM); + } + args_info->nodesep_given++; + args_info->nodesep_arg = (float)strtod(val, NULL); + } + + /* Set min rank separation. */ + else if (strcmp(olong, "ranksep") == 0) { + if (args_info->ranksep_given) { + fprintf(stderr, "%s: `--ranksep' (`-r') option given more than once\n", PROGRAM); + } + args_info->ranksep_given++; + args_info->ranksep_arg = (float)strtod(val, NULL); + } + + /* Generate in portrait mode. */ + else if (strcmp(olong, "portrait") == 0) { + if (args_info->portrait_given) { + fprintf(stderr, "%s: `--portrait' (`-p') option given more than once\n", PROGRAM); + } + args_info->portrait_given++; + if (args_info->portrait_given <= 1) + args_info->portrait_flag = !(args_info->portrait_flag); + } + + /* Request VCG output. 
*/ + else if (strcmp(olong, "vcg") == 0) { + if (args_info->vcg_given) { + fprintf(stderr, "%s: `--vcg' (`-g') option given more than once\n", PROGRAM); + } + args_info->vcg_given++; + if (args_info->vcg_given <= 1) + args_info->vcg_flag = !(args_info->vcg_flag); + } + + /* Set xspace graph parameter. */ + else if (strcmp(olong, "xspace") == 0) { + if (args_info->xspace_given) { + fprintf(stderr, "%s: `--xspace' (`-x') option given more than once\n", PROGRAM); + } + args_info->xspace_given++; + args_info->xspace_arg = (int)atoi(val); + } + + /* Set yspace graph parameter. */ + else if (strcmp(olong, "yspace") == 0) { + if (args_info->yspace_given) { + fprintf(stderr, "%s: `--yspace' (`-y') option given more than once\n", PROGRAM); + } + args_info->yspace_given++; + args_info->yspace_arg = (int)atoi(val); + } + + /* Set shape for state nodes. */ + else if (strcmp(olong, "state-shape") == 0) { + if (args_info->state_shape_given) { + fprintf(stderr, "%s: `--state-shape' option given more than once\n", PROGRAM); + } + args_info->state_shape_given++; + if (args_info->state_shape_arg) free(args_info->state_shape_arg); + args_info->state_shape_arg = strdup(val); + } + + /* Set default state color. */ + else if (strcmp(olong, "state-color") == 0) { + if (args_info->state_color_given) { + fprintf(stderr, "%s: `--state-color' option given more than once\n", PROGRAM); + } + args_info->state_color_given++; + if (args_info->state_color_arg) free(args_info->state_color_arg); + args_info->state_color_arg = strdup(val); + } + + /* Set color for final states. 
*/ + else if (strcmp(olong, "final-color") == 0) { + if (args_info->final_color_given) { + fprintf(stderr, "%s: `--final-color' option given more than once\n", PROGRAM); + } + args_info->final_color_given++; + if (args_info->final_color_arg) free(args_info->final_color_arg); + args_info->final_color_arg = strdup(val); + } + + else { + fprintf(stderr, "%s: unknown long option '%s'.\n", PROGRAM, olong); + return (EXIT_FAILURE); + } + break; + + case '?': /* Invalid Option */ + fprintf(stderr, "%s: unknown option '%s'.\n", PROGRAM, olong); + return (EXIT_FAILURE); + + + default: /* bug: options not considered. */ + fprintf (stderr, "%s: option unknown: %c\n", PROGRAM, oshort); + abort (); + } /* switch */ + return 0; +} + + +/* Initialize options not yet given from environmental defaults */ +void +cmdline_parser_envdefaults(struct gengetopt_args_info *args_info) +{ + + + return; +} + + +/* Load option values from an .rc file */ +void +cmdline_parser_read_rcfile(const char *filename, + struct gengetopt_args_info *args_info, + int user_specified) +{ + char *fullname; + FILE *rcfile; + + if (!filename) return; /* ignore NULL filenames */ + +#if defined(HAVE_GETUID) && defined(HAVE_GETPWUID) + if (*filename == '~') { + /* tilde-expansion hack */ + struct passwd *pwent = getpwuid(getuid()); + if (!pwent) { + fprintf(stderr, "%s: user-id %d not found!\n", PROGRAM, getuid()); + return; + } + if (!pwent->pw_dir) { + fprintf(stderr, "%s: home directory for user-id %d not found!\n", PROGRAM, getuid()); + return; + } + fullname = (char *)malloc(strlen(pwent->pw_dir)+strlen(filename)); + strcpy(fullname, pwent->pw_dir); + strcat(fullname, filename+1); + } else { + fullname = strdup(filename); + } +#else /* !(defined(HAVE_GETUID) && defined(HAVE_GETPWUID)) */ + fullname = strdup(filename); +#endif /* defined(HAVE_GETUID) && defined(HAVE_GETPWUID) */ + + /* try to open */ + rcfile = fopen(fullname,"r"); + if (!rcfile) { + if (user_specified) { + fprintf(stderr, "%s: warning: open 
failed for rc-file '%s': %s\n", + PROGRAM, fullname, strerror(errno)); + } + } + else { + cmdline_parser_read_rc_stream(rcfile, fullname, args_info); + } + + /* cleanup */ + if (fullname != filename) free(fullname); + if (rcfile) fclose(rcfile); + + return; +} + + +/* Parse option values from an .rc file : guts */ +#define OPTPARSE_GET 32 +void +cmdline_parser_read_rc_stream(FILE *rcfile, + const char *filename, + struct gengetopt_args_info *args_info) +{ + char *optname = (char *)malloc(OPTPARSE_GET); + char *optval = (char *)malloc(OPTPARSE_GET); + size_t onsize = OPTPARSE_GET; + size_t ovsize = OPTPARSE_GET; + size_t onlen = 0; + size_t ovlen = 0; + int lineno = 0; + char c; + +#ifdef cmdline_parser_DEBUG + fprintf(stderr, "cmdline_parser_read_rc_stream('%s'):\n", filename); +#endif + + while ((c = fgetc(rcfile)) != EOF) { + onlen = 0; + ovlen = 0; + lineno++; + + /* -- get next option-name */ + /* skip leading space and comments */ + if (isspace(c)) continue; + if (c == '#') { + while ((c = fgetc(rcfile)) != EOF) { + if (c == '\n') break; + } + continue; + } + + /* parse option-name */ + while (c != EOF && c != '=' && !isspace(c)) { + /* re-allocate if necessary */ + if (onlen >= onsize-1) { + char *tmp = (char *)malloc(onsize+OPTPARSE_GET); + strcpy(tmp,optname); + free(optname); + + onsize += OPTPARSE_GET; + optname = tmp; + } + optname[onlen++] = c; + c = fgetc(rcfile); + } + optname[onlen++] = '\0'; + +#ifdef cmdline_parser_DEBUG + fprintf(stderr, "cmdline_parser_read_rc_stream('%s'): line %d: optname='%s'\n", + filename, lineno, optname); +#endif + + /* -- get next option-value */ + /* skip leading space */ + while ((c = fgetc(rcfile)) != EOF && isspace(c)) { + ; + } + + /* parse option-value */ + while (c != EOF && c != '\n') { + /* re-allocate if necessary */ + if (ovlen >= ovsize-1) { + char *tmp = (char *)malloc(ovsize+OPTPARSE_GET); + strcpy(tmp,optval); + free(optval); + ovsize += OPTPARSE_GET; + optval = tmp; + } + optval[ovlen++] = c; + c = 
fgetc(rcfile); + } + optval[ovlen++] = '\0'; + + /* now do the action for the option */ + if (cmdline_parser_parse_option('\0',optname,optval,args_info) != 0) { + fprintf(stderr, "%s: error in file '%s' at line %d.\n", PROGRAM, filename, lineno); + + } + } + + /* cleanup */ + free(optname); + free(optval); + + return; +} diff --git a/gfsm/gfsm/src/programs/gfsmdraw_cmdparser.h b/gfsm/gfsm/src/programs/gfsmdraw_cmdparser.h new file mode 100644 index 0000000..b494e48 --- /dev/null +++ b/gfsm/gfsm/src/programs/gfsmdraw_cmdparser.h @@ -0,0 +1,99 @@ +/* -*- Mode: C -*- + * + * File: gfsmdraw_cmdparser.h + * Description: Headers for command-line parser struct gengetopt_args_info. + * + * File autogenerated by optgen.perl version 0.05. + * + */ + +#ifndef gfsmdraw_cmdparser_h +#define gfsmdraw_cmdparser_h + +#ifdef __cplusplus +extern "C" { +#endif /* __cplusplus */ + +/* + * moocow: Never set PACKAGE and VERSION here. + */ + +struct gengetopt_args_info { + char * ilabels_arg; /* Specify input (lower) labels file. (default=NULL). */ + char * olabels_arg; /* Specify output (upper) labels file. (default=NULL). */ + char * slabels_arg; /* Specify state labels file. (default=NULL). */ + char * title_arg; /* Specify output title. (default=NULL). */ + int vertical_flag; /* Draw vertically. (default=0). */ + char * output_arg; /* Output graph specification. (default=NULL). */ + int dot_flag; /* Request dot output (default) (default=0). */ + float width_arg; /* Specify output width (default=bbox). (default=8.5). */ + float height_arg; /* Specify output height (default=bbox). (default=11). */ + int fontsize_arg; /* Set output font size. (default=14). */ + char * font_arg; /* Set output font name. (default=NULL). */ + float nodesep_arg; /* Set min node separation. (default=0.25). */ + float ranksep_arg; /* Set min rank separation. (default=0.40). */ + int portrait_flag; /* Generate in portrait mode. (default=0). */ + int vcg_flag; /* Request VCG output. (default=0). 
*/ + int xspace_arg; /* Set xspace graph parameter. (default=40). */ + int yspace_arg; /* Set yspace graph parameter. (default=20). */ + char * state_shape_arg; /* Set shape for state nodes. (default=box). */ + char * state_color_arg; /* Set default state color. (default=white). */ + char * final_color_arg; /* Set color for final states. (default=lightgrey). */ + + int help_given; /* Whether help was given */ + int version_given; /* Whether version was given */ + int ilabels_given; /* Whether ilabels was given */ + int olabels_given; /* Whether olabels was given */ + int slabels_given; /* Whether slabels was given */ + int title_given; /* Whether title was given */ + int vertical_given; /* Whether vertical was given */ + int output_given; /* Whether output was given */ + int dot_given; /* Whether dot was given */ + int width_given; /* Whether width was given */ + int height_given; /* Whether height was given */ + int fontsize_given; /* Whether fontsize was given */ + int font_given; /* Whether font was given */ + int nodesep_given; /* Whether nodesep was given */ + int ranksep_given; /* Whether ranksep was given */ + int portrait_given; /* Whether portrait was given */ + int vcg_given; /* Whether vcg was given */ + int xspace_given; /* Whether xspace was given */ + int yspace_given; /* Whether yspace was given */ + int state_shape_given; /* Whether state-shape was given */ + int state_color_given; /* Whether state-color was given */ + int final_color_given; /* Whether final-color was given */ + + char **inputs; /* unnamed arguments */ + unsigned inputs_num; /* number of unnamed arguments */ +}; + +/* read rc files (if any) and parse all command-line options in one swell foop */ +int cmdline_parser (int argc, char *const *argv, struct gengetopt_args_info *args_info); + +/* instantiate defaults from environment variables: you must call this yourself! 
*/ +void cmdline_parser_envdefaults (struct gengetopt_args_info *args_info); + +/* read a single rc-file */ +void cmdline_parser_read_rcfile (const char *filename, + struct gengetopt_args_info *args_info, + int user_specified); + +/* read a single rc-file (stream) */ +void cmdline_parser_read_rc_stream (FILE *rcfile, + const char *filename, + struct gengetopt_args_info *args_info); + +/* parse a single option */ +int cmdline_parser_parse_option (char oshort, const char *olong, const char *val, + struct gengetopt_args_info *args_info); + +/* print help message */ +void cmdline_parser_print_help(void); + +/* print version */ +void cmdline_parser_print_version(void); + +#ifdef __cplusplus +} +#endif /* __cplusplus */ +#endif /* gfsmdraw_cmdparser_h */ diff --git a/gfsm/gfsm/src/programs/gfsmdraw_main.c b/gfsm/gfsm/src/programs/gfsmdraw_main.c new file mode 100644 index 0000000..7748419 --- /dev/null +++ b/gfsm/gfsm/src/programs/gfsmdraw_main.c @@ -0,0 +1,166 @@ +/* + gfsm-utils : finite state automaton utilities + Copyright (C) 2004 by Bryan Jurish <moocow@ling.uni-potsdam.de> + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif + +#include <stdio.h> +#include <stdlib.h> + +#include <gfsm.h> + +#include "gfsmdraw_cmdparser.h" + +/*-------------------------------------------------------------------------- + * Globals + *--------------------------------------------------------------------------*/ +char *progname = "gfsmdraw"; + +//-- options +struct gengetopt_args_info args; + +//-- files +const char *infilename = "-"; +const char *outfilename = "-"; + +//-- global structs +gfsmAutomaton *fsm; +gfsmAlphabet *ilabels=NULL, *olabels=NULL, *slabels=NULL; +gfsmError *err = NULL; + +typedef enum _gfsmDrawMode { + gfsmDMNone, + gfsmDMDot, + gfsmDMVCG +} gfsmDrawMode; +gfsmDrawMode mode = gfsmDMDot; //-- default mode + +/*-------------------------------------------------------------------------- + * Option Processing + *--------------------------------------------------------------------------*/ +void get_my_options(int argc, char **argv) +{ + if (cmdline_parser(argc, argv, &args) != 0) + exit(1); + + //-- load environmental defaults + //cmdline_parser_envdefaults(&args); + + //-- filenames + if (args.inputs_num > 0) infilename = args.inputs[0]; + if (args.output_given) outfilename = args.output_arg; + + //-- labels: input + if (args.ilabels_given) { + ilabels = gfsm_string_alphabet_new(); + if (!gfsm_alphabet_load_filename(ilabels,args.ilabels_arg,&err)) { + g_printerr("%s: load failed for input-labels file '%s': %s\n", + progname, args.ilabels_arg, (err ? 
err->message : "?")); + exit(2); + } + } + //-- labels: output + if (args.olabels_given) { + olabels = gfsm_string_alphabet_new(); + if (!gfsm_alphabet_load_filename(olabels,args.olabels_arg,&err)) { + g_printerr("%s: load failed for output-labels file '%s': %s\n", + progname, args.olabels_arg, (err ? err->message : "?")); + exit(2); + } + } + //-- labels: state + if (args.slabels_given) { + slabels = gfsm_string_alphabet_new(); + if (!gfsm_alphabet_load_filename(slabels,args.slabels_arg,&err)) { + g_printerr("%s: load failed for state-labels file '%s': %s\n", + progname, args.slabels_arg, (err ? err->message : "?")); + exit(2); + } + } + + //-- draw mode + if (args.dot_given) mode = gfsmDMDot; + else if (args.vcg_given) mode = gfsmDMVCG; + + + //-- initialize fsm + fsm = gfsm_automaton_new(); +} + +/*-------------------------------------------------------------------------- + * MAIN + *--------------------------------------------------------------------------*/ +int main (int argc, char **argv) +{ + gboolean rc = FALSE; + get_my_options(argc,argv); + + //-- load automaton + if (!gfsm_automaton_load_bin_filename(fsm,infilename,&err)) { + g_printerr("%s: load failed for '%s': %s\n", progname, infilename, (err ? err->message : "?")); + exit(3); + } + + //-- draw automaton + if (mode==gfsmDMDot) + rc = gfsm_automaton_draw_dot_filename_full(fsm, + outfilename, + ilabels, + olabels, + slabels, + (args.title_given ? args.title_arg : infilename), + args.width_arg, + args.height_arg, + args.fontsize_arg, + args.font_arg, + args.portrait_given, + args.vertical_given, + args.nodesep_arg, + args.ranksep_arg, + &err); + else if (mode==gfsmDMVCG) + rc = gfsm_automaton_draw_vcg_filename_full(fsm, + outfilename, + ilabels, + olabels, + slabels, + (args.title_given ? args.title_arg : infilename), + args.xspace_arg, + args.yspace_arg, + (args.vertical_given ? 
"top_to_bottom" : "left_to_right"), + args.state_shape_arg, + args.state_color_arg, + args.final_color_arg, + &err); + + if (!rc) { + g_printerr("%s: store failed to '%s': %s\n", + progname, outfilename, (err ? err->message : "?")); + exit(4); + } + + //-- cleanup + if (ilabels) gfsm_alphabet_free(ilabels); + if (olabels) gfsm_alphabet_free(olabels); + if (slabels) gfsm_alphabet_free(slabels); + gfsm_automaton_free(fsm); + + return 0; +} diff --git a/gfsm/gfsm/src/programs/gfsmheader.gog b/gfsm/gfsm/src/programs/gfsmheader.gog new file mode 100644 index 0000000..c3555ee --- /dev/null +++ b/gfsm/gfsm/src/programs/gfsmheader.gog @@ -0,0 +1,57 @@ +# -*- Mode: Shell-Script -*- +# +# Getopt::Gen specification +#----------------------------------------------------------------------------- +program "gfsmheader" +#program_version "0.01" + +purpose "Show header information from stored gfsm files" +author "Bryan Jurish <moocow@ling.uni-potsdam.de>" +on_reparse "warn" + +#----------------------------------------------------------------------------- +# Details +#----------------------------------------------------------------------------- +details "" + +#----------------------------------------------------------------------------- +# Files +#----------------------------------------------------------------------------- +#rcfile "/etc/gfsmrc" +#rcfile "~/.gfsmrc" + +#----------------------------------------------------------------------------- +# Arguments +#----------------------------------------------------------------------------- +argument "BINFILE" "Stored binary gfsm file" \ + details=" +If unspecified, standard input will be read +" + +#----------------------------------------------------------------------------- +# Options +#----------------------------------------------------------------------------- +#group "Basic Options" + +#----------------------------------------------------------------------------- +# Addenda 
+#----------------------------------------------------------------------------- +#addenda "" + +#----------------------------------------------------------------------------- +# Bugs +#----------------------------------------------------------------------------- +bugs " + +None known. + +" + +#----------------------------------------------------------------------------- +# Footer +#----------------------------------------------------------------------------- +#acknowledge `cat acknowledge.pod` + +seealso " +L<gfsmutils> +" diff --git a/gfsm/gfsm/src/programs/gfsmheader_cmdparser.c b/gfsm/gfsm/src/programs/gfsmheader_cmdparser.c new file mode 100644 index 0000000..58f982f --- /dev/null +++ b/gfsm/gfsm/src/programs/gfsmheader_cmdparser.c @@ -0,0 +1,403 @@ +/* -*- Mode: C -*- + * + * File: gfsmheader_cmdparser.c + * Description: Code for command-line parser struct gengetopt_args_info. + * + * File autogenerated by optgen.perl version 0.05 + * generated with the following command: + * /usr/local/bin/optgen.perl -u -l --no-handle-rcfile --nopod -F gfsmheader_cmdparser gfsmheader.gog + * + * The developers of optgen.perl consider the fixed text that goes in all + * optgen.perl output files to be in the public domain: + * we make no copyright claims on it. + * + */ + +#include <stdio.h> +#include <stdlib.h> +#include <errno.h> +#include <string.h> +#include <unistd.h> +#include <ctype.h> + +/* If we use autoconf/autoheader. */ +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#ifdef HAVE_PWD_H +# include <pwd.h> +#endif + +/* Allow user-overrides for PACKAGE and VERSION */ +#ifndef PACKAGE +# define PACKAGE "PACKAGE" +#endif + +#ifndef VERSION +# define VERSION "VERSION" +#endif + + +#ifndef PROGRAM +# define PROGRAM "gfsmheader" +#endif + +/* #define cmdline_parser_DEBUG */ + +/* Check for "configure's" getopt check result. 
*/ +#ifndef HAVE_GETOPT_LONG +# include "getopt.h" +#else +# include <getopt.h> +#endif + +#if !defined(HAVE_STRDUP) && !defined(strdup) +# define strdup gengetopt_strdup +#endif /* HAVE_STRDUP */ + +#include "gfsmheader_cmdparser.h" + + +/* user code section */ + +/* end user code section */ + + +void +cmdline_parser_print_version (void) +{ + printf("gfsmheader (%s %s) by Bryan Jurish <moocow@ling.uni-potsdam.de>\n", PACKAGE, VERSION); +} + +void +cmdline_parser_print_help (void) +{ + cmdline_parser_print_version (); + printf("\n"); + printf("Purpose:\n"); + printf(" Show header information from stored gfsm files\n"); + printf("\n"); + + printf("Usage: %s [OPTIONS]... BINFILE\n", "gfsmheader"); + + printf("\n"); + printf(" Arguments:\n"); + printf(" BINFILE Stored binary gfsm file\n"); + + printf("\n"); + printf(" Options:\n"); + printf(" -h --help Print help and exit.\n"); + printf(" -V --version Print version and exit.\n"); +} + +#if !defined(HAVE_STRDUP) && !defined(strdup) +/* gengetopt_strdup(): automatically generated from strdup.c. */ +/* strdup.c replacement of strdup, which is not standard */ +static char * +gengetopt_strdup (const char *s) +{ + char *result = (char*)malloc(strlen(s) + 1); + if (result == (char*)0) + return (char*)0; + strcpy(result, s); + return result; +} +#endif /* HAVE_STRDUP */ + +/* clear_args(args_info): clears all args & resets to defaults */ +static void +clear_args(struct gengetopt_args_info *args_info) +{ +} + + +int +cmdline_parser (int argc, char * const *argv, struct gengetopt_args_info *args_info) +{ + int c; /* Character of the parsed option. 
*/ + int missing_required_options = 0; + + args_info->help_given = 0; + args_info->version_given = 0; + + clear_args(args_info); + + /* rcfile handling */ + + /* end rcfile handling */ + + optarg = 0; + optind = 1; + opterr = 1; + optopt = '?'; + + while (1) + { + int option_index = 0; + static struct option long_options[] = { + { "help", 0, NULL, 'h' }, + { "version", 0, NULL, 'V' }, + { NULL, 0, NULL, 0 } + }; + static char short_options[] = { + 'h', + 'V', + '\0' + }; + + c = getopt_long (argc, argv, short_options, long_options, &option_index); + + if (c == -1) break; /* Exit from 'while (1)' loop. */ + + if (cmdline_parser_parse_option(c, long_options[option_index].name, optarg, args_info) != 0) { + exit (EXIT_FAILURE); + } + } /* while */ + + + + if ( missing_required_options ) + exit (EXIT_FAILURE); + + + if (optind < argc) { + int i = 0 ; + args_info->inputs_num = argc - optind ; + args_info->inputs = (char **)(malloc ((args_info->inputs_num)*sizeof(char *))) ; + while (optind < argc) + args_info->inputs[ i++ ] = strdup (argv[optind++]) ; + } + + return 0; +} + + +/* Parse a single option */ +int +cmdline_parser_parse_option(char oshort, const char *olong, const char *val, + struct gengetopt_args_info *args_info) +{ + if (!oshort && !(olong && *olong)) return 1; /* ignore null options */ + +#ifdef cmdline_parser_DEBUG + fprintf(stderr, "parse_option(): oshort='%c', olong='%s', val='%s'\n", oshort, olong, val);*/ +#endif + + switch (oshort) + { + case 'h': /* Print help and exit. */ + if (args_info->help_given) { + fprintf(stderr, "%s: `--help' (`-h') option given more than once\n", PROGRAM); + } + clear_args(args_info); + cmdline_parser_print_help(); + exit(EXIT_SUCCESS); + + break; + + case 'V': /* Print version and exit. 
*/ + if (args_info->version_given) { + fprintf(stderr, "%s: `--version' (`-V') option given more than once\n", PROGRAM); + } + clear_args(args_info); + cmdline_parser_print_version(); + exit(EXIT_SUCCESS); + + break; + + case 0: /* Long option(s) with no short form */ + /* Print help and exit. */ + if (strcmp(olong, "help") == 0) { + if (args_info->help_given) { + fprintf(stderr, "%s: `--help' (`-h') option given more than once\n", PROGRAM); + } + clear_args(args_info); + cmdline_parser_print_help(); + exit(EXIT_SUCCESS); + + } + + /* Print version and exit. */ + else if (strcmp(olong, "version") == 0) { + if (args_info->version_given) { + fprintf(stderr, "%s: `--version' (`-V') option given more than once\n", PROGRAM); + } + clear_args(args_info); + cmdline_parser_print_version(); + exit(EXIT_SUCCESS); + + } + + else { + fprintf(stderr, "%s: unknown long option '%s'.\n", PROGRAM, olong); + return (EXIT_FAILURE); + } + break; + + case '?': /* Invalid Option */ + fprintf(stderr, "%s: unknown option '%s'.\n", PROGRAM, olong); + return (EXIT_FAILURE); + + + default: /* bug: options not considered. 
*/ + fprintf (stderr, "%s: option unknown: %c\n", PROGRAM, oshort); + abort (); + } /* switch */ + return 0; +} + + +/* Initialize options not yet given from environmental defaults */ +void +cmdline_parser_envdefaults(struct gengetopt_args_info *args_info) +{ + + + return; +} + + +/* Load option values from an .rc file */ +void +cmdline_parser_read_rcfile(const char *filename, + struct gengetopt_args_info *args_info, + int user_specified) +{ + char *fullname; + FILE *rcfile; + + if (!filename) return; /* ignore NULL filenames */ + +#if defined(HAVE_GETUID) && defined(HAVE_GETPWUID) + if (*filename == '~') { + /* tilde-expansion hack */ + struct passwd *pwent = getpwuid(getuid()); + if (!pwent) { + fprintf(stderr, "%s: user-id %d not found!\n", PROGRAM, getuid()); + return; + } + if (!pwent->pw_dir) { + fprintf(stderr, "%s: home directory for user-id %d not found!\n", PROGRAM, getuid()); + return; + } + fullname = (char *)malloc(strlen(pwent->pw_dir)+strlen(filename)); + strcpy(fullname, pwent->pw_dir); + strcat(fullname, filename+1); + } else { + fullname = strdup(filename); + } +#else /* !(defined(HAVE_GETUID) && defined(HAVE_GETPWUID)) */ + fullname = strdup(filename); +#endif /* defined(HAVE_GETUID) && defined(HAVE_GETPWUID) */ + + /* try to open */ + rcfile = fopen(fullname,"r"); + if (!rcfile) { + if (user_specified) { + fprintf(stderr, "%s: warning: open failed for rc-file '%s': %s\n", + PROGRAM, fullname, strerror(errno)); + } + } + else { + cmdline_parser_read_rc_stream(rcfile, fullname, args_info); + } + + /* cleanup */ + if (fullname != filename) free(fullname); + if (rcfile) fclose(rcfile); + + return; +} + + +/* Parse option values from an .rc file : guts */ +#define OPTPARSE_GET 32 +void +cmdline_parser_read_rc_stream(FILE *rcfile, + const char *filename, + struct gengetopt_args_info *args_info) +{ + char *optname = (char *)malloc(OPTPARSE_GET); + char *optval = (char *)malloc(OPTPARSE_GET); + size_t onsize = OPTPARSE_GET; + size_t ovsize = 
OPTPARSE_GET; + size_t onlen = 0; + size_t ovlen = 0; + int lineno = 0; + char c; + +#ifdef cmdline_parser_DEBUG + fprintf(stderr, "cmdline_parser_read_rc_stream('%s'):\n", filename); +#endif + + while ((c = fgetc(rcfile)) != EOF) { + onlen = 0; + ovlen = 0; + lineno++; + + /* -- get next option-name */ + /* skip leading space and comments */ + if (isspace(c)) continue; + if (c == '#') { + while ((c = fgetc(rcfile)) != EOF) { + if (c == '\n') break; + } + continue; + } + + /* parse option-name */ + while (c != EOF && c != '=' && !isspace(c)) { + /* re-allocate if necessary */ + if (onlen >= onsize-1) { + char *tmp = (char *)malloc(onsize+OPTPARSE_GET); + strcpy(tmp,optname); + free(optname); + + onsize += OPTPARSE_GET; + optname = tmp; + } + optname[onlen++] = c; + c = fgetc(rcfile); + } + optname[onlen++] = '\0'; + +#ifdef cmdline_parser_DEBUG + fprintf(stderr, "cmdline_parser_read_rc_stream('%s'): line %d: optname='%s'\n", + filename, lineno, optname); +#endif + + /* -- get next option-value */ + /* skip leading space */ + while ((c = fgetc(rcfile)) != EOF && isspace(c)) { + ; + } + + /* parse option-value */ + while (c != EOF && c != '\n') { + /* re-allocate if necessary */ + if (ovlen >= ovsize-1) { + char *tmp = (char *)malloc(ovsize+OPTPARSE_GET); + strcpy(tmp,optval); + free(optval); + ovsize += OPTPARSE_GET; + optval = tmp; + } + optval[ovlen++] = c; + c = fgetc(rcfile); + } + optval[ovlen++] = '\0'; + + /* now do the action for the option */ + if (cmdline_parser_parse_option('\0',optname,optval,args_info) != 0) { + fprintf(stderr, "%s: error in file '%s' at line %d.\n", PROGRAM, filename, lineno); + + } + } + + /* cleanup */ + free(optname); + free(optval); + + return; +} diff --git a/gfsm/gfsm/src/programs/gfsmheader_cmdparser.h b/gfsm/gfsm/src/programs/gfsmheader_cmdparser.h new file mode 100644 index 0000000..b9e0baa --- /dev/null +++ b/gfsm/gfsm/src/programs/gfsmheader_cmdparser.h @@ -0,0 +1,60 @@ +/* -*- Mode: C -*- + * + * File: 
gfsmheader_cmdparser.h + * Description: Headers for command-line parser struct gengetopt_args_info. + * + * File autogenerated by optgen.perl version 0.05. + * + */ + +#ifndef gfsmheader_cmdparser_h +#define gfsmheader_cmdparser_h + +#ifdef __cplusplus +extern "C" { +#endif /* __cplusplus */ + +/* + * moocow: Never set PACKAGE and VERSION here. + */ + +struct gengetopt_args_info { + + + int help_given; /* Whether help was given */ + int version_given; /* Whether version was given */ + + char **inputs; /* unnamed arguments */ + unsigned inputs_num; /* number of unnamed arguments */ +}; + +/* read rc files (if any) and parse all command-line options in one swell foop */ +int cmdline_parser (int argc, char *const *argv, struct gengetopt_args_info *args_info); + +/* instantiate defaults from environment variables: you must call this yourself! */ +void cmdline_parser_envdefaults (struct gengetopt_args_info *args_info); + +/* read a single rc-file */ +void cmdline_parser_read_rcfile (const char *filename, + struct gengetopt_args_info *args_info, + int user_specified); + +/* read a single rc-file (stream) */ +void cmdline_parser_read_rc_stream (FILE *rcfile, + const char *filename, + struct gengetopt_args_info *args_info); + +/* parse a single option */ +int cmdline_parser_parse_option (char oshort, const char *olong, const char *val, + struct gengetopt_args_info *args_info); + +/* print help message */ +void cmdline_parser_print_help(void); + +/* print version */ +void cmdline_parser_print_version(void); + +#ifdef __cplusplus +} +#endif /* __cplusplus */ +#endif /* gfsmheader_cmdparser_h */ diff --git a/gfsm/gfsm/src/programs/gfsmheader_main.c b/gfsm/gfsm/src/programs/gfsmheader_main.c new file mode 100644 index 0000000..5ba0a87 --- /dev/null +++ b/gfsm/gfsm/src/programs/gfsmheader_main.c @@ -0,0 +1,129 @@ +/* + gfsm-utils : finite state automaton utilities + Copyright (C) 2005 by Bryan Jurish <moocow@ling.uni-potsdam.de> + + This library is free software; you can 
redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif + +#include <stdio.h> +#include <errno.h> +#include <string.h> +#include <stdlib.h> + +#include <gfsm.h> + +#include "gfsmheader_cmdparser.h" + +/*-------------------------------------------------------------------------- + * Globals + *--------------------------------------------------------------------------*/ +char *progname = "gfsmheader"; + +//-- options +struct gengetopt_args_info args; + +//-- files +const char *infilename = "-"; + +//-- global structs +gfsmAutomatonHeader hdr; + +/*-------------------------------------------------------------------------- + * Option Processing + *--------------------------------------------------------------------------*/ +void get_my_options(int argc, char **argv) +{ + if (cmdline_parser(argc, argv, &args) != 0) + exit(1); + + //-- load environmental defaults + //cmdline_parser_envdefaults(&args); + + //-- filenames + if (args.inputs_num > 0) infilename = args.inputs[0]; + + //-- initialize header + memset(&hdr, 0, sizeof(gfsmAutomatonHeader)); +} + +/*-------------------------------------------------------------------------- + * Utilities + *--------------------------------------------------------------------------*/ +#define bool2char(b) (b ? 
'y' : 'n') + +/*-------------------------------------------------------------------------- + * MAIN + *--------------------------------------------------------------------------*/ +int main (int argc, char **argv) +{ + gfsmError *err = NULL; + gfsmIOHandle *ioh = NULL; + GString *modestr = NULL; + + GFSM_INIT + + get_my_options(argc,argv); + + //-- open file + if (!(ioh = gfsmio_new_filename(infilename,"rb",-1,&err)) || err) { + g_printerr("%s: open failed for '%s': %s\n", progname, infilename, err->message); + exit(2); + } + + //-- read header + if (!gfsmio_read(ioh, &hdr, sizeof(gfsmAutomatonHeader))) { + g_printerr("%s: failed to read header!\n", progname); + exit(3); + } + gfsmio_close(ioh); + + //-- print header information + printf("%-24s: %s\n", "Filename", infilename); + printf("%-24s: %s\n", "magic", hdr.magic); + printf("%-24s: %d.%d.%d\n", "version", + hdr.version.major, hdr.version.minor, hdr.version.micro); + printf("%-24s: %d.%d.%d\n", "version_min", + hdr.version_min.major, hdr.version_min.minor, hdr.version_min.micro); + + printf("%-24s: %d\n", "flags.is_transducer", hdr.flags.is_transducer); + printf("%-24s: %d\n", "flags.is_weighted", hdr.flags.is_transducer); +#if 0 + printf("%-24s: %d (%s)\n", "flags.sort_mode", + hdr.flags.sort_mode, gfsm_arc_sortmode_to_name(hdr.flags.sort_mode)); +#else + modestr = gfsm_acmask_to_gstring(hdr.flags.sort_mode, modestr); + printf("%-24s: %d (%s)\n", "flags.sort_mode", hdr.flags.sort_mode, modestr->str); + g_string_free(modestr,TRUE); +#endif + printf("%-24s: %d\n", "flags.is_deterministic", hdr.flags.is_deterministic); + printf("%-24s: %d\n", "flags.unused", hdr.flags.unused); + + printf("%-24s: %u\n", "root_id", hdr.root_id); + printf("%-24s: %u\n", "n_states", hdr.n_states); + printf("%-24s: %u\n", "n_arcs", hdr.n_arcs_007); + printf("%-24s: %u (%s)\n", "srtype", hdr.srtype, gfsm_sr_type_to_name(hdr.srtype)); + + printf("%-24s: %u\n", "unused1", hdr.unused1); + printf("%-24s: %u\n", "unused2", hdr.unused2); + 
printf("%-24s: %u\n", "unused3", hdr.unused3); + + GFSM_FINISH + + return 0; +} diff --git a/gfsm/gfsm/src/programs/gfsmindex.gog b/gfsm/gfsm/src/programs/gfsmindex.gog new file mode 100644 index 0000000..53c2a14 --- /dev/null +++ b/gfsm/gfsm/src/programs/gfsmindex.gog @@ -0,0 +1,75 @@ +# -*- Mode: Shell-Script -*- +# +# Getopt::Gen specification +#----------------------------------------------------------------------------- +program "gfsmindex" +#program_version "0.01" + +purpose "Convert between indexed and un-indexed binary gfsm automata" +author "Bryan Jurish <moocow@ling.uni-potsdam.de>" +on_reparse "warn" + +#----------------------------------------------------------------------------- +# Details +#----------------------------------------------------------------------------- +details "" + +#----------------------------------------------------------------------------- +# Files +#----------------------------------------------------------------------------- +#rcfile "/etc/gfsmrc" +#rcfile "~/.gfsmrc" + +#----------------------------------------------------------------------------- +# Arguments +#----------------------------------------------------------------------------- +argument "FSMFILE" "Stored binary gfsm automaton file or index" \ + details=" +If unspecified, standard input will be read. +" + +#----------------------------------------------------------------------------- +# Options +#----------------------------------------------------------------------------- +#group "Basic Options" + +flag "unindex" u "Convert indexed automaton to unindexed format" \ + details=" +Default behavior is to convert a plain unindexed automaton to an indexed automaton. +" + +int "compress" z "Specify compression level of output file." \ + arg="LEVEL" \ + default="-1" \ + details=" +Specify zlib compression level of output file. -1 (default) indicates +the default compression level, 0 (zero) indicates no zlib compression at all, +and 9 indicates the best possible compression. 
+" + +string "output" F "Specifiy output file (default=stdout)." \ + arg="FILE" \ + default="-" + +#----------------------------------------------------------------------------- +# Addenda +#----------------------------------------------------------------------------- +#addenda "" + +#----------------------------------------------------------------------------- +# Bugs +#----------------------------------------------------------------------------- +bugs " + +None known. + +" + +#----------------------------------------------------------------------------- +# Footer +#----------------------------------------------------------------------------- +#acknowledge `cat acknowledge.pod` + +seealso " +L<gfsmutils> +" diff --git a/gfsm/gfsm/src/programs/gfsmindex_cmdparser.c b/gfsm/gfsm/src/programs/gfsmindex_cmdparser.c new file mode 100644 index 0000000..8de1602 --- /dev/null +++ b/gfsm/gfsm/src/programs/gfsmindex_cmdparser.c @@ -0,0 +1,473 @@ +/* -*- Mode: C -*- + * + * File: gfsmindex_cmdparser.c + * Description: Code for command-line parser struct gengetopt_args_info. + * + * File autogenerated by optgen.perl version 0.06 + * generated with the following command: + * /usr/local/bin/optgen.perl -u -l --no-handle-rcfile --nopod -F gfsmindex_cmdparser gfsmindex.gog + * + * The developers of optgen.perl consider the fixed text that goes in all + * optgen.perl output files to be in the public domain: + * we make no copyright claims on it. + * + */ + +#include <stdio.h> +#include <stdlib.h> +#include <errno.h> +#include <string.h> +#include <unistd.h> +#include <ctype.h> + +/* If we use autoconf/autoheader. 
*/ +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#ifdef HAVE_PWD_H +# include <pwd.h> +#endif + +/* Allow user-overrides for PACKAGE and VERSION */ +#ifndef PACKAGE +# define PACKAGE "PACKAGE" +#endif + +#ifndef VERSION +# define VERSION "VERSION" +#endif + + +#ifndef PROGRAM +# define PROGRAM "gfsmindex" +#endif + +/* #define cmdline_parser_DEBUG */ + +/* Check for "configure's" getopt check result. */ +#ifndef HAVE_GETOPT_LONG +# include "getopt.h" +#else +# include <getopt.h> +#endif + +#if !defined(HAVE_STRDUP) && !defined(strdup) +# define strdup gengetopt_strdup +#endif /* HAVE_STRDUP */ + +#include "gfsmindex_cmdparser.h" + + +/* user code section */ + +/* end user code section */ + + +void +cmdline_parser_print_version (void) +{ + printf("gfsmindex (%s %s) by Bryan Jurish <moocow@ling.uni-potsdam.de>\n", PACKAGE, VERSION); +} + +void +cmdline_parser_print_help (void) +{ + cmdline_parser_print_version (); + printf("\n"); + printf("Purpose:\n"); + printf(" Convert between indexed and un-indexed binary gfsm automata\n"); + printf("\n"); + + printf("Usage: %s [OPTIONS]... FSMFILE\n", "gfsmindex"); + + printf("\n"); + printf(" Arguments:\n"); + printf(" FSMFILE Stored binary gfsm automaton file or index\n"); + + printf("\n"); + printf(" Options:\n"); + printf(" -h --help Print help and exit.\n"); + printf(" -V --version Print version and exit.\n"); + printf(" -u --unindex Convert indexed automaton to unindexed format\n"); + printf(" -zLEVEL --compress=LEVEL Specify compression level of output file.\n"); + printf(" -FFILE --output=FILE Specifiy output file (default=stdout).\n"); +} + +#if !defined(HAVE_STRDUP) && !defined(strdup) +/* gengetopt_strdup(): automatically generated from strdup.c. 
*/ +/* strdup.c replacement of strdup, which is not standard */ +static char * +gengetopt_strdup (const char *s) +{ + char *result = (char*)malloc(strlen(s) + 1); + if (result == (char*)0) + return (char*)0; + strcpy(result, s); + return result; +} +#endif /* HAVE_STRDUP */ + +/* clear_args(args_info): clears all args & resets to defaults */ +static void +clear_args(struct gengetopt_args_info *args_info) +{ + args_info->unindex_flag = 0; + args_info->compress_arg = -1; + args_info->output_arg = strdup("-"); +} + + +int +cmdline_parser (int argc, char * const *argv, struct gengetopt_args_info *args_info) +{ + int c; /* Character of the parsed option. */ + int missing_required_options = 0; + + args_info->help_given = 0; + args_info->version_given = 0; + args_info->unindex_given = 0; + args_info->compress_given = 0; + args_info->output_given = 0; + + clear_args(args_info); + + /* rcfile handling */ + + /* end rcfile handling */ + + optarg = 0; + optind = 1; + opterr = 1; + optopt = '?'; + + while (1) + { + int option_index = 0; + static struct option long_options[] = { + { "help", 0, NULL, 'h' }, + { "version", 0, NULL, 'V' }, + { "unindex", 0, NULL, 'u' }, + { "compress", 1, NULL, 'z' }, + { "output", 1, NULL, 'F' }, + { NULL, 0, NULL, 0 } + }; + static char short_options[] = { + 'h', + 'V', + 'u', + 'z', ':', + 'F', ':', + '\0' + }; + + c = getopt_long (argc, argv, short_options, long_options, &option_index); + + if (c == -1) break; /* Exit from 'while (1)' loop. 
*/ + + if (cmdline_parser_parse_option(c, long_options[option_index].name, optarg, args_info) != 0) { + exit (EXIT_FAILURE); + } + } /* while */ + + + + if ( missing_required_options ) + exit (EXIT_FAILURE); + + + if (optind < argc) { + int i = 0 ; + args_info->inputs_num = argc - optind ; + args_info->inputs = (char **)(malloc ((args_info->inputs_num)*sizeof(char *))) ; + while (optind < argc) + args_info->inputs[ i++ ] = strdup (argv[optind++]) ; + } + + return 0; +} + + +/* Parse a single option */ +int +cmdline_parser_parse_option(char oshort, const char *olong, const char *val, + struct gengetopt_args_info *args_info) +{ + if (!oshort && !(olong && *olong)) return 1; /* ignore null options */ + +#ifdef cmdline_parser_DEBUG + fprintf(stderr, "parse_option(): oshort='%c', olong='%s', val='%s'\n", oshort, olong, val);*/ +#endif + + switch (oshort) + { + case 'h': /* Print help and exit. */ + if (args_info->help_given) { + fprintf(stderr, "%s: `--help' (`-h') option given more than once\n", PROGRAM); + } + clear_args(args_info); + cmdline_parser_print_help(); + exit(EXIT_SUCCESS); + + break; + + case 'V': /* Print version and exit. */ + if (args_info->version_given) { + fprintf(stderr, "%s: `--version' (`-V') option given more than once\n", PROGRAM); + } + clear_args(args_info); + cmdline_parser_print_version(); + exit(EXIT_SUCCESS); + + break; + + case 'u': /* Convert indexed automaton to unindexed format */ + if (args_info->unindex_given) { + fprintf(stderr, "%s: `--unindex' (`-u') option given more than once\n", PROGRAM); + } + args_info->unindex_given++; + if (args_info->unindex_given <= 1) + args_info->unindex_flag = !(args_info->unindex_flag); + break; + + case 'z': /* Specify compression level of output file. 
*/ + if (args_info->compress_given) { + fprintf(stderr, "%s: `--compress' (`-z') option given more than once\n", PROGRAM); + } + args_info->compress_given++; + args_info->compress_arg = (int)atoi(val); + break; + + case 'F': /* Specifiy output file (default=stdout). */ + if (args_info->output_given) { + fprintf(stderr, "%s: `--output' (`-F') option given more than once\n", PROGRAM); + } + args_info->output_given++; + if (args_info->output_arg) free(args_info->output_arg); + args_info->output_arg = strdup(val); + break; + + case 0: /* Long option(s) with no short form */ + /* Print help and exit. */ + if (strcmp(olong, "help") == 0) { + if (args_info->help_given) { + fprintf(stderr, "%s: `--help' (`-h') option given more than once\n", PROGRAM); + } + clear_args(args_info); + cmdline_parser_print_help(); + exit(EXIT_SUCCESS); + + } + + /* Print version and exit. */ + else if (strcmp(olong, "version") == 0) { + if (args_info->version_given) { + fprintf(stderr, "%s: `--version' (`-V') option given more than once\n", PROGRAM); + } + clear_args(args_info); + cmdline_parser_print_version(); + exit(EXIT_SUCCESS); + + } + + /* Convert indexed automaton to unindexed format */ + else if (strcmp(olong, "unindex") == 0) { + if (args_info->unindex_given) { + fprintf(stderr, "%s: `--unindex' (`-u') option given more than once\n", PROGRAM); + } + args_info->unindex_given++; + if (args_info->unindex_given <= 1) + args_info->unindex_flag = !(args_info->unindex_flag); + } + + /* Specify compression level of output file. */ + else if (strcmp(olong, "compress") == 0) { + if (args_info->compress_given) { + fprintf(stderr, "%s: `--compress' (`-z') option given more than once\n", PROGRAM); + } + args_info->compress_given++; + args_info->compress_arg = (int)atoi(val); + } + + /* Specifiy output file (default=stdout). 
*/ + else if (strcmp(olong, "output") == 0) { + if (args_info->output_given) { + fprintf(stderr, "%s: `--output' (`-F') option given more than once\n", PROGRAM); + } + args_info->output_given++; + if (args_info->output_arg) free(args_info->output_arg); + args_info->output_arg = strdup(val); + } + + else { + fprintf(stderr, "%s: unknown long option '%s'.\n", PROGRAM, olong); + return (EXIT_FAILURE); + } + break; + + case '?': /* Invalid Option */ + fprintf(stderr, "%s: unknown option '%s'.\n", PROGRAM, olong); + return (EXIT_FAILURE); + + + default: /* bug: options not considered. */ + fprintf (stderr, "%s: option unknown: %c\n", PROGRAM, oshort); + abort (); + } /* switch */ + return 0; +} + + +/* Initialize options not yet given from environmental defaults */ +void +cmdline_parser_envdefaults(struct gengetopt_args_info *args_info) +{ + + + return; +} + + +/* Load option values from an .rc file */ +void +cmdline_parser_read_rcfile(const char *filename, + struct gengetopt_args_info *args_info, + int user_specified) +{ + char *fullname; + FILE *rcfile; + + if (!filename) return; /* ignore NULL filenames */ + +#if defined(HAVE_GETUID) && defined(HAVE_GETPWUID) + if (*filename == '~') { + /* tilde-expansion hack */ + struct passwd *pwent = getpwuid(getuid()); + if (!pwent) { + fprintf(stderr, "%s: user-id %d not found!\n", PROGRAM, getuid()); + return; + } + if (!pwent->pw_dir) { + fprintf(stderr, "%s: home directory for user-id %d not found!\n", PROGRAM, getuid()); + return; + } + fullname = (char *)malloc(strlen(pwent->pw_dir)+strlen(filename)); + strcpy(fullname, pwent->pw_dir); + strcat(fullname, filename+1); + } else { + fullname = strdup(filename); + } +#else /* !(defined(HAVE_GETUID) && defined(HAVE_GETPWUID)) */ + fullname = strdup(filename); +#endif /* defined(HAVE_GETUID) && defined(HAVE_GETPWUID) */ + + /* try to open */ + rcfile = fopen(fullname,"r"); + if (!rcfile) { + if (user_specified) { + fprintf(stderr, "%s: warning: open failed for rc-file '%s': 
%s\n", + PROGRAM, fullname, strerror(errno)); + } + } + else { + cmdline_parser_read_rc_stream(rcfile, fullname, args_info); + } + + /* cleanup */ + if (fullname != filename) free(fullname); + if (rcfile) fclose(rcfile); + + return; +} + + +/* Parse option values from an .rc file : guts */ +#define OPTPARSE_GET 32 +void +cmdline_parser_read_rc_stream(FILE *rcfile, + const char *filename, + struct gengetopt_args_info *args_info) +{ + char *optname = (char *)malloc(OPTPARSE_GET); + char *optval = (char *)malloc(OPTPARSE_GET); + size_t onsize = OPTPARSE_GET; + size_t ovsize = OPTPARSE_GET; + size_t onlen = 0; + size_t ovlen = 0; + int lineno = 0; + char c; + +#ifdef cmdline_parser_DEBUG + fprintf(stderr, "cmdline_parser_read_rc_stream('%s'):\n", filename); +#endif + + while ((c = fgetc(rcfile)) != EOF) { + onlen = 0; + ovlen = 0; + lineno++; + + /* -- get next option-name */ + /* skip leading space and comments */ + if (isspace(c)) continue; + if (c == '#') { + while ((c = fgetc(rcfile)) != EOF) { + if (c == '\n') break; + } + continue; + } + + /* parse option-name */ + while (c != EOF && c != '=' && !isspace(c)) { + /* re-allocate if necessary */ + if (onlen >= onsize-1) { + char *tmp = (char *)malloc(onsize+OPTPARSE_GET); + strcpy(tmp,optname); + free(optname); + + onsize += OPTPARSE_GET; + optname = tmp; + } + optname[onlen++] = c; + c = fgetc(rcfile); + } + optname[onlen++] = '\0'; + +#ifdef cmdline_parser_DEBUG + fprintf(stderr, "cmdline_parser_read_rc_stream('%s'): line %d: optname='%s'\n", + filename, lineno, optname); +#endif + + /* -- get next option-value */ + /* skip leading space */ + while ((c = fgetc(rcfile)) != EOF && isspace(c)) { + ; + } + + /* parse option-value */ + while (c != EOF && c != '\n') { + /* re-allocate if necessary */ + if (ovlen >= ovsize-1) { + char *tmp = (char *)malloc(ovsize+OPTPARSE_GET); + strcpy(tmp,optval); + free(optval); + ovsize += OPTPARSE_GET; + optval = tmp; + } + optval[ovlen++] = c; + c = fgetc(rcfile); + } + 
optval[ovlen++] = '\0'; + + /* now do the action for the option */ + if (cmdline_parser_parse_option('\0',optname,optval,args_info) != 0) { + fprintf(stderr, "%s: error in file '%s' at line %d.\n", PROGRAM, filename, lineno); + + } + } + + /* cleanup */ + free(optname); + free(optval); + + return; +} diff --git a/gfsm/gfsm/src/programs/gfsmindex_cmdparser.h b/gfsm/gfsm/src/programs/gfsmindex_cmdparser.h new file mode 100644 index 0000000..7dbcfb6 --- /dev/null +++ b/gfsm/gfsm/src/programs/gfsmindex_cmdparser.h @@ -0,0 +1,65 @@ +/* -*- Mode: C -*- + * + * File: gfsmindex_cmdparser.h + * Description: Headers for command-line parser struct gengetopt_args_info. + * + * File autogenerated by optgen.perl version 0.06. + * + */ + +#ifndef gfsmindex_cmdparser_h +#define gfsmindex_cmdparser_h + +#ifdef __cplusplus +extern "C" { +#endif /* __cplusplus */ + +/* + * moocow: Never set PACKAGE and VERSION here. + */ + +struct gengetopt_args_info { + int unindex_flag; /* Convert indexed automaton to unindexed format (default=0). */ + int compress_arg; /* Specify compression level of output file. (default=-1). */ + char * output_arg; /* Specifiy output file (default=stdout). (default=-). */ + + int help_given; /* Whether help was given */ + int version_given; /* Whether version was given */ + int unindex_given; /* Whether unindex was given */ + int compress_given; /* Whether compress was given */ + int output_given; /* Whether output was given */ + + char **inputs; /* unnamed arguments */ + unsigned inputs_num; /* number of unnamed arguments */ +}; + +/* read rc files (if any) and parse all command-line options in one swell foop */ +int cmdline_parser (int argc, char *const *argv, struct gengetopt_args_info *args_info); + +/* instantiate defaults from environment variables: you must call this yourself! 
*/ +void cmdline_parser_envdefaults (struct gengetopt_args_info *args_info); + +/* read a single rc-file */ +void cmdline_parser_read_rcfile (const char *filename, + struct gengetopt_args_info *args_info, + int user_specified); + +/* read a single rc-file (stream) */ +void cmdline_parser_read_rc_stream (FILE *rcfile, + const char *filename, + struct gengetopt_args_info *args_info); + +/* parse a single option */ +int cmdline_parser_parse_option (char oshort, const char *olong, const char *val, + struct gengetopt_args_info *args_info); + +/* print help message */ +void cmdline_parser_print_help(void); + +/* print version */ +void cmdline_parser_print_version(void); + +#ifdef __cplusplus +} +#endif /* __cplusplus */ +#endif /* gfsmindex_cmdparser_h */ diff --git a/gfsm/gfsm/src/programs/gfsmindex_main.c b/gfsm/gfsm/src/programs/gfsmindex_main.c new file mode 100644 index 0000000..76adf26 --- /dev/null +++ b/gfsm/gfsm/src/programs/gfsmindex_main.c @@ -0,0 +1,124 @@ +/* + gfsm-utils : finite state automaton utilities + Copyright (C) 2007 by Bryan Jurish <moocow@ling.uni-potsdam.de> + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include <gfsm.h> + +#include "gfsmindex_cmdparser.h" + +/*-------------------------------------------------------------------------- + * Globals + *--------------------------------------------------------------------------*/ +char *progname = "gfsmindex"; + +//-- options +struct gengetopt_args_info args; + +//-- files +const char *infilename = "-"; +const char *outfilename = "-"; + +//-- global structs +gfsmAutomaton *fsm=NULL; +gfsmIndexedAutomaton *xfsm=NULL; +gfsmError *err=NULL; + +/*-------------------------------------------------------------------------- + * Option Processing + *--------------------------------------------------------------------------*/ +void get_my_options(int argc, char **argv) +{ + if (cmdline_parser(argc, argv, &args) != 0) + exit(1); + + //-- load environmental defaults + //cmdline_parser_envdefaults(&args); + + //-- filenames + if (args.inputs_num > 0) infilename = args.inputs[0]; + outfilename = args.output_arg; +} + +/*-------------------------------------------------------------------------- + * MAIN + *--------------------------------------------------------------------------*/ +int main (int argc, char **argv) +{ + GFSM_INIT + get_my_options(argc,argv); + + //-- dispatch + if (args.unindex_given) { + //-- convert indexed --> vanilla + + //-- load index + xfsm = gfsm_indexed_automaton_new(); + if (!gfsm_indexed_automaton_load_bin_filename(xfsm,infilename,&err)) { + g_printerr("%s: load failed for indexed automaton from '%s': %s\n", progname, infilename, + (err ? 
err->message : "?")); + exit(3); + } + + //-- unindex + fsm = gfsm_indexed_to_automaton(xfsm,NULL); + + //-- store vanilla + if (!gfsm_automaton_save_bin_filename(fsm,outfilename,args.compress_arg,&err)) { + g_printerr("%s: store failed for vanilla automaton to '%s': %s\n", progname, outfilename, + (err ? err->message : "?")); + exit(4); + } + } + else { + //-- convert vanilla --> indexed + + //-- load vanilla + fsm = gfsm_automaton_new(); + if (!gfsm_automaton_load_bin_filename(fsm,infilename,&err)) { + g_printerr("%s: load failed for vanilla automaton from '%s': %s\n", progname, infilename, + (err ? err->message : "?")); + exit(3); + } + + //-- index & sort + xfsm = gfsm_automaton_to_indexed(fsm,NULL); + //gfsm_indexed_automaton_sort(xfsm, gfsm_acmask_from_args(gfsmACLower,gfsmACWeight)); //-- TODO: make these options! + + //-- store indexed + if (!gfsm_indexed_automaton_save_bin_filename(xfsm,outfilename,args.compress_arg,&err)) { + g_printerr("%s: store failed for indexed automaton to '%s': %s\n", progname, outfilename, + (err ? 
err->message : "?")); + exit(4); + } + } + + //-- cleanup + if (fsm) gfsm_automaton_free(fsm); + if (xfsm) gfsm_indexed_automaton_free(xfsm); + + GFSM_FINISH + return 0; +} diff --git a/gfsm/gfsm/src/programs/gfsminfo.gog b/gfsm/gfsm/src/programs/gfsminfo.gog new file mode 100644 index 0000000..22ddbec --- /dev/null +++ b/gfsm/gfsm/src/programs/gfsminfo.gog @@ -0,0 +1,57 @@ +# -*- Mode: Shell-Script -*- +# +# Getopt::Gen specification +#----------------------------------------------------------------------------- +program "gfsminfo" +#program_version "0.01" + +purpose "Show basic information about stored gfsm files" +author "Bryan Jurish <moocow@ling.uni-potsdam.de>" +on_reparse "warn" + +#----------------------------------------------------------------------------- +# Details +#----------------------------------------------------------------------------- +details "" + +#----------------------------------------------------------------------------- +# Files +#----------------------------------------------------------------------------- +#rcfile "/etc/gfsmrc" +#rcfile "~/.gfsmrc" + +#----------------------------------------------------------------------------- +# Arguments +#----------------------------------------------------------------------------- +argument "BINFILE" "Stored binary gfsm file" \ + details=" +If unspecified, standard input will be read +" + +#----------------------------------------------------------------------------- +# Options +#----------------------------------------------------------------------------- +#group "Basic Options" + +#----------------------------------------------------------------------------- +# Addenda +#----------------------------------------------------------------------------- +#addenda "" + +#----------------------------------------------------------------------------- +# Bugs +#----------------------------------------------------------------------------- +bugs " + +None known. 
+ +" + +#----------------------------------------------------------------------------- +# Footer +#----------------------------------------------------------------------------- +#acknowledge `cat acknowledge.pod` + +seealso " +L<gfsmutils> +" diff --git a/gfsm/gfsm/src/programs/gfsminfo_cmdparser.c b/gfsm/gfsm/src/programs/gfsminfo_cmdparser.c new file mode 100644 index 0000000..79e978f --- /dev/null +++ b/gfsm/gfsm/src/programs/gfsminfo_cmdparser.c @@ -0,0 +1,403 @@ +/* -*- Mode: C -*- + * + * File: gfsminfo_cmdparser.c + * Description: Code for command-line parser struct gengetopt_args_info. + * + * File autogenerated by optgen.perl version 0.05 + * generated with the following command: + * /usr/local/bin/optgen.perl -u -l --no-handle-rcfile --nopod -F gfsminfo_cmdparser gfsminfo.gog + * + * The developers of optgen.perl consider the fixed text that goes in all + * optgen.perl output files to be in the public domain: + * we make no copyright claims on it. + * + */ + +#include <stdio.h> +#include <stdlib.h> +#include <errno.h> +#include <string.h> +#include <unistd.h> +#include <ctype.h> + +/* If we use autoconf/autoheader. */ +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#ifdef HAVE_PWD_H +# include <pwd.h> +#endif + +/* Allow user-overrides for PACKAGE and VERSION */ +#ifndef PACKAGE +# define PACKAGE "PACKAGE" +#endif + +#ifndef VERSION +# define VERSION "VERSION" +#endif + + +#ifndef PROGRAM +# define PROGRAM "gfsminfo" +#endif + +/* #define cmdline_parser_DEBUG */ + +/* Check for "configure's" getopt check result. 
*/ +#ifndef HAVE_GETOPT_LONG +# include "getopt.h" +#else +# include <getopt.h> +#endif + +#if !defined(HAVE_STRDUP) && !defined(strdup) +# define strdup gengetopt_strdup +#endif /* HAVE_STRDUP */ + +#include "gfsminfo_cmdparser.h" + + +/* user code section */ + +/* end user code section */ + + +void +cmdline_parser_print_version (void) +{ + printf("gfsminfo (%s %s) by Bryan Jurish <moocow@ling.uni-potsdam.de>\n", PACKAGE, VERSION); +} + +void +cmdline_parser_print_help (void) +{ + cmdline_parser_print_version (); + printf("\n"); + printf("Purpose:\n"); + printf(" Show basic information about stored gfsm files\n"); + printf("\n"); + + printf("Usage: %s [OPTIONS]... BINFILE\n", "gfsminfo"); + + printf("\n"); + printf(" Arguments:\n"); + printf(" BINFILE Stored binary gfsm file\n"); + + printf("\n"); + printf(" Options:\n"); + printf(" -h --help Print help and exit.\n"); + printf(" -V --version Print version and exit.\n"); +} + +#if !defined(HAVE_STRDUP) && !defined(strdup) +/* gengetopt_strdup(): automatically generated from strdup.c. */ +/* strdup.c replacement of strdup, which is not standard */ +static char * +gengetopt_strdup (const char *s) +{ + char *result = (char*)malloc(strlen(s) + 1); + if (result == (char*)0) + return (char*)0; + strcpy(result, s); + return result; +} +#endif /* HAVE_STRDUP */ + +/* clear_args(args_info): clears all args & resets to defaults */ +static void +clear_args(struct gengetopt_args_info *args_info) +{ +} + + +int +cmdline_parser (int argc, char * const *argv, struct gengetopt_args_info *args_info) +{ + int c; /* Character of the parsed option. 
*/ + int missing_required_options = 0; + + args_info->help_given = 0; + args_info->version_given = 0; + + clear_args(args_info); + + /* rcfile handling */ + + /* end rcfile handling */ + + optarg = 0; + optind = 1; + opterr = 1; + optopt = '?'; + + while (1) + { + int option_index = 0; + static struct option long_options[] = { + { "help", 0, NULL, 'h' }, + { "version", 0, NULL, 'V' }, + { NULL, 0, NULL, 0 } + }; + static char short_options[] = { + 'h', + 'V', + '\0' + }; + + c = getopt_long (argc, argv, short_options, long_options, &option_index); + + if (c == -1) break; /* Exit from 'while (1)' loop. */ + + if (cmdline_parser_parse_option(c, long_options[option_index].name, optarg, args_info) != 0) { + exit (EXIT_FAILURE); + } + } /* while */ + + + + if ( missing_required_options ) + exit (EXIT_FAILURE); + + + if (optind < argc) { + int i = 0 ; + args_info->inputs_num = argc - optind ; + args_info->inputs = (char **)(malloc ((args_info->inputs_num)*sizeof(char *))) ; + while (optind < argc) + args_info->inputs[ i++ ] = strdup (argv[optind++]) ; + } + + return 0; +} + + +/* Parse a single option */ +int +cmdline_parser_parse_option(char oshort, const char *olong, const char *val, + struct gengetopt_args_info *args_info) +{ + if (!oshort && !(olong && *olong)) return 1; /* ignore null options */ + +#ifdef cmdline_parser_DEBUG + fprintf(stderr, "parse_option(): oshort='%c', olong='%s', val='%s'\n", oshort, olong, val);*/ +#endif + + switch (oshort) + { + case 'h': /* Print help and exit. */ + if (args_info->help_given) { + fprintf(stderr, "%s: `--help' (`-h') option given more than once\n", PROGRAM); + } + clear_args(args_info); + cmdline_parser_print_help(); + exit(EXIT_SUCCESS); + + break; + + case 'V': /* Print version and exit. 
*/ + if (args_info->version_given) { + fprintf(stderr, "%s: `--version' (`-V') option given more than once\n", PROGRAM); + } + clear_args(args_info); + cmdline_parser_print_version(); + exit(EXIT_SUCCESS); + + break; + + case 0: /* Long option(s) with no short form */ + /* Print help and exit. */ + if (strcmp(olong, "help") == 0) { + if (args_info->help_given) { + fprintf(stderr, "%s: `--help' (`-h') option given more than once\n", PROGRAM); + } + clear_args(args_info); + cmdline_parser_print_help(); + exit(EXIT_SUCCESS); + + } + + /* Print version and exit. */ + else if (strcmp(olong, "version") == 0) { + if (args_info->version_given) { + fprintf(stderr, "%s: `--version' (`-V') option given more than once\n", PROGRAM); + } + clear_args(args_info); + cmdline_parser_print_version(); + exit(EXIT_SUCCESS); + + } + + else { + fprintf(stderr, "%s: unknown long option '%s'.\n", PROGRAM, olong); + return (EXIT_FAILURE); + } + break; + + case '?': /* Invalid Option */ + fprintf(stderr, "%s: unknown option '%s'.\n", PROGRAM, olong); + return (EXIT_FAILURE); + + + default: /* bug: options not considered. 
*/ + fprintf (stderr, "%s: option unknown: %c\n", PROGRAM, oshort); + abort (); + } /* switch */ + return 0; +} + + +/* Initialize options not yet given from environmental defaults */ +void +cmdline_parser_envdefaults(struct gengetopt_args_info *args_info) +{ + + + return; +} + + +/* Load option values from an .rc file */ +void +cmdline_parser_read_rcfile(const char *filename, + struct gengetopt_args_info *args_info, + int user_specified) +{ + char *fullname; + FILE *rcfile; + + if (!filename) return; /* ignore NULL filenames */ + +#if defined(HAVE_GETUID) && defined(HAVE_GETPWUID) + if (*filename == '~') { + /* tilde-expansion hack */ + struct passwd *pwent = getpwuid(getuid()); + if (!pwent) { + fprintf(stderr, "%s: user-id %d not found!\n", PROGRAM, getuid()); + return; + } + if (!pwent->pw_dir) { + fprintf(stderr, "%s: home directory for user-id %d not found!\n", PROGRAM, getuid()); + return; + } + fullname = (char *)malloc(strlen(pwent->pw_dir)+strlen(filename)); + strcpy(fullname, pwent->pw_dir); + strcat(fullname, filename+1); + } else { + fullname = strdup(filename); + } +#else /* !(defined(HAVE_GETUID) && defined(HAVE_GETPWUID)) */ + fullname = strdup(filename); +#endif /* defined(HAVE_GETUID) && defined(HAVE_GETPWUID) */ + + /* try to open */ + rcfile = fopen(fullname,"r"); + if (!rcfile) { + if (user_specified) { + fprintf(stderr, "%s: warning: open failed for rc-file '%s': %s\n", + PROGRAM, fullname, strerror(errno)); + } + } + else { + cmdline_parser_read_rc_stream(rcfile, fullname, args_info); + } + + /* cleanup */ + if (fullname != filename) free(fullname); + if (rcfile) fclose(rcfile); + + return; +} + + +/* Parse option values from an .rc file : guts */ +#define OPTPARSE_GET 32 +void +cmdline_parser_read_rc_stream(FILE *rcfile, + const char *filename, + struct gengetopt_args_info *args_info) +{ + char *optname = (char *)malloc(OPTPARSE_GET); + char *optval = (char *)malloc(OPTPARSE_GET); + size_t onsize = OPTPARSE_GET; + size_t ovsize = 
OPTPARSE_GET; + size_t onlen = 0; + size_t ovlen = 0; + int lineno = 0; + char c; + +#ifdef cmdline_parser_DEBUG + fprintf(stderr, "cmdline_parser_read_rc_stream('%s'):\n", filename); +#endif + + while ((c = fgetc(rcfile)) != EOF) { + onlen = 0; + ovlen = 0; + lineno++; + + /* -- get next option-name */ + /* skip leading space and comments */ + if (isspace(c)) continue; + if (c == '#') { + while ((c = fgetc(rcfile)) != EOF) { + if (c == '\n') break; + } + continue; + } + + /* parse option-name */ + while (c != EOF && c != '=' && !isspace(c)) { + /* re-allocate if necessary */ + if (onlen >= onsize-1) { + char *tmp = (char *)malloc(onsize+OPTPARSE_GET); + strcpy(tmp,optname); + free(optname); + + onsize += OPTPARSE_GET; + optname = tmp; + } + optname[onlen++] = c; + c = fgetc(rcfile); + } + optname[onlen++] = '\0'; + +#ifdef cmdline_parser_DEBUG + fprintf(stderr, "cmdline_parser_read_rc_stream('%s'): line %d: optname='%s'\n", + filename, lineno, optname); +#endif + + /* -- get next option-value */ + /* skip leading space */ + while ((c = fgetc(rcfile)) != EOF && isspace(c)) { + ; + } + + /* parse option-value */ + while (c != EOF && c != '\n') { + /* re-allocate if necessary */ + if (ovlen >= ovsize-1) { + char *tmp = (char *)malloc(ovsize+OPTPARSE_GET); + strcpy(tmp,optval); + free(optval); + ovsize += OPTPARSE_GET; + optval = tmp; + } + optval[ovlen++] = c; + c = fgetc(rcfile); + } + optval[ovlen++] = '\0'; + + /* now do the action for the option */ + if (cmdline_parser_parse_option('\0',optname,optval,args_info) != 0) { + fprintf(stderr, "%s: error in file '%s' at line %d.\n", PROGRAM, filename, lineno); + + } + } + + /* cleanup */ + free(optname); + free(optval); + + return; +} diff --git a/gfsm/gfsm/src/programs/gfsminfo_cmdparser.h b/gfsm/gfsm/src/programs/gfsminfo_cmdparser.h new file mode 100644 index 0000000..8aa6727 --- /dev/null +++ b/gfsm/gfsm/src/programs/gfsminfo_cmdparser.h @@ -0,0 +1,60 @@ +/* -*- Mode: C -*- + * + * File: gfsminfo_cmdparser.h + * 
Description: Headers for command-line parser struct gengetopt_args_info. + * + * File autogenerated by optgen.perl version 0.05. + * + */ + +#ifndef gfsminfo_cmdparser_h +#define gfsminfo_cmdparser_h + +#ifdef __cplusplus +extern "C" { +#endif /* __cplusplus */ + +/* + * moocow: Never set PACKAGE and VERSION here. + */ + +struct gengetopt_args_info { + + + int help_given; /* Whether help was given */ + int version_given; /* Whether version was given */ + + char **inputs; /* unnamed arguments */ + unsigned inputs_num; /* number of unnamed arguments */ +}; + +/* read rc files (if any) and parse all command-line options in one swell foop */ +int cmdline_parser (int argc, char *const *argv, struct gengetopt_args_info *args_info); + +/* instantiate defaults from environment variables: you must call this yourself! */ +void cmdline_parser_envdefaults (struct gengetopt_args_info *args_info); + +/* read a single rc-file */ +void cmdline_parser_read_rcfile (const char *filename, + struct gengetopt_args_info *args_info, + int user_specified); + +/* read a single rc-file (stream) */ +void cmdline_parser_read_rc_stream (FILE *rcfile, + const char *filename, + struct gengetopt_args_info *args_info); + +/* parse a single option */ +int cmdline_parser_parse_option (char oshort, const char *olong, const char *val, + struct gengetopt_args_info *args_info); + +/* print help message */ +void cmdline_parser_print_help(void); + +/* print version */ +void cmdline_parser_print_version(void); + +#ifdef __cplusplus +} +#endif /* __cplusplus */ +#endif /* gfsminfo_cmdparser_h */ diff --git a/gfsm/gfsm/src/programs/gfsminfo_main.c b/gfsm/gfsm/src/programs/gfsminfo_main.c new file mode 100644 index 0000000..7c6377d --- /dev/null +++ b/gfsm/gfsm/src/programs/gfsminfo_main.c @@ -0,0 +1,123 @@ +/* + gfsm-utils : finite state automaton utilities + Copyright (C) 2004 by Bryan Jurish <moocow@ling.uni-potsdam.de> + + This library is free software; you can redistribute it and/or + modify it under the 
terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif + +#include <stdio.h> +#include <errno.h> +#include <string.h> +#include <stdlib.h> + +#include <gfsm.h> + +#include "gfsminfo_cmdparser.h" + +/*-------------------------------------------------------------------------- + * Globals + *--------------------------------------------------------------------------*/ +char *progname = "gfsminfo"; + +//-- options +struct gengetopt_args_info args; + +//-- files +const char *infilename = "-"; + +//-- global structs +gfsmAutomaton *fsm; + +/*-------------------------------------------------------------------------- + * Option Processing + *--------------------------------------------------------------------------*/ +void get_my_options(int argc, char **argv) +{ + if (cmdline_parser(argc, argv, &args) != 0) + exit(1); + + //-- load environmental defaults + //cmdline_parser_envdefaults(&args); + + //-- filenames + if (args.inputs_num > 0) infilename = args.inputs[0]; + + //-- initialize fsm + fsm = gfsm_automaton_new(); +} + +/*-------------------------------------------------------------------------- + * Utilities + *--------------------------------------------------------------------------*/ +#define bool2char(b) (b ? 
'y' : 'n') + +/*-------------------------------------------------------------------------- + * MAIN + *--------------------------------------------------------------------------*/ +int main (int argc, char **argv) +{ + gfsmError *err = NULL; + GString *modestr = NULL; + + GFSM_INIT + + get_my_options(argc,argv); + guint n_eps_i, n_eps_o, n_eps_io; + + //-- load automaton + if (!gfsm_automaton_load_bin_filename(fsm,infilename,&err)) { + g_printerr("%s: load failed for '%s': %s\n", progname, infilename, err->message); + exit(2); + } + + //-- print information + printf("%-24s: %s\n", "Filename", infilename); + printf("%-24s: %s\n", "Semiring", gfsm_sr_type_to_name(fsm->sr->type)); + printf("%-24s: %c\n", "Transducer?", bool2char(gfsm_automaton_is_transducer(fsm))); + printf("%-24s: %c\n", "Weighted?", bool2char(gfsm_automaton_is_weighted(fsm))); + printf("%-24s: %c\n", "Deterministic?", bool2char(fsm->flags.is_deterministic)); +#if 0 + printf("%-24s: %s\n", "Sort Mode", gfsm_arc_sortmode_to_name(gfsm_automaton_sortmode(fsm))); +#else + modestr = gfsm_acmask_to_gstring(fsm->flags.sort_mode, modestr); + printf("%-24s: %s\n", "Sort Mode", modestr->str); + g_string_free(modestr,TRUE); +#endif + if (fsm->root_id != gfsmNoState) { + printf("%-24s: %u\n", "Initial state", fsm->root_id); + } else { + printf("%-24s: %s\n", "Initial state", "none"); + } + printf("%-24s: %u\n", "# of states", gfsm_automaton_n_states(fsm)); + printf("%-24s: %u\n", "# of final states", gfsm_automaton_n_final_states(fsm)); + printf("%-24s: %u\n", "# of arcs", gfsm_automaton_n_arcs_full(fsm, &n_eps_i, &n_eps_o, &n_eps_io)); + printf("%-24s: %u\n", "# of i/o epsilon arcs", n_eps_io); + printf("%-24s: %u\n", "# of input epsilon arcs", n_eps_i); + printf("%-24s: %u\n", "# of output epsilon arcs", n_eps_o); + + printf("%-24s: %c\n", "cyclic?", bool2char(gfsm_automaton_is_cyclic(fsm))); + //... 
+ + //-- cleanup + gfsm_automaton_free(fsm); + + GFSM_FINISH + + return 0; +} diff --git a/gfsm/gfsm/src/programs/gfsmintersect.gog b/gfsm/gfsm/src/programs/gfsmintersect.gog new file mode 100644 index 0000000..9ce5f79 --- /dev/null +++ b/gfsm/gfsm/src/programs/gfsmintersect.gog @@ -0,0 +1,70 @@ +# -*- Mode: Shell-Script -*- +# +# Getopt::Gen specification +#----------------------------------------------------------------------------- +program "gfsmintersect" +#program_version "0.01" + +purpose "Compute intersection of finite state acceptors" +author "Bryan Jurish <moocow@ling.uni-potsdam.de>" +on_reparse "warn" + +#----------------------------------------------------------------------------- +# Details +#----------------------------------------------------------------------------- +details "" + +#----------------------------------------------------------------------------- +# Files +#----------------------------------------------------------------------------- +#rcfile "/etc/mootrc" +#rcfile "~/.mootrc" + +#----------------------------------------------------------------------------- +# Arguments +#----------------------------------------------------------------------------- +argument "BINFILE(s)" "Stored binary gfsm file(s)" \ + details=" +If unspecified, standard input will be read +" + +#----------------------------------------------------------------------------- +# Options +#----------------------------------------------------------------------------- +#group "Basic Options" + +int "compress" z "Specify compression level of output file." \ + arg="LEVEL" \ + default="-1" \ + details=" +Specify zlib compression level of output file. -1 (default) indicates +the default compression level, 0 (zero) indicates no zlib compression at all, +and 9 indicates the best possible compression. +" + +string "output" F "Specifiy output file (default=stdout)." 
\ + arg="FILE" \ + default="-" + +#----------------------------------------------------------------------------- +# Addenda +#----------------------------------------------------------------------------- +#addenda "" + +#----------------------------------------------------------------------------- +# Bugs +#----------------------------------------------------------------------------- +bugs " + +Probably many. + +" + +#----------------------------------------------------------------------------- +# Footer +#----------------------------------------------------------------------------- +#acknowledge `cat acknowledge.pod` + +seealso " +L<gfsmutils> +" diff --git a/gfsm/gfsm/src/programs/gfsmintersect_cmdparser.c b/gfsm/gfsm/src/programs/gfsmintersect_cmdparser.c new file mode 100644 index 0000000..28b385e --- /dev/null +++ b/gfsm/gfsm/src/programs/gfsmintersect_cmdparser.c @@ -0,0 +1,449 @@ +/* -*- Mode: C -*- + * + * File: gfsmintersect_cmdparser.c + * Description: Code for command-line parser struct gengetopt_args_info. + * + * File autogenerated by optgen.perl version 0.05 + * generated with the following command: + * /usr/local/bin/optgen.perl -u -l --no-handle-rcfile --nopod -F gfsmintersect_cmdparser gfsmintersect.gog + * + * The developers of optgen.perl consider the fixed text that goes in all + * optgen.perl output files to be in the public domain: + * we make no copyright claims on it. + * + */ + +#include <stdio.h> +#include <stdlib.h> +#include <errno.h> +#include <string.h> +#include <unistd.h> +#include <ctype.h> + +/* If we use autoconf/autoheader. 
*/ +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#ifdef HAVE_PWD_H +# include <pwd.h> +#endif + +/* Allow user-overrides for PACKAGE and VERSION */ +#ifndef PACKAGE +# define PACKAGE "PACKAGE" +#endif + +#ifndef VERSION +# define VERSION "VERSION" +#endif + + +#ifndef PROGRAM +# define PROGRAM "gfsmintersect" +#endif + +/* #define cmdline_parser_DEBUG */ + +/* Check for "configure's" getopt check result. */ +#ifndef HAVE_GETOPT_LONG +# include "getopt.h" +#else +# include <getopt.h> +#endif + +#if !defined(HAVE_STRDUP) && !defined(strdup) +# define strdup gengetopt_strdup +#endif /* HAVE_STRDUP */ + +#include "gfsmintersect_cmdparser.h" + + +/* user code section */ + +/* end user code section */ + + +void +cmdline_parser_print_version (void) +{ + printf("gfsmintersect (%s %s) by Bryan Jurish <moocow@ling.uni-potsdam.de>\n", PACKAGE, VERSION); +} + +void +cmdline_parser_print_help (void) +{ + cmdline_parser_print_version (); + printf("\n"); + printf("Purpose:\n"); + printf(" Compute intersection of finite state acceptors\n"); + printf("\n"); + + printf("Usage: %s [OPTIONS]... BINFILE(s)\n", "gfsmintersect"); + + printf("\n"); + printf(" Arguments:\n"); + printf(" BINFILE(s) Stored binary gfsm file(s)\n"); + + printf("\n"); + printf(" Options:\n"); + printf(" -h --help Print help and exit.\n"); + printf(" -V --version Print version and exit.\n"); + printf(" -zLEVEL --compress=LEVEL Specify compression level of output file.\n"); + printf(" -FFILE --output=FILE Specifiy output file (default=stdout).\n"); +} + +#if !defined(HAVE_STRDUP) && !defined(strdup) +/* gengetopt_strdup(): automatically generated from strdup.c. 
*/ +/* strdup.c replacement of strdup, which is not standard */ +static char * +gengetopt_strdup (const char *s) +{ + char *result = (char*)malloc(strlen(s) + 1); + if (result == (char*)0) + return (char*)0; + strcpy(result, s); + return result; +} +#endif /* HAVE_STRDUP */ + +/* clear_args(args_info): clears all args & resets to defaults */ +static void +clear_args(struct gengetopt_args_info *args_info) +{ + args_info->compress_arg = -1; + args_info->output_arg = strdup("-"); +} + + +int +cmdline_parser (int argc, char * const *argv, struct gengetopt_args_info *args_info) +{ + int c; /* Character of the parsed option. */ + int missing_required_options = 0; + + args_info->help_given = 0; + args_info->version_given = 0; + args_info->compress_given = 0; + args_info->output_given = 0; + + clear_args(args_info); + + /* rcfile handling */ + + /* end rcfile handling */ + + optarg = 0; + optind = 1; + opterr = 1; + optopt = '?'; + + while (1) + { + int option_index = 0; + static struct option long_options[] = { + { "help", 0, NULL, 'h' }, + { "version", 0, NULL, 'V' }, + { "compress", 1, NULL, 'z' }, + { "output", 1, NULL, 'F' }, + { NULL, 0, NULL, 0 } + }; + static char short_options[] = { + 'h', + 'V', + 'z', ':', + 'F', ':', + '\0' + }; + + c = getopt_long (argc, argv, short_options, long_options, &option_index); + + if (c == -1) break; /* Exit from 'while (1)' loop. 
*/ + + if (cmdline_parser_parse_option(c, long_options[option_index].name, optarg, args_info) != 0) { + exit (EXIT_FAILURE); + } + } /* while */ + + + + if ( missing_required_options ) + exit (EXIT_FAILURE); + + + if (optind < argc) { + int i = 0 ; + args_info->inputs_num = argc - optind ; + args_info->inputs = (char **)(malloc ((args_info->inputs_num)*sizeof(char *))) ; + while (optind < argc) + args_info->inputs[ i++ ] = strdup (argv[optind++]) ; + } + + return 0; +} + + +/* Parse a single option */ +int +cmdline_parser_parse_option(char oshort, const char *olong, const char *val, + struct gengetopt_args_info *args_info) +{ + if (!oshort && !(olong && *olong)) return 1; /* ignore null options */ + +#ifdef cmdline_parser_DEBUG + fprintf(stderr, "parse_option(): oshort='%c', olong='%s', val='%s'\n", oshort, olong, val);*/ +#endif + + switch (oshort) + { + case 'h': /* Print help and exit. */ + if (args_info->help_given) { + fprintf(stderr, "%s: `--help' (`-h') option given more than once\n", PROGRAM); + } + clear_args(args_info); + cmdline_parser_print_help(); + exit(EXIT_SUCCESS); + + break; + + case 'V': /* Print version and exit. */ + if (args_info->version_given) { + fprintf(stderr, "%s: `--version' (`-V') option given more than once\n", PROGRAM); + } + clear_args(args_info); + cmdline_parser_print_version(); + exit(EXIT_SUCCESS); + + break; + + case 'z': /* Specify compression level of output file. */ + if (args_info->compress_given) { + fprintf(stderr, "%s: `--compress' (`-z') option given more than once\n", PROGRAM); + } + args_info->compress_given++; + args_info->compress_arg = (int)atoi(val); + break; + + case 'F': /* Specifiy output file (default=stdout). 
*/ + if (args_info->output_given) { + fprintf(stderr, "%s: `--output' (`-F') option given more than once\n", PROGRAM); + } + args_info->output_given++; + if (args_info->output_arg) free(args_info->output_arg); + args_info->output_arg = strdup(val); + break; + + case 0: /* Long option(s) with no short form */ + /* Print help and exit. */ + if (strcmp(olong, "help") == 0) { + if (args_info->help_given) { + fprintf(stderr, "%s: `--help' (`-h') option given more than once\n", PROGRAM); + } + clear_args(args_info); + cmdline_parser_print_help(); + exit(EXIT_SUCCESS); + + } + + /* Print version and exit. */ + else if (strcmp(olong, "version") == 0) { + if (args_info->version_given) { + fprintf(stderr, "%s: `--version' (`-V') option given more than once\n", PROGRAM); + } + clear_args(args_info); + cmdline_parser_print_version(); + exit(EXIT_SUCCESS); + + } + + /* Specify compression level of output file. */ + else if (strcmp(olong, "compress") == 0) { + if (args_info->compress_given) { + fprintf(stderr, "%s: `--compress' (`-z') option given more than once\n", PROGRAM); + } + args_info->compress_given++; + args_info->compress_arg = (int)atoi(val); + } + + /* Specifiy output file (default=stdout). */ + else if (strcmp(olong, "output") == 0) { + if (args_info->output_given) { + fprintf(stderr, "%s: `--output' (`-F') option given more than once\n", PROGRAM); + } + args_info->output_given++; + if (args_info->output_arg) free(args_info->output_arg); + args_info->output_arg = strdup(val); + } + + else { + fprintf(stderr, "%s: unknown long option '%s'.\n", PROGRAM, olong); + return (EXIT_FAILURE); + } + break; + + case '?': /* Invalid Option */ + fprintf(stderr, "%s: unknown option '%s'.\n", PROGRAM, olong); + return (EXIT_FAILURE); + + + default: /* bug: options not considered. 
*/ + fprintf (stderr, "%s: option unknown: %c\n", PROGRAM, oshort); + abort (); + } /* switch */ + return 0; +} + + +/* Initialize options not yet given from environmental defaults */ +void +cmdline_parser_envdefaults(struct gengetopt_args_info *args_info) +{ + + + return; +} + + +/* Load option values from an .rc file */ +void +cmdline_parser_read_rcfile(const char *filename, + struct gengetopt_args_info *args_info, + int user_specified) +{ + char *fullname; + FILE *rcfile; + + if (!filename) return; /* ignore NULL filenames */ + +#if defined(HAVE_GETUID) && defined(HAVE_GETPWUID) + if (*filename == '~') { + /* tilde-expansion hack */ + struct passwd *pwent = getpwuid(getuid()); + if (!pwent) { + fprintf(stderr, "%s: user-id %d not found!\n", PROGRAM, getuid()); + return; + } + if (!pwent->pw_dir) { + fprintf(stderr, "%s: home directory for user-id %d not found!\n", PROGRAM, getuid()); + return; + } + fullname = (char *)malloc(strlen(pwent->pw_dir)+strlen(filename)); + strcpy(fullname, pwent->pw_dir); + strcat(fullname, filename+1); + } else { + fullname = strdup(filename); + } +#else /* !(defined(HAVE_GETUID) && defined(HAVE_GETPWUID)) */ + fullname = strdup(filename); +#endif /* defined(HAVE_GETUID) && defined(HAVE_GETPWUID) */ + + /* try to open */ + rcfile = fopen(fullname,"r"); + if (!rcfile) { + if (user_specified) { + fprintf(stderr, "%s: warning: open failed for rc-file '%s': %s\n", + PROGRAM, fullname, strerror(errno)); + } + } + else { + cmdline_parser_read_rc_stream(rcfile, fullname, args_info); + } + + /* cleanup */ + if (fullname != filename) free(fullname); + if (rcfile) fclose(rcfile); + + return; +} + + +/* Parse option values from an .rc file : guts */ +#define OPTPARSE_GET 32 +void +cmdline_parser_read_rc_stream(FILE *rcfile, + const char *filename, + struct gengetopt_args_info *args_info) +{ + char *optname = (char *)malloc(OPTPARSE_GET); + char *optval = (char *)malloc(OPTPARSE_GET); + size_t onsize = OPTPARSE_GET; + size_t ovsize = 
OPTPARSE_GET; + size_t onlen = 0; + size_t ovlen = 0; + int lineno = 0; + char c; + +#ifdef cmdline_parser_DEBUG + fprintf(stderr, "cmdline_parser_read_rc_stream('%s'):\n", filename); +#endif + + while ((c = fgetc(rcfile)) != EOF) { + onlen = 0; + ovlen = 0; + lineno++; + + /* -- get next option-name */ + /* skip leading space and comments */ + if (isspace(c)) continue; + if (c == '#') { + while ((c = fgetc(rcfile)) != EOF) { + if (c == '\n') break; + } + continue; + } + + /* parse option-name */ + while (c != EOF && c != '=' && !isspace(c)) { + /* re-allocate if necessary */ + if (onlen >= onsize-1) { + char *tmp = (char *)malloc(onsize+OPTPARSE_GET); + strcpy(tmp,optname); + free(optname); + + onsize += OPTPARSE_GET; + optname = tmp; + } + optname[onlen++] = c; + c = fgetc(rcfile); + } + optname[onlen++] = '\0'; + +#ifdef cmdline_parser_DEBUG + fprintf(stderr, "cmdline_parser_read_rc_stream('%s'): line %d: optname='%s'\n", + filename, lineno, optname); +#endif + + /* -- get next option-value */ + /* skip leading space */ + while ((c = fgetc(rcfile)) != EOF && isspace(c)) { + ; + } + + /* parse option-value */ + while (c != EOF && c != '\n') { + /* re-allocate if necessary */ + if (ovlen >= ovsize-1) { + char *tmp = (char *)malloc(ovsize+OPTPARSE_GET); + strcpy(tmp,optval); + free(optval); + ovsize += OPTPARSE_GET; + optval = tmp; + } + optval[ovlen++] = c; + c = fgetc(rcfile); + } + optval[ovlen++] = '\0'; + + /* now do the action for the option */ + if (cmdline_parser_parse_option('\0',optname,optval,args_info) != 0) { + fprintf(stderr, "%s: error in file '%s' at line %d.\n", PROGRAM, filename, lineno); + + } + } + + /* cleanup */ + free(optname); + free(optval); + + return; +} diff --git a/gfsm/gfsm/src/programs/gfsmintersect_cmdparser.h b/gfsm/gfsm/src/programs/gfsmintersect_cmdparser.h new file mode 100644 index 0000000..4ff1144 --- /dev/null +++ b/gfsm/gfsm/src/programs/gfsmintersect_cmdparser.h @@ -0,0 +1,63 @@ +/* -*- Mode: C -*- + * + * File: 
gfsmintersect_cmdparser.h + * Description: Headers for command-line parser struct gengetopt_args_info. + * + * File autogenerated by optgen.perl version 0.05. + * + */ + +#ifndef gfsmintersect_cmdparser_h +#define gfsmintersect_cmdparser_h + +#ifdef __cplusplus +extern "C" { +#endif /* __cplusplus */ + +/* + * moocow: Never set PACKAGE and VERSION here. + */ + +struct gengetopt_args_info { + int compress_arg; /* Specify compression level of output file. (default=-1). */ + char * output_arg; /* Specifiy output file (default=stdout). (default=-). */ + + int help_given; /* Whether help was given */ + int version_given; /* Whether version was given */ + int compress_given; /* Whether compress was given */ + int output_given; /* Whether output was given */ + + char **inputs; /* unnamed arguments */ + unsigned inputs_num; /* number of unnamed arguments */ +}; + +/* read rc files (if any) and parse all command-line options in one swell foop */ +int cmdline_parser (int argc, char *const *argv, struct gengetopt_args_info *args_info); + +/* instantiate defaults from environment variables: you must call this yourself! 
*/ +void cmdline_parser_envdefaults (struct gengetopt_args_info *args_info); + +/* read a single rc-file */ +void cmdline_parser_read_rcfile (const char *filename, + struct gengetopt_args_info *args_info, + int user_specified); + +/* read a single rc-file (stream) */ +void cmdline_parser_read_rc_stream (FILE *rcfile, + const char *filename, + struct gengetopt_args_info *args_info); + +/* parse a single option */ +int cmdline_parser_parse_option (char oshort, const char *olong, const char *val, + struct gengetopt_args_info *args_info); + +/* print help message */ +void cmdline_parser_print_help(void); + +/* print version */ +void cmdline_parser_print_version(void); + +#ifdef __cplusplus +} +#endif /* __cplusplus */ +#endif /* gfsmintersect_cmdparser_h */ diff --git a/gfsm/gfsm/src/programs/gfsmintersect_main.c b/gfsm/gfsm/src/programs/gfsmintersect_main.c new file mode 100644 index 0000000..4fd05ff --- /dev/null +++ b/gfsm/gfsm/src/programs/gfsmintersect_main.c @@ -0,0 +1,123 @@ +/* + gfsm-utils : finite state automaton utilities + Copyright (C) 2005 by Bryan Jurish <moocow@ling.uni-potsdam.de> + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif + +#include <stdio.h> +#include <errno.h> +#include <string.h> +#include <stdlib.h> + +#include <gfsm.h> + +#include "gfsmintersect_cmdparser.h" + +/*-------------------------------------------------------------------------- + * Globals + *--------------------------------------------------------------------------*/ +char *progname = "gfsmintersect"; + +//-- options +struct gengetopt_args_info args; + +//-- files +const char *infilename; +const char *outfilename = "-"; + +//-- global structs etc. +gfsmError *err = NULL; +gfsmAutomaton *fsmOut=NULL, *fsmIn=NULL; + +/*-------------------------------------------------------------------------- + * Option Processing + *--------------------------------------------------------------------------*/ +void get_my_options(int argc, char **argv) +{ + if (cmdline_parser(argc, argv, &args) != 0) + exit(1); + + //-- require at least one file argument + if (args.inputs_num < 1) { + cmdline_parser_print_help(); + exit(2); + } + + //-- output + if (args.output_arg) outfilename = args.output_arg; + + //-- load environmental defaults + //cmdline_parser_envdefaults(&args); + + //-- initialize automaton + fsmIn = gfsm_automaton_new(); +} + +/*-------------------------------------------------------------------------- + * compute_operation() + * + utility routine + */ +void compute_operation(const char *infilename) +{ + //-- load automaton + if (!gfsm_automaton_load_bin_filename(fsmIn,infilename,&err)) { + g_printerr("%s: load failed for '%s': %s\n", progname, infilename, err->message); + exit(255); + } + + //-- compute underlying FSM operation + if (fsmOut == NULL) { + fsmOut = fsmIn; + fsmIn = gfsm_automaton_new(); + } else { + gfsm_automaton_intersect(fsmOut,fsmIn); + } 
+} + +/*-------------------------------------------------------------------------- + * MAIN + *--------------------------------------------------------------------------*/ +int main (int argc, char **argv) +{ + int i; + + GFSM_INIT + + get_my_options(argc,argv); + + for (i = 0; i < args.inputs_num; i++) { + compute_operation(args.inputs[i]); + } + if (args.inputs_num == 1) compute_operation("-"); + + //-- spew automaton + if (!gfsm_automaton_save_bin_filename(fsmOut,outfilename,args.compress_arg,&err)) { + g_printerr("%s: store failed to '%s': %s\n", progname, outfilename, err->message); + exit(4); + } + + //-- cleanup + if (fsmIn) gfsm_automaton_free(fsmIn); + if (fsmOut) gfsm_automaton_free(fsmOut); + + GFSM_FINISH + + return 0; +} diff --git a/gfsm/gfsm/src/programs/gfsminvert.gog b/gfsm/gfsm/src/programs/gfsminvert.gog new file mode 100644 index 0000000..4bb0f2e --- /dev/null +++ b/gfsm/gfsm/src/programs/gfsminvert.gog @@ -0,0 +1,70 @@ +# -*- Mode: Shell-Script -*- +# +# Getopt::Gen specification +#----------------------------------------------------------------------------- +program "gfsminvert" +#program_version "0.01" + +purpose "Invert input/output tapes of finite state machines" +author "Bryan Jurish <moocow@ling.uni-potsdam.de>" +on_reparse "warn" + +#----------------------------------------------------------------------------- +# Details +#----------------------------------------------------------------------------- +details "" + +#----------------------------------------------------------------------------- +# Files +#----------------------------------------------------------------------------- +#rcfile "/etc/gfsmrc" +#rcfile "~/.gfsmrc" + +#----------------------------------------------------------------------------- +# Arguments +#----------------------------------------------------------------------------- +argument "BINFILE" "Stored binary gfsm file" \ + details=" +If unspecified, standard input will be read +" + 
+#----------------------------------------------------------------------------- +# Options +#----------------------------------------------------------------------------- +#group "Basic Options" + +int "compress" z "Specify compression level of output file." \ + arg="LEVEL" \ + default="-1" \ + details=" +Specify zlib compression level of output file. -1 (default) indicates +the default compression level, 0 (zero) indicates no zlib compression at all, +and 9 indicates the best possible compression. +" + +string "output" F "Specifiy output file (default=stdout)." \ + arg="FILE" \ + default="-" + +#----------------------------------------------------------------------------- +# Addenda +#----------------------------------------------------------------------------- +#addenda "" + +#----------------------------------------------------------------------------- +# Bugs +#----------------------------------------------------------------------------- +bugs " + +None known. + +" + +#----------------------------------------------------------------------------- +# Footer +#----------------------------------------------------------------------------- +#acknowledge `cat acknowledge.pod` + +seealso " +L<gfsmutils> +" diff --git a/gfsm/gfsm/src/programs/gfsminvert_cmdparser.c b/gfsm/gfsm/src/programs/gfsminvert_cmdparser.c new file mode 100644 index 0000000..04d03c1 --- /dev/null +++ b/gfsm/gfsm/src/programs/gfsminvert_cmdparser.c @@ -0,0 +1,449 @@ +/* -*- Mode: C -*- + * + * File: gfsminvert_cmdparser.c + * Description: Code for command-line parser struct gengetopt_args_info. + * + * File autogenerated by optgen.perl version 0.05 + * generated with the following command: + * /usr/local/bin/optgen.perl -u -l --no-handle-rcfile --nopod -F gfsminvert_cmdparser gfsminvert.gog + * + * The developers of optgen.perl consider the fixed text that goes in all + * optgen.perl output files to be in the public domain: + * we make no copyright claims on it. 
+ * + */ + +#include <stdio.h> +#include <stdlib.h> +#include <errno.h> +#include <string.h> +#include <unistd.h> +#include <ctype.h> + +/* If we use autoconf/autoheader. */ +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#ifdef HAVE_PWD_H +# include <pwd.h> +#endif + +/* Allow user-overrides for PACKAGE and VERSION */ +#ifndef PACKAGE +# define PACKAGE "PACKAGE" +#endif + +#ifndef VERSION +# define VERSION "VERSION" +#endif + + +#ifndef PROGRAM +# define PROGRAM "gfsminvert" +#endif + +/* #define cmdline_parser_DEBUG */ + +/* Check for "configure's" getopt check result. */ +#ifndef HAVE_GETOPT_LONG +# include "getopt.h" +#else +# include <getopt.h> +#endif + +#if !defined(HAVE_STRDUP) && !defined(strdup) +# define strdup gengetopt_strdup +#endif /* HAVE_STRDUP */ + +#include "gfsminvert_cmdparser.h" + + +/* user code section */ + +/* end user code section */ + + +void +cmdline_parser_print_version (void) +{ + printf("gfsminvert (%s %s) by Bryan Jurish <moocow@ling.uni-potsdam.de>\n", PACKAGE, VERSION); +} + +void +cmdline_parser_print_help (void) +{ + cmdline_parser_print_version (); + printf("\n"); + printf("Purpose:\n"); + printf(" Invert input/output tapes of finite state machines\n"); + printf("\n"); + + printf("Usage: %s [OPTIONS]... BINFILE\n", "gfsminvert"); + + printf("\n"); + printf(" Arguments:\n"); + printf(" BINFILE Stored binary gfsm file\n"); + + printf("\n"); + printf(" Options:\n"); + printf(" -h --help Print help and exit.\n"); + printf(" -V --version Print version and exit.\n"); + printf(" -zLEVEL --compress=LEVEL Specify compression level of output file.\n"); + printf(" -FFILE --output=FILE Specifiy output file (default=stdout).\n"); +} + +#if !defined(HAVE_STRDUP) && !defined(strdup) +/* gengetopt_strdup(): automatically generated from strdup.c. 
*/ +/* strdup.c replacement of strdup, which is not standard */ +static char * +gengetopt_strdup (const char *s) +{ + char *result = (char*)malloc(strlen(s) + 1); + if (result == (char*)0) + return (char*)0; + strcpy(result, s); + return result; +} +#endif /* HAVE_STRDUP */ + +/* clear_args(args_info): clears all args & resets to defaults */ +static void +clear_args(struct gengetopt_args_info *args_info) +{ + args_info->compress_arg = -1; + args_info->output_arg = strdup("-"); +} + + +int +cmdline_parser (int argc, char * const *argv, struct gengetopt_args_info *args_info) +{ + int c; /* Character of the parsed option. */ + int missing_required_options = 0; + + args_info->help_given = 0; + args_info->version_given = 0; + args_info->compress_given = 0; + args_info->output_given = 0; + + clear_args(args_info); + + /* rcfile handling */ + + /* end rcfile handling */ + + optarg = 0; + optind = 1; + opterr = 1; + optopt = '?'; + + while (1) + { + int option_index = 0; + static struct option long_options[] = { + { "help", 0, NULL, 'h' }, + { "version", 0, NULL, 'V' }, + { "compress", 1, NULL, 'z' }, + { "output", 1, NULL, 'F' }, + { NULL, 0, NULL, 0 } + }; + static char short_options[] = { + 'h', + 'V', + 'z', ':', + 'F', ':', + '\0' + }; + + c = getopt_long (argc, argv, short_options, long_options, &option_index); + + if (c == -1) break; /* Exit from 'while (1)' loop. 
*/ + + if (cmdline_parser_parse_option(c, long_options[option_index].name, optarg, args_info) != 0) { + exit (EXIT_FAILURE); + } + } /* while */ + + + + if ( missing_required_options ) + exit (EXIT_FAILURE); + + + if (optind < argc) { + int i = 0 ; + args_info->inputs_num = argc - optind ; + args_info->inputs = (char **)(malloc ((args_info->inputs_num)*sizeof(char *))) ; + while (optind < argc) + args_info->inputs[ i++ ] = strdup (argv[optind++]) ; + } + + return 0; +} + + +/* Parse a single option */ +int +cmdline_parser_parse_option(char oshort, const char *olong, const char *val, + struct gengetopt_args_info *args_info) +{ + if (!oshort && !(olong && *olong)) return 1; /* ignore null options */ + +#ifdef cmdline_parser_DEBUG + fprintf(stderr, "parse_option(): oshort='%c', olong='%s', val='%s'\n", oshort, olong, val);*/ +#endif + + switch (oshort) + { + case 'h': /* Print help and exit. */ + if (args_info->help_given) { + fprintf(stderr, "%s: `--help' (`-h') option given more than once\n", PROGRAM); + } + clear_args(args_info); + cmdline_parser_print_help(); + exit(EXIT_SUCCESS); + + break; + + case 'V': /* Print version and exit. */ + if (args_info->version_given) { + fprintf(stderr, "%s: `--version' (`-V') option given more than once\n", PROGRAM); + } + clear_args(args_info); + cmdline_parser_print_version(); + exit(EXIT_SUCCESS); + + break; + + case 'z': /* Specify compression level of output file. */ + if (args_info->compress_given) { + fprintf(stderr, "%s: `--compress' (`-z') option given more than once\n", PROGRAM); + } + args_info->compress_given++; + args_info->compress_arg = (int)atoi(val); + break; + + case 'F': /* Specifiy output file (default=stdout). 
*/ + if (args_info->output_given) { + fprintf(stderr, "%s: `--output' (`-F') option given more than once\n", PROGRAM); + } + args_info->output_given++; + if (args_info->output_arg) free(args_info->output_arg); + args_info->output_arg = strdup(val); + break; + + case 0: /* Long option(s) with no short form */ + /* Print help and exit. */ + if (strcmp(olong, "help") == 0) { + if (args_info->help_given) { + fprintf(stderr, "%s: `--help' (`-h') option given more than once\n", PROGRAM); + } + clear_args(args_info); + cmdline_parser_print_help(); + exit(EXIT_SUCCESS); + + } + + /* Print version and exit. */ + else if (strcmp(olong, "version") == 0) { + if (args_info->version_given) { + fprintf(stderr, "%s: `--version' (`-V') option given more than once\n", PROGRAM); + } + clear_args(args_info); + cmdline_parser_print_version(); + exit(EXIT_SUCCESS); + + } + + /* Specify compression level of output file. */ + else if (strcmp(olong, "compress") == 0) { + if (args_info->compress_given) { + fprintf(stderr, "%s: `--compress' (`-z') option given more than once\n", PROGRAM); + } + args_info->compress_given++; + args_info->compress_arg = (int)atoi(val); + } + + /* Specifiy output file (default=stdout). */ + else if (strcmp(olong, "output") == 0) { + if (args_info->output_given) { + fprintf(stderr, "%s: `--output' (`-F') option given more than once\n", PROGRAM); + } + args_info->output_given++; + if (args_info->output_arg) free(args_info->output_arg); + args_info->output_arg = strdup(val); + } + + else { + fprintf(stderr, "%s: unknown long option '%s'.\n", PROGRAM, olong); + return (EXIT_FAILURE); + } + break; + + case '?': /* Invalid Option */ + fprintf(stderr, "%s: unknown option '%s'.\n", PROGRAM, olong); + return (EXIT_FAILURE); + + + default: /* bug: options not considered. 
*/ + fprintf (stderr, "%s: option unknown: %c\n", PROGRAM, oshort); + abort (); + } /* switch */ + return 0; +} + + +/* Initialize options not yet given from environmental defaults */ +void +cmdline_parser_envdefaults(struct gengetopt_args_info *args_info) +{ + + + return; +} + + +/* Load option values from an .rc file */ +void +cmdline_parser_read_rcfile(const char *filename, + struct gengetopt_args_info *args_info, + int user_specified) +{ + char *fullname; + FILE *rcfile; + + if (!filename) return; /* ignore NULL filenames */ + +#if defined(HAVE_GETUID) && defined(HAVE_GETPWUID) + if (*filename == '~') { + /* tilde-expansion hack */ + struct passwd *pwent = getpwuid(getuid()); + if (!pwent) { + fprintf(stderr, "%s: user-id %d not found!\n", PROGRAM, getuid()); + return; + } + if (!pwent->pw_dir) { + fprintf(stderr, "%s: home directory for user-id %d not found!\n", PROGRAM, getuid()); + return; + } + fullname = (char *)malloc(strlen(pwent->pw_dir)+strlen(filename)); + strcpy(fullname, pwent->pw_dir); + strcat(fullname, filename+1); + } else { + fullname = strdup(filename); + } +#else /* !(defined(HAVE_GETUID) && defined(HAVE_GETPWUID)) */ + fullname = strdup(filename); +#endif /* defined(HAVE_GETUID) && defined(HAVE_GETPWUID) */ + + /* try to open */ + rcfile = fopen(fullname,"r"); + if (!rcfile) { + if (user_specified) { + fprintf(stderr, "%s: warning: open failed for rc-file '%s': %s\n", + PROGRAM, fullname, strerror(errno)); + } + } + else { + cmdline_parser_read_rc_stream(rcfile, fullname, args_info); + } + + /* cleanup */ + if (fullname != filename) free(fullname); + if (rcfile) fclose(rcfile); + + return; +} + + +/* Parse option values from an .rc file : guts */ +#define OPTPARSE_GET 32 +void +cmdline_parser_read_rc_stream(FILE *rcfile, + const char *filename, + struct gengetopt_args_info *args_info) +{ + char *optname = (char *)malloc(OPTPARSE_GET); + char *optval = (char *)malloc(OPTPARSE_GET); + size_t onsize = OPTPARSE_GET; + size_t ovsize = 
OPTPARSE_GET; + size_t onlen = 0; + size_t ovlen = 0; + int lineno = 0; + char c; + +#ifdef cmdline_parser_DEBUG + fprintf(stderr, "cmdline_parser_read_rc_stream('%s'):\n", filename); +#endif + + while ((c = fgetc(rcfile)) != EOF) { + onlen = 0; + ovlen = 0; + lineno++; + + /* -- get next option-name */ + /* skip leading space and comments */ + if (isspace(c)) continue; + if (c == '#') { + while ((c = fgetc(rcfile)) != EOF) { + if (c == '\n') break; + } + continue; + } + + /* parse option-name */ + while (c != EOF && c != '=' && !isspace(c)) { + /* re-allocate if necessary */ + if (onlen >= onsize-1) { + char *tmp = (char *)malloc(onsize+OPTPARSE_GET); + strcpy(tmp,optname); + free(optname); + + onsize += OPTPARSE_GET; + optname = tmp; + } + optname[onlen++] = c; + c = fgetc(rcfile); + } + optname[onlen++] = '\0'; + +#ifdef cmdline_parser_DEBUG + fprintf(stderr, "cmdline_parser_read_rc_stream('%s'): line %d: optname='%s'\n", + filename, lineno, optname); +#endif + + /* -- get next option-value */ + /* skip leading space */ + while ((c = fgetc(rcfile)) != EOF && isspace(c)) { + ; + } + + /* parse option-value */ + while (c != EOF && c != '\n') { + /* re-allocate if necessary */ + if (ovlen >= ovsize-1) { + char *tmp = (char *)malloc(ovsize+OPTPARSE_GET); + strcpy(tmp,optval); + free(optval); + ovsize += OPTPARSE_GET; + optval = tmp; + } + optval[ovlen++] = c; + c = fgetc(rcfile); + } + optval[ovlen++] = '\0'; + + /* now do the action for the option */ + if (cmdline_parser_parse_option('\0',optname,optval,args_info) != 0) { + fprintf(stderr, "%s: error in file '%s' at line %d.\n", PROGRAM, filename, lineno); + + } + } + + /* cleanup */ + free(optname); + free(optval); + + return; +} diff --git a/gfsm/gfsm/src/programs/gfsminvert_cmdparser.h b/gfsm/gfsm/src/programs/gfsminvert_cmdparser.h new file mode 100644 index 0000000..47b8bad --- /dev/null +++ b/gfsm/gfsm/src/programs/gfsminvert_cmdparser.h @@ -0,0 +1,63 @@ +/* -*- Mode: C -*- + * + * File: 
gfsminvert_cmdparser.h + * Description: Headers for command-line parser struct gengetopt_args_info. + * + * File autogenerated by optgen.perl version 0.05. + * + */ + +#ifndef gfsminvert_cmdparser_h +#define gfsminvert_cmdparser_h + +#ifdef __cplusplus +extern "C" { +#endif /* __cplusplus */ + +/* + * moocow: Never set PACKAGE and VERSION here. + */ + +struct gengetopt_args_info { + int compress_arg; /* Specify compression level of output file. (default=-1). */ + char * output_arg; /* Specifiy output file (default=stdout). (default=-). */ + + int help_given; /* Whether help was given */ + int version_given; /* Whether version was given */ + int compress_given; /* Whether compress was given */ + int output_given; /* Whether output was given */ + + char **inputs; /* unnamed arguments */ + unsigned inputs_num; /* number of unnamed arguments */ +}; + +/* read rc files (if any) and parse all command-line options in one swell foop */ +int cmdline_parser (int argc, char *const *argv, struct gengetopt_args_info *args_info); + +/* instantiate defaults from environment variables: you must call this yourself! 
*/ +void cmdline_parser_envdefaults (struct gengetopt_args_info *args_info); + +/* read a single rc-file */ +void cmdline_parser_read_rcfile (const char *filename, + struct gengetopt_args_info *args_info, + int user_specified); + +/* read a single rc-file (stream) */ +void cmdline_parser_read_rc_stream (FILE *rcfile, + const char *filename, + struct gengetopt_args_info *args_info); + +/* parse a single option */ +int cmdline_parser_parse_option (char oshort, const char *olong, const char *val, + struct gengetopt_args_info *args_info); + +/* print help message */ +void cmdline_parser_print_help(void); + +/* print version */ +void cmdline_parser_print_version(void); + +#ifdef __cplusplus +} +#endif /* __cplusplus */ +#endif /* gfsminvert_cmdparser_h */ diff --git a/gfsm/gfsm/src/programs/gfsminvert_main.c b/gfsm/gfsm/src/programs/gfsminvert_main.c new file mode 100644 index 0000000..acb285c --- /dev/null +++ b/gfsm/gfsm/src/programs/gfsminvert_main.c @@ -0,0 +1,95 @@ +/* + gfsm-utils : finite state automaton utilities + Copyright (C) 2004 by Bryan Jurish <moocow@ling.uni-potsdam.de> + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif + +#include <stdio.h> +#include <errno.h> +#include <string.h> +#include <stdlib.h> + +#include <gfsm.h> + +#include "gfsminvert_cmdparser.h" + +/*-------------------------------------------------------------------------- + * Globals + *--------------------------------------------------------------------------*/ +char *progname = "gfsminvert"; + +//-- options +struct gengetopt_args_info args; + +//-- files +const char *infilename = "-"; +const char *outfilename = "-"; + +//-- global structs +gfsmAutomaton *fsm; + +/*-------------------------------------------------------------------------- + * Option Processing + *--------------------------------------------------------------------------*/ +void get_my_options(int argc, char **argv) +{ + if (cmdline_parser(argc, argv, &args) != 0) + exit(1); + + //-- output + if (args.inputs_num) infilename = args.inputs[0]; + if (args.output_arg) outfilename = args.output_arg; + + //-- load environmental defaults + //cmdline_parser_envdefaults(&args); + + //-- initialize automaton + fsm = gfsm_automaton_new(); +} + + +/*-------------------------------------------------------------------------- + * MAIN + *--------------------------------------------------------------------------*/ +int main (int argc, char **argv) +{ + gfsmError *err = NULL; + get_my_options(argc,argv); + + //-- load automaton + if (!gfsm_automaton_load_bin_filename(fsm,infilename,&err)) { + g_printerr("%s: load failed for '%s': %s\n", progname, infilename, err->message); + exit(255); + } + + //-- invert + gfsm_automaton_invert(fsm); + + //-- spew automaton + if (!gfsm_automaton_save_bin_filename(fsm,outfilename,args.compress_arg,&err)) { + g_printerr("%s: store failed to '%s': %s\n", progname, 
outfilename, err->message); + exit(4); + } + + //-- cleanup + if (fsm) gfsm_automaton_free(fsm); + + return 0; +} diff --git a/gfsm/gfsm/src/programs/gfsmlabels.gog b/gfsm/gfsm/src/programs/gfsmlabels.gog new file mode 100644 index 0000000..56642da --- /dev/null +++ b/gfsm/gfsm/src/programs/gfsmlabels.gog @@ -0,0 +1,78 @@ +# -*- Mode: Shell-Script -*- +# +# Getopt::Gen specification +#----------------------------------------------------------------------------- +program "gfsmlabels" +#program_version "0.01" + +purpose "Map input text characters to gfsm labels" +author "Bryan Jurish <moocow@ling.uni-potsdam.de>" +on_reparse "warn" + +#----------------------------------------------------------------------------- +# Details +#----------------------------------------------------------------------------- +details "" + +#----------------------------------------------------------------------------- +# Files +#----------------------------------------------------------------------------- +#rcfile "/etc/mootrc" +#rcfile "~/.mootrc" + +#----------------------------------------------------------------------------- +# Arguments +#----------------------------------------------------------------------------- +argument "TXTFILE" "Input text file" \ + details=" +If unspecified, standard input will be read. +" + +#argument "BINFILE" "Output binary gfsm file" \ +# details=" +#If unspecified, output will be written to standard output. +#" + +#----------------------------------------------------------------------------- +# Options +#----------------------------------------------------------------------------- +#group "Basic Options" + +string "labels" l "Specify input alphabet (labels) file." \ + arg="LABELS" + +flag "att-mode" a "Parse string(s) in AT&T-compatible mode." \ + default="0" + +flag "map-mode" m "Output original strings in addition to label vectors." \ + default="0" + +flag "quiet" q "Suppress warnings about undefined symbols." 
\ + default="0" + +string "output" o "Specifiy output file (default=stdout)." \ + arg="FILE" \ + default="-" + +#----------------------------------------------------------------------------- +# Addenda +#----------------------------------------------------------------------------- +#addenda "" + +#----------------------------------------------------------------------------- +# Bugs +#----------------------------------------------------------------------------- +bugs " + +None known. + +" + +#----------------------------------------------------------------------------- +# Footer +#----------------------------------------------------------------------------- +#acknowledge `cat acknowledge.pod` + +seealso " +L<gfsmutils> +" diff --git a/gfsm/gfsm/src/programs/gfsmlabels_cmdparser.c b/gfsm/gfsm/src/programs/gfsmlabels_cmdparser.c new file mode 100644 index 0000000..078f4e5 --- /dev/null +++ b/gfsm/gfsm/src/programs/gfsmlabels_cmdparser.c @@ -0,0 +1,523 @@ +/* -*- Mode: C -*- + * + * File: gfsmlabels_cmdparser.c + * Description: Code for command-line parser struct gengetopt_args_info. + * + * File autogenerated by optgen.perl version 0.06 + * generated with the following command: + * /usr/local/bin/optgen.perl -u -l --no-handle-rcfile --nopod -F gfsmlabels_cmdparser gfsmlabels.gog + * + * The developers of optgen.perl consider the fixed text that goes in all + * optgen.perl output files to be in the public domain: + * we make no copyright claims on it. + * + */ + +#include <stdio.h> +#include <stdlib.h> +#include <errno.h> +#include <string.h> +#include <unistd.h> +#include <ctype.h> + +/* If we use autoconf/autoheader. 
*/ +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#ifdef HAVE_PWD_H +# include <pwd.h> +#endif + +/* Allow user-overrides for PACKAGE and VERSION */ +#ifndef PACKAGE +# define PACKAGE "PACKAGE" +#endif + +#ifndef VERSION +# define VERSION "VERSION" +#endif + + +#ifndef PROGRAM +# define PROGRAM "gfsmlabels" +#endif + +/* #define cmdline_parser_DEBUG */ + +/* Check for "configure's" getopt check result. */ +#ifndef HAVE_GETOPT_LONG +# include "getopt.h" +#else +# include <getopt.h> +#endif + +#if !defined(HAVE_STRDUP) && !defined(strdup) +# define strdup gengetopt_strdup +#endif /* HAVE_STRDUP */ + +#include "gfsmlabels_cmdparser.h" + + +/* user code section */ + +/* end user code section */ + + +void +cmdline_parser_print_version (void) +{ + printf("gfsmlabels (%s %s) by Bryan Jurish <moocow@ling.uni-potsdam.de>\n", PACKAGE, VERSION); +} + +void +cmdline_parser_print_help (void) +{ + cmdline_parser_print_version (); + printf("\n"); + printf("Purpose:\n"); + printf(" Map input text characters to gfsm labels\n"); + printf("\n"); + + printf("Usage: %s [OPTIONS]... TXTFILE\n", "gfsmlabels"); + + printf("\n"); + printf(" Arguments:\n"); + printf(" TXTFILE Input text file\n"); + + printf("\n"); + printf(" Options:\n"); + printf(" -h --help Print help and exit.\n"); + printf(" -V --version Print version and exit.\n"); + printf(" -lLABELS --labels=LABELS Specify input alphabet (labels) file.\n"); + printf(" -a --att-mode Parse string(s) in AT&T-compatible mode.\n"); + printf(" -m --map-mode Output original strings in addition to label vectors.\n"); + printf(" -q --quiet Suppress warnings about undefined symbols.\n"); + printf(" -oFILE --output=FILE Specifiy output file (default=stdout).\n"); +} + +#if !defined(HAVE_STRDUP) && !defined(strdup) +/* gengetopt_strdup(): automatically generated from strdup.c. 
*/ +/* strdup.c replacement of strdup, which is not standard */ +static char * +gengetopt_strdup (const char *s) +{ + char *result = (char*)malloc(strlen(s) + 1); + if (result == (char*)0) + return (char*)0; + strcpy(result, s); + return result; +} +#endif /* HAVE_STRDUP */ + +/* clear_args(args_info): clears all args & resets to defaults */ +static void +clear_args(struct gengetopt_args_info *args_info) +{ + args_info->labels_arg = NULL; + args_info->att_mode_flag = 0; + args_info->map_mode_flag = 0; + args_info->quiet_flag = 0; + args_info->output_arg = strdup("-"); +} + + +int +cmdline_parser (int argc, char * const *argv, struct gengetopt_args_info *args_info) +{ + int c; /* Character of the parsed option. */ + int missing_required_options = 0; + + args_info->help_given = 0; + args_info->version_given = 0; + args_info->labels_given = 0; + args_info->att_mode_given = 0; + args_info->map_mode_given = 0; + args_info->quiet_given = 0; + args_info->output_given = 0; + + clear_args(args_info); + + /* rcfile handling */ + + /* end rcfile handling */ + + optarg = 0; + optind = 1; + opterr = 1; + optopt = '?'; + + while (1) + { + int option_index = 0; + static struct option long_options[] = { + { "help", 0, NULL, 'h' }, + { "version", 0, NULL, 'V' }, + { "labels", 1, NULL, 'l' }, + { "att-mode", 0, NULL, 'a' }, + { "map-mode", 0, NULL, 'm' }, + { "quiet", 0, NULL, 'q' }, + { "output", 1, NULL, 'o' }, + { NULL, 0, NULL, 0 } + }; + static char short_options[] = { + 'h', + 'V', + 'l', ':', + 'a', + 'm', + 'q', + 'o', ':', + '\0' + }; + + c = getopt_long (argc, argv, short_options, long_options, &option_index); + + if (c == -1) break; /* Exit from 'while (1)' loop. 
*/ + + if (cmdline_parser_parse_option(c, long_options[option_index].name, optarg, args_info) != 0) { + exit (EXIT_FAILURE); + } + } /* while */ + + + + if ( missing_required_options ) + exit (EXIT_FAILURE); + + + if (optind < argc) { + int i = 0 ; + args_info->inputs_num = argc - optind ; + args_info->inputs = (char **)(malloc ((args_info->inputs_num)*sizeof(char *))) ; + while (optind < argc) + args_info->inputs[ i++ ] = strdup (argv[optind++]) ; + } + + return 0; +} + + +/* Parse a single option */ +int +cmdline_parser_parse_option(char oshort, const char *olong, const char *val, + struct gengetopt_args_info *args_info) +{ + if (!oshort && !(olong && *olong)) return 1; /* ignore null options */ + +#ifdef cmdline_parser_DEBUG + fprintf(stderr, "parse_option(): oshort='%c', olong='%s', val='%s'\n", oshort, olong, val);*/ +#endif + + switch (oshort) + { + case 'h': /* Print help and exit. */ + if (args_info->help_given) { + fprintf(stderr, "%s: `--help' (`-h') option given more than once\n", PROGRAM); + } + clear_args(args_info); + cmdline_parser_print_help(); + exit(EXIT_SUCCESS); + + break; + + case 'V': /* Print version and exit. */ + if (args_info->version_given) { + fprintf(stderr, "%s: `--version' (`-V') option given more than once\n", PROGRAM); + } + clear_args(args_info); + cmdline_parser_print_version(); + exit(EXIT_SUCCESS); + + break; + + case 'l': /* Specify input alphabet (labels) file. */ + if (args_info->labels_given) { + fprintf(stderr, "%s: `--labels' (`-l') option given more than once\n", PROGRAM); + } + args_info->labels_given++; + if (args_info->labels_arg) free(args_info->labels_arg); + args_info->labels_arg = strdup(val); + break; + + case 'a': /* Parse string(s) in AT&T-compatible mode. 
*/ + if (args_info->att_mode_given) { + fprintf(stderr, "%s: `--att-mode' (`-a') option given more than once\n", PROGRAM); + } + args_info->att_mode_given++; + if (args_info->att_mode_given <= 1) + args_info->att_mode_flag = !(args_info->att_mode_flag); + break; + + case 'm': /* Output original strings in addition to label vectors. */ + if (args_info->map_mode_given) { + fprintf(stderr, "%s: `--map-mode' (`-m') option given more than once\n", PROGRAM); + } + args_info->map_mode_given++; + if (args_info->map_mode_given <= 1) + args_info->map_mode_flag = !(args_info->map_mode_flag); + break; + + case 'q': /* Suppress warnings about undefined symbols. */ + if (args_info->quiet_given) { + fprintf(stderr, "%s: `--quiet' (`-q') option given more than once\n", PROGRAM); + } + args_info->quiet_given++; + if (args_info->quiet_given <= 1) + args_info->quiet_flag = !(args_info->quiet_flag); + break; + + case 'o': /* Specifiy output file (default=stdout). */ + if (args_info->output_given) { + fprintf(stderr, "%s: `--output' (`-o') option given more than once\n", PROGRAM); + } + args_info->output_given++; + if (args_info->output_arg) free(args_info->output_arg); + args_info->output_arg = strdup(val); + break; + + case 0: /* Long option(s) with no short form */ + /* Print help and exit. */ + if (strcmp(olong, "help") == 0) { + if (args_info->help_given) { + fprintf(stderr, "%s: `--help' (`-h') option given more than once\n", PROGRAM); + } + clear_args(args_info); + cmdline_parser_print_help(); + exit(EXIT_SUCCESS); + + } + + /* Print version and exit. */ + else if (strcmp(olong, "version") == 0) { + if (args_info->version_given) { + fprintf(stderr, "%s: `--version' (`-V') option given more than once\n", PROGRAM); + } + clear_args(args_info); + cmdline_parser_print_version(); + exit(EXIT_SUCCESS); + + } + + /* Specify input alphabet (labels) file. 
*/ + else if (strcmp(olong, "labels") == 0) { + if (args_info->labels_given) { + fprintf(stderr, "%s: `--labels' (`-l') option given more than once\n", PROGRAM); + } + args_info->labels_given++; + if (args_info->labels_arg) free(args_info->labels_arg); + args_info->labels_arg = strdup(val); + } + + /* Parse string(s) in AT&T-compatible mode. */ + else if (strcmp(olong, "att-mode") == 0) { + if (args_info->att_mode_given) { + fprintf(stderr, "%s: `--att-mode' (`-a') option given more than once\n", PROGRAM); + } + args_info->att_mode_given++; + if (args_info->att_mode_given <= 1) + args_info->att_mode_flag = !(args_info->att_mode_flag); + } + + /* Output original strings in addition to label vectors. */ + else if (strcmp(olong, "map-mode") == 0) { + if (args_info->map_mode_given) { + fprintf(stderr, "%s: `--map-mode' (`-m') option given more than once\n", PROGRAM); + } + args_info->map_mode_given++; + if (args_info->map_mode_given <= 1) + args_info->map_mode_flag = !(args_info->map_mode_flag); + } + + /* Suppress warnings about undefined symbols. */ + else if (strcmp(olong, "quiet") == 0) { + if (args_info->quiet_given) { + fprintf(stderr, "%s: `--quiet' (`-q') option given more than once\n", PROGRAM); + } + args_info->quiet_given++; + if (args_info->quiet_given <= 1) + args_info->quiet_flag = !(args_info->quiet_flag); + } + + /* Specifiy output file (default=stdout). */ + else if (strcmp(olong, "output") == 0) { + if (args_info->output_given) { + fprintf(stderr, "%s: `--output' (`-o') option given more than once\n", PROGRAM); + } + args_info->output_given++; + if (args_info->output_arg) free(args_info->output_arg); + args_info->output_arg = strdup(val); + } + + else { + fprintf(stderr, "%s: unknown long option '%s'.\n", PROGRAM, olong); + return (EXIT_FAILURE); + } + break; + + case '?': /* Invalid Option */ + fprintf(stderr, "%s: unknown option '%s'.\n", PROGRAM, olong); + return (EXIT_FAILURE); + + + default: /* bug: options not considered. 
*/ + fprintf (stderr, "%s: option unknown: %c\n", PROGRAM, oshort); + abort (); + } /* switch */ + return 0; +} + + +/* Initialize options not yet given from environmental defaults */ +void +cmdline_parser_envdefaults(struct gengetopt_args_info *args_info) +{ + + + return; +} + + +/* Load option values from an .rc file */ +void +cmdline_parser_read_rcfile(const char *filename, + struct gengetopt_args_info *args_info, + int user_specified) +{ + char *fullname; + FILE *rcfile; + + if (!filename) return; /* ignore NULL filenames */ + +#if defined(HAVE_GETUID) && defined(HAVE_GETPWUID) + if (*filename == '~') { + /* tilde-expansion hack */ + struct passwd *pwent = getpwuid(getuid()); + if (!pwent) { + fprintf(stderr, "%s: user-id %d not found!\n", PROGRAM, getuid()); + return; + } + if (!pwent->pw_dir) { + fprintf(stderr, "%s: home directory for user-id %d not found!\n", PROGRAM, getuid()); + return; + } + fullname = (char *)malloc(strlen(pwent->pw_dir)+strlen(filename)); + strcpy(fullname, pwent->pw_dir); + strcat(fullname, filename+1); + } else { + fullname = strdup(filename); + } +#else /* !(defined(HAVE_GETUID) && defined(HAVE_GETPWUID)) */ + fullname = strdup(filename); +#endif /* defined(HAVE_GETUID) && defined(HAVE_GETPWUID) */ + + /* try to open */ + rcfile = fopen(fullname,"r"); + if (!rcfile) { + if (user_specified) { + fprintf(stderr, "%s: warning: open failed for rc-file '%s': %s\n", + PROGRAM, fullname, strerror(errno)); + } + } + else { + cmdline_parser_read_rc_stream(rcfile, fullname, args_info); + } + + /* cleanup */ + if (fullname != filename) free(fullname); + if (rcfile) fclose(rcfile); + + return; +} + + +/* Parse option values from an .rc file : guts */ +#define OPTPARSE_GET 32 +void +cmdline_parser_read_rc_stream(FILE *rcfile, + const char *filename, + struct gengetopt_args_info *args_info) +{ + char *optname = (char *)malloc(OPTPARSE_GET); + char *optval = (char *)malloc(OPTPARSE_GET); + size_t onsize = OPTPARSE_GET; + size_t ovsize = 
OPTPARSE_GET; + size_t onlen = 0; + size_t ovlen = 0; + int lineno = 0; + char c; + +#ifdef cmdline_parser_DEBUG + fprintf(stderr, "cmdline_parser_read_rc_stream('%s'):\n", filename); +#endif + + while ((c = fgetc(rcfile)) != EOF) { + onlen = 0; + ovlen = 0; + lineno++; + + /* -- get next option-name */ + /* skip leading space and comments */ + if (isspace(c)) continue; + if (c == '#') { + while ((c = fgetc(rcfile)) != EOF) { + if (c == '\n') break; + } + continue; + } + + /* parse option-name */ + while (c != EOF && c != '=' && !isspace(c)) { + /* re-allocate if necessary */ + if (onlen >= onsize-1) { + char *tmp = (char *)malloc(onsize+OPTPARSE_GET); + strcpy(tmp,optname); + free(optname); + + onsize += OPTPARSE_GET; + optname = tmp; + } + optname[onlen++] = c; + c = fgetc(rcfile); + } + optname[onlen++] = '\0'; + +#ifdef cmdline_parser_DEBUG + fprintf(stderr, "cmdline_parser_read_rc_stream('%s'): line %d: optname='%s'\n", + filename, lineno, optname); +#endif + + /* -- get next option-value */ + /* skip leading space */ + while ((c = fgetc(rcfile)) != EOF && isspace(c)) { + ; + } + + /* parse option-value */ + while (c != EOF && c != '\n') { + /* re-allocate if necessary */ + if (ovlen >= ovsize-1) { + char *tmp = (char *)malloc(ovsize+OPTPARSE_GET); + strcpy(tmp,optval); + free(optval); + ovsize += OPTPARSE_GET; + optval = tmp; + } + optval[ovlen++] = c; + c = fgetc(rcfile); + } + optval[ovlen++] = '\0'; + + /* now do the action for the option */ + if (cmdline_parser_parse_option('\0',optname,optval,args_info) != 0) { + fprintf(stderr, "%s: error in file '%s' at line %d.\n", PROGRAM, filename, lineno); + + } + } + + /* cleanup */ + free(optname); + free(optval); + + return; +} diff --git a/gfsm/gfsm/src/programs/gfsmlabels_cmdparser.h b/gfsm/gfsm/src/programs/gfsmlabels_cmdparser.h new file mode 100644 index 0000000..71f8d95 --- /dev/null +++ b/gfsm/gfsm/src/programs/gfsmlabels_cmdparser.h @@ -0,0 +1,69 @@ +/* -*- Mode: C -*- + * + * File: 
gfsmlabels_cmdparser.h + * Description: Headers for command-line parser struct gengetopt_args_info. + * + * File autogenerated by optgen.perl version 0.06. + * + */ + +#ifndef gfsmlabels_cmdparser_h +#define gfsmlabels_cmdparser_h + +#ifdef __cplusplus +extern "C" { +#endif /* __cplusplus */ + +/* + * moocow: Never set PACKAGE and VERSION here. + */ + +struct gengetopt_args_info { + char * labels_arg; /* Specify input alphabet (labels) file. (default=NULL). */ + int att_mode_flag; /* Parse string(s) in AT&T-compatible mode. (default=0). */ + int map_mode_flag; /* Output original strings in addition to label vectors. (default=0). */ + int quiet_flag; /* Suppress warnings about undefined symbols. (default=0). */ + char * output_arg; /* Specifiy output file (default=stdout). (default=-). */ + + int help_given; /* Whether help was given */ + int version_given; /* Whether version was given */ + int labels_given; /* Whether labels was given */ + int att_mode_given; /* Whether att-mode was given */ + int map_mode_given; /* Whether map-mode was given */ + int quiet_given; /* Whether quiet was given */ + int output_given; /* Whether output was given */ + + char **inputs; /* unnamed arguments */ + unsigned inputs_num; /* number of unnamed arguments */ +}; + +/* read rc files (if any) and parse all command-line options in one swell foop */ +int cmdline_parser (int argc, char *const *argv, struct gengetopt_args_info *args_info); + +/* instantiate defaults from environment variables: you must call this yourself! 
*/ +void cmdline_parser_envdefaults (struct gengetopt_args_info *args_info); + +/* read a single rc-file */ +void cmdline_parser_read_rcfile (const char *filename, + struct gengetopt_args_info *args_info, + int user_specified); + +/* read a single rc-file (stream) */ +void cmdline_parser_read_rc_stream (FILE *rcfile, + const char *filename, + struct gengetopt_args_info *args_info); + +/* parse a single option */ +int cmdline_parser_parse_option (char oshort, const char *olong, const char *val, + struct gengetopt_args_info *args_info); + +/* print help message */ +void cmdline_parser_print_help(void); + +/* print version */ +void cmdline_parser_print_version(void); + +#ifdef __cplusplus +} +#endif /* __cplusplus */ +#endif /* gfsmlabels_cmdparser_h */ diff --git a/gfsm/gfsm/src/programs/gfsmlabels_main.c b/gfsm/gfsm/src/programs/gfsmlabels_main.c new file mode 100644 index 0000000..0e25c78 --- /dev/null +++ b/gfsm/gfsm/src/programs/gfsmlabels_main.c @@ -0,0 +1,208 @@ +/* + gfsm-utils : finite state automaton utilities + Copyright (C) 2005-2008 by Bryan Jurish <moocow@ling.uni-potsdam.de> + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <errno.h> +#include <ctype.h> + +#include <gfsm.h> + +/*-- use gnulib --*/ +#include "gnulib/getdelim.h" + +#include "gfsmlabels_cmdparser.h" + +/*-------------------------------------------------------------------------- + * Globals + *--------------------------------------------------------------------------*/ +char *progname = "gfsmlabels"; + +//-- options +struct gengetopt_args_info args; + +//-- files +const char *infilename = "-"; +const char *outfilename = "-"; + +FILE *outfile = NULL; + +//-- global structs +gfsmAlphabet *labels=NULL; +gfsmError *err = NULL; +gboolean att_mode = FALSE; +gboolean map_mode = FALSE; +gboolean warn_on_undef = TRUE; + +/* HACK */ +//extern ssize_t getline(char **LINEPTR, size_t *N, FILE *STREAM); + +/*-------------------------------------------------------------------------- + * Option Processing + *--------------------------------------------------------------------------*/ +void get_my_options(int argc, char **argv) +{ + if (cmdline_parser(argc, argv, &args) != 0) + exit(1); + + //-- load environmental defaults + //cmdline_parser_envdefaults(&args); + + //-- filenames + if (args.inputs_num > 0) infilename = args.inputs[0]; + outfilename = args.output_arg; + + //-- open output file + if (args.output_given) { + outfilename = args.output_arg; + outfile = gfsm_open_filename(outfilename,"w",&err); + if (!outfile) { + g_printerr("%s: open failed for output file '%s': %s\n", + progname, outfilename, strerror(errno)); + exit(2); + } + } + else { + outfile = stdout; + } + + //-- labels + if (args.labels_given) { + labels = gfsm_string_alphabet_new(); + if (!gfsm_alphabet_load_filename(labels, 
args.labels_arg, &err)) { + g_printerr("%s: load failed for labels file '%s': %s\n", + progname, args.labels_arg, err->message); + exit(3); + } + } else { + g_printerr("%s: no labels file specified!\n", progname); + exit(3); + } + + //-- mode flags + att_mode = args.att_mode_flag; + map_mode = args.map_mode_flag; + warn_on_undef = !args.quiet_flag; +} + +/*-------------------------------------------------------------------------- + * apply_labels_file() + */ +void apply_labels_file(gfsmAlphabet *labels, FILE *infile, FILE *outfile) +{ + char *str = NULL; + size_t buflen = 0; + ssize_t linelen = 0; + ssize_t i; + gfsmLabelVal lab; + gfsmLabelVector *vec = g_ptr_array_new(); + + while (!feof(infile)) { + linelen = getdelim(&str,&buflen,'\n',infile); + if (linelen<0) { break; } //-- EOF + + //-- truncate terminating newline character + if (str[linelen-1] == '\n') { str[linelen-1] = 0; } + + //-- map mode? + if (map_mode) { fprintf(outfile, "%s\t", str); } + + //-- convert + vec = gfsm_alphabet_generic_string_to_labels(labels,str,vec,warn_on_undef,att_mode); + + //-- dump labels + for (i=0; i<vec->len; i++) { + lab = GPOINTER_TO_UINT(vec->pdata[i]); + if (i>0) { fputc(' ',outfile); } + fprintf(outfile, "%d", lab); + } + fputc('\n', outfile); + } + + if (str) free(str); + if (vec) g_ptr_array_free(vec,TRUE); +} + +void apply_labels_file_0(gfsmAlphabet *labels, FILE *infile, FILE *outfile) +{ + char *str = NULL; + size_t buflen = 0; + ssize_t linelen = 0; + ssize_t i; + gfsmLabelVal lab; + char cs[2] = {'\0', '\0'}; + + while (!feof(infile)) { + /*linelen = getline(&str,&buflen,infile);*/ + linelen = getdelim(&str,&buflen,'\n',infile); + for (i=0; i < linelen; i++) { + if (isspace(str[i])) continue; + cs[0] = str[i]; + lab = gfsm_alphabet_find_label(labels,cs); + + if (lab==gfsmNoLabel) { + g_printerr("%s: Warning: no label for character '%c' -- skipping.\n", + progname, cs[0]); + continue; + } + + fprintf(outfile, "%d ", lab); + } + fputs("\n", outfile); + } + + if 
(str) free(str); +} + +/*-------------------------------------------------------------------------- + * MAIN + *--------------------------------------------------------------------------*/ +int main (int argc, char **argv) +{ + int i; + + GFSM_INIT + get_my_options(argc,argv); + + //-- process input(s) + if (args.inputs_num==0) { + apply_labels_file(labels,stdin,outfile); + } + for (i=0; i < args.inputs_num; i++) { + FILE *infile = (strcmp(args.inputs[i],"-")==0 ? stdin : fopen(args.inputs[i], "r")); + if (!infile) { + g_printerr("%s: load failed for input file '%s': %s\n", progname, args.inputs[i], strerror(errno)); + exit(255); + } + apply_labels_file(labels,infile,outfile); + if (infile != stdin) fclose(infile); + } + + + //-- cleanup + if (labels) gfsm_alphabet_free(labels); + + GFSM_FINISH + return 0; +} diff --git a/gfsm/gfsm/src/programs/gfsmlookup.gog b/gfsm/gfsm/src/programs/gfsmlookup.gog new file mode 100644 index 0000000..500108f --- /dev/null +++ b/gfsm/gfsm/src/programs/gfsmlookup.gog @@ -0,0 +1,77 @@ +# -*- Mode: Shell-Script -*- +# +# Getopt::Gen specification +#----------------------------------------------------------------------------- +program "gfsmlookup" +#program_version "0.01" + +purpose "Apply a transducer to a linear label sequence" +author "Bryan Jurish <moocow@ling.uni-potsdam.de>" +on_reparse "warn" + +#----------------------------------------------------------------------------- +# Details +#----------------------------------------------------------------------------- +details "" + +#----------------------------------------------------------------------------- +# Files +#----------------------------------------------------------------------------- +#rcfile "/etc/mootrc" +#rcfile "~/.mootrc" + +#----------------------------------------------------------------------------- +# Arguments +#----------------------------------------------------------------------------- +argument "LABELID..." 
"Input label IDs" \ + details=" +In ASCII decimal notation. +" + +#----------------------------------------------------------------------------- +# Options +#----------------------------------------------------------------------------- +#group "Basic Options" + +string "fst" f "Transducer to apply (default=stdin)." \ + arg="FSTFILE" \ + default="-" \ + details=" +If unspecified, standard input will be read. +" + +int "compress" z "Specify compression level of output file." \ + arg="LEVEL" \ + default="-1" \ + details=" +Specify zlib compression level of output file. -1 (default) indicates +the default compression level, 0 (zero) indicates no zlib compression at all, +and 9 indicates the best possible compression. +" + +string "output" F "Specifiy output file (default=stdout)." \ + arg="FILE" \ + default="-" + +#----------------------------------------------------------------------------- +# Addenda +#----------------------------------------------------------------------------- +#addenda "" + +#----------------------------------------------------------------------------- +# Bugs +#----------------------------------------------------------------------------- +bugs " + +None known. + +" + +#----------------------------------------------------------------------------- +# Footer +#----------------------------------------------------------------------------- +#acknowledge `cat acknowledge.pod` + +seealso " +L<gfsmutils> +" diff --git a/gfsm/gfsm/src/programs/gfsmlookup_cmdparser.c b/gfsm/gfsm/src/programs/gfsmlookup_cmdparser.c new file mode 100644 index 0000000..ae36643 --- /dev/null +++ b/gfsm/gfsm/src/programs/gfsmlookup_cmdparser.c @@ -0,0 +1,473 @@ +/* -*- Mode: C -*- + * + * File: gfsmlookup_cmdparser.c + * Description: Code for command-line parser struct gengetopt_args_info. 
+ * + * File autogenerated by optgen.perl version 0.05 + * generated with the following command: + * /usr/local/bin/optgen.perl -u -l --no-handle-rcfile --nopod -F gfsmlookup_cmdparser gfsmlookup.gog + * + * The developers of optgen.perl consider the fixed text that goes in all + * optgen.perl output files to be in the public domain: + * we make no copyright claims on it. + * + */ + +#include <stdio.h> +#include <stdlib.h> +#include <errno.h> +#include <string.h> +#include <unistd.h> +#include <ctype.h> + +/* If we use autoconf/autoheader. */ +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#ifdef HAVE_PWD_H +# include <pwd.h> +#endif + +/* Allow user-overrides for PACKAGE and VERSION */ +#ifndef PACKAGE +# define PACKAGE "PACKAGE" +#endif + +#ifndef VERSION +# define VERSION "VERSION" +#endif + + +#ifndef PROGRAM +# define PROGRAM "gfsmlookup" +#endif + +/* #define cmdline_parser_DEBUG */ + +/* Check for "configure's" getopt check result. */ +#ifndef HAVE_GETOPT_LONG +# include "getopt.h" +#else +# include <getopt.h> +#endif + +#if !defined(HAVE_STRDUP) && !defined(strdup) +# define strdup gengetopt_strdup +#endif /* HAVE_STRDUP */ + +#include "gfsmlookup_cmdparser.h" + + +/* user code section */ + +/* end user code section */ + + +void +cmdline_parser_print_version (void) +{ + printf("gfsmlookup (%s %s) by Bryan Jurish <moocow@ling.uni-potsdam.de>\n", PACKAGE, VERSION); +} + +void +cmdline_parser_print_help (void) +{ + cmdline_parser_print_version (); + printf("\n"); + printf("Purpose:\n"); + printf(" Apply a transducer to a linear label sequence\n"); + printf("\n"); + + printf("Usage: %s [OPTIONS]... LABELID...\n", "gfsmlookup"); + + printf("\n"); + printf(" Arguments:\n"); + printf(" LABELID... 
Input label IDs\n"); + + printf("\n"); + printf(" Options:\n"); + printf(" -h --help Print help and exit.\n"); + printf(" -V --version Print version and exit.\n"); + printf(" -fFSTFILE --fst=FSTFILE Transducer to apply (default=stdin).\n"); + printf(" -zLEVEL --compress=LEVEL Specify compression level of output file.\n"); + printf(" -FFILE --output=FILE Specifiy output file (default=stdout).\n"); +} + +#if !defined(HAVE_STRDUP) && !defined(strdup) +/* gengetopt_strdup(): automatically generated from strdup.c. */ +/* strdup.c replacement of strdup, which is not standard */ +static char * +gengetopt_strdup (const char *s) +{ + char *result = (char*)malloc(strlen(s) + 1); + if (result == (char*)0) + return (char*)0; + strcpy(result, s); + return result; +} +#endif /* HAVE_STRDUP */ + +/* clear_args(args_info): clears all args & resets to defaults */ +static void +clear_args(struct gengetopt_args_info *args_info) +{ + args_info->fst_arg = strdup("-"); + args_info->compress_arg = -1; + args_info->output_arg = strdup("-"); +} + + +int +cmdline_parser (int argc, char * const *argv, struct gengetopt_args_info *args_info) +{ + int c; /* Character of the parsed option. 
*/ + int missing_required_options = 0; + + args_info->help_given = 0; + args_info->version_given = 0; + args_info->fst_given = 0; + args_info->compress_given = 0; + args_info->output_given = 0; + + clear_args(args_info); + + /* rcfile handling */ + + /* end rcfile handling */ + + optarg = 0; + optind = 1; + opterr = 1; + optopt = '?'; + + while (1) + { + int option_index = 0; + static struct option long_options[] = { + { "help", 0, NULL, 'h' }, + { "version", 0, NULL, 'V' }, + { "fst", 1, NULL, 'f' }, + { "compress", 1, NULL, 'z' }, + { "output", 1, NULL, 'F' }, + { NULL, 0, NULL, 0 } + }; + static char short_options[] = { + 'h', + 'V', + 'f', ':', + 'z', ':', + 'F', ':', + '\0' + }; + + c = getopt_long (argc, argv, short_options, long_options, &option_index); + + if (c == -1) break; /* Exit from 'while (1)' loop. */ + + if (cmdline_parser_parse_option(c, long_options[option_index].name, optarg, args_info) != 0) { + exit (EXIT_FAILURE); + } + } /* while */ + + + + if ( missing_required_options ) + exit (EXIT_FAILURE); + + + if (optind < argc) { + int i = 0 ; + args_info->inputs_num = argc - optind ; + args_info->inputs = (char **)(malloc ((args_info->inputs_num)*sizeof(char *))) ; + while (optind < argc) + args_info->inputs[ i++ ] = strdup (argv[optind++]) ; + } + + return 0; +} + + +/* Parse a single option */ +int +cmdline_parser_parse_option(char oshort, const char *olong, const char *val, + struct gengetopt_args_info *args_info) +{ + if (!oshort && !(olong && *olong)) return 1; /* ignore null options */ + +#ifdef cmdline_parser_DEBUG + fprintf(stderr, "parse_option(): oshort='%c', olong='%s', val='%s'\n", oshort, olong, val);*/ +#endif + + switch (oshort) + { + case 'h': /* Print help and exit. */ + if (args_info->help_given) { + fprintf(stderr, "%s: `--help' (`-h') option given more than once\n", PROGRAM); + } + clear_args(args_info); + cmdline_parser_print_help(); + exit(EXIT_SUCCESS); + + break; + + case 'V': /* Print version and exit. 
*/ + if (args_info->version_given) { + fprintf(stderr, "%s: `--version' (`-V') option given more than once\n", PROGRAM); + } + clear_args(args_info); + cmdline_parser_print_version(); + exit(EXIT_SUCCESS); + + break; + + case 'f': /* Transducer to apply (default=stdin). */ + if (args_info->fst_given) { + fprintf(stderr, "%s: `--fst' (`-f') option given more than once\n", PROGRAM); + } + args_info->fst_given++; + if (args_info->fst_arg) free(args_info->fst_arg); + args_info->fst_arg = strdup(val); + break; + + case 'z': /* Specify compression level of output file. */ + if (args_info->compress_given) { + fprintf(stderr, "%s: `--compress' (`-z') option given more than once\n", PROGRAM); + } + args_info->compress_given++; + args_info->compress_arg = (int)atoi(val); + break; + + case 'F': /* Specifiy output file (default=stdout). */ + if (args_info->output_given) { + fprintf(stderr, "%s: `--output' (`-F') option given more than once\n", PROGRAM); + } + args_info->output_given++; + if (args_info->output_arg) free(args_info->output_arg); + args_info->output_arg = strdup(val); + break; + + case 0: /* Long option(s) with no short form */ + /* Print help and exit. */ + if (strcmp(olong, "help") == 0) { + if (args_info->help_given) { + fprintf(stderr, "%s: `--help' (`-h') option given more than once\n", PROGRAM); + } + clear_args(args_info); + cmdline_parser_print_help(); + exit(EXIT_SUCCESS); + + } + + /* Print version and exit. */ + else if (strcmp(olong, "version") == 0) { + if (args_info->version_given) { + fprintf(stderr, "%s: `--version' (`-V') option given more than once\n", PROGRAM); + } + clear_args(args_info); + cmdline_parser_print_version(); + exit(EXIT_SUCCESS); + + } + + /* Transducer to apply (default=stdin). 
*/ + else if (strcmp(olong, "fst") == 0) { + if (args_info->fst_given) { + fprintf(stderr, "%s: `--fst' (`-f') option given more than once\n", PROGRAM); + } + args_info->fst_given++; + if (args_info->fst_arg) free(args_info->fst_arg); + args_info->fst_arg = strdup(val); + } + + /* Specify compression level of output file. */ + else if (strcmp(olong, "compress") == 0) { + if (args_info->compress_given) { + fprintf(stderr, "%s: `--compress' (`-z') option given more than once\n", PROGRAM); + } + args_info->compress_given++; + args_info->compress_arg = (int)atoi(val); + } + + /* Specifiy output file (default=stdout). */ + else if (strcmp(olong, "output") == 0) { + if (args_info->output_given) { + fprintf(stderr, "%s: `--output' (`-F') option given more than once\n", PROGRAM); + } + args_info->output_given++; + if (args_info->output_arg) free(args_info->output_arg); + args_info->output_arg = strdup(val); + } + + else { + fprintf(stderr, "%s: unknown long option '%s'.\n", PROGRAM, olong); + return (EXIT_FAILURE); + } + break; + + case '?': /* Invalid Option */ + fprintf(stderr, "%s: unknown option '%s'.\n", PROGRAM, olong); + return (EXIT_FAILURE); + + + default: /* bug: options not considered. 
*/ + fprintf (stderr, "%s: option unknown: %c\n", PROGRAM, oshort); + abort (); + } /* switch */ + return 0; +} + + +/* Initialize options not yet given from environmental defaults */ +void +cmdline_parser_envdefaults(struct gengetopt_args_info *args_info) +{ + + + return; +} + + +/* Load option values from an .rc file */ +void +cmdline_parser_read_rcfile(const char *filename, + struct gengetopt_args_info *args_info, + int user_specified) +{ + char *fullname; + FILE *rcfile; + + if (!filename) return; /* ignore NULL filenames */ + +#if defined(HAVE_GETUID) && defined(HAVE_GETPWUID) + if (*filename == '~') { + /* tilde-expansion hack */ + struct passwd *pwent = getpwuid(getuid()); + if (!pwent) { + fprintf(stderr, "%s: user-id %d not found!\n", PROGRAM, getuid()); + return; + } + if (!pwent->pw_dir) { + fprintf(stderr, "%s: home directory for user-id %d not found!\n", PROGRAM, getuid()); + return; + } + fullname = (char *)malloc(strlen(pwent->pw_dir)+strlen(filename)); + strcpy(fullname, pwent->pw_dir); + strcat(fullname, filename+1); + } else { + fullname = strdup(filename); + } +#else /* !(defined(HAVE_GETUID) && defined(HAVE_GETPWUID)) */ + fullname = strdup(filename); +#endif /* defined(HAVE_GETUID) && defined(HAVE_GETPWUID) */ + + /* try to open */ + rcfile = fopen(fullname,"r"); + if (!rcfile) { + if (user_specified) { + fprintf(stderr, "%s: warning: open failed for rc-file '%s': %s\n", + PROGRAM, fullname, strerror(errno)); + } + } + else { + cmdline_parser_read_rc_stream(rcfile, fullname, args_info); + } + + /* cleanup */ + if (fullname != filename) free(fullname); + if (rcfile) fclose(rcfile); + + return; +} + + +/* Parse option values from an .rc file : guts */ +#define OPTPARSE_GET 32 +void +cmdline_parser_read_rc_stream(FILE *rcfile, + const char *filename, + struct gengetopt_args_info *args_info) +{ + char *optname = (char *)malloc(OPTPARSE_GET); + char *optval = (char *)malloc(OPTPARSE_GET); + size_t onsize = OPTPARSE_GET; + size_t ovsize = 
OPTPARSE_GET; + size_t onlen = 0; + size_t ovlen = 0; + int lineno = 0; + char c; + +#ifdef cmdline_parser_DEBUG + fprintf(stderr, "cmdline_parser_read_rc_stream('%s'):\n", filename); +#endif + + while ((c = fgetc(rcfile)) != EOF) { + onlen = 0; + ovlen = 0; + lineno++; + + /* -- get next option-name */ + /* skip leading space and comments */ + if (isspace(c)) continue; + if (c == '#') { + while ((c = fgetc(rcfile)) != EOF) { + if (c == '\n') break; + } + continue; + } + + /* parse option-name */ + while (c != EOF && c != '=' && !isspace(c)) { + /* re-allocate if necessary */ + if (onlen >= onsize-1) { + char *tmp = (char *)malloc(onsize+OPTPARSE_GET); + strcpy(tmp,optname); + free(optname); + + onsize += OPTPARSE_GET; + optname = tmp; + } + optname[onlen++] = c; + c = fgetc(rcfile); + } + optname[onlen++] = '\0'; + +#ifdef cmdline_parser_DEBUG + fprintf(stderr, "cmdline_parser_read_rc_stream('%s'): line %d: optname='%s'\n", + filename, lineno, optname); +#endif + + /* -- get next option-value */ + /* skip leading space */ + while ((c = fgetc(rcfile)) != EOF && isspace(c)) { + ; + } + + /* parse option-value */ + while (c != EOF && c != '\n') { + /* re-allocate if necessary */ + if (ovlen >= ovsize-1) { + char *tmp = (char *)malloc(ovsize+OPTPARSE_GET); + strcpy(tmp,optval); + free(optval); + ovsize += OPTPARSE_GET; + optval = tmp; + } + optval[ovlen++] = c; + c = fgetc(rcfile); + } + optval[ovlen++] = '\0'; + + /* now do the action for the option */ + if (cmdline_parser_parse_option('\0',optname,optval,args_info) != 0) { + fprintf(stderr, "%s: error in file '%s' at line %d.\n", PROGRAM, filename, lineno); + + } + } + + /* cleanup */ + free(optname); + free(optval); + + return; +} diff --git a/gfsm/gfsm/src/programs/gfsmlookup_cmdparser.h b/gfsm/gfsm/src/programs/gfsmlookup_cmdparser.h new file mode 100644 index 0000000..3b20b24 --- /dev/null +++ b/gfsm/gfsm/src/programs/gfsmlookup_cmdparser.h @@ -0,0 +1,65 @@ +/* -*- Mode: C -*- + * + * File: 
gfsmlookup_cmdparser.h + * Description: Headers for command-line parser struct gengetopt_args_info. + * + * File autogenerated by optgen.perl version 0.05. + * + */ + +#ifndef gfsmlookup_cmdparser_h +#define gfsmlookup_cmdparser_h + +#ifdef __cplusplus +extern "C" { +#endif /* __cplusplus */ + +/* + * moocow: Never set PACKAGE and VERSION here. + */ + +struct gengetopt_args_info { + char * fst_arg; /* Transducer to apply (default=stdin). (default=-). */ + int compress_arg; /* Specify compression level of output file. (default=-1). */ + char * output_arg; /* Specifiy output file (default=stdout). (default=-). */ + + int help_given; /* Whether help was given */ + int version_given; /* Whether version was given */ + int fst_given; /* Whether fst was given */ + int compress_given; /* Whether compress was given */ + int output_given; /* Whether output was given */ + + char **inputs; /* unnamed arguments */ + unsigned inputs_num; /* number of unnamed arguments */ +}; + +/* read rc files (if any) and parse all command-line options in one swell foop */ +int cmdline_parser (int argc, char *const *argv, struct gengetopt_args_info *args_info); + +/* instantiate defaults from environment variables: you must call this yourself! 
*/ +void cmdline_parser_envdefaults (struct gengetopt_args_info *args_info); + +/* read a single rc-file */ +void cmdline_parser_read_rcfile (const char *filename, + struct gengetopt_args_info *args_info, + int user_specified); + +/* read a single rc-file (stream) */ +void cmdline_parser_read_rc_stream (FILE *rcfile, + const char *filename, + struct gengetopt_args_info *args_info); + +/* parse a single option */ +int cmdline_parser_parse_option (char oshort, const char *olong, const char *val, + struct gengetopt_args_info *args_info); + +/* print help message */ +void cmdline_parser_print_help(void); + +/* print version */ +void cmdline_parser_print_version(void); + +#ifdef __cplusplus +} +#endif /* __cplusplus */ +#endif /* gfsmlookup_cmdparser_h */ diff --git a/gfsm/gfsm/src/programs/gfsmlookup_main.c b/gfsm/gfsm/src/programs/gfsmlookup_main.c new file mode 100644 index 0000000..39dd8a2 --- /dev/null +++ b/gfsm/gfsm/src/programs/gfsmlookup_main.c @@ -0,0 +1,125 @@ +/* + gfsm-utils : finite state automaton utilities + Copyright (C) 2005 by Bryan Jurish <moocow@ling.uni-potsdam.de> + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <errno.h> +#include <ctype.h> + +#include <gfsm.h> + +#include "gfsmlookup_cmdparser.h" + +/*-------------------------------------------------------------------------- + * Globals + *--------------------------------------------------------------------------*/ +char *progname = "gfsmlookup"; + +//-- options +struct gengetopt_args_info args; + +//-- files +const char *fstfilename = "-"; +const char *outfilename = "-"; + +//-- global structs +gfsmAutomaton *fst = NULL; +gfsmError *err = NULL; + +/*-------------------------------------------------------------------------- + * Option Processing + *--------------------------------------------------------------------------*/ +void get_my_options(int argc, char **argv) +{ + if (cmdline_parser(argc, argv, &args) != 0) + exit(1); + + //-- load environmental defaults + //cmdline_parser_envdefaults(&args); + + //-- filenames + if (args.fst_given) fstfilename = args.fst_arg; + outfilename = args.output_arg; + + //-- load FST + fst = gfsm_automaton_new(); + if (!gfsm_automaton_load_bin_filename(fst, fstfilename, &err)) { + g_printerr("%s: load failed for FST file '%s': %s\n", progname, fstfilename, err->message); + exit(255); + } +} + +/*-------------------------------------------------------------------------- + * lookup_labels() + */ +gfsmAutomaton *lookup_labels(gfsmAutomaton *fst, int argc, char **argv) +{ + gfsmLabelVector *vec = g_ptr_array_sized_new(argc); + char *s=NULL, *tail=NULL; + gfsmLabelVal lab; + gfsmAutomaton *result = NULL; + int i; + + //-- fill input vector + for (i=0; i < argc; i++) { + for (s=argv[i], lab=strtol(s,&tail,0); s != tail; s=tail, lab=strtol(s,&tail,0)) { + 
g_ptr_array_add(vec, (gpointer)lab); + } + } + + //-- actual lookup + result = gfsm_automaton_lookup(fst, vec, result); + + //-- cleanup + g_ptr_array_free(vec,TRUE); + + return result; +} + +/*-------------------------------------------------------------------------- + * MAIN + *--------------------------------------------------------------------------*/ +int main (int argc, char **argv) +{ + gfsmAutomaton *result; + + GFSM_INIT + get_my_options(argc,argv); + + //-- process input + result = lookup_labels(fst, args.inputs_num, args.inputs); + + //-- save output + if (!gfsm_automaton_save_bin_filename(result,outfilename,args.compress_arg,&err)) { + g_printerr("%s: store failed to '%s': %s\n", progname, outfilename, err->message); + exit(4); + } + + //-- cleanup + if (fst) gfsm_automaton_free(fst); + if (result) gfsm_automaton_free(result); + + GFSM_FINISH + return 0; +} diff --git a/gfsm/gfsm/src/programs/gfsmoptional.gog b/gfsm/gfsm/src/programs/gfsmoptional.gog new file mode 100644 index 0000000..3bf6102 --- /dev/null +++ b/gfsm/gfsm/src/programs/gfsmoptional.gog @@ -0,0 +1,70 @@ +# -*- Mode: Shell-Script -*- +# +# Getopt::Gen specification +#----------------------------------------------------------------------------- +program "gfsmoptional" +#program_version "0.01" + +purpose "Make a an automaton optional" +author "Bryan Jurish <moocow@ling.uni-potsdam.de>" +on_reparse "warn" + +#----------------------------------------------------------------------------- +# Details +#----------------------------------------------------------------------------- +details "" + +#----------------------------------------------------------------------------- +# Files +#----------------------------------------------------------------------------- +#rcfile "/etc/gfsmrc" +#rcfile "~/.gfsmrc" + +#----------------------------------------------------------------------------- +# Arguments +#----------------------------------------------------------------------------- +argument "BINFILE" 
"Stored binary gfsm file" \ + details=" +If unspecified, standard input will be read +" + +#----------------------------------------------------------------------------- +# Options +#----------------------------------------------------------------------------- +#group "Basic Options" + +int "compress" z "Specify compression level of output file." \ + arg="LEVEL" \ + default="-1" \ + details=" +Specify zlib compression level of output file. -1 (default) indicates +the default compression level, 0 (zero) indicates no zlib compression at all, +and 9 indicates the best possible compression. +" + +string "output" F "Specifiy output file (default=stdout)." \ + arg="FILE" \ + default="-" + +#----------------------------------------------------------------------------- +# Addenda +#----------------------------------------------------------------------------- +#addenda "" + +#----------------------------------------------------------------------------- +# Bugs +#----------------------------------------------------------------------------- +bugs " + +None known. + +" + +#----------------------------------------------------------------------------- +# Footer +#----------------------------------------------------------------------------- +#acknowledge `cat acknowledge.pod` + +seealso " +L<gfsmutils> +" diff --git a/gfsm/gfsm/src/programs/gfsmoptional_cmdparser.c b/gfsm/gfsm/src/programs/gfsmoptional_cmdparser.c new file mode 100644 index 0000000..0f90ca4 --- /dev/null +++ b/gfsm/gfsm/src/programs/gfsmoptional_cmdparser.c @@ -0,0 +1,449 @@ +/* -*- Mode: C -*- + * + * File: gfsmoptional_cmdparser.c + * Description: Code for command-line parser struct gengetopt_args_info. 
+ * + * File autogenerated by optgen.perl version 0.05 + * generated with the following command: + * /usr/local/bin/optgen.perl -u -l --no-handle-rcfile --nopod -F gfsmoptional_cmdparser gfsmoptional.gog + * + * The developers of optgen.perl consider the fixed text that goes in all + * optgen.perl output files to be in the public domain: + * we make no copyright claims on it. + * + */ + +#include <stdio.h> +#include <stdlib.h> +#include <errno.h> +#include <string.h> +#include <unistd.h> +#include <ctype.h> + +/* If we use autoconf/autoheader. */ +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#ifdef HAVE_PWD_H +# include <pwd.h> +#endif + +/* Allow user-overrides for PACKAGE and VERSION */ +#ifndef PACKAGE +# define PACKAGE "PACKAGE" +#endif + +#ifndef VERSION +# define VERSION "VERSION" +#endif + + +#ifndef PROGRAM +# define PROGRAM "gfsmoptional" +#endif + +/* #define cmdline_parser_DEBUG */ + +/* Check for "configure's" getopt check result. */ +#ifndef HAVE_GETOPT_LONG +# include "getopt.h" +#else +# include <getopt.h> +#endif + +#if !defined(HAVE_STRDUP) && !defined(strdup) +# define strdup gengetopt_strdup +#endif /* HAVE_STRDUP */ + +#include "gfsmoptional_cmdparser.h" + + +/* user code section */ + +/* end user code section */ + + +void +cmdline_parser_print_version (void) +{ + printf("gfsmoptional (%s %s) by Bryan Jurish <moocow@ling.uni-potsdam.de>\n", PACKAGE, VERSION); +} + +void +cmdline_parser_print_help (void) +{ + cmdline_parser_print_version (); + printf("\n"); + printf("Purpose:\n"); + printf(" Make a an automaton optional\n"); + printf("\n"); + + printf("Usage: %s [OPTIONS]... 
BINFILE\n", "gfsmoptional"); + + printf("\n"); + printf(" Arguments:\n"); + printf(" BINFILE Stored binary gfsm file\n"); + + printf("\n"); + printf(" Options:\n"); + printf(" -h --help Print help and exit.\n"); + printf(" -V --version Print version and exit.\n"); + printf(" -zLEVEL --compress=LEVEL Specify compression level of output file.\n"); + printf(" -FFILE --output=FILE Specifiy output file (default=stdout).\n"); +} + +#if !defined(HAVE_STRDUP) && !defined(strdup) +/* gengetopt_strdup(): automatically generated from strdup.c. */ +/* strdup.c replacement of strdup, which is not standard */ +static char * +gengetopt_strdup (const char *s) +{ + char *result = (char*)malloc(strlen(s) + 1); + if (result == (char*)0) + return (char*)0; + strcpy(result, s); + return result; +} +#endif /* HAVE_STRDUP */ + +/* clear_args(args_info): clears all args & resets to defaults */ +static void +clear_args(struct gengetopt_args_info *args_info) +{ + args_info->compress_arg = -1; + args_info->output_arg = strdup("-"); +} + + +int +cmdline_parser (int argc, char * const *argv, struct gengetopt_args_info *args_info) +{ + int c; /* Character of the parsed option. */ + int missing_required_options = 0; + + args_info->help_given = 0; + args_info->version_given = 0; + args_info->compress_given = 0; + args_info->output_given = 0; + + clear_args(args_info); + + /* rcfile handling */ + + /* end rcfile handling */ + + optarg = 0; + optind = 1; + opterr = 1; + optopt = '?'; + + while (1) + { + int option_index = 0; + static struct option long_options[] = { + { "help", 0, NULL, 'h' }, + { "version", 0, NULL, 'V' }, + { "compress", 1, NULL, 'z' }, + { "output", 1, NULL, 'F' }, + { NULL, 0, NULL, 0 } + }; + static char short_options[] = { + 'h', + 'V', + 'z', ':', + 'F', ':', + '\0' + }; + + c = getopt_long (argc, argv, short_options, long_options, &option_index); + + if (c == -1) break; /* Exit from 'while (1)' loop. 
*/ + + if (cmdline_parser_parse_option(c, long_options[option_index].name, optarg, args_info) != 0) { + exit (EXIT_FAILURE); + } + } /* while */ + + + + if ( missing_required_options ) + exit (EXIT_FAILURE); + + + if (optind < argc) { + int i = 0 ; + args_info->inputs_num = argc - optind ; + args_info->inputs = (char **)(malloc ((args_info->inputs_num)*sizeof(char *))) ; + while (optind < argc) + args_info->inputs[ i++ ] = strdup (argv[optind++]) ; + } + + return 0; +} + + +/* Parse a single option */ +int +cmdline_parser_parse_option(char oshort, const char *olong, const char *val, + struct gengetopt_args_info *args_info) +{ + if (!oshort && !(olong && *olong)) return 1; /* ignore null options */ + +#ifdef cmdline_parser_DEBUG + fprintf(stderr, "parse_option(): oshort='%c', olong='%s', val='%s'\n", oshort, olong, val); +#endif + + switch (oshort) + { + case 'h': /* Print help and exit. */ + if (args_info->help_given) { + fprintf(stderr, "%s: `--help' (`-h') option given more than once\n", PROGRAM); + } + clear_args(args_info); + cmdline_parser_print_help(); + exit(EXIT_SUCCESS); + + break; + + case 'V': /* Print version and exit. */ + if (args_info->version_given) { + fprintf(stderr, "%s: `--version' (`-V') option given more than once\n", PROGRAM); + } + clear_args(args_info); + cmdline_parser_print_version(); + exit(EXIT_SUCCESS); + + break; + + case 'z': /* Specify compression level of output file. */ + if (args_info->compress_given) { + fprintf(stderr, "%s: `--compress' (`-z') option given more than once\n", PROGRAM); + } + args_info->compress_given++; + args_info->compress_arg = (int)atoi(val); + break; + + case 'F': /* Specifiy output file (default=stdout). 
*/ + if (args_info->output_given) { + fprintf(stderr, "%s: `--output' (`-F') option given more than once\n", PROGRAM); + } + args_info->output_given++; + if (args_info->output_arg) free(args_info->output_arg); + args_info->output_arg = strdup(val); + break; + + case 0: /* Long option(s) with no short form */ + /* Print help and exit. */ + if (strcmp(olong, "help") == 0) { + if (args_info->help_given) { + fprintf(stderr, "%s: `--help' (`-h') option given more than once\n", PROGRAM); + } + clear_args(args_info); + cmdline_parser_print_help(); + exit(EXIT_SUCCESS); + + } + + /* Print version and exit. */ + else if (strcmp(olong, "version") == 0) { + if (args_info->version_given) { + fprintf(stderr, "%s: `--version' (`-V') option given more than once\n", PROGRAM); + } + clear_args(args_info); + cmdline_parser_print_version(); + exit(EXIT_SUCCESS); + + } + + /* Specify compression level of output file. */ + else if (strcmp(olong, "compress") == 0) { + if (args_info->compress_given) { + fprintf(stderr, "%s: `--compress' (`-z') option given more than once\n", PROGRAM); + } + args_info->compress_given++; + args_info->compress_arg = (int)atoi(val); + } + + /* Specifiy output file (default=stdout). */ + else if (strcmp(olong, "output") == 0) { + if (args_info->output_given) { + fprintf(stderr, "%s: `--output' (`-F') option given more than once\n", PROGRAM); + } + args_info->output_given++; + if (args_info->output_arg) free(args_info->output_arg); + args_info->output_arg = strdup(val); + } + + else { + fprintf(stderr, "%s: unknown long option '%s'.\n", PROGRAM, olong); + return (EXIT_FAILURE); + } + break; + + case '?': /* Invalid Option */ + fprintf(stderr, "%s: unknown option '%s'.\n", PROGRAM, olong); + return (EXIT_FAILURE); + + + default: /* bug: options not considered. 
*/ + fprintf (stderr, "%s: option unknown: %c\n", PROGRAM, oshort); + abort (); + } /* switch */ + return 0; +} + + +/* Initialize options not yet given from environmental defaults */ +void +cmdline_parser_envdefaults(struct gengetopt_args_info *args_info) +{ + + + return; +} + + +/* Load option values from an .rc file */ +void +cmdline_parser_read_rcfile(const char *filename, + struct gengetopt_args_info *args_info, + int user_specified) +{ + char *fullname; + FILE *rcfile; + + if (!filename) return; /* ignore NULL filenames */ + +#if defined(HAVE_GETUID) && defined(HAVE_GETPWUID) + if (*filename == '~') { + /* tilde-expansion hack */ + struct passwd *pwent = getpwuid(getuid()); + if (!pwent) { + fprintf(stderr, "%s: user-id %d not found!\n", PROGRAM, getuid()); + return; + } + if (!pwent->pw_dir) { + fprintf(stderr, "%s: home directory for user-id %d not found!\n", PROGRAM, getuid()); + return; + } + fullname = (char *)malloc(strlen(pwent->pw_dir)+strlen(filename)); + strcpy(fullname, pwent->pw_dir); + strcat(fullname, filename+1); + } else { + fullname = strdup(filename); + } +#else /* !(defined(HAVE_GETUID) && defined(HAVE_GETPWUID)) */ + fullname = strdup(filename); +#endif /* defined(HAVE_GETUID) && defined(HAVE_GETPWUID) */ + + /* try to open */ + rcfile = fopen(fullname,"r"); + if (!rcfile) { + if (user_specified) { + fprintf(stderr, "%s: warning: open failed for rc-file '%s': %s\n", + PROGRAM, fullname, strerror(errno)); + } + } + else { + cmdline_parser_read_rc_stream(rcfile, fullname, args_info); + } + + /* cleanup */ + if (fullname != filename) free(fullname); + if (rcfile) fclose(rcfile); + + return; +} + + +/* Parse option values from an .rc file : guts */ +#define OPTPARSE_GET 32 +void +cmdline_parser_read_rc_stream(FILE *rcfile, + const char *filename, + struct gengetopt_args_info *args_info) +{ + char *optname = (char *)malloc(OPTPARSE_GET); + char *optval = (char *)malloc(OPTPARSE_GET); + size_t onsize = OPTPARSE_GET; + size_t ovsize = 
OPTPARSE_GET; + size_t onlen = 0; + size_t ovlen = 0; + int lineno = 0; + char c; + +#ifdef cmdline_parser_DEBUG + fprintf(stderr, "cmdline_parser_read_rc_stream('%s'):\n", filename); +#endif + + while ((c = fgetc(rcfile)) != EOF) { + onlen = 0; + ovlen = 0; + lineno++; + + /* -- get next option-name */ + /* skip leading space and comments */ + if (isspace(c)) continue; + if (c == '#') { + while ((c = fgetc(rcfile)) != EOF) { + if (c == '\n') break; + } + continue; + } + + /* parse option-name */ + while (c != EOF && c != '=' && !isspace(c)) { + /* re-allocate if necessary */ + if (onlen >= onsize-1) { + char *tmp = (char *)malloc(onsize+OPTPARSE_GET); + strcpy(tmp,optname); + free(optname); + + onsize += OPTPARSE_GET; + optname = tmp; + } + optname[onlen++] = c; + c = fgetc(rcfile); + } + optname[onlen++] = '\0'; + +#ifdef cmdline_parser_DEBUG + fprintf(stderr, "cmdline_parser_read_rc_stream('%s'): line %d: optname='%s'\n", + filename, lineno, optname); +#endif + + /* -- get next option-value */ + /* skip leading space */ + while ((c = fgetc(rcfile)) != EOF && isspace(c)) { + ; + } + + /* parse option-value */ + while (c != EOF && c != '\n') { + /* re-allocate if necessary */ + if (ovlen >= ovsize-1) { + char *tmp = (char *)malloc(ovsize+OPTPARSE_GET); + strcpy(tmp,optval); + free(optval); + ovsize += OPTPARSE_GET; + optval = tmp; + } + optval[ovlen++] = c; + c = fgetc(rcfile); + } + optval[ovlen++] = '\0'; + + /* now do the action for the option */ + if (cmdline_parser_parse_option('\0',optname,optval,args_info) != 0) { + fprintf(stderr, "%s: error in file '%s' at line %d.\n", PROGRAM, filename, lineno); + + } + } + + /* cleanup */ + free(optname); + free(optval); + + return; +} diff --git a/gfsm/gfsm/src/programs/gfsmoptional_cmdparser.h b/gfsm/gfsm/src/programs/gfsmoptional_cmdparser.h new file mode 100644 index 0000000..566495e --- /dev/null +++ b/gfsm/gfsm/src/programs/gfsmoptional_cmdparser.h @@ -0,0 +1,63 @@ +/* -*- Mode: C -*- + * + * File: 
gfsmoptional_cmdparser.h + * Description: Headers for command-line parser struct gengetopt_args_info. + * + * File autogenerated by optgen.perl version 0.05. + * + */ + +#ifndef gfsmoptional_cmdparser_h +#define gfsmoptional_cmdparser_h + +#ifdef __cplusplus +extern "C" { +#endif /* __cplusplus */ + +/* + * moocow: Never set PACKAGE and VERSION here. + */ + +struct gengetopt_args_info { + int compress_arg; /* Specify compression level of output file. (default=-1). */ + char * output_arg; /* Specifiy output file (default=stdout). (default=-). */ + + int help_given; /* Whether help was given */ + int version_given; /* Whether version was given */ + int compress_given; /* Whether compress was given */ + int output_given; /* Whether output was given */ + + char **inputs; /* unnamed arguments */ + unsigned inputs_num; /* number of unnamed arguments */ +}; + +/* read rc files (if any) and parse all command-line options in one swell foop */ +int cmdline_parser (int argc, char *const *argv, struct gengetopt_args_info *args_info); + +/* instantiate defaults from environment variables: you must call this yourself! 
*/ +void cmdline_parser_envdefaults (struct gengetopt_args_info *args_info); + +/* read a single rc-file */ +void cmdline_parser_read_rcfile (const char *filename, + struct gengetopt_args_info *args_info, + int user_specified); + +/* read a single rc-file (stream) */ +void cmdline_parser_read_rc_stream (FILE *rcfile, + const char *filename, + struct gengetopt_args_info *args_info); + +/* parse a single option */ +int cmdline_parser_parse_option (char oshort, const char *olong, const char *val, + struct gengetopt_args_info *args_info); + +/* print help message */ +void cmdline_parser_print_help(void); + +/* print version */ +void cmdline_parser_print_version(void); + +#ifdef __cplusplus +} +#endif /* __cplusplus */ +#endif /* gfsmoptional_cmdparser_h */ diff --git a/gfsm/gfsm/src/programs/gfsmoptional_main.c b/gfsm/gfsm/src/programs/gfsmoptional_main.c new file mode 100644 index 0000000..73c442c --- /dev/null +++ b/gfsm/gfsm/src/programs/gfsmoptional_main.c @@ -0,0 +1,101 @@ +/* + gfsm-utils : finite state automaton utilities + Copyright (C) 2004 by Bryan Jurish <moocow@ling.uni-potsdam.de> + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif + +#include <stdio.h> +#include <errno.h> +#include <string.h> +#include <stdlib.h> + +#include <gfsm.h> + +#include "gfsmoptional_cmdparser.h" + +/*-------------------------------------------------------------------------- + * Globals + *--------------------------------------------------------------------------*/ +char *progname = "gfsmoptional"; + +//-- options +struct gengetopt_args_info args; + +//-- files +const char *infilename = "-"; + +//-- global structs +gfsmAutomaton *fsm; + +/*-------------------------------------------------------------------------- + * Option Processing + *--------------------------------------------------------------------------*/ +void get_my_options(int argc, char **argv) +{ + if (cmdline_parser(argc, argv, &args) != 0) + exit(1); + + //-- load environmental defaults + //cmdline_parser_envdefaults(&args); + + //-- filenames + if (args.inputs_num > 0) infilename = args.inputs[0]; + + //-- initialize fsm + fsm = gfsm_automaton_new(); +} + +/*-------------------------------------------------------------------------- + * Utilities + *--------------------------------------------------------------------------*/ +#define bool2char(b) (b ? 
'y' : 'n') + +/*-------------------------------------------------------------------------- + * MAIN + *--------------------------------------------------------------------------*/ +int main (int argc, char **argv) +{ + gfsmError *err = NULL; + + GFSM_INIT + get_my_options(argc,argv); + + //-- load automaton + if (!gfsm_automaton_load_bin_filename(fsm,infilename,&err)) { + g_printerr("%s: load failed for '%s': %s\n", progname, infilename, err->message); + exit(2); + } + + //-- make optional + gfsm_automaton_optional(fsm); + + //-- store automaton + if (!gfsm_automaton_save_bin_filename(fsm,args.output_arg,args.compress_arg,&err)) { + g_printerr("%s: store failed to '%s': %s\n", progname, args.output_arg, err->message); + exit(4); + } + + //-- cleanup + gfsm_automaton_free(fsm); + + GFSM_FINISH + + return 0; +} diff --git a/gfsm/gfsm/src/programs/gfsmprint.gog b/gfsm/gfsm/src/programs/gfsmprint.gog new file mode 100644 index 0000000..4a01ef2 --- /dev/null +++ b/gfsm/gfsm/src/programs/gfsmprint.gog @@ -0,0 +1,80 @@ +# -*- Mode: Shell-Script -*- +# +# Getopt::Gen specification +#----------------------------------------------------------------------------- +program "gfsmprint" +#program_version "0.01" + +purpose "Convert binary format gfsm files to text" +author "Bryan Jurish <moocow@ling.uni-potsdam.de>" +on_reparse "warn" + +#----------------------------------------------------------------------------- +# Details +#----------------------------------------------------------------------------- +details "" + +#----------------------------------------------------------------------------- +# Files +#----------------------------------------------------------------------------- +#rcfile "/etc/gfsmrc" +#rcfile "~/.gfsmrc" + +#----------------------------------------------------------------------------- +# Arguments +#----------------------------------------------------------------------------- +argument "BINFILE" "Stored binary gfsm file" \ + details=" +If unspecified, 
standard input will be read +" + +argument "TXTFILE" "Output text file" \ + details=" +If unspecified, output will be written to standard output. +" + +#----------------------------------------------------------------------------- +# Options +#----------------------------------------------------------------------------- +#group "Basic Options" + +string "ilabels" i "Specify input (lower) labels file." \ + arg="LABELS" + +string "olabels" o "Specify output (upper) labels file." \ + arg="LABELS" + +string "slabels" s "Specify state labels file." \ + arg="LABELS" + +int "compress" z "Specify compression level of output file." \ + arg="LEVEL" \ + default="0" \ + details=" +Specify zlib compression level of output file. -1 indicates +the library default compression level, 0 (default) indicates no zlib compression at all, +and 9 indicates the best possible compression. +" + +#----------------------------------------------------------------------------- +# Addenda +#----------------------------------------------------------------------------- +#addenda "" + +#----------------------------------------------------------------------------- +# Bugs +#----------------------------------------------------------------------------- +bugs " + +None known. + +" + +#----------------------------------------------------------------------------- +# Footer +#----------------------------------------------------------------------------- +#acknowledge `cat acknowledge.pod` + +seealso " +L<gfsmutils> +" diff --git a/gfsm/gfsm/src/programs/gfsmprint_cmdparser.c b/gfsm/gfsm/src/programs/gfsmprint_cmdparser.c new file mode 100644 index 0000000..253eb1d --- /dev/null +++ b/gfsm/gfsm/src/programs/gfsmprint_cmdparser.c @@ -0,0 +1,498 @@ +/* -*- Mode: C -*- + * + * File: gfsmprint_cmdparser.c + * Description: Code for command-line parser struct gengetopt_args_info. 
+ * + * File autogenerated by optgen.perl version 0.05 + * generated with the following command: + * /usr/local/bin/optgen.perl -u -l --no-handle-rcfile --nopod -F gfsmprint_cmdparser gfsmprint.gog + * + * The developers of optgen.perl consider the fixed text that goes in all + * optgen.perl output files to be in the public domain: + * we make no copyright claims on it. + * + */ + +#include <stdio.h> +#include <stdlib.h> +#include <errno.h> +#include <string.h> +#include <unistd.h> +#include <ctype.h> + +/* If we use autoconf/autoheader. */ +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#ifdef HAVE_PWD_H +# include <pwd.h> +#endif + +/* Allow user-overrides for PACKAGE and VERSION */ +#ifndef PACKAGE +# define PACKAGE "PACKAGE" +#endif + +#ifndef VERSION +# define VERSION "VERSION" +#endif + + +#ifndef PROGRAM +# define PROGRAM "gfsmprint" +#endif + +/* #define cmdline_parser_DEBUG */ + +/* Check for "configure's" getopt check result. */ +#ifndef HAVE_GETOPT_LONG +# include "getopt.h" +#else +# include <getopt.h> +#endif + +#if !defined(HAVE_STRDUP) && !defined(strdup) +# define strdup gengetopt_strdup +#endif /* HAVE_STRDUP */ + +#include "gfsmprint_cmdparser.h" + + +/* user code section */ + +/* end user code section */ + + +void +cmdline_parser_print_version (void) +{ + printf("gfsmprint (%s %s) by Bryan Jurish <moocow@ling.uni-potsdam.de>\n", PACKAGE, VERSION); +} + +void +cmdline_parser_print_help (void) +{ + cmdline_parser_print_version (); + printf("\n"); + printf("Purpose:\n"); + printf(" Convert binary format gfsm files to text\n"); + printf("\n"); + + printf("Usage: %s [OPTIONS]... 
BINFILE TXTFILE\n", "gfsmprint"); + + printf("\n"); + printf(" Arguments:\n"); + printf(" BINFILE Stored binary gfsm file\n"); + printf(" TXTFILE Output text file\n"); + + printf("\n"); + printf(" Options:\n"); + printf(" -h --help Print help and exit.\n"); + printf(" -V --version Print version and exit.\n"); + printf(" -iLABELS --ilabels=LABELS Specify input (lower) labels file.\n"); + printf(" -oLABELS --olabels=LABELS Specify output (upper) labels file.\n"); + printf(" -sLABELS --slabels=LABELS Specify state labels file.\n"); + printf(" -zLEVEL --compress=LEVEL Specify compression level of output file.\n"); +} + +#if !defined(HAVE_STRDUP) && !defined(strdup) +/* gengetopt_strdup(): automatically generated from strdup.c. */ +/* strdup.c replacement of strdup, which is not standard */ +static char * +gengetopt_strdup (const char *s) +{ + char *result = (char*)malloc(strlen(s) + 1); + if (result == (char*)0) + return (char*)0; + strcpy(result, s); + return result; +} +#endif /* HAVE_STRDUP */ + +/* clear_args(args_info): clears all args & resets to defaults */ +static void +clear_args(struct gengetopt_args_info *args_info) +{ + args_info->ilabels_arg = NULL; + args_info->olabels_arg = NULL; + args_info->slabels_arg = NULL; + args_info->compress_arg = 0; +} + + +int +cmdline_parser (int argc, char * const *argv, struct gengetopt_args_info *args_info) +{ + int c; /* Character of the parsed option. 
*/ + int missing_required_options = 0; + + args_info->help_given = 0; + args_info->version_given = 0; + args_info->ilabels_given = 0; + args_info->olabels_given = 0; + args_info->slabels_given = 0; + args_info->compress_given = 0; + + clear_args(args_info); + + /* rcfile handling */ + + /* end rcfile handling */ + + optarg = 0; + optind = 1; + opterr = 1; + optopt = '?'; + + while (1) + { + int option_index = 0; + static struct option long_options[] = { + { "help", 0, NULL, 'h' }, + { "version", 0, NULL, 'V' }, + { "ilabels", 1, NULL, 'i' }, + { "olabels", 1, NULL, 'o' }, + { "slabels", 1, NULL, 's' }, + { "compress", 1, NULL, 'z' }, + { NULL, 0, NULL, 0 } + }; + static char short_options[] = { + 'h', + 'V', + 'i', ':', + 'o', ':', + 's', ':', + 'z', ':', + '\0' + }; + + c = getopt_long (argc, argv, short_options, long_options, &option_index); + + if (c == -1) break; /* Exit from 'while (1)' loop. */ + + if (cmdline_parser_parse_option(c, long_options[option_index].name, optarg, args_info) != 0) { + exit (EXIT_FAILURE); + } + } /* while */ + + + + if ( missing_required_options ) + exit (EXIT_FAILURE); + + + if (optind < argc) { + int i = 0 ; + args_info->inputs_num = argc - optind ; + args_info->inputs = (char **)(malloc ((args_info->inputs_num)*sizeof(char *))) ; + while (optind < argc) + args_info->inputs[ i++ ] = strdup (argv[optind++]) ; + } + + return 0; +} + + +/* Parse a single option */ +int +cmdline_parser_parse_option(char oshort, const char *olong, const char *val, + struct gengetopt_args_info *args_info) +{ + if (!oshort && !(olong && *olong)) return 1; /* ignore null options */ + +#ifdef cmdline_parser_DEBUG + fprintf(stderr, "parse_option(): oshort='%c', olong='%s', val='%s'\n", oshort, olong, val); +#endif + + switch (oshort) + { + case 'h': /* Print help and exit. 
*/ + if (args_info->help_given) { + fprintf(stderr, "%s: `--help' (`-h') option given more than once\n", PROGRAM); + } + clear_args(args_info); + cmdline_parser_print_help(); + exit(EXIT_SUCCESS); + + break; + + case 'V': /* Print version and exit. */ + if (args_info->version_given) { + fprintf(stderr, "%s: `--version' (`-V') option given more than once\n", PROGRAM); + } + clear_args(args_info); + cmdline_parser_print_version(); + exit(EXIT_SUCCESS); + + break; + + case 'i': /* Specify input (lower) labels file. */ + if (args_info->ilabels_given) { + fprintf(stderr, "%s: `--ilabels' (`-i') option given more than once\n", PROGRAM); + } + args_info->ilabels_given++; + if (args_info->ilabels_arg) free(args_info->ilabels_arg); + args_info->ilabels_arg = strdup(val); + break; + + case 'o': /* Specify output (upper) labels file. */ + if (args_info->olabels_given) { + fprintf(stderr, "%s: `--olabels' (`-o') option given more than once\n", PROGRAM); + } + args_info->olabels_given++; + if (args_info->olabels_arg) free(args_info->olabels_arg); + args_info->olabels_arg = strdup(val); + break; + + case 's': /* Specify state labels file. */ + if (args_info->slabels_given) { + fprintf(stderr, "%s: `--slabels' (`-s') option given more than once\n", PROGRAM); + } + args_info->slabels_given++; + if (args_info->slabels_arg) free(args_info->slabels_arg); + args_info->slabels_arg = strdup(val); + break; + + case 'z': /* Specify compression level of output file. */ + if (args_info->compress_given) { + fprintf(stderr, "%s: `--compress' (`-z') option given more than once\n", PROGRAM); + } + args_info->compress_given++; + args_info->compress_arg = (int)atoi(val); + break; + + case 0: /* Long option(s) with no short form */ + /* Print help and exit. 
*/ + if (strcmp(olong, "help") == 0) { + if (args_info->help_given) { + fprintf(stderr, "%s: `--help' (`-h') option given more than once\n", PROGRAM); + } + clear_args(args_info); + cmdline_parser_print_help(); + exit(EXIT_SUCCESS); + + } + + /* Print version and exit. */ + else if (strcmp(olong, "version") == 0) { + if (args_info->version_given) { + fprintf(stderr, "%s: `--version' (`-V') option given more than once\n", PROGRAM); + } + clear_args(args_info); + cmdline_parser_print_version(); + exit(EXIT_SUCCESS); + + } + + /* Specify input (lower) labels file. */ + else if (strcmp(olong, "ilabels") == 0) { + if (args_info->ilabels_given) { + fprintf(stderr, "%s: `--ilabels' (`-i') option given more than once\n", PROGRAM); + } + args_info->ilabels_given++; + if (args_info->ilabels_arg) free(args_info->ilabels_arg); + args_info->ilabels_arg = strdup(val); + } + + /* Specify output (upper) labels file. */ + else if (strcmp(olong, "olabels") == 0) { + if (args_info->olabels_given) { + fprintf(stderr, "%s: `--olabels' (`-o') option given more than once\n", PROGRAM); + } + args_info->olabels_given++; + if (args_info->olabels_arg) free(args_info->olabels_arg); + args_info->olabels_arg = strdup(val); + } + + /* Specify state labels file. */ + else if (strcmp(olong, "slabels") == 0) { + if (args_info->slabels_given) { + fprintf(stderr, "%s: `--slabels' (`-s') option given more than once\n", PROGRAM); + } + args_info->slabels_given++; + if (args_info->slabels_arg) free(args_info->slabels_arg); + args_info->slabels_arg = strdup(val); + } + + /* Specify compression level of output file. 
*/ + else if (strcmp(olong, "compress") == 0) { + if (args_info->compress_given) { + fprintf(stderr, "%s: `--compress' (`-z') option given more than once\n", PROGRAM); + } + args_info->compress_given++; + args_info->compress_arg = (int)atoi(val); + } + + else { + fprintf(stderr, "%s: unknown long option '%s'.\n", PROGRAM, olong); + return (EXIT_FAILURE); + } + break; + + case '?': /* Invalid Option */ + fprintf(stderr, "%s: unknown option '%s'.\n", PROGRAM, olong); + return (EXIT_FAILURE); + + + default: /* bug: options not considered. */ + fprintf (stderr, "%s: option unknown: %c\n", PROGRAM, oshort); + abort (); + } /* switch */ + return 0; +} + + +/* Initialize options not yet given from environmental defaults */ +void +cmdline_parser_envdefaults(struct gengetopt_args_info *args_info) +{ + + + return; +} + + +/* Load option values from an .rc file */ +void +cmdline_parser_read_rcfile(const char *filename, + struct gengetopt_args_info *args_info, + int user_specified) +{ + char *fullname; + FILE *rcfile; + + if (!filename) return; /* ignore NULL filenames */ + +#if defined(HAVE_GETUID) && defined(HAVE_GETPWUID) + if (*filename == '~') { + /* tilde-expansion hack */ + struct passwd *pwent = getpwuid(getuid()); + if (!pwent) { + fprintf(stderr, "%s: user-id %d not found!\n", PROGRAM, getuid()); + return; + } + if (!pwent->pw_dir) { + fprintf(stderr, "%s: home directory for user-id %d not found!\n", PROGRAM, getuid()); + return; + } + fullname = (char *)malloc(strlen(pwent->pw_dir)+strlen(filename)); + strcpy(fullname, pwent->pw_dir); + strcat(fullname, filename+1); + } else { + fullname = strdup(filename); + } +#else /* !(defined(HAVE_GETUID) && defined(HAVE_GETPWUID)) */ + fullname = strdup(filename); +#endif /* defined(HAVE_GETUID) && defined(HAVE_GETPWUID) */ + + /* try to open */ + rcfile = fopen(fullname,"r"); + if (!rcfile) { + if (user_specified) { + fprintf(stderr, "%s: warning: open failed for rc-file '%s': %s\n", + PROGRAM, fullname, strerror(errno)); + } 
+ } + else { + cmdline_parser_read_rc_stream(rcfile, fullname, args_info); + } + + /* cleanup */ + if (fullname != filename) free(fullname); + if (rcfile) fclose(rcfile); + + return; +} + + +/* Parse option values from an .rc file : guts */ +#define OPTPARSE_GET 32 +void +cmdline_parser_read_rc_stream(FILE *rcfile, + const char *filename, + struct gengetopt_args_info *args_info) +{ + char *optname = (char *)malloc(OPTPARSE_GET); + char *optval = (char *)malloc(OPTPARSE_GET); + size_t onsize = OPTPARSE_GET; + size_t ovsize = OPTPARSE_GET; + size_t onlen = 0; + size_t ovlen = 0; + int lineno = 0; + int c; /* int, not char: must hold every byte value plus EOF as returned by fgetc() */ + +#ifdef cmdline_parser_DEBUG + fprintf(stderr, "cmdline_parser_read_rc_stream('%s'):\n", filename); +#endif + + while ((c = fgetc(rcfile)) != EOF) { + onlen = 0; + ovlen = 0; + lineno++; + + /* -- get next option-name */ + /* skip leading space and comments */ + if (isspace(c)) continue; + if (c == '#') { + while ((c = fgetc(rcfile)) != EOF) { + if (c == '\n') break; + } + continue; + } + + /* parse option-name */ + while (c != EOF && c != '=' && !isspace(c)) { + /* re-allocate if necessary; optname is not NUL-terminated yet, so copy by length */ + if (onlen >= onsize-1) { + char *tmp = (char *)malloc(onsize+OPTPARSE_GET); + memcpy(tmp,optname,onlen); + free(optname); + + onsize += OPTPARSE_GET; + optname = tmp; + } + optname[onlen++] = c; + c = fgetc(rcfile); + } + optname[onlen++] = '\0'; + +#ifdef cmdline_parser_DEBUG + fprintf(stderr, "cmdline_parser_read_rc_stream('%s'): line %d: optname='%s'\n", + filename, lineno, optname); +#endif + + /* -- get next option-value */ + /* skip leading space */ + while ((c = fgetc(rcfile)) != EOF && isspace(c)) { + ; + } + + /* parse option-value */ + while (c != EOF && c != '\n') { + /* re-allocate if necessary; optval is not NUL-terminated yet, so copy by length */ + if (ovlen >= ovsize-1) { + char *tmp = (char *)malloc(ovsize+OPTPARSE_GET); + memcpy(tmp,optval,ovlen); + free(optval); + ovsize += OPTPARSE_GET; + optval = tmp; + } + optval[ovlen++] = c; + c = fgetc(rcfile); + } + optval[ovlen++] = '\0'; + + /* now do the action for the 
option */ + if (cmdline_parser_parse_option('\0',optname,optval,args_info) != 0) { + fprintf(stderr, "%s: error in file '%s' at line %d.\n", PROGRAM, filename, lineno); + + } + } + + /* cleanup */ + free(optname); + free(optval); + + return; +} diff --git a/gfsm/gfsm/src/programs/gfsmprint_cmdparser.h b/gfsm/gfsm/src/programs/gfsmprint_cmdparser.h new file mode 100644 index 0000000..0b06787 --- /dev/null +++ b/gfsm/gfsm/src/programs/gfsmprint_cmdparser.h @@ -0,0 +1,67 @@ +/* -*- Mode: C -*- + * + * File: gfsmprint_cmdparser.h + * Description: Headers for command-line parser struct gengetopt_args_info. + * + * File autogenerated by optgen.perl version 0.05. + * + */ + +#ifndef gfsmprint_cmdparser_h +#define gfsmprint_cmdparser_h + +#ifdef __cplusplus +extern "C" { +#endif /* __cplusplus */ + +/* + * moocow: Never set PACKAGE and VERSION here. + */ + +struct gengetopt_args_info { + char * ilabels_arg; /* Specify input (lower) labels file. (default=NULL). */ + char * olabels_arg; /* Specify output (upper) labels file. (default=NULL). */ + char * slabels_arg; /* Specify state labels file. (default=NULL). */ + int compress_arg; /* Specify compression level of output file. (default=0). */ + + int help_given; /* Whether help was given */ + int version_given; /* Whether version was given */ + int ilabels_given; /* Whether ilabels was given */ + int olabels_given; /* Whether olabels was given */ + int slabels_given; /* Whether slabels was given */ + int compress_given; /* Whether compress was given */ + + char **inputs; /* unnamed arguments */ + unsigned inputs_num; /* number of unnamed arguments */ +}; + +/* read rc files (if any) and parse all command-line options in one swell foop */ +int cmdline_parser (int argc, char *const *argv, struct gengetopt_args_info *args_info); + +/* instantiate defaults from environment variables: you must call this yourself! 
*/ +void cmdline_parser_envdefaults (struct gengetopt_args_info *args_info); + +/* read a single rc-file */ +void cmdline_parser_read_rcfile (const char *filename, + struct gengetopt_args_info *args_info, + int user_specified); + +/* read a single rc-file (stream) */ +void cmdline_parser_read_rc_stream (FILE *rcfile, + const char *filename, + struct gengetopt_args_info *args_info); + +/* parse a single option */ +int cmdline_parser_parse_option (char oshort, const char *olong, const char *val, + struct gengetopt_args_info *args_info); + +/* print help message */ +void cmdline_parser_print_help(void); + +/* print version */ +void cmdline_parser_print_version(void); + +#ifdef __cplusplus +} +#endif /* __cplusplus */ +#endif /* gfsmprint_cmdparser_h */ diff --git a/gfsm/gfsm/src/programs/gfsmprint_main.c b/gfsm/gfsm/src/programs/gfsmprint_main.c new file mode 100644 index 0000000..fe6d7f3 --- /dev/null +++ b/gfsm/gfsm/src/programs/gfsmprint_main.c @@ -0,0 +1,125 @@ +/* + gfsm-utils : finite state automaton utilities + Copyright (C) 2004 by Bryan Jurish <moocow@ling.uni-potsdam.de> + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+
+    You should have received a copy of the GNU Lesser General Public
+    License along with this library; if not, write to the Free Software
+    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+*/
+
+#ifdef HAVE_CONFIG_H
+# include <config.h>
+#endif
+
+#include <stdio.h>
+#include <errno.h>
+#include <string.h>
+#include <stdlib.h>
+
+#include <gfsm.h>
+
+#include "gfsmprint_cmdparser.h"
+
+/*--------------------------------------------------------------------------
+ * Globals
+ *--------------------------------------------------------------------------*/
+char *progname = "gfsmprint";
+
+//-- options
+struct gengetopt_args_info args;
+
+//-- files
+const char *infilename = "-";
+const char *outfilename = "-";
+
+//-- global structs
+gfsmAutomaton *fsm;
+gfsmAlphabet *ilabels=NULL, *olabels=NULL, *slabels=NULL;
+gfsmError *err = NULL;
+
+/*--------------------------------------------------------------------------
+ * Option Processing
+ *--------------------------------------------------------------------------*/
+/* get_my_options(): parse the command line into the global `args' and set up
+ * the other globals from it:
+ *  - first unnamed argument overrides `infilename', second `outfilename'
+ *    (both default to "-")
+ *  - each labels file that was given is loaded into a fresh string alphabet
+ *  - `fsm' is allocated (still empty)
+ * Exits with status 1 if option parsing fails and with status 2 if a labels
+ * file cannot be loaded.
+ */
+void get_my_options(int argc, char **argv)
+{
+  if (cmdline_parser(argc, argv, &args) != 0)
+    exit(1);
+
+  //-- load environmental defaults
+  //cmdline_parser_envdefaults(&args);
+
+  //-- filenames
+  if (args.inputs_num > 0) infilename = args.inputs[0];
+  if (args.inputs_num > 1) outfilename = args.inputs[1];
+
+  //-- labels: input
+  if (args.ilabels_given) {
+    ilabels = gfsm_string_alphabet_new();
+    if (!gfsm_alphabet_load_filename(ilabels,args.ilabels_arg,&err)) {
+      g_printerr("%s: load failed for input-labels file '%s': %s\n",
+                 progname, args.ilabels_arg, err->message);
+      exit(2);
+    }
+  }
+  //-- labels: output
+  if (args.olabels_given) {
+    olabels = gfsm_string_alphabet_new();
+    if (!gfsm_alphabet_load_filename(olabels,args.olabels_arg,&err)) {
+      g_printerr("%s: load failed for output-labels file '%s': %s\n",
+                 progname, args.olabels_arg, err->message);
+      exit(2);
+    }
+  }
+  //-- labels: state
+  if (args.slabels_given) {
+    slabels = gfsm_string_alphabet_new();
+    if (!gfsm_alphabet_load_filename(slabels,args.slabels_arg,&err)) {
+      g_printerr("%s: load failed for state-labels file '%s': %s\n",
+                 progname, args.slabels_arg, err->message);
+      exit(2);
+    }
+  }
+
+  //-- initialize fsm
+  fsm = gfsm_automaton_new();
+}
+
+/*--------------------------------------------------------------------------
+ * MAIN
+ *--------------------------------------------------------------------------*/
+/* main(): load the binary automaton named by `infilename', print it to
+ * `outfilename' using whichever label alphabets were loaded (NULL otherwise)
+ * and the requested compression level, then free everything.
+ * Exits with status 3 if either the load or the print step fails.
+ */
+int main (int argc, char **argv)
+{
+  GFSM_INIT
+  get_my_options(argc,argv);
+
+  //-- load automaton
+  if (!gfsm_automaton_load_bin_filename(fsm,infilename,&err)) {
+    g_printerr("%s: load failed for '%s': %s\n", progname, infilename, err->message);
+    exit(3);
+  }
+
+  //-- print automaton
+  if (!gfsm_automaton_print_filename_full(fsm,outfilename,ilabels,olabels,slabels,args.compress_arg,&err)) {
+    g_printerr("%s: print failed to '%s': %s\n", progname, outfilename, err->message);
+    exit(3);
+  }
+
+  //-- cleanup
+  if (ilabels) gfsm_alphabet_free(ilabels);
+  if (olabels) gfsm_alphabet_free(olabels);
+  if (slabels) gfsm_alphabet_free(slabels);
+  gfsm_automaton_free(fsm);
+
+  GFSM_FINISH
+  return 0;
+}
diff --git a/gfsm/gfsm/src/programs/gfsmproduct.gog b/gfsm/gfsm/src/programs/gfsmproduct.gog
new file mode 100644
index 0000000..120c823
--- /dev/null
+++ b/gfsm/gfsm/src/programs/gfsmproduct.gog
@@ -0,0 +1,78 @@
+# -*- Mode: Shell-Script -*-
+#
+# Getopt::Gen specification
+#-----------------------------------------------------------------------------
+program "gfsmproduct"
+#program_version "0.01"
+
+purpose "Compute Cartesian product of finite state acceptors"
+author "Bryan Jurish <moocow@ling.uni-potsdam.de>"
+on_reparse "warn"
+
+#-----------------------------------------------------------------------------
+# Details
+#-----------------------------------------------------------------------------
+details ""
+
+#-----------------------------------------------------------------------------
+# Files
+#----------------------------------------------------------------------------- +#rcfile "/etc/mootrc" +#rcfile "~/.mootrc" + +#----------------------------------------------------------------------------- +# Arguments +#----------------------------------------------------------------------------- +argument "LOWER" "Stored binary gfsm file" \ + required=1 \ + details=" +FSM accepting the lower side of the output FST. +Required argument. +" + +argument "UPPER" "Stored binary gfsm file" \ + details=" +FSM accepting the upper side of the output FST. +If unspecified, standard input will be read. +" + +#----------------------------------------------------------------------------- +# Options +#----------------------------------------------------------------------------- +#group "Basic Options" + +int "compress" z "Specify compression level of output file." \ + arg="LEVEL" \ + default="-1" \ + details=" +Specify zlib compression level of output file. -1 (default) indicates +the default compression level, 0 (zero) indicates no zlib compression at all, +and 9 indicates the best possible compression. +" + +string "output" F "Specifiy output file (default=stdout)." \ + arg="FILE" \ + default="-" + +#----------------------------------------------------------------------------- +# Addenda +#----------------------------------------------------------------------------- +#addenda "" + +#----------------------------------------------------------------------------- +# Bugs +#----------------------------------------------------------------------------- +bugs " + +None known. 
+ +" + +#----------------------------------------------------------------------------- +# Footer +#----------------------------------------------------------------------------- +#acknowledge `cat acknowledge.pod` + +seealso " +L<gfsmutils> +" diff --git a/gfsm/gfsm/src/programs/gfsmproduct_cmdparser.c b/gfsm/gfsm/src/programs/gfsmproduct_cmdparser.c new file mode 100644 index 0000000..27fd759 --- /dev/null +++ b/gfsm/gfsm/src/programs/gfsmproduct_cmdparser.c @@ -0,0 +1,450 @@ +/* -*- Mode: C -*- + * + * File: gfsmproduct_cmdparser.c + * Description: Code for command-line parser struct gengetopt_args_info. + * + * File autogenerated by optgen.perl version 0.05 + * generated with the following command: + * /usr/local/bin/optgen.perl -u -l --no-handle-rcfile --nopod -F gfsmproduct_cmdparser gfsmproduct.gog + * + * The developers of optgen.perl consider the fixed text that goes in all + * optgen.perl output files to be in the public domain: + * we make no copyright claims on it. + * + */ + +#include <stdio.h> +#include <stdlib.h> +#include <errno.h> +#include <string.h> +#include <unistd.h> +#include <ctype.h> + +/* If we use autoconf/autoheader. */ +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#ifdef HAVE_PWD_H +# include <pwd.h> +#endif + +/* Allow user-overrides for PACKAGE and VERSION */ +#ifndef PACKAGE +# define PACKAGE "PACKAGE" +#endif + +#ifndef VERSION +# define VERSION "VERSION" +#endif + + +#ifndef PROGRAM +# define PROGRAM "gfsmproduct" +#endif + +/* #define cmdline_parser_DEBUG */ + +/* Check for "configure's" getopt check result. 
*/
+#ifndef HAVE_GETOPT_LONG
+# include "getopt.h"
+#else
+# include <getopt.h>
+#endif
+
+#if !defined(HAVE_STRDUP) && !defined(strdup)
+# define strdup gengetopt_strdup
+#endif /* HAVE_STRDUP */
+
+#include "gfsmproduct_cmdparser.h"
+
+
+/* user code section */
+
+/* end user code section */
+
+
+void
+cmdline_parser_print_version (void)
+{
+  printf("gfsmproduct (%s %s) by Bryan Jurish <moocow@ling.uni-potsdam.de>\n", PACKAGE, VERSION);
+}
+
+void
+cmdline_parser_print_help (void)
+{
+  cmdline_parser_print_version ();
+  printf("\n");
+  printf("Purpose:\n");
+  printf(" Compute Cartesian product of finite state acceptors\n");
+  printf("\n");
+
+  printf("Usage: %s [OPTIONS]... LOWER UPPER\n", "gfsmproduct");
+
+  printf("\n");
+  printf(" Arguments:\n");
+  printf(" LOWER Stored binary gfsm file\n");
+  printf(" UPPER Stored binary gfsm file\n");
+
+  printf("\n");
+  printf(" Options:\n");
+  printf(" -h --help Print help and exit.\n");
+  printf(" -V --version Print version and exit.\n");
+  printf(" -zLEVEL --compress=LEVEL Specify compression level of output file.\n");
+  printf(" -FFILE --output=FILE Specifiy output file (default=stdout).\n");
+}
+
+#if !defined(HAVE_STRDUP) && !defined(strdup)
+/* gengetopt_strdup(): automatically generated from strdup.c. */
+/* strdup.c replacement of strdup, which is not standard */
+static char *
+gengetopt_strdup (const char *s)
+{
+  char *result = (char*)malloc(strlen(s) + 1);
+  if (result == (char*)0)
+    return (char*)0;
+  strcpy(result, s);
+  return result;
+}
+#endif /* HAVE_STRDUP */
+
+/* clear_args(args_info): clears all args & resets to defaults */
+static void
+clear_args(struct gengetopt_args_info *args_info)
+{
+  args_info->compress_arg = -1;
+  args_info->output_arg = strdup("-");
+}
+
+
+/* cmdline_parser(): parse a whole command line.
+ *
+ * Resets every field of `args_info' (including the unnamed-argument list),
+ * runs getopt_long() over argv, passes each option it returns to
+ * cmdline_parser_parse_option(), and finally copies the remaining
+ * (non-option) arguments into args_info->inputs / inputs_num.
+ *
+ * Returns 0.  The process is terminated with EXIT_FAILURE when an option is
+ * rejected or the unnamed-argument array cannot be allocated.
+ */
+int
+cmdline_parser (int argc, char * const *argv, struct gengetopt_args_info *args_info)
+{
+  int c; /* Character of the parsed option. */
+  int missing_required_options = 0;
+
+  args_info->help_given = 0;
+  args_info->version_given = 0;
+  args_info->compress_given = 0;
+  args_info->output_given = 0;
+
+  /* no unnamed arguments yet: callers read inputs_num unconditionally, so
+   * it must be defined even when the command line has none */
+  args_info->inputs = NULL;
+  args_info->inputs_num = 0;
+
+  clear_args(args_info);
+
+  /* rcfile handling */
+
+  /* end rcfile handling */
+
+  optarg = 0;
+  optind = 1;
+  opterr = 1;
+  optopt = '?';
+
+  while (1)
+    {
+      int option_index = 0;
+      static struct option long_options[] = {
+        { "help", 0, NULL, 'h' },
+        { "version", 0, NULL, 'V' },
+        { "compress", 1, NULL, 'z' },
+        { "output", 1, NULL, 'F' },
+        { NULL, 0, NULL, 0 }
+      };
+      static char short_options[] = {
+        'h',
+        'V',
+        'z', ':',
+        'F', ':',
+        '\0'
+      };
+
+      c = getopt_long (argc, argv, short_options, long_options, &option_index);
+
+      if (c == -1) break; /* Exit from 'while (1)' loop. */
+
+      if (cmdline_parser_parse_option(c, long_options[option_index].name, optarg, args_info) != 0) {
+        exit (EXIT_FAILURE);
+      }
+    } /* while */
+
+
+
+  if ( missing_required_options )
+    exit (EXIT_FAILURE);
+
+
+  if (optind < argc) {
+    int i = 0 ;
+    args_info->inputs_num = argc - optind ;
+    args_info->inputs = (char **)(malloc ((args_info->inputs_num)*sizeof(char *))) ;
+    if (!args_info->inputs) {
+      fprintf(stderr, "%s: out of memory\n", PROGRAM);
+      exit (EXIT_FAILURE);
+    }
+    while (optind < argc)
+      args_info->inputs[ i++ ] = strdup (argv[optind++]) ;
+  }
+
+  return 0;
+}
+
+
+/* Parse a single option */
+int
+cmdline_parser_parse_option(char oshort, const char *olong, const char *val,
+                            struct gengetopt_args_info *args_info)
+{
+  if (!oshort && !(olong && *olong)) return 1; /* ignore null options */
+
+#ifdef cmdline_parser_DEBUG
+  fprintf(stderr, "parse_option(): oshort='%c', olong='%s', val='%s'\n", oshort, olong, val);
+#endif
+
+  switch (oshort)
+    {
+    case 'h': /* Print help and exit. */
+      if (args_info->help_given) {
+        fprintf(stderr, "%s: `--help' (`-h') option given more than once\n", PROGRAM);
+      }
+      clear_args(args_info);
+      cmdline_parser_print_help();
+      exit(EXIT_SUCCESS);
+
+      break;
+
+    case 'V': /* Print version and exit.
*/ + if (args_info->version_given) { + fprintf(stderr, "%s: `--version' (`-V') option given more than once\n", PROGRAM); + } + clear_args(args_info); + cmdline_parser_print_version(); + exit(EXIT_SUCCESS); + + break; + + case 'z': /* Specify compression level of output file. */ + if (args_info->compress_given) { + fprintf(stderr, "%s: `--compress' (`-z') option given more than once\n", PROGRAM); + } + args_info->compress_given++; + args_info->compress_arg = (int)atoi(val); + break; + + case 'F': /* Specifiy output file (default=stdout). */ + if (args_info->output_given) { + fprintf(stderr, "%s: `--output' (`-F') option given more than once\n", PROGRAM); + } + args_info->output_given++; + if (args_info->output_arg) free(args_info->output_arg); + args_info->output_arg = strdup(val); + break; + + case 0: /* Long option(s) with no short form */ + /* Print help and exit. */ + if (strcmp(olong, "help") == 0) { + if (args_info->help_given) { + fprintf(stderr, "%s: `--help' (`-h') option given more than once\n", PROGRAM); + } + clear_args(args_info); + cmdline_parser_print_help(); + exit(EXIT_SUCCESS); + + } + + /* Print version and exit. */ + else if (strcmp(olong, "version") == 0) { + if (args_info->version_given) { + fprintf(stderr, "%s: `--version' (`-V') option given more than once\n", PROGRAM); + } + clear_args(args_info); + cmdline_parser_print_version(); + exit(EXIT_SUCCESS); + + } + + /* Specify compression level of output file. */ + else if (strcmp(olong, "compress") == 0) { + if (args_info->compress_given) { + fprintf(stderr, "%s: `--compress' (`-z') option given more than once\n", PROGRAM); + } + args_info->compress_given++; + args_info->compress_arg = (int)atoi(val); + } + + /* Specifiy output file (default=stdout). 
*/ + else if (strcmp(olong, "output") == 0) { + if (args_info->output_given) { + fprintf(stderr, "%s: `--output' (`-F') option given more than once\n", PROGRAM); + } + args_info->output_given++; + if (args_info->output_arg) free(args_info->output_arg); + args_info->output_arg = strdup(val); + } + + else { + fprintf(stderr, "%s: unknown long option '%s'.\n", PROGRAM, olong); + return (EXIT_FAILURE); + } + break; + + case '?': /* Invalid Option */ + fprintf(stderr, "%s: unknown option '%s'.\n", PROGRAM, olong); + return (EXIT_FAILURE); + + + default: /* bug: options not considered. */ + fprintf (stderr, "%s: option unknown: %c\n", PROGRAM, oshort); + abort (); + } /* switch */ + return 0; +} + + +/* Initialize options not yet given from environmental defaults */ +void +cmdline_parser_envdefaults(struct gengetopt_args_info *args_info) +{ + + + return; +} + + +/* Load option values from an .rc file */ +void +cmdline_parser_read_rcfile(const char *filename, + struct gengetopt_args_info *args_info, + int user_specified) +{ + char *fullname; + FILE *rcfile; + + if (!filename) return; /* ignore NULL filenames */ + +#if defined(HAVE_GETUID) && defined(HAVE_GETPWUID) + if (*filename == '~') { + /* tilde-expansion hack */ + struct passwd *pwent = getpwuid(getuid()); + if (!pwent) { + fprintf(stderr, "%s: user-id %d not found!\n", PROGRAM, getuid()); + return; + } + if (!pwent->pw_dir) { + fprintf(stderr, "%s: home directory for user-id %d not found!\n", PROGRAM, getuid()); + return; + } + fullname = (char *)malloc(strlen(pwent->pw_dir)+strlen(filename)); + strcpy(fullname, pwent->pw_dir); + strcat(fullname, filename+1); + } else { + fullname = strdup(filename); + } +#else /* !(defined(HAVE_GETUID) && defined(HAVE_GETPWUID)) */ + fullname = strdup(filename); +#endif /* defined(HAVE_GETUID) && defined(HAVE_GETPWUID) */ + + /* try to open */ + rcfile = fopen(fullname,"r"); + if (!rcfile) { + if (user_specified) { + fprintf(stderr, "%s: warning: open failed for rc-file '%s': 
%s\n",
+            PROGRAM, fullname, strerror(errno));
+    }
+  }
+  else {
+    cmdline_parser_read_rc_stream(rcfile, fullname, args_info);
+  }
+
+  /* cleanup */
+  if (fullname != filename) free(fullname);
+  if (rcfile) fclose(rcfile);
+
+  return;
+}
+
+
+/* Parse option values from an .rc file : guts
+ *
+ * Reads `rcfile' one character at a time and hands every entry to
+ * cmdline_parser_parse_option() as a long option (oshort == '\0').
+ *  - whitespace before an entry is skipped
+ *  - an entry starting with '#' is a comment running to end-of-line
+ *  - the option name runs up to the first '=' or whitespace character;
+ *    that one terminating character is dropped and any whitespace
+ *    following it is skipped
+ *  - the option value is everything up to the next newline
+ *
+ * rcfile    : open stream to read; it is not closed here
+ * filename  : only used in diagnostics
+ * args_info : option structure passed through to the option parser
+ *
+ * A rejected entry is reported on stderr (with the line on which it
+ * started) and parsing continues with the next entry.  If a buffer cannot
+ * be allocated, a message is printed and the rest of the stream is ignored.
+ */
+#define OPTPARSE_GET 32
+void
+cmdline_parser_read_rc_stream(FILE *rcfile,
+                              const char *filename,
+                              struct gengetopt_args_info *args_info)
+{
+  char *optname = (char *)malloc(OPTPARSE_GET);
+  char *optval  = (char *)malloc(OPTPARSE_GET);
+  size_t onsize = OPTPARSE_GET;
+  size_t ovsize = OPTPARSE_GET;
+  size_t onlen  = 0;
+  size_t ovlen  = 0;
+  int lineno    = 1;  /* 1-based number of the line currently being read */
+  int optline   = 1;  /* line on which the current option name started */
+  int c;              /* int, not char: must hold every byte value AND EOF */
+
+  if (!optname || !optval) goto nomem;
+
+#ifdef cmdline_parser_DEBUG
+  fprintf(stderr, "cmdline_parser_read_rc_stream('%s'):\n", filename);
+#endif
+
+  while ((c = fgetc(rcfile)) != EOF) {
+    onlen = 0;
+    ovlen = 0;
+
+    /* -- get next option-name */
+    /* skip leading space and comments */
+    if (isspace(c)) {
+      if (c == '\n') lineno++;
+      continue;
+    }
+    if (c == '#') {
+      while ((c = fgetc(rcfile)) != EOF) {
+        if (c == '\n') {
+          lineno++;
+          break;
+        }
+      }
+      continue;
+    }
+    optline = lineno;
+
+    /* parse option-name */
+    while (c != EOF && c != '=' && !isspace(c)) {
+      /* grow the buffer if necessary (always keep room for the NUL);
+       * realloc() copies by length, the buffer is not NUL-terminated yet */
+      if (onlen >= onsize-1) {
+        char *tmp = (char *)realloc(optname, onsize+OPTPARSE_GET);
+        if (!tmp) goto nomem;
+        onsize += OPTPARSE_GET;
+        optname = tmp;
+      }
+      optname[onlen++] = (char)c;
+      c = fgetc(rcfile);
+    }
+    optname[onlen] = '\0';
+    if (c == '\n') lineno++;
+
+#ifdef cmdline_parser_DEBUG
+    fprintf(stderr, "cmdline_parser_read_rc_stream('%s'): line %d: optname='%s'\n",
+            filename, optline, optname);
+#endif
+
+    /* -- get next option-value */
+    /* skip leading space */
+    while ((c = fgetc(rcfile)) != EOF && isspace(c)) {
+      if (c == '\n') lineno++;
+    }
+
+    /* parse option-value */
+    while (c != EOF && c != '\n') {
+      /* grow the buffer if necessary (always keep room for the NUL) */
+      if (ovlen >= ovsize-1) {
+        char *tmp = (char *)realloc(optval, ovsize+OPTPARSE_GET);
+        if (!tmp) goto nomem;
+        ovsize += OPTPARSE_GET;
+        optval = tmp;
+      }
+      optval[ovlen++] = (char)c;
+      c = fgetc(rcfile);
+    }
+    optval[ovlen] = '\0';
+    if (c == '\n') lineno++;
+
+    /* now do the action for the option */
+    if (cmdline_parser_parse_option('\0',optname,optval,args_info) != 0) {
+      fprintf(stderr, "%s: error in file '%s' at line %d.\n", PROGRAM, filename, optline);
+
+    }
+  }
+
+  /* cleanup */
+  free(optname);
+  free(optval);
+
+  return;
+
+ nomem:
+  fprintf(stderr, "%s: out of memory while reading rc-file '%s'.\n", PROGRAM, filename);
+  free(optname);
+  free(optval);
+  return;
+}
diff --git a/gfsm/gfsm/src/programs/gfsmproduct_cmdparser.h b/gfsm/gfsm/src/programs/gfsmproduct_cmdparser.h
new file mode 100644
index 0000000..527630e
--- /dev/null
+++ b/gfsm/gfsm/src/programs/gfsmproduct_cmdparser.h
@@ -0,0 +1,63 @@
+/* -*- Mode: C -*-
+ *
+ * File: gfsmproduct_cmdparser.h
+ * Description: Headers for command-line parser struct gengetopt_args_info.
+ *
+ * File autogenerated by optgen.perl version 0.05.
+ *
+ */
+
+#ifndef gfsmproduct_cmdparser_h
+#define gfsmproduct_cmdparser_h
+
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+/*
+ * moocow: Never set PACKAGE and VERSION here.
+ */
+
+struct gengetopt_args_info {
+  int compress_arg; /* Specify compression level of output file. (default=-1). */
+  char * output_arg; /* Specifiy output file (default=stdout). (default=-). */
+
+  int help_given; /* Whether help was given */
+  int version_given; /* Whether version was given */
+  int compress_given; /* Whether compress was given */
+  int output_given; /* Whether output was given */
+
+  char **inputs; /* unnamed arguments */
+  unsigned inputs_num; /* number of unnamed arguments */
+};
+
+/* read rc files (if any) and parse all command-line options in one swell foop */
+int cmdline_parser (int argc, char *const *argv, struct gengetopt_args_info *args_info);
+
+/* instantiate defaults from environment variables: you must call this yourself!
*/ +void cmdline_parser_envdefaults (struct gengetopt_args_info *args_info); + +/* read a single rc-file */ +void cmdline_parser_read_rcfile (const char *filename, + struct gengetopt_args_info *args_info, + int user_specified); + +/* read a single rc-file (stream) */ +void cmdline_parser_read_rc_stream (FILE *rcfile, + const char *filename, + struct gengetopt_args_info *args_info); + +/* parse a single option */ +int cmdline_parser_parse_option (char oshort, const char *olong, const char *val, + struct gengetopt_args_info *args_info); + +/* print help message */ +void cmdline_parser_print_help(void); + +/* print version */ +void cmdline_parser_print_version(void); + +#ifdef __cplusplus +} +#endif /* __cplusplus */ +#endif /* gfsmproduct_cmdparser_h */ diff --git a/gfsm/gfsm/src/programs/gfsmproduct_main.c b/gfsm/gfsm/src/programs/gfsmproduct_main.c new file mode 100644 index 0000000..446e875 --- /dev/null +++ b/gfsm/gfsm/src/programs/gfsmproduct_main.c @@ -0,0 +1,123 @@ +/* + gfsm-utils : finite state automaton utilities + Copyright (C) 2005 by Bryan Jurish <moocow@ling.uni-potsdam.de> + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+
+    You should have received a copy of the GNU Lesser General Public
+    License along with this library; if not, write to the Free Software
+    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+*/
+
+#ifdef HAVE_CONFIG_H
+# include <config.h>
+#endif
+
+#include <stdio.h>
+#include <errno.h>
+#include <string.h>
+#include <stdlib.h>
+
+#include <gfsm.h>
+
+#include "gfsmproduct_cmdparser.h"
+
+/*--------------------------------------------------------------------------
+ * Globals
+ *--------------------------------------------------------------------------*/
+char *progname = "gfsmproduct";
+
+//-- options
+struct gengetopt_args_info args;
+
+//-- files
+//   NOTE(review): `infilename' is never assigned in this file and is shadowed
+//   by the parameter of compute_operation() -- looks unused, confirm
+const char *infilename;
+const char *outfilename = "-";
+
+//-- global structs etc.
+gfsmError *err = NULL;
+gfsmAutomaton *fsmOut=NULL, *fsmIn=NULL;
+
+/*--------------------------------------------------------------------------
+ * Option Processing
+ *--------------------------------------------------------------------------*/
+/* get_my_options(): parse the command line into the global `args', take the
+ * output filename from --output, and allocate the (empty) `fsmIn' automaton.
+ * Exits with status 1 if option parsing fails; prints the help text and exits
+ * with status 2 if no unnamed (file) argument was given.
+ */
+void get_my_options(int argc, char **argv)
+{
+  if (cmdline_parser(argc, argv, &args) != 0)
+    exit(1);
+
+  //-- require at least one file argument
+  if (args.inputs_num < 1) {
+    cmdline_parser_print_help();
+    exit(2);
+  }
+
+  //-- output
+  if (args.output_arg) outfilename = args.output_arg;
+
+  //-- load environmental defaults
+  //cmdline_parser_envdefaults(&args);
+
+  //-- initialize automaton
+  fsmIn = gfsm_automaton_new();
+}
+
+/*--------------------------------------------------------------------------
+ * compute_operation()
+ *  + utility routine
+ *  + loads the binary automaton `infilename' into fsmIn (exit status 255 on
+ *    failure); the first automaton loaded becomes fsmOut and a fresh fsmIn
+ *    is allocated, every later one is combined into fsmOut with
+ *    gfsm_automaton_product2()
+ */
+void compute_operation(const char *infilename)
+{
+  //-- load automaton
+  if (!gfsm_automaton_load_bin_filename(fsmIn,infilename,&err)) {
+    g_printerr("%s: load failed for '%s': %s\n", progname, infilename, err->message);
+    exit(255);
+  }
+
+  //-- compute product
+  if (fsmOut == NULL) {
+    fsmOut = fsmIn;
+    fsmIn = gfsm_automaton_new();
+  } else {
+    gfsm_automaton_product2(fsmOut,fsmIn); //-- dual-destructive version
+  }
+}
+
+/*--------------------------------------------------------------------------
+ * MAIN
+ *--------------------------------------------------------------------------*/
+/* main(): feed at most the first two unnamed arguments to
+ * compute_operation(); when only one was given, "-" is used as the second
+ * operand (presumably standard input, as the option spec describes for
+ * UPPER -- confirm against the gfsm loader).  The result is stored to
+ * `outfilename' at the requested compression level; exit status 4 if the
+ * store fails.
+ */
+int main (int argc, char **argv)
+{
+  int i;
+
+  GFSM_INIT
+
+  get_my_options(argc,argv);
+
+  for (i = 0; i < args.inputs_num && i < 2; i++) {
+    compute_operation(args.inputs[i]);
+  }
+  if (args.inputs_num == 1) compute_operation("-");
+
+  //-- spew automaton
+  if (!gfsm_automaton_save_bin_filename(fsmOut,outfilename,args.compress_arg,&err)) {
+    g_printerr("%s: store failed to '%s': %s\n", progname, outfilename, err->message);
+    exit(4);
+  }
+
+  //-- cleanup
+  if (fsmIn) gfsm_automaton_free(fsmIn);
+  if (fsmOut) gfsm_automaton_free(fsmOut);
+
+  GFSM_FINISH
+
+  return 0;
+}
diff --git a/gfsm/gfsm/src/programs/gfsmproject.gog b/gfsm/gfsm/src/programs/gfsmproject.gog
new file mode 100644
index 0000000..42fcb6c
--- /dev/null
+++ b/gfsm/gfsm/src/programs/gfsmproject.gog
@@ -0,0 +1,76 @@
+# -*- Mode: Shell-Script -*-
+#
+# Getopt::Gen specification
+#-----------------------------------------------------------------------------
+program "gfsmproject"
+#program_version "0.01"
+
+purpose "Project one side of finite state machines"
+author "Bryan Jurish <moocow@ling.uni-potsdam.de>"
+on_reparse "warn"
+
+#-----------------------------------------------------------------------------
+# Details
+#-----------------------------------------------------------------------------
+details ""
+
+#-----------------------------------------------------------------------------
+# Files
+#-----------------------------------------------------------------------------
+#rcfile "/etc/gfsmrc"
+#rcfile "~/.gfsmrc"
+
+#-----------------------------------------------------------------------------
+# Arguments
+#-----------------------------------------------------------------------------
+argument "BINFILE" "Stored binary gfsm file" \
+ details="
+If unspecified, standard input will be read
+"
+
+#----------------------------------------------------------------------------- +# Options +#----------------------------------------------------------------------------- +#group "Basic Options" + +flag "first" 1 "Project lower side [default]." +flag - i "Project input tape (alias for -1)" + +flag "second" 2 "Project upper side." +flag - o "Project output side (alias for -2)" + +int "compress" z "Specify compression level of output file." \ + arg="LEVEL" \ + default="-1" \ + details=" +Specify zlib compression level of output file. -1 (default) indicates +the default compression level, 0 (zero) indicates no zlib compression at all, +and 9 indicates the best possible compression. +" + +string "output" F "Specifiy output file (default=stdout)." \ + arg="FILE" \ + default="-" + +#----------------------------------------------------------------------------- +# Addenda +#----------------------------------------------------------------------------- +#addenda "" + +#----------------------------------------------------------------------------- +# Bugs +#----------------------------------------------------------------------------- +bugs " + +None known. + +" + +#----------------------------------------------------------------------------- +# Footer +#----------------------------------------------------------------------------- +#acknowledge `cat acknowledge.pod` + +seealso " +L<gfsmutils> +" diff --git a/gfsm/gfsm/src/programs/gfsmproject_cmdparser.c b/gfsm/gfsm/src/programs/gfsmproject_cmdparser.c new file mode 100644 index 0000000..5905210 --- /dev/null +++ b/gfsm/gfsm/src/programs/gfsmproject_cmdparser.c @@ -0,0 +1,523 @@ +/* -*- Mode: C -*- + * + * File: gfsmproject_cmdparser.c + * Description: Code for command-line parser struct gengetopt_args_info. 
+ * + * File autogenerated by optgen.perl version 0.05 + * generated with the following command: + * /usr/local/bin/optgen.perl -u -l --no-handle-rcfile --nopod -F gfsmproject_cmdparser gfsmproject.gog + * + * The developers of optgen.perl consider the fixed text that goes in all + * optgen.perl output files to be in the public domain: + * we make no copyright claims on it. + * + */ + +#include <stdio.h> +#include <stdlib.h> +#include <errno.h> +#include <string.h> +#include <unistd.h> +#include <ctype.h> + +/* If we use autoconf/autoheader. */ +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#ifdef HAVE_PWD_H +# include <pwd.h> +#endif + +/* Allow user-overrides for PACKAGE and VERSION */ +#ifndef PACKAGE +# define PACKAGE "PACKAGE" +#endif + +#ifndef VERSION +# define VERSION "VERSION" +#endif + + +#ifndef PROGRAM +# define PROGRAM "gfsmproject" +#endif + +/* #define cmdline_parser_DEBUG */ + +/* Check for "configure's" getopt check result. */ +#ifndef HAVE_GETOPT_LONG +# include "getopt.h" +#else +# include <getopt.h> +#endif + +#if !defined(HAVE_STRDUP) && !defined(strdup) +# define strdup gengetopt_strdup +#endif /* HAVE_STRDUP */ + +#include "gfsmproject_cmdparser.h" + + +/* user code section */ + +/* end user code section */ + + +void +cmdline_parser_print_version (void) +{ + printf("gfsmproject (%s %s) by Bryan Jurish <moocow@ling.uni-potsdam.de>\n", PACKAGE, VERSION); +} + +void +cmdline_parser_print_help (void) +{ + cmdline_parser_print_version (); + printf("\n"); + printf("Purpose:\n"); + printf(" Project one side of finite state machines\n"); + printf("\n"); + + printf("Usage: %s [OPTIONS]... 
BINFILE\n", "gfsmproject");
+
+  printf("\n");
+  printf(" Arguments:\n");
+  printf(" BINFILE Stored binary gfsm file\n");
+
+  printf("\n");
+  printf(" Options:\n");
+  printf(" -h --help Print help and exit.\n");
+  printf(" -V --version Print version and exit.\n");
+  printf(" -1 --first Project lower side [default].\n");
+  printf(" -i Project input tape (alias for -1)\n");
+  printf(" -2 --second Project upper side.\n");
+  printf(" -o Project output side (alias for -2)\n");
+  printf(" -zLEVEL --compress=LEVEL Specify compression level of output file.\n");
+  printf(" -FFILE --output=FILE Specifiy output file (default=stdout).\n");
+}
+
+#if !defined(HAVE_STRDUP) && !defined(strdup)
+/* gengetopt_strdup(): automatically generated from strdup.c. */
+/* strdup.c replacement of strdup, which is not standard */
+static char *
+gengetopt_strdup (const char *s)
+{
+  char *result = (char*)malloc(strlen(s) + 1);
+  if (result == (char*)0)
+    return (char*)0;
+  strcpy(result, s);
+  return result;
+}
+#endif /* HAVE_STRDUP */
+
+/* clear_args(args_info): clears all args & resets to defaults */
+static void
+clear_args(struct gengetopt_args_info *args_info)
+{
+  args_info->first_flag = 0;
+  args_info->i_flag = 0;
+  args_info->second_flag = 0;
+  args_info->o_flag = 0;
+  args_info->compress_arg = -1;
+  args_info->output_arg = strdup("-");
+}
+
+
+/* cmdline_parser(): parse a whole command line.
+ *
+ * Resets every field of `args_info' (including the unnamed-argument list),
+ * runs getopt_long() over argv, passes each option it returns to
+ * cmdline_parser_parse_option(), and finally copies the remaining
+ * (non-option) arguments into args_info->inputs / inputs_num.
+ *
+ * Returns 0.  The process is terminated with EXIT_FAILURE when an option is
+ * rejected or the unnamed-argument array cannot be allocated.
+ */
+int
+cmdline_parser (int argc, char * const *argv, struct gengetopt_args_info *args_info)
+{
+  int c; /* Character of the parsed option. */
+  int missing_required_options = 0;
+
+  args_info->help_given = 0;
+  args_info->version_given = 0;
+  args_info->first_given = 0;
+  args_info->i_given = 0;
+  args_info->second_given = 0;
+  args_info->o_given = 0;
+  args_info->compress_given = 0;
+  args_info->output_given = 0;
+
+  /* no unnamed arguments yet: keep inputs/inputs_num defined even when the
+   * command line has none */
+  args_info->inputs = NULL;
+  args_info->inputs_num = 0;
+
+  clear_args(args_info);
+
+  /* rcfile handling */
+
+  /* end rcfile handling */
+
+  optarg = 0;
+  optind = 1;
+  opterr = 1;
+  optopt = '?';
+
+  while (1)
+    {
+      int option_index = 0;
+      static struct option long_options[] = {
+        { "help", 0, NULL, 'h' },
+        { "version", 0, NULL, 'V' },
+        { "first", 0, NULL, '1' },
+        { "second", 0, NULL, '2' },
+        { "compress", 1, NULL, 'z' },
+        { "output", 1, NULL, 'F' },
+        { NULL, 0, NULL, 0 }
+      };
+      static char short_options[] = {
+        'h',
+        'V',
+        '1',
+        'i',
+        '2',
+        'o',
+        'z', ':',
+        'F', ':',
+        '\0'
+      };
+
+      c = getopt_long (argc, argv, short_options, long_options, &option_index);
+
+      if (c == -1) break; /* Exit from 'while (1)' loop. */
+
+      if (cmdline_parser_parse_option(c, long_options[option_index].name, optarg, args_info) != 0) {
+        exit (EXIT_FAILURE);
+      }
+    } /* while */
+
+
+
+  if ( missing_required_options )
+    exit (EXIT_FAILURE);
+
+
+  if (optind < argc) {
+    int i = 0 ;
+    args_info->inputs_num = argc - optind ;
+    args_info->inputs = (char **)(malloc ((args_info->inputs_num)*sizeof(char *))) ;
+    if (!args_info->inputs) {
+      fprintf(stderr, "%s: out of memory\n", PROGRAM);
+      exit (EXIT_FAILURE);
+    }
+    while (optind < argc)
+      args_info->inputs[ i++ ] = strdup (argv[optind++]) ;
+  }
+
+  return 0;
+}
+
+
+/* Parse a single option */
+int
+cmdline_parser_parse_option(char oshort, const char *olong, const char *val,
+                            struct gengetopt_args_info *args_info)
+{
+  if (!oshort && !(olong && *olong)) return 1; /* ignore null options */
+
+#ifdef cmdline_parser_DEBUG
+  fprintf(stderr, "parse_option(): oshort='%c', olong='%s', val='%s'\n", oshort, olong, val);
+#endif
+
+  switch (oshort)
+    {
+    case 'h': /* Print help and exit.
*/ + if (args_info->help_given) { + fprintf(stderr, "%s: `--help' (`-h') option given more than once\n", PROGRAM); + } + clear_args(args_info); + cmdline_parser_print_help(); + exit(EXIT_SUCCESS); + + break; + + case 'V': /* Print version and exit. */ + if (args_info->version_given) { + fprintf(stderr, "%s: `--version' (`-V') option given more than once\n", PROGRAM); + } + clear_args(args_info); + cmdline_parser_print_version(); + exit(EXIT_SUCCESS); + + break; + + case '1': /* Project lower side [default]. */ + if (args_info->first_given) { + fprintf(stderr, "%s: `--first' (`-1') option given more than once\n", PROGRAM); + } + args_info->first_given++; + if (args_info->first_given <= 1) + args_info->first_flag = !(args_info->first_flag); + break; + + case 'i': /* Project input tape (alias for -1) */ + if (args_info->i_given) { + fprintf(stderr, "%s: (`-i') option given more than once\n", PROGRAM); + } + args_info->i_given++; + if (args_info->i_given <= 1) + args_info->i_flag = !(args_info->i_flag); + break; + + case '2': /* Project upper side. */ + if (args_info->second_given) { + fprintf(stderr, "%s: `--second' (`-2') option given more than once\n", PROGRAM); + } + args_info->second_given++; + if (args_info->second_given <= 1) + args_info->second_flag = !(args_info->second_flag); + break; + + case 'o': /* Project output side (alias for -2) */ + if (args_info->o_given) { + fprintf(stderr, "%s: (`-o') option given more than once\n", PROGRAM); + } + args_info->o_given++; + if (args_info->o_given <= 1) + args_info->o_flag = !(args_info->o_flag); + break; + + case 'z': /* Specify compression level of output file. */ + if (args_info->compress_given) { + fprintf(stderr, "%s: `--compress' (`-z') option given more than once\n", PROGRAM); + } + args_info->compress_given++; + args_info->compress_arg = (int)atoi(val); + break; + + case 'F': /* Specifiy output file (default=stdout). 
*/ + if (args_info->output_given) { + fprintf(stderr, "%s: `--output' (`-F') option given more than once\n", PROGRAM); + } + args_info->output_given++; + if (args_info->output_arg) free(args_info->output_arg); + args_info->output_arg = strdup(val); + break; + + case 0: /* Long option(s) with no short form */ + /* Print help and exit. */ + if (strcmp(olong, "help") == 0) { + if (args_info->help_given) { + fprintf(stderr, "%s: `--help' (`-h') option given more than once\n", PROGRAM); + } + clear_args(args_info); + cmdline_parser_print_help(); + exit(EXIT_SUCCESS); + + } + + /* Print version and exit. */ + else if (strcmp(olong, "version") == 0) { + if (args_info->version_given) { + fprintf(stderr, "%s: `--version' (`-V') option given more than once\n", PROGRAM); + } + clear_args(args_info); + cmdline_parser_print_version(); + exit(EXIT_SUCCESS); + + } + + /* Project lower side [default]. */ + else if (strcmp(olong, "first") == 0) { + if (args_info->first_given) { + fprintf(stderr, "%s: `--first' (`-1') option given more than once\n", PROGRAM); + } + args_info->first_given++; + if (args_info->first_given <= 1) + args_info->first_flag = !(args_info->first_flag); + } + + /* Project upper side. */ + else if (strcmp(olong, "second") == 0) { + if (args_info->second_given) { + fprintf(stderr, "%s: `--second' (`-2') option given more than once\n", PROGRAM); + } + args_info->second_given++; + if (args_info->second_given <= 1) + args_info->second_flag = !(args_info->second_flag); + } + + /* Specify compression level of output file. */ + else if (strcmp(olong, "compress") == 0) { + if (args_info->compress_given) { + fprintf(stderr, "%s: `--compress' (`-z') option given more than once\n", PROGRAM); + } + args_info->compress_given++; + args_info->compress_arg = (int)atoi(val); + } + + /* Specifiy output file (default=stdout). 
*/ + else if (strcmp(olong, "output") == 0) { + if (args_info->output_given) { + fprintf(stderr, "%s: `--output' (`-F') option given more than once\n", PROGRAM); + } + args_info->output_given++; + if (args_info->output_arg) free(args_info->output_arg); + args_info->output_arg = strdup(val); + } + + else { + fprintf(stderr, "%s: unknown long option '%s'.\n", PROGRAM, olong); + return (EXIT_FAILURE); + } + break; + + case '?': /* Invalid Option */ + fprintf(stderr, "%s: unknown option '%s'.\n", PROGRAM, olong); + return (EXIT_FAILURE); + + + default: /* bug: options not considered. */ + fprintf (stderr, "%s: option unknown: %c\n", PROGRAM, oshort); + abort (); + } /* switch */ + return 0; +} + + +/* Initialize options not yet given from environmental defaults */ +void +cmdline_parser_envdefaults(struct gengetopt_args_info *args_info) +{ + + + return; +} + + +/* Load option values from an .rc file */ +void +cmdline_parser_read_rcfile(const char *filename, + struct gengetopt_args_info *args_info, + int user_specified) +{ + char *fullname; + FILE *rcfile; + + if (!filename) return; /* ignore NULL filenames */ + +#if defined(HAVE_GETUID) && defined(HAVE_GETPWUID) + if (*filename == '~') { + /* tilde-expansion hack */ + struct passwd *pwent = getpwuid(getuid()); + if (!pwent) { + fprintf(stderr, "%s: user-id %d not found!\n", PROGRAM, getuid()); + return; + } + if (!pwent->pw_dir) { + fprintf(stderr, "%s: home directory for user-id %d not found!\n", PROGRAM, getuid()); + return; + } + fullname = (char *)malloc(strlen(pwent->pw_dir)+strlen(filename)); + strcpy(fullname, pwent->pw_dir); + strcat(fullname, filename+1); + } else { + fullname = strdup(filename); + } +#else /* !(defined(HAVE_GETUID) && defined(HAVE_GETPWUID)) */ + fullname = strdup(filename); +#endif /* defined(HAVE_GETUID) && defined(HAVE_GETPWUID) */ + + /* try to open */ + rcfile = fopen(fullname,"r"); + if (!rcfile) { + if (user_specified) { + fprintf(stderr, "%s: warning: open failed for rc-file '%s': 
%s\n", + PROGRAM, fullname, strerror(errno)); + } + } + else { + cmdline_parser_read_rc_stream(rcfile, fullname, args_info); + } + + /* cleanup */ + if (fullname != filename) free(fullname); + if (rcfile) fclose(rcfile); + + return; +} + + +/* Parse option values from an .rc file : guts */ +#define OPTPARSE_GET 32 +void +cmdline_parser_read_rc_stream(FILE *rcfile, + const char *filename, + struct gengetopt_args_info *args_info) +{ + char *optname = (char *)malloc(OPTPARSE_GET); + char *optval = (char *)malloc(OPTPARSE_GET); + size_t onsize = OPTPARSE_GET; + size_t ovsize = OPTPARSE_GET; + size_t onlen = 0; + size_t ovlen = 0; + int lineno = 0; + char c; + +#ifdef cmdline_parser_DEBUG + fprintf(stderr, "cmdline_parser_read_rc_stream('%s'):\n", filename); +#endif + + while ((c = fgetc(rcfile)) != EOF) { + onlen = 0; + ovlen = 0; + lineno++; + + /* -- get next option-name */ + /* skip leading space and comments */ + if (isspace(c)) continue; + if (c == '#') { + while ((c = fgetc(rcfile)) != EOF) { + if (c == '\n') break; + } + continue; + } + + /* parse option-name */ + while (c != EOF && c != '=' && !isspace(c)) { + /* re-allocate if necessary */ + if (onlen >= onsize-1) { + char *tmp = (char *)malloc(onsize+OPTPARSE_GET); + strcpy(tmp,optname); + free(optname); + + onsize += OPTPARSE_GET; + optname = tmp; + } + optname[onlen++] = c; + c = fgetc(rcfile); + } + optname[onlen++] = '\0'; + +#ifdef cmdline_parser_DEBUG + fprintf(stderr, "cmdline_parser_read_rc_stream('%s'): line %d: optname='%s'\n", + filename, lineno, optname); +#endif + + /* -- get next option-value */ + /* skip leading space */ + while ((c = fgetc(rcfile)) != EOF && isspace(c)) { + ; + } + + /* parse option-value */ + while (c != EOF && c != '\n') { + /* re-allocate if necessary */ + if (ovlen >= ovsize-1) { + char *tmp = (char *)malloc(ovsize+OPTPARSE_GET); + strcpy(tmp,optval); + free(optval); + ovsize += OPTPARSE_GET; + optval = tmp; + } + optval[ovlen++] = c; + c = fgetc(rcfile); + } + 
optval[ovlen++] = '\0'; + + /* now do the action for the option */ + if (cmdline_parser_parse_option('\0',optname,optval,args_info) != 0) { + fprintf(stderr, "%s: error in file '%s' at line %d.\n", PROGRAM, filename, lineno); + + } + } + + /* cleanup */ + free(optname); + free(optval); + + return; +} diff --git a/gfsm/gfsm/src/programs/gfsmproject_cmdparser.h b/gfsm/gfsm/src/programs/gfsmproject_cmdparser.h new file mode 100644 index 0000000..e7fbb80 --- /dev/null +++ b/gfsm/gfsm/src/programs/gfsmproject_cmdparser.h @@ -0,0 +1,71 @@ +/* -*- Mode: C -*- + * + * File: gfsmproject_cmdparser.h + * Description: Headers for command-line parser struct gengetopt_args_info. + * + * File autogenerated by optgen.perl version 0.05. + * + */ + +#ifndef gfsmproject_cmdparser_h +#define gfsmproject_cmdparser_h + +#ifdef __cplusplus +extern "C" { +#endif /* __cplusplus */ + +/* + * moocow: Never set PACKAGE and VERSION here. + */ + +struct gengetopt_args_info { + int first_flag; /* Project lower side [default]. (default=0). */ + int i_flag; /* Project input tape (alias for -1) (default=0). */ + int second_flag; /* Project upper side. (default=0). */ + int o_flag; /* Project output side (alias for -2) (default=0). */ + int compress_arg; /* Specify compression level of output file. (default=-1). */ + char * output_arg; /* Specifiy output file (default=stdout). (default=-). 
*/ + + int help_given; /* Whether help was given */ + int version_given; /* Whether version was given */ + int first_given; /* Whether first was given */ + int i_given; /* Whether - was given */ + int second_given; /* Whether second was given */ + int o_given; /* Whether - was given */ + int compress_given; /* Whether compress was given */ + int output_given; /* Whether output was given */ + + char **inputs; /* unnamed arguments */ + unsigned inputs_num; /* number of unnamed arguments */ +}; + +/* read rc files (if any) and parse all command-line options in one swell foop */ +int cmdline_parser (int argc, char *const *argv, struct gengetopt_args_info *args_info); + +/* instantiate defaults from environment variables: you must call this yourself! */ +void cmdline_parser_envdefaults (struct gengetopt_args_info *args_info); + +/* read a single rc-file */ +void cmdline_parser_read_rcfile (const char *filename, + struct gengetopt_args_info *args_info, + int user_specified); + +/* read a single rc-file (stream) */ +void cmdline_parser_read_rc_stream (FILE *rcfile, + const char *filename, + struct gengetopt_args_info *args_info); + +/* parse a single option */ +int cmdline_parser_parse_option (char oshort, const char *olong, const char *val, + struct gengetopt_args_info *args_info); + +/* print help message */ +void cmdline_parser_print_help(void); + +/* print version */ +void cmdline_parser_print_version(void); + +#ifdef __cplusplus +} +#endif /* __cplusplus */ +#endif /* gfsmproject_cmdparser_h */ diff --git a/gfsm/gfsm/src/programs/gfsmproject_main.c b/gfsm/gfsm/src/programs/gfsmproject_main.c new file mode 100644 index 0000000..528b456 --- /dev/null +++ b/gfsm/gfsm/src/programs/gfsmproject_main.c @@ -0,0 +1,100 @@ +/* + gfsm-utils : finite state automaton utilities + Copyright (C) 2004 by Bryan Jurish <moocow@ling.uni-potsdam.de> + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as 
published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif + +#include <stdio.h> +#include <errno.h> +#include <string.h> +#include <stdlib.h> + +#include <gfsm.h> + +#include "gfsmproject_cmdparser.h" + +/*-------------------------------------------------------------------------- + * Globals + *--------------------------------------------------------------------------*/ +char *progname = "gfsmproject"; + +//-- options +struct gengetopt_args_info args; + +//-- files +const char *infilename = "-"; +const char *outfilename = "-"; + +//-- global structs & vars +gfsmAutomaton *fsm; +gfsmLabelSide which = gfsmLSLower; + +/*-------------------------------------------------------------------------- + * Option Processing + *--------------------------------------------------------------------------*/ +void get_my_options(int argc, char **argv) +{ + if (cmdline_parser(argc, argv, &args) != 0) + exit(1); + + //-- output + if (args.inputs_num) infilename = args.inputs[0]; + if (args.output_arg) outfilename = args.output_arg; + + //-- load environmental defaults + //cmdline_parser_envdefaults(&args); + + //-- which? 
+ if (args.second_given || args.o_given) which = gfsmLSUpper; + else which = gfsmLSLower; + + //-- initialize automaton + fsm = gfsm_automaton_new(); +} + + +/*-------------------------------------------------------------------------- + * MAIN + *--------------------------------------------------------------------------*/ +int main (int argc, char **argv) +{ + gfsmError *err = NULL; + get_my_options(argc,argv); + + //-- load automaton + if (!gfsm_automaton_load_bin_filename(fsm,infilename,&err)) { + g_printerr("%s: load failed for '%s': %s\n", progname, infilename, err->message); + exit(255); + } + + //-- invert + gfsm_automaton_project(fsm,which); + + //-- spew automaton + if (!gfsm_automaton_save_bin_filename(fsm,outfilename,args.compress_arg,&err)) { + g_printerr("%s: store failed to '%s': %s\n", progname, outfilename, err->message); + exit(4); + } + + //-- cleanup + if (fsm) gfsm_automaton_free(fsm); + + return 0; +} diff --git a/gfsm/gfsm/src/programs/gfsmrenumber.gog b/gfsm/gfsm/src/programs/gfsmrenumber.gog new file mode 100644 index 0000000..df2d0b6 --- /dev/null +++ b/gfsm/gfsm/src/programs/gfsmrenumber.gog @@ -0,0 +1,70 @@ +# -*- Mode: Shell-Script -*- +# +# Getopt::Gen specification +#----------------------------------------------------------------------------- +program "gfsmrenumber" +#program_version "0.01" + +purpose "Renumber states in finite state machines" +author "Bryan Jurish <moocow@ling.uni-potsdam.de>" +on_reparse "warn" + +#----------------------------------------------------------------------------- +# Details +#----------------------------------------------------------------------------- +details "" + +#----------------------------------------------------------------------------- +# Files +#----------------------------------------------------------------------------- +#rcfile "/etc/gfsmrc" +#rcfile "~/.gfsmrc" + +#----------------------------------------------------------------------------- +# Arguments 
+#----------------------------------------------------------------------------- +argument "BINFILE" "Stored binary gfsm file" \ + details=" +If unspecified, standard input will be read +" + +#----------------------------------------------------------------------------- +# Options +#----------------------------------------------------------------------------- +#group "Basic Options" + +int "compress" z "Specify compression level of output file." \ + arg="LEVEL" \ + default="-1" \ + details=" +Specify zlib compression level of output file. -1 (default) indicates +the default compression level, 0 (zero) indicates no zlib compression at all, +and 9 indicates the best possible compression. +" + +string "output" F "Specifiy output file (default=stdout)." \ + arg="FILE" \ + default="-" + +#----------------------------------------------------------------------------- +# Addenda +#----------------------------------------------------------------------------- +#addenda "" + +#----------------------------------------------------------------------------- +# Bugs +#----------------------------------------------------------------------------- +bugs " + +None known. + +" + +#----------------------------------------------------------------------------- +# Footer +#----------------------------------------------------------------------------- +#acknowledge `cat acknowledge.pod` + +seealso " +L<gfsmutils> +" diff --git a/gfsm/gfsm/src/programs/gfsmrenumber_cmdparser.c b/gfsm/gfsm/src/programs/gfsmrenumber_cmdparser.c new file mode 100644 index 0000000..a058eb2 --- /dev/null +++ b/gfsm/gfsm/src/programs/gfsmrenumber_cmdparser.c @@ -0,0 +1,449 @@ +/* -*- Mode: C -*- + * + * File: gfsmrenumber_cmdparser.c + * Description: Code for command-line parser struct gengetopt_args_info. 
+ * + * File autogenerated by optgen.perl version 0.05 + * generated with the following command: + * /usr/local/bin/optgen.perl -u -l --no-handle-rcfile --nopod -F gfsmrenumber_cmdparser gfsmrenumber.gog + * + * The developers of optgen.perl consider the fixed text that goes in all + * optgen.perl output files to be in the public domain: + * we make no copyright claims on it. + * + */ + +#include <stdio.h> +#include <stdlib.h> +#include <errno.h> +#include <string.h> +#include <unistd.h> +#include <ctype.h> + +/* If we use autoconf/autoheader. */ +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#ifdef HAVE_PWD_H +# include <pwd.h> +#endif + +/* Allow user-overrides for PACKAGE and VERSION */ +#ifndef PACKAGE +# define PACKAGE "PACKAGE" +#endif + +#ifndef VERSION +# define VERSION "VERSION" +#endif + + +#ifndef PROGRAM +# define PROGRAM "gfsmrenumber" +#endif + +/* #define cmdline_parser_DEBUG */ + +/* Check for "configure's" getopt check result. */ +#ifndef HAVE_GETOPT_LONG +# include "getopt.h" +#else +# include <getopt.h> +#endif + +#if !defined(HAVE_STRDUP) && !defined(strdup) +# define strdup gengetopt_strdup +#endif /* HAVE_STRDUP */ + +#include "gfsmrenumber_cmdparser.h" + + +/* user code section */ + +/* end user code section */ + + +void +cmdline_parser_print_version (void) +{ + printf("gfsmrenumber (%s %s) by Bryan Jurish <moocow@ling.uni-potsdam.de>\n", PACKAGE, VERSION); +} + +void +cmdline_parser_print_help (void) +{ + cmdline_parser_print_version (); + printf("\n"); + printf("Purpose:\n"); + printf(" Renumber states in finite state machines\n"); + printf("\n"); + + printf("Usage: %s [OPTIONS]... 
BINFILE\n", "gfsmrenumber"); + + printf("\n"); + printf(" Arguments:\n"); + printf(" BINFILE Stored binary gfsm file\n"); + + printf("\n"); + printf(" Options:\n"); + printf(" -h --help Print help and exit.\n"); + printf(" -V --version Print version and exit.\n"); + printf(" -zLEVEL --compress=LEVEL Specify compression level of output file.\n"); + printf(" -FFILE --output=FILE Specifiy output file (default=stdout).\n"); +} + +#if !defined(HAVE_STRDUP) && !defined(strdup) +/* gengetopt_strdup(): automatically generated from strdup.c. */ +/* strdup.c replacement of strdup, which is not standard */ +static char * +gengetopt_strdup (const char *s) +{ + char *result = (char*)malloc(strlen(s) + 1); + if (result == (char*)0) + return (char*)0; + strcpy(result, s); + return result; +} +#endif /* HAVE_STRDUP */ + +/* clear_args(args_info): clears all args & resets to defaults */ +static void +clear_args(struct gengetopt_args_info *args_info) +{ + args_info->compress_arg = -1; + args_info->output_arg = strdup("-"); +} + + +int +cmdline_parser (int argc, char * const *argv, struct gengetopt_args_info *args_info) +{ + int c; /* Character of the parsed option. */ + int missing_required_options = 0; + + args_info->help_given = 0; + args_info->version_given = 0; + args_info->compress_given = 0; + args_info->output_given = 0; + + clear_args(args_info); + + /* rcfile handling */ + + /* end rcfile handling */ + + optarg = 0; + optind = 1; + opterr = 1; + optopt = '?'; + + while (1) + { + int option_index = 0; + static struct option long_options[] = { + { "help", 0, NULL, 'h' }, + { "version", 0, NULL, 'V' }, + { "compress", 1, NULL, 'z' }, + { "output", 1, NULL, 'F' }, + { NULL, 0, NULL, 0 } + }; + static char short_options[] = { + 'h', + 'V', + 'z', ':', + 'F', ':', + '\0' + }; + + c = getopt_long (argc, argv, short_options, long_options, &option_index); + + if (c == -1) break; /* Exit from 'while (1)' loop. 
*/ + + if (cmdline_parser_parse_option(c, long_options[option_index].name, optarg, args_info) != 0) { + exit (EXIT_FAILURE); + } + } /* while */ + + + + if ( missing_required_options ) + exit (EXIT_FAILURE); + + + if (optind < argc) { + int i = 0 ; + args_info->inputs_num = argc - optind ; + args_info->inputs = (char **)(malloc ((args_info->inputs_num)*sizeof(char *))) ; + while (optind < argc) + args_info->inputs[ i++ ] = strdup (argv[optind++]) ; + } + + return 0; +} + + +/* Parse a single option */ +int +cmdline_parser_parse_option(char oshort, const char *olong, const char *val, + struct gengetopt_args_info *args_info) +{ + if (!oshort && !(olong && *olong)) return 1; /* ignore null options */ + +#ifdef cmdline_parser_DEBUG + fprintf(stderr, "parse_option(): oshort='%c', olong='%s', val='%s'\n", oshort, olong, val);*/ +#endif + + switch (oshort) + { + case 'h': /* Print help and exit. */ + if (args_info->help_given) { + fprintf(stderr, "%s: `--help' (`-h') option given more than once\n", PROGRAM); + } + clear_args(args_info); + cmdline_parser_print_help(); + exit(EXIT_SUCCESS); + + break; + + case 'V': /* Print version and exit. */ + if (args_info->version_given) { + fprintf(stderr, "%s: `--version' (`-V') option given more than once\n", PROGRAM); + } + clear_args(args_info); + cmdline_parser_print_version(); + exit(EXIT_SUCCESS); + + break; + + case 'z': /* Specify compression level of output file. */ + if (args_info->compress_given) { + fprintf(stderr, "%s: `--compress' (`-z') option given more than once\n", PROGRAM); + } + args_info->compress_given++; + args_info->compress_arg = (int)atoi(val); + break; + + case 'F': /* Specifiy output file (default=stdout). 
*/ + if (args_info->output_given) { + fprintf(stderr, "%s: `--output' (`-F') option given more than once\n", PROGRAM); + } + args_info->output_given++; + if (args_info->output_arg) free(args_info->output_arg); + args_info->output_arg = strdup(val); + break; + + case 0: /* Long option(s) with no short form */ + /* Print help and exit. */ + if (strcmp(olong, "help") == 0) { + if (args_info->help_given) { + fprintf(stderr, "%s: `--help' (`-h') option given more than once\n", PROGRAM); + } + clear_args(args_info); + cmdline_parser_print_help(); + exit(EXIT_SUCCESS); + + } + + /* Print version and exit. */ + else if (strcmp(olong, "version") == 0) { + if (args_info->version_given) { + fprintf(stderr, "%s: `--version' (`-V') option given more than once\n", PROGRAM); + } + clear_args(args_info); + cmdline_parser_print_version(); + exit(EXIT_SUCCESS); + + } + + /* Specify compression level of output file. */ + else if (strcmp(olong, "compress") == 0) { + if (args_info->compress_given) { + fprintf(stderr, "%s: `--compress' (`-z') option given more than once\n", PROGRAM); + } + args_info->compress_given++; + args_info->compress_arg = (int)atoi(val); + } + + /* Specifiy output file (default=stdout). */ + else if (strcmp(olong, "output") == 0) { + if (args_info->output_given) { + fprintf(stderr, "%s: `--output' (`-F') option given more than once\n", PROGRAM); + } + args_info->output_given++; + if (args_info->output_arg) free(args_info->output_arg); + args_info->output_arg = strdup(val); + } + + else { + fprintf(stderr, "%s: unknown long option '%s'.\n", PROGRAM, olong); + return (EXIT_FAILURE); + } + break; + + case '?': /* Invalid Option */ + fprintf(stderr, "%s: unknown option '%s'.\n", PROGRAM, olong); + return (EXIT_FAILURE); + + + default: /* bug: options not considered. 
*/ + fprintf (stderr, "%s: option unknown: %c\n", PROGRAM, oshort); + abort (); + } /* switch */ + return 0; +} + + +/* Initialize options not yet given from environmental defaults */ +void +cmdline_parser_envdefaults(struct gengetopt_args_info *args_info) +{ + + + return; +} + + +/* Load option values from an .rc file */ +void +cmdline_parser_read_rcfile(const char *filename, + struct gengetopt_args_info *args_info, + int user_specified) +{ + char *fullname; + FILE *rcfile; + + if (!filename) return; /* ignore NULL filenames */ + +#if defined(HAVE_GETUID) && defined(HAVE_GETPWUID) + if (*filename == '~') { + /* tilde-expansion hack */ + struct passwd *pwent = getpwuid(getuid()); + if (!pwent) { + fprintf(stderr, "%s: user-id %d not found!\n", PROGRAM, getuid()); + return; + } + if (!pwent->pw_dir) { + fprintf(stderr, "%s: home directory for user-id %d not found!\n", PROGRAM, getuid()); + return; + } + fullname = (char *)malloc(strlen(pwent->pw_dir)+strlen(filename)); + strcpy(fullname, pwent->pw_dir); + strcat(fullname, filename+1); + } else { + fullname = strdup(filename); + } +#else /* !(defined(HAVE_GETUID) && defined(HAVE_GETPWUID)) */ + fullname = strdup(filename); +#endif /* defined(HAVE_GETUID) && defined(HAVE_GETPWUID) */ + + /* try to open */ + rcfile = fopen(fullname,"r"); + if (!rcfile) { + if (user_specified) { + fprintf(stderr, "%s: warning: open failed for rc-file '%s': %s\n", + PROGRAM, fullname, strerror(errno)); + } + } + else { + cmdline_parser_read_rc_stream(rcfile, fullname, args_info); + } + + /* cleanup */ + if (fullname != filename) free(fullname); + if (rcfile) fclose(rcfile); + + return; +} + + +/* Parse option values from an .rc file : guts */ +#define OPTPARSE_GET 32 +void +cmdline_parser_read_rc_stream(FILE *rcfile, + const char *filename, + struct gengetopt_args_info *args_info) +{ + char *optname = (char *)malloc(OPTPARSE_GET); + char *optval = (char *)malloc(OPTPARSE_GET); + size_t onsize = OPTPARSE_GET; + size_t ovsize = 
OPTPARSE_GET; + size_t onlen = 0; + size_t ovlen = 0; + int lineno = 0; + char c; + +#ifdef cmdline_parser_DEBUG + fprintf(stderr, "cmdline_parser_read_rc_stream('%s'):\n", filename); +#endif + + while ((c = fgetc(rcfile)) != EOF) { + onlen = 0; + ovlen = 0; + lineno++; + + /* -- get next option-name */ + /* skip leading space and comments */ + if (isspace(c)) continue; + if (c == '#') { + while ((c = fgetc(rcfile)) != EOF) { + if (c == '\n') break; + } + continue; + } + + /* parse option-name */ + while (c != EOF && c != '=' && !isspace(c)) { + /* re-allocate if necessary */ + if (onlen >= onsize-1) { + char *tmp = (char *)malloc(onsize+OPTPARSE_GET); + strcpy(tmp,optname); + free(optname); + + onsize += OPTPARSE_GET; + optname = tmp; + } + optname[onlen++] = c; + c = fgetc(rcfile); + } + optname[onlen++] = '\0'; + +#ifdef cmdline_parser_DEBUG + fprintf(stderr, "cmdline_parser_read_rc_stream('%s'): line %d: optname='%s'\n", + filename, lineno, optname); +#endif + + /* -- get next option-value */ + /* skip leading space */ + while ((c = fgetc(rcfile)) != EOF && isspace(c)) { + ; + } + + /* parse option-value */ + while (c != EOF && c != '\n') { + /* re-allocate if necessary */ + if (ovlen >= ovsize-1) { + char *tmp = (char *)malloc(ovsize+OPTPARSE_GET); + strcpy(tmp,optval); + free(optval); + ovsize += OPTPARSE_GET; + optval = tmp; + } + optval[ovlen++] = c; + c = fgetc(rcfile); + } + optval[ovlen++] = '\0'; + + /* now do the action for the option */ + if (cmdline_parser_parse_option('\0',optname,optval,args_info) != 0) { + fprintf(stderr, "%s: error in file '%s' at line %d.\n", PROGRAM, filename, lineno); + + } + } + + /* cleanup */ + free(optname); + free(optval); + + return; +} diff --git a/gfsm/gfsm/src/programs/gfsmrenumber_cmdparser.h b/gfsm/gfsm/src/programs/gfsmrenumber_cmdparser.h new file mode 100644 index 0000000..08db747 --- /dev/null +++ b/gfsm/gfsm/src/programs/gfsmrenumber_cmdparser.h @@ -0,0 +1,63 @@ +/* -*- Mode: C -*- + * + * File: 
gfsmrenumber_cmdparser.h + * Description: Headers for command-line parser struct gengetopt_args_info. + * + * File autogenerated by optgen.perl version 0.05. + * + */ + +#ifndef gfsmrenumber_cmdparser_h +#define gfsmrenumber_cmdparser_h + +#ifdef __cplusplus +extern "C" { +#endif /* __cplusplus */ + +/* + * moocow: Never set PACKAGE and VERSION here. + */ + +struct gengetopt_args_info { + int compress_arg; /* Specify compression level of output file. (default=-1). */ + char * output_arg; /* Specifiy output file (default=stdout). (default=-). */ + + int help_given; /* Whether help was given */ + int version_given; /* Whether version was given */ + int compress_given; /* Whether compress was given */ + int output_given; /* Whether output was given */ + + char **inputs; /* unnamed arguments */ + unsigned inputs_num; /* number of unnamed arguments */ +}; + +/* read rc files (if any) and parse all command-line options in one swell foop */ +int cmdline_parser (int argc, char *const *argv, struct gengetopt_args_info *args_info); + +/* instantiate defaults from environment variables: you must call this yourself! 
*/ +void cmdline_parser_envdefaults (struct gengetopt_args_info *args_info); + +/* read a single rc-file */ +void cmdline_parser_read_rcfile (const char *filename, + struct gengetopt_args_info *args_info, + int user_specified); + +/* read a single rc-file (stream) */ +void cmdline_parser_read_rc_stream (FILE *rcfile, + const char *filename, + struct gengetopt_args_info *args_info); + +/* parse a single option */ +int cmdline_parser_parse_option (char oshort, const char *olong, const char *val, + struct gengetopt_args_info *args_info); + +/* print help message */ +void cmdline_parser_print_help(void); + +/* print version */ +void cmdline_parser_print_version(void); + +#ifdef __cplusplus +} +#endif /* __cplusplus */ +#endif /* gfsmrenumber_cmdparser_h */ diff --git a/gfsm/gfsm/src/programs/gfsmrenumber_main.c b/gfsm/gfsm/src/programs/gfsmrenumber_main.c new file mode 100644 index 0000000..7bf7a59 --- /dev/null +++ b/gfsm/gfsm/src/programs/gfsmrenumber_main.c @@ -0,0 +1,95 @@ +/* + gfsm-utils : finite state automaton utilities + Copyright (C) 2004 by Bryan Jurish <moocow@ling.uni-potsdam.de> + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with this library; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+*/
+
+#ifdef HAVE_CONFIG_H
+# include <config.h>
+#endif
+
+#include <stdio.h>
+#include <errno.h>
+#include <string.h>
+#include <stdlib.h>
+
+#include <gfsm.h>
+
+#include "gfsmrenumber_cmdparser.h"
+
+/*--------------------------------------------------------------------------
+ * Globals
+ *--------------------------------------------------------------------------*/
+char *progname = "gfsmrenumber";
+
+//-- options (filled in by cmdline_parser() in get_my_options())
+struct gengetopt_args_info args;
+
+//-- files (both default to "-"; the option help describes "-" as stdout for output)
+const char *infilename = "-";
+const char *outfilename = "-";
+
+//-- global structs (fsm is allocated in get_my_options() and freed at the end of main())
+gfsmAutomaton *fsm;
+
+/*--------------------------------------------------------------------------
+ * Option Processing
+ *
+ * get_my_options(): parses argv into the global `args` (exits with status 1
+ * if cmdline_parser() returns non-zero), takes the first unnamed argument
+ * (if any) as the input file name and args.output_arg as the output file
+ * name, then allocates the global automaton `fsm`.
+ *--------------------------------------------------------------------------*/
+void get_my_options(int argc, char **argv)
+{
+  if (cmdline_parser(argc, argv, &args) != 0)
+    exit(1);
+
+  //-- input & output file names
+  if (args.inputs_num) infilename = args.inputs[0];
+  if (args.output_arg) outfilename = args.output_arg;
+
+  //-- load environmental defaults
+  //cmdline_parser_envdefaults(&args);
+
+  //-- initialize automaton
+  fsm = gfsm_automaton_new();
+}
+
+
+/*--------------------------------------------------------------------------
+ * MAIN
+ *
+ * Loads a binary automaton from infilename, renumbers its states and saves
+ * it to outfilename using compression level args.compress_arg.
+ * Exit status: 0 on success, 255 if loading fails, 4 if storing fails
+ * (1 if option parsing fails, see get_my_options()).
+ *--------------------------------------------------------------------------*/
+int main (int argc, char **argv)
+{
+  gfsmError *err = NULL;
+  get_my_options(argc,argv);
+
+  //-- load automaton
+  //   NOTE(review): err->message assumes the loader sets *err on failure -- confirm against gfsm API
+  if (!gfsm_automaton_load_bin_filename(fsm,infilename,&err)) {
+    g_printerr("%s: load failed for '%s': %s\n", progname, infilename, err->message);
+    exit(255);
+  }
+
+  //-- renumber
+  gfsm_automaton_renumber_states(fsm);
+
+  //-- spew automaton (i.e. write the result to outfilename)
+  if (!gfsm_automaton_save_bin_filename(fsm,outfilename,args.compress_arg,&err)) {
+    g_printerr("%s: store failed to '%s': %s\n", progname, outfilename, err->message);
+    exit(4);
+  }
+
+  //-- cleanup (only reached on success; the error paths above exit() directly)
+  if (fsm) gfsm_automaton_free(fsm);
+
+  return 0;
+}
diff --git a/gfsm/gfsm/src/programs/gfsmreplace.gog b/gfsm/gfsm/src/programs/gfsmreplace.gog
new file mode 100644
index 0000000..466526d
--- /dev/null
+++ b/gfsm/gfsm/src/programs/gfsmreplace.gog
@@ -0,0 +1,79 @@
+# -*- Mode: Shell-Script -*-
+#
+# Getopt::Gen specification
+#-----------------------------------------------------------------------------
+program "gfsmreplace"
+#program_version "0.01"
+
+purpose "Replace some automaton arcs with a whole automaton"
+author "Bryan Jurish <moocow@ling.uni-potsdam.de>"
+on_reparse "warn"
+
+#-----------------------------------------------------------------------------
+# Details
+#-----------------------------------------------------------------------------
+details ""
+
+#-----------------------------------------------------------------------------
+# Files
+#-----------------------------------------------------------------------------
+#rcfile "/etc/mootrc"
+#rcfile "~/.mootrc"
+
+#-----------------------------------------------------------------------------
+# Arguments
+#-----------------------------------------------------------------------------
+
+#-----------------------------------------------------------------------------
+# Options
+#-----------------------------------------------------------------------------
+#group "Basic Options"
+
+string "replacement" r "Replacement automaton (binary gfsm file)" \
+ arg="FSMFILE" \
+ default="-" \
+ details="
+If unspecified, standard input will be read.
+"
+
+int "lower" l "Lower label to replace (default=any)" \
+ arg="LABEL"
+
+int "upper" u "Upper label to replace (default=any)" \
+ arg="LABEL"
+
+int "compress" z "Specify compression level of output file." \
+ arg="LEVEL" \
+ default="-1" \
+ details="
+Specify zlib compression level of output file. -1 (default) indicates
+the default compression level, 0 (zero) indicates no zlib compression at all,
+and 9 indicates the best possible compression.
+"
+
+string "output" F "Specifiy output file (default=stdout)." \
+ arg="FILE" \
+ default="-"
+
+#-----------------------------------------------------------------------------
+# Addenda
+#-----------------------------------------------------------------------------
+#addenda ""
+
+#-----------------------------------------------------------------------------
+# Bugs
+#-----------------------------------------------------------------------------
+bugs "
+
+None known.
+
+"
+
+#-----------------------------------------------------------------------------
+# Footer
+#-----------------------------------------------------------------------------
+#acknowledge `cat acknowledge.pod`
+
+seealso "
+L<gfsmutils>
+"
diff --git a/gfsm/gfsm/src/programs/gfsmreplace_cmdparser.c b/gfsm/gfsm/src/programs/gfsmreplace_cmdparser.c
new file mode 100644
index 0000000..652c7a7
--- /dev/null
+++ b/gfsm/gfsm/src/programs/gfsmreplace_cmdparser.c
@@ -0,0 +1,514 @@
+/* -*- Mode: C -*-
+ *
+ * File: gfsmreplace_cmdparser.c
+ * Description: Code for command-line parser struct gengetopt_args_info.
+ *
+ * File autogenerated by optgen.perl version 0.05
+ * generated with the following command:
+ * /usr/local/bin/optgen.perl -u -l --no-handle-rcfile --nopod -F gfsmreplace_cmdparser gfsmreplace.gog
+ *
+ * The developers of optgen.perl consider the fixed text that goes in all
+ * optgen.perl output files to be in the public domain:
+ * we make no copyright claims on it.
+ *
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <string.h>
+#include <unistd.h>
+#include <ctype.h>
+
+/* If we use autoconf/autoheader. */
+#ifdef HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+#ifdef HAVE_PWD_H
+# include <pwd.h>
+#endif
+
+/* Allow user-overrides for PACKAGE and VERSION */
+#ifndef PACKAGE
+# define PACKAGE "PACKAGE"
+#endif
+
+#ifndef VERSION
+# define VERSION "VERSION"
+#endif
+
+
+#ifndef PROGRAM
+# define PROGRAM "gfsmreplace"
+#endif
+
+/* #define cmdline_parser_DEBUG */
+
+/* Check for "configure's" getopt check result. */
+#ifndef HAVE_GETOPT_LONG
+# include "getopt.h"
+#else
+# include <getopt.h>
+#endif
+
+#if !defined(HAVE_STRDUP) && !defined(strdup)
+# define strdup gengetopt_strdup
+#endif /* HAVE_STRDUP */
+
+#include "gfsmreplace_cmdparser.h"
+
+
+/* user code section */
+
+/* end user code section */
+
+
+void
+cmdline_parser_print_version (void) /* prints program name, PACKAGE, VERSION and author to stdout */
+{
+  printf("gfsmreplace (%s %s) by Bryan Jurish <moocow@ling.uni-potsdam.de>\n", PACKAGE, VERSION);
+}
+
+void
+cmdline_parser_print_help (void) /* prints the version line, then purpose, usage and the option table to stdout */
+{
+  cmdline_parser_print_version ();
+  printf("\n");
+  printf("Purpose:\n");
+  printf(" Replace some automaton arcs with a whole automaton\n");
+  printf("\n");
+
+  printf("Usage: %s [OPTIONS]... [FILES]...\n", "gfsmreplace");
+
+
+  printf("\n");
+  printf(" Options:\n");
+  printf(" -h --help Print help and exit.\n");
+  printf(" -V --version Print version and exit.\n");
+  printf(" -rFSMFILE --replacement=FSMFILE Replacement automaton (binary gfsm file)\n");
+  printf(" -lLABEL --lower=LABEL Lower label to replace (default=any)\n");
+  printf(" -uLABEL --upper=LABEL Upper label to replace (default=any)\n");
+  printf(" -zLEVEL --compress=LEVEL Specify compression level of output file.\n");
+  printf(" -FFILE --output=FILE Specifiy output file (default=stdout).\n");
+}
+
+#if !defined(HAVE_STRDUP) && !defined(strdup)
+/* gengetopt_strdup(): automatically generated from strdup.c. 
*/ +/* strdup.c replacement of strdup, which is not standard */ +static char * +gengetopt_strdup (const char *s) +{ + char *result = (char*)malloc(strlen(s) + 1); + if (result == (char*)0) + return (char*)0; + strcpy(result, s); + return result; +} +#endif /* HAVE_STRDUP */ + +/* clear_args(args_info): clears all args & resets to defaults */ +static void +clear_args(struct gengetopt_args_info *args_info) +{ + args_info->replacement_arg = strdup("-"); + args_info->lower_arg = 0; + args_info->upper_arg = 0; + args_info->compress_arg = -1; + args_info->output_arg = strdup("-"); +} + + +int +cmdline_parser (int argc, char * const *argv, struct gengetopt_args_info *args_info) +{ + int c; /* Character of the parsed option. */ + int missing_required_options = 0; + + args_info->help_given = 0; + args_info->version_given = 0; + args_info->replacement_given = 0; + args_info->lower_given = 0; + args_info->upper_given = 0; + args_info->compress_given = 0; + args_info->output_given = 0; + + clear_args(args_info); + + /* rcfile handling */ + + /* end rcfile handling */ + + optarg = 0; + optind = 1; + opterr = 1; + optopt = '?'; + + while (1) + { + int option_index = 0; + static struct option long_options[] = { + { "help", 0, NULL, 'h' }, + { "version", 0, NULL, 'V' }, + { "replacement", 1, NULL, 'r' }, + { "lower", 1, NULL, 'l' }, + { "upper", 1, NULL, 'u' }, + { "compress", 1, NULL, 'z' }, + { "output", 1, NULL, 'F' }, + { NULL, 0, NULL, 0 } + }; + static char short_options[] = { + 'h', + 'V', + 'r', ':', + 'l', ':', + 'u', ':', + 'z', ':', + 'F', ':', + '\0' + }; + + c = getopt_long (argc, argv, short_options, long_options, &option_index); + + if (c == -1) break; /* Exit from 'while (1)' loop. 
*/ + + if (cmdline_parser_parse_option(c, long_options[option_index].name, optarg, args_info) != 0) { + exit (EXIT_FAILURE); + } + } /* while */ + + + + if ( missing_required_options ) + exit (EXIT_FAILURE); + + + if (optind < argc) { + int i = 0 ; + args_info->inputs_num = argc - optind ; + args_info->inputs = (char **)(malloc ((args_info->inputs_num)*sizeof(char *))) ; + while (optind < argc) + args_info->inputs[ i++ ] = strdup (argv[optind++]) ; + } + + return 0; +} + + +/* Parse a single option */ +int +cmdline_parser_parse_option(char oshort, const char *olong, const char *val, + struct gengetopt_args_info *args_info) +{ + if (!oshort && !(olong && *olong)) return 1; /* ignore null options */ + +#ifdef cmdline_parser_DEBUG + fprintf(stderr, "parse_option(): oshort='%c', olong='%s', val='%s'\n", oshort, olong, val);*/ +#endif + + switch (oshort) + { + case 'h': /* Print help and exit. */ + if (args_info->help_given) { + fprintf(stderr, "%s: `--help' (`-h') option given more than once\n", PROGRAM); + } + clear_args(args_info); + cmdline_parser_print_help(); + exit(EXIT_SUCCESS); + + break; + + case 'V': /* Print version and exit. 
*/ + if (args_info->version_given) { + fprintf(stderr, "%s: `--version' (`-V') option given more than once\n", PROGRAM); + } + clear_args(args_info); + cmdline_parser_print_version(); + exit(EXIT_SUCCESS); + + break; + + case 'r': /* Replacement automaton (binary gfsm file) */ + if (args_info->replacement_given) { + fprintf(stderr, "%s: `--replacement' (`-r') option given more than once\n", PROGRAM); + } + args_info->replacement_given++; + if (args_info->replacement_arg) free(args_info->replacement_arg); + args_info->replacement_arg = strdup(val); + break; + + case 'l': /* Lower label to replace (default=any) */ + if (args_info->lower_given) { + fprintf(stderr, "%s: `--lower' (`-l') option given more than once\n", PROGRAM); + } + args_info->lower_given++; + args_info->lower_arg = (int)atoi(val); + break; + + case 'u': /* Upper label to replace (default=any) */ + if (args_info->upper_given) { + fprintf(stderr, "%s: `--upper' (`-u') option given more than once\n", PROGRAM); + } + args_info->upper_given++; + args_info->upper_arg = (int)atoi(val); + break; + + case 'z': /* Specify compression level of output file. */ + if (args_info->compress_given) { + fprintf(stderr, "%s: `--compress' (`-z') option given more than once\n", PROGRAM); + } + args_info->compress_given++; + args_info->compress_arg = (int)atoi(val); + break; + + case 'F': /* Specifiy output file (default=stdout). */ + if (args_info->output_given) { + fprintf(stderr, "%s: `--output' (`-F') option given more than once\n", PROGRAM); + } + args_info->output_given++; + if (args_info->output_arg) free(args_info->output_arg); + args_info->output_arg = strdup(val); + break; + + case 0: /* Long option(s) with no short form */ + /* Print help and exit. 
*/ + if (strcmp(olong, "help") == 0) { + if (args_info->help_given) { + fprintf(stderr, "%s: `--help' (`-h') option given more than once\n", PROGRAM); + } + clear_args(args_info); + cmdline_parser_print_help(); + exit(EXIT_SUCCESS); + + } + + /* Print version and exit. */ + else if (strcmp(olong, "version") == 0) { + if (args_info->version_given) { + fprintf(stderr, "%s: `--version' (`-V') option given more than once\n", PROGRAM); + } + clear_args(args_info); + cmdline_parser_print_version(); + exit(EXIT_SUCCESS); + + } + + /* Replacement automaton (binary gfsm file) */ + else if (strcmp(olong, "replacement") == 0) { + if (args_info->replacement_given) { + fprintf(stderr, "%s: `--replacement' (`-r') option given more than once\n", PROGRAM); + } + args_info->replacement_given++; + if (args_info->replacement_arg) free(args_info->replacement_arg); + args_info->replacement_arg = strdup(val); + } + + /* Lower label to replace (default=any) */ + else if (strcmp(olong, "lower") == 0) { + if (args_info->lower_given) { + fprintf(stderr, "%s: `--lower' (`-l') option given more than once\n", PROGRAM); + } + args_info->lower_given++; + args_info->lower_arg = (int)atoi(val); + } + + /* Upper label to replace (default=any) */ + else if (strcmp(olong, "upper") == 0) { + if (args_info->upper_given) { + fprintf(stderr, "%s: `--upper' (`-u') option given more than once\n", PROGRAM); + } + args_info->upper_given++; + args_info->upper_arg = (int)atoi(val); + } + + /* Specify compression level of output file. */ + else if (strcmp(olong, "compress") == 0) { + if (args_info->compress_given) { + fprintf(stderr, "%s: `--compress' (`-z') option given more than once\n", PROGRAM); + } + args_info->compress_given++; + args_info->compress_arg = (int)atoi(val); + } + + /* Specifiy output file (default=stdout). 
*/ + else if (strcmp(olong, "output") == 0) { + if (args_info->output_given) { + fprintf(stderr, "%s: `--output' (`-F') option given more than once\n", PROGRAM); + } + args_info->output_given++; + if (args_info->output_arg) free(args_info->output_arg); + args_info->output_arg = strdup(val); + } + + else { + fprintf(stderr, "%s: unknown long option '%s'.\n", PROGRAM, olong); + return (EXIT_FAILURE); + } + break; + + case '?': /* Invalid Option */ + fprintf(stderr, "%s: unknown option '%s'.\n", PROGRAM, olong); + return (EXIT_FAILURE); + + + default: /* bug: options not considered. */ + fprintf (stderr, "%s: option unknown: %c\n", PROGRAM, oshort); + abort (); + } /* switch */ + return 0; +} + + +/* Initialize options not yet given from environmental defaults */ +void +cmdline_parser_envdefaults(struct gengetopt_args_info *args_info) +{ + + + return; +} + + +/* Load option values from an .rc file */ +void +cmdline_parser_read_rcfile(const char *filename, + struct gengetopt_args_info *args_info, + int user_specified) +{ + char *fullname; + FILE *rcfile; + + if (!filename) return; /* ignore NULL filenames */ + +#if defined(HAVE_GETUID) && defined(HAVE_GETPWUID) + if (*filename == '~') { + /* tilde-expansion hack */ + struct passwd *pwent = getpwuid(getuid()); + if (!pwent) { + fprintf(stderr, "%s: user-id %d not found!\n", PROGRAM, getuid()); + return; + } + if (!pwent->pw_dir) { + fprintf(stderr, "%s: home directory for user-id %d not found!\n", PROGRAM, getuid()); + return; + } + fullname = (char *)malloc(strlen(pwent->pw_dir)+strlen(filename)); + strcpy(fullname, pwent->pw_dir); + strcat(fullname, filename+1); + } else { + fullname = strdup(filename); + } +#else /* !(defined(HAVE_GETUID) && defined(HAVE_GETPWUID)) */ + fullname = strdup(filename); +#endif /* defined(HAVE_GETUID) && defined(HAVE_GETPWUID) */ + + /* try to open */ + rcfile = fopen(fullname,"r"); + if (!rcfile) { + if (user_specified) { + fprintf(stderr, "%s: warning: open failed for rc-file '%s': 
%s\n", + PROGRAM, fullname, strerror(errno)); + } + } + else { + cmdline_parser_read_rc_stream(rcfile, fullname, args_info); + } + + /* cleanup */ + if (fullname != filename) free(fullname); + if (rcfile) fclose(rcfile); + + return; +} + + +/* Parse option values from an .rc file : guts */ +#define OPTPARSE_GET 32 +void +cmdline_parser_read_rc_stream(FILE *rcfile, + const char *filename, + struct gengetopt_args_info *args_info) +{ + char *optname = (char *)malloc(OPTPARSE_GET); + char *optval = (char *)malloc(OPTPARSE_GET); + size_t onsize = OPTPARSE_GET; + size_t ovsize = OPTPARSE_GET; + size_t onlen = 0; + size_t ovlen = 0; + int lineno = 0; + char c; + +#ifdef cmdline_parser_DEBUG + fprintf(stderr, "cmdline_parser_read_rc_stream('%s'):\n", filename); +#endif + + while ((c = fgetc(rcfile)) != EOF) { + onlen = 0; + ovlen = 0; + lineno++; + + /* -- get next option-name */ + /* skip leading space and comments */ + if (isspace(c)) continue; + if (c == '#') { + while ((c = fgetc(rcfile)) != EOF) { + if (c == '\n') break; + } + continue; + } + + /* parse option-name */ + while (c != EOF && c != '=' && !isspace(c)) { + /* re-allocate if necessary */ + if (onlen >= onsize-1) { + char *tmp = (char *)malloc(onsize+OPTPARSE_GET); + strcpy(tmp,optname); + free(optname); + + onsize += OPTPARSE_GET; + optname = tmp; + } + optname[onlen++] = c; + c = fgetc(rcfile); + } + optname[onlen++] = '\0'; + +#ifdef cmdline_parser_DEBUG + fprintf(stderr, "cmdline_parser_read_rc_stream('%s'): line %d: optname='%s'\n", + filename, lineno, optname); +#endif + + /* -- get next option-value */ + /* skip leading space */ + while ((c = fgetc(rcfile)) != EOF && isspace(c)) { + ; + } + + /* parse option-value */ + while (c != EOF && c != '\n') { + /* re-allocate if necessary */ + if (ovlen >= ovsize-1) { + char *tmp = (char *)malloc(ovsize+OPTPARSE_GET); + strcpy(tmp,optval); + free(optval); + ovsize += OPTPARSE_GET; + optval = tmp; + } + optval[ovlen++] = c; + c = fgetc(rcfile); + } + 
optval[ovlen++] = '\0'; + + /* now do the action for the option */ + if (cmdline_parser_parse_option('\0',optname,optval,args_info) != 0) { + fprintf(stderr, "%s: error in file '%s' at line %d.\n", PROGRAM, filename, lineno); + + } + } + + /* cleanup */ + free(optname); + free(optval); + + return; +} diff --git a/gfsm/gfsm/src/programs/gfsmreplace_cmdparser.h b/gfsm/gfsm/src/programs/gfsmreplace_cmdparser.h new file mode 100644 index 0000000..f7c704d --- /dev/null +++ b/gfsm/gfsm/src/programs/gfsmreplace_cmdparser.h @@ -0,0 +1,69 @@ +/* -*- Mode: C -*- + * + * File: gfsmreplace_cmdparser.h + * Description: Headers for command-line parser struct gengetopt_args_info. + * + * File autogenerated by optgen.perl version 0.05. + * + */ + +#ifndef gfsmreplace_cmdparser_h +#define gfsmreplace_cmdparser_h + +#ifdef __cplusplus +extern "C" { +#endif /* __cplusplus */ + +/* + * moocow: Never set PACKAGE and VERSION here. + */ + +struct gengetopt_args_info { + char * replacement_arg; /* Replacement automaton (binary gfsm file) (default=-). */ + int lower_arg; /* Lower label to replace (default=any) (default=0). */ + int upper_arg; /* Upper label to replace (default=any) (default=0). */ + int compress_arg; /* Specify compression level of output file. (default=-1). */ + char * output_arg; /* Specifiy output file (default=stdout). (default=-). 
*/ + + int help_given; /* Whether help was given */ + int version_given; /* Whether version was given */ + int replacement_given; /* Whether replacement was given */ + int lower_given; /* Whether lower was given */ + int upper_given; /* Whether upper was given */ + int compress_given; /* Whether compress was given */ + int output_given; /* Whether output was given */ + + char **inputs; /* unnamed arguments */ + unsigned inputs_num; /* number of unnamed arguments */ +}; + +/* read rc files (if any) and parse all command-line options in one swell foop */ +int cmdline_parser (int argc, char *const *argv, struct gengetopt_args_info *args_info); + +/* instantiate defaults from environment variables: you must call this yourself! */ +void cmdline_parser_envdefaults (struct gengetopt_args_info *args_info); + +/* read a single rc-file */ +void cmdline_parser_read_rcfile (const char *filename, + struct gengetopt_args_info *args_info, + int user_specified); + +/* read a single rc-file (stream) */ +void cmdline_parser_read_rc_stream (FILE *rcfile, + const char *filename, + struct gengetopt_args_info *args_info); + +/* parse a single option */ +int cmdline_parser_parse_option (char oshort, const char *olong, const char *val, + struct gengetopt_args_info *args_info); + +/* print help message */ +void cmdline_parser_print_help(void); + +/* print version */ +void cmdline_parser_print_version(void); + +#ifdef __cplusplus +} +#endif /* __cplusplus */ +#endif /* gfsmreplace_cmdparser_h */ diff --git a/gfsm/gfsm/src/programs/gfsmreplace_main.c b/gfsm/gfsm/src/programs/gfsmreplace_main.c new file mode 100644 index 0000000..c2b1107 --- /dev/null +++ b/gfsm/gfsm/src/programs/gfsmreplace_main.c @@ -0,0 +1,108 @@ +/* + gfsm-utils : finite state automaton utilities + Copyright (C) 2005 by Bryan Jurish <moocow@ling.uni-potsdam.de> + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free 
Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   This library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with this library; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+*/
+
+#ifdef HAVE_CONFIG_H
+# include <config.h>
+#endif
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <ctype.h>
+
+#include <gfsm.h>
+
+#include "gfsmreplace_cmdparser.h"
+
+/*--------------------------------------------------------------------------
+ * Globals
+ *--------------------------------------------------------------------------*/
+char *progname = "gfsmreplace";
+
+//-- options (filled in by cmdline_parser() in get_my_options())
+struct gengetopt_args_info args;
+
+//-- files (all three default to "-")
+//   NOTE(review): replacement and input both default to "-"; the option help says an unspecified replacement is read from standard input -- confirm what happens when neither is given
+const char *replfilename = "-";
+const char *infilename = "-";
+const char *outfilename = "-";
+
+//-- global structs (repl is loaded in get_my_options(), fsm in main(); err is shared by all load/save calls)
+gfsmAutomaton *repl = NULL, *fsm=NULL;
+gfsmError *err = NULL;
+
+/*--------------------------------------------------------------------------
+ * Option Processing
+ *
+ * get_my_options(): parses argv into the global `args` (exits with status 1
+ * if cmdline_parser() returns non-zero), picks the input, replacement and
+ * output file names, then allocates `repl` and loads the replacement
+ * automaton from replfilename (exits with status 255 if that load fails).
+ *--------------------------------------------------------------------------*/
+void get_my_options(int argc, char **argv)
+{
+  if (cmdline_parser(argc, argv, &args) != 0)
+    exit(1);
+
+  //-- load environmental defaults
+  //cmdline_parser_envdefaults(&args);
+
+  //-- filenames (first unnamed argument is the input file)
+  if (args.inputs_num) infilename = args.inputs[0];
+  if (args.replacement_given) replfilename = args.replacement_arg;
+  outfilename = args.output_arg;
+
+  //-- load replacement automaton
+  repl = gfsm_automaton_new();
+  if (!gfsm_automaton_load_bin_filename(repl, replfilename, &err)) {
+    g_printerr("%s: load failed for FST file '%s': %s\n", progname, replfilename, err->message);
+    exit(255);
+  }
+}
+
+/*--------------------------------------------------------------------------
+ * MAIN
+ *
+ * Loads the input automaton from infilename, calls gfsm_automaton_replace()
+ * with the replacement automaton `repl`, and saves the result to outfilename
+ * using compression level args.compress_arg.
+ * Exit status: 0 on success, 255 if a load fails, 4 if storing fails
+ * (1 if option parsing fails, see get_my_options()).
+ *--------------------------------------------------------------------------*/
+int main (int argc, char **argv)
+{
+  GFSM_INIT
+  get_my_options(argc,argv);
+
+  //-- load input automaton
+  fsm = gfsm_automaton_new();
+  if (!gfsm_automaton_load_bin_filename(fsm,infilename,&err)) {
+    g_printerr("%s: load failed for '%s': %s\n", progname, infilename, err->message);
+    exit(255);
+  }
+
+  //-- perform replacement (a label that was not given on the command line is passed as gfsmNoLabel)
+  gfsm_automaton_replace(fsm,
+			 (args.lower_given ? args.lower_arg : gfsmNoLabel),
+			 (args.upper_given ? args.upper_arg : gfsmNoLabel),
+			 repl);
+
+  //-- save output
+  if (!gfsm_automaton_save_bin_filename(fsm,outfilename,args.compress_arg,&err)) {
+    g_printerr("%s: store failed to '%s': %s\n", progname, outfilename, err->message);
+    exit(4);
+  }
+
+  //-- cleanup (only reached on success; the error paths above exit() directly)
+  if (repl) gfsm_automaton_free(repl);
+  if (fsm) gfsm_automaton_free(fsm);
+
+  GFSM_FINISH
+  return 0;
+}
diff --git a/gfsm/gfsm/src/programs/gfsmreverse.gog b/gfsm/gfsm/src/programs/gfsmreverse.gog
new file mode 100644
index 0000000..f67c417
--- /dev/null
+++ b/gfsm/gfsm/src/programs/gfsmreverse.gog
@@ -0,0 +1,70 @@
+# -*- Mode: Shell-Script -*-
+#
+# Getopt::Gen specification
+#-----------------------------------------------------------------------------
+program "gfsmreverse"
+#program_version "0.01"
+
+purpose "Reverse a finite state machine"
+author "Bryan Jurish <moocow@ling.uni-potsdam.de>"
+on_reparse "warn"
+
+#-----------------------------------------------------------------------------
+# Details
+#-----------------------------------------------------------------------------
+details ""
+
+#-----------------------------------------------------------------------------
+# Files
+#-----------------------------------------------------------------------------
+#rcfile "/etc/gfsmrc"
+#rcfile "~/.gfsmrc"
+
+#-----------------------------------------------------------------------------
+# 
Arguments +#----------------------------------------------------------------------------- +argument "BINFILE" "Stored binary gfsm file" \ + details=" +If unspecified, standard input will be read. +" + +#----------------------------------------------------------------------------- +# Options +#----------------------------------------------------------------------------- +#group "Basic Options" + +int "compress" z "Specify compression level of output file." \ + arg="LEVEL" \ + default="-1" \ + details=" +Specify zlib compression level of output file. -1 (default) indicates +the default compression level, 0 (zero) indicates no zlib compression at all, +and 9 indicates the best possible compression. +" + +string "output" F "Specifiy output file (default=stdout)." \ + arg="FILE" \ + default="-" + +#----------------------------------------------------------------------------- +# Addenda +#----------------------------------------------------------------------------- +#addenda "" + +#----------------------------------------------------------------------------- +# Bugs +#----------------------------------------------------------------------------- +bugs " + +None known. + +" + +#----------------------------------------------------------------------------- +# Footer +#----------------------------------------------------------------------------- +#acknowledge `cat acknowledge.pod` + +seealso " +L<gfsmutils> +" diff --git a/gfsm/gfsm/src/programs/gfsmreverse_cmdparser.c b/gfsm/gfsm/src/programs/gfsmreverse_cmdparser.c new file mode 100644 index 0000000..f992776 --- /dev/null +++ b/gfsm/gfsm/src/programs/gfsmreverse_cmdparser.c @@ -0,0 +1,449 @@ +/* -*- Mode: C -*- + * + * File: gfsmreverse_cmdparser.c + * Description: Code for command-line parser struct gengetopt_args_info. 
+ * + * File autogenerated by optgen.perl version 0.05 + * generated with the following command: + * /usr/local/bin/optgen.perl -u -l --no-handle-rcfile --nopod -F gfsmreverse_cmdparser gfsmreverse.gog + * + * The developers of optgen.perl consider the fixed text that goes in all + * optgen.perl output files to be in the public domain: + * we make no copyright claims on it. + * + */ + +#include <stdio.h> +#include <stdlib.h> +#include <errno.h> +#include <string.h> +#include <unistd.h> +#include <ctype.h> + +/* If we use autoconf/autoheader. */ +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#ifdef HAVE_PWD_H +# include <pwd.h> +#endif + +/* Allow user-overrides for PACKAGE and VERSION */ +#ifndef PACKAGE +# define PACKAGE "PACKAGE" +#endif + +#ifndef VERSION +# define VERSION "VERSION" +#endif + + +#ifndef PROGRAM +# define PROGRAM "gfsmreverse" +#endif + +/* #define cmdline_parser_DEBUG */ + +/* Check for "configure's" getopt check result. */ +#ifndef HAVE_GETOPT_LONG +# include "getopt.h" +#else +# include <getopt.h> +#endif + +#if !defined(HAVE_STRDUP) && !defined(strdup) +# define strdup gengetopt_strdup +#endif /* HAVE_STRDUP */ + +#include "gfsmreverse_cmdparser.h" + + +/* user code section */ + +/* end user code section */ + + +void +cmdline_parser_print_version (void) +{ + printf("gfsmreverse (%s %s) by Bryan Jurish <moocow@ling.uni-potsdam.de>\n", PACKAGE, VERSION); +} + +void +cmdline_parser_print_help (void) +{ + cmdline_parser_print_version (); + printf("\n"); + printf("Purpose:\n"); + printf(" Reverse a finite state machine\n"); + printf("\n"); + + printf("Usage: %s [OPTIONS]... 
BINFILE\n", "gfsmreverse"); + + printf("\n"); + printf(" Arguments:\n"); + printf(" BINFILE Stored binary gfsm file\n"); + + printf("\n"); + printf(" Options:\n"); + printf(" -h --help Print help and exit.\n"); + printf(" -V --version Print version and exit.\n"); + printf(" -zLEVEL --compress=LEVEL Specify compression level of output file.\n"); + printf(" -FFILE --output=FILE Specifiy output file (default=stdout).\n"); +} + +#if !defined(HAVE_STRDUP) && !defined(strdup) +/* gengetopt_strdup(): automatically generated from strdup.c. */ +/* strdup.c replacement of strdup, which is not standard */ +static char * +gengetopt_strdup (const char *s) +{ + char *result = (char*)malloc(strlen(s) + 1); + if (result == (char*)0) + return (char*)0; + strcpy(result, s); + return result; +} +#endif /* HAVE_STRDUP */ + +/* clear_args(args_info): clears all args & resets to defaults */ +static void +clear_args(struct gengetopt_args_info *args_info) +{ + args_info->compress_arg = -1; + args_info->output_arg = strdup("-"); +} + + +int +cmdline_parser (int argc, char * const *argv, struct gengetopt_args_info *args_info) +{ + int c; /* Character of the parsed option. */ + int missing_required_options = 0; + + args_info->help_given = 0; + args_info->version_given = 0; + args_info->compress_given = 0; + args_info->output_given = 0; + + clear_args(args_info); + + /* rcfile handling */ + + /* end rcfile handling */ + + optarg = 0; + optind = 1; + opterr = 1; + optopt = '?'; + + while (1) + { + int option_index = 0; + static struct option long_options[] = { + { "help", 0, NULL, 'h' }, + { "version", 0, NULL, 'V' }, + { "compress", 1, NULL, 'z' }, + { "output", 1, NULL, 'F' }, + { NULL, 0, NULL, 0 } + }; + static char short_options[] = { + 'h', + 'V', + 'z', ':', + 'F', ':', + '\0' + }; + + c = getopt_long (argc, argv, short_options, long_options, &option_index); + + if (c == -1) break; /* Exit from 'while (1)' loop. 
*/ + + if (cmdline_parser_parse_option(c, long_options[option_index].name, optarg, args_info) != 0) { + exit (EXIT_FAILURE); + } + } /* while */ + + + + if ( missing_required_options ) + exit (EXIT_FAILURE); + + + if (optind < argc) { + int i = 0 ; + args_info->inputs_num = argc - optind ; + args_info->inputs = (char **)(malloc ((args_info->inputs_num)*sizeof(char *))) ; + while (optind < argc) + args_info->inputs[ i++ ] = strdup (argv[optind++]) ; + } + + return 0; +} + + +/* Parse a single option */ +int +cmdline_parser_parse_option(char oshort, const char *olong, const char *val, + struct gengetopt_args_info *args_info) +{ + if (!oshort && !(olong && *olong)) return 1; /* ignore null options */ + +#ifdef cmdline_parser_DEBUG + fprintf(stderr, "parse_option(): oshort='%c', olong='%s', val='%s'\n", oshort, olong, val);*/ +#endif + + switch (oshort) + { + case 'h': /* Print help and exit. */ + if (args_info->help_given) { + fprintf(stderr, "%s: `--help' (`-h') option given more than once\n", PROGRAM); + } + clear_args(args_info); + cmdline_parser_print_help(); + exit(EXIT_SUCCESS); + + break; + + case 'V': /* Print version and exit. */ + if (args_info->version_given) { + fprintf(stderr, "%s: `--version' (`-V') option given more than once\n", PROGRAM); + } + clear_args(args_info); + cmdline_parser_print_version(); + exit(EXIT_SUCCESS); + + break; + + case 'z': /* Specify compression level of output file. */ + if (args_info->compress_given) { + fprintf(stderr, "%s: `--compress' (`-z') option given more than once\n", PROGRAM); + } + args_info->compress_given++; + args_info->compress_arg = (int)atoi(val); + break; + + case 'F': /* Specifiy output file (default=stdout). 
*/ + if (args_info->output_given) { + fprintf(stderr, "%s: `--output' (`-F') option given more than once\n", PROGRAM); + } + args_info->output_given++; + if (args_info->output_arg) free(args_info->output_arg); + args_info->output_arg = strdup(val); + break; + + case 0: /* Long option(s) with no short form */ + /* Print help and exit. */ + if (strcmp(olong, "help") == 0) { + if (args_info->help_given) { + fprintf(stderr, "%s: `--help' (`-h') option given more than once\n", PROGRAM); + } + clear_args(args_info); + cmdline_parser_print_help(); + exit(EXIT_SUCCESS); + + } + + /* Print version and exit. */ + else if (strcmp(olong, "version") == 0) { + if (args_info->version_given) { + fprintf(stderr, "%s: `--version' (`-V') option given more than once\n", PROGRAM); + } + clear_args(args_info); + cmdline_parser_print_version(); + exit(EXIT_SUCCESS); + + } + + /* Specify compression level of output file. */ + else if (strcmp(olong, "compress") == 0) { + if (args_info->compress_given) { + fprintf(stderr, "%s: `--compress' (`-z') option given more than once\n", PROGRAM); + } + args_info->compress_given++; + args_info->compress_arg = (int)atoi(val); + } + + /* Specifiy output file (default=stdout). */ + else if (strcmp(olong, "output") == 0) { + if (args_info->output_given) { + fprintf(stderr, "%s: `--output' (`-F') option given more than once\n", PROGRAM); + } + args_info->output_given++; + if (args_info->output_arg) free(args_info->output_arg); + args_info->output_arg = strdup(val); + } + + else { + fprintf(stderr, "%s: unknown long option '%s'.\n", PROGRAM, olong); + return (EXIT_FAILURE); + } + break; + + case '?': /* Invalid Option */ + fprintf(stderr, "%s: unknown option '%s'.\n", PROGRAM, olong); + return (EXIT_FAILURE); + + + default: /* bug: options not considered. 
*/ + fprintf (stderr, "%s: option unknown: %c\n", PROGRAM, oshort); + abort (); + } /* switch */ + return 0; +} + + +/* Initialize options not yet given from environmental defaults */ +void +cmdline_parser_envdefaults(struct gengetopt_args_info *args_info) +{ + + + return; +} + + +/* Load option values from an .rc file */ +void +cmdline_parser_read_rcfile(const char *filename, + struct gengetopt_args_info *args_info, + int user_specified) +{ + char *fullname; + FILE *rcfile; + + if (!filename) return; /* ignore NULL filenames */ + +#if defined(HAVE_GETUID) && defined(HAVE_GETPWUID) + if (*filename == '~') { + /* tilde-expansion hack */ + struct passwd *pwent = getpwuid(getuid()); + if (!pwent) { + fprintf(stderr, "%s: user-id %d not found!\n", PROGRAM, getuid()); + return; + } + if (!pwent->pw_dir) { + fprintf(stderr, "%s: home directory for user-id %d not found!\n", PROGRAM, getuid()); + return; + } + fullname = (char *)malloc(strlen(pwent->pw_dir)+strlen(filename)); + strcpy(fullname, pwent->pw_dir); + strcat(fullname, filename+1); + } else { + fullname = strdup(filename); + } +#else /* !(defined(HAVE_GETUID) && defined(HAVE_GETPWUID)) */ + fullname = strdup(filename); +#endif /* defined(HAVE_GETUID) && defined(HAVE_GETPWUID) */ + + /* try to open */ + rcfile = fopen(fullname,"r"); + if (!rcfile) { + if (user_specified) { + fprintf(stderr, "%s: warning: open failed for rc-file '%s': %s\n", + PROGRAM, fullname, strerror(errno)); + } + } + else { + cmdline_parser_read_rc_stream(rcfile, fullname, args_info); + } + + /* cleanup */ + if (fullname != filename) free(fullname); + if (rcfile) fclose(rcfile); + + return; +} + + +/* Parse option values from an .rc file : guts */ +#define OPTPARSE_GET 32 +void +cmdline_parser_read_rc_stream(FILE *rcfile, + const char *filename, + struct gengetopt_args_info *args_info) +{ + char *optname = (char *)malloc(OPTPARSE_GET); + char *optval = (char *)malloc(OPTPARSE_GET); + size_t onsize = OPTPARSE_GET; + size_t ovsize = 
OPTPARSE_GET; + size_t onlen = 0; + size_t ovlen = 0; + int lineno = 0; + char c; + +#ifdef cmdline_parser_DEBUG + fprintf(stderr, "cmdline_parser_read_rc_stream('%s'):\n", filename); +#endif + + while ((c = fgetc(rcfile)) != EOF) { + onlen = 0; + ovlen = 0; + lineno++; + + /* -- get next option-name */ + /* skip leading space and comments */ + if (isspace(c)) continue; + if (c == '#') { + while ((c = fgetc(rcfile)) != EOF) { + if (c == '\n') break; + } + continue; + } + + /* parse option-name */ + while (c != EOF && c != '=' && !isspace(c)) { + /* re-allocate if necessary */ + if (onlen >= onsize-1) { + char *tmp = (char *)malloc(onsize+OPTPARSE_GET); + strcpy(tmp,optname); + free(optname); + + onsize += OPTPARSE_GET; + optname = tmp; + } + optname[onlen++] = c; + c = fgetc(rcfile); + } + optname[onlen++] = '\0'; + +#ifdef cmdline_parser_DEBUG + fprintf(stderr, "cmdline_parser_read_rc_stream('%s'): line %d: optname='%s'\n", + filename, lineno, optname); +#endif + + /* -- get next option-value */ + /* skip leading space */ + while ((c = fgetc(rcfile)) != EOF && isspace(c)) { + ; + } + + /* parse option-value */ + while (c != EOF && c != '\n') { + /* re-allocate if necessary */ + if (ovlen >= ovsize-1) { + char *tmp = (char *)malloc(ovsize+OPTPARSE_GET); + strcpy(tmp,optval); + free(optval); + ovsize += OPTPARSE_GET; + optval = tmp; + } + optval[ovlen++] = c; + c = fgetc(rcfile); + } + optval[ovlen++] = '\0'; + + /* now do the action for the option */ + if (cmdline_parser_parse_option('\0',optname,optval,args_info) != 0) { + fprintf(stderr, "%s: error in file '%s' at line %d.\n", PROGRAM, filename, lineno); + + } + } + + /* cleanup */ + free(optname); + free(optval); + + return; +} diff --git a/gfsm/gfsm/src/programs/gfsmreverse_cmdparser.h b/gfsm/gfsm/src/programs/gfsmreverse_cmdparser.h new file mode 100644 index 0000000..81ed264 --- /dev/null +++ b/gfsm/gfsm/src/programs/gfsmreverse_cmdparser.h @@ -0,0 +1,63 @@ +/* -*- Mode: C -*- + * + * File: 
gfsmreverse_cmdparser.h + * Description: Headers for command-line parser struct gengetopt_args_info. + * + * File autogenerated by optgen.perl version 0.05. + * + */ + +#ifndef gfsmreverse_cmdparser_h +#define gfsmreverse_cmdparser_h + +#ifdef __cplusplus +extern "C" { +#endif /* __cplusplus */ + +/* + * moocow: Never set PACKAGE and VERSION here. + */ + +struct gengetopt_args_info { + int compress_arg; /* Specify compression level of output file. (default=-1). */ + char * output_arg; /* Specifiy output file (default=stdout). (default=-). */ + + int help_given; /* Whether help was given */ + int version_given; /* Whether version was given */ + int compress_given; /* Whether compress was given */ + int output_given; /* Whether output was given */ + + char **inputs; /* unnamed arguments */ + unsigned inputs_num; /* number of unnamed arguments */ +}; + +/* read rc files (if any) and parse all command-line options in one swell foop */ +int cmdline_parser (int argc, char *const *argv, struct gengetopt_args_info *args_info); + +/* instantiate defaults from environment variables: you must call this yourself! 
*/ +void cmdline_parser_envdefaults (struct gengetopt_args_info *args_info); + +/* read a single rc-file */ +void cmdline_parser_read_rcfile (const char *filename, + struct gengetopt_args_info *args_info, + int user_specified); + +/* read a single rc-file (stream) */ +void cmdline_parser_read_rc_stream (FILE *rcfile, + const char *filename, + struct gengetopt_args_info *args_info); + +/* parse a single option */ +int cmdline_parser_parse_option (char oshort, const char *olong, const char *val, + struct gengetopt_args_info *args_info); + +/* print help message */ +void cmdline_parser_print_help(void); + +/* print version */ +void cmdline_parser_print_version(void); + +#ifdef __cplusplus +} +#endif /* __cplusplus */ +#endif /* gfsmreverse_cmdparser_h */ diff --git a/gfsm/gfsm/src/programs/gfsmreverse_main.c b/gfsm/gfsm/src/programs/gfsmreverse_main.c new file mode 100644 index 0000000..e2b24f1 --- /dev/null +++ b/gfsm/gfsm/src/programs/gfsmreverse_main.c @@ -0,0 +1,101 @@ +/* + gfsm-utils : finite state automaton utilities + Copyright (C) 2004 by Bryan Jurish <moocow@ling.uni-potsdam.de> + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif + +#include <stdio.h> +#include <errno.h> +#include <string.h> +#include <stdlib.h> + +#include <gfsm.h> + +#include "gfsmreverse_cmdparser.h" + +/*-------------------------------------------------------------------------- + * Globals + *--------------------------------------------------------------------------*/ +char *progname = "gfsmreverse"; + +//-- options +struct gengetopt_args_info args; + +//-- files +const char *infilename = "-"; +const char *outfilename = "-"; + +//-- global structs & vars +gfsmAutomaton *fsm; +int which = 1; + +/*-------------------------------------------------------------------------- + * Option Processing + *--------------------------------------------------------------------------*/ +void get_my_options(int argc, char **argv) +{ + if (cmdline_parser(argc, argv, &args) != 0) + exit(1); + + //-- output + if (args.inputs_num) infilename = args.inputs[0]; + if (args.output_arg) outfilename = args.output_arg; + + //-- load environmental defaults + //cmdline_parser_envdefaults(&args); + + //-- initialize automaton + fsm = gfsm_automaton_new(); +} + + +/*-------------------------------------------------------------------------- + * MAIN + *--------------------------------------------------------------------------*/ +int main (int argc, char **argv) +{ + gfsmError *err = NULL; + + GFSM_INIT + + get_my_options(argc,argv); + + //-- load automaton + if (!gfsm_automaton_load_bin_filename(fsm,infilename,&err)) { + g_printerr("%s: load failed for '%s': %s\n", progname, infilename, err->message); + exit(255); + } + + //-- invert + gfsm_automaton_reverse(fsm); + + //-- spew automaton + if (!gfsm_automaton_save_bin_filename(fsm,outfilename,args.compress_arg,&err)) { + g_printerr("%s: 
store failed to '%s': %s\n", progname, outfilename, err->message); + exit(4); + } + + //-- cleanup + if (fsm) gfsm_automaton_free(fsm); + + GFSM_FINISH + + return 0; +} diff --git a/gfsm/gfsm/src/programs/gfsmrmepsilon.gog b/gfsm/gfsm/src/programs/gfsmrmepsilon.gog new file mode 100644 index 0000000..e1b78d6 --- /dev/null +++ b/gfsm/gfsm/src/programs/gfsmrmepsilon.gog @@ -0,0 +1,80 @@ +# -*- Mode: Shell-Script -*- +# +# Getopt::Gen specification +#----------------------------------------------------------------------------- +program "gfsmrmepsilon" +#program_version "0.01" + +purpose "Remove epsilon arcs from finite state machines" +author "Bryan Jurish <moocow@ling.uni-potsdam.de>" +on_reparse "warn" + +#----------------------------------------------------------------------------- +# Details +#----------------------------------------------------------------------------- +details "" + +#----------------------------------------------------------------------------- +# Files +#----------------------------------------------------------------------------- +#rcfile "/etc/gfsmrc" +#rcfile "~/.gfsmrc" + +#----------------------------------------------------------------------------- +# Arguments +#----------------------------------------------------------------------------- +argument "BINFILE" "Stored binary gfsm file" \ + details=" +If unspecified, standard input will be read +" + +#----------------------------------------------------------------------------- +# Options +#----------------------------------------------------------------------------- +#group "Basic Options" + +int "compress" z "Specify compression level of output file." \ + arg="LEVEL" \ + default="-1" \ + details=" +Specify zlib compression level of output file. -1 (default) indicates +the default compression level, 0 (zero) indicates no zlib compression at all, +and 9 indicates the best possible compression. +" + +string "output" F "Specifiy output file (default=stdout)." 
\ + arg="FILE" \ + default="-" + +#----------------------------------------------------------------------------- +# Addenda +#----------------------------------------------------------------------------- +#addenda "" + +#----------------------------------------------------------------------------- +# Bugs +#----------------------------------------------------------------------------- +bugs " + +=over 4 + +=item + +No negative-cost epsilon cycles are allowed in the input automaton. + +=item + +Probably many more. + +=back + +" + +#----------------------------------------------------------------------------- +# Footer +#----------------------------------------------------------------------------- +#acknowledge `cat acknowledge.pod` + +seealso " +L<gfsmutils> +" diff --git a/gfsm/gfsm/src/programs/gfsmrmepsilon_cmdparser.c b/gfsm/gfsm/src/programs/gfsmrmepsilon_cmdparser.c new file mode 100644 index 0000000..1f4351f --- /dev/null +++ b/gfsm/gfsm/src/programs/gfsmrmepsilon_cmdparser.c @@ -0,0 +1,449 @@ +/* -*- Mode: C -*- + * + * File: gfsmrmepsilon_cmdparser.c + * Description: Code for command-line parser struct gengetopt_args_info. + * + * File autogenerated by optgen.perl version 0.05 + * generated with the following command: + * /usr/local/bin/optgen.perl -u -l --no-handle-rcfile --nopod -F gfsmrmepsilon_cmdparser gfsmrmepsilon.gog + * + * The developers of optgen.perl consider the fixed text that goes in all + * optgen.perl output files to be in the public domain: + * we make no copyright claims on it. + * + */ + +#include <stdio.h> +#include <stdlib.h> +#include <errno.h> +#include <string.h> +#include <unistd.h> +#include <ctype.h> + +/* If we use autoconf/autoheader. 
*/ +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#ifdef HAVE_PWD_H +# include <pwd.h> +#endif + +/* Allow user-overrides for PACKAGE and VERSION */ +#ifndef PACKAGE +# define PACKAGE "PACKAGE" +#endif + +#ifndef VERSION +# define VERSION "VERSION" +#endif + + +#ifndef PROGRAM +# define PROGRAM "gfsmrmepsilon" +#endif + +/* #define cmdline_parser_DEBUG */ + +/* Check for "configure's" getopt check result. */ +#ifndef HAVE_GETOPT_LONG +# include "getopt.h" +#else +# include <getopt.h> +#endif + +#if !defined(HAVE_STRDUP) && !defined(strdup) +# define strdup gengetopt_strdup +#endif /* HAVE_STRDUP */ + +#include "gfsmrmepsilon_cmdparser.h" + + +/* user code section */ + +/* end user code section */ + + +void +cmdline_parser_print_version (void) +{ + printf("gfsmrmepsilon (%s %s) by Bryan Jurish <moocow@ling.uni-potsdam.de>\n", PACKAGE, VERSION); +} + +void +cmdline_parser_print_help (void) +{ + cmdline_parser_print_version (); + printf("\n"); + printf("Purpose:\n"); + printf(" Remove epsilon arcs from finite state machines\n"); + printf("\n"); + + printf("Usage: %s [OPTIONS]... BINFILE\n", "gfsmrmepsilon"); + + printf("\n"); + printf(" Arguments:\n"); + printf(" BINFILE Stored binary gfsm file\n"); + + printf("\n"); + printf(" Options:\n"); + printf(" -h --help Print help and exit.\n"); + printf(" -V --version Print version and exit.\n"); + printf(" -zLEVEL --compress=LEVEL Specify compression level of output file.\n"); + printf(" -FFILE --output=FILE Specifiy output file (default=stdout).\n"); +} + +#if !defined(HAVE_STRDUP) && !defined(strdup) +/* gengetopt_strdup(): automatically generated from strdup.c. 
*/ +/* strdup.c replacement of strdup, which is not standard */ +static char * +gengetopt_strdup (const char *s) +{ + char *result = (char*)malloc(strlen(s) + 1); + if (result == (char*)0) + return (char*)0; + strcpy(result, s); + return result; +} +#endif /* HAVE_STRDUP */ + +/* clear_args(args_info): clears all args & resets to defaults */ +static void +clear_args(struct gengetopt_args_info *args_info) +{ + args_info->compress_arg = -1; + args_info->output_arg = strdup("-"); +} + + +int +cmdline_parser (int argc, char * const *argv, struct gengetopt_args_info *args_info) +{ + int c; /* Character of the parsed option. */ + int missing_required_options = 0; + + args_info->help_given = 0; + args_info->version_given = 0; + args_info->compress_given = 0; + args_info->output_given = 0; + + clear_args(args_info); + + /* rcfile handling */ + + /* end rcfile handling */ + + optarg = 0; + optind = 1; + opterr = 1; + optopt = '?'; + + while (1) + { + int option_index = 0; + static struct option long_options[] = { + { "help", 0, NULL, 'h' }, + { "version", 0, NULL, 'V' }, + { "compress", 1, NULL, 'z' }, + { "output", 1, NULL, 'F' }, + { NULL, 0, NULL, 0 } + }; + static char short_options[] = { + 'h', + 'V', + 'z', ':', + 'F', ':', + '\0' + }; + + c = getopt_long (argc, argv, short_options, long_options, &option_index); + + if (c == -1) break; /* Exit from 'while (1)' loop. 
*/ + + if (cmdline_parser_parse_option(c, long_options[option_index].name, optarg, args_info) != 0) { + exit (EXIT_FAILURE); + } + } /* while */ + + + + if ( missing_required_options ) + exit (EXIT_FAILURE); + + + if (optind < argc) { + int i = 0 ; + args_info->inputs_num = argc - optind ; + args_info->inputs = (char **)(malloc ((args_info->inputs_num)*sizeof(char *))) ; + while (optind < argc) + args_info->inputs[ i++ ] = strdup (argv[optind++]) ; + } + + return 0; +} + + +/* Parse a single option */ +int +cmdline_parser_parse_option(char oshort, const char *olong, const char *val, + struct gengetopt_args_info *args_info) +{ + if (!oshort && !(olong && *olong)) return 1; /* ignore null options */ + +#ifdef cmdline_parser_DEBUG + fprintf(stderr, "parse_option(): oshort='%c', olong='%s', val='%s'\n", oshort, olong, val);*/ +#endif + + switch (oshort) + { + case 'h': /* Print help and exit. */ + if (args_info->help_given) { + fprintf(stderr, "%s: `--help' (`-h') option given more than once\n", PROGRAM); + } + clear_args(args_info); + cmdline_parser_print_help(); + exit(EXIT_SUCCESS); + + break; + + case 'V': /* Print version and exit. */ + if (args_info->version_given) { + fprintf(stderr, "%s: `--version' (`-V') option given more than once\n", PROGRAM); + } + clear_args(args_info); + cmdline_parser_print_version(); + exit(EXIT_SUCCESS); + + break; + + case 'z': /* Specify compression level of output file. */ + if (args_info->compress_given) { + fprintf(stderr, "%s: `--compress' (`-z') option given more than once\n", PROGRAM); + } + args_info->compress_given++; + args_info->compress_arg = (int)atoi(val); + break; + + case 'F': /* Specifiy output file (default=stdout). 
*/ + if (args_info->output_given) { + fprintf(stderr, "%s: `--output' (`-F') option given more than once\n", PROGRAM); + } + args_info->output_given++; + if (args_info->output_arg) free(args_info->output_arg); + args_info->output_arg = strdup(val); + break; + + case 0: /* Long option(s) with no short form */ + /* Print help and exit. */ + if (strcmp(olong, "help") == 0) { + if (args_info->help_given) { + fprintf(stderr, "%s: `--help' (`-h') option given more than once\n", PROGRAM); + } + clear_args(args_info); + cmdline_parser_print_help(); + exit(EXIT_SUCCESS); + + } + + /* Print version and exit. */ + else if (strcmp(olong, "version") == 0) { + if (args_info->version_given) { + fprintf(stderr, "%s: `--version' (`-V') option given more than once\n", PROGRAM); + } + clear_args(args_info); + cmdline_parser_print_version(); + exit(EXIT_SUCCESS); + + } + + /* Specify compression level of output file. */ + else if (strcmp(olong, "compress") == 0) { + if (args_info->compress_given) { + fprintf(stderr, "%s: `--compress' (`-z') option given more than once\n", PROGRAM); + } + args_info->compress_given++; + args_info->compress_arg = (int)atoi(val); + } + + /* Specifiy output file (default=stdout). */ + else if (strcmp(olong, "output") == 0) { + if (args_info->output_given) { + fprintf(stderr, "%s: `--output' (`-F') option given more than once\n", PROGRAM); + } + args_info->output_given++; + if (args_info->output_arg) free(args_info->output_arg); + args_info->output_arg = strdup(val); + } + + else { + fprintf(stderr, "%s: unknown long option '%s'.\n", PROGRAM, olong); + return (EXIT_FAILURE); + } + break; + + case '?': /* Invalid Option */ + fprintf(stderr, "%s: unknown option '%s'.\n", PROGRAM, olong); + return (EXIT_FAILURE); + + + default: /* bug: options not considered. 
*/ + fprintf (stderr, "%s: option unknown: %c\n", PROGRAM, oshort); + abort (); + } /* switch */ + return 0; +} + + +/* Initialize options not yet given from environmental defaults */ +void +cmdline_parser_envdefaults(struct gengetopt_args_info *args_info) +{ + + + return; +} + + +/* Load option values from an .rc file */ +void +cmdline_parser_read_rcfile(const char *filename, + struct gengetopt_args_info *args_info, + int user_specified) +{ + char *fullname; + FILE *rcfile; + + if (!filename) return; /* ignore NULL filenames */ + +#if defined(HAVE_GETUID) && defined(HAVE_GETPWUID) + if (*filename == '~') { + /* tilde-expansion hack */ + struct passwd *pwent = getpwuid(getuid()); + if (!pwent) { + fprintf(stderr, "%s: user-id %d not found!\n", PROGRAM, getuid()); + return; + } + if (!pwent->pw_dir) { + fprintf(stderr, "%s: home directory for user-id %d not found!\n", PROGRAM, getuid()); + return; + } + fullname = (char *)malloc(strlen(pwent->pw_dir)+strlen(filename)); + strcpy(fullname, pwent->pw_dir); + strcat(fullname, filename+1); + } else { + fullname = strdup(filename); + } +#else /* !(defined(HAVE_GETUID) && defined(HAVE_GETPWUID)) */ + fullname = strdup(filename); +#endif /* defined(HAVE_GETUID) && defined(HAVE_GETPWUID) */ + + /* try to open */ + rcfile = fopen(fullname,"r"); + if (!rcfile) { + if (user_specified) { + fprintf(stderr, "%s: warning: open failed for rc-file '%s': %s\n", + PROGRAM, fullname, strerror(errno)); + } + } + else { + cmdline_parser_read_rc_stream(rcfile, fullname, args_info); + } + + /* cleanup */ + if (fullname != filename) free(fullname); + if (rcfile) fclose(rcfile); + + return; +} + + +/* Parse option values from an .rc file : guts */ +#define OPTPARSE_GET 32 +void +cmdline_parser_read_rc_stream(FILE *rcfile, + const char *filename, + struct gengetopt_args_info *args_info) +{ + char *optname = (char *)malloc(OPTPARSE_GET); + char *optval = (char *)malloc(OPTPARSE_GET); + size_t onsize = OPTPARSE_GET; + size_t ovsize = 
OPTPARSE_GET; + size_t onlen = 0; + size_t ovlen = 0; + int lineno = 0; + char c; + +#ifdef cmdline_parser_DEBUG + fprintf(stderr, "cmdline_parser_read_rc_stream('%s'):\n", filename); +#endif + + while ((c = fgetc(rcfile)) != EOF) { + onlen = 0; + ovlen = 0; + lineno++; + + /* -- get next option-name */ + /* skip leading space and comments */ + if (isspace(c)) continue; + if (c == '#') { + while ((c = fgetc(rcfile)) != EOF) { + if (c == '\n') break; + } + continue; + } + + /* parse option-name */ + while (c != EOF && c != '=' && !isspace(c)) { + /* re-allocate if necessary */ + if (onlen >= onsize-1) { + char *tmp = (char *)malloc(onsize+OPTPARSE_GET); + strcpy(tmp,optname); + free(optname); + + onsize += OPTPARSE_GET; + optname = tmp; + } + optname[onlen++] = c; + c = fgetc(rcfile); + } + optname[onlen++] = '\0'; + +#ifdef cmdline_parser_DEBUG + fprintf(stderr, "cmdline_parser_read_rc_stream('%s'): line %d: optname='%s'\n", + filename, lineno, optname); +#endif + + /* -- get next option-value */ + /* skip leading space */ + while ((c = fgetc(rcfile)) != EOF && isspace(c)) { + ; + } + + /* parse option-value */ + while (c != EOF && c != '\n') { + /* re-allocate if necessary */ + if (ovlen >= ovsize-1) { + char *tmp = (char *)malloc(ovsize+OPTPARSE_GET); + strcpy(tmp,optval); + free(optval); + ovsize += OPTPARSE_GET; + optval = tmp; + } + optval[ovlen++] = c; + c = fgetc(rcfile); + } + optval[ovlen++] = '\0'; + + /* now do the action for the option */ + if (cmdline_parser_parse_option('\0',optname,optval,args_info) != 0) { + fprintf(stderr, "%s: error in file '%s' at line %d.\n", PROGRAM, filename, lineno); + + } + } + + /* cleanup */ + free(optname); + free(optval); + + return; +} diff --git a/gfsm/gfsm/src/programs/gfsmrmepsilon_cmdparser.h b/gfsm/gfsm/src/programs/gfsmrmepsilon_cmdparser.h new file mode 100644 index 0000000..e9deda5 --- /dev/null +++ b/gfsm/gfsm/src/programs/gfsmrmepsilon_cmdparser.h @@ -0,0 +1,63 @@ +/* -*- Mode: C -*- + * + * File: 
gfsmrmepsilon_cmdparser.h + * Description: Headers for command-line parser struct gengetopt_args_info. + * + * File autogenerated by optgen.perl version 0.05. + * + */ + +#ifndef gfsmrmepsilon_cmdparser_h +#define gfsmrmepsilon_cmdparser_h + +#ifdef __cplusplus +extern "C" { +#endif /* __cplusplus */ + +/* + * moocow: Never set PACKAGE and VERSION here. + */ + +struct gengetopt_args_info { + int compress_arg; /* Specify compression level of output file. (default=-1). */ + char * output_arg; /* Specifiy output file (default=stdout). (default=-). */ + + int help_given; /* Whether help was given */ + int version_given; /* Whether version was given */ + int compress_given; /* Whether compress was given */ + int output_given; /* Whether output was given */ + + char **inputs; /* unnamed arguments */ + unsigned inputs_num; /* number of unnamed arguments */ +}; + +/* read rc files (if any) and parse all command-line options in one swell foop */ +int cmdline_parser (int argc, char *const *argv, struct gengetopt_args_info *args_info); + +/* instantiate defaults from environment variables: you must call this yourself! 
*/ +void cmdline_parser_envdefaults (struct gengetopt_args_info *args_info); + +/* read a single rc-file */ +void cmdline_parser_read_rcfile (const char *filename, + struct gengetopt_args_info *args_info, + int user_specified); + +/* read a single rc-file (stream) */ +void cmdline_parser_read_rc_stream (FILE *rcfile, + const char *filename, + struct gengetopt_args_info *args_info); + +/* parse a single option */ +int cmdline_parser_parse_option (char oshort, const char *olong, const char *val, + struct gengetopt_args_info *args_info); + +/* print help message */ +void cmdline_parser_print_help(void); + +/* print version */ +void cmdline_parser_print_version(void); + +#ifdef __cplusplus +} +#endif /* __cplusplus */ +#endif /* gfsmrmepsilon_cmdparser_h */ diff --git a/gfsm/gfsm/src/programs/gfsmrmepsilon_main.c b/gfsm/gfsm/src/programs/gfsmrmepsilon_main.c new file mode 100644 index 0000000..ec3e899 --- /dev/null +++ b/gfsm/gfsm/src/programs/gfsmrmepsilon_main.c @@ -0,0 +1,95 @@ +/* + gfsm-utils : finite state automaton utilities + Copyright (C) 2004 by Bryan Jurish <moocow@ling.uni-potsdam.de> + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif + +#include <stdio.h> +#include <errno.h> +#include <string.h> +#include <stdlib.h> + +#include <gfsm.h> + +#include "gfsmrmepsilon_cmdparser.h" + +/*-------------------------------------------------------------------------- + * Globals + *--------------------------------------------------------------------------*/ +char *progname = "gfsmrmepsilon"; + +//-- options +struct gengetopt_args_info args; + +//-- files +const char *infilename = "-"; +const char *outfilename = "-"; + +//-- global structs +gfsmAutomaton *fsm; + +/*-------------------------------------------------------------------------- + * Option Processing + *--------------------------------------------------------------------------*/ +void get_my_options(int argc, char **argv) +{ + if (cmdline_parser(argc, argv, &args) != 0) + exit(1); + + //-- output + if (args.inputs_num) infilename = args.inputs[0]; + if (args.output_arg) outfilename = args.output_arg; + + //-- load environmental defaults + //cmdline_parser_envdefaults(&args); + + //-- initialize automaton + fsm = gfsm_automaton_new(); +} + + +/*-------------------------------------------------------------------------- + * MAIN + *--------------------------------------------------------------------------*/ +int main (int argc, char **argv) +{ + gfsmError *err = NULL; + get_my_options(argc,argv); + + //-- load automaton + if (!gfsm_automaton_load_bin_filename(fsm,infilename,&err)) { + g_printerr("%s: load failed for '%s': %s\n", progname, infilename, err->message); + exit(255); + } + + //-- rmepsilon + gfsm_automaton_rmepsilon(fsm); + + //-- spew automaton + if (!gfsm_automaton_save_bin_filename(fsm,outfilename,args.compress_arg,&err)) { + g_printerr("%s: store failed to '%s': %s\n", 
progname, outfilename, err->message); + exit(4); + } + + //-- cleanup + if (fsm) gfsm_automaton_free(fsm); + + return 0; +} diff --git a/gfsm/gfsm/src/programs/gfsmsigma.gog b/gfsm/gfsm/src/programs/gfsmsigma.gog new file mode 100644 index 0000000..68b7243 --- /dev/null +++ b/gfsm/gfsm/src/programs/gfsmsigma.gog @@ -0,0 +1,70 @@ +# -*- Mode: Shell-Script -*- +# +# Getopt::Gen specification +#----------------------------------------------------------------------------- +program "gfsmsigma" +#program_version "0.01" + +purpose "Create a single-state alphabet acceptor" +author "Bryan Jurish <moocow@ling.uni-potsdam.de>" +on_reparse "warn" + +#----------------------------------------------------------------------------- +# Details +#----------------------------------------------------------------------------- +details "" + +#----------------------------------------------------------------------------- +# Files +#----------------------------------------------------------------------------- +#rcfile "/etc/mootrc" +#rcfile "~/.mootrc" + +#----------------------------------------------------------------------------- +# Arguments +#----------------------------------------------------------------------------- +argument "LABFILE" "Alphabet file" \ + details=" +If unspecified, standard input will be read +" + +#----------------------------------------------------------------------------- +# Options +#----------------------------------------------------------------------------- +#group "Basic Options" + +int "compress" z "Specify compression level of output file." \ + arg="LEVEL" \ + default="-1" \ + details=" +Specify zlib compression level of output file. -1 (default) indicates +the default compression level, 0 (zero) indicates no zlib compression at all, +and 9 indicates the best possible compression. +" + +string "output" F "Specifiy output file (default=stdout)." 
\ + arg="FILE" \ + default="-" + +#----------------------------------------------------------------------------- +# Addenda +#----------------------------------------------------------------------------- +#addenda "" + +#----------------------------------------------------------------------------- +# Bugs +#----------------------------------------------------------------------------- +bugs " + +None known. + +" + +#----------------------------------------------------------------------------- +# Footer +#----------------------------------------------------------------------------- +#acknowledge `cat acknowledge.pod` + +seealso " +L<gfsmutils> +" diff --git a/gfsm/gfsm/src/programs/gfsmsigma_cmdparser.c b/gfsm/gfsm/src/programs/gfsmsigma_cmdparser.c new file mode 100644 index 0000000..7cf017f --- /dev/null +++ b/gfsm/gfsm/src/programs/gfsmsigma_cmdparser.c @@ -0,0 +1,449 @@ +/* -*- Mode: C -*- + * + * File: gfsmsigma_cmdparser.c + * Description: Code for command-line parser struct gengetopt_args_info. + * + * File autogenerated by optgen.perl version 0.05 + * generated with the following command: + * /usr/local/bin/optgen.perl -u -l --no-handle-rcfile --nopod -F gfsmsigma_cmdparser gfsmsigma.gog + * + * The developers of optgen.perl consider the fixed text that goes in all + * optgen.perl output files to be in the public domain: + * we make no copyright claims on it. + * + */ + +#include <stdio.h> +#include <stdlib.h> +#include <errno.h> +#include <string.h> +#include <unistd.h> +#include <ctype.h> + +/* If we use autoconf/autoheader. */ +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#ifdef HAVE_PWD_H +# include <pwd.h> +#endif + +/* Allow user-overrides for PACKAGE and VERSION */ +#ifndef PACKAGE +# define PACKAGE "PACKAGE" +#endif + +#ifndef VERSION +# define VERSION "VERSION" +#endif + + +#ifndef PROGRAM +# define PROGRAM "gfsmsigma" +#endif + +/* #define cmdline_parser_DEBUG */ + +/* Check for "configure's" getopt check result. 
*/ +#ifndef HAVE_GETOPT_LONG +# include "getopt.h" +#else +# include <getopt.h> +#endif + +#if !defined(HAVE_STRDUP) && !defined(strdup) +# define strdup gengetopt_strdup +#endif /* HAVE_STRDUP */ + +#include "gfsmsigma_cmdparser.h" + + +/* user code section */ + +/* end user code section */ + + +void +cmdline_parser_print_version (void) +{ + printf("gfsmsigma (%s %s) by Bryan Jurish <moocow@ling.uni-potsdam.de>\n", PACKAGE, VERSION); +} + +void +cmdline_parser_print_help (void) +{ + cmdline_parser_print_version (); + printf("\n"); + printf("Purpose:\n"); + printf(" Create a single-state alphabet acceptor\n"); + printf("\n"); + + printf("Usage: %s [OPTIONS]... LABFILE\n", "gfsmsigma"); + + printf("\n"); + printf(" Arguments:\n"); + printf(" LABFILE Alphabet file\n"); + + printf("\n"); + printf(" Options:\n"); + printf(" -h --help Print help and exit.\n"); + printf(" -V --version Print version and exit.\n"); + printf(" -zLEVEL --compress=LEVEL Specify compression level of output file.\n"); + printf(" -FFILE --output=FILE Specifiy output file (default=stdout).\n"); +} + +#if !defined(HAVE_STRDUP) && !defined(strdup) +/* gengetopt_strdup(): automatically generated from strdup.c. */ +/* strdup.c replacement of strdup, which is not standard */ +static char * +gengetopt_strdup (const char *s) +{ + char *result = (char*)malloc(strlen(s) + 1); + if (result == (char*)0) + return (char*)0; + strcpy(result, s); + return result; +} +#endif /* HAVE_STRDUP */ + +/* clear_args(args_info): clears all args & resets to defaults */ +static void +clear_args(struct gengetopt_args_info *args_info) +{ + args_info->compress_arg = -1; + args_info->output_arg = strdup("-"); +} + + +int +cmdline_parser (int argc, char * const *argv, struct gengetopt_args_info *args_info) +{ + int c; /* Character of the parsed option. 
*/ + int missing_required_options = 0; + + args_info->help_given = 0; + args_info->version_given = 0; + args_info->compress_given = 0; + args_info->output_given = 0; + + clear_args(args_info); + + /* rcfile handling */ + + /* end rcfile handling */ + + optarg = 0; + optind = 1; + opterr = 1; + optopt = '?'; + + while (1) + { + int option_index = 0; + static struct option long_options[] = { + { "help", 0, NULL, 'h' }, + { "version", 0, NULL, 'V' }, + { "compress", 1, NULL, 'z' }, + { "output", 1, NULL, 'F' }, + { NULL, 0, NULL, 0 } + }; + static char short_options[] = { + 'h', + 'V', + 'z', ':', + 'F', ':', + '\0' + }; + + c = getopt_long (argc, argv, short_options, long_options, &option_index); + + if (c == -1) break; /* Exit from 'while (1)' loop. */ + + if (cmdline_parser_parse_option(c, long_options[option_index].name, optarg, args_info) != 0) { + exit (EXIT_FAILURE); + } + } /* while */ + + + + if ( missing_required_options ) + exit (EXIT_FAILURE); + + + if (optind < argc) { + int i = 0 ; + args_info->inputs_num = argc - optind ; + args_info->inputs = (char **)(malloc ((args_info->inputs_num)*sizeof(char *))) ; + while (optind < argc) + args_info->inputs[ i++ ] = strdup (argv[optind++]) ; + } + + return 0; +} + + +/* Parse a single option */ +int +cmdline_parser_parse_option(char oshort, const char *olong, const char *val, + struct gengetopt_args_info *args_info) +{ + if (!oshort && !(olong && *olong)) return 1; /* ignore null options */ + +#ifdef cmdline_parser_DEBUG + fprintf(stderr, "parse_option(): oshort='%c', olong='%s', val='%s'\n", oshort, olong, val);*/ +#endif + + switch (oshort) + { + case 'h': /* Print help and exit. */ + if (args_info->help_given) { + fprintf(stderr, "%s: `--help' (`-h') option given more than once\n", PROGRAM); + } + clear_args(args_info); + cmdline_parser_print_help(); + exit(EXIT_SUCCESS); + + break; + + case 'V': /* Print version and exit. 
*/ + if (args_info->version_given) { + fprintf(stderr, "%s: `--version' (`-V') option given more than once\n", PROGRAM); + } + clear_args(args_info); + cmdline_parser_print_version(); + exit(EXIT_SUCCESS); + + break; + + case 'z': /* Specify compression level of output file. */ + if (args_info->compress_given) { + fprintf(stderr, "%s: `--compress' (`-z') option given more than once\n", PROGRAM); + } + args_info->compress_given++; + args_info->compress_arg = (int)atoi(val); + break; + + case 'F': /* Specifiy output file (default=stdout). */ + if (args_info->output_given) { + fprintf(stderr, "%s: `--output' (`-F') option given more than once\n", PROGRAM); + } + args_info->output_given++; + if (args_info->output_arg) free(args_info->output_arg); + args_info->output_arg = strdup(val); + break; + + case 0: /* Long option(s) with no short form */ + /* Print help and exit. */ + if (strcmp(olong, "help") == 0) { + if (args_info->help_given) { + fprintf(stderr, "%s: `--help' (`-h') option given more than once\n", PROGRAM); + } + clear_args(args_info); + cmdline_parser_print_help(); + exit(EXIT_SUCCESS); + + } + + /* Print version and exit. */ + else if (strcmp(olong, "version") == 0) { + if (args_info->version_given) { + fprintf(stderr, "%s: `--version' (`-V') option given more than once\n", PROGRAM); + } + clear_args(args_info); + cmdline_parser_print_version(); + exit(EXIT_SUCCESS); + + } + + /* Specify compression level of output file. */ + else if (strcmp(olong, "compress") == 0) { + if (args_info->compress_given) { + fprintf(stderr, "%s: `--compress' (`-z') option given more than once\n", PROGRAM); + } + args_info->compress_given++; + args_info->compress_arg = (int)atoi(val); + } + + /* Specifiy output file (default=stdout). 
*/ + else if (strcmp(olong, "output") == 0) { + if (args_info->output_given) { + fprintf(stderr, "%s: `--output' (`-F') option given more than once\n", PROGRAM); + } + args_info->output_given++; + if (args_info->output_arg) free(args_info->output_arg); + args_info->output_arg = strdup(val); + } + + else { + fprintf(stderr, "%s: unknown long option '%s'.\n", PROGRAM, olong); + return (EXIT_FAILURE); + } + break; + + case '?': /* Invalid Option */ + fprintf(stderr, "%s: unknown option '%s'.\n", PROGRAM, olong); + return (EXIT_FAILURE); + + + default: /* bug: options not considered. */ + fprintf (stderr, "%s: option unknown: %c\n", PROGRAM, oshort); + abort (); + } /* switch */ + return 0; +} + + +/* Initialize options not yet given from environmental defaults */ +void +cmdline_parser_envdefaults(struct gengetopt_args_info *args_info) +{ + + + return; +} + + +/* Load option values from an .rc file */ +void +cmdline_parser_read_rcfile(const char *filename, + struct gengetopt_args_info *args_info, + int user_specified) +{ + char *fullname; + FILE *rcfile; + + if (!filename) return; /* ignore NULL filenames */ + +#if defined(HAVE_GETUID) && defined(HAVE_GETPWUID) + if (*filename == '~') { + /* tilde-expansion hack */ + struct passwd *pwent = getpwuid(getuid()); + if (!pwent) { + fprintf(stderr, "%s: user-id %d not found!\n", PROGRAM, getuid()); + return; + } + if (!pwent->pw_dir) { + fprintf(stderr, "%s: home directory for user-id %d not found!\n", PROGRAM, getuid()); + return; + } + fullname = (char *)malloc(strlen(pwent->pw_dir)+strlen(filename)); + strcpy(fullname, pwent->pw_dir); + strcat(fullname, filename+1); + } else { + fullname = strdup(filename); + } +#else /* !(defined(HAVE_GETUID) && defined(HAVE_GETPWUID)) */ + fullname = strdup(filename); +#endif /* defined(HAVE_GETUID) && defined(HAVE_GETPWUID) */ + + /* try to open */ + rcfile = fopen(fullname,"r"); + if (!rcfile) { + if (user_specified) { + fprintf(stderr, "%s: warning: open failed for rc-file '%s': 
%s\n", + PROGRAM, fullname, strerror(errno)); + } + } + else { + cmdline_parser_read_rc_stream(rcfile, fullname, args_info); + } + + /* cleanup */ + if (fullname != filename) free(fullname); + if (rcfile) fclose(rcfile); + + return; +} + + +/* Parse option values from an .rc file : guts */ +#define OPTPARSE_GET 32 +void +cmdline_parser_read_rc_stream(FILE *rcfile, + const char *filename, + struct gengetopt_args_info *args_info) +{ + char *optname = (char *)malloc(OPTPARSE_GET); + char *optval = (char *)malloc(OPTPARSE_GET); + size_t onsize = OPTPARSE_GET; + size_t ovsize = OPTPARSE_GET; + size_t onlen = 0; + size_t ovlen = 0; + int lineno = 0; + char c; + +#ifdef cmdline_parser_DEBUG + fprintf(stderr, "cmdline_parser_read_rc_stream('%s'):\n", filename); +#endif + + while ((c = fgetc(rcfile)) != EOF) { + onlen = 0; + ovlen = 0; + lineno++; + + /* -- get next option-name */ + /* skip leading space and comments */ + if (isspace(c)) continue; + if (c == '#') { + while ((c = fgetc(rcfile)) != EOF) { + if (c == '\n') break; + } + continue; + } + + /* parse option-name */ + while (c != EOF && c != '=' && !isspace(c)) { + /* re-allocate if necessary */ + if (onlen >= onsize-1) { + char *tmp = (char *)malloc(onsize+OPTPARSE_GET); + strcpy(tmp,optname); + free(optname); + + onsize += OPTPARSE_GET; + optname = tmp; + } + optname[onlen++] = c; + c = fgetc(rcfile); + } + optname[onlen++] = '\0'; + +#ifdef cmdline_parser_DEBUG + fprintf(stderr, "cmdline_parser_read_rc_stream('%s'): line %d: optname='%s'\n", + filename, lineno, optname); +#endif + + /* -- get next option-value */ + /* skip leading space */ + while ((c = fgetc(rcfile)) != EOF && isspace(c)) { + ; + } + + /* parse option-value */ + while (c != EOF && c != '\n') { + /* re-allocate if necessary */ + if (ovlen >= ovsize-1) { + char *tmp = (char *)malloc(ovsize+OPTPARSE_GET); + strcpy(tmp,optval); + free(optval); + ovsize += OPTPARSE_GET; + optval = tmp; + } + optval[ovlen++] = c; + c = fgetc(rcfile); + } + 
optval[ovlen++] = '\0'; + + /* now do the action for the option */ + if (cmdline_parser_parse_option('\0',optname,optval,args_info) != 0) { + fprintf(stderr, "%s: error in file '%s' at line %d.\n", PROGRAM, filename, lineno); + + } + } + + /* cleanup */ + free(optname); + free(optval); + + return; +} diff --git a/gfsm/gfsm/src/programs/gfsmsigma_cmdparser.h b/gfsm/gfsm/src/programs/gfsmsigma_cmdparser.h new file mode 100644 index 0000000..4a7832c --- /dev/null +++ b/gfsm/gfsm/src/programs/gfsmsigma_cmdparser.h @@ -0,0 +1,63 @@ +/* -*- Mode: C -*- + * + * File: gfsmsigma_cmdparser.h + * Description: Headers for command-line parser struct gengetopt_args_info. + * + * File autogenerated by optgen.perl version 0.05. + * + */ + +#ifndef gfsmsigma_cmdparser_h +#define gfsmsigma_cmdparser_h + +#ifdef __cplusplus +extern "C" { +#endif /* __cplusplus */ + +/* + * moocow: Never set PACKAGE and VERSION here. + */ + +struct gengetopt_args_info { + int compress_arg; /* Specify compression level of output file. (default=-1). */ + char * output_arg; /* Specifiy output file (default=stdout). (default=-). */ + + int help_given; /* Whether help was given */ + int version_given; /* Whether version was given */ + int compress_given; /* Whether compress was given */ + int output_given; /* Whether output was given */ + + char **inputs; /* unnamed arguments */ + unsigned inputs_num; /* number of unnamed arguments */ +}; + +/* read rc files (if any) and parse all command-line options in one swell foop */ +int cmdline_parser (int argc, char *const *argv, struct gengetopt_args_info *args_info); + +/* instantiate defaults from environment variables: you must call this yourself! 
*/ +void cmdline_parser_envdefaults (struct gengetopt_args_info *args_info); + +/* read a single rc-file */ +void cmdline_parser_read_rcfile (const char *filename, + struct gengetopt_args_info *args_info, + int user_specified); + +/* read a single rc-file (stream) */ +void cmdline_parser_read_rc_stream (FILE *rcfile, + const char *filename, + struct gengetopt_args_info *args_info); + +/* parse a single option */ +int cmdline_parser_parse_option (char oshort, const char *olong, const char *val, + struct gengetopt_args_info *args_info); + +/* print help message */ +void cmdline_parser_print_help(void); + +/* print version */ +void cmdline_parser_print_version(void); + +#ifdef __cplusplus +} +#endif /* __cplusplus */ +#endif /* gfsmsigma_cmdparser_h */ diff --git a/gfsm/gfsm/src/programs/gfsmsigma_main.c b/gfsm/gfsm/src/programs/gfsmsigma_main.c new file mode 100644 index 0000000..ce5c59d --- /dev/null +++ b/gfsm/gfsm/src/programs/gfsmsigma_main.c @@ -0,0 +1,100 @@ +/* + gfsm-utils : finite state automaton utilities + Copyright (C) 2005 by Bryan Jurish <moocow@ling.uni-potsdam.de> + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif + +#include <stdio.h> +#include <errno.h> +#include <string.h> +#include <stdlib.h> + +#include <gfsm.h> + +#include "gfsmsigma_cmdparser.h" + +/*-------------------------------------------------------------------------- + * Globals + *--------------------------------------------------------------------------*/ +char *progname = "gfsmsigma"; + +//-- options +struct gengetopt_args_info args; + +//-- files +const char *abetname = "-"; +const char *outfilename = "-"; + +//-- global structs etc. +gfsmError *err = NULL; +gfsmAutomaton *fsmOut=NULL; +gfsmAlphabet *abet=NULL; + +/*-------------------------------------------------------------------------- + * Option Processing + *--------------------------------------------------------------------------*/ +void get_my_options(int argc, char **argv) +{ + if (cmdline_parser(argc, argv, &args) != 0) + exit(1); + + //-- output + if (args.inputs_num) abetname = args.inputs[0]; + if (args.output_arg) outfilename = args.output_arg; + + //-- initialize automaton + fsmOut = gfsm_automaton_new(); +} + + +/*-------------------------------------------------------------------------- + * MAIN + *--------------------------------------------------------------------------*/ +int main (int argc, char **argv) +{ + GFSM_INIT + + get_my_options(argc,argv); + + //-- load alphabet + abet = gfsm_string_alphabet_new(); + if (!gfsm_alphabet_load_filename(abet, abetname, &err)) { + g_printerr("%s: load failed for alphabet file '%s': %s\n", + progname, abetname, err->message); + exit(2); + } + + //-- compute operation + gfsm_automaton_sigma(fsmOut,abet); + + //-- spew automaton + if (!gfsm_automaton_save_bin_filename(fsmOut,outfilename,args.compress_arg,&err)) { + g_printerr("%s: 
store failed to '%s': %s\n", progname, outfilename, err->message); + exit(4); + } + + //-- cleanup + if (abet) gfsm_alphabet_free(abet); + if (fsmOut) gfsm_automaton_free(fsmOut); + + GFSM_FINISH + + return 0; +} diff --git a/gfsm/gfsm/src/programs/gfsmstrings.gog b/gfsm/gfsm/src/programs/gfsmstrings.gog new file mode 100644 index 0000000..5672f36 --- /dev/null +++ b/gfsm/gfsm/src/programs/gfsmstrings.gog @@ -0,0 +1,76 @@ +# -*- Mode: Shell-Script -*- +# +# Getopt::Gen specification +#----------------------------------------------------------------------------- +program "gfsmstrings" +#program_version "0.01" + +purpose "Serialize binary acyclic gfsm files to lists of strings" +author "Bryan Jurish <moocow@ling.uni-potsdam.de>" +on_reparse "warn" + +#----------------------------------------------------------------------------- +# Details +#----------------------------------------------------------------------------- +#details "" + +#----------------------------------------------------------------------------- +# Files +#----------------------------------------------------------------------------- +#rcfile "/etc/gfsmrc" +#rcfile "~/.gfsmrc" + +#----------------------------------------------------------------------------- +# Arguments +#----------------------------------------------------------------------------- +argument "BINFILE" "Stored binary gfsm file" \ + details=" +If unspecified, standard input will be read. +" + +#----------------------------------------------------------------------------- +# Common Options +#----------------------------------------------------------------------------- +group "Common Options" + +string "ilabels" i "Specify input (lower) labels file." \ + arg="LABELS" + +string "olabels" o "Specify output (upper) labels file." \ + arg="LABELS" + +flag "att" a "Output in AT&T regex format." \ + default=0 + +flag "viterbi" v "Treat input automaton as a Viterbi trellis." \ + default=0 + +string "output" F "Output file." 
\ + arg="TXTFILE" \ + details=" +Default behavior is to write to stdout. +" + + +#----------------------------------------------------------------------------- +# Addenda +#----------------------------------------------------------------------------- +#addenda "" + +#----------------------------------------------------------------------------- +# Bugs +#----------------------------------------------------------------------------- +bugs " + +None known. + +" + +#----------------------------------------------------------------------------- +# Footer +#----------------------------------------------------------------------------- +#acknowledge `cat acknowledge.pod` + +seealso " +L<gfsmutils>, +" diff --git a/gfsm/gfsm/src/programs/gfsmstrings_cmdparser.c b/gfsm/gfsm/src/programs/gfsmstrings_cmdparser.c new file mode 100644 index 0000000..6f2c30b --- /dev/null +++ b/gfsm/gfsm/src/programs/gfsmstrings_cmdparser.c @@ -0,0 +1,525 @@ +/* -*- Mode: C -*- + * + * File: gfsmstrings_cmdparser.c + * Description: Code for command-line parser struct gengetopt_args_info. + * + * File autogenerated by optgen.perl version 0.05 + * generated with the following command: + * /usr/local/bin/optgen.perl -u -l --no-handle-rcfile --nopod -F gfsmstrings_cmdparser gfsmstrings.gog + * + * The developers of optgen.perl consider the fixed text that goes in all + * optgen.perl output files to be in the public domain: + * we make no copyright claims on it. + * + */ + +#include <stdio.h> +#include <stdlib.h> +#include <errno.h> +#include <string.h> +#include <unistd.h> +#include <ctype.h> + +/* If we use autoconf/autoheader. 
*/ +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#ifdef HAVE_PWD_H +# include <pwd.h> +#endif + +/* Allow user-overrides for PACKAGE and VERSION */ +#ifndef PACKAGE +# define PACKAGE "PACKAGE" +#endif + +#ifndef VERSION +# define VERSION "VERSION" +#endif + + +#ifndef PROGRAM +# define PROGRAM "gfsmstrings" +#endif + +/* #define cmdline_parser_DEBUG */ + +/* Check for "configure's" getopt check result. */ +#ifndef HAVE_GETOPT_LONG +# include "getopt.h" +#else +# include <getopt.h> +#endif + +#if !defined(HAVE_STRDUP) && !defined(strdup) +# define strdup gengetopt_strdup +#endif /* HAVE_STRDUP */ + +#include "gfsmstrings_cmdparser.h" + + +/* user code section */ + +/* end user code section */ + + +void +cmdline_parser_print_version (void) +{ + printf("gfsmstrings (%s %s) by Bryan Jurish <moocow@ling.uni-potsdam.de>\n", PACKAGE, VERSION); +} + +void +cmdline_parser_print_help (void) +{ + cmdline_parser_print_version (); + printf("\n"); + printf("Purpose:\n"); + printf(" Serialize binary acyclic gfsm files to lists of strings\n"); + printf("\n"); + + printf("Usage: %s [OPTIONS]... BINFILE\n", "gfsmstrings"); + + printf("\n"); + printf(" Arguments:\n"); + printf(" BINFILE Stored binary gfsm file\n"); + + printf("\n"); + printf(" Options:\n"); + printf(" -h --help Print help and exit.\n"); + printf(" -V --version Print version and exit.\n"); + printf("\n"); + printf(" Common Options:\n"); + printf(" -iLABELS --ilabels=LABELS Specify input (lower) labels file.\n"); + printf(" -oLABELS --olabels=LABELS Specify output (upper) labels file.\n"); + printf(" -a --att Output in AT&T regex format.\n"); + printf(" -v --viterbi Treat input automaton as a Viterbi trellis.\n"); + printf(" -FTXTFILE --output=TXTFILE Output file.\n"); +} + +#if !defined(HAVE_STRDUP) && !defined(strdup) +/* gengetopt_strdup(): automatically generated from strdup.c. 
*/ +/* strdup.c replacement of strdup, which is not standard */ +static char * +gengetopt_strdup (const char *s) +{ + char *result = (char*)malloc(strlen(s) + 1); + if (result == (char*)0) + return (char*)0; + strcpy(result, s); + return result; +} +#endif /* HAVE_STRDUP */ + +/* clear_args(args_info): clears all args & resets to defaults */ +static void +clear_args(struct gengetopt_args_info *args_info) +{ + args_info->ilabels_arg = NULL; + args_info->olabels_arg = NULL; + args_info->att_flag = 0; + args_info->viterbi_flag = 0; + args_info->output_arg = NULL; +} + + +int +cmdline_parser (int argc, char * const *argv, struct gengetopt_args_info *args_info) +{ + int c; /* Character of the parsed option. */ + int missing_required_options = 0; + + args_info->help_given = 0; + args_info->version_given = 0; + args_info->ilabels_given = 0; + args_info->olabels_given = 0; + args_info->att_given = 0; + args_info->viterbi_given = 0; + args_info->output_given = 0; + + clear_args(args_info); + + /* rcfile handling */ + + /* end rcfile handling */ + + optarg = 0; + optind = 1; + opterr = 1; + optopt = '?'; + + while (1) + { + int option_index = 0; + static struct option long_options[] = { + { "help", 0, NULL, 'h' }, + { "version", 0, NULL, 'V' }, + { "ilabels", 1, NULL, 'i' }, + { "olabels", 1, NULL, 'o' }, + { "att", 0, NULL, 'a' }, + { "viterbi", 0, NULL, 'v' }, + { "output", 1, NULL, 'F' }, + { NULL, 0, NULL, 0 } + }; + static char short_options[] = { + 'h', + 'V', + 'i', ':', + 'o', ':', + 'a', + 'v', + 'F', ':', + '\0' + }; + + c = getopt_long (argc, argv, short_options, long_options, &option_index); + + if (c == -1) break; /* Exit from 'while (1)' loop. 
*/ + + if (cmdline_parser_parse_option(c, long_options[option_index].name, optarg, args_info) != 0) { + exit (EXIT_FAILURE); + } + } /* while */ + + + + if ( missing_required_options ) + exit (EXIT_FAILURE); + + + if (optind < argc) { + int i = 0 ; + args_info->inputs_num = argc - optind ; + args_info->inputs = (char **)(malloc ((args_info->inputs_num)*sizeof(char *))) ; + while (optind < argc) + args_info->inputs[ i++ ] = strdup (argv[optind++]) ; + } + + return 0; +} + + +/* Parse a single option */ +int +cmdline_parser_parse_option(char oshort, const char *olong, const char *val, + struct gengetopt_args_info *args_info) +{ + if (!oshort && !(olong && *olong)) return 1; /* ignore null options */ + +#ifdef cmdline_parser_DEBUG + fprintf(stderr, "parse_option(): oshort='%c', olong='%s', val='%s'\n", oshort, olong, val);*/ +#endif + + switch (oshort) + { + case 'h': /* Print help and exit. */ + if (args_info->help_given) { + fprintf(stderr, "%s: `--help' (`-h') option given more than once\n", PROGRAM); + } + clear_args(args_info); + cmdline_parser_print_help(); + exit(EXIT_SUCCESS); + + break; + + case 'V': /* Print version and exit. */ + if (args_info->version_given) { + fprintf(stderr, "%s: `--version' (`-V') option given more than once\n", PROGRAM); + } + clear_args(args_info); + cmdline_parser_print_version(); + exit(EXIT_SUCCESS); + + break; + + case 'i': /* Specify input (lower) labels file. */ + if (args_info->ilabels_given) { + fprintf(stderr, "%s: `--ilabels' (`-i') option given more than once\n", PROGRAM); + } + args_info->ilabels_given++; + if (args_info->ilabels_arg) free(args_info->ilabels_arg); + args_info->ilabels_arg = strdup(val); + break; + + case 'o': /* Specify output (upper) labels file. 
*/ + if (args_info->olabels_given) { + fprintf(stderr, "%s: `--olabels' (`-o') option given more than once\n", PROGRAM); + } + args_info->olabels_given++; + if (args_info->olabels_arg) free(args_info->olabels_arg); + args_info->olabels_arg = strdup(val); + break; + + case 'a': /* Output in AT&T regex format. */ + if (args_info->att_given) { + fprintf(stderr, "%s: `--att' (`-a') option given more than once\n", PROGRAM); + } + args_info->att_given++; + if (args_info->att_given <= 1) + args_info->att_flag = !(args_info->att_flag); + break; + + case 'v': /* Treat input automaton as a Viterbi trellis. */ + if (args_info->viterbi_given) { + fprintf(stderr, "%s: `--viterbi' (`-v') option given more than once\n", PROGRAM); + } + args_info->viterbi_given++; + if (args_info->viterbi_given <= 1) + args_info->viterbi_flag = !(args_info->viterbi_flag); + break; + + case 'F': /* Output file. */ + if (args_info->output_given) { + fprintf(stderr, "%s: `--output' (`-F') option given more than once\n", PROGRAM); + } + args_info->output_given++; + if (args_info->output_arg) free(args_info->output_arg); + args_info->output_arg = strdup(val); + break; + + case 0: /* Long option(s) with no short form */ + /* Print help and exit. */ + if (strcmp(olong, "help") == 0) { + if (args_info->help_given) { + fprintf(stderr, "%s: `--help' (`-h') option given more than once\n", PROGRAM); + } + clear_args(args_info); + cmdline_parser_print_help(); + exit(EXIT_SUCCESS); + + } + + /* Print version and exit. */ + else if (strcmp(olong, "version") == 0) { + if (args_info->version_given) { + fprintf(stderr, "%s: `--version' (`-V') option given more than once\n", PROGRAM); + } + clear_args(args_info); + cmdline_parser_print_version(); + exit(EXIT_SUCCESS); + + } + + /* Specify input (lower) labels file. 
*/ + else if (strcmp(olong, "ilabels") == 0) { + if (args_info->ilabels_given) { + fprintf(stderr, "%s: `--ilabels' (`-i') option given more than once\n", PROGRAM); + } + args_info->ilabels_given++; + if (args_info->ilabels_arg) free(args_info->ilabels_arg); + args_info->ilabels_arg = strdup(val); + } + + /* Specify output (upper) labels file. */ + else if (strcmp(olong, "olabels") == 0) { + if (args_info->olabels_given) { + fprintf(stderr, "%s: `--olabels' (`-o') option given more than once\n", PROGRAM); + } + args_info->olabels_given++; + if (args_info->olabels_arg) free(args_info->olabels_arg); + args_info->olabels_arg = strdup(val); + } + + /* Output in AT&T regex format. */ + else if (strcmp(olong, "att") == 0) { + if (args_info->att_given) { + fprintf(stderr, "%s: `--att' (`-a') option given more than once\n", PROGRAM); + } + args_info->att_given++; + if (args_info->att_given <= 1) + args_info->att_flag = !(args_info->att_flag); + } + + /* Treat input automaton as a Viterbi trellis. */ + else if (strcmp(olong, "viterbi") == 0) { + if (args_info->viterbi_given) { + fprintf(stderr, "%s: `--viterbi' (`-v') option given more than once\n", PROGRAM); + } + args_info->viterbi_given++; + if (args_info->viterbi_given <= 1) + args_info->viterbi_flag = !(args_info->viterbi_flag); + } + + /* Output file. */ + else if (strcmp(olong, "output") == 0) { + if (args_info->output_given) { + fprintf(stderr, "%s: `--output' (`-F') option given more than once\n", PROGRAM); + } + args_info->output_given++; + if (args_info->output_arg) free(args_info->output_arg); + args_info->output_arg = strdup(val); + } + + else { + fprintf(stderr, "%s: unknown long option '%s'.\n", PROGRAM, olong); + return (EXIT_FAILURE); + } + break; + + case '?': /* Invalid Option */ + fprintf(stderr, "%s: unknown option '%s'.\n", PROGRAM, olong); + return (EXIT_FAILURE); + + + default: /* bug: options not considered. 
*/ + fprintf (stderr, "%s: option unknown: %c\n", PROGRAM, oshort); + abort (); + } /* switch */ + return 0; +} + + +/* Initialize options not yet given from environmental defaults */ +void +cmdline_parser_envdefaults(struct gengetopt_args_info *args_info) +{ + + + return; +} + + +/* Load option values from an .rc file */ +void +cmdline_parser_read_rcfile(const char *filename, + struct gengetopt_args_info *args_info, + int user_specified) +{ + char *fullname; + FILE *rcfile; + + if (!filename) return; /* ignore NULL filenames */ + +#if defined(HAVE_GETUID) && defined(HAVE_GETPWUID) + if (*filename == '~') { + /* tilde-expansion hack */ + struct passwd *pwent = getpwuid(getuid()); + if (!pwent) { + fprintf(stderr, "%s: user-id %d not found!\n", PROGRAM, getuid()); + return; + } + if (!pwent->pw_dir) { + fprintf(stderr, "%s: home directory for user-id %d not found!\n", PROGRAM, getuid()); + return; + } + fullname = (char *)malloc(strlen(pwent->pw_dir)+strlen(filename)); + strcpy(fullname, pwent->pw_dir); + strcat(fullname, filename+1); + } else { + fullname = strdup(filename); + } +#else /* !(defined(HAVE_GETUID) && defined(HAVE_GETPWUID)) */ + fullname = strdup(filename); +#endif /* defined(HAVE_GETUID) && defined(HAVE_GETPWUID) */ + + /* try to open */ + rcfile = fopen(fullname,"r"); + if (!rcfile) { + if (user_specified) { + fprintf(stderr, "%s: warning: open failed for rc-file '%s': %s\n", + PROGRAM, fullname, strerror(errno)); + } + } + else { + cmdline_parser_read_rc_stream(rcfile, fullname, args_info); + } + + /* cleanup */ + if (fullname != filename) free(fullname); + if (rcfile) fclose(rcfile); + + return; +} + + +/* Parse option values from an .rc file : guts */ +#define OPTPARSE_GET 32 +void +cmdline_parser_read_rc_stream(FILE *rcfile, + const char *filename, + struct gengetopt_args_info *args_info) +{ + char *optname = (char *)malloc(OPTPARSE_GET); + char *optval = (char *)malloc(OPTPARSE_GET); + size_t onsize = OPTPARSE_GET; + size_t ovsize = 
OPTPARSE_GET; + size_t onlen = 0; + size_t ovlen = 0; + int lineno = 0; + char c; + +#ifdef cmdline_parser_DEBUG + fprintf(stderr, "cmdline_parser_read_rc_stream('%s'):\n", filename); +#endif + + while ((c = fgetc(rcfile)) != EOF) { + onlen = 0; + ovlen = 0; + lineno++; + + /* -- get next option-name */ + /* skip leading space and comments */ + if (isspace(c)) continue; + if (c == '#') { + while ((c = fgetc(rcfile)) != EOF) { + if (c == '\n') break; + } + continue; + } + + /* parse option-name */ + while (c != EOF && c != '=' && !isspace(c)) { + /* re-allocate if necessary */ + if (onlen >= onsize-1) { + char *tmp = (char *)malloc(onsize+OPTPARSE_GET); + strcpy(tmp,optname); + free(optname); + + onsize += OPTPARSE_GET; + optname = tmp; + } + optname[onlen++] = c; + c = fgetc(rcfile); + } + optname[onlen++] = '\0'; + +#ifdef cmdline_parser_DEBUG + fprintf(stderr, "cmdline_parser_read_rc_stream('%s'): line %d: optname='%s'\n", + filename, lineno, optname); +#endif + + /* -- get next option-value */ + /* skip leading space */ + while ((c = fgetc(rcfile)) != EOF && isspace(c)) { + ; + } + + /* parse option-value */ + while (c != EOF && c != '\n') { + /* re-allocate if necessary */ + if (ovlen >= ovsize-1) { + char *tmp = (char *)malloc(ovsize+OPTPARSE_GET); + strcpy(tmp,optval); + free(optval); + ovsize += OPTPARSE_GET; + optval = tmp; + } + optval[ovlen++] = c; + c = fgetc(rcfile); + } + optval[ovlen++] = '\0'; + + /* now do the action for the option */ + if (cmdline_parser_parse_option('\0',optname,optval,args_info) != 0) { + fprintf(stderr, "%s: error in file '%s' at line %d.\n", PROGRAM, filename, lineno); + + } + } + + /* cleanup */ + free(optname); + free(optval); + + return; +} diff --git a/gfsm/gfsm/src/programs/gfsmstrings_cmdparser.h b/gfsm/gfsm/src/programs/gfsmstrings_cmdparser.h new file mode 100644 index 0000000..51e085e --- /dev/null +++ b/gfsm/gfsm/src/programs/gfsmstrings_cmdparser.h @@ -0,0 +1,69 @@ +/* -*- Mode: C -*- + * + * File: 
gfsmstrings_cmdparser.h + * Description: Headers for command-line parser struct gengetopt_args_info. + * + * File autogenerated by optgen.perl version 0.05. + * + */ + +#ifndef gfsmstrings_cmdparser_h +#define gfsmstrings_cmdparser_h + +#ifdef __cplusplus +extern "C" { +#endif /* __cplusplus */ + +/* + * moocow: Never set PACKAGE and VERSION here. + */ + +struct gengetopt_args_info { + char * ilabels_arg; /* Specify input (lower) labels file. (default=NULL). */ + char * olabels_arg; /* Specify output (upper) labels file. (default=NULL). */ + int att_flag; /* Output in AT&T regex format. (default=0). */ + int viterbi_flag; /* Treat input automaton as a Viterbi trellis. (default=0). */ + char * output_arg; /* Output file. (default=NULL). */ + + int help_given; /* Whether help was given */ + int version_given; /* Whether version was given */ + int ilabels_given; /* Whether ilabels was given */ + int olabels_given; /* Whether olabels was given */ + int att_given; /* Whether att was given */ + int viterbi_given; /* Whether viterbi was given */ + int output_given; /* Whether output was given */ + + char **inputs; /* unnamed arguments */ + unsigned inputs_num; /* number of unnamed arguments */ +}; + +/* read rc files (if any) and parse all command-line options in one swell foop */ +int cmdline_parser (int argc, char *const *argv, struct gengetopt_args_info *args_info); + +/* instantiate defaults from environment variables: you must call this yourself! 
*/ +void cmdline_parser_envdefaults (struct gengetopt_args_info *args_info); + +/* read a single rc-file */ +void cmdline_parser_read_rcfile (const char *filename, + struct gengetopt_args_info *args_info, + int user_specified); + +/* read a single rc-file (stream) */ +void cmdline_parser_read_rc_stream (FILE *rcfile, + const char *filename, + struct gengetopt_args_info *args_info); + +/* parse a single option */ +int cmdline_parser_parse_option (char oshort, const char *olong, const char *val, + struct gengetopt_args_info *args_info); + +/* print help message */ +void cmdline_parser_print_help(void); + +/* print version */ +void cmdline_parser_print_version(void); + +#ifdef __cplusplus +} +#endif /* __cplusplus */ +#endif /* gfsmstrings_cmdparser_h */ diff --git a/gfsm/gfsm/src/programs/gfsmstrings_main.c b/gfsm/gfsm/src/programs/gfsmstrings_main.c new file mode 100644 index 0000000..4b561e6 --- /dev/null +++ b/gfsm/gfsm/src/programs/gfsmstrings_main.c @@ -0,0 +1,153 @@ +/* + gfsm-utils : finite state automaton utilities + Copyright (C) 2004 by Bryan Jurish <moocow@ling.uni-potsdam.de> + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif + +#include <stdio.h> +#include <stdlib.h> + +#include <gfsm.h> + +#include "gfsmstrings_cmdparser.h" + +/*-------------------------------------------------------------------------- + * Globals + *--------------------------------------------------------------------------*/ +char *progname = "gfsmstrings"; + +//-- options +struct gengetopt_args_info args; + +//-- files +const char *infilename = "-"; +const char *outfilename = "-"; +FILE *outfile = NULL; + +//-- global structs +gfsmAutomaton *fsm; +gfsmAlphabet *ilabels=NULL, *olabels=NULL; +gfsmError *err = NULL; + +/*-------------------------------------------------------------------------- + * Option Processing + *--------------------------------------------------------------------------*/ +void get_my_options(int argc, char **argv) +{ + if (cmdline_parser(argc, argv, &args) != 0) + exit(1); + + //-- load environmental defaults + //cmdline_parser_envdefaults(&args); + + //-- filenames + if (args.inputs_num > 0) infilename = args.inputs[0]; + if (args.output_given) outfilename = args.output_arg; + + //-- labels: input + if (args.ilabels_given) { + ilabels = gfsm_string_alphabet_new(); + if (!gfsm_alphabet_load_filename(ilabels,args.ilabels_arg,&err)) { + g_printerr("%s: load failed for input-labels file '%s': %s\n", + progname, args.ilabels_arg, (err ? err->message : "?")); + exit(2); + } + } + //-- labels: output + if (args.olabels_given) { + olabels = gfsm_string_alphabet_new(); + if (!gfsm_alphabet_load_filename(olabels,args.olabels_arg,&err)) { + g_printerr("%s: load failed for output-labels file '%s': %s\n", + progname, args.olabels_arg, (err ? 
err->message : "?")); + exit(2); + } + } + + //-- initialize fsm + fsm = gfsm_automaton_new(); +} + +/*-------------------------------------------------------------------------- + * MAIN + *--------------------------------------------------------------------------*/ +int main (int argc, char **argv) +{ + gfsmSet *paths = NULL; + GSList *strings = NULL; + get_my_options(argc,argv); + + //-- load automaton + if (!gfsm_automaton_load_bin_filename(fsm,infilename,&err)) { + g_printerr("%s: load failed for '%s': %s\n", progname, infilename, (err ? err->message : "?")); + exit(3); + } + + //-- sanity check + if (gfsm_automaton_is_cyclic(fsm)) { + g_printerr("%s: input automaton must be acyclic!\n", progname); + exit(255); + } + + //-- open output file + outfile = gfsm_open_filename(outfilename, "w", &err); + if (!outfile) { + g_printerr("%s: %s\n", progname, (err ? err->message : "?")); + exit(4); + } + + + //-- get & stringify full paths + if (args.viterbi_flag) { + //-- serialize Viterbi trellis automaton + paths = gfsm_viterbi_trellis_paths_full(fsm, NULL, gfsmLSBoth); + } + else { + //-- serialize "normal" automaton + paths = gfsm_automaton_paths_full(fsm, NULL, gfsmLSBoth); + } + strings = gfsm_paths_to_strings(paths, + ilabels, + olabels, + fsm->sr, + TRUE, + args.att_given, + NULL); + while (strings) { + //-- pop first datum + char *s = (char *)strings->data; + strings = g_slist_delete_link(strings,strings); + + //-- print string + fputs(s, outfile); + fputc('\n', outfile); + + g_free(s); + } + + //-- cleanup + if (paths) gfsm_set_free(paths); + if (ilabels) gfsm_alphabet_free(ilabels); + if (olabels) gfsm_alphabet_free(olabels); + if (fsm) gfsm_automaton_free(fsm); + + if (outfile != stdout) fclose(outfile); + + return 0; +} diff --git a/gfsm/gfsm/src/programs/gfsmunion.gog b/gfsm/gfsm/src/programs/gfsmunion.gog new file mode 100644 index 0000000..96d84e2 --- /dev/null +++ b/gfsm/gfsm/src/programs/gfsmunion.gog @@ -0,0 +1,70 @@ +# -*- Mode: Shell-Script -*- +# +# 
Getopt::Gen specification +#----------------------------------------------------------------------------- +program "gfsmunion" +#program_version "0.01" + +purpose "Compute union of finite state machines" +author "Bryan Jurish <moocow@ling.uni-potsdam.de>" +on_reparse "warn" + +#----------------------------------------------------------------------------- +# Details +#----------------------------------------------------------------------------- +details "" + +#----------------------------------------------------------------------------- +# Files +#----------------------------------------------------------------------------- +#rcfile "/etc/mootrc" +#rcfile "~/.mootrc" + +#----------------------------------------------------------------------------- +# Arguments +#----------------------------------------------------------------------------- +argument "BINFILE(s)" "Stored binary gfsm file(s)" \ + details=" +If unspecified, standard input will be read +" + +#----------------------------------------------------------------------------- +# Options +#----------------------------------------------------------------------------- +#group "Basic Options" + +int "compress" z "Specify compression level of output file." \ + arg="LEVEL" \ + default="-1" \ + details=" +Specify zlib compression level of output file. -1 (default) indicates +the default compression level, 0 (zero) indicates no zlib compression at all, +and 9 indicates the best possible compression. +" + +string "output" F "Specifiy output file (default=stdout)." \ + arg="FILE" \ + default="-" + +#----------------------------------------------------------------------------- +# Addenda +#----------------------------------------------------------------------------- +#addenda "" + +#----------------------------------------------------------------------------- +# Bugs +#----------------------------------------------------------------------------- +bugs " + +None known. 
+ +" + +#----------------------------------------------------------------------------- +# Footer +#----------------------------------------------------------------------------- +#acknowledge `cat acknowledge.pod` + +seealso " +L<gfsmutils> +" diff --git a/gfsm/gfsm/src/programs/gfsmunion_cmdparser.c b/gfsm/gfsm/src/programs/gfsmunion_cmdparser.c new file mode 100644 index 0000000..31aafa8 --- /dev/null +++ b/gfsm/gfsm/src/programs/gfsmunion_cmdparser.c @@ -0,0 +1,449 @@ +/* -*- Mode: C -*- + * + * File: gfsmunion_cmdparser.c + * Description: Code for command-line parser struct gengetopt_args_info. + * + * File autogenerated by optgen.perl version 0.05 + * generated with the following command: + * /usr/local/bin/optgen.perl -u -l --no-handle-rcfile --nopod -F gfsmunion_cmdparser gfsmunion.gog + * + * The developers of optgen.perl consider the fixed text that goes in all + * optgen.perl output files to be in the public domain: + * we make no copyright claims on it. + * + */ + +#include <stdio.h> +#include <stdlib.h> +#include <errno.h> +#include <string.h> +#include <unistd.h> +#include <ctype.h> + +/* If we use autoconf/autoheader. */ +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#ifdef HAVE_PWD_H +# include <pwd.h> +#endif + +/* Allow user-overrides for PACKAGE and VERSION */ +#ifndef PACKAGE +# define PACKAGE "PACKAGE" +#endif + +#ifndef VERSION +# define VERSION "VERSION" +#endif + + +#ifndef PROGRAM +# define PROGRAM "gfsmunion" +#endif + +/* #define cmdline_parser_DEBUG */ + +/* Check for "configure's" getopt check result. 
*/ +#ifndef HAVE_GETOPT_LONG +# include "getopt.h" +#else +# include <getopt.h> +#endif + +#if !defined(HAVE_STRDUP) && !defined(strdup) +# define strdup gengetopt_strdup +#endif /* HAVE_STRDUP */ + +#include "gfsmunion_cmdparser.h" + + +/* user code section */ + +/* end user code section */ + + +void +cmdline_parser_print_version (void) +{ + printf("gfsmunion (%s %s) by Bryan Jurish <moocow@ling.uni-potsdam.de>\n", PACKAGE, VERSION); +} + +void +cmdline_parser_print_help (void) +{ + cmdline_parser_print_version (); + printf("\n"); + printf("Purpose:\n"); + printf(" Compute union of finite state machines\n"); + printf("\n"); + + printf("Usage: %s [OPTIONS]... BINFILE(s)\n", "gfsmunion"); + + printf("\n"); + printf(" Arguments:\n"); + printf(" BINFILE(s) Stored binary gfsm file(s)\n"); + + printf("\n"); + printf(" Options:\n"); + printf(" -h --help Print help and exit.\n"); + printf(" -V --version Print version and exit.\n"); + printf(" -zLEVEL --compress=LEVEL Specify compression level of output file.\n"); + printf(" -FFILE --output=FILE Specifiy output file (default=stdout).\n"); +} + +#if !defined(HAVE_STRDUP) && !defined(strdup) +/* gengetopt_strdup(): automatically generated from strdup.c. */ +/* strdup.c replacement of strdup, which is not standard */ +static char * +gengetopt_strdup (const char *s) +{ + char *result = (char*)malloc(strlen(s) + 1); + if (result == (char*)0) + return (char*)0; + strcpy(result, s); + return result; +} +#endif /* HAVE_STRDUP */ + +/* clear_args(args_info): clears all args & resets to defaults */ +static void +clear_args(struct gengetopt_args_info *args_info) +{ + args_info->compress_arg = -1; + args_info->output_arg = strdup("-"); +} + + +int +cmdline_parser (int argc, char * const *argv, struct gengetopt_args_info *args_info) +{ + int c; /* Character of the parsed option. 
*/ + int missing_required_options = 0; + + args_info->help_given = 0; + args_info->version_given = 0; + args_info->compress_given = 0; + args_info->output_given = 0; + + clear_args(args_info); + + /* rcfile handling */ + + /* end rcfile handling */ + + optarg = 0; + optind = 1; + opterr = 1; + optopt = '?'; + + while (1) + { + int option_index = 0; + static struct option long_options[] = { + { "help", 0, NULL, 'h' }, + { "version", 0, NULL, 'V' }, + { "compress", 1, NULL, 'z' }, + { "output", 1, NULL, 'F' }, + { NULL, 0, NULL, 0 } + }; + static char short_options[] = { + 'h', + 'V', + 'z', ':', + 'F', ':', + '\0' + }; + + c = getopt_long (argc, argv, short_options, long_options, &option_index); + + if (c == -1) break; /* Exit from 'while (1)' loop. */ + + if (cmdline_parser_parse_option(c, long_options[option_index].name, optarg, args_info) != 0) { + exit (EXIT_FAILURE); + } + } /* while */ + + + + if ( missing_required_options ) + exit (EXIT_FAILURE); + + + if (optind < argc) { + int i = 0 ; + args_info->inputs_num = argc - optind ; + args_info->inputs = (char **)(malloc ((args_info->inputs_num)*sizeof(char *))) ; + while (optind < argc) + args_info->inputs[ i++ ] = strdup (argv[optind++]) ; + } + + return 0; +} + + +/* Parse a single option */ +int +cmdline_parser_parse_option(char oshort, const char *olong, const char *val, + struct gengetopt_args_info *args_info) +{ + if (!oshort && !(olong && *olong)) return 1; /* ignore null options */ + +#ifdef cmdline_parser_DEBUG + fprintf(stderr, "parse_option(): oshort='%c', olong='%s', val='%s'\n", oshort, olong, val);*/ +#endif + + switch (oshort) + { + case 'h': /* Print help and exit. */ + if (args_info->help_given) { + fprintf(stderr, "%s: `--help' (`-h') option given more than once\n", PROGRAM); + } + clear_args(args_info); + cmdline_parser_print_help(); + exit(EXIT_SUCCESS); + + break; + + case 'V': /* Print version and exit. 
*/ + if (args_info->version_given) { + fprintf(stderr, "%s: `--version' (`-V') option given more than once\n", PROGRAM); + } + clear_args(args_info); + cmdline_parser_print_version(); + exit(EXIT_SUCCESS); + + break; + + case 'z': /* Specify compression level of output file. */ + if (args_info->compress_given) { + fprintf(stderr, "%s: `--compress' (`-z') option given more than once\n", PROGRAM); + } + args_info->compress_given++; + args_info->compress_arg = (int)atoi(val); + break; + + case 'F': /* Specifiy output file (default=stdout). */ + if (args_info->output_given) { + fprintf(stderr, "%s: `--output' (`-F') option given more than once\n", PROGRAM); + } + args_info->output_given++; + if (args_info->output_arg) free(args_info->output_arg); + args_info->output_arg = strdup(val); + break; + + case 0: /* Long option(s) with no short form */ + /* Print help and exit. */ + if (strcmp(olong, "help") == 0) { + if (args_info->help_given) { + fprintf(stderr, "%s: `--help' (`-h') option given more than once\n", PROGRAM); + } + clear_args(args_info); + cmdline_parser_print_help(); + exit(EXIT_SUCCESS); + + } + + /* Print version and exit. */ + else if (strcmp(olong, "version") == 0) { + if (args_info->version_given) { + fprintf(stderr, "%s: `--version' (`-V') option given more than once\n", PROGRAM); + } + clear_args(args_info); + cmdline_parser_print_version(); + exit(EXIT_SUCCESS); + + } + + /* Specify compression level of output file. */ + else if (strcmp(olong, "compress") == 0) { + if (args_info->compress_given) { + fprintf(stderr, "%s: `--compress' (`-z') option given more than once\n", PROGRAM); + } + args_info->compress_given++; + args_info->compress_arg = (int)atoi(val); + } + + /* Specifiy output file (default=stdout). 
*/ + else if (strcmp(olong, "output") == 0) { + if (args_info->output_given) { + fprintf(stderr, "%s: `--output' (`-F') option given more than once\n", PROGRAM); + } + args_info->output_given++; + if (args_info->output_arg) free(args_info->output_arg); + args_info->output_arg = strdup(val); + } + + else { + fprintf(stderr, "%s: unknown long option '%s'.\n", PROGRAM, olong); + return (EXIT_FAILURE); + } + break; + + case '?': /* Invalid Option */ + fprintf(stderr, "%s: unknown option '%s'.\n", PROGRAM, olong); + return (EXIT_FAILURE); + + + default: /* bug: options not considered. */ + fprintf (stderr, "%s: option unknown: %c\n", PROGRAM, oshort); + abort (); + } /* switch */ + return 0; +} + + +/* Initialize options not yet given from environmental defaults */ +void +cmdline_parser_envdefaults(struct gengetopt_args_info *args_info) +{ + + + return; +} + + +/* Load option values from an .rc file */ +void +cmdline_parser_read_rcfile(const char *filename, + struct gengetopt_args_info *args_info, + int user_specified) +{ + char *fullname; + FILE *rcfile; + + if (!filename) return; /* ignore NULL filenames */ + +#if defined(HAVE_GETUID) && defined(HAVE_GETPWUID) + if (*filename == '~') { + /* tilde-expansion hack */ + struct passwd *pwent = getpwuid(getuid()); + if (!pwent) { + fprintf(stderr, "%s: user-id %d not found!\n", PROGRAM, getuid()); + return; + } + if (!pwent->pw_dir) { + fprintf(stderr, "%s: home directory for user-id %d not found!\n", PROGRAM, getuid()); + return; + } + fullname = (char *)malloc(strlen(pwent->pw_dir)+strlen(filename)); + strcpy(fullname, pwent->pw_dir); + strcat(fullname, filename+1); + } else { + fullname = strdup(filename); + } +#else /* !(defined(HAVE_GETUID) && defined(HAVE_GETPWUID)) */ + fullname = strdup(filename); +#endif /* defined(HAVE_GETUID) && defined(HAVE_GETPWUID) */ + + /* try to open */ + rcfile = fopen(fullname,"r"); + if (!rcfile) { + if (user_specified) { + fprintf(stderr, "%s: warning: open failed for rc-file '%s': 
%s\n", + PROGRAM, fullname, strerror(errno)); + } + } + else { + cmdline_parser_read_rc_stream(rcfile, fullname, args_info); + } + + /* cleanup */ + if (fullname != filename) free(fullname); + if (rcfile) fclose(rcfile); + + return; +} + + +/* Parse option values from an .rc file : guts */ +#define OPTPARSE_GET 32 +void +cmdline_parser_read_rc_stream(FILE *rcfile, + const char *filename, + struct gengetopt_args_info *args_info) +{ + char *optname = (char *)malloc(OPTPARSE_GET); + char *optval = (char *)malloc(OPTPARSE_GET); + size_t onsize = OPTPARSE_GET; + size_t ovsize = OPTPARSE_GET; + size_t onlen = 0; + size_t ovlen = 0; + int lineno = 0; + char c; + +#ifdef cmdline_parser_DEBUG + fprintf(stderr, "cmdline_parser_read_rc_stream('%s'):\n", filename); +#endif + + while ((c = fgetc(rcfile)) != EOF) { + onlen = 0; + ovlen = 0; + lineno++; + + /* -- get next option-name */ + /* skip leading space and comments */ + if (isspace(c)) continue; + if (c == '#') { + while ((c = fgetc(rcfile)) != EOF) { + if (c == '\n') break; + } + continue; + } + + /* parse option-name */ + while (c != EOF && c != '=' && !isspace(c)) { + /* re-allocate if necessary */ + if (onlen >= onsize-1) { + char *tmp = (char *)malloc(onsize+OPTPARSE_GET); + strcpy(tmp,optname); + free(optname); + + onsize += OPTPARSE_GET; + optname = tmp; + } + optname[onlen++] = c; + c = fgetc(rcfile); + } + optname[onlen++] = '\0'; + +#ifdef cmdline_parser_DEBUG + fprintf(stderr, "cmdline_parser_read_rc_stream('%s'): line %d: optname='%s'\n", + filename, lineno, optname); +#endif + + /* -- get next option-value */ + /* skip leading space */ + while ((c = fgetc(rcfile)) != EOF && isspace(c)) { + ; + } + + /* parse option-value */ + while (c != EOF && c != '\n') { + /* re-allocate if necessary */ + if (ovlen >= ovsize-1) { + char *tmp = (char *)malloc(ovsize+OPTPARSE_GET); + strcpy(tmp,optval); + free(optval); + ovsize += OPTPARSE_GET; + optval = tmp; + } + optval[ovlen++] = c; + c = fgetc(rcfile); + } + 
optval[ovlen++] = '\0'; + + /* now do the action for the option */ + if (cmdline_parser_parse_option('\0',optname,optval,args_info) != 0) { + fprintf(stderr, "%s: error in file '%s' at line %d.\n", PROGRAM, filename, lineno); + + } + } + + /* cleanup */ + free(optname); + free(optval); + + return; +} diff --git a/gfsm/gfsm/src/programs/gfsmunion_cmdparser.h b/gfsm/gfsm/src/programs/gfsmunion_cmdparser.h new file mode 100644 index 0000000..6086492 --- /dev/null +++ b/gfsm/gfsm/src/programs/gfsmunion_cmdparser.h @@ -0,0 +1,63 @@ +/* -*- Mode: C -*- + * + * File: gfsmunion_cmdparser.h + * Description: Headers for command-line parser struct gengetopt_args_info. + * + * File autogenerated by optgen.perl version 0.05. + * + */ + +#ifndef gfsmunion_cmdparser_h +#define gfsmunion_cmdparser_h + +#ifdef __cplusplus +extern "C" { +#endif /* __cplusplus */ + +/* + * moocow: Never set PACKAGE and VERSION here. + */ + +struct gengetopt_args_info { + int compress_arg; /* Specify compression level of output file. (default=-1). */ + char * output_arg; /* Specifiy output file (default=stdout). (default=-). */ + + int help_given; /* Whether help was given */ + int version_given; /* Whether version was given */ + int compress_given; /* Whether compress was given */ + int output_given; /* Whether output was given */ + + char **inputs; /* unnamed arguments */ + unsigned inputs_num; /* number of unnamed arguments */ +}; + +/* read rc files (if any) and parse all command-line options in one swell foop */ +int cmdline_parser (int argc, char *const *argv, struct gengetopt_args_info *args_info); + +/* instantiate defaults from environment variables: you must call this yourself! 
*/ +void cmdline_parser_envdefaults (struct gengetopt_args_info *args_info); + +/* read a single rc-file */ +void cmdline_parser_read_rcfile (const char *filename, + struct gengetopt_args_info *args_info, + int user_specified); + +/* read a single rc-file (stream) */ +void cmdline_parser_read_rc_stream (FILE *rcfile, + const char *filename, + struct gengetopt_args_info *args_info); + +/* parse a single option */ +int cmdline_parser_parse_option (char oshort, const char *olong, const char *val, + struct gengetopt_args_info *args_info); + +/* print help message */ +void cmdline_parser_print_help(void); + +/* print version */ +void cmdline_parser_print_version(void); + +#ifdef __cplusplus +} +#endif /* __cplusplus */ +#endif /* gfsmunion_cmdparser_h */ diff --git a/gfsm/gfsm/src/programs/gfsmunion_main.c b/gfsm/gfsm/src/programs/gfsmunion_main.c new file mode 100644 index 0000000..dd7998d --- /dev/null +++ b/gfsm/gfsm/src/programs/gfsmunion_main.c @@ -0,0 +1,123 @@ +/* + gfsm-utils : finite state automaton utilities + Copyright (C) 2004 by Bryan Jurish <moocow@ling.uni-potsdam.de> + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif + +#include <stdio.h> +#include <errno.h> +#include <string.h> +#include <stdlib.h> + +#include <gfsm.h> + +#include "gfsmunion_cmdparser.h" + +/*-------------------------------------------------------------------------- + * Globals + *--------------------------------------------------------------------------*/ +char *progname = "gfsmunion"; + +//-- options +struct gengetopt_args_info args; + +//-- files +const char *infilename; +const char *outfilename = "-"; + +//-- global structs etc. +gfsmError *err = NULL; +gfsmAutomaton *fsmUnion=NULL, *fsmIn=NULL; + +/*-------------------------------------------------------------------------- + * Option Processing + *--------------------------------------------------------------------------*/ +void get_my_options(int argc, char **argv) +{ + if (cmdline_parser(argc, argv, &args) != 0) + exit(1); + + //-- require at least one file argument + if (args.inputs_num < 1) { + cmdline_parser_print_help(); + exit(2); + } + + //-- output + if (args.output_arg) outfilename = args.output_arg; + + //-- load environmental defaults + //cmdline_parser_envdefaults(&args); + + //-- initialize automaton + fsmIn = gfsm_automaton_new(); +} + +/*-------------------------------------------------------------------------- + * compute_union() + * + utility routine + */ +void compute_union(const char *infilename) +{ + //-- load automaton + if (!gfsm_automaton_load_bin_filename(fsmIn,infilename,&err)) { + g_printerr("%s: load failed for '%s': %s\n", progname, infilename, err->message); + exit(255); + } + + //-- compute union + if (fsmUnion == NULL) { + fsmUnion = fsmIn; + fsmIn = gfsm_automaton_new(); + } else { + gfsm_automaton_union(fsmUnion,fsmIn); + } +} + 
+/*-------------------------------------------------------------------------- + * MAIN + *--------------------------------------------------------------------------*/ +int main (int argc, char **argv) +{ + int i; + + GFSM_INIT + + get_my_options(argc,argv); + + for (i = 0; i < args.inputs_num; i++) { + compute_union(args.inputs[i]); + } + if (args.inputs_num == 1) compute_union("-"); + + //-- spew automaton + if (!gfsm_automaton_save_bin_filename(fsmUnion,outfilename,args.compress_arg,&err)) { + g_printerr("%s: store failed to '%s': %s\n", progname, outfilename, err->message); + exit(4); + } + + //-- cleanup + if (fsmIn) gfsm_automaton_free(fsmIn); + if (fsmUnion) gfsm_automaton_free(fsmUnion); + + GFSM_FINISH + + return 0; +} diff --git a/gfsm/gfsm/src/programs/gfsmview.sh b/gfsm/gfsm/src/programs/gfsmview.sh new file mode 100755 index 0000000..d8df9c2 --- /dev/null +++ b/gfsm/gfsm/src/programs/gfsmview.sh @@ -0,0 +1,6 @@ +#!/bin/sh + +GFSMDRAW=./gfsmdraw +DOTGV=dotgv.sh + +exec $GFSMDRAW "$@" | $DOTGV diff --git a/gfsm/gfsm/src/programs/gfsmviterbi.gog b/gfsm/gfsm/src/programs/gfsmviterbi.gog new file mode 100644 index 0000000..827d26e --- /dev/null +++ b/gfsm/gfsm/src/programs/gfsmviterbi.gog @@ -0,0 +1,88 @@ +# -*- Mode: Shell-Script -*- +# +# Getopt::Gen specification +#----------------------------------------------------------------------------- +program "gfsmviterbi" +#program_version "0.01" + +purpose "EXPERIMENTAL: Compute Viterbi trellis for a linear label sequence in a transducer" +author "Bryan Jurish <moocow@ling.uni-potsdam.de>" +on_reparse "warn" + +#----------------------------------------------------------------------------- +# Details +#----------------------------------------------------------------------------- +details "" + +#----------------------------------------------------------------------------- +# Files +#----------------------------------------------------------------------------- +#rcfile "/etc/mootrc" +#rcfile "~/.mootrc" + 
+#----------------------------------------------------------------------------- +# Arguments +#----------------------------------------------------------------------------- +argument "LABELID..." "Input label IDs" \ + details=" +In ASCII decimal notation. +" + +#----------------------------------------------------------------------------- +# Options +#----------------------------------------------------------------------------- +#group "Basic Options" + +string "fst" f "Weighted transducer to apply (default=stdin)." \ + arg="FSTFILE" \ + default="-" \ + details=" +If unspecified, standard input will be read. +" + +int "compress" z "Specify compression level of output file." \ + arg="LEVEL" \ + default="-1" \ + details=" +Specify zlib compression level of output file. -1 (default) indicates +the default compression level, 0 (zero) indicates no zlib compression at all, +and 9 indicates the best possible compression. +" + +string "output" F "Specifiy output file (default=stdout)." \ + arg="FILE" \ + default="-" + +#----------------------------------------------------------------------------- +# Addenda +#----------------------------------------------------------------------------- +#addenda "" + +#----------------------------------------------------------------------------- +# Bugs +#----------------------------------------------------------------------------- +bugs " + +=over 4 + +=item + +No negative-cost epsilon cycles are allowed in the transducer. + +=item + +The Viterbi API, the existence of the gfsmviterbi command-line program, +its arguments & options, etc. are HIGHLY UNSTABLE. 
+ +=back + +" + +#----------------------------------------------------------------------------- +# Footer +#----------------------------------------------------------------------------- +#acknowledge `cat acknowledge.pod` + +seealso " +L<gfsmutils> +" diff --git a/gfsm/gfsm/src/programs/gfsmviterbi_cmdparser.c b/gfsm/gfsm/src/programs/gfsmviterbi_cmdparser.c new file mode 100644 index 0000000..f4de7a9 --- /dev/null +++ b/gfsm/gfsm/src/programs/gfsmviterbi_cmdparser.c @@ -0,0 +1,473 @@ +/* -*- Mode: C -*- + * + * File: gfsmviterbi_cmdparser.c + * Description: Code for command-line parser struct gengetopt_args_info. + * + * File autogenerated by optgen.perl version 0.05 + * generated with the following command: + * /usr/local/bin/optgen.perl -u -l --no-handle-rcfile --nopod -F gfsmviterbi_cmdparser gfsmviterbi.gog + * + * The developers of optgen.perl consider the fixed text that goes in all + * optgen.perl output files to be in the public domain: + * we make no copyright claims on it. + * + */ + +#include <stdio.h> +#include <stdlib.h> +#include <errno.h> +#include <string.h> +#include <unistd.h> +#include <ctype.h> + +/* If we use autoconf/autoheader. */ +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#ifdef HAVE_PWD_H +# include <pwd.h> +#endif + +/* Allow user-overrides for PACKAGE and VERSION */ +#ifndef PACKAGE +# define PACKAGE "PACKAGE" +#endif + +#ifndef VERSION +# define VERSION "VERSION" +#endif + + +#ifndef PROGRAM +# define PROGRAM "gfsmviterbi" +#endif + +/* #define cmdline_parser_DEBUG */ + +/* Check for "configure's" getopt check result. 
*/ +#ifndef HAVE_GETOPT_LONG +# include "getopt.h" +#else +# include <getopt.h> +#endif + +#if !defined(HAVE_STRDUP) && !defined(strdup) +# define strdup gengetopt_strdup +#endif /* HAVE_STRDUP */ + +#include "gfsmviterbi_cmdparser.h" + + +/* user code section */ + +/* end user code section */ + + +void +cmdline_parser_print_version (void) +{ + printf("gfsmviterbi (%s %s) by Bryan Jurish <moocow@ling.uni-potsdam.de>\n", PACKAGE, VERSION); +} + +void +cmdline_parser_print_help (void) +{ + cmdline_parser_print_version (); + printf("\n"); + printf("Purpose:\n"); + printf(" EXPERIMENTAL: Compute Viterbi trellis for a linear label sequence in a transducer\n"); + printf("\n"); + + printf("Usage: %s [OPTIONS]... LABELID...\n", "gfsmviterbi"); + + printf("\n"); + printf(" Arguments:\n"); + printf(" LABELID... Input label IDs\n"); + + printf("\n"); + printf(" Options:\n"); + printf(" -h --help Print help and exit.\n"); + printf(" -V --version Print version and exit.\n"); + printf(" -fFSTFILE --fst=FSTFILE Weighted transducer to apply (default=stdin).\n"); + printf(" -zLEVEL --compress=LEVEL Specify compression level of output file.\n"); + printf(" -FFILE --output=FILE Specifiy output file (default=stdout).\n"); +} + +#if !defined(HAVE_STRDUP) && !defined(strdup) +/* gengetopt_strdup(): automatically generated from strdup.c. */ +/* strdup.c replacement of strdup, which is not standard */ +static char * +gengetopt_strdup (const char *s) +{ + char *result = (char*)malloc(strlen(s) + 1); + if (result == (char*)0) + return (char*)0; + strcpy(result, s); + return result; +} +#endif /* HAVE_STRDUP */ + +/* clear_args(args_info): clears all args & resets to defaults */ +static void +clear_args(struct gengetopt_args_info *args_info) +{ + args_info->fst_arg = strdup("-"); + args_info->compress_arg = -1; + args_info->output_arg = strdup("-"); +} + + +int +cmdline_parser (int argc, char * const *argv, struct gengetopt_args_info *args_info) +{ + int c; /* Character of the parsed option. 
*/ + int missing_required_options = 0; + + args_info->help_given = 0; + args_info->version_given = 0; + args_info->fst_given = 0; + args_info->compress_given = 0; + args_info->output_given = 0; + + clear_args(args_info); + + /* rcfile handling */ + + /* end rcfile handling */ + + optarg = 0; + optind = 1; + opterr = 1; + optopt = '?'; + + while (1) + { + int option_index = 0; + static struct option long_options[] = { + { "help", 0, NULL, 'h' }, + { "version", 0, NULL, 'V' }, + { "fst", 1, NULL, 'f' }, + { "compress", 1, NULL, 'z' }, + { "output", 1, NULL, 'F' }, + { NULL, 0, NULL, 0 } + }; + static char short_options[] = { + 'h', + 'V', + 'f', ':', + 'z', ':', + 'F', ':', + '\0' + }; + + c = getopt_long (argc, argv, short_options, long_options, &option_index); + + if (c == -1) break; /* Exit from 'while (1)' loop. */ + + if (cmdline_parser_parse_option(c, long_options[option_index].name, optarg, args_info) != 0) { + exit (EXIT_FAILURE); + } + } /* while */ + + + + if ( missing_required_options ) + exit (EXIT_FAILURE); + + + if (optind < argc) { + int i = 0 ; + args_info->inputs_num = argc - optind ; + args_info->inputs = (char **)(malloc ((args_info->inputs_num)*sizeof(char *))) ; + while (optind < argc) + args_info->inputs[ i++ ] = strdup (argv[optind++]) ; + } + + return 0; +} + + +/* Parse a single option */ +int +cmdline_parser_parse_option(char oshort, const char *olong, const char *val, + struct gengetopt_args_info *args_info) +{ + if (!oshort && !(olong && *olong)) return 1; /* ignore null options */ + +#ifdef cmdline_parser_DEBUG + fprintf(stderr, "parse_option(): oshort='%c', olong='%s', val='%s'\n", oshort, olong, val);*/ +#endif + + switch (oshort) + { + case 'h': /* Print help and exit. */ + if (args_info->help_given) { + fprintf(stderr, "%s: `--help' (`-h') option given more than once\n", PROGRAM); + } + clear_args(args_info); + cmdline_parser_print_help(); + exit(EXIT_SUCCESS); + + break; + + case 'V': /* Print version and exit. 
*/ + if (args_info->version_given) { + fprintf(stderr, "%s: `--version' (`-V') option given more than once\n", PROGRAM); + } + clear_args(args_info); + cmdline_parser_print_version(); + exit(EXIT_SUCCESS); + + break; + + case 'f': /* Weighted transducer to apply (default=stdin). */ + if (args_info->fst_given) { + fprintf(stderr, "%s: `--fst' (`-f') option given more than once\n", PROGRAM); + } + args_info->fst_given++; + if (args_info->fst_arg) free(args_info->fst_arg); + args_info->fst_arg = strdup(val); + break; + + case 'z': /* Specify compression level of output file. */ + if (args_info->compress_given) { + fprintf(stderr, "%s: `--compress' (`-z') option given more than once\n", PROGRAM); + } + args_info->compress_given++; + args_info->compress_arg = (int)atoi(val); + break; + + case 'F': /* Specifiy output file (default=stdout). */ + if (args_info->output_given) { + fprintf(stderr, "%s: `--output' (`-F') option given more than once\n", PROGRAM); + } + args_info->output_given++; + if (args_info->output_arg) free(args_info->output_arg); + args_info->output_arg = strdup(val); + break; + + case 0: /* Long option(s) with no short form */ + /* Print help and exit. */ + if (strcmp(olong, "help") == 0) { + if (args_info->help_given) { + fprintf(stderr, "%s: `--help' (`-h') option given more than once\n", PROGRAM); + } + clear_args(args_info); + cmdline_parser_print_help(); + exit(EXIT_SUCCESS); + + } + + /* Print version and exit. */ + else if (strcmp(olong, "version") == 0) { + if (args_info->version_given) { + fprintf(stderr, "%s: `--version' (`-V') option given more than once\n", PROGRAM); + } + clear_args(args_info); + cmdline_parser_print_version(); + exit(EXIT_SUCCESS); + + } + + /* Weighted transducer to apply (default=stdin). 
*/ + else if (strcmp(olong, "fst") == 0) { + if (args_info->fst_given) { + fprintf(stderr, "%s: `--fst' (`-f') option given more than once\n", PROGRAM); + } + args_info->fst_given++; + if (args_info->fst_arg) free(args_info->fst_arg); + args_info->fst_arg = strdup(val); + } + + /* Specify compression level of output file. */ + else if (strcmp(olong, "compress") == 0) { + if (args_info->compress_given) { + fprintf(stderr, "%s: `--compress' (`-z') option given more than once\n", PROGRAM); + } + args_info->compress_given++; + args_info->compress_arg = (int)atoi(val); + } + + /* Specifiy output file (default=stdout). */ + else if (strcmp(olong, "output") == 0) { + if (args_info->output_given) { + fprintf(stderr, "%s: `--output' (`-F') option given more than once\n", PROGRAM); + } + args_info->output_given++; + if (args_info->output_arg) free(args_info->output_arg); + args_info->output_arg = strdup(val); + } + + else { + fprintf(stderr, "%s: unknown long option '%s'.\n", PROGRAM, olong); + return (EXIT_FAILURE); + } + break; + + case '?': /* Invalid Option */ + fprintf(stderr, "%s: unknown option '%s'.\n", PROGRAM, olong); + return (EXIT_FAILURE); + + + default: /* bug: options not considered. 
*/ + fprintf (stderr, "%s: option unknown: %c\n", PROGRAM, oshort); + abort (); + } /* switch */ + return 0; +} + + +/* Initialize options not yet given from environmental defaults */ +void +cmdline_parser_envdefaults(struct gengetopt_args_info *args_info) +{ + + + return; +} + + +/* Load option values from an .rc file */ +void +cmdline_parser_read_rcfile(const char *filename, + struct gengetopt_args_info *args_info, + int user_specified) +{ + char *fullname; + FILE *rcfile; + + if (!filename) return; /* ignore NULL filenames */ + +#if defined(HAVE_GETUID) && defined(HAVE_GETPWUID) + if (*filename == '~') { + /* tilde-expansion hack */ + struct passwd *pwent = getpwuid(getuid()); + if (!pwent) { + fprintf(stderr, "%s: user-id %d not found!\n", PROGRAM, getuid()); + return; + } + if (!pwent->pw_dir) { + fprintf(stderr, "%s: home directory for user-id %d not found!\n", PROGRAM, getuid()); + return; + } + fullname = (char *)malloc(strlen(pwent->pw_dir)+strlen(filename)); + strcpy(fullname, pwent->pw_dir); + strcat(fullname, filename+1); + } else { + fullname = strdup(filename); + } +#else /* !(defined(HAVE_GETUID) && defined(HAVE_GETPWUID)) */ + fullname = strdup(filename); +#endif /* defined(HAVE_GETUID) && defined(HAVE_GETPWUID) */ + + /* try to open */ + rcfile = fopen(fullname,"r"); + if (!rcfile) { + if (user_specified) { + fprintf(stderr, "%s: warning: open failed for rc-file '%s': %s\n", + PROGRAM, fullname, strerror(errno)); + } + } + else { + cmdline_parser_read_rc_stream(rcfile, fullname, args_info); + } + + /* cleanup */ + if (fullname != filename) free(fullname); + if (rcfile) fclose(rcfile); + + return; +} + + +/* Parse option values from an .rc file : guts */ +#define OPTPARSE_GET 32 +void +cmdline_parser_read_rc_stream(FILE *rcfile, + const char *filename, + struct gengetopt_args_info *args_info) +{ + char *optname = (char *)malloc(OPTPARSE_GET); + char *optval = (char *)malloc(OPTPARSE_GET); + size_t onsize = OPTPARSE_GET; + size_t ovsize = 
OPTPARSE_GET; + size_t onlen = 0; + size_t ovlen = 0; + int lineno = 0; + char c; + +#ifdef cmdline_parser_DEBUG + fprintf(stderr, "cmdline_parser_read_rc_stream('%s'):\n", filename); +#endif + + while ((c = fgetc(rcfile)) != EOF) { + onlen = 0; + ovlen = 0; + lineno++; + + /* -- get next option-name */ + /* skip leading space and comments */ + if (isspace(c)) continue; + if (c == '#') { + while ((c = fgetc(rcfile)) != EOF) { + if (c == '\n') break; + } + continue; + } + + /* parse option-name */ + while (c != EOF && c != '=' && !isspace(c)) { + /* re-allocate if necessary */ + if (onlen >= onsize-1) { + char *tmp = (char *)malloc(onsize+OPTPARSE_GET); + strcpy(tmp,optname); + free(optname); + + onsize += OPTPARSE_GET; + optname = tmp; + } + optname[onlen++] = c; + c = fgetc(rcfile); + } + optname[onlen++] = '\0'; + +#ifdef cmdline_parser_DEBUG + fprintf(stderr, "cmdline_parser_read_rc_stream('%s'): line %d: optname='%s'\n", + filename, lineno, optname); +#endif + + /* -- get next option-value */ + /* skip leading space */ + while ((c = fgetc(rcfile)) != EOF && isspace(c)) { + ; + } + + /* parse option-value */ + while (c != EOF && c != '\n') { + /* re-allocate if necessary */ + if (ovlen >= ovsize-1) { + char *tmp = (char *)malloc(ovsize+OPTPARSE_GET); + strcpy(tmp,optval); + free(optval); + ovsize += OPTPARSE_GET; + optval = tmp; + } + optval[ovlen++] = c; + c = fgetc(rcfile); + } + optval[ovlen++] = '\0'; + + /* now do the action for the option */ + if (cmdline_parser_parse_option('\0',optname,optval,args_info) != 0) { + fprintf(stderr, "%s: error in file '%s' at line %d.\n", PROGRAM, filename, lineno); + + } + } + + /* cleanup */ + free(optname); + free(optval); + + return; +} diff --git a/gfsm/gfsm/src/programs/gfsmviterbi_cmdparser.h b/gfsm/gfsm/src/programs/gfsmviterbi_cmdparser.h new file mode 100644 index 0000000..d38087d --- /dev/null +++ b/gfsm/gfsm/src/programs/gfsmviterbi_cmdparser.h @@ -0,0 +1,65 @@ +/* -*- Mode: C -*- + * + * File: 
gfsmviterbi_cmdparser.h + * Description: Headers for command-line parser struct gengetopt_args_info. + * + * File autogenerated by optgen.perl version 0.05. + * + */ + +#ifndef gfsmviterbi_cmdparser_h +#define gfsmviterbi_cmdparser_h + +#ifdef __cplusplus +extern "C" { +#endif /* __cplusplus */ + +/* + * moocow: Never set PACKAGE and VERSION here. + */ + +struct gengetopt_args_info { + char * fst_arg; /* Weighted transducer to apply (default=stdin). (default=-). */ + int compress_arg; /* Specify compression level of output file. (default=-1). */ + char * output_arg; /* Specifiy output file (default=stdout). (default=-). */ + + int help_given; /* Whether help was given */ + int version_given; /* Whether version was given */ + int fst_given; /* Whether fst was given */ + int compress_given; /* Whether compress was given */ + int output_given; /* Whether output was given */ + + char **inputs; /* unnamed arguments */ + unsigned inputs_num; /* number of unnamed arguments */ +}; + +/* read rc files (if any) and parse all command-line options in one swell foop */ +int cmdline_parser (int argc, char *const *argv, struct gengetopt_args_info *args_info); + +/* instantiate defaults from environment variables: you must call this yourself! 
*/ +void cmdline_parser_envdefaults (struct gengetopt_args_info *args_info); + +/* read a single rc-file */ +void cmdline_parser_read_rcfile (const char *filename, + struct gengetopt_args_info *args_info, + int user_specified); + +/* read a single rc-file (stream) */ +void cmdline_parser_read_rc_stream (FILE *rcfile, + const char *filename, + struct gengetopt_args_info *args_info); + +/* parse a single option */ +int cmdline_parser_parse_option (char oshort, const char *olong, const char *val, + struct gengetopt_args_info *args_info); + +/* print help message */ +void cmdline_parser_print_help(void); + +/* print version */ +void cmdline_parser_print_version(void); + +#ifdef __cplusplus +} +#endif /* __cplusplus */ +#endif /* gfsmviterbi_cmdparser_h */ diff --git a/gfsm/gfsm/src/programs/gfsmviterbi_main.c b/gfsm/gfsm/src/programs/gfsmviterbi_main.c new file mode 100644 index 0000000..33bb01a --- /dev/null +++ b/gfsm/gfsm/src/programs/gfsmviterbi_main.c @@ -0,0 +1,125 @@ +/* + gfsm-utils : finite state automaton utilities + Copyright (C) 2005 by Bryan Jurish <moocow@ling.uni-potsdam.de> + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <errno.h> +#include <ctype.h> + +#include <gfsm.h> + +#include "gfsmviterbi_cmdparser.h" + +/*-------------------------------------------------------------------------- + * Globals + *--------------------------------------------------------------------------*/ +char *progname = "gfsmviterbi"; + +//-- options +struct gengetopt_args_info args; + +//-- files +const char *fstfilename = "-"; +const char *outfilename = "-"; + +//-- global structs +gfsmAutomaton *fst = NULL; +gfsmError *err = NULL; + +/*-------------------------------------------------------------------------- + * Option Processing + *--------------------------------------------------------------------------*/ +void get_my_options(int argc, char **argv) +{ + if (cmdline_parser(argc, argv, &args) != 0) + exit(1); + + //-- load environmental defaults + //cmdline_parser_envdefaults(&args); + + //-- filenames + if (args.fst_given) fstfilename = args.fst_arg; + outfilename = args.output_arg; + + //-- load FST + fst = gfsm_automaton_new(); + if (!gfsm_automaton_load_bin_filename(fst, fstfilename, &err)) { + g_printerr("%s: load failed for FST file '%s': %s\n", progname, fstfilename, err->message); + exit(255); + } +} + +/*-------------------------------------------------------------------------- + * viterbi_labels() + */ +gfsmAutomaton *viterbi_labels(gfsmAutomaton *fst, int argc, char **argv) +{ + gfsmLabelVector *vec = g_ptr_array_sized_new(argc); + char *s=NULL, *tail=NULL; + gfsmLabelVal lab; + gfsmAutomaton *trellis = NULL; + int i; + + //-- fill input vector + for (i=0; i < argc; i++) { + for (s=argv[i], lab=strtol(s,&tail,0); s != tail; s=tail, 
lab=strtol(s,&tail,0)) { + g_ptr_array_add(vec, (gpointer)lab); + } + } + + //-- actual viterbi lookup + trellis = gfsm_automaton_lookup_viterbi(fst, vec, trellis); + + //-- cleanup + g_ptr_array_free(vec,TRUE); + + return trellis; +} + +/*-------------------------------------------------------------------------- + * MAIN + *--------------------------------------------------------------------------*/ +int main (int argc, char **argv) +{ + gfsmAutomaton *trellis; + + GFSM_INIT + get_my_options(argc,argv); + + //-- process input + trellis = viterbi_labels(fst, args.inputs_num, args.inputs); + + //-- save output + if (!gfsm_automaton_save_bin_filename(trellis,outfilename,args.compress_arg,&err)) { + g_printerr("%s: store failed to '%s': %s\n", progname, outfilename, err->message); + exit(4); + } + + //-- cleanup + if (fst) gfsm_automaton_free(fst); + if (trellis) gfsm_automaton_free(trellis); + + GFSM_FINISH + return 0; +} diff --git a/gfsm/gfsm/src/programs/isect1.tfst b/gfsm/gfsm/src/programs/isect1.tfst new file mode 100644 index 0000000..5781fec --- /dev/null +++ b/gfsm/gfsm/src/programs/isect1.tfst @@ -0,0 +1,4 @@ +0 1 0 0 +1 +1 2 1 1 +2 diff --git a/gfsm/gfsm/src/programs/isect2.tfst b/gfsm/gfsm/src/programs/isect2.tfst new file mode 100644 index 0000000..d64b6cb --- /dev/null +++ b/gfsm/gfsm/src/programs/isect2.tfst @@ -0,0 +1,5 @@ +0 0 1 1 +0 1 0 0 +0 +1 1 2 2 +1 diff --git a/gfsm/gfsm/src/programs/lkptest.tfst b/gfsm/gfsm/src/programs/lkptest.tfst new file mode 100644 index 0000000..40c6d7a --- /dev/null +++ b/gfsm/gfsm/src/programs/lkptest.tfst @@ -0,0 +1,13 @@ +0 0 1 2 1 +0 0 2 3 1 +0 0 3 1 1 +0 0 + +0 1 2 2 1 +1 2 2 2 1 +2 3 3 3 1 +3 0 + +0 4 1 0 1 +4 5 0 3 1 +5 0 diff --git a/gfsm/gfsm/src/programs/p1.tfst b/gfsm/gfsm/src/programs/p1.tfst new file mode 100644 index 0000000..671e9e0 --- /dev/null +++ b/gfsm/gfsm/src/programs/p1.tfst @@ -0,0 +1,5 @@ +0 1 1 1 +0 1 2 2 +1 +1 2 3 3 +2 diff --git a/gfsm/gfsm/src/programs/p2.tfst b/gfsm/gfsm/src/programs/p2.tfst 
new file mode 100644 index 0000000..f8fd663 --- /dev/null +++ b/gfsm/gfsm/src/programs/p2.tfst @@ -0,0 +1,3 @@ +0 1 4 4 +0 1 5 5 +1 diff --git a/gfsm/gfsm/src/programs/prunetest.tfst b/gfsm/gfsm/src/programs/prunetest.tfst new file mode 100644 index 0000000..0dc8950 --- /dev/null +++ b/gfsm/gfsm/src/programs/prunetest.tfst @@ -0,0 +1,6 @@ +0 1 1 1 0 +1 2 2 2 0 +0 3 3 3 0 +1 0 +4 5 4 4 0 +5 0 diff --git a/gfsm/gfsm/src/programs/r1.tfst b/gfsm/gfsm/src/programs/r1.tfst new file mode 100644 index 0000000..f9608f2 --- /dev/null +++ b/gfsm/gfsm/src/programs/r1.tfst @@ -0,0 +1,9 @@ +0 1 1 1 +0 2 0 1 +0 3 1 0 +1 4 4 4 +2 5 5 5 +3 6 6 6 +4 +5 +6 diff --git a/gfsm/gfsm/src/programs/r2.tfst b/gfsm/gfsm/src/programs/r2.tfst new file mode 100644 index 0000000..a61738d --- /dev/null +++ b/gfsm/gfsm/src/programs/r2.tfst @@ -0,0 +1,4 @@ +0 1 24 7 +0 2 7 24 +1 10 +2 20 diff --git a/gfsm/gfsm/src/programs/renumber.tfst b/gfsm/gfsm/src/programs/renumber.tfst new file mode 100644 index 0000000..b4697b0 --- /dev/null +++ b/gfsm/gfsm/src/programs/renumber.tfst @@ -0,0 +1,5 @@ +5 4 43 0 0 +5 1 42 0 0 +7 0 +1 7 1 1 0 +4 7 1 1 0 diff --git a/gfsm/gfsm/src/programs/renumber2.tfst b/gfsm/gfsm/src/programs/renumber2.tfst new file mode 100644 index 0000000..1c71c3d --- /dev/null +++ b/gfsm/gfsm/src/programs/renumber2.tfst @@ -0,0 +1,8 @@ +5 1 42 0 0 +5 4 43 0 0 +6 7 1 1 0 +7 0 +0 1 42 0 0 +1 7 1 1 0 +3 4 43 0 0 +4 7 1 1 0 diff --git a/gfsm/gfsm/src/programs/rev1.tfst b/gfsm/gfsm/src/programs/rev1.tfst new file mode 100644 index 0000000..5a428b4 --- /dev/null +++ b/gfsm/gfsm/src/programs/rev1.tfst @@ -0,0 +1,7 @@ +0 1 0 1 1 +1 2 1 2 12 +2 2 2 2 22 +2 20 +1 3 1 3 13 +3 4 3 4 34 +4 40
\ No newline at end of file diff --git a/gfsm/gfsm/src/programs/rme.tfst b/gfsm/gfsm/src/programs/rme.tfst new file mode 100644 index 0000000..06922fa --- /dev/null +++ b/gfsm/gfsm/src/programs/rme.tfst @@ -0,0 +1,6 @@ +0 0 0 0 0 +0 1 0 0 1 +1 2 0 0 2 +0 2 0 0 10 +2 3 1 1 3 +3 diff --git a/gfsm/gfsm/src/programs/rme2.tfst b/gfsm/gfsm/src/programs/rme2.tfst new file mode 100644 index 0000000..b30d66e --- /dev/null +++ b/gfsm/gfsm/src/programs/rme2.tfst @@ -0,0 +1,8 @@ +5 0 0 0 0 +5 3 0 0 0 +6 7 0 0 0 +7 0 +0 1 42 0 0 +1 6 0 0 0 +3 4 43 0 0 +4 6 0 0 0 diff --git a/gfsm/gfsm/src/programs/rme2b.tfst b/gfsm/gfsm/src/programs/rme2b.tfst new file mode 100644 index 0000000..da2ffd2 --- /dev/null +++ b/gfsm/gfsm/src/programs/rme2b.tfst @@ -0,0 +1,8 @@ +5 0 0 0 0 +5 3 0 0 0 +6 7 1 1 0 +7 0 +0 1 42 0 0 +1 6 0 0 0 +3 4 43 0 0 +4 6 0 0 0 diff --git a/gfsm/gfsm/src/programs/rme3.tfst b/gfsm/gfsm/src/programs/rme3.tfst new file mode 100644 index 0000000..8235e2b --- /dev/null +++ b/gfsm/gfsm/src/programs/rme3.tfst @@ -0,0 +1,5 @@ +0 1 42 0 +0 2 43 0 +1 3 0 0 0 +2 3 0 0 0 +3 0 diff --git a/gfsm/gfsm/src/programs/sort.tfst b/gfsm/gfsm/src/programs/sort.tfst new file mode 100644 index 0000000..a5bb1c7 --- /dev/null +++ b/gfsm/gfsm/src/programs/sort.tfst @@ -0,0 +1,7 @@ +0 1 1 2 1 +0 1 2 1 2 +0 1 2 2 3 +0 1 1 2 3 +0 1 2 1 2 +0 1 2 2 1 +1 diff --git a/gfsm/gfsm/src/programs/sortme.tfst b/gfsm/gfsm/src/programs/sortme.tfst new file mode 100644 index 0000000..d48ea71 --- /dev/null +++ b/gfsm/gfsm/src/programs/sortme.tfst @@ -0,0 +1,15 @@ +0 1 1 10 10.0 +0 1 2 10 10.0 +0 1 1 20 10.0 +0 1 2 20 10.0 +0 1 3 10 10.0 +0 1 3 20 10.0 +0 1 3 30 10.0 +0 1 4 40 1.0 +0 1 4 40 2.0 +0 1 4 40 3.0 +0 0 5 50 10.0 +0 0 5 50 10.0 +0 0 5 50 10.0 +0 0.0 +1 0.0 diff --git a/gfsm/gfsm/src/programs/symtest.lab b/gfsm/gfsm/src/programs/symtest.lab new file mode 100644 index 0000000..4d677d5 --- /dev/null +++ b/gfsm/gfsm/src/programs/symtest.lab @@ -0,0 +1,15 @@ +<epsilon> 0 +a 1 +b 2 +c 3 +A 4 +B 5 +C 6 +acc 7 
+dat 8 +sg 9 +pl 10 +_NOUN 11 +_Case 12 +_Number 13 +_VERB 14 diff --git a/gfsm/gfsm/src/programs/symtest.sym b/gfsm/gfsm/src/programs/symtest.sym new file mode 100644 index 0000000..f151932 --- /dev/null +++ b/gfsm/gfsm/src/programs/symtest.sym @@ -0,0 +1,8 @@ +Lower a b c +Upper A B C + +Case acc dat +Number sg pl + +Category: NOUN Case Number +Category: VERB Number diff --git a/gfsm/gfsm/src/programs/test+lab.tfst b/gfsm/gfsm/src/programs/test+lab.tfst new file mode 100644 index 0000000..252d6a6 --- /dev/null +++ b/gfsm/gfsm/src/programs/test+lab.tfst @@ -0,0 +1,3 @@ +0 1 a b 0 +1 2 c d 0 +2 0 diff --git a/gfsm/gfsm/src/programs/test-rn.tfst b/gfsm/gfsm/src/programs/test-rn.tfst new file mode 100644 index 0000000..694ddcd --- /dev/null +++ b/gfsm/gfsm/src/programs/test-rn.tfst @@ -0,0 +1,8 @@ +0 1 1 1 0 +0 0 +1 0 +2 0 +3 0 +4 +5 +6 diff --git a/gfsm/gfsm/src/programs/test.lab b/gfsm/gfsm/src/programs/test.lab new file mode 100644 index 0000000..b7c05ae --- /dev/null +++ b/gfsm/gfsm/src/programs/test.lab @@ -0,0 +1,15 @@ +<eps> 0 +a 1 +b 2 +c 3 +d 4 +e 5 +f 6 +fourtwenty 420 +twentyfour 24 +seven 7 +( 10 +) 11 +FOO 12 +BAR 13 +BAZ 14 diff --git a/gfsm/gfsm/src/programs/test.tfst b/gfsm/gfsm/src/programs/test.tfst new file mode 100644 index 0000000..02acee3 --- /dev/null +++ b/gfsm/gfsm/src/programs/test.tfst @@ -0,0 +1,3 @@ +0 1 420 24 0.7 +1 2 0 0 99 +2 diff --git a/gfsm/gfsm/src/programs/test2.tfst b/gfsm/gfsm/src/programs/test2.tfst new file mode 100644 index 0000000..37fe96a --- /dev/null +++ b/gfsm/gfsm/src/programs/test2.tfst @@ -0,0 +1,7 @@ +0 1 1 3 .042 +0 2 2 2 .24 +0 3 3 1 .07 +1 +2 +3 + diff --git a/gfsm/gfsm/src/programs/test3.tfst b/gfsm/gfsm/src/programs/test3.tfst new file mode 100644 index 0000000..669bfc9 --- /dev/null +++ b/gfsm/gfsm/src/programs/test3.tfst @@ -0,0 +1,6 @@ +0 1 1 1 100 +0 4 4 4 400 +1 2 2 2 200 +2 3 3 3 300 +3 0 +4 0 diff --git a/gfsm/gfsm/src/programs/test4.tfst b/gfsm/gfsm/src/programs/test4.tfst new file mode 100644 index 
0000000..44d5c9e --- /dev/null +++ b/gfsm/gfsm/src/programs/test4.tfst @@ -0,0 +1,9 @@ +0 1 0 0 0 +0 1 0 0 1 +0 1 0 1 0 +0 1 0 1 1 +0 1 1 0 0 +0 1 1 0 1 +0 1 1 1 0 +0 1 1 1 1 +1 diff --git a/gfsm/gfsm/src/programs/test4b.tfst b/gfsm/gfsm/src/programs/test4b.tfst new file mode 100644 index 0000000..8890ba6 --- /dev/null +++ b/gfsm/gfsm/src/programs/test4b.tfst @@ -0,0 +1,9 @@ +0 1 1 1 1 +0 1 1 1 0 +0 1 1 0 1 +0 1 1 0 0 +0 1 0 1 1 +0 1 0 1 0 +0 1 0 0 1 +0 1 0 0 0 +1
\ No newline at end of file diff --git a/gfsm/gfsm/src/programs/test5.tfst b/gfsm/gfsm/src/programs/test5.tfst new file mode 100644 index 0000000..ac8c248 --- /dev/null +++ b/gfsm/gfsm/src/programs/test5.tfst @@ -0,0 +1,3 @@ +0 1 1 1 0 +1 2 2 2 0 +2 0 diff --git a/gfsm/gfsm/src/programs/test6.tfst b/gfsm/gfsm/src/programs/test6.tfst new file mode 100644 index 0000000..4c306da --- /dev/null +++ b/gfsm/gfsm/src/programs/test6.tfst @@ -0,0 +1,12 @@ +0 1 0 0 0 +0 2 0 0 0 +0 3 0 0 0 +3 4 0 0 0 +1 10 1 1 0 +2 20 2 2 0 +4 5 4 4 0 +4 1 4 4 1 +10 +20 +5 + diff --git a/gfsm/gfsm/src/programs/test7.lab b/gfsm/gfsm/src/programs/test7.lab new file mode 100644 index 0000000..0cedd17 --- /dev/null +++ b/gfsm/gfsm/src/programs/test7.lab @@ -0,0 +1,4 @@ +<eps> 0 +a 1 +b 2 +c 3 diff --git a/gfsm/gfsm/src/programs/test7.tfst b/gfsm/gfsm/src/programs/test7.tfst new file mode 100644 index 0000000..c96f7e6 --- /dev/null +++ b/gfsm/gfsm/src/programs/test7.tfst @@ -0,0 +1,2 @@ +0 1 1 1 0 +1 0 diff --git a/gfsm/gfsm/src/programs/test8.tfst b/gfsm/gfsm/src/programs/test8.tfst new file mode 100644 index 0000000..9009985 --- /dev/null +++ b/gfsm/gfsm/src/programs/test8.tfst @@ -0,0 +1,4 @@ +0 1 1 1 0 +0 2 2 2 0 +1 0 +2 0 diff --git a/gfsm/gfsm/src/programs/test9.tfst b/gfsm/gfsm/src/programs/test9.tfst new file mode 100644 index 0000000..47867d2 --- /dev/null +++ b/gfsm/gfsm/src/programs/test9.tfst @@ -0,0 +1,4 @@ +0 0 +1 0 +2 0 +3 0 diff --git a/gfsm/gfsm/src/programs/testa.tfsa b/gfsm/gfsm/src/programs/testa.tfsa new file mode 100644 index 0000000..f2926c5 --- /dev/null +++ b/gfsm/gfsm/src/programs/testa.tfsa @@ -0,0 +1,6 @@ +0 0 0 +0 1 1 10 +0 +1 2 2 20 +1 100 +2 200 diff --git a/gfsm/gfsm/src/programs/testa.tfst b/gfsm/gfsm/src/programs/testa.tfst new file mode 100644 index 0000000..1078be4 --- /dev/null +++ b/gfsm/gfsm/src/programs/testa.tfst @@ -0,0 +1,6 @@ +0 0 0 +0 1 1 1 +0 +1 2 2 2 20 +1 100 +2 200 diff --git a/gfsm/gfsm/src/programs/testrn.tfst b/gfsm/gfsm/src/programs/testrn.tfst 
new file mode 100644 index 0000000..0bcf201 --- /dev/null +++ b/gfsm/gfsm/src/programs/testrn.tfst @@ -0,0 +1,3 @@ +300 100 1 1 +100 200 2 2 +200 0 diff --git a/gfsm/gfsm/src/programs/vit-in.lab b/gfsm/gfsm/src/programs/vit-in.lab new file mode 100644 index 0000000..5c74e25 --- /dev/null +++ b/gfsm/gfsm/src/programs/vit-in.lab @@ -0,0 +1,3 @@ +<eps> 0 +a 1 +b 2 diff --git a/gfsm/gfsm/src/programs/vit-out.lab b/gfsm/gfsm/src/programs/vit-out.lab new file mode 100644 index 0000000..bd1493c --- /dev/null +++ b/gfsm/gfsm/src/programs/vit-out.lab @@ -0,0 +1,7 @@ +<eps> 0 +a_A 1 +a_B 2 +b_A 3 +b_B 4 +<eps>_A 5 +<eps>_B 6 diff --git a/gfsm/gfsm/src/programs/vit-q.lab b/gfsm/gfsm/src/programs/vit-q.lab new file mode 100644 index 0000000..0fbd83b --- /dev/null +++ b/gfsm/gfsm/src/programs/vit-q.lab @@ -0,0 +1,3 @@ +<BOS> 0 +A 1 +B 2 diff --git a/gfsm/gfsm/src/programs/vit.tfst b/gfsm/gfsm/src/programs/vit.tfst new file mode 100644 index 0000000..e92ec55 --- /dev/null +++ b/gfsm/gfsm/src/programs/vit.tfst @@ -0,0 +1,17 @@ +<BOS> A a a_A 1 +<BOS> A b b_A 1 +<BOS> B a a_B 1 +<BOS> B b b_B 1 +<BOS> 1 + +A A a a_A .4 +A A b b_A .3 +A B a a_B .2 +A B b b_B .1 +A 1 + +B B b b_B .4 +B A a a_A .3 +B A b b_A .2 +B B a a_B .1 +B 1 diff --git a/gfsm/gfsm/src/programs/vit2.tfst b/gfsm/gfsm/src/programs/vit2.tfst new file mode 100644 index 0000000..879b3a1 --- /dev/null +++ b/gfsm/gfsm/src/programs/vit2.tfst @@ -0,0 +1,15 @@ +<BOS> A <eps> <eps>_A .4 +<BOS> A a a_A .3 +<BOS> B <eps> <eps>_B .2 +<BOS> B b b_B .1 +<BOS> 1 + +A A a a_A .7 +A B <eps> <eps>_B .2 +A B b b_B .1 +A 1 + +B B b b_B .7 +B A a a_A .2 +B A <eps> <eps>_A .1 +B 1 |