aboutsummaryrefslogtreecommitdiff
path: root/gfsm/gfsm/src/libgfsm/gfsmAlgebra.h
blob: 301bec1e65da8bfb5e49db37b437d3038a15fd45 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559

/*=============================================================================*\
 * File: gfsmAlgebra.h
 * Author: Bryan Jurish <moocow@ling.uni-potsdam.de>
 * Description: finite state machine library
 *
 * Copyright (c) 2004-2007 Bryan Jurish.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 * 
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 * 
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 *=============================================================================*/

/** \file gfsmAlgebra.h
 *  \brief Algebraic operations on automata
 */

#ifndef _GFSM_ALGEBRA_H
#define _GFSM_ALGEBRA_H

#include <gfsmStateSet.h>
#include <gfsmCompound.h>
#include <gfsmArcIter.h>
#include <gfsmArcIndex.h>

/*======================================================================
 * Methods: algebra
 */

//------------------------------
///\name Closure (self-concatenation)
//@{
/** Compute transitive or reflexive-\&-transitive
 *  closure of \a fsm. 
 *  \note Destructively alters \a fsm .
 *
 *  \param fsm Automaton
 *  \param is_plus Which type of closure to compute:
 *   \arg TRUE transitive closure
 *   \arg FALSE reflexive \& transitive closure
 *  \returns modified \a fsm
 */
gfsmAutomaton *gfsm_automaton_closure(gfsmAutomaton *fsm, gboolean is_plus);

/* Final-state pre-traversal utility for \c closure(fsm). */
//gboolean gfsm_automaton_closure_final_func_(gfsmStateId id, gpointer pw, gfsmAutomaton *fsm);

/** Compute \a n-ary closure of \a fsm.
 *  \note Destructively alters \a fsm.
 *
 *  \param fsm Automaton
 *  \param n Number of repetitions
 *  \returns modified \a fsm
 */
gfsmAutomaton *gfsm_automaton_n_closure(gfsmAutomaton *fsm, guint n);
//@}


//------------------------------
///\name Complementation and Completion
//@{
/**
 * Compute the lower-side complement of \a fsm with respect to its own lower alphabet.
 * \note Destructively alters \a fsm
 *
 * \param fsm Acceptor
 * \returns \a fsm.
 */
gfsmAutomaton *gfsm_automaton_complement(gfsmAutomaton *fsm);

/**
 * Compute the lower-side complement of \a fsm with respect to the alphabet \a alph,
 * which should contain all of the lower-labels from \a fsm.
 * \note Destructively alters \a fsm.
 *
 * \param fsm Acceptor
 * \param alph Alphabet with respect to which to compute complement.
 * \returns \a fsm
 */
gfsmAutomaton *gfsm_automaton_complement_full(gfsmAutomaton *fsm, gfsmAlphabet *alph);

/**
 * Complete the lower side of automaton \a fsm with respect to the alphabet \a alph
 * by directing "missing" arcs to the (new) state with id \a *sink.
 * \note Destructively alters \a fsm.
 *
 * \param fsm Acceptor
 * \param alpha Alphabet with respect to which \a fsm is to be completed
 * \param sinkp Pointer to a variable which on completion contains the Id of a (new) non-final sink state
 * \returns \a fsm
 */
gfsmAutomaton *gfsm_automaton_complete(gfsmAutomaton    *fsm,
				       gfsmAlphabet     *alph,
				       gfsmStateId      *sinkp);
//@}

//------------------------------
///\name Composition
//@{

/** Compute the composition of \a fsm1 with \a fsm2
 *  (upper-side of \a fsm1 intersection with lower-side of \a fsm2).
 *
 *  \param fsm1 Lower-middle transducer
 *  \param fsm2 Middle-upper transducer
 *  \returns Altered \a fsm1
 *
 *  \note
 *    \li Pseudo-destructive on \a fsm1.
 *    \li Runtime efficiency can be greatly improved if
 *        \a fsm1 is sorted on upper labels (::gfsmASMUpper)
 *        and \a fsm2 is sorted on lower labels (::gfsmASMLower).
 *
 *  \sa gfsm_automaton_compose_full()
 */
gfsmAutomaton *gfsm_automaton_compose(gfsmAutomaton *fsm1, gfsmAutomaton *fsm2);


/** Compute the composition of two transducers \a fsm1 and \a fsm2 
 *  into the transducer \a composition.
 *
 *  \param fsm1 Lower-middle transducer
 *  \param fsm2 Middle-upper transducer
 *  \param composition Lower-upper transducer.  May be passed as NULL to create a new automaton.
 *  \param spenum
 *    Mapping from (\a fsm1,\a fsm2,\a filter) ::gfsmComposeState s to \a composition (::gfsmStateId)s,
 *    if it is passed as \a NULL, a temporary enum will be created and freed.
 *
 *  \sa Mohri, Pereira, and Riley (1996) "Weighted Automata in Text and Speech Processing",
 *      Proc. ECAI '96, John Wiley & Sons, Ltd.
 *
 *  \returns \a composition
 */
gfsmAutomaton *gfsm_automaton_compose_full(gfsmAutomaton *fsm1,
					   gfsmAutomaton *fsm2,
					   gfsmAutomaton *composition,
					   gfsmComposeStateEnum *spenum);

typedef guint32 gfsmComposeFlags; /**< flags for gfsm_automaton_compose_visit_state_() */

/** \brief Enum type for low-level flags to gfsm_automaton_compose_visit_state_() */
typedef enum {
  gfsmCFEfsm1NeedsArcSort = 0x1,
  gfsmCFEfsm2NeedsArcSort = 0x2
} gfsmComposeFlagsE;

/** Guts for gfsm_automaton_compose() \returns (new) StateId for \a sp */
gfsmStateId gfsm_automaton_compose_visit_(gfsmComposeState  sp,
					  gfsmAutomaton    *fsm1,
					  gfsmAutomaton    *fsm2,
					  gfsmAutomaton    *fsm,
					  gfsmComposeStateEnum *spenum,
					  gfsmComposeFlags  flags);
//@}

//------------------------------
///\name Concatenation

/** Append \a fsm2 onto the end of \a fsm1 \a n times.
 *  \note Destructively alters \a fsm1.
 *
 * \param fsm1 Automaton
 * \param fsm2 Automaton
 * \returns \a fsm1
 */
gfsmAutomaton *gfsm_automaton_n_concat(gfsmAutomaton *fsm1, gfsmAutomaton *fsm2, guint n);

/** Append \a _fsm2 onto the end of \a fsm1.
 *  \note Destructively alters \a fsm1.
 *
 * \param fsm1 Automaton
 * \param fsm2 Automaton
 * \returns \a fsm1
 */
gfsmAutomaton *gfsm_automaton_concat(gfsmAutomaton *fsm1, gfsmAutomaton *_fsm2);

/* Final-state pre-traversal utility for \a concat(fsm,fsm2).
 *
 *  \note Assumes \a fsm->root_id has been temporarily set to the translated gfsmStateId
 *        of \a fsm2->root_id.
 *
 *  \param pw  final ::gfsmWeight encoded as a gpointer (e.g. with gfsm_weight2ptr())
 *  \param fsm concatenation first argument / return value
 *  \returns FALSE
 */
//gboolean gfsm_automaton_concat_final_func_(gfsmStateId id, gpointer pw, gfsmAutomaton *fsm);

//@}

//------------------------------
///\name Co-Accessibility & Pruning
//@{

/**
 * Remove non-coaccessible states from \a fsm.
 * Calls gfsm_automaton_connect_fw() and gfsm_automaton_connect_bw()
 * \note Destructively alters \a fsm
 *
 * \param fsm Automaton
 * \returns modified \a fsm
 */
gfsmAutomaton *gfsm_automaton_connect(gfsmAutomaton *fsm);

//------------------------------
/**
 * Remove non root-accessible states from \a fsm.
 * Called by gfsm_automaton_connect()
 *
 * \note Destructively alters \a fsm
 *
 * \param fsm Automaton
 * \param visited
 *   Bit-vector for traversal.  Should have all bits set to zero.
 *   If passed as NULL, a new bit-vector will be created and freed.
 * \returns modified \a fsm
 */
gfsmAutomaton *gfsm_automaton_connect_fw(gfsmAutomaton *fsm, gfsmBitVector *visited);

//------------------------------
/**
 * Remove non-finalizable states from \a fsm.
 * Called by connect().
 * \note Destructively alters \a fsm
 *
 * \param fsm Automaton
 * \param rarcs
 *   Reverse arc-index as returned by gfsm_automaton_reverse_arc_index().
 *   If passed as NULL, gfsm_automaton_reverse_arc_index() will be called
 *   on a temporary arc index.
 * \param finalizable
 *   Bit-vector for traversal.  Should have all bits set to zero.
 *   If passed as NULL, a new bit-vector will be created and freed.
 * \returns modified \a fsm
 */
gfsmAutomaton *gfsm_automaton_connect_bw(gfsmAutomaton       *fsm,
					 gfsmReverseArcIndex *rarcs,
					 gfsmBitVector       *finalizable);

//------------------------------
/**
 * Utility for gfsm_automaton_connect_fw() and gfsm_automaton_connect_bw().
 * Prunes states from \a fsm whose id bit is not set in \a want
 *
 * \param fsm    Automaton
 * \param wanted
 *   Bit-vector indexed by state id: bit \a id should be unset
 *   iff state \a id is to be removed.
 * \returns modified \a fsm
 */
gfsmAutomaton *gfsm_automaton_prune_states(gfsmAutomaton *fsm, gfsmBitVector *wanted);

//@}

//------------------------------
///\name Determinization
//@{

/** Utility for \a gfsm_automaton_determinize(). */
void gfsm_determinize_visit_state_(gfsmAutomaton *nfa,    gfsmAutomaton *dfa,
				   gfsmStateSet  *nfa_ec, gfsmStateId    dfa_id,
				   gfsmEnum      *ec2id);

/** Determinize acceptor \a fsm
 *
 *  - Pseudo-destructive on \a fsm
 *  - Epsilon is treated like any other symbol.
 *  - Arc labels are treated as (input,output) pairs for purposes
 *    of state-equivalence-class construction: this may not be what you want.
 *  .
 *
 *  \param fsm Automaton
 *  \returns altered \a fsm
 *
 *  \sa gfsm_automaton_determinize_full()
 */
gfsmAutomaton *gfsm_automaton_determinize(gfsmAutomaton *fsm);

/** Alias for gfsm_automaton_determinize() */
#define gfsm_automaton_determinise(fsm) gfsm_automaton_determinize(fsm)

/** Alias for gfsm_automaton_determinize_full() */
#define gfsm_automaton_determinise_full(nfa,dfa) gfsm_automaton_determinize_full((nfa),(dfa))

/** Determinize automaton \a nfa to \a dfa.
 *  - Epsilon is treated like any other symbol.
 *  - Arc labels are treated as (input,output) pairs.
 *  .
 *
 *  \param nfa non-deterministic acceptor 
 *  \param dfa deterministic acceptor to be constructed
 *             May be passed as \c NULL to create a new automaton.
 *  \returns \a dfa
 */
gfsmAutomaton *gfsm_automaton_determinize_full(gfsmAutomaton *nfa, gfsmAutomaton *dfa);

//@}

//------------------------------
///\name Difference
//@{

/** Remove language of acceptor \a fsm2 from acceptor \a fsm1.
 *
 *  - Pseudo-destructively alters \a fsm1.
 *  - Really just an alias for intersect_full(fsm1,fsm2,NULL)
 *  .
 *
 *  \param fsm1 Acceptor
 *  \param fsm2 Acceptor
 *  \returns \a fsm1
 */
gfsmAutomaton *gfsm_automaton_difference(gfsmAutomaton *fsm1, gfsmAutomaton *fsm2);

/** Compute difference of acceptors (\a fsm1 - \a fsm2) into acceptor \a diff-
 *
 *  \note Really just an alias for intersect_full(fsm1,complement(clone(fsm2)),diff).
 *
 *  \param fsm1 Acceptor
 *  \param fsm2 Acceptor
 *  \param diff Output (difference) acceptor,
 *              may be passed as \c NULL to implicitly create a new automaton.
 *
 *  \returns (possibly new) difference automaton \a diff
 */
gfsmAutomaton *gfsm_automaton_difference_full(gfsmAutomaton *fsm1,
					      gfsmAutomaton *fsm2,
					      gfsmAutomaton *diff);

//@}

//------------------------------
///\name Intersection
//@{
/** Compute the intersection of two acceptors \a fsm1 and \a fsm2 (lower-side intersection).
 *  \note Pseudo-destructive on \a fsm1.
 *
 *  \param fsm1 Acceptor
 *  \param fsm2 Acceptor
 *  \returns \a fsm1.
 */
gfsmAutomaton *gfsm_automaton_intersect(gfsmAutomaton *fsm1, gfsmAutomaton *fsm2);

/** Compute the intersection of two acceptors \a fsm1 and \a fsm2 
 *  into the acceptor \a intersect.
 *
 *  \param fsm1 Acceptor
 *  \param fsm2 Acceptor
 *  \param intersect Output acceptor, may be \c NULL to create a new automaton.
 *  \param spenum Mapping from (\a fsm1,\a fsm2) (::gfsmStatePair)s to \a intersect (::gfsmStateId)s,
 *                may be NULL to use a temporary mapping.
 *  \returns \a intersect.
 */
gfsmAutomaton *gfsm_automaton_intersect_full(gfsmAutomaton *fsm1,
					     gfsmAutomaton *fsm2,
					     gfsmAutomaton *intersect,
					     gfsmStatePairEnum *spenum);

/** Guts for gfsm_automaton_intersect()
 *  \returns (new) ::gfsmStateId for \a sp
 */
gfsmStateId gfsm_automaton_intersect_visit_(gfsmStatePair  sp,
					    gfsmAutomaton *fsm1,
					    gfsmAutomaton *fsm2,
					    gfsmAutomaton *fsm,
					    gfsmStatePairEnum *spenum,
					    gfsmComposeFlags   flags);
//@}


//------------------------------
///\name Inversion & Projection
//@{
/** Invert upper and lower labels of an automaton.
 *  \note Destructively alters \a fsm.
 *
 *  \param fsm Transducer
 *  \returns \a fsm
 */
gfsmAutomaton *gfsm_automaton_invert(gfsmAutomaton *fsm);

//------------------------------
/** Project one "side" (lower or upper) of \a fsm
 *  \note Destructively alters \a fsm
 *
 *  \param fsm Transducer
 *  \param which Which label side to project.
 *    \arg gfsmLSLower project lower side
 *    \arg gfsmLSUpper project upper side (default)
 *  \returns modified \a fsm
 */
gfsmAutomaton *gfsm_automaton_project(gfsmAutomaton *fsm, gfsmLabelSide which);

//@}

//------------------------------
///\name Optionality
//@{
/** Make \a fsm optional.
 *  \note Destructively alters \a fsm
 *
 *  \param fsm Automaton
 *  \returns \a modified fsm
 */
gfsmAutomaton *gfsm_automaton_optional(gfsmAutomaton *fsm);

//@}

//------------------------------
///\name Cartesian Product
//@{
/** Compute Cartesian product of acceptors \a fsm1 and \a fsm2.
 *  \note Destructively alters \a fsm1.
 *
 *  \param fsm1 Acceptor (lower)
 *  \param fsm2 Acceptor (upper)
 *  \returns \a fsm1 (transducer)
 *
 *  \sa gfsm_automaton_product2()
 */
gfsmAutomaton *gfsm_automaton_product(gfsmAutomaton *fsm1, gfsmAutomaton *fsm2);

/** Compute Cartesian product of acceptors \a fsm1 and \a fsm2.
 *  \note Destructively alters \a fsm1 \b and \a fsm2.
 *
 *  \param fsm1 Acceptor (lower)
 *  \param fsm2 Acceptor (upper)
 *  \returns \a fsm1 
 */
gfsmAutomaton *gfsm_automaton_product2(gfsmAutomaton *fsm1, gfsmAutomaton *fsm2);

/* Backwards-compatible alias for gfsm_automaton_product2() [DISABLED] */
//#define _gfsm_automaton_product gfsm_automaton_product2

//@}


//------------------------------
///\name Replacement
//@{
/** Replace label-pair \a (lo,hi) with \a fsm2 in \a fsm1 .
 *  \note Destructively alters \a fsm.
 *
 *  \param fsm1 Automaton
 *  \param lo   lower label or gfsmNoLabel to ignore lower labels
 *  \param hi   upper label or gfsmNoLabel to ignore upper labels
 *  \returns modified \a fsm1
 */
gfsmAutomaton *gfsm_automaton_replace(gfsmAutomaton *fsm1, gfsmLabelVal lo, gfsmLabelVal hi, gfsmAutomaton *fsm2);

/** Insert automaton \a fsm2 into \a fsm1 between states \a q1from and \a q1to with weight \a w.
 *  \note Destructively alters \a fsm1.
 *
 *  \param fsm1 Automaton into which \a fsm2 is inserted
 *  \param fsm2 Automaton to be inserted
 *  \param q1from Source state for inserted automaton in \a fsm1
 *  \param q1to   Final state for inserted automaton in \a fsm1
 *  \param w    Weight to add to final arcs for translated \a fsm2 in \a fsm1
 *  \returns modified \a fsm1
 */
gfsmAutomaton *gfsm_automaton_insert_automaton(gfsmAutomaton *fsm1,
					       gfsmStateId    q1from,
					       gfsmStateId    q1to,
					       gfsmAutomaton *fsm2,
					       gfsmWeight     w);

//@}


//------------------------------
///\name Reversal
//@{
/** Reverse an automaton \a fsm.
 *  \note Destructively alters \a fsm.
 *
 *  \param fsm Automaton
 *  \returns \a fsm
 */
gfsmAutomaton *gfsm_automaton_reverse(gfsmAutomaton *fsm);

//@}

//------------------------------
///\name Epsilon Removal
//@{
/**
 * Remove epsilon arcs from \a fsm.
 * - Destructively alters \a fsm.
 * - Multiple epsilon-paths between two states may not be weighted correctly in the output automaton.
 * .
 *
 * \warning negative-cost epsilon cycles in \a fsm will cause infinite recursion!
 *
 * \param fsm Automaton
 * \returns \a fsm
 */
gfsmAutomaton *gfsm_automaton_rmepsilon(gfsmAutomaton *fsm);

/** Pass-1 guts for gfsm_automaton_rmepsilon(): populates the mapping \a sp2wh
 *  with state-pairs (qid_noeps,qid_eps)=>weight for all
 *  \a qid_eps epsilon-reachable from \a qid_noeps in \a fsm
 */
void gfsm_automaton_rmeps_visit_state_(gfsmAutomaton *fsm,
				       gfsmStateId qid_noeps,
				       gfsmStateId qid_eps,
				       gfsmWeight weight_eps,
				       gfsmStatePair2WeightHash *sp2wh
				       );

/* Pass-2 for gfsm_automaton_rmepsilon(): arc-adoption iterator */
//void gfsm_automaton_rmeps_pass2_foreach_func_(gfsmStatePair *sp, gpointer pw, gfsmAutomaton *fsm);
//@}

//------------------------------
///\name Alphabet Recognizer
//@{
/**
 * Make \a fsm an identity-transducer for alphabet \a abet
 * \note Destructively alters \a fsm.
 *
 * \param fsm Automaton
 * \returns \a fsm
 */
gfsmAutomaton *gfsm_automaton_sigma(gfsmAutomaton *fsm, gfsmAlphabet *abet);
//@}

//------------------------------
///\name Union
//@{
/** Add the language or relation of \a fsm2 to \a fsm1.
 *  \note Destructively alters \a fsm1
 *
 *  \param fsm1 Automaton
 *  \param fsm2 Automaton
 *  \returns \a fsm1
 */
gfsmAutomaton *gfsm_automaton_union(gfsmAutomaton *fsm1, gfsmAutomaton *fsm2);
//@}

/** \file gfsmAlgebra.h
 *  \todo sigma()
 *  \todo bestpath()
 *  \todo encode() ?
 *  \todo equiv() ?
 *  \todo minimize() ?
 *  \todo Regex compiler
 *  \todo deterministic union, tries
 */

#endif /* _GFSM_ALGEBRA_H */