aboutsummaryrefslogtreecommitdiff
path: root/src/include/gnunet_regex_lib.h
blob: 53e7c13e066168007151651e800eb3d46381a888 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
/*
     This file is part of GNUnet
     (C) 2012 Christian Grothoff (and other contributing authors)

     GNUnet is free software; you can redistribute it and/or modify
     it under the terms of the GNU General Public License as published
     by the Free Software Foundation; either version 3, or (at your
     option) any later version.

     GNUnet is distributed in the hope that it will be useful, but
     WITHOUT ANY WARRANTY; without even the implied warranty of
     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     General Public License for more details.

     You should have received a copy of the GNU General Public License
     along with GNUnet; see the file COPYING.  If not, write to the
     Free Software Foundation, Inc., 59 Temple Place - Suite 330,
     Boston, MA 02111-1307, USA.
*/
/**
 * @file include/gnunet_regex_lib.h
 * @brief library to parse regular expressions into dfa
 * @author Maximilian Szengel
 *
 */

#ifndef GNUNET_REGEX_LIB_H
#define GNUNET_REGEX_LIB_H

#include "gnunet_util_lib.h"

#ifdef __cplusplus
extern "C"
{
#if 0                           /* keep Emacsens' auto-indent happy */
}
#endif
#endif

/**
 * Automaton (NFA/DFA) representation.
 */
struct GNUNET_REGEX_Automaton;


/**
 * Edge representation.
 */
struct GNUNET_REGEX_Edge
{
  /**
   * Label of the edge.  FIXME: might want to not consume exactly multiples of 8 bits, need length?
   */
  const char *label;

  /**
   * Destionation of the edge.
   */
  struct GNUNET_HashCode destination;
};


/**
 * Construct an NFA by parsing the regex string of length 'len'.
 *
 * @param regex regular expression string.
 * @param len length of the string.
 *
 * @return NFA, needs to be freed using GNUNET_REGEX_destroy_automaton.
 */
struct GNUNET_REGEX_Automaton *
GNUNET_REGEX_construct_nfa (const char *regex, const size_t len);


/**
 * Construct DFA for the given 'regex' of length 'len'.
 *
 * @param regex regular expression string.
 * @param len length of the regular expression.
 *
 * @return DFA, needs to be freed using GNUNET_REGEX_destroy_automaton.
 */
struct GNUNET_REGEX_Automaton *
GNUNET_REGEX_construct_dfa (const char *regex, const size_t len);


/**
 * Free the memory allocated by constructing the GNUNET_REGEX_Automaton.
 * data structure.
 *
 * @param a automaton to be destroyed.
 */
void
GNUNET_REGEX_automaton_destroy (struct GNUNET_REGEX_Automaton *a);


/**
 * Options for graph creation function
 * GNUNET_REGEX_automaton_save_graph.
 */

enum GNUNET_REGEX_GraphSavingOptions
{
  /**
   * Default. Do nothing special.
   */
  GNUNET_REGEX_GRAPH_DEFAULT = 0,

  /**
   * The generated graph will include extra information such as the NFA states
   * that were used to generate the DFA state.
   */
  GNUNET_REGEX_GRAPH_VERBOSE = 1,

  /**
   * Enable graph coloring. Will color each SCC in a different color.
   */
  GNUNET_REGEX_GRAPH_COLORING = 2
};


/**
 * Save the given automaton as a GraphViz dot file.
 *
 * @param a the automaton to be saved.
 * @param filename where to save the file.
 * @param options options for graph generation that include coloring or verbose
 *                mode
 */
void
GNUNET_REGEX_automaton_save_graph (struct GNUNET_REGEX_Automaton *a,
                                   const char *filename,
                                   enum GNUNET_REGEX_GraphSavingOptions options);


/**
 * Evaluates the given 'string' against the given compiled regex.
 *
 * @param a automaton.
 * @param string string to check.
 *
 * @return 0 if string matches, non 0 otherwise.
 */
int
GNUNET_REGEX_eval (struct GNUNET_REGEX_Automaton *a,
                   const char *string);


/**
 * Get the first key for the given 'input_string'. This hashes
 * the first x bits of the 'input_string'.
 *
 * @param input_string string.
 * @param string_len length of the 'input_string'.
 * @param key pointer to where to write the hash code.
 *
 * @return number of bits of 'input_string' that have been consumed
 *         to construct the key
 */
size_t
GNUNET_REGEX_get_first_key (const char *input_string, size_t string_len,
                            struct GNUNET_HashCode * key);


/**
 * Check if the given 'proof' matches the given 'key'.
 *
 * @param proof partial regex of a state.
 * @param key hash of a state.
 *
 * @return GNUNET_OK if the proof is valid for the given key.
 */
int
GNUNET_REGEX_check_proof (const char *proof,
                          const struct GNUNET_HashCode *key);


/**
 * Iterator callback function.
 *
 * @param cls closure.
 * @param key hash for current state.
 * @param proof proof for current state.
 * @param accepting GNUNET_YES if this is an accepting state, GNUNET_NO if not.
 * @param num_edges number of edges leaving current state.
 * @param edges edges leaving current state.
 */
typedef void (*GNUNET_REGEX_KeyIterator)(void *cls,
                                         const struct GNUNET_HashCode *key,
                                         const char *proof,
                                         int accepting,
                                         unsigned int num_edges,
                                         const struct GNUNET_REGEX_Edge *edges);


/**
 * Iterate over all edges starting from start state of automaton 'a'. Calling
 * iterator for each edge.
 *
 * @param a automaton.
 * @param iterator iterator called for each edge.
 * @param iterator_cls closure.
 */
void
GNUNET_REGEX_iterate_all_edges (struct GNUNET_REGEX_Automaton *a,
                                GNUNET_REGEX_KeyIterator iterator,
                                void *iterator_cls);


#if 0                           /* keep Emacsens' auto-indent happy */
{
#endif
#ifdef __cplusplus
}
#endif

/* end of gnunet_regex_lib.h */
#endif