]>
Commit | Line | Data |
---|---|---|
2de4ff7b TG |
1 | /* |
2 | * lib/textsearch.c Generic text search interface | |
3 | * | |
4 | * This program is free software; you can redistribute it and/or | |
5 | * modify it under the terms of the GNU General Public License | |
6 | * as published by the Free Software Foundation; either version | |
7 | * 2 of the License, or (at your option) any later version. | |
8 | * | |
9 | * Authors: Thomas Graf <[email protected]> | |
e03ba84a | 10 | * Pablo Neira Ayuso <[email protected]> |
2de4ff7b TG |
11 | * |
12 | * ========================================================================== | |
5968a70d RD |
13 | */ |
14 | ||
15 | /** | |
16 | * DOC: ts_intro | |
2de4ff7b TG |
17 | * INTRODUCTION |
18 | * | |
de0368d5 | 19 | * The textsearch infrastructure provides text searching facilities for |
2de4ff7b TG |
20 | * both linear and non-linear data. Individual search algorithms are |
21 | * implemented in modules and chosen by the user. | |
22 | * | |
23 | * ARCHITECTURE | |
24 | * | |
5968a70d RD |
25 | * .. code-block:: none |
26 | * | |
27 | * User | |
2de4ff7b TG |
28 | * +----------------+ |
29 | * | finish()|<--------------(6)-----------------+ | |
30 | * |get_next_block()|<--------------(5)---------------+ | | |
31 | * | | Algorithm | | | |
32 | * | | +------------------------------+ | |
33 | * | | | init() find() destroy() | | |
34 | * | | +------------------------------+ | |
35 | * | | Core API ^ ^ ^ | |
36 | * | | +---------------+ (2) (4) (8) | |
37 | * | (1)|----->| prepare() |---+ | | | |
38 | * | (3)|----->| find()/next() |-----------+ | | |
39 | * | (7)|----->| destroy() |----------------------+ | |
40 | * +----------------+ +---------------+ | |
5968a70d RD |
41 | * |
42 | * (1) User configures a search by calling textsearch_prepare() specifying | |
43 | * the search parameters such as the pattern and algorithm name. | |
2de4ff7b TG |
44 | * (2) Core requests the algorithm to allocate and initialize a search |
45 | * configuration according to the specified parameters. | |
5968a70d RD |
46 | * (3) User starts the search(es) by calling textsearch_find() or |
47 | * textsearch_next() to fetch subsequent occurrences. A state variable | |
48 | * is provided to the algorithm to store persistent variables. | |
2de4ff7b TG |
49 | * (4) Core eventually resets the search offset and forwards the find() |
50 | * request to the algorithm. | |
de0368d5 | 51 | * (5) Algorithm calls get_next_block() provided by the user continuously |
2de4ff7b TG |
52 | * to fetch the data to be searched in block by block. |
53 | * (6) Algorithm invokes finish() after the last call to get_next_block | |
54 | * to clean up any leftovers from get_next_block. (Optional) | |
5968a70d | 55 | * (7) User destroys the configuration by calling textsearch_destroy(). |
2de4ff7b TG |
56 | * (8) Core notifies the algorithm to destroy algorithm specific |
57 | * allocations. (Optional) | |
58 | * | |
59 | * USAGE | |
60 | * | |
61 | * Before a search can be performed, a configuration must be created | |
b9c79678 JP |
62 | * by calling textsearch_prepare() specifying the searching algorithm, |
63 | * the pattern to look for and flags. As a flag, you can set TS_IGNORECASE | |
64 | * to perform case insensitive matching. But it might slow down | |
65 | * performance of the algorithm, so you should use it at your own risk. | |
de0368d5 | 66 | * The returned configuration may then be used for an arbitrary |
b9c79678 JP |
67 | * amount of times and even in parallel as long as a separate struct |
68 | * ts_state variable is provided to every instance. | |
2de4ff7b | 69 | * |
5968a70d RD |
70 | * The actual search is performed by either calling |
71 | * textsearch_find_continuous() for linear data or by providing | |
72 | * your own get_next_block() implementation and | |
73 | * calling textsearch_find(). Both functions return | |
de0368d5 JDB |
74 | * the position of the first occurrence of the pattern or UINT_MAX if |
75 | * no match was found. Subsequent occurrences can be found by calling | |
2de4ff7b TG |
76 | * textsearch_next() regardless of the linearity of the data. |
77 | * | |
78 | * Once you're done using a configuration it must be given back via | |
79 | * textsearch_destroy. | |
80 | * | |
5968a70d | 81 | * EXAMPLE:: |
2de4ff7b TG |
82 | * |
83 | * int pos; | |
84 | * struct ts_config *conf; | |
85 | * struct ts_state state; | |
86 | * const char *pattern = "chicken"; | |
87 | * const char *example = "We dance the funky chicken"; | |
88 | * | |
89 | * conf = textsearch_prepare("kmp", pattern, strlen(pattern), | |
90 | * GFP_KERNEL, TS_AUTOLOAD); | |
91 | * if (IS_ERR(conf)) { | |
92 | * err = PTR_ERR(conf); | |
93 | * goto errout; | |
94 | * } | |
95 | * | |
5968a70d | 96 | * pos = textsearch_find_continuous(conf, \&state, example, strlen(example)); |
2de4ff7b | 97 | * if (pos != UINT_MAX) |
5968a70d | 98 | * panic("Oh my god, dancing chickens at \%d\n", pos); |
2de4ff7b TG |
99 | * |
100 | * textsearch_destroy(conf); | |
2de4ff7b | 101 | */ |
5968a70d | 102 | /* ========================================================================== */ |
2de4ff7b | 103 | |
2de4ff7b TG |
104 | #include <linux/module.h> |
105 | #include <linux/types.h> | |
106 | #include <linux/string.h> | |
107 | #include <linux/init.h> | |
82524746 | 108 | #include <linux/rculist.h> |
2de4ff7b TG |
109 | #include <linux/rcupdate.h> |
110 | #include <linux/err.h> | |
111 | #include <linux/textsearch.h> | |
5a0e3ad6 | 112 | #include <linux/slab.h> |
2de4ff7b TG |
113 | |
/*
 * List of registered textsearch algorithms. textsearch_register() appends
 * to it and textsearch_unregister() removes from it using the RCU list
 * helpers; lookup_ts_algo() walks it inside an RCU read-side section.
 */
static LIST_HEAD(ts_ops);
/*
 * Held by textsearch_register() and textsearch_unregister() while they
 * walk and modify the ts_ops list.
 */
static DEFINE_SPINLOCK(ts_mod_lock);
116 | ||
117 | static inline struct ts_ops *lookup_ts_algo(const char *name) | |
118 | { | |
119 | struct ts_ops *o; | |
120 | ||
121 | rcu_read_lock(); | |
122 | list_for_each_entry_rcu(o, &ts_ops, list) { | |
123 | if (!strcmp(name, o->name)) { | |
124 | if (!try_module_get(o->owner)) | |
125 | o = NULL; | |
126 | rcu_read_unlock(); | |
127 | return o; | |
128 | } | |
129 | } | |
130 | rcu_read_unlock(); | |
131 | ||
132 | return NULL; | |
133 | } | |
134 | ||
135 | /** | |
136 | * textsearch_register - register a textsearch module | |
137 | * @ops: operations lookup table | |
138 | * | |
139 | * This function must be called by textsearch modules to announce | |
140 | * their presence. The specified &@ops must have %name set to a | |
141 | * unique identifier and the callbacks find(), init(), get_pattern(), | |
142 | * and get_pattern_len() must be implemented. | |
143 | * | |
144 | * Returns 0 or -EEXISTS if another module has already registered | |
145 | * with same name. | |
146 | */ | |
147 | int textsearch_register(struct ts_ops *ops) | |
148 | { | |
149 | int err = -EEXIST; | |
150 | struct ts_ops *o; | |
151 | ||
152 | if (ops->name == NULL || ops->find == NULL || ops->init == NULL || | |
153 | ops->get_pattern == NULL || ops->get_pattern_len == NULL) | |
154 | return -EINVAL; | |
155 | ||
156 | spin_lock(&ts_mod_lock); | |
157 | list_for_each_entry(o, &ts_ops, list) { | |
158 | if (!strcmp(ops->name, o->name)) | |
159 | goto errout; | |
160 | } | |
161 | ||
162 | list_add_tail_rcu(&ops->list, &ts_ops); | |
163 | err = 0; | |
164 | errout: | |
165 | spin_unlock(&ts_mod_lock); | |
166 | return err; | |
167 | } | |
ce643a30 | 168 | EXPORT_SYMBOL(textsearch_register); |
2de4ff7b TG |
169 | |
170 | /** | |
171 | * textsearch_unregister - unregister a textsearch module | |
172 | * @ops: operations lookup table | |
173 | * | |
174 | * This function must be called by textsearch modules to announce | |
175 | * their disappearance for examples when the module gets unloaded. | |
176 | * The &ops parameter must be the same as the one during the | |
177 | * registration. | |
178 | * | |
179 | * Returns 0 on success or -ENOENT if no matching textsearch | |
180 | * registration was found. | |
181 | */ | |
182 | int textsearch_unregister(struct ts_ops *ops) | |
183 | { | |
184 | int err = 0; | |
185 | struct ts_ops *o; | |
186 | ||
187 | spin_lock(&ts_mod_lock); | |
188 | list_for_each_entry(o, &ts_ops, list) { | |
189 | if (o == ops) { | |
190 | list_del_rcu(&o->list); | |
191 | goto out; | |
192 | } | |
193 | } | |
194 | ||
195 | err = -ENOENT; | |
196 | out: | |
197 | spin_unlock(&ts_mod_lock); | |
198 | return err; | |
199 | } | |
ce643a30 | 200 | EXPORT_SYMBOL(textsearch_unregister); |
2de4ff7b TG |
201 | |
/*
 * State used when searching one contiguous buffer. It is stored in the
 * cb area of struct ts_state by textsearch_find_continuous() and read
 * back by get_linear_data().
 */
struct ts_linear_state {
	unsigned int	len;	/* total length of the buffer */
	const void	*data;	/* start of the buffer */
};
207 | ||
208 | static unsigned int get_linear_data(unsigned int consumed, const u8 **dst, | |
209 | struct ts_config *conf, | |
210 | struct ts_state *state) | |
211 | { | |
212 | struct ts_linear_state *st = (struct ts_linear_state *) state->cb; | |
213 | ||
214 | if (likely(consumed < st->len)) { | |
215 | *dst = st->data + consumed; | |
216 | return st->len - consumed; | |
217 | } | |
218 | ||
219 | return 0; | |
220 | } | |
221 | ||
222 | /** | |
223 | * textsearch_find_continuous - search a pattern in continuous/linear data | |
224 | * @conf: search configuration | |
225 | * @state: search state | |
226 | * @data: data to search in | |
227 | * @len: length of data | |
228 | * | |
229 | * A simplified version of textsearch_find() for continuous/linear data. | |
230 | * Call textsearch_next() to retrieve subsequent matches. | |
231 | * | |
232 | * Returns the position of first occurrence of the pattern or | |
72fd4a35 | 233 | * %UINT_MAX if no occurrence was found. |
5968a70d | 234 | */ |
2de4ff7b TG |
235 | unsigned int textsearch_find_continuous(struct ts_config *conf, |
236 | struct ts_state *state, | |
237 | const void *data, unsigned int len) | |
238 | { | |
239 | struct ts_linear_state *st = (struct ts_linear_state *) state->cb; | |
240 | ||
241 | conf->get_next_block = get_linear_data; | |
242 | st->data = data; | |
243 | st->len = len; | |
244 | ||
245 | return textsearch_find(conf, state); | |
246 | } | |
ce643a30 | 247 | EXPORT_SYMBOL(textsearch_find_continuous); |
2de4ff7b TG |
248 | |
249 | /** | |
250 | * textsearch_prepare - Prepare a search | |
251 | * @algo: name of search algorithm | |
252 | * @pattern: pattern data | |
253 | * @len: length of pattern | |
254 | * @gfp_mask: allocation mask | |
255 | * @flags: search flags | |
256 | * | |
257 | * Looks up the search algorithm module and creates a new textsearch | |
fec22908 | 258 | * configuration for the specified pattern. |
2de4ff7b TG |
259 | * |
260 | * Note: The format of the pattern may not be compatible between | |
261 | * the various search algorithms. | |
262 | * | |
263 | * Returns a new textsearch configuration according to the specified | |
e03ba84a PNA |
264 | * parameters or a ERR_PTR(). If a zero length pattern is passed, this |
265 | * function returns EINVAL. | |
2de4ff7b TG |
266 | */ |
267 | struct ts_config *textsearch_prepare(const char *algo, const void *pattern, | |
fd4f2df2 | 268 | unsigned int len, gfp_t gfp_mask, int flags) |
2de4ff7b TG |
269 | { |
270 | int err = -ENOENT; | |
271 | struct ts_config *conf; | |
272 | struct ts_ops *ops; | |
273 | ||
e03ba84a PNA |
274 | if (len == 0) |
275 | return ERR_PTR(-EINVAL); | |
276 | ||
2de4ff7b | 277 | ops = lookup_ts_algo(algo); |
a00caa1f | 278 | #ifdef CONFIG_MODULES |
2de4ff7b TG |
279 | /* |
280 | * Why not always autoload you may ask. Some users are | |
281 | * in a situation where requesting a module may deadlock, | |
282 | * especially when the module is located on a NFS mount. | |
283 | */ | |
284 | if (ops == NULL && flags & TS_AUTOLOAD) { | |
285 | request_module("ts_%s", algo); | |
286 | ops = lookup_ts_algo(algo); | |
287 | } | |
288 | #endif | |
289 | ||
290 | if (ops == NULL) | |
291 | goto errout; | |
292 | ||
b9c79678 | 293 | conf = ops->init(pattern, len, gfp_mask, flags); |
2de4ff7b TG |
294 | if (IS_ERR(conf)) { |
295 | err = PTR_ERR(conf); | |
296 | goto errout; | |
297 | } | |
298 | ||
299 | conf->ops = ops; | |
300 | return conf; | |
301 | ||
302 | errout: | |
303 | if (ops) | |
304 | module_put(ops->owner); | |
305 | ||
306 | return ERR_PTR(err); | |
307 | } | |
ce643a30 | 308 | EXPORT_SYMBOL(textsearch_prepare); |
2de4ff7b TG |
309 | |
310 | /** | |
311 | * textsearch_destroy - destroy a search configuration | |
312 | * @conf: search configuration | |
313 | * | |
314 | * Releases all references of the configuration and frees | |
315 | * up the memory. | |
316 | */ | |
317 | void textsearch_destroy(struct ts_config *conf) | |
318 | { | |
319 | if (conf->ops) { | |
320 | if (conf->ops->destroy) | |
321 | conf->ops->destroy(conf); | |
322 | module_put(conf->ops->owner); | |
323 | } | |
324 | ||
325 | kfree(conf); | |
326 | } | |
2de4ff7b | 327 | EXPORT_SYMBOL(textsearch_destroy); |