]>
Commit | Line | Data |
---|---|---|
b0125085 GS |
1 | #include <linux/module.h> |
2 | #include <linux/glob.h> | |
3 | ||
4 | /* | |
5 | * The only reason this code can be compiled as a module is because the | |
6 | * ATA code that depends on it can be as well. In practice, they're | |
7 | * both usually compiled in and the module overhead goes away. | |
8 | */ | |
9 | MODULE_DESCRIPTION("glob(7) matching"); | |
10 | MODULE_LICENSE("Dual MIT/GPL"); | |
11 | ||
/**
 * glob_match - Shell-style pattern matching, like !fnmatch(pat, str, 0)
 * @pat: Shell-style pattern to match, e.g. "*.[ch]".
 * @str: String to match.  The pattern must match the entire string.
 *
 * Perform shell-style glob matching, returning true (1) if the match
 * succeeds, or false (0) if it fails.  Equivalent to !fnmatch(@pat, @str, 0).
 *
 * Pattern metacharacters are ?, *, [ and \.
 * (And, inside character classes, !, - and ].)
 *
 * This is a small and simple implementation intended for device blacklists
 * where a string is matched against a number of patterns.  Thus, it
 * does not preprocess the patterns.  It is non-recursive, and run-time
 * is at most quadratic: strlen(@str)*strlen(@pat).
 *
 * An example of the worst case is glob_match("*aaaaa", "aaaaaaaaaa");
 * it takes 6 passes over the pattern before matching the string.
 *
 * Like !fnmatch(@pat, @str, 0) and unlike the shell, this does NOT
 * treat / or leading . specially; it isn't actually used for pathnames.
 *
 * Note that according to glob(7) (and unlike bash), character classes
 * are complemented by a leading !; this does not support the regex-style
 * [^a-z] syntax.
 *
 * An opening bracket without a matching close is matched literally.
 * A backslash makes the following pattern byte a literal; a backslash that
 * is the last byte of @pat therefore only matches the end of @str.
 *
 * Return: true if @pat matches all of @str, false otherwise.
 */
bool __pure glob_match(char const *pat, char const *str)
{
	/*
	 * Backtrack to previous * on mismatch and retry starting one
	 * character later in the string.  Because * matches all characters
	 * (no exception for /), it can be easily proved that there's
	 * never a need to backtrack multiple levels.
	 *
	 * back_str is only ever read after back_pat has been set by a '*',
	 * and both are assigned together, so NULL is a safe starting value
	 * that avoids reading an indeterminate pointer.
	 */
	char const *back_pat = NULL, *back_str = NULL;

	/*
	 * Loop over each token (character or class) in pat, matching
	 * it against the remaining unmatched tail of str.  Return false
	 * on mismatch, or true after matching the trailing nul bytes.
	 */
	for (;;) {
		unsigned char c = *str++;
		unsigned char d = *pat++;

		switch (d) {
		case '?':	/* Wildcard: anything but nul */
			if (c == '\0')
				return false;
			break;
		case '*':	/* Any-length wildcard */
			if (*pat == '\0')	/* Optimize trailing * case */
				return true;
			back_pat = pat;
			back_str = --str;	/* Allow zero-length match */
			break;
		case '[': {	/* Character class */
			bool match = false, inverted = (*pat == '!');
			char const *class = pat + inverted;
			unsigned char a = *class++;

			/*
			 * Iterate over each span in the character class.
			 * A span is either a single character a, or a
			 * range a-b.  The first span may begin with ']'.
			 *
			 * pat is left untouched until the closing ']' is
			 * found, so that a malformed class can fall back to
			 * matching the '[' itself as a literal.
			 */
			do {
				unsigned char b = a;

				if (a == '\0')	/* Malformed */
					goto literal;

				if (class[0] == '-' && class[1] != ']') {
					b = class[1];

					if (b == '\0')
						goto literal;

					class += 2;
					/* Any special action if a > b? */
				}
				match |= (a <= c && c <= b);
			} while ((a = *class++) != ']');

			if (match == inverted)
				goto backtrack;
			pat = class;
			}
			break;
		case '\\':
			d = *pat++;
			/*FALLTHROUGH*/
		default:	/* Literal character */
literal:
			if (c == d) {
				if (d == '\0')
					return true;
				break;
			}
backtrack:
			if (c == '\0' || !back_pat)
				return false;	/* No point continuing */
			/* Try again from last *, one character later in str. */
			pat = back_pat;
			str = ++back_str;
			break;
		}
	}
}
123 | EXPORT_SYMBOL(glob_match); | |
5f9be824 GS |
124 | |
125 | ||
126 | #ifdef CONFIG_GLOB_SELFTEST | |
127 | ||
128 | #include <linux/printk.h> | |
129 | #include <linux/moduleparam.h> | |
130 | ||
131 | /* Boot with "glob.verbose=1" to also log passing self-tests, not just failures */ | |
132 | static bool verbose = false; | |
133 | module_param(verbose, bool, 0); | |
134 | ||
/* One self-test case: a pattern, a subject string and the expected verdict. */
struct glob_test {
	char const *pat;
	char const *str;
	bool expected;
};
139 | ||
140 | static bool __pure __init test(char const *pat, char const *str, bool expected) | |
141 | { | |
142 | bool match = glob_match(pat, str); | |
143 | bool success = match == expected; | |
144 | ||
145 | /* Can't get string literals into a particular section, so... */ | |
146 | static char const msg_error[] __initconst = | |
147 | KERN_ERR "glob: \"%s\" vs. \"%s\": %s *** ERROR ***\n"; | |
148 | static char const msg_ok[] __initconst = | |
149 | KERN_DEBUG "glob: \"%s\" vs. \"%s\": %s OK\n"; | |
150 | static char const mismatch[] __initconst = "mismatch"; | |
151 | char const *message; | |
152 | ||
153 | if (!success) | |
154 | message = msg_error; | |
155 | else if (verbose) | |
156 | message = msg_ok; | |
157 | else | |
158 | return success; | |
159 | ||
160 | printk(message, pat, str, mismatch + 3*match); | |
161 | return success; | |
162 | } | |
163 | ||
164 | /* | |
165 | * The tests are all jammed together in one array to make it simpler | |
166 | * to place that array in the .init.rodata section. The obvious | |
167 | * "array of structures containing char *" has no way to force the | |
168 | * pointed-to strings to be in a particular section. | |
169 | * | |
170 | * Each test consists of three consecutive fields: | |
171 | * 1. Expected glob_match result: '1' (match) or '0' (no match). | |
172 | * 2. Pattern to match: nul-terminated string. | |
173 | * 3. String to match against: nul-terminated string. | |
174 | * | |
175 | * The list ends where a result character is expected but a '\0' is found; | |
176 | * the implicit terminator of the final string literal supplies that byte. | |
177 | */ | |
178 | static char const glob_tests[] __initconst = | |
179 | /* Some basic tests */ | |
180 | "1" "a\0" "a\0" | |
181 | "0" "a\0" "b\0" | |
182 | "0" "a\0" "aa\0" | |
183 | "0" "a\0" "\0" | |
184 | "1" "\0" "\0" | |
185 | "0" "\0" "a\0" | |
186 | /* Simple character class tests */ | |
187 | "1" "[a]\0" "a\0" | |
188 | "0" "[a]\0" "b\0" | |
189 | "0" "[!a]\0" "a\0" | |
190 | "1" "[!a]\0" "b\0" | |
191 | "1" "[ab]\0" "a\0" | |
192 | "1" "[ab]\0" "b\0" | |
193 | "0" "[ab]\0" "c\0" | |
194 | "1" "[!ab]\0" "c\0" | |
195 | "1" "[a-c]\0" "b\0" | |
196 | "0" "[a-c]\0" "d\0" | |
197 | /* Corner cases in character class parsing */ | |
198 | "1" "[a-c-e-g]\0" "-\0" | |
199 | "0" "[a-c-e-g]\0" "d\0" | |
200 | "1" "[a-c-e-g]\0" "f\0" | |
201 | "1" "[]a-ceg-ik[]\0" "a\0" | |
202 | "1" "[]a-ceg-ik[]\0" "]\0" | |
203 | "1" "[]a-ceg-ik[]\0" "[\0" | |
204 | "1" "[]a-ceg-ik[]\0" "h\0" | |
205 | "0" "[]a-ceg-ik[]\0" "f\0" | |
206 | "0" "[!]a-ceg-ik[]\0" "h\0" | |
207 | "0" "[!]a-ceg-ik[]\0" "]\0" | |
208 | "1" "[!]a-ceg-ik[]\0" "f\0" | |
209 | /* Simple wild cards */ | |
210 | "1" "?\0" "a\0" | |
211 | "0" "?\0" "aa\0" | |
212 | "0" "??\0" "a\0" | |
213 | "1" "?x?\0" "axb\0" | |
214 | "0" "?x?\0" "abx\0" | |
215 | "0" "?x?\0" "xab\0" | |
216 | /* Asterisk wild cards (backtracking) */ | |
217 | "0" "*??\0" "a\0" | |
218 | "1" "*??\0" "ab\0" | |
219 | "1" "*??\0" "abc\0" | |
220 | "1" "*??\0" "abcd\0" | |
221 | "0" "??*\0" "a\0" | |
222 | "1" "??*\0" "ab\0" | |
223 | "1" "??*\0" "abc\0" | |
224 | "1" "??*\0" "abcd\0" | |
225 | "0" "?*?\0" "a\0" | |
226 | "1" "?*?\0" "ab\0" | |
227 | "1" "?*?\0" "abc\0" | |
228 | "1" "?*?\0" "abcd\0" | |
229 | "1" "*b\0" "b\0" | |
230 | "1" "*b\0" "ab\0" | |
231 | "0" "*b\0" "ba\0" | |
232 | "1" "*b\0" "bb\0" | |
233 | "1" "*b\0" "abb\0" | |
234 | "1" "*b\0" "bab\0" | |
235 | "1" "*bc\0" "abbc\0" | |
236 | "1" "*bc\0" "bc\0" | |
237 | "1" "*bc\0" "bbc\0" | |
238 | "1" "*bc\0" "bcbc\0" | |
239 | /* Multiple asterisks (complex backtracking) */ | |
240 | "1" "*ac*\0" "abacadaeafag\0" | |
241 | "1" "*ac*ae*ag*\0" "abacadaeafag\0" | |
242 | "1" "*a*b*[bc]*[ef]*g*\0" "abacadaeafag\0" | |
243 | "0" "*a*b*[ef]*[cd]*g*\0" "abacadaeafag\0" | |
244 | "1" "*abcd*\0" "abcabcabcabcdefg\0" | |
245 | "1" "*ab*cd*\0" "abcabcabcabcdefg\0" | |
246 | "1" "*abcd*abcdef*\0" "abcabcdabcdeabcdefg\0" | |
247 | "0" "*abcd*\0" "abcabcabcabcefg\0" | |
248 | "0" "*ab*cd*\0" "abcabcabcabcefg\0"; | |
249 | ||
250 | static int __init glob_init(void) | |
251 | { | |
252 | unsigned successes = 0; | |
253 | unsigned n = 0; | |
254 | char const *p = glob_tests; | |
255 | static char const message[] __initconst = | |
256 | KERN_INFO "glob: %u self-tests passed, %u failed\n"; | |
257 | ||
258 | /* | |
259 | * Tests are jammed together in a string. The first byte is '1' | |
260 | * or '0' to indicate the expected outcome, or '\0' to indicate the | |
261 | * end of the tests. Then come two null-terminated strings: the | |
262 | * pattern and the string to match it against. | |
263 | */ | |
264 | while (*p) { | |
265 | bool expected = *p++ & 1; | |
266 | char const *pat = p; | |
267 | ||
268 | p += strlen(p) + 1; | |
269 | successes += test(pat, p, expected); | |
270 | p += strlen(p) + 1; | |
271 | n++; | |
272 | } | |
273 | ||
274 | n -= successes; | |
275 | printk(message, successes, n); | |
276 | ||
277 | /* What's the errno for "kernel bug detected"? Guess... */ | |
278 | return n ? -ECANCELED : 0; | |
279 | } | |
280 | ||
281 | /* We need a dummy exit function to allow unload */ | |
282 | static void __exit glob_fini(void) { } | |
283 | ||
284 | module_init(glob_init); | |
285 | module_exit(glob_fini); | |
286 | ||
287 | #endif /* CONFIG_GLOB_SELFTEST */ |