[linux.git] / lib / glob.c

#include <linux/module.h>
#include <linux/glob.h>

/*
 * The only reason this code can be compiled as a module is that the
 * ATA code that depends on it can be as well.  In practice, they're
 * both usually compiled in and the module overhead goes away.
 */
MODULE_DESCRIPTION("glob(7) matching");
MODULE_LICENSE("Dual MIT/GPL");

/**
 * glob_match - Shell-style pattern matching, like !fnmatch(pat, str, 0)
 * @pat: Shell-style pattern to match, e.g. "*.[ch]".
 * @str: String to match.  The pattern must match the entire string.
 *
 * Perform shell-style glob matching, returning true (1) if the match
 * succeeds, or false (0) if it fails.  Equivalent to !fnmatch(@pat, @str, 0).
 *
 * Pattern metacharacters are ?, *, [ and \.
 * (And, inside character classes, !, - and ].)
 *
 * This is a small and simple implementation intended for device blacklists
 * where a string is matched against a number of patterns.  Thus, it
 * does not preprocess the patterns.  It is non-recursive, and run-time
 * is at most quadratic: strlen(@str)*strlen(@pat).
 *
 * An example of the worst case is glob_match("*aaaaa", "aaaaaaaaaa");
 * it takes 6 passes over the pattern before matching the string.
 *
 * Like !fnmatch(@pat, @str, 0) and unlike the shell, this does NOT
 * treat / or leading . specially; it isn't actually used for pathnames.
 *
 * Note that according to glob(7) (and unlike bash), character classes
 * are complemented by a leading !; this does not support the regex-style
 * [^a-z] syntax.
 *
 * An opening bracket without a matching close is matched literally.
 *
 * Return: true if @pat matches all of @str, false otherwise.
 */
bool __pure glob_match(char const *pat, char const *str)
{
	/*
	 * Backtrack to previous * on mismatch and retry starting one
	 * character later in the string.  Because * matches all characters
	 * (no exception for /), it can be easily proved that there's
	 * never a need to backtrack multiple levels.
	 *
	 * back_str is only ever read after back_pat has been set by a '*'
	 * (both are assigned together below), so NULL is just a well-defined
	 * placeholder; it replaces a self-initialization that read an
	 * indeterminate value.
	 */
	char const *back_pat = NULL, *back_str = NULL;

	/*
	 * Loop over each token (character or class) in pat, matching
	 * it against the remaining unmatched tail of str.  Return false
	 * on mismatch, or true after matching the trailing nul bytes.
	 */
	for (;;) {
		unsigned char c = *str++;
		unsigned char d = *pat++;

		switch (d) {
		case '?':	/* Wildcard: anything but nul */
			if (c == '\0')
				return false;
			break;
		case '*':	/* Any-length wildcard */
			if (*pat == '\0')	/* Optimize trailing * case */
				return true;
			back_pat = pat;
			back_str = --str;	/* Allow zero-length match */
			break;
		case '[': {	/* Character class */
			bool match = false, inverted = (*pat == '!');
			char const *class = pat + inverted;
			unsigned char a = *class++;

			/*
			 * Iterate over each span in the character class.
			 * A span is either a single character a, or a
			 * range a-b.  The first span may begin with ']'.
			 *
			 * pat itself is left untouched until the closing ']'
			 * has been found, so that a malformed class can still
			 * fall back to matching the '[' literally.
			 */
			do {
				unsigned char b = a;

				if (a == '\0')	/* Malformed */
					goto literal;

				if (class[0] == '-' && class[1] != ']') {
					b = class[1];

					if (b == '\0')
						goto literal;

					class += 2;
					/* Any special action if a > b? */
				}
				match |= (a <= c && c <= b);
			} while ((a = *class++) != ']');

			if (match == inverted)
				goto backtrack;
			pat = class;
			}
			break;
		case '\\':
			/* Escaped character: compare the next one literally */
			d = *pat++;
			/*FALLTHROUGH*/
		default:	/* Literal character */
literal:
			if (c == d) {
				if (d == '\0')
					return true;
				break;
			}
backtrack:
			if (c == '\0' || !back_pat)
				return false;	/* No point continuing */
			/* Try again from last *, one character later in str. */
			pat = back_pat;
			str = ++back_str;
			break;
		}
	}
}
EXPORT_SYMBOL(glob_match);


#ifdef CONFIG_GLOB_SELFTEST

#include <linux/printk.h>
#include <linux/moduleparam.h>

/*
 * Boot with "glob.verbose=1" to show successful tests, too.
 * When this is false (the default), test() only prints failing cases.
 */
static bool verbose = false;
module_param(verbose, bool, 0);

/*
 * One self-test case: a pattern, a subject string and the expected
 * glob_match() result.
 * NOTE(review): nothing in the visible code references this struct; the
 * test driver reads its cases from the packed glob_tests string instead.
 */
struct glob_test {
	char const *pat;	/* glob pattern */
	char const *str;	/* string to match against the pattern */
	bool expected;		/* expected glob_match(pat, str) result */
};

static bool __pure __init test(char const *pat, char const *str, bool expected)
{
	bool match = glob_match(pat, str);
	bool success = match == expected;

	/* Can't get string literals into a particular section, so... */
	static char const msg_error[] __initconst =
		KERN_ERR "glob: \"%s\" vs. \"%s\": %s *** ERROR ***\n";
	static char const msg_ok[] __initconst =
		KERN_DEBUG "glob: \"%s\" vs. \"%s\": %s OK\n";
	static char const mismatch[] __initconst = "mismatch";
	char const *message;

	if (!success)
		message = msg_error;
	else if (verbose)
		message = msg_ok;
	else
		return success;

	printk(message, pat, str, mismatch + 3*match);
	return success;
}

/*
 * The tests are all jammed together in one array to make it simpler
 * to place that array in the .init.rodata section.  The obvious
 * "array of structures containing char *" has no way to force the
 * pointed-to strings to be in a particular section.
 *
 * Anyway, a test consists of:
 * 1. Expected glob_match result: '1' or '0'.
 * 2. Pattern to match: nul-terminated string
 * 3. String to match against: nul-terminated string
 *
 * The list of tests is terminated with a final '\0' instead of
 * a glob_match result character.  That final '\0' is not written out
 * below: adjacent string literals are concatenated into one, and the
 * terminating nul the compiler appends to it serves as the end marker.
 */
static char const glob_tests[] __initconst =
	/* Some basic tests */
	"1" "a\0" "a\0"
	"0" "a\0" "b\0"
	"0" "a\0" "aa\0"
	"0" "a\0" "\0"
	"1" "\0" "\0"
	"0" "\0" "a\0"
	/* Simple character class tests */
	"1" "[a]\0" "a\0"
	"0" "[a]\0" "b\0"
	"0" "[!a]\0" "a\0"
	"1" "[!a]\0" "b\0"
	"1" "[ab]\0" "a\0"
	"1" "[ab]\0" "b\0"
	"0" "[ab]\0" "c\0"
	"1" "[!ab]\0" "c\0"
	"1" "[a-c]\0" "b\0"
	"0" "[a-c]\0" "d\0"
	/* Corner cases in character class parsing */
	"1" "[a-c-e-g]\0" "-\0"
	"0" "[a-c-e-g]\0" "d\0"
	"1" "[a-c-e-g]\0" "f\0"
	"1" "[]a-ceg-ik[]\0" "a\0"
	"1" "[]a-ceg-ik[]\0" "]\0"
	"1" "[]a-ceg-ik[]\0" "[\0"
	"1" "[]a-ceg-ik[]\0" "h\0"
	"0" "[]a-ceg-ik[]\0" "f\0"
	"0" "[!]a-ceg-ik[]\0" "h\0"
	"0" "[!]a-ceg-ik[]\0" "]\0"
	"1" "[!]a-ceg-ik[]\0" "f\0"
	/* Simple wild cards */
	"1" "?\0" "a\0"
	"0" "?\0" "aa\0"
	"0" "??\0" "a\0"
	"1" "?x?\0" "axb\0"
	"0" "?x?\0" "abx\0"
	"0" "?x?\0" "xab\0"
	/* Asterisk wild cards (backtracking) */
	"0" "*??\0" "a\0"
	"1" "*??\0" "ab\0"
	"1" "*??\0" "abc\0"
	"1" "*??\0" "abcd\0"
	"0" "??*\0" "a\0"
	"1" "??*\0" "ab\0"
	"1" "??*\0" "abc\0"
	"1" "??*\0" "abcd\0"
	"0" "?*?\0" "a\0"
	"1" "?*?\0" "ab\0"
	"1" "?*?\0" "abc\0"
	"1" "?*?\0" "abcd\0"
	"1" "*b\0" "b\0"
	"1" "*b\0" "ab\0"
	"0" "*b\0" "ba\0"
	"1" "*b\0" "bb\0"
	"1" "*b\0" "abb\0"
	"1" "*b\0" "bab\0"
	"1" "*bc\0" "abbc\0"
	"1" "*bc\0" "bc\0"
	"1" "*bc\0" "bbc\0"
	"1" "*bc\0" "bcbc\0"
	/* Multiple asterisks (complex backtracking) */
	"1" "*ac*\0" "abacadaeafag\0"
	"1" "*ac*ae*ag*\0" "abacadaeafag\0"
	"1" "*a*b*[bc]*[ef]*g*\0" "abacadaeafag\0"
	"0" "*a*b*[ef]*[cd]*g*\0" "abacadaeafag\0"
	"1" "*abcd*\0" "abcabcabcabcdefg\0"
	"1" "*ab*cd*\0" "abcabcabcabcdefg\0"
	"1" "*abcd*abcdef*\0" "abcabcdabcdeabcdefg\0"
	"0" "*abcd*\0" "abcabcabcabcefg\0"
	"0" "*ab*cd*\0" "abcabcabcabcefg\0";

/*
 * Module init: run every self-test packed into glob_tests, print a
 * pass/fail summary, and return 0 if all passed or -ECANCELED otherwise.
 */
static int __init glob_init(void)
{
	static char const message[] __initconst =
		KERN_INFO "glob: %u self-tests passed, %u failed\n";
	char const *cursor;
	unsigned passed = 0;
	unsigned total = 0;
	unsigned failed;

	/*
	 * Each record in glob_tests is a result byte ('1' or '0') followed
	 * by two nul-terminated strings: the pattern, then the string to
	 * match it against.  A '\0' where a result byte is expected marks
	 * the end of the list.
	 */
	for (cursor = glob_tests; *cursor != '\0'; total++) {
		/* The low bit of '0'/'1' is the expected result */
		bool expected = *cursor & 1;
		char const *pat = cursor + 1;
		char const *str = pat + strlen(pat) + 1;

		if (test(pat, str, expected))
			passed++;

		/* Step over the subject string to the next record */
		cursor = str + strlen(str) + 1;
	}

	failed = total - passed;
	printk(message, passed, failed);

	/* What's the errno for "kernel bug detected"?  Guess... */
	if (failed)
		return -ECANCELED;
	return 0;
}

/*
 * Nothing to tear down; this empty exit handler exists only so that
 * the module can be unloaded.
 */
static void __exit glob_fini(void)
{
}

/* Hook up glob_init/glob_fini as the module's init and exit handlers. */
module_init(glob_init);
module_exit(glob_fini);

#endif /* CONFIG_GLOB_SELFTEST */
Commit	Line	Data
b0125085 GS	1	#include <linux/module.h>
	2	#include <linux/glob.h>
	3
	4	/*
	5	* The only reason this code can be compiled as a module is because the
	6	* ATA code that depends on it can be as well. In practice, they're
	7	* both usually compiled in and the module overhead goes away.
	8	*/
	9	MODULE_DESCRIPTION("glob(7) matching");
	10	MODULE_LICENSE("Dual MIT/GPL");
	11
	12	/**
	13	* glob_match - Shell-style pattern matching, like !fnmatch(pat, str, 0)
	14	* @pat: Shell-style pattern to match, e.g. "*.[ch]".
	15	* @str: String to match. The pattern must match the entire string.
	16	*
	17	* Perform shell-style glob matching, returning true (1) if the match
	18	* succeeds, or false (0) if it fails. Equivalent to !fnmatch(@pat, @str, 0).
	19	*
	20	* Pattern metacharacters are ?, *, [ and \.
	21	* (And, inside character classes, !, - and ].)
	22	*
	23	* This is small and simple implementation intended for device blacklists
	24	* where a string is matched against a number of patterns. Thus, it
	25	* does not preprocess the patterns. It is non-recursive, and run-time
	26	* is at most quadratic: strlen(@str)*strlen(@pat).
	27	*
	28	* An example of the worst case is glob_match("*aaaaa", "aaaaaaaaaa");
	29	* it takes 6 passes over the pattern before matching the string.
	30	*
	31	* Like !fnmatch(@pat, @str, 0) and unlike the shell, this does NOT
	32	* treat / or leading . specially; it isn't actually used for pathnames.
	33	*
	34	* Note that according to glob(7) (and unlike bash), character classes
	35	* are complemented by a leading !; this does not support the regex-style
	36	* [^a-z] syntax.
	37	*
	38	* An opening bracket without a matching close is matched literally.
	39	*/
	40	bool __pure glob_match(char const *pat, char const *str)
	41	{
	42	/*
	43	* Backtrack to previous * on mismatch and retry starting one
	44	* character later in the string. Because * matches all characters
	45	* (no exception for /), it can be easily proved that there's
	46	* never a need to backtrack multiple levels.
	47	*/
	48	char const *back_pat = NULL, *back_str = back_str;
	49
	50	/*
	51	* Loop over each token (character or class) in pat, matching
	52	* it against the remaining unmatched tail of str. Return false
	53	* on mismatch, or true after matching the trailing nul bytes.
	54	*/
	55	for (;;) {
	56	unsigned char c = *str++;
	57	unsigned char d = *pat++;
	58
	59	switch (d) {
	60	case '?': /* Wildcard: anything but nul */
	61	if (c == '\0')
	62	return false;
	63	break;
	64	case '*': /* Any-length wildcard */
	65	if (*pat == '\0') /* Optimize trailing * case */
66	return true;
67	back_pat = pat;
68	back_str = --str; /* Allow zero-length match */
69	break;
70	case '[': { /* Character class */
71	bool match = false, inverted = (*pat == '!');
72	char const *class = pat + inverted;
73	unsigned char a = *class++;
74
75	/*
76	* Iterate over each span in the character class.
77	* A span is either a single character a, or a
78	* range a-b. The first span may begin with ']'.
79	*/
80	do {
81	unsigned char b = a;
82
83	if (a == '\0') /* Malformed */
84	goto literal;
85
86	if (class[0] == '-' && class[1] != ']') {
87	b = class[1];
88
89	if (b == '\0')
90	goto literal;
91
92	class += 2;
93	/* Any special action if a > b? */
94	}
	95	match |= (a <= c && c <= b);
96	} while ((a = *class++) != ']');
97
98	if (match == inverted)
99	goto backtrack;
100	pat = class;
101	}
102	break;
103	case '\\':
104	d = *pat++;
	105	/*FALLTHROUGH*/
106	default: /* Literal character */
107	literal:
108	if (c == d) {
109	if (d == '\0')
110	return true;
111	break;
112	}
113	backtrack:
	114	if (c == '\0' || !back_pat)
115	return false; /* No point continuing */
	116	/* Try again from last *, one character later in str. */
117	pat = back_pat;
118	str = ++back_str;
119	break;
120	}
121	}
122	}
123	EXPORT_SYMBOL(glob_match);
5f9be824 GS	124
	125
	126	#ifdef CONFIG_GLOB_SELFTEST
	127
	128	#include <linux/printk.h>
	129	#include <linux/moduleparam.h>
	130
	131	/* Boot with "glob.verbose=1" to show successful tests, too */
	132	static bool verbose = false;
	133	module_param(verbose, bool, 0);
	134
	135	struct glob_test {
	136	char const *pat, *str;
	137	bool expected;
	138	};
	139
	140	static bool __pure __init test(char const *pat, char const *str, bool expected)
	141	{
	142	bool match = glob_match(pat, str);
	143	bool success = match == expected;
	144
	145	/* Can't get string literals into a particular section, so... */
	146	static char const msg_error[] __initconst =
	147	KERN_ERR "glob: \"%s\" vs. \"%s\": %s *** ERROR ***\n";
	148	static char const msg_ok[] __initconst =
	149	KERN_DEBUG "glob: \"%s\" vs. \"%s\": %s OK\n";
	150	static char const mismatch[] __initconst = "mismatch";
	151	char const *message;
	152
	153	if (!success)
	154	message = msg_error;
	155	else if (verbose)
	156	message = msg_ok;
	157	else
	158	return success;
	159
	160	printk(message, pat, str, mismatch + 3*match);
	161	return success;
	162	}
	163
	164	/*
	165	* The tests are all jammed together in one array to make it simpler
	166	* to place that array in the .init.rodata section. The obvious
	167	* "array of structures containing char *" has no way to force the
	168	* pointed-to strings to be in a particular section.
	169	*
	170	* Anyway, a test consists of:
	171	* 1. Expected glob_match result: '1' or '0'.
	172	* 2. Pattern to match: null-terminated string
	173	* 3. String to match against: null-terminated string
	174	*
	175	* The list of tests is terminated with a final '\0' instead of
	176	* a glob_match result character.
	177	*/
	178	static char const glob_tests[] __initconst =
	179	/* Some basic tests */
	180	"1" "a\0" "a\0"
	181	"0" "a\0" "b\0"
	182	"0" "a\0" "aa\0"
	183	"0" "a\0" "\0"
	184	"1" "\0" "\0"
	185	"0" "\0" "a\0"
	186	/* Simple character class tests */
	187	"1" "[a]\0" "a\0"
188	"0" "[a]\0" "b\0"
189	"0" "[!a]\0" "a\0"
190	"1" "[!a]\0" "b\0"
191	"1" "[ab]\0" "a\0"
192	"1" "[ab]\0" "b\0"
193	"0" "[ab]\0" "c\0"
194	"1" "[!ab]\0" "c\0"
195	"1" "[a-c]\0" "b\0"
196	"0" "[a-c]\0" "d\0"
197	/* Corner cases in character class parsing */
198	"1" "[a-c-e-g]\0" "-\0"
199	"0" "[a-c-e-g]\0" "d\0"
200	"1" "[a-c-e-g]\0" "f\0"
201	"1" "[]a-ceg-ik[]\0" "a\0"
202	"1" "[]a-ceg-ik[]\0" "]\0"
203	"1" "[]a-ceg-ik[]\0" "[\0"
204	"1" "[]a-ceg-ik[]\0" "h\0"
205	"0" "[]a-ceg-ik[]\0" "f\0"
206	"0" "[!]a-ceg-ik[]\0" "h\0"
207	"0" "[!]a-ceg-ik[]\0" "]\0"
208	"1" "[!]a-ceg-ik[]\0" "f\0"
209	/* Simple wild cards */
210	"1" "?\0" "a\0"
211	"0" "?\0" "aa\0"
212	"0" "??\0" "a\0"
213	"1" "?x?\0" "axb\0"
214	"0" "?x?\0" "abx\0"
215	"0" "?x?\0" "xab\0"
216	/* Asterisk wild cards (backtracking) */
217	"0" "*??\0" "a\0"
218	"1" "*??\0" "ab\0"
219	"1" "*??\0" "abc\0"
220	"1" "*??\0" "abcd\0"
221	"0" "??*\0" "a\0"
222	"1" "??*\0" "ab\0"
223	"1" "??*\0" "abc\0"
224	"1" "??*\0" "abcd\0"
225	"0" "?*?\0" "a\0"
226	"1" "?*?\0" "ab\0"
227	"1" "?*?\0" "abc\0"
228	"1" "?*?\0" "abcd\0"
229	"1" "*b\0" "b\0"
230	"1" "*b\0" "ab\0"
231	"0" "*b\0" "ba\0"
232	"1" "*b\0" "bb\0"
233	"1" "*b\0" "abb\0"
234	"1" "*b\0" "bab\0"
235	"1" "*bc\0" "abbc\0"
236	"1" "*bc\0" "bc\0"
237	"1" "*bc\0" "bbc\0"
238	"1" "*bc\0" "bcbc\0"
239	/* Multiple asterisks (complex backtracking) */
	240	"1" "*ac*\0" "abacadaeafag\0"
	241	"1" "*ac*ae*ag*\0" "abacadaeafag\0"
	242	"1" "*a*b*[bc]*[ef]*g*\0" "abacadaeafag\0"
	243	"0" "*a*b*[ef]*[cd]*g*\0" "abacadaeafag\0"
	244	"1" "*abcd*\0" "abcabcabcabcdefg\0"
	245	"1" "*ab*cd*\0" "abcabcabcabcdefg\0"
	246	"1" "*abcd*abcdef*\0" "abcabcdabcdeabcdefg\0"
	247	"0" "*abcd*\0" "abcabcabcabcefg\0"
	248	"0" "*ab*cd*\0" "abcabcabcabcefg\0";
249
250	static int __init glob_init(void)
251	{
252	unsigned successes = 0;
253	unsigned n = 0;
254	char const *p = glob_tests;
255	static char const message[] __initconst =
256	KERN_INFO "glob: %u self-tests passed, %u failed\n";
257
258	/*
259	* Tests are jammed together in a string. The first byte is '1'
260	* or '0' to indicate the expected outcome, or '\0' to indicate the
261	* end of the tests. Then come two null-terminated strings: the
262	* pattern and the string to match it against.
263	*/
264	while (*p) {
265	bool expected = *p++ & 1;
266	char const *pat = p;
267
268	p += strlen(p) + 1;
269	successes += test(pat, p, expected);
270	p += strlen(p) + 1;
271	n++;
272	}
273
274	n -= successes;
275	printk(message, successes, n);
276
277	/* What's the errno for "kernel bug detected"? Guess... */
278	return n ? -ECANCELED : 0;
279	}
280
281	/* We need a dummy exit function to allow unload */
282	static void __exit glob_fini(void) { }
283
284	module_init(glob_init);
285	module_exit(glob_fini);
286
287	#endif /* CONFIG_GLOB_SELFTEST */