Linux Audio

Check our new training course

Loading...
v3.15
 
  1/*
  2 * lib/ts_kmp.c		Knuth-Morris-Pratt text search implementation
  3 *
  4 *		This program is free software; you can redistribute it and/or
  5 *		modify it under the terms of the GNU General Public License
  6 *		as published by the Free Software Foundation; either version
  7 *		2 of the License, or (at your option) any later version.
  8 *
  9 * Authors:	Thomas Graf <tgraf@suug.ch>
 10 *
 11 * ==========================================================================
 12 * 
 13 *   Implements a linear-time string-matching algorithm due to Knuth,
 14 *   Morris, and Pratt [1]. Their algorithm avoids the explicit
 15 *   computation of the transition function DELTA altogether. Its
 16 *   matching time is O(n), for n being length(text), using just an
 17 *   auxiliary function PI[1..m], for m being length(pattern),
 18 *   precomputed from the pattern in time O(m). The array PI allows
 19 *   the transition function DELTA to be computed efficiently
 20 *   "on the fly" as needed. Roughly speaking, for any state
 21 *   "q" = 0,1,...,m and any character "a" in SIGMA, the value
 22 *   PI["q"] contains the information that is independent of "a" and
 23 *   is needed to compute DELTA("q", "a") [2]. Since the array PI
 24 *   has only m entries, whereas DELTA has O(m|SIGMA|) entries, we
 25 *   save a factor of |SIGMA| in the preprocessing time by computing
 26 *   PI rather than DELTA.
 27 *
 28 *   [1] Cormen, Leiserson, Rivest, Stein
 29 *       Introdcution to Algorithms, 2nd Edition, MIT Press
 30 *   [2] See finite automation theory
 31 */
 32
 33#include <linux/module.h>
 34#include <linux/types.h>
 35#include <linux/string.h>
 36#include <linux/ctype.h>
 37#include <linux/textsearch.h>
 38
 39struct ts_kmp
 40{
 41	u8 *		pattern;
 42	unsigned int	pattern_len;
 43	unsigned int 	prefix_tbl[0];
 44};
 45
 46static unsigned int kmp_find(struct ts_config *conf, struct ts_state *state)
 47{
 48	struct ts_kmp *kmp = ts_config_priv(conf);
 49	unsigned int i, q = 0, text_len, consumed = state->offset;
 50	const u8 *text;
 51	const int icase = conf->flags & TS_IGNORECASE;
 52
 53	for (;;) {
 54		text_len = conf->get_next_block(consumed, &text, conf, state);
 55
 56		if (unlikely(text_len == 0))
 57			break;
 58
 59		for (i = 0; i < text_len; i++) {
 60			while (q > 0 && kmp->pattern[q]
 61			    != (icase ? toupper(text[i]) : text[i]))
 62				q = kmp->prefix_tbl[q - 1];
 63			if (kmp->pattern[q]
 64			    == (icase ? toupper(text[i]) : text[i]))
 65				q++;
 66			if (unlikely(q == kmp->pattern_len)) {
 67				state->offset = consumed + i + 1;
 68				return state->offset - kmp->pattern_len;
 69			}
 70		}
 71
 72		consumed += text_len;
 73	}
 74
 75	return UINT_MAX;
 76}
 77
 78static inline void compute_prefix_tbl(const u8 *pattern, unsigned int len,
 79				      unsigned int *prefix_tbl, int flags)
 80{
 81	unsigned int k, q;
 82	const u8 icase = flags & TS_IGNORECASE;
 83
 84	for (k = 0, q = 1; q < len; q++) {
 85		while (k > 0 && (icase ? toupper(pattern[k]) : pattern[k])
 86		    != (icase ? toupper(pattern[q]) : pattern[q]))
 87			k = prefix_tbl[k-1];
 88		if ((icase ? toupper(pattern[k]) : pattern[k])
 89		    == (icase ? toupper(pattern[q]) : pattern[q]))
 90			k++;
 91		prefix_tbl[q] = k;
 92	}
 93}
 94
 95static struct ts_config *kmp_init(const void *pattern, unsigned int len,
 96				  gfp_t gfp_mask, int flags)
 97{
 98	struct ts_config *conf;
 99	struct ts_kmp *kmp;
100	int i;
101	unsigned int prefix_tbl_len = len * sizeof(unsigned int);
102	size_t priv_size = sizeof(*kmp) + len + prefix_tbl_len;
103
104	conf = alloc_ts_config(priv_size, gfp_mask);
105	if (IS_ERR(conf))
106		return conf;
107
108	conf->flags = flags;
109	kmp = ts_config_priv(conf);
110	kmp->pattern_len = len;
111	compute_prefix_tbl(pattern, len, kmp->prefix_tbl, flags);
112	kmp->pattern = (u8 *) kmp->prefix_tbl + prefix_tbl_len;
113	if (flags & TS_IGNORECASE)
114		for (i = 0; i < len; i++)
115			kmp->pattern[i] = toupper(((u8 *)pattern)[i]);
116	else
117		memcpy(kmp->pattern, pattern, len);
118
119	return conf;
120}
121
122static void *kmp_get_pattern(struct ts_config *conf)
123{
124	struct ts_kmp *kmp = ts_config_priv(conf);
125	return kmp->pattern;
126}
127
128static unsigned int kmp_get_pattern_len(struct ts_config *conf)
129{
130	struct ts_kmp *kmp = ts_config_priv(conf);
131	return kmp->pattern_len;
132}
133
134static struct ts_ops kmp_ops = {
135	.name		  = "kmp",
136	.find		  = kmp_find,
137	.init		  = kmp_init,
138	.get_pattern	  = kmp_get_pattern,
139	.get_pattern_len  = kmp_get_pattern_len,
140	.owner		  = THIS_MODULE,
141	.list		  = LIST_HEAD_INIT(kmp_ops.list)
142};
143
144static int __init init_kmp(void)
145{
146	return textsearch_register(&kmp_ops);
147}
148
149static void __exit exit_kmp(void)
150{
151	textsearch_unregister(&kmp_ops);
152}
153
 
154MODULE_LICENSE("GPL");
155
156module_init(init_kmp);
157module_exit(exit_kmp);
v6.13.7
  1// SPDX-License-Identifier: GPL-2.0-or-later
  2/*
  3 * lib/ts_kmp.c		Knuth-Morris-Pratt text search implementation
  4 *
 
 
 
 
 
  5 * Authors:	Thomas Graf <tgraf@suug.ch>
  6 *
  7 * ==========================================================================
  8 * 
  9 *   Implements a linear-time string-matching algorithm due to Knuth,
 10 *   Morris, and Pratt [1]. Their algorithm avoids the explicit
 11 *   computation of the transition function DELTA altogether. Its
 12 *   matching time is O(n), for n being length(text), using just an
 13 *   auxiliary function PI[1..m], for m being length(pattern),
 14 *   precomputed from the pattern in time O(m). The array PI allows
 15 *   the transition function DELTA to be computed efficiently
 16 *   "on the fly" as needed. Roughly speaking, for any state
 17 *   "q" = 0,1,...,m and any character "a" in SIGMA, the value
 18 *   PI["q"] contains the information that is independent of "a" and
 19 *   is needed to compute DELTA("q", "a") [2]. Since the array PI
 20 *   has only m entries, whereas DELTA has O(m|SIGMA|) entries, we
 21 *   save a factor of |SIGMA| in the preprocessing time by computing
 22 *   PI rather than DELTA.
 23 *
 24 *   [1] Cormen, Leiserson, Rivest, Stein
 25 *       Introdcution to Algorithms, 2nd Edition, MIT Press
 26 *   [2] See finite automaton theory
 27 */
 28
 29#include <linux/module.h>
 30#include <linux/types.h>
 31#include <linux/string.h>
 32#include <linux/ctype.h>
 33#include <linux/textsearch.h>
 34
 35struct ts_kmp
 36{
 37	u8 *		pattern;
 38	unsigned int	pattern_len;
 39	unsigned int	prefix_tbl[];
 40};
 41
 42static unsigned int kmp_find(struct ts_config *conf, struct ts_state *state)
 43{
 44	struct ts_kmp *kmp = ts_config_priv(conf);
 45	unsigned int i, q = 0, text_len, consumed = state->offset;
 46	const u8 *text;
 47	const int icase = conf->flags & TS_IGNORECASE;
 48
 49	for (;;) {
 50		text_len = conf->get_next_block(consumed, &text, conf, state);
 51
 52		if (unlikely(text_len == 0))
 53			break;
 54
 55		for (i = 0; i < text_len; i++) {
 56			while (q > 0 && kmp->pattern[q]
 57			    != (icase ? toupper(text[i]) : text[i]))
 58				q = kmp->prefix_tbl[q - 1];
 59			if (kmp->pattern[q]
 60			    == (icase ? toupper(text[i]) : text[i]))
 61				q++;
 62			if (unlikely(q == kmp->pattern_len)) {
 63				state->offset = consumed + i + 1;
 64				return state->offset - kmp->pattern_len;
 65			}
 66		}
 67
 68		consumed += text_len;
 69	}
 70
 71	return UINT_MAX;
 72}
 73
 74static inline void compute_prefix_tbl(const u8 *pattern, unsigned int len,
 75				      unsigned int *prefix_tbl, int flags)
 76{
 77	unsigned int k, q;
 78	const u8 icase = flags & TS_IGNORECASE;
 79
 80	for (k = 0, q = 1; q < len; q++) {
 81		while (k > 0 && (icase ? toupper(pattern[k]) : pattern[k])
 82		    != (icase ? toupper(pattern[q]) : pattern[q]))
 83			k = prefix_tbl[k-1];
 84		if ((icase ? toupper(pattern[k]) : pattern[k])
 85		    == (icase ? toupper(pattern[q]) : pattern[q]))
 86			k++;
 87		prefix_tbl[q] = k;
 88	}
 89}
 90
 91static struct ts_config *kmp_init(const void *pattern, unsigned int len,
 92				  gfp_t gfp_mask, int flags)
 93{
 94	struct ts_config *conf;
 95	struct ts_kmp *kmp;
 96	int i;
 97	unsigned int prefix_tbl_len = len * sizeof(unsigned int);
 98	size_t priv_size = sizeof(*kmp) + len + prefix_tbl_len;
 99
100	conf = alloc_ts_config(priv_size, gfp_mask);
101	if (IS_ERR(conf))
102		return conf;
103
104	conf->flags = flags;
105	kmp = ts_config_priv(conf);
106	kmp->pattern_len = len;
107	compute_prefix_tbl(pattern, len, kmp->prefix_tbl, flags);
108	kmp->pattern = (u8 *) kmp->prefix_tbl + prefix_tbl_len;
109	if (flags & TS_IGNORECASE)
110		for (i = 0; i < len; i++)
111			kmp->pattern[i] = toupper(((u8 *)pattern)[i]);
112	else
113		memcpy(kmp->pattern, pattern, len);
114
115	return conf;
116}
117
118static void *kmp_get_pattern(struct ts_config *conf)
119{
120	struct ts_kmp *kmp = ts_config_priv(conf);
121	return kmp->pattern;
122}
123
124static unsigned int kmp_get_pattern_len(struct ts_config *conf)
125{
126	struct ts_kmp *kmp = ts_config_priv(conf);
127	return kmp->pattern_len;
128}
129
130static struct ts_ops kmp_ops = {
131	.name		  = "kmp",
132	.find		  = kmp_find,
133	.init		  = kmp_init,
134	.get_pattern	  = kmp_get_pattern,
135	.get_pattern_len  = kmp_get_pattern_len,
136	.owner		  = THIS_MODULE,
137	.list		  = LIST_HEAD_INIT(kmp_ops.list)
138};
139
140static int __init init_kmp(void)
141{
142	return textsearch_register(&kmp_ops);
143}
144
145static void __exit exit_kmp(void)
146{
147	textsearch_unregister(&kmp_ops);
148}
149
150MODULE_DESCRIPTION("Knuth-Morris-Pratt text search implementation");
151MODULE_LICENSE("GPL");
152
153module_init(init_kmp);
154module_exit(exit_kmp);