fs/mbcache.c (v4.10.11)
  1#include <linux/spinlock.h>
  2#include <linux/slab.h>
  3#include <linux/list.h>
  4#include <linux/list_bl.h>
  5#include <linux/module.h>
  6#include <linux/sched.h>
  7#include <linux/workqueue.h>
  8#include <linux/mbcache.h>
  9
 10/*
 11 * Mbcache is a simple key-value store. Keys need not be unique; however,
 12 * key-value pairs are expected to be unique (we use this fact in
 13 * mb_cache_entry_delete_block()).
 14 *
 15 * Ext2 and ext4 use this cache for deduplication of extended attribute blocks.
 16 * They use a hash of the block contents as the key and the block number as the
 17 * value. That's why keys need not be unique (different xattr blocks may end up
 18 * having the same hash). However, the block number always uniquely identifies
 19 * a cache entry.
 20 *
 21 * We provide functions for creation and removal of entries, search by key,
 22 * and a special "delete entry with given key-value pair" operation. A fixed-
 23 * size hash table is used for fast key lookups.
 24 */
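/*
 * Editor's note: a minimal usage sketch of this API; it is not part of the
 * original file. "hash" and "blocknr" stand for a hypothetical caller's own
 * key and value (e.g. a hash of an xattr block and its disk location):
 *
 *	struct mb_cache *cache;
 *	int err;
 *
 *	cache = mb_cache_create(10);			2^10 hash buckets
 *	if (!cache)
 *		return -ENOMEM;
 *	err = mb_cache_entry_create(cache, GFP_NOFS, hash, blocknr, true);
 *	if (err && err != -EBUSY)			-EBUSY: pair already cached
 *		return err;
 *
 * Entries are later looked up by key (mb_cache_entry_find_first/_next) or by
 * the unique (key, block) pair (mb_cache_entry_get), and references obtained
 * that way are dropped with mb_cache_entry_put() from <linux/mbcache.h>.
 */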
 25
 26struct mb_cache {
 27	/* Hash table of entries */
 28	struct hlist_bl_head	*c_hash;
 29	/* log2 of hash table size */
 30	int			c_bucket_bits;
 31	/* Maximum entries in cache to avoid degrading hash too much */
 32	unsigned long		c_max_entries;
 33	/* Protects c_list, c_entry_count */
 34	spinlock_t		c_list_lock;
 35	struct list_head	c_list;
 36	/* Number of entries in cache */
 37	unsigned long		c_entry_count;
 38	struct shrinker		c_shrink;
 39	/* Work for shrinking when the cache has too many entries */
 40	struct work_struct	c_shrink_work;
 41};
 42
 43static struct kmem_cache *mb_entry_cache;
 44
 45static unsigned long mb_cache_shrink(struct mb_cache *cache,
 46				     unsigned long nr_to_scan);
 47
 48static inline struct hlist_bl_head *mb_cache_entry_head(struct mb_cache *cache,
 49							u32 key)
 50{
 51	return &cache->c_hash[hash_32(key, cache->c_bucket_bits)];
 52}
 53
 54/*
 55 * Number of entries to reclaim synchronously when there are too many entries
 56 * in cache
 57 */
 58#define SYNC_SHRINK_BATCH 64
 59
 60/*
 61 * mb_cache_entry_create - create entry in cache
 62 * @cache - cache where the entry should be created
 63 * @mask - gfp mask with which the entry should be allocated
 64 * @key - key of the entry
 65 * @block - block that contains data
 66 * @reusable - is the block reusable by other inodes?
 67 *
 68 * Creates entry in @cache with key @key and records that data is stored in
 69 * block @block. The function returns -EBUSY if entry with the same key
 70 * and for the same block already exists in cache. Otherwise 0 is returned.
 71 */
 72int mb_cache_entry_create(struct mb_cache *cache, gfp_t mask, u32 key,
 73			  sector_t block, bool reusable)
 74{
 75	struct mb_cache_entry *entry, *dup;
 76	struct hlist_bl_node *dup_node;
 77	struct hlist_bl_head *head;
 78
 79	/* Schedule background reclaim if there are too many entries */
 80	if (cache->c_entry_count >= cache->c_max_entries)
 81		schedule_work(&cache->c_shrink_work);
 82	/* Do some sync reclaim if background reclaim cannot keep up */
 83	if (cache->c_entry_count >= 2*cache->c_max_entries)
 84		mb_cache_shrink(cache, SYNC_SHRINK_BATCH);
 85
 86	entry = kmem_cache_alloc(mb_entry_cache, mask);
 87	if (!entry)
 88		return -ENOMEM;
 89
 90	INIT_LIST_HEAD(&entry->e_list);
 91	/* One ref for the hash list; the LRU list ref is grabbed below */
 92	atomic_set(&entry->e_refcnt, 1);
 93	entry->e_key = key;
 94	entry->e_block = block;
 95	entry->e_reusable = reusable;
 96	head = mb_cache_entry_head(cache, key);
 97	hlist_bl_lock(head);
 98	hlist_bl_for_each_entry(dup, dup_node, head, e_hash_list) {
 99		if (dup->e_key == key && dup->e_block == block) {
100			hlist_bl_unlock(head);
101			kmem_cache_free(mb_entry_cache, entry);
102			return -EBUSY;
103		}
104	}
105	hlist_bl_add_head(&entry->e_hash_list, head);
106	hlist_bl_unlock(head);
107
108	spin_lock(&cache->c_list_lock);
109	list_add_tail(&entry->e_list, &cache->c_list);
110	/* Grab ref for LRU list */
111	atomic_inc(&entry->e_refcnt);
112	cache->c_entry_count++;
113	spin_unlock(&cache->c_list_lock);
114
115	return 0;
116}
117EXPORT_SYMBOL(mb_cache_entry_create);
118
119void __mb_cache_entry_free(struct mb_cache_entry *entry)
120{
121	kmem_cache_free(mb_entry_cache, entry);
122}
123EXPORT_SYMBOL(__mb_cache_entry_free);
124
125static struct mb_cache_entry *__entry_find(struct mb_cache *cache,
126					   struct mb_cache_entry *entry,
127					   u32 key)
128{
129	struct mb_cache_entry *old_entry = entry;
130	struct hlist_bl_node *node;
131	struct hlist_bl_head *head;
132
133	head = mb_cache_entry_head(cache, key);
134	hlist_bl_lock(head);
135	if (entry && !hlist_bl_unhashed(&entry->e_hash_list))
136		node = entry->e_hash_list.next;
137	else
138		node = hlist_bl_first(head);
139	while (node) {
140		entry = hlist_bl_entry(node, struct mb_cache_entry,
141				       e_hash_list);
142		if (entry->e_key == key && entry->e_reusable) {
143			atomic_inc(&entry->e_refcnt);
144			goto out;
145		}
146		node = node->next;
147	}
148	entry = NULL;
149out:
150	hlist_bl_unlock(head);
151	if (old_entry)
152		mb_cache_entry_put(cache, old_entry);
153
154	return entry;
155}
156
157/*
158 * mb_cache_entry_find_first - find the first reusable entry with the given key
159 * @cache: cache where we should search
160 * @key: key to look for
161 *
162 * Search in @cache for a reusable entry with key @key. Grabs reference to the
163 * first reusable entry found and returns the entry.
164 */
165struct mb_cache_entry *mb_cache_entry_find_first(struct mb_cache *cache,
166						 u32 key)
167{
168	return __entry_find(cache, NULL, key);
169}
170EXPORT_SYMBOL(mb_cache_entry_find_first);
171
172/*
173 * mb_cache_entry_find_next - find next reusable entry with the same key
174 * @cache: cache where we should search
175 * @entry: entry to start search from
176 *
177 * Finds next reusable entry in the hash chain which has the same key as @entry.
178 * If @entry is unhashed (which can happen when deletion of entry races with the
179 * search), finds the first reusable entry in the hash chain. The function drops
180 * reference to @entry and returns with a reference to the found entry.
181 */
182struct mb_cache_entry *mb_cache_entry_find_next(struct mb_cache *cache,
183						struct mb_cache_entry *entry)
184{
185	return __entry_find(cache, entry, entry->e_key);
186}
187EXPORT_SYMBOL(mb_cache_entry_find_next);
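/*
 * Editor's note: an iteration sketch, not part of the original file. A
 * hypothetical caller walks every reusable entry for a key and stops at the
 * first block it likes; "wanted_block()" is an illustrative helper:
 *
 *	struct mb_cache_entry *ce;
 *
 *	ce = mb_cache_entry_find_first(cache, hash);
 *	while (ce) {
 *		if (wanted_block(ce->e_block)) {
 *			mb_cache_entry_touch(cache, ce);
 *			mb_cache_entry_put(cache, ce);
 *			break;
 *		}
 *		ce = mb_cache_entry_find_next(cache, ce);
 *	}
 *
 * mb_cache_entry_find_next() drops the reference to the entry passed in, so
 * only an early break needs an explicit mb_cache_entry_put().
 */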
188
189/*
190 * mb_cache_entry_get - get a cache entry by block number (and key)
191 * @cache - cache we work with
192 * @key - key of block number @block
193 * @block - block number
194 */
195struct mb_cache_entry *mb_cache_entry_get(struct mb_cache *cache, u32 key,
196					  sector_t block)
197{
198	struct hlist_bl_node *node;
199	struct hlist_bl_head *head;
200	struct mb_cache_entry *entry;
201
202	head = mb_cache_entry_head(cache, key);
203	hlist_bl_lock(head);
204	hlist_bl_for_each_entry(entry, node, head, e_hash_list) {
205		if (entry->e_key == key && entry->e_block == block) {
206			atomic_inc(&entry->e_refcnt);
207			goto out;
208		}
209	}
210	entry = NULL;
211out:
212	hlist_bl_unlock(head);
213	return entry;
214}
215EXPORT_SYMBOL(mb_cache_entry_get);
216
217/* mb_cache_entry_delete_block - remove information about block from cache
218 * @cache - cache we work with
219 * @key - key of block @block
220 * @block - block number
221 *
222 * Remove entry from cache @cache with key @key with data stored in @block.
223 */
224void mb_cache_entry_delete_block(struct mb_cache *cache, u32 key,
225				 sector_t block)
226{
227	struct hlist_bl_node *node;
228	struct hlist_bl_head *head;
229	struct mb_cache_entry *entry;
230
231	head = mb_cache_entry_head(cache, key);
232	hlist_bl_lock(head);
233	hlist_bl_for_each_entry(entry, node, head, e_hash_list) {
234		if (entry->e_key == key && entry->e_block == block) {
235			/* We keep hash list reference to keep entry alive */
236			hlist_bl_del_init(&entry->e_hash_list);
237			hlist_bl_unlock(head);
238			spin_lock(&cache->c_list_lock);
239			if (!list_empty(&entry->e_list)) {
240				list_del_init(&entry->e_list);
241				cache->c_entry_count--;
242				atomic_dec(&entry->e_refcnt);
243			}
244			spin_unlock(&cache->c_list_lock);
245			mb_cache_entry_put(cache, entry);
246			return;
247		}
248	}
249	hlist_bl_unlock(head);
250}
251EXPORT_SYMBOL(mb_cache_entry_delete_block);
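/*
 * Editor's note: a removal sketch, not part of the original file. When a
 * hypothetical caller frees or rewrites the block it advertised earlier, it
 * removes the stale (key, block) pair so lookups cannot return it any more:
 *
 *	mb_cache_entry_delete_block(cache, hash, blocknr);
 */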
252
253/* mb_cache_entry_touch - cache entry got used
254 * @cache - cache the entry belongs to
255 * @entry - entry that got used
256 *
257 * Marks entry as used to give it a higher chance of surviving in the cache.
258 */
259void mb_cache_entry_touch(struct mb_cache *cache,
260			  struct mb_cache_entry *entry)
261{
262	entry->e_referenced = 1;
263}
264EXPORT_SYMBOL(mb_cache_entry_touch);
265
266static unsigned long mb_cache_count(struct shrinker *shrink,
267				    struct shrink_control *sc)
268{
269	struct mb_cache *cache = container_of(shrink, struct mb_cache,
270					      c_shrink);
271
272	return cache->c_entry_count;
273}
274
275/* Shrink number of entries in cache */
276static unsigned long mb_cache_shrink(struct mb_cache *cache,
277				     unsigned long nr_to_scan)
278{
279	struct mb_cache_entry *entry;
280	struct hlist_bl_head *head;
281	unsigned long shrunk = 0;
282
283	spin_lock(&cache->c_list_lock);
284	while (nr_to_scan-- && !list_empty(&cache->c_list)) {
285		entry = list_first_entry(&cache->c_list,
286					 struct mb_cache_entry, e_list);
287		if (entry->e_referenced) {
288			entry->e_referenced = 0;
289			list_move_tail(&entry->e_list, &cache->c_list);
290			continue;
291		}
292		list_del_init(&entry->e_list);
293		cache->c_entry_count--;
294		/*
295		 * We keep LRU list reference so that entry doesn't go away
296		 * from under us.
297		 */
298		spin_unlock(&cache->c_list_lock);
299		head = mb_cache_entry_head(cache, entry->e_key);
300		hlist_bl_lock(head);
301		if (!hlist_bl_unhashed(&entry->e_hash_list)) {
302			hlist_bl_del_init(&entry->e_hash_list);
303			atomic_dec(&entry->e_refcnt);
304		}
305		hlist_bl_unlock(head);
306		if (mb_cache_entry_put(cache, entry))
307			shrunk++;
308		cond_resched();
309		spin_lock(&cache->c_list_lock);
310	}
311	spin_unlock(&cache->c_list_lock);
312
313	return shrunk;
314}
315
316static unsigned long mb_cache_scan(struct shrinker *shrink,
317				   struct shrink_control *sc)
318{
319	struct mb_cache *cache = container_of(shrink, struct mb_cache,
320					      c_shrink);
321	return mb_cache_shrink(cache, sc->nr_to_scan);
322}
323
324/* We shrink 1/X of the cache when we have too many entries in it */
325#define SHRINK_DIVISOR 16
326
327static void mb_cache_shrink_worker(struct work_struct *work)
328{
329	struct mb_cache *cache = container_of(work, struct mb_cache,
330					      c_shrink_work);
331	mb_cache_shrink(cache, cache->c_max_entries / SHRINK_DIVISOR);
332}
333
334/*
335 * mb_cache_create - create cache
336 * @bucket_bits: log2 of the hash table size
337 *
338 * Create cache for keys with 2^bucket_bits hash entries.
339 */
340struct mb_cache *mb_cache_create(int bucket_bits)
341{
342	struct mb_cache *cache;
343	unsigned long bucket_count = 1UL << bucket_bits;
344	unsigned long i;
345
346	cache = kzalloc(sizeof(struct mb_cache), GFP_KERNEL);
347	if (!cache)
348		goto err_out;
349	cache->c_bucket_bits = bucket_bits;
350	cache->c_max_entries = bucket_count << 4;
351	INIT_LIST_HEAD(&cache->c_list);
352	spin_lock_init(&cache->c_list_lock);
353	cache->c_hash = kmalloc(bucket_count * sizeof(struct hlist_bl_head),
354				GFP_KERNEL);
355	if (!cache->c_hash) {
356		kfree(cache);
357		goto err_out;
358	}
359	for (i = 0; i < bucket_count; i++)
360		INIT_HLIST_BL_HEAD(&cache->c_hash[i]);
361
362	cache->c_shrink.count_objects = mb_cache_count;
363	cache->c_shrink.scan_objects = mb_cache_scan;
364	cache->c_shrink.seeks = DEFAULT_SEEKS;
365	if (register_shrinker(&cache->c_shrink)) {
366		kfree(cache->c_hash);
367		kfree(cache);
368		goto err_out;
369	}
370
371	INIT_WORK(&cache->c_shrink_work, mb_cache_shrink_worker);
372
373	return cache;
374
375err_out:
376	return NULL;
377}
378EXPORT_SYMBOL(mb_cache_create);
379
380/*
381 * mb_cache_destroy - destroy cache
382 * @cache: the cache to destroy
383 *
384 * Free all entries in cache and cache itself. Caller must make sure nobody
385 * (except shrinker) can reach @cache when calling this.
386 */
387void mb_cache_destroy(struct mb_cache *cache)
388{
389	struct mb_cache_entry *entry, *next;
390
391	unregister_shrinker(&cache->c_shrink);
392
393	/*
394	 * We don't bother with any locking. Cache must not be used at this
395	 * point.
396	 */
397	list_for_each_entry_safe(entry, next, &cache->c_list, e_list) {
398		if (!hlist_bl_unhashed(&entry->e_hash_list)) {
399			hlist_bl_del_init(&entry->e_hash_list);
400			atomic_dec(&entry->e_refcnt);
401		} else
402			WARN_ON(1);
403		list_del(&entry->e_list);
404		WARN_ON(atomic_read(&entry->e_refcnt) != 1);
405		mb_cache_entry_put(cache, entry);
406	}
407	kfree(cache->c_hash);
408	kfree(cache);
409}
410EXPORT_SYMBOL(mb_cache_destroy);
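/*
 * Editor's note: a cache lifecycle sketch, not part of the original file,
 * assuming a hypothetical filesystem with one cache per mount; "sbi" is an
 * illustrative per-superblock info pointer, not part of this API:
 *
 *	at mount:
 *		sbi->s_mb_cache = mb_cache_create(10);
 *		if (!sbi->s_mb_cache)
 *			return -ENOMEM;
 *
 *	at unmount, once no other users of the cache remain:
 *		mb_cache_destroy(sbi->s_mb_cache);
 *		sbi->s_mb_cache = NULL;
 */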
411
412static int __init mbcache_init(void)
413{
414	mb_entry_cache = kmem_cache_create("mbcache",
415				sizeof(struct mb_cache_entry), 0,
416				SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, NULL);
417	if (!mb_entry_cache)
418		return -ENOMEM;
419	return 0;
420}
421
422static void __exit mbcache_exit(void)
423{
424	kmem_cache_destroy(mb_entry_cache);
425}
426
427module_init(mbcache_init)
428module_exit(mbcache_exit)
429
430MODULE_AUTHOR("Jan Kara <jack@suse.cz>");
431MODULE_DESCRIPTION("Meta block cache (for extended attributes)");
432MODULE_LICENSE("GPL");
fs/mbcache.c (v3.1)
  1/*
  2 * linux/fs/mbcache.c
  3 * (C) 2001-2002 Andreas Gruenbacher, <a.gruenbacher@computer.org>
  4 */
  5
  6/*
  7 * Filesystem Meta Information Block Cache (mbcache)
  8 *
  9 * The mbcache caches blocks of block devices that need to be located
 10 * by their device/block number, as well as by other criteria (such
 11 * as the block's contents).
 12 *
 13 * There can only be one cache entry in a cache per device and block number.
 14 * Additional indexes need not be unique in this sense. The number of
 15 * additional indexes (=other criteria) can be hardwired at compile time
 16 * or specified at cache create time.
 17 *
 18 * Each cache entry is of fixed size. An entry may be `valid' or `invalid'
 19 * in the cache. A valid entry is in the main hash tables of the cache,
 20 * and may also be in the lru list. An invalid entry is not in any hashes
 21 * or lists.
 22 *
 23 * A valid cache entry is only in the lru list if no handles refer to it.
 24 * Invalid cache entries will be freed when the last handle to the cache
 25 * entry is released. Entries that cannot be freed immediately are put
 26 * back on the lru list.
 27 */
 28
 29#include <linux/kernel.h>
 30#include <linux/module.h>
 31
 32#include <linux/hash.h>
 33#include <linux/fs.h>
 34#include <linux/mm.h>
 35#include <linux/slab.h>
 36#include <linux/sched.h>
 37#include <linux/init.h>
 38#include <linux/mbcache.h>
 39
 40
 41#ifdef MB_CACHE_DEBUG
 42# define mb_debug(f...) do { \
 43		printk(KERN_DEBUG f); \
 44		printk("\n"); \
 45	} while (0)
 46#define mb_assert(c) do { if (!(c)) \
 47		printk(KERN_ERR "assertion " #c " failed\n"); \
 48	} while(0)
 49#else
 50# define mb_debug(f...) do { } while(0)
 51# define mb_assert(c) do { } while(0)
 52#endif
 53#define mb_error(f...) do { \
 54		printk(KERN_ERR f); \
 55		printk("\n"); \
 56	} while(0)
 57
 58#define MB_CACHE_WRITER ((unsigned short)~0U >> 1)
 59
 60static DECLARE_WAIT_QUEUE_HEAD(mb_cache_queue);
 61		
 62MODULE_AUTHOR("Andreas Gruenbacher <a.gruenbacher@computer.org>");
 63MODULE_DESCRIPTION("Meta block cache (for extended attributes)");
 64MODULE_LICENSE("GPL");
 65
 66EXPORT_SYMBOL(mb_cache_create);
 67EXPORT_SYMBOL(mb_cache_shrink);
 68EXPORT_SYMBOL(mb_cache_destroy);
 69EXPORT_SYMBOL(mb_cache_entry_alloc);
 70EXPORT_SYMBOL(mb_cache_entry_insert);
 71EXPORT_SYMBOL(mb_cache_entry_release);
 72EXPORT_SYMBOL(mb_cache_entry_free);
 73EXPORT_SYMBOL(mb_cache_entry_get);
 74#if !defined(MB_CACHE_INDEXES_COUNT) || (MB_CACHE_INDEXES_COUNT > 0)
 75EXPORT_SYMBOL(mb_cache_entry_find_first);
 76EXPORT_SYMBOL(mb_cache_entry_find_next);
 77#endif
 78
 79/*
 80 * Global data: list of all mbcache's, lru list, and a spinlock for
 81 * accessing cache data structures on SMP machines. The lru list is
 82 * global across all mbcaches.
 83 */
 84
 85static LIST_HEAD(mb_cache_list);
 86static LIST_HEAD(mb_cache_lru_list);
 87static DEFINE_SPINLOCK(mb_cache_spinlock);
 88
 89/*
 90 * What the mbcache registers as to get shrunk dynamically.
 91 */
 92
 93static int mb_cache_shrink_fn(struct shrinker *shrink,
 94			      struct shrink_control *sc);
 95
 96static struct shrinker mb_cache_shrinker = {
 97	.shrink = mb_cache_shrink_fn,
 98	.seeks = DEFAULT_SEEKS,
 99};
100
101static inline int
102__mb_cache_entry_is_hashed(struct mb_cache_entry *ce)
103{
104	return !list_empty(&ce->e_block_list);
105}
106
107
108static void
109__mb_cache_entry_unhash(struct mb_cache_entry *ce)
110{
111	if (__mb_cache_entry_is_hashed(ce)) {
112		list_del_init(&ce->e_block_list);
113		list_del(&ce->e_index.o_list);
114	}
115}
116
117
118static void
119__mb_cache_entry_forget(struct mb_cache_entry *ce, gfp_t gfp_mask)
120{
121	struct mb_cache *cache = ce->e_cache;
122
123	mb_assert(!(ce->e_used || ce->e_queued));
124	kmem_cache_free(cache->c_entry_cache, ce);
125	atomic_dec(&cache->c_entry_count);
126}
127
128
129static void
130__mb_cache_entry_release_unlock(struct mb_cache_entry *ce)
131	__releases(mb_cache_spinlock)
132{
133	/* Wake up all processes queuing for this cache entry. */
134	if (ce->e_queued)
135		wake_up_all(&mb_cache_queue);
136	if (ce->e_used >= MB_CACHE_WRITER)
137		ce->e_used -= MB_CACHE_WRITER;
138	ce->e_used--;
139	if (!(ce->e_used || ce->e_queued)) {
140		if (!__mb_cache_entry_is_hashed(ce))
141			goto forget;
142		mb_assert(list_empty(&ce->e_lru_list));
143		list_add_tail(&ce->e_lru_list, &mb_cache_lru_list);
144	}
145	spin_unlock(&mb_cache_spinlock);
146	return;
147forget:
148	spin_unlock(&mb_cache_spinlock);
149	__mb_cache_entry_forget(ce, GFP_KERNEL);
150}
151
152
153/*
154 * mb_cache_shrink_fn()  memory pressure callback
155 *
156 * This function is called by the kernel memory management when memory
157 * gets low.
158 *
159 * @shrink: (ignored)
160 * @sc: shrink_control passed from reclaim
161 *
162 * Returns the number of objects which are present in the cache.
163 */
164static int
165mb_cache_shrink_fn(struct shrinker *shrink, struct shrink_control *sc)
166{
167	LIST_HEAD(free_list);
168	struct mb_cache *cache;
169	struct mb_cache_entry *entry, *tmp;
170	int count = 0;
171	int nr_to_scan = sc->nr_to_scan;
172	gfp_t gfp_mask = sc->gfp_mask;
173
174	mb_debug("trying to free %d entries", nr_to_scan);
175	spin_lock(&mb_cache_spinlock);
176	while (nr_to_scan-- && !list_empty(&mb_cache_lru_list)) {
177		struct mb_cache_entry *ce =
178			list_entry(mb_cache_lru_list.next,
179				   struct mb_cache_entry, e_lru_list);
180		list_move_tail(&ce->e_lru_list, &free_list);
181		__mb_cache_entry_unhash(ce);
182	}
183	list_for_each_entry(cache, &mb_cache_list, c_cache_list) {
184		mb_debug("cache %s (%d)", cache->c_name,
185			  atomic_read(&cache->c_entry_count));
186		count += atomic_read(&cache->c_entry_count);
187	}
188	spin_unlock(&mb_cache_spinlock);
189	list_for_each_entry_safe(entry, tmp, &free_list, e_lru_list) {
190		__mb_cache_entry_forget(entry, gfp_mask);
191	}
192	return (count / 100) * sysctl_vfs_cache_pressure;
193}
194
195
196/*
197 * mb_cache_create()  create a new cache
198 *
199 * All entries in one cache are equal size. Cache entries may be from
200 * multiple devices. If this is the first mbcache created, registers
201 * the cache with kernel memory management. Returns NULL if no more
202 * memory was available.
203 *
204 * @name: name of the cache (informal)
205 * @bucket_bits: log2(number of hash buckets)
206 */
207struct mb_cache *
208mb_cache_create(const char *name, int bucket_bits)
209{
210	int n, bucket_count = 1 << bucket_bits;
211	struct mb_cache *cache = NULL;
212
213	cache = kmalloc(sizeof(struct mb_cache), GFP_KERNEL);
214	if (!cache)
215		return NULL;
216	cache->c_name = name;
217	atomic_set(&cache->c_entry_count, 0);
218	cache->c_bucket_bits = bucket_bits;
219	cache->c_block_hash = kmalloc(bucket_count * sizeof(struct list_head),
220	                              GFP_KERNEL);
221	if (!cache->c_block_hash)
222		goto fail;
223	for (n=0; n<bucket_count; n++)
224		INIT_LIST_HEAD(&cache->c_block_hash[n]);
225	cache->c_index_hash = kmalloc(bucket_count * sizeof(struct list_head),
226				      GFP_KERNEL);
227	if (!cache->c_index_hash)
228		goto fail;
229	for (n=0; n<bucket_count; n++)
230		INIT_LIST_HEAD(&cache->c_index_hash[n]);
231	cache->c_entry_cache = kmem_cache_create(name,
232		sizeof(struct mb_cache_entry), 0,
233		SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, NULL);
234	if (!cache->c_entry_cache)
235		goto fail2;
236
237	/*
238	 * Set an upper limit on the number of cache entries so that the hash
239	 * chains won't grow too long.
240	 */
241	cache->c_max_entries = bucket_count << 4;
242
243	spin_lock(&mb_cache_spinlock);
244	list_add(&cache->c_cache_list, &mb_cache_list);
245	spin_unlock(&mb_cache_spinlock);
246	return cache;
247
248fail2:
249	kfree(cache->c_index_hash);
250
251fail:
252	kfree(cache->c_block_hash);
253	kfree(cache);
254	return NULL;
255}
256
257
258/*
259 * mb_cache_shrink()
260 *
261 * Removes all cache entries of a device from the cache. All cache entries
262 * currently in use cannot be freed, and thus remain in the cache. All others
263 * are freed.
264 *
265 * @bdev: which device's cache entries to shrink
266 */
267void
268mb_cache_shrink(struct block_device *bdev)
269{
270	LIST_HEAD(free_list);
271	struct list_head *l, *ltmp;
272
273	spin_lock(&mb_cache_spinlock);
274	list_for_each_safe(l, ltmp, &mb_cache_lru_list) {
275		struct mb_cache_entry *ce =
276			list_entry(l, struct mb_cache_entry, e_lru_list);
277		if (ce->e_bdev == bdev) {
278			list_move_tail(&ce->e_lru_list, &free_list);
279			__mb_cache_entry_unhash(ce);
280		}
281	}
282	spin_unlock(&mb_cache_spinlock);
283	list_for_each_safe(l, ltmp, &free_list) {
284		__mb_cache_entry_forget(list_entry(l, struct mb_cache_entry,
285						   e_lru_list), GFP_KERNEL);
286	}
287}
288
289
290/*
291 * mb_cache_destroy()
292 *
293 * Shrinks the cache to its minimum possible size (hopefully 0 entries),
294 * and then destroys it. If this was the last mbcache, un-registers the
295 * mbcache from kernel memory management.
296 */
297void
298mb_cache_destroy(struct mb_cache *cache)
299{
300	LIST_HEAD(free_list);
301	struct list_head *l, *ltmp;
302
303	spin_lock(&mb_cache_spinlock);
304	list_for_each_safe(l, ltmp, &mb_cache_lru_list) {
305		struct mb_cache_entry *ce =
306			list_entry(l, struct mb_cache_entry, e_lru_list);
307		if (ce->e_cache == cache) {
308			list_move_tail(&ce->e_lru_list, &free_list);
309			__mb_cache_entry_unhash(ce);
310		}
311	}
312	list_del(&cache->c_cache_list);
313	spin_unlock(&mb_cache_spinlock);
314
315	list_for_each_safe(l, ltmp, &free_list) {
316		__mb_cache_entry_forget(list_entry(l, struct mb_cache_entry,
317						   e_lru_list), GFP_KERNEL);
318	}
319
320	if (atomic_read(&cache->c_entry_count) > 0) {
321		mb_error("cache %s: %d orphaned entries",
322			  cache->c_name,
323			  atomic_read(&cache->c_entry_count));
324	}
325
326	kmem_cache_destroy(cache->c_entry_cache);
327
328	kfree(cache->c_index_hash);
329	kfree(cache->c_block_hash);
330	kfree(cache);
331}
332
333/*
334 * mb_cache_entry_alloc()
335 *
336 * Allocates a new cache entry. The new entry will not be valid initially,
337 * and thus cannot be looked up yet. It should be filled with data, and
338 * then inserted into the cache using mb_cache_entry_insert(). Returns NULL
339 * if no more memory was available.
340 */
341struct mb_cache_entry *
342mb_cache_entry_alloc(struct mb_cache *cache, gfp_t gfp_flags)
343{
344	struct mb_cache_entry *ce = NULL;
345
346	if (atomic_read(&cache->c_entry_count) >= cache->c_max_entries) {
347		spin_lock(&mb_cache_spinlock);
348		if (!list_empty(&mb_cache_lru_list)) {
349			ce = list_entry(mb_cache_lru_list.next,
350					struct mb_cache_entry, e_lru_list);
351			list_del_init(&ce->e_lru_list);
352			__mb_cache_entry_unhash(ce);
353		}
354		spin_unlock(&mb_cache_spinlock);
355	}
356	if (!ce) {
357		ce = kmem_cache_alloc(cache->c_entry_cache, gfp_flags);
358		if (!ce)
359			return NULL;
360		atomic_inc(&cache->c_entry_count);
361		INIT_LIST_HEAD(&ce->e_lru_list);
362		INIT_LIST_HEAD(&ce->e_block_list);
363		ce->e_cache = cache;
364		ce->e_queued = 0;
365	}
366	ce->e_used = 1 + MB_CACHE_WRITER;
367	return ce;
368}
369
370
371/*
372 * mb_cache_entry_insert()
373 *
374 * Inserts an entry that was allocated using mb_cache_entry_alloc() into
375 * the cache. After this, the cache entry can be looked up, but is not yet
376 * in the lru list as the caller still holds a handle to it. Returns 0 on
377 * success, or -EBUSY if a cache entry for that device + block exists
378 * already (this may happen after a failed lookup, but when another process
379 * has inserted the same cache entry in the meantime).
380 *
381 * @bdev: device the cache entry belongs to
382 * @block: block number
383 * @key: lookup key
384 */
385int
386mb_cache_entry_insert(struct mb_cache_entry *ce, struct block_device *bdev,
387		      sector_t block, unsigned int key)
388{
389	struct mb_cache *cache = ce->e_cache;
390	unsigned int bucket;
391	struct list_head *l;
392	int error = -EBUSY;
393
394	bucket = hash_long((unsigned long)bdev + (block & 0xffffffff), 
395			   cache->c_bucket_bits);
396	spin_lock(&mb_cache_spinlock);
397	list_for_each_prev(l, &cache->c_block_hash[bucket]) {
398		struct mb_cache_entry *ce =
399			list_entry(l, struct mb_cache_entry, e_block_list);
400		if (ce->e_bdev == bdev && ce->e_block == block)
401			goto out;
402	}
403	__mb_cache_entry_unhash(ce);
404	ce->e_bdev = bdev;
405	ce->e_block = block;
406	list_add(&ce->e_block_list, &cache->c_block_hash[bucket]);
407	ce->e_index.o_key = key;
408	bucket = hash_long(key, cache->c_bucket_bits);
409	list_add(&ce->e_index.o_list, &cache->c_index_hash[bucket]);
410	error = 0;
411out:
412	spin_unlock(&mb_cache_spinlock);
413	return error;
414}
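/*
 * Editor's note: a usage sketch for the alloc/insert pair above, not part of
 * the original file. "bdev", "block" and "hash" stand for a hypothetical
 * caller's own values; the error branch shows how a duplicate is dropped:
 *
 *	struct mb_cache_entry *ce;
 *
 *	ce = mb_cache_entry_alloc(cache, GFP_NOFS);
 *	if (ce) {
 *		if (mb_cache_entry_insert(ce, bdev, block, hash))
 *			mb_cache_entry_free(ce);	duplicate entry exists
 *		else
 *			mb_cache_entry_release(ce);	entry is now on the lru
 *	}
 */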
415
416
417/*
418 * mb_cache_entry_release()
419 *
420 * Release a handle to a cache entry. When the last handle to a cache entry
421 * is released it is either freed (if it is invalid) or otherwise inserted
422 * in to the lru list.
423 */
424void
425mb_cache_entry_release(struct mb_cache_entry *ce)
426{
427	spin_lock(&mb_cache_spinlock);
428	__mb_cache_entry_release_unlock(ce);
429}
430
431
432/*
433 * mb_cache_entry_free()
434 *
435 * This is equivalent to the sequence mb_cache_entry_takeout() --
436 * mb_cache_entry_release().
437 */
438void
439mb_cache_entry_free(struct mb_cache_entry *ce)
440{
441	spin_lock(&mb_cache_spinlock);
442	mb_assert(list_empty(&ce->e_lru_list));
443	__mb_cache_entry_unhash(ce);
444	__mb_cache_entry_release_unlock(ce);
445}
446
447
448/*
449 * mb_cache_entry_get()
450 *
451 * Get a cache entry  by device / block number. (There can only be one entry
452 * in the cache per device and block.) Returns NULL if no such cache entry
453 * exists. The returned cache entry is locked for exclusive access ("single
454 * writer").
455 */
456struct mb_cache_entry *
457mb_cache_entry_get(struct mb_cache *cache, struct block_device *bdev,
458		   sector_t block)
459{
460	unsigned int bucket;
461	struct list_head *l;
462	struct mb_cache_entry *ce;
463
464	bucket = hash_long((unsigned long)bdev + (block & 0xffffffff),
465			   cache->c_bucket_bits);
466	spin_lock(&mb_cache_spinlock);
467	list_for_each(l, &cache->c_block_hash[bucket]) {
468		ce = list_entry(l, struct mb_cache_entry, e_block_list);
469		if (ce->e_bdev == bdev && ce->e_block == block) {
470			DEFINE_WAIT(wait);
471
472			if (!list_empty(&ce->e_lru_list))
473				list_del_init(&ce->e_lru_list);
474
475			while (ce->e_used > 0) {
476				ce->e_queued++;
477				prepare_to_wait(&mb_cache_queue, &wait,
478						TASK_UNINTERRUPTIBLE);
479				spin_unlock(&mb_cache_spinlock);
480				schedule();
481				spin_lock(&mb_cache_spinlock);
482				ce->e_queued--;
483			}
484			finish_wait(&mb_cache_queue, &wait);
485			ce->e_used += 1 + MB_CACHE_WRITER;
486
487			if (!__mb_cache_entry_is_hashed(ce)) {
488				__mb_cache_entry_release_unlock(ce);
489				return NULL;
490			}
491			goto cleanup;
492		}
493	}
494	ce = NULL;
495
496cleanup:
497	spin_unlock(&mb_cache_spinlock);
498	return ce;
499}
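/*
 * Editor's note: a sketch of exclusive access via mb_cache_entry_get(), not
 * part of the original file; "bdev" and "block" are the caller's own values:
 *
 *	struct mb_cache_entry *ce;
 *
 *	ce = mb_cache_entry_get(cache, bdev, block);
 *	if (ce) {
 *		... the entry is held with MB_CACHE_WRITER access ...
 *		mb_cache_entry_release(ce);
 *	}
 *
 * A caller that wants the entry gone instead calls mb_cache_entry_free(ce)
 * in place of the release.
 */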
500
501#if !defined(MB_CACHE_INDEXES_COUNT) || (MB_CACHE_INDEXES_COUNT > 0)
502
503static struct mb_cache_entry *
504__mb_cache_entry_find(struct list_head *l, struct list_head *head,
505		      struct block_device *bdev, unsigned int key)
506{
507	while (l != head) {
508		struct mb_cache_entry *ce =
509			list_entry(l, struct mb_cache_entry, e_index.o_list);
510		if (ce->e_bdev == bdev && ce->e_index.o_key == key) {
511			DEFINE_WAIT(wait);
512
513			if (!list_empty(&ce->e_lru_list))
514				list_del_init(&ce->e_lru_list);
515
516			/* Incrementing before holding the lock gives readers
517			   priority over writers. */
518			ce->e_used++;
519			while (ce->e_used >= MB_CACHE_WRITER) {
520				ce->e_queued++;
521				prepare_to_wait(&mb_cache_queue, &wait,
522						TASK_UNINTERRUPTIBLE);
523				spin_unlock(&mb_cache_spinlock);
524				schedule();
525				spin_lock(&mb_cache_spinlock);
526				ce->e_queued--;
527			}
528			finish_wait(&mb_cache_queue, &wait);
529
530			if (!__mb_cache_entry_is_hashed(ce)) {
531				__mb_cache_entry_release_unlock(ce);
532				spin_lock(&mb_cache_spinlock);
533				return ERR_PTR(-EAGAIN);
534			}
535			return ce;
536		}
537		l = l->next;
538	}
539	return NULL;
540}
541
542
543/*
544 * mb_cache_entry_find_first()
545 *
546 * Find the first cache entry on a given device with a certain key in
547 * an additional index. Additional matches can be found with
548 * mb_cache_entry_find_next(). Returns NULL if no match was found. The
549 * returned cache entry is locked for shared access ("multiple readers").
550 *
551 * @cache: the cache to search
552 * @bdev: the device the cache entry should belong to
553 * @key: the key in the index
554 */
555struct mb_cache_entry *
556mb_cache_entry_find_first(struct mb_cache *cache, struct block_device *bdev,
557			  unsigned int key)
558{
559	unsigned int bucket = hash_long(key, cache->c_bucket_bits);
560	struct list_head *l;
561	struct mb_cache_entry *ce;
562
563	spin_lock(&mb_cache_spinlock);
564	l = cache->c_index_hash[bucket].next;
565	ce = __mb_cache_entry_find(l, &cache->c_index_hash[bucket], bdev, key);
566	spin_unlock(&mb_cache_spinlock);
567	return ce;
568}
569
570
571/*
572 * mb_cache_entry_find_next()
573 *
574 * Find the next cache entry on a given device with a certain key in an
575 * additional index. Returns NULL if no match could be found. The previous
576 * entry is automatically released, so that mb_cache_entry_find_next() can
577 * be called like this:
578 *
579 * entry = mb_cache_entry_find_first();
580 * while (entry) {
581 * 	...
582 *	entry = mb_cache_entry_find_next(entry, ...);
583 * }
584 *
585 * @prev: The previous match
586 * @bdev: the device the cache entry should belong to
587 * @key: the key in the index
588 */
589struct mb_cache_entry *
590mb_cache_entry_find_next(struct mb_cache_entry *prev,
591			 struct block_device *bdev, unsigned int key)
592{
593	struct mb_cache *cache = prev->e_cache;
594	unsigned int bucket = hash_long(key, cache->c_bucket_bits);
595	struct list_head *l;
596	struct mb_cache_entry *ce;
597
598	spin_lock(&mb_cache_spinlock);
599	l = prev->e_index.o_list.next;
600	ce = __mb_cache_entry_find(l, &cache->c_index_hash[bucket], bdev, key);
601	__mb_cache_entry_release_unlock(prev);
602	return ce;
603}
604
605#endif  /* !defined(MB_CACHE_INDEXES_COUNT) || (MB_CACHE_INDEXES_COUNT > 0) */
606
607static int __init init_mbcache(void)
608{
609	register_shrinker(&mb_cache_shrinker);
610	return 0;
611}
612
613static void __exit exit_mbcache(void)
614{
615	unregister_shrinker(&mb_cache_shrinker);
616}
617
618module_init(init_mbcache)
619module_exit(exit_mbcache)
620