// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2017 Western Digital Corporation or its affiliates.
 *
 * This file is released under the GPL.
 */

#include "dm-zoned.h"

#include <linux/module.h>

#define	DM_MSG_PREFIX		"zoned reclaim"

struct dmz_reclaim {
	struct dmz_metadata     *metadata;
	struct dmz_dev		*dev;

	/* Periodic reclaim work and the workqueue running it */
	struct delayed_work	work;
	struct workqueue_struct *wq;

	/* Zone copy: kcopyd client, its throttle and the last copy error */
	struct dm_kcopyd_client	*kc;
	struct dm_kcopyd_throttle kc_throttle;
	int			kc_err;

	/* Reclaim state flags (see enum below) */
	unsigned long		flags;

	/* Last target access time */
	unsigned long		atime;
};

/*
 * Reclaim state flags.
 */
enum {
	DMZ_RECLAIM_KCOPY,
};

/*
 * Number of seconds of target BIO inactivity to consider the target idle.
 */
#define DMZ_IDLE_PERIOD			(10UL * HZ)

/*
 * Percentage of unmapped (free) random zones below which reclaim starts
 * even if the target is busy.
 */
#define DMZ_RECLAIM_LOW_UNMAP_RND	30

/*
 * Percentage of unmapped (free) random zones above which reclaim will
 * stop if the target is busy.
 */
#define DMZ_RECLAIM_HIGH_UNMAP_RND	50

/*
 * Align a sequential zone write pointer to chunk_block.
 */
static int dmz_reclaim_align_wp(struct dmz_reclaim *zrc, struct dm_zone *zone,
				sector_t block)
{
	struct dmz_metadata *zmd = zrc->metadata;
	sector_t wp_block = zone->wp_block;
	unsigned int nr_blocks;
	int ret;

	if (wp_block == block)
		return 0;

	if (wp_block > block)
		return -EIO;

	/*
	 * Zero out the space between the write
	 * pointer and the requested position.
	 */
	nr_blocks = block - wp_block;
	ret = blkdev_issue_zeroout(zrc->dev->bdev,
				   dmz_start_sect(zmd, zone) + dmz_blk2sect(wp_block),
				   dmz_blk2sect(nr_blocks), GFP_NOIO, 0);
	if (ret) {
		dmz_dev_err(zrc->dev,
			    "Align zone %u wp %llu to %llu (wp+%u) blocks failed %d",
			    dmz_id(zmd, zone), (unsigned long long)wp_block,
			    (unsigned long long)block, nr_blocks, ret);
		return ret;
	}

	zone->wp_block = block;

	return 0;
}
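
/*
 * For example, if the destination zone write pointer is at block 100 and the
 * next valid block to copy starts at block 104, dmz_reclaim_align_wp()
 * zeroes out blocks 100 to 103 so that the following copy at block 104
 * remains a sequential write and wp_block stays in sync with the device
 * write pointer.
 */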

/*
 * dm_kcopyd_copy end notification.
 */
static void dmz_reclaim_kcopy_end(int read_err, unsigned long write_err,
				  void *context)
{
	struct dmz_reclaim *zrc = context;

	if (read_err || write_err)
		zrc->kc_err = -EIO;
	else
		zrc->kc_err = 0;

	clear_bit_unlock(DMZ_RECLAIM_KCOPY, &zrc->flags);
	smp_mb__after_atomic();
	wake_up_bit(&zrc->flags, DMZ_RECLAIM_KCOPY);
}
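
/*
 * The completion callback above pairs with the wait in dmz_reclaim_copy():
 * clear_bit_unlock() releases DMZ_RECLAIM_KCOPY, smp_mb__after_atomic()
 * orders the flag update before the wakeup, and wake_up_bit() wakes the
 * reclaim worker sleeping in wait_on_bit_io().
 */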

/*
 * Copy valid blocks of src_zone into dst_zone.
 */
static int dmz_reclaim_copy(struct dmz_reclaim *zrc,
			    struct dm_zone *src_zone, struct dm_zone *dst_zone)
{
	struct dmz_metadata *zmd = zrc->metadata;
	struct dmz_dev *dev = zrc->dev;
	struct dm_io_region src, dst;
	sector_t block = 0, end_block;
	sector_t nr_blocks;
	sector_t src_zone_block;
	sector_t dst_zone_block;
	unsigned long flags = 0;
	int ret;

	if (dmz_is_seq(src_zone))
		end_block = src_zone->wp_block;
	else
		end_block = dev->zone_nr_blocks;
	src_zone_block = dmz_start_block(zmd, src_zone);
	dst_zone_block = dmz_start_block(zmd, dst_zone);

	if (dmz_is_seq(dst_zone))
		set_bit(DM_KCOPYD_WRITE_SEQ, &flags);

	while (block < end_block) {
		if (dev->flags & DMZ_BDEV_DYING)
			return -EIO;

		/* Get a valid region from the source zone */
		ret = dmz_first_valid_block(zmd, src_zone, &block);
		if (ret <= 0)
			return ret;
		nr_blocks = ret;

		/*
		 * If we are writing in a sequential zone, we must make sure
		 * that writes are sequential. So zero out any hole between
		 * the destination zone write pointer and this block.
		 */
		if (dmz_is_seq(dst_zone)) {
			ret = dmz_reclaim_align_wp(zrc, dst_zone, block);
			if (ret)
				return ret;
		}

		src.bdev = dev->bdev;
		src.sector = dmz_blk2sect(src_zone_block + block);
		src.count = dmz_blk2sect(nr_blocks);

		dst.bdev = dev->bdev;
		dst.sector = dmz_blk2sect(dst_zone_block + block);
		dst.count = src.count;

		/* Copy the valid region */
		set_bit(DMZ_RECLAIM_KCOPY, &zrc->flags);
		dm_kcopyd_copy(zrc->kc, &src, 1, &dst, flags,
			       dmz_reclaim_kcopy_end, zrc);

		/* Wait for copy to complete */
		wait_on_bit_io(&zrc->flags, DMZ_RECLAIM_KCOPY,
			       TASK_UNINTERRUPTIBLE);
		if (zrc->kc_err)
			return zrc->kc_err;

		block += nr_blocks;
		if (dmz_is_seq(dst_zone))
			dst_zone->wp_block = block;
	}

	return 0;
}

/*
 * Move the valid blocks of dzone's buffer zone into dzone (after its write
 * pointer) and free the buffer zone.
 */
static int dmz_reclaim_buf(struct dmz_reclaim *zrc, struct dm_zone *dzone)
{
	struct dm_zone *bzone = dzone->bzone;
	sector_t chunk_block = dzone->wp_block;
	struct dmz_metadata *zmd = zrc->metadata;
	int ret;

	dmz_dev_debug(zrc->dev,
		      "Chunk %u, move buf zone %u (weight %u) to data zone %u (weight %u)",
		      dzone->chunk, dmz_id(zmd, bzone), dmz_weight(bzone),
		      dmz_id(zmd, dzone), dmz_weight(dzone));

	/* Flush the buffer zone into the data zone */
	ret = dmz_reclaim_copy(zrc, bzone, dzone);
	if (ret < 0)
		return ret;

	dmz_lock_flush(zmd);

	/* Validate copied blocks */
	ret = dmz_merge_valid_blocks(zmd, bzone, dzone, chunk_block);
	if (ret == 0) {
		/* Free the buffer zone */
		dmz_invalidate_blocks(zmd, bzone, 0, zrc->dev->zone_nr_blocks);
		dmz_lock_map(zmd);
		dmz_unmap_zone(zmd, bzone);
		dmz_unlock_zone_reclaim(dzone);
		dmz_free_zone(zmd, bzone);
		dmz_unlock_map(zmd);
	}

	dmz_unlock_flush(zmd);

	return ret;
}

/*
 * Merge valid blocks of dzone into its buffer zone and free dzone.
 */
static int dmz_reclaim_seq_data(struct dmz_reclaim *zrc, struct dm_zone *dzone)
{
	unsigned int chunk = dzone->chunk;
	struct dm_zone *bzone = dzone->bzone;
	struct dmz_metadata *zmd = zrc->metadata;
	int ret = 0;

	dmz_dev_debug(zrc->dev,
		      "Chunk %u, move data zone %u (weight %u) to buf zone %u (weight %u)",
		      chunk, dmz_id(zmd, dzone), dmz_weight(dzone),
		      dmz_id(zmd, bzone), dmz_weight(bzone));

	/* Flush data zone into the buffer zone */
	ret = dmz_reclaim_copy(zrc, dzone, bzone);
	if (ret < 0)
		return ret;

	dmz_lock_flush(zmd);

	/* Validate copied blocks */
	ret = dmz_merge_valid_blocks(zmd, dzone, bzone, 0);
	if (ret == 0) {
		/*
		 * Free the data zone and remap the chunk to
		 * the buffer zone.
		 */
		dmz_invalidate_blocks(zmd, dzone, 0, zrc->dev->zone_nr_blocks);
		dmz_lock_map(zmd);
		dmz_unmap_zone(zmd, bzone);
		dmz_unmap_zone(zmd, dzone);
		dmz_unlock_zone_reclaim(dzone);
		dmz_free_zone(zmd, dzone);
		dmz_map_zone(zmd, bzone, chunk);
		dmz_unlock_map(zmd);
	}

	dmz_unlock_flush(zmd);

	return ret;
}

/*
 * Move valid blocks of the random data zone dzone into a free sequential zone.
 * Once blocks are moved, remap the zone chunk to the sequential zone.
 */
static int dmz_reclaim_rnd_data(struct dmz_reclaim *zrc, struct dm_zone *dzone)
{
	unsigned int chunk = dzone->chunk;
	struct dm_zone *szone = NULL;
	struct dmz_metadata *zmd = zrc->metadata;
	int ret;

	/* Get a free sequential zone */
	dmz_lock_map(zmd);
	szone = dmz_alloc_zone(zmd, DMZ_ALLOC_RECLAIM);
	dmz_unlock_map(zmd);
	if (!szone)
		return -ENOSPC;

	dmz_dev_debug(zrc->dev,
		      "Chunk %u, move rnd zone %u (weight %u) to seq zone %u",
		      chunk, dmz_id(zmd, dzone), dmz_weight(dzone),
		      dmz_id(zmd, szone));

	/* Flush the random data zone into the sequential zone */
	ret = dmz_reclaim_copy(zrc, dzone, szone);

	dmz_lock_flush(zmd);

	if (ret == 0) {
		/* Validate copied blocks */
		ret = dmz_copy_valid_blocks(zmd, dzone, szone);
	}
	if (ret) {
		/* Free the sequential zone */
		dmz_lock_map(zmd);
		dmz_free_zone(zmd, szone);
		dmz_unlock_map(zmd);
	} else {
		/* Free the data zone and remap the chunk */
		dmz_invalidate_blocks(zmd, dzone, 0, zrc->dev->zone_nr_blocks);
		dmz_lock_map(zmd);
		dmz_unmap_zone(zmd, dzone);
		dmz_unlock_zone_reclaim(dzone);
		dmz_free_zone(zmd, dzone);
		dmz_map_zone(zmd, szone, chunk);
		dmz_unlock_map(zmd);
	}

	dmz_unlock_flush(zmd);

	return ret;
}

/*
 * Reclaim an empty zone.
 */
static void dmz_reclaim_empty(struct dmz_reclaim *zrc, struct dm_zone *dzone)
{
	struct dmz_metadata *zmd = zrc->metadata;

	dmz_lock_flush(zmd);
	dmz_lock_map(zmd);
	dmz_unmap_zone(zmd, dzone);
	dmz_unlock_zone_reclaim(dzone);
	dmz_free_zone(zmd, dzone);
	dmz_unlock_map(zmd);
	dmz_unlock_flush(zmd);
}
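
/*
 * dmz_do_reclaim() below picks one of the strategies implemented above for
 * the zone returned by dmz_get_zone_for_reclaim(): an empty random zone is
 * simply unmapped and freed, a random zone holding valid data is moved to a
 * free sequential zone, and a buffered sequential zone is reclaimed either
 * by folding its buffer zone back into it or by merging the data zone into
 * its buffer zone.
 */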

/*
 * Find a candidate zone for reclaim and process it.
 */
static int dmz_do_reclaim(struct dmz_reclaim *zrc)
{
	struct dmz_metadata *zmd = zrc->metadata;
	struct dm_zone *dzone;
	struct dm_zone *rzone;
	unsigned long start;
	int ret;

	/* Get a data zone */
	dzone = dmz_get_zone_for_reclaim(zmd);
	if (IS_ERR(dzone))
		return PTR_ERR(dzone);

	start = jiffies;

	if (dmz_is_rnd(dzone)) {
		if (!dmz_weight(dzone)) {
			/* Empty zone */
			dmz_reclaim_empty(zrc, dzone);
			ret = 0;
		} else {
			/*
			 * Reclaim the random data zone by moving its
			 * valid data blocks to a free sequential zone.
			 */
			ret = dmz_reclaim_rnd_data(zrc, dzone);
		}
		rzone = dzone;

	} else {
		struct dm_zone *bzone = dzone->bzone;
		sector_t chunk_block = 0;

		ret = dmz_first_valid_block(zmd, bzone, &chunk_block);
		if (ret < 0)
			goto out;

		if (ret == 0 || chunk_block >= dzone->wp_block) {
			/*
			 * The buffer zone is empty or its valid blocks are
			 * after the data zone write pointer.
			 */
			ret = dmz_reclaim_buf(zrc, dzone);
			rzone = bzone;
		} else {
			/*
			 * Reclaim the data zone by merging it into the
			 * buffer zone so that the buffer zone itself can
			 * be later reclaimed.
			 */
			ret = dmz_reclaim_seq_data(zrc, dzone);
			rzone = dzone;
		}
	}
out:
	if (ret) {
		dmz_unlock_zone_reclaim(dzone);
		return ret;
	}

	ret = dmz_flush_metadata(zrc->metadata);
	if (ret) {
		dmz_dev_debug(zrc->dev,
			      "Metadata flush for zone %u failed, err %d\n",
			      dmz_id(zmd, rzone), ret);
		return ret;
	}

	dmz_dev_debug(zrc->dev, "Reclaimed zone %u in %u ms",
		      dmz_id(zmd, rzone), jiffies_to_msecs(jiffies - start));
	return 0;
}

/*
 * Test if the target device is idle.
 */
static inline int dmz_target_idle(struct dmz_reclaim *zrc)
{
	return time_is_before_jiffies(zrc->atime + DMZ_IDLE_PERIOD);
}

/*
 * Test if reclaim is necessary.
 */
static bool dmz_should_reclaim(struct dmz_reclaim *zrc)
{
	struct dmz_metadata *zmd = zrc->metadata;
	unsigned int nr_rnd = dmz_nr_rnd_zones(zmd);
	unsigned int nr_unmap_rnd = dmz_nr_unmap_rnd_zones(zmd);
	unsigned int p_unmap_rnd = nr_unmap_rnd * 100 / nr_rnd;

	/* Reclaim when idle */
	if (dmz_target_idle(zrc) && nr_unmap_rnd < nr_rnd)
		return true;

	/* If there are still plenty of random zones, do not reclaim */
	if (p_unmap_rnd >= DMZ_RECLAIM_HIGH_UNMAP_RND)
		return false;

	/*
	 * If the percentage of unmapped random zones is low,
	 * reclaim even if the target is busy.
	 */
	return p_unmap_rnd <= DMZ_RECLAIM_LOW_UNMAP_RND;
}
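
/*
 * Example with the thresholds defined above: an idle target is reclaimed
 * whenever at least one random zone is still mapped, while a busy target is
 * reclaimed only while 30% (DMZ_RECLAIM_LOW_UNMAP_RND) or fewer of its
 * random zones are unmapped, and never once 50%
 * (DMZ_RECLAIM_HIGH_UNMAP_RND) or more are free.
 */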

/*
 * Reclaim work function.
 */
static void dmz_reclaim_work(struct work_struct *work)
{
	struct dmz_reclaim *zrc = container_of(work, struct dmz_reclaim, work.work);
	struct dmz_metadata *zmd = zrc->metadata;
	unsigned int nr_rnd, nr_unmap_rnd;
	unsigned int p_unmap_rnd;
	int ret;

	if (dmz_bdev_is_dying(zrc->dev))
		return;

	if (!dmz_should_reclaim(zrc)) {
		mod_delayed_work(zrc->wq, &zrc->work, DMZ_IDLE_PERIOD);
		return;
	}

	/*
	 * We need to start reclaiming random zones: set up zone copy
	 * throttling to go fast if we are very low on free random zones,
	 * and slower if some free random zones are still available, so
	 * that the impact on the user workload is minimized.
	 */
	nr_rnd = dmz_nr_rnd_zones(zmd);
	nr_unmap_rnd = dmz_nr_unmap_rnd_zones(zmd);
	p_unmap_rnd = nr_unmap_rnd * 100 / nr_rnd;
	if (dmz_target_idle(zrc) || p_unmap_rnd < DMZ_RECLAIM_LOW_UNMAP_RND / 2) {
		/* Idle or very low percentage: go fast */
		zrc->kc_throttle.throttle = 100;
	} else {
		/* Busy but we still have some random zone: throttle */
		zrc->kc_throttle.throttle = min(75U, 100U - p_unmap_rnd / 2);
	}

	dmz_dev_debug(zrc->dev,
		      "Reclaim (%u): %s, %u%% free rnd zones (%u/%u)",
		      zrc->kc_throttle.throttle,
		      (dmz_target_idle(zrc) ? "Idle" : "Busy"),
		      p_unmap_rnd, nr_unmap_rnd, nr_rnd);

	ret = dmz_do_reclaim(zrc);
	if (ret) {
		dmz_dev_debug(zrc->dev, "Reclaim error %d\n", ret);
		if (ret == -EIO)
			/*
			 * LLD might be performing some error handling sequence
			 * at the underlying device. To not interfere, do not
			 * attempt to schedule the next reclaim run immediately.
			 */
			return;
	}

	dmz_schedule_reclaim(zrc);
}
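
/*
 * Note on the throttle values used in dmz_reclaim_work(): when the target
 * is busy, dmz_should_reclaim() only lets reclaim run with p_unmap_rnd at
 * or below DMZ_RECLAIM_LOW_UNMAP_RND (30%), so 100 - p_unmap_rnd / 2 is at
 * least 85 and the min() in practice caps the busy-case throttle at 75.
 */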

/*
 * Initialize reclaim.
 */
int dmz_ctr_reclaim(struct dmz_dev *dev, struct dmz_metadata *zmd,
		    struct dmz_reclaim **reclaim)
{
	struct dmz_reclaim *zrc;
	int ret;

	zrc = kzalloc(sizeof(struct dmz_reclaim), GFP_KERNEL);
	if (!zrc)
		return -ENOMEM;

	zrc->dev = dev;
	zrc->metadata = zmd;
	zrc->atime = jiffies;

	/* Reclaim kcopyd client */
	zrc->kc = dm_kcopyd_client_create(&zrc->kc_throttle);
	if (IS_ERR(zrc->kc)) {
		ret = PTR_ERR(zrc->kc);
		zrc->kc = NULL;
		goto err;
	}

	/* Reclaim work */
	INIT_DELAYED_WORK(&zrc->work, dmz_reclaim_work);
	zrc->wq = alloc_ordered_workqueue("dmz_rwq_%s", WQ_MEM_RECLAIM,
					  dev->name);
	if (!zrc->wq) {
		ret = -ENOMEM;
		goto err;
	}

	*reclaim = zrc;
	queue_delayed_work(zrc->wq, &zrc->work, 0);

	return 0;
err:
	if (zrc->kc)
		dm_kcopyd_client_destroy(zrc->kc);
	kfree(zrc);

	return ret;
}

/*
 * Terminate reclaim.
 */
void dmz_dtr_reclaim(struct dmz_reclaim *zrc)
{
	cancel_delayed_work_sync(&zrc->work);
	destroy_workqueue(zrc->wq);
	dm_kcopyd_client_destroy(zrc->kc);
	kfree(zrc);
}

/*
 * Suspend reclaim.
 */
void dmz_suspend_reclaim(struct dmz_reclaim *zrc)
{
	cancel_delayed_work_sync(&zrc->work);
}

/*
 * Resume reclaim.
 */
void dmz_resume_reclaim(struct dmz_reclaim *zrc)
{
	queue_delayed_work(zrc->wq, &zrc->work, DMZ_IDLE_PERIOD);
}

/*
 * BIO accounting.
 */
void dmz_reclaim_bio_acc(struct dmz_reclaim *zrc)
{
	zrc->atime = jiffies;
}

/*
 * Start reclaim if necessary.
 */
void dmz_schedule_reclaim(struct dmz_reclaim *zrc)
{
	if (dmz_should_reclaim(zrc))
		mod_delayed_work(zrc->wq, &zrc->work, 0);
}