i915_gem_shmem.c - drivers/gpu/drm/i915/gem/i915_gem_shmem.c - Linux source code v3.1

Note: File does not exist in v3.1.
  1/*
  2 * SPDX-License-Identifier: MIT
  3 *
  4 * Copyright © 2014-2016 Intel Corporation
  5 */
  6
  7#include <linux/pagevec.h>
  8#include <linux/swap.h>
  9
 10#include "i915_drv.h"
 11#include "i915_gem_object.h"
 12#include "i915_scatterlist.h"
 13#include "i915_trace.h"
 14
 15/*
 16 * Move pages to appropriate lru and release the pagevec, decrementing the
 17 * ref count of those pages.
 18 */
 19static void check_release_pagevec(struct pagevec *pvec)
 20{
 21	check_move_unevictable_pages(pvec);
 22	__pagevec_release(pvec);
 23	cond_resched();
 24}
 25
 26static int shmem_get_pages(struct drm_i915_gem_object *obj)
 27{
 28	struct drm_i915_private *i915 = to_i915(obj->base.dev);
 29	const unsigned long page_count = obj->base.size / PAGE_SIZE;
 30	unsigned long i;
 31	struct address_space *mapping;
 32	struct sg_table *st;
 33	struct scatterlist *sg;
 34	struct sgt_iter sgt_iter;
 35	struct page *page;
 36	unsigned long last_pfn = 0;	/* suppress gcc warning */
 37	unsigned int max_segment = i915_sg_segment_size();
 38	unsigned int sg_page_sizes;
 39	struct pagevec pvec;
 40	gfp_t noreclaim;
 41	int ret;
 42
 43	/*
 44	 * Assert that the object is not currently in any GPU domain. As it
 45	 * wasn't in the GTT, there shouldn't be any way it could have been in
 46	 * a GPU cache
 47	 */
 48	GEM_BUG_ON(obj->read_domains & I915_GEM_GPU_DOMAINS);
 49	GEM_BUG_ON(obj->write_domain & I915_GEM_GPU_DOMAINS);
 50
 51	/*
 52	 * If there's no chance of allocating enough pages for the whole
 53	 * object, bail early.
 54	 */
 55	if (page_count > totalram_pages())
 56		return -ENOMEM;
 57
 58	st = kmalloc(sizeof(*st), GFP_KERNEL);
 59	if (!st)
 60		return -ENOMEM;
 61
 62rebuild_st:
 63	if (sg_alloc_table(st, page_count, GFP_KERNEL)) {
 64		kfree(st);
 65		return -ENOMEM;
 66	}
 67
 68	/*
 69	 * Get the list of pages out of our struct file.  They'll be pinned
 70	 * at this point until we release them.
 71	 *
 72	 * Fail silently without starting the shrinker
 73	 */
 74	mapping = obj->base.filp->f_mapping;
 75	mapping_set_unevictable(mapping);
 76	noreclaim = mapping_gfp_constraint(mapping, ~__GFP_RECLAIM);
 77	noreclaim |= __GFP_NORETRY | __GFP_NOWARN;
 78
 79	sg = st->sgl;
 80	st->nents = 0;
 81	sg_page_sizes = 0;
 82	for (i = 0; i < page_count; i++) {
 83		const unsigned int shrink[] = {
 84			I915_SHRINK_BOUND | I915_SHRINK_UNBOUND,
 85			0,
 86		}, *s = shrink;
 87		gfp_t gfp = noreclaim;
 88
 89		do {
 90			cond_resched();
 91			page = shmem_read_mapping_page_gfp(mapping, i, gfp);
 92			if (!IS_ERR(page))
 93				break;
 94
 95			if (!*s) {
 96				ret = PTR_ERR(page);
 97				goto err_sg;
 98			}
 99
100			i915_gem_shrink(i915, 2 * page_count, NULL, *s++);
101
102			/*
103			 * We've tried hard to allocate the memory by reaping
104			 * our own buffer, now let the real VM do its job and
105			 * go down in flames if truly OOM.
106			 *
107			 * However, since graphics tend to be disposable,
108			 * defer the oom here by reporting the ENOMEM back
109			 * to userspace.
110			 */
111			if (!*s) {
112				/* reclaim and warn, but no oom */
113				gfp = mapping_gfp_mask(mapping);
114
115				/*
116				 * Our bo are always dirty and so we require
117				 * kswapd to reclaim our pages (direct reclaim
118				 * does not effectively begin pageout of our
119				 * buffers on its own). However, direct reclaim
120				 * only waits for kswapd when under allocation
121				 * congestion. So as a result __GFP_RECLAIM is
122				 * unreliable and fails to actually reclaim our
123				 * dirty pages -- unless you try over and over
124				 * again with !__GFP_NORETRY. However, we still
125				 * want to fail this allocation rather than
126				 * trigger the out-of-memory killer and for
127				 * this we want __GFP_RETRY_MAYFAIL.
128				 */
129				gfp |= __GFP_RETRY_MAYFAIL;
130			}
131		} while (1);
132
133		if (!i ||
134		    sg->length >= max_segment ||
135		    page_to_pfn(page) != last_pfn + 1) {
136			if (i) {
137				sg_page_sizes |= sg->length;
138				sg = sg_next(sg);
139			}
140			st->nents++;
141			sg_set_page(sg, page, PAGE_SIZE, 0);
142		} else {
143			sg->length += PAGE_SIZE;
144		}
145		last_pfn = page_to_pfn(page);
146
147		/* Check that the i965g/gm workaround works. */
148		WARN_ON((gfp & __GFP_DMA32) && (last_pfn >= 0x00100000UL));
149	}
150	if (sg) { /* loop terminated early; short sg table */
151		sg_page_sizes |= sg->length;
152		sg_mark_end(sg);
153	}
154
155	/* Trim unused sg entries to avoid wasting memory. */
156	i915_sg_trim(st);
157
158	ret = i915_gem_gtt_prepare_pages(obj, st);
159	if (ret) {
160		/*
161		 * DMA remapping failed? One possible cause is that
162		 * it could not reserve enough large entries, asking
163		 * for PAGE_SIZE chunks instead may be helpful.
164		 */
165		if (max_segment > PAGE_SIZE) {
166			for_each_sgt_page(page, sgt_iter, st)
167				put_page(page);
168			sg_free_table(st);
169
170			max_segment = PAGE_SIZE;
171			goto rebuild_st;
172		} else {
173			dev_warn(&i915->drm.pdev->dev,
174				 "Failed to DMA remap %lu pages\n",
175				 page_count);
176			goto err_pages;
177		}
178	}
179
180	if (i915_gem_object_needs_bit17_swizzle(obj))
181		i915_gem_object_do_bit_17_swizzle(obj, st);
182
183	__i915_gem_object_set_pages(obj, st, sg_page_sizes);
184
185	return 0;
186
187err_sg:
188	sg_mark_end(sg);
189err_pages:
190	mapping_clear_unevictable(mapping);
191	pagevec_init(&pvec);
192	for_each_sgt_page(page, sgt_iter, st) {
193		if (!pagevec_add(&pvec, page))
194			check_release_pagevec(&pvec);
195	}
196	if (pagevec_count(&pvec))
197		check_release_pagevec(&pvec);
198	sg_free_table(st);
199	kfree(st);
200
201	/*
202	 * shmemfs first checks if there is enough memory to allocate the page
203	 * and reports ENOSPC should there be insufficient, along with the usual
204	 * ENOMEM for a genuine allocation failure.
205	 *
206	 * We use ENOSPC in our driver to mean that we have run out of aperture
207	 * space and so want to translate the error from shmemfs back to our
208	 * usual understanding of ENOMEM.
209	 */
210	if (ret == -ENOSPC)
211		ret = -ENOMEM;
212
213	return ret;
214}
215
216static void
217shmem_truncate(struct drm_i915_gem_object *obj)
218{
219	/*
220	 * Our goal here is to return as much of the memory as
221	 * is possible back to the system as we are called from OOM.
222	 * To do this we must instruct the shmfs to drop all of its
223	 * backing pages, *now*.
224	 */
225	shmem_truncate_range(file_inode(obj->base.filp), 0, (loff_t)-1);
226	obj->mm.madv = __I915_MADV_PURGED;
227	obj->mm.pages = ERR_PTR(-EFAULT);
228}
229
230static void
231shmem_writeback(struct drm_i915_gem_object *obj)
232{
233	struct address_space *mapping;
234	struct writeback_control wbc = {
235		.sync_mode = WB_SYNC_NONE,
236		.nr_to_write = SWAP_CLUSTER_MAX,
237		.range_start = 0,
238		.range_end = LLONG_MAX,
239		.for_reclaim = 1,
240	};
241	unsigned long i;
242
243	/*
244	 * Leave mmapings intact (GTT will have been revoked on unbinding,
245	 * leaving only CPU mmapings around) and add those pages to the LRU
246	 * instead of invoking writeback so they are aged and paged out
247	 * as normal.
248	 */
249	mapping = obj->base.filp->f_mapping;
250
251	/* Begin writeback on each dirty page */
252	for (i = 0; i < obj->base.size >> PAGE_SHIFT; i++) {
253		struct page *page;
254
255		page = find_lock_entry(mapping, i);
256		if (!page || xa_is_value(page))
257			continue;
258
259		if (!page_mapped(page) && clear_page_dirty_for_io(page)) {
260			int ret;
261
262			SetPageReclaim(page);
263			ret = mapping->a_ops->writepage(page, &wbc);
264			if (!PageWriteback(page))
265				ClearPageReclaim(page);
266			if (!ret)
267				goto put;
268		}
269		unlock_page(page);
270put:
271		put_page(page);
272	}
273}
274
275void
276__i915_gem_object_release_shmem(struct drm_i915_gem_object *obj,
277				struct sg_table *pages,
278				bool needs_clflush)
279{
280	GEM_BUG_ON(obj->mm.madv == __I915_MADV_PURGED);
281
282	if (obj->mm.madv == I915_MADV_DONTNEED)
283		obj->mm.dirty = false;
284
285	if (needs_clflush &&
286	    (obj->read_domains & I915_GEM_DOMAIN_CPU) == 0 &&
287	    !(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ))
288		drm_clflush_sg(pages);
289
290	__start_cpu_write(obj);
291}
292
293static void
294shmem_put_pages(struct drm_i915_gem_object *obj, struct sg_table *pages)
295{
296	struct sgt_iter sgt_iter;
297	struct pagevec pvec;
298	struct page *page;
299
300	__i915_gem_object_release_shmem(obj, pages, true);
301
302	i915_gem_gtt_finish_pages(obj, pages);
303
304	if (i915_gem_object_needs_bit17_swizzle(obj))
305		i915_gem_object_save_bit_17_swizzle(obj, pages);
306
307	mapping_clear_unevictable(file_inode(obj->base.filp)->i_mapping);
308
309	pagevec_init(&pvec);
310	for_each_sgt_page(page, sgt_iter, pages) {
311		if (obj->mm.dirty)
312			set_page_dirty(page);
313
314		if (obj->mm.madv == I915_MADV_WILLNEED)
315			mark_page_accessed(page);
316
317		if (!pagevec_add(&pvec, page))
318			check_release_pagevec(&pvec);
319	}
320	if (pagevec_count(&pvec))
321		check_release_pagevec(&pvec);
322	obj->mm.dirty = false;
323
324	sg_free_table(pages);
325	kfree(pages);
326}
327
328static int
329shmem_pwrite(struct drm_i915_gem_object *obj,
330	     const struct drm_i915_gem_pwrite *arg)
331{
332	struct address_space *mapping = obj->base.filp->f_mapping;
333	char __user *user_data = u64_to_user_ptr(arg->data_ptr);
334	u64 remain, offset;
335	unsigned int pg;
336
337	/* Caller already validated user args */
338	GEM_BUG_ON(!access_ok(user_data, arg->size));
339
340	/*
341	 * Before we instantiate/pin the backing store for our use, we
342	 * can prepopulate the shmemfs filp efficiently using a write into
343	 * the pagecache. We avoid the penalty of instantiating all the
344	 * pages, important if the user is just writing to a few and never
345	 * uses the object on the GPU, and using a direct write into shmemfs
346	 * allows it to avoid the cost of retrieving a page (either swapin
347	 * or clearing-before-use) before it is overwritten.
348	 */
349	if (i915_gem_object_has_pages(obj))
350		return -ENODEV;
351
352	if (obj->mm.madv != I915_MADV_WILLNEED)
353		return -EFAULT;
354
355	/*
356	 * Before the pages are instantiated the object is treated as being
357	 * in the CPU domain. The pages will be clflushed as required before
358	 * use, and we can freely write into the pages directly. If userspace
359	 * races pwrite with any other operation; corruption will ensue -
360	 * that is userspace's prerogative!
361	 */
362
363	remain = arg->size;
364	offset = arg->offset;
365	pg = offset_in_page(offset);
366
367	do {
368		unsigned int len, unwritten;
369		struct page *page;
370		void *data, *vaddr;
371		int err;
372		char c;
373
374		len = PAGE_SIZE - pg;
375		if (len > remain)
376			len = remain;
377
378		/* Prefault the user page to reduce potential recursion */
379		err = __get_user(c, user_data);
380		if (err)
381			return err;
382
383		err = __get_user(c, user_data + len - 1);
384		if (err)
385			return err;
386
387		err = pagecache_write_begin(obj->base.filp, mapping,
388					    offset, len, 0,
389					    &page, &data);
390		if (err < 0)
391			return err;
392
393		vaddr = kmap_atomic(page);
394		unwritten = __copy_from_user_inatomic(vaddr + pg,
395						      user_data,
396						      len);
397		kunmap_atomic(vaddr);
398
399		err = pagecache_write_end(obj->base.filp, mapping,
400					  offset, len, len - unwritten,
401					  page, data);
402		if (err < 0)
403			return err;
404
405		/* We don't handle -EFAULT, leave it to the caller to check */
406		if (unwritten)
407			return -ENODEV;
408
409		remain -= len;
410		user_data += len;
411		offset += len;
412		pg = 0;
413	} while (remain);
414
415	return 0;
416}
417
418static void shmem_release(struct drm_i915_gem_object *obj)
419{
420	fput(obj->base.filp);
421}
422
423const struct drm_i915_gem_object_ops i915_gem_shmem_ops = {
424	.flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE |
425		 I915_GEM_OBJECT_IS_SHRINKABLE,
426
427	.get_pages = shmem_get_pages,
428	.put_pages = shmem_put_pages,
429	.truncate = shmem_truncate,
430	.writeback = shmem_writeback,
431
432	.pwrite = shmem_pwrite,
433
434	.release = shmem_release,
435};
436
437static int create_shmem(struct drm_i915_private *i915,
438			struct drm_gem_object *obj,
439			size_t size)
440{
441	unsigned long flags = VM_NORESERVE;
442	struct file *filp;
443
444	drm_gem_private_object_init(&i915->drm, obj, size);
445
446	if (i915->mm.gemfs)
447		filp = shmem_file_setup_with_mnt(i915->mm.gemfs, "i915", size,
448						 flags);
449	else
450		filp = shmem_file_setup("i915", size, flags);
451	if (IS_ERR(filp))
452		return PTR_ERR(filp);
453
454	obj->filp = filp;
455	return 0;
456}
457
458struct drm_i915_gem_object *
459i915_gem_object_create_shmem(struct drm_i915_private *i915, u64 size)
460{
461	struct drm_i915_gem_object *obj;
462	struct address_space *mapping;
463	unsigned int cache_level;
464	gfp_t mask;
465	int ret;
466
467	/* There is a prevalence of the assumption that we fit the object's
468	 * page count inside a 32bit _signed_ variable. Let's document this and
469	 * catch if we ever need to fix it. In the meantime, if you do spot
470	 * such a local variable, please consider fixing!
471	 */
472	if (size >> PAGE_SHIFT > INT_MAX)
473		return ERR_PTR(-E2BIG);
474
475	if (overflows_type(size, obj->base.size))
476		return ERR_PTR(-E2BIG);
477
478	obj = i915_gem_object_alloc();
479	if (!obj)
480		return ERR_PTR(-ENOMEM);
481
482	ret = create_shmem(i915, &obj->base, size);
483	if (ret)
484		goto fail;
485
486	mask = GFP_HIGHUSER | __GFP_RECLAIMABLE;
487	if (IS_I965GM(i915) || IS_I965G(i915)) {
488		/* 965gm cannot relocate objects above 4GiB. */
489		mask &= ~__GFP_HIGHMEM;
490		mask |= __GFP_DMA32;
491	}
492
493	mapping = obj->base.filp->f_mapping;
494	mapping_set_gfp_mask(mapping, mask);
495	GEM_BUG_ON(!(mapping_gfp_mask(mapping) & __GFP_RECLAIM));
496
497	i915_gem_object_init(obj, &i915_gem_shmem_ops);
498
499	obj->write_domain = I915_GEM_DOMAIN_CPU;
500	obj->read_domains = I915_GEM_DOMAIN_CPU;
501
502	if (HAS_LLC(i915))
503		/* On some devices, we can have the GPU use the LLC (the CPU
504		 * cache) for about a 10% performance improvement
505		 * compared to uncached.  Graphics requests other than
506		 * display scanout are coherent with the CPU in
507		 * accessing this cache.  This means in this mode we
508		 * don't need to clflush on the CPU side, and on the
509		 * GPU side we only need to flush internal caches to
510		 * get data visible to the CPU.
511		 *
512		 * However, we maintain the display planes as UC, and so
513		 * need to rebind when first used as such.
514		 */
515		cache_level = I915_CACHE_LLC;
516	else
517		cache_level = I915_CACHE_NONE;
518
519	i915_gem_object_set_cache_coherency(obj, cache_level);
520
521	trace_i915_gem_object_create(obj);
522
523	return obj;
524
525fail:
526	i915_gem_object_free(obj);
527	return ERR_PTR(ret);
528}
529
530/* Allocate a new GEM object and fill it with the supplied data */
531struct drm_i915_gem_object *
532i915_gem_object_create_shmem_from_data(struct drm_i915_private *dev_priv,
533				       const void *data, size_t size)
534{
535	struct drm_i915_gem_object *obj;
536	struct file *file;
537	size_t offset;
538	int err;
539
540	obj = i915_gem_object_create_shmem(dev_priv, round_up(size, PAGE_SIZE));
541	if (IS_ERR(obj))
542		return obj;
543
544	GEM_BUG_ON(obj->write_domain != I915_GEM_DOMAIN_CPU);
545
546	file = obj->base.filp;
547	offset = 0;
548	do {
549		unsigned int len = min_t(typeof(size), size, PAGE_SIZE);
550		struct page *page;
551		void *pgdata, *vaddr;
552
553		err = pagecache_write_begin(file, file->f_mapping,
554					    offset, len, 0,
555					    &page, &pgdata);
556		if (err < 0)
557			goto fail;
558
559		vaddr = kmap(page);
560		memcpy(vaddr, data, len);
561		kunmap(page);
562
563		err = pagecache_write_end(file, file->f_mapping,
564					  offset, len, len,
565					  page, pgdata);
566		if (err < 0)
567			goto fail;
568
569		size -= len;
570		data += len;
571		offset += len;
572	} while (size);
573
574	return obj;
575
576fail:
577	i915_gem_object_put(obj);
578	return ERR_PTR(err);
579}