// SPDX-License-Identifier: GPL-2.0 OR MIT
/**************************************************************************
 *
 * Copyright 2017 VMware, Inc., Palo Alto, CA., USA
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 **************************************************************************/

#include "vmwgfx_drv.h"

/*
 * Template that implements find_first_diff() for a generic
 * unsigned integer type. @size and return value are in bytes.
 */
#define VMW_FIND_FIRST_DIFF(_type)			 \
static size_t vmw_find_first_diff_ ## _type		 \
	(const _type *dst, const _type *src, size_t size)\
{							 \
	size_t i;					 \
							 \
	for (i = 0; i < size; i += sizeof(_type)) {	 \
		if (*dst++ != *src++)			 \
			break;				 \
	}						 \
							 \
	return i;					 \
}
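
/*
 * Illustrative example, not part of the original driver: with two u32
 * buffers dst = {1, 2, 3, 4} and src = {1, 2, 9, 4} and size == 16,
 * vmw_find_first_diff_u32() returns 8, the byte offset of the first
 * differing 4-byte item. Identical buffers make it return @size.
 */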


/*
 * Template that implements find_last_diff() for a generic
 * unsigned integer type. Pointers point to the item following the
 * *end* of the area to be examined. @size and return value are in
 * bytes.
 */
#define VMW_FIND_LAST_DIFF(_type)					\
static ssize_t vmw_find_last_diff_ ## _type(				\
	const _type *dst, const _type *src, size_t size)		\
{									\
	while (size) {							\
		if (*--dst != *--src)					\
			break;						\
									\
		size -= sizeof(_type);					\
	}								\
	return size;							\
}
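
/*
 * Illustrative example, not part of the original driver: for the u16
 * variant, with dst and src pointing one item past the end of two 8-byte
 * buffers that differ only in their first u16, the three trailing items
 * compare equal and the function returns 2: the byte count up to and
 * including the last differing item. Identical buffers yield 0.
 */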


/*
 * Instantiate find diff functions for relevant unsigned integer sizes,
 * assuming that wider integers are faster (including aligning) up to the
 * architecture native width, which is assumed to be 32 bit unless
 * CONFIG_64BIT is defined.
 */
VMW_FIND_FIRST_DIFF(u8);
VMW_FIND_LAST_DIFF(u8);

VMW_FIND_FIRST_DIFF(u16);
VMW_FIND_LAST_DIFF(u16);

VMW_FIND_FIRST_DIFF(u32);
VMW_FIND_LAST_DIFF(u32);

#ifdef CONFIG_64BIT
VMW_FIND_FIRST_DIFF(u64);
VMW_FIND_LAST_DIFF(u64);
#endif


/*
 * We use size-aligned copies. This computes the spill, i.e.
 * (addr - round_down(addr, sizeof(_type))) in bytes.
 */
#define SPILL(_var, _type) ((unsigned long) _var & (sizeof(_type) - 1))
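
/*
 * For example, SPILL((u8 *) 0x1003, u32) == 3 while
 * SPILL((u8 *) 0x1004, u32) == 0: the spill is the pointer's distance
 * in bytes from the previous sizeof(_type)-aligned address.
 */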


/*
 * Template to compute find_first_diff() for a certain integer type
 * including a head copy for alignment, and adjustment of parameters
 * for tail find or increased resolution find using an unsigned integer find
 * of smaller width. If finding is complete, and resolution is sufficient,
 * the macro executes a return statement. Otherwise it falls through.
 */
#define VMW_TRY_FIND_FIRST_DIFF(_type)					\
do {									\
	unsigned int spill = SPILL(dst, _type);				\
	size_t diff_offs;						\
									\
	if (spill && spill == SPILL(src, _type) &&			\
	    sizeof(_type) - spill <= size) {				\
		spill = sizeof(_type) - spill;				\
		diff_offs = vmw_find_first_diff_u8(dst, src, spill);	\
		if (diff_offs < spill)					\
			return round_down(offset + diff_offs, granularity); \
									\
		dst += spill;						\
		src += spill;						\
		size -= spill;						\
		offset += spill;					\
		spill = 0;						\
	}								\
	if (!spill && !SPILL(src, _type)) {				\
		size_t to_copy = size & ~(sizeof(_type) - 1);		\
									\
		diff_offs = vmw_find_first_diff_ ## _type		\
			((_type *) dst, (_type *) src, to_copy);	\
		if (diff_offs >= size || granularity == sizeof(_type))	\
			return (offset + diff_offs);			\
									\
		dst += diff_offs;					\
		src += diff_offs;					\
		size -= diff_offs;					\
		offset += diff_offs;					\
	}								\
} while (0)
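
/*
 * Worked example, illustrative only: with dst == 0x1002, src == 0x2002,
 * size == 13 and _type == u32, both pointers spill by 2 bytes, so a
 * 2-byte u8 head find aligns them to 0x1004/0x2004. The next 8 bytes
 * (to_copy == 11 & ~3) are then compared as u32, and the 3 remaining
 * bytes fall through to the narrower u16/u8 finds unless @granularity
 * is already satisfied.
 */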


/**
 * vmw_find_first_diff - find the first difference between dst and src
 *
 * @dst: The destination address
 * @src: The source address
 * @size: Number of bytes to compare
 * @granularity: The granularity needed for the return value in bytes.
 * Return: The offset from find start where the first difference was
 * encountered in bytes. If no difference was found, the function returns
 * a value >= @size.
 */
static size_t vmw_find_first_diff(const u8 *dst, const u8 *src, size_t size,
				  size_t granularity)
{
	size_t offset = 0;

	/*
	 * Try finding with large integers if alignment allows, or we can
	 * fix it. Fall through if we need better resolution or alignment
	 * was bad.
	 */
#ifdef CONFIG_64BIT
	VMW_TRY_FIND_FIRST_DIFF(u64);
#endif
	VMW_TRY_FIND_FIRST_DIFF(u32);
	VMW_TRY_FIND_FIRST_DIFF(u16);

	return round_down(offset + vmw_find_first_diff_u8(dst, src, size),
			  granularity);
}
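
/*
 * Illustrative example: comparing two 8-byte, two-pixel lines at
 * @granularity == 4 (32 bpp) where only byte 7 differs, the raw first
 * difference sits at offset 7, which is rounded down to 4: the start of
 * the differing pixel.
 */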


/*
 * Template to compute find_last_diff() for a certain integer type
 * including a tail copy for alignment, and adjustment of parameters
 * for head find or increased resolution find using an unsigned integer find
 * of smaller width. If finding is complete, and resolution is sufficient,
 * the macro executes a return statement. Otherwise it falls through.
 */
#define VMW_TRY_FIND_LAST_DIFF(_type)					\
do {									\
	unsigned int spill = SPILL(dst, _type);				\
	ssize_t location;						\
	ssize_t diff_offs;						\
									\
	if (spill && spill <= size && spill == SPILL(src, _type)) {	\
		diff_offs = vmw_find_last_diff_u8(dst, src, spill);	\
		if (diff_offs) {					\
			location = size - spill + diff_offs - 1;	\
			return round_down(location, granularity);	\
		}							\
									\
		dst -= spill;						\
		src -= spill;						\
		size -= spill;						\
		spill = 0;						\
	}								\
	if (!spill && !SPILL(src, _type)) {				\
		size_t to_copy = round_down(size, sizeof(_type));	\
									\
		diff_offs = vmw_find_last_diff_ ## _type		\
			((_type *) dst, (_type *) src, to_copy);	\
		location = size - to_copy + diff_offs - sizeof(_type);	\
		if (location < 0 || granularity == sizeof(_type))	\
			return location;				\
									\
		dst -= to_copy - diff_offs;				\
		src -= to_copy - diff_offs;				\
		size -= to_copy - diff_offs;				\
	}								\
} while (0)


/**
 * vmw_find_last_diff - find the last difference between dst and src
 *
 * @dst: The destination address
 * @src: The source address
 * @size: Number of bytes to compare
 * @granularity: The granularity needed for the return value in bytes.
 * Return: The offset from find start where the last difference was
 * encountered in bytes, or a negative value if no difference was found.
 */
static ssize_t vmw_find_last_diff(const u8 *dst, const u8 *src, size_t size,
				  size_t granularity)
{
	dst += size;
	src += size;

#ifdef CONFIG_64BIT
	VMW_TRY_FIND_LAST_DIFF(u64);
#endif
	VMW_TRY_FIND_LAST_DIFF(u32);
	VMW_TRY_FIND_LAST_DIFF(u16);

	return round_down(vmw_find_last_diff_u8(dst, src, size) - 1,
			  granularity);
}


/**
 * vmw_memcpy - A wrapper around kernel memcpy that allows it to be plugged
 * into a struct vmw_diff_cpy.
 *
 * @diff: The struct vmw_diff_cpy closure argument (unused).
 * @dest: The copy destination.
 * @src: The copy source.
 * @n: Number of bytes to copy.
 */
void vmw_memcpy(struct vmw_diff_cpy *diff, u8 *dest, const u8 *src, size_t n)
{
	memcpy(dest, src, n);
}
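
/*
 * Usage note, a sketch assuming the VMW_CPU_BLIT_INITIALIZER helper
 * declared in vmwgfx_drv.h: a struct vmw_diff_cpy whose @do_cpy member
 * points at vmw_memcpy turns vmw_bo_cpu_blit() below into a plain copy
 * without difference tracking.
 */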


/**
 * vmw_adjust_rect - Adjust rectangle coordinates for newly found difference
 *
 * @diff: The struct vmw_diff_cpy used to track the modified bounding box.
 * @diff_offs: The offset from @diff->line_offset where the difference was
 * found.
 */
static void vmw_adjust_rect(struct vmw_diff_cpy *diff, size_t diff_offs)
{
	size_t offs = (diff_offs + diff->line_offset) / diff->cpp;
	struct drm_rect *rect = &diff->rect;

	rect->x1 = min_t(int, rect->x1, offs);
	rect->x2 = max_t(int, rect->x2, offs + 1);
	rect->y1 = min_t(int, rect->y1, diff->line);
	rect->y2 = max_t(int, rect->y2, diff->line + 1);
}
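
/*
 * Worked example, illustrative only: with @diff->cpp == 4,
 * @diff->line_offset == 16 and @diff_offs == 8, the difference sits at
 * pixel column (16 + 8) / 4 == 6, so the rectangle is grown to include
 * column 6 on the current line.
 */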

/**
 * vmw_diff_memcpy - memcpy that creates a bounding box of modified content.
 *
 * @diff: The struct vmw_diff_cpy used to track the modified bounding box.
 * @dest: The copy destination.
 * @src: The copy source.
 * @n: Number of bytes to copy.
 *
 * In order to correctly track the modified content, the field @diff->line must
 * be pre-loaded with the current line number, the field @diff->line_offset must
 * be pre-loaded with the line offset in bytes where the copy starts, and
 * finally the field @diff->cpp needs to be pre-loaded with the number of bytes
 * per unit in the horizontal direction of the area we're examining.
 * Typically bytes per pixel.
 * This determines the granularity of the difference-computing operations.
 * A higher cpp generally leads to faster execution at the cost of bounding
 * box width precision.
 */
void vmw_diff_memcpy(struct vmw_diff_cpy *diff, u8 *dest, const u8 *src,
		     size_t n)
{
	ssize_t csize, byte_len;

	if (WARN_ON_ONCE(round_down(n, diff->cpp) != n))
		return;

	/* TODO: Possibly use a single vmw_find_first_diff per line? */
	csize = vmw_find_first_diff(dest, src, n, diff->cpp);
	if (csize < n) {
		vmw_adjust_rect(diff, csize);
		byte_len = diff->cpp;

		/*
		 * Starting from where the first difference was found, find
		 * the location of the last difference, and then copy.
		 */
		diff->line_offset += csize;
		dest += csize;
		src += csize;
		n -= csize;
		csize = vmw_find_last_diff(dest, src, n, diff->cpp);
		if (csize >= 0) {
			byte_len += csize;
			vmw_adjust_rect(diff, csize);
		}
		memcpy(dest, src, byte_len);
	}
	diff->line_offset += n;
}
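
/*
 * Usage sketch, assuming the VMW_CPU_BLIT_DIFF_INITIALIZER helper
 * declared in vmwgfx_drv.h; variable names are hypothetical. A caller
 * copying 32 bpp lines would set up
 *
 *	struct vmw_diff_cpy diff = VMW_CPU_BLIT_DIFF_INITIALIZER(4);
 *
 * and pre-load diff.line and diff.line_offset before each line as
 * described above; diff.rect then ends up bounding all modified pixels.
 */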

/**
 * struct vmw_bo_blit_line_data - Convenience argument to vmw_bo_cpu_blit_line
 *
 * @mapped_dst: Already mapped destination page index in @dst_pages.
 * @dst_addr: Kernel virtual address of mapped destination page.
 * @dst_pages: Array of destination bo pages.
 * @dst_num_pages: Number of destination bo pages.
 * @dst_prot: Destination bo page protection.
 * @mapped_src: Already mapped source page index in @src_pages.
 * @src_addr: Kernel virtual address of mapped source page.
 * @src_pages: Array of source bo pages.
 * @src_num_pages: Number of source bo pages.
 * @src_prot: Source bo page protection.
 * @diff: Struct vmw_diff_cpy, in the end forwarded to the memcpy routine.
 */
struct vmw_bo_blit_line_data {
	u32 mapped_dst;
	u8 *dst_addr;
	struct page **dst_pages;
	u32 dst_num_pages;
	pgprot_t dst_prot;
	u32 mapped_src;
	u8 *src_addr;
	struct page **src_pages;
	u32 src_num_pages;
	pgprot_t src_prot;
	struct vmw_diff_cpy *diff;
};

/**
 * vmw_bo_cpu_blit_line - Blit part of a line from one bo to another.
 *
 * @d: Blit data as described above.
 * @dst_offset: Destination copy start offset from start of bo.
 * @src_offset: Source copy start offset from start of bo.
 * @bytes_to_copy: Number of bytes to copy in this line.
 */
static int vmw_bo_cpu_blit_line(struct vmw_bo_blit_line_data *d,
				u32 dst_offset,
				u32 src_offset,
				u32 bytes_to_copy)
{
	struct vmw_diff_cpy *diff = d->diff;

	while (bytes_to_copy) {
		u32 copy_size = bytes_to_copy;
		u32 dst_page = dst_offset >> PAGE_SHIFT;
		u32 src_page = src_offset >> PAGE_SHIFT;
		u32 dst_page_offset = dst_offset & ~PAGE_MASK;
		u32 src_page_offset = src_offset & ~PAGE_MASK;
		bool unmap_dst = d->dst_addr && dst_page != d->mapped_dst;
		bool unmap_src = d->src_addr && (src_page != d->mapped_src ||
						 unmap_dst);

		copy_size = min_t(u32, copy_size, PAGE_SIZE - dst_page_offset);
		copy_size = min_t(u32, copy_size, PAGE_SIZE - src_page_offset);

		if (unmap_src) {
			ttm_kunmap_atomic_prot(d->src_addr, d->src_prot);
			d->src_addr = NULL;
		}

		if (unmap_dst) {
			ttm_kunmap_atomic_prot(d->dst_addr, d->dst_prot);
			d->dst_addr = NULL;
		}

		if (!d->dst_addr) {
			if (WARN_ON_ONCE(dst_page >= d->dst_num_pages))
				return -EINVAL;

			d->dst_addr =
				ttm_kmap_atomic_prot(d->dst_pages[dst_page],
						     d->dst_prot);
			if (!d->dst_addr)
				return -ENOMEM;

			d->mapped_dst = dst_page;
		}

		if (!d->src_addr) {
			if (WARN_ON_ONCE(src_page >= d->src_num_pages))
				return -EINVAL;

			d->src_addr =
				ttm_kmap_atomic_prot(d->src_pages[src_page],
						     d->src_prot);
			if (!d->src_addr)
				return -ENOMEM;

			d->mapped_src = src_page;
		}
		diff->do_cpy(diff, d->dst_addr + dst_page_offset,
			     d->src_addr + src_page_offset, copy_size);

		bytes_to_copy -= copy_size;
		dst_offset += copy_size;
		src_offset += copy_size;
	}

	return 0;
}
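
/*
 * Illustrative example, assuming 4 KiB pages and an equally offset
 * source: a 256-byte span starting at dst_offset == 4000 is split into
 * a 96-byte chunk ending at the page boundary and a 160-byte chunk on
 * the following page, with the atomic mappings dropped and
 * re-established in between.
 */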

/**
 * vmw_bo_cpu_blit - in-kernel cpu blit.
 *
 * @dst: Destination buffer object.
 * @dst_offset: Destination offset of blit start in bytes.
 * @dst_stride: Destination stride in bytes.
 * @src: Source buffer object.
 * @src_offset: Source offset of blit start in bytes.
 * @src_stride: Source stride in bytes.
 * @w: Width of blit in bytes.
 * @h: Height of blit in lines.
 * @diff: The struct vmw_diff_cpy, forwarded to the per-line copy routine.
 * Return: Zero on success. Negative error value on failure. Will print out
 * kernel warnings on caller bugs.
 *
 * Performs a CPU blit from one buffer object to another avoiding a full
 * bo vmap which may exhaust or fragment vmalloc space.
 * On supported architectures (x86), we're using kmap_atomic which avoids
 * cross-processor TLB- and cache flushes and may, on non-HIGHMEM systems,
 * reference already set-up mappings.
 *
 * Neither of the buffer objects may be placed in PCI memory
 * (Fixed memory in TTM terminology) when using this function.
 */
int vmw_bo_cpu_blit(struct ttm_buffer_object *dst,
		    u32 dst_offset, u32 dst_stride,
		    struct ttm_buffer_object *src,
		    u32 src_offset, u32 src_stride,
		    u32 w, u32 h,
		    struct vmw_diff_cpy *diff)
{
	struct ttm_operation_ctx ctx = {
		.interruptible = false,
		.no_wait_gpu = false
	};
	u32 j, initial_line = dst_offset / dst_stride;
	struct vmw_bo_blit_line_data d;
	int ret = 0;

	/* Buffer objects need to be either pinned or reserved: */
	if (!(dst->mem.placement & TTM_PL_FLAG_NO_EVICT))
		dma_resv_assert_held(dst->base.resv);
	if (!(src->mem.placement & TTM_PL_FLAG_NO_EVICT))
		dma_resv_assert_held(src->base.resv);

	if (dst->ttm->state == tt_unpopulated) {
		ret = dst->ttm->bdev->driver->ttm_tt_populate(dst->ttm, &ctx);
		if (ret)
			return ret;
	}

	if (src->ttm->state == tt_unpopulated) {
		ret = src->ttm->bdev->driver->ttm_tt_populate(src->ttm, &ctx);
		if (ret)
			return ret;
	}

	d.mapped_dst = 0;
	d.mapped_src = 0;
	d.dst_addr = NULL;
	d.src_addr = NULL;
	d.dst_pages = dst->ttm->pages;
	d.src_pages = src->ttm->pages;
	d.dst_num_pages = dst->num_pages;
	d.src_num_pages = src->num_pages;
	d.dst_prot = ttm_io_prot(dst->mem.placement, PAGE_KERNEL);
	d.src_prot = ttm_io_prot(src->mem.placement, PAGE_KERNEL);
	d.diff = diff;

	for (j = 0; j < h; ++j) {
		diff->line = j + initial_line;
		diff->line_offset = dst_offset % dst_stride;
		ret = vmw_bo_cpu_blit_line(&d, dst_offset, src_offset, w);
		if (ret)
			goto out;

		dst_offset += dst_stride;
		src_offset += src_stride;
	}
out:
	if (d.src_addr)
		ttm_kunmap_atomic_prot(d.src_addr, d.src_prot);
	if (d.dst_addr)
		ttm_kunmap_atomic_prot(d.dst_addr, d.dst_prot);

	return ret;
}
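
/*
 * Caller sketch, illustrative only; bo handles, offsets and strides are
 * hypothetical. To blit a w x h pixel region at 32 bpp while collecting
 * the modified bounding box, a caller could do
 *
 *	struct vmw_diff_cpy diff = VMW_CPU_BLIT_DIFF_INITIALIZER(4);
 *	int ret = vmw_bo_cpu_blit(dst_bo, dst_offset, dst_stride,
 *				  src_bo, src_offset, src_stride,
 *				  w * 4, h, &diff);
 *
 * where the width is multiplied by the bytes per pixel since @w is
 * given in bytes, and diff.rect afterwards holds the damaged rectangle.
 */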