Linux Audio

Check our new training course

Loading...
Note: File does not exist in v4.6.
  1// SPDX-License-Identifier: GPL-2.0
  2/*
  3 * Copyright 2019 NXP.
  4 *
  5 * Scaling algorithms were contributed by Dzung Hoang <dzung.hoang@nxp.com>
  6 */
  7
  8#include <linux/device.h>
  9#include <linux/slab.h>
 10
 11#include "dcss-dev.h"
 12
/*
 * Scaler register map.
 *
 * The unindented names are register offsets; they are added to a channel's
 * base offset when written through dcss_scaler_write().  The indented names
 * that follow a register are bit positions/masks that, judging by their
 * grouping, belong to the register(s) listed directly above them.
 */
#define DCSS_SCALER_CTRL			0x00
#define   SCALER_EN				BIT(0)
#define   REPEAT_EN				BIT(4)
#define   SCALE2MEM_EN				BIT(8)
#define   MEM2OFIFO_EN				BIT(12)
#define DCSS_SCALER_OFIFO_CTRL			0x04
#define   OFIFO_LOW_THRES_POS			0
#define   OFIFO_LOW_THRES_MASK			GENMASK(9, 0)
#define   OFIFO_HIGH_THRES_POS			16
#define   OFIFO_HIGH_THRES_MASK			GENMASK(25, 16)
#define   UNDERRUN_DETECT_CLR			BIT(26)
#define   LOW_THRES_DETECT_CLR			BIT(27)
#define   HIGH_THRES_DETECT_CLR			BIT(28)
#define   UNDERRUN_DETECT_EN			BIT(29)
#define   LOW_THRES_DETECT_EN			BIT(30)
#define   HIGH_THRES_DETECT_EN			BIT(31)
#define DCSS_SCALER_SDATA_CTRL			0x08
#define   YUV_EN				BIT(0)
#define   RTRAM_8LINES				BIT(1)
#define   Y_UV_BYTE_SWAP			BIT(4)
#define   A2R10G10B10_FORMAT_POS		8
#define   A2R10G10B10_FORMAT_MASK		GENMASK(11, 8)
#define DCSS_SCALER_BIT_DEPTH			0x0C
#define   LUM_BIT_DEPTH_POS			0
#define   LUM_BIT_DEPTH_MASK			GENMASK(1, 0)
#define   CHR_BIT_DEPTH_POS			4
#define   CHR_BIT_DEPTH_MASK			GENMASK(5, 4)
#define DCSS_SCALER_SRC_FORMAT			0x10
#define DCSS_SCALER_DST_FORMAT			0x14
#define   FORMAT_MASK				GENMASK(1, 0)
/* The WIDTH and HEIGHT fields below are used for all four *_RES registers. */
#define DCSS_SCALER_SRC_LUM_RES			0x18
#define DCSS_SCALER_SRC_CHR_RES			0x1C
#define DCSS_SCALER_DST_LUM_RES			0x20
#define DCSS_SCALER_DST_CHR_RES			0x24
#define   WIDTH_POS				0
#define   WIDTH_MASK				GENMASK(11, 0)
#define   HEIGHT_POS				16
#define   HEIGHT_MASK				GENMASK(27, 16)
/* Start phase / phase increment registers, luma first, then chroma. */
#define DCSS_SCALER_V_LUM_START			0x48
#define   V_START_MASK				GENMASK(15, 0)
#define DCSS_SCALER_V_LUM_INC			0x4C
#define   V_INC_MASK				GENMASK(15, 0)
#define DCSS_SCALER_H_LUM_START			0x50
#define   H_START_MASK				GENMASK(18, 0)
#define DCSS_SCALER_H_LUM_INC			0x54
#define   H_INC_MASK				GENMASK(15, 0)
#define DCSS_SCALER_V_CHR_START			0x58
#define DCSS_SCALER_V_CHR_INC			0x5C
#define DCSS_SCALER_H_CHR_START			0x60
#define DCSS_SCALER_H_CHR_INC			0x64
/*
 * Base offsets of the filter coefficient banks.  Each bank is filled by the
 * dcss_scaler_program_{5,7}_coef_set() helpers, which write three groups of
 * words at +0x00, +0x40 and +0x80 from the bank base.
 */
#define DCSS_SCALER_COEF_VLUM			0x80
#define DCSS_SCALER_COEF_HLUM			0x140
#define DCSS_SCALER_COEF_VCHR			0x200
#define DCSS_SCALER_COEF_HCHR			0x300
 67
/**
 * struct dcss_scaler_ch - per-channel scaler state
 * @base_reg: ioremap()ed view of the channel registers, used for direct
 *	      register writes
 * @base_ofs: address of the channel register block; register offsets are
 *	      added to it for writes queued through the context loader
 * @scl: back-pointer to the owning scaler
 * @sdata_ctrl: cached value for DCSS_SCALER_SDATA_CTRL, written out when the
 *		channel is enabled
 * @scaler_ctrl: cached value for DCSS_SCALER_CTRL, written out by
 *		 dcss_scaler_write_sclctrl()
 * @scaler_ctrl_chgd: set when @scaler_ctrl changed and still has to be
 *		      written; cleared once it has been queued
 * @c_vstart: chroma vertical start phase saved by dcss_scaler_fractions_set()
 * @c_hstart: chroma horizontal start phase saved by
 *	      dcss_scaler_fractions_set()
 * @use_nn_interpolation: generate nearest-neighbour coefficients instead of
 *			  the gaussian filter
 */
struct dcss_scaler_ch {
	void __iomem *base_reg;
	u32 base_ofs;
	struct dcss_scaler *scl;

	u32 sdata_ctrl;
	u32 scaler_ctrl;

	bool scaler_ctrl_chgd;

	u32 c_vstart;
	u32 c_hstart;

	bool use_nn_interpolation;
};
 83
/**
 * struct dcss_scaler - scaler module state
 * @dev: device used for diagnostics (copied from the parent dcss_dev)
 * @ctxld: context loader through which register writes are queued
 * @ctx_id: context loader context used for those writes
 * @ch: the three scaler channels
 */
struct dcss_scaler {
	struct device *dev;

	struct dcss_ctxld *ctxld;
	u32 ctx_id;

	struct dcss_scaler_ch ch[3];
};
 92
/*
 * scaler coefficients generator
 *
 * PSC_NUM_PHASES / PSC_BITS_FOR_PHASE / PSC_PHASE_MASK describe the 16 filter
 * phases.  Only PSC_STORED_PHASES (phases 0..8) are generated; the remaining
 * ones are programmed by mirroring both phase and tap order.
 * PSC_NUM_TAPS is the width of a coefficient row; PSC_NUM_TAPS_RGBA is the
 * tap count used when a 5-tap filter is requested.
 * PSC_COEFF_PRECISION is the number of fractional bits of the final,
 * normalized coefficients.
 * PSC_PHASE_FRACTION_BITS is the fixed-point position used for the chroma
 * start-phase adjustments.
 * PSC_Q_FRACTION is the number of fractional bits used by mult_q()/div_q();
 * PSC_Q_ROUND_OFFSET is one half in that format.
 * NOTE(review): PSC_FRAC_BITS and PSC_FRAC_SCALE are not referenced in the
 * code visible here.
 */
#define PSC_FRAC_BITS 30
#define PSC_FRAC_SCALE BIT(PSC_FRAC_BITS)
#define PSC_BITS_FOR_PHASE 4
#define PSC_NUM_PHASES 16
#define PSC_STORED_PHASES (PSC_NUM_PHASES / 2 + 1)
#define PSC_NUM_TAPS 7
#define PSC_NUM_TAPS_RGBA 5
#define PSC_COEFF_PRECISION 10
#define PSC_PHASE_FRACTION_BITS 13
#define PSC_PHASE_MASK (PSC_NUM_PHASES - 1)
#define PSC_Q_FRACTION 19
#define PSC_Q_ROUND_OFFSET (1 << (PSC_Q_FRACTION - 1))
106
107/**
108 * mult_q() - Performs fixed-point multiplication.
109 * @A: multiplier
110 * @B: multiplicand
111 */
112static int mult_q(int A, int B)
113{
114	int result;
115	s64 temp;
116
117	temp = (int64_t)A * (int64_t)B;
118	temp += PSC_Q_ROUND_OFFSET;
119	result = (int)(temp >> PSC_Q_FRACTION);
120	return result;
121}
122
123/**
124 * div_q() - Performs fixed-point division.
125 * @A: dividend
126 * @B: divisor
127 */
128static int div_q(int A, int B)
129{
130	int result;
131	s64 temp;
132
133	temp = (int64_t)A << PSC_Q_FRACTION;
134	if ((temp >= 0 && B >= 0) || (temp < 0 && B < 0))
135		temp += B / 2;
136	else
137		temp -= B / 2;
138
139	result = (int)(temp / B);
140	return result;
141}
142
143/**
144 * exp_approx_q() - Compute approximation to exp(x) function using Taylor
145 *		    series.
146 * @x: fixed-point argument of exp function
147 */
148static int exp_approx_q(int x)
149{
150	int sum = 1 << PSC_Q_FRACTION;
151	int term = 1 << PSC_Q_FRACTION;
152
153	term = mult_q(term, div_q(x, 1 << PSC_Q_FRACTION));
154	sum += term;
155	term = mult_q(term, div_q(x, 2 << PSC_Q_FRACTION));
156	sum += term;
157	term = mult_q(term, div_q(x, 3 << PSC_Q_FRACTION));
158	sum += term;
159	term = mult_q(term, div_q(x, 4 << PSC_Q_FRACTION));
160	sum += term;
161
162	return sum;
163}
164
/**
 * dcss_scaler_gaussian_filter() - Generate gaussian prototype filter.
 * @fc_q: fixed-point cutoff frequency normalized to range [0, 1]
 * @use_5_taps: indicates whether to use 5 taps or 7 taps
 * @phase0_identity: replace phase 0 with a filter that has only its centre
 *		     tap set
 * @coef: output filter coefficients
 *
 * Fills rows 0..PSC_STORED_PHASES-1 of @coef.  The samples are produced
 * starting from the centre value and walking outwards, storing each value at
 * the mirrored positions on both sides.  Every stored phase is then
 * normalized so that its taps sum to approximately 1 << PSC_COEFF_PRECISION.
 */
static void dcss_scaler_gaussian_filter(int fc_q, bool use_5_taps,
					bool phase0_identity,
					int coef[][PSC_NUM_TAPS])
{
	int sigma_q, g0_q, g1_q, g2_q;
	int tap_cnt1, tap_cnt2, tap_idx, phase_cnt;
	int mid;
	int phase;
	int i;
	int taps;

	/* with 5 taps the two outermost taps of every stored phase are zero */
	if (use_5_taps)
		for (phase = 0; phase < PSC_STORED_PHASES; phase++) {
			coef[phase][0] = 0;
			coef[phase][PSC_NUM_TAPS - 1] = 0;
		}

	/* seed coefficient scanner */
	taps = use_5_taps ? PSC_NUM_TAPS_RGBA : PSC_NUM_TAPS;
	/* number of samples generated on each side of the centre */
	mid = (PSC_NUM_PHASES * taps) / 2 - 1;
	/*
	 * The counters below are in units of one phase step; the low
	 * PSC_BITS_FOR_PHASE bits of phase_cnt select the phase, the tap
	 * counters shifted right by PSC_BITS_FOR_PHASE select the tap.
	 * They always start from the 7-tap layout, even for 5 taps.
	 */
	phase_cnt = (PSC_NUM_PHASES * (PSC_NUM_TAPS + 1)) / 2;
	tap_cnt1 = (PSC_NUM_PHASES * PSC_NUM_TAPS) / 2;
	tap_cnt2 = (PSC_NUM_PHASES * PSC_NUM_TAPS) / 2;

	/* seed gaussian filter generator */
	/* sigma = 0.5 / fc (PSC_Q_ROUND_OFFSET is 0.5 in the Q format) */
	sigma_q = div_q(PSC_Q_ROUND_OFFSET, fc_q);
	g0_q = 1 << PSC_Q_FRACTION;
	/* g1 = exp(-0.5 / sigma^2), g2 = g1^2 */
	g1_q = exp_approx_q(div_q(-PSC_Q_ROUND_OFFSET,
				  mult_q(sigma_q, sigma_q)));
	g2_q = mult_q(g1_q, g1_q);
	/* centre sample */
	coef[phase_cnt & PSC_PHASE_MASK][tap_cnt1 >> PSC_BITS_FOR_PHASE] = g0_q;

	for (i = 0; i < mid; i++) {
		phase_cnt++;
		tap_cnt1--;
		tap_cnt2++;

		/* next sample is derived from the previous one by multiplication */
		g0_q = mult_q(g0_q, g1_q);
		g1_q = mult_q(g1_q, g2_q);

		/* only phases 0..8 are stored; store the value on one side ... */
		if ((phase_cnt & PSC_PHASE_MASK) <= 8) {
			tap_idx = tap_cnt1 >> PSC_BITS_FOR_PHASE;
			coef[phase_cnt & PSC_PHASE_MASK][tap_idx] = g0_q;
		}
		/* ... and at the mirrored phase/tap on the other side */
		if (((-phase_cnt) & PSC_PHASE_MASK) <= 8) {
			tap_idx = tap_cnt2 >> PSC_BITS_FOR_PHASE;
			coef[(-phase_cnt) & PSC_PHASE_MASK][tap_idx] = g0_q;
		}
	}

	/* the position one step past the last generated sample is zeroed */
	phase_cnt++;
	tap_cnt1--;
	coef[phase_cnt & PSC_PHASE_MASK][tap_cnt1 >> PSC_BITS_FOR_PHASE] = 0;

	/* override phase 0 with identity filter if specified */
	if (phase0_identity)
		for (i = 0; i < PSC_NUM_TAPS; i++)
			coef[0][i] = i == (PSC_NUM_TAPS >> 1) ?
						(1 << PSC_COEFF_PRECISION) : 0;

	/* normalize coef */
	for (phase = 0; phase < PSC_STORED_PHASES; phase++) {
		int sum = 0;
		s64 ll_temp;

		for (i = 0; i < PSC_NUM_TAPS; i++)
			sum += coef[phase][i];
		for (i = 0; i < PSC_NUM_TAPS; i++) {
			/* coef * 2^PSC_COEFF_PRECISION / sum, rounded */
			ll_temp = coef[phase][i];
			ll_temp <<= PSC_COEFF_PRECISION;
			ll_temp += sum >> 1;
			ll_temp /= sum;
			coef[phase][i] = (int)ll_temp;
		}
	}
}
247
248static void dcss_scaler_nearest_neighbor_filter(bool use_5_taps,
249						int coef[][PSC_NUM_TAPS])
250{
251	int i, j;
252
253	for (i = 0; i < PSC_STORED_PHASES; i++)
254		for (j = 0; j < PSC_NUM_TAPS; j++)
255			coef[i][j] = j == PSC_NUM_TAPS >> 1 ?
256						(1 << PSC_COEFF_PRECISION) : 0;
257}
258
259/**
260 * dcss_scaler_filter_design() - Compute filter coefficients using
261 *				 Gaussian filter.
262 * @src_length: length of input
263 * @dst_length: length of output
264 * @use_5_taps: 0 for 7 taps per phase, 1 for 5 taps
265 * @coef: output coefficients
266 */
267static void dcss_scaler_filter_design(int src_length, int dst_length,
268				      bool use_5_taps, bool phase0_identity,
269				      int coef[][PSC_NUM_TAPS],
270				      bool nn_interpolation)
271{
272	int fc_q;
273
274	/* compute cutoff frequency */
275	if (dst_length >= src_length)
276		fc_q = div_q(1, PSC_NUM_PHASES);
277	else
278		fc_q = div_q(dst_length, src_length * PSC_NUM_PHASES);
279
280	if (nn_interpolation)
281		dcss_scaler_nearest_neighbor_filter(use_5_taps, coef);
282	else
283		/* compute gaussian filter coefficients */
284		dcss_scaler_gaussian_filter(fc_q, use_5_taps, phase0_identity, coef);
285}
286
287static void dcss_scaler_write(struct dcss_scaler_ch *ch, u32 val, u32 ofs)
288{
289	struct dcss_scaler *scl = ch->scl;
290
291	dcss_ctxld_write(scl->ctxld, scl->ctx_id, val, ch->base_ofs + ofs);
292}
293
294static int dcss_scaler_ch_init_all(struct dcss_scaler *scl,
295				   unsigned long scaler_base)
296{
297	struct dcss_scaler_ch *ch;
298	int i;
299
300	for (i = 0; i < 3; i++) {
301		ch = &scl->ch[i];
302
303		ch->base_ofs = scaler_base + i * 0x400;
304
305		ch->base_reg = ioremap(ch->base_ofs, SZ_4K);
306		if (!ch->base_reg) {
307			dev_err(scl->dev, "scaler: unable to remap ch base\n");
308			return -ENOMEM;
309		}
310
311		ch->scl = scl;
312	}
313
314	return 0;
315}
316
317int dcss_scaler_init(struct dcss_dev *dcss, unsigned long scaler_base)
318{
319	struct dcss_scaler *scaler;
320
321	scaler = kzalloc(sizeof(*scaler), GFP_KERNEL);
322	if (!scaler)
323		return -ENOMEM;
324
325	dcss->scaler = scaler;
326	scaler->dev = dcss->dev;
327	scaler->ctxld = dcss->ctxld;
328	scaler->ctx_id = CTX_SB_HP;
329
330	if (dcss_scaler_ch_init_all(scaler, scaler_base)) {
331		int i;
332
333		for (i = 0; i < 3; i++) {
334			if (scaler->ch[i].base_reg)
335				iounmap(scaler->ch[i].base_reg);
336		}
337
338		kfree(scaler);
339
340		return -ENOMEM;
341	}
342
343	return 0;
344}
345
346void dcss_scaler_exit(struct dcss_scaler *scl)
347{
348	int ch_no;
349
350	for (ch_no = 0; ch_no < 3; ch_no++) {
351		struct dcss_scaler_ch *ch = &scl->ch[ch_no];
352
353		dcss_writel(0, ch->base_reg + DCSS_SCALER_CTRL);
354
355		if (ch->base_reg)
356			iounmap(ch->base_reg);
357	}
358
359	kfree(scl);
360}
361
362void dcss_scaler_ch_enable(struct dcss_scaler *scl, int ch_num, bool en)
363{
364	struct dcss_scaler_ch *ch = &scl->ch[ch_num];
365	u32 scaler_ctrl;
366
367	scaler_ctrl = en ? SCALER_EN | REPEAT_EN : 0;
368
369	if (en)
370		dcss_scaler_write(ch, ch->sdata_ctrl, DCSS_SCALER_SDATA_CTRL);
371
372	if (ch->scaler_ctrl != scaler_ctrl)
373		ch->scaler_ctrl_chgd = true;
374
375	ch->scaler_ctrl = scaler_ctrl;
376}
377
378static void dcss_scaler_yuv_enable(struct dcss_scaler_ch *ch, bool en)
379{
380	ch->sdata_ctrl &= ~YUV_EN;
381	ch->sdata_ctrl |= en ? YUV_EN : 0;
382}
383
384static void dcss_scaler_rtr_8lines_enable(struct dcss_scaler_ch *ch, bool en)
385{
386	ch->sdata_ctrl &= ~RTRAM_8LINES;
387	ch->sdata_ctrl |= en ? RTRAM_8LINES : 0;
388}
389
390static void dcss_scaler_bit_depth_set(struct dcss_scaler_ch *ch, int depth)
391{
392	u32 val;
393
394	val = depth == 30 ? 2 : 0;
395
396	dcss_scaler_write(ch,
397			  ((val << CHR_BIT_DEPTH_POS) & CHR_BIT_DEPTH_MASK) |
398			  ((val << LUM_BIT_DEPTH_POS) & LUM_BIT_DEPTH_MASK),
399			  DCSS_SCALER_BIT_DEPTH);
400}
401
/*
 * Buffer layouts known to the scaler.  The numeric values are written as-is
 * to DCSS_SCALER_SRC_FORMAT / DCSS_SCALER_DST_FORMAT, so the order of the
 * enumerators matters.
 */
enum buffer_format {
	BUF_FMT_YUV420,
	BUF_FMT_YUV422,
	BUF_FMT_ARGB8888_YUV444,
};
407
/*
 * Position of the source chroma samples relative to luma.  Used by
 * dcss_scaler_fractions_set() for 4:2:0 sources: the VERT_1_OVER_4 and
 * VERT_1_OVER_2 variants shift the vertical chroma start phase by 1/4 and
 * 1/2 of a chroma line respectively, and the HORZ_1_OVER_4 variants shift
 * the horizontal chroma start phase by 1/4 of a chroma sample.
 */
enum chroma_location {
	PSC_LOC_HORZ_0_VERT_1_OVER_4 = 0,
	PSC_LOC_HORZ_1_OVER_4_VERT_1_OVER_4 = 1,
	PSC_LOC_HORZ_0_VERT_0 = 2,
	PSC_LOC_HORZ_1_OVER_4_VERT_0 = 3,
	PSC_LOC_HORZ_0_VERT_1_OVER_2 = 4,
	PSC_LOC_HORZ_1_OVER_4_VERT_1_OVER_2 = 5
};
416
417static void dcss_scaler_format_set(struct dcss_scaler_ch *ch,
418				   enum buffer_format src_fmt,
419				   enum buffer_format dst_fmt)
420{
421	dcss_scaler_write(ch, src_fmt, DCSS_SCALER_SRC_FORMAT);
422	dcss_scaler_write(ch, dst_fmt, DCSS_SCALER_DST_FORMAT);
423}
424
425static void dcss_scaler_res_set(struct dcss_scaler_ch *ch,
426				int src_xres, int src_yres,
427				int dst_xres, int dst_yres,
428				u32 pix_format, enum buffer_format dst_format)
429{
430	u32 lsrc_xres, lsrc_yres, csrc_xres, csrc_yres;
431	u32 ldst_xres, ldst_yres, cdst_xres, cdst_yres;
432	bool src_is_444 = true;
433
434	lsrc_xres = src_xres;
435	csrc_xres = src_xres;
436	lsrc_yres = src_yres;
437	csrc_yres = src_yres;
438	ldst_xres = dst_xres;
439	cdst_xres = dst_xres;
440	ldst_yres = dst_yres;
441	cdst_yres = dst_yres;
442
443	if (pix_format == DRM_FORMAT_UYVY || pix_format == DRM_FORMAT_VYUY ||
444	    pix_format == DRM_FORMAT_YUYV || pix_format == DRM_FORMAT_YVYU) {
445		csrc_xres >>= 1;
446		src_is_444 = false;
447	} else if (pix_format == DRM_FORMAT_NV12 ||
448		   pix_format == DRM_FORMAT_NV21) {
449		csrc_xres >>= 1;
450		csrc_yres >>= 1;
451		src_is_444 = false;
452	}
453
454	if (dst_format == BUF_FMT_YUV422)
455		cdst_xres >>= 1;
456
457	/* for 4:4:4 to 4:2:2 conversion, source height should be 1 less */
458	if (src_is_444 && dst_format == BUF_FMT_YUV422) {
459		lsrc_yres--;
460		csrc_yres--;
461	}
462
463	dcss_scaler_write(ch, (((lsrc_yres - 1) << HEIGHT_POS) & HEIGHT_MASK) |
464			       (((lsrc_xres - 1) << WIDTH_POS) & WIDTH_MASK),
465			  DCSS_SCALER_SRC_LUM_RES);
466	dcss_scaler_write(ch, (((csrc_yres - 1) << HEIGHT_POS) & HEIGHT_MASK) |
467			       (((csrc_xres - 1) << WIDTH_POS) & WIDTH_MASK),
468			  DCSS_SCALER_SRC_CHR_RES);
469	dcss_scaler_write(ch, (((ldst_yres - 1) << HEIGHT_POS) & HEIGHT_MASK) |
470			       (((ldst_xres - 1) << WIDTH_POS) & WIDTH_MASK),
471			  DCSS_SCALER_DST_LUM_RES);
472	dcss_scaler_write(ch, (((cdst_yres - 1) << HEIGHT_POS) & HEIGHT_MASK) |
473			       (((cdst_xres - 1) << WIDTH_POS) & WIDTH_MASK),
474			  DCSS_SCALER_DST_CHR_RES);
475}
476
/*
 * Convert an integer scale factor to a fixed-point ratio with @fp_pos
 * fractional bits: a downscale factor N becomes N.0, an upscale factor N
 * becomes 1/N.
 */
#define downscale_fp(factor, fp_pos)		((factor) << (fp_pos))
#define upscale_fp(factor, fp_pos)		((1 << (fp_pos)) / (factor))

/* Scale factor limits of one channel. */
struct dcss_scaler_factors {
	int downscale;
	int upscale;
};

/* One {downscale, upscale} entry per scaler channel, indexed by channel. */
static const struct dcss_scaler_factors dcss_scaler_factors[] = {
	{3, 8}, {5, 8}, {5, 8},
};
488
489static void dcss_scaler_fractions_set(struct dcss_scaler_ch *ch,
490				      int src_xres, int src_yres,
491				      int dst_xres, int dst_yres,
492				      u32 src_format, u32 dst_format,
493				      enum chroma_location src_chroma_loc)
494{
495	int src_c_xres, src_c_yres, dst_c_xres, dst_c_yres;
496	u32 l_vinc, l_hinc, c_vinc, c_hinc;
497	u32 c_vstart, c_hstart;
498
499	src_c_xres = src_xres;
500	src_c_yres = src_yres;
501	dst_c_xres = dst_xres;
502	dst_c_yres = dst_yres;
503
504	c_vstart = 0;
505	c_hstart = 0;
506
507	/* adjustments for source chroma location */
508	if (src_format == BUF_FMT_YUV420) {
509		/* vertical input chroma position adjustment */
510		switch (src_chroma_loc) {
511		case PSC_LOC_HORZ_0_VERT_1_OVER_4:
512		case PSC_LOC_HORZ_1_OVER_4_VERT_1_OVER_4:
513			/*
514			 * move chroma up to first luma line
515			 * (1/4 chroma input line spacing)
516			 */
517			c_vstart -= (1 << (PSC_PHASE_FRACTION_BITS - 2));
518			break;
519		case PSC_LOC_HORZ_0_VERT_1_OVER_2:
520		case PSC_LOC_HORZ_1_OVER_4_VERT_1_OVER_2:
521			/*
522			 * move chroma up to first luma line
523			 * (1/2 chroma input line spacing)
524			 */
525			c_vstart -= (1 << (PSC_PHASE_FRACTION_BITS - 1));
526			break;
527		default:
528			break;
529		}
530		/* horizontal input chroma position adjustment */
531		switch (src_chroma_loc) {
532		case PSC_LOC_HORZ_1_OVER_4_VERT_1_OVER_4:
533		case PSC_LOC_HORZ_1_OVER_4_VERT_0:
534		case PSC_LOC_HORZ_1_OVER_4_VERT_1_OVER_2:
535			/* move chroma left 1/4 chroma input sample spacing */
536			c_hstart -= (1 << (PSC_PHASE_FRACTION_BITS - 2));
537			break;
538		default:
539			break;
540		}
541	}
542
543	/* adjustments to chroma resolution */
544	if (src_format == BUF_FMT_YUV420) {
545		src_c_xres >>= 1;
546		src_c_yres >>= 1;
547	} else if (src_format == BUF_FMT_YUV422) {
548		src_c_xres >>= 1;
549	}
550
551	if (dst_format == BUF_FMT_YUV422)
552		dst_c_xres >>= 1;
553
554	l_vinc = ((src_yres << 13) + (dst_yres >> 1)) / dst_yres;
555	c_vinc = ((src_c_yres << 13) + (dst_c_yres >> 1)) / dst_c_yres;
556	l_hinc = ((src_xres << 13) + (dst_xres >> 1)) / dst_xres;
557	c_hinc = ((src_c_xres << 13) + (dst_c_xres >> 1)) / dst_c_xres;
558
559	/* save chroma start phase */
560	ch->c_vstart = c_vstart;
561	ch->c_hstart = c_hstart;
562
563	dcss_scaler_write(ch, 0, DCSS_SCALER_V_LUM_START);
564	dcss_scaler_write(ch, l_vinc, DCSS_SCALER_V_LUM_INC);
565
566	dcss_scaler_write(ch, 0, DCSS_SCALER_H_LUM_START);
567	dcss_scaler_write(ch, l_hinc, DCSS_SCALER_H_LUM_INC);
568
569	dcss_scaler_write(ch, c_vstart, DCSS_SCALER_V_CHR_START);
570	dcss_scaler_write(ch, c_vinc, DCSS_SCALER_V_CHR_INC);
571
572	dcss_scaler_write(ch, c_hstart, DCSS_SCALER_H_CHR_START);
573	dcss_scaler_write(ch, c_hinc, DCSS_SCALER_H_CHR_INC);
574}
575
576int dcss_scaler_get_min_max_ratios(struct dcss_scaler *scl, int ch_num,
577				   int *min, int *max)
578{
579	*min = upscale_fp(dcss_scaler_factors[ch_num].upscale, 16);
580	*max = downscale_fp(dcss_scaler_factors[ch_num].downscale, 16);
581
582	return 0;
583}
584
585static void dcss_scaler_program_5_coef_set(struct dcss_scaler_ch *ch,
586					   int base_addr,
587					   int coef[][PSC_NUM_TAPS])
588{
589	int i, phase;
590
591	for (i = 0; i < PSC_STORED_PHASES; i++) {
592		dcss_scaler_write(ch, ((coef[i][1] & 0xfff) << 16 |
593				       (coef[i][2] & 0xfff) << 4  |
594				       (coef[i][3] & 0xf00) >> 8),
595				  base_addr + i * sizeof(u32));
596		dcss_scaler_write(ch, ((coef[i][3] & 0x0ff) << 20 |
597				       (coef[i][4] & 0xfff) << 8  |
598				       (coef[i][5] & 0xff0) >> 4),
599				  base_addr + 0x40 + i * sizeof(u32));
600		dcss_scaler_write(ch, ((coef[i][5] & 0x00f) << 24),
601				  base_addr + 0x80 + i * sizeof(u32));
602	}
603
604	/* reverse both phase and tap orderings */
605	for (phase = (PSC_NUM_PHASES >> 1) - 1;
606			i < PSC_NUM_PHASES; i++, phase--) {
607		dcss_scaler_write(ch, ((coef[phase][5] & 0xfff) << 16 |
608				       (coef[phase][4] & 0xfff) << 4  |
609				       (coef[phase][3] & 0xf00) >> 8),
610				  base_addr + i * sizeof(u32));
611		dcss_scaler_write(ch, ((coef[phase][3] & 0x0ff) << 20 |
612				       (coef[phase][2] & 0xfff) << 8  |
613				       (coef[phase][1] & 0xff0) >> 4),
614				  base_addr + 0x40 + i * sizeof(u32));
615		dcss_scaler_write(ch, ((coef[phase][1] & 0x00f) << 24),
616				  base_addr + 0x80 + i * sizeof(u32));
617	}
618}
619
620static void dcss_scaler_program_7_coef_set(struct dcss_scaler_ch *ch,
621					   int base_addr,
622					   int coef[][PSC_NUM_TAPS])
623{
624	int i, phase;
625
626	for (i = 0; i < PSC_STORED_PHASES; i++) {
627		dcss_scaler_write(ch, ((coef[i][0] & 0xfff) << 16 |
628				       (coef[i][1] & 0xfff) << 4  |
629				       (coef[i][2] & 0xf00) >> 8),
630				  base_addr + i * sizeof(u32));
631		dcss_scaler_write(ch, ((coef[i][2] & 0x0ff) << 20 |
632				       (coef[i][3] & 0xfff) << 8  |
633				       (coef[i][4] & 0xff0) >> 4),
634				  base_addr + 0x40 + i * sizeof(u32));
635		dcss_scaler_write(ch, ((coef[i][4] & 0x00f) << 24 |
636				       (coef[i][5] & 0xfff) << 12 |
637				       (coef[i][6] & 0xfff)),
638				  base_addr + 0x80 + i * sizeof(u32));
639	}
640
641	/* reverse both phase and tap orderings */
642	for (phase = (PSC_NUM_PHASES >> 1) - 1;
643			i < PSC_NUM_PHASES; i++, phase--) {
644		dcss_scaler_write(ch, ((coef[phase][6] & 0xfff) << 16 |
645				       (coef[phase][5] & 0xfff) << 4  |
646				       (coef[phase][4] & 0xf00) >> 8),
647				  base_addr + i * sizeof(u32));
648		dcss_scaler_write(ch, ((coef[phase][4] & 0x0ff) << 20 |
649				       (coef[phase][3] & 0xfff) << 8  |
650				       (coef[phase][2] & 0xff0) >> 4),
651				  base_addr + 0x40 + i * sizeof(u32));
652		dcss_scaler_write(ch, ((coef[phase][2] & 0x00f) << 24 |
653				       (coef[phase][1] & 0xfff) << 12 |
654				       (coef[phase][0] & 0xfff)),
655				  base_addr + 0x80 + i * sizeof(u32));
656	}
657}
658
659static void dcss_scaler_yuv_coef_set(struct dcss_scaler_ch *ch,
660				     enum buffer_format src_format,
661				     enum buffer_format dst_format,
662				     bool use_5_taps,
663				     int src_xres, int src_yres, int dst_xres,
664				     int dst_yres)
665{
666	int coef[PSC_STORED_PHASES][PSC_NUM_TAPS];
667	bool program_5_taps = use_5_taps ||
668			      (dst_format == BUF_FMT_YUV422 &&
669			       src_format == BUF_FMT_ARGB8888_YUV444);
670
671	/* horizontal luma */
672	dcss_scaler_filter_design(src_xres, dst_xres, false,
673				  src_xres == dst_xres, coef,
674				  ch->use_nn_interpolation);
675	dcss_scaler_program_7_coef_set(ch, DCSS_SCALER_COEF_HLUM, coef);
676
677	/* vertical luma */
678	dcss_scaler_filter_design(src_yres, dst_yres, program_5_taps,
679				  src_yres == dst_yres, coef,
680				  ch->use_nn_interpolation);
681
682	if (program_5_taps)
683		dcss_scaler_program_5_coef_set(ch, DCSS_SCALER_COEF_VLUM, coef);
684	else
685		dcss_scaler_program_7_coef_set(ch, DCSS_SCALER_COEF_VLUM, coef);
686
687	/* adjust chroma resolution */
688	if (src_format != BUF_FMT_ARGB8888_YUV444)
689		src_xres >>= 1;
690	if (src_format == BUF_FMT_YUV420)
691		src_yres >>= 1;
692	if (dst_format != BUF_FMT_ARGB8888_YUV444)
693		dst_xres >>= 1;
694	if (dst_format == BUF_FMT_YUV420) /* should not happen */
695		dst_yres >>= 1;
696
697	/* horizontal chroma */
698	dcss_scaler_filter_design(src_xres, dst_xres, false,
699				  (src_xres == dst_xres) && (ch->c_hstart == 0),
700				  coef, ch->use_nn_interpolation);
701
702	dcss_scaler_program_7_coef_set(ch, DCSS_SCALER_COEF_HCHR, coef);
703
704	/* vertical chroma */
705	dcss_scaler_filter_design(src_yres, dst_yres, program_5_taps,
706				  (src_yres == dst_yres) && (ch->c_vstart == 0),
707				  coef, ch->use_nn_interpolation);
708	if (program_5_taps)
709		dcss_scaler_program_5_coef_set(ch, DCSS_SCALER_COEF_VCHR, coef);
710	else
711		dcss_scaler_program_7_coef_set(ch, DCSS_SCALER_COEF_VCHR, coef);
712}
713
714static void dcss_scaler_rgb_coef_set(struct dcss_scaler_ch *ch,
715				     int src_xres, int src_yres, int dst_xres,
716				     int dst_yres)
717{
718	int coef[PSC_STORED_PHASES][PSC_NUM_TAPS];
719
720	/* horizontal RGB */
721	dcss_scaler_filter_design(src_xres, dst_xres, false,
722				  src_xres == dst_xres, coef,
723				  ch->use_nn_interpolation);
724	dcss_scaler_program_7_coef_set(ch, DCSS_SCALER_COEF_HLUM, coef);
725
726	/* vertical RGB */
727	dcss_scaler_filter_design(src_yres, dst_yres, false,
728				  src_yres == dst_yres, coef,
729				  ch->use_nn_interpolation);
730	dcss_scaler_program_7_coef_set(ch, DCSS_SCALER_COEF_VLUM, coef);
731}
732
733static void dcss_scaler_set_rgb10_order(struct dcss_scaler_ch *ch,
734					const struct drm_format_info *format)
735{
736	u32 a2r10g10b10_format;
737
738	if (format->is_yuv)
739		return;
740
741	ch->sdata_ctrl &= ~A2R10G10B10_FORMAT_MASK;
742
743	if (format->depth != 30)
744		return;
745
746	switch (format->format) {
747	case DRM_FORMAT_ARGB2101010:
748	case DRM_FORMAT_XRGB2101010:
749		a2r10g10b10_format = 0;
750		break;
751
752	case DRM_FORMAT_ABGR2101010:
753	case DRM_FORMAT_XBGR2101010:
754		a2r10g10b10_format = 5;
755		break;
756
757	case DRM_FORMAT_RGBA1010102:
758	case DRM_FORMAT_RGBX1010102:
759		a2r10g10b10_format = 6;
760		break;
761
762	case DRM_FORMAT_BGRA1010102:
763	case DRM_FORMAT_BGRX1010102:
764		a2r10g10b10_format = 11;
765		break;
766
767	default:
768		a2r10g10b10_format = 0;
769		break;
770	}
771
772	ch->sdata_ctrl |= a2r10g10b10_format << A2R10G10B10_FORMAT_POS;
773}
774
775void dcss_scaler_set_filter(struct dcss_scaler *scl, int ch_num,
776			    enum drm_scaling_filter scaling_filter)
777{
778	struct dcss_scaler_ch *ch = &scl->ch[ch_num];
779
780	ch->use_nn_interpolation = scaling_filter == DRM_SCALING_FILTER_NEAREST_NEIGHBOR;
781}
782
783void dcss_scaler_setup(struct dcss_scaler *scl, int ch_num,
784		       const struct drm_format_info *format,
785		       int src_xres, int src_yres, int dst_xres, int dst_yres,
786		       u32 vrefresh_hz)
787{
788	struct dcss_scaler_ch *ch = &scl->ch[ch_num];
789	unsigned int pixel_depth = 0;
790	bool rtr_8line_en = false;
791	bool use_5_taps = false;
792	enum buffer_format src_format = BUF_FMT_ARGB8888_YUV444;
793	enum buffer_format dst_format = BUF_FMT_ARGB8888_YUV444;
794	u32 pix_format = format->format;
795
796	if (format->is_yuv) {
797		dcss_scaler_yuv_enable(ch, true);
798
799		if (pix_format == DRM_FORMAT_NV12 ||
800		    pix_format == DRM_FORMAT_NV21) {
801			rtr_8line_en = true;
802			src_format = BUF_FMT_YUV420;
803		} else if (pix_format == DRM_FORMAT_UYVY ||
804			   pix_format == DRM_FORMAT_VYUY ||
805			   pix_format == DRM_FORMAT_YUYV ||
806			   pix_format == DRM_FORMAT_YVYU) {
807			src_format = BUF_FMT_YUV422;
808		}
809
810		use_5_taps = !rtr_8line_en;
811	} else {
812		dcss_scaler_yuv_enable(ch, false);
813
814		pixel_depth = format->depth;
815	}
816
817	dcss_scaler_fractions_set(ch, src_xres, src_yres, dst_xres,
818				  dst_yres, src_format, dst_format,
819				  PSC_LOC_HORZ_0_VERT_1_OVER_4);
820
821	if (format->is_yuv)
822		dcss_scaler_yuv_coef_set(ch, src_format, dst_format,
823					 use_5_taps, src_xres, src_yres,
824					 dst_xres, dst_yres);
825	else
826		dcss_scaler_rgb_coef_set(ch, src_xres, src_yres,
827					 dst_xres, dst_yres);
828
829	dcss_scaler_rtr_8lines_enable(ch, rtr_8line_en);
830	dcss_scaler_bit_depth_set(ch, pixel_depth);
831	dcss_scaler_set_rgb10_order(ch, format);
832	dcss_scaler_format_set(ch, src_format, dst_format);
833	dcss_scaler_res_set(ch, src_xres, src_yres, dst_xres, dst_yres,
834			    pix_format, dst_format);
835}
836
837/* This function will be called from interrupt context. */
838void dcss_scaler_write_sclctrl(struct dcss_scaler *scl)
839{
840	int chnum;
841
842	dcss_ctxld_assert_locked(scl->ctxld);
843
844	for (chnum = 0; chnum < 3; chnum++) {
845		struct dcss_scaler_ch *ch = &scl->ch[chnum];
846
847		if (ch->scaler_ctrl_chgd) {
848			dcss_ctxld_write_irqsafe(scl->ctxld, scl->ctx_id,
849						 ch->scaler_ctrl,
850						 ch->base_ofs +
851						 DCSS_SCALER_CTRL);
852			ch->scaler_ctrl_chgd = false;
853		}
854	}
855}