/*
 * This file is subject to the terms and conditions of the GNU General Public
 * License. See the file "COPYING" in the main directory of this archive
 * for more details.
 *
 * Quick'n'dirty IP checksum ...
 *
 * Copyright (C) 1998, 1999 Ralf Baechle
 * Copyright (C) 1999 Silicon Graphics, Inc.
 * Copyright (C) 2007 Maciej W. Rozycki
 * Copyright (C) 2014 Imagination Technologies Ltd.
 */
#include <linux/errno.h>
#include <asm/asm.h>
#include <asm/asm-offsets.h>
#include <asm/export.h>
#include <asm/regdef.h>

#ifdef CONFIG_64BIT
/*
 * As we share the code base with the mips32 tree (which uses the o32 ABI
 * register definitions), we need to redefine the register definitions from
 * the n64 ABI naming to the o32 ABI naming.
 */
#undef t0
#undef t1
#undef t2
#undef t3
#define t0 $8
#define t1 $9
#define t2 $10
#define t3 $11
#define t4 $12
#define t5 $13
#define t6 $14
#define t7 $15

#define USE_DOUBLE
#endif

#ifdef USE_DOUBLE

#define LOAD ld
#define LOAD32 lwu
#define ADD daddu
#define NBYTES 8

#else

#define LOAD lw
#define LOAD32 lw
#define ADD addu
#define NBYTES 4

#endif /* USE_DOUBLE */

#define UNIT(unit) ((unit)*NBYTES)

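/*
 * ADDC folds a value into the running ones'-complement sum with an
 * end-around carry: roughly "sum += reg; if (sum < reg) sum++;" in C.
 * ADDC uses the register width selected above (daddu with USE_DOUBLE),
 * while ADDC32 always performs a 32-bit add.
 */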
#define ADDC(sum,reg) \
 .set push; \
 .set noat; \
 ADD sum, reg; \
 sltu v1, sum, reg; \
 ADD sum, v1; \
 .set pop

#define ADDC32(sum,reg) \
 .set push; \
 .set noat; \
 addu sum, reg; \
 sltu v1, sum, reg; \
 addu sum, v1; \
 .set pop

#define CSUM_BIGCHUNK1(src, offset, sum, _t0, _t1, _t2, _t3) \
 LOAD _t0, (offset + UNIT(0))(src); \
 LOAD _t1, (offset + UNIT(1))(src); \
 LOAD _t2, (offset + UNIT(2))(src); \
 LOAD _t3, (offset + UNIT(3))(src); \
 ADDC(_t0, _t1); \
 ADDC(_t2, _t3); \
 ADDC(sum, _t0); \
 ADDC(sum, _t2)

#ifdef USE_DOUBLE
#define CSUM_BIGCHUNK(src, offset, sum, _t0, _t1, _t2, _t3) \
 CSUM_BIGCHUNK1(src, offset, sum, _t0, _t1, _t2, _t3)
#else
#define CSUM_BIGCHUNK(src, offset, sum, _t0, _t1, _t2, _t3) \
 CSUM_BIGCHUNK1(src, offset, sum, _t0, _t1, _t2, _t3); \
 CSUM_BIGCHUNK1(src, offset + 0x10, sum, _t0, _t1, _t2, _t3)
#endif
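
/*
 * Either way CSUM_BIGCHUNK consumes 0x20 bytes per invocation: four
 * doubleword loads with USE_DOUBLE, eight word loads without.
 */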

/*
 * a0: source address
 * a1: length of the area to checksum
 * a2: partial checksum
 */

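/*
 * In C terms this is roughly
 *	__wsum csum_partial(const void *buff, int len, __wsum sum)
 * with the result returned in v0.
 */
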
#define src a0
#define sum v0

 .text
 .set noreorder
 .align 5
LEAF(csum_partial)
EXPORT_SYMBOL(csum_partial)
 move sum, zero
 move t7, zero

 sltiu t8, a1, 0x8
 bnez t8, .Lsmall_csumcpy /* < 8 bytes to copy */
 move t2, a1

 andi t7, src, 0x1 /* odd buffer? */

.Lhword_align:
 beqz t7, .Lword_align
 andi t8, src, 0x2

 lbu t0, (src)
 LONG_SUBU a1, a1, 0x1
#ifdef __MIPSEL__
 sll t0, t0, 8
#endif
 ADDC(sum, t0)
 PTR_ADDU src, src, 0x1
 andi t8, src, 0x2

.Lword_align:
 beqz t8, .Ldword_align
 sltiu t8, a1, 56

 lhu t0, (src)
 LONG_SUBU a1, a1, 0x2
 ADDC(sum, t0)
 sltiu t8, a1, 56
 PTR_ADDU src, src, 0x2

.Ldword_align:
 bnez t8, .Ldo_end_words
 move t8, a1

 andi t8, src, 0x4
 beqz t8, .Lqword_align
 andi t8, src, 0x8

 LOAD32 t0, 0x00(src)
 LONG_SUBU a1, a1, 0x4
 ADDC(sum, t0)
 PTR_ADDU src, src, 0x4
 andi t8, src, 0x8

.Lqword_align:
 beqz t8, .Loword_align
 andi t8, src, 0x10

#ifdef USE_DOUBLE
 ld t0, 0x00(src)
 LONG_SUBU a1, a1, 0x8
 ADDC(sum, t0)
#else
 lw t0, 0x00(src)
 lw t1, 0x04(src)
 LONG_SUBU a1, a1, 0x8
 ADDC(sum, t0)
 ADDC(sum, t1)
#endif
 PTR_ADDU src, src, 0x8
 andi t8, src, 0x10

.Loword_align:
 beqz t8, .Lbegin_movement
 LONG_SRL t8, a1, 0x7

#ifdef USE_DOUBLE
 ld t0, 0x00(src)
 ld t1, 0x08(src)
 ADDC(sum, t0)
 ADDC(sum, t1)
#else
 CSUM_BIGCHUNK1(src, 0x00, sum, t0, t1, t3, t4)
#endif
 LONG_SUBU a1, a1, 0x10
 PTR_ADDU src, src, 0x10
 LONG_SRL t8, a1, 0x7

.Lbegin_movement:
 beqz t8, 1f
 andi t2, a1, 0x40

.Lmove_128bytes:
 CSUM_BIGCHUNK(src, 0x00, sum, t0, t1, t3, t4)
 CSUM_BIGCHUNK(src, 0x20, sum, t0, t1, t3, t4)
 CSUM_BIGCHUNK(src, 0x40, sum, t0, t1, t3, t4)
 CSUM_BIGCHUNK(src, 0x60, sum, t0, t1, t3, t4)
 LONG_SUBU t8, t8, 0x01
 .set reorder /* DADDI_WAR */
 PTR_ADDU src, src, 0x80
 bnez t8, .Lmove_128bytes
 .set noreorder

1:
 beqz t2, 1f
 andi t2, a1, 0x20

.Lmove_64bytes:
 CSUM_BIGCHUNK(src, 0x00, sum, t0, t1, t3, t4)
 CSUM_BIGCHUNK(src, 0x20, sum, t0, t1, t3, t4)
 PTR_ADDU src, src, 0x40

1:
 beqz t2, .Ldo_end_words
 andi t8, a1, 0x1c

.Lmove_32bytes:
 CSUM_BIGCHUNK(src, 0x00, sum, t0, t1, t3, t4)
 andi t8, a1, 0x1c
 PTR_ADDU src, src, 0x20

.Ldo_end_words:
 beqz t8, .Lsmall_csumcpy
 andi t2, a1, 0x3
 LONG_SRL t8, t8, 0x2

.Lend_words:
 LOAD32 t0, (src)
 LONG_SUBU t8, t8, 0x1
 ADDC(sum, t0)
 .set reorder /* DADDI_WAR */
 PTR_ADDU src, src, 0x4
 bnez t8, .Lend_words
 .set noreorder

/* unknown src alignment and < 8 bytes to go */
.Lsmall_csumcpy:
 move a1, t2

 andi t0, a1, 4
 beqz t0, 1f
 andi t0, a1, 2

 /* Still a full word to go */
 ulw t1, (src)
 PTR_ADDIU src, 4
#ifdef USE_DOUBLE
 dsll t1, t1, 32 /* clear lower 32bit */
#endif
 ADDC(sum, t1)

1: move t1, zero
 beqz t0, 1f
 andi t0, a1, 1

 /* Still a halfword to go */
 ulhu t1, (src)
 PTR_ADDIU src, 2

1: beqz t0, 1f
 sll t1, t1, 16

 lbu t2, (src)
 nop

#ifdef __MIPSEB__
 sll t2, t2, 8
#endif
 or t1, t2

1: ADDC(sum, t1)

 /* fold checksum */
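 /*
  * With USE_DOUBLE the running sum is 64 bits wide, so add its upper
  * and lower halves (propagating the carry) to reduce it to a 32-bit
  * ones'-complement value before returning.
  */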
#ifdef USE_DOUBLE
 dsll32 v1, sum, 0
 daddu sum, v1
 sltu v1, sum, v1
 dsra32 sum, sum, 0
 addu sum, v1
#endif

 /* odd buffer alignment? */
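 /*
  * If the buffer started on an odd address the bytes were summed in
  * swapped lanes, so swap the bytes within each halfword of the folded
  * sum (wsbh where available, shifts and masks with the 0x00ff00ff
  * pattern otherwise).
  */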
#if defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR5) || \
 defined(CONFIG_CPU_LOONGSON64)
 .set push
 .set arch=mips32r2
 wsbh v1, sum
 movn sum, v1, t7
 .set pop
#else
 beqz t7, 1f /* odd buffer alignment? */
 lui v1, 0x00ff
 addu v1, 0x00ff
 and t0, sum, v1
 sll t0, t0, 8
 srl sum, sum, 8
 and sum, sum, v1
 or sum, sum, t0
1:
#endif
 .set reorder
 /* Add the passed partial csum. */
 ADDC32(sum, a2)
 jr ra
 .set noreorder
 END(csum_partial)


/*
 * checksum and copy routines based on memcpy.S
 *
 * csum_partial_copy_nocheck(src, dst, len)
 * __csum_partial_copy_kernel(src, dst, len)
 *
 * See "Spec" in memcpy.S for details. Unlike __copy_user, all
 * functions in this file use the standard calling convention.
 */
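/*
 * Roughly: both entry points take (src, dst, len) in a0/a1/a2 and
 * return the 32-bit partial checksum in v0, or 0 if a fault was taken
 * (see .L_exc below).
 */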

#define src a0
#define dst a1
#define len a2
#define sum v0
#define odd t8

/*
 * All exception handlers simply return 0.
 */

/* Instruction type */
#define LD_INSN 1
#define ST_INSN 2
#define LEGACY_MODE 1
#define EVA_MODE 2
#define USEROP 1
#define KERNELOP 2

/*
 * Wrapper to add an entry in the exception table
 * in case the insn causes a memory exception.
 * Arguments:
 * insn : Load/store instruction
 * type : Instruction type
 * reg : Register
 * addr : Address
 */
#define EXC(insn, type, reg, addr) \
 .if \mode == LEGACY_MODE; \
9: insn reg, addr; \
 .section __ex_table,"a"; \
 PTR_WD 9b, .L_exc; \
 .previous; \
 /* This is enabled in EVA mode */ \
 .else; \
 /* If loading from user or storing to user */ \
 .if ((\from == USEROP) && (type == LD_INSN)) || \
 ((\to == USEROP) && (type == ST_INSN)); \
9: __BUILD_EVA_INSN(insn##e, reg, addr); \
 .section __ex_table,"a"; \
 PTR_WD 9b, .L_exc; \
 .previous; \
 .else; \
 /* EVA without exception */ \
 insn reg, addr; \
 .endif; \
 .endif
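
/*
 * In LEGACY_MODE, EXC(lw, LD_INSN, t0, 0(src)) thus emits a plain
 * "lw t0, 0(src)" at local label 9: plus a __ex_table entry sending a
 * fault on that access to .L_exc; in EVA mode the EVA variant of the
 * instruction (e.g. lwe) is used for the user side of the copy.
 */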

#undef LOAD

#ifdef USE_DOUBLE

#define LOADK ld /* No exception */
#define LOAD(reg, addr) EXC(ld, LD_INSN, reg, addr)
#define LOADBU(reg, addr) EXC(lbu, LD_INSN, reg, addr)
#define LOADL(reg, addr) EXC(ldl, LD_INSN, reg, addr)
#define LOADR(reg, addr) EXC(ldr, LD_INSN, reg, addr)
#define STOREB(reg, addr) EXC(sb, ST_INSN, reg, addr)
#define STOREL(reg, addr) EXC(sdl, ST_INSN, reg, addr)
#define STORER(reg, addr) EXC(sdr, ST_INSN, reg, addr)
#define STORE(reg, addr) EXC(sd, ST_INSN, reg, addr)
#define ADD daddu
#define SUB dsubu
#define SRL dsrl
#define SLL dsll
#define SLLV dsllv
#define SRLV dsrlv
#define NBYTES 8
#define LOG_NBYTES 3

#else

#define LOADK lw /* No exception */
#define LOAD(reg, addr) EXC(lw, LD_INSN, reg, addr)
#define LOADBU(reg, addr) EXC(lbu, LD_INSN, reg, addr)
#define LOADL(reg, addr) EXC(lwl, LD_INSN, reg, addr)
#define LOADR(reg, addr) EXC(lwr, LD_INSN, reg, addr)
#define STOREB(reg, addr) EXC(sb, ST_INSN, reg, addr)
#define STOREL(reg, addr) EXC(swl, ST_INSN, reg, addr)
#define STORER(reg, addr) EXC(swr, ST_INSN, reg, addr)
#define STORE(reg, addr) EXC(sw, ST_INSN, reg, addr)
#define ADD addu
#define SUB subu
#define SRL srl
#define SLL sll
#define SLLV sllv
#define SRLV srlv
#define NBYTES 4
#define LOG_NBYTES 2

#endif /* USE_DOUBLE */

#ifdef CONFIG_CPU_LITTLE_ENDIAN
#define LDFIRST LOADR
#define LDREST LOADL
#define STFIRST STORER
#define STREST STOREL
#define SHIFT_DISCARD SLLV
#define SHIFT_DISCARD_REVERT SRLV
#else
#define LDFIRST LOADL
#define LDREST LOADR
#define STFIRST STOREL
#define STREST STORER
#define SHIFT_DISCARD SRLV
#define SHIFT_DISCARD_REVERT SLLV
#endif
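
/*
 * LDFIRST/LDREST (and STFIRST/STREST) pair the left/right partial
 * load/store instructions so an unaligned NBYTES-wide access can be
 * assembled from two instructions; which of lwl/lwr (ldl/ldr) goes
 * first depends on endianness.
 */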

#define FIRST(unit) ((unit)*NBYTES)
#define REST(unit) (FIRST(unit)+NBYTES-1)

#define ADDRMASK (NBYTES-1)

#ifndef CONFIG_CPU_DADDI_WORKAROUNDS
 .set noat
#else
 .set at=v1
#endif

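/*
 * mode selects LEGACY_MODE or EVA_MODE; from/to say whether the source
 * and destination accesses are USEROP or KERNELOP, which EXC above
 * uses to decide when the EVA load/store variants are needed.
 */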
 .macro __BUILD_CSUM_PARTIAL_COPY_USER mode, from, to

 li sum, -1
 move odd, zero
 /*
 * Note: dst & src may be unaligned, len may be 0
 * Temps
 */
 /*
 * The "issue break"s below are very approximate.
 * Issue delays for dcache fills will perturb the schedule, as will
 * load queue full replay traps, etc.
 *
 * If len < NBYTES use byte operations.
 */
 sltu t2, len, NBYTES
 and t1, dst, ADDRMASK
 bnez t2, .Lcopy_bytes_checklen\@
 and t0, src, ADDRMASK
 andi odd, dst, 0x1 /* odd buffer? */
 bnez t1, .Ldst_unaligned\@
 nop
 bnez t0, .Lsrc_unaligned_dst_aligned\@
 /*
 * use delay slot for fall-through
 * src and dst are aligned; need to compute rem
 */
.Lboth_aligned\@:
 SRL t0, len, LOG_NBYTES+3 # +3 for 8 units/iter
 beqz t0, .Lcleanup_both_aligned\@ # len < 8*NBYTES
 nop
 SUB len, 8*NBYTES # subtract here for bgez loop
 .align 4
1:
 LOAD(t0, UNIT(0)(src))
 LOAD(t1, UNIT(1)(src))
 LOAD(t2, UNIT(2)(src))
 LOAD(t3, UNIT(3)(src))
 LOAD(t4, UNIT(4)(src))
 LOAD(t5, UNIT(5)(src))
 LOAD(t6, UNIT(6)(src))
 LOAD(t7, UNIT(7)(src))
 SUB len, len, 8*NBYTES
 ADD src, src, 8*NBYTES
 STORE(t0, UNIT(0)(dst))
 ADDC(t0, t1)
 STORE(t1, UNIT(1)(dst))
 ADDC(sum, t0)
 STORE(t2, UNIT(2)(dst))
 ADDC(t2, t3)
 STORE(t3, UNIT(3)(dst))
 ADDC(sum, t2)
 STORE(t4, UNIT(4)(dst))
 ADDC(t4, t5)
 STORE(t5, UNIT(5)(dst))
 ADDC(sum, t4)
 STORE(t6, UNIT(6)(dst))
 ADDC(t6, t7)
 STORE(t7, UNIT(7)(dst))
 ADDC(sum, t6)
 .set reorder /* DADDI_WAR */
 ADD dst, dst, 8*NBYTES
 bgez len, 1b
 .set noreorder
 ADD len, 8*NBYTES # revert len (see above)

 /*
 * len == the number of bytes left to copy < 8*NBYTES
 */
.Lcleanup_both_aligned\@:
#define rem t7
 beqz len, .Ldone\@
 sltu t0, len, 4*NBYTES
 bnez t0, .Lless_than_4units\@
 and rem, len, (NBYTES-1) # rem = len % NBYTES
 /*
 * len >= 4*NBYTES
 */
 LOAD(t0, UNIT(0)(src))
 LOAD(t1, UNIT(1)(src))
 LOAD(t2, UNIT(2)(src))
 LOAD(t3, UNIT(3)(src))
 SUB len, len, 4*NBYTES
 ADD src, src, 4*NBYTES
 STORE(t0, UNIT(0)(dst))
 ADDC(t0, t1)
 STORE(t1, UNIT(1)(dst))
 ADDC(sum, t0)
 STORE(t2, UNIT(2)(dst))
 ADDC(t2, t3)
 STORE(t3, UNIT(3)(dst))
 ADDC(sum, t2)
 .set reorder /* DADDI_WAR */
 ADD dst, dst, 4*NBYTES
 beqz len, .Ldone\@
 .set noreorder
.Lless_than_4units\@:
 /*
 * rem = len % NBYTES
 */
 beq rem, len, .Lcopy_bytes\@
 nop
1:
 LOAD(t0, 0(src))
 ADD src, src, NBYTES
 SUB len, len, NBYTES
 STORE(t0, 0(dst))
 ADDC(sum, t0)
 .set reorder /* DADDI_WAR */
 ADD dst, dst, NBYTES
 bne rem, len, 1b
 .set noreorder

 /*
 * src and dst are aligned, need to copy rem bytes (rem < NBYTES)
 * A loop would do only a byte at a time with possible branch
 * mispredicts. Can't do an explicit LOAD dst,mask,or,STORE
 * because can't assume read-access to dst. Instead, use
 * STREST dst, which doesn't require read access to dst.
 *
 * This code should perform better than a simple loop on modern,
 * wide-issue mips processors because the code has fewer branches and
 * more instruction-level parallelism.
 */
#define bits t2
 beqz len, .Ldone\@
 ADD t1, dst, len # t1 is just past last byte of dst
 li bits, 8*NBYTES
 SLL rem, len, 3 # rem = number of bits to keep
 LOAD(t0, 0(src))
 SUB bits, bits, rem # bits = number of bits to discard
 SHIFT_DISCARD t0, t0, bits
 STREST(t0, -1(t1))
 SHIFT_DISCARD_REVERT t0, t0, bits
 .set reorder
 ADDC(sum, t0)
 b .Ldone\@
 .set noreorder
.Ldst_unaligned\@:
 /*
 * dst is unaligned
 * t0 = src & ADDRMASK
 * t1 = dst & ADDRMASK; T1 > 0
 * len >= NBYTES
 *
 * Copy enough bytes to align dst
 * Set match = (src and dst have same alignment)
 */
#define match rem
 LDFIRST(t3, FIRST(0)(src))
 ADD t2, zero, NBYTES
 LDREST(t3, REST(0)(src))
 SUB t2, t2, t1 # t2 = number of bytes copied
 xor match, t0, t1
 STFIRST(t3, FIRST(0)(dst))
 SLL t4, t1, 3 # t4 = number of bits to discard
 SHIFT_DISCARD t3, t3, t4
 /* no SHIFT_DISCARD_REVERT to handle odd buffer properly */
 ADDC(sum, t3)
 beq len, t2, .Ldone\@
 SUB len, len, t2
 ADD dst, dst, t2
 beqz match, .Lboth_aligned\@
 ADD src, src, t2

.Lsrc_unaligned_dst_aligned\@:
 SRL t0, len, LOG_NBYTES+2 # +2 for 4 units/iter
 beqz t0, .Lcleanup_src_unaligned\@
 and rem, len, (4*NBYTES-1) # rem = len % 4*NBYTES
1:
/*
 * Avoid consecutive LD*'s to the same register since some mips
 * implementations can't issue them in the same cycle.
 * It's OK to load FIRST(N+1) before REST(N) because the two addresses
 * are to the same unit (unless src is aligned, but it's not).
 */
 LDFIRST(t0, FIRST(0)(src))
 LDFIRST(t1, FIRST(1)(src))
 SUB len, len, 4*NBYTES
 LDREST(t0, REST(0)(src))
 LDREST(t1, REST(1)(src))
 LDFIRST(t2, FIRST(2)(src))
 LDFIRST(t3, FIRST(3)(src))
 LDREST(t2, REST(2)(src))
 LDREST(t3, REST(3)(src))
 ADD src, src, 4*NBYTES
#ifdef CONFIG_CPU_SB1
 nop # improves slotting
#endif
 STORE(t0, UNIT(0)(dst))
 ADDC(t0, t1)
 STORE(t1, UNIT(1)(dst))
 ADDC(sum, t0)
 STORE(t2, UNIT(2)(dst))
 ADDC(t2, t3)
 STORE(t3, UNIT(3)(dst))
 ADDC(sum, t2)
 .set reorder /* DADDI_WAR */
 ADD dst, dst, 4*NBYTES
 bne len, rem, 1b
 .set noreorder

.Lcleanup_src_unaligned\@:
 beqz len, .Ldone\@
 and rem, len, NBYTES-1 # rem = len % NBYTES
 beq rem, len, .Lcopy_bytes\@
 nop
1:
 LDFIRST(t0, FIRST(0)(src))
 LDREST(t0, REST(0)(src))
 ADD src, src, NBYTES
 SUB len, len, NBYTES
 STORE(t0, 0(dst))
 ADDC(sum, t0)
 .set reorder /* DADDI_WAR */
 ADD dst, dst, NBYTES
 bne len, rem, 1b
 .set noreorder

.Lcopy_bytes_checklen\@:
 beqz len, .Ldone\@
 nop
.Lcopy_bytes\@:
 /* 0 < len < NBYTES */
#ifdef CONFIG_CPU_LITTLE_ENDIAN
#define SHIFT_START 0
#define SHIFT_INC 8
#else
#define SHIFT_START 8*(NBYTES-1)
#define SHIFT_INC -8
#endif
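 /*
  * Accumulate the trailing bytes into t2 at the byte positions they
  * would occupy in a full word load, so the single ADDC at
  * .Lcopy_bytes_done matches word-at-a-time summing.
  */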
 move t2, zero # partial word
 li t3, SHIFT_START # shift
#define COPY_BYTE(N) \
 LOADBU(t0, N(src)); \
 SUB len, len, 1; \
 STOREB(t0, N(dst)); \
 SLLV t0, t0, t3; \
 addu t3, SHIFT_INC; \
 beqz len, .Lcopy_bytes_done\@; \
 or t2, t0

 COPY_BYTE(0)
 COPY_BYTE(1)
#ifdef USE_DOUBLE
 COPY_BYTE(2)
 COPY_BYTE(3)
 COPY_BYTE(4)
 COPY_BYTE(5)
#endif
 LOADBU(t0, NBYTES-2(src))
 SUB len, len, 1
 STOREB(t0, NBYTES-2(dst))
 SLLV t0, t0, t3
 or t2, t0
.Lcopy_bytes_done\@:
 ADDC(sum, t2)
.Ldone\@:
 /* fold checksum */
 .set push
 .set noat
#ifdef USE_DOUBLE
 dsll32 v1, sum, 0
 daddu sum, v1
 sltu v1, sum, v1
 dsra32 sum, sum, 0
 addu sum, v1
#endif

#if defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR5) || \
 defined(CONFIG_CPU_LOONGSON64)
 .set push
 .set arch=mips32r2
 wsbh v1, sum
 movn sum, v1, odd
 .set pop
#else
 beqz odd, 1f /* odd buffer alignment? */
 lui v1, 0x00ff
 addu v1, 0x00ff
 and t0, sum, v1
 sll t0, t0, 8
 srl sum, sum, 8
 and sum, sum, v1
 or sum, sum, t0
1:
#endif
 .set pop
 .set reorder
 jr ra
 .set noreorder
 .endm

 .set noreorder
.L_exc:
 jr ra
 li v0, 0

FEXPORT(__csum_partial_copy_nocheck)
EXPORT_SYMBOL(__csum_partial_copy_nocheck)
#ifndef CONFIG_EVA
FEXPORT(__csum_partial_copy_to_user)
EXPORT_SYMBOL(__csum_partial_copy_to_user)
FEXPORT(__csum_partial_copy_from_user)
EXPORT_SYMBOL(__csum_partial_copy_from_user)
#endif
__BUILD_CSUM_PARTIAL_COPY_USER LEGACY_MODE USEROP USEROP

#ifdef CONFIG_EVA
LEAF(__csum_partial_copy_to_user)
__BUILD_CSUM_PARTIAL_COPY_USER EVA_MODE KERNELOP USEROP
END(__csum_partial_copy_to_user)

LEAF(__csum_partial_copy_from_user)
__BUILD_CSUM_PARTIAL_COPY_USER EVA_MODE USEROP KERNELOP
END(__csum_partial_copy_from_user)
#endif