Linux Audio

Check our new training course

Loading...
v4.17
 
/*
 * OpenRISC memset.S
 *
 * Hand-optimized assembler version of memset for OpenRISC.
 * Algorithm inspired by several other arch-specific memset routines
 * in the kernel tree.
 *
 * Copyright (C) 2015 Olof Kindgren <olof.kindgren@gmail.com>
 *
 *      This program is free software; you can redistribute it and/or
 *      modify it under the terms of the GNU General Public License
 *      as published by the Free Software Foundation; either version
 *      2 of the License, or (at your option) any later version.
 */
15
/*
 * void *memset(void *s, int c, size_t n)
 *
 * Store the low 8 bits of c into each of the n bytes starting at s and
 * return s.
 *
 * In:      r3  = s
 *          r4  = c
 *          r5  = n
 *          r9  = return address
 * Out:     r11 = s
 * Scratch: r13 = fill pattern (c replicated into all four bytes)
 *          r15 = temporary (pattern building, then s & 3)
 *          r17 = bytes still to store
 *          r19 = current store address
 *
 * The instruction following each l.bf/l.jr sits in that branch's delay
 * slot and executes whether or not the branch is taken; such
 * instructions are indented by one extra space below.  Several of them
 * are load-bearing, so do not reorder instructions around branches.
 */
	.global memset
	.type	memset, @function
memset:
	/* Exit if n == 0 */
	l.sfeqi		r5, 0
	l.bf		4f
	/* Delay slot: truncate c to char (harmless when exiting) */
	 l.andi  	r13, r4, 0xff

	/* Skip word extension if c is 0: r13 is already the word 0 */
	l.sfeqi		r13, 0
	l.bf		1f
	/*
	 * Delay slot: flag = (n <= 7), i.e. fewer than two whole words.
	 * The flag is consumed by the l.bf after label 1; no compare
	 * instruction runs in between, on either path.
	 */
	 l.sfleui	r5, 7

	/* Extend char c to 32-bit word cccc in r13 */
	l.slli		r15, r13, 16	/* r13 = 000c, r15 = 0c00 */
	l.or		r13, r13, r15	/* r13 = 0c0c, r15 = 0c00 */
	l.slli		r15, r13, 8	/* r13 = 0c0c, r15 = c0c0 */
	l.or		r13, r13, r15	/* r13 = cccc, r15 = c0c0 */

1:	l.addi		r19, r3, 0	/* r19 = dst = s */
	/* n <= 7: use the byte store loop for everything */
	l.bf		3f
	 l.or		r17, r5, r0	/* r17 = n (both paths) */

	/* Here n >= 8.  Mask out two LSBs of s to check alignment */
	l.andi		r15, r3, 0x3

	/* lsb == 00: already word aligned, go to word store loop */
	l.sfeqi		r15, 0
	l.bf		2f
	 l.addi		r19, r3, 0	/* dst = s */

	/* lsb == 01, 10 or 11: store first head byte */
	l.sb		0(r3), r13	/* s[0] = c */
	l.addi		r17, r17, -1	/* n -= 1 */

	l.sfeqi		r15, 3
	l.bf		2f
	 l.addi		r19, r3, 1	/* dst = s + 1 */

	/* lsb == 01 or 10: store second head byte */
	l.sb		1(r3), r13	/* s[1] = c */
	l.addi		r17, r17, -1	/* n -= 1 */

	l.sfeqi		r15, 2
	l.bf		2f
	 l.addi		r19, r3, 2	/* dst = s + 2 */

	/* lsb == 01: store third head byte */
	l.sb		2(r3), r13	/* s[2] = c */
	l.addi		r17, r17, -1	/* n -= 1 */
	l.addi		r19, r3, 3	/* dst = s + 3 */

	/*
	 * Word store loop.  dst is word aligned and at least 5 bytes
	 * remain (n >= 8 minus at most 3 head bytes), so the first
	 * word store is always in range.
	 */
2:	l.sw		0(r19), r13	/* *(u32 *)dst = cccc */
	l.addi		r17, r17, -4	/* n -= 4 */
	l.sfgeui	r17, 4		/* another whole word left? */
	l.bf		2b
	 l.addi		r19, r19, 4	/* dst += 4 (runs on loop exit too) */

	/* 0..3 bytes remain; exit if none, else store the tail bytes */
	l.sfeqi		r17, 0
	l.bf		4f

	/*
	 * Byte store loop.  The first instruction doubles as the delay
	 * slot of the l.bf above: when that branch exits, the extra
	 * decrement of r17 is harmless; when it falls through, the
	 * decrement has run exactly once, as the loop requires.
	 */
3:	l.addi		r17, r17, -1	/* n -= 1 */
	l.sb		0(r19), r13	/* *dst = c (low byte of r13) */
	l.sfnei		r17, 0
	l.bf		3b
	 l.addi		r19, r19, 1	/* dst += 1 */

	/* Return the original s */
4:	l.jr		r9
	 l.ori		r11, r3, 0	/* delay slot: r11 = s */
	.size	memset, .-memset
v6.2
/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * OpenRISC memset.S
 *
 * Hand-optimized assembler version of memset for OpenRISC.
 * Algorithm inspired by several other arch-specific memset routines
 * in the kernel tree.
 *
 * Copyright (C) 2015 Olof Kindgren <olof.kindgren@gmail.com>
 */
11
/*
 * void *memset(void *s, int c, size_t n)
 *
 * Store the low 8 bits of c into each of the n bytes starting at s and
 * return s.
 *
 * In:      r3  = s
 *          r4  = c
 *          r5  = n
 *          r9  = return address
 * Out:     r11 = s
 * Scratch: r13 = fill pattern (c replicated into all four bytes)
 *          r15 = temporary (pattern building, then s & 3)
 *          r17 = bytes still to store
 *          r19 = current store address
 *
 * The instruction following each l.bf/l.jr sits in that branch's delay
 * slot and executes whether or not the branch is taken; such
 * instructions are indented by one extra space below.  Several of them
 * are load-bearing, so do not reorder instructions around branches.
 */
	.global memset
	.type	memset, @function
memset:
	/* Exit if n == 0 */
	l.sfeqi		r5, 0
	l.bf		4f
	/* Delay slot: truncate c to char (harmless when exiting) */
	 l.andi  	r13, r4, 0xff

	/* Skip word extension if c is 0: r13 is already the word 0 */
	l.sfeqi		r13, 0
	l.bf		1f
	/*
	 * Delay slot: flag = (n <= 7), i.e. fewer than two whole words.
	 * The flag is consumed by the l.bf after label 1; no compare
	 * instruction runs in between, on either path.
	 */
	 l.sfleui	r5, 7

	/* Extend char c to 32-bit word cccc in r13 */
	l.slli		r15, r13, 16	/* r13 = 000c, r15 = 0c00 */
	l.or		r13, r13, r15	/* r13 = 0c0c, r15 = 0c00 */
	l.slli		r15, r13, 8	/* r13 = 0c0c, r15 = c0c0 */
	l.or		r13, r13, r15	/* r13 = cccc, r15 = c0c0 */

1:	l.addi		r19, r3, 0	/* r19 = dst = s */
	/* n <= 7: use the byte store loop for everything */
	l.bf		3f
	 l.or		r17, r5, r0	/* r17 = n (both paths) */

	/* Here n >= 8.  Mask out two LSBs of s to check alignment */
	l.andi		r15, r3, 0x3

	/* lsb == 00: already word aligned, go to word store loop */
	l.sfeqi		r15, 0
	l.bf		2f
	 l.addi		r19, r3, 0	/* dst = s */

	/* lsb == 01, 10 or 11: store first head byte */
	l.sb		0(r3), r13	/* s[0] = c */
	l.addi		r17, r17, -1	/* n -= 1 */

	l.sfeqi		r15, 3
	l.bf		2f
	 l.addi		r19, r3, 1	/* dst = s + 1 */

	/* lsb == 01 or 10: store second head byte */
	l.sb		1(r3), r13	/* s[1] = c */
	l.addi		r17, r17, -1	/* n -= 1 */

	l.sfeqi		r15, 2
	l.bf		2f
	 l.addi		r19, r3, 2	/* dst = s + 2 */

	/* lsb == 01: store third head byte */
	l.sb		2(r3), r13	/* s[2] = c */
	l.addi		r17, r17, -1	/* n -= 1 */
	l.addi		r19, r3, 3	/* dst = s + 3 */

	/*
	 * Word store loop.  dst is word aligned and at least 5 bytes
	 * remain (n >= 8 minus at most 3 head bytes), so the first
	 * word store is always in range.
	 */
2:	l.sw		0(r19), r13	/* *(u32 *)dst = cccc */
	l.addi		r17, r17, -4	/* n -= 4 */
	l.sfgeui	r17, 4		/* another whole word left? */
	l.bf		2b
	 l.addi		r19, r19, 4	/* dst += 4 (runs on loop exit too) */

	/* 0..3 bytes remain; exit if none, else store the tail bytes */
	l.sfeqi		r17, 0
	l.bf		4f

	/*
	 * Byte store loop.  The first instruction doubles as the delay
	 * slot of the l.bf above: when that branch exits, the extra
	 * decrement of r17 is harmless; when it falls through, the
	 * decrement has run exactly once, as the loop requires.
	 */
3:	l.addi		r17, r17, -1	/* n -= 1 */
	l.sb		0(r19), r13	/* *dst = c (low byte of r13) */
	l.sfnei		r17, 0
	l.bf		3b
	 l.addi		r19, r19, 1	/* dst += 1 */

	/* Return the original s */
4:	l.jr		r9
	 l.ori		r11, r3, 0	/* delay slot: r11 = s */
	.size	memset, .-memset