Loading...
1/*
2 * This program is free software; you can redistribute it and/or modify
3 * it under the terms of the GNU General Public License as published by
4 * the Free Software Foundation; either version 2 of the License, or
5 * (at your option) any later version.
6 *
7 * This program is distributed in the hope that it will be useful,
8 * but WITHOUT ANY WARRANTY; without even the implied warranty of
9 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 * GNU General Public License for more details.
11 *
12 * You should have received a copy of the GNU General Public License
13 * along with this program; if not, write to the Free Software
14 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
15 *
16 * Copyright (C) IBM Corporation, 2012
17 *
18 * Author: Anton Blanchard <anton@au.ibm.com>
19 */
20#include <asm/page.h>
21#include <asm/ppc_asm.h>
22
23_GLOBAL(copypage_power7)
/*
 * copypage_power7 - copy PAGE_SIZE bytes, 128 bytes per loop iteration.
 *
 * In:   r3 = destination ("to"), r4 = source ("from").
 *       Both are relied upon to be page aligned (see the prefetch note
 *       below, which depends on the low 7 address bits being zero).
 * Out:  no value is set up for the caller; r3 and r4 have each been
 *       advanced by PAGE_SIZE when the copy loop finishes.
 * Uses: r0, r5-r12, ctr; cr0 (CONFIG_ALTIVEC only); v0-v7 on the vector
 *       path; r14-r20 on the GPR path (saved to and restored from the
 *       stack frame around the loop).
 *
 * Two copy loops exist. With CONFIG_ALTIVEC, enter_vmx_copy is called
 * first: a non-zero result selects the lvx/stvx loop, a zero result
 * falls back to the ld/std loop at .Lnonvmx_copy. Without
 * CONFIG_ALTIVEC only the ld/std loop is assembled.
 *
 * NOTE(review): the decimal number that starts every line of this
 * listing, including the digits fused in front of the two "1:" loop
 * labels that "bdnz 1b" branches to, looks like line numbering left
 * over from an export. Confirm against the pristine file before
 * assembling.
 */
24 /*
25 * We prefetch both the source and destination using enhanced touch
26 * instructions. We use a stream ID of 0 for the load side and
27 * 1 for the store side. Since source and destination are page
28 * aligned we don't need to clear the bottom 7 bits of either
29 * address.
30 */
31 ori r9,r3,1 /* stream=1 => to */
32
/* r7 = length/depth descriptor for stream 0; the unit count depends on the page size. */
33#ifdef CONFIG_PPC_64K_PAGES
34 lis r7,0x0E01 /* depth=7
35 * units/cachelines=512 */
36#else
37 lis r7,0x0E00 /* depth=7 */
38 ori r7,r7,0x1000 /* units/cachelines=32 */
39#endif
/* r10 = the same descriptor with the stream ID set to 1 (store side). */
40 ori r10,r7,1 /* stream=1 */
41
/*
 * lis sign-extends its shifted immediate on 64-bit, so clrldi clears
 * the upper 32 bits to leave r8 = 0x0000000080000000.
 */
42 lis r8,0x8000 /* GO=1 */
43 clrldi r8,r8,32
44
45 /* setup read stream 0 */
46 dcbt 0,r4,0b01000 /* addr from */
47 dcbt 0,r7,0b01010 /* length and depth from */
48 /* setup write stream 1 */
49 dcbtst 0,r9,0b01000 /* addr to */
50 dcbtst 0,r10,0b01010 /* length and depth to */
/* NOTE(review): eieio presumably orders the stream descriptions ahead of the GO touch -- confirm against the ISA. */
51 eieio
52 dcbt 0,r8,0b01010 /* all streams GO */
53
54#ifdef CONFIG_ALTIVEC
/*
 * Call enter_vmx_copy with r3, r4 and LR preserved across the call.
 * r3/r4 are written into the R31/R30 slots of the frame that the stdu
 * below is about to create (hence the -STACKFRAMESIZE bias); LR goes
 * to 16(r1) relative to the stack pointer as it was on entry.
 */
55 mflr r0
56 std r3,-STACKFRAMESIZE+STK_REG(R31)(r1)
57 std r4,-STACKFRAMESIZE+STK_REG(R30)(r1)
58 std r0,16(r1)
59 stdu r1,-STACKFRAMESIZE(r1)
60 bl enter_vmx_copy
/*
 * cr0 records whether the call returned 0. The reloads, mtlr, li and
 * mtctr that follow do not modify cr0, so the beq further down still
 * tests this result. What enter_vmx_copy checks is not visible here.
 */
61 cmpwi r3,0
62 ld r0,STACKFRAMESIZE+16(r1)
63 ld r3,STK_REG(R31)(r1)
64 ld r4,STK_REG(R30)(r1)
65 mtlr r0
66
/* Loop count shared by both copy loops: one iteration per 128 bytes. */
67 li r0,(PAGE_SIZE/128)
68 mtctr r0
69
/* Zero return: take the GPR loop. The frame pushed above is still live there. */
70 beq .Lnonvmx_copy
71
/* Vector path: the loop below never touches the frame, so release it now. */
72 addi r1,r1,STACKFRAMESIZE
73
/* r6-r12 = byte offsets 16..112, the index operands for the 2nd..8th lvx/stvx. */
74 li r6,16
75 li r7,32
76 li r8,48
77 li r9,64
78 li r10,80
79 li r11,96
80 li r12,112
81
/* Start the loop on a 32-byte (2^5) boundary. */
82 .align 5
/*
 * Per iteration: load 8 x 16 bytes from r4 into v7..v0, advance r4,
 * store the same registers to r3, advance r3. ctr counts iterations.
 */
831: lvx v7,0,r4
84 lvx v6,r4,r6
85 lvx v5,r4,r7
86 lvx v4,r4,r8
87 lvx v3,r4,r9
88 lvx v2,r4,r10
89 lvx v1,r4,r11
90 lvx v0,r4,r12
91 addi r4,r4,128
92 stvx v7,0,r3
93 stvx v6,r3,r6
94 stvx v5,r3,r7
95 stvx v4,r3,r8
96 stvx v3,r3,r9
97 stvx v2,r3,r10
98 stvx v1,r3,r11
99 stvx v0,r3,r12
100 addi r3,r3,128
101 bdnz 1b
102
/* LR already holds our caller's return address (mtlr above), so a plain branch suffices. */
103 b exit_vmx_copy /* tail call optimise */
104
105#else
/* No vector path assembled: set the loop count and push the frame the GPR loop expects. */
106 li r0,(PAGE_SIZE/128)
107 mtctr r0
108
109 stdu r1,-STACKFRAMESIZE(r1)
110#endif
111
/*
 * GPR copy loop. Reached by fall-through when CONFIG_ALTIVEC is off,
 * or through the beq above when enter_vmx_copy returned 0. Either way
 * a STACKFRAMESIZE frame is live at r1 and ctr = PAGE_SIZE/128.
 */
112.Lnonvmx_copy:
/* r14-r20 carry data in the loop below; keep their incoming values in the frame. */
113 std r14,STK_REG(R14)(r1)
114 std r15,STK_REG(R15)(r1)
115 std r16,STK_REG(R16)(r1)
116 std r17,STK_REG(R17)(r1)
117 std r18,STK_REG(R18)(r1)
118 std r19,STK_REG(R19)(r1)
119 std r20,STK_REG(R20)(r1)
120
/*
 * Per iteration: load 16 doublewords (128 bytes) from r4 into r0,
 * r5-r12 and r14-r20, advance r4, store them to r3, advance r3.
 */
1211: ld r0,0(r4)
122 ld r5,8(r4)
123 ld r6,16(r4)
124 ld r7,24(r4)
125 ld r8,32(r4)
126 ld r9,40(r4)
127 ld r10,48(r4)
128 ld r11,56(r4)
129 ld r12,64(r4)
130 ld r14,72(r4)
131 ld r15,80(r4)
132 ld r16,88(r4)
133 ld r17,96(r4)
134 ld r18,104(r4)
135 ld r19,112(r4)
136 ld r20,120(r4)
137 addi r4,r4,128
138 std r0,0(r3)
139 std r5,8(r3)
140 std r6,16(r3)
141 std r7,24(r3)
142 std r8,32(r3)
143 std r9,40(r3)
144 std r10,48(r3)
145 std r11,56(r3)
146 std r12,64(r3)
147 std r14,72(r3)
148 std r15,80(r3)
149 std r16,88(r3)
150 std r17,96(r3)
151 std r18,104(r3)
152 std r19,112(r3)
153 std r20,120(r3)
154 addi r3,r3,128
155 bdnz 1b
156
/* Put r14-r20 back, pop the frame and return to the caller. */
157 ld r14,STK_REG(R14)(r1)
158 ld r15,STK_REG(R15)(r1)
159 ld r16,STK_REG(R16)(r1)
160 ld r17,STK_REG(R17)(r1)
161 ld r18,STK_REG(R18)(r1)
162 ld r19,STK_REG(R19)(r1)
163 ld r20,STK_REG(R20)(r1)
164 addi r1,r1,STACKFRAMESIZE
165 blr
1/* SPDX-License-Identifier: GPL-2.0-or-later */
2/*
3 *
4 * Copyright (C) IBM Corporation, 2012
5 *
6 * Author: Anton Blanchard <anton@au.ibm.com>
7 */
8#include <asm/page.h>
9#include <asm/ppc_asm.h>
10
11_GLOBAL(copypage_power7)
/*
 * copypage_power7 - page copy routine, moving 128 bytes per iteration
 * for a total of PAGE_SIZE bytes.
 *
 * Inputs:    r3 = address copied to, r4 = address copied from. The
 *            prefetch setup below depends on both being page aligned.
 * On return: r3 and r4 have each moved forward by PAGE_SIZE; no result
 *            value is prepared for the caller.
 * Registers: r0, r5-r12 and ctr are overwritten; cr0 as well when
 *            CONFIG_ALTIVEC is set. The vector loop overwrites v0-v7.
 *            The GPR loop also uses r14-r20, spilling and reloading
 *            them through the stack frame.
 *
 * Flow: when CONFIG_ALTIVEC is set, enter_vmx_ops is called and its
 * result in r3 picks the loop: non-zero runs the lvx/stvx loop and
 * ends by branching to exit_vmx_ops; zero runs the ld/std loop at
 * .Lnonvmx_copy. When CONFIG_ALTIVEC is not set, only the ld/std loop
 * is built.
 *
 * NOTE(review): every line here begins with a decimal number, and the
 * two "1:" loop labels targeted by "bdnz 1b" have such digits glued in
 * front of them. This appears to be listing line numbering from an
 * export; verify against the original file before building.
 */
12 /*
13 * We prefetch both the source and destination using enhanced touch
14 * instructions. We use a stream ID of 0 for the load side and
15 * 1 for the store side. Since source and destination are page
16 * aligned we don't need to clear the bottom 7 bits of either
17 * address.
18 */
19 ori r9,r3,1 /* stream=1 => to */
20
/* Build the stream 0 length/depth word in r7; the unit count follows the page size. */
21#ifdef CONFIG_PPC_64K_PAGES
22 lis r7,0x0E01 /* depth=7
23 * units/cachelines=512 */
24#else
25 lis r7,0x0E00 /* depth=7 */
26 ori r7,r7,0x1000 /* units/cachelines=32 */
27#endif
/* Store-side copy of that word: identical except the stream ID bit is 1. */
28 ori r10,r7,1 /* stream=1 */
29
/*
 * On 64-bit, lis leaves the upper word sign-filled; clrldi zeroes the
 * top 32 bits so that r8 ends up as 0x0000000080000000.
 */
30 lis r8,0x8000 /* GO=1 */
31 clrldi r8,r8,32
32
33 /* setup read stream 0 */
34 dcbt 0,r4,0b01000 /* addr from */
35 dcbt 0,r7,0b01010 /* length and depth from */
36 /* setup write stream 1 */
37 dcbtst 0,r9,0b01000 /* addr to */
38 dcbtst 0,r10,0b01010 /* length and depth to */
/* NOTE(review): the eieio looks like an ordering point between describing the streams and starting them -- confirm against the ISA. */
39 eieio
40 dcbt 0,r8,0b01010 /* all streams GO */
41
42#ifdef CONFIG_ALTIVEC
/*
 * Keep r3, r4 and LR safe across the call to enter_vmx_ops. r3 and r4
 * are stored at the R31/R30 slot offsets biased by -STACKFRAMESIZE,
 * i.e. into the frame the following stdu creates. LR is stored at
 * 16(r1) using the stack pointer value from function entry.
 */
43 mflr r0
44 std r3,-STACKFRAMESIZE+STK_REG(R31)(r1)
45 std r4,-STACKFRAMESIZE+STK_REG(R30)(r1)
46 std r0,16(r1)
47 stdu r1,-STACKFRAMESIZE(r1)
48 bl enter_vmx_ops
/*
 * Compare the call's result with 0 now, before r3 is reloaded. None of
 * ld, mtlr, li or mtctr alters cr0, so the beq below acts on this
 * comparison. The meaning of the result is defined by enter_vmx_ops,
 * which is not part of this listing.
 */
49 cmpwi r3,0
50 ld r0,STACKFRAMESIZE+16(r1)
51 ld r3,STK_REG(R31)(r1)
52 ld r4,STK_REG(R30)(r1)
53 mtlr r0
54
/* ctr = number of 128-byte chunks in a page; both loops count it down. */
55 li r0,(PAGE_SIZE/128)
56 mtctr r0
57
/* Result was 0: use the ld/std loop, which reuses the frame pushed above. */
58 beq .Lnonvmx_copy
59
/* Result was non-zero: the vector loop makes no stack accesses, so pop the frame first. */
60 addi r1,r1,STACKFRAMESIZE
61
/* Index registers for the vector loop: r6..r12 = 16, 32, ..., 112. */
62 li r6,16
63 li r7,32
64 li r8,48
65 li r9,64
66 li r10,80
67 li r11,96
68 li r12,112
69
/* Align the loop entry to 2^5 = 32 bytes. */
70 .align 5
/*
 * Vector loop body: eight 16-byte loads from r4 (into v7 down to v0),
 * bump r4 by 128, eight matching stores to r3, bump r3 by 128.
 */
711: lvx v7,0,r4
72 lvx v6,r4,r6
73 lvx v5,r4,r7
74 lvx v4,r4,r8
75 lvx v3,r4,r9
76 lvx v2,r4,r10
77 lvx v1,r4,r11
78 lvx v0,r4,r12
79 addi r4,r4,128
80 stvx v7,0,r3
81 stvx v6,r3,r6
82 stvx v5,r3,r7
83 stvx v4,r3,r8
84 stvx v3,r3,r9
85 stvx v2,r3,r10
86 stvx v1,r3,r11
87 stvx v0,r3,r12
88 addi r3,r3,128
89 bdnz 1b
90
/* LR was restored by the mtlr above, so this branch does not need to link. */
91 b exit_vmx_ops /* tail call optimise */
92
93#else
/* Build without the vector path: load ctr and create the frame used by the loop below. */
94 li r0,(PAGE_SIZE/128)
95 mtctr r0
96
97 stdu r1,-STACKFRAMESIZE(r1)
98#endif
99
/*
 * ld/std loop. Entered by falling through when CONFIG_ALTIVEC is not
 * set, otherwise only via the beq taken on a zero enter_vmx_ops
 * result. In both cases r1 points at a live STACKFRAMESIZE frame and
 * ctr holds PAGE_SIZE/128.
 */
100.Lnonvmx_copy:
/* Spill r14-r20 to the frame: the loop uses them to hold copied data. */
101 std r14,STK_REG(R14)(r1)
102 std r15,STK_REG(R15)(r1)
103 std r16,STK_REG(R16)(r1)
104 std r17,STK_REG(R17)(r1)
105 std r18,STK_REG(R18)(r1)
106 std r19,STK_REG(R19)(r1)
107 std r20,STK_REG(R20)(r1)
108
/*
 * Loop body: sixteen 8-byte loads from r4 (r0, r5-r12, r14-r20), bump
 * r4 by 128, sixteen matching stores to r3, bump r3 by 128.
 */
1091: ld r0,0(r4)
110 ld r5,8(r4)
111 ld r6,16(r4)
112 ld r7,24(r4)
113 ld r8,32(r4)
114 ld r9,40(r4)
115 ld r10,48(r4)
116 ld r11,56(r4)
117 ld r12,64(r4)
118 ld r14,72(r4)
119 ld r15,80(r4)
120 ld r16,88(r4)
121 ld r17,96(r4)
122 ld r18,104(r4)
123 ld r19,112(r4)
124 ld r20,120(r4)
125 addi r4,r4,128
126 std r0,0(r3)
127 std r5,8(r3)
128 std r6,16(r3)
129 std r7,24(r3)
130 std r8,32(r3)
131 std r9,40(r3)
132 std r10,48(r3)
133 std r11,56(r3)
134 std r12,64(r3)
135 std r14,72(r3)
136 std r15,80(r3)
137 std r16,88(r3)
138 std r17,96(r3)
139 std r18,104(r3)
140 std r19,112(r3)
141 std r20,120(r3)
142 addi r3,r3,128
143 bdnz 1b
144
/* Reload the spilled r14-r20, release the frame, and return. */
145 ld r14,STK_REG(R14)(r1)
146 ld r15,STK_REG(R15)(r1)
147 ld r16,STK_REG(R16)(r1)
148 ld r17,STK_REG(R17)(r1)
149 ld r18,STK_REG(R18)(r1)
150 ld r19,STK_REG(R19)(r1)
151 ld r20,STK_REG(R20)(r1)
152 addi r1,r1,STACKFRAMESIZE
153 blr