/* NOTE(review): stray "Loading..." web-viewer artifact removed from this line. */
/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * POWER7-optimised full-page copy.
 *
 * C-equivalent prototype: void copypage_power7(void *to, void *from)
 *   In:  r3 = to   (destination page, page aligned)
 *        r4 = from (source page, page aligned)
 *
 * Copies PAGE_SIZE bytes in 128-byte chunks.  When CONFIG_ALTIVEC is
 * enabled and enter_vmx_ops() permits it, the copy uses eight VMX
 * registers per chunk; otherwise a GPR-based unrolled loop is used.
 *
 * Copyright (C) IBM Corporation, 2012
 *
 * Author: Anton Blanchard <anton@au.ibm.com>
 */
#include <asm/page.h>
#include <asm/ppc_asm.h>

_GLOBAL(copypage_power7)
	/*
	 * We prefetch both the source and destination using enhanced touch
	 * instructions. We use a stream ID of 0 for the load side and
	 * 1 for the store side. Since source and destination are page
	 * aligned we don't need to clear the bottom 7 bits of either
	 * address.
	 */
	ori	r9,r3,1		/* stream=1 => to */

#ifdef CONFIG_PPC_64K_PAGES
	lis	r7,0x0E01	/* depth=7
				 * units/cachelines=512 */
#else
	lis	r7,0x0E00	/* depth=7 */
	ori	r7,r7,0x1000	/* units/cachelines=32 */
#endif
	ori	r10,r7,1	/* stream=1 */

	lis	r8,0x8000	/* GO=1 */
	clrldi	r8,r8,32	/* clear high word: GO word in low 32 bits */

	/* setup read stream 0 */
	dcbt	0,r4,0b01000	/* addr from */
	dcbt	0,r7,0b01010	/* length and depth from */
	/* setup write stream 1 */
	dcbtst	0,r9,0b01000	/* addr to */
	dcbtst	0,r10,0b01010	/* length and depth to */
	eieio			/* order stream setup before the GO */
	dcbt	0,r8,0b01010	/* all streams GO */

#ifdef CONFIG_ALTIVEC
	/*
	 * Stash the page addresses and return address, then ask the kernel
	 * whether the VMX unit may be used here.  enter_vmx_ops() returns
	 * its verdict in r3 (0 => VMX unusable, take the GPR path).
	 */
	mflr	r0
	std	r3,-STACKFRAMESIZE+STK_REG(R31)(r1)
	std	r4,-STACKFRAMESIZE+STK_REG(R30)(r1)
	std	r0,16(r1)
	stdu	r1,-STACKFRAMESIZE(r1)
	bl	CFUNC(enter_vmx_ops)
	cmpwi	r3,0
	ld	r0,STACKFRAMESIZE+16(r1)
	ld	r3,STK_REG(R31)(r1)
	ld	r4,STK_REG(R30)(r1)
	mtlr	r0

	li	r0,(PAGE_SIZE/128)	/* one loop iteration per 128 bytes */
	mtctr	r0

	beq	.Lnonvmx_copy		/* VMX not available: GPR fallback */

	addi	r1,r1,STACKFRAMESIZE	/* VMX path keeps no stack frame */

	/* Offsets of the eight 16-byte vectors within each 128-byte chunk */
	li	r6,16
	li	r7,32
	li	r8,48
	li	r9,64
	li	r10,80
	li	r11,96
	li	r12,112

	.align	5
1:	lvx	v7,0,r4
	lvx	v6,r4,r6
	lvx	v5,r4,r7
	lvx	v4,r4,r8
	lvx	v3,r4,r9
	lvx	v2,r4,r10
	lvx	v1,r4,r11
	lvx	v0,r4,r12
	addi	r4,r4,128
	stvx	v7,0,r3
	stvx	v6,r3,r6
	stvx	v5,r3,r7
	stvx	v4,r3,r8
	stvx	v3,r3,r9
	stvx	v2,r3,r10
	stvx	v1,r3,r11
	stvx	v0,r3,r12
	addi	r3,r3,128
	bdnz	1b

	b	CFUNC(exit_vmx_ops)		/* tail call optimise */

#else
	li	r0,(PAGE_SIZE/128)	/* one loop iteration per 128 bytes */
	mtctr	r0

	stdu	r1,-STACKFRAMESIZE(r1)
#endif

.Lnonvmx_copy:
	/* GPR fallback: save the non-volatile registers used as copy scratch */
	std	r14,STK_REG(R14)(r1)
	std	r15,STK_REG(R15)(r1)
	std	r16,STK_REG(R16)(r1)
	std	r17,STK_REG(R17)(r1)
	std	r18,STK_REG(R18)(r1)
	std	r19,STK_REG(R19)(r1)
	std	r20,STK_REG(R20)(r1)

	/* 16 loads then 16 stores of 8 bytes each = 128 bytes per pass */
1:	ld	r0,0(r4)
	ld	r5,8(r4)
	ld	r6,16(r4)
	ld	r7,24(r4)
	ld	r8,32(r4)
	ld	r9,40(r4)
	ld	r10,48(r4)
	ld	r11,56(r4)
	ld	r12,64(r4)
	ld	r14,72(r4)
	ld	r15,80(r4)
	ld	r16,88(r4)
	ld	r17,96(r4)
	ld	r18,104(r4)
	ld	r19,112(r4)
	ld	r20,120(r4)
	addi	r4,r4,128
	std	r0,0(r3)
	std	r5,8(r3)
	std	r6,16(r3)
	std	r7,24(r3)
	std	r8,32(r3)
	std	r9,40(r3)
	std	r10,48(r3)
	std	r11,56(r3)
	std	r12,64(r3)
	std	r14,72(r3)
	std	r15,80(r3)
	std	r16,88(r3)
	std	r17,96(r3)
	std	r18,104(r3)
	std	r19,112(r3)
	std	r20,120(r3)
	addi	r3,r3,128
	bdnz	1b

	/* Restore the non-volatile registers and pop the frame */
	ld	r14,STK_REG(R14)(r1)
	ld	r15,STK_REG(R15)(r1)
	ld	r16,STK_REG(R16)(r1)
	ld	r17,STK_REG(R17)(r1)
	ld	r18,STK_REG(R18)(r1)
	ld	r19,STK_REG(R19)(r1)
	ld	r20,STK_REG(R20)(r1)
	addi	r1,r1,STACKFRAMESIZE
	blr
/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * POWER7-optimised full-page copy (revision using DCBT_SETUP_STREAMS).
 *
 * NOTE(review): this appears to be a second, later revision of the same
 * routine captured into one file — it redefines copypage_power7 already
 * defined earlier in this capture.  Only one revision should be kept;
 * TODO confirm which revision is intended before assembling.
 *
 * C-equivalent prototype: void copypage_power7(void *to, void *from)
 *   In:  r3 = to   (destination page, page aligned)
 *        r4 = from (source page, page aligned)
 *
 * Copies PAGE_SIZE bytes in 128-byte chunks.  When CONFIG_ALTIVEC is
 * enabled and enter_vmx_ops() permits it, the copy uses eight VMX
 * registers per chunk; otherwise a GPR-based unrolled loop is used.
 *
 * Copyright (C) IBM Corporation, 2012
 *
 * Author: Anton Blanchard <anton@au.ibm.com>
 */
#include <asm/page.h>
#include <asm/ppc_asm.h>

_GLOBAL(copypage_power7)
	/*
	 * We prefetch both the source and destination using enhanced touch
	 * instructions. We use a stream ID of 0 for the load side and
	 * 1 for the store side. Since source and destination are page
	 * aligned we don't need to clear the bottom 7 bits of either
	 * address.
	 */
	ori	r9,r3,1		/* stream=1 => to */

#ifdef CONFIG_PPC_64K_PAGES
	lis	r7,0x0E01	/* depth=7
				 * units/cachelines=512 */
#else
	lis	r7,0x0E00	/* depth=7 */
	ori	r7,r7,0x1000	/* units/cachelines=32 */
#endif
	ori	r10,r7,1	/* stream=1 */

	/* program both prefetch streams and set them going */
	DCBT_SETUP_STREAMS(r4, r7, r9, r10, r8)

#ifdef CONFIG_ALTIVEC
	/*
	 * Stash the page addresses and return address, then ask the kernel
	 * whether the VMX unit may be used here.  enter_vmx_ops() returns
	 * its verdict in r3 (0 => VMX unusable, take the GPR path).
	 */
	mflr	r0
	std	r3,-STACKFRAMESIZE+STK_REG(R31)(r1)
	std	r4,-STACKFRAMESIZE+STK_REG(R30)(r1)
	std	r0,16(r1)
	stdu	r1,-STACKFRAMESIZE(r1)
	bl	CFUNC(enter_vmx_ops)
	cmpwi	r3,0
	ld	r0,STACKFRAMESIZE+16(r1)
	ld	r3,STK_REG(R31)(r1)
	ld	r4,STK_REG(R30)(r1)
	mtlr	r0

	li	r0,(PAGE_SIZE/128)	/* one loop iteration per 128 bytes */
	mtctr	r0

	beq	.Lnonvmx_copy		/* VMX not available: GPR fallback */

	addi	r1,r1,STACKFRAMESIZE	/* VMX path keeps no stack frame */

	/* Offsets of the eight 16-byte vectors within each 128-byte chunk */
	li	r6,16
	li	r7,32
	li	r8,48
	li	r9,64
	li	r10,80
	li	r11,96
	li	r12,112

	.align	5
1:	lvx	v7,0,r4
	lvx	v6,r4,r6
	lvx	v5,r4,r7
	lvx	v4,r4,r8
	lvx	v3,r4,r9
	lvx	v2,r4,r10
	lvx	v1,r4,r11
	lvx	v0,r4,r12
	addi	r4,r4,128
	stvx	v7,0,r3
	stvx	v6,r3,r6
	stvx	v5,r3,r7
	stvx	v4,r3,r8
	stvx	v3,r3,r9
	stvx	v2,r3,r10
	stvx	v1,r3,r11
	stvx	v0,r3,r12
	addi	r3,r3,128
	bdnz	1b

	b	CFUNC(exit_vmx_ops)		/* tail call optimise */

#else
	li	r0,(PAGE_SIZE/128)	/* one loop iteration per 128 bytes */
	mtctr	r0

	stdu	r1,-STACKFRAMESIZE(r1)
#endif

.Lnonvmx_copy:
	/* GPR fallback: save the non-volatile registers used as copy scratch */
	std	r14,STK_REG(R14)(r1)
	std	r15,STK_REG(R15)(r1)
	std	r16,STK_REG(R16)(r1)
	std	r17,STK_REG(R17)(r1)
	std	r18,STK_REG(R18)(r1)
	std	r19,STK_REG(R19)(r1)
	std	r20,STK_REG(R20)(r1)

	/* 16 loads then 16 stores of 8 bytes each = 128 bytes per pass */
1:	ld	r0,0(r4)
	ld	r5,8(r4)
	ld	r6,16(r4)
	ld	r7,24(r4)
	ld	r8,32(r4)
	ld	r9,40(r4)
	ld	r10,48(r4)
	ld	r11,56(r4)
	ld	r12,64(r4)
	ld	r14,72(r4)
	ld	r15,80(r4)
	ld	r16,88(r4)
	ld	r17,96(r4)
	ld	r18,104(r4)
	ld	r19,112(r4)
	ld	r20,120(r4)
	addi	r4,r4,128
	std	r0,0(r3)
	std	r5,8(r3)
	std	r6,16(r3)
	std	r7,24(r3)
	std	r8,32(r3)
	std	r9,40(r3)
	std	r10,48(r3)
	std	r11,56(r3)
	std	r12,64(r3)
	std	r14,72(r3)
	std	r15,80(r3)
	std	r16,88(r3)
	std	r17,96(r3)
	std	r18,104(r3)
	std	r19,112(r3)
	std	r20,120(r3)
	addi	r3,r3,128
	bdnz	1b

	/* Restore the non-volatile registers and pop the frame */
	ld	r14,STK_REG(R14)(r1)
	ld	r15,STK_REG(R15)(r1)
	ld	r16,STK_REG(R16)(r1)
	ld	r17,STK_REG(R17)(r1)
	ld	r18,STK_REG(R18)(r1)
	ld	r19,STK_REG(R19)(r1)
	ld	r20,STK_REG(R20)(r1)
	addi	r1,r1,STACKFRAMESIZE
	blr