/* SPDX-License-Identifier: GPL-2.0 */
/* Copyright 2002 Andi Kleen, SuSE Labs */

#include <linux/linkage.h>
#include <asm/cpufeatures.h>
#include <asm/alternative-asm.h>
#include <asm/export.h>

.weak memset

/*
 * ISO C memset - set a memory block to a byte value. This function uses fast
 * string to get better performance than the original function. The code is
 * simpler and shorter than the original function as well.
 *
 * rdi	destination
 * rsi	value (char)
 * rdx	count (bytes)
 *
 * rax	original destination
 */
ENTRY(memset)
ENTRY(__memset)
	/*
	 * Some CPUs support enhanced REP MOVSB/STOSB feature. It is recommended
	 * to use it when possible. If not available, use fast string
	 * instructions.
	 *
	 * Otherwise, use original memset function.
	 */
	ALTERNATIVE_2 "jmp memset_orig", "", X86_FEATURE_REP_GOOD, \
		      "jmp memset_erms", X86_FEATURE_ERMS

	movq %rdi,%r9
	movq %rdx,%rcx
	andl $7,%edx
	shrq $3,%rcx
	/* expand byte value */
	movzbl %sil,%esi
	movabs $0x0101010101010101,%rax
	imulq %rsi,%rax
	rep stosq
	movl %edx,%ecx
	rep stosb
	movq %r9,%rax
	ret
ENDPROC(memset)
ENDPROC(__memset)
EXPORT_SYMBOL(memset)
EXPORT_SYMBOL(__memset)

/*
 * ISO C memset - set a memory block to a byte value. This function uses
 * enhanced rep stosb to override the fast string function.
 * The code is simpler and shorter than the fast string function as well.
 *
 * rdi	destination
 * rsi	value (char)
 * rdx	count (bytes)
 *
 * rax	original destination
 */
ENTRY(memset_erms)
	movq %rdi,%r9
	movb %sil,%al
	movq %rdx,%rcx
	rep stosb
	movq %r9,%rax
	ret
ENDPROC(memset_erms)

ENTRY(memset_orig)
	movq %rdi,%r10

	/* expand byte value */
	movzbl %sil,%ecx
	movabs $0x0101010101010101,%rax
	imulq %rcx,%rax

	/* align dst */
	movl %edi,%r9d
	andl $7,%r9d
	jnz .Lbad_alignment
.Lafter_bad_alignment:

	movq %rdx,%rcx
	shrq $6,%rcx
	jz .Lhandle_tail

	.p2align 4
.Lloop_64:
	decq %rcx
	movq %rax,(%rdi)
	movq %rax,8(%rdi)
	movq %rax,16(%rdi)
	movq %rax,24(%rdi)
	movq %rax,32(%rdi)
	movq %rax,40(%rdi)
	movq %rax,48(%rdi)
	movq %rax,56(%rdi)
	leaq 64(%rdi),%rdi
	jnz .Lloop_64

	/* Handle tail in loops. The loops should be faster than hard
	   to predict jump tables. */
	.p2align 4
.Lhandle_tail:
	movl %edx,%ecx
	andl $63&(~7),%ecx
	jz .Lhandle_7
	shrl $3,%ecx
	.p2align 4
.Lloop_8:
	decl %ecx
	movq %rax,(%rdi)
	leaq 8(%rdi),%rdi
	jnz .Lloop_8
.Lhandle_7:
	andl $7,%edx
	jz .Lende
	.p2align 4
.Lloop_1:
	decl %edx
	movb %al,(%rdi)
	leaq 1(%rdi),%rdi
	jnz .Lloop_1

.Lende:
	movq %r10,%rax
	ret

.Lbad_alignment:
	cmpq $7,%rdx
	jbe .Lhandle_7
	movq %rax,(%rdi)	/* unaligned store */
	movq $8,%r8
	subq %r9,%r8
	addq %r8,%rdi
	subq %r8,%rdx
	jmp .Lafter_bad_alignment
.Lfinal:
ENDPROC(memset_orig)
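
/*
 * Editorial illustration only, not part of the kernel build: a rough
 * C-equivalent sketch of the fast-string (X86_FEATURE_REP_GOOD) path above,
 * meant to map the register usage (rdi = dest, rsi = value, rdx = count,
 * rax = returned original dest) onto familiar C. The function name
 * memset_fast_string_sketch and the local variable names are invented for
 * this sketch; __builtin_memcpy (GCC/Clang) stands in for the 8-byte stores
 * that "rep stosq" performs.
 *
 *	#include <stddef.h>		// for size_t
 *
 *	void *memset_fast_string_sketch(void *dest, int c, size_t n)
 *	{
 *		unsigned char *p = dest;
 *		// Expand the byte into a repeated 64-bit pattern, as the
 *		// movzbl/movabs/imulq sequence does above.
 *		unsigned long long pattern =
 *			(unsigned char)c * 0x0101010101010101ULL;
 *		size_t qwords = n >> 3;		// "rep stosq" iteration count
 *		size_t tail = n & 7;		// "rep stosb" iteration count
 *
 *		while (qwords--) {		// 8 bytes per store
 *			__builtin_memcpy(p, &pattern, sizeof(pattern));
 *			p += sizeof(pattern);
 *		}
 *		while (tail--)			// remaining 0..7 bytes
 *			*p++ = (unsigned char)c;
 *		return dest;			// original destination, as in rax
 *	}
 */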