Linux Audio

Check our new training course

Loading...
v6.13.7
  1/* SPDX-License-Identifier: GPL-2.0 */
  2/*
  3 * NH - ε-almost-universal hash function, x86_64 SSE2 accelerated
  4 *
  5 * Copyright 2018 Google LLC
  6 *
  7 * Author: Eric Biggers <ebiggers@google.com>
  8 */
  9
 10#include <linux/linkage.h>
 11#include <linux/cfi_types.h>
 12
 13#define		PASS0_SUMS	%xmm0	/* PASSn_SUMS: two 64-bit partial sums for NH pass n */
 14#define		PASS1_SUMS	%xmm1
 15#define		PASS2_SUMS	%xmm2
 16#define		PASS3_SUMS	%xmm3
 17#define		K0		%xmm4	/* K0-K3: 16-byte key strides loaded from KEY */
 18#define		K1		%xmm5
 19#define		K2		%xmm6
 20#define		K3		%xmm7
 21#define		T0		%xmm8	/* T0-T7: scratch registers */
 22#define		T1		%xmm9
 23#define		T2		%xmm10
 24#define		T3		%xmm11
 25#define		T4		%xmm12
 26#define		T5		%xmm13
 27#define		T6		%xmm14
 28#define		T7		%xmm15
 29#define		KEY		%rdi	/* nh_sse2() argument 1: key (advanced as it is consumed) */
 30#define		MESSAGE		%rsi	/* nh_sse2() argument 2: message (advanced as it is consumed) */
 31#define		MESSAGE_LEN	%rdx	/* nh_sse2() argument 3: message_len (counted down) */
 32#define		HASH		%rcx	/* nh_sse2() argument 4: hash (output buffer) */
 33
 34.macro _nh_stride	k0, k1, k2, k3, offset
	/*
	 * Process one 16-byte message stride for all four NH passes.
	 *
	 * Arguments:
	 *   k0, k1, k2	- XMM registers already holding the key strides that
	 *		  passes 0, 1 and 2 pair with this message stride.
	 *		  k0 is overwritten; k1 and k2 are left unchanged.
	 *   k3		- XMM register that receives the 16 key bytes at
	 *		  offset(KEY); used for pass 3 and left holding
	 *		  that key stride afterwards.
	 *   offset	- byte displacement applied to both MESSAGE and KEY.
	 *
	 * With m0..m3 the four 32-bit message words and k[0..3] the four
	 * 32-bit words of the key stride used by a pass, each PASSn_SUMS
	 * register is updated as:
	 *   low  64 bits += (m0 + k[0]) * (m2 + k[2])
	 *   high 64 bits += (m1 + k[1]) * (m3 + k[3])
	 * The additions wrap at 32 bits (paddd), the products are unsigned
	 * 32x32 => 64 (pmuludq) and the accumulation wraps at 64 bits (paddq).
	 *
	 * Clobbers: T1-T7 and k0.  KEY, MESSAGE and MESSAGE_LEN are not modified.
	 */
 35
 36	// Load next message stride (movdqu: no 16-byte alignment required)
 37	movdqu		\offset(MESSAGE), T1
 38
 39	// Load next key stride (becomes the pass 3 key for this stride)
 40	movdqu		\offset(KEY), \k3
 41
 42	// Add message words to key words, one 4x32-bit sum per pass.
	// T2 and T3 are copies of the message because paddd overwrites its
	// destination operand.
 43	movdqa		T1, T2
 44	movdqa		T1, T3
 45	paddd		T1, \k0    // pass 0 sum lands in k0: reuse k0 to avoid a move
 46	paddd		\k1, T1    // pass 1 sum (T1 no longer holds the raw message)
 47	paddd		\k2, T2    // pass 2 sum
 48	paddd		\k3, T3    // pass 3 sum
 49
 50	// Multiply 32x32 => 64 and accumulate.
	// pshufd $0x10 puts words 0 and 1 in the low dword of the low and high
	// 64-bit lanes; pshufd $0x32 does the same with words 2 and 3.
	// pmuludq multiplies only those low dwords, so each lane ends up
	// holding word0*word2 (low lane) and word1*word3 (high lane).
 51	pshufd		$0x10, \k0, T4
 52	pshufd		$0x32, \k0, \k0
 53	pshufd		$0x10, T1, T5
 54	pshufd		$0x32, T1, T1
 55	pshufd		$0x10, T2, T6
 56	pshufd		$0x32, T2, T2
 57	pshufd		$0x10, T3, T7
 58	pshufd		$0x32, T3, T3
 59	pmuludq		T4, \k0
 60	pmuludq		T5, T1
 61	pmuludq		T6, T2
 62	pmuludq		T7, T3
	// Fold the two 64-bit products of each pass into its accumulator
 63	paddq		\k0, PASS0_SUMS
 64	paddq		T1, PASS1_SUMS
 65	paddq		T2, PASS2_SUMS
 66	paddq		T3, PASS3_SUMS
 67.endm
 68
 69/*
 70 * void nh_sse2(const u32 *key, const u8 *message, size_t message_len,
 71 *		__le64 hash[NH_NUM_PASSES])
 72 *
 73 * It's guaranteed that message_len % 16 == 0.
    *
    * Computes the four NH pass sums of 'message' under 'key' and stores them
    * as four 64-bit values (32 bytes) at 'hash'.
    *
    * In:	KEY (%rdi)	   = key; message_len + 0x30 bytes are read from it
    *				     (0x30 up front, then 16 per message stride)
    *		MESSAGE (%rsi)	   = message
    *		MESSAGE_LEN (%rdx) = message_len in bytes; tested with signed
    *				     branches (jl/jge), so it must be
    *				     representable as a signed 64-bit value
    *		HASH (%rcx)	   = output buffer, written with unaligned stores
    *
    * If message_len == 0 both the main loop and the tail are skipped and
    * four zero sums are stored.
    *
    * Clobbers: %xmm0-%xmm15, KEY, MESSAGE, MESSAGE_LEN and the flags.
 74 */
 75SYM_TYPED_FUNC_START(nh_sse2)
 76
	// Preload the first three key strides.  KEY is then advanced past
	// them, so offset(KEY) inside _nh_stride fetches the key stride that
	// lies three strides ahead of the message stride at offset(MESSAGE).
 77	movdqu		0x00(KEY), K0
 78	movdqu		0x10(KEY), K1
 79	movdqu		0x20(KEY), K2
 80	add		$0x30, KEY
	// Zero the per-pass accumulators
 81	pxor		PASS0_SUMS, PASS0_SUMS
 82	pxor		PASS1_SUMS, PASS1_SUMS
 83	pxor		PASS2_SUMS, PASS2_SUMS
 84	pxor		PASS3_SUMS, PASS3_SUMS
 85
	// Bias MESSAGE_LEN by -0x40: while it stays >= 0 (signed) at least
	// one more full 64-byte group of four strides remains.
 86	sub		$0x40, MESSAGE_LEN
 87	jl		.Lloop4_done
 88.Lloop4:
	// Four strides per iteration.  The key registers rotate through the
	// macro argument roles instead of being copied: the register loaded
	// as k3 in one stride serves as k2 in the next, and the register
	// consumed as k0 becomes the next load target.  After four strides
	// the assignment is back to K0, K1, K2, K3.
 89	_nh_stride	K0, K1, K2, K3, 0x00
 90	_nh_stride	K1, K2, K3, K0, 0x10
 91	_nh_stride	K2, K3, K0, K1, 0x20
 92	_nh_stride	K3, K0, K1, K2, 0x30
 93	add		$0x40, KEY
 94	add		$0x40, MESSAGE
 95	sub		$0x40, MESSAGE_LEN
 96	jge		.Lloop4
 97
 98.Lloop4_done:
	// MESSAGE_LEN is negative here, but only multiples of 0x40 were
	// subtracted from it, so its low 6 bits still equal the number of
	// leftover bytes.  Given message_len % 16 == 0 that is 0, 0x10, 0x20
	// or 0x30, i.e. at most three more strides.
 99	and		$0x3f, MESSAGE_LEN
100	jz		.Ldone
	// Tail strides use the same register rotation and offsets as the
	// first three strides of a loop iteration; the pointers are not
	// advanced because nothing is read after the tail.
101	_nh_stride	K0, K1, K2, K3, 0x00
102
103	sub		$0x10, MESSAGE_LEN
104	jz		.Ldone
105	_nh_stride	K1, K2, K3, K0, 0x10
106
107	sub		$0x10, MESSAGE_LEN
108	jz		.Ldone
	// Third tail stride: no further check needed, at most 0x30 bytes remained
109	_nh_stride	K2, K3, K0, K1, 0x20
110
111.Ldone:
112	// Sum the accumulators for each pass, then store the sums to 'hash'.
	// Each PASSn_SUMS holds two partial sums: A in the low 64 bits and B
	// in the high 64 bits.  The unpacks pair up the A halves and the B
	// halves of two passes so that one paddq yields both final sums.
113	movdqa		PASS0_SUMS, T0
114	movdqa		PASS2_SUMS, T1
115	punpcklqdq	PASS1_SUMS, T0		// => (PASS0_SUM_A PASS1_SUM_A)
116	punpcklqdq	PASS3_SUMS, T1		// => (PASS2_SUM_A PASS3_SUM_A)
117	punpckhqdq	PASS1_SUMS, PASS0_SUMS	// => (PASS0_SUM_B PASS1_SUM_B)
118	punpckhqdq	PASS3_SUMS, PASS2_SUMS	// => (PASS2_SUM_B PASS3_SUM_B)
119	paddq		PASS0_SUMS, T0		// => (PASS0_SUM PASS1_SUM)
120	paddq		PASS2_SUMS, T1		// => (PASS2_SUM PASS3_SUM)
121	movdqu		T0, 0x00(HASH)		// hash[0], hash[1]
122	movdqu		T1, 0x10(HASH)		// hash[2], hash[3]
123	RET
124SYM_FUNC_END(nh_sse2)
v6.8
  1/* SPDX-License-Identifier: GPL-2.0 */
  2/*
  3 * NH - ε-almost-universal hash function, x86_64 SSE2 accelerated
  4 *
  5 * Copyright 2018 Google LLC
  6 *
  7 * Author: Eric Biggers <ebiggers@google.com>
  8 */
  9
 10#include <linux/linkage.h>
 11#include <linux/cfi_types.h>
 12
 13#define		PASS0_SUMS	%xmm0	/* PASSn_SUMS: two 64-bit partial sums for NH pass n */
 14#define		PASS1_SUMS	%xmm1
 15#define		PASS2_SUMS	%xmm2
 16#define		PASS3_SUMS	%xmm3
 17#define		K0		%xmm4	/* K0-K3: 16-byte key strides loaded from KEY */
 18#define		K1		%xmm5
 19#define		K2		%xmm6
 20#define		K3		%xmm7
 21#define		T0		%xmm8	/* T0-T7: scratch registers */
 22#define		T1		%xmm9
 23#define		T2		%xmm10
 24#define		T3		%xmm11
 25#define		T4		%xmm12
 26#define		T5		%xmm13
 27#define		T6		%xmm14
 28#define		T7		%xmm15
 29#define		KEY		%rdi	/* nh_sse2() argument 1: key (advanced as it is consumed) */
 30#define		MESSAGE		%rsi	/* nh_sse2() argument 2: message (advanced as it is consumed) */
 31#define		MESSAGE_LEN	%rdx	/* nh_sse2() argument 3: message_len (counted down) */
 32#define		HASH		%rcx	/* nh_sse2() argument 4: hash (output buffer) */
 33
 34.macro _nh_stride	k0, k1, k2, k3, offset
	/*
	 * Process one 16-byte message stride for all four NH passes.
	 *
	 * Arguments:
	 *   k0, k1, k2	- XMM registers already holding the key strides that
	 *		  passes 0, 1 and 2 pair with this message stride.
	 *		  k0 is overwritten; k1 and k2 are left unchanged.
	 *   k3		- XMM register that receives the 16 key bytes at
	 *		  offset(KEY); used for pass 3 and left holding
	 *		  that key stride afterwards.
	 *   offset	- byte displacement applied to both MESSAGE and KEY.
	 *
	 * With m0..m3 the four 32-bit message words and k[0..3] the four
	 * 32-bit words of the key stride used by a pass, each PASSn_SUMS
	 * register is updated as:
	 *   low  64 bits += (m0 + k[0]) * (m2 + k[2])
	 *   high 64 bits += (m1 + k[1]) * (m3 + k[3])
	 * The additions wrap at 32 bits (paddd), the products are unsigned
	 * 32x32 => 64 (pmuludq) and the accumulation wraps at 64 bits (paddq).
	 *
	 * Clobbers: T1-T7 and k0.  KEY, MESSAGE and MESSAGE_LEN are not modified.
	 */
 35
 36	// Load next message stride (movdqu: no 16-byte alignment required)
 37	movdqu		\offset(MESSAGE), T1
 38
 39	// Load next key stride (becomes the pass 3 key for this stride)
 40	movdqu		\offset(KEY), \k3
 41
 42	// Add message words to key words, one 4x32-bit sum per pass.
	// T2 and T3 are copies of the message because paddd overwrites its
	// destination operand.
 43	movdqa		T1, T2
 44	movdqa		T1, T3
 45	paddd		T1, \k0    // pass 0 sum lands in k0: reuse k0 to avoid a move
 46	paddd		\k1, T1    // pass 1 sum (T1 no longer holds the raw message)
 47	paddd		\k2, T2    // pass 2 sum
 48	paddd		\k3, T3    // pass 3 sum
 49
 50	// Multiply 32x32 => 64 and accumulate.
	// pshufd $0x10 puts words 0 and 1 in the low dword of the low and high
	// 64-bit lanes; pshufd $0x32 does the same with words 2 and 3.
	// pmuludq multiplies only those low dwords, so each lane ends up
	// holding word0*word2 (low lane) and word1*word3 (high lane).
 51	pshufd		$0x10, \k0, T4
 52	pshufd		$0x32, \k0, \k0
 53	pshufd		$0x10, T1, T5
 54	pshufd		$0x32, T1, T1
 55	pshufd		$0x10, T2, T6
 56	pshufd		$0x32, T2, T2
 57	pshufd		$0x10, T3, T7
 58	pshufd		$0x32, T3, T3
 59	pmuludq		T4, \k0
 60	pmuludq		T5, T1
 61	pmuludq		T6, T2
 62	pmuludq		T7, T3
	// Fold the two 64-bit products of each pass into its accumulator
 63	paddq		\k0, PASS0_SUMS
 64	paddq		T1, PASS1_SUMS
 65	paddq		T2, PASS2_SUMS
 66	paddq		T3, PASS3_SUMS
 67.endm
 68
 69/*
 70 * void nh_sse2(const u32 *key, const u8 *message, size_t message_len,
 71 *		__le64 hash[NH_NUM_PASSES])
 72 *
 73 * It's guaranteed that message_len % 16 == 0.
    *
    * Computes the four NH pass sums of 'message' under 'key' and stores them
    * as four 64-bit values (32 bytes) at 'hash'.
    *
    * In:	KEY (%rdi)	   = key; message_len + 0x30 bytes are read from it
    *				     (0x30 up front, then 16 per message stride)
    *		MESSAGE (%rsi)	   = message
    *		MESSAGE_LEN (%rdx) = message_len in bytes; tested with signed
    *				     branches (jl/jge), so it must be
    *				     representable as a signed 64-bit value
    *		HASH (%rcx)	   = output buffer, written with unaligned stores
    *
    * If message_len == 0 both the main loop and the tail are skipped and
    * four zero sums are stored.
    *
    * Clobbers: %xmm0-%xmm15, KEY, MESSAGE, MESSAGE_LEN and the flags.
 74 */
 75SYM_TYPED_FUNC_START(nh_sse2)
 76
	// Preload the first three key strides.  KEY is then advanced past
	// them, so offset(KEY) inside _nh_stride fetches the key stride that
	// lies three strides ahead of the message stride at offset(MESSAGE).
 77	movdqu		0x00(KEY), K0
 78	movdqu		0x10(KEY), K1
 79	movdqu		0x20(KEY), K2
 80	add		$0x30, KEY
	// Zero the per-pass accumulators
 81	pxor		PASS0_SUMS, PASS0_SUMS
 82	pxor		PASS1_SUMS, PASS1_SUMS
 83	pxor		PASS2_SUMS, PASS2_SUMS
 84	pxor		PASS3_SUMS, PASS3_SUMS
 85
	// Bias MESSAGE_LEN by -0x40: while it stays >= 0 (signed) at least
	// one more full 64-byte group of four strides remains.
 86	sub		$0x40, MESSAGE_LEN
 87	jl		.Lloop4_done
 88.Lloop4:
	// Four strides per iteration.  The key registers rotate through the
	// macro argument roles instead of being copied: the register loaded
	// as k3 in one stride serves as k2 in the next, and the register
	// consumed as k0 becomes the next load target.  After four strides
	// the assignment is back to K0, K1, K2, K3.
 89	_nh_stride	K0, K1, K2, K3, 0x00
 90	_nh_stride	K1, K2, K3, K0, 0x10
 91	_nh_stride	K2, K3, K0, K1, 0x20
 92	_nh_stride	K3, K0, K1, K2, 0x30
 93	add		$0x40, KEY
 94	add		$0x40, MESSAGE
 95	sub		$0x40, MESSAGE_LEN
 96	jge		.Lloop4
 97
 98.Lloop4_done:
	// MESSAGE_LEN is negative here, but only multiples of 0x40 were
	// subtracted from it, so its low 6 bits still equal the number of
	// leftover bytes.  Given message_len % 16 == 0 that is 0, 0x10, 0x20
	// or 0x30, i.e. at most three more strides.
 99	and		$0x3f, MESSAGE_LEN
100	jz		.Ldone
	// Tail strides use the same register rotation and offsets as the
	// first three strides of a loop iteration; the pointers are not
	// advanced because nothing is read after the tail.
101	_nh_stride	K0, K1, K2, K3, 0x00
102
103	sub		$0x10, MESSAGE_LEN
104	jz		.Ldone
105	_nh_stride	K1, K2, K3, K0, 0x10
106
107	sub		$0x10, MESSAGE_LEN
108	jz		.Ldone
	// Third tail stride: no further check needed, at most 0x30 bytes remained
109	_nh_stride	K2, K3, K0, K1, 0x20
110
111.Ldone:
112	// Sum the accumulators for each pass, then store the sums to 'hash'.
	// Each PASSn_SUMS holds two partial sums: A in the low 64 bits and B
	// in the high 64 bits.  The unpacks pair up the A halves and the B
	// halves of two passes so that one paddq yields both final sums.
113	movdqa		PASS0_SUMS, T0
114	movdqa		PASS2_SUMS, T1
115	punpcklqdq	PASS1_SUMS, T0		// => (PASS0_SUM_A PASS1_SUM_A)
116	punpcklqdq	PASS3_SUMS, T1		// => (PASS2_SUM_A PASS3_SUM_A)
117	punpckhqdq	PASS1_SUMS, PASS0_SUMS	// => (PASS0_SUM_B PASS1_SUM_B)
118	punpckhqdq	PASS3_SUMS, PASS2_SUMS	// => (PASS2_SUM_B PASS3_SUM_B)
119	paddq		PASS0_SUMS, T0		// => (PASS0_SUM PASS1_SUM)
120	paddq		PASS2_SUMS, T1		// => (PASS2_SUM PASS3_SUM)
121	movdqu		T0, 0x00(HASH)		// hash[0], hash[1]
122	movdqu		T1, 0x10(HASH)		// hash[2], hash[3]
123	RET
124SYM_FUNC_END(nh_sse2)