From 5cb422bc4f14d0136901c2d99516e095c473450c Mon Sep 17 00:00:00 2001 From: frankw Date: Tue, 25 Jul 2017 11:51:12 -0700 Subject: [PATCH 1/7] Minor enhancements including spelling corrections, const definitions and earlier processing of full blocks. --- poly1305.go | 4 ++-- poly1305_amd64.go | 29 +++++++++++++++++------------ poly1305_ref.go | 8 ++++---- 3 files changed, 23 insertions(+), 18 deletions(-) diff --git a/poly1305.go b/poly1305.go index fa12d7e..c23761d 100644 --- a/poly1305.go +++ b/poly1305.go @@ -3,11 +3,11 @@ // found in the LICENSE file. // Package poly1305 implements Poly1305 one-time message authentication code -// defined in RFC 7539.. +// defined in RFC 7539. // // Poly1305 is a fast, one-time authentication function. It is infeasible for an // attacker to generate an authenticator for a message without the key. -// However, a key must only be used for a single message. Authenticating two +// However, a key must be used only for a single message. Authenticating two // different messages with the same key allows an attacker to forge // authenticators for other messages with the same key. 
package poly1305 // import "github.com/aead/poly1305" diff --git a/poly1305_amd64.go b/poly1305_amd64.go index cd1a717..86544ac 100644 --- a/poly1305_amd64.go +++ b/poly1305_amd64.go @@ -10,6 +10,11 @@ import ( "io" ) +const ( + AVX2Size = 512 + AVX2Buffer = 8 * TagSize +) + var useAVX2 = supportsAVX2() //go:noescape @@ -19,21 +24,21 @@ func supportsAVX2() bool func initialize(state *[7]uint64, key *[32]byte) //go:noescape -func initializeAVX2(state *[512]byte, key *[32]byte) +func initializeAVX2(state *[AVX2Size]byte, key *[32]byte) //go:noescape func update(state *[7]uint64, msg []byte) //go:noescape -func updateAVX2(state *[512]byte, msg []byte) +func updateAVX2(state *[AVX2Size]byte, msg []byte) //go:noescape func finalize(tag *[TagSize]byte, state *[7]uint64) //go:noescape -func finalizeAVX2(tag *[TagSize]byte, state *[512]byte) +func finalizeAVX2(tag *[TagSize]byte, state *[AVX2Size]byte) -// compiler asserts - check that poly1305Hash and poly1305HashAVX2 implements the hash interface +// compiler asserts - check that poly1305Hash and poly1305HashAVX2 implement the hash interface var ( _ (hash) = &poly1305Hash{} _ (hash) = &poly1305HashAVX2{} @@ -53,8 +58,8 @@ func Sum(msg []byte, key [32]byte) [TagSize]byte { msg = []byte{} } var out [TagSize]byte - if useAVX2 && len(msg) > 8*TagSize { - var state [512]byte + if useAVX2 && len(msg) > AVX2Buffer { + var state [AVX2Size]byte initializeAVX2(&state, &key) updateAVX2(&state, msg) finalizeAVX2(&out, &state) @@ -95,7 +100,7 @@ func (h *Hash) Size() int { return TagSize } // Write adds more data to the running Poly1305 hash. // This function should return a non-nil error if a call // to Write happens after a call to Sum. So it is not possible -// to compute the checksum and than add more data. +// to compute the checksum and then add more data. 
func (h *Hash) Write(msg []byte) (int, error) { if h.done { return 0, errWriteAfterSum @@ -123,7 +128,7 @@ func (h *poly1305Hash) Write(p []byte) (n int, err error) { n = len(p) if h.off > 0 { dif := TagSize - h.off - if n <= dif { + if n < dif { h.off += copy(h.buf[h.off:], p) return n, nil } @@ -132,7 +137,7 @@ func (h *poly1305Hash) Write(p []byte) (n int, err error) { p = p[dif:] h.off = 0 } - // process full 16-byte blocks + // process full multiples of 16-byte blocks if nn := len(p) & (^(TagSize - 1)); nn > 0 { update(&(h.state), p[:nn]) p = p[nn:] @@ -155,7 +160,7 @@ func (h *poly1305Hash) Sum(b []byte) []byte { type poly1305HashAVX2 struct { // r[0] | r^2[0] | r[1] | r^2[1] | r[2] | r^2[2] | r[3] | r^2[3] | r[4] | r^2[4] | r[1]*5 | r^2[1]*5 | r[2]*5 | r^2[2]*5 r[3]*5 | r^2[3]*5 r[4]*5 | r^2[4]*5 - state [512]byte + state [AVX2Size]byte buffer [8 * TagSize]byte offset int @@ -165,7 +170,7 @@ func (h *poly1305HashAVX2) Write(p []byte) (n int, err error) { n = len(p) if h.offset > 0 { remaining := 8*TagSize - h.offset - if n <= remaining { + if n < remaining { h.offset += copy(h.buffer[h.offset:], p) return n, nil } @@ -174,7 +179,7 @@ func (h *poly1305HashAVX2) Write(p []byte) (n int, err error) { p = p[remaining:] h.offset = 0 } - // process full 8*16-byte blocks + // process full multiples of 8*16-byte blocks if nn := len(p) & (^(8*TagSize - 1)); nn > 0 { updateAVX2(&h.state, p[:nn]) p = p[nn:] diff --git a/poly1305_ref.go b/poly1305_ref.go index f38bbdb..e5953fa 100644 --- a/poly1305_ref.go +++ b/poly1305_ref.go @@ -68,7 +68,7 @@ func (p *Hash) Size() int { return TagSize } // Write adds more data to the running Poly1305 hash. // This function should return a non-nil error if a call // to Write happens after a call to Sum. So it is not possible -// to compute the checksum and than add more data. +// to compute the checksum and then add more data. 
func (p *Hash) Write(msg []byte) (int, error) { if p.done { return 0, errWriteAfterSum @@ -77,7 +77,7 @@ func (p *Hash) Write(msg []byte) (int, error) { if p.off > 0 { dif := TagSize - p.off - if n <= dif { + if n < dif { p.off += copy(p.buf[p.off:], msg) return n, nil } @@ -87,7 +87,7 @@ func (p *Hash) Write(msg []byte) (int, error) { p.off = 0 } - // process full 16-byte blocks + // process full multiples of 16-byte blocks if nn := len(msg) & (^(TagSize - 1)); nn > 0 { update(msg[:nn], msgBlock, &(p.h), &(p.r)) msg = msg[nn:] @@ -100,7 +100,7 @@ func (p *Hash) Write(msg []byte) (int, error) { return n, nil } -// Sum appends the Pol1305 hash of the previously +// Sum appends the Poly1305 hash of the previously // processed data to b and returns the resulting slice. // It is safe to call this function multiple times. func (p *Hash) Sum(b []byte) []byte { From e83916c6cfc5459bbd42f7a4b64f4da71e0435c7 Mon Sep 17 00:00:00 2001 From: frankw Date: Tue, 25 Jul 2017 11:52:11 -0700 Subject: [PATCH 2/7] Add header, fix correct arguments length and go vet errors --- poly1305_AVX2_amd64.s | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/poly1305_AVX2_amd64.s b/poly1305_AVX2_amd64.s index 1105bfe..3267c67 100644 --- a/poly1305_AVX2_amd64.s +++ b/poly1305_AVX2_amd64.s @@ -64,6 +64,7 @@ DATA fixPermutation<>+0x78(SB)/4, $6 DATA fixPermutation<>+0x7c(SB)/4, $7 GLOBL fixPermutation<>(SB), RODATA, $128 +// func initializeAVX2(state *[AVX2Size]byte, key *[32]byte) TEXT ·initializeAVX2(SB), $0-16 MOVQ state+0(FP), DI MOVQ key+8(FP), SI @@ -301,7 +302,8 @@ TEXT ·initializeAVX2(SB), $0-16 RET -TEXT ·updateAVX2(SB), $0-24 +// func updateAVX2(state *[AVX2Size]byte, msg []byte) +TEXT ·updateAVX2(SB), $0-32 MOVQ state+0(FP), DI MOVQ msg+8(FP), SI MOVQ msg_len+16(FP), DX @@ -815,8 +817,9 @@ DONE: MOVD X4, 320(DI) RET +// func finalizeAVX2(tag *[TagSize]byte, state *[AVX2Size]byte) TEXT ·finalizeAVX2(SB), $0-16 - MOVQ out+0(FP), SI + MOVQ tag+0(FP), SI MOVQ 
state+8(FP), DI VZEROUPPER From 6d4c059f01d28f799cca64dd54418f3c978e52ca Mon Sep 17 00:00:00 2001 From: frankw Date: Tue, 25 Jul 2017 11:52:59 -0700 Subject: [PATCH 3/7] Remove semi-colon and move initialize function to top (more logical order) --- poly1305_amd64.s | 108 +++++++++++++++++++++++------------------------ 1 file changed, 54 insertions(+), 54 deletions(-) diff --git a/poly1305_amd64.s b/poly1305_amd64.s index f405772..fd5c27a 100644 --- a/poly1305_amd64.s +++ b/poly1305_amd64.s @@ -11,53 +11,67 @@ DATA ·poly1305Mask<>+0x08(SB)/8, $0x0FFFFFFC0FFFFFFC GLOBL ·poly1305Mask<>(SB), RODATA, $16 #define POLY1305_ADD(msg, h0, h1, h2) \ - ADDQ 0(msg), h0; \ - ADCQ 8(msg), h1; \ - ADCQ $1, h2; \ + ADDQ 0(msg), h0 \ + ADCQ 8(msg), h1 \ + ADCQ $1, h2 \ LEAQ 16(msg), msg #define POLY1305_MUL(h0, h1, h2, r0, r1, t0, t1, t2, t3) \ - MOVQ r0, AX; \ - MULQ h0; \ - MOVQ AX, t0; \ - MOVQ DX, t1; \ - MOVQ r0, AX; \ - MULQ h1; \ - ADDQ AX, t1; \ - ADCQ $0, DX; \ - MOVQ r0, t2; \ - IMULQ h2, t2; \ - ADDQ DX, t2; \ + MOVQ r0, AX \ + MULQ h0 \ + MOVQ AX, t0 \ + MOVQ DX, t1 \ + MOVQ r0, AX \ + MULQ h1 \ + ADDQ AX, t1 \ + ADCQ $0, DX \ + MOVQ r0, t2 \ + IMULQ h2, t2 \ + ADDQ DX, t2 \ \ - MOVQ r1, AX; \ - MULQ h0; \ - ADDQ AX, t1; \ - ADCQ $0, DX; \ - MOVQ DX, h0; \ - MOVQ r1, t3; \ - IMULQ h2, t3; \ - MOVQ r1, AX; \ - MULQ h1; \ - ADDQ AX, t2; \ - ADCQ DX, t3; \ - ADDQ h0, t2; \ - ADCQ $0, t3; \ + MOVQ r1, AX \ + MULQ h0 \ + ADDQ AX, t1 \ + ADCQ $0, DX \ + MOVQ DX, h0 \ + MOVQ r1, t3 \ + IMULQ h2, t3 \ + MOVQ r1, AX \ + MULQ h1 \ + ADDQ AX, t2 \ + ADCQ DX, t3 \ + ADDQ h0, t2 \ + ADCQ $0, t3 \ \ - MOVQ t0, h0; \ - MOVQ t1, h1; \ - MOVQ t2, h2; \ - ANDQ $3, h2; \ - MOVQ t2, t0; \ - ANDQ $0XFFFFFFFFFFFFFFFC, t0; \ - ADDQ t0, h0; \ - ADCQ t3, h1; \ - ADCQ $0, h2; \ - SHRQ $2, t3, t2; \ - SHRQ $2, t3; \ - ADDQ t2, h0; \ - ADCQ t3, h1; \ + MOVQ t0, h0 \ + MOVQ t1, h1 \ + MOVQ t2, h2 \ + ANDQ $3, h2 \ + MOVQ t2, t0 \ + ANDQ $0XFFFFFFFFFFFFFFFC, t0 \ + ADDQ t0, h0 \ + ADCQ t3, h1 \ + 
ADCQ $0, h2 \ + SHRQ $2, t3, t2 \ + SHRQ $2, t3 \ + ADDQ t2, h0 \ + ADCQ t3, h1 \ ADCQ $0, h2 +// func initialize(state *[7]uint64, key *[32]byte) +TEXT ·initialize(SB), $0-16 + MOVQ state+0(FP), DI + MOVQ key+8(FP), SI + + // state[0...7] is initialized with zero + MOVOU 0(SI), X0 + MOVOU 16(SI), X1 + MOVOU ·poly1305Mask<>(SB), X2 + PAND X2, X0 + MOVOU X0, 24(DI) + MOVOU X1, 40(DI) + RET + // func update(state *[7]uint64, msg []byte) TEXT ·update(SB), $0-32 MOVQ state+0(FP), DI @@ -111,20 +125,6 @@ DONE: MOVQ R10, 16(DI) RET -// func initialize(state *[7]uint64, key *[32]byte) -TEXT ·initialize(SB), $0-16 - MOVQ state+0(FP), DI - MOVQ key+8(FP), SI - - // state[0...7] is initialized with zero - MOVOU 0(SI), X0 - MOVOU 16(SI), X1 - MOVOU ·poly1305Mask<>(SB), X2 - PAND X2, X0 - MOVOU X0, 24(DI) - MOVOU X1, 40(DI) - RET - // func finalize(tag *[TagSize]byte, state *[7]uint64) TEXT ·finalize(SB), $0-16 MOVQ tag+0(FP), DI From d4ac586c64a1b166f0b420a02cae0bc0af3071e1 Mon Sep 17 00:00:00 2001 From: frankw Date: Tue, 25 Jul 2017 11:56:35 -0700 Subject: [PATCH 4/7] Move key definition inside test vector loop (to avoid any spill-over from previous test) --- poly1305_test.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/poly1305_test.go b/poly1305_test.go index 692e0a1..739c2ab 100644 --- a/poly1305_test.go +++ b/poly1305_test.go @@ -56,9 +56,9 @@ var vectors = []struct { } func TestVectors(t *testing.T) { - var key [32]byte - for i, v := range vectors { + var key [32]byte + msg := v.msg copy(key[:], v.key) From 96f6aca8cc5c9a277b65322def5602e28954eb6e Mon Sep 17 00:00:00 2001 From: frankw Date: Tue, 25 Jul 2017 11:59:57 -0700 Subject: [PATCH 5/7] Initialize both key and msg for benchmark tests --- poly1305_test.go | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/poly1305_test.go b/poly1305_test.go index 739c2ab..b426cb1 100644 --- a/poly1305_test.go +++ b/poly1305_test.go @@ -133,8 +133,14 @@ func BenchmarkWrite_8K(b *testing.B) { 
benchmarkWrite(b, 8*1024) } func benchmarkSum(b *testing.B, size int) { var key [32]byte + for i := range key { + key[i] = byte(i << 3) + } msg := make([]byte, size) + for i := range msg { + msg[i] = byte(i) + } b.SetBytes(int64(size)) b.ResetTimer() @@ -145,9 +151,15 @@ func benchmarkSum(b *testing.B, size int) { func benchmarkWrite(b *testing.B, size int) { var key [32]byte + for i := range key { + key[i] = byte(i << 3) + } h := New(key) msg := make([]byte, size) + for i := range msg { + msg[i] = byte(i) + } b.SetBytes(int64(size)) b.ResetTimer() From 3ec29cb847a843ec57b0b9c0b04de26bf7e6fe29 Mon Sep 17 00:00:00 2001 From: frankw Date: Tue, 25 Jul 2017 12:01:08 -0700 Subject: [PATCH 6/7] Run Write test over messages of different lengths --- poly1305_test.go | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/poly1305_test.go b/poly1305_test.go index b426cb1..8cfe920 100644 --- a/poly1305_test.go +++ b/poly1305_test.go @@ -97,7 +97,7 @@ func TestWriteAfterSum(t *testing.T) { } } -func TestWrite(t *testing.T) { +func testWrite(t *testing.T, size int) { var key [32]byte for i := range key { key[i] = byte(i) @@ -106,7 +106,7 @@ func TestWrite(t *testing.T) { h := New(key) var msg1 []byte - msg0 := make([]byte, 64) + msg0 := make([]byte, size) for i := range msg0 { h.Write(msg0[:i]) msg1 = append(msg1, msg0[:i]...) 
@@ -120,6 +120,12 @@ func TestWrite(t *testing.T) { } } +func TestWrite(t *testing.T) { + + for size := 0; size < 128; size++ { + testWrite(t, size) + } +} // Benchmarks func BenchmarkSum_64(b *testing.B) { benchmarkSum(b, 64) } From 116eee5c9cf43faaa6db5b65cd963c1559b8a633 Mon Sep 17 00:00:00 2001 From: frankw Date: Tue, 25 Jul 2017 12:01:46 -0700 Subject: [PATCH 7/7] Add extra test vectors --- poly1305_test.go | 68 +++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 67 insertions(+), 1 deletion(-) diff --git a/poly1305_test.go b/poly1305_test.go index 8cfe920..4304774 100644 --- a/poly1305_test.go +++ b/poly1305_test.go @@ -18,6 +18,17 @@ func fromHex(s string) []byte { return b } +var mult = []byte{0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef, 0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef} +var sNul = []byte{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00} +var sSet = []byte{0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff} + +var TagZeroByteMessage_1 = []byte{0x0f, 0x01, 0x23, 0x45, 0x07, 0x88, 0xab, 0xcd, 0x0f, 0x00, 0x23, 0x45, 0x07, 0x88, 0xab, 0xcd} +var TagZeroByteMessage_2 = []byte{0x0e, 0x01, 0x23, 0x45, 0x07, 0x88, 0xab, 0xcd, 0x0f, 0x00, 0x23, 0x45, 0x07, 0x88, 0xab, 0xcd} +var TagFirstBitMessage_1 = []byte{0x10, 0x24, 0x68, 0x4c, 0x8f, 0x33, 0x79, 0xdd, 0x0f, 0x23, 0x68, 0x4c, 0x8f, 0x33, 0x79, 0xdd} +var TagFirstBitMessage_2 = []byte{0x0f, 0x24, 0x68, 0x4c, 0x8f, 0x33, 0x79, 0xdd, 0x0f, 0x23, 0x68, 0x4c, 0x8f, 0x33, 0x79, 0xdd} +var TagSecndBitMessage_1 = []byte{0x11, 0x47, 0xad, 0x53, 0x17, 0xdf, 0x46, 0xed, 0x0f, 0x46, 0xad, 0x53, 0x17, 0xdf, 0x46, 0xed} +var TagSecndBitMessage_2 = []byte{0x10, 0x47, 0xad, 0x53, 0x17, 0xdf, 0x46, 0xed, 0x0f, 0x46, 0xad, 0x53, 0x17, 0xdf, 0x46, 0xed} + var vectors = []struct { msg, key, tag []byte }{ @@ -36,11 +47,66 @@ var vectors = []struct { []byte("this is 32-byte key for Poly1305"), []byte{0xda, 0x84, 
0xbc, 0xab, 0x02, 0x67, 0x6c, 0x38, 0xcd, 0xb0, 0x15, 0x60, 0x42, 0x74, 0xc2, 0xaa}, }, + // empty key results in empty tag irrespective of message contents { - make([]byte, 2007), + make([]byte, 0), + make([]byte, 32), + make([]byte, 16), + }, + { + make([]byte, 1), make([]byte, 32), make([]byte, 16), }, + { + []byte("Hello world!"), + make([]byte, 32), + make([]byte, 16), + }, + // zero length message + { + []byte{}, + append(mult, sNul...), // as long as S-part is zero, empty result + make([]byte, 16), + }, + { + []byte{}, + append(mult, sSet...), // when S-part set, get XOR-ed result + sSet, + }, + // single zero byte + { + []byte{0x00}, + append(mult, sNul...), + TagZeroByteMessage_1, + }, + { + []byte{0x00}, + append(mult, sSet...), + TagZeroByteMessage_2, + }, + // single byte with first bit set + { + []byte{0x01}, + append(mult, sNul...), + TagFirstBitMessage_1, + }, + { + []byte{0x01}, + append(mult, sSet...), + TagFirstBitMessage_2, + }, + // single byte with second bit set + { + []byte{0x02}, + append(mult, sNul...), + TagSecndBitMessage_1, + }, + { + []byte{0x02}, + append(mult, sSet...), + TagSecndBitMessage_2, + }, { // This test triggers an edge-case. See https://go-review.googlesource.com/#/c/30101/. []byte{0x81, 0xd8, 0xb2, 0xe4, 0x6a, 0x25, 0x21, 0x3b, 0x58, 0xfe, 0xe4, 0x21, 0x3a, 0x2a, 0x28, 0xe9, 0x21, 0xc1, 0x2a, 0x96, 0x32, 0x51, 0x6d, 0x3b, 0x73, 0x27, 0x27, 0x27, 0xbe, 0xcf, 0x21, 0x29},