diff --git a/zstd/_generate/gen.go b/zstd/_generate/gen.go index a3a5c4fcce..e7736aacaa 100644 --- a/zstd/_generate/gen.go +++ b/zstd/_generate/gen.go @@ -34,6 +34,9 @@ const errorMatchOffTooBig = 3 // error reported when the sum of literal lengths exeeceds the literal buffer size const errorNotEnoughLiterals = 4 +// error reported when capacity of `out` is too small +const errorNotEnoughSpace = 5 + const maxMatchLen = 131074 // size of struct seqVals @@ -199,6 +202,7 @@ func (o options) generateBody(name string, executeSingleTriple func(ctx *execute ec.llPtr = llP ec.outBase = GP64() + ec.outEndPtr = AllocLocal(8) ec.literals = GP64() ec.outPosition = GP64() ec.histLenPtr = AllocLocal(8) @@ -212,6 +216,7 @@ func (o options) generateBody(name string, executeSingleTriple func(ctx *execute } Load(ctx.Field("out").Base(), ec.outBase) + loadField(ctx.Field("out").Cap(), ec.outEndPtr) Load(ctx.Field("literals").Base(), ec.literals) Load(ctx.Field("outPosition"), ec.outPosition) loadField(ctx.Field("windowSize"), ec.windowSizePtr) @@ -224,8 +229,12 @@ func (o options) generateBody(name string, executeSingleTriple func(ctx *execute ADDQ(tmp, ec.histBasePtr) // Note: we always copy from &hist[len(hist) - v] } + Comment("Calculate poiter to s.out[cap(s.out)] (a past-end pointer)") + ADDQ(ec.outBase, ec.outEndPtr) + Comment("outBase += outPosition") ADDQ(ec.outPosition, ec.outBase) + } } @@ -465,6 +474,22 @@ func (o options) generateBody(name string, executeSingleTriple func(ctx *execute o.returnWithCode(errorNotEnoughLiterals) } + + Comment("Return with not enough output space error") + { + Label("error_not_enough_space") + if !o.useSeqs { + ctx := Dereference(Param("ctx")) + tmp := GP64() + MOVQ(llP, tmp) + Store(tmp, ctx.Field("ll")) + MOVQ(mlP, tmp) + Store(tmp, ctx.Field("ml")) + Store(ec.outPosition, ctx.Field("outPosition")) + } + + o.returnWithCode(errorNotEnoughSpace) + } } func (o options) returnWithCode(returnCode uint32) { @@ -1000,6 +1025,7 @@ type 
executeSingleTripleContext struct { windowSize reg.GPVirtual // values used when useSeqs is false + outEndPtr Mem // pointer to s.out[cap(s.out)] histBasePtr Mem histLenPtr Mem windowSizePtr Mem @@ -1015,6 +1041,18 @@ func (e executeSimple) executeSingleTriple(c *executeSingleTripleContext, handle ml := GP64() MOVQ(c.mlPtr, ml) + if !e.useSeqs { + Comment("Check if we have enough space in s.out") + { + // baseAfterCopy = ll + ml + c.outBase + baseAfterCopy := GP64() + LEAQ(Mem{Base: ll, Index: ml, Scale: 1}, baseAfterCopy) + ADDQ(c.outBase, baseAfterCopy) + CMPQ(baseAfterCopy, c.outEndPtr) + JAE(LabelRef("error_not_enough_space")) + } + } + Comment("Copy literals") Label("copy_literals") { diff --git a/zstd/seqdec_amd64.go b/zstd/seqdec_amd64.go index 53fa67d521..04c7babac4 100644 --- a/zstd/seqdec_amd64.go +++ b/zstd/seqdec_amd64.go @@ -121,6 +121,10 @@ func (s *sequenceDecs) decodeSyncSimple(hist []byte) (bool, error) { return true, fmt.Errorf("unexpected literal count, want %d bytes, but only %d is available", ctx.ll, ctx.litRemain+ctx.ll) + case errorNotEnoughSpace: + size := ctx.outPosition + ctx.ll + ctx.ml + return true, fmt.Errorf("output (%d) bigger than max block size (%d)", size-startSize, maxBlockSize) + default: return true, fmt.Errorf("sequenceDecs_decode returned erronous code %d", errCode) } @@ -173,12 +177,15 @@ const errorMatchLenOfsMismatch = 1 // error reported when ml > maxMatchLen const errorMatchLenTooBig = 2 -// error reported when mo > t or mo > s.windowSize +// error reported when mo > available history or mo > s.windowSize const errorMatchOffTooBig = 3 // error reported when the sum of literal lengths exeeceds the literal buffer size const errorNotEnoughLiterals = 4 +// error reported when capacity of `out` is too small +const errorNotEnoughSpace = 5 + // sequenceDecs_decode implements the main loop of sequenceDecs in x86 asm. // // Please refer to seqdec_generic.go for the reference implementation. 
diff --git a/zstd/seqdec_amd64.s b/zstd/seqdec_amd64.s index f8587e4a3b..33da588815 100644 --- a/zstd/seqdec_amd64.s +++ b/zstd/seqdec_amd64.s @@ -306,6 +306,10 @@ error_not_enough_literals: MOVQ $0x00000004, ret+24(FP) RET + // Return with not enough output space error + MOVQ $0x00000005, ret+24(FP) + RET + // func sequenceDecs_decode_56_amd64(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int // Requires: CMOV TEXT ·sequenceDecs_decode_56_amd64(SB), $8-32 @@ -584,6 +588,10 @@ error_not_enough_literals: MOVQ $0x00000004, ret+24(FP) RET + // Return with not enough output space error + MOVQ $0x00000005, ret+24(FP) + RET + // func sequenceDecs_decode_bmi2(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int // Requires: BMI, BMI2, CMOV TEXT ·sequenceDecs_decode_bmi2(SB), $8-32 @@ -865,6 +873,10 @@ error_not_enough_literals: MOVQ $0x00000004, ret+24(FP) RET + // Return with not enough output space error + MOVQ $0x00000005, ret+24(FP) + RET + // func sequenceDecs_decode_56_bmi2(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int // Requires: BMI, BMI2, CMOV TEXT ·sequenceDecs_decode_56_bmi2(SB), $8-32 @@ -1121,6 +1133,10 @@ error_not_enough_literals: MOVQ $0x00000004, ret+24(FP) RET + // Return with not enough output space error + MOVQ $0x00000005, ret+24(FP) + RET + // func sequenceDecs_executeSimple_amd64(ctx *executeAsmContext) bool // Requires: SSE TEXT ·sequenceDecs_executeSimple_amd64(SB), $8-9 @@ -1374,7 +1390,7 @@ empty_seqs: // func sequenceDecs_decodeSync_amd64(s *sequenceDecs, br *bitReader, ctx *decodeSyncAsmContext) int // Requires: CMOV, SSE -TEXT ·sequenceDecs_decodeSync_amd64(SB), $56-32 +TEXT ·sequenceDecs_decodeSync_amd64(SB), $64-32 MOVQ br+8(FP), AX MOVQ 32(AX), DX MOVBQZX 40(AX), BX @@ -1387,16 +1403,21 @@ TEXT ·sequenceDecs_decodeSync_amd64(SB), $56-32 MOVQ 80(AX), R8 MOVQ 88(AX), R9 MOVQ 112(AX), R10 + MOVQ 128(AX), CX + MOVQ CX, 32(SP) MOVQ 144(AX), R11 MOVQ 136(AX), R12 MOVQ 200(AX), CX - MOVQ CX, 48(SP) + MOVQ CX, 56(SP) 
MOVQ 176(AX), CX - MOVQ CX, 40(SP) + MOVQ CX, 48(SP) MOVQ 184(AX), AX - MOVQ AX, 32(SP) - MOVQ 32(SP), AX - ADDQ AX, 40(SP) + MOVQ AX, 40(SP) + MOVQ 40(SP), AX + ADDQ AX, 48(SP) + + // Calculate poiter to s.out[cap(s.out)] (a past-end pointer) + ADDQ R10, 32(SP) // outBase += outPosition ADDQ R12, R10 @@ -1642,6 +1663,12 @@ sequenceDecs_decodeSync_amd64_match_len_ofs_ok: MOVQ 8(SP), CX MOVQ 16(SP), R13 + // Check if we have enough space in s.out + LEAQ (AX)(R13*1), R14 + ADDQ R10, R14 + CMPQ R14, 32(SP) + JAE error_not_enough_space + // Copy literals TESTQ AX, AX JZ check_offset @@ -1689,17 +1716,17 @@ copy_1_test: // Malformed input if seq.mo > t+len(hist) || seq.mo > s.windowSize) check_offset: MOVQ R12, AX - ADDQ 32(SP), AX + ADDQ 40(SP), AX CMPQ CX, AX JG error_match_off_too_big - CMPQ CX, 48(SP) + CMPQ CX, 56(SP) JG error_match_off_too_big // Copy match from history MOVQ CX, AX SUBQ R12, AX JLS copy_match - MOVQ 40(SP), R14 + MOVQ 48(SP), R14 SUBQ AX, R14 CMPQ R13, AX JGE copy_all_from_history @@ -1879,9 +1906,19 @@ error_not_enough_literals: MOVQ $0x00000004, ret+24(FP) RET + // Return with not enough output space error +error_not_enough_space: + MOVQ ctx+16(FP), AX + MOVQ 24(SP), CX + MOVQ CX, 208(AX) + MOVQ 16(SP), CX + MOVQ CX, 216(AX) + MOVQ $0x00000005, ret+24(FP) + RET + // func sequenceDecs_decodeSync_bmi2(s *sequenceDecs, br *bitReader, ctx *decodeSyncAsmContext) int // Requires: BMI, BMI2, CMOV, SSE -TEXT ·sequenceDecs_decodeSync_bmi2(SB), $56-32 +TEXT ·sequenceDecs_decodeSync_bmi2(SB), $64-32 MOVQ br+8(FP), CX MOVQ 32(CX), AX MOVBQZX 40(CX), DX @@ -1894,16 +1931,21 @@ TEXT ·sequenceDecs_decodeSync_bmi2(SB), $56-32 MOVQ 80(CX), DI MOVQ 88(CX), R8 MOVQ 112(CX), R9 + MOVQ 128(CX), R10 + MOVQ R10, 32(SP) MOVQ 144(CX), R10 MOVQ 136(CX), R11 MOVQ 200(CX), R12 - MOVQ R12, 48(SP) + MOVQ R12, 56(SP) MOVQ 176(CX), R12 - MOVQ R12, 40(SP) + MOVQ R12, 48(SP) MOVQ 184(CX), CX - MOVQ CX, 32(SP) - MOVQ 32(SP), CX - ADDQ CX, 40(SP) + MOVQ CX, 40(SP) + MOVQ 40(SP), 
CX + ADDQ CX, 48(SP) + + // Calculate poiter to s.out[cap(s.out)] (a past-end pointer) + ADDQ R9, 32(SP) // outBase += outPosition ADDQ R11, R9 @@ -2127,6 +2169,12 @@ sequenceDecs_decodeSync_bmi2_match_len_ofs_ok: MOVQ 8(SP), R12 MOVQ 16(SP), R13 + // Check if we have enough space in s.out + LEAQ (CX)(R13*1), R14 + ADDQ R9, R14 + CMPQ R14, 32(SP) + JAE error_not_enough_space + // Copy literals TESTQ CX, CX JZ check_offset @@ -2174,17 +2222,17 @@ copy_1_test: // Malformed input if seq.mo > t+len(hist) || seq.mo > s.windowSize) check_offset: MOVQ R11, CX - ADDQ 32(SP), CX + ADDQ 40(SP), CX CMPQ R12, CX JG error_match_off_too_big - CMPQ R12, 48(SP) + CMPQ R12, 56(SP) JG error_match_off_too_big // Copy match from history MOVQ R12, CX SUBQ R11, CX JLS copy_match - MOVQ 40(SP), R14 + MOVQ 48(SP), R14 SUBQ CX, R14 CMPQ R13, CX JGE copy_all_from_history @@ -2364,9 +2412,19 @@ error_not_enough_literals: MOVQ $0x00000004, ret+24(FP) RET + // Return with not enough output space error +error_not_enough_space: + MOVQ ctx+16(FP), AX + MOVQ 24(SP), CX + MOVQ CX, 208(AX) + MOVQ 16(SP), CX + MOVQ CX, 216(AX) + MOVQ $0x00000005, ret+24(FP) + RET + // func sequenceDecs_decodeSync_safe_amd64(s *sequenceDecs, br *bitReader, ctx *decodeSyncAsmContext) int // Requires: CMOV, SSE -TEXT ·sequenceDecs_decodeSync_safe_amd64(SB), $56-32 +TEXT ·sequenceDecs_decodeSync_safe_amd64(SB), $64-32 MOVQ br+8(FP), AX MOVQ 32(AX), DX MOVBQZX 40(AX), BX @@ -2379,16 +2437,21 @@ TEXT ·sequenceDecs_decodeSync_safe_amd64(SB), $56-32 MOVQ 80(AX), R8 MOVQ 88(AX), R9 MOVQ 112(AX), R10 + MOVQ 128(AX), CX + MOVQ CX, 32(SP) MOVQ 144(AX), R11 MOVQ 136(AX), R12 MOVQ 200(AX), CX - MOVQ CX, 48(SP) + MOVQ CX, 56(SP) MOVQ 176(AX), CX - MOVQ CX, 40(SP) + MOVQ CX, 48(SP) MOVQ 184(AX), AX - MOVQ AX, 32(SP) - MOVQ 32(SP), AX - ADDQ AX, 40(SP) + MOVQ AX, 40(SP) + MOVQ 40(SP), AX + ADDQ AX, 48(SP) + + // Calculate poiter to s.out[cap(s.out)] (a past-end pointer) + ADDQ R10, 32(SP) // outBase += outPosition ADDQ R12, R10 @@ 
-2634,6 +2697,12 @@ sequenceDecs_decodeSync_safe_amd64_match_len_ofs_ok: MOVQ 8(SP), CX MOVQ 16(SP), R13 + // Check if we have enough space in s.out + LEAQ (AX)(R13*1), R14 + ADDQ R10, R14 + CMPQ R14, 32(SP) + JAE error_not_enough_space + // Copy literals TESTQ AX, AX JZ check_offset @@ -2681,17 +2750,17 @@ copy_1_test: // Malformed input if seq.mo > t+len(hist) || seq.mo > s.windowSize) check_offset: MOVQ R12, AX - ADDQ 32(SP), AX + ADDQ 40(SP), AX CMPQ CX, AX JG error_match_off_too_big - CMPQ CX, 48(SP) + CMPQ CX, 56(SP) JG error_match_off_too_big // Copy match from history MOVQ CX, AX SUBQ R12, AX JLS copy_match - MOVQ 40(SP), R14 + MOVQ 48(SP), R14 SUBQ AX, R14 CMPQ R13, AX JGE copy_all_from_history @@ -2900,9 +2969,19 @@ error_not_enough_literals: MOVQ $0x00000004, ret+24(FP) RET + // Return with not enough output space error +error_not_enough_space: + MOVQ ctx+16(FP), AX + MOVQ 24(SP), CX + MOVQ CX, 208(AX) + MOVQ 16(SP), CX + MOVQ CX, 216(AX) + MOVQ $0x00000005, ret+24(FP) + RET + // func sequenceDecs_decodeSync_safe_bmi2(s *sequenceDecs, br *bitReader, ctx *decodeSyncAsmContext) int // Requires: BMI, BMI2, CMOV, SSE -TEXT ·sequenceDecs_decodeSync_safe_bmi2(SB), $56-32 +TEXT ·sequenceDecs_decodeSync_safe_bmi2(SB), $64-32 MOVQ br+8(FP), CX MOVQ 32(CX), AX MOVBQZX 40(CX), DX @@ -2915,16 +2994,21 @@ TEXT ·sequenceDecs_decodeSync_safe_bmi2(SB), $56-32 MOVQ 80(CX), DI MOVQ 88(CX), R8 MOVQ 112(CX), R9 + MOVQ 128(CX), R10 + MOVQ R10, 32(SP) MOVQ 144(CX), R10 MOVQ 136(CX), R11 MOVQ 200(CX), R12 - MOVQ R12, 48(SP) + MOVQ R12, 56(SP) MOVQ 176(CX), R12 - MOVQ R12, 40(SP) + MOVQ R12, 48(SP) MOVQ 184(CX), CX - MOVQ CX, 32(SP) - MOVQ 32(SP), CX - ADDQ CX, 40(SP) + MOVQ CX, 40(SP) + MOVQ 40(SP), CX + ADDQ CX, 48(SP) + + // Calculate poiter to s.out[cap(s.out)] (a past-end pointer) + ADDQ R9, 32(SP) // outBase += outPosition ADDQ R11, R9 @@ -3148,6 +3232,12 @@ sequenceDecs_decodeSync_safe_bmi2_match_len_ofs_ok: MOVQ 8(SP), R12 MOVQ 16(SP), R13 + // Check if we have enough 
space in s.out + LEAQ (CX)(R13*1), R14 + ADDQ R9, R14 + CMPQ R14, 32(SP) + JAE error_not_enough_space + // Copy literals TESTQ CX, CX JZ check_offset @@ -3195,17 +3285,17 @@ copy_1_test: // Malformed input if seq.mo > t+len(hist) || seq.mo > s.windowSize) check_offset: MOVQ R11, CX - ADDQ 32(SP), CX + ADDQ 40(SP), CX CMPQ R12, CX JG error_match_off_too_big - CMPQ R12, 48(SP) + CMPQ R12, 56(SP) JG error_match_off_too_big // Copy match from history MOVQ R12, CX SUBQ R11, CX JLS copy_match - MOVQ 40(SP), R14 + MOVQ 48(SP), R14 SUBQ CX, R14 CMPQ R13, CX JGE copy_all_from_history @@ -3413,3 +3503,13 @@ error_not_enough_literals: MOVQ CX, 208(AX) MOVQ $0x00000004, ret+24(FP) RET + + // Return with not enough output space error +error_not_enough_space: + MOVQ ctx+16(FP), AX + MOVQ 24(SP), CX + MOVQ CX, 208(AX) + MOVQ 16(SP), CX + MOVQ CX, 216(AX) + MOVQ $0x00000005, ret+24(FP) + RET