From e45dd6ee26c5d80d06d396c64166597ad88dee2e Mon Sep 17 00:00:00 2001 From: Klaus Post Date: Wed, 6 Apr 2022 10:23:05 +0200 Subject: [PATCH] zstd: Use precise literal copy Literals are not over-allocated, so they must (for now) be copied exactly. Fixes #552 --- zstd/_generate/gen.go | 3 ++- zstd/seqdec_amd64.s | 39 ++++++++++++++++++++++++++++++----- zstd/testdata/regression.zip | Bin 1463447 -> 1467891 bytes 3 files changed, 36 insertions(+), 6 deletions(-) diff --git a/zstd/_generate/gen.go b/zstd/_generate/gen.go index f1bc83afb7..a42aa65b90 100644 --- a/zstd/_generate/gen.go +++ b/zstd/_generate/gen.go @@ -674,7 +674,8 @@ func (e executeSimple) generateProcedure(name string) { { TESTQ(ll, ll) JZ(LabelRef("check_offset")) - e.copyMemory("1", literals, outBase, ll) + // TODO: Investigate if it is possible to consistently overallocate literals. + e.copyMemoryPrecise("1", literals, outBase, ll) ADDQ(ll, literals) ADDQ(ll, outBase) diff --git a/zstd/seqdec_amd64.s b/zstd/seqdec_amd64.s index bb9bf794f3..e12bece2e6 100644 --- a/zstd/seqdec_amd64.s +++ b/zstd/seqdec_amd64.s @@ -1116,16 +1116,45 @@ main_loop: TESTQ R13, R13 JZ check_offset XORQ R14, R14 + TESTQ $0x00000001, R13 + JZ copy_1_word + MOVB (SI)(R14*1), R15 + MOVB R15, (BX)(R14*1) + ADDQ $0x01, R14 + +copy_1_word: + TESTQ $0x00000002, R13 + JZ copy_1_dword + MOVW (SI)(R14*1), R15 + MOVW R15, (BX)(R14*1) + ADDQ $0x02, R14 + +copy_1_dword: + TESTQ $0x00000004, R13 + JZ copy_1_qword + MOVL (SI)(R14*1), R15 + MOVL R15, (BX)(R14*1) + ADDQ $0x04, R14 + +copy_1_qword: + TESTQ $0x00000008, R13 + JZ copy_1_test + MOVQ (SI)(R14*1), R15 + MOVQ R15, (BX)(R14*1) + ADDQ $0x08, R14 + JMP copy_1_test copy_1: MOVUPS (SI)(R14*1), X0 MOVUPS X0, (BX)(R14*1) ADDQ $0x10, R14 - CMPQ R14, R13 - JB copy_1 - ADDQ R13, SI - ADDQ R13, BX - ADDQ R13, R8 + +copy_1_test: + CMPQ R14, R13 + JB copy_1 + ADDQ R13, SI + ADDQ R13, BX + ADDQ R13, R8 // Malformed input if seq.mo > t+len(hist) || seq.mo > s.windowSize) check_offset: diff --git a/zstd/testdata/regression.zip b/zstd/testdata/regression.zip index 794c3e73529ee02771ec75fadbd059190eb4f1b9..37e553728a71fd140757fd5b8cab3e108a3e221c 100644 GIT binary patch delta 4557 zcma)=cQ@M)z{O+MsuZDS?b@ofw<=15+PhYbqV}HOB192;#AwkVwo2?>TNNEver9b& zWAE|wc>&Kk&%NjVanHH;4Scf8%@0S+-@yoAgfJo)F^mL83cCX%gOS51V3aT_*j*Sk z3=F#mqk+-F=wJ{SeLX{q7C=M{0TF`8KvExLjCQB_tGe%jK)?Tm9|QuumX>sqm6CCA zlynexkdUx{@><5x@wK$QgtWMGw`J8LzW%fUYiUO`^CQ{U3*HrmsVeju8us_zfH2Okvz}{BY!O`!LFH z$Xf65M-V8*z0taUdmCN=dB|PcbH{l-a=^ol^Ss0^jP&V(?igc)$|VuCcU5^=gzB6K z9ZnfMY&%Bz7VejbPKQ5ViNy{!>yq#qA12h3WLBu(cJqC^6k+pHxbVrFZ)h&tGRCKj zJAci8{Nc>>#c>cIvvXDvs6i1YA9%1XCU0-2B&&f2pX_QaOc>qY;MP(|KO&zHeveoV zpAPAO1V^Lu`Mv=Bv1yez{vF~DaHTEB2K`?MTT|rNZlLHA=a2o7N3`jYy~@H6q^Ju* z{x$oJ;(64=Bw2z^kLp^T!iDnR#_*gc3}6$9nm6sQA}WEO^Hs&qH;#7dnTS0_0L;uIxSEW*R8SPm|NNM;^``$4ReIGG~CT zA)}8v8j*YYRoJSWWC_*C{IjVXbYxp)=9h0;ZrhmahuGCnhl-W{o8?_Lh?6 zFGv*&ij6J02OhIz9Q9sF^jbDl@EQ8Eg~AQ67rqSpMsFHA?BIBB=P%(eF!Sb|8gr=a z4k0YQMRHMJ{f^7ZvRk8vN-$DD5&Gcef3;M6yi%@_Y%7_nO@397JyhF6S-5E2)Ci7z z9B{H!-)H&FZjeGZI*|EO$jpg@N4($>=S)?0og`hswC~*#WT@Tx&d5Oc#pZ@>zW0wY zyCAb6r_P0HKC@c#K90MC?AfYyKLsTTM#^7=NS@W)yx<6pa2#Xg;tvULogBe!=+rS4 zY;3v`PtL}DU%$5Il$uixHasOiB#xYD)B-N9!_VURgNM`D5ItN-IBkIDH<70 z@&WHHmPQ>?Nl)BRxMr*$nHt|rtp>iSs{f=?C>(QdN)NJ>Bl+7{D;XaB`Ji+FBzK3n#||iT{7osRWAu2vrI9o=>Ma*5yoh zHGcz;%eq7B6^;)S)=r6WQCO^zwNH2Lr9h}@Taz}~vsaPNPk4@-Np&h)Xi%fB^wr*> zvDWJ}APvoSal3_)#YRl2P0~xRxj&FspPHf|iV+KiIQoXE%raPhgcgcO7iE4DU1Dzj znTI}(MvOmWxN@j=^ylhVASkCxnfijgF_jiO0xV^7>PsvHpGY z;z|6T`XpoS`U)=6>MhS51j7yu^d#*ro2+z;4cZC!2x4e}oBZ8$E!NQZlr#OQ0wF8J zADrED@!4vC!g6Ni?;!q#k8MMXtVwgPfsGJK##Ol2Qz}IMijThIrdA=(k z;Qe2?=tYUxKt-p)8>7u6!X;gsc2?Fx9jJOg^iw2d9Th3mag!`@(_#2odYX1PlSi_F zz^2~wk_06&$??=h{Cmuk>rLKtw~ZgEgO;O+A8ZHBQEO%UeDCOe5ZVe)ml|ry2YMcB)vXYzZ^KmDy>dwuk`;v7hx+EpW0pfO6y1&;U1KJKNjy3MW$A5}?w8r5} z9~Q){=5lx)j`AHJ$9uTlv)L)sMzjZ^=E_PR=obmrYFi5f~<*^8$whdhIW9EmRuyc=jRDOrU%YDO5;meo8WA;~<^E`(s$p$U_ z*iQm7-se0(SxZ33lpk}=8?wtL%TtH$82T=ka!Duo0eTqZ<0R55OE=DFB_qu5Gp4%2 z*W)4ipK<+a;{fvSOXP^SIHxjoa60ZC82q+;I>oe>n%`vOL*7XLDMy!b!pjk!Fh+4s zT4#Osj5IdxSS?qm*_Oj**F{$N{m6b_w`*j&=*po(s8!ER z+UgsOQ_68)XeO-HoBBiI9`lzIXbv{v^r&jTkl24VdRR{y(Y@5KTFo9qIdygBbS}2C zZNP5z#6A9DzqhZ7TaeehV>N)T%$S8eV&8GM3YLcnE3Zo2OGvgTbyJ{O4=dqpsm-Uc z`{DvvP<5J1ah#hmYDsMMSh?{I(cyqB%MPA>12!S|yGQMYZ#|c`+M2t*T4Q?cil3t! zgvLlp=7_jMq}F2iLHi2ZsBf7*R8f=t?}C;{!oy0ZG!4i?S}2<-eVTSNB#WxAMIG{% zqT|5^=#Jq|@p_B>h*O_5!5T0}f z8RG2It@z4yZadtC8*#onKz`>dhEaA1tAC<;;V0{;AK%7Nu1DH?r}y&$##vwN=`)}x z^z>N~%ci;Ply1U9+QeE+EEuG^&7+^t}%2qed>T028cI zgqqjlH#^AC&5Q{O=5&*)S*63ZB;Y%kL*^ssc{ALHz1Wv^PTJe}>q(=#Y{j^DM3r|I zwWX>VknusUpz}{hUsc<){z`?Anj}Fe|1Bzs>M06t-f-=luRbowsh#zr-vW3$0Y!#_ zl`r+L4oZqUS&&8JUCaFZ_TB{1`N&65ml(#u*g%RcC6{IkywkS@<6JNM4AV5^)f#ff z`)kheVI;LK2R_b&U#xvI|M95zL4uE*%F|WU_oZ6I;7Xw!%wUIVoc#-x{2mYb0)3At zD`%Qf{`sd_YV0xB#`KiZGXOEU7Oi&BMP1~xtl0Q1FQs-hD>FB>tc!H zSSneBFs=K((C~o5oOq%CYn>|gFpl2?;SoPm2|I8PUpE|J7emwG*Q?cSmCAc96M|@v zh>k_tkB%YbZ}{F>9!il|;t)A)X{TNbt!&g>$3cwIM^_7(1H&i#Ye2~C&(9>Ujub93 zHtf@95DoO}&Fe`0I&2=U{hd_E!GEpWq3Xv2F>C72h-yOiK+RRb@ z$f?mRgBCQSyacc#WWWQwf_-t!ai)FB7tbS;pn5({-tU`HR}sgub{@)voMm`a`ufRI zxoCmad%68)cbdgfe6iw!rt0$*M|Xn(ymU3Rw||`D{`uRNgCYZmKQ&|+9aTGxRK|>m zv-CdIA|1jz+vW*#n>bAGMyCFCEsOdBk$8G=@fKQdMtb7DsSFepaBOd6<5%wuG`~%h zQI<4=6av3Rk(3vB!shI{L<6FC@dbZ{4q)E=U$a0KVh3!zdsCjYA1X1{= zJDkj^EIZ3W23=hbnfE+)s-%v;wH9Y{6Hffa|1Kwg~qQy##vrNNCM;M`L&` znBei%?)I!QoW%OoC?Wvt+)O;L;4BQu*F;IQ)Q5m&C-SjS=dV-y9^4Q6BT(F?VEBYh z@a{yoAA`s>cnUrx^X#CL(1j-Z0jti4ab@LZCq64)ZNmGA4sOAz0|ig0uQCR z>@aX3g6z+DS{6~J#QVQ8!IJ^U`^|Slnx?zMX$Bv{?5B%+%2c$xA5gv{`ux-1Q=y=k zf`~E?qdkP7BlfSR8%>Dr%lA9Q`?G4m>&Gpg@2mO|dl0xMnn>5N*=Kb{jHY@9XUSks zel#j79_6MBi<_7{&2{rDoMKke9L~9l)u}uV1~>*A*V`wF*B=6>l{;6U#onUY#{o^&)>***j^%>HG=(Mn$C` z@a5jJq_nCS5=`F7gplz)wp4x;D$za1yY2i7{m3_XR<}-ADL+S0rIv}KKWyHT_o*#D z?Sea0AIq&P!aDJl2&vP=9a0sq|XSqz9s5a!NxR7)_daQtusUYn0a;2Zo+N zRc*~}6jnwq>nHC{ifQk>(s4u+KzVR(b$MRI2`9d~2in0w4l|wlB|hA311hr=Fvit; z<{bq+KoWuN+Gp2mA~q(+2keFmFf~=ywT~jd`E$rbI)-vK*x7jOQVzFjCm8?_bHU4s zChhtuDwS@M@Mo49n28i>GZg`Cg-(J`DKjVW1kAKOxLmgWHls8;IO4I3l<=flnDK@> zT9mfN^SDjxKBqvgQEti0{h;rO!c8ce!k%K7%9}A2N2Q?*w2kKht>K$)i@0Qhc$I2) z16@qsNmZIgG#o+*L;x%ImoqNDpDdkXYosQ;a=ug(6yxHT63JY5f&Teq; za$zr?C0AG>+7prbQ8u2d)%_xaf+GQ>s*3RlQp*-kp0LOlQ|S1!gn0E8)SrD0sdc=tIrOvlo|Gwo0_y3zEQXm5JEuzH!k@{O#X@EU;eHC Z-|$2aN<#X-Kdyh~pag+BEExW^{{b2m%ZdO1 delta 80 zcmex-I&%8T$c7fi7N!>F7M2#)7Pc1l7LFFq7OocV7M>Q~7QPn#7J(MQ7NHj57LgXw gEn+)8x9^VaZi-BRgrx=h206Df6y#N3J