Skip to content

Commit

Permalink
feat:(decoder) support skipping mismatch-typed values
Browse files Browse the repository at this point in the history
  • Loading branch information
AsterDY committed Oct 31, 2022
1 parent 78c2ade commit c37035b
Show file tree
Hide file tree
Showing 6 changed files with 233 additions and 22 deletions.
89 changes: 82 additions & 7 deletions decoder/assembler_amd64_go117.go
Expand Up @@ -70,7 +70,7 @@ const (
_FP_args = 72 // 72 bytes to pass and spill register arguements
_FP_fargs = 80 // 80 bytes for passing arguments to other Go functions
_FP_saves = 48 // 48 bytes for saving the registers before CALL instructions
_FP_locals = 120 // 112 bytes for local variables
_FP_locals = 136 // 136 bytes for local variables
)

const (
Expand Down Expand Up @@ -101,6 +101,7 @@ const (
_LB_unquote_error = "_unquote_error"
_LB_parsing_error = "_parsing_error"
_LB_parsing_error_v = "_parsing_error_v"
_LB_mismatch_error = "_mismatch_error"
)

const (
Expand Down Expand Up @@ -191,6 +192,11 @@ var (

// _VAR_fl is a local stack slot at offset 112 past the fargs and saves areas.
var _VAR_fl = jit.Ptr(_SP, _FP_fargs + _FP_saves + 112)

// Local stack slots used to remember a type mismatch: they are written by
// _asm_OP_dismatch_err and read back by mismatch_error.
var (
_VAR_et = jit.Ptr(_SP, _FP_fargs + _FP_saves + 120) // saved Go type of the mismatched value
_VAR_ic = jit.Ptr(_SP, _FP_fargs + _FP_saves + 128) // saved input position of the mismatched value
)

type _Assembler struct {
jit.BaseAssembler
p _Program
Expand Down Expand Up @@ -220,6 +226,7 @@ func (self *_Assembler) compile() {
self.escape_string()
self.escape_string_twice()
self.type_error()
self.mismatch_error()
self.field_error()
self.range_error()
self.stack_error()
Expand Down Expand Up @@ -290,6 +297,11 @@ var _OpFuncTab = [256]func(*_Assembler, *_Instr) {
_OP_recurse : (*_Assembler)._asm_OP_recurse,
_OP_goto : (*_Assembler)._asm_OP_goto,
_OP_switch : (*_Assembler)._asm_OP_switch,
_OP_check_bool : (*_Assembler)._asm_OP_check_bool,
_OP_check_bytes : (*_Assembler)._asm_OP_check_bytes,
_OP_check_num : (*_Assembler)._asm_OP_check_num,
_OP_check_char_0 : (*_Assembler)._asm_OP_check_char_0,
_OP_dismatch_err : (*_Assembler)._asm_OP_dismatch_err,
}

func (self *_Assembler) instr(v *_Instr) {
Expand All @@ -310,9 +322,12 @@ func (self *_Assembler) instrs() {

func (self *_Assembler) epilogue() {
self.Mark(len(self.p))
self.Emit("XORL", _ET, _ET) // XORL ET, ET
self.Emit("XORL", _EP, _EP) // XORL EP, EP
self.Emit("MOVQ", _VAR_et, _ET) // MOVQ VAR_et, ET
self.Emit("TESTQ", _ET, _ET) // TESTQ ET, ET
self.Sjmp("JNZ", _LB_mismatch_error) // JNZ _LB_mismatch_error
self.Link(_LB_error) // _error:
// self.Byte(0xcc)
self.Emit("MOVQ", _EP, _CX) // MOVQ BX, CX
self.Emit("MOVQ", _ET, _BX) // MOVQ AX, BX
self.Emit("MOVQ", _IC, _AX) // MOVQ IC, AX
Expand Down Expand Up @@ -343,6 +358,8 @@ func (self *_Assembler) prologue() {
self.Emit("MOVQ", jit.Imm(0), _VAR_sv_p) // MOVQ $0, sv.p<>+48(FP)
self.Emit("MOVQ", jit.Imm(0), _VAR_sv_n) // MOVQ $0, sv.n<>+56(FP)
self.Emit("MOVQ", jit.Imm(0), _VAR_vk) // MOVQ $0, vk<>+64(FP)
self.Emit("MOVQ", jit.Imm(0), _VAR_et) // MOVQ $0, et<>+120(FP)
self.Emit("MOVQ", jit.Imm(0), _VAR_ic) // MOVQ $0, et<>+128(FP)
// initialize digital buffer first
self.Emit("MOVQ", jit.Imm(_MaxDigitNums), _VAR_st_Dc) // MOVQ $_MaxDigitNums, ss.Dcap
self.Emit("LEAQ", jit.Ptr(_ST, _DbufOffset), _AX) // LEAQ _DbufOffset(ST), AX
Expand Down Expand Up @@ -421,11 +438,12 @@ func (self *_Assembler) call_vf(fn obj.Addr) {
/** Assembler Error Handlers **/

// Go-side helper functions wrapped with jit.Func so the error handlers of the
// generated code can invoke them (e.g. mismatch_error calls _F_error_mismatch
// through call_go).
var (
_F_convT64 = jit.Func(convT64)
_F_error_wrap = jit.Func(error_wrap)
_F_error_type = jit.Func(error_type)
_F_error_field = jit.Func(error_field)
_F_error_value = jit.Func(error_value)
_F_convT64 = jit.Func(convT64)
_F_error_wrap = jit.Func(error_wrap)
_F_error_type = jit.Func(error_type)
_F_error_field = jit.Func(error_field)
_F_error_value = jit.Func(error_value)
_F_error_mismatch = jit.Func(error_mismatch)
)

var (
Expand Down Expand Up @@ -454,6 +472,16 @@ func (self *_Assembler) type_error() {
self.Sjmp("JMP" , _LB_error) // JMP _error
}

// mismatch_error emits the _mismatch_error handler: it loads the arguments of
// error_mismatch(src, pos, vt) from the source-string arguments and the saved
// mismatch slots, calls it, and then continues at the common _error label.
func (self *_Assembler) mismatch_error() {
self.Link(_LB_mismatch_error) // _mismatch_error:
self.Emit("MOVQ", _ARG_sp, _AX) // MOVQ sp, AX (src pointer)
self.Emit("MOVQ", _ARG_sl, _BX) // MOVQ sl, BX (src length)
self.Emit("MOVQ", _VAR_ic, _CX) // MOVQ VAR_ic, CX (saved mismatch position)
self.Emit("MOVQ", _VAR_et, _DI) // MOVQ VAR_et, DI (saved mismatch type)
self.call_go(_F_error_mismatch) // CALL_GO error_mismatch
self.Sjmp("JMP" , _LB_error) // JMP _error
}

func (self *_Assembler) field_error() {
self.Link(_LB_field_error) // _field_error:
self.Emit("MOVQ", _VAR_sv_p, _AX) // MOVQ sv.p, AX
Expand Down Expand Up @@ -1616,6 +1644,53 @@ func (self *_Assembler) _asm_OP_check_char(p *_Instr) {
self.Xjmp("JE" , p.vi()) // JE {p.vi()}
}

// _asm_OP_check_char_0 emits a comparison of the byte at the current input
// position against p.vb() and a jump to p.vi() when they are equal. This
// function itself emits nothing that modifies IC; on a non-match execution
// simply falls through to the next instruction.
func (self *_Assembler) _asm_OP_check_char_0(p *_Instr) {
	self.check_eof(1)

	current := jit.Sib(_IP, _IC, 1, 0)
	expected := jit.Imm(int64(p.vb()))
	self.Emit("CMPB", current, expected) // CMPB (IP)(IC), ${p.vb()}

	target := p.vi()
	self.Xjmp("JE", target) // JE {p.vi()}
}

// _asm_OP_check_bool emits a check that the byte at the current input position
// is 'f' or 't' (the first letters of the JSON boolean literals), jumping to
// p.vi() on a match and falling through otherwise.
func (self *_Assembler) _asm_OP_check_bool(p *_Instr) {
	self.check_eof(1)
	self.Emit("MOVBLZX", jit.Sib(_IP, _IC, 1, 0), _AX) // MOVBLZX (IP)(IC), AX

	// One compare-and-branch per accepted first byte, in the same order.
	for _, first := range [...]byte{'f', 't'} {
		self.Emit("CMPB", _AX, jit.Imm(int64(first))) // CMPB AX, ${first}
		self.Xjmp("JE", p.vi())                       // JE {p.vi()}
	}
}

// _asm_OP_check_bytes emits a check that the byte at the current input
// position is '"' or '[', jumping to p.vi() on a match and falling through
// otherwise. checkType uses this op for the byte-slice type.
func (self *_Assembler) _asm_OP_check_bytes(p *_Instr) {
	self.check_eof(1)
	self.Emit("MOVBLZX", jit.Sib(_IP, _IC, 1, 0), _AX) // MOVBLZX (IP)(IC), AX

	// One compare-and-branch per accepted first byte, in the same order.
	for _, first := range [...]byte{'"', '['} {
		self.Emit("CMPB", _AX, jit.Imm(int64(first))) // CMPB AX, ${first}
		self.Xjmp("JE", p.vi())                       // JE {p.vi()}
	}
}

// _asm_OP_check_num emits a check that the byte at the current input position
// can start a numeric value, jumping to p.vi() when it can and falling through
// to the next instruction when it cannot.
//
// p.vb() selects which first bytes are accepted (the values are the ones
// emitted by checkType):
//
//	0: an ASCII digit only (unsigned integers)
//	1: an ASCII digit or '-' (signed integers and floats)
//	2: an ASCII digit, '-' or '"' (json.Number)
//
// Mode 2 previously did not accept '-', so a negative json.Number such as -1
// was reported as a type mismatch even though it is a valid number.
func (self *_Assembler) _asm_OP_check_num(p *_Instr) {
	self.check_eof(1)
	self.Emit("MOVBLZX", jit.Sib(_IP, _IC, 1, 0), _AX) // MOVBLZX (IP)(IC), AX

	mode := p.vb()
	if mode == 2 { // json.Number may also be given as a quoted string
		self.Emit("CMPB", _AX, jit.Imm(int64('"'))) // CMPB AX, $'"'
		self.Xjmp("JE", p.vi())                     // JE {p.vi()}
	}
	if mode == 1 || mode == 2 { // signed numbers (and json.Number) may start with '-'
		self.Emit("CMPB", _AX, jit.Imm(int64('-'))) // CMPB AX, $'-'
		self.Xjmp("JE", p.vi())                     // JE {p.vi()}
	}

	// Digit test: (c - '0') compared as an unsigned byte against 9, so any
	// byte below '0' wraps around and fails the lower-or-same branch.
	self.Emit("LEAL", jit.Ptr(_AX, -int64('0')), _CX) // LEAL -'0'(AX), CX
	self.Emit("CMPB", _CX, jit.Imm(int64('9'-'0')))   // CMPB CX, $9
	self.Xjmp("JLS", p.vi())                          // JLS {p.vi()}
}

// _asm_OP_dismatch_err emits code that records a type mismatch without raising
// it: the current input position goes into VAR_ic and the instruction's Go
// type (p.vt()) into VAR_et. The epilogue later tests VAR_et and branches to
// the _mismatch_error handler when it is non-zero.
func (self *_Assembler) _asm_OP_dismatch_err(p *_Instr) {
	// Remember where the mismatched value starts.
	self.Emit("MOVQ", _IC, _VAR_ic) // MOVQ IC, VAR_ic

	// Remember the expected type, staged through AX.
	expected := jit.Type(p.vt())
	self.Emit("MOVQ", expected, _AX) // MOVQ ${p.vt()}, AX
	self.Emit("MOVQ", _AX, _VAR_et)  // MOVQ AX, VAR_et
}

func (self *_Assembler) _asm_OP_load(_ *_Instr) {
self.Emit("MOVQ", jit.Ptr(_ST, 0), _AX) // MOVQ (ST), AX
self.Emit("MOVQ", jit.Sib(_ST, _AX, 1, 0), _VP) // MOVQ (ST)(AX), VP
Expand Down
67 changes: 54 additions & 13 deletions decoder/assembler_test.go
Expand Up @@ -12,25 +12,66 @@
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
*/

package decoder

import (
`encoding/base64`
`encoding/json`
`reflect`
`testing`
`unsafe`

`github.com/bytedance/sonic/internal/caching`
`github.com/bytedance/sonic/internal/jit`
`github.com/bytedance/sonic/internal/native/types`
`github.com/bytedance/sonic/internal/rt`
`github.com/stretchr/testify/assert`
`github.com/stretchr/testify/require`
"encoding/base64"
"encoding/json"
"reflect"
"strings"
"testing"
"unsafe"

"github.com/bytedance/sonic/internal/caching"
"github.com/bytedance/sonic/internal/jit"
"github.com/bytedance/sonic/internal/native/types"
"github.com/bytedance/sonic/internal/rt"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)

// TestSkipError feeds the decoder a document in which several fields carry a
// value of the wrong JSON kind and checks that:
//   - the decoder and encoding/json agree on whether an error is returned,
//   - both leave the destination struct in the same state (so the fields with
//     well-typed values were still decoded),
//   - the decoder reports a *MismatchTypeError describing field "g", with Pos
//     pointing at the first byte of its value.
func TestSkipError(t *testing.T) {
	type skiptype struct {
		A int    `json:"a"`
		B string `json:"b"`

		Pass int `json:"pass"`

		C struct {
			D struct {
				E float32 `json:"e"`
			} `json:"d"`

			Pass int `json:"pass"`
		} `json:"c"`
		E bool           `json:"e"`
		F []int          `json:"f"`
		G map[string]int `json:"g"`

		Pass2 int `json:"pass2"`
	}

	data := `{"a":"","b":1,"c":{"d":true,"pass":1},"e":{},"f":"","g":[],"pass":1,"pass2":1}`
	obj, obj2 := &skiptype{}, &skiptype{}

	d := NewDecoder(data)
	err := d.Decode(obj)
	err2 := json.Unmarshal([]byte(data), obj2)

	assert.Equal(t, err2 == nil, err == nil)
	assert.Equal(t, obj2, obj)

	te, ok := err.(*MismatchTypeError)
	if !ok {
		t.Fatalf("expected *MismatchTypeError, got %T: %v", err, err)
	}
	assert.Equal(t, reflect.TypeOf(obj.G), te.Type)
	// +4 skips over `"g":` so the expected position is the '[' itself.
	assert.Equal(t, strings.Index(data, `"g":[`)+4, te.Pos)
	t.Log(err.Error())
}

func TestAssembler_PrologueAndEpilogue(t *testing.T) {
a := newAssembler(nil)
_, e := a.Load()("", 0, nil, nil, 0, "", nil)
Expand Down
54 changes: 54 additions & 0 deletions decoder/compiler.go
Expand Up @@ -94,6 +94,11 @@ const (
_OP_recurse
_OP_goto
_OP_switch
_OP_check_bool
_OP_check_bytes
_OP_check_num
_OP_check_char_0
_OP_dismatch_err
)

const (
Expand Down Expand Up @@ -165,6 +170,11 @@ var _OpNames = [256]string {
_OP_recurse : "recurse",
_OP_goto : "goto",
_OP_switch : "switch",
_OP_check_bool : "check_bool",
_OP_check_bytes : "check_bytes",
_OP_check_num : "check_num",
_OP_check_char_0 : "check_char_0",
_OP_dismatch_err : "dismatch_err",
}

func (self _Op) String() string {
Expand Down Expand Up @@ -559,6 +569,8 @@ func (self *_Compiler) compileOne(p *_Program, sp int, vt reflect.Type) {
}

func (self *_Compiler) compileOps(p *_Program, sp int, vt reflect.Type) {
// check that the first char matches the type
skip := self.checkType(p, vt)
switch vt.Kind() {
case reflect.Bool : self.compilePrimitive (p, _OP_bool)
case reflect.Int : self.compilePrimitive (p, _OP_int())
Expand All @@ -583,6 +595,9 @@ func (self *_Compiler) compileOps(p *_Program, sp int, vt reflect.Type) {
case reflect.Struct : self.compileStruct (p, sp, vt)
default : panic (&json.UnmarshalTypeError{Type: vt})
}
if skip >= 0 {
p.pin(skip)
}
}

func (self *_Compiler) compileMap(p *_Program, sp int, vt reflect.Type) {
Expand Down Expand Up @@ -893,6 +908,45 @@ end_of_object:
p.pin(n)
}

// checkType emits a first-character type check for a value of type vt and
// returns the program counter of a trailing _OP_goto that the caller must pin
// to the instruction following the value's decoding code. It returns -1 when
// no check is emitted (interface types). Pointer types are checked as their
// element type.
//
// The emitted sequence is:
//
//	check op        -> pinned to jump past this sequence when the char matches
//	_OP_dismatch_err   records the mismatched type
//	_OP_object_next    (presumably skips the mismatched value)
//	_OP_goto        -> returned, to be pinned by the caller
//
// It panics with *json.UnmarshalTypeError for kinds it has no check for.
func (self *_Compiler) checkType(p *_Program, vt reflect.Type) int {
	kind := vt.Kind()

	switch kind {
	case reflect.Ptr:
		return self.checkType(p, vt.Elem())
	case reflect.Interface:
		return -1
	}

	check := p.pc()
	switch kind {
	case reflect.Bool:
		p.add(_OP_check_bool)
	case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64,
		reflect.Float32, reflect.Float64:
		p.chr(_OP_check_num, 1)
	case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64,
		reflect.Uintptr:
		p.chr(_OP_check_num, 0)
	case reflect.String:
		if vt == jsonNumberType {
			p.chr(_OP_check_num, 2)
		} else {
			p.chr(_OP_check_char_0, '"')
		}
	case reflect.Slice:
		if vt == bytesType {
			p.chr(_OP_check_bytes, '"')
		} else {
			p.chr(_OP_check_char_0, '[')
		}
	case reflect.Array:
		p.chr(_OP_check_char_0, '[')
	case reflect.Map, reflect.Struct:
		p.chr(_OP_check_char_0, '{')
	default:
		panic(&json.UnmarshalTypeError{Type: vt})
	}

	// Mismatch path: record the error, step over the value, then jump to the
	// location the caller pins later.
	p.rtt(_OP_dismatch_err, vt)
	p.add(_OP_object_next)
	skip := p.pc()
	p.add(_OP_goto)

	// Match path: the check op jumps here, straight to the decoding code.
	p.pin(check)
	return skip
}

func (self *_Compiler) compileStructFieldStr(p *_Program, sp int, vt reflect.Type) {
n1 := -1
ft := vt
Expand Down
42 changes: 40 additions & 2 deletions decoder/errors.go
Expand Up @@ -40,6 +40,10 @@ func (self SyntaxError) Error() string {
}

// Description returns the text produced by description() prefixed with
// "Syntax error ".
func (self SyntaxError) Description() string {
	detail := self.description()
	return "Syntax error " + detail
}

func (self SyntaxError) description() string {
i := 16
p := self.Pos - i
q := self.Pos + i
Expand Down Expand Up @@ -72,7 +76,7 @@ func (self SyntaxError) Description() string {

/* compose the error description */
return fmt.Sprintf(
"Syntax error at index %d: %s\n\n\t%s\n\t%s^%s\n",
"at index %d: %s\n\n\t%s\n\t%s^%s\n",
self.Pos,
self.Message(),
self.Src[p:q],
Expand Down Expand Up @@ -113,10 +117,44 @@ func error_wrap(src string, pos int, code types.ParsingError) error {
}

//go:nosplit
func error_type(vt *rt.GoType) error {
func error_type(vt *rt.GoType, c byte) error {
// c is not used here: the returned error carries only the destination Go type.
return &json.UnmarshalTypeError{Type: vt.Pack()}
}

// MismatchTypeError reports a JSON value whose kind does not match the Go type
// it was being decoded into.
type MismatchTypeError struct {
Pos int // byte offset in Src of the first character of the mismatched value
Src string // JSON source that was being decoded
Type reflect.Type // Go type the value was expected to decode into
}

// Error implements the error interface. The message names the expected Go
// type, the kind of JSON value found at Pos (derived from its first byte) and
// the positional context produced by SyntaxError.description.
//
// The JSON kind is left empty when the first byte is not one of the recognised
// starters or when Pos does not index into Src. A nil Type is rendered as
// "<nil>". Both guards keep Error from panicking on a hand-built or zero
// value.
func (self *MismatchTypeError) Error() string {
	var val string
	if self.Pos >= 0 && self.Pos < len(self.Src) {
		switch self.Src[self.Pos] {
		case 'f', 't':
			val = "bool"
		case '"':
			val = "string"
		case '{':
			val = "object"
		case '[':
			val = "array"
		case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
			val = "number"
		}
	}

	typ := "<nil>"
	if self.Type != nil {
		typ = self.Type.String()
	}

	se := SyntaxError{
		Pos:  self.Pos,
		Src:  self.Src,
		Code: types.ERR_MISMATCH,
	}
	return fmt.Sprintf("Mismatch type %s with value %s %s", typ, val, se.description())
}

// error_mismatch builds the *MismatchTypeError for a value at offset pos of
// src that could not be decoded into the Go type described by vt.
//
//go:nosplit
func error_mismatch(src string, pos int, vt *rt.GoType) error {
	e := new(MismatchTypeError)
	e.Pos = pos
	e.Src = src
	e.Type = vt.Pack()
	return e
}

//go:nosplit
func error_field(name string) error {
return errors.New("json: unknown field " + strconv.Quote(name))
Expand Down

0 comments on commit c37035b

Please sign in to comment.