Skip to content

Commit

Permalink
AST Tweaks (#551)
Browse files Browse the repository at this point in the history
* Use pointers instead of copying around ast.Node

Node is a 56B struct that is constantly in the hot path. Passing nodes
around by copy had a cost that started to add up. This change replaces
them by pointers. Using unsafe pointer arithmetic and converting
sibling/child indexes to relative offsets, it removes the need to carry
around a pointer to the root of the tree. This saves 8B per Node. This
space will be used to store an extra []byte slice to provide contextual
error handling on all nodes, including the ones whose data is different
than the raw input (for example: strings with escaped characters), while
staying under the size of a cache line.

* Remove conditional

* Add Raw to track range in data for parsed values

* Simplify reference tracking
  • Loading branch information
pelletier committed Jun 4, 2021
1 parent f3bb20e commit 618f018
Show file tree
Hide file tree
Showing 13 changed files with 239 additions and 165 deletions.
32 changes: 16 additions & 16 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -156,12 +156,12 @@ Execution time speedup compared to other Go TOML libraries:
<tr><th>Benchmark</th><th>go-toml v1</th><th>BurntSushi/toml</th></tr>
</thead>
<tbody>
<tr><td>Marshal/HugoFrontMatter</td><td>1.9x</td><td>1.9x</td></tr>
<tr><td>Marshal/ReferenceFile/map</td><td>1.7x</td><td>1.9x</td></tr>
<tr><td>Marshal/ReferenceFile/struct</td><td>2.7x</td><td>2.9x</td></tr>
<tr><td>Unmarshal/HugoFrontMatter</td><td>2.9x</td><td>2.4x</td></tr>
<tr><td>Unmarshal/ReferenceFile/map</td><td>3.1x</td><td>3.0x</td></tr>
<tr><td>Unmarshal/ReferenceFile/struct</td><td>5.5x</td><td>5.8x</td></tr>
<tr><td>Marshal/HugoFrontMatter</td><td>2.0x</td><td>2.0x</td></tr>
<tr><td>Marshal/ReferenceFile/map</td><td>1.8x</td><td>2.0x</td></tr>
<tr><td>Marshal/ReferenceFile/struct</td><td>2.7x</td><td>2.7x</td></tr>
<tr><td>Unmarshal/HugoFrontMatter</td><td>3.0x</td><td>2.6x</td></tr>
<tr><td>Unmarshal/ReferenceFile/map</td><td>3.0x</td><td>3.1x</td></tr>
<tr><td>Unmarshal/ReferenceFile/struct</td><td>5.9x</td><td>6.6x</td></tr>
</tbody>
</table>
<details><summary>See more</summary>
Expand All @@ -174,16 +174,16 @@ provided for completeness.</p>
<tr><th>Benchmark</th><th>go-toml v1</th><th>BurntSushi/toml</th></tr>
</thead>
<tbody>
<tr><td>Marshal/SimpleDocument/map</td><td>1.8x</td><td>2.4x</td></tr>
<tr><td>Marshal/SimpleDocument/struct</td><td>2.7x</td><td>3.5x</td></tr>
<tr><td>Unmarshal/SimpleDocument/map</td><td>4.3x</td><td>2.4x</td></tr>
<tr><td>Unmarshal/SimpleDocument/struct</td><td>5.8x</td><td>3.3x</td></tr>
<tr><td>UnmarshalDataset/example</td><td>3.1x</td><td>2.2x</td></tr>
<tr><td>UnmarshalDataset/code</td><td>1.8x</td><td>2.1x</td></tr>
<tr><td>UnmarshalDataset/twitter</td><td>2.7x</td><td>1.9x</td></tr>
<tr><td>UnmarshalDataset/citm_catalog</td><td>1.8x</td><td>1.2x</td></tr>
<tr><td>UnmarshalDataset/config</td><td>3.4x</td><td>2.8x</td></tr>
<tr><td>[Geo mean]</td><td>2.8x</td><td>2.5x</td></tr>
<tr><td>Marshal/SimpleDocument/map</td><td>1.7x</td><td>2.1x</td></tr>
<tr><td>Marshal/SimpleDocument/struct</td><td>2.6x</td><td>2.9x</td></tr>
<tr><td>Unmarshal/SimpleDocument/map</td><td>4.1x</td><td>2.9x</td></tr>
<tr><td>Unmarshal/SimpleDocument/struct</td><td>6.3x</td><td>4.1x</td></tr>
<tr><td>UnmarshalDataset/example</td><td>3.5x</td><td>2.4x</td></tr>
<tr><td>UnmarshalDataset/code</td><td>2.2x</td><td>2.8x</td></tr>
<tr><td>UnmarshalDataset/twitter</td><td>2.8x</td><td>2.1x</td></tr>
<tr><td>UnmarshalDataset/citm_catalog</td><td>2.3x</td><td>1.5x</td></tr>
<tr><td>UnmarshalDataset/config</td><td>4.2x</td><td>3.2x</td></tr>
<tr><td>[Geo mean]</td><td>3.0x</td><td>2.7x</td></tr>
</tbody>
</table>
<p>This table can be generated with <code>./ci.sh benchmark -a -html</code>.</p>
Expand Down
4 changes: 2 additions & 2 deletions errors.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ import (
"strconv"
"strings"

"github.com/pelletier/go-toml/v2/internal/unsafe"
"github.com/pelletier/go-toml/v2/internal/danger"
)

// DecodeError represents an error encountered during the parsing or decoding
Expand Down Expand Up @@ -105,7 +105,7 @@ func (e *DecodeError) Key() Key {
// highlight can be freely deallocated.
//nolint:funlen
func wrapDecodeError(document []byte, de *decodeError) *DecodeError {
offset := unsafe.SubsliceOffset(document, de.highlight)
offset := danger.SubsliceOffset(document, de.highlight)

errMessage := de.Error()
errLine, errColumn := positionAtEnd(document[:offset])
Expand Down
66 changes: 37 additions & 29 deletions internal/ast/ast.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@ package ast

import (
"fmt"
"unsafe"

"github.com/pelletier/go-toml/v2/internal/danger"
)

// Iterator starts uninitialized, you need to call Next() first.
Expand All @@ -14,7 +17,7 @@ import (
// }
type Iterator struct {
started bool
node Node
node *Node
}

// Next moves the iterator forward and returns true if points to a node, false
Expand All @@ -31,11 +34,11 @@ func (c *Iterator) Next() bool {
// IsLast returns true if the current node of the iterator is the last one.
// Subsequent call to Next() will return false.
func (c *Iterator) IsLast() bool {
return c.node.next <= 0
return c.node.next == 0
}

// Node returns a copy of the node pointed at by the iterator.
func (c *Iterator) Node() Node {
func (c *Iterator) Node() *Node {
return c.node
}

Expand All @@ -50,14 +53,13 @@ type Root struct {
func (r *Root) Iterator() Iterator {
it := Iterator{}
if len(r.nodes) > 0 {
it.node = r.nodes[0]
it.node = &r.nodes[0]
}
return it
}

func (r *Root) at(idx int) Node {
// TODO: unsafe to point to the node directly
return r.nodes[idx]
func (r *Root) at(idx Reference) *Node {
return &r.nodes[idx]
}

// Arrays have one child per element in the array.
Expand All @@ -69,42 +71,48 @@ func (r *Root) at(idx int) Node {
// children []Node
type Node struct {
Kind Kind
Data []byte // Raw bytes from the input
Raw Range // Raw bytes from the input.
Data []byte // Node value (could be either allocated or referencing the input).

// References to other nodes, as offsets in the backing array from this
// node. References can go backward, so those can be negative.
next int // 0 if last element
child int // 0 if no child
}

// next idx (in the root array). 0 if last of the collection.
next int
// child idx (in the root array). 0 if no child.
child int
// pointer to the root array
root *Root
type Range struct {
Offset uint32
Length uint32
}

// Next returns a copy of the next node, or an invalid Node if there is no
// next node.
func (n Node) Next() Node {
if n.next <= 0 {
return noNode
func (n *Node) Next() *Node {
if n.next == 0 {
return nil
}
return n.root.at(n.next)
ptr := unsafe.Pointer(n)
size := unsafe.Sizeof(Node{})
return (*Node)(danger.Stride(ptr, size, n.next))
}

// Child returns a copy of the first child node of this node. Other children
// can be accessed calling Next on the first child.
// Returns an invalid Node if there is none.
func (n Node) Child() Node {
if n.child <= 0 {
return noNode
func (n *Node) Child() *Node {
if n.child == 0 {
return nil
}
return n.root.at(n.child)
ptr := unsafe.Pointer(n)
size := unsafe.Sizeof(Node{})
return (*Node)(danger.Stride(ptr, size, n.child))
}

// Valid returns true if the node's kind is set (not to Invalid).
func (n Node) Valid() bool {
return n.Kind != Invalid
func (n *Node) Valid() bool {
return n != nil
}

var noNode = Node{}

// Key returns the child nodes making the Key on a supported node. Panics
// otherwise.
// They are guaranteed to be all be of the Kind Key. A simple key would return
Expand All @@ -127,13 +135,13 @@ func (n *Node) Key() Iterator {
// Value returns a pointer to the value node of a KeyValue.
// Guaranteed to be non-nil.
// Panics if not called on a KeyValue node, or if the Children are malformed.
func (n Node) Value() Node {
assertKind(KeyValue, n)
func (n *Node) Value() *Node {
assertKind(KeyValue, *n)
return n.Child()
}

// Children returns an iterator over a node's children.
func (n Node) Children() Iterator {
func (n *Node) Children() Iterator {
return Iterator{node: n.Child()}
}

Expand Down
31 changes: 11 additions & 20 deletions internal/ast/builder.go
Original file line number Diff line number Diff line change
@@ -1,12 +1,11 @@
package ast

type Reference struct {
idx int
set bool
}
type Reference int

const InvalidReference Reference = -1

func (r Reference) Valid() bool {
return r.set
return r != InvalidReference
}

type Builder struct {
Expand All @@ -18,8 +17,8 @@ func (b *Builder) Tree() *Root {
return &b.tree
}

func (b *Builder) NodeAt(ref Reference) Node {
return b.tree.at(ref.idx)
func (b *Builder) NodeAt(ref Reference) *Node {
return b.tree.at(ref)
}

func (b *Builder) Reset() {
Expand All @@ -28,33 +27,25 @@ func (b *Builder) Reset() {
}

func (b *Builder) Push(n Node) Reference {
n.root = &b.tree
b.lastIdx = len(b.tree.nodes)
b.tree.nodes = append(b.tree.nodes, n)
return Reference{
idx: b.lastIdx,
set: true,
}
return Reference(b.lastIdx)
}

func (b *Builder) PushAndChain(n Node) Reference {
n.root = &b.tree
newIdx := len(b.tree.nodes)
b.tree.nodes = append(b.tree.nodes, n)
if b.lastIdx >= 0 {
b.tree.nodes[b.lastIdx].next = newIdx
b.tree.nodes[b.lastIdx].next = newIdx - b.lastIdx
}
b.lastIdx = newIdx
return Reference{
idx: b.lastIdx,
set: true,
}
return Reference(b.lastIdx)
}

func (b *Builder) AttachChild(parent Reference, child Reference) {
b.tree.nodes[parent.idx].child = child.idx
b.tree.nodes[parent].child = int(child) - int(parent)
}

func (b *Builder) Chain(from Reference, to Reference) {
b.tree.nodes[from.idx].next = to.idx
b.tree.nodes[from].next = int(to) - int(from)
}
8 changes: 7 additions & 1 deletion internal/unsafe/unsafe.go → internal/danger/danger.go
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
package unsafe
package danger

import (
"fmt"
Expand Down Expand Up @@ -57,3 +57,9 @@ func BytesRange(start []byte, end []byte) []byte {

return start[:l]
}

func Stride(ptr unsafe.Pointer, size uintptr, offset int) unsafe.Pointer {
// TODO: replace with unsafe.Add when Go 1.17 is released
// https://github.com/golang/go/issues/40481
return unsafe.Pointer(uintptr(ptr) + uintptr(int(size)*offset))
}
28 changes: 19 additions & 9 deletions internal/unsafe/unsafe_test.go → internal/danger/danger_test.go
Original file line number Diff line number Diff line change
@@ -1,15 +1,16 @@
package unsafe_test
package danger_test

import (
"testing"
"unsafe"

"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"

"github.com/pelletier/go-toml/v2/internal/unsafe"
"github.com/pelletier/go-toml/v2/internal/danger"
)

func TestUnsafeSubsliceOffsetValid(t *testing.T) {
func TestSubsliceOffsetValid(t *testing.T) {
examples := []struct {
desc string
test func() ([]byte, []byte)
Expand All @@ -28,13 +29,13 @@ func TestUnsafeSubsliceOffsetValid(t *testing.T) {
for _, e := range examples {
t.Run(e.desc, func(t *testing.T) {
d, s := e.test()
offset := unsafe.SubsliceOffset(d, s)
offset := danger.SubsliceOffset(d, s)
assert.Equal(t, e.offset, offset)
})
}
}

func TestUnsafeSubsliceOffsetInvalid(t *testing.T) {
func TestSubsliceOffsetInvalid(t *testing.T) {
examples := []struct {
desc string
test func() ([]byte, []byte)
Expand Down Expand Up @@ -72,13 +73,22 @@ func TestUnsafeSubsliceOffsetInvalid(t *testing.T) {
t.Run(e.desc, func(t *testing.T) {
d, s := e.test()
require.Panics(t, func() {
unsafe.SubsliceOffset(d, s)
danger.SubsliceOffset(d, s)
})
})
}
}

func TestUnsafeBytesRange(t *testing.T) {
func TestStride(t *testing.T) {
a := []byte{1, 2, 3, 4}
x := &a[1]
n := (*byte)(danger.Stride(unsafe.Pointer(x), unsafe.Sizeof(byte(0)), 1))
require.Equal(t, &a[2], n)
n = (*byte)(danger.Stride(unsafe.Pointer(x), unsafe.Sizeof(byte(0)), -1))
require.Equal(t, &a[0], n)
}

func TestBytesRange(t *testing.T) {
type fn = func() ([]byte, []byte)
examples := []struct {
desc string
Expand Down Expand Up @@ -157,10 +167,10 @@ func TestUnsafeBytesRange(t *testing.T) {
start, end := e.test()
if e.expected == nil {
require.Panics(t, func() {
unsafe.BytesRange(start, end)
danger.BytesRange(start, end)
})
} else {
res := unsafe.BytesRange(start, end)
res := danger.BytesRange(start, end)
require.Equal(t, e.expected, res)
}
})
Expand Down
8 changes: 4 additions & 4 deletions internal/tracker/key.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,27 +11,27 @@ type KeyTracker struct {
}

// UpdateTable sets the state of the tracker with the AST table node.
func (t *KeyTracker) UpdateTable(node ast.Node) {
func (t *KeyTracker) UpdateTable(node *ast.Node) {
t.reset()
t.Push(node)
}

// UpdateArrayTable sets the state of the tracker with the AST array table node.
func (t *KeyTracker) UpdateArrayTable(node ast.Node) {
func (t *KeyTracker) UpdateArrayTable(node *ast.Node) {
t.reset()
t.Push(node)
}

// Push the given key on the stack.
func (t *KeyTracker) Push(node ast.Node) {
func (t *KeyTracker) Push(node *ast.Node) {
it := node.Key()
for it.Next() {
t.k = append(t.k, string(it.Node().Data))
}
}

// Pop key from stack.
func (t *KeyTracker) Pop(node ast.Node) {
func (t *KeyTracker) Pop(node *ast.Node) {
it := node.Key()
for it.Next() {
t.k = t.k[:len(t.k)-1]
Expand Down

0 comments on commit 618f018

Please sign in to comment.