Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

LineIndex.OffsetToLineCol translates offsets to line/col for reference. #106

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
3 changes: 2 additions & 1 deletion LICENSE
@@ -1,6 +1,7 @@
MIT License

Copyright (c) 2016 Leonid Bugaev
Portions Copyright (c) 2016 Leonid Bugaev
Portions Copyright (c) 2016 Jason E. Aten

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
Expand Down
113 changes: 113 additions & 0 deletions linecol.go
@@ -0,0 +1,113 @@
package jsonparser

import (
"fmt"
"sort"
"unicode/utf8"
)

// LineIndex holds the positions of all newlines
// in a given JSON blob. The JsonBlob must be utf8 text.
type LineIndex struct {
// JsonBlob is the utf8 JSON text that the index describes.
JsonBlob []byte
// NewlinePos holds the byte offset within JsonBlob of every '\n',
// in ascending order. It is (re)built by FindNewlines.
NewlinePos []int
}

// NewLineIndex builds a LineIndex over json, which must be utf8 text.
// The returned index refers to json directly (no copy is made) and has
// its NewlinePos member filled with the byte offset of every newline.
func NewLineIndex(json []byte) *LineIndex {
	idx := new(LineIndex)
	idx.JsonBlob = json
	// FindNewlines resets NewlinePos to a fresh slice before scanning,
	// so no separate initialization is needed here.
	idx.FindNewlines()
	return idx
}

// FindNewlines rebuilds li.NewlinePos with the byte offset of every
// newline in the utf8 li.JsonBlob, in ascending order. Any positions
// recorded by an earlier call are discarded.
func (li *LineIndex) FindNewlines() {
	// Start from a fresh, non-nil slice so the result is never nil,
	// even when the blob contains no newline at all.
	li.NewlinePos = []int{}

	// In utf8 the newline is the single byte 0x0A, and bytes below 0x80
	// never occur inside a multi-byte sequence. Scanning the raw bytes
	// therefore finds exactly the same offsets as decoding runes, without
	// copying the whole blob into a string first.
	for i, b := range li.JsonBlob {
		if b == '\n' {
			li.NewlinePos = append(li.NewlinePos, i)
		}
	}
}

// OffsetToLineCol returns the line and column for a given byte offset
// into li.JsonBlob, provided that li.NewlinePos is valid (as it is after
// NewLineIndex or FindNewlines). The line is located by binary search on
// li.NewlinePos, which costs O(log q) where q is the number of newlines;
// computing runecol additionally scans the bytes of the line up to offset.
//
// Note that bytecol is the byte index of the offset on the line,
// while runecol is the utf8 rune index on the line.
//
// Lines and columns are numbered from 0, so offset 0 is at line 0, col 0.
// A newline character belongs to the line it terminates, so the offset of
// a newline reports that line with a column one past its last character.
//
// OffsetToLineCol returns -1, -1, -1 if offset is out of bounds.
func (li *LineIndex) OffsetToLineCol(offset int) (line int, bytecol int, runecol int) {
	if offset < 0 || offset >= len(li.JsonBlob) {
		return -1, -1, -1
	}

	// The zero-based line number equals the number of newlines strictly
	// before offset. sort.Search yields the smallest index whose newline
	// is at or after offset, which is exactly that count; it returns
	// len(li.NewlinePos) when every newline precedes offset.
	line = sort.Search(len(li.NewlinePos), func(i int) bool {
		return li.NewlinePos[i] >= offset
	})

	// The first line starts at byte 0; every later line starts right
	// after the newline that ends the previous one. Guarding line == 0
	// avoids indexing li.NewlinePos[-1].
	linestart := 0
	if line > 0 {
		linestart = li.NewlinePos[line-1] + 1
	}
	return line, offset - linestart, li.bytePosToRunePos(line, offset)
}

// bytePosToRunePos converts a byte offset into a character (utf8 rune)
// column. linenoz is expected to be the zero-based number of the line on
// which offset falls, so that the line starts just after
// li.NewlinePos[linenoz-1] (or at byte 0 when linenoz is 0).
//
// The result is the number of runes on that line that precede the rune
// found at offset. Because the bytes must be decoded as utf8, the time
// complexity is O(length of the line).
func (li *LineIndex) bytePosToRunePos(linenoz int, offset int) int {
	lineStart := 0
	if linenoz > 0 {
		lineStart = li.NewlinePos[linenoz-1] + 1
	}
	// Count the runes from the start of the line through the byte at
	// offset inclusive, then discount the rune at offset itself.
	upToOffset := li.JsonBlob[lineStart : offset+1]
	return utf8.RuneCount(upToOffset) - 1
}

// DebugDump prints every entry of li.NewlinePos to standard output, one
// per line, with a blank line before and after the listing.
func (li *LineIndex) DebugDump() {
	fmt.Println()
	for i, pos := range li.NewlinePos {
		fmt.Printf("li.NewlinePos[i=%v]: %v\n", i, pos)
	}
	fmt.Println()
}
105 changes: 105 additions & 0 deletions linecol_test.go
@@ -0,0 +1,105 @@
package jsonparser

import (
"bytes"
"testing"
)

// TestGetLineCol checks, for a few small inputs, that the newline
// positions recorded for the input match the expected byte offsets.
func TestGetLineCol(t *testing.T) {
	cases := []struct {
		input    string
		newlines []int
	}{
		{"abc", []int{}},
		{"\n", []int{0}},
		{"\na\nb\n", []int{0, 2, 4}},
	}
	for _, c := range cases {
		runLineColTest(t, []byte(c.input), c.newlines)
	}
}

// runLineColTest indexes input with NewLineIndex and reports a test
// error unless the recorded newline positions equal expected. A length
// mismatch is reported once; otherwise every differing position is
// reported individually.
func runLineColTest(t *testing.T, input []byte, expected []int) {
	observed := NewLineIndex(input).NewlinePos

	if len(observed) != len(expected) {
		t.Errorf("runLineColTest failed at pos len(observed)==%v, "+
			"len(expected)=%v; obs='%#v'; expected='%#v'",
			len(observed), len(expected), observed, expected)
		return
	}

	for i, want := range expected {
		if observed[i] == want {
			continue
		}
		t.Errorf("runLineColTest failed at pos %v, observed='%#v', expected='%#v'",
			i, observed, expected)
	}
}

// TestOffsetToLineCol looks up a string value in a series of JSON
// inputs and checks the line, byte column and rune column reported for
// the start of that value.
func TestOffsetToLineCol(t *testing.T) {
	type lineColCase struct {
		input   []byte
		key     string
		value   []byte
		line    int
		byteCol int
		runeCol int
	}

	cases := []lineColCase{
		{[]byte(`{"a":"b"}`), `a`, []byte(`b`), 0, 5, 5},
		{[]byte("\n" + `{"a":"b"}`), `a`, []byte(`b`), 1, 5, 5},
		{[]byte("\n" + `{"a":"b"}` + "\n"), `a`, []byte(`b`), 1, 5, 5},
		{[]byte("\n\n" + `{"a":"b"}` + "\n"), `a`, []byte(`b`), 2, 5, 5},
		{[]byte("\n\n" + `{"a":"b"}` + "\n\n"), `a`, []byte(`b`), 2, 5, 5},
		{[]byte("\n\n" + `{"a":` + "\n" + `"b"}` + "\n\n"), `a`, []byte(`b`), 3, 0, 0},
		{[]byte("\n\n" + `{` + "\n" + `"a":` + "\n" + `"b"}` + "\n\n"), `a`, []byte(`b`), 4, 0, 0},
		{[]byte(`{` + "\n" + `"a":` + "\n" + `"b"}`), `a`, []byte(`b`), 2, 0, 0},
		{[]byte(`{` + "\n" + `"a":` + `"b"}`), `a`, []byte(`b`), 1, 4, 4},

		// multiline value
		{[]byte(`{` + "\n" + `"a":"b` + "\n" + `ye"}`), `a`, []byte(`b` + "\n" + `ye`), 1, 4, 4},

		// multi-byte characters
		{[]byte(`{"世界":"世界"}`), `世界`, []byte(`世界`), 0, 10, 6},
		{[]byte(`{"世界":` + "\n" + `"世界"}`), `世界`, []byte(`世界`), 1, 0, 0},
	}

	for _, c := range cases {
		runOffsetToLineColTest(t, c.input, []string{c.key}, c.value,
			c.line, c.byteCol, c.runeCol, String)
	}
}

// runOffsetToLineColTest looks up searchPath in input with Get, checks
// the returned value and data type against expectedValue and
// expectedDataType, and then checks that li.OffsetToLineCol maps the
// byte offset where the value begins to expectedLine, expectedByteCol
// and expectedRuneCol.
//
// A lookup error fails the test immediately via t.Fatalf; every other
// mismatch is reported with t.Errorf so that all of them are shown.
func runOffsetToLineColTest(t *testing.T, input []byte, searchPath []string,
	expectedValue []byte,
	expectedLine, expectedByteCol, expectedRuneCol int, expectedDataType ValueType) {

	li := NewLineIndex(input)
	obs, obsDataType, offs, err := Get(input, searchPath...)

	//fmt.Printf("\n Get(input='%s', searchPath='%#v') returned obs='%#v', obsDataType='%s', offs=%v, err=%v. len(obs)=%v\n", string(input), searchPath, string(obs), obsDataType, offs, err, len(obs))

	// Check the error before touching any other result. Fail this test
	// rather than panicking, which would abort the whole test binary.
	if err != nil {
		t.Fatalf("runOffsetToLineColTest failed, Get(%q, %q) returned error: %v",
			input, searchPath, err)
	}

	// account for the double quotes around strings in their position
	lenObs := len(obs)
	if obsDataType == String {
		lenObs += 2
	}

	if !bytes.Equal(obs, expectedValue) {
		t.Errorf("runOffsetToLineColTest failed, obs != expectedValue, observed='%#v', expected='%#v'",
			obs, expectedValue)
	}
	if obsDataType != expectedDataType {
		t.Errorf("runOffsetToLineColTest failed, obsDataType != expectedDataType, observed='%#v', expected='%#v'",
			obsDataType, expectedDataType)
	}

	// the main event: the call to li.OffsetToLineCol()
	//
	// Note offs is where the key value *ends*, per the jsonparser.Get() docs.
	// Hence we subtract lenObs (the value length, plus the two surrounding
	// quotes for strings) to get the byte offset of the beginning of the
	// value.
	obsLine, obsByteCol, obsRuneCol := li.OffsetToLineCol(offs - lenObs)

	//fmt.Printf("li.OffsetToLineCol(offs=%#v) returned obsLine=%v, obsByteCol=%v, obsRuneCol=%v. len(obs)=%v\n", offs, obsLine, obsByteCol, obsRuneCol, len(obs))

	if obsLine != expectedLine {
		t.Errorf("runOffsetToLineColTest failed, obsLine != expectedLine, observed='%#v', expected='%#v'",
			obsLine, expectedLine)
	}
	if obsByteCol != expectedByteCol {
		t.Errorf("runOffsetToLineColTest failed, obsByteCol != expectedByteCol, observed='%#v', expected='%#v'",
			obsByteCol, expectedByteCol)
	}
	if obsRuneCol != expectedRuneCol {
		t.Errorf("runOffsetToLineColTest failed, obsRuneCol != expectedRuneCol, observed='%#v', expected='%#v'",
			obsRuneCol, expectedRuneCol)
	}
}