Skip to content

Commit

Permalink
Upgraded rivo/uniseg to latest version, switched StringWidth/Truncate…
Browse files Browse the repository at this point in the history
… to speedier version.

Adapted to short-notice change in rivo/uniseg.

Upgraded to latest rivo/uniseg. Also implemented basic emoji handling in StringWidth. See mattn#59

Added a test for Truncate that includes emojis.

Split the code so it can be upgraded once we move to Go1.18+

The wrong uniseg version was used. Fixed it.
  • Loading branch information
rivo authored and junegunn committed Jan 7, 2024
1 parent fbfe011 commit 7bd7c14
Show file tree
Hide file tree
Showing 8 changed files with 378 additions and 25 deletions.
6 changes: 2 additions & 4 deletions benchmark_test.go
Expand Up @@ -113,21 +113,19 @@ func benchString1Width(b *testing.B, eastAsianWidth bool, start, stop rune, want
return n
}
func BenchmarkString1WidthAll(b *testing.B) {
benchSink = benchString1Width(b, false, 0, utf8.MaxRune+1, 1295990)
benchSink = benchString1Width(b, false, 0, utf8.MaxRune+1, 1298422)
}
func BenchmarkString1Width768(b *testing.B) {
benchSink = benchString1Width(b, false, 0, 0x300, 702)
}
func BenchmarkString1WidthAllEastAsian(b *testing.B) {
benchSink = benchString1Width(b, true, 0, utf8.MaxRune+1, 1436664)
benchSink = benchString1Width(b, true, 0, utf8.MaxRune+1, 1439014)
}
func BenchmarkString1Width768EastAsian(b *testing.B) {
benchSink = benchString1Width(b, true, 0, 0x300, 794)
}

//
// tables
//
func benchTable(b *testing.B, tbl table) int {
n := 0
for i := 0; i < b.N; i++ {
Expand Down
4 changes: 2 additions & 2 deletions go.mod
@@ -1,5 +1,5 @@
module github.com/mattn/go-runewidth

go 1.9
go 1.16

require github.com/rivo/uniseg v0.2.0
require github.com/rivo/uniseg v0.4.4
2 changes: 2 additions & 0 deletions go.sum
@@ -1,2 +1,4 @@
github.com/rivo/uniseg v0.2.0 h1:S1pD9weZBuJdFmowNwbpi7BJ8TNftyUImj/0WQi72jY=
github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc=
github.com/rivo/uniseg v0.4.4 h1:8TfxU8dW6PdqD27gjM8MVNuicgxIjxpm4K7x4jp8sis=
github.com/rivo/uniseg v0.4.4/go.mod h1:FN3SvrM+Zdj16jyLfmOkMNblXMcoc8DfTHruCPUcx88=
321 changes: 321 additions & 0 deletions runewidth_go117.go
@@ -0,0 +1,321 @@
//go:build !go1.18
// +build !go1.18

package runewidth

import (
	"os"
	"strings"

	"github.com/rivo/uniseg"
)

//go:generate go run script/generate.go

var (
	// EastAsianWidth selects East Asian width rules. handleEnv assigns it
	// at start-up: "1" in RUNEWIDTH_EASTASIAN means true, any other
	// non-empty value means false, and an empty value defers to IsEastAsian.
	EastAsianWidth bool

	// StrictEmojiNeutral is the value NewCondition copies into new
	// Conditions. Set it to false to work around broken fonts: with East
	// Asian width enabled, RuneWidth then reports emoji as two cells.
	StrictEmojiNeutral = true

	// DefaultCondition is the Condition used by the package-level
	// functions such as RuneWidth and StringWidth.
	DefaultCondition = &Condition{
		StrictEmojiNeutral: true,
		EastAsianWidth:     false,
	}
)

// init applies the environment-derived settings (see handleEnv) when the
// package is loaded.
func init() {
handleEnv()
}

func handleEnv() {
env := os.Getenv("RUNEWIDTH_EASTASIAN")
if env == "" {
EastAsianWidth = IsEastAsian()
} else {
EastAsianWidth = env == "1"
}
// update DefaultCondition
if DefaultCondition.EastAsianWidth != EastAsianWidth {
DefaultCondition.EastAsianWidth = EastAsianWidth
if len(DefaultCondition.combinedLut) > 0 {
DefaultCondition.combinedLut = DefaultCondition.combinedLut[:0]
CreateLUT()
}
}
}

type (
	// interval is an inclusive range of code points, first through last.
	interval struct {
		first, last rune
	}

	// table is a list of intervals. inTable binary-searches it, so the
	// intervals must be in ascending order.
	table []interval
)

// inTables reports whether r lies in at least one of the tables ts.
// With no tables it returns false.
func inTables(r rune, ts ...table) bool {
	for i := range ts {
		if inTable(r, ts[i]) {
			return true
		}
	}
	return false
}

// inTable reports whether r falls inside one of the inclusive intervals of t.
//
// t must be sorted in ascending order because the lookup is a binary search.
// An empty or nil table contains nothing, so the result is false.
func inTable(r rune, t table) bool {
	// Guard the t[0] access below: an empty table would otherwise panic.
	if len(t) == 0 {
		return false
	}
	// Fast reject for runes below the first interval.
	if r < t[0].first {
		return false
	}

	bot := 0
	top := len(t) - 1
	for top >= bot {
		mid := (bot + top) >> 1

		switch {
		case t[mid].last < r:
			bot = mid + 1
		case t[mid].first > r:
			top = mid - 1
		default:
			return true
		}
	}

	return false
}

// private lists the code point ranges that IsAmbiguousWidth checks in
// addition to the ambiguous table. Each entry is an inclusive {first, last}
// pair; entries are in ascending order, as inTable's binary search requires.
var private = table{
{0x00E000, 0x00F8FF}, {0x0F0000, 0x0FFFFD}, {0x100000, 0x10FFFD},
}

// nonprint lists code point ranges that RuneWidth, together with the
// combining table, uses to give a rune a width of 0. Each entry is an
// inclusive {first, last} pair; entries are in ascending order, as inTable's
// binary search requires.
var nonprint = table{
{0x0000, 0x001F}, {0x007F, 0x009F}, {0x00AD, 0x00AD},
{0x070F, 0x070F}, {0x180B, 0x180E}, {0x200B, 0x200F},
{0x2028, 0x202E}, {0x206A, 0x206F}, {0xD800, 0xDFFF},
{0xFEFF, 0xFEFF}, {0xFFF9, 0xFFFB}, {0xFFFE, 0xFFFF},
}

// Condition holds the options that decide how wide runes and strings are
// considered to be, plus an optional lookup table that speeds up RuneWidth.
type Condition struct {
// combinedLut is the table built by CreateLUT. When non-empty, RuneWidth
// reads widths from it: each byte holds two runes, four bits per rune.
combinedLut []byte
// EastAsianWidth selects the East Asian branch of RuneWidth, in which
// runes from the ambiguous table are two cells wide.
EastAsianWidth bool
// StrictEmojiNeutral, when false and EastAsianWidth is true, makes
// RuneWidth report runes from the emoji table as two cells wide.
StrictEmojiNeutral bool
}

// NewCondition returns a fresh Condition initialised from the current values
// of the package-level EastAsianWidth and StrictEmojiNeutral variables. The
// new Condition has no lookup table.
func NewCondition() *Condition {
	c := new(Condition)
	c.EastAsianWidth = EastAsianWidth
	c.StrictEmojiNeutral = StrictEmojiNeutral
	return c
}

// RuneWidth returns the number of cells r occupies under c: 0, 1 or 2.
// Values below zero or above 0x10FFFF yield 0. When c has a lookup table
// (see CreateLUT) the answer is read from it; otherwise it is computed from
// the width tables.
// See http://www.unicode.org/reports/tr11/
func (c *Condition) RuneWidth(r rune) int {
	if r < 0 || r > 0x10FFFF {
		return 0
	}

	if len(c.combinedLut) > 0 {
		// Two runes share one byte: even runes sit in the low nibble,
		// odd runes in the high nibble.
		shift := uint(r&1) * 4
		return int(c.combinedLut[r>>1]>>shift) & 3
	}

	if c.EastAsianWidth {
		// Case order matters: the first matching case wins.
		switch {
		case inTables(r, nonprint, combining):
			return 0
		case inTable(r, narrow):
			return 1
		case inTables(r, ambiguous, doublewidth),
			!c.StrictEmojiNeutral && inTables(r, ambiguous, emoji, narrow):
			return 2
		}
		return 1
	}

	// optimized version, verified by TestRuneWidthChecksums()
	// Case order matters here as well; expressions within one case are
	// tried left to right.
	switch {
	case r < 0x20, r >= 0x7F && r <= 0x9F, r == 0xAD: // nonprint
		return 0
	case r < 0x300, inTable(r, narrow):
		return 1
	case inTables(r, nonprint, combining):
		return 0
	case inTable(r, doublewidth):
		return 2
	}
	return 1
}

// CreateLUT builds the in-memory lookup table of 557056 bytes that RuneWidth
// uses for faster operation. If c already has a table, its storage is reused
// and the contents are recomputed.
// This should not be called concurrently with other operations on c.
// If options in c are changed, CreateLUT should be called again.
func (c *Condition) CreateLUT() {
	const runeCount = 0x110000

	var lut []byte
	if len(c.combinedLut) == 0 {
		lut = make([]byte, runeCount/2)
	} else {
		lut = c.combinedLut
		// Detach the table so the RuneWidth calls below compute widths
		// from the tables instead of reading the stale LUT.
		c.combinedLut = nil
	}

	for i := range lut {
		even := int32(i * 2)
		lo := c.RuneWidth(even)
		hi := c.RuneWidth(even + 1)
		// Pack two runes per byte: even rune low nibble, odd rune high.
		lut[i] = uint8(lo) | uint8(hi)<<4
	}
	c.combinedLut = lut
}

// StringWidth returns the number of cells needed to display s under c.
// The string is walked one grapheme cluster at a time. A cluster whose first
// rune is in the emoji table counts as two cells; any other cluster counts
// as the sum of the RuneWidth of its runes.
func (c *Condition) StringWidth(s string) (width int) {
	for g := uniseg.NewGraphemes(s); g.Next(); {
		clusterWidth := 0
		for offset, r := range g.Str() {
			if offset == 0 && inTable(r, emoji) {
				// Not the optimal solution but it will work in most cases.
				clusterWidth = 2
				break
			}
			clusterWidth += c.RuneWidth(r)
		}
		width += clusterWidth
	}
	return width
}

// Truncate returns s unchanged if it fits in w cells. Otherwise it returns
// the longest prefix of whole grapheme clusters that, together with tail,
// fits in w cells, followed by tail. If tail alone is wider than w, the
// result is just tail.
func (c *Condition) Truncate(s string, w int, tail string) string {
	if c.StringWidth(s) <= w {
		return s
	}

	// Cells left for the kept prefix once tail is accounted for.
	budget := w - c.StringWidth(tail)
	used, end := 0, 0
	for g := uniseg.NewGraphemes(s); g.Next(); {
		cluster := g.Str()
		clusterWidth := 0
		for offset, r := range cluster {
			if offset == 0 && inTable(r, emoji) {
				// Not the optimal solution but it will work in most cases.
				clusterWidth = 2
				break
			}
			clusterWidth += c.RuneWidth(r)
		}
		if used+clusterWidth > budget {
			break
		}
		used += clusterWidth
		end += len(cluster) // byte offset just past this cluster
	}
	return s[:end] + tail
}

// Wrap returns s with a newline inserted before every rune that would make
// the current line wider than w cells. Newlines already in s are kept and
// start a new line. Widths are measured rune by rune with c.RuneWidth, so
// breaks fall on rune boundaries, not on word or grapheme boundaries.
func (c *Condition) Wrap(s string, w int) string {
	// A builder avoids the quadratic cost of repeated string concatenation.
	var out strings.Builder
	out.Grow(len(s))

	width := 0
	for _, r := range s {
		if r == '\n' {
			out.WriteRune(r)
			width = 0
			continue
		}
		cw := c.RuneWidth(r)
		if width+cw > w {
			// r does not fit on this line: break before writing it.
			out.WriteByte('\n')
			width = 0
		}
		out.WriteRune(r)
		width += cw
	}
	return out.String()
}

// FillLeft returns s padded on the left with spaces so that it is w cells
// wide, as measured by c.StringWidth. If s is already w cells or wider it is
// returned unchanged.
func (c *Condition) FillLeft(s string, w int) string {
	if pad := w - c.StringWidth(s); pad > 0 {
		return strings.Repeat(" ", pad) + s
	}
	return s
}

// FillRight returns s padded on the right with spaces so that it is w cells
// wide, as measured by c.StringWidth. If s is already w cells or wider it is
// returned unchanged.
func (c *Condition) FillRight(s string, w int) string {
	if pad := w - c.StringWidth(s); pad > 0 {
		return s + strings.Repeat(" ", pad)
	}
	return s
}

// RuneWidth returns the number of cells in r, as computed by
// DefaultCondition.
// See http://www.unicode.org/reports/tr11/
func RuneWidth(r rune) int {
return DefaultCondition.RuneWidth(r)
}

// IsAmbiguousWidth reports whether r is of ambiguous width, that is, whether
// it appears in the private or the ambiguous table.
func IsAmbiguousWidth(r rune) bool {
return inTables(r, private, ambiguous)
}

// IsNeutralWidth reports whether r is of neutral width, that is, whether it
// appears in the neutral table.
func IsNeutralWidth(r rune) bool {
return inTable(r, neutral)
}

// StringWidth returns the number of cells needed to display s, as computed
// by DefaultCondition.
func StringWidth(s string) (width int) {
return DefaultCondition.StringWidth(s)
}

// Truncate returns s cut down to fit in w cells with tail appended, using
// DefaultCondition. A string that already fits is returned unchanged.
func Truncate(s string, w int, tail string) string {
return DefaultCondition.Truncate(s, w, tail)
}

// Wrap returns s with newlines inserted so that no line is wider than w
// cells, using DefaultCondition.
func Wrap(s string, w int) string {
return DefaultCondition.Wrap(s, w)
}

// FillLeft returns s padded on the left with spaces to w cells, using
// DefaultCondition.
func FillLeft(s string, w int) string {
return DefaultCondition.FillLeft(s, w)
}

// FillRight returns s padded on the right with spaces to w cells, using
// DefaultCondition.
func FillRight(s string, w int) string {
return DefaultCondition.FillRight(s, w)
}

// CreateLUT creates the in-memory lookup table of 557056 bytes on
// DefaultCondition for faster operation. It does nothing if DefaultCondition
// already has a table.
// This should not be called concurrently with other operations.
func CreateLUT() {
// Skip the rebuild when a table is already in place.
if len(DefaultCondition.combinedLut) > 0 {
return
}
DefaultCondition.CreateLUT()
}

0 comments on commit 7bd7c14

Please sign in to comment.