Skip to content

Commit

Permalink
size: stop using regexp
Browse files Browse the repository at this point in the history
Using a regular expression brings in the whole regexp and regexp/syntax
packages, which increase the resulting binary size by about 130K
(Go 1.18.1, Linux/amd64).

Besides, regular expressions are generally slow and incur some
initialization overhead (to compile all global regexp.MustComplile
variables). This, unlike the size difference, is not the main motivation
for this commit, but it feels like it should have also been mentioned.

A quick benchmark comparison shows a huge improvement (again, this is
not why this is done, nevertheless it pleases the eye):

name         old time/op    new time/op    delta
ParseSize-4    10.6µs ± 3%     2.6µs ±29%  -75.10%  (p=0.002 n=6+6)

name         old alloc/op   new alloc/op   delta
ParseSize-4    3.26kB ± 0%    0.20kB ± 0%  -93.75%  (p=0.000 n=7+6)

name         old allocs/op  new allocs/op  delta
ParseSize-4      72.0 ± 0%      26.0 ± 0%  -63.89%  (p=0.000 n=7+6)

Compatibility note: I chose to accept (rather than error out) input
strings like ".4 Gb" as this is valid and makes sense, and the inability
to process such input by the old implementation is more about the
limitation of regex being used, rather than a decision to error out on
those.

Signed-off-by: Kir Kolyshkin <kolyshkin@gmail.com>
  • Loading branch information
kolyshkin committed Apr 28, 2022
1 parent ee8a0fd commit 53b72ac
Show file tree
Hide file tree
Showing 2 changed files with 51 additions and 13 deletions.
62 changes: 50 additions & 12 deletions size.go
Expand Up @@ -2,7 +2,6 @@ package units

import (
"fmt"
"regexp"
"strconv"
"strings"
)
Expand All @@ -26,16 +25,17 @@ const (
PiB = 1024 * TiB
)

type unitMap map[string]int64
type unitMap map[byte]int64

var (
decimalMap = unitMap{"k": KB, "m": MB, "g": GB, "t": TB, "p": PB}
binaryMap = unitMap{"k": KiB, "m": MiB, "g": GiB, "t": TiB, "p": PiB}
sizeRegex = regexp.MustCompile(`^(\d+(\.\d+)*) ?([kKmMgGtTpP])?[iI]?[bB]?$`)
decimalMap = unitMap{'b': 1, 'k': KB, 'm': MB, 'g': GB, 't': TB, 'p': PB}
binaryMap = unitMap{'b': 1, 'k': KiB, 'm': MiB, 'g': GiB, 't': TiB, 'p': PiB}
)

var decimapAbbrs = []string{"B", "kB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB"}
var binaryAbbrs = []string{"B", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB", "ZiB", "YiB"}
var (
decimapAbbrs = []string{"B", "kB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB"}
binaryAbbrs = []string{"B", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB", "ZiB", "YiB"}
)

func getSizeAndUnit(size float64, base float64, _map []string) (float64, string) {
i := 0
Expand Down Expand Up @@ -89,20 +89,58 @@ func RAMInBytes(size string) (int64, error) {

// Parses the human-readable size string into the amount it represents.
func parseSize(sizeStr string, uMap unitMap) (int64, error) {
matches := sizeRegex.FindStringSubmatch(sizeStr)
if len(matches) != 4 {
// TODO: rewrite to use strings.Cut if there's a space
// once Go < 1.18 is deprecated.
sep := strings.LastIndexAny(sizeStr, "01234567890. ")
if sep == -1 {
// There should be at least a digit.
return -1, fmt.Errorf("invalid size: '%s'", sizeStr)
}
var num, sfx string
if sizeStr[sep] != ' ' {
num = sizeStr[:sep+1]
sfx = sizeStr[sep+1:]
} else {
// Remove the optional space.
num = sizeStr[:sep]
sfx = sizeStr[sep+1:]
}

size, err := strconv.ParseFloat(matches[1], 64)
size, err := strconv.ParseFloat(num, 64)
if err != nil {
return -1, err
}
// Backward compatibility: reject negative sizes.
if size < 0 {
return -1, fmt.Errorf("invalid size: '%s'", sizeStr)
}

if len(sfx) == 0 {
return int64(size), nil
}

// Process the suffix.

unitPrefix := strings.ToLower(matches[3])
if mul, ok := uMap[unitPrefix]; ok {
if len(sfx) > 3 { // Too long.
goto badSuffix
}
sfx = strings.ToLower(sfx)
if mul, ok := uMap[sfx[0]]; ok {
size *= float64(mul)
} else {
goto badSuffix
}

// Final suffix sanity check.
switch {
case len(sfx) == 2 && sfx[1] != 'b':
goto badSuffix
case len(sfx) == 3 && sfx[1:] != "ib":
goto badSuffix
}

return int64(size), nil

badSuffix:
return -1, fmt.Errorf("invalid suffix: '%s'", sfx)
}
2 changes: 1 addition & 1 deletion size_test.go
Expand Up @@ -99,11 +99,11 @@ func TestFromHumanSize(t *testing.T) {
assertSuccessEquals(t, 32.5*KB, FromHumanSize, "32.5 kB")
assertSuccessEquals(t, 32, FromHumanSize, "32.5 B")
assertSuccessEquals(t, 300, FromHumanSize, "0.3 K")
assertSuccessEquals(t, 300, FromHumanSize, ".3kB")

assertError(t, FromHumanSize, "")
assertError(t, FromHumanSize, "hello")
assertError(t, FromHumanSize, "-32")
assertError(t, FromHumanSize, ".3kB")
assertError(t, FromHumanSize, " 32 ")
assertError(t, FromHumanSize, "32m b")
assertError(t, FromHumanSize, "32bm")
Expand Down

0 comments on commit 53b72ac

Please sign in to comment.