From 189caeaa976deaaf50eb7e33b6d2e1b215748bf2 Mon Sep 17 00:00:00 2001 From: Kir Kolyshkin Date: Wed, 27 Apr 2022 20:30:02 -0700 Subject: [PATCH] size: stop using regexp MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Using a regular expression brings in the whole regexp and regexp/syntax packages, which increase the resulting binary size by about 130K (Go 1.18.1, Linux/amd64). Besides, regular expressions are generally slow and incur some initialization overhead (to compile all global regexp.MustComplile variables). This, unlike the size difference, is not the main motivation for this commit, but it feels like it should have also been mentioned. A quick benchmark comparison shows a huge improvement (again, this is not why this is done, nevertheless it pleases the eye): name old time/op new time/op delta ParseSize-4 10.6µs ± 3% 2.6µs ±29% -75.10% (p=0.002 n=6+6) name old alloc/op new alloc/op delta ParseSize-4 3.26kB ± 0% 0.20kB ± 0% -93.75% (p=0.000 n=7+6) name old allocs/op new allocs/op delta ParseSize-4 72.0 ± 0% 26.0 ± 0% -63.89% (p=0.000 n=7+6) Compatibility note: As a result, we are now a but more relaxed to the input, allowing e.g. ".4 Gb", or "-0", or "234. B", following the rules of strconv.ParseFloat. It seems that those were previously rejected as a result of a regex being used, not deliberately. Signed-off-by: Kir Kolyshkin --- size.go | 70 +++++++++++++++++++++++++++++++++++++++++++--------- size_test.go | 31 ++++++++++++----------- 2 files changed, 74 insertions(+), 27 deletions(-) diff --git a/size.go b/size.go index 85f6ab0..c245a89 100644 --- a/size.go +++ b/size.go @@ -2,7 +2,6 @@ package units import ( "fmt" - "regexp" "strconv" "strings" ) @@ -26,16 +25,17 @@ const ( PiB = 1024 * TiB ) -type unitMap map[string]int64 +type unitMap map[byte]int64 var ( - decimalMap = unitMap{"k": KB, "m": MB, "g": GB, "t": TB, "p": PB} - binaryMap = unitMap{"k": KiB, "m": MiB, "g": GiB, "t": TiB, "p": PiB} - sizeRegex = regexp.MustCompile(`^(\d+(\.\d+)*) ?([kKmMgGtTpP])?[iI]?[bB]?$`) + decimalMap = unitMap{'k': KB, 'm': MB, 'g': GB, 't': TB, 'p': PB} + binaryMap = unitMap{'k': KiB, 'm': MiB, 'g': GiB, 't': TiB, 'p': PiB} ) -var decimapAbbrs = []string{"B", "kB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB"} -var binaryAbbrs = []string{"B", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB", "ZiB", "YiB"} +var ( + decimapAbbrs = []string{"B", "kB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB"} + binaryAbbrs = []string{"B", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB", "ZiB", "YiB"} +) func getSizeAndUnit(size float64, base float64, _map []string) (float64, string) { i := 0 @@ -89,20 +89,66 @@ func RAMInBytes(size string) (int64, error) { // Parses the human-readable size string into the amount it represents. func parseSize(sizeStr string, uMap unitMap) (int64, error) { - matches := sizeRegex.FindStringSubmatch(sizeStr) - if len(matches) != 4 { + // TODO: rewrite to use strings.Cut if there's a space + // once Go < 1.18 is deprecated. + sep := strings.LastIndexAny(sizeStr, "01234567890. ") + if sep == -1 { + // There should be at least a digit. return -1, fmt.Errorf("invalid size: '%s'", sizeStr) } + var num, sfx string + if sizeStr[sep] != ' ' { + num = sizeStr[:sep+1] + sfx = sizeStr[sep+1:] + } else { + // Omit the space separator. + num = sizeStr[:sep] + sfx = sizeStr[sep+1:] + } - size, err := strconv.ParseFloat(matches[1], 64) + size, err := strconv.ParseFloat(num, 64) if err != nil { return -1, err } + // Backward compatibility: reject negative sizes. + if size < 0 { + return -1, fmt.Errorf("invalid size: '%s'", sizeStr) + } + + if len(sfx) == 0 { + return int64(size), nil + } - unitPrefix := strings.ToLower(matches[3]) - if mul, ok := uMap[unitPrefix]; ok { + // Process the suffix. + + if len(sfx) > 3 { // Too long. + goto badSuffix + } + sfx = strings.ToLower(sfx) + // Trivial case: b suffix. + if sfx[0] == 'b' { + if len(sfx) > 1 { // no extra characters allowed after b. + goto badSuffix + } + return int64(size), nil + } + // A suffix from the map. + if mul, ok := uMap[sfx[0]]; ok { size *= float64(mul) + } else { + goto badSuffix + } + + // The suffix may have extra "b" or "ib" (e.g. KiB or MB). + switch { + case len(sfx) == 2 && sfx[1] != 'b': + goto badSuffix + case len(sfx) == 3 && sfx[1:] != "ib": + goto badSuffix } return int64(size), nil + +badSuffix: + return -1, fmt.Errorf("invalid suffix: '%s'", sfx) } diff --git a/size_test.go b/size_test.go index 3ab06a1..06b3cd6 100644 --- a/size_test.go +++ b/size_test.go @@ -103,6 +103,22 @@ func TestFromHumanSize(t *testing.T) { assertSuccessEquals(t, 32.5*KB, FromHumanSize, "32.5 kB") assertSuccessEquals(t, 32, FromHumanSize, "32.5 B") assertSuccessEquals(t, 300, FromHumanSize, "0.3 K") + assertSuccessEquals(t, 300, FromHumanSize, ".3kB") + + assertSuccessEquals(t, 0, FromHumanSize, "0.") + assertSuccessEquals(t, 0, FromHumanSize, "0. ") + assertSuccessEquals(t, 0, FromHumanSize, "0.b") + assertSuccessEquals(t, 0, FromHumanSize, "0.B") + assertSuccessEquals(t, 0, FromHumanSize, "-0") + assertSuccessEquals(t, 0, FromHumanSize, "-0b") + assertSuccessEquals(t, 0, FromHumanSize, "-0B") + assertSuccessEquals(t, 0, FromHumanSize, "-0 b") + assertSuccessEquals(t, 0, FromHumanSize, "-0 B") + assertSuccessEquals(t, 32, FromHumanSize, "32.") + assertSuccessEquals(t, 32, FromHumanSize, "32.b") + assertSuccessEquals(t, 32, FromHumanSize, "32.B") + assertSuccessEquals(t, 32, FromHumanSize, "32. b") + assertSuccessEquals(t, 32, FromHumanSize, "32. B") // We do not tolerate extra leading or trailing spaces // (except for a space after the number and a missing suffix). @@ -124,26 +140,11 @@ func TestFromHumanSize(t *testing.T) { assertError(t, FromHumanSize, " ") assertError(t, FromHumanSize, " .") assertError(t, FromHumanSize, " . ") - assertError(t, FromHumanSize, "0.") - assertError(t, FromHumanSize, "0. ") - assertError(t, FromHumanSize, "0.b") - assertError(t, FromHumanSize, "0.B") - assertError(t, FromHumanSize, "-0") - assertError(t, FromHumanSize, "-0b") - assertError(t, FromHumanSize, "-0B") - assertError(t, FromHumanSize, "-0 b") - assertError(t, FromHumanSize, "-0 B") assertError(t, FromHumanSize, "-32") - assertError(t, FromHumanSize, ".3kB") assertError(t, FromHumanSize, "-32b") assertError(t, FromHumanSize, "-32B") assertError(t, FromHumanSize, "-32 b") assertError(t, FromHumanSize, "-32 B") - assertError(t, FromHumanSize, "32.") - assertError(t, FromHumanSize, "32.b") - assertError(t, FromHumanSize, "32.B") - assertError(t, FromHumanSize, "32. b") - assertError(t, FromHumanSize, "32. B") assertError(t, FromHumanSize, "32b.") assertError(t, FromHumanSize, "32B.") assertError(t, FromHumanSize, "32 b.")