Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove regexp use #40

Merged
merged 5 commits into from May 17, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
70 changes: 58 additions & 12 deletions size.go
Expand Up @@ -2,7 +2,6 @@ package units

import (
"fmt"
"regexp"
"strconv"
"strings"
)
Expand All @@ -26,16 +25,17 @@ const (
PiB = 1024 * TiB
)

type unitMap map[string]int64
type unitMap map[byte]int64

var (
decimalMap = unitMap{"k": KB, "m": MB, "g": GB, "t": TB, "p": PB}
binaryMap = unitMap{"k": KiB, "m": MiB, "g": GiB, "t": TiB, "p": PiB}
sizeRegex = regexp.MustCompile(`^(\d+(\.\d+)*) ?([kKmMgGtTpP])?[iI]?[bB]?$`)
decimalMap = unitMap{'k': KB, 'm': MB, 'g': GB, 't': TB, 'p': PB}
binaryMap = unitMap{'k': KiB, 'm': MiB, 'g': GiB, 't': TiB, 'p': PiB}
)

var decimapAbbrs = []string{"B", "kB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB"}
var binaryAbbrs = []string{"B", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB", "ZiB", "YiB"}
var (
decimapAbbrs = []string{"B", "kB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB"}
binaryAbbrs = []string{"B", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB", "ZiB", "YiB"}
)

func getSizeAndUnit(size float64, base float64, _map []string) (float64, string) {
i := 0
Expand Down Expand Up @@ -89,20 +89,66 @@ func RAMInBytes(size string) (int64, error) {

// Parses the human-readable size string into the amount it represents.
func parseSize(sizeStr string, uMap unitMap) (int64, error) {
matches := sizeRegex.FindStringSubmatch(sizeStr)
if len(matches) != 4 {
// TODO: rewrite to use strings.Cut if there's a space
// once Go < 1.18 is deprecated.
sep := strings.LastIndexAny(sizeStr, "01234567890. ")
if sep == -1 {
// There should be at least a digit.
return -1, fmt.Errorf("invalid size: '%s'", sizeStr)
}
var num, sfx string
if sizeStr[sep] != ' ' {
num = sizeStr[:sep+1]
sfx = sizeStr[sep+1:]
} else {
// Omit the space separator.
num = sizeStr[:sep]
sfx = sizeStr[sep+1:]
}

size, err := strconv.ParseFloat(matches[1], 64)
size, err := strconv.ParseFloat(num, 64)
if err != nil {
return -1, err
}
// Backward compatibility: reject negative sizes.
if size < 0 {
return -1, fmt.Errorf("invalid size: '%s'", sizeStr)
}

if len(sfx) == 0 {
return int64(size), nil
}

unitPrefix := strings.ToLower(matches[3])
if mul, ok := uMap[unitPrefix]; ok {
// Process the suffix.

if len(sfx) > 3 { // Too long.
goto badSuffix
}
sfx = strings.ToLower(sfx)
// Trivial case: b suffix.
if sfx[0] == 'b' {
if len(sfx) > 1 { // no extra characters allowed after b.
goto badSuffix
}
return int64(size), nil
}
// A suffix from the map.
if mul, ok := uMap[sfx[0]]; ok {
size *= float64(mul)
} else {
goto badSuffix
}

// The suffix may have extra "b" or "ib" (e.g. KiB or MB).
switch {
case len(sfx) == 2 && sfx[1] != 'b':
goto badSuffix
case len(sfx) == 3 && sfx[1:] != "ib":
goto badSuffix
}

return int64(size), nil

badSuffix:
return -1, fmt.Errorf("invalid suffix: '%s'", sfx)
}
71 changes: 70 additions & 1 deletion size_test.go
Expand Up @@ -83,6 +83,10 @@ func TestHumanSize(t *testing.T) {
}

func TestFromHumanSize(t *testing.T) {
kolyshkin marked this conversation as resolved.
Show resolved Hide resolved
assertSuccessEquals(t, 0, FromHumanSize, "0")
assertSuccessEquals(t, 0, FromHumanSize, "0b")
assertSuccessEquals(t, 0, FromHumanSize, "0B")
assertSuccessEquals(t, 0, FromHumanSize, "0 B")
assertSuccessEquals(t, 32, FromHumanSize, "32")
assertSuccessEquals(t, 32, FromHumanSize, "32b")
assertSuccessEquals(t, 32, FromHumanSize, "32B")
Expand All @@ -98,11 +102,59 @@ func TestFromHumanSize(t *testing.T) {
assertSuccessEquals(t, 32.5*KB, FromHumanSize, "32.5kB")
assertSuccessEquals(t, 32.5*KB, FromHumanSize, "32.5 kB")
assertSuccessEquals(t, 32, FromHumanSize, "32.5 B")
assertSuccessEquals(t, 300, FromHumanSize, "0.3 K")
assertSuccessEquals(t, 300, FromHumanSize, ".3kB")
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Slightly on the fence if we want to accept .<number> (and <number>.) or not 🤔

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Came up with some additional test-cases (TBD if the valid/invalids are in the right one);

  • tests with leading/trailing whitespace (I think generally we should be "ok" with those, and trim before use)
  • tests with trailing . (should be an error)
  • tests with multiple (but valid) units, e.g. bb (looks like those aren't detected)
  • TBD: multiple spaces between number and unit (probably should be an error)
  • TBD: number with ., but no number following (should we accept those?)
func TestFromHumanSize(t *testing.T) {
	assertSuccessEquals(t, 0, FromHumanSize, " 0")
	assertSuccessEquals(t, 0, FromHumanSize, " 0b")
	assertSuccessEquals(t, 0, FromHumanSize, " 0B")
	assertSuccessEquals(t, 0, FromHumanSize, " 0 B")
	assertSuccessEquals(t, 0, FromHumanSize, "0 ")
	assertSuccessEquals(t, 0, FromHumanSize, "0b ")
	assertSuccessEquals(t, 0, FromHumanSize, "0B ")
	assertSuccessEquals(t, 0, FromHumanSize, "0 B ")
	assertSuccessEquals(t, 0, FromHumanSize, "0")
	assertSuccessEquals(t, 0, FromHumanSize, "0b")
	assertSuccessEquals(t, 0, FromHumanSize, "0B")
	assertSuccessEquals(t, 0, FromHumanSize, "0 B")
	assertSuccessEquals(t, 32, FromHumanSize, "32")
	assertSuccessEquals(t, 32, FromHumanSize, "32b")
	assertSuccessEquals(t, 32, FromHumanSize, "32B")
	assertSuccessEquals(t, 32, FromHumanSize, "32")
	assertSuccessEquals(t, 32*KB, FromHumanSize, "32k")
	assertSuccessEquals(t, 32*KB, FromHumanSize, "32K")
	assertSuccessEquals(t, 32*KB, FromHumanSize, "32kb")
	assertSuccessEquals(t, 32*KB, FromHumanSize, "32Kb")
	assertSuccessEquals(t, 32*MB, FromHumanSize, "32Mb")
	assertSuccessEquals(t, 32*GB, FromHumanSize, "32Gb")
	assertSuccessEquals(t, 32*TB, FromHumanSize, "32Tb")
	assertSuccessEquals(t, 32*PB, FromHumanSize, "32Pb")

	assertSuccessEquals(t, 32.5*KB, FromHumanSize, "32.5kB")
	assertSuccessEquals(t, 32.5*KB, FromHumanSize, "32.5 kB")
	assertSuccessEquals(t, 32, FromHumanSize, "32.5 B")
	assertSuccessEquals(t, 300, FromHumanSize, "0.3 K")
	assertSuccessEquals(t, 300, FromHumanSize, ".3kB")

	assertError(t, FromHumanSize, "")
	assertError(t, FromHumanSize, ".")
	assertError(t, FromHumanSize, ". ")
	assertError(t, FromHumanSize, " ")
	assertError(t, FromHumanSize, "  ")
	assertError(t, FromHumanSize, " .")
	assertError(t, FromHumanSize, " . ")
	assertError(t, FromHumanSize, "0.")
	assertError(t, FromHumanSize, "0. ")
	assertError(t, FromHumanSize, "0.b")
	assertError(t, FromHumanSize, "0.B")
	assertError(t, FromHumanSize, "-0")
	assertError(t, FromHumanSize, "-0b")
	assertError(t, FromHumanSize, "-0B")
	assertError(t, FromHumanSize, "-0 b")
	assertError(t, FromHumanSize, "-0 B")
	assertError(t, FromHumanSize, "-32")
	assertError(t, FromHumanSize, "-32b")
	assertError(t, FromHumanSize, "-32B")
	assertError(t, FromHumanSize, "-32 b")
	assertError(t, FromHumanSize, "-32 B")
	assertError(t, FromHumanSize, "32.")
	assertError(t, FromHumanSize, "32.b")
	assertError(t, FromHumanSize, "32.B")
	assertError(t, FromHumanSize, "32. b")
	assertError(t, FromHumanSize, "32. B")
	assertError(t, FromHumanSize, "32b.")
	assertError(t, FromHumanSize, "32B.")
	assertError(t, FromHumanSize, "32 b.")
	assertError(t, FromHumanSize, "32 B.")
	assertError(t, FromHumanSize, "32 bb")
	assertError(t, FromHumanSize, "32 BB")
	assertError(t, FromHumanSize, "32 b b")
	assertError(t, FromHumanSize, "32 B B")
	assertError(t, FromHumanSize, "32  b")
	assertError(t, FromHumanSize, "32  B")
	assertError(t, FromHumanSize, "hello")
	assertError(t, FromHumanSize, "-32")
	assertError(t, FromHumanSize, " 32 ")
	assertError(t, FromHumanSize, "32m b")
	assertError(t, FromHumanSize, "32bm")
}

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So, as to what should pass and what should fail, I think we should follow the rules of strconv.ParseFloat (plus our own way of handling suffixes). This is why I decided we should pass .3 as it's a valid floating point number in those programming languages/environments I am familiar with.

Adding your tests now.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is why I decided we should pass .3 as it's a valid floating point number in those programming languages/environments I am familiar wit

Ah, gotcha, yes, that makes sense.

With the leading/trailing . I was a bit concerned about those coming from "idk" (accidental things), so was veering on the "safe side".

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

	assertSuccessEquals(t, 0, FromHumanSize, " 0")
	assertSuccessEquals(t, 0, FromHumanSize, " 0b")
	assertSuccessEquals(t, 0, FromHumanSize, " 0B")
	assertSuccessEquals(t, 0, FromHumanSize, " 0 B")

Neither the old nor the new code tolerates extra leading or trailing space. We can certainly relax that, although my original intent was to replicate the functionality as-is (with the exception of accepting strings like ".33 GB" or "345. B", as described in the last commit).


assertSuccessEquals(t, 0, FromHumanSize, "0.")
assertSuccessEquals(t, 0, FromHumanSize, "0. ")
assertSuccessEquals(t, 0, FromHumanSize, "0.b")
assertSuccessEquals(t, 0, FromHumanSize, "0.B")
assertSuccessEquals(t, 0, FromHumanSize, "-0")
assertSuccessEquals(t, 0, FromHumanSize, "-0b")
assertSuccessEquals(t, 0, FromHumanSize, "-0B")
assertSuccessEquals(t, 0, FromHumanSize, "-0 b")
assertSuccessEquals(t, 0, FromHumanSize, "-0 B")
assertSuccessEquals(t, 32, FromHumanSize, "32.")
assertSuccessEquals(t, 32, FromHumanSize, "32.b")
assertSuccessEquals(t, 32, FromHumanSize, "32.B")
assertSuccessEquals(t, 32, FromHumanSize, "32. b")
assertSuccessEquals(t, 32, FromHumanSize, "32. B")

// We do not tolerate extra leading or trailing spaces
// (except for a space after the number and a missing suffix).
assertSuccessEquals(t, 0, FromHumanSize, "0 ")

assertError(t, FromHumanSize, " 0")
assertError(t, FromHumanSize, " 0b")
assertError(t, FromHumanSize, " 0B")
assertError(t, FromHumanSize, " 0 B")
assertError(t, FromHumanSize, "0b ")
assertError(t, FromHumanSize, "0B ")
assertError(t, FromHumanSize, "0 B ")

assertError(t, FromHumanSize, "")
assertError(t, FromHumanSize, "hello")
kolyshkin marked this conversation as resolved.
Show resolved Hide resolved
assertError(t, FromHumanSize, ".")
assertError(t, FromHumanSize, ". ")
assertError(t, FromHumanSize, " ")
assertError(t, FromHumanSize, " ")
assertError(t, FromHumanSize, " .")
assertError(t, FromHumanSize, " . ")
assertError(t, FromHumanSize, "-32")
assertError(t, FromHumanSize, ".3kB")
assertError(t, FromHumanSize, "-32b")
assertError(t, FromHumanSize, "-32B")
assertError(t, FromHumanSize, "-32 b")
assertError(t, FromHumanSize, "-32 B")
assertError(t, FromHumanSize, "32b.")
assertError(t, FromHumanSize, "32B.")
assertError(t, FromHumanSize, "32 b.")
assertError(t, FromHumanSize, "32 B.")
assertError(t, FromHumanSize, "32 bb")
assertError(t, FromHumanSize, "32 BB")
assertError(t, FromHumanSize, "32 b b")
assertError(t, FromHumanSize, "32 B B")
assertError(t, FromHumanSize, "32 b")
assertError(t, FromHumanSize, "32 B")
assertError(t, FromHumanSize, " 32 ")
assertError(t, FromHumanSize, "32m b")
assertError(t, FromHumanSize, "32bm")
Expand All @@ -128,6 +180,8 @@ func TestRAMInBytes(t *testing.T) {
assertSuccessEquals(t, 32, RAMInBytes, "32.3")
tmp := 32.3 * MiB
assertSuccessEquals(t, int64(tmp), RAMInBytes, "32.3 mb")
tmp = 0.3 * MiB
assertSuccessEquals(t, int64(tmp), RAMInBytes, "0.3MB")
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should this be 0.3 <space> MB ?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

oh, never mind; expected is the first argument 😞 we should swap those; always confuses me if assertions use <expected> <actual>


assertError(t, RAMInBytes, "")
assertError(t, RAMInBytes, "hello")
Expand All @@ -137,7 +191,20 @@ func TestRAMInBytes(t *testing.T) {
assertError(t, RAMInBytes, "32bm")
}

func BenchmarkParseSize(b *testing.B) {
for i := 0; i < b.N; i++ {
for _, s := range []string{
"", "32", "32b", "32 B", "32k", "32.5 K", "32kb", "32 Kb",
"32.8Mb", "32.9Gb", "32.777Tb", "32Pb", "0.3Mb", "-1",
} {
FromHumanSize(s)
RAMInBytes(s)
}
}
}

func assertEquals(t *testing.T, expected, actual interface{}) {
t.Helper()
if expected != actual {
t.Errorf("Expected '%v' but got '%v'", expected, actual)
}
Expand All @@ -153,13 +220,15 @@ func (fn parseFn) String() string {
}

func assertSuccessEquals(t *testing.T, expected int64, fn parseFn, arg string) {
t.Helper()
res, err := fn(arg)
if err != nil || res != expected {
t.Errorf("%s(\"%s\") -> expected '%d' but got '%d' with error '%v'", fn, arg, expected, res, err)
}
}

func assertError(t *testing.T, fn parseFn, arg string) {
t.Helper()
res, err := fn(arg)
if err == nil && res != -1 {
t.Errorf("%s(\"%s\") -> expected error but got '%d'", fn, arg, res)
Expand Down