diff --git a/tests/lint/pylint.sh b/tests/lint/pylint.sh
index 61ffb0fd9254..0ead015f9350 100755
--- a/tests/lint/pylint.sh
+++ b/tests/lint/pylint.sh
@@ -24,3 +24,11 @@ python3 -m pylint tests/python/contrib/test_cmsisnn --rcfile="$(dirname "$0")"/p
 python3 -m pylint tests/python/relay/aot/*.py --rcfile="$(dirname "$0")"/pylintrc
 python3 -m pylint tests/python/ci --rcfile="$(dirname "$0")"/pylintrc
 python3 -m pylint tests/python/integration/ --rcfile="$(dirname "$0")"/pylintrc
+
+# tests/python/contrib/test_hexagon tests
+python3 -m pylint tests/python/contrib/test_hexagon/benchmark_util.py --rcfile="$(dirname "$0")"/pylintrc
+python3 -m pylint tests/python/contrib/test_hexagon/conftest.py --rcfile="$(dirname "$0")"/pylintrc
+python3 -m pylint tests/python/contrib/test_hexagon/conv2d/test_conv2d_blocked.py --rcfile="$(dirname "$0")"/pylintrc
+python3 -m pylint tests/python/contrib/test_hexagon/conv2d/test_conv2d_conv2d.py --rcfile="$(dirname "$0")"/pylintrc
+python3 -m pylint tests/python/contrib/test_hexagon/infrastructure.py --rcfile="$(dirname "$0")"/pylintrc
+python3 -m pylint tests/python/contrib/test_hexagon/test_2d_physical_buffers.py --rcfile="$(dirname "$0")"/pylintrc
diff --git a/tests/python/contrib/test_hexagon/benchmark_util.py b/tests/python/contrib/test_hexagon/benchmark_util.py
index e581c3d55d21..0ded60dc498b 100644
--- a/tests/python/contrib/test_hexagon/benchmark_util.py
+++ b/tests/python/contrib/test_hexagon/benchmark_util.py
@@ -15,11 +15,13 @@
 # specific language governing permissions and limitations
 # under the License.
 
+""" Utility functions used for benchmarks """
+
 import csv
 import os
-import pytest
 import tempfile
-import collections
+
+import pytest
 
 
 def skip_bencharks_flag_and_reason():
@@ -37,22 +39,8 @@ def skip_bencharks_flag_and_reason():
     if asn == "simulator":
         return (True, "Skipping benchmarks when ANDROID_SERIAL_NUMBER='simluator'")
 
-    else:
-        return (False, "")
-
-
-class UnsupportedException(Exception):
-    """
-    Indicates that the specified benchmarking configuration is known to
-    currently be unsupported. The Exception message may provide more detail.
-    """
-
-
-class NumericalAccuracyException(Exception):
-    """
-    Indicates that the benchmarking configuration appeared to run successfully,
-    but the output data didn't have the expected accuracy.
-    """
+    return (False, "")
 
 
 class UnsupportedException(Exception):
@@ -183,9 +171,10 @@ def print_csv(self, f, column_name_order, timing_decimal_places=3):
             ]:
                 if col_name in csv_line_dict:
                     old_value = csv_line_dict[col_name]
-                    assert isinstance(
-                        old_value, float
-                    ), f"Formatting code assumes that column {col_name} is some col_nameind of float, but its actual type is {type(old_value)}"
+                    assert isinstance(old_value, float), (
+                        f"Formatting code assumes that column {col_name} is"
+                        f" some kind of float, but its actual type is {type(old_value)}"
+                    )
                     str_value = f"{old_value:>0.{timing_decimal_places}f}"
                     csv_line_dict[col_name] = str_value
 
@@ -204,16 +193,16 @@ def get_benchmark_id(keys_dict):
     Note that the insertion order for `keys_dict` affects the computed name.
     """
     # Creat a copy, because we might be modifying it.
-    d = dict(keys_dict)
+    keys_dict_copy = dict(keys_dict)
 
     # Sniff for shape-like lists, because we want them in a form that's both
     # readable and filesystem-friendly...
-    for k, v in d.items():
-        if isinstance(v, list) or isinstance(v, tuple):
-            v2 = "_".join([str(x) for x in v])
-            d[k] = v2
+    for k, v in keys_dict_copy.items():
+        if isinstance(v, (list, tuple)):
+            v_str = "_".join([str(x) for x in v])
+            keys_dict_copy[k] = v_str
 
-    return "-".join([f"{k}:{v}" for k, v in d.items()])
+    return "-".join([f"{k}:{v}" for k, v in keys_dict_copy.items()])
 
 
 def get_benchmark_decription(keys_dict):
@@ -226,44 +215,44 @@ def get_benchmark_decription(keys_dict):
     return " ".join([f"{k}={v}" for k, v in keys_dict.items()])
 
 
-# This fixture provides some initialization / finalization logic for groups of related
-# benchmark runs.
-# See the fixture implementation below for details.
-#
-# The fixture's mechanics are described here: https://stackoverflow.com/a/63047695
-#
-# TODO: There may be cleaner ways to let each class that uses this fixture provide its
-# own value for `csv_column_order`.
-#
-# TODO: In the future we may wish to break this fixture up in to several smaller ones.
-#
-# The overall contract for a class (e.g. `MyTest`) using this fixture is as follows:
-#
-# https://stackoverflow.com/a/63047695
-#
-# @pytest.mark.usefixtures("benchmark_group")
-# class MyTest:
-#
-#     # The fixture requires that this class variable is defined before
-#     # the fixture's finalizer-logic executes.
-#     #
-#     # This is used as an argument to BenchmarkTable.print_csv(...) after
-#     # all of MyTest's unit tests have executed.
-#     csv_column_order = [
-#         ...
-#
-#     # Before the MyTest's first unit test executes, the fixture will populate the
-#     # following class variables:
-#     MyTest.working_dir : str
-#     MyTest.benchmark_table : BenchmarkTable
 @pytest.fixture(scope="class")
 def benchmark_group(request):
+    """This fixture provides some initialization / finalization logic for groups of related
+    benchmark runs.
+    See the fixture implementation below for details.
+
+    The fixture's mechanics are described here: https://stackoverflow.com/a/63047695
+
+    TODO: There may be cleaner ways to let each class that uses this fixture provide its
+    own value for `csv_column_order`.
+
+    TODO: In the future we may wish to break this fixture up in to several smaller ones.
+
+    The overall contract for a class (e.g. `MyTest`) using this fixture is as follows:
+
+    https://stackoverflow.com/a/63047695
+
+    @pytest.mark.usefixtures("benchmark_group")
+    class MyTest:
+
+        # The fixture requires that this class variable is defined before
+        # the fixture's finalizer-logic executes.
+        #
+        # This is used as an argument to BenchmarkTable.print_csv(...) after
+        # all of MyTest's unit tests have executed.
+        csv_column_order = [
+            ...
+        ]
+
+    # Before the MyTest's first unit test executes, the fixture will populate the
+    # following class variables:
+    MyTest.working_dir : str
+    MyTest.benchmark_table : BenchmarkTable"""
     working_dir = tempfile.mkdtemp()
-    bt = BenchmarksTable()
+    table = BenchmarksTable()
 
     request.cls.working_dir = working_dir
-    request.cls.benchmark_table = bt
+    request.cls.benchmark_table = table
 
     yield
 
@@ -272,8 +261,8 @@ def benchmark_group(request):
     if not hasattr(request.cls, "csv_column_order"):
         raise Exception('Classes using this fixture must have a member named "csv_column_order"')
 
-    with open(tabular_output_filename, "w") as csv_file:
-        bt.print_csv(csv_file, request.cls.csv_column_order)
+    with open(tabular_output_filename, "w", encoding="UTF-8") as csv_file:
+        table.print_csv(csv_file, request.cls.csv_column_order)
 
     print()
     print("*" * 80)
@@ -281,5 +270,5 @@ def benchmark_group(request):
     print("*" * 80)
     print()
 
-    if bt.has_fail() > 0:
+    if table.has_fail() > 0:
         pytest.fail("At least one benchmark configuration failed", pytrace=False)
diff --git a/tests/python/contrib/test_hexagon/conftest.py b/tests/python/contrib/test_hexagon/conftest.py
index 3b057384df37..52dc146db2f4 100644
--- a/tests/python/contrib/test_hexagon/conftest.py
+++ b/tests/python/contrib/test_hexagon/conftest.py
@@ -18,12 +18,8 @@
 """ Hexagon testing fixtures used to deduce testing argument values from testing parameters """
-
-import pytest
-
-import tvm
-import tvm.testing
-
+# Disabling invalid-name check as the name is expected to be exactly this by pytest
+# pylint: disable=invalid-name
 pytest_plugins = [
     "tvm.contrib.hexagon.pytest_plugin",
 ]
diff --git a/tests/python/contrib/test_hexagon/conv2d/test_conv2d_blocked.py b/tests/python/contrib/test_hexagon/conv2d/test_conv2d_blocked.py
index c5df89b315b0..07f6c2613dbc 100644
--- a/tests/python/contrib/test_hexagon/conv2d/test_conv2d_blocked.py
+++ b/tests/python/contrib/test_hexagon/conv2d/test_conv2d_blocked.py
@@ -15,13 +15,13 @@
 # specific language governing permissions and limitations
 # under the License.
-import sys
+""" Hexagon contrib tests for blocked conv2d """
 
-import platform
+
+import numpy as np
 
 import tvm
 import tvm.testing
-from tvm import te
-from tvm import topi
+from tvm import te, topi
 from tvm.topi import testing
 
 from ..infrastructure import (
@@ -33,9 +33,6 @@
     get_packed_shape,
 )
 
-import numpy as np
-import pytest
-
 
 def conv2d_nhwc8h8w32c(
     shape_input,
@@ -57,72 +54,84 @@ def conv2d_nhwc8h8w32c(
     """
 
     # nhwc layout
-    X = te.placeholder(shape_input, dtype=dtype, name="logical_input")
+    logical_input = te.placeholder(shape_input, dtype=dtype, name="logical_input")
 
     # oihw8i32o4i layout
     filt_packed = te.placeholder(shape_filter, dtype=dtype, name="packed_filter")
 
-    block_H, block_W, block_C = get_block_shape()
+    block_h, block_w, block_c = get_block_shape()
 
     # Calculate padded input
-    N, H, W, C = shape_input
-    pad_h = (block_H - ((H + pad[1]) % block_H)) % block_H
-    pad_w = (block_W - ((W + pad[3]) % block_W)) % block_W
-    X_pad = topi.nn.pad(
-        X, [0, pad[0], pad[2], 0], [0, pad_h, pad_w, 0], pad_value=0, name="padded_input"
+    _, height, width, _ = shape_input
+    pad_h = (block_h - ((height + pad[1]) % block_h)) % block_h
+    pad_w = (block_w - ((width + pad[3]) % block_w)) % block_w
+    padded_input = topi.nn.pad(
+        logical_input,
+        [0, pad[0], pad[2], 0],
+        [0, pad_h, pad_w, 0],
+        pad_value=0,
+        name="padded_input",
     )
 
     # Calculate packed input
-    packed_shape = get_packed_shape(X_pad.shape)
-    X_packed = te.compute(
+    packed_shape = get_packed_shape(padded_input.shape)
+    packed_input = te.compute(
         packed_shape,
-        lambda n, ho, wo, co, hi, wi, ci: X_pad[
-            n, ho * block_H + hi, wo * block_W + wi, co * block_C + ci
+        lambda n, ho, wo, co, hi, wi, ci: padded_input[
+            n, ho * block_h + hi, wo * block_w + wi, co * block_c + ci
         ],
         name="packed_input",
     )
 
-    output_shape, compute = conv2d_compute(X_packed, filt_packed, pad, stride, dilation)
-    Y = te.compute(output_shape, compute, name="packed_output")
-    s = te.create_schedule(Y.op)
+    output_shape, compute = conv2d_compute(packed_input, filt_packed, pad, stride, dilation)
+    packed_output = te.compute(output_shape, compute, name="packed_output")
+    s = te.create_schedule(packed_output.op)
 
     # Ensure the padding and array packing is performed inline
-    s[X_pad].compute_inline()
-    s[X_packed].compute_inline()
+    s[padded_input].compute_inline()
+    s[packed_input].compute_inline()
 
     # cache reads and writes
-    Xl = s.cache_read(X_packed, storage_scope, [Y])
-    Fl = s.cache_read(filt_packed, storage_scope, [Y])
-    Yl = s.cache_write(Y, storage_scope)
+    cached_input = s.cache_read(packed_input, storage_scope, [packed_output])
+    cached_filt = s.cache_read(filt_packed, storage_scope, [packed_output])
+    cached_output = s.cache_write(packed_output, storage_scope)
 
     # cache write schedule
-    n, ho, wo, ko, hi, wi, ki = s[Y].op.axis
-    koo, koi = s[Y].split(ko, factor=k_split_factor)
-    hoo, hoi = s[Y].split(ho, factor=h_split_factor)
-    s[Y].reorder(n, koo, hoo, koi, hoi, wo, hi, wi, ki)
-    s[Yl].compute_at(s[Y], hoo)
+    batch, h_outer, w_outer, k_outer, h_inner, w_inner, k_inner = s[packed_output].op.axis
+    koo, koi = s[packed_output].split(k_outer, factor=k_split_factor)
+    hoo, hoi = s[packed_output].split(h_outer, factor=h_split_factor)
+    s[packed_output].reorder(batch, koo, hoo, koi, hoi, w_outer, h_inner, w_inner, k_inner)
+    s[cached_output].compute_at(s[packed_output], hoo)
 
     # compute schedule
-    n, ho, wo, ko, hi, wi, ki = s[Yl].op.axis
-    rh, rw, rc = s[Yl].op.reduce_axis
-    rco, rci = s[Yl].split(rc, factor=block_C)
-    koo, koi = s[Yl].split(ko, factor=k_split_factor)
-    hoo, hoi = s[Yl].split(ho, factor=h_split_factor)
-    s[Yl].reorder(n, koo, hoo, koi, hoi, wo, rco, hi, wi, ki, rci)
-    s[Xl].compute_at(s[Yl], hoo)
-    s[Fl].compute_at(s[Yl], hoo)
+    batch, h_outer, w_outer, k_outer, h_inner, w_inner, k_inner = s[cached_output].op.axis
+    _, _, reduce_c = s[cached_output].op.reduce_axis
+    rco, rci = s[cached_output].split(reduce_c, factor=block_c)
+    koo, koi = s[cached_output].split(k_outer, factor=k_split_factor)
+    hoo, hoi = s[cached_output].split(h_outer, factor=h_split_factor)
+    s[cached_output].reorder(
+        batch, koo, hoo, koi, hoi, w_outer, rco, h_inner, w_inner, k_inner, rci
+    )
+    s[cached_input].compute_at(s[cached_output], hoo)
+    s[cached_filt].compute_at(s[cached_output], hoo)
 
     binds = {}
     if storage_scope and storage_scope != "global":
         with tvm.transform.PassContext():
-            Xb = tvm.tir.decl_buffer(packed_shape, name="Xb", dtype=dtype, scope=storage_scope)
-            Yb = tvm.tir.decl_buffer(output_shape, name="Yb", dtype=dtype, scope=storage_scope)
-            binds = {X: Xb, Y: Yb}
+            input_buffer = tvm.tir.decl_buffer(
+                packed_shape, name="Xb", dtype=dtype, scope=storage_scope
+            )
+            output_buffer = tvm.tir.decl_buffer(
+                output_shape, name="Yb", dtype=dtype, scope=storage_scope
+            )
+            binds = {logical_input: input_buffer, packed_output: output_buffer}
 
-    return (s, [X, filt_packed, Y], binds)
+    return (s, [logical_input, filt_packed, packed_output], binds)
 
 
 class BaseConv2d:
+    """Base class for conv2d tests"""
+
     # input
     batch = tvm.testing.parameter(1)
     in_size = tvm.testing.parameter(64)
@@ -139,6 +148,8 @@ class BaseConv2d:
 
 
 class TestConv2dPackedFilter(BaseConv2d):
+    """Conv2d packed filter test class"""
+
     @tvm.testing.parametrize_targets("llvm")
     @tvm.testing.skip_if_32bit(reason="Test known to be flaky on i386 machines")
     def test_conv2d(
@@ -155,6 +166,7 @@ def test_conv2d(
         dtype,
         target,
     ):
+        """conv2d test"""
         # TODO: no support for dilation
         dilation = 1
 
diff --git a/tests/python/contrib/test_hexagon/conv2d/test_conv2d_conv2d.py b/tests/python/contrib/test_hexagon/conv2d/test_conv2d_conv2d.py
index 460c824c7037..fa770c9be313 100644
--- a/tests/python/contrib/test_hexagon/conv2d/test_conv2d_conv2d.py
+++ b/tests/python/contrib/test_hexagon/conv2d/test_conv2d_conv2d.py
@@ -15,13 +15,13 @@
-import sys
+""" back-to-back conv2d Hexagon test for stripe scheduling """
 
-import platform
+
+import numpy as np
 
 import tvm
 import tvm.testing
-from tvm import te
-from tvm import topi
+from tvm import te, topi
 from tvm.topi import testing
 
 from ..infrastructure import (
@@ -33,9 +33,6 @@
     get_packed_shape,
 )
 
-import numpy as np
-import pytest
-
 
 def conv2dconv2d_nhwc8h8w32c(
     shape_input,
@@ -61,87 +58,99 @@ def conv2dconv2d_nhwc8h8w32c(
     """
 
     # nhwc layout
-    X = te.placeholder(shape_input, dtype=dtype, name="logical_input")
+    logical_input = te.placeholder(shape_input, dtype=dtype, name="logical_input")
 
     # oihw8i32o4i layout
    filt_packed1 = te.placeholder(shape_filter1, dtype=dtype, name="packed_filter1")
     filt_packed2 = te.placeholder(shape_filter2, dtype=dtype, name="packed_filter2")
 
-    block_H, block_W, block_C = get_block_shape()
+    block_h, block_w, block_c = get_block_shape()
 
     # Calculate padded input
-    N, H, W, C = shape_input
-    pad_h = (block_H - ((H + pad1[1]) % block_H)) % block_H
-    pad_w = (block_W - ((W + pad1[3]) % block_W)) % block_W
-    X_pad = topi.nn.pad(
-        X, [0, pad1[0], pad1[2], 0], [0, pad_h, pad_w, 0], pad_value=0, name="padded_input"
+    _, height, width, _ = shape_input
+    pad_h = (block_h - ((height + pad1[1]) % block_h)) % block_h
+    pad_w = (block_w - ((width + pad1[3]) % block_w)) % block_w
+    padded_input = topi.nn.pad(
+        logical_input,
+        [0, pad1[0], pad1[2], 0],
+        [0, pad_h, pad_w, 0],
+        pad_value=0,
+        name="padded_input",
     )
 
     # Calculate packed input
-    packed_shape = get_packed_shape(X_pad.shape)
-    X_packed = te.compute(
+    packed_shape = get_packed_shape(padded_input.shape)
+    packed_input = te.compute(
         packed_shape,
-        lambda n, ho, wo, co, hi, wi, ci: X_pad[
-            n, ho * block_H + hi, wo * block_W + wi, co * block_C + ci
+        lambda n, ho, wo, co, hi, wi, ci: padded_input[
+            n, ho * block_h + hi, wo * block_w + wi, co * block_c + ci
         ],
         name="packed_input",
     )
 
-    output_shape1, compute1 = conv2d_compute(X_packed, filt_packed1, pad1, stride1, dilation1)
-    temp_Y = te.compute(output_shape1, compute1, name="temp_output")
+    output_shape1, compute1 = conv2d_compute(packed_input, filt_packed1, pad1, stride1, dilation1)
+    temp_output = te.compute(output_shape1, compute1, name="temp_output")
 
-    output_shape2, compute2 = conv2d_compute(temp_Y, filt_packed2, pad2, stride2, dilation2)
-    Y = te.compute(output_shape2, compute2, name="packed_output")
-    s = te.create_schedule(Y.op)
+    output_shape2, compute2 = conv2d_compute(temp_output, filt_packed2, pad2, stride2, dilation2)
+    packed_output = te.compute(output_shape2, compute2, name="packed_output")
+    s = te.create_schedule(packed_output.op)
 
     # Ensure the padding and array packing is performed inline
-    s[X_pad].compute_inline()
-    s[X_packed].compute_inline()
+    s[padded_input].compute_inline()
+    s[packed_input].compute_inline()
 
     # cache reads and writes
-    Xl = s.cache_read(X_packed, storage_scope, [temp_Y])
-    F1l = s.cache_read(filt_packed1, storage_scope, [temp_Y])
-    F2l = s.cache_read(filt_packed2, storage_scope, [Y])
-    Yl = s.cache_write(Y, storage_scope)
+    packed_input_cached = s.cache_read(packed_input, storage_scope, [temp_output])
+    filt_packed1_cached = s.cache_read(filt_packed1, storage_scope, [temp_output])
+    filt_packed2_cached = s.cache_read(filt_packed2, storage_scope, [packed_output])
+    packed_output_cached = s.cache_write(packed_output, storage_scope)
 
     # conv2d #1 schedule
-    n, ho, wo, ko, hi, wi, ki = s[temp_Y].op.axis
-    rh, rw, rc = s[temp_Y].op.reduce_axis
-    rco, rci = s[temp_Y].split(rc, factor=block_C)
-    koo, koi = s[temp_Y].split(ko, factor=k_split_factor)
-    hoo, hoi = s[temp_Y].split(ho, factor=h_split_factor)
-    s[temp_Y].reorder(n, koo, hoo, koi, hoi, wo, rco, hi, wi, ki, rci)
-    s[Xl].compute_at(s[temp_Y], hoo)
-    s[F1l].compute_at(s[temp_Y], hoo)
+    n, h_outer, w_outer, k_outer, h_inner, w_inner, k_inner = s[temp_output].op.axis
+    _, _, reduce_channel = s[temp_output].op.reduce_axis
+    rco, rci = s[temp_output].split(reduce_channel, factor=block_c)
+    koo, koi = s[temp_output].split(k_outer, factor=k_split_factor)
+    hoo, hoi = s[temp_output].split(h_outer, factor=h_split_factor)
+    s[temp_output].reorder(n, koo, hoo, koi, hoi, w_outer, rco, h_inner, w_inner, k_inner, rci)
+    s[packed_input_cached].compute_at(s[temp_output], hoo)
+    s[filt_packed1_cached].compute_at(s[temp_output], hoo)
 
     # cache write schedule
-    n, ho, wo, ko, hi, wi, ki = s[Y].op.axis
-    koo, koi = s[Y].split(ko, factor=k_split_factor)
-    hoo, hoi = s[Y].split(ho, factor=h_split_factor)
-    s[Y].reorder(n, koo, hoo, koi, hoi, wo, hi, wi, ki)
-    s[Yl].compute_at(s[Y], hoo)
+    n, h_outer, w_outer, k_outer, h_inner, w_inner, k_inner = s[packed_output].op.axis
+    koo, koi = s[packed_output].split(k_outer, factor=k_split_factor)
+    hoo, hoi = s[packed_output].split(h_outer, factor=h_split_factor)
+    s[packed_output].reorder(n, koo, hoo, koi, hoi, w_outer, h_inner, w_inner, k_inner)
+    s[packed_output_cached].compute_at(s[packed_output], hoo)
 
     # conv2d #2 schedule
-    n, ho, wo, ko, hi, wi, ki = s[Yl].op.axis
-    rh, rw, rc = s[Yl].op.reduce_axis
-    rco, rci = s[Yl].split(rc, factor=block_C)
-    koo, koi = s[Yl].split(ko, factor=k_split_factor)
-    hoo, hoi = s[Yl].split(ho, factor=h_split_factor)
-    s[Yl].reorder(n, koo, hoo, koi, hoi, wo, rco, hi, wi, ki, rci)
-    s[temp_Y].compute_at(s[Yl], hoo)
-    s[F2l].compute_at(s[Yl], hoo)
+    n, h_outer, w_outer, k_outer, h_inner, w_inner, k_inner = s[packed_output_cached].op.axis
+    _, _, reduce_channel = s[packed_output_cached].op.reduce_axis
+    rco, rci = s[packed_output_cached].split(reduce_channel, factor=block_c)
+    koo, koi = s[packed_output_cached].split(k_outer, factor=k_split_factor)
+    hoo, hoi = s[packed_output_cached].split(h_outer, factor=h_split_factor)
+    s[packed_output_cached].reorder(
+        n, koo, hoo, koi, hoi, w_outer, rco, h_inner, w_inner, k_inner, rci
+    )
+    s[temp_output].compute_at(s[packed_output_cached], hoo)
+    s[filt_packed2_cached].compute_at(s[packed_output_cached], hoo)
 
     binds = {}
     if storage_scope and storage_scope != "global":
         with tvm.transform.PassContext():
-            Xb = tvm.tir.decl_buffer(packed_shape, name="Xb", dtype=dtype, scope=storage_scope)
-            Yb = tvm.tir.decl_buffer(output_shape2, name="Yb", dtype=dtype, scope=storage_scope)
-            binds = {X: Xb, Y: Yb}
+            input_buffer = tvm.tir.decl_buffer(
+                packed_shape, name="Xb", dtype=dtype, scope=storage_scope
+            )
+            output_buffer = tvm.tir.decl_buffer(
+                output_shape2, name="Yb", dtype=dtype, scope=storage_scope
+            )
+            binds = {logical_input: input_buffer, packed_output: output_buffer}
 
-    return (s, [X, filt_packed1, filt_packed2, Y], binds)
+    return (s, [logical_input, filt_packed1, filt_packed2, packed_output], binds)
 
 
 class BaseConv2dConv2d:
+    """Base class for conv2d-conv2d tests"""
+
     # input
     batch = tvm.testing.parameter(1)
     in_size = tvm.testing.parameter(64)
@@ -162,6 +171,8 @@ class BaseConv2dConv2d:
 
 
 class TestConv2dConv2dPackedFilter(BaseConv2dConv2d):
+    """Conv2d-Conv2d packed filter test class"""
+
     @tvm.testing.parametrize_targets("llvm")
     @tvm.testing.skip_if_32bit(reason="Test known to be flaky on i386 machines")
     def test_conv2d(
@@ -181,6 +192,7 @@ def test_conv2d(
         dtype,
         target,
     ):
+        """conv2d-conv2d test"""
         # TODO: no support for padding in conv2d #2
         pad2 = 0
 
diff --git a/tests/python/contrib/test_hexagon/infrastructure.py b/tests/python/contrib/test_hexagon/infrastructure.py
index a1fbfdefcdbd..ecd0504ecb1e 100644
--- a/tests/python/contrib/test_hexagon/infrastructure.py
+++ b/tests/python/contrib/test_hexagon/infrastructure.py
@@ -18,14 +18,18 @@
 """ Hexagon testing infrastructure """
 
+import numpy
 import tvm
 from tvm import te
-import numpy
 
 
 def allocate_hexagon_array(
     dev, tensor_shape=None, dtype=None, data=None, axis_separators=None, mem_scope=None
 ):
+    """
+    Allocate a hexagon array which could be a 2D array
+    on physical memory defined by axis_separators
+    """
     if tensor_shape is None:
         assert data is not None, "Must provide either tensor shape or numpy data array"
         tensor_shape = data.shape
@@ -98,9 +102,19 @@ def get_logical_shape(physical_shape_nhwc8h8w32c):
     return logical_shape_nhwc
 
 
-# input: logical shape in oihw layout
-# output: physical packed shape in oihw8i3204i layout
 def get_packed_filter_shape(logical_shape_oihw):
+    """return packed filter shape
+
+    Parameters
+    ----------
+    logical_shape_oihw :
+        logical shape in oihw layout
+
+    Returns
+    -------
+    physical_shape_oihw8i32o4i :
+        physical packed shape in oihw8i32o4i layout
+    """
     assert len(logical_shape_oihw) == 4
     filter_block_shape = get_filter_block_shape()
     filter_Cio, filter_Ki, filter_Cii = filter_block_shape
@@ -115,6 +129,7 @@ def get_packed_filter_shape(logical_shape_oihw):
 
 
 def build_and_run(inputs, func, target, target_host, *args, **kwargs):
+    """build and run the function func"""
     schedule, placeholders, binds = func(*args, **kwargs)
 
     func = tvm.build(
@@ -149,6 +164,7 @@ def get_conv2d_nhwc_shape(shape_nhwc, kernel_size, strides, padding, dilation, o
 
 
 def conv2d_verify(output, ref_output, dtype):
+    """transpose and reshape output and compare with ref_output"""
     # nhwc8h8w32c -> nhwc
     logical_output_shape = get_logical_shape(output.shape)
     output = output.transpose(0, 1, 4, 2, 5, 3, 6).reshape(logical_output_shape)
@@ -171,10 +187,11 @@ def conv2d_verify(output, ref_output, dtype):
 
 
 def conv2d_compute(X, filt, pad, stride, dilation):
+    """Define conv2d compute"""
     block_shape = get_block_shape()
     block_H, block_W, block_C = block_shape
-    filter_Cio, filter_Ki, filter_Cii = get_filter_block_shape()
-    filter_Ci = filter_Cio * filter_Cii
+    filter_c_io, _, filter_c_ii = get_filter_block_shape()
+    filter_c_i = filter_c_io * filter_c_ii
 
     shape_filter = filt.shape
     kernel_size = tuple(shape_filter[2:4])
@@ -191,7 +208,6 @@ def conv2d_compute(X, filt, pad, stride, dilation):
     )
 
     output_shape = get_packed_shape(logical_output_shape)
-    n, ho, wo, ko, hi, wi, ki = output_shape
     rh = te.reduce_axis((0, kernel_size[0]), name="rh")
     rw = te.reduce_axis((0, kernel_size[1]), name="rw")
     rc = te.reduce_axis((0, logical_input_shape[3]), name="rc")
@@ -210,9 +226,9 @@ def compute(n, ho, wo, ko, hi, wi, ki):
         c_block_id = rc // block_C
         c_block_offset = rc % block_C
 
-        rco = rc // filter_Ci
-        rcio = (rc % filter_Ci) // filter_Cii
-        rcii = rc % filter_Cii
+        rco = rc // filter_c_i
+        rcio = (rc % filter_c_i) // filter_c_ii
+        rcii = rc % filter_c_ii
 
         return te.sum(
             X[
diff --git a/tests/python/contrib/test_hexagon/test_2d_physical_buffers.py b/tests/python/contrib/test_hexagon/test_2d_physical_buffers.py
index e9fd24656495..cebb36edc35d 100644
--- a/tests/python/contrib/test_hexagon/test_2d_physical_buffers.py
+++ b/tests/python/contrib/test_hexagon/test_2d_physical_buffers.py
@@ -17,24 +17,29 @@
 # specific language governing permissions and limitations
 # under the License.
 
+""" Test 2d physical buffers """
+
 import contextlib
-import sys
 
-import pytest
 import numpy as np
-
+import pytest
 import tvm
+
+# Needed to register the link_shared packedfunc.
+import tvm.contrib.hexagon
 import tvm.testing
 from tvm import te
+from tvm.contrib.hexagon.pytest_plugin import requires_hexagon_toolchain
 from tvm.tir.stmt_functor import post_order_visit
-from tvm.contrib.hexagon.build import HexagonLauncher
-from tvm.contrib.hexagon.pytest_plugin import requires_hexagon_toolchain
 
 from .infrastructure import allocate_hexagon_array
 
-# Needed to register the link_shared packedfunc.
-import tvm.contrib.hexagon
-
+# Disabling invalid name as pylint assumes global variables as constants and
+# expects them to be all upper-case. Since these are used as
+# tvm.testing.parameters, if they are made upper-case, the functions which take
+# them as arguments would also need to be upper-case, and pylint would complain
+# there as well
+# pylint: disable=invalid-name
 dtype = tvm.testing.parameter("int8")
 
 batch_size = tvm.testing.parameter(
@@ -68,9 +73,12 @@
     ("nchw-8h8w32c-2d", "global.vtcm"),
 )
 
+# pylint: enable=invalid-name
+
 
 @tvm.testing.fixture
 def target_host(target):
+    """Return tvm target.Target with host attached"""
     target = tvm.target.Target(target)
 
     if target.kind.name == "hexagon":
@@ -84,6 +92,12 @@ def target_host(target):
 
     return tvm.target.Target(target, host=host)
 
 
+# Disabling redefined-outer-name for the whole file as there isn't any easy
+# solution yet to refactor tvm.testing.fixture fixtures that avoid redefining
+# outer variable names
+# pylint: disable=redefined-outer-name
+
+
 @tvm.testing.fixture
 def input_shape(batch_size, input_channels, input_image_shape):
     return [batch_size, *input_image_shape, input_channels]
@@ -92,21 +106,21 @@ def input_shape(batch_size, input_channels, input_image_shape):
 
 def transform_shape(shape, layout):
     if layout == "nhwc":
         return shape
-    elif layout in ["nchw-8h8w32c-1d", "nchw-8h8w32c-2d"]:
-        N, H, W, C = shape
-        return [N, (C + 31) // 32, (H + 7) // 8, (W + 7) // 8, 8, 8, 32]
-    else:
-        raise RuntimeError(f"Unexpected layout '{layout}'")
+    if layout in ["nchw-8h8w32c-1d", "nchw-8h8w32c-2d"]:
+        batch, height, width, channel = shape
+        return [batch, (channel + 31) // 32, (height + 7) // 8, (width + 7) // 8, 8, 8, 32]
+    raise RuntimeError(f"Unexpected layout '{layout}'")
 
 
 def transform_numpy(arr_np, layout):
     if layout == "nhwc":
         return arr_np
-    elif layout in ["nchw-8h8w32c-1d", "nchw-8h8w32c-2d"]:
-        N, H, W, C = arr_np.shape
-        return arr_np.reshape([N, H // 8, 8, W // 8, 8, C // 32, 32]).transpose(0, 5, 1, 3, 2, 4, 6)
-    else:
-        raise RuntimeError(f"Unexpected layout '{layout}'")
+    if layout in ["nchw-8h8w32c-1d", "nchw-8h8w32c-2d"]:
+        batch, height, width, channel = arr_np.shape
+        return arr_np.reshape([batch, height // 8, 8, width // 8, 8, channel // 32, 32]).transpose(
+            0, 5, 1, 3, 2, 4, 6
+        )
+    raise RuntimeError(f"Unexpected layout '{layout}'")
 
 
 @tvm.testing.fixture
 def transformed_expected_output_np(expected_output_np, output_layout):
     return transform_numpy(expected_output_np, output_layout)
 
 
-def layout_transform_1d(n, h, w, c):
+def layout_transform_1d(batch, height, width, channel):
     return [
-        n,
-        c // 32,
-        h // 8,
-        w // 8,
-        h % 8,
-        w % 8,
-        c % 32,
+        batch,
+        channel // 32,
+        height // 8,
+        width // 8,
+        height % 8,
+        width % 8,
+        channel % 32,
     ]
 
 
-def layout_transform_2d(n, h, w, c):
+def layout_transform_2d(batch, height, width, channel):
     return [
-        n,
-        c // 32,
-        h // 8,
-        w // 8,
+        batch,
+        channel // 32,
+        height // 8,
+        width // 8,
         te.AXIS_SEPARATOR,
-        h % 8,
-        w % 8,
-        c % 32,
+        height % 8,
+        width % 8,
+        channel % 32,
     ]
@@ -171,6 +185,8 @@ def visitor(node):
 
 
 class TestElementWise:
+    """TestElementWise"""
+
     @tvm.testing.fixture
     def expected_output_np(self, input_np):
         return 2 * input_np
@@ -189,35 +205,35 @@ def schedule_args(
         working_layout,
         working_scope,
     ):
-        InputTensor = te.placeholder(input_shape, dtype, name="Input")
-        OutputTensor = te.compute(
-            shape=InputTensor.shape,
-            fcompute=lambda *indices: (2 * InputTensor[indices]).astype(dtype),
+        """Create and return the schedule and input args after applying layout transform"""
+        input_tensor = te.placeholder(input_shape, dtype, name="Input")
+        output_tensor = te.compute(
+            shape=input_tensor.shape,
+            fcompute=lambda *indices: (2 * input_tensor[indices]).astype(dtype),
             name="Output",
         )
-        schedule = te.create_schedule(OutputTensor.op)
+        schedule = te.create_schedule(output_tensor.op)
 
-        WriteCache = schedule.cache_write(OutputTensor, working_scope)
-        ReadCache = schedule.cache_read(InputTensor, working_scope, [WriteCache])
+        write_cache = schedule.cache_write(output_tensor, working_scope)
+        read_cache = schedule.cache_read(input_tensor, working_scope, [write_cache])
 
         def apply_transform(tensor, layout):
             if layout == "nhwc":
-                pass
-            elif layout == "nchw-8h8w32c-1d":
+                return None
+            if layout == "nchw-8h8w32c-1d":
                 return schedule[tensor].transform_layout(layout_transform_1d)
-            elif layout == "nchw-8h8w32c-2d":
+            if layout == "nchw-8h8w32c-2d":
                 return schedule[tensor].transform_layout(layout_transform_2d)
-            else:
-                raise RuntimeError(f"Unexpected layout '{layout}'")
+            raise RuntimeError(f"Unexpected layout '{layout}'")
 
-        apply_transform(InputTensor, input_layout)
-        compute_loopnest = apply_transform(OutputTensor, output_layout) or OutputTensor.op.axis
-        schedule[WriteCache].compute_at(schedule[OutputTensor], compute_loopnest[0])
+        apply_transform(input_tensor, input_layout)
+        compute_loopnest = apply_transform(output_tensor, output_layout) or output_tensor.op.axis
+        schedule[write_cache].compute_at(schedule[output_tensor], compute_loopnest[0])
 
-        apply_transform(ReadCache, working_layout)
-        apply_transform(WriteCache, working_layout)
+        apply_transform(read_cache, working_layout)
+        apply_transform(write_cache, working_layout)
 
-        return [schedule, [InputTensor, OutputTensor]]
+        return [schedule, [input_tensor, output_tensor]]
 
     @tvm.testing.fixture
     def ir_module(self, schedule_args):
@@ -229,7 +245,7 @@ def ir_module(self, schedule_args):
         return tvm.lower(*schedule_args)
 
     @tvm.testing.fixture
-    def uses_unsupported_physical_dimensions(
+    def uses_unsupported_physical_dimensions(  # pylint: disable=invalid-name
         self, target_host, input_layout, working_layout, output_layout
     ):
         uses_2d_memory = "nchw-8h8w32c-2d" in [input_layout, working_layout, output_layout]
@@ -246,6 +262,7 @@ def test_param_shapes(self, ir_module, transformed_input_shape, transformed_outp
         assert primfunc_output_shape == transformed_output_shape
 
     def test_cache_shape(self, ir_module, input_layout, working_layout, output_layout):
+        """Test function to check expected_physical_dimensions for cached buffers"""
         func = ir_module["main"]
         for buffer in extract_buffers(func.body):
             buffer_layout = {
@@ -306,6 +323,7 @@ def test_execute(
         output_layout,
         hexagon_session,
     ):
+        """Test execution of computes with 2d physical buffers"""
         if input_layout == "nchw-8h8w32c-2d":
             input_axis_separators = [4]
         else:
diff --git a/tests/python/contrib/test_hexagon/test_benchmark_elemwise_add.py b/tests/python/contrib/test_hexagon/test_benchmark_elemwise_add.py
index f7f5f3e176e4..b15219ebc00e 100644
--- a/tests/python/contrib/test_hexagon/test_benchmark_elemwise_add.py
+++ b/tests/python/contrib/test_hexagon/test_benchmark_elemwise_add.py
@@ -14,20 +14,20 @@
 # KIND, either express or implied. See the License for the
 # specific language governing permissions and limitations
 # under the License.
+""" benchmark_elemwise_add """
 
 import os
 import os.path
 import sys
-import pytest
-import numpy as np
-import logging
 import tempfile
-import tvm.testing
+
+import numpy as np
+import pytest
 import tvm.script
-from tvm.script import tir as T
-from tvm import te
+import tvm.testing
 from tvm.contrib.hexagon.build import HexagonLauncherRPC
+from tvm.script import tir as T
+
 from . import benchmark_util as bu
 
 _SHOULD_SKIP_BENCHMARKS, _SKIP_BENCHMARKS_REASON = bu.skip_bencharks_flag_and_reason()
@@ -90,12 +90,8 @@
 print("-" * 80)
 print()
 
-from typing import Tuple
-
-def _get_irmod_elemwise_add(
-    _PRIMFUNC_NAME: str, shape: list, dtype: str, mem_scope: str
-) -> tvm.ir.module.IRModule:
+def _get_irmod_elemwise_add(shape: list, dtype: str, mem_scope: str) -> tvm.ir.module.IRModule:
     """
     Return an IRModule containing a single primfunc, expressed as NS-TIR.
 
@@ -113,7 +109,6 @@ def _get_irmod_elemwise_add(
         dim0_size,
         dim1_size,
     ) = shape
-    dtype_str = str(dtype)
 
     if mem_scope == "global.vtcm":
         raise bu.UnsupportedException("This benchmark kernel does not yet support VTCM buffers.")
@@ -124,20 +119,30 @@ def _get_irmod_elemwise_add(
     # Also: The VTCM budget is a very rough estimate, based only on experience.
     # Assuming that it's even reasonable to use a hard-coded estimate AT ALL, this number
     # may need tweaking.
-    estimated_vtcm_budget_bytes = HVX_VECTOR_BYTES * 1024
-    dtype_bits = tvm._ffi.runtime_ctypes.DataType(dtype).bits
-    assert dtype_bits % 8 == 0
-    dtype_bytes = dtype_bits // 8
+    # The below code is commented out to avoid an unreachable-code error
+    # with pylint. Please enable this once the kernel starts supporting
+    # VTCM buffers
+
+    # Code starts below:
+    # ---- ------ -----
+    # estimated_vtcm_budget_bytes = HVX_VECTOR_BYTES * 1024
 
-    num_vtcm_tensors = 3
-    estimated_vtcm_needed_bytes = shape[0] * shape[1] * dtype_bytes * num_vtcm_tensors
+    # dtype_bits = tvm._ffi.runtime_ctypes.DataType(dtype).bits
+    # assert dtype_bits % 8 == 0
+    # dtype_bytes = dtype_bits // 8
 
-    if estimated_vtcm_needed_bytes > estimated_vtcm_budget_bytes:
-        raise bu.UnsupportedException("Expect to exceed VTCM budget.")
+    # num_vtcm_tensors = 3
+    # estimated_vtcm_needed_bytes = shape[0] * shape[1] * dtype_bytes * num_vtcm_tensors
+
+    # if estimated_vtcm_needed_bytes > estimated_vtcm_budget_bytes:
+    #     raise bu.UnsupportedException("Expect to exceed VTCM budget.")
 
     @tvm.script.ir_module
     class BenchmarkModule:
+        """Elementwise STIR module for benchmarking"""
+
+        # pylint: disable=no-self-argument,invalid-name,missing-function-docstring
         @T.prim_func
         def main(a: T.handle, b: T.handle, c: T.handle):
             # We exchange data between function by handles, which are similar to pointer.
@@ -151,6 +156,8 @@ def main(a: T.handle, b: T.handle, c: T.handle):
             for j in range(dim1_size):
                 C[i, j] = A[i, j] + B[i, j]
 
+        # pylint: enable=no-self-argument,invalid-name,missing-function-docstring
+
     return BenchmarkModule
 
 
@@ -187,12 +194,12 @@ def _benchmark_hexagon_elementwise_add_kernel(
     keys_dict["host_files_dir_path"] = host_files_dir_path
 
     log_file_path = os.path.join(host_files_dir_path, "out.txt")
-    with open(log_file_path, "w") as log_file:
+    with open(log_file_path, "w", encoding="UTF-8") as log_file:
         print(f"CONFIGURATION: {desc}")
         log_file.write(f"CONFIGURATION: {desc}\n")
 
         try:
-            ns_tir_module = _get_irmod_elemwise_add(_PRIMFUNC_NAME, shape, dtype, mem_scope)
+            ns_tir_module = _get_irmod_elemwise_add(shape, dtype, mem_scope)
 
             # Dump the primfunc NS-TIR (as text) to the log file...
             lowered_mod = tvm.lower(ns_tir_module, _PRIMFUNC_NAME)
@@ -201,16 +208,16 @@ def _benchmark_hexagon_elementwise_add_kernel(
             log_file.write("\n")
 
             # Lower the primfunc's IRModule to Hexagon object code...
-            A = tvm.te.placeholder(shape, dtype=dtype)
-            B = tvm.te.placeholder(shape, dtype=dtype)
-            C = tvm.te.placeholder(shape, dtype=dtype)
+            input1 = tvm.te.placeholder(shape, dtype=dtype)
+            input2 = tvm.te.placeholder(shape, dtype=dtype)
+            output = tvm.te.placeholder(shape, dtype=dtype)
 
             built_module: tvm.driver.build_module.OperatorModule = tvm.build(
                 ns_tir_module,
                 [
-                    A,
-                    B,
-                    C,
+                    input1,
+                    input2,
+                    output,
                 ],
                 _SUPER_TARGET,
                 name=_PRIMFUNC_NAME,
@@ -231,9 +238,9 @@ def _benchmark_hexagon_elementwise_add_kernel(
 
             # Generate our testing / validation data...
             (
-                host_numpy_A_data,
-                host_numpy_B_data,
-                host_numpy_C_data_expected,
+                host_numpy_input1_data,
+                host_numpy_input2_data,
+                host_numpy_output_data_expected,
             ) = _get_elemwise_add_reference_value_tensors(shape, dtype)
 
             with hexagon_launcher.start_session() as sess:
@@ -244,25 +251,25 @@ def _benchmark_hexagon_elementwise_add_kernel(
                 )
 
                 # Create the target-side tensors to hold the primfunc's inputs and outputs...
-                A_data = tvm.nd.empty(shape, dtype, sess.device, mem_scope)
-                B_data = tvm.nd.empty(shape, dtype, sess.device, mem_scope)
-                C_data = tvm.nd.empty(shape, dtype, sess.device, mem_scope)
+                input1_data = tvm.nd.empty(shape, dtype, sess.device, mem_scope)
+                input2_data = tvm.nd.empty(shape, dtype, sess.device, mem_scope)
+                output_data = tvm.nd.empty(shape, dtype, sess.device, mem_scope)
 
                 # Populate the primfunc's input tensors...
-                A_data.copyfrom(host_numpy_A_data)
-                B_data.copyfrom(host_numpy_B_data)
+                input1_data.copyfrom(host_numpy_input1_data)
+                input2_data.copyfrom(host_numpy_input2_data)
 
                 # Actually benchmark the primfunc...
                 timer = loaded_hexagon_module.time_evaluator(
                     "main", sess.device, number=10, repeat=1
                 )
-                timing_result = timer(A_data, B_data, C_data)
+                timing_result = timer(input1_data, input2_data, output_data)
 
                 print(f"TIMING RESULT: {timing_result}")
                 log_file.write(f"TIMING RESULT: {timing_result}\n")
 
                 # Verify that the computation actually happened, and produced the correct result.
-                result = C_data.numpy()
+                result = output_data.numpy()
 
                 if dtype == "float16":
                     # These are the closest tolerance we currently expect / require for these
@@ -282,30 +289,30 @@ def _benchmark_hexagon_elementwise_add_kernel(
                 # kill the overall script.
                 try:
                     tvm.testing.assert_allclose(
-                        result, host_numpy_C_data_expected, rel_tolerance, abs_tolerance
+                        result, host_numpy_output_data_expected, rel_tolerance, abs_tolerance
                     )
-                except AssertionError as e:
-                    raise bu.NumericalAccuracyException(str(e))
+                except AssertionError as err:
+                    raise bu.NumericalAccuracyException(str(err))
 
                 _BT.record_success(timing_result, **keys_dict)
 
-        except bu.NumericalAccuracyException as e:
+        except bu.NumericalAccuracyException as err:
             print()
-            print(f"FAIL: Numerical accuracy error. See log file.")
+            print("FAIL: Numerical accuracy error. See log file.")
 
             log_file.write("\n")
-            log_file.write(f"FAIL: {e}\n")
+            log_file.write(f"FAIL: {err}\n")
 
-            _BT.record_fail(**keys_dict, comments=f"Numerical accuracy error. See log file.")
+            _BT.record_fail(**keys_dict, comments="Numerical accuracy error. See log file.")
 
-        except bu.UnsupportedException as e:
+        except bu.UnsupportedException as err:
             print()
-            print(f"SKIP: {e}")
+            print(f"SKIP: {err}")
 
             log_file.write("\n")
-            log_file.write(f"SKIP: {e}\n")
+            log_file.write(f"SKIP: {err}\n")
 
-            _BT.record_skip(**keys_dict, comments=f"Unsupported configuration: {e}")
+            _BT.record_skip(**keys_dict, comments=f"Unsupported configuration: {err}")
 
 
 def _get_elemwise_add_reference_value_tensors(shape: list, dtype: str):
@@ -321,10 +328,10 @@ def _get_elemwise_add_reference_value_tensors(shape: list, dtype: str):
     """
     assert len(shape) == 2
 
-    A = np.ndarray(shape, dtype=dtype)
-    B = np.ndarray(shape, dtype=dtype)
+    input1 = np.ndarray(shape, dtype=dtype)
+    input2 = np.ndarray(shape, dtype=dtype)
 
-    np_dtype = A.dtype
+    np_dtype = input1.dtype
 
     if np_dtype.kind in ["i", "u"]:
         # We allow overflow for integer types because it tends to be well-behaved
@@ -336,8 +343,8 @@ def _get_elemwise_add_reference_value_tensors(shape: list, dtype: str):
 
         for i in range(shape[0]):
             for j in range(shape[1]):
-                A[i, j] = next_value
-                B[i, j] = next_value * 2
+                input1[i, j] = next_value
+                input2[i, j] = next_value * 2
                 next_value += 1
 
     elif np_dtype.kind == "f":
@@ -355,24 +362,25 @@ def _get_elemwise_add_reference_value_tensors(shape: list, dtype: str):
 
         for i in range(shape[0]):
             for j in range(shape[1]):
-                A[i, j] = next_value
-                B[i, j] = next_value + 1
+                input1[i, j] = next_value
+                input2[i, j] = next_value + 1
                 next_value += delta
 
     else:
         assert False, f"Unexpected data type: {np_dtype}"
 
-    C = A + B
+    output = input1 + input2
 
     return [
-        A,
-        B,
-        C,
+        input1,
+        input2,
+        output,
     ]
 
 
 @pytest.mark.skipif(_SHOULD_SKIP_BENCHMARKS, reason=_SKIP_BENCHMARKS_REASON)
 @tvm.testing.requires_hexagon
 def test_elemwise_add(hexagon_launcher: HexagonLauncherRPC):
+    """Main elementwise add test function"""
     for dtype in [
         "int8",
         "float16",
@@ -413,7 +421,7 @@ def test_elemwise_add(hexagon_launcher: HexagonLauncherRPC):
     print()
 
     tabular_output_filename = os.path.join(_HOST_OUTPUT_DIR, "benchmark-results.csv")
-    with open(tabular_output_filename, "w") as csv_file:
-        _BT.print_csv(csv_file, _CSV_COLUMN_ORDER)
+    with open(tabular_output_filename, "w", encoding="UTF-8") as csv_file:
+        _BT.print_csv(csv_file, _CSV_COLUMN_ORDER)
     print(f"BENCHMARK RESULTS FILE: {tabular_output_filename}")