[Pylint] Making hexagon tests pylint compliant Part 2 of N (apache#12176)

Second set of **hexagon tests** modified to be pylint compliant as part of the apache#11414 tracking issue. The files updated in this patch are:

* [X] tests/python/contrib/test_hexagon/test_autotvm.py
* [X] tests/python/contrib/test_hexagon/test_cache_read_write.py
* [X] tests/python/contrib/test_hexagon/test_launcher.py
* [X] tests/python/contrib/test_hexagon/test_maxpool2d_blocked.py
* [X] tests/python/contrib/test_hexagon/test_models.py
* [X] tests/python/contrib/test_hexagon/test_run_unit_tests.py
* [X] tests/python/contrib/test_hexagon/test_thread_pool.py
* [X] tests/python/contrib/test_hexagon/test_usmp.py
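
The diffs below show the typical fixes: adding module and function docstrings, renaming single-letter or non-snake_case variables, moving function-local imports to module level, replacing chained `==`/`or` comparisons with membership tests, and binding unused results to `_`. The snippet below is a small illustrative sketch of these patterns; the function and variable names are hypothetical and not taken from the patched files.

```python
"""Module docstring added so pylint's missing-module-docstring check passes."""

from tvm import te  # imports kept at module level rather than inside functions


def matmul_schedule(size_m, size_n, size_k, tuner="xgb"):
    """Function docstring added for the missing-function-docstring check."""
    # invalid-name: single-letter placeholders (M, N, K, A, B, C) become snake_case.
    input1 = te.placeholder((size_m, size_k), dtype="float32", name="input1")
    input2 = te.placeholder((size_n, size_k), dtype="float32", name="input2")
    reduce_k = te.reduce_axis((0, size_k), name="reduce_k")
    output = te.compute(
        (size_m, size_n),
        lambda i, j: te.sum(input1[i, reduce_k] * input2[j, reduce_k], axis=[reduce_k]),
    )
    schedule = te.create_schedule(output.op)

    # consider-using-in: `tuner == "xgb" or tuner == "xgb-rank"` becomes a membership test.
    if tuner in ("xgb", "xgb-rank"):
        pass

    # unused-variable: results that are not needed are bound to `_`.
    _, _ = schedule[output].op.axis

    return schedule, [input1, input2, output]
```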
quic-sanirudh authored and xinetzone committed Nov 25, 2022
1 parent 2f403ef commit 6fba1a2
Showing 9 changed files with 265 additions and 189 deletions.
8 changes: 8 additions & 0 deletions tests/lint/pylint.sh
@@ -32,3 +32,11 @@ python3 -m pylint tests/python/contrib/test_hexagon/conv2d/test_conv2d_blocked.py
python3 -m pylint tests/python/contrib/test_hexagon/conv2d/test_conv2d_conv2d.py --rcfile="$(dirname "$0")"/pylintrc
python3 -m pylint tests/python/contrib/test_hexagon/infrastructure.py --rcfile="$(dirname "$0")"/pylintrc
python3 -m pylint tests/python/contrib/test_hexagon/test_2d_physical_buffers.py --rcfile="$(dirname "$0")"/pylintrc
python3 -m pylint tests/python/contrib/test_hexagon/test_autotvm.py --rcfile="$(dirname "$0")"/pylintrc
python3 -m pylint tests/python/contrib/test_hexagon/test_cache_read_write.py --rcfile="$(dirname "$0")"/pylintrc
python3 -m pylint tests/python/contrib/test_hexagon/test_launcher.py --rcfile="$(dirname "$0")"/pylintrc
python3 -m pylint tests/python/contrib/test_hexagon/test_maxpool2d_blocked.py --rcfile="$(dirname "$0")"/pylintrc
python3 -m pylint tests/python/contrib/test_hexagon/test_models.py --rcfile="$(dirname "$0")"/pylintrc
python3 -m pylint tests/python/contrib/test_hexagon/test_run_unit_tests.py --rcfile="$(dirname "$0")"/pylintrc
python3 -m pylint tests/python/contrib/test_hexagon/test_thread_pool.py --rcfile="$(dirname "$0")"/pylintrc
python3 -m pylint tests/python/contrib/test_hexagon/test_usmp.py --rcfile="$(dirname "$0")"/pylintrc
38 changes: 22 additions & 16 deletions tests/python/contrib/test_hexagon/test_autotvm.py
@@ -15,40 +15,46 @@
# specific language governing permissions and limitations
# under the License.

""" Minimal example of tuning on hexagon. """

import contextlib
import os
import sys

import pytest
import numpy as np

import tvm
import tvm.testing
from tvm import tir, te, TVMError
from tvm.script import tir as T
from tvm import autotvm
from tvm import autotvm, te
from tvm.autotvm.tuner import GATuner, XGBTuner


@autotvm.template("demo_template")
def demo_template():
M, N, K = [1024] * 3
A = te.placeholder((M, K), dtype="float32")
B = te.placeholder((N, K), dtype="float32")
"""Initial demo template"""
size_m, size_n, size_k = [1024] * 3
input1 = te.placeholder((size_m, size_k), dtype="float32")
input2 = te.placeholder((size_n, size_k), dtype="float32")
k = te.reduce_axis((0, 1024), name="k")
C = te.compute((M, N), lambda i, j: te.sum(A[i, k] * B[j, k], axis=[k]))
output = te.compute(
(size_m, size_n), lambda i, j: te.sum(input1[i, k] * input2[j, k], axis=[k])
)

s = te.create_schedule(C.op)
s = te.create_schedule(output.op)
cfg = autotvm.get_config()

m_iter, n_iter = s[C].op.axis
(k_iter,) = s[C].op.reduce_axis
_, _ = s[output].op.axis
(k_iter,) = s[output].op.reduce_axis

cfg.define_split("k_split", k_iter, num_outputs=2)
ko, ki = cfg["k_split"].apply(s, C, k_iter)
_, _ = cfg["k_split"].apply(s, output, k_iter)

return s, [A, B, C]
return s, [input1, input2, output]


class HexagonModuleLoader:
"""HexagonModuleLoader"""

def __init__(self, hexagon_session, pre_load_function=None) -> None:
self.pre_load_function = pre_load_function
self.hexagon_session = hexagon_session
@@ -74,16 +80,15 @@ def tune_tasks(
log_filename="tuning.log",
use_transfer_learning=True,
):
from tvm.autotvm.tuner import XGBTuner
from tvm.autotvm.tuner import GATuner
"""Tune tasks with different tuners"""

tmp_log_file = log_filename + ".tmp"
if os.path.exists(tmp_log_file):
os.remove(tmp_log_file)

for i, tsk in enumerate(reversed(tasks)):
prefix = "[Task %2d/%2d] " % (i + 1, len(tasks))
if tuner == "xgb" or tuner == "xgb-rank":
if tuner in ("xgb", "xgb-rank"):
tuner_obj = XGBTuner(tsk, loss_type="rank")
elif tuner == "xgb_knob":
tuner_obj = XGBTuner(tsk, loss_type="rank", feature_type="knob")
@@ -118,6 +123,7 @@ def tune_tasks(
@pytest.mark.skip(reason="AutoTVM tuning is not yet enabled on Hexagon")
@tvm.testing.requires_hexagon
def test_autotvm(hexagon_session):
"""Top level test function for testing autotvm"""
logfilename = "./hexagon.autotvm.log"

options = {
100 changes: 54 additions & 46 deletions tests/python/contrib/test_hexagon/test_cache_read_write.py
@@ -15,17 +15,18 @@
# specific language governing permissions and limitations
# under the License.

import pytest
""" Lower cache_read and cache_write to Hexagon DMA via tensorize """

import numpy as np
from tvm.contrib.hexagon.session import Session

import tvm.testing
from tvm import te, tir
from tvm.script import tir as T
from tvm.contrib.hexagon.session import Session
from tvm.script import tir as T


def intrin_mem_copy(shape, dtype, dst_scope, src_scope):
"""Define and return tensor intrinsic for mem copy"""
src = te.placeholder(shape=shape, dtype=dtype, name="src")
dst = te.compute(shape, lambda i: src[i], name="dst")
size = shape[0] * np.dtype(dtype).itemsize
@@ -49,15 +50,15 @@ def intrin_mem_copy(shape, dtype, dst_scope, src_scope):
zero_indices = [0 for _ in shape]

def intrin_func(ins, outs):
ib = tvm.tir.ir_builder.create()
ir_builder = tvm.tir.ir_builder.create()

_src = ins[0]
_dst = outs[0]

dst_handle = ib.buffer_ptr(dst_buffer)
src_handle = ib.buffer_ptr(src_buffer)
dst_handle = ir_builder.buffer_ptr(dst_buffer)
src_handle = ir_builder.buffer_ptr(src_buffer)

ib.emit(
ir_builder.emit(
tvm.tir.call_intrin(
"handle",
"tir.mem_copy",
@@ -66,56 +67,61 @@ def intrin_func(ins, outs):
size,
)
)
return ib.get()
return ir_builder.get()

return te.decl_tensor_intrin(dst.op, intrin_func, binds={src: src_buffer, dst: dst_buffer})


def verify(hexagon_session: Session, s, x, y, z, size):
print(tvm.lower(s, [x, y, z]))
def verify(hexagon_session: Session, schedule, x_tensor, y_tensor, z_tensor, size):
"""Verify correctness with reference from numpy"""
print(tvm.lower(schedule, [x_tensor, y_tensor, z_tensor]))

target_hexagon = tvm.target.hexagon("v68", link_params=True)
func = tvm.build(
s, [x, y, z], tvm.target.Target(target_hexagon, host=target_hexagon), name="dmacpy"
schedule,
[x_tensor, y_tensor, z_tensor],
tvm.target.Target(target_hexagon, host=target_hexagon),
name="dmacpy",
)

mod = hexagon_session.load_module(func)
xt = tvm.nd.array(
np.random.randint(low=-128, high=127, size=size, dtype=x.dtype),
x_array = tvm.nd.array(
np.random.randint(low=-128, high=127, size=size, dtype=x_tensor.dtype),
device=hexagon_session.device,
)
yt = tvm.nd.array(
np.random.randint(low=-128, high=127, size=size, dtype=y.dtype),
y_array = tvm.nd.array(
np.random.randint(low=-128, high=127, size=size, dtype=y_tensor.dtype),
device=hexagon_session.device,
)
zt = tvm.nd.array(
np.random.randint(low=-128, high=127, size=size, dtype=z.dtype),
z_array = tvm.nd.array(
np.random.randint(low=-128, high=127, size=size, dtype=z_tensor.dtype),
device=hexagon_session.device,
)
mod["dmacpy"](xt, yt, zt)
mod["dmacpy"](x_array, y_array, z_array)

ref = xt.numpy() + yt.numpy()
np.testing.assert_equal(zt.numpy(), ref)
ref = x_array.numpy() + y_array.numpy()
np.testing.assert_equal(z_array.numpy(), ref)


@tvm.testing.requires_hexagon
def test_cache_read_write(hexagon_session: Session):
"""Test cache_read and cache_write to global.vtcm for hexagon"""
size = 128
outer_shape = (size,)
factor = 16
inner_shape = (factor,)
dtype = "int8"

x = te.placeholder(shape=outer_shape, dtype=dtype, name="x")
y = te.placeholder(shape=outer_shape, dtype=dtype, name="y")
z = te.compute(outer_shape, lambda i: x[i] + y[i], name="z")
s = te.create_schedule(z.op)
x_tensor = te.placeholder(shape=outer_shape, dtype=dtype, name="x")
y_tensor = te.placeholder(shape=outer_shape, dtype=dtype, name="y")
z_tensor = te.compute(outer_shape, lambda i: x_tensor[i] + y_tensor[i], name="z")
s = te.create_schedule(z_tensor.op)

x_vtcm = s.cache_read(x, "global.vtcm", [z])
y_vtcm = s.cache_read(y, "global.vtcm", [z])
z_vtcm = s.cache_write(z, "global.vtcm")
x_vtcm = s.cache_read(x_tensor, "global.vtcm", [z_tensor])
y_vtcm = s.cache_read(y_tensor, "global.vtcm", [z_tensor])
z_vtcm = s.cache_write(z_tensor, "global.vtcm")

zouter, zinner = s[z_vtcm].split(z_vtcm.op.axis[0], factor=factor)
zouter, _ = s[z_vtcm].split(z_vtcm.op.axis[0], factor=factor)

s[x_vtcm].compute_at(s[z_vtcm], zouter)
s[y_vtcm].compute_at(s[z_vtcm], zouter)
@@ -130,10 +136,10 @@ def test_cache_read_write(hexagon_session: Session):

mem_copy_write = intrin_mem_copy(outer_shape, dtype, "global", "global.vtcm")

(cache_write_z,) = s[z].op.axis
s[z].tensorize(cache_write_z, mem_copy_write)
(cache_write_z,) = s[z_tensor].op.axis
s[z_tensor].tensorize(cache_write_z, mem_copy_write)

verify(hexagon_session, s, x, y, z, size)
verify(hexagon_session, s, x_tensor, y_tensor, z_tensor, size)


def layout_transform_2d(n):
@@ -142,24 +148,25 @@ def layout_transform_2d(n):

@tvm.testing.requires_hexagon
def test_cache_read_write_2d(hexagon_session: Session):
"""Test 2D cache_read and cache_write to global.vtcm for hexagon"""
size = 128
outer_shape = (size,)
factor = 16
inner_shape = (factor,)
dtype = "int8"

x = te.placeholder(shape=outer_shape, dtype=dtype, name="x")
y = te.placeholder(shape=outer_shape, dtype=dtype, name="y")
z = te.compute(outer_shape, lambda i: x[i] + y[i], name="z")
s = te.create_schedule(z.op)
x_tensor = te.placeholder(shape=outer_shape, dtype=dtype, name="x")
y_tensor = te.placeholder(shape=outer_shape, dtype=dtype, name="y")
z_tensor = te.compute(outer_shape, lambda i: x_tensor[i] + y_tensor[i], name="z")
s = te.create_schedule(z_tensor.op)

x_vtcm = s.cache_read(x, "global.vtcm", [z])
y_vtcm = s.cache_read(y, "global.vtcm", [z])
z_vtcm = s.cache_write(z, "global.vtcm")
x_vtcm = s.cache_read(x_tensor, "global.vtcm", [z_tensor])
y_vtcm = s.cache_read(y_tensor, "global.vtcm", [z_tensor])
z_vtcm = s.cache_write(z_tensor, "global.vtcm")

layout_x_vtcm = s[x_vtcm].transform_layout(layout_transform_2d)
layout_y_vtcm = s[y_vtcm].transform_layout(layout_transform_2d)
layout_z_vtcm = s[z_vtcm].transform_layout(layout_transform_2d)
_ = s[z_vtcm].transform_layout(layout_transform_2d)

mem_copy_read = intrin_mem_copy(inner_shape, dtype, "global.vtcm", "global")
s[x_vtcm].tensorize(layout_x_vtcm[1], mem_copy_read)
@@ -169,31 +176,32 @@ def test_cache_read_write_2d(hexagon_session: Session):
# on `z_vtcm` above therefore we must call `split` to modify the loop schedule
# over `z` to match the layout of `z_vtcm` such that we can accurately write
# `z_vtcm` back to `z` using memory copy intrinsic
zouter, zinner = s[z].split(z.op.axis[0], factor=factor)
_, zinner = s[z_tensor].split(z_tensor.op.axis[0], factor=factor)
mem_copy_write = intrin_mem_copy(inner_shape, dtype, "global", "global.vtcm")
s[z].tensorize(zinner, mem_copy_write)
s[z_tensor].tensorize(zinner, mem_copy_write)

verify(hexagon_session, s, x, y, z, size)
verify(hexagon_session, s, x_tensor, y_tensor, z_tensor, size)


@T.prim_func
def scale_by_two(A: T.Buffer[(8192,), "int8"], C: T.Buffer[(8192,), "int8"]):
def scale_by_two(buffer_a: T.Buffer[(8192,), "int8"], buffer_c: T.Buffer[(8192,), "int8"]):
for i in T.serial(
0,
8192,
):
with T.block("C"):
C[i] = A[i] * T.int8(2)
buffer_c[i] = buffer_a[i] * T.int8(2)


def test_vtcm_lowering():
"""Test lowering with vtcm mem scope"""
mod = tvm.IRModule.from_expr(scale_by_two.with_attr("global_symbol", "main"))
sch = tir.Schedule(mod, debug_mask="all")
block_c = sch.get_block("C")
(flat,) = sch.get_loops(block_c)
o, i, ii, iii = sch.split(flat, factors=[8, 4, 2, 128])
outer, _, _, _ = sch.split(flat, factors=[8, 4, 2, 128])
cache_block = sch.cache_read(block_c, 0, storage_scope="global.vtcm")
sch.compute_at(cache_block, o)
sch.compute_at(cache_block, outer)
lowered = tvm.lower(sch.mod["main"])

def ir_module_has_allocate_nodes(irmod):