Skip to content
This repository has been archived by the owner on Jan 15, 2024. It is now read-only.

Upgrade to use MXNet2.0.0.beta1 #1584

Open
wants to merge 10 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
4 changes: 2 additions & 2 deletions .github/workflows/unittests-gpu.yml
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ jobs:
--saved-output coverage.xml \
--save-path coverage.xml \
--remote https://github.com/${{ github.repository }} \
--command "python3 -m pip install pytest-forked && python3 -m pytest --forked --cov=. --cov-config=./.coveragerc --cov-report=xml --durations=50 --device="gpu" --runslow ./tests/" \
--command "python3 -m pip install pytest-forked && python3 -m pytest -vv --forked --cov=. --cov-config=./.coveragerc --cov-report=xml --durations=50 --device="gpu" --runslow ./tests/" \
--wait | tee batch_job.log


Expand All @@ -64,7 +64,7 @@ jobs:
--saved-output coverage.xml \
--save-path coverage.xml \
--remote https://github.com/${{ github.event.pull_request.head.repo.full_name }} \
--command "python3 -m pip install pytest-forked && python3 -m pytest --forked --cov=. --cov-config=./.coveragerc --cov-report=xml --durations=50 --device="gpu" --runslow ./tests/" \
--command "python3 -m pip install pytest-forked && python3 -m pytest -vv --forked --cov=. --cov-config=./.coveragerc --cov-report=xml --durations=50 --device="gpu" --runslow ./tests/" \
--wait | tee batch_job.log

- name: Wait for job and copy files from AWS s3
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/unittests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ jobs:
python -m pip install --upgrade pip
python -m pip install setuptools pytest pytest-cov contextvars
python -m pip install --upgrade cython
python -m pip install --pre "mxnet>=2.0.0b20210121" -f https://dist.mxnet.io/python
python -m pip install mxnet==2.0.0b1
python -m pip install -U -e .[extras,dev]
- name: Build and Install TVM
if: matrix.os == 'ubuntu-latest'
Expand Down
6 changes: 3 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -35,13 +35,13 @@ following commands:

```bash
# Install the version with CUDA 10.2
python3 -m pip install -U --pre "mxnet-cu102>=2.0.0a"
python3 -m pip install -U --pre "mxnet-cu102>=2.0.0b"

# Install the version with CUDA 11
python3 -m pip install -U --pre "mxnet-cu110>=2.0.0a"
python3 -m pip install -U --pre "mxnet-cu110>=2.0.0b"

# Install the cpu-only version
python3 -m pip install -U --pre "mxnet>=2.0.0a"
python3 -m pip install -U --pre "mxnet>=2.0.0b"
```


Expand Down
4 changes: 2 additions & 2 deletions conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -231,5 +231,5 @@ def pytest_generate_tests(metafunc):
devices = metafunc.config.option.device
if not devices:
devices = ['cpu']
if 'ctx' in metafunc.fixturenames:
metafunc.parametrize("ctx", [getattr(mx, device)() for device in devices])
if 'device' in metafunc.fixturenames:
metafunc.parametrize("device", [getattr(mx, device)() for device in devices])
8 changes: 4 additions & 4 deletions docs/install/install-include.rst
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ Select your preferences and run the install command.
.. code-block:: bash

# Install Apache MXNet (incubating) 2 Alpha or newer.
python3 -m pip install -U --pre "mxnet>=2.0.0a"
python3 -m pip install -U --pre "mxnet>=2.0.0b"

# Install GluonNLP
git clone https://github.com/dmlc/gluon-nlp.git
Expand All @@ -71,7 +71,7 @@ Select your preferences and run the install command.
# Install Apache MXNet (incubating) 2 Alpha or newer.
# Here we assume CUDA 10.2 is installed. You can change the number
# according to your own CUDA version, e.g., cu101, cu110
python3 -m pip install -U --pre "mxnet-cu102>=2.0.0a"
python3 -m pip install -U --pre "mxnet-cu102>=2.0.0b"

# Install GluonNLP
git clone https://github.com/dmlc/gluon-nlp.git
Expand All @@ -85,7 +85,7 @@ Select your preferences and run the install command.
.. code-block:: bash

# Install Apache MXNet (incubating) 2 Alpha or newer.
python3 -m pip install -U --pre "mxnet>=2.0.0a"
python3 -m pip install -U --pre "mxnet>=2.0.0b"

# Install GluonNLP
git clone https://github.com/dmlc/gluon-nlp.git
Expand All @@ -99,7 +99,7 @@ Select your preferences and run the install command.
# Install Apache MXNet (incubating) 2 Alpha or newer.
# Here we assume CUDA 10.2 is installed. You can change the number
# according to your own CUDA version, e.g., cu100, cu101
python3 -m pip install -U --pre "mxnet-cu102>=2.0.0a"
python3 -m pip install -U --pre "mxnet-cu102>=2.0.0b"

# Install GluonNLP
git clone https://github.com/dmlc/gluon-nlp.git
Expand Down
26 changes: 13 additions & 13 deletions docs/tutorials/word_embedding/word_embedding.md
Original file line number Diff line number Diff line change
Expand Up @@ -33,11 +33,11 @@ To begin, let's first import a few packages that we'll need for this example:
import warnings
warnings.filterwarnings('ignore')

from mxnet import gluon, nd
from mxnet import gluon, np
import gluonnlp as nlp
import re
import collections
import numpy as np
import numpy as onp

```

Expand Down Expand Up @@ -160,7 +160,7 @@ For example,

```{.python .input}
def simple(words):
return np.ones((len(words), 300))
return onp.ones((len(words), 300))
matrix = nlp.embedding.load_embeddings(vocab, 'wiki.simple', unk_method=simple)
```

Expand Down Expand Up @@ -217,7 +217,7 @@ input_dim, output_dim = matrix.shape
layer = gluon.nn.Embedding(input_dim, output_dim)
layer.initialize()
layer.weight.set_data(matrix)
layer(nd.array([5, 4]))[:, :5]
layer(np.array([5, 4]))[:, :5]
```

### Creating Vocabulary from Pre-trained Word Embeddings
Expand Down Expand Up @@ -259,16 +259,16 @@ cosine similarity. Cosine similarity determines the similarity between two vecto
```{.python .input}
import numpy as np
def cos_sim(x, y):
return np.dot(x, y) / (np.linalg.norm(x) * np.linalg.norm(y))
return onp.dot(x, y) / (onp.linalg.norm(x) * onp.linalg.norm(y))
```

The range of cosine similarity between two vectors can be between -1 and 1. The
larger the value, the larger the similarity between the two vectors.

```{.python .input}
x = np.array([1, 2])
y = np.array([10, 20])
z = np.array([-1, -2])
x = onp.array([1, 2])
y = onp.array([10, 20])
z = onp.array([-1, -2])

print(cos_sim(x, y))
print(cos_sim(x, z))
Expand All @@ -287,16 +287,16 @@ We can then find the indices for which the dot product is greatest (`topk`), whi

```{.python .input}
def norm_vecs_by_row(x):
return x / np.sqrt(np.sum(x * x, axis=1) + 1E-10).reshape((-1,1))
return x / onp.sqrt(onp.sum(x * x, axis=1) + 1E-10).reshape((-1,1))

def topk(res, k):
part = np.argpartition(res, -k)[-k:]
return part[np.argsort(res[part])].tolist()[::-1]
part = onp.argpartition(res, -k)[-k:]
return part[onp.argsort(res[part])].tolist()[::-1]

def get_knn(vocab, matrix, k, word):
word_vec = matrix[vocab[word]].reshape((-1, 1))
vocab_vecs = norm_vecs_by_row(matrix)
dot_prod = np.dot(vocab_vecs, word_vec)
dot_prod = onp.dot(vocab_vecs, word_vec)
indices = topk(dot_prod.reshape((len(vocab), )), k=k+1)
# Remove unknown and input tokens.
return vocab.to_tokens(indices[1:])
Expand Down Expand Up @@ -351,7 +351,7 @@ def get_top_k_by_analogy(vocab, matrix, k, word1, word2, word3):
word_vecs = [matrix[vocab[word]] for word in [word1, word2, word3]]
word_diff = (word_vecs[1] - word_vecs[0] + word_vecs[2]).reshape((-1, 1))
vocab_vecs = norm_vecs_by_row(matrix)
dot_prod = np.dot(vocab_vecs, word_diff)
dot_prod = onp.dot(vocab_vecs, word_diff)
indices = topk(dot_prod.reshape((len(vocab), )), k=k)
return vocab.to_tokens(indices)
```
Expand Down
1 change: 0 additions & 1 deletion scripts/benchmarks/benchmark_gluonnlp.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
from benchmark_utils import GluonNLPBackboneBenchmark
import multiprocessing as mp
from multiprocessing import Process
mx.npx.set_np()


MODELS = [
Expand Down
68 changes: 34 additions & 34 deletions scripts/benchmarks/benchmark_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -471,8 +471,8 @@ def traceit(frame, event, args):
if log_gpu:
# Clear GPU caches
if is_mxnet_available():
for ctx in mx_all_contexts:
ctx.empty_cache()
for device in mx_all_contexts:
device.empty_cache()
if is_torch_available():
torch_empty_cache()
if is_tf_available():
Expand Down Expand Up @@ -665,10 +665,10 @@ def compile_tvm_graph_executor(model, model_name, layout, compute_layout,
with tvm.transform.PassContext(opt_level=opt_level, required_pass=required_pass):
lib = relay.build(mod, target, params=params)
if use_gpu:
ctx = tvm.gpu()
device = tvm.gpu()
else:
ctx = tvm.cpu()
rt = graph_executor.GraphModule(lib["default"](ctx))
device = tvm.cpu()
rt = graph_executor.GraphModule(lib["default"](device))
_TVM_RT_CACHE[key] = rt
return rt

Expand Down Expand Up @@ -767,9 +767,9 @@ def _inference_speed_memory(self, model_name: str, batch_size: int, sequence_len
else:
dtype = 'float32'
if self._use_gpu:
ctx = mxnet.gpu()
device = mxnet.gpu()
else:
ctx = mxnet.cpu()
device = mxnet.cpu()
model_cls, cfg, tokenizer, backbone_param_path, _ = get_backbone(model_name)
cfg.defrost()
cfg.MODEL.layout = self._layout
Expand All @@ -780,22 +780,22 @@ def _inference_speed_memory(self, model_name: str, batch_size: int, sequence_len
model = model_cls.from_cfg(cfg, extract_feature=True, dtype=dtype)
else:
model = model_cls.from_cfg(cfg, dtype=dtype)
model.load_parameters(backbone_param_path, ctx=ctx, cast_dtype=True)
model.load_parameters(backbone_param_path, device=device, cast_dtype=True)
model.cast(dtype)
model.hybridize(static_alloc=True, static_shape=True)
vocab_size = cfg.MODEL.vocab_size
if self._layout == 'NT':
input_ids = mxnet.np.random.randint(0, vocab_size, (batch_size, sequence_length),
dtype=np.int32, ctx=ctx)
token_types = mxnet.np.zeros((batch_size, sequence_length), dtype=np.int32, ctx=ctx)
dtype=np.int32, device=device)
token_types = mxnet.np.zeros((batch_size, sequence_length), dtype=np.int32, device=device)
valid_length = mxnet.np.full((batch_size,), sequence_length,
dtype=np.int32, ctx=ctx)
dtype=np.int32, device=device)
elif self._layout == 'TN':
input_ids = mxnet.np.random.randint(0, vocab_size, (sequence_length, batch_size),
dtype=np.int32, ctx=ctx)
token_types = mxnet.np.zeros((sequence_length, batch_size), dtype=np.int32, ctx=ctx)
dtype=np.int32, device=device)
token_types = mxnet.np.zeros((sequence_length, batch_size), dtype=np.int32, device=device)
valid_length = mxnet.np.full((batch_size,), sequence_length,
dtype=np.int32, ctx=ctx)
dtype=np.int32, device=device)
else:
raise NotImplementedError
mxnet.npx.waitall()
Expand All @@ -817,17 +817,17 @@ def run_forward():
tvm = try_import_tvm()
run_forward()
if self._use_gpu:
ctx = tvm.gpu()
device = tvm.gpu()
else:
ctx = tvm.cpu()
device = tvm.cpu()
rt = compile_tvm_graph_executor(model=model, model_name=model_name,
layout=self._layout, compute_layout=self._compute_layout,
batch_size=batch_size, seq_length=sequence_length,
instance_type=self._instance_type,
dtype='float32' if not self._use_fp16 else 'float16')
tvm_input_ids = tvm.nd.array(input_ids.asnumpy(), ctx=ctx)
tvm_token_types = tvm.nd.array(token_types.asnumpy(), ctx=ctx)
tvm_valid_length = tvm.nd.array(valid_length.asnumpy(), ctx=ctx)
tvm_input_ids = tvm.nd.array(input_ids.asnumpy(), device=device)
tvm_token_types = tvm.nd.array(token_types.asnumpy(), device=device)
tvm_valid_length = tvm.nd.array(valid_length.asnumpy(), device=device)

if 'roberta' in model_name or 'xlmr' in model_name:
rt.set_input(data0=tvm_input_ids, data1=tvm_valid_length)
Expand All @@ -837,7 +837,7 @@ def run_forward():
rt.set_input(data0=tvm_input_ids, data1=tvm_token_types,
data2=tvm_valid_length)
# ftimer returns a ProfileResult
ftimer = rt.module.time_evaluator("run", ctx, number=3, repeat=self._repeat)
ftimer = rt.module.time_evaluator("run", device, number=3, repeat=self._repeat)
runtimes = np.min(ftimer().results)
else:
timeit.repeat(run_forward, repeat=1, number=3)
Expand Down Expand Up @@ -867,9 +867,9 @@ def _train_speed_memory(self, model_name: str, batch_size: int, sequence_length:
amp.init()

if self._use_gpu:
ctx = mxnet.gpu()
device = mxnet.gpu()
else:
ctx = mxnet.cpu()
device = mxnet.cpu()
model_cls, cfg, tokenizer, backbone_param_path, _ = get_backbone(model_name)
cfg.defrost()
cfg.MODEL.layout = self._layout
Expand All @@ -880,7 +880,7 @@ def _train_speed_memory(self, model_name: str, batch_size: int, sequence_length:
model = model_cls.from_cfg(cfg, extract_feature=True)
else:
model = model_cls.from_cfg(cfg)
model.load_parameters(backbone_param_path, ctx=ctx)
model.load_parameters(backbone_param_path, device=device)
model.hybridize(static_alloc=True)
vocab_size = cfg.MODEL.vocab_size
if hasattr(cfg.MODEL, 'units'):
Expand All @@ -889,27 +889,27 @@ def _train_speed_memory(self, model_name: str, batch_size: int, sequence_length:
out_units = cfg.MODEL.DECODER.units
if self._layout == 'NT':
input_ids = mxnet.np.random.randint(0, vocab_size, (batch_size, sequence_length),
dtype=np.int32, ctx=ctx)
token_types = mxnet.np.zeros((batch_size, sequence_length), dtype=np.int32, ctx=ctx)
dtype=np.int32, device=device)
token_types = mxnet.np.zeros((batch_size, sequence_length), dtype=np.int32, device=device)
valid_length = mxnet.np.full((batch_size,), sequence_length,
dtype=np.int32, ctx=ctx)
dtype=np.int32, device=device)
contextual_embedding_ograd = mxnet.np.random.normal(
0, 1, (batch_size, sequence_length, out_units),
dtype=np.float32, ctx=ctx)
dtype=np.float32, device=device)
pooled_out_ograd = mxnet.np.random.normal(
0, 1, (batch_size, out_units), dtype=np.float32, ctx=ctx)
0, 1, (batch_size, out_units), dtype=np.float32, device=device)
elif self._layout == 'TN':
input_ids = mxnet.np.random.randint(0, vocab_size, (sequence_length, batch_size),
dtype=np.int32, ctx=ctx)
token_types = mxnet.np.zeros((sequence_length, batch_size), dtype=np.int32, ctx=ctx)
dtype=np.int32, device=device)
token_types = mxnet.np.zeros((sequence_length, batch_size), dtype=np.int32, device=device)
valid_length = mxnet.np.full((batch_size,), sequence_length,
dtype=np.int32, ctx=ctx)
dtype=np.int32, device=device)
contextual_embedding_ograd = mxnet.np.random.normal(
0, 1, (sequence_length, batch_size, out_units),
dtype=np.float32, ctx=ctx)
dtype=np.float32, device=device)
pooled_out_ograd = mxnet.np.random.normal(0, 1, (batch_size, out_units),
dtype=np.float32,
ctx=ctx)
device=device)
else:
raise NotImplementedError
if model_cls.__name__ in ['BertModel', 'AlbertModel', 'ElectraModel', 'MobileBertModel']:
Expand Down Expand Up @@ -939,7 +939,7 @@ def train_step():
mxnet.npx.waitall()
runtimes = timeit.repeat(train_step, repeat=self._repeat, number=3)
mxnet.npx.waitall()
ctx.empty_cache()
device.empty_cache()
mxnet.npx.waitall()
# Profile memory
if self._use_gpu:
Expand Down
8 changes: 4 additions & 4 deletions scripts/classification/classification.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from gluonnlp.models import get_backbone
from gluonnlp.utils.parameter import clip_grad_global_norm
from gluonnlp.utils.preprocessing import get_trimmed_lengths
from gluonnlp.utils.misc import get_mxnet_visible_ctx, grouper, repeat
from gluonnlp.utils.misc import get_mxnet_visible_device, grouper, repeat
from mxnet.gluon.data import batchify as bf
from mxnet.gluon.data import DataLoader
from mxnet.lr_scheduler import PolyScheduler
Expand All @@ -30,7 +30,7 @@ def forward(self, data, token_types, valid_length):
out = self.out_proj(pooled_out)
return out

def initialize_with_pretrained_backbone(self, backbone_params_path, ctx=None):
self.backbone.load_parameters(backbone_params_path, ctx=ctx)
self.out_proj.initialize(ctx=ctx)
def initialize_with_pretrained_backbone(self, backbone_params_path, device=None):
self.backbone.load_parameters(backbone_params_path, device=device)
self.out_proj.initialize(device=device)

2 changes: 1 addition & 1 deletion scripts/classification/classification_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from gluonnlp.models import get_backbone
from gluonnlp.utils.parameter import clip_grad_global_norm
from gluonnlp.utils.preprocessing import get_trimmed_lengths
from gluonnlp.utils.misc import get_mxnet_visible_ctx, grouper, repeat
from gluonnlp.utils.misc import get_mxnet_visible_device, grouper, repeat
from mxnet.gluon.data import batchify as bf
from mxnet.gluon.data import DataLoader
from mxnet.lr_scheduler import PolyScheduler
Expand Down