diff --git a/.github/workflows/pull.yml b/.github/workflows/pull.yml index 6b9e184afc26..3f835b57add8 100644 --- a/.github/workflows/pull.yml +++ b/.github/workflows/pull.yml @@ -302,26 +302,6 @@ jobs: docker-image-name: pytorch-linux-focal-py3.7-gcc7 build-generates-artifacts: false - linux-bionic-cuda11_6-py3_10-gcc7-deploy-build: - name: linux-bionic-cuda11_6-py3_10-gcc7-deploy - uses: ./.github/workflows/_linux-build.yml - with: - build-environment: linux-bionic-cuda11.6-py3.10-gcc7-deploy - docker-image-name: pytorch-linux-bionic-cuda11.6-cudnn8-py3-gcc7 - test-matrix: | - { include: [ - { config: "deploy", shard: 1, num_shards: 1, runner: "linux.4xlarge.nvidia.gpu" }, - ]} - - deploy-linux-bionic-cuda11_6-py3_10-gcc7-test: - name: linux-bionic-cuda11_6-py3_10-gcc7-deploy - uses: ./.github/workflows/_linux-test.yml - needs: linux-bionic-cuda11_6-py3_10-gcc7-deploy-build - with: - build-environment: linux-bionic-cuda11.6-py3.10-gcc7-deploy - docker-image: ${{ needs.linux-bionic-cuda11_6-py3_10-gcc7-deploy-build.outputs.docker-image }} - test-matrix: ${{ needs.linux-bionic-cuda11_6-py3_10-gcc7-deploy-build.outputs.test-matrix }} - linux-focal-rocm5_2-py3_7-build: # don't run build twice on master if: github.event_name == 'pull_request' diff --git a/.gitignore b/.gitignore index 5dbad08f4f4c..3e6f3831c4c9 100644 --- a/.gitignore +++ b/.gitignore @@ -78,10 +78,6 @@ torch/testing/_internal/generated/annotated_fn_args.py torch/testing/_internal/data/*.pt torch/csrc/api/include/torch/version.h torch/csrc/cudnn/cuDNN.cpp -torch/csrc/deploy/example/generated -torch/csrc/deploy/interpreter/cpython -torch/csrc/deploy/interpreter/frozen -torch/csrc/deploy/interpreter/third_party/typing_extensions.py torch/csrc/generated torch/csrc/generic/TensorMethods.cpp torch/csrc/jit/generated/* diff --git a/.jenkins/pytorch/build.sh b/.jenkins/pytorch/build.sh index 12724f598bd3..f0c3d94b7f96 100755 --- a/.jenkins/pytorch/build.sh +++ b/.jenkins/pytorch/build.sh @@ -19,12 +19,6 @@ if [[ "$BUILD_ENVIRONMENT" == *-mobile-*build* ]]; then exec "$(dirname "${BASH_SOURCE[0]}")/build-mobile.sh" "$@" fi -if [[ "$BUILD_ENVIRONMENT" == *deploy* ]]; then - # Enabling DEPLOY build (embedded torch python interpreter, experimental) - # only on one config for now, can expand later - export USE_DEPLOY=ON -fi - echo "Python version:" python --version diff --git a/.jenkins/pytorch/test.sh b/.jenkins/pytorch/test.sh index acf2fa2064f0..7113106748b8 100755 --- a/.jenkins/pytorch/test.sh +++ b/.jenkins/pytorch/test.sh @@ -655,16 +655,6 @@ test_dynamo() { popd } -test_torch_deploy() { - python torch/csrc/deploy/example/generate_examples.py - ln -sf "$TORCH_LIB_DIR"/libtorch* "$TORCH_BIN_DIR" - ln -sf "$TORCH_LIB_DIR"/libshm* "$TORCH_BIN_DIR" - ln -sf "$TORCH_LIB_DIR"/libc10* "$TORCH_BIN_DIR" - "$TORCH_BIN_DIR"/test_deploy - "$TORCH_BIN_DIR"/test_deploy_gpu - assert_git_not_dirty -} - test_docs_test() { .jenkins/pytorch/docs-test.sh } @@ -673,10 +663,7 @@ if ! [[ "${BUILD_ENVIRONMENT}" == *libtorch* || "${BUILD_ENVIRONMENT}" == *-baze (cd test && python -c "import torch; print(torch.__config__.show())") (cd test && python -c "import torch; print(torch.__config__.parallel_info())") fi -if [[ "${TEST_CONFIG}" == *deploy* ]]; then - install_torchdynamo - test_torch_deploy -elif [[ "${TEST_CONFIG}" == *backward* ]]; then +if [[ "${TEST_CONFIG}" == *backward* ]]; then test_forward_backward_compatibility # Do NOT add tests after bc check tests, see its comment. 
elif [[ "${TEST_CONFIG}" == *xla* ]]; then diff --git a/.lintrunner.toml b/.lintrunner.toml index 16357c27f956..f302724b150a 100644 --- a/.lintrunner.toml +++ b/.lintrunner.toml @@ -170,7 +170,6 @@ command = [ [[linter]] code = 'CLANGTIDY' include_patterns = [ - 'torch/csrc/deploy/**/*.cpp', 'torch/csrc/fx/**/*.cpp', 'torch/csrc/generic/**/*.cpp', 'torch/csrc/onnx/**/*.cpp', @@ -183,7 +182,6 @@ exclude_patterns = [ # FunctionsManual.cpp is excluded to keep this diff clean. It will be fixed # in a follow up PR. # /torch/csrc/generic/*.cpp is excluded because those files aren't actually built. - # deploy/interpreter files are excluded due to using macros and other techniquies # that are not easily converted to accepted c++ 'torch/csrc/jit/passes/onnx/helper.cpp', 'torch/csrc/jit/passes/onnx/shape_type_inference.cpp', @@ -197,11 +195,6 @@ exclude_patterns = [ 'torch/csrc/autograd/FunctionsManual.cpp', 'torch/csrc/generic/*.cpp', 'torch/csrc/jit/codegen/cuda/runtime/*', - 'torch/csrc/deploy/interactive_embedded_interpreter.cpp', - 'torch/csrc/deploy/interpreter/**', - 'torch/csrc/deploy/test_deploy_python_ext.cpp', - 'torch/csrc/deploy/test_deploy_missing_interpreter.cpp', - 'torch/csrc/deploy/test_deploy_gpu.cpp', 'torch/csrc/utils/disable_torch_function.cpp', ] init_command = [ diff --git a/BUILD.bazel b/BUILD.bazel index 2c00e0d1dc56..df780db33f7b 100644 --- a/BUILD.bazel +++ b/BUILD.bazel @@ -1748,7 +1748,6 @@ cc_library( # Torch integration tests rely on a labeled data set from the MNIST database. # http://yann.lecun.com/exdb/mnist/ -# imethod.cpp is excluded since torch/csrc/deploy* build is not yet supported. cpp_api_tests = glob( ["test/cpp/api/*.cpp"], exclude = [ diff --git a/CMakeLists.txt b/CMakeLists.txt index c2c6fb2496de..e2e1f69457e4 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -345,9 +345,6 @@ cmake_dependent_option( option(ONNX_ML "Enable traditional ONNX ML API." ON) option(HAVE_SOVERSION "Whether to add SOVERSION to the shared objects" OFF) option(BUILD_LIBTORCH_CPU_WITH_DEBUG "Enable RelWithDebInfo for libtorch_cpu target only" OFF) -cmake_dependent_option( - USE_DEPLOY "Build embedded torch::deploy interpreter. See torch/csrc/deploy/README.md for more info." 
OFF - "BUILD_PYTHON" OFF) cmake_dependent_option(USE_CCACHE "Attempt using CCache to wrap the compilation" ON "UNIX" OFF) option(WERROR "Build with -Werror supported by the compiler" OFF) option(USE_COREML_DELEGATE "Use the CoreML backend through delegate APIs" OFF) @@ -1177,11 +1174,6 @@ endif() include(cmake/Summary.cmake) caffe2_print_configuration_summary() -# ---[ Torch Deploy -if(USE_DEPLOY) - add_subdirectory(torch/csrc/deploy) -endif() - if(BUILD_FUNCTORCH) add_subdirectory(functorch) endif() diff --git a/caffe2/CMakeLists.txt b/caffe2/CMakeLists.txt index 90eaebd0b431..9074b848411f 100644 --- a/caffe2/CMakeLists.txt +++ b/caffe2/CMakeLists.txt @@ -1154,12 +1154,6 @@ install(FILES "${TORCH_SRC_DIR}/library.h" "${TORCH_SRC_DIR}/custom_class_detail.h" DESTINATION ${TORCH_INSTALL_INCLUDE_DIR}/torch) -if(USE_DEPLOY) - install(FILES - "${TORCH_SRC_DIR}/deploy.h" - DESTINATION ${TORCH_INSTALL_INCLUDE_DIR}/torch) -endif() - if(BUILD_TEST) if(BUILD_LITE_INTERPRETER) add_subdirectory( diff --git a/cmake/Summary.cmake b/cmake/Summary.cmake index a892f550a611..d0e76bbdd857 100644 --- a/cmake/Summary.cmake +++ b/cmake/Summary.cmake @@ -193,7 +193,6 @@ function(caffe2_print_configuration_summary) if(NOT "${SELECTED_OP_LIST}" STREQUAL "") message(STATUS " SELECTED_OP_LIST : ${SELECTED_OP_LIST}") endif() - message(STATUS " USE_DEPLOY : ${USE_DEPLOY}") message(STATUS " Public Dependencies : ${Caffe2_PUBLIC_DEPENDENCY_LIBS}") message(STATUS " Private Dependencies : ${Caffe2_DEPENDENCY_LIBS}") # coreml diff --git a/docs/source/deploy.rst b/docs/source/deploy.rst index 9311ba8c4ee6..8bc5a1f61da2 100644 --- a/docs/source/deploy.rst +++ b/docs/source/deploy.rst @@ -1,239 +1,4 @@ -torch::deploy -============= +torch::deploy has been moved to pytorch/multipy +=============================================== -``torch::deploy`` is a system that allows you to run multiple embedded Python -interpreters in a C++ process without a shared global interpreter lock. For more -information on how ``torch::deploy`` works internally, please see the related -`arXiv paper `_. - - -.. warning:: - - This is a prototype feature. Only Linux x86 is supported, and the API may - change without warning. - - -Getting Started ---------------- - -Installing ``torch::deploy`` -~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -``torch::deploy`` is not yet built by default in our binary releases, so to get -a copy of libtorch with ``torch::deploy`` enabled, follow the instructions for -`building PyTorch from source `_. - -When running ``setup.py``, you will need to specify ``USE_DEPLOY=1``, like: - -.. code-block:: bash - - export CMAKE_PREFIX_PATH=${CONDA_PREFIX:-"$(dirname $(which conda))/../"} - export USE_DEPLOY=1 - python setup.py develop - - -Creating a model package in Python -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -``torch::deploy`` can load and run Python models that are packaged with -``torch.package``. You can learn more about ``torch.package`` in the -``torch.package`` `documentation `_. - -For now, let's create a simple model that we can load and run in ``torch::deploy``. - -.. code-block:: py - - from torch.package import PackageExporter - import torchvision - - # Instantiate some model - model = torchvision.models.resnet.resnet18() - - # Package and export it. 
- with PackageExporter("my_package.pt") as e: - e.intern("torchvision.**") - e.extern("numpy.**") - e.extern("sys") - e.extern("PIL.*") - e.save_pickle("model", "model.pkl", model) - -Note that since "numpy", "sys" and "PIL" were marked as "extern", `torch.package` will -look for these dependencies on the system that loads this package. They will not be packaged -with the model. - -Now, there should be a file named ``my_package.pt`` in your working directory. - - -Loading and running the model in C++ -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Set an environment variable (e.g. $PATH_TO_EXTERN_PYTHON_PACKAGES) to indicate to the interpreters -where the external Python dependencies can be found. In the example below, the path to the -site-packages of a conda environment is provided. - -.. code-block:: bash - - export PATH_TO_EXTERN_PYTHON_PACKAGES= \ - "~/anaconda/envs/deploy-example-env/lib/python3.8/site-packages" - - -Let's create a minimal C++ program to that loads the model. - -.. code-block:: cpp - - #include - #include - #include - #include - - #include - #include - - int main(int argc, const char* argv[]) { - if (argc != 2) { - std::cerr << "usage: example-app \n"; - return -1; - } - - // Start an interpreter manager governing 4 embedded interpreters. - std::shared_ptr env = - std::make_shared( - std::getenv("PATH_TO_EXTERN_PYTHON_PACKAGES") - ); - torch::deploy::InterpreterManager manager(4, env); - - try { - // Load the model from the torch.package. - torch::deploy::Package package = manager.loadPackage(argv[1]); - torch::deploy::ReplicatedObj model = package.loadPickle("model", "model.pkl"); - } catch (const c10::Error& e) { - std::cerr << "error loading the model\n"; - std::cerr << e.msg(); - return -1; - } - - std::cout << "ok\n"; - } - -This small program introduces many of the core concepts of ``torch::deploy``. - -An ``InterpreterManager`` abstracts over a collection of independent Python -interpreters, allowing you to load balance across them when running your code. - -``PathEnvironment`` enables you to specify the location of Python -packages on your system which are external, but necessary, for your model. - -Using the ``InterpreterManager::loadPackage`` method, you can load a -``torch.package`` from disk and make it available to all interpreters. - -``Package::loadPickle`` allows you to retrieve specific Python objects -from the package, like the ResNet model we saved earlier. - -Finally, the model itself is a ``ReplicatedObj``. This is an abstract handle to -an object that is replicated across multiple interpreters. When you interact -with a ``ReplicatedObj`` (for example, by calling ``forward``), it will select -an free interpreter to execute that interaction. - - -Building and running the application -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Locate `libtorch_deployinterpreter.o` on your system. This should have been -built when PyTorch was built from source. In the same PyTorch directory, locate -the deploy source files. Set these locations to an environment variable for the build. -An example of where these can be found on a system is shown below. - -.. code-block:: bash - - export DEPLOY_INTERPRETER_PATH="/pytorch/build/torch/csrc/deploy/" - export DEPLOY_SRC_PATH="/pytorch/torch/csrc/deploy/" - -As ``torch::deploy`` is in active development, these manual steps will be removed -soon. - -Assuming the above C++ program was stored in a file called, `example-app.cpp`, a -minimal CMakeLists.txt file would look like: - -.. 
code-block:: cmake - - cmake_minimum_required(VERSION 3.19 FATAL_ERROR) - project(deploy_tutorial) - - find_package(fmt REQUIRED) - find_package(Torch REQUIRED) - - add_library(torch_deploy_internal STATIC - ${DEPLOY_INTERPRETER_PATH}/libtorch_deployinterpreter.o - ${DEPLOY_DIR}/deploy.cpp - ${DEPLOY_DIR}/loader.cpp - ${DEPLOY_DIR}/path_environment.cpp - ${DEPLOY_DIR}/elf_file.cpp) - - # for python builtins - target_link_libraries(torch_deploy_internal PRIVATE - crypt pthread dl util m z ffi lzma readline nsl ncursesw panelw) - target_link_libraries(torch_deploy_internal PUBLIC - shm torch fmt::fmt-header-only) - caffe2_interface_library(torch_deploy_internal torch_deploy) - - add_executable(example-app example.cpp) - target_link_libraries(example-app PUBLIC - "-Wl,--no-as-needed -rdynamic" dl torch_deploy "${TORCH_LIBRARIES}") - -Currently, it is necessary to build ``torch::deploy`` as a static library. -In order to correctly link to a static library, the utility ``caffe2_interface_library`` -is used to appropriately set and unset ``--whole-archive`` flag. - -Furthermore, the ``-rdynamic`` flag is needed when linking to the executable -to ensure that symbols are exported to the dynamic table, making them accessible -to the deploy interpreters (which are dynamically loaded). - -The last step is configuring and building the project. Assuming that our code -directory is laid out like this: - -.. code-block:: none - - example-app/ - CMakeLists.txt - example-app.cpp - -We can now run the following commands to build the application from within the -``example-app/`` folder: - -.. code-block:: bash - - mkdir build - cd build - # Point CMake at the built version of PyTorch we just installed. - cmake -DCMAKE_PREFIX_PATH="$(python -c 'import torch.utils; print(torch.utils.cmake_prefix_path)')" .. \ - -DDEPLOY_INTERPRETER_PATH="$DEPLOY_INTERPRETER_PATH" \ - -DDEPLOY_DIR="$DEPLOY_DIR" - cmake --build . --config Release - -Now we can run our app: - -.. code-block:: bash - - ./example-app /path/to/my_package.pt - - -Executing ``forward`` in C++ -~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -One you have your model loaded in C++, it is easy to execute it: - -.. code-block:: cpp - - // Create a vector of inputs. - std::vector inputs; - inputs.push_back(torch::ones({1, 3, 224, 224})); - - // Execute the model and turn its output into a tensor. - at::Tensor output = model(inputs).toTensor(); - std::cout << output.slice(/*dim=*/1, /*start=*/0, /*end=*/5) << '\n'; - -Notably, the model's forward function is executing in Python, in an embedded -CPython interpreter. Note that the model is a ``ReplicatedObj``, which means -that you can call ``model()`` from multiple threads and the forward method will -be executed on multiple independent interpreters, with no global interpreter -lock. +``torch::deploy`` has been moved to its new home at `https://github.com/pytorch/multipy `_. 
diff --git a/setup.py b/setup.py index 99a26599d22a..0577b77d67c0 100644 --- a/setup.py +++ b/setup.py @@ -1089,11 +1089,7 @@ def main(): 'include/torch/csrc/autograd/generated/*.h', 'include/torch/csrc/autograd/utils/*.h', 'include/torch/csrc/cuda/*.h', - 'include/torch/csrc/deploy/*.h', - 'include/torch/csrc/deploy/interpreter/*.h', - 'include/torch/csrc/deploy/interpreter/*.hpp', - 'include/torch/csrc/distributed/c10d/*.h', - 'include/torch/csrc/distributed/c10d/*.hpp', + 'include/torch/csrc/distributed/c10d/exception.h', 'include/torch/csrc/distributed/rpc/*.h', 'include/torch/csrc/jit/*.h', 'include/torch/csrc/jit/backends/*.h', diff --git a/test/cpp/api/CMakeLists.txt b/test/cpp/api/CMakeLists.txt index 17ae64ab74ee..6b801a073182 100644 --- a/test/cpp/api/CMakeLists.txt +++ b/test/cpp/api/CMakeLists.txt @@ -42,11 +42,6 @@ set(TORCH_API_TEST_SOURCES ${TORCH_API_TEST_DIR}/grad_mode.cpp ${TORCH_API_TEST_DIR}/operations.cpp ) - -if(USE_DEPLOY) - list(APPEND TORCH_API_TEST_SOURCES ${TORCH_API_TEST_DIR}/imethod.cpp) -endif() - if(USE_CUDA) list(APPEND TORCH_API_TEST_SOURCES ${TORCH_API_TEST_DIR}/parallel.cpp) endif() @@ -68,10 +63,6 @@ if(USE_CUDA) target_compile_definitions(test_api PRIVATE "USE_CUDA") endif() -if(USE_DEPLOY) - target_link_libraries(test_api PRIVATE torch_deploy) -endif() - # Workaround for https://github.com/pytorch/pytorch/issues/40941 if(USE_OPENMP AND CMAKE_COMPILER_IS_GNUCXX AND (CMAKE_CXX_COMPILER_VERSION VERSION_LESS 8.0.0)) # Compiling transformer.cpp or pow_test.cpp with -O2+ and both -fuse-openmp and -faligned-newout any optimization diff --git a/test/cpp/api/imethod.cpp b/test/cpp/api/imethod.cpp deleted file mode 100644 index 6d257d377acb..000000000000 --- a/test/cpp/api/imethod.cpp +++ /dev/null @@ -1,64 +0,0 @@ -// (c) Facebook, Inc. and its affiliates. Confidential and proprietary. - -#include -#include -#include -#include - -using namespace ::testing; -using namespace caffe2; - -const char* simple = "torch/csrc/deploy/example/generated/simple"; -const char* simpleJit = "torch/csrc/deploy/example/generated/simple_jit"; - -// TODO(jwtan): Try unifying cmake and buck for getting the path. -const char* path(const char* envname, const char* path) { - const char* env = getenv(envname); - return env ? env : path; -} - -// Run `python torch/csrc/deploy/example/generate_examples.py` before running -// the following tests. -// TODO(jwtan): Figure out a way to automate the above step for development. (CI -// has it already.) 
-TEST(IMethodTest, CallMethod) { - auto scriptModel = torch::jit::load(path("SIMPLE_JIT", simpleJit)); - auto scriptMethod = scriptModel.get_method("forward"); - - torch::deploy::InterpreterManager manager(3); - torch::deploy::Package package = manager.loadPackage(path("SIMPLE", simple)); - auto pyModel = package.loadPickle("model", "model.pkl"); - torch::deploy::PythonMethodWrapper pyMethod(pyModel, "forward"); - - EXPECT_EQ(scriptMethod.name(), "forward"); - EXPECT_EQ(pyMethod.name(), "forward"); - - auto input = torch::ones({10, 20}); - auto outputPy = pyMethod({input}); - auto outputScript = scriptMethod({input}); - EXPECT_TRUE(outputPy.isTensor()); - EXPECT_TRUE(outputScript.isTensor()); - auto outputPyTensor = outputPy.toTensor(); - auto outputScriptTensor = outputScript.toTensor(); - - EXPECT_TRUE(outputPyTensor.equal(outputScriptTensor)); - EXPECT_EQ(outputPyTensor.numel(), 200); -} - -TEST(IMethodTest, GetArgumentNames) { - auto scriptModel = torch::jit::load(path("SIMPLE_JIT", simpleJit)); - auto scriptMethod = scriptModel.get_method("forward"); - - auto& scriptNames = scriptMethod.getArgumentNames(); - EXPECT_EQ(scriptNames.size(), 1); - EXPECT_STREQ(scriptNames[0].c_str(), "input"); - - torch::deploy::InterpreterManager manager(3); - torch::deploy::Package package = manager.loadPackage(path("SIMPLE", simple)); - auto pyModel = package.loadPickle("model", "model.pkl"); - torch::deploy::PythonMethodWrapper pyMethod(pyModel, "forward"); - - auto& pyNames = pyMethod.getArgumentNames(); - EXPECT_EQ(pyNames.size(), 1); - EXPECT_STREQ(pyNames[0].c_str(), "input"); -} diff --git a/tools/linter/clang_tidy/generate_build_files.py b/tools/linter/clang_tidy/generate_build_files.py index 35f1b81d8989..3986d3d28e4d 100644 --- a/tools/linter/clang_tidy/generate_build_files.py +++ b/tools/linter/clang_tidy/generate_build_files.py @@ -32,7 +32,6 @@ def update_submodules() -> None: def gen_compile_commands() -> None: os.environ["USE_NCCL"] = "0" - os.environ["USE_DEPLOY"] = "1" os.environ["CC"] = "clang" os.environ["CXX"] = "clang++" run_timed_cmd([sys.executable, "setup.py", "--cmake-only", "build"]) diff --git a/torch/CMakeLists.txt b/torch/CMakeLists.txt index 237d3182cc55..4452ddb5b383 100644 --- a/torch/CMakeLists.txt +++ b/torch/CMakeLists.txt @@ -279,90 +279,6 @@ if(USE_NCCL AND NOT WIN32) list(APPEND TORCH_PYTHON_COMPILE_DEFINITIONS USE_NCCL) endif() - -# WARNING- any TORCH_PYTHON_COMPILE_DEFINITIONS above this line -# affect both torch_python and DEPLOY interpreter. -if(USE_DEPLOY) - add_library(torch_python_obj OBJECT ${TORCH_PYTHON_SRCS}) - if(NOT MSVC) - target_compile_options(torch_python_obj PRIVATE -Wno-unused-variable) - endif() - if(USE_DISTRIBUTED) - # Set c10d-related compile definitions. For a "normal" build of - # libtorch_python, these are set on libtorch as PUBLIC so they are - # automatically propagated when libtorch_python links against libtorch. But - # since in the deploy build we are intentionally *not* linking against - # libtorch, we need to set them manually here. - list(APPEND TORCH_PYTHON_COMPILE_DEFINITIONS USE_DISTRIBUTED) - if(USE_GLOO AND USE_C10D_GLOO) - list(APPEND TORCH_PYTHON_COMPILE_DEFINITIONS USE_C10D_GLOO) - endif() - if(USE_UCC AND USE_C10D_UCC) - list(APPEND TORCH_PYTHON_COMPILE_DEFINITIONS USE_C10D_UCC) - endif() - if(USE_NCCL AND USE_C10D_NCCL) - list(APPEND TORCH_PYTHON_COMPILE_DEFINITIONS USE_C10D_NCCL) - # Put nccl headers on the include path. 
We are specifically only setting - # include dirs here instead of linking against __caffe2_nccl wholesale - # to ensure we aren't accidentally replicating the nccl lib. - target_include_directories(torch_python_obj PRIVATE $) - endif() - if(USE_MPI AND USE_C10D_MPI) - list(APPEND TORCH_PYTHON_COMPILE_DEFINITIONS USE_C10D_MPI) - endif() - - # Pass USE_RPC in order to reduce use of - # #if defined(USE_DISTRIBUTED) && !defined(_WIN32) - # need to be removed when RPC is supported - if(NOT WIN32) - target_compile_definitions(torch_cpu PUBLIC USE_RPC) - endif() - if(USE_TENSORPIPE) - list(APPEND TORCH_PYTHON_COMPILE_DEFINITIONS USE_TENSORPIPE) - endif() - endif() - target_compile_definitions(torch_python_obj PRIVATE "-DTHP_BUILD_MAIN_LIB -DUSE_DEPLOY") - - target_compile_definitions(torch_python_obj PRIVATE ${TORCH_PYTHON_COMPILE_DEFINITIONS}) - - target_compile_definitions(torch_python_obj PUBLIC ${TORCH_PYTHON_PUBLIC_COMPILE_DEFINITIONS}) - - target_compile_options(torch_python_obj PRIVATE ${TORCH_PYTHON_COMPILE_OPTIONS}) - - if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU") - target_compile_options(torch_python_obj PRIVATE -fno-gnu-unique) - endif() - - target_include_directories(torch_python_obj PUBLIC ${TORCH_PYTHON_INCLUDE_DIRECTORIES}) - target_include_directories(torch_python_obj PRIVATE ../third_party/fmt/include) - - # need to specify the dependency so the generated headers exist, - # missing dependency since torch_python_obj doesn't link onnx, the interpreter lib does. - add_dependencies(torch_python_obj onnx) - - target_include_directories(torch_python_obj SYSTEM PRIVATE - ${PYTHON_INCLUDE_DIRS} - ${pybind11_INCLUDE_DIRS}) - - if(HAVE_SOVERSION) - set_target_properties(torch_python_obj PROPERTIES - VERSION ${TORCH_VERSION} SOVERSION ${TORCH_SOVERSION}) - endif() - add_dependencies(torch_python_obj torch_python_stubs) - - # Required workaround for generated sources - # See https://samthursfield.wordpress.com/2015/11/21/cmake-dependencies-between-targets-and-files-and-custom-commands/#custom-commands-in-different-directories - add_dependencies(torch_python_obj generate-torch-sources) - set_source_files_properties( - ${GENERATED_THNN_SOURCES} - ${GENERATED_CXX_PYTHON} - PROPERTIES GENERATED TRUE - ) - - add_dependencies(torch_python_obj gen_torch_version) - -endif() - if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU") # Workaround for https://gcc.gnu.org/bugzilla/show_bug.cgi?id=80947 in EmbeddingBag.cpp set_source_files_properties(${TORCH_SRC_DIR}/csrc/utils/throughput_benchmark.cpp PROPERTIES COMPILE_FLAGS -Wno-attributes) diff --git a/torch/_C/__init__.pyi.in b/torch/_C/__init__.pyi.in index 5f53fc4e9ffe..b7eb1d0e40f5 100644 --- a/torch/_C/__init__.pyi.in +++ b/torch/_C/__init__.pyi.in @@ -875,7 +875,6 @@ def _disabled_torch_dispatch_impl(func: Callable, types: Iterable[Type], args: T def _get_linalg_preferred_backend() -> torch._C._LinalgBackend: ... def _set_linalg_preferred_backend(arg: torch._C._LinalgBackend): ... def _is_mps_available() -> _bool: ... -def _is_deploy_enabled() -> _bool: ... 
class _LinalgBackend: Default: _LinalgBackend Cusolver: _LinalgBackend diff --git a/torch/autograd/forward_ad.py b/torch/autograd/forward_ad.py index dbbd312e03c1..0a4ff26b5064 100644 --- a/torch/autograd/forward_ad.py +++ b/torch/autograd/forward_ad.py @@ -87,7 +87,6 @@ def make_dual(tensor, tangent, *, level=None): # Currently broken for 3.11, see https://github.com/pytorch/pytorch/issues/85506 if (os.environ.get("PYTORCH_JIT", "1" if sys.version_info < (3, 11) else "0") == "1" and __debug__ and - not torch._C._is_deploy_enabled() and os.environ.get('PYTORCH_DISABLE_LIBRARY', "0") == "0"): from torch._decomp import decompositions_for_jvp # noqa: F401 diff --git a/torch/csrc/Module.cpp b/torch/csrc/Module.cpp index 36568ca5f4a2..02e9d933de2d 100644 --- a/torch/csrc/Module.cpp +++ b/torch/csrc/Module.cpp @@ -1413,14 +1413,6 @@ Call this whenever a new thread is created in order to propagate values from return torch::should_allow_numbers_as_tensors(name); }); - py_module.def("_is_deploy_enabled", []() { -#if defined(USE_DEPLOY) - return true; -#else - return false; -#endif - }); - const auto& defaultGenerator = at::detail::getDefaultCPUGenerator(); THPDefaultCPUGenerator = (THPGenerator*)THPGenerator_initDefaultGenerator(defaultGenerator); diff --git a/torch/csrc/autograd/python_engine.cpp b/torch/csrc/autograd/python_engine.cpp index 93e5441a1917..3bd12f480d40 100644 --- a/torch/csrc/autograd/python_engine.cpp +++ b/torch/csrc/autograd/python_engine.cpp @@ -73,7 +73,7 @@ void PythonEngine::thread_init( // Create a PyThreadState, but release the GIL. This lets // pybind11::gil_scoped_acquire calls inside thread_main acquire the GIL // without having to create a new PyThreadState each time. -#if defined(IS_PYTHON_3_9_PLUS) || defined(USE_DEPLOY) +#if defined(IS_PYTHON_3_9_PLUS) auto gil = std::make_unique(); #else pybind11::gil_scoped_acquire gil; @@ -86,7 +86,7 @@ void PythonEngine::thread_init( decrement_non_reentrant_thread_count(); } -#if defined(IS_PYTHON_3_9_PLUS) || defined(USE_DEPLOY) +#if defined(IS_PYTHON_3_9_PLUS) // Do not call PyEval_RestoreThread, PyThreadState_[Clear|DeleteCurrent] if // runtime is finalizing if (!Py_IsInitialized()) { diff --git a/torch/csrc/autograd/python_variable.cpp b/torch/csrc/autograd/python_variable.cpp index da21173de435..342e200089ca 100644 --- a/torch/csrc/autograd/python_variable.cpp +++ b/torch/csrc/autograd/python_variable.cpp @@ -1282,34 +1282,6 @@ PyObject* THPVariable_get_base(THPVariable* self, void* unused) { END_HANDLE_TH_ERRORS } -#ifndef USE_DEPLOY -// This code is only used for asserts, so it is OK to skip it entirely from -// deploy interpreters (in which case we will just skip the safety check). For -// a more precise check, it would be necessary to test that we are not holding -// the GIL for *all* active torch deploy interpreters. There is not really any -// reason to do this. -struct ConcretePythonGILHooks : public c10::impl::PythonGILHooks { - bool check_python_gil() const override { - return Py_IsInitialized() && PyGILState_Check(); - }; -}; -// During process destruction, python_gil_hooks will get destructed, making -// further virtual calls on the object invalid. By the ordering of declarations -// in this file, the registerer will get destructed first, removing the -// externally visible reference to the object. 
Assuming at this point in time, -// there aren't other threads racing to read out the hooks, subsequent calls -// into GIL hooks will hit a nullptr and gracefully no-op the asserts (as -// desired, since at process shutdown time the Python interpreter is definitely -// dead). -// -// An alternative way to reduce the risk of python_gil_hooks going prematurely -// dead would be to leak it at destruction time. I didn't do that because -// it's annoying to write the Registerer class for this case. -ConcretePythonGILHooks python_gil_hooks; -static c10::impl::PythonGILHooksRegisterer python_gil_hooks_registerer( - &python_gil_hooks); -#endif - PyObject* THPVariable_get_shape(THPVariable* self, void* unused) { HANDLE_TH_ERRORS if (check_has_torch_function((PyObject*)self)) { diff --git a/torch/csrc/deploy/.gitignore b/torch/csrc/deploy/.gitignore deleted file mode 100644 index aa484a97a20f..000000000000 --- a/torch/csrc/deploy/.gitignore +++ /dev/null @@ -1 +0,0 @@ -example/generated/* diff --git a/torch/csrc/deploy/CMakeLists.txt b/torch/csrc/deploy/CMakeLists.txt deleted file mode 100644 index 61fe8c1bb892..000000000000 --- a/torch/csrc/deploy/CMakeLists.txt +++ /dev/null @@ -1,83 +0,0 @@ -set(DEPLOY_DIR "${CMAKE_CURRENT_SOURCE_DIR}") -add_subdirectory(interpreter) - -if(DEFINED GLIBCXX_USE_CXX11_ABI) - if(${GLIBCXX_USE_CXX11_ABI} EQUAL 1) - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D_GLIBCXX_USE_CXX11_ABI=1") - set(TORCH_CXX_FLAGS "-D_GLIBCXX_USE_CXX11_ABI=1") - endif() -endif() - -# we do not want to have torch_deployinterpreter linked against libstdc++ or libc because -# when loading it with RTLD_DEEPBIND it will resolve std::cout/stdout to the copy in libc++/libc instead of the -# ones in the main process (see https://gcc.gnu.org/bugzilla/show_bug.cgi?id=42679). -# However, we can't just instruct the linker to not link against these libraries because these -# libraries use function versioning. Without linking them, the shared library would not know the right -# symbol versions and instead try to link against the old ones. Our solution is to link the library -# normally then remove the DT_NEEDED entries in the ELF file that instruct the loaded to load the sublibraries. -# This gives us the right version numbers but no direct dependency on libstdc++/libc. When loaded these -# symbols will fallback to resolution through the main execution and get the correct values -add_executable(remove_dt_needed remove_dt_needed.cpp) -target_link_libraries(remove_dt_needed PRIVATE fmt::fmt-header-only) - -add_custom_command( - OUTPUT libtorch_deployinterpreter.o - # remove the DT_NEEDED entries - COMMAND $ $ libtorch_deployinterpreter_all.so - # package the result into an object we can link into the libdeploy binary. 
- COMMAND ld -r -b binary -o libtorch_deployinterpreter.o libtorch_deployinterpreter_all.so - COMMAND objcopy --rename-section .data=.torch_deploy_payload.interpreter_all,readonly,contents -N _binary_libtorch_deployinterpreter_all_so_start -N _binary_libtorch_deployinterpreter_all_so_end libtorch_deployinterpreter.o - COMMAND rm libtorch_deployinterpreter_all.so - DEPENDS torch_deployinterpreter remove_dt_needed - VERBATIM -) - -add_library(torch_deploy_internal STATIC libtorch_deployinterpreter.o ${DEPLOY_DIR}/deploy.cpp ${DEPLOY_DIR}/loader.cpp ${DEPLOY_DIR}/path_environment.cpp ${DEPLOY_DIR}/elf_file.cpp) -target_link_libraries(torch_deploy_internal PRIVATE crypt pthread dl util m z ffi lzma readline nsl ncursesw panelw) # for python builtins -target_link_libraries(torch_deploy_internal PUBLIC shm torch fmt::fmt-header-only protobuf::libprotobuf-lite) -caffe2_interface_library(torch_deploy_internal torch_deploy) - -set(INTERPRETER_TEST_SOURCES - ${DEPLOY_DIR}/test_deploy.cpp -) -set(INTERPRETER_TEST_SOURCES_GPU - ${DEPLOY_DIR}/test_deploy_gpu.cpp -) - -add_executable(test_deploy ${INTERPRETER_TEST_SOURCES}) -target_compile_definitions(test_deploy PUBLIC TEST_CUSTOM_LIBRARY) -target_include_directories(test_deploy PRIVATE ${PYTORCH_ROOT}/torch) -target_link_libraries(test_deploy - PUBLIC "-Wl,--no-as-needed -rdynamic" gtest dl torch_deploy -) - -add_executable(test_deploy_gpu ${INTERPRETER_TEST_SOURCES_GPU}) -target_compile_definitions(test_deploy_gpu PUBLIC TEST_CUSTOM_LIBRARY) -target_include_directories(test_deploy_gpu PRIVATE ${PYTORCH_ROOT}/torch) -target_link_libraries(test_deploy_gpu - PUBLIC "-Wl,--no-as-needed -rdynamic" gtest dl torch_deploy -) - -add_library(test_deploy_lib SHARED test_deploy_lib.cpp) -add_dependencies(test_deploy_lib cpython) -target_include_directories(test_deploy_lib BEFORE PRIVATE ${PYTHON_INC_DIR}) -target_link_libraries(test_deploy_lib PRIVATE pybind::pybind11) - -add_executable(deploy_benchmark ${DEPLOY_DIR}/example/benchmark.cpp) -target_include_directories(deploy_benchmark PRIVATE ${PYTORCH_ROOT}/torch) -target_link_libraries(deploy_benchmark - PUBLIC "-Wl,--no-as-needed -rdynamic" torch_deploy -) - -add_executable(interactive_embedded_interpreter ${DEPLOY_DIR}/interactive_embedded_interpreter.cpp) -target_include_directories(interactive_embedded_interpreter PRIVATE ${PYTORCH_ROOT}/torch) -target_link_libraries(interactive_embedded_interpreter - PUBLIC "-Wl,--no-as-needed -rdynamic" torch_deploy -) - -if(INSTALL_TEST) - install(TARGETS test_deploy DESTINATION bin) - install(TARGETS test_deploy_gpu DESTINATION bin) -endif() - -install(TARGETS torch_deploy DESTINATION lib) diff --git a/torch/csrc/deploy/Exception.h b/torch/csrc/deploy/Exception.h deleted file mode 100644 index f4311debeebc..000000000000 --- a/torch/csrc/deploy/Exception.h +++ /dev/null @@ -1,47 +0,0 @@ -#ifndef MULTIPY_EXCEPTION_H -#define MULTIPY_EXCEPTION_H - -#include - -#define MULTIPY_INTERNAL_ASSERT_WITH_MESSAGE(condition, message) \ - if (!(condition)) { \ - throw std::runtime_error( \ - "Internal Assertion failed: (" + std::string(#condition) + "), " + \ - "function " + __FUNCTION__ + ", file " + __FILE__ + ", line " + \ - std::to_string(__LINE__) + ".\n" + "Please report bug to Pytorch.\n" + \ - message + "\n"); \ - } - -#define MULTIPY_INTERNAL_ASSERT_NO_MESSAGE(condition) \ - MULTIPY_INTERNAL_ASSERT_WITH_MESSAGE(#condition, "") - -#define MULTIPY_INTERNAL_ASSERT_(x, condition, message, FUNC, ...) FUNC - -#define MULTIPY_INTERNAL_ASSERT(...) 
\ - MULTIPY_INTERNAL_ASSERT_( \ - , \ - ##__VA_ARGS__, \ - MULTIPY_INTERNAL_ASSERT_WITH_MESSAGE(__VA_ARGS__), \ - MULTIPY_INTERNAL_ASSERT_NO_MESSAGE(__VA_ARGS__)); - -#define MULTIPY_CHECK_WITH_MESSAGE(condition, message) \ - if (!(condition)) { \ - throw std::runtime_error( \ - "Check failed: (" + std::string(#condition) + "), " + "function " + \ - __FUNCTION__ + ", file " + __FILE__ + ", line " + \ - std::to_string(__LINE__) + ".\n" + message + "\n"); \ - } - -#define MULTIPY_CHECK_NO_MESSAGE(condition) \ - MULTIPY_CHECK_WITH_MESSAGE(#condition, "") - -#define MULTIPY_CHECK_(x, condition, message, FUNC, ...) FUNC - -#define MULTIPY_CHECK(...) \ - MULTIPY_CHECK_( \ - , \ - ##__VA_ARGS__, \ - MULTIPY_CHECK_WITH_MESSAGE(__VA_ARGS__), \ - MULTIPY_CHECK_NO_MESSAGE(__VA_ARGS__)); - -#endif // MULTIPY_EXCEPTION_H diff --git a/torch/csrc/deploy/README.md b/torch/csrc/deploy/README.md index dfe436ba79fa..c757287f8e1b 100644 --- a/torch/csrc/deploy/README.md +++ b/torch/csrc/deploy/README.md @@ -1,27 +1,2 @@ -# Torch Deploy -This is an experimental feature to embed multiple python interpreters inside the torch library, -providing a solution to the 'GIL problem' for multithreading with the convenience of python -and eager or torchscripted pytorch programs. - -# libinterpreter -This is an internal library used behind the scenes to enable multiple python interpreters in -a single deploy runtime. libinterpreter.so is DLOPENed multiple times by the deploy library. -Each copy of libinterpreter exposes a simple interpreter interface but hides its python and other -internal symbols, preventing the different python instances from seeing each other. - -# CPython build -Torch Deploy builds CPython from source as part of the embedded python interpreter. CPython has a flexible build system that builds successfully with or without a variety of dependencies installed - if missing, the resulting CPython build simply omits optional functionality, meaning some stdlib modules/libs are not present. - -Currently, the torch deploy build setup assumes the full CPython build is present. This matters because there is a [hardcoded list of python stdlib modules](https://github.com/pytorch/pytorch/blob/2662e34e9287a72e96dabb590e7732f9d4a6b37b/torch/csrc/deploy/interpreter/interpreter_impl.cpp#L35) that are explicitly loaded from the embedded binary at runtime. - -### rebuilding CPython after installing missing dependencies -Because CPython builds successfully when optional dependencies are missing, the cmake wrapper currently doesn't know if you need to rebuild CPython after adding missing dependencies (or whether dependencies were missing in the first place). - -To be safe, install the [complete list of dependencies for CPython](https://devguide.python.org/setup/#install-dependencies) for your platform, before trying to build torch with USE_DEPLOY=1. - -If you already built CPython without all the dependencies and want to fix it, just blow away the CPython folder under torch/csrc/deploy/third_party, install the missing system dependencies, and re-attempt the pytorch build command. - -# Example - -Read the [getting started guide](https://github.com/pytorch/pytorch/blob/master/docs/source/deploy.rst) for an -example on how to use `torch::deploy`. +# torch::deploy has been moved to pytorch/multipy +Please check out [https://github.com/pytorch/multipy](https://github.com/pytorch/multipy) to find the new home for torch::deploy. 
diff --git a/torch/csrc/deploy/benchmark.cpp b/torch/csrc/deploy/benchmark.cpp deleted file mode 100644 index 82296a5e1a1d..000000000000 --- a/torch/csrc/deploy/benchmark.cpp +++ /dev/null @@ -1,336 +0,0 @@ -#include - -#include -#include -#include - -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -typedef void (*function_type)(const char*); - -bool cuda = false; - -constexpr auto latency_p = { - 25., - 50., - 95.}; //{1., 5., 25., 50., 75., 90., 95., 99., 99.25, 99.5, 99.75, 99.9}; - -// NOLINTNEXTLINE(cppcoreguidelines-pro-type-member-init) -struct Report { - std::string benchmark; - std::string strategy; - size_t n_threads; - size_t items_completed; - double work_items_per_second; - std::vector latencies; - static void report_header(std::ostream& out) { - out << "benchmark, strategy, n_threads, work_items_completed, work_items_per_second"; - for (double l : latency_p) { - out << ", p" << l << "_latency"; - } - out << ", device\n"; - } - void report(std::ostream& out) { - out << benchmark << ", " << strategy << ", " << n_threads << ", " - << items_completed << ", " << work_items_per_second; - for (double l : latencies) { - out << ", " << l; - } - out << ", " << (cuda ? "cuda" : "cpu") << "\n"; - } -}; - -const int min_items_to_complete = 1; - -struct RunPython { - static torch::deploy::ReplicatedObj load_and_wrap( - torch::deploy::Package& package) { - auto I = package.acquireSession(); - auto obj = I.self.attr("load_pickle")({"model", "model.pkl"}); - if (cuda) { - obj = I.global("gpu_wrapper", "GPUWrapper")({obj}); - } - return I.createMovable(obj); - } - // NOLINTNEXTLINE(cppcoreguidelines-pro-type-member-init) - RunPython( - torch::deploy::Package& package, - std::vector eg, - const torch::deploy::Interpreter* interps) - : obj_(load_and_wrap(package)), eg_(std::move(eg)), interps_(interps) {} - void operator()(int i) { - auto I = obj_.acquireSession(); - if (cuda) { - // NOLINTNEXTLINE(cppcoreguidelines-init-variables) - std::vector eg2 = {i}; - eg2.insert(eg2.end(), eg_.begin(), eg_.end()); - I.self(eg2); - } else { - I.self(eg_); - } - } - torch::deploy::ReplicatedObj obj_; - std::vector eg_; - const torch::deploy::Interpreter* interps_; -}; - -// def to_device(i, d): -// if isinstance(i, torch.Tensor): -// return i.to(device=d) -// elif isinstance(i, (tuple, list)): -// return tuple(to_device(e, d) for e in i) -// else: -// raise RuntimeError('inputs are weird') - -static torch::IValue to_device(const torch::IValue& v, torch::Device to); - -static std::vector to_device_vec( - at::ArrayRef vs, - torch::Device to) { - std::vector results; - for (const torch::IValue& v : vs) { - results.push_back(to_device(v, to)); - } - return results; -} - -static torch::IValue to_device(const torch::IValue& v, torch::Device to) { - if (v.isTensor()) { - return v.toTensor().to(to); - } else if (v.isTuple()) { - auto tup = v.toTuple(); - return c10::ivalue::Tuple::create(to_device_vec(tup->elements(), to)); - } else if (v.isList()) { - auto converted = to_device_vec(v.toListRef(), to); - torch::List result(v.toList().elementType()); - for (const torch::IValue& v : converted) { - result.push_back(v); - } - return result; - } else { - MULTIPY_INTERNAL_ASSERT(false, "cannot to_device"); - } -} - -static bool exists(const std::string& fname) { - std::fstream jit_file(fname); - return jit_file.good(); -} - -struct RunJIT { - RunJIT(const std::string& file_to_run, std::vector eg) - : eg_(std::move(eg)) { - if (!cuda) { - 
models_.push_back(torch::jit::load(file_to_run + "_jit")); - } else { - for (const auto i : c10::irange(2)) { - auto d = torch::Device(torch::DeviceType::CUDA, i); - std::stringstream qualified; - qualified << file_to_run << "_jit_" << i; - auto loaded = exists(qualified.str()) - ? torch::jit::load(qualified.str(), d) - : torch::jit::load(file_to_run + "_jit", d); - loaded.to(d); - models_.push_back(loaded); - } - } - } - void operator()(int i) { - if (cuda) { - const auto device_id = i % models_.size(); - auto d = torch::Device(torch::DeviceType::CUDA, device_id); - to_device( - models_[device_id].forward(to_device_vec(eg_, d)), - torch::DeviceType::CPU); - } else { - models_[0].forward(eg_); - } - } - std::vector eg_; - std::vector models_; -}; - -struct Benchmark { - // NOLINTNEXTLINE(cppcoreguidelines-pro-type-member-init) - Benchmark( - torch::deploy::InterpreterManager& manager, - size_t n_threads, - std::string strategy, - // NOLINTNEXTLINE(modernize-pass-by-value) - std::string file_to_run, - size_t n_seconds = 5) - : manager_(manager), - n_threads_(n_threads), - strategy_(strategy), - file_to_run_(file_to_run), - n_seconds_(n_seconds), - should_run_(true), - items_completed_(0), - reached_min_items_completed_(0) { - // NOLINTNEXTLINE(bugprone-branch-clone) - if (strategy == "one_python") { - manager.debugLimitInterpreters(1); - } else if (strategy == "multi_python") { - manager.debugLimitInterpreters(n_threads_); - } - } - - Report run() { - pthread_barrier_init(&first_run_, nullptr, n_threads_ + 1); - - // NOLINTNEXTLINE(cppcoreguidelines-init-variables) - torch::deploy::Package package = manager_.loadPackage(file_to_run_); - - // NOLINTNEXTLINE(cppcoreguidelines-init-variables) - std::vector eg; - { - auto I = package.acquireSession(); - - eg = I.global("builtins", "tuple")( - I.self.attr("load_pickle")({"model", "example.pkl"})) - .toIValue() - .toTupleRef() - .elements(); - } - - // NOLINTNEXTLINE(bugprone-branch-clone) - if (strategy_ == "jit") { - run_one_work_item = RunJIT(file_to_run_, std::move(eg)); - } else { - run_one_work_item = - RunPython(package, std::move(eg), manager_.allInstances().data()); - } - - // NOLINTNEXTLINE(cppcoreguidelines-init-variables) - std::vector> latencies(n_threads_); - - for (const auto i : c10::irange(n_threads_)) { - threads_.emplace_back([this, &latencies, i] { - torch::NoGradGuard guard; - // do initial work - run_one_work_item(i); - - pthread_barrier_wait(&first_run_); - size_t local_items_completed = 0; - while (should_run_) { - auto begin = std::chrono::steady_clock::now(); - run_one_work_item(i); - auto end = std::chrono::steady_clock::now(); - double work_seconds = - std::chrono::duration(end - begin).count(); - latencies[i].push_back(work_seconds); - local_items_completed++; - if (local_items_completed == min_items_to_complete) { - reached_min_items_completed_++; - } - } - items_completed_ += local_items_completed; - }); - } - - pthread_barrier_wait(&first_run_); - auto begin = std::chrono::steady_clock::now(); - auto try_stop_at = begin + std::chrono::seconds(n_seconds_); - std::this_thread::sleep_until(try_stop_at); - for (int i = 0; reached_min_items_completed_ < n_threads_; ++i) { - std::this_thread::sleep_until( - begin + (i + 2) * std::chrono::seconds(n_seconds_)); - } - should_run_ = false; - for (std::thread& thread : threads_) { - thread.join(); - } - auto end = std::chrono::steady_clock::now(); - // NOLINTNEXTLINE(cppcoreguidelines-init-variables) - double total_seconds = std::chrono::duration(end - begin).count(); - 
Report report; - report.benchmark = file_to_run_; - report.strategy = strategy_; - report.n_threads = n_threads_; - report.items_completed = items_completed_; - report.work_items_per_second = items_completed_ / total_seconds; - reportLatencies(report.latencies, latencies); - run_one_work_item = nullptr; - return report; - } - - private: - void reportLatencies( - std::vector& results, - const std::vector>& latencies) { - // NOLINTNEXTLINE(cppcoreguidelines-init-variables) - std::vector flat_latencies; - for (const auto& elem : latencies) { - flat_latencies.insert(flat_latencies.end(), elem.begin(), elem.end()); - } - std::sort(flat_latencies.begin(), flat_latencies.end()); - for (double target : latency_p) { - size_t idx = size_t(flat_latencies.size() * target / 100.0); - double time = flat_latencies.size() == 0 - ? 0 - : flat_latencies.at(std::min(flat_latencies.size() - 1, idx)); - results.push_back(time); - } - } - torch::deploy::InterpreterManager& manager_; - size_t n_threads_; - std::string strategy_; - std::string file_to_run_; - size_t n_seconds_; - pthread_barrier_t first_run_; - std::atomic should_run_; - std::atomic items_completed_; - std::atomic reached_min_items_completed_; - std::vector threads_; - std::function run_one_work_item; -}; - -// NOLINTNEXTLINE(bugprone-exception-escape) -int main(int argc, char* argv[]) { - int max_thread = atoi(argv[1]); - cuda = std::string(argv[2]) == "cuda"; - // NOLINTNEXTLINE(cppcoreguidelines-init-variables) - bool jit_enable = std::string(argv[3]) == "jit"; - Report::report_header(std::cout); - torch::deploy::InterpreterManager manager(max_thread); - - // make sure gpu_wrapper.py is in the import path - for (auto& interp : manager.allInstances()) { - auto I = interp.acquireSession(); - I.global("sys", "path").attr("append")({"torch/csrc/deploy/example"}); - } - - auto n_threads = {1, 2, 4, 8, 16, 32, 40}; - for (const auto i : c10::irange(4, argc)) { - std::string model_file = argv[i]; - for (int n_thread : n_threads) { - if (n_thread > max_thread) { - continue; - } - for (std::string strategy : {"one_python", "multi_python", "jit"}) { - if (strategy == "jit") { - if (!jit_enable) { - continue; - } - if (!exists(model_file + "_jit")) { - continue; - } - } - Benchmark b(manager, n_thread, strategy, model_file); - Report r = b.run(); - r.report(std::cout); - } - } - } - return 0; -} diff --git a/torch/csrc/deploy/deploy.cpp b/torch/csrc/deploy/deploy.cpp deleted file mode 100644 index cea7ea774d89..000000000000 --- a/torch/csrc/deploy/deploy.cpp +++ /dev/null @@ -1,370 +0,0 @@ -#include -#include -#include -#include - -#include - -#include -#include -#include - -struct ExeSection { - const char* sectionName; - bool customLoader; -}; - -struct InterpreterSymbol { - const char* startSym; - const char* endSym; - bool customLoader; -}; - -// these symbols are generated by cmake, using ld -r -b binary -// libtorch_deployinterpreter.so which takes the contents of the so and embeds -// it into a symbol that is then linked into libtorch_deploy.so. This enables us -// to simply copy the contents of this symbol to disk and dlopen it to create an -// instance of python. 
- -namespace torch { -namespace deploy { - -const std::initializer_list pythonInterpreterSection = { - {".torch_deploy_payload.interpreter_all", true}, - {".torch_deploy_payload.interpreter_hip", false}, - {".torch_deploy_payload.interpreter_cuda", false}, - {".torch_deploy_payload.interpreter_cpu", false}, -}; - -const std::initializer_list kInterpreterSearchPath = { - {"_binary_libtorch_deployinterpreter_all_so_start", - "_binary_libtorch_deployinterpreter_all_so_end", - true}, - {"_binary_libtorch_deployinterpreter_hip_so_start", - "_binary_libtorch_deployinterpreter_hip_so_end", - false}, - {"_binary_libtorch_deployinterpreter_cuda_so_start", - "_binary_libtorch_deployinterpreter_cuda_so_end", - false}, - {"_binary_libtorch_deployinterpreter_cpu_so_start", - "_binary_libtorch_deployinterpreter_cpu_so_end", - false}, -}; - -static bool writeDeployInterpreter(FILE* dst) { - TORCH_INTERNAL_ASSERT(dst); - const char* payloadStart = nullptr; - size_t size = 0; - bool customLoader = false; - std::string exePath; - std::ifstream("/proc/self/cmdline") >> exePath; - ElfFile elfFile(exePath.c_str()); - for (const auto& s : pythonInterpreterSection) { - multipy::optional
payloadSection = - elfFile.findSection(s.sectionName); - if (payloadSection != multipy::nullopt) { - payloadStart = payloadSection->start; - customLoader = s.customLoader; - size = payloadSection->len; - MULTIPY_CHECK(payloadSection.has_value(), "Missing the payload section"); - break; - } - } - if (payloadStart == nullptr) { - const char* libStart = nullptr; - const char* libEnd = nullptr; - for (const auto& s : kInterpreterSearchPath) { - libStart = (const char*)dlsym(nullptr, s.startSym); - if (libStart) { - libEnd = (const char*)dlsym(nullptr, s.endSym); - customLoader = s.customLoader; - break; - } - } - MULTIPY_CHECK( - libStart != nullptr && libEnd != nullptr, - "torch::deploy requires a build-time dependency on embedded_interpreter or embedded_interpreter_cuda, neither of which were found. torch::cuda::is_available()=" + - std::to_string(torch::cuda::is_available())); - - size = libEnd - libStart; - payloadStart = libStart; - } - size_t written = fwrite(payloadStart, 1, size, dst); - TORCH_INTERNAL_ASSERT(size == written, "expected written == size"); - return customLoader; -} - -InterpreterManager::InterpreterManager( - size_t nInterp, - std::shared_ptr env) - : resources_(nInterp) { - C10_LOG_API_USAGE_ONCE("torch.deploy.InterpreterManager"); - - TORCH_DEPLOY_TRY - for (const auto i : c10::irange(nInterp)) { - instances_.emplace_back(this, env); - auto I = instances_.back().acquireSession(); - // make torch.version.interp be the interpreter id - // can be used for balancing work across GPUs - I.global("torch", "version").attr("__setattr__")({"interp", int(i)}); - instances_.back().pImpl_->setFindModule( - [this](const std::string& name) -> multipy::optional { - auto it = registeredModuleSource_.find(name); - if (it != registeredModuleSource_.end()) { - return it->second; - } else { - return multipy::nullopt; - } - }); - } - - // Pre-registered modules. - // Since torch::deploy::Obj.toIValue cannot infer empty list, we hack it to - // return None for empty list. - // TODO(jwtan): Make the discovery of these modules easier. - registerModuleSource( - "GetArgumentNamesModule", - "from inspect import signature\n" - "from typing import Callable, Optional\n" - "def getArgumentNames(function: Callable) -> Optional[list]:\n" - " names = list(signature(function).parameters.keys())\n" - " if len(names) == 0:\n" - " return None\n" - " return names\n"); - TORCH_DEPLOY_SAFE_CATCH_RETHROW -} - -Package InterpreterManager::loadPackage(const std::string& uri) { - TORCH_DEPLOY_TRY - return Package(uri, this); - TORCH_DEPLOY_SAFE_CATCH_RETHROW -} - -Package InterpreterManager::loadPackage( - std::shared_ptr reader) { - TORCH_DEPLOY_TRY - return Package(reader, this); - TORCH_DEPLOY_SAFE_CATCH_RETHROW -} - -Obj InterpreterSession::fromMovable(const ReplicatedObj& obj) { - TORCH_DEPLOY_TRY - return impl_->unpickleOrGet(obj.pImpl_->objectId_, obj.pImpl_->data_); - TORCH_DEPLOY_SAFE_CATCH_RETHROW -} - -InterpreterSession ReplicatedObj::acquireSession( - const Interpreter* onThisInterpreter) const { - TORCH_DEPLOY_TRY - InterpreterSession I = onThisInterpreter ? 
onThisInterpreter->acquireSession() - : pImpl_->manager_->acquireOne(); - I.self = I.fromMovable(*this); - return I; - TORCH_DEPLOY_SAFE_CATCH_RETHROW -} - -// NOLINTNEXTLINE(bugprone-exception-escape) -InterpreterSession::~InterpreterSession() { - if (manager_ && notifyIdx_ >= 0) { - manager_->resources_.free(notifyIdx_); - } -} - -void ReplicatedObjImpl::unload(const Interpreter* onThisInterpreter) { - TORCH_DEPLOY_TRY - if (!onThisInterpreter) { - // NOLINTNEXTLINE(clang-analyzer-core.NullDereference) - for (auto& interp : manager_->allInstances()) { - unload(&interp); - } - return; - } - - InterpreterSession I = onThisInterpreter->acquireSession(); - I.impl_->unload(objectId_); - TORCH_DEPLOY_SAFE_CATCH_RETHROW -} - -// NOLINTNEXTLINE(bugprone-exception-escape) -ReplicatedObjImpl::~ReplicatedObjImpl() { - unload(nullptr); -} - -void ReplicatedObj::unload(const Interpreter* onThisInterpreter) { - TORCH_DEPLOY_TRY - pImpl_->unload(onThisInterpreter); - TORCH_DEPLOY_SAFE_CATCH_RETHROW -} - -ReplicatedObj InterpreterSession::createMovable(Obj obj) { - TORCH_DEPLOY_TRY - MULTIPY_CHECK( - manager_, - "Can only create a movable object when the session was created from an interpreter that is part of a InterpreterManager"); - - MULTIPY_CHECK( - impl_->isOwner(obj), - "Cannot create movable from an object that lives in different session"); - - auto pickled = impl_->pickle(self, obj); - return ReplicatedObj(std::make_shared( - manager_->nextObjectId_++, std::move(pickled), manager_)); - TORCH_DEPLOY_SAFE_CATCH_RETHROW -} - -using dlopen_t = void* (*)(const char*, int); - -// ASAN overrides dlopen and errors when it sees the RTLD_DEEPBIND flags because -// it thinks that the library being loaded will not link against its overrides -// for things like malloc/free. However, our specially crafted library doesn't -// have any DT_NEEDED entries -- all undefined symbols will be resolved from the -// process's link map. So it is actually safe to use RTLD_DEEPBIND with ASAN. We -// have to get around its check though, so we do it by finding the real dlopen -// function. -static dlopen_t find_real_dlopen() { - void* libc = dlopen("libdl.so.2", RTLD_NOLOAD | RTLD_LAZY | RTLD_LOCAL); - // libdl is gone on some newer systems. - if (!libc) { - // libc.so won't open with dlopen because it's a linker script. - libc = dlopen("libc.so.6", RTLD_NOLOAD | RTLD_LAZY | RTLD_LOCAL); - } - TORCH_INTERNAL_ASSERT(libc); - auto dlopen_ = (dlopen_t)dlsym(libc, "dlopen"); - TORCH_INTERNAL_ASSERT(dlopen_); - return dlopen_; -} - -Interpreter::Interpreter( - InterpreterManager* manager, - std::shared_ptr env) - : handle_(nullptr), manager_(manager), env_(env) { - // NOLINTNEXTLINE(modernize-avoid-c-arrays,cppcoreguidelines-avoid-c-arrays) - char libraryName[] = "/tmp/torch_deployXXXXXX"; - int fd = mkstemp(libraryName); - TORCH_INTERNAL_ASSERT(fd != -1, "failed to create temporary file"); - libraryName_ = libraryName; - FILE* dst = fdopen(fd, "wb"); - - customLoader_ = writeDeployInterpreter(dst); - - fclose(dst); - int flags = RTLD_LOCAL | RTLD_LAZY; - if (customLoader_) { - flags |= RTLD_DEEPBIND; - } - -#ifdef FBCODE_CAFFE2 - static dlopen_t dlopen_ = find_real_dlopen(); - handle_ = dlopen_(libraryName, flags); -#else - handle_ = dlopen(libraryName, flags); -#endif - - if (!handle_) { - throw std::runtime_error(dlerror()); - } - - // note: if you want better debugging symbols for things inside - // new_intepreter_impl, comment out this line so that the so lasts long enough - // for the debugger to see it. 
- unlink(libraryName_.c_str()); - - if (customLoader_) { - // when using the custom loader we need to link python symbols against - // the right version of the symbols for the interpreter which an be looked - // up from the handle_ to this shared library. here we register the handle - // with the code that does custom loading of python extensions. - auto deploySetSelfPtr = (void (*)(void*))dlsym(handle_, "deploy_set_self"); - AT_ASSERT(deploySetSelfPtr); - deploySetSelfPtr(handle_); - } - - auto extra_python_paths = env->getExtraPythonPaths(); - void* newInterpreterImpl = dlsym(handle_, "newInterpreterImpl"); - AT_ASSERT(newInterpreterImpl); - pImpl_ = std::unique_ptr( - ((InterpreterImpl * (*)(const std::vector&)) - newInterpreterImpl)(extra_python_paths)); - env->configureInterpreter(this); -} - -Interpreter::~Interpreter() { - if (handle_) { - // ensure python uninitialization runs before we dlclose the library - pImpl_.reset(); - if (customLoader_) { - auto deploy_flush_python_libs = - (void (*)())dlsym(handle_, "deploy_flush_python_libs"); - deploy_flush_python_libs(); - } - dlclose(handle_); - } -} - -int LoadBalancer::acquire() { - TORCH_DEPLOY_TRY - thread_local int last = 0; - size_t minusers = SIZE_MAX; - int minIdx = 0; - for (size_t i = 0; i < n_; ++i, ++last) { - if (last >= static_cast(n_)) { - last = 0; - } - uint64_t prev = 0; - bool acquired = __atomic_compare_exchange_n( - &uses_[8 * last], - &prev, - 1ULL, - false, - __ATOMIC_SEQ_CST, - __ATOMIC_SEQ_CST); - if (acquired) { - // fast path, we found an interpreter with no users - return last; - } - // slow path, we don't want to use this interpreter because it is being - // used by someone else. - - if (prev < minusers) { - minusers = prev; - minIdx = last; - } - } - // we failed to find a completely free interpreter. heuristically use the - // one with the least number of user (note that this may have changed since - // then, so this is only a heuristic). 
- __atomic_fetch_add(&uses_[8 * minIdx], 1ULL, __ATOMIC_SEQ_CST); - return minIdx; - TORCH_DEPLOY_SAFE_CATCH_RETHROW -} - -void LoadBalancer::free(int where) { - TORCH_DEPLOY_TRY - // NOLINTNEXTLINE(cppcoreguidelines-avoid-magic-numbers) - __atomic_fetch_sub(&uses_[8 * where], 1ULL, __ATOMIC_SEQ_CST); - TORCH_DEPLOY_SAFE_CATCH_RETHROW -} - -void PythonMethodWrapper::setArgumentNames( - std::vector& argumentNamesOut) const { - auto session = model_.acquireSession(); - auto method = session.self.attr(methodName_.c_str()); - auto iArgumentNames = - session.global("GetArgumentNamesModule", "getArgumentNames")({method}) - .toIValue(); - if (iArgumentNames.isNone()) { - return; - } - - TORCH_INTERNAL_ASSERT(iArgumentNames.isList()); - auto argumentNames = iArgumentNames.toListRef(); - - argumentNamesOut.reserve(argumentNames.size()); - for (auto& argumentName : argumentNames) { - TORCH_INTERNAL_ASSERT(argumentName.isString()); - argumentNamesOut.push_back(argumentName.toStringRef()); - } -} - -} // namespace deploy -} // namespace torch diff --git a/torch/csrc/deploy/deploy.h b/torch/csrc/deploy/deploy.h deleted file mode 100644 index b986093ed020..000000000000 --- a/torch/csrc/deploy/deploy.h +++ /dev/null @@ -1,302 +0,0 @@ -#pragma once -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -namespace torch { -namespace deploy { - -struct ReplicatedObj; -struct InterpreterManager; - -struct TORCH_API InterpreterSession { - InterpreterSession( - InterpreterSessionImpl* impl, - InterpreterManager* manager) noexcept - : impl_(impl), manager_(manager) {} - - // NOLINTNEXTLINE(cppcoreguidelines-non-private-member-variables-in-classes) - Obj self; // when retreived from a PythonMovable this will be set. 
- InterpreterSession(InterpreterSession&&) noexcept = default; - // NOLINTNEXTLINE(bugprone-exception-escape) - ~InterpreterSession(); - Obj global(const char* module, const char* name) { - TORCH_DEPLOY_TRY - return impl_->global(module, name); - TORCH_DEPLOY_SAFE_CATCH_RETHROW - } - Obj fromIValue(at::IValue ivalue) { - TORCH_DEPLOY_TRY - return impl_->fromIValue(std::move(ivalue)); - TORCH_DEPLOY_SAFE_CATCH_RETHROW - } - ReplicatedObj createMovable(Obj obj); - Obj fromMovable(const ReplicatedObj& obj); - - private: - friend struct ReplicatedObj; - friend struct Package; - friend struct InterpreterManager; - friend struct ReplicatedObjImpl; - std::unique_ptr impl_; - InterpreterManager* manager_; // if created from one - int64_t notifyIdx_ = -1; -}; - -class TORCH_API Interpreter { - private: - std::string libraryName_; - void* handle_; - std::unique_ptr pImpl_; - bool customLoader_ = false; - InterpreterManager* manager_; // optional if managed by one - std::shared_ptr env_; - - public: - Interpreter(InterpreterManager* manager, std::shared_ptr env); - InterpreterSession acquireSession() const { - TORCH_DEPLOY_TRY - return InterpreterSession(pImpl_->acquireSession(), manager_); - TORCH_DEPLOY_SAFE_CATCH_RETHROW - } - ~Interpreter(); - Interpreter(Interpreter&& rhs) noexcept - : libraryName_(std::move(rhs.libraryName_)), - handle_(rhs.handle_), - pImpl_(std::move(rhs.pImpl_)), - manager_(rhs.manager_) { - rhs.handle_ = nullptr; - } - - Interpreter(const Interpreter&) = delete; - Interpreter& operator=(const Interpreter&) = delete; - Interpreter& operator=(Interpreter&&) = delete; - friend struct InterpreterManager; -}; - -struct Package; - -struct TORCH_API LoadBalancer { - explicit LoadBalancer(size_t n) - : uses_(new uint64_t[8 * n]), allocated_(n), n_(n) { - TORCH_DEPLOY_TRY - // 8*... to avoid false sharing of atomics on the same cache line - memset(uses_.get(), 0, 8 * n_ * sizeof(uint64_t)); - TORCH_DEPLOY_SAFE_CATCH_RETHROW - } - void setResourceLimit(size_t n) { - TORCH_DEPLOY_TRY - MULTIPY_INTERNAL_ASSERT(n <= allocated_); - n_ = n; - TORCH_DEPLOY_SAFE_CATCH_RETHROW - } - int acquire(); - void free(int where); - - private: - // NOLINTNEXTLINE(modernize-avoid-c-arrays,cppcoreguidelines-avoid-c-arrays) - std::unique_ptr - uses_; // the approximate count of the number of users of interpreter - size_t allocated_; - size_t n_; -}; - -struct TORCH_API InterpreterManager { - explicit InterpreterManager( - size_t nInterp = 2, - std::shared_ptr env = std::make_shared()); - - // get a free model, guarenteed that no other user of acquireOne has the same - // model. It _is_ possible that other users will be using the interpreter. - InterpreterSession acquireOne() { - TORCH_DEPLOY_TRY - int where = resources_.acquire(); - InterpreterSession I = instances_[where].acquireSession(); - I.notifyIdx_ = where; - return I; - TORCH_DEPLOY_SAFE_CATCH_RETHROW - } - - // use to make sure something gets run on all interpreters, such as loading or - // unloading a model eagerly - at::ArrayRef allInstances() { - TORCH_DEPLOY_TRY - return instances_; - TORCH_DEPLOY_SAFE_CATCH_RETHROW - } - void debugLimitInterpreters(size_t N) { - TORCH_DEPLOY_TRY - AT_ASSERT(N <= instances_.size()); - resources_.setResourceLimit(N); - TORCH_DEPLOY_SAFE_CATCH_RETHROW - } - Package loadPackage(const std::string& uri); - Package loadPackage( - std::shared_ptr reader); - - // convience function for loading some python source code as a module across - // all interpreters. 
this can be used for writing tests of deploy that need to - // execute python code, or for small amounts of application logic that are - // best written in Python. For larger amounts of code, prefer creating and - // loading them as packages. - void registerModuleSource(std::string name, std::string src) { - registeredModuleSource_[std::move(name)] = std::move(src); - } - - InterpreterManager(const InterpreterManager&) = delete; - InterpreterManager& operator=(const InterpreterManager&) = delete; - InterpreterManager& operator=(InterpreterManager&&) = delete; - - private: - friend struct Package; - friend struct InterpreterSession; - size_t nextObjectId_ = 0; - std::vector instances_; - LoadBalancer resources_; - std::unordered_map registeredModuleSource_; -}; - -struct TORCH_API ReplicatedObjImpl { - ReplicatedObjImpl( - size_t object_id, - // NOLINTNEXTLINE(modernize-pass-by-value) - PickledObject data, - InterpreterManager* manager) - : objectId_(object_id), data_(data), manager_(manager) {} - // NOLINTNEXTLINE(bugprone-exception-escape) - ~ReplicatedObjImpl(); - void unload(const Interpreter* onThisInterpreter); - int64_t objectId_; - PickledObject data_; - InterpreterManager* manager_; -}; - -struct TORCH_API ReplicatedObj { - ReplicatedObj() : pImpl_(nullptr) {} - InterpreterSession acquireSession( - const Interpreter* onThisInterpreter = nullptr) const; - at::IValue operator()(at::ArrayRef args) const { - TORCH_DEPLOY_TRY - auto I = acquireSession(); - return I.self(args).toIValue(); - TORCH_DEPLOY_SAFE_CATCH_RETHROW - } - - [[nodiscard]] at::IValue callKwargs( - std::vector args, - std::unordered_map kwargs) const { - TORCH_DEPLOY_TRY - auto I = acquireSession(); - return I.self.callKwargs(std::move(args), std::move(kwargs)).toIValue(); - TORCH_DEPLOY_SAFE_CATCH_RETHROW - } - - [[nodiscard]] at::IValue callKwargs( - std::unordered_map kwargs) const { - TORCH_DEPLOY_TRY - auto I = acquireSession(); - return I.self.callKwargs(std::move(kwargs)).toIValue(); - TORCH_DEPLOY_SAFE_CATCH_RETHROW - } - - [[nodiscard]] bool hasattr(const char* name) const { - TORCH_DEPLOY_TRY - auto I = acquireSession(); - return I.self.hasattr(name); - TORCH_DEPLOY_SAFE_CATCH_RETHROW - } - - void unload(const Interpreter* onThisInterpreter = nullptr); - - private: - ReplicatedObj(std::shared_ptr pImpl) - : pImpl_(std::move(pImpl)) {} - std::shared_ptr pImpl_; - friend struct Package; - friend struct InterpreterSession; - friend struct InterpreterManager; -}; - -class PythonMethodWrapper : public torch::IMethod { - // PythonMethodWrapper is a more specific instance of a - // ReplicatedObj which represents a python method, and - // is therefore callable and has argument names accessible. 
- public: - // TODO(whc) make bound method pickleable, then directly construct from that - PythonMethodWrapper( - torch::deploy::ReplicatedObj model, - std::string methodName) - : model_(std::move(model)), methodName_(std::move(methodName)) {} - - const std::string& name() const override { - return methodName_; - } - - c10::IValue operator()( - std::vector args, - const IValueMap& kwargs = IValueMap()) const override { - // TODO(whc) ideally, pickle the method itself as replicatedobj, to skip - // this lookup each time - auto modelSession = model_.acquireSession(); - auto method = modelSession.self.attr(methodName_.c_str()); - return method.callKwargs(args, kwargs).toIValue(); - } - - private: - void setArgumentNames(std::vector&) const override; - - torch::deploy::ReplicatedObj model_; - std::string methodName_; -}; - -struct TORCH_API Package { - // shorthand for getting the object as a pickle resource in the package - ReplicatedObj loadPickle(const std::string& module, const std::string& file) { - TORCH_DEPLOY_TRY - auto I = acquireSession(); - auto loaded = I.self.attr("load_pickle")({module, file}); - return I.createMovable(loaded); - TORCH_DEPLOY_SAFE_CATCH_RETHROW - } - - InterpreterSession acquireSession() { - TORCH_DEPLOY_TRY - auto I = manager_->acquireOne(); - I.self = - I.impl_->createOrGetPackageImporterFromContainerFile(containerFile_); - return I; - TORCH_DEPLOY_SAFE_CATCH_RETHROW - } - - private: - Package( - const std::string& uri, - InterpreterManager* - pm) // or really any of the constructors to our zip file format - : manager_(pm), - containerFile_( - std::make_shared(uri)) {} - Package( - std::shared_ptr reader, - InterpreterManager* - pm) // or really any of the constructors to our zip file format - : manager_(pm), - containerFile_( - std::make_shared(reader)) {} - friend struct ReplicatedObj; - friend struct InterpreterManager; - InterpreterManager* manager_; - std::shared_ptr containerFile_; -}; - -} // namespace deploy -} // namespace torch diff --git a/torch/csrc/deploy/elf_file.cpp b/torch/csrc/deploy/elf_file.cpp deleted file mode 100644 index ca1e749868e5..000000000000 --- a/torch/csrc/deploy/elf_file.cpp +++ /dev/null @@ -1,56 +0,0 @@ -#include -#include -#include -#include - -namespace torch { -namespace deploy { - -ElfFile::ElfFile(const char* filename) : memFile_(filename) { - const char* fileData = memFile_.data(); - ehdr_ = (Elf64_Ehdr*)fileData; - checkFormat(); - - numSections_ = ehdr_->e_shnum; - shdrList_ = (Elf64_Shdr*)(fileData + ehdr_->e_shoff); - - auto strtabSecNo = ehdr_->e_shstrndx; - MULTIPY_CHECK( - strtabSecNo >= 0 && strtabSecNo < numSections_, - "e_shstrndx out of range"); - - strtabSection_ = toSection(&shdrList_[strtabSecNo]); - - sections_.reserve(numSections_); - for (const auto i : c10::irange(numSections_)) { - sections_.emplace_back(toSection(&shdrList_[i])); - } -} - -multipy::optional
<Section> ElfFile::findSection(const char* name) const { - MULTIPY_CHECK(name != nullptr, "Null name"); - multipy::optional<Section>
found = multipy::nullopt; - for (const auto& section : sections_) { - if (strcmp(name, section.name) == 0) { - found = section; - break; - } - } - - return found; -} - -void ElfFile::checkFormat() const { - // check the magic numbers - MULTIPY_CHECK( - (ehdr_->e_ident[EI_MAG0] == ELFMAG0) && - (ehdr_->e_ident[EI_MAG1] == ELFMAG1) && - (ehdr_->e_ident[EI_MAG2] == ELFMAG2) && - (ehdr_->e_ident[EI_MAG3] == ELFMAG3), - "Unexpected magic numbers"); - MULTIPY_CHECK( - ehdr_->e_ident[EI_CLASS] == ELFCLASS64, "Only support 64bit ELF file"); -} - -} // namespace deploy -} // namespace torch diff --git a/torch/csrc/deploy/elf_file.h b/torch/csrc/deploy/elf_file.h deleted file mode 100644 index 31ea7976af88..000000000000 --- a/torch/csrc/deploy/elf_file.h +++ /dev/null @@ -1,66 +0,0 @@ -#pragma once - -#include -#include -#include -#include -#include - -namespace torch { -namespace deploy { - -struct Section { - explicit Section( - const char* _name = nullptr, - const char* _start = nullptr, - size_t _len = 0) - : name(_name), start(_start), len(_len) {} - const char* name; - const char* start; - size_t len; - - operator bool() const { - return start != nullptr; - } -}; - -/* - * This class provie utilities to handle ELF file. Only support 64bit ELF file. - */ -// TODO: consolidate other ELF file related functions in loader.cpp to this file -class ElfFile { - public: - explicit ElfFile(const char* filename); - multipy::optional
<Section> findSection(const char* name) const; - private: - Section toSection(Elf64_Shdr* shdr) { - auto nameOff = shdr->sh_name; - auto shOff = shdr->sh_offset; - auto len = shdr->sh_size; - const char* name = ""; - - if (strtabSection_) { - MULTIPY_CHECK(nameOff >= 0 && nameOff < strtabSection_.len); - name = strtabSection_.start + nameOff; - } - const char* start = memFile_.data() + shOff; - return Section{name, start, len}; - } - - [[nodiscard]] const char* str(size_t off) const { - MULTIPY_CHECK(off < strtabSection_.len, "String table index out of range"); - return strtabSection_.start + off; - } - void checkFormat() const; - MemFile memFile_; - Elf64_Ehdr* ehdr_; - Elf64_Shdr* shdrList_; - size_t numSections_; - - Section strtabSection_; - std::vector<Section>
sections_; -}; - -} // namespace deploy -} // namespace torch diff --git a/torch/csrc/deploy/environment.h b/torch/csrc/deploy/environment.h deleted file mode 100644 index 5837b59a1b3b..000000000000 --- a/torch/csrc/deploy/environment.h +++ /dev/null @@ -1,69 +0,0 @@ -#pragma once -#include -#include -#include -#include -#include - -namespace torch { -namespace deploy { - -class Interpreter; - -/* - * An environment is the concept to decribe the circumstances in which a - * torch::deploy interpreter runs. In can be an xar file embedded in the binary, - * a filesystem path for the installed libraries etc. - */ -class Environment { - std::vector extraPythonPaths_; - // all zipped python libraries will be written - // under this directory - std::string extraPythonLibrariesDir_; - void setupZippedPythonModules(const std::string& pythonAppDir) { -#ifdef FBCODE_CAFFE2 - std::string execPath; - std::ifstream("/proc/self/cmdline") >> execPath; - ElfFile elfFile(execPath.c_str()); - // load the zipped torch modules - constexpr const char* ZIPPED_TORCH_NAME = ".torch_python_modules"; - auto zippedTorchSection = elfFile.findSection(ZIPPED_TORCH_NAME); - MULTIPY_CHECK( - zippedTorchSection.has_value(), "Missing the zipped torch section"); - const char* zippedTorchStart = zippedTorchSection->start; - auto zippedTorchSize = zippedTorchSection->len; - - std::string zipArchive = - std::string(pythonAppDir) + "/torch_python_modules.zip"; - auto zippedFile = fopen(zipArchive.c_str(), "wb"); - MULTIPY_CHECK( - zippedFile != nullptr, "Fail to create file: ", strerror(errno)); - fwrite(zippedTorchStart, 1, zippedTorchSize, zippedFile); - fclose(zippedFile); - - extraPythonPaths_.push_back(zipArchive); -#endif - extraPythonLibrariesDir_ = pythonAppDir; - } - - public: - explicit Environment() { - char tempDirName[] = "/tmp/torch_deploy_zipXXXXXX"; - char* tempDirectory = mkdtemp(tempDirName); - setupZippedPythonModules(tempDirectory); - } - explicit Environment(const std::string& pythonAppDir) { - setupZippedPythonModules(pythonAppDir); - } - virtual ~Environment() { - auto rmCmd = fmt::format("rm -rf {}", extraPythonLibrariesDir_); - (void)system(rmCmd.c_str()); - } - virtual void configureInterpreter(Interpreter* interp) = 0; - virtual const std::vector& getExtraPythonPaths() { - return extraPythonPaths_; - } -}; - -} // namespace deploy -} // namespace torch diff --git a/torch/csrc/deploy/example/benchmark.cpp b/torch/csrc/deploy/example/benchmark.cpp deleted file mode 100644 index e1d5b6fdcce2..000000000000 --- a/torch/csrc/deploy/example/benchmark.cpp +++ /dev/null @@ -1,336 +0,0 @@ -#include - -#include -#include -#include - -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -typedef void (*function_type)(const char*); - -bool cuda = false; - -constexpr auto latency_p = { - 25., - 50., - 95.}; //{1., 5., 25., 50., 75., 90., 95., 99., 99.25, 99.5, 99.75, 99.9}; - -// NOLINTNEXTLINE(cppcoreguidelines-pro-type-member-init) -struct Report { - std::string benchmark; - std::string strategy; - size_t n_threads; - size_t items_completed; - double work_items_per_second; - std::vector latencies; - static void report_header(std::ostream& out) { - out << "benchmark, strategy, n_threads, work_items_completed, work_items_per_second"; - for (double l : latency_p) { - out << ", p" << l << "_latency"; - } - out << ", device\n"; - } - void report(std::ostream& out) { - out << benchmark << ", " << strategy << ", " << n_threads << ", " - << items_completed << ", " << 
work_items_per_second; - for (double l : latencies) { - out << ", " << l; - } - out << ", " << (cuda ? "cuda" : "cpu") << "\n"; - } -}; - -const int min_items_to_complete = 1; - -struct RunPython { - static torch::deploy::ReplicatedObj load_and_wrap( - torch::deploy::Package& package) { - auto I = package.acquireSession(); - auto obj = I.self.attr("load_pickle")({"model", "model.pkl"}); - if (cuda) { - obj = I.global("gpu_wrapper", "GPUWrapper")({obj}); - } - return I.createMovable(obj); - } - // NOLINTNEXTLINE(cppcoreguidelines-pro-type-member-init) - RunPython( - torch::deploy::Package& package, - std::vector eg, - const torch::deploy::Interpreter* interps) - : obj_(load_and_wrap(package)), eg_(std::move(eg)), interps_(interps) {} - void operator()(int i) { - auto I = obj_.acquireSession(); - if (cuda) { - // NOLINTNEXTLINE(cppcoreguidelines-init-variables) - std::vector eg2 = {i}; - eg2.insert(eg2.end(), eg_.begin(), eg_.end()); - I.self(eg2); - } else { - I.self(eg_); - } - } - torch::deploy::ReplicatedObj obj_; - std::vector eg_; - const torch::deploy::Interpreter* interps_; -}; - -// def to_device(i, d): -// if isinstance(i, torch.Tensor): -// return i.to(device=d) -// elif isinstance(i, (tuple, list)): -// return tuple(to_device(e, d) for e in i) -// else: -// raise RuntimeError('inputs are weird') - -static torch::IValue to_device(const torch::IValue& v, torch::Device to); - -static std::vector to_device_vec( - at::ArrayRef vs, - torch::Device to) { - std::vector results; - for (const torch::IValue& v : vs) { - results.push_back(to_device(v, to)); - } - return results; -} - -static torch::IValue to_device(const torch::IValue& v, torch::Device to) { - if (v.isTensor()) { - return v.toTensor().to(to); - } else if (v.isTuple()) { - auto tup = v.toTuple(); - return c10::ivalue::Tuple::create(to_device_vec(tup->elements(), to)); - } else if (v.isList()) { - auto converted = to_device_vec(v.toListRef(), to); - torch::List result(v.toList().elementType()); - for (const torch::IValue& v : converted) { - result.push_back(v); - } - return result; - } else { - TORCH_INTERNAL_ASSERT(false, "cannot to_device"); - } -} - -static bool exists(const std::string& fname) { - std::fstream jit_file(fname); - return jit_file.good(); -} - -struct RunJIT { - RunJIT(const std::string& file_to_run, std::vector eg) - : eg_(std::move(eg)) { - if (!cuda) { - models_.push_back(torch::jit::load(file_to_run + "_jit")); - } else { - for (const auto i : c10::irange(2)) { - auto d = torch::Device(torch::DeviceType::CUDA, i); - std::stringstream qualified; - qualified << file_to_run << "_jit_" << i; - auto loaded = exists(qualified.str()) - ? 
torch::jit::load(qualified.str(), d) - : torch::jit::load(file_to_run + "_jit", d); - loaded.to(d); - models_.push_back(loaded); - } - } - } - void operator()(int i) { - if (cuda) { - const auto device_id = i % models_.size(); - auto d = torch::Device(torch::DeviceType::CUDA, device_id); - to_device( - models_[device_id].forward(to_device_vec(eg_, d)), - torch::DeviceType::CPU); - } else { - models_[0].forward(eg_); - } - } - std::vector eg_; - std::vector models_; -}; - -struct Benchmark { - // NOLINTNEXTLINE(cppcoreguidelines-pro-type-member-init) - Benchmark( - torch::deploy::InterpreterManager& manager, - size_t n_threads, - std::string strategy, - // NOLINTNEXTLINE(modernize-pass-by-value) - std::string file_to_run, - size_t n_seconds = 5) - : manager_(manager), - n_threads_(n_threads), - strategy_(strategy), - file_to_run_(file_to_run), - n_seconds_(n_seconds), - should_run_(true), - items_completed_(0), - reached_min_items_completed_(0) { - // NOLINTNEXTLINE(bugprone-branch-clone) - if (strategy == "one_python") { - manager.debugLimitInterpreters(1); - } else if (strategy == "multi_python") { - manager.debugLimitInterpreters(n_threads_); - } - } - - Report run() { - pthread_barrier_init(&first_run_, nullptr, n_threads_ + 1); - - // NOLINTNEXTLINE(cppcoreguidelines-init-variables) - torch::deploy::Package package = manager_.loadPackage(file_to_run_); - - // NOLINTNEXTLINE(cppcoreguidelines-init-variables) - std::vector eg; - { - auto I = package.acquireSession(); - - eg = I.global("builtins", "tuple")( - I.self.attr("load_pickle")({"model", "example.pkl"})) - .toIValue() - .toTupleRef() - .elements(); - } - - // NOLINTNEXTLINE(bugprone-branch-clone) - if (strategy_ == "jit") { - run_one_work_item = RunJIT(file_to_run_, std::move(eg)); - } else { - run_one_work_item = - RunPython(package, std::move(eg), manager_.allInstances().data()); - } - - // NOLINTNEXTLINE(cppcoreguidelines-init-variables) - std::vector> latencies(n_threads_); - - for (const auto i : c10::irange(n_threads_)) { - threads_.emplace_back([this, &latencies, i] { - torch::NoGradGuard guard; - // do initial work - run_one_work_item(i); - - pthread_barrier_wait(&first_run_); - size_t local_items_completed = 0; - while (should_run_) { - auto begin = std::chrono::steady_clock::now(); - run_one_work_item(i); - auto end = std::chrono::steady_clock::now(); - double work_seconds = - std::chrono::duration(end - begin).count(); - latencies[i].push_back(work_seconds); - local_items_completed++; - if (local_items_completed == min_items_to_complete) { - reached_min_items_completed_++; - } - } - items_completed_ += local_items_completed; - }); - } - - pthread_barrier_wait(&first_run_); - auto begin = std::chrono::steady_clock::now(); - auto try_stop_at = begin + std::chrono::seconds(n_seconds_); - std::this_thread::sleep_until(try_stop_at); - for (int i = 0; reached_min_items_completed_ < n_threads_; ++i) { - std::this_thread::sleep_until( - begin + (i + 2) * std::chrono::seconds(n_seconds_)); - } - should_run_ = false; - for (std::thread& thread : threads_) { - thread.join(); - } - auto end = std::chrono::steady_clock::now(); - // NOLINTNEXTLINE(cppcoreguidelines-init-variables) - double total_seconds = std::chrono::duration(end - begin).count(); - Report report; - report.benchmark = file_to_run_; - report.strategy = strategy_; - report.n_threads = n_threads_; - report.items_completed = items_completed_; - report.work_items_per_second = items_completed_ / total_seconds; - reportLatencies(report.latencies, latencies); - 
run_one_work_item = nullptr; - return report; - } - - private: - void reportLatencies( - std::vector& results, - const std::vector>& latencies) { - // NOLINTNEXTLINE(cppcoreguidelines-init-variables) - std::vector flat_latencies; - for (const auto& elem : latencies) { - flat_latencies.insert(flat_latencies.end(), elem.begin(), elem.end()); - } - std::sort(flat_latencies.begin(), flat_latencies.end()); - for (double target : latency_p) { - size_t idx = size_t(flat_latencies.size() * target / 100.0); - double time = flat_latencies.size() == 0 - ? 0 - : flat_latencies.at(std::min(flat_latencies.size() - 1, idx)); - results.push_back(time); - } - } - torch::deploy::InterpreterManager& manager_; - size_t n_threads_; - std::string strategy_; - std::string file_to_run_; - size_t n_seconds_; - pthread_barrier_t first_run_; - std::atomic should_run_; - std::atomic items_completed_; - std::atomic reached_min_items_completed_; - std::vector threads_; - std::function run_one_work_item; -}; - -// NOLINTNEXTLINE(bugprone-exception-escape) -int main(int argc, char* argv[]) { - int max_thread = atoi(argv[1]); - cuda = std::string(argv[2]) == "cuda"; - // NOLINTNEXTLINE(cppcoreguidelines-init-variables) - bool jit_enable = std::string(argv[3]) == "jit"; - Report::report_header(std::cout); - torch::deploy::InterpreterManager manager(max_thread); - - // make sure gpu_wrapper.py is in the import path - for (auto& interp : manager.allInstances()) { - auto I = interp.acquireSession(); - I.global("sys", "path").attr("append")({"torch/csrc/deploy/example"}); - } - - auto n_threads = {1, 2, 4, 8, 16, 32, 40}; - for (const auto i : c10::irange(4, argc)) { - std::string model_file = argv[i]; - for (int n_thread : n_threads) { - if (n_thread > max_thread) { - continue; - } - for (std::string strategy : {"one_python", "multi_python", "jit"}) { - if (strategy == "jit") { - if (!jit_enable) { - continue; - } - if (!exists(model_file + "_jit")) { - continue; - } - } - Benchmark b(manager, n_thread, strategy, model_file); - Report r = b.run(); - r.report(std::cout); - } - } - } - return 0; -} diff --git a/torch/csrc/deploy/example/examples.py b/torch/csrc/deploy/example/examples.py deleted file mode 100644 index 73eeb2149b54..000000000000 --- a/torch/csrc/deploy/example/examples.py +++ /dev/null @@ -1,268 +0,0 @@ -from typing import Tuple, List, Dict - -import torch -import torch.nn as nn -from torch import Tensor - - -class Simple(torch.nn.Module): - def __init__(self, N, M): - super().__init__() - self.weight = torch.nn.Parameter(torch.rand(N, M)) - - def forward(self, input): - output = self.weight + input - return output - - -def load_library(): - torch.ops.load_library("my_so.so") - - -def conv1x1(in_planes, out_planes, stride=1): - """1x1 convolution""" - return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False) - - -def conv3x3(in_planes, out_planes, stride=1): - """3x3 convolution with padding""" - return nn.Conv2d( - in_planes, out_planes, kernel_size=3, stride=stride, padding=1, bias=False - ) - - -class BasicBlock(nn.Module): - expansion = 1 - - def __init__(self, inplanes, planes, stride=1, downsample=None): - super(BasicBlock, self).__init__() - self.conv1 = conv3x3(inplanes, planes, stride) - self.bn1 = nn.BatchNorm2d(planes) - self.relu = nn.ReLU(inplace=True) - self.conv2 = conv3x3(planes, planes) - self.bn2 = nn.BatchNorm2d(planes) - self.downsample = downsample - self.stride = stride - - def forward(self, x): - residual = x - - out = self.conv1(x) - out = self.bn1(out) - out = 
self.relu(out) - - out = self.conv2(out) - out = self.bn2(out) - - if self.downsample is not None: - residual = self.downsample(x) - - out += residual - out = self.relu(out) - - return out - - -class ResNet(nn.Module): - def __init__(self, block, layers, num_classes=1000): - super(ResNet, self).__init__() - self.inplanes = 64 - self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False) - self.bn1 = nn.BatchNorm2d(64) - self.relu = nn.ReLU(inplace=True) - self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) - self.layer1 = self._make_layer(block, 64, layers[0]) - self.layer2 = self._make_layer(block, 128, layers[1], stride=2) - self.layer3 = self._make_layer(block, 256, layers[2], stride=2) - self.layer4 = self._make_layer(block, 512, layers[3], stride=2) - self.avgpool = nn.AdaptiveAvgPool2d((1, 1)) - self.fc = nn.Linear(512 * block.expansion, num_classes) - - for m in self.modules(): - if isinstance(m, nn.Conv2d): - nn.init.kaiming_normal_(m.weight, mode="fan_out", nonlinearity="relu") - elif isinstance(m, nn.BatchNorm2d): - nn.init.constant_(m.weight, 1) - nn.init.constant_(m.bias, 0) - - def _make_layer(self, block, planes, blocks, stride=1): - downsample = None - if stride != 1 or self.inplanes != planes * block.expansion: - downsample = nn.Sequential( - conv1x1(self.inplanes, planes * block.expansion, stride), - nn.BatchNorm2d(planes * block.expansion), - ) - - layers = [] - layers.append(block(self.inplanes, planes, stride, downsample)) - self.inplanes = planes * block.expansion - for _ in range(1, blocks): - layers.append(block(self.inplanes, planes)) - - return nn.Sequential(*layers) - - def forward(self, x): - x = self.conv1(x) - x = self.bn1(x) - x = self.relu(x) - x = self.maxpool(x) - - x = self.layer1(x) - x = self.layer2(x) - x = self.layer3(x) - x = self.layer4(x) - - x = self.avgpool(x) - x = x.view(x.size(0), -1) - x = self.fc(x) - - return x - - -def resnet18(): - return ResNet(BasicBlock, [2, 2, 2, 2]) - - -class BatchedModel(nn.Module): - def forward(self, input1: Tensor, input2: Tensor) -> Tuple[Tensor, Tensor]: - return (input1 * -1, input2 * -1) - - def make_prediction( - self, input: List[Tuple[Tensor, Tensor]] - ) -> List[Tuple[Tensor, Tensor]]: - return [self.forward(i[0], i[1]) for i in input] - - def make_batch( - self, mega_batch: List[Tuple[Tensor, Tensor, int]], goals: Dict[str, str] - ) -> List[List[Tuple[Tensor, Tensor, int]]]: - max_bs = int(goals["max_bs"]) - return [ - mega_batch[start_idx : start_idx + max_bs] - for start_idx in range(0, len(mega_batch), max_bs) - ] - - -class MultiReturn(torch.nn.Module): - def __init__(self): - super(MultiReturn, self).__init__() - - def forward(self, t: Tuple[Tensor, Tensor]) -> Tuple[Tuple[Tensor, Tensor], Tuple[Tensor, Tensor]]: - a, b = t - result = ((a.masked_fill_(b, 0.1), b), (torch.ones_like(a), b)) - return result - - -multi_return_metadata = r""" -{ - "metadata_container": { - "forward": { - "named_input_metadata": { - "t": { - "argument_type": { - "tuple": { - "tuple_elements": [ - { - "tensor": 1 - }, - { - "tensor": 6 - } - ] - } - }, - "optional_argument": false, - "metadata": { - "dense_features": { - "feature_desc": [ - { - "feature_name": "test_feature_1", - "feature_id": 1 - } - ], - "expected_shape": { - "dims": [ - -1, - 1 - ], - "unknown_rank": false - }, - "data_type": 1, - "feature_store_feature_type": 0 - } - } - } - }, - "positional_output_metadata": [ - { - "argument_type": { - "tuple": { - "tuple_elements": [ - { - "tensor": 1 - }, - { - "tensor": 6 - } - ] - } 
- }, - "optional_argument": false, - "metadata": { - "dense_features": { - "feature_desc": [ - { - "feature_name": "test_feature_1", - "feature_id": 1 - } - ], - "expected_shape": { - "dims": [ - -1, - 1 - ], - "unknown_rank": false - }, - "data_type": 1, - "feature_store_feature_type": 0 - } - } - }, - { - "argument_type": { - "tuple": { - "tuple_elements": [ - { - "tensor": 1 - }, - { - "tensor": 6 - } - ] - } - }, - "optional_argument": false, - "metadata": { - "dense_features": { - "feature_desc": [ - { - "feature_name": "test_feature_3", - "feature_id": 3 - } - ], - "expected_shape": { - "dims": [ - -1, - 1 - ], - "unknown_rank": false - }, - "data_type": 1, - "feature_store_feature_type": 0 - } - } - } - ] - } - } -} -""" diff --git a/torch/csrc/deploy/example/fx/examples.py b/torch/csrc/deploy/example/fx/examples.py deleted file mode 100644 index ef875e9885e6..000000000000 --- a/torch/csrc/deploy/example/fx/examples.py +++ /dev/null @@ -1,16 +0,0 @@ -import torch.fx -try: - from .some_dependency import a_non_torch_leaf -except ImportError: - from some_dependency import a_non_torch_leaf - - -torch.fx.wrap('a_non_torch_leaf') -class SimpleWithLeaf(torch.nn.Module): - def __init__(self, N, M): - super().__init__() - self.weight = torch.nn.Parameter(torch.rand(N, M)) - - def forward(self, input): - output = self.weight + a_non_torch_leaf(1, input) - return output diff --git a/torch/csrc/deploy/example/fx/some_dependency.py b/torch/csrc/deploy/example/fx/some_dependency.py deleted file mode 100644 index a9abb0360ae8..000000000000 --- a/torch/csrc/deploy/example/fx/some_dependency.py +++ /dev/null @@ -1,4 +0,0 @@ -# dependency for torch package - -def a_non_torch_leaf(a: int, b): - return a * b diff --git a/torch/csrc/deploy/example/generate_examples.py b/torch/csrc/deploy/example/generate_examples.py deleted file mode 100644 index 591ccc5e850e..000000000000 --- a/torch/csrc/deploy/example/generate_examples.py +++ /dev/null @@ -1,96 +0,0 @@ -""" -Generate the example files that torchpy_test uses. -""" -import argparse -from pathlib import Path - -import torch -from torch.package import PackageExporter -from torch.fx import symbolic_trace - -try: - from .examples import Simple, resnet18, MultiReturn, multi_return_metadata, load_library, BatchedModel -except ImportError: - from examples import Simple, resnet18, MultiReturn, multi_return_metadata, load_library, BatchedModel - -try: - from .fx.examples import SimpleWithLeaf -except ImportError: - from fx.examples import SimpleWithLeaf - -try: - from .tensorrt_example import make_trt_module -except ImportError: - from tensorrt_example import make_trt_module - -def generate_fx_example(): - name = 'simple_leaf' - model = SimpleWithLeaf(5, 10) - graph_module : torch.fx.GraphModule = symbolic_trace(model) - with PackageExporter(str(p / (name + "_fx"))) as e: - e.intern("**") - e.save_pickle("model", "model.pkl", graph_module) - - model_jit = torch.jit.script(model) - model_jit.save(str(p / (name + "_jit"))) - -def save(name, model, model_jit=None, eg=None, featurestore_meta=None): - with PackageExporter(str(p / name)) as e: - e.mock("iopath.**") - e.intern("**") - e.save_pickle("model", "model.pkl", model) - if eg: - e.save_pickle("model", "example.pkl", eg) - if featurestore_meta: - # TODO(whc) can this name come from buck somehow, - # so it's consistent with predictor_config_constants::METADATA_FILE_NAME()? 
- e.save_text("extra_files", "metadata.json", featurestore_meta) - - if model_jit: - model_jit.save(str(p / (name + "_jit"))) - - -parser = argparse.ArgumentParser(description="Generate Examples") -parser.add_argument("--install_dir", help="Root directory for all output files") - - -if __name__ == "__main__": - args = parser.parse_args() - if args.install_dir is None: - p = Path(__file__).parent / "generated" - p.mkdir(exist_ok=True) - else: - p = Path(args.install_dir) - - resnet = resnet18() - resnet.eval() - resnet_eg = torch.rand(1, 3, 224, 224) - resnet_traced = torch.jit.trace(resnet, resnet_eg) - save("resnet", resnet, resnet_traced, (resnet_eg,)) - - simple = Simple(10, 20) - save("simple", simple, torch.jit.script(simple), (torch.rand(10, 20),)) - - multi_return = MultiReturn() - save("multi_return", multi_return, torch.jit.script(multi_return), (torch.rand(10, 20),), multi_return_metadata) - - # used for torch deploy/package tests in predictor - batched_model = BatchedModel() - save("batched_model", batched_model) - - with PackageExporter(str(p / "load_library")) as e: - e.mock("iopath.**") - e.intern("**") - e.save_pickle("fn", "fn.pkl", load_library) - - generate_fx_example() - - with PackageExporter(p / "uses_distributed") as e: - e.save_source_string("uses_distributed", "import torch.distributed; assert torch.distributed.is_available()") - - with PackageExporter(str(p / "make_trt_module")) as e: - e.extern("tensorrt") - e.add_dependency("tensorrt") - e.mock("iopath.**") - e.intern("**") - e.save_pickle("make_trt_module", "model.pkl", make_trt_module) diff --git a/torch/csrc/deploy/example/gpu_wrapper.py b/torch/csrc/deploy/example/gpu_wrapper.py deleted file mode 100644 index e40f0f8e8fc1..000000000000 --- a/torch/csrc/deploy/example/gpu_wrapper.py +++ /dev/null @@ -1,66 +0,0 @@ -# used by the benchmarking program to wrap cpu models for GPU use -import torch -from copy import deepcopy - -def to_device(i, d): - if isinstance(i, torch.Tensor): - return i.to(device=d) - elif isinstance(i, (tuple, list)): - return tuple(to_device(e, d) for e in i) - else: - raise RuntimeError('inputs are weird') - -class GPUWrapper(torch.nn.Module): - def __init__(self, root): - super().__init__() - self.models = [] - self.streams = {} - for i in range(torch.cuda.device_count()): - m = deepcopy(root) if i != 0 else root - d = f'cuda:{i}' - m.to(device=d) - self.models.append((m, d)) - - def __getstate__(self): - return self.models - - def __setstate__(self, models): - super().__init__() - self.models = models - self.streams = {} - for m, d in models: - torch.cuda.synchronize(d) - - # roi_align, 2210 count, ROIAlign_cuda.cu: add threadsync: problem goes away, return rand problem goes away, - # use different streams here, problem goes away. 
- def forward(self, tid, *args): - m, d = self.models[tid % len(self.models)] - if tid not in self.streams: - self.streams[tid] = torch.cuda.Stream(d) - s = self.streams[tid] - with torch.cuda.stream(s): - iput = to_device(args, d) - r = to_device(m(*iput), 'cpu') - return r - - -if __name__ == '__main__': - def check_close(a, b): - if isinstance(a, (list, tuple)): - for ae, be in zip(a, b): - check_close(ae, be) - else: - print(torch.max(torch.abs(a - b))) - assert torch.allclose(a, b) - - import sys - from torch.package import PackageImporter - i = PackageImporter(sys.argv[1]) - torch.version.interp = 0 - model = i.loadPickle('model', 'model.pkl') - eg = i.loadPickle('model', 'example.pkl') - r = model(*eg) - - gpu_model = GPUWrapper(model) - r2 = gpu_model(*eg) - check_close(r, r2) diff --git a/torch/csrc/deploy/example/simple.pt b/torch/csrc/deploy/example/simple.pt deleted file mode 100644 index 50f9a087aa82..000000000000 Binary files a/torch/csrc/deploy/example/simple.pt and /dev/null differ diff --git a/torch/csrc/deploy/example/tensorrt_example.py b/torch/csrc/deploy/example/tensorrt_example.py deleted file mode 100644 index 7e97fd8ea655..000000000000 --- a/torch/csrc/deploy/example/tensorrt_example.py +++ /dev/null @@ -1,63 +0,0 @@ -from typing import List, Any -import pickle -import torch - - -class TestTRTModule(torch.nn.Module): - def __init__(self, engine, input_names=None, output_names=None, fp16_output=False): - super(TestTRTModule, self).__init__() - self.engine = engine - self.input_names = input_names - self.output_names = output_names - - # Indicate output is in fp16 - self.fp16_output = fp16_output - - def forward(self, *inputs): - batch_size = inputs[0].shape[0] - contiguous_inputs: List[torch.Tensor] = [i.contiguous() for i in inputs] - bindings: List[Any] = [None] * (len(self.input_names) + len(self.output_names)) - - # create output tensors - outputs: List[torch.Tensor] = [] - for _, output_name in enumerate(self.output_names): - idx: int = self.engine.get_binding_index(output_name) - shape = (batch_size,) + tuple(self.engine.get_binding_shape(idx)) - output = torch.empty(size=shape, dtype=torch.float32, device="cuda") - outputs.append(output) - bindings[idx] = output.data_ptr() - - for i, input_name in enumerate(self.input_names): - idx = self.engine.get_binding_index(input_name) - bindings[idx] = contiguous_inputs[i].data_ptr() - - context = self.engine.create_execution_context() - context.execute_async( - batch_size, bindings, torch.cuda.current_stream().cuda_stream - ) - - if len(outputs) == 1: - return outputs[0] - - return tuple(outputs) - -def make_trt_module(): - import tensorrt as trt - logger = trt.Logger(trt.Logger.WARNING) - builder = trt.Builder(logger) - network = builder.create_network() - - x = network.add_input("x", shape=(1, 2, 3), dtype=trt.float32) - layer = network.add_elementwise(x, x, trt.ElementWiseOperation.SUM) - layer.name = "add" - output = layer.get_output(0) - output.name = "output" - network.mark_output(output) - output.dtype = trt.float32 - - builder.max_batch_size = 1024 - builder_config = builder.create_builder_config() - builder_config.max_workspace_size = 1 << 25 - # Test engine can be serialized and loaded correctly. 
- serialized_engine = pickle.dumps(builder.build_engine(network, builder_config)) - return TestTRTModule(pickle.loads(serialized_engine), ["x"], ["output"]) diff --git a/torch/csrc/deploy/interactive_embedded_interpreter.cpp b/torch/csrc/deploy/interactive_embedded_interpreter.cpp deleted file mode 100644 index f730176c12ff..000000000000 --- a/torch/csrc/deploy/interactive_embedded_interpreter.cpp +++ /dev/null @@ -1,37 +0,0 @@ -/* - * The tool provides a shell to the embedded interpreter. Useful to inspect the - * state of the embedding interpreter interactively. - */ -#include -#include -#include - -C10_DEFINE_string( - python_path, - "", - "The root of the installed python libraries in the system"); -C10_DEFINE_string(pyscript, "", "The path of the python script to execute"); - -// NOLINTNEXTLINE(bugprone-exception-escape) -int main(int argc, char** argv) { - c10::ParseCommandLineFlags(&argc, &argv); - - if (FLAGS_python_path.size() > 0) { - LOG(INFO) << "Will add " << FLAGS_python_path << " to python sys.path"; - } - std::shared_ptr env = - std::make_shared(FLAGS_python_path); - // create multiple interpreter instances so the tool does not just cover the - // simplest case with a single interpreter instance. - torch::deploy::InterpreterManager m(2, env); - auto I = m.acquireOne(); - - if (FLAGS_pyscript.size() > 0) { - auto realpath = I.global("os", "path").attr("expanduser")({FLAGS_pyscript}); - I.global("runpy", "run_path")({realpath}); - } else { - c10::ArrayRef noArgs; - I.global("pdb", "set_trace")(noArgs); - } - return 0; -} diff --git a/torch/csrc/deploy/interpreter/CMakeLists.txt b/torch/csrc/deploy/interpreter/CMakeLists.txt deleted file mode 100644 index 33b71e348396..000000000000 --- a/torch/csrc/deploy/interpreter/CMakeLists.txt +++ /dev/null @@ -1,117 +0,0 @@ -SET(INTERPRETER_DIR "${DEPLOY_DIR}/interpreter" ) -SET(INTERPRETER_DIR "${DEPLOY_DIR}/interpreter" PARENT_SCOPE) -SET(PYTORCH_ROOT "${CMAKE_CURRENT_SOURCE_DIR}/../../../../") - -if(NOT TORCH_INSTALL_LIB_DIR) - set(TORCH_INSTALL_LIB_DIR lib) -endif() - -# Build cpython -SET(PYTHON_INSTALL_DIR "${INTERPRETER_DIR}/cpython") -SET(PYTHON_INC_DIR "${PYTHON_INSTALL_DIR}/include/python3.8") -SET(PYTHON_INC_DIR "${PYTHON_INSTALL_DIR}/include/python3.8" PARENT_SCOPE) -SET(PYTHON_LIB "${PYTHON_INSTALL_DIR}/lib/libpython3.8.a") -SET(PYTHON_BIN "${PYTHON_INSTALL_DIR}/bin/python3") -ExternalProject_Add( - cpython - PREFIX cpython - GIT_REPOSITORY https://github.com/python/cpython.git - GIT_TAG v3.8.6 - UPDATE_COMMAND "" - PATCH_COMMAND git apply ${CMAKE_CURRENT_SOURCE_DIR}/cpython_patch.diff - BUILD_IN_SOURCE True - CONFIGURE_COMMAND PYTHON_INSTALL_DIR=${PYTHON_INSTALL_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/configure_cpython.sh - BUILD_COMMAND CFLAGS=-fPIC CPPFLAGS=-fPIC make -j8 - INSTALL_COMMAND make install - BYPRODUCTS ${PYTHON_MODULES} ${PYTHON_LIB} ${PYTHON_BIN} ${PYTHON_INSTALL_DIR}/lib/libssl.a ${PYTHON_INSTALL_DIR}/lib/libcrypto.a - LOG_OUTPUT_ON_FAILURE True -) - -# We find the built python modules, this is confusing because python build already outputs -# the modules in a strange nested path, and then that path is relative to the -# Cmake ExternalProject root in the cmake build dir. 
-ExternalProject_Get_property(cpython SOURCE_DIR) -SET(PYTHON_MODULE_DIR "${SOURCE_DIR}/build/temp.linux-x86_64-3.8/${SOURCE_DIR}/Modules") -SET(PYTHON_STDLIB_DIR "${SOURCE_DIR}/Lib") -SET(PYTHON_STDLIB "${PYTHON_INSTALL_DIR}/lib/libpython_stdlib3.8.a") -# Then we use a hardcoded list of expected module names and include them in our lib -include("CMakePythonModules.txt") -ExternalProject_Add_Step( - cpython - archive_stdlib - DEPENDEES install - BYPRODUCTS ${PYTHON_STDLIB} - COMMAND ar -rc ${PYTHON_STDLIB} ${PYTHON_MODULES} - VERBATIM -) -# Get python typing extension, needed by torch -SET(TYPING_PKG "${INTERPRETER_DIR}/third_party/typing_extensions.py") -ExternalProject_Add( - typing - PREFIX typing - GIT_REPOSITORY https://github.com/python/typing.git - GIT_TAG 3.7.4.3 - UPDATE_COMMAND "" - CONFIGURE_COMMAND "" - BUILD_COMMAND "" - INSTALL_COMMAND cp ../typing/typing_extensions/src_py3/typing_extensions.py ${TYPING_PKG} - BYPRODUCTS ${TYPING_PKG} - LOG_OUTPUT_ON_FAILURE True -) - -# Output files generated by freeze script, containing frozen bytecode -SET(FROZEN_DIR "${INTERPRETER_DIR}/frozen") -set(FROZEN_FILES - ${FROZEN_DIR}/main.c - ${FROZEN_DIR}/bytecode_0.c - ${FROZEN_DIR}/bytecode_1.c - ${FROZEN_DIR}/bytecode_2.c - ${FROZEN_DIR}/bytecode_3.c - ${FROZEN_DIR}/bytecode_4.c -) - -file(GLOB_RECURSE PYTORCH_PYTHON_SOURCE_FILES ${PYTORCH_ROOT}/torch/*.py) - -# Packages to freeze: python stdlib, typing extension, and torch -add_custom_command( - OUTPUT ${FROZEN_FILES} - WORKING_DIRECTORY ${INTERPRETER_DIR} - COMMAND mkdir -p ${FROZEN_DIR} - COMMAND ${PYTHON_BIN} ${PYTORCH_ROOT}/torch/utils/_freeze.py ${PYTHON_STDLIB_DIR} ${TYPING_PKG} ${PYTORCH_ROOT}/torch --oss --install_dir ${FROZEN_DIR} --verbose - DEPENDS cpython typing ${PYTORCH_PYTHON_SOURCE_FILES} - VERBATIM -) - -# instantiate a library based on the objects that make up torch_python -# make sure system python isn't used here -target_include_directories(torch_python_obj BEFORE PRIVATE ${PYTHON_INC_DIR}) -add_library(torch_python_static STATIC $) -# Build the interpreter lib, designed to be standalone and dlopened -# We bake the python and torch_python binding objs into libinterpreter -set(INTERPRETER_LIB_SOURCES - ${INTERPRETER_DIR}/interpreter_impl.cpp - ${INTERPRETER_DIR}/builtin_registry.cpp - ${INTERPRETER_DIR}/register_frozenpython.cpp - ${INTERPRETER_DIR}/import_find_sharedfuncptr.cpp - ${FROZEN_FILES} - ${LINKER_SCRIPT} -) -add_library(torch_deployinterpreter ${INTERPRETER_LIB_SOURCES} ${LINKER_SCRIPT}) -# need to ensure headers are present before any .cpp in interpreter are compiled, -# but cpp themselves don't clearly depend on cpython so there is a race otherwise -add_dependencies(torch_deployinterpreter cpython) -add_dependencies(torch_python_obj cpython) -if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU") - target_compile_options(torch_deployinterpreter PRIVATE -fno-gnu-unique) -endif() - -target_include_directories(torch_deployinterpreter PRIVATE ${INTERPRETER_DIR}) -target_include_directories(torch_deployinterpreter BEFORE PUBLIC ${PYTHON_INC_DIR}) - -target_link_libraries(torch_deployinterpreter PRIVATE ${PYTHON_LIB} ${PYTHON_STDLIB} torch_python_static) -target_link_libraries(torch_deployinterpreter PRIVATE fmt::fmt-header-only protobuf::libprotobuf-lite) -target_link_libraries(torch_deployinterpreter PRIVATE ${PYTHON_INSTALL_DIR}/lib/libssl.a ${PYTHON_INSTALL_DIR}/lib/libcrypto.a) -target_link_libraries(torch_deployinterpreter PRIVATE pybind::pybind11) - -# expose torch_python_static for multipy -install(TARGETS 
torch_python_static DESTINATION "${TORCH_INSTALL_LIB_DIR}") diff --git a/torch/csrc/deploy/interpreter/CMakePythonModules.txt b/torch/csrc/deploy/interpreter/CMakePythonModules.txt deleted file mode 100644 index c6bc9cab76ff..000000000000 --- a/torch/csrc/deploy/interpreter/CMakePythonModules.txt +++ /dev/null @@ -1,69 +0,0 @@ -SET(PYTHON_MODULES - ${PYTHON_MODULE_DIR}/arraymodule.o - ${PYTHON_MODULE_DIR}/_asynciomodule.o - ${PYTHON_MODULE_DIR}/audioop.o - ${PYTHON_MODULE_DIR}/binascii.o - ${PYTHON_MODULE_DIR}/_bisectmodule.o - ${PYTHON_MODULE_DIR}/_blake2/blake2module.o ${PYTHON_MODULE_DIR}/_blake2/blake2b_impl.o ${PYTHON_MODULE_DIR}/_blake2/blake2s_impl.o - ${PYTHON_MODULE_DIR}/_bz2module.o - ${PYTHON_MODULE_DIR}/cmathmodule.o - # ${PYTHON_MODULE_DIR}/_math.o - ${PYTHON_MODULE_DIR}/cjkcodecs/_codecs_cn.o - ${PYTHON_MODULE_DIR}/cjkcodecs/_codecs_hk.o - ${PYTHON_MODULE_DIR}/cjkcodecs/_codecs_iso2022.o - ${PYTHON_MODULE_DIR}/cjkcodecs/_codecs_jp.o - ${PYTHON_MODULE_DIR}/cjkcodecs/_codecs_kr.o - ${PYTHON_MODULE_DIR}/cjkcodecs/_codecs_tw.o - ${PYTHON_MODULE_DIR}/_contextvarsmodule.o - ${PYTHON_MODULE_DIR}/_cryptmodule.o - ${PYTHON_MODULE_DIR}/_csv.o - ${PYTHON_MODULE_DIR}/_ctypes/_ctypes.o ${PYTHON_MODULE_DIR}/_ctypes/callbacks.o ${PYTHON_MODULE_DIR}/_ctypes/callproc.o ${PYTHON_MODULE_DIR}/_ctypes/stgdict.o ${PYTHON_MODULE_DIR}/_ctypes/cfield.o - ${PYTHON_MODULE_DIR}/_ctypes/_ctypes_test.o - ${PYTHON_MODULE_DIR}/_cursesmodule.o - ${PYTHON_MODULE_DIR}/_curses_panel.o - ${PYTHON_MODULE_DIR}/_datetimemodule.o - ${PYTHON_MODULE_DIR}/_decimal/_decimal.o ${PYTHON_MODULE_DIR}/_decimal/libmpdec/basearith.o ${PYTHON_MODULE_DIR}/_decimal/libmpdec/constants.o ${PYTHON_MODULE_DIR}/_decimal/libmpdec/context.o ${PYTHON_MODULE_DIR}/_decimal/libmpdec/convolute.o ${PYTHON_MODULE_DIR}/_decimal/libmpdec/crt.o ${PYTHON_MODULE_DIR}/_decimal/libmpdec/difradix2.o ${PYTHON_MODULE_DIR}/_decimal/libmpdec/fnt.o ${PYTHON_MODULE_DIR}/_decimal/libmpdec/fourstep.o ${PYTHON_MODULE_DIR}/_decimal/libmpdec/io.o ${PYTHON_MODULE_DIR}/_decimal/libmpdec/memory.o ${PYTHON_MODULE_DIR}/_decimal/libmpdec/mpdecimal.o ${PYTHON_MODULE_DIR}/_decimal/libmpdec/numbertheory.o ${PYTHON_MODULE_DIR}/_decimal/libmpdec/sixstep.o ${PYTHON_MODULE_DIR}/_decimal/libmpdec/transpose.o - ${PYTHON_MODULE_DIR}/_elementtree.o - ${PYTHON_MODULE_DIR}/fcntlmodule.o - ${PYTHON_MODULE_DIR}/grpmodule.o - ${PYTHON_MODULE_DIR}/_hashopenssl.o - ${PYTHON_MODULE_DIR}/_heapqmodule.o - ${PYTHON_MODULE_DIR}/_json.o - ${PYTHON_MODULE_DIR}/_lsprof.o - ${PYTHON_MODULE_DIR}/_lzmamodule.o - ${PYTHON_MODULE_DIR}/mathmodule.o - ${PYTHON_MODULE_DIR}/md5module.o - ${PYTHON_MODULE_DIR}/mmapmodule.o - ${PYTHON_MODULE_DIR}/cjkcodecs/multibytecodec.o - ${PYTHON_MODULE_DIR}/_multiprocessing/multiprocessing.o ${PYTHON_MODULE_DIR}/_multiprocessing/semaphore.o - ${PYTHON_MODULE_DIR}/nismodule.o - ${PYTHON_MODULE_DIR}/_opcode.o - ${PYTHON_MODULE_DIR}/ossaudiodev.o - ${PYTHON_MODULE_DIR}/parsermodule.o - ${PYTHON_MODULE_DIR}/_pickle.o - ${PYTHON_MODULE_DIR}/_posixsubprocess.o - ${PYTHON_MODULE_DIR}/pyexpat.o ${PYTHON_MODULE_DIR}/expat/xmlparse.o ${PYTHON_MODULE_DIR}/expat/xmlrole.o ${PYTHON_MODULE_DIR}/expat/xmltok.o - ${PYTHON_MODULE_DIR}/_queuemodule.o - ${PYTHON_MODULE_DIR}/_randommodule.o - ${PYTHON_MODULE_DIR}/readline.o - ${PYTHON_MODULE_DIR}/resource.o - ${PYTHON_MODULE_DIR}/selectmodule.o - ${PYTHON_MODULE_DIR}/sha1module.o - ${PYTHON_MODULE_DIR}/sha256module.o - ${PYTHON_MODULE_DIR}/_sha3/sha3module.o - ${PYTHON_MODULE_DIR}/sha512module.o - ${PYTHON_MODULE_DIR}/socketmodule.o - 
${PYTHON_MODULE_DIR}/spwdmodule.o - ${PYTHON_MODULE_DIR}/_ssl.o - ${PYTHON_MODULE_DIR}/_struct.o - ${PYTHON_MODULE_DIR}/syslogmodule.o - ${PYTHON_MODULE_DIR}/termios.o - ${PYTHON_MODULE_DIR}/_testbuffer.o - ${PYTHON_MODULE_DIR}/_testcapimodule.o - ${PYTHON_MODULE_DIR}/_testimportmultiple.o - ${PYTHON_MODULE_DIR}/_testmultiphase.o - ${PYTHON_MODULE_DIR}/unicodedata.o - ${PYTHON_MODULE_DIR}/xxlimited.o - ${PYTHON_MODULE_DIR}/_xxtestfuzz/_xxtestfuzz.o ${PYTHON_MODULE_DIR}/_xxtestfuzz/fuzzer.o - ${PYTHON_MODULE_DIR}/zlibmodule.o -) diff --git a/torch/csrc/deploy/interpreter/Optional.hpp b/torch/csrc/deploy/interpreter/Optional.hpp deleted file mode 100644 index 92b73d7f6fbb..000000000000 --- a/torch/csrc/deploy/interpreter/Optional.hpp +++ /dev/null @@ -1,1107 +0,0 @@ -// Copyright (C) 2011 - 2012 Andrzej Krzemienski. -// -// Use, modification, and distribution is subject to the Boost Software -// License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at -// http://www.boost.org/LICENSE_1_0.txt) -// -// The idea and interface is based on Boost.Optional library -// authored by Fernando Luis Cacciola Carballal -// -// Source: https://github.com/akrzemi1/Optional - -#ifndef ___OPTIONAL_HPP___ -#define ___OPTIONAL_HPP___ - -#include -#include -#include -#include -#include -#include -#include - -#define TR2_OPTIONAL_REQUIRES(...) \ - typename std::enable_if<__VA_ARGS__::value, bool>::type = false - -#if defined __GNUC__ // NOTE: GNUC is also defined for Clang -#if (__GNUC__ == 4) && (__GNUC_MINOR__ >= 8) -#define TR2_OPTIONAL_GCC_4_8_AND_HIGHER___ -#elif (__GNUC__ > 4) -#define TR2_OPTIONAL_GCC_4_8_AND_HIGHER___ -#endif - -#if (__GNUC__ == 4) && (__GNUC_MINOR__ >= 7) -#define TR2_OPTIONAL_GCC_4_7_AND_HIGHER___ -#elif (__GNUC__ > 4) -#define TR2_OPTIONAL_GCC_4_7_AND_HIGHER___ -#endif - -#if (__GNUC__ == 4) && (__GNUC_MINOR__ == 8) && (__GNUC_PATCHLEVEL__ >= 1) -#define TR2_OPTIONAL_GCC_4_8_1_AND_HIGHER___ -#elif (__GNUC__ == 4) && (__GNUC_MINOR__ >= 9) -#define TR2_OPTIONAL_GCC_4_8_1_AND_HIGHER___ -#elif (__GNUC__ > 4) -#define TR2_OPTIONAL_GCC_4_8_1_AND_HIGHER___ -#endif -#endif - -#if defined __clang_major__ -#if (__clang_major__ == 3 && __clang_minor__ >= 5) -#define TR2_OPTIONAL_CLANG_3_5_AND_HIGHTER_ -#elif (__clang_major__ > 3) -#define TR2_OPTIONAL_CLANG_3_5_AND_HIGHTER_ -#endif -#if defined TR2_OPTIONAL_CLANG_3_5_AND_HIGHTER_ -#define TR2_OPTIONAL_CLANG_3_4_2_AND_HIGHER_ -#elif ( \ - __clang_major__ == 3 && __clang_minor__ == 4 && __clang_patchlevel__ >= 2) -#define TR2_OPTIONAL_CLANG_3_4_2_AND_HIGHER_ -#endif -#endif - -#if defined _MSC_VER -#if (_MSC_VER >= 1900) -#define TR2_OPTIONAL_MSVC_2015_AND_HIGHER___ -#endif -#endif - -#if defined __clang__ -#if (__clang_major__ > 2) || (__clang_major__ == 2) && (__clang_minor__ >= 9) -#define OPTIONAL_HAS_THIS_RVALUE_REFS 1 -#else -#define OPTIONAL_HAS_THIS_RVALUE_REFS 0 -#endif -#elif defined TR2_OPTIONAL_GCC_4_8_1_AND_HIGHER___ -#define OPTIONAL_HAS_THIS_RVALUE_REFS 1 -#elif defined TR2_OPTIONAL_MSVC_2015_AND_HIGHER___ -#define OPTIONAL_HAS_THIS_RVALUE_REFS 1 -#else -#define OPTIONAL_HAS_THIS_RVALUE_REFS 0 -#endif - -#if defined TR2_OPTIONAL_GCC_4_8_1_AND_HIGHER___ -#define OPTIONAL_HAS_CONSTEXPR_INIT_LIST 1 -#define OPTIONAL_CONSTEXPR_INIT_LIST constexpr -#else -#define OPTIONAL_HAS_CONSTEXPR_INIT_LIST 0 -#define OPTIONAL_CONSTEXPR_INIT_LIST -#endif - -#if defined TR2_OPTIONAL_CLANG_3_5_AND_HIGHTER_ && (defined __cplusplus) && \ - (__cplusplus != 201103L) -#define OPTIONAL_HAS_MOVE_ACCESSORS 1 -#else -#define 
OPTIONAL_HAS_MOVE_ACCESSORS 0 -#endif - -// In C++11 constexpr implies const, so we need to make non-const members also -// non-constexpr -#if (defined __cplusplus) && (__cplusplus == 201103L) -#define OPTIONAL_MUTABLE_CONSTEXPR -#else -#define OPTIONAL_MUTABLE_CONSTEXPR constexpr -#endif - -namespace multipy { - -// BEGIN workaround for missing std::is_trivially_destructible -#if defined TR2_OPTIONAL_GCC_4_8_AND_HIGHER___ -// leave it: it is already there -#elif defined TR2_OPTIONAL_CLANG_3_4_2_AND_HIGHER_ -// leave it: it is already there -#elif defined TR2_OPTIONAL_MSVC_2015_AND_HIGHER___ -// leave it: it is already there -#elif defined TR2_OPTIONAL_DISABLE_EMULATION_OF_TYPE_TRAITS -// leave it: the user doesn't want it -#else -template -using std::is_trivially_destructible = std::has_trivial_destructor; -#endif -// END workaround for missing std::is_trivially_destructible - -#if (defined TR2_OPTIONAL_GCC_4_7_AND_HIGHER___) -// leave it; our metafunctions are already defined. -#elif defined TR2_OPTIONAL_CLANG_3_4_2_AND_HIGHER_ -// leave it; our metafunctions are already defined. -#elif defined TR2_OPTIONAL_MSVC_2015_AND_HIGHER___ -// leave it: it is already there -#elif defined TR2_OPTIONAL_DISABLE_EMULATION_OF_TYPE_TRAITS -// leave it: the user doesn't want it -#else - -// workaround for missing traits in GCC and CLANG -template -struct std::is_nothrow_move_constructible { - constexpr static bool value = std::is_nothrow_constructible::value; -}; - -template -struct is_assignable { - template - constexpr static bool has_assign(...) { - return false; - } - - template < - class X, - class Y, - size_t S = sizeof((std::declval() = std::declval(), true))> - // the comma operator is necessary for the cases where operator= returns void - constexpr static bool has_assign(bool) { - return true; - } - - constexpr static bool value = has_assign(true); -}; - -template -struct std::is_nothrow_move_assignable { - template - struct has_nothrow_move_assign { - constexpr static bool value = false; - }; - - template - struct has_nothrow_move_assign { - constexpr static bool value = - noexcept(std::declval() = std::declval()); - }; - - constexpr static bool value = - has_nothrow_move_assign::value>::value; -}; -// end workaround - -#endif - -// 20.5.4, optional for object types -template -class optional; - -// 20.5.5, optional for lvalue reference types -template -class optional; - -// workaround: std utility functions aren't constexpr yet -template -inline constexpr T&& constexpr_forward( - typename std::remove_reference::type& t) noexcept { - return static_cast(t); -} - -template -inline constexpr T&& constexpr_forward( - typename std::remove_reference::type&& t) noexcept { - static_assert(!std::is_lvalue_reference::value, "!!"); - return static_cast(t); -} - -template -inline constexpr typename std::remove_reference::type&& constexpr_move( - T&& t) noexcept { - return static_cast::type&&>(t); -} - -#if defined NDEBUG -#define TR2_OPTIONAL_ASSERTED_EXPRESSION(CHECK, EXPR) (EXPR) -#else -#define TR2_OPTIONAL_ASSERTED_EXPRESSION(CHECK, EXPR) \ - ((CHECK) ? (EXPR) : ([] { assert(!#CHECK); }(), (EXPR))) -#endif - -namespace detail_ { - -// static_addressof: a constexpr version of addressof -template -struct has_overloaded_addressof { - template - constexpr static bool has_overload(...) 
{ - return false; - } - - template ().operator&())> - constexpr static bool has_overload(bool) { - return true; - } - - constexpr static bool value = has_overload(true); -}; - -template )> -constexpr T* static_addressof(T& ref) { - return &ref; -} - -template )> -T* static_addressof(T& ref) { - return std::addressof(ref); -} - -// the call to convert(b) has return type A and converts b to type A iff b -// decltype(b) is implicitly convertible to A -template -constexpr U convert(U v) { - return v; -} - -namespace swap_ns { -using std::swap; - -template -void adl_swap(T& t, T& u) noexcept(noexcept(swap(t, u))) { - swap(t, u); -} - -} // namespace swap_ns - -} // namespace detail_ - -constexpr struct trivial_init_t { -} trivial_init{}; - -// 20.5.6, In-place construction -constexpr struct in_place_t { -} in_place{}; - -// 20.5.7, Disengaged state indicator -struct nullopt_t { - struct init {}; - constexpr explicit nullopt_t(init) {} -}; -constexpr nullopt_t nullopt{nullopt_t::init()}; - -// 20.5.8, class bad_optional_access -class bad_optional_access : public std::logic_error { - public: - explicit bad_optional_access(const std::string& what_arg) - : std::logic_error{what_arg} {} - explicit bad_optional_access(const char* what_arg) - : std::logic_error{what_arg} {} -}; - -template -union storage_t { - unsigned char dummy_; - T value_; - - constexpr storage_t(trivial_init_t) noexcept : dummy_(){}; - - template - constexpr storage_t(Args&&... args) - : value_(constexpr_forward(args)...) {} - - ~storage_t() {} -}; - -template -union constexpr_storage_t { - unsigned char dummy_; - T value_; - - constexpr constexpr_storage_t(trivial_init_t) noexcept : dummy_(){}; - - template - constexpr constexpr_storage_t(Args&&... args) - : value_(constexpr_forward(args)...) {} - - ~constexpr_storage_t() = default; -}; - -template -struct optional_base { - bool init_; - storage_t storage_; - - constexpr optional_base() noexcept : init_(false), storage_(trivial_init){}; - - explicit constexpr optional_base(const T& v) : init_(true), storage_(v) {} - - explicit constexpr optional_base(T&& v) - : init_(true), storage_(constexpr_move(v)) {} - - template - explicit optional_base(in_place_t, Args&&... args) - : init_(true), storage_(constexpr_forward(args)...) {} - - template < - class U, - class... Args, - TR2_OPTIONAL_REQUIRES(std::is_constructible>)> - explicit optional_base( - in_place_t, - std::initializer_list il, - Args&&... args) - : init_(true), storage_(il, std::forward(args)...) {} - - ~optional_base() { - if (init_) - storage_.value_.T::~T(); - } -}; - -template -struct constexpr_optional_base { - bool init_; - constexpr_storage_t storage_; - - constexpr constexpr_optional_base() noexcept - : init_(false), storage_(trivial_init){}; - - explicit constexpr constexpr_optional_base(const T& v) - : init_(true), storage_(v) {} - - explicit constexpr constexpr_optional_base(T&& v) - : init_(true), storage_(constexpr_move(v)) {} - - template - explicit constexpr constexpr_optional_base(in_place_t, Args&&... args) - : init_(true), storage_(constexpr_forward(args)...) {} - - template < - class U, - class... Args, - TR2_OPTIONAL_REQUIRES(std::is_constructible>)> - OPTIONAL_CONSTEXPR_INIT_LIST explicit constexpr_optional_base( - in_place_t, - std::initializer_list il, - Args&&... args) - : init_(true), storage_(il, std::forward(args)...) 
{} - - ~constexpr_optional_base() = default; -}; - -template -using OptionalBase = typename std::conditional< - std::is_trivially_destructible::value, // if possible - constexpr_optional_base::type>, // use base with trivial destructor - optional_base::type>>::type; - -template -class optional : private OptionalBase { - static_assert( - !std::is_same::type, nullopt_t>::value, - "bad T"); - static_assert( - !std::is_same::type, in_place_t>::value, - "bad T"); - - constexpr bool initialized() const noexcept { - return OptionalBase::init_; - } - typename std::remove_const::type* dataptr() { - return std::addressof(OptionalBase::storage_.value_); - } - constexpr const T* dataptr() const { - return detail_::static_addressof(OptionalBase::storage_.value_); - } - -#if OPTIONAL_HAS_THIS_RVALUE_REFS == 1 - constexpr const T& contained_val() const& { - return OptionalBase::storage_.value_; - } -#if OPTIONAL_HAS_MOVE_ACCESSORS == 1 - OPTIONAL_MUTABLE_CONSTEXPR T&& contained_val() && { - return std::move(OptionalBase::storage_.value_); - } - OPTIONAL_MUTABLE_CONSTEXPR T& contained_val() & { - return OptionalBase::storage_.value_; - } -#else - T& contained_val() & { - return OptionalBase::storage_.value_; - } - T&& contained_val() && { - return std::move(OptionalBase::storage_.value_); - } -#endif -#else - constexpr const T& contained_val() const { - return OptionalBase::storage_.value_; - } - T& contained_val() { - return OptionalBase::storage_.value_; - } -#endif - - void clear() noexcept { - if (initialized()) - dataptr()->T::~T(); - OptionalBase::init_ = false; - } - - template - void initialize(Args&&... args) noexcept( - noexcept(T(std::forward(args)...))) { - assert(!OptionalBase::init_); - ::new (static_cast(dataptr())) T(std::forward(args)...); - OptionalBase::init_ = true; - } - - template - void initialize(std::initializer_list il, Args&&... args) noexcept( - noexcept(T(il, std::forward(args)...))) { - assert(!OptionalBase::init_); - ::new (static_cast(dataptr())) T(il, std::forward(args)...); - OptionalBase::init_ = true; - } - - public: - typedef T value_type; - - // 20.5.5.1, constructors - constexpr optional() noexcept : OptionalBase(){}; - constexpr optional(nullopt_t) noexcept : OptionalBase(){}; - - optional(const optional& rhs) : OptionalBase() { - if (rhs.initialized()) { - ::new (static_cast(dataptr())) T(*rhs); - OptionalBase::init_ = true; - } - } - - optional(optional&& rhs) noexcept( - std::is_nothrow_move_constructible::value) - : OptionalBase() { - if (rhs.initialized()) { - ::new (static_cast(dataptr())) T(std::move(*rhs)); - OptionalBase::init_ = true; - } - } - - constexpr optional(const T& v) : OptionalBase(v) {} - - constexpr optional(T&& v) : OptionalBase(constexpr_move(v)) {} - - template - explicit constexpr optional(in_place_t, Args&&... args) - : OptionalBase(in_place_t{}, constexpr_forward(args)...) {} - - template < - class U, - class... Args, - TR2_OPTIONAL_REQUIRES(std::is_constructible>)> - OPTIONAL_CONSTEXPR_INIT_LIST explicit optional( - in_place_t, - std::initializer_list il, - Args&&... args) - : OptionalBase(in_place_t{}, il, constexpr_forward(args)...) 
{} - - // 20.5.4.2, Destructor - ~optional() = default; - - // 20.5.4.3, assignment - optional& operator=(nullopt_t) noexcept { - clear(); - return *this; - } - - optional& operator=(const optional& rhs) { - if (initialized() == true && rhs.initialized() == false) - clear(); - else if (initialized() == false && rhs.initialized() == true) - initialize(*rhs); - else if (initialized() == true && rhs.initialized() == true) - contained_val() = *rhs; - return *this; - } - - optional& operator=(optional&& rhs) noexcept( - std::is_nothrow_move_assignable::value&& - std::is_nothrow_move_constructible::value) { - if (initialized() == true && rhs.initialized() == false) - clear(); - else if (initialized() == false && rhs.initialized() == true) - initialize(std::move(*rhs)); - else if (initialized() == true && rhs.initialized() == true) - contained_val() = std::move(*rhs); - return *this; - } - - template - auto operator=(U&& v) -> typename std::enable_if< - std::is_same::type, T>::value, - optional&>::type { - if (initialized()) { - contained_val() = std::forward(v); - } else { - initialize(std::forward(v)); - } - return *this; - } - - template - void emplace(Args&&... args) { - clear(); - initialize(std::forward(args)...); - } - - template - void emplace(std::initializer_list il, Args&&... args) { - clear(); - initialize(il, std::forward(args)...); - } - - // 20.5.4.4, Swap - void swap(optional& rhs) noexcept( - std::is_nothrow_move_constructible::value&& noexcept( - detail_::swap_ns::adl_swap(std::declval(), std::declval()))) { - if (initialized() == true && rhs.initialized() == false) { - rhs.initialize(std::move(**this)); - clear(); - } else if (initialized() == false && rhs.initialized() == true) { - initialize(std::move(*rhs)); - rhs.clear(); - } else if (initialized() == true && rhs.initialized() == true) { - using std::swap; - swap(**this, *rhs); - } - } - - // 20.5.4.5, Observers - - explicit constexpr operator bool() const noexcept { - return initialized(); - } - constexpr bool has_value() const noexcept { - return initialized(); - } - - constexpr T const* operator->() const { - return TR2_OPTIONAL_ASSERTED_EXPRESSION(initialized(), dataptr()); - } - -#if OPTIONAL_HAS_MOVE_ACCESSORS == 1 - - OPTIONAL_MUTABLE_CONSTEXPR T* operator->() { - assert(initialized()); - return dataptr(); - } - - constexpr T const& operator*() const& { - return TR2_OPTIONAL_ASSERTED_EXPRESSION(initialized(), contained_val()); - } - - OPTIONAL_MUTABLE_CONSTEXPR T& operator*() & { - assert(initialized()); - return contained_val(); - } - - OPTIONAL_MUTABLE_CONSTEXPR T&& operator*() && { - assert(initialized()); - return constexpr_move(contained_val()); - } - - constexpr T const& value() const& { - return initialized() - ? contained_val() - : (throw bad_optional_access("bad optional access"), contained_val()); - } - - OPTIONAL_MUTABLE_CONSTEXPR T& value() & { - return initialized() - ? contained_val() - : (throw bad_optional_access("bad optional access"), contained_val()); - } - - OPTIONAL_MUTABLE_CONSTEXPR T&& value() && { - if (!initialized()) - throw bad_optional_access("bad optional access"); - return std::move(contained_val()); - } - -#else - - T* operator->() { - assert(initialized()); - return dataptr(); - } - - constexpr T const& operator*() const { - return TR2_OPTIONAL_ASSERTED_EXPRESSION(initialized(), contained_val()); - } - - T& operator*() { - assert(initialized()); - return contained_val(); - } - - constexpr T const& value() const { - return initialized() - ? 
contained_val() - : (throw bad_optional_access("bad optional access"), contained_val()); - } - - T& value() { - return initialized() - ? contained_val() - : (throw bad_optional_access("bad optional access"), contained_val()); - } - -#endif - -#if OPTIONAL_HAS_THIS_RVALUE_REFS == 1 - - template - constexpr T value_or(V&& v) const& { - return *this ? **this : detail_::convert(constexpr_forward(v)); - } - -#if OPTIONAL_HAS_MOVE_ACCESSORS == 1 - - template - OPTIONAL_MUTABLE_CONSTEXPR T value_or(V&& v) && { - return *this - ? constexpr_move(const_cast&>(*this).contained_val()) - : detail_::convert(constexpr_forward(v)); - } - -#else - - template - T value_or(V&& v) && { - return *this - ? constexpr_move(const_cast&>(*this).contained_val()) - : detail_::convert(constexpr_forward(v)); - } - -#endif - -#else - - template - constexpr T value_or(V&& v) const { - return *this ? **this : detail_::convert(constexpr_forward(v)); - } - -#endif - - // 20.6.3.6, modifiers - void reset() noexcept { - clear(); - } -}; - -template -class optional { - static_assert(!std::is_same::value, "bad T"); - static_assert(!std::is_same::value, "bad T"); - T* ref; - - public: - // 20.5.5.1, construction/destruction - constexpr optional() noexcept : ref(nullptr) {} - - constexpr optional(nullopt_t) noexcept : ref(nullptr) {} - - constexpr optional(T& v) noexcept : ref(detail_::static_addressof(v)) {} - - optional(T&&) = delete; - - constexpr optional(const optional& rhs) noexcept : ref(rhs.ref) {} - - explicit constexpr optional(in_place_t, T& v) noexcept - : ref(detail_::static_addressof(v)) {} - - explicit optional(in_place_t, T&&) = delete; - - ~optional() = default; - - // 20.5.5.2, mutation - optional& operator=(nullopt_t) noexcept { - ref = nullptr; - return *this; - } - - // optional& operator=(const optional& rhs) noexcept { - // ref = rhs.ref; - // return *this; - // } - - // optional& operator=(optional&& rhs) noexcept { - // ref = rhs.ref; - // return *this; - // } - - template - auto operator=(U&& rhs) noexcept -> typename std::enable_if< - std::is_same::type, optional>::value, - optional&>::type { - ref = rhs.ref; - return *this; - } - - template - auto operator=(U&& rhs) noexcept -> typename std::enable_if< - !std::is_same::type, optional>::value, - optional&>::type = delete; - - void emplace(T& v) noexcept { - ref = detail_::static_addressof(v); - } - - void emplace(T&&) = delete; - - void swap(optional& rhs) noexcept { - std::swap(ref, rhs.ref); - } - - // 20.5.5.3, observers - constexpr T* operator->() const { - return TR2_OPTIONAL_ASSERTED_EXPRESSION(ref, ref); - } - - constexpr T& operator*() const { - return TR2_OPTIONAL_ASSERTED_EXPRESSION(ref, *ref); - } - - constexpr T& value() const { - return ref ? *ref - : (throw bad_optional_access("bad optional access"), *ref); - } - - explicit constexpr operator bool() const noexcept { - return ref != nullptr; - } - - constexpr bool has_value() const noexcept { - return ref != nullptr; - } - - template - constexpr typename std::decay::type value_or(V&& v) const { - return *this ? **this - : detail_::convert::type>( - constexpr_forward(v)); - } - - // x.x.x.x, modifiers - void reset() noexcept { - ref = nullptr; - } -}; - -template -class optional { - static_assert(sizeof(T) == 0, "optional rvalue references disallowed"); -}; - -// 20.5.8, Relational operators -template -constexpr bool operator==(const optional& x, const optional& y) { - return bool(x) != bool(y) ? false : bool(x) == false ? 
true : *x == *y; -} - -template -constexpr bool operator!=(const optional& x, const optional& y) { - return !(x == y); -} - -template -constexpr bool operator<(const optional& x, const optional& y) { - return (!y) ? false : (!x) ? true : *x < *y; -} - -template -constexpr bool operator>(const optional& x, const optional& y) { - return (y < x); -} - -template -constexpr bool operator<=(const optional& x, const optional& y) { - return !(y < x); -} - -template -constexpr bool operator>=(const optional& x, const optional& y) { - return !(x < y); -} - -// 20.5.9, Comparison with nullopt -template -constexpr bool operator==(const optional& x, nullopt_t) noexcept { - return (!x); -} - -template -constexpr bool operator==(nullopt_t, const optional& x) noexcept { - return (!x); -} - -template -constexpr bool operator!=(const optional& x, nullopt_t) noexcept { - return bool(x); -} - -template -constexpr bool operator!=(nullopt_t, const optional& x) noexcept { - return bool(x); -} - -template -constexpr bool operator<(const optional&, nullopt_t) noexcept { - return false; -} - -template -constexpr bool operator<(nullopt_t, const optional& x) noexcept { - return bool(x); -} - -template -constexpr bool operator<=(const optional& x, nullopt_t) noexcept { - return (!x); -} - -template -constexpr bool operator<=(nullopt_t, const optional&) noexcept { - return true; -} - -template -constexpr bool operator>(const optional& x, nullopt_t) noexcept { - return bool(x); -} - -template -constexpr bool operator>(nullopt_t, const optional&) noexcept { - return false; -} - -template -constexpr bool operator>=(const optional&, nullopt_t) noexcept { - return true; -} - -template -constexpr bool operator>=(nullopt_t, const optional& x) noexcept { - return (!x); -} - -// 20.5.10, Comparison with T -template -constexpr bool operator==(const optional& x, const T& v) { - return bool(x) ? *x == v : false; -} - -template -constexpr bool operator==(const T& v, const optional& x) { - return bool(x) ? v == *x : false; -} - -template -constexpr bool operator!=(const optional& x, const T& v) { - return bool(x) ? *x != v : true; -} - -template -constexpr bool operator!=(const T& v, const optional& x) { - return bool(x) ? v != *x : true; -} - -template -constexpr bool operator<(const optional& x, const T& v) { - return bool(x) ? *x < v : true; -} - -template -constexpr bool operator>(const T& v, const optional& x) { - return bool(x) ? v > *x : true; -} - -template -constexpr bool operator>(const optional& x, const T& v) { - return bool(x) ? *x > v : false; -} - -template -constexpr bool operator<(const T& v, const optional& x) { - return bool(x) ? v < *x : false; -} - -template -constexpr bool operator>=(const optional& x, const T& v) { - return bool(x) ? *x >= v : false; -} - -template -constexpr bool operator<=(const T& v, const optional& x) { - return bool(x) ? v <= *x : false; -} - -template -constexpr bool operator<=(const optional& x, const T& v) { - return bool(x) ? *x <= v : true; -} - -template -constexpr bool operator>=(const T& v, const optional& x) { - return bool(x) ? v >= *x : true; -} - -// Comparison of optional with T -template -constexpr bool operator==(const optional& x, const T& v) { - return bool(x) ? *x == v : false; -} - -template -constexpr bool operator==(const T& v, const optional& x) { - return bool(x) ? v == *x : false; -} - -template -constexpr bool operator!=(const optional& x, const T& v) { - return bool(x) ? 
*x != v : true; -} - -template -constexpr bool operator!=(const T& v, const optional& x) { - return bool(x) ? v != *x : true; -} - -template -constexpr bool operator<(const optional& x, const T& v) { - return bool(x) ? *x < v : true; -} - -template -constexpr bool operator>(const T& v, const optional& x) { - return bool(x) ? v > *x : true; -} - -template -constexpr bool operator>(const optional& x, const T& v) { - return bool(x) ? *x > v : false; -} - -template -constexpr bool operator<(const T& v, const optional& x) { - return bool(x) ? v < *x : false; -} - -template -constexpr bool operator>=(const optional& x, const T& v) { - return bool(x) ? *x >= v : false; -} - -template -constexpr bool operator<=(const T& v, const optional& x) { - return bool(x) ? v <= *x : false; -} - -template -constexpr bool operator<=(const optional& x, const T& v) { - return bool(x) ? *x <= v : true; -} - -template -constexpr bool operator>=(const T& v, const optional& x) { - return bool(x) ? v >= *x : true; -} - -// Comparison of optional with T -template -constexpr bool operator==(const optional& x, const T& v) { - return bool(x) ? *x == v : false; -} - -template -constexpr bool operator==(const T& v, const optional& x) { - return bool(x) ? v == *x : false; -} - -template -constexpr bool operator!=(const optional& x, const T& v) { - return bool(x) ? *x != v : true; -} - -template -constexpr bool operator!=(const T& v, const optional& x) { - return bool(x) ? v != *x : true; -} - -template -constexpr bool operator<(const optional& x, const T& v) { - return bool(x) ? *x < v : true; -} - -template -constexpr bool operator>(const T& v, const optional& x) { - return bool(x) ? v > *x : true; -} - -template -constexpr bool operator>(const optional& x, const T& v) { - return bool(x) ? *x > v : false; -} - -template -constexpr bool operator<(const T& v, const optional& x) { - return bool(x) ? v < *x : false; -} - -template -constexpr bool operator>=(const optional& x, const T& v) { - return bool(x) ? *x >= v : false; -} - -template -constexpr bool operator<=(const T& v, const optional& x) { - return bool(x) ? v <= *x : false; -} - -template -constexpr bool operator<=(const optional& x, const T& v) { - return bool(x) ? *x <= v : true; -} - -template -constexpr bool operator>=(const T& v, const optional& x) { - return bool(x) ? v >= *x : true; -} - -// 20.5.12, Specialized algorithms -template -void swap(optional& x, optional& y) noexcept(noexcept(x.swap(y))) { - x.swap(y); -} - -template -constexpr optional::type> make_optional(T&& v) { - return optional::type>(constexpr_forward(v)); -} - -template -constexpr optional make_optional(std::reference_wrapper v) { - return optional(v.get()); -} - -} // namespace multipy - -namespace std { -template -struct hash> { - typedef typename hash::result_type result_type; - typedef multipy::optional argument_type; - - constexpr result_type operator()(argument_type const& arg) const { - return arg ? std::hash{}(*arg) : result_type{}; - } -}; - -template -struct hash> { - typedef typename hash::result_type result_type; - typedef multipy::optional argument_type; - - constexpr result_type operator()(argument_type const& arg) const { - return arg ? 
std::hash{}(*arg) : result_type{}; - } -}; -} // namespace std - -#undef TR2_OPTIONAL_REQUIRES -#undef TR2_OPTIONAL_ASSERTED_EXPRESSION - -#endif //___OPTIONAL_HPP___ diff --git a/torch/csrc/deploy/interpreter/builtin_registry.cpp b/torch/csrc/deploy/interpreter/builtin_registry.cpp deleted file mode 100644 index 611def2e7490..000000000000 --- a/torch/csrc/deploy/interpreter/builtin_registry.cpp +++ /dev/null @@ -1,284 +0,0 @@ -#include -#include -#include -#include -#include - -namespace torch { -namespace deploy { - -// These numbers of modules should not change as long as the cpython version -// embedded in the build remains fixed -static const size_t NUM_FROZEN_PY_BUILTIN_MODULES = 6; -#ifndef FBCODE_CAFFE2 -static const size_t NUM_FROZEN_PY_STDLIB_MODULES = 680; -#endif - -extern "C" PyObject* initModule(void); - -REGISTER_TORCH_DEPLOY_BUILTIN(cpython_internal, PyImport_FrozenModules); - -#ifdef FBCODE_CAFFE2 -REGISTER_TORCH_DEPLOY_BUILTIN(frozentorch, nullptr, "torch._C", initModule); -#else -extern "C" struct _frozen _PyImport_FrozenModules_torch[]; -REGISTER_TORCH_DEPLOY_BUILTIN( - frozentorch, - _PyImport_FrozenModules_torch, - "torch._C", - initModule); -#endif - -BuiltinRegistryItem::BuiltinRegistryItem( - const char* _name, - const struct _frozen* _frozenModules, - std::vector>&& _builtinModules) - : name(_name), - frozenModules(_frozenModules), - builtinModules(std::move(_builtinModules)) { - numModules = 0; - if (frozenModules) { - while (frozenModules[numModules].name != nullptr) { - ++numModules; - } - } - - fprintf( - stderr, - "torch::deploy builtin %s contains %u modules\n", - name, - numModules); -} - -BuiltinRegistry* BuiltinRegistry::get() { - static BuiltinRegistry _registry; - return &_registry; -} - -void BuiltinRegistry::runPreInitialization() { - TORCH_INTERNAL_ASSERT(!Py_IsInitialized()); - sanityCheck(); - PyImport_FrozenModules = BuiltinRegistry::getAllFrozenModules(); - TORCH_INTERNAL_ASSERT(PyImport_FrozenModules != nullptr); - - appendCPythonInittab(); -} - -const char* metaPathSetupTemplate = R"PYTHON( -import sys -from importlib.metadata import DistributionFinder, Distribution -# We need to register a custom meta path finder because we are registering -# `torch._C` as a builtin module. -# -# Normally, builtins will be found by the `BuiltinImporter` meta path finder. -# However, `BuiltinImporter` is hard-coded to assume that all builtin modules -# are top-level imports. Since `torch._C` is a submodule of `torch`, the -# BuiltinImporter skips it. -class F: - MODULES = {<<>>} - - def find_spec(self, fullname, path, target=None): - if fullname in self.MODULES: - # Load this module using `BuiltinImporter`, but set `path` to None - # in order to trick it into loading our module. - return sys.meta_path[1].find_spec(fullname, path=None, target=None) - return None - - def find_distributions(self, context=DistributionFinder.Context()): - modules = {"torch"} | self.MODULES - # Insert dummy distribution records for each builtin module so - # importlib.metadata.version(...) works. 
- if context.name is None: - for name in modules: - yield DummyDistribution(name) - if context.name in modules: - yield DummyDistribution(context.name) - -class DummyDistribution(Distribution): - def __init__(self, name): - self._metadata = { - "Name": name, - "Version": "0.0.1+fake_multipy", - } - - @property - def metadata(self): - return self._metadata - -sys.meta_path.insert(0, F()) -)PYTHON"; - -void BuiltinRegistry::runPostInitialization() { - TORCH_INTERNAL_ASSERT(Py_IsInitialized()); - std::string metaPathSetupScript(metaPathSetupTemplate); - std::string replaceKey = "<<>>"; - size_t pos = metaPathSetupScript.find(replaceKey); - if (pos != std::string::npos) { - metaPathSetupScript.replace(pos, replaceKey.size(), getBuiltinModulesCSV()); - } - int r = PyRun_SimpleString(metaPathSetupScript.c_str()); - TORCH_INTERNAL_ASSERT(r == 0); -} - -void BuiltinRegistry::registerBuiltin( - std::unique_ptr item) { - if (get()->name2idx_.find(item->name) != get()->name2idx_.end()) { - throw std::runtime_error(std::string("redefine bultin: ") + item->name); - } - get()->name2idx_[item->name] = get()->items_.size(); - get()->items_.emplace_back(std::move(item)); -} - -BuiltinRegistryItem* BuiltinRegistry::getItem(const std::string& name) { - auto itr = get()->name2idx_.find(name); - return itr == get()->name2idx_.end() ? nullptr - : get()->items_[itr->second].get(); -} - -unsigned BuiltinRegistry::totalNumModules() { - unsigned tot = 0; - for (const auto& itemptr : get()->items_) { - tot += itemptr->numModules; - } - return tot; -} - -struct _frozen* BuiltinRegistry::getAllFrozenModules() { - /* Allocate new memory for the combined table */ - size_t totNumModules = totalNumModules(); - struct _frozen* p = nullptr; - if (totNumModules > 0 && - totNumModules <= SIZE_MAX / sizeof(struct _frozen) - 1) { - size_t size = sizeof(struct _frozen) * (totNumModules + 1); - p = (_frozen*)PyMem_Malloc(size); - } - if (p == nullptr) { - return nullptr; - } - - // mark p as an empty frozen module list - memset(&p[0], 0, sizeof(p[0])); - - /* Copy the tables into the new memory */ - unsigned off = 0; - for (const auto& itemptr : items()) { - if (itemptr->numModules > 0) { - memcpy( - p + off, - itemptr->frozenModules, - (itemptr->numModules + 1) * sizeof(struct _frozen)); - off += itemptr->numModules; - } - } - - return p; -} - -void BuiltinRegistry::sanityCheck() { - auto* cpythonInternalFrozens = getItem("cpython_internal"); - // Num frozen builtins shouldn't change (unless modifying the underlying - // cpython version) - TORCH_INTERNAL_ASSERT( - cpythonInternalFrozens != nullptr && - cpythonInternalFrozens->numModules == NUM_FROZEN_PY_BUILTIN_MODULES, - "Missing python builtin frozen modules"); - - auto* frozenpython = getItem("frozenpython"); -#ifdef FBCODE_CAFFE2 - TORCH_INTERNAL_ASSERT( - frozenpython != nullptr, "Missing frozen python modules"); -#else - auto* frozentorch = getItem("frozentorch"); - // Check frozenpython+frozentorch together since in OSS frozenpython is empty - // and frozentorch contains stdlib+torch, while in fbcode they are separated - // due to thirdparty2 frozenpython. No fixed number of torch modules to check - // for, but there should be at least one. 
- TORCH_INTERNAL_ASSERT( - frozenpython != nullptr && frozentorch != nullptr && - frozenpython->numModules + frozentorch->numModules > - NUM_FROZEN_PY_STDLIB_MODULES + 1, - "Missing frozen python stdlib or torch modules"); -#endif -} - -std::vector> BuiltinRegistry:: - getAllBuiltinModules() { - std::vector> allBuiltinModules; - for (const auto& itemptr : items()) { - allBuiltinModules.insert( - allBuiltinModules.end(), - itemptr->builtinModules.begin(), - itemptr->builtinModules.end()); - } - return allBuiltinModules; -} - -void BuiltinRegistry::appendCPythonInittab() { - for (const auto& pair : get()->getAllBuiltinModules()) { - PyImport_AppendInittab( - pair.first, reinterpret_cast(pair.second)); - } -} - -std::string BuiltinRegistry::getBuiltinModulesCSV() { - std::string modulesCSV; - for (const auto& pair : get()->getAllBuiltinModules()) { - if (!modulesCSV.empty()) { - modulesCSV += ", "; - } - modulesCSV += fmt::format("'{}'", pair.first); - } - return modulesCSV; -} - -BuiltinRegisterer::BuiltinRegisterer( - const char* name, - const struct _frozen* frozenModules...) { - if (allowLibrary && !allowLibrary(name)) { - fprintf( - stderr, - "Skip %s since it's rejected by the allowLibrary method\n", - name); - return; - } - // gather builtin modules for this lib - va_list args; - va_start(args, frozenModules); - const char* moduleName = nullptr; - void* initFn = nullptr; - std::vector> builtinModules; - while (true) { - moduleName = va_arg(args, const char*); - // encounter end of sequence - if (moduleName == nullptr) { - break; - } - initFn = va_arg(args, void*); - // skip null init function. This can happen if we create weak reference - // to init functions defined in another library. Depending on if we - // link with that library, the init function pointer will be the real - // implementation or nullptr. tensorrt is a good example. If this is - // a CPU build, we will not link with the tensorrt library, so the init - // function will be nullptr; on the other hand if this is a GPU build, - // we link with the tensorrt library, so the init function will not be - // nullptr. - if (initFn == nullptr) { - continue; - } - builtinModules.emplace_back(moduleName, initFn); - } - - // note: don't call glog api in this method since this method is usually - // called before glog get setup - fprintf( - stderr, - "Registering torch::deploy builtin library %s (idx %lu) with %lu builtin modules\n", - name, - BuiltinRegistry::items().size(), - builtinModules.size()); - BuiltinRegistry::registerBuiltin(std::make_unique( - name, frozenModules, std::move(builtinModules))); -} - -} // namespace deploy -} // namespace torch diff --git a/torch/csrc/deploy/interpreter/builtin_registry.h b/torch/csrc/deploy/interpreter/builtin_registry.h deleted file mode 100644 index 5f2726db67b6..000000000000 --- a/torch/csrc/deploy/interpreter/builtin_registry.h +++ /dev/null @@ -1,130 +0,0 @@ -/* - * The torch::deploy builtin registry library is used to make adding new bultins - * to torch::deploy easy and clean. - * - * Under the hood, to add a torch::deploy builtin, the following things need to - * be done - * 1. merge the frozen modules for the builtin into PyImport_FrozenModules - * 2. appending PyInit methods for modules implemented in C++ to the CPython - * builtin module list via methods like PyImport_AppendInittab - * 3. tweak the sys.meta_path a bit to force loading non-toplevel moduels for - * the torch::deploy builtin via the CPython builtin module importer. 
- * - * Doing all these things again and again manually is cumbersome and - * error-prone. This builtin registry library supports open registration for - * torch::deploy builtins. It does the work above by a single line of code - * invoking REGISTER_TORCH_DEPLOY_BUILTIN macro. Here is an example for numpy: - * - * REGISTER_TORCH_DEPLOY_BUILTIN(numpy, numpy_frozen_modules, ) - * - * Calling REGISTER_TORCH_DEPLOY_BUILTIN macro will instantiate a - * BuiltinRegisterer object. The constructor of BuiltinRegisterer does the real - * registration work. - */ -#include -#include -#include -#include -#include - -struct _frozen; - -namespace torch { -namespace deploy { - -/* - * This data structure describes a torch::deploy builtin being registered to - * the registry. - * - * Each torch::deploy builtin contains the following basically information: - * - a name for the builtin. It's usually the name of the library like numpy - * - the lsit of frozen modules - * - the list of builtin modules - */ -struct BuiltinRegistryItem { - explicit BuiltinRegistryItem( - const char* _name, - const struct _frozen* _frozenModules, - std::vector>&& _builtinModules); - const char* name; - const struct _frozen* frozenModules; - unsigned numModules; - std::vector> builtinModules; -}; - -/* - * BuiltinRegistry maintains all the registered torch::deploy builtins. This - * class is a singleton. Calling BuiltinRegistry::get() returns the single - * object instance. - * - * The state of this class is basically a list of BuiltinRegistryItem registered - * so far. - */ -class BuiltinRegistry { - public: - static void runPreInitialization(); - static void runPostInitialization(); - - private: - static struct _frozen* getAllFrozenModules(); - // call this after all the registration is done. - static void sanityCheck(); - static void appendCPythonInittab(); - static std::string getBuiltinModulesCSV(); - - static void registerBuiltin(std::unique_ptr item); - static const std::vector>& items() { - return get()->items_; - } - static unsigned totalNumModules(); - static BuiltinRegistry* get(); - static BuiltinRegistryItem* getItem(const std::string& name); - static std::vector> getAllBuiltinModules(); - - explicit BuiltinRegistry() = default; - std::unordered_map name2idx_; - std::vector> items_; - - friend class BuiltinRegisterer; - FRIEND_TEST(BuiltinRegistryTest, SimpleTest); -}; - -/* - * If nobody defines allowLibrary method, allowLibrary will be evaluated to - * 0 and we allow registering any libraries. If someone defines allowLibrary, - * we respect that and only registering libraries that get true from calling - * allowLibrary(libname). - * - * Currently used in unit test so we can fully control the registered libraries. - */ -__attribute__((weak)) bool allowLibrary(const std::string& libname); - -/* - * This class implements RAII (resource acquisition is initialization) to - * register a bulitin to the registry. 
- */ -class BuiltinRegisterer { - public: - explicit BuiltinRegisterer( - const char* name, - const struct _frozen* frozenModules...); -}; - -} // namespace deploy -} // namespace torch - -#define CONCAT_IMPL(s1, s2) s1##s2 -#define CONCAT(s1, s2) CONCAT_IMPL(s1, s2) -#define ANONYMOUS_VARIABLE(str) CONCAT(str, __LINE__) - -/* there can be a variable list of builtin modules following frozen_modules - * A typical usage of this macro is: - * - * REGISTER_TORCH_DEPLOY_BUILTIN(library_name_without_quote, - * frozen_modules_list, builtin_module_name_1, builtin_module_init_function_1, - * ..., builtin_module_name_N, builtin_module_init_function_N) - */ -#define REGISTER_TORCH_DEPLOY_BUILTIN(libname, frozenModules...) \ - static torch::deploy::BuiltinRegisterer ANONYMOUS_VARIABLE( \ - BuiltinRegisterer)(#libname, frozenModules, nullptr) diff --git a/torch/csrc/deploy/interpreter/configure_cpython.sh b/torch/csrc/deploy/interpreter/configure_cpython.sh deleted file mode 100755 index 2812ea91e011..000000000000 --- a/torch/csrc/deploy/interpreter/configure_cpython.sh +++ /dev/null @@ -1,6 +0,0 @@ -#!/bin/bash -set -ex -wget https://www.openssl.org/source/openssl-1.1.1k.tar.gz -tar xf openssl-1.1.1k.tar.gz -(cd openssl-1.1.1k && ./config --prefix="$PYTHON_INSTALL_DIR" && make -j32 && make install) -CFLAGS=-fPIC CPPFLAGS=-fPIC ./configure --prefix "$PYTHON_INSTALL_DIR" --with-openssl="$PYTHON_INSTALL_DIR" diff --git a/torch/csrc/deploy/interpreter/cpython_patch.diff b/torch/csrc/deploy/interpreter/cpython_patch.diff deleted file mode 100644 index 0ca731f102d1..000000000000 --- a/torch/csrc/deploy/interpreter/cpython_patch.diff +++ /dev/null @@ -1,14 +0,0 @@ -diff --git a/Python/dynload_shlib.c b/Python/dynload_shlib.c -index c51f97abd2..83f73e351d 100644 ---- a/Python/dynload_shlib.c -+++ b/Python/dynload_shlib.c -@@ -54,8 +54,7 @@ static struct { - } handles[128]; - static int nhandles = 0; - -- --dl_funcptr -+dl_funcptr __attribute__((weak)) - _PyImport_FindSharedFuncptr(const char *prefix, - const char *shortname, - const char *pathname, FILE *fp) diff --git a/torch/csrc/deploy/interpreter/defs.bzl b/torch/csrc/deploy/interpreter/defs.bzl deleted file mode 100644 index 16dcee362595..000000000000 --- a/torch/csrc/deploy/interpreter/defs.bzl +++ /dev/null @@ -1,131 +0,0 @@ -load("@fbcode_macros//build_defs:cpp_binary.bzl", "cpp_binary") -load("@fbcode_macros//build_defs:cpp_library.bzl", "cpp_library") -load("@fbcode_macros//build_defs:native_rules.bzl", "cxx_genrule") - -# @lint-ignore-every BUCKLINT -load("@fbsource//tools/build_defs:fb_native_wrapper.bzl", "fb_native") - -def embedded_interpreter(name, suffix, legacy = False, exported_deps = [], exported_external_deps = []): - final_name = name - is_all = suffix == "all" - is_cuda = suffix == "cuda" or is_all - is_hip = suffix == "hip" - platform_static_lib = [] - for platform in ["platform009", "platform010"]: - name = platform + "_" + final_name - so_name = name + ".so" - cpp_binary( - name = so_name, - srcs = [ - "interpreter_impl.cpp", - ] + (["import_find_sharedfuncptr.cpp"] if is_all else []), - headers = [ - "Optional.hpp", - "interpreter_impl.h", - ], - header_namespace = "torch/csrc/deploy", - dlopen_enabled = True, - linker_flags = ([ - # This ensures only the intended interface symbols are public/global - # the rest are hidden, regardless of how they were compiled - # (e.g. fvisibility=hidden is NOT important for the component - # objs in this library, since we override here.) 
- "--version-script=$(location :hide_symbols.script)", - ] if not is_all else []), - deps = [ - "fbsource//third-party/fmt:fmt", - ] + ([ - ":builtin_registry_cuda", - "//caffe2:torch_python_cuda_without_torch", - "//deeplearning/trt/python:frozen_tensorrt", - ] if is_cuda else ([ - ":builtin_registry_hip", - "//caffe2:torch_python_hip_without_torch", - ] if is_hip else [ - ":builtin_registry", - "//caffe2:torch_python_without_torch", - ])), - external_deps = - [ - # needed for interpreter.cpp itself, it uses pybind currently - ("frozenpython", None, "python-frozen"), - ("frozenpython", None, "python"), - ], - fbcode_platform = platform, - ) - - # We build torch::deploy with two embedded binaries- one with only cpu py bindings, - # the other with cpu+cuda py bindings. This unfortunately wastes some binary size, - # but at least at runtime only one of them is loaded. - # - # This is becuase of two reasons - # (1) that applications such as predictor want to depend on torch::deploy in a - # cuda-agnostic way, e.g. they don't choose yet, and a binary/app that depends - # on predictor either chooses to include or not include a dep on cuda. - # - # (2) the way the embedded binary is created and loaded, it only exposes a small - # set of interface symbols globally, for creating a new interpreter, and hides its - # other symbols (esp. python ones) so they don't conflict with other interpreters. - # This prevents dividing the cpu and cuda portions of bindings into _separate_ libs - # and loading the cuda part additively. Hence to achieve requirement (1) we bundle - # two complete interpreter libs, one with and one without cuda. - - cp_cmd = "$(exe //caffe2/torch/csrc/deploy:remove_dt_needed)" if suffix == "all" else "cp" - - build_name = "build_" + name - if not legacy: - cxx_genrule( - name = build_name, - out = "embedded_interpreter_" + suffix + ".a", - cmd = """\ - """ + cp_cmd + """ $(location :""" + so_name + """) libtorch_deployinterpreter_internal_""" + suffix + """.so - $(exe fbsource//third-party/binutils:ld) -r \\ - -m """ + select({ - "ovr_config//cpu:arm64": "aarch64linux", - "ovr_config//cpu:x86_64": "elf_x86_64", - }) + """ \\ - -b binary -o ${TMP}/embedded_interpreter_""" + suffix + """.o libtorch_deployinterpreter_internal_""" + suffix + """.so - $(exe fbsource//third-party/binutils:objcopy) --rename-section .data=.torch_deploy_payload.interpreter_""" + suffix + """,readonly,contents -N _binary_libtorch_deployinterpreter_""" + suffix + """_so_start -N _binary_libtorch_deployinterpreter_""" + suffix + """_so_end ${TMP}/embedded_interpreter_""" + suffix + """.o - $(exe fbsource//third-party/binutils:ar) rcs ${OUT} ${TMP}/embedded_interpreter_""" + suffix + """.o - """, - ) - else: - cxx_genrule( - name = build_name, - out = "embedded_interpreter_cuda_legacy.a", - cmd = """\ - cp $(location :""" + so_name + """) libtorch_deployinterpreter_cuda.so - $(exe fbsource//third-party/binutils:ld) -r \\ - -m """ + select({ - "ovr_config//cpu:arm64": "aarch64linux", - "ovr_config//cpu:x86_64": "elf_x86_64", - }) + """ \\ - -b binary -o ${TMP}/embedded_interpreter_cuda.o libtorch_deployinterpreter_cuda.so - $(exe fbsource//third-party/binutils:ar) rcs ${OUT} ${TMP}/embedded_interpreter_cuda.o - """, - ) - platform_static_lib.append(["^" + platform, ":" + build_name]) - - internal_name = final_name + "_internal" - fb_native.prebuilt_cxx_library( - preferred_linkage = "static", - name = internal_name, - visibility = ["PUBLIC"], - link_whole = True, - platform_static_lib = platform_static_lib, - ) - 
- # a thin wrapper around :embedded_interpreter_internal to add --export-dynamic - # linker flags. The flag will be propagated to cpp_binary. We don't require - # cpp_binary to explicitly enable --export-dynamic any more. New usecases usually - # forgot to do so and caused interpreter not found crash. - cpp_library( - name = final_name, - linker_flags = [ - "--export-dynamic", - ], - exported_deps = [ - ":" + internal_name, - ] + exported_deps, - exported_external_deps = exported_external_deps, - ) diff --git a/torch/csrc/deploy/interpreter/hide_symbols.script b/torch/csrc/deploy/interpreter/hide_symbols.script deleted file mode 100644 index 2d515de4fb02..000000000000 --- a/torch/csrc/deploy/interpreter/hide_symbols.script +++ /dev/null @@ -1,4 +0,0 @@ -INTERPRETER_0.1 { - global: newInterpreterImpl; - local: *; -}; diff --git a/torch/csrc/deploy/interpreter/import_find_sharedfuncptr.cpp b/torch/csrc/deploy/interpreter/import_find_sharedfuncptr.cpp deleted file mode 100644 index 2a89a96c623d..000000000000 --- a/torch/csrc/deploy/interpreter/import_find_sharedfuncptr.cpp +++ /dev/null @@ -1,45 +0,0 @@ -#include -#include -#include - -using torch::deploy::CustomLibrary; -using torch::deploy::CustomLibraryPtr; -using torch::deploy::SystemLibrary; - -// NOLINTNEXTLINE -std::vector loaded_files_; -// NOLINTNEXTLINE -static void* deploy_self = nullptr; - -extern "C" { - -__attribute__((visibility("default"))) void deploy_set_self(void* v) { - deploy_self = v; -} - -typedef void (*dl_funcptr)(); -extern "C" dl_funcptr _PyImport_FindSharedFuncptr( - const char* prefix, - const char* shortname, - const char* pathname, - FILE* fp) { - const char* args[] = {"deploy"}; - // XXX: we have to manually flush loaded_files_ (see deploy_flush_python_libs) - // when the manager unloads. Otherwise some libraries can live longer than - // they are needed, and the process of unloading them might use functionality - // that itself gets unloaded. - loaded_files_.emplace_back(CustomLibrary::create(pathname, 1, args)); - CustomLibrary& lib = *loaded_files_.back(); - lib.add_search_library(SystemLibrary::create(deploy_self)); - lib.add_search_library(SystemLibrary::create()); - lib.load(); - std::stringstream ss; - ss << prefix << "_" << shortname; - auto r = (dl_funcptr)lib.sym(ss.str().c_str()).value(); - assert(r); - return r; -} -__attribute__((visibility("default"))) void deploy_flush_python_libs() { - loaded_files_.clear(); -} -} diff --git a/torch/csrc/deploy/interpreter/interpreter_impl.cpp b/torch/csrc/deploy/interpreter/interpreter_impl.cpp deleted file mode 100644 index 251770745e2a..000000000000 --- a/torch/csrc/deploy/interpreter/interpreter_impl.cpp +++ /dev/null @@ -1,413 +0,0 @@ -#include - -#include - -#define PY_SSIZE_T_CLEAN -#include - -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include - -#include -#include - -namespace py = pybind11; -using namespace py::literals; - -// TODO this should come from cmake -#define DEBUG 1 - -#if (DEBUG == 1) -#define PYOBJ_ASSERT(obj) \ - if (NULL == obj) { \ - PyErr_Print(); \ - } \ - assert(NULL != obj); -#elif (DEBUG == 0) -#define PYOBJ_ASSERT(obj) assert(NULL != obj); -#endif - -const char* start = R"PYTHON( -import _ssl # must come before _hashlib otherwise ssl's locks will be set to a Python that might no longer exist... 
-import sys -import importlib.abc -import linecache - -class RegisterModuleImporter(importlib.abc.InspectLoader): - def __init__(self, find_module_source): - self.find_module_source = find_module_source - - def create_module(self, spec): - return None - - def get_source(self, name): - return self.find_module_source(name) - - def exec_module(self, module): - filename = f"_deploy_internal.{module.__name__}" - linecache.lazycache(filename, module.__dict__) - code = compile(self.get_source(module.__name__), filename, "exec", dont_inherit=True) - exec(code, module.__dict__) - - def find_spec(self, fullname, path, target=None): - r = self.find_module_source(fullname) - if r is not None: - return importlib.util.spec_from_loader(fullname, self) - return None - -# print("exec_prefix:", sys.base_exec_prefix) -# print("_base_executable:", sys._base_executable) -# print("base_prefix:", sys.base_prefix) -# print("exec_prefix:", sys.exec_prefix) -# print("executable:", sys.executable) -# print("path:", sys.path) -# print("prefix:", sys.prefix) -import torch # has to be done serially otherwise things will segfault -try: - import torch.version # for some reason torch doesn't import this and cuda fails? -except ModuleNotFoundError: - # fbcode built doesn't have version.py, workaround by faking its info... - from types import ModuleType - _v = torch.version = sys.modules['torch.version'] = ModuleType('torch.version') - _v.__version__ = '1.8.0a0+fake' - _v.debug = False - _v.cuda = '10.1' - _v.git_version = 'fake' - _v.hip = None - - -if torch.cuda.is_available(): - torch.zeros(1).cuda() # force cuda init... -import warnings -warnings.simplefilter("ignore") -)PYTHON"; - -extern "C" __attribute__((__weak__)) PyObject* PyInit_tensorrt(void); -extern "C" - __attribute__((__weak__)) struct _frozen _PyImport_FrozenModules_tensorrt[]; - -using torch::deploy::BuiltinRegistry; -// TODO(shunting) move this to the tensorrt code -REGISTER_TORCH_DEPLOY_BUILTIN( - tensorrt, - _PyImport_FrozenModules_tensorrt, - "tensorrt.tensorrt", - PyInit_tensorrt); - -static py::object global_impl(const char* module, const char* name) { - return py::module::import(module).attr(name); -} - -using at::IValue; -using torch::deploy::Obj; -using torch::deploy::PickledObject; - -// Ensure GIL is held while this object is live, -// note: we are not use py::gil_scoped_acquire here because -// InitLockAcquire used below has to temporarily release the GIL -// within this scope to ensure locking order. Having the source -// for these objects together makes it easier to see what is happening. -struct ScopedAcquire { - ScopedAcquire() { - gstate = PyGILState_Ensure(); - } - ~ScopedAcquire() { - PyGILState_Release(gstate); - } - PyGILState_STATE gstate; -}; - -struct InitLockAcquire { - InitLockAcquire(std::mutex& init_lock) : init_lock_(init_lock) { - // to avoid deadlock, we need to ensure a consistent lock order: - // init_lock -> GIL. Otherwise, the GIL can be released by the python - // interpreter during initalization tasks, and then re-acquired. If another - // thread grabs the GIL to do non-initialization tasks, then it might start - // initializing (GIL -> init_lock). To avoid this, release the GIL before - // trying to get the init_lock and then reacquire it afterward. 
- // NOLINTNEXTLINE(cppcoreguidelines-init-variables) - PyThreadState* _save; - _save = PyEval_SaveThread(); - init_lock.lock(); - PyEval_RestoreThread(_save); - } - ~InitLockAcquire() { - init_lock_.unlock(); - } - - private: - std::mutex& init_lock_; -}; - -struct __attribute__((visibility("hidden"))) ConcreteInterpreterImpl - : public torch::deploy::InterpreterImpl { - explicit ConcreteInterpreterImpl( - const std::vector& extra_python_paths) { - BuiltinRegistry::runPreInitialization(); - PyPreConfig preconfig; - PyPreConfig_InitIsolatedConfig(&preconfig); - PyStatus status = Py_PreInitialize(&preconfig); - TORCH_INTERNAL_ASSERT(!PyStatus_Exception(status)) - - PyConfig config; - PyConfig_InitIsolatedConfig(&config); - - // Completely blank out the path configuration. This ensures we have - // complete control of how our embedded Python searches for modules, and we - // will never consult the external filesystem. See: - // https://docs.python.org/3/c-api/init_config.html#path-configuration - config.site_import = 0; - status = PyConfig_SetString(&config, &config.base_exec_prefix, L""); - status = - PyConfig_SetString(&config, &config.base_executable, L"torch_deploy"); - status = PyConfig_SetString(&config, &config.base_prefix, L""); - status = PyConfig_SetString(&config, &config.exec_prefix, L""); - status = PyConfig_SetString(&config, &config.executable, L"torch_deploy"); - status = PyConfig_SetString(&config, &config.prefix, L""); - config.module_search_paths_set = 1; - // NOLINTNEXTLINE(cppcoreguidelines-avoid-c-arrays,modernize-avoid-c-arrays) - wchar_t* module_search_paths[0] = {}; - status = PyConfig_SetWideStringList( - &config, &config.module_search_paths, 0, module_search_paths); - - status = Py_InitializeFromConfig(&config); - PyConfig_Clear(&config); - TORCH_INTERNAL_ASSERT(!PyStatus_Exception(status)) -#ifdef FBCODE_CAFFE2 - auto sys_path = global_impl("sys", "path"); - for (const auto& entry : extra_python_paths) { - sys_path.attr("insert")(0, entry); - } -#endif - BuiltinRegistry::runPostInitialization(); - - int r = PyRun_SimpleString(start); - TORCH_INTERNAL_ASSERT(r == 0); - - // we cache these so we don't have to repeat the conversion of strings into - // Python and hash table lookups to get to these object - saveStorage = global_impl("torch._deploy", "_save_storages"); - loadStorage = global_impl("torch._deploy", "_load_storages"); - getPackage = global_impl("torch._deploy", "_get_package"); - objects = global_impl("torch._deploy", "_deploy_objects"); - // Release the GIL that PyInitialize acquires - PyEval_SaveThread(); - } - - ~ConcreteInterpreterImpl() override { - PyGILState_Ensure(); - // make sure pybind11 doesn't try to decref after we have destroyed python - // note: this leaks the references to these objects, but we are about to - // deinit python anyway so it doesn't matter - objects.release(); - saveStorage.release(); - loadStorage.release(); - getPackage.release(); - if (Py_FinalizeEx() != 0) { - exit(1); // can't use TORCH_INTERNAL_ASSERT because we are in a - // non-throwing destructor. - } - } - - void setFindModule( - std::function(const std::string&)> - find_module) override { - std::function wrapped_find_module = - [=](const std::string& name) -> py::object { - auto r = find_module(name); - return r ? 
py::cast(*r) : py::none(); - }; - py::object register_module_importer = - py::module::import("__main__") - .attr("RegisterModuleImporter")(wrapped_find_module); - py::module::import("sys") - .attr("meta_path") - .attr("append")(register_module_importer); - } - - torch::deploy::InterpreterSessionImpl* acquireSession() override; - py::object saveStorage; - py::object loadStorage; - py::object getPackage; - py::dict objects; - std::mutex init_lock_; -}; - -struct __attribute__((visibility("hidden"))) ConcreteInterpreterSessionImpl - : public torch::deploy::InterpreterSessionImpl { - ConcreteInterpreterSessionImpl(ConcreteInterpreterImpl* interp) - : interp_(interp) {} - Obj global(const char* module, const char* name) override { - return wrap(global_impl(module, name)); - } - - Obj fromIValue(IValue value) override { - return wrap(torch::jit::toPyObject(value)); - } - Obj createOrGetPackageImporterFromContainerFile( - const std::shared_ptr& - containerFile_) override { - InitLockAcquire guard(interp_->init_lock_); - return wrap(interp_->getPackage(containerFile_)); - } - - PickledObject pickle(Obj container, Obj obj) override { - py::tuple result = interp_->saveStorage(unwrap(container), unwrap(obj)); - py::bytes bytes = py::cast(result[0]); - py::list storages = py::cast(result[1]); - py::list dtypes = py::cast(result[2]); - auto container_file = - py::cast>( - result[3]); - - std::vector storages_c; - std::vector dtypes_c; - for (size_t i = 0, N = storages.size(); i < N; ++i) { - storages_c.push_back(torch::createStorage(storages[i].ptr())); - dtypes_c.push_back( - reinterpret_cast(dtypes[i].ptr())->scalar_type); - } - return PickledObject{ - bytes, - std::move(storages_c), - std::move(dtypes_c), - std::move(container_file)}; - } - Obj unpickleOrGet(int64_t id, const PickledObject& obj) override { - py::dict objects = interp_->objects; - py::object id_p = py::cast(id); - if (objects.contains(id_p)) { - return wrap(objects[id_p]); - } - - InitLockAcquire guard(interp_->init_lock_); - // re-check if something else loaded this before we acquired the - // init_lock_ - if (objects.contains(id_p)) { - return wrap(objects[id_p]); - } - - py::tuple storages(obj.storages_.size()); - for (size_t i = 0, N = obj.storages_.size(); i < N; ++i) { - py::object new_storage = py::reinterpret_steal( - torch::createPyObject(obj.storages_[i])); - storages[i] = std::move(new_storage); - } - py::tuple dtypes(obj.types_.size()); - for (size_t i = 0, N = obj.types_.size(); i < N; ++i) { - auto dtype = (PyObject*)torch::getTHPDtype(obj.types_[i]); - Py_INCREF(dtype); - dtypes[i] = dtype; - } - py::object result = interp_->loadStorage( - id, obj.containerFile_, py::bytes(obj.data_), storages, dtypes); - return wrap(result); - } - void unload(int64_t id) override { - py::dict objects = interp_->objects; - py::object id_p = py::cast(id); - if (objects.contains(id_p)) { - objects.attr("__delitem__")(id_p); - } - } - - IValue toIValue(Obj obj) const override { - return torch::jit::toTypeInferredIValue(unwrap(obj)); - } - - Obj call(Obj obj, at::ArrayRef args) override { - py::tuple m_args(args.size()); - for (size_t i = 0, N = args.size(); i != N; ++i) { - m_args[i] = unwrap(args[i]); - } - return wrap(call(unwrap(obj), m_args)); - } - - Obj call(Obj obj, at::ArrayRef args) override { - py::tuple m_args(args.size()); - for (size_t i = 0, N = args.size(); i != N; ++i) { - m_args[i] = torch::jit::toPyObject(args[i]); - } - return wrap(call(unwrap(obj), m_args)); - } - - Obj callKwargs( - Obj obj, - std::vector args, - 
std::unordered_map kwargs) override { - py::tuple py_args(args.size()); - for (size_t i = 0, N = args.size(); i != N; ++i) { - py_args[i] = torch::jit::toPyObject(args[i]); - } - - py::dict py_kwargs; - for (auto kv : kwargs) { - py_kwargs[py::cast(std::get<0>(kv))] = - torch::jit::toPyObject(std::get<1>(kv)); - } - return wrap(call(unwrap(obj), py_args, py_kwargs)); - } - - Obj callKwargs(Obj obj, std::unordered_map kwargs) - override { - std::vector args; - return callKwargs(obj, args, kwargs); - } - - bool hasattr(Obj obj, const char* attr) override { - return py::hasattr(unwrap(obj), attr); - } - - Obj attr(Obj obj, const char* attr) override { - return wrap(unwrap(obj).attr(attr)); - } - - static py::object call( - py::handle object, - py::handle args, - py::handle kwargs = nullptr) { - PyObject* result = PyObject_Call(object.ptr(), args.ptr(), kwargs.ptr()); - if (!result) { - throw py::error_already_set(); - } - return py::reinterpret_steal(result); - } - - py::handle unwrap(Obj obj) const { - return objects_.at(ID(obj)); - } - - Obj wrap(py::object obj) { - objects_.emplace_back(std::move(obj)); - return Obj(this, objects_.size() - 1); - } - - ~ConcreteInterpreterSessionImpl() override { - objects_.clear(); - } - ConcreteInterpreterImpl* interp_; - ScopedAcquire acquire_; - std::vector objects_; -}; - -torch::deploy::InterpreterSessionImpl* ConcreteInterpreterImpl:: - acquireSession() { - return new ConcreteInterpreterSessionImpl(this); -} - -extern "C" __attribute__((visibility("default"))) -torch::deploy::InterpreterImpl* -newInterpreterImpl(const std::vector& extra_python_paths) { - return new ConcreteInterpreterImpl(extra_python_paths); -} diff --git a/torch/csrc/deploy/interpreter/interpreter_impl.h b/torch/csrc/deploy/interpreter/interpreter_impl.h deleted file mode 100644 index a2dd57e9beeb..000000000000 --- a/torch/csrc/deploy/interpreter/interpreter_impl.h +++ /dev/null @@ -1,185 +0,0 @@ -#pragma once -// multi-python abstract code -#include -#include -#include -#include - -/* Torch Deploy intentionally embeds multiple copies of c++ libraries - providing python bindings necessary for torch::deploy users in the same - process space in order to provide a multi-python environment. As a result, - any exception types defined by these duplicated libraries can't be safely - caught or handled outside of the originating dynamic library (.so). - - In practice this means that you must either - catch these exceptions inside the torch::deploy API boundary or risk crashing - the client application. - - It is safe to throw exception types that are defined once in - the context of the client application, such as std::runtime_error, - which isn't duplicated in torch::deploy interpreters. - - ==> Use TORCH_DEPLOY_TRY, _SAFE_CATCH_RETHROW around _ALL_ torch::deploy APIs - - For more information, see - https://gcc.gnu.org/wiki/Visibility (section on c++ exceptions) - or https://stackoverflow.com/a/14364055 - or - https://stackoverflow.com/questions/14268736/symbol-visibility-exceptions-runtime-error - note- this may be only a serious problem on versions of gcc prior to 4.0, - but still seems worth sealing off. - -*/ -#define TORCH_DEPLOY_TRY try { -#define TORCH_DEPLOY_SAFE_CATCH_RETHROW \ - } \ - catch (std::exception & err) { \ - throw std::runtime_error( \ - std::string( \ - "Exception Caught inside torch::deploy embedded library: \n") + \ - err.what()); \ - } \ - catch (...) 
{ \ - throw std::runtime_error(std::string( \ - "Unknown Exception Caught inside torch::deploy embedded library")); \ - } -namespace torch { -namespace deploy { - -struct InterpreterSessionImpl; - -struct PickledObject { - std::string data_; - std::vector storages_; - // types for the storages, required to - // reconstruct correct Python storages - std::vector types_; - std::shared_ptr containerFile_; -}; - -// this is a wrapper class that refers to a PyObject* instance in a particular -// interpreter. We can't use normal PyObject or pybind11 objects here -// because these objects get used in a user application which will not directly -// link against libpython. Instead all interaction with the Python state in each -// interpreter is done via this wrapper class, and methods on -// InterpreterSession. -struct Obj { - friend struct InterpreterSessionImpl; - Obj() : interaction_(nullptr), id_(0) {} - Obj(InterpreterSessionImpl* interaction, int64_t id) - : interaction_(interaction), id_(id) {} - - at::IValue toIValue() const; - Obj operator()(at::ArrayRef args); - Obj operator()(at::ArrayRef args); - Obj callKwargs( - std::vector args, - std::unordered_map kwargs); - Obj callKwargs(std::unordered_map kwargs); - bool hasattr(const char* attr); - Obj attr(const char* attr); - - private: - InterpreterSessionImpl* interaction_; - int64_t id_; -}; - -struct InterpreterSessionImpl { - friend struct Package; - friend struct ReplicatedObj; - friend struct Obj; - friend struct InterpreterSession; - friend struct ReplicatedObjImpl; - - virtual ~InterpreterSessionImpl() = default; - - private: - virtual Obj global(const char* module, const char* name) = 0; - virtual Obj fromIValue(at::IValue value) = 0; - virtual Obj createOrGetPackageImporterFromContainerFile( - const std::shared_ptr& - containerFile_) = 0; - virtual PickledObject pickle(Obj container, Obj obj) = 0; - virtual Obj unpickleOrGet(int64_t id, const PickledObject& obj) = 0; - virtual void unload(int64_t id) = 0; - - virtual at::IValue toIValue(Obj obj) const = 0; - - virtual Obj call(Obj obj, at::ArrayRef args) = 0; - virtual Obj call(Obj obj, at::ArrayRef args) = 0; - virtual Obj callKwargs( - Obj obj, - std::vector args, - std::unordered_map kwargs) = 0; - virtual Obj callKwargs( - Obj obj, - std::unordered_map kwargs) = 0; - virtual Obj attr(Obj obj, const char* attr) = 0; - virtual bool hasattr(Obj obj, const char* attr) = 0; - - protected: - int64_t ID(Obj obj) const { - return obj.id_; - } - - bool isOwner(Obj obj) const { - return this == obj.interaction_; - } -}; - -struct InterpreterImpl { - virtual InterpreterSessionImpl* acquireSession() = 0; - virtual void setFindModule( - std::function(const std::string&)> - find_module) = 0; - virtual ~InterpreterImpl() = default; // this will uninitialize python -}; - -// inline definitions for Objs are necessary to avoid introducing a -// source file that would need to exist it both the libinterpreter.so and then -// the libtorchpy library. 
-inline at::IValue Obj::toIValue() const { - TORCH_DEPLOY_TRY - return interaction_->toIValue(*this); - TORCH_DEPLOY_SAFE_CATCH_RETHROW -} - -inline Obj Obj::operator()(at::ArrayRef args) { - TORCH_DEPLOY_TRY - return interaction_->call(*this, args); - TORCH_DEPLOY_SAFE_CATCH_RETHROW -} - -inline Obj Obj::operator()(at::ArrayRef args) { - TORCH_DEPLOY_TRY - return interaction_->call(*this, args); - TORCH_DEPLOY_SAFE_CATCH_RETHROW -} - -inline Obj Obj::callKwargs( - std::vector args, - std::unordered_map kwargs) { - TORCH_DEPLOY_TRY - return interaction_->callKwargs(*this, std::move(args), std::move(kwargs)); - TORCH_DEPLOY_SAFE_CATCH_RETHROW -} -inline Obj Obj::callKwargs( - std::unordered_map kwargs) { - TORCH_DEPLOY_TRY - return interaction_->callKwargs(*this, std::move(kwargs)); - TORCH_DEPLOY_SAFE_CATCH_RETHROW -} -inline bool Obj::hasattr(const char* attr) { - TORCH_DEPLOY_TRY - return interaction_->hasattr(*this, attr); - TORCH_DEPLOY_SAFE_CATCH_RETHROW -} - -inline Obj Obj::attr(const char* attr) { - TORCH_DEPLOY_TRY - return interaction_->attr(*this, attr); - TORCH_DEPLOY_SAFE_CATCH_RETHROW -} - -} // namespace deploy -} // namespace torch diff --git a/torch/csrc/deploy/interpreter/register_frozenpython.cpp b/torch/csrc/deploy/interpreter/register_frozenpython.cpp deleted file mode 100644 index 75badd2d85cb..000000000000 --- a/torch/csrc/deploy/interpreter/register_frozenpython.cpp +++ /dev/null @@ -1,82 +0,0 @@ -#include -#include - -#define FOREACH_LIBRARY(_) \ - _(array) \ - _(_asyncio) \ - _(audioop) \ - _(binascii) \ - _(_bisect) \ - _(_blake2) \ - _(_bz2) \ - _(cmath) \ - _(_codecs_cn) \ - _(_codecs_hk) \ - _(_codecs_iso2022) \ - _(_codecs_jp) \ - _(_codecs_kr) \ - _(_codecs_tw) \ - _(_contextvars) \ - _(_crypt) \ - _(_csv) \ - _(_ctypes) \ - _(_ctypes_test) \ - _(_curses) \ - _(_curses_panel) \ - _(_datetime) \ - _(_decimal) \ - _(_elementtree) \ - _(fcntl) \ - _(grp) \ - _(_hashlib) \ - _(_heapq) \ - _(_json) \ - _(_lsprof) \ - _(_lzma) \ - _(math) \ - _(_md5) \ - _(mmap) \ - _(_multibytecodec) \ - _(_multiprocessing) \ - _(nis) \ - _(_opcode) \ - _(ossaudiodev) \ - _(parser) \ - _(_pickle) \ - _(_posixsubprocess) \ - _(pyexpat) \ - _(_queue) \ - _(_random) \ - _(readline) \ - _(resource) \ - _(select) \ - _(_sha1) \ - _(_sha256) \ - _(_sha3) \ - _(_sha512) \ - _(_socket) \ - _(spwd) \ - _(_ssl) \ - _(_struct) \ - _(syslog) \ - _(termios) \ - _(_testbuffer) \ - _(_testcapi) \ - _(_testimportmultiple) \ - _(_testmultiphase) \ - _(unicodedata) \ - _(xxlimited) \ - _(_xxtestfuzz) \ - _(zlib) - -#define DECLARE_LIBRARY_INIT(name) extern "C" PyObject* PyInit_##name(void); -FOREACH_LIBRARY(DECLARE_LIBRARY_INIT) -#undef DECLARE_LIBRARY_INIT - -extern "C" struct _frozen _PyImport_FrozenModules[]; - -#define STD_LIBARY_PARMS(name) , #name, PyInit_##name -REGISTER_TORCH_DEPLOY_BUILTIN( - frozenpython, - _PyImport_FrozenModules FOREACH_LIBRARY(STD_LIBARY_PARMS)); -#undef STD_LIBARY_PARMS diff --git a/torch/csrc/deploy/interpreter/register_numpy.cpp b/torch/csrc/deploy/interpreter/register_numpy.cpp deleted file mode 100644 index b32db5729c5c..000000000000 --- a/torch/csrc/deploy/interpreter/register_numpy.cpp +++ /dev/null @@ -1,51 +0,0 @@ -#include -#include - -extern "C" struct _frozen _PyImport_FrozenModules_numpy[]; - -extern "C" PyObject* PyInit__multiarray_umath(void); -extern "C" PyObject* PyInit__multiarray_tests(void); -extern "C" PyObject* PyInit_lapack_lite(void); -extern "C" PyObject* PyInit__umath_linalg(void); -extern "C" PyObject* PyInit__pocketfft_internal(void); 
-extern "C" PyObject* PyInit_mtrand(void); -extern "C" PyObject* PyInit_bit_generator(void); -extern "C" PyObject* PyInit__common(void); -extern "C" PyObject* PyInit__bounded_integers(void); -extern "C" PyObject* PyInit__mt19937(void); -extern "C" PyObject* PyInit__philox(void); -extern "C" PyObject* PyInit__pcg64(void); -extern "C" PyObject* PyInit__sfc64(void); -extern "C" PyObject* PyInit__generator(void); - -REGISTER_TORCH_DEPLOY_BUILTIN( - frozen_numpy, - _PyImport_FrozenModules_numpy, - "numpy.core._multiarray_umath", - PyInit__multiarray_umath, - "numpy.core._multiarray_tests", - PyInit__multiarray_tests, - "numpy.linalg.lapack_lite", - PyInit_lapack_lite, - "numpy.linalg._umath_linalg", - PyInit__umath_linalg, - "numpy.fft._pocketfft_internal", - PyInit__pocketfft_internal, - "numpy.random.mtrand", - PyInit_mtrand, - "numpy.random.bit_generator", - PyInit_bit_generator, - "numpy.random._common", - PyInit__common, - "numpy.random._bounded_integers", - PyInit__bounded_integers, - "numpy.random._mt19937", - PyInit__mt19937, - "numpy.random._philox", - PyInit__philox, - "numpy.random._pcg64", - PyInit__pcg64, - "numpy.random._sfc64", - PyInit__sfc64, - "numpy.random._generator", - PyInit__generator); diff --git a/torch/csrc/deploy/interpreter/register_pyyaml.cpp b/torch/csrc/deploy/interpreter/register_pyyaml.cpp deleted file mode 100644 index 9be98f53422d..000000000000 --- a/torch/csrc/deploy/interpreter/register_pyyaml.cpp +++ /dev/null @@ -1,6 +0,0 @@ -#include -#include - -extern "C" struct _frozen _PyImport_FrozenModules_pyyaml[]; - -REGISTER_TORCH_DEPLOY_BUILTIN(frozen_pyyaml, _PyImport_FrozenModules_pyyaml); diff --git a/torch/csrc/deploy/interpreter/test_builtin_registry.cpp b/torch/csrc/deploy/interpreter/test_builtin_registry.cpp deleted file mode 100644 index 736ddb8e8aa9..000000000000 --- a/torch/csrc/deploy/interpreter/test_builtin_registry.cpp +++ /dev/null @@ -1,58 +0,0 @@ -#include -#include -#include - -namespace torch { -namespace deploy { - -bool allowLibrary(const std::string& libname) { - return libname == "lib1" || libname == "lib2"; -} - -// NOLINTNEXTLINE(modernize-avoid-c-arrays,cppcoreguidelines-avoid-c-arrays) -struct _frozen lib1FrozenModules[] = { - {"mod1", nullptr, 0}, - {nullptr, nullptr, 0}}; - -// NOLINTNEXTLINE(modernize-avoid-c-arrays,cppcoreguidelines-avoid-c-arrays) -struct _frozen lib2FrozenModules[] = { - {"mod2", nullptr, 0}, - {"mod3", nullptr, 0}, - {nullptr, nullptr, 0}}; - -void builtin1() {} -void builtin2() {} -REGISTER_TORCH_DEPLOY_BUILTIN( - lib1, - lib1FrozenModules, - "lib1.builtin1", - builtin1, - "lib1.builtin2", - builtin2); -REGISTER_TORCH_DEPLOY_BUILTIN(lib2, lib2FrozenModules); - -TEST(BuiltinRegistryTest, SimpleTest) { - const auto& items = BuiltinRegistry::items(); - EXPECT_EQ(2, items.size()); - EXPECT_EQ(lib1FrozenModules, items[0]->frozenModules); - EXPECT_EQ(lib2FrozenModules, items[1]->frozenModules); - - struct _frozen* allFrozenModules = BuiltinRegistry::getAllFrozenModules(); - EXPECT_EQ("mod1", allFrozenModules[0].name); - EXPECT_EQ("mod2", allFrozenModules[1].name); - EXPECT_EQ("mod3", allFrozenModules[2].name); - EXPECT_EQ(nullptr, allFrozenModules[3].name); - - auto allBuiltinModules = BuiltinRegistry::getAllBuiltinModules(); - EXPECT_EQ(2, allBuiltinModules.size()); - EXPECT_EQ("lib1.builtin1", allBuiltinModules[0].first); - EXPECT_EQ(builtin1, allBuiltinModules[0].second); - EXPECT_EQ("lib1.builtin2", allBuiltinModules[1].first); - EXPECT_EQ(builtin2, allBuiltinModules[1].second); - - std::string 
expectedBuiltinModulesCSV = "'lib1.builtin1', 'lib1.builtin2'"; - EXPECT_EQ(expectedBuiltinModulesCSV, BuiltinRegistry::getBuiltinModulesCSV()); -} - -} // namespace deploy -} // namespace torch diff --git a/torch/csrc/deploy/interpreter/third_party/README.md b/torch/csrc/deploy/interpreter/third_party/README.md deleted file mode 100644 index 2c5d9241d2bb..000000000000 --- a/torch/csrc/deploy/interpreter/third_party/README.md +++ /dev/null @@ -1,2 +0,0 @@ -Python libraries that we want to package along with the Python implementation -bundled in libinterpreter. diff --git a/torch/csrc/deploy/loader.cpp b/torch/csrc/deploy/loader.cpp deleted file mode 100644 index ab4d0c7c329e..000000000000 --- a/torch/csrc/deploy/loader.cpp +++ /dev/null @@ -1,1255 +0,0 @@ -// Code in this file is a heavily modified version of the dynamic loader -// from android's bionic library. Here is the license for that project: - -/* - * Copyright (C) 2016 The Android Open Source Project - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE - * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS - * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED - * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, - * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT - * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -// Get PAGE_SIZE and PAGE_MASK. -#include - -#include -#include - -#include -#include -#include - -namespace torch { -namespace deploy { - -#define DEPLOY_ERROR(msg_fmt, ...) \ - throw DeployLoaderError(fmt::format(msg_fmt, ##__VA_ARGS__)) - -#define DEPLOY_CHECK(cond, fmt, ...) 
\ - if (!(cond)) { \ - DEPLOY_ERROR(fmt, ##__VA_ARGS__); \ - } - -std::vector split_path(const std::string& s, char delim) { - const char* cur = s.c_str(); - const char* end = cur + s.size(); - if (cur == end) { - return {}; - } - std::vector result; - while (true) { - // non-zero amount of chars - const char* next = strchr(cur, delim); - if (!next) { - result.emplace_back(std::string(cur, end)); - break; - } - result.emplace_back(std::string(cur, next)); - cur = next + 1; - } - return result; -} - -// https://stackoverflow.com/questions/23006930/the-shared-library-rpath-and-the-binary-rpath-priority/52647116#52647116 -void replace_all( - std::string& str, - const std::string& from, - const std::string& to) { - if (from.empty()) - return; - size_t start_pos = 0; - while ((start_pos = str.find(from, start_pos)) != std::string::npos) { - str.replace(start_pos, from.length(), to); - start_pos += to.length(); // In case 'to' contains 'from', like replacing - // 'x' with 'yx' - } -} - -std::string resolve_path(const std::string& origin, const std::string& t) { - std::string result = t; - replace_all(result, "$ORIGIN", origin); - // NOLINTNEXTLINE - char buf[PATH_MAX]; - char* resolved = realpath(result.c_str(), buf); - if (!resolved) { - return result; - } - return resolved; -} - -std::string resolve_origin(const std::string& so_name) { - // NOLINTNEXTLINE - char origin[PATH_MAX]; - realpath(so_name.c_str(), origin); - dirname(origin); - return origin; -} - -template -std::string stringf(const char* format, Args... args) { - int size_s = snprintf(nullptr, 0, format, args...); - std::string result(size_s + 1, 0); - snprintf((char*)result.data(), size_s + 1, format, args...); - return result; -} -// Returns the address of the page containing address 'x'. -#define PAGE_START(x) ((x)&PAGE_MASK) - -// Returns the offset of address 'x' in its page. -#define PAGE_OFFSET(x) ((x) & ~PAGE_MASK) - -// Returns the address of the next page after address 'x', unless 'x' is -// itself at the start of a page. -#define PAGE_END(x) PAGE_START((x) + (PAGE_SIZE - 1)) - -// from bionic -// returns the size a shared library will take in memory -size_t phdr_table_get_load_size( - const Elf64_Phdr* phdr_table, - size_t phdr_count, - Elf64_Addr* out_min_vaddr, - Elf64_Addr* out_max_vaddr) { - Elf64_Addr min_vaddr = UINTPTR_MAX; - Elf64_Addr max_vaddr = 0; - - bool found_pt_load = false; - for (const auto i : c10::irange(phdr_count)) { - const Elf64_Phdr* phdr = &phdr_table[i]; - - if (phdr->p_type != PT_LOAD) { - continue; - } - found_pt_load = true; - - if (phdr->p_vaddr < min_vaddr) { - min_vaddr = phdr->p_vaddr; - } - - if (phdr->p_vaddr + phdr->p_memsz > max_vaddr) { - max_vaddr = phdr->p_vaddr + phdr->p_memsz; - } - } - if (!found_pt_load) { - min_vaddr = 0; - } - - min_vaddr = PAGE_START(min_vaddr); - max_vaddr = PAGE_END(max_vaddr); - - if (out_min_vaddr != nullptr) { - *out_min_vaddr = min_vaddr; - } - if (out_max_vaddr != nullptr) { - *out_max_vaddr = max_vaddr; - } - return max_vaddr - min_vaddr; -} - -#define MAYBE_MAP_FLAG(x, from, to) (((x) & (from)) ? (to) : 0) -#define PFLAGS_TO_PROT(x) \ - (MAYBE_MAP_FLAG((x), PF_X, PROT_EXEC) | \ - MAYBE_MAP_FLAG((x), PF_R, PROT_READ) | \ - MAYBE_MAP_FLAG((x), PF_W, PROT_WRITE)) - -// holds a pre-computed hash for a string that is used in a GNU-style hash -// tables and also keeps track of the string length. 
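The hash referred to here is the DT_GNU_HASH symbol hash: a DJB-style function starting at 5381 where each byte contributes h = h*33 + c. A standalone reference version, for comparison with the unrolled loop in GnuHash below:

// Reference implementation of the DT_GNU_HASH symbol hash.
#include <cstdint>
#include <cstdio>

static uint32_t gnu_hash(const char* name) {
  uint32_t h = 5381;
  for (const unsigned char* p = (const unsigned char*)name; *p != 0; ++p) {
    h = (h << 5) + h + *p;  // h * 33 + c
  }
  return h;
}

int main() {
  std::printf("%#x\n", gnu_hash(""));        // 0x1505, the seed value
  std::printf("%#x\n", gnu_hash("printf"));  // used to pick bloom bits and a bucket
  return 0;
}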
-struct GnuHash { - GnuHash(const char* name) { - uint32_t h = 5381; - const uint8_t* name_bytes = reinterpret_cast(name); -#pragma unroll 8 - while (*name_bytes != 0) { - h += (h << 5) + - *name_bytes++; // h*33 + c = h + h * 32 + c = h + h << 5 + c - } - hash = h; - name_len = reinterpret_cast(name_bytes) - name; - } - uint32_t hash; - uint32_t name_len; -}; - -// this is a special builtin in the libc++ API used for telling C++ execption -// frame unwinding about functions loaded from a pathway other than the libc -// loader. it is passed a pointer to where the EH_FRAME section was loaded, -// which appears to include frame information relative to that address. -extern "C" void __register_frame(void*); -extern "C" void __deregister_frame(void*); - -typedef void (*linker_dtor_function_t)(); -typedef void (*linker_ctor_function_t)(int, const char**, char**); - -// https://refspecs.linuxfoundation.org/LSB_2.1.0/LSB-Core-generic/LSB-Core-generic/ehframehdr.html -// note that eh_frame_ptr can be different types based on eh_frame_ptr_enc but -// we only support one sepecific encoding that is stored in a int32_t and an -// offset relative to the start of this struct. -struct EH_Frame_HDR { - char version; - char eh_frame_ptr_enc; - char fde_count_enc; - char table_enc; - int32_t eh_frame_ptr; -}; - -// this is the libc++ function called to lookup thread local state. -// It is passed a pointer to an object of the same shape as TLSEntry -// with the module_id and offset. -extern "C" void* __tls_get_addr(void*); - -extern "C" int __cxa_thread_atexit_impl( - void (*dtor)(void*), - void* obj, - void* dso_symbol); - -struct CustomLibraryImpl; - -struct TLSMemory { - TLSMemory(std::shared_ptr file, size_t size) - // NOLINTNEXTLINE - : file_(std::move(file)), mem_(malloc(size)) {} - std::shared_ptr file_; - void* mem_; - ~TLSMemory() { - // NOLINTNEXTLINE - free(mem_); - } -}; - -static void delete_TLSMemory(void* obj) { - delete ((TLSMemory*)obj); -} - -// This object performs TLS emulation for modules not loaded by dlopen. -// Normally modules have a module_id that is used as a key in libc for the -// thread local data for that module. However, there is no public API for -// assigning this module id. Instead, for modules that we load, we set module_id -// to a pointer to a TLSSegment object, and replace __tls_get_addr with a -// function that calls `addr`. - -// libc module_id's are sequential, so we use the top bit as a flag to see -// if we have a local TLSegment object instead. This will break if -// someone creates 2^63 sequential objects, but it is hard to imagine -// a system with enough RAM to do that. -constexpr size_t TLS_LOCAL_FLAG = (1ULL << 63); - -static void* local__tls_get_addr(TLSIndex* idx); - -/* LLDB puts a breakpoint in this function, and reads __deploy_module_info to - * get debug info from library. */ -__attribute__((noinline)) void __deploy_register_code() { - std::cout << ""; // otherwise the breakpoint doesn't get hit, not sure if - // there is a more stable way of doing this. 
-}; - -struct DeployModuleInfo { - const char* name; - Elf64_Addr file_addr; - size_t file_size; - Elf64_Addr load_bias; -}; - -extern "C" { -// NOLINTNEXTLINE -DeployModuleInfo __deploy_module_info; -} - -// RAII wrapper around dlopen -struct __attribute__((visibility("hidden"))) SystemLibraryImpl - : public SystemLibrary { - SystemLibraryImpl(void* handle, bool steal) - : handle_(handle), own_handle_(steal && handle != RTLD_DEFAULT) {} - - multipy::optional sym(const char* name) const override { - void* r = dlsym(handle_, name); - if (!r) { - return multipy::nullopt; - } - return (Elf64_Addr)r; - } - - multipy::optional tls_sym(const char* name) const override; - - ~SystemLibraryImpl() override { - if (own_handle_) { - dlclose(handle_); - } - } - - private: - void* handle_; - bool own_handle_; -}; - -std::shared_ptr SystemLibrary::create(void* handle, bool steal) { - return std::make_shared(handle, steal); -} -std::shared_ptr SystemLibrary::create( - const char* path, - int flags) { - void* handle = dlopen(path, flags); - return SystemLibrary::create(handle, handle != nullptr); -} - -// reads DT_NEEDED and DT_RUNPATH from an unloaded elf file so we can sort out -// dependencies before calling dlopen -std::pair> load_needed_from_elf_file( - const char* filename, - const char* data) { - auto header_ = (Elf64_Ehdr*)data; - auto program_headers = (Elf64_Phdr*)(data + header_->e_phoff); - auto n_program_headers = header_->e_phnum; - const Elf64_Dyn* dynamic = nullptr; - for (const auto i : c10::irange(n_program_headers)) { - const Elf64_Phdr* phdr = &program_headers[i]; - if (phdr->p_type == PT_DYNAMIC) { - dynamic = reinterpret_cast(data + phdr->p_offset); - break; - } - } - DEPLOY_CHECK( - dynamic, - "{}: could not load dynamic section for looking up DT_NEEDED", - filename); - - const char* runpath = ""; - std::vector needed; - - auto segment_headers = (Elf64_Shdr*)(data + header_->e_shoff); - size_t n_segments = header_->e_shnum; - const char* strtab = nullptr; - - const char* segment_string_table = - data + segment_headers[header_->e_shstrndx].sh_offset; - - for (const auto i : c10::irange(n_segments)) { - const Elf64_Shdr* shdr = &segment_headers[i]; - if (shdr->sh_type == SHT_STRTAB && - strcmp(".dynstr", segment_string_table + shdr->sh_name) == 0) { - strtab = data + shdr->sh_offset; - break; - } - } - - DEPLOY_CHECK(strtab, "{}: could not load dynstr for DT_NEEDED", filename); - - for (const Elf64_Dyn* d = dynamic; d->d_tag != DT_NULL; ++d) { - switch (d->d_tag) { - case DT_NEEDED: - // std::cout << "NEEDED: '" << strtab + d->d_un.d_val << "'\n"; - needed.push_back(strtab + d->d_un.d_val); - break; - case DT_RPATH: /* not quite correct, because this is a different order - than runpath, - but better than not processing it at all */ - case DT_RUNPATH: - // std::cout << "RUNPATH: '" << strtab + d->d_un.d_val << "'\n"; - runpath = strtab + d->d_un.d_val; - break; - } - } - return std::make_pair(runpath, std::move(needed)); -} - -// common mechanism for reading the elf symbol table, -// and other information in the PT_DYNAMIC segment. 
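A compact sketch of that bookkeeping: walk each already-loaded module's PT_DYNAMIC segment via dl_iterate_phdr and list its DT_NEEDED entries. It assumes, as glibc does at runtime, that DT_STRTAB holds an absolute address; the ElfDynamicInfo code below handles both absolute and load-bias-relative pointers via its check_absolute flag.

// List the DT_NEEDED entries of every module libc has already loaded.
#include <elf.h>
#include <link.h>
#include <cstdio>

static int dump_needed(struct dl_phdr_info* info, size_t, void*) {
  const ElfW(Dyn)* dyn = nullptr;
  for (int i = 0; i < info->dlpi_phnum; ++i) {
    if (info->dlpi_phdr[i].p_type == PT_DYNAMIC) {
      dyn = (const ElfW(Dyn)*)(info->dlpi_addr + info->dlpi_phdr[i].p_vaddr);
      break;
    }
  }
  if (!dyn) return 0;

  // First pass: find the string table that DT_NEEDED values index into.
  const char* strtab = nullptr;
  for (const ElfW(Dyn)* d = dyn; d->d_tag != DT_NULL; ++d) {
    if (d->d_tag == DT_STRTAB) strtab = (const char*)d->d_un.d_ptr;
  }
  if (!strtab) return 0;

  std::printf("%s\n", info->dlpi_name);
  for (const ElfW(Dyn)* d = dyn; d->d_tag != DT_NULL; ++d) {
    if (d->d_tag == DT_NEEDED) std::printf("  needs %s\n", strtab + d->d_un.d_val);
  }
  return 0;  // keep iterating over the remaining modules
}

int main() {
  dl_iterate_phdr(dump_needed, nullptr);
  return 0;
}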
-struct ElfDynamicInfo { - std::string name_; - const Elf64_Dyn* dynamic_ = nullptr; - Elf64_Addr load_bias_ = 0; - const Elf64_Sym* symtab_ = nullptr; - const char* strtab_ = nullptr; - size_t strtab_size_ = 0; - Elf64_Rela* plt_rela_ = nullptr; - size_t n_plt_rela_ = 0; - Elf64_Rela* rela_ = nullptr; - size_t n_rela_ = 0; - linker_ctor_function_t init_func_ = nullptr; - linker_ctor_function_t* init_array_ = nullptr; - linker_dtor_function_t fini_func_ = nullptr; - linker_dtor_function_t* fini_array_ = nullptr; - size_t n_init_array_ = 0; - size_t n_fini_array_ = 0; - size_t gnu_nbucket_ = 0; - uint32_t* gnu_bucket_ = nullptr; - uint32_t* gnu_chain_ = nullptr; - uint32_t gnu_maskwords_ = 0; - uint32_t gnu_shift2_ = 0; - Elf64_Addr* gnu_bloom_filter_ = nullptr; - std::string runpath_; - std::vector needed_; - - const char* get_string(int idx) { - return strtab_ + idx; - } - - void initialize_from_dynamic_section( - std::string name, - Elf64_Dyn* dynamic, - Elf64_Addr load_bias, - bool check_absolute) { - name_ = std::move(name); - load_bias_ = load_bias; - dynamic_ = dynamic; - for (const Elf64_Dyn* d = dynamic_; d->d_tag != DT_NULL; ++d) { - void* addr = (check_absolute && d->d_un.d_ptr > load_bias_) - ? reinterpret_cast(d->d_un.d_ptr) - : reinterpret_cast(load_bias_ + d->d_un.d_ptr); - auto value = d->d_un.d_val; - - switch (d->d_tag) { - case DT_SYMTAB: - symtab_ = (Elf64_Sym*)addr; - break; - case DT_STRTAB: - strtab_ = (const char*)addr; - break; - - case DT_STRSZ: - strtab_size_ = value; - break; - - case DT_JMPREL: - plt_rela_ = (Elf64_Rela*)addr; - break; - case DT_PLTRELSZ: - n_plt_rela_ = value / sizeof(Elf64_Rela); - break; - case DT_RELA: - rela_ = (Elf64_Rela*)addr; - break; - case DT_RELASZ: - n_rela_ = value / sizeof(Elf64_Rela); - break; - - case DT_INIT: - init_func_ = reinterpret_cast( - load_bias_ + d->d_un.d_ptr); - break; - - case DT_FINI: - fini_func_ = reinterpret_cast( - load_bias_ + d->d_un.d_ptr); - break; - - case DT_INIT_ARRAY: - init_array_ = reinterpret_cast( - load_bias_ + d->d_un.d_ptr); - break; - - case DT_INIT_ARRAYSZ: - n_init_array_ = - static_cast(d->d_un.d_val) / sizeof(Elf64_Addr); - break; - - case DT_FINI_ARRAY: - fini_array_ = reinterpret_cast( - load_bias_ + d->d_un.d_ptr); - break; - - case DT_FINI_ARRAYSZ: - n_fini_array_ = - static_cast(d->d_un.d_val) / sizeof(Elf64_Addr); - break; - - case DT_HASH: - break; - - case DT_GNU_HASH: { - gnu_nbucket_ = reinterpret_cast(addr)[0]; - // skip symndx - gnu_maskwords_ = reinterpret_cast(addr)[2]; - gnu_shift2_ = reinterpret_cast(addr)[3]; - gnu_bloom_filter_ = - reinterpret_cast((Elf64_Addr)addr + 16); - gnu_bucket_ = - reinterpret_cast(gnu_bloom_filter_ + gnu_maskwords_); - // amend chain for symndx = header[1] - gnu_chain_ = - gnu_bucket_ + gnu_nbucket_ - reinterpret_cast(addr)[1]; - --gnu_maskwords_; - } break; - } - } - - if (!gnu_bucket_) { - std::cout << fmt::format( - "{}: warning, no DT_GNU_HASH found, symbol lookups on this module will not find anything.\n", - name_); - } - - // pass 2 for things that require the strtab_ to be loaded - for (const Elf64_Dyn* d = dynamic_; d->d_tag != DT_NULL; ++d) { - switch (d->d_tag) { - case DT_NEEDED: - needed_.push_back(get_string(d->d_un.d_val)); - break; - case DT_RPATH: /* not quite correct, because this is a different order - than runpath, - but better than not processing it at all */ - case DT_RUNPATH: - runpath_ = get_string(d->d_un.d_val); - break; - } - } - } - - multipy::optional sym( - const char* name, - GnuHash* precomputed_hash = nullptr) const 
{ - if (!gnu_bucket_) { - return multipy::nullopt; // no hashtable was loaded - } - GnuHash hash_obj = precomputed_hash ? *precomputed_hash : GnuHash(name); - auto hash = hash_obj.hash; - auto name_len = hash_obj.name_len; - constexpr uint32_t kBloomMaskBits = sizeof(Elf64_Addr) * 8; - - const uint32_t word_num = (hash / kBloomMaskBits) & gnu_maskwords_; - const Elf64_Addr bloom_word = gnu_bloom_filter_[word_num]; - const uint32_t h1 = hash % kBloomMaskBits; - const uint32_t h2 = (hash >> gnu_shift2_) % kBloomMaskBits; - - if ((1 & (bloom_word >> h1) & (bloom_word >> h2)) != 1) { - return multipy::nullopt; - } - - uint32_t sym_idx = gnu_bucket_[hash % gnu_nbucket_]; - if (sym_idx == 0) { - return multipy::nullopt; - } - - uint32_t chain_value = 0; - const Elf64_Sym* sym = nullptr; - - do { - sym = symtab_ + sym_idx; - chain_value = gnu_chain_[sym_idx]; - if ((chain_value >> 1) == (hash >> 1)) { - if (static_cast(sym->st_name) + name_len + 1 <= strtab_size_ && - memcmp(strtab_ + sym->st_name, name, name_len + 1) == 0) { - // found the matching entry, is it defined? - if (sym->st_shndx != 0) { - return sym->st_value + - ((ELF64_ST_TYPE(sym->st_info) == STT_TLS) ? 0 : load_bias_); - } - // symbol isn't defined - return multipy::nullopt; - } - } - ++sym_idx; - } while ((chain_value & 1) == 0); - return multipy::nullopt; - } -}; - -// for resolving TLS offsets we need to look through -// libc's already loaded libraries. We do not have the whole -// ELF file mapped in this case just a pointer to the program headers and -// the load_bias (offset in memory) where the library was loaded. -struct AlreadyLoadedSymTable { - private: - ElfDynamicInfo dyninfo_; - - public: - AlreadyLoadedSymTable( - const char* name, - Elf64_Addr load_bias, - const Elf64_Phdr* program_headers, - size_t n_program_headers) { - Elf64_Dyn* dynamic = nullptr; - for (const auto i : c10::irange(n_program_headers)) { - const Elf64_Phdr* phdr = &program_headers[i]; - - // Segment addresses in memory. - Elf64_Addr seg_start = phdr->p_vaddr + load_bias; - if (phdr->p_type == PT_DYNAMIC) { - dynamic = reinterpret_cast(seg_start); - break; - } - } - DEPLOY_CHECK( - dynamic, "%s: couldn't find PT_DYNAMIC in already loaded table.", name); - dyninfo_.initialize_from_dynamic_section(name, dynamic, load_bias, true); - } - - multipy::optional sym(const char* name) { - return dyninfo_.sym(name); - } -}; -static int iterate_cb(struct dl_phdr_info* info, size_t size, void* data) { - auto fn = (std::function*)data; - return (*fn)(info, size); -} - -// we need to find a TLS offset / module_id pair for a symbol which we cannot do -// with a normal dlsym call. Instead we iterate through all loaded libraries and -// check their symbol tables for the symbol. The value of the symbol is the TLS -// offset. When we find the library we also get the module id. -multipy::optional slow_find_tls_symbol_offset(const char* sym_name) { - multipy::optional result = multipy::nullopt; - std::function cb = - [&](struct dl_phdr_info* info, size_t size) { - // std::cout << "SEARCHING .. 
" << info->dlpi_name << "\n"; - AlreadyLoadedSymTable symtable( - info->dlpi_name, - info->dlpi_addr, - info->dlpi_phdr, - info->dlpi_phnum); - auto sym_addr = symtable.sym(sym_name); - if (sym_addr) { - // std::cout << "FOUND IT IN: " << info->dlpi_name << " it has modid: - // " << info->dlpi_tls_modid << "\n"; - result = TLSIndex{info->dlpi_tls_modid, *sym_addr}; - return 1; - } - return 0; - }; - - dl_iterate_phdr(iterate_cb, (void*)&cb); - return result; -} - -multipy::optional SystemLibraryImpl::tls_sym(const char* name) const { - if (!sym(name)) { - return multipy::nullopt; // before we do a bunch of slow lookups to find the - // module_id, check that this even defines the - // symbol - } - if (handle_ == RTLD_DEFAULT) { - return slow_find_tls_symbol_offset(name); - } - - struct link_map* lm = nullptr; - DEPLOY_CHECK( - 0 == dlinfo(handle_, RTLD_DI_LINKMAP, &lm), "failed to query dlinfo"); - std::cout << "TLS dlinfo LOOKUP " << lm->l_name << " " << name << " " - << "\n"; - - ElfDynamicInfo info; - info.initialize_from_dynamic_section(lm->l_name, lm->l_ld, lm->l_addr, true); - auto r = info.sym(name); - if (r) { - size_t module_id = 0; - DEPLOY_CHECK( - 0 == dlinfo(handle_, RTLD_DI_TLS_MODID, &module_id), - "failed to query dlinfo for module_id"); - return TLSIndex{module_id, *r}; - } - return multipy::nullopt; -} - -// dlopen does not accept additional search paths as an argument. -// however, normal DT_NEEDED library load inherits the runpath of parents. -// So we need to pre-find all the libraries and call dlopen on them directly to -// get the same behavior. We can find the dependencies by reading the libraries -// dynamic section for recursive DT_NEEED entries. -void resolve_needed_libraries( - std::vector>& libraries, - const std::string& origin_relative, - std::vector& search_path, - const std::string& runpath_template, - const std::vector& needed) { - size_t search_path_start_size = search_path.size(); - - std::string origin = resolve_origin(origin_relative); - std::vector paths = split_path(runpath_template, ':'); - // backwards because we want paths to be search in order but we search - // search_path backward - for (size_t i = paths.size(); i > 0; --i) { - search_path.emplace_back(resolve_path(origin, paths[i - 1])); - } - - for (const char* name : needed) { - // std::cout << "ATTEMPTING FIND " << name << "\n"; - if (strcmp(name, "libtorch_python.so") == 0) { - // torchvision expects it... 
- continue; - } - // find the library, either (1) it is already loaded, - // (2) it is an absolute path that exists, - // (3) we find it in the search path - // (4) we can dlopen it - - // (1) the library is already loaded - const int base_flags = RTLD_LAZY | RTLD_LOCAL; - void* handle = dlopen(name, base_flags | RTLD_NOLOAD); - if (handle) { - // std::cout << "ALREADY LOADED " << name << "\n"; - libraries.emplace_back(SystemLibrary::create(handle, true)); - continue; - } - - std::string library_path = ""; - // (2) it is an absolute path - if (strchr(name, '/') != nullptr) { - library_path = name; - } else { - // (3) find it in the search path - for (size_t i = search_path.size(); i > 0; --i) { - std::stringstream ss; - ss << search_path[i - 1] << "/" << name; - if (access(ss.str().c_str(), F_OK) == 0) { - library_path = ss.str(); - break; - } - } - } - - std::vector> - sublibraries; // these need to say loaded until we open library_path - // otherwise we might dlclose a sublibrary - - if (library_path != "") { - // std::cout << "LOOKING FOR SUBLIBRARIES FOR FILE AT PATH " << - // library_path << "\n"; we found the actual file, recursively load its - // deps before opening it so we resolve their paths correctly - MemFile image(library_path.c_str()); - auto search = - load_needed_from_elf_file(library_path.c_str(), image.data()); - resolve_needed_libraries( - sublibraries, library_path, search_path, search.first, search.second); - } else { - library_path = name; - } - - // either we didn't find the file, or we have already loaded its deps - // in both cases, we now try to call dlopen. In the case where we didn't - // find the file, we hope that something like LD_LIBRARY_PATH knows where it - // is. In the case where we found it, we know its deps are loaded and - // resolved. 
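Condensed, the lookup order described in this comment looks roughly like the helper below (illustrative; the real code also recurses into each found file's own DT_NEEDED list before opening it).

// (1) already loaded, (2) absolute path, (3) explicit search dirs,
// (4) fall back to a plain dlopen and let the default search path decide.
#include <dlfcn.h>
#include <unistd.h>
#include <string>
#include <vector>

void* find_or_open(const char* name, const std::vector<std::string>& dirs) {
  const int flags = RTLD_LAZY | RTLD_LOCAL;

  // (1) RTLD_NOLOAD returns a handle only if the library is already mapped.
  if (void* h = dlopen(name, flags | RTLD_NOLOAD)) return h;

  std::string path;
  if (std::string(name).find('/') != std::string::npos) {
    path = name;  // (2) a path is used as-is
  } else {
    // (3) probe each search directory (DT_RUNPATH entries, $ORIGIN expanded).
    for (const auto& dir : dirs) {
      std::string candidate = dir + "/" + name;
      if (access(candidate.c_str(), F_OK) == 0) { path = candidate; break; }
    }
  }
  if (path.empty()) path = name;  // (4) hope LD_LIBRARY_PATH etc. knows it

  return dlopen(path.c_str(), flags);
}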
- - // std::cout << "OPENING " << library_path << "\n"; - handle = dlopen(library_path.c_str(), base_flags); - DEPLOY_CHECK( - handle, "{}: could not load library, dlopen says: {}", name, dlerror()); - libraries.emplace_back(SystemLibrary::create(handle, true)); - } - - // unwind search_path stack - search_path.erase( - search_path.begin() + search_path_start_size, search_path.end()); -} - -// NOLINTNEXTLINE -extern "C" void* __dso_handle; - -struct __attribute__((visibility("hidden"))) CustomLibraryImpl - : public std::enable_shared_from_this, - public CustomLibrary { - CustomLibraryImpl(const char* filename, int argc, const char** argv) - : contents_(filename), - mapped_library_(nullptr), - name_(filename), - argc_(argc), - argv_(argv) { - pthread_key_create(&tls_key_, nullptr); - data_ = contents_.data(); - header_ = (Elf64_Ehdr*)data_; - program_headers_ = (Elf64_Phdr*)(data_ + header_->e_phoff); - n_program_headers_ = header_->e_phnum; - } - void add_search_library(std::shared_ptr lib) override { - symbol_search_path_.emplace_back(std::move(lib)); - } - - void check_library_format() { - DEPLOY_CHECK( - 0 == memcmp(header_->e_ident, ELFMAG, SELFMAG), - "{}: not an ELF file", - this->name_); - DEPLOY_CHECK( - header_->e_type == ET_DYN, - "{}: is not shared object file", - this->name_); - DEPLOY_CHECK( - header_->e_ident[EI_CLASS] == ELFCLASS64, - "{}: is not ELF64 format", - this->name_); - DEPLOY_CHECK( - header_->e_ident[EI_DATA] == ELFDATA2LSB, - "{}: is not 2's complement, little endian", - this->name_); - DEPLOY_CHECK( - header_->e_machine == EM_X86_64, - "{}: is not in x86_64 format", - this->name_); - } - - void reserve_address_space() { - Elf64_Addr min_vaddr = 0; - Elf64_Addr max_vaddr = 0; - mapped_size_ = phdr_table_get_load_size( - program_headers_, n_program_headers_, &min_vaddr, &max_vaddr); - mapped_library_ = mmap( - nullptr, mapped_size_, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); - load_bias_ = - (const char*)mapped_library_ - reinterpret_cast(min_vaddr); - } - - void load_segments() { - // from bionic - for (const auto i : c10::irange(n_program_headers_)) { - const Elf64_Phdr* phdr = &program_headers_[i]; - - // Segment addresses in memory. - Elf64_Addr seg_start = phdr->p_vaddr + load_bias_; - Elf64_Addr seg_end = seg_start + phdr->p_memsz; - - switch (phdr->p_type) { - case PT_DYNAMIC: - dynamic_ = reinterpret_cast(seg_start); - break; - case PT_GNU_EH_FRAME: - eh_frame_hdr_ = reinterpret_cast(seg_start); - DEPLOY_CHECK( - eh_frame_hdr_->eh_frame_ptr_enc == 0x1b, - "unsupported eh_frame_pointer_enc {}", - eh_frame_hdr_->eh_frame_ptr_enc); - eh_frame_ = - (void*)((int64_t)&eh_frame_hdr_->eh_frame_ptr + eh_frame_hdr_->eh_frame_ptr); - break; - case PT_TLS: - tls_file_size_ = phdr->p_filesz; - tls_mem_size_ = phdr->p_memsz; - tls_initalization_image_ = (void*)seg_start; - break; - }; - - if (phdr->p_type != PT_LOAD) { - continue; - } - - Elf64_Addr seg_page_start = PAGE_START(seg_start); - Elf64_Addr seg_page_end = PAGE_END(seg_end); - - Elf64_Addr seg_file_end = seg_start + phdr->p_filesz; - - // File offsets. 
- Elf64_Addr file_start = phdr->p_offset; - Elf64_Addr file_end = file_start + phdr->p_filesz; - - Elf64_Addr file_page_start = PAGE_START(file_start); - Elf64_Addr file_length = file_end - file_page_start; - - if (contents_.size() <= 0) { - DEPLOY_ERROR( - "\"{}\" invalid file size: {}", name_.c_str(), contents_.size()); - } - - if (file_end > contents_.size()) { - DEPLOY_ERROR( - "invalid ELF file \"{}\" load segment[{}]:" - " p_offset ({}) + p_filesz ({}) ( = {}) past end of file " - "({})", - name_.c_str(), - i, - reinterpret_cast(phdr->p_offset), - reinterpret_cast(phdr->p_filesz), - reinterpret_cast(file_end), - contents_.size()); - } - - if (file_length != 0) { - int prot = PFLAGS_TO_PROT(phdr->p_flags); - - void* seg_addr = mmap64( - reinterpret_cast(seg_page_start), - file_length, - prot | PROT_WRITE, // initially everything is writable to do - // relocations - MAP_FIXED | MAP_PRIVATE, - contents_.fd(), - file_page_start); - fixup_prot_.emplace_back([=]() { - mprotect(reinterpret_cast(seg_page_start), file_length, prot); - }); - if (seg_addr == MAP_FAILED) { - DEPLOY_ERROR( - "couldn't map \"{}\" segment {}: {}", - name_.c_str(), - i, - strerror(errno)); - } - } - - // if the segment is writable, and does not end on a page boundary, - // zero-fill it until the page limit. - if ((phdr->p_flags & PF_W) != 0 && PAGE_OFFSET(seg_file_end) > 0) { - memset( - reinterpret_cast(seg_file_end), - 0, - PAGE_SIZE - PAGE_OFFSET(seg_file_end)); - } - - seg_file_end = PAGE_END(seg_file_end); - - // seg_file_end is now the first page address after the file - // content. If seg_end is larger, we need to zero anything - // between them. This is done by using a private anonymous - // map for all extra pages. - if (seg_page_end > seg_file_end) { - size_t zeromap_size = seg_page_end - seg_file_end; - int prot = PFLAGS_TO_PROT(phdr->p_flags); - void* zeromap = mmap( - reinterpret_cast(seg_file_end), - zeromap_size, - prot | PROT_WRITE, - MAP_FIXED | MAP_ANONYMOUS | MAP_PRIVATE, - -1, - 0); - fixup_prot_.emplace_back([=]() { - mprotect(reinterpret_cast(seg_file_end), zeromap_size, prot); - }); - if (zeromap == MAP_FAILED) { - DEPLOY_ERROR( - "couldn't zero fill \"{}\" gap: {}", - name_.c_str(), - strerror(errno)); - } - } - } - } - size_t module_id() const { - size_t this_as_number = (size_t)this; - return this_as_number | TLS_LOCAL_FLAG; - } - - void read_dynamic_section() { - dyninfo_.initialize_from_dynamic_section( - name_, dynamic_, load_bias_, false); - std::vector empty_search_path; - resolve_needed_libraries( - symbol_search_path_, - name_, - empty_search_path, - dyninfo_.runpath_, - dyninfo_.needed_); - } - - multipy::optional lookup_symbol(Elf64_Xword r_info) { - const uint32_t r_type = ELF64_R_TYPE(r_info); - const uint32_t r_sym = ELF64_R_SYM(r_info); - - if (r_sym == 0) { - return (Elf64_Addr)0; - } - auto sym_st = dyninfo_.symtab_[r_sym]; - const char* sym_name = dyninfo_.get_string(sym_st.st_name); - if (r_type == R_X86_64_JUMP_SLOT) { - if (strcmp(sym_name, "__tls_get_addr") == 0) { - return (Elf64_Addr)local__tls_get_addr; - } - if (strcmp(sym_name, "__cxa_thread_atexit") == 0) { - return (Elf64_Addr)__cxa_thread_atexit_impl; - } - } - for (const auto& sys_lib : symbol_search_path_) { - auto r = sys_lib->sym(sym_name); - if (r) { - return r; - } - } - auto r = sym(sym_name); - if (r) { - return r; - } - if (ELF64_ST_BIND(sym_st.st_info) != STB_WEAK) { - DEPLOY_ERROR( - "{}: '{}' symbol not found in ElfFile lookup", - name_.c_str(), - sym_name); - } - return multipy::nullopt; - } - - 
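The TLS relocations handled next resolve to a (module_id, offset) pair rather than an address, and for custom-loaded modules the loader emulates per-module thread-local storage with a pthread key (see tls_addr further down). A standalone sketch of that pattern, simplified to use the pthread key destructor for cleanup instead of __cxa_thread_atexit_impl:

// First access on each thread copies the PT_TLS initialization image into
// freshly allocated memory and caches it under a pthread key, so later
// (module_id, offset) lookups reduce to pointer arithmetic.
#include <pthread.h>
#include <cstdlib>
#include <cstring>

struct EmulatedTLS {
  pthread_key_t key;
  const void* init_image;  // bytes of the PT_TLS segment (p_filesz long)
  size_t file_size;        // initialized portion (.tdata)
  size_t mem_size;         // total size; the remainder is zero-filled (.tbss)

  EmulatedTLS(const void* image, size_t fsz, size_t msz)
      : init_image(image), file_size(fsz), mem_size(msz) {
    pthread_key_create(&key, /*destructor=*/free);
  }

  void* addr(size_t offset) {
    void* base = pthread_getspecific(key);
    if (!base) {
      base = malloc(mem_size);
      memcpy(base, init_image, file_size);
      memset((char*)base + file_size, 0, mem_size - file_size);
      pthread_setspecific(key, base);
    }
    return (char*)base + offset;
  }
};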
multipy::optional tls_lookup_symbol(Elf64_Xword r_info) { - const uint32_t r_sym = ELF64_R_SYM(r_info); - - if (r_sym == 0) { - return TLSIndex{ - module_id(), - 0}; // note: offset is not queried when the symbol is blank - } - - auto sym_st = dyninfo_.symtab_[r_sym]; - const char* sym_name = dyninfo_.get_string(sym_st.st_name); - for (const auto& sys_lib : symbol_search_path_) { - auto r = sys_lib->tls_sym(sym_name); - if (r) { - return r; - } - } - auto r = tls_sym(sym_name); - if (r) { - return r; - } - - if (ELF64_ST_BIND(sym_st.st_info) != STB_WEAK) { - DEPLOY_ERROR( - "{}: '{}' symbol not found in ElfFile lookup", - name_.c_str(), - sym_name); - } - return multipy::nullopt; - } - - void relocate_one(const Elf64_Rela& reloc) { - const uint32_t r_type = ELF64_R_TYPE(reloc.r_info); - - if (r_type == 0) { - return; - } - - void* const rel_target = - reinterpret_cast(reloc.r_offset + load_bias_); - - // TLS relocations need to lookup symbols differently so we can get the - // module_id - if (r_type == R_X86_64_DTPMOD64 || r_type == R_X86_64_DTPOFF64) { - auto tls_index = tls_lookup_symbol(reloc.r_info); - if (!tls_index) { - return; // skip weak relocation that wasn't found - } - switch (r_type) { - case R_X86_64_DTPMOD64: - *static_cast(rel_target) = tls_index->module_id; - break; - case R_X86_64_DTPOFF64: - *static_cast(rel_target) = - tls_index->offset + reloc.r_addend; - break; - } - return; - } - - auto sym_addr = lookup_symbol(reloc.r_info); - if (!sym_addr) { - return; // skip weak relocation that wasn't found - } - - switch (r_type) { - case R_X86_64_JUMP_SLOT: - case R_X86_64_64: - case R_X86_64_GLOB_DAT: { - const Elf64_Addr result = *sym_addr + reloc.r_addend; - *static_cast(rel_target) = result; - } break; - case R_X86_64_RELATIVE: { - // In practice, r_sym is always zero, but if it weren't, the linker - // would still look up the referenced symbol (and abort if the symbol - // isn't found), even though it isn't used. 
- const Elf64_Addr result = load_bias_ + reloc.r_addend; - *static_cast(rel_target) = result; - } break; - case R_X86_64_32: { - const Elf32_Addr result = *sym_addr + reloc.r_addend; - *static_cast(rel_target) = result; - } break; - case R_X86_64_PC32: { - const Elf64_Addr target = *sym_addr + reloc.r_addend; - const Elf64_Addr base = reinterpret_cast(rel_target); - const Elf32_Addr result = target - base; - *static_cast(rel_target) = result; - } break; - default: - DEPLOY_ERROR("unknown reloc type {} in \"{}\"", r_type, name_.c_str()); - break; - } - } - - void relocate() { - for (const auto i : c10::irange(dyninfo_.n_rela_)) { - relocate_one(dyninfo_.rela_[i]); - } - for (const auto i : c10::irange(dyninfo_.n_plt_rela_)) { - relocate_one(dyninfo_.plt_rela_[i]); - } - } - - void initialize() { - call_function(dyninfo_.init_func_); - for (const auto i : c10::irange(dyninfo_.n_init_array_)) { - call_function(dyninfo_.init_array_[i]); - } - initialized_ = true; - } - - void finalize() { - for (size_t i = dyninfo_.n_fini_array_; i > 0; --i) { - call_function(dyninfo_.fini_array_[i - 1]); - } - call_function(dyninfo_.fini_func_); - } - - void register_debug_info() { - // std::cout << "target modules add " << name_.c_str() << "\n"; - // std::cout << "target modules load -f " << name_.c_str() << " -s " - // << std::hex << "0x" << load_bias_ << "\n"; - __deploy_module_info.name = name_.c_str(); - __deploy_module_info.file_addr = (Elf64_Addr)contents_.data(); - __deploy_module_info.file_size = contents_.size(); - __deploy_module_info.load_bias = load_bias_; - // debugger script sets a breakpoint on this function, - // then reads __deploy_module_info to issue the target module commands. - __deploy_register_code(); - } - - // remove the extra write flags from read-only sections - void protect() { - for (const auto& fixup : fixup_prot_) { - fixup(); - } - } - - void load() override { - check_library_format(); - reserve_address_space(); - load_segments(); - read_dynamic_section(); - relocate(); - protect(); - __register_frame(eh_frame_); - eh_frame_registered_ = true; - register_debug_info(); - initialize(); - } - - ~CustomLibraryImpl() override { - // std::cout << "LINKER IS UNLOADING: " << name_ << "\n"; - if (initialized_) { - finalize(); - } - if (eh_frame_registered_) { - __deregister_frame(eh_frame_); - } - if (mapped_library_) { - munmap(mapped_library_, mapped_size_); - } - } - void call_function(linker_dtor_function_t f) { - if (f == nullptr || (int64_t)f == -1) - return; - f(); - } - void call_function(linker_ctor_function_t f) { - if (f == nullptr || (int64_t)f == -1) - return; - f(argc_, argv_, environ); - } - - multipy::optional sym(const char* name) const override { - return dyninfo_.sym(name); - } - - multipy::optional tls_sym(const char* name) const override { - auto r = dyninfo_.sym(name); - if (r) { - return TLSIndex{module_id(), *r}; - } - return multipy::nullopt; - } - - void* tls_addr(size_t offset) { - // this was a TLS entry for one of our modules, so we use pthreads to - // emulate thread local state. 
- void* start = pthread_getspecific(tls_key_); - if (!start) { - auto tls_mem = new TLSMemory(shared_from_this(), tls_mem_size_); - __cxa_thread_atexit_impl(delete_TLSMemory, tls_mem, &__dso_handle); - start = tls_mem->mem_; - memcpy(start, tls_initalization_image_, tls_file_size_); - memset( - (void*)((const char*)start + tls_file_size_), - 0, - tls_mem_size_ - tls_file_size_); - pthread_setspecific(tls_key_, start); - } - return (void*)((const char*)start + offset); - } - - private: - MemFile contents_; - const char* data_ = nullptr; - const Elf64_Ehdr* header_ = nullptr; - const Elf64_Phdr* program_headers_ = nullptr; - const EH_Frame_HDR* eh_frame_hdr_ = nullptr; - void* eh_frame_ = nullptr; - size_t n_program_headers_ = 0; - void* mapped_library_ = nullptr; - size_t mapped_size_ = 0; - Elf64_Addr load_bias_ = 0; - Elf64_Dyn* dynamic_ = nullptr; - ElfDynamicInfo dyninfo_; - std::string name_; - int argc_ = 0; - const char** argv_ = nullptr; - bool initialized_ = false; - bool eh_frame_registered_ = false; - - pthread_key_t tls_key_ = 0; - void* tls_initalization_image_ = nullptr; - size_t tls_file_size_ = 0; - size_t tls_mem_size_ = 0; - - std::vector> symbol_search_path_; - std::vector> fixup_prot_; -}; - -std::shared_ptr CustomLibrary::create( - const char* filename, - int argc, - const char** argv) { - return std::make_shared(filename, argc, argv); -} - -static void* local__tls_get_addr(TLSIndex* idx) { - if ((idx->module_id & TLS_LOCAL_FLAG) != 0) { - return ((CustomLibraryImpl*)(idx->module_id & ~TLS_LOCAL_FLAG)) - ->tls_addr(idx->offset); - } - return __tls_get_addr(idx); -} - -} // namespace deploy -} // namespace torch diff --git a/torch/csrc/deploy/loader.h b/torch/csrc/deploy/loader.h deleted file mode 100644 index 9e5a7fd4571d..000000000000 --- a/torch/csrc/deploy/loader.h +++ /dev/null @@ -1,52 +0,0 @@ -#pragma once -#include -#include -#include -#include - -namespace torch { -namespace deploy { - -struct DeployLoaderError : public std::runtime_error { - using std::runtime_error::runtime_error; -}; - -struct TLSIndex { - size_t module_id; // if module_id & TLS_LOCAL_FLAG, then module_id & - // ~TLS_LOCAL_FLAG is a TLSMemory*; - size_t offset; -}; - -struct SymbolProvider { - SymbolProvider() = default; - virtual multipy::optional sym(const char* name) const = 0; - virtual multipy::optional tls_sym(const char* name) const = 0; - SymbolProvider(const SymbolProvider&) = delete; - SymbolProvider& operator=(const SymbolProvider&) = delete; - virtual ~SymbolProvider() = default; -}; - -// RAII wrapper around dlopen -struct SystemLibrary : public SymbolProvider { - // create a wrapper around an existing handle returned from dlopen - // if steal == true, then this will dlclose the handle when it is destroyed. 
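A minimal sketch of the steal-the-handle RAII pattern this describes, with a usage example; the library name is just whatever is likely present on a typical glibc system.

// The wrapper dlcloses only handles it owns, and never closes RTLD_DEFAULT.
#include <dlfcn.h>

class DlHandle {
 public:
  DlHandle(void* handle, bool steal)
      : handle_(handle), owned_(steal && handle != RTLD_DEFAULT) {}
  ~DlHandle() {
    if (owned_) dlclose(handle_);
  }
  DlHandle(const DlHandle&) = delete;
  DlHandle& operator=(const DlHandle&) = delete;

  void* sym(const char* name) const { return dlsym(handle_, name); }

 private:
  void* handle_;
  bool owned_;
};

int main() {
  // Wrap the global namespace without taking ownership...
  DlHandle global(RTLD_DEFAULT, /*steal=*/false);
  void* printf_addr = global.sym("printf");
  (void)printf_addr;
  // ...or open a library and let the wrapper dlclose it on destruction.
  if (void* h = dlopen("libm.so.6", RTLD_LAZY | RTLD_LOCAL)) {
    DlHandle m(h, /*steal=*/true);
    void* cos_addr = m.sym("cos");
    (void)cos_addr;
  }
  return 0;
}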
- static std::shared_ptr create( - void* handle = RTLD_DEFAULT, - bool steal = false); - static std::shared_ptr create(const char* path, int flags); -}; - -struct CustomLibrary : public SymbolProvider { - static std::shared_ptr create( - const char* filename, - int argc = 0, - const char** argv = nullptr); - virtual void add_search_library(std::shared_ptr lib) = 0; - virtual void load() = 0; -}; - -using SystemLibraryPtr = std::shared_ptr; -using CustomLibraryPtr = std::shared_ptr; - -} // namespace deploy -} // namespace torch diff --git a/torch/csrc/deploy/mem_file.h b/torch/csrc/deploy/mem_file.h deleted file mode 100644 index df4fe941ca58..000000000000 --- a/torch/csrc/deploy/mem_file.h +++ /dev/null @@ -1,67 +0,0 @@ -#pragma once - -#include -#include -#include -#include -#include -#include -#include - -namespace torch { -namespace deploy { - -// Memory maps a file into the address space read-only, and manages the lifetime -// of the mapping. Here are a few use cases: -// 1. Used in the loader to read in initial image, and to inspect -// ELF files for dependencies before callling dlopen. -// -// 2. Used in unity to load the elf file. -struct MemFile { - explicit MemFile(const char* filename_) : fd_(0), mem_(nullptr), n_bytes_(0) { - fd_ = open(filename_, O_RDONLY); - MULTIPY_CHECK( - fd_ != -1, "failed to open {}: {}" + filename_ + strerror(errno)); - // NOLINTNEXTLINE - struct stat s; - if (-1 == fstat(fd_, &s)) { - close(fd_); // destructors don't run during exceptions - MULTIPY_CHECK( - false, "failed to stat {}: {}" + filename_ + strerror(errno)); - } - n_bytes_ = s.st_size; - mem_ = mmap(nullptr, n_bytes_, PROT_READ, MAP_SHARED, fd_, 0); - if (MAP_FAILED == mem_) { - close(fd_); - MULTIPY_CHECK( - false, "failed to mmap {}: {}" + filename_ + strerror(errno)); - } - } - MemFile(const MemFile&) = delete; - MemFile& operator=(const MemFile&) = delete; - [[nodiscard]] const char* data() const { - return (const char*)mem_; - } - ~MemFile() { - if (mem_) { - munmap((void*)mem_, n_bytes_); - } - if (fd_) { - close(fd_); - } - } - size_t size() { - return n_bytes_; - } - [[nodiscard]] int fd() const { - return fd_; - } - - private: - int fd_; - void* mem_; - size_t n_bytes_; -}; - -} // namespace deploy -} // namespace torch diff --git a/torch/csrc/deploy/noop_environment.h b/torch/csrc/deploy/noop_environment.h deleted file mode 100644 index c1fe6357027f..000000000000 --- a/torch/csrc/deploy/noop_environment.h +++ /dev/null @@ -1,14 +0,0 @@ -#pragma once - -#include - -namespace torch { -namespace deploy { - -class NoopEnvironment : public Environment { - public: - void configureInterpreter(Interpreter* /* interp */) override {} -}; - -} // namespace deploy -} // namespace torch diff --git a/torch/csrc/deploy/path_environment.cpp b/torch/csrc/deploy/path_environment.cpp deleted file mode 100644 index 89bda34fbe15..000000000000 --- a/torch/csrc/deploy/path_environment.cpp +++ /dev/null @@ -1,13 +0,0 @@ -#include -#include - -namespace torch { -namespace deploy { - -void PathEnvironment::configureInterpreter(Interpreter* interp) { - auto I = interp->acquireSession(); - I.global("sys", "path").attr("append")({path_}); -} - -} // namespace deploy -} // namespace torch diff --git a/torch/csrc/deploy/path_environment.h b/torch/csrc/deploy/path_environment.h deleted file mode 100644 index 8c01191b288d..000000000000 --- a/torch/csrc/deploy/path_environment.h +++ /dev/null @@ -1,19 +0,0 @@ -#pragma once - -#include -#include - -namespace torch { -namespace deploy { - -class PathEnvironment : 
public Environment { - public: - explicit PathEnvironment(std::string path) : path_(std::move(path)) {} - void configureInterpreter(Interpreter* interp) override; - - private: - std::string path_; -}; - -} // namespace deploy -} // namespace torch diff --git a/torch/csrc/deploy/remove_dt_needed.cpp b/torch/csrc/deploy/remove_dt_needed.cpp deleted file mode 100644 index 8b1cad535814..000000000000 --- a/torch/csrc/deploy/remove_dt_needed.cpp +++ /dev/null @@ -1,82 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -#define ERROR(msg_fmt, ...) \ - throw std::runtime_error(fmt::format(msg_fmt, ##__VA_ARGS__)) - -#define CHECK(cond, fmt, ...) \ - if (!(cond)) { \ - ERROR(fmt, ##__VA_ARGS__); \ - } - -// NOLINTNEXTLINE -int main(int argc, const char** argv) { - if (argc != 3) { - std::cout << "usage: " << argv[0] << " \n"; - return 1; - } - const char* filename = argv[1]; - int fd_ = open(filename, O_RDWR); - CHECK(fd_ != -1, "failed to open {}: {}", filename, strerror(errno)); - struct stat s = {0}; - if (-1 == fstat(fd_, &s)) { - close(fd_); // destructors don't run during exceptions - ERROR("failed to stat {}: {}", filename, strerror(errno)); - } - size_t n_bytes = s.st_size; - void* mem = - mmap(nullptr, n_bytes, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd_, 0); - if (MAP_FAILED == mem) { - close(fd_); - ERROR("failed to mmap {}: {}", filename, strerror(errno)); - } - - char* data = (char*)mem; - auto header = (Elf64_Ehdr*)data; - auto program_headers = (Elf64_Phdr*)(data + header->e_phoff); - auto n_program_headers = header->e_phnum; - Elf64_Dyn* dynamic = nullptr; - for (const auto i : c10::irange(n_program_headers)) { - const Elf64_Phdr* phdr = &program_headers[i]; - if (phdr->p_type == PT_DYNAMIC) { - dynamic = reinterpret_cast(data + phdr->p_offset); - break; - } - } - CHECK( - dynamic, - "{}: could not load dynamic section for looking up DT_NEEDED", - filename); - std::vector entries; - for (const Elf64_Dyn* d = dynamic; d->d_tag != DT_NULL; ++d) { - entries.push_back(*d); - } - Elf64_Dyn* w = dynamic; - for (const Elf64_Dyn& e : entries) { - if (e.d_tag != DT_NEEDED) { - *w++ = e; - } - } - auto nwritten = w - dynamic; - memset(w, 0, sizeof(Elf64_Dyn) * (entries.size() - nwritten)); - - FILE* dst = fopen(argv[2], "w"); - CHECK(dst != nullptr, "{}: {}", argv[2], strerror(errno)); - fwrite(mem, n_bytes, 1, dst); - fclose(dst); - munmap(mem, n_bytes); - close(fd_); - return 0; -} diff --git a/torch/csrc/deploy/test_deploy.cpp b/torch/csrc/deploy/test_deploy.cpp deleted file mode 100644 index c6293c50c88a..000000000000 --- a/torch/csrc/deploy/test_deploy.cpp +++ /dev/null @@ -1,537 +0,0 @@ -#include -#include -#include - -#include -#include -#include -#include -#include - -#include -#include -#include - -void compare_torchpy_jit(const char* model_filename, const char* jit_filename) { - // Test - - torch::deploy::InterpreterManager m(1); - torch::deploy::Package p = m.loadPackage(model_filename); - auto model = p.loadPickle("model", "model.pkl"); - at::IValue eg; - { - auto I = p.acquireSession(); - eg = I.self.attr("load_pickle")({"model", "example.pkl"}).toIValue(); - } - - at::Tensor output = model(eg.toTupleRef().elements()).toTensor(); - - // Reference - auto ref_model = torch::jit::load(jit_filename); - at::Tensor ref_output = - ref_model.forward(eg.toTupleRef().elements()).toTensor(); - - ASSERT_TRUE(ref_output.allclose(output, 1e-03, 1e-05)); -} - -const char* simple = 
"torch/csrc/deploy/example/generated/simple"; -const char* simple_jit = "torch/csrc/deploy/example/generated/simple_jit"; - -const char* path(const char* envname, const char* path) { - const char* e = getenv(envname); - return e ? e : path; -} - -TEST(TorchpyTest, LoadLibrary) { - torch::deploy::InterpreterManager m(1); - torch::deploy::Package p = m.loadPackage( - path("LOAD_LIBRARY", "torch/csrc/deploy/example/generated/load_library")); - auto model = p.loadPickle("fn", "fn.pkl"); - model({}); -} - -TEST(TorchpyTest, InitTwice) { - { torch::deploy::InterpreterManager m(2); } - { torch::deploy::InterpreterManager m(1); } -} - -TEST(TorchpyTest, DifferentInterps) { - torch::deploy::InterpreterManager m(2); - m.registerModuleSource("check_none", "check = id(None)\n"); - int64_t id0 = 0, id1 = 0; - { - auto I = m.allInstances()[0].acquireSession(); - id0 = I.global("check_none", "check").toIValue().toInt(); - } - { - auto I = m.allInstances()[1].acquireSession(); - id1 = I.global("check_none", "check").toIValue().toInt(); - } - ASSERT_NE(id0, id1); -} - -TEST(TorchpyTest, SimpleModel) { - compare_torchpy_jit(path("SIMPLE", simple), path("SIMPLE_JIT", simple_jit)); -} - -TEST(TorchpyTest, ResNet) { - compare_torchpy_jit( - path("RESNET", "torch/csrc/deploy/example/generated/resnet"), - path("RESNET_JIT", "torch/csrc/deploy/example/generated/resnet_jit")); -} - -TEST(TorchpyTest, Movable) { - torch::deploy::InterpreterManager m(1); - torch::deploy::ReplicatedObj obj; - { - auto I = m.acquireOne(); - auto model = - I.global("torch.nn", "Module")(std::vector()); - obj = I.createMovable(model); - } - obj.acquireSession(); -} - -TEST(TorchpyTest, MultiSerialSimpleModel) { - torch::deploy::InterpreterManager manager(3); - torch::deploy::Package p = manager.loadPackage(path("SIMPLE", simple)); - auto model = p.loadPickle("model", "model.pkl"); - auto ref_model = torch::jit::load(path("SIMPLE_JIT", simple_jit)); - - auto input = torch::ones({10, 20}); - size_t ninterp = 3; - std::vector outputs; - - for (const auto i : c10::irange(ninterp)) { - (void)i; - outputs.push_back(model({input.alias()}).toTensor()); - } - - // Generate reference - auto ref_output = ref_model.forward({input.alias()}).toTensor(); - - // Compare all to reference - for (const auto i : c10::irange(ninterp)) { - ASSERT_TRUE(ref_output.equal(outputs[i])); - } - - // test kwargs api with args - std::vector args; - args.emplace_back(input); - std::unordered_map kwargs_empty; - auto jit_output_args = model.callKwargs(args, kwargs_empty).toTensor(); - ASSERT_TRUE(ref_output.equal(jit_output_args)); - - // and with kwargs only - std::unordered_map kwargs; - kwargs["input"] = input; - auto jit_output_kwargs = model.callKwargs(kwargs).toTensor(); - ASSERT_TRUE(ref_output.equal(jit_output_kwargs)); - - // test hasattr - ASSERT_TRUE(model.hasattr("forward")); - ASSERT_FALSE(model.hasattr("make_prediction")); -} - -TEST(TorchpyTest, ThreadedSimpleModel) { - size_t nthreads = 3; - torch::deploy::InterpreterManager manager(nthreads); - - torch::deploy::Package p = manager.loadPackage(path("SIMPLE", simple)); - auto model = p.loadPickle("model", "model.pkl"); - auto ref_model = torch::jit::load(path("SIMPLE_JIT", simple_jit)); - - auto input = torch::ones({10, 20}); - - std::vector outputs; - - std::vector> futures; - for (const auto i : c10::irange(nthreads)) { - (void)i; - futures.push_back(std::async(std::launch::async, [&model]() { - auto input = torch::ones({10, 20}); - for (const auto j : c10::irange(100)) { - (void)j; - 
model({input.alias()}).toTensor(); - } - auto result = model({input.alias()}).toTensor(); - return result; - })); - } - for (const auto i : c10::irange(nthreads)) { - outputs.push_back(futures[i].get()); - } - - // Generate reference - auto ref_output = ref_model.forward({input.alias()}).toTensor(); - - // Compare all to reference - for (const auto i : c10::irange(nthreads)) { - ASSERT_TRUE(ref_output.equal(outputs[i])); - } -} - -TEST(TorchpyTest, ErrorsReplicatingObj) { - torch::deploy::InterpreterManager manager(4); - torch::deploy::Package p = manager.loadPackage(path("SIMPLE", simple)); - auto replicatedObj = p.loadPickle("model", "model.pkl"); - // Acquire two different interpreters - auto session1 = replicatedObj.acquireSession(); - auto session2 = p.acquireSession(); - // Create an obj reference on interpreter 1 - auto obj = session1.fromMovable(replicatedObj); - // should throw an error when trying to access obj from different session - // NOLINTNEXTLINE(hicpp-avoid-goto,cppcoreguidelines-avoid-goto) - EXPECT_THROW(session2.createMovable(obj), std::runtime_error); - try { - session2.createMovable(obj); - } catch (std::runtime_error& error) { - EXPECT_TRUE( - std::string(error.what()) - .find( - "Cannot create movable from an object that lives in different session") != - std::string::npos); - } -} - -TEST(TorchpyTest, ThrowsSafely) { - // See explanation in deploy.h - torch::deploy::InterpreterManager manager(3); - // NOLINTNEXTLINE(hicpp-avoid-goto,cppcoreguidelines-avoid-goto) - EXPECT_THROW(manager.loadPackage("some garbage path"), std::runtime_error); - - torch::deploy::Package p = manager.loadPackage(path("SIMPLE", simple)); - // NOLINTNEXTLINE(hicpp-avoid-goto,cppcoreguidelines-avoid-goto) - EXPECT_THROW(p.loadPickle("some other", "garbage path"), std::runtime_error); - - auto model = p.loadPickle("model", "model.pkl"); - // NOLINTNEXTLINE(hicpp-avoid-goto,cppcoreguidelines-avoid-goto) - EXPECT_THROW(model(at::IValue("unexpected input")), std::runtime_error); -} - -TEST(TorchpyTest, AcquireMultipleSessionsInTheSamePackage) { - torch::deploy::InterpreterManager m(1); - - torch::deploy::Package p = m.loadPackage(path("SIMPLE", simple)); - auto I = p.acquireSession(); - - auto I1 = p.acquireSession(); -} - -TEST(TorchpyTest, AcquireMultipleSessionsInDifferentPackages) { - torch::deploy::InterpreterManager m(1); - - torch::deploy::Package p = m.loadPackage(path("SIMPLE", simple)); - auto I = p.acquireSession(); - - torch::deploy::Package p1 = m.loadPackage( - path("RESNET", "torch/csrc/deploy/example/generated/resnet")); - auto I1 = p1.acquireSession(); -} - -TEST(TorchpyTest, TensorSharingNotAllowed) { - size_t nthreads = 2; - torch::deploy::InterpreterManager m(nthreads); - // generate a tensor from one interpreter - auto I0 = m.allInstances()[0].acquireSession(); - auto I1 = m.allInstances()[1].acquireSession(); - auto obj = I0.global("torch", "empty")({I0.fromIValue(2)}); - auto t = obj.toIValue().toTensor(); - // try to feed it to the other interpreter, should error - // NOLINTNEXTLINE(hicpp-avoid-goto,cppcoreguidelines-avoid-goto) - ASSERT_THROW(I1.global("torch", "sigmoid")({t}), std::runtime_error); -} - -TEST(TorchpyTest, TaggingRace) { - // At time of writing, this takes about 7s to run on DEBUG=1. 
I think - // this is OK, but feel free to fiddle with the knobs here to reduce the - // runtime - constexpr int64_t trials = 4; - constexpr int64_t nthreads = 16; - torch::deploy::InterpreterManager m(nthreads); - for (const auto n : c10::irange(trials)) { - (void)n; - at::Tensor t = torch::empty(2); - std::atomic success(0); - std::atomic failed(0); - at::parallel_for(0, nthreads, 1, [&](int64_t begin, int64_t end) { - for (const auto i : c10::irange(begin, end)) { - auto I = m.allInstances()[i].acquireSession(); - try { - I.fromIValue(t); - success++; - } catch (const std::runtime_error& e) { - failed++; - } - } - }); - ASSERT_EQ(success, 1); - ASSERT_EQ(failed, nthreads - 1); - } -} - -TEST(TorchpyTest, DisarmHook) { - at::Tensor t = torch::empty(2); - { - torch::deploy::InterpreterManager m(1); - auto I = m.acquireOne(); - I.fromIValue(t); - } // unload the old interpreter - torch::deploy::InterpreterManager m(1); - auto I = m.acquireOne(); - // NOLINTNEXTLINE(hicpp-avoid-goto,cppcoreguidelines-avoid-goto) - ASSERT_THROW(I.fromIValue(t), std::runtime_error); // NOT a segfault -} - -TEST(TorchpyTest, RegisterModule) { - torch::deploy::InterpreterManager m(2); - m.registerModuleSource("foomodule", "def add1(x): return x + 1\n"); - for (const auto& interp : m.allInstances()) { - auto I = interp.acquireSession(); - AT_ASSERT(3 == I.global("foomodule", "add1")({2}).toIValue().toInt()); - } -} - -#ifdef FBCODE_CAFFE2 -TEST(TorchpyTest, FxModule) { - size_t nthreads = 3; - torch::deploy::InterpreterManager manager(nthreads); - torch::deploy::Package p = manager.loadPackage(path( - "SIMPLE_LEAF_FX", "torch/csrc/deploy/example/generated/simple_leaf_fx")); - auto model = p.loadPickle("model", "model.pkl"); - - std::vector outputs; - auto input = torch::ones({5, 10}); - for (const auto i : c10::irange(nthreads)) { - (void)i; - outputs.push_back(model({input.alias()}).toTensor()); - } - - // reference model - auto ref_model = torch::jit::load(path( - "SIMPLE_LEAF_JIT", - "torch/csrc/deploy/example/generated/simple_leaf_jit")); - - auto ref_output = ref_model.forward({input.alias()}).toTensor(); - - // Compare all to reference - for (const auto i : c10::irange(nthreads)) { - ASSERT_TRUE(ref_output.equal(outputs[i])); - } -} -#endif - -// Moving a tensor between interpreters should share the underlying storage. 
-TEST(TorchpyTest, TensorSerializationSharing) { - torch::deploy::InterpreterManager manager(2); - manager.registerModuleSource("test_module", R"PYTHON( -import torch - -def get_tensor(): - return torch.ones(2, 2) -)PYTHON"); - - auto I = manager.acquireOne(); - auto I2 = manager.acquireOne(); - - auto objOnI = - I.global("test_module", "get_tensor")(at::ArrayRef{}); - auto replicated = I.createMovable(objOnI); - auto objOnI2 = I2.fromMovable(replicated); - - auto tensorOnI = objOnI.toIValue().toTensor(); - auto tensorOnI2 = objOnI2.toIValue().toTensor(); - ASSERT_TRUE(tensorOnI.storage().is_alias_of(tensorOnI2.storage())); -} - -#ifdef TEST_CUSTOM_LIBRARY -thread_local int in_another_module = 5; -TEST(TorchpyTest, SharedLibraryLoad) { - torch::deploy::InterpreterManager manager(2); - auto no_args = at::ArrayRef(); - for (auto& interp : manager.allInstances()) { - auto I = interp.acquireSession(); - - const char* test_lib_path = getenv("LIBTEST_DEPLOY_LIB"); - if (!test_lib_path) { - I.global("sys", "path").attr("append")({"torch/csrc/deploy"}); - I.global("test_deploy_python", "setup")({getenv("PATH")}); - } else { - // NOLINTNEXTLINE(cppcoreguidelines-avoid-c-arrays,modernize-avoid-c-arrays) - char buf[PATH_MAX]; - strncpy(buf, test_lib_path, PATH_MAX); - dirname(buf); - I.global("sys", "path").attr("append")({buf}); - } - - AT_ASSERT(I.global("libtest_deploy_lib", "check_initial_state")(no_args) - .toIValue() - .toBool()); - ASSERT_TRUE( - I.global("libtest_deploy_lib", "simple_add")({5, 4}) - .toIValue() - .toInt() == 9); - // I.global("numpy", "array"); // force numpy to load here so it is loaded - // // twice before we run the tests - } - for (auto& interp : manager.allInstances()) { - auto I = interp.acquireSession(); - // auto i = - // I.global("test_deploy_python", "numpy_test")({1}).toIValue().toInt(); - I.global("libtest_deploy_lib", "raise_and_catch_exception")({true}); - try { - I.global("libtest_deploy_lib", "raise_exception")(no_args); - ASSERT_TRUE(false); // raise_exception did not throw? 
- } catch (std::exception& err) { - ASSERT_TRUE(std::string(err.what()).find("yet") != std::string::npos); - } - in_another_module = 6; - ASSERT_TRUE( - I.global("libtest_deploy_lib", "get_in_another_module")(no_args) - .toIValue() - .toInt() == 6); - ASSERT_TRUE( - I.global("libtest_deploy_lib", "get_bar")(no_args).toIValue().toInt() == - 14); - { - std::thread foo([&] { - I.global("libtest_deploy_lib", "set_bar")({13}); - ASSERT_TRUE( - I.global("libtest_deploy_lib", "get_bar")(no_args) - .toIValue() - .toInt() == 13); - }); - foo.join(); - } - ASSERT_TRUE( - I.global("libtest_deploy_lib", "get_bar_destructed")(no_args) - .toIValue() - .toInt() == 1); - I.global("libtest_deploy_lib", "set_bar")({12}); - } -} -#endif - -TEST(TorchpyTest, UsesDistributed) { - const auto model_filename = path( - "USES_DISTRIBUTED", - "torch/csrc/deploy/example/generated/uses_distributed"); - torch::deploy::InterpreterManager m(1); - torch::deploy::Package p = m.loadPackage(model_filename); - { - auto I = p.acquireSession(); - I.self.attr("import_module")({"uses_distributed"}); - } -} - -TEST(TorchpyTest, Autograd) { - torch::deploy::InterpreterManager m(2); - m.registerModuleSource("autograd_test", R"PYTHON( -import torch - -x = torch.ones(5) # input tensor -y = torch.zeros(3) # expected output -w = torch.randn(5, 3, requires_grad=True) -b = torch.randn(3, requires_grad=True) -z = torch.matmul(x, w)+b -loss = torch.nn.functional.binary_cross_entropy_with_logits(z, y) -loss.backward() -# result = w.grad -result = torch.Tensor([1,2,3]) -)PYTHON"); - at::Tensor w_grad0, w_grad1; - { - auto I = m.allInstances()[0].acquireSession(); - w_grad0 = I.global("autograd_test", "result").toIValue().toTensor(); - } - { - auto I = m.allInstances()[1].acquireSession(); - w_grad1 = I.global("autograd_test", "result").toIValue().toTensor(); - } - EXPECT_TRUE(w_grad0.equal(w_grad1)); -} - -TEST(TorchpyTest, ImportlibMetadata) { - torch::deploy::InterpreterManager m(1); - m.registerModuleSource("importlib_test", R"PYTHON( -from importlib.metadata import version - -result = version("torch") -)PYTHON"); - auto I = m.allInstances()[0].acquireSession(); - auto ver = I.global("importlib_test", "result").toIValue().toString(); - ASSERT_EQ(ver->string(), "0.0.1+fake_multipy"); -} - -// OSS build does not have bultin numpy support yet. Use this flag to guard the -// test case. 
-#if HAS_NUMPY -TEST(TorchpyTest, TestNumpy) { - torch::deploy::InterpreterManager m(2); - auto noArgs = at::ArrayRef(); - auto I = m.acquireOne(); - auto mat35 = I.global("numpy", "random").attr("rand")({3, 5}); - auto mat58 = I.global("numpy", "random").attr("rand")({5, 8}); - auto mat38 = I.global("numpy", "matmul")({mat35, mat58}); - EXPECT_EQ(2, mat38.attr("shape").attr("__len__")(noArgs).toIValue().toInt()); - EXPECT_EQ(3, mat38.attr("shape").attr("__getitem__")({0}).toIValue().toInt()); - EXPECT_EQ(8, mat38.attr("shape").attr("__getitem__")({1}).toIValue().toInt()); -} -#endif - -#if HAS_PYYAML -TEST(TorchpyTest, TestPyYAML) { - const std::string kDocument = "a: 1\n"; - - torch::deploy::InterpreterManager m(2); - auto I = m.acquireOne(); - - auto load = I.global("yaml", "load")({kDocument}); - EXPECT_EQ(1, load.attr("__getitem__")({"a"}).toIValue().toInt()); - - auto dump = I.global("yaml", "dump")({load}); - EXPECT_EQ(kDocument, dump.toIValue().toStringRef()); -} -#endif - -TEST(TorchpyTest, PrintInstruction) { - const auto jit_script_with_print = R"JIT( - def forward(self, a): - print(a) - return a + a - )JIT"; - - auto input = torch::autograd::make_variable(at::randn({2, 3})); - auto expected_forward = input + input; - - auto module = std::make_shared( - "Module", std::make_shared()); - module->define(jit_script_with_print); - - std::vector inputs{at::IValue(input)}; - - // Checking that a module containing prim::Print() works fine. - auto result1 = (*module)(inputs); - EXPECT_TRUE(result1.toTensor().equal(expected_forward)); - - { - auto interpreterManager = - std::make_shared(1); - - // Checking that a module containing prim::Print() still works fine - // after Python environment was created. - auto result2 = (*module)(inputs); - EXPECT_TRUE(result2.toTensor().equal(expected_forward)); - } - - // Checking that a module containing prim::Print() still works fine - // after Python environment was created and then destroyed. - auto result3 = (*module)(inputs); - EXPECT_TRUE(result3.toTensor().equal(expected_forward)); -} - -int main(int argc, char* argv[]) { - ::testing::InitGoogleTest(&argc, argv); - int rc = RUN_ALL_TESTS(); - return rc; -} diff --git a/torch/csrc/deploy/test_deploy_from_python.py b/torch/csrc/deploy/test_deploy_from_python.py deleted file mode 100644 index b310d8bd7107..000000000000 --- a/torch/csrc/deploy/test_deploy_from_python.py +++ /dev/null @@ -1,7 +0,0 @@ -from libfb.py import testutil - -import test_deploy_python_ext - -class TestDeployFromPython(testutil.BaseFacebookTestCase): - def test_deploy_from_python(self): - self.assertTrue(test_deploy_python_ext.run()) diff --git a/torch/csrc/deploy/test_deploy_gpu.cpp b/torch/csrc/deploy/test_deploy_gpu.cpp deleted file mode 100644 index b657329201f9..000000000000 --- a/torch/csrc/deploy/test_deploy_gpu.cpp +++ /dev/null @@ -1,120 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include - -int main(int argc, char* argv[]) { - ::testing::InitGoogleTest(&argc, argv); - int rc = RUN_ALL_TESTS(); - return rc; -} - -const char* simple = "torch/csrc/deploy/example/generated/simple"; -const char* simple_jit = "torch/csrc/deploy/example/generated/simple_jit"; - -const char* path(const char* envname, const char* path) { - const char* e = getenv(envname); - return e ? 
e : path; -} - -TEST(TorchDeployGPUTest, SimpleModel) { - if (!torch::cuda::is_available()) { - GTEST_SKIP(); - } - const char* model_filename = path("SIMPLE", simple); - const char* jit_filename = path("SIMPLE_JIT", simple_jit); - - // Test - torch::deploy::InterpreterManager m(1); - torch::deploy::Package p = m.loadPackage(model_filename); - auto model = p.loadPickle("model", "model.pkl"); - { - auto M = model.acquireSession(); - M.self.attr("to")({"cuda"}); - } - // NOLINTNEXTLINE(cppcoreguidelines-init-variables) - std::vector inputs; - { - auto I = p.acquireSession(); - auto eg = I.self.attr("load_pickle")({"model", "example.pkl"}).toIValue(); - inputs = eg.toTupleRef().elements(); - inputs[0] = inputs[0].toTensor().to("cuda"); - } - at::Tensor output = model(inputs).toTensor(); - ASSERT_TRUE(output.device().is_cuda()); - - // Reference - auto ref_model = torch::jit::load(jit_filename); - ref_model.to(torch::kCUDA); - at::Tensor ref_output = ref_model.forward(inputs).toTensor(); - - ASSERT_TRUE(ref_output.allclose(output, 1e-03, 1e-05)); -} - -TEST(TorchDeployGPUTest, UsesDistributed) { - const auto model_filename = path( - "USES_DISTRIBUTED", - "torch/csrc/deploy/example/generated/uses_distributed"); - torch::deploy::InterpreterManager m(1); - torch::deploy::Package p = m.loadPackage(model_filename); - { - auto I = p.acquireSession(); - I.self.attr("import_module")({"uses_distributed"}); - } -} - -#ifdef FBCODE_CAFFE2 -TEST(TorchDeployGPUTest, TensorRT) { - if (!torch::cuda::is_available()) { - GTEST_SKIP(); - } - auto packagePath = path( - "MAKE_TRT_MODULE", "torch/csrc/deploy/example/generated/make_trt_module"); - torch::deploy::InterpreterManager m(1); - torch::deploy::Package p = m.loadPackage(packagePath); - auto makeModel = p.loadPickle("make_trt_module", "model.pkl"); - { - auto I = makeModel.acquireSession(); - auto model = I.self(at::ArrayRef{}); - auto input = at::ones({1, 2, 3}).cuda(); - auto output = input * 2; - ASSERT_TRUE( - output.allclose(model(at::IValue{input}).toIValue().toTensor())); - } -} -#endif - -// OSS build does not have bultin numpy support yet. Use this flag to guard the -// test case. 
-#if HAS_NUMPY -TEST(TorchpyTest, TestNumpy) { - torch::deploy::InterpreterManager m(2); - auto noArgs = at::ArrayRef(); - auto I = m.acquireOne(); - auto mat35 = I.global("numpy", "random").attr("rand")({3, 5}); - auto mat58 = I.global("numpy", "random").attr("rand")({5, 8}); - auto mat38 = I.global("numpy", "matmul")({mat35, mat58}); - EXPECT_EQ(2, mat38.attr("shape").attr("__len__")(noArgs).toIValue().toInt()); - EXPECT_EQ(3, mat38.attr("shape").attr("__getitem__")({0}).toIValue().toInt()); - EXPECT_EQ(8, mat38.attr("shape").attr("__getitem__")({1}).toIValue().toInt()); -} -#endif - -#if HAS_PYYAML -TEST(TorchpyTest, TestPyYAML) { - const std::string kDocument = "a: 1\n"; - - torch::deploy::InterpreterManager m(2); - auto I = m.acquireOne(); - - auto load = I.global("yaml", "load")({kDocument}); - EXPECT_EQ(1, load.attr("__getitem__")({"a"}).toIValue().toInt()); - - auto dump = I.global("yaml", "dump")({load}); - EXPECT_EQ(kDocument, dump.toIValue().toStringRef()); -} -#endif diff --git a/torch/csrc/deploy/test_deploy_lib.cpp b/torch/csrc/deploy/test_deploy_lib.cpp deleted file mode 100644 index cac0b539c043..000000000000 --- a/torch/csrc/deploy/test_deploy_lib.cpp +++ /dev/null @@ -1,98 +0,0 @@ -#include -#include -#include -#include -#include - -namespace py = pybind11; - -int foo_constructed = 0; -int bar_constructed = 0; -int bar_destructed = 0; - -struct Foo { - Foo() { - ++foo_constructed; - } - int v = -1; -}; - -Foo f; - -struct Bar { - Bar() { - ++bar_constructed; - } - ~Bar() { - ++bar_destructed; - } - int v = 14; -}; - -static thread_local int first = 1; // local TLS, probably offset 0 -static thread_local int second = 2; // local TLS, probably offset 4 -thread_local int bss_local; // local TLS, bss initialized so it probably comes - // after the initialized stuff -thread_local int third = 3; // local TLS, but extern declared so it will look - // for the symbol third globally, but not find it -static thread_local Bar bar; // local TLS, with a constructor that should run -thread_local int - in_another_module; // non local TLS, this is defined in test_deploy.cpp - -struct MyError : public std::runtime_error { - using std::runtime_error::runtime_error; -}; - -bool raise_and_catch_exception(bool except) { - try { - if (except) { - throw MyError("yep"); - } - return false; - } catch (MyError& c) { - return true; - } -} -bool raise_exception() { - throw MyError("yet"); // caught in test_deploy -} - -bool check_initial_state() { - bool bv = bar.v == 14; // unless we reference bar it is unspecified whether it - // should have been constructed - return bv && first == 1 && second == 2 && bss_local == 0 && third == 3 && - bar_constructed == 1 && foo_constructed == 1 && bar_destructed == 0; -} - -int get_in_another_module() { - return in_another_module; -} - -void set_in_another_module(int x) { - in_another_module = x; -} -int get_bar() { - return bar.v; -} -void set_bar(int v) { - bar.v = v; -} -int get_bar_destructed() { - return bar_destructed; -} - -int simple_add(int a, int b) { - return a + b; -} - -PYBIND11_MODULE(libtest_deploy_lib, m) { - m.def("raise_and_catch_exception", raise_and_catch_exception); - m.def("raise_exception", raise_exception); - m.def("check_initial_state", check_initial_state); - m.def("get_in_another_module", get_in_another_module); - m.def("set_in_another_module", set_in_another_module); - m.def("get_bar", get_bar); - m.def("set_bar", set_bar); - m.def("get_bar_destructed", get_bar_destructed); - m.def("simple_add", simple_add); -} diff --git 
a/torch/csrc/deploy/test_deploy_missing_interpreter.cpp b/torch/csrc/deploy/test_deploy_missing_interpreter.cpp deleted file mode 100644 index b47f4556ad78..000000000000 --- a/torch/csrc/deploy/test_deploy_missing_interpreter.cpp +++ /dev/null @@ -1,14 +0,0 @@ -#include -#include -#include - -int main(int argc, char* argv[]) { - ::testing::InitGoogleTest(&argc, argv); - int rc = RUN_ALL_TESTS(); - return rc; -} - -TEST(TorchDeployMissingInterpreter, Throws) { - // NOLINTNEXTLINE(hicpp-avoid-goto,cppcoreguidelines-avoid-goto) - EXPECT_THROW(torch::deploy::InterpreterManager(1), std::runtime_error); -} diff --git a/torch/csrc/deploy/test_deploy_python.py b/torch/csrc/deploy/test_deploy_python.py deleted file mode 100644 index e32cd37cfacc..000000000000 --- a/torch/csrc/deploy/test_deploy_python.py +++ /dev/null @@ -1,26 +0,0 @@ -# this is imported by test_deploy to do some checks in python -import sys -import subprocess -from pathlib import Path - -# we've taken steps to clear out the embedded python environment, -# so we have to go searching for real python to figure out where its libraries are installed. -def python_path(cpath): - for maybe in cpath.split(':'): - candidate = Path(maybe) / "python" - if candidate.exists(): - cmd = [str(candidate), '-c', 'import sys; print(":".join(sys.path))'] - return subprocess.check_output(cmd).decode('utf-8').strip('\n').split(':') - raise RuntimeError('could not find real python') - -def setup(path): - sys.path.extend(python_path(path)) - sys.path.append('build/lib') # for our test python extension - -# smoke test the numpy extension loading works -def numpy_test(x): - import numpy as np - xs = [np.array([x, x]), np.array([x, x])] - for i in range(10): - xs.append(xs[-1] + xs[-2]) - return int(xs[-1][0]) diff --git a/torch/csrc/deploy/test_deploy_python_ext.cpp b/torch/csrc/deploy/test_deploy_python_ext.cpp deleted file mode 100644 index 2c7748b6f46b..000000000000 --- a/torch/csrc/deploy/test_deploy_python_ext.cpp +++ /dev/null @@ -1,25 +0,0 @@ -#include -#include -#include -#include -#include -#include - -bool run() { - torch::deploy::InterpreterManager m(2); - m.registerModuleSource("check_none", "check = id(None)\n"); - int64_t id0 = 0, id1 = 0; - { - auto I = m.allInstances()[0].acquireSession(); - id0 = I.global("check_none", "check").toIValue().toInt(); - } - { - auto I = m.allInstances()[1].acquireSession(); - id1 = I.global("check_none", "check").toIValue().toInt(); - } - return id0 != id1; -} - -PYBIND11_MODULE(test_deploy_python_ext, m) { - m.def("run", run); -} diff --git a/torch/csrc/deploy/unity/example.py b/torch/csrc/deploy/unity/example.py deleted file mode 100644 index 2236600899bb..000000000000 --- a/torch/csrc/deploy/unity/example.py +++ /dev/null @@ -1,10 +0,0 @@ -import numpy as np -import scipy -from scipy import linalg - -print("Hello, torch::deploy unity!") -print(f"np.random.rand(5): {np.random.rand(5)}") -print(f"scipy {scipy}") -mat_a = np.array([[1, 0, 0, 0], [1, 1, 0, 0], [1, 2, 1, 0], [1, 3, 3, 1]]) -mat_b = linalg.inv(mat_a) -print(mat_b) diff --git a/torch/csrc/deploy/unity/main.cpp b/torch/csrc/deploy/unity/main.cpp deleted file mode 100644 index f6eb26f92f53..000000000000 --- a/torch/csrc/deploy/unity/main.cpp +++ /dev/null @@ -1,35 +0,0 @@ -#include -#include -#include - -namespace torch { -namespace deploy { - -// the way we lookup main module follows how an xar file is setup -std::string lookupMainModule(InterpreterManager& m) { - auto I = m.acquireOne(); - auto mainModule = - I.global("__manifest__", 
"fbmake").attr("get")({"main_module"}); - std::ostringstream ss; - ss << mainModule.toIValue(); - LOG(INFO) << "main module is " << ss.str(); - return ss.str(); -} - -int doMain(int /* argc */, char** argv) { - std::shared_ptr env = std::make_shared(argv[0]); - InterpreterManager m(2, env); - - auto mainModule = lookupMainModule(m); - auto I = m.acquireOne(); - I.global("runpy", "run_module")({mainModule}); - return 0; -} - -} // namespace deploy -} // namespace torch - -// NOLINTNEXTLINE(bugprone-exception-escape) -int main(int argc, char** argv) { - return torch::deploy::doMain(argc, argv); -} diff --git a/torch/csrc/deploy/unity/tests/simple_model.py b/torch/csrc/deploy/unity/tests/simple_model.py deleted file mode 100644 index 910d8c675127..000000000000 --- a/torch/csrc/deploy/unity/tests/simple_model.py +++ /dev/null @@ -1,15 +0,0 @@ -import torch -from torch import nn - -class SimpleModel(nn.Module): - def __init__(self): - super(SimpleModel, self).__init__() - self.fc = nn.Linear(256, 64) - self.fc2 = nn.Linear(64, 10) - - def forward(self, X): - X = self.fc(X) - X = torch.relu(X) - X = self.fc2(X) - X = torch.softmax(X, dim=-1) - return X diff --git a/torch/csrc/deploy/unity/tests/sum.py b/torch/csrc/deploy/unity/tests/sum.py deleted file mode 100644 index 725ec26517af..000000000000 --- a/torch/csrc/deploy/unity/tests/sum.py +++ /dev/null @@ -1,5 +0,0 @@ -def func(*vlist): - return sum(vlist) - -import sys -print("byebye!", file=sys.stderr) diff --git a/torch/csrc/deploy/unity/tests/test_unity.h b/torch/csrc/deploy/unity/tests/test_unity.h deleted file mode 100644 index d5b007980b09..000000000000 --- a/torch/csrc/deploy/unity/tests/test_unity.h +++ /dev/null @@ -1,5 +0,0 @@ -#pragma once - -// NOLINTNEXTLINE -static char TEST_PYTHON_APP_DIR_TEMP[] = - "/tmp/torch_deploy_unity_unittest_XXXXXX"; diff --git a/torch/csrc/deploy/unity/tests/test_unity_simple_model.cpp b/torch/csrc/deploy/unity/tests/test_unity_simple_model.cpp deleted file mode 100644 index 3987340f190b..000000000000 --- a/torch/csrc/deploy/unity/tests/test_unity_simple_model.cpp +++ /dev/null @@ -1,40 +0,0 @@ -#include -#include -#include -#include - -namespace torch { -namespace deploy { - -const char* exePath = nullptr; - -TEST(UnityTest, TestUnitySimpleModel) { - // use a different path for unit test. Normally don't specify the path will - // use the default one. 
- mkdtemp(TEST_PYTHON_APP_DIR_TEMP); - std::shared_ptr env = - std::make_shared(exePath, TEST_PYTHON_APP_DIR_TEMP); - InterpreterManager m(2, env); - - auto I = m.acquireOne(); - - auto noArgs = at::ArrayRef(); - auto input = I.global("torch", "randn")({32, 256}); - auto model = I.global("simple_model", "SimpleModel")(noArgs); - - auto output = model({input}); // implicitly calls model's forward method - EXPECT_EQ(2, output.attr("shape").attr("__len__")(noArgs).toIValue().toInt()); - EXPECT_EQ( - 32, output.attr("shape").attr("__getitem__")({0}).toIValue().toInt()); - EXPECT_EQ( - 10, output.attr("shape").attr("__getitem__")({1}).toIValue().toInt()); -} - -} // namespace deploy -} // namespace torch - -int main(int argc, char** argv) { - torch::deploy::exePath = argv[0]; - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} diff --git a/torch/csrc/deploy/unity/tests/test_unity_sum.cpp b/torch/csrc/deploy/unity/tests/test_unity_sum.cpp deleted file mode 100644 index 6105c1158e30..000000000000 --- a/torch/csrc/deploy/unity/tests/test_unity_sum.cpp +++ /dev/null @@ -1,31 +0,0 @@ -#include -#include -#include -#include - -namespace torch { -namespace deploy { - -const char* exePath = nullptr; - -TEST(UnityTest, TestUnitySum) { - // use a different path for unit test. Normally don't specify the path will - // use the default one. - mkdtemp(TEST_PYTHON_APP_DIR_TEMP); - std::shared_ptr env = - std::make_shared(exePath, TEST_PYTHON_APP_DIR_TEMP); - InterpreterManager m(2, env); - - auto I = m.acquireOne(); - auto result = I.global("sum", "func")({1, 2, 3, 4}); - EXPECT_EQ(10, result.toIValue().toInt()); -} - -} // namespace deploy -} // namespace torch - -int main(int argc, char** argv) { - torch::deploy::exePath = argv[0]; - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} diff --git a/torch/csrc/deploy/unity/unity.bzl b/torch/csrc/deploy/unity/unity.bzl deleted file mode 100644 index 8431356a4df9..000000000000 --- a/torch/csrc/deploy/unity/unity.bzl +++ /dev/null @@ -1,46 +0,0 @@ -load("@fbcode_macros//build_defs:cpp_library.bzl", "cpp_library") -load("@fbcode_macros//build_defs:native_rules.bzl", "cxx_genrule") -load("@fbcode_macros//build_defs:python_binary.bzl", "python_binary") - -# @lint-ignore-every BUCKLINT -load("@fbsource//tools/build_defs:fb_native_wrapper.bzl", "fb_native") - -def build_unity(name, **kwargs): - python_binary(name = name, **kwargs) - - cxx_genrule( - name = "{}_build_python_app_lib".format(name), - out = "python_app.a", - cmd = """\ - cp $(location :""" + name + """) python_app - ld -r -b binary -o ${TMP}/python_app.o python_app - # rename the .data section to .torch_deploy_payload.unity. - # don't set the alloc/load flags for the section so it will not join - # the party of relocation. - # Also strip the _binary_python_app_start/end/size symbols to avoid - # confusion. 
-        objcopy --rename-section .data=.torch_deploy_payload.unity,readonly,contents -N _binary_python_app_start -N _binary_python_app_end -N _binary_python_app_size ${TMP}/python_app.o
-        ar rcs ${OUT} ${TMP}/python_app.o
-        """,
-    )
-
-    fb_native.prebuilt_cxx_library(
-        name = "{}_python_app_lib".format(name),
-        visibility = ["PUBLIC"],
-        link_whole = True,
-        preferred_linkage = "static",
-        static_lib = ":{}_build_python_app_lib".format(name),
-    )
-
-    cpp_library(
-        name = "{}_unity_lib".format(name),
-        srcs = [
-        ],
-        linker_flags = [
-            "--export-dynamic",
-        ],
-        exported_deps = [
-            "//caffe2/torch/csrc/deploy/unity:unity_core",
-            ":{}_python_app_lib".format(name),
-        ],
-    )
diff --git a/torch/csrc/deploy/unity/xar_environment.cpp b/torch/csrc/deploy/unity/xar_environment.cpp
deleted file mode 100644
index 4bb764374525..000000000000
--- a/torch/csrc/deploy/unity/xar_environment.cpp
+++ /dev/null
@@ -1,158 +0,0 @@
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-
-namespace torch {
-namespace deploy {
-
-XarEnvironment::XarEnvironment(std::string exePath, std::string pythonAppDir)
-    : exePath_(std::move(exePath)),
-      pythonAppDir_(std::move(pythonAppDir)),
-      pythonAppRoot_(pythonAppDir_ + "/python_app_root") {
-  setupPythonApp();
-  preloadSharedLibraries();
-}
-
-// NOLINTNEXTLINE(modernize-use-equals-default)
-XarEnvironment::~XarEnvironment() {
-  // We should delete the pythonAppDir_ here. However, if we did that, the
-  // next time we run the executable we would fail to load shared libraries,
-  // since the path we add to LD_LIBRARY_PATH would not exist yet.
-  // Also, the pythonAppDir_ will be re-created anyway the next time we run
-  // the executable.
-  //
-  // Keep the teardown step a noop for now.
-}
-
-void XarEnvironment::configureInterpreter(Interpreter* interp) {
-  auto I = interp->acquireSession();
-  I.global("sys", "path").attr("append")({pythonAppRoot_});
-}
-
-/*
- * We cannot use std::filesystem since it was added in C++17, and clang-tidy
- * seems to be using an older version of C++ and cannot find it.
- *
- * Create a small utility to check the existence of a directory instead.
- */
-bool _dirExists(const std::string& dirPath) {
-  DIR* dir = opendir(dirPath.c_str());
-  if (dir) {
-    closedir(dir);
-    return true;
-  } else {
-    return false;
-  }
-}
-
-bool _fileExists(const std::string& filePath) {
-  FILE* fp = fopen(filePath.c_str(), "rb");
-  if (fp) {
-    fclose(fp);
-    return true;
-  } else {
-    return false;
-  }
-}
-
-void XarEnvironment::setupPythonApp() {
-  MULTIPY_CHECK(
-      !alreadySetupPythonApp_,
-      "Already set up the python application. It should only be done once!");
-
-  // must match the section name specified in unity.bzl
-  constexpr const char* SECTION_NAME = ".torch_deploy_payload.unity";
-  ElfFile elfFile(exePath_.c_str());
-  auto payloadSection = elfFile.findSection(SECTION_NAME);
-  MULTIPY_CHECK(
-      payloadSection != multipy::nullopt, "Missing the payload section");
-  const char* pythonAppPkgStart = payloadSection->start;
-  auto pythonAppPkgSize = payloadSection->len;
-  LOG(INFO) << "Embedded binary size " << pythonAppPkgSize;
-
-  /*
-   * [NOTE about LD_LIBRARY_PATH]
-   * Some python applications use python extensions that depend on shared
-   * libraries in the XAR file. E.g., scipy depends on MKL libraries shipped
-   * with the XAR. For those cases, we need to ensure 2 things before running
-   * the executable:
-   * 1. make sure the path /tmp/torch_deploy_python_app/python_app_root exists.
-   * 2. add /tmp/torch_deploy_python_app/python_app_root to LD_LIBRARY_PATH.
-   *
-   * If either condition is not met, we fail to load the dependent shared
-   * libraries in the XAR file.
-   *
-   * There are simple cases though. If the use case only relies on the
-   * libraries built into torch::deploy like torch, numpy, pyyaml etc., or if
-   * the extensions used do not rely on extra shared libraries in the XAR
-   * file, then neither of the prerequisites needs to be met.
-   *
-   * We used to fail fatally if the path did not already exist. But to make
-   * (stress) unit tests and other simple use cases easier, we changed it to a
-   * warning. If you encounter a shared-library-not-found issue, it is likely
-   * that your use case is the aforementioned complex case; make sure the two
-   * prerequisites are met and run again.
-   */
-  LOG_IF(WARNING, !_dirExists(pythonAppRoot_))
-      << "The python app root " << pythonAppRoot_ << " does not exist before "
-      << "running the executable. If you encounter a shared-library-not-found "
-      << "issue, try creating the directory and running the executable again. "
-      << "Check the note in the code for more details";
-
-  /*
-   * NOTE: we remove the pythonAppDir_ below. Anything under it will be gone.
-   * Normally the directory just contains stuff left over from past runs.
-   * It should be pretty safe to discard it.
-   */
-  std::string rmCmd = fmt::format("rm -rf {}", pythonAppDir_);
-  MULTIPY_CHECK(system(rmCmd.c_str()) == 0, "Failed to remove the directory.");
-
-  // recreate the directory
-  auto r = mkdir(pythonAppDir_.c_str(), 0777);
-  MULTIPY_CHECK(r == 0, "Failed to create directory: " + strerror(errno));
-
-  std::string pythonAppArchive = std::string(pythonAppDir_) + "/python_app.xar";
-  auto fp = fopen(pythonAppArchive.c_str(), "wb");
-  MULTIPY_CHECK(fp != nullptr, "Failed to create file: " + strerror(errno));
-  auto written = fwrite(pythonAppPkgStart, 1, pythonAppPkgSize, fp);
-  MULTIPY_CHECK(written == pythonAppPkgSize, "Expected written == size");
-  fclose(fp);
-
-  std::string extractCommand = fmt::format(
-      "unsquashfs -o 4096 -d {} {}", pythonAppRoot_, pythonAppArchive);
-  r = system(extractCommand.c_str());
-  MULTIPY_CHECK(
-      r == 0,
-      "Failed to extract the python package " + std::to_string(r) +
-          extractCommand.c_str());
-
-  alreadySetupPythonApp_ = true;
-}
-
-void XarEnvironment::preloadSharedLibraries() {
-  // Preload the following libraries since the CustomLoader has some
-  // limitations:
-  // 1. CustomLoader cannot find the correct order to load them.
-  // 2. CustomLoader uses RTLD_LOCAL, so a symbol defined in one lib cannot be
-  //    used by another.
-  std::array preloadList = {
-      "libmkl_core.so", "libmkl_intel_thread.so", nullptr};
-  for (int i = 0; preloadList[i]; ++i) {
-    // only preload the library if it exists in pythonAppRoot_
-    auto path = pythonAppRoot_ + "/" + preloadList[i];
-    if (!_fileExists(path)) {
-      LOG(INFO) << "The preload library " << preloadList[i]
-                << " does not exist in the python app root, skip loading it";
-      continue;
-    }
-    MULTIPY_CHECK(
-        dlopen(preloadList[i], RTLD_GLOBAL | RTLD_LAZY) != nullptr,
-        "Failed to open the shared library " + preloadList[i] + ": " + dlerror());
-  }
-}
-
-} // namespace deploy
-} // namespace torch
diff --git a/torch/csrc/deploy/unity/xar_environment.h b/torch/csrc/deploy/unity/xar_environment.h
deleted file mode 100644
index 446b4a09ecea..000000000000
--- a/torch/csrc/deploy/unity/xar_environment.h
+++ /dev/null
@@ -1,31 +0,0 @@
-#pragma once
-
-#include
-#include
-#include
-
-namespace torch {
-namespace deploy {
-
-constexpr const char* DEFAULT_PYTHON_APP_DIR = "/tmp/torch_deploy_python_app";
-
-class XarEnvironment : public Environment {
- public:
-  explicit XarEnvironment(
-      std::string exePath,
-      std::string pythonAppDir = DEFAULT_PYTHON_APP_DIR);
-  ~XarEnvironment() override;
-  void configureInterpreter(Interpreter* interp) override;
-
- private:
-  void setupPythonApp();
-  void preloadSharedLibraries();
-
-  std::string exePath_;
-  std::string pythonAppDir_;
-  std::string pythonAppRoot_;
-  bool alreadySetupPythonApp_ = false;
-};
-
-} // namespace deploy
-} // namespace torch
diff --git a/torch/csrc/lazy/python/init.cpp b/torch/csrc/lazy/python/init.cpp
index aaee72a012c6..2d421a3eb2ae 100644
--- a/torch/csrc/lazy/python/init.cpp
+++ b/torch/csrc/lazy/python/init.cpp
@@ -305,15 +305,6 @@ void initLazyBindings(PyObject* module) {
 #endif // !(defined(FBCODE_CAFFE2) || defined(OVRSOURCE))
         return result;
       });
-
-#ifndef USE_DEPLOY
-  // When libtorch_python is loaded, we register the python frame getter;
-  // otherwise, debug util simply omits python frames.
-  // TODO(whc) can we make this work inside the torch deploy interpreter?
-  // It doesn't work as-is, possibly because GetPythonFrames resolves to
-  // external cpython rather than embedded cpython.
-  GetPythonFramesFunction() = GetPythonFrames;
-#endif
 }
 
 } // namespace lazy
diff --git a/torch/deploy.h b/torch/deploy.h
deleted file mode 100644
index 87338adaba1d..000000000000
--- a/torch/deploy.h
+++ /dev/null
@@ -1,3 +0,0 @@
-#pragma once
-
-#include
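
For context on what the deleted tests above exercised: the torch::deploy C++ API centers on an InterpreterManager that owns a pool of embedded CPython interpreters, a Package loaded from a torch.package archive, and per-interpreter sessions. The sketch below is illustrative only; it assumes the removed umbrella header torch/deploy.h and uses a placeholder package path, and it mirrors the calls seen in the deleted tests (loadPackage, loadPickle, registerModuleSource, acquireOne, global) rather than documenting a currently supported build.

// Minimal sketch of the removed torch::deploy usage pattern. Not buildable
// against a tree where this change has landed; the package path is a
// placeholder.
#include <torch/deploy.h>
#include <torch/torch.h>

int main() {
  // A pool of two independent embedded CPython interpreters.
  torch::deploy::InterpreterManager manager(2);

  // Load a torch.package archive and unpickle the model it contains,
  // as the SimpleModel tests above did.
  torch::deploy::Package package =
      manager.loadPackage("path/to/simple_packaged_model");  // placeholder
  auto model = package.loadPickle("model", "model.pkl");

  // Calling the replicated object routes the call to one of the interpreters.
  at::Tensor input = torch::ones({10, 20});
  at::Tensor output = model({input}).toTensor();

  // Ad-hoc Python source can also be registered and called per interpreter,
  // mirroring the RegisterModule test.
  manager.registerModuleSource("foomodule", "def add1(x): return x + 1\n");
  auto I = manager.acquireOne();
  int64_t three = I.global("foomodule", "add1")({2}).toIValue().toInt();

  return (output.defined() && three == 3) ? 0 : 1;
}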