Building and Using LibTorch on macOS (Apple M1)

For macOS

Building libTorch using CMake [1]

```bash
git clone -b master --recurse-submodules https://github.com/pytorch/pytorch.git
cd pytorch
git checkout nightly
mkdir libtorch_build
cd libtorch_build
cmake -D BUILD_SHARED_LIBS:BOOL=ON \
      -D CMAKE_BUILD_TYPE:STRING=Release \
      -D PYTHON_EXECUTABLE:PATH=`which python3` \
      -D BUILD_PYTHON=OFF \
      -D USE_CUDA=OFF \
      -D USE_PYTORCH_METAL_EXPORT=ON \
      -D USE_OPENCV=ON \
      -D BUILD_CUSTOM_PROTOBUF=OFF \
      -D CMAKE_INSTALL_PREFIX:PATH=../pytorch-install \
      ..
```
```
--
-- ******** Summary ********
-- General:
-- CMake version : 3.24.2
-- CMake command : /opt/homebrew/Cellar/cmake/3.24.2/bin/cmake
-- System : Darwin
-- C++ compiler : /Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/c++
-- C++ compiler id : AppleClang
-- C++ compiler version : 14.0.0.14000029
-- Using ccache if found : ON
-- Found ccache : CCACHE_PROGRAM-NOTFOUND
-- CXX flags : -Wno-deprecated -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOCUPTI -DLIBKINETO_NOROCTRACER -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DUSE_PYTORCH_METAL_EXPORT -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=braced-scalar-init -Werror=range-loop-construct -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-error=deprecated-declarations -Wvla-extension -Wno-range-loop-analysis -Wno-pass-failed -Wsuggest-override -Wno-error=pedantic -Wno-error=old-style-cast -Wconstant-conversion -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-missing-braces -Wunused-lambda-capture -Qunused-arguments -fcolor-diagnostics -fdiagnostics-color=always -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -DUSE_MPS -fno-objc-arc -Wno-unguarded-availability-new -Wno-unused-private-field -Wno-missing-braces
-- Build type : Release
-- Compile definitions : ONNX_ML=1;ONNXIFI_ENABLE_EXT=1;ONNX_NAMESPACE=onnx_torch;HAVE_MMAP=1;_FILE_OFFSET_BITS=64;HAVE_SHM_OPEN=1;HAVE_SHM_UNLINK=1;USE_EXTERNAL_MZCRC;MINIZ_DISABLE_ZIP_READER_CRC32_CHECKS
-- CMAKE_PREFIX_PATH :
-- CMAKE_INSTALL_PREFIX : ../pytorch-install
-- USE_GOLD_LINKER : OFF
--
-- TORCH_VERSION : 2.1.0
-- CAFFE2_VERSION : 2.1.0
-- BUILD_CAFFE2 : OFF
-- BUILD_CAFFE2_OPS : OFF
-- BUILD_STATIC_RUNTIME_BENCHMARK: OFF
-- BUILD_TENSOREXPR_BENCHMARK: OFF
-- BUILD_NVFUSER_BENCHMARK: OFF
-- BUILD_BINARY : OFF
-- BUILD_CUSTOM_PROTOBUF : OFF
-- Protobuf compiler :
-- Protobuf includes :
-- Protobuf libraries :
-- BUILD_DOCS : OFF
-- BUILD_PYTHON : OFF
-- BUILD_SHARED_LIBS : ON
-- CAFFE2_USE_MSVC_STATIC_RUNTIME : OFF
-- BUILD_TEST : OFF
-- BUILD_JNI : OFF
-- BUILD_MOBILE_AUTOGRAD : OFF
-- BUILD_LITE_INTERPRETER: OFF
-- CROSS_COMPILING_MACOSX :
-- INTERN_BUILD_MOBILE :
-- TRACING_BASED : OFF
-- USE_BLAS : 1
-- BLAS : accelerate
-- BLAS_HAS_SBGEMM :
-- USE_LAPACK : 1
-- LAPACK : accelerate
-- USE_ASAN : OFF
-- USE_TSAN : OFF
-- USE_CPP_CODE_COVERAGE : OFF
-- USE_CUDA : OFF
-- USE_ROCM : OFF
-- BUILD_NVFUSER : OFF
-- USE_EIGEN_FOR_BLAS : ON
-- USE_FBGEMM : OFF
-- USE_FAKELOWP : OFF
-- USE_KINETO : ON
-- USE_FFMPEG : OFF
-- USE_GFLAGS : OFF
-- USE_GLOG : OFF
-- USE_LEVELDB : OFF
-- USE_LITE_PROTO : OFF
-- USE_LMDB : OFF
-- USE_METAL : OFF
-- USE_PYTORCH_METAL : OFF
-- USE_PYTORCH_METAL_EXPORT : ON
-- USE_MPS : ON
-- USE_FFTW : ON
-- USE_MKL : OFF
-- USE_MKLDNN : OFF
-- USE_UCC : OFF
-- USE_ITT : OFF
-- USE_NCCL : OFF
-- USE_NNPACK : ON
-- USE_NUMPY : ON
-- USE_OBSERVERS : OFF
-- USE_OPENCL : OFF
-- USE_OPENCV : ON
-- OpenCV version : 4.7.0
-- USE_OPENMP : OFF
-- USE_TBB : OFF
-- USE_VULKAN : OFF
-- USE_PROF : OFF
-- USE_QNNPACK : OFF
-- USE_PYTORCH_QNNPACK : ON
-- USE_XNNPACK : ON
-- USE_REDIS : OFF
-- USE_ROCKSDB : OFF
-- USE_ZMQ : OFF
-- USE_DISTRIBUTED : OFF
-- Public Dependencies :
-- Private Dependencies : Threads::Threads;pthreadpool;cpuinfo;pytorch_qnnpack;nnpack;XNNPACK;opencv_core;opencv_highgui;opencv_imgproc;opencv_imgcodecs;opencv_optflow;opencv_videoio;opencv_video;fp16;foxi_loader;fmt::fmt-header-only;kineto
-- Public CUDA Deps. :
-- Private CUDA Deps. :
-- USE_COREML_DELEGATE : OFF
-- BUILD_LAZY_TS_BACKEND : ON
-- TORCH_DISABLE_GPU_ASSERTS : OFF
-- Configuring done
-- Generating done
-- Build files have been written to: /Users/luohanjie/Softwares/pytorch/libtorch_build
```

Note: a protobuf already installed on the system may cause build errors [2].
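If the build fails while resolving protobuf, one possible workaround (assuming the conflicting copy came from Homebrew; this is a sketch, not part of the original build recipe) is to hide the system copy for the duration of the build, or to let PyTorch build its own bundled copy instead:

```bash
# Check whether Homebrew installed a system-wide protobuf.
brew list --versions protobuf

# Option 1: temporarily take Homebrew's protobuf out of the default paths.
brew unlink protobuf   # `brew link protobuf` restores it afterwards

# Option 2: reconfigure with PyTorch's bundled protobuf instead of the system one.
cmake -D BUILD_CUSTOM_PROTOBUF=ON ..
```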

```bash
cmake --build . --target install --parallel 20
```

A CMake test program [3]

```cmake
set(TORCH_SRC /Users/luohanjie/Softwares/pytorch/pytorch-install)

set(TORCH_INCLUDE_DIRS ${TORCH_SRC}/include/torch/csrc/api/include ${TORCH_SRC}/include)
file(GLOB TORCH_LIBS ${TORCH_SRC}/lib/*.dylib ${TORCH_SRC}/lib/*.a)

message(${TORCH_INCLUDE_DIRS})

include_directories(${TORCH_INCLUDE_DIRS})

add_executable(test_libtorch test_libtorch.cpp)
target_link_libraries(test_libtorch ${TORCH_LIBS})
```
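Instead of globbing the libraries by hand, the install tree also ships CMake config files, so the standard `find_package(Torch)` route from the official LibTorch docs should work as well (a sketch; the prefix path is this machine's install location from above):

```cmake
# Point CMake at the LibTorch install tree built above.
list(APPEND CMAKE_PREFIX_PATH /Users/luohanjie/Softwares/pytorch/pytorch-install)

find_package(Torch REQUIRED)

add_executable(test_libtorch test_libtorch.cpp)
target_link_libraries(test_libtorch ${TORCH_LIBRARIES})
set_property(TARGET test_libtorch PROPERTY CXX_STANDARD 17)
```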

Program 1:

```cpp
#include <torch/torch.h>
#include <iostream>

int main(int argc, char* argv[]) {
    std::cout << "MPS? " << torch::mps::is_available() << std::endl;

    torch::Tensor tensor = torch::rand({2, 3}).to("mps");
    std::cout << tensor << std::endl;
}
```

Output:

```
MPS? 1
 0.1982  0.2995  0.5541
 0.4153  0.2684  0.4655
[ MPSFloatType{2,3} ]
```

Program 2:

```cpp
#include <time.h>
#include <torch/torch.h>
#include <iostream>

#define USE_MPS 1

using namespace std;

struct Net : torch::nn::Module {
    Net() {
        conv1 = register_module("conv1", torch::nn::Conv2d(3, 64, 3));
        conv2 = register_module("conv2", torch::nn::Conv2d(64, 128, 3));
        conv3 = register_module("conv3", torch::nn::Conv2d(128, 256, 3));
        fc1 = register_module("fc1", torch::nn::Linear(256, 128));
        fc2 = register_module("fc2", torch::nn::Linear(128, 56));
        fc3 = register_module("fc3", torch::nn::Linear(56, 10));
        global_pool = register_module("global_pool", torch::nn::AdaptiveAvgPool2d(1));
    }

    torch::Tensor forward(torch::Tensor x) {
        x = torch::relu(conv1->forward(x));
        x = torch::max_pool2d(x, {2, 2});
        x = torch::relu(conv2->forward(x));
        x = torch::max_pool2d(x, {2, 2});
        x = torch::relu(conv3->forward(x));
        x = torch::max_pool2d(x, {2, 2});
        x = global_pool->forward(x);
        x = torch::relu(fc1->forward(x.reshape({x.size(0), -1})));
        x = torch::relu(fc2->forward(x));
        x = torch::log_softmax(fc3->forward(x), 1);

        return x;
    }

    torch::nn::Linear fc1{nullptr}, fc2{nullptr}, fc3{nullptr};
    torch::nn::Conv2d conv1{nullptr}, conv2{nullptr}, conv3{nullptr};
    torch::nn::AdaptiveAvgPool2d global_pool{nullptr};
};

int main(int argc, char* argv[]) {
    auto net = std::make_shared<Net>();
    torch::Tensor data = torch::ones({8, 3, 128, 128});

#ifdef USE_MPS
    net->to(torch::Device(torch::kMPS));
    data = data.to("mps");
    // torch::Tensor data = torch::ones({8, 3, 128, 128}).to("mps");
#endif

    torch::Tensor y;
    clock_t start, end;
    start = clock();
    for (int i = 0; i < 100; ++i) {
        y = net->forward(data);
    }
    end = clock();
    cout << "Time: " << double(end - start) / CLOCKS_PER_SEC << endl;

    return 0;
}
```
| Device | Time (s) |
| ------ | -------- |
| CPU    | 15.36    |
| MPS    | 0.2671   |
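Two caveats about this measurement: `clock()` counts CPU time, and MPS kernels are dispatched asynchronously, so the loop can return before the GPU has actually finished. A fairer sketch (assuming the build exposes `torch::mps::synchronize()`, which recent LibTorch versions do) times wall-clock and syncs the device before stopping the timer:

```cpp
#include <torch/torch.h>
#include <chrono>
#include <iostream>

int main() {
    // Fall back to CPU if MPS is unavailable.
    torch::Device dev(torch::mps::is_available() ? torch::kMPS : torch::kCPU);
    torch::Tensor a = torch::rand({2048, 2048}, dev);
    torch::Tensor b = torch::rand({2048, 2048}, dev);

    torch::Tensor c;
    auto t0 = std::chrono::steady_clock::now();
    for (int i = 0; i < 100; ++i) {
        c = a.matmul(b);
    }
    if (dev.is_mps()) {
        torch::mps::synchronize();  // wait for all queued MPS work to finish
    }
    auto t1 = std::chrono::steady_clock::now();
    std::cout << std::chrono::duration<double>(t1 - t0).count() << " s\n";
}
```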

Generating TorchScript [4][5]

A PyTorch model’s journey from Python to C++ is enabled by Torch Script, a representation of a PyTorch model that can be understood, compiled and serialized by the Torch Script compiler.

```python
import torch
import torchvision

# An instance of your model.
model = torchvision.models.resnet18()

# An example input you would normally provide to your model's forward() method.
example = torch.rand(1, 3, 224, 224)

# Use torch.jit.trace to generate a torch.jit.ScriptModule via tracing.
traced_script_module = torch.jit.trace(model, example)

# Full JIT version of the model (not compatible with the mobile interpreter).
traced_script_module.save("traced_resnet_model.pt")

# Export a mobile-interpreter version of the model (compatible with the mobile interpreter).
from torch.utils.mobile_optimizer import optimize_for_mobile
traced_script_module_lite = optimize_for_mobile(traced_script_module)
traced_script_module_lite._save_for_lite_interpreter("traced_resnet_model_lite.pt")
```

By default, for the CPU backend, optimize_for_mobile performs the following types of optimizations:

- Conv2D and BatchNorm fusion, which folds Conv2d-BatchNorm2d into Conv2d;
- Insert and fold prepacked ops, which rewrites the model graph to replace 2D convolutions and linear ops with their prepacked counterparts;
- ReLU and hardtanh fusion, which rewrites the graph by finding ReLU/hardtanh ops and fusing them together;
- Dropout removal, which removes dropout nodes from the module when training is false;
- Conv packed params hoisting, which moves convolution packed params to the root module, so that the convolution structs can be deleted. This decreases model size without impacting numerics.

For the Vulkan backend, optimize_for_mobile performs the following type of optimization:

- Automatic GPU transfer, which rewrites the graph so that moving input and output data to and from the GPU becomes part of the model.

Optimization types can be disabled by passing an optimization blocklist as an argument to optimize_for_mobile, as sketched below.
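A hedged sketch of that blocklist argument (the `MobileOptimizerType` enum lives in `torch._C`; the member name is taken from the PyTorch source of this era, so treat it as an assumption):

```python
from torch._C import MobileOptimizerType
from torch.utils.mobile_optimizer import optimize_for_mobile

# Run all default passes except Conv2d-BatchNorm fusion.
blocklist = {MobileOptimizerType.CONV_BN_FUSION}
optimized = optimize_for_mobile(traced_script_module, optimization_blocklist=blocklist)
```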

Loading TorchScript in C++ and Running Inference

```cpp
#include <torch/script.h> // One-stop header.

#include <iostream>
#include <memory>

int main(int argc, const char* argv[]) {
    if (argc != 2) {
        std::cerr << "usage: example-app <path-to-exported-script-module>\n";
        return -1;
    }

    torch::jit::script::Module module;
    try {
        // Deserialize the ScriptModule from a file using torch::jit::load().
        module = torch::jit::load(argv[1]);
    }
    catch (const c10::Error& e) {
        std::cerr << "error loading the model\n";
        return -1;
    }

    std::cout << "ok\n";

    // Run the model on a dummy input (as in the official tutorial).
    std::vector<torch::jit::IValue> inputs;
    inputs.push_back(torch::ones({1, 3, 224, 224}));
    at::Tensor output = module.forward(inputs).toTensor();
    std::cout << output.slice(/*dim=*/1, /*start=*/0, /*end=*/5) << '\n';
}
```
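With the CMake test project from earlier, a typical run against the traced model from the previous section would look like this (binary and model names assumed from the snippets above):

```bash
./test_libtorch traced_resnet_model.pt
```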

PyTorch to ONNX

```bash
conda install onnx
```

```python
import torch
import torchvision

dummy_input = torch.randn(10, 3, 224, 224, device="cpu")
model = torchvision.models.alexnet(pretrained=True).cpu()

# Providing input and output names sets the display names for values
# within the model's graph. Setting these does not change the semantics
# of the graph; it is only for readability.
#
# The inputs to the network consist of the flat list of inputs (i.e.
# the values you would pass to the forward() method) followed by the
# flat list of parameters. You can partially specify names, i.e. provide
# a list here shorter than the number of inputs to the model, and we will
# only set that subset of names, starting from the beginning.
input_names = ["actual_input_1"] + ["learned_%d" % i for i in range(16)]
output_names = ["output1"]

torch.onnx.export(model, dummy_input, "alexnet.onnx", verbose=True, input_names=input_names, output_names=output_names)
```
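Note that this export bakes the batch size of `dummy_input` (10) into the graph. If a variable batch size is needed, `torch.onnx.export` accepts a `dynamic_axes` argument; a sketch reusing the names from above:

```python
# Mark dimension 0 of the input and output as a dynamic "batch" axis.
torch.onnx.export(
    model, dummy_input, "alexnet_dynamic.onnx",
    input_names=input_names, output_names=output_names,
    dynamic_axes={"actual_input_1": {0: "batch"}, "output1": {0: "batch"}},
)
```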

Testing:

```python
import onnx
import onnxruntime as ort
import numpy as np

# Load the ONNX model
model = onnx.load("alexnet.onnx")

# Check that the model is well formed
onnx.checker.check_model(model)

# Print a human readable representation of the graph
print(onnx.helper.printable_graph(model.graph))


ort_session = ort.InferenceSession("alexnet.onnx")

outputs = ort_session.run(
    None,
    {"actual_input_1": np.random.randn(10, 3, 224, 224).astype(np.float32)},
)
print(outputs[0])
```
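To go a step beyond eyeballing the output, the ONNX Runtime result can be compared numerically against the original PyTorch model (a sketch; batch size 10 matches the fixed input shape exported above):

```python
import numpy as np
import onnxruntime as ort
import torch
import torchvision

model = torchvision.models.alexnet(pretrained=True).eval()
x = torch.randn(10, 3, 224, 224)
with torch.no_grad():
    torch_out = model(x).numpy()

sess = ort.InferenceSession("alexnet.onnx")
ort_out = sess.run(None, {"actual_input_1": x.numpy()})[0]

# Tolerances loose enough for float32 round-off between backends.
np.testing.assert_allclose(torch_out, ort_out, rtol=1e-3, atol=1e-5)
print("PyTorch and ONNX Runtime outputs match")
```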

For Android

Cross Compiling for Android NDK

Modify /Users/luohanjie/Softwares/pytorch/scripts/build_android.sh, adding:

```bash
CMAKE_ARGS+=("-DBUILD_SHARED_LIBS=ON")
```

```bash
brew install automake libtool

cd pytorch
git checkout nightly
export ANDROID_NDK=/Users/luohanjie/Library/Android/sdk/ndk/21.4.7075529
export ANDROID_TOOLCHAIN=clang
export ANDROID_ABI=arm64-v8a
export BUILD_SHARED_LIBS=ON
export PYTHON_EXECUTABLE=`which python3`
sh scripts/build_android.sh
```
```
-- ******** Summary ********
-- General:
-- CMake version : 3.24.2
-- CMake command : /opt/homebrew/Cellar/cmake/3.24.2/bin/cmake
-- System : Android
-- C++ compiler : /Users/luohanjie/Library/Android/sdk/ndk/21.4.7075529/toolchains/llvm/prebuilt/darwin-x86_64/bin/clang++
-- C++ compiler id : Clang
-- C++ compiler version : 9.0
-- Using ccache if found : ON
-- Found ccache : CCACHE_PROGRAM-NOTFOUND
-- CXX flags : -g -DANDROID -fdata-sections -ffunction-sections -funwind-tables -fstack-protector-strong -no-canonical-prefixes -D_FORTIFY_SOURCE=2 -Wformat -Werror=format-security -frtti -fexceptions -ffunction-sections -fdata-sections -Wno-deprecated -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DUSE_VULKAN_WRAPPER -DUSE_KINETO -DLIBKINETO_NOCUPTI -DLIBKINETO_NOROCTRACER -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DUSE_VULKAN -DUSE_VULKAN_API -DBUILD_LITE_INTERPRETER -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -DEDGE_PROFILER_USE_KINETO -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=braced-scalar-init -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-error=deprecated-declarations -Wvla-extension -Wno-range-loop-analysis -Wno-pass-failed -Wno-error=pedantic -Wno-error=old-style-cast -Wconstant-conversion -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-missing-braces -Wunused-lambda-capture -Qunused-arguments -fcolor-diagnostics -fdiagnostics-color=always -fno-math-errno -fno-trapping-math -Werror=format -g0
-- Build type : Release
-- Compile definitions :
-- CMAKE_PREFIX_PATH : /opt/homebrew/Caskroom/miniforge/base/envs/tf/lib/python3.10/site-packages;/Users/luohanjie/Library/Android/sdk/ndk/21.4.7075529/toolchains/llvm/prebuilt/darwin-x86_64
-- CMAKE_INSTALL_PREFIX : /Users/luohanjie/Softwares/pytorch/build_android/install
-- USE_GOLD_LINKER : OFF
--
-- TORCH_VERSION : 2.1.0
-- CAFFE2_VERSION : 2.1.0
-- BUILD_CAFFE2 : OFF
-- BUILD_CAFFE2_OPS : OFF
-- BUILD_STATIC_RUNTIME_BENCHMARK: OFF
-- BUILD_TENSOREXPR_BENCHMARK: OFF
-- BUILD_NVFUSER_BENCHMARK: OFF
-- BUILD_BINARY : OFF
-- BUILD_CUSTOM_PROTOBUF : OFF
-- Protobuf compiler :
-- Protobuf includes :
-- Protobuf libraries :
-- BUILD_DOCS : OFF
-- BUILD_PYTHON : OFF
-- BUILD_SHARED_LIBS : ON
-- CAFFE2_USE_MSVC_STATIC_RUNTIME : OFF
-- BUILD_TEST : OFF
-- BUILD_JNI : OFF
-- BUILD_MOBILE_AUTOGRAD : OFF
-- BUILD_LITE_INTERPRETER: ON
-- INTERN_BUILD_MOBILE : ON
-- TRACING_BASED : OFF
-- USE_BLAS : 1
-- BLAS :
-- BLAS_HAS_SBGEMM :
-- USE_LAPACK : 0
-- USE_ASAN : OFF
-- USE_TSAN : OFF
-- USE_CPP_CODE_COVERAGE : OFF
-- USE_CUDA : OFF
-- USE_ROCM : OFF
-- BUILD_NVFUSER : OFF
-- USE_EIGEN_FOR_BLAS : ON
-- USE_FBGEMM : OFF
-- USE_FAKELOWP : OFF
-- USE_KINETO : ON
-- USE_FFMPEG : OFF
-- USE_GFLAGS : OFF
-- USE_GLOG : OFF
-- USE_LEVELDB : OFF
-- USE_LITE_PROTO : OFF
-- USE_LMDB : OFF
-- USE_METAL : OFF
-- USE_PYTORCH_METAL : OFF
-- USE_PYTORCH_METAL_EXPORT : OFF
-- USE_MPS : OFF
-- USE_FFTW : OFF
-- USE_MKL :
-- USE_MKLDNN : OFF
-- USE_UCC : OFF
-- USE_ITT : OFF
-- USE_NCCL : OFF
-- USE_NNPACK : ON
-- USE_NUMPY : ON
-- USE_OBSERVERS : OFF
-- USE_OPENCL : OFF
-- USE_OPENCV : OFF
-- USE_OPENMP : OFF
-- USE_TBB : OFF
-- USE_VULKAN : ON
-- USE_VULKAN_FP16_INFERENCE : OFF
-- USE_VULKAN_RELAXED_PRECISION : OFF
-- USE_PROF : OFF
-- USE_QNNPACK : OFF
-- USE_PYTORCH_QNNPACK : ON
-- USE_XNNPACK : ON
-- USE_REDIS : OFF
-- USE_ROCKSDB : OFF
-- USE_ZMQ : OFF
-- USE_DISTRIBUTED : OFF
-- Public Dependencies :
-- Private Dependencies : Threads::Threads;eigen_blas;pthreadpool;cpuinfo;pytorch_qnnpack;nnpack;XNNPACK;VulkanWrapper;fp16;log;fmt::fmt-header-only;kineto;dl
-- Public CUDA Deps. :
-- Private CUDA Deps. :
-- USE_COREML_DELEGATE : OFF
-- BUILD_LAZY_TS_BACKEND : OFF
-- TORCH_DISABLE_GPU_ASSERTS : OFF
-- Configuring done
-- Generating done
-- Build files have been written to: /Users/luohanjie/Softwares/pytorch/build_android
```
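The resulting headers and libraries land under `build_android/install`. Wiring them into an Android (NDK) CMake project can follow the same pattern as the macOS test project above; a sketch with assumed paths and target names:

```cmake
set(TORCH_ANDROID /Users/luohanjie/Softwares/pytorch/build_android/install)

include_directories(${TORCH_ANDROID}/include ${TORCH_ANDROID}/include/torch/csrc/api/include)
file(GLOB TORCH_ANDROID_LIBS ${TORCH_ANDROID}/lib/*.so ${TORCH_ANDROID}/lib/*.a)

add_executable(test_libtorch_android test_libtorch_android.cpp)
# `log` is the Android NDK logging library the Private Dependencies list mentions.
target_link_libraries(test_libtorch_android ${TORCH_ANDROID_LIBS} log)
```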

Building PyTorch with Vulkan [6]

Build PyTorch

PyTorch supports running model inference on GPUs that support the Vulkan graphics and compute API. The primary targets are mobile GPUs on Android devices. The Vulkan backend is not included by default; the main switch for it is the CMake option USE_VULKAN, which can be set through the environment variable of the same name. To use PyTorch with the Vulkan backend, we need to build it from source with additional settings.

Download the Vulkan SDK, then double-click the package and install it.

To uninstall: `sudo path_to_vulkan_sdk/uninstall.sh`

```bash
vulkaninfo

==========
VULKANINFO
==========

Vulkan Instance Version: 1.3.239
...
```

Build PyTorch with Vulkan:

```bash
conda create --name pytorch_vulkan python=3.10
conda activate pytorch_vulkan

conda install pkg-config libuv pyyaml typing-extensions sympy
brew install google-benchmark

cd PYTORCH_ROOT
git checkout nightly
git submodule update --recursive --remote
```

Edit CMakeLists.txt:

```cmake
option(BUILD_CUSTOM_PROTOBUF "Build and use Caffe2's own protobuf under third_party" OFF)

option(USE_SYSTEM_BENCHMARK "Use system-provided google benchmark." ON)

# add:
string(APPEND CMAKE_CXX_FLAGS " -D_LIBCPP_DISABLE_AVAILABILITY")
```

Edit c10/CMakeLists.txt, commenting out the benchmark subdirectory:

```cmake
#add_subdirectory(benchmark)
```

```bash
export CMAKE_PREFIX_PATH=${CONDA_PREFIX:-"$(dirname $(which conda))/../"}
USE_VULKAN=1 USE_VULKAN_SHADERC_RUNTIME=1 USE_VULKAN_WRAPPER=0 MACOSX_DEPLOYMENT_TARGET=10.11 CC=clang CXX=clang++ python setup.py install
```

```
-- ******** Summary ********
-- General:
-- CMake version : 3.24.2
-- CMake command : /opt/homebrew/Cellar/cmake/3.24.2/bin/cmake
-- System : Darwin
-- C++ compiler : /Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/clang++
-- C++ compiler id : AppleClang
-- C++ compiler version : 14.0.0.14000029
-- Using ccache if found : ON
-- Found ccache : CCACHE_PROGRAM-NOTFOUND
-- CXX flags : -Wno-deprecated -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOCUPTI -DLIBKINETO_NOROCTRACER -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DUSE_VULKAN -DUSE_VULKAN_API -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -D_LIBCPP_DISABLE_AVAILABILITY -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=braced-scalar-init -Werror=range-loop-construct -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-error=deprecated-declarations -Wvla-extension -Wno-range-loop-analysis -Wno-pass-failed -Wsuggest-override -Wno-error=pedantic -Wno-error=old-style-cast -Wconstant-conversion -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-missing-braces -Wunused-lambda-capture -Qunused-arguments -fcolor-diagnostics -fdiagnostics-color=always -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -DUSE_MPS -fno-objc-arc -Wno-unguarded-availability-new -Wno-unused-private-field -Wno-missing-braces
-- Build type : Release
-- Compile definitions : ONNX_ML=1;ONNXIFI_ENABLE_EXT=1;ONNX_NAMESPACE=onnx_torch;HAVE_MMAP=1;_FILE_OFFSET_BITS=64;HAVE_SHM_OPEN=1;HAVE_SHM_UNLINK=1;USE_EXTERNAL_MZCRC;MINIZ_DISABLE_ZIP_READER_CRC32_CHECKS
-- CMAKE_PREFIX_PATH : /opt/homebrew/Caskroom/miniforge/base/envs/pytorch_vulkan/lib/python3.10/site-packages;/opt/homebrew/Caskroom/miniforge/base/envs/pytorch_vulkan
-- CMAKE_INSTALL_PREFIX : /Users/luohanjie/Softwares/pytorch/torch
-- USE_GOLD_LINKER : OFF
--
-- TORCH_VERSION : 2.1.0
-- CAFFE2_VERSION : 2.1.0
-- BUILD_CAFFE2 : OFF
-- BUILD_CAFFE2_OPS : OFF
-- BUILD_STATIC_RUNTIME_BENCHMARK: OFF
-- BUILD_TENSOREXPR_BENCHMARK: OFF
-- BUILD_NVFUSER_BENCHMARK: OFF
-- BUILD_BINARY : OFF
-- BUILD_CUSTOM_PROTOBUF : OFF
-- Protobuf compiler :
-- Protobuf includes :
-- Protobuf libraries :
-- BUILD_DOCS : OFF
-- BUILD_PYTHON : True
-- Python version : 3.10.9
-- Python executable : /opt/homebrew/Caskroom/miniforge/base/envs/pytorch_vulkan/bin/python3
-- Pythonlibs version : 3.10.9
-- Python library : /opt/homebrew/Caskroom/miniforge/base/envs/pytorch_vulkan/lib/libpython3.10.a
-- Python includes : /opt/homebrew/Caskroom/miniforge/base/envs/pytorch_vulkan/include/python3.10
-- Python site-packages: lib/python3.10/site-packages
-- BUILD_SHARED_LIBS : ON
-- CAFFE2_USE_MSVC_STATIC_RUNTIME : OFF
-- BUILD_TEST : True
-- BUILD_JNI : OFF
-- BUILD_MOBILE_AUTOGRAD : OFF
-- BUILD_LITE_INTERPRETER: OFF
-- CROSS_COMPILING_MACOSX :
-- INTERN_BUILD_MOBILE :
-- TRACING_BASED : OFF
-- USE_BLAS : 1
-- BLAS : accelerate
-- BLAS_HAS_SBGEMM :
-- USE_LAPACK : 1
-- LAPACK : accelerate
-- USE_ASAN : OFF
-- USE_TSAN : OFF
-- USE_CPP_CODE_COVERAGE : OFF
-- USE_CUDA : OFF
-- USE_ROCM : OFF
-- BUILD_NVFUSER : OFF
-- USE_EIGEN_FOR_BLAS : ON
-- USE_FBGEMM : OFF
-- USE_FAKELOWP : OFF
-- USE_KINETO : ON
-- USE_FFMPEG : OFF
-- USE_GFLAGS : OFF
-- USE_GLOG : OFF
-- USE_LEVELDB : OFF
-- USE_LITE_PROTO : OFF
-- USE_LMDB : OFF
-- USE_METAL : OFF
-- USE_PYTORCH_METAL : OFF
-- USE_PYTORCH_METAL_EXPORT : OFF
-- USE_MPS : ON
-- USE_FFTW : ON
-- USE_MKL : OFF
-- USE_MKLDNN : OFF
-- USE_UCC : OFF
-- USE_ITT : OFF
-- USE_NCCL : OFF
-- USE_NNPACK : ON
-- USE_NUMPY : OFF
-- USE_OBSERVERS : ON
-- USE_OPENCL : OFF
-- USE_OPENCV : OFF
-- USE_OPENMP : OFF
-- USE_TBB : OFF
-- USE_VULKAN : 1
-- USE_VULKAN_FP16_INFERENCE : OFF
-- USE_VULKAN_RELAXED_PRECISION : OFF
-- USE_PROF : OFF
-- USE_QNNPACK : OFF
-- USE_PYTORCH_QNNPACK : ON
-- USE_XNNPACK : ON
-- USE_REDIS : OFF
-- USE_ROCKSDB : OFF
-- USE_ZMQ : OFF
-- USE_DISTRIBUTED : OFF
-- Public Dependencies :
-- Private Dependencies : Threads::Threads;pthreadpool;cpuinfo;pytorch_qnnpack;nnpack;XNNPACK;/usr/local/lib/libvulkan.dylib;fp16;foxi_loader;fmt::fmt-header-only;kineto
-- Public CUDA Deps. :
-- Private CUDA Deps. :
-- USE_COREML_DELEGATE : OFF
-- BUILD_LAZY_TS_BACKEND : ON
-- TORCH_DISABLE_GPU_ASSERTS : OFF
-- Configuring done
-- Generating done
```

Check the installed version:

```python
import torch
print(torch.__version__)
```

Output: `2.1.0a0+git517a432`

Generating TorchScript

```python
import torch
import torchvision

# An instance of your model.
model = torchvision.models.resnet18()

# An example input you would normally provide to your model's forward() method.
example = torch.rand(1, 3, 224, 224)

# Use torch.jit.trace to generate a torch.jit.ScriptModule via tracing.
traced_script_module = torch.jit.trace(model, example)

# Export a mobile-interpreter version of the model, targeting the Vulkan backend.
from torch.utils.mobile_optimizer import optimize_for_mobile
traced_script_module_vulkan = optimize_for_mobile(traced_script_module, backend='vulkan')
traced_script_module_vulkan._save_for_lite_interpreter("traced_script_module_vulkan.pt")
```

If you see the error message `PytorchStreamReader failed locating file bytecode.pkl: file not found ()`, you are likely using a TorchScript model that requires the PyTorch JIT interpreter (a version of the PyTorch interpreter that is not as size-efficient). To leverage the efficient mobile interpreter, regenerate the model by running `module._save_for_lite_interpreter(${model_path})`.

If bytecode.pkl is missing, the model was likely generated with the api `module.save(${model_path})`.

The api `_load_for_lite_interpreter(${model_path})` can be helpful to validate a model with the efficient mobile interpreter.
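A quick host-side check along those lines (a sketch; `_load_for_lite_interpreter` lives in `torch.jit.mobile`, and the CPU lite model from the earlier section is used because the Vulkan one expects a Vulkan device):

```python
import torch
from torch.jit.mobile import _load_for_lite_interpreter

# Loading fails with the bytecode.pkl error above if the file was saved
# via module.save() instead of _save_for_lite_interpreter().
module = _load_for_lite_interpreter("traced_resnet_model_lite.pt")
output = module(torch.rand(1, 3, 224, 224))
print(output.shape)  # torch.Size([1, 1000])
```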