/**
 * \file sdk/c-opr-loaders/mace/mace_loader.cpp
 * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
 *
 * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
 * or implied.
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>

#include <functional>
#include <map>
#include <memory>
#include <numeric>
#include <string>
#include <vector>

#include "mace/public/mace.h"

#include "extern_c_opr.h"

#if defined(__APPLE__) || defined(__MACOSX)
static const char* default_so_paths[] = {
        "/System/Library/Frameworks/OpenCL.framework/OpenCL", "libOpenCL.so"};
#elif defined(__ANDROID__)
static const char* default_so_paths[] = {
#if defined(__aarch64__)
        "/system/lib64/libOpenCL.so",
        "/system/lib64/libOpenCL_system.so",
        "/system/lib64/egl/libGLES_mali.so",
        "/system/vendor/lib64/libOpenCL.so",
        "/system/vendor/lib64/egl/libGLES_mali.so",
        "/system/vendor/lib64/libPVROCL.so",
        "/vendor/lib64/libOpenCL.so",
        "/data/data/org.pocl.libs/files/lib64/libpocl.so",
#else
        "/system/lib/libOpenCL.so",
        "/system/lib/libOpenCL_system.so",
        "/system/lib/egl/libGLES_mali.so",
        "/system/vendor/lib/libOpenCL.so",
        "/system/vendor/lib/egl/libGLES_mali.so",
        "/system/vendor/lib/libPVROCL.so",
        "/vendor/lib/libOpenCL.so",
        "/data/data/org.pocl.libs/files/lib/libpocl.so",
#endif
        "libOpenCL.so"};
#elif defined(_WIN32)
static const char* default_so_paths[] = {"OpenCL.dll"};
#elif defined(__linux__)
static const char* default_so_paths[] = {
#if defined(__x86_64__) || defined(__amd64__)
        "/usr/lib64/libOpenCL.so",
        "/usr/local/lib64/libOpenCL.so",
        "/usr/local/cuda/lib64/libOpenCL.so",
        "/opt/intel/opencl/libOpenCL.so",
        //! As in some system like apex, the driver exists here
        "/usr/lib/libOpenCL.so",
#else
        "/usr/lib/libOpenCL.so",
        "/usr/lib32/libOpenCL.so",
        "/usr/local/lib/libOpenCL.so",
        "/usr/local/lib/libpocl.so",
        "/usr/local/cuda/lib/libOpenCL.so",
#endif
        "libOpenCL.so"};
#endif

#define ASSERT(x, msg)                                                       \
    do {                                                                     \
        if (!(x)) {                                                          \
            printf("error at %s:%d %s\n", __FILE__, __LINE__, __FUNCTION__); \
            printf(msg);                                                     \
            __builtin_trap();                                                \
        }                                                                    \
    } while (0)

inline bool file_exists(const char* name) {
    struct stat buffer;
    return (stat(name, &buffer) == 0);
}
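
//! Runtime configuration is taken from environment variables (all of them are
//! read in the code below):
//!   MGB_MACE_LOADER_FORMAT      "NHWC" to feed tensors in NHWC, default NCHW
//!   MGB_MACE_RUNTIME            "GPU" to run on the GPU, default CPU
//!   MGB_MACE_NR_THREADS         number of CPU threads, default 1
//!   MGB_MACE_OPENCL_CACHE_PATH  OpenCL compiled-kernel cache path (required
//!                               when running on GPU)
//!   MGB_MACE_TUNING_PARAM_PATH  optional OpenCL tuning parameter file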

class MGBOprDescImpl {
    struct UserData {
        std::shared_ptr<mace::MaceEngine> engine;
        size_t nr_inputs, nr_outputs;
        std::vector<std::vector<uint32_t>> output_shapes;
        std::vector<std::string> input_names, output_names;
    };

    static UserData* user_data(const MGBOprDesc* self) {
        return static_cast<UserData*>(self->user_data);
    }

    static void release(MGBOprDesc* self) {
        // free all data buffers
        delete user_data(self);
        delete self;
    }

    static size_t hash(const MGBOprDesc* self) {
        return reinterpret_cast<size_t>(self);
    }

    static int is_same(const MGBOprDesc* self, const MGBOprDesc* rhs) {
        return self == rhs;
    }

    static void infer_shape(const MGBOprDesc* self, const MGBTensorShape* input,
                            MGBTensorShape* output) {
        auto ud = user_data(self);
        // infer output shape from user data
        for (size_t i = 0; i < ud->nr_outputs; i++) {
            output[i].ndim = ud->output_shapes[i].size();
            for (size_t j = 0; j < output[i].ndim; j++) {
                output[i].shape[j] = ud->output_shapes[i][j];
            }
        }
    }

    static void infer_dtype(const MGBOprDesc*, const MGBDType* input,
                            MGBDType* output) {
        ASSERT(input[0] == MGB_DTYPE_FLOAT32, "Input dtype is not float32");
        output[0] = MGB_DTYPE_FLOAT32;
    }

    static void execute(const MGBOprDesc* self, const MGBTensor* input,
                        const MGBTensor* output) {
        auto ud = user_data(self);
        // create input and output tensor buffers
        std::map<std::string, mace::MaceTensor> mace_inputs;
        std::map<std::string, mace::MaceTensor> mace_outputs;
        auto mace_data_format = mace::DataFormat::NCHW;
        char* data_format = getenv("MGB_MACE_LOADER_FORMAT");
        if (data_format != nullptr && !strcmp(data_format, "NHWC")) {
            mace_data_format = mace::DataFormat::NHWC;
        }
        for (size_t i = 0; i < ud->nr_inputs; ++i) {
            // allocate input
            uint32_t ndim = input[i].layout.shape.ndim;
            auto input_shape = std::vector<int64_t>(
                    input[i].layout.shape.shape,
                    input[i].layout.shape.shape + ndim);
            int64_t input_size = std::accumulate(
                    input_shape.begin(), input_shape.end(), 1,
                    std::multiplies<int64_t>());
            auto buffer_in = std::shared_ptr<float>(
                    new float[input_size], std::default_delete<float[]>());
            memcpy(buffer_in.get(), input[i].data, input_size * sizeof(float));
            mace_inputs[ud->input_names[i]] =
                    mace::MaceTensor(input_shape, buffer_in, mace_data_format);
        }
        for (size_t i = 0; i < ud->nr_outputs; ++i) {
            // allocate output
            uint32_t ndim = output[i].layout.shape.ndim;
            auto output_shape = std::vector<int64_t>(
                    output[i].layout.shape.shape,
                    output[i].layout.shape.shape + ndim);
            int64_t output_size = std::accumulate(
                    output_shape.begin(), output_shape.end(), 1,
                    std::multiplies<int64_t>());
            auto buffer_out = std::shared_ptr<float>(
                    new float[output_size], std::default_delete<float[]>());
            mace_outputs[ud->output_names[i]] =
                    mace::MaceTensor(output_shape, buffer_out, mace_data_format);
        }

        // run the model
        auto status = (ud->engine)->Run(mace_inputs, &mace_outputs);
        ASSERT(status == mace::MaceStatus::MACE_SUCCESS,
               "Error in running mace engine");

        // send computed output to MGB: copy each mace output buffer back into
        // the corresponding MGB output tensor
        int idx = 0;
        for (auto it = mace_outputs.begin(); it != mace_outputs.end(); it++) {
            float* to = static_cast<float*>(output[idx].data);
            const float* from = (it->second).data().get();
            int64_t size = std::accumulate(
                    output[idx].layout.shape.shape,
                    output[idx].layout.shape.shape + output[idx].layout.shape.ndim,
                    static_cast<int64_t>(1), std::multiplies<int64_t>());
            memcpy(to, from, size * sizeof(float));
            ++idx;
        }
    }

public:
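    //! make() deserializes the opr param buffer dumped for this loader. The
    //! layout implied by the parsing code below (all length/count fields are
    //! 32-bit unsigned integers in host byte order) is:
    //!   [nr_inputs][nr_outputs]
    //!   nr_inputs  x { [name_len][name bytes] }
    //!   nr_outputs x { [name_len][name bytes] }
    //!   one uint32_t that is skipped (see the `+ 1` on shape_buf)
    //!   nr_outputs x { [ndim][dim_0 ... dim_(ndim-1)] }
    //!   [model_buf_len][model graph proto bytes]
    //!   [param_buf_len][model weights bytes]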
    static MGBOprDesc* make(size_t nr_input, const void* buf, size_t buf_len) {
        auto ud = std::make_unique<UserData>();
        std::shared_ptr<mace::MaceEngine> engine;

        mace::DeviceType device_type = mace::DeviceType::CPU;
        char* runtime_mode = getenv("MGB_MACE_RUNTIME");
        if (runtime_mode != nullptr && !strcmp(runtime_mode, "GPU")) {
            device_type = mace::DeviceType::GPU;
        }
        mace::MaceEngineConfig config(device_type);

        // set number of threads for cpu, default 1
        if (device_type == mace::DeviceType::CPU) {
            int nthread = 1;
            char* str_nthread = getenv("MGB_MACE_NR_THREADS");
            if (str_nthread != nullptr) {
                nthread = atoi(str_nthread);
            }
            config.SetCPUThreadPolicy(nthread,
                                      mace::CPUAffinityPolicy::AFFINITY_NONE);
        }

        // set gpu context, mainly opencl path
        if (device_type == mace::DeviceType::GPU) {
            std::shared_ptr<mace::GPUContext> gpu_context;
            char* cache_path = getenv("MGB_MACE_OPENCL_CACHE_PATH");
            ASSERT(cache_path, "there must be an opencl cache file path");
            char* param_path = getenv("MGB_MACE_TUNING_PARAM_PATH");
            std::string opencl_param_path("");
            if (param_path != nullptr) {
                opencl_param_path = std::string(param_path);
            }
            std::string storage_path(cache_path);
            gpu_context = mace::GPUContextBuilder()
                                  .SetStoragePath(storage_path)
                                  .SetOpenCLParameterPath(opencl_param_path)
                                  .Finalize();
            config.SetGPUContext(gpu_context);
            config.SetGPUHints(
                    static_cast<mace::GPUPerfHint>(mace::GPUPerfHint::PERF_HIGH),
                    static_cast<mace::GPUPriorityHint>(
                            mace::GPUPriorityHint::PRIORITY_HIGH));
        }

        std::vector<std::string> input_names, output_names;

        // extract all information from buf
        void* buffer = const_cast<void*>(buf);
        ud->nr_inputs = *reinterpret_cast<uint32_t*>(buffer);
        ud->nr_outputs = *(reinterpret_cast<uint32_t*>(buffer) + 1);

        // interpret input names
        char* name_buf = reinterpret_cast<char*>(buffer) + 8;
        for (size_t i = 0; i < ud->nr_inputs; i++) {
            size_t ilen = *reinterpret_cast<uint32_t*>(name_buf);
            input_names.push_back(std::string(name_buf + 4, ilen));
            name_buf += (ilen + 4);
        }

        // interpret output names
        buffer = name_buf;
        name_buf = reinterpret_cast<char*>(buffer);
        for (size_t i = 0; i < ud->nr_outputs; i++) {
            size_t olen = *reinterpret_cast<uint32_t*>(name_buf);
            output_names.push_back(std::string(name_buf + 4, olen));
            name_buf += (olen + 4);
        }
        ud->input_names = input_names;
        ud->output_names = output_names;

        // interpret output shapes
        buffer = name_buf;
        uint32_t* shape_buf = reinterpret_cast<uint32_t*>(buffer) + 1;
        for (size_t i = 0; i < ud->nr_outputs; i++) {
            size_t olen = *reinterpret_cast<uint32_t*>(shape_buf);
            ud->output_shapes.push_back(
                    std::vector<uint32_t>(shape_buf + 1, shape_buf + olen + 1));
            shape_buf += (olen + 1);
        }

        // the remaining bytes hold the model graph proto and the weights
        buffer = shape_buf;
        const size_t model_buf_len = *reinterpret_cast<uint32_t*>(buffer);
        unsigned char* model_buf = reinterpret_cast<unsigned char*>(buffer) + 4;
        const size_t param_buf_len =
                *reinterpret_cast<uint32_t*>(model_buf + model_buf_len);
        unsigned char* param_buf = model_buf + model_buf_len + 4;

        // create mace engine
        auto create_engine_status = mace::CreateMaceEngineFromProto(
                model_buf, model_buf_len, param_buf, param_buf_len, input_names,
                output_names, config, &engine);
        ASSERT(create_engine_status == mace::MaceStatus::MACE_SUCCESS,
               "Error in creating mace engine");
        ud->engine = engine;

        auto ret = std::make_unique<MGBOprDesc>();
        mgb_init_opr_desc(ret.get(), ud->nr_outputs, "mace");
#define a(n) ret->n = &n;
        MGB_OPR_DESC_FOREACH_MEM_FN(a);
        a(infer_dtype);
#undef a
        ret->user_data = ud.release();
        return ret.release();
    }
};

class MGBOprLoaderImpl {
    static MGBOprDesc* create_desc(size_t nr_input, const void* buf,
                                   size_t buf_len) {
        return MGBOprDescImpl::make(nr_input, buf, buf_len);
    }

public:
    static MGBOprLoader make() { return {"mace", create_desc}; }
};

extern "C" {
// public interface: called by the host after this library is loaded, to
// register the "mace" loader with the extern-c-opr API
__attribute__((visibility("default"))) void MGB_C_OPR_INIT_FUNC(
        const MGBExternCOprApi* (*get_api)(int)) {
    const MGBExternCOprApi* api = get_api(MGB_EXTERN_C_OPR_VERSION);
    ASSERT(api, "Create api failed");
    MGBOprLoader loader = MGBOprLoaderImpl::make();
    api->register_loader(&loader);
}
}  // extern "C"