加载自定义GPU操作时,Tensorflow“未定义符号”

eh57zj3b  于 6个月前  发布在  其他
关注(0)|答案(1)|浏览(74)

我使用的 TensorFlow 1.9rc0 是用 bazel 0.15.0 从源代码编译的,编译时启用了 CUDA 支持,使用的是 CUDA 9.2 和 cuDNN 7。
我想构建一个执行 CUDA 内核的自定义操作。我按照相关的官方文档(documentation)进行了操作,并参考了一些已有操作的实现来开发它。
最后,我得到了以下代码:

  • kernel_example.h:*
#ifndef KERNEL_EXAMPLE_H_
#define KERNEL_EXAMPLE_H_

#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
#include "tensorflow/core/framework/tensor_types.h"

#include "tensorflow/core/framework/register_types.h"
#include "tensorflow/core/platform/types.h"

namespace functor {

// Computation functor, parameterised on the device type `Device` and the
// element type `T`.
//
// Only the call operator is declared here; no generic definition is provided
// in this header.
//
//   d                 device object the computation is issued on
//   input, filter     read-only buffers of T
//   output            writable buffer of T
//   n_elements        element count (the op passes output->NumElements())
//
// The remaining integer arguments are forwarded unchanged to the
// implementation. NOTE(review): presumably convolution geometry
// (depths, spatial sizes, strides) -- confirm against the kernel body.
template <typename Device, typename T>
struct ExampleFunctor {
  void operator()(const Device& d,
                  const T* input,
                  const T* filter,
                  T* output,
                  int in_depth,
                  int input_cols,
                  int input_rows,
                  int out_depth,
                  int filter_cols,
                  int filter_rows,
                  int stride_rows,
                  int stride_cols,
                  int n_elements);
};

#if GOOGLE_CUDA

// Short name for Eigen's GPU device type; selects the specialisation below.
using GPUDevice = Eigen::GpuDevice;

// Partial specialisation of ExampleFunctor for GPUDevice.
//
// Declares the same call operator as the primary template. Its definition is
// given in kernel_example.cu.cc, where it launches the CUDA kernel.
template <typename T>
struct ExampleFunctor<GPUDevice, T> {
  void operator()(const GPUDevice& d,
                  const T* input,
                  const T* filter,
                  T* output,
                  int in_depth,
                  int input_cols,
                  int input_rows,
                  int out_depth,
                  int filter_cols,
                  int filter_rows,
                  int stride_rows,
                  int stride_cols,
                  int n_elements);
};

#endif //GOOGLE_CUDA

}

字符串

  • kernel_example.cc:*
#include "kernel_example.h"
#include "tensorflow/core/framework/op_kernel.h"

#include "tensorflow/core/framework/common_shape_fns.h"

using namespace tensorflow;

using CPUDevice = Eigen::ThreadPoolDevice;
using GPUDevice = Eigen::GpuDevice;

// Registers the "MyConvGpu" op: two inputs and one output, all of element
// type T, with an unknown output shape.
//
// The inputs/output refer to the type attribute "T" (and the kernel
// registrations constrain on "T"), so the attribute has to be declared on the
// op itself. The allowed types mirror the GPU kernels registered in this file
// (float, double, int).
REGISTER_OP("MyConvGpu")
    .Attr("T: {float, double, int32}")
    .Input("input: T")
    .Input("filter: T")
    .Output("output: T")
    .SetShapeFn(tensorflow::shape_inference::UnknownShape);

// OpKernel definition.
// template parameter <T> is the datatype of the tensors.
// OpKernel that delegates its computation to
// functor::ExampleFunctor<Device, T>.
//
//   Device  device type handed to OpKernelContext::eigen_device<>()
//   T       element type of the tensors
template <typename Device, typename T>
class ExampleOp : public OpKernel {
 public:
  explicit ExampleOp(OpKernelConstruction* context) : OpKernel(context) {}

  // Runs the functor on the Eigen device obtained from `context`.
  void Compute(OpKernelContext* context) override {
    // Loading op parameters and defining  variables
    // (Elided in this listing: the code that defines `input`, `filter`,
    // `output` and the integer parameters used below.)

    // Invoke a temporary functor directly; the raw data pointers of the
    // flattened tensors and the output element count are passed through.
    functor::ExampleFunctor<Device, T>()(
        context->eigen_device<Device>(),
        input.flat<T>().data(),
        filter.flat<T>().data(),
        output->flat<T>().data(),
        in_depth,
        input_cols,
        input_rows,
        out_depth,
        filter_cols,
        filter_rows,
        stride_rows,
        stride_cols,
        static_cast<int>(output->NumElements()));
  }
};

#if GOOGLE_CUDA

// Registers ExampleOp<GPUDevice, T> as the GPU kernel of the "MyConvGpu" op
// for element type T. The kernel name must match the name given to
// REGISTER_OP, otherwise the op has no kernel attached to it.
#define REGISTER_GPU_KERNEL(T)                                    \
  REGISTER_KERNEL_BUILDER(Name("MyConvGpu")                       \
                              .Device(DEVICE_GPU)                 \
                              .TypeConstraint<T>("T"),            \
                              ExampleOp<GPUDevice, T>);

// Register each type exactly once, and only the types for which
// functor::ExampleFunctor<GPUDevice, T> is explicitly instantiated in
// kernel_example.cu.cc. Registering through TF_CALL_GPU_NUMBER_TYPES as well
// would register float/double a second time and would instantiate the op for
// Eigen::half, whose functor has no definition in the library.
//REGISTER_GPU_KERNEL(Eigen::half)
REGISTER_GPU_KERNEL(float)
REGISTER_GPU_KERNEL(double)
REGISTER_GPU_KERNEL(int)

#undef REGISTER_GPU_KERNEL

#endif

  • kernel_example.cu.cc:*
#ifdef GOOGLE_CUDA

#define EIGEN_USE_GPU
#include "kernel_example.h"
#include "tensorflow/core/util/cuda_kernel_helper.h"

using namespace tensorflow;

namespace functor {

// Define the CUDA kernel.
// CUDA kernel entry point, templated on the element type T.
//
// `input` and `filter` are read-only, `output` is writable. The integer
// arguments are taken by value. The device code itself is not part of this
// listing.
template <typename T>
__global__ void ExampleCudaKernel(
    const T* input, const T* filter, T* output,
    const int batch_size,
    const int in_depth, const int input_cols, const int input_rows,
    const int out_depth, const int filter_cols, const int filter_rows,
    const int stride_rows, const int stride_cols,
    const int n_elements) {
  // Kernel here
}

// Define the GPU implementation that launches the CUDA kernel.
// GPU implementation of the functor: launches ExampleCudaKernel<T> with one
// thread per output element, in blocks of 256 threads.
//
//   d           Eigen GPU device; the kernel is enqueued on d.stream()
//   input       read-only input buffer
//   filter      read-only filter buffer
//   output      writable output buffer
//   n_elements  element count used to size the launch grid
//
// The remaining integer arguments are forwarded to the kernel unchanged.
template <typename T>
void ExampleFunctor<GPUDevice, T>::operator()(
    const Eigen::GpuDevice& d,
    const T* input, const T* filter, T* output,
    int in_depth, int input_cols, int input_rows,
    int out_depth, int filter_cols, int filter_rows,
    int stride_rows, int stride_cols,
    int n_elements) {
  // A grid with zero blocks is an invalid launch configuration, and there is
  // nothing to compute for an empty output anyway.
  if (n_elements <= 0) {
    return;
  }

  constexpr int kThreadsPerBlock = 256;
  // Ceiling division so that a partially filled last block is still launched.
  const int block_count =
      (n_elements + kThreadsPerBlock - 1) / kThreadsPerBlock;

  // NOTE(review): `batch_size` is not a parameter of this functor and is not
  // declared anywhere in the visible source -- confirm where it should come
  // from (it is required by the kernel signature).
  //
  // Launch on the CUDA stream associated with `d` instead of the default
  // stream, so the kernel is ordered with the other work issued on that
  // device object.
  ExampleCudaKernel<T>
      <<<block_count, kThreadsPerBlock, 0, d.stream()>>>(
          input, filter, output,
          batch_size,
          in_depth, input_cols, input_rows,
          out_depth, filter_cols, filter_rows,
          stride_rows, stride_cols,
          n_elements);
}

// Explicitly instantiate the GPU functor for every element type that has a
// GPU kernel registration (float, double, int). The member definition above
// lives in this translation unit only, so these instantiations are what make
// the symbols available to the code that calls the functor from
// kernel_example.cc.
template struct ExampleFunctor<GPUDevice, float>;
template struct ExampleFunctor<GPUDevice, double>;
template struct ExampleFunctor<GPUDevice, int>;

} //namespace functor

#endif  // GOOGLE_CUDA


使用以下 bazel 构建文件,一切都能正确编译,并生成 example.so 库文件:

load("//tensorflow:tensorflow.bzl", "tf_custom_op_library")

# Builds the custom-op shared library `example.so` from the op sources.
tf_custom_op_library(
    name = "example.so",
    # Op registration and OpKernel code, plus the shared header.
    srcs = [
        "kernel_example.h",
        "kernel_example.cc",
    ],
    # CUDA translation unit, plus the shared header.
    gpu_srcs = [
        "kernel_example.h",
        "kernel_example.cu.cc",
    ],
)


但是,当使用

# Load the compiled custom-op shared library from the current directory.
# This is the kind of call that raises the NotFoundError shown in the
# traceback below.
module = tf.load_op_library('./example.so')


我得到以下输出:

Traceback (most recent call last):
  File "mnist.py", line 51, in <module>
    my_conv_gpu_module = tf.load_op_library('./example.so')
  File "/usr/lib/python3.6/site-packages/tensorflow/python/framework/load_library.py", line 56, in load_op_library
    lib_handle = py_tf.TF_LoadLibrary(library_filename)
tensorflow.python.framework.errors_impl.NotFoundError: ./example.so: undefined symbol: _ZN10tensorflow16FormatFromStringERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEPNS_12TensorFormatE


我还开发了其他的操作,它们不使用cuda加速,加载它们没有问题,尽管它们的实现非常相似。
此外,我已经阅读了有关此错误的其他帖子,但解决方案似乎总是在 bazel 构建参数中添加 **--cxxopt="-D_GLIBCXX_USE_CXX11_ABI=0"** 标志,因为 TensorFlow 的二进制 pip 包是使用 gcc 4 构建的。我已经这样做了,但错误仍然存在。
我还在另一个环境中尝试过这段代码,使用的是 Tensorflow rc1.8、CUDA 8 和 cuDNN 6,但同样没有成功。
我错过了什么?

disbfnqx

disbfnqx1#

cxxopt 仅适用于 C++ 编译。请尝试同时使用 --copt 和 --cxxopt,并将二者都设置为 -D_GLIBCXX_USE_CXX11_ABI=0。

相关问题