diff --git a/docker/Makefile b/docker/Makefile index 0de887d0e19..3a6575b0c43 100644 --- a/docker/Makefile +++ b/docker/Makefile @@ -22,7 +22,7 @@ docker_files: standalone_files standalone_files: standalone/cpu/Dockerfile standalone/gpu/Dockerfile -FROM_GPU = "nvidia/cuda:7.5-cudnn4-devel-ubuntu14.04" +FROM_GPU = "nvidia/cuda:7.5-cudnn5-devel-ubuntu14.04" FROM_CPU = "ubuntu:14.04" GPU_CMAKE_ARGS = -DUSE_CUDNN=1 CPU_CMAKE_ARGS = -DCPU_ONLY=1 diff --git a/docker/standalone/gpu/Dockerfile b/docker/standalone/gpu/Dockerfile index 371aad5b1e9..daf6a7223ff 100644 --- a/docker/standalone/gpu/Dockerfile +++ b/docker/standalone/gpu/Dockerfile @@ -1,4 +1,4 @@ -FROM nvidia/cuda:7.5-cudnn4-devel-ubuntu14.04 +FROM nvidia/cuda:7.5-cudnn5-devel-ubuntu14.04 MAINTAINER caffe-maint@googlegroups.com RUN apt-get update && apt-get install -y --no-install-recommends \ diff --git a/docs/installation.md b/docs/installation.md index 1e29a49d82d..4aac7c42d27 100644 --- a/docs/installation.md +++ b/docs/installation.md @@ -40,14 +40,14 @@ Optional dependencies: * [OpenCV](http://opencv.org/) >= 2.4 including 3.0 * IO libraries: `lmdb`, `leveldb` (note: leveldb requires `snappy`) -* cuDNN for GPU acceleration (v4) +* cuDNN for GPU acceleration (v5) Pycaffe and Matcaffe interfaces have their own natural needs. * For Python Caffe: `Python 2.7` or `Python 3.3+`, `numpy (>= 1.7)`, boost-provided `boost.python` * For MATLAB Caffe: MATLAB with the `mex` compiler. -**cuDNN Caffe**: for fastest operation Caffe is accelerated by drop-in integration of [NVIDIA cuDNN](https://developer.nvidia.com/cudnn). To speed up your Caffe models, install cuDNN then uncomment the `USE_CUDNN := 1` flag in `Makefile.config` when installing Caffe. Acceleration is automatic. The current version is cuDNN v4; older versions are supported in older Caffe. +**cuDNN Caffe**: for fastest operation Caffe is accelerated by drop-in integration of [NVIDIA cuDNN](https://developer.nvidia.com/cudnn). To speed up your Caffe models, install cuDNN then uncomment the `USE_CUDNN := 1` flag in `Makefile.config` when installing Caffe. Acceleration is automatic. The current version is cuDNN v5; older versions are supported in older Caffe. **CPU-only Caffe**: for cold-brewed CPU-only Caffe uncomment the `CPU_ONLY := 1` flag in `Makefile.config` to configure and build Caffe without CUDA. This is helpful for cloud or cluster deployment. diff --git a/include/caffe/layers/cudnn_relu_layer.hpp b/include/caffe/layers/cudnn_relu_layer.hpp index e01f568abc9..a1cb29e7c5f 100644 --- a/include/caffe/layers/cudnn_relu_layer.hpp +++ b/include/caffe/layers/cudnn_relu_layer.hpp @@ -37,6 +37,7 @@ class CuDNNReLULayer : public ReLULayer { cudnnHandle_t handle_; cudnnTensorDescriptor_t bottom_desc_; cudnnTensorDescriptor_t top_desc_; + cudnnActivationDescriptor_t activ_desc_; }; #endif diff --git a/include/caffe/layers/cudnn_sigmoid_layer.hpp b/include/caffe/layers/cudnn_sigmoid_layer.hpp index 9c597958b0b..7b3486f8a7e 100644 --- a/include/caffe/layers/cudnn_sigmoid_layer.hpp +++ b/include/caffe/layers/cudnn_sigmoid_layer.hpp @@ -37,6 +37,7 @@ class CuDNNSigmoidLayer : public SigmoidLayer { cudnnHandle_t handle_; cudnnTensorDescriptor_t bottom_desc_; cudnnTensorDescriptor_t top_desc_; + cudnnActivationDescriptor_t activ_desc_; }; #endif diff --git a/include/caffe/layers/cudnn_tanh_layer.hpp b/include/caffe/layers/cudnn_tanh_layer.hpp index c0f0053f71e..59e758d7031 100644 --- a/include/caffe/layers/cudnn_tanh_layer.hpp +++ b/include/caffe/layers/cudnn_tanh_layer.hpp @@ -37,6 +37,7 @@ class CuDNNTanHLayer : public TanHLayer { cudnnHandle_t handle_; cudnnTensorDescriptor_t bottom_desc_; cudnnTensorDescriptor_t top_desc_; + cudnnActivationDescriptor_t activ_desc_; }; #endif diff --git a/include/caffe/util/cudnn.hpp b/include/caffe/util/cudnn.hpp index 8a7e17c6cd4..a7d8dbbad4c 100644 --- a/include/caffe/util/cudnn.hpp +++ b/include/caffe/util/cudnn.hpp @@ -91,8 +91,13 @@ template inline void createFilterDesc(cudnnFilterDescriptor_t* desc, int n, int c, int h, int w) { CUDNN_CHECK(cudnnCreateFilterDescriptor(desc)); +#if CUDNN_VERSION_MIN(5, 0, 0) CUDNN_CHECK(cudnnSetFilter4dDescriptor(*desc, dataType::type, - n, c, h, w)); + CUDNN_TENSOR_NCHW, n, c, h, w)); +#else + CUDNN_CHECK(cudnnSetFilter4dDescriptor_v4(*desc, dataType::type, + CUDNN_TENSOR_NCHW, n, c, h, w)); +#endif } template @@ -123,8 +128,21 @@ inline void createPoolingDesc(cudnnPoolingDescriptor_t* pool_desc, LOG(FATAL) << "Unknown pooling method."; } CUDNN_CHECK(cudnnCreatePoolingDescriptor(pool_desc)); - CUDNN_CHECK(cudnnSetPooling2dDescriptor(*pool_desc, *mode, h, w, - pad_h, pad_w, stride_h, stride_w)); +#if CUDNN_VERSION_MIN(5, 0, 0) + CUDNN_CHECK(cudnnSetPooling2dDescriptor(*pool_desc, *mode, + CUDNN_PROPAGATE_NAN, h, w, pad_h, pad_w, stride_h, stride_w)); +#else + CUDNN_CHECK(cudnnSetPooling2dDescriptor_v4(*pool_desc, *mode, + CUDNN_PROPAGATE_NAN, h, w, pad_h, pad_w, stride_h, stride_w)); +#endif +} + +template +inline void createActivationDescriptor(cudnnActivationDescriptor_t* activ_desc, + cudnnActivationMode_t mode) { + CUDNN_CHECK(cudnnCreateActivationDescriptor(activ_desc)); + CUDNN_CHECK(cudnnSetActivationDescriptor(*activ_desc, mode, + CUDNN_PROPAGATE_NAN, Dtype(0))); } } // namespace cudnn diff --git a/src/caffe/layers/cudnn_conv_layer.cu b/src/caffe/layers/cudnn_conv_layer.cu index 42c4fd0260c..8bc5346248c 100644 --- a/src/caffe/layers/cudnn_conv_layer.cu +++ b/src/caffe/layers/cudnn_conv_layer.cu @@ -30,19 +30,11 @@ void CuDNNConvolutionLayer::Forward_gpu( // Bias. if (this->bias_term_) { const Dtype* bias_data = this->blobs_[1]->gpu_data(); -#if CUDNN_VERSION_MIN(4, 0, 0) CUDNN_CHECK(cudnnAddTensor(handle_[g], cudnn::dataType::one, bias_desc_, bias_data + bias_offset_ * g, cudnn::dataType::one, top_descs_[i], top_data + top_offset_ * g)); -#else - CUDNN_CHECK(cudnnAddTensor(handle_[g], CUDNN_ADD_SAME_C, - cudnn::dataType::one, - bias_desc_, bias_data + bias_offset_ * g, - cudnn::dataType::one, - top_descs_[i], top_data + top_offset_ * g)); -#endif } } @@ -82,7 +74,7 @@ void CuDNNConvolutionLayer::Backward_gpu(const vector*>& top, // Gradient w.r.t. weights. if (this->param_propagate_down_[0]) { const Dtype* bottom_data = bottom[i]->gpu_data(); - CUDNN_CHECK(cudnnConvolutionBackwardFilter_v3( + CUDNN_CHECK(cudnnConvolutionBackwardFilter( handle_[1*this->group_ + g], cudnn::dataType::one, bottom_descs_[i], bottom_data + bottom_offset_ * g, @@ -100,7 +92,7 @@ void CuDNNConvolutionLayer::Backward_gpu(const vector*>& top, weight = this->blobs_[0]->gpu_data(); } Dtype* bottom_diff = bottom[i]->mutable_gpu_diff(); - CUDNN_CHECK(cudnnConvolutionBackwardData_v3( + CUDNN_CHECK(cudnnConvolutionBackwardData( handle_[2*this->group_ + g], cudnn::dataType::one, filter_desc_, weight + this->weight_offset_ * g, diff --git a/src/caffe/layers/cudnn_relu_layer.cpp b/src/caffe/layers/cudnn_relu_layer.cpp index c86c6907113..795e0a9efb0 100644 --- a/src/caffe/layers/cudnn_relu_layer.cpp +++ b/src/caffe/layers/cudnn_relu_layer.cpp @@ -13,6 +13,7 @@ void CuDNNReLULayer::LayerSetUp(const vector*>& bottom, CUDNN_CHECK(cudnnCreate(&handle_)); cudnn::createTensor4dDesc(&bottom_desc_); cudnn::createTensor4dDesc(&top_desc_); + cudnn::createActivationDescriptor(&activ_desc_, CUDNN_ACTIVATION_RELU); handles_setup_ = true; } diff --git a/src/caffe/layers/cudnn_relu_layer.cu b/src/caffe/layers/cudnn_relu_layer.cu index 9f617183baa..e7928bbd6e0 100644 --- a/src/caffe/layers/cudnn_relu_layer.cu +++ b/src/caffe/layers/cudnn_relu_layer.cu @@ -15,12 +15,21 @@ void CuDNNReLULayer::Forward_gpu(const vector*>& bottom, const Dtype* bottom_data = bottom[0]->gpu_data(); Dtype* top_data = top[0]->mutable_gpu_data(); +#if CUDNN_VERSION_MIN(5, 0, 0) CUDNN_CHECK(cudnnActivationForward(this->handle_, - CUDNN_ACTIVATION_RELU, + activ_desc_, cudnn::dataType::one, this->bottom_desc_, bottom_data, cudnn::dataType::zero, this->top_desc_, top_data)); +#else + CUDNN_CHECK(cudnnActivationForward_v4(this->handle_, + activ_desc_, + cudnn::dataType::one, + this->bottom_desc_, bottom_data, + cudnn::dataType::zero, + this->top_desc_, top_data)); +#endif } template @@ -40,13 +49,23 @@ void CuDNNReLULayer::Backward_gpu(const vector*>& top, const Dtype* top_diff = top[0]->gpu_diff(); const Dtype* bottom_data = bottom[0]->gpu_data(); Dtype* bottom_diff = bottom[0]->mutable_gpu_diff(); +#if CUDNN_VERSION_MIN(5, 0, 0) CUDNN_CHECK(cudnnActivationBackward(this->handle_, - CUDNN_ACTIVATION_RELU, + activ_desc_, cudnn::dataType::one, this->top_desc_, top_data, this->top_desc_, top_diff, this->bottom_desc_, bottom_data, cudnn::dataType::zero, this->bottom_desc_, bottom_diff)); +#else + CUDNN_CHECK(cudnnActivationBackward_v4(this->handle_, + activ_desc_, + cudnn::dataType::one, + this->top_desc_, top_data, this->top_desc_, top_diff, + this->bottom_desc_, bottom_data, + cudnn::dataType::zero, + this->bottom_desc_, bottom_diff)); +#endif } INSTANTIATE_LAYER_GPU_FUNCS(CuDNNReLULayer); diff --git a/src/caffe/layers/cudnn_sigmoid_layer.cpp b/src/caffe/layers/cudnn_sigmoid_layer.cpp index ccb955cdaff..3ce6aef1764 100644 --- a/src/caffe/layers/cudnn_sigmoid_layer.cpp +++ b/src/caffe/layers/cudnn_sigmoid_layer.cpp @@ -13,6 +13,8 @@ void CuDNNSigmoidLayer::LayerSetUp(const vector*>& bottom, CUDNN_CHECK(cudnnCreate(&handle_)); cudnn::createTensor4dDesc(&bottom_desc_); cudnn::createTensor4dDesc(&top_desc_); + cudnn::createActivationDescriptor(&activ_desc_, + CUDNN_ACTIVATION_SIGMOID); handles_setup_ = true; } diff --git a/src/caffe/layers/cudnn_sigmoid_layer.cu b/src/caffe/layers/cudnn_sigmoid_layer.cu index e2a4b460c6c..48d6cbab6de 100644 --- a/src/caffe/layers/cudnn_sigmoid_layer.cu +++ b/src/caffe/layers/cudnn_sigmoid_layer.cu @@ -10,12 +10,21 @@ void CuDNNSigmoidLayer::Forward_gpu(const vector*>& bottom, const vector*>& top) { const Dtype* bottom_data = bottom[0]->gpu_data(); Dtype* top_data = top[0]->mutable_gpu_data(); +#if CUDNN_VERSION_MIN(5, 0, 0) CUDNN_CHECK(cudnnActivationForward(this->handle_, - CUDNN_ACTIVATION_SIGMOID, + activ_desc_, cudnn::dataType::one, this->bottom_desc_, bottom_data, cudnn::dataType::zero, this->top_desc_, top_data)); +#else + CUDNN_CHECK(cudnnActivationForward_v4(this->handle_, + activ_desc_, + cudnn::dataType::one, + this->bottom_desc_, bottom_data, + cudnn::dataType::zero, + this->top_desc_, top_data)); +#endif } template @@ -30,13 +39,23 @@ void CuDNNSigmoidLayer::Backward_gpu(const vector*>& top, const Dtype* top_diff = top[0]->gpu_diff(); const Dtype* bottom_data = bottom[0]->gpu_data(); Dtype* bottom_diff = bottom[0]->mutable_gpu_diff(); +#if CUDNN_VERSION_MIN(5, 0, 0) CUDNN_CHECK(cudnnActivationBackward(this->handle_, - CUDNN_ACTIVATION_SIGMOID, + activ_desc_, cudnn::dataType::one, this->top_desc_, top_data, this->top_desc_, top_diff, this->bottom_desc_, bottom_data, cudnn::dataType::zero, this->bottom_desc_, bottom_diff)); +#else + CUDNN_CHECK(cudnnActivationBackward_v4(this->handle_, + activ_desc_, + cudnn::dataType::one, + this->top_desc_, top_data, this->top_desc_, top_diff, + this->bottom_desc_, bottom_data, + cudnn::dataType::zero, + this->bottom_desc_, bottom_diff)); +#endif } INSTANTIATE_LAYER_GPU_FUNCS(CuDNNSigmoidLayer); diff --git a/src/caffe/layers/cudnn_tanh_layer.cpp b/src/caffe/layers/cudnn_tanh_layer.cpp index 1a56418227c..e87dd9de0ab 100644 --- a/src/caffe/layers/cudnn_tanh_layer.cpp +++ b/src/caffe/layers/cudnn_tanh_layer.cpp @@ -13,6 +13,7 @@ void CuDNNTanHLayer::LayerSetUp(const vector*>& bottom, CUDNN_CHECK(cudnnCreate(&handle_)); cudnn::createTensor4dDesc(&bottom_desc_); cudnn::createTensor4dDesc(&top_desc_); + cudnn::createActivationDescriptor(&activ_desc_, CUDNN_ACTIVATION_TANH); handles_setup_ = true; } diff --git a/src/caffe/layers/cudnn_tanh_layer.cu b/src/caffe/layers/cudnn_tanh_layer.cu index 89df28a3e8b..6b5d7ae7ea7 100644 --- a/src/caffe/layers/cudnn_tanh_layer.cu +++ b/src/caffe/layers/cudnn_tanh_layer.cu @@ -10,12 +10,21 @@ void CuDNNTanHLayer::Forward_gpu(const vector*>& bottom, const vector*>& top) { const Dtype* bottom_data = bottom[0]->gpu_data(); Dtype* top_data = top[0]->mutable_gpu_data(); +#if CUDNN_VERSION_MIN(5, 0, 0) CUDNN_CHECK(cudnnActivationForward(this->handle_, - CUDNN_ACTIVATION_TANH, + activ_desc_, cudnn::dataType::one, this->bottom_desc_, bottom_data, cudnn::dataType::zero, this->top_desc_, top_data)); +#else + CUDNN_CHECK(cudnnActivationForward_v4(this->handle_, + activ_desc_, + cudnn::dataType::one, + this->bottom_desc_, bottom_data, + cudnn::dataType::zero, + this->top_desc_, top_data)); +#endif } template @@ -31,13 +40,23 @@ void CuDNNTanHLayer::Backward_gpu(const vector*>& top, const Dtype* bottom_data = bottom[0]->gpu_data(); Dtype* bottom_diff = bottom[0]->mutable_gpu_diff(); +#if CUDNN_VERSION_MIN(5, 0, 0) CUDNN_CHECK(cudnnActivationBackward(this->handle_, - CUDNN_ACTIVATION_TANH, + activ_desc_, cudnn::dataType::one, this->top_desc_, top_data, this->top_desc_, top_diff, this->bottom_desc_, bottom_data, cudnn::dataType::zero, this->bottom_desc_, bottom_diff)); +#else + CUDNN_CHECK(cudnnActivationBackward_v4(this->handle_, + activ_desc_, + cudnn::dataType::one, + this->top_desc_, top_data, this->top_desc_, top_diff, + this->bottom_desc_, bottom_data, + cudnn::dataType::zero, + this->bottom_desc_, bottom_diff)); +#endif } INSTANTIATE_LAYER_GPU_FUNCS(CuDNNTanHLayer);