From 63ed23bd83d5c30e74a83a976524dadf22d6dd49 Mon Sep 17 00:00:00 2001
From: Takuya Narihira
Date: Mon, 4 May 2015 11:44:44 -0700
Subject: [PATCH 1/2] Fix redundancy of parameter backward computation

---
 src/caffe/layers/prelu_layer.cu | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/caffe/layers/prelu_layer.cu b/src/caffe/layers/prelu_layer.cu
index fd0eda5d191..5fd69d6c4d4 100644
--- a/src/caffe/layers/prelu_layer.cu
+++ b/src/caffe/layers/prelu_layer.cu
@@ -89,7 +89,7 @@ void PReLULayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
       Dtype* temp_buff = multiplier_.mutable_gpu_diff();
       // compute element-wise diff
       // NOLINT_NEXT_LINE(whitespace/operators)
-      PReLUParamBackward<Dtype><<<CAFFE_GET_BLOCKS(count),
+      PReLUParamBackward<Dtype><<<CAFFE_GET_BLOCKS(cdim),
           CAFFE_CUDA_NUM_THREADS>>>(
           cdim, top_diff + top[0]->offset(n),
           bottom_data + bottom[0]->offset(n), multiplier_.mutable_gpu_diff());

From 4348c6f4c905e9eb2c4dee32614a1880e074b217 Mon Sep 17 00:00:00 2001
From: Takuya Narihira
Date: Mon, 4 May 2015 11:45:33 -0700
Subject: [PATCH 2/2] Modify for better readability regarding temporary buffer
 for backward computation

---
 include/caffe/neuron_layers.hpp  | 3 ++-
 src/caffe/layers/prelu_layer.cpp | 3 ++-
 src/caffe/layers/prelu_layer.cu  | 8 ++++----
 3 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/include/caffe/neuron_layers.hpp b/include/caffe/neuron_layers.hpp
index 323215134c7..aff58233e5c 100644
--- a/include/caffe/neuron_layers.hpp
+++ b/include/caffe/neuron_layers.hpp
@@ -734,7 +734,8 @@ class PReLULayer : public NeuronLayer<Dtype> {
       const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
 
   bool channel_shared_;
-  Blob<Dtype> multiplier_;  // dot multipler for backward computation of params
+  Blob<Dtype> multiplier_;  // dot multiplier for backward computation of params
+  Blob<Dtype> backward_buff_;  // temporary buffer for backward computation
   Blob<Dtype> bottom_memory_;  // memory for in-place computation
 };
 
diff --git a/src/caffe/layers/prelu_layer.cpp b/src/caffe/layers/prelu_layer.cpp
index 7119a274dd3..7a38f9fac80 100644
--- a/src/caffe/layers/prelu_layer.cpp
+++ 
b/src/caffe/layers/prelu_layer.cpp
@@ -45,7 +45,8 @@ void PReLULayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
 
   // Propagate gradients to the parameters (as directed by backward pass).
   this->param_propagate_down_.resize(this->blobs_.size(), true);
-  multiplier_.Reshape(vector<int>(1, bottom[0]->count() / bottom[0]->num()));
+  multiplier_.Reshape(vector<int>(1, bottom[0]->count(1)));
+  backward_buff_.Reshape(vector<int>(1, bottom[0]->count(1)));
   caffe_set(multiplier_.count(), Dtype(1), multiplier_.mutable_cpu_data());
 }
 
diff --git a/src/caffe/layers/prelu_layer.cu b/src/caffe/layers/prelu_layer.cu
index 5fd69d6c4d4..dfa238d85bd 100644
--- a/src/caffe/layers/prelu_layer.cu
+++ b/src/caffe/layers/prelu_layer.cu
@@ -86,22 +86,22 @@ void PReLULayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
     int cdim = channels * dim;
     Dtype dsum = 0.;
     for (int n = 0; n < bottom[0]->num(); ++n) {
-      Dtype* temp_buff = multiplier_.mutable_gpu_diff();
       // compute element-wise diff
       // NOLINT_NEXT_LINE(whitespace/operators)
       PReLUParamBackward<Dtype><<<CAFFE_GET_BLOCKS(cdim),
           CAFFE_CUDA_NUM_THREADS>>>(
           cdim, top_diff + top[0]->offset(n),
-          bottom_data + bottom[0]->offset(n), multiplier_.mutable_gpu_diff());
+          bottom_data + bottom[0]->offset(n),
+          backward_buff_.mutable_gpu_diff());
       CUDA_POST_KERNEL_CHECK;
       if (channel_shared_) {
         Dtype d;
-        caffe_gpu_dot<Dtype>(channels * dim, multiplier_.gpu_diff(),
+        caffe_gpu_dot<Dtype>(channels * dim, backward_buff_.gpu_diff(),
             multiplier_.gpu_data(), &d);
         dsum += d;
       } else {
         caffe_gpu_gemv<Dtype>(CblasNoTrans, channels, dim, 1.,
-            multiplier_.gpu_diff(), multiplier_.gpu_data(), 1.,
+            backward_buff_.gpu_diff(), multiplier_.gpu_data(), 1.,
             slope_diff);
       }
     }