diff --git a/include/caffe/blob.hpp b/include/caffe/blob.hpp index 42e4420408c..36579a5a545 100644 --- a/include/caffe/blob.hpp +++ b/include/caffe/blob.hpp @@ -1,11 +1,17 @@ #ifndef CAFFE_BLOB_HPP_ #define CAFFE_BLOB_HPP_ +#include +#include +#include + #include "caffe/common.hpp" #include "caffe/proto/caffe.pb.h" #include "caffe/syncedmem.hpp" #include "caffe/util/math_functions.hpp" +const int kMaxBlobAxes = INT_MAX; + namespace caffe { /** @@ -19,10 +25,16 @@ template class Blob { public: Blob() - : data_(), diff_(), num_(0), channels_(0), height_(0), width_(0), - count_(0), capacity_(0) {} + : data_(), diff_(), count_(0), capacity_(0) {} + + /// @brief Deprecated; use Blob(const vector& shape). explicit Blob(const int num, const int channels, const int height, - const int width); + const int width); + explicit Blob(const vector& shape); + + /// @brief Deprecated; use Reshape(const vector& shape). + void Reshape(const int num, const int channels, const int height, + const int width); /** * @brief Change the dimensions of the blob, allocating new memory if * necessary. @@ -37,25 +49,133 @@ class Blob { * an error; either Net::Forward or Net::Reshape need to be called to * propagate the new input shape to higher layers. 
*/ - void Reshape(const int num, const int channels, const int height, - const int width); + void Reshape(const vector& shape); + void Reshape(const BlobShape& shape); void ReshapeLike(const Blob& other); - inline int num() const { return num_; } - inline int channels() const { return channels_; } - inline int height() const { return height_; } - inline int width() const { return width_; } + inline string shape_string() const { + ostringstream stream; + for (int i = 0; i < shape_.size(); ++i) { + stream << shape_[i] << " "; + } + stream << "(" << count_ << ")"; + return stream.str(); + } + inline const vector& shape() const { return shape_; } + /** + * @brief Returns the dimension of the index-th axis (or the negative index-th + * axis from the end, if index is negative). + * + * @param index the axis index, which may be negative as it will be + * "canonicalized" using CanonicalAxisIndex. + * Dies on out of range index. + */ + inline int shape(int index) const { + return shape_[CanonicalAxisIndex(index)]; + } + inline int num_axes() const { return shape_.size(); } inline int count() const { return count_; } + + /** + * @brief Compute the volume of a slice; i.e., the product of dimensions + * among a range of axes. + * + * @param start_axis The first axis to include in the slice. + * + * @param end_axis The first axis to exclude from the slice. + */ + inline int count(int start_axis, int end_axis) const { + CHECK_LE(start_axis, end_axis); + CHECK_GE(start_axis, 0); + CHECK_GE(end_axis, 0); + CHECK_LE(start_axis, num_axes()); + CHECK_LE(end_axis, num_axes()); + int count = 1; + for (int i = start_axis; i < end_axis; ++i) { + count *= shape(i); + } + return count; + } + /** + * @brief Compute the volume of a slice spanning from a particular first + * axis to the final axis. + * + * @param start_axis The first axis to include in the slice. 
+ */ + inline int count(int start_axis) const { + return count(start_axis, num_axes()); + } + + /** + * @brief Returns the 'canonical' version of a (usually) user-specified axis, + * allowing for negative indexing (e.g., -1 for the last axis). + * + * @param index the axis index. + * If 0 <= index < num_axes(), return index. + * If -num_axes <= index <= -1, return (num_axes() - (-index)), + * e.g., the last axis index (num_axes() - 1) if index == -1, + * the second to last if index == -2, etc. + * Dies on out of range index. + */ + inline int CanonicalAxisIndex(int axis_index) const { + CHECK_GE(axis_index, -num_axes()) + << "axis " << axis_index << " out of range for " << num_axes() + << "-D Blob with shape " << shape_string(); + CHECK_LT(axis_index, num_axes()) + << "axis " << axis_index << " out of range for " << num_axes() + << "-D Blob with shape " << shape_string(); + if (axis_index < 0) { + return axis_index + num_axes(); + } + return axis_index; + } + + /// @brief Deprecated legacy shape accessor num: use shape(0) instead. + inline int num() const { return LegacyShape(0); } + /// @brief Deprecated legacy shape accessor channels: use shape(1) instead. + inline int channels() const { return LegacyShape(1); } + /// @brief Deprecated legacy shape accessor height: use shape(2) instead. + inline int height() const { return LegacyShape(2); } + /// @brief Deprecated legacy shape accessor width: use shape(3) instead. + inline int width() const { return LegacyShape(3); } + inline int LegacyShape(int index) const { + CHECK_LE(num_axes(), 4) + << "Cannot use legacy accessors on Blobs with > 4 axes."; + CHECK_LT(index, 4); + CHECK_GE(index, -4); + if (index >= num_axes() || index < -num_axes()) { + // Axis is out of range, but still in [0, 3] (or [-4, -1] for reverse + // indexing) -- this special case simulates the one-padding used to fill + // extraneous axes of legacy blobs. 
+ return 1; + } + return shape(index); + } + inline int offset(const int n, const int c = 0, const int h = 0, const int w = 0) const { CHECK_GE(n, 0); - CHECK_LE(n, num_); - CHECK_GE(channels_, 0); - CHECK_LE(c, channels_); - CHECK_GE(height_, 0); - CHECK_LE(h, height_); - CHECK_GE(width_, 0); - CHECK_LE(w, width_); - return ((n * channels_ + c) * height_ + h) * width_ + w; + CHECK_LE(n, num()); + CHECK_GE(channels(), 0); + CHECK_LE(c, channels()); + CHECK_GE(height(), 0); + CHECK_LE(h, height()); + CHECK_GE(width(), 0); + CHECK_LE(w, width()); + return ((n * channels() + c) * height() + h) * width() + w; + } + + inline int offset(const vector& indices) const { + CHECK_LE(indices.size(), num_axes()); + int offset = 0; + for (int i = 0; i < num_axes(); ++i) { + offset *= shape(i); + if (indices.size() > i) { + CHECK_GE(indices[i], 0); + CHECK_LT(indices[i], shape(i)); + offset += indices[i]; + } + } + return offset; } /** * @brief Copy from a source Blob. @@ -71,12 +191,20 @@ class Blob { inline Dtype data_at(const int n, const int c, const int h, const int w) const { - return *(cpu_data() + offset(n, c, h, w)); + return cpu_data()[offset(n, c, h, w)]; } inline Dtype diff_at(const int n, const int c, const int h, const int w) const { - return *(cpu_diff() + offset(n, c, h, w)); + return cpu_diff()[offset(n, c, h, w)]; + } + + inline Dtype data_at(const vector& index) const { + return cpu_data()[offset(index)]; + } + + inline Dtype diff_at(const vector& index) const { + return cpu_diff()[offset(index)]; } inline const shared_ptr& data() const { @@ -99,7 +227,7 @@ class Blob { Dtype* mutable_cpu_diff(); Dtype* mutable_gpu_diff(); void Update(); - void FromProto(const BlobProto& proto); + void FromProto(const BlobProto& proto, bool reshape = true); void ToProto(BlobProto* proto, bool write_diff = false) const; /// @brief Compute the sum of absolute values (L1 norm) of the data. 
@@ -135,13 +263,12 @@ class Blob { */ void ShareDiff(const Blob& other); + bool ShapeEquals(const BlobProto& other); + protected: shared_ptr data_; shared_ptr diff_; - int num_; - int channels_; - int height_; - int width_; + vector shape_; int count_; int capacity_; diff --git a/include/caffe/common_layers.hpp b/include/caffe/common_layers.hpp index c67822c3738..b1ac3a93eff 100644 --- a/include/caffe/common_layers.hpp +++ b/include/caffe/common_layers.hpp @@ -99,8 +99,8 @@ class ConcatLayer : public Layer { * - K @f$ (N \times C \times H \times W) @f$ * the inputs @f$ x_K @f$ * @param top output Blob vector (length 1) - * -# @f$ (KN \times C \times H \times W) @f$ if concat_dim == 0, or - * @f$ (N \times KC \times H \times W) @f$ if concat_dim == 1: + * -# @f$ (KN \times C \times H \times W) @f$ if axis == 0, or + * @f$ (N \times KC \times H \times W) @f$ if axis == 1: * the concatenated output @f$ * y = [\begin{array}{cccc} x_1 & x_2 & ... & x_K \end{array}] * @f$ @@ -115,8 +115,8 @@ class ConcatLayer : public Layer { * * @param top output Blob vector (length 1), providing the error gradient with * respect to the outputs - * -# @f$ (KN \times C \times H \times W) @f$ if concat_dim == 0, or - * @f$ (N \times KC \times H \times W) @f$ if concat_dim == 1: + * -# @f$ (KN \times C \times H \times W) @f$ if axis == 0, or + * @f$ (N \times KC \times H \times W) @f$ if axis == 1: * containing error gradients @f$ \frac{\partial E}{\partial y} @f$ * with respect to concatenated outputs @f$ y @f$ * @param propagate_down see Layer::Backward. 
@@ -137,13 +137,10 @@ class ConcatLayer : public Layer { virtual void Backward_gpu(const vector*>& top, const vector& propagate_down, const vector*>& bottom); - Blob col_bob_; int count_; - int num_; - int channels_; - int height_; - int width_; - int concat_dim_; + int num_concats_; + int concat_input_size_; + int concat_axis_; }; /** @@ -216,8 +213,6 @@ class FlattenLayer : public Layer { */ virtual void Forward_cpu(const vector*>& bottom, const vector*>& top); - virtual void Forward_gpu(const vector*>& bottom, - const vector*>& top); /** * @brief Computes the error gradient w.r.t. the concatenate inputs. @@ -230,10 +225,6 @@ class FlattenLayer : public Layer { */ virtual void Backward_cpu(const vector*>& top, const vector& propagate_down, const vector*>& bottom); - virtual void Backward_gpu(const vector*>& top, - const vector& propagate_down, const vector*>& bottom); - - int count_; }; /** @@ -362,6 +353,9 @@ class SoftmaxLayer : public Layer { virtual void Backward_gpu(const vector*>& top, const vector& propagate_down, const vector*>& bottom); + int outer_num_; + int inner_num_; + int softmax_axis_; /// sum_multiplier is used to carry out sum using BLAS Blob sum_multiplier_; /// scale is an intermediate Blob to hold temporary results. @@ -458,13 +452,10 @@ class SliceLayer : public Layer { virtual void Backward_gpu(const vector*>& top, const vector& propagate_down, const vector*>& bottom); - Blob col_bob_; int count_; - int num_; - int channels_; - int height_; - int width_; - int slice_dim_; + int num_slices_; + int slice_size_; + int slice_axis_; vector slice_point_; }; diff --git a/include/caffe/filler.hpp b/include/caffe/filler.hpp index eebf565b1d5..bb18e8e1e28 100644 --- a/include/caffe/filler.hpp +++ b/include/caffe/filler.hpp @@ -79,9 +79,8 @@ class GaussianFiller : public Filler { // These have num == channels == 1; width is number of inputs; height is // number of outputs. 
The 'sparse' variable specifies the mean number // of non-zero input weights for a given output. - CHECK_EQ(blob->num(), 1); - CHECK_EQ(blob->channels(), 1); - int num_outputs = blob->height(); + CHECK_GE(blob->num_axes(), 1); + const int num_outputs = blob->shape(0); Dtype non_zero_probability = Dtype(sparse) / Dtype(num_outputs); rand_vec_.reset(new SyncedMemory(blob->count() * sizeof(int))); int* mask = reinterpret_cast(rand_vec_->mutable_cpu_data()); diff --git a/include/caffe/loss_layers.hpp b/include/caffe/loss_layers.hpp index 36413ccd176..62d6df71a4a 100644 --- a/include/caffe/loss_layers.hpp +++ b/include/caffe/loss_layers.hpp @@ -754,6 +754,8 @@ class SoftmaxWithLossLayer : public LossLayer { /// Whether to normalize the loss by the total number of values present /// (otherwise just by the batch size). bool normalize_; + + int softmax_axis_, outer_num_, inner_num_; }; } // namespace caffe diff --git a/python/caffe/_caffe.cpp b/python/caffe/_caffe.cpp index 03967a21029..bfea0de661b 100644 --- a/python/caffe/_caffe.cpp +++ b/python/caffe/_caffe.cpp @@ -5,6 +5,7 @@ #include #include +#include #include #include @@ -163,9 +164,10 @@ struct NdarrayCallPolicies : public bp::default_call_policies { // the shape information from the blob. void* data = PyArray_DATA(reinterpret_cast(result)); Py_DECREF(result); - npy_intp dims[] = {blob->num(), blob->channels(), - blob->height(), blob->width()}; - PyObject* arr_obj = PyArray_SimpleNewFromData(4, dims, NPY_FLOAT32, data); + const int num_axes = blob->num_axes(); + vector dims(blob->shape().begin(), blob->shape().end()); + PyObject *arr_obj = PyArray_SimpleNewFromData(num_axes, dims.data(), + NPY_FLOAT32, data); // SetBaseObject steals a ref, so we need to INCREF. 
Py_INCREF(pyblob.ptr()); PyArray_SetBaseObject(reinterpret_cast(arr_obj), @@ -174,6 +176,20 @@ struct NdarrayCallPolicies : public bp::default_call_policies { } }; +bp::object Blob_Reshape(bp::tuple args, bp::dict kwargs) { + if (bp::len(kwargs) > 0) { + throw std::runtime_error("Blob.reshape takes no kwargs"); + } + Blob* self = bp::extract*>(args[0]); + vector shape(bp::len(args) - 1); + for (int i = 1; i < bp::len(args); ++i) { + shape[i - 1] = bp::extract(args[i]); + } + self->Reshape(shape); + // We need to explicitly return None to use bp::raw_function. + return bp::object(); +} + BOOST_PYTHON_MEMBER_FUNCTION_OVERLOADS(SolveOverloads, Solve, 0, 1); BOOST_PYTHON_MODULE(_caffe) { @@ -218,8 +234,9 @@ BOOST_PYTHON_MODULE(_caffe) { .add_property("channels", &Blob::channels) .add_property("height", &Blob::height) .add_property("width", &Blob::width) - .add_property("count", &Blob::count) - .def("reshape", &Blob::Reshape) + .add_property("count", static_cast::*)() const>( + &Blob::count)) + .def("reshape", bp::raw_function(&Blob_Reshape)) .add_property("data", bp::make_function(&Blob::mutable_cpu_data, NdarrayCallPolicies())) .add_property("diff", bp::make_function(&Blob::mutable_cpu_diff, diff --git a/python/caffe/pycaffe.py b/python/caffe/pycaffe.py index d662d6cc282..ac387d51d96 100644 --- a/python/caffe/pycaffe.py +++ b/python/caffe/pycaffe.py @@ -85,8 +85,6 @@ def _Net_forward(self, blobs=None, start=None, end=None, **kwargs): # Set input according to defined shapes and make arrays single and # C-contiguous as Caffe expects. for in_, blob in kwargs.iteritems(): - if blob.ndim != 4: - raise Exception('{} blob is not 4-d'.format(in_)) if blob.shape[0] != self.blobs[in_].num: raise Exception('Input is not batch sized') self.blobs[in_].data[...] 
= blob diff --git a/python/caffe/test/test_python_layer.py b/python/caffe/test/test_python_layer.py index 383c283959d..dd99f6f15b9 100644 --- a/python/caffe/test/test_python_layer.py +++ b/python/caffe/test/test_python_layer.py @@ -11,8 +11,7 @@ def setup(self, bottom, top): pass def reshape(self, bottom, top): - top[0].reshape(bottom[0].num, bottom[0].channels, bottom[0].height, - bottom[0].width) + top[0].reshape(*bottom[0].data.shape) def forward(self, bottom, top): top[0].data[...] = 10 * bottom[0].data @@ -21,17 +20,16 @@ def backward(self, top, propagate_down, bottom): bottom[0].diff[...] = 10 * top[0].diff def python_net_file(): - f = tempfile.NamedTemporaryFile(delete=False) - f.write("""name: 'pythonnet' force_backward: true - input: 'data' input_dim: 10 input_dim: 9 input_dim: 8 input_dim: 7 - layer { type: 'Python' name: 'one' bottom: 'data' top: 'one' - python_param { module: 'test_python_layer' layer: 'SimpleLayer' } } - layer { type: 'Python' name: 'two' bottom: 'one' top: 'two' - python_param { module: 'test_python_layer' layer: 'SimpleLayer' } } - layer { type: 'Python' name: 'three' bottom: 'two' top: 'three' - python_param { module: 'test_python_layer' layer: 'SimpleLayer' } }""") - f.close() - return f.name + with tempfile.NamedTemporaryFile(delete=False) as f: + f.write("""name: 'pythonnet' force_backward: true + input: 'data' input_shape { dim: 10 dim: 9 dim: 8 } + layer { type: 'Python' name: 'one' bottom: 'data' top: 'one' + python_param { module: 'test_python_layer' layer: 'SimpleLayer' } } + layer { type: 'Python' name: 'two' bottom: 'one' top: 'two' + python_param { module: 'test_python_layer' layer: 'SimpleLayer' } } + layer { type: 'Python' name: 'three' bottom: 'two' top: 'three' + python_param { module: 'test_python_layer' layer: 'SimpleLayer' } }""") + return f.name class TestPythonLayer(unittest.TestCase): def setUp(self): diff --git a/src/caffe/blob.cpp b/src/caffe/blob.cpp index fbc1361a19d..6d2b3f502d9 100644 --- 
a/src/caffe/blob.cpp +++ b/src/caffe/blob.cpp @@ -1,3 +1,6 @@ +#include +#include + #include "caffe/blob.hpp" #include "caffe/common.hpp" #include "caffe/syncedmem.hpp" @@ -8,15 +11,24 @@ namespace caffe { template void Blob::Reshape(const int num, const int channels, const int height, const int width) { - CHECK_GE(num, 0); - CHECK_GE(channels, 0); - CHECK_GE(height, 0); - CHECK_GE(width, 0); - num_ = num; - channels_ = channels; - height_ = height; - width_ = width; - count_ = num_ * channels_ * height_ * width_; + vector shape(4); + shape[0] = num; + shape[1] = channels; + shape[2] = height; + shape[3] = width; + Reshape(shape); +} + +template +void Blob::Reshape(const vector& shape) { + CHECK_LE(shape.size(), kMaxBlobAxes); + count_ = 1; + shape_.resize(shape.size()); + for (int i = 0; i < shape.size(); ++i) { + CHECK_GE(shape[i], 0); + count_ *= shape[i]; + shape_[i] = shape[i]; + } if (count_ > capacity_) { capacity_ = count_; data_.reset(new SyncedMemory(capacity_ * sizeof(Dtype))); @@ -25,8 +37,18 @@ void Blob::Reshape(const int num, const int channels, const int height, } template +void Blob::Reshape(const BlobShape& shape) { + CHECK_LE(shape.dim_size(), kMaxBlobAxes); + vector shape_vec(shape.dim_size()); + for (int i = 0; i < shape.dim_size(); ++i) { + shape_vec[i] = shape.dim(i); + } + Reshape(shape_vec); +} + +template void Blob::ReshapeLike(const Blob& other) { - Reshape(other.num(), other.channels(), other.height(), other.width()); + Reshape(other.shape()); } template @@ -38,6 +60,13 @@ Blob::Blob(const int num, const int channels, const int height, } template +Blob::Blob(const vector& shape) + // capacity_ must be initialized before calling Reshape + : capacity_(0) { + Reshape(shape); +} + +template const Dtype* Blob::cpu_data() const { CHECK(data_); return (const Dtype*)data_->cpu_data(); @@ -346,11 +375,33 @@ void Blob::scale_diff(Dtype scale_factor) { } template +bool Blob::ShapeEquals(const BlobProto& other) { + if (other.has_num() || 
other.has_channels() || + other.has_height() || other.has_width()) { + // Using deprecated 4D Blob dimensions -- + // shape is (num, channels, height, width). + // Note: we do not use the normal Blob::num(), Blob::channels(), etc. + // methods as these index from the beginning of the blob shape, where legacy + // parameter blobs were indexed from the end of the blob shape (e.g., bias + // Blob shape (1 x 1 x 1 x N), IP layer weight Blob shape (1 x 1 x M x N)). + return shape_.size() <= 4 && + LegacyShape(-4) == other.num() && + LegacyShape(-3) == other.channels() && + LegacyShape(-2) == other.height() && + LegacyShape(-1) == other.width(); + } + vector other_shape(other.shape().dim_size()); + for (int i = 0; i < other.shape().dim_size(); ++i) { + other_shape[i] = other.shape().dim(i); + } + return shape_ == other_shape; +} + +template void Blob::CopyFrom(const Blob& source, bool copy_diff, bool reshape) { - if (num_ != source.num() || channels_ != source.channels() || - height_ != source.height() || width_ != source.width()) { + if (source.count() != count_ || source.shape() != shape_) { if (reshape) { - Reshape(source.num(), source.channels(), source.height(), source.width()); + ReshapeLike(source); } else { LOG(FATAL) << "Trying to copy blobs of different sizes."; } @@ -380,8 +431,28 @@ void Blob::CopyFrom(const Blob& source, bool copy_diff, bool reshape) { } template -void Blob::FromProto(const BlobProto& proto) { - Reshape(proto.num(), proto.channels(), proto.height(), proto.width()); +void Blob::FromProto(const BlobProto& proto, bool reshape) { + if (reshape) { + vector shape; + if (proto.has_num() || proto.has_channels() || + proto.has_height() || proto.has_width()) { + // Using deprecated 4D Blob dimensions -- + // shape is (num, channels, height, width). 
+ shape.resize(4); + shape[0] = proto.num(); + shape[1] = proto.channels(); + shape[2] = proto.height(); + shape[3] = proto.width(); + } else { + shape.resize(proto.shape().dim_size()); + for (int i = 0; i < proto.shape().dim_size(); ++i) { + shape[i] = proto.shape().dim(i); + } + } + Reshape(shape); + } else { + CHECK(ShapeEquals(proto)) << "shape mismatch (reshape not set)"; + } // copy data Dtype* data_vec = mutable_cpu_data(); for (int i = 0; i < count_; ++i) { @@ -397,10 +468,10 @@ void Blob::FromProto(const BlobProto& proto) { template void Blob::ToProto(BlobProto* proto, bool write_diff) const { - proto->set_num(num_); - proto->set_channels(channels_); - proto->set_height(height_); - proto->set_width(width_); + proto->clear_shape(); + for (int i = 0; i < shape_.size(); ++i) { + proto->mutable_shape()->add_dim(shape_[i]); + } proto->clear_data(); proto->clear_diff(); const Dtype* data_vec = cpu_data(); diff --git a/src/caffe/layers/accuracy_layer.cpp b/src/caffe/layers/accuracy_layer.cpp index 3e8df34c0d6..186f9f8632c 100644 --- a/src/caffe/layers/accuracy_layer.cpp +++ b/src/caffe/layers/accuracy_layer.cpp @@ -19,14 +19,15 @@ void AccuracyLayer::LayerSetUp( template void AccuracyLayer::Reshape( const vector*>& bottom, const vector*>& top) { - CHECK_EQ(bottom[0]->num(), bottom[1]->num()) - << "The data and label should have the same number."; - CHECK_LE(top_k_, bottom[0]->count() / bottom[0]->num()) + CHECK_LE(top_k_, bottom[0]->count() / bottom[1]->count()) << "top_k must be less than or equal to the number of classes."; - CHECK_EQ(bottom[1]->channels(), 1); - CHECK_EQ(bottom[1]->height(), 1); - CHECK_EQ(bottom[1]->width(), 1); - top[0]->Reshape(1, 1, 1, 1); + CHECK_GE(bottom[0]->num_axes(), bottom[1]->num_axes()); + for (int i = 0; i < bottom[1]->num_axes(); ++i) { + CHECK_LE(bottom[0]->shape(i), bottom[1]->shape(i)) + << "Dimension mismatch between predictions and label."; + } + vector top_shape(0); // Accuracy is a scalar; 0 axes. 
+ top[0]->Reshape(top_shape); } template @@ -35,8 +36,8 @@ void AccuracyLayer::Forward_cpu(const vector*>& bottom, Dtype accuracy = 0; const Dtype* bottom_data = bottom[0]->cpu_data(); const Dtype* bottom_label = bottom[1]->cpu_data(); - int num = bottom[0]->num(); - int dim = bottom[0]->count() / bottom[0]->num(); + int num = bottom[0]->count(0, bottom[1]->num_axes()); + int dim = bottom[0]->count() / num; vector maxval(top_k_+1); vector max_id(top_k_+1); for (int i = 0; i < num; ++i) { diff --git a/src/caffe/layers/base_conv_layer.cpp b/src/caffe/layers/base_conv_layer.cpp index dccd5170c11..ccb3adc7e89 100644 --- a/src/caffe/layers/base_conv_layer.cpp +++ b/src/caffe/layers/base_conv_layer.cpp @@ -11,6 +11,8 @@ namespace caffe { template void BaseConvolutionLayer::LayerSetUp(const vector*>& bottom, const vector*>& top) { + CHECK_EQ(4, bottom[0]->num_axes()) << "Input must have 4 axes, " + << "corresponding to (num, channels, height, width)"; // Configure the kernel size, padding, stride, and inputs. ConvolutionParameter conv_param = this->layer_param_.convolution_param(); CHECK(!conv_param.has_kernel_size() != @@ -85,10 +87,10 @@ void BaseConvolutionLayer::LayerSetUp(const vector*>& bottom, shared_ptr > weight_filler(GetFiller( this->layer_param_.convolution_param().weight_filler())); weight_filler->Fill(this->blobs_[0].get()); - // If necessary, initialize and fill the biases: - // 1 x 1 x 1 x output channels + // If necessary, initialize and fill the biases. 
if (bias_term_) { - this->blobs_[1].reset(new Blob(1, 1, 1, num_output_)); + vector bias_shape(1, num_output_); + this->blobs_[1].reset(new Blob(bias_shape)); shared_ptr > bias_filler(GetFiller( this->layer_param_.convolution_param().bias_filler())); bias_filler->Fill(this->blobs_[1].get()); @@ -101,6 +103,8 @@ void BaseConvolutionLayer::LayerSetUp(const vector*>& bottom, template void BaseConvolutionLayer::Reshape(const vector*>& bottom, const vector*>& top) { + CHECK_EQ(4, bottom[0]->num_axes()) << "Input must have 4 axes, " + << "corresponding to (num, channels, height, width)"; num_ = bottom[0]->num(); height_ = bottom[0]->height(); width_ = bottom[0]->width(); @@ -144,7 +148,8 @@ void BaseConvolutionLayer::Reshape(const vector*>& bottom, } // Set up the all ones "bias multiplier" for adding biases by BLAS if (bias_term_) { - bias_multiplier_.Reshape(1, 1, 1, height_out_ * width_out_); + vector bias_multiplier_shape(1, height_out_ * width_out_); + bias_multiplier_.Reshape(bias_multiplier_shape); caffe_set(bias_multiplier_.count(), Dtype(1), bias_multiplier_.mutable_cpu_data()); } diff --git a/src/caffe/layers/concat_layer.cpp b/src/caffe/layers/concat_layer.cpp index fc88433c42b..1cac8fc3387 100644 --- a/src/caffe/layers/concat_layer.cpp +++ b/src/caffe/layers/concat_layer.cpp @@ -9,62 +9,63 @@ namespace caffe { template void ConcatLayer::LayerSetUp(const vector*>& bottom, const vector*>& top) { - concat_dim_ = this->layer_param_.concat_param().concat_dim(); - CHECK_GE(concat_dim_, 0) << - "concat_dim should be >= 0"; - CHECK_LE(concat_dim_, 1) << - "For now concat_dim <=1, it can only concat num and channels"; + const ConcatParameter& concat_param = this->layer_param_.concat_param(); + CHECK(!(concat_param.has_axis() && concat_param.has_concat_dim())) + << "Either axis or concat_dim should be specified; not both."; } template void ConcatLayer::Reshape(const vector*>& bottom, const vector*>& top) { + const int num_axes = bottom[0]->num_axes(); + const 
ConcatParameter& concat_param = this->layer_param_.concat_param(); + if (concat_param.has_concat_dim()) { + concat_axis_ = static_cast(concat_param.concat_dim()); + // Don't allow negative indexing for concat_dim, a uint32 -- almost + // certainly unintended. + CHECK_GE(concat_axis_, 0) << "casting concat_dim from uint32 to int32 " + << "produced negative result; concat_dim must satisfy " + << "0 <= concat_dim < " << kMaxBlobAxes; + CHECK_LT(concat_axis_, num_axes) << "concat_dim out of range."; + } else { + concat_axis_ = bottom[0]->CanonicalAxisIndex(concat_param.axis()); + } // Initialize with the first blob. - count_ = bottom[0]->count(); - num_ = bottom[0]->num(); - channels_ = bottom[0]->channels(); - height_ = bottom[0]->height(); - width_ = bottom[0]->width(); + vector top_shape = bottom[0]->shape(); + num_concats_ = bottom[0]->count(0, concat_axis_); + concat_input_size_ = bottom[0]->count(concat_axis_ + 1); + int bottom_count_sum = bottom[0]->count(); for (int i = 1; i < bottom.size(); ++i) { - count_ += bottom[i]->count(); - if (concat_dim_== 0) { - num_ += bottom[i]->num(); - } else if (concat_dim_ == 1) { - channels_ += bottom[i]->channels(); - } else if (concat_dim_ == 2) { - height_ += bottom[i]->height(); - } else if (concat_dim_ == 3) { - width_ += bottom[i]->width(); + CHECK_EQ(num_axes, bottom[i]->num_axes()) + << "All inputs must have the same #axes."; + for (int j = 0; j < num_axes; ++j) { + if (j == concat_axis_) { continue; } + CHECK_EQ(top_shape[j], bottom[i]->shape(j)) + << "All inputs must have the same shape, except at concat_axis."; } + bottom_count_sum += bottom[i]->count(); + top_shape[concat_axis_] += bottom[i]->shape(concat_axis_); } - top[0]->Reshape(num_, channels_, height_, width_); - CHECK_EQ(count_, top[0]->count()); + top[0]->Reshape(top_shape); + CHECK_EQ(bottom_count_sum, top[0]->count()); } template void ConcatLayer::Forward_cpu(const vector*>& bottom, const vector*>& top) { Dtype* top_data = top[0]->mutable_cpu_data(); - if 
(concat_dim_== 0) { - int offset_num = 0; - for (int i = 0; i < bottom.size(); ++i) { - const Dtype* bottom_data = bottom[i]->cpu_data(); - int num_elem = bottom[i]->count(); - caffe_copy(num_elem, bottom_data, top_data+top[0]->offset(offset_num)); - offset_num += bottom[i]->num(); + int offset_concat_axis = 0; + const int top_concat_axis = top[0]->shape(concat_axis_); + for (int i = 0; i < bottom.size(); ++i) { + const Dtype* bottom_data = bottom[i]->cpu_data(); + const int bottom_concat_axis = bottom[i]->shape(concat_axis_); + for (int n = 0; n < num_concats_; ++n) { + caffe_copy(bottom_concat_axis * concat_input_size_, + bottom_data + n * bottom_concat_axis * concat_input_size_, + top_data + (n * top_concat_axis + offset_concat_axis) + * concat_input_size_); } - } else if (concat_dim_ == 1) { - int offset_channel = 0; - for (int i = 0; i < bottom.size(); ++i) { - const Dtype* bottom_data = bottom[i]->cpu_data(); - int num_elem = - bottom[i]->channels()*bottom[i]->height()*bottom[i]->width(); - for (int n = 0; n < num_; ++n) { - caffe_copy(num_elem, bottom_data+bottom[i]->offset(n), - top_data+top[0]->offset(n, offset_channel)); - } - offset_channel += bottom[i]->channels(); - } // concat_dim_ is guaranteed to be 0 or 1 by LayerSetUp. 
+ offset_concat_axis += bottom_concat_axis; } } @@ -72,32 +73,19 @@ template void ConcatLayer::Backward_cpu(const vector*>& top, const vector& propagate_down, const vector*>& bottom) { const Dtype* top_diff = top[0]->cpu_diff(); - if (concat_dim_ == 0) { - int offset_num = 0; - for (int i = 0; i < bottom.size(); ++i) { - Blob* blob = bottom[i]; - if (propagate_down[i]) { - Dtype* bottom_diff = blob->mutable_cpu_diff(); - caffe_copy(blob->count(), top_diff + top[0]->offset(offset_num), - bottom_diff); - } - offset_num += blob->num(); - } - } else if (concat_dim_ == 1) { - int offset_channel = 0; - for (int i = 0; i < bottom.size(); ++i) { - Blob* blob = bottom[i]; - if (propagate_down[i]) { - Dtype* bottom_diff = blob->mutable_cpu_diff(); - int num_elem = blob->channels()*blob->height()*blob->width(); - for (int n = 0; n < num_; ++n) { - caffe_copy(num_elem, top_diff + top[0]->offset(n, offset_channel), - bottom_diff + blob->offset(n)); - } - } - offset_channel += blob->channels(); + int offset_concat_axis = 0; + const int top_concat_axis = top[0]->shape(concat_axis_); + for (int i = 0; i < bottom.size(); ++i) { + if (!propagate_down[i]) { continue; } + Dtype* bottom_diff = bottom[i]->mutable_cpu_diff(); + const int bottom_concat_axis = bottom[i]->shape(concat_axis_); + for (int n = 0; n < num_concats_; ++n) { + caffe_copy(bottom_concat_axis * concat_input_size_, top_diff + + (n * top_concat_axis + offset_concat_axis) * concat_input_size_, + bottom_diff + n * bottom_concat_axis * concat_input_size_); } - } // concat_dim_ is guaranteed to be 0 or 1 by LayerSetUp. 
+ offset_concat_axis += bottom_concat_axis; + } } #ifdef CPU_ONLY diff --git a/src/caffe/layers/concat_layer.cu b/src/caffe/layers/concat_layer.cu index 88fc090025f..dbadb5aeb30 100644 --- a/src/caffe/layers/concat_layer.cu +++ b/src/caffe/layers/concat_layer.cu @@ -10,29 +10,18 @@ template void ConcatLayer::Forward_gpu(const vector*>& bottom, const vector*>& top) { Dtype* top_data = top[0]->mutable_gpu_data(); - if (concat_dim_ == 0) { - int offset_num = 0; - for (int i = 0; i < bottom.size(); ++i) { - const Dtype* bottom_data = bottom[i]->gpu_data(); - caffe_copy(bottom[i]->count(), bottom_data, - top_data + top[0]->offset(offset_num)); - offset_num += bottom[i]->num(); + int offset_concat_axis = 0; + const int top_concat_axis = top[0]->shape(concat_axis_); + for (int i = 0; i < bottom.size(); ++i) { + const Dtype* bottom_data = bottom[i]->gpu_data(); + const int bottom_concat_axis = bottom[i]->shape(concat_axis_); + for (int n = 0; n < num_concats_; ++n) { + caffe_copy(bottom_concat_axis * concat_input_size_, + bottom_data + n * bottom_concat_axis * concat_input_size_, + top_data + (n * top_concat_axis + offset_concat_axis) + * concat_input_size_); } - } else if (concat_dim_ == 1) { - int offset_channel = 0; - for (int i = 0; i < bottom.size(); ++i) { - const Dtype* bottom_data = bottom[i]->gpu_data(); - int num_elem = - bottom[i]->channels() * bottom[i]->height() * bottom[i]->width(); - for (int n = 0; n < num_; ++n) { - caffe_copy(num_elem, bottom_data+bottom[i]->offset(n), - top_data + top[0]->offset(n, offset_channel)); - } - offset_channel += bottom[i]->channels(); - } - } else { - LOG(FATAL) << "concat_dim along dim" << concat_dim_ << - " not implemented yet"; + offset_concat_axis += bottom_concat_axis; } } @@ -40,34 +29,18 @@ template void ConcatLayer::Backward_gpu(const vector*>& top, const vector& propagate_down, const vector*>& bottom) { const Dtype* top_diff = top[0]->gpu_diff(); - if (concat_dim_ == 0) { - int offset_num = 0; - for (int i = 0; i < 
bottom.size(); ++i) { - Blob* blob = bottom[i]; - if (propagate_down[i]) { - Dtype* bottom_diff = blob->mutable_gpu_diff(); - caffe_copy(blob->count(), top_diff + top[0]->offset(offset_num), - bottom_diff); - } - offset_num += blob->num(); - } - } else if (concat_dim_ == 1) { - int offset_channel = 0; - for (int i = 0; i < bottom.size(); ++i) { - Blob* blob = bottom[i]; - if (propagate_down[i]) { - Dtype* bottom_diff = blob->mutable_gpu_diff(); - int num_elem = blob->channels()*blob->height()*blob->width(); - for (int n = 0; n < num_; ++n) { - caffe_copy(num_elem, top_diff + top[0]->offset(n, offset_channel), - bottom_diff + blob->offset(n)); - } - } - offset_channel += blob->channels(); + int offset_concat_axis = 0; + const int top_concat_axis = top[0]->shape(concat_axis_); + for (int i = 0; i < bottom.size(); ++i) { + if (!propagate_down[i]) { continue; } + Dtype* bottom_diff = bottom[i]->mutable_gpu_diff(); + const int bottom_concat_axis = bottom[i]->shape(concat_axis_); + for (int n = 0; n < num_concats_; ++n) { + caffe_copy(bottom_concat_axis * concat_input_size_, top_diff + + (n * top_concat_axis + offset_concat_axis) * concat_input_size_, + bottom_diff + n * bottom_concat_axis * concat_input_size_); } - } else { - LOG(FATAL) << "concat_dim along dim" << concat_dim_ << - " not implemented yet"; + offset_concat_axis += bottom_concat_axis; } } diff --git a/src/caffe/layers/cudnn_softmax_layer.cpp b/src/caffe/layers/cudnn_softmax_layer.cpp index 83a5b69a626..211701cad49 100644 --- a/src/caffe/layers/cudnn_softmax_layer.cpp +++ b/src/caffe/layers/cudnn_softmax_layer.cpp @@ -26,10 +26,10 @@ template void CuDNNSoftmaxLayer::Reshape(const vector*>& bottom, const vector*>& top) { SoftmaxLayer::Reshape(bottom, top); - int N = bottom[0]->num(); - int K = bottom[0]->channels(); - int H = bottom[0]->height(); - int W = bottom[0]->width(); + int N = this->outer_num_; + int K = bottom[0]->shape(this->softmax_axis_); + int H = this->inner_num_; + int W = 1; 
cudnn::setTensor4dDesc(&bottom_desc_, N, K, H, W); cudnn::setTensor4dDesc(&top_desc_, N, K, H, W); } diff --git a/src/caffe/layers/data_layer.cpp b/src/caffe/layers/data_layer.cpp index 1861090f953..0f2d66776a9 100644 --- a/src/caffe/layers/data_layer.cpp +++ b/src/caffe/layers/data_layer.cpp @@ -69,9 +69,9 @@ void DataLayer::DataLayerSetUp(const vector*>& bottom, << top[0]->width(); // label if (this->output_labels_) { - top[1]->Reshape(this->layer_param_.data_param().batch_size(), 1, 1, 1); - this->prefetch_label_.Reshape(this->layer_param_.data_param().batch_size(), - 1, 1, 1); + vector label_shape(1, this->layer_param_.data_param().batch_size()); + top[1]->Reshape(label_shape); + this->prefetch_label_.Reshape(label_shape); } } diff --git a/src/caffe/layers/dummy_data_layer.cpp b/src/caffe/layers/dummy_data_layer.cpp index d254eb1f961..6b0d617464c 100644 --- a/src/caffe/layers/dummy_data_layer.cpp +++ b/src/caffe/layers/dummy_data_layer.cpp @@ -16,18 +16,30 @@ void DummyDataLayer::LayerSetUp(const vector*>& bottom, num_data_filler == num_top) << "Number of data fillers must be 0, 1 or equal to the number of tops: " << num_top << "; you specified " << num_data_filler << " data fillers."; - CHECK(param.num_size() == 1 || param.num_size() == num_top) - << "Must specify either a single (1) 'num' or one for each top blob " - << "(" << num_top << "); you specified " << param.num_size() << "."; - CHECK(param.channels_size() == 1 || param.channels_size() == num_top) - << "Must specify either a single (1) 'channels' or one for each top blob " - << "(" << num_top << "); you specified " << param.channels_size() << "."; - CHECK(param.height_size() == 1 || param.height_size() == num_top) - << "Must specify either a single (1) 'height' or one for each top blob " - << "(" << num_top << "); you specified " << param.height_size() << "."; - CHECK(param.width_size() == 1 || param.width_size() == num_top) - << "Must specify either a single (1) 'width' or one for each top blob " - 
<< "(" << num_top << "); you specified " << param.width_size() << "."; + + const bool legacy_dims = param.num_size() || param.channels_size() || + param.height_size() || param.width_size(); + if (legacy_dims) { + CHECK_EQ(0, param.shape_size()) + << "Both shape and legacy fields were specified"; + // Using deprecated 4D output dim specifiers. + CHECK(param.num_size() == 1 || param.num_size() == num_top) + << "Must specify 'num' once, or once per top blob " + << "(" << num_top << "); specified " << param.num_size() << "."; + CHECK(param.channels_size() == 1 || param.channels_size() == num_top) + << "Must specify 'channels' once, or once per top blob " + << "(" << num_top << "); specified " << param.channels_size() << "."; + CHECK(param.height_size() == 1 || param.height_size() == num_top) + << "Must specify 'height' once, or once per top blob " + << "(" << num_top << "); specified " << param.height_size() << "."; + CHECK(param.width_size() == 1 || param.width_size() == num_top) + << "Must specify 'width' once, or once per top blob " + << "(" << num_top << "); specified " << param.width_size() << "."; + } else { + CHECK(param.shape_size() == 1 || param.shape_size() == num_top) + << "Must specify 'shape' once, or once per top blob " + << "(" << num_top << "); specified " << param.shape_size() << "."; + } // refill_[i] tells Forward i whether or not to actually refill top Blob i. // If refill_[i] is false, Forward does nothing for Blob i. We use this to // avoid wastefully refilling "constant" Blobs in every forward pass. @@ -63,14 +75,19 @@ void DummyDataLayer::LayerSetUp(const vector*>& bottom, } } for (int i = 0; i < num_top; ++i) { - const int num = (param.num_size() == 1) ? param.num(0) : param.num(i); - const int channels = - (param.channels_size() == 1) ? param.channels(0) : param.channels(i); - const int height = - (param.height_size() == 1) ? param.height(0) : param.height(i); - const int width = - (param.width_size() == 1) ? 
param.width(0) : param.width(i); - top[i]->Reshape(num, channels, height, width); + if (legacy_dims) { + const int num = (param.num_size() == 1) ? param.num(0) : param.num(i); + const int channels = + (param.channels_size() == 1) ? param.channels(0) : param.channels(i); + const int height = + (param.height_size() == 1) ? param.height(0) : param.height(i); + const int width = + (param.width_size() == 1) ? param.width(0) : param.width(i); + top[i]->Reshape(num, channels, height, width); + } else { + const int shape_index = (param.shape_size() == 1) ? 0 : i; + top[i]->Reshape(param.shape(shape_index)); + } } // Run Forward once, with refill_ inverted, to fill the constant Blobs. this->Forward(bottom, top); diff --git a/src/caffe/layers/eltwise_layer.cpp b/src/caffe/layers/eltwise_layer.cpp index bbc34449588..a80700736bd 100644 --- a/src/caffe/layers/eltwise_layer.cpp +++ b/src/caffe/layers/eltwise_layer.cpp @@ -31,21 +31,14 @@ void EltwiseLayer::LayerSetUp(const vector*>& bottom, template void EltwiseLayer::Reshape(const vector*>& bottom, const vector*>& top) { - const int num = bottom[0]->num(); - const int channels = bottom[0]->channels(); - const int height = bottom[0]->height(); - const int width = bottom[0]->width(); for (int i = 1; i < bottom.size(); ++i) { - CHECK_EQ(num, bottom[i]->num()); - CHECK_EQ(channels, bottom[i]->channels()); - CHECK_EQ(height, bottom[i]->height()); - CHECK_EQ(width, bottom[i]->width()); + CHECK(bottom[i]->shape() == bottom[0]->shape()); } - top[0]->Reshape(num, channels, height, width); + top[0]->ReshapeLike(*bottom[0]); // If max operation, we will initialize the vector index part. 
if (this->layer_param_.eltwise_param().operation() == EltwiseParameter_EltwiseOp_MAX && top.size() == 1) { - max_idx_.Reshape(bottom[0]->num(), channels, height, width); + max_idx_.Reshape(bottom[0]->shape()); } } diff --git a/src/caffe/layers/euclidean_loss_layer.cpp b/src/caffe/layers/euclidean_loss_layer.cpp index b539d3487f5..80efa31b22c 100644 --- a/src/caffe/layers/euclidean_loss_layer.cpp +++ b/src/caffe/layers/euclidean_loss_layer.cpp @@ -11,11 +11,9 @@ template void EuclideanLossLayer::Reshape( const vector*>& bottom, const vector*>& top) { LossLayer::Reshape(bottom, top); - CHECK_EQ(bottom[0]->channels(), bottom[1]->channels()); - CHECK_EQ(bottom[0]->height(), bottom[1]->height()); - CHECK_EQ(bottom[0]->width(), bottom[1]->width()); - diff_.Reshape(bottom[0]->num(), bottom[0]->channels(), - bottom[0]->height(), bottom[0]->width()); + CHECK_EQ(bottom[0]->count(1), bottom[1]->count(1)) + << "Inputs must have the same dimension."; + diff_.ReshapeLike(*bottom[0]); } template diff --git a/src/caffe/layers/flatten_layer.cpp b/src/caffe/layers/flatten_layer.cpp index eb7b42bc10b..745f271ea45 100644 --- a/src/caffe/layers/flatten_layer.cpp +++ b/src/caffe/layers/flatten_layer.cpp @@ -9,12 +9,11 @@ namespace caffe { template void FlattenLayer::Reshape(const vector*>& bottom, const vector*>& top) { - int channels_out = bottom[0]->channels() * bottom[0]->height() - * bottom[0]->width(); - top[0]->Reshape(bottom[0]->num(), channels_out, 1, 1); - count_ = bottom[0]->num() * channels_out; - CHECK_EQ(count_, bottom[0]->count()); - CHECK_EQ(count_, top[0]->count()); + vector top_shape(2); + top_shape[0] = bottom[0]->num(); + top_shape[1] = bottom[0]->count() / bottom[0]->num(); + top[0]->Reshape(top_shape); + CHECK_EQ(top[0]->count(), bottom[0]->count()); } template @@ -29,10 +28,6 @@ void FlattenLayer::Backward_cpu(const vector*>& top, bottom[0]->ShareDiff(*top[0]); } -#ifdef CPU_ONLY -STUB_GPU(FlattenLayer); -#endif - INSTANTIATE_CLASS(FlattenLayer); 
REGISTER_LAYER_CLASS(Flatten); diff --git a/src/caffe/layers/flatten_layer.cu b/src/caffe/layers/flatten_layer.cu deleted file mode 100644 index 42abdad4499..00000000000 --- a/src/caffe/layers/flatten_layer.cu +++ /dev/null @@ -1,23 +0,0 @@ -#include - -#include "caffe/layer.hpp" -#include "caffe/util/math_functions.hpp" -#include "caffe/vision_layers.hpp" - -namespace caffe { - -template -void FlattenLayer::Forward_gpu(const vector*>& bottom, - const vector*>& top) { - top[0]->ShareData(*bottom[0]); -} - -template -void FlattenLayer::Backward_gpu(const vector*>& top, - const vector& propagate_down, const vector*>& bottom) { - bottom[0]->ShareDiff(*top[0]); -} - -INSTANTIATE_LAYER_GPU_FUNCS(FlattenLayer); - -} // namespace caffe diff --git a/src/caffe/layers/hdf5_data_layer.cpp b/src/caffe/layers/hdf5_data_layer.cpp index 3d856ec3001..1ceb6c24431 100644 --- a/src/caffe/layers/hdf5_data_layer.cpp +++ b/src/caffe/layers/hdf5_data_layer.cpp @@ -36,7 +36,7 @@ void HDF5DataLayer::LoadHDF5FileData(const char* filename) { hdf_blobs_.resize(top_size); const int MIN_DATA_DIM = 1; - const int MAX_DATA_DIM = 4; + const int MAX_DATA_DIM = INT_MAX; for (int i = 0; i < top_size; ++i) { hdf_blobs_[i] = shared_ptr >(new Blob()); @@ -88,9 +88,14 @@ void HDF5DataLayer::LayerSetUp(const vector*>& bottom, // Reshape blobs. 
const int batch_size = this->layer_param_.hdf5_data_param().batch_size(); const int top_size = this->layer_param_.top_size(); + vector top_shape; for (int i = 0; i < top_size; ++i) { - top[i]->Reshape(batch_size, hdf_blobs_[i]->channels(), - hdf_blobs_[i]->height(), hdf_blobs_[i]->width()); + top_shape.resize(hdf_blobs_[i]->num_axes()); + top_shape[0] = batch_size; + for (int j = 1; j < top_shape.size(); ++j) { + top_shape[j] = hdf_blobs_[i]->shape(j); + } + top[i]->Reshape(top_shape); } } diff --git a/src/caffe/layers/im2col_layer.cpp b/src/caffe/layers/im2col_layer.cpp index 112226116c8..1c802714e33 100644 --- a/src/caffe/layers/im2col_layer.cpp +++ b/src/caffe/layers/im2col_layer.cpp @@ -50,6 +50,8 @@ void Im2colLayer::LayerSetUp(const vector*>& bottom, template void Im2colLayer::Reshape(const vector*>& bottom, const vector*>& top) { + CHECK_EQ(4, bottom[0]->num_axes()) << "Input must have 4 axes, " + << "corresponding to (num, channels, height, width)"; channels_ = bottom[0]->channels(); height_ = bottom[0]->height(); width_ = bottom[0]->width(); diff --git a/src/caffe/layers/image_data_layer.cpp b/src/caffe/layers/image_data_layer.cpp index f9046e1b3a1..38ebbd5ec14 100644 --- a/src/caffe/layers/image_data_layer.cpp +++ b/src/caffe/layers/image_data_layer.cpp @@ -81,8 +81,9 @@ void ImageDataLayer::DataLayerSetUp(const vector*>& bottom, << top[0]->channels() << "," << top[0]->height() << "," << top[0]->width(); // label - top[1]->Reshape(batch_size, 1, 1, 1); - this->prefetch_label_.Reshape(batch_size, 1, 1, 1); + vector label_shape(1, batch_size); + top[1]->Reshape(label_shape); + this->prefetch_label_.Reshape(label_shape); } template diff --git a/src/caffe/layers/inner_product_layer.cpp b/src/caffe/layers/inner_product_layer.cpp index b1ec6cb25c0..6b88724491b 100644 --- a/src/caffe/layers/inner_product_layer.cpp +++ b/src/caffe/layers/inner_product_layer.cpp @@ -15,7 +15,12 @@ void InnerProductLayer::LayerSetUp(const vector*>& bottom, const int num_output = 
this->layer_param_.inner_product_param().num_output(); bias_term_ = this->layer_param_.inner_product_param().bias_term(); N_ = num_output; - K_ = bottom[0]->count() / bottom[0]->num(); + const int axis = bottom[0]->CanonicalAxisIndex( + this->layer_param_.inner_product_param().axis()); + // Dimensions starting from "axis" are "flattened" into a single + // length K_ vector. For example, if bottom[0]'s shape is (N, C, H, W), + // N inner products with dimension CHW are performed. + K_ = bottom[0]->count(axis); // Check if we need to set up the weights if (this->blobs_.size() > 0) { LOG(INFO) << "Skipping parameter initialization"; @@ -26,14 +31,18 @@ void InnerProductLayer::LayerSetUp(const vector*>& bottom, this->blobs_.resize(1); } // Intialize the weight - this->blobs_[0].reset(new Blob(1, 1, N_, K_)); + vector weight_shape(2); + weight_shape[0] = N_; + weight_shape[1] = K_; + this->blobs_[0].reset(new Blob(weight_shape)); // fill the weights shared_ptr > weight_filler(GetFiller( this->layer_param_.inner_product_param().weight_filler())); weight_filler->Fill(this->blobs_[0].get()); // If necessary, intiialize and fill the bias term if (bias_term_) { - this->blobs_[1].reset(new Blob(1, 1, 1, N_)); + vector bias_shape(1, N_); + this->blobs_[1].reset(new Blob(bias_shape)); shared_ptr > bias_filler(GetFiller( this->layer_param_.inner_product_param().bias_filler())); bias_filler->Fill(this->blobs_[1].get()); @@ -46,13 +55,24 @@ template void InnerProductLayer::Reshape(const vector*>& bottom, const vector*>& top) { // Figure out the dimensions - M_ = bottom[0]->num(); - CHECK_EQ(bottom[0]->count() / bottom[0]->num(), K_) << "Input size " - "incompatible with inner product parameters."; - top[0]->Reshape(bottom[0]->num(), N_, 1, 1); + const int axis = bottom[0]->CanonicalAxisIndex( + this->layer_param_.inner_product_param().axis()); + const int new_K = bottom[0]->count(axis); + CHECK_EQ(K_, new_K) + << "Input size incompatible with inner product parameters."; + // The 
first "axis" dimensions are independent inner products; the total + // number of these is M_, the product over these dimensions. + M_ = bottom[0]->count(0, axis); + // The top shape will be the bottom shape with the flattened axes dropped, + // and replaced by a single axis with dimension num_output (N_). + vector top_shape = bottom[0]->shape(); + top_shape.resize(axis + 1); + top_shape[axis] = N_; + top[0]->Reshape(top_shape); // Set up the bias multiplier if (bias_term_) { - bias_multiplier_.Reshape(1, 1, 1, M_); + vector bias_shape(1, M_); + bias_multiplier_.Reshape(bias_shape); caffe_set(M_, Dtype(1), bias_multiplier_.mutable_cpu_data()); } } diff --git a/src/caffe/layers/loss_layer.cpp b/src/caffe/layers/loss_layer.cpp index a5b6d11b065..3496a5c2a8a 100644 --- a/src/caffe/layers/loss_layer.cpp +++ b/src/caffe/layers/loss_layer.cpp @@ -24,7 +24,8 @@ void LossLayer::Reshape( const vector*>& bottom, const vector*>& top) { CHECK_EQ(bottom[0]->num(), bottom[1]->num()) << "The data and label should have the same number."; - top[0]->Reshape(1, 1, 1, 1); + vector loss_shape(0); // Loss layers output a scalar; 0 axes. 
+ top[0]->Reshape(loss_shape); } INSTANTIATE_CLASS(LossLayer); diff --git a/src/caffe/layers/lrn_layer.cpp b/src/caffe/layers/lrn_layer.cpp index 5e3e7c429ef..36c1ace4c99 100644 --- a/src/caffe/layers/lrn_layer.cpp +++ b/src/caffe/layers/lrn_layer.cpp @@ -69,6 +69,8 @@ void LRNLayer::LayerSetUp(const vector*>& bottom, template void LRNLayer::Reshape(const vector*>& bottom, const vector*>& top) { + CHECK_EQ(4, bottom[0]->num_axes()) << "Input must have 4 axes, " + << "corresponding to (num, channels, height, width)"; num_ = bottom[0]->num(); channels_ = bottom[0]->channels(); height_ = bottom[0]->height(); diff --git a/src/caffe/layers/memory_data_layer.cpp b/src/caffe/layers/memory_data_layer.cpp index effdad90aff..42de4198bc4 100644 --- a/src/caffe/layers/memory_data_layer.cpp +++ b/src/caffe/layers/memory_data_layer.cpp @@ -19,10 +19,11 @@ void MemoryDataLayer::DataLayerSetUp(const vector*>& bottom, CHECK_GT(batch_size_ * size_, 0) << "batch_size, channels, height, and width must be specified and" " positive in memory_data_param"; + vector label_shape(1, batch_size_); top[0]->Reshape(batch_size_, channels_, height_, width_); - top[1]->Reshape(batch_size_, 1, 1, 1); + top[1]->Reshape(label_shape); added_data_.Reshape(batch_size_, channels_, height_, width_); - added_label_.Reshape(batch_size_, 1, 1, 1); + added_label_.Reshape(label_shape); data_ = NULL; labels_ = NULL; added_data_.cpu_data(); diff --git a/src/caffe/layers/pooling_layer.cpp b/src/caffe/layers/pooling_layer.cpp index 6f4c69c861e..c8d41499455 100644 --- a/src/caffe/layers/pooling_layer.cpp +++ b/src/caffe/layers/pooling_layer.cpp @@ -81,6 +81,8 @@ void PoolingLayer::LayerSetUp(const vector*>& bottom, template void PoolingLayer::Reshape(const vector*>& bottom, const vector*>& top) { + CHECK_EQ(4, bottom[0]->num_axes()) << "Input must have 4 axes, " + << "corresponding to (num, channels, height, width)"; channels_ = bottom[0]->channels(); height_ = bottom[0]->height(); width_ = bottom[0]->width(); diff 
--git a/src/caffe/layers/slice_layer.cpp b/src/caffe/layers/slice_layer.cpp index 46c3acd6513..e4418c9cf9c 100644 --- a/src/caffe/layers/slice_layer.cpp +++ b/src/caffe/layers/slice_layer.cpp @@ -11,9 +11,8 @@ template void SliceLayer::LayerSetUp(const vector*>& bottom, const vector*>& top) { const SliceParameter& slice_param = this->layer_param_.slice_param(); - slice_dim_ = slice_param.slice_dim(); - CHECK_GE(slice_dim_, 0); - CHECK_LE(slice_dim_, 1) << "Can only slice num and channels"; + CHECK(!(slice_param.has_axis() && slice_param.has_slice_dim())) + << "Either axis or slice_dim should be specified; not both."; slice_point_.clear(); std::copy(slice_param.slice_point().begin(), slice_param.slice_point().end(), @@ -23,18 +22,27 @@ void SliceLayer::LayerSetUp(const vector*>& bottom, template void SliceLayer::Reshape(const vector*>& bottom, const vector*>& top) { - count_ = 0; - num_ = bottom[0]->num(); - channels_ = bottom[0]->channels(); - height_ = bottom[0]->height(); - width_ = bottom[0]->width(); + const int num_axes = bottom[0]->num_axes(); + const SliceParameter& slice_param = this->layer_param_.slice_param(); + if (slice_param.has_slice_dim()) { + slice_axis_ = static_cast(slice_param.slice_dim()); + // Don't allow negative indexing for slice_dim, a uint32 -- almost + // certainly unintended. 
+ CHECK_GE(slice_axis_, 0) << "casting slice_dim from uint32 to int32 " + << "produced negative result; slice_dim must satisfy " + << "0 <= slice_dim < " << kMaxBlobAxes; + CHECK_LT(slice_axis_, num_axes) << "slice_dim out of range."; + } else { + slice_axis_ = bottom[0]->CanonicalAxisIndex(slice_param.axis()); + } + vector top_shape = bottom[0]->shape(); + const int bottom_slice_axis = bottom[0]->shape(slice_axis_); + num_slices_ = bottom[0]->count(0, slice_axis_); + slice_size_ = bottom[0]->count(slice_axis_ + 1); + int count = 0; if (slice_point_.size() != 0) { CHECK_EQ(slice_point_.size(), top.size() - 1); - if (slice_dim_ == 0) { - CHECK_LE(top.size(), num_); - } else { - CHECK_LE(top.size(), channels_); - } + CHECK_LE(top.size(), bottom_slice_axis); int prev = 0; vector slices; for (int i = 0; i < slice_point_.size(); ++i) { @@ -42,94 +50,64 @@ void SliceLayer::Reshape(const vector*>& bottom, slices.push_back(slice_point_[i] - prev); prev = slice_point_[i]; } - if (slice_dim_ == 0) { - slices.push_back(num_ - prev); - for (int i = 0; i < top.size(); ++i) { - top[i]->Reshape(slices[i], channels_, height_, width_); - count_ += top[i]->count(); - } - } else { - slices.push_back(channels_ - prev); - for (int i = 0; i < top.size(); ++i) { - top[i]->Reshape(num_, slices[i], height_, width_); - count_ += top[i]->count(); - } + slices.push_back(bottom_slice_axis - prev); + for (int i = 0; i < top.size(); ++i) { + top_shape[slice_axis_] = slices[i]; + top[i]->Reshape(top_shape); + count += top[i]->count(); } } else { - if (slice_dim_ == 0) { - CHECK_EQ(num_ % top.size(), 0) - << "Number of top blobs (" << top.size() << ") " - << "should evenly divide input num ( " << num_ << ")"; - num_ = num_ / top.size(); - } else { - CHECK_EQ(channels_ % top.size(), 0) - << "Number of top blobs (" << top.size() << ") " - << "should evenly divide input channels ( " << channels_ << ")"; - channels_ = channels_ / top.size(); - } + CHECK_EQ(bottom_slice_axis % top.size(), 0) + << 
"Number of top blobs (" << top.size() << ") should evenly " + << "divide input slice axis (" << bottom_slice_axis << ")"; + top_shape[slice_axis_] = bottom_slice_axis / top.size(); for (int i = 0; i < top.size(); ++i) { - top[i]->Reshape(num_, channels_, height_, width_); - count_ += top[i]->count(); + top[i]->Reshape(top_shape); + count += top[i]->count(); } } - CHECK_EQ(count_, bottom[0]->count()); + CHECK_EQ(count, bottom[0]->count()); } template void SliceLayer::Forward_cpu(const vector*>& bottom, const vector*>& top) { - const Dtype* bottom_data = bottom[0]->mutable_cpu_data(); - if (slice_dim_ == 0) { - int offset_num = 0; - for (int i = 0; i < top.size(); ++i) { - Blob* blob = top[i]; - Dtype* top_data = blob->mutable_cpu_data(); - caffe_copy(blob->count(), bottom_data + bottom[0]->offset(offset_num), - top_data); - offset_num += blob->num(); + int offset_slice_axis = 0; + const Dtype* bottom_data = bottom[0]->cpu_data(); + const int bottom_slice_axis = bottom[0]->shape(slice_axis_); + for (int i = 0; i < top.size(); ++i) { + Dtype* top_data = top[i]->mutable_cpu_data(); + const int top_slice_axis = top[i]->shape(slice_axis_); + for (int n = 0; n < num_slices_; ++n) { + const int top_offset = n * top_slice_axis * slice_size_; + const int bottom_offset = + (n * bottom_slice_axis + offset_slice_axis) * slice_size_; + caffe_copy(top_slice_axis * slice_size_, + bottom_data + bottom_offset, top_data + top_offset); } - } else if (slice_dim_ == 1) { - int offset_channel = 0; - for (int i = 0; i < top.size(); ++i) { - Blob* blob = top[i]; - Dtype* top_data = blob->mutable_cpu_data(); - const int num_elem = blob->channels() * blob->height() * blob->width(); - for (int n = 0; n < num_; ++n) { - caffe_copy(num_elem, bottom_data + bottom[0]->offset(n, offset_channel), - top_data + blob->offset(n)); - } - offset_channel += blob->channels(); - } - } // slice_dim_ is guaranteed to be 0 or 1 by SetUp. 
+ offset_slice_axis += top_slice_axis; + } } template void SliceLayer::Backward_cpu(const vector*>& top, const vector& propagate_down, const vector*>& bottom) { if (!propagate_down[0]) { return; } + int offset_slice_axis = 0; Dtype* bottom_diff = bottom[0]->mutable_cpu_diff(); - if (slice_dim_ == 0) { - int offset_num = 0; - for (int i = 0; i < top.size(); ++i) { - Blob* blob = top[i]; - const Dtype* top_diff = blob->cpu_diff(); - caffe_copy(blob->count(), top_diff, - bottom_diff + bottom[0]->offset(offset_num)); - offset_num += blob->num(); + const int bottom_slice_axis = bottom[0]->shape(slice_axis_); + for (int i = 0; i < top.size(); ++i) { + const Dtype* top_diff = top[i]->cpu_diff(); + const int top_slice_axis = top[i]->shape(slice_axis_); + for (int n = 0; n < num_slices_; ++n) { + const int top_offset = n * top_slice_axis * slice_size_; + const int bottom_offset = + (n * bottom_slice_axis + offset_slice_axis) * slice_size_; + caffe_copy(top_slice_axis * slice_size_, + top_diff + top_offset, bottom_diff + bottom_offset); } - } else if (slice_dim_ == 1) { - int offset_channel = 0; - for (int i = 0; i < top.size(); ++i) { - Blob* blob = top[i]; - const Dtype* top_diff = blob->cpu_diff(); - const int num_elem = blob->channels() * blob->height() * blob->width(); - for (int n = 0; n < num_; ++n) { - caffe_copy(num_elem, top_diff + blob->offset(n), - bottom_diff + bottom[0]->offset(n, offset_channel)); - } - offset_channel += blob->channels(); - } - } // slice_dim_ is guaranteed to be 0 or 1 by SetUp. 
+ offset_slice_axis += top_slice_axis; + } } #ifdef CPU_ONLY diff --git a/src/caffe/layers/slice_layer.cu b/src/caffe/layers/slice_layer.cu index b5c5e61533f..e6e65677bd8 100644 --- a/src/caffe/layers/slice_layer.cu +++ b/src/caffe/layers/slice_layer.cu @@ -9,58 +9,42 @@ namespace caffe { template void SliceLayer::Forward_gpu(const vector*>& bottom, const vector*>& top) { - const Dtype* bottom_data = bottom[0]->mutable_gpu_data(); - if (slice_dim_ == 0) { - int offset_num = 0; - for (int i = 0; i < top.size(); ++i) { - Blob* blob = top[i]; - Dtype* top_data = blob->mutable_gpu_data(); - caffe_copy(blob->count(), bottom_data + bottom[0]->offset(offset_num), - top_data); - offset_num += blob->num(); + int offset_slice_axis = 0; + const Dtype* bottom_data = bottom[0]->gpu_data(); + const int bottom_slice_axis = bottom[0]->shape(slice_axis_); + for (int i = 0; i < top.size(); ++i) { + Dtype* top_data = top[i]->mutable_gpu_data(); + const int top_slice_axis = top[i]->shape(slice_axis_); + for (int n = 0; n < num_slices_; ++n) { + const int top_offset = n * top_slice_axis * slice_size_; + const int bottom_offset = + (n * bottom_slice_axis + offset_slice_axis) * slice_size_; + caffe_copy(top_slice_axis * slice_size_, + bottom_data + bottom_offset, top_data + top_offset); } - } else if (slice_dim_ == 1) { - int offset_channel = 0; - for (int i = 0; i < top.size(); ++i) { - Blob* blob = top[i]; - Dtype* top_data = blob->mutable_gpu_data(); - const int num_elem = blob->channels() * blob->height() * blob->width(); - for (int n = 0; n < num_; ++n) { - caffe_copy(num_elem, bottom_data + bottom[0]->offset(n, offset_channel), - top_data + blob->offset(n)); - } - offset_channel += blob->channels(); - } - } // slice_dim_ is guaranteed to be 0 or 1 by SetUp. 
+ offset_slice_axis += top_slice_axis; + } } template void SliceLayer::Backward_gpu(const vector*>& top, const vector& propagate_down, const vector*>& bottom) { if (!propagate_down[0]) { return; } + int offset_slice_axis = 0; Dtype* bottom_diff = bottom[0]->mutable_gpu_diff(); - if (slice_dim_ == 0) { - int offset_num = 0; - for (int i = 0; i < top.size(); ++i) { - Blob* blob = top[i]; - const Dtype* top_diff = blob->gpu_diff(); - caffe_copy(blob->count(), top_diff, - bottom_diff + bottom[0]->offset(offset_num)); - offset_num += blob->num(); - } - } else if (slice_dim_ == 1) { - int offset_channel = 0; - for (int i = 0; i < top.size(); ++i) { - Blob* blob = top[i]; - const Dtype* top_diff = blob->gpu_diff(); - const int num_elem = blob->channels() * blob->height() * blob->width(); - for (int n = 0; n < num_; ++n) { - caffe_copy(num_elem, top_diff + blob->offset(n), - bottom_diff + bottom[0]->offset(n, offset_channel)); - } - offset_channel += blob->channels(); + const int bottom_slice_axis = bottom[0]->shape(slice_axis_); + for (int i = 0; i < top.size(); ++i) { + const Dtype* top_diff = top[i]->gpu_diff(); + const int top_slice_axis = top[i]->shape(slice_axis_); + for (int n = 0; n < num_slices_; ++n) { + const int top_offset = n * top_slice_axis * slice_size_; + const int bottom_offset = + (n * bottom_slice_axis + offset_slice_axis) * slice_size_; + caffe_copy(top_slice_axis * slice_size_, + top_diff + top_offset, bottom_diff + bottom_offset); } - } // slice_dim_ is guaranteed to be 0 or 1 by SetUp. 
+ offset_slice_axis += top_slice_axis; + } } INSTANTIATE_LAYER_GPU_FUNCS(SliceLayer); diff --git a/src/caffe/layers/softmax_layer.cpp b/src/caffe/layers/softmax_layer.cpp index 25142fdec53..04712c9e653 100644 --- a/src/caffe/layers/softmax_layer.cpp +++ b/src/caffe/layers/softmax_layer.cpp @@ -10,14 +10,18 @@ namespace caffe { template void SoftmaxLayer::Reshape(const vector*>& bottom, const vector*>& top) { - top[0]->Reshape(bottom[0]->num(), bottom[0]->channels(), - bottom[0]->height(), bottom[0]->width()); - sum_multiplier_.Reshape(1, bottom[0]->channels(), 1, 1); + softmax_axis_ = + bottom[0]->CanonicalAxisIndex(this->layer_param_.softmax_param().axis()); + top[0]->ReshapeLike(*bottom[0]); + vector mult_dims(1, bottom[0]->shape(softmax_axis_)); + sum_multiplier_.Reshape(mult_dims); Dtype* multiplier_data = sum_multiplier_.mutable_cpu_data(); - for (int i = 0; i < sum_multiplier_.count(); ++i) { - multiplier_data[i] = 1.; - } - scale_.Reshape(bottom[0]->num(), 1, bottom[0]->height(), bottom[0]->width()); + caffe_set(sum_multiplier_.count(), Dtype(1), multiplier_data); + outer_num_ = bottom[0]->count(0, softmax_axis_); + inner_num_ = bottom[0]->count(softmax_axis_ + 1); + vector scale_dims = bottom[0]->shape(); + scale_dims[softmax_axis_] = 1; + scale_.Reshape(scale_dims); } template @@ -26,34 +30,32 @@ void SoftmaxLayer::Forward_cpu(const vector*>& bottom, const Dtype* bottom_data = bottom[0]->cpu_data(); Dtype* top_data = top[0]->mutable_cpu_data(); Dtype* scale_data = scale_.mutable_cpu_data(); - int num = bottom[0]->num(); - int channels = bottom[0]->channels(); - int dim = bottom[0]->count() / bottom[0]->num(); - int spatial_dim = bottom[0]->height() * bottom[0]->width(); + int channels = bottom[0]->shape(softmax_axis_); + int dim = bottom[0]->count() / outer_num_; caffe_copy(bottom[0]->count(), bottom_data, top_data); // We need to subtract the max to avoid numerical issues, compute the exp, // and then normalize. 
- for (int i = 0; i < num; ++i) { + for (int i = 0; i < outer_num_; ++i) { // initialize scale_data to the first plane - caffe_copy(spatial_dim, bottom_data + i * dim, scale_data); + caffe_copy(inner_num_, bottom_data + i * dim, scale_data); for (int j = 0; j < channels; j++) { - for (int k = 0; k < spatial_dim; k++) { + for (int k = 0; k < inner_num_; k++) { scale_data[k] = std::max(scale_data[k], - bottom_data[i * dim + j * spatial_dim + k]); + bottom_data[i * dim + j * inner_num_ + k]); } } // subtraction - caffe_cpu_gemm(CblasNoTrans, CblasNoTrans, channels, spatial_dim, - 1, -1., sum_multiplier_.cpu_data(), scale_data, 1., top_data + i * dim); + caffe_cpu_gemm(CblasNoTrans, CblasNoTrans, channels, inner_num_, + 1, -1., sum_multiplier_.cpu_data(), scale_data, 1., top_data); // exponentiation - caffe_exp(dim, top_data + i * dim, top_data + i * dim); + caffe_exp(dim, top_data, top_data); // sum after exp - caffe_cpu_gemv(CblasTrans, channels, spatial_dim, 1., - top_data + i * dim, sum_multiplier_.cpu_data(), 0., scale_data); + caffe_cpu_gemv(CblasTrans, channels, inner_num_, 1., + top_data, sum_multiplier_.cpu_data(), 0., scale_data); // division for (int j = 0; j < channels; j++) { - caffe_div(spatial_dim, top_data + top[0]->offset(i, j), scale_data, - top_data + top[0]->offset(i, j)); + caffe_div(inner_num_, top_data, scale_data, top_data); + top_data += inner_num_; } } } @@ -66,20 +68,18 @@ void SoftmaxLayer::Backward_cpu(const vector*>& top, const Dtype* top_data = top[0]->cpu_data(); Dtype* bottom_diff = bottom[0]->mutable_cpu_diff(); Dtype* scale_data = scale_.mutable_cpu_data(); - int num = top[0]->num(); - int channels = top[0]->channels(); - int dim = top[0]->count() / top[0]->num(); - int spatial_dim = top[0]->height() * top[0]->width(); + int channels = top[0]->shape(softmax_axis_); + int dim = top[0]->count() / outer_num_; caffe_copy(top[0]->count(), top_diff, bottom_diff); - for (int i = 0; i < num; ++i) { + for (int i = 0; i < outer_num_; ++i) { // 
compute dot(top_diff, top_data) and subtract them from the bottom diff - for (int k = 0; k < spatial_dim; ++k) { + for (int k = 0; k < inner_num_; ++k) { scale_data[k] = caffe_cpu_strided_dot(channels, - bottom_diff + i * dim + k, spatial_dim, - top_data + i * dim + k, spatial_dim); + bottom_diff + i * dim + k, inner_num_, + top_data + i * dim + k, inner_num_); } // subtraction - caffe_cpu_gemm(CblasNoTrans, CblasNoTrans, channels, spatial_dim, 1, + caffe_cpu_gemm(CblasNoTrans, CblasNoTrans, channels, inner_num_, 1, -1., sum_multiplier_.cpu_data(), scale_data, 1., bottom_diff + i * dim); } // elementwise multiplication diff --git a/src/caffe/layers/softmax_layer.cu b/src/caffe/layers/softmax_layer.cu index 6b8871a0b20..1f9c3a41203 100644 --- a/src/caffe/layers/softmax_layer.cu +++ b/src/caffe/layers/softmax_layer.cu @@ -90,36 +90,33 @@ void SoftmaxLayer::Forward_gpu(const vector*>& bottom, Dtype* top_data = top[0]->mutable_gpu_data(); Dtype* scale_data = scale_.mutable_gpu_data(); int count = bottom[0]->count(); - int num = bottom[0]->num(); - int channels = bottom[0]->channels(); - int spatial_dim = bottom[0]->height() * bottom[0]->width(); + int channels = top[0]->shape(softmax_axis_); caffe_copy(count, bottom_data, top_data); // We need to subtract the max to avoid numerical issues, compute the exp, // and then normalize. 
// compute max // NOLINT_NEXT_LINE(whitespace/operators) - kernel_channel_max<<>>(num, channels, spatial_dim, top_data, + kernel_channel_max<<>>(outer_num_, channels, inner_num_, top_data, scale_data); // subtract // NOLINT_NEXT_LINE(whitespace/operators) kernel_channel_subtract<<>>(count, num, channels, spatial_dim, + CAFFE_CUDA_NUM_THREADS>>>(count, outer_num_, channels, inner_num_, scale_data, top_data); // exponentiate // NOLINT_NEXT_LINE(whitespace/operators) - kernel_exp<<>>(num * channels * spatial_dim, top_data, - top_data); + kernel_exp<<>>( + count, top_data, top_data); // sum after exp // NOLINT_NEXT_LINE(whitespace/operators) - kernel_channel_sum<<>>(num, channels, spatial_dim, top_data, + kernel_channel_sum<<>>(outer_num_, channels, inner_num_, top_data, scale_data); // divide // NOLINT_NEXT_LINE(whitespace/operators) kernel_channel_div<<>>(count, num, channels, spatial_dim, + CAFFE_CUDA_NUM_THREADS>>>(count, outer_num_, channels, inner_num_, scale_data, top_data); } @@ -131,18 +128,16 @@ void SoftmaxLayer::Backward_gpu(const vector*>& top, Dtype* bottom_diff = bottom[0]->mutable_gpu_diff(); Dtype* scale_data = scale_.mutable_gpu_data(); int count = top[0]->count(); - int num = top[0]->num(); - int channels = top[0]->channels(); - int spatial_dim = top[0]->height() * top[0]->width(); - caffe_copy(top[0]->count(), top_diff, bottom_diff); + int channels = top[0]->shape(softmax_axis_); + caffe_copy(count, top_diff, bottom_diff); // Compute inner1d(top_diff, top_data) and subtract them from the bottom diff. 
// NOLINT_NEXT_LINE(whitespace/operators) - kernel_channel_dot<<>>(num, channels, spatial_dim, top_diff, top_data, - scale_data); + kernel_channel_dot<<>>(outer_num_, channels, inner_num_, + top_diff, top_data, scale_data); // NOLINT_NEXT_LINE(whitespace/operators) kernel_channel_subtract<<>>(count, num, channels, spatial_dim, + CAFFE_CUDA_NUM_THREADS>>>(count, outer_num_, channels, inner_num_, scale_data, bottom_diff); // elementwise multiplication caffe_gpu_mul(top[0]->count(), bottom_diff, top_data, bottom_diff); diff --git a/src/caffe/layers/softmax_loss_layer.cpp b/src/caffe/layers/softmax_loss_layer.cpp index 0c9ba2c6626..132c30796a4 100644 --- a/src/caffe/layers/softmax_loss_layer.cpp +++ b/src/caffe/layers/softmax_loss_layer.cpp @@ -35,6 +35,14 @@ void SoftmaxWithLossLayer::Reshape( const vector*>& bottom, const vector*>& top) { LossLayer::Reshape(bottom, top); softmax_layer_->Reshape(softmax_bottom_vec_, softmax_top_vec_); + softmax_axis_ = this->layer_param_.softmax_param().axis(); + outer_num_ = bottom[0]->count(0, softmax_axis_); + inner_num_ = bottom[0]->count(softmax_axis_ + 1); + CHECK_EQ(outer_num_ * inner_num_, bottom[1]->count()) + << "Number of labels must match number of predictions; " + << "e.g., if softmax axis == 1 and prediction shape is (N, C, H, W), " + << "label count (number of labels) must be N*H*W, " + << "with integer values in {0, 1, ..., C-1}."; if (top.size() >= 2) { // softmax output top[1]->ReshapeLike(*bottom[0]); @@ -48,20 +56,18 @@ void SoftmaxWithLossLayer::Forward_cpu( softmax_layer_->Forward(softmax_bottom_vec_, softmax_top_vec_); const Dtype* prob_data = prob_.cpu_data(); const Dtype* label = bottom[1]->cpu_data(); - int num = prob_.num(); - int dim = prob_.count() / num; - int spatial_dim = prob_.height() * prob_.width(); + int dim = prob_.count() / outer_num_; int count = 0; Dtype loss = 0; - for (int i = 0; i < num; ++i) { - for (int j = 0; j < spatial_dim; j++) { - const int label_value = static_cast(label[i * 
spatial_dim + j]); + for (int i = 0; i < outer_num_; ++i) { + for (int j = 0; j < inner_num_; j++) { + const int label_value = static_cast(label[i * inner_num_ + j]); if (has_ignore_label_ && label_value == ignore_label_) { continue; } DCHECK_GE(label_value, 0); - DCHECK_LT(label_value, prob_.channels()); - loss -= log(std::max(prob_data[i * dim + label_value * spatial_dim + j], + DCHECK_LT(label_value, prob_.shape(softmax_axis_)); + loss -= log(std::max(prob_data[i * dim + label_value * inner_num_ + j], Dtype(FLT_MIN))); ++count; } @@ -69,7 +75,7 @@ void SoftmaxWithLossLayer::Forward_cpu( if (normalize_) { top[0]->mutable_cpu_data()[0] = loss / count; } else { - top[0]->mutable_cpu_data()[0] = loss / num; + top[0]->mutable_cpu_data()[0] = loss / outer_num_; } if (top.size() == 2) { top[1]->ShareData(prob_); @@ -88,19 +94,17 @@ void SoftmaxWithLossLayer::Backward_cpu(const vector*>& top, const Dtype* prob_data = prob_.cpu_data(); caffe_copy(prob_.count(), prob_data, bottom_diff); const Dtype* label = bottom[1]->cpu_data(); - int num = prob_.num(); - int dim = prob_.count() / num; - int spatial_dim = prob_.height() * prob_.width(); + int dim = prob_.count() / outer_num_; int count = 0; - for (int i = 0; i < num; ++i) { - for (int j = 0; j < spatial_dim; ++j) { - const int label_value = static_cast(label[i * spatial_dim + j]); + for (int i = 0; i < outer_num_; ++i) { + for (int j = 0; j < inner_num_; ++j) { + const int label_value = static_cast(label[i * inner_num_ + j]); if (has_ignore_label_ && label_value == ignore_label_) { - for (int c = 0; c < bottom[0]->channels(); ++c) { - bottom_diff[i * dim + c * spatial_dim + j] = 0; + for (int c = 0; c < bottom[0]->shape(softmax_axis_); ++c) { + bottom_diff[i * dim + c * inner_num_ + j] = 0; } } else { - bottom_diff[i * dim + label_value * spatial_dim + j] -= 1; + bottom_diff[i * dim + label_value * inner_num_ + j] -= 1; ++count; } } @@ -110,7 +114,7 @@ void SoftmaxWithLossLayer::Backward_cpu(const vector*>& top, if 
(normalize_) { caffe_scal(prob_.count(), loss_weight / count, bottom_diff); } else { - caffe_scal(prob_.count(), loss_weight / num, bottom_diff); + caffe_scal(prob_.count(), loss_weight / outer_num_, bottom_diff); } } } diff --git a/src/caffe/layers/softmax_loss_layer.cu b/src/caffe/layers/softmax_loss_layer.cu index 215d589ffee..7e0f3da4552 100644 --- a/src/caffe/layers/softmax_loss_layer.cu +++ b/src/caffe/layers/softmax_loss_layer.cu @@ -35,10 +35,8 @@ void SoftmaxWithLossLayer::Forward_gpu( softmax_layer_->Forward(softmax_bottom_vec_, softmax_top_vec_); const Dtype* prob_data = prob_.gpu_data(); const Dtype* label = bottom[1]->gpu_data(); - const int num = prob_.num(); - const int dim = prob_.count() / num; - const int spatial_dim = prob_.height() * prob_.width(); - const int nthreads = num * spatial_dim; + const int dim = prob_.count() / outer_num_; + const int nthreads = outer_num_ * inner_num_; // Since this memory is not used for anything until it is overwritten // on the backward pass, we use it here to avoid having to allocate new GPU // memory to accumulate intermediate results in the kernel. 
@@ -49,7 +47,7 @@ void SoftmaxWithLossLayer::Forward_gpu( // NOLINT_NEXT_LINE(whitespace/operators) SoftmaxLossForwardGPU<<>>(nthreads, prob_data, label, loss_data, - num, dim, spatial_dim, has_ignore_label_, ignore_label_, counts); + outer_num_, dim, inner_num_, has_ignore_label_, ignore_label_, counts); Dtype loss; caffe_gpu_asum(nthreads, loss_data, &loss); if (normalize_) { @@ -57,7 +55,7 @@ void SoftmaxWithLossLayer::Forward_gpu( caffe_gpu_asum(nthreads, counts, &count); loss /= count; } else { - loss /= num; + loss /= outer_num_; } top[0]->mutable_cpu_data()[0] = loss; if (top.size() == 2) { @@ -102,24 +100,22 @@ void SoftmaxWithLossLayer::Backward_gpu(const vector*>& top, const Dtype* top_data = top[0]->gpu_data(); caffe_gpu_memcpy(prob_.count() * sizeof(Dtype), prob_data, bottom_diff); const Dtype* label = bottom[1]->gpu_data(); - const int num = prob_.num(); - const int dim = prob_.count() / num; - const int spatial_dim = prob_.height() * prob_.width(); - const int nthreads = num * spatial_dim; + const int dim = prob_.count() / outer_num_; + const int nthreads = outer_num_ * inner_num_; // Since this memory is never used for anything else, // we use to to avoid allocating new GPU memory. 
Dtype* counts = prob_.mutable_gpu_diff(); // NOLINT_NEXT_LINE(whitespace/operators) SoftmaxLossBackwardGPU<<>>(nthreads, top_data, label, bottom_diff, - num, dim, spatial_dim, has_ignore_label_, ignore_label_, counts); + outer_num_, dim, inner_num_, has_ignore_label_, ignore_label_, counts); const Dtype loss_weight = top[0]->cpu_diff()[0]; if (normalize_) { Dtype count; caffe_gpu_asum(nthreads, counts, &count); caffe_gpu_scal(prob_.count(), loss_weight / count, bottom_diff); } else { - caffe_gpu_scal(prob_.count(), loss_weight / num, bottom_diff); + caffe_gpu_scal(prob_.count(), loss_weight / outer_num_, bottom_diff); } } } diff --git a/src/caffe/layers/split_layer.cpp b/src/caffe/layers/split_layer.cpp index d6929b99683..272cb59cd37 100644 --- a/src/caffe/layers/split_layer.cpp +++ b/src/caffe/layers/split_layer.cpp @@ -18,8 +18,7 @@ void SplitLayer::Reshape(const vector*>& bottom, // some strange effects in practice...) CHECK_NE(top[i], bottom[0]) << this->type() << " Layer does not " "allow in-place computation."; - top[i]->Reshape(bottom[0]->num(), bottom[0]->channels(), - bottom[0]->height(), bottom[0]->width()); + top[i]->ReshapeLike(*bottom[0]); CHECK_EQ(count_, top[i]->count()); } } diff --git a/src/caffe/layers/window_data_layer.cpp b/src/caffe/layers/window_data_layer.cpp index 36e41560327..c127d56bc46 100644 --- a/src/caffe/layers/window_data_layer.cpp +++ b/src/caffe/layers/window_data_layer.cpp @@ -177,8 +177,9 @@ void WindowDataLayer::DataLayerSetUp(const vector*>& bottom, << top[0]->channels() << "," << top[0]->height() << "," << top[0]->width(); // label - top[1]->Reshape(batch_size, 1, 1, 1); - this->prefetch_label_.Reshape(batch_size, 1, 1, 1); + vector label_shape(1, batch_size); + top[1]->Reshape(label_shape); + this->prefetch_label_.Reshape(label_shape); // data mean has_mean_file_ = this->transform_param_.has_mean_file(); diff --git a/src/caffe/net.cpp b/src/caffe/net.cpp index c359be9b575..e8f7c05e09d 100644 --- a/src/caffe/net.cpp +++ 
b/src/caffe/net.cpp @@ -48,8 +48,16 @@ void Net::Init(const NetParameter& in_param) { name_ = param.name(); map blob_name_to_idx; set available_blobs; - CHECK_EQ(param.input_size() * 4, param.input_dim_size()) - << "Incorrect input blob dimension specifications."; + CHECK(param.input_dim_size() == 0 || param.input_shape_size() == 0) + << "Must specify either input_shape OR deprecated input_dim, not both."; + if (param.input_dim_size() > 0) { + // Deprecated 4D dimensions. + CHECK_EQ(param.input_size() * 4, param.input_dim_size()) + << "Incorrect input blob dimension specifications."; + } else { + CHECK_EQ(param.input_size(), param.input_shape_size()) + << "Exactly one input_shape must be specified per input."; + } memory_used_ = 0; // set the input blobs for (int input_id = 0; input_id < param.input_size(); ++input_id) { @@ -109,11 +117,7 @@ void Net::Init(const NetParameter& in_param) { blob_loss_weights_.resize(top_id_vecs_[layer_id][top_id] + 1, Dtype(0)); } blob_loss_weights_[top_id_vecs_[layer_id][top_id]] = layer->loss(top_id); - LOG(INFO) << "Top shape: " << top_vecs_[layer_id][top_id]->num() << " " - << top_vecs_[layer_id][top_id]->channels() << " " - << top_vecs_[layer_id][top_id]->height() << " " - << top_vecs_[layer_id][top_id]->width() << " (" - << top_vecs_[layer_id][top_id]->count() << ")"; + LOG(INFO) << "Top shape: " << top_vecs_[layer_id][top_id]->shape_string(); if (layer->loss(top_id)) { LOG(INFO) << " with loss weight " << layer->loss(top_id); } @@ -343,10 +347,14 @@ void Net::AppendTop(const NetParameter& param, const int layer_id, if (blob_name_to_idx) { (*blob_name_to_idx)[blob_name] = blob_id; } if (layer_id == -1) { // Set the (explicitly specified) dimensions of the input blob. 
- blob_pointer->Reshape(param.input_dim(top_id * 4), - param.input_dim(top_id * 4 + 1), - param.input_dim(top_id * 4 + 2), - param.input_dim(top_id * 4 + 3)); + if (param.input_dim_size() > 0) { + blob_pointer->Reshape(param.input_dim(top_id * 4), + param.input_dim(top_id * 4 + 1), + param.input_dim(top_id * 4 + 2), + param.input_dim(top_id * 4 + 3)); + } else { + blob_pointer->Reshape(param.input_shape(top_id)); + } net_input_blob_indices_.push_back(blob_id); net_input_blobs_.push_back(blob_pointer.get()); } else { @@ -427,14 +435,7 @@ void Net::AppendParam(const NetParameter& param, const int layer_id, << "Shared parameter blobs must have the same count."; } else { // Strict dimension checking -- all dims must be the same. - CHECK_EQ(this_blob->num(), owner_blob->num()) - << "Shared parameter blobs must have the same num."; - CHECK_EQ(this_blob->channels(), owner_blob->channels()) - << "Shared parameter blobs must have the same channels."; - CHECK_EQ(this_blob->height(), owner_blob->height()) - << "Shared parameter blobs must have the same height."; - CHECK_EQ(this_blob->width(), owner_blob->width()) - << "Shared parameter blobs must have the same width."; + CHECK(this_blob->shape() == owner_blob->shape()); } layers_[layer_id]->blobs()[param_id]->ShareData( *layers_[owner_layer_id]->blobs()[owner_param_id]); @@ -640,10 +641,7 @@ void Net::ShareTrainedLayersWith(const Net* other) { << "Incompatible number of blobs for layer " << source_layer_name; for (int j = 0; j < target_blobs.size(); ++j) { Blob* source_blob = source_layer->blobs()[j].get(); - CHECK_EQ(target_blobs[j]->num(), source_blob->num()); - CHECK_EQ(target_blobs[j]->channels(), source_blob->channels()); - CHECK_EQ(target_blobs[j]->height(), source_blob->height()); - CHECK_EQ(target_blobs[j]->width(), source_blob->width()); + CHECK(target_blobs[j]->shape() == source_blob->shape()); target_blobs[j]->ShareData(*source_blob); } } @@ -707,11 +705,8 @@ void Net::CopyTrainedLayersFrom(const NetParameter& 
param) { CHECK_EQ(target_blobs.size(), source_layer.blobs_size()) << "Incompatible number of blobs for layer " << source_layer_name; for (int j = 0; j < target_blobs.size(); ++j) { - CHECK_EQ(target_blobs[j]->num(), source_layer.blobs(j).num()); - CHECK_EQ(target_blobs[j]->channels(), source_layer.blobs(j).channels()); - CHECK_EQ(target_blobs[j]->height(), source_layer.blobs(j).height()); - CHECK_EQ(target_blobs[j]->width(), source_layer.blobs(j).width()); - target_blobs[j]->FromProto(source_layer.blobs(j)); + const bool kReshape = false; + target_blobs[j]->FromProto(source_layer.blobs(j), kReshape); } } } diff --git a/src/caffe/proto/caffe.proto b/src/caffe/proto/caffe.proto index 84b475ce3cd..3b4794664b5 100644 --- a/src/caffe/proto/caffe.proto +++ b/src/caffe/proto/caffe.proto @@ -2,13 +2,21 @@ syntax = "proto2"; package caffe; +// Specifies the shape (dimensions) of a Blob. +message BlobShape { + repeated int64 dim = 1 [packed = true]; +} + message BlobProto { + optional BlobShape shape = 7; + repeated float data = 5 [packed = true]; + repeated float diff = 6 [packed = true]; + + // 4D dimensions -- deprecated. Use "shape" instead. optional int32 num = 1 [default = 0]; optional int32 channels = 2 [default = 0]; optional int32 height = 3 [default = 0]; optional int32 width = 4 [default = 0]; - repeated float data = 5 [packed = true]; - repeated float diff = 6 [packed = true]; } // The BlobProtoVector is simply a way to pass multiple blobproto instances @@ -47,10 +55,15 @@ message NetParameter { optional string name = 1; // consider giving the network a name // The input blobs to the network. repeated string input = 3; - // The dim of the input blobs. For each input blob there should be four + // The shape of the input blobs. + repeated BlobShape input_shape = 8; + + // 4D input dimensions -- deprecated. Use "input_shape" instead. + // If specified, for each input blob there should be four // values specifying the num, channels, height and width of the input blob.
// Thus, there should be a total of (4 * #input) numbers. repeated int32 input_dim = 4; + // Whether the network will force every layer to carry out backward operation. // If set False, then whether to carry out backward is determined // automatically according to the net structure and learning rates. @@ -365,9 +378,13 @@ message ArgMaxParameter { // Message that stores parameters used by ConcatLayer message ConcatParameter { - // Concat Layer needs to specify the dimension along the concat will happen, - // the other dimensions must be the same for all the bottom blobs - // By default it will concatenate blobs along channels dimension + // The axis along which to concatenate -- may be negative to index from the + // end (e.g., -1 for the last axis). Other axes must have the + // same dimension for all the bottom blobs. + // By default, ConcatLayer concatenates blobs along the "channels" axis (1). + optional int32 axis = 2 [default = 1]; + + // DEPRECATED: alias for "axis" -- does not support negative indexing. optional uint32 concat_dim = 1 [default = 1]; } @@ -444,13 +461,15 @@ message DropoutParameter { // (or constant) data generated by "Fillers" (see "message FillerParameter"). message DummyDataParameter { // This layer produces N >= 1 top blobs. DummyDataParameter must specify 1 or N - // num, N channels, N height, and N width fields, and must specify 0, 1 or N - // data_fillers. + // shape fields, and 0, 1 or N data_fillers. // // If 0 data_fillers are specified, ConstantFiller with a value of 0 is used. // If 1 data_filler is specified, it is applied to all top blobs. If N are // specified, the ith is applied to the ith top blob. repeated FillerParameter data_filler = 1; + repeated BlobShape shape = 6; + + // 4D dimensions -- deprecated. Use "shape" instead. 
repeated uint32 num = 2; repeated uint32 channels = 3; repeated uint32 height = 4; @@ -548,6 +567,11 @@ message InnerProductParameter { optional bool bias_term = 2 [default = true]; // whether to have bias terms optional FillerParameter weight_filler = 3; // The filler for the weight optional FillerParameter bias_filler = 4; // The filler for the bias + + // The first axis to be lumped into a single inner product computation; + // all preceding axes are retained in the output. + // May be negative to index from the end (e.g., -1 for the last axis). + optional int32 axis = 5 [default = 1]; } // Message that stores parameters used by LRNLayer @@ -652,12 +676,14 @@ message SigmoidParameter { // Message that stores parameters used by SliceLayer message SliceParameter { - // SliceLayer needs to know which dimension to slice across. - // Currently, SliceLayer only supports slicing across num (dim 0) - // and channels (dim 1). - // By default, SliceLayer slices across channels. - optional uint32 slice_dim = 1 [default = 1]; + // The axis along which to slice -- may be negative to index from the end + // (e.g., -1 for the last axis). + // By default, SliceLayer slices blobs along the "channels" axis (1). + optional int32 axis = 3 [default = 1]; repeated uint32 slice_point = 2; + + // DEPRECATED: alias for "axis" -- does not support negative indexing. + optional uint32 slice_dim = 1 [default = 1]; } // Message that stores parameters used by SoftmaxLayer, SoftmaxWithLossLayer @@ -668,6 +694,11 @@ message SoftmaxParameter { CUDNN = 2; } optional Engine engine = 1 [default = DEFAULT]; + + // The axis along which to perform the softmax -- may be negative to index + // from the end (e.g., -1 for the last axis). + // Any other axes will be evaluated as independent softmaxes.
+ optional int32 axis = 2 [default = 1]; } // Message that stores parameters used by TanHLayer diff --git a/src/caffe/solver.cpp b/src/caffe/solver.cpp index 8ed8aec2fc8..034390e6824 100644 --- a/src/caffe/solver.cpp +++ b/src/caffe/solver.cpp @@ -420,16 +420,10 @@ void SGDSolver::PreSolve() { update_.clear(); temp_.clear(); for (int i = 0; i < net_params.size(); ++i) { - const Blob* net_param = net_params[i].get(); - history_.push_back(shared_ptr >(new Blob( - net_param->num(), net_param->channels(), net_param->height(), - net_param->width()))); - update_.push_back(shared_ptr >(new Blob( - net_param->num(), net_param->channels(), net_param->height(), - net_param->width()))); - temp_.push_back(shared_ptr >(new Blob( - net_param->num(), net_param->channels(), net_param->height(), - net_param->width()))); + const vector& shape = net_params[i]->shape(); + history_.push_back(shared_ptr >(new Blob(shape))); + update_.push_back(shared_ptr >(new Blob(shape))); + temp_.push_back(shared_ptr >(new Blob(shape))); } } diff --git a/src/caffe/test/test_accuracy_layer.cpp b/src/caffe/test/test_accuracy_layer.cpp index fa59fab1e8a..1c58b767bfc 100644 --- a/src/caffe/test/test_accuracy_layer.cpp +++ b/src/caffe/test/test_accuracy_layer.cpp @@ -19,10 +19,16 @@ template class AccuracyLayerTest : public ::testing::Test { protected: AccuracyLayerTest() - : blob_bottom_data_(new Blob(100, 10, 1, 1)), - blob_bottom_label_(new Blob(100, 1, 1, 1)), + : blob_bottom_data_(new Blob()), + blob_bottom_label_(new Blob()), blob_top_(new Blob()), top_k_(3) { + vector shape(2); + shape[0] = 100; + shape[1] = 10; + blob_bottom_data_->Reshape(shape); + shape.resize(1); + blob_bottom_label_->Reshape(shape); // fill the probability values FillerParameter filler_param; GaussianFiller filler(filler_param); diff --git a/src/caffe/test/test_blob.cpp b/src/caffe/test/test_blob.cpp index e0678061173..7da6423b67c 100644 --- a/src/caffe/test/test_blob.cpp +++ b/src/caffe/test/test_blob.cpp @@ -1,4 +1,5 @@ 
#include +#include #include "gtest/gtest.h" @@ -31,10 +32,7 @@ TYPED_TEST(BlobSimpleTest, TestInitialization) { EXPECT_EQ(this->blob_preshaped_->height(), 4); EXPECT_EQ(this->blob_preshaped_->width(), 5); EXPECT_EQ(this->blob_preshaped_->count(), 120); - EXPECT_EQ(this->blob_->num(), 0); - EXPECT_EQ(this->blob_->channels(), 0); - EXPECT_EQ(this->blob_->height(), 0); - EXPECT_EQ(this->blob_->width(), 0); + EXPECT_EQ(this->blob_->num_axes(), 0); EXPECT_EQ(this->blob_->count(), 0); } @@ -54,6 +52,59 @@ TYPED_TEST(BlobSimpleTest, TestReshape) { EXPECT_EQ(this->blob_->count(), 120); } +TYPED_TEST(BlobSimpleTest, TestLegacyBlobProtoShapeEquals) { + BlobProto blob_proto; + + // Reshape to (3 x 2). + vector shape(2); + shape[0] = 3; + shape[1] = 2; + this->blob_->Reshape(shape); + + // (3 x 2) blob == (1 x 1 x 3 x 2) legacy blob + blob_proto.set_num(1); + blob_proto.set_channels(1); + blob_proto.set_height(3); + blob_proto.set_width(2); + EXPECT_TRUE(this->blob_->ShapeEquals(blob_proto)); + + // (3 x 2) blob != (0 x 1 x 3 x 2) legacy blob + blob_proto.set_num(0); + blob_proto.set_channels(1); + blob_proto.set_height(3); + blob_proto.set_width(2); + EXPECT_FALSE(this->blob_->ShapeEquals(blob_proto)); + + // (3 x 2) blob != (3 x 1 x 3 x 2) legacy blob + blob_proto.set_num(3); + blob_proto.set_channels(1); + blob_proto.set_height(3); + blob_proto.set_width(2); + EXPECT_FALSE(this->blob_->ShapeEquals(blob_proto)); + + // Reshape to (1 x 3 x 2). + shape.insert(shape.begin(), 1); + this->blob_->Reshape(shape); + + // (1 x 3 x 2) blob == (1 x 1 x 3 x 2) legacy blob + blob_proto.set_num(1); + blob_proto.set_channels(1); + blob_proto.set_height(3); + blob_proto.set_width(2); + EXPECT_TRUE(this->blob_->ShapeEquals(blob_proto)); + + // Reshape to (2 x 3 x 2). 
+ shape[0] = 2; + this->blob_->Reshape(shape); + + // (2 x 3 x 2) blob != (1 x 1 x 3 x 2) legacy blob + blob_proto.set_num(1); + blob_proto.set_channels(1); + blob_proto.set_height(3); + blob_proto.set_width(2); + EXPECT_FALSE(this->blob_->ShapeEquals(blob_proto)); +} + template class BlobMathTest : public MultiDeviceTest { typedef typename TypeParam::Dtype Dtype; diff --git a/src/caffe/test/test_concat_layer.cpp b/src/caffe/test/test_concat_layer.cpp index f14f1d2fa4f..662a50fa23b 100644 --- a/src/caffe/test/test_concat_layer.cpp +++ b/src/caffe/test/test_concat_layer.cpp @@ -19,9 +19,9 @@ class ConcatLayerTest : public MultiDeviceTest { protected: ConcatLayerTest() - : blob_bottom_0(new Blob(2, 3, 6, 5)), - blob_bottom_1(new Blob(2, 5, 6, 5)), - blob_bottom_2(new Blob(5, 3, 6, 5)), + : blob_bottom_0_(new Blob(2, 3, 6, 5)), + blob_bottom_1_(new Blob(2, 5, 6, 5)), + blob_bottom_2_(new Blob(5, 3, 6, 5)), blob_top_(new Blob()) {} virtual void SetUp() { // fill the values @@ -29,30 +29,30 @@ class ConcatLayerTest : public MultiDeviceTest { FillerParameter filler_param; filler_param.set_value(1.); filler.reset(new ConstantFiller(filler_param)); - filler->Fill(this->blob_bottom_0); + filler->Fill(this->blob_bottom_0_); filler_param.set_value(2.); filler.reset(new ConstantFiller(filler_param)); - filler->Fill(this->blob_bottom_1); + filler->Fill(this->blob_bottom_1_); filler_param.set_value(3.); filler.reset(new ConstantFiller(filler_param)); - filler->Fill(this->blob_bottom_2); - blob_bottom_vec_0.push_back(blob_bottom_0); - blob_bottom_vec_0.push_back(blob_bottom_1); - blob_bottom_vec_1.push_back(blob_bottom_0); - blob_bottom_vec_1.push_back(blob_bottom_2); + filler->Fill(this->blob_bottom_2_); + blob_bottom_vec_0_.push_back(blob_bottom_0_); + blob_bottom_vec_0_.push_back(blob_bottom_1_); + blob_bottom_vec_1_.push_back(blob_bottom_0_); + blob_bottom_vec_1_.push_back(blob_bottom_2_); blob_top_vec_.push_back(blob_top_); } virtual ~ConcatLayerTest() { - delete 
blob_bottom_0; delete blob_bottom_1; - delete blob_bottom_2; delete blob_top_; + delete blob_bottom_0_; delete blob_bottom_1_; + delete blob_bottom_2_; delete blob_top_; } - Blob* const blob_bottom_0; - Blob* const blob_bottom_1; - Blob* const blob_bottom_2; + Blob* const blob_bottom_0_; + Blob* const blob_bottom_1_; + Blob* const blob_bottom_2_; Blob* const blob_top_; - vector*> blob_bottom_vec_0, blob_bottom_vec_1; + vector*> blob_bottom_vec_0_, blob_bottom_vec_1_; vector*> blob_top_vec_; }; @@ -61,61 +61,115 @@ TYPED_TEST_CASE(ConcatLayerTest, TestDtypesAndDevices); TYPED_TEST(ConcatLayerTest, TestSetupNum) { typedef typename TypeParam::Dtype Dtype; LayerParameter layer_param; - layer_param.mutable_concat_param()->set_concat_dim(0); + layer_param.mutable_concat_param()->set_axis(0); ConcatLayer layer(layer_param); - layer.SetUp(this->blob_bottom_vec_1, this->blob_top_vec_); + layer.SetUp(this->blob_bottom_vec_1_, this->blob_top_vec_); EXPECT_EQ(this->blob_top_->num(), - this->blob_bottom_0->num() + this->blob_bottom_2->num()); - EXPECT_EQ(this->blob_top_->channels(), this->blob_bottom_0->channels()); - EXPECT_EQ(this->blob_top_->height(), this->blob_bottom_0->height()); - EXPECT_EQ(this->blob_top_->width(), this->blob_bottom_0->width()); + this->blob_bottom_0_->num() + this->blob_bottom_2_->num()); + EXPECT_EQ(this->blob_top_->channels(), this->blob_bottom_0_->channels()); + EXPECT_EQ(this->blob_top_->height(), this->blob_bottom_0_->height()); + EXPECT_EQ(this->blob_top_->width(), this->blob_bottom_0_->width()); } TYPED_TEST(ConcatLayerTest, TestSetupChannels) { typedef typename TypeParam::Dtype Dtype; LayerParameter layer_param; ConcatLayer layer(layer_param); - layer.SetUp(this->blob_bottom_vec_0, this->blob_top_vec_); - EXPECT_EQ(this->blob_top_->num(), this->blob_bottom_0->num()); + layer.SetUp(this->blob_bottom_vec_0_, this->blob_top_vec_); + EXPECT_EQ(this->blob_top_->num(), this->blob_bottom_0_->num()); EXPECT_EQ(this->blob_top_->channels(), - 
this->blob_bottom_0->channels()+this->blob_bottom_1->channels()); - EXPECT_EQ(this->blob_top_->height(), this->blob_bottom_0->height()); - EXPECT_EQ(this->blob_top_->width(), this->blob_bottom_0->width()); + this->blob_bottom_0_->channels() + this->blob_bottom_1_->channels()); + EXPECT_EQ(this->blob_top_->height(), this->blob_bottom_0_->height()); + EXPECT_EQ(this->blob_top_->width(), this->blob_bottom_0_->width()); } +TYPED_TEST(ConcatLayerTest, TestSetupChannelsNegativeIndexing) { + typedef typename TypeParam::Dtype Dtype; + LayerParameter layer_param; + ConcatLayer layer(layer_param); + // "channels" index is the third one from the end -- test negative indexing + // by setting axis to -3 and checking that we get the same results as above in + // TestSetupChannels. + layer_param.mutable_concat_param()->set_axis(-3); + layer.SetUp(this->blob_bottom_vec_0_, this->blob_top_vec_); + EXPECT_EQ(this->blob_top_->num(), this->blob_bottom_0_->num()); + EXPECT_EQ(this->blob_top_->channels(), + this->blob_bottom_0_->channels() + this->blob_bottom_1_->channels()); + EXPECT_EQ(this->blob_top_->height(), this->blob_bottom_0_->height()); + EXPECT_EQ(this->blob_top_->width(), this->blob_bottom_0_->width()); +} + +TYPED_TEST(ConcatLayerTest, TestForwardNum) { + typedef typename TypeParam::Dtype Dtype; + LayerParameter layer_param; + layer_param.mutable_concat_param()->set_axis(0); + ConcatLayer layer(layer_param); + layer.SetUp(this->blob_bottom_vec_1_, this->blob_top_vec_); + layer.Forward(this->blob_bottom_vec_1_, this->blob_top_vec_); + for (int n = 0; n < this->blob_bottom_vec_1_[0]->num(); ++n) { + for (int c = 0; c < this->blob_top_->channels(); ++c) { + for (int h = 0; h < this->blob_top_->height(); ++h) { + for (int w = 0; w < this->blob_top_->width(); ++w) { + EXPECT_EQ(this->blob_top_->data_at(n, c, h, w), + this->blob_bottom_vec_1_[0]->data_at(n, c, h, w)); + } + } + } + } + for (int n = 0; n < this->blob_bottom_vec_1_[1]->num(); ++n) { + for (int c = 0; c < 
this->blob_top_->channels(); ++c) { + for (int h = 0; h < this->blob_top_->height(); ++h) { + for (int w = 0; w < this->blob_top_->width(); ++w) { + EXPECT_EQ(this->blob_top_->data_at(n + 2, c, h, w), + this->blob_bottom_vec_1_[1]->data_at(n, c, h, w)); + } + } + } + } +} -TYPED_TEST(ConcatLayerTest, TestNum) { +TYPED_TEST(ConcatLayerTest, TestForwardChannels) { typedef typename TypeParam::Dtype Dtype; LayerParameter layer_param; ConcatLayer layer(layer_param); - layer.SetUp(this->blob_bottom_vec_0, this->blob_top_vec_); - layer.Forward(this->blob_bottom_vec_0, this->blob_top_vec_); + layer.SetUp(this->blob_bottom_vec_0_, this->blob_top_vec_); + layer.Forward(this->blob_bottom_vec_0_, this->blob_top_vec_); for (int n = 0; n < this->blob_top_->num(); ++n) { - for (int c = 0; c < this->blob_bottom_0->channels(); ++c) { + for (int c = 0; c < this->blob_bottom_0_->channels(); ++c) { for (int h = 0; h < this->blob_top_->height(); ++h) { for (int w = 0; w < this->blob_top_->width(); ++w) { EXPECT_EQ(this->blob_top_->data_at(n, c, h, w), - this->blob_bottom_vec_0[0]->data_at(n, c, h, w)); + this->blob_bottom_vec_0_[0]->data_at(n, c, h, w)); } } } - for (int c = 0; c < this->blob_bottom_1->channels(); ++c) { + for (int c = 0; c < this->blob_bottom_1_->channels(); ++c) { for (int h = 0; h < this->blob_top_->height(); ++h) { for (int w = 0; w < this->blob_top_->width(); ++w) { - EXPECT_EQ(this->blob_top_->data_at(n, c+3, h, w), - this->blob_bottom_vec_0[1]->data_at(n, c, h, w)); + EXPECT_EQ(this->blob_top_->data_at(n, c + 3, h, w), + this->blob_bottom_vec_0_[1]->data_at(n, c, h, w)); } } } } } -TYPED_TEST(ConcatLayerTest, TestGradient) { +TYPED_TEST(ConcatLayerTest, TestGradientNum) { + typedef typename TypeParam::Dtype Dtype; + LayerParameter layer_param; + layer_param.mutable_concat_param()->set_axis(0); + ConcatLayer layer(layer_param); + GradientChecker checker(1e-2, 1e-2); + checker.CheckGradient(&layer, this->blob_bottom_vec_1_, + this->blob_top_vec_); +} + 
+TYPED_TEST(ConcatLayerTest, TestGradientChannels) { typedef typename TypeParam::Dtype Dtype; LayerParameter layer_param; ConcatLayer layer(layer_param); GradientChecker checker(1e-2, 1e-2); - checker.CheckGradient(&layer, this->blob_bottom_vec_0, + checker.CheckGradient(&layer, this->blob_bottom_vec_0_, this->blob_top_vec_); } diff --git a/src/caffe/test/test_hdf5data_layer.cpp b/src/caffe/test/test_hdf5data_layer.cpp index 8d3b3d1e987..c9b027f88cf 100644 --- a/src/caffe/test/test_hdf5data_layer.cpp +++ b/src/caffe/test/test_hdf5data_layer.cpp @@ -77,15 +77,13 @@ TYPED_TEST(HDF5DataLayerTest, TestRead) { EXPECT_EQ(this->blob_top_data_->height(), height); EXPECT_EQ(this->blob_top_data_->width(), width); - EXPECT_EQ(this->blob_top_label_->num(), batch_size); - EXPECT_EQ(this->blob_top_label_->channels(), 1); - EXPECT_EQ(this->blob_top_label_->height(), 1); - EXPECT_EQ(this->blob_top_label_->width(), 1); - - EXPECT_EQ(this->blob_top_label2_->num(), batch_size); - EXPECT_EQ(this->blob_top_label2_->channels(), 1); - EXPECT_EQ(this->blob_top_label2_->height(), 1); - EXPECT_EQ(this->blob_top_label2_->width(), 1); + EXPECT_EQ(this->blob_top_label_->num_axes(), 2); + EXPECT_EQ(this->blob_top_label_->shape(0), batch_size); + EXPECT_EQ(this->blob_top_label_->shape(1), 1); + + EXPECT_EQ(this->blob_top_label2_->num_axes(), 2); + EXPECT_EQ(this->blob_top_label2_->shape(0), batch_size); + EXPECT_EQ(this->blob_top_label2_->shape(1), 1); layer.SetUp(this->blob_bottom_vec_, this->blob_top_vec_); diff --git a/src/caffe/test/test_net.cpp b/src/caffe/test/test_net.cpp index 1680a3f28d5..08106e79274 100644 --- a/src/caffe/test/test_net.cpp +++ b/src/caffe/test/test_net.cpp @@ -63,18 +63,19 @@ class NetTest : public MultiDeviceTest { " name: 'data' " " type: 'DummyData' " " dummy_data_param { " - " num: 5 " - " channels: 2 " - " height: 3 " - " width: 4 " - " num: 5 " - " channels: 1 " - " height: 1 " - " width: 1 " + " shape { " + " dim: 5 " + " dim: 2 " + " dim: 3 " + " dim: 4 " + " } 
" " data_filler { " " type: 'gaussian' " " std: 0.01 " " } " + " shape { " + " dim: 5 " + " } " " data_filler { " " type: 'constant' " " value: 0 " diff --git a/src/caffe/test/test_slice_layer.cpp b/src/caffe/test/test_slice_layer.cpp index 395be280089..ccd03646d19 100644 --- a/src/caffe/test/test_slice_layer.cpp +++ b/src/caffe/test/test_slice_layer.cpp @@ -62,7 +62,7 @@ TYPED_TEST_CASE(SliceLayerTest, TestDtypesAndDevices); TYPED_TEST(SliceLayerTest, TestSetupNum) { typedef typename TypeParam::Dtype Dtype; LayerParameter layer_param; - layer_param.mutable_slice_param()->set_slice_dim(0); + layer_param.mutable_slice_param()->set_axis(0); SliceLayer layer(layer_param); layer.SetUp(this->blob_bottom_vec_, this->blob_top_vec_1_); EXPECT_EQ(this->blob_bottom_->num(), 3 * this->blob_top_0_->num()); @@ -91,7 +91,7 @@ TYPED_TEST(SliceLayerTest, TestSetupChannels) { TYPED_TEST(SliceLayerTest, TestSliceAcrossNum) { typedef typename TypeParam::Dtype Dtype; LayerParameter layer_param; - layer_param.mutable_slice_param()->set_slice_dim(0); + layer_param.mutable_slice_param()->set_axis(0); SliceLayer layer(layer_param); layer.SetUp(this->blob_bottom_vec_, this->blob_top_vec_0_); const int top_num = this->blob_bottom_->num() / 2; @@ -166,7 +166,7 @@ TYPED_TEST(SliceLayerTest, TestGradientAcrossNum) { // Gradient checks are slow; reduce blob size. 
this->ReduceBottomBlobSize(); LayerParameter layer_param; - layer_param.mutable_slice_param()->set_slice_dim(0); + layer_param.mutable_slice_param()->set_axis(0); SliceLayer layer(layer_param); GradientChecker checker(1e-2, 1e-3); checker.CheckGradientExhaustive(&layer, this->blob_bottom_vec_, diff --git a/src/caffe/test/test_solver.cpp b/src/caffe/test/test_solver.cpp index 1c2c9bbb740..ceabc9cdd2c 100644 --- a/src/caffe/test/test_solver.cpp +++ b/src/caffe/test/test_solver.cpp @@ -55,14 +55,15 @@ TYPED_TEST(SolverTest, TestInitTrainTestNets) { " name: 'data' " " type: 'DummyData' " " dummy_data_param { " - " num: 5 " - " channels: 3 " - " height: 10 " - " width: 10 " - " num: 5 " - " channels: 1 " - " height: 1 " - " width: 1 " + " shape { " + " dim: 5 " + " dim: 2 " + " dim: 3 " + " dim: 4 " + " } " + " shape { " + " dim: 5 " + " } " " } " " top: 'data' " " top: 'label' " diff --git a/src/caffe/util/io.cpp b/src/caffe/util/io.cpp index b243a9804ec..77ef7f257f4 100644 --- a/src/caffe/util/io.cpp +++ b/src/caffe/util/io.cpp @@ -252,11 +252,11 @@ void hdf5_load_nd_dataset_helper( CHECK_GE(status, 0) << "Failed to get dataset info for " << dataset_name_; CHECK_EQ(class_, H5T_FLOAT) << "Expected float or double data"; - blob->Reshape( - dims[0], - (dims.size() > 1) ? dims[1] : 1, - (dims.size() > 2) ? dims[2] : 1, - (dims.size() > 3) ? dims[3] : 1); + vector blob_dims(dims.size()); + for (int i = 0; i < dims.size(); ++i) { + blob_dims[i] = dims[i]; + } + blob->Reshape(blob_dims); } template <>