diff --git a/include/caffe/data_layers.hpp b/include/caffe/data_layers.hpp index 1f154408c27..2bb9d948169 100644 --- a/include/caffe/data_layers.hpp +++ b/include/caffe/data_layers.hpp @@ -171,6 +171,8 @@ class HDF5DataLayer : public Layer { unsigned int current_file_; hsize_t current_row_; std::vector > > hdf_blobs_; + std::vector data_permutation_; + std::vector file_permutation_; }; /** diff --git a/src/caffe/layers/hdf5_data_layer.cpp b/src/caffe/layers/hdf5_data_layer.cpp index 1ceb6c24431..1d3dc1e62ac 100644 --- a/src/caffe/layers/hdf5_data_layer.cpp +++ b/src/caffe/layers/hdf5_data_layer.cpp @@ -14,9 +14,9 @@ #include "hdf5_hl.h" #include "stdint.h" +#include "caffe/data_layers.hpp" #include "caffe/layer.hpp" #include "caffe/util/io.hpp" -#include "caffe/vision_layers.hpp" namespace caffe { @@ -48,11 +48,25 @@ void HDF5DataLayer::LoadHDF5FileData(const char* filename) { CHECK_GE(status, 0) << "Failed to close HDF5 file: " << filename; // MinTopBlobs==1 guarantees at least one top blob - int num = hdf_blobs_[0]->num(); + CHECK_GE(hdf_blobs_[0]->num_axes(), 1) << "Input must have at least 1 axis."; + const int num = hdf_blobs_[0]->shape(0); for (int i = 1; i < top_size; ++i) { - CHECK_EQ(hdf_blobs_[i]->num(), num); + CHECK_EQ(hdf_blobs_[i]->shape(0), num); + } + // Default to identity permutation. + data_permutation_.clear(); + data_permutation_.resize(hdf_blobs_[0]->shape(0)); + for (int i = 0; i < hdf_blobs_[0]->shape(0); i++) + data_permutation_[i] = i; + + // Shuffle if needed. + if (this->layer_param_.hdf5_data_param().shuffle()) { + std::random_shuffle(data_permutation_.begin(), data_permutation_.end()); + DLOG(INFO) << "Successully loaded " << hdf_blobs_[0]->shape(0) + << " rows (shuffled)"; + } else { + DLOG(INFO) << "Successully loaded " << hdf_blobs_[0]->shape(0) << " rows"; } - DLOG(INFO) << "Successully loaded " << hdf_blobs_[0]->num() << " rows"; } template @@ -81,9 +95,24 @@ void HDF5DataLayer::LayerSetUp(const vector*>& bottom, CHECK_GE(num_files_, 1) << "Must have at least 1 HDF5 filename listed in " << source; + file_permutation_.clear(); + file_permutation_.resize(num_files_); + // Default to identity permutation. + for (int i = 0; i < num_files_; i++) { + file_permutation_[i] = i; + } + + // Shuffle if needed. + if (this->layer_param_.hdf5_data_param().shuffle()) { + std::random_shuffle(file_permutation_.begin(), file_permutation_.end()); + } + // Load the first HDF5 file and initialize the line counter. - LoadHDF5FileData(hdf_filenames_[current_file_].c_str()); + LoadHDF5FileData(hdf_filenames_[file_permutation_[current_file_]].c_str()); current_row_ = 0; + if (this->layer_param_.hdf5_data_param().shuffle()) { + std::random_shuffle(file_permutation_.begin(), file_permutation_.end()); + } // Reshape blobs. const int batch_size = this->layer_param_.hdf5_data_param().batch_size(); @@ -104,22 +133,29 @@ void HDF5DataLayer::Forward_cpu(const vector*>& bottom, const vector*>& top) { const int batch_size = this->layer_param_.hdf5_data_param().batch_size(); for (int i = 0; i < batch_size; ++i, ++current_row_) { - if (current_row_ == hdf_blobs_[0]->num()) { + if (current_row_ == hdf_blobs_[0]->shape(0)) { if (num_files_ > 1) { ++current_file_; if (current_file_ == num_files_) { current_file_ = 0; + if (this->layer_param_.hdf5_data_param().shuffle()) { + std::random_shuffle(file_permutation_.begin(), + file_permutation_.end()); + } DLOG(INFO) << "Looping around to first file."; } - LoadHDF5FileData(hdf_filenames_[current_file_].c_str()); + LoadHDF5FileData( + hdf_filenames_[file_permutation_[current_file_]].c_str()); } current_row_ = 0; + if (this->layer_param_.hdf5_data_param().shuffle()) + std::random_shuffle(data_permutation_.begin(), data_permutation_.end()); } for (int j = 0; j < this->layer_param_.top_size(); ++j) { - int data_dim = top[j]->count() / top[j]->num(); + int data_dim = top[j]->count() / top[j]->shape(0); caffe_copy(data_dim, - &hdf_blobs_[j]->cpu_data()[current_row_ * data_dim], - &top[j]->mutable_cpu_data()[i * data_dim]); + &hdf_blobs_[j]->cpu_data()[data_permutation_[current_row_] + * data_dim], &top[j]->mutable_cpu_data()[i * data_dim]); } } } diff --git a/src/caffe/layers/hdf5_data_layer.cu b/src/caffe/layers/hdf5_data_layer.cu index 02e3821d104..5e3e4ced141 100644 --- a/src/caffe/layers/hdf5_data_layer.cu +++ b/src/caffe/layers/hdf5_data_layer.cu @@ -10,9 +10,9 @@ TODO: #include "hdf5.h" #include "hdf5_hl.h" +#include "caffe/data_layers.hpp" #include "caffe/layer.hpp" #include "caffe/util/io.hpp" -#include "caffe/vision_layers.hpp" namespace caffe { @@ -21,22 +21,29 @@ void HDF5DataLayer::Forward_gpu(const vector*>& bottom, const vector*>& top) { const int batch_size = this->layer_param_.hdf5_data_param().batch_size(); for (int i = 0; i < batch_size; ++i, ++current_row_) { - if (current_row_ == hdf_blobs_[0]->num()) { + if (current_row_ == hdf_blobs_[0]->shape(0)) { if (num_files_ > 1) { current_file_ += 1; if (current_file_ == num_files_) { current_file_ = 0; + if (this->layer_param_.hdf5_data_param().shuffle()) { + std::random_shuffle(file_permutation_.begin(), + file_permutation_.end()); + } DLOG(INFO) << "Looping around to first file."; } - LoadHDF5FileData(hdf_filenames_[current_file_].c_str()); + LoadHDF5FileData( + hdf_filenames_[file_permutation_[current_file_]].c_str()); } current_row_ = 0; + if (this->layer_param_.hdf5_data_param().shuffle()) + std::random_shuffle(data_permutation_.begin(), data_permutation_.end()); } for (int j = 0; j < this->layer_param_.top_size(); ++j) { - int data_dim = top[j]->count() / top[j]->num(); + int data_dim = top[j]->count() / top[j]->shape(0); caffe_copy(data_dim, - &hdf_blobs_[j]->cpu_data()[current_row_ * data_dim], - &top[j]->mutable_gpu_data()[i * data_dim]); + &hdf_blobs_[j]->cpu_data()[data_permutation_[current_row_] + * data_dim], &top[j]->mutable_gpu_data()[i * data_dim]); } } } diff --git a/src/caffe/proto/caffe.proto b/src/caffe/proto/caffe.proto index 888371dc778..5b21cf20028 100644 --- a/src/caffe/proto/caffe.proto +++ b/src/caffe/proto/caffe.proto @@ -518,6 +518,13 @@ message HDF5DataParameter { optional string source = 1; // Specify the batch size. optional uint32 batch_size = 2; + + // Specify whether to shuffle the data. + // If shuffle == true, the ordering of the HDF5 files is shuffled, + // and the ordering of data within any given HDF5 file is shuffled, + // but data between different files are not interleaved; all of a file's + // data are output (in a random order) before moving onto another file. + optional bool shuffle = 3 [default = false]; } // Message that stores parameters used by HDF5OutputLayer