From 52465873d08cfd58a4c84ee8b239908814de4f39 Mon Sep 17 00:00:00 2001 From: philkr Date: Mon, 16 Feb 2015 11:42:22 -0800 Subject: [PATCH] Cleaning up the encoded flag. Allowing any image (cropped or gray scale) to be encoded. Allowing for a change in encoding (jpg -> png and vice versa) and cleaning up some unused functions. --- include/caffe/util/io.hpp | 36 ++++++++------------ src/caffe/layers/data_layer.cpp | 4 +-- src/caffe/layers/window_data_layer.cpp | 2 +- src/caffe/test/test_io.cpp | 31 +++--------------- src/caffe/util/io.cpp | 60 ++++++++++++++++++---------------- tools/compute_image_mean.cpp | 4 +-- tools/convert_imageset.cpp | 30 ++++++++++------- 7 files changed, 72 insertions(+), 95 deletions(-) diff --git a/include/caffe/util/io.hpp b/include/caffe/util/io.hpp index a01f917abf5..9d7540d62cc 100644 --- a/include/caffe/util/io.hpp +++ b/include/caffe/util/io.hpp @@ -92,7 +92,14 @@ inline bool ReadFileToDatum(const string& filename, Datum* datum) { } bool ReadImageToDatum(const string& filename, const int label, - const int height, const int width, const bool is_color, Datum* datum); + const int height, const int width, const bool is_color, + const std::string & encoding, Datum* datum); + +inline bool ReadImageToDatum(const string& filename, const int label, + const int height, const int width, const bool is_color, Datum* datum) { + return ReadImageToDatum(filename, label, height, width, is_color, + "", datum); +} inline bool ReadImageToDatum(const string& filename, const int label, const int height, const int width, Datum* datum) { @@ -109,20 +116,12 @@ inline bool ReadImageToDatum(const string& filename, const int label, return ReadImageToDatum(filename, label, 0, 0, true, datum); } -bool DecodeDatum(const int height, const int width, const bool is_color, - Datum* datum); - -inline bool DecodeDatum(const int height, const int width, Datum* datum) { - return DecodeDatum(height, width, true, datum); -} - -inline bool DecodeDatum(const bool is_color, Datum* 
datum) { - return DecodeDatum(0, 0, is_color, datum); +inline bool ReadImageToDatum(const string& filename, const int label, + const std::string & encoding, Datum* datum) { + return ReadImageToDatum(filename, label, 0, 0, true, encoding, datum); } -inline bool DecodeDatum(Datum* datum) { - return DecodeDatum(0, 0, true, datum); -} +bool DecodeDatumNative(Datum* datum); cv::Mat ReadImageToCVMat(const string& filename, const int height, const int width, const bool is_color); @@ -135,16 +134,7 @@ cv::Mat ReadImageToCVMat(const string& filename, cv::Mat ReadImageToCVMat(const string& filename); -cv::Mat DecodeDatumToCVMat(const Datum& datum, - const int height, const int width, const bool is_color); - -cv::Mat DecodeDatumToCVMat(const Datum& datum, - const int height, const int width); - -cv::Mat DecodeDatumToCVMat(const Datum& datum, - const bool is_color); - -cv::Mat DecodeDatumToCVMat(const Datum& datum); +cv::Mat DecodeDatumToCVMatNative(const Datum& datum); void CVMatToDatum(const cv::Mat& cv_img, Datum* datum); diff --git a/src/caffe/layers/data_layer.cpp b/src/caffe/layers/data_layer.cpp index 227db201759..891d03911c2 100644 --- a/src/caffe/layers/data_layer.cpp +++ b/src/caffe/layers/data_layer.cpp @@ -42,7 +42,7 @@ void DataLayer::DataLayerSetUp(const vector*>& bottom, Datum datum; datum.ParseFromString(cursor_->value()); - if (DecodeDatum(&datum)) { + if (DecodeDatumNative(&datum)) { LOG(INFO) << "Decoding Datum"; } // image @@ -98,7 +98,7 @@ void DataLayer::InternalThreadEntry() { cv::Mat cv_img; if (datum.encoded()) { - cv_img = DecodeDatumToCVMat(datum); + cv_img = DecodeDatumToCVMatNative(datum); } read_time += timer.MicroSeconds(); timer.Start(); diff --git a/src/caffe/layers/window_data_layer.cpp b/src/caffe/layers/window_data_layer.cpp index 23ec83d166b..cceb4ffb1b5 100644 --- a/src/caffe/layers/window_data_layer.cpp +++ b/src/caffe/layers/window_data_layer.cpp @@ -281,7 +281,7 @@ void WindowDataLayer::InternalThreadEntry() { if (this->cache_images_) { 
pair image_cached = image_database_cache_[window[WindowDataLayer::IMAGE_INDEX]]; - cv_img = DecodeDatumToCVMat(image_cached.second); + cv_img = DecodeDatumToCVMatNative(image_cached.second); } else { cv_img = cv::imread(image.first, CV_LOAD_IMAGE_COLOR); if (!cv_img.data) { diff --git a/src/caffe/test/test_io.cpp b/src/caffe/test/test_io.cpp index 4d941fa8683..6b135efcf6e 100644 --- a/src/caffe/test/test_io.cpp +++ b/src/caffe/test/test_io.cpp @@ -289,8 +289,8 @@ TEST_F(IOTest, TestDecodeDatum) { string filename = EXAMPLES_SOURCE_DIR "images/cat.jpg"; Datum datum; EXPECT_TRUE(ReadFileToDatum(filename, &datum)); - EXPECT_TRUE(DecodeDatum(&datum)); - EXPECT_FALSE(DecodeDatum(&datum)); + EXPECT_TRUE(DecodeDatumNative(&datum)); + EXPECT_FALSE(DecodeDatumNative(&datum)); Datum datum_ref; ReadImageToDatumReference(filename, 0, 0, 0, true, &datum_ref); EXPECT_EQ(datum.channels(), datum_ref.channels()); @@ -309,38 +309,17 @@ TEST_F(IOTest, TestDecodeDatumToCVMat) { string filename = EXAMPLES_SOURCE_DIR "images/cat.jpg"; Datum datum; EXPECT_TRUE(ReadFileToDatum(filename, &datum)); - cv::Mat cv_img = DecodeDatumToCVMat(datum); + cv::Mat cv_img = DecodeDatumToCVMatNative(datum); EXPECT_EQ(cv_img.channels(), 3); EXPECT_EQ(cv_img.rows, 360); EXPECT_EQ(cv_img.cols, 480); } -TEST_F(IOTest, TestDecodeDatumToCVMatResized) { - string filename = EXAMPLES_SOURCE_DIR "images/cat.jpg"; - Datum datum; - EXPECT_TRUE(ReadFileToDatum(filename, &datum)); - cv::Mat cv_img = DecodeDatumToCVMat(datum, 100, 200); - EXPECT_EQ(cv_img.channels(), 3); - EXPECT_EQ(cv_img.rows, 100); - EXPECT_EQ(cv_img.cols, 200); -} - -TEST_F(IOTest, TestDecodeDatumToCVMatResizedGray) { - string filename = EXAMPLES_SOURCE_DIR "images/cat.jpg"; - Datum datum; - EXPECT_TRUE(ReadFileToDatum(filename, &datum)); - const bool is_color = false; - cv::Mat cv_img = DecodeDatumToCVMat(datum, 200, 100, is_color); - EXPECT_EQ(cv_img.channels(), 1); - EXPECT_EQ(cv_img.rows, 200); - EXPECT_EQ(cv_img.cols, 100); -} - TEST_F(IOTest, 
TestDecodeDatumToCVMatContent) { string filename = EXAMPLES_SOURCE_DIR "images/cat.jpg"; Datum datum; - EXPECT_TRUE(ReadFileToDatum(filename, &datum)); - cv::Mat cv_img = DecodeDatumToCVMat(datum); + EXPECT_TRUE(ReadImageToDatum(filename, 0, std::string("jpg"), &datum)); + cv::Mat cv_img = DecodeDatumToCVMatNative(datum); cv::Mat cv_img_ref = ReadImageToCVMat(filename); EXPECT_EQ(cv_img_ref.channels(), cv_img.channels()); EXPECT_EQ(cv_img_ref.rows, cv_img.rows); diff --git a/src/caffe/util/io.cpp b/src/caffe/util/io.cpp index f738e053e86..65531687b63 100644 --- a/src/caffe/util/io.cpp +++ b/src/caffe/util/io.cpp @@ -98,11 +98,36 @@ cv::Mat ReadImageToCVMat(const string& filename, cv::Mat ReadImageToCVMat(const string& filename) { return ReadImageToCVMat(filename, 0, 0, true); } - +// Do the file extension and encoding match? +static bool matchExt(const std::string & fn, + std::string en) { + size_t p = fn.rfind('.'); + std::string ext = p != fn.npos ? fn.substr(p) : fn; + std::transform(ext.begin(), ext.end(), ext.begin(), ::tolower); + std::transform(en.begin(), en.end(), en.begin(), ::tolower); + if ( ext == en ) + return true; + if ( en == "jpg" && ext == "jpeg" ) + return true; + return false; +} bool ReadImageToDatum(const string& filename, const int label, - const int height, const int width, const bool is_color, Datum* datum) { + const int height, const int width, const bool is_color, + const std::string & encoding, Datum* datum) { cv::Mat cv_img = ReadImageToCVMat(filename, height, width, is_color); if (cv_img.data) { + if (encoding.size()) { + if ( (cv_img.channels() == 3) == is_color && !height && !width && + matchExt(filename, encoding) ) + return ReadFileToDatum(filename, label, datum); + std::vector buf; + cv::imencode("."+encoding, cv_img, buf); + datum->set_data(std::string(reinterpret_cast(&buf[0]), + buf.size())); + datum->set_label(label); + datum->set_encoded(true); + return true; + } CVMatToDatum(cv_img, datum); datum->set_label(label); return 
true; @@ -131,47 +156,24 @@ bool ReadFileToDatum(const string& filename, const int label, } } -cv::Mat DecodeDatumToCVMat(const Datum& datum, - const int height, const int width, const bool is_color) { +cv::Mat DecodeDatumToCVMatNative(const Datum& datum) { cv::Mat cv_img; CHECK(datum.encoded()) << "Datum not encoded"; - int cv_read_flag = (is_color ? CV_LOAD_IMAGE_COLOR : - CV_LOAD_IMAGE_GRAYSCALE); const string& data = datum.data(); std::vector vec_data(data.c_str(), data.c_str() + data.size()); - if (height > 0 && width > 0) { - cv::Mat cv_img_origin = cv::imdecode(cv::Mat(vec_data), cv_read_flag); - cv::resize(cv_img_origin, cv_img, cv::Size(width, height)); - } else { - cv_img = cv::imdecode(vec_data, cv_read_flag); - } + cv_img = cv::imdecode(vec_data, -1); if (!cv_img.data) { LOG(ERROR) << "Could not decode datum "; } return cv_img; } -cv::Mat DecodeDatumToCVMat(const Datum& datum, - const int height, const int width) { - return DecodeDatumToCVMat(datum, height, width, true); -} - -cv::Mat DecodeDatumToCVMat(const Datum& datum, - const bool is_color) { - return DecodeDatumToCVMat(datum, 0, 0, is_color); -} - -cv::Mat DecodeDatumToCVMat(const Datum& datum) { - return DecodeDatumToCVMat(datum, 0, 0, true); -} - // If Datum is encoded will decoded using DecodeDatumToCVMat and CVMatToDatum // if height and width are set it will resize it // If Datum is not encoded will do nothing -bool DecodeDatum(const int height, const int width, const bool is_color, - Datum* datum) { +bool DecodeDatumNative(Datum* datum) { if (datum->encoded()) { - cv::Mat cv_img = DecodeDatumToCVMat((*datum), height, width, is_color); + cv::Mat cv_img = DecodeDatumToCVMatNative((*datum)); CVMatToDatum(cv_img, datum); return true; } else { diff --git a/tools/compute_image_mean.cpp b/tools/compute_image_mean.cpp index dff63a09dca..b1fc7cae38f 100644 --- a/tools/compute_image_mean.cpp +++ b/tools/compute_image_mean.cpp @@ -50,7 +50,7 @@ int main(int argc, char** argv) { Datum datum; 
datum.ParseFromString(cursor->value()); - if (DecodeDatum(&datum)) { + if (DecodeDatumNative(&datum)) { LOG(INFO) << "Decoding Datum"; } @@ -68,7 +68,7 @@ int main(int argc, char** argv) { while (cursor->valid()) { Datum datum; datum.ParseFromString(cursor->value()); - DecodeDatum(&datum); + DecodeDatumNative(&datum); const std::string& data = datum.data(); size_in_datum = std::max(datum.data().size(), diff --git a/tools/convert_imageset.cpp b/tools/convert_imageset.cpp index 7fbf5b0514c..816a91f971b 100644 --- a/tools/convert_imageset.cpp +++ b/tools/convert_imageset.cpp @@ -39,6 +39,8 @@ DEFINE_bool(check_size, false, "When this option is on, check that all the datum have the same size"); DEFINE_bool(encoded, false, "When this option is on, the encoded image will be save in datum"); +DEFINE_string(encode_type, "", + "Optional: What type should we encode the image as ('png','jpg',...)."); int main(int argc, char** argv) { ::google::InitGoogleLogging(argv[0]); @@ -63,6 +65,7 @@ int main(int argc, char** argv) { const bool is_color = !FLAGS_gray; const bool check_size = FLAGS_check_size; const bool encoded = FLAGS_encoded; + const string encode_type = FLAGS_encode_type; std::ifstream infile(argv[2]); std::vector > lines; @@ -78,11 +81,8 @@ int main(int argc, char** argv) { } LOG(INFO) << "A total of " << lines.size() << " images."; - if (encoded) { - CHECK_EQ(FLAGS_resize_height, 0) << "With encoded don't resize images"; - CHECK_EQ(FLAGS_resize_width, 0) << "With encoded don't resize images"; - CHECK(!check_size) << "With encoded cannot check_size"; - } + if (encode_type.size() && !encoded) + LOG(INFO) << "encode_type specified, assuming encoded=true."; int resize_height = std::max(0, FLAGS_resize_height); int resize_width = std::max(0, FLAGS_resize_width); @@ -98,18 +98,24 @@ int main(int argc, char** argv) { int count = 0; const int kMaxKeyLength = 256; char key_cstr[kMaxKeyLength]; - int data_size; + int data_size = 0; bool data_size_initialized = false; for (int 
line_id = 0; line_id < lines.size(); ++line_id) { bool status; - if (encoded) { - status = ReadFileToDatum(root_folder + lines[line_id].first, - lines[line_id].second, &datum); - } else { - status = ReadImageToDatum(root_folder + lines[line_id].first, - lines[line_id].second, resize_height, resize_width, is_color, &datum); + std::string enc = encode_type; + if (encoded && !enc.size()) { + // Guess the encoding type from the file name + string fn = lines[line_id].first; + size_t p = fn.rfind('.'); + if ( p == fn.npos ) + LOG(WARNING) << "Failed to guess the encoding of '" << fn << "'"; + enc = fn.substr(p); + std::transform(enc.begin(), enc.end(), enc.begin(), ::tolower); } + status = ReadImageToDatum(root_folder + lines[line_id].first, + lines[line_id].second, resize_height, resize_width, is_color, + enc, &datum); if (status == false) continue; if (check_size) { if (!data_size_initialized) {