diff --git a/Makefile b/Makefile index 35c37af50fb..9ac60c0caa6 100644 --- a/Makefile +++ b/Makefile @@ -168,7 +168,7 @@ ifneq ($(CPU_ONLY), 1) LIBRARIES := cudart cublas curand endif LIBRARIES += glog gflags protobuf leveldb snappy \ - lmdb boost_system hdf5_hl hdf5 m \ + lmdb boost_filesystem boost_system hdf5_hl hdf5 m \ opencv_core opencv_highgui opencv_imgproc PYTHON_LIBRARIES := boost_python python2.7 WARNINGS := -Wall -Wno-sign-compare diff --git a/examples/mnist/convert_mnist_data.cpp b/examples/mnist/convert_mnist_data.cpp index 2749e4521b6..b2c7a586e13 100644 --- a/examples/mnist/convert_mnist_data.cpp +++ b/examples/mnist/convert_mnist_data.cpp @@ -18,6 +18,10 @@ #include // NOLINT(readability/streams) #include +#include + +#include "opencv2/core/core.hpp" +#include "opencv2/opencv.hpp" #include "caffe/proto/caffe.pb.h" using namespace caffe; // NOLINT(build/namespaces) @@ -83,8 +87,9 @@ void convert_dataset(const char* image_filename, const char* label_filename, batch = new leveldb::WriteBatch(); } else if (db_backend == "lmdb") { // lmdb LOG(INFO) << "Opening lmdb " << db_path; - CHECK_EQ(mkdir(db_path, 0744), 0) - << "mkdir " << db_path << "failed"; + boost::filesystem::path p(db_path); + CHECK_EQ(boost::filesystem::create_directories(p), 1) // returns true only when the directory was newly created + << "mkdir " << db_path << " failed"; CHECK_EQ(mdb_env_create(&mdb_env), MDB_SUCCESS) << "mdb_env_create failed"; CHECK_EQ(mdb_env_set_mapsize(mdb_env, 1099511627776), MDB_SUCCESS) // 1TB << "mdb_env_set_mapsize failed"; @@ -94,6 +99,21 @@ void convert_dataset(const char* image_filename, const char* label_filename, << "mdb_txn_begin failed"; CHECK_EQ(mdb_open(mdb_txn, NULL, 0, &mdb_dbi), MDB_SUCCESS) << "mdb_open failed. Does the lmdb already exist? "; + } else if (db_backend == "files") { // png files + boost::filesystem::path p(db_path); + if (! 
boost::filesystem::is_directory(p)) + CHECK_EQ(boost::filesystem::create_directories(p), 1) + << "mkdir " << p << " failed"; + for (int i = 0; i < 10; i++) { + char buf[2]; + snprintf(buf, 2, "%c", '0' + i); + string digit_dir_name(buf); + boost::filesystem::path digit_path = p / digit_dir_name; + + if (! boost::filesystem::is_directory(digit_path)) + CHECK_EQ(boost::filesystem::create_directories(digit_path), 1) + << "mkdir " << digit_path << " failed"; + } } else { LOG(FATAL) << "Unknown db backend " << db_backend; } @@ -114,6 +134,7 @@ void convert_dataset(const char* image_filename, const char* label_filename, LOG(INFO) << "Rows: " << rows << " Cols: " << cols; for (int item_id = 0; item_id < num_items; ++item_id) { image_file.read(pixels, rows * cols); + label_file.read(&label, 1); datum.set_data(pixels, rows*cols); datum.set_label(label); @@ -131,6 +152,15 @@ void convert_dataset(const char* image_filename, const char* label_filename, mdb_key.mv_data = reinterpret_cast(&keystr[0]); CHECK_EQ(mdb_put(mdb_txn, mdb_dbi, &mdb_key, &mdb_data, 0), MDB_SUCCESS) << "mdb_put failed"; + } else if (db_backend == "files") { // png files + cv::Mat img = cv::Mat(rows, cols, CV_8UC1, pixels); + + int pathlen = strlen(db_path) + strlen("/0/12345.png") + 1; + char filename[pathlen]; + + snprintf(filename, pathlen, "%s/%c/%05d.png", db_path, '0' + label, item_id); + //std::cout << "writing " << filename << std::endl; + cv::imwrite(filename, img); } else { LOG(FATAL) << "Unknown db backend " << db_backend; } @@ -146,6 +176,8 @@ void convert_dataset(const char* image_filename, const char* label_filename, << "mdb_txn_commit failed"; CHECK_EQ(mdb_txn_begin(mdb_env, NULL, 0, &mdb_txn), MDB_SUCCESS) << "mdb_txn_begin failed"; + } else if (db_backend == "files") { // png files + } else { LOG(FATAL) << "Unknown db backend " << db_backend; } @@ -161,6 +193,8 @@ void convert_dataset(const char* image_filename, const char* label_filename, CHECK_EQ(mdb_txn_commit(mdb_txn), MDB_SUCCESS) << 
"mdb_txn_commit failed"; mdb_close(mdb_env, mdb_dbi); mdb_env_close(mdb_env); + } else if (db_backend == "files") { // files + } else { LOG(FATAL) << "Unknown db backend " << db_backend; } diff --git a/examples/siamese/create_mnist_siamese.sh b/examples/siamese/create_mnist_siamese.sh index 43ad6b184a7..4d6326304ad 100755 --- a/examples/siamese/create_mnist_siamese.sh +++ b/examples/siamese/create_mnist_siamese.sh @@ -1,21 +1,37 @@ #!/usr/bin/env sh -# This script converts the mnist data into leveldb format. +# This script converts the mnist data with the backend named by $BACKEND +# ("files": per-digit PNG directories) and writes the image list files. -EXAMPLES=./build/examples/siamese -DATA=./data/mnist +EXAMPLE=examples/siamese +DATA=data/mnist +BUILD=build/examples/mnist -echo "Creating leveldb..." +BACKEND="lmdb" +BACKEND="files" -rm -rf ./examples/siamese/mnist_siamese_train_leveldb -rm -rf ./examples/siamese/mnist_siamese_test_leveldb +echo "Creating ${BACKEND}..." -$EXAMPLES/convert_mnist_siamese_data.bin \ - $DATA/train-images-idx3-ubyte \ - $DATA/train-labels-idx1-ubyte \ - ./examples/siamese/mnist_siamese_train_leveldb -$EXAMPLES/convert_mnist_siamese_data.bin \ - $DATA/t10k-images-idx3-ubyte \ - $DATA/t10k-labels-idx1-ubyte \ - ./examples/siamese/mnist_siamese_test_leveldb +rm -rf $EXAMPLE/mnist_train_${BACKEND} +rm -rf $EXAMPLE/mnist_test_${BACKEND} + +$BUILD/convert_mnist_data.bin $DATA/train-images-idx3-ubyte \ + $DATA/train-labels-idx1-ubyte $EXAMPLE/mnist_train_${BACKEND} --backend=${BACKEND} +$BUILD/convert_mnist_data.bin $DATA/t10k-images-idx3-ubyte \ + $DATA/t10k-labels-idx1-ubyte $EXAMPLE/mnist_test_${BACKEND} --backend=${BACKEND} + +for t in train test +do + rm -f $EXAMPLE/mnist_${t}.txt + + for d in 0 1 2 3 4 5 6 7 8 9 + do + for f in $EXAMPLE/mnist_${t}_${BACKEND}/$d/*.png + do + echo "$f $d" >>$EXAMPLE/mnist_${t}.txt + done + done + + shuf $EXAMPLE/mnist_${t}.txt >$EXAMPLE/mnist_${t}_p.txt +done echo "Done." 
diff --git a/examples/siamese/mnist_siamese_train_test.prototxt b/examples/siamese/mnist_siamese_train_test.prototxt index 92361c31dc7..6b71f5a0b62 100644 --- a/examples/siamese/mnist_siamese_train_test.prototxt +++ b/examples/siamese/mnist_siamese_train_test.prototxt @@ -1,38 +1,61 @@ name: "mnist_siamese_train_test" + + layers { - name: "pair_data" - type: DATA - top: "pair_data" - top: "sim" - data_param { - source: "examples/siamese/mnist_siamese_train_leveldb" + name: "data" + type: IMAGE_DATA + top: "data" + top: "label" + image_data_param { + source: "examples/siamese/mnist_train.txt" + batch_size: 100 scale: 0.00390625 - batch_size: 64 + shuffle: true } include: { phase: TRAIN } } + layers { - name: "pair_data" - type: DATA - top: "pair_data" - top: "sim" - data_param { - source: "examples/siamese/mnist_siamese_test_leveldb" - scale: 0.00390625 + name: "data" + type: IMAGE_DATA + top: "data" + top: "label" + image_data_param { + source: "examples/siamese/mnist_test.txt" batch_size: 100 + scale: 0.00390625 + shuffle: false } include: { phase: TEST } } + + layers { - name: "slice_pair" - type: SLICE - bottom: "pair_data" - top: "data" - top: "data_p" - slice_param { - slice_dim: 1 - slice_point: 1 - } + name: "data_p" + type: IMAGE_DATA + top: "data_p" + top: "label_p" + image_data_param { + source: "examples/siamese/mnist_train_p.txt" + batch_size: 100 + scale: 0.00390625 + shuffle: true + } + include: { phase: TRAIN } +} + +layers { + name: "data_p" + type: IMAGE_DATA + top: "data_p" + top: "label_p" + image_data_param { + source: "examples/siamese/mnist_test_p.txt" + batch_size: 100 + scale: 0.00390625 + shuffle: false + } + include: { phase: TEST } } @@ -308,6 +331,7 @@ layers { } bottom: "feat" bottom: "feat_p" - bottom: "sim" + bottom: "label" + bottom: "label_p" top: "loss" } diff --git a/examples/siamese/train_mnist_siamese.sh b/examples/siamese/train_mnist_siamese.sh index 84a30a8ac44..6142ebc5d54 100755 --- a/examples/siamese/train_mnist_siamese.sh 
+++ b/examples/siamese/train_mnist_siamese.sh @@ -2,4 +2,4 @@ TOOLS=./build/tools -$TOOLS/caffe train --solver=examples/siamese/mnist_siamese_solver.prototxt +GLOG_logtostderr=0 GLOG_log_dir=./examples/siamese/ $TOOLS/caffe train --solver=examples/siamese/mnist_siamese_solver.prototxt --gpu=0 diff --git a/include/caffe/loss_layers.hpp b/include/caffe/loss_layers.hpp index 9fe58cd97bc..b28c7b55607 100644 --- a/include/caffe/loss_layers.hpp +++ b/include/caffe/loss_layers.hpp @@ -153,7 +153,9 @@ class ContrastiveLossLayer : public LossLayer { virtual void LayerSetUp(const vector*>& bottom, const vector*>& top); - virtual inline int ExactNumBottomBlobs() const { return 3; } + virtual inline int ExactNumBottomBlobs() const { return -1; } + virtual inline int MinBottomBlobs() const { return 3; } + virtual inline int MaxBottomBlobs() const { return 4; } virtual inline LayerParameter_LayerType type() const { return LayerParameter_LayerType_CONTRASTIVE_LOSS; } @@ -202,10 +204,15 @@ class ContrastiveLossLayer : public LossLayer { virtual void Backward_gpu(const vector*>& top, const vector& propagate_down, const vector*>& bottom); + Blob label_; // per-pair 0/1 similarity: copied from bottom[2], or computed as bottom[2] == bottom[3] Blob diff_; // cached for backward pass Blob dist_sq_; // cached for backward pass Blob diff_sq_; // tmp storage for gpu forward pass Blob summer_vec_; // tmp storage for gpu forward pass + + private: + virtual void LabelSetUp(const vector*>& bottom); + }; /** diff --git a/include/caffe/util/math_functions.hpp b/include/caffe/util/math_functions.hpp index 12823f8da0c..f2de1b1794c 100644 --- a/include/caffe/util/math_functions.hpp +++ b/include/caffe/util/math_functions.hpp @@ -112,6 +112,19 @@ inline int8_t caffe_sign(Dtype val) { return (Dtype(0) < val) - (val < Dtype(0)); } +#define DEFINE_CAFFE_CPU_BINARY_FUNC(name, operation) \ + template \ + void caffe_cpu_##name(const int n, const Dtype* a, const Dtype *b, Dtype* y) { \ + CHECK_GT(n, 0); CHECK(a); 
CHECK(b); CHECK(y); \ + 
for (int i = 0; i < n; ++i) { \ + operation; \ + } \ + } + +// y[i] is 1 where a[i] == b[i], and 0 otherwise +DEFINE_CAFFE_CPU_BINARY_FUNC(same, y[i] = (a[i] == b[i])); + + // The following two macros are modifications of DEFINE_VSL_UNARY_FUNC // in include/caffe/util/mkl_alternate.hpp authored by @Rowland Depp. // Please refer to commit 7e8ef25c7 of the boost-eigen branch. diff --git a/src/caffe/layers/contrastive_loss_layer.cpp b/src/caffe/layers/contrastive_loss_layer.cpp index 0d0b443b66b..d7b60ddba15 100644 --- a/src/caffe/layers/contrastive_loss_layer.cpp +++ b/src/caffe/layers/contrastive_loss_layer.cpp @@ -20,6 +20,8 @@ void ContrastiveLossLayer::LayerSetUp( CHECK_EQ(bottom[2]->channels(), 1); CHECK_EQ(bottom[2]->height(), 1); CHECK_EQ(bottom[2]->width(), 1); + + label_.Reshape(bottom[0]->num(), 1, 1, 1); diff_.Reshape(bottom[0]->num(), bottom[0]->channels(), 1, 1); diff_sq_.Reshape(bottom[0]->num(), bottom[0]->channels(), 1, 1); dist_sq_.Reshape(bottom[0]->num(), 1, 1, 1); @@ -29,6 +31,29 @@ void ContrastiveLossLayer::LayerSetUp( summer_vec_.mutable_cpu_data()[i] = Dtype(1); } +template +void ContrastiveLossLayer::LabelSetUp( + const vector*>& bottom) { + + switch (this->layer_param_.contrastive_loss_param().label_type()) { + case ContrastiveLossParameter_LabelType_PAIR_SIMILARITY: + CHECK_EQ(bottom.size(), 3); + caffe_copy(bottom[0]->num(), + bottom[2]->cpu_data(), + label_.mutable_cpu_data()); + break; + case ContrastiveLossParameter_LabelType_CATEGORY: + CHECK_EQ(bottom.size(), 4); + caffe_cpu_same(bottom[0]->num(), + bottom[2]->cpu_data(), + bottom[3]->cpu_data(), + label_.mutable_cpu_data()); + break; + default: + LOG(FATAL) << "Unknown label type"; + } +} + template void ContrastiveLossLayer::Forward_cpu( const vector*>& bottom, @@ -41,11 +66,15 @@ void ContrastiveLossLayer::Forward_cpu( diff_.mutable_cpu_data()); // a_i-b_i const int channels = bottom[0]->channels(); Dtype margin = this->layer_param_.contrastive_loss_param().margin(); + + 
LabelSetUp(bottom); + Dtype loss(0.0); for (int i = 0; i < bottom[0]->num(); ++i) { dist_sq_.mutable_cpu_data()[i] = caffe_cpu_dot(channels, diff_.cpu_data() + (i*channels), diff_.cpu_data() + (i*channels)); - if (static_cast(bottom[2]->cpu_data()[i])) { // similar pairs + + if (static_cast(label_.cpu_data()[i])) { // similar pairs loss += dist_sq_.cpu_data()[i]; } else { // dissimilar pairs loss += std::max(margin-dist_sq_.cpu_data()[i], Dtype(0.0)); @@ -68,7 +97,7 @@ void ContrastiveLossLayer::Backward_cpu(const vector*>& top, int channels = bottom[i]->channels(); for (int j = 0; j < num; ++j) { Dtype* bout = bottom[i]->mutable_cpu_diff(); - if (static_cast(bottom[2]->cpu_data()[j])) { // similar pairs + if (static_cast(label_.cpu_data()[j])) { // similar pairs caffe_cpu_axpby( channels, alpha, diff --git a/src/caffe/layers/contrastive_loss_layer.cu b/src/caffe/layers/contrastive_loss_layer.cu index 78a55995a0a..b7805d887c0 100644 --- a/src/caffe/layers/contrastive_loss_layer.cu +++ b/src/caffe/layers/contrastive_loss_layer.cu @@ -32,9 +32,12 @@ void ContrastiveLossLayer::Forward_gpu( Dtype(0.0), dist_sq_.mutable_gpu_data()); // \Sum (a_i-b_i)^2 Dtype margin = this->layer_param_.contrastive_loss_param().margin(); + + LabelSetUp(bottom); + Dtype loss(0.0); for (int i = 0; i < bottom[0]->num(); ++i) { - if (static_cast(bottom[2]->cpu_data()[i])) { // similar pairs + if (static_cast(label_.cpu_data()[i])) { // similar pairs loss += dist_sq_.cpu_data()[i]; } else { // dissimilar pairs loss += std::max(margin-dist_sq_.cpu_data()[i], Dtype(0.0)); @@ -51,6 +54,7 @@ __global__ void CLLForward(const int count, const int channels, Dtype *bottom_diff) { CUDA_KERNEL_LOOP(i, count) { int n = i / channels; // the num index, to access y and dist_sq + if (static_cast(y[n])) { // similar pairs bottom_diff[i] = alpha * diff[i]; } else { // dissimilar pairs @@ -77,7 +81,7 @@ void ContrastiveLossLayer::Backward_gpu(const vector*>& top, // NOLINT_NEXT_LINE(whitespace/operators) 
CLLForward<<>>( count, channels, margin, alpha, - bottom[2]->gpu_data(), // pair similarity 0 or 1 + label_.gpu_data(), // pair similarity 0 or 1 (filled by LabelSetUp) diff_.gpu_data(), // the cached eltwise difference between a and b dist_sq_.gpu_data(), // the cached square distance between a and b bottom[i]->mutable_gpu_diff()); diff --git a/src/caffe/proto/caffe.proto b/src/caffe/proto/caffe.proto index f0404a09b90..59def6a6c4a 100644 --- a/src/caffe/proto/caffe.proto +++ b/src/caffe/proto/caffe.proto @@ -380,8 +380,16 @@ message ConcatParameter { // Message that stores parameters used by ContrastiveLossLayer message ContrastiveLossParameter { - //margin for dissimilar pair + // margin for dissimilar pair optional float margin = 1 [default = 1.0]; + // types of labels on the bottom: + // PAIR_SIMILARITY -- there should be 3 bottoms total + // CATEGORY -- there should be 4 bottoms total + optional LabelType label_type = 2 [default = PAIR_SIMILARITY]; + enum LabelType { + PAIR_SIMILARITY = 0; // a single 0/1 label, where 1 indicates "same" + CATEGORY = 1; // a pair of category labels in [0,N-1] + } } // Message that stores parameters used by ConvolutionLayer