diff --git a/LICENSE b/LICENSE index d69d16f5bc7..2c8d98c3e30 100644 --- a/LICENSE +++ b/LICENSE @@ -1,44 +1,25 @@ -COPYRIGHT +Caffe-face -All contributions by the University of California: -Copyright (c) 2014, 2015, The Regents of the University of California (Regents) -All rights reserved. +Copyright (c) Yandong Wen -All other contributions: -Copyright (c) 2014, 2015, the respective contributors All rights reserved. -Caffe uses a shared copyright model: each contributor holds copyright over -their contributions to Caffe. The project versioning records all such -contribution and copyright details. If a contributor wants to further mark -their specific copyright on a particular contribution, they should indicate -their copyright solely in the commit message of the change when it is -committed. - -LICENSE - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - -1. Redistributions of source code must retain the above copyright notice, this - list of conditions and the following disclaimer. -2. Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR -ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES -(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND -ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -CONTRIBUTION AGREEMENT - -By contributing to the BVLC/caffe repository through pull-request, comment, -or otherwise, the contributor releases their content to the -license and copyright terms herein. +MIT License + +Permission is hereby granted, free of charge, to any person obtaining a +copy of this software and associated documentation files (the "Software"), +to deal in the Software without restriction, including without limitation +the rights to use, copy, modify, merge, publish, distribute, sublicense, +and/or sell copies of the Software, and to permit persons to whom the +Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included +in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR +OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +OTHER DEALINGS IN THE SOFTWARE. 
diff --git a/LICENSE-Caffe b/LICENSE-Caffe new file mode 100644 index 00000000000..d69d16f5bc7 --- /dev/null +++ b/LICENSE-Caffe @@ -0,0 +1,44 @@ +COPYRIGHT + +All contributions by the University of California: +Copyright (c) 2014, 2015, The Regents of the University of California (Regents) +All rights reserved. + +All other contributions: +Copyright (c) 2014, 2015, the respective contributors +All rights reserved. + +Caffe uses a shared copyright model: each contributor holds copyright over +their contributions to Caffe. The project versioning records all such +contribution and copyright details. If a contributor wants to further mark +their specific copyright on a particular contribution, they should indicate +their copyright solely in the commit message of the change when it is +committed. + +LICENSE + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR +ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +CONTRIBUTION AGREEMENT + +By contributing to the BVLC/caffe repository through pull-request, comment, +or otherwise, the contributor releases their content to the +license and copyright terms herein. diff --git a/README.md b/README.md index 44b9e62c157..31f48df3b93 100644 --- a/README.md +++ b/README.md @@ -1,37 +1,150 @@ -# Caffe +# Deep Face Recognition with Caffe Implementation -[![Build Status](https://travis-ci.org/BVLC/caffe.svg?branch=master)](https://travis-ci.org/BVLC/caffe) -[![License](https://img.shields.io/badge/license-BSD-blue.svg)](LICENSE) +This branch is developed for deep face recognition, the related paper is as follows. + + A Discriminative Feature Learning Approach for Deep Face Recognition[C] + Yandong Wen, Kaipeng Zhang, Zhifeng Li*, Yu Qiao + European Conference on Computer Vision. Springer International Publishing, 2016: 499-515. -Caffe is a deep learning framework made with expression, speed, and modularity in mind. -It is developed by the Berkeley Vision and Learning Center ([BVLC](http://bvlc.eecs.berkeley.edu)) and community contributors. 
-Check out the [project site](http://caffe.berkeleyvision.org) for all the details like +* [Updates](#updates) +* [Files](#files) +* [Train_Model](#train_model) +* [Extract_DeepFeature](#extract_deepfeature) +* [Contact](#contact) +* [Citation](#citation) +* [LICENSE](#license) +* [README_Caffe](#readme_caffe) -- [DIY Deep Learning for Vision with Caffe](https://docs.google.com/presentation/d/1UeKXVgRvvxg9OUdh_UiC5G71UMscNPlvArsWER41PsU/edit#slide=id.p) -- [Tutorial Documentation](http://caffe.berkeleyvision.org/tutorial/) -- [BVLC reference models](http://caffe.berkeleyvision.org/model_zoo.html) and the [community model zoo](https://github.com/BVLC/caffe/wiki/Model-Zoo) -- [Installation instructions](http://caffe.berkeleyvision.org/installation.html) +### Updates +- Oct 13, 2016 + * A demo for extracting deep feature by the given model is provided. +- Oct 12, 2016 + * The links of face model and features on LFW are available. + **model:** [google drive](https://drive.google.com/open?id=0B_geeR2lTMegUzlSdG5wZ1V5WU0) [baidu skydrive](http://pan.baidu.com/s/1skFoqrr) + **feature:** [google drive](https://drive.google.com/open?id=0B_geeR2lTMegLWRuWnZoMVJPZ3c) [baidu skydrive](http://pan.baidu.com/s/1boLM1bh) + * The training prototxt of toy example on MNIST are released. +- Otc 9, 2016 + * The code and training prototxt for our [ECCV16](http://link.springer.com/chapter/10.1007/978-3-319-46478-7_31) paper are released. + * If you train our Network on **CAISA-WebFace**, the expected verification performance of **SINGLE MODEL** on **[LFW](http://vis-www.cs.umass.edu/lfw/)** should be **~99%**. -and step-by-step examples. 
+### Files +- Original Caffe library +- Center Loss + * src/caffe/proto/caffe.proto + * include/caffe/layers/center_loss_layer.hpp + * src/caffe/layers/center_loss_layer.cpp + * src/caffe/layers/center_loss_layer.cu +- face_example + * face_example/data/ + * face_example/face_snapshot/ + * face_example/face_train_test.prototxt + * face_example/face_solver.prototxt + * face_example/face_deploy.prototxt + * face_example/extractDeepFeature.m +- mnist_example + * mnist_example/data/ + * mnist_example/face_snapshot/ + * mnist_example/mnist_train_test.prototxt + * mnist_example/mnist_solver.prototxt + * mnist_example/mnist_deploy.prototxt -[![Join the chat at https://gitter.im/BVLC/caffe](https://badges.gitter.im/Join%20Chat.svg)](https://gitter.im/BVLC/caffe?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge) +### Train_Model +1. The Installation completely the same as [Caffe](http://caffe.berkeleyvision.org/). Please follow the [installation instructions](http://caffe.berkeleyvision.org/installation.html). Make sure you have correctly installed before using our code. +2. Download the face dataset for training, e.g. [CAISA-WebFace](http://www.cbsr.ia.ac.cn/english/CASIA-WebFace-Database.html), [VGG-Face](http://www.robots.ox.ac.uk/~vgg/data/vgg_face/), [MS-Celeb-1M](https://www.microsoft.com/en-us/research/project/ms-celeb-1m-challenge-recognizing-one-million-celebrities-real-world/), [MegaFace](http://megaface.cs.washington.edu/). +3. Preprocess the training face images, including detection, alignment, etc. Here we strongly recommend [MTCNN](https://github.com/kpzhang93/MTCNN_face_detection_alignment), which is an effective and efficient open-source tool for face detection and alignment. +4. Creat list for training set and validation set. Place them in face_example/data/ +5. 
Specify your data source for train & val -Please join the [caffe-users group](https://groups.google.com/forum/#!forum/caffe-users) or [gitter chat](https://gitter.im/BVLC/caffe) to ask questions and talk about methods and models. -Framework development discussions and thorough bug reports are collected on [Issues](https://github.com/BVLC/caffe/issues). + layer { + name: "data" + type: "ImageData" + top: "data" + top: "label" + image_data_param { + source: "face_example/data/###your_list###" + } + } -Happy brewing! +6. Specify the number of subject in FC6 layer -## License and Citation + layer { + name: "fc6" + type: "InnerProduct" + bottom: "fc5" + top: "fc6" + inner_product_param { + num_output: ##number## + } + } -Caffe is released under the [BSD 2-Clause license](https://github.com/BVLC/caffe/blob/master/LICENSE). -The BVLC reference models are released for unrestricted use. +7. Specify the loss weight and the number of subject in center loss layer -Please cite Caffe in your publications if it helps your research: + layer { + name: "center_loss" + type: "CenterLoss" + bottom: "fc5" + bottom: "label" + top: "center_loss" + loss_weight: ##weight## + center_loss_param { + num_output: ##number## + } + } - @article{jia2014caffe, - Author = {Jia, Yangqing and Shelhamer, Evan and Donahue, Jeff and Karayev, Sergey and Long, Jonathan and Girshick, Ross and Guadarrama, Sergio and Darrell, Trevor}, - Journal = {arXiv preprint arXiv:1408.5093}, - Title = {Caffe: Convolutional Architecture for Fast Feature Embedding}, - Year = {2014} +8. Train model + + cd $CAFFE-FACE_ROOT + ./build/tools/caffe train -solver face_example/face_solver.prototxt -gpu X,Y + +### Extract_DeepFeature +1. Compile matcaffe by make matcaffe +2. 
Specify the correspinding paths in face_example/extractDeepFeature.m + + addpath('path_to_matCaffe/matlab'); + model = 'path_to_deploy/face_deploy.prototxt'; + weights = 'path_to_model/face_model.caffemodel'; + image = imread('path_to_image/Jennifer_Aniston_0016.jpg'); + +3. Run extractDeepFeature.m in Matlab + +### Contact +- [Yandong Wen](http://ydwen.github.io/) +- [Kaipeng Zhang](http://kpzhang93.github.io/) + +### Citation +You are encouraged to cite the following paper if it helps your research. + + @inproceedings{wen2016discriminative, + title={A Discriminative Feature Learning Approach for Deep Face Recognition}, + author={Wen, Yandong and Zhang, Kaipeng and Li, Zhifeng and Qiao, Yu}, + booktitle={European Conference on Computer Vision}, + pages={499--515}, + year={2016}, + organization={Springer} } + +### License +Copyright (c) Yandong Wen + +All rights reserved. + +MIT License + +Permission is hereby granted, free of charge, to any person obtaining a +copy of this software and associated documentation files (the "Software"), +to deal in the Software without restriction, including without limitation +the rights to use, copy, modify, merge, publish, distribute, sublicense, +and/or sell copies of the Software, and to permit persons to whom the +Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included +in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR +OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +OTHER DEALINGS IN THE SOFTWARE. 
diff --git a/face_example/Jennifer_Aniston_0016.jpg b/face_example/Jennifer_Aniston_0016.jpg new file mode 100644 index 00000000000..4ffee36b97c Binary files /dev/null and b/face_example/Jennifer_Aniston_0016.jpg differ diff --git a/face_example/data/.gitignore b/face_example/data/.gitignore new file mode 100644 index 00000000000..86d0cb2726c --- /dev/null +++ b/face_example/data/.gitignore @@ -0,0 +1,4 @@ +# Ignore everything in this directory +* +# Except this file +!.gitignore \ No newline at end of file diff --git a/face_example/extractDeepFeature.m b/face_example/extractDeepFeature.m new file mode 100644 index 00000000000..d599f16b104 --- /dev/null +++ b/face_example/extractDeepFeature.m @@ -0,0 +1,44 @@ +clear;clc; +addpath('path_to_matCaffe/matlab'); +caffe.reset_all(); + +% load face model and creat network +caffe.set_device(0); +caffe.set_mode_gpu(); +model = 'path_to_deploy/face_deploy.prototxt'; +weights = 'path_to_model/face_model.caffemodel'; +net = caffe.Net(model, weights, 'test'); + +% load face image, and align to 112 X 96 +imgSize = [112, 96]; +coord5points = [30.2946, 65.5318, 48.0252, 33.5493, 62.7299; ... + 51.6963, 51.5014, 71.7366, 92.3655, 92.2041]; + +image = imread('path_to_image/Jennifer_Aniston_0016.jpg'); +facial5points = [105.8306, 147.9323, 121.3533, 106.1169, 144.3622; ... + 109.8005, 112.5533, 139.1172, 155.6359, 156.3451]; + +Tfm = cp2tform(facial5points', coord5points', 'similarity'); +cropImg = imtransform(image, Tfm, 'XData', [1 imgSize(2)],... 
+ 'YData', [1 imgSize(1)], 'Size', imgSize); + +% transform image, obtaining the original face and the horizontally flipped one +if size(cropImg, 3) < 3 + cropImg(:,:,2) = cropImg(:,:,1); + cropImg(:,:,3) = cropImg(:,:,1); +end +cropImg = single(cropImg); +cropImg = (cropImg - 127.5)/128; +cropImg = permute(cropImg, [2,1,3]); +cropImg = cropImg(:,:,[3,2,1]); + +cropImg_(:,:,1) = flipud(cropImg(:,:,1)); +cropImg_(:,:,2) = flipud(cropImg(:,:,2)); +cropImg_(:,:,3) = flipud(cropImg(:,:,3)); + +% extract deep feature +res = net.forward({cropImg}); +res_ = net.forward({cropImg_}); +deepfeature = [res{1}; res_{1}]; + +caffe.reset_all(); diff --git a/face_example/face_deploy.prototxt b/face_example/face_deploy.prototxt new file mode 100644 index 00000000000..89f72579732 --- /dev/null +++ b/face_example/face_deploy.prototxt @@ -0,0 +1,1092 @@ +input: "data" +input_dim: 1 +input_dim: 3 +input_dim: 112 +input_dim: 96 +layer { + name: "conv1a" + type: "Convolution" + bottom: "data" + top: "conv1a" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 32 + kernel_size: 3 + stride: 1 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "relu1a" + type: "PReLU" + bottom: "conv1a" + top: "conv1a" +} +layer { + name: "conv1b" + type: "Convolution" + bottom: "conv1a" + top: "conv1b" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 64 + kernel_size: 3 + stride: 1 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "relu1b" + type: "PReLU" + bottom: "conv1b" + top: "conv1b" +} +layer { + name: "pool1b" + type: "Pooling" + bottom: "conv1b" + top: "pool1b" + pooling_param { + pool: MAX + kernel_size: 2 + stride: 2 + } +} +layer { + name: "conv2_1" + type: "Convolution" + bottom: "pool1b" + top: "conv2_1" + param { + lr_mult: 1 + 
decay_mult: 1 + } + param { + lr_mult: 0 + decay_mult: 0 + } + convolution_param { + num_output: 64 + kernel_size: 3 + stride: 1 + pad: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "relu2_1" + type: "PReLU" + bottom: "conv2_1" + top: "conv2_1" +} +layer { + name: "conv2_2" + type: "Convolution" + bottom: "conv2_1" + top: "conv2_2" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 0 + decay_mult: 0 + } + convolution_param { + num_output: 64 + kernel_size: 3 + stride: 1 + pad: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "relu2_2" + type: "PReLU" + bottom: "conv2_2" + top: "conv2_2" +} +layer { + name: "res2_2" + type: "Eltwise" + bottom: "pool1b" + bottom: "conv2_2" + top: "res2_2" + eltwise_param { + operation: 1 + } +} +layer { + name: "conv2" + type: "Convolution" + bottom: "res2_2" + top: "conv2" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 128 + kernel_size: 3 + stride: 1 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "relu2" + type: "PReLU" + bottom: "conv2" + top: "conv2" +} +layer { + name: "pool2" + type: "Pooling" + bottom: "conv2" + top: "pool2" + pooling_param { + pool: MAX + kernel_size: 2 + stride: 2 + } +} +layer { + name: "conv3_1" + type: "Convolution" + bottom: "pool2" + top: "conv3_1" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + kernel_size: 3 + stride: 1 + pad: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "relu3_1" + type: "PReLU" + bottom: "conv3_1" + top: "conv3_1" +} +layer { + name: "conv3_2" + type: "Convolution" + bottom: "conv3_1" + top: "conv3_2" + param { 
+ lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + kernel_size: 3 + stride: 1 + pad: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "relu3_2" + type: "PReLU" + bottom: "conv3_2" + top: "conv3_2" +} +layer { + name: "res3_2" + type: "Eltwise" + bottom: "pool2" + bottom: "conv3_2" + top: "res3_2" + eltwise_param { + operation: 1 + } +} +layer { + name: "conv3_3" + type: "Convolution" + bottom: "res3_2" + top: "conv3_3" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + kernel_size: 3 + stride: 1 + pad: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "relu3_3" + type: "PReLU" + bottom: "conv3_3" + top: "conv3_3" +} +layer { + name: "conv3_4" + type: "Convolution" + bottom: "conv3_3" + top: "conv3_4" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + kernel_size: 3 + stride: 1 + pad: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "relu3_4" + type: "PReLU" + bottom: "conv3_4" + top: "conv3_4" +} +layer { + name: "res3_4" + type: "Eltwise" + bottom: "res3_2" + bottom: "conv3_4" + top: "res3_4" + eltwise_param { + operation: 1 + } +} + +layer { + name: "conv3" + type: "Convolution" + bottom: "res3_4" + top: "conv3" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 256 + kernel_size: 3 + stride: 1 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "relu3" + type: "PReLU" + bottom: "conv3" + top: "conv3" +} +layer { + name: "pool3" + type: "Pooling" + bottom: "conv3" + top: "pool3" + 
pooling_param { + pool: MAX + kernel_size: 2 + stride: 2 + } +} +layer { + name: "conv4_1" + type: "Convolution" + bottom: "pool3" + top: "conv4_1" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 0 + decay_mult: 0 + } + convolution_param { + num_output: 256 + kernel_size: 3 + stride: 1 + pad: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "relu4_1" + type: "PReLU" + bottom: "conv4_1" + top: "conv4_1" +} +layer { + name: "conv4_2" + type: "Convolution" + bottom: "conv4_1" + top: "conv4_2" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 0 + decay_mult: 0 + } + convolution_param { + num_output: 256 + kernel_size: 3 + stride: 1 + pad: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "relu4_2" + type: "PReLU" + bottom: "conv4_2" + top: "conv4_2" +} +layer { + name: "res4_2" + type: "Eltwise" + bottom: "pool3" + bottom: "conv4_2" + top: "res4_2" + eltwise_param { + operation: 1 + } +} +layer { + name: "conv4_3" + type: "Convolution" + bottom: "res4_2" + top: "conv4_3" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 0 + decay_mult: 0 + } + convolution_param { + num_output: 256 + kernel_size: 3 + stride: 1 + pad: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "relu4_3" + type: "PReLU" + bottom: "conv4_3" + top: "conv4_3" +} +layer { + name: "conv4_4" + type: "Convolution" + bottom: "conv4_3" + top: "conv4_4" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 0 + decay_mult: 0 + } + convolution_param { + num_output: 256 + kernel_size: 3 + stride: 1 + pad: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "relu4_4" + type: "PReLU" + bottom: "conv4_4" + top: "conv4_4" +} +layer { + name: "res4_4" + 
type: "Eltwise" + bottom: "res4_2" + bottom: "conv4_4" + top: "res4_4" + eltwise_param { + operation: 1 + } +} +layer { + name: "conv4_5" + type: "Convolution" + bottom: "res4_4" + top: "conv4_5" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 0 + decay_mult: 0 + } + convolution_param { + num_output: 256 + kernel_size: 3 + stride: 1 + pad: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "relu4_5" + type: "PReLU" + bottom: "conv4_5" + top: "conv4_5" +} +layer { + name: "conv4_6" + type: "Convolution" + bottom: "conv4_5" + top: "conv4_6" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 0 + decay_mult: 0 + } + convolution_param { + num_output: 256 + kernel_size: 3 + stride: 1 + pad: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "relu4_6" + type: "PReLU" + bottom: "conv4_6" + top: "conv4_6" +} +layer { + name: "res4_6" + type: "Eltwise" + bottom: "res4_4" + bottom: "conv4_6" + top: "res4_6" + eltwise_param { + operation: 1 + } +} +layer { + name: "conv4_7" + type: "Convolution" + bottom: "res4_6" + top: "conv4_7" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 0 + decay_mult: 0 + } + convolution_param { + num_output: 256 + kernel_size: 3 + stride: 1 + pad: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "relu4_7" + type: "PReLU" + bottom: "conv4_7" + top: "conv4_7" +} +layer { + name: "conv4_8" + type: "Convolution" + bottom: "conv4_7" + top: "conv4_8" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 0 + decay_mult: 0 + } + convolution_param { + num_output: 256 + kernel_size: 3 + stride: 1 + pad: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "relu4_8" + type: "PReLU" + bottom: "conv4_8" 
+ top: "conv4_8" +} +layer { + name: "res4_8" + type: "Eltwise" + bottom: "res4_6" + bottom: "conv4_8" + top: "res4_8" + eltwise_param { + operation: 1 + } +} +layer { + name: "conv4_9" + type: "Convolution" + bottom: "res4_8" + top: "conv4_9" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 0 + decay_mult: 0 + } + convolution_param { + num_output: 256 + kernel_size: 3 + stride: 1 + pad: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "relu4_9" + type: "PReLU" + bottom: "conv4_9" + top: "conv4_9" +} +layer { + name: "conv4_10" + type: "Convolution" + bottom: "conv4_9" + top: "conv4_10" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 0 + decay_mult: 0 + } + convolution_param { + num_output: 256 + kernel_size: 3 + stride: 1 + pad: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "relu4_10" + type: "PReLU" + bottom: "conv4_10" + top: "conv4_10" +} +layer { + name: "res4_10" + type: "Eltwise" + bottom: "res4_8" + bottom: "conv4_10" + top: "res4_10" + eltwise_param { + operation: 1 + } +} +layer { + name: "conv4" + type: "Convolution" + bottom: "res4_10" + top: "conv4" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 512 + kernel_size: 3 + stride: 1 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "relu4" + type: "PReLU" + bottom: "conv4" + top: "conv4" +} +layer { + name: "pool4" + type: "Pooling" + bottom: "conv4" + top: "pool4" + pooling_param { + pool: MAX + kernel_size: 2 + stride: 2 + } +} +layer { + name: "conv5_1" + type: "Convolution" + bottom: "pool4" + top: "conv5_1" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 0 + decay_mult: 0 + } + convolution_param { + num_output: 512 + kernel_size: 3 + stride: 1 + pad: 1 + 
weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "relu5_1" + type: "PReLU" + bottom: "conv5_1" + top: "conv5_1" +} +layer { + name: "conv5_2" + type: "Convolution" + bottom: "conv5_1" + top: "conv5_2" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 0 + decay_mult: 0 + } + convolution_param { + num_output: 512 + kernel_size: 3 + stride: 1 + pad: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "relu5_2" + type: "PReLU" + bottom: "conv5_2" + top: "conv5_2" +} +layer { + name: "res5_2" + type: "Eltwise" + bottom: "pool4" + bottom: "conv5_2" + top: "res5_2" + eltwise_param { + operation: 1 + } +} +layer { + name: "conv5_3" + type: "Convolution" + bottom: "res5_2" + top: "conv5_3" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 0 + decay_mult: 0 + } + convolution_param { + num_output: 512 + kernel_size: 3 + stride: 1 + pad: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "relu5_3" + type: "PReLU" + bottom: "conv5_3" + top: "conv5_3" +} +layer { + name: "conv5_4" + type: "Convolution" + bottom: "conv5_3" + top: "conv5_4" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 0 + decay_mult: 0 + } + convolution_param { + num_output: 512 + kernel_size: 3 + stride: 1 + pad: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "relu5_4" + type: "PReLU" + bottom: "conv5_4" + top: "conv5_4" +} +layer { + name: "res5_4" + type: "Eltwise" + bottom: "res5_2" + bottom: "conv5_4" + top: "res5_4" + eltwise_param { + operation: 1 + } +} +layer { + name: "conv5_5" + type: "Convolution" + bottom: "res5_4" + top: "conv5_5" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 0 + decay_mult: 0 + } + convolution_param { + 
num_output: 512 + kernel_size: 3 + stride: 1 + pad: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "relu5_5" + type: "PReLU" + bottom: "conv5_5" + top: "conv5_5" +} +layer { + name: "conv5_6" + type: "Convolution" + bottom: "conv5_5" + top: "conv5_6" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 0 + decay_mult: 0 + } + convolution_param { + num_output: 512 + kernel_size: 3 + stride: 1 + pad: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "relu5_6" + type: "PReLU" + bottom: "conv5_6" + top: "conv5_6" +} +layer { + name: "res5_6" + type: "Eltwise" + bottom: "res5_4" + bottom: "conv5_6" + top: "res5_6" + eltwise_param { + operation: 1 + } +} +layer { + name: "fc5" + type: "InnerProduct" + bottom: "res5_6" + top: "fc5" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + inner_product_param { + num_output: 512 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} diff --git a/face_example/face_snapshot/.gitignore b/face_example/face_snapshot/.gitignore new file mode 100644 index 00000000000..abf96602f93 --- /dev/null +++ b/face_example/face_snapshot/.gitignore @@ -0,0 +1 @@ +# Ignore everything in this directory \ No newline at end of file diff --git a/face_example/face_solver.prototxt b/face_example/face_solver.prototxt new file mode 100644 index 00000000000..a3e94117b6d --- /dev/null +++ b/face_example/face_solver.prototxt @@ -0,0 +1,20 @@ +net: "face_example/face_train_test.prototxt" +test_iter: 100 +test_interval: 2000 + +base_lr: 0.1 +lr_policy: "multistep" +gamma: 0.1 + +stepvalue: 16000 +stepvalue: 24000 +stepvalue: 28000 +max_iter: 28000 + +display: 100 +momentum: 0.9 +weight_decay: 0.0005 +snapshot: 1000 +snapshot_prefix: "face_example/face_snapshot/face_train_test" + +solver_mode: GPU diff --git 
a/face_example/face_train_test.prototxt b/face_example/face_train_test.prototxt new file mode 100644 index 00000000000..9e4f4e73395 --- /dev/null +++ b/face_example/face_train_test.prototxt @@ -0,0 +1,1181 @@ +name: "Face-ResNet" +layer { + name: "data" + type: "ImageData" + top: "data" + top: "label" + include { + phase: TRAIN + } + transform_param { + mean_value: 127.5 + mean_value: 127.5 + mean_value: 127.5 + scale: 0.0078125 + mirror: true + } + image_data_param { + source: "face_example/data/caisa_train.txt" + batch_size: 256 + shuffle: true + } +} +layer { + name: "data" + type: "ImageData" + top: "data" + top: "label" + include { + phase: TEST + } + transform_param { + mean_value: 127.5 + mean_value: 127.5 + mean_value: 127.5 + scale: 0.0078125 + mirror: true + } + image_data_param { + source: "face_example/data/caisa_val.txt" + batch_size: 128 + shuffle: true + } +} +layer { + name: "conv1a" + type: "Convolution" + bottom: "data" + top: "conv1a" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 32 + kernel_size: 3 + stride: 1 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "relu1a" + type: "PReLU" + bottom: "conv1a" + top: "conv1a" +} +layer { + name: "conv1b" + type: "Convolution" + bottom: "conv1a" + top: "conv1b" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 64 + kernel_size: 3 + stride: 1 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "relu1b" + type: "PReLU" + bottom: "conv1b" + top: "conv1b" +} +layer { + name: "pool1b" + type: "Pooling" + bottom: "conv1b" + top: "pool1b" + pooling_param { + pool: MAX + kernel_size: 2 + stride: 2 + } +} +layer { + name: "conv2_1" + type: "Convolution" + bottom: "pool1b" + top: "conv2_1" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + 
lr_mult: 0 + decay_mult: 0 + } + convolution_param { + num_output: 64 + kernel_size: 3 + stride: 1 + pad: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "relu2_1" + type: "PReLU" + bottom: "conv2_1" + top: "conv2_1" +} +layer { + name: "conv2_2" + type: "Convolution" + bottom: "conv2_1" + top: "conv2_2" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 0 + decay_mult: 0 + } + convolution_param { + num_output: 64 + kernel_size: 3 + stride: 1 + pad: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "relu2_2" + type: "PReLU" + bottom: "conv2_2" + top: "conv2_2" +} +layer { + name: "res2_2" + type: "Eltwise" + bottom: "pool1b" + bottom: "conv2_2" + top: "res2_2" + eltwise_param { + operation: 1 + } +} +layer { + name: "conv2" + type: "Convolution" + bottom: "res2_2" + top: "conv2" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 128 + kernel_size: 3 + stride: 1 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "relu2" + type: "PReLU" + bottom: "conv2" + top: "conv2" +} +layer { + name: "pool2" + type: "Pooling" + bottom: "conv2" + top: "pool2" + pooling_param { + pool: MAX + kernel_size: 2 + stride: 2 + } +} +layer { + name: "conv3_1" + type: "Convolution" + bottom: "pool2" + top: "conv3_1" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + kernel_size: 3 + stride: 1 + pad: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "relu3_1" + type: "PReLU" + bottom: "conv3_1" + top: "conv3_1" +} +layer { + name: "conv3_2" + type: "Convolution" + bottom: "conv3_1" + top: "conv3_2" + param { + lr_mult: 1 + decay_mult: 1 
+ } + param { + lr_mult: 0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + kernel_size: 3 + stride: 1 + pad: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "relu3_2" + type: "PReLU" + bottom: "conv3_2" + top: "conv3_2" +} +layer { + name: "res3_2" + type: "Eltwise" + bottom: "pool2" + bottom: "conv3_2" + top: "res3_2" + eltwise_param { + operation: 1 + } +} +layer { + name: "conv3_3" + type: "Convolution" + bottom: "res3_2" + top: "conv3_3" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + kernel_size: 3 + stride: 1 + pad: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "relu3_3" + type: "PReLU" + bottom: "conv3_3" + top: "conv3_3" +} +layer { + name: "conv3_4" + type: "Convolution" + bottom: "conv3_3" + top: "conv3_4" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + kernel_size: 3 + stride: 1 + pad: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "relu3_4" + type: "PReLU" + bottom: "conv3_4" + top: "conv3_4" +} +layer { + name: "res3_4" + type: "Eltwise" + bottom: "res3_2" + bottom: "conv3_4" + top: "res3_4" + eltwise_param { + operation: 1 + } +} + +layer { + name: "conv3" + type: "Convolution" + bottom: "res3_4" + top: "conv3" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 256 + kernel_size: 3 + stride: 1 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "relu3" + type: "PReLU" + bottom: "conv3" + top: "conv3" +} +layer { + name: "pool3" + type: "Pooling" + bottom: "conv3" + top: "pool3" + pooling_param { + pool: MAX + 
kernel_size: 2 + stride: 2 + } +} +layer { + name: "conv4_1" + type: "Convolution" + bottom: "pool3" + top: "conv4_1" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 0 + decay_mult: 0 + } + convolution_param { + num_output: 256 + kernel_size: 3 + stride: 1 + pad: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "relu4_1" + type: "PReLU" + bottom: "conv4_1" + top: "conv4_1" +} +layer { + name: "conv4_2" + type: "Convolution" + bottom: "conv4_1" + top: "conv4_2" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 0 + decay_mult: 0 + } + convolution_param { + num_output: 256 + kernel_size: 3 + stride: 1 + pad: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "relu4_2" + type: "PReLU" + bottom: "conv4_2" + top: "conv4_2" +} +layer { + name: "res4_2" + type: "Eltwise" + bottom: "pool3" + bottom: "conv4_2" + top: "res4_2" + eltwise_param { + operation: 1 + } +} +layer { + name: "conv4_3" + type: "Convolution" + bottom: "res4_2" + top: "conv4_3" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 0 + decay_mult: 0 + } + convolution_param { + num_output: 256 + kernel_size: 3 + stride: 1 + pad: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "relu4_3" + type: "PReLU" + bottom: "conv4_3" + top: "conv4_3" +} +layer { + name: "conv4_4" + type: "Convolution" + bottom: "conv4_3" + top: "conv4_4" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 0 + decay_mult: 0 + } + convolution_param { + num_output: 256 + kernel_size: 3 + stride: 1 + pad: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "relu4_4" + type: "PReLU" + bottom: "conv4_4" + top: "conv4_4" +} +layer { + name: "res4_4" + type: "Eltwise" + bottom: 
"res4_2" + bottom: "conv4_4" + top: "res4_4" + eltwise_param { + operation: 1 + } +} +layer { + name: "conv4_5" + type: "Convolution" + bottom: "res4_4" + top: "conv4_5" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 0 + decay_mult: 0 + } + convolution_param { + num_output: 256 + kernel_size: 3 + stride: 1 + pad: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "relu4_5" + type: "PReLU" + bottom: "conv4_5" + top: "conv4_5" +} +layer { + name: "conv4_6" + type: "Convolution" + bottom: "conv4_5" + top: "conv4_6" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 0 + decay_mult: 0 + } + convolution_param { + num_output: 256 + kernel_size: 3 + stride: 1 + pad: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "relu4_6" + type: "PReLU" + bottom: "conv4_6" + top: "conv4_6" +} +layer { + name: "res4_6" + type: "Eltwise" + bottom: "res4_4" + bottom: "conv4_6" + top: "res4_6" + eltwise_param { + operation: 1 + } +} +layer { + name: "conv4_7" + type: "Convolution" + bottom: "res4_6" + top: "conv4_7" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 0 + decay_mult: 0 + } + convolution_param { + num_output: 256 + kernel_size: 3 + stride: 1 + pad: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "relu4_7" + type: "PReLU" + bottom: "conv4_7" + top: "conv4_7" +} +layer { + name: "conv4_8" + type: "Convolution" + bottom: "conv4_7" + top: "conv4_8" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 0 + decay_mult: 0 + } + convolution_param { + num_output: 256 + kernel_size: 3 + stride: 1 + pad: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "relu4_8" + type: "PReLU" + bottom: "conv4_8" + top: "conv4_8" +} 
+layer { + name: "res4_8" + type: "Eltwise" + bottom: "res4_6" + bottom: "conv4_8" + top: "res4_8" + eltwise_param { + operation: 1 + } +} +layer { + name: "conv4_9" + type: "Convolution" + bottom: "res4_8" + top: "conv4_9" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 0 + decay_mult: 0 + } + convolution_param { + num_output: 256 + kernel_size: 3 + stride: 1 + pad: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "relu4_9" + type: "PReLU" + bottom: "conv4_9" + top: "conv4_9" +} +layer { + name: "conv4_10" + type: "Convolution" + bottom: "conv4_9" + top: "conv4_10" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 0 + decay_mult: 0 + } + convolution_param { + num_output: 256 + kernel_size: 3 + stride: 1 + pad: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "relu4_10" + type: "PReLU" + bottom: "conv4_10" + top: "conv4_10" +} +layer { + name: "res4_10" + type: "Eltwise" + bottom: "res4_8" + bottom: "conv4_10" + top: "res4_10" + eltwise_param { + operation: 1 + } +} +layer { + name: "conv4" + type: "Convolution" + bottom: "res4_10" + top: "conv4" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 512 + kernel_size: 3 + stride: 1 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "relu4" + type: "PReLU" + bottom: "conv4" + top: "conv4" +} +layer { + name: "pool4" + type: "Pooling" + bottom: "conv4" + top: "pool4" + pooling_param { + pool: MAX + kernel_size: 2 + stride: 2 + } +} +layer { + name: "conv5_1" + type: "Convolution" + bottom: "pool4" + top: "conv5_1" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 0 + decay_mult: 0 + } + convolution_param { + num_output: 512 + kernel_size: 3 + stride: 1 + pad: 1 + weight_filler { + type: 
"gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "relu5_1" + type: "PReLU" + bottom: "conv5_1" + top: "conv5_1" +} +layer { + name: "conv5_2" + type: "Convolution" + bottom: "conv5_1" + top: "conv5_2" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 0 + decay_mult: 0 + } + convolution_param { + num_output: 512 + kernel_size: 3 + stride: 1 + pad: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "relu5_2" + type: "PReLU" + bottom: "conv5_2" + top: "conv5_2" +} +layer { + name: "res5_2" + type: "Eltwise" + bottom: "pool4" + bottom: "conv5_2" + top: "res5_2" + eltwise_param { + operation: 1 + } +} +layer { + name: "conv5_3" + type: "Convolution" + bottom: "res5_2" + top: "conv5_3" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 0 + decay_mult: 0 + } + convolution_param { + num_output: 512 + kernel_size: 3 + stride: 1 + pad: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "relu5_3" + type: "PReLU" + bottom: "conv5_3" + top: "conv5_3" +} +layer { + name: "conv5_4" + type: "Convolution" + bottom: "conv5_3" + top: "conv5_4" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 0 + decay_mult: 0 + } + convolution_param { + num_output: 512 + kernel_size: 3 + stride: 1 + pad: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "relu5_4" + type: "PReLU" + bottom: "conv5_4" + top: "conv5_4" +} +layer { + name: "res5_4" + type: "Eltwise" + bottom: "res5_2" + bottom: "conv5_4" + top: "res5_4" + eltwise_param { + operation: 1 + } +} +layer { + name: "conv5_5" + type: "Convolution" + bottom: "res5_4" + top: "conv5_5" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 0 + decay_mult: 0 + } + convolution_param { + num_output: 512 + kernel_size: 3 
+ stride: 1 + pad: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "relu5_5" + type: "PReLU" + bottom: "conv5_5" + top: "conv5_5" +} +layer { + name: "conv5_6" + type: "Convolution" + bottom: "conv5_5" + top: "conv5_6" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 0 + decay_mult: 0 + } + convolution_param { + num_output: 512 + kernel_size: 3 + stride: 1 + pad: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "relu5_6" + type: "PReLU" + bottom: "conv5_6" + top: "conv5_6" +} +layer { + name: "res5_6" + type: "Eltwise" + bottom: "res5_4" + bottom: "conv5_6" + top: "res5_6" + eltwise_param { + operation: 1 + } +} +layer { + name: "fc5" + type: "InnerProduct" + bottom: "res5_6" + top: "fc5" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + inner_product_param { + num_output: 512 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +############## softmax loss ############### +layer { + name: "fc6" + type: "InnerProduct" + bottom: "fc5" + top: "fc6" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + inner_product_param { + num_output: 10572 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "softmax_loss" + type: "SoftmaxWithLoss" + bottom: "fc6" + bottom: "label" + top: "softmax_loss" +} +############## center loss ############### +layer { + name: "center_loss" + type: "CenterLoss" + bottom: "fc5" + bottom: "label" + top: "center_loss" + param { + lr_mult: 1 + decay_mult: 2 + } + center_loss_param { + num_output: 10572 + center_filler { + type: "xavier" + } + } + loss_weight: 0.008 +} diff --git a/include/caffe/layers/center_loss_layer.hpp b/include/caffe/layers/center_loss_layer.hpp new file mode 100644 index 
00000000000..cd6fd1cf994 --- /dev/null +++ b/include/caffe/layers/center_loss_layer.hpp @@ -0,0 +1,48 @@ +#ifndef CAFFE_CENTER_LOSS_LAYER_HPP_ +#define CAFFE_CENTER_LOSS_LAYER_HPP_ + +#include <vector> + +#include "caffe/blob.hpp" +#include "caffe/layer.hpp" +#include "caffe/proto/caffe.pb.h" + +#include "caffe/layers/loss_layer.hpp" + +namespace caffe { + +template <typename Dtype> +class CenterLossLayer : public LossLayer<Dtype> { + public: + explicit CenterLossLayer(const LayerParameter& param) + : LossLayer<Dtype>(param) {} + virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom, + const vector<Blob<Dtype>*>& top); + virtual void Reshape(const vector<Blob<Dtype>*>& bottom, + const vector<Blob<Dtype>*>& top); + + virtual inline const char* type() const { return "CenterLoss"; } + virtual inline int ExactNumBottomBlobs() const { return 2; } + virtual inline int ExactNumTopBlobs() const { return -1; } + + protected: + virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom, + const vector<Blob<Dtype>*>& top); + virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom, + const vector<Blob<Dtype>*>& top); + virtual void Backward_cpu(const vector<Blob<Dtype>*>& top, + const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom); + virtual void Backward_gpu(const vector<Blob<Dtype>*>& top, + const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom); + + int M_; + int K_; + int N_; + + Blob<Dtype> distance_; + Blob<Dtype> variation_sum_; +}; + +} // namespace caffe + +#endif // CAFFE_CENTER_LOSS_LAYER_HPP_ \ No newline at end of file diff --git a/mnist_example/data/.gitignore b/mnist_example/data/.gitignore new file mode 100644 index 00000000000..86d0cb2726c --- /dev/null +++ b/mnist_example/data/.gitignore @@ -0,0 +1,4 @@ +# Ignore everything in this directory +* +# Except this file +!.gitignore \ No newline at end of file diff --git a/mnist_example/mnist_deploy.prototxt b/mnist_example/mnist_deploy.prototxt new file mode 100644 index 00000000000..235b9e6ddc6 --- /dev/null +++ b/mnist_example/mnist_deploy.prototxt @@ -0,0 +1,267 @@ +input: "data" +input_dim: 1000 +input_dim: 1 +input_dim: 28 +input_dim: 28 + +layer { + name: "conv1" + 
type: "Convolution" + bottom: "data" + top: "conv1" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 32 + kernel_size: 5 + stride: 1 + pad: 2 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "prelu1" + type: "PReLU" + bottom: "conv1" + top: "conv1" +} +layer { + name: "conv1+" + type: "Convolution" + bottom: "conv1" + top: "conv1+" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 32 + kernel_size: 5 + stride: 1 + pad: 2 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "prelu1+" + type: "PReLU" + bottom: "conv1+" + top: "conv1+" +} +layer { + name: "pool1" + type: "Pooling" + bottom: "conv1+" + top: "pool1" + pooling_param { + pool: MAX + kernel_size: 2 + stride: 2 + } +} +layer { + name: "conv2" + type: "Convolution" + bottom: "pool1" + top: "conv2" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 64 + kernel_size: 5 + stride: 1 + pad: 2 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "prelu2" + type: "PReLU" + bottom: "conv2" + top: "conv2" +} +layer { + name: "conv2+" + type: "Convolution" + bottom: "conv2" + top: "conv2+" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 64 + kernel_size: 5 + stride: 1 + pad: 2 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "prelu2+" + type: "PReLU" + bottom: "conv2+" + top: "conv2+" +} +layer { + name: "pool2" + type: "Pooling" + bottom: "conv2+" + top: "pool2" + pooling_param { + pool: MAX + kernel_size: 2 + stride: 2 + } +} +layer { + name: "conv3" + type: "Convolution" + bottom: 
"pool2" + top: "conv3" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 128 + kernel_size: 5 + stride: 1 + pad: 2 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "prelu3" + type: "PReLU" + bottom: "conv3" + top: "conv3" +} +layer { + name: "conv3+" + type: "Convolution" + bottom: "conv3" + top: "conv3+" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 128 + kernel_size: 5 + stride: 1 + pad: 2 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "prelu3+" + type: "PReLU" + bottom: "conv3+" + top: "conv3+" +} +layer { + name: "pool3" + type: "Pooling" + bottom: "conv3+" + top: "pool3" + pooling_param { + pool: MAX + kernel_size: 3 + stride: 2 + } +} +layer { + name: "ip1" + type: "InnerProduct" + bottom: "pool3" + top: "ip1" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + inner_product_param { + num_output: 2 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "preluip1" + type: "PReLU" + bottom: "ip1" + top: "ip1" +} diff --git a/mnist_example/mnist_snapshot/.gitignore b/mnist_example/mnist_snapshot/.gitignore new file mode 100644 index 00000000000..86d0cb2726c --- /dev/null +++ b/mnist_example/mnist_snapshot/.gitignore @@ -0,0 +1,4 @@ +# Ignore everything in this directory +* +# Except this file +!.gitignore \ No newline at end of file diff --git a/mnist_example/mnist_solver.prototxt b/mnist_example/mnist_solver.prototxt new file mode 100644 index 00000000000..e5ac53d45f9 --- /dev/null +++ b/mnist_example/mnist_solver.prototxt @@ -0,0 +1,25 @@ +net: "mnist_example/mnist_train_test.prototxt" + +test_iter: 100 +test_interval: 1000 + +base_lr: 0.01 +momentum: 0.9 +weight_decay: 0.0005 + +lr_policy: 
"multistep" +gamma: 0.8 +stepvalue: 5000 +stepvalue: 8000 +stepvalue: 10000 + +display: 100 + +# The maximum number of iterations +max_iter: 10000 +# snapshot intermediate results +snapshot: 1000 +snapshot_prefix: "mnist_example/mnist_snapshot/mnist_train" + +# solver mode: CPU or GPU +solver_mode: GPU diff --git a/mnist_example/mnist_train_test.prototxt b/mnist_example/mnist_train_test.prototxt new file mode 100644 index 00000000000..fa0fca3237d --- /dev/null +++ b/mnist_example/mnist_train_test.prototxt @@ -0,0 +1,343 @@ +name: "LeNet++" +layer { + name: "mnist" + type: "Data" + top: "data" + top: "label" + include { + phase: TRAIN + } + transform_param { + mean_value: 127.5 + scale: 0.0078125 + } + data_param { + source: "mnist_example/data/mnist_train_lmdb" + batch_size: 128 + backend: LMDB + } +} +layer { + name: "mnist" + type: "Data" + top: "data" + top: "label" + include { + phase: TEST + } + transform_param { + mean_value: 127.5 + scale: 0.0078125 + } + data_param { + source: "mnist_example/data/mnist_test_lmdb" + batch_size: 100 + backend: LMDB + } +} +layer { + name: "conv1" + type: "Convolution" + bottom: "data" + top: "conv1" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 32 + kernel_size: 5 + stride: 1 + pad: 2 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "prelu1" + type: "PReLU" + bottom: "conv1" + top: "conv1" +} +layer { + name: "conv1+" + type: "Convolution" + bottom: "conv1" + top: "conv1+" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 32 + kernel_size: 5 + stride: 1 + pad: 2 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "prelu1+" + type: "PReLU" + bottom: "conv1+" + top: "conv1+" +} +layer { + name: "pool1" + type: "Pooling" + bottom: "conv1+" + top: "pool1" + 
pooling_param { + pool: MAX + kernel_size: 2 + stride: 2 + } +} +layer { + name: "conv2" + type: "Convolution" + bottom: "pool1" + top: "conv2" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 64 + kernel_size: 5 + stride: 1 + pad: 2 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "prelu2" + type: "PReLU" + bottom: "conv2" + top: "conv2" +} +layer { + name: "conv2+" + type: "Convolution" + bottom: "conv2" + top: "conv2+" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 64 + kernel_size: 5 + stride: 1 + pad: 2 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "prelu2+" + type: "PReLU" + bottom: "conv2+" + top: "conv2+" +} +layer { + name: "pool2" + type: "Pooling" + bottom: "conv2+" + top: "pool2" + pooling_param { + pool: MAX + kernel_size: 2 + stride: 2 + } +} +layer { + name: "conv3" + type: "Convolution" + bottom: "pool2" + top: "conv3" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 128 + kernel_size: 5 + stride: 1 + pad: 2 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "prelu3" + type: "PReLU" + bottom: "conv3" + top: "conv3" +} +layer { + name: "conv3+" + type: "Convolution" + bottom: "conv3" + top: "conv3+" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 128 + kernel_size: 5 + stride: 1 + pad: 2 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "prelu3+" + type: "PReLU" + bottom: "conv3+" + top: "conv3+" +} +layer { + name: "pool3" + type: "Pooling" + bottom: "conv3+" + top: "pool3" + pooling_param { + pool: MAX + 
kernel_size: 3 + stride: 2 + } +} +layer { + name: "ip1" + type: "InnerProduct" + bottom: "pool3" + top: "ip1" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + inner_product_param { + num_output: 2 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "preluip1" + type: "PReLU" + bottom: "ip1" + top: "ip1" +} +################## train ################## +layer { + name: "ip2" + type: "InnerProduct" + bottom: "ip1" + top: "ip2" + param { + lr_mult: 1 + decay_mult: 1 + } + inner_product_param { + num_output: 10 + weight_filler { + type: "xavier" + } + bias_term: false + } +} +############# softmax loss ############### +layer { + name: "softmax_loss" + type: "SoftmaxWithLoss" + bottom: "ip2" + bottom: "label" + top: "softmax_loss" +} +############# center loss ############### +layer { + name: "center_loss" + type: "CenterLoss" + bottom: "ip1" + bottom: "label" + top: "center_loss" + param { + lr_mult: 1 + decay_mult: 0 + } + center_loss_param { + num_output: 10 + center_filler { + type: "xavier" + } + } + loss_weight: 0.01 +} diff --git a/src/caffe/layers/center_loss_layer.cpp b/src/caffe/layers/center_loss_layer.cpp new file mode 100644 index 00000000000..5e79c3af528 --- /dev/null +++ b/src/caffe/layers/center_loss_layer.cpp @@ -0,0 +1,115 @@ +#include <vector> + +#include "caffe/filler.hpp" +#include "caffe/layers/center_loss_layer.hpp" +#include "caffe/util/math_functions.hpp" + +namespace caffe { + +template <typename Dtype> +void CenterLossLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom, + const vector<Blob<Dtype>*>& top) { + const int num_output = this->layer_param_.center_loss_param().num_output(); + N_ = num_output; + const int axis = bottom[0]->CanonicalAxisIndex( + this->layer_param_.center_loss_param().axis()); + // Dimensions starting from "axis" are "flattened" into a single + // length K_ vector. 
For example, if bottom[0]'s shape is (N, C, H, W), + // and axis == 1, N inner products with dimension CHW are performed. + K_ = bottom[0]->count(axis); + // Check if we need to set up the weights + if (this->blobs_.size() > 0) { + LOG(INFO) << "Skipping parameter initialization"; + } else { + this->blobs_.resize(1); + // Initialize the weight + vector<int> center_shape(2); + center_shape[0] = N_; + center_shape[1] = K_; + this->blobs_[0].reset(new Blob<Dtype>(center_shape)); + // fill the weights + shared_ptr<Filler<Dtype> > center_filler(GetFiller<Dtype>( + this->layer_param_.center_loss_param().center_filler())); + center_filler->Fill(this->blobs_[0].get()); + + } // parameter initialization + this->param_propagate_down_.resize(this->blobs_.size(), true); +} + +template <typename Dtype> +void CenterLossLayer<Dtype>::Reshape(const vector<Blob<Dtype>*>& bottom, + const vector<Blob<Dtype>*>& top) { + CHECK_EQ(bottom[1]->channels(), 1); + CHECK_EQ(bottom[1]->height(), 1); + CHECK_EQ(bottom[1]->width(), 1); + M_ = bottom[0]->num(); + // The top shape will be the bottom shape with the flattened axes dropped, + // and replaced by a single axis with dimension num_output (N_). 
+ LossLayer<Dtype>::Reshape(bottom, top); + distance_.ReshapeLike(*bottom[0]); + variation_sum_.ReshapeLike(*this->blobs_[0]); +} + +template <typename Dtype> +void CenterLossLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom, + const vector<Blob<Dtype>*>& top) { + const Dtype* bottom_data = bottom[0]->cpu_data(); + const Dtype* label = bottom[1]->cpu_data(); + const Dtype* center = this->blobs_[0]->cpu_data(); + Dtype* distance_data = distance_.mutable_cpu_data(); + + // the i-th distance_data + for (int i = 0; i < M_; i++) { + const int label_value = static_cast<int>(label[i]); + // D(i,:) = X(i,:) - C(y(i),:) + caffe_sub(K_, bottom_data + i * K_, center + label_value * K_, distance_data + i * K_); + } + Dtype dot = caffe_cpu_dot(M_ * K_, distance_.cpu_data(), distance_.cpu_data()); + Dtype loss = dot / M_ / Dtype(2); + top[0]->mutable_cpu_data()[0] = loss; +} + +template <typename Dtype> +void CenterLossLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top, + const vector<bool>& propagate_down, + const vector<Blob<Dtype>*>& bottom) { + // Gradient with respect to centers + if (this->param_propagate_down_[0]) { + const Dtype* label = bottom[1]->cpu_data(); + Dtype* center_diff = this->blobs_[0]->mutable_cpu_diff(); + Dtype* variation_sum_data = variation_sum_.mutable_cpu_data(); + const Dtype* distance_data = distance_.cpu_data(); + + // \sum_{y_i==j} + caffe_set(N_ * K_, (Dtype)0., variation_sum_.mutable_cpu_data()); + for (int n = 0; n < N_; n++) { + int count = 0; + for (int m = 0; m < M_; m++) { + const int label_value = static_cast<int>(label[m]); + if (label_value == n) { + count++; + caffe_sub(K_, variation_sum_data + n * K_, distance_data + m * K_, variation_sum_data + n * K_); + } + } + caffe_axpy(K_, (Dtype)1./(count + (Dtype)1.), variation_sum_data + n * K_, center_diff + n * K_); + } + } + // Gradient with respect to bottom data + if (propagate_down[0]) { + caffe_copy(M_ * K_, distance_.cpu_data(), bottom[0]->mutable_cpu_diff()); + caffe_scal(M_ * K_, top[0]->cpu_diff()[0] / M_, bottom[0]->mutable_cpu_diff()); + } + if (propagate_down[1]) { + LOG(FATAL) 
<< this->type() + << " Layer cannot backpropagate to label inputs."; + } +} + +#ifdef CPU_ONLY +STUB_GPU(CenterLossLayer); +#endif + +INSTANTIATE_CLASS(CenterLossLayer); +REGISTER_LAYER_CLASS(CenterLoss); + +} // namespace caffe diff --git a/src/caffe/layers/center_loss_layer.cu b/src/caffe/layers/center_loss_layer.cu new file mode 100644 index 00000000000..f493557d5fd --- /dev/null +++ b/src/caffe/layers/center_loss_layer.cu @@ -0,0 +1,78 @@ +#include <vector> + +#include "caffe/filler.hpp" +#include "caffe/layers/center_loss_layer.hpp" +#include "caffe/util/math_functions.hpp" + +namespace caffe { + +template <typename Dtype> +__global__ void Compute_distance_data_gpu(int nthreads, const int K, const Dtype* bottom, + const Dtype* label, const Dtype* center, Dtype* distance) { + CUDA_KERNEL_LOOP(index, nthreads) { + int m = index / K; + int k = index % K; + const int label_value = static_cast<int>(label[m]); + // distance(i) = x(i) - c_{y(i)} + distance[index] = bottom[index] - center[label_value * K + k]; + } +} + +template <typename Dtype> +__global__ void Compute_center_diff_gpu(int nthreads, const int M, const int K, + const Dtype* label, const Dtype* distance, Dtype* variation_sum, + Dtype* center_diff) { + CUDA_KERNEL_LOOP(index, nthreads) { + int count = 0; + for (int m = 0; m < M; m++) { + const int label_value = static_cast<int>(label[m]); + if (label_value == index) { + count++; + for (int k = 0; k < K; k++) { + variation_sum[index * K + k] -= distance[m * K + k]; + } + } + } + for (int k = 0; k < K; k++) { + center_diff[index * K + k] = variation_sum[index * K + k] /(count + (Dtype)1.); + } + } +} + + +template <typename Dtype> +void CenterLossLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom, + const vector<Blob<Dtype>*>& top) { + int nthreads = M_ * K_; + Compute_distance_data_gpu<Dtype><<<CAFFE_GET_BLOCKS(nthreads), CAFFE_CUDA_NUM_THREADS>>>(nthreads, K_, bottom[0]->gpu_data(), bottom[1]->gpu_data(), + this->blobs_[0]->gpu_data(), distance_.mutable_gpu_data()); + Dtype dot; + caffe_gpu_dot(M_ * K_, distance_.gpu_data(), distance_.gpu_data(), &dot); + Dtype loss = dot / M_ / Dtype(2); + 
top[0]->mutable_cpu_data()[0] = loss; +} + +template <typename Dtype> +void CenterLossLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top, + const vector<bool>& propagate_down, + const vector<Blob<Dtype>*>& bottom) { + int nthreads = N_; + caffe_gpu_set(N_ * K_, (Dtype)0., variation_sum_.mutable_gpu_data()); + Compute_center_diff_gpu<Dtype><<<CAFFE_GET_BLOCKS(nthreads), CAFFE_CUDA_NUM_THREADS>>>(nthreads, M_, K_, bottom[1]->gpu_data(), distance_.gpu_data(), + variation_sum_.mutable_gpu_data(), this->blobs_[0]->mutable_gpu_diff()); + + if (propagate_down[0]) { + caffe_gpu_scale(M_ * K_, top[0]->cpu_diff()[0] / M_, + distance_.gpu_data(), bottom[0]->mutable_gpu_diff()); + } + if (propagate_down[1]) { + LOG(FATAL) << this->type() + << " Layer cannot backpropagate to label inputs."; + } +} + +INSTANTIATE_LAYER_GPU_FUNCS(CenterLossLayer); + +} // namespace caffe diff --git a/src/caffe/proto/caffe.proto b/src/caffe/proto/caffe.proto index 6940a705eb6..2f054398356 100644 --- a/src/caffe/proto/caffe.proto +++ b/src/caffe/proto/caffe.proto @@ -306,7 +306,7 @@ message ParamSpec { // NOTE // Update the next available ID when you add a new LayerParameter field. // -// LayerParameter next available layer-specific ID: 147 (last added: recurrent_param) +// LayerParameter next available layer-specific ID: 148 (last added: center_loss_param) message LayerParameter { optional string name = 1; // the layer name optional string type = 2; // the layer type @@ -362,6 +362,7 @@ message LayerParameter { optional ArgMaxParameter argmax_param = 103; optional BatchNormParameter batch_norm_param = 139; optional BiasParameter bias_param = 141; + optional CenterLossParameter center_loss_param = 147; optional ConcatParameter concat_param = 104; optional ContrastiveLossParameter contrastive_loss_param = 105; optional ConvolutionParameter convolution_param = 106; @@ -1397,3 +1398,12 @@ message PReLUParameter { // Whether or not slope paramters are shared across channels. 
optional bool channel_shared = 2 [default = false]; } + +message CenterLossParameter { + optional uint32 num_output = 1; // The number of outputs for the layer + optional FillerParameter center_filler = 2; // The filler for the centers + // The first axis to be lumped into a single inner product computation; + // all preceding axes are retained in the output. + // May be negative to index from the end (e.g., -1 for the last axis). + optional int32 axis = 3 [default = 1]; +}