diff --git a/brain/circuit.cpp b/brain/circuit.cpp index 39fe466..3cf4438 100644 --- a/brain/circuit.cpp +++ b/brain/circuit.cpp @@ -30,14 +30,21 @@ #include #include +#include #ifdef BRAIN_USE_MVD3 # include # include #endif -#include +#include +#include +#ifdef BRION_USE_CXX11 +# include +#endif + +namespace fs = boost::filesystem; using boost::lexical_cast; namespace brain @@ -99,19 +106,28 @@ size_t toSize_t( const std::string& in ) size_t toSize_t( const std::string& in ) { return std::stoul( in ); } #endif size_t nop( const size_t& in ) { return in; } + +typedef boost::unordered_map< std::string, neuron::MorphologyPtr > Loaded; } class Circuit::Impl { public: explicit Impl( const brion::BlueConfig& config ) - : _morphologySource( config.getMorphologySource( )) + : _circuitSource( config.getCircuitSource( )) + , _morphologySource( config.getMorphologySource( )) , _targetSources( config.getTargetSources( )) + , _cache( lunchbox::PersistentMap::createCache( )) {} virtual ~Impl() {} virtual size_t getNumNeurons() const = 0; + const brion::URI& getCircuitSource() const + { + return _circuitSource; + } + GIDSet getGIDs() const { brain::GIDSet gids; @@ -147,10 +163,60 @@ class Circuit::Impl return uri; } + void saveToCache( const std::string& hash, + neuron::MorphologyPtr morphology ) + { + if( _cache ) + { + servus::Serializable::Data data = morphology->toBinary(); + _cache->insert( hash, data.ptr.get(), data.size ); + } + } + + Loaded loadFromCache( const std::set< std::string >& hashes LB_UNUSED ) + const + { + Loaded loaded; +#ifdef BRION_USE_CXX11 + if( _cache ) + { + LBDEBUG << "Using cache for morphology loading" << std::endl; + typedef std::future< std::pair< std::string, + neuron::MorphologyPtr > > Future; + std::vector< Future > futures; + + Strings keys( hashes.begin(), hashes.end( )); + futures.reserve( keys.size( )); + + _cache->takeValues( keys, [&futures] ( const std::string& key, + char* data, const size_t size ) + { + futures.push_back( std::async( std::launch::async, + [key, data, size] + { + neuron::MorphologyPtr morphology( + new neuron::Morphology( data, size )); + std::free( data ); + return std::make_pair( key, morphology ); + })); + }); + + for( auto& future : futures ) + loaded.insert( future.get( )); + + LBINFO << "Loaded " << loaded.size() << " out of " << hashes.size() + << " morphologies from cache" << std::endl; + } +#endif + return loaded; + } + private: + const brion::URI _circuitSource; const brion::URI _morphologySource; const brion::URIs _targetSources; mutable brion::Targets _targetParsers; + lunchbox::PersistentMapPtr _cache; }; class MVD2 : public Circuit::Impl @@ -442,36 +508,64 @@ neuron::Morphologies Circuit::loadMorphologies( const GIDSet& gids, const Coordinates coords ) const { const URIs& uris = getMorphologyURIs( gids ); - neuron::Morphologies result; - result.reserve( uris.size( )); - if( coords == COORDINATES_GLOBAL ) + // < GID, hash > + Strings gidHashes; + gidHashes.reserve( uris.size( )); + std::set< std::string > hashes; + GIDSet::const_iterator gid = gids.begin(); + for( size_t i = 0; i < uris.size(); ++i, ++gid ) { - const Matrix4fs& transforms = getTransforms( gids ); - for( size_t i = 0; i < uris.size(); ++i ) + std::string hash( fs::canonical( uris[i].getPath( )).generic_string( )); + + if( coords == COORDINATES_GLOBAL ) { - const URI& uri = uris[i]; - const brion::Morphology raw( uri.getPath( )); - result.push_back( neuron::MorphologyPtr( - new neuron::Morphology( raw, transforms[i] ))); + // store circuit + GID for transformed morphology + hash += fs::canonical( + _impl->getCircuitSource().getPath( )).generic_string() + + boost::lexical_cast< std::string >( *gid ); + hash = servus::make_uint128( hash ).getString(); } - return result; + else + hash = servus::make_uint128( hash ).getString(); + + gidHashes.push_back( hash ); + hashes.insert( hash ); } - std::map< std::string, neuron::MorphologyPtr > loaded; + Loaded loaded = _impl->loadFromCache( hashes ); + + // resolve missing morphologies and put them in GID-order into result + neuron::Morphologies result; + result.reserve( uris.size( )); + + const Matrix4fs transforms = + coords == COORDINATES_GLOBAL ? getTransforms( gids ) : Matrix4fs(); for( size_t i = 0; i < uris.size(); ++i ) { const URI& uri = uris[i]; - neuron::MorphologyPtr& morphology = loaded[uri.getPath()]; - if( !morphology ) + const std::string& hash = gidHashes[i]; + Loaded::const_iterator it = loaded.find( hash ); + if( it == loaded.end( )) { + neuron::MorphologyPtr morphology; const brion::Morphology raw( uri.getPath( )); - morphology.reset( new neuron::Morphology( raw )); - } + if( coords == COORDINATES_GLOBAL ) + morphology.reset( new neuron::Morphology( raw, transforms[i] )); + else + morphology.reset( new neuron::Morphology( raw )); - result.push_back( morphology ); + loaded.insert( std::make_pair( hash, morphology )); + + _impl->saveToCache( hash, morphology ); + + result.push_back( morphology ); + } + else + result.push_back( it->second ); } + return result; } diff --git a/brain/neuron/morphology.cpp b/brain/neuron/morphology.cpp index 24839e5..689179e 100644 --- a/brain/neuron/morphology.cpp +++ b/brain/neuron/morphology.cpp @@ -34,6 +34,17 @@ namespace brain namespace neuron { +servus::Serializable::Data Morphology::toBinary() const +{ + return _impl->toBinary(); +} + +Morphology::Morphology( const void* data, const size_t size ) + : _impl( new Impl( data, size )) +{ + _impl->ref(); +} + Morphology::Morphology( const URI& source, const Matrix4f& transform ) : _impl( new Impl( brion::Morphology( source.getPath( )))) { diff --git a/brain/neuron/morphology.h b/brain/neuron/morphology.h index a49e84b..c713ca9 100644 --- a/brain/neuron/morphology.h +++ b/brain/neuron/morphology.h @@ -25,6 +25,7 @@ #include #include +#include namespace brain { @@ -130,6 +131,10 @@ class Morphology : public boost::noncopyable BRAIN_API const Matrix4f& getTransformation() const; private: + friend class brain::Circuit; + Morphology( const void* data, const size_t size ); + servus::Serializable::Data toBinary() const; + Impl* const _impl; }; diff --git a/brain/neuron/morphologyImpl.cpp b/brain/neuron/morphologyImpl.cpp index 2818d37..8efdc58 100644 --- a/brain/neuron/morphologyImpl.cpp +++ b/brain/neuron/morphologyImpl.cpp @@ -33,6 +33,37 @@ namespace brain { namespace neuron { +namespace +{ +template< typename T > +void _serializeArray( uint8_t*& dst, + const boost::shared_ptr< std::vector< T > >& src ) +{ + const size_t arraySize = src->size(); + *reinterpret_cast< size_t* >( dst ) = arraySize; + dst += sizeof(size_t); + memcpy( dst, src->data(), sizeof(T) * src->size()); + dst += sizeof(T) * src->size(); +} + +template< typename T > +void _deserializeArray( boost::shared_ptr< std::vector< T > >& dst, + const uint8_t*& src ) +{ + const size_t arraySize = *reinterpret_cast< const size_t* >( src ); + src += sizeof(size_t); + const T* dstPtr = + reinterpret_cast< const T* >( src ); + dst.reset( new std::vector< T >( dstPtr, dstPtr + arraySize )); + src += sizeof(T) * arraySize; +} + +} + +Morphology::Impl::Impl( const void* data, const size_t size ) +{ + _fromBinary( data, size ); +} Morphology::Impl::Impl( const brion::Morphology& morphology ) : points( morphology.readPoints( MORPHOLOGY_UNDEFINED )) @@ -51,6 +82,50 @@ Morphology::Impl::Impl( const brion::Morphology& morphology ) somaSection = ids[0]; } +bool Morphology::Impl::_fromBinary( const void* data, const size_t size ) +{ + const uint8_t* ptr = reinterpret_cast< const uint8_t* >( data ); + + _deserializeArray( points, ptr ); + _deserializeArray( sections, ptr ); + _deserializeArray( types, ptr ); + if( size_t(ptr - reinterpret_cast< const uint8_t* >( data )) < size ) + _deserializeArray( apicals, ptr ); + + _extractChildrenLists(); + + const uint32_ts ids = + getSectionIDs( SectionTypes( 1, SECTION_SOMA ), false ); + + if( ids.size() != 1 ) + LBTHROW( std::runtime_error( + "Bad input morphology. None or more than one soma found" )); + somaSection = ids[0]; + return true; +} + +servus::Serializable::Data Morphology::Impl::_toBinary() const +{ + servus::Serializable::Data data; + + data.size = sizeof(size_t) + sizeof(brion::Vector4f) * points->size() + + sizeof(size_t) + sizeof(brion::Vector2i) * sections->size() + + sizeof(size_t) + sizeof(uint32_t) * types->size(); + if( !apicals->empty( )) + data.size += sizeof(size_t) + sizeof(brion::Vector2i) * apicals->size(); + + uint8_t* ptr = new uint8_t[data.size]; + data.ptr.reset( ptr ); + + _serializeArray( ptr, points ); + _serializeArray( ptr, sections ); + _serializeArray( ptr, types ); + if( !apicals->empty( )) + _serializeArray( ptr, apicals ); + + return data; +} + SectionRange Morphology::Impl::getSectionRange( const uint32_t sectionID ) const { const size_t start = ( *sections )[sectionID][0]; diff --git a/brain/neuron/morphologyImpl.h b/brain/neuron/morphologyImpl.h index 79fedad..04d1639 100644 --- a/brain/neuron/morphologyImpl.h +++ b/brain/neuron/morphologyImpl.h @@ -34,18 +34,20 @@ namespace neuron typedef std::pair< size_t, size_t > SectionRange; -class Morphology::Impl : public lunchbox::Referenced +class Morphology::Impl : public lunchbox::Referenced, public servus::Serializable { public: - const brion::Vector4fsPtr points; - const brion::Vector2isPtr sections; - const brion::SectionTypesPtr types; - const brion::Vector2isPtr apicals; + brion::Vector4fsPtr points; + brion::Vector2isPtr sections; + brion::SectionTypesPtr types; + brion::Vector2isPtr apicals; Matrix4f transformation; uint32_t somaSection; + Impl( const void* data, const size_t size ); + explicit Impl( const brion::Morphology& morphology ); SectionRange getSectionRange( const uint32_t sectionID ) const; @@ -69,6 +71,9 @@ class Morphology::Impl : public lunchbox::Referenced void transform( const Matrix4f& matrix ); private: + std::string getTypeName() const final { return "brain::neuron::Morphology::Impl"; } + bool _fromBinary( const void* data, const size_t size ) final; + servus::Serializable::Data _toBinary() const final; // Distances caches. These caches need to be thread-safe to follow the // recommendations for C++11 about mutable and const correctness. diff --git a/brion/plugin/morphologyHDF5.cpp b/brion/plugin/morphologyHDF5.cpp index 96f8bf4..547f988 100644 --- a/brion/plugin/morphologyHDF5.cpp +++ b/brion/plugin/morphologyHDF5.cpp @@ -603,19 +603,20 @@ void MorphologyHDF5::_checkVersion( const std::string& source ) if( _readV11Metadata( )) return; + if( _readV2Metadata( )) + return; + try { _resolveV1(); _version = MORPHOLOGY_VERSION_H5_1; return; } - catch( ... ) {} - - if( _readV2Metadata( )) - _version = MORPHOLOGY_VERSION_H5_2; - else + catch( ... ) + { LBTHROW( std::runtime_error( "Unknown morphology file format for " "file " + source )); + } } void MorphologyHDF5::_selectRepairStage() @@ -740,7 +741,7 @@ bool MorphologyHDF5::_readV11Metadata() } } -bool MorphologyHDF5:: _readV2Metadata() const +bool MorphologyHDF5:: _readV2Metadata() { try { @@ -748,10 +749,9 @@ bool MorphologyHDF5:: _readV2Metadata() const const H5::Group& root = _file.openGroup( _g_root ); const H5::Attribute& attr = root.openAttribute( _a_version ); - int32_t version; - attr.read( H5::PredType::NATIVE_INT, &version ); + attr.read( H5::PredType::NATIVE_INT, &_version ); - if( version == MORPHOLOGY_VERSION_H5_2 ) + if( _version == MORPHOLOGY_VERSION_H5_2 ) return true; } catch( const H5::Exception& ) {} @@ -760,6 +760,7 @@ bool MorphologyHDF5:: _readV2Metadata() const { detail::SilenceHDF5 silence; _file.openGroup( _g_root ); + _version = MORPHOLOGY_VERSION_H5_2; return true; } catch( const H5::Exception& ) diff --git a/brion/plugin/morphologyHDF5.h b/brion/plugin/morphologyHDF5.h index 6bf0706..33507b7 100644 --- a/brion/plugin/morphologyHDF5.h +++ b/brion/plugin/morphologyHDF5.h @@ -91,7 +91,7 @@ class MorphologyHDF5 : public MorphologyPlugin void _writeV11Metadata( const MorphologyInitData& initData ); bool _readV11Metadata(); - bool _readV2Metadata() const; + bool _readV2Metadata(); void _writeV2Metadata(); H5::DataSet _getStructureDataSet( size_t nSections ); diff --git a/brion/synapse.cpp b/brion/synapse.cpp index cd09a10..98e1069 100644 --- a/brion/synapse.cpp +++ b/brion/synapse.cpp @@ -107,6 +107,7 @@ class SynapseFile : public boost::noncopyable std::string cacheKey; if( _cache ) { + lunchbox::ScopedWrite mutex( _cacheLock ); cacheKey = _cacheKey + "/" + lexical_cast< std::string >( gid ) + "/" + lexical_cast< std::string >( attributes ); const std::string& cached = (*_cache)[ cacheKey ]; @@ -152,9 +153,13 @@ class SynapseFile : public boost::noncopyable dataset.dataset.read( values.data(), H5::PredType::NATIVE_FLOAT, targetspace, dataset.dataspace ); + if( _cache ) + { + lunchbox::ScopedWrite cacheMutex( _cacheLock ); _cache->insert( cacheKey, values.data(), dataset.dims[0] * bits.count() * sizeof( float )); + } return values; } @@ -232,6 +237,7 @@ class SynapseFile : public boost::noncopyable private: lunchbox::PersistentMapPtr _cache; + mutable lunchbox::Lock _cacheLock; std::string _cacheKey; H5::H5File _file; size_t _numAttributes; diff --git a/doc/Changelog.md b/doc/Changelog.md index f71682f..fc7a57b 100644 --- a/doc/Changelog.md +++ b/doc/Changelog.md @@ -5,6 +5,9 @@ Changelog {#Changelog} * [81](https://github.com/BlueBrain/Brion/pull/81): Fix GID out-of-bounds handling for MVD3 in brain::Circuit +* [79](https://github.com/BlueBrain/Brion/pull/79): + Use PersistentMap for cache in brain::Circuit::loadMorphologies(); add thread + safety w/ synapses cache # Release 1.8.0 (30-Jun-2016) diff --git a/doc/caching.md b/doc/caching.md new file mode 100644 index 0000000..e0d5b32 --- /dev/null +++ b/doc/caching.md @@ -0,0 +1,21 @@ +Caching support +============ + +Brion and Brain have optional caching support for speeding up loading of various +data. The caching is based on the lunchbox::PersistentMap API which has several +key-value store backends which can be activated by the following environment +variables: + +* MEMCACHED_SERVERS: a comma-separated list of servers with optional :port to + use memcached as a cache +* LEVELDB_CACHE: a path to the leveldb storage to use leveldb as a cache + +## Cached data support + +* Morphologies from brain::Circuit::loadMorphologies() + * each morphology is hashed by its canonical filepath if COORDINATES_LOCAL + * each morphology is hashed by its canonical filepath plus canonical circuit + filepath and GID if COORDINATES_GLOBAL +* Synapse attributes from brion::Synapse::read() + * each set of synapse attributes is hashed together with the synapse canonical + filepath and GID