diff --git a/.gitsubprojects b/.gitsubprojects index 902b81c..731a99c 100644 --- a/.gitsubprojects +++ b/.gitsubprojects @@ -1,5 +1,5 @@ # -*- mode: cmake -*- git_subproject(Servus https://github.com/HBPVIS/Servus.git 134b674) -git_subproject(Lunchbox https://github.com/Eyescale/Lunchbox.git 90ec951) +git_subproject(Lunchbox https://github.com/Eyescale/Lunchbox.git 4345dd0) git_subproject(vmmlib https://github.com/Eyescale/vmmlib.git 10d93e6) git_subproject(MVDTool https://github.com/BlueBrain/MVDTool.git 58b5e8b) diff --git a/.travis.yml b/.travis.yml index 55fdb64..0ae0cfd 100644 --- a/.travis.yml +++ b/.travis.yml @@ -24,6 +24,9 @@ before_install: - brew outdated cmake || brew upgrade cmake - brew install cppcheck doxygen ninja - brew install homebrew/science/hdf5 + - brew tap homebrew/versions + - brew install homebrew/versions/boost159 + - brew switch boost159 1.59.0 script: - mkdir $BUILD_TYPE - cd $BUILD_TYPE diff --git a/CMakeLists.txt b/CMakeLists.txt index bc4f1de..b1e1cf1 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -26,7 +26,7 @@ include(Common) common_find_package(BBPTestData) common_find_package(Boost REQUIRED COMPONENTS date_time filesystem - program_options regex system unit_test_framework) + program_options regex system thread unit_test_framework) common_find_package(Boost COMPONENTS python${USE_BOOST_PYTHON_VERSION}) common_find_package(HDF5 SYSTEM REQUIRED COMPONENTS C CXX) common_find_package(Lunchbox REQUIRED) diff --git a/brain/circuit.cpp b/brain/circuit.cpp index 08555ca..7cfcbd0 100644 --- a/brain/circuit.cpp +++ b/brain/circuit.cpp @@ -115,7 +115,7 @@ neuron::Morphologies Circuit::loadMorphologies( const GIDSet& gids, hashes.insert( hash ); } - Loaded loaded = _impl->loadFromCache( hashes ); + CachedMorphologies cached = _impl->loadMorphologiesFromCache( hashes ); // resolve missing morphologies and put them in GID-order into result neuron::Morphologies result; @@ -128,8 +128,8 @@ neuron::Morphologies Circuit::loadMorphologies( 
const GIDSet& gids, const URI& uri = uris[i]; const std::string& hash = gidHashes[i]; - Loaded::const_iterator it = loaded.find( hash ); - if( it == loaded.end( )) + CachedMorphologies::const_iterator it = cached.find( hash ); + if( it == cached.end( )) { neuron::MorphologyPtr morphology; const brion::Morphology raw( uri.getPath( )); @@ -138,9 +138,9 @@ neuron::Morphologies Circuit::loadMorphologies( const GIDSet& gids, else morphology.reset( new neuron::Morphology( raw )); - loaded.insert( std::make_pair( hash, morphology )); + cached.insert( std::make_pair( hash, morphology )); - _impl->saveToCache( hash, morphology ); + _impl->saveMorphologiesToCache( uri.getPath(), hash, morphology ); result.push_back( morphology ); } diff --git a/brain/detail/circuit.h b/brain/detail/circuit.h index ca51807..1ae2b5c 100644 --- a/brain/detail/circuit.h +++ b/brain/detail/circuit.h @@ -42,10 +42,10 @@ #endif #include -#include #include #include +#include namespace fs = boost::filesystem; using boost::lexical_cast; @@ -134,7 +134,10 @@ void _shuffle( T& container ) std::shuffle( container.begin(), container.end(), randomEngine ); } -typedef boost::unordered_map< std::string, neuron::MorphologyPtr > Loaded; +typedef std::unordered_map< std::string, + neuron::MorphologyPtr > CachedMorphologies; +typedef std::unordered_map< std::string, + brion::SynapseMatrix > CachedSynapses; } // anonymous namespace class Circuit::Impl @@ -270,49 +273,111 @@ class Circuit::Impl return **_synapsePositions[i]; } - void saveToCache( const std::string& hash, - neuron::MorphologyPtr morphology ) const + void saveMorphologiesToCache( const std::string& uri, + const std::string& hash, + neuron::MorphologyPtr morphology ) const { - if( _cache ) + if( !_cache ) + return; + + servus::Serializable::Data data = morphology->toBinary(); + if( !_cache->insert( hash, data.ptr.get(), data.size )) { - servus::Serializable::Data data = morphology->toBinary(); - _cache->insert( hash, data.ptr.get(), data.size ); + 
LBWARN << "Failed to insert morphology " << uri + << " into cache; item size is " << float(data.size) / LB_1MB + << " MB" << std::endl; } } - Loaded loadFromCache( const std::set< std::string >& hashes LB_UNUSED ) - const + CachedMorphologies + loadMorphologiesFromCache( const std::set< std::string >& hashes ) const { - Loaded loaded; - if( _cache ) - { - LBDEBUG << "Using cache for morphology loading" << std::endl; - typedef std::future< std::pair< std::string, - neuron::MorphologyPtr > > Future; - std::vector< Future > futures; + CachedMorphologies loaded; + if( !_cache ) + return loaded; + + LBDEBUG << "Using cache for morphology loading" << std::endl; + typedef std::future< std::pair< std::string, + neuron::MorphologyPtr >> Future; + std::vector< Future > futures; - Strings keys( hashes.begin(), hashes.end( )); - futures.reserve( keys.size( )); + Strings keys( hashes.begin(), hashes.end( )); + futures.reserve( keys.size( )); - _cache->takeValues( keys, [&futures] ( const std::string& key, - char* data, const size_t size ) + _cache->takeValues( keys, [&futures] ( const std::string& key, + char* data, const size_t size ) + { + futures.push_back( std::async( [key, data, size] { - futures.push_back( std::async( std::launch::async, - [key, data, size] - { - neuron::MorphologyPtr morphology( - new neuron::Morphology( data, size )); - std::free( data ); - return std::make_pair( key, morphology ); - })); - }); - - for( auto& future : futures ) - loaded.insert( future.get( )); - - LBINFO << "Loaded " << loaded.size() << " out of " << hashes.size() - << " morphologies from cache" << std::endl; + neuron::MorphologyPtr morphology( + new neuron::Morphology( data, size )); + std::free( data ); + return std::make_pair( key, morphology ); + })); + }); + + for( auto& future : futures ) + loaded.insert( future.get( )); + + LBINFO << "Loaded " << loaded.size() << " out of " << hashes.size() + << " morphologies from cache" << std::endl; + return loaded; + } + + void 
saveSynapsePositionsToCache( const uint32_t gid, + const std::string& hash, + const brion::SynapseMatrix& value ) const + { + if( !_cache ) + return; + + const size_t size = value.num_elements() * sizeof( float ); + if( !_cache->insert( hash, value.data(), size )) + { + LBWARN << "Failed to insert synapse positions for GID " << gid + << " into cache; item size is " << float(size) / LB_1MB + << " MB" << std::endl; } + } + + CachedSynapses loadSynapsePositionsFromCache( const Strings& keys ) const + { + CachedSynapses loaded; + if( !_cache ) + return loaded; + + LBDEBUG << "Using cache for synapses position loading" << std::endl; + typedef std::future< std::pair< std::string, + brion::SynapseMatrix >> Future; + + std::vector< Future > futures; + futures.reserve( keys.size( )); + + _cache->takeValues( keys, [&futures] ( const std::string& key, + char* data, const size_t size ) + { + futures.push_back( std::async( [key, data, size] + { + // there is no constructor in multi_array which just accepts the + // size in bytes (although there's a getter for it used in + // saveSynapsePositionsToCache()), so we reconstruct the row and + // column count here. 
+ const size_t numColumns = brion::SYNAPSE_POSITION_ALL; + const size_t numRows = size / sizeof(float) / numColumns; + brion::SynapseMatrix values( boost::extents[numRows] + [numColumns]); + ::memmove( values.data(), data, size ); + std::free( data ); + return std::make_pair( key, values ); + })); + }); + + for( auto& future : futures ) + loaded.insert( future.get( )); + + LBDEBUG << "Loaded synapse positions for " << loaded.size() + << " out of " << keys.size() << " neurons from cache" + << std::endl; return loaded; } diff --git a/brain/synapses.cpp b/brain/synapses.cpp index 5fb902e..16fb64d 100644 --- a/brain/synapses.cpp +++ b/brain/synapses.cpp @@ -184,17 +184,71 @@ struct Synapses::Impl : public Synapses::BaseImpl if( _preSurfacePositionX ) return; + Strings hashes; + hashes.reserve( gids.size( )); + const auto& path = _circuit._impl->_synapseSource.getPath(); + const std::string baseHash( fs::canonical( path ).generic_string( )); + for( const auto gid : gids ) + { + std::string gidHash = baseHash; + gidHash += _afferent ? "_afferent" : "_efferent"; + gidHash += std::to_string( gid ); + gidHash = servus::make_uint128( gidHash ).getString(); + hashes.push_back( gidHash ); + } + + CachedSynapses loaded = + _circuit._impl->loadSynapsePositionsFromCache( hashes ); + const bool haveSize = _size > 0; - const brion::Synapse& synapsePositions = - _circuit._impl->getSynapsePositions( _afferent ); - _allocatePositions( haveSize ? _size - : synapsePositions.getNumSynapses( gids )); + + // delay the opening of the synapse file as much as possible, even + // though the code looks ugly... As the circuit impl keeps the file + // opened, we can safely just get a loose pointer here. 
+ const brion::Synapse* synapsePositions = nullptr; + + if( !haveSize ) + { + auto hash = hashes.begin(); + for( const auto gid : gids ) + { + auto it = loaded.find( *hash ); + ++hash; + if( it != loaded.end() ) + _size += it->second.shape()[0]; + else + { + if( !synapsePositions ) + synapsePositions = + &_circuit._impl->getSynapsePositions( _afferent ); + _size += synapsePositions->getNumSynapses( GIDSet{ gid } ); + } + } + } + + _allocatePositions( _size ); size_t i = 0; + auto hash = hashes.begin(); for( const auto gid : gids ) { - const auto& pos = synapsePositions.read( gid, - brion::SYNAPSE_POSITION ); + auto it = loaded.find( *hash ); + const bool cached = it != loaded.end(); + + const auto readFromFile = [&] + { + if( !synapsePositions ) + synapsePositions = + &_circuit._impl->getSynapsePositions( _afferent ); + return synapsePositions->read( gid, brion::SYNAPSE_POSITION ); + }; + + const brion::SynapseMatrix pos = cached ? it->second + : readFromFile(); + + if( !cached ) + _circuit._impl->saveSynapsePositionsToCache( gid, *hash, pos ); + ++hash; for( size_t j = 0; j < pos.shape()[0]; ++j ) { diff --git a/brion/CMakeLists.txt b/brion/CMakeLists.txt index 796859e..1a524b5 100644 --- a/brion/CMakeLists.txt +++ b/brion/CMakeLists.txt @@ -76,10 +76,11 @@ endif() set(BRION_PUBLIC_INCLUDE_DIRECTORIES ${Boost_INCLUDE_DIRS}) set(BRION_LINK_LIBRARIES PUBLIC Lunchbox Servus vmmlib - PRIVATE ${Boost_SYSTEM_LIBRARIES} - ${Boost_FILESYSTEM_LIBRARIES} - ${Boost_DATE_TIME_LIBRARIES} - ${Boost_REGEX_LIBRARIES} + PRIVATE ${Boost_SYSTEM_LIBRARIES} + ${Boost_FILESYSTEM_LIBRARIES} + ${Boost_DATE_TIME_LIBRARIES} + ${Boost_REGEX_LIBRARIES} + ${Boost_THREAD_LIBRARIES} ${HDF5_LIBRARIES} ${CMAKE_THREADS_LIB_INIT} ) diff --git a/brion/synapse.cpp b/brion/synapse.cpp index dc0ea38..fdcefa1 100644 --- a/brion/synapse.cpp +++ b/brion/synapse.cpp @@ -33,9 +33,7 @@ #include -#include #include -#include #include #include @@ -45,11 +43,6 @@ namespace brion { namespace detail { -namespace 
-{ -static lunchbox::a_ssize_t _cacheHits; -static lunchbox::a_ssize_t _cacheMiss; -} struct Dataset { @@ -65,8 +58,6 @@ class SynapseFile : public boost::noncopyable { public: explicit SynapseFile( const std::string& source ) - : _cache( lunchbox::PersistentMap::createCache( )) - , _cacheKey( fs::canonical( fs::path( source )).generic_string( )) { lunchbox::ScopedWrite mutex( detail::_hdf5Lock ); @@ -110,32 +101,6 @@ class SynapseFile : public boost::noncopyable if( !bits.any( )) return SynapseMatrix(); - std::string cacheKey; - if( _cache ) - { - lunchbox::ScopedWrite mutex( _cacheLock ); - cacheKey = _cacheKey + "/" + lexical_cast< std::string >( gid ) + - "/" + lexical_cast< std::string >( attributes ); - const std::string& cached = (*_cache)[ cacheKey ]; - if( !cached.empty( )) - { - if( (++_cacheHits % 5000) == 0 ) - LBDEBUG << int( float( _cacheHits ) / - float( _cacheHits+_cacheMiss )*100.f + .5f ) - << "% cache hit rate" << std::endl; - - const size_t dim0 = cached.size() / bits.count() / - sizeof( float ); - SynapseMatrix values( boost::extents[ dim0 ][ bits.count( )]); - ::memcpy( values.data(), cached.data(), cached.size( )); - return values; - } - if( (++_cacheMiss % 5000) == 0 ) - LBDEBUG << int( float( _cacheHits ) / - float( _cacheHits + _cacheMiss ) * 100.f + .5f ) - << "% cache hit rate" << std::endl; - } - lunchbox::ScopedWrite mutex( detail::_hdf5Lock ); Dataset dataset; if( !_openDataset( gid, dataset )) @@ -159,17 +124,6 @@ class SynapseFile : public boost::noncopyable dataset.dataset.read( values.data(), H5::PredType::NATIVE_FLOAT, targetspace, dataset.dataspace ); - - if( _cache ) - { - lunchbox::ScopedWrite cacheMutex( _cacheLock ); - const size_t size = dataset.dims[0] * bits.count() * sizeof( float ); - if( !_cache->insert( cacheKey, values.data(), size )) - LBWARN << "Failed to insert synapse information for GID " << gid - << " into cache; item size is " << float(size) / LB_1MB - << " MB" << std::endl; - - } return values; } @@ -246,9 
+200,6 @@ class SynapseFile : public boost::noncopyable } private: - lunchbox::PersistentMapPtr _cache; - mutable lunchbox::Lock _cacheLock; - std::string _cacheKey; H5::H5File _file; size_t _numAttributes; }; diff --git a/doc/Changelog.md b/doc/Changelog.md index fca506c..fc87dcd 100644 --- a/doc/Changelog.md +++ b/doc/Changelog.md @@ -3,6 +3,8 @@ Changelog {#Changelog} # Release 1.9.0 (git master) +* [102](https://github.com/BlueBrain/Brion/pull/102): + Use PersistentMap for caching synapse positions loaded from brain::Circuit * [94](https://github.com/BlueBrain/Brion/pull/94): Fixed SWC morphology parser for morphologies with soma contour. The parser was creating invalid soma sections when the first order sections where connected diff --git a/doc/caching.md b/doc/caching.md index e0d5b32..0323bf3 100644 --- a/doc/caching.md +++ b/doc/caching.md @@ -16,6 +16,6 @@ variables: * each morphology is hashed by its canonical filepath if COORDINATES_LOCAL * each morphology is hashed by its canonical filepath plus canonical circuit filepath and GID if COORDINATES_GLOBAL -* Synapse attributes from brion::Synapse::read() - * each set of synapse attributes is hashed together with the synapse canonical - filepath and GID +* Synapse positions from brain::Circuit::getSynapses() + * all synapse positions per neuron are hashed by the canonical filepath of the + nrn file, plus whether they are afferent or efferent, plus the GID of the neuron.