From b565ac46b679ee90d23a5dcaab073a47061431e0 Mon Sep 17 00:00:00 2001 From: John Loy Date: Mon, 5 Oct 2015 16:53:14 -0700 Subject: [PATCH 1/2] Reduce smart_graph overhead when thousands of casts are registered. The original design of smart_graph featured a dense N^2 table of distances, which allowed fast lookup using a simple offset. This works really well when N is small. However, in a system with 4000 entries (which is not a hypothetical number) smart_graph consumes 122MB. Additionally, all entries in the table are initialized each time the table is resized. In a process where a module is imported, some work is done, then another module is imported, etc..., the table may have to be re-initialized many times as additional casts are introduced, which becomes a significant drag[1] on the time required to import Boost.Python libraries. This change addresses both the storage requirements and re-initialization costs. To address the memory use, smart_graph holds an unordered_map from (src, dst) vertex pairs to (distance, version) pairs. While this unfortunately means that memory consumption will increase for very small modules, as the number of vertices increases, the additional memory overhead required for hash table entries should be compensated for by the sparseness[2] of the graph. Similarly, the performance loss from hash table lookups is negligible compared to the savings from avoiding dense initialization. Also, there is a outer level of caching (see cache_t) that helps hide the increased smart_graph costs from clients making repeated queries over the same casts. Sparse re-initialization is implemented by tagging each distance table entry with a version. This corresponds to the version of the graph from which the distance was computed. As the graph grows, its version is incremented. When an existing entry is found, its version can be checked against the graph's current version to determine if it is still valid or should be re-computed. [1] Roughly 250ms is spent just filling the tables with the unreachable sentinel value during a startup sequence that registers around 4000 types across dozens of libraries. [2] In internal testing, the densest observed graphs have ~3700 vertices but only 1100 initialized entries in the smart_graph. The dense implementation consumes 104MB while the sparse table uses only 37KB. --- src/object/inheritance.cpp | 129 +++++++++++++++++++++++++++++++++------------ 1 file changed, 95 insertions(+), 34 deletions(-) diff --git a/src/object/inheritance.cpp b/src/object/inheritance.cpp index 7dc9db1cd..35cd0fcdc 100644 --- a/src/object/inheritance.cpp +++ b/src/object/inheritance.cpp @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -50,6 +51,9 @@ namespace // typedef python::type_info class_id; + // Integer type for cast distances + typedef unsigned int distance_t; + // represents a graph of available casts #if 0 @@ -61,7 +65,7 @@ namespace adjacency_list, distance_version> + all_pairs_distance_map; + + // For a pair of vertices, (outer_key, inner_key), node_distance_map + // provides a BGL property_map interface to the inner_keys for a given + // outer_key. + class node_distance_map + { + public: + typedef distance_t value_type; + typedef vertex_t key_type; + typedef distance_t& reference; + typedef read_write_property_map_tag category; + + node_distance_map(all_pairs_distance_map &full_map, vertex_t outer_key, unsigned int version) + : m_all_pairs_map(full_map) + , m_outer_key(outer_key) + , m_version(version) + {} + + bool is_initialized() const + { + // Check the version of the identity entry (if it exists) to see if + // this row needs (re-)initialization. + all_pairs_distance_map::const_iterator i = + m_all_pairs_map.find(make_full_key(m_outer_key)); + return i != m_all_pairs_map.end() && i->second.version == m_version; + } + + distance_t distance(vertex_t inner_key) const + { + all_pairs_distance_map::const_iterator i = + m_all_pairs_map.find(make_full_key(inner_key)); + return (i != m_all_pairs_map.end() && i->second.version == m_version) + ? i->second.distance : std::numeric_limits::max(); + } + + void set_distance(vertex_t inner_key, distance_t value) + { + distance_version dv = { value, m_version }; + m_all_pairs_map[make_full_key(inner_key)] = dv; + } + + private: + std::pair make_full_key(vertex_t inner_key) const + { + return std::pair(m_outer_key, inner_key); + } + + all_pairs_distance_map &m_all_pairs_map; + vertex_t m_outer_key; + unsigned int m_version; + }; + + inline distance_t get(node_distance_map const& m, vertex_t inner_key) + { + return m.distance(inner_key); + } + + inline void put(node_distance_map& m, vertex_t inner_key, distance_t value) + { + m.set_distance(inner_key, value); + } struct smart_graph { - typedef std::vector::const_iterator node_distance_map; typedef std::pair out_edges_t; @@ -86,36 +163,26 @@ namespace // target node node_distance_map distances_to(vertex_t target) const { - std::size_t n = num_vertices(m_topology); - if (m_distances.size() != n * n) - { - m_distances.clear(); - m_distances.resize(n * n, (std::numeric_limits::max)()); - m_known_vertices = n; - } - - std::vector::iterator to_target = m_distances.begin() + n * target; + // Because the graph only expands, use the number of vertices as the + // "version" of the node_distance_map. + unsigned int version = num_vertices(m_topology); + + node_distance_map to_target(m_distances, target, version); // this node hasn't been used as a target yet - if (to_target[target] != 0) + if (!to_target.is_initialized()) { typedef reverse_graph reverse_cast_graph; reverse_cast_graph reverse_topology(m_topology); - to_target[target] = 0; + to_target.set_distance(target, 0); breadth_first_search( reverse_topology, target , visitor( make_bfs_visitor( record_distances( - make_iterator_property_map( - to_target - , get(vertex_index, reverse_topology) -# ifdef BOOST_NO_STD_ITERATOR_TRAITS - , *to_target -# endif - ) + to_target , on_tree_edge() )))); } @@ -127,13 +194,11 @@ namespace cast_graph const& topology() const { return m_topology; } smart_graph() - : m_known_vertices(0) {} private: cast_graph m_topology; - mutable std::vector m_distances; - mutable std::size_t m_known_vertices; + mutable all_pairs_distance_map m_distances; }; smart_graph& full_graph() @@ -236,7 +301,7 @@ namespace struct q_elt { - q_elt(std::size_t distance + q_elt(distance_t distance , void* src_address , vertex_t target , cast_function cast @@ -247,7 +312,7 @@ namespace , cast(cast) {} - std::size_t distance; + distance_t distance; void* src_address; vertex_t target; cast_function cast; @@ -285,14 +350,10 @@ namespace void* search(smart_graph const& g, void* p, vertex_t src, vertex_t dst) { - // I think this test was thoroughly bogus -- dwa - // If we know there's no path; bail now. - // if (src > g.known_vertices() || dst > g.known_vertices()) - // return 0; - - smart_graph::node_distance_map d(g.distances_to(dst)); + node_distance_map d(g.distances_to(dst)); - if (d[src] == (std::numeric_limits::max)()) + const distance_t src_distance = d.distance(src); + if (src_distance == std::numeric_limits::max()) return 0; typedef property_map::const_type cast_map; @@ -303,7 +364,7 @@ namespace visited_t visited; std::priority_queue q; - q.push(q_elt(d[src], p, src, identity_cast)); + q.push(q_elt(src_distance, p, src, identity_cast)); while (!q.empty()) { q_elt top = q.top(); @@ -338,7 +399,7 @@ namespace { edge_t e = *p; q.push(q_elt( - d[target(e, g.topology())] + d.distance(target(e, g.topology())) , dst_address , target(e, g.topology()) , boost::get(casts, e))); From e1d2da509126b442026293fe7c55b702dca9bb56 Mon Sep 17 00:00:00 2001 From: John Loy Date: Thu, 19 Nov 2015 15:17:57 -0800 Subject: [PATCH 2/2] Ensure that num_edges() runs in constant time. The default type of adjacency_list's EdgeList (not to be confused with OutEdgeList) is listS (a.k.a std::list.) Many versions of libstdc++ implement std::list::size in O(n) time. Switch EdgeList to vecS for substantial speedups when adding casts to the inheritance graph. (While C++ 11 requires std::list::size() be implemented in constant time, the contiguous layout of vector should still be advantageous, especially as edges are never removed.) --- src/object/inheritance.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/object/inheritance.cpp b/src/object/inheritance.cpp index 35cd0fcdc..af33fcf15 100644 --- a/src/object/inheritance.cpp +++ b/src/object/inheritance.cpp @@ -69,7 +69,8 @@ namespace // The function which casts a void* from the edge's source type // to its destination type. - , property > > + , property > + , no_property, vecS > #if 0 {}; #else