diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 143a84b419..b73794f0a3 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -158,7 +158,7 @@ oldest-supported-compiler-job:
     GIT_SUBMODULE_STRATEGY: none
     # DO NOT change this version number without updating the README to reflect
     # the requirement bump.
-    COMPILER_VERSION: 9
+    COMPILER_VERSION: 10
 
 
 # We define one job to do the Docker container build
diff --git a/BOTS.md b/BOTS.md
new file mode 100644
index 0000000000..2c35275ac0
--- /dev/null
+++ b/BOTS.md
@@ -0,0 +1,75 @@
+# VG Project Notes
+
+## Building
+- New `.cpp` files auto-discovered
+- Build with `make -j8` or `make obj/whatever.o` to build just one .o.
+- You may be getting errors from `clangd`. If these errors seem spurious, stop and demand a `clangd` that works properly.
+
+## Testing
+
+### Running Bash-TAP Tests
+Use `prove -v` (not `bash`) to execute Bash-TAP tests. This provides proper test harness output and better error reporting.
+
+**Important**: Run `prove` from the `test/` directory:
+```bash
+cd test
+prove -v t/26_deconstruct.t
+```
+
+### Running Unit Tests
+To run all unit tests:
+```bash
+./bin/vg test
+```
+- `./bin/vg test "[tag]"` runs tests matching a tag
+
+#### Writing Unit Tests
+- Framework: Catch v2 (header-only)
+- Include: `#include "catch.hpp"` (in `src/unittest/catch.hpp`)
+- Macros: `TEST_CASE("name", "[tags]")`, `SECTION("name")`, `REQUIRE(cond)`
+- Namespace: `vg::unittest`
+- Directory: `src/unittest/`
+
+### Running All Tests
+```bash
+make test
+```
+
+## Writing Code
+
+### HandleGraph API
+The interfaces in libhandlegraph model a bidirected sequence graph (where nodes have DNA sequences and edges can connect to either the start or end of each involved node).
+
+#### Core types
+- `handle_t` - opaque 64-bit value
+- `nid_t` - node ID type
+- `edge_t` = `pair<handle_t, handle_t>`
+
+#### Key HandleGraph methods
+- `get_handle(nid_t, bool is_reverse=false)` → `handle_t`
+- `get_id(handle_t)` → `nid_t`
+- `get_is_reverse(handle_t)` → `bool`
+- `flip(handle_t)` → `handle_t` (toggle orientation)
+- `get_sequence(handle_t)` → `string` (in handle's orientation)
+- `follow_edges(handle_t, bool go_left, iteratee)` - iterate neighbors
+- `for_each_handle(iteratee, bool parallel=false)` - iterate all nodes
+- `for_each_edge(iteratee, bool parallel=false)` - iterate all edges
+- `has_edge(handle_t left, handle_t right)` → `bool`
+
+#### MutableHandleGraph additions
+- `create_handle(string seq)` / `create_handle(string seq, nid_t id)` → `handle_t`
+- `create_edge(handle_t left, handle_t right)`
+- `destroy_handle(handle_t)` / `destroy_edge(handle_t, handle_t)`
+
+#### HandleGraph algorithms
+- Things like `topological_sort.hpp` and `copy_graph.hpp` are in `deps/libhandlegraph/src/include/handlegraph/algorithms`.
+
+#### bdsg::HashGraph
+- Header: `deps/libbdsg/bdsg/include/bdsg/hash_graph.hpp`
+- Implements MutablePathMutableHandleGraph
+- Go-to handlegraph implementation to use
+- In libbdsg
+
+### Utilities
+- `reverse_complement(string)` → `string` in src/utility.hpp
+
diff --git a/CLAUDE.md b/CLAUDE.md
new file mode 120000
index 0000000000..1a1007d91a
--- /dev/null
+++ b/CLAUDE.md
@@ -0,0 +1 @@
+BOTS.md
\ No newline at end of file
diff --git a/Makefile b/Makefile
index 6506208d58..6159297339 100644
--- a/Makefile
+++ b/Makefile
@@ -104,7 +104,8 @@ ifeq ($(shell uname -s),Darwin)
     LD_UTIL_RPATH_FLAGS=""
 
     # Homebrew installs a Protobuf that uses an Abseil that is built with C++17, so we need to build with at least C++17
-    CXX_STANDARD?=17
+    # We also need C++20 for the spaceship operator and ranges
+    CXX_STANDARD?=20
 
     # We may need libraries from Macports
     ifeq ($(shell if [ -d /opt/local/lib ];then echo 1;else echo 0;fi), 1)
@@ -229,8 +230,9 @@ else
     $(info Compiler $(CXX) is assumed to be GCC)
 
 	# gbwtgraph uses inline variables and our oldest supported compiler has
-	# C++17, so we should use C++17
-    CXX_STANDARD?=17
+	# C++17, so we should use at least C++17.
+    # C++20 for spaceship operator and ranges
+    CXX_STANDARD?=20
 
     # Set an rpath for vg and dependency utils to find installed libraries
     LD_UTIL_RPATH_FLAGS="-Wl,-rpath,$(CWD)/$(LIB_DIR)"
@@ -820,7 +822,7 @@ $(INC_DIR)/dynamic/dynamic.hpp: $(DYNAMIC_DIR)/include/dynamic/*.hpp $(DYNAMIC_D
 	+mkdir -p $(INC_DIR)/dynamic && cp -r $(CWD)/$(DYNAMIC_DIR)/include/dynamic/* $(INC_DIR)/dynamic/
 
 $(INC_DIR)/sparsehash/sparse_hash_map: $(wildcard $(SPARSEHASH_DIR)/**/*.cc) $(wildcard $(SPARSEHASH_DIR)/**/*.h)
-	+cd $(SPARSEHASH_DIR) && ./autogen.sh && LDFLAGS="$(LD_LIB_DIR_FLAGS) $(LDFLAGS)" ./configure --prefix=$(CWD) $(FILTER) && $(MAKE) $(FILTER) && $(MAKE) install
+	+cd $(SPARSEHASH_DIR) && ./autogen.sh && LDFLAGS="$(LD_LIB_DIR_FLAGS) $(LDFLAGS)" ./configure --prefix=$(CWD) $(FILTER) && $(MAKE) src/sparsehash/internal/sparseconfig.h $(FILTER) && $(MAKE) install-data $(FILTER)
 
 $(INC_DIR)/sparsepp/spp.h: $(wildcard $(SPARSEPP_DIR)/sparsepp/*.h)
 	+cp -r $(SPARSEPP_DIR)/sparsepp $(INC_DIR)/
diff --git a/README.md b/README.md
index a3e1d5e4cd..2c616f69fe 100644
--- a/README.md
+++ b/README.md
@@ -93,7 +93,7 @@ On other distros, or if you do not have root access, you will need to perform th
                          liblzma-dev liblz4-dev libffi-dev libcairo-dev libboost-all-dev \
                          libzstd-dev pybind11-dev python3-pybind11 libssl-dev kmc
                          
-At present, you will need GCC version 9 or greater, with support for C++17, to compile vg. (Check your version with `gcc --version`.) GCC up to 11.4.0 is supported.
+At present, you will need GCC version 10 or greater, with support for C++20, to compile vg. (Check your version with `gcc --version`.) GCC up to 11.4.0 is supported.
 
 Other libraries may be required. Please report any build difficulties.
 
diff --git a/deps/libbdsg b/deps/libbdsg
index e74fb663a5..a7602fd4a4 160000
--- a/deps/libbdsg
+++ b/deps/libbdsg
@@ -1 +1 @@
-Subproject commit e74fb663a5f85bc1f76d159b2b3a3691ed85862f
+Subproject commit a7602fd4a462ca617502640022c6f1dd9109b13f
diff --git a/deps/libvgio b/deps/libvgio
index fff151be9d..3026f7d28e 160000
--- a/deps/libvgio
+++ b/deps/libvgio
@@ -1 +1 @@
-Subproject commit fff151be9d8255672d91f32a5b41285584905743
+Subproject commit 3026f7d28ef1576982968aff4eed7adf5a10f262
diff --git a/src/algorithms/chain_items.cpp b/src/algorithms/chain_items.cpp
index c84e399ce1..ebc46da684 100644
--- a/src/algorithms/chain_items.cpp
+++ b/src/algorithms/chain_items.cpp
@@ -13,6 +13,7 @@
 
 //#define debug_chaining
 //#define debug_transition
+//#define debug_missing_transition
 //#define debug_dp
 
 namespace vg {
@@ -262,6 +263,18 @@ transition_iterator zip_tree_transition_iterator(const std::vector<SnarlDistance
             generate_zip_tree_transitions(seeds, zip_code_tree, max_graph_lookback_bases,
                                           seed_to_starting, seed_to_ending);
 
+#ifdef debug_missing_transition
+        bool has_missing = \
+        find_missing_zip_tree_transitions(seeds, zip_code_tree, max_graph_lookback_bases,
+                                          seed_to_starting, seed_to_ending, distance_index, 
+                                          all_transitions);
+        if (has_missing) {
+            throw std::runtime_error("Zipcode tree iterator failed to output some transitions");
+        } else {
+            cerr << "No missing transitions" << endl;
+        }
+#endif
+
         std::vector<transition_info> filtered_transitions =
             calculate_transition_read_distances(all_transitions, to_chain, max_read_lookback_bases);
 
@@ -385,6 +398,103 @@ std::vector<transition_info> generate_zip_tree_transitions(
     return all_transitions;
 }
 
+bool find_missing_zip_tree_transitions(
+    const std::vector<SnarlDistanceIndexClusterer::Seed>& seeds,
+    const ZipCodeTree& zip_code_tree,
+    size_t max_graph_lookback_bases,
+    const std::unordered_map<size_t, size_t>& seed_to_starting,
+    const std::unordered_map<size_t, size_t>& seed_to_ending,
+    const SnarlDistanceIndex& distance_index,
+    const std::vector<transition_info>& all_transitions) {
+    // Debugging check: compare the transitions in all_transitions against
+    // distances from the distance index for every pair of seeds in the tree.
+    // Prints each discrepancy to cerr and returns true if there were any.
+
+    // Index the reported transitions as {source anchor : {dest anchor : dist}}
+    std::unordered_map<size_t, std::unordered_map<size_t, size_t>> found;
+    for (const auto& transition : all_transitions) {
+        // operator[] creates the inner map the first time a source is seen
+        auto& dists_from = found[transition.from_anchor];
+        auto inserted = dists_from.emplace(transition.to_anchor, transition.graph_distance);
+        if (!inserted.second) {
+            // If a transition appears multiple times, remember the min
+            inserted.first->second = std::min(inserted.first->second, transition.graph_distance);
+        }
+    }
+
+    bool has_missing = false;
+
+    // Helper function to check for a distance between two seeds
+    auto check_distance = [&] (const ZipCodeTree::oriented_seed_t& from_seed, bool rev_from,
+                               const ZipCodeTree::oriented_seed_t& to_seed, bool rev_to) {
+        // XOR to get appropriate orientations
+        rev_from ^= from_seed.is_reversed;
+        rev_to ^= to_seed.is_reversed;
+        if (rev_from != rev_to) {
+            // Cannot be compared; incompatible orientations
+            return;
+        }
+
+        // Look up appropriate anchors
+        const auto& from_anchors = rev_from ? seed_to_starting : seed_to_ending;
+        auto from_anchor_itr = from_anchors.find(from_seed.seed);
+        if (from_anchor_itr == from_anchors.end()) {
+            // No anchor exists
+            return;
+        }
+        const auto& to_anchors = rev_to ? seed_to_ending : seed_to_starting;
+        auto to_anchor_itr = to_anchors.find(to_seed.seed);
+        if (to_anchor_itr == to_anchors.end()) {
+            // No anchor exists
+            return;
+        }
+
+        // Construct seed positions
+        pos_t from_pos = seeds.at(from_seed.seed).pos;
+        size_t from_length = distance_index.minimum_length(distance_index.get_node_net_handle(id(from_pos)));
+        from_pos = rev_from ? reverse(from_pos, from_length)
+                            : from_pos;
+        pos_t to_pos = seeds.at(to_seed.seed).pos;
+        size_t to_length = distance_index.minimum_length(distance_index.get_node_net_handle(id(to_pos)));
+        to_pos = rev_to ? reverse(to_pos, to_length)
+                        : to_pos;
+
+        // Look up true minimum distance
+        size_t true_distance = minimum_nontrivial_distance(distance_index, from_pos, to_pos);
+        if (true_distance <= max_graph_lookback_bases) {
+            // We should've found this transition
+            auto from_anchor = from_anchor_itr->second;
+            auto to_anchor = to_anchor_itr->second;
+            // Flag it if it is absent, or if the smallest distance recorded
+            // for it differs from the distance index's minimum.
+            if (!found.count(from_anchor)
+                || !found[from_anchor].count(to_anchor)
+                || found[from_anchor][to_anchor] != true_distance) {
+                has_missing = true;
+                cerr << "Missing transition " << from_pos << "->" 
+                     << to_pos << " dist " << true_distance << endl;
+            }
+        }
+    };
+    
+    vector<ZipCodeTree::oriented_seed_t> tree_seeds = zip_code_tree.get_all_seeds();
+    for (size_t i = 0; i < tree_seeds.size(); i++) {
+        // Check self-loops
+        check_distance(tree_seeds[i], false, tree_seeds[i], false);
+        check_distance(tree_seeds[i], false, tree_seeds[i], true);
+        check_distance(tree_seeds[i], true, tree_seeds[i], false);
+        for (size_t j = i + 1; j < tree_seeds.size(); j++) {
+            // Check all orientation pairs
+            check_distance(tree_seeds[i], false, tree_seeds[j], false);
+            check_distance(tree_seeds[i], false, tree_seeds[j], true);
+            check_distance(tree_seeds[i], true, tree_seeds[j], false);
+            check_distance(tree_seeds[i], true, tree_seeds[j], true);
+        }
+    }
+
+    return has_missing;
+}
+
 std::vector<transition_info> calculate_transition_read_distances(
     const std::vector<transition_info>& all_transitions,
     const VectorView<Anchor>& to_chain,
@@ -547,6 +657,8 @@ TracedScore chain_items_dp(vector<TracedScore>& chain_scores,
         cerr << "Chaining group of " << to_chain.size() << " items" << endl;
     }
 
+    crash_unless(recomb_penalty >= 0);
+
     // Compute a base seed average length.
     // TODO: Weight anchors differently?
     // TODO: Will this always be the same for all anchors in practice?
@@ -557,6 +669,20 @@ TracedScore chain_items_dp(vector<TracedScore>& chain_scores,
     base_seed_length /= to_chain.size();
 
     chain_scores.resize(to_chain.size());
+
+    // We want to prefer to come from seeds where the transition preserves
+    // access to matching haplotypes, because we don't want to back ourselves
+    // into a corner where we need a recombination when we don't really have
+    // to. So we cheat on the dynamic programming by adding an "evaluation
+    // bonus" to the scores of the different DP options when comparing them. We
+    // keep this bonus out of the actual recorded scores because we don't want
+    // it raising the scores we actually get the more transitions we take.
+    //
+    // We store the bonus used to select the current winning predecessor for
+    // each seed in this vector, which runs alongside the DP table.
+    //
+    // Starting from nowhere means full path conservation, so bonus = recomb_penalty.
+    std::vector<int> eval_bonuses(to_chain.size(), recomb_penalty);
     for (size_t i = 0; i < to_chain.size(); i++) {
         // Set up DP table so we can start anywhere with that item's score, scaled and with bonus applied.
         chain_scores[i] = {(int)(to_chain[i].score() * item_scale + item_bonus), TracedScore::nowhere(), to_chain[i].anchor_end_paths()};
@@ -586,8 +712,20 @@ TracedScore chain_items_dp(vector<TracedScore>& chain_scores,
         }
         
         // If we come from nowhere, we get those points.
-        chain_scores[transition.to_anchor] = std::max(chain_scores[transition.to_anchor], 
-                                                      {(int)item_points, TracedScore::nowhere(), here.anchor_end_paths()});
+        // This also has full path conservation (bonus = recomb_penalty).
+        {
+            TracedScore from_nowhere = {(int)item_points, TracedScore::nowhere(), here.anchor_end_paths()};
+            int nowhere_bonus = recomb_penalty;
+            int eval_nowhere = from_nowhere.score + nowhere_bonus;
+            int eval_current = chain_scores[transition.to_anchor].score + eval_bonuses[transition.to_anchor];
+            if (eval_nowhere > eval_current) {
+                chain_scores[transition.to_anchor] = from_nowhere;
+                eval_bonuses[transition.to_anchor] = nowhere_bonus;
+            } else if (eval_nowhere == eval_current && from_nowhere > chain_scores[transition.to_anchor]) {
+                chain_scores[transition.to_anchor] = from_nowhere;
+                eval_bonuses[transition.to_anchor] = nowhere_bonus;
+            }
+        }
         
         // For each source we could come from
         auto& source = to_chain[transition.from_anchor];
@@ -664,8 +802,34 @@ TracedScore chain_items_dp(vector<TracedScore>& chain_scores,
             TracedScore from_source_score = source_score.add_points(jump_points + item_points)
                                                         .set_shared_paths(here.anchor_paths());
             
-            // Remember that we could make this jump
-            chain_scores[transition.to_anchor] = std::max(chain_scores[transition.to_anchor], from_source_score);
+            // Evaluate heuristic to preserve path flexibility without inflating actual scoring DP.
+            // Bonus = fraction of conserved paths * recomb_penalty.
+            // Bonus is 0 when recombination occurs (no shared paths).
+            int eval_bonus_from = 0;
+            if (recomb_penalty > 0) {
+                int pre_count = __builtin_popcountll(source_score.paths);
+                if (pre_count > 0 && (source_score.paths & here.anchor_start_paths()) != 0) {
+                    // No recombination: bonus = fraction of paths conserved * penalty
+                    int post_count = __builtin_popcountll(from_source_score.paths);
+                    eval_bonus_from = (recomb_penalty * post_count) / pre_count;
+                }
+                // Recombination case (no shared paths): bonus stays 0
+            }
+            
+            // Grab the DP table slot we are updating
+            auto& current_best = chain_scores[transition.to_anchor];
+            // Compute the evaluation value for the new candidate
+            int eval_from = from_source_score.score + eval_bonus_from;
+            // Reconstruct the evaluation value for the current winner
+            int eval_best = current_best.score + eval_bonuses[transition.to_anchor];
+
+            if (eval_from > eval_best || (eval_from == eval_best && from_source_score > current_best)) {
+                // Using the evaluation values, and then if tied the real DP
+                // scores, this new candidate beats the previous winner, so
+                // replace it.
+                current_best = from_source_score;
+                eval_bonuses[transition.to_anchor] = eval_bonus_from;
+            }
                                            
             if (show_work) {
 #ifdef debug_dp
diff --git a/src/algorithms/chain_items.hpp b/src/algorithms/chain_items.hpp
index 9ac7b792e0..f4531d4201 100644
--- a/src/algorithms/chain_items.hpp
+++ b/src/algorithms/chain_items.hpp
@@ -477,6 +477,23 @@ std::vector<transition_info> generate_zip_tree_transitions(
     const std::unordered_map<size_t, size_t>& seed_to_starting, 
     const std::unordered_map<size_t, size_t>& seed_to_ending);
 
+/**
+ * Check if all possible transitions were actually found.
+ * 
+ * Iterates over all pairs of seeds and uses the distance index
+ * to determine if there SHOULD have been a transition.
+ * 
+ * Returns true if any expected transition was missing or had the wrong distance.
+ */
+bool find_missing_zip_tree_transitions(
+    const std::vector<SnarlDistanceIndexClusterer::Seed>& seeds,
+    const ZipCodeTree& zip_code_tree,
+    size_t max_graph_lookback_bases,
+    const std::unordered_map<size_t, size_t>& seed_to_starting, 
+    const std::unordered_map<size_t, size_t>& seed_to_ending,
+    const SnarlDistanceIndex& distance_index,
+    const std::vector<transition_info>& all_transitions);
+
 /**
  * Calculate read distances for each of the zip tree's transitions.
  * Also filters out transitions that can't be used,
diff --git a/src/alignment.cpp b/src/alignment.cpp
index 567bfb016a..faf669ea69 100644
--- a/src/alignment.cpp
+++ b/src/alignment.cpp
@@ -3507,6 +3507,65 @@ pair<int64_t, int64_t> aligned_interval(const Alignment& aln) {
     return pair<int64_t, int64_t>(softclip_start(aln), aln.sequence().size() - softclip_end(aln));
 }
 
+void count_alignment_operations(const Alignment& aln, size_t& matches, size_t& mismatches, std::vector<size_t>& gap_lengths) {
+    matches = 0;
+    mismatches = 0;
+    gap_lengths.clear();
+
+    // A gap is a maximal run of adjacent indel edits of one kind; a run may
+    // continue across a mapping boundary.
+    enum class GapKind { NONE, INS, DEL };
+    GapKind open_kind = GapKind::NONE;
+    size_t open_length = 0;
+    // Record the open gap, if there is one, and start afresh
+    auto close_gap = [&]() {
+        if (open_length > 0) {
+            gap_lengths.push_back(open_length);
+        }
+        open_length = 0;
+        open_kind = GapKind::NONE;
+    };
+    // Grow the open gap, closing it first if it is of a different kind
+    auto extend_gap = [&](GapKind kind, size_t length) {
+        if (open_kind != kind) close_gap();
+        open_length += length;
+        open_kind = kind;
+    };
+
+    for (const auto& mapping : aln.path().mapping()) {
+        for (const auto& edit : mapping.edit()) {
+            const auto from_len = edit.from_length();
+            const auto to_len = edit.to_length();
+            if (from_len > 0 && from_len == to_len) {
+                // Aligned bases: matches unless replacement sequence is given
+                close_gap();
+                (edit.sequence().empty() ? matches : mismatches) += from_len;
+            } else if (from_len == 0 && to_len > 0) {
+                extend_gap(GapKind::INS, to_len);
+            } else if (from_len > 0 && to_len == 0) {
+                extend_gap(GapKind::DEL, from_len);
+            } else {
+                // Anything else counts as mismatches of the longer length
+                close_gap();
+                mismatches += max(from_len, to_len);
+            }
+        }
+    }
+    close_gap();
+}
+
+int score_alignment_with_logged_gaps(const size_t& matches, const size_t& mismatches, const std::vector<size_t>& gap_lengths) {
+    // Every mismatched base and every gap opening counts as one difference
+    const double differences = static_cast<double>(mismatches + gap_lengths.size());
+    const double columns = static_cast<double>(matches + mismatches + gap_lengths.size());
+    // Divergence is floored at 0.02; with this argument order 0/0 also gives the floor
+    const double divergence = std::max(0.02, differences / columns);
+    // Each gap additionally costs log2(1 + length) on top of its opening
+    double indel_penalty = 0;
+    for (const size_t& gap_length : gap_lengths) { indel_penalty += std::log2(1.0 + gap_length); }
+    return std::round(matches - differences / (2.0 * divergence) - indel_penalty);
+}
+
 string mate_info(const string& path, int32_t pos, bool rev_strand, bool is_read1) {
     subrange_t subrange;
     string path_name = Paths::strip_subrange(path, &subrange);
diff --git a/src/alignment.hpp b/src/alignment.hpp
index cea9bc524d..1dbfc9870f 100644
--- a/src/alignment.hpp
+++ b/src/alignment.hpp
@@ -330,6 +330,15 @@ bool is_supplementary(const Alignment& alignment);
 // The indexes on the read sequence of the portion of the read that is aligned outside of soft clips
 pair<int64_t, int64_t> aligned_interval(const Alignment& aln);
 
+/// Count the various types of edits in an Alignment, including individual gap lengths.
+void count_alignment_operations(const Alignment& aln, size_t& matches, size_t& mismatches, std::vector<size_t>& gaps_lengths);
+/// Compute an alignment score using minimap2 long indels penalty adjustment.
+///
+/// This scoring method penalizes long continuous indels less, using the formula:
+/// score = matches - (mismatches + gap_opens)/2d - sum_{i=1}^{gap_opens} (log_2(1 + gap_length_i))
+/// with d = max{0.02, (mismatches + gap_opens)/(matches + mismatches + gap_opens)}
+int score_alignment_with_logged_gaps(const size_t& matches, const size_t& mismatches, const std::vector<size_t>& gap_lengths);
+
 // create an annotation string required to properly set the SAM fields/flags of a supplementary alignment
 // the arguments all refer to properties of the primary *mate* alignment
 // the path name saved in the info is the base path name, with any subrange info reflected in the position
diff --git a/src/cactus.cpp b/src/cactus.cpp
index 6179663968..49eab63294 100644
--- a/src/cactus.cpp
+++ b/src/cactus.cpp
@@ -999,8 +999,8 @@ VG cactus_to_vg(stCactusGraph* cactus_graph) {
     return vg_graph;
 }
 
-VG cactusify(VG& graph) {
-    if (graph.size() == 0) {
+VG cactusify(const PathHandleGraph& graph) {
+    if (graph.get_node_count() == 0) {
         return VG();
     }
     auto parts = handle_graph_to_cactus(graph, unordered_set<string>());
diff --git a/src/cactus.hpp b/src/cactus.hpp
index 36d53f2fab..21cfd8ebc7 100644
--- a/src/cactus.hpp
+++ b/src/cactus.hpp
@@ -46,7 +46,7 @@ VG cactus_to_vg(stCactusGraph* cactus_graph);
 
 // Convert vg into vg formatted cactus representation
 // Input graph must be sorted!
-VG cactusify(VG& graph);
+VG cactusify(const PathHandleGraph& graph);
 
 }
 
diff --git a/src/cluster.hpp b/src/cluster.hpp
index df997cc51c..cd6deab517 100644
--- a/src/cluster.hpp
+++ b/src/cluster.hpp
@@ -212,8 +212,8 @@ class MEMClusterer {
     
 protected:
     
-    class HitNode;
     class HitEdge;
+    class HitNode;
     class HitGraph;
     class DPScoreComparator;
     
@@ -232,7 +232,47 @@ class MEMClusterer {
     /// is closest to the optimal separation
     void deduplicate_cluster_pairs(vector<pair<pair<size_t, size_t>, int64_t>>& cluster_pairs, int64_t optimal_separation);
 };
+
+class MEMClusterer::HitEdge { // One entry of a HitNode's edges_from / edges_to list
+public:
+    HitEdge(size_t to_idx, int32_t weight, int64_t distance) : to_idx(to_idx), weight(weight), distance(distance) {}
+    HitEdge() = default; // NOTE: assigns no fields itself
+    ~HitEdge() = default;
+    
+    /// Index of the node that the edge points to
+    size_t to_idx;
+    
+    /// Weight for dynamic programming
+    int32_t weight;
+    
+    /// Estimated distance
+    int64_t distance;
+};
+
+class MEMClusterer::HitNode { // A MEM hit at a graph position, plus its incident HitEdges
+public:
+    HitNode(const MaximalExactMatch& mem, pos_t start_pos, int32_t score) : mem(&mem), start_pos(start_pos), score(score) { } // NOTE: dp_score is not set here
+    HitNode() = default;
+    ~HitNode() = default;
+    /// Address of the MEM passed to the constructor; this class never frees it
+    const MaximalExactMatch* mem;
+    
+    /// Position of GCSA hit in the graph
+    pos_t start_pos;
+    
+    /// Score of the exact match this node represents
+    int32_t score;
+    
+    /// Score used in dynamic programming
+    int32_t dp_score;
+    
+    /// Edges from this node that are colinear with the read
+    vector<HitEdge> edges_from;
+    
+    /// Edges to this node that are colinear with the read
+    vector<HitEdge> edges_to;
+};
+
 class MEMClusterer::HitGraph {
 public:
     
@@ -286,46 +326,6 @@ class MEMClusterer::HitGraph {
     UnionFind components;
 };
     
-class MEMClusterer::HitNode {
-public:
-    HitNode(const MaximalExactMatch& mem, pos_t start_pos, int32_t score) : mem(&mem), start_pos(start_pos), score(score) { }
-    HitNode() = default;
-    ~HitNode() = default;
-    
-    const MaximalExactMatch* mem;
-    
-    /// Position of GCSA hit in the graph
-    pos_t start_pos;
-    
-    /// Score of the exact match this node represents
-    int32_t score;
-    
-    /// Score used in dynamic programming
-    int32_t dp_score;
-    
-    /// Edges from this node that are colinear with the read
-    vector<HitEdge> edges_from;
-    
-    /// Edges to this node that are colinear with the read
-    vector<HitEdge> edges_to;
-};
-
-class MEMClusterer::HitEdge {
-public:
-    HitEdge(size_t to_idx, int32_t weight, int64_t distance) : to_idx(to_idx), weight(weight), distance(distance) {}
-    HitEdge() = default;
-    ~HitEdge() = default;
-    
-    /// Index of the node that the edge points to
-    size_t to_idx;
-    
-    /// Weight for dynamic programming
-    int32_t weight;
-    
-    /// Estimated distance
-    int64_t distance;
-};
-
 struct MEMClusterer::DPScoreComparator {
 private:
     const vector<HitNode>& nodes;
diff --git a/src/gbwtgraph_helper.cpp b/src/gbwtgraph_helper.cpp
index 96ca15be2d..6c626bc7db 100644
--- a/src/gbwtgraph_helper.cpp
+++ b/src/gbwtgraph_helper.cpp
@@ -449,7 +449,7 @@ std::vector<key_type> find_frequent_kmers(const gbwtgraph::GBZ& gbz, const Minim
 void cache_payloads(
     const gbwtgraph::GBZ& gbz,
     const SnarlDistanceIndex& distance_index,
-    hash_map<nid_t, payload_t>& node_id_to_payload,
+    vg::hash_map<nid_t, payload_t>& node_id_to_payload,
     ZipCodeCollection* oversized_zipcodes,
     bool progress
 ) {
@@ -460,22 +460,37 @@ void cache_payloads(
 
     const handlegraph::HandleGraph* graph_ptr = (const handlegraph::HandleGraph*) &gbz.graph;
 
+    double total_zipcode_time = 0.0, total_decoder_time = 0.0;
+    std::atomic<uint64_t> node_count = 0;
     gbz.graph.for_each_handle([&](const handle_t& handle) {
         nid_t node_id = gbz.graph.get_id(handle);
-        ZipCode zipcode;
         pos_t pos = make_pos_t(node_id, false, 0);
-        zipcode.fill_in_zipcode_from_pos(distance_index, pos, true, graph_ptr);
+        ZipCode zipcode;
+        zipcode.fill_in_zipcode_from_pos(distance_index, pos, false, graph_ptr);
+        zipcode.fill_in_full_decoder();
+        if (++node_count % 10000 == 0 && progress) {
+            double telapsed = gbwt::readTimer() - start;
+            #pragma omp critical (cerr)
+            std::cerr << "  Cached " << node_count << " nodes in " << telapsed << "s" << std::endl;
+        }
+
         payload_t payload = zipcode.get_payload_from_zip();
         if (payload == MIPayload::NO_CODE && oversized_zipcodes != nullptr) {
             // The zipcode is too large for the payload field.
             // Add it to the oversized zipcode list.
-            zipcode.fill_in_full_decoder();
-            size_t offset = oversized_zipcodes->size();
-            oversized_zipcodes->emplace_back(zipcode);
+            size_t offset;
+            #pragma omp critical (cache_payloads_zipcodes)
+            {
+                offset = oversized_zipcodes->size();
+                oversized_zipcodes->emplace_back(zipcode);
+            }
             payload = { 0, offset };
         }
-        node_id_to_payload.emplace(node_id, payload);
-    });
+        #pragma omp critical (cache_payloads_map)
+        {
+            node_id_to_payload.emplace(node_id, payload);
+        }
+    }, true);
 
     if (progress) {
         double seconds = gbwt::readTimer() - start;
@@ -537,8 +552,18 @@ gbwtgraph::DefaultMinimizerIndex build_minimizer_index(
     } else {
         // Cache payloads before building the index.
         // A zipcode only depends on the node id.
-        hash_map<nid_t, payload_t> node_id_to_payload;
+        vg::hash_map<nid_t, payload_t> node_id_to_payload;
         node_id_to_payload.reserve(gbz.graph.max_node_id() - gbz.graph.min_node_id());
+        // Re-preload the distance index right before use. find_frequent_kmers
+        // runs for a long time and may evict the mmap'd index pages from the OS
+        // page cache. We also preload eagerly right after loading the index (in
+        // minimizer_main.cpp) so the kernel treats those pages as recently-used;
+        // together the two preloads prevent cache_payloads from page-faulting on
+        // every node under the memory pressure of 32 parallel threads.
+        if (params.progress) {
+            std::cerr << "Preloading distance index";
+        }
+        distance_index->preload(true);
         cache_payloads(gbz, *distance_index, node_id_to_payload, oversized_zipcodes, params.progress);
 
         auto get_payload = [&](const pos_t& pos) -> const code_type* {
diff --git a/src/graph.cpp b/src/graph.cpp
index beca52b5e1..3f23ffef18 100644
--- a/src/graph.cpp
+++ b/src/graph.cpp
@@ -2,93 +2,6 @@
 
 namespace vg {
 
-void sort_by_id_dedup_and_clean(Graph& graph) {
-    remove_duplicates(graph); // graph is sorted here
-    remove_orphan_edges(graph);
-}
-
-void remove_duplicates(Graph& graph) {
-    remove_duplicate_nodes(graph);
-    remove_duplicate_edges(graph);
-}
-
-void remove_duplicate_edges(Graph& graph) {
-    sort_edges_by_id(graph);
-    graph.mutable_edge()->erase(std::unique(graph.mutable_edge()->begin(),
-                                            graph.mutable_edge()->end(),
-                                            [](const Edge& a, const Edge& b) {
-                                                return make_tuple(a.from(), a.to(), a.from_start(), a.to_end())
-                                                    == make_tuple(b.from(), b.to(), b.from_start(), b.to_end());
-                                            }), graph.mutable_edge()->end());
-
-}
-
-void remove_duplicate_nodes(Graph& graph) {
-    sort_nodes_by_id(graph);
-    graph.mutable_node()->erase(std::unique(graph.mutable_node()->begin(),
-                                            graph.mutable_node()->end(),
-                                            [](const Node& a, const Node& b) {
-                                                return a.id() == b.id();
-                                            }), graph.mutable_node()->end());
-}
-
-void remove_orphan_edges(Graph& graph) {
-    set<id_t> ids;
-    for (auto& node : graph.node()) {
-        ids.insert(node.id());
-    }
-    graph.mutable_edge()->erase(std::remove_if(graph.mutable_edge()->begin(),
-                                               graph.mutable_edge()->end(),
-                                               [&ids](const Edge& e) {
-                                                   return !ids.count(e.from()) || !ids.count(e.to());
-                                               }), graph.mutable_edge()->end());
-}
-
-void sort_by_id(Graph& graph) {
-    sort_nodes_by_id(graph);
-    sort_edges_by_id(graph);
-}
-
-void sort_nodes_by_id(Graph& graph) {
-    std::sort(graph.mutable_node()->begin(),
-              graph.mutable_node()->end(),
-              [](const Node& a, const Node& b) {
-                  return a.id() < b.id();
-              });
-}
-
-void sort_edges_by_id(Graph& graph) {
-    std::sort(graph.mutable_edge()->begin(),
-              graph.mutable_edge()->end(),
-              [](const Edge& a, const Edge& b) {
-                  return make_tuple(a.from(), a.to(), a.from_start(), a.to_end())
-                      < make_tuple(b.from(), b.to(), b.from_start(), b.to_end());
-              });
-}
-
-bool is_id_sortable(const Graph& graph) {
-    for (auto& edge : graph.edge()) {
-        if (edge.from() >= edge.to()) return false;
-    }
-    return true;
-}
-
-bool has_inversion(const Graph& graph) {
-    for (auto& edge : graph.edge()) {
-        if (edge.from_start() || edge.to_end()) return true;
-    }
-    return false;
-}
-
-void flip_doubly_reversed_edges(Graph& graph) {
-    for (auto& edge : *graph.mutable_edge()) {
-        if (edge.from_start() && edge.to_end()) {
-            edge.set_from_start(false);
-            edge.set_to_end(false);
-        }
-    }
-}
-    
 void from_handle_graph(const HandleGraph& from, Graph& to) {
     from.for_each_handle([&](const handle_t& h) {
         Node* node = to.add_node();
diff --git a/src/graph.hpp b/src/graph.hpp
index 964e46cceb..c85afe88ab 100644
--- a/src/graph.hpp
+++ b/src/graph.hpp
@@ -11,39 +11,6 @@ namespace vg {
 
 using namespace std;
 
-/// remove duplicates and sort by id
-void sort_by_id_dedup_and_clean(Graph& graph);
-
-/// remove duplicate nodes and edges
-void remove_duplicates(Graph& graph);
-
-/// remove duplicate edges
-void remove_duplicate_edges(Graph& graph);
-
-/// remove duplicate nodes
-void remove_duplicate_nodes(Graph& graph);
-
-/// remove edges that link to a node that is not in the graph
-void remove_orphan_edges(Graph& graph);
-
-/// order the nodes and edges in the graph by id
-void sort_by_id(Graph& graph);
-
-/// order the nodes in the graph by id
-void sort_nodes_by_id(Graph& graph);
-
-/// order the edges in the graph by id pairs
-void sort_edges_by_id(Graph& graph);
-
-/// returns true if the graph is id-sortable (no reverse links)
-bool is_id_sortable(const Graph& graph);
-
-/// returns true if we find an edge that may specify an inversion
-bool has_inversion(const Graph& graph);
-
-/// clean up doubly-reversed edges
-void flip_doubly_reversed_edges(Graph& graph);
-
 // transfer data from a HandleGraph into an empty Graph
 void from_handle_graph(const HandleGraph& from, Graph& to);
 
diff --git a/src/index_registry.cpp b/src/index_registry.cpp
index 9a8f2923d6..cb09dbcbfa 100644
--- a/src/index_registry.cpp
+++ b/src/index_registry.cpp
@@ -551,6 +551,10 @@ construct_minimizers_impl(
         *gbz, distance_index.get(), &oversized_zipcodes, params
     );
 
+    // Close the distance index so it can't appear to be modified after the
+    // files that depend on it.
+    distance_index.reset();
+
     string output_name = plan->output_filepath(minimizer_output);
     save_minimizer(minimizers, output_name, IndexingParameters::verbosity == IndexingParameters::Debug);
     output_name_minimizer.push_back(output_name);
@@ -5222,6 +5226,34 @@ vector<string> IndexRegistry::require(const IndexName& identifier) const {
     return index->get_filenames();
 }
 
+bool IndexRegistry::predates(const IndexName& earlier, const IndexName& later) const {
+    // Get all the files for both indexes; require() hands back their filenames.
+    std::vector<std::string> earlier_files = require(earlier);
+    std::vector<std::string> later_files = require(later);
+
+    // Make sure they're nonempty, so the max/min lookups below are well-defined.
+    if (earlier_files.empty()) {
+        throw std::runtime_error(earlier + " index has no files");
+    }
+    if (later_files.empty()) {
+        throw std::runtime_error(later + " index has no files");
+    }
+
+    // Get all their modification times. The typed pointer picks the path-only overload of last_write_time.
+    std::filesystem::file_time_type (*get_mtime)(const std::filesystem::path&) = std::filesystem::last_write_time;
+    std::vector<std::filesystem::file_time_type> earlier_times;
+    std::transform(earlier_files.begin(), earlier_files.end(), std::back_inserter(earlier_times), get_mtime);
+    std::vector<std::filesystem::file_time_type> later_times;
+    std::transform(later_files.begin(), later_files.end(), std::back_inserter(later_times), get_mtime);
+
+    // The time ranges must not intersect: compare the newest earlier file against the oldest later file.
+    std::filesystem::file_time_type earlier_time = *std::max_element(earlier_times.begin(), earlier_times.end());
+    std::filesystem::file_time_type later_time = *std::min_element(later_times.begin(), later_times.end());
+
+    // Return whether every earlier file was touched no later than every later file.
+    return earlier_time <= later_time;
+}
+
 void IndexRegistry::set_target_memory_usage(int64_t bytes) {
     target_memory_usage = bytes;
 }
diff --git a/src/index_registry.hpp b/src/index_registry.hpp
index 3b7f58dd4c..7f20a5fd37 100644
--- a/src/index_registry.hpp
+++ b/src/index_registry.hpp
@@ -331,6 +331,13 @@ class IndexRegistry {
     /// Return true if the given index is available and can be require()'d, and
     /// false otherwise.
     bool available(const IndexName& identifier) const;
+    
+    /// For two available indexes, returns true if the modification times
+    /// on the earlier index are no later than those on the later index.
+    ///
+    /// Useful for enforcing that downstream indexes haven't had their upstream
+    /// indexes overwritten.
+    bool predates(const IndexName& earlier, const IndexName& later) const;
 
     /// Get the possible filename(s) associated with the given index with the given prefix.
     /// TODO: Get this to account for sample-scoped indexes.
diff --git a/src/io/register_loader_saver_distance_index.cpp b/src/io/register_loader_saver_distance_index.cpp
index 54245956a9..926dc0f3f6 100644
--- a/src/io/register_loader_saver_distance_index.cpp
+++ b/src/io/register_loader_saver_distance_index.cpp
@@ -1,6 +1,6 @@
 /**
  * \file register_loader_saver_distance_index.cpp
- * Defines IO for an XG index from stream files.
+ * Defines IO for a SnarlDistanceIndex index from stream files.
  */
 
 #include <vg/io/registry.hpp>
diff --git a/src/minimizer_mapper.hpp b/src/minimizer_mapper.hpp
index 693f19cb8b..4b15e8b4ce 100644
--- a/src/minimizer_mapper.hpp
+++ b/src/minimizer_mapper.hpp
@@ -871,7 +871,8 @@ class MinimizerMapper : public AlignerClient {
      */
     void do_chaining_on_trees(Alignment& aln, const ZipCodeForest& zip_code_forest, const std::vector<Seed>& seeds, const VectorView<MinimizerMapper::Minimizer>& minimizers,
                               const vector<algorithms::Anchor>& seed_anchors,
-                              std::vector<std::vector<size_t>>& chains, std::vector<std::vector<bool>>& chain_rec_flags, std::vector<size_t>& chain_source_tree,
+                              std::vector<std::vector<size_t>>& chains, std::vector<std::vector<bool>>& chain_rec_flags,
+                              std::vector<size_t>& chain_rec_counts, std::vector<size_t>& chain_source_tree,
                               std::vector<int>& chain_score_estimates, std::vector<std::vector<size_t>>& minimizer_kept_chain_count,
                               std::vector<double>& multiplicity_by_chain,
                               std::vector<Alignment>& alignments, SmallBitset& minimizer_explored, vector<double>& multiplicity_by_alignment,
@@ -1081,11 +1082,11 @@ class MinimizerMapper : public AlignerClient {
      *
      * For connecting alignment, restricts the alignment to use <= max_dp_cells
      * cells. If too many DP cells would be used, produces an Alignment with
-     * and empty path.
+     * an empty path.
      *
-     * Returns the number of nodes and bases in the graph aligned against.
+     * Returns whether a graph was aligned against or not.
      */
-    static std::pair<size_t, size_t> align_sequence_between(const pos_t& left_anchor, const pos_t& right_anchor, size_t max_path_length, size_t max_gap_length, const HandleGraph* graph, const GSSWAligner* aligner, Alignment& alignment, const std::string* alignment_name = nullptr, size_t max_dp_cells = std::numeric_limits<size_t>::max(), const std::function<size_t(const Alignment&, const HandleGraph&)>& choose_band_padding = algorithms::pad_band_random_walk());
+    static bool align_sequence_between(const pos_t& left_anchor, const pos_t& right_anchor, size_t max_path_length, size_t max_gap_length, const HandleGraph* graph, const GSSWAligner* aligner, Alignment& alignment, const std::string* alignment_name = nullptr, size_t max_dp_cells = std::numeric_limits<size_t>::max(), const std::function<size_t(const Alignment&, const HandleGraph&)>& choose_band_padding = algorithms::pad_band_random_walk());
 
 public:
     /**
@@ -1093,7 +1094,7 @@ class MinimizerMapper : public AlignerClient {
      * same answer (modulo reverse-complementation) no matter whether the
      * sequence and anchors are reverse-complemented or not.
      */
-    static std::pair<size_t, size_t> align_sequence_between_consistently(const pos_t& left_anchor, const pos_t& right_anchor, size_t max_path_length, size_t max_gap_length, const HandleGraph* graph, const GSSWAligner* aligner, Alignment& alignment, const std::string* alignment_name = nullptr, size_t max_dp_cells = std::numeric_limits<size_t>::max(), const std::function<size_t(const Alignment&, const HandleGraph&)>& choose_band_padding = algorithms::pad_band_random_walk());
+    static bool align_sequence_between_consistently(const pos_t& left_anchor, const pos_t& right_anchor, size_t max_path_length, size_t max_gap_length, const HandleGraph* graph, const GSSWAligner* aligner, Alignment& alignment, const std::string* alignment_name = nullptr, size_t max_dp_cells = std::numeric_limits<size_t>::max(), const std::function<size_t(const Alignment&, const HandleGraph&)>& choose_band_padding = algorithms::pad_band_random_walk());
 
 protected:
     /**
diff --git a/src/minimizer_mapper_from_chains.cpp b/src/minimizer_mapper_from_chains.cpp
index 4ba5d3b909..c8f47d5bb9 100644
--- a/src/minimizer_mapper_from_chains.cpp
+++ b/src/minimizer_mapper_from_chains.cpp
@@ -32,6 +32,7 @@
 #include <cmath>
 #include <cfloat>
 #include <unordered_set>
+#include <bitset>
 
 // Turn on debugging prints
 //#define debug
@@ -774,6 +775,8 @@ vector<Alignment> MinimizerMapper::map_from_chains(Alignment& aln) {
     std::vector<std::vector<size_t>> chains;
     // For each chain, mark per-seed whether it came from a recombinant anchor
     std::vector<std::vector<bool>> chain_rec_flags;
+    // For each chain, track how many recombination events were used
+    std::vector<size_t> chain_rec_counts;
     // The zip code tree it came from
     std::vector<size_t> chain_source_tree;
     // An estimated alignment score
@@ -784,7 +787,7 @@ vector<Alignment> MinimizerMapper::map_from_chains(Alignment& aln) {
     std::vector<double> multiplicity_by_chain;
 
     do_chaining_on_trees(aln, zip_code_forest, seeds, minimizers, seed_anchors,
-                         chains, chain_rec_flags, chain_source_tree, chain_score_estimates,
+                         chains, chain_rec_flags, chain_rec_counts, chain_source_tree, chain_score_estimates,
                          minimizer_kept_chain_count, multiplicity_by_chain,
                          alignments, minimizer_explored, multiplicity_by_alignment,
                          rng, funnel);
@@ -844,6 +847,49 @@ vector<Alignment> MinimizerMapper::map_from_chains(Alignment& aln) {
                                alignments_to_source, minimizer_explored, stats, funnel_depleted, rng, funnel);
     }
     
+    for (size_t alignment_index = 0; alignment_index < alignments.size(); ++alignment_index) {
+        // Rescore all the alignments using minimap2 logged-gap-length, read-identity-based scoring
+
+        if (alignments[alignment_index].path().mapping_size() == 0) {
+            // Unmapped, so skip it.
+            continue;
+        }
+
+        size_t matches, mismatches;
+        std::vector<size_t> gap_lengths;
+        count_alignment_operations(alignments[alignment_index], matches, mismatches, gap_lengths);
+
+        if (matches + mismatches + gap_lengths.size() == 0) {
+            continue;
+        }
+        
+        // Compute the logged-gaps score
+        auto logged_gaps_score = score_alignment_with_logged_gaps(matches, mismatches, gap_lengths);
+        alignments[alignment_index].set_score(logged_gaps_score);
+         if (show_work) {
+            #pragma omp critical (cerr)
+            {   
+                cerr << log_name() << "Matches: " << matches << " Mismatches: " << mismatches << " Gap opens: " << gap_lengths.size() << " New score: " << logged_gaps_score << endl;
+            }
+        }
+    }
+    if (!chain_rec_counts.empty() && !alignments_to_source.empty()) {
+        for (size_t alignment_index = 0; alignment_index < alignments_to_source.size(); ++alignment_index) {
+            size_t chain_index = alignments_to_source[alignment_index];
+            if (chain_index != std::numeric_limits<size_t>::max() && chain_index < chain_rec_counts.size()) {
+                set_annotation(alignments[alignment_index], "chain.rec_count", (double) chain_rec_counts[chain_index]);
+                if (rec_penalty_chain != 0) {
+                    // Penalize the score of alignment candidates according to the number of recombinations their chains required.
+                    // This allows alignments that required fewer recombinations in their chains to win.
+                    // TODO: We'd also eventually like to count recombinations that we don't know are needed until base-level DP.
+                    int64_t penalty = static_cast<int64_t>(rec_penalty_chain) * static_cast<int64_t>(chain_rec_counts[chain_index]);
+                    int64_t penalized_score = static_cast<int64_t>(alignments[alignment_index].score()) - penalty;
+                    alignments[alignment_index].set_score(static_cast<int>(penalized_score));
+                }
+            }
+        }
+    }
+    
     
     if (track_provenance) {
         // Now say we are finding the winner(s)
@@ -1096,7 +1142,8 @@ vector<Alignment> MinimizerMapper::map_from_chains(Alignment& aln) {
 void MinimizerMapper::do_chaining_on_trees(Alignment& aln, const ZipCodeForest& zip_code_forest,
     const std::vector<Seed>& seeds, const VectorView<MinimizerMapper::Minimizer>& minimizers,
     const vector<algorithms::Anchor>& seed_anchors,
-    std::vector<std::vector<size_t>>& chains, std::vector<std::vector<bool>>& chain_rec_flags, std::vector<size_t>& chain_source_tree,
+    std::vector<std::vector<size_t>>& chains, std::vector<std::vector<bool>>& chain_rec_flags,
+    std::vector<size_t>& chain_rec_counts, std::vector<size_t>& chain_source_tree,
     std::vector<int>& chain_score_estimates, std::vector<std::vector<size_t>>& minimizer_kept_chain_count,
     std::vector<double>& multiplicity_by_chain,
     std::vector<Alignment>& alignments, SmallBitset& minimizer_explored, vector<double>& multiplicity_by_alignment,
@@ -1544,9 +1591,13 @@ void MinimizerMapper::do_chaining_on_trees(Alignment& aln, const ZipCodeForest&
                 indel_limit,
                 show_work
             );
+#ifdef debug_rec
+            if (true) {
+#else
             if (show_work) {
+#endif
                 #pragma omp critical (cerr)
-                cerr << log_name() << "Found " << chain_results.chains.size() << " chains in zip code tree " << item_num
+                cerr << log_name() << "\t[" << aln.name() << "] Found " << chain_results.chains.size() << " chains in zip code tree " << item_num
                     << " running " << anchors_to_chain[anchor_indexes.front()] << " to " << anchors_to_chain[anchor_indexes.back()] << std::endl;
             }
 
@@ -1556,7 +1607,12 @@ void MinimizerMapper::do_chaining_on_trees(Alignment& aln, const ZipCodeForest&
                 auto& entry = chain_results.chains[result];
                 auto& scored_chain = entry.scored_chain;
                 auto& chain_rec_positions = entry.rec_positions;
-                if (show_work) {
+#ifdef debug_rec
+                if (true)
+#else
+                if (show_work)
+#endif
+                {
 #ifdef debug
                     if(true)
 #else
@@ -1566,24 +1622,42 @@ void MinimizerMapper::do_chaining_on_trees(Alignment& aln, const ZipCodeForest&
                         if (!scored_chain.second.empty()) {
                             #pragma omp critical (cerr)
                             {
-                                cerr << log_name() << "\tChain with score " << scored_chain.first
+                                cerr << log_name() << "\t[" << aln.name() << "] Chain " << result << " with score " << scored_chain.first
                                     << " (rec num =" << chain_rec_positions.size() << ") and length " << scored_chain.second.size()
                                     << " running " << anchor_view[scored_chain.second.front()]
-                                    << " to " << anchor_view[scored_chain.second.back()] << std::endl;
+                                    << " to " << anchor_view[scored_chain.second.back()];
                                 if (!chain_rec_positions.empty()) {
-                                    {
-                                        cerr << log_name() << "\t\tRecombination introduced at anchors: ";
-                                        for (size_t pi = 0; pi < chain_rec_positions.size(); ++pi) {
-                                            if (pi) cerr << ", ";
-                                            cerr << chain_rec_positions[pi];
-                                        }
-                                        cerr << std::endl;
+                                    cerr << " recombination introduced at anchors: ";
+                                    for (size_t pi = 0; pi < chain_rec_positions.size(); ++pi) {
+                                        if (pi) cerr << ", ";
+                                        cerr << chain_rec_positions[pi];
                                     }
                                 }
-#ifdef debug
-
-                                for (auto& anchor_number : scored_chain.second) {
-                                    std::cerr << log_name() << "\t\t" << anchor_view[anchor_number] << std::endl;
+                                cerr << std::endl;
+#ifdef debug_rec
+                                algorithms::path_flags_t current_paths = 0;
+                                bool first = true;
+                                for (auto& selected_number : scored_chain.second) {
+                                    auto& anchor = anchor_view[selected_number];
+                                    auto new_paths = anchor.anchor_paths();
+                                    if (first) {
+                                        current_paths = new_paths.second;
+                                        first = false;
+                                    } else {
+                                        if (new_paths.first == new_paths.second) {
+                                            if ((current_paths & new_paths.first) == 0) {
+                                                current_paths = new_paths.first;
+                                            } else {
+                                                current_paths &= new_paths.first;
+                                            }
+                                        } else {
+                                            current_paths = new_paths.second;
+                                        }
+                                    }
+                                    
+                                    std::cerr << log_name() << "\t\t" << anchor 
+                                              << " anchor_paths: " << std::bitset<64>(new_paths.first).count() << " " << std::bitset<64>(new_paths.first) 
+                                              << " chain_paths: " << std::bitset<64>(current_paths).count() << " " << std::bitset<64>(current_paths) << std::endl;
                                 }
 #endif
 
@@ -1591,7 +1665,7 @@ void MinimizerMapper::do_chaining_on_trees(Alignment& aln, const ZipCodeForest&
                         }
                     } else if (result == MANY_LIMIT) {
                         #pragma omp critical (cerr)
-                        std::cerr << log_name() << "\t<" << (chain_results.chains.size() - result) << " more chains>" << std::endl;
+                        std::cerr << log_name() << "\t[" << aln.name() << "] <" << (chain_results.chains.size() - result) << " more chains>" << std::endl;
                     }
                 }
 
@@ -1627,6 +1701,8 @@ void MinimizerMapper::do_chaining_on_trees(Alignment& aln, const ZipCodeForest&
                 }
                 // Remember the score
                 chain_score_estimates.push_back(scored_chain.first);
+                // Remember how many recombinations were in this chain
+                chain_rec_counts.push_back(chain_rec_positions.size());
 
                 // Remember how we got it
                 chain_source_tree.push_back(item_num);
@@ -1773,7 +1849,7 @@ void MinimizerMapper::get_best_chain_stats(Alignment& aln, const ZipCodeForest&
             best_chain_longest_jump = std::max(best_chain_longest_jump, jump);
             best_chain_total_jump += jump;
         }
-        best_chain_average_jump = chains.at(best_chain).size() > 1 ? best_chain_total_jump / (chains.at(best_chain).size() - 1) : 0.0;
+        best_chain_average_jump = chains.at(best_chain).size() > 1 ? (double)best_chain_total_jump / (chains.at(best_chain).size() - 1) : 0.0;
     }
 
     // Also count anchors in the chain
@@ -2557,10 +2633,10 @@ Alignment MinimizerMapper::find_chain_alignment(
                 if (stats) {
                     start_time = std::chrono::high_resolution_clock::now();
                 }
-                auto nodes_and_bases = align_sequence_between_consistently(empty_pos_t(), right_anchor, graph_horizon, max_gap_length, &this->gbwt_graph, this->get_regular_aligner(), tail_aln, &aln.name(), this->max_dp_cells, this->choose_band_padding);
+                bool did_aln = align_sequence_between_consistently(empty_pos_t(), right_anchor, graph_horizon, max_gap_length, &this->gbwt_graph, this->get_regular_aligner(), tail_aln, &aln.name(), this->max_dp_cells, this->choose_band_padding);
                 if (stats) {
                     stop_time = std::chrono::high_resolution_clock::now();
-                    if (nodes_and_bases.first > 0) {
+                    if (did_aln) {
                         // Actually did the alignment
                         stats->bases.dozeu_tail += left_tail_length;
                         stats->time.dozeu_tail += std::chrono::duration_cast<chrono::duration<double>>(stop_time - start_time).count();
@@ -2883,10 +2959,10 @@ Alignment MinimizerMapper::find_chain_alignment(
             if (stats) {
                 start_time = std::chrono::high_resolution_clock::now();
             }
-            auto nodes_and_bases = MinimizerMapper::align_sequence_between_consistently((*here).graph_end(), (*next).graph_start(), path_length+max_gap_length, max_gap_length, &this->gbwt_graph, this->get_regular_aligner(), link_aln, &aln.name(), this->max_dp_cells, this->choose_band_padding);
+            bool did_aln = MinimizerMapper::align_sequence_between_consistently((*here).graph_end(), (*next).graph_start(), path_length+max_gap_length, max_gap_length, &this->gbwt_graph, this->get_regular_aligner(), link_aln, &aln.name(), this->max_dp_cells, this->choose_band_padding);
             if (stats) {
                 stop_time = std::chrono::high_resolution_clock::now();
-                if (nodes_and_bases.first > 0) {
+                if (did_aln) {
                     // Actually did the alignment
                     stats->bases.bga_middle += link_length;
                     stats->time.bga_middle += std::chrono::duration_cast<chrono::duration<double>>(stop_time - start_time).count();
@@ -3070,10 +3146,10 @@ Alignment MinimizerMapper::find_chain_alignment(
                 if (stats) {
                     start_time = std::chrono::high_resolution_clock::now();
                 }
-                auto nodes_and_bases = align_sequence_between_consistently(left_anchor_included, empty_pos_t(), graph_horizon, max_gap_length, &this->gbwt_graph, this->get_regular_aligner(), tail_aln, &aln.name(), this->max_dp_cells, this->choose_band_padding);
+                bool did_aln = align_sequence_between_consistently(left_anchor_included, empty_pos_t(), graph_horizon, max_gap_length, &this->gbwt_graph, this->get_regular_aligner(), tail_aln, &aln.name(), this->max_dp_cells, this->choose_band_padding);
                 if (stats) {
                     stop_time = std::chrono::high_resolution_clock::now();
-                    if (nodes_and_bases.first > 0) {
+                    if (did_aln) {
                         // Actually did the alignment
                         stats->bases.dozeu_tail += right_tail_length;
                         stats->time.dozeu_tail += std::chrono::duration_cast<chrono::duration<double>>(stop_time - start_time).count();
@@ -3464,11 +3540,9 @@ size_t MinimizerMapper::longest_detectable_gap_in_range(const Alignment& aln, co
     return aligner->longest_detectable_gap(aln, sequence_end);
 }
 
-std::pair<size_t, size_t> MinimizerMapper::align_sequence_between(const pos_t& left_anchor, const pos_t& right_anchor, size_t max_path_length, size_t max_gap_length, const HandleGraph* graph, const GSSWAligner* aligner, Alignment& alignment, const std::string* alignment_name, size_t max_dp_cells, const std::function<size_t(const Alignment&, const HandleGraph&)>& choose_band_padding) {
-
-    // This holds node count and node length aligned to.
-    std::pair<size_t, size_t> to_return;
+bool MinimizerMapper::align_sequence_between(const pos_t& left_anchor, const pos_t& right_anchor, size_t max_path_length, size_t max_gap_length, const HandleGraph* graph, const GSSWAligner* aligner, Alignment& alignment, const std::string* alignment_name, size_t max_dp_cells, const std::function<size_t(const Alignment&, const HandleGraph&)>& choose_band_padding) {
 
+    bool did_aln = true;
     // Get the dagified local graph, and the back translation
     MinimizerMapper::with_dagified_local_graph(left_anchor, right_anchor, max_path_length, *graph,
         [&](DeletableHandleGraph& dagified_graph,
@@ -3594,10 +3668,6 @@ std::pair<size_t, size_t> MinimizerMapper::align_sequence_between(const pos_t& l
                 // Clear out the alignment path to indicate that we didn't actually compute an alignment.
                 alignment.mutable_path()->clear_mapping();
             }
-            // Always report the size of what we were aligning to.
-            // TODO: Do we still need this?
-            to_return.first = dagified_graph.get_node_count();
-            to_return.second = dagified_graph.get_total_length();
         } else {
             // Do pinned alignment off the anchor we actually have.
             // Work out how big it will be.
@@ -3621,8 +3691,7 @@ std::pair<size_t, size_t> MinimizerMapper::align_sequence_between(const pos_t& l
                 Edit* e = m->add_edit();
                 e->set_to_length(alignment.sequence().size());
                 e->set_sequence(alignment.sequence());
-                to_return.first = 0;
-                to_return.second = 0;
+                did_aln = false;
                 return;
             } else {
 #ifdef debug
@@ -3630,8 +3699,6 @@ std::pair<size_t, size_t> MinimizerMapper::align_sequence_between(const pos_t& l
                 std::cerr << "debug[MinimizerMapper::align_sequence_between]: Fill " << cell_count << " DP cells in tail with Xdrop" << std::endl;
 #endif
                 aligner->align_pinned(alignment, dagified_graph, !is_empty(left_anchor), true, max_gap_length);
-                to_return.first = dagified_graph.get_node_count();
-                to_return.second = dagified_graph.get_total_length();
             }
         }
 
@@ -3687,10 +3754,10 @@ std::pair<size_t, size_t> MinimizerMapper::align_sequence_between(const pos_t& l
         // Now the alignment is filled in!
     });
 
-    return to_return;
+    return did_aln;
 }
 
-std::pair<size_t, size_t> MinimizerMapper::align_sequence_between_consistently(const pos_t& left_anchor, const pos_t& right_anchor, size_t max_path_length, size_t max_gap_length, const HandleGraph* graph, const GSSWAligner* aligner, Alignment& alignment, const std::string* alignment_name, size_t max_dp_cells, const std::function<size_t(const Alignment&, const HandleGraph&)>& choose_band_padding) {
+bool MinimizerMapper::align_sequence_between_consistently(const pos_t& left_anchor, const pos_t& right_anchor, size_t max_path_length, size_t max_gap_length, const HandleGraph* graph, const GSSWAligner* aligner, Alignment& alignment, const std::string* alignment_name, size_t max_dp_cells, const std::function<size_t(const Alignment&, const HandleGraph&)>& choose_band_padding) {
     if (left_anchor < right_anchor) {
         // Left anchor is unambiguously first, so align as-is
         return align_sequence_between(left_anchor, right_anchor, max_path_length, max_gap_length, graph, aligner, alignment, alignment_name, max_dp_cells, choose_band_padding);
diff --git a/src/multipath_mapper.cpp b/src/multipath_mapper.cpp
index 4632edee8b..4241fa12e6 100644
--- a/src/multipath_mapper.cpp
+++ b/src/multipath_mapper.cpp
@@ -2448,7 +2448,7 @@ namespace vg {
                     // in the left_idxs and right_idxs vectors
                     int64_t target_len = 2 * seq_len - left_side.clip_length - right_side.clip_length;
                     auto distance_diff = [&](size_t l, size_t r) {
-                        return abs<int64_t>(get<2>(left_sites[left_idxs[l]]) + get<2>(right_sites[right_idxs[r]]) - target_len);
+                        return std::abs(static_cast<int64_t>(get<2>(left_sites[left_idxs[l]]) + get<2>(right_sites[right_idxs[r]]) - target_len));
                     };
                     
                     // sweep to identify pairs that most nearly align
diff --git a/src/recombinator.cpp b/src/recombinator.cpp
index a9aaed4b10..07915118ed 100644
--- a/src/recombinator.cpp
+++ b/src/recombinator.cpp
@@ -1585,7 +1585,7 @@ void add_path(const gbwt::GBWT& source, gbwt::size_type path_id, gbwt::GBWTBuild
     gbwt::PathName path_name = source.metadata.path(path_id);
     std::string sample_name = source.metadata.sample(path_name.sample);
     std::string contig_name = source.metadata.contig(path_name.contig);
-    if (sample_name == gbwtgraph::REFERENCE_PATH_SAMPLE_NAME) {
+    if (sample_name == gbwtgraph::GENERIC_PATH_SAMPLE_NAME) {
         metadata.add_generic_path(contig_name);
     } else {
         // Reference samples will be copied later.
diff --git a/src/snarl_distance_index.cpp b/src/snarl_distance_index.cpp
index 65f7dec363..24e1957443 100644
--- a/src/snarl_distance_index.cpp
+++ b/src/snarl_distance_index.cpp
@@ -1,7 +1,9 @@
-//#define debug_distance_indexing
-//#define debug_snarl_traversal
-//#define debug_distances
-//#define debug_subgraph
+// #define debug_distance_indexing
+// #define debug_snarl_traversal
+// #define debug_distances
+// #define debug_subgraph
+// #define debug_hub_label_build
+// #define debug_hub_label_storage
 
 #include "snarl_distance_index.hpp"
 
@@ -9,2022 +11,161 @@ using namespace std;
 using namespace handlegraph;
 namespace vg {
 
-size_t minimum_distance(const SnarlDistanceIndex& distance_index, pos_t pos1, pos_t pos2,
-                        bool unoriented_distance, const HandleGraph* graph) {
-    return distance_index.minimum_distance( get_id(pos1), get_is_rev(pos1), get_offset(pos1),
-                                            get_id(pos2), get_is_rev(pos2), get_offset(pos2),
-                                            unoriented_distance, graph, nullptr); 
+size_t minimum_distance(const SnarlDistanceIndex &distance_index, pos_t pos1,
+                        pos_t pos2, bool unoriented_distance,
+                        const HandleGraph *graph) {
+  return distance_index.minimum_distance(
+      get_id(pos1), get_is_rev(pos1), get_offset(pos1), get_id(pos2),
+      get_is_rev(pos2), get_offset(pos2), unoriented_distance, graph, nullptr);
 }
 
-size_t minimum_nontrivial_distance(const SnarlDistanceIndex& distance_index, pos_t pos1, pos_t pos2,
-                                   size_t pos2_length, const HandleGraph* graph) {
-    bool shifted = false;
-    if (pos1 == pos2) {
-        if (pos2_length == std::numeric_limits<size_t>::max()) {
-            // If we don't know the length, we can get it from the graph
-            pos2_length = distance_index.minimum_length(
-                distance_index.get_node_net_handle(id(pos2)));
-        }
-        // Must shift one position to avoid self-distance of 0
-        if (offset(pos1) == pos2_length) {
-            // Shift ending pos backward (not safe to shift forward)
-            get_offset(pos2)--;
-        } else {
-            // Shift starting position forward
-            get_offset(pos1)++;
-        }
-        
-        shifted = true;
-    }
-
-    size_t distance = minimum_distance(distance_index, pos1, pos2, false, graph);
-    if (shifted && distance != std::numeric_limits<size_t>::max()) {
-        // This loop is possible, so add back in the shift
-        distance++;
-    }
-
-    return distance;
-}
-
-size_t maximum_distance(const SnarlDistanceIndex& distance_index, pos_t pos1, pos_t pos2) {
-    return distance_index.maximum_distance( get_id(pos1), get_is_rev(pos1), get_offset(pos1),
-                                            get_id(pos2), get_is_rev(pos2), get_offset(pos2)); 
-}
-
-void fill_in_distance_index(SnarlDistanceIndex* distance_index, const HandleGraph* graph, const HandleGraphSnarlFinder* snarl_finder, size_t size_limit, bool only_top_level_chain_distances, bool silence_warnings) {
-    distance_index->set_snarl_size_limit(size_limit);
-    distance_index->set_only_top_level_chain_distances(only_top_level_chain_distances);
-
-    //Build the temporary distance index from the graph
-    SnarlDistanceIndex::TemporaryDistanceIndex temp_index = make_temporary_distance_index(graph, snarl_finder, size_limit, only_top_level_chain_distances);
-
-    if (!silence_warnings && temp_index.use_oversized_snarls) {
-        cerr << "warning: distance index uses oversized snarls, (the biggest has "
-             << temp_index.most_oversized_snarl_size << " nodes), which may make mapping slow" << endl;
-        cerr << "\ttry increasing --snarl-limit when building the distance index" << endl;
-    }
-
-    //And fill in the permanent distance index
-    vector<const SnarlDistanceIndex::TemporaryDistanceIndex*> indexes;
-    indexes.emplace_back(&temp_index);
-    distance_index->get_snarl_tree_records(indexes, graph);
-}
-SnarlDistanceIndex::TemporaryDistanceIndex make_temporary_distance_index(
-    const HandleGraph* graph, const HandleGraphSnarlFinder* snarl_finder, size_t size_limit, bool only_top_level_chain_distances)  {
-
-#ifdef debug_distance_indexing
-    cerr << "Creating new distance index for nodes between " << graph->min_node_id() << " and " << graph->max_node_id() << endl;
-
-#endif
-
-    SnarlDistanceIndex::TemporaryDistanceIndex temp_index;
-
-    temp_index.min_node_id=graph->min_node_id();
-    temp_index.max_node_id=graph->max_node_id();
-
-    //Construct the distance index using the snarl decomposition
-    //traverse_decomposition will visit all structures (including trivial snarls), calling
-    //each of the given functions for the start and ends of the snarls and chains
-
-    temp_index.temp_node_records.resize(temp_index.max_node_id-temp_index.min_node_id+1);
-
-
-
-    //Stores unfinished records, as type of record and offset into appropriate vector
-    //(temp_node/snarl/chain_records)
-    vector<pair<SnarlDistanceIndex::temp_record_t, size_t>> stack;
-
-    //There may be components of the root that are connected to each other. Each connected component will
-    //get put into a (fake) root-level snarl, but we don't know what those components will be initially,
-    //since the decomposition just puts them in the same root snarl. This is used to group the root-level
-    //components into connected components that will later be used to make root snarls
-    structures::UnionFind root_snarl_component_uf (0);
-
-
-    /*Go through the decomposition top down and record the connectivity of the snarls and chains
-     * Distances will be added later*/
-
-    snarl_finder->traverse_decomposition(
-    [&](handle_t chain_start_handle) {
-        /*This gets called when a new chain is found, starting at the start handle going into chain
-         * For the first node in a chain, create a chain record and fill in the first node.
-         * Also add the first node record
-         */
-#ifdef debug_distance_indexing
-        cerr << "  Starting new chain at " << graph->get_id(chain_start_handle) << (graph->get_is_reverse(chain_start_handle) ? " reverse" : " forward") << endl;
-        //We shouldn't have seen this node before
-        //assert(temp_index.temp_node_records[graph->get_id(chain_start_handle)-min_node_id].node_id == 0);
-#endif
-
-        //Fill in node in chain
-        stack.emplace_back(SnarlDistanceIndex::TEMP_CHAIN, temp_index.temp_chain_records.size());
-        nid_t node_id = graph->get_id(chain_start_handle);
-        temp_index.temp_chain_records.emplace_back();
-        auto& temp_chain = temp_index.temp_chain_records.back();
-        temp_chain.start_node_id = node_id; 
-        temp_chain.start_node_rev = graph->get_is_reverse(chain_start_handle);
-        temp_chain.children.emplace_back(SnarlDistanceIndex::TEMP_NODE, node_id);
-
-
-        //And the node record itself
-        auto& temp_node = temp_index.temp_node_records.at(node_id-temp_index.min_node_id);
-        temp_node.node_id = node_id;
-        temp_node.node_length = graph->get_length(chain_start_handle);
-        temp_node.reversed_in_parent = graph->get_is_reverse(chain_start_handle);
-        temp_node.parent = stack.back(); //The parent is this chain
-
-    },
-    [&](handle_t chain_end_handle) {
-        /*This gets called at the end of a chain, facing out
-         * Record the chain's end node. The node record itself would have been added as part of the snarl
-         * Also record the chain's parent here
-         */
-
-        //Done with this chain
-        pair<SnarlDistanceIndex::temp_record_t, size_t> chain_index = stack.back();
-        stack.pop_back();
-
-#ifdef debug_distance_indexing
-        assert(chain_index.first == SnarlDistanceIndex::TEMP_CHAIN);
-#endif
-        SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryChainRecord& temp_chain_record = temp_index.temp_chain_records.at(chain_index.second);
-        nid_t node_id = graph->get_id(chain_end_handle);
-
-        if (temp_chain_record.children.size() == 1 && node_id == temp_chain_record.start_node_id) {
-            //This is a trivial snarl
-
-#ifdef debug_distance_indexing
-            //Then this must be the last thing on the chain_records vector
-            assert(temp_index.temp_chain_records.size() == chain_index.second+1);
-#endif
-
-            //Get the node
-            SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryNodeRecord& temp_node_record = temp_index.temp_node_records.at(node_id - temp_index.min_node_id);
-
-            temp_node_record.reversed_in_parent = false;
-
-            //And give the chain's parent the node info
-            //
-            if (stack.empty()) {
-                temp_node_record.parent = make_pair(SnarlDistanceIndex::TEMP_ROOT, 0);
-                //If this was the last thing on the stack, then this was a root
-
-                //Check to see if there is anything connected to the ends of the chain
-                vector<nid_t> reachable_nodes;
-                graph->follow_edges(graph->get_handle(node_id, false),
-                    false, [&] (const handle_t& next) {
-                        if (graph->get_id(next) != node_id) {
-                            reachable_nodes.emplace_back(graph->get_id(next));
-                        }
-                    });
-                graph->follow_edges(graph->get_handle(node_id, true),
-                    false, [&] (const handle_t& next) {
-                        if (graph->get_id(next) != node_id) {
-                            reachable_nodes.emplace_back(graph->get_id(next));
-                        }
-                    });
-                if (reachable_nodes.size()) {
-                    //If we can reach anything leaving the chain (besides the chain itself), then it is part of a root snarl
-                    //Note that if the chain's start and end node are the same, then it will always be a single component
-#ifdef debug_distance_indexing
-                    cerr << "                 This trivial chain is part of the root but connects with something else in the root"<<endl;
-#endif
-                    bool new_component = true;
-
-                    //Add this to the union find
-                    root_snarl_component_uf.resize(root_snarl_component_uf.size() + 1);
-                    //And remember that it's in a connected component of the root
-                    temp_node_record.root_snarl_index = temp_index.root_snarl_components.size();
-                    temp_index.root_snarl_components.emplace_back(SnarlDistanceIndex::TEMP_NODE, node_id);
-                    for (nid_t next_id : reachable_nodes) {
-                        //For each node that this is connected to, check if we've already seen it and if we have, then
-                        //union this chain and that node's chain
-                        SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryNodeRecord& node_record = temp_index.temp_node_records[next_id-temp_index.min_node_id];
-                        if (node_record.node_id != 0) {
-                            //If we've already seen this node, union it with the new one
-                            //If we can see it by walking out from this top-level chain, then it must also be a
-                            //top-level chain (or node pretending to be a chain)
-                            size_t other_i = node_record.parent.first == SnarlDistanceIndex::TEMP_CHAIN
-                                           ? temp_index.temp_chain_records[node_record.parent.second].root_snarl_index
-                                           : node_record.root_snarl_index;
-#ifdef debug_distance_indexing
-                            assert(other_i != std::numeric_limits<size_t>::max());
-#endif
-                            root_snarl_component_uf.union_groups(other_i, temp_node_record.root_snarl_index);
-//#ifdef debug_distance_indexing
-//                            cerr << "        Union this trivial  with " << temp_index.temp_chain_records[node_record.parent.second].start_node_id << " " << temp_index.temp_chain_records[node_record.parent.second].end_node_id << endl;
-//#endif
-                        } else {
-                            new_component = false;
-                        }
-                    }
-                } else {
-                    //If this chain isn't connected to anything else, then it is a single component of the root
-                    temp_node_record.rank_in_parent = temp_index.components.size();
-                    temp_index.components.emplace_back(SnarlDistanceIndex::TEMP_NODE, node_id);
-                }
-            } else {
-                //The last thing on the stack is the parent of this chain, which must be a snarl
-                temp_node_record.parent = stack.back();
-                auto& parent_snarl_record = temp_index.temp_snarl_records.at(temp_node_record.parent.second);
-                temp_node_record.rank_in_parent = parent_snarl_record.children.size() + 2;
-                parent_snarl_record.children.emplace_back(SnarlDistanceIndex::TEMP_NODE, node_id);
-            }
-
-
-            //Remove the chain record
-            temp_index.temp_chain_records.pop_back();
-            temp_index.max_index_size += temp_node_record.get_max_record_length();
-
-        } else {
-            //Otherwise, it is an actual chain
-
-            //Fill in node in chain
-            temp_chain_record.end_node_id = node_id;
-            temp_chain_record.end_node_rev = graph->get_is_reverse(chain_end_handle);
-            temp_chain_record.end_node_length = graph->get_length(chain_end_handle);
-            
-            bool is_root_chain = false;
-
-            if (stack.empty()) {
-                //If this was the last thing on the stack, then this was a root
-                is_root_chain = true;
-
-                //Check to see if there is anything connected to the ends of the chain
-                vector<nid_t> reachable_nodes;
-                graph->follow_edges(graph->get_handle(temp_chain_record.start_node_id, !temp_chain_record.start_node_rev),
-                    false, [&] (const handle_t& next) {
-                        if (graph->get_id(next) != temp_chain_record.start_node_id &&
-                            graph->get_id(next) != temp_chain_record.end_node_id) {
-                            reachable_nodes.emplace_back(graph->get_id(next));
-                        }
-                    });
-                graph->follow_edges(graph->get_handle(temp_chain_record.end_node_id, temp_chain_record.end_node_rev),
-                    false, [&] (const handle_t& next) {
-                        if (graph->get_id(next) != temp_chain_record.start_node_id &&
-                            graph->get_id(next) != temp_chain_record.end_node_id) {
-                            reachable_nodes.emplace_back(graph->get_id(next));
-                        }
-                    });
-                if (reachable_nodes.size() && (temp_chain_record.is_trivial || temp_chain_record.start_node_id != temp_chain_record.end_node_id)) {
-                    //If we can reach anything leaving the chain (besides the chain itself), then it is part of a root snarl
-                    //Note that if the chain's start and end node are the same, then it will always be a single component
-#ifdef debug_distance_indexing
-                    cerr << "                 This chain is part of the root but connects with something else in the root"<<endl;
-#endif
-                    bool new_component = true;
-
-                    //Add this to the union find
-                    root_snarl_component_uf.resize(root_snarl_component_uf.size() + 1);
-                    //And remember that it's in a connected component of the root
-                    temp_chain_record.root_snarl_index = temp_index.root_snarl_components.size();
-                    temp_index.root_snarl_components.emplace_back(chain_index);
-                    for (nid_t next_id : reachable_nodes) {
-                        //For each node that this is connected to, check if we've already seen it and if we have, then
-                        //union this chain and that node's chain
-                        SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryNodeRecord& node_record = temp_index.temp_node_records[next_id-temp_index.min_node_id];
-                        if (node_record.node_id != 0) {
-                            //If we've already seen this node, union it with the new one
-                            //If we can see it by walking out from this top-level chain, then it must also be a
-                            //top-level chain (or node pretending to be a chain)
-                            size_t other_i = node_record.parent.first == SnarlDistanceIndex::TEMP_CHAIN
-                                           ? temp_index.temp_chain_records[node_record.parent.second].root_snarl_index
-                                           : node_record.root_snarl_index;
-#ifdef debug_distance_indexing
-                            assert(other_i != std::numeric_limits<size_t>::max());
-#endif
-                            root_snarl_component_uf.union_groups(other_i, temp_chain_record.root_snarl_index);
-#ifdef debug_distance_indexing
-                            cerr << "        Union this chain with " << temp_index.temp_chain_records[node_record.parent.second].start_node_id << " " << temp_index.temp_chain_records[node_record.parent.second].end_node_id << endl;
-#endif
-                        } else {
-                            new_component = false;
-                        }
-                    }
-                } else {
-                    //If this chain isn't connected to anything else, then it is a single component of the root
-                    temp_chain_record.parent = make_pair(SnarlDistanceIndex::TEMP_ROOT, 0);
-                    temp_chain_record.rank_in_parent = temp_index.components.size();
-                    temp_index.components.emplace_back(chain_index);
-                }
-            } else {
-                //The last thing on the stack is the parent of this chain, which must be a snarl
-                temp_chain_record.parent = stack.back();
-                auto& parent_snarl_record = temp_index.temp_snarl_records.at(temp_chain_record.parent.second);
-                temp_chain_record.rank_in_parent = parent_snarl_record.children.size() + 2;
-                parent_snarl_record.children.emplace_back(chain_index);
-            }
-
-            temp_index.max_index_size += temp_chain_record.get_max_record_length(!only_top_level_chain_distances || is_root_chain ? true : false );
-#ifdef debug_distance_indexing
-            cerr << "  Ending new " << (temp_chain_record.is_trivial ? "trivial " : "") <<  "chain " << temp_index.structure_start_end_as_string(chain_index)
-              << endl << "    that is a child of " << temp_index.structure_start_end_as_string(temp_chain_record.parent) << endl;
-#endif
-        }
-    },
-    [&](handle_t snarl_start_handle) {
-        /*This gets called at the beginning of a new snarl facing in
-         * Create a new snarl record and fill in the start node.
-         * The node record would have been created as part of the chain, or as the end node
-         * of the previous snarl
-         */
-
-#ifdef debug_distance_indexing
-        cerr << "  Starting new snarl at " << graph->get_id(snarl_start_handle) << (graph->get_is_reverse(snarl_start_handle) ? " reverse" : " forward") << endl;
-        cerr << "with index " << temp_index.temp_snarl_records.size() << endl;
-#endif
-        auto& parent = stack.back();
-        stack.emplace_back(SnarlDistanceIndex::TEMP_SNARL, temp_index.temp_snarl_records.size());
-        temp_index.temp_snarl_records.emplace_back();
-        temp_index.temp_snarl_records.back().start_node_id = graph->get_id(snarl_start_handle);
-        temp_index.temp_snarl_records.back().start_node_rev = graph->get_is_reverse(snarl_start_handle);
-        temp_index.temp_snarl_records.back().start_node_length = graph->get_length(snarl_start_handle);
-
-    },
-    [&](handle_t snarl_end_handle){
-        /*This gets called at the end of the snarl facing out
-         * Fill in the end node of the snarl, its parent, and record the snarl as a child of its
-         * parent chain
-         * Also create a node record
-         */
-        pair<SnarlDistanceIndex::temp_record_t, size_t> snarl_index = stack.back();
-        stack.pop_back();
-#ifdef debug_distance_indexing
-        assert(snarl_index.first == SnarlDistanceIndex::TEMP_SNARL);
-        assert(stack.back().first == SnarlDistanceIndex::TEMP_CHAIN);
-#endif
-        SnarlDistanceIndex::TemporaryDistanceIndex::TemporarySnarlRecord& temp_snarl_record = temp_index.temp_snarl_records[snarl_index.second];
-        nid_t node_id = graph->get_id(snarl_end_handle);
-
-        //Record the end node in the snarl
-        temp_snarl_record.end_node_id = node_id;
-        temp_snarl_record.end_node_rev = graph->get_is_reverse(snarl_end_handle);
-        temp_snarl_record.end_node_length = graph->get_length(snarl_end_handle);
-        temp_snarl_record.node_count = temp_snarl_record.children.size();
-        bool any_edges_in_snarl = false;
-        graph->follow_edges(graph->get_handle(temp_snarl_record.start_node_id, temp_snarl_record.start_node_rev), false, [&](const handle_t next_handle) {
-            if (graph->get_id(next_handle) != temp_snarl_record.end_node_id) {
-                any_edges_in_snarl = true;
-            }
-        });
-        graph->follow_edges(graph->get_handle(temp_snarl_record.end_node_id, !temp_snarl_record.end_node_rev), false, [&](const handle_t next_handle) {
-            if (graph->get_id(next_handle) != temp_snarl_record.start_node_id) {
-                any_edges_in_snarl = true;
-            }
-        });
-
-        if (temp_snarl_record.children.size() == 0) {
-            //This is a trivial snarl
-            temp_snarl_record.is_trivial = true;
-
-            //Add the end node to the chain
-#ifdef debug_distance_indexing
-            assert(stack.back().first == SnarlDistanceIndex::TEMP_CHAIN);
-#endif
-            temp_snarl_record.parent = stack.back();
-            auto& temp_chain = temp_index.temp_chain_records.at(stack.back().second);
-            temp_chain.children.emplace_back(SnarlDistanceIndex::TEMP_NODE, node_id);
-
-            //Remove the snarl record
-#ifdef debug_distance_indexing
-            assert(temp_index.temp_snarl_records.size() == snarl_index.second+1);
-#endif
-            temp_index.temp_snarl_records.pop_back();
-        } else {
-            //This is the child of a chain
-#ifdef debug_distance_indexing
-            assert(stack.back().first == SnarlDistanceIndex::TEMP_CHAIN);
-#endif
-            temp_snarl_record.parent = stack.back();
-            auto& temp_chain = temp_index.temp_chain_records.at(stack.back().second);
-            temp_chain.children.emplace_back(snarl_index);
-            temp_chain.children.emplace_back(SnarlDistanceIndex::TEMP_NODE, node_id);
-
-        }
-        //Record the snarl as a child of its chain
-        //if (stack.empty()) {
-        //    assert(false);
-        //    //TODO: The snarl should always be the child of a chain
-        //    //If this was the last thing on the stack, then this was a root
-        //    //TODO: I'm not sure if this would get put into a chain or not
-        //    temp_snarl_record.parent = make_pair(SnarlDistanceIndex::TEMP_ROOT, 0);
-        //    temp_index.components.emplace_back(snarl_index);
-        //} 
-
-        //Record the node itself. This gets done for the start of the chain, and ends of snarls
-        SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryNodeRecord& temp_node_record = temp_index.temp_node_records.at(node_id-temp_index.min_node_id);
-        temp_node_record.node_id = node_id;
-        temp_node_record.node_length = graph->get_length(snarl_end_handle);
-        temp_node_record.reversed_in_parent = graph->get_is_reverse(snarl_end_handle);
-        temp_node_record.parent = stack.back();
-
-
-
-#ifdef debug_distance_indexing
-        cerr << "  Ending new snarl " << temp_index.structure_start_end_as_string(snarl_index)
-             << endl << "    that is a child of " << temp_index.structure_start_end_as_string(temp_snarl_record.parent) << endl;
-#endif
-    });
-
-    /*
-     * We finished going through everything that exists according to the snarl decomposition, but
-     * it's still missing tips, which will be discovered when filling in the snarl distances,
-     * and root-level snarls, which we'll add now by combining the chain components in root_snarl_components
-     * into snarls defined by root_snarl_component_uf
-     * The root-level snarl is a fake snarl that doesn't exist according to the snarl decomposition,
-     * but is an extra layer that groups together components of the root that are connected
-     */
-
-    vector<vector<size_t>> root_snarl_component_indexes = root_snarl_component_uf.all_groups();
-    for (vector<size_t>& root_snarl_indexes : root_snarl_component_indexes) {
-#ifdef debug_distance_indexing
-        cerr << "Create a new root snarl from components" << endl;
-#endif
-        //For each of the root snarls
-        temp_index.components.emplace_back(SnarlDistanceIndex::TEMP_SNARL, temp_index.temp_snarl_records.size());
-        temp_index.temp_snarl_records.emplace_back();
-        SnarlDistanceIndex::TemporaryDistanceIndex::TemporarySnarlRecord& temp_snarl_record = temp_index.temp_snarl_records.back();
-        temp_snarl_record.is_root_snarl = true;
-        temp_snarl_record.parent = make_pair(SnarlDistanceIndex::TEMP_ROOT, 0);
-
-
-        for (size_t chain_i : root_snarl_indexes) {
-            //For each chain component of this root-level snarl
-            if (temp_index.root_snarl_components[chain_i].first == SnarlDistanceIndex::TEMP_CHAIN){
-                SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryChainRecord& temp_chain_record = temp_index.temp_chain_records[temp_index.root_snarl_components[chain_i].second];
-                temp_chain_record.parent = make_pair(SnarlDistanceIndex::TEMP_SNARL, temp_index.temp_snarl_records.size() - 1);
-                temp_chain_record.rank_in_parent = temp_snarl_record.children.size();
-                temp_chain_record.reversed_in_parent = false;
-
-                temp_snarl_record.children.emplace_back(temp_index.root_snarl_components[chain_i]);
-            } else {
-#ifdef debug_distance_indexing
-                assert(temp_index.root_snarl_components[chain_i].first == SnarlDistanceIndex::TEMP_NODE);
-#endif
-                SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryNodeRecord& temp_node_record = temp_index.temp_node_records[temp_index.root_snarl_components[chain_i].second - temp_index.min_node_id];
-                temp_node_record.parent = make_pair(SnarlDistanceIndex::TEMP_SNARL, temp_index.temp_snarl_records.size() - 1);
-                temp_node_record.rank_in_parent = temp_snarl_record.children.size();
-                temp_node_record.reversed_in_parent = false;
-
-                temp_snarl_record.children.emplace_back(temp_index.root_snarl_components[chain_i]);
-            }
-        }
-        temp_snarl_record.node_count = temp_snarl_record.children.size();
-    }
-
-
-    /*Now go through the decomposition again to fill in the distances
-     * This traverses all chains in reverse order that we found them in, so bottom up
-     * Each chain and snarl already knows its parents and children, except for single nodes
-     * that are children of snarls. These nodes were not in chains will have their node
-     * records created here
-     */
-
-#ifdef debug_distance_indexing
-    cerr << "Filling in the distances in snarls" << endl;
-#endif
-    for (int i = temp_index.temp_chain_records.size()-1 ; i >= 0 ; i--) {
-
-        SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryChainRecord& temp_chain_record = temp_index.temp_chain_records[i];
-#ifdef debug_distance_indexing
-        assert(!temp_chain_record.is_trivial);
-        cerr << "  At "  << (temp_chain_record.is_trivial ? " trivial " : "") << " chain " << temp_index.structure_start_end_as_string(make_pair(SnarlDistanceIndex::TEMP_CHAIN, i)) << endl;
-#endif
-
-        //Add the first values for the prefix sum and backwards loop vectors
-        temp_chain_record.prefix_sum.emplace_back(0);
-        temp_chain_record.max_prefix_sum.emplace_back(0);
-        temp_chain_record.backward_loops.emplace_back(std::numeric_limits<size_t>::max());
-        temp_chain_record.chain_components.emplace_back(0);
-
-
-        /*First, go through each of the snarls in the chain in the forward direction and
-         * fill in the distances in the snarl. Also fill in the prefix sum and backwards
-         * loop vectors here
-         */
-        size_t curr_component = 0; //which component of the chain are we in
-        size_t last_node_length = 0;
-        for (size_t chain_child_i = 0 ; chain_child_i < temp_chain_record.children.size() ; chain_child_i++ ){
-            const pair<SnarlDistanceIndex::temp_record_t, size_t>& chain_child_index = temp_chain_record.children[chain_child_i];
-            //Go through each of the children in the chain, skipping nodes
-            //The snarl may be trivial, in which case don't fill in the distances
-#ifdef debug_distance_indexing
-            cerr << "    Looking at child " << temp_index.structure_start_end_as_string(chain_child_index) 
-                 << " current max prefix sum " << temp_chain_record.max_prefix_sum.back() << endl;
-#endif
-
-            if (chain_child_index.first == SnarlDistanceIndex::TEMP_SNARL){
-                //This is where all the work gets done. Need to go through the snarl and add
-                //all distances, then add distances to the chain that this is in
-                //The parent chain will be the last thing in the stack
-                SnarlDistanceIndex::TemporaryDistanceIndex::TemporarySnarlRecord& temp_snarl_record = 
-                        temp_index.temp_snarl_records.at(chain_child_index.second);
-
-                //Fill in this snarl's distances
-                populate_snarl_index(temp_index, chain_child_index, size_limit, only_top_level_chain_distances, graph);
-
-                bool new_component = temp_snarl_record.min_length == std::numeric_limits<size_t>::max();
-                if (new_component){
-                    curr_component++;
-                }
-
-                //And get the distance values for the end node of the snarl in the chain
-                if (new_component) {
-                    //If this snarl wasn't start-end connected, then we start 
-                    //tracking the distance vectors here
-
-                    //Update the maximum distance
-                    temp_index.max_distance = std::max(temp_index.max_distance, temp_chain_record.max_prefix_sum.back());
-
-                    temp_chain_record.prefix_sum.emplace_back(0);
-                    temp_chain_record.max_prefix_sum.emplace_back(0);
-                    temp_chain_record.backward_loops.emplace_back(temp_snarl_record.distance_end_end);
-                    //If the chain is disconnected, the max length is infinite
-                    temp_chain_record.max_length =  std::numeric_limits<size_t>::max();
-                } else {
-                    temp_chain_record.prefix_sum.emplace_back(SnarlDistanceIndex::sum(SnarlDistanceIndex::sum(
-                                                              temp_chain_record.prefix_sum.back(),
-                                                              temp_snarl_record.min_length), 
-                                                              temp_snarl_record.start_node_length));
-                    temp_chain_record.max_prefix_sum.emplace_back(SnarlDistanceIndex::sum(SnarlDistanceIndex::sum(
-                                                                   temp_chain_record.max_prefix_sum.back(),
-                                                                   temp_snarl_record.max_length), 
-                                                                   temp_snarl_record.start_node_length));
-                    temp_chain_record.backward_loops.emplace_back(std::min(temp_snarl_record.distance_end_end,
-                        SnarlDistanceIndex::sum(temp_chain_record.backward_loops.back()
-                        , 2 * (temp_snarl_record.start_node_length + temp_snarl_record.min_length))));
-                    temp_chain_record.max_length = SnarlDistanceIndex::sum(temp_chain_record.max_length,
-                                                                           temp_snarl_record.max_length);
-                }
-                temp_chain_record.chain_components.emplace_back(curr_component);
-                if (chain_child_i == temp_chain_record.children.size() - 2 && temp_snarl_record.min_length == std::numeric_limits<size_t>::max()) {
-                    temp_chain_record.loopable = false;
-                }
-                last_node_length = 0;
-            } else {
-                if (last_node_length != 0) {
-                    //If this is a node and the last thing was also a node,
-                    //then there was a trivial snarl 
-                    SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryNodeRecord& temp_node_record = 
-                            temp_index.temp_node_records.at(chain_child_index.second-temp_index.min_node_id);
-
-                    //Check if there is a loop in this node
-                    //Snarls get counted as trivial if they contain no nodes but they might still have edges
-                    size_t backward_loop = std::numeric_limits<size_t>::max();
-
-                    graph->follow_edges(graph->get_handle(temp_node_record.node_id, !temp_node_record.reversed_in_parent), false, [&](const handle_t next_handle) {
-                        if (graph->get_id(next_handle) == temp_node_record.node_id) {
-                            //If there is a loop going backwards (relative to the chain) back to the same node
-                            backward_loop = 0;
-                        }
-                    });
-
-                    temp_chain_record.prefix_sum.emplace_back(SnarlDistanceIndex::sum(temp_chain_record.prefix_sum.back(), last_node_length));
-                    temp_chain_record.max_prefix_sum.emplace_back(SnarlDistanceIndex::sum(temp_chain_record.max_prefix_sum.back(), last_node_length));
-                    temp_chain_record.backward_loops.emplace_back(std::min(backward_loop,
-                        SnarlDistanceIndex::sum(temp_chain_record.backward_loops.back(), 2 * last_node_length)));
-
-                    if (chain_child_i == temp_chain_record.children.size()-1) {
-                        //If this is the last node
-                        temp_chain_record.loopable=false;
-                    }
-                    temp_chain_record.chain_components.emplace_back(curr_component);
-                }
-                last_node_length = temp_index.temp_node_records.at(chain_child_index.second - temp_index.min_node_id).node_length;
-                //And update the chains max length
-                temp_chain_record.max_length = SnarlDistanceIndex::sum(temp_chain_record.max_length,
-                                                                       last_node_length);
-            }
-        } //Finished walking through chain
-        if (temp_chain_record.start_node_id == temp_chain_record.end_node_id && temp_chain_record.chain_components.back() != 0) {
-            //If this is a looping, multicomponent chain, the start/end node could end up in separate chain components
-            //despite being the same node.
-            //Since the first component will always be 0, set the first node's component to be whatever the last
-            //component was
-            temp_chain_record.chain_components[0] = temp_chain_record.chain_components.back();
-
-        }
-
-        //For a multicomponent chain, the actual minimum length will always be infinite, but since we sometimes need
-        //the length of the last component, save that here
-        temp_chain_record.min_length = !temp_chain_record.is_trivial && temp_chain_record.start_node_id == temp_chain_record.end_node_id
-                        ? temp_chain_record.prefix_sum.back()
-                        : SnarlDistanceIndex::sum(temp_chain_record.prefix_sum.back() , temp_chain_record.end_node_length);
-
-#ifdef debug_distance_indexing
-        assert(temp_chain_record.prefix_sum.size() == temp_chain_record.backward_loops.size());
-        assert(temp_chain_record.prefix_sum.size() == temp_chain_record.chain_components.size());
-#endif
-
-
-        /*Now that we've gone through all the snarls in the chain, fill in the forward loop vector
-         * by going through the chain in the backwards direction
-         */
-        temp_chain_record.forward_loops.resize(temp_chain_record.prefix_sum.size(),
-                                               std::numeric_limits<size_t>::max());
-        if (temp_chain_record.start_node_id == temp_chain_record.end_node_id && temp_chain_record.children.size() > 1) {
-
-            //If this is a looping chain, then check the first snarl for a loop
-            if (temp_chain_record.children.at(1).first == SnarlDistanceIndex::TEMP_SNARL) {
-                SnarlDistanceIndex::TemporaryDistanceIndex::TemporarySnarlRecord& temp_snarl_record = temp_index.temp_snarl_records.at(temp_chain_record.children.at(1).second);
-                temp_chain_record.forward_loops[temp_chain_record.forward_loops.size()-1] = temp_snarl_record.distance_start_start;
-            } 
-        }
-
-        size_t node_i = temp_chain_record.prefix_sum.size() - 2;
-        // We start at the next to last node because we need to look at this record and the next one.
-        last_node_length = 0;
-        for (int j = (int)temp_chain_record.children.size() - 1 ; j >= 0 ; j--) {
-            auto& child = temp_chain_record.children.at(j);
-            if (child.first == SnarlDistanceIndex::TEMP_SNARL){
-                SnarlDistanceIndex::TemporaryDistanceIndex::TemporarySnarlRecord& temp_snarl_record = temp_index.temp_snarl_records.at(child.second);
-                if (temp_chain_record.chain_components.at(node_i) != temp_chain_record.chain_components.at(node_i+1) &&
-                    temp_chain_record.chain_components.at(node_i+1) != 0){
-                    //If this is a new chain component, then add the loop distance from the snarl
-                    //If the component of the next node is 0, then we're still in the same component since we're going backwards
-                    temp_chain_record.forward_loops.at(node_i) = temp_snarl_record.distance_start_start;
-                } else {
-                    temp_chain_record.forward_loops.at(node_i) =
-                        std::min(SnarlDistanceIndex::sum(SnarlDistanceIndex::sum(
-                                    temp_chain_record.forward_loops.at(node_i+1), 
-                                    2* temp_snarl_record.min_length),
-                                    2*temp_snarl_record.end_node_length), 
-                                temp_snarl_record.distance_start_start);
-                }
-                node_i --;
-                last_node_length = 0;
-            } else {
-                if (last_node_length != 0) {
-                    SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryNodeRecord& temp_node_record = 
-                            temp_index.temp_node_records.at(child.second-temp_index.min_node_id);
-
-
-                    //Check if there is a loop in this node
-                    //Snarls get counted as trivial if they contain no nodes but they might still have edges
-                    size_t forward_loop = std::numeric_limits<size_t>::max();
-                    graph->follow_edges(graph->get_handle(temp_node_record.node_id, temp_node_record.reversed_in_parent), false, [&](const handle_t next_handle) {
-                        if (graph->get_id(next_handle) == temp_node_record.node_id) {
-                            //If there is a loop going forward (relative to the chain) back to the same node
-                            forward_loop = 0;
-                        }
-                    });
-                    temp_chain_record.forward_loops.at(node_i) = std::min( forward_loop,
-                        SnarlDistanceIndex::sum(temp_chain_record.forward_loops.at(node_i+1) , 
-                                                 2*last_node_length));
-                    node_i--;
-                }
-                last_node_length = temp_index.temp_node_records.at(child.second - temp_index.min_node_id).node_length;
-            }
-        }
-
-
-        //If this is a looping chain, check if the loop distances can be improved by going around the chain
-
-        if (temp_chain_record.start_node_id == temp_chain_record.end_node_id && temp_chain_record.children.size() > 1) {
-
-
-            //Also check if the reverse loop values would be improved if we went around again
-
-            if (temp_chain_record.backward_loops.back() < temp_chain_record.backward_loops.front()) {
-                temp_chain_record.backward_loops[0] = temp_chain_record.backward_loops.back();
-                size_t node_i = 1;
-                size_t last_node_length = 0;
-                for (size_t i = 1 ; i < temp_chain_record.children.size()-1 ; i++ ) {
-                    auto& child = temp_chain_record.children.at(i);
-                    if (child.first == SnarlDistanceIndex::TEMP_SNARL) {
-                        SnarlDistanceIndex::TemporaryDistanceIndex::TemporarySnarlRecord& temp_snarl_record = temp_index.temp_snarl_records.at(child.second);
-                        size_t new_loop_distance = SnarlDistanceIndex::sum(SnarlDistanceIndex::sum(
-                                                      temp_chain_record.backward_loops.at(node_i-1), 
-                                                      2*temp_snarl_record.min_length), 
-                                                      2*temp_snarl_record.start_node_length); 
-                        if (temp_chain_record.chain_components.at(node_i)!= 0 || new_loop_distance >= temp_chain_record.backward_loops.at(node_i)) {
-                            //If this is a new chain component or it doesn't improve, stop
-                            break;
-                        } else {
-                            //otherwise record the better distance
-                            temp_chain_record.backward_loops.at(node_i) = new_loop_distance;
-
-                        }
-                        node_i++;
-                        last_node_length = 0;
-                    } else {
-                        if (last_node_length != 0) {
-                            size_t new_loop_distance = SnarlDistanceIndex::sum(temp_chain_record.backward_loops.at(node_i-1), 
-                                    2*last_node_length); 
-                            size_t old_loop_distance = temp_chain_record.backward_loops.at(node_i);
-                            temp_chain_record.backward_loops.at(node_i) = std::min(old_loop_distance,new_loop_distance);
-                            node_i++;
-                        }
-                        last_node_length = temp_index.temp_node_records.at(child.second - temp_index.min_node_id).node_length;
-                    }
-                }
-            }
-            if (temp_chain_record.forward_loops.front() < temp_chain_record.forward_loops.back()) {
-                //If this is a looping chain and looping improves the forward loops, 
-                //then we have to keep going around to update distance
-
-                temp_chain_record.forward_loops.back() = temp_chain_record.forward_loops.front();
-                size_t last_node_length = 0;
-                node_i = temp_chain_record.prefix_sum.size() - 2;
-                for (int j = (int)temp_chain_record.children.size() - 1 ; j >= 0 ; j--) {
-                    auto& child = temp_chain_record.children.at(j);
-                    if (child.first == SnarlDistanceIndex::TEMP_SNARL){
-                        SnarlDistanceIndex::TemporaryDistanceIndex::TemporarySnarlRecord& temp_snarl_record = temp_index.temp_snarl_records.at(child.second);
-                        size_t new_distance = SnarlDistanceIndex::sum(SnarlDistanceIndex::sum(
-                                                temp_chain_record.forward_loops.at(node_i+1), 
-                                                2* temp_snarl_record.min_length),
-                                                2*temp_snarl_record.end_node_length);
-                        if (temp_chain_record.chain_components.at(node_i) != temp_chain_record.chain_components.at(node_i+1) ||
-                            new_distance >= temp_chain_record.forward_loops.at(node_i)){
-                            //If this is a new component or the distance doesn't improve, stop looking
-                            break;
-                        } else {
-                            //otherwise, update the distance
-                            temp_chain_record.forward_loops.at(node_i) = new_distance;
-                        }
-                        node_i --;
-                        last_node_length =0;
-                    } else {
-                        if (last_node_length != 0) {
-                            size_t new_distance = SnarlDistanceIndex::sum(temp_chain_record.forward_loops.at(node_i+1) , 2* last_node_length);
-                            size_t old_distance = temp_chain_record.forward_loops.at(node_i);
-                            temp_chain_record.forward_loops.at(node_i) = std::min(old_distance, new_distance);
-                            node_i--;
-                        }
-                        last_node_length = temp_index.temp_node_records.at(child.second - temp_index.min_node_id).node_length;
-                    }
-                } 
-            }
-        }
-
-        temp_index.max_distance = std::max(temp_index.max_distance, temp_chain_record.max_prefix_sum.back());
-        temp_index.max_distance = temp_chain_record.forward_loops.back() == std::numeric_limits<size_t>::max() ? temp_index.max_distance : std::max(temp_index.max_distance, temp_chain_record.forward_loops.back());
-        temp_index.max_distance = temp_chain_record.backward_loops.front() == std::numeric_limits<size_t>::max() ? temp_index.max_distance : std::max(temp_index.max_distance, temp_chain_record.backward_loops.front());
-        assert(temp_index.max_distance <= 2742664019);
-
-    }
-
-#ifdef debug_distance_indexing
-    cerr << "Filling in the distances in root snarls and distances along chains" << endl;
-#endif
-    for (pair<SnarlDistanceIndex::temp_record_t, size_t>& component_index : temp_index.components) {
-        if (component_index.first == SnarlDistanceIndex::TEMP_SNARL) {
-            SnarlDistanceIndex::TemporaryDistanceIndex::TemporarySnarlRecord& temp_snarl_record = temp_index.temp_snarl_records.at(component_index.second);
-            populate_snarl_index(temp_index, component_index, size_limit, only_top_level_chain_distances, graph);
-            temp_snarl_record.min_length = std::numeric_limits<size_t>::max();
-        }
-    }
-    temp_index.root_structure_count = temp_index.components.size();
-#ifdef debug_distance_indexing
-    assert(temp_index.components.size() == temp_index.root_structure_count);
-    cerr << "Finished temp index with " << temp_index.root_structure_count << " connected components" << endl;
-#endif
-    return temp_index;
-}
-
-
-
-/*Fill in the snarl index.
- * The index will already know its boundaries and everything knows their relationships in the
- * snarl tree. This needs to fill in the distances and the ranks of children in the snarl
- * The rank of a child is arbitrary, except that the start node will always be 0 and the end node
- * will always be the node count+1 (since node count doesn't count the boundary nodes)
- */
-void populate_snarl_index(
-                SnarlDistanceIndex::TemporaryDistanceIndex& temp_index,
-                pair<SnarlDistanceIndex::temp_record_t, size_t> snarl_index, size_t size_limit,
-                bool only_top_level_chain_distances, const HandleGraph* graph) {
-#ifdef debug_distance_indexing
-    cerr << "Getting the distances for snarl " << temp_index.structure_start_end_as_string(snarl_index) << endl;
-    assert(snarl_index.first == SnarlDistanceIndex::TEMP_SNARL);
-#endif
-    SnarlDistanceIndex::TemporaryDistanceIndex::TemporarySnarlRecord& temp_snarl_record = temp_index.temp_snarl_records.at(snarl_index.second);
-    temp_snarl_record.is_simple=true;
-
-
-
-
-    /*Helper function to find the ancestor of a node that is a child of this snarl */
-    auto get_ancestor_of_node = [&](pair<SnarlDistanceIndex::temp_record_t, size_t> curr_index,
-                                    pair<SnarlDistanceIndex::temp_record_t, size_t> ancestor_snarl_index) {
-
-        //This is a child that isn't a node, so it must be a chain
-        if (curr_index.second == temp_snarl_record.start_node_id || 
-            curr_index.second == temp_snarl_record.end_node_id) {
-            return curr_index;
-        }
-
-        //Otherwise, walk up until we hit the current snarl
-        pair<SnarlDistanceIndex::temp_record_t, size_t> parent_index = temp_index.temp_node_records.at(curr_index.second-temp_index.min_node_id).parent;
-        while (parent_index != ancestor_snarl_index) {
-            curr_index=parent_index;
-            parent_index = parent_index.first == SnarlDistanceIndex::TEMP_SNARL ? temp_index.temp_snarl_records.at(parent_index.second).parent
-                                                            : temp_index.temp_chain_records.at(parent_index.second).parent;
-#ifdef debug_distance_indexing
-            assert(parent_index.first != SnarlDistanceIndex::TEMP_ROOT); 
-#endif
-        }
-        
-        return curr_index;
-    };
-
-    // TODO: Copying the list
-    vector<pair<SnarlDistanceIndex::temp_record_t, size_t>> all_children = temp_snarl_record.children;
-
-    // Identify tips
-    for (const auto& child : all_children) {
-        // Check if this node is a tip
-        if (child.first != SnarlDistanceIndex::TEMP_NODE 
-            || (child.second != temp_snarl_record.start_node_id 
-                && child.second != temp_snarl_record.end_node_id)) {
-            bool is_node = (child.first == SnarlDistanceIndex::TEMP_NODE);
-            // Set up to check edges leaving the end of the chain/node
-            nid_t node_id = is_node ? child.second 
-                                    : temp_index.temp_chain_records.at(child.second).end_node_id;
-            size_t rank = is_node ? temp_index.temp_node_records.at(child.second - temp_index.min_node_id).rank_in_parent 
-                                  : temp_index.temp_chain_records.at(child.second).rank_in_parent;
-            bool is_reverse = is_node ? false
-                                      : temp_index.temp_chain_records.at(child.second).end_node_rev;
-            // Convert to an index in all_children
-            rank -= 2;
-            
-            bool has_edges = false;
-            graph->follow_edges(graph->get_handle(node_id, is_reverse), false, [&](const handle_t next_handle) {
-                has_edges = true;
-            });
-            if (!has_edges) {
-                temp_index.temp_node_records.at(node_id - temp_index.min_node_id).is_tip = true;
-                temp_snarl_record.tippy_child_ranks.emplace(rank, false);
-                // It is a tip so this isn't simple snarl
-                temp_snarl_record.is_simple = false;
-            }
-            // Repeat for the other side of the chain/node
-            node_id = is_node ? child.second 
-                              : temp_index.temp_chain_records.at(child.second).start_node_id;
-            is_reverse = is_node ? true
-                                 : !temp_index.temp_chain_records.at(child.second).start_node_rev;
-            has_edges = false;
-            graph->follow_edges(graph->get_handle(node_id, is_reverse), false, [&](const handle_t next_handle) {
-                has_edges = true;
-            });
-            if (!has_edges) {
-                temp_index.temp_node_records.at(node_id - temp_index.min_node_id).is_tip = true;
-                temp_snarl_record.tippy_child_ranks.emplace(rank, true);
-                // It is a tip so this isn't simple snarl
-                temp_snarl_record.is_simple = false;
-            }
-        }
-    }
-
-    /*
-     * Do a topological sort of the children and re-assign ranks based on the sort
-     * TODO: For non-DAGs, this sort will end up arbitrary.
-     *       That doesn't matter right now since the only consumer of ranks
-     *       (ziptrees) expects arbitrary ranks, though.
-     */
-     if (!temp_snarl_record.is_root_snarl) {
-        // Always start the topological sort at the start
-        handle_t topological_sort_start = graph->get_handle(temp_snarl_record.start_node_id,
-                                                            temp_snarl_record.start_node_rev);
-
-        // New sort order. Each value is an index into all_children, which 
-        // matches the ranks(-2) of the children 
-        vector<size_t> topological_sort_order;
-        topological_sort_order.reserve(all_children.size());
-
-        // Which ranks have already been sorted?
-        unordered_set<size_t> visited_ranks;
-        visited_ranks.reserve(all_children.size());
-
-        // All nodes that have no incoming edges
-        vector<pair<size_t, bool>> source_nodes;
-
-        // Add all sources. This will start out as the start node and any tips
-        for (const auto& tip : temp_snarl_record.tippy_child_ranks) {
-            source_nodes.emplace_back(tip.first, !tip.second);
-        }
-
-        // Start node dummy rank is max(). This is traversed first
-        source_nodes.emplace_back(std::numeric_limits<size_t>::max(), false);
-
-        // We'll be done sorting when everything is in the sorted vector
-        while (!source_nodes.empty()) {
-            // Pick a child with no incoming edges
-            pair<size_t, bool> current_child_index = source_nodes.back();
-            source_nodes.pop_back();
-
-            // Visit it
-            if (visited_ranks.count(current_child_index.first) != 0) {
-                // We tried to revisit a source node, so this must be a loop
-                // (we got turned around somewhere is the only way)
-                // Thus it is safe to abort and allow random ranks
-                break;
-            }
-            if (current_child_index.first != std::numeric_limits<size_t>::max()) {
-                topological_sort_order.emplace_back(current_child_index.first);
-            }
-            visited_ranks.emplace(current_child_index.first);
-
-            // Get the graph handle for that child, pointing out from the end of the chain
-            handle_t current_graph_handle;
-            if (current_child_index.first == std::numeric_limits<size_t>::max()) {
-                // If the current child is the start bound, then get the start node pointing in 
-                current_graph_handle = topological_sort_start;
-            } else {
-                pair<SnarlDistanceIndex::temp_record_t, size_t> current_index = all_children[current_child_index.first];
-                if (current_index.first == SnarlDistanceIndex::TEMP_NODE) {
-                    // If the current child is a node, then get the node pointing in the correct direction
-                    current_graph_handle = graph->get_handle(current_index.second, current_child_index.second);
-                } else if (current_child_index.second) {
-                    // If the current child is a chain, and we're traversing the chain backwards
-                    current_graph_handle = graph->get_handle(temp_index.temp_chain_records[current_index.second].start_node_id, 
-                                                            !temp_index.temp_chain_records[current_index.second].start_node_rev);
-                } else {
-                    // Otherwise, the current child is a chain and we're traversing the chain forwards
-                    current_graph_handle = graph->get_handle(temp_index.temp_chain_records[current_index.second].end_node_id, 
-                                                             temp_index.temp_chain_records[current_index.second].end_node_rev);
-                }
-            }
-                 
-            // Try all edges leaving this side
-            graph->follow_edges(current_graph_handle, false, [&](const handle_t next_handle) {
-#ifdef debug_distance_indexing
-                cerr << "Following forward edges from " << graph->get_id(current_graph_handle) 
-                     << " to " << graph->get_id(next_handle) << endl;
-#endif
-                if (graph->get_id(next_handle) == temp_snarl_record.start_node_id ||
-                    graph->get_id(next_handle) == temp_snarl_record.end_node_id) {
-                    // If this is trying to leave the snarl, skip it
-                    return true;
-                }
-                // Is next_handle a new source? Any unvisited predecessors?
-                pair<SnarlDistanceIndex::temp_record_t, size_t> next_index = 
-                    get_ancestor_of_node(make_pair(SnarlDistanceIndex::TEMP_NODE, graph->get_id(next_handle)), snarl_index); 
-                bool next_is_node = next_index.first == SnarlDistanceIndex::TEMP_NODE;
-                size_t next_rank = next_is_node
-                            ? temp_index.temp_node_records.at(next_index.second - temp_index.min_node_id).rank_in_parent  
-                            : temp_index.temp_chain_records[next_index.second].rank_in_parent;
-                // Subtract 2 to get the index from the rank
-                assert(next_rank >= 2);
-                next_rank -= 2;
-                assert(all_children[next_rank] == next_index);
-                bool next_rev = (next_is_node || temp_index.temp_chain_records[next_index.second].is_trivial)
-                            ? graph->get_is_reverse(next_handle) 
-                            : graph->get_id(next_handle) == temp_index.temp_chain_records[next_index.second].end_node_id;
-                if (visited_ranks.count(next_rank) != 0) {
-                    // If this is a loop, abort
-                    return true;
-                }
-
-                // Get the handle from the child represented by next_handle going the other way
-                handle_t reverse_handle = next_index.first == SnarlDistanceIndex::TEMP_NODE ? 
-                            graph->get_handle(next_index.second, !next_rev) :
-                            (next_rev ? graph->get_handle(temp_index.temp_chain_records[next_index.second].end_node_id, 
-                                                          temp_index.temp_chain_records[next_index.second].end_node_rev)
-                                      : graph->get_handle(temp_index.temp_chain_records[next_index.second].start_node_id, 
-                                                         !temp_index.temp_chain_records[next_index.second].start_node_rev));
-
-                // Does this have no unseen incoming edges? Check as we go through incoming edges
-                bool is_source = true;
-
-                // Does this have no unseen incoming edges?
-                graph->follow_edges(reverse_handle, false, [&](const handle_t incoming_handle) {
-#ifdef debug_distance_indexing
-                cerr << "Getting backwards edge to " << graph->get_id(incoming_handle) << endl;
-#endif
-                    if (graph->get_id(incoming_handle) == temp_snarl_record.start_node_id ||
-                        graph->get_id(incoming_handle) == temp_snarl_record.end_node_id) {
-                        // If this is trying to leave the snarl, that is OK
-                        return true;
-                    }
-                    // The index of the snarl's child that next_handle represents
-                    pair<SnarlDistanceIndex::temp_record_t, size_t> incoming_index = 
-                        get_ancestor_of_node(make_pair(SnarlDistanceIndex::TEMP_NODE, graph->get_id(incoming_handle)), snarl_index); 
-                    bool incoming_is_node = incoming_index.first == SnarlDistanceIndex::TEMP_NODE;
-                    size_t incoming_rank = incoming_is_node
-                                ? temp_index.temp_node_records.at(incoming_index.second - temp_index.min_node_id).rank_in_parent  
-                                : temp_index.temp_chain_records[incoming_index.second].rank_in_parent;
-
-                    bool incoming_rev = incoming_is_node || temp_index.temp_chain_records[incoming_index.second].is_trivial 
-                                ? graph->get_is_reverse(incoming_handle) 
-                                : graph->get_id(incoming_handle) == temp_index.temp_chain_records[incoming_index.second].end_node_id;
-                    // Subtract 2 to get the index from the rank
-                    assert(incoming_rank >= 2);
-                    incoming_rank -= 2;
-
-                    // This predecessor is unvisited
-                    if (visited_ranks.count(incoming_rank) == 0) {
-                        is_source = false;
-                    }
-                    // Keep going
-                    return true;
-                });
-                if (is_source) {
-                    source_nodes.emplace_back(next_rank, next_rev);
-                }
-                return true;
-            });
-        }
-
-        // If we have leftover chains, this is a non-DAG and ranks are arbitrary
-        // So we will add any leftover ranks to the topological order
-        vector<bool> check_ranks (all_children.size(), false);
-        for (size_t x : topological_sort_order) {
-            check_ranks[x] = true;
-        }
-        for (size_t i = 0 ; i < check_ranks.size() ; i++) {
-            if (!check_ranks[i]) {
-                topological_sort_order.emplace_back(i);
-            }
-        }
-        assert(topological_sort_order.size() == all_children.size());
-
-
-        // We've finished doing to topological sort, so update every child's rank to be the new order
-        auto old_tippy_ranks = temp_snarl_record.tippy_child_ranks;
-        temp_snarl_record.tippy_child_ranks.clear();
-        for (size_t new_rank = 0 ; new_rank < topological_sort_order.size() ; new_rank++) {
-            size_t old_rank = topological_sort_order[new_rank];
-            if (all_children[old_rank].first == SnarlDistanceIndex::TEMP_NODE) {
-                temp_index.temp_node_records.at(all_children[old_rank].second-temp_index.min_node_id).rank_in_parent = new_rank+2;
-            } else {
-                temp_index.temp_chain_records[all_children[old_rank].second].rank_in_parent = new_rank+2;
-            }
-            const auto& old_is_tip = old_tippy_ranks.find(old_rank);
-            if (old_is_tip != old_tippy_ranks.end()) {
-                temp_snarl_record.tippy_child_ranks.emplace(new_rank, old_is_tip->second);
-            }
-        }
-     }
-
-    /*
-     * Now go through each of the children and add distances from that child to everything reachable from it
-     * Start a dijkstra traversal from each node side in the snarl and record all distances
-     */
-
-
-    if (size_limit != 0 && !only_top_level_chain_distances) { 
-        //If we are saving distances
-        //Reserve enough space to store all possible distances
-        temp_snarl_record.distances.reserve( temp_snarl_record.node_count > size_limit
-                ? temp_snarl_record.node_count * 2
-                : temp_snarl_record.node_count * temp_snarl_record.node_count);
-    } else {
-        temp_snarl_record.include_distances = false;
-    }
-
-    if (size_limit != 0 && temp_snarl_record.node_count > size_limit) {
-        temp_index.most_oversized_snarl_size = std::max(temp_index.most_oversized_snarl_size, temp_snarl_record.node_count);
-        temp_index.use_oversized_snarls = true;
-    }
-
-    //Add the start and end nodes to the list of children so that we include them in the traversal 
-    if (!temp_snarl_record.is_root_snarl) {
-        all_children.emplace_back(SnarlDistanceIndex::TEMP_NODE, temp_snarl_record.start_node_id);
-        all_children.emplace_back(SnarlDistanceIndex::TEMP_NODE, temp_snarl_record.end_node_id);
-    }
-
-    while (!all_children.empty()) {
-        const pair<SnarlDistanceIndex::temp_record_t, size_t> start_index = std::move(all_children.back());
-        all_children.pop_back();
-
-        bool is_internal_node = false;
-
-        if ((start_index.first == SnarlDistanceIndex::TEMP_NODE 
-             && start_index.second != temp_snarl_record.start_node_id 
-             && start_index.second != temp_snarl_record.end_node_id) 
-            || 
-            (start_index.first == SnarlDistanceIndex::TEMP_CHAIN && temp_index.temp_chain_records.at(start_index.second).is_trivial)) {
-            // This is an internal node
-            is_internal_node = true;
-        } else if (start_index.first == SnarlDistanceIndex::TEMP_CHAIN && !temp_index.temp_chain_records.at(start_index.second).is_trivial) {
-            // If this is an internal chain, then it isn't a simple snarl
-            temp_snarl_record.is_simple=false;
-        }
-
-        bool start_is_tip = start_index.first == SnarlDistanceIndex::TEMP_NODE 
-                      ? temp_index.temp_node_records.at(start_index.second-temp_index.min_node_id).is_tip 
-                      : temp_index.temp_chain_records.at(start_index.second).is_tip;
-
-        size_t start_rank = start_index.first == SnarlDistanceIndex::TEMP_NODE 
-                ? temp_index.temp_node_records.at(start_index.second-temp_index.min_node_id).rank_in_parent
-                : temp_index.temp_chain_records.at(start_index.second).rank_in_parent;
-
-
-        if (start_index.first == SnarlDistanceIndex::TEMP_NODE && start_index.second == temp_snarl_record.start_node_id) {
-            start_rank = 0;
-        } else if (start_index.first == SnarlDistanceIndex::TEMP_NODE && start_index.second == temp_snarl_record.end_node_id) {
-            start_rank = 1;
-        } //TODO:
-          //else {
-          //  assert(start_rank != 0 && start_rank != 1);
-          //}
-
-        if ( (temp_snarl_record.node_count > size_limit || size_limit == 0 || only_top_level_chain_distances) && (temp_snarl_record.is_root_snarl || (!start_is_tip &&
-             start_rank != 0 && start_rank != 1))) {
-            //If we don't care about internal distances, and we also are not at a boundary or tip
-            //TODO: Why do we care about tips specifically?
-            continue;
-        }
-
-        //Start from either direction for all nodes, but only going in for start and end
-        vector<bool> directions;
-        if (start_index.first == SnarlDistanceIndex::TEMP_NODE && start_index.second == temp_snarl_record.start_node_id) {
-            directions.emplace_back(temp_snarl_record.start_node_rev);
-        } else if (start_index.first == SnarlDistanceIndex::TEMP_NODE && start_index.second == temp_snarl_record.end_node_id){
-            directions.emplace_back(!temp_snarl_record.end_node_rev);
-        } else {
-            directions.emplace_back(true);
-            directions.emplace_back(false);
-        }
-        for (bool start_rev : directions) {
-            //Start a dijkstra traversal from start_index going in the direction indicated by start_rev
-            //Record the distances to each node (child of the snarl) found
-            size_t reachable_node_count = 0; //How many nodes can we reach from this node side?
-
-#ifdef debug_distance_indexing
-            cerr << "  Starting from child " << temp_index.structure_start_end_as_string(start_index)
-                 << " going " << (start_rev ? "rev" : "fd") << endl;
-#endif
-
-            //Define a NetgraphNode as the value for the priority queue:
-            // <distance, <<type of node, index into temp_node/chain_records>, direction>
-            using NetgraphNode = pair<size_t, pair<pair<SnarlDistanceIndex::temp_record_t, size_t>, bool>>; 
-            auto cmp = [] (const NetgraphNode a, const NetgraphNode b) {
-                return a.first > b.first;
-            };
-
-            //The priority queue of the next nodes to visit, ordered by the distance
-            std::priority_queue<NetgraphNode, vector<NetgraphNode>, decltype(cmp)> queue(cmp);
-            //The nodes we've already visited
-            unordered_set<pair<pair<SnarlDistanceIndex::temp_record_t, size_t>, bool>> visited_nodes;
-            visited_nodes.reserve(temp_snarl_record.node_count * 2);
-
-            //Start from the current start node
-            queue.push(make_pair(0, make_pair(start_index, start_rev)));
-
-            while (!queue.empty()) {
-
-                //Get the current node from the queue and pop it out of the queue
-                size_t current_distance = queue.top().first;
-                pair<SnarlDistanceIndex::temp_record_t, size_t> current_index = queue.top().second.first;
-                bool current_rev = queue.top().second.second;
-                if (visited_nodes.count(queue.top().second)) {
-                    queue.pop();
-                    continue;
-                }
-                visited_nodes.emplace(queue.top().second);
-                queue.pop();
-
-
-                //The handle that we need to follow to get the next reachable nodes
-                //If the current node is a node, then its just the node. Otherwise, it's the 
-                //opposite side of the child chain
-                handle_t current_end_handle = current_index.first == SnarlDistanceIndex::TEMP_NODE ? 
-                        graph->get_handle(current_index.second, current_rev) :
-                        (current_rev ? graph->get_handle(temp_index.temp_chain_records[current_index.second].start_node_id, 
-                                                        !temp_index.temp_chain_records[current_index.second].start_node_rev) 
-                                  : graph->get_handle(temp_index.temp_chain_records[current_index.second].end_node_id, 
-                                                      temp_index.temp_chain_records[current_index.second].end_node_rev));
-
-#ifdef debug_distance_indexing
-                        cerr << "    at child " << temp_index.structure_start_end_as_string(current_index) << " going "
-                             << (current_rev ? "rev" : "fd") << " at actual node " << graph->get_id(current_end_handle) 
-                             << (graph->get_is_reverse(current_end_handle) ? "rev" : "fd") << endl;
-#endif
-                graph->follow_edges(current_end_handle, false, [&](const handle_t next_handle) {
-                    if (graph->get_id(current_end_handle) == graph->get_id(next_handle)){
-                        //If this loops onto the same node side then this isn't a simple snarl
-                        temp_snarl_record.is_simple = false;
-                    } else if ((current_index.first == SnarlDistanceIndex::TEMP_NODE ? current_index.second 
-                                                                                     : (current_rev ? temp_index.temp_chain_records[current_index.second].end_node_id
-                                                                                                    : temp_index.temp_chain_records[current_index.second].start_node_id))
-                                    == graph->get_id(next_handle)){
-                        //If this loops to the other end of the chain then this isn't a simple snarl
-                        temp_snarl_record.is_simple = false;
-                    } else if (!temp_snarl_record.is_root_snarl && start_rank == 0 && 
-                               current_index != start_index && graph->get_id(next_handle) != temp_snarl_record.end_node_id) {
-                        //If the starting point of this traversal was the start of the snarl, the current starting point is not the start node,
-                        //and we found another child, then this is not a simple snarl
-                        temp_snarl_record.is_simple = false;
-                    } else if (!temp_snarl_record.is_root_snarl && start_rank == 1 && 
-                               current_index != start_index && graph->get_id(next_handle) != temp_snarl_record.start_node_id) {
-                        //If the starting point of this traversal was the end of the snarl, the current starting point is not the end node,
-                        //and we found another child, then this is not a simple snarl
-                        temp_snarl_record.is_simple = false;
-                    }
-
-                    reachable_node_count++;
-                    //At each of the nodes reachable from the current one, fill in the distance from the start
-                    //node to the next node (current_distance). If this handle isn't leaving the snarl,
-                    //add the next nodes along with the distance to the end of the next node
-                    auto& node_record = temp_index.temp_node_records.at(graph->get_id(next_handle)-temp_index.min_node_id);
-
-                    //The index of the snarl's child that next_handle represents
-                    pair<SnarlDistanceIndex::temp_record_t, size_t> next_index = 
-                        get_ancestor_of_node(make_pair(SnarlDistanceIndex::TEMP_NODE, graph->get_id(next_handle)), snarl_index); 
-
-                    bool next_is_tip = start_index.first == SnarlDistanceIndex::TEMP_NODE 
-                              ? temp_index.temp_node_records.at(start_index.second-temp_index.min_node_id).is_tip 
-                              : temp_index.temp_chain_records.at(start_index.second).is_tip;
-
-                    //The rank and orientation of next in the snarl
-                    size_t next_rank = next_index.first == SnarlDistanceIndex::TEMP_NODE 
-                            ? node_record.rank_in_parent
-                            : temp_index.temp_chain_records[next_index.second].rank_in_parent;
-                    if (next_index.first == SnarlDistanceIndex::TEMP_NODE && next_index.second == temp_snarl_record.start_node_id) {
-                        next_rank = 0;
-                    } else if (next_index.first == SnarlDistanceIndex::TEMP_NODE && next_index.second == temp_snarl_record.end_node_id) {
-                        next_rank = 1;
-                    } else {
-                        //If the next thing wasn't a boundary node and this was an internal node, then it isn't a simple snarl
-                        if (is_internal_node) {
-                            temp_snarl_record.is_simple = false;
-                        }
-                    }//TODO: This won't be true of root snarls 
-                      //else {
-                      //  assert(next_rank != 0 && next_rank != 1);
-                      //}
-                    bool next_rev = next_index.first == SnarlDistanceIndex::TEMP_NODE || temp_index.temp_chain_records[next_index.second].is_trivial 
-                            ? graph->get_is_reverse(next_handle) 
-                            : graph->get_id(next_handle) == temp_index.temp_chain_records[next_index.second].end_node_id;
-                    
-                    /**Record the distance **/
-                    bool start_is_boundary = !temp_snarl_record.is_root_snarl && (start_rank == 0 || start_rank == 1);
-                    bool next_is_boundary = !temp_snarl_record.is_root_snarl && (next_rank == 0 || next_rank == 1);
-
-                    if (size_limit != 0 &&
-                        (temp_snarl_record.node_count <= size_limit || start_is_boundary || next_is_boundary)) {
-                        //If the snarl is too big, then we don't record distances between internal nodes
-                        //If we are looking at all distances or we are looking at boundaries
-                        bool added_new_distance = false;
-
-                        //Set the distance
-                        pair<size_t, bool> start = start_is_boundary 
-                            ? make_pair(start_rank, false) : make_pair(start_rank, !start_rev);
-                        pair<size_t, bool> next = next_is_boundary 
-                            ? make_pair(next_rank, false) : make_pair(next_rank, next_rev);
-                        if (start_is_boundary && next_is_boundary) {
-                            //If it is between bounds of the snarl, then the snarl stores it
-                            if (start_rank == 0 && next_rank == 0 && 
-                                temp_snarl_record.distance_start_start == std::numeric_limits<size_t>::max()) {
-                                temp_snarl_record.distance_start_start = current_distance;
-                                added_new_distance = true;
-                            } else if (start_rank == 1 && next_rank == 1 && 
-                                       temp_snarl_record.distance_end_end == std::numeric_limits<size_t>::max()) {
-                                temp_snarl_record.distance_end_end = current_distance;
-                                added_new_distance = true;
-                            } else if (((start_rank == 0 && next_rank == 1) || (start_rank == 1 && next_rank == 0))
-                                        && temp_snarl_record.min_length == std::numeric_limits<size_t>::max()){
-                                temp_snarl_record.min_length = current_distance;
-                                added_new_distance = true;
-
-                            }
-                        } else if (start_is_boundary){
-                            //If start is a boundary node
-                            if (next_index.first == SnarlDistanceIndex::TEMP_NODE) {
-                                //Next is a node
-                                auto& temp_node_record = temp_index.temp_node_records.at(next_index.second-temp_index.min_node_id);
-                                if (start_rank == 0 && !next_rev &&
-                                        temp_node_record.distance_left_start == std::numeric_limits<size_t>::max()) {
-                                    temp_node_record.distance_left_start = current_distance;
-                                    added_new_distance = true;
-                                } else if (start_rank == 0 && next_rev &&
-                                        temp_node_record.distance_right_start == std::numeric_limits<size_t>::max()) {
-                                    temp_node_record.distance_right_start = current_distance;
-                                    added_new_distance = true; 
-                                } else if (start_rank == 1 && !next_rev &&
-                                        temp_node_record.distance_left_end == std::numeric_limits<size_t>::max()) {
-                                    temp_node_record.distance_left_end = current_distance;
-                                    added_new_distance = true; 
-                                } else if (start_rank == 1 && next_rev &&
-                                        temp_node_record.distance_right_end == std::numeric_limits<size_t>::max()) {
-                                    temp_node_record.distance_right_end = current_distance;
-                                    added_new_distance = true; 
-                                }
-                            }  else {
-                                //Next is a chain
-                                auto& temp_chain_record = temp_index.temp_chain_records.at(next_index.second);
-                                if (start_rank == 0 && !next_rev &&
-                                        temp_chain_record.distance_left_start == std::numeric_limits<size_t>::max()) {
-                                    temp_chain_record.distance_left_start = current_distance;
-                                    added_new_distance = true;
-                                } else if (start_rank == 0 && next_rev &&
-                                        temp_chain_record.distance_right_start == std::numeric_limits<size_t>::max()) {
-                                    temp_chain_record.distance_right_start = current_distance;
-                                    added_new_distance = true; 
-                                } else if (start_rank == 1 && !next_rev &&
-                                        temp_chain_record.distance_left_end == std::numeric_limits<size_t>::max()) {
-                                    temp_chain_record.distance_left_end = current_distance;
-                                    added_new_distance = true; 
-                                } else if (start_rank == 1 && next_rev &&
-                                        temp_chain_record.distance_right_end == std::numeric_limits<size_t>::max()) {
-                                    temp_chain_record.distance_right_end = current_distance;
-                                    added_new_distance = true; 
-                                }
-                            }
-                        } else if (!next_is_boundary && !temp_snarl_record.distances.count(make_pair(start, next))) {
-                            //Otherwise the snarl stores it in its distance
-                            //If the distance isn't from an internal node to a bound and we haven't stored the distance yet
-
-                            temp_snarl_record.distances[make_pair(start, next)] = current_distance;
-                            added_new_distance = true;
-#ifdef debug_distance_indexing
-                            cerr << "           Adding distance between ranks " << start.first << " " << start.second << " and " << next.first << " " << next.second << ": " << current_distance << endl;
-#endif
-                        }
-                        if (added_new_distance) {
-                            temp_snarl_record.max_distance = std::max(temp_snarl_record.max_distance, current_distance);
-                        }
-                    }
-
-
-                    /**Add the next node to the priority queue**/
-
-                    if (visited_nodes.count(make_pair(next_index, next_rev)) == 0 &&
-                        graph->get_id(next_handle) != temp_snarl_record.start_node_id &&
-                        graph->get_id(next_handle) != temp_snarl_record.end_node_id
-                        ) {
-                        //If this isn't leaving the snarl,
-                        //then add the next node to the queue, along with the distance to traverse it
-                        size_t next_node_length = next_index.first == SnarlDistanceIndex::TEMP_NODE ? graph->get_length(next_handle) :
-                                        temp_index.temp_chain_records[next_index.second].min_length;
-                        if (next_index.first == SnarlDistanceIndex::TEMP_CHAIN &&
-                            temp_index.temp_chain_records[next_index.second].chain_components.back() != 0) {
-                            //If there are multiple components, then the chain is not start-end reachable so its length
-                            //is actually infinite
-                            next_node_length = std::numeric_limits<size_t>::max();
-                        }
-                        if (next_node_length != std::numeric_limits<size_t>::max()) {
-                            queue.push(make_pair(SnarlDistanceIndex::sum(current_distance, next_node_length), 
-                                           make_pair(next_index, next_rev)));
-                        }
-                    }
-                    if (next_index.first == SnarlDistanceIndex::TEMP_CHAIN) {
-                        size_t loop_distance = next_rev ? temp_index.temp_chain_records[next_index.second].backward_loops.back() 
-                                                         : temp_index.temp_chain_records[next_index.second].forward_loops.front();
-                        if (loop_distance != std::numeric_limits<size_t>::max() &&
-                            visited_nodes.count(make_pair(next_index, !next_rev)) == 0 &&
-                            graph->get_id(next_handle) != temp_snarl_record.start_node_id &&
-                            graph->get_id(next_handle) != temp_snarl_record.end_node_id
-                            ) {
-                            //If the next node can loop back on itself, then add the next node in the opposite direction
-                            size_t next_node_len = loop_distance + 2 * graph->get_length(next_handle);
-                            queue.push(make_pair(SnarlDistanceIndex::sum(current_distance, next_node_len), 
-                                           make_pair(next_index, !next_rev)));
-                        }
-                    }
-#ifdef debug_distance_indexing
-                    cerr << "        reached child " << temp_index.structure_start_end_as_string(next_index) << "going " 
-                         << (next_rev ? "rev" : "fd") << " with distance " << current_distance << " for ranks " << start_rank << " " << next_rank << endl;
-#endif
-                });
-            }
-            if (is_internal_node && reachable_node_count != 1) {
-                //If this is an internal node, then it must have only one edge for it to be a simple snarl
-                temp_snarl_record.is_simple = false;
-            }
-        }
-
-        /** Check the minimum length of the snarl passing through this node **/
-        if (start_rank != 0 && start_rank != 1) {
-
-            size_t child_max_length = start_index.first == SnarlDistanceIndex::TEMP_NODE 
-                ? temp_index.temp_node_records.at(start_index.second-temp_index.min_node_id).node_length
-                : temp_index.temp_chain_records.at(start_index.second).max_length;
-            //The distance through the whole snarl traversing this node forwards
-            //(This might actually be traversing it backwards but it doesn't really matter)
-
-            size_t dist_start_left = start_index.first == SnarlDistanceIndex::TEMP_NODE 
-                ? temp_index.temp_node_records.at(start_index.second-temp_index.min_node_id).distance_left_start
-                : temp_index.temp_chain_records.at(start_index.second).distance_left_start;
-            size_t dist_end_right = start_index.first == SnarlDistanceIndex::TEMP_NODE 
-                ? temp_index.temp_node_records.at(start_index.second-temp_index.min_node_id).distance_right_end
-                : temp_index.temp_chain_records.at(start_index.second).distance_right_end;
-            size_t dist_start_right =  start_index.first == SnarlDistanceIndex::TEMP_NODE 
-                ? temp_index.temp_node_records.at(start_index.second-temp_index.min_node_id).distance_right_start
-                : temp_index.temp_chain_records.at(start_index.second).distance_right_start;
-            size_t dist_end_left = start_index.first == SnarlDistanceIndex::TEMP_NODE 
-                ? temp_index.temp_node_records.at(start_index.second-temp_index.min_node_id).distance_left_end
-                : temp_index.temp_chain_records.at(start_index.second).distance_left_end;
-
-            size_t snarl_length_fd = SnarlDistanceIndex::sum(SnarlDistanceIndex::sum(
-                    dist_start_left, dist_end_right),child_max_length);
-            //The same thing traversing this node backwards
-            size_t snarl_length_rev = SnarlDistanceIndex::sum(SnarlDistanceIndex::sum(
-                    dist_start_right, dist_end_left), child_max_length);
-            //The max that isn't infinite
-            size_t max_length = 
-                snarl_length_rev == std::numeric_limits<size_t>::max() 
-                ? snarl_length_fd 
-                : (snarl_length_fd == std::numeric_limits<size_t>::max() 
-                        ? snarl_length_rev 
-                        : std::max(snarl_length_rev, snarl_length_fd));
-            if (max_length != std::numeric_limits<size_t>::max()) {
-                temp_snarl_record.max_length = std::max(temp_snarl_record.max_length, max_length);
-            }
-            if ( temp_snarl_record.is_simple && 
-                ! ((dist_start_left == 0 && dist_end_right == 0 && dist_end_left == std::numeric_limits<size_t>::max() && dist_start_right == std::numeric_limits<size_t>::max() ) || 
-                   (dist_start_left == std::numeric_limits<size_t>::max() && dist_end_right == std::numeric_limits<size_t>::max() && dist_end_left == 0 && dist_start_right == 0 ))){
-                //If the snarl is simple, double check that this node is actually simple: that it can only be traversed going
-                //across the nsarl
-                temp_snarl_record.is_simple = false;
-            }
-        }
-    }
-
-
-    //If this is a simple snarl (one with only single nodes that connect to the start and end nodes), then
-    // we want to remember if the child nodes are reversed 
-    if (temp_snarl_record.is_simple) {
-        for (size_t i = 0 ; i < temp_snarl_record.node_count ; i++) {
-            //Get the index of the child
-            const pair<SnarlDistanceIndex::temp_record_t, size_t>& child_index = temp_snarl_record.children[i];
-            //Which is a node
-#ifdef debug_distance_indexing
-            assert(child_index.first == SnarlDistanceIndex::TEMP_NODE);
-#endif
-
-            //And get the record
-            SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryNodeRecord& temp_node_record =
-                 temp_index.temp_node_records[child_index.second-temp_index.min_node_id];
-            size_t rank =temp_node_record.rank_in_parent;
-
-            
-
-            //Set the orientation of this node in the simple snarl
-            temp_node_record.reversed_in_parent = temp_node_record.distance_left_start == std::numeric_limits<size_t>::max();
-
-        }
-    }
-
-    //Now that the distances are filled in, predict the size of the snarl in the index
-    temp_index.max_index_size += temp_snarl_record.get_max_record_length();
-    if (temp_snarl_record.is_simple) {
-        temp_index.max_index_size -= (temp_snarl_record.children.size() * SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryNodeRecord::get_max_record_length());
-    }
-
-    // For simple snarl records, need  11 + 11 + number of bits for the number of children
-    temp_index.max_bits = std::max(temp_index.max_bits, 22 + SnarlDistanceIndex::bit_width(temp_snarl_record.children.size())); 
-}
-
-
-//Given an alignment to a graph and a range, find the set of nodes in the
-//graph for which the minimum distance from the position to any position
-//in the node is within the given distance range
-//If look_forward is true, then start from the start of the path forward,
-//otherwise start from the end going backward
-void subgraph_in_distance_range(const SnarlDistanceIndex& distance_index, const Path& path, const HandleGraph* super_graph, size_t min_distance,
-                                        size_t max_distance, std::unordered_set<nid_t>& subgraph, bool look_forward){
-
-    //The position we're starting from - either the start or end of the path
-    pos_t start_pos;
-    size_t node_len;
-    if (look_forward ){
-        start_pos = initial_position(path);
-        node_len = super_graph->get_length(super_graph->get_handle(get_id(start_pos)));
+size_t minimum_nontrivial_distance(const SnarlDistanceIndex &distance_index,
+                                   pos_t pos1, pos_t pos2, size_t pos2_length,
+                                   const HandleGraph *graph) {
+  bool shifted = false;
+  if (pos1 == pos2) {
+    if (pos2_length == std::numeric_limits<size_t>::max()) {
+      // If we don't know the length, look it up in the distance index
+      pos2_length = distance_index.minimum_length(
+          distance_index.get_node_net_handle(id(pos2)));
+    }
+    // Must shift one position to avoid self-distance of 0
+    if (offset(pos1) == pos2_length) {
+      // Shift ending pos backward (not safe to shift forward)
+      get_offset(pos2)--;
     } else {
-        start_pos = final_position(path);
-        node_len = super_graph->get_length(super_graph->get_handle(get_id(start_pos)));
-        start_pos = reverse_base_pos(start_pos, node_len);
-    }
-    pair<nid_t, bool> traversal_start = std::make_pair(get_id(start_pos), get_is_rev(start_pos));
-
-#ifdef debug_subgraph
-cerr << endl << "Find subgraph in distance range " << min_distance << " to " << max_distance << endl;
-cerr << "Start positon: "<< start_pos << endl;
-#endif
-    //The distance from the position to the ends of the current node(/snarl/chain)
-    size_t current_distance_left = is_rev(start_pos) ? node_len - get_offset(start_pos) : std::numeric_limits<size_t>::max() ;
-    size_t current_distance_right = is_rev(start_pos) ? std::numeric_limits<size_t>::max() : node_len - get_offset(start_pos) ;
-
-    //Graph node of the start and end of the current node(/snarl/chain) pointing out
-    net_handle_t current_net = distance_index.get_node_net_handle(get_id(start_pos));
-    net_handle_t parent = distance_index.start_end_traversal_of(distance_index.get_parent(current_net));
-
-    //The id and orientation of nodes that are too close and should be avoided
-    hash_set<pair<id_t, bool>> seen_nodes;
-    //Nodes that we want to start a search from - the distance is smaller or equal to than min_distance but
-    //we can't walk out any further along the snarl tree without exceeding it
-    //The distance is the distance from the start position to the beginning (or end if its backwards) of the node,
-    //including the position
-    vector<pair<handle_t, size_t>> search_start_nodes;
-
-    if (((current_distance_left != std::numeric_limits<size_t>::max() && current_distance_left > min_distance) ||
-           (current_distance_right != std::numeric_limits<size_t>::max() && current_distance_right > min_distance)) ||
-         (distance_index.is_trivial_chain(parent) 
-            && distance_index.distance_in_parent(distance_index.get_parent(parent), parent, distance_index.flip(parent)) == 0
-            && node_len*2 > min_distance)) {
-        //If the distance to either end of the node is within the range
-        //Or of there is a loop on the node ( a duplication of just the node) and the node length would put one loop in the distance range
-
-        //Add this node to the subgraph
-        subgraph.emplace(get_id(start_pos));
-
-        handle_t start = is_rev(start_pos) ? distance_index.get_handle(distance_index.flip(current_net), super_graph)
-                                           : distance_index.get_handle(current_net, super_graph); 
-
-        //Add any node one step out from this one to search_start_nodes
-        super_graph->follow_edges(start, 
-                false, [&](const handle_t& next_handle) {
-            search_start_nodes.emplace_back(next_handle, is_rev(start_pos) ? current_distance_left : current_distance_right);
-        });
-
-        //Search for reachable nodes
-        subgraph_in_distance_range_walk_graph(super_graph, min_distance, max_distance, subgraph, search_start_nodes, seen_nodes, traversal_start); 
-
-        return;
+      // Shift starting position forward
+      get_offset(pos1)++;
     }
 
+    shifted = true;
+  }
 
-    
-    while (!distance_index.is_root(parent)) {
-#ifdef debug_subgraph
-        cerr << "At child " << distance_index.net_handle_as_string(current_net) << " with distances " << current_distance_left << " " << current_distance_right << endl;
-#endif
-
-        size_t max_parent_length = distance_index.maximum_length(parent);
-
-
-        //Distances to get to the ends of the parent
-        size_t distance_start_left = SnarlDistanceIndex::sum(current_distance_left,
-                    distance_index.distance_to_parent_bound(parent, true, distance_index.flip(current_net)));
-        size_t distance_start_right = SnarlDistanceIndex::sum(current_distance_right,
-                     distance_index.distance_to_parent_bound(parent, true, current_net));
-        size_t distance_end_left = SnarlDistanceIndex::sum(current_distance_left,
-                    distance_index.distance_to_parent_bound(parent, false, distance_index.flip(current_net)));
-        size_t distance_end_right = SnarlDistanceIndex::sum(current_distance_right,
-                     distance_index.distance_to_parent_bound(parent, false, current_net));
-
-        if ((current_distance_right != std::numeric_limits<size_t>::max() && current_distance_right >= min_distance)
-            || (current_distance_left != std::numeric_limits<size_t>::max() && current_distance_left >= min_distance)
-            || (distance_start_right != std::numeric_limits<size_t>::max() && distance_start_right>= min_distance)
-            || (distance_end_right != std::numeric_limits<size_t>::max() && distance_end_right >= min_distance) 
-            || (distance_start_left != std::numeric_limits<size_t>::max() && distance_start_left >= min_distance)
-            || (distance_end_left != std::numeric_limits<size_t>::max() && distance_end_left >= min_distance)
-            || (max_parent_length != std::numeric_limits<size_t>::max() && max_parent_length >= min_distance)) {
-            //If the min distance will be exceeded within this parent, then start a search from the ends of this child
-
-            if (distance_index.is_snarl(parent)) {
-                //If this is the child of a snarl, then just traverse from the end of the node
-#ifdef debug_subgraph
-cerr << "Start search in parent " << distance_index.net_handle_as_string(parent);
-#endif
-                if (current_distance_left != std::numeric_limits<size_t>::max() ){
-                    //If we can go left
-                    net_handle_t bound = distance_index.is_node(current_net) ? distance_index.flip(current_net) 
-                                : distance_index.get_bound(current_net, false, false);
-                    if (distance_index.is_sentinel(bound)) {
-                        bound = distance_index.get_node_from_sentinel(bound);
-                    }
-                    handle_t current_node = distance_index.get_handle(bound, super_graph);
-                    //Add everything immediately after the left bound of this node/chain
-                    super_graph->follow_edges(distance_index.get_handle(bound, super_graph),
-                            false, [&](const handle_t& next_handle) {
-                        seen_nodes.erase(make_pair(super_graph->get_id(next_handle), super_graph->get_is_reverse(next_handle)));
-                        search_start_nodes.emplace_back(next_handle,current_distance_left);
+  size_t distance = minimum_distance(distance_index, pos1, pos2, false, graph);
+  if (shifted && distance != std::numeric_limits<size_t>::max()) {
+    // This loop is possible, so add back in the shift
+    distance++;
+  }
 
-                    });
-
-#ifdef debug_subgraph
-                    cerr << " going left from " << super_graph->get_id(current_node) << (super_graph->get_is_reverse(current_node) ? "rev " : "fd ") ;
-#endif
-                } 
-                if (current_distance_right != std::numeric_limits<size_t>::max()) {
-                    //If we can go right
-                    net_handle_t bound = distance_index.is_node(current_net) ? current_net 
-                                : distance_index.get_bound(current_net, true, false);
-                    if (distance_index.is_sentinel(bound)) {
-                        bound = distance_index.get_node_from_sentinel(bound);
-                    }
-                    handle_t current_node = distance_index.get_handle(bound, super_graph);
-
-                    //Add everything immediately after the right bound of this node/chain
-                    super_graph->follow_edges(distance_index.get_handle(bound, super_graph),
-                            false, [&](const handle_t& next_handle) {
-                        seen_nodes.erase(make_pair(super_graph->get_id(next_handle),super_graph->get_is_reverse(next_handle)));
-                        search_start_nodes.emplace_back(next_handle, current_distance_right);
-                    });
-
-#ifdef debug_subgraph
-                    cerr << " going right from " << super_graph->get_id(current_node) << (super_graph->get_is_reverse(current_node) ? "rev " : "fd ");
-#endif
-                }
-#ifdef debug_subgraph
-                cerr << endl;
-#endif
-            } else {
-#ifdef debug_subgraph
-cerr << "Start search along parent chain " << distance_index.net_handle_as_string(parent);
-#endif
-                //If this is the child of a chain, then traverse along the chain
-                if (current_distance_left != std::numeric_limits<size_t>::max()) {
-                    subgraph_in_distance_range_walk_across_chain (distance_index, super_graph,  subgraph, 
-                        distance_index.flip(current_net), current_distance_left, search_start_nodes, seen_nodes, min_distance, max_distance, false);
-                }
-                if (current_distance_right != std::numeric_limits<size_t>::max()) {
-                    subgraph_in_distance_range_walk_across_chain (distance_index, super_graph,  subgraph, 
-                        current_net, current_distance_right, search_start_nodes, seen_nodes, min_distance, max_distance, false);
-                }
-            }
-            subgraph_in_distance_range_walk_graph(super_graph, min_distance, max_distance, subgraph, search_start_nodes, seen_nodes, traversal_start); 
-            return;
-        } else if (distance_index.is_snarl(parent)){
-            //TODO: This might be overkill. It prevents us from adding nodes that shouldn't be in the subgraph, but might be too slow
-            //If we don't check the other direction, go through the loop and add everything whose distance is lower than the minimum
-            //to seen_nodes
-            vector<pair<handle_t, size_t>> loop_handles_to_check;
-            handle_t start_out = distance_index.get_handle(distance_index.get_bound(parent, false, false), super_graph);
-            handle_t end_out = distance_index.get_handle(distance_index.get_bound(parent, true, false), super_graph);
-            if (current_distance_left != std::numeric_limits<size_t>::max()) {
-                loop_handles_to_check.emplace_back(distance_index.get_handle(distance_index.get_bound(current_net, false, false), super_graph), current_distance_left);
-            }
-            if (current_distance_right != std::numeric_limits<size_t>::max()) {
-                loop_handles_to_check.emplace_back(distance_index.get_handle(distance_index.get_bound(current_net, true, false), super_graph), current_distance_right);
-            }
-            while (!loop_handles_to_check.empty()) {
-                handle_t current_loop_handle = loop_handles_to_check.back().first;
-                size_t current_loop_distance = loop_handles_to_check.back().second;
-                loop_handles_to_check.pop_back();
-
-                //Add to seen_nodes
-                seen_nodes.emplace(super_graph->get_id(current_loop_handle), super_graph->get_is_reverse(current_loop_handle));
-
-                //Walk one step out from this node
-                super_graph->follow_edges(current_loop_handle, false, [&](const handle_t& next_handle) {
-                    //If the next node is close enough and isn't exiting the snarl, then add it to stack
-                    size_t new_distance = SnarlDistanceIndex::sum(current_loop_distance, super_graph->get_length(next_handle));
-                    if (new_distance < min_distance && next_handle != start_out && next_handle != end_out) {
-                        loop_handles_to_check.emplace_back(next_handle, new_distance);
-                    }
-                });
-            }
-        } else if (distance_index.is_chain(parent)) {
-            //TODO: This is probably also overkill - walk a chain if there is a viable loop
-            size_t distance_loop_right = distance_index.distance_in_parent(parent, current_net, current_net, super_graph, max_distance);
-            size_t distance_loop_left =  distance_index.distance_in_parent(parent, distance_index.flip(current_net), distance_index.flip(current_net), super_graph, max_distance);
-            if ((current_distance_left != std::numeric_limits<size_t>::max() && distance_loop_left != std::numeric_limits<size_t>::max()) ||
-                (current_distance_right != std::numeric_limits<size_t>::max() && distance_loop_right != std::numeric_limits<size_t>::max())) {
-                //If there is a loop that we can take, then take it
-                if (current_distance_left != std::numeric_limits<size_t>::max()) {
-                    subgraph_in_distance_range_walk_across_chain (distance_index, super_graph,  subgraph, 
-                        distance_index.flip(current_net), current_distance_left, search_start_nodes, seen_nodes, min_distance, max_distance, false);
-                }
-                if (current_distance_right != std::numeric_limits<size_t>::max()) {
-                    subgraph_in_distance_range_walk_across_chain (distance_index, super_graph,  subgraph, 
-                        current_net, current_distance_right, search_start_nodes, seen_nodes, min_distance, max_distance, false);
-                }
-                subgraph_in_distance_range_walk_graph(super_graph, min_distance, max_distance, subgraph, search_start_nodes, seen_nodes, traversal_start); 
-                return;
-            }
-        }
-
-        //Remember the bounds of this child so we don't return to it
-        if (current_distance_left != std::numeric_limits<size_t>::max() ){
-            //If we can go left
-            net_handle_t bound = distance_index.is_node(current_net) ? distance_index.flip(current_net) 
-                        : distance_index.get_bound(current_net, false, false);
-            if (distance_index.is_sentinel(bound)) {
-                bound = distance_index.get_node_from_sentinel(bound);
-            }
-            handle_t current_node = distance_index.get_handle(bound, super_graph);
-            seen_nodes.emplace(super_graph->get_id(current_node), super_graph->get_is_reverse(current_node));
-        }
-        if (current_distance_right != std::numeric_limits<size_t>::max()) {
-            //If we can go right
-            net_handle_t bound = distance_index.is_node(current_net) ? current_net 
-                        : distance_index.get_bound(current_net, true, false);
-            if (distance_index.is_sentinel(bound)) {
-                bound = distance_index.get_node_from_sentinel(bound);
-            }
-            handle_t current_node = distance_index.get_handle(bound, super_graph);
-            seen_nodes.emplace(super_graph->get_id(current_node), super_graph->get_is_reverse(current_node));
-        }
-
-        current_distance_left = std::min(distance_start_left, distance_start_right);
-        current_distance_right = std::min(distance_end_left, distance_end_right);
-
-        current_net = std::move(parent);
-        parent = distance_index.canonical(distance_index.get_parent(current_net));
-    }
-    if (current_distance_left <= min_distance) {
-#ifdef debug_subgraph
-        cerr << "Adding the end of a child of the root " << distance_index.net_handle_as_string(distance_index.get_bound(current_net, false, false)) << " with distance " << current_distance_left << endl;
-#endif
-
-        handle_t bound = distance_index.get_handle(distance_index.get_bound(current_net, false, false), super_graph);
-        search_start_nodes.emplace_back(bound, current_distance_left);
-    }
-    if (current_distance_right <= min_distance) {
-#ifdef debug_subgraph
-        cerr << "Adding the end of a child of the root " << distance_index.net_handle_as_string(distance_index.get_bound(current_net, false, false)) << " with distance " << current_distance_right << endl;
-#endif
-        handle_t bound = distance_index.get_handle(distance_index.get_bound(current_net, true, false), super_graph);
-        search_start_nodes.emplace_back(bound,current_distance_right);
-    }
-    subgraph_in_distance_range_walk_graph(super_graph, min_distance, max_distance, subgraph, search_start_nodes, seen_nodes, traversal_start); 
-
-    return;
+  return distance;
 }
 
-
-///Helper for subgraph_in_distance_range
-///Given starting handles in the super graph and the distances to each handle (including the start position and
-//the first position in the handle), add all nodes within the distance range, excluding nodes in seen_nodes
-void subgraph_in_distance_range_walk_graph(const HandleGraph* super_graph, size_t min_distance, size_t max_distance,
-                        std::unordered_set<nid_t>& subgraph, vector<pair<handle_t, size_t>>& start_nodes,
-                        hash_set<pair<nid_t, bool>>& seen_nodes, const pair<nid_t, bool>& traversal_start) {
-#ifdef debug_subgraph
-    cerr << "Starting search from nodes " << endl;
-    for (auto& start_handle : start_nodes) {
-        cerr << "\t" << super_graph->get_id(start_handle.first) << " " << super_graph->get_is_reverse(start_handle.first)
-             << " with distance " << start_handle.second << endl;
-    }
-#endif
-
-    //Order based on the distance to the position (handle)
-    auto cmp =  [] (const pair<handle_t, size_t> a, const pair<handle_t, size_t> b ) {
-            return a.second > b.second;
-        };
-    priority_queue< pair<handle_t, size_t>, vector<pair<handle_t, size_t>>, decltype(cmp)> next_handles (cmp);
-    for (auto& start_handle : start_nodes) {
-        next_handles.emplace(start_handle);
-    }
-    bool first_node = true;
-
-    while (next_handles.size() > 0) {
-        //Traverse the graph, adding nodes if they are within the range
-        handle_t curr_handle=next_handles.top().first;
-        size_t curr_distance=next_handles.top().second;
-        next_handles.pop();
-#ifdef debug_subgraph
-        cerr << "At node " << super_graph->get_id(curr_handle) << " " << super_graph->get_is_reverse(curr_handle) << " with distance " << curr_distance << endl;
-#endif
-        if (seen_nodes.count(make_pair(super_graph->get_id(curr_handle), super_graph->get_is_reverse(curr_handle))) == 0) {
-            seen_nodes.emplace(super_graph->get_id(curr_handle), super_graph->get_is_reverse(curr_handle));
-
-            size_t node_len = super_graph->get_length(curr_handle);
-            size_t curr_distance_end = SnarlDistanceIndex::sum(curr_distance, node_len)-1;
-            if ((curr_distance >= min_distance && curr_distance <= max_distance) ||
-                 (curr_distance_end >= min_distance && curr_distance_end <= max_distance) ||
-                 (curr_distance <= min_distance && curr_distance_end >= max_distance)) {
-#ifdef debug_subgraph
-                cerr << "\tadding node " << super_graph->get_id(curr_handle) << " " << super_graph->get_is_reverse(curr_handle) << " with distance "
-                     << curr_distance << " and node length " << node_len << endl;
-#endif
-                subgraph.insert(super_graph->get_id(curr_handle));
-
-            }
-#ifdef debug_subgraph
-            else {
-                cerr << "\tdisregarding node " << super_graph->get_id(curr_handle) << " " << super_graph->get_is_reverse(curr_handle)
-                     << " with distance " << curr_distance << " and node length " << node_len << endl;
-            }
-#endif
-            curr_distance = SnarlDistanceIndex::sum(node_len, curr_distance);
-
-            //If the end of this node is still within the range, add the next nodes that are within
-            //Also check that the node we're currently at isn't the start node
-            if (SnarlDistanceIndex::minus(curr_distance,1) <= max_distance) {
-                super_graph->follow_edges(curr_handle, false, [&](const handle_t& next) {
-                    nid_t next_id = super_graph->get_id(next);
-                    if (seen_nodes.count(make_pair(next_id, super_graph->get_is_reverse(next))) == 0) {
-                        next_handles.emplace(next, curr_distance);
-                    } 
-                    return true;
-                });
-            }
-            first_node = false;
-        } 
-#ifdef debug_subgraph 
-        else {
-            cerr << "\tthe node was already seen" << endl;
-        }
-#endif
-
-    }
-
-#ifdef debug_subgraph
-    cerr << "Subgraph has nodes: ";
-    for (const nid_t& node : subgraph) {
-        cerr << node << ", ";
-    }
-    cerr << endl;
-#endif
-    return;
+size_t maximum_distance(const SnarlDistanceIndex &distance_index, pos_t pos1,
+                        pos_t pos2) {  // Wrapper: unpacks each pos_t into (id, is_rev, offset) for the index's own maximum_distance().
+  return distance_index.maximum_distance(get_id(pos1), get_is_rev(pos1),
+                                         get_offset(pos1), get_id(pos2),
+                                         get_is_rev(pos2), get_offset(pos2));  // The index's answer is returned unchanged.
 }
-//helper function to walk along a chain from the current node until the distance traversed
-//exceeds the minimum limit. Add the node just before this happens to search_start_nodes
-void subgraph_in_distance_range_walk_across_chain (const SnarlDistanceIndex& distance_index, const HandleGraph* super_graph,
-        std::unordered_set<nid_t>& subgraph, net_handle_t current_node, 
-        size_t current_distance, vector<pair<handle_t, size_t>>& search_start_nodes, hash_set<pair<nid_t, bool>>& seen_nodes, 
-        const size_t& min_distance, const size_t& max_distance, bool checked_loop){
-#ifdef debug_subgraph
-    cerr << "Walk along parent chain " << distance_index.net_handle_as_string(distance_index.get_parent(current_node)) << " from " << distance_index.net_handle_as_string(current_node) << " with " << current_distance << endl;
-#endif
-    if (distance_index.is_trivial_chain(distance_index.get_parent(current_node))){
-        return;
-    }
-    bool finished_chain = false;
-    bool added_nodes = false; //Did we start a search? if not, add the last node in the chain
-    while (current_distance <= min_distance && !finished_chain) {
-        finished_chain = distance_index.follow_net_edges(current_node, super_graph, false, 
-            [&](const net_handle_t& next) {
-                size_t next_length = distance_index.minimum_length(next);
-                //If the next child is a snarl, then the distance to loop in the snarl
-                if (distance_index.is_snarl(next)) {
-                    net_handle_t bound_fd = distance_index.get_bound(next, distance_index.ends_at(next) == SnarlDistanceIndex::START, true);
-                    size_t next_loop = distance_index.distance_in_parent(next, bound_fd, bound_fd, super_graph, max_distance);
-                    if (!checked_loop && next_loop != std::numeric_limits<size_t>::max()) {
-#ifdef debug_subgraph
-                        cerr << "\tsnarl loops so also check the other direction" << endl;
-#endif
-                        //If we haven't yet checked the chain in the other direction and this snarl allows us to loop
-                        if ( SnarlDistanceIndex::sum(next_loop, current_distance) != std::numeric_limits<size_t>::max()  &&
-                             SnarlDistanceIndex::sum(SnarlDistanceIndex::sum(next_loop, 
-                                                                             current_distance), 
-                                                                             distance_index.node_length(current_node)) >= min_distance) {
-#ifdef debug_subgraph
-                            cerr << "\t\t add the current node" << endl;
-#endif
-                            //If the loop will put us over the edge, then start from the current node
-                            super_graph->follow_edges(distance_index.get_handle(current_node, super_graph), false, [&](const handle_t& next_handle) {
-                                search_start_nodes.emplace_back(next_handle,current_distance);
-                            });
-                            return true;
-                        } else {
-                            //Otherwise, switch direction in the chain and walk along it again
-                            subgraph_in_distance_range_walk_across_chain(distance_index, super_graph, subgraph, distance_index.flip(current_node),
-                                    SnarlDistanceIndex::sum(SnarlDistanceIndex::sum(current_distance, 
-                                                                                    next_loop), 
-                                                                                    distance_index.node_length(current_node)), 
-                                    search_start_nodes, seen_nodes, min_distance, max_distance, true);
-                            checked_loop = true;
-                        }
-                    }
-                    if (next_loop != std::numeric_limits<size_t>::max()){
-                        //TODO: This might be overkill. It prevents us from adding nodes that shouldn't be in the subgraph, but might be too slow
-                        //If we don't check the other direction, go through the loop and add everything whose distance is lower than the minimum
-                        //to seen_nodes
-                        vector<pair<handle_t, size_t>> loop_handles_to_check;
-                        handle_t start_out = distance_index.get_handle(distance_index.get_bound(next, false, false), super_graph);
-                        handle_t end_out = distance_index.get_handle(distance_index.get_bound(next, true, false), super_graph);
-                        loop_handles_to_check.emplace_back(distance_index.get_handle(bound_fd, super_graph), current_distance);
-                        while (!loop_handles_to_check.empty()) {
-                            handle_t current_loop_handle = loop_handles_to_check.back().first;
-                            size_t current_loop_distance = loop_handles_to_check.back().second;
-                            loop_handles_to_check.pop_back();
-
-                            //Add to seen_nodes
-                            seen_nodes.emplace(super_graph->get_id(current_loop_handle), super_graph->get_is_reverse(current_loop_handle));
-
-                            //Walk one step out from this node
-                            super_graph->follow_edges(current_loop_handle, false, [&](const handle_t& next_handle) {
-                                //If the next node is close enough and isn't exiting the snarl, then add it to stack
-                                size_t new_distance = SnarlDistanceIndex::sum(current_loop_distance, super_graph->get_length(next_handle));
-                                if (new_distance < min_distance && next_handle != start_out && next_handle != end_out) {
-                                    loop_handles_to_check.emplace_back(next_handle, new_distance);
-                                }
-                            });
-                        }
-
-                    }
-                }
-                size_t next_max_length = distance_index.maximum_length(next);
-#ifdef debug_subgraph
-                cerr << "\tnext node: " << distance_index.net_handle_as_string(next) << " with distance " << current_distance << " and min and max lengths " << next_length << " " << next_max_length << endl;
-#endif
-                if (( SnarlDistanceIndex::sum(next_max_length, current_distance) != std::numeric_limits<size_t>::max()  &&
-                     SnarlDistanceIndex::sum(next_max_length, current_distance) >= min_distance)){
-                    if (distance_index.is_node(next)) {
-                        size_t curr_distance_end = SnarlDistanceIndex::minus(SnarlDistanceIndex::sum(next_max_length, current_distance),1);
-                        //If its a node that puts us over, add the node to the subgraph, then start the search from that node
-#ifdef debug_subgraph
-                        cerr << "\t\tAdding node from a chain " << distance_index.net_handle_as_string(next) << " with distance " << current_distance << endl;
-#endif
-                        if ((current_distance >= min_distance && current_distance <= max_distance) ||
-                             (curr_distance_end >= min_distance && curr_distance_end <= max_distance) ||
-                             (current_distance <= min_distance && curr_distance_end >= max_distance)) {
-                            subgraph.emplace(distance_index.node_id(next));
-                        }
-                        super_graph->follow_edges(distance_index.get_handle(next, super_graph), false, [&](const handle_t& next_handle) {
-                            search_start_nodes.emplace_back(next_handle, SnarlDistanceIndex::sum(current_distance, next_length));
-                            seen_nodes.erase(make_pair(super_graph->get_id(next_handle), super_graph->get_is_reverse(next_handle)));
-                        });
-                    } else {
-                        //If it's a snarl, then we'll start from the last node
-#ifdef debug_subgraph
-                        cerr << "\t\tAdding node from a chain " << distance_index.net_handle_as_string(next) << " with distance " << current_distance << endl;
-#endif
-                        super_graph->follow_edges(distance_index.get_handle(current_node, super_graph), false, [&](const handle_t& next_handle) {
-                            search_start_nodes.emplace_back(next_handle,current_distance);
-                            seen_nodes.erase(make_pair(super_graph->get_id(next_handle), super_graph->get_is_reverse(next_handle)));
-                        });
-                    }
-                    //If we added something, stop traversing the chain
-                    added_nodes = true;
-                    return true;
-                } else if (distance_index.is_node(next)) {
-                    seen_nodes.emplace(distance_index.node_id(next), distance_index.ends_at(next) == SnarlDistanceIndex::START);
-                }
-                current_node = next;
-                current_distance = SnarlDistanceIndex::sum(next_length, current_distance);
-                if (current_distance > max_distance) {
-                    added_nodes = true;
-                    return true;
-                } else {
-                    return false;
-                }
-        }); 
-    }
-    if (!added_nodes && current_distance <= max_distance) {
-        //If we haven't added anything and haven't exceeded the distance limit, then start from the end of the chain
-        handle_t bound = distance_index.get_handle(current_node, super_graph);
 
-        super_graph->follow_edges(bound, false, [&](const handle_t& next_handle) {
-            search_start_nodes.emplace_back(next_handle,current_distance);
-            seen_nodes.erase(make_pair(super_graph->get_id(next_handle), super_graph->get_is_reverse(next_handle)));
-        });
-        //seen_nodes.erase(make_pair(super_graph->get_id(bound), super_graph->get_is_reverse(bound)));
-        //search_start_nodes.emplace_back( bound, current_distance);
-    }
-};
-
-
-void subgraph_containing_path_snarls(const SnarlDistanceIndex& distance_index, const HandleGraph* graph, const Path& path, std::unordered_set<nid_t>& subgraph) {
-    //Get the start and end of the path
-    pos_t start_pos = initial_position(path);
-    net_handle_t start_node = distance_index.get_node_net_handle(get_id(start_pos));
-    subgraph.insert(get_id(start_pos));
-
-    pos_t end_pos = final_position(path);
-    net_handle_t end_node = distance_index.get_node_net_handle(get_id(end_pos));
-    subgraph.insert(get_id(end_pos));
-
-    //Get the lowest common ancestor
-    pair<net_handle_t, bool> lowest_ancestor_bool = distance_index.lowest_common_ancestor(start_node, end_node);
-    net_handle_t common_ancestor = lowest_ancestor_bool.first;
-    
-    
-    if (distance_index.is_snarl(common_ancestor) || common_ancestor == start_node) {
-        //If the lowest common ancestor is a snarl, just add the entire snarl
-
-        add_descendants_to_subgraph(distance_index, common_ancestor, subgraph);
-
-    } else if (distance_index.is_chain(common_ancestor)) {
-
-        //Get the ancestors of the nodes that are children of the common ancestor
-        net_handle_t ancestor1 = distance_index.canonical(distance_index.get_parent(start_node));
-        while (ancestor1 != common_ancestor) {
-            start_node = ancestor1;
-            ancestor1 = distance_index.canonical(distance_index.get_parent(start_node));
-        }
-        net_handle_t ancestor2 = distance_index.canonical(distance_index.get_parent(end_node));
-        while (ancestor2 != common_ancestor) {
-            end_node = ancestor2;
-            ancestor2 = distance_index.canonical(distance_index.get_parent(end_node));
-        }
-#ifdef debug_distance_indexing
-        assert(ancestor1 == ancestor2);
-#endif
-
-
-        //Walk from one ancestor to the other and add everything in the chain
-        net_handle_t current_child = distance_index.canonical(distance_index.is_ordered_in_chain(start_node, end_node) ? start_node : end_node);
-        net_handle_t end_child = distance_index.canonical(distance_index.is_ordered_in_chain(start_node, end_node) ? end_node : start_node);
-        if (distance_index.is_reversed_in_parent(current_child)) {
-            current_child = distance_index.flip(current_child);
-        }
-        if (distance_index.is_reversed_in_parent(end_child)) {
-            end_child = distance_index.flip(end_child);
-        }
-
-        add_descendants_to_subgraph(distance_index, current_child, subgraph);
-        while (current_child != end_child) {
-            distance_index.follow_net_edges(current_child, graph, false, [&](const net_handle_t& next) {
-                add_descendants_to_subgraph(distance_index, next, subgraph);
-                current_child = next;
-
-            });
-        }
-
-    }
-    
+void fill_in_distance_index(SnarlDistanceIndex *distance_index, // index to configure and fill in
+                            const HandleGraph *graph, // graph passed to both build steps below
+                            const HandleGraphSnarlFinder *snarl_finder, // handed to make_temporary_distance_index
+                            size_t size_limit, // snarl size limit: stored on the index and used for the temporary build
+                            bool only_top_level_chain_distances, // stored on the index and used for the temporary build
+                            bool silence_warnings) { // true suppresses the oversized-snarl warning below
+  distance_index->set_snarl_size_limit(size_limit);
+  distance_index->set_only_top_level_chain_distances(
+      only_top_level_chain_distances);
+
+  // Build the temporary distance index from the graph, with the same two settings just stored above
+  SnarlDistanceIndex::TemporaryDistanceIndex temp_index =
+      make_temporary_distance_index(graph, snarl_finder, size_limit,
+                                    only_top_level_chain_distances);
+  // Unless silenced, warn on stderr when the temporary index reports oversized snarls
+  if (!silence_warnings && temp_index.use_oversized_snarls) {
+    cerr << "warning: distance index uses oversized snarls, (the biggest has "
+         << temp_index.most_oversized_snarl_size
+         << " nodes), which may make mapping slow" << endl;
+    cerr << "\ttry increasing --snarl-limit when building the distance index"
+         << endl;
+  }
+
+  // And fill in the permanent distance index; get_snarl_tree_records takes a vector of temporary indexes, here just the one
+  vector<const SnarlDistanceIndex::TemporaryDistanceIndex *> indexes;
+  indexes.emplace_back(&temp_index);
+  distance_index->get_snarl_tree_records(indexes, graph);
 }
 
-
-//Recursively add all nodes in parent to the subgraph
-void add_descendants_to_subgraph(const SnarlDistanceIndex& distance_index, const net_handle_t& parent, std::unordered_set<nid_t>& subgraph) {
-    if (distance_index.is_node(parent)) {
-        subgraph.insert(distance_index.node_id(parent));
-    } else {
-        distance_index.for_each_child(parent, [&](const net_handle_t& child) {
-            add_descendants_to_subgraph(distance_index, child, subgraph);
-        });
-    }
+void subgraph_containing_path_snarls(const SnarlDistanceIndex &distance_index, // index answering all the snarl-tree queries below
+                                     const HandleGraph *graph, const Path &path, // graph is only passed to follow_net_edges; path supplies the two end positions
+                                     std::unordered_set<nid_t> &subgraph) { // output: node IDs are inserted into this set
+  // Get the start and end of the path; both end node IDs always go into subgraph
+  pos_t start_pos = initial_position(path);
+  net_handle_t start_node =
+      distance_index.get_node_net_handle(get_id(start_pos));
+  subgraph.insert(get_id(start_pos));
+
+  pos_t end_pos = final_position(path);
+  net_handle_t end_node = distance_index.get_node_net_handle(get_id(end_pos));
+  subgraph.insert(get_id(end_pos));
+
+  // Get the lowest common ancestor of the two end nodes (only .first of the returned pair is used)
+  pair<net_handle_t, bool> lowest_ancestor_bool =
+      distance_index.lowest_common_ancestor(start_node, end_node);
+  net_handle_t common_ancestor = lowest_ancestor_bool.first;
+
+  if (distance_index.is_snarl(common_ancestor) ||
+      common_ancestor == start_node) {
+    // If the lowest common ancestor is a snarl, or is the start node itself,
+    // just add everything under it
+    add_descendants_to_subgraph(distance_index, common_ancestor, subgraph);
+
+  } else if (distance_index.is_chain(common_ancestor)) {
+    // (Any other kind of common ancestor adds nothing beyond the two end nodes.)
+    // Walk start_node and end_node up the tree until each is a direct child of the common ancestor
+    net_handle_t ancestor1 =
+        distance_index.canonical(distance_index.get_parent(start_node));
+    while (ancestor1 != common_ancestor) {
+      start_node = ancestor1;
+      ancestor1 =
+          distance_index.canonical(distance_index.get_parent(start_node));
+    }
+    net_handle_t ancestor2 =
+        distance_index.canonical(distance_index.get_parent(end_node));
+    while (ancestor2 != common_ancestor) {
+      end_node = ancestor2;
+      ancestor2 = distance_index.canonical(distance_index.get_parent(end_node));
+    }
+#ifdef debug_distance_indexing
+    assert(ancestor1 == ancestor2);
+#endif
+    // Start from whichever child is_ordered_in_chain selects (presumably the one earlier in the chain)
+    // and walk from it to the other, adding everything in the chain on the way
+    net_handle_t current_child = distance_index.canonical(
+        distance_index.is_ordered_in_chain(start_node, end_node) ? start_node
+                                                                 : end_node);
+    net_handle_t end_child = distance_index.canonical(
+        distance_index.is_ordered_in_chain(start_node, end_node) ? end_node
+                                                                 : start_node);
+    if (distance_index.is_reversed_in_parent(current_child)) { // flip any child that is reversed in its parent
+      current_child = distance_index.flip(current_child);
+    }
+    if (distance_index.is_reversed_in_parent(end_child)) {
+      end_child = distance_index.flip(end_child);
+    }
+    // Add the first child, then keep following net edges, adding each handle reached, until end_child is reached
+    add_descendants_to_subgraph(distance_index, current_child, subgraph);
+    while (current_child != end_child) {
+      distance_index.follow_net_edges(
+          current_child, graph, false, [&](const net_handle_t &next) {
+            add_descendants_to_subgraph(distance_index, next, subgraph);
+            current_child = next; // advance the walk to the handle just added
+          });
+    }
+  }
 }
-   
-
 
+// Recursively add the ID of every node that is parent itself or lies below parent to subgraph
+void add_descendants_to_subgraph(const SnarlDistanceIndex &distance_index,
+                                 const net_handle_t &parent,
+                                 std::unordered_set<nid_t> &subgraph) {
+  if (distance_index.is_node(parent)) { // base case: parent is a node, so record its ID
+    subgraph.insert(distance_index.node_id(parent));
+  } else { // otherwise recurse into every child that for_each_child reports
+    distance_index.for_each_child(parent, [&](const net_handle_t &child) {
+      add_descendants_to_subgraph(distance_index, child, subgraph);
+    });
+  }
 }
 
+} // namespace vg
diff --git a/src/snarl_distance_index.hpp b/src/snarl_distance_index.hpp
index 43268d4b23..e502b9aa12 100644
--- a/src/snarl_distance_index.hpp
+++ b/src/snarl_distance_index.hpp
@@ -2,6 +2,7 @@
 #define VG_SNARL_DISTANCE_HPP_INCLUDED
 
 #include <bdsg/snarl_distance_index.hpp>
+#include <bdsg/ch.hpp> 
 #include "snarls.hpp"
 #include <structures/union_find.hpp>
 #include "hash_map.hpp"
@@ -36,7 +37,7 @@ void fill_in_distance_index(SnarlDistanceIndex* distance_index, const HandleGrap
 
 /// Fill in the temporary snarl record with distances
 void populate_snarl_index(SnarlDistanceIndex::TemporaryDistanceIndex& temp_index, 
-    pair<SnarlDistanceIndex::temp_record_t, size_t> snarl_index, size_t size_limit, bool only_top_level_chain_distances, const HandleGraph* graph) ;
+    SnarlDistanceIndex::temp_record_ref_t snarl_index, size_t size_limit, bool only_top_level_chain_distances, const HandleGraph* graph) ;
 
 SnarlDistanceIndex::TemporaryDistanceIndex make_temporary_distance_index(const HandleGraph* graph, const HandleGraphSnarlFinder* snarl_finder, 
                                                                          size_t size_limit, bool only_top_level_chain_distances);
diff --git a/src/snarl_distance_index_build.cpp b/src/snarl_distance_index_build.cpp
new file mode 100644
index 0000000000..7100616c04
--- /dev/null
+++ b/src/snarl_distance_index_build.cpp
@@ -0,0 +1,1708 @@
+//#define debug_distance_indexing
+//#define debug_snarl_traversal
+//#define debug_distances
+//#define debug_hub_label_build
+//#define debug_hub_label_storage
+
+#include "snarl_distance_index.hpp"
+#include "snarl_distance_index_child_graph.hpp"
+#include <span>
+
+using namespace std;
+using namespace handlegraph;
+namespace vg {
+
+SnarlDistanceIndex::TemporaryDistanceIndex make_temporary_distance_index(
+    const HandleGraph* graph, const HandleGraphSnarlFinder* snarl_finder, size_t size_limit, bool only_top_level_chain_distances)  {
+
+#ifdef debug_distance_indexing
+    cerr << "Creating new distance index for nodes between " << graph->min_node_id() << " and " << graph->max_node_id() << endl;
+
+#endif
+
+    SnarlDistanceIndex::TemporaryDistanceIndex temp_index;
+
+    temp_index.min_node_id=graph->min_node_id();
+    temp_index.max_node_id=graph->max_node_id();
+
+    //Construct the distance index using the snarl decomposition
+    //traverse_decomposition will visit all structures (including trivial snarls), calling
+    //each of the given functions for the start and ends of the snarls and chains
+
+    temp_index.temp_node_records.resize(temp_index.max_node_id-temp_index.min_node_id+1);
+
+
+
+    //Stores unfinished records, as type of record and offset into appropriate vector
+    //(temp_node/snarl/chain_records)
+    vector<SnarlDistanceIndex::temp_record_ref_t> stack;
+
+    //There may be components of the root that are connected to each other. Each connected component will
+    //get put into a (fake) root-level snarl, but we don't know what those components will be initially,
+    //since the decomposition just puts them in the same root snarl. This is used to group the root-level
+    //components into connected components that will later be used to make root snarls
+    structures::UnionFind root_snarl_component_uf (0);
+
+
+    /*Go through the decomposition top down and record the connectivity of the snarls and chains
+     * Distances will be added later*/
+
+    snarl_finder->traverse_decomposition(
+    [&](handle_t chain_start_handle) {
+        /*This gets called when a new chain is found, starting at the start handle going into chain
+         * For the first node in a chain, create a chain record and fill in the first node.
+         * Also add the first node record
+         */
+#ifdef debug_distance_indexing
+        cerr << "  Starting new chain at " << graph->get_id(chain_start_handle) << (graph->get_is_reverse(chain_start_handle) ? " reverse" : " forward") << endl;
+        //We shouldn't have seen this node before
+        //assert(temp_index.get_node(make_pair(SnarlDistanceIndex::TEMP_NODE, graph->get_id(chain_start_handle))).node_id == 0);
+#endif
+
+        //Fill in node in chain
+        stack.emplace_back(SnarlDistanceIndex::TEMP_CHAIN, temp_index.temp_chain_records.size());
+        nid_t node_id = graph->get_id(chain_start_handle);
+        temp_index.temp_chain_records.emplace_back();
+        auto& temp_chain = temp_index.temp_chain_records.back();
+        temp_chain.start_node_id = node_id; 
+        temp_chain.start_node_rev = graph->get_is_reverse(chain_start_handle);
+        temp_chain.children.emplace_back(SnarlDistanceIndex::TEMP_NODE, node_id);
+
+
+        //And the node record itself
+        auto& temp_node = temp_index.get_node(temp_chain.children.back());
+        temp_node.node_id = node_id;
+        temp_node.node_length = graph->get_length(chain_start_handle);
+        temp_node.reversed_in_parent = graph->get_is_reverse(chain_start_handle);
+        temp_node.parent = stack.back(); //The parent is this chain
+
+    },
+    [&](handle_t chain_end_handle) {
+        /*This gets called at the end of a chain, facing out
+         * Record the chain's end node. The node record itself would have been added as part of the snarl
+         * Also record the chain's parent here
+         */
+
+        //Done with this chain
+        SnarlDistanceIndex::temp_record_ref_t chain_index = stack.back();
+        stack.pop_back();
+
+#ifdef debug_distance_indexing
+        assert(chain_index.first == SnarlDistanceIndex::TEMP_CHAIN);
+#endif
+        SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryChainRecord& temp_chain_record = temp_index.get_chain(chain_index);
+        nid_t node_id = graph->get_id(chain_end_handle);
+
+        if (temp_chain_record.children.size() == 1 && node_id == temp_chain_record.start_node_id) {
+            //This is a trivial chain (its only child is its start node)
+
+#ifdef debug_distance_indexing
+            //Then this must be the last thing on the chain_records vector
+            assert(temp_index.temp_chain_records.size() == chain_index.second+1);
+#endif
+
+            //Get the node
+            SnarlDistanceIndex::temp_record_ref_t node_index = make_pair(SnarlDistanceIndex::TEMP_NODE, node_id);
+            SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryNodeRecord& temp_node_record = temp_index.get_node(node_index);
+
+            temp_node_record.reversed_in_parent = false;
+
+            //And give the chain's parent the node info
+            //
+            if (stack.empty()) {
+                temp_node_record.parent = make_pair(SnarlDistanceIndex::TEMP_ROOT, 0);
+                //If this was the last thing on the stack, then this was a root
+
+                //Check to see if there is anything connected to the ends of the chain
+                vector<nid_t> reachable_nodes;
+                graph->follow_edges(graph->get_handle(node_id, false),
+                    false, [&] (const handle_t& next) {
+                        if (graph->get_id(next) != node_id) {
+                            reachable_nodes.emplace_back(graph->get_id(next));
+                        }
+                    });
+                graph->follow_edges(graph->get_handle(node_id, true),
+                    false, [&] (const handle_t& next) {
+                        if (graph->get_id(next) != node_id) {
+                            reachable_nodes.emplace_back(graph->get_id(next));
+                        }
+                    });
+                if (reachable_nodes.size()) {
+                    //If we can reach anything leaving the chain (besides the chain itself), then it is part of a root snarl
+                    //Note that if the chain's start and end node are the same, then it will always be a single component
+#ifdef debug_distance_indexing
+                    cerr << "                 This trivial chain is part of the root but connects with something else in the root"<<endl;
+#endif
+                    bool new_component = true;
+
+                    //Add this to the union find
+                    root_snarl_component_uf.resize(root_snarl_component_uf.size() + 1);
+                    //And remember that it's in a connected component of the root
+                    temp_node_record.root_snarl_index = temp_index.root_snarl_components.size();
+                    temp_index.root_snarl_components.emplace_back(SnarlDistanceIndex::TEMP_NODE, node_id);
+                    for (nid_t next_id : reachable_nodes) {
+                        //For each node that this is connected to, check if we've already seen it and if we have, then
+                        //union this chain and that node's chain
+                        SnarlDistanceIndex::temp_record_ref_t next_index = make_pair(SnarlDistanceIndex::TEMP_NODE, next_id);
+                        SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryNodeRecord& node_record = temp_index.get_node(next_index);
+                        if (node_record.node_id != 0) {
+                            //If we've already seen this node, union it with the new one
+                            //If we can see it by walking out from this top-level chain, then it must also be a
+                            //top-level chain (or node pretending to be a chain)
+                            size_t other_i = node_record.parent.first == SnarlDistanceIndex::TEMP_CHAIN
+                                           ? temp_index.get_chain(node_record.parent).root_snarl_index
+                                           : node_record.root_snarl_index;
+#ifdef debug_distance_indexing
+                            assert(other_i != std::numeric_limits<size_t>::max());
+#endif
+                            root_snarl_component_uf.union_groups(other_i, temp_node_record.root_snarl_index);
+//#ifdef debug_distance_indexing
+//                            cerr << "        Union this trivial  with " << temp_index.get_chain(node_record.parent).start_node_id << " " << temp_index.get_chain(node_record.parent).end_node_id << endl;
+//#endif
+                        } else {
+                            new_component = false;
+                        }
+                    }
+                } else {
+                    //If this chain isn't connected to anything else, then it is a single component of the root
+                    temp_node_record.rank_in_parent = temp_index.components.size();
+                    temp_index.components.emplace_back(SnarlDistanceIndex::TEMP_NODE, node_id);
+                }
+            } else {
+                //The last thing on the stack is the parent of this chain, which must be a snarl
+                temp_node_record.parent = stack.back();
+                auto& parent_snarl_record = temp_index.get_snarl(temp_node_record.parent);
+                temp_node_record.rank_in_parent = parent_snarl_record.children.size() + 2;
+                parent_snarl_record.children.emplace_back(SnarlDistanceIndex::TEMP_NODE, node_id);
+            }
+
+
+            //Remove the chain record
+            temp_index.temp_chain_records.pop_back();
+            temp_index.max_index_size += temp_node_record.get_max_record_length();
+
+        } else {
+            //Otherwise, it is an actual chain
+
+            //Fill in node in chain
+            temp_chain_record.end_node_id = node_id;
+            temp_chain_record.end_node_rev = graph->get_is_reverse(chain_end_handle);
+            temp_chain_record.end_node_length = graph->get_length(chain_end_handle);
+            
+            bool is_root_chain = false;
+
+            if (stack.empty()) {
+                //If this was the last thing on the stack, then this was a root
+                is_root_chain = true;
+
+                //Check to see if there is anything connected to the ends of the chain
+                vector<nid_t> reachable_nodes;
+                graph->follow_edges(graph->get_handle(temp_chain_record.start_node_id, !temp_chain_record.start_node_rev),
+                    false, [&] (const handle_t& next) {
+                        if (graph->get_id(next) != temp_chain_record.start_node_id &&
+                            graph->get_id(next) != temp_chain_record.end_node_id) {
+                            reachable_nodes.emplace_back(graph->get_id(next));
+                        }
+                    });
+                graph->follow_edges(graph->get_handle(temp_chain_record.end_node_id, temp_chain_record.end_node_rev),
+                    false, [&] (const handle_t& next) {
+                        if (graph->get_id(next) != temp_chain_record.start_node_id &&
+                            graph->get_id(next) != temp_chain_record.end_node_id) {
+                            reachable_nodes.emplace_back(graph->get_id(next));
+                        }
+                    });
+                if (reachable_nodes.size() && (temp_chain_record.is_trivial || temp_chain_record.start_node_id != temp_chain_record.end_node_id)) {
+                    //If we can reach anything leaving the chain (besides the chain itself), then it is part of a root snarl
+                    //Note that if the chain's start and end node are the same, then it will always be a single component
+#ifdef debug_distance_indexing
+                    cerr << "                 This chain is part of the root but connects with something else in the root"<<endl;
+#endif
+                    bool new_component = true;
+
+                    //Add this to the union find
+                    root_snarl_component_uf.resize(root_snarl_component_uf.size() + 1);
+                    //And remember that it's in a connected component of the root
+                    temp_chain_record.root_snarl_index = temp_index.root_snarl_components.size();
+                    temp_index.root_snarl_components.emplace_back(chain_index);
+                    for (nid_t next_id : reachable_nodes) {
+                        //For each node that this is connected to, check if we've already seen it and if we have, then
+                        //union this chain and that node's chain
+                        SnarlDistanceIndex::temp_record_ref_t next_index = make_pair(SnarlDistanceIndex::TEMP_NODE, next_id);
+                        SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryNodeRecord& node_record = temp_index.get_node(next_index);
+                        if (node_record.node_id != 0) {
+                            //If we've already seen this node, union it with the new one
+                            //If we can see it by walking out from this top-level chain, then it must also be a
+                            //top-level chain (or node pretending to be a chain)
+                            size_t other_i = node_record.parent.first == SnarlDistanceIndex::TEMP_CHAIN
+                                           ? temp_index.get_chain(node_record.parent).root_snarl_index
+                                           : node_record.root_snarl_index;
+#ifdef debug_distance_indexing
+                            assert(other_i != std::numeric_limits<size_t>::max());
+#endif
+                            root_snarl_component_uf.union_groups(other_i, temp_chain_record.root_snarl_index);
+#ifdef debug_distance_indexing
+                            if (node_record.parent.first == SnarlDistanceIndex::TEMP_CHAIN) {
+                                cerr << "        Union this chain with " << temp_index.get_chain(node_record.parent).start_node_id << " " << temp_index.get_chain(node_record.parent).end_node_id << endl;
+                            } else {
+                                cerr << "        Union this chain with root " << node_record.root_snarl_index << endl;
+                            }
+#endif
+                        } else {
+                            new_component = false;
+                        }
+                    }
+                } else {
+                    //If this chain isn't connected to anything else, then it is a single component of the root
+                    temp_chain_record.parent = make_pair(SnarlDistanceIndex::TEMP_ROOT, 0);
+                    temp_chain_record.rank_in_parent = temp_index.components.size();
+                    temp_index.components.emplace_back(chain_index);
+                }
+            } else {
+                //The last thing on the stack is the parent of this chain, which must be a snarl
+                temp_chain_record.parent = stack.back();
+                auto& parent_snarl_record = temp_index.get_snarl(temp_chain_record.parent);
+                temp_chain_record.rank_in_parent = parent_snarl_record.children.size() + 2;
+                parent_snarl_record.children.emplace_back(chain_index);
+            }
+
+            temp_index.max_index_size += temp_chain_record.get_max_record_length(!only_top_level_chain_distances || is_root_chain ? true : false );
+#ifdef debug_distance_indexing
+            cerr << "  Ending new " << (temp_chain_record.is_trivial ? "trivial " : "") <<  "chain " << temp_index.structure_start_end_as_string(chain_index)
+              << endl << "    that is a child of " << temp_index.structure_start_end_as_string(temp_chain_record.parent) << endl;
+#endif
+        }
+    },
+    [&](handle_t snarl_start_handle) {
+        /*This gets called at the beginning of a new snarl facing in
+         * Create a new snarl record and fill in the start node.
+         * The node record would have been created as part of the chain, or as the end node
+         * of the previous snarl
+         */
+
+#ifdef debug_distance_indexing
+        cerr << "  Starting new snarl at " << graph->get_id(snarl_start_handle) << (graph->get_is_reverse(snarl_start_handle) ? " reverse" : " forward") << endl;
+        cerr << "with index " << temp_index.temp_snarl_records.size() << endl;
+#endif
+        auto& parent = stack.back();
+        stack.emplace_back(SnarlDistanceIndex::TEMP_SNARL, temp_index.temp_snarl_records.size());
+        temp_index.temp_snarl_records.emplace_back();
+        temp_index.temp_snarl_records.back().start_node_id = graph->get_id(snarl_start_handle);
+        temp_index.temp_snarl_records.back().start_node_rev = graph->get_is_reverse(snarl_start_handle);
+        temp_index.temp_snarl_records.back().start_node_length = graph->get_length(snarl_start_handle);
+
+    },
+    [&](handle_t snarl_end_handle){
+        /*This gets called at the end of the snarl facing out
+         * Fill in the end node of the snarl, its parent, and record the snarl as a child of its
+         * parent chain
+         * Also create a node record
+         */
+        SnarlDistanceIndex::temp_record_ref_t snarl_index = stack.back();
+        stack.pop_back();
+#ifdef debug_distance_indexing
+        assert(snarl_index.first == SnarlDistanceIndex::TEMP_SNARL);
+        assert(stack.back().first == SnarlDistanceIndex::TEMP_CHAIN);
+#endif
+        SnarlDistanceIndex::TemporaryDistanceIndex::TemporarySnarlRecord& temp_snarl_record = temp_index.get_snarl(snarl_index);
+        nid_t node_id = graph->get_id(snarl_end_handle);
+
+        //Record the end node in the snarl
+        temp_snarl_record.end_node_id = node_id;
+        temp_snarl_record.end_node_rev = graph->get_is_reverse(snarl_end_handle);
+        temp_snarl_record.end_node_length = graph->get_length(snarl_end_handle);
+        temp_snarl_record.node_count = temp_snarl_record.children.size();
+        bool any_edges_in_snarl = false;
+        graph->follow_edges(graph->get_handle(temp_snarl_record.start_node_id, temp_snarl_record.start_node_rev), false, [&](const handle_t& next_handle) {
+            if (graph->get_id(next_handle) != temp_snarl_record.end_node_id) {
+                any_edges_in_snarl = true;
+            }
+        });
+        graph->follow_edges(graph->get_handle(temp_snarl_record.end_node_id, !temp_snarl_record.end_node_rev), false, [&](const handle_t& next_handle) {
+            if (graph->get_id(next_handle) != temp_snarl_record.start_node_id) {
+                any_edges_in_snarl = true;
+            }
+        });
+
+        if (temp_snarl_record.children.size() == 0) {
+            //This is a trivial snarl
+            temp_snarl_record.is_trivial = true;
+
+#ifdef debug_distance_indexing
+            cerr << "  Ending and forgetting trivial snarl " << temp_index.structure_start_end_as_string(snarl_index)
+                 << endl << "    that is a child of " << temp_index.structure_start_end_as_string(temp_snarl_record.parent) << endl;
+#endif
+
+            //Add the end node to the chain
+#ifdef debug_distance_indexing
+            assert(stack.back().first == SnarlDistanceIndex::TEMP_CHAIN);
+#endif
+            temp_snarl_record.parent = stack.back();
+            auto& temp_chain = temp_index.get_chain(stack.back());
+            temp_chain.children.emplace_back(SnarlDistanceIndex::TEMP_NODE, node_id);
+
+            //Remove the snarl record.
+            //This invalidates snarl_index!!!
+#ifdef debug_distance_indexing
+            assert(temp_index.temp_snarl_records.size() == snarl_index.second+1);
+#endif
+            temp_index.temp_snarl_records.pop_back();
+        } else {
+            //This is the child of a chain
+            
+#ifdef debug_distance_indexing
+            cerr << "  Ending new snarl " << temp_index.structure_start_end_as_string(snarl_index)
+                 << endl << "    that is a child of " << temp_index.structure_start_end_as_string(temp_snarl_record.parent) << endl;
+#endif
+
+#ifdef debug_distance_indexing
+            assert(stack.back().first == SnarlDistanceIndex::TEMP_CHAIN);
+#endif
+            temp_snarl_record.parent = stack.back();
+            auto& temp_chain = temp_index.get_chain(stack.back());
+            temp_chain.children.emplace_back(snarl_index);
+            temp_chain.children.emplace_back(SnarlDistanceIndex::TEMP_NODE, node_id);
+
+        }
+
+        //Record the node itself. This gets done for the start of the chain, and ends of snarls
+        SnarlDistanceIndex::temp_record_ref_t node_index = make_pair(SnarlDistanceIndex::TEMP_NODE, node_id);
+        SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryNodeRecord& temp_node_record = temp_index.get_node(node_index);
+        temp_node_record.node_id = node_id;
+        temp_node_record.node_length = graph->get_length(snarl_end_handle);
+        temp_node_record.reversed_in_parent = graph->get_is_reverse(snarl_end_handle);
+        temp_node_record.parent = stack.back();
+    });
+
+    /*
+     * We finished going through everything that exists according to the snarl decomposition, but
+     * it's still missing tips, which will be discovered when filling in the snarl distances,
+     * and root-level snarls, which we'll add now by combining the chain components in root_snarl_components
+     * into snarls defined by root_snarl_component_uf
+     * The root-level snarl is a fake snarl that doesn't exist according to the snarl decomposition,
+     * but is an extra layer that groups together components of the root that are connected
+     */
+
+    vector<vector<size_t>> root_snarl_component_indexes = root_snarl_component_uf.all_groups();
+    for (vector<size_t>& root_snarl_indexes : root_snarl_component_indexes) {
+#ifdef debug_distance_indexing
+        cerr << "Create a new root snarl from components" << endl;
+#endif
+        //For each of the root snarls
+        temp_index.components.emplace_back(SnarlDistanceIndex::TEMP_SNARL, temp_index.temp_snarl_records.size());
+        temp_index.temp_snarl_records.emplace_back();
+        SnarlDistanceIndex::TemporaryDistanceIndex::TemporarySnarlRecord& temp_snarl_record = temp_index.temp_snarl_records.back();
+        temp_snarl_record.is_root_snarl = true;
+        temp_snarl_record.parent = make_pair(SnarlDistanceIndex::TEMP_ROOT, 0);
+
+
+        for (size_t chain_i : root_snarl_indexes) {
+            //For each chain component of this root-level snarl
+            if (temp_index.root_snarl_components[chain_i].first == SnarlDistanceIndex::TEMP_CHAIN){
+                SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryChainRecord& temp_chain_record = temp_index.get_chain(temp_index.root_snarl_components[chain_i]);
+                temp_chain_record.parent = make_pair(SnarlDistanceIndex::TEMP_SNARL, temp_index.temp_snarl_records.size() - 1);
+                temp_chain_record.rank_in_parent = temp_snarl_record.children.size();
+                temp_chain_record.reversed_in_parent = false;
+
+                temp_snarl_record.children.emplace_back(temp_index.root_snarl_components[chain_i]);
+            } else {
+#ifdef debug_distance_indexing
+                assert(temp_index.root_snarl_components[chain_i].first == SnarlDistanceIndex::TEMP_NODE);
+#endif
+                SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryNodeRecord& temp_node_record = temp_index.get_node(temp_index.root_snarl_components[chain_i]);
+                temp_node_record.parent = make_pair(SnarlDistanceIndex::TEMP_SNARL, temp_index.temp_snarl_records.size() - 1);
+                temp_node_record.rank_in_parent = temp_snarl_record.children.size();
+                temp_node_record.reversed_in_parent = false;
+
+                temp_snarl_record.children.emplace_back(temp_index.root_snarl_components[chain_i]);
+            }
+        }
+        temp_snarl_record.node_count = temp_snarl_record.children.size();
+    }
+
+
+    /*Now go through the decomposition again to fill in the distances
+     * This traverses all chains in reverse order that we found them in, so bottom up
+     * Each chain and snarl already knows its parents and children, except for single nodes
+     * that are children of snarls. These nodes, which were not in chains, will have their node
+     * records created here
+     */
+
+#ifdef debug_distance_indexing
+    cerr << "Filling in the distances in snarls" << endl;
+#endif
+    for (int i = temp_index.temp_chain_records.size()-1 ; i >= 0 ; i--) {
+        SnarlDistanceIndex::temp_record_ref_t chain_index = make_pair(SnarlDistanceIndex::TEMP_CHAIN, i);
+        SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryChainRecord& temp_chain_record = temp_index.get_chain(chain_index);
+#ifdef debug_distance_indexing
+        assert(!temp_chain_record.is_trivial);
+        cerr << "  At"  << (temp_chain_record.is_trivial ? " trivial " : "") << "chain " << temp_index.structure_start_end_as_string(chain_index) << endl;
+#endif
+
+        //Add the first values for the prefix sum and backwards loop vectors
+        temp_chain_record.prefix_sum.emplace_back(0);
+        temp_chain_record.max_prefix_sum.emplace_back(0);
+        temp_chain_record.backward_loops.emplace_back(std::numeric_limits<size_t>::max());
+        temp_chain_record.chain_components.emplace_back(0);
+
+
+        /*First, go through each of the snarls in the chain in the forward direction and
+         * fill in the distances in the snarl. Also fill in the prefix sum and backwards
+         * loop vectors here
+         */
+        size_t curr_component = 0; //which component of the chain are we in
+        size_t last_node_length = 0;
+        for (size_t chain_child_i = 0 ; chain_child_i < temp_chain_record.children.size() ; chain_child_i++ ){
+            const SnarlDistanceIndex::temp_record_ref_t& chain_child_index = temp_chain_record.children[chain_child_i];
+            //Go through each of the children in the chain, skipping nodes
+            //The snarl may be trivial, in which case don't fill in the distances
+#ifdef debug_distance_indexing
+            cerr << "    Looking at child " << temp_index.structure_start_end_as_string(chain_child_index) 
+                 << " current max prefix sum " << temp_chain_record.max_prefix_sum.back() << endl;
+#endif
+
+            if (chain_child_index.first == SnarlDistanceIndex::TEMP_SNARL){
+                //This is where all the work gets done. Need to go through the snarl and add
+                //all distances, then add distances to the chain that this is in
+                //The parent chain will be the last thing in the stack
+                SnarlDistanceIndex::TemporaryDistanceIndex::TemporarySnarlRecord& temp_snarl_record = 
+                        temp_index.get_snarl(chain_child_index);
+
+                //Fill in this snarl's distances
+                populate_snarl_index(temp_index, chain_child_index, size_limit, only_top_level_chain_distances, graph);
+
+                bool new_component = temp_snarl_record.min_length == std::numeric_limits<size_t>::max();
+                if (new_component){
+                    curr_component++;
+                }
+
+                //And get the distance values for the end node of the snarl in the chain
+                if (new_component) {
+                    //If this snarl wasn't start-end connected, then we start 
+                    //tracking the distance vectors here
+
+                    //Update the maximum distance
+                    temp_index.max_distance = std::max(temp_index.max_distance, temp_chain_record.max_prefix_sum.back());
+
+                    temp_chain_record.prefix_sum.emplace_back(0);
+                    temp_chain_record.max_prefix_sum.emplace_back(0);
+                    temp_chain_record.backward_loops.emplace_back(temp_snarl_record.distance_end_end);
+                    //If the chain is disconnected, the max length is infinite
+                    temp_chain_record.max_length =  std::numeric_limits<size_t>::max();
+                } else {
+                    temp_chain_record.prefix_sum.emplace_back(SnarlDistanceIndex::sum(SnarlDistanceIndex::sum(
+                                                              temp_chain_record.prefix_sum.back(),
+                                                              temp_snarl_record.min_length), 
+                                                              temp_snarl_record.start_node_length));
+                    temp_chain_record.max_prefix_sum.emplace_back(SnarlDistanceIndex::sum(SnarlDistanceIndex::sum(
+                                                                   temp_chain_record.max_prefix_sum.back(),
+                                                                   temp_snarl_record.max_length), 
+                                                                   temp_snarl_record.start_node_length));
+                    temp_chain_record.backward_loops.emplace_back(std::min(temp_snarl_record.distance_end_end,
+                        SnarlDistanceIndex::sum(temp_chain_record.backward_loops.back()
+                        , 2 * (temp_snarl_record.start_node_length + temp_snarl_record.min_length))));
+                    temp_chain_record.max_length = SnarlDistanceIndex::sum(temp_chain_record.max_length,
+                                                                           temp_snarl_record.max_length);
+                }
+                temp_chain_record.chain_components.emplace_back(curr_component);
+                if (chain_child_i == temp_chain_record.children.size() - 2 && temp_snarl_record.min_length == std::numeric_limits<size_t>::max()) {
+                    temp_chain_record.loopable = false;
+                }
+                last_node_length = 0;
+            } else {
+                if (last_node_length != 0) {
+                    //If this is a node and the last thing was also a node,
+                    //then there was a trivial snarl 
+                    SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryNodeRecord& temp_node_record = 
+                            temp_index.get_node(chain_child_index);
+
+                    //Check if there is a loop in this node
+                    //Snarls get counted as trivial if they contain no nodes but they might still have edges
+                    size_t backward_loop = std::numeric_limits<size_t>::max();
+
+                    graph->follow_edges(graph->get_handle(temp_node_record.node_id, !temp_node_record.reversed_in_parent), false, [&](const handle_t& next_handle) {
+                        if (graph->get_id(next_handle) == temp_node_record.node_id) {
+                            //If there is a loop going backwards (relative to the chain) back to the same node
+                            backward_loop = 0;
+                        }
+                    });
+
+                    temp_chain_record.prefix_sum.emplace_back(SnarlDistanceIndex::sum(temp_chain_record.prefix_sum.back(), last_node_length));
+                    temp_chain_record.max_prefix_sum.emplace_back(SnarlDistanceIndex::sum(temp_chain_record.max_prefix_sum.back(), last_node_length));
+                    temp_chain_record.backward_loops.emplace_back(std::min(backward_loop,
+                        SnarlDistanceIndex::sum(temp_chain_record.backward_loops.back(), 2 * last_node_length)));
+
+                    if (chain_child_i == temp_chain_record.children.size()-1) {
+                        //If this is the last node
+                        temp_chain_record.loopable=false;
+                    }
+                    temp_chain_record.chain_components.emplace_back(curr_component);
+                }
+                last_node_length = temp_index.get_node(chain_child_index).node_length;
+                //And update the chains max length
+                temp_chain_record.max_length = SnarlDistanceIndex::sum(temp_chain_record.max_length,
+                                                                       last_node_length);
+            }
+        } //Finished walking through chain
+        if (temp_chain_record.start_node_id == temp_chain_record.end_node_id && temp_chain_record.chain_components.back() != 0) {
+            //If this is a looping, multicomponent chain, the start/end node could end up in separate chain components
+            //despite being the same node.
+            //Since the first component will always be 0, set the first node's component to be whatever the last
+            //component was
+            temp_chain_record.chain_components[0] = temp_chain_record.chain_components.back();
+
+        }
+
+        //For a multicomponent chain, the actual minimum length will always be infinite, but since we sometimes need
+        //the length of the last component, save that here
+        temp_chain_record.min_length = !temp_chain_record.is_trivial && temp_chain_record.start_node_id == temp_chain_record.end_node_id
+                        ? temp_chain_record.prefix_sum.back()
+                        : SnarlDistanceIndex::sum(temp_chain_record.prefix_sum.back() , temp_chain_record.end_node_length);
+
+#ifdef debug_distance_indexing
+        assert(temp_chain_record.prefix_sum.size() == temp_chain_record.backward_loops.size());
+        assert(temp_chain_record.prefix_sum.size() == temp_chain_record.chain_components.size());
+#endif
+
+
+        /*Now that we've gone through all the snarls in the chain, fill in the forward loop vector
+         * by going through the chain in the backwards direction
+         */
+        temp_chain_record.forward_loops.resize(temp_chain_record.prefix_sum.size(),
+                                               std::numeric_limits<size_t>::max());
+        if (temp_chain_record.start_node_id == temp_chain_record.end_node_id && temp_chain_record.children.size() > 1) {
+
+            //If this is a looping chain, then check the first snarl for a loop
+            if (temp_chain_record.children.at(1).first == SnarlDistanceIndex::TEMP_SNARL) {
+                SnarlDistanceIndex::TemporaryDistanceIndex::TemporarySnarlRecord& temp_snarl_record = temp_index.get_snarl(temp_chain_record.children.at(1));
+                temp_chain_record.forward_loops[temp_chain_record.forward_loops.size()-1] = temp_snarl_record.distance_start_start;
+            } 
+        }
+
+        size_t node_i = temp_chain_record.prefix_sum.size() - 2;
+        // We start at the next to last node because we need to look at this record and the next one.
+        last_node_length = 0;
+        for (int j = (int)temp_chain_record.children.size() - 1 ; j >= 0 ; j--) {
+            auto& child = temp_chain_record.children.at(j);
+            if (child.first == SnarlDistanceIndex::TEMP_SNARL){
+                SnarlDistanceIndex::TemporaryDistanceIndex::TemporarySnarlRecord& temp_snarl_record = temp_index.get_snarl(child);
+                if (temp_chain_record.chain_components.at(node_i) != temp_chain_record.chain_components.at(node_i+1) &&
+                    temp_chain_record.chain_components.at(node_i+1) != 0){
+                    //If this is a new chain component, then add the loop distance from the snarl
+                    //If the component of the next node is 0, then we're still in the same component since we're going backwards
+                    temp_chain_record.forward_loops.at(node_i) = temp_snarl_record.distance_start_start;
+                } else {
+                    temp_chain_record.forward_loops.at(node_i) =
+                        std::min(SnarlDistanceIndex::sum(SnarlDistanceIndex::sum(
+                                    temp_chain_record.forward_loops.at(node_i+1), 
+                                    2* temp_snarl_record.min_length),
+                                    2*temp_snarl_record.end_node_length), 
+                                temp_snarl_record.distance_start_start);
+                }
+                node_i --;
+                last_node_length = 0;
+            } else {
+                if (last_node_length != 0) {
+                    SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryNodeRecord& temp_node_record = 
+                            temp_index.get_node(child);
+
+
+                    //Check if there is a loop in this node
+                    //Snarls get counted as trivial if they contain no nodes but they might still have edges
+                    size_t forward_loop = std::numeric_limits<size_t>::max();
+                    graph->follow_edges(graph->get_handle(temp_node_record.node_id, temp_node_record.reversed_in_parent), false, [&](const handle_t& next_handle) {
+                        if (graph->get_id(next_handle) == temp_node_record.node_id) {
+                            //If there is a loop going forward (relative to the chain) back to the same node
+                            forward_loop = 0;
+                        }
+                    });
+                    temp_chain_record.forward_loops.at(node_i) = std::min( forward_loop,
+                        SnarlDistanceIndex::sum(temp_chain_record.forward_loops.at(node_i+1) , 
+                                                 2*last_node_length));
+                    node_i--;
+                }
+                last_node_length = temp_index.get_node(child).node_length;
+            }
+        }
+
+
+        //If this is a looping chain, check if the loop distances can be improved by going around the chain
+
+        if (temp_chain_record.start_node_id == temp_chain_record.end_node_id && temp_chain_record.children.size() > 1) {
+
+
+            //Also check if the reverse loop values would be improved if we went around again
+
+            if (temp_chain_record.backward_loops.back() < temp_chain_record.backward_loops.front()) {
+                temp_chain_record.backward_loops[0] = temp_chain_record.backward_loops.back();
+                size_t node_i = 1;
+                size_t last_node_length = 0;
+                for (size_t i = 1 ; i < temp_chain_record.children.size()-1 ; i++ ) {
+                    auto& child = temp_chain_record.children.at(i);
+                    if (child.first == SnarlDistanceIndex::TEMP_SNARL) {
+                        SnarlDistanceIndex::TemporaryDistanceIndex::TemporarySnarlRecord& temp_snarl_record = temp_index.get_snarl(child);
+                        size_t new_loop_distance = SnarlDistanceIndex::sum(SnarlDistanceIndex::sum(
+                                                      temp_chain_record.backward_loops.at(node_i-1), 
+                                                      2*temp_snarl_record.min_length), 
+                                                      2*temp_snarl_record.start_node_length); 
+                        if (temp_chain_record.chain_components.at(node_i)!= 0 || new_loop_distance >= temp_chain_record.backward_loops.at(node_i)) {
+                            //If this is a new chain component or it doesn't improve, stop
+                            break;
+                        } else {
+                            //otherwise record the better distance
+                            temp_chain_record.backward_loops.at(node_i) = new_loop_distance;
+
+                        }
+                        node_i++;
+                        last_node_length = 0;
+                    } else {
+                        if (last_node_length != 0) {
+                            size_t new_loop_distance = SnarlDistanceIndex::sum(temp_chain_record.backward_loops.at(node_i-1), 
+                                    2*last_node_length); 
+                            size_t old_loop_distance = temp_chain_record.backward_loops.at(node_i);
+                            temp_chain_record.backward_loops.at(node_i) = std::min(old_loop_distance,new_loop_distance);
+                            node_i++;
+                        }
+                        last_node_length = temp_index.get_node(child).node_length;
+                    }
+                }
+            }
+            if (temp_chain_record.forward_loops.front() < temp_chain_record.forward_loops.back()) {
+                //If this is a looping chain and looping improves the forward loops, 
+                //then we have to keep going around to update distance
+
+                temp_chain_record.forward_loops.back() = temp_chain_record.forward_loops.front();
+                size_t last_node_length = 0;
+                node_i = temp_chain_record.prefix_sum.size() - 2;
+                for (int j = (int)temp_chain_record.children.size() - 1 ; j >= 0 ; j--) {
+                    auto& child = temp_chain_record.children.at(j);
+                    if (child.first == SnarlDistanceIndex::TEMP_SNARL){
+                        SnarlDistanceIndex::TemporaryDistanceIndex::TemporarySnarlRecord& temp_snarl_record = temp_index.get_snarl(child);
+                        size_t new_distance = SnarlDistanceIndex::sum(SnarlDistanceIndex::sum(
+                                                temp_chain_record.forward_loops.at(node_i+1), 
+                                                2* temp_snarl_record.min_length),
+                                                2*temp_snarl_record.end_node_length);
+                        if (temp_chain_record.chain_components.at(node_i) != temp_chain_record.chain_components.at(node_i+1) ||
+                            new_distance >= temp_chain_record.forward_loops.at(node_i)){
+                            //If this is a new component or the distance doesn't improve, stop looking
+                            break;
+                        } else {
+                            //otherwise, update the distance
+                            temp_chain_record.forward_loops.at(node_i) = new_distance;
+                        }
+                        node_i --;
+                        last_node_length =0;
+                    } else {
+                        if (last_node_length != 0) {
+                            size_t new_distance = SnarlDistanceIndex::sum(temp_chain_record.forward_loops.at(node_i+1) , 2* last_node_length);
+                            size_t old_distance = temp_chain_record.forward_loops.at(node_i);
+                            temp_chain_record.forward_loops.at(node_i) = std::min(old_distance, new_distance);
+                            node_i--;
+                        }
+                        last_node_length = temp_index.get_node(child).node_length;
+                    }
+                } 
+            }
+        }
+
+        temp_index.max_distance = std::max(temp_index.max_distance, temp_chain_record.max_prefix_sum.back());
+        temp_index.max_distance = temp_chain_record.forward_loops.back() == std::numeric_limits<size_t>::max() ? temp_index.max_distance : std::max(temp_index.max_distance, temp_chain_record.forward_loops.back());
+        temp_index.max_distance = temp_chain_record.backward_loops.front() == std::numeric_limits<size_t>::max() ? temp_index.max_distance : std::max(temp_index.max_distance, temp_chain_record.backward_loops.front());
+        assert(temp_index.max_distance <= 2742664019);
+
+    }
+
+#ifdef debug_distance_indexing
+    cerr << "Filling in the distances in root snarls and distances along chains" << endl;
+#endif
+    for (SnarlDistanceIndex::temp_record_ref_t& component_index : temp_index.components) {
+        if (component_index.first == SnarlDistanceIndex::TEMP_SNARL) {
+            SnarlDistanceIndex::TemporaryDistanceIndex::TemporarySnarlRecord& temp_snarl_record = temp_index.get_snarl(component_index);
+            populate_snarl_index(temp_index, component_index, size_limit, only_top_level_chain_distances, graph);
+            temp_snarl_record.min_length = std::numeric_limits<size_t>::max();
+        }
+    }
+    temp_index.root_structure_count = temp_index.components.size();
+#ifdef debug_distance_indexing
+    assert(temp_index.components.size() == temp_index.root_structure_count);
+    cerr << "Finished temp index with " << temp_index.root_structure_count << " connected components" << endl;
+#endif
+    return temp_index;
+}
+
+/**
+ * Populate a row of the distance matrix (forward declaration of a file-static helper; the definition is not part of this excerpt).
+ * Also responsible for filling in min_length, distance_start_start, and distance_start_end on the TemporarySnarlRecord when a distance matrix is used. NOTE(review): start_index/start_rank presumably identify the child whose row is filled — confirm against the definition.
+ */
+static void populate_distance_matrix_row(SnarlDistanceIndex::TemporaryDistanceIndex& temp_index, const SnarlDistanceIndex::temp_record_ref_t& snarl_index, SnarlDistanceIndex::TemporaryDistanceIndex::TemporarySnarlRecord& temp_snarl_record, const SnarlDistanceIndex::temp_record_ref_t& start_index, const HandleGraph* graph, size_t start_rank, bool is_internal_node, size_t size_limit); 
+
+/**
+ * Fills in required distance matrix rows for each child (forward declaration of a file-static helper).
+ * - Normal snarl: all rows
+ * - Oversized snarl: boundaries and tips
+ * - size_limit == 0: no distances in index, so no rows
+ * - Top-level chain distances only (only_top_level_chain_distances): TODO(review) — behaviour is not documented here; fill in from the definition
+ */
+static void populate_distance_matrix_if_needed(SnarlDistanceIndex::TemporaryDistanceIndex& temp_index, const SnarlDistanceIndex::temp_record_ref_t& snarl_index, SnarlDistanceIndex::TemporaryDistanceIndex::TemporarySnarlRecord& temp_snarl_record, const vector<SnarlDistanceIndex::temp_record_ref_t>& all_children, const HandleGraph* graph, size_t size_limit, bool only_top_level_chain_distances); 
+
+/**
+ * Forward declaration of a file-static helper. Does three things:
+ * - Builds the temporary graph that the hub labels will be built on
+ * - Builds the hub labels
+ * - Stores the labels in temp_snarl_record
+ */
+static void populate_hub_labeling(SnarlDistanceIndex::TemporaryDistanceIndex& temp_index, const SnarlDistanceIndex::temp_record_ref_t& snarl_index, SnarlDistanceIndex::TemporaryDistanceIndex::TemporarySnarlRecord& temp_snarl_record, const vector<SnarlDistanceIndex::temp_record_ref_t>& all_children, const HandleGraph* graph);
+
+/**
+ * Determine if a snarl is regular or not (presumably returns true when it is regular — confirm against the definition).
+ *
+ * A regular snarl is a snarl that consists of only nodes or
+ * chains connected to the start and end, without any connections between
+ * multiple children, or any way to turn around. There may be an edge directly
+ * across.
+ *
+ * A simple snarl is always regular. NOTE(review): unlike the neighbouring forward declarations this one is not `static` — confirm that external linkage is intended.
+ */
+bool check_regularity(const SnarlDistanceIndex::TemporaryDistanceIndex& temp_index, const SnarlDistanceIndex::temp_record_ref_t& snarl_index, const SnarlDistanceIndex::TemporaryDistanceIndex::TemporarySnarlRecord& temp_snarl_record, const vector<SnarlDistanceIndex::temp_record_ref_t>& all_children, const HandleGraph* graph);
+
+// ---------------------------------------------------------------------------
+// Phase helpers for populate_snarl_index (all file-static)
+// ---------------------------------------------------------------------------
+
+// Step 1: Climb the snarl tree from curr_index and return the direct child of
+// ancestor_snarl_index that contains it. A boundary node of that snarl is returned
+// unchanged. Only curr_index.second is compared with the boundary ids and get_node is
+// called on it, so curr_index is expected to be a TEMP_NODE reference.
+static SnarlDistanceIndex::temp_record_ref_t ancestor_of_node_in_snarl(
+        SnarlDistanceIndex::TemporaryDistanceIndex& temp_index,
+        SnarlDistanceIndex::temp_record_ref_t curr_index,
+        SnarlDistanceIndex::temp_record_ref_t ancestor_snarl_index) {
+
+    const auto& enclosing_snarl = temp_index.get_snarl(ancestor_snarl_index);
+    const bool is_boundary = curr_index.second == enclosing_snarl.start_node_id ||
+                             curr_index.second == enclosing_snarl.end_node_id;
+    if (is_boundary) { return curr_index; }
+
+    SnarlDistanceIndex::temp_record_ref_t child = curr_index;
+    for (SnarlDistanceIndex::temp_record_ref_t up = temp_index.get_node(child).parent; up != ancestor_snarl_index; ) {
+        child = up;  // step one level up; `child` is now the record whose parent we need next
+        if (child.first == SnarlDistanceIndex::TEMP_SNARL) { up = temp_index.get_snarl(child).parent; }
+        else                                               { up = temp_index.get_chain(child).parent; }
+#ifdef debug_distance_indexing
+        assert(up.first != SnarlDistanceIndex::TEMP_ROOT);
+#endif
+    }
+    return child;
+}
+
+// Step 2a: Build the graph handle that leaves child_index on the requested side.
+// reversed=false gives the forward/end side, reversed=true the backward/start side.
+// A TEMP_NODE uses its own id; anything else is looked up as a chain and uses that chain's boundary node.
+static handle_t child_boundary_handle(
+        SnarlDistanceIndex::TemporaryDistanceIndex& temp_index,
+        const SnarlDistanceIndex::temp_record_ref_t& child_index,
+        bool reversed,
+        const HandleGraph* graph) {
+
+    if (child_index.first == SnarlDistanceIndex::TEMP_NODE) {
+        // A plain node: the side is just the orientation of the handle.
+        return graph->get_handle(child_index.second, reversed);
+    }
+
+    const auto& chain = temp_index.get_chain(child_index);
+    // Leaving backwards flips the chain's start node; leaving forwards uses the end node as stored.
+    return reversed ? graph->get_handle(chain.start_node_id, !chain.start_node_rev)
+                    : graph->get_handle(chain.end_node_id, chain.end_node_rev);
+}
+
+// Step 2b: Report whether child_index is traversed in reverse when entered via graph_handle.
+// NOTE(review): for a non-trivial chain whose start and end node ids are equal the id test below is always true — confirm that is intended.
+static bool child_side_reversed(
+        SnarlDistanceIndex::TemporaryDistanceIndex& temp_index,
+        const SnarlDistanceIndex::temp_record_ref_t& child_index,
+        handle_t graph_handle,
+        const HandleGraph* graph) {
+    // Nodes and chains flagged is_trivial take the handle's own orientation (|| short-circuits, so get_chain is only reached for non-nodes).
+    const bool use_handle_orientation = child_index.first == SnarlDistanceIndex::TEMP_NODE ||
+                                        temp_index.get_chain(child_index).is_trivial;
+    // Any other chain counts as reversed exactly when the handle sits on the chain's end node.
+    return use_handle_orientation ? graph->get_is_reverse(graph_handle)
+                                  : graph->get_id(graph_handle) == temp_index.get_chain(child_index).end_node_id;
+}
+
+// ---------------------------------------------------------------------------
+// SnarlChildGraph implementation
+// ---------------------------------------------------------------------------
+
+// Stores the four arguments in the matching members and does nothing else. children arrives as a std::span (a non-owning view); NOTE(review): presumably the caller must keep that storage alive — confirm against the member declaration.
+SnarlChildGraph::SnarlChildGraph(TempIndex& temp_index,
+                                 temp_record_ref_t snarl_index,
+                                 std::span<const temp_record_ref_t> children,
+                                 const handlegraph::HandleGraph* graph) :
+    temp_index_(temp_index),
+    snarl_index_(snarl_index),
+    children_(children),
+    graph_(graph) {}
+
+// Accessor: hands back the stored children_ as a span of const child references.
+std::span<const SnarlChildGraph::temp_record_ref_t> SnarlChildGraph::children() const noexcept {
+    return this->children_;
+}
+
+// Returns the snarl's start (start=true) or end (start=false) boundary as a
+// {TEMP_NODE reference to the boundary node, stored orientation flag of that node} pair.
+std::pair<SnarlChildGraph::temp_record_ref_t, bool> SnarlChildGraph::boundary(bool start) const {
+    const auto& snarl_record = temp_index_.get_snarl(snarl_index_);
+    const auto boundary_id  = start ? snarl_record.start_node_id  : snarl_record.end_node_id;
+    const auto boundary_rev = start ? snarl_record.start_node_rev : snarl_record.end_node_rev;
+    return {{SnarlDistanceIndex::TEMP_NODE, boundary_id}, boundary_rev};
+}
+
+// Visit every edge leaving `child` on the chosen side (go_left=true: start/backward side) and report,
+// per edge, the snarl-level child reached, its entry direction, a distance and the graph node arrived at.
+void SnarlChildGraph::for_each_outgoing(
+        temp_record_ref_t child,
+        bool go_left,
+        const std::function<void(temp_record_ref_t neighbor,
+                                 bool neighbor_rev,
+                                 size_t edge_distance,
+                                 handlegraph::nid_t arriving_node_id)>& callback) const {
+
+    const handle_t departure = child_boundary_handle(temp_index_, child, go_left, graph_);
+    const auto visit_edge = [&](const handle_t& landed_on) {
+        const handlegraph::nid_t landed_id = graph_->get_id(landed_on);
+        // Lift the graph node to the child of this snarl that contains it.
+        temp_record_ref_t landed_node = {SnarlDistanceIndex::TEMP_NODE, landed_id};
+        const temp_record_ref_t reached = ancestor_of_node_in_snarl(temp_index_, landed_node, snarl_index_);
+        const bool reached_rev = child_side_reversed(temp_index_, reached, landed_on, graph_);
+
+        // Nodes report their own length; chains report min_length unless the last
+        // entry of chain_components is nonzero, in which case max() is reported.
+        size_t reported = std::numeric_limits<size_t>::max();
+        if (reached.first == SnarlDistanceIndex::TEMP_NODE) {
+            reported = graph_->get_length(landed_on);
+        } else if (const auto& chain = temp_index_.get_chain(reached); chain.chain_components.back() == 0) {
+            reported = chain.min_length;
+        }
+        callback(reached, reached_rev, reported, landed_id);
+        return true;  // keep iterating over the remaining edges
+    };
+    graph_->follow_edges(departure, false, visit_edge);
+}
+
+// Phase 1: Mark tip nodes and set is_simple=false if any tip is found.
+// The snarl's own boundary nodes are skipped. Every other child has both sides probed (end side
+// first, then start side); a side with no edge leaving it marks that node's record is_tip, adds
+// (rank_in_parent - 2, is_start_side) to tippy_child_ranks and clears is_simple.
+static void identify_tips(
+        SnarlDistanceIndex::TemporaryDistanceIndex& temp_index,
+        SnarlDistanceIndex::TemporaryDistanceIndex::TemporarySnarlRecord& temp_snarl_record,
+        const vector<SnarlDistanceIndex::temp_record_ref_t>& all_children,
+        const HandleGraph* graph) {
+
+    // Probe one side of a child; if nothing leaves it, record the tip.
+    auto probe_side = [&](nid_t node_id, bool is_reverse, size_t rank, bool is_start_side) {
+        bool has_edges = false;
+        graph->follow_edges(graph->get_handle(node_id, is_reverse), false, [&](const handle_t&) {
+            has_edges = true;
+            // Existence is all we need: returning false ends the iteration after the first edge.
+            return false;
+        });
+        if (!has_edges) {
+            temp_index.temp_node_records.at(node_id - temp_index.min_node_id).is_tip = true;
+            temp_snarl_record.tippy_child_ranks.emplace(rank, is_start_side);
+            temp_snarl_record.is_simple = false;
+        }
+    };
+
+    for (const auto& child : all_children) {
+        const bool is_node = (child.first == SnarlDistanceIndex::TEMP_NODE);
+        if (is_node && (child.second == temp_snarl_record.start_node_id
+                        || child.second == temp_snarl_record.end_node_id)) {
+            continue; // Snarl boundary, not a child to test.
+        }
+        if (is_node) {
+            size_t rank = temp_index.temp_node_records.at(child.second - temp_index.min_node_id).rank_in_parent;
+            rank -= 2; // tippy_child_ranks stores rank_in_parent minus 2
+            probe_side(child.second, false, rank, false);
+            probe_side(child.second, true, rank, true);
+        } else {
+            const auto& chain = temp_index.temp_chain_records.at(child.second);
+            size_t rank = chain.rank_in_parent;
+            rank -= 2; // tippy_child_ranks stores rank_in_parent minus 2
+            probe_side(chain.end_node_id, chain.end_node_rev, rank, false);
+            probe_side(chain.start_node_id, !chain.start_node_rev, rank, true);
+        }
+    }
+}
+
+// Phase 2a: BFS topological sort of children; returns new-to-old rank mapping.
+// TODO: For non-DAGs this sort will end up arbitrary. That doesn't matter
+//       since the only consumer of ranks (ziptrees) expects arbitrary ranks.
+static vector<size_t> topo_sort_children(
+        SnarlDistanceIndex::TemporaryDistanceIndex& temp_index,
+        const SnarlDistanceIndex::temp_record_ref_t& snarl_index,
+        const SnarlDistanceIndex::TemporaryDistanceIndex::TemporarySnarlRecord& temp_snarl_record,
+        std::span<const SnarlDistanceIndex::temp_record_ref_t> all_children,
+        const HandleGraph* graph) {
+
+    SnarlChildGraph child_graph(temp_index, snarl_index, all_children, graph);
+
+    vector<size_t> topological_sort_order;
+    topological_sort_order.reserve(all_children.size());
+
+    unordered_set<size_t> visited_ranks;
+    visited_ranks.reserve(all_children.size());
+
+    vector<pair<size_t, bool>> source_nodes;
+
+    // Add tips as sources. Tips push before the start sentinel so sentinel pops first (LIFO).
+    for (const auto& tip : temp_snarl_record.tippy_child_ranks) {
+        source_nodes.emplace_back(tip.first, !tip.second);
+    }
+    // Start node dummy rank is max() — pops first as LIFO sentinel.
+    source_nodes.emplace_back(std::numeric_limits<size_t>::max(), false);
+
+    while (!source_nodes.empty()) {
+        pair<size_t, bool> current_child_index = source_nodes.back();
+        source_nodes.pop_back();
+
+        if (visited_ranks.count(current_child_index.first) != 0) {
+            // Revisiting a source means we hit a loop; abort with arbitrary ranks.
+            break;
+        }
+        if (current_child_index.first != std::numeric_limits<size_t>::max()) {
+            topological_sort_order.emplace_back(current_child_index.first);
+        }
+        visited_ranks.emplace(current_child_index.first);
+
+        // Determine which child (or sentinel start boundary) to follow edges from.
+        // For the sentinel, use the snarl's start boundary node in its stored orientation,
+        // which produces the same handle as topological_sort_start in the original code.
+        SnarlDistanceIndex::temp_record_ref_t current_ref;
+        bool go_left;
+        if (current_child_index.first == std::numeric_limits<size_t>::max()) {
+            current_ref = {SnarlDistanceIndex::TEMP_NODE, temp_snarl_record.start_node_id};
+            go_left     = temp_snarl_record.start_node_rev;
+        } else {
+            current_ref = all_children[current_child_index.first];
+            go_left     = current_child_index.second;
+        }
+
+        child_graph.for_each_outgoing(current_ref, go_left, [&](
+                SnarlDistanceIndex::temp_record_ref_t neighbor,
+                bool neighbor_rev,
+                size_t /*edge_distance*/,
+                handlegraph::nid_t /*arriving_nid*/) {
+#ifdef debug_distance_indexing
+            cerr << "Following forward edges to "
+                 << temp_index.structure_start_end_as_string(neighbor) << endl;
+#endif
+            // Skip snarl boundaries.
+            if (neighbor.first == SnarlDistanceIndex::TEMP_NODE &&
+                (neighbor.second == temp_snarl_record.start_node_id ||
+                 neighbor.second == temp_snarl_record.end_node_id)) {
+                return;
+            }
+            size_t next_rank = neighbor.first == SnarlDistanceIndex::TEMP_NODE
+                        ? temp_index.get_node(neighbor).rank_in_parent
+                        : temp_index.get_chain(neighbor).rank_in_parent;
+            assert(next_rank >= 2);
+            next_rank -= 2;
+            assert(all_children[next_rank] == neighbor);
+            if (visited_ranks.count(next_rank) != 0) {
+                return;
+            }
+
+            // Check if neighbor is a topological source (no unvisited predecessors).
+            bool is_source = true;
+            child_graph.for_each_outgoing(neighbor, !neighbor_rev, [&](
+                    SnarlDistanceIndex::temp_record_ref_t incoming,
+                    bool /*incoming_rev*/,
+                    size_t /*edge_distance*/,
+                    handlegraph::nid_t /*arriving_nid*/) {
+#ifdef debug_distance_indexing
+                cerr << "Getting backwards edge from "
+                     << temp_index.structure_start_end_as_string(incoming) << endl;
+#endif
+                if (incoming.first == SnarlDistanceIndex::TEMP_NODE &&
+                    (incoming.second == temp_snarl_record.start_node_id ||
+                     incoming.second == temp_snarl_record.end_node_id)) {
+                    return;
+                }
+                size_t incoming_rank = incoming.first == SnarlDistanceIndex::TEMP_NODE
+                            ? temp_index.get_node(incoming).rank_in_parent
+                            : temp_index.get_chain(incoming).rank_in_parent;
+                assert(incoming_rank >= 2);
+                incoming_rank -= 2;
+                if (visited_ranks.count(incoming_rank) == 0) {
+                    is_source = false;
+                }
+            });
+            if (is_source) {
+                source_nodes.emplace_back(next_rank, neighbor_rev);
+            }
+        });
+    }
+
+    // Non-DAG fallback: append any ranks not yet visited in arbitrary order.
+    vector<bool> check_ranks(all_children.size(), false);
+    for (size_t x : topological_sort_order) {
+        check_ranks[x] = true;
+    }
+    for (size_t i = 0; i < check_ranks.size(); i++) {
+        if (!check_ranks[i]) {
+            topological_sort_order.emplace_back(i);
+        }
+    }
+    assert(topological_sort_order.size() == all_children.size());
+    return topological_sort_order;
+}
+
+// Phase 2b: Commit the topological order computed for the snarl's children.
+// new_to_old[new_rank] is the index into all_children of the child that ends
+// up at position new_rank. Each child's rank_in_parent becomes new_rank + 2
+// (the +2 offset keeps child ranks clear of 0 and 1), and tippy_child_ranks
+// is re-keyed from old positions to new positions with its flag unchanged.
+static void apply_topo_permutation(
+        SnarlDistanceIndex::TemporaryDistanceIndex& temp_index,
+        SnarlDistanceIndex::TemporaryDistanceIndex::TemporarySnarlRecord& temp_snarl_record,
+        std::span<const SnarlDistanceIndex::temp_record_ref_t> all_children,
+        const vector<size_t>& new_to_old) {
+
+    // Keep a copy of the tip table keyed by old position; the live table is rebuilt below.
+    auto tips_by_old_rank = temp_snarl_record.tippy_child_ranks;
+    temp_snarl_record.tippy_child_ranks.clear();
+
+    size_t new_rank = 0;
+    for (const size_t old_rank : new_to_old) {
+        const SnarlDistanceIndex::temp_record_ref_t& child = all_children[old_rank];
+        const size_t stored_rank = new_rank + 2;
+        if (child.first == SnarlDistanceIndex::TEMP_NODE) {
+            temp_index.get_node(child).rank_in_parent = stored_rank;
+        } else {
+            temp_index.get_chain(child).rank_in_parent = stored_rank;
+        }
+
+        // Carry the tip flag (if any) over to the child's new position.
+        auto tip_entry = tips_by_old_rank.find(old_rank);
+        if (tip_entry != tips_by_old_rank.end()) {
+            temp_snarl_record.tippy_child_ranks.emplace(new_rank, tip_entry->second);
+        }
+        ++new_rank;
+    }
+}
+
+// Phase 3: Compute the snarl's distances, choosing between the distance-matrix
+// path and the hub-label path used for oversized snarls.
+// Side effect on the caller's list: for a non-root snarl the start and end
+// boundary nodes are appended to all_children before anything is computed.
+static void compute_snarl_distances(
+        SnarlDistanceIndex::TemporaryDistanceIndex& temp_index,
+        const SnarlDistanceIndex::temp_record_ref_t& snarl_index,
+        SnarlDistanceIndex::TemporaryDistanceIndex::TemporarySnarlRecord& temp_snarl_record,
+        vector<SnarlDistanceIndex::temp_record_ref_t>& all_children,
+        const HandleGraph* graph,
+        size_t size_limit,
+        bool only_top_level_chain_distances) {
+
+    if (!temp_snarl_record.is_root_snarl) {
+        // Boundaries take part in the traversals below, start first and then end.
+        all_children.emplace_back(SnarlDistanceIndex::TEMP_NODE, temp_snarl_record.start_node_id);
+        all_children.emplace_back(SnarlDistanceIndex::TEMP_NODE, temp_snarl_record.end_node_id);
+    }
+
+    // A size_limit of 0 never counts as oversized.
+    const bool is_oversized = size_limit != 0 && temp_snarl_record.node_count > size_limit;
+
+    if (!is_oversized) {
+        const bool skip_distances = size_limit == 0 || only_top_level_chain_distances;
+        if (skip_distances) {
+            temp_snarl_record.include_distances = false;
+        }
+        // The matrix pass is also what clears is_simple when the snarl turns out not to be simple.
+        populate_distance_matrix_if_needed(temp_index, snarl_index, temp_snarl_record, all_children, graph, size_limit, only_top_level_chain_distances);
+        return;
+    }
+
+    // Oversized: remember the largest such snarl, and never treat it as simple.
+    temp_index.most_oversized_snarl_size = std::max(temp_index.most_oversized_snarl_size, temp_snarl_record.node_count);
+    temp_index.use_oversized_snarls = true;
+    temp_snarl_record.is_simple = false;
+    populate_hub_labeling(temp_index, snarl_index, temp_snarl_record, all_children, graph);
+
+    // Ask the hub labeling for the boundary-to-boundary connectivity distances
+    // (excluding boundary lengths). The start is child rank 0 and the end is child rank 1.
+    temp_snarl_record.min_length = promote_distance<size_t>(hhl_query(temp_snarl_record.hub_labels.begin(), bgid(0, false, true), bgid(1, false, false)));
+    temp_snarl_record.distance_start_start = promote_distance<size_t>(hhl_query(temp_snarl_record.hub_labels.begin(), bgid(0, false, true), bgid(0, true, false)));
+    temp_snarl_record.distance_end_end = promote_distance<size_t>(hhl_query(temp_snarl_record.hub_labels.begin(), bgid(1, true, true), bgid(1, false, false)));
+    // TODO: Should this be here or should it be part of populate_hub_labeling()? Or its own function?
+}
+
+// Phase 4: For a simple snarl, record the orientation of each child node.
+// A child is flagged reversed_in_parent exactly when its distance_left_start
+// still holds numeric_limits<size_t>::max().
+// IMPORTANT: this walks temp_snarl_record.children[0..node_count) and NOT the
+// caller's working child list, which by this point may also contain the
+// boundary nodes appended in Phase 3.
+static void mark_simple_snarl_orientations(
+        SnarlDistanceIndex::TemporaryDistanceIndex& temp_index,
+        const SnarlDistanceIndex::TemporaryDistanceIndex::TemporarySnarlRecord& temp_snarl_record) {
+
+    const auto& snarl_children = temp_snarl_record.children;
+    for (size_t child_pos = 0; child_pos < temp_snarl_record.node_count; ++child_pos) {
+        const SnarlDistanceIndex::temp_record_ref_t& child_ref = snarl_children[child_pos];
+#ifdef debug_distance_indexing
+        assert(child_ref.first == SnarlDistanceIndex::TEMP_NODE);
+#endif
+        SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryNodeRecord& node_record =
+            temp_index.get_node(child_ref);
+        const bool left_unreached_from_start =
+            node_record.distance_left_start == std::numeric_limits<size_t>::max();
+        node_record.reversed_in_parent = left_unreached_from_start;
+    }
+}
+
+// Phase 5: Decide whether the snarl is regular and fold its storage needs into
+// the running totals kept on temp_index (max_index_size and max_bits).
+static void finalize_snarl_record(
+        SnarlDistanceIndex::TemporaryDistanceIndex& temp_index,
+        const SnarlDistanceIndex::temp_record_ref_t& snarl_index,
+        SnarlDistanceIndex::TemporaryDistanceIndex::TemporarySnarlRecord& temp_snarl_record,
+        const vector<SnarlDistanceIndex::temp_record_ref_t>& all_children,
+        const HandleGraph* graph) {
+
+    temp_snarl_record.is_regular = check_regularity(temp_index, snarl_index, temp_snarl_record, all_children, graph);
+
+    // Budget for the snarl's own record first.
+    temp_index.max_index_size += temp_snarl_record.get_max_record_length();
+
+    const auto child_count = temp_snarl_record.children.size();
+    if (temp_snarl_record.is_simple) {
+        // A simple snarl gives back one maximum-size node record per child
+        // (presumably because its children are not stored as separate node records).
+        const auto child_records_total = child_count *
+            SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryNodeRecord::get_max_record_length();
+        temp_index.max_index_size -= child_records_total;
+    }
+
+    // Widen the global bit width if this snarl's child count demands it.
+    const auto bits_for_this_snarl = 22 + SnarlDistanceIndex::bit_width(child_count);
+    temp_index.max_bits = std::max(temp_index.max_bits, bits_for_this_snarl);
+}
+
+/**
+ * Fill in the temporary record of one snarl.
+ * The record already knows its boundaries, and the snarl tree relationships are
+ * already in place. This drives the remaining phases in order: tip detection,
+ * topological ordering of the children (skipped for the root snarl), distance
+ * computation, orientation marking for simple snarls, and finalization.
+ * Within a non-root snarl the start boundary is treated as rank 0 and the end
+ * boundary as rank 1; the ordered children receive rank_in_parent values
+ * starting at 2.
+ */
+void populate_snarl_index(
+                SnarlDistanceIndex::TemporaryDistanceIndex& temp_index,
+                SnarlDistanceIndex::temp_record_ref_t snarl_index, size_t size_limit,
+                bool only_top_level_chain_distances, const HandleGraph* graph) {
+#ifdef debug_distance_indexing
+    cerr << "Getting the distances for snarl " << temp_index.structure_start_end_as_string(snarl_index) << endl;
+    assert(snarl_index.first == SnarlDistanceIndex::TEMP_SNARL);
+#endif
+    auto& snarl_record = temp_index.get_snarl(snarl_index);
+    // Assume simple until one of the later phases proves otherwise.
+    snarl_record.is_simple = true;
+
+    // Working copy of the child list; the distance phase may append the boundary nodes to it.
+    vector<SnarlDistanceIndex::temp_record_ref_t> working_children(snarl_record.children);
+
+    identify_tips(temp_index, snarl_record, working_children, graph);
+
+    const bool has_boundaries = !snarl_record.is_root_snarl;
+    if (has_boundaries) {
+        // Re-rank the children in topological order.
+        const auto sorted_to_original = topo_sort_children(temp_index, snarl_index, snarl_record, working_children, graph);
+        apply_topo_permutation(temp_index, snarl_record, working_children, sorted_to_original);
+    }
+
+    compute_snarl_distances(temp_index, snarl_index, snarl_record, working_children, graph,
+                            size_limit, only_top_level_chain_distances);
+
+#ifdef debug_distance_indexing
+    cerr << "snarl " << temp_index.structure_start_end_as_string(snarl_index) << " is_simple: " << snarl_record.is_simple << endl;
+#endif
+
+    // Orientation marking only applies if the snarl survived as simple.
+    if (snarl_record.is_simple) {
+        mark_simple_snarl_orientations(temp_index, snarl_record);
+    }
+
+    finalize_snarl_record(temp_index, snarl_index, snarl_record, working_children, graph);
+}
+
+
+// Build hub labels for a snarl and store them, packed, in temp_snarl_record.hub_labels.
+// Steps: build the overlay graph for the snarl, turn it into a contraction
+// hierarchy, derive forward and reverse labels per vertex, then pack them.
+void populate_hub_labeling(SnarlDistanceIndex::TemporaryDistanceIndex& temp_index, const SnarlDistanceIndex::temp_record_ref_t& snarl_index, SnarlDistanceIndex::TemporaryDistanceIndex::TemporarySnarlRecord& temp_snarl_record, const vector<SnarlDistanceIndex::temp_record_ref_t>& all_children, const HandleGraph* graph) {
+  CHOverlay overlay = make_boost_graph(temp_index, snarl_index, temp_snarl_record, all_children, graph);
+
+#ifdef debug_hub_label_build
+  // Dump the overlay graph to stderr for debugging
+  std::cerr << "=== CHOverlay Graph Dump ===" << std::endl;
+  std::cerr << overlay << std::endl;
+  std::cerr << "=== End CHOverlay Dump ===" << std::endl;
+#endif
+
+  make_contraction_hierarchy(overlay);
+
+  // One (initially empty) label list per overlay vertex, in each direction.
+  vector<vector<HubRecord>> forward_labels(num_vertices(overlay));
+  vector<vector<HubRecord>> reverse_labels(num_vertices(overlay));
+  create_labels(forward_labels, reverse_labels, overlay);
+#ifdef debug_hub_label_storage
+  std::cerr << "Hub labels unpacked:" << std::endl;
+  for (const auto& label_table : {forward_labels, reverse_labels}) {
+    std::cerr << "Labels for all nodes:" << std::endl;
+    size_t rank = 0;
+    for (const auto& rank_labels : label_table) {
+        std::cerr << "\tLabels for rank " << rank << ":" << std::endl;
+        for (const HubRecord& record : rank_labels) {
+            std::cerr << "\t\tHub: " << record.hub << " Dist: " << record.dist << std::endl;
+        }
+        ++rank;
+    }
+  }
+#endif
+
+  // Store the packed form on the snarl record
+  temp_snarl_record.hub_labels = pack_labels(forward_labels, reverse_labels);
+#ifdef debug_hub_label_storage
+  std::cerr << "Hub labels as packed: ";
+  for (size_t slot = 0; slot < temp_snarl_record.hub_labels.size(); slot++) {
+    if (slot != 0) {
+        std::cerr << " | ";
+    }
+    std::cerr << temp_snarl_record.hub_labels[slot];
+  }
+  std::cerr << std::endl;
+#endif
+}
+
+// Visit every entry of all_children (in reverse order) and, for each one:
+//   1. flag tips and non-simple structure on the snarl/node records, and
+//   2. compute its row of distances via populate_distance_matrix_row(), unless
+//      distances are not wanted for that row.
+// When size_limit is 0 or only_top_level_chain_distances is set, the snarl's
+// include_distances flag is cleared and rows are computed only for children
+// that are boundaries or tips of a non-root snarl.
+void populate_distance_matrix_if_needed(SnarlDistanceIndex::TemporaryDistanceIndex& temp_index, const SnarlDistanceIndex::temp_record_ref_t& snarl_index, SnarlDistanceIndex::TemporaryDistanceIndex::TemporarySnarlRecord& temp_snarl_record, const vector<SnarlDistanceIndex::temp_record_ref_t>& all_children, const HandleGraph* graph, size_t size_limit, bool only_top_level_chain_distances) {
+    if (size_limit != 0 && !only_top_level_chain_distances) { 
+      //If we are saving distances
+      //Reserve enough space to store all possible distances
+      temp_snarl_record.distances.reserve( temp_snarl_record.node_count > size_limit
+              ? temp_snarl_record.node_count * 2
+              : temp_snarl_record.node_count * temp_snarl_record.node_count);
+    } else {
+      temp_snarl_record.include_distances = false;
+    }
+    for (auto it = all_children.rbegin(); it != all_children.rend(); ++it) {
+        // Visit all the children in reverse order
+        const SnarlDistanceIndex::temp_record_ref_t& start_index = *it;
+
+        bool is_internal_node = false;
+
+        // "Internal node" here means a non-boundary node, or a trivial chain
+        // (which is handled through its start node).
+        if ((start_index.first == SnarlDistanceIndex::TEMP_NODE 
+             && start_index.second != temp_snarl_record.start_node_id 
+             && start_index.second != temp_snarl_record.end_node_id) 
+            || 
+            (start_index.first == SnarlDistanceIndex::TEMP_CHAIN && temp_index.get_chain(start_index).is_trivial)) {
+            // If this is an internal node
+            is_internal_node = true;
+            nid_t node_id = start_index.first == SnarlDistanceIndex::TEMP_NODE ? start_index.second : temp_index.get_chain(start_index).start_node_id;
+            SnarlDistanceIndex::temp_record_ref_t node_index {SnarlDistanceIndex::TEMP_NODE, node_id};
+            size_t rank = start_index.first == SnarlDistanceIndex::TEMP_NODE ? temp_index.get_node(start_index).rank_in_parent
+                                                          : temp_index.get_chain(start_index).rank_in_parent;
+
+            // A side with no edges in the underlying graph makes the node a tip.
+            // NOTE(review): the key used here is rank_in_parent, while
+            // apply_topo_permutation() keys tippy_child_ranks by the position
+            // without the +2 offset -- confirm which convention is intended.
+            // NOTE(review): for a trivial chain is_tip is set on the node record,
+            // but start_is_tip below reads the chain record -- confirm.
+            bool has_edges = false;
+            graph->follow_edges(graph->get_handle(node_id, false), false, [&](const handle_t& next_handle) {
+                has_edges = true;
+            });
+            if (!has_edges) {
+                temp_index.get_node(node_index).is_tip = true;
+                temp_snarl_record.tippy_child_ranks.emplace(rank, false);
+                temp_snarl_record.is_simple=false; //It is a tip so this isn't simple snarl
+            }
+            // Same check from the node's other orientation.
+            has_edges = false;
+            graph->follow_edges(graph->get_handle(node_id, true), false, [&](const handle_t& next_handle) {
+                has_edges = true;
+            });
+            if (!has_edges) {
+                temp_index.get_node(node_index).is_tip = true;
+                temp_snarl_record.tippy_child_ranks.emplace(rank, true);
+                temp_snarl_record.is_simple=false; //It is a tip so this isn't simple snarl
+            }
+        } else if (start_index.first == SnarlDistanceIndex::TEMP_CHAIN && !temp_index.get_chain(start_index).is_trivial) {
+            // If this is an internal chain, then it isn't a simple snarl
+            temp_snarl_record.is_simple=false;
+        }
+
+        bool start_is_tip = start_index.first == SnarlDistanceIndex::TEMP_NODE 
+                      ? temp_index.get_node(start_index).is_tip 
+                      : temp_index.get_chain(start_index).is_tip;
+
+        size_t start_rank = start_index.first == SnarlDistanceIndex::TEMP_NODE 
+                ? temp_index.get_node(start_index).rank_in_parent
+                : temp_index.get_chain(start_index).rank_in_parent;
+
+
+        // The boundary nodes always use the fixed ranks 0 (start) and 1 (end),
+        // whatever their own node records say.
+        if (start_index.first == SnarlDistanceIndex::TEMP_NODE && start_index.second == temp_snarl_record.start_node_id) {
+            start_rank = 0;
+        } else if (start_index.first == SnarlDistanceIndex::TEMP_NODE && start_index.second == temp_snarl_record.end_node_id) {
+            start_rank = 1;
+        } //TODO:
+          //else {
+          //  assert(start_rank != 0 && start_rank != 1);
+          //} 
+
+        //traversal start is not a tip or a boundary node
+        bool start_normal_child = (!start_is_tip && start_rank != 0 && start_rank != 1);
+ 
+        // Skip this row when internal distances are not being kept and the row
+        // belongs to the root snarl or to an ordinary (non-tip, non-boundary) child.
+        if ( (temp_snarl_record.node_count > size_limit || size_limit == 0 || only_top_level_chain_distances) && (temp_snarl_record.is_root_snarl || start_normal_child)) {
+            //If we don't care about internal distances, and we also are not at a boundary or tip
+            //TODO: Why do we care about tips specifically?
+            continue;
+        }
+        //getting here means snarl is not oversized
+        //fill in all distances for a row
+        populate_distance_matrix_row(temp_index, snarl_index, temp_snarl_record, start_index, graph, start_rank, is_internal_node, size_limit);   
+    }                                                                                                                    
+}      
+      
+    
+                        
+namespace {
+
+// Shorthand for the (record type, id) reference used throughout the temporary index.
+using temp_record_ref_t = SnarlDistanceIndex::temp_record_ref_t;
+// One Dijkstra queue entry: (distance so far, (child, traversal orientation)).
+using NetgraphNode      = pair<size_t, pair<temp_record_ref_t, bool>>;
+// Orders queue entries by distance only. Because it compares with '>', a
+// std::priority_queue using it yields the smallest distance first.
+struct NetgraphCmp {
+    bool operator()(const NetgraphNode& a, const NetgraphNode& b) const {
+        return a.first > b.first;
+    }
+};
+// Min-distance-first queue of (distance, (child, orientation)) entries.
+using DijkstraQueue = priority_queue<NetgraphNode, vector<NetgraphNode>, NetgraphCmp>;
+// Set of (child, orientation) pairs that have already been expanded.
+using VisitedSet    = unordered_set<pair<temp_record_ref_t, bool>>;
+
+// Everything the per-row helpers in this namespace need while computing the
+// distances out of one child of one snarl. Holds references, so it must not
+// outlive the objects it points at.
+struct DistanceRowContext {
+    // The temporary index being built; used to look up node and chain records.
+    SnarlDistanceIndex::TemporaryDistanceIndex&                       temp_index;
+    // Reference to the snarl whose row is being computed.
+    temp_record_ref_t                                                 snarl_index;
+    // The snarl's record; the helpers write distances and is_simple into it.
+    SnarlDistanceIndex::TemporaryDistanceIndex::TemporarySnarlRecord& snarl_record;
+    // The child the traversal starts from.
+    temp_record_ref_t                                                 start_index;
+    // Rank of start_index; in a non-root snarl 0 and 1 denote the start and end boundary.
+    size_t                                                            start_rank;
+    // Whether start_index counts as an internal node for the simple-snarl checks.
+    bool                                                              is_internal_node;
+    // Snarl size limit for storing distances; 0 means distances are not recorded.
+    size_t                                                            size_limit;
+    // Adjacency view used to enumerate outgoing edges between children.
+    SnarlChildGraph&                                                  child_graph;
+};
+
+// Describes the side of a neighbouring child that an edge arrives at.
+struct NeighborSide {
+    // Rank of the neighbour: 0 for the snarl start node, 1 for the snarl end
+    // node, otherwise the neighbour's rank_in_parent.
+    size_t rank;
+    // Orientation the neighbour is entered in; always false for a boundary.
+    bool   reversed;
+    // True when the neighbour is a boundary (rank 0 or 1) of a non-root snarl.
+    bool   is_boundary;
+};
+
+// List the orientations a traversal should be started in from ctx.start_index.
+// The snarl's start node yields only start_node_rev, the snarl's end node
+// yields only the opposite of end_node_rev, and any other child yields both
+// orientations (reverse first, then forward).
+[[nodiscard]] vector<bool> starting_directions(const DistanceRowContext& ctx) {
+    const auto& snarl = ctx.snarl_record;
+    const bool starts_on_node = ctx.start_index.first == SnarlDistanceIndex::TEMP_NODE;
+
+    if (starts_on_node && ctx.start_index.second == snarl.start_node_id) {
+        return vector<bool>(1, snarl.start_node_rev);
+    }
+    if (starts_on_node && ctx.start_index.second == snarl.end_node_id) {
+        return vector<bool>(1, !snarl.end_node_rev);
+    }
+    return vector<bool>{true, false};
+}
+
+// Clear the snarl's is_simple flag if the edge just seen rules out a simple snarl.
+// That is the case when:
+//   - the edge arrives back on either end node of the current child, or
+//   - (non-root snarl, traversal started at the start boundary) we have moved
+//     past the start and the edge goes anywhere but the snarl's end node, or
+//   - (non-root snarl, traversal started at the end boundary) we have moved
+//     past the end and the edge goes anywhere but the snarl's start node.
+// The flag is never set back to true here.
+void update_simplicity_on_edge(DistanceRowContext& ctx,
+        temp_record_ref_t current_index,
+        nid_t current_end_nid,
+        nid_t current_other_side_nid,
+        temp_record_ref_t next_index,
+        nid_t arriving_nid) {
+    const auto& snarl = ctx.snarl_record;
+
+    bool breaks_simplicity =
+        arriving_nid == current_end_nid || arriving_nid == current_other_side_nid;
+
+    if (!breaks_simplicity && !snarl.is_root_snarl && current_index != ctx.start_index) {
+        const bool next_is_node = next_index.first == SnarlDistanceIndex::TEMP_NODE;
+        if (ctx.start_rank == 0) {
+            const bool reaches_end = next_is_node && next_index.second == snarl.end_node_id;
+            breaks_simplicity = !reaches_end;
+        } else if (ctx.start_rank == 1) {
+            const bool reaches_start = next_is_node && next_index.second == snarl.start_node_id;
+            breaks_simplicity = !reaches_start;
+        }
+    }
+
+    if (breaks_simplicity) {
+        ctx.snarl_record.is_simple = false;
+    }
+}
+
+// Work out rank, orientation and boundary status for the neighbour an edge leads to.
+// The snarl's start node gets rank 0 and its end node rank 1; anything else
+// uses the rank_in_parent stored on its node or chain record. In a non-root
+// snarl ranks 0 and 1 mark a boundary, and a boundary is always reported as
+// not reversed.
+[[nodiscard]] NeighborSide resolve_neighbor_side(const DistanceRowContext& ctx,
+        temp_record_ref_t next_index, bool next_rev) {
+    const bool next_is_node = next_index.first == SnarlDistanceIndex::TEMP_NODE;
+
+    NeighborSide side{};
+    if (next_is_node && next_index.second == ctx.snarl_record.start_node_id) {
+        side.rank = 0;
+    } else if (next_is_node && next_index.second == ctx.snarl_record.end_node_id) {
+        side.rank = 1;
+    } else if (next_is_node) {
+        side.rank = ctx.temp_index.get_node(next_index).rank_in_parent;
+    } else {
+        side.rank = ctx.temp_index.get_chain(next_index).rank_in_parent;
+    }
+
+    side.is_boundary = !ctx.snarl_record.is_root_snarl && (side.rank == 0 || side.rank == 1);
+    side.reversed    = !side.is_boundary && next_rev;
+    return side;
+}
+
+// Store the distance from the traversal start to the neighbour side `next`,
+// if that distance belongs in the index and has not been stored already.
+//
+// Where the distance goes depends on which ends are snarl boundaries:
+//   - boundary -> boundary: the snarl record's distance_start_start,
+//     distance_end_end or min_length;
+//   - boundary -> child:    the child's distance_{left,right}_{start,end};
+//   - child    -> child:    the snarl record's `distances` map.
+// Child -> boundary distances are not stored by this function.
+// Only the first value seen for a slot is kept; later calls leave it alone.
+//
+// ctx               Row context (start child, its rank, snarl record, size limit).
+// current_distance  Distance accumulated from the start to this edge.
+// start_rev         Orientation the traversal left the start child in.
+// next              Rank / orientation / boundary status of the neighbour.
+// next_index        Record reference of the neighbour.
+//
+// Returns true if a new distance was stored (max_distance is then updated too).
+// With size_limit == 0 nothing is stored and false is returned, but boundary
+// self-loops are still flagged by setting distance_start_start or
+// distance_end_end to 0.
+[[nodiscard]] bool record_distance(DistanceRowContext& ctx,
+        size_t current_distance,
+        bool start_rev,
+        NeighborSide next,
+        temp_record_ref_t next_index) {
+    bool start_is_boundary = !ctx.snarl_record.is_root_snarl &&
+                              (ctx.start_rank == 0 || ctx.start_rank == 1);
+
+    // Keys for the child-to-child distance map. Boundaries are always keyed as
+    // "not reversed"; a non-boundary start is keyed with the opposite of start_rev.
+    pair<size_t, bool> start_key = start_is_boundary
+        ? make_pair(ctx.start_rank, false) : make_pair(ctx.start_rank, !start_rev);
+    pair<size_t, bool> next_key  = next.is_boundary
+        ? make_pair(next.rank, false) : make_pair(next.rank, next.reversed);
+
+    if (ctx.size_limit == 0 && start_is_boundary && next.is_boundary) {
+        // If not measuring distances, we need to use
+        // distance_start_start and distance_end_end as
+        // connectivity flags so we can still detect reversals
+        // within chains and recognize regular snarls.
+        if (ctx.start_rank == 0 && next.rank == 0) {
+            ctx.snarl_record.distance_start_start = 0;
+#ifdef debug_distance_indexing
+            cerr << "        set loop indicator start start distance " << ctx.snarl_record.distance_start_start << endl;
+#endif
+        } else if (ctx.start_rank == 1 && next.rank == 1) {
+            ctx.snarl_record.distance_end_end = 0;
+#ifdef debug_distance_indexing
+            // Report the value that was actually just written (end-end, not start-start).
+            cerr << "        set loop indicator end end distance " << ctx.snarl_record.distance_end_end << endl;
+#endif
+        }
+        return false;
+    }
+
+    // Nothing to store when distances are off, or when the snarl is over the
+    // size limit and neither end of this pair is a boundary.
+    if (ctx.size_limit == 0 ||
+        !(ctx.snarl_record.node_count <= ctx.size_limit || start_is_boundary || next.is_boundary)) {
+        return false;
+    }
+
+    //If the snarl is too big, then we don't record distances between internal nodes
+    //If we are looking at all distances or we are looking at boundaries
+    bool added_new_distance = false;
+
+    if (start_is_boundary && next.is_boundary) {
+        //If it is between bounds of the snarl, then the snarl stores it
+        if (ctx.start_rank == 0 && next.rank == 0 &&
+            ctx.snarl_record.distance_start_start == std::numeric_limits<size_t>::max()) {
+            ctx.snarl_record.distance_start_start = current_distance;
+#ifdef debug_distance_indexing
+            cerr << "        set start start distance " << ctx.snarl_record.distance_start_start << endl;
+#endif
+            added_new_distance = true;
+        } else if (ctx.start_rank == 1 && next.rank == 1 &&
+                   ctx.snarl_record.distance_end_end == std::numeric_limits<size_t>::max()) {
+            ctx.snarl_record.distance_end_end = current_distance;
+#ifdef debug_distance_indexing
+            // Report the value that was actually just written (end-end, not start-start).
+            cerr << "        set end end distance " << ctx.snarl_record.distance_end_end << endl;
+#endif
+            added_new_distance = true;
+        } else if (((ctx.start_rank == 0 && next.rank == 1) || (ctx.start_rank == 1 && next.rank == 0)) &&
+                   ctx.snarl_record.min_length == std::numeric_limits<size_t>::max()) {
+            ctx.snarl_record.min_length = current_distance;
+            added_new_distance = true;
+        }
+    } else if (start_is_boundary) {
+        //If start is a boundary node, the child (node or chain) stores the distance.
+        //A generic lambda covers both record types, which share these four field names.
+        auto assign_if_unset = [&](auto& rec) -> bool {
+            if (ctx.start_rank == 0 && !next.reversed &&
+                    rec.distance_left_start == std::numeric_limits<size_t>::max()) {
+                rec.distance_left_start = current_distance; return true;
+            } else if (ctx.start_rank == 0 && next.reversed &&
+                    rec.distance_right_start == std::numeric_limits<size_t>::max()) {
+                rec.distance_right_start = current_distance; return true;
+            } else if (ctx.start_rank == 1 && !next.reversed &&
+                    rec.distance_left_end == std::numeric_limits<size_t>::max()) {
+                rec.distance_left_end = current_distance; return true;
+            } else if (ctx.start_rank == 1 && next.reversed &&
+                    rec.distance_right_end == std::numeric_limits<size_t>::max()) {
+                rec.distance_right_end = current_distance; return true;
+            }
+            return false;
+        };
+        added_new_distance = next_index.first == SnarlDistanceIndex::TEMP_NODE
+            ? assign_if_unset(ctx.temp_index.get_node(next_index))
+            : assign_if_unset(ctx.temp_index.get_chain(next_index));
+    } else if (!next.is_boundary &&
+               !ctx.snarl_record.distances.count(make_pair(start_key, next_key))) {
+        //Otherwise the snarl stores it in its distance
+        //If the distance isn't from an internal node to a bound and we haven't stored the distance yet
+        ctx.snarl_record.distances[make_pair(start_key, next_key)] = current_distance;
+        added_new_distance = true;
+#ifdef debug_distance_indexing
+        cerr << "           Adding distance between ranks " << start_key.first << " " << start_key.second
+             << " and " << next_key.first << " " << next_key.second << ": " << current_distance << endl;
+#endif
+    }
+
+    if (added_new_distance) {
+        ctx.snarl_record.max_distance = std::max(ctx.snarl_record.max_distance, current_distance);
+    }
+    return added_new_distance;
+}
+
+// Queue the follow-up Dijkstra states after an edge into (next_index, next_rev).
+// Up to two states are pushed, and only if the edge did not arrive on one of
+// the snarl's boundary nodes:
+//   - the neighbour in the arrival orientation, at current_distance + edge_distance,
+//     provided edge_distance is finite and that state has not been expanded yet;
+//   - for a chain that can turn around at the side it was entered from, the
+//     chain in the opposite orientation, at current_distance plus the loop
+//     distance plus twice the length of the chain's boundary node on that side.
+void enqueue_relaxations(DijkstraQueue& queue, VisitedSet& visited_nodes,
+        DistanceRowContext& ctx,
+        size_t current_distance,
+        temp_record_ref_t next_index,
+        bool next_rev,
+        nid_t arriving_nid,
+        size_t edge_distance) {
+    constexpr size_t unreachable = std::numeric_limits<size_t>::max();
+
+    // Arriving on a boundary node means this edge leaves the snarl.
+    const bool arrived_on_boundary = arriving_nid == ctx.snarl_record.start_node_id ||
+                                     arriving_nid == ctx.snarl_record.end_node_id;
+
+    // Straight through the neighbour. edge_distance already encodes
+    // chain.min_length (infinite if disconnected).
+    if (!arrived_on_boundary &&
+        edge_distance != unreachable &&
+        visited_nodes.count(make_pair(next_index, next_rev)) == 0) {
+        queue.push(make_pair(SnarlDistanceIndex::sum(current_distance, edge_distance),
+                       make_pair(next_index, next_rev)));
+    }
+
+    // Only chains can offer a turnaround.
+    if (next_index.first != SnarlDistanceIndex::TEMP_CHAIN) {
+        return;
+    }
+
+    const auto& chain_record = ctx.temp_index.get_chain(next_index);
+    const size_t loop_distance = next_rev ? chain_record.backward_loops.back()
+                                          : chain_record.forward_loops.front();
+    if (arrived_on_boundary ||
+        loop_distance == unreachable ||
+        visited_nodes.count(make_pair(next_index, !next_rev)) != 0) {
+        return;
+    }
+
+    // The chain can loop back on itself: enter it, turn around, and leave
+    // through the same boundary node, which is therefore crossed twice.
+    const nid_t boundary_id = next_rev ? chain_record.end_node_id : chain_record.start_node_id;
+    const size_t boundary_len = ctx.temp_index.get_node(
+        {SnarlDistanceIndex::TEMP_NODE, boundary_id}).node_length;
+    const size_t turnaround_len = loop_distance + 2 * boundary_len;
+    queue.push(make_pair(SnarlDistanceIndex::sum(current_distance, turnaround_len),
+                   make_pair(next_index, !next_rev)));
+}
+
+// Run one Dijkstra traversal over the snarl's children, starting at
+// ctx.start_index and leaving it in the orientation given by start_rev.
+//
+// For every edge followed, this
+//   - updates the snarl's is_simple flag (update_simplicity_on_edge plus the
+//     internal-node checks below),
+//   - stores the distance to the neighbour where appropriate (record_distance),
+//   - queues the neighbour for further expansion (enqueue_relaxations).
+// The distance recorded for a neighbour is the distance at which the current
+// child was popped; the edge's own length only affects the queued state.
+//
+// ctx        Row context; snarl_record and the child records it reaches are modified.
+// start_rev  Orientation to leave the start child in.
+void run_dijkstra_from_side(DistanceRowContext& ctx, bool start_rev) {
+    //Start a dijkstra traversal from start_index going in the direction indicated by start_rev
+    //Record the distances to each node (child of the snarl) found
+    size_t reachable_node_count = 0; //How many edges were followed in total during this traversal
+
+#ifdef debug_distance_indexing
+    cerr << "  Starting from child " << ctx.temp_index.structure_start_end_as_string(ctx.start_index)
+         << " going " << (start_rev ? "rev" : "fd") << endl;
+#endif
+
+    DijkstraQueue queue;
+    VisitedSet    visited_nodes;
+    // Each child can be visited in two orientations.
+    visited_nodes.reserve(ctx.snarl_record.node_count * 2);
+
+    queue.push(make_pair(0, make_pair(ctx.start_index, start_rev)));
+
+    while (!queue.empty()) {
+
+        //Get the current node from the queue and pop it out of the queue
+        size_t current_distance        = queue.top().first;
+        temp_record_ref_t current_index = queue.top().second.first;
+        bool current_rev               = queue.top().second.second;
+        if (visited_nodes.count(queue.top().second)) {
+            // Stale entry: this (child, orientation) was already expanded at a shorter distance.
+            queue.pop();
+            continue;
+        }
+        visited_nodes.emplace(queue.top().second);
+        queue.pop();
+
+        // Pre-compute the current child's outgoing node ID and "other side" node ID
+        // for is_simple detection (mirrors the original current_end_handle checks).
+        nid_t current_end_nid, current_other_side_nid;
+        if (current_index.first == SnarlDistanceIndex::TEMP_NODE) {
+            // A plain node is its own start and end.
+            current_end_nid        = current_index.second;
+            current_other_side_nid = current_index.second;
+        } else {
+            const auto& ccr = ctx.temp_index.get_chain(current_index);
+            current_end_nid        = current_rev ? ccr.start_node_id : ccr.end_node_id;
+            current_other_side_nid = current_rev ? ccr.end_node_id   : ccr.start_node_id;
+        }
+
+#ifdef debug_distance_indexing
+        cerr << "    at child " << ctx.temp_index.structure_start_end_as_string(current_index) << " going "
+             << (current_rev ? "rev" : "fd") << " outgoing from node " << current_end_nid << endl;
+#endif
+
+        ctx.child_graph.for_each_outgoing(current_index, current_rev, [&](
+                temp_record_ref_t next_index,
+                bool next_rev,
+                size_t edge_distance,
+                nid_t arriving_nid) {
+#ifdef debug_distance_indexing
+            cerr << "      see edge " << current_end_nid << " -> " << arriving_nid << endl;
+#endif
+            update_simplicity_on_edge(ctx, current_index, current_end_nid,
+                current_other_side_nid, next_index, arriving_nid);
+
+            reachable_node_count++;
+
+            NeighborSide next = resolve_neighbor_side(ctx, next_index, next_rev);
+            //If the next thing wasn't a boundary node and this was an internal node, then it isn't a simple snarl
+            if (!next.is_boundary && ctx.is_internal_node) {
+                ctx.snarl_record.is_simple = false;
+            }
+            //TODO: This won't be true of root snarls
+            //else { assert(next.rank != 0 && next.rank != 1); }
+
+#ifdef debug_distance_indexing
+            if (next.rank == 0) std::cerr << "        edge arrived at start" << std::endl;
+            else if (next.rank == 1) std::cerr << "        edge arrived at end" << std::endl;
+#endif
+
+            // record_distance is [[nodiscard]]; whether a new distance was stored
+            // does not matter here, so the result is discarded explicitly.
+            static_cast<void>(record_distance(ctx, current_distance, start_rev, next, next_index));
+
+            enqueue_relaxations(queue, visited_nodes, ctx, current_distance,
+                next_index, next_rev, arriving_nid, edge_distance);
+
+#ifdef debug_distance_indexing
+            cerr << "        reached child " << ctx.temp_index.structure_start_end_as_string(next_index) << " going "
+                 << (next_rev ? "rev" : "fd") << " with distance " << current_distance
+                 << " for ranks " << ctx.start_rank << " " << next.rank << endl;
+#endif
+        });
+    }
+    if (ctx.is_internal_node && reachable_node_count != 1) {
+        //If this is an internal node, then it must have only one edge for it to be a simple snarl
+        ctx.snarl_record.is_simple = false;
+    }
+}
+
+void finalize_internal_node_contribution(DistanceRowContext& ctx) {
+    // The two boundary children (ranks 0 and 1) are not internal nodes: nothing to do.
+    if (ctx.start_rank == 0 || ctx.start_rank == 1) {
+        return;
+    }
+    constexpr size_t infinite = std::numeric_limits<size_t>::max();
+    // Pull the child's length and boundary distances from its node or chain record.
+    size_t child_max_length, dist_start_left, dist_end_right, dist_start_right, dist_end_left;
+    if (ctx.start_index.first == SnarlDistanceIndex::TEMP_NODE) {
+        const auto& node_record = ctx.temp_index.get_node(ctx.start_index);
+        child_max_length = node_record.node_length;
+        dist_start_left  = node_record.distance_left_start;
+        dist_end_right   = node_record.distance_right_end;
+        dist_start_right = node_record.distance_right_start;
+        dist_end_left    = node_record.distance_left_end;
+    } else {
+        const auto& chain_record = ctx.temp_index.get_chain(ctx.start_index);
+        child_max_length = chain_record.max_length;
+        dist_start_left  = chain_record.distance_left_start;
+        dist_end_right   = chain_record.distance_right_end;
+        dist_start_right = chain_record.distance_right_start;
+        dist_end_left    = chain_record.distance_left_end;
+    }
+
+    // Length of the whole snarl through this child, traversed forwards and backwards.
+    const size_t snarl_length_fd = SnarlDistanceIndex::sum(
+        SnarlDistanceIndex::sum(dist_start_left, dist_end_right), child_max_length);
+    const size_t snarl_length_rev = SnarlDistanceIndex::sum(
+        SnarlDistanceIndex::sum(dist_start_right, dist_end_left), child_max_length);
+
+    // Keep the larger of the two, preferring whichever one is not infinite.
+    size_t max_length = snarl_length_fd;
+    if (snarl_length_rev != infinite
+        && (snarl_length_fd == infinite || snarl_length_rev > snarl_length_fd)) {
+        max_length = snarl_length_rev;
+    }
+    if (max_length != infinite) {
+        ctx.snarl_record.max_length = std::max(ctx.snarl_record.max_length, max_length);
+    }
+
+    // In a simple snarl the child must be crossable in exactly one orientation:
+    // distance 0 to both bounds one way round, and no connection the other way.
+    const bool only_fd = dist_start_left == 0 && dist_end_right == 0
+        && dist_end_left == infinite && dist_start_right == infinite;
+    const bool only_rev = dist_start_left == infinite && dist_end_right == infinite
+        && dist_end_left == 0 && dist_start_right == 0;
+    if (ctx.snarl_record.is_simple && !only_fd && !only_rev) {
+        ctx.snarl_record.is_simple = false;
+    }
+}
+
+} // anonymous namespace
+
+void populate_distance_matrix_row(SnarlDistanceIndex::TemporaryDistanceIndex& temp_index, const SnarlDistanceIndex::temp_record_ref_t& snarl_index, SnarlDistanceIndex::TemporaryDistanceIndex::TemporarySnarlRecord& temp_snarl_record, const SnarlDistanceIndex::temp_record_ref_t& start_index, const HandleGraph* graph, size_t start_rank, bool is_internal_node, size_t size_limit) {
+    // Build a child-graph view over this snarl. The children span is left empty
+    // because only for_each_outgoing is used here, and it does not read the span.
+    const std::span<const SnarlDistanceIndex::temp_record_ref_t> no_children{};
+    SnarlChildGraph child_graph(temp_index, snarl_index, no_children, graph);
+    DistanceRowContext ctx{temp_index, snarl_index, temp_snarl_record, start_index,
+                           start_rank, is_internal_node, size_limit, child_graph};
+    // One Dijkstra pass per starting direction, then the per-child wrap-up.
+    auto&& directions = starting_directions(ctx);
+    for (bool start_rev : directions) { run_dijkstra_from_side(ctx, start_rev); }
+    finalize_internal_node_contribution(ctx);
+}
+
+} // namespace vg
diff --git a/src/snarl_distance_index_check_regularity.cpp b/src/snarl_distance_index_check_regularity.cpp
new file mode 100644
index 0000000000..015f5e2b6e
--- /dev/null
+++ b/src/snarl_distance_index_check_regularity.cpp
@@ -0,0 +1,201 @@
+//#define debug_distance_indexing
+
+#include "snarl_distance_index.hpp"
+
+using namespace std;
+using namespace handlegraph;
+namespace vg {
+
+bool check_regularity(const SnarlDistanceIndex::TemporaryDistanceIndex& temp_index, const SnarlDistanceIndex::temp_record_ref_t& snarl_index, const SnarlDistanceIndex::TemporaryDistanceIndex::TemporarySnarlRecord& temp_snarl_record, const vector<SnarlDistanceIndex::temp_record_ref_t>& all_children, const HandleGraph* graph) {
+#ifdef debug_distance_indexing
+    std::cerr << "Check if snarl " << temp_snarl_record.start_node_id << " to " << temp_snarl_record.end_node_id << " with " << all_children.size() << " children is regular" << std::endl;
+#endif
+    // Decide regularity: roots never are, simple snarls always are, anything else must pass every check below.
+    if (temp_snarl_record.is_root_snarl) {
+        // Root snarls are never regular, whatever they contain.
+#ifdef debug_distance_indexing
+        std::cerr << "Snarl is not regular because it is a root snarl." << std::endl;
+#endif
+        return false;
+    }
+    if (temp_snarl_record.is_simple) {
+        // A snarl already marked simple is also regular, so the graph checks are skipped.
+#ifdef debug_distance_indexing
+        std::cerr << "Snarl is regular because it is simple." << std::endl;
+#endif
+        return true;
+    }
+
+    // Handles on the snarl's two boundary nodes, oriented to face out of the snarl.
+    handle_t start_out = graph->get_handle(temp_snarl_record.start_node_id, !temp_snarl_record.start_node_rev);
+    handle_t end_out = graph->get_handle(temp_snarl_record.end_node_id, temp_snarl_record.end_node_rev);
+
+    // Accessors for a child's bounding handles, facing out: a node child is its own bound (reverse strand for its start, forward for its end); a chain child uses its recorded boundary nodes.
+    auto child_start_out = [&](const SnarlDistanceIndex::temp_record_ref_t& child_index) {
+        return child_index.first == SnarlDistanceIndex::TEMP_NODE ? 
+            graph->get_handle(child_index.second, true) :
+            graph->get_handle(
+                temp_index.get_chain(child_index).start_node_id,
+                !temp_index.get_chain(child_index).start_node_rev
+            );
+    };
+    auto child_end_out = [&](const SnarlDistanceIndex::temp_record_ref_t& child_index) {
+        return child_index.first == SnarlDistanceIndex::TEMP_NODE ? 
+            graph->get_handle(child_index.second, false) :
+            graph->get_handle(
+                temp_index.get_chain(child_index).end_node_id,
+                temp_index.get_chain(child_index).end_node_rev
+            );
+    };
+
+    for (const SnarlDistanceIndex::temp_record_ref_t& child_index : all_children) {
+        // We should only have nodes and chains as children
+        assert(child_index.first == SnarlDistanceIndex::TEMP_NODE
+            || child_index.first == SnarlDistanceIndex::TEMP_CHAIN);
+        if (child_index.first == SnarlDistanceIndex::TEMP_NODE
+            && (child_index.second == temp_snarl_record.start_node_id
+                || child_index.second == temp_snarl_record.end_node_id)) {
+            // Skip the snarl's own boundary nodes here; edges on the bounds are checked after this loop.
+            continue;
+        }
+
+        // Have we seen the snarl start? (These three flags persist across both edge scans below.)
+        bool saw_start = false;
+        // Have we seen the snarl end?
+        bool saw_end = false;
+        // Have we seen anything else, or a duplicate snarl boundary?
+        bool saw_other = false;
+
+        auto handle_destination = [&](const handle_t& next_handle) {
+#ifdef debug_distance_indexing
+            std::cerr << "\tConnects to " << graph->get_id(next_handle) << (graph->get_is_reverse(next_handle) ? "-" : "+") << std::endl;
+#endif
+
+            // Every edge out of this end of the child must go to an outward-facing
+            // snarl boundary that hasn't been reached yet.
+            if (next_handle == start_out && !saw_start) {
+                saw_start = true;
+#ifdef debug_distance_indexing
+                std::cerr << "\t\tThis is a new connection to snarl start" << std::endl;
+#endif
+                return true;
+            } else if (next_handle == end_out && !saw_end) {
+                saw_end = true;
+#ifdef debug_distance_indexing
+                std::cerr << "\t\tThis is a new connection to snarl end" << std::endl;
+#endif
+                return true;
+            } else {
+                saw_other = true;
+                // One unwanted edge is disqualifying whatever else we find, so return
+                // false to stop iterating (follow_edges halts when its callback returns false).
+#ifdef debug_distance_indexing
+                std::cerr << "\t\tThis is an unwanted connection!" << std::endl;
+#endif
+                return false;
+            }
+        };
+        
+        // Check the edges off the child start: they must reach exactly one snarl boundary.
+        handle_t here = child_start_out(child_index);
+#ifdef debug_distance_indexing
+            std::cerr << "Look right from " << graph->get_id(here) << (graph->get_is_reverse(here) ? "-" : "+") << std::endl;
+#endif
+        graph->follow_edges(here, false, handle_destination);
+
+        if (saw_other || !(saw_start != saw_end)) {
+            // We have an edge we shouldn't, or we don't connect to exactly one boundary.
+#ifdef debug_distance_indexing
+            std::cerr << "\tWe must not be regular" << std::endl;
+#endif
+            return false;
+        }
+        
+        // Check the edges off the child end; the flags still hold what the start reached, so this end must reach the other boundary.
+        here = child_end_out(child_index);
+#ifdef debug_distance_indexing
+            std::cerr << "Look right from " << graph->get_id(here) << (graph->get_is_reverse(here) ? "-" : "+") << std::endl;
+#endif
+        graph->follow_edges(here, false, handle_destination);
+
+        if (saw_other || !saw_start || !saw_end) {
+            // We have an edge we shouldn't, or we haven't reached both
+            // boundaries exactly once across the two ends of the child.
+#ifdef debug_distance_indexing
+            std::cerr << "\tWe must not be regular" << std::endl;
+#endif
+            return false;
+        }
+
+        if (child_index.first == SnarlDistanceIndex::TEMP_CHAIN) {
+            // If a child is a chain, check it for loops
+#ifdef debug_distance_indexing
+            std::cerr << "Check child chain for loops." << std::endl;
+#endif
+            const SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryChainRecord& temp_chain_record = temp_index.get_chain(child_index);
+#ifdef debug_distance_indexing
+            std::cerr << "Forward loops:";
+            for (auto& l : temp_chain_record.forward_loops) {
+                std::cerr << " " << l;
+            }
+            std::cerr << std::endl;
+#endif
+
+            if (!temp_chain_record.forward_loops.empty() && temp_chain_record.forward_loops.front() != std::numeric_limits<size_t>::max()) {
+                // The first forward-loop value isn't the max sentinel: there's a forward loop in this child chain, so the snarl's not regular.
+#ifdef debug_distance_indexing
+                std::cerr << "We are not regular because there's a forward loop in this child chain." << std::endl;
+#endif
+                return false;
+            }
+
+#ifdef debug_distance_indexing
+            std::cerr << "Backward loops:";
+            for (auto& l : temp_chain_record.backward_loops) {
+                std::cerr << " " << l;
+            }
+            std::cerr << std::endl;
+#endif
+
+            if (!temp_chain_record.backward_loops.empty() && temp_chain_record.backward_loops.back() != std::numeric_limits<size_t>::max()) {
+                // The last backward-loop value isn't the max sentinel: there's a backward loop in this child chain, so the snarl's not regular.
+#ifdef debug_distance_indexing
+                std::cerr << "We are not regular because there's a backward loop in this child chain." << std::endl;
+#endif
+                return false;
+            }
+        }
+    }
+
+    // Now we know the children are fine; check for disallowed edges that run
+    // from a boundary's inward-facing handle straight back out of that same boundary.
+
+    handle_t start_in = graph->flip(start_out);
+    if (graph->has_edge(start_in, start_out)) {
+#ifdef debug_distance_indexing
+        std::cerr << "We are not regular because we have a start-start loop." << std::endl;
+#endif
+        return false;
+    }
+
+    handle_t end_in = graph->flip(end_out);
+    if (graph->has_edge(end_in, end_out)) {
+#ifdef debug_distance_indexing
+        std::cerr << "We are not regular because we have an end-end loop." << std::endl;
+#endif
+        return false;
+    }
+
+    // If we don't have any disallowed edges, and we don't have any children
+    // without the exact right connectivity, we must be regular.
+
+    // Note: a snarl with no non-boundary children at all also reaches this point and counts as regular.
+    
+#ifdef debug_distance_indexing
+    std::cerr << "We are a regular snarl." << std::endl;
+#endif
+
+    return true;
+}
+
+} // namespace vg
diff --git a/src/snarl_distance_index_child_graph.hpp b/src/snarl_distance_index_child_graph.hpp
new file mode 100644
index 0000000000..3626f75bbc
--- /dev/null
+++ b/src/snarl_distance_index_child_graph.hpp
@@ -0,0 +1,69 @@
+#pragma once
+#include <cstddef>
+#include <functional>
+#include <span>
+#include <utility>
+#include <bdsg/snarl_distance_index.hpp>
+#include <handlegraph/handle_graph.hpp>
+
+namespace vg {
+
+// Read-only view of a snarl's child net-graph.
+//
+// Encapsulates the "get outgoing handle → follow_edges → map to snarl child"
+// kernel shared by topo_sort_children and populate_distance_matrix_row.
+// Both callers need the same three-step operation:
+//   1. compute the handle pointing out of `child` in direction `go_left`
+//   2. follow graph edges from that handle
+//   3. for each landing node, resolve its snarl-level ancestor + direction
+//
+// Dijkstra priority-queue management and topo-sort BFS logic stay with the
+// respective callers.
+class SnarlChildGraph {
+public:
+    using temp_record_ref_t  = SnarlDistanceIndex::temp_record_ref_t;
+    using TempIndex          = SnarlDistanceIndex::TemporaryDistanceIndex;
+
+    // Non-owning: the members are a reference, a span, and a raw pointer, so the referents must outlive this object.
+    // `children` may be empty if the caller doesn't need children(); the other methods don't use it.
+    SnarlChildGraph(TempIndex& temp_index,
+                    temp_record_ref_t snarl_index,
+                    std::span<const temp_record_ref_t> children,
+                    const handlegraph::HandleGraph* graph);
+
+    std::span<const temp_record_ref_t> children() const noexcept;
+
+    // For each graph edge leaving `child` in direction `go_left`, invoke the
+    // callback with:
+    //   neighbor        — snarl-level ancestor of the landing node
+    //   neighbor_rev    — true iff `neighbor` is entered from its right side
+    //                     (chain traversed right-to-left, or node reversed)
+    //   edge_distance   — traversal length of `neighbor`:
+    //                     TEMP_NODE → sequence length of the landing node;
+    //                     TEMP_CHAIN → chain.min_length (∞ if disconnected)
+    //   arriving_node_id — graph node id of the immediate landing handle
+    //                      (before ancestor resolution; needed by callers that
+    //                       must preserve the original follow_edges semantics
+    //                       for is_simple detection)
+    //
+    // Boundary nodes ARE included in the callback; callers filter as needed.
+    void for_each_outgoing(
+        temp_record_ref_t child,
+        bool go_left,
+        const std::function<void(temp_record_ref_t neighbor,
+                                 bool neighbor_rev,
+                                 size_t edge_distance,
+                                 handlegraph::nid_t arriving_node_id)>&) const;
+
+    // Returns {boundary_node_ref, start_node_rev} for start=true,
+    //         {boundary_node_ref, end_node_rev}   for start=false.
+    std::pair<temp_record_ref_t, bool> boundary(bool start) const;
+
+private:
+    TempIndex&                           temp_index_;   // not owned
+    temp_record_ref_t                    snarl_index_;  // held by value
+    std::span<const temp_record_ref_t>   children_;     // view only; may be empty
+    const handlegraph::HandleGraph*      graph_;        // not owned
+};
+
+} // namespace vg
diff --git a/src/snarl_distance_index_query.cpp b/src/snarl_distance_index_query.cpp
new file mode 100644
index 0000000000..4e5191b6ac
--- /dev/null
+++ b/src/snarl_distance_index_query.cpp
@@ -0,0 +1,705 @@
+// #define debug_subgraph
+
+#include "snarl_distance_index.hpp"
+
+using namespace std;
+using namespace handlegraph;
+namespace vg {
+
+void subgraph_in_distance_range(const SnarlDistanceIndex &distance_index,
+                                const Path &path,
+                                const HandleGraph *super_graph,
+                                size_t min_distance, size_t max_distance,
+                                std::unordered_set<nid_t> &subgraph,
+                                bool look_forward) {
+
+  // The position we're starting from - either the start or end of the path
+  pos_t start_pos;
+  size_t node_len;
+  if (look_forward) {
+    start_pos = initial_position(path);
+    node_len =
+        super_graph->get_length(super_graph->get_handle(get_id(start_pos)));
+  } else {
+    start_pos = final_position(path);
+    node_len =
+        super_graph->get_length(super_graph->get_handle(get_id(start_pos)));
+    start_pos = reverse_base_pos(start_pos, node_len);
+  }
+  pair<nid_t, bool> traversal_start =
+      std::make_pair(get_id(start_pos), get_is_rev(start_pos));
+
+#ifdef debug_subgraph
+  cerr << endl
+       << "Find subgraph in distance range " << min_distance << " to "
+       << max_distance << endl;
+  cerr << "Start positon: " << start_pos << endl;
+#endif
+  // The distance from the position to the ends of the current
+  // node(/snarl/chain)
+  size_t current_distance_left = is_rev(start_pos)
+                                     ? node_len - get_offset(start_pos)
+                                     : std::numeric_limits<size_t>::max();
+  size_t current_distance_right = is_rev(start_pos)
+                                      ? std::numeric_limits<size_t>::max()
+                                      : node_len - get_offset(start_pos);
+
+  // Graph node of the start and end of the current node(/snarl/chain) pointing
+  // out
+  net_handle_t current_net =
+      distance_index.get_node_net_handle(get_id(start_pos));
+  net_handle_t parent = distance_index.start_end_traversal_of(
+      distance_index.get_parent(current_net));
+
+  // The id and orientation of nodes that are too close and should be avoided
+  hash_set<pair<id_t, bool>> seen_nodes;
+  // Nodes that we want to start a search from - the distance is smaller than or
+  // equal to min_distance, but we can't walk out any further along the snarl
+  // tree without exceeding it. The distance is the distance from the start
+  // position to the beginning (or end if it's backwards) of the node, including
+  // the position.
+  vector<pair<handle_t, size_t>> search_start_nodes;
+
+  if (((current_distance_left != std::numeric_limits<size_t>::max() &&
+        current_distance_left > min_distance) ||
+       (current_distance_right != std::numeric_limits<size_t>::max() &&
+        current_distance_right > min_distance)) ||
+      (distance_index.is_trivial_chain(parent) &&
+       distance_index.distance_in_parent(distance_index.get_parent(parent),
+                                         parent,
+                                         distance_index.flip(parent)) == 0 &&
+       node_len * 2 > min_distance)) {
+    // If the distance to either end of the node is within the range,
+    // or if there is a loop on the node (a duplication of just the node) and
+    // the node length would put one loop in the distance range
+
+    // Add this node to the subgraph
+    subgraph.emplace(get_id(start_pos));
+
+    handle_t start = is_rev(start_pos)
+                         ? distance_index.get_handle(
+                               distance_index.flip(current_net), super_graph)
+                         : distance_index.get_handle(current_net, super_graph);
+
+    // Add any node one step out from this one to search_start_nodes
+    super_graph->follow_edges(start, false, [&](const handle_t &next_handle) {
+      search_start_nodes.emplace_back(
+          next_handle,
+          is_rev(start_pos) ? current_distance_left : current_distance_right);
+    });
+
+    // Search for reachable nodes
+    subgraph_in_distance_range_walk_graph(
+        super_graph, min_distance, max_distance, subgraph, search_start_nodes,
+        seen_nodes, traversal_start);
+
+    return;
+  }
+
+  while (!distance_index.is_root(parent)) {
+#ifdef debug_subgraph
+    cerr << "At child " << distance_index.net_handle_as_string(current_net)
+         << " with distances " << current_distance_left << " "
+         << current_distance_right << endl;
+    cerr << "Parent is " << distance_index.net_handle_as_string(parent)
+         << " at offset " << SnarlDistanceIndex::get_record_offset(parent)
+         << endl;
+#endif
+
+    size_t max_parent_length = distance_index.maximum_length(parent);
+
+    // Distances to get to the ends of the parent
+    size_t distance_start_left = SnarlDistanceIndex::sum(
+        current_distance_left,
+        distance_index.distance_to_parent_bound(
+            parent, true, distance_index.flip(current_net)));
+    size_t distance_start_right = SnarlDistanceIndex::sum(
+        current_distance_right,
+        distance_index.distance_to_parent_bound(parent, true, current_net));
+    size_t distance_end_left = SnarlDistanceIndex::sum(
+        current_distance_left,
+        distance_index.distance_to_parent_bound(
+            parent, false, distance_index.flip(current_net)));
+    size_t distance_end_right = SnarlDistanceIndex::sum(
+        current_distance_right,
+        distance_index.distance_to_parent_bound(parent, false, current_net));
+
+    if ((current_distance_right != std::numeric_limits<size_t>::max() &&
+         current_distance_right >= min_distance) ||
+        (current_distance_left != std::numeric_limits<size_t>::max() &&
+         current_distance_left >= min_distance) ||
+        (distance_start_right != std::numeric_limits<size_t>::max() &&
+         distance_start_right >= min_distance) ||
+        (distance_end_right != std::numeric_limits<size_t>::max() &&
+         distance_end_right >= min_distance) ||
+        (distance_start_left != std::numeric_limits<size_t>::max() &&
+         distance_start_left >= min_distance) ||
+        (distance_end_left != std::numeric_limits<size_t>::max() &&
+         distance_end_left >= min_distance) ||
+        (max_parent_length != std::numeric_limits<size_t>::max() &&
+         max_parent_length >= min_distance)) {
+      // If the min distance will be exceeded within this parent, then start a
+      // search from the ends of this child
+
+      if (distance_index.is_snarl(parent)) {
+        // If this is the child of a snarl, then just traverse from the end of
+        // the node
+#ifdef debug_subgraph
+        cerr << "Start search in parent "
+             << distance_index.net_handle_as_string(parent);
+#endif
+        if (current_distance_left != std::numeric_limits<size_t>::max()) {
+          // If we can go left
+          net_handle_t bound =
+              distance_index.is_node(current_net)
+                  ? distance_index.flip(current_net)
+                  : distance_index.get_bound(current_net, false, false);
+          if (distance_index.is_sentinel(bound)) {
+            bound = distance_index.get_node_from_sentinel(bound);
+          }
+          handle_t current_node = distance_index.get_handle(bound, super_graph);
+          // Add everything immediately after the left bound of this node/chain
+          super_graph->follow_edges(
+              distance_index.get_handle(bound, super_graph), false,
+              [&](const handle_t &next_handle) {
+                seen_nodes.erase(
+                    make_pair(super_graph->get_id(next_handle),
+                              super_graph->get_is_reverse(next_handle)));
+                search_start_nodes.emplace_back(next_handle,
+                                                current_distance_left);
+              });
+
+#ifdef debug_subgraph
+          cerr << " going left from " << super_graph->get_id(current_node)
+               << (super_graph->get_is_reverse(current_node) ? "rev " : "fd ");
+#endif
+        }
+        if (current_distance_right != std::numeric_limits<size_t>::max()) {
+          // If we can go right
+          net_handle_t bound =
+              distance_index.is_node(current_net)
+                  ? current_net
+                  : distance_index.get_bound(current_net, true, false);
+          if (distance_index.is_sentinel(bound)) {
+            bound = distance_index.get_node_from_sentinel(bound);
+          }
+          handle_t current_node = distance_index.get_handle(bound, super_graph);
+
+          // Add everything immediately after the right bound of this node/chain
+          super_graph->follow_edges(
+              distance_index.get_handle(bound, super_graph), false,
+              [&](const handle_t &next_handle) {
+                seen_nodes.erase(
+                    make_pair(super_graph->get_id(next_handle),
+                              super_graph->get_is_reverse(next_handle)));
+                search_start_nodes.emplace_back(next_handle,
+                                                current_distance_right);
+              });
+
+#ifdef debug_subgraph
+          cerr << " going right from " << super_graph->get_id(current_node)
+               << (super_graph->get_is_reverse(current_node) ? "rev " : "fd ");
+#endif
+        }
+#ifdef debug_subgraph
+        cerr << endl;
+#endif
+      } else {
+#ifdef debug_subgraph
+        cerr << "Start search along parent chain "
+             << distance_index.net_handle_as_string(parent);
+#endif
+        // If this is the child of a chain, then traverse along the chain
+        if (current_distance_left != std::numeric_limits<size_t>::max()) {
+          subgraph_in_distance_range_walk_across_chain(
+              distance_index, super_graph, subgraph,
+              distance_index.flip(current_net), current_distance_left,
+              search_start_nodes, seen_nodes, min_distance, max_distance,
+              false);
+        }
+        if (current_distance_right != std::numeric_limits<size_t>::max()) {
+          subgraph_in_distance_range_walk_across_chain(
+              distance_index, super_graph, subgraph, current_net,
+              current_distance_right, search_start_nodes, seen_nodes,
+              min_distance, max_distance, false);
+        }
+      }
+      subgraph_in_distance_range_walk_graph(
+          super_graph, min_distance, max_distance, subgraph, search_start_nodes,
+          seen_nodes, traversal_start);
+      return;
+    } else if (distance_index.is_snarl(parent)) {
+#ifdef debug_subgraph
+      cerr << "Parent is a snarl of handle type "
+           << SnarlDistanceIndex::get_handle_type(parent) << " at offset "
+           << SnarlDistanceIndex::get_record_offset(parent) << endl;
+#endif
+      // TODO: This might be overkill. It prevents us from adding nodes that
+      // shouldn't be in the subgraph, but might be too slow If we don't check
+      // the other direction, go through the loop and add everything whose
+      // distance is lower than the minimum to seen_nodes
+      vector<pair<handle_t, size_t>> loop_handles_to_check;
+      handle_t start_out = distance_index.get_handle(
+          distance_index.get_bound(parent, false, false), super_graph);
+      handle_t end_out = distance_index.get_handle(
+          distance_index.get_bound(parent, true, false), super_graph);
+      if (current_distance_left != std::numeric_limits<size_t>::max()) {
+        loop_handles_to_check.emplace_back(
+            distance_index.get_handle(
+                distance_index.get_bound(current_net, false, false),
+                super_graph),
+            current_distance_left);
+      }
+      if (current_distance_right != std::numeric_limits<size_t>::max()) {
+        loop_handles_to_check.emplace_back(
+            distance_index.get_handle(
+                distance_index.get_bound(current_net, true, false),
+                super_graph),
+            current_distance_right);
+      }
+      while (!loop_handles_to_check.empty()) {
+        handle_t current_loop_handle = loop_handles_to_check.back().first;
+        size_t current_loop_distance = loop_handles_to_check.back().second;
+        loop_handles_to_check.pop_back();
+
+        // Add to seen_nodes
+        seen_nodes.emplace(super_graph->get_id(current_loop_handle),
+                           super_graph->get_is_reverse(current_loop_handle));
+
+        // Walk one step out from this node
+        super_graph->follow_edges(
+            current_loop_handle, false, [&](const handle_t &next_handle) {
+              // If the next node is close enough and isn't exiting the snarl,
+              // then add it to stack
+              size_t new_distance = SnarlDistanceIndex::sum(
+                  current_loop_distance, super_graph->get_length(next_handle));
+              if (new_distance < min_distance && next_handle != start_out &&
+                  next_handle != end_out &&
+                  seen_nodes.count(std::make_pair(
+                      super_graph->get_id(next_handle),
+                      super_graph->get_is_reverse(next_handle))) == 0) {
+                loop_handles_to_check.emplace_back(next_handle, new_distance);
+              }
+            });
+      }
+    } else if (distance_index.is_chain(parent)) {
+#ifdef debug_subgraph
+      cerr << "Parent is a chain of handle type "
+           << SnarlDistanceIndex::get_handle_type(parent) << " at offset "
+           << SnarlDistanceIndex::get_record_offset(parent) << endl;
+#endif
+      // TODO: This is probably also overkill - walk a chain if there is a
+      // viable loop
+      size_t distance_loop_right = distance_index.distance_in_parent(
+          parent, current_net, current_net, super_graph, max_distance);
+      size_t distance_loop_left = distance_index.distance_in_parent(
+          parent, distance_index.flip(current_net),
+          distance_index.flip(current_net), super_graph, max_distance);
+      if ((current_distance_left != std::numeric_limits<size_t>::max() &&
+           distance_loop_left != std::numeric_limits<size_t>::max()) ||
+          (current_distance_right != std::numeric_limits<size_t>::max() &&
+           distance_loop_right != std::numeric_limits<size_t>::max())) {
+        // If there is a loop that we can take, then take it
+        if (current_distance_left != std::numeric_limits<size_t>::max()) {
+          subgraph_in_distance_range_walk_across_chain(
+              distance_index, super_graph, subgraph,
+              distance_index.flip(current_net), current_distance_left,
+              search_start_nodes, seen_nodes, min_distance, max_distance,
+              false);
+        }
+        if (current_distance_right != std::numeric_limits<size_t>::max()) {
+          subgraph_in_distance_range_walk_across_chain(
+              distance_index, super_graph, subgraph, current_net,
+              current_distance_right, search_start_nodes, seen_nodes,
+              min_distance, max_distance, false);
+        }
+        subgraph_in_distance_range_walk_graph(
+            super_graph, min_distance, max_distance, subgraph,
+            search_start_nodes, seen_nodes, traversal_start);
+        return;
+      }
+    }
+
+    // Remember the bounds of this child so we don't return to it
+    if (current_distance_left != std::numeric_limits<size_t>::max()) {
+      // If we can go left
+      net_handle_t bound =
+          distance_index.is_node(current_net)
+              ? distance_index.flip(current_net)
+              : distance_index.get_bound(current_net, false, false);
+      if (distance_index.is_sentinel(bound)) {
+        bound = distance_index.get_node_from_sentinel(bound);
+      }
+      handle_t current_node = distance_index.get_handle(bound, super_graph);
+      seen_nodes.emplace(super_graph->get_id(current_node),
+                         super_graph->get_is_reverse(current_node));
+    }
+    if (current_distance_right != std::numeric_limits<size_t>::max()) {
+      // If we can go right
+      net_handle_t bound =
+          distance_index.is_node(current_net)
+              ? current_net
+              : distance_index.get_bound(current_net, true, false);
+      if (distance_index.is_sentinel(bound)) {
+        bound = distance_index.get_node_from_sentinel(bound);
+      }
+      handle_t current_node = distance_index.get_handle(bound, super_graph);
+      seen_nodes.emplace(super_graph->get_id(current_node),
+                         super_graph->get_is_reverse(current_node));
+    }
+
+    current_distance_left = std::min(distance_start_left, distance_start_right);
+    current_distance_right = std::min(distance_end_left, distance_end_right);
+
+    current_net = std::move(parent);
+    parent = distance_index.canonical(distance_index.get_parent(current_net));
+  }
+  if (current_distance_left <= min_distance) {
+#ifdef debug_subgraph
+    cerr << "Adding the end of a child of the root "
+         << distance_index.net_handle_as_string(
+                distance_index.get_bound(current_net, false, false))
+         << " with distance " << current_distance_left << endl;
+#endif
+
+    handle_t bound = distance_index.get_handle(
+        distance_index.get_bound(current_net, false, false), super_graph);
+    search_start_nodes.emplace_back(bound, current_distance_left);
+  }
+  if (current_distance_right <= min_distance) {
+#ifdef debug_subgraph
+    cerr << "Adding the end of a child of the root "
+         << distance_index.net_handle_as_string(
+                distance_index.get_bound(current_net, false, false))
+         << " with distance " << current_distance_right << endl;
+#endif
+    handle_t bound = distance_index.get_handle(
+        distance_index.get_bound(current_net, true, false), super_graph);
+    search_start_nodes.emplace_back(bound, current_distance_right);
+  }
+  subgraph_in_distance_range_walk_graph(super_graph, min_distance, max_distance,
+                                        subgraph, search_start_nodes,
+                                        seen_nodes, traversal_start);
+
+  return;
+}
+
+/// Helper for subgraph_in_distance_range.
+/// Given starting handles in the super graph and the distances to each handle
+/// (including the start position and the first position in the handle), add
+/// the ID of every node whose span of distances overlaps
+/// [min_distance, max_distance] to subgraph. Orientations already in
+/// seen_nodes are skipped, and each orientation visited here is added to it.
+/// traversal_start is not referenced by this function.
+void subgraph_in_distance_range_walk_graph(
+    const HandleGraph *super_graph, size_t min_distance, size_t max_distance,
+    std::unordered_set<nid_t> &subgraph,
+    vector<pair<handle_t, size_t>> &start_nodes,
+    hash_set<pair<nid_t, bool>> &seen_nodes,
+    const pair<nid_t, bool> &traversal_start) {
+#ifdef debug_subgraph
+  cerr << "Starting search from nodes " << endl;
+  for (auto &start_handle : start_nodes) {
+    cerr << "\t" << super_graph->get_id(start_handle.first) << " "
+         << super_graph->get_is_reverse(start_handle.first) << " with distance "
+         << start_handle.second << endl;
+  }
+#endif
+
+  // Min-heap on distance: cmp is inverted so top() is the closest handle
+  auto cmp = [](const pair<handle_t, size_t> a,
+                const pair<handle_t, size_t> b) { return a.second > b.second; };
+  priority_queue<pair<handle_t, size_t>, vector<pair<handle_t, size_t>>,
+                 decltype(cmp)>
+      next_handles(cmp);
+  for (auto &start_handle : start_nodes) {
+    next_handles.emplace(start_handle);
+  }
+
+  while (next_handles.size() > 0) {
+    // Traverse the graph, adding nodes if they are within the range
+    handle_t curr_handle = next_handles.top().first;
+    size_t curr_distance = next_handles.top().second;
+    next_handles.pop();
+#ifdef debug_subgraph
+    cerr << "At node " << super_graph->get_id(curr_handle) << " "
+         << super_graph->get_is_reverse(curr_handle) << " with distance "
+         << curr_distance << endl;
+#endif
+    if (seen_nodes.count(make_pair(super_graph->get_id(curr_handle),
+                                   super_graph->get_is_reverse(curr_handle))) ==
+        0) {
+      seen_nodes.emplace(super_graph->get_id(curr_handle),
+                         super_graph->get_is_reverse(curr_handle));
+
+      size_t node_len = super_graph->get_length(curr_handle);
+      size_t curr_distance_end = SnarlDistanceIndex::minus(
+          SnarlDistanceIndex::sum(curr_distance, node_len), 1);
+      if ((curr_distance >= min_distance && curr_distance <= max_distance) ||
+          (curr_distance_end >= min_distance &&
+           curr_distance_end <= max_distance) ||
+          (curr_distance <= min_distance &&
+           curr_distance_end >= max_distance)) {
+#ifdef debug_subgraph
+        cerr << "\tadding node " << super_graph->get_id(curr_handle) << " "
+             << super_graph->get_is_reverse(curr_handle) << " with distance "
+             << curr_distance << " and node length " << node_len << endl;
+#endif
+        subgraph.insert(super_graph->get_id(curr_handle));
+
+      }
+#ifdef debug_subgraph
+      else {
+        cerr << "\tdisregarding node " << super_graph->get_id(curr_handle)
+             << " " << super_graph->get_is_reverse(curr_handle)
+             << " with distance " << curr_distance << " and node length "
+             << node_len << endl;
+      }
+#endif
+      curr_distance = SnarlDistanceIndex::sum(node_len, curr_distance);
+
+      // If the end of this node is still within the maximum distance, queue
+      // every neighbour whose orientation has not been seen yet, using the
+      // distance past the end of this node
+      if (SnarlDistanceIndex::minus(curr_distance, 1) <= max_distance) {
+        super_graph->follow_edges(
+            curr_handle, false, [&](const handle_t &next) {
+              nid_t next_id = super_graph->get_id(next);
+              if (seen_nodes.count(make_pair(
+                      next_id, super_graph->get_is_reverse(next))) == 0) {
+                next_handles.emplace(next, curr_distance);
+              }
+              return true;
+            });
+      }
+    }
+#ifdef debug_subgraph
+    else {
+      cerr << "\tthe node was already seen" << endl;
+    }
+#endif
+  }
+
+#ifdef debug_subgraph
+  cerr << "Subgraph has nodes: ";
+  for (const nid_t &node : subgraph) {
+    cerr << node << ", ";
+  }
+  cerr << endl;
+#endif
+  return;
+}
+/// Helper for subgraph_in_distance_range.
+/// Walk along the parent chain of current_node until the distance traversed
+/// exceeds min_distance or the chain ends, adding the handles to resume the
+/// graph search from to search_start_nodes and the nodes that are too close
+/// to seen_nodes. Does nothing if the parent is a trivial chain. checked_loop
+/// is true once the other direction of the chain has already been walked.
+void subgraph_in_distance_range_walk_across_chain(
+    const SnarlDistanceIndex &distance_index, const HandleGraph *super_graph,
+    std::unordered_set<nid_t> &subgraph, net_handle_t current_node,
+    size_t current_distance, vector<pair<handle_t, size_t>> &search_start_nodes,
+    hash_set<pair<nid_t, bool>> &seen_nodes, const size_t &min_distance,
+    const size_t &max_distance, bool checked_loop) {
+#ifdef debug_subgraph
+  cerr << "Walk along parent chain "
+       << distance_index.net_handle_as_string(
+              distance_index.get_parent(current_node))
+       << " from " << distance_index.net_handle_as_string(current_node)
+       << " with " << current_distance << endl;
+#endif
+  if (distance_index.is_trivial_chain(
+          distance_index.get_parent(current_node))) {
+    return;
+  }
+  bool finished_chain = false;
+  // Did we start a search? If not, add the last node in the chain
+  bool added_nodes = false;
+  while (current_distance <= min_distance && !finished_chain) {
+    finished_chain = distance_index.follow_net_edges(
+        current_node, super_graph, false, [&](const net_handle_t &next) {
+          size_t next_length = distance_index.minimum_length(next);
+          // If the next child is a snarl, find the distance of a loop in the
+          // snarl from one of its bounds back to itself
+          if (distance_index.is_snarl(next)) {
+            net_handle_t bound_fd = distance_index.get_bound(
+                next, distance_index.ends_at(next) == SnarlDistanceIndex::START,
+                true);
+            size_t next_loop = distance_index.distance_in_parent(
+                next, bound_fd, bound_fd, super_graph, max_distance);
+            if (!checked_loop &&
+                next_loop != std::numeric_limits<size_t>::max()) {
+#ifdef debug_subgraph
+              cerr << "\tsnarl loops so also check the other direction" << endl;
+#endif
+              // If we haven't yet checked the chain in the other direction and
+              // this snarl allows us to loop
+              if (SnarlDistanceIndex::sum(next_loop, current_distance) !=
+                      std::numeric_limits<size_t>::max() &&
+                  SnarlDistanceIndex::sum(
+                      SnarlDistanceIndex::sum(next_loop, current_distance),
+                      distance_index.node_length(current_node)) >=
+                      min_distance) {
+#ifdef debug_subgraph
+                cerr << "\t\t add the current node" << endl;
+#endif
+                // If the loop will put us over the edge, then start from the
+                // current node
+                super_graph->follow_edges(
+                    distance_index.get_handle(current_node, super_graph), false,
+                    [&](const handle_t &next_handle) {
+                      search_start_nodes.emplace_back(next_handle,
+                                                      current_distance);
+                    });
+                return true;
+              } else {
+                // Otherwise, switch direction in the chain and walk along it
+                // again
+                subgraph_in_distance_range_walk_across_chain(
+                    distance_index, super_graph, subgraph,
+                    distance_index.flip(current_node),
+                    SnarlDistanceIndex::sum(
+                        SnarlDistanceIndex::sum(current_distance, next_loop),
+                        distance_index.node_length(current_node)),
+                    search_start_nodes, seen_nodes, min_distance, max_distance,
+                    true);
+                checked_loop = true;
+              }
+            }
+            if (next_loop != std::numeric_limits<size_t>::max()) {
+              // TODO: This might be overkill. It prevents us from adding nodes
+              // that shouldn't be in the subgraph, but might be too slow.
+              // If we don't check the other direction, go through the loop
+              // and add everything whose distance is lower than the minimum
+              // to seen_nodes
+              vector<pair<handle_t, size_t>> loop_handles_to_check;
+              handle_t start_out = distance_index.get_handle(
+                  distance_index.get_bound(next, false, false), super_graph);
+              handle_t end_out = distance_index.get_handle(
+                  distance_index.get_bound(next, true, false), super_graph);
+              loop_handles_to_check.emplace_back(
+                  distance_index.get_handle(bound_fd, super_graph),
+                  current_distance);
+              while (!loop_handles_to_check.empty()) {
+                handle_t current_loop_handle =
+                    loop_handles_to_check.back().first;
+                size_t current_loop_distance =
+                    loop_handles_to_check.back().second;
+                loop_handles_to_check.pop_back();
+
+                // Add to seen_nodes
+                seen_nodes.emplace(
+                    super_graph->get_id(current_loop_handle),
+                    super_graph->get_is_reverse(current_loop_handle));
+
+                // Walk one step out from this node
+                super_graph->follow_edges(
+                    current_loop_handle, false,
+                    [&](const handle_t &next_handle) {
+                      // If the next node is close enough and isn't exiting the
+                      // snarl, then add it to stack
+                      size_t new_distance = SnarlDistanceIndex::sum(
+                          current_loop_distance,
+                          super_graph->get_length(next_handle));
+                      if (new_distance < min_distance &&
+                          next_handle != start_out && next_handle != end_out &&
+                          seen_nodes.count(std::make_pair(
+                              super_graph->get_id(next_handle),
+                              super_graph->get_is_reverse(next_handle))) == 0) {
+                        loop_handles_to_check.emplace_back(next_handle,
+                                                           new_distance);
+                      }
+                    });
+              }
+            }
+          }
+          size_t next_max_length = distance_index.maximum_length(next);
+#ifdef debug_subgraph
+          cerr << "\tnext node: " << distance_index.net_handle_as_string(next)
+               << " with distance " << current_distance
+               << " and min and max lengths " << next_length << " "
+               << next_max_length << endl;
+#endif
+          if ((SnarlDistanceIndex::sum(next_max_length, current_distance) !=
+                   std::numeric_limits<size_t>::max() &&
+               SnarlDistanceIndex::sum(next_max_length, current_distance) >=
+                   min_distance)) {
+            if (distance_index.is_node(next)) {
+              size_t curr_distance_end = SnarlDistanceIndex::minus(
+                  SnarlDistanceIndex::sum(next_max_length, current_distance),
+                  1);
+              // If it's a node that puts us over, add the node to the
+              // subgraph, then start the search from that node
+#ifdef debug_subgraph
+              cerr << "\t\tAdding node from a chain "
+                   << distance_index.net_handle_as_string(next)
+                   << " with distance " << current_distance << endl;
+#endif
+              if ((current_distance >= min_distance &&
+                   current_distance <= max_distance) ||
+                  (curr_distance_end >= min_distance &&
+                   curr_distance_end <= max_distance) ||
+                  (current_distance <= min_distance &&
+                   curr_distance_end >= max_distance)) {
+                subgraph.emplace(distance_index.node_id(next));
+              }
+              super_graph->follow_edges(
+                  distance_index.get_handle(next, super_graph), false,
+                  [&](const handle_t &next_handle) {
+                    search_start_nodes.emplace_back(
+                        next_handle,
+                        SnarlDistanceIndex::sum(current_distance, next_length));
+                    seen_nodes.erase(
+                        make_pair(super_graph->get_id(next_handle),
+                                  super_graph->get_is_reverse(next_handle)));
+                  });
+            } else {
+              // If it's a snarl, then we'll start from the last node
+#ifdef debug_subgraph
+              cerr << "\t\tAdding node from a chain "
+                   << distance_index.net_handle_as_string(next)
+                   << " with distance " << current_distance << endl;
+#endif
+              super_graph->follow_edges(
+                  distance_index.get_handle(current_node, super_graph), false,
+                  [&](const handle_t &next_handle) {
+                    search_start_nodes.emplace_back(next_handle,
+                                                    current_distance);
+                    seen_nodes.erase(
+                        make_pair(super_graph->get_id(next_handle),
+                                  super_graph->get_is_reverse(next_handle)));
+                  });
+            }
+            // If we added something, stop traversing the chain
+            added_nodes = true;
+            return true;
+          } else if (distance_index.is_node(next)) {
+            seen_nodes.emplace(distance_index.node_id(next),
+                               distance_index.ends_at(next) ==
+                                   SnarlDistanceIndex::START);
+          }
+          current_node = next;
+          current_distance =
+              SnarlDistanceIndex::sum(next_length, current_distance);
+          if (current_distance > max_distance) {
+            added_nodes = true;
+            return true;
+          } else {
+            return false;
+          }
+        });
+  }
+  if (!added_nodes && current_distance <= max_distance) {
+    // If we haven't added anything and haven't exceeded the distance limit,
+    // then start from the end of the chain
+    handle_t bound = distance_index.get_handle(current_node, super_graph);
+
+    super_graph->follow_edges(bound, false, [&](const handle_t &next_handle) {
+      search_start_nodes.emplace_back(next_handle, current_distance);
+      seen_nodes.erase(make_pair(super_graph->get_id(next_handle),
+                                 super_graph->get_is_reverse(next_handle)));
+    });
+  }
+}
+
+} // namespace vg
diff --git a/src/snarls.cpp b/src/snarls.cpp
index abaa507681..004021d6aa 100644
--- a/src/snarls.cpp
+++ b/src/snarls.cpp
@@ -10,6 +10,7 @@
 #include "snarls.hpp"
 #include "vg/io/json2pb.h"
 #include "subgraph_overlay.hpp"
+#include "crash.hpp"
 
 namespace vg {
 
@@ -20,7 +21,7 @@ SnarlManager SnarlFinder::find_snarls_parallel() {
 }
 
 HandleGraphSnarlFinder::HandleGraphSnarlFinder(const HandleGraph* graph) : graph(graph) {
-    // Nothing to do!
+    crash_unless(graph != nullptr);
 }
 
 SnarlManager HandleGraphSnarlFinder::find_snarls_unindexed() {
diff --git a/src/subcommand/call_main.cpp b/src/subcommand/call_main.cpp
index 4460e43d9e..11c09b9c86 100644
--- a/src/subcommand/call_main.cpp
+++ b/src/subcommand/call_main.cpp
@@ -801,7 +801,7 @@ int main_call(int argc, char** argv) {
 
     unique_ptr<AlignmentEmitter> alignment_emitter;
     if (gaf_output) {
-        alignment_emitter = vg::io::get_non_hts_alignment_emitter("-", "GAF", {}, get_thread_count(), graph);
+        alignment_emitter = vg::io::get_non_hts_alignment_emitter("-", "GAF", {}, vg::get_thread_count(), graph);
         // TODO: There should be a general function for emitting headers. See giraffe_main.cpp.
         io::GafAlignmentEmitter* gaf_emitter = dynamic_cast<io::GafAlignmentEmitter*>(alignment_emitter.get());
         if (gbz_graph.get() != nullptr && gaf_emitter != nullptr) {
diff --git a/src/subcommand/gampcompare_main.cpp b/src/subcommand/gampcompare_main.cpp
index 01a5d59717..96bcd85ec0 100644
--- a/src/subcommand/gampcompare_main.cpp
+++ b/src/subcommand/gampcompare_main.cpp
@@ -215,8 +215,13 @@ int main_gampcompare(int argc, char** argv) {
                         for (size_t j = 0; j < path_mapped_positions.size(); ++j) {
                             if (path_true_positions[i].second == path_mapped_positions[j].second) {
                                 // there is a pair of positions on the same strand of the same path
-                                abs_dist = min<int64_t>(abs_dist,
-                                    std::abs(static_cast<int64_t>(path_true_positions[i].first) - static_cast<int64_t>(path_mapped_positions[j].first)));
+                                abs_dist = min<int64_t>(
+                                    abs_dist,
+                                    std::abs(
+                                        static_cast<int64_t>(path_true_positions[i].first) - 
+                                        static_cast<int64_t>(path_mapped_positions[j].first)
+                                    )
+                                );
                             }
                         }
                     }
diff --git a/src/subcommand/giraffe_main.cpp b/src/subcommand/giraffe_main.cpp
index a9cab2c577..f1cf0250a5 100644
--- a/src/subcommand/giraffe_main.cpp
+++ b/src/subcommand/giraffe_main.cpp
@@ -1756,42 +1756,47 @@ int main_giraffe(int argc, char** argv) {
     if (show_progress) {
         logger.info() << "Loading Minimizer Index" << endl;
     }
+    IndexName minimizer_indexname;
     unique_ptr<gbwtgraph::DefaultMinimizerIndex> minimizer_index;
     MinimizerIndexParameters::PayloadType payload_type = MinimizerIndexParameters::PAYLOAD_ZIPCODES;
     if (map_long_reads) {
         if (use_path_minimizer) {
-            minimizer_index = vg::io::VPKG::load_one<gbwtgraph::DefaultMinimizerIndex>(registry.require("Long Read PathMinimizers").at(0));
+            minimizer_indexname = "Long Read PathMinimizers";
             payload_type = MinimizerIndexParameters::PAYLOAD_ZIPCODES_WITH_PATHS;
         } else {
             // Use the long read minimizers
-            minimizer_index = vg::io::VPKG::load_one<gbwtgraph::DefaultMinimizerIndex>(registry.require("Long Read Minimizers").at(0));
+            minimizer_indexname = "Long Read Minimizers";
         }
     } else {
-        minimizer_index = vg::io::VPKG::load_one<gbwtgraph::DefaultMinimizerIndex>(registry.require("Short Read Minimizers").at(0));
+        minimizer_indexname = "Short Read Minimizers";
     }
+    if (!registry.predates("Giraffe Distance Index", minimizer_indexname)) {
+        logger.error() << registry.require("Giraffe Distance Index").at(0) << " is newer than " << registry.require(minimizer_indexname).at(0) << " which depends on it" << std::endl;
+    }
+    minimizer_index = vg::io::VPKG::load_one<gbwtgraph::DefaultMinimizerIndex>(registry.require(minimizer_indexname).at(0));
     require_payload(*minimizer_index, payload_type);
 
     // Grab the zipcodes
     if (show_progress) {
         logger.info() << "Loading Zipcodes" << endl;
     }
+    IndexName oversized_zipcodes_indexname;
     ZipCodeCollection oversized_zipcodes;        
     if (map_long_reads) {
         if (use_path_minimizer) {
-            ifstream zip_in (registry.require("Long Read PathZipcodes").at(0));
-            oversized_zipcodes.deserialize(zip_in);
-            zip_in.close();
+            oversized_zipcodes_indexname = "Long Read PathZipcodes";
         } else {
-            ifstream zip_in (registry.require("Long Read Zipcodes").at(0));
-            oversized_zipcodes.deserialize(zip_in);
-            zip_in.close();
+            oversized_zipcodes_indexname = "Long Read Zipcodes";
         }
-        
     } else {
-        ifstream zip_in (registry.require("Short Read Zipcodes").at(0));
-        oversized_zipcodes.deserialize(zip_in);
-        zip_in.close();
+        oversized_zipcodes_indexname = "Short Read Zipcodes";
+    }
+    if (!registry.predates("Giraffe Distance Index", oversized_zipcodes_indexname)) {
+        logger.error() << registry.require("Giraffe Distance Index").at(0) << " is newer than " << registry.require(oversized_zipcodes_indexname).at(0) << " which depends on it" << std::endl;
     }
+    ifstream zip_in (registry.require(oversized_zipcodes_indexname).at(0));
+    oversized_zipcodes.deserialize(zip_in);
+    zip_in.close();
 
 
     // Grab the GBZ
@@ -1805,6 +1810,14 @@ int main_giraffe(int argc, char** argv) {
     if (show_progress) {
         logger.info() << "Loading Distance Index" << endl;
     }
+    // TODO: Now that we enforce that the minimizer and zipcodes files are
+    // newer than the distance index, we really shouldn't modify it ourselves
+    // by fixing any indirect pointers that may still be in it. So we should be
+    // able to open the file read-only and map the file read-only here, which
+    // in turn would solve problems with writable mappings being slow on shared
+    // filesystems even when not being written. But the VPKG system doesn't
+    // really support doing that, so we'd have to get the file descriptor
+    // manually and deserialize() on it and close() it later.
     auto distance_index = vg::io::VPKG::load_one<SnarlDistanceIndex>(registry.require("Giraffe Distance Index").at(0));
     
     if (show_progress) {
diff --git a/src/subcommand/haplotypes_main.cpp b/src/subcommand/haplotypes_main.cpp
index f0fa15c20f..4bffd2b852 100644
--- a/src/subcommand/haplotypes_main.cpp
+++ b/src/subcommand/haplotypes_main.cpp
@@ -965,7 +965,7 @@ void validate_error_sequence(const Logger& logger, size_t chain_id, size_t subch
 }
 
 std::string validate_unary_path(const HandleGraph& graph, handle_t from, handle_t to) {
-    hash_set<handle_t> visited;
+    vg::hash_set<handle_t> visited;
     handle_t curr = from;
     while (curr != to) {
         if (visited.find(curr) != visited.end()) {
@@ -989,7 +989,7 @@ std::string validate_unary_path(const HandleGraph& graph, handle_t from, handle_
 // Returns true if the path from (start, offset) reaches the end without revisiting start or leaving the subchain.
 // The path may continue in subsequent fragments.
 bool trace_path(
-    const gbwt::GBWT& index, const gbwt::FragmentMap& fragment_map, const hash_set<nid_t>& subchain_nodes,
+    const gbwt::GBWT& index, const gbwt::FragmentMap& fragment_map, const vg::hash_set<nid_t>& subchain_nodes,
     gbwt::size_type sequence_id, gbwt::node_type start, gbwt::size_type offset, gbwt::node_type end
 ) {
     gbwt::edge_type pos(start, offset);
@@ -1132,8 +1132,8 @@ void validate_chain(const Logger& logger,
         // Sequences: normal subchains.
         if (subchain.type == Haplotypes::Subchain::normal) {
             std::vector<gbwt::size_type> da = r_index.decompressDA(subchain.start);
-            hash_set<nid_t> nodes = extract_subchain(graph, gbwtgraph::GBWTGraph::node_to_handle(subchain.start), gbwtgraph::GBWTGraph::node_to_handle(subchain.end));
-            hash_set<Haplotypes::sequence_type> selected;
+            vg::hash_set<nid_t> nodes = extract_subchain(graph, gbwtgraph::GBWTGraph::node_to_handle(subchain.start), gbwtgraph::GBWTGraph::node_to_handle(subchain.end));
+            vg::hash_set<Haplotypes::sequence_type> selected;
             for (size_t i = 0; i < da.size(); i++) {
                 if (trace_path(*(graph.index), fragment_map, nodes, da[i], subchain.start, i, subchain.end)) {
                     selected.insert(Haplotypes::sequence_type(da[i], i));
@@ -1159,7 +1159,7 @@ void validate_chain(const Logger& logger,
                 std::string message = expected_got(da.size(), subchain.sequences.size()) + " sequences (prefix / suffix)";
                 validate_error_subchain(logger, chain_id, subchain_id, message);
             }
-            hash_set<Haplotypes::sequence_type> truth;
+            vg::hash_set<Haplotypes::sequence_type> truth;
             for (size_t i = 0; i < da.size(); i++) {
                 truth.insert({ da[i], i });
             }
@@ -1180,7 +1180,7 @@ void validate_chain(const Logger& logger,
 
         // Kmers.
         if (subchain.type != Haplotypes::Subchain::full_haplotype) {
-            hash_set<Haplotypes::Subchain::kmer_type> all_kmers;
+            vg::hash_set<Haplotypes::Subchain::kmer_type> all_kmers;
             for (size_t i = 0; i < subchain.kmers.size(); i++) {
                 all_kmers.insert(subchain.kmers[i]);
             }
@@ -1188,14 +1188,14 @@ void validate_chain(const Logger& logger,
                 std::string message = expected_got(subchain.kmers.size(), all_kmers.size()) + " kmers";
                 validate_error_subchain(logger, chain_id, subchain_id, message);
             }
-            hash_map<Haplotypes::Subchain::kmer_type, size_t> used_kmers; // (kmer used in haplotypes, number of sequences that contain it)
-            hash_map<Haplotypes::Subchain::kmer_type, size_t> missing_kmers; // (kmer not used in haplotypes, number of sequences that contain it)
+            vg::hash_map<Haplotypes::Subchain::kmer_type, size_t> used_kmers; // (kmer used in haplotypes, number of sequences that contain it)
+            vg::hash_map<Haplotypes::Subchain::kmer_type, size_t> missing_kmers; // (kmer not used in haplotypes, number of sequences that contain it)
             for (size_t i = 0; i < subchain.sequences.size(); i++) {
                 std::vector<std::string> haplotype = get_haplotype(
                     graph, fragment_map,
                     subchain.sequences[i], subchain.start, subchain.end, minimizer_index.k()
                 );
-                hash_map<Haplotypes::Subchain::kmer_type, bool> unique_minimizers; // (kmer, used in the sequence)
+                vg::hash_map<Haplotypes::Subchain::kmer_type, bool> unique_minimizers; // (kmer, used in the sequence)
                 for (const std::string& sequence : haplotype) {
                     auto minimizers = minimizer_index.minimizers(sequence);
                     for (auto& minimizer : minimizers) {
@@ -1322,7 +1322,7 @@ void validate_haplotypes(const Logger& logger,
     if (verbosity >= HaplotypePartitioner::Verbosity::verbosity_detailed) {
         logger.info() << "Validating kmer specificity" << std::endl;
     }
-    hash_map<Haplotypes::Subchain::kmer_type, std::pair<size_t, size_t>> kmers;
+    vg::hash_map<Haplotypes::Subchain::kmer_type, std::pair<size_t, size_t>> kmers;
     size_t collisions = 0, total_kmers = 0;
     for (size_t chain_id = 0; chain_id < haplotypes.components(); chain_id++) {
         const Haplotypes::TopLevelChain& chain = haplotypes.chains[chain_id];
diff --git a/src/subcommand/inject_main.cpp b/src/subcommand/inject_main.cpp
index 6fc7a99b8c..3dd6672fd6 100644
--- a/src/subcommand/inject_main.cpp
+++ b/src/subcommand/inject_main.cpp
@@ -138,7 +138,7 @@ int main_inject(int argc, char** argv) {
         set_crash_context(aln.name());
         if (add_identity) {
             // Calculate & save identity statistic
-            aln.set_identity(identity(aln.path()));
+            aln.set_identity(vg::identity(aln.path()));
         }
         if (rescore) {
             // Rescore the alignment
diff --git a/src/subcommand/minimizer_main.cpp b/src/subcommand/minimizer_main.cpp
index 8635df2a97..53be9c44d6 100644
--- a/src/subcommand/minimizer_main.cpp
+++ b/src/subcommand/minimizer_main.cpp
@@ -91,6 +91,12 @@ int main_minimizer(int argc, char** argv) {
             logger.info() << "Loading SnarlDistanceIndex from " << config.distance_name << std::endl;
         }
         distance_index = vg::io::VPKG::load_one<SnarlDistanceIndex>(config.distance_name);
+        // Preload the index eagerly so that its pages count as recently used
+        // in the OS page cache. Kmer counting may still evict some of them, so
+        // the index is preloaded again right before cache_payloads. Both
+        // preloads are necessary: a single preload just before cache_payloads
+        // isn't enough to keep the index resident under the memory pressure of
+        // 32 parallel threads and the remaining in-memory data structures.
         distance_index->preload(true);
     }
 
@@ -102,13 +108,16 @@ int main_minimizer(int argc, char** argv) {
         config.params
     );
 
-    // Serialize the index and the oversized zipcodes.
+    // Close the distance index before writing the files that depend on it, so
+    // that it can't appear to have been modified after them.
+    distance_index.reset();
+
+    // Serialize the minimizer index and the oversized zipcodes.
     save_minimizer(index, config.output_name);
     if (!config.zipcode_name.empty()) {
         std::ofstream zip_out(config.zipcode_name);
         oversized_zipcodes.serialize(zip_out);
         zip_out.close();
-
     }
 
     if (config.progress) {
diff --git a/src/subcommand/pack_main.cpp b/src/subcommand/pack_main.cpp
index 8d6d7155a9..10146cd2ff 100644
--- a/src/subcommand/pack_main.cpp
+++ b/src/subcommand/pack_main.cpp
@@ -200,7 +200,7 @@ int main_pack(int argc, char** argv) {
     // use some naive heuristics to come up with bin count and batch size based on thread count
     // more bins: finer grained parallelism at cost of more mutexes and allocations
     // bigger batch size: more robustness to sorted input at cost of less parallelism
-    size_t num_threads = get_thread_count();
+    size_t num_threads = vg::get_thread_count();
     size_t batch_size = Packer::estimate_batch_size(num_threads);
     size_t bin_count = Packer::estimate_bin_count(num_threads);
 
diff --git a/src/unittest/banded_global_aligner.cpp b/src/unittest/banded_global_aligner.cpp
index 045e9bfa97..6b5fb4b3c8 100644
--- a/src/unittest/banded_global_aligner.cpp
+++ b/src/unittest/banded_global_aligner.cpp
@@ -10,7 +10,7 @@
 #include "vg.hpp"
 #include "path.hpp"
 #include "banded_global_aligner.hpp"
-#include "vg/io/json2pb.h"
+#include "../io/json2graph.hpp"
 #include "bdsg/hash_graph.hpp"
 #include "../algorithms/pad_band.hpp"
 
@@ -3515,10 +3515,9 @@ namespace vg {
             
             SECTION( "Banded global aligner does not produce empty edits when there is an insertion an empty node") {
                 string graph_json = R"({"edge": [{"to_end": true, "from_start": true, "to": 22, "from": 20}, {"to": 26, "from": 20}, {"to": 24, "from": 20}, {"to_end": true, "from_start": true, "to": 26, "from": 4}, {"to_end": true, "from_start": true, "to": 24, "from": 4}], "node": [{"sequence": "C", "id": 24}, {"sequence": "GAGA", "id": 20}, {"sequence": "T", "id": 26}, {"sequence": "GGAGTCT", "id": 4}, {"id": 22}]})";
-                
-                Graph graph;
-                json2pb(graph, graph_json.c_str(), graph_json.size());
-                VG vg_graph(graph);
+
+                bdsg::HashGraph vg_graph;
+                vg::io::json2graph(graph_json, &vg_graph);
                 
                 TestAligner aligner_source;
                 const Aligner& aligner = *aligner_source.get_regular_aligner();
diff --git a/src/unittest/cactus.cpp b/src/unittest/cactus.cpp
index 7447ee247d..5e518db4ef 100644
--- a/src/unittest/cactus.cpp
+++ b/src/unittest/cactus.cpp
@@ -5,8 +5,9 @@
 
 #include <iostream>
 #include <string>
-#include "vg/io/json2pb.h"
+#include "../io/json2graph.hpp"
 #include "../cactus.hpp"
+#include <bdsg/hash_graph.hpp>
 #include "catch.hpp"
 
 namespace vg {
@@ -14,9 +15,7 @@ namespace unittest {
 using namespace std;
 
 TEST_CASE("We can convert a two-tailed graph to Cactus", "[cactus]") {
-    
-    VG graph;
-    
+
     string graph_json = R"(
     {"node":[{"sequence":"GT","id":7575},
     {"sequence":"TGTTAACAGCACAACATTTA","id":7580},
@@ -25,20 +24,18 @@ TEST_CASE("We can convert a two-tailed graph to Cactus", "[cactus]") {
     "edge":[{"from":7575,"to":7580,"from_start":true},
     {"from":7575,"to":7576}]}
     )";
-    
-    Graph g;
-    json2pb(g, graph_json.c_str(), graph_json.size());
-    graph.extend(g);
 
-    // Make sure we can make a Cactus graph and get something out.    
+    bdsg::HashGraph graph;
+    vg::io::json2graph(graph_json, &graph);
+
+    // Make sure we can make a Cactus graph and get something out.
     auto cactusified = cactusify(graph);
     REQUIRE(cactusified.is_valid());
     
 }
 
 TEST_CASE("We can convert a hairpin graph to Cactus", "[cactus]") {
-    VG graph;
-    
+
     // Here's a graph where only the left side of node 2 is dangling, and the right side of node 1 has a self loop.
     string graph_json = R"(
     {"node": [{"sequence": "A", "id": 1},
@@ -46,12 +43,11 @@ TEST_CASE("We can convert a hairpin graph to Cactus", "[cactus]") {
     "edge": [{"from": 2, "to": 1},
     {"from": 1, "to": 1, "to_end": true}]}
     )";
-    
-    Graph g;
-    json2pb(g, graph_json.c_str(), graph_json.size());
-    graph.extend(g);
 
-    // Make sure we can make a Cactus graph and get something out.    
+    bdsg::HashGraph graph;
+    vg::io::json2graph(graph_json, &graph);
+
+    // Make sure we can make a Cactus graph and get something out.
     auto cactusified = cactusify(graph);
     REQUIRE(cactusified.is_valid());
 }
diff --git a/src/unittest/chunker.cpp b/src/unittest/chunker.cpp
index 24f7d3b645..3be2298c15 100644
--- a/src/unittest/chunker.cpp
+++ b/src/unittest/chunker.cpp
@@ -7,6 +7,8 @@
 #include "vg.hpp"
 #include "xg.hpp"
 #include "path.hpp"
+#include "../io/json2graph.hpp"
+#include <bdsg/hash_graph.hpp>
 
 namespace vg {
 namespace unittest {
@@ -83,13 +85,13 @@ TEST_CASE("basic graph chunking", "[chunk]") {
     
     )";
     
-    // Load it into Protobuf
-    Graph chunk;
-    json2pb(chunk, graph_json.c_str(), graph_json.size());
-    
+    // Load the graph
+    bdsg::HashGraph graph;
+    vg::io::json2graph(graph_json, &graph);
+
     // Pass it over to XG
     xg::XG index;
-    index.from_path_handle_graph(VG(chunk));
+    index.from_path_handle_graph(graph);
 
     PathChunker chunker(&index);
 
diff --git a/src/unittest/copy_graph.cpp b/src/unittest/copy_graph.cpp
index 581b683130..4e7e878075 100644
--- a/src/unittest/copy_graph.cpp
+++ b/src/unittest/copy_graph.cpp
@@ -1,6 +1,7 @@
 #include "catch.hpp"
 #include "../handle.hpp"
 #include "../vg.hpp"
+#include "../io/json2graph.hpp"
 #include "xg.hpp"
 
 #include "bdsg/packed_graph.hpp"
@@ -53,14 +54,15 @@ namespace vg {
                          ]
             }
             )";
-            Graph proto_graph;
-            json2pb(proto_graph, graph_json.c_str(), graph_json.size());
-            
+
+            bdsg::HashGraph source;
+            vg::io::json2graph(graph_json, &source);
+
             xg::XG xg;
-            xg.from_path_handle_graph(VG(proto_graph));
+            xg.from_path_handle_graph(source);
             VG vg;
             handlealgs::copy_handle_graph(&xg, &vg);
-            
+
             REQUIRE(xg.get_node_count() == 1);
             REQUIRE(vg.get_node_count() == 1);
         }
@@ -72,14 +74,15 @@ namespace vg {
                          ]
             }
             )";
-            Graph proto_graph;
-            json2pb(proto_graph, graph_json.c_str(), graph_json.size());
-            
+
+            bdsg::HashGraph source;
+            vg::io::json2graph(graph_json, &source);
+
             xg::XG xg;
-            xg.from_path_handle_graph(VG(proto_graph));
+            xg.from_path_handle_graph(source);
             bdsg::PackedGraph pg;
             handlealgs::copy_handle_graph(&xg, &pg);
-            
+
             REQUIRE(xg.get_node_count() == 1);
             REQUIRE(pg.get_node_count() == 1);
         }
@@ -91,14 +94,15 @@ namespace vg {
                          ]
             }
             )";
-            Graph proto_graph;
-            json2pb(proto_graph, graph_json.c_str(), graph_json.size());
-            
+
+            bdsg::HashGraph source;
+            vg::io::json2graph(graph_json, &source);
+
             xg::XG xg;
-            xg.from_path_handle_graph(VG(proto_graph));
+            xg.from_path_handle_graph(source);
             bdsg::HashGraph hg;
             handlealgs::copy_handle_graph(&xg, &hg);
-            
+
             REQUIRE(xg.get_node_count() == 1);
             REQUIRE(hg.get_node_count() == 1);
         }
@@ -120,19 +124,20 @@ namespace vg {
                         ]
             }
             )";
-            Graph proto_graph;
-            json2pb(proto_graph, graph_json.c_str(), graph_json.size());
-            
+
+            bdsg::HashGraph source;
+            vg::io::json2graph(graph_json, &source);
+
             xg::XG xg;
-            xg.from_path_handle_graph(VG(proto_graph));
+            xg.from_path_handle_graph(source);
             VG vg;
             handlealgs::copy_handle_graph(&xg, &vg);
-            
+
             REQUIRE(xg.get_node_count() == 4);
             REQUIRE(vg.get_node_count() == 4);
             REQUIRE(vg.edge_count() == 4);
             REQUIRE(vg.length() == 16);
-            
+
         }
         TEST_CASE( "copy_handle_graph converter works on graphs with one reversing edge, xg to pg", "[handle][pg][xg]") {
             string graph_json = R"(
@@ -151,14 +156,15 @@ namespace vg {
                         ]
             }
             )";
-            Graph proto_graph;
-            json2pb(proto_graph, graph_json.c_str(), graph_json.size());
-            
+
+            bdsg::HashGraph source;
+            vg::io::json2graph(graph_json, &source);
+
             xg::XG xg;
-            xg.from_path_handle_graph(VG(proto_graph));
+            xg.from_path_handle_graph(source);
             bdsg::PackedGraph pg;
             handlealgs::copy_handle_graph(&xg, &pg);
-            
+
             REQUIRE(xg.get_node_count() == 4);
             REQUIRE(pg.get_node_count() == 4);
 
@@ -168,14 +174,14 @@ namespace vg {
                 return true;
             });
             REQUIRE(length == 16);
-            
+
             int edge_count = 0;
             pg.for_each_edge([&](const edge_t& edge) {
                 edge_count += 1;
                 return true;
             });
             REQUIRE(edge_count == 4);
-        
+
         }
         TEST_CASE( "copy_handle_graph converter works on graphs with one reversing edge, xg to hg", "[handle][hg][xg]") {
             string graph_json = R"(
@@ -194,14 +200,15 @@ namespace vg {
                         ]
             }
             )";
-            Graph proto_graph;
-            json2pb(proto_graph, graph_json.c_str(), graph_json.size());
-            
+
+            bdsg::HashGraph source;
+            vg::io::json2graph(graph_json, &source);
+
             xg::XG xg;
-            xg.from_path_handle_graph(VG(proto_graph));
+            xg.from_path_handle_graph(source);
             bdsg::HashGraph hg;
             handlealgs::copy_handle_graph(&xg, &hg);
-            
+
             REQUIRE(xg.get_node_count() == 4);
             REQUIRE(hg.get_node_count() == 4);
             int length = 0;
@@ -210,14 +217,14 @@ namespace vg {
                 return true;
             });
             REQUIRE(length == 16);
-            
+
             int edge_count = 0;
             hg.for_each_edge([&](const edge_t& edge) {
                 edge_count += 1;
                 return true;
             });
             REQUIRE(edge_count == 4);
-            
+
         }
         TEST_CASE( "copy_handle_graph converter works on graphs with reversing edges and loops", "[handle][vg][xg]") {
             string graph_json = R"(
@@ -239,14 +246,15 @@ namespace vg {
                         ]
             }
             )";
-            Graph proto_graph;
-            json2pb(proto_graph, graph_json.c_str(), graph_json.size());
-            
+
+            bdsg::HashGraph source;
+            vg::io::json2graph(graph_json, &source);
+
             xg::XG xg;
-            xg.from_path_handle_graph(VG(proto_graph));
+            xg.from_path_handle_graph(source);
             VG vg;
             handlealgs::copy_handle_graph(&xg, &vg);
-            
+
             REQUIRE(xg.get_sequence(xg.get_handle(1)) == "GATT");
             REQUIRE(xg.get_sequence(xg.get_handle(3)) == "CGAT");
             REQUIRE(xg.get_node_count() == 4);
@@ -274,26 +282,27 @@ namespace vg {
                         ]
             }
             )";
-            Graph proto_graph;
-            json2pb(proto_graph, graph_json.c_str(), graph_json.size());
-            
+
+            bdsg::HashGraph source;
+            vg::io::json2graph(graph_json, &source);
+
             xg::XG xg;
-            xg.from_path_handle_graph(VG(proto_graph));
+            xg.from_path_handle_graph(source);
             bdsg::PackedGraph pg;
             handlealgs::copy_handle_graph(&xg, &pg);
-            
+
             REQUIRE(xg.get_sequence(xg.get_handle(1)) == "GATT");
             REQUIRE(xg.get_sequence(xg.get_handle(3)) == "CGAT");
             REQUIRE(xg.get_node_count() == 4);
             REQUIRE(pg.get_node_count() == 4);
-            
+
             int length = 0;
             pg.for_each_handle([&](const handle_t& here) {
                 length += pg.get_length(here);
                 return true;
             });
             REQUIRE(length == 16);
-            
+
             int edge_count = 0;
             pg.for_each_edge([&](const edge_t& edge) {
                 edge_count += 1;
@@ -321,26 +330,27 @@ namespace vg {
                         ]
             }
             )";
-            Graph proto_graph;
-            json2pb(proto_graph, graph_json.c_str(), graph_json.size());
-            
+
+            bdsg::HashGraph source;
+            vg::io::json2graph(graph_json, &source);
+
             xg::XG xg;
-            xg.from_path_handle_graph(VG(proto_graph));
+            xg.from_path_handle_graph(source);
             bdsg::HashGraph hg;
             handlealgs::copy_handle_graph(&xg, &hg);
-            
+
             REQUIRE(xg.get_sequence(xg.get_handle(1)) == "GATT");
             REQUIRE(xg.get_sequence(xg.get_handle(3)) == "CGAT");
             REQUIRE(xg.get_node_count() == 4);
             REQUIRE(hg.get_node_count() == 4);
-            
+
             int length = 0;
             hg.for_each_handle([&](const handle_t& here) {
                 length += hg.get_length(here);
                 return true;
             });
             REQUIRE(length == 16);
-            
+
             int edge_count = 0;
             hg.for_each_edge([&](const edge_t& edge) {
                 edge_count += 1;
@@ -382,16 +392,17 @@ namespace vg {
                         ]
             }
             )";
-            Graph proto_graph;
-            json2pb(proto_graph, graph_json.c_str(), graph_json.size());
-            
+
+            bdsg::HashGraph source;
+            vg::io::json2graph(graph_json, &source);
+
             xg::XG xg;
-            xg.from_path_handle_graph(VG(proto_graph));
+            xg.from_path_handle_graph(source);
             VG vg;
             handlealgs::copy_path_handle_graph(&xg, &vg);
 
-            
-            
+
+
             REQUIRE(xg.get_sequence(xg.get_handle(1)) == "GATT");
             REQUIRE(xg.get_sequence(xg.get_handle(3)) == "CGAT");
             REQUIRE(xg.get_node_count() == 4);
@@ -444,37 +455,38 @@ namespace vg {
                         ]
             }
             )";
-            Graph proto_graph;
-            json2pb(proto_graph, graph_json.c_str(), graph_json.size());
-            
+
+            bdsg::HashGraph source;
+            vg::io::json2graph(graph_json, &source);
+
             xg::XG xg;
-            xg.from_path_handle_graph(VG(proto_graph));
+            xg.from_path_handle_graph(source);
             bdsg::PackedGraph pg;
             handlealgs::copy_path_handle_graph(&xg, &pg);
-            
-            
-            
+
+
+
             REQUIRE(xg.get_sequence(xg.get_handle(1)) == "GATT");
             REQUIRE(xg.get_sequence(xg.get_handle(3)) == "CGAT");
             REQUIRE(xg.get_node_count() == 4);
             REQUIRE(pg.get_node_count() == 4);
 
-            
+
             int length = 0;
             pg.for_each_handle([&](const handle_t& here) {
                 length += pg.get_length(here);
                 return true;
             });
             REQUIRE(length == 16);
-            
+
             int edge_count = 0;
             pg.for_each_edge([&](const edge_t& edge) {
                 edge_count += 1;
                 return true;
             });
             REQUIRE(edge_count == 7);
-            
-            
+
+
             REQUIRE(pg.has_path("path1") == true);
             REQUIRE(pg.has_path("path2") == true);
             REQUIRE(pg.get_path_count() == 2);
@@ -521,37 +533,38 @@ namespace vg {
                         ]
             }
             )";
-            Graph proto_graph;
-            json2pb(proto_graph, graph_json.c_str(), graph_json.size());
-            
+
+            bdsg::HashGraph source;
+            vg::io::json2graph(graph_json, &source);
+
             xg::XG xg;
-            xg.from_path_handle_graph(VG(proto_graph));
+            xg.from_path_handle_graph(source);
             bdsg::HashGraph hg;
             handlealgs::copy_path_handle_graph(&xg, &hg);
-            
-            
-            
+
+
+
             REQUIRE(xg.get_sequence(xg.get_handle(1)) == "GATT");
             REQUIRE(xg.get_sequence(xg.get_handle(3)) == "CGAT");
             REQUIRE(xg.get_node_count() == 4);
             REQUIRE(hg.get_node_count() == 4);
-            
-            
+
+
             int length = 0;
             hg.for_each_handle([&](const handle_t& here) {
                 length += hg.get_length(here);
                 return true;
             });
             REQUIRE(length == 16);
-            
+
             int edge_count = 0;
             hg.for_each_edge([&](const edge_t& edge) {
                 edge_count += 1;
                 return true;
             });
             REQUIRE(edge_count == 7);
-            
-            
+
+
             REQUIRE(hg.has_path("path1") == true);
             REQUIRE(hg.has_path("path2") == true);
             REQUIRE(hg.get_path_count() == 2);
diff --git a/src/unittest/dijkstra.cpp b/src/unittest/dijkstra.cpp
index 2608567153..4e94414040 100644
--- a/src/unittest/dijkstra.cpp
+++ b/src/unittest/dijkstra.cpp
@@ -6,7 +6,7 @@
 #include <iostream>
 #include <string>
 #include "../handle.hpp"
-#include "vg/io/json2pb.h"
+#include "../io/json2graph.hpp"
 #include "../vg.hpp"
 #include "catch.hpp"
 
@@ -125,14 +125,12 @@ TEST_CASE("Dijkstra search handles early stopping correctly", "[dijkstra][algori
 TEST_CASE("Dijkstra search works on a particular problem graph", "[dijkstra][algorithms]") {
 
     string graph_json = R"(
-{"node":[{"sequence":"A","id":"2454530"},{"sequence":"AGTGCTGGAGAGGATGTGGAGAAATAGGAAC","id":"2454529"},{"sequence":"C","id":"2454532"},{"sequence":"TTTTACACTGTTGGTGGGACTGTAAA","id":"2454533"},{"sequence":"A","id":"2454527"},{"sequence":"C","id":"2454528"},{"sequence":"G","id":"2454531"},{"sequence":"C","id":"2454534"},{"sequence":"T","id":"2454535"},{"sequence":"GGGTAATAA","id":"2454526"},{"sequence":"TAGTTCAACCATTGTGGAAGACTGTGGCAATT","id":"2454536"}],"edge":[{"from":"2454530","to":"2454532"},{"from":"2454530","to":"2454533"},{"from":"2454529","to":"2454530"},{"from":"2454529","to":"2454531"},{"from":"2454532","to":"2454533"},{"from":"2454533","to":"2454534"},{"from":"2454533","to":"2454535"},{"from":"2454527","to":"2454529"},{"from":"2454528","to":"2454529"},{"from":"2454531","to":"2454532"},{"from":"2454531","to":"2454533"},{"from":"2454534","to":"2454536"},{"from":"2454535","to":"2454536"},{"from":"2454526","to":"2454527"},{"from":"2454526","to":"2454528"}],"path":[{"name":"21","mapping":[{"position":{"node_id":"2454526"},"edit":[{"from_length":9,"to_length":9}],"rank":"3049077"},{"position":{"node_id":"2454528"},"edit":[{"from_length":1,"to_length":1}],"rank":"3049078"},{"position":{"node_id":"2454529"},"edit":[{"from_length":31,"to_length":31}],"rank":"3049079"},{"position":{"node_id":"2454531"},"edit":[{"from_length":1,"to_length":1}],"rank":"3049080"},{"position":{"node_id":"2454532"},"edit":[{"from_length":1,"to_length":1}],"rank":"3049081"},{"position":{"node_id":"2454533"},"edit":[{"from_length":26,"to_length":26}],"rank":"3049082"},{"position":{"node_id":"2454535"},"edit":[{"from_length":1,"to_length":1}],"rank":"3049083"},{"position":{"node_id":"2454536"},"edit":[{"from_length":32,"to_length":32}],"rank":"3049084"}]}]}    
+{"node":[{"sequence":"A","id":"2454530"},{"sequence":"AGTGCTGGAGAGGATGTGGAGAAATAGGAAC","id":"2454529"},{"sequence":"C","id":"2454532"},{"sequence":"TTTTACACTGTTGGTGGGACTGTAAA","id":"2454533"},{"sequence":"A","id":"2454527"},{"sequence":"C","id":"2454528"},{"sequence":"G","id":"2454531"},{"sequence":"C","id":"2454534"},{"sequence":"T","id":"2454535"},{"sequence":"GGGTAATAA","id":"2454526"},{"sequence":"TAGTTCAACCATTGTGGAAGACTGTGGCAATT","id":"2454536"}],"edge":[{"from":"2454530","to":"2454532"},{"from":"2454530","to":"2454533"},{"from":"2454529","to":"2454530"},{"from":"2454529","to":"2454531"},{"from":"2454532","to":"2454533"},{"from":"2454533","to":"2454534"},{"from":"2454533","to":"2454535"},{"from":"2454527","to":"2454529"},{"from":"2454528","to":"2454529"},{"from":"2454531","to":"2454532"},{"from":"2454531","to":"2454533"},{"from":"2454534","to":"2454536"},{"from":"2454535","to":"2454536"},{"from":"2454526","to":"2454527"},{"from":"2454526","to":"2454528"}],"path":[{"name":"21","mapping":[{"position":{"node_id":"2454526"},"edit":[{"from_length":9,"to_length":9}],"rank":"3049077"},{"position":{"node_id":"2454528"},"edit":[{"from_length":1,"to_length":1}],"rank":"3049078"},{"position":{"node_id":"2454529"},"edit":[{"from_length":31,"to_length":31}],"rank":"3049079"},{"position":{"node_id":"2454531"},"edit":[{"from_length":1,"to_length":1}],"rank":"3049080"},{"position":{"node_id":"2454532"},"edit":[{"from_length":1,"to_length":1}],"rank":"3049081"},{"position":{"node_id":"2454533"},"edit":[{"from_length":26,"to_length":26}],"rank":"3049082"},{"position":{"node_id":"2454535"},"edit":[{"from_length":1,"to_length":1}],"rank":"3049083"},{"position":{"node_id":"2454536"},"edit":[{"from_length":32,"to_length":32}],"rank":"3049084"}]}]}
     )";
-    
-    Graph g;
-    json2pb(g, graph_json);
-    
-    // Wrap the graph in a HandleGraph
-    VG graph(g);
+
+    // Load the JSON graph description into a bdsg::HashGraph
+    bdsg::HashGraph graph;
+    vg::io::json2graph(graph_json, &graph);
     
     // Decide where to start
     handle_t start = graph.get_handle(2454536, true);
diff --git a/src/unittest/gbwt_extender.cpp b/src/unittest/gbwt_extender.cpp
index d04a225fdb..4835fbd511 100644
--- a/src/unittest/gbwt_extender.cpp
+++ b/src/unittest/gbwt_extender.cpp
@@ -5,7 +5,7 @@
 
 #include "../gbwt_extender.hpp"
 #include "../gbwt_helper.hpp"
-#include "vg/io/json2pb.h"
+#include "../io/json2graph.hpp"
 #include "../utility.hpp"
 #include "../vg.hpp"
 
@@ -90,10 +90,9 @@ gbwt::GBWT build_gbwt_index() {
 
 // Build a GBWTGraph using the provided GBWT index.
 gbwtgraph::GBWTGraph build_gbwt_graph(const gbwt::GBWT& gbwt_index) {
-    Graph graph;
-    json2pb(graph, gapless_extender_graph.c_str(), gapless_extender_graph.size());
-    VG vg_graph(graph);
-    return gbwtgraph::GBWTGraph(gbwt_index, vg_graph, nullptr);
+    bdsg::HashGraph graph;
+    vg::io::json2graph(gapless_extender_graph, &graph);
+    return gbwtgraph::GBWTGraph(gbwt_index, graph, nullptr);
 }
 
 void same_position(const Position& pos, const Position& correct) {
diff --git a/src/unittest/genotypekit.cpp b/src/unittest/genotypekit.cpp
index af9bc2a4d8..b5d460c59a 100644
--- a/src/unittest/genotypekit.cpp
+++ b/src/unittest/genotypekit.cpp
@@ -10,6 +10,8 @@
 #include "../traversal_finder.hpp"
 #include "xg.hpp"
 #include "../haplotype_extracter.hpp"
+#include "../io/json2graph.hpp"
+#include <bdsg/hash_graph.hpp>
 
 namespace Catch {
 
@@ -62,10 +64,10 @@ namespace vg {
 namespace unittest {
 
 TEST_CASE("sites can be found with Cactus", "[genotype]") {
-    
+
   // Build a toy graph
   const string graph_json = R"(
-    
+
     {
         "node": [
             {"id": 1, "sequence": "G"},
@@ -90,7 +92,7 @@ TEST_CASE("sites can be found with Cactus", "[genotype]") {
             {"from": 6, "to": 8},
             {"from": 7, "to": 9},
             {"from": 8, "to": 9}
-            
+
         ],
         "path": [
             {"name": "hint", "mapping": [
@@ -101,14 +103,13 @@ TEST_CASE("sites can be found with Cactus", "[genotype]") {
             ]}
         ]
     }
-    
+
     )";
-    
+
   // Make an actual graph
+  // Note: Using VG here because the test uses VG-specific methods like get_node() and get_edge()
   VG graph;
-  Graph chunk;
-  json2pb(chunk, graph_json.c_str(), graph_json.size());
-  graph.merge(chunk);
+  vg::io::json2graph(graph_json, &graph);
     
   // Make a CactusSnarlFinder
   unique_ptr<SnarlFinder> finder(new CactusSnarlFinder(graph));
@@ -196,10 +197,10 @@ TEST_CASE("sites can be found with Cactus", "[genotype]") {
 }
 
 TEST_CASE("sites can be found with the IntegratedSnarlFinder", "[genotype][integrated-snarl-finder]") {
-    
+
   // Build a toy graph
   const string graph_json = R"(
-    
+
     {
         "node": [
             {"id": 1, "sequence": "G"},
@@ -224,7 +225,7 @@ TEST_CASE("sites can be found with the IntegratedSnarlFinder", "[genotype][integ
             {"from": 6, "to": 8},
             {"from": 7, "to": 9},
             {"from": 8, "to": 9}
-            
+
         ],
         "path": [
             {"name": "hint", "mapping": [
@@ -235,14 +236,13 @@ TEST_CASE("sites can be found with the IntegratedSnarlFinder", "[genotype][integ
             ]}
         ]
     }
-    
+
     )";
-    
+
   // Make an actual graph
+  // Note: Using VG here because the test uses VG-specific methods like get_node() and get_edge()
   VG graph;
-  Graph chunk;
-  json2pb(chunk, graph_json.c_str(), graph_json.size());
-  graph.merge(chunk);
+  vg::io::json2graph(graph_json, &graph);
     
   // Make an IntegratedSnarlFinder
   unique_ptr<SnarlFinder> finder(new IntegratedSnarlFinder(graph));
@@ -329,7 +329,7 @@ TEST_CASE("sites can be found with the IntegratedSnarlFinder", "[genotype][integ
 }
 
 TEST_CASE("IntegratedSnarlFinder works when cactus graph contains back-to-back cycles along root path", "[genotype][integrated-snarl-finder]") {
-    
+
     // Build a toy graph
     const string graph_json = R"(
 
@@ -351,17 +351,15 @@ TEST_CASE("IntegratedSnarlFinder works when cactus graph contains back-to-back c
             {"from": 3, "to": 5},
             {"from": 4, "to": 6},
             {"from": 5, "to": 6}
-            
+
         ]
     }
 
     )";
 
     // Make an actual graph
-    VG graph;
-    Graph chunk;
-    json2pb(chunk, graph_json.c_str(), graph_json.size());
-    graph.merge(chunk);
+    bdsg::HashGraph graph;
+    vg::io::json2graph(graph_json, &graph);
 
     // Make an IntegratedSnarlFinder
     unique_ptr<SnarlFinder> finder(new IntegratedSnarlFinder(graph));
@@ -375,18 +373,16 @@ TEST_CASE("IntegratedSnarlFinder works when cactus graph contains back-to-back c
 }
 
 TEST_CASE("IntegratedSnarlFinder works on an all bridge edge Y graph with specific numbering", "[genotype][integrated-snarl-finder]") {
-    
+
     // Build a toy graph
     const string graph_json = R"(
     {"node":[{"id":"2","sequence":"G"},{"id":"3","sequence":"G"},{"id":"4","sequence":"G"},{"id":"5","sequence":"G"},{"id":"6","sequence":"G"},{"id":"11","sequence":"G"}],
-    "edge":[{"from":"2","to":"3"},{"from":"3","to":"6"},{"from":"4","to":"5"},{"from":"5","to":"6"},{"from":"6","to":"11"}]}    
+    "edge":[{"from":"2","to":"3"},{"from":"3","to":"6"},{"from":"4","to":"5"},{"from":"5","to":"6"},{"from":"6","to":"11"}]}
     )";
 
     // Make an actual graph
-    VG graph;
-    Graph chunk;
-    json2pb(chunk, graph_json.c_str(), graph_json.size());
-    graph.merge(chunk);
+    bdsg::HashGraph graph;
+    vg::io::json2graph(graph_json, &graph);
 
     // Make an IntegratedSnarlFinder
     unique_ptr<SnarlFinder> finder(new IntegratedSnarlFinder(graph));
@@ -403,18 +399,16 @@ TEST_CASE("IntegratedSnarlFinder works on an all bridge edge Y graph with specif
 }
 
 TEST_CASE("IntegratedSnarlFinder roots correctly an all bridge edge Y graph with winning longest path", "[genotype][integrated-snarl-finder]") {
-    
+
     // Build a toy graph
     const string graph_json = R"(
     {"node":[{"id":"2","sequence":"G"},{"id":"3","sequence":"G"},{"id":"4","sequence":"GG"},{"id":"5","sequence":"G"},{"id":"6","sequence":"G"},{"id":"11","sequence":"GG"}],
-    "edge":[{"from":"2","to":"3"},{"from":"3","to":"6"},{"from":"4","to":"5"},{"from":"5","to":"6"},{"from":"6","to":"11"}]}    
+    "edge":[{"from":"2","to":"3"},{"from":"3","to":"6"},{"from":"4","to":"5"},{"from":"5","to":"6"},{"from":"6","to":"11"}]}
     )";
 
     // Make an actual graph
-    VG graph;
-    Graph chunk;
-    json2pb(chunk, graph_json.c_str(), graph_json.size());
-    graph.merge(chunk);
+    bdsg::HashGraph graph;
+    vg::io::json2graph(graph_json, &graph);
 
     // Make an IntegratedSnarlFinder
     unique_ptr<SnarlFinder> finder(new IntegratedSnarlFinder(graph));
@@ -452,7 +446,7 @@ TEST_CASE("IntegratedSnarlFinder roots correctly an all bridge edge Y graph with
 }
 
 TEST_CASE("IntegratedSnarlFinder works when cactus graph contains longer back-to-back cycles along root path", "[genotype][integrated-snarl-finder]") {
-    
+
     // Build a toy graph
     const string graph_json = R"(
 
@@ -482,17 +476,15 @@ TEST_CASE("IntegratedSnarlFinder works when cactus graph contains longer back-to
             {"from": 32, "to": 5},
             {"from": 4, "to": 6},
             {"from": 5, "to": 6}
-            
+
         ]
     }
 
     )";
 
     // Make an actual graph
-    VG graph;
-    Graph chunk;
-    json2pb(chunk, graph_json.c_str(), graph_json.size());
-    graph.merge(chunk);
+    bdsg::HashGraph graph;
+    vg::io::json2graph(graph_json, &graph);
 
     // Make an IntegratedSnarlFinder
     unique_ptr<SnarlFinder> finder(new IntegratedSnarlFinder(graph));
@@ -506,50 +498,48 @@ TEST_CASE("IntegratedSnarlFinder works when cactus graph contains longer back-to
 }
 
 TEST_CASE("IntegratedSnarlFinder works on a complex bundle-y region with a nested snarl", "[genotype][integrated-snarl-finder]") {
-    
+
     // Build a toy graph
     const string graph_json = R"(
         {"edge": [{"from": "129672", "to": "129673"},
-                  {"from": "129662", "to": "129663"}, 
-                  {"from": "129662", "to": "129664"}, 
-                  {"from": "129664", "to": "129665"}, 
-                  {"from": "129664", "to": "129666"}, 
-                  {"from": "129666", "to": "129668"}, 
-                  {"from": "129666", "to": "129669"}, 
-                  {"from": "129666", "to": "129667"}, 
-                  {"from": "129667", "to": "129668"}, 
-                  {"from": "129667", "to": "129669"}, 
-                  {"from": "129669", "to": "129670"}, 
-                  {"from": "129669", "to": "129673"}, 
-                  {"from": "129671", "to": "129672"}, 
-                  {"from": "129668", "to": "129670"}, 
-                  {"from": "129668", "to": "129673"}, 
-                  {"from": "129665", "to": "129668"}, 
-                  {"from": "129665", "to": "129669"}, 
-                  {"from": "129665", "to": "129667"}, 
-                  {"from": "129670", "to": "129671"}, 
-                  {"from": "129670", "to": "129672"}, 
-                  {"from": "129663", "to": "129665"}, 
-                  {"from": "129663", "to": "129666"}], 
-        "node": [{"id": "129672", "sequence": "AT"}, 
-                 {"id": "129662", "sequence": "CAGGTCAAACTGTGAT"}, 
-                 {"id": "129664", "sequence": "T"}, 
-                 {"id": "129666", "sequence": "T"}, 
-                 {"id": "129667", "sequence": "G"}, 
-                 {"id": "129669", "sequence": "G"}, 
-                 {"id": "129671", "sequence": "T"}, 
-                 {"id": "129668", "sequence": "A"}, 
-                 {"id": "129665", "sequence": "A"}, 
-                 {"id": "129670", "sequence": "A"}, 
-                 {"id": "129673", "sequence": "ATATATATATACTTATTGTAAAAATCTTTAGA"}, 
+                  {"from": "129662", "to": "129663"},
+                  {"from": "129662", "to": "129664"},
+                  {"from": "129664", "to": "129665"},
+                  {"from": "129664", "to": "129666"},
+                  {"from": "129666", "to": "129668"},
+                  {"from": "129666", "to": "129669"},
+                  {"from": "129666", "to": "129667"},
+                  {"from": "129667", "to": "129668"},
+                  {"from": "129667", "to": "129669"},
+                  {"from": "129669", "to": "129670"},
+                  {"from": "129669", "to": "129673"},
+                  {"from": "129671", "to": "129672"},
+                  {"from": "129668", "to": "129670"},
+                  {"from": "129668", "to": "129673"},
+                  {"from": "129665", "to": "129668"},
+                  {"from": "129665", "to": "129669"},
+                  {"from": "129665", "to": "129667"},
+                  {"from": "129670", "to": "129671"},
+                  {"from": "129670", "to": "129672"},
+                  {"from": "129663", "to": "129665"},
+                  {"from": "129663", "to": "129666"}],
+        "node": [{"id": "129672", "sequence": "AT"},
+                 {"id": "129662", "sequence": "CAGGTCAAACTGTGAT"},
+                 {"id": "129664", "sequence": "T"},
+                 {"id": "129666", "sequence": "T"},
+                 {"id": "129667", "sequence": "G"},
+                 {"id": "129669", "sequence": "G"},
+                 {"id": "129671", "sequence": "T"},
+                 {"id": "129668", "sequence": "A"},
+                 {"id": "129665", "sequence": "A"},
+                 {"id": "129670", "sequence": "A"},
+                 {"id": "129673", "sequence": "ATATATATATACTTATTGTAAAAATCTTTAGA"},
                  {"id": "129663", "sequence": "G"}]}
     )";
 
     // Make an actual graph
-    VG graph;
-    Graph chunk;
-    json2pb(chunk, graph_json.c_str(), graph_json.size());
-    graph.merge(chunk);
+    bdsg::HashGraph graph;
+    vg::io::json2graph(graph_json, &graph);
 
     // Make an IntegratedSnarlFinder
     unique_ptr<SnarlFinder> finder(new IntegratedSnarlFinder(graph));
@@ -579,23 +569,21 @@ TEST_CASE("IntegratedSnarlFinder works on a complex bundle-y region with a neste
 }
 
 TEST_CASE("CactusSnarlFinder safely handles a single node graph", "[genotype][cactus-snarl-finder]") {
-    
+
   // Build a toy graph
   const string graph_json = R"(
-    
+
     {
         "node": [
             {"id": 1, "sequence": "GATTACA"}
         ]
     }
-    
+
     )";
-    
+
   // Make an actual graph
-  VG graph;
-  Graph chunk;
-  json2pb(chunk, graph_json.c_str(), graph_json.size());
-  graph.merge(chunk);
+  bdsg::HashGraph graph;
+  vg::io::json2graph(graph_json, &graph);
     
   // Make a CactusSnarlFinder
   unique_ptr<SnarlFinder> finder(new CactusSnarlFinder(graph));
@@ -607,15 +595,13 @@ TEST_CASE("CactusSnarlFinder safely handles a single node graph", "[genotype][ca
 }
 
 TEST_CASE("IntegratedSnarlFinder safely handles a completely empty graph", "[genotype][integrated-snarl-finder]") {
-    
+
     // Build a toy graph
     const string graph_json = "{}";
 
     // Make an actual graph
-    VG graph;
-    Graph chunk;
-    json2pb(chunk, graph_json.c_str(), graph_json.size());
-    graph.merge(chunk);
+    bdsg::HashGraph graph;
+    vg::io::json2graph(graph_json, &graph);
 
     // Make a IntegratedSnarlFinder
     unique_ptr<SnarlFinder> finder(new IntegratedSnarlFinder(graph));
@@ -625,7 +611,7 @@ TEST_CASE("IntegratedSnarlFinder safely handles a completely empty graph", "[gen
 }
 
 TEST_CASE("IntegratedSnarlFinder safely handles a single node graph", "[genotype][integrated-snarl-finder]") {
-    
+
     // Build a toy graph
     const string graph_json = R"(
 
@@ -638,10 +624,8 @@ TEST_CASE("IntegratedSnarlFinder safely handles a single node graph", "[genotype
     )";
 
     // Make an actual graph
-    VG graph;
-    Graph chunk;
-    json2pb(chunk, graph_json.c_str(), graph_json.size());
-    graph.merge(chunk);
+    bdsg::HashGraph graph;
+    vg::io::json2graph(graph_json, &graph);
 
     // Make an IntegratedSnarlFinder
     unique_ptr<SnarlFinder> finder(new IntegratedSnarlFinder(graph));
@@ -651,7 +635,7 @@ TEST_CASE("IntegratedSnarlFinder safely handles a single node graph", "[genotype
 }
 
 TEST_CASE("IntegratedSnarlFinder produces all the correct types of single-node chains", "[genotype][integrated-snarl-finder]") {
-    
+
     // Build a toy graph
     const string graph_json = R"(
 
@@ -673,10 +657,8 @@ TEST_CASE("IntegratedSnarlFinder produces all the correct types of single-node c
     )";
 
     // Make an actual graph
-    VG graph;
-    Graph chunk;
-    json2pb(chunk, graph_json.c_str(), graph_json.size());
-    graph.merge(chunk);
+    bdsg::HashGraph graph;
+    vg::io::json2graph(graph_json, &graph);
 
     // Make an IntegratedSnarlFinder
     IntegratedSnarlFinder finder(graph);
@@ -736,7 +718,7 @@ TEST_CASE("IntegratedSnarlFinder produces all the correct types of single-node c
 }
 
 TEST_CASE("IntegratedSnarlFinder safely handles a path when forced to root at one end", "[genotype][integrated-snarl-finder]") {
-    
+
     // Build a toy graph
     const string graph_json = R"(
 
@@ -757,10 +739,8 @@ TEST_CASE("IntegratedSnarlFinder safely handles a path when forced to root at on
     )";
 
     // Make an actual graph
-    VG graph;
-    Graph chunk;
-    json2pb(chunk, graph_json.c_str(), graph_json.size());
-    graph.merge(chunk);
+    bdsg::HashGraph graph;
+    vg::io::json2graph(graph_json, &graph);
 
     // Make an IntegratedSnarlFinder
     unique_ptr<SnarlFinder> finder(new IntegratedSnarlFinder(graph));
@@ -770,7 +750,7 @@ TEST_CASE("IntegratedSnarlFinder safely handles a path when forced to root at on
 }
 
 TEST_CASE("IntegratedSnarlFinder safely handles a single node connected component in a larger graph", "[genotype][integrated-snarl-finder]") {
-    
+
     // Build a toy graph
     const string graph_json = R"(
 
@@ -787,10 +767,8 @@ TEST_CASE("IntegratedSnarlFinder safely handles a single node connected componen
     )";
 
     // Make an actual graph
-    VG graph;
-    Graph chunk;
-    json2pb(chunk, graph_json.c_str(), graph_json.size());
-    graph.merge(chunk);
+    bdsg::HashGraph graph;
+    vg::io::json2graph(graph_json, &graph);
 
     // Make an IntegratedSnarlFinder
     unique_ptr<SnarlFinder> finder(new IntegratedSnarlFinder(graph));
@@ -813,7 +791,7 @@ TEST_CASE("IntegratedSnarlFinder safely handles a single node connected componen
 }
 
 TEST_CASE("IntegratedSnarlFinder safely handles a single node cycle", "[genotype][integrated-snarl-finder]") {
-    
+
     // Build a toy graph
     const string graph_json = R"(
 
@@ -828,10 +806,8 @@ TEST_CASE("IntegratedSnarlFinder safely handles a single node cycle", "[genotype
     )";
 
     // Make an actual graph
-    VG graph;
-    Graph chunk;
-    json2pb(chunk, graph_json.c_str(), graph_json.size());
-    graph.merge(chunk);
+    bdsg::HashGraph graph;
+    vg::io::json2graph(graph_json, &graph);
 
     // Make an IntegratedSnarlFinder
     unique_ptr<SnarlFinder> finder(new IntegratedSnarlFinder(graph));
@@ -844,7 +820,7 @@ TEST_CASE("IntegratedSnarlFinder safely handles a single node cycle", "[genotype
 }
 
 TEST_CASE("IntegratedSnarlFinder safely handles a totally connected graph", "[genotype][integrated-snarl-finder]") {
-    
+
     // Build a toy graph
     const string graph_json = R"(
 
@@ -866,10 +842,8 @@ TEST_CASE("IntegratedSnarlFinder safely handles a totally connected graph", "[ge
     )";
 
     // Make an actual graph
-    VG graph;
-    Graph chunk;
-    json2pb(chunk, graph_json.c_str(), graph_json.size());
-    graph.merge(chunk);
+    bdsg::HashGraph graph;
+    vg::io::json2graph(graph_json, &graph);
 
     // Make an IntegratedSnarlFinder
     unique_ptr<SnarlFinder> finder(new IntegratedSnarlFinder(graph));
@@ -882,7 +856,7 @@ TEST_CASE("IntegratedSnarlFinder safely handles a totally connected graph", "[ge
 }
 
 TEST_CASE("IntegratedSnarlFinder prefers to root at a bridge edge path in a tie", "[genotype][integrated-snarl-finder]") {
-    
+
     // Build a toy graph
     const string graph_json = R"(
 
@@ -903,10 +877,8 @@ TEST_CASE("IntegratedSnarlFinder prefers to root at a bridge edge path in a tie"
     )";
 
     // Make an actual graph
-    VG graph;
-    Graph chunk;
-    json2pb(chunk, graph_json.c_str(), graph_json.size());
-    graph.merge(chunk);
+    bdsg::HashGraph graph;
+    vg::io::json2graph(graph_json, &graph);
 
     // Make an IntegratedSnarlFinder
     unique_ptr<SnarlFinder> finder(new IntegratedSnarlFinder(graph));
@@ -935,7 +907,7 @@ TEST_CASE("IntegratedSnarlFinder prefers to root at a bridge edge path in a tie"
 }
 
 TEST_CASE("IntegratedSnarlFinder prefers to root at a cycle that is 1 bp longer", "[genotype][integrated-snarl-finder]") {
-    
+
     // Build a toy graph
     const string graph_json = R"(
 
@@ -956,10 +928,8 @@ TEST_CASE("IntegratedSnarlFinder prefers to root at a cycle that is 1 bp longer"
     )";
 
     // Make an actual graph
-    VG graph;
-    Graph chunk;
-    json2pb(chunk, graph_json.c_str(), graph_json.size());
-    graph.merge(chunk);
+    bdsg::HashGraph graph;
+    vg::io::json2graph(graph_json, &graph);
 
     // Make an IntegratedSnarlFinder
     unique_ptr<SnarlFinder> finder(new IntegratedSnarlFinder(graph));
@@ -988,7 +958,7 @@ TEST_CASE("IntegratedSnarlFinder prefers to root at a cycle that is 1 bp longer"
 }
 
 TEST_CASE("IntegratedSnarlFinder prefers to root at a chain with an up-weighted node", "[genotype][integrated-snarl-finder]") {
-    
+
     // Build a toy graph
     const string graph_json = R"(
 
@@ -1009,10 +979,8 @@ TEST_CASE("IntegratedSnarlFinder prefers to root at a chain with an up-weighted
     )";
 
     // Make an actual graph
-    VG graph;
-    Graph chunk;
-    json2pb(chunk, graph_json.c_str(), graph_json.size());
-    graph.merge(chunk);
+    bdsg::HashGraph graph;
+    vg::io::json2graph(graph_json, &graph);
 
     // Make an IntegratedSnarlFinder that adds 10 bp to node 4's apparent length
     unique_ptr<SnarlFinder> finder(new IntegratedSnarlFinder(graph, {{4, 10}}));
@@ -1041,7 +1009,7 @@ TEST_CASE("IntegratedSnarlFinder prefers to root at a chain with an up-weighted
 }
 
 TEST_CASE("IntegratedSnarlFinder sees tips as disqualifying ultrabubbles", "[genotype][integrated-snarl-finder]") {
-    
+
     // Build a toy graph
     const string graph_json = R"(
 
@@ -1066,10 +1034,8 @@ TEST_CASE("IntegratedSnarlFinder sees tips as disqualifying ultrabubbles", "[gen
     )";
 
     // Make an actual graph
-    VG graph;
-    Graph chunk;
-    json2pb(chunk, graph_json.c_str(), graph_json.size());
-    graph.merge(chunk);
+    bdsg::HashGraph graph;
+    vg::io::json2graph(graph_json, &graph);
 
     // Make an IntegratedSnarlFinder
     unique_ptr<SnarlFinder> finder(new IntegratedSnarlFinder(graph));
@@ -1098,10 +1064,10 @@ TEST_CASE("IntegratedSnarlFinder sees tips as disqualifying ultrabubbles", "[gen
 }
 
 TEST_CASE("CactusSnarlFinder throws an error instead of crashing when the graph has no edges", "[genotype][cactus-snarl-finder]") {
-    
+
   // Build a toy graph
   const string graph_json = R"(
-    
+
     {
         "node": [
             {"id": 1, "sequence": "G"},
@@ -1115,14 +1081,12 @@ TEST_CASE("CactusSnarlFinder throws an error instead of crashing when the graph
             {"id": 9, "sequence": "A"}
         ]
     }
-    
+
     )";
-    
+
   // Make an actual graph
-  VG graph;
-  Graph chunk;
-  json2pb(chunk, graph_json.c_str(), graph_json.size());
-  graph.merge(chunk);
+  bdsg::HashGraph graph;
+  vg::io::json2graph(graph_json, &graph);
     
   // Make a CactusSnarlFinder
   unique_ptr<SnarlFinder> finder(new CactusSnarlFinder(graph));
@@ -1183,7 +1147,7 @@ TEST_CASE("fixed priors can be assigned to genotypes", "[genotype]") {
 TEST_CASE("TrivialTraversalFinder can find traversals", "[genotype]") {
   // Build a toy graph
   const string graph_json = R"(
-    
+
     {
         "node": [
             {"id": 1, "sequence": "G"},
@@ -1208,7 +1172,7 @@ TEST_CASE("TrivialTraversalFinder can find traversals", "[genotype]") {
             {"from": 6, "to": 8},
             {"from": 7, "to": 9},
             {"from": 8, "to": 9}
-            
+
         ],
         "path": [
             {"name": "hint", "mapping": [
@@ -1219,14 +1183,12 @@ TEST_CASE("TrivialTraversalFinder can find traversals", "[genotype]") {
             ]}
         ]
     }
-    
+
     )";
-    
+
   // Make an actual graph
-  VG graph;
-  Graph chunk;
-  json2pb(chunk, graph_json.c_str(), graph_json.size());
-  graph.merge(chunk);
+  bdsg::HashGraph graph;
+  vg::io::json2graph(graph_json, &graph);
     
   // Make a site
   Snarl site;
@@ -1329,12 +1291,10 @@ TEST_CASE("CactusSnarlFinder can differentiate ultrabubbles from snarls", "[geno
         ]
         }
         )";
-        
+
         // Make an actual graph
-        VG graph;
-        Graph chunk;
-        json2pb(chunk, graph_json.c_str(), graph_json.size());
-        graph.merge(chunk);
+        bdsg::HashGraph graph;
+        vg::io::json2graph(graph_json, &graph);
 
         // Find the snarls
         CactusSnarlFinder cubs(graph);
@@ -1381,10 +1341,8 @@ TEST_CASE("CactusSnarlFinder can differentiate ultrabubbles from snarls", "[geno
         )";
     
         // Make an actual graph
-        VG graph;
-        Graph chunk;
-        json2pb(chunk, graph_json.c_str(), graph_json.size());
-        graph.merge(chunk);
+        bdsg::HashGraph graph;
+        vg::io::json2graph(graph_json, &graph);
 
         // Find the snarls
         CactusSnarlFinder cubs(graph);
@@ -1454,10 +1412,8 @@ TEST_CASE("IntegratedSnarlFinder can differentiate ultrabubbles from snarls", "[
         )";
         
         // Make an actual graph
-        VG graph;
-        Graph chunk;
-        json2pb(chunk, graph_json.c_str(), graph_json.size());
-        graph.merge(chunk);
+        bdsg::HashGraph graph;
+        vg::io::json2graph(graph_json, &graph);
 
         // Find the snarls
         IntegratedSnarlFinder cubs(graph);
@@ -1504,10 +1460,8 @@ TEST_CASE("IntegratedSnarlFinder can differentiate ultrabubbles from snarls", "[
         )";
     
         // Make an actual graph
-        VG graph;
-        Graph chunk;
-        json2pb(chunk, graph_json.c_str(), graph_json.size());
-        graph.merge(chunk);
+        bdsg::HashGraph graph;
+        vg::io::json2graph(graph_json, &graph);
 
         // Find the snarls
         IntegratedSnarlFinder cubs(graph);
@@ -1581,11 +1535,9 @@ TEST_CASE("RepresentativeTraversalFinder finds traversals correctly", "[genotype
         }
         )";
     
-    // Make an actual graph
+    // Load the graph. It needs to be a VG because we will give it to a SupportAugmentedGraph later.
     VG graph;
-    Graph chunk;
-    json2pb(chunk, graph_json.c_str(), graph_json.size());
-    graph.merge(chunk);
+    vg::io::json2graph(graph_json, &graph);
 
     // Find the snarls
     CactusSnarlFinder cubs(graph);
@@ -1713,11 +1665,9 @@ TEST_CASE("RepresentativeTraversalFinder finds traversals of simple inversions",
     }
     )";
 
-    // Make an actual graph
+    // Load the graph. It needs to be a VG because we will give it to a SupportAugmentedGraph later.
     VG graph;
-    Graph chunk;
-    json2pb(chunk, graph_json.c_str(), graph_json.size());
-    graph.merge(chunk);
+    vg::io::json2graph(graph_json, &graph);
 
     // Find the snarls
     CactusSnarlFinder cubs(graph);
@@ -1774,11 +1724,11 @@ TEST_CASE("GBWTTraversalFinder finds traversals for GBWT threads", "[genotype][g
     string graph_json = R"({"node": [{"id": 1, "sequence": "CAAATAAGGCTT"}, {"id": 2, "sequence": "G"}, {"id": 3, "sequence": "GGAAATTTTC"}, {"id": 4, "sequence": "C"}, {"id": 5, "sequence": "TGGAGTTCTATTATATTCC"}, {"id": 6, "sequence": "G"}, {"id": 7, "sequence": "A"}, {"id": 8, "sequence": "ACTCTCTGGTTCCTG"}, {"id": 9, "sequence": "A"}, {"id": 10, "sequence": "G"}, {"id": 11, "sequence": "TGCTATGTGTAACTAGTAATGGTAATGGATATGTTGGGCTTTTTTCTTTGATTTATTTGAAGTGACGTTTGACAATCTATCACTAGGGGTAATGTGGGGAAATGGAAAGAATACAAGATTTGGAGCCA"}], "edge": [{"from": 1, "to": 2}, {"from": 1, "to": 3}, {"from": 2, "to": 3}, {"from": 3, "to": 4}, {"from": 3, "to": 5}, {"from": 4, "to": 5}, {"from": 5, "to": 6}, {"from": 5, "to": 7}, {"from": 6, "to": 8}, {"from": 7, "to": 8}, {"from": 8, "to": 9}, {"from": 8, "to": 10}, {"from": 9, "to": 11}, {"from": 10, "to": 11}]})";
   
     // Load the JSON
-    vg::Graph proto_graph;
-    json2pb(proto_graph, graph_json.c_str(), graph_json.size());
+    bdsg::HashGraph graph;
+    vg::io::json2graph(graph_json, &graph);
     // Build the xg index
     xg::XG xg_index;
-    xg_index.from_path_handle_graph(vg::VG(proto_graph));
+    xg_index.from_path_handle_graph(graph);
     
     gbwt::Verbosity::set(gbwt::Verbosity::SILENT);
   
diff --git a/src/unittest/genotyper.cpp b/src/unittest/genotyper.cpp
index e2e9f7a142..4228b16ee3 100644
--- a/src/unittest/genotyper.cpp
+++ b/src/unittest/genotyper.cpp
@@ -7,6 +7,7 @@
 #include "../snarls.hpp"
 #include "../cactus_snarl_finder.hpp"
 #include "../traversal_finder.hpp"
+#include "../io/json2graph.hpp"
 
 namespace vg {
 namespace unittest {
@@ -41,15 +42,6 @@ TEST_CASE("traversals can be found from reads", "[genotyper]") {
             {"from": 6, "to": 8},
             {"from": 7, "to": 9},
             {"from": 8, "to": 9}
-            
-        ],
-        "path": [
-            {"name": "hint", "mapping": [
-                {"position": {"node_id": 1}, "rank" : 1 },
-                {"position": {"node_id": 6}, "rank" : 2 },
-                {"position": {"node_id": 8}, "rank" : 3 },
-                {"position": {"node_id": 9}, "rank" : 4 }
-            ]}
         ]
     }
     
@@ -57,9 +49,7 @@ TEST_CASE("traversals can be found from reads", "[genotyper]") {
     
     // Make an actual graph
     VG graph;
-    Graph chunk;
-    json2pb(chunk, graph_json.c_str(), graph_json.size());
-    graph.merge(chunk);
+    vg::io::json2graph(graph_json, &graph);
     
     // Find the snarls
     SnarlManager manager = CactusSnarlFinder(graph).find_snarls();
diff --git a/src/unittest/haplotypes.cpp b/src/unittest/haplotypes.cpp
index e441bbe197..9e4e04475b 100644
--- a/src/unittest/haplotypes.cpp
+++ b/src/unittest/haplotypes.cpp
@@ -4,8 +4,10 @@
 
 #include "catch.hpp"
 #include "haplotypes.hpp"
+#include "../io/json2graph.hpp"
 #include "xg.hpp"
 #include "vg.hpp"
+#include <bdsg/hash_graph.hpp>
 
 #include <numeric>
 
@@ -66,7 +68,7 @@ TEST_CASE("We can represent appropriate graphs according to linear reference", "
   )";
   
   thread_t SNP_thread = {tm[1], tm[3], tm[4]};
-    
+
   string del_graph_json = R"(
   {"node":[
     {"id":1,"sequence":"AAA"},
@@ -89,22 +91,24 @@ TEST_CASE("We can represent appropriate graphs according to linear reference", "
     ]}
   ]}
   )";
-  
+
   thread_t del_ref_thread = {tm[1], tm[2], tm[4]};
   thread_t del_thread = {tm[1], tm[4]};
-  
-  vg::Graph SNP_proto_graph;
-  json2pb(SNP_proto_graph, SNP_graph_json.c_str(), SNP_graph_json.size());
+
+  // Build the SNP graph
+  bdsg::HashGraph SNP_graph;
+  vg::io::json2graph(SNP_graph_json, &SNP_graph);
   // Build the xg index
   xg::XG SNP_xg_index;
-  SNP_xg_index.from_path_handle_graph(vg::VG(SNP_proto_graph));
+  SNP_xg_index.from_path_handle_graph(SNP_graph);
   vg::path_handle_t SNP_ref_path_handle = SNP_xg_index.get_path_handle("reference");
-  
-  vg::Graph del_proto_graph;
-  json2pb(del_proto_graph, del_graph_json.c_str(), del_graph_json.size());
+
+  // Build the del graph
+  bdsg::HashGraph del_graph;
+  vg::io::json2graph(del_graph_json, &del_graph);
   // Build the xg index
   xg::XG del_xg_index;
-  del_xg_index.from_path_handle_graph(vg::VG(del_proto_graph));
+  del_xg_index.from_path_handle_graph(del_graph);
   vg::path_handle_t del_ref_path_handle = del_xg_index.get_path_handle("reference");
   
   // NEGATIVE SNVs
@@ -159,18 +163,20 @@ TEST_CASE("We can represent appropriate graphs according to linear reference", "
   
   thread_t double_thread = {tm[1], tm[2], tm[4]};
 
-  vg::Graph long_proto_graph;
-  json2pb(long_proto_graph, long_graph_json.c_str(), long_graph_json.size());
+  // Build the long graph
+  bdsg::HashGraph long_graph;
+  vg::io::json2graph(long_graph_json, &long_graph);
   // Build the xg index
   xg::XG long_xg_index;
-  long_xg_index.from_path_handle_graph(vg::VG(long_proto_graph));
+  long_xg_index.from_path_handle_graph(long_graph);
   vg::path_handle_t long_ref_path_handle = long_xg_index.get_path_handle("reference");
-  
-  vg::Graph double_proto_graph;
-  json2pb(double_proto_graph, double_graph_json.c_str(), double_graph_json.size());
+
+  // Build the double graph
+  bdsg::HashGraph double_graph;
+  vg::io::json2graph(double_graph_json, &double_graph);
   // Build the xg index
   xg::XG double_xg_index;
-  double_xg_index.from_path_handle_graph(vg::VG(double_proto_graph));
+  double_xg_index.from_path_handle_graph(double_graph);
   vg::path_handle_t double_ref_path_handle = double_xg_index.get_path_handle("reference");
 
   string matching_test_file = "matching_test.slls";
@@ -382,13 +388,13 @@ TEST_CASE("We can score haplotypes using GBWT", "[haplo-score][gbwt]") {
 TEST_CASE("We can recognize a required crossover", "[hapo-score][gbwt]") {
   // This graph is the start of xy2 from test/small
   string graph_json = R"({"node": [{"id": 1, "sequence": "CAAATAAGGCTT"}, {"id": 2, "sequence": "G"}, {"id": 3, "sequence": "GGAAATTTTC"}, {"id": 4, "sequence": "C"}, {"id": 5, "sequence": "TGGAGTTCTATTATATTCC"}, {"id": 6, "sequence": "G"}, {"id": 7, "sequence": "A"}, {"id": 8, "sequence": "ACTCTCTGGTTCCTG"}, {"id": 9, "sequence": "A"}, {"id": 10, "sequence": "G"}, {"id": 11, "sequence": "TGCTATGTGTAACTAGTAATGGTAATGGATATGTTGGGCTTTTTTCTTTGATTTATTTGAAGTGACGTTTGACAATCTATCACTAGGGGTAATGTGGGGAAATGGAAAGAATACAAGATTTGGAGCCA"}], "edge": [{"from": 1, "to": 2}, {"from": 1, "to": 3}, {"from": 2, "to": 3}, {"from": 3, "to": 4}, {"from": 3, "to": 5}, {"from": 4, "to": 5}, {"from": 5, "to": 6}, {"from": 5, "to": 7}, {"from": 6, "to": 8}, {"from": 7, "to": 8}, {"from": 8, "to": 9}, {"from": 8, "to": 10}, {"from": 9, "to": 11}, {"from": 10, "to": 11}]})";
-  
-  // Load the JSON
-  vg::Graph proto_graph;
-  json2pb(proto_graph, graph_json.c_str(), graph_json.size());
+
+  // Load the JSON into a HashGraph
+  bdsg::HashGraph graph;
+  vg::io::json2graph(graph_json, &graph);
   // Build the xg index
   xg::XG xg_index;
-  xg_index.from_path_handle_graph(vg::VG(proto_graph));
+  xg_index.from_path_handle_graph(graph);
     
   gbwt::Verbosity::set(gbwt::Verbosity::SILENT);
   gbwt::DynamicGBWT* gbwt_index = new gbwt::DynamicGBWT;
diff --git a/src/unittest/indexed_vg.cpp b/src/unittest/indexed_vg.cpp
index 7f74d92193..27504dea9f 100644
--- a/src/unittest/indexed_vg.cpp
+++ b/src/unittest/indexed_vg.cpp
@@ -40,7 +40,7 @@ TEST_CASE("An IndexedVG can be created for a single node", "[handle][indexed-vg]
         ]
     })";
     
-    // Load the JSON
+    // Load the JSON into a Protobuf Graph specifically.
     Graph proto_graph;
     json2pb(proto_graph, graph_json.c_str(), graph_json.size());
     
diff --git a/src/unittest/mapper.cpp b/src/unittest/mapper.cpp
index 2caf42d076..17f81fe17b 100644
--- a/src/unittest/mapper.cpp
+++ b/src/unittest/mapper.cpp
@@ -1,9 +1,10 @@
 /// \file mapper.cpp
-///  
+///
 /// unit tests for the mapper
 
 #include <iostream>
 #include "vg/io/json2pb.h"
+#include "../io/json2graph.hpp"
 #include <vg/vg.pb.h>
 #include <bdsg/hash_graph.hpp>
 #include "../mapper.hpp"
@@ -25,14 +26,10 @@ TEST_CASE( "Mapper can map to a one-node graph", "[mapping][mapper]" ) {
             ]}
         ]
     })";
-    
+
     // Load the JSON
-    Graph proto_graph;
-    json2pb(proto_graph, graph_json.c_str(), graph_json.size());
-    
-    // Make it into a VG
-    VG graph;
-    graph.extend(proto_graph);
+    bdsg::HashGraph graph;
+    vg::io::json2graph(graph_json, &graph);
     
     // Make GCSA quiet
     gcsa::Verbosity::set(gcsa::Verbosity::SILENT);
@@ -245,14 +242,10 @@ TEST_CASE( "Mapper finds optimal mapping for read starting with node-border MEM"
     {"position":{"node_id":1444},"rank":1059},
     {"position":{"node_id":1445},"rank":1060}]}]}
     )";
-    
+
     // Load the JSON
-    Graph proto_graph;
-    json2pb(proto_graph, graph_json.c_str(), graph_json.size());
-    
-    // Make it into a VG
-    VG graph;
-    graph.extend(proto_graph);
+    bdsg::HashGraph graph;
+    vg::io::json2graph(graph_json, &graph);
     
     // Make GCSA quiet
     gcsa::Verbosity::set(gcsa::Verbosity::SILENT);
@@ -311,14 +304,10 @@ TEST_CASE( "Mapper can annotate positions correctly on both strands", "[mapper][
         ]}
     ]}
     )";
-    
+
     // Load the JSON
-    Graph proto_graph;
-    json2pb(proto_graph, graph_json.c_str(), graph_json.size());
-    
-    // Make it into a VG
-    VG graph;
-    graph.extend(proto_graph);
+    bdsg::HashGraph graph;
+    vg::io::json2graph(graph_json, &graph);
     
     // Make GCSA quiet
     gcsa::Verbosity::set(gcsa::Verbosity::SILENT);
diff --git a/src/unittest/minimizer_mapper.cpp b/src/unittest/minimizer_mapper.cpp
index 3ecd5de147..84628276e4 100644
--- a/src/unittest/minimizer_mapper.cpp
+++ b/src/unittest/minimizer_mapper.cpp
@@ -3,8 +3,8 @@
 /// unit tests for the minimizer mapper
 
 #include <iostream>
-#include "vg/io/json2pb.h"
 #include "../io/json2graph.hpp"
+#include <bdsg/hash_graph.hpp>
 #include <vg/vg.pb.h>
 #include "../minimizer_mapper.hpp"
 #include "../build_index.hpp"
@@ -450,15 +450,13 @@ TEST_CASE("MinimizerMapper can map an empty string between odd points", "[giraff
                 {"id": "55511925", "sequence": "CTTCCTTCC"}
             ]
         })";
-        
-        // TODO: Write a json_to_handle_graph
-        vg::Graph proto_graph;
-        json2pb(proto_graph, graph_json.c_str(), graph_json.size());
-        auto graph = vg::VG(proto_graph);
-        
+
+        bdsg::HashGraph graph;
+        vg::io::json2graph(graph_json, &graph);
+
         Alignment aln;
         aln.set_sequence("");
-        
+
         pos_t left_anchor {55511921, false, 5}; // This is on the final base of the node
         pos_t right_anchor {55511925, false, 6};
         
@@ -480,7 +478,7 @@ TEST_CASE("MinimizerMapper can map an empty string between odd points", "[giraff
 TEST_CASE("MinimizerMapper can map with an initial deletion", "[giraffe][mapping][right_tail]") {
 
         Aligner aligner;
-        
+
         string graph_json = R"({
             "edge": [
                 {"from": "1", "to": "2"},
@@ -492,12 +490,10 @@ TEST_CASE("MinimizerMapper can map with an initial deletion", "[giraffe][mapping
                 {"id": "3", "sequence": "CATTAG"}
             ]
         })";
-        
-        // TODO: Write a json_to_handle_graph
-        vg::Graph proto_graph;
-        json2pb(proto_graph, graph_json.c_str(), graph_json.size());
-        auto graph = vg::VG(proto_graph);
-        
+
+        bdsg::HashGraph graph;
+        vg::io::json2graph(graph_json, &graph);
+
         Alignment aln;
         aln.set_sequence("CATTAG");
         
@@ -527,7 +523,7 @@ TEST_CASE("MinimizerMapper can map with an initial deletion", "[giraffe][mapping
 TEST_CASE("MinimizerMapper can map with an initial deletion on a multi-base node", "[giraffe][mapping][right_tail]") {
 
         Aligner aligner;
-        
+
         string graph_json = R"({
             "edge": [
                 {"from": "1", "to": "2"},
@@ -539,12 +535,10 @@ TEST_CASE("MinimizerMapper can map with an initial deletion on a multi-base node
                 {"id": "3", "sequence": "CATTAG"}
             ]
         })";
-        
-        // TODO: Write a json_to_handle_graph
-        vg::Graph proto_graph;
-        json2pb(proto_graph, graph_json.c_str(), graph_json.size());
-        auto graph = vg::VG(proto_graph);
-        
+
+        bdsg::HashGraph graph;
+        vg::io::json2graph(graph_json, &graph);
+
         Alignment aln;
         aln.set_sequence("CATTAG");
         
@@ -574,7 +568,7 @@ TEST_CASE("MinimizerMapper can map with an initial deletion on a multi-base node
 TEST_CASE("MinimizerMapper can map right off the past-the-end base", "[giraffe][mapping][right_tail]") {
 
         Aligner aligner;
-        
+
         string graph_json = R"({
             "edge": [
                 {"from": "1", "to": "2"},
@@ -586,15 +580,13 @@ TEST_CASE("MinimizerMapper can map right off the past-the-end base", "[giraffe][
                 {"id": "3", "sequence": "CATTAG"}
             ]
         })";
-        
-        // TODO: Write a json_to_handle_graph
-        vg::Graph proto_graph;
-        json2pb(proto_graph, graph_json.c_str(), graph_json.size());
-        auto graph = vg::VG(proto_graph);
-        
+
+        bdsg::HashGraph graph;
+        vg::io::json2graph(graph_json, &graph);
+
         Alignment aln;
         aln.set_sequence("CATTAG");
-        
+
         pos_t left_anchor {1, false, 1}; // This is the past-end position
         pos_t right_anchor = empty_pos_t();
         
@@ -635,15 +627,13 @@ TEST_CASE("MinimizerMapper can compute longest detectable gap in range", "[giraf
 TEST_CASE("MinimizerMapper can find a significant indel instead of a tempting softclip", "[giraffe][mapping][left_tail]") {
 
         Aligner aligner;
-        
+
         string graph_json = R"({
             "edge": [{"from": "30788083", "to": "30788088"}, {"from": "30788083", "to": "30788084"}, {"from": "30788074", "to": "30788075"}, {"from": "30788074", "to": "30788076"}, {"from": "30788079", "to": "30788080"}, {"from": "30788079", "to": "30788081"}, {"from": "30788086", "to": "30788088"}, {"from": "30788086", "to": "30788087", "to_end": true}, {"from": "30788075", "to": "30788077"}, {"from": "30788073", "to": "30788074"}, {"from": "30788078", "to": "30788079"}, {"from": "30788077", "to": "30788078"}, {"from": "30788084", "to": "30788088"}, {"from": "30788084", "to": "30788085"}, {"from": "30788076", "to": "30788077"}, {"from": "30788087", "from_start": true, "to": "30788088"}, {"from": "30788081", "to": "30788082"}, {"from": "30788080", "to": "30788082"}, {"from": "30788082", "to": "30788088"}, {"from": "30788082", "to": "30788083"}, {"from": "30788085", "to": "30788086"}], "node": [{"id": "30788083", "sequence": "AAA"}, {"id": "30788074", "sequence": "AAAAAAAATACAAAAAATTAGC"}, {"id": "30788079", "sequence": "CGCCACTGCACTCCAGCCTGGGC"}, {"id": "30788086", "sequence": "AAAAAAA"}, {"id": "30788075", "sequence": "T"}, {"id": "30788073", "sequence": "GAAAGAGAGTTGTTTAAATTCCATAGTTAGGGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACGAGGTCAGGAGATCGAGACCATCCTGGCTAACACGGTGAAACCCCGTCTCTACTA"}, {"id": "30788078", "sequence": "G"}, {"id": "30788077", "sequence": "GGGCGTGGTAGCGGGCGCCTGTAGTCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATGGCGTGAACCCGGGAGGCGGAGCTTGCAGTGAGCCGAGATC"}, {"id": "30788084", "sequence": "A"}, {"id": "30788088", "sequence": "AATTCCATAGTTAGAAAAATAAGACATATCAGGTTTTCAAAAAGTGTAGCCATTTTCTGTTTCTAAAAGGGACACTTAAAGTGAAA"}, {"id": "30788076", "sequence": "C"}, {"id": "30788087", "sequence": "T"}, {"id": "30788081", "sequence": "A"}, {"id": "30788080", "sequence": "G"}, {"id": "30788082", "sequence": "ACAGAGCGAGACTCCGTCTCAAAAAAAAAAAAAA"}, {"id": "30788085", "sequence": "AA"}]
         })";
-        
-        // TODO: Write a json_to_handle_graph
-        vg::Graph proto_graph;
-        json2pb(proto_graph, graph_json.c_str(), graph_json.size());
-        auto graph = vg::VG(proto_graph);
+
+        bdsg::HashGraph graph;
+        vg::io::json2graph(graph_json, &graph);
         
         Alignment aln;
         aln.set_sequence("TTGAAAACCTGATATGTCTTATTTTTCTAACTATGGAATTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTGAGACGGAGTCTCGCTCTGTCGCCCAGGCTGGAGTGCAGTGGCGCGATCTCGGCTCACTGCAAGCTCCGCCTCCCGGGTTCACGCCATTCTCCTGCCTCAGCCTCCCGAGTAGCTGGGACTACAGGCGCCCGCTACCACGCCCGGCTAATTTTTTGTATTTTTTTT");
@@ -854,9 +844,8 @@ TEST_CASE("MinimizerMapper can extract a strand-split dagified local graph witho
                      {"id": "60245278", "sequence": "GATTACAGATTACA"}]
         }
     )";
-    vg::Graph graph_chunk;
-    json2pb(graph_chunk, graph_json.c_str(), graph_json.size());
-    vg::VG graph(graph_chunk);
+    bdsg::HashGraph graph;
+    vg::io::json2graph(graph_json, &graph);
     
     TestMinimizerMapper::with_dagified_local_graph(make_pos_t(60245283, false, 10), empty_pos_t(), 50, graph, [&](DeletableHandleGraph& dagified_graph, const handle_t& left_anchor_handle, const handle_t& right_anchor_handle, const std::function<std::pair<nid_t, bool>(const handle_t&)>& dagified_handle_to_base) {
         // The graph started as a stick
diff --git a/src/unittest/multipath_alignment_graph.cpp b/src/unittest/multipath_alignment_graph.cpp
index bea5f687aa..d78e19d6f1 100644
--- a/src/unittest/multipath_alignment_graph.cpp
+++ b/src/unittest/multipath_alignment_graph.cpp
@@ -3,7 +3,8 @@
 /// unit tests for the multipath mapper's MultipathAlignmentGraph
 
 #include <iostream>
-#include "vg/io/json2pb.h"
+#include "../io/json2graph.hpp"
+#include <bdsg/hash_graph.hpp>
 #include <vg/vg.pb.h>
 #include "../cactus_snarl_finder.hpp"
 #include "../integrated_snarl_finder.hpp"
@@ -47,13 +48,9 @@ TEST_CASE( "MultipathAlignmentGraph::align handles tails correctly", "[multipath
     })";
     
     // Load the JSON
-    Graph proto_graph;
-    json2pb(proto_graph, graph_json.c_str(), graph_json.size());
-    
-    // Make it into a VG
-    VG vg;
-    vg.extend(proto_graph);
-    
+    bdsg::HashGraph vg;
+    ::vg::io::json2graph(graph_json, &vg);
+
     // Make snarls on it
     CactusSnarlFinder bubble_finder(vg);
     IntegratedSnarlFinder snarl_finder(vg);
diff --git a/src/unittest/multipath_mapper.cpp b/src/unittest/multipath_mapper.cpp
index be6d3b6194..bc1dc4cdd9 100644
--- a/src/unittest/multipath_mapper.cpp
+++ b/src/unittest/multipath_mapper.cpp
@@ -4,7 +4,9 @@
 
 #include <iostream>
 #include "vg/io/json2pb.h"
+#include "../io/json2graph.hpp"
 #include <vg/vg.pb.h>
+#include <bdsg/hash_graph.hpp>
 #include "../multipath_mapper.hpp"
 #include "../build_index.hpp"
 #include "xg.hpp"
@@ -111,7 +113,7 @@ TEST_CASE( "MultipathMapper::read_coverage works", "[multipath][mapping][multipa
 }
 
 TEST_CASE( "MultipathMapper::query_cluster_graphs works", "[multipath][mapping][multipathmapper]" ) {
-    
+
     string graph_json = R"({
         "node": [{"id": 1, "sequence": "GATTACA"}],
         "path": [
@@ -120,14 +122,10 @@ TEST_CASE( "MultipathMapper::query_cluster_graphs works", "[multipath][mapping][
             ]}
         ]
     })";
-    
-    // Load the JSON
-    Graph proto_graph;
-    json2pb(proto_graph, graph_json.c_str(), graph_json.size());
-    
-    // Make it into a VG
-    VG graph;
-    graph.extend(proto_graph);
+
+    // Load the JSON into a HashGraph
+    bdsg::HashGraph graph;
+    vg::io::json2graph(graph_json, &graph);
     
     // Make GCSA quiet
     gcsa::Verbosity::set(gcsa::Verbosity::SILENT);
@@ -135,17 +133,17 @@ TEST_CASE( "MultipathMapper::query_cluster_graphs works", "[multipath][mapping][
     // Make pointers to fill in
     gcsa::GCSA* gcsaidx = nullptr;
     gcsa::LCPArray* lcpidx = nullptr;
-    
+
     // Build the GCSA index
     build_gcsa_lcp(graph, gcsaidx, lcpidx, 16, 3);
-    
+
     // Build the xg index
     xg::XG xg_index;
-    xg_index.from_path_handle_graph(VG(proto_graph));
-    
+    xg_index.from_path_handle_graph(graph);
+
     // Make a multipath mapper to map against the graph.
     TestMultipathMapper mapper(&xg_index, gcsaidx, lcpidx);
-    
+
     // Make an Alignment that we're pretending we're doing
     Alignment aln;
     aln.set_sequence("GATTACA");
@@ -264,7 +262,7 @@ TEST_CASE( "MultipathMapper::query_cluster_graphs works", "[multipath][mapping][
 }
     
 TEST_CASE( "MultipathMapper can map to a one-node graph", "[multipath][mapping][multipathmapper]" ) {
-    
+
     string graph_json = R"({
         "node": [{"id": 1, "sequence": "GATTACA"}],
         "path": [
@@ -273,14 +271,10 @@ TEST_CASE( "MultipathMapper can map to a one-node graph", "[multipath][mapping][
             ]}
         ]
     })";
-    
-    // Load the JSON
-    Graph proto_graph;
-    json2pb(proto_graph, graph_json.c_str(), graph_json.size());
-    
-    // Make it into a VG
-    VG graph;
-    graph.extend(proto_graph);
+
+    // Load the JSON into a HashGraph
+    bdsg::HashGraph graph;
+    vg::io::json2graph(graph_json, &graph);
     
     // Make GCSA quiet
     gcsa::Verbosity::set(gcsa::Verbosity::SILENT);
@@ -291,11 +285,11 @@ TEST_CASE( "MultipathMapper can map to a one-node graph", "[multipath][mapping][
     
     // Build the GCSA index
     build_gcsa_lcp(graph, gcsaidx, lcpidx, 16, 3);
-    
+
     // Build the xg index
     xg::XG xg_index;
     xg_index.from_path_handle_graph(graph);
-    
+
     // Make a multipath mapper to map against the graph.
     MultipathMapper mapper(&xg_index, gcsaidx, lcpidx);
     // Lower the max mapping quality so that it thinks it can find unambiguous mappings of
@@ -422,16 +416,12 @@ TEST_CASE( "MultipathMapper can map to a one-node graph", "[multipath][mapping][
 }
 
 TEST_CASE( "MultipathMapper can work on a bigger graph", "[multipath][mapping][multipathmapper]" ) {
-    
+
     string graph_json = R"({"node":[{"sequence":"CTTCTCATCCCTCCTCAAGGGCCTTTAACTACTCCACATCCAAAGCTACCCAGGCCATTTTAAGTTTCCTGTGGACTAAGGACAAAGGTGCGGGGAGATG","id":12},{"sequence":"A","id":2},{"sequence":"CAAATAAGGCTTGGAAATTTTCTGGAGTTCTATTATATTCCAACTCTCTGGTTCCTGGTGCTATGTGTAACTAGTAATGGTAATGGATATGTTGGGCTTT","id":3},{"sequence":"TTTCTTTGATTTATTTGAAGTGACGTTTGACAATCTATCACTAGGGGTAATGTGGGGAAATGGAAAGAATACAAGATTTGGAGCCAGACAAATCTGGGTT","id":4},{"sequence":"CAAATCCTCACTTTGCCACATATTAGCCATGTGACTTTGAACAAGTTAGTTAATCTCTCTGAACTTCAGTTTAATTATCTCTAATATGGAGATGATACTA","id":5},{"sequence":"CTGACAGCAGAGGTTTGCTGTGAAGATTAAATTAGGTGATGCTTGTAAAGCTCAGGGAATAGTGCCTGGCATAGAGGAAAGCCTCTGACAACTGGTAGTT","id":6},{"sequence":"ACTGTTATTTACTATGAATCCTCACCTTCCTTGACTTCTTGAAACATTTGGCTATTGACCTCTTTCCTCCTTGAGGCTCTTCTGGCTTTTCATTGTCAAC","id":7},{"sequence":"ACAGTCAACGCTCAATACAAGGGACATTAGGATTGGCAGTAGCTCAGAGATCTCTCTGCTCACCGTGATCTTCAAGTTTGAAAATTGCATCTCAAATCTA","id":8},{"sequence":"AGACCCAGAGGGCTCACCCAGAGTCGAGGCTCAAGGACAGCTCTCCTTTGTGTCCAGAGTGTATACGATGTAACTCTGTTCGGGCACTGGTGAAAGATAA","id":9},{"sequence":"CAGAGGAAATGCCTGGCTTTTTATCAGAACATGTTTCCAAGCTTATCCCTTTTCCCAGCTCTCCTTGTCCCTCCCAAGATCTCTTCACTGGCCTCTTATC","id":10},{"sequence":"TTTACTGTTACCAAATCTTTCCAGAAGCTGCTCTTTCCCTCAATTGTTCATTTGTCTTCTTGTCCAGGAATGAACCACTGCTCTCTTCTTGTCAGATCAG","id":11}],"path":[{"name":"x","mapping":[{"position":{"node_id":3},"edit":[{"from_length":100,"to_length":100}],"rank":1},{"position":{"node_id":4},"edit":[{"from_length":100,"to_length":100}],"rank":2},{"position":{"node_id":5},"edit":[{"from_length":100,"to_length":100}],"rank":3},{"position":{"node_id":6},"edit":[{"from_length":100,"to_length":100}],"rank":4},{"position":{"node_id":7},"edit":[{"from_length":100,"to_length":100}],"rank":5},{"position":{"node_id":8},"edit":[{"from_length":100,"to_length":100}],"rank":6},{"position":{"node_id":9},"edit":[{"from_length":100,"to_length":100}],"rank":7},{"position":{"node_id":10},"edit":[{"from_length":100,"to_length":100}],"rank":8},{"position":{"node_id":11
},"edit":[{"from_length":100,"to_length":100}],"rank":9},{"position":{"node_id":12},"edit":[{"from_length":100,"to_length":100}],"rank":10},{"position":{"node_id":2},"edit":[{"from_length":1,"to_length":1}],"rank":11}]}],"edge":[{"from":12,"to":2},{"from":3,"to":4},{"from":4,"to":5},{"from":5,"to":6},{"from":6,"to":7},{"from":7,"to":8},{"from":8,"to":9},{"from":9,"to":10},{"from":10,"to":11},{"from":11,"to":12}]})";
-    
-    // Load the JSON
-    Graph proto_graph;
-    json2pb(proto_graph, graph_json.c_str(), graph_json.size());
-    
-    // Make it into a VG
-    VG graph;
-    graph.extend(proto_graph);
+
+    // Load the JSON into a HashGraph
+    bdsg::HashGraph graph;
+    vg::io::json2graph(graph_json, &graph);
     
     // Make GCSA quiet
     gcsa::Verbosity::set(gcsa::Verbosity::SILENT);
@@ -442,11 +432,11 @@ TEST_CASE( "MultipathMapper can work on a bigger graph", "[multipath][mapping][m
     
     // Build the GCSA index
     build_gcsa_lcp(graph, gcsaidx, lcpidx, 16, 3);
-    
+
     // Build the xg index
     xg::XG xg_index;
-    xg_index.from_path_handle_graph(VG(proto_graph));
-    
+    xg_index.from_path_handle_graph(graph);
+
     // Make a multipath mapper to map against the graph.
     TestMultipathMapper mapper(&xg_index, gcsaidx, lcpidx);
     // Lower the max mapping quality so that it thinks it can find unambiguous mappings of
diff --git a/src/unittest/packed_structs.cpp b/src/unittest/packed_structs.cpp
index 9c0075751e..512e638620 100644
--- a/src/unittest/packed_structs.cpp
+++ b/src/unittest/packed_structs.cpp
@@ -69,7 +69,7 @@ using namespace std;
                     case APPEND:
                         for (size_t k = 0; k < appends_per_op; k++) {
                             std_vec.push_back(next_val);
-                            dyn_vec.append(next_val);
+                            dyn_vec.push_back(next_val);
                             next_val++;
                         }
                         
@@ -79,7 +79,7 @@ using namespace std;
                         if (!std_vec.empty()) {
                             for (size_t k = 0; k < pops_per_op; k++) {
                                 std_vec.pop_back();
-                                dyn_vec.pop();
+                                dyn_vec.pop_back();
                             }
                         }
                         
@@ -161,7 +161,7 @@ using namespace std;
                     case APPEND:
                         for (size_t k = 0; k < appends_per_op; k++) {
                             std_vec.push_back(next_val);
-                            dyn_vec.append(next_val);
+                            dyn_vec.push_back(next_val);
                             next_val = val_distr(prng);
                         }
                         
@@ -171,7 +171,7 @@ using namespace std;
                         if (!std_vec.empty()) {
                             for (size_t k = 0; k < pops_per_op; k++) {
                                 std_vec.pop_back();
-                                dyn_vec.pop();
+                                dyn_vec.pop_back();
                             }
                         }
                         
@@ -252,7 +252,7 @@ using namespace std;
                     case APPEND_LEFT:
                         for (size_t k = 0; k < appends_per_op; k++) {
                             std_deq.push_front(next_val);
-                            suc_deq.append_front(next_val);
+                            suc_deq.push_front(next_val);
                             next_val++;
                         }
                         
@@ -269,7 +269,7 @@ using namespace std;
                     case APPEND_RIGHT:
                         for (size_t k = 0; k < appends_per_op; k++) {
                             std_deq.push_back(next_val);
-                            suc_deq.append_back(next_val);
+                            suc_deq.push_back(next_val);
                             next_val++;
                         }
                         
diff --git a/src/unittest/path_component_index.cpp b/src/unittest/path_component_index.cpp
index 058f4bf9c1..edd3a6013a 100644
--- a/src/unittest/path_component_index.cpp
+++ b/src/unittest/path_component_index.cpp
@@ -8,7 +8,8 @@
 #include "path_component_index.hpp"
 #include "xg.hpp"
 #include "vg.hpp"
-#include "vg/io/json2pb.h"
+#include "../io/json2graph.hpp"
+#include <bdsg/hash_graph.hpp>
 #include <vg/vg.pb.h>
 
 namespace vg {
@@ -17,14 +18,14 @@ namespace unittest {
     TEST_CASE("Path component memoization produces expected results", "[pathcomponent]") {
         
         string graph_json = R"({"node": [{"sequence": "AAACCC", "id": 1}, {"sequence": "CACACA", "id": 2}, {"sequence": "CACACA", "id": 3}, {"sequence": "TTTTGG", "id": 4}, {"sequence": "ACGTAC", "id": 5}], "path": [{"name": "one", "mapping": [{"position": {"node_id": 1}, "rank": 1}, {"position": {"node_id": 2}, "rank": 2}]}, {"name": "three", "mapping": [{"position": {"node_id": 2}, "rank": 1}, {"position": {"node_id": 3}, "rank": 2}]}, {"name": "two", "mapping": [{"position": {"node_id": 4}, "rank": 1}, {"position": {"node_id": 5}, "rank": 2}]}], "edge": [{"from": 1, "to": 2}, {"from": 2, "to": 3}, {"from": 4, "to": 5}]})";
-        
+
         // Load the JSON
-        Graph proto_graph;
-        json2pb(proto_graph, graph_json.c_str(), graph_json.size());
-        
+        bdsg::HashGraph graph;
+        vg::io::json2graph(graph_json, &graph);
+
         // Build the xg index
         xg::XG xg_index;
-        xg_index.from_path_handle_graph(VG(proto_graph));
+        xg_index.from_path_handle_graph(graph);
         
         
         unordered_set<path_handle_t> comp_1;
diff --git a/src/unittest/path_index.cpp b/src/unittest/path_index.cpp
index b70152ae2d..e1facc2977 100644
--- a/src/unittest/path_index.cpp
+++ b/src/unittest/path_index.cpp
@@ -5,9 +5,9 @@
 
 #include <iostream>
 #include <string>
-#include "vg/io/json2pb.h"
-#include <vg/vg.pb.h>
+#include "../io/json2graph.hpp"
 #include "../path_index.hpp"
+#include <bdsg/hash_graph.hpp>
 #include "catch.hpp"
 
 namespace vg {
@@ -58,15 +58,11 @@ const string path_index_graph_1 = R"(
 
 
 TEST_CASE("PathIndex can be created", "[pathindex]") {
-    
+
     // Load the graph
-    Graph graph;
-    json2pb(graph, path_index_graph_1.c_str(), path_index_graph_1.size());
-    
-    // Make it into a VG
-    VG to_index;
-    to_index.extend(graph);
-    
+    bdsg::HashGraph to_index;
+    vg::io::json2graph(path_index_graph_1, &to_index);
+
     // Make a PathIndex
     PathIndex index(to_index, "cool", true);
     
@@ -78,13 +74,9 @@ TEST_CASE("PathIndex can be created", "[pathindex]") {
 TEST_CASE("PathIndex translation can change a node ID", "[pathindex]") {
 
     // Load the graph
-    Graph graph;
-    json2pb(graph, path_index_graph_1.c_str(), path_index_graph_1.size());
-    
-    // Make it into a VG
-    VG to_index;
-    to_index.extend(graph);
-    
+    bdsg::HashGraph to_index;
+    vg::io::json2graph(path_index_graph_1, &to_index);
+
     // Make a PathIndex
     PathIndex index(to_index, "cool", true);
     
@@ -115,15 +107,11 @@ TEST_CASE("PathIndex translation can change a node ID", "[pathindex]") {
 }
 
 TEST_CASE("PathIndex translation can divide a node", "[pathindex]") {
-    
+
     // Load the graph
-    Graph graph;
-    json2pb(graph, path_index_graph_1.c_str(), path_index_graph_1.size());
-    
-    // Make it into a VG
-    VG to_index;
-    to_index.extend(graph);
-    
+    bdsg::HashGraph to_index;
+    vg::io::json2graph(path_index_graph_1, &to_index);
+
     // Make a PathIndex
     PathIndex index(to_index, "cool", true);
     
@@ -174,15 +162,11 @@ TEST_CASE("PathIndex translation can divide a node", "[pathindex]") {
 }
 
 TEST_CASE("PathIndex translation can create reverse strand mappings", "[pathindex]") {
-    
+
     // Load the graph
-    Graph graph;
-    json2pb(graph, path_index_graph_1.c_str(), path_index_graph_1.size());
-    
-    // Make it into a VG
-    VG to_index;
-    to_index.extend(graph);
-    
+    bdsg::HashGraph to_index;
+    vg::io::json2graph(path_index_graph_1, &to_index);
+
     // Make a PathIndex
     PathIndex index(to_index, "cool", true);
     
@@ -235,15 +219,11 @@ TEST_CASE("PathIndex translation can create reverse strand mappings", "[pathinde
 }
 
 TEST_CASE("PathIndex translation can handle translations articulated for the reverse strand", "[pathindex]") {
-    
+
     // Load the graph
-    Graph graph;
-    json2pb(graph, path_index_graph_1.c_str(), path_index_graph_1.size());
-    
-    // Make it into a VG
-    VG to_index;
-    to_index.extend(graph);
-    
+    bdsg::HashGraph to_index;
+    vg::io::json2graph(path_index_graph_1, &to_index);
+
     // Make a PathIndex
     PathIndex index(to_index, "cool", true);
     
@@ -300,15 +280,11 @@ TEST_CASE("PathIndex translation can handle translations articulated for the rev
 }
 
 TEST_CASE("PathIndex translation can divide the last node", "[pathindex]") {
-    
+
     // Load the graph
-    Graph graph;
-    json2pb(graph, path_index_graph_1.c_str(), path_index_graph_1.size());
-    
-    // Make it into a VG
-    VG to_index;
-    to_index.extend(graph);
-    
+    bdsg::HashGraph to_index;
+    vg::io::json2graph(path_index_graph_1, &to_index);
+
     // Make a PathIndex
     PathIndex index(to_index, "cool", true);
     
diff --git a/src/unittest/phase_unfolder.cpp b/src/unittest/phase_unfolder.cpp
index 0c79972941..36cfbca9de 100644
--- a/src/unittest/phase_unfolder.cpp
+++ b/src/unittest/phase_unfolder.cpp
@@ -12,7 +12,8 @@
 #include <gbwt/dynamic_gbwt.h>
 
 #include "../phase_unfolder.hpp"
-#include "vg/io/json2pb.h"
+#include "../io/json2graph.hpp"
+#include <bdsg/hash_graph.hpp>
 #include "xg.hpp"
 
 #include "catch.hpp"
@@ -210,10 +211,10 @@ const std::string unfolder_graph_path = R"(
 TEST_CASE("PhaseUnfolder can unfold XG paths", "[phaseunfolder][indexing]") {
 
     // Build an XG index with a path.
-    Graph graph_with_path;
-    json2pb(graph_with_path, unfolder_graph_path.c_str(), unfolder_graph_path.size());
+    bdsg::HashGraph graph_with_path;
+    vg::io::json2graph(unfolder_graph_path, &graph_with_path);
     xg::XG xg_index;
-    xg_index.from_path_handle_graph(VG(graph_with_path));
+    xg_index.from_path_handle_graph(graph_with_path);
 
     // Build an empty GBWT index.
     gbwt::GBWT gbwt_index;
@@ -224,9 +225,7 @@ TEST_CASE("PhaseUnfolder can unfold XG paths", "[phaseunfolder][indexing]") {
 
     // Build a VG graph.
     VG vg_graph;
-    Graph temp_graph;
-    json2pb(temp_graph, unfolder_graph.c_str(), unfolder_graph.size());
-    vg_graph.merge(temp_graph);
+    vg::io::json2graph(unfolder_graph, &vg_graph);
 
     // Remove branching regions from the VG graph, including the last node,
     // but keep the edge (1, 6) in the graph.
@@ -255,10 +254,10 @@ TEST_CASE("PhaseUnfolder can unfold XG paths", "[phaseunfolder][indexing]") {
 TEST_CASE("PhaseUnfolder can restore XG paths", "[phaseunfolder][indexing]") {
 
     // Build an XG index with a path.
-    Graph graph_with_path;
-    json2pb(graph_with_path, unfolder_graph_path.c_str(), unfolder_graph_path.size());
+    bdsg::HashGraph graph_with_path;
+    vg::io::json2graph(unfolder_graph_path, &graph_with_path);
     xg::XG xg_index;
-    xg_index.from_path_handle_graph(VG(graph_with_path));
+    xg_index.from_path_handle_graph(graph_with_path);
 
     // Build an empty GBWT index.
     gbwt::GBWT gbwt_index;
@@ -269,9 +268,7 @@ TEST_CASE("PhaseUnfolder can restore XG paths", "[phaseunfolder][indexing]") {
 
     // Build a VG graph.
     VG vg_graph;
-    Graph temp_graph;
-    json2pb(temp_graph, unfolder_graph.c_str(), unfolder_graph.size());
-    vg_graph.merge(temp_graph);
+    vg::io::json2graph(unfolder_graph, &vg_graph);
 
     // Remove branching regions from the VG graph, including the last node,
     // but keep the edge (1, 6) in the graph.
@@ -299,10 +296,10 @@ TEST_CASE("PhaseUnfolder can restore XG paths", "[phaseunfolder][indexing]") {
 TEST_CASE("PhaseUnfolder can unfold GBWT threads", "[phaseunfolder][indexing]") {
 
     // Build an XG index without a path.
-    Graph graph_without_path;
-    json2pb(graph_without_path, unfolder_graph.c_str(), unfolder_graph.size());
+    bdsg::HashGraph graph_without_path;
+    vg::io::json2graph(unfolder_graph, &graph_without_path);
     xg::XG xg_index;
-    xg_index.from_path_handle_graph(VG(graph_without_path));
+    xg_index.from_path_handle_graph(graph_without_path);
 
     // Build a GBWT with three threads including a duplicate. We want to have
     // only one instance of short_path unfolded, but we want separate copies
@@ -335,9 +332,7 @@ TEST_CASE("PhaseUnfolder can unfold GBWT threads", "[phaseunfolder][indexing]")
 
     // Build a VG graph.
     VG vg_graph;
-    Graph temp_graph;
-    json2pb(temp_graph, unfolder_graph.c_str(), unfolder_graph.size());
-    vg_graph.merge(temp_graph);
+    vg::io::json2graph(unfolder_graph, &vg_graph);
 
     // Remove branching regions from the VG graph, including the last node,
     // but keep the edge (1, 6) in the graph.
@@ -366,10 +361,10 @@ TEST_CASE("PhaseUnfolder can unfold GBWT threads", "[phaseunfolder][indexing]")
 TEST_CASE("PhaseUnfolder can unfold both XG paths and GBWT threads", "[phaseunfolder][indexing]") {
 
     // Build an XG index with a path.
-    Graph graph_with_path;
-    json2pb(graph_with_path, unfolder_graph_path.c_str(), unfolder_graph_path.size());
+    bdsg::HashGraph graph_with_path;
+    vg::io::json2graph(unfolder_graph_path, &graph_with_path);
     xg::XG xg_index;
-    xg_index.from_path_handle_graph(VG(graph_with_path));
+    xg_index.from_path_handle_graph(graph_with_path);
 
     // Build a GBWT with three threads including a duplicate. We want to have
     // only one instance of short_path unfolded, but we want separate copies
@@ -402,9 +397,7 @@ TEST_CASE("PhaseUnfolder can unfold both XG paths and GBWT threads", "[phaseunfo
 
     // Build a VG graph.
     VG vg_graph;
-    Graph temp_graph;
-    json2pb(temp_graph, unfolder_graph.c_str(), unfolder_graph.size());
-    vg_graph.merge(temp_graph);
+    vg::io::json2graph(unfolder_graph, &vg_graph);
 
     // Remove branching regions from the VG graph, including the last node,
     // but keep the edge (1, 6) in the graph.
@@ -501,10 +494,10 @@ const std::string unfolder_graph_simple_path = R"(
 TEST_CASE("PhaseUnfolder can merge shared prefixes and suffixes", "[phaseunfolder][indexing]") {
 
     // Build an XG index.
-    Graph simple_graph;
-    json2pb(simple_graph, unfolder_graph_simple.c_str(), unfolder_graph_simple.size());
+    bdsg::HashGraph simple_graph;
+    vg::io::json2graph(unfolder_graph_simple, &simple_graph);
     xg::XG xg_index;
-    xg_index.from_path_handle_graph(VG(simple_graph));
+    xg_index.from_path_handle_graph(simple_graph);
 
     // Build a GBWT with both possible threads.
     gbwt::vector_type upper_path {
@@ -536,9 +529,7 @@ TEST_CASE("PhaseUnfolder can merge shared prefixes and suffixes", "[phaseunfolde
 
     // Build a VG graph.
     VG vg_graph;
-    Graph temp_graph;
-    json2pb(temp_graph, unfolder_graph_simple.c_str(), unfolder_graph_simple.size());
-    vg_graph.merge(temp_graph);
+    vg::io::json2graph(unfolder_graph_simple, &vg_graph);
 
     // Remove the bubble, including its endpoints.
     std::set<vg::id_t> to_remove { 3, 4, 5, 6 };
@@ -566,10 +557,10 @@ TEST_CASE("PhaseUnfolder can merge shared prefixes and suffixes", "[phaseunfolde
 TEST_CASE("PhaseUnfolder can extend short threads", "[phaseunfolder][indexing]") {
 
     // Build an XG index.
-    Graph simple_graph_with_path;
-    json2pb(simple_graph_with_path, unfolder_graph_simple_path.c_str(), unfolder_graph_simple_path.size());
+    bdsg::HashGraph simple_graph_with_path;
+    vg::io::json2graph(unfolder_graph_simple_path, &simple_graph_with_path);
     xg::XG xg_index;
-    xg_index.from_path_handle_graph(VG(simple_graph_with_path));
+    xg_index.from_path_handle_graph(simple_graph_with_path);
 
     // Build a GBWT for the fragment that is different from the reference.
     gbwt::vector_type short_fragment {
@@ -586,9 +577,7 @@ TEST_CASE("PhaseUnfolder can extend short threads", "[phaseunfolder][indexing]")
 
     // Build a VG graph.
     VG vg_graph;
-    Graph temp_graph;
-    json2pb(temp_graph, unfolder_graph_simple.c_str(), unfolder_graph_simple.size());
-    vg_graph.merge(temp_graph);
+    vg::io::json2graph(unfolder_graph_simple, &vg_graph);
 
     // Remove the bubble, including its endpoints.
     std::set<vg::id_t> to_remove { 3, 4, 5, 6 };
diff --git a/src/unittest/randomly_flipped_nodes.cpp b/src/unittest/randomly_flipped_nodes.cpp
new file mode 100644
index 0000000000..455bdd18ae
--- /dev/null
+++ b/src/unittest/randomly_flipped_nodes.cpp
@@ -0,0 +1,179 @@
+#include "catch.hpp"
+#include "../handle.hpp"
+#include "../utility.hpp"
+#include <bdsg/hash_graph.hpp>
+
+#include "support/randomly_flipped_nodes.hpp"
+#include "support/randomness.hpp"
+#include "support/random_graph.hpp"
+
+#include <set>
+#include <random>
+
+namespace vg {
+namespace unittest {
+
+/// Collect a strand-independent summary of the edges of a graph.
+///
+/// Every edge contributes the pair (sequence of edge.first, sequence of
+/// edge.second). That pair is compared with the pair for the same edge read
+/// from the other strand, (RC(second), RC(first)), and whichever of the two
+/// compares lexicographically smaller is stored in the returned set.
+///
+/// Equal sets do not prove that two graphs are the same, but unequal sets
+/// show that something about the graph is wrong.
+static set<pair<string, string>> canonical_edge_pairs(const HandleGraph& graph) {
+    set<pair<string, string>> canonical;
+    graph.for_each_edge([&](const edge_t& edge) {
+        // Sequences on each side of the edge, in the edge's own orientation.
+        pair<string, string> as_read {
+            graph.get_sequence(edge.first),
+            graph.get_sequence(edge.second)
+        };
+
+        // The same edge seen from the opposite strand: reverse complement
+        // both sides and swap them.
+        pair<string, string> flipped {
+            reverse_complement(as_read.second),
+            reverse_complement(as_read.first)
+        };
+
+        // Keep the smaller pair; on a tie, keep the pair as read.
+        canonical.insert(flipped < as_read ? flipped : as_read);
+        return true;
+    });
+    return canonical;
+}
+
+/// Assert, via Catch REQUIRE, that observed has the same node count, edge count,
+/// and canonical edge pairs as expected; passing does not prove isomorphism.
+static void validate_graph(const HandleGraph& observed, const HandleGraph& expected, const set<pair<string, string>>& expected_edges) {
+    REQUIRE(observed.get_node_count() == expected.get_node_count());
+    REQUIRE(observed.get_edge_count() == expected.get_edge_count());
+    // expected_edges is supplied by the caller rather than recomputed from expected.
+    auto observed_edges = canonical_edge_pairs(observed);
+    REQUIRE(observed_edges == expected_edges);
+}
+
+TEST_CASE("randomly_flipped_nodes preserves graph structure on a simple linear graph", "[randomly_flipped_nodes]") {
+    // Build a stick graph by cutting one sequence into consecutive 3-base nodes.
+    const std::string bases = "GGACTGACTCGCATGTCGAGCGACTCGCGCGAGCTATCGTAGTACGCGAGTCATATTATATTATCACG";
+    const size_t chunk = 3;
+    bdsg::HashGraph graph;
+    handle_t tail;
+    for (size_t start = 0; start < bases.size(); start += chunk) {
+        handle_t made = graph.create_handle(bases.substr(start, chunk));
+        if (start != 0) {
+            graph.create_edge(tail, made);
+        }
+        tail = made;
+    }
+    const auto original_edges = canonical_edge_pairs(graph);
+
+    SECTION("flipping no nodes preserves edges exactly") {
+        default_random_engine rng(test_seed_source());
+        auto result = randomly_flipped_nodes(graph, 0.0, rng);
+        validate_graph(result, graph, original_edges);
+    }
+
+    SECTION("flipping all nodes preserves canonical edge pairs") {
+        default_random_engine rng(test_seed_source());
+        auto result = randomly_flipped_nodes(graph, 1.0, rng);
+        validate_graph(result, graph, original_edges);
+    }
+
+    SECTION("flipping 50% of nodes preserves canonical edge pairs") {
+        default_random_engine rng(test_seed_source());
+        auto result = randomly_flipped_nodes(graph, 0.5, rng);
+        validate_graph(result, graph, original_edges);
+    }
+}
+
+TEST_CASE("randomly_flipped_nodes preserves structure on graph with reversing edges", "[randomly_flipped_nodes]") {
+    bdsg::HashGraph graph;
+    const handle_t first = graph.create_handle("GATT", 1);
+    const handle_t second = graph.create_handle("ACA", 2);
+    const handle_t third = graph.create_handle("CGAT", 3);
+    const handle_t fourth = graph.create_handle("TCGAA", 4);
+
+    // A forward chain 1 -> 2 -> 3 -> 4.
+    graph.create_edge(first, second);
+    graph.create_edge(second, third);
+    graph.create_edge(third, fourth);
+    // Node 4 forward also leads into the reverse strand of node 3.
+    graph.create_edge(fourth, graph.flip(third));
+    const auto original_edges = canonical_edge_pairs(graph);
+
+    // One generator is shared by all ten rounds.
+    default_random_engine rng(test_seed_source());
+    for (int round = 0; round != 10; ++round) {
+        auto result = randomly_flipped_nodes(graph, 0.5, rng);
+        validate_graph(result, graph, original_edges);
+    }
+}
+
+TEST_CASE("randomly_flipped_nodes preserves structure on graph with self-loops", "[randomly_flipped_nodes]") {
+    bdsg::HashGraph graph;
+    const handle_t left = graph.create_handle("ACGT", 1);
+    const handle_t right = graph.create_handle("TTCC", 2);
+
+    // An ordinary edge between the two nodes.
+    graph.create_edge(left, right);
+    // Node 1 forward loops back to itself forward.
+    graph.create_edge(left, left);
+    // Node 2 forward leads into its own reverse strand.
+    graph.create_edge(right, graph.flip(right));
+    const auto original_edges = canonical_edge_pairs(graph);
+
+    default_random_engine rng(test_seed_source());
+    for (int round = 0; round != 10; ++round) {
+        auto result = randomly_flipped_nodes(graph, 0.5, rng);
+        validate_graph(result, graph, original_edges);
+    }
+}
+
+TEST_CASE("randomly_flipped_nodes preserves structure on random graphs", "[randomly_flipped_nodes]") {
+    for (int graph_number = 0; graph_number != 5; ++graph_number) {
+        // A fresh graph and a fresh generator for every outer round.
+        bdsg::HashGraph graph;
+        random_graph(100, 10, 10, &graph);
+        const auto original_edges = canonical_edge_pairs(graph);
+
+        default_random_engine rng(test_seed_source());
+        for (int round = 0; round != 5; ++round) {
+            auto result = randomly_flipped_nodes(graph, 0.5, rng);
+            validate_graph(result, graph, original_edges);
+        }
+    }
+}
+
+TEST_CASE("randomly_flipped_nodes preserves node IDs", "[randomly_flipped_nodes]") {
+    bdsg::HashGraph graph;
+    const handle_t low = graph.create_handle("AAA", 5);
+    const handle_t middle = graph.create_handle("CCC", 10);
+    const handle_t high = graph.create_handle("GGG", 15);
+    graph.create_edge(low, middle);
+    graph.create_edge(middle, high);
+
+    default_random_engine rng(test_seed_source());
+    auto flipped = randomly_flipped_nodes(graph, 0.5, rng);
+    // Each ID given to the input graph must still be present in the result.
+    REQUIRE(flipped.has_node(5));
+    REQUIRE(flipped.has_node(10));
+    REQUIRE(flipped.has_node(15));
+}
+
+TEST_CASE("randomly_flipped_nodes actually flips node sequences", "[randomly_flipped_nodes]") {
+    bdsg::HashGraph graph;
+    // The returned handle is not needed; the node is looked up by ID below.
+    graph.create_handle("AAAC", 1);  // RC = GTTT
+    default_random_engine gen(test_seed_source());
+    // Passing 1.0 as the flip fraction is meant to guarantee a flip
+    auto flipped = randomly_flipped_nodes(graph, 1.0, gen);
+
+    // Node 1 read forward in the result should be the RC of the original
+    REQUIRE(flipped.get_sequence(flipped.get_handle(1)) == "GTTT");
+}
+
+} // namespace unittest
+} // namespace vg
diff --git a/src/unittest/readfilter.cpp b/src/unittest/readfilter.cpp
index cc1562f3f3..6d84fa0a38 100644
--- a/src/unittest/readfilter.cpp
+++ b/src/unittest/readfilter.cpp
@@ -5,6 +5,9 @@
 #include "catch.hpp"
 #include "readfilter.hpp"
 #include "xg.hpp"
+#include "vg/io/json2pb.h"
+#include "../io/json2graph.hpp"
+#include <bdsg/hash_graph.hpp>
 
 namespace vg {
 namespace unittest {
@@ -44,13 +47,13 @@ TEST_CASE("reads with ambiguous ends can be trimmed", "[filter]") {
     
     )";
     
-    // Load it into Protobuf
-    Graph chunk;
-    json2pb(chunk, graph_json.c_str(), graph_json.size());
-    
+    // Load the graph
+    bdsg::HashGraph chunk;
+    vg::io::json2graph(graph_json, &chunk);
+
     // Pass it over to XG
     xg::XG index;
-    index.from_path_handle_graph(VG(chunk));
+    index.from_path_handle_graph(chunk);
     
     // Make a ReadFilter;
     ReadFilter<Alignment> filter;
diff --git a/src/unittest/sampler.cpp b/src/unittest/sampler.cpp
index d8bb95b650..cda0147f57 100644
--- a/src/unittest/sampler.cpp
+++ b/src/unittest/sampler.cpp
@@ -6,11 +6,10 @@
 #include <unordered_set>
 #include <utility>
 
-#include "vg/io/json2pb.h"
-#include <vg/vg.pb.h>
+#include "../io/json2graph.hpp"
+#include <bdsg/hash_graph.hpp>
 #include "../sampler.hpp"
 #include "../xg.hpp"
-#include "../vg.hpp"
 #include "catch.hpp"
 
 namespace vg {
@@ -28,13 +27,9 @@ TEST_CASE( "Sampler can sample from a 1-node graph", "[sampler]" ) {
     })";
     
     // Load the JSON
-    Graph proto_graph;
-    json2pb(proto_graph, graph_json.c_str(), graph_json.size());
-    
-    // Make it into a VG
-    VG graph;
-    graph.extend(proto_graph);
-    
+    bdsg::HashGraph graph;
+    vg::io::json2graph(graph_json, &graph);
+
     // Build the xg index
     xg::XG xg_index;
     xg_index.from_path_handle_graph(graph);
@@ -118,13 +113,9 @@ TEST_CASE( "position_at works", "[sampler]" ) {
     })";
     
     // Load the JSON
-    Graph proto_graph;
-    json2pb(proto_graph, graph_json.c_str(), graph_json.size());
-    
-    // Make it into a VG
-    VG graph;
-    graph.extend(proto_graph);
-    
+    bdsg::HashGraph graph;
+    vg::io::json2graph(graph_json, &graph);
+
     // Build the xg index
     xg::XG xg_index;
     xg_index.from_path_handle_graph(graph);
@@ -195,13 +186,9 @@ TEST_CASE( "Sampler can sample from a loop-containing path", "[sampler]" ) {
     })";
     
     // Load the JSON
-    Graph proto_graph;
-    json2pb(proto_graph, graph_json.c_str(), graph_json.size());
-    
-    // Make it into a VG
-    VG graph;
-    graph.extend(proto_graph);
-    
+    bdsg::HashGraph graph;
+    vg::io::json2graph(graph_json, &graph);
+
     // Build the xg index
     xg::XG xg_index;
     xg_index.from_path_handle_graph(graph);
@@ -259,13 +246,9 @@ TEST_CASE( "Sampler can across reversing edges", "[sampler]" ) {
     })";
     
     // Load the JSON
-    Graph proto_graph;
-    json2pb(proto_graph, graph_json.c_str(), graph_json.size());
-    
-    // Make it into a VG
-    VG graph;
-    graph.extend(proto_graph);
-    
+    bdsg::HashGraph graph;
+    vg::io::json2graph(graph_json, &graph);
+
     // Build the xg index
     xg::XG xg_index;
     xg_index.from_path_handle_graph(graph);
diff --git a/src/unittest/snarl_decomposition_fuzzer.cpp b/src/unittest/snarl_decomposition_fuzzer.cpp
new file mode 100644
index 0000000000..38742be20c
--- /dev/null
+++ b/src/unittest/snarl_decomposition_fuzzer.cpp
@@ -0,0 +1,339 @@
+#include "catch.hpp"
+#include "../handle.hpp"
+#include <bdsg/hash_graph.hpp>
+
+#include "support/snarl_decomposition_fuzzer.hpp"
+
+#include <vector>
+#include <unordered_set>
+
+namespace vg {
+namespace unittest {
+
+using ET = DecompositionEventType;
+using Event = DecompositionEvent;
+
+TEST_CASE("ReplaySnarlFinder replays events faithfully", "[snarl_decomposition_fuzzer]") {
+    // The events need real handles to refer to, so make five isolated nodes.
+    bdsg::HashGraph graph;
+    const char* node_sequences[] = {"A", "C", "G", "T", "AA"};
+    for (nid_t id = 1; id <= 5; ++id) {
+        graph.create_handle(node_sequences[id - 1], id);
+    }
+
+    // Each entry is {event type, node ID, is-reverse}; indentation mirrors nesting.
+    std::vector<Event> script = {
+        {ET::BEGIN_CHAIN, 1, false},
+          {ET::BEGIN_SNARL, 1, false},
+            {ET::BEGIN_CHAIN, 2, true},
+            {ET::END_CHAIN, 3, true},
+          {ET::END_SNARL, 4, false},
+          {ET::BEGIN_SNARL, 4, false},
+          {ET::END_SNARL, 5, false},
+        {ET::END_CHAIN, 5, false},
+    };
+
+    ReplaySnarlFinder replayer(&graph, script);
+    std::vector<Event> observed = capture_events(replayer, graph);
+
+    REQUIRE(observed == script);
+}
+
+TEST_CASE("SnarlDecompositionFuzzer passes through when nothing is flipped", "[snarl_decomposition_fuzzer]") {
+    // Five isolated nodes so the events below have real node IDs to refer to.
+    bdsg::HashGraph graph;
+    const char* node_sequences[] = {"A", "C", "G", "T", "AA"};
+    for (nid_t id = 1; id <= 5; ++id) {
+        graph.create_handle(node_sequences[id - 1], id);
+    }
+
+    // Each entry is {event type, node ID, is-reverse}; indentation mirrors nesting.
+    std::vector<Event> script = {
+        {ET::BEGIN_CHAIN, 1, false},
+          {ET::BEGIN_SNARL, 1, false},
+            {ET::BEGIN_CHAIN, 2, true},
+            {ET::END_CHAIN, 3, true},
+          {ET::END_SNARL, 4, false},
+          {ET::BEGIN_SNARL, 4, false},
+          {ET::END_SNARL, 5, false},
+        {ET::END_CHAIN, 5, false},
+    };
+
+    ReplaySnarlFinder source(&graph, script);
+    // An empty flip set: no chains to flip
+    SnarlDecompositionFuzzer fuzzer(&graph, &source, {});
+
+    // With nothing flipped, the output must equal the input script.
+    std::vector<Event> observed = capture_events(fuzzer, graph);
+    REQUIRE(observed == script);
+}
+
+TEST_CASE("SnarlDecompositionFuzzer flips an outer chain", "[snarl_decomposition_fuzzer]") {
+    // Decomposition being replayed:
+    //   chain 1fwd -> snarl(1fwd, 4fwd) -> snarl(4fwd, 5fwd) -> 5fwd
+    //   with a child chain 2rev->3rev inside the first snarl.
+    // No edges are created; the graph only provides the five nodes.
+    bdsg::HashGraph graph;
+    const char* node_sequences[] = {"A", "C", "G", "T", "AA"};
+    for (nid_t id = 1; id <= 5; ++id) {
+        graph.create_handle(node_sequences[id - 1], id);
+    }
+
+    std::vector<Event> script = {
+        {ET::BEGIN_CHAIN, 1, false},
+          {ET::BEGIN_SNARL, 1, false},
+            {ET::BEGIN_CHAIN, 2, true},
+            {ET::END_CHAIN, 3, true},
+          {ET::END_SNARL, 4, false},
+          {ET::BEGIN_SNARL, 4, false},
+          {ET::END_SNARL, 5, false},
+        {ET::END_CHAIN, 5, false},
+    };
+
+    ReplaySnarlFinder source(&graph, script);
+
+    SECTION("flip outer chain only") {
+        // Reversing a chain reverses everything inside it, children
+        // included. The two snarls trade places and every boundary changes
+        // orientation, and the nested chain 2rev->3rev is emitted as
+        // 3fwd->2fwd as part of the parent's reversal.
+        std::vector<Event> expected = {
+            {ET::BEGIN_CHAIN, 5, true},
+              {ET::BEGIN_SNARL, 5, true},
+              {ET::END_SNARL, 4, true},
+              {ET::BEGIN_SNARL, 4, true},
+                {ET::BEGIN_CHAIN, 3, false},
+                {ET::END_CHAIN, 2, false},
+              {ET::END_SNARL, 1, true},
+            {ET::END_CHAIN, 1, true},
+        };
+
+        // Select the outer chain (1fwd -> 5fwd) for flipping
+        std::unordered_set<nid_t> to_flip = {1, 5};
+        SnarlDecompositionFuzzer fuzzer(&graph, &source, to_flip);
+
+        std::vector<Event> observed = capture_events(fuzzer, graph);
+
+        REQUIRE(observed == expected);
+    }
+
+    SECTION("flip outer and nested chain") {
+        // The outer flip reverses everything (turning the nested chain
+        // into 3fwd->2fwd), and the nested chain's own flip then turns it
+        // back into its original 2rev->3rev.
+        std::vector<Event> expected = {
+            {ET::BEGIN_CHAIN, 5, true},
+              {ET::BEGIN_SNARL, 5, true},
+              {ET::END_SNARL, 4, true},
+              {ET::BEGIN_SNARL, 4, true},
+                {ET::BEGIN_CHAIN, 2, true},
+                {ET::END_CHAIN, 3, true},
+              {ET::END_SNARL, 1, true},
+            {ET::END_CHAIN, 1, true},
+        };
+
+        // Select both the outer chain (1fwd->5fwd) and the nested one (2rev->3rev)
+        std::unordered_set<nid_t> to_flip = {1, 5, 2, 3};
+        SnarlDecompositionFuzzer fuzzer(&graph, &source, to_flip);
+
+        std::vector<Event> observed = capture_events(fuzzer, graph);
+
+        REQUIRE(observed == expected);
+    }
+
+    SECTION("flip nested chain only") {
+        // The outer chain is untouched; only the nested chain is reversed
+        std::vector<Event> expected = {
+            {ET::BEGIN_CHAIN, 1, false},
+              {ET::BEGIN_SNARL, 1, false},
+                {ET::BEGIN_CHAIN, 3, false},
+                {ET::END_CHAIN, 2, false},
+              {ET::END_SNARL, 4, false},
+              {ET::BEGIN_SNARL, 4, false},
+              {ET::END_SNARL, 5, false},
+            {ET::END_CHAIN, 5, false},
+        };
+
+        // Select only the nested chain (2rev->3rev)
+        std::unordered_set<nid_t> to_flip = {2, 3};
+        SnarlDecompositionFuzzer fuzzer(&graph, &source, to_flip);
+
+        std::vector<Event> observed = capture_events(fuzzer, graph);
+
+        REQUIRE(observed == expected);
+    }
+}
+
+TEST_CASE("SnarlDecompositionFuzzer handles empty chain", "[snarl_decomposition_fuzzer]") {
+    // One node is all this decomposition refers to
+    bdsg::HashGraph graph;
+    graph.create_handle("ACGT", 1);
+
+    // A chain that begins and ends on the same handle and contains no snarls
+    std::vector<Event> script = {
+        {ET::BEGIN_CHAIN, 1, false},
+        {ET::END_CHAIN, 1, false},
+    };
+    ReplaySnarlFinder source(&graph, script);
+
+    SECTION("flipping an empty chain") {
+        // Both bounds are the same node, so only the orientation changes
+        std::vector<Event> expected = {
+            {ET::BEGIN_CHAIN, 1, true},
+            {ET::END_CHAIN, 1, true},
+        };
+
+        std::unordered_set<nid_t> to_flip = {1};
+        SnarlDecompositionFuzzer fuzzer(&graph, &source, to_flip);
+        std::vector<Event> observed = capture_events(fuzzer, graph);
+
+        REQUIRE(observed == expected);
+    }
+}
+
+TEST_CASE("SnarlDecompositionFuzzer handles multiple top-level chains", "[snarl_decomposition_fuzzer]") {
+    // Four isolated nodes to give the events real node IDs
+    bdsg::HashGraph graph;
+    const char* node_sequences[] = {"A", "C", "G", "T"};
+    for (nid_t id = 1; id <= 4; ++id) {
+        graph.create_handle(node_sequences[id - 1], id);
+    }
+
+    // Two top-level chains in the root snarl, emitted one after the other
+    std::vector<Event> script = {
+        // First chain: 1fwd -> snarl -> 2fwd
+        {ET::BEGIN_CHAIN, 1, false},
+          {ET::BEGIN_SNARL, 1, false},
+          {ET::END_SNARL, 2, false},
+        {ET::END_CHAIN, 2, false},
+        // Second chain: 3fwd -> snarl -> 4fwd
+        {ET::BEGIN_CHAIN, 3, false},
+          {ET::BEGIN_SNARL, 3, false},
+          {ET::END_SNARL, 4, false},
+        {ET::END_CHAIN, 4, false},
+    };
+    ReplaySnarlFinder source(&graph, script);
+
+    SECTION("flip only first chain") {
+        // First chain reversed, second chain passed through unchanged
+        std::vector<Event> expected = {
+            {ET::BEGIN_CHAIN, 2, true},
+              {ET::BEGIN_SNARL, 2, true},
+              {ET::END_SNARL, 1, true},
+            {ET::END_CHAIN, 1, true},
+            {ET::BEGIN_CHAIN, 3, false},
+              {ET::BEGIN_SNARL, 3, false},
+              {ET::END_SNARL, 4, false},
+            {ET::END_CHAIN, 4, false},
+        };
+
+        std::unordered_set<nid_t> to_flip = {1, 2};
+        SnarlDecompositionFuzzer fuzzer(&graph, &source, to_flip);
+        std::vector<Event> observed = capture_events(fuzzer, graph);
+
+        REQUIRE(observed == expected);
+    }
+
+    SECTION("flip both chains") {
+        // Each chain is reversed in place; the chains keep their relative order
+        std::vector<Event> expected = {
+            {ET::BEGIN_CHAIN, 2, true},
+              {ET::BEGIN_SNARL, 2, true},
+              {ET::END_SNARL, 1, true},
+            {ET::END_CHAIN, 1, true},
+            {ET::BEGIN_CHAIN, 4, true},
+              {ET::BEGIN_SNARL, 4, true},
+              {ET::END_SNARL, 3, true},
+            {ET::END_CHAIN, 3, true},
+        };
+
+        std::unordered_set<nid_t> to_flip = {1, 2, 3, 4};
+        SnarlDecompositionFuzzer fuzzer(&graph, &source, to_flip);
+        std::vector<Event> observed = capture_events(fuzzer, graph);
+
+        REQUIRE(observed == expected);
+    }
+}
+
+TEST_CASE("SnarlDecompositionFuzzer handles deeply nested chains", "[snarl_decomposition_fuzzer]") {
+    bdsg::HashGraph graph;
+    // NOTE(review): nodes 7 and 8 are created but never referenced by any event below
+    for (nid_t id = 1; id <= 8; ++id) {
+        graph.create_handle("A", id);
+    }
+
+    // Decomposition being replayed:
+    //   outer chain 1->6
+    //     snarl(1,4) holding inner chain 2->3, which has one leaf snarl(2,3)
+    //     snarl(4,6) holding inner chain 5->5, which is empty/trivial
+    // Every event in the script is in the forward orientation.
+    std::vector<Event> script = {
+        {ET::BEGIN_CHAIN, 1, false},
+          {ET::BEGIN_SNARL, 1, false},
+            {ET::BEGIN_CHAIN, 2, false},
+              {ET::BEGIN_SNARL, 2, false},
+              {ET::END_SNARL, 3, false},
+            {ET::END_CHAIN, 3, false},
+          {ET::END_SNARL, 4, false},
+          {ET::BEGIN_SNARL, 4, false},
+            {ET::BEGIN_CHAIN, 5, false},
+            {ET::END_CHAIN, 5, false},
+          {ET::END_SNARL, 6, false},
+        {ET::END_CHAIN, 6, false},
+    };
+
+    ReplaySnarlFinder source(&graph, script);
+
+    SECTION("flip outer chain only") {
+        // Both inner chains, and the leaf snarl, are reversed along with the outer chain
+        std::vector<Event> expected = {
+            {ET::BEGIN_CHAIN, 6, true},
+              {ET::BEGIN_SNARL, 6, true},
+                {ET::BEGIN_CHAIN, 5, true},
+                {ET::END_CHAIN, 5, true},
+              {ET::END_SNARL, 4, true},
+              {ET::BEGIN_SNARL, 4, true},
+                {ET::BEGIN_CHAIN, 3, true},
+                  {ET::BEGIN_SNARL, 3, true},
+                  {ET::END_SNARL, 2, true},
+                {ET::END_CHAIN, 2, true},
+              {ET::END_SNARL, 1, true},
+            {ET::END_CHAIN, 1, true},
+        };
+
+        std::unordered_set<nid_t> to_flip = {1, 6};
+        SnarlDecompositionFuzzer fuzzer(&graph, &source, to_flip);
+
+        std::vector<Event> observed = capture_events(fuzzer, graph);
+
+        REQUIRE(observed == expected);
+    }
+
+    SECTION("flip outer and inner chain") {
+        // Outer chain flips; inner chain 2->3 flips back to forward; chain 5->5 stays reversed
+        std::vector<Event> expected = {
+            {ET::BEGIN_CHAIN, 6, true},
+              {ET::BEGIN_SNARL, 6, true},
+                {ET::BEGIN_CHAIN, 5, true},
+                {ET::END_CHAIN, 5, true},
+              {ET::END_SNARL, 4, true},
+              {ET::BEGIN_SNARL, 4, true},
+                {ET::BEGIN_CHAIN, 2, false},
+                  {ET::BEGIN_SNARL, 2, false},
+                  {ET::END_SNARL, 3, false},
+                {ET::END_CHAIN, 3, false},
+              {ET::END_SNARL, 1, true},
+            {ET::END_CHAIN, 1, true},
+        };
+
+        std::unordered_set<nid_t> to_flip = {1, 6, 2, 3};
+        SnarlDecompositionFuzzer fuzzer(&graph, &source, to_flip);
+
+        std::vector<Event> observed = capture_events(fuzzer, graph);
+
+        REQUIRE(observed == expected);
+    }
+}
+
+} // namespace unittest
+} // namespace vg
diff --git a/src/unittest/snarl_distance_index.cpp b/src/unittest/snarl_distance_index.cpp
index 36a1b9b74e..99f0903ac6 100644
--- a/src/unittest/snarl_distance_index.cpp
+++ b/src/unittest/snarl_distance_index.cpp
@@ -9,23 +9,34 @@
 #include <iostream>
 #include <sstream>
 #include <set>
-#include "vg/io/json2pb.h"
-#include <vg/vg.pb.h>
+#include "../io/json2graph.hpp"
+#include <bdsg/hash_graph.hpp>
 #include "catch.hpp"
 #include "support/random_graph.hpp"
 #include "support/randomness.hpp"
+#include "support/randomly_flipped_nodes.hpp"
+#include "support/snarl_decomposition_fuzzer.hpp"
 #include "../snarl_distance_index.hpp"
 #include "../integrated_snarl_finder.hpp"
 #include "../genotypekit.hpp"
 #include "../traversal_finder.hpp"
+#include "../io/save_handle_graph.hpp"
 #include <vg/io/protobuf_emitter.hpp>
 #include <vg/io/vpkg.hpp>
 #include "xg.hpp"
+#include <handlegraph/algorithms/weakly_connected_components.hpp>
+#include <handlegraph/algorithms/find_shortest_paths.hpp>
 
 //#define debug
 
 namespace vg {
     namespace unittest {
+
+    // TODO: Declaring *any* operator<< overload in vg::unittest hides the ones that are only in vg,
+    // because unqualified lookup stops at the innermost scope that declares the name.
+    using vg::operator<<;
+
+
     static pair<unordered_set<Node*>, unordered_set<Edge*> > pb_contents(
         VG& graph, const pair<unordered_set<id_t>, unordered_set<edge_t> >& contents) {
         pair<unordered_set<Node*>, unordered_set<Edge*> > ret;
@@ -192,7 +203,82 @@ namespace vg {
                 REQUIRE(distance_index.minimum_distance(2, true, 0, 2, true, 1) == 1);
             }
         }
-        TEST_CASE( "Nested chain with loop", "[snarl_distance]" ) {
+        TEST_CASE( "Can distance index nested chain without loop", "[snarl_distance]" ) {
+            bdsg::HashGraph graph;
+            // Seven single-base nodes; handles are kept in creation order
+            std::vector<handle_t> stick;
+            for (const char* base : {"G", "A", "T", "T", "A", "C", "A"}) {
+                stick.push_back(graph.create_handle(base));
+            }
+
+            // Wire it up as a stick
+            for (size_t i = 0; i + 1 < stick.size(); i++) {
+                graph.create_edge(stick[i], stick[i + 1]);
+            }
+
+            // Allow skipping a run of nodes to make a snarl with a child chain
+            graph.create_edge(stick[1], stick[4]);
+
+            IntegratedSnarlFinder snarl_finder(graph);
+            const auto third_id = graph.get_id(stick[2]);
+
+            SECTION("Snarl classifications are correct") {
+                SECTION("Distance index") {
+                    SnarlDistanceIndex distance_index;
+                    fill_in_distance_index(&distance_index, &graph, &snarl_finder);
+                    // Classify the parent of the parent of the third node
+                    auto grandparent = distance_index.get_parent(distance_index.get_parent(distance_index.get_node_net_handle(third_id)));
+                    REQUIRE(distance_index.is_regular_snarl(grandparent));
+                }
+                SECTION("Distanceless index") {
+                    SnarlDistanceIndex distance_index;
+                    fill_in_distance_index(&distance_index, &graph, &snarl_finder, 0);
+                    // Same lookup as above, on an index built with a trailing 0 argument
+                    auto grandparent = distance_index.get_parent(distance_index.get_parent(distance_index.get_node_net_handle(third_id)));
+                    REQUIRE(distance_index.is_regular_snarl(grandparent));
+                }
+            }
+        }
+        TEST_CASE( "Can distance index nested chain with a loop hiding in the middle", "[snarl_distance]" ) {
+            bdsg::HashGraph graph;
+            // Seven single-base nodes; handles are kept in creation order
+            std::vector<handle_t> stick;
+            for (const char* base : {"G", "A", "T", "T", "A", "C", "A"}) {
+                stick.push_back(graph.create_handle(base));
+            }
+
+            // Wire it up as a stick
+            for (size_t i = 0; i + 1 < stick.size(); i++) {
+                graph.create_edge(stick[i], stick[i + 1]);
+            }
+
+            // Allow skipping a run of nodes to make a snarl with a child chain that has a few nodes in it
+            graph.create_edge(stick[0], stick[5]);
+
+            // Allow turning around with an edge hiding somewhere in the middle of the chain
+            handle_t third = stick[2];
+            graph.create_edge(third, graph.flip(third));
+
+            IntegratedSnarlFinder snarl_finder(graph);
+            const auto third_id = graph.get_id(third);
+
+            SECTION("Snarl classifications are correct") {
+                SECTION("Distance index") {
+                    SnarlDistanceIndex distance_index;
+                    fill_in_distance_index(&distance_index, &graph, &snarl_finder);
+                    // Classify the parent of the parent of the third node
+                    auto grandparent = distance_index.get_parent(distance_index.get_parent(distance_index.get_node_net_handle(third_id)));
+                    REQUIRE(!distance_index.is_regular_snarl(grandparent));
+                }
+                SECTION("Distanceless index") {
+                    SnarlDistanceIndex distance_index;
+                    fill_in_distance_index(&distance_index, &graph, &snarl_finder, 0);
+                    auto grandparent = distance_index.get_parent(distance_index.get_parent(distance_index.get_node_net_handle(third_id)));
+                    REQUIRE(!distance_index.is_regular_snarl(grandparent));
+                }
+            }
+        }
+        TEST_CASE( "Can distance index nested chain with a loop", "[snarl_distance]" ) {
         
             VG graph;
                 
@@ -230,7 +316,8 @@ namespace vg {
             Edge* e17 = graph.create_edge(n11, n12);
             Edge* e18 = graph.create_edge(n12, n13);
             
-            graph.serialize_to_file("test_graph.vg");
+            vg::io::save_handle_graph(&graph, "test_graph.vg");
+            
             //get the snarls
             IntegratedSnarlFinder snarl_finder(graph); 
             SECTION("Traversal of chain") {
@@ -248,16 +335,13 @@ namespace vg {
                     fill_in_distance_index(&distance_index, &graph, &snarl_finder);
                     REQUIRE(!distance_index.is_regular_snarl(distance_index.get_parent(distance_index.get_parent(distance_index.get_node_net_handle(n3->id())))));
                     REQUIRE(!distance_index.is_regular_snarl(distance_index.get_parent(distance_index.get_parent(distance_index.get_node_net_handle(n8->id())))));
-                    REQUIRE(distance_index.is_regular_snarl(distance_index.get_parent(distance_index.get_parent(distance_index.get_node_net_handle(n6->id()))), true));
-                    REQUIRE(!distance_index.is_regular_snarl(distance_index.get_parent(distance_index.get_parent(distance_index.get_node_net_handle(n6->id()))), false));
+                    REQUIRE(!distance_index.is_regular_snarl(distance_index.get_parent(distance_index.get_parent(distance_index.get_node_net_handle(n6->id())))));
                 } SECTION("Distanceless index") {
                     SnarlDistanceIndex distance_index;
                     fill_in_distance_index(&distance_index, &graph, &snarl_finder, 0);
-                    REQUIRE(!distance_index.is_regular_snarl(distance_index.get_parent(distance_index.get_parent(distance_index.get_node_net_handle(n3->id()))), true, &graph));
-                    REQUIRE(!distance_index.is_regular_snarl(distance_index.get_parent(distance_index.get_parent(distance_index.get_node_net_handle(n8->id()))), true, &graph));
-                    REQUIRE(distance_index.is_regular_snarl(distance_index.get_parent(distance_index.get_parent(distance_index.get_node_net_handle(n6->id()))), true, &graph));
-                    // TODO: This isn't true because it would be too much work to recursively check all children using only the graph
-                    //REQUIRE(!distance_index.is_regular_snarl(distance_index.get_parent(distance_index.get_parent(distance_index.get_node_net_handle(n6->id()))), false, &graph));
+                    REQUIRE(!distance_index.is_regular_snarl(distance_index.get_parent(distance_index.get_parent(distance_index.get_node_net_handle(n3->id())))));
+                    REQUIRE(!distance_index.is_regular_snarl(distance_index.get_parent(distance_index.get_parent(distance_index.get_node_net_handle(n8->id())))));
+                    REQUIRE(!distance_index.is_regular_snarl(distance_index.get_parent(distance_index.get_parent(distance_index.get_node_net_handle(n6->id())))));
                 }
             }
             SECTION("Minimum distances are correct") {
@@ -3577,12 +3661,9 @@ namespace vg {
             //    }
             //    )";
             //    
-            //    VG graph;
-            //    
             //    // Load up the graph
-            //    Graph g;
-            //    json2pb(g, graph_json.c_str(), graph_json.size());
-            //    graph.extend(g);
+            //    VG graph;
+            //    vg::io::json2graph(graph_json, &graph);
             //    
             //    // Define the one snarl
             //    Snarl snarl1;
@@ -3709,12 +3790,9 @@ namespace vg {
             //    string snarl2_json = R"({"type": 1, "end": {"node_id": 187209, "backward": true}, "start": {"node_id": 178895, "backward": true}, "parent": {"end": {"node_id": 187208}, "start": {"node_id": 178894}}})";
             //    string snarl3_json = R"({"type": 1, "end": {"node_id": 178896}, "start": {"node_id": 178895}, "parent": {"end": {"node_id": 187208}, "start": {"node_id": 178894}}})";
             //    
-            //    VG graph;
-            //    
             //    // Load up the graph
-            //    Graph g;
-            //    json2pb(g, graph_json.c_str(), graph_json.size());
-            //    graph.extend(g);
+            //    VG graph;
+            //    vg::io::json2graph(graph_json, &graph);
             //    
             //    // Load the snarls
             //    Snarl snarl1, snarl2, snarl3;
@@ -3885,9 +3963,7 @@ namespace vg {
             
             // Make an actual graph
             VG graph;
-            Graph chunk;
-            json2pb(chunk, graph_json.c_str(), graph_json.size());
-            graph.extend(chunk);
+            vg::io::json2graph(graph_json, &graph);
             
             // We need to see the path.
             REQUIRE(graph.paths.size() == 1);
@@ -4145,9 +4221,7 @@ namespace vg {
             
             // Make an actual graph
             VG graph;
-            Graph chunk;
-            json2pb(chunk, graph_json.c_str(), graph_json.size());
-            graph.extend(chunk);
+            vg::io::json2graph(graph_json, &graph);
             
             IntegratedSnarlFinder snarl_finder(graph); 
             SnarlDistanceIndex distance_index;
@@ -4258,9 +4332,7 @@ namespace vg {
             
             // Make an actual graph
             VG graph;
-            Graph chunk;
-            json2pb(chunk, graph_json.c_str(), graph_json.size());
-            graph.extend(chunk);
+            vg::io::json2graph(graph_json, &graph);
             
             IntegratedSnarlFinder snarl_finder(graph); 
             SnarlDistanceIndex distance_index;
@@ -4407,9 +4479,7 @@ namespace vg {
             
             // Make an actual graph
             VG graph;
-            Graph chunk;
-            json2pb(chunk, graph_json.c_str(), graph_json.size());
-            graph.extend(chunk);
+            vg::io::json2graph(graph_json, &graph);
          
             IntegratedSnarlFinder snarl_finder(graph); 
             SnarlDistanceIndex distance_index;
@@ -4536,9 +4606,7 @@ namespace vg {
             
             // Make an actual graph
             VG graph;
-            Graph chunk;
-            json2pb(chunk, graph_json.c_str(), graph_json.size());
-            graph.extend(chunk);
+            vg::io::json2graph(graph_json, &graph);
             
             IntegratedSnarlFinder snarl_finder(graph); 
             SnarlDistanceIndex distance_index;
@@ -4645,9 +4713,7 @@ namespace vg {
             
             // Make an actual graph
             VG graph;
-            Graph chunk;
-            json2pb(chunk, graph_json.c_str(), graph_json.size());
-            graph.extend(chunk);
+            vg::io::json2graph(graph_json, &graph);
             
             IntegratedSnarlFinder snarl_finder(graph); 
             SnarlDistanceIndex distance_index;
@@ -4749,9 +4815,7 @@ namespace vg {
             
             // Make an actual graph
             VG graph;
-            Graph chunk;
-            json2pb(chunk, graph_json.c_str(), graph_json.size());
-            graph.extend(chunk);
+            vg::io::json2graph(graph_json, &graph);
             
             IntegratedSnarlFinder snarl_finder(graph); 
             SnarlDistanceIndex distance_index;
@@ -4919,9 +4983,7 @@ namespace vg {
             
             // Make an actual graph
             VG graph;
-            Graph chunk;
-            json2pb(chunk, graph_json.c_str(), graph_json.size());
-            graph.extend(chunk);
+            vg::io::json2graph(graph_json, &graph);
             
             IntegratedSnarlFinder snarl_finder(graph); 
             SnarlDistanceIndex distance_index;
@@ -5042,9 +5104,7 @@ namespace vg {
             
             // Make an actual graph
             VG graph;
-            Graph chunk;
-            json2pb(chunk, graph_json.c_str(), graph_json.size());
-            graph.extend(chunk);
+            vg::io::json2graph(graph_json, &graph);
             
             IntegratedSnarlFinder snarl_finder(graph); 
             SnarlDistanceIndex distance_index;
@@ -6624,6 +6684,25 @@ namespace vg {
             }
         }
 
+        TEST_CASE( "Tiny oversized snarl", "[snarl_distance]" ) {
+            VG graph;
+            handle_t first = graph.create_handle("GCA");
+            handle_t second = graph.create_handle("T");
+            handle_t third = graph.create_handle("G");
+            handle_t fourth = graph.create_handle("CTGA");
+
+            // first -> {second, third} -> fourth, plus a direct second -> third edge
+            graph.create_edge(first, second);
+            graph.create_edge(first, third);
+            graph.create_edge(second, third);
+            graph.create_edge(second, fourth);
+            graph.create_edge(third, fourth);
+            IntegratedSnarlFinder snarl_finder(graph);
+            SnarlDistanceIndex distance_index;
+            fill_in_distance_index(&distance_index, &graph, &snarl_finder, 1);
+            REQUIRE(distance_index.minimum_distance(2, false, 0, 3, false, 0, false, &graph) == 1);
+        }
+
         TEST_CASE( "Oversized snarl","[snarl_distance]" ) {
             VG graph;
          
@@ -7372,6 +7451,9 @@ namespace vg {
         }
 
 
+        // TODO: This test case doesn't do anything (runs 0 iterations).
+        // When I tell it to actually run iterations, it fails.
+        // Has it ever worked?
         TEST_CASE("random test subgraph", "[snarl_distance][snarl_distance_subgraph]") {
 
             int64_t min = 20; int64_t max = 50;
@@ -7480,7 +7562,7 @@ namespace vg {
                                      << distance_index.minimum_distance(nodeID1, false, 0, node_id, true, 0)
                                      << " (" << dist_start_fd << " " << dist_end_fd << " " << dist_start_bk << " " << dist_end_bk << ") "
                                      << " is in the subgraph but shouldn't be " << endl;
-                                graph.serialize_to_file("test_graph.vg");
+                                vg::io::save_handle_graph(&graph, "test_graph.vg");
                             }
                             REQUIRE((start_forward || end_forward || in_forward || start_backward || end_backward || in_backward));
                         } else {
@@ -7491,7 +7573,7 @@ namespace vg {
                                      << distance_index.minimum_distance(nodeID1, false, 0,node_id, true, 0)
                                      << " (" << dist_start_fd << " " << dist_end_fd << " " << dist_start_bk << " " << dist_end_bk << ") "
                                      << " is not in the subgraph but should be " << endl;
-                                graph.serialize_to_file("test_graph.vg");
+                                vg::io::save_handle_graph(&graph, "test_graph.vg");
                                 REQUIRE(!(start_forward || end_forward || in_forward || start_backward || end_backward || in_backward));
                             }
                         }
@@ -7556,31 +7638,49 @@ namespace vg {
         */
         
         TEST_CASE( "Distance index can traverse all the snarls in random graphs",
-                  "[snarl_distance_random]" ) {
+                  "[snarl_distance][snarl_distance_random]" ) {
         
             // Each actual graph takes a fairly long time to do so we randomize sizes...
             
-            default_random_engine generator(test_seed_source());
+            std::default_random_engine generator(test_seed_source());
             
             for (size_t repeat = 0; repeat < 1000; repeat++) {
             
-                uniform_int_distribution<size_t> bases_dist(100, 1000);
+                std::uniform_int_distribution<size_t> bases_dist(100, 1000);
                 size_t bases = bases_dist(generator);
-                uniform_int_distribution<size_t> variant_bases_dist(1, bases/20);
+                std::uniform_int_distribution<size_t> variant_bases_dist(1, bases/20);
                 size_t variant_bases = variant_bases_dist(generator);
-                uniform_int_distribution<size_t> variant_count_dist(1, bases/30);
+                std::uniform_int_distribution<size_t> variant_count_dist(1, bases/30);
                 size_t variant_count = variant_count_dist(generator);
+                
+                std::uniform_real_distribution<double> flip_dist(0.0, 1.0);
+                double node_flip_fraction = flip_dist(generator);
+                double chain_flip_fraction = flip_dist(generator);
 
-                uniform_int_distribution<size_t> snarl_size_limit_dist(500, 1000);
+                std::uniform_int_distribution<size_t> snarl_size_limit_dist(2, 1000);
                 size_t size_limit = snarl_size_limit_dist(generator);
-                        
+
 #ifdef debug
-                cerr << repeat << ": Do graph of " << bases << " bp with ~" << variant_bases << " bp large variant length and " << variant_count << " events" << endl;
+                cerr << repeat << ": Do graph of " << bases << " bp with ~" << variant_bases << " bp large variant length and " << variant_count << " events with " << node_flip_fraction << " nodes flipped and " << chain_flip_fraction << " of chains flipped, with size limit " << size_limit << endl;
 #endif
-            
-                VG graph;
-                random_graph(bases, variant_bases, variant_count, &graph);
-                IntegratedSnarlFinder finder(graph); 
+               
+                // Generate a base graph
+                VG base_graph;
+                random_graph(bases, variant_bases, variant_count, &base_graph);
+                
+                // Flip some fraction of the nodes to their local reverse orientation
+                bdsg::HashGraph graph = randomly_flipped_nodes(base_graph, node_flip_fraction, generator);
+
+                // Find snarls
+                IntegratedSnarlFinder base_finder(graph);
+
+                // Flip some fraction of the chains to their opposite orientation.
+                // Note that we can't flip the snarls because the snarl decomposition
+                // requires snarls to be articulated as forward along their
+                // chains.
+                SnarlDecompositionFuzzer finder(&graph, &base_finder, chain_flip_fraction, generator);
+                
+                // Build the index
                 SnarlDistanceIndex distance_index;
                 fill_in_distance_index(&distance_index, &graph, &finder, size_limit);
 
@@ -7640,7 +7740,7 @@ namespace vg {
                             cerr << node_id1 << " " << (rev1 ? "rev" : "fd") << offset1 << " -> " << node_id2 <<  (rev2 ? "rev" : "fd") << offset2 << endl;
                             cerr << "guessed: " << snarl_distance << " actual: " << dijkstra_distance << endl;
                             cerr << "serializing graph to test_graph.vg" << endl;
-                            graph.serialize_to_file("test_graph.vg");
+                            vg::io::save_handle_graph(&graph, "test_graph.vg");
                             REQUIRE(false);
                         }
                         if (max_distance < snarl_distance){
@@ -7648,11 +7748,10 @@ namespace vg {
                             cerr << node_id1 << " " << (rev1 ? "rev" : "fd") << offset1 << " -> " << node_id2 <<  (rev2 ? "rev" : "fd") << offset2 << endl;
                             cerr << "minimum: " << snarl_distance << " maximum: " << max_distance << endl;
                             cerr << "serializing graph to test_graph.vg" << endl;
-                            graph.serialize_to_file("test_graph.vg");
+                            vg::io::save_handle_graph(&graph, "test_graph.vg");
                             REQUIRE(false);
                         }
                         REQUIRE((snarl_distance >= dijkstra_distance || snarl_distance == std::numeric_limits<size_t>::max()));
-                            graph.serialize_to_file("test_graph.vg");
                         if (!traceback.first.empty() && ! traceback.second.empty()) {
                             size_t traceback_distance = 0;
                             for (auto x : traceback.first){
@@ -7699,7 +7798,7 @@ namespace vg {
                             cerr << node_id1 << " " << (rev1 ? "rev" : "fd") << offset1 << " -> " << node_id2 <<  (rev2 ? "rev" : "fd") << offset2 << endl;
                             cerr << "guessed: " << snarl_distance << " actual: " << dijkstra_distance << endl;
                             cerr << "serializing graph to test_graph.vg" << endl;
-                            graph.serialize_to_file("test_graph.vg");
+                            vg::io::save_handle_graph(&graph, "test_graph.vg");
                             REQUIRE(false);
                         }
                         REQUIRE((snarl_distance >= dijkstra_distance || snarl_distance == std::numeric_limits<size_t>::max()));
@@ -7789,8 +7888,372 @@ namespace vg {
         //                return true;
         //            });
         //}
+        
+        TEST_CASE( "Distance index can query a troublesome oversized snarl",
+                  "[snarl_distance]" ) {
+            // Five single-base nodes (19-23); the index below is built with a size limit of 2.
+            std::string graph_json = R"({
+                "node": [
+                    {"id": "19","sequence": "A"},
+                    {"id": "20","sequence": "A"},
+                    {"id": "21","sequence": "A"},
+                    {"id": "22","sequence": "A"},
+                    {"id": "23","sequence": "A"} 
+                ], "edge": [
+                    {"from": "19","to": "20"},
+                    {"from": "19","to": "22"},
+                    {"from": "20","to": "21"},
+                    {"from": "20","to": "23"},
+                    {"from": "21","to": "22"}, 
+                    {"from": "22","to": "23"}
+                ]
+            })";
+
+            bdsg::HashGraph graph;
+            vg::io::json2graph(graph_json, &graph);
+
+            IntegratedSnarlFinder snarl_finder(graph);
+            SnarlDistanceIndex distance_index;
+            fill_in_distance_index(&distance_index, &graph, &snarl_finder, 2);
+
+            const id_t node_id1 = 19; const bool rev1 = false; const size_t offset1 = 0;
+            const id_t node_id2 = 23; const bool rev2 = false; const size_t offset2 = 0;
+            const handle_t source = graph.get_handle(node_id1, rev1);
+            const handle_t target = graph.get_handle(node_id2, rev2);
+
+            // Ground truth from Dijkstra over the graph, adjusted for both offsets.
+            // Stays at max() if the search never reports the target handle.
+            size_t dijkstra_distance = std::numeric_limits<size_t>::max();
+            auto on_reach = [&](const handle_t& reached, size_t distance) -> bool {
+                if (reached == target) {
+                    const size_t source_length = graph.get_length(graph.get_handle(node_id1));
+                    dijkstra_distance = distance + (source_length - offset1) + offset2;
+                    return false;
+                }
+                return true;
+            };
+            handlegraph::algorithms::dijkstra(&graph, source, on_reach, false);
+
+            REQUIRE(distance_index.minimum_distance(node_id1, rev1, offset1, node_id2, rev2, offset2, false, &graph) == dijkstra_distance);
+        }
+
+        TEST_CASE( "Distance index can query out of a SNP with a reversing allele as an oversized snarl",
+                  "[snarl_distance]" ) {
+            
+            // This is a snarl from 1 to 2, where 4 and 5 are a SNP, and 3
+            // lets you double back to the start
+            std::string graph_json = R"({
+                "node": [
+                    {"id": "1","sequence": "AAAAA"},
+                    {"id": "2","sequence": "AAAAA"},
+                    {"id": "3","sequence": "A"},
+                    {"id": "4","sequence": "A"},
+                    {"id": "5","sequence": "A"} 
+                ], "edge": [
+                    {"from": "1","to": "3"},
+                    {"from": "1","to": "4"},
+                    {"from": "1","to": "5"},
+                    {"from": "3","to": "1", "to_end": true},
+                    {"from": "4","to": "2"}, 
+                    {"from": "5","to": "2"}
+                ]
+            })";
+
+            bdsg::HashGraph graph;
+            vg::io::json2graph(graph_json, &graph);
+
+            IntegratedSnarlFinder snarl_finder(graph);
+            SnarlDistanceIndex distance_index;
+            fill_in_distance_index(&distance_index, &graph, &snarl_finder, 2);
+            
+            // We want to be able to get out of the snarl from node 4, which we definitely can.
+            id_t node_id1 = 4; bool rev1 = false ; size_t offset1 = 1;
+            id_t node_id2 = 2; bool rev2 = false ; size_t offset2 = 0;
+            // offset1 is node 4's whole length (1 bp) and offset2 is 0; 4 -> 2 is a
+            // direct edge, so no bases lie between the two positions.
+
+            // Expected distance, worked out by hand from the graph above
+            size_t true_distance = 0;
+
+            REQUIRE(distance_index.minimum_distance(node_id1, rev1, offset1, node_id2, rev2, offset2, false, &graph) == true_distance);
+
+            // And out of the snarl to the left from 3 reverse to 1 reverse should also be 0
+            node_id1 = 3; rev1 = true; offset1 = 1;
+            node_id2 = 1; rev2 = true; offset2 = 0;
+            true_distance = 0;
+            REQUIRE(distance_index.minimum_distance(node_id1, rev1, offset1, node_id2, rev2, offset2, false, &graph) == true_distance);
+
+        }
+
+        TEST_CASE( "Distance index can query within a fiddly snarl",
+                  "[snarl_distance]" ) {
+            
+            std::string graph_json = R"({"edge": [{"from": "1", "to": "3"}, {"from": "1", "to": "3", "to_end": true}, {"from": "1", "to": "4"}, {"from": "1", "to": "5"}, {"from": "4", "to": "5", "to_end": true}, {"from": "2", "from_start": true, "to": "4", "to_end": true}], "node": [{"id": "5", "sequence": "A"}, {"id": "1", "sequence": "AAAAA"}, {"id": "4", "sequence": "A"}, {"id": "2", "sequence": "AAAAA"}, {"id": "3", "sequence": "A"}]})";
+
+            bdsg::HashGraph graph;
+            vg::io::json2graph(graph_json, &graph);
+
+            IntegratedSnarlFinder snarl_finder(graph);
+            SnarlDistanceIndex distance_index;
+            fill_in_distance_index(&distance_index, &graph, &snarl_finder, 2);
+            
+            id_t node_id1 = 4; bool rev1 = false ; size_t offset1 = 1;
+            id_t node_id2 = 5; bool rev2 = true ; size_t offset2 = 0;
+            // The JSON has an edge from the end of 4 to the end of 5, i.e. 4 forward ->
+            // 5 reverse, and offset1 is node 4's whole length (1 bp).
+
+            // Expected distance, worked out by hand from the graph above
+            size_t true_distance = 0;
+
+            REQUIRE(distance_index.minimum_distance(node_id1, rev1, offset1, node_id2, rev2, offset2, false, &graph) == true_distance);
+        }
+
+        TEST_CASE( "Distance index can query into a child snarl in reverse",
+                  "[snarl_distance]" ) {
+            // Eleven single-base nodes, given as one line of JSON; index size limit is 2.
+            std::string graph_json = R"({"node":[{"id":"79","sequence":"A"},{"id":"16","sequence":"A"},{"id":"60","sequence":"A"},{"id":"37","sequence":"A"},{"id":"40","sequence":"A"},{"id":"53","sequence":"A"},{"id":"59","sequence":"A"},{"id":"63","sequence":"A"},{"id":"18","sequence":"A"},{"id":"38","sequence":"A"},{"id":"62","sequence":"A"}],"edge":[{"from":"16","to":"53"},{"from":"16","from_start":true,"to":"79","to_end":true},{"from":"60","to":"62"},{"from":"60","from_start":true,"to":"79","to_end":true},{"from":"37","from_start":true,"to":"63","to_end":true},{"from":"37","from_start":true,"to":"40"},{"from":"53","to":"60"},{"from":"59","to":"63"},{"from":"59","from_start":true,"to":"60","to_end":true},{"from":"18","to":"53"},{"from":"18","to":"38"},{"from":"18","from_start":true,"to":"79","to_end":true},{"from":"18","from_start":true,"to":"37","to_end":true},{"from":"38","to":"63","to_end":true},{"from":"38","to":"40"},{"from":"62","to":"63"}]})";
+
+            bdsg::HashGraph graph;
+            vg::io::json2graph(graph_json, &graph);
+
+            IntegratedSnarlFinder snarl_finder(graph);
+            SnarlDistanceIndex distance_index;
+            fill_in_distance_index(&distance_index, &graph, &snarl_finder, 2);
+
+            const id_t node_id1 = 16; const bool rev1 = false; const size_t offset1 = 1;
+            const id_t node_id2 = 62; const bool rev2 = true; const size_t offset2 = 0;
+            const handle_t source = graph.get_handle(node_id1, rev1);
+            const handle_t target = graph.get_handle(node_id2, rev2);
+
+            // Ground truth from Dijkstra over the graph, adjusted for both offsets.
+            // Stays at max() if the search never reports the target handle.
+            size_t dijkstra_distance = std::numeric_limits<size_t>::max();
+            auto on_reach = [&](const handle_t& reached, size_t distance) -> bool {
+                if (reached == target) {
+                    const size_t source_length = graph.get_length(graph.get_handle(node_id1));
+                    dijkstra_distance = distance + (source_length - offset1) + offset2;
+                    return false;
+                }
+                return true;
+            };
+            handlegraph::algorithms::dijkstra(&graph, source, on_reach, false);
+
+            size_t index_distance = distance_index.minimum_distance(node_id1, rev1, offset1, node_id2, rev2, offset2, false, &graph);
+
+            REQUIRE(index_distance == dijkstra_distance);
+        }
+
+
+        TEST_CASE( "Distance index can query all possible 3-node-with-legs snarls",
+                 "[snarl_distance]" ) {
+
+            // We're going to generate all possible snarls you can get by
+            // starting with the boundary nodes, taking up to 3 nodes and
+            // connecting them, one nodeside at a time, onto the existing
+            // nodes.
+            //
+            // Combinatorics says this is a manageable number; each nodeside
+            // picks from one of the previous nodesides and attaches to it.
+            
+            /// Call the callback with each possible combination of choices of
+            /// previous items.
+            ///
+            /// start_size is the number of items present before we start
+            /// making choices; the first entry can choose from start_size
+            /// items.
+            /// 
+            /// end_size is the total number of items to think about, including
+            /// those in start_size.
+            /// 
+            /// Calls the callback with all possible vectors of length
+            /// (end_size - start_size) matching these constraints.
+            auto for_all_choice_combinations = [](size_t start_size, size_t end_size, const std::function<void(const std::vector<size_t>&)>& callback) { 
+
+                std::vector<size_t> choices(end_size - start_size, 0);
+                while (true) {
+#ifdef debug
+                    std::cerr << "Consider combination:";
+                    for (auto& item : choices) {
+                        std::cerr << " " << item;
+                    }
+                    std::cerr << std::endl;
+#endif
+                    callback(choices);
+                    choices.back()++;
+                    for (size_t i = end_size - 1; i >= start_size; i--) {
+                        if (choices.at(i - start_size) >= i) {
+                            // We've reached the point where we want to pick from a
+                            // choice not available at this point.
+                            // At i=2 we can choose between 0 and 1, so we carry at i.
+                            if (i == start_size) {
+                                // We've counted all possibilities
+                                return;
+                            } else {
+                                // Carry and reset to 0.
+                                choices.at(i - start_size - 1)++;
+                                choices.at(i - start_size) = 0;
+                            }
+                        } else {
+                            // No more carrying to do
+                            break;
+                        }
+                    }
+                }
+            };
+            
+            // How big should a snarl be allowed to be before being oversized?
+            size_t size_limit = 2;
+            // How many content nodes should be inside the snarl?
+            const size_t MAX_NODES = 3;
+            // How many node sides do we need to worry about, including the boundary sentinels?
+            size_t max_node_sides = MAX_NODES * 2 + 2;
+            for_all_choice_combinations(2, max_node_sides, [&](const std::vector<size_t>& choices) {
+                // Build the choices into a graph.
+
+                bdsg::HashGraph graph;
+                // Make the bounding nodes heavy so they are likely to root the snarl
+                handle_t start_node = graph.create_handle("AAAAA");
+                handle_t end_node = graph.create_handle("AAAAA");
+
+                std::vector<handle_t> connect_to;
+                connect_to.reserve(max_node_sides);
+                // Choice 0 is start node, arriving reading out
+                connect_to.push_back(graph.flip(start_node));
+                // Choice 1 is end node reading out
+                connect_to.push_back(end_node);
+
+                for (size_t i = 0; i < choices.size(); i += 2) {
+                    // Make a node
+                    handle_t new_node = graph.create_handle("A");
+                    // Make sure to remember it so it can choose itself
+                    connect_to.push_back(new_node);
+                    connect_to.push_back(graph.flip(new_node));
+                    // Connect its left and right to each pair of choices.
+                    graph.create_edge(graph.flip(new_node), connect_to.at(choices.at(i)));
+                    graph.create_edge(new_node, connect_to.at(choices.at(i + 1)));
+                }
+
+                // TODO: It might be more efficient to un-build the things that
+                // change between graphs instead of rebuilding from scratch for
+                // every case.
+                
+                // Skip graphs where the choices mean the graph isn't actually
+                // connected, because then it can't be recognized as a snarl
+                // probably.
+                std::vector<std::unordered_set<nid_t>> components = handlegraph::algorithms::weakly_connected_components(&graph);
+                if (components.size() > 1) {
+                    return;
+                }
+
+                // Now index the graph for query
+                IntegratedSnarlFinder finder(graph); 
+                SnarlDistanceIndex distance_index;
+                fill_in_distance_index(&distance_index, &graph, &finder, size_limit);
+
+                // Compute the truth all-to-all distances, between outgoing
+                // side of first handle and incoming side of second.
+                // Both handles are oriented along the connecting path.
+                // TODO: We compute/store both triangles of the matrix; can we avoid one somehow?
+                std::unordered_map<handle_t, std::unordered_map<handle_t, size_t>> dijkstra_distances;
+                graph.for_each_handle([&](const handle_t& base) {
+                    for (const handle_t& here : {base, graph.flip(base)}) {
+                        if (here == graph.flip(start_node) || here == end_node) {
+                            // Skip only this out-of-snarl traversal; continue (not return) so the flipped one is still visited
+                            continue;
+                        }
+                        dijkstra_distances.emplace(here, handlegraph::algorithms::find_shortest_paths(&graph, here));
+                    }
+                });
+
+                // The Dijkstra traversal always sees a handle to itself at
+                // distance 0. We need to get the real back-to-self distance,
+                // if any, and fill that in.
+                graph.for_each_handle([&](const handle_t& base) {
+                    for (const handle_t& here : {base, graph.flip(base)}) {
+                        if (here == graph.flip(start_node) || here == end_node) {
+                            // Skip only this out-of-snarl traversal, matching the set of start handles stored above
+                            continue;
+                        }
+
+                        // The place we need to arrive at is ourselves, since
+                        // both start and end are oriented along the connecting
+                        // path here.
+                    
+                        size_t loop_distance = std::numeric_limits<size_t>::max();
+                        // See if we can get back here from any of the places we can get
+                        graph.follow_edges(here, false, [&](const handle_t next) {
+                            if (next == here) {
+                                // We found a real self loop
+                                loop_distance = 0;
+                                return false;
+                            }
+                            auto found_index = dijkstra_distances.find(next);
+                            if (found_index == dijkstra_distances.end()) {
+                                // This destination can't get anywhere.
+                                // This should be impossible since the Dijkstra always will point a node at itself.
+                                return true;
+                            }
+                            auto found_distance = found_index->second.find(here);
+                            if (found_distance == found_index->second.end()) {
+                                // This destination can't get back to us
+                                return true;
+                            }
+                            // If we find a way back, min in its distance.
+                            loop_distance = std::min(loop_distance, graph.get_length(next) + found_distance->second);
+                            return true;
+                        });
+
+#ifdef debug
+                        std::cerr << "Real self loop distance for " << graph.get_id(here) << (graph.get_is_reverse(here) ? "rev" : "fd") << " -> " << graph.get_id(here) << (graph.get_is_reverse(here) ? "rev" : "fd") << " is " << loop_distance << std::endl;
+#endif
+
+                        if (loop_distance == std::numeric_limits<size_t>::max()) {
+                            // There's really no way back from this node to itself in the same orientation. Delete the entry the Dijkstra search adds.
+                            dijkstra_distances.at(here).erase(here);
+                        } else {
+                            // There is a way back; store the value.
+                            dijkstra_distances.at(here)[here] = loop_distance;
+                        }
+                    }
+                });
+
+#ifdef debug
+                for (auto& [start_handle, distances] : dijkstra_distances) {
+                    for (auto& [end_handle, dijkstra_distance] : distances) {
+                        cerr << "Dijkstra sees: " << graph.get_id(start_handle) << (graph.get_is_reverse(start_handle) ? "rev" : "fd") << graph.get_length(start_handle) << " -> " << graph.get_id(end_handle) << (graph.get_is_reverse(end_handle) ? "rev" : "fd") << 0 << " = " << dijkstra_distance << endl;
+                    }
+                }
+#endif
+
+                // Now query all of the distances against the index
+                for (auto& [start_handle, distances] : dijkstra_distances) {
+                    for (auto& [end_handle, dijkstra_distance] : distances) {
+                        // Ask for distance between outgoing side of first handle and incoming side of second.
+                       
+#ifdef debug
+                        cerr << "Measure: " << graph.get_id(start_handle) << (graph.get_is_reverse(start_handle) ? "rev" : "fd") << graph.get_length(start_handle) << " -> " << graph.get_id(end_handle) << (graph.get_is_reverse(end_handle) ? "rev" : "fd") << 0 << endl;
+#endif
+
+                        size_t snarl_distance = distance_index.minimum_distance(graph.get_id(start_handle), graph.get_is_reverse(start_handle), graph.get_length(start_handle), graph.get_id(end_handle), graph.get_is_reverse(end_handle), 0, false, &graph);
+
+                        if (snarl_distance != dijkstra_distance) {
+                            cerr << "Failed exhaustive test" << endl;
+                            cerr << "Snarl size limit: " << size_limit << endl;
+                            cerr << graph.get_id(start_handle) << (graph.get_is_reverse(start_handle) ? "rev" : "fd") << graph.get_length(start_handle) << " -> " << graph.get_id(end_handle) << (graph.get_is_reverse(end_handle) ? "rev" : "fd") << 0 << endl;
+                            cerr << "guessed: " << snarl_distance << " actual: " << dijkstra_distance << endl;
+                            cerr << "serializing graph to test_graph.vg" << endl;
+                            vg::io::save_handle_graph(&graph, "test_graph.vg");
+                        }
+                        REQUIRE(snarl_distance == dijkstra_distance);
+                    }
+                }
+            });
+            
+        }
+        
+
         TEST_CASE( "random minimum distance paths",
-                  "[snarl_distance_random_paths]" ) {
+                  "[snarl_distance][snarl_distance_random_paths]" ) {
         
             // Each actual graph takes a fairly long time to do so we randomize sizes...
             
@@ -7809,7 +8272,7 @@ namespace vg {
                 size_t size_limit = snarl_size_limit_dist(generator);
                         
 #ifdef debug
-                cerr << repeat << ": Do graph of " << bases << " bp with ~" << variant_bases << " bp large variant length and " << variant_count << " events" << endl;
+                cerr << repeat << ": Do graph of " << bases << " bp with ~" << variant_bases << " bp large variant length and " << variant_count << " events with size limit " << size_limit << endl;
 #endif
             
                 VG graph;
@@ -7818,7 +8281,7 @@ namespace vg {
                 SnarlDistanceIndex distance_index;
                 fill_in_distance_index(&distance_index, &graph, &finder, size_limit);
 
-                graph.serialize_to_file("test_graph.vg");
+                vg::io::save_handle_graph(&graph, "test_graph.vg");
                 for (size_t repeat_positions = 0 ; repeat_positions < 500 ; repeat_positions++) {
                     //Pick random pairs of positions and find the distance between them
                     id_t node_id1 = 0;
diff --git a/src/unittest/snarl_distance_index_characterization.cpp b/src/unittest/snarl_distance_index_characterization.cpp
new file mode 100644
index 0000000000..ff58800b0b
--- /dev/null
+++ b/src/unittest/snarl_distance_index_characterization.cpp
@@ -0,0 +1,386 @@
+// Characterization tests for snarl_distance_index.
+// These tests lock down the serialized form of the distance index for six
+// canonical graphs.  Fixtures 1-4 pin both the byte count and an FNV-1a hash
+// of the serialized bytes; fixtures 5 and 6 pin only the byte count.  The
+// constants are captured from the first run on unmodified code and must
+// remain identical after the file split in PR 1.
+
+#include "../integrated_snarl_finder.hpp"
+#include "../path.hpp"
+#include "../snarl_distance_index.hpp"
+#include "catch.hpp"
+#include <bdsg/hash_graph.hpp>
+
+namespace vg {
+namespace unittest {
+
+// 64-bit FNV-1a hash of a byte buffer.
+static uint64_t fnv1a(const std::vector<uint8_t> &data) {
+  // Standard 64-bit FNV offset basis and prime.
+  const uint64_t offset_basis = 14695981039346656037ULL;
+  const uint64_t prime = 1099511628211ULL;
+  uint64_t hash = offset_basis;
+  for (auto it = data.begin(); it != data.end(); ++it) {
+    // FNV-1a order: fold the byte in with XOR first, then multiply.
+    hash = (hash ^ *it) * prime;
+  }
+  return hash;
+}
+
+// Serialize the index into an in-memory byte vector by appending every chunk
+// that idx.serialize() passes to the callback, in the order it is passed.
+static std::vector<uint8_t> serialize_index(const SnarlDistanceIndex &idx) {
+  std::vector<uint8_t> bytes;
+  auto append_chunk = [&bytes](const void *chunk, size_t length) {
+    const uint8_t *first = static_cast<const uint8_t *>(chunk);
+    const uint8_t *last = first + length;
+    bytes.insert(bytes.end(), first, last);
+  };
+  idx.serialize(append_chunk);
+  return bytes;
+}
+
+// Depth-first walk of the snarl tree starting at (and including) handle,
+// appending every net handle that is a snarl to out.
+static void collect_snarls(const SnarlDistanceIndex &idx,
+                           const net_handle_t &handle,
+                           std::vector<net_handle_t> &out) {
+  if (idx.is_snarl(handle)) {
+    out.push_back(handle);
+  }
+  // Only descend into handles that can have snarl-tree children: recursing
+  // into a node or a sentinel throws.
+  const bool is_leaf = idx.is_node(handle) || idx.is_sentinel(handle);
+  if (!is_leaf) {
+    auto visit_child = [&](const net_handle_t &child) -> bool {
+      collect_snarls(idx, child, out);
+      return true;
+    };
+    idx.for_each_child(handle, visit_child);
+  }
+}
+
+// ─── Fixture 1: linear chain ─────────────────────────────────────────────────
+// h1 → h2 → h3 → h4 → h5   (no bubbles, just one chain)
+
+TEST_CASE("Characterization: linear chain", "[snarl_characterization]") {
+  // Five single-base nodes joined head to tail: h1 → h2 → h3 → h4 → h5.
+  bdsg::HashGraph graph;
+  handle_t h1 = graph.create_handle("A");
+  handle_t h2 = graph.create_handle("A");
+  handle_t h3 = graph.create_handle("A");
+  handle_t h4 = graph.create_handle("A");
+  handle_t h5 = graph.create_handle("A");
+  graph.create_edge(h1, h2);
+  graph.create_edge(h2, h3);
+  graph.create_edge(h3, h4);
+  graph.create_edge(h4, h5);
+
+  // Build the distance index once; every SECTION below starts from it.
+  IntegratedSnarlFinder finder(graph);
+  SnarlDistanceIndex idx;
+  fill_in_distance_index(&idx, &graph, &finder);
+
+  SECTION("serialization hash") {
+    // Pin both the byte count and the FNV-1a hash of the serialized index.
+    auto buf = serialize_index(idx);
+    constexpr size_t EXPECTED_SIZE = 1024;
+    constexpr uint64_t EXPECTED_HASH = 4461810471415873827ULL;
+    REQUIRE(buf.size() == EXPECTED_SIZE);
+    REQUIRE(fnv1a(buf) == EXPECTED_HASH);
+  }
+
+  SECTION("no non-trivial snarls") {
+    std::vector<net_handle_t> snarls;
+    collect_snarls(idx, idx.get_root(), snarls);
+    // Root snarls wrapping a component are allowed, but a plain linear chain
+    // must not contain any other snarl.  Asserting the predicate itself
+    // (instead of REQUIRE(false) inside an if) makes a failure report which
+    // check was violated.
+    for (const net_handle_t &s : snarls) {
+      REQUIRE(idx.is_root_snarl(s));
+    }
+  }
+
+  SECTION("subgraph in distance range") {
+    // From the middle node h3 look forward; nodes 2 steps away should be h5.
+    std::unordered_set<nid_t> sub;
+    // The query is anchored on a one-step path that visits only h3.
+    path_handle_t ph = graph.create_path_handle("path_linear");
+    graph.append_step(ph, h3);
+    Path path = path_from_path_handle(graph, ph);
+    subgraph_in_distance_range(idx, path, &graph, 2, 3, sub, true);
+    REQUIRE(sub.count(graph.get_id(h5)));
+    // Nodes on the other side of h3 must not be reported.
+    REQUIRE(!sub.count(graph.get_id(h1)));
+    REQUIRE(!sub.count(graph.get_id(h2)));
+  }
+}
+
+// ─── Fixture 2: simple bubble ────────────────────────────────────────────────
+// h1 → h2 → h4
+// h1 → h3 → h4
+// One snarl (h1,h4) with two single-node children → should be regular.
+
+TEST_CASE("Characterization: simple bubble", "[snarl_characterization]") {
+  // Four single-base nodes wired as h1 → {h2, h3} → h4.
+  bdsg::HashGraph graph;
+  const handle_t h1 = graph.create_handle("A");
+  const handle_t h2 = graph.create_handle("A");
+  const handle_t h3 = graph.create_handle("A");
+  const handle_t h4 = graph.create_handle("A");
+  graph.create_edge(h1, h2);
+  graph.create_edge(h1, h3);
+  graph.create_edge(h2, h4);
+  graph.create_edge(h3, h4);
+
+  IntegratedSnarlFinder snarl_finder(graph);
+  SnarlDistanceIndex distance_index;
+  fill_in_distance_index(&distance_index, &graph, &snarl_finder);
+
+  SECTION("serialization hash") {
+    // Pin the byte count and FNV-1a hash of the serialized index.
+    const size_t expected_size = 1024;
+    const uint64_t expected_hash = 10070957726680237483ULL;
+    const std::vector<uint8_t> bytes = serialize_index(distance_index);
+    REQUIRE(bytes.size() == expected_size);
+    REQUIRE(fnv1a(bytes) == expected_hash);
+  }
+
+  SECTION("snarl is regular") {
+    std::vector<net_handle_t> all_snarls;
+    collect_snarls(distance_index, distance_index.get_root(), all_snarls);
+    // Every non-root snarl must be regular, and there must be at least one.
+    size_t internal_snarls = 0;
+    for (size_t i = 0; i < all_snarls.size(); ++i) {
+      if (distance_index.is_root_snarl(all_snarls[i])) {
+        continue;
+      }
+      ++internal_snarls;
+      REQUIRE(distance_index.is_regular_snarl(all_snarls[i]));
+    }
+    REQUIRE(internal_snarls > 0);
+  }
+
+  SECTION("subgraph in distance range") {
+    // From h1 look forward; h2 and h3 are 1 step away, h4 is 2 steps away.
+    const path_handle_t anchor = graph.create_path_handle("path_bubble");
+    graph.append_step(anchor, h1);
+    Path path = path_from_path_handle(graph, anchor);
+    std::unordered_set<nid_t> reached;
+    subgraph_in_distance_range(distance_index, path, &graph, 1, 2, reached,
+                               true);
+    auto in_range = [&](const handle_t &h) {
+      return reached.count(graph.get_id(h)) != 0;
+    };
+    REQUIRE(in_range(h2));
+    REQUIRE(in_range(h3));
+    REQUIRE(in_range(h4));
+  }
+}
+
+// ─── Fixture 3: nested chain with loop ───────────────────────────────────────
+// h1 → h2 → h3 → h4 → h5
+// h2 → flip(h2)  (self-loop, allows reversing at h2)
+// h3 → h5        (shortcut creating a nested snarl)
+
+TEST_CASE("Characterization: nested chain with loop",
+          "[snarl_characterization]") {
+  // A five-node backbone plus a reversing self-loop on h2 and a h3 → h5
+  // shortcut.
+  bdsg::HashGraph graph;
+  const handle_t h1 = graph.create_handle("A");
+  const handle_t h2 = graph.create_handle("A");
+  const handle_t h3 = graph.create_handle("A");
+  const handle_t h4 = graph.create_handle("A");
+  const handle_t h5 = graph.create_handle("A");
+  graph.create_edge(h1, h2);
+  graph.create_edge(h2, h3);
+  graph.create_edge(h3, h4);
+  graph.create_edge(h4, h5);
+  // Self-loop from h2 onto its own reverse strand.
+  graph.create_edge(h2, graph.flip(h2));
+  // Shortcut that skips h4.
+  graph.create_edge(h3, h5);
+
+  IntegratedSnarlFinder snarl_finder(graph);
+  SnarlDistanceIndex distance_index;
+  fill_in_distance_index(&distance_index, &graph, &snarl_finder);
+
+  SECTION("serialization hash") {
+    // Pin the byte count and FNV-1a hash of the serialized index.
+    const size_t expected_size = 1024;
+    const uint64_t expected_hash = 16246149163740101819ULL;
+    const std::vector<uint8_t> bytes = serialize_index(distance_index);
+    REQUIRE(bytes.size() == expected_size);
+    REQUIRE(fnv1a(bytes) == expected_hash);
+  }
+
+  SECTION("snarls exist") {
+    std::vector<net_handle_t> all_snarls;
+    collect_snarls(distance_index, distance_index.get_root(), all_snarls);
+    // The h3 → h5 shortcut should produce at least one non-root snarl; stop
+    // scanning as soon as one is seen.
+    bool has_internal_snarl = false;
+    for (size_t i = 0; i < all_snarls.size() && !has_internal_snarl; ++i) {
+      has_internal_snarl = !distance_index.is_root_snarl(all_snarls[i]);
+    }
+    REQUIRE(has_internal_snarl);
+  }
+
+  SECTION("subgraph in distance range") {
+    const path_handle_t anchor = graph.create_path_handle("path_nested");
+    graph.append_step(anchor, h1);
+    Path path = path_from_path_handle(graph, anchor);
+    std::unordered_set<nid_t> reached;
+    subgraph_in_distance_range(distance_index, path, &graph, 2, 3, reached,
+                               true);
+    // h3 is at distance 2 from h1 (through h2). Only h3 is asserted here,
+    // although h5 is also reachable via the shortcut.
+    REQUIRE(reached.count(graph.get_id(h3)) != 0);
+  }
+}
+
+// ─── Fixture 4: multi-component root ─────────────────────────────────────────
+// Component 1: h1 → h2 → h3
+// Component 2: h4 → h5 → h6   (no edges between components)
+
+TEST_CASE("Characterization: multi-component root",
+          "[snarl_characterization]") {
+  // Two disjoint three-node chains: h1 → h2 → h3 and h4 → h5 → h6.
+  bdsg::HashGraph graph;
+  const handle_t h1 = graph.create_handle("A");
+  const handle_t h2 = graph.create_handle("A");
+  const handle_t h3 = graph.create_handle("A");
+  const handle_t h4 = graph.create_handle("A");
+  const handle_t h5 = graph.create_handle("A");
+  const handle_t h6 = graph.create_handle("A");
+  graph.create_edge(h1, h2);
+  graph.create_edge(h2, h3);
+  graph.create_edge(h4, h5);
+  graph.create_edge(h5, h6);
+
+  IntegratedSnarlFinder snarl_finder(graph);
+  SnarlDistanceIndex distance_index;
+  fill_in_distance_index(&distance_index, &graph, &snarl_finder);
+
+  SECTION("serialization hash") {
+    // Pin the byte count and FNV-1a hash of the serialized index.
+    const size_t expected_size = 1024;
+    const uint64_t expected_hash = 13763592152412395439ULL;
+    const std::vector<uint8_t> bytes = serialize_index(distance_index);
+    REQUIRE(bytes.size() == expected_size);
+    REQUIRE(fnv1a(bytes) == expected_hash);
+  }
+
+  SECTION("connected component count") {
+    // One component per disjoint chain.
+    REQUIRE(distance_index.connected_component_count() == 2);
+  }
+
+  SECTION("subgraph in distance range") {
+    // From h1 look forward: h3 is 2 steps away.
+    const path_handle_t anchor = graph.create_path_handle("path_multicomp");
+    graph.append_step(anchor, h1);
+    Path path = path_from_path_handle(graph, anchor);
+    std::unordered_set<nid_t> reached;
+    subgraph_in_distance_range(distance_index, path, &graph, 2, 3, reached,
+                               true);
+    auto in_range = [&](const handle_t &h) {
+      return reached.count(graph.get_id(h)) != 0;
+    };
+    REQUIRE(in_range(h3));
+    // h4/h5/h6 are in a different component and not reachable.
+    REQUIRE_FALSE(in_range(h4));
+    REQUIRE_FALSE(in_range(h5));
+    REQUIRE_FALSE(in_range(h6));
+  }
+}
+
+// ─── Fixture 5: oversized snarl ──────────────────────────────────────────────
+// h1 → h2 → h6
+// h1 → h3 → h6
+// h1 → h4 → h6
+// h1 → h5 → h6
+// Snarl (h1,h6) has 4 internal children; with size_limit=3 → oversized.
+
+TEST_CASE("Characterization: oversized snarl", "[snarl_characterization]") {
+  // h1 fans out to four parallel single-node branches that all rejoin at h6.
+  bdsg::HashGraph graph;
+  const handle_t h1 = graph.create_handle("A");
+  const handle_t h2 = graph.create_handle("A");
+  const handle_t h3 = graph.create_handle("A");
+  const handle_t h4 = graph.create_handle("A");
+  const handle_t h5 = graph.create_handle("A");
+  const handle_t h6 = graph.create_handle("A");
+  graph.create_edge(h1, h2);
+  graph.create_edge(h1, h3);
+  graph.create_edge(h1, h4);
+  graph.create_edge(h1, h5);
+  graph.create_edge(h2, h6);
+  graph.create_edge(h3, h6);
+  graph.create_edge(h4, h6);
+  graph.create_edge(h5, h6);
+
+  // A size limit of 3 is below the snarl's four children, so the snarl is
+  // indexed as oversized.
+  IntegratedSnarlFinder snarl_finder(graph);
+  SnarlDistanceIndex distance_index;
+  fill_in_distance_index(&distance_index, &graph, &snarl_finder,
+                         /*size_limit=*/3, false,
+                         /*silence_warnings=*/true);
+
+  SECTION("serialization size") {
+    // Hub-label content is non-deterministic (contraction hierarchy uses
+    // hash-based graph structures), so only the byte count is locked down.
+    const size_t expected_size = 7168;
+    const std::vector<uint8_t> bytes = serialize_index(distance_index);
+    REQUIRE(bytes.size() == expected_size);
+  }
+
+  SECTION("oversized snarl exists") {
+    std::vector<net_handle_t> all_snarls;
+    collect_snarls(distance_index, distance_index.get_root(), all_snarls);
+    // Look through every non-root snarl for one flagged as oversized.
+    bool found_oversized = false;
+    for (size_t i = 0; i < all_snarls.size(); ++i) {
+      if (distance_index.is_root_snarl(all_snarls[i])) {
+        continue;
+      }
+      if (distance_index.is_oversized_snarl(all_snarls[i])) {
+        found_oversized = true;
+      }
+    }
+    REQUIRE(found_oversized);
+  }
+
+  SECTION("subgraph in distance range") {
+    // From h1 look forward; h2,h3,h4,h5 are 1 step away, h6 is 2 steps.
+    const path_handle_t anchor = graph.create_path_handle("path_oversized");
+    graph.append_step(anchor, h1);
+    Path path = path_from_path_handle(graph, anchor);
+    std::unordered_set<nid_t> reached;
+    subgraph_in_distance_range(distance_index, path, &graph, 1, 2, reached,
+                               true);
+    auto in_range = [&](const handle_t &h) {
+      return reached.count(graph.get_id(h)) != 0;
+    };
+    REQUIRE(in_range(h2));
+    REQUIRE(in_range(h3));
+    REQUIRE(in_range(h4));
+    REQUIRE(in_range(h5));
+    REQUIRE(in_range(h6));
+  }
+}
+
+// ─── Fixture 6: irregular snarl ──────────────────────────────────────────────
+// h1 → h2 → h4
+// h1 → h3 → h4
+// h2 → h3       (cross-edge between children → snarl is not regular)
+
+TEST_CASE("Characterization: irregular snarl", "[snarl_characterization]") {
+  // The simple bubble h1 → {h2, h3} → h4 plus a cross-edge h2 → h3 between
+  // the two branches.
+  bdsg::HashGraph graph;
+  const handle_t h1 = graph.create_handle("A");
+  const handle_t h2 = graph.create_handle("A");
+  const handle_t h3 = graph.create_handle("A");
+  const handle_t h4 = graph.create_handle("A");
+  graph.create_edge(h1, h2);
+  graph.create_edge(h1, h3);
+  graph.create_edge(h2, h4);
+  graph.create_edge(h3, h4);
+  // Cross-edge between the snarl's children.
+  graph.create_edge(h2, h3);
+
+  IntegratedSnarlFinder snarl_finder(graph);
+  SnarlDistanceIndex distance_index;
+  fill_in_distance_index(&distance_index, &graph, &snarl_finder);
+
+  SECTION("serialization") {
+    // Only the byte count is pinned here.  The FNV-1a hash assertion for this
+    // fixture (recorded value 14645746962011564342ULL) is disabled.
+    // NOTE(review): the reason it was disabled is not recorded; confirm
+    // whether the serialized bytes are deterministic before re-enabling it.
+    const size_t expected_size = 1024;
+    const std::vector<uint8_t> bytes = serialize_index(distance_index);
+    REQUIRE(bytes.size() == expected_size);
+  }
+
+  SECTION("snarl is not regular") {
+    std::vector<net_handle_t> all_snarls;
+    collect_snarls(distance_index, distance_index.get_root(), all_snarls);
+    // Look through every non-root snarl for one that is not regular.
+    bool found_irregular = false;
+    for (size_t i = 0; i < all_snarls.size(); ++i) {
+      if (distance_index.is_root_snarl(all_snarls[i])) {
+        continue;
+      }
+      if (!distance_index.is_regular_snarl(all_snarls[i])) {
+        found_irregular = true;
+      }
+    }
+    REQUIRE(found_irregular);
+  }
+
+  SECTION("subgraph in distance range") {
+    // From h1, h2 and h3 are 1 step away, h4 is 2 steps.
+    const path_handle_t anchor = graph.create_path_handle("path_irregular");
+    graph.append_step(anchor, h1);
+    Path path = path_from_path_handle(graph, anchor);
+    std::unordered_set<nid_t> reached;
+    subgraph_in_distance_range(distance_index, path, &graph, 1, 2, reached,
+                               true);
+    auto in_range = [&](const handle_t &h) {
+      return reached.count(graph.get_id(h)) != 0;
+    };
+    REQUIRE(in_range(h2));
+    REQUIRE(in_range(h3));
+    REQUIRE(in_range(h4));
+    // The start node itself is not part of the result.
+    REQUIRE_FALSE(in_range(h1));
+  }
+}
+
+} // namespace unittest
+} // namespace vg
diff --git a/src/unittest/snarls.cpp b/src/unittest/snarls.cpp
index c2f5030326..c7edf85b05 100644
--- a/src/unittest/snarls.cpp
+++ b/src/unittest/snarls.cpp
@@ -9,6 +9,8 @@
 #include <sstream>
 #include <set>
 #include "vg/io/json2pb.h"
+#include "../io/json2graph.hpp"
+#include <bdsg/hash_graph.hpp>
 #include <vg/vg.pb.h>
 #include "catch.hpp"
 #include "support/random_graph.hpp"
@@ -1697,14 +1699,12 @@ namespace vg {
                              ]
                 }
                 )";
-                
+
                 VG graph;
-                
+
                 // Load up the graph
-                Graph g;
-                json2pb(g, graph_json.c_str(), graph_json.size());
-                graph.extend(g);
-                
+                vg::io::json2graph(graph_json, &graph);
+
                 // Define the one snarl
                 Snarl snarl1;
                 snarl1.mutable_start()->set_node_id(6462830);
@@ -1830,14 +1830,12 @@ namespace vg {
                 string snarl1_json = R"({"type": 1, "end": {"node_id": 187208}, "start": {"node_id": 178894}})";
                 string snarl2_json = R"({"type": 1, "end": {"node_id": 187209, "backward": true}, "start": {"node_id": 178895, "backward": true}, "parent": {"end": {"node_id": 187208}, "start": {"node_id": 178894}}})";
                 string snarl3_json = R"({"type": 1, "end": {"node_id": 178896}, "start": {"node_id": 178895}, "parent": {"end": {"node_id": 187208}, "start": {"node_id": 178894}}})";
-                
+
                 VG graph;
-                
+
                 // Load up the graph
-                Graph g;
-                json2pb(g, graph_json.c_str(), graph_json.size());
-                graph.extend(g);
-                
+                vg::io::json2graph(graph_json, &graph);
+
                 // Load the snarls
                 Snarl snarl1, snarl2, snarl3;
                 json2pb(snarl1, snarl1_json.c_str(), snarl1_json.size());
@@ -1917,13 +1915,11 @@ namespace vg {
             }
             
             )";
-            
+
             // Make an actual graph
             VG graph;
-            Graph chunk;
-            json2pb(chunk, graph_json.c_str(), graph_json.size());
-            graph.extend(chunk);
-            
+            vg::io::json2graph(graph_json, &graph);
+
             // We need to see the path.
             REQUIRE(graph.paths.size() == 1);
             
@@ -2045,10 +2041,8 @@ namespace vg {
             
             // Make an actual graph
             VG graph;
-            Graph chunk;
-            json2pb(chunk, graph_json.c_str(), graph_json.size());
-            graph.extend(chunk);
-            
+            vg::io::json2graph(graph_json, &graph);
+
             SnarlManager snarl_manager = CactusSnarlFinder(graph).find_snarls();
 
 #ifdef debug
@@ -2061,7 +2055,7 @@ namespace vg {
                 cerr << endl;
             });
 #endif
-        
+
             SECTION("Root node has 1 child bubble") {
                 REQUIRE(snarl_manager.top_level_snarls().size() == 1);
                 
@@ -2127,15 +2121,13 @@ namespace vg {
                     ]}
                 ]
             }
-            
+
             )";
-            
+
             // Make an actual graph
             VG graph;
-            Graph chunk;
-            json2pb(chunk, graph_json.c_str(), graph_json.size());
-            graph.extend(chunk);
-            
+            vg::io::json2graph(graph_json, &graph);
+
             SnarlManager snarl_manager = CactusSnarlFinder(graph).find_snarls();
             
 #ifdef debug
@@ -2246,15 +2238,13 @@ namespace vg {
                     ]}
                 ]
             }
-            
+
             )";
-            
+
             // Make an actual graph
             VG graph;
-            Graph chunk;
-            json2pb(chunk, graph_json.c_str(), graph_json.size());
-            graph.extend(chunk);
-            
+            vg::io::json2graph(graph_json, &graph);
+
             SnarlManager snarl_manager = CactusSnarlFinder(graph).find_snarls();
             
 #ifdef debug
@@ -2354,18 +2344,16 @@ namespace vg {
                     {"from": 2, "to": 4},
                     {"from": 2, "to": 3},
                     {"from": 2, "to": 2},
-                    {"from": 3, "to": 3}            
+                    {"from": 3, "to": 3}
                 ]
             }
-            
+
             )";
-            
+
             // Make an actual graph
             VG graph;
-            Graph chunk;
-            json2pb(chunk, graph_json.c_str(), graph_json.size());
-            graph.extend(chunk);
-            
+            vg::io::json2graph(graph_json, &graph);
+
             SnarlManager snarl_manager = CactusSnarlFinder(graph).find_snarls();
             
 #ifdef debug
@@ -2415,15 +2403,13 @@ namespace vg {
                     ]}
                 ]
             }
-            
+
             )";
-            
+
             // Make an actual graph
             VG graph;
-            Graph chunk;
-            json2pb(chunk, graph_json.c_str(), graph_json.size());
-            graph.extend(chunk);
-            
+            vg::io::json2graph(graph_json, &graph);
+
             SnarlManager snarl_manager = CactusSnarlFinder(graph).find_snarls();
             
 #ifdef debug
@@ -2490,18 +2476,16 @@ namespace vg {
                 "edge": [
                     {"from": 1, "to": 2},
                     {"from": 2, "to": 1}
-                    
+
                 ]
             }
-            
+
             )";
-            
+
             // Make an actual graph
             VG graph;
-            Graph chunk;
-            json2pb(chunk, graph_json.c_str(), graph_json.size());
-            graph.extend(chunk);
-            
+            vg::io::json2graph(graph_json, &graph);
+
             SnarlManager snarl_manager = CactusSnarlFinder(graph).find_snarls();
             
 #ifdef debug
@@ -2555,15 +2539,13 @@ namespace vg {
                     {"from": 3, "to": 6}
                 ]
             }
-            
+
             )";
-            
+
             // Make an actual graph
             VG graph;
-            Graph chunk;
-            json2pb(chunk, graph_json.c_str(), graph_json.size());
-            graph.extend(chunk);
-            
+            vg::io::json2graph(graph_json, &graph);
+
             SnarlManager snarl_manager = CactusSnarlFinder(graph).find_snarls();
             
 #ifdef debug
@@ -2767,15 +2749,13 @@ namespace vg {
                     {"from": 9, "to": 10}
                 ]
             }
-            
+
             )";
-            
+
             // Make an actual graph
             VG graph;
-            Graph chunk;
-            json2pb(chunk, graph_json.c_str(), graph_json.size());
-            graph.extend(chunk);
-            
+            vg::io::json2graph(graph_json, &graph);
+
             SnarlManager snarl_manager = CactusSnarlFinder(graph).find_snarls();
 #ifdef debug
             snarl_manager.for_each_snarl_preorder([&](const Snarl* snarl) {
@@ -3919,14 +3899,12 @@ namespace vg {
                             {"position": {"node_id": 7, "is_reverse" : "true"}, "rank" : 5 }
                         ]}
                     ]
-                }            
+                }
                 )";
-                
+
             // Make an actual graph
             VG graph;
-            Graph chunk;
-            json2pb(chunk, graph_json.c_str(), graph_json.size());
-            graph.extend(chunk);
+            vg::io::json2graph(graph_json, &graph);
             assert(graph.is_valid());
             
             SECTION( "PathTraversalFinder can find simple forward traversals") {
diff --git a/src/unittest/source_sink_overlay.cpp b/src/unittest/source_sink_overlay.cpp
index 4c0ecbc20f..bf2aa3bc13 100644
--- a/src/unittest/source_sink_overlay.cpp
+++ b/src/unittest/source_sink_overlay.cpp
@@ -10,7 +10,8 @@
 #include "../source_sink_overlay.hpp"
 #include "../kmer.hpp"
 #include "../vg.hpp"
-#include "vg/io/json2pb.h"
+#include "../io/json2graph.hpp"
+#include <bdsg/hash_graph.hpp>
 
 #include <iostream>
 #include <vector>
@@ -132,11 +133,9 @@ TEST_CASE("SourceSinkOverlay adds a source and a sink to a 1-node graph", "[over
 
 TEST_CASE("SourceSinkOverlay agrees with VG::add_start_end_markers in a tiny graph", "[overlay]") {
     const string graph_json = R"({"node":[{"sequence":"CAAATAAG","id":"1"},{"sequence":"A","id":"2"},{"sequence":"G","id":"3"},{"sequence":"T","id":"4"},{"sequence":"C","id":"5"},{"sequence":"TTG","id":"6"},{"sequence":"A","id":"7"},{"sequence":"G","id":"8"},{"sequence":"AAATTTTCTGGAGTTCTAT","id":"9"},{"sequence":"A","id":"10"},{"sequence":"T","id":"11"},{"sequence":"ATAT","id":"12"},{"sequence":"A","id":"13"},{"sequence":"T","id":"14"},{"sequence":"CCAACTCTCTG","id":"15"}],"edge":[{"from":"1","to":"2"},{"from":"1","to":"3"},{"from":"2","to":"4"},{"from":"2","to":"5"},{"from":"3","to":"4"},{"from":"3","to":"5"},{"from":"4","to":"6"},{"from":"5","to":"6"},{"from":"6","to":"7"},{"from":"6","to":"8"},{"from":"7","to":"9"},{"from":"8","to":"9"},{"from":"9","to":"10"},{"from":"9","to":"11"},{"from":"10","to":"12"},{"from":"11","to":"12"},{"from":"12","to":"13"},{"from":"12","to":"14"},{"from":"13","to":"15"},{"from":"14","to":"15"}],"path":[{"name":"x","mapping":[{"position":{"node_id":"1"},"edit":[{"from_length":8,"to_length":8}],"rank":"1"},{"position":{"node_id":"3"},"edit":[{"from_length":1,"to_length":1}],"rank":"2"},{"position":{"node_id":"5"},"edit":[{"from_length":1,"to_length":1}],"rank":"3"},{"position":{"node_id":"6"},"edit":[{"from_length":3,"to_length":3}],"rank":"4"},{"position":{"node_id":"8"},"edit":[{"from_length":1,"to_length":1}],"rank":"5"},{"position":{"node_id":"9"},"edit":[{"from_length":19,"to_length":19}],"rank":"6"},{"position":{"node_id":"11"},"edit":[{"from_length":1,"to_length":1}],"rank":"7"},{"position":{"node_id":"12"},"edit":[{"from_length":4,"to_length":4}],"rank":"8"},{"position":{"node_id":"14"},"edit":[{"from_length":1,"to_length":1}],"rank":"9"},{"position":{"node_id":"15"},"edit":[{"from_length":11,"to_length":11}],"rank":"10"}]}]})";
-    
-    Graph graph;
-    json2pb(graph, graph_json);
-    
-    VG produced(graph);
+
+    VG produced;
+    vg::io::json2graph(graph_json, &produced);
     
     id_t highest_id = produced.max_node_id();
     id_t start_id = highest_id + 1;
diff --git a/src/unittest/support/random_graph.hpp b/src/unittest/support/random_graph.hpp
index 7597beeab9..e3e812d265 100644
--- a/src/unittest/support/random_graph.hpp
+++ b/src/unittest/support/random_graph.hpp
@@ -1,11 +1,16 @@
+#ifndef VG_UNITTEST_RANDOM_GRAPH_HPP_INCLUDED
+#define VG_UNITTEST_RANDOM_GRAPH_HPP_INCLUDED
+/** \file random_graph.hpp
+ * Utilities for randomizing graphs for test cases.
+ */
+
+
 #include "handle.hpp"
 #include <vector>
 
-#ifndef VG_UNITTEST_RANDOM_GRAPH_HPP_INCLUDED
-#define VG_UNITTEST_RANDOM_GRAPH_HPP_INCLUDED
 
-namespace vg{
-namespace unittest{
+namespace vg {
+namespace unittest {
 
 /// Create a random graph by adding variation to a sequence of length seq_size
 /// variant_len is the mean length of a larger variation and variant_count
diff --git a/src/unittest/support/randomly_flipped_nodes.hpp b/src/unittest/support/randomly_flipped_nodes.hpp
new file mode 100644
index 0000000000..40b00bda26
--- /dev/null
+++ b/src/unittest/support/randomly_flipped_nodes.hpp
@@ -0,0 +1,83 @@
+#ifndef VG_UNITTEST_RANDOMLY_FLIPPED_NODES_HPP_INCLUDED
+#define VG_UNITTEST_RANDOMLY_FLIPPED_NODES_HPP_INCLUDED
+
+/**
+ * \file randomly_flipped_nodes.hpp
+ * Utility for creating a copy of a HandleGraph with a random subset of nodes
+ * flipped in orientation.
+ */
+
+#include <random>
+#include <bdsg/hash_graph.hpp>
+#include "handle.hpp"
+
+namespace vg {
+namespace unittest {
+
+/**
+ * Build a bdsg::HashGraph copy of source in which each node is independently
+ * flipped with probability p_flip, using one draw from generator per node.
+ *
+ * Flipping a node stores its reverse-complement sequence as the new forward
+ * sequence, and every edge endpoint on a flipped node has its orientation
+ * toggled, so edges that attached to the node's forward orientation attach to
+ * its reverse orientation in the copy and vice versa.
+ *
+ * Node IDs are preserved in the returned graph.
+ */
+template<typename URNG>
+bdsg::HashGraph randomly_flipped_nodes(const HandleGraph& source, double p_flip, URNG& generator) {
+    bdsg::HashGraph flipped_copy;
+    std::uniform_real_distribution<double> coin(0.0, 1.0);
+
+    // IDs of the nodes that were reversed in the copy
+    std::unordered_set<nid_t> flipped_ids;
+
+    // Pass 1: copy every node, deciding per node whether to flip it.
+    source.for_each_handle([&](const handle_t& node) {
+        const nid_t node_id = source.get_id(node);
+        const bool do_flip = coin(generator) < p_flip;
+        // Reading the sequence through the flipped handle yields the reverse
+        // complement, which becomes the copy's forward sequence.
+        const handle_t oriented = do_flip ? source.flip(node) : node;
+        flipped_copy.create_handle(source.get_sequence(oriented), node_id);
+        if (do_flip) {
+            flipped_ids.insert(node_id);
+        }
+    });
+
+    // Translate a handle in source into the handle for the same node side in
+    // the copy: the orientation is toggled exactly when the node was flipped.
+    auto translate = [&](const handle_t& original) {
+        const nid_t node_id = source.get_id(original);
+        const bool was_flipped = flipped_ids.count(node_id) != 0;
+        const bool is_reverse = source.get_is_reverse(original) != was_flipped;
+        return flipped_copy.get_handle(node_id, is_reverse);
+    };
+
+    // Pass 2: copy every edge with both endpoints translated.
+    source.for_each_edge([&](const edge_t& edge) {
+        const handle_t from = translate(edge.first);
+        const handle_t to = translate(edge.second);
+        flipped_copy.create_edge(from, to);
+        return true;
+    });
+
+    return flipped_copy;
+}
+
+} // namespace unittest
+} // namespace vg
+
+#endif
diff --git a/src/unittest/support/snarl_decomposition_fuzzer.cpp b/src/unittest/support/snarl_decomposition_fuzzer.cpp
new file mode 100644
index 0000000000..263ad486cf
--- /dev/null
+++ b/src/unittest/support/snarl_decomposition_fuzzer.cpp
@@ -0,0 +1,187 @@
+#include "snarl_decomposition_fuzzer.hpp"
+
+#include <cassert>
+#include <stack>
+
+namespace vg {
+namespace unittest {
+
+using ET = DecompositionEventType;
+
+// Construct a fuzzer over graph that wraps finder and installs a should_flip
+// predicate answering true exactly for the node IDs in chains_to_flip.
+SnarlDecompositionFuzzer::SnarlDecompositionFuzzer(
+    const HandleGraph* graph,
+    const HandleGraphSnarlFinder* finder,
+    const std::unordered_set<nid_t>& chains_to_flip)
+    : HandleGraphSnarlFinder(graph), wrapped(finder)
+{
+    // The ID set is captured by value because the predicate outlives this
+    // constructor call, while chains_to_flip is only a reference to the
+    // caller's set. Nothing else is captured: the predicate needs only the IDs.
+    should_flip = [chains_to_flip](nid_t node_id) -> bool {
+        return chains_to_flip.count(node_id) != 0;
+    };
+}
+
+// Replay the wrapped finder's decomposition through the four callbacks, but
+// traverse every chain selected by should_flip from its opposite end, emitting
+// the events inside it in reverse order and passed through flip().
+// NOTE(review): flip(event, graph) for handle events is defined outside this
+// file; presumably it swaps begin/end and reverses the handle — confirm.
+void SnarlDecompositionFuzzer::traverse_decomposition(
+    const function<void(handle_t)>& begin_chain,
+    const function<void(handle_t)>& end_chain,
+    const function<void(handle_t)>& begin_snarl,
+    const function<void(handle_t)>& end_snarl) const
+{
+    // Step 1: Capture all events from the wrapped finder.
+    std::vector<DecompositionHandleEvent> events = capture_events(*wrapped);
+
+    if (events.empty()) {
+        return;
+    }
+
+    // Step 2: Build pairing vector mapping each begin to its matching end
+    // and vice versa, using separate stacks for chains and snarls.
+    std::vector<size_t> other_bound(events.size());
+    {
+        stack<size_t> chain_stack, snarl_stack;
+        for (size_t i = 0; i < events.size(); i++) {
+            switch (events[i].type) {
+            case ET::BEGIN_CHAIN:
+                chain_stack.push(i);
+                break;
+            case ET::END_CHAIN:
+                // The most recent unmatched chain begin is this end's partner.
+                assert(!chain_stack.empty());
+                other_bound[i] = chain_stack.top();
+                other_bound[chain_stack.top()] = i;
+                chain_stack.pop();
+                break;
+            case ET::BEGIN_SNARL:
+                snarl_stack.push(i);
+                break;
+            case ET::END_SNARL:
+                // The most recent unmatched snarl begin is this end's partner.
+                assert(!snarl_stack.empty());
+                other_bound[i] = snarl_stack.top();
+                other_bound[snarl_stack.top()] = i;
+                snarl_stack.pop();
+                break;
+            }
+        }
+    }
+
+    // Step 3: Walk through events with a cursor, flipping chains as needed.
+    // When we flip a chain, we jump to the other end and reverse direction,
+    // pushing the entry point onto a stack. When the cursor reaches a stack
+    // entry point, we jump back to the far end and restore direction.
+    struct FlipEntry {
+        // Index of the event through which the flipped chain was entered.
+        size_t entry_index;
+        // Walking direction in effect before the flip, restored on exit.
+        bool original_reverse;
+    };
+    std::stack<FlipEntry> flip_stack;
+
+    auto emitter = event_emitter(begin_chain, end_chain, begin_snarl, end_snarl);
+
+    bool reverse = false;
+    // cursor is unsigned, so adding -1 wraps around and has the effect of
+    // decrementing it by one. The walk ends when the cursor steps past the
+    // last event.
+    for (size_t cursor = 0; cursor != events.size(); cursor += reverse ? -1 : 1) {
+        // We know if we're entering a chain, we can't be at a stack pop point.
+        // So we can handle those cases separately.
+
+        if (events[cursor].type == (reverse ? ET::END_CHAIN : ET::BEGIN_CHAIN) && 
+            should_flip(graph->get_id(events[cursor].handle))) {
+            
+            // We're entering a chain, and this is a chain we want to flip. So
+            // flip before emitting anything.
+
+            // Flip: remember where we entered, jump to the other end,
+            // reverse direction, emit the entry event there.
+            flip_stack.push({cursor, reverse});
+            cursor = other_bound[cursor];
+            reverse = !reverse;
+        }
+        
+        // Emit the event here
+        emitter(reverse ? flip(events[cursor], graph) : events[cursor]);
+
+        if (!flip_stack.empty() && cursor == flip_stack.top().entry_index) {
+            // We've returned to the entry point of a flipped chain, so after
+            // emitting, go back to the entry orientation and jump to the other
+            // side, so we can advance out of it.
+            
+            FlipEntry entry = flip_stack.top();
+            flip_stack.pop();
+            cursor = other_bound[entry.entry_index];
+            reverse = entry.original_reverse;
+        }
+    }
+}
+
+// ReplaySnarlFinder implementation
+
+// Store the scripted events in handle form, resolving each (node ID,
+// orientation) pair against graph once, at construction time.
+ReplaySnarlFinder::ReplaySnarlFinder(const HandleGraph* graph, const std::vector<DecompositionEvent>& events) : HandleGraphSnarlFinder(graph) {
+    // The parameter shadows the member of the same name, so refer to the
+    // member through a local alias.
+    auto& stored = this->events;
+    stored.reserve(events.size());
+    for (size_t i = 0; i < events.size(); ++i) {
+        const DecompositionEvent& scripted = events[i];
+        const handle_t resolved = graph->get_handle(scripted.id, scripted.is_reverse);
+        stored.emplace_back(scripted.type, resolved);
+    }
+}
+
+// Replay the stored events in order, routing each one to the callback that
+// matches its type.
+void ReplaySnarlFinder::traverse_decomposition(
+    const std::function<void(handle_t)>& begin_chain,
+    const std::function<void(handle_t)>& end_chain,
+    const std::function<void(handle_t)>& begin_snarl,
+    const std::function<void(handle_t)>& end_snarl) const
+{
+    const auto emit = event_emitter(begin_chain, end_chain, begin_snarl, end_snarl);
+    for (auto it = events.begin(); it != events.end(); ++it) {
+        emit(*it);
+    }
+}
+
+// Build a dispatcher that forwards a handle event to whichever of the four
+// callbacks matches the event's type, passing the event's handle.
+//
+// The callbacks are copied into the returned function object, so it remains
+// valid even if the arguments were temporaries (for example lambdas converted
+// to std::function at the call site) that are destroyed after this call.
+std::function<void(const DecompositionHandleEvent&)> event_emitter(
+    const std::function<void(handle_t)>& begin_chain,
+    const std::function<void(handle_t)>& end_chain,
+    const std::function<void(handle_t)>& begin_snarl,
+    const std::function<void(handle_t)>& end_snarl
+) {
+    // Capture by value: a by-reference capture would leave the returned
+    // closure holding references to objects it does not own.
+    return [begin_chain, end_chain, begin_snarl, end_snarl](const DecompositionHandleEvent& event) {
+        switch (event.type) {
+        case ET::BEGIN_CHAIN:
+            begin_chain(event.handle);
+            break;
+        case ET::END_CHAIN:
+            end_chain(event.handle);
+            break;
+        case ET::BEGIN_SNARL:
+            begin_snarl(event.handle);
+            break;
+        case ET::END_SNARL:
+            end_snarl(event.handle);
+            break;
+        }
+    };
+}
+
+// Record the finder's traversal and describe every event by node ID and
+// orientation, looked up in graph, instead of by handle.
+std::vector<DecompositionEvent> capture_events(const HandleGraphSnarlFinder& finder, const HandleGraph& graph) {
+    // First record the traversal in handle form
+    const std::vector<DecompositionHandleEvent> recorded = capture_events(finder);
+    // Then translate each handle into an (ID, orientation) pair
+    std::vector<DecompositionEvent> translated;
+    translated.reserve(recorded.size());
+    for (size_t i = 0; i < recorded.size(); ++i) {
+        const handle_t& h = recorded[i].handle;
+        translated.emplace_back(recorded[i].type, graph.get_id(h), graph.get_is_reverse(h));
+    }
+    return translated;
+}
+
+// Run the finder's traversal and return every begin/end chain/snarl event it
+// reports, in the order reported, in handle form.
+std::vector<DecompositionHandleEvent> capture_events(const HandleGraphSnarlFinder& finder) {
+    std::vector<DecompositionHandleEvent> log;
+    // Single recorder shared by the four callbacks below.
+    auto record = [&log](ET type, const handle_t& h) {
+        log.push_back({type, h});
+    };
+    finder.traverse_decomposition(
+        [&record](const handle_t& h) { record(ET::BEGIN_CHAIN, h); },
+        [&record](const handle_t& h) { record(ET::END_CHAIN, h); },
+        [&record](const handle_t& h) { record(ET::BEGIN_SNARL, h); },
+        [&record](const handle_t& h) { record(ET::END_SNARL, h); }
+    );
+    return log;
+}
+
+} // namespace unittest
+} // namespace vg
diff --git a/src/unittest/support/snarl_decomposition_fuzzer.hpp b/src/unittest/support/snarl_decomposition_fuzzer.hpp
new file mode 100644
index 0000000000..91d92e97cb
--- /dev/null
+++ b/src/unittest/support/snarl_decomposition_fuzzer.hpp
@@ -0,0 +1,197 @@
+#ifndef VG_UNITTEST_SNARL_DECOMPOSITION_FUZZER_HPP_INCLUDED
+#define VG_UNITTEST_SNARL_DECOMPOSITION_FUZZER_HPP_INCLUDED
+
+/**
+ * \file snarl_decomposition_fuzzer.hpp
+ * Provides SnarlDecompositionFuzzer, which wraps a HandleGraphSnarlFinder and
+ * randomly flips chains in the snarl decomposition, and ReplaySnarlFinder,
+ * which replays a scripted sequence of decomposition events.
+ */
+
+#include <functional>
+#include <random>
+#include <vector>
+#include <set>
+#include <unordered_set>
+#include <utility>
+#include "snarls.hpp"
+#include "handle.hpp"
+namespace vg {
+namespace unittest {
+
+/// Snarl decomposition traversal event kinds; bit 0 = END, bit 1 = SNARL.
+enum class DecompositionEventType {
+    BEGIN_CHAIN = 0, // 0b00
+    END_CHAIN = 1,   // 0b01
+    BEGIN_SNARL = 2, // 0b10
+    END_SNARL = 3    // 0b11
+};
+
+inline std::ostream& operator<<(std::ostream& out, const DecompositionEventType& t) {
+    const int code = static_cast<int>(t); // Bit 0 picks BEGIN/END; bit 1 picks CHAIN/SNARL.
+    return out << ((code & 1) != 0 ? "END" : "BEGIN") << "_" << ((code & 2) != 0 ? "SNARL" : "CHAIN");
+}
+
+/// Flip the polarity of an event type (begin vs. end), keeping chain vs. snarl.
+inline DecompositionEventType flip(const DecompositionEventType& t) {
+    // BEGIN/END differ only in the lowest bit of the enum value, so toggle it.
+    return static_cast<DecompositionEventType>(static_cast<int>(t) ^ 1);
+}
+
+/// One event of a snarl decomposition traversal, identified by node ID and
+/// orientation rather than by handle, since those are easier to spell in tests.
+struct DecompositionEvent {
+    DecompositionEventType type;
+    nid_t id;
+    bool is_reverse;
+
+    inline bool operator==(const DecompositionEvent& other) const {
+        return !(type != other.type || id != other.id || is_reverse != other.is_reverse);
+    }
+    inline bool operator!=(const DecompositionEvent& other) const {
+        return !operator==(other);
+    }
+};
+
+inline std::ostream& operator<<(std::ostream& out, const DecompositionEvent& e) {
+    const char* strand = e.is_reverse ? "-" : "+";
+    return out << e.type << "(" << e.id << strand << ")";
+}
+
+/// A single event in a snarl decomposition traversal, expressed with a
+/// handle instead of an ID and orientation, for convenient internal use.
+struct DecompositionHandleEvent {
+    DecompositionEventType type; ///< What kind of boundary this event marks.
+    handle_t handle; ///< The handle the event was emitted with.
+};
+
+/// Flip a whole event: swap begin/end in its type and flip its handle's orientation using g.
+inline DecompositionHandleEvent flip(const DecompositionHandleEvent& e, const HandleGraph* g) {
+    return DecompositionHandleEvent{flip(e.type), g->flip(e.handle)};
+}
+
+/// Build a function that forwards each event's handle to whichever of the four
+/// callbacks matches its type. The callbacks are held by reference, so they must outlive the returned function.
+std::function<void(const DecompositionHandleEvent&)> event_emitter(
+    const std::function<void(handle_t)>& begin_chain,
+    const std::function<void(handle_t)>& end_chain,
+    const std::function<void(handle_t)>& begin_snarl,
+    const std::function<void(handle_t)>& end_snarl
+);
+
+/// Run the finder's traversal and return every event it emits, as node IDs and orientations looked up in graph.
+std::vector<DecompositionEvent> capture_events(const HandleGraphSnarlFinder& finder, const HandleGraph& graph);
+
+/// Run the finder's traversal and return every event it emits, in emission order, in terms of handles.
+std::vector<DecompositionHandleEvent> capture_events(const HandleGraphSnarlFinder& finder);
+
+/**
+ * A HandleGraphSnarlFinder that wraps another HandleGraphSnarlFinder and
+ * randomly flips chains in the snarl decomposition. Flipping a chain reverses
+ * the entire chain including all children; if a child chain is also selected
+ * for flipping, it gets flipped again (canceling the parent's flip for that
+ * child).
+ *
+ * For non-randomized testing, the specific chains to flip can instead be
+ * identified up front and provided on construction.
+ */
+class SnarlDecompositionFuzzer : public HandleGraphSnarlFinder {
+public:
+    /**
+     * Construct a fuzzer wrapping the given finder, flipping chains with
+     * probability p_flip. The generator is captured by reference and must
+     * outlive the fuzzer. The graph pointer is needed to flip handles.
+     */
+    template<typename URNG>
+    SnarlDecompositionFuzzer(const HandleGraph* graph,
+                             const HandleGraphSnarlFinder* finder,
+                             double p_flip, URNG& generator);
+
+    /**
+     * Construct a fuzzer wrapping the given finder, flipping the chains
+     * bounded by the given node IDs.
+     *
+     * You should provide both bounding IDs for each chain, but only the one
+     * through which the traversal actually arrives at the chain will really
+     * get used.
+     *
+     * Note that a node can bound at most one chain.
+     *
+     * This is mostly for testing the fuzzer itself.
+     */
+    SnarlDecompositionFuzzer(const HandleGraph* graph,
+                             const HandleGraphSnarlFinder* finder,
+                             const std::unordered_set<nid_t>& chains_to_flip);
+
+    virtual ~SnarlDecompositionFuzzer() = default;
+
+    /**
+     * Traverse the decomposition, reporting it to the callbacks with selected chains flipped.
+     */
+    virtual void traverse_decomposition(
+        const std::function<void(handle_t)>& begin_chain,
+        const std::function<void(handle_t)>& end_chain,
+        const std::function<void(handle_t)>& begin_snarl,
+        const std::function<void(handle_t)>& end_snarl
+    ) const override;
+
+private:
+    /// The wrapped snarl finder whose decomposition gets fuzzed.
+    const HandleGraphSnarlFinder* wrapped;
+
+    /// Decides whether to flip a chain, given either of its bounding node
+    /// IDs. May produce different results when called multiple times with
+    /// the same input (the random version ignores its argument entirely).
+    std::function<bool(nid_t)> should_flip;
+};
+
+/**
+ * A HandleGraphSnarlFinder that replays a scripted sequence of decomposition
+ * events instead of computing a decomposition. Useful for testing
+ * SnarlDecompositionFuzzer without having to run a real snarl finder.
+ */
+class ReplaySnarlFinder : public HandleGraphSnarlFinder {
+public:
+    /**
+     * Construct a replay finder that will emit the given events (given as node IDs and orientations).
+     */
+    ReplaySnarlFinder(const HandleGraph* graph, const std::vector<DecompositionEvent>& events);
+
+    virtual ~ReplaySnarlFinder() = default;
+
+    /**
+     * Replay the scripted events through the given callbacks.
+     */
+    virtual void traverse_decomposition(
+        const std::function<void(handle_t)>& begin_chain,
+        const std::function<void(handle_t)>& end_chain,
+        const std::function<void(handle_t)>& begin_snarl,
+        const std::function<void(handle_t)>& end_snarl
+    ) const override;
+
+private:
+
+    using EventType = DecompositionEventType;
+    using Event = DecompositionHandleEvent;
+    
+    /// The events we are going to replay, stored in handle form rather than as IDs and orientations.
+    std::vector<Event> events;
+};
+
+
+template<typename URNG>
+SnarlDecompositionFuzzer::SnarlDecompositionFuzzer(
+    const HandleGraph* graph,
+    const HandleGraphSnarlFinder* finder,
+    double p_flip, URNG& generator)
+    : HandleGraphSnarlFinder(graph), wrapped(finder),
+      // Draws from the caller's generator (held by reference) on every call; the ID is unused.
+      should_flip([&generator, p_flip](nid_t) -> bool {
+          std::uniform_real_distribution<double> unit_interval(0.0, 1.0);
+          return unit_interval(generator) < p_flip;
+      }) {}
+
+} // namespace unittest
+} // namespace vg
+
+#endif
diff --git a/src/unittest/variant_adder.cpp b/src/unittest/variant_adder.cpp
index afe3353e4b..6fad7d82ab 100644
--- a/src/unittest/variant_adder.cpp
+++ b/src/unittest/variant_adder.cpp
@@ -9,7 +9,7 @@
 
 #include "../utility.hpp"
 #include "../path.hpp"
-#include "vg/io/json2pb.h"
+#include "../io/json2graph.hpp"
 
 #include <vector>
 #include <sstream>
@@ -38,7 +38,7 @@ ref	5	rs1337	A	G	29	PASS	.	GT
 
     // Make a stream out of the data
     std::stringstream vcf_stream(vcf_data);
-    
+
     // Load it up in vcflib
     vcflib::VariantCallFile vcf;
     vcf.open(vcf_stream);
@@ -51,14 +51,10 @@ ref	5	rs1337	A	G	29	PASS	.	GT
             ]}
         ]
     })";
-    
+
     // Load the JSON
-    Graph proto_graph;
-    json2pb(proto_graph, graph_json.c_str(), graph_json.size());
-    
-    // Make it into a VG
     VG graph;
-    graph.extend(proto_graph);
+    json2graph(graph_json, &graph);
     
     
     // Make a VariantAdder
@@ -85,7 +81,7 @@ ref	5	rs1337	A	G	29	PASS	.	GT	0/1
 
     // Make a stream out of the data
     std::stringstream vcf_stream(vcf_data);
-    
+
     // Load it up in vcflib
     vcflib::VariantCallFile vcf;
     vcf.open(vcf_stream);
@@ -98,14 +94,10 @@ ref	5	rs1337	A	G	29	PASS	.	GT	0/1
             ]}
         ]
     })";
-    
+
     // Load the JSON
-    Graph proto_graph;
-    json2pb(proto_graph, graph_json.c_str(), graph_json.size());
-    
-    // Make it into a VG
     VG graph;
-    graph.extend(proto_graph);
+    json2graph(graph_json, &graph);
     
     // Make a VariantAdder
     VariantAdder adder(graph);
@@ -139,7 +131,7 @@ ref	5	rs1337	AAAAAAAAAAAAAAAAAAAAA	A	29	PASS	.	GT	0/1
 
     // Make a stream out of the data
     std::stringstream vcf_stream(vcf_data);
-    
+
     // Load it up in vcflib
     vcflib::VariantCallFile vcf;
     vcf.open(vcf_stream);
@@ -152,14 +144,10 @@ ref	5	rs1337	AAAAAAAAAAAAAAAAAAAAA	A	29	PASS	.	GT	0/1
             ]}
         ]
     })";
-    
+
     // Load the JSON
-    Graph proto_graph;
-    json2pb(proto_graph, graph_json.c_str(), graph_json.size());
-    
-    // Make it into a VG
     VG graph;
-    graph.extend(proto_graph);
+    json2graph(graph_json, &graph);
     
     // Make a VariantAdder
     VariantAdder adder(graph);
@@ -193,7 +181,7 @@ ref	5	rs1337	AAAAAAAAAAAAAAAAAAAAA	A	29	PASS	.	GT	0/1
 
     // Make a stream out of the data
     std::stringstream vcf_stream(vcf_data);
-    
+
     // Load it up in vcflib
     vcflib::VariantCallFile vcf;
     vcf.open(vcf_stream);
@@ -213,14 +201,10 @@ ref	5	rs1337	AAAAAAAAAAAAAAAAAAAAA	A	29	PASS	.	GT	0/1
             ]}
         ]
     })";
-    
+
     // Load the JSON
-    Graph proto_graph;
-    json2pb(proto_graph, graph_json.c_str(), graph_json.size());
-    
-    // Make it into a VG
     VG graph;
-    graph.extend(proto_graph);
+    json2graph(graph_json, &graph);
     
     SECTION ("should work when the graph is as given") {
     
@@ -280,7 +264,7 @@ ref	5	rs1337	AAAAAAAAAAAAAAAAAAAAA	AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA	29
 
     // Make a stream out of the data
     std::stringstream vcf_stream(vcf_data);
-    
+
     // Load it up in vcflib
     vcflib::VariantCallFile vcf;
     vcf.open(vcf_stream);
@@ -293,14 +277,10 @@ ref	5	rs1337	AAAAAAAAAAAAAAAAAAAAA	AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA	29
             ]}
         ]
     })";
-    
+
     // Load the JSON
-    Graph proto_graph;
-    json2pb(proto_graph, graph_json.c_str(), graph_json.size());
-    
-    // Make it into a VG
     VG graph;
-    graph.extend(proto_graph);
+    json2graph(graph_json, &graph);
     
     // Make a VariantAdder
     VariantAdder adder(graph);
@@ -323,14 +303,10 @@ TEST_CASE( "The smart aligner works on very large inserts", "[variantadder]" ) {
     string graph_json = R"({
         "node": [{"id": 1, "sequence": "GCGCAAAAAAAAAAAAAAAAAAAAAGCGC"}]
     })";
-    
+
     // Load the JSON
-    Graph proto_graph;
-    json2pb(proto_graph, graph_json.c_str(), graph_json.size());
-    
-    // Make it into a VG
     VG graph;
-    graph.extend(proto_graph);
+    json2graph(graph_json, &graph);
     
     // Make a VariantAdder
     VariantAdder adder(graph);
@@ -396,21 +372,17 @@ TEST_CASE( "The smart aligner should use mapping offsets on huge deletions", "[v
             {"from": 2, "to": 3}
         ]
     })";
-    
+
     // Make the graph have lots of As
     stringstream a_stream;
     for(size_t i = 0; i < 10000; i++) {
         a_stream << "A";
     }
     graph_json = regex_replace(graph_json, std::regex("<10kAs>"), a_stream.str());
-    
+
     // Load the JSON
-    Graph proto_graph;
-    json2pb(proto_graph, graph_json.c_str(), graph_json.size());
-    
-    // Make it into a VG
     VG graph;
-    graph.extend(proto_graph);
+    json2graph(graph_json, &graph);
     
     // Make a VariantAdder
     VariantAdder adder(graph);
@@ -484,21 +456,17 @@ TEST_CASE( "The smart aligner should find existing huge deletions", "[variantadd
             {"from": 2, "to": 3}
         ]
     })";
-    
+
     // Make the graph have lots of As
     stringstream a_stream;
     for(size_t i = 0; i < 10000; i++) {
         a_stream << "A";
     }
     graph_json = regex_replace(graph_json, std::regex("<10kAs>"), a_stream.str());
-    
+
     // Load the JSON
-    Graph proto_graph;
-    json2pb(proto_graph, graph_json.c_str(), graph_json.size());
-    
-    // Make it into a VG
     VG graph;
-    graph.extend(proto_graph);
+    json2graph(graph_json, &graph);
     
     // Make a VariantAdder
     VariantAdder adder(graph);
@@ -564,21 +532,17 @@ TEST_CASE( "The smart aligner should use deletion edits on medium deletions", "[
     string graph_json = R"({
         "node": [{"id": 1, "sequence": "GCGC<100As>GCGC"}]
     })";
-    
+
     // Make the graph have lots of As
     stringstream a_stream;
     for(size_t i = 0; i < 100; i++) {
         a_stream << "A";
     }
     graph_json = regex_replace(graph_json, std::regex("<100As>"), a_stream.str());
-    
+
     // Load the JSON
-    Graph proto_graph;
-    json2pb(proto_graph, graph_json.c_str(), graph_json.size());
-    
-    // Make it into a VG
     VG graph;
-    graph.extend(proto_graph);
+    json2graph(graph_json, &graph);
     
     // Make a VariantAdder
     VariantAdder adder(graph);
diff --git a/src/unittest/vg.cpp b/src/unittest/vg.cpp
index 9beb3e1ca7..b2795b57cc 100644
--- a/src/unittest/vg.cpp
+++ b/src/unittest/vg.cpp
@@ -8,6 +8,7 @@
 #include "../utility.hpp"
 #include "../algorithms/normalize.hpp"
 #include "../algorithms/disjoint_components.hpp"
+#include "../io/json2graph.hpp"
 #include "handle.hpp"
 
 namespace vg {
@@ -15,16 +16,6 @@ namespace unittest {
 
 using namespace std;
 
-// Turn a JSON string into a VG graph
-VG string_to_graph(const string& json) {
-    VG graph;
-    Graph chunk;
-    json2pb(chunk, json.c_str(), json.size());
-    graph.merge(chunk);
-    
-    return graph;
-}
-
 TEST_CASE("dagify() should render the graph acyclic", "[vg][cycles][dagify]") {
    
     unordered_map<nid_t, pair<nid_t, bool> > node_translation;
@@ -44,7 +35,7 @@ TEST_CASE("dagify() should render the graph acyclic", "[vg][cycles][dagify]") {
     
         )";
         
-        VG graph = string_to_graph(graph_json);
+        VG graph; vg::io::json2graph(graph_json, &graph);
         
         VG dag = graph.dagify(5, node_translation, 5, 0);
         
@@ -69,7 +60,7 @@ TEST_CASE("dagify() should render the graph acyclic", "[vg][cycles][dagify]") {
     
         )";
         
-        VG graph = string_to_graph(graph_json);
+        VG graph; vg::io::json2graph(graph_json, &graph);
         
         VG dag = graph.dagify(5, node_translation, 5, 0);
         
@@ -93,7 +84,7 @@ TEST_CASE("dagify() should render the graph acyclic", "[vg][cycles][dagify]") {
     
         )";
         
-        VG graph = string_to_graph(graph_json);
+        VG graph; vg::io::json2graph(graph_json, &graph);
         
         VG dag = graph.dagify(5, node_translation, 5, 0);
         
@@ -123,7 +114,7 @@ TEST_CASE("unfold() should properly unfold a graph out to the requested length",
         }
         )";
         
-        VG graph = string_to_graph(graph_json);
+        VG graph; vg::io::json2graph(graph_json, &graph);
         
         unordered_map<nid_t, pair<nid_t, bool> > node_translation;
         VG unfolded = graph.unfold(10000, node_translation);
@@ -252,7 +243,7 @@ TEST_CASE("unfold() should properly unfold a graph out to the requested length",
         }
         )";
         
-        VG graph = string_to_graph(graph_json);
+        VG graph; vg::io::json2graph(graph_json, &graph);
         
         unordered_map<nid_t, pair<nid_t, bool> > node_translation;
         VG unfolded = graph.unfold(10000, node_translation);
@@ -327,7 +318,7 @@ TEST_CASE("unfold() should properly unfold a graph out to the requested length",
         }
         )";
         
-        VG graph = string_to_graph(graph_json);
+        VG graph; vg::io::json2graph(graph_json, &graph);
         
         unordered_map<nid_t, pair<nid_t, bool> > node_translation;
         VG unfolded = graph.unfold(10000, node_translation);
@@ -417,7 +408,7 @@ TEST_CASE("unfold() should properly unfold a graph out to the requested length",
         }
         )";
         
-        VG graph = string_to_graph(graph_json);
+        VG graph; vg::io::json2graph(graph_json, &graph);
         
         unordered_map<nid_t, pair<nid_t, bool> > node_translation;
         VG unfolded = graph.unfold(10000, node_translation);
@@ -574,7 +565,7 @@ TEST_CASE("unfold() should properly unfold a graph out to the requested length",
         }
         )";
         
-        VG graph = string_to_graph(graph_json);
+        VG graph; vg::io::json2graph(graph_json, &graph);
         
         unordered_map<nid_t, pair<nid_t, bool> > node_translation;
         VG unfolded = graph.unfold(10000, node_translation);
@@ -742,7 +733,7 @@ TEST_CASE("unfold() should properly unfold a graph out to the requested length",
         }
         )";
         
-        VG graph = string_to_graph(graph_json);
+        VG graph; vg::io::json2graph(graph_json, &graph);
         
         unordered_map<nid_t, pair<nid_t, bool> > node_translation;
         VG unfolded = graph.unfold(2, node_translation);
@@ -904,7 +895,7 @@ TEST_CASE("expand_context_by_length() should respect barriers", "[vg][context]")
     }
     )";
     
-    VG graph = string_to_graph(graph_json);
+    VG graph; vg::io::json2graph(graph_json, &graph);
 
     SECTION("barriers on either end of the seed node should stop anything being extracted") {
 
@@ -962,7 +953,7 @@ TEST_CASE("add_nodes_and_edges() should connect all nodes", "[vg][edit]") {
     )";
     
     // Define a graph
-    VG graph = string_to_graph(graph_json);
+    VG graph; vg::io::json2graph(graph_json, &graph);
     
     const string path_json = R"(
     {
@@ -1051,7 +1042,7 @@ TEST_CASE("edit() should not get confused even under very confusing circumstance
     )";
     
     // Define a graph
-    VG graph = string_to_graph(graph_json);
+    VG graph; vg::io::json2graph(graph_json, &graph);
     
     // And a path that doubles back on itself through an edge that isn't in the graph yet
     const string path_json = R"(
@@ -1310,7 +1301,7 @@ TEST_CASE("normalize() can join nodes and merge siblings", "[vg][normalize]") {
     
         )";
         
-        VG graph = string_to_graph(graph_json);
+        VG graph; vg::io::json2graph(graph_json, &graph);
         algorithms::normalize(&graph);
         
         // One of the two alternative Ts should have been eliminated
@@ -1341,7 +1332,7 @@ TEST_CASE("normalize() can join nodes and merge siblings", "[vg][normalize]") {
     
         )";
         
-        VG graph = string_to_graph(graph_json);
+        VG graph; vg::io::json2graph(graph_json, &graph);
         algorithms::normalize(&graph);
         
         // Those duplicate Ts should be eliminated
@@ -1375,7 +1366,7 @@ TEST_CASE("normalize() can join nodes and merge siblings", "[vg][normalize]") {
     
         )";
         
-        VG graph = string_to_graph(graph_json);
+        VG graph; vg::io::json2graph(graph_json, &graph);
         algorithms::normalize(&graph);
         
         // Those duplicate Ts and Gs should be eliminated
@@ -1409,7 +1400,7 @@ TEST_CASE("normalize() can join nodes and merge siblings", "[vg][normalize]") {
     
         )";
         
-        VG graph = string_to_graph(graph_json);
+        VG graph; vg::io::json2graph(graph_json, &graph);
         algorithms::normalize(&graph);
         
         // Those duplicate Ts and Gs should be eliminated
@@ -1447,7 +1438,7 @@ TEST_CASE("normalize() can join nodes and merge siblings when nodes are backward
     
         )";
         
-        VG graph = string_to_graph(graph_json);
+        VG graph; vg::io::json2graph(graph_json, &graph);
         algorithms::normalize(&graph);
         
         // Those duplicate Ts (actually As) should be eliminated
@@ -1486,7 +1477,7 @@ TEST_CASE("normalize() can join nodes and merge siblings when nodes are backward
     
         )";
         
-        VG graph = string_to_graph(graph_json);
+        VG graph; vg::io::json2graph(graph_json, &graph);
         algorithms::normalize(&graph);
         
         // Those duplicate Ts (actually As) and Gs (actually Cs) should be eliminated
diff --git a/src/unittest/vg_algorithms.cpp b/src/unittest/vg_algorithms.cpp
index b4fc736734..8e713f87f7 100644
--- a/src/unittest/vg_algorithms.cpp
+++ b/src/unittest/vg_algorithms.cpp
@@ -27,7 +27,7 @@
 #include "../vg.hpp"
 #include "../xg.hpp"
 #include <bdsg/hash_graph.hpp>
-#include "vg/io/json2pb.h"
+#include "../io/json2graph.hpp"
 
 
 using namespace google::protobuf;
@@ -1092,11 +1092,8 @@ TEST_CASE( "Connecting graph extraction works on a cool loop without leaving ext
     {"edge": [{"from": "185927720", "to": "185927722"}, {"from": "185927721", "from_start": true, "to": "185927722"}, {"from": "185927722", "to": "186681786", "to_end": true}, {"from": "185927722", "to": "185927723"}, {"from": "186681786", "to": "186683083"}, {"from": "186681786", "from_start": true, "to": "186681787", "to_end": true}, {"from": "186681787", "to": "186683069", "to_end": true}, {"from": "186681787", "from_start": true, "to": "186681789"}, {"from": "186681787", "from_start": true, "to": "186681788", "to_end": true}, {"from": "186681788", "from_start": true, "to": "186681790", "to_end": true}, {"from": "186681789", "to": "186681790", "to_end": true}, {"from": "186681790", "from_start": true, "to": "186681792", "to_end": true}, {"from": "186683069", "from_start": true, "to": "186683079", "to_end": true}, {"from": "186683079", "from_start": true, "to": "186683080", "to_end": true}, {"from": "186683080", "from_start": true, "to": "186683081", "to_end": true}, {"from": "186683081", "from_start": true, "to": "186683083", "to_end": true}], "node": [{"id": "185927720", "sequence": "G"}, {"id": "185927721", "sequence": "A"}, {"id": "185927722", "sequence": "ACCGGG"}, {"id": "185927723", "sequence": "AGTGGGGG"}, {"id": "186681786", "sequence": "C"}, {"id": "186681787", "sequence": "TGGGAGTCTAAGTCTCTTTTGATCACACTTTAAAGACCAAAAGGTAGAAGCGCAAAGACGTTATCTGTCCAATATTACAAACCTAGTAAGTGGTGGAATTTGGCCTTGAACCCAGATCTGTAACTCCAGAGCCGAAGTGCTTCACCCACCTCCCTGTGGTG"}, {"id": "186681788", "sequence": "G"}, {"id": "186681789", "sequence": "T"}, {"id": "186681790", "sequence": "TAT"}, {"id": "186681792", "sequence": "T"}, {"id": "186683069", "sequence": "G"}, {"id": "186683079", "sequence": "G"}, {"id": "186683080", "sequence": 
"TACCCCGGAATCCCTGCCGCGGCCCCTCGGGCCTGTCCACATCCCTCTGCCCCTCCCAGACCTCTGTCCTTCCACCAATCGCCTCCCGCAGCCCCGAGCCGCCACTCCCAGTCCCCCGAGTCCCTGCCGCGCGCCCTCGCGCCTGTCCACATCCCTCTGCCCATCCGAGACCTCTGTCCTTACACCACTAGCCACCCCACGTGGGACTTCCATGGCTTCTGAGTACAAGGCCAGCCCCCCGGCCCACCAGCTTTCGGAATGCCTGCTTACCTCTTTTTCTGTAGA"}, {"id": "186683081", "sequence": "CCGG"}, {"id": "186683083", "sequence": "C"}]}
     )";
             
-    Graph source;
-    json2pb(source, graph_json.c_str(), graph_json.size());
-            
     VG vg;
-    vg.extend(source);
+    vg::io::json2graph(graph_json, &vg);
             
     bdsg::HashGraph extractor;
             
@@ -1688,11 +1685,8 @@ TEST_CASE( "Connecting graph extraction works on a particular case without leavi
         
             )";
             
-    Graph source;
-    json2pb(source, graph_json.c_str(), graph_json.size());
-            
     VG vg;
-    vg.extend(source);
+    vg::io::json2graph(graph_json, &vg);
             
     VG extractor;
             
@@ -2583,13 +2577,9 @@ TEST_CASE( "Topological sort works on a more complex graph",
             {"node": [{"id": 1, "sequence": "GTATTTTTAGTA"}, {"id": 2, "sequence": "G"}, {"id": 3, "sequence": "GAGACGGGGTTTCACCATGTT"}, {"id": 4, "sequence": "T"}, {"id": 5, "sequence": "CTAATTTTT"}, {"id": 6, "sequence": "CA"}, {"id": 7, "sequence": "GG"}, {"id": 8, "sequence": "ACGCCC"}, {"id": 9, "sequence": "C"}, {"id": 10, "sequence": "T"}, {"id": 11, "sequence": "C"}, {"id": 12, "sequence": "GCCA"}, {"id": 13, "sequence": "A"}, {"id": 14, "sequence": "GGGATTACAGGCGCACACC"}, {"id": 15, "sequence": "CCACACC"}, {"id": 16, "sequence": "AT"}, {"id": 17, "sequence": "CC"}, {"id": 18, "sequence": "GGTCAGGCTGGTCTCGACTCC"}, {"id": 19, "sequence": "TGACCTCCTGATCTGCCCCCC"}, {"id": 20, "sequence": "A"}, {"id": 21, "sequence": "G"}, {"id": 22, "sequence": "TATTTTTAGTA"}, {"id": 23, "sequence": "A"}, {"id": 24, "sequence": "G"}, {"id": 25, "sequence": "GA"}], "edge": [{"from": 4, "to": 1}, {"from": 5, "to": 1}, {"from": 1, "to": 2}, {"from": 1, "to": 3}, {"from": 22, "to": 2}, {"from": 2, "to": 20}, {"from": 2, "to": 21}, {"from": 3, "to": 18}, {"from": 5, "to": 4}, {"from": 6, "to": 5}, {"from": 7, "to": 5}, {"from": 8, "to": 6}, {"from": 8, "to": 7}, {"from": 9, "to": 8}, {"from": 10, "to": 8}, {"from": 11, "to": 9}, {"from": 11, "to": 10}, {"from": 12, "to": 11}, {"from": 13, "to": 11}, {"from": 16, "to": 12}, {"from": 17, "to": 12}, {"from": 12, "to": 15}, {"from": 14, "to": 13}, {"from": 18, "to": 19}, {"from": 20, "to": 25}, {"from": 21, "to": 25}, {"from": 23, "to": 22}, {"from": 24, "to": 22}]}
             )";
             
-    // Load the JSON
-    Graph proto_graph;
-    json2pb(proto_graph, graph_json.c_str(), graph_json.size());
-            
-    // Make it into a VG
+    // Load the JSON into a VG
     VG vg;
-    vg.extend(proto_graph);
+    vg::io::json2graph(graph_json, &vg);
             
     SECTION( "handlealgs::topological_order produces a consistent total ordering and orientation" ) {
         auto handle_sort = handlealgs::topological_order(&vg);
@@ -5385,11 +5375,8 @@ TEST_CASE("simplify_siblings() works on a graph with a reversing self loop", "[a
         {"edge": [{"from": "1", "to": "3"}, {"from": "1", "to": "2"}, {"from": "2", "to": "2", "to_end": true}], "node": [{"id": "1", "sequence": "T"}, {"id": "2", "sequence": "A"}, {"id": "3", "sequence": "ACA"}], "path": [{"mapping": [{"edit": [{"from_length": 1, "to_length": 1}], "position": {"node_id": "1"}, "rank": "1"}, {"edit": [{"from_length": 1, "to_length": 1}], "position": {"node_id": "2"}, "rank": "2"}, {"edit": [{"from_length": 1, "to_length": 1}], "position": {"is_reverse": true, "node_id": "2"}, "rank": "3"}], "name": "x"}, {"mapping": [{"edit": [{"from_length": 1, "to_length": 1}], "position": {"node_id": "1"}, "rank": "1"}, {"edit": [{"from_length": 3, "to_length": 3}], "position": {"node_id": "3"}, "rank": "2"}], "name": "y"}]}
     )";
     
-    Graph source;
-    json2pb(source, graph_json.c_str(), graph_json.size());
-            
     VG graph;
-    graph.extend(source);
+    vg::io::json2graph(graph_json, &graph);
             
     
     
@@ -5405,11 +5392,8 @@ TEST_CASE("simplify_siblings() works on a smaller graph with a reversing self lo
         {"edge": [{"from": "1", "to": "3"}, {"from": "1", "to": "2"}, {"from": "2", "to": "2", "to_end": true}], "node": [{"id": "1", "sequence": "T"}, {"id": "2", "sequence": "A"}, {"id": "3", "sequence": "A"}], "path": [{"mapping": [{"edit": [{"from_length": 1, "to_length": 1}], "position": {"node_id": "1"}, "rank": "1"}, {"edit": [{"from_length": 1, "to_length": 1}], "position": {"node_id": "2"}, "rank": "2"}, {"edit": [{"from_length": 1, "to_length": 1}], "position": {"is_reverse": true, "node_id": "2"}, "rank": "3"}], "name": "x"}]}
     )";
     
-    Graph source;
-    json2pb(source, graph_json.c_str(), graph_json.size());
-            
     VG graph;
-    graph.extend(source);
+    vg::io::json2graph(graph_json, &graph);
             
     
     
@@ -5425,11 +5409,8 @@ TEST_CASE("normalize() works on a graph with a reversing self loop", "[algorithm
         {"edge": [{"from": "1", "to": "3"}, {"from": "1", "to": "2"}, {"from": "2", "to": "2", "to_end": true}], "node": [{"id": "1", "sequence": "T"}, {"id": "2", "sequence": "A"}, {"id": "3", "sequence": "ACA"}], "path": [{"mapping": [{"edit": [{"from_length": 1, "to_length": 1}], "position": {"node_id": "1"}, "rank": "1"}, {"edit": [{"from_length": 1, "to_length": 1}], "position": {"node_id": "2"}, "rank": "2"}, {"edit": [{"from_length": 1, "to_length": 1}], "position": {"is_reverse": true, "node_id": "2"}, "rank": "3"}], "name": "x"}, {"mapping": [{"edit": [{"from_length": 1, "to_length": 1}], "position": {"node_id": "1"}, "rank": "1"}, {"edit": [{"from_length": 3, "to_length": 3}], "position": {"node_id": "3"}, "rank": "2"}], "name": "y"}]}
     )";
     
-    Graph source;
-    json2pb(source, graph_json.c_str(), graph_json.size());
-            
     VG graph;
-    graph.extend(source);
+    vg::io::json2graph(graph_json, &graph);
             
     
     
diff --git a/src/unittest/vpkg.cpp b/src/unittest/vpkg.cpp
index 51a849c446..977814ff9c 100644
--- a/src/unittest/vpkg.cpp
+++ b/src/unittest/vpkg.cpp
@@ -13,7 +13,7 @@
 #include "xg.hpp"
 #include "../vg.hpp"
 #include "../snarl_seed_clusterer.hpp"
-#include "vg/io/json2pb.h"
+#include "../io/json2graph.hpp"
 #include <gcsa/gcsa.h>
 #include <sstream>
 #include <tuple>
@@ -50,12 +50,12 @@ TEST_CASE("We can read and write XG", "[vpkg][handlegraph][xg]") {
     )";
     
     // Load the JSON
-    Graph proto_graph;
-    json2pb(proto_graph, graph_json.c_str(), graph_json.size());
-    
+    bdsg::HashGraph hash_graph;
+    vg::io::json2graph(graph_json, &hash_graph);
+
     // Build the xg index
     xg::XG xg_index;
-    xg_index.from_path_handle_graph(VG(proto_graph));
+    xg_index.from_path_handle_graph(hash_graph);
 
     stringstream ss;
     
@@ -148,13 +148,10 @@ TEST_CASE("We can read VG from a VPKG-wrapped stream as a VG", "[vpkg][handlegra
     {"id":2,"sequence":"ACA"}],
     "edge":[{"to":2,"from":1}]}
     )";
-    
-    // Load the JSON
-    Graph proto_graph;
-    json2pb(proto_graph, graph_json.c_str(), graph_json.size());
-    
-    // Build the VG
-    vg::VG vg_graph(proto_graph);
+
+    // Load the JSON directly into VG
+    vg::VG vg_graph;
+    vg::io::json2graph(graph_json, &vg_graph);
     
     // Save it
     stringstream ss;
@@ -179,13 +176,10 @@ TEST_CASE("We can read VG from a VPKG-wrapped stream as a HandleGraph which is a
     {"id":2,"sequence":"ACA"}],
     "edge":[{"to":2,"from":1}]}
     )";
-    
-    // Load the JSON
-    Graph proto_graph;
-    json2pb(proto_graph, graph_json.c_str(), graph_json.size());
-    
-    // Build the VG
-    vg::VG vg_graph(proto_graph);
+
+    // Load the JSON directly into VG
+    vg::VG vg_graph;
+    vg::io::json2graph(graph_json, &vg_graph);
     
     // Save it
     stringstream ss;
@@ -210,13 +204,10 @@ TEST_CASE("We can read VG from a VPKG-wrapped stream as a HandleGraph which is a
 
 TEST_CASE("We can read an empty VG as a HandleGraph", "[vpkg][handlegraph][vg][empty]") {
     string graph_json = "{}";
-    
-    // Load the JSON
-    Graph proto_graph;
-    json2pb(proto_graph, graph_json.c_str(), graph_json.size());
-    
-    // Build the VG
-    vg::VG vg_graph(proto_graph);
+
+    // Load the JSON directly into VG
+    vg::VG vg_graph;
+    vg::io::json2graph(graph_json, &vg_graph);
     
     // Save it
     stringstream ss;
@@ -240,13 +231,10 @@ TEST_CASE("We prefer to read a graph as the first provided type that matches", "
     {"id":2,"sequence":"ACA"}],
     "edge":[{"to":2,"from":1}]}
     )";
-    
-    // Load the JSON
-    Graph proto_graph;
-    json2pb(proto_graph, graph_json.c_str(), graph_json.size());
-    
-    // Build the VG
-    vg::VG vg_graph(proto_graph);
+
+    // Load the JSON directly into VG
+    vg::VG vg_graph;
+    vg::io::json2graph(graph_json, &vg_graph);
     
     // Save it
     stringstream ss;
diff --git a/src/unittest/xdrop_aligner.cpp b/src/unittest/xdrop_aligner.cpp
index f745b8f66a..07577e4479 100644
--- a/src/unittest/xdrop_aligner.cpp
+++ b/src/unittest/xdrop_aligner.cpp
@@ -5,7 +5,7 @@
 
 #include <iostream>
 #include <string>
-#include "vg/io/json2pb.h"
+#include "../io/json2graph.hpp"
 #include "../alignment.hpp"
 #include "../vg.hpp"
 #include <vg/vg.pb.h>
@@ -764,12 +764,9 @@ TEST_CASE("QualAdjXdropAligner will not penalize a low quality mismatch", "[xdro
 TEST_CASE("XdropAligner doesn't crash on a case where it is hard to find a seed", "[xdrop][alignment][mapping]") {
     
     string graph_json = R"({"edge": [{"from": "92345167", "to": "92345168"}, {"from": "92345182", "to": "92345183"}, {"from": "92345165", "to": "92345166"}, {"from": "92345177", "to": "92345178"}, {"from": "92345171", "to": "92345172"}, {"from": "92345161", "to": "92345162"}, {"from": "92345183", "to": "92345184"}, {"from": "92345181", "to": "92345182"}, {"from": "92345178", "to": "92345179"}, {"from": "92345166", "to": "92345167"}, {"from": "92345179", "to": "92345180"}, {"from": "92345173", "to": "92345174"}, {"from": "92345184", "to": "92345185"}, {"from": "92345169", "to": "92345170"}, {"from": "92345185", "to": "92345186"}, {"from": "92345160", "to": "92345161"}, {"from": "92345174", "to": "92345175"}, {"from": "92345162", "to": "92345163"}, {"from": "92345175", "to": "92345176"}, {"from": "92345168", "to": "92345169"}, {"from": "92345163", "to": "92345164"}, {"from": "92345172", "to": "92345173"}, {"from": "92345180", "to": "92345181"}, {"from": "92345176", "to": "92345177"}, {"from": "92345170", "to": "92345171"}, {"from": "92345164", "to": "92345165"}], "node": [{"id": "92345167", "sequence": "TTTATATATATATATTTATATATATATATTTA"}, {"id": "92345182", "sequence": "TATATATATTTATATATATATTTATATATATA"}, {"id": "92345165", "sequence": "ATATATATATATTTATATATATTTATATATTA"}, {"id": "92345177", "sequence": "TTTATATATATATTTATATATATATATTATAT"}, {"id": "92345171", "sequence": "TTATATATATATTTATATATATATTTATATAT"}, {"id": "92345161", "sequence": "ATATATTTATATATTTTTATATATTATATATT"}, {"id": "92345183", "sequence": "TTTATATATATTTATATATATATTTATATATA"}, {"id": "92345181", "sequence": "ATATATTATATATATATTTATATATATATTTA"}, {"id": "92345178", "sequence": "ATATATTTATATATATATTTATATATATATTT"}, {"id": "92345166", "sequence": "TTTATATATATTTATATATATATTTATATATA"}, {"id": "92345179", "sequence": "ATATATATATTTATATATATATTTATATATAT"}, {"id": "92345173", "sequence": "ATATTTATATATATATATTTATATATATATTT"}, {"id": "92345184", "sequence": "TATTTATATATATATTTATATATATTTATATA"}, {"id": "92345169", 
"sequence": "TTTATATATATATTTATATATATATTTATATA"}, {"id": "92345185", "sequence": "TATATTTATATATATATATATATATTTATATA"}, {"id": "92345160", "sequence": "ATTTATATATATATTTATATATATATTTATAT"}, {"id": "92345174", "sequence": "ATATATATATTTATATATATATTATTTATATA"}, {"id": "92345162", "sequence": "TATATATATATTTATATATTATATATATATTT"}, {"id": "92345175", "sequence": "TATATTTATATATATATTATATATATATTTAT"}, {"id": "92345168", "sequence": "TATATATATTTATATATATATTTATATATATA"}, {"id": "92345163", "sequence": "ATATATTTATATATATATTTATATATATTTAT"}, {"id": "92345172", "sequence": "ATATATATATATTTATATATATATTTATATAT"}, {"id": "92345180", "sequence": "ATTTATATATATATTTATATATATATTTATAT"}, {"id": "92345176", "sequence": "ATATATATATTATATATATATTTATATATATA"}, {"id": "92345170", "sequence": "TATATTTATATATATATATTATATATATATAT"}, {"id": "92345164", "sequence": "ATATATATTTATATATATTTATATATATATTT"}, {"id": "92345186", "sequence": "TATATTTATATATATTTATATATATATTTATA"}]})";
-    
-    Graph source;
-    json2pb(source, graph_json.c_str(), graph_json.size());
-    
-    VG graph;
-    graph.extend(source);
+
+    bdsg::HashGraph graph;
+    vg::io::json2graph(graph_json, &graph);
     
     Alignment aln;
     aln.set_sequence("CAGCACTTTGGGAGGCCAAGGTGGGTGGATCATCTGAGGTCAGGAGTTTGAGACCAGCCTGACCAACATGGTGAAATCCTGTCTCTACTGAAAATACTAAAATTAGCCAGGCGTGGCGGCCAGTGCCTGTAATCCCGGCTACTGGGGAGG");
diff --git a/src/unittest/xg.cpp b/src/unittest/xg.cpp
index d74db5d0b0..dfa913b8eb 100644
--- a/src/unittest/xg.cpp
+++ b/src/unittest/xg.cpp
@@ -8,7 +8,9 @@
 #include "vg.hpp"
 #include "xg.hpp"
 #include "graph.hpp"
+#include "../io/json2graph.hpp"
 #include "algorithms/subgraph.hpp"
+#include "bdsg/hash_graph.hpp"
 #include <stdio.h>
 
 namespace vg {
@@ -22,19 +24,18 @@ TEST_CASE("We can build an xg index on a nice graph", "[xg]") {
     {"id":2,"sequence":"ACA"}],
     "edge":[{"to":2,"from":1}]}
     )";
-    
+
     // Load the JSON
-    Graph proto_graph;
-    json2pb(proto_graph, graph_json.c_str(), graph_json.size());
-    
+    bdsg::HashGraph source;
+    vg::io::json2graph(graph_json, &source);
+
     // Build the xg index
     xg::XG xg_index;
-    xg_index.from_path_handle_graph(VG(proto_graph));
+    xg_index.from_path_handle_graph(source);
 
     VG vg_graph;
     algorithms::extract_context(xg_index, vg_graph, xg_index.get_handle(1), 0, 100);
     Graph& graph = vg_graph.graph;
-    sort_by_id_dedup_and_clean(graph);
 
     REQUIRE(graph.node_size() == 2);
     REQUIRE(graph.edge_size() == 1);
@@ -49,19 +50,18 @@ TEST_CASE("We can build an xg index on a nasty graph", "[xg]") {
     {"id":9999,"sequence":"AAA"}],
     "edge":[{"to":2,"from":1}]}
     )";
-    
+
     // Load the JSON
-    Graph proto_graph;
-    json2pb(proto_graph, graph_json.c_str(), graph_json.size());
-    
+    bdsg::HashGraph source;
+    vg::io::json2graph(graph_json, &source);
+
     // Build the xg index
     xg::XG xg_index;
-    xg_index.from_path_handle_graph(VG(proto_graph));
+    xg_index.from_path_handle_graph(source);
 
     VG vg_graph;
     algorithms::extract_context(xg_index, vg_graph, xg_index.get_handle(1), 0, 100);
     Graph& graph = vg_graph.graph;
-    sort_by_id_dedup_and_clean(graph);
 
     REQUIRE(graph.node_size() == 2);
     REQUIRE(graph.edge_size() == 1);
@@ -161,15 +161,14 @@ TEST_CASE("We can build an xg index on a very nasty graph", "[xg]") {
     {"position":{"node_id":1444},"rank":1059},
     {"position":{"node_id":1445},"rank":1060}]}]}
     )";
-    
+
     // Load the JSON
-    Graph proto_graph;
-    json2pb(proto_graph, graph_json.c_str(), graph_json.size());
+    VG source;
+    vg::io::json2graph(graph_json, &source);
 
-    sort_by_id_dedup_and_clean(proto_graph);
     // Build the xg index
     xg::XG xg_index;
-    xg_index.from_path_handle_graph(VG(proto_graph));
+    xg_index.from_path_handle_graph(source);
 
     SECTION("Context extraction gets something") {
         VG graph;
@@ -182,7 +181,7 @@ TEST_CASE("We can build an xg index on a very nasty graph", "[xg]") {
         
         SECTION("We can extract within a single node") {
             algorithms::extract_path_range(xg_index, xg_index.get_path_handle("17"), 5, 15, graph);
-            
+
             // We should just get node 1416
             REQUIRE(graph.graph.node_size() == 1);
             REQUIRE(graph.graph.node(0).id() == 1416);
@@ -265,14 +264,14 @@ TEST_CASE("We can build and scan an XG index for a problematic graph", "[xg]") {
       ]}
     ]}
     )";
-    
+
     // Load the JSON
-    Graph proto_graph;
-    json2pb(proto_graph, graph_json.c_str(), graph_json.size());
+    bdsg::HashGraph source;
+    vg::io::json2graph(graph_json, &source);
 
     // Build the xg index (without any sorting)
     xg::XG xg_index;
-    xg_index.from_path_handle_graph(VG(proto_graph));
+    xg_index.from_path_handle_graph(source);
 
     REQUIRE(xg_index.get_node_count() == 5);
     
@@ -300,18 +299,16 @@ TEST_CASE("We can build the xg index on a small graph with discontinuous node id
     )";
 
     // Load the JSON
-    Graph proto_graph;
-    json2pb(proto_graph, graph_json.c_str(), graph_json.size());
+    VG source;
+    vg::io::json2graph(graph_json, &source);
 
-    sort_by_id_dedup_and_clean(proto_graph);
     // Build the xg index
     xg::XG xg_index;
-    xg_index.from_path_handle_graph(VG(proto_graph));
+    xg_index.from_path_handle_graph(source);
 
     VG vg_graph;
     algorithms::extract_context(xg_index, vg_graph, xg_index.get_handle(10), 0, 100);
     Graph& graph = vg_graph.graph;
-    sort_by_id_dedup_and_clean(graph);
 
     REQUIRE(graph.node_size() == 2);
     REQUIRE(graph.edge_size() == 1);
@@ -326,14 +323,14 @@ TEST_CASE("Looping over XG handles in parallel works", "[xg]") {
     {"id":2,"sequence":"ACA"}],
     "edge":[{"to":2,"from":1}]}
     )";
-    
+
     // Load the JSON
-    Graph proto_graph;
-    json2pb(proto_graph, graph_json.c_str(), graph_json.size());
-    
+    bdsg::HashGraph source;
+    vg::io::json2graph(graph_json, &source);
+
     // Build the xg index
     xg::XG xg_index;
-    xg_index.from_path_handle_graph(VG(proto_graph));
+    xg_index.from_path_handle_graph(source);
 
     size_t count = 0;
 
@@ -341,7 +338,7 @@ TEST_CASE("Looping over XG handles in parallel works", "[xg]") {
         #pragma omp critical
         count++;
     }, true);
-    
+
     REQUIRE(count == 2);
 
 }
@@ -400,14 +397,14 @@ TEST_CASE("Vectorization of xg works correctly", "[xg]") {
             {"edit": [{"from_length": 11, "to_length": 11}], "position": {"node_id": "15"}, "rank": "10"}
         ], "name": "x"}]}
     )";
-    
+
     // Load the JSON
-    Graph proto_graph;
-    json2pb(proto_graph, graph_json.c_str(), graph_json.size());
+    bdsg::HashGraph source;
+    vg::io::json2graph(graph_json, &source);
 
     // Build the xg index (without any sorting)
     xg::XG xg_index;
-    xg_index.from_path_handle_graph(VG(proto_graph));
+    xg_index.from_path_handle_graph(source);
 
     REQUIRE(xg_index.get_node_count() == 15);
     
diff --git a/src/unittest/zip_code.cpp b/src/unittest/zip_code.cpp
index dc3255e984..1d0a2c39c7 100644
--- a/src/unittest/zip_code.cpp
+++ b/src/unittest/zip_code.cpp
@@ -117,6 +117,10 @@ using namespace std;
         bool chain_is_reversed = distance_index.is_reversed_in_parent(
                                                 distance_index.get_node_net_handle(n1->id()));
 
+        // Node 4 is in snarl 3 to 6 which should be regular.
+        // The zip codes are going to encode this so it had better be true.
+        REQUIRE(distance_index.is_regular_snarl(distance_index.get_parent(distance_index.get_parent(distance_index.get_node_net_handle(n4->id())))));
+
         SECTION ("zip code for node on top-level chain") {
             ZipCode zipcode;
             zipcode.fill_in_zipcode_from_pos(distance_index, make_pos_t(n1->id(), 0, false));
diff --git a/src/zip_code.cpp b/src/zip_code.cpp
index 4699a24494..051602443f 100644
--- a/src/zip_code.cpp
+++ b/src/zip_code.cpp
@@ -1,3 +1,5 @@
+#include "crash.hpp"
+
 #include "zip_code.hpp"
 
 //#define DEBUG_ZIPCODE
@@ -16,10 +18,11 @@ void ZipCode::fill_in_zipcode_from_pos(const SnarlDistanceIndex& distance_index,
     //Put all ancestors of the node in a vector, starting from the node, and not including the root
     while (!distance_index.is_root(current_handle)) {
         ancestors.emplace_back(distance_index.start_end_traversal_of(current_handle));
-        current_handle = distance_index.get_parent(current_handle);
+        net_handle_t parent_handle = distance_index.get_parent(current_handle);
+        crash_unless(parent_handle != current_handle);
+        current_handle = parent_handle;
     }
 
-
     //Now add the root-level snarl or chain
     if (distance_index.is_root_snarl(current_handle)) {
         //First thing is a snarl, so add the snarl's connected component number
@@ -121,7 +124,7 @@ void ZipCode::fill_in_zipcode_from_pos(const SnarlDistanceIndex& distance_index,
                 }
                 return;
             }
-        } else if (distance_index.is_regular_snarl(current_ancestor, false, graph_ptr)) {
+        } else if (distance_index.is_regular_snarl(current_ancestor)) {
             snarl_code_t snarl_code = get_regular_snarl_code(current_ancestor, ancestors[i-1], distance_index); 
             zipcode.add_value(snarl_code.get_raw_code_type());
             zipcode.add_value(snarl_code.get_raw_prefix_sum_or_identifier());
@@ -1065,11 +1068,7 @@ ZipCode::snarl_code_t ZipCode::get_regular_snarl_code(const net_handle_t& snarl,
     snarl_code.set_code_type(1);
 
     //The number of children
-    size_t child_count = 0;
-    distance_index.for_each_child(snarl, [&] (const net_handle_t& child) {
-        child_count++;
-    });
-    snarl_code.set_child_count(child_count);
+    snarl_code.set_child_count(distance_index.get_snarl_child_count(snarl));
 
     //Chain prefix sum value for the start of the snarl, which is the prefix sum of the start node + length of the start node
     net_handle_t start_node = distance_index.get_node_from_sentinel(distance_index.get_bound(snarl, false, false));
@@ -1100,11 +1099,7 @@ ZipCode::snarl_code_t ZipCode::get_irregular_snarl_code(const net_handle_t& snar
     snarl_code.set_code_type(distance_index.is_dag(snarl) ? 0 : 2);
 
     //The number of children
-    size_t child_count = 0;
-    distance_index.for_each_child(snarl, [&] (const net_handle_t& child) {
-        child_count++;
-    });
-    snarl_code.set_child_count(child_count);
+    snarl_code.set_child_count(distance_index.get_snarl_child_count(snarl));
 
     //Chain prefix sum value for the start of the snarl, which is the prefix sum of the start node + length of the start node
     net_handle_t start_node = distance_index.get_node_from_sentinel(distance_index.get_bound(snarl, false, false));
diff --git a/test/t/50_vg_giraffe.t b/test/t/50_vg_giraffe.t
index b1136dceef..5ec9a33612 100644
--- a/test/t/50_vg_giraffe.t
+++ b/test/t/50_vg_giraffe.t
@@ -124,6 +124,8 @@ is "$(grep -c 'error.*are not compatible' log.txt)" "1" "appropriate error messa
 rm t1.bam t2.bam t3.bam t1.gaf tagged1.fq tagged2.fq
 rm -f read.fq read.gam
 
+rm -rf explanation_*
+
 vg giraffe -Z x.giraffe.gbz -f reads/small.middle.ref.indel.multi.fq --show-work --track-position -b chaining-sr > /dev/null 2>&1
 # Check that at least some TSV files and directories were created 
 is "$(find explanation_read1 -name 'chain*-dotplot*.tsv' 2>/dev/null | wc -l | tr -d ' ')" "1" "Chain explanation files are created per chain"
@@ -297,8 +299,8 @@ vg index -j 1mb1kgp.dist  1mb1kgp.vg
 vg autoindex -p 1mb1kgp -w giraffe -P "VG w/ Variant Paths:1mb1kgp.vg" -P "Giraffe Distance Index:1mb1kgp.dist" -r 1mb1kgp/z.fa -v 1mb1kgp/z.vcf.gz
 vg giraffe -Z 1mb1kgp.giraffe.gbz -f reads/1mb1kgp_longread.fq >longread.gam -U 300 --track-provenance --align-from-chains --set-refpos
 # This is an 8001 bp read with 1 insert and 1 substitution
-# 7999 * 1 + 1 * -4 + -6 + 5 + 5 = 7999
-is "$(vg view -aj longread.gam | jq -r '.score')" "7999" "A long read can be correctly aligned"
+# We use minimap2-based scoring, which awards such an alignment this many points.
+is "$(vg view -aj longread.gam | jq -r '.score')" "7948" "A long read can be correctly aligned"
 is "$(vg view -aj longread.gam | jq -c '.path.mapping[].edit[] | select(.sequence)' | wc -l | sed 's/^[[:space:]]*//')" "2" "A long read has the correct edits found"
 is "$(vg view -aj longread.gam | jq -c '. | select(.annotation["filter_3_cluster-coverage_cluster_passed_size_total"] <= 300)' | wc -l | sed 's/^[[:space:]]*//')" "1" "Long read minimizer set is correctly restricted"
 is "$(vg view -aj longread.gam | jq -c '.refpos[]' | wc -l)" "$(vg view -aj longread.gam | jq -c '.path.mapping[]' | wc -l)" "Giraffe sets refpos for each reference node"