From 4449d7e74ed37adb236bbf1eb87ef77c3e174ac6 Mon Sep 17 00:00:00 2001 From: Adam Novak Date: Tue, 26 May 2026 13:23:53 -0400 Subject: [PATCH 01/13] Use a libbdsg that has better opinions on paths that should exist through indexing overlays --- deps/libbdsg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deps/libbdsg b/deps/libbdsg index e74fb663a5..35d3ad24e4 160000 --- a/deps/libbdsg +++ b/deps/libbdsg @@ -1 +1 @@ -Subproject commit e74fb663a5f85bc1f76d159b2b3a3691ed85862f +Subproject commit 35d3ad24e4b7fa95e66d9033e553f9198638df47 From c942a093194d3225bbc843d15b29c4573f08e5dc Mon Sep 17 00:00:00 2001 From: Adam Novak Date: Tue, 26 May 2026 14:38:19 -0400 Subject: [PATCH 02/13] Index target paths in vg find --- src/subcommand/find_main.cpp | 34 +++++++++++++++++++++++----------- 1 file changed, 23 insertions(+), 11 deletions(-) diff --git a/src/subcommand/find_main.cpp b/src/subcommand/find_main.cpp index d35e874cbf..6556a55959 100644 --- a/src/subcommand/find_main.cpp +++ b/src/subcommand/find_main.cpp @@ -400,6 +400,28 @@ int main_find(int argc, char** argv) { } } + // Parse any targets + // handle targets from BED + if (!bed_targets_file.empty()) { + parse_bed_regions(bed_targets_file, targets); + } + // those given on the command line + for (auto& target : targets_str) { + Region region; + parse_region(target, region); + targets.push_back(region); + } + + // Find out paths we will need to make position queries on, in case they + // aren't already the right sense. 
+ std::unordered_set required_position_paths; + for (const Region& r : targets) { + required_position_paths.insert(r.seq); + } + if (!path_name.empty()) { + required_position_paths.insert(path_name); + } + PathPositionHandleGraph* xindex = nullptr; unique_ptr path_handle_graph; bdsg::PathPositionOverlayHelper overlay_helper; @@ -407,7 +429,7 @@ int main_find(int argc, char** argv) { if (!xg_name.empty()) { path_handle_graph = vg::io::VPKG::load_one(xg_name); input_gfa = dynamic_cast(path_handle_graph.get()) != nullptr; - xindex = overlay_helper.apply(path_handle_graph.get()); + xindex = overlay_helper.apply(path_handle_graph.get(), required_position_paths); // Remove node ids that do not exist in the graph. std::vector final_ids; @@ -617,16 +639,6 @@ int main_find(int argc, char** argv) { cout << xindex->get_path_name(path_handle) << endl; }); } - // handle targets from BED - if (!bed_targets_file.empty()) { - parse_bed_regions(bed_targets_file, targets); - } - // those given on the command line - for (auto& target : targets_str) { - Region region; - parse_region(target, region); - targets.push_back(region); - } if (!targets.empty()) { auto output_graph = get_output_graph(); auto& graph = *output_graph; From b98d221c46bd3ee5e06df80319a5908dddea76e3 Mon Sep 17 00:00:00 2001 From: Adam Novak Date: Tue, 26 May 2026 14:40:46 -0400 Subject: [PATCH 03/13] Deprecate vg find -Q/--paths-named --- src/subcommand/find_main.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/subcommand/find_main.cpp b/src/subcommand/find_main.cpp index 6556a55959..d79eafd0dd 100644 --- a/src/subcommand/find_main.cpp +++ b/src/subcommand/find_main.cpp @@ -79,7 +79,8 @@ void help_find(char** argv) { << " -Z, --min-mem N minimum length of the MEM [1]" << endl << " -D, --distance return distance on path between pair of nodes (-n)" << endl << " if -P not used, best path chosen heurstically" << endl - << " -Q, --paths-named STR return all paths with name prefix STR (may 
repeat)" << endl; + << " -Q, --paths-named STR return all paths with name prefix STR (may repeat)" << endl + << " (deprecated)" << endl; } @@ -819,6 +820,7 @@ int main_find(int argc, char** argv) { vg::io::save_handle_graph(&graph, cout); } if (extract_paths) { + logger.warn() << "vg paths -Q/--paths-named is deprecated due to the partial Protobuf graph output format. Consider vg paths --extract-fasta instead." << std::end; for (auto& pattern : extract_path_patterns) { // We want to write uncompressed protobuf Graph objects containing our paths. From 772b0b9d15302de36e9a925c2ad1a003c98c8e20 Mon Sep 17 00:00:00 2001 From: Adam Novak Date: Tue, 26 May 2026 14:50:45 -0400 Subject: [PATCH 04/13] Make sure vg find indexing of requested paths works --- src/subcommand/find_main.cpp | 2 +- test/t/05_vg_find.t | 8 +++++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/src/subcommand/find_main.cpp b/src/subcommand/find_main.cpp index d79eafd0dd..5f9e41dfe6 100644 --- a/src/subcommand/find_main.cpp +++ b/src/subcommand/find_main.cpp @@ -820,7 +820,7 @@ int main_find(int argc, char** argv) { vg::io::save_handle_graph(&graph, cout); } if (extract_paths) { - logger.warn() << "vg paths -Q/--paths-named is deprecated due to the partial Protobuf graph output format. Consider vg paths --extract-fasta instead." << std::end; + logger.warn() << "vg paths -Q/--paths-named is deprecated due to the partial Protobuf graph output format. Consider vg paths --extract-fasta instead." << std::endl; for (auto& pattern : extract_path_patterns) { // We want to write uncompressed protobuf Graph objects containing our paths. diff --git a/test/t/05_vg_find.t b/test/t/05_vg_find.t index d085565505..45b003dca8 100644 --- a/test/t/05_vg_find.t +++ b/test/t/05_vg_find.t @@ -5,7 +5,7 @@ BASH_TAP_ROOT=../deps/bash-tap PATH=../bin:$PATH # for vg -plan tests 30 +plan tests 31 vg construct -m 1000 -r small/x.fa -v small/x.vcf.gz >x.vg is $? 0 "construction" @@ -142,3 +142,9 @@ is $? 
0 "find nodes that map to the provided node ids" rm -f x.vg x.gbwt x.mapping x.unfolded.vg rm -f expected.gfa found.gfa + +# We wish we could test specifically for which paths are indexed, but we can't. +# So we test to make sure at least paths that shouldn't normally be indexed are +# indexed when asked about. +is "$(vg find -n 5 -P sample1#1#chr1#0 -x graphs/gfa_with_reference.gfa | cut -f2)" "4" "vg find can find positions along non-reference paths asked about specifically" + From 574f4c4c9050ea3d4e0f3cfc76e994582f65ff5a Mon Sep 17 00:00:00 2001 From: Adam Novak Date: Tue, 26 May 2026 17:11:40 -0400 Subject: [PATCH 05/13] Use a libbdsg that tests haplotype x overlay interactions --- deps/libbdsg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deps/libbdsg b/deps/libbdsg index 35d3ad24e4..5a602f2eaa 160000 --- a/deps/libbdsg +++ b/deps/libbdsg @@ -1 +1 @@ -Subproject commit 35d3ad24e4b7fa95e66d9033e553f9198638df47 +Subproject commit 5a602f2eaab5f4fe0f4cd4a5cba2b7b437743d4d From d0a2bc99d7aa2b18732cb237923fb3eb87d8c49a Mon Sep 17 00:00:00 2001 From: Adam Novak Date: Wed, 10 Jun 2026 16:41:55 -0400 Subject: [PATCH 06/13] Index the right paths in vg clip --- src/subcommand/clip_main.cpp | 75 ++++++++++++++++++++++++++---------- 1 file changed, 54 insertions(+), 21 deletions(-) diff --git a/src/subcommand/clip_main.cpp b/src/subcommand/clip_main.cpp index 9929c8e22c..9fdacef6d2 100644 --- a/src/subcommand/clip_main.cpp +++ b/src/subcommand/clip_main.cpp @@ -278,23 +278,59 @@ int main_clip(int argc, char** argv) { // need snarls if input regions are provided, or doing snarl based clipping bool need_snarls = snarl_option || !bed_path.empty(); - // TodO: FIX!! shouldn't need pp without BED coordinates + // TODO: FIX!! 
shouldn't need pp without BED coordinates need_pp = need_pp || need_snarls; + if (!bed_path.empty()) { + // load the BED file + parse_bed_regions(bed_path, bed_regions); + if (verbose) { + logger.info() << "Loaded " << bed_regions.size() << " BED regions" << endl; + } + } + + // It's going to be a little expensive to find the paths with basenames + // matching the prefixes (algorithmically, if not in practice), so we fill + // them in once and then re-use them. + std::vector graph_paths_matched; if (need_pp) { - pp_graph = overlay_helper.apply(graph.get()); + // Figure out the paths we're going to need regions on. + std::unordered_set position_path_names; + + for (const Region& region : bed_regions) { + // For each region already defined (from the BED), we need its sequence indexed + position_path_names.insert(region.seq); + } + + if (!ref_prefixes.empty()) { + // If we want all paths matching some prefixes + graph->for_each_path_handle([&](path_handle_t path_handle) { + // Look at all ther paths + std::string path_name = pp_graph->get_path_name(path_handle); + subrange_t subrange; + // And get their base path names + path_name = Paths::strip_subrange(path_name, &subrange); + for (const string& ref_prefix : ref_prefixes) { + if (path_name.compare(0, ref_prefix.length(), ref_prefix) == 0) { + // And make sure they're indexed if they match the prefix. 
+ position_path_names.insert(path_name); + // And remember them to then index all of + graph_paths_matched.emplace_back(std::move(path_name)); + break; + } + } + }); + } + + // Get the overlay, indexing all the paths we will need to care about + pp_graph = overlay_helper.apply(graph.get(), position_path_names); if (verbose) { logger.info() << "Computed path position overlay of input graph" << endl; } } if (need_snarls) { - // load the BED file if (!bed_path.empty()) { - parse_bed_regions(bed_path, bed_regions); - if (verbose) { - logger.info() << "Loaded " << bed_regions.size() << " BED regions" << endl; - } // contig names left in this set are *not* in the graph unordered_set contig_set; for (const Region& region : bed_regions) { @@ -328,20 +364,17 @@ int main_clip(int argc, char** argv) { } else { assert(need_pp); assert(!ref_prefixes.empty()); - // load the BED regions from the reference path prefix - pp_graph->for_each_path_handle([&](path_handle_t path_handle) { - string path_name = pp_graph->get_path_name(path_handle); - subrange_t subrange; - path_name = Paths::strip_subrange(path_name, &subrange); - int64_t offset = subrange == PathMetadata::NO_SUBRANGE ? 0 : subrange.first; - for (const string& ref_prefix : ref_prefixes) { - if (path_name.compare(0, ref_prefix.length(), ref_prefix) == 0) { - Region region = {path_name, offset, offset + (int64_t)pp_graph->get_path_length(path_handle) - 1}; - bed_regions.push_back(region); - break; - } - } - }); + + for (auto& path_name : graph_paths_matched) { + path_handle_t path_handle = pp_graph->get_path_handle(path_name); + // Fill in the BED regions from the paths matching the + // prefixes, now that we can get the lengths. 
+ std::string base_name = get_path_base_name(*pp_graph, path_handle); + int64_t offset = get_path_base_offset(*pp_graph, path_handle); + Region region = {base_name, offset, offset + (int64_t)pp_graph->get_path_length(path_handle) - 1}; + bed_regions.push_back(region); + } + if (verbose) { logger.info() << "Inferred " << bed_regions.size() << " BED regions from paths in the graph" << endl; From b846618a935757ee27f01d49ec60490bb3a20413 Mon Sep 17 00:00:00 2001 From: Adam Novak Date: Wed, 10 Jun 2026 16:47:21 -0400 Subject: [PATCH 07/13] Index relevant paths in vg mcmc --- src/subcommand/mcmc_main.cpp | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/src/subcommand/mcmc_main.cpp b/src/subcommand/mcmc_main.cpp index eda6fe1fa8..432884757b 100644 --- a/src/subcommand/mcmc_main.cpp +++ b/src/subcommand/mcmc_main.cpp @@ -165,17 +165,13 @@ int main_mcmc(int argc, char** argv) { if(vg_graph == nullptr || vg_graph == 0) { logger.error() << "Graph is NULL" << endl; } - PathPositionHandleGraph* graph = nullptr; - graph = overlay_helper.apply(vg_graph); - // Check our paths for (const string& ref_path : ref_paths) { - if (!graph->has_path(ref_path)) { + if (!vg_graph->has_path(ref_path)) { logger.error() << "Reference path \"" << ref_path << "\" not found in graph" << endl; } } - // Check our offsets if (ref_path_offsets.size() != 0 && ref_path_offsets.size() != ref_paths.size()) { logger.error() << "when using -o, the same number paths must be given with -p" << endl; @@ -185,7 +181,18 @@ int main_mcmc(int argc, char** argv) { logger.error() << "when using -l, the same number paths must be given with -p" << endl; } - // No paths specified: use them all + PathPositionHandleGraph* graph = nullptr; + { + // Path-position index all the extra paths we need to work on, plus the + // reference and generic paths. 
+ std::unordered_set target_paths; + for (auto& name : ref_paths) { + target_paths.insert(name); + } + graph = overlay_helper.apply(vg_graph, target_paths); + } + + // No paths specified: use all the reference and generic paths if (ref_paths.empty()) { graph->for_each_path_of_sense({PathSense::REFERENCE, PathSense::GENERIC}, [&](path_handle_t path_handle) { const string& name = graph->get_path_name(path_handle); @@ -195,7 +202,7 @@ int main_mcmc(int argc, char** argv) { }); } - + // Check if VCF output file is specified ofstream vcf_file_out; if(!vcf_out.empty()){ From 2394248e343e4a898479a044cddc1a1f626b08f6 Mon Sep 17 00:00:00 2001 From: Adam Novak Date: Wed, 10 Jun 2026 16:57:15 -0400 Subject: [PATCH 08/13] Index truth paths in gampcompare --- src/subcommand/gampcompare_main.cpp | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/src/subcommand/gampcompare_main.cpp b/src/subcommand/gampcompare_main.cpp index 01a5d59717..cac5340695 100644 --- a/src/subcommand/gampcompare_main.cpp +++ b/src/subcommand/gampcompare_main.cpp @@ -124,10 +124,7 @@ int main_gampcompare(int argc, char** argv) { } path_handle_graph = vg::io::VPKG::load_one(graph_stream); } - - bdsg::PathPositionOverlayHelper overlay_helper; - PathPositionHandleGraph* path_position_handle_graph = overlay_helper.apply(path_handle_graph.get()); - + // We will collect all the truth positions string_hash_map > > > true_positions; function record_truth = [&true_positions](Alignment& aln) { @@ -152,6 +149,20 @@ int main_gampcompare(int argc, char** argv) { } vg::io::for_each_parallel(truth_file_in, record_truth); } + + // Once we know the truth positions we know the paths we need to index + std::unordered_set truth_paths; + for (auto& aln_positions : true_positions) { + for (auto& path_and_positions : aln_positions.second) { + truth_paths.insert(path_and_positions.first); + } + } + + + bdsg::PathPositionOverlayHelper overlay_helper; + // Index the reference and generic paths, 
plus any paths that positions are on, for position queries. + // TODO: Can we actually end up using any non-reference/non-generic paths in the comparison? + PathPositionHandleGraph* path_position_handle_graph = overlay_helper.apply(path_handle_graph.get(), truth_paths); // A buffer we use for the TSV output vector>> buffers(get_thread_count()); From 1d0d2f1c9066695fb5f115f6cccece51df6180bf Mon Sep 17 00:00:00 2001 From: Adam Novak Date: Wed, 10 Jun 2026 17:06:11 -0400 Subject: [PATCH 09/13] Index target sample paths when pulling snarl positions on a sample in vg stats --- src/subcommand/stats_main.cpp | 32 +++++++++++++++++++++----------- 1 file changed, 21 insertions(+), 11 deletions(-) diff --git a/src/subcommand/stats_main.cpp b/src/subcommand/stats_main.cpp index f766212aea..ee603e9b2e 100644 --- a/src/subcommand/stats_main.cpp +++ b/src/subcommand/stats_main.cpp @@ -1179,30 +1179,38 @@ int main_stats(int argc, char** argv) { std::unordered_map extra_node_weight; constexpr size_t EXTRA_WEIGHT = 10000000000; + // Collect paths in our assigned snarl coordinate sample + std::vector ref_path_names; + if (snarl_stats && !snarl_sample.empty()) { + graph->for_each_path_of_sample(snarl_sample, [&](path_handle_t path_handle) { + ref_path_names.push_back(graph->get_path_name(path_handle)); + }); + if (ref_path_names.empty()) { + logger.error() << "unable to find any paths of --snarl-sample " << snarl_sample << endl; + } + } + + // Turn them into an unordered_set so we can make sure they're all positionally indexed + std::unordered_set target_paths(ref_path_names.begin(), ref_path_names.end()); + // additional indexes only needed when finding --snarl-sample coordinates unique_ptr path_trav_finder; bdsg::PathPositionOverlayHelper overlay_helper; - PathPositionHandleGraph* pp_graph = dynamic_cast(graph);; - unordered_map snarl_to_ref; + PathPositionHandleGraph* pp_graph = dynamic_cast(graph); + if (pp_graph == nullptr) { + pp_graph = overlay_helper.apply(graph, 
target_paths); + } if (snarl_stats) { // TSV header if (!snarl_sample.empty()) { // optionally prefix with BED-like refpath coordinates if --snarl-sample given cout <<"Contig\tStartPos\tEndPos\t"; - - if (pp_graph == nullptr) { - pp_graph = overlay_helper.apply(graph); - } - vector ref_path_names; pp_graph->for_each_path_of_sample(snarl_sample, [&](path_handle_t path_handle) { - ref_path_names.push_back(graph->get_path_name(path_handle)); + // Try and pin down the tips of the sample extra_node_weight[graph->get_id(graph->get_handle_of_step(graph->path_begin(path_handle)))] += EXTRA_WEIGHT; extra_node_weight[graph->get_id(graph->get_handle_of_step(graph->path_back(path_handle)))] += EXTRA_WEIGHT; }); - if (ref_path_names.empty()) { - logger.error() << "unable to find any paths of --snarl-sample" << endl; - } path_trav_finder = unique_ptr(new PathTraversalFinder(*pp_graph, ref_path_names)); } cout << "Start\tStart-Reversed\tEnd\tEnd-Reversed\tUltrabubble\tUnary\tShallow-Nodes" @@ -1212,6 +1220,8 @@ int main_stats(int argc, char** argv) { // First compute the snarls manager = IntegratedSnarlFinder(*graph, extra_node_weight).find_snarls_parallel(); + + std::unordered_map snarl_to_ref; manager.for_each_snarl_preorder([&](const Snarl* snarl) { // Loop over all the snarls and print stats. From 13552a8fedb90b88cbf89103b226d0a229643e0f Mon Sep 17 00:00:00 2001 From: Adam Novak Date: Wed, 10 Jun 2026 17:09:59 -0400 Subject: [PATCH 10/13] Explain why we can't do anything useful for inject --- src/subcommand/inject_main.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/subcommand/inject_main.cpp b/src/subcommand/inject_main.cpp index 268bbeeac0..5e9669923f 100644 --- a/src/subcommand/inject_main.cpp +++ b/src/subcommand/inject_main.cpp @@ -125,6 +125,10 @@ int main_inject(int argc, char** argv) { logger.error() << "Graph (-x) is required" << endl; } unique_ptr path_handle_graph = vg::io::VPKG::load_one(xg_name); + + // Path position index the graph. 
+ // TODO: We can't index the paths the input BAM is actually on; it needs to + // be on reference paths in the graph. bdsg::PathPositionOverlayHelper overlay_helper; PathPositionHandleGraph* xgidx = overlay_helper.apply(path_handle_graph.get()); From 7c94fcb3e49a4afed8d3289bf2d5d18e889a5169 Mon Sep 17 00:00:00 2001 From: Adam Novak Date: Wed, 10 Jun 2026 17:30:01 -0400 Subject: [PATCH 11/13] Use right graph when looking for paths to index in vg clip --- src/subcommand/clip_main.cpp | 2 +- test/t/53_clip.t | 24 ++++++++++++------------ 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/src/subcommand/clip_main.cpp b/src/subcommand/clip_main.cpp index 9fdacef6d2..42e34cf459 100644 --- a/src/subcommand/clip_main.cpp +++ b/src/subcommand/clip_main.cpp @@ -306,7 +306,7 @@ int main_clip(int argc, char** argv) { // If we want all paths matching some prefixes graph->for_each_path_handle([&](path_handle_t path_handle) { // Look at all ther paths - std::string path_name = pp_graph->get_path_name(path_handle); + std::string path_name = graph->get_path_name(path_handle); subrange_t subrange; // And get their base path names path_name = Paths::strip_subrange(path_name, &subrange); diff --git a/test/t/53_clip.t b/test/t/53_clip.t index edfec715fd..0f1ee63814 100644 --- a/test/t/53_clip.t +++ b/test/t/53_clip.t @@ -18,7 +18,7 @@ for i in `vg view clip_flat.vg | grep '^S' | awk '{print $1}'` ; do vg view clip vg view clip_flat.vg | grep ^S | wc -l > node_count diff step_count node_count is "$?" 
0 "every step in clipped graph belongs to reference path" -is $(vg paths -Ev hla.vg -Q "gi|568815551:1054737-1055734" | awk '{ print $2 }') $(vg stats -l clip_flat.vg | awk '{ print $2 }') "clipped graph has same length as ref path" +is "$(vg paths -Ev hla.vg -Q "gi|568815551:1054737-1055734" | awk '{ print $2 }')" "$(vg stats -l clip_flat.vg | awk '{ print $2 }')" "clipped graph has same length as ref path" rm -f region.bed step_count node_count @@ -27,7 +27,7 @@ printf "gi|157734152:29563108-29564082\t90\t92\n" > region.bed vg clip hla.vg -b region.bed > clip.vg vg validate clip.vg is "$?" 0 "clipped graph is valid" -is $(vg view clip.vg | grep ^S | wc -l) "49" "Just one node filtered" +is "$(vg view clip.vg | grep ^S | wc -l)" "49" "Just one node filtered" rm -f region.bed clip.vg @@ -36,7 +36,7 @@ printf "gi|568815564:1054403-1055400\t150\t153\n" > region.bed vg clip hla.vg -b region.bed > clip.vg vg validate clip.vg is "$?" 0 "clipped graph is valid" -is $(vg view clip.vg | grep ^L | wc -l) "65" "Just one edge filtered" +is "$(vg view clip.vg | grep ^L | wc -l)" "65" "Just one edge filtered" rm -f region.bed clip.vg @@ -44,7 +44,7 @@ rm -f region.bed clip.vg vg clip hla.vg -d 4 -P "gi|568815551:1054737-1055734" > clip.vg vg validate clip.vg is "$?" 0 "clipped graph is valid" -is $(vg view clip.vg | grep ^S | wc -l) "49" "Just one node filtered" +is "$(vg view clip.vg | grep ^S | wc -l)" "49" "Just one node filtered" rm -f clip.vg @@ -65,7 +65,7 @@ printf "gi|568815551:1054737-1055734\t600\t650\n" > region.bed vg clip hla.vg -b region.bed -d 4 > clip.vg vg validate clip.vg is "$?" 0 "clipped graph is valid" -is $(vg view clip.vg | grep ^S | wc -l) "49" "Just one node filtered" +is "$(vg view clip.vg | grep ^S | wc -l)" "49" "Just one node filtered" rm -f region.bed clip.vg @@ -86,25 +86,25 @@ diff tiny.gfa tiny-nostubs.gfa is "$?" 
0 "stub clipping removed all stubs" printf "x\t5\t25\n" > region.bed -is $(vg clip tiny-stubs.gfa -s -b region.bed | vg stats -N -) "17" "region clipping filtered out only 2 / 4 stub nodes" +is "$(vg clip tiny-stubs.gfa -s -b region.bed | vg stats -N -)" "17" "region clipping filtered out only 2 / 4 stub nodes" printf "L\t100\t+\t2\t-\t0M\n" >> tiny-stubs.gfa printf "L\t15\t+\t13\t-\t0M\n" >> tiny-stubs.gfa -is $(vg clip tiny-stubs.gfa -sS -P x | vg stats -HT - | sort -nk 2 | awk '{print $2}' | head -1) "1" "Correct head after path stubbification" -is $(vg clip tiny-stubs.gfa -sS -P x | vg stats -HT - | sort -nk 2 | awk '{print $2}' | tail -1) "15" "Correct tail after path stubbification" +is "$(vg clip tiny-stubs.gfa -sS -P x | vg stats -HT - | sort -nk 2 | awk '{print $2}' | head -1)" "1" "Correct head after path stubbification" +is "$(vg clip tiny-stubs.gfa -sS -P x | vg stats -HT - | sort -nk 2 | awk '{print $2}' | tail -1)" "15" "Correct tail after path stubbification" rm -f tiny.gfa tiny-stubs.gfa region.bed tiny-nostubs.gfa vg clip graphs/snarl-clip.gfa -A 2 -d 2 -P x > sc-A2d2.gfa -is $(vg find -x sc-A2d2.gfa -n 4 | wc -l) "0" "Node 4 correctly clipped with snarl length and depth filter" -is $(vg stats sc-A2d2.gfa -N) "14" "No other nodes were clipped with snarl length and depth filter" +is "$(vg find -x sc-A2d2.gfa -n 4 | wc -l)" "0" "Node 4 correctly clipped with snarl length and depth filter" +is "$(vg stats sc-A2d2.gfa -N)" "14" "No other nodes were clipped with snarl length and depth filter" rm -f sc-A2d2.gfa vg clip graphs/chain-clip.gfa -A 2 -d1 -P x -g > sc-A2d1g.gfa -is $(vg stats -E sc-A2d1g.gfa) "30" "One net edges clipped" -is $(vg stats -N sc-A2d1g.gfa) "22" "No other nodes were clipped with net edge filter" +is "$(vg stats -E sc-A2d1g.gfa)" "30" "One net edges clipped" +is "$(vg stats -N sc-A2d1g.gfa)" "22" "No other nodes were clipped with net edge filter" rm -f sc-A2d1g.gfa From bcd06d056b6edb03888160c7c6f46370d0a0c2da Mon Sep 17 00:00:00 
2001 From: Adam Novak Date: Wed, 10 Jun 2026 18:35:19 -0400 Subject: [PATCH 12/13] Bump libbdsg to version that ought to allow building bindings --- deps/libbdsg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deps/libbdsg b/deps/libbdsg index 5a602f2eaa..4e1ad0af93 160000 --- a/deps/libbdsg +++ b/deps/libbdsg @@ -1 +1 @@ -Subproject commit 5a602f2eaab5f4fe0f4cd4a5cba2b7b437743d4d +Subproject commit 4e1ad0af937adbe6db5f2ffce09d6992243df27f From 07451b68d41a9cee02b8d00d0cb0174f5dcbf574 Mon Sep 17 00:00:00 2001 From: Adam Novak Date: Wed, 10 Jun 2026 19:09:32 -0400 Subject: [PATCH 13/13] Use libbdsg where you can actually import the bindings --- deps/libbdsg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deps/libbdsg b/deps/libbdsg index 4e1ad0af93..73e74c223e 160000 --- a/deps/libbdsg +++ b/deps/libbdsg @@ -1 +1 @@ -Subproject commit 4e1ad0af937adbe6db5f2ffce09d6992243df27f +Subproject commit 73e74c223ed16c6b0db2c378f285819c5a3499e0