Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
60 changes: 39 additions & 21 deletions src/ldmat/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -255,6 +255,7 @@ def convert_full_chromosome_h5(
precision,
decimals,
start_locus,
max_chunk_width,
chromosome,
locus_regex,
loader_class=BroadInstituteLoader,
Expand All @@ -281,24 +282,36 @@ def convert_full_chromosome_h5(

first_missing_locus = start_locus

for i, (file, local_start_locus, local_end_locus) in enumerate(files):
if local_start_locus >= start_locus:
if i + 1 < len(files):
next_covered_locus = files[i + 1][1]
else:
next_covered_locus = local_end_locus
convert_h5(
file,
outfile,
first_missing_locus,
next_covered_locus,
precision,
decimals,
loader_class=loader_class,
)
first_missing_locus = next_covered_locus
i = 0
while i < len(files):
file, local_start_locus, local_end_locus = files[i]
if local_start_locus < start_locus:
i += 1
continue

if i + 1 < len(files):
next_covered_locus = files[i+1][1]
else:
next_covered_locus = local_end_locus

logger.info("{:.0f}% complete".format(((i + 1) * 100) / len(files)))
if max_chunk_width and (first_missing_locus + max_chunk_width < next_covered_locus):
next_locus = first_missing_locus + max_chunk_width
else:
next_locus = next_covered_locus
i += 1

convert_h5(
file,
outfile,
first_missing_locus,
next_locus,
precision,
decimals,
loader_class=loader_class,
)
first_missing_locus = next_locus

logger.info("{:.0f}% complete".format((i * 100) / len(files)))


def convert_maf_h5(infile, outfile, loader_class=BroadInstituteLoader):
Expand Down Expand Up @@ -393,7 +406,10 @@ def extract_metadata_df_from_group(group):
df = pd.DataFrame(
group[POSITION_DATASET], columns=["BP"], index=group[NAME_DATASET]
)
df["relative_pos"] = np.arange(len(df))
df["relative_col"] = np.arange(len(df))

start_offset = len(df[df.BP < group.attrs[START_ATTR]])
df["relative_row"] = df["relative_col"] - start_offset
return df


Expand All @@ -406,8 +422,8 @@ def get_horizontal_slice(group, rows, columns, range_query):
row_inds = df_ld_snps.BP.isin(rows)
col_inds = df_ld_snps.BP.isin(columns)

row_positions = df_ld_snps[row_inds].relative_pos
col_positions = df_ld_snps[col_inds].relative_pos
row_positions = df_ld_snps[row_inds].relative_row
col_positions = df_ld_snps[col_inds].relative_col

h_slice = None
if len(row_positions) and len(col_positions):
Expand Down Expand Up @@ -809,11 +825,12 @@ def convert(infile, outfile, min_value, decimals, start_locus, end_locus, loader
@click.option("--min-value", "-m", type=float, default=None)
@click.option("--decimals", "-d", type=int, default=None)
@click.option("--start-locus", "-s", type=int, default=1)
@click.option("--max-chunk-width", "-w", type=int, default=None)
@click.option("--chromosome", "-c", type=int, required=True)
@click.option("--locus-regex", "-r", type=str, default=r"_(\d+)", show_default=True)
@loader_option
def convert_chromosome(
filepath, outfile, min_value, decimals, start_locus, chromosome, locus_regex, loader
filepath, outfile, min_value, decimals, start_locus, max_chunk_width, chromosome, locus_regex, loader
):
logger.debug(f"Converting chromosome {chromosome}")

Expand All @@ -823,6 +840,7 @@ def convert_chromosome(
min_value,
decimals,
start_locus,
max_chunk_width,
chromosome,
locus_regex,
loader,
Expand Down