Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
104 changes: 104 additions & 0 deletions fanc/commands/fanc_commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -2189,6 +2189,110 @@ def dump(argv, **kwargs):
logger.info("All done.")


def resolution_parser():
parser = argparse.ArgumentParser(
prog='fanc resolution',
description='Determine the resolution of a Hi-C experiment'
)

parser.add_argument(
'input',
help='Directory containing binned Hi-C matrices.'
)

parser.add_argument(
'output',
nargs='?',
help='Output file for results. If not provided, will write to stdout'
)

parser.add_argument(
'-i', '--interactions-threshold',
type=int,
default=1000,
help='Threshold of interactions per bin. Default: %(default)d'
)

parser.add_argument(
'-t', '--threads',
type=int,
default=1,
help='Number of threads used for parallel processing of files. Default: %(default)d'
)

parser.add_argument(
'-p', '--plot',
help='Path for saving resolution plot'
)

return parser


def _resolution_bins_passing_threshold(hic_file, threshold):
"""Percentage of bins passing threshold of marginals."""
from fanc.tools.load import load

hic = load(hic_file, mode='r')
marginals = hic.marginals(masked=False, norm=False)
return hic.bin_size, sum(marginals > threshold) / len(marginals)


def resolution(argv, **kwargs):
parser = resolution_parser()
args = parser.parse_args(argv[2:])

input_dir = args.input
output_file = os.path.expanduser(args.output) if args.output is not None else None
plot_file = os.path.expanduser(args.plot) if args.plot is not None else None
threshold = args.interactions_threshold
threads = args.threads

from fanc.tools.load import load
from fanc.tools.general import human_format
import glob
from multiprocessing import Pool
import sys

input_files = sorted(
glob.iglob(f"{input_dir}/*.hic"),
key=lambda file: load(file, mode='r').bin_size
)

with Pool(threads) as p:
results = p.starmap(
_resolution_bins_passing_threshold,
((file, threshold) for file in input_files)
)

with open(output_file, 'w') if output_file is not None else sys.stdout as f:
for result, file in sorted(zip(results, input_files), reverse=True):
f.write(f"{os.path.basename(file)}\t{result[0]}\t{result[1]}\n")

if plot_file is not None:
import matplotlib
matplotlib.use('agg')
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker

bin_size, bins_passing = list(zip(*results))
bins_passing = [x * 100 for x in bins_passing]

fig, ax = plt.subplots()
ax.plot(bin_size, bins_passing)
ax.axhline(80, linestyle='--', color='black')
ax.set_xscale('log')
ax.set_xticks(bin_size)
ax.invert_xaxis()
ax.xaxis.set_major_formatter(ticker.ScalarFormatter())
ax.xaxis.set_major_formatter(
ticker.FuncFormatter(lambda x, _: f"{human_format(x)}b")
)
ax.set_xlabel('Bin size')
ax.set_ylabel(f"Bins with > {threshold} contacts %")
fig.savefig(plot_file)
plt.close(fig)


def pca_parser():
parser = argparse.ArgumentParser(
prog="fanc pca",
Expand Down