Skip to content

Commit

Permalink
Expand ROIs to all signal regions and cell types
Browse files Browse the repository at this point in the history
I wanted to test whether using a binning approach, combined with a training strategy similar to PCPC/RR, has an effect on our predictions.
  • Loading branch information
Cazares, Tareian authored and Cazares, Tareian committed Jun 3, 2021
1 parent 78ffa82 commit 39bce87
Show file tree
Hide file tree
Showing 3 changed files with 82 additions and 410 deletions.
27 changes: 8 additions & 19 deletions maxatac/analyses/train.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,11 +54,6 @@ def run_training(args):
weights=args.weights
)

logging.error("Window the genome to desired bin width and step size")

# Import windowed genome
windowed_gen = pd.read_csv(args.window_sequence, sep='\t', header=None, names=["Chr", "Start", "Stop"])

logging.error("Import training regions")

# Import training regions
Expand All @@ -67,8 +62,8 @@ def run_training(args):
prefix=args.prefix,
output_directory=maxatac_model.output_directory,
tag="training",
test_cell_type=maxatac_model.test_cell_type,
genomic_bins=windowed_gen)
test_cell_type=maxatac_model.test_cell_type
)

logging.error("Import validation regions")

Expand All @@ -78,35 +73,29 @@ def run_training(args):
prefix=args.prefix,
output_directory=maxatac_model.output_directory,
tag="validation",
test_cell_type=maxatac_model.test_cell_type,
genomic_bins=windowed_gen)
test_cell_type=maxatac_model.test_cell_type
)

logging.error("Initialize the training generator")

# Initialize the training generator
train_gen = DataGenerator(sequence=args.sequence,
meta_table=maxatac_model.meta_dataframe,
roi_pool=train_examples,
cell_type_list=maxatac_model.cell_types,
chroms=args.tchroms,
roi_pool=train_examples.ROI_pool_df,
quant=args.quant,
batch_size=args.batch_size,
target_scale_factor=args.target_scale_factor,
shuffle_cell_type=args.shuffle_cell_type
target_scale_factor=args.target_scale_factor
)

logging.error("Initialize the validation generator")

# Initialize the validation generator
val_gen = DataGenerator(sequence=args.sequence,
meta_table=maxatac_model.meta_dataframe,
roi_pool=validate_examples,
cell_type_list=maxatac_model.cell_types,
chroms=args.vchroms,
roi_pool=validate_examples.ROI_pool_df,
quant=args.quant,
batch_size=args.batch_size,
target_scale_factor=args.target_scale_factor,
shuffle_cell_type=args.shuffle_cell_type
target_scale_factor=args.target_scale_factor
)

logging.error("Fit the model")
Expand Down
20 changes: 0 additions & 20 deletions maxatac/utilities/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -422,13 +422,6 @@ def get_parser():
help="Input signal file"
)

predict_parser.add_argument("--minimum",
dest="minimum",
type=float,
default=DEFAULT_MIN_PREDICTION,
help="Minimum prediction value to be reported. Default: " + str(DEFAULT_MIN_PREDICTION)
)

predict_parser.add_argument("--output",
dest="output",
type=str,
Expand All @@ -452,12 +445,6 @@ def get_parser():
Default: None, predictions are done on the whole chromosome length"
)

predict_parser.add_argument("--keep",
dest="keep",
action="store_true",
help="Keep temporary files. Default: False"
)

predict_parser.add_argument("--threads",
dest="threads",
default=get_cpu_count(),
Expand Down Expand Up @@ -526,13 +513,6 @@ def get_parser():

train_parser.set_defaults(func=run_training)

train_parser.add_argument("--window_sequence",
dest="window_sequence",
type=str,
required=True,
help="Windowed Genome at 1024 bp sliding at 256"
)

train_parser.add_argument("--sequence",
dest="sequence",
type=str,
Expand Down

0 comments on commit 39bce87

Please sign in to comment.