Use of nl.hartwigmedicalfoundation.bachelor.Program in project hmftools by hartwigmedical.
The class BachelorEligibility, method processCopyNumbers.
@NotNull
Collection<EligibilityReport> processCopyNumbers(final String patient, final List<GeneCopyNumber> copyNumbers) {
    final List<EligibilityReport> results = Lists.newArrayList();
    for (final GeneCopyNumber copyNumber : copyNumbers) {
        // TODO: verify the germline check
        final boolean isGermline = copyNumber.germlineHet2HomRegions() + copyNumber.germlineHomRegions() > 0;
        final List<String> matchingPrograms = programs.stream()
                .filter(program -> program.copyNumberProcessor().test(copyNumber))
                .map(BachelorProgram::name)
                .collect(Collectors.toList());
        final List<EligibilityReport> interimResults = matchingPrograms.stream()
                .map(p -> ImmutableEligibilityReport.builder()
                        .patient(patient)
                        .source(isGermline ? GERMLINE_DELETION : SOMATIC_DELETION)
                        .program(p).id("").genes(copyNumber.gene())
                        .chrom(copyNumber.chromosome()).pos(copyNumber.start())
                        .ref("").alts("").effects("")
                        .build())
                .collect(Collectors.toList());
        results.addAll(interimResults);
    }
    return results;
}
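The method follows a simple pattern: test each program's copy-number predicate against the observed event, then emit one report per matching program. Below is a minimal, self-contained sketch of that pattern; the Program record and the integer copy-number stand-in are illustrative only and are not the hmftools types.

import java.util.List;
import java.util.function.Predicate;
import java.util.stream.Collectors;

// Standalone sketch: every program whose predicate accepts the event contributes one match.
public class CopyNumberMatchSketch {
    record Program(String name, Predicate<Integer> copyNumberTest) {}

    public static void main(String[] args) {
        final List<Program> programs = List.of(
                new Program("programA", cn -> cn < 1),   // homozygous loss only
                new Program("programB", cn -> cn < 2));  // any loss
        final int observedCopyNumber = 0;
        final List<String> matching = programs.stream()
                .filter(p -> p.copyNumberTest().test(observedCopyNumber))
                .map(Program::name)
                .collect(Collectors.toList());
        System.out.println("matched programs: " + matching); // [programA, programB]
    }
}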
Use of nl.hartwigmedicalfoundation.bachelor.Program in project hmftools by hartwigmedical.
The class BachelorEligibility, method fromMap.
static BachelorEligibility fromMap(final Map<String, Program> input) {
    final BachelorEligibility result = new BachelorEligibility();
    for (final Program program : input.values()) {
        final Multimap<String, String> geneToEnsemblMap = HashMultimap.create();
        program.getPanel().stream()
                .map(ProgramPanel::getGene)
                .flatMap(Collection::stream)
                .forEach(g -> geneToEnsemblMap.put(g.getName(), g.getEnsembl()));
        // NOTE: copy number and SVs are untested/unverified for now, but leave in support for them
        // process copy number sections
        final List<Predicate<GeneCopyNumber>> cnvPredicates = Lists.newArrayList();
        for (final ProgramPanel panel : program.getPanel()) {
            final List<GeneIdentifier> genes = panel.getGene();
            if (panel.getEffect().contains(OtherEffect.HOMOZYGOUS_DELETION)) {
                final Predicate<GeneCopyNumber> geneCopyNumberPredicate =
                        cnv -> genes.stream().anyMatch(g -> g.getEnsembl().equals(cnv.transcriptID()));
                // TODO: we are matching on transcript ID here but we only have canonical transcripts in our panel file
                cnvPredicates.add(geneCopyNumberPredicate);
            }
        }
        // process structural variant disruptions
        final List<Predicate<HmfGenomeRegion>> disruptionPredicates = Lists.newArrayList();
        for (final ProgramPanel panel : program.getPanel()) {
            final List<GeneIdentifier> genes = panel.getGene();
            if (panel.getEffect().contains(OtherEffect.GENE_DISRUPTION)) {
                final Predicate<HmfGenomeRegion> disruptionPredicate =
                        sv -> genes.stream().anyMatch(g -> g.getEnsembl().equals(sv.transcriptID()));
                // TODO: we are matching on transcript ID here but we only have canonical transcripts in our panel file
                disruptionPredicates.add(disruptionPredicate);
            }
        }
        // process variants from vcf
        final List<Predicate<VariantModel>> panelPredicates = Lists.newArrayList();
        List<String> requiredEffects = Lists.newArrayList();
        List<String> panelTranscripts = Lists.newArrayList();
        for (final ProgramPanel panel : program.getPanel()) {
            final List<GeneIdentifier> genes = panel.getGene();
            // collect the effects to search for
            requiredEffects = panel.getSnpEffect().stream().map(SnpEffect::value).collect(Collectors.toList());
            panelTranscripts = genes.stream().map(GeneIdentifier::getEnsembl).collect(Collectors.toList());
            final List<String> effects = requiredEffects;
            final Predicate<VariantModel> panelPredicate = v -> genes.stream()
                    .anyMatch(p -> v.sampleAnnotations().stream()
                            .anyMatch(a -> a.featureID().equals(p.getEnsembl())
                                    && effects.stream().anyMatch(x -> a.effects().contains(x))));
            panelPredicates.add(panelPredicate);
            // update query targets
            for (final GeneIdentifier g : genes) {
                final HmfGenomeRegion region = allTranscriptsMap.get(g.getEnsembl());
                if (region == null) {
                    final HmfGenomeRegion namedRegion = allGenesMap.get(g.getName());
                    if (namedRegion == null) {
                        LOGGER.warn("Program {} gene {} non-canonical transcript {} couldn't find region, transcript will be skipped",
                                program.getName(), g.getName(), g.getEnsembl());
                        // just skip this gene for now
                    } else {
                        result.variantLocationsToQuery.add(namedRegion);
                    }
                } else {
                    result.variantLocationsToQuery.add(region);
                }
            }
        }
        final Predicate<VariantModel> inPanel = v -> panelPredicates.stream().anyMatch(p -> p.test(v));
        final Predicate<VariantModel> inBlacklist = new BlacklistPredicate(geneToEnsemblMap.values(), program.getBlacklist());
        final Predicate<VariantModel> inWhitelist = new WhitelistPredicate(geneToEnsemblMap, program.getWhitelist());
        final Predicate<VariantModel> snvPredicate = v -> inPanel.test(v) ? !inBlacklist.test(v) : inWhitelist.test(v);
        final Predicate<GeneCopyNumber> copyNumberPredicate =
                cnv -> cnvPredicates.stream().anyMatch(p -> p.test(cnv)) && cnv.minCopyNumber() < MAX_COPY_NUMBER_FOR_LOSS;
        final Predicate<HmfGenomeRegion> disruptionPredicate =
                disruption -> disruptionPredicates.stream().anyMatch(p -> p.test(disruption));
        BachelorProgram bachelorProgram = new BachelorProgram(program.getName(), snvPredicate, copyNumberPredicate,
                disruptionPredicate, requiredEffects, panelTranscripts);
        result.programs.add(bachelorProgram);
    }
    return result;
}
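The interesting part of the composition is the variant filter: variants inside the panel pass unless blacklisted, while variants outside the panel pass only if whitelisted. A minimal, self-contained sketch of that ternary predicate, using plain strings purely for illustration rather than the hmftools VariantModel:

import java.util.function.Predicate;

// Standalone sketch of the snvPredicate composed above.
public class SnvPredicateSketch {
    public static void main(String[] args) {
        final Predicate<String> inPanel = v -> v.startsWith("BRCA");
        final Predicate<String> inBlacklist = v -> v.endsWith("benign");
        final Predicate<String> inWhitelist = v -> v.endsWith("hotspot");
        final Predicate<String> snvPredicate = v -> inPanel.test(v) ? !inBlacklist.test(v) : inWhitelist.test(v);

        System.out.println(snvPredicate.test("BRCA1:frameshift")); // true  (in panel, not blacklisted)
        System.out.println(snvPredicate.test("BRCA2:benign"));     // false (in panel, blacklisted)
        System.out.println(snvPredicate.test("TP53:hotspot"));     // true  (off panel, whitelisted)
        System.out.println(snvPredicate.test("TP53:other"));       // false (off panel, not whitelisted)
    }
}

The design lets the blacklist narrow an otherwise broad panel match while the whitelist rescues specific variants the panel rules would never reach.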
Use of nl.hartwigmedicalfoundation.bachelor.Program in project hmftools by hartwigmedical.
The class BachelorHelper, method loadXML.
@NotNull
public static Map<String, Program> loadXML(final Path path) throws IOException, SAXException {
    final BachelorSchema schema = BachelorSchema.make();
    final List<Program> programs = Files.walk(path)
            .filter(p -> p.toString().endsWith(".xml"))
            .map(schema::processXML)
            .filter(Objects::nonNull)
            .collect(Collectors.toList());
    final Map<String, Program> result = Maps.newHashMap();
    for (final Program p : programs) {
        if (result.containsKey(p.getName())) {
            LOGGER.error("duplicate programs detected: {}", p.getName());
            System.exit(1);
        } else {
            result.put(p.getName(), p);
        }
    }
    return result;
}
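The discovery step is a recursive walk that keeps every *.xml file under the given path. A minimal, self-contained sketch of just that step; the directory name is hypothetical.

import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.List;
import java.util.stream.Collectors;
import java.util.stream.Stream;

// Standalone sketch: walk a directory tree and collect the XML config files.
public class XmlDiscoverySketch {
    public static void main(String[] args) throws IOException {
        final Path configDir = Paths.get("bachelor-config"); // hypothetical directory
        try (Stream<Path> paths = Files.walk(configDir)) {
            final List<Path> xmlFiles = paths
                    .filter(p -> p.toString().endsWith(".xml"))
                    .collect(Collectors.toList());
            xmlFiles.forEach(p -> System.out.println("found config: " + p));
        }
    }
}

Note that Files.walk returns a stream backed by an open directory handle; the sketch closes it via try-with-resources, whereas loadXML above leaves the stream unclosed and relies on it being short-lived.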
Use of nl.hartwigmedicalfoundation.bachelor.Program in project hmftools by hartwigmedical.
The class BachelorApplication, method main.
public static void main(final String... args) {
    final Options options = createOptions();
    try {
        final CommandLine cmd = createCommandLine(options, args);
        // load configs
        final Map<String, Program> map;
        if (cmd.hasOption(CONFIG_DIRECTORY)) {
            map = BachelorHelper.loadXML(Paths.get(cmd.getOptionValue(CONFIG_DIRECTORY)));
        } else if (cmd.hasOption(CONFIG_XML)) {
            map = BachelorHelper.loadXML(Paths.get(cmd.getOptionValue(CONFIG_XML)));
        } else {
            LOGGER.error("config directory or xml required!");
            System.exit(1);
            return;
        }
        if (cmd.hasOption(VALIDATE)) {
            System.exit(0);
            return;
        }
        if (map.isEmpty()) {
            LOGGER.error("no programs loaded, exiting");
            System.exit(1);
            return;
        }
        final BachelorEligibility eligibility = BachelorEligibility.fromMap(map);
        LOGGER.info("beginning processing");
        final boolean germline = cmd.hasOption(GERMLINE);
        final boolean somatic = cmd.hasOption(SOMATIC);
        final boolean copyNumber = cmd.hasOption(COPYNUMBER);
        final boolean structuralVariants = cmd.hasOption(SV);
        final boolean doAll = !(germline || somatic || copyNumber || structuralVariants);
        final List<File> filesToMerge;
        if (cmd.hasOption(BATCH_DIRECTORY)) {
            final Path root = Paths.get(cmd.getOptionValue(BATCH_DIRECTORY));
            try (final Stream<Path> stream = Files.walk(root, 1, FileVisitOption.FOLLOW_LINKS).parallel()) {
                filesToMerge = stream.filter(p -> p.toFile().isDirectory())
                        .filter(p -> !p.equals(root))
                        .map(RunDirectory::new)
                        .map(run -> process(eligibility, run, germline || doAll, somatic || doAll,
                                copyNumber || doAll, structuralVariants || doAll))
                        .collect(Collectors.toList());
            }
        } else if (cmd.hasOption(RUN_DIRECTORY)) {
            final Path path = Paths.get(cmd.getOptionValue(RUN_DIRECTORY));
            if (!Files.exists(path)) {
                LOGGER.error("-runDirectory path does not exist");
                System.exit(1);
                return;
            }
            filesToMerge = Collections.singletonList(process(eligibility, new RunDirectory(path),
                    germline || doAll, somatic || doAll, copyNumber || doAll, structuralVariants || doAll));
        } else {
            LOGGER.error("requires either a batch or single run directory");
            System.exit(1);
            return;
        }
        LOGGER.info("processing complete");
        LOGGER.info("merging to CSV {}", cmd.getOptionValue(OUTPUT));
        try (final BufferedWriter writer = Files.newBufferedWriter(Paths.get(cmd.getOptionValue(OUTPUT)))) {
            // header
            writer.write(fileHeader());
            writer.newLine();
            for (final File file : filesToMerge) {
                final List<String> lines = Files.readAllLines(file.toPath());
                for (final String line : lines) {
                    writer.write(line);
                    writer.newLine();
                }
            }
        }
        LOGGER.info("output written");
        LOGGER.info("bachelor done");
    } catch (final ParseException e) {
        printHelpAndExit(options);
    } catch (Exception e) {
        e.printStackTrace();
    }
}
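The final stage is a plain text merge: write one header line, then append the raw lines of every per-run output file in order. A minimal, self-contained sketch of that step; the file names and header columns are hypothetical, not the actual fileHeader() output.

import java.io.BufferedWriter;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.List;

// Standalone sketch of the CSV merge at the end of main.
public class CsvMergeSketch {
    public static void main(String[] args) throws IOException {
        final List<Path> perRunFiles = List.of(Paths.get("run1.csv"), Paths.get("run2.csv")); // hypothetical inputs
        try (BufferedWriter writer = Files.newBufferedWriter(Paths.get("merged.csv"))) {
            writer.write("PATIENT,PROGRAM,SOURCE,GENE,CHROM,POS"); // hypothetical header
            writer.newLine();
            for (final Path file : perRunFiles) {
                for (final String line : Files.readAllLines(file)) {
                    writer.write(line);
                    writer.newLine();
                }
            }
        }
    }
}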
Use of nl.hartwigmedicalfoundation.bachelor.Program in project hmftools by hartwigmedical.
The class BachelorEligibility, method processStructuralVariant.
private Collection<EligibilityReport> processStructuralVariant(final String patient, final GenomePosition position,
        final GenomePosition other, final StructuralVariantType svType) {
    final List<EligibilityReport> results = Lists.newArrayList();
    // TODO: can we do better than this performance wise? new map?
    for (final HmfGenomeRegion region : allGenesByChromosomeMap.get(position.chromosome())) {
        if (!region.contains(position)) {
            continue;
        }
        // skip non-inversion intronic variants
        if (region.contains(other) && svType != StructuralVariantType.INV) {
            final int intronStart = intron(region.exome(), position);
            final int intronEnd = intron(region.exome(), other);
            // the variant is intronic in a gene -- we will filter it
            if (intronStart >= 0 && intronStart == intronEnd) {
                continue;
            }
        }
        programs.stream()
                .filter(p -> p.disruptionProcessor().test(region))
                .map(p -> ImmutableEligibilityReport.builder()
                        .patient(patient)
                        .source(SOMATIC_DISRUPTION)
                        .program(p.name())
                        .id("").genes(region.gene()).chrom(region.chromosome())
                        .pos(position.position()).ref("").alts("").effects("")
                        .build())
                .forEach(results::add);
    }
    return results;
}
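The filter rests on a simple idea: if both breakends of a non-inversion variant land inside the same intron of a transcript, no coding sequence can be disrupted, so the variant is skipped. A minimal, self-contained sketch of that check; the intron helper and exon coordinates here are illustrative and not the hmftools implementation.

import java.util.List;

// Standalone sketch of the same-intron filter used above.
public class IntronFilterSketch {
    // Returns the index of the intron containing the position, or -1 if the
    // position is exonic or outside the transcript. Exons are sorted [start, end] pairs.
    static int intron(final List<int[]> exons, final int position) {
        for (int i = 0; i + 1 < exons.size(); i++) {
            if (position > exons.get(i)[1] && position < exons.get(i + 1)[0]) {
                return i;
            }
        }
        return -1;
    }

    public static void main(String[] args) {
        final List<int[]> exons = List.of(new int[] {100, 200}, new int[] {500, 600}, new int[] {900, 1000});
        final int start = 250;
        final int end = 480; // both breakends inside intron 0
        final int intronStart = intron(exons, start);
        final int intronEnd = intron(exons, end);
        final boolean purelyIntronic = intronStart >= 0 && intronStart == intronEnd;
        System.out.println("skip variant: " + purelyIntronic); // true
    }
}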