Use of nl.hartwigmedicalfoundation.bachelor.SnpEffect in project hmftools by hartwigmedical.
The method fromMap of the class BachelorEligibility.
/**
 * Builds a {@link BachelorEligibility} from the supplied programs.
 *
 * <p>For every program in {@code input} this method:
 * <ul>
 *   <li>collects a gene-name to Ensembl-ID multimap over all panels of the program, which is
 *       handed to the blacklist and whitelist predicates;</li>
 *   <li>creates one copy-number predicate per panel that lists
 *       {@code OtherEffect.HOMOZYGOUS_DELETION} and one disruption predicate per panel that lists
 *       {@code OtherEffect.GENE_DISRUPTION};</li>
 *   <li>creates one variant predicate per panel, matching a variant when any of its sample
 *       annotations refers to a panel transcript and carries one of the panel's SNP effects;</li>
 *   <li>adds the genome region of every panel gene to {@code result.variantLocationsToQuery},
 *       looked up by Ensembl ID first and by gene name as a fallback; genes for which neither
 *       lookup succeeds are logged and skipped;</li>
 *   <li>registers a {@link BachelorProgram} combining the above in {@code result.programs}.</li>
 * </ul>
 *
 * <p>The required effects and panel transcripts passed to the {@link BachelorProgram} are
 * accumulated over <em>all</em> panels of the program, in panel order. Values are appended as-is,
 * so an effect or transcript listed by several panels appears several times.
 *
 * @param input programs to process; only the map's values are used
 * @return a new eligibility instance holding one {@link BachelorProgram} per input program
 */
static BachelorEligibility fromMap(final Map<String, Program> input) {
    final BachelorEligibility result = new BachelorEligibility();

    for (final Program program : input.values()) {
        final Multimap<String, String> geneToEnsemblMap = HashMultimap.create();
        program.getPanel()
                .stream()
                .map(ProgramPanel::getGene)
                .flatMap(Collection::stream)
                .forEach(g -> geneToEnsemblMap.put(g.getName(), g.getEnsembl()));

        // NOTE: copy number and SVs are untested/unverified for now, but leave in support for them
        final List<Predicate<GeneCopyNumber>> cnvPredicates = Lists.newArrayList();
        final List<Predicate<HmfGenomeRegion>> disruptionPredicates = Lists.newArrayList();
        final List<Predicate<VariantModel>> panelPredicates = Lists.newArrayList();

        // Program-wide accumulators: every panel contributes, not only the last one visited.
        final List<String> requiredEffects = Lists.newArrayList();
        final List<String> panelTranscripts = Lists.newArrayList();

        for (final ProgramPanel panel : program.getPanel()) {
            final List<GeneIdentifier> genes = panel.getGene();

            // process copy number sections
            if (panel.getEffect().contains(OtherEffect.HOMOZYGOUS_DELETION)) {
                // TODO: we are matching on transcript ID here but we only have canonical transcripts in our panel file
                cnvPredicates.add(cnv -> genes.stream().anyMatch(g -> g.getEnsembl().equals(cnv.transcriptID())));
            }

            // process structural variant disruptions
            if (panel.getEffect().contains(OtherEffect.GENE_DISRUPTION)) {
                // TODO: we are matching on transcript ID here but we only have canonical transcripts in our panel file
                disruptionPredicates.add(sv -> genes.stream().anyMatch(g -> g.getEnsembl().equals(sv.transcriptID())));
            }

            // process variants from vcf: the predicate only looks at this panel's own effects,
            // so keep them in a dedicated list that the lambda can capture
            final List<String> panelEffects =
                    panel.getSnpEffect().stream().map(SnpEffect::value).collect(Collectors.toList());
            requiredEffects.addAll(panelEffects);

            final Predicate<VariantModel> panelPredicate = v -> genes.stream()
                    .anyMatch(p -> v.sampleAnnotations()
                            .stream()
                            .anyMatch(a -> a.featureID().equals(p.getEnsembl())
                                    && panelEffects.stream().anyMatch(x -> a.effects().contains(x))));
            panelPredicates.add(panelPredicate);

            // update query targets
            for (final GeneIdentifier g : genes) {
                panelTranscripts.add(g.getEnsembl());

                final HmfGenomeRegion region = allTranscriptsMap.get(g.getEnsembl());
                if (region != null) {
                    result.variantLocationsToQuery.add(region);
                    continue;
                }

                // transcript unknown: fall back to a lookup by gene name
                final HmfGenomeRegion namedRegion = allGenesMap.get(g.getName());
                if (namedRegion != null) {
                    result.variantLocationsToQuery.add(namedRegion);
                } else {
                    // just skip this gene for now
                    LOGGER.warn("Program {} gene {} non-canonical transcript {} couldn't find region, transcript will be skipped",
                            program.getName(),
                            g.getName(),
                            g.getEnsembl());
                }
            }
        }

        final Predicate<VariantModel> inPanel = v -> panelPredicates.stream().anyMatch(p -> p.test(v));
        final Predicate<VariantModel> inBlacklist = new BlacklistPredicate(geneToEnsemblMap.values(), program.getBlacklist());
        final Predicate<VariantModel> inWhitelist = new WhitelistPredicate(geneToEnsemblMap, program.getWhitelist());

        // panel variants pass unless blacklisted; non-panel variants pass only when whitelisted
        final Predicate<VariantModel> snvPredicate = v -> inPanel.test(v) ? !inBlacklist.test(v) : inWhitelist.test(v);

        final Predicate<GeneCopyNumber> copyNumberPredicate =
                cnv -> cnvPredicates.stream().anyMatch(p -> p.test(cnv)) && cnv.minCopyNumber() < MAX_COPY_NUMBER_FOR_LOSS;
        final Predicate<HmfGenomeRegion> disruptionPredicate =
                disruption -> disruptionPredicates.stream().anyMatch(p -> p.test(disruption));

        final BachelorProgram bachelorProgram = new BachelorProgram(program.getName(),
                snvPredicate,
                copyNumberPredicate,
                disruptionPredicate,
                requiredEffects,
                panelTranscripts);
        result.programs.add(bachelorProgram);
    }

    return result;
}
Aggregations