Search in sources :

Example 1 with GeneIdentifier

use of nl.hartwigmedicalfoundation.bachelor.GeneIdentifier in project hmftools by hartwigmedical.

The fromMap method of the class BachelorEligibility.

/**
 * Builds a {@link BachelorEligibility} from the supplied programs.
 *
 * <p>For every program in {@code input} this creates one {@code BachelorProgram} holding:
 * <ul>
 *   <li>an SNV predicate: a variant that matches any panel passes unless the blacklist predicate
 *       matches it; a variant that matches no panel passes only if the whitelist predicate matches it;</li>
 *   <li>a copy number predicate: matches when any panel with {@code HOMOZYGOUS_DELETION} lists the
 *       transcript and the minimum copy number is below {@code MAX_COPY_NUMBER_FOR_LOSS};</li>
 *   <li>a disruption predicate: matches when any panel with {@code GENE_DISRUPTION} lists the transcript;</li>
 *   <li>the SnpEff effect values and the transcript IDs collected over <em>all</em> panels of the
 *       program, each value listed once, in first-seen order.</li>
 * </ul>
 * As a side effect, the genome region of every panel gene that can be resolved (by transcript ID first,
 * by gene name as fallback) is added to {@code result.variantLocationsToQuery}.
 *
 * @param input the programs to convert; only the map values are used
 * @return the populated eligibility object
 */
static BachelorEligibility fromMap(final Map<String, Program> input) {
    final BachelorEligibility result = new BachelorEligibility();
    for (final Program program : input.values()) {
        // gene name -> transcript IDs across every panel; feeds the black/whitelist predicates below
        final Multimap<String, String> geneToEnsemblMap = HashMultimap.create();
        program.getPanel().stream().map(ProgramPanel::getGene).flatMap(Collection::stream).forEach(g -> geneToEnsemblMap.put(g.getName(), g.getEnsembl()));
        // NOTE: copy number and SVs are untested/unverified for now, but leave in support for them
        // process copy number sections
        final List<Predicate<GeneCopyNumber>> cnvPredicates = Lists.newArrayList();
        for (final ProgramPanel panel : program.getPanel()) {
            final List<GeneIdentifier> genes = panel.getGene();
            if (panel.getEffect().contains(OtherEffect.HOMOZYGOUS_DELETION)) {
                final Predicate<GeneCopyNumber> geneCopyNumberPredicate = cnv -> genes.stream().anyMatch(g -> g.getEnsembl().equals(cnv.transcriptID()));
                // TODO: we are matching on transcript ID here but we only have canonical transcripts in our panel file
                cnvPredicates.add(geneCopyNumberPredicate);
            }
        }
        // process structural variant disruptions
        final List<Predicate<HmfGenomeRegion>> disruptionPredicates = Lists.newArrayList();
        for (final ProgramPanel panel : program.getPanel()) {
            final List<GeneIdentifier> genes = panel.getGene();
            if (panel.getEffect().contains(OtherEffect.GENE_DISRUPTION)) {
                final Predicate<HmfGenomeRegion> disruptionPredicate = sv -> genes.stream().anyMatch(g -> g.getEnsembl().equals(sv.transcriptID()));
                // TODO: we are matching on transcript ID here but we only have canonical transcripts in our panel file
                disruptionPredicates.add(disruptionPredicate);
            }
        }
        // process variants from vcf
        final List<Predicate<VariantModel>> panelPredicates = Lists.newArrayList();
        // Accumulated over ALL panels of the program. These used to be reassigned per panel, so a
        // program with several panels handed only the last panel's values to BachelorProgram.
        final List<String> requiredEffects = Lists.newArrayList();
        final List<String> panelTranscripts = Lists.newArrayList();
        for (final ProgramPanel panel : program.getPanel()) {
            final List<GeneIdentifier> genes = panel.getGene();
            // the effects to search for within this panel only; the panel predicate must not see other panels' effects
            final List<String> effects = panel.getSnpEffect().stream().map(SnpEffect::value).collect(Collectors.toList());
            for (final String effect : effects) {
                if (!requiredEffects.contains(effect)) {
                    requiredEffects.add(effect);
                }
            }
            final Predicate<VariantModel> panelPredicate = v -> genes.stream().anyMatch(p -> v.sampleAnnotations().stream().anyMatch(a -> a.featureID().equals(p.getEnsembl()) && effects.stream().anyMatch(x -> a.effects().contains(x))));
            panelPredicates.add(panelPredicate);
            for (final GeneIdentifier g : genes) {
                final String transcript = g.getEnsembl();
                if (!panelTranscripts.contains(transcript)) {
                    panelTranscripts.add(transcript);
                }
                // update query targets
                final HmfGenomeRegion region = allTranscriptsMap.get(transcript);
                if (region == null) {
                    // transcript unknown: fall back to looking the region up by gene name
                    final HmfGenomeRegion namedRegion = allGenesMap.get(g.getName());
                    if (namedRegion == null) {
                        // just skip this gene for now
                        LOGGER.warn("Program {} gene {} non-canonical transcript {} couldn't find region, transcript will be skipped", program.getName(), g.getName(), g.getEnsembl());
                    } else {
                        result.variantLocationsToQuery.add(namedRegion);
                    }
                } else {
                    result.variantLocationsToQuery.add(region);
                }
            }
        }
        final Predicate<VariantModel> inPanel = v -> panelPredicates.stream().anyMatch(p -> p.test(v));
        final Predicate<VariantModel> inBlacklist = new BlacklistPredicate(geneToEnsemblMap.values(), program.getBlacklist());
        final Predicate<VariantModel> inWhitelist = new WhitelistPredicate(geneToEnsemblMap, program.getWhitelist());
        // panel variants pass unless blacklisted; everything else passes only when whitelisted
        final Predicate<VariantModel> snvPredicate = v -> inPanel.test(v) ? !inBlacklist.test(v) : inWhitelist.test(v);
        final Predicate<GeneCopyNumber> copyNumberPredicate = cnv -> cnvPredicates.stream().anyMatch(p -> p.test(cnv)) && cnv.minCopyNumber() < MAX_COPY_NUMBER_FOR_LOSS;
        final Predicate<HmfGenomeRegion> disruptionPredicate = disruption -> disruptionPredicates.stream().anyMatch(p -> p.test(disruption));
        final BachelorProgram bachelorProgram = new BachelorProgram(program.getName(), snvPredicate, copyNumberPredicate, disruptionPredicate, requiredEffects, panelTranscripts);
        result.programs.add(bachelorProgram);
    }
    return result;
}
Also used : GeneIdentifier(nl.hartwigmedicalfoundation.bachelor.GeneIdentifier) ProgramPanel(nl.hartwigmedicalfoundation.bachelor.ProgramPanel) Genotype(htsjdk.variant.variantcontext.Genotype) CloseableIterator(htsjdk.samtools.util.CloseableIterator) SOMATIC_DELETION(com.hartwig.hmftools.bachelor.EligibilityReport.ReportType.SOMATIC_DELETION) SOMATIC_DISRUPTION(com.hartwig.hmftools.bachelor.EligibilityReport.ReportType.SOMATIC_DISRUPTION) VCFFileReader(htsjdk.variant.vcf.VCFFileReader) HmfGenomeRegion(com.hartwig.hmftools.common.region.hmfslicer.HmfGenomeRegion) Multimap(com.google.common.collect.Multimap) StructuralVariant(com.hartwig.hmftools.common.variant.structural.StructuralVariant) HmfGenePanelSupplier(com.hartwig.hmftools.genepanel.HmfGenePanelSupplier) HashMultimap(com.google.common.collect.HashMultimap) Lists(com.google.common.collect.Lists) GenomePosition(com.hartwig.hmftools.common.position.GenomePosition) VariantAnnotation(com.hartwig.hmftools.common.variant.snpeff.VariantAnnotation) Map(java.util.Map) StructuralVariantType(com.hartwig.hmftools.common.variant.structural.StructuralVariantType) GenomePositions(com.hartwig.hmftools.common.position.GenomePositions) SnpEffect(nl.hartwigmedicalfoundation.bachelor.SnpEffect) GERMLINE_DELETION(com.hartwig.hmftools.bachelor.EligibilityReport.ReportType.GERMLINE_DELETION) SortedSetMultimap(com.google.common.collect.SortedSetMultimap) OtherEffect(nl.hartwigmedicalfoundation.bachelor.OtherEffect) Predicate(java.util.function.Predicate) Collection(java.util.Collection) Program(nl.hartwigmedicalfoundation.bachelor.Program) Set(java.util.Set) WhitelistPredicate(com.hartwig.hmftools.bachelor.predicates.WhitelistPredicate) Collectors(java.util.stream.Collectors) Maps(com.google.common.collect.Maps) Sets(com.google.common.collect.Sets) GeneIdentifier(nl.hartwigmedicalfoundation.bachelor.GeneIdentifier) List(java.util.List) Stream(java.util.stream.Stream) Logger(org.apache.logging.log4j.Logger) 
VariantContext(htsjdk.variant.variantcontext.VariantContext) HmfExonRegion(com.hartwig.hmftools.common.region.hmfslicer.HmfExonRegion) GeneCopyNumber(com.hartwig.hmftools.common.gene.GeneCopyNumber) NotNull(org.jetbrains.annotations.NotNull) Collections(java.util.Collections) LogManager(org.apache.logging.log4j.LogManager) BlacklistPredicate(com.hartwig.hmftools.bachelor.predicates.BlacklistPredicate) BlacklistPredicate(com.hartwig.hmftools.bachelor.predicates.BlacklistPredicate) Program(nl.hartwigmedicalfoundation.bachelor.Program) WhitelistPredicate(com.hartwig.hmftools.bachelor.predicates.WhitelistPredicate) Predicate(java.util.function.Predicate) WhitelistPredicate(com.hartwig.hmftools.bachelor.predicates.WhitelistPredicate) BlacklistPredicate(com.hartwig.hmftools.bachelor.predicates.BlacklistPredicate) GeneCopyNumber(com.hartwig.hmftools.common.gene.GeneCopyNumber) SnpEffect(nl.hartwigmedicalfoundation.bachelor.SnpEffect) ProgramPanel(nl.hartwigmedicalfoundation.bachelor.ProgramPanel) HmfGenomeRegion(com.hartwig.hmftools.common.region.hmfslicer.HmfGenomeRegion)

Aggregations

HashMultimap (com.google.common.collect.HashMultimap)1 Lists (com.google.common.collect.Lists)1 Maps (com.google.common.collect.Maps)1 Multimap (com.google.common.collect.Multimap)1 Sets (com.google.common.collect.Sets)1 SortedSetMultimap (com.google.common.collect.SortedSetMultimap)1 GERMLINE_DELETION (com.hartwig.hmftools.bachelor.EligibilityReport.ReportType.GERMLINE_DELETION)1 SOMATIC_DELETION (com.hartwig.hmftools.bachelor.EligibilityReport.ReportType.SOMATIC_DELETION)1 SOMATIC_DISRUPTION (com.hartwig.hmftools.bachelor.EligibilityReport.ReportType.SOMATIC_DISRUPTION)1 BlacklistPredicate (com.hartwig.hmftools.bachelor.predicates.BlacklistPredicate)1 WhitelistPredicate (com.hartwig.hmftools.bachelor.predicates.WhitelistPredicate)1 GeneCopyNumber (com.hartwig.hmftools.common.gene.GeneCopyNumber)1 GenomePosition (com.hartwig.hmftools.common.position.GenomePosition)1 GenomePositions (com.hartwig.hmftools.common.position.GenomePositions)1 HmfExonRegion (com.hartwig.hmftools.common.region.hmfslicer.HmfExonRegion)1 HmfGenomeRegion (com.hartwig.hmftools.common.region.hmfslicer.HmfGenomeRegion)1 VariantAnnotation (com.hartwig.hmftools.common.variant.snpeff.VariantAnnotation)1 StructuralVariant (com.hartwig.hmftools.common.variant.structural.StructuralVariant)1 StructuralVariantType (com.hartwig.hmftools.common.variant.structural.StructuralVariantType)1 HmfGenePanelSupplier (com.hartwig.hmftools.genepanel.HmfGenePanelSupplier)1