Search in sources :

Example 1 with Category

use of com.github.lindenb.jvarkit.gatk.Category in project jvarkit by lindenb.

the class AbstractGroupBy method onTraversalDone.

/**
 * Writes the final per-category counts as a GATK report.
 *
 * The table comes from {@code createGATKReportTable()}; a trailing
 * {@code COUNT} column is appended to it. Each category becomes one row:
 * its labels fill columns {@code 0 .. size-1} and its count goes into
 * column {@code size}.
 * NOTE(review): presumably the label columns themselves are declared by
 * {@code createGATKReportTable()} - confirm in the enclosing class.
 *
 * @param counts the reduced map of category to number of occurrences
 */
@Override
public void onTraversalDone(final Map<Category, Long> counts) {
    final GATKReportTable table = createGATKReportTable();
    table.addColumn("COUNT");

    int rowIndex = 0;
    for (final Map.Entry<Category, Long> entry : counts.entrySet()) {
        final Category category = entry.getKey();
        // the count is stored right after the last label of the category
        final int countColumn = category.size();
        for (int column = 0; column < countColumn; ++column) {
            table.set(rowIndex, column, category.get(column));
        }
        table.set(rowIndex, countColumn, entry.getValue());
        rowIndex++;
    }

    final GatkReportWriter reportWriter = GatkReportWriter.createWriter(this.outputTableFormat);
    final GATKReport report = new GATKReport();
    report.addTable(table);
    reportWriter.print(report, this.out);
    this.out.flush();
    logger.info("TraversalDone");
}
Also used : GATKReport(org.broadinstitute.gatk.utils.report.GATKReport) Category(com.github.lindenb.jvarkit.gatk.Category) GatkReportWriter(com.github.lindenb.jvarkit.gatk.GatkReportWriter) GATKReportTable(org.broadinstitute.gatk.utils.report.GATKReportTable)

Example 2 with Category

use of com.github.lindenb.jvarkit.gatk.Category in project jvarkit by lindenb.

the class GroupByGenotypes method map.

/**
 * Counts, for the variants found at the current location, one category per
 * (variant, sample genotype) pair.
 *
 * Every category starts with the sample name; further labels are appended
 * depending on the grouping flags of the walker (contig, presence of an ID,
 * variant type, genotype type, variant/genotype filter state, genotype
 * quality class, predicted impact).
 *
 * When {@code onlysingletons} is set, only variants where exactly one
 * genotype is called, not no-call and not hom-ref are counted, and only
 * for that genotype.
 *
 * @param tracker source of the variants; when {@code null} an empty map is returned
 * @param refctx  reference context (not used by this method)
 * @param context alignment context, used for its location
 * @return a map of category to number of occurrences at this location
 */
@Override
public Map<Category, Long> map(final RefMetaDataTracker tracker, final ReferenceContext refctx, final AlignmentContext context) {
    if (tracker == null) {
        return Collections.emptyMap();
    }
    final Map<Category, Long> counts = new HashMap<>();
    for (final VariantContext ctx : tracker.getValues(this.variants, context.getLocation())) {
        // index of the unique non-reference genotype, or -1 if none / more than one
        int singletonIndex = -1;
        if (onlysingletons) {
            for (int s = 0; s < ctx.getNSamples(); ++s) {
                final Genotype candidate = ctx.getGenotype(s);
                final boolean carriesVariant = candidate != null
                        && candidate.isCalled()
                        && !candidate.isNoCall()
                        && !candidate.isHomRef();
                if (!carriesVariant) {
                    continue;
                }
                if (singletonIndex == -1) {
                    singletonIndex = s;
                } else {
                    // a second carrier was found: not a singleton anymore
                    singletonIndex = -1;
                    break;
                }
            }
        }

        for (int s = 0; s < ctx.getNSamples(); ++s) {
            if (onlysingletons && singletonIndex != s) {
                continue;
            }
            final Genotype genotype = ctx.getGenotype(s);
            final List<Object> labels = new ArrayList<>();
            labels.add(genotype.getSampleName());
            if (bychrom) {
                labels.add(ctx.getContig());
            }
            if (byID) {
                labels.add(ctx.hasID());
            }
            if (byType) {
                labels.add(ctx.getType().name());
            }
            if (byGenotypeType) {
                labels.add(genotype.getType());
            }
            if (byFilter) {
                labels.add(ctx.isFiltered());
            }
            if (byGFilter) {
                labels.add(genotype.isFiltered());
            }
            if (minGenotypeQuality >= 0) {
                final boolean qualityOk = genotype.hasGQ() && genotype.getGQ() >= this.minGenotypeQuality;
                labels.add(qualityOk ? "." : "LOWQUAL");
            }
            if (byImpact) {
                // keep the impact that compares greatest among the predictions
                // whose allele is carried by this genotype
                AnnPredictionParser.Impact topImpact = null;
                for (final AnnPredictionParser.AnnPrediction pred : super.annParser.getPredictions(ctx)) {
                    // see http://stackoverflow.com/questions/41678374/
                    // NOTE(review): an anonymous class is kept here rather than a lambda,
                    // presumably because of the issue linked above - confirm before changing.
                    final Predicate<Allele> matchesPrediction = new Predicate<Allele>() {

                        @Override
                        public boolean test(final Allele allele) {
                            return allele.getDisplayString().equals(pred.getAllele());
                        }
                    };
                    if (!genotype.getAlleles().stream().anyMatch(matchesPrediction)) {
                        continue;
                    }
                    final AnnPredictionParser.Impact current = pred.getPutativeImpact();
                    if (topImpact == null || current.compareTo(topImpact) >= 0) {
                        topImpact = current;
                    }
                }
                labels.add(topImpact == null ? "." : topImpact.name());
            }

            final Category cat = new Category(labels);
            final Long previous = counts.get(cat);
            if (previous == null) {
                counts.put(cat, 1L);
            } else {
                counts.put(cat, previous + 1);
            }
        }
    }
    return counts;
}
Also used : AnnPredictionParser(com.github.lindenb.jvarkit.util.vcf.predictions.AnnPredictionParser) Category(com.github.lindenb.jvarkit.gatk.Category) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) VariantContext(htsjdk.variant.variantcontext.VariantContext) Genotype(htsjdk.variant.variantcontext.Genotype) Predicate(java.util.function.Predicate) Allele(htsjdk.variant.variantcontext.Allele)

Example 3 with Category

use of com.github.lindenb.jvarkit.gatk.Category in project jvarkit by lindenb.

the class AbstractGroupBy method reduce.

/**
 * Merges the counts of one {@code map} result into the running total.
 *
 * Neither input map is modified: the result is a fresh copy of {@code sum}
 * in which every category of {@code value} has been added.
 *
 * @param value the counts to add
 * @param sum   the counts accumulated so far
 * @return a new map holding, per category, the sum of both inputs
 */
@Override
public Map<Category, Long> reduce(final Map<Category, Long> value, final Map<Category, Long> sum) {
    final Map<Category, Long> merged = new HashMap<>(sum);
    for (final Map.Entry<Category, Long> entry : value.entrySet()) {
        final Category key = entry.getKey();
        final Long previous = sum.get(key);
        // the incoming count is always unboxed, whether or not the category is already known
        long total = entry.getValue();
        if (previous != null) {
            total += previous;
        }
        merged.put(key, total);
    }
    return merged;
}
Also used : Category(com.github.lindenb.jvarkit.gatk.Category) HashMap(java.util.HashMap)

Example 4 with Category

use of com.github.lindenb.jvarkit.gatk.Category in project jvarkit by lindenb.

the class GroupByVariants method map.

/**
 * Counts, for the variants found at the current location, one category per
 * variant.
 *
 * The labels of a category are appended in a fixed order, each one only if
 * the matching grouping option of the walker is enabled: contig, presence
 * of an ID, variant type, filter state, quality class, predicted impact,
 * transcript biotype, number of ALT alleles, number of called samples,
 * number of non-reference samples, singleton state, transition/transversion,
 * allele size class, allele frequency class, depth class, and finally the
 * presence of each attribute listed in {@code presence_of_attributes}.
 *
 * @param tracker source of the variants; when {@code null} an empty map is returned
 * @param ref     reference context (not used by this method)
 * @param context alignment context, used for its location
 * @return a map of category to number of occurrences at this location
 */
@Override
public Map<Category, Long> map(final RefMetaDataTracker tracker, final ReferenceContext ref, final AlignmentContext context) {
    if (tracker == null) {
        return Collections.emptyMap();
    }
    final Map<Category, Long> count = new HashMap<>();
    for (final VariantContext ctx : tracker.getValues(this.variants, context.getLocation())) {
        final List<Object> labels = new ArrayList<>();
        if (bychrom) {
            labels.add(ctx.getContig());
        }
        if (byID) {
            labels.add(ctx.hasID());
        }
        if (byType) {
            labels.add(ctx.getType().name());
        }
        if (byFilter) {
            labels.add(ctx.isFiltered());
        }
        if (minQuality >= 0) {
            final boolean qualityOk = ctx.hasLog10PError() && ctx.getPhredScaledQual() >= this.minQuality;
            labels.add(qualityOk ? "." : "LOWQUAL");
        }

        if (byImpact || bybiotype) {
            // keep the prediction whose impact compares greatest; on ties the later one wins
            AnnPredictionParser.Impact topImpact = null;
            String topBiotype = null;
            for (final AnnPredictionParser.AnnPrediction prediction : super.annParser.getPredictions(ctx)) {
                final AnnPredictionParser.Impact candidate = prediction.getPutativeImpact();
                if (topImpact == null || candidate.compareTo(topImpact) >= 0) {
                    topImpact = candidate;
                    topBiotype = prediction.getTranscriptBioType();
                }
            }
            if (byImpact) {
                if (topImpact == null) {
                    labels.add(".");
                } else {
                    labels.add(topImpact);
                }
            }
            if (bybiotype) {
                if (topBiotype == null) {
                    labels.add(".");
                } else {
                    labels.add(topBiotype);
                }
            }
        }

        if (bynalts) {
            labels.add(ctx.getAlternateAlleles().size());
        }

        if (byAffected || byCalled || bySingleton) {
            int calledCount = 0;
            int affectedCount = 0;
            int calledNonRefCount = 0;
            for (int sampleIdx = 0; sampleIdx < ctx.getNSamples(); ++sampleIdx) {
                final Genotype gt = ctx.getGenotype(sampleIdx);
                if (gt.isCalled() && !gt.isNoCall() && !gt.isHomRef()) {
                    affectedCount++;
                }
                if (gt.isCalled()) {
                    calledCount++;
                    if (!gt.isHomRef()) {
                        calledNonRefCount++;
                    }
                }
            }
            if (byCalled) {
                // counts at or above maxSamples are collapsed into one "GE_" bucket
                if (calledCount < maxSamples) {
                    labels.add(Integer.valueOf(calledCount));
                } else {
                    labels.add("GE_" + maxSamples);
                }
            }
            if (byAffected) {
                if (affectedCount < maxSamples) {
                    labels.add(Integer.valueOf(affectedCount));
                } else {
                    labels.add("GE_" + maxSamples);
                }
            }
            if (bySingleton) {
                labels.add(calledNonRefCount == 1 ? "SINGLETON" : ".");
            }
        }

        if (byTsv) {
            final String tsvLabel;
            if (ctx.getType() == VariantContext.Type.SNP && ctx.getAlternateAlleles().size() == 1) {
                tsvLabel = VariantContextUtils.isTransition(ctx) ? "Transition" : "Transversion";
            } else {
                tsvLabel = ".";
            }
            labels.add(tsvLabel);
        }

        if (byAlleleSize) {
            // see http://stackoverflow.com/questions/41678374/
            // NOTE(review): anonymous classes are kept here rather than lambdas,
            // presumably because of the issue linked above - confirm before changing.
            final Predicate<Allele> concreteAllele = new Predicate<Allele>() {

                @Override
                public boolean test(final Allele allele) {
                    return !allele.isNoCall() && !allele.isSymbolic();
                }
            };
            final ToIntFunction<Allele> alleleLength = new ToIntFunction<Allele>() {

                @Override
                public int applyAsInt(final Allele allele) {
                    return allele.length();
                }
            };
            final OptionalInt longest = ctx.getAlleles().stream().filter(concreteAllele).mapToInt(alleleLength).max();
            labels.add(longest.isPresent() ? alleleSizeClassifier.apply(longest.getAsInt()) : "N/A");
        }

        if (byAlleFrequency) {
            final List<Object> afs = ctx.getAttributeAsList("AF");
            if (!afs.isEmpty()) {
                // the smallest parsable AF value is the one that gets classified
                Double minaf = null;
                for (final Object rawAf : afs) {
                    if (rawAf == null) {
                        continue;
                    }
                    Double af;
                    if (rawAf instanceof Double) {
                        af = (Double) rawAf;
                    } else {
                        try {
                            af = Double.parseDouble(String.valueOf(rawAf));
                        } catch (NumberFormatException err) {
                            logger.warn("Not a number for AF :" + rawAf);
                            continue;
                        }
                    }
                    // out-of-range values are reported but still taken into account
                    if (af < 0.0) {
                        logger.warn("AF < 0 : " + rawAf);
                    }
                    if (af > 1.0) {
                        logger.warn("AF > 1.0 : " + rawAf);
                    }
                    if (minaf == null || af.compareTo(minaf) < 0) {
                        minaf = af;
                    }
                }
                labels.add(minaf == null ? "NOT_FOUND" : this.groupByAfClassifier.apply(minaf));
            } else {
                labels.add("NOT_AVAILABLE");
            }
        }

        if (byDepth) {
            final List<Object> depths = ctx.getAttributeAsList("DP");
            if (depths.size() == 1) {
                final Object rawDp = depths.get(0);
                Integer dp;
                if (rawDp instanceof Integer) {
                    dp = (Integer) rawDp;
                } else {
                    try {
                        dp = Integer.parseInt(String.valueOf(rawDp));
                    } catch (NumberFormatException err) {
                        logger.warn("Not a number for DP :" + rawDp);
                        dp = null;
                    }
                }
                // NOTE(review): dp may be null here (unparsable value); the classifier
                // is presumably expected to cope with that - confirm.
                labels.add(this.groupByDpClassifier.apply(dp));
            } else {
                if (depths.size() > 1) {
                    logger.warn("Too many data for DP :" + depths);
                }
                labels.add("NOT_AVAILABLE");
            }
        }

        for (final String att : this.presence_of_attributes) {
            labels.add(ctx.hasAttribute(att));
        }

        final Category cat = new Category(labels);
        final Long previous = count.get(cat);
        if (previous == null) {
            count.put(cat, 1L);
        } else {
            count.put(cat, previous + 1);
        }
    }
    return count;
}
Also used : AnnPredictionParser(com.github.lindenb.jvarkit.util.vcf.predictions.AnnPredictionParser) Category(com.github.lindenb.jvarkit.gatk.Category) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) VariantContext(htsjdk.variant.variantcontext.VariantContext) Genotype(htsjdk.variant.variantcontext.Genotype) OptionalInt(java.util.OptionalInt) Predicate(java.util.function.Predicate) Allele(htsjdk.variant.variantcontext.Allele) ToIntFunction(java.util.function.ToIntFunction)

Aggregations

Category (com.github.lindenb.jvarkit.gatk.Category)4 HashMap (java.util.HashMap)3 AnnPredictionParser (com.github.lindenb.jvarkit.util.vcf.predictions.AnnPredictionParser)2 Allele (htsjdk.variant.variantcontext.Allele)2 Genotype (htsjdk.variant.variantcontext.Genotype)2 VariantContext (htsjdk.variant.variantcontext.VariantContext)2 ArrayList (java.util.ArrayList)2 Predicate (java.util.function.Predicate)2 GatkReportWriter (com.github.lindenb.jvarkit.gatk.GatkReportWriter)1 OptionalInt (java.util.OptionalInt)1 ToIntFunction (java.util.function.ToIntFunction)1 GATKReport (org.broadinstitute.gatk.utils.report.GATKReport)1 GATKReportTable (org.broadinstitute.gatk.utils.report.GATKReportTable)1