Use of com.github.lindenb.jvarkit.gatk.Category in project jvarkit, by lindenb.
Class AbstractGroupBy, method onTraversalDone.
/**
 * Dumps the accumulated counts as a GATK report once the traversal is finished.
 * <p>
 * Each category becomes one row: its labels fill the columns {@code 0..size-1}
 * and the associated count is stored in the column that follows the last label.
 * The report is printed to {@code this.out}, which is then flushed.
 *
 * @param counts the number of observations recorded for each category
 */
@Override
public void onTraversalDone(final Map<Category, Long> counts) {
    final GATKReportTable table = createGATKReportTable();
    table.addColumn("COUNT");

    int rowIndex = 0;
    for (final Map.Entry<Category, Long> entry : counts.entrySet()) {
        final Category category = entry.getKey();
        final int labelCount = category.size();
        // one cell per label of the category
        for (int column = 0; column < labelCount; ++column) {
            table.set(rowIndex, column, category.get(column));
        }
        // the count goes right after the last label
        table.set(rowIndex, labelCount, entry.getValue());
        rowIndex++;
    }

    final GatkReportWriter reportWriter = GatkReportWriter.createWriter(this.outputTableFormat);
    final GATKReport report = new GATKReport();
    report.addTable(table);
    reportWriter.print(report, this.out);
    this.out.flush();
    logger.info("TraversalDone");
}
Use of com.github.lindenb.jvarkit.gatk.Category in project jvarkit, by lindenb.
Class GroupByGenotypes, method map.
/**
 * Counts the genotypes found at the current location, grouped by category.
 * <p>
 * For every variant returned by the tracker and every sample of that variant,
 * a list of labels is built (sample name first, then one label per enabled
 * grouping option) and the counter of the matching {@link Category} is incremented.
 * When {@code onlysingletons} is set, only the genotype of the single sample that
 * is called and not hom-ref is counted; variants with zero or several such samples
 * contribute nothing.
 *
 * @param tracker meta-data tracker for the current location, may be {@code null}
 * @param refctx  reference context (not used here)
 * @param context alignment context, used for its location
 * @return the counts for this location; an empty map when {@code tracker} is {@code null}
 */
@Override
public Map<Category, Long> map(final RefMetaDataTracker tracker, final ReferenceContext refctx, final AlignmentContext context) {
    if (tracker == null) {
        return Collections.emptyMap();
    }
    final Map<Category, Long> counts = new HashMap<>();
    for (final VariantContext ctx : tracker.getValues(this.variants, context.getLocation())) {
        // index of the one and only sample carrying a called, non hom-ref genotype;
        // stays at -1 when there is no such sample or more than one
        int singletonIndex = -1;
        if (onlysingletons) {
            for (int s = 0; s < ctx.getNSamples(); ++s) {
                final Genotype candidate = ctx.getGenotype(s);
                final boolean carrier = candidate != null
                        && candidate.isCalled()
                        && !candidate.isNoCall()
                        && !candidate.isHomRef();
                if (!carrier) {
                    continue;
                }
                if (singletonIndex != -1) {
                    // a second carrier: not a singleton anymore
                    singletonIndex = -1;
                    break;
                }
                singletonIndex = s;
            }
        }

        for (int s = 0; s < ctx.getNSamples(); ++s) {
            if (onlysingletons && singletonIndex != s) {
                continue;
            }
            final Genotype genotype = ctx.getGenotype(s);
            final List<Object> labels = new ArrayList<>();
            labels.add(genotype.getSampleName());
            if (bychrom) {
                labels.add(ctx.getContig());
            }
            if (byID) {
                labels.add(ctx.hasID());
            }
            if (byType) {
                labels.add(ctx.getType().name());
            }
            if (byGenotypeType) {
                labels.add(genotype.getType());
            }
            if (byFilter) {
                labels.add(ctx.isFiltered());
            }
            if (byGFilter) {
                labels.add(genotype.isFiltered());
            }
            if (minGenotypeQuality >= 0) {
                final boolean qualityOk = genotype.hasGQ() && genotype.getGQ() >= this.minGenotypeQuality;
                labels.add(qualityOk ? "." : "LOWQUAL");
            }
            if (byImpact) {
                // retain the impact that compares greatest (compareTo) among the
                // predictions whose allele is carried by this genotype
                AnnPredictionParser.Impact selected = null;
                for (final AnnPredictionParser.AnnPrediction pred : super.annParser.getPredictions(ctx)) {
                    // anonymous class kept as in the original code,
                    // see http://stackoverflow.com/questions/41678374/
                    final Predicate<Allele> matchesPrediction = new Predicate<Allele>() {
                        @Override
                        public boolean test(final Allele A) {
                            return A.getDisplayString().equals(pred.getAllele());
                        }
                    };
                    if (genotype.getAlleles().stream().noneMatch(matchesPrediction)) {
                        continue;
                    }
                    final AnnPredictionParser.Impact candidateImpact = pred.getPutativeImpact();
                    if (selected == null || candidateImpact.compareTo(selected) >= 0) {
                        selected = candidateImpact;
                    }
                }
                labels.add(selected == null ? "." : selected.name());
            }

            final Category key = new Category(labels);
            final Long previous = counts.get(key);
            if (previous == null) {
                counts.put(key, 1L);
            } else {
                counts.put(key, previous + 1);
            }
        }
    }
    return counts;
}
Use of com.github.lindenb.jvarkit.gatk.Category in project jvarkit, by lindenb.
Class AbstractGroupBy, method reduce.
/**
 * Combines the counts of one {@code map} call with the running total.
 * <p>
 * A new map is returned; neither {@code value} nor {@code sum} is modified.
 *
 * @param value counts to add
 * @param sum   counts accumulated so far
 * @return a fresh map holding, for every category, the sum of both inputs
 */
@Override
public Map<Category, Long> reduce(final Map<Category, Long> value, final Map<Category, Long> sum) {
    // start from a copy of the running total, then add the new observations
    final Map<Category, Long> merged = new HashMap<>(sum);
    for (final Map.Entry<Category, Long> entry : value.entrySet()) {
        final Category category = entry.getKey();
        final long added = entry.getValue();
        final Long previous = sum.get(category);
        merged.put(category, previous == null ? added : previous + added);
    }
    return merged;
}
Use of com.github.lindenb.jvarkit.gatk.Category in project jvarkit, by lindenb.
Class GroupByVariants, method map.
/**
 * Counts the variants found at the current location, grouped by category.
 * <p>
 * For every variant returned by the tracker, one label is appended per enabled
 * grouping option (contig, ID presence, type, filter status, quality, predicted
 * impact / biotype, number of ALT alleles, called / affected samples, singleton,
 * transition vs. transversion, allele size, allele frequency, depth and presence
 * of the requested INFO attributes). The resulting {@link Category} has its
 * counter incremented.
 *
 * @param tracker meta-data tracker for the current location, may be {@code null}
 * @param ref     reference context (not used here)
 * @param context alignment context, used for its location
 * @return the counts for this location; an empty map when {@code tracker} is {@code null}
 */
@Override
public Map<Category, Long> map(final RefMetaDataTracker tracker, final ReferenceContext ref, final AlignmentContext context) {
    if (tracker == null) {
        return Collections.emptyMap();
    }
    final Map<Category, Long> count = new HashMap<>();
    for (final VariantContext ctx : tracker.getValues(this.variants, context.getLocation())) {
        final List<Object> labels = new ArrayList<>();
        if (bychrom) {
            labels.add(ctx.getContig());
        }
        if (byID) {
            labels.add(ctx.hasID());
        }
        if (byType) {
            labels.add(ctx.getType().name());
        }
        if (byFilter) {
            labels.add(ctx.isFiltered());
        }
        if (minQuality >= 0) {
            final boolean qualityOk = ctx.hasLog10PError() && ctx.getPhredScaledQual() >= this.minQuality;
            labels.add(qualityOk ? "." : "LOWQUAL");
        }

        if (byImpact || bybiotype) {
            // retain the prediction whose impact compares greatest (compareTo);
            // on ties the later prediction wins, and its biotype is kept with it
            AnnPredictionParser.Impact selectedImpact = null;
            String selectedBiotype = null;
            for (final AnnPredictionParser.AnnPrediction pred : super.annParser.getPredictions(ctx)) {
                final AnnPredictionParser.Impact candidate = pred.getPutativeImpact();
                if (selectedImpact == null || candidate.compareTo(selectedImpact) >= 0) {
                    selectedImpact = candidate;
                    selectedBiotype = pred.getTranscriptBioType();
                }
            }
            if (byImpact) {
                if (selectedImpact == null) {
                    labels.add(".");
                } else {
                    labels.add(selectedImpact);
                }
            }
            if (bybiotype) {
                labels.add(selectedBiotype == null ? "." : selectedBiotype);
            }
        }

        if (bynalts) {
            labels.add(ctx.getAlternateAlleles().size());
        }

        if (byAffected || byCalled || bySingleton) {
            int calledCount = 0;
            int affectedCount = 0;
            int nonHomRefCalled = 0;
            for (int s = 0; s < ctx.getNSamples(); ++s) {
                final Genotype g = ctx.getGenotype(s);
                if (g.isCalled() && !g.isNoCall() && !g.isHomRef()) {
                    affectedCount++;
                }
                if (g.isCalled()) {
                    calledCount++;
                    if (!g.isHomRef()) {
                        nonHomRefCalled++;
                    }
                }
            }
            // counts reaching maxSamples are collapsed into a single "GE_<max>" class
            if (byCalled) {
                if (calledCount < maxSamples) {
                    labels.add(calledCount);
                } else {
                    labels.add("GE_" + maxSamples);
                }
            }
            if (byAffected) {
                if (affectedCount < maxSamples) {
                    labels.add(affectedCount);
                } else {
                    labels.add("GE_" + maxSamples);
                }
            }
            if (bySingleton) {
                labels.add(nonHomRefCalled == 1 ? "SINGLETON" : ".");
            }
        }

        if (byTsv) {
            // only evaluated for SNPs having exactly one ALT allele
            final String tsvLabel;
            if (ctx.getType() == VariantContext.Type.SNP && ctx.getAlternateAlleles().size() == 1) {
                tsvLabel = VariantContextUtils.isTransition(ctx) ? "Transition" : "Transversion";
            } else {
                tsvLabel = ".";
            }
            labels.add(tsvLabel);
        }

        if (byAlleleSize) {
            // anonymous classes kept as in the original code,
            // see http://stackoverflow.com/questions/41678374/
            final Predicate<Allele> concreteAllele = new Predicate<Allele>() {
                @Override
                public boolean test(final Allele a) {
                    return !a.isNoCall() && !a.isSymbolic();
                }
            };
            final ToIntFunction<Allele> alleleLength = new ToIntFunction<Allele>() {
                @Override
                public int applyAsInt(final Allele value) {
                    return value.length();
                }
            };
            // longest allele among those that are neither no-call nor symbolic
            final OptionalInt longest = ctx.getAlleles().stream()
                    .filter(concreteAllele)
                    .mapToInt(alleleLength)
                    .max();
            labels.add(longest.isPresent() ? alleleSizeClassifier.apply(longest.getAsInt()) : "N/A");
        }

        if (byAlleFrequency) {
            final List<Object> frequencies = ctx.getAttributeAsList("AF");
            if (frequencies.isEmpty()) {
                labels.add("NOT_AVAILABLE");
            } else {
                // smallest AF value that could be read as a number
                Double lowest = null;
                for (final Object o : frequencies) {
                    if (o == null) {
                        continue;
                    }
                    final Double af;
                    if (o instanceof Double) {
                        af = (Double) o;
                    } else {
                        try {
                            af = Double.parseDouble(String.valueOf(o));
                        } catch (NumberFormatException err) {
                            logger.warn("Not a number for AF :" + o);
                            continue;
                        }
                    }
                    // out-of-range values are reported but still taken into account
                    if (af < 0.0) {
                        logger.warn("AF < 0 : " + o);
                    }
                    if (af > 1.0) {
                        logger.warn("AF > 1.0 : " + o);
                    }
                    if (lowest == null || af.compareTo(lowest) < 0) {
                        lowest = af;
                    }
                }
                labels.add(lowest == null ? "NOT_FOUND" : this.groupByAfClassifier.apply(lowest));
            }
        }

        if (byDepth) {
            final List<Object> depths = ctx.getAttributeAsList("DP");
            if (depths.size() == 1) {
                final Object o = depths.get(0);
                Integer dp;
                if (o instanceof Integer) {
                    dp = (Integer) o;
                } else {
                    try {
                        dp = Integer.parseInt(String.valueOf(o));
                    } catch (NumberFormatException err) {
                        logger.warn("Not a number for DP :" + o);
                        dp = null;
                    }
                }
                // dp is null here when the value could not be parsed
                labels.add(this.groupByDpClassifier.apply(dp));
            } else {
                if (depths.size() > 1) {
                    logger.warn("Too many data for DP :" + depths);
                }
                labels.add("NOT_AVAILABLE");
            }
        }

        // one boolean label per attribute whose presence was requested
        for (final String att : this.presence_of_attributes) {
            labels.add(ctx.hasAttribute(att));
        }

        final Category key = new Category(labels);
        final Long previous = count.get(key);
        if (previous == null) {
            count.put(key, 1L);
        } else {
            count.put(key, previous + 1);
        }
    }
    return count;
}
Aggregations