use of org.broadinstitute.gatk.engine.filters.MalformedReadFilter in project jvarkit by lindenb.
the class SoftClipAnnotator method map.
public Integer map(final RefMetaDataTracker tracker, final ReferenceContext ref, final AlignmentContext context) {
if (tracker == null)
return 0;
final Collection<VariantContext> VCs = tracker.getValues(variantCollection.variants, context.getLocation());
if (VCs.isEmpty()) {
return 0;
}
final Collection<ReadFilter> readFilters = super.getToolkit().getFilters();
for (final VariantContext ctx : VCs) {
int observed_genotypes = 0;
int num_some_clipped_genotypes = 0;
final List<Genotype> genotypes = new ArrayList<>(ctx.getNSamples());
for (int i = 0; i < ctx.getNSamples(); ++i) {
final Genotype g = ctx.getGenotype(i);
if (!g.isCalled() || g.isNoCall() || g.isHomRef()) {
genotypes.add(g);
continue;
}
final List<SamReader> bamReaderForSample = this.sample2bam.get(g.getSampleName());
if (bamReaderForSample.isEmpty()) {
genotypes.add(g);
continue;
}
observed_genotypes++;
int numberOfClipsOverlapping = 0;
for (final SamReader samReader : bamReaderForSample) {
final SAMRecordIterator iter = samReader.query(ctx.getContig(), Math.max(0, ctx.getStart() - extend4clip), ctx.getEnd() + extend4clip, false);
while (iter.hasNext()) {
final SAMRecord samRecord = iter.next();
if (samRecord.getReadUnmappedFlag() || samRecord.getCigar() == null || !samRecord.getContig().equals(ctx.getContig()) || samRecord.getUnclippedEnd() < ctx.getStart() || samRecord.getUnclippedStart() > ctx.getEnd() || samRecord.getReadGroup() == null || !g.getSampleName().equals(samRecord.getReadGroup().getSample()))
continue;
boolean ok = true;
for (final ReadFilter readFilter : readFilters) {
// this one cannot't be correctly initialized...
if (readFilter instanceof MalformedReadFilter)
continue;
if (readFilter.filterOut(samRecord)) {
ok = false;
break;
}
}
if (!ok)
continue;
int refPos = samRecord.getUnclippedStart();
for (final CigarElement ce : samRecord.getCigar()) {
if (ce.getOperator().consumesReferenceBases() || ce.getOperator().isClipping()) {
final int refEnd = refPos + ce.getLength();
if (!(ctx.getEnd() < refPos || refEnd < ctx.getStart())) {
// System.err.println("IN "+ce+" "+ctx.getStart()+"-"+ctx.getEnd()+" : "+refPos+"-"+refPos+ce.getLength());
if (ce.getOperator().equals(CigarOperator.S)) {
++numberOfClipsOverlapping;
}
}
refPos += ce.getLength();
}
if (refPos > ctx.getEnd())
break;
}
}
iter.close();
}
if (numberOfClipsOverlapping == 0) {
genotypes.add(g);
} else {
GenotypeBuilder gb = new GenotypeBuilder(g);
gb.attribute(numClipInGenotypeFormatHeaderLine.getID(), numberOfClipsOverlapping);
genotypes.add(gb.make());
num_some_clipped_genotypes++;
}
}
if (num_some_clipped_genotypes == 0) {
vcfWriter.add(ctx);
} else {
final VariantContextBuilder vcb = new VariantContextBuilder(ctx);
vcb.genotypes(genotypes);
if (observed_genotypes == num_some_clipped_genotypes) {
vcb.filter(this.filterHaveClipInVariant.getID());
}
vcfWriter.add(vcb.make());
}
}
return VCs.size();
}
Aggregations