use of com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress in project jvarkit by lindenb.
the class VcfIn method scanUsingTabix.
private int scanUsingTabix(final VariantContextWriter vcw, final String databaseFile, final VcfIterator in2) {
VCFFileReader tabix = null;
try {
tabix = new VCFFileReader(new File(databaseFile), true);
final VCFHeader header1 = new VCFHeader(in2.getHeader());
this.addMetaData(header1);
vcw.writeHeader(header1);
final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(header1.getSequenceDictionary()).logger(LOG);
while (in2.hasNext() && !vcw.checkError()) {
final VariantContext userCtx = progress.watch(in2.next());
final CloseableIterator<VariantContext> iter = tabix.query(userCtx.getContig(), Math.max(1, userCtx.getStart() - 1), userCtx.getEnd() + 1);
boolean keep = false;
while (iter.hasNext()) {
final VariantContext dbctx = iter.next();
if (!sameContext(userCtx, dbctx))
continue;
if (!allUserAltFoundInDatabase(userCtx, dbctx))
continue;
keep = true;
break;
}
iter.close();
addVariant(vcw, userCtx, keep);
if (vcw.checkError())
break;
}
progress.finish();
return RETURN_OK;
} catch (final Exception err) {
LOG.error(err);
return -1;
} finally {
CloserUtil.close(tabix);
CloserUtil.close(in2);
}
}
use of com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress in project jvarkit by lindenb.
the class VcfTreePack method scan.
void scan(final VcfIterator iter) {
final VCFHeader header = iter.getHeader();
super.bindings.put("header", header);
final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(header.getSequenceDictionary());
while (iter.hasNext()) {
final VariantContext ctx = progress.watch(iter.next());
bindings.put("variant", ctx);
super.nodeFactoryChain.watch(super.rootNode, ctx);
}
progress.finish();
}
use of com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress in project jvarkit by lindenb.
the class VcfLiftOver method doVcfToVcf.
@Override
protected int doVcfToVcf(String inputName, VcfIterator in, VariantContextWriter out) {
VariantContextWriter failed = null;
GenomicSequence genomicSequence = null;
try {
final VCFHeader inputHeader = in.getHeader();
final Set<VCFHeaderLine> headerLines = inputHeader.getMetaDataInInputOrder().stream().filter(V -> {
if (!(V instanceof VCFInfoHeaderLine))
return true;
final VCFInfoHeaderLine vih = VCFInfoHeaderLine.class.cast(V);
if (removeInfo.contains(vih.getID()))
return false;
return true;
}).collect(Collectors.toSet());
if (this.failedFile != null) {
final VCFHeader header2 = new VCFHeader(headerLines, inputHeader.getSampleNamesInOrder());
header2.addMetaDataLine(new VCFHeaderLine(getClass().getSimpleName() + "CmdLine", String.valueOf(getProgramCommandLine())));
header2.addMetaDataLine(new VCFHeaderLine(getClass().getSimpleName() + "Version", String.valueOf(getVersion())));
header2.addMetaDataLine(new VCFInfoHeaderLine(this.failedinfoTag, 1, VCFHeaderLineType.String, "Why the liftOver failed."));
failed = super.openVariantContextWriter(failedFile);
failed.writeHeader(header2);
}
final VCFHeader header3 = new VCFHeader(headerLines, inputHeader.getSampleNamesInOrder());
header3.setSequenceDictionary(this.indexedFastaSequenceFile.getSequenceDictionary());
header3.addMetaDataLine(new VCFHeaderLine(getClass().getSimpleName() + "CmdLine", String.valueOf(getProgramCommandLine())));
header3.addMetaDataLine(new VCFHeaderLine(getClass().getSimpleName() + "Version", String.valueOf(getVersion())));
header3.addMetaDataLine(new VCFInfoHeaderLine(this.infoTag, 1, VCFHeaderLineType.String, "Chromosome|Position before liftOver."));
out.writeHeader(header3);
final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(in.getHeader());
while (in.hasNext()) {
VariantContext ctx = progress.watch(in.next());
if (!this.removeInfo.isEmpty()) {
VariantContextBuilder vcb = new VariantContextBuilder(ctx);
for (final String tag : this.removeInfo) vcb.rmAttribute(tag);
ctx = vcb.make();
}
if (ctx.isIndel() && this.ignoreIndels) {
if (failed != null)
failed.add(new VariantContextBuilder(ctx).attribute(this.failedinfoTag, "Indel").make());
continue;
}
if (adaptivematch) {
double minAlleleLength = Math.min(0, ctx.getAlleles().stream().mapToInt(A -> A.length()).min().orElse(0));
double maxAlleleLength = Math.max(1, ctx.getAlleles().stream().mapToInt(A -> A.length()).max().orElse(1));
this.liftOver.setLiftOverMinMatch(minAlleleLength / maxAlleleLength);
}
final Interval lifted = liftOver.liftOver(new Interval(ctx.getContig(), ctx.getStart(), ctx.getEnd(), // negative strand
false, String.join("|", ctx.getContig(), String.valueOf(ctx.getStart()), ctx.getReference().toString())));
if (lifted == null) {
if (failed != null)
failed.add(new VariantContextBuilder(ctx).attribute(this.failedinfoTag, "LiftOverFailed").make());
} else if (this.indexedFastaSequenceFile.getSequenceDictionary().getSequence(lifted.getContig()) == null) {
if (failed != null)
failed.add(new VariantContextBuilder(ctx).attribute(this.failedinfoTag, "ContigMissingDictionary|" + lifted.getContig()).make());
} else {
boolean alleleAreValidatedVsRef = true;
// part of the code was copied from picard/liftovervcf
final Map<Allele, Allele> reverseComplementAlleleMap = new HashMap<>();
final List<Allele> alleles = new ArrayList<Allele>();
for (final Allele oldAllele : ctx.getAlleles()) {
final Allele fixedAllele;
if (oldAllele.isSymbolic() || oldAllele.isNoCall() || oldAllele.equals(Allele.SPAN_DEL)) {
alleles.add(oldAllele);
continue;
} else if (lifted.isPositiveStrand()) {
fixedAllele = oldAllele;
alleles.add(oldAllele);
} else {
fixedAllele = Allele.create(SequenceUtil.reverseComplement(oldAllele.getBaseString()), oldAllele.isReference());
alleles.add(fixedAllele);
reverseComplementAlleleMap.put(oldAllele, fixedAllele);
}
if (this.checkAlleleSequence) {
if (genomicSequence == null || !genomicSequence.getChrom().equals(lifted.getContig())) {
genomicSequence = new GenomicSequence(this.indexedFastaSequenceFile, lifted.getContig());
}
final String alleleStr = fixedAllele.getBaseString();
int x = 0;
while (x < alleleStr.length() && lifted.getStart() - 1 + x < genomicSequence.length()) {
final char refChar = genomicSequence.charAt(lifted.getStart() - 1 + x);
if (Character.toLowerCase(refChar) != Character.toLowerCase(alleleStr.charAt(x))) {
alleleAreValidatedVsRef = false;
break;
}
++x;
}
if (x != alleleStr.length()) {
alleleAreValidatedVsRef = false;
break;
}
}
}
if (!alleleAreValidatedVsRef) {
if (failed != null)
failed.add(new VariantContextBuilder(ctx).attribute(this.failedinfoTag, "AlleleMismatchRef").make());
continue;
}
if (lifted.getEnd() - lifted.getStart() != ctx.getEnd() - ctx.getStart()) {
if (failed != null)
failed.add(new VariantContextBuilder(ctx).attribute(this.failedinfoTag, "AlleleBadLength|" + lifted.length()).make());
continue;
}
final VariantContextBuilder vcb = new VariantContextBuilder(ctx.getSource(), lifted.getContig(), lifted.getStart(), lifted.getEnd(), alleles);
vcb.id(ctx.getID());
vcb.attributes(ctx.getAttributes());
vcb.attribute(this.infoTag, ctx.getContig() + "|" + ctx.getStart() + "|" + ctx.getReference().getDisplayString());
vcb.filters(ctx.getFilters());
vcb.log10PError(ctx.getLog10PError());
final GenotypesContext genotypeContext = ctx.getGenotypes();
final GenotypesContext fixedGenotypes = GenotypesContext.create(genotypeContext.size());
for (final Genotype genotype : genotypeContext) {
final List<Allele> fixedAlleles = new ArrayList<Allele>();
for (final Allele allele : genotype.getAlleles()) {
final Allele fixedAllele = reverseComplementAlleleMap.containsKey(allele) ? reverseComplementAlleleMap.get(allele) : allele;
fixedAlleles.add(fixedAllele);
}
fixedGenotypes.add(new GenotypeBuilder(genotype).alleles(fixedAlleles).make());
}
vcb.genotypes(fixedGenotypes);
out.add(vcb.make());
}
}
if (failed != null) {
failed.close();
failed = null;
}
return RETURN_OK;
} catch (final Exception err) {
LOG.error(err);
return -1;
} finally {
CloserUtil.close(failed);
}
}
use of com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress in project jvarkit by lindenb.
the class BamClipToInsertion method doWork.
@Override
public int doWork(final List<String> args) {
SAMRecordIterator iter = null;
SamReader sfr = null;
SAMFileWriter sfw = null;
try {
sfr = super.openSamReader(oneFileOrNull(args));
final SAMFileHeader header1 = sfr.getFileHeader();
if (header1 == null) {
LOG.error("File header missing");
return -1;
}
if (header1.getSortOrder() != SortOrder.coordinate) {
LOG.error("Input is not sorted on coordinate.");
return -1;
}
final SAMFileHeader header2 = header1.clone();
header2.addComment(getProgramName() + ":" + getVersion() + ":" + getProgramCommandLine());
header2.setSortOrder(SortOrder.unsorted);
sfw = this.writingBamArgs.openSAMFileWriter(output, header2, true);
final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(header1);
iter = sfr.iterator();
String curContig = null;
LinkedList<SamAndCigar> buffer = new LinkedList<>();
for (; ; ) {
SAMRecord rec = null;
if (iter.hasNext()) {
rec = progress.watch(iter.next());
// ignore unmapped reads
if (rec.getReadUnmappedFlag()) {
sfw.addAlignment(rec);
continue;
}
}
if (rec == null || (curContig != null && !curContig.equals(rec.getReferenceName()))) {
for (final SamAndCigar r : buffer) sfw.addAlignment(r.getSAMRecord());
buffer.clear();
// we're done
if (rec == null)
break;
curContig = rec.getReferenceName();
}
final SamAndCigar sac = new SamAndCigar(rec);
if (!sac.containsIorS) {
sfw.addAlignment(rec);
continue;
}
buffer.add(sac);
int i = 0;
while (i < buffer.size()) {
final SamAndCigar ri = buffer.get(i);
if (ri.getSAMRecord().getUnclippedEnd() < rec.getUnclippedStart()) {
for (int j = 0; j < buffer.size(); ++j) {
if (i == j)
continue;
ri.merge(buffer.get(j));
}
sfw.addAlignment(ri.build());
buffer.remove(i);
} else {
++i;
}
}
}
progress.finish();
LOG.info("done");
return RETURN_OK;
} catch (Exception err) {
LOG.error(err);
return -1;
} finally {
CloserUtil.close(iter);
CloserUtil.close(sfr);
CloserUtil.close(sfw);
}
}
use of com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress in project jvarkit by lindenb.
the class BamTile method doWork.
@Override
public int doWork(final List<String> args) {
SAMRecordIterator iter = null;
SamReader sfr = null;
SAMFileWriter sfw = null;
try {
sfr = openSamReader(oneFileOrNull(args));
final SAMFileHeader header1 = sfr.getFileHeader();
if (header1 == null) {
LOG.error("File header missing");
return -1;
}
if (header1.getSortOrder() != SAMFileHeader.SortOrder.coordinate) {
LOG.error("File header not sorted on coordinate");
return -1;
}
final SAMFileHeader header2 = header1.clone();
header2.addComment("BamTile:" + getVersion() + ":" + getProgramCommandLine());
sfw = this.writingBamArgs.openSAMFileWriter(this.outputFile, header2, true);
final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(header1);
iter = sfr.iterator();
final LinkedList<SAMRecord> buffer = new LinkedList<>();
for (; ; ) {
SAMRecord rec = null;
if (iter.hasNext()) {
rec = progress.watch(iter.next());
if (rec.getReadUnmappedFlag())
continue;
if (this.filterOut.filterOut(rec))
continue;
if (!buffer.isEmpty()) {
final SAMRecord last = buffer.getLast();
if (this.no_overlap) {
if (last.getReferenceIndex() == rec.getReferenceIndex() && last.getEnd() >= rec.getStart()) {
continue;
}
} else {
if (last.getReferenceIndex() == rec.getReferenceIndex() && last.getAlignmentStart() <= rec.getAlignmentStart() && last.getAlignmentEnd() >= rec.getAlignmentEnd()) {
continue;
}
}
}
}
if (rec == null || (!buffer.isEmpty() && buffer.getLast().getReferenceIndex() != rec.getReferenceIndex())) {
while (!buffer.isEmpty()) {
sfw.addAlignment(buffer.removeFirst());
}
if (rec == null)
break;
}
buffer.add(rec);
if (!this.no_overlap && buffer.size() > 2) {
final int index = buffer.size();
final SAMRecord prev = buffer.get(index - 3);
final SAMRecord curr = buffer.get(index - 2);
final SAMRecord next = buffer.get(index - 1);
if (prev.getAlignmentEnd() >= next.getAlignmentStart() || curr.getAlignmentEnd() <= prev.getAlignmentEnd()) {
buffer.remove(index - 2);
} else if (curr.getAlignmentStart() == prev.getAlignmentStart() && curr.getAlignmentEnd() > prev.getAlignmentEnd()) {
buffer.remove(index - 3);
}
}
while (buffer.size() > 3) {
sfw.addAlignment(buffer.removeFirst());
}
}
progress.finish();
sfw.close();
sfw = null;
iter.close();
iter = null;
sfr.close();
sfr = null;
LOG.info("done");
return RETURN_OK;
} catch (final Exception err) {
LOG.error(err);
return -1;
} finally {
CloserUtil.close(iter);
CloserUtil.close(sfr);
CloserUtil.close(sfw);
}
}
Aggregations