use of htsjdk.samtools.util.CloseableIterator in project jvarkit by lindenb.
the class SvToSVG method doWork.
@Override
public int doWork(final List<String> args) {
/* parse interval */
if (this.intervalStrList.isEmpty()) {
LOG.error("interval(s) undefined");
return -1;
}
this.drawinAreaWidth = Math.max(100, this.drawinAreaWidth);
try {
if (this.fastaFile != null) {
this.indexedFastaSequenceFile = ReferenceSequenceFileFactory.getReferenceSequenceFile(this.fastaFile);
}
if (this.vcfFile != null) {
this.vcfFileReader = VCFReaderFactory.makeDefault().open(this.vcfFile, true);
}
final DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
dbf.setNamespaceAware(true);
final DocumentBuilder db = dbf.newDocumentBuilder();
this.document = db.newDocument();
final List<Path> bamFiles = IOUtils.unrollPaths(args);
if (bamFiles.isEmpty()) {
LOG.error("bam(s) undefined");
return -1;
}
final SamReaderFactory srf = super.createSamReaderFactory().referenceSequence(this.fastaFile);
/* loop over each bam file */
for (final Path bamFile : bamFiles) {
try (final SamReader sr = srf.open(bamFile)) {
final SAMFileHeader header = sr.getFileHeader();
final SAMSequenceDictionary dict = SequenceDictionaryUtils.extractRequired(header);
final Function<String, Optional<SimpleInterval>> parser = IntervalParserFactory.newInstance().dictionary(dict).make();
final Set<String> samples = header.getReadGroups().stream().map(G -> G.getSample()).filter(S -> !StringUtil.isBlank(S)).collect(Collectors.toSet());
if (samples.size() != 1) {
LOG.error("expected on sample in " + bamFile + " but got " + samples.size() + " " + samples);
return -1;
}
final Sample sample = new Sample(samples.iterator().next());
this.sampleList.add(sample);
/* loop over each region for that sample */
for (final String intervalStr : this.intervalStrList) {
LOG.info("scanning " + intervalStr + " for " + bamFile);
final SimpleInterval interval = parser.apply(intervalStr).orElseThrow(IntervalParserFactory.exception(intervalStr));
if (interval == null) {
LOG.error("Cannot parse " + intervalStr + " for " + bamFile);
return -1;
}
/* create new region */
final Sample.Region region = sample.new Region(interval);
if (!sample.regions.isEmpty() && region.getLengthOnReference() != sample.regions.get(0).getLengthOnReference()) {
LOG.warning("intervals should all have the same length");
}
sample.regions.add(region);
try (CloseableIterator<SAMRecord> iter = sr.query(interval.getContig(), interval.getStart(), interval.getEnd(), false)) {
while (iter.hasNext()) {
final SAMRecord record = iter.next();
boolean trace = record.getReadName().equals(DEBUG_READ);
if (!SAMRecordDefaultFilter.accept(record, this.min_mapq))
continue;
if (trace)
LOG.debug("got1");
final Cigar cigar = record.getCigar();
if (cigar == null || cigar.isEmpty())
continue;
if (trace)
LOG.debug("got2");
if (remove_simple_reads) {
if ((!cigar.isClipped() && SAMUtils.getOtherCanonicalAlignments(record).isEmpty() && (record.getReadPairedFlag() && record.getProperPairFlag())))
continue;
}
if (!record.getContig().equals(interval.getContig()))
continue;
if (trace)
LOG.debug("got3");
final Sample.ShortRead shortRead = sample.new DefaultShortRead(region, record);
if (shortRead.getEnd() < interval.getStart())
continue;
if (shortRead.getStart() > interval.getEnd())
continue;
if (trace)
LOG.debug("got4" + record.getSAMString());
if (!sample.sampleName.equals(shortRead.getSampleName()))
continue;
region.beforePileup.add(shortRead);
}
}
/* en iterator */
}
/* end loop interval */
}
/* end open SAM */
}
this.sampleList.forEach(S -> S.regions.forEach(R -> R.addSplitReads()));
this.sampleList.forEach(S -> S.regions.forEach(R -> R.pileup()));
/* sort per sample */
this.sampleList.sort((A, B) -> A.sampleName.compareTo(B.sampleName));
/* sort per region */
this.sampleList.stream().forEach(SL -> SL.regions.sort((A, B) -> {
final SmartComparator sc = new SmartComparator();
int i = sc.compare(A.getContig(), B.getContig());
if (i != 0)
return i;
return A.getStart() - B.getStart();
}));
buildDocument();
final Transformer tr = TransformerFactory.newInstance().newTransformer();
final Result result;
if (this.outputFile != null) {
result = new StreamResult(this.outputFile);
} else {
result = new StreamResult(stdout());
}
tr.transform(new DOMSource(this.document), result);
return 0;
} catch (final Throwable err) {
LOG.error(err);
return -1;
} finally {
CloserUtil.close(this.indexedFastaSequenceFile);
CloserUtil.close(this.vcfFileReader);
this.document = null;
}
}
use of htsjdk.samtools.util.CloseableIterator in project jvarkit by lindenb.
the class WesCnvSvg method doWork.
@Override
public int doWork(final List<String> args) {
XMLStreamWriter w = null;
BufferedReader r = null;
OutputStream fout = null;
VCFReader vcfReader = null;
try {
this.indexedFastaSequenceFile = ReferenceSequenceFileFactory.getReferenceSequenceFile(this.faidx);
final SAMSequenceDictionary refDict = SequenceDictionaryUtils.extractRequired(this.indexedFastaSequenceFile);
final IntervalExtender extender = IntervalExtender.of(refDict, this.extendWhat);
if (extender.isShriking()) {
LOG.error("extend factor <1.0");
return -1;
}
final ContigNameConverter contigNameConverter = ContigNameConverter.fromOneDictionary(refDict);
final ContigDictComparator contigDictCompare = new ContigDictComparator(refDict);
final List<CaptureInterval> userIntervals = this.intervalListProvider.stream().map(loc -> contigNameConverter.convertToSimpleInterval(loc).<RuntimeException>orElseThrow(() -> new RuntimeException(new JvarkitException.ContigNotFoundInDictionary(loc.getContig(), refDict)))).map(extender).collect(HtsCollectors.mergeIntervals()).map(L -> new CaptureInterval(L)).sorted(contigDictCompare.createLocatableComparator()).collect(Collectors.toCollection(ArrayList::new));
if (userIntervals.isEmpty()) {
LOG.error("no interval or bed defined");
return -1;
}
this.countBasesToBeDisplayed = userIntervals.stream().mapToInt(R -> R.getLengthOnReference()).sum();
if (this.countBasesToBeDisplayed < 1) {
LOG.error("Nothing to display. BED count==0");
return -1;
} else {
double x1 = 0;
for (int i = 0; i < userIntervals.size(); ++i) {
final CaptureInterval ci = userIntervals.get(i);
ci.pixelx = x1;
x1 += ci.getPixelWidth();
}
}
/* distinct ordered contigs */
final List<String> distinctContigs = userIntervals.stream().map(R -> R.getContig()).collect(Collectors.toSet()).stream().sorted(contigDictCompare).collect(Collectors.toList());
final SamReaderFactory srf = SamReaderFactory.makeDefault().validationStringency(ValidationStringency.LENIENT).referenceSequence(this.faidx);
for (final Path bamFile : IOUtils.unrollPaths(args)) {
final BamInput bi = new BamInput();
bi.bamPath = bamFile;
try (SamReader samReader = srf.open(bamFile)) {
final SAMSequenceDictionary samDict = SequenceDictionaryUtils.extractRequired(samReader.getFileHeader());
if (!SequenceUtil.areSequenceDictionariesEqual(refDict, samDict)) {
throw new JvarkitException.DictionariesAreNotTheSame(refDict, samDict);
}
bi.sample = samReader.getFileHeader().getReadGroups().stream().map(V -> V.getSample()).filter(S -> !StringUtil.isBlank(S)).findFirst().orElse(IOUtils.getFilenameWithoutCommonSuffixes(bamFile));
final CoverageFactory covFactory = new CoverageFactory().setMappingQuality(this.minMappingQuality);
for (final String contig : distinctContigs) {
final List<CaptureInterval> contig_intervals = userIntervals.stream().filter(R -> R.getContig().equals(contig)).collect(Collectors.toList());
final CoverageFactory.SimpleCoverage coverage = covFactory.getSimpleCoverage(samReader, contig_intervals, null);
for (CaptureInterval rgn : contig_intervals) {
final double[] array = coverage.getSubCoverage(rgn).scale(this.scaleType, (int) rgn.getPixelWidth());
bi.coverages.add(array);
}
}
}
this.bamInputs.add(bi);
}
if (this.bamInputs.isEmpty()) {
LOG.error("no bam input");
return -1;
}
if (this.vcfFile != null) {
vcfReader = VCFReaderFactory.makeDefault().open(this.vcfFile, true);
final SAMSequenceDictionary vcfDict = vcfReader.getHeader().getSequenceDictionary();
if (vcfDict != null)
SequenceUtil.assertSequenceDictionariesEqual(refDict, vcfDict);
}
final XMLOutputFactory xof = XMLOutputFactory.newFactory();
if (this.outputFile == null) {
w = xof.createXMLStreamWriter(stdout(), "UTF-8");
} else {
fout = Files.newOutputStream(this.outputFile);
w = xof.createXMLStreamWriter(fout, "UTF-8");
}
final Function<List<Point2D.Double>, String> points2str = (L) -> L.stream().map(S -> format(S.getX()) + "," + format(S.getY())).collect(Collectors.joining(" "));
final Consumer<List<Point2D.Double>> simplifyPoints = (L) -> {
for (int z = 0; z + 1 < L.size(); ++z) {
if (L.get(z).getY() == L.get(z + 1).getY()) {
L.get(z).x = L.get(z + 1).x;
L.remove(z + 1);
}
}
};
w.writeStartDocument("UTF-8", "1.0");
final Dimension dim = new Dimension(this.drawinAreaWidth, 0);
final int bed_header_height = 20;
dim.height += bed_header_height;
dim.height += (int) this.bamInputs.stream().mapToDouble(B -> B.getPixelHeight()).sum();
LOG.debug("drawing area: " + dim);
w.writeStartElement("svg");
w.writeAttribute("width", String.valueOf(dim.width));
w.writeAttribute("height", String.valueOf(dim.height));
w.writeDefaultNamespace(SVG.NS);
w.writeNamespace("xlink", XLINK.NS);
// https://stackoverflow.com/questions/15717970
w.writeStartElement("style");
if (this.cssFile != null) {
w.writeCharacters(IOUtil.slurp(this.cssFile));
} else {
w.writeCharacters("g.maing {stroke:black;stroke-width:0.5px;fill:whitesmoke;font-size:10pt;}\n" + "text.sampleLabel {stroke:none;stroke-width:0.5px;fill:blue;}" + "text.captureLabel {stroke:none;stroke-width:0.5px;fill:slategrey;text-anchor:middle;}" + "polygon.area {stroke:darkgray;stroke-width:0.5px;fill:url('#grad01');}" + "line.linedp {stroke:darkcyan;stroke-width:0.3px;opacity:0.4;}" + "text.linedp {fill-opacity:0.6;font-size:7px;stroke:none;stroke-width:0.5px;fill:darkcyan;}" + "rect.sampleFrame { fill:none;stroke:slategray;stroke-width:0.3px;}" + "rect.clickRgn {fill:none;stroke:none;pointer-events:all;}" + "polyline.gc {stroke:lightcoral;stroke-width:0.3px;fill:none;}" + "polyline.clipping {stroke:orange;stroke-width:0.8px;fill:none;}" + "circle.ar {fill:orange;stroke:none;}" + "circle.aa {fill:red;stroke:none;}" + "circle.rr {fill:green;stroke:none;}");
}
// style
w.writeEndElement();
w.writeStartElement("title");
w.writeCharacters(this.domSvgTitle);
w.writeEndElement();
w.writeStartElement("defs");
// alleles
final double genotype_radius = Double.parseDouble(this.dynaParams.getOrDefault("gt.radius", "1.5"));
w.writeEmptyElement("circle");
w.writeAttribute("r", format(genotype_radius));
w.writeAttribute("id", "rr");
w.writeAttribute("class", "rr");
w.writeEmptyElement("circle");
w.writeAttribute("r", format(genotype_radius));
w.writeAttribute("id", "ar");
w.writeAttribute("class", "ar");
w.writeEmptyElement("circle");
w.writeAttribute("r", format(genotype_radius));
w.writeAttribute("id", "aa");
w.writeAttribute("class", "aa");
// gradient
w.writeStartElement("linearGradient");
w.writeAttribute("id", "grad01");
w.writeAttribute("x1", "50%");
w.writeAttribute("x2", "50%");
w.writeAttribute("y1", "0%");
w.writeAttribute("y2", "100%");
w.writeEmptyElement("stop");
w.writeAttribute("offset", "0%");
w.writeAttribute("style", "stop-color:lightgray;stop-opacity:1;");
w.writeEmptyElement("stop");
w.writeAttribute("offset", "100%");
w.writeAttribute("style", "stop-color:gray;stop-opacity:1;");
w.writeEndElement();
// gc percent
for (final CaptureInterval ci : userIntervals) {
final GenomicSequence genomicSequence = new GenomicSequence(this.indexedFastaSequenceFile, ci.getContig());
final int gc_percent_width = (int) ci.getPixelWidth();
final List<Point2D.Double> points = new ArrayList<>(gc_percent_width);
for (int x = 0; x < gc_percent_width; ++x) {
int pos1 = ci.getStart() + (int) (((x + 0) / ci.getPixelWidth()) * ci.getLengthOnReference());
int pos2 = ci.getStart() + (int) (((x + 1) / ci.getPixelWidth()) * ci.getLengthOnReference());
double gc_percent = getGcPercent(genomicSequence, pos1, pos2);
double y = this.sampleTrackHeight - this.sampleTrackHeight * gc_percent;
points.add(new Point2D.Double(x, y));
}
simplifyPoints.accept(points);
w.writeStartElement("polyline");
w.writeAttribute("class", "gc");
w.writeAttribute("id", "z" + ci.getId());
w.writeAttribute("points", points2str.apply(points));
w.writeStartElement("title");
w.writeCharacters("GC %");
w.writeEndElement();
w.writeEndElement();
}
// defs
w.writeEndElement();
w.writeStartElement("script");
final StringBuilder openBrowserFunction = new StringBuilder("function openGenomeBrowser(contig,chromStart,chromEnd) {\n");
if (!this.hyperlinkType.isEmpty()) {
openBrowserFunction.append("var url=\"" + this.hyperlinkType.getPattern() + "\".replace(/__CHROM__/g,contig).replace(/__START__/g,chromStart).replace(/__END__/g,chromEnd);\n");
openBrowserFunction.append("window.open(url,'_blank');\n");
} else {
// nothing
}
openBrowserFunction.append("}\n");
w.writeCData(openBrowserFunction.toString() + "function clicked(evt,contig,chromStart,chromEnd){\n" + " var e = evt.target;\n" + " var dim = e.getBoundingClientRect();\n" + " var x = 1.0 * evt.clientX - dim.left;\n" + " var cLen = 1.0* (chromEnd - chromStart); if(cLen<1) cLen=1.0;\n" + " var pos1 = chromStart + parseInt(((x+0)/dim.width)*cLen);\n" + " var pos2 = chromStart + parseInt(((x+1)/dim.width)*cLen);\n" + " openGenomeBrowser(contig,pos1,pos2);\n" + "}\n");
// script
w.writeEndElement();
w.writeStartElement("g");
w.writeAttribute("class", "maing");
int y = 0;
w.writeStartElement("g");
w.writeComment("interval background");
for (final CaptureInterval ci : userIntervals) {
w.writeStartElement("text");
w.writeAttribute("class", "captureLabel");
w.writeAttribute("textLength", String.valueOf(ci.getPixelWidth() * 0.8));
w.writeAttribute("lengthAdjust", "spacing");
w.writeAttribute("x", String.valueOf(ci.getPixelX1() + ci.getPixelWidth() / 2.0));
w.writeAttribute("y", String.valueOf(bed_header_height - 2));
w.writeCharacters(ci.getName());
w.writeStartElement("title");
w.writeCharacters(ci.toNiceString());
// title
w.writeEndElement();
// text
w.writeEndElement();
w.writeStartElement("rect");
w.writeAttribute("style", "stroke:black;fill:none;");
w.writeAttribute("x", String.valueOf(ci.getPixelX1()));
w.writeAttribute("y", "0");
w.writeAttribute("width", String.valueOf(ci.getPixelWidth()));
w.writeAttribute("height", String.valueOf(dim.height));
w.writeStartElement("title");
w.writeCharacters(ci.getName());
w.writeEndElement();
w.writeEndElement();
}
// interval background
w.writeEndElement();
y += bed_header_height;
for (final BamInput bi : this.bamInputs) {
w.writeComment(bi.bamPath.toString());
w.writeStartElement("g");
w.writeAttribute("transform", "translate(0," + y + ")");
if (normalize_on_median_flag) {
final double medianDepth = Math.max(1.0, Percentile.median().evaluate(bi.coverages.stream().flatMapToDouble(B -> Arrays.stream(B)).toArray()).orElse(1.0));
LOG.info("median" + medianDepth);
for (final double[] coverage_array : bi.coverages) {
for (int px = 0; px < coverage_array.length; px++) {
coverage_array[px] /= medianDepth;
}
}
}
final double maxDepth = bi.coverages.stream().flatMapToDouble(B -> Arrays.stream(B)).max().orElse(1.0);
for (int ridx = 0; ridx < userIntervals.size(); ridx++) {
final CaptureInterval ci = userIntervals.get(ridx);
final String clickedAttribute = "clicked(evt,\"" + ci.getContig() + "\"," + ci.getStart() + "," + ci.getEnd() + ")";
final double[] coverage_array = bi.coverages.get(ridx);
final double leftx = ci.getPixelX1();
w.writeStartElement("g");
w.writeAttribute("transform", "translate(" + leftx + ",0)");
final int segment_width = (int) ci.getPixelWidth();
// coverage
{
final List<Point2D.Double> points = new ArrayList<>(segment_width);
points.add(new Point2D.Double(0, bi.getPixelHeight()));
for (int px = 0; px < coverage_array.length; px++) {
final double y_avg_cov = coverage_array[px];
final double new_y = bi.getPixelHeight() - (y_avg_cov / maxDepth) * bi.getPixelHeight();
points.add(new Point2D.Double(px, new_y));
}
points.add(new Point2D.Double(ci.getPixelWidth(), bi.getPixelHeight()));
simplifyPoints.accept(points);
// close
points.add(new Point2D.Double(leftx, bi.getPixelHeight()));
w.writeEmptyElement("polygon");
w.writeAttribute("class", "area");
w.writeAttribute("title", ci.toNiceString());
// w.writeAttribute("onclick", clickedAttribute);
w.writeAttribute("points", points2str.apply(points));
}
// w.writeEndElement();//g
int depthshift = 1;
for (; ; ) {
final int numdiv = (int) (maxDepth / depthshift);
if (numdiv <= 10)
break;
depthshift *= 10;
}
int depth = depthshift;
while (depth < maxDepth) {
double new_y = bi.getPixelHeight() - (depth / maxDepth) * bi.getPixelHeight();
w.writeStartElement("text");
w.writeAttribute("class", "linedp");
w.writeAttribute("x", "1");
w.writeAttribute("y", String.valueOf(new_y + 1));
w.writeCharacters(String.valueOf(depth));
// text
w.writeEndElement();
w.writeStartElement("line");
w.writeAttribute("class", "linedp");
w.writeAttribute("stroke-dasharray", "4");
w.writeAttribute("x1", "0");
w.writeAttribute("x2", String.valueOf(ci.getPixelWidth()));
w.writeAttribute("y1", String.valueOf(new_y));
w.writeAttribute("y2", String.valueOf(new_y));
w.writeStartElement("title");
w.writeCharacters(String.valueOf(depth));
w.writeEndElement();
// line
w.writeEndElement();
depth += depthshift;
}
// polyline
w.writeEmptyElement("use");
w.writeAttribute("xlink", XLINK.NS, "href", "#z" + ci.getId());
w.writeAttribute("x", "0");
w.writeAttribute("y", "0");
// click
w.writeStartElement("rect");
w.writeAttribute("class", "clickRgn");
w.writeAttribute("onclick", clickedAttribute);
w.writeAttribute("x", "0");
w.writeAttribute("y", "0");
w.writeAttribute("width", String.valueOf(ci.getPixelWidth()));
w.writeAttribute("height", String.valueOf(bi.getPixelHeight()));
w.writeEndElement();
// genotype
if (vcfReader != null) {
try (CloseableIterator<VariantContext> iter = vcfReader.query(ci)) {
while (iter.hasNext()) {
final VariantContext ctx = iter.next();
final Genotype gt = ctx.getGenotype(bi.sample);
if (gt == null)
break;
String allele_id = null;
switch(gt.getType()) {
case HET:
allele_id = "ar";
break;
case HOM_REF:
allele_id = "rr";
break;
case HOM_VAR:
allele_id = "aa";
break;
default:
allele_id = null;
break;
}
if (allele_id != null) {
w.writeEmptyElement("use");
w.writeAttribute("xlink", XLINK.NS, "href", "#" + allele_id);
w.writeAttribute("x", format(((ctx.getStart() - ci.getStart()) / (double) ci.getLengthOnReference()) * ci.getPixelWidth()));
w.writeAttribute("y", format(bi.getPixelHeight() - 2 * genotype_radius));
}
}
}
}
// g
w.writeEndElement();
}
// frame for this sample
w.writeStartElement("rect");
w.writeAttribute("class", "sampleFrame");
w.writeAttribute("x", "0");
w.writeAttribute("y", "0");
w.writeAttribute("width", String.valueOf(dim.width));
w.writeAttribute("height", String.valueOf(bi.getPixelHeight()));
// rect
w.writeEndElement();
w.writeStartElement("text");
w.writeAttribute("class", "sampleLabel");
w.writeAttribute("x", "5");
w.writeAttribute("y", "12");
w.writeStartElement("title");
w.writeCharacters(bi.bamPath.toString());
w.writeEndElement();
w.writeCharacters(bi.sample);
// text
w.writeEndElement();
// g
w.writeEndElement();
y += bi.getPixelHeight();
}
w.writeStartElement("g");
w.writeComment("interval lines");
for (int n = 0; n <= userIntervals.size(); n++) {
w.writeEmptyElement("line");
String x1 = n < userIntervals.size() ? String.valueOf(userIntervals.get(n).getPixelX1()) : String.valueOf(dim.width);
w.writeAttribute("x1", x1);
w.writeAttribute("y1", "0");
w.writeAttribute("x2", x1);
w.writeAttribute("y2", String.valueOf(dim.height));
}
// interval lines
w.writeEndElement();
// g
w.writeEndElement();
// svg
w.writeEndElement();
w.writeEndDocument();
w.flush();
w.close();
return 0;
} catch (final Exception err) {
LOG.error(err);
return -1;
} finally {
CloserUtil.close(vcfReader);
CloserUtil.close(w);
CloserUtil.close(fout);
CloserUtil.close(r);
CloserUtil.close(this.indexedFastaSequenceFile);
CloserUtil.close(this.bamInputs);
}
}
use of htsjdk.samtools.util.CloseableIterator in project jvarkit by lindenb.
the class Biostar404363 method doWork.
@Override
public int doWork(final List<String> args) {
SamReader in = null;
SAMFileWriter out = null;
final IntervalTreeMap<PosToChange> intervalTreeMap = new IntervalTreeMap<>();
try {
final SamReaderFactory srf = super.createSamReaderFactory();
if (this.faidx != null) {
srf.referenceSequence(this.faidx);
writingBamArgs.setReferencePath(this.faidx);
}
final String input = oneFileOrNull(args);
if (input == null) {
in = srf.open(SamInputResource.of(stdin()));
} else {
in = srf.open(SamInputResource.of(input));
}
final SAMFileHeader header = in.getFileHeader();
final SAMProgramRecord prg = header.createProgramRecord();
prg.setCommandLine(this.getProgramCommandLine());
prg.setProgramName(this.getProgramName());
prg.setProgramVersion(this.getGitHash());
final SAMSequenceDictionary dict = SequenceDictionaryUtils.extractRequired(header);
final ContigNameConverter contigNameConverter = ContigNameConverter.fromOneDictionary(dict);
try (VCFReader vcfReader = VCFReaderFactory.makeDefault().open(this.vcfPath, false)) {
try (CloseableIterator<VariantContext> iter = vcfReader.iterator()) {
while (iter.hasNext()) {
final VariantContext ctx = iter.next();
final List<Allele> alts = ctx.getAlternateAlleles();
if (alts.size() != 1) {
LOG.error("Expected one ALT allele in " + ctx);
return -1;
}
final Allele alt = alts.get(0);
if (alt.isSymbolic() || alt.length() != 1 || !AcidNucleics.isATGCN(alt)) {
LOG.error("Bad ALT allele in " + ctx);
return -1;
}
final String theContig = contigNameConverter.apply(ctx.getContig());
if (StringUtils.isBlank(theContig))
throw new JvarkitException.ContigNotFoundInDictionary(ctx.getContig(), dict);
final PosToChange pos2change = new PosToChange(theContig, ctx.getStart());
pos2change.base = (byte) Character.toUpperCase(alt.getBases()[0]);
if (ctx.hasAttribute(VCFConstants.ALLELE_FREQUENCY_KEY)) {
pos2change.proba = ctx.getAttributeAsDouble(VCFConstants.ALLELE_FREQUENCY_KEY, 1.0);
}
final Interval key = new Interval(pos2change);
if (intervalTreeMap.containsKey(key)) {
LOG.error("Duplicate key " + key + " for " + ctx);
return -1;
}
intervalTreeMap.put(key, pos2change);
}
}
}
JVarkitVersion.getInstance().addMetaData(this, header);
out = this.writingBamArgs.openSamWriter(this.outputFile, header, true);
final SAMRecordIterator iter = in.iterator();
while (iter.hasNext()) {
final SAMRecord rec = iter.next();
final byte[] bases = rec.getReadBases();
if (rec.getReadUnmappedFlag() || bases == SAMRecord.NULL_SEQUENCE) {
out.addAlignment(rec);
continue;
}
final List<PosToChange> changes = intervalTreeMap.getOverlapping(new SimpleInterval(rec.getContig(), rec.getUnclippedStart(), rec.getUnclippedEnd())).stream().collect(Collectors.toList());
if (changes.isEmpty()) {
out.addAlignment(rec);
continue;
}
int NM = 0;
if (rec.hasAttribute("NM"))
NM = rec.getIntegerAttribute("NM");
int readpos = 0;
int ref = rec.getUnclippedStart();
boolean changed = false;
final Cigar cigar = rec.getCigar();
for (final CigarElement ce : cigar) {
final CigarOperator op = ce.getOperator();
if ((op.equals(CigarOperator.S) && !this.ignore_clip) || (op.consumesReadBases() && op.consumesReferenceBases())) {
for (int i = 0; i < ce.getLength(); i++) {
final int the_pos = ref + i;
final byte the_base = bases[readpos + i];
final PosToChange pos2change = changes.stream().filter(P -> P.getPosition() == the_pos).filter(P -> P.base != the_base).filter(P -> Math.random() <= P.proba).findFirst().orElse(null);
if (pos2change == null)
continue;
bases[readpos + i] = pos2change.base;
if (op.isAlignment())
NM++;
changed = true;
}
}
if (op.equals(CigarOperator.S) || op.consumesReadBases()) {
readpos += ce.getLength();
}
if (op.isClipping() || op.consumesReferenceBases()) {
ref += ce.getLength();
}
}
if (changed) {
rec.setReadBases(bases);
if (!keep_original_nm)
rec.setAttribute("NM", NM);
rec.setAttribute("PG", prg.getId());
}
out.addAlignment(rec);
}
in.close();
in = null;
out.close();
out = null;
return 0;
} catch (final Throwable err) {
LOG.error(err);
return -1;
} finally {
CloserUtil.close(in);
CloserUtil.close(out);
}
}
use of htsjdk.samtools.util.CloseableIterator in project jvarkit by lindenb.
the class Biostar322664 method doWork.
@Override
public int doWork(final List<String> args) {
if (this.use_bam_index && !this.input_is_not_sorted_on_queryname) {
LOG.error("Cannot use option -index without option -nm");
return -1;
}
if (this.both_pair_mode && this.input_is_not_sorted_on_queryname) {
LOG.error("Cannot use option -and without sorting on queryname");
return -1;
}
/* will be used as a SAMRecord attribute 'X'+meta_char containing the data about the variants */
final char meta_char;
if (this.meta_tag_str != null) {
if (this.meta_tag_str.length() != 1) {
LOG.error("Meta tag should be only one character, got " + meta_tag_str);
return -1;
}
meta_char = this.meta_tag_str.charAt(0);
} else {
meta_char = '\0';
}
if (this.output_all && meta_char == '\0') {
LOG.error("Cannot output all if not meta char -X defined");
return -1;
}
final IntervalTreeMap<VariantContext> variantMap = new IntervalTreeMap<>();
VCFReader vcfFileReader = null;
CloseableIterator<VariantContext> viter = null;
SamReader samReader = null;
SAMFileWriter samFileWriter = null;
try {
LOG.info("reading VCF " + this.vcfFile + " in memory...");
vcfFileReader = VCFReaderFactory.makeDefault().open(this.vcfFile, false);
viter = vcfFileReader.iterator();
viter.stream().filter(V -> V.isVariant() && V.getReference().length() == 1 && V.getAlternateAlleles().stream().anyMatch(A -> A.length() == 1)).map(V -> new VariantContextBuilder(V).attributes(Collections.emptyMap()).noID().unfiltered().noGenotypes().make()).forEach(V -> variantMap.put(new Interval(V.getContig(), V.getStart(), V.getEnd()), V));
CloserUtil.close(viter);
viter = null;
vcfFileReader.close();
vcfFileReader = null;
LOG.info("Done Reading: " + this.vcfFile);
samReader = super.openSamReader(oneFileOrNull(args));
final SAMFileHeader header = samReader.getFileHeader();
if (!this.input_is_not_sorted_on_queryname && header.getSortOrder() != SAMFileHeader.SortOrder.queryname) {
LOG.error("Expected SAM input to be sorted on " + SAMFileHeader.SortOrder.queryname + " but got " + header.getSortOrder() + " See the options to work without 'mate'.");
return -1;
}
samFileWriter = this.writingBamArgs.openSAMFileWriter(this.outputFile, header, true);
final CloseableIterator<SAMRecord> iter;
if (!use_bam_index) {
iter = samReader.iterator();
} else {
final SAMSequenceDictionary dict = header.getSequenceDictionary();
if (dict == null)
throw new JvarkitException.BamDictionaryMissing("input");
final List<QueryInterval> intervals = new ArrayList<>();
for (final VariantContext ctx : variantMap.values()) {
int tid = dict.getSequenceIndex(ctx.getContig());
if (tid < 0)
throw new JvarkitException.ContigNotFoundInDictionary(ctx.getContig(), dict);
intervals.add(new QueryInterval(tid, ctx.getStart(), ctx.getEnd()));
}
QueryInterval[] intervalArray = intervals.toArray(new QueryInterval[intervals.size()]);
intervalArray = QueryInterval.optimizeIntervals(intervalArray);
iter = samReader.query(intervalArray, false);
}
Iterator<List<SAMRecord>> eq_range = null;
if (this.input_is_not_sorted_on_queryname) {
eq_range = new AbstractIterator<List<SAMRecord>>() {
@Override
protected List<SAMRecord> advance() {
return iter.hasNext() ? Collections.singletonList(iter.next()) : null;
}
};
} else {
eq_range = new EqualRangeIterator<>(iter, (R1, R2) -> R1.getReadName().compareTo(R2.getReadName()));
}
while (eq_range.hasNext()) {
final List<SAMRecord> array = eq_range.next();
final boolean[] carry_variant = new boolean[array.size()];
Arrays.fill(carry_variant, false);
for (int record_index = 0; record_index < array.size(); ++record_index) {
final SAMRecord rec = array.get(record_index);
// if(debug==true) LOG.info("got read! "+rec+" array.size="+array.size());
if (rec.getReadUnmappedFlag()) {
continue;
}
final Set<String> meta_attribute = (meta_char == '\0' ? null : new HashSet<>());
final Collection<VariantContext> variants = variantMap.getOverlapping(rec);
if (variants.isEmpty()) {
continue;
}
final Cigar cigar = rec.getCigar();
if (cigar == null || cigar.isEmpty())
continue;
final byte[] bases = rec.getReadBases();
if (bases == null || bases.length == 0 || bases == SAMRecord.NULL_SEQUENCE)
continue;
for (final VariantContext ctx : variants) {
if (ctx.getReference().length() != 1)
continue;
for (final Allele alt : ctx.getAlternateAlleles()) {
if (alt.isNoCall() || alt.isSymbolic())
continue;
if (alt.length() != 1)
continue;
final char altbase = (char) Character.toUpperCase(alt.getBases()[0]);
int ref1 = rec.getUnclippedStart();
int readpos = 0;
for (final CigarElement ce : cigar) {
if (ref1 > ctx.getEnd()) {
break;
}
final CigarOperator op = ce.getOperator();
switch(op) {
case P:
break;
case S:
{
readpos += ce.getLength();
ref1 += ce.getLength();
break;
}
case H:
{
ref1 += ce.getLength();
break;
}
case I:
readpos += ce.getLength();
break;
case D:
case N:
ref1 += ce.getLength();
break;
case M:
case EQ:
case X:
{
for (int x = 0; x < ce.getLength(); ++x) {
final int rp2 = readpos + x;
if (rp2 >= 0 && rp2 < bases.length && ref1 + x == ctx.getStart()) {
final char readbase = Character.toUpperCase((char) bases[rp2]);
if (readbase == altbase) {
// if(debug) LOG.debug(rec.getReadName()+" read["+rp2+"]="+readbase+" ref["+ctx.getStart()+"]="+altbase);
carry_variant[record_index] = true;
if (meta_attribute != null) {
meta_attribute.add(ctx.getContig() + "|" + ctx.getStart() + "|" + ctx.getReference().getDisplayString() + "|" + ctx.getAlternateAlleles().stream().filter(A -> A.length() == 1).map(A -> A.getDisplayString()).collect(Collectors.joining(",")));
}
break;
}
}
}
readpos += ce.getLength();
ref1 += ce.getLength();
break;
}
default:
throw new IllegalStateException("bad cigar operator " + op);
}
}
}
// end of cigar loop
if (meta_attribute != null && !meta_attribute.isEmpty()) {
rec.setAttribute("X" + meta_char, String.join(";", meta_attribute));
}
}
// end of loop over variants
}
// end of for(Samrecord in array)
boolean any_match = false;
for (boolean m : carry_variant) {
if (m) {
any_match = true;
break;
}
}
if (any_match && this.both_pair_mode) {
// reset
any_match = false;
for (int x = 0; x + 1 < array.size() && !any_match; ++x) {
if (!carry_variant[x])
continue;
final SAMRecord r1 = array.get(x);
if (!r1.getReadPairedFlag())
continue;
for (int y = x + 1; y < array.size() && !any_match; ++y) {
if (!carry_variant[y])
continue;
// check r2 is mate of r1
final SAMRecord r2 = array.get(x);
if (!r2.getReadPairedFlag())
continue;
if (r1.getFirstOfPairFlag() && r2.getFirstOfPairFlag())
continue;
if (!r1.getFirstOfPairFlag() && !r2.getFirstOfPairFlag())
continue;
if (r1.getAlignmentStart() != r2.getMateAlignmentStart())
continue;
if (r2.getAlignmentStart() != r1.getMateAlignmentStart())
continue;
if (!r1.getReferenceName().equals(r2.getMateReferenceName()))
continue;
any_match = true;
}
}
}
if (any_match || this.output_all) {
for (final SAMRecord rec : array) samFileWriter.addAlignment(rec);
}
}
CloserUtil.close(eq_range);
iter.close();
samFileWriter.close();
samFileWriter = null;
samReader.close();
samReader = null;
return 0;
} catch (final Exception err) {
LOG.error(err);
return -1;
} finally {
CloserUtil.close(samReader);
CloserUtil.close(samFileWriter);
CloserUtil.close(vcfFileReader);
}
}
use of htsjdk.samtools.util.CloseableIterator in project jvarkit by lindenb.
the class VcfGnomadExomeVsGenome method isVariantIn.
boolean isVariantIn(final VariantContext ctx, final BufferedVCFReader reader) {
final String ctg = this.ctgNameConverter.apply(ctx.getContig());
if (StringUtils.isBlank(ctg))
return false;
try (CloseableIterator<VariantContext> iterE = reader.query(new SimpleInterval(ctg, ctx.getStart(), ctx.getEnd()))) {
while (iterE.hasNext()) {
final VariantContext ctx2 = iterE.next();
if (ctx.getStart() != ctx2.getStart())
continue;
if (!ctx.getReference().equals(ctx2.getReference()))
continue;
final Set<Allele> alt2 = ctx2.getAlternateAlleles().stream().filter(A -> AcidNucleics.isATGC(A)).collect(Collectors.toSet());
if (ctx.getAlternateAlleles().stream().anyMatch(A -> alt2.contains(A)))
return true;
}
}
return false;
}
Aggregations