use of htsjdk.samtools.SAMRecordIterator in project jvarkit by lindenb.
the class VCFAnnoBam method process.
/**
 * Computes per-base coverage over {@code rgn.interval} from every reader in
 * {@code samReaders} and stores the summary statistics in {@code rgn}.
 *
 * <p>Only cigar operators M, EQ and X add to the coverage of a reference position;
 * D and N advance the reference position without adding coverage; H, S, I and P are ignored.
 * Unmapped reads, reads rejected by {@code this.filter}, reads on another contig and reads
 * without a cigar are skipped.
 *
 * <p>On return {@code rgn.processed} is {@code true}. When the interval is not empty,
 * {@code rgn.min}, {@code rgn.max}, {@code rgn.mean}, {@code rgn.count_no_coverage} and
 * {@code rgn.percent_covered} are updated. The coverage values are added to the current
 * content of {@code rgn.mean} and {@code rgn.count_no_coverage}
 * (NOTE(review): presumably both start at zero - confirm against the Rgn declaration).
 *
 * @param rgn the region to annotate; its interval uses 1-based inclusive coordinates
 * @param samReaders the readers queried for reads overlapping the interval
 * @throws IllegalStateException if a read contains a cigar operator that is not handled
 */
private void process(Rgn rgn, List<SamReader> samReaders) {
    rgn.processed = true;
    final int chromStart1 = rgn.interval.getStart();
    final int chromEnd1 = rgn.interval.getEnd();
    final int length = chromEnd1 - chromStart1 + 1;
    // An empty or inverted interval has no position to measure; bail out before
    // allocating (a negative length would throw NegativeArraySizeException).
    if (length <= 0)
        return;
    // A new int[] is already zero-filled.
    final int[] counts = new int[length];
    final String contig = rgn.interval.getContig();
    for (final SamReader samReader : samReaders) {
        // queryOverlapping takes a 1-based inclusive start and end.
        // try-with-resources releases the iterator even if the filter or the cigar walk throws.
        try (SAMRecordIterator r = samReader.queryOverlapping(contig, chromStart1, chromEnd1)) {
            while (r.hasNext()) {
                final SAMRecord rec = r.next();
                if (rec.getReadUnmappedFlag())
                    continue;
                if (this.filter.filterOut(rec))
                    continue;
                if (!rec.getReferenceName().equals(contig))
                    continue;
                final Cigar cigar = rec.getCigar();
                if (cigar == null)
                    continue;
                int refpos1 = rec.getAlignmentStart();
                for (final CigarElement ce : cigar.getCigarElements()) {
                    switch (ce.getOperator()) {
                        case H:
                        case S:
                        case I:
                        case P:
                            // no reference base involved
                            break;
                        case N: // reference skip
                        case D: // deletion in reference
                        {
                            refpos1 += ce.getLength();
                            break;
                        }
                        case M:
                        case EQ:
                        case X:
                        {
                            // Stop as soon as the read runs past the region end.
                            for (int i = 0; i < ce.getLength() && refpos1 <= chromEnd1; ++i) {
                                if (refpos1 >= chromStart1) {
                                    counts[refpos1 - chromStart1]++;
                                }
                                refpos1++;
                            }
                            break;
                        }
                        default:
                            throw new IllegalStateException("Doesn't know how to handle cigar operator:" + ce.getOperator() + " cigar:" + cigar);
                    }
                }
            }
        }
    }
    // Sorting gives min and max as the first and last elements.
    Arrays.sort(counts);
    for (final int cov : counts) {
        if (cov <= MIN_COVERAGE)
            rgn.count_no_coverage++;
        rgn.mean += cov;
    }
    rgn.mean /= counts.length;
    rgn.min = counts[0];
    rgn.max = counts[counts.length - 1];
    rgn.percent_covered = (int) (((counts.length - rgn.count_no_coverage) / (double) counts.length) * 100.0);
}
use of htsjdk.samtools.SAMRecordIterator in project jvarkit by lindenb.
the class FindAllCoverageAtPosition method scan.
/**
 * Reads BAM file paths from {@code in} (one per line) and, for each indexed BAM and each
 * mutation, prints one tab-delimited line per sample to {@code out} with the number of
 * cigar operators and bases observed at the mutation position.
 *
 * <p>Empty lines, lines starting with '#', paths that are not readable regular files,
 * CRAM files and files not ending with ".bam" are skipped, as are BAM files without an index.
 * Reading stops when {@code out.checkError()} reports an error.
 *
 * <p>Each read contributes at most once to the counters of its sample: the cigar is walked
 * from the unclipped start, an insertion reached while the reference cursor is on the position
 * is counted, and the walk stops as soon as the element covering the position has been counted.
 *
 * @param in source of BAM paths
 * @param mutations the positions to inspect; each one is converted with
 *        {@code convertFromSamHeader} and skipped when that returns {@code null}
 * @throws Exception any error raised while reading a BAM; it is logged and rethrown
 */
private void scan(final BufferedReader in, final Set<Mutation> mutations) throws Exception {
    final String DEFAULT_SAMPLE_NAME = "(undefined)";
    String line;
    while ((line = in.readLine()) != null) {
        if (out.checkError())
            break;
        if (line.isEmpty() || line.startsWith("#"))
            continue;
        final File f = new File(line);
        if (!f.exists())
            continue;
        if (!f.isFile())
            continue;
        if (!f.canRead())
            continue;
        final String filename = f.getName();
        if (filename.endsWith(".cram")) {
            LOG.warn("Sorry CRAM is not supported " + filename);
            continue;
        }
        if (!filename.endsWith(".bam"))
            continue;
        SamReader samReader = null;
        SAMRecordIterator iter = null;
        try {
            samReader = this.samReaderFactory.open(f);
            if (!samReader.hasIndex()) {
                LOG.warn("no index for " + f);
                continue;
            }
            final SAMFileHeader header = samReader.getFileHeader();
            for (final Mutation src : mutations) {
                // Pre-register every group declared in the header so that groups
                // without any read at the position are still reported.
                final Map<String, CigarAndBases> sample2count = new TreeMap<>();
                for (final SAMReadGroupRecord rg : header.getReadGroups()) {
                    if (rg != null) {
                        final String sn = this.groupBy.apply(rg);
                        if (sn != null && !sn.trim().isEmpty()) {
                            sample2count.put(sn, new CigarAndBases());
                        }
                    }
                }
                if (sample2count.isEmpty()) {
                    sample2count.put(DEFAULT_SAMPLE_NAME, new CigarAndBases());
                }
                final Mutation m = convertFromSamHeader(f, header, src);
                if (m == null)
                    continue;
                iter = samReader.query(m.chrom, m.pos - 1, m.pos + 1, false);
                while (iter.hasNext()) {
                    final SAMRecord rec = iter.next();
                    if (rec.getReadUnmappedFlag())
                        continue;
                    if (this.filter.filterOut(rec))
                        continue;
                    final Cigar cigar = rec.getCigar();
                    if (cigar == null)
                        continue;
                    final String readString = rec.getReadString().toUpperCase();
                    String sampleName = DEFAULT_SAMPLE_NAME;
                    final SAMReadGroupRecord rg = rec.getReadGroup();
                    if (rg != null) {
                        final String sn = groupBy.apply(rg);
                        if (!StringUtil.isBlank(sn)) {
                            sampleName = sn;
                        }
                    }
                    CigarAndBases counter = sample2count.get(sampleName);
                    if (counter == null) {
                        counter = new CigarAndBases();
                        sample2count.put(sampleName, counter);
                    }
                    // The walk starts at the unclipped start, so clipped bases (H, S)
                    // advance the reference cursor too.
                    int ref = rec.getUnclippedStart();
                    int readPos = 0;
                    // Set once the element covering m.pos has been counted. Without it the
                    // cursor stays on m.pos after the inner break and every following cigar
                    // element of the read would be counted at the same position.
                    boolean positionCounted = false;
                    for (int k = 0; !positionCounted && k < cigar.numCigarElements() && ref < m.pos + 1; ++k) {
                        final CigarElement ce = cigar.getCigarElement(k);
                        final CigarOperator op = ce.getOperator();
                        switch (op) {
                            case P:
                                break;
                            case I:
                            {
                                if (ref == m.pos) {
                                    counter.operators.incr(op);
                                    counter.bases.incr(INSERTION_CHAR);
                                }
                                readPos += ce.getLength();
                                break;
                            }
                            case D:
                            case N:
                            case M:
                            case X:
                            case EQ:
                            case H:
                            case S:
                            {
                                for (int i = 0; i < ce.getLength(); ++i) {
                                    if (ref == m.pos) {
                                        counter.operators.incr(op);
                                        switch (op) {
                                            case M:
                                            case X:
                                            case EQ:
                                                counter.bases.incr(readString.charAt(readPos));
                                                break;
                                            case D:
                                            case N:
                                                counter.bases.incr(DELETION_CHAR);
                                                break;
                                            default:
                                                break;
                                        }
                                        positionCounted = true;
                                        break;
                                    }
                                    if (op.consumesReadBases())
                                        ++readPos;
                                    ref++;
                                }
                                break;
                            }
                            default:
                                throw new RuntimeException("unknown operator:" + op);
                        }
                    }
                }
                iter.close();
                iter = null;
                for (final Map.Entry<String, CigarAndBases> entry : sample2count.entrySet()) {
                    final String sample = entry.getKey();
                    final CigarAndBases counter = entry.getValue();
                    out.print(f);
                    out.print('\t');
                    out.print(m.chrom);
                    out.print('\t');
                    out.print(m.pos);
                    if (this.indexedFastaSequenceFile != null) {
                        out.print('\t');
                        out.print(getReferenceAt(m.chrom, m.pos));
                    }
                    out.print('\t');
                    out.print(sample);
                    out.print('\t');
                    // depth: reads with an aligned base (M, EQ or X) at the position
                    out.print(counter.operators.count(CigarOperator.M) + counter.operators.count(CigarOperator.EQ) + counter.operators.count(CigarOperator.X));
                    for (final CigarOperator op : CigarOperator.values()) {
                        out.print('\t');
                        out.print(counter.operators.count(op));
                    }
                    for (final char c : BASES_To_PRINT) {
                        out.print('\t');
                        out.print(counter.bases.count(c));
                    }
                    out.println();
                }
            }
            // end of loop over mutations
        } catch (final Exception err) {
            LOG.error(err);
            throw err;
        } finally {
            // Also reached by the 'continue' above and when an exception is thrown mid-iteration.
            CloserUtil.close(iter);
            CloserUtil.close(samReader);
        }
    }
}
use of htsjdk.samtools.SAMRecordIterator in project jvarkit by lindenb.
the class FixVcfMissingGenotypes method fetchDP.
/**
 * Returns the depth (DP) of {@code sample} over the span of {@code ctx}.
 *
 * <p>For every read of the sample overlapping the variant, each reference position in
 * {@code [ctx.getStart(), ctx.getEnd()]} covered by a cigar element that consumes both
 * reference and read bases adds one to the total. The total is then divided
 * (integer division) by the number of positions spanned by the variant.
 * Unmapped reads, reads rejected by {@code filter}, reads without a read group or
 * without a cigar, and reads from other samples are ignored.
 *
 * @param ctx the variant giving the contig and the 1-based start/end to inspect
 * @param sample the sample name matched against the read group sample of each read
 * @param samReaders the readers to query; {@code null} is treated as an empty list
 * @return the mean depth over the variant span, rounded down
 */
private int fetchDP(final VariantContext ctx, final String sample, List<SamReader> samReaders) {
    int depth = 0;
    if (samReaders == null)
        samReaders = Collections.emptyList();
    for (final SamReader sr : samReaders) {
        // try-with-resources releases the iterator even when a record makes the loop throw.
        try (SAMRecordIterator iter = sr.query(ctx.getContig(), ctx.getStart(), ctx.getEnd(), false)) {
            while (iter.hasNext()) {
                final SAMRecord rec = iter.next();
                if (rec.getReadUnmappedFlag())
                    continue;
                if (filter.filterOut(rec))
                    continue;
                final SAMReadGroupRecord rg = rec.getReadGroup();
                // A read without a read group cannot be assigned to the sample.
                if (rg == null || !sample.equals(rg.getSample()))
                    continue;
                final Cigar cigar = rec.getCigar();
                if (cigar == null)
                    continue;
                int refPos = rec.getAlignmentStart();
                for (final CigarElement ce : cigar.getCigarElements()) {
                    if (refPos > ctx.getEnd())
                        break;
                    if (!ce.getOperator().consumesReferenceBases())
                        continue;
                    // Only aligned bases count; deletions and skips just move the cursor.
                    if (ce.getOperator().consumesReadBases()) {
                        for (int n = 0; n < ce.getLength(); ++n) {
                            if (refPos + n < ctx.getStart())
                                continue;
                            if (refPos + n > ctx.getEnd())
                                break;
                            depth++;
                        }
                    }
                    refPos += ce.getLength();
                }
            }
        }
    }
    depth /= (1 + ctx.getEnd() - ctx.getStart());
    return depth;
}
use of htsjdk.samtools.SAMRecordIterator in project jvarkit by lindenb.
the class GBrowserHtml method doWork.
/**
 * Builds a ZIP archive containing JSON snapshots of BAM regions plus the javascript,
 * a generated config.js and an index.html page.
 *
 * <p>The input (the single file/URI in {@code args}, or stdin when none is given) is read
 * line by line as a small script. Empty lines and lines starting with '#' are ignored.
 * Lines of the form {@code key=value} set the current state:
 * <ul>
 * <li>{@code bam}: the BAM file used by the following snapshots</li>
 * <li>{@code sample}: keep only reads whose read group has this sample</li>
 * <li>{@code title}: title written in the config entry of the following snapshots</li>
 * <li>{@code ref} / {@code fasta}: indexed fasta used to add the reference sequence</li>
 * <li>{@code extend}: number of bases added on both sides of the intervals parsed afterwards</li>
 * <li>{@code position} / {@code location} / {@code interval} / {@code goto}: the interval,
 *     written as {@code contig:pos}, {@code contig:start-end} or {@code contig:pos+x}</li>
 * </ul>
 * A line {@code snapshot} writes one JSON zip entry for the current BAM and interval,
 * and {@code exit} or {@code quit} stops reading the script.
 *
 * @param args command line arguments, passed to {@code oneFileOrNull}
 * @return {@code RETURN_OK} on success, {@code -1} when the output name does not end with
 *         ".zip", an interval cannot be parsed, a javascript resource is missing or any
 *         exception is thrown
 */
@Override
public int doWork(List<String> args) {
final int DEFAULT_EXTEND_INTERVAL = 0;
SamReader samReader = null;
ZipOutputStream zout = null;
BufferedReader bufReader = null;
PrintWriter paramsWriter = null;
JsonWriter paramsJsonWriter = null;
String line;
IndexedFastaSequenceFile faidx = null;
// incremented for each snapshot, used to build the zip entry names
long snapshot_id = 0L;
final String inputName = oneFileOrNull(args);
try {
final SamJsonWriterFactory samJsonWriterFactory = SamJsonWriterFactory.newInstance().printHeader(true).printAttributes(false).printMate(true).printReadQualities(false).closeStreamAtEnd(false);
if (this.outputFile != null) {
if (!this.outputFile.getName().endsWith(".zip")) {
LOG.error("Output file should end with *.zip");
return -1;
}
}
bufReader = (inputName == null ? IOUtils.openStreamForBufferedReader(stdin()) : IOUtils.openURIForBufferedReading(inputName));
// tmpFile1 holds the JSON of the snapshot being written, tmpFile2 accumulates config.js
final File tmpFile1 = File.createTempFile("gbrowse.", ".tmp");
tmpFile1.deleteOnExit();
final File tmpFile2 = File.createTempFile("gbrowse.", ".tmp");
tmpFile2.deleteOnExit();
paramsWriter = new PrintWriter(tmpFile2);
paramsWriter.print("var config=");
// flush the prefix before the JsonWriter starts writing through the same PrintWriter
paramsWriter.flush();
paramsJsonWriter = new JsonWriter(paramsWriter);
paramsJsonWriter.beginArray();
zout = new ZipOutputStream(super.openFileOrStdoutAsStream(this.outputFile));
// current state of the script, changed by the key=value lines
File bamFile = null;
File faidxFile = null;
String sampleName = null;
int extend_interval = DEFAULT_EXTEND_INTERVAL;
String title = null;
Interval interval = null;
while ((line = bufReader.readLine()) != null) {
LOG.info(line);
if (line.isEmpty() || line.startsWith("#"))
continue;
// lines without '=' get an empty key and are matched on the whole line further down
final int eq = line.indexOf("=");
final String key = (eq == -1 ? "" : line.substring(0, eq).toLowerCase().trim());
final String value = (eq == -1 ? "" : line.substring(eq + 1).trim());
if (key.equals("bam")) {
// close the reader of the previous BAM; the new one is opened lazily at the next snapshot
if (samReader != null)
samReader.close();
samReader = null;
bamFile = (value.isEmpty() ? null : new File(value));
} else if (key.equals("sample")) {
sampleName = (value.isEmpty() ? null : value);
} else if (key.equals("title")) {
title = (value.isEmpty() ? null : value);
} else if (key.equals("ref") || key.equals("fasta")) {
// same lazy handling as for the BAM reader
if (faidx != null)
faidx.close();
faidx = null;
faidxFile = (value.isEmpty() ? null : new File(value));
} else if (key.equals("extend")) {
extend_interval = (value.isEmpty() ? DEFAULT_EXTEND_INTERVAL : Integer.parseInt(value));
} else if (key.equals("position") || key.equals("location") || key.equals("interval") || key.equals("goto")) {
// The extension is applied here, when the interval is parsed: an 'extend' line
// that comes later in the script does not change an interval already defined.
// form 1: contig:pos
Pattern pat1 = Pattern.compile("([^\\:]+)\\:([\\d,]+)");
Matcher matcher = pat1.matcher(value);
if (matcher.matches()) {
String c = matcher.group(1);
int pos = Integer.parseInt(matcher.group(2).replaceAll("[,]", ""));
interval = new Interval(c, Math.max(1, pos - extend_interval), pos + extend_interval);
continue;
}
// form 2: contig:start-end
pat1 = Pattern.compile("([^\\:]+)\\:([\\d,]+)\\-([\\d,]+)");
matcher = pat1.matcher(value);
if (matcher.matches()) {
String c = matcher.group(1);
int B = Integer.parseInt(matcher.group(2).replaceAll("[,]", ""));
int E = Integer.parseInt(matcher.group(3).replaceAll("[,]", ""));
if (B > E) {
LOG.error("bad interval :" + line);
return -1;
}
interval = new Interval(c, Math.max(1, B - extend_interval), E + extend_interval);
continue;
}
// form 3: contig:pos+x, i.e. x bases on both sides of pos
pat1 = Pattern.compile("([^\\:]+)\\:([\\d,]+)\\+([\\d,]+)");
matcher = pat1.matcher(value);
if (matcher.matches()) {
String c = matcher.group(1);
int B = Integer.parseInt(matcher.group(2).replaceAll("[,]", ""));
int x = Integer.parseInt(matcher.group(3).replaceAll("[,]", ""));
interval = new Interval(c, Math.max(1, B - (x + extend_interval)), B + (x + extend_interval));
continue;
}
LOG.error("bad interval :" + line);
return -1;
} else if (line.toLowerCase().equals("snapshot")) {
// a snapshot without interval or BAM is reported and skipped, it is not fatal
if (interval == null) {
LOG.error("No interval defined!");
continue;
}
if (bamFile == null) {
LOG.error("No BAM file defined!");
continue;
}
++snapshot_id;
LOG.info("open samFile " + bamFile);
if (samReader == null) {
samReader = super.createSamReaderFactory().open(bamFile);
}
// The snapshot is first written to tmpFile1 and then copied into the zip.
// NOTE(review): jsonFileWriter, jsw, samFileWriter and samRecIter are closed only on
// the normal path; an exception in this section leaves them open.
FileWriter jsonFileWriter = new FileWriter(tmpFile1);
JsonWriter jsw = new JsonWriter(jsonFileWriter);
jsw.beginObject();
jsw.name("interval");
jsw.beginObject();
jsw.name("contig");
jsw.value(interval.getContig());
jsw.name("start");
jsw.value(interval.getStart());
jsw.name("end");
jsw.value(interval.getEnd());
jsw.endObject();
if (faidxFile != null) {
if (faidx == null) {
faidx = new IndexedFastaSequenceFile(faidxFile);
}
ReferenceSequence dna = faidx.getSubsequenceAt(interval.getContig(), interval.getStart(), interval.getEnd());
jsw.name("reference");
jsw.value(dna.getBaseString());
}
jsw.name("sam");
// the SAM records are written as the value of "sam" through the JSON writer
SAMFileWriter samFileWriter = samJsonWriterFactory.open(samReader.getFileHeader(), jsw);
SAMRecordIterator samRecIter = samReader.queryOverlapping(interval.getContig(), interval.getStart(), interval.getEnd());
while (samRecIter.hasNext()) {
final SAMRecord rec = samRecIter.next();
// when a sample is set, reads without read group or from another sample are dropped
if (sampleName != null && !sampleName.isEmpty()) {
SAMReadGroupRecord srg = rec.getReadGroup();
if (srg == null)
continue;
if (!sampleName.equals(srg.getSample()))
continue;
}
if (rec.getReadUnmappedFlag())
continue;
samFileWriter.addAlignment(rec);
}
samRecIter.close();
samFileWriter.close();
jsw.endObject();
jsw.flush();
jsw.close();
jsonFileWriter.close();
ZipEntry entry = new ZipEntry(this.prefix + "_snapshot." + String.format("%05d", snapshot_id) + ".json");
zout.putNextEntry(entry);
IOUtils.copyTo(tmpFile1, zout);
zout.closeEntry();
tmpFile1.delete();
// describe the snapshot in config.js
paramsJsonWriter.beginObject();
paramsJsonWriter.name("title");
// default title: "contig:start-end" followed by the sample name when there is one
paramsJsonWriter.value(title == null ? interval.getContig() + ":" + interval.getStart() + "-" + interval.getEnd() + (sampleName == null ? "" : " " + sampleName) : title);
paramsJsonWriter.name("interval");
paramsJsonWriter.beginObject();
paramsJsonWriter.name("contig");
paramsJsonWriter.value(interval.getContig());
paramsJsonWriter.name("start");
paramsJsonWriter.value(interval.getStart());
paramsJsonWriter.name("end");
paramsJsonWriter.value(interval.getEnd());
paramsJsonWriter.endObject();
if (sampleName != null && !sampleName.isEmpty()) {
paramsJsonWriter.name("sample");
paramsJsonWriter.value(sampleName);
}
paramsJsonWriter.name("href");
// NOTE(review): unlike the zip entry name above, the href does not start with this.prefix;
// presumably the prefix is a directory-like path and the href is relative to index.html - confirm.
paramsJsonWriter.value("_snapshot." + String.format("%05d", snapshot_id) + ".json");
paramsJsonWriter.endObject();
} else if (line.toLowerCase().equals("exit") || line.toLowerCase().equals("quit")) {
break;
}
}
bufReader.close();
bufReader = null;
// copy the javascript resources into the archive
for (final String jssrc : new String[] { "gbrowse", "hershey", "samtools", "com.github.lindenb.jvarkit.tools.misc.GBrowserHtml" }) {
InputStream is = this.getClass().getResourceAsStream("/META-INF/js/" + jssrc + ".js");
if (is == null) {
LOG.error("Cannot read resource /META-INF/js/" + jssrc + ".js");
return -1;
}
ZipEntry entry = new ZipEntry(this.prefix + jssrc + ".js");
entry.setComment("JAVASCRIPT SOURCE " + jssrc);
zout.putNextEntry(entry);
IOUtils.copyTo(is, zout);
CloserUtil.close(is);
zout.closeEntry();
}
// save params.js
paramsJsonWriter.endArray();
paramsJsonWriter.flush();
paramsWriter.println(";");
paramsWriter.flush();
paramsJsonWriter.close();
paramsWriter.close();
zout.putNextEntry(new ZipEntry(this.prefix + "config.js"));
IOUtils.copyTo(tmpFile2, zout);
zout.closeEntry();
tmpFile2.delete();
// save index.html
zout.putNextEntry(new ZipEntry(this.prefix + "index.html"));
// the page is written straight into the zip entry with a StAX writer
XMLOutputFactory xof = XMLOutputFactory.newFactory();
XMLStreamWriter w = xof.createXMLStreamWriter(zout, "UTF-8");
w.writeStartElement("html");
w.writeStartElement("head");
w.writeEmptyElement("meta");
w.writeAttribute("http-equiv", "Content-Type");
w.writeAttribute("content", "text/html; charset=utf-8");
w.writeEmptyElement("meta");
w.writeAttribute("http-equiv", "author");
w.writeAttribute("content", "Pierre Lindenbaum Phd @yokofakun");
w.writeStartElement("title");
w.writeCharacters(getProgramName() + ":" + getVersion());
// title
w.writeEndElement();
w.writeStartElement("style");
w.writeAttribute("type", "text/css");
w.writeCharacters("body { color:rgb(50,50,50); margin:20px; padding:20px; font: 12pt Arial, Helvetica, sans-serif; }\n");
w.writeCharacters("label { text-align:right; }\n");
w.writeCharacters("button { border: 1px solid; background-image:-moz-linear-gradient( top, gray, lightgray ); }\n");
w.writeCharacters("canvas { image-rendering:auto;}\n");
w.writeCharacters(".me { padding-top:100px; font-size:80%; }\n");
// style
w.writeEndElement();
// one <script> per javascript file, plus the generated config.js
for (final String src : new String[] { "samtools", "gbrowse", "hershey", "config", "com.github.lindenb.jvarkit.tools.misc.GBrowserHtml" }) {
w.writeStartElement("script");
w.writeAttribute("type", "text/javascript");
w.writeAttribute("language", "text/javascript");
w.writeAttribute("src", src + ".js");
// empty text content after the attributes, before the end tag
w.writeCharacters("");
// script
w.writeEndElement();
}
// head
w.writeEndElement();
w.writeStartElement("body");
// navigation bar: prev button, snapshot menu, next button
w.writeStartElement("div");
w.writeStartElement("button");
w.writeAttribute("onclick", "changemenu(-1)");
w.writeCharacters("prev");
// button
w.writeEndElement();
w.writeStartElement("select");
w.writeAttribute("id", "menu");
// menu
w.writeEndElement();
w.writeStartElement("button");
w.writeAttribute("onclick", "changemenu(+1)");
w.writeCharacters("next");
// button
w.writeEndElement();
// div
w.writeEndElement();
w.writeStartElement("div");
w.writeAttribute("id", "flags");
// div
w.writeEndElement();
// centered area with the title and the drawing canvas
w.writeStartElement("div");
w.writeAttribute("style", "text-align:center;");
w.writeStartElement("div");
w.writeAttribute("style", "font-size:200%;margin:10px;");
w.writeAttribute("id", "browserTitle");
// div
w.writeEndElement();
w.writeStartElement("div");
w.writeEmptyElement("canvas");
w.writeAttribute("id", "canvasdoc");
w.writeAttribute("width", "100");
w.writeAttribute("height", "100");
// div
w.writeEndElement();
// div
w.writeEndElement();
w.writeEmptyElement("hr");
// footer
w.writeStartElement("div");
w.writeAttribute("class", "me");
w.writeCharacters("Pierre Lindenbaum PhD. ");
w.writeStartElement("a");
w.writeAttribute("href", "https://github.com/lindenb/jvarkit");
w.writeCharacters("https://github.com/lindenb/jvarkit");
w.writeEndElement();
w.writeCharacters(". Tested with Firefox 45.0");
w.writeEndElement();
// body
w.writeEndElement();
// html
w.writeEndElement();
w.flush();
w.close();
w = null;
zout.closeEntry();
zout.finish();
zout.close();
return RETURN_OK;
} catch (Throwable err) {
LOG.error(err);
return -1;
} finally {
// reached on every exit path, including the early 'return -1' statements above
CloserUtil.close(paramsJsonWriter);
CloserUtil.close(paramsWriter);
CloserUtil.close(zout);
CloserUtil.close(bufReader);
CloserUtil.close(samReader);
CloserUtil.close(faidx);
}
}
use of htsjdk.samtools.SAMRecordIterator in project jvarkit by lindenb.
the class BWAMemDigest method doWork.
/**
 * Scans the reads of a SAM/BAM input and reports, through a {@code DefaultMemOuput},
 * the paired reads that are not properly paired.
 *
 * <p>Reads that are unpaired, properly paired, failing vendor quality checks, duplicates or
 * unmapped are skipped, as are reads overlapping an interval of the optional
 * {@code IGNORE_BED} file. For each remaining read the method reports:
 * <ul>
 * <li>an "insertion" when more than 10 bases are matched (M) and the ratio of soft-clipped (S)
 *     to matched bases lies strictly between {@code 1 - 0.15} and {@code 1 + 0.15}</li>
 * <li>every other canonical alignment returned by {@code OtherCanonicalAlignFactory}</li>
 * <li>an "orphan" when the mate is unmapped</li>
 * <li>a "deletion" when read and mate are on the same reference, a "transloc" otherwise</li>
 * </ul>
 *
 * @param args command line arguments, passed to {@code oneFileOrNull}
 * @return {@code 0} on success, {@code -1} if an exception was thrown
 */
@Override
public int doWork(List<String> args) {
    IntervalTreeMap<Boolean> ignore = null;
    DefaultMemOuput output = new DefaultMemOuput();
    final float limitcigar = 0.15f;
    SamReader r = null;
    try {
        r = super.openSamReader(oneFileOrNull(args));
        final SAMFileHeader header = r.getFileHeader();
        if (IGNORE_BED != null) {
            LOG.info("open " + IGNORE_BED);
            ignore = new IntervalTreeMap<>();
            // try-with-resources closes the BED reader even when a line cannot be parsed
            try (BufferedReader in = IOUtils.openFileForBufferedReading(IGNORE_BED)) {
                String line;
                while ((line = in.readLine()) != null) {
                    if (line.isEmpty() || line.startsWith("#"))
                        continue;
                    final String[] tokens = line.split("[\t]");
                    if (tokens.length < 3)
                        continue;
                    // NOTE(review): BED starts are 0-based, so the 1-based start would be
                    // start + 1; subtracting (1 + IGNORE_EXTEND) widens the region by two
                    // extra bases on the left - confirm this is intended.
                    final Interval bedInterval = new Interval(tokens[0], Math.max(1, Integer.parseInt(tokens[1]) - (1 + IGNORE_EXTEND)), Integer.parseInt(tokens[2]) + IGNORE_EXTEND);
                    // put() gives back the value previously stored for this interval;
                    // compare null-safely instead of unboxing it.
                    final Boolean previous = ignore.put(bedInterval, Boolean.TRUE);
                    if (Boolean.TRUE.equals(previous)) {
                        LOG.warn("BED:ignoring " + line);
                    }
                }
            }
        }
        final OtherCanonicalAlignFactory xPalignFactory = new OtherCanonicalAlignFactory(header);
        try (SAMRecordIterator iter = r.iterator()) {
            long readNum = 0L;
            while (iter.hasNext()) {
                final SAMRecord record = iter.next();
                ++readNum;
                if (!record.getReadPairedFlag())
                    continue;
                if (record.getProperPairFlag())
                    continue;
                if (record.getReadFailsVendorQualityCheckFlag())
                    continue;
                if (record.getDuplicateReadFlag())
                    continue;
                if (record.getReadUnmappedFlag())
                    continue;
                if (ignore != null && ignore.containsOverlapping(new Interval(record.getReferenceName(), record.getAlignmentStart(), record.getAlignmentEnd()))) {
                    LOG.info("ignore " + record);
                    continue;
                }
                // number of matched (M) and soft-clipped (S) bases of the read
                float countM = 0f;
                float countS = 0f;
                for (final CigarElement c : record.getCigar().getCigarElements()) {
                    switch (c.getOperator()) {
                        case M:
                            countM += c.getLength();
                            break;
                        case S:
                            countS += c.getLength();
                            break;
                        default:
                            break;
                    }
                }
                // about as many clipped as matched bases
                if (countM > 10 && ((countS / countM) > (1f - limitcigar) && (countS / countM) < (1f + limitcigar))) {
                    output.insertion(record, readNum, countS, countM);
                }
                for (final OtherCanonicalAlign xp : xPalignFactory.getXPAligns(record)) {
                    if (ignore != null && ignore.containsOverlapping(new Interval(xp.getReferenceName(), xp.getAlignmentStart(), xp.getAlignmentStart()))) {
                        LOG.info("ignore " + record);
                        continue;
                    }
                    output.xp(record, readNum, xp);
                }
                if (record.getMateUnmappedFlag()) {
                    output.orphan(record, readNum);
                    continue;
                }
                if (ignore != null && ignore.containsOverlapping(new Interval(record.getMateReferenceName(), record.getMateAlignmentStart(), record.getMateAlignmentStart()))) {
                    LOG.info("ignore " + record);
                    continue;
                }
                // Compare the reference indexes by value: '==' on two boxed Integer objects
                // compares identities and is only reliable for small cached values.
                final Integer refIndex = record.getReferenceIndex();
                final Integer mateRefIndex = record.getMateReferenceIndex();
                final boolean sameReference = (refIndex == null ? mateRefIndex == null : refIndex.equals(mateRefIndex));
                if (sameReference) {
                    output.deletion(record, readNum);
                } else {
                    output.transloc(record, readNum);
                }
            }
        }
    } catch (Exception e) {
        e.printStackTrace();
        LOG.error(e);
        return -1;
    } finally {
        CloserUtil.close(r);
        CloserUtil.close(output);
    }
    return 0;
}
Aggregations