use of htsjdk.samtools.SAMRecordIterator in project jvarkit by lindenb.
the class BamStats01 method run.
private void run(final String filename, SamReader samFileReader) throws IOException {
Map<String, Histogram> sample2hist = new HashMap<String, BamStats01.Histogram>();
SAMSequenceDictionary currDict = samFileReader.getFileHeader().getSequenceDictionary();
if (samSequenceDictionary == null) {
samSequenceDictionary = currDict;
}
if (this.bedFile != null) {
if (!SequenceUtil.areSequenceDictionariesEqual(currDict, samSequenceDictionary)) {
samFileReader.close();
throw new IOException("incompatible sequence dictionaries." + filename);
}
if (intervalTreeMap == null) {
final BedLineCodec bedCodec = new BedLineCodec();
intervalTreeMap = new IntervalTreeMap<>();
LOG.info("opening " + this.bedFile);
String line;
final BufferedReader bedIn = IOUtils.openFileForBufferedReading(bedFile);
while ((line = bedIn.readLine()) != null) {
final BedLine bedLine = bedCodec.decode(line);
if (bedLine == null)
continue;
int seqIndex = currDict.getSequenceIndex(bedLine.getContig());
if (seqIndex == -1) {
throw new IOException("unknown chromosome from dict in in " + line + " " + this.bedFile);
}
intervalTreeMap.put(bedLine.toInterval(), Boolean.TRUE);
}
bedIn.close();
LOG.info("done reading " + this.bedFile);
}
}
this.chrX_index = -1;
this.chrY_index = -1;
for (final SAMSequenceRecord rec : currDict.getSequences()) {
final String chromName = rec.getSequenceName().toLowerCase();
if (chromName.equals("x") || chromName.equals("chrx")) {
this.chrX_index = rec.getSequenceIndex();
} else if (chromName.equals("y") || chromName.equals("chry")) {
this.chrY_index = rec.getSequenceIndex();
}
}
final SAMSequenceDictionaryProgress progess = new SAMSequenceDictionaryProgress(currDict);
final SAMRecordIterator iter = samFileReader.iterator();
while (iter.hasNext()) {
final SAMRecord rec = progess.watch(iter.next());
String sampleName = groupBy.getPartion(rec);
if (sampleName == null || sampleName.isEmpty())
sampleName = "undefined";
Histogram hist = sample2hist.get(sampleName);
if (hist == null) {
hist = new Histogram();
sample2hist.put(sampleName, hist);
}
hist.histograms[Category2.ALL.ordinal()].watch(rec);
if (intervalTreeMap == null)
continue;
if (rec.getReadUnmappedFlag()) {
continue;
}
if (!intervalTreeMap.containsOverlapping(new Interval(rec.getReferenceName(), rec.getAlignmentStart(), rec.getAlignmentEnd()))) {
hist.histograms[Category2.OFF_TARGET.ordinal()].watch(rec);
} else {
hist.histograms[Category2.IN_TARGET.ordinal()].watch(rec);
}
}
progess.finish();
samFileReader.close();
samFileReader = null;
for (final String sampleName : sample2hist.keySet()) {
final Histogram hist = sample2hist.get(sampleName);
out.print(filename + "\t" + sampleName);
for (final Category2 cat2 : Category2.values()) {
for (// je je suis libertineuuh, je suis une cat1
final Category cat1 : // je je suis libertineuuh, je suis une cat1
Category.values()) {
out.print("\t");
out.print(hist.histograms[cat2.ordinal()].counts[cat1.ordinal()]);
}
if (intervalTreeMap == null)
break;
}
out.println();
}
}
use of htsjdk.samtools.SAMRecordIterator in project jvarkit by lindenb.
the class BamStats02 method run.
private void run(String filename, SamReader r, PrintWriter out) {
final Counter<Category> counter = new Counter<>();
SAMRecordIterator iter = null;
try {
iter = r.iterator();
final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(r.getFileHeader().getSequenceDictionary());
while (iter.hasNext()) {
final SAMRecord record = progress.watch(iter.next());
final Category cat = new Category();
cat.set(STRING_PROPS.filename, filename);
cat.flag = record.getFlags();
cat.set(INT_PROPS.inTarget, -1);
final SAMReadGroupRecord g = record.getReadGroup();
if (g != null) {
cat.set(STRING_PROPS.samplename, g.getSample());
cat.set(STRING_PROPS.platform, g.getPlatform());
cat.set(STRING_PROPS.platformUnit, g.getPlatformUnit());
cat.set(STRING_PROPS.library, g.getLibrary());
}
final ShortReadName readName = ShortReadName.parse(record);
if (readName.isValid()) {
cat.set(STRING_PROPS.instrument, readName.getInstrumentName());
cat.set(STRING_PROPS.flowcell, readName.getFlowCellId());
cat.set(INT_PROPS.lane, readName.getFlowCellLane());
cat.set(INT_PROPS.run, readName.getRunId());
}
if (record.getReadPairedFlag() && !record.getMateUnmappedFlag()) {
cat.set(STRING_PROPS.mate_chromosome, record.getMateReferenceName());
}
if (!record.getReadUnmappedFlag()) {
cat.set(INT_PROPS.mapq, (int) (Math.ceil(record.getMappingQuality() / 10.0) * 10));
cat.set(STRING_PROPS.chromosome, record.getReferenceName());
if (this.intervals != null) {
if (this.intervals.containsOverlapping(new Interval(record.getReferenceName(), record.getAlignmentStart(), record.getAlignmentEnd()))) {
cat.set(INT_PROPS.inTarget, 1);
} else {
cat.set(INT_PROPS.inTarget, 0);
}
}
}
counter.incr(cat);
}
progress.finish();
for (final Category cat : counter.keySetDecreasing()) {
cat.print(out);
out.print("\t");
out.print(counter.count(cat));
out.println();
}
out.flush();
} finally {
CloserUtil.close(iter);
}
}
use of htsjdk.samtools.SAMRecordIterator in project jvarkit by lindenb.
the class Biostar154220 method doWork.
@SuppressWarnings("resource")
private int doWork(final SamReader in) throws IOException {
SAMFileHeader header = in.getFileHeader();
if (header.getSortOrder() != SAMFileHeader.SortOrder.unsorted) {
LOG.error("input should be unsorted, reads sorted on REF/query-name e.g: see https://github.com/lindenb/jvarkit/wiki/SortSamRefName");
return -1;
}
SAMSequenceDictionary dict = header.getSequenceDictionary();
if (dict == null) {
LOG.error("no dict !");
return -1;
}
SAMFileWriter out = null;
SAMRecordIterator iter = null;
int prev_tid = -1;
int[] depth_array = null;
try {
SAMFileHeader header2 = header.clone();
header2.addComment("Biostar154220" + " " + getVersion() + " " + getProgramCommandLine());
out = this.writingBams.openSAMFileWriter(outputFile, header2, true);
SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(dict);
iter = in.iterator();
List<SAMRecord> buffer = new ArrayList<>();
for (; ; ) {
SAMRecord rec = null;
if (iter.hasNext()) {
rec = progress.watch(iter.next());
}
if (rec != null && rec.getReadUnmappedFlag()) {
out.addAlignment(rec);
continue;
}
// no more record or
if (!buffer.isEmpty() && rec != null && buffer.get(0).getReadName().equals(rec.getReadName()) && buffer.get(0).getReferenceIndex().equals(rec.getReferenceIndex())) {
buffer.add(progress.watch(rec));
} else if (buffer.isEmpty() && rec != null) {
buffer.add(progress.watch(rec));
} else // dump buffer
{
if (!buffer.isEmpty()) {
final int tid = buffer.get(0).getReferenceIndex();
if (prev_tid == -1 || prev_tid != tid) {
SAMSequenceRecord ssr = dict.getSequence(tid);
prev_tid = tid;
depth_array = null;
System.gc();
LOG.info("Alloc memory for contig " + ssr.getSequenceName() + " N=" + ssr.getSequenceLength() + "*sizeof(int)");
// use a +1 pos
depth_array = new int[ssr.getSequenceLength() + 1];
Arrays.fill(depth_array, 0);
}
// position->coverage for this set of reads
Counter<Integer> readposition2coverage = new Counter<Integer>();
boolean dump_this_buffer = true;
for (final SAMRecord sr : buffer) {
if (!dump_this_buffer)
break;
if (this.filter.filterOut(sr))
continue;
final Cigar cigar = sr.getCigar();
if (cigar == null) {
throw new IOException("Cigar missing in " + rec.getSAMString());
}
int refPos1 = sr.getAlignmentStart();
for (final CigarElement ce : cigar.getCigarElements()) {
final CigarOperator op = ce.getOperator();
if (!op.consumesReferenceBases())
continue;
if (op.consumesReadBases()) {
for (int x = 0; x < ce.getLength() && refPos1 + x < depth_array.length; ++x) {
int cov = (int) readposition2coverage.incr(refPos1 + x);
if (depth_array[refPos1 + x] + cov > this.capDepth) {
dump_this_buffer = false;
break;
}
}
}
if (!dump_this_buffer)
break;
refPos1 += ce.getLength();
}
}
if (dump_this_buffer) {
// consumme this coverage
for (Integer pos : readposition2coverage.keySet()) {
depth_array[pos] += (int) readposition2coverage.count(pos);
}
for (SAMRecord sr : buffer) {
out.addAlignment(sr);
}
}
buffer.clear();
}
if (rec == null)
break;
buffer.add(rec);
}
}
depth_array = null;
progress.finish();
return 0;
} catch (Exception err) {
LOG.error(err);
return -1;
} finally {
CloserUtil.close(iter);
CloserUtil.close(out);
}
}
use of htsjdk.samtools.SAMRecordIterator in project jvarkit by lindenb.
the class BamStats05 method doWork.
protected int doWork(final PrintWriter pw, final Map<String, List<Interval>> gene2interval, final String filename, final SamReader IN) throws Exception {
try {
LOG.info("Scanning " + filename);
final SAMFileHeader header = IN.getFileHeader();
final List<SAMReadGroupRecord> rgs = header.getReadGroups();
if (rgs == null || rgs.isEmpty())
throw new IOException("No read groups in " + filename);
final Set<String> groupNames = this.groupBy.getPartitions(rgs);
for (final String partition : groupNames) {
if (partition.isEmpty())
throw new IOException("Empty " + groupBy.name());
for (final String gene : gene2interval.keySet()) {
int geneStart = Integer.MAX_VALUE;
int geneEnd = 0;
final List<Integer> counts = new ArrayList<>();
for (final Interval interval : gene2interval.get(gene)) {
geneStart = Math.min(geneStart, interval.getStart() - 1);
geneEnd = Math.max(geneEnd, interval.getEnd());
/* picard javadoc: - Sequence name - Start position (1-based) - End position (1-based, end inclusive) */
int[] interval_counts = new int[interval.getEnd() - interval.getStart() + 1];
if (interval_counts.length == 0)
continue;
Arrays.fill(interval_counts, 0);
if (IN.getFileHeader().getSequenceIndex(interval.getContig()) == -1) {
throw new IllegalArgumentException("NO DICT FOR \"" + interval.getContig() + "\"");
}
/**
* start - 1-based, inclusive start of interval of interest. Zero implies start of the reference sequence.
* end - 1-based, inclusive end of interval of interest. Zero implies end of the reference sequence.
*/
SAMRecordIterator r = IN.query(new QueryInterval[] { new QueryInterval(header.getSequenceIndex(interval.getContig()), interval.getStart(), interval.getEnd()) }, false);
while (r.hasNext()) {
final SAMRecord rec = r.next();
if (rec.getReadUnmappedFlag())
continue;
if (filter.filterOut(rec))
continue;
if (!rec.getReferenceName().equals(interval.getContig()))
continue;
final SAMReadGroupRecord rg = rec.getReadGroup();
if (rg == null || !partition.equals(this.groupBy.apply(rg)))
continue;
final Cigar cigar = rec.getCigar();
if (cigar == null)
continue;
int refpos1 = rec.getAlignmentStart();
for (final CigarElement ce : cigar.getCigarElements()) {
final CigarOperator op = ce.getOperator();
if (!op.consumesReferenceBases())
continue;
if (op.consumesReadBases()) {
for (int i = 0; i < ce.getLength(); ++i) {
if (refpos1 + i >= interval.getStart() && refpos1 + i <= interval.getEnd()) {
interval_counts[refpos1 + i - interval.getStart()]++;
}
}
}
refpos1 += ce.getLength();
}
}
/* end while r */
r.close();
for (int d : interval_counts) {
counts.add(d);
}
}
/* end interval */
Collections.sort(counts);
int count_no_coverage = 0;
double mean = 0;
for (int cov : counts) {
if (cov <= MIN_COVERAGE)
++count_no_coverage;
mean += cov;
}
mean /= counts.size();
pw.println(gene2interval.get(gene).get(0).getContig() + "\t" + geneStart + "\t" + geneEnd + "\t" + gene + "\t" + partition + "\t" + counts.size() + "\t" + counts.get(0) + "\t" + counts.get(counts.size() - 1) + "\t" + mean + "\t" + count_no_coverage + "\t" + (int) (((counts.size() - count_no_coverage) / (double) counts.size()) * 100.0));
}
// end gene
}
// end sample
return RETURN_OK;
} catch (final Exception err) {
LOG.error(err);
return -1;
} finally {
CloserUtil.close(IN);
}
}
use of htsjdk.samtools.SAMRecordIterator in project jvarkit by lindenb.
the class BioAlcidae method execute_bam.
private int execute_bam(String source) throws IOException {
SamReader in = null;
SAMRecordIterator iter = null;
try {
SamReaderFactory srf = SamReaderFactory.makeDefault().validationStringency(ValidationStringency.SILENT);
if (source == null) {
in = srf.open(SamInputResource.of(stdin()));
} else {
in = srf.open(SamInputResource.of(source));
}
iter = in.iterator();
bindings.put("header", in.getFileHeader());
bindings.put("iter", iter);
bindings.put("format", "sam");
this.script.eval(bindings);
this.writer.flush();
return RETURN_OK;
} catch (Exception e) {
LOG.error(e);
return -1;
} finally {
CloserUtil.close(in);
CloserUtil.close(iter);
bindings.remove("header");
bindings.remove("iter");
bindings.remove("format");
}
}
Aggregations