Use of com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress in the jvarkit project (by lindenb): class Bam2Xml, method run.
/**
 * Writes an XML representation of a SAM/BAM input.
 *
 * The header and every alignment of {@code samReader} are serialized through
 * {@link SAMXMLWriter} to {@code this.outputFile}, or to stdout when no
 * output file was configured.
 *
 * @param samReader source of the header and records; not closed by this method
 * @return 0 on success, -1 on any error (already logged)
 */
private int run(SamReader samReader) {
    OutputStream fout = null;
    SAMRecordIterator iter = null;
    XMLStreamWriter w = null;
    try {
        final XMLOutputFactory xmlfactory = XMLOutputFactory.newInstance();
        if (this.outputFile != null) {
            fout = IOUtils.openFileForWriting(this.outputFile);
            w = xmlfactory.createXMLStreamWriter(fout, "UTF-8");
        } else {
            w = xmlfactory.createXMLStreamWriter(stdout(), "UTF-8");
        }
        w.writeStartDocument("UTF-8", "1.0");
        final SAMFileHeader header = samReader.getFileHeader();
        final SAMXMLWriter xw = new SAMXMLWriter(w, header);
        final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(header);
        iter = samReader.iterator();
        while (iter.hasNext()) {
            xw.addAlignment(progress.watch(iter.next()));
        }
        xw.close();
        w.writeEndDocument();
        // make sure the underlying file stream is flushed before the finally-close
        if (fout != null) {
            fout.flush();
        }
    } catch (final Exception e) {
        // report through the project logger only; the former printStackTrace()
        // duplicated the same stack trace on stderr
        LOG.error(e);
        return -1;
    } finally {
        CloserUtil.close(w);
        CloserUtil.close(iter);
        CloserUtil.close(fout);
    }
    return 0;
}
Use of com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress in the jvarkit project (by lindenb): class BamIndexReadNames, method indexBamFile.
/**
 * Builds a read-name index next to a coordinate-sorted BAM file.
 *
 * Reads every record, sorts (name, tid, pos) tuples with a disk-backed
 * {@link SortingCollection}, then writes a binary index file
 * ({@code bamFile + NAME_IDX_EXTENSION}) made of a fixed header
 * (read count as long, max name width as int) followed by fixed-width
 * entries: '\0'-padded read name, reference index (int), alignment start (int).
 *
 * @param bamFile coordinate-sorted BAM to index; the index is created in the same directory
 * @throws IOException if the BAM is not coordinate-sorted or on I/O failure
 */
private void indexBamFile(File bamFile) throws IOException {
    final NameIndexDef indexDef = new NameIndexDef();
    LOG.info("Opening " + bamFile);
    SortingCollection<NameAndPos> sorting = null;
    try (SamReader sfr = SamReaderFactory.makeDefault().validationStringency(ValidationStringency.SILENT).open(bamFile)) {
        // validate before allocating the sorter, so a bad input leaks nothing
        if (sfr.getFileHeader().getSortOrder() != SortOrder.coordinate) {
            throw new IOException("not SortOrder.coordinate " + sfr.getFileHeader().getSortOrder());
        }
        sorting = SortingCollection.newInstance(NameAndPos.class, new NameAndPosCodec(), new NameAndPosComparator(), maxRecordsInRAM, bamFile.getParentFile().toPath());
        sorting.setDestructiveIteration(true);
        final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(sfr.getFileHeader().getSequenceDictionary());
        try (SAMRecordIterator iter = sfr.iterator()) {
            while (iter.hasNext()) {
                final SAMRecord rec = iter.next();
                progress.watch(rec);
                final NameAndPos nap = new NameAndPos();
                nap.name = rec.getReadName();
                // track the widest name seen (+1 for a trailing '\0')
                indexDef.maxNameLengt = Math.max(nap.name.length() + 1, indexDef.maxNameLengt);
                nap.tid = rec.getReferenceIndex();
                nap.pos = rec.getAlignmentStart();
                indexDef.countReads++;
                sorting.add(nap);
            }
        }
        progress.finish();
        sorting.doneAdding();
        LOG.info("Done Adding. N=" + indexDef.countReads);
        final File indexFile = new File(bamFile.getParentFile(), bamFile.getName() + NAME_IDX_EXTENSION);
        LOG.info("Writing index " + indexFile);
        try (FileOutputStream raf = new FileOutputStream(indexFile)) {
            // fixed-size file header: record count (8 bytes) + max name width (4 bytes)
            ByteBuffer byteBuff = ByteBuffer.allocate(8 + 4);
            byteBuff.putLong(indexDef.countReads);
            byteBuff.putInt(indexDef.maxNameLengt);
            raf.write(byteBuff.array());
            // one reusable buffer per fixed-width entry: name + tid + pos
            byteBuff = ByteBuffer.allocate(indexDef.maxNameLengt + 4 + 4);
            try (CloseableIterator<NameAndPos> iter2 = sorting.iterator()) {
                while (iter2.hasNext()) {
                    byteBuff.rewind();
                    final NameAndPos nap = iter2.next();
                    for (int i = 0; i < nap.name.length(); ++i) {
                        byteBuff.put((byte) nap.name.charAt(i));
                    }
                    // pad the name column with '\0' up to the fixed width
                    for (int i = nap.name.length(); i < indexDef.maxNameLengt; ++i) {
                        byteBuff.put((byte) '\0');
                    }
                    byteBuff.putInt(nap.tid);
                    byteBuff.putInt(nap.pos);
                    raf.write(byteBuff.array());
                }
            }
            raf.flush();
        }
    } finally {
        // always remove the sorter's temporary files, even on failure
        if (sorting != null) {
            sorting.cleanup();
        }
    }
}
Use of com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress in the jvarkit project (by lindenb): class BamStats01, method run.
/**
 * Collects per-sample read statistics for one BAM input and prints one
 * report line per sample to {@code this.out}.
 *
 * When a BED file was configured, the BED intervals are loaded once (lazily,
 * shared across calls) and each mapped read is additionally counted as
 * IN_TARGET or OFF_TARGET.
 *
 * @param filename label printed in the first column of each report line
 * @param samFileReader reader over the BAM; closed by this method
 * @throws IOException on incompatible sequence dictionaries or on a BED line
 *         whose contig is absent from the dictionary
 */
private void run(final String filename, SamReader samFileReader) throws IOException {
    final Map<String, Histogram> sample2hist = new HashMap<String, BamStats01.Histogram>();
    final SAMSequenceDictionary currDict = samFileReader.getFileHeader().getSequenceDictionary();
    // the first input's dictionary becomes the reference for all later inputs
    if (samSequenceDictionary == null) {
        samSequenceDictionary = currDict;
    }
    if (this.bedFile != null) {
        if (!SequenceUtil.areSequenceDictionariesEqual(currDict, samSequenceDictionary)) {
            samFileReader.close();
            throw new IOException("incompatible sequence dictionaries." + filename);
        }
        // lazily load the BED intervals once; reused for every subsequent input
        if (intervalTreeMap == null) {
            final BedLineCodec bedCodec = new BedLineCodec();
            intervalTreeMap = new IntervalTreeMap<>();
            LOG.info("opening " + this.bedFile);
            String line;
            // try-with-resources: the previous version leaked the reader when
            // a BED line referenced an unknown chromosome (throw before close)
            try (BufferedReader bedIn = IOUtils.openFileForBufferedReading(bedFile)) {
                while ((line = bedIn.readLine()) != null) {
                    final BedLine bedLine = bedCodec.decode(line);
                    if (bedLine == null)
                        continue;
                    final int seqIndex = currDict.getSequenceIndex(bedLine.getContig());
                    if (seqIndex == -1) {
                        throw new IOException("unknown chromosome from dict in in " + line + " " + this.bedFile);
                    }
                    intervalTreeMap.put(bedLine.toInterval(), Boolean.TRUE);
                }
            }
            LOG.info("done reading " + this.bedFile);
        }
    }
    // locate chrX/chrY by name (case-insensitive, with or without "chr" prefix)
    this.chrX_index = -1;
    this.chrY_index = -1;
    for (final SAMSequenceRecord rec : currDict.getSequences()) {
        final String chromName = rec.getSequenceName().toLowerCase();
        if (chromName.equals("x") || chromName.equals("chrx")) {
            this.chrX_index = rec.getSequenceIndex();
        } else if (chromName.equals("y") || chromName.equals("chry")) {
            this.chrY_index = rec.getSequenceIndex();
        }
    }
    final SAMSequenceDictionaryProgress progess = new SAMSequenceDictionaryProgress(currDict);
    final SAMRecordIterator iter = samFileReader.iterator();
    while (iter.hasNext()) {
        final SAMRecord rec = progess.watch(iter.next());
        String sampleName = groupBy.getPartion(rec);
        if (sampleName == null || sampleName.isEmpty())
            sampleName = "undefined";
        Histogram hist = sample2hist.get(sampleName);
        if (hist == null) {
            hist = new Histogram();
            sample2hist.put(sampleName, hist);
        }
        // every read is counted in the ALL category
        hist.histograms[Category2.ALL.ordinal()].watch(rec);
        if (intervalTreeMap == null)
            continue;
        // unmapped reads cannot be classified against the target intervals
        if (rec.getReadUnmappedFlag()) {
            continue;
        }
        if (!intervalTreeMap.containsOverlapping(new Interval(rec.getReferenceName(), rec.getAlignmentStart(), rec.getAlignmentEnd()))) {
            hist.histograms[Category2.OFF_TARGET.ordinal()].watch(rec);
        } else {
            hist.histograms[Category2.IN_TARGET.ordinal()].watch(rec);
        }
    }
    progess.finish();
    samFileReader.close();
    // report: filename, sample, then one count column per (Category2, Category) pair
    for (final String sampleName : sample2hist.keySet()) {
        final Histogram hist = sample2hist.get(sampleName);
        out.print(filename + "\t" + sampleName);
        for (final Category2 cat2 : Category2.values()) {
            for (final Category cat1 : Category.values()) {
                out.print("\t");
                out.print(hist.histograms[cat2.ordinal()].counts[cat1.ordinal()]);
            }
            // without a BED file only the ALL category is meaningful
            if (intervalTreeMap == null)
                break;
        }
        out.println();
    }
}
Use of com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress in the jvarkit project (by lindenb): class BamStats02, method run.
/**
 * Counts the reads of one input per {@code Category} and prints the
 * categories with their counts, most frequent first.
 */
private void run(String filename, SamReader r, PrintWriter out) {
    final Counter<Category> counter = new Counter<>();
    SAMRecordIterator iter = null;
    try {
        iter = r.iterator();
        final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(r.getFileHeader().getSequenceDictionary());
        while (iter.hasNext()) {
            final SAMRecord rec = progress.watch(iter.next());
            final Category row = new Category();
            row.set(STRING_PROPS.filename, filename);
            row.flag = rec.getFlags();
            // -1 means "no target information"; overwritten below when intervals exist
            row.set(INT_PROPS.inTarget, -1);
            final SAMReadGroupRecord readGroup = rec.getReadGroup();
            if (readGroup != null) {
                row.set(STRING_PROPS.samplename, readGroup.getSample());
                row.set(STRING_PROPS.platform, readGroup.getPlatform());
                row.set(STRING_PROPS.platformUnit, readGroup.getPlatformUnit());
                row.set(STRING_PROPS.library, readGroup.getLibrary());
            }
            // machine/flowcell/lane/run parsed from the read name, when it follows
            // the short-read naming convention
            final ShortReadName shortName = ShortReadName.parse(rec);
            if (shortName.isValid()) {
                row.set(STRING_PROPS.instrument, shortName.getInstrumentName());
                row.set(STRING_PROPS.flowcell, shortName.getFlowCellId());
                row.set(INT_PROPS.lane, shortName.getFlowCellLane());
                row.set(INT_PROPS.run, shortName.getRunId());
            }
            if (rec.getReadPairedFlag() && !rec.getMateUnmappedFlag()) {
                row.set(STRING_PROPS.mate_chromosome, rec.getMateReferenceName());
            }
            if (!rec.getReadUnmappedFlag()) {
                // bin the mapping quality up to the next multiple of 10
                final int binnedMapq = (int) (Math.ceil(rec.getMappingQuality() / 10.0) * 10);
                row.set(INT_PROPS.mapq, binnedMapq);
                row.set(STRING_PROPS.chromosome, rec.getReferenceName());
                if (this.intervals != null) {
                    final Interval span = new Interval(rec.getReferenceName(), rec.getAlignmentStart(), rec.getAlignmentEnd());
                    row.set(INT_PROPS.inTarget, this.intervals.containsOverlapping(span) ? 1 : 0);
                }
            }
            counter.incr(row);
        }
        progress.finish();
        // one output line per category, sorted by decreasing count
        for (final Category row : counter.keySetDecreasing()) {
            row.print(out);
            out.print("\t");
            out.print(counter.count(row));
            out.println();
        }
        out.flush();
    } finally {
        CloserUtil.close(iter);
    }
}
Use of com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress in the jvarkit project (by lindenb): class Biostar154220, method doWork.
/**
 * Caps the depth of coverage of a BAM whose reads are grouped by
 * reference/query-name (header sort order "unsorted", see SortSamRefName).
 *
 * Records sharing the same read name and reference are buffered and written
 * out as a unit, but only if doing so does not push any covered reference
 * position above {@code this.capDepth}; otherwise the whole group is dropped.
 * Unmapped reads are always copied through.
 *
 * @param in reader over the input BAM; not closed by this method
 * @return 0 on success, -1 on error (already logged)
 * @throws IOException when a mapped record has no cigar
 */
@SuppressWarnings("resource")
private int doWork(final SamReader in) throws IOException {
    SAMFileHeader header = in.getFileHeader();
    if (header.getSortOrder() != SAMFileHeader.SortOrder.unsorted) {
        LOG.error("input should be unsorted, reads sorted on REF/query-name e.g: see https://github.com/lindenb/jvarkit/wiki/SortSamRefName");
        return -1;
    }
    SAMSequenceDictionary dict = header.getSequenceDictionary();
    if (dict == null) {
        LOG.error("no dict !");
        return -1;
    }
    SAMFileWriter out = null;
    SAMRecordIterator iter = null;
    int prev_tid = -1;
    // accepted coverage per 1-based position of the current contig
    int[] depth_array = null;
    try {
        SAMFileHeader header2 = header.clone();
        header2.addComment("Biostar154220" + " " + getVersion() + " " + getProgramCommandLine());
        out = this.writingBams.openSAMFileWriter(outputFile, header2, true);
        SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(dict);
        iter = in.iterator();
        // group of consecutive records sharing read-name + reference
        List<SAMRecord> buffer = new ArrayList<>();
        for (; ; ) {
            SAMRecord rec = null;
            if (iter.hasNext()) {
                rec = progress.watch(iter.next());
            }
            // unmapped reads cannot affect depth: copy through immediately
            if (rec != null && rec.getReadUnmappedFlag()) {
                out.addAlignment(rec);
                continue;
            }
            // no more record or
            if (!buffer.isEmpty() && rec != null && buffer.get(0).getReadName().equals(rec.getReadName()) && buffer.get(0).getReferenceIndex().equals(rec.getReferenceIndex())) {
                buffer.add(progress.watch(rec));
            } else if (buffer.isEmpty() && rec != null) {
                buffer.add(progress.watch(rec));
            } else // dump buffer
            {
                if (!buffer.isEmpty()) {
                    final int tid = buffer.get(0).getReferenceIndex();
                    // (re)allocate the depth array when the contig changes
                    if (prev_tid == -1 || prev_tid != tid) {
                        SAMSequenceRecord ssr = dict.getSequence(tid);
                        prev_tid = tid;
                        depth_array = null;
                        System.gc();
                        LOG.info("Alloc memory for contig " + ssr.getSequenceName() + " N=" + ssr.getSequenceLength() + "*sizeof(int)");
                        // use a +1 pos
                        depth_array = new int[ssr.getSequenceLength() + 1];
                        Arrays.fill(depth_array, 0);
                    }
                    // position->coverage for this set of reads
                    Counter<Integer> readposition2coverage = new Counter<Integer>();
                    boolean dump_this_buffer = true;
                    for (final SAMRecord sr : buffer) {
                        if (!dump_this_buffer)
                            break;
                        if (this.filter.filterOut(sr))
                            continue;
                        final Cigar cigar = sr.getCigar();
                        if (cigar == null) {
                            // BUGFIX: report the offending record 'sr'; the old code used
                            // 'rec', which is null here at end-of-input (NPE) or is the
                            // NEXT record, not the one missing its cigar
                            throw new IOException("Cigar missing in " + sr.getSAMString());
                        }
                        int refPos1 = sr.getAlignmentStart();
                        for (final CigarElement ce : cigar.getCigarElements()) {
                            final CigarOperator op = ce.getOperator();
                            if (!op.consumesReferenceBases())
                                continue;
                            if (op.consumesReadBases()) {
                                // would this group push any base above the cap?
                                for (int x = 0; x < ce.getLength() && refPos1 + x < depth_array.length; ++x) {
                                    int cov = (int) readposition2coverage.incr(refPos1 + x);
                                    if (depth_array[refPos1 + x] + cov > this.capDepth) {
                                        dump_this_buffer = false;
                                        break;
                                    }
                                }
                            }
                            if (!dump_this_buffer)
                                break;
                            refPos1 += ce.getLength();
                        }
                    }
                    if (dump_this_buffer) {
                        // consumme this coverage
                        for (Integer pos : readposition2coverage.keySet()) {
                            depth_array[pos] += (int) readposition2coverage.count(pos);
                        }
                        for (SAMRecord sr : buffer) {
                            out.addAlignment(sr);
                        }
                    }
                    buffer.clear();
                }
                if (rec == null)
                    break;
                // start a new group with the record that ended the previous one
                buffer.add(rec);
            }
        }
        depth_array = null;
        progress.finish();
        return 0;
    } catch (Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(iter);
        CloserUtil.close(out);
    }
}
Aggregations