use of htsjdk.samtools.SAMRecordIterator in project jvarkit by lindenb.
the class SamMaskAlignedBases method doWork.
@Override
public int doWork(List<String> args) {
final byte RESET_CHAR = (byte) 'N';
final byte RESET_QUAL = (byte) SAMUtils.fastqToPhred('#');
long nRecords = 0L;
long nRecordMasked = 0L;
long nBasesMasked = 0L;
long nBases = 0L;
SAMRecordIterator iter = null;
SamReader sfr = null;
SAMFileWriter sfw = null;
try {
sfr = openSamReader(oneFileOrNull(args));
final SAMFileHeader header1 = sfr.getFileHeader();
if (header1 == null) {
LOG.error("File header missing");
return -1;
}
final SAMFileHeader header2 = header1.clone();
header2.addComment(getProgramName() + ":" + getVersion() + ":" + getProgramCommandLine());
header2.setSortOrder(SortOrder.unsorted);
sfw = this.writingBamArgs.openSAMFileWriter(outputFile, header2, true);
final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(header1);
iter = sfr.iterator();
while (iter.hasNext()) {
final SAMRecord rec = progress.watch(iter.next());
++nRecords;
nBases += rec.getReadLength();
if (rec.getReadUnmappedFlag()) {
SAMUtils.makeReadUnmapped(rec);
sfw.addAlignment(rec);
continue;
}
if (rec.isSecondaryOrSupplementary()) {
continue;
}
final Cigar cigar = rec.getCigar();
byte[] bases = rec.getReadBases();
byte[] quals = rec.getBaseQualities();
if (cigar == null || cigar.isEmpty()) {
SAMUtils.makeReadUnmapped(rec);
sfw.addAlignment(rec);
continue;
}
int readpos = 0;
for (final CigarElement ce : cigar) {
final CigarOperator op = ce.getOperator();
if (op.consumesReadBases()) {
if (op.consumesReferenceBases()) {
for (int x = 0; x < ce.getLength(); ++x) {
if (bases != null)
bases[readpos + x] = RESET_CHAR;
if (quals != null)
quals[readpos + x] = RESET_QUAL;
++nBasesMasked;
}
}
readpos += ce.getLength();
}
}
++nRecordMasked;
SAMUtils.makeReadUnmapped(rec);
sfw.addAlignment(rec);
}
iter.close();
sfw.close();
progress.finish();
LOG.info("done : reads masked " + nRecordMasked + "/" + nRecords + " Bases masked:" + nBasesMasked + "/" + nBases);
return RETURN_OK;
} catch (final Exception err) {
LOG.error(err);
return -1;
} finally {
CloserUtil.close(iter);
CloserUtil.close(sfr);
CloserUtil.close(sfw);
}
}
use of htsjdk.samtools.SAMRecordIterator in project jvarkit by lindenb.
the class SamRetrieveSeqAndQual method doWork.
@Override
public int doWork(List<String> args) {
FastqReader[] fastqReaders = null;
SamReader samReader = null;
SAMFileWriter samWriter = null;
SAMRecordIterator iter = null;
try {
if (fastqFin == null) {
LOG.error("undefined fastq file");
return -1;
} else {
LOG.info("opening " + fastqFin);
FastqReader r1 = new FastqReader(fastqFin);
if (fastqRin == null) {
fastqReaders = new FastqReader[] { r1 };
} else {
LOG.info("opening " + fastqRin);
FastqReader r2 = new FastqReader(fastqRin);
fastqReaders = new FastqReader[] { r1, r2 };
}
}
samReader = super.openSamReader(oneFileOrNull(args));
final SAMFileHeader.SortOrder sortOrder = samReader.getFileHeader().getSortOrder();
if (sortOrder == null) {
LOG.warning("undefined sort order read are in the sam order");
} else if (!sortOrder.equals(SAMFileHeader.SortOrder.queryname)) {
LOG.error("Bad Sort Order. Sort this input on read name");
return -1;
}
SAMFileHeader header = samReader.getFileHeader().clone();
SAMProgramRecord prg = header.createProgramRecord();
prg.setCommandLine(this.getProgramCommandLine());
prg.setProgramName(this.getProgramName());
prg.setProgramVersion(this.getVersion());
samWriter = writingBamArgs.openSAMFileWriter(bamOut, header, true);
iter = samReader.iterator();
SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(header.getSequenceDictionary());
FastqRecord[] currFastq = new FastqRecord[] { null, null };
while (iter.hasNext()) {
SAMRecord rec = progress.watch(iter.next());
String readName = rec.getReadName();
int fastq_index = 0;
if (rec.getReadPairedFlag()) {
if (fastqReaders.length != 2) {
LOG.error("Not paired but number of fastq!=2");
return -1;
}
fastq_index = (rec.getFirstOfPairFlag() ? 0 : 1);
} else {
if (fastqReaders.length != 1) {
LOG.error("Not paired but number of fastq!=1");
return -1;
}
fastq_index = 0;
}
if (sortOrder == SAMFileHeader.SortOrder.queryname) {
while (currFastq[fastq_index] == null || normalizeFastqName(currFastq[fastq_index].getReadName()).compareTo(readName) < 0) {
if (!fastqReaders[fastq_index].hasNext()) {
LOG.error("Read Missing for " + readName);
return -1;
}
currFastq[fastq_index] = fastqReaders[fastq_index].next();
if (normalizeFastqName(currFastq[fastq_index].getReadName()).compareTo(readName) > 0) {
LOG.error("Read Missing for " + readName);
return -1;
}
}
} else {
if (!fastqReaders[fastq_index].hasNext()) {
LOG.error("Read Missing for " + readName);
return -1;
}
currFastq[fastq_index] = fastqReaders[fastq_index].next();
}
if (normalizeFastqName(currFastq[fastq_index].getReadName()).compareTo(readName) != 0) {
LOG.error("Read Missing/Error for " + readName + " current:" + currFastq[fastq_index].getReadName());
return -1;
}
String fastqBases = currFastq[fastq_index].getReadString();
String fastqQuals = currFastq[fastq_index].getBaseQualityString();
/* handle orientation */
if (!rec.getReadUnmappedFlag() && rec.getReadNegativeStrandFlag()) {
fastqBases = AcidNucleics.reverseComplement(fastqBases);
StringBuilder sb = new StringBuilder(fastqQuals.length());
for (int i = fastqQuals.length() - 1; i >= 0; --i) sb.append(fastqQuals.charAt(i));
fastqQuals = sb.toString();
}
/* remove hard clip */
Cigar cigar = rec.getCigar();
if (cigar != null) {
List<CigarElement> ceList = cigar.getCigarElements();
if (!ceList.isEmpty()) {
CigarElement ce = ceList.get(ceList.size() - 1);
if (ce.getOperator() == CigarOperator.HARD_CLIP) {
fastqBases = fastqBases.substring(0, fastqBases.length() - ce.getLength());
fastqQuals = fastqQuals.substring(0, fastqQuals.length() - ce.getLength());
}
ce = ceList.get(0);
if (ce.getOperator() == CigarOperator.HARD_CLIP) {
fastqBases = fastqBases.substring(ce.getLength());
fastqQuals = fastqQuals.substring(ce.getLength());
}
}
}
rec.setBaseQualityString(fastqQuals);
rec.setReadString(fastqBases);
samWriter.addAlignment(rec);
}
progress.finish();
return 0;
} catch (Exception err) {
LOG.error(err);
return -1;
} finally {
if (fastqReaders != null)
for (FastqReader r : fastqReaders) CloserUtil.close(r);
CloserUtil.close(iter);
CloserUtil.close(samReader);
CloserUtil.close(samWriter);
}
}
use of htsjdk.samtools.SAMRecordIterator in project jvarkit by lindenb.
the class SamToPsl method scan.
private void scan(final SamReader in) {
final SAMSequenceDictionary dict = in.getFileHeader().getSequenceDictionary();
if (dict == null)
throw new RuntimeException("Sequence dictionary missing...");
final SAMRecordIterator iter = in.iterator();
final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(dict);
while (iter.hasNext() && !this.out.checkError()) {
final SAMRecord rec = progress.watch(iter.next());
if (rec.getReadUnmappedFlag())
continue;
for (final PslAlign a : makePslAlign(rec, dict)) {
out.println(toString(a, rec));
}
}
progress.finish();
iter.close();
}
use of htsjdk.samtools.SAMRecordIterator in project jvarkit by lindenb.
the class FindNewSpliceSites method scan.
private void scan(SamReader in) {
SAMSequenceDictionary dict = in.getFileHeader().getSequenceDictionary();
if (dict == null)
throw new RuntimeException("Sequence dictionary missing");
SAMRecordIterator iter = in.iterator();
SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(dict);
while (iter.hasNext()) {
final SAMRecord rec = iter.next();
if (rec.getReadUnmappedFlag())
continue;
if (rec.isSecondaryOrSupplementary())
continue;
progress.watch(rec);
if (isWeird(rec, dict)) {
this.weird.addAlignment(rec);
continue;
}
for (CigarElement ce : rec.getCigar().getCigarElements()) {
if (ce.getOperator().equals(CigarOperator.N)) {
scanRead(rec, dict);
break;
}
}
}
iter.close();
progress.finish();
}
use of htsjdk.samtools.SAMRecordIterator in project jvarkit by lindenb.
the class SAM4WebLogo method doWork.
@Override
public int doWork(final List<String> args) {
if (StringUtil.isBlank(this.regionStr)) {
LOG.error("Undefined interval.");
return -1;
}
final IntervalParser intervalParser = new IntervalParser().setFixContigName(true);
PrintWriter out = null;
SamReader samReader = null;
SAMRecordIterator iter = null;
try {
out = super.openFileOrStdoutAsPrintWriter(outputFile);
for (final String inputName : IOUtils.unrollFiles(args)) {
samReader = openSamReader(inputName);
final Interval interval = intervalParser.setDictionary(samReader.getFileHeader().getSequenceDictionary()).parse(this.regionStr);
if (interval == null) {
LOG.error("Bad interval " + this.regionStr);
return -1;
}
iter = samReader.query(interval.getContig(), interval.getStart(), interval.getEnd(), false);
while (iter.hasNext()) {
final SAMRecord rec = iter.next();
if (rec.getReadUnmappedFlag())
continue;
if (this.SamRecordFilter.filterOut(rec))
continue;
final Cigar cigar = rec.getCigar();
if (cigar == null || cigar.isEmpty())
continue;
if (!rec.getReferenceName().equals(interval.getContig()))
continue;
if (this.readEnd.apply(rec) < interval.getStart())
continue;
if (this.readStart.apply(rec) > interval.getEnd())
continue;
printRead(out, rec, interval);
}
iter.close();
iter = null;
samReader.close();
samReader = null;
}
out.flush();
out.close();
out = null;
return 0;
} catch (final Exception err) {
LOG.error(err);
return -1;
} finally {
CloserUtil.close(iter);
CloserUtil.close(samReader);
CloserUtil.close(out);
}
}
Aggregations