use of htsjdk.samtools.SAMRecordIterator in project jvarkit by lindenb.
the class BamLiftOver method doWork.
@Override
public int doWork(final List<String> args) {
final double minMatch = (this.userMinMatch <= 0.0 ? LiftOver.DEFAULT_LIFTOVER_MINMATCH : this.userMinMatch);
if (this.liftOverFile == null) {
LOG.error("LiftOver file is undefined.");
return -1;
}
if (this.faidx == null) {
LOG.error("New Sequence Dictionary file is undefined.");
return -1;
}
SAMRecordIterator iter = null;
SamReader sfr = null;
SAMFileWriter sfw = null;
try {
LOG.info("Reading " + liftOverFile);
LiftOver liftOver = new LiftOver(liftOverFile);
liftOver.setLiftOverMinMatch(minMatch);
final SAMSequenceDictionary newDict = SAMSequenceDictionaryExtractor.extractDictionary(faidx);
sfr = super.openSamReader(oneFileOrNull(args));
final SAMFileHeader headerIn = sfr.getFileHeader();
final SAMFileHeader headerOut = headerIn.clone();
headerOut.setSortOrder(SortOrder.unsorted);
sfw = this.writingBamArgs.openSAMFileWriter(outputFile, headerOut, true);
iter = sfr.iterator();
while (iter.hasNext()) {
final SAMRecord rec = iter.next();
final SAMRecord copy = (SAMRecord) rec.clone();
copy.setHeader(headerOut);
final StringBuilder sb = new StringBuilder();
if (!rec.getReadUnmappedFlag()) {
final String chrom = rec.getReferenceName();
int pos = rec.getAlignmentStart();
final Interval interval = liftOver.liftOver(new Interval(chrom, pos, pos, rec.getReadNegativeStrandFlag(), null));
if (interval != null) {
sb.append(chrom + ":" + pos + ":" + (rec.getReadNegativeStrandFlag() ? "-" : "+"));
final SAMSequenceRecord ssr = newDict.getSequence(interval.getContig());
if (ssr == null) {
sfr.close();
sfr = null;
LOG.error("the chromosome " + interval.getContig() + " is undefined in the sequence dict.");
return -1;
}
copy.setReferenceName(ssr.getSequenceName());
copy.setReferenceIndex(ssr.getSequenceIndex());
copy.setAlignmentStart(interval.getStart());
copy.setReadNegativeStrandFlag(interval.isNegativeStrand());
if (rec.getReadNegativeStrandFlag() != copy.getReadNegativeStrandFlag()) {
copy.setReadString(AcidNucleics.reverseComplement(rec.getReadString()));
byte[] qual = rec.getBaseQualities();
byte[] quals2 = new byte[qual.length];
for (int i = 0; i < qual.length; ++i) {
quals2[i] = qual[(qual.length - 1) - i];
}
copy.setBaseQualities(quals2);
}
} else {
sb.append(".");
SAMUtils.makeReadUnmapped(copy);
}
}
if (rec.getReadPairedFlag() && !rec.getMateUnmappedFlag()) {
sb.append("/");
String chrom = rec.getMateReferenceName();
int pos = rec.getMateAlignmentStart();
final Interval interval = liftOver.liftOver(new Interval(chrom, pos, pos, rec.getMateNegativeStrandFlag(), null));
if (interval != null) {
sb.append(chrom + ":" + pos + ":" + (rec.getMateNegativeStrandFlag() ? "-" : "+"));
final SAMSequenceRecord ssr = newDict.getSequence(interval.getContig());
if (ssr == null) {
sfr.close();
sfr = null;
LOG.error("the chromosome " + interval.getContig() + " is undefined in the sequence dict.");
return -1;
}
copy.setMateReferenceName(ssr.getSequenceName());
copy.setMateReferenceIndex(ssr.getSequenceIndex());
copy.setMateAlignmentStart(interval.getStart());
copy.setMateNegativeStrandFlag(interval.isNegativeStrand());
if (!copy.getReadUnmappedFlag() && copy.getReferenceIndex() == copy.getMateReferenceIndex()) // && copy.getReadNegativeStrandFlag()!=copy.getMateNegativeStrandFlag()
{
// don't change ?
} else {
copy.setProperPairFlag(false);
copy.setInferredInsertSize(0);
}
} else {
sb.append(".");
SAMUtils.makeReadUnmapped(copy);
}
}
if (sb.length() > 0)
copy.setAttribute("LO", sb.toString());
sfw.addAlignment(copy);
}
return RETURN_OK;
} catch (Exception err) {
LOG.error(err);
return -1;
} finally {
CloserUtil.close(iter);
CloserUtil.close(sfr);
CloserUtil.close(sfw);
}
}
use of htsjdk.samtools.SAMRecordIterator in project jvarkit by lindenb.
the class SamSlop method doWork.
@Override
public int doWork(List<String> args) {
if (this.faidx == null) {
LOG.error("Reference was not specified.");
return -1;
}
if (this.defaultQual.length() != 1) {
LOG.error("default quality should have length==1 " + this.defaultQual);
}
GenomicSequence genomicSequence = null;
SamReader sfr = null;
SAMFileWriter sfw = null;
IndexedFastaSequenceFile indexedFastaSequenceFile = null;
final char defaultQUAL = this.defaultQual.charAt(0);
try {
final String inputName = oneFileOrNull(args);
LOG.info("Loading reference");
indexedFastaSequenceFile = new IndexedFastaSequenceFile(faidx);
sfr = openSamReader(inputName);
final SAMFileHeader header = sfr.getFileHeader();
header.setSortOrder(SortOrder.unsorted);
sfw = writingBamArgs.openSAMFileWriter(outputFile, header, true);
final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(header);
final SAMRecordIterator iter = sfr.iterator();
while (iter.hasNext()) {
final SAMRecord rec = progress.watch(iter.next());
final Cigar cigar = rec.getCigar();
if (rec.getReadUnmappedFlag() || cigar == null || cigar.isEmpty() || rec.getReadBases() == SAMRecord.NULL_SEQUENCE || (this.extend5 <= 0 && this.extend3 <= 0)) {
sfw.addAlignment(rec);
continue;
}
final StringBuilder sbs = new StringBuilder(rec.getReadString());
final StringBuilder sbq = new StringBuilder(rec.getBaseQualityString());
if (genomicSequence == null || genomicSequence.getSAMSequenceRecord().getSequenceIndex() != rec.getReferenceIndex()) {
genomicSequence = new GenomicSequence(indexedFastaSequenceFile, rec.getReferenceName());
}
final int refPos1 = (this.removeClip ? rec.getAlignmentStart() : rec.getUnclippedStart());
final int endAlignmend1 = (this.removeClip ? rec.getAlignmentEnd() : rec.getUnclippedEnd());
final List<CigarElement> cl = new ArrayList<>(cigar.getCigarElements());
if (!this.removeClip) {
// replace clip S/H by match M
for (int i = 0; i < cl.size(); ++i) {
final CigarElement ce = cl.get(i);
if (ce.getOperator() == CigarOperator.S || ce.getOperator() == CigarOperator.H) {
cl.set(i, new CigarElement(ce.getLength(), CigarOperator.M));
}
}
}
if (this.extend5 > 0 && refPos1 > 1) {
if (this.removeClip) {
// /remove hard + soft clip 5'
while (!cl.isEmpty()) {
// first
final CigarElement ce = cl.get(0);
// not a clip
if (!(ce.getOperator() == CigarOperator.S || ce.getOperator() == CigarOperator.H)) {
break;
}
if (ce.getOperator() == CigarOperator.S) {
sbs.replace(0, ce.getLength(), "");
sbq.replace(0, ce.getLength(), "");
}
// remove first
cl.remove(0);
}
}
final StringBuilder prefix = new StringBuilder(this.extend5);
// /append + soft clip 5'
for (int i = 0; i < this.extend5; ++i) {
int x1 = (refPos1 - 1) - i;
// break if out of genome
if (x1 < 1)
break;
prefix.insert(0, genomicSequence.charAt(x1 - 1));
}
sbs.insert(0, prefix.toString());
// preprend quality
for (int i = 0; i < prefix.length(); ++i) sbq.insert(0, defaultQUAL);
// prepend cigar
cl.add(0, new CigarElement(prefix.length(), CigarOperator.M));
// update start pos
rec.setAlignmentStart(refPos1 - prefix.length());
}
if (this.extend3 > 0 && rec.getAlignmentEnd() < genomicSequence.length()) {
if (this.removeClip) {
// /remove hard + soft clip 3'
while (!cl.isEmpty()) {
// last
final CigarElement ce = cl.get(cl.size() - 1);
// not a clip
if (!(ce.getOperator() == CigarOperator.S || ce.getOperator() == CigarOperator.H)) {
break;
}
if (ce.getOperator() == CigarOperator.S) {
sbs.setLength(sbs.length() - ce.getLength());
sbq.setLength(sbq.length() - ce.getLength());
}
// remove last
cl.remove(cl.size() - 1);
}
}
int extend = 0;
for (int pos1 = endAlignmend1 + 1; pos1 <= (endAlignmend1 + this.extend3) && pos1 <= genomicSequence.length(); ++pos1) {
sbs.append(genomicSequence.charAt(pos1 - 1));
sbq.append(defaultQUAL);
++extend;
}
// append cigar
cl.add(new CigarElement(extend, CigarOperator.M));
}
// simplify cigar
int idx = 0;
while (idx + 1 < cl.size()) {
final CigarElement ce1 = cl.get(idx);
final CigarElement ce2 = cl.get(idx + 1);
if (ce1.getOperator() == ce2.getOperator()) {
cl.set(idx, new CigarElement(ce1.getLength() + ce2.getLength(), ce1.getOperator()));
cl.remove(idx + 1);
} else {
idx++;
}
}
rec.setCigar(new Cigar(cl));
rec.setReadString(sbs.toString());
rec.setBaseQualityString(sbq.toString());
final List<SAMValidationError> errors = rec.isValid();
if (errors != null && !errors.isEmpty()) {
for (SAMValidationError err : errors) {
LOG.error(err.getMessage());
}
}
// info("changed "+rec.getCigarString()+" to "+newCigarStr+" "+rec.getReadName()+" "+rec.getReadString());
sfw.addAlignment(rec);
}
progress.finish();
return RETURN_OK;
} catch (Exception err) {
LOG.error(err);
return -1;
} finally {
CloserUtil.close(indexedFastaSequenceFile);
CloserUtil.close(sfr);
CloserUtil.close(sfw);
}
}
use of htsjdk.samtools.SAMRecordIterator in project jvarkit by lindenb.
the class SamToJson method call.
private int call(final String inputName) throws Exception {
PrintWriter out = null;
SamReader sfr = null;
final SamJsonWriterFactory factory = SamJsonWriterFactory.newInstance().printHeader(this.print_header).printReadName(!this.disable_readName).printAttributes(!this.disable_atts).expandFlag(this.expflag).expandCigar(this.excigar);
SAMFileWriter swf = null;
try {
sfr = super.openSamReader(inputName);
out = super.openFileOrStdoutAsPrintWriter(this.output);
swf = factory.open(sfr.getFileHeader(), out);
final SAMRecordIterator iter = sfr.iterator();
while (iter.hasNext() && !out.checkError()) {
swf.addAlignment(iter.next());
}
iter.close();
out.flush();
swf.close();
swf = null;
LOG.info("done");
return 0;
} catch (final Exception err) {
LOG.error(err);
return -1;
} finally {
CloserUtil.close(sfr);
CloserUtil.close(swf);
}
}
use of htsjdk.samtools.SAMRecordIterator in project jvarkit by lindenb.
the class SortSamRefName method doWork.
@Override
public int doWork(List<String> args) {
SamReader in = null;
SAMFileWriter out = null;
SAMRecordIterator iter = null;
CloseableIterator<SAMRecord> iter2 = null;
SortingCollection<SAMRecord> sorter = null;
try {
in = openSamReader(oneFileOrNull(args));
final SAMFileHeader header = in.getFileHeader();
final BAMRecordCodec bamRecordCodec = new BAMRecordCodec(header);
final RefNameComparator refNameComparator = new RefNameComparator();
sorter = SortingCollection.newInstance(SAMRecord.class, bamRecordCodec, refNameComparator, this.writingSortingCollection.getMaxRecordsInRam(), this.writingSortingCollection.getTmpPaths());
sorter.setDestructiveIteration(true);
final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(header);
iter = in.iterator();
while (iter.hasNext()) {
sorter.add(progress.watch(iter.next()));
}
in.close();
in = null;
sorter.doneAdding();
final SAMFileHeader header2 = header.clone();
header2.addComment(getProgramName() + " " + getVersion() + " " + getProgramCommandLine());
header2.setSortOrder(SortOrder.unsorted);
out = this.writingBamArgs.openSAMFileWriter(outputFile, header2, true);
iter2 = sorter.iterator();
while (iter2.hasNext()) {
out.addAlignment(iter2.next());
}
out.close();
out = null;
sorter.cleanup();
progress.finish();
LOG.info("done");
return RETURN_OK;
} catch (Exception err) {
LOG.error(err);
return -1;
} finally {
CloserUtil.close(iter2);
CloserUtil.close(iter);
CloserUtil.close(out);
CloserUtil.close(in);
}
}
use of htsjdk.samtools.SAMRecordIterator in project jvarkit by lindenb.
the class Sam2Tsv method scan.
private void scan(final SamReader r) {
final Row row = new Row();
SAMRecordIterator iter = null;
try {
final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(r.getFileHeader());
iter = r.iterator();
while (iter.hasNext()) {
row.rec = progress.watch(iter.next());
printAln(row);
if (this.out.checkError())
break;
}
progress.finish();
} catch (final Exception err) {
LOG.error("scan error:", err);
throw new RuntimeException(String.valueOf(err.getMessage()), err);
} finally {
CloserUtil.close(iter);
}
}
Aggregations