use of htsjdk.samtools.SAMFileWriter in project jvarkit by lindenb.
the class SamAddPI method doWork.
@Override
public int doWork(final List<String> args) {
final Map<String, List<Integer>> rg2insertsize = new HashMap<>();
SamReader sfr = null;
SamReader sfrTmp = null;
SAMFileWriter sfw = null;
File tmpBam = null;
SAMFileWriter tmpBamWriter = null;
SAMFileWriter outWriter = null;
CloseableIterator<SAMRecord> iter = null;
CloseableIterator<SAMRecord> iterTmp = null;
try {
sfr = openSamReader(oneFileOrNull(args));
SAMFileHeader header = sfr.getFileHeader();
for (final SAMReadGroupRecord rg : header.getReadGroups()) {
if (!overwrite_existing && rg.getPredictedMedianInsertSize() != null) {
continue;
}
rg2insertsize.put(rg.getId(), new ArrayList<>(num_read_to_test < 1L ? 10000 : num_read_to_test));
}
tmpBam = File.createTempFile("__addpi", ".bam");
tmpBamWriter = this.writingBamArgs.openSAMFileWriter(tmpBam, header, true);
iter = sfr.iterator();
int n_processed = 0;
final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(header).logger(LOG);
while (iter.hasNext() && (this.num_read_to_test < 0 || n_processed < this.num_read_to_test)) {
final SAMRecord rec = progress.watch(iter.next());
tmpBamWriter.addAlignment(rec);
final SAMReadGroupRecord rg = rec.getReadGroup();
final List<Integer> insertlist = rg2insertsize.get(rg.getId());
if (insertlist == null)
continue;
if (rec.getReadUnmappedFlag())
continue;
if (!rec.getReadPairedFlag())
continue;
if (!rec.getFirstOfPairFlag())
continue;
if (rec.getMateUnmappedFlag())
continue;
if (this.samRecordFilter.filterOut(rec))
continue;
final int len = rec.getInferredInsertSize();
if (len == 0)
continue;
insertlist.add(Math.abs(len));
++n_processed;
}
tmpBamWriter.close();
tmpBamWriter = null;
// reopen tmp file
sfrTmp = super.createSamReaderFactory().open(tmpBam);
iterTmp = sfrTmp.iterator();
// update dMedianInsertSize
for (final SAMReadGroupRecord rg : header.getReadGroups()) {
final List<Integer> insertlist = rg2insertsize.get(rg.getId());
if (insertlist == null || insertlist.isEmpty())
continue;
rg.setPredictedMedianInsertSize((int) Percentile.median().evaluate(insertlist.stream().mapToDouble(I -> I.doubleValue())));
}
header.addComment("Processed with " + getClass().getSimpleName() + " " + getProgramCommandLine());
outWriter = this.writingBamArgs.openSAMFileWriter(this.outputFile, header, true);
while (iterTmp.hasNext()) {
outWriter.addAlignment(iterTmp.next());
}
iterTmp.close();
iterTmp = null;
sfrTmp.close();
sfrTmp = null;
tmpBam.delete();
// finish writing original input
while (iter.hasNext()) {
outWriter.addAlignment(progress.watch(iter.next()));
}
progress.finish();
iter.close();
iter = null;
sfr.close();
sfr = null;
outWriter.close();
return RETURN_OK;
} catch (Exception err) {
LOG.error(err);
return -1;
} finally {
CloserUtil.close(tmpBamWriter);
if (tmpBam != null)
tmpBam.delete();
CloserUtil.close(outWriter);
CloserUtil.close(sfr);
CloserUtil.close(sfw);
}
}
use of htsjdk.samtools.SAMFileWriter in project jvarkit by lindenb.
the class ConcatSam method doWork.
@Override
public int doWork(final List<String> args) {
SAMFileWriter out = null;
ConcatSamIterator iter = null;
try {
final Factory factory = new Factory().setConcatenate(!this.merging);
if (!StringUtil.isBlank(this.region_str)) {
factory.addInterval(region_str);
}
iter = factory.open(args);
out = this.writingBamArgs.openSAMFileWriter(outputFile, iter.getFileHeader(), true);
final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(iter.getFileHeader()).logger(LOG);
while (iter.hasNext()) {
out.addAlignment(progress.watch(iter.next()));
}
iter.close();
iter = null;
out.close();
out = null;
progress.finish();
return RETURN_OK;
} catch (final Exception err) {
LOG.error(err);
return -1;
} finally {
CloserUtil.close(out);
CloserUtil.close(iter);
}
}
use of htsjdk.samtools.SAMFileWriter in project jvarkit by lindenb.
the class BamLiftOver method doWork.
@Override
public int doWork(final List<String> args) {
final double minMatch = (this.userMinMatch <= 0.0 ? LiftOver.DEFAULT_LIFTOVER_MINMATCH : this.userMinMatch);
if (this.liftOverFile == null) {
LOG.error("LiftOver file is undefined.");
return -1;
}
if (this.faidx == null) {
LOG.error("New Sequence Dictionary file is undefined.");
return -1;
}
SAMRecordIterator iter = null;
SamReader sfr = null;
SAMFileWriter sfw = null;
try {
LOG.info("Reading " + liftOverFile);
LiftOver liftOver = new LiftOver(liftOverFile);
liftOver.setLiftOverMinMatch(minMatch);
final SAMSequenceDictionary newDict = SAMSequenceDictionaryExtractor.extractDictionary(faidx);
sfr = super.openSamReader(oneFileOrNull(args));
final SAMFileHeader headerIn = sfr.getFileHeader();
final SAMFileHeader headerOut = headerIn.clone();
headerOut.setSortOrder(SortOrder.unsorted);
sfw = this.writingBamArgs.openSAMFileWriter(outputFile, headerOut, true);
iter = sfr.iterator();
while (iter.hasNext()) {
final SAMRecord rec = iter.next();
final SAMRecord copy = (SAMRecord) rec.clone();
copy.setHeader(headerOut);
final StringBuilder sb = new StringBuilder();
if (!rec.getReadUnmappedFlag()) {
final String chrom = rec.getReferenceName();
int pos = rec.getAlignmentStart();
final Interval interval = liftOver.liftOver(new Interval(chrom, pos, pos, rec.getReadNegativeStrandFlag(), null));
if (interval != null) {
sb.append(chrom + ":" + pos + ":" + (rec.getReadNegativeStrandFlag() ? "-" : "+"));
final SAMSequenceRecord ssr = newDict.getSequence(interval.getContig());
if (ssr == null) {
sfr.close();
sfr = null;
LOG.error("the chromosome " + interval.getContig() + " is undefined in the sequence dict.");
return -1;
}
copy.setReferenceName(ssr.getSequenceName());
copy.setReferenceIndex(ssr.getSequenceIndex());
copy.setAlignmentStart(interval.getStart());
copy.setReadNegativeStrandFlag(interval.isNegativeStrand());
if (rec.getReadNegativeStrandFlag() != copy.getReadNegativeStrandFlag()) {
copy.setReadString(AcidNucleics.reverseComplement(rec.getReadString()));
byte[] qual = rec.getBaseQualities();
byte[] quals2 = new byte[qual.length];
for (int i = 0; i < qual.length; ++i) {
quals2[i] = qual[(qual.length - 1) - i];
}
copy.setBaseQualities(quals2);
}
} else {
sb.append(".");
SAMUtils.makeReadUnmapped(copy);
}
}
if (rec.getReadPairedFlag() && !rec.getMateUnmappedFlag()) {
sb.append("/");
String chrom = rec.getMateReferenceName();
int pos = rec.getMateAlignmentStart();
final Interval interval = liftOver.liftOver(new Interval(chrom, pos, pos, rec.getMateNegativeStrandFlag(), null));
if (interval != null) {
sb.append(chrom + ":" + pos + ":" + (rec.getMateNegativeStrandFlag() ? "-" : "+"));
final SAMSequenceRecord ssr = newDict.getSequence(interval.getContig());
if (ssr == null) {
sfr.close();
sfr = null;
LOG.error("the chromosome " + interval.getContig() + " is undefined in the sequence dict.");
return -1;
}
copy.setMateReferenceName(ssr.getSequenceName());
copy.setMateReferenceIndex(ssr.getSequenceIndex());
copy.setMateAlignmentStart(interval.getStart());
copy.setMateNegativeStrandFlag(interval.isNegativeStrand());
if (!copy.getReadUnmappedFlag() && copy.getReferenceIndex() == copy.getMateReferenceIndex()) // && copy.getReadNegativeStrandFlag()!=copy.getMateNegativeStrandFlag()
{
// don't change ?
} else {
copy.setProperPairFlag(false);
copy.setInferredInsertSize(0);
}
} else {
sb.append(".");
SAMUtils.makeReadUnmapped(copy);
}
}
if (sb.length() > 0)
copy.setAttribute("LO", sb.toString());
sfw.addAlignment(copy);
}
return RETURN_OK;
} catch (Exception err) {
LOG.error(err);
return -1;
} finally {
CloserUtil.close(iter);
CloserUtil.close(sfr);
CloserUtil.close(sfw);
}
}
use of htsjdk.samtools.SAMFileWriter in project jvarkit by lindenb.
the class SamSlop method doWork.
@Override
public int doWork(List<String> args) {
if (this.faidx == null) {
LOG.error("Reference was not specified.");
return -1;
}
if (this.defaultQual.length() != 1) {
LOG.error("default quality should have length==1 " + this.defaultQual);
}
GenomicSequence genomicSequence = null;
SamReader sfr = null;
SAMFileWriter sfw = null;
IndexedFastaSequenceFile indexedFastaSequenceFile = null;
final char defaultQUAL = this.defaultQual.charAt(0);
try {
final String inputName = oneFileOrNull(args);
LOG.info("Loading reference");
indexedFastaSequenceFile = new IndexedFastaSequenceFile(faidx);
sfr = openSamReader(inputName);
final SAMFileHeader header = sfr.getFileHeader();
header.setSortOrder(SortOrder.unsorted);
sfw = writingBamArgs.openSAMFileWriter(outputFile, header, true);
final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(header);
final SAMRecordIterator iter = sfr.iterator();
while (iter.hasNext()) {
final SAMRecord rec = progress.watch(iter.next());
final Cigar cigar = rec.getCigar();
if (rec.getReadUnmappedFlag() || cigar == null || cigar.isEmpty() || rec.getReadBases() == SAMRecord.NULL_SEQUENCE || (this.extend5 <= 0 && this.extend3 <= 0)) {
sfw.addAlignment(rec);
continue;
}
final StringBuilder sbs = new StringBuilder(rec.getReadString());
final StringBuilder sbq = new StringBuilder(rec.getBaseQualityString());
if (genomicSequence == null || genomicSequence.getSAMSequenceRecord().getSequenceIndex() != rec.getReferenceIndex()) {
genomicSequence = new GenomicSequence(indexedFastaSequenceFile, rec.getReferenceName());
}
final int refPos1 = (this.removeClip ? rec.getAlignmentStart() : rec.getUnclippedStart());
final int endAlignmend1 = (this.removeClip ? rec.getAlignmentEnd() : rec.getUnclippedEnd());
final List<CigarElement> cl = new ArrayList<>(cigar.getCigarElements());
if (!this.removeClip) {
// replace clip S/H by match M
for (int i = 0; i < cl.size(); ++i) {
final CigarElement ce = cl.get(i);
if (ce.getOperator() == CigarOperator.S || ce.getOperator() == CigarOperator.H) {
cl.set(i, new CigarElement(ce.getLength(), CigarOperator.M));
}
}
}
if (this.extend5 > 0 && refPos1 > 1) {
if (this.removeClip) {
// /remove hard + soft clip 5'
while (!cl.isEmpty()) {
// first
final CigarElement ce = cl.get(0);
// not a clip
if (!(ce.getOperator() == CigarOperator.S || ce.getOperator() == CigarOperator.H)) {
break;
}
if (ce.getOperator() == CigarOperator.S) {
sbs.replace(0, ce.getLength(), "");
sbq.replace(0, ce.getLength(), "");
}
// remove first
cl.remove(0);
}
}
final StringBuilder prefix = new StringBuilder(this.extend5);
// /append + soft clip 5'
for (int i = 0; i < this.extend5; ++i) {
int x1 = (refPos1 - 1) - i;
// break if out of genome
if (x1 < 1)
break;
prefix.insert(0, genomicSequence.charAt(x1 - 1));
}
sbs.insert(0, prefix.toString());
// preprend quality
for (int i = 0; i < prefix.length(); ++i) sbq.insert(0, defaultQUAL);
// prepend cigar
cl.add(0, new CigarElement(prefix.length(), CigarOperator.M));
// update start pos
rec.setAlignmentStart(refPos1 - prefix.length());
}
if (this.extend3 > 0 && rec.getAlignmentEnd() < genomicSequence.length()) {
if (this.removeClip) {
// /remove hard + soft clip 3'
while (!cl.isEmpty()) {
// last
final CigarElement ce = cl.get(cl.size() - 1);
// not a clip
if (!(ce.getOperator() == CigarOperator.S || ce.getOperator() == CigarOperator.H)) {
break;
}
if (ce.getOperator() == CigarOperator.S) {
sbs.setLength(sbs.length() - ce.getLength());
sbq.setLength(sbq.length() - ce.getLength());
}
// remove last
cl.remove(cl.size() - 1);
}
}
int extend = 0;
for (int pos1 = endAlignmend1 + 1; pos1 <= (endAlignmend1 + this.extend3) && pos1 <= genomicSequence.length(); ++pos1) {
sbs.append(genomicSequence.charAt(pos1 - 1));
sbq.append(defaultQUAL);
++extend;
}
// append cigar
cl.add(new CigarElement(extend, CigarOperator.M));
}
// simplify cigar
int idx = 0;
while (idx + 1 < cl.size()) {
final CigarElement ce1 = cl.get(idx);
final CigarElement ce2 = cl.get(idx + 1);
if (ce1.getOperator() == ce2.getOperator()) {
cl.set(idx, new CigarElement(ce1.getLength() + ce2.getLength(), ce1.getOperator()));
cl.remove(idx + 1);
} else {
idx++;
}
}
rec.setCigar(new Cigar(cl));
rec.setReadString(sbs.toString());
rec.setBaseQualityString(sbq.toString());
final List<SAMValidationError> errors = rec.isValid();
if (errors != null && !errors.isEmpty()) {
for (SAMValidationError err : errors) {
LOG.error(err.getMessage());
}
}
// info("changed "+rec.getCigarString()+" to "+newCigarStr+" "+rec.getReadName()+" "+rec.getReadString());
sfw.addAlignment(rec);
}
progress.finish();
return RETURN_OK;
} catch (Exception err) {
LOG.error(err);
return -1;
} finally {
CloserUtil.close(indexedFastaSequenceFile);
CloserUtil.close(sfr);
CloserUtil.close(sfw);
}
}
use of htsjdk.samtools.SAMFileWriter in project jvarkit by lindenb.
the class SamToJson method call.
private int call(final String inputName) throws Exception {
PrintWriter out = null;
SamReader sfr = null;
final SamJsonWriterFactory factory = SamJsonWriterFactory.newInstance().printHeader(this.print_header).printReadName(!this.disable_readName).printAttributes(!this.disable_atts).expandFlag(this.expflag).expandCigar(this.excigar);
SAMFileWriter swf = null;
try {
sfr = super.openSamReader(inputName);
out = super.openFileOrStdoutAsPrintWriter(this.output);
swf = factory.open(sfr.getFileHeader(), out);
final SAMRecordIterator iter = sfr.iterator();
while (iter.hasNext() && !out.checkError()) {
swf.addAlignment(iter.next());
}
iter.close();
out.flush();
swf.close();
swf = null;
LOG.info("done");
return 0;
} catch (final Exception err) {
LOG.error(err);
return -1;
} finally {
CloserUtil.close(sfr);
CloserUtil.close(swf);
}
}
Aggregations