use of htsjdk.samtools.SAMFileWriter in project jvarkit by lindenb.
the class SamRetrieveSeqAndQual method doWork.
@Override
public int doWork(List<String> args) {
FastqReader[] fastqReaders = null;
SamReader samReader = null;
SAMFileWriter samWriter = null;
SAMRecordIterator iter = null;
try {
if (fastqFin == null) {
LOG.error("undefined fastq file");
return -1;
} else {
LOG.info("opening " + fastqFin);
FastqReader r1 = new FastqReader(fastqFin);
if (fastqRin == null) {
fastqReaders = new FastqReader[] { r1 };
} else {
LOG.info("opening " + fastqRin);
FastqReader r2 = new FastqReader(fastqRin);
fastqReaders = new FastqReader[] { r1, r2 };
}
}
samReader = super.openSamReader(oneFileOrNull(args));
final SAMFileHeader.SortOrder sortOrder = samReader.getFileHeader().getSortOrder();
if (sortOrder == null) {
LOG.warning("undefined sort order read are in the sam order");
} else if (!sortOrder.equals(SAMFileHeader.SortOrder.queryname)) {
LOG.error("Bad Sort Order. Sort this input on read name");
return -1;
}
SAMFileHeader header = samReader.getFileHeader().clone();
SAMProgramRecord prg = header.createProgramRecord();
prg.setCommandLine(this.getProgramCommandLine());
prg.setProgramName(this.getProgramName());
prg.setProgramVersion(this.getVersion());
samWriter = writingBamArgs.openSAMFileWriter(bamOut, header, true);
iter = samReader.iterator();
SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(header.getSequenceDictionary());
FastqRecord[] currFastq = new FastqRecord[] { null, null };
while (iter.hasNext()) {
SAMRecord rec = progress.watch(iter.next());
String readName = rec.getReadName();
int fastq_index = 0;
if (rec.getReadPairedFlag()) {
if (fastqReaders.length != 2) {
LOG.error("Not paired but number of fastq!=2");
return -1;
}
fastq_index = (rec.getFirstOfPairFlag() ? 0 : 1);
} else {
if (fastqReaders.length != 1) {
LOG.error("Not paired but number of fastq!=1");
return -1;
}
fastq_index = 0;
}
if (sortOrder == SAMFileHeader.SortOrder.queryname) {
while (currFastq[fastq_index] == null || normalizeFastqName(currFastq[fastq_index].getReadName()).compareTo(readName) < 0) {
if (!fastqReaders[fastq_index].hasNext()) {
LOG.error("Read Missing for " + readName);
return -1;
}
currFastq[fastq_index] = fastqReaders[fastq_index].next();
if (normalizeFastqName(currFastq[fastq_index].getReadName()).compareTo(readName) > 0) {
LOG.error("Read Missing for " + readName);
return -1;
}
}
} else {
if (!fastqReaders[fastq_index].hasNext()) {
LOG.error("Read Missing for " + readName);
return -1;
}
currFastq[fastq_index] = fastqReaders[fastq_index].next();
}
if (normalizeFastqName(currFastq[fastq_index].getReadName()).compareTo(readName) != 0) {
LOG.error("Read Missing/Error for " + readName + " current:" + currFastq[fastq_index].getReadName());
return -1;
}
String fastqBases = currFastq[fastq_index].getReadString();
String fastqQuals = currFastq[fastq_index].getBaseQualityString();
/* handle orientation */
if (!rec.getReadUnmappedFlag() && rec.getReadNegativeStrandFlag()) {
fastqBases = AcidNucleics.reverseComplement(fastqBases);
StringBuilder sb = new StringBuilder(fastqQuals.length());
for (int i = fastqQuals.length() - 1; i >= 0; --i) sb.append(fastqQuals.charAt(i));
fastqQuals = sb.toString();
}
/* remove hard clip */
Cigar cigar = rec.getCigar();
if (cigar != null) {
List<CigarElement> ceList = cigar.getCigarElements();
if (!ceList.isEmpty()) {
CigarElement ce = ceList.get(ceList.size() - 1);
if (ce.getOperator() == CigarOperator.HARD_CLIP) {
fastqBases = fastqBases.substring(0, fastqBases.length() - ce.getLength());
fastqQuals = fastqQuals.substring(0, fastqQuals.length() - ce.getLength());
}
ce = ceList.get(0);
if (ce.getOperator() == CigarOperator.HARD_CLIP) {
fastqBases = fastqBases.substring(ce.getLength());
fastqQuals = fastqQuals.substring(ce.getLength());
}
}
}
rec.setBaseQualityString(fastqQuals);
rec.setReadString(fastqBases);
samWriter.addAlignment(rec);
}
progress.finish();
return 0;
} catch (Exception err) {
LOG.error(err);
return -1;
} finally {
if (fastqReaders != null)
for (FastqReader r : fastqReaders) CloserUtil.close(r);
CloserUtil.close(iter);
CloserUtil.close(samReader);
CloserUtil.close(samWriter);
}
}
use of htsjdk.samtools.SAMFileWriter in project jvarkit by lindenb.
the class SamFixCigar method doWork.
@Override
public int doWork(List<String> args) {
if (this.faidx == null) {
LOG.error("Reference was not specified.");
return -1;
}
GenomicSequence genomicSequence = null;
SamReader sfr = null;
SAMFileWriter sfw = null;
try {
this.indexedFastaSequenceFile = new IndexedFastaSequenceFile(faidx);
sfr = openSamReader(oneFileOrNull(args));
final SAMFileHeader header = sfr.getFileHeader();
sfw = this.writingBamArgs.setReferenceFile(this.faidx).openSAMFileWriter(outputFile, header, true);
final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(header);
final List<CigarElement> newCigar = new ArrayList<CigarElement>();
final SAMRecordIterator iter = sfr.iterator();
while (iter.hasNext()) {
final SAMRecord rec = progress.watch(iter.next());
Cigar cigar = rec.getCigar();
byte[] bases = rec.getReadBases();
if (rec.getReadUnmappedFlag() || cigar == null || cigar.getCigarElements().isEmpty() || bases == null) {
sfw.addAlignment(rec);
continue;
}
if (genomicSequence == null || genomicSequence.getSAMSequenceRecord().getSequenceIndex() != rec.getReferenceIndex()) {
genomicSequence = new GenomicSequence(indexedFastaSequenceFile, rec.getReferenceName());
}
newCigar.clear();
int refPos1 = rec.getAlignmentStart();
int readPos0 = 0;
for (final CigarElement ce : cigar.getCigarElements()) {
final CigarOperator op = ce.getOperator();
if (op.equals(CigarOperator.M)) {
for (int i = 0; i < ce.getLength(); ++i) {
char c1 = Character.toUpperCase((char) bases[readPos0]);
char c2 = Character.toUpperCase(refPos1 - 1 < genomicSequence.length() ? genomicSequence.charAt(refPos1 - 1) : '*');
if (c2 == 'N' || c1 == c2) {
newCigar.add(new CigarElement(1, CigarOperator.EQ));
} else {
newCigar.add(new CigarElement(1, CigarOperator.X));
}
refPos1++;
readPos0++;
}
} else {
newCigar.add(ce);
if (op.consumesReadBases())
readPos0 += ce.getLength();
if (op.consumesReferenceBases())
refPos1 += ce.getLength();
}
}
int i = 0;
while (i < newCigar.size()) {
final CigarOperator op1 = newCigar.get(i).getOperator();
final int length1 = newCigar.get(i).getLength();
if (i + 1 < newCigar.size() && newCigar.get(i + 1).getOperator() == op1) {
final CigarOperator op2 = newCigar.get(i + 1).getOperator();
int length2 = newCigar.get(i + 1).getLength();
newCigar.set(i, new CigarElement(length1 + length2, op2));
newCigar.remove(i + 1);
} else {
++i;
}
}
cigar = new Cigar(newCigar);
// info("changed "+rec.getCigarString()+" to "+newCigarStr+" "+rec.getReadName()+" "+rec.getReadString());
rec.setCigar(cigar);
sfw.addAlignment(rec);
}
progress.finish();
return RETURN_OK;
} catch (Exception err) {
LOG.error(err);
return -1;
} finally {
CloserUtil.close(this.indexedFastaSequenceFile);
CloserUtil.close(sfr);
CloserUtil.close(sfw);
}
}
use of htsjdk.samtools.SAMFileWriter in project jvarkit by lindenb.
the class BWAMemNOp method doWork.
@Override
public int doWork(List<String> args) {
SamReader r = null;
SAMFileWriter w = null;
try {
r = super.openSamReader(oneFileOrNull(args));
SAMFileHeader header = r.getFileHeader();
OtherCanonicalAlignFactory ocaf = new OtherCanonicalAlignFactory(header);
w = writingBamArgs.openSAMFileWriter(outputFile, header, true);
SAMRecordFactory samRecordFactory = new DefaultSAMRecordFactory();
SAMRecordIterator iter = r.iterator();
while (iter.hasNext()) {
SAMRecord rec = iter.next();
if (rec.getSupplementaryAlignmentFlag()) {
continue;
}
if (rec.getReadUnmappedFlag()) {
if (!print_only_spit_read)
w.addAlignment(rec);
continue;
}
Cigar cigar1 = rec.getCigar();
if (cigar1 == null || cigar1.isEmpty() || !(cigar1.getCigarElement(cigar1.numCigarElements() - 1).getOperator().equals(CigarOperator.S) || cigar1.getCigarElement(0).getOperator().equals(CigarOperator.S))) // last or first is soft clipping
{
if (!print_only_spit_read)
w.addAlignment(rec);
continue;
}
rec.getAlignmentStart();
List<OtherCanonicalAlign> xps = ocaf.getXPAligns(rec);
if (xps.isEmpty()) {
if (!print_only_spit_read)
w.addAlignment(rec);
continue;
}
boolean found_one = false;
for (OtherCanonicalAlign xp : xps) {
if (!rec.getReferenceName().equals(xp.getReferenceName()))
continue;
if (xp.getReadNegativeStrandFlag() != rec.getReadNegativeStrandFlag())
continue;
Cigar cigar2 = xp.getCigar();
if (cigar2 == null || cigar2.isEmpty()) {
continue;
}
SAMRecord newrec = null;
List<CigarEvt> L1 = null;
List<CigarEvt> L2 = null;
if (cigar1.getCigarElement(cigar1.numCigarElements() - 1).getOperator().equals(CigarOperator.S) && cigar1.getCigarElement(cigar1.numCigarElements() - 1).getLength() >= this.min_soft_clip_length && cigar2.getCigarElement(0).getOperator().equals(CigarOperator.S) && cigar2.getCigarElement(0).getLength() >= this.min_soft_clip_length && rec.getAlignmentEnd() < xp.getAlignmentStart()) {
newrec = samRecordFactory.createSAMRecord(header);
int ref1 = rec.getAlignmentStart();
newrec.setAlignmentStart(ref1);
L1 = cigarEvents(0, ref1, cigar1);
L2 = cigarEvents(0, xp.getAlignmentStart(), cigar2);
} else if (cigar2.getCigarElement(cigar2.numCigarElements() - 1).getOperator().equals(CigarOperator.S) && cigar2.getCigarElement(cigar2.numCigarElements() - 1).getLength() >= this.min_soft_clip_length && cigar1.getCigarElement(0).getOperator().equals(CigarOperator.S) && cigar1.getCigarElement(0).getLength() >= this.min_soft_clip_length && xp.getAlignmentEnd() < rec.getAlignmentStart()) {
newrec = samRecordFactory.createSAMRecord(header);
int ref1 = xp.getAlignmentStart();
newrec.setAlignmentStart(ref1);
L1 = cigarEvents(0, ref1, cigar2);
L2 = cigarEvents(0, rec.getAlignmentStart(), cigar1);
}
if (newrec == null)
continue;
newrec.setFlags(rec.getFlags());
newrec.setReadName(rec.getReadName());
newrec.setReadBases(rec.getReadBases());
newrec.setMappingQuality(rec.getMappingQuality());
newrec.setReferenceIndex(rec.getReferenceIndex());
newrec.setBaseQualities(rec.getBaseQualities());
if (found_one) {
newrec.setNotPrimaryAlignmentFlag(true);
}
found_one = true;
for (SAMTagAndValue tav : rec.getAttributes()) {
if (tav.tag.equals(ocaf.getAttributeKey()))
continue;
if (tav.tag.equals("NM"))
continue;
newrec.setAttribute(tav.tag, tav.value);
}
if (rec.getReadPairedFlag() && !rec.getMateUnmappedFlag()) {
newrec.setMateAlignmentStart(rec.getMateAlignmentStart());
newrec.setMateReferenceIndex(rec.getMateReferenceIndex());
newrec.setInferredInsertSize(rec.getInferredInsertSize());
}
while (!L1.isEmpty() && (L1.get(L1.size() - 1).op.equals(CigarOperator.S) || L1.get(L1.size() - 1).op.equals(CigarOperator.D) || L1.get(L1.size() - 1).op.equals(CigarOperator.H))) {
L1.remove(L1.size() - 1);
}
while (!L2.isEmpty() && L2.get(0).read0 <= L1.get(L1.size() - 1).read0) {
L2.remove(0);
}
List<CigarElement> cigarElements = new ArrayList<CigarElement>();
int i = 0;
while (i < L1.size()) {
int j = i + 1;
while (j < L1.size() && L1.get(i).op.equals(L1.get(j).op)) {
j++;
}
cigarElements.add(new CigarElement(j - i, L1.get(i).op));
i = j;
}
// add 'N'
cigarElements.add(new CigarElement((L2.get(0).ref1 - L1.get(L1.size() - 1).ref1) - 1, CigarOperator.N));
i = 0;
while (i < L2.size()) {
int j = i + 1;
while (j < L2.size() && L2.get(i).op.equals(L2.get(j).op)) {
j++;
}
cigarElements.add(new CigarElement(j - i, L2.get(i).op));
i = j;
}
// cleanup : case where 'S' is close to 'N'
i = 0;
while (i + 1 < cigarElements.size()) {
CigarElement ce1 = cigarElements.get(i);
CigarElement ce2 = cigarElements.get(i + 1);
if (i > 0 && ce1.getOperator().equals(CigarOperator.S) && ce2.getOperator().equals(CigarOperator.N)) {
cigarElements.set(i, new CigarElement(ce1.getLength(), CigarOperator.X));
} else if (i + 2 < cigarElements.size() && ce1.getOperator().equals(CigarOperator.N) && ce2.getOperator().equals(CigarOperator.S)) {
cigarElements.set(i + 1, new CigarElement(ce2.getLength(), CigarOperator.X));
}
i++;
}
newrec.setCigar(new Cigar(cigarElements));
List<SAMValidationError> validations = newrec.isValid();
if (validations != null) {
for (SAMValidationError err : validations) {
LOG.warning(err.getType() + ":" + err.getMessage());
}
}
w.addAlignment(newrec);
}
if (!found_one) {
if (!print_only_spit_read)
w.addAlignment(rec);
}
}
iter.close();
return 0;
} catch (Exception err) {
LOG.error(err);
return -1;
} finally {
CloserUtil.close(r);
CloserUtil.close(w);
}
}
use of htsjdk.samtools.SAMFileWriter in project jvarkit by lindenb.
the class FindMyVirus method doWork.
@Override
public int doWork(List<String> args) {
if (virusNames.isEmpty()) {
LOG.error("no virus name");
return -1;
}
SamReader sfr = null;
SAMFileWriter[] sfwArray = new SAMFileWriter[CAT.values().length];
try {
sfr = openSamReader(oneFileOrNull(args));
SAMFileHeader header = sfr.getFileHeader();
for (CAT category : CAT.values()) {
LOG.info("Opening " + category);
SAMFileHeader header2 = header.clone();
header2.addComment("Category:" + category.name());
header2.addComment("Description:" + category.getDescription());
SAMProgramRecord rec = header2.createProgramRecord();
rec.setCommandLine(this.getProgramCommandLine());
rec.setProgramName(getProgramName());
rec.setProgramVersion(getVersion());
rec.setAttribute("CAT", category.name());
File outputFile = new File(this.outputFile.getParentFile(), this.outputFile.getName() + "." + category.name() + ".bam");
LOG.info("Opening " + outputFile);
File countFile = new File(outputFile.getParentFile(), outputFile.getName() + "." + category.name() + ".count.txt");
SAMFileWriter sfw = writingBamArgs.openSAMFileWriter(outputFile, header2, true);
sfw = new SAMFileWriterCount(sfw, countFile, category);
sfwArray[category.ordinal()] = sfw;
}
SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(header.getSequenceDictionary());
OtherCanonicalAlignFactory xpAlignFactory = new OtherCanonicalAlignFactory(header);
SAMRecordIterator iter = sfr.iterator();
while (iter.hasNext()) {
SAMRecord rec = iter.next();
progress.watch(rec);
CAT category = null;
List<OtherCanonicalAlign> xpList = Collections.emptyList();
if (category == null && !rec.getReadPairedFlag()) {
category = CAT.unpaired;
}
if (category == null && rec.isSecondaryOrSupplementary()) {
category = CAT.secondary;
}
if (category == null && rec.getReadFailsVendorQualityCheckFlag()) {
category = CAT.failsqual;
}
if (category == null && rec.getDuplicateReadFlag()) {
category = CAT.duplicate;
}
if (category == null && rec.getReadUnmappedFlag()) {
category = CAT.unmapped;
}
if (category == null) {
xpList = xpAlignFactory.getXPAligns(rec);
}
boolean xp_containsVirus = false;
boolean xp_containsChrom = false;
for (OtherCanonicalAlign xpa : xpList) {
if (virusNames.contains(xpa.getReferenceName())) {
xp_containsVirus = true;
} else {
xp_containsChrom = true;
}
}
/* both reads mapped on ref */
if (category == null && !rec.getReadUnmappedFlag() && !rec.getMateUnmappedFlag() && !virusNames.contains(rec.getReferenceName()) && !virusNames.contains(rec.getMateReferenceName())) {
if (!xp_containsVirus) {
category = CAT.both_ref;
} else {
category = CAT.ref_and_virus_spliced;
}
}
/* pair(unmapped,mapped on reference) */
if (category == null && ((!rec.getReadUnmappedFlag() && rec.getMateUnmappedFlag() && !virusNames.contains(rec.getReferenceName())) || (rec.getReadUnmappedFlag() && !rec.getMateUnmappedFlag() && !virusNames.contains(rec.getMateReferenceName())))) {
if (!xp_containsVirus) {
category = CAT.ref_orphan;
} else {
category = CAT.ref_and_virus_spliced;
}
}
/* both reads mapped on virus */
if (category == null && !rec.getReadUnmappedFlag() && !rec.getMateUnmappedFlag() && virusNames.contains(rec.getReferenceName()) && virusNames.contains(rec.getMateReferenceName())) {
if (!xp_containsChrom) {
category = CAT.both_virus;
} else {
category = CAT.ref_and_virus_spliced;
}
}
if (category == null && ((!rec.getReadUnmappedFlag() && rec.getMateUnmappedFlag() && virusNames.contains(rec.getReferenceName())) || (rec.getReadUnmappedFlag() && !rec.getMateUnmappedFlag() && virusNames.contains(rec.getMateReferenceName())))) {
if (!xp_containsChrom) {
category = CAT.virus_orphan;
} else {
category = CAT.ref_and_virus_spliced;
}
}
if (category == null && !rec.getReadUnmappedFlag() && !rec.getMateUnmappedFlag() && ((virusNames.contains(rec.getReferenceName()) && !virusNames.contains(rec.getMateReferenceName())) || (!virusNames.contains(rec.getReferenceName()) && virusNames.contains(rec.getMateReferenceName())))) {
category = CAT.ref_and_virus;
}
/*dispatch */
if (category == null) {
LOG.warning("Not handled: " + rec);
category = CAT.undetermined;
}
sfwArray[category.ordinal()].addAlignment(rec);
}
for (SAMFileWriter sfw : sfwArray) {
LOG.info("Closing " + sfw);
sfw.close();
}
return 0;
} catch (Exception err) {
LOG.error(err);
return -1;
} finally {
LOG.info("Closing");
CloserUtil.close(sfr);
CloserUtil.close(sfwArray);
}
}
use of htsjdk.samtools.SAMFileWriter in project jvarkit by lindenb.
the class SplitByTile method doWork.
@Override
public int doWork(final List<String> args) {
if (OUTPUT == null || !OUTPUT.contains(TILEWORD) || !OUTPUT.endsWith(".bam")) {
LOG.error("Bad OUPUT name " + OUTPUT + ". must contain " + TILEWORD + " and ends with .bam");
return -1;
}
SamReader samReader = null;
final Map<Integer, SAMFileWriter> tile2writer = new HashMap<Integer, SAMFileWriter>();
final Pattern colon = Pattern.compile("[\\:]");
SAMRecordIterator iter = null;
try {
samReader = super.openSamReader(oneFileOrNull(args));
final SAMFileHeader header = samReader.getFileHeader();
iter = samReader.iterator();
while (iter.hasNext()) {
final SAMRecord rec = iter.next();
final String[] tokens = colon.split(rec.getReadName(), 6);
if (tokens.length < 5) {
LOG.error("Cannot get the 6th field in " + rec.getReadName());
samReader.close();
samReader = null;
return -1;
}
int tile = -1;
try {
tile = Integer.parseInt(tokens[4]);
} catch (Exception e) {
tile = -1;
}
if (tile < 0) {
LOG.error("Bad tile in read: " + rec.getReadName());
samReader.close();
samReader = null;
return -1;
}
SAMFileWriter sfw = tile2writer.get(tile);
if (sfw == null) {
final File outFile = new File(OUTPUT.replaceAll(TILEWORD, String.valueOf(tile)));
LOG.info("create output for " + outFile);
if (outFile.getParentFile() != null) {
outFile.getParentFile().mkdirs();
}
sfw = this.writingBamArgs.openSAMFileWriter(outFile, header, true);
tile2writer.put(tile, sfw);
}
sfw.addAlignment(rec);
}
for (final SAMFileWriter sfw : tile2writer.values()) {
sfw.close();
}
} catch (final Exception e) {
LOG.error(e);
return -1;
} finally {
if (iter != null)
iter.close();
CloserUtil.close(samReader);
}
return 0;
}
Aggregations