Search in sources :

Example 61 with SAMFileWriter

use of htsjdk.samtools.SAMFileWriter in project jvarkit by lindenb.

the class SamRetrieveSeqAndQual method doWork.

@Override
public int doWork(List<String> args) {
    FastqReader[] fastqReaders = null;
    SamReader samReader = null;
    SAMFileWriter samWriter = null;
    SAMRecordIterator iter = null;
    try {
        if (fastqFin == null) {
            LOG.error("undefined fastq file");
            return -1;
        } else {
            LOG.info("opening " + fastqFin);
            FastqReader r1 = new FastqReader(fastqFin);
            if (fastqRin == null) {
                fastqReaders = new FastqReader[] { r1 };
            } else {
                LOG.info("opening " + fastqRin);
                FastqReader r2 = new FastqReader(fastqRin);
                fastqReaders = new FastqReader[] { r1, r2 };
            }
        }
        samReader = super.openSamReader(oneFileOrNull(args));
        final SAMFileHeader.SortOrder sortOrder = samReader.getFileHeader().getSortOrder();
        if (sortOrder == null) {
            LOG.warning("undefined sort order read are in the sam order");
        } else if (!sortOrder.equals(SAMFileHeader.SortOrder.queryname)) {
            LOG.error("Bad Sort Order. Sort this input on read name");
            return -1;
        }
        SAMFileHeader header = samReader.getFileHeader().clone();
        SAMProgramRecord prg = header.createProgramRecord();
        prg.setCommandLine(this.getProgramCommandLine());
        prg.setProgramName(this.getProgramName());
        prg.setProgramVersion(this.getVersion());
        samWriter = writingBamArgs.openSAMFileWriter(bamOut, header, true);
        iter = samReader.iterator();
        SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(header.getSequenceDictionary());
        FastqRecord[] currFastq = new FastqRecord[] { null, null };
        while (iter.hasNext()) {
            SAMRecord rec = progress.watch(iter.next());
            String readName = rec.getReadName();
            int fastq_index = 0;
            if (rec.getReadPairedFlag()) {
                if (fastqReaders.length != 2) {
                    LOG.error("Not paired but number of fastq!=2");
                    return -1;
                }
                fastq_index = (rec.getFirstOfPairFlag() ? 0 : 1);
            } else {
                if (fastqReaders.length != 1) {
                    LOG.error("Not paired but number of fastq!=1");
                    return -1;
                }
                fastq_index = 0;
            }
            if (sortOrder == SAMFileHeader.SortOrder.queryname) {
                while (currFastq[fastq_index] == null || normalizeFastqName(currFastq[fastq_index].getReadName()).compareTo(readName) < 0) {
                    if (!fastqReaders[fastq_index].hasNext()) {
                        LOG.error("Read Missing for " + readName);
                        return -1;
                    }
                    currFastq[fastq_index] = fastqReaders[fastq_index].next();
                    if (normalizeFastqName(currFastq[fastq_index].getReadName()).compareTo(readName) > 0) {
                        LOG.error("Read Missing for " + readName);
                        return -1;
                    }
                }
            } else {
                if (!fastqReaders[fastq_index].hasNext()) {
                    LOG.error("Read Missing for " + readName);
                    return -1;
                }
                currFastq[fastq_index] = fastqReaders[fastq_index].next();
            }
            if (normalizeFastqName(currFastq[fastq_index].getReadName()).compareTo(readName) != 0) {
                LOG.error("Read Missing/Error for " + readName + " current:" + currFastq[fastq_index].getReadName());
                return -1;
            }
            String fastqBases = currFastq[fastq_index].getReadString();
            String fastqQuals = currFastq[fastq_index].getBaseQualityString();
            /* handle orientation */
            if (!rec.getReadUnmappedFlag() && rec.getReadNegativeStrandFlag()) {
                fastqBases = AcidNucleics.reverseComplement(fastqBases);
                StringBuilder sb = new StringBuilder(fastqQuals.length());
                for (int i = fastqQuals.length() - 1; i >= 0; --i) sb.append(fastqQuals.charAt(i));
                fastqQuals = sb.toString();
            }
            /* remove hard clip */
            Cigar cigar = rec.getCigar();
            if (cigar != null) {
                List<CigarElement> ceList = cigar.getCigarElements();
                if (!ceList.isEmpty()) {
                    CigarElement ce = ceList.get(ceList.size() - 1);
                    if (ce.getOperator() == CigarOperator.HARD_CLIP) {
                        fastqBases = fastqBases.substring(0, fastqBases.length() - ce.getLength());
                        fastqQuals = fastqQuals.substring(0, fastqQuals.length() - ce.getLength());
                    }
                    ce = ceList.get(0);
                    if (ce.getOperator() == CigarOperator.HARD_CLIP) {
                        fastqBases = fastqBases.substring(ce.getLength());
                        fastqQuals = fastqQuals.substring(ce.getLength());
                    }
                }
            }
            rec.setBaseQualityString(fastqQuals);
            rec.setReadString(fastqBases);
            samWriter.addAlignment(rec);
        }
        progress.finish();
        return 0;
    } catch (Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        if (fastqReaders != null)
            for (FastqReader r : fastqReaders) CloserUtil.close(r);
        CloserUtil.close(iter);
        CloserUtil.close(samReader);
        CloserUtil.close(samWriter);
    }
}
Also used : SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) SAMFileWriter(htsjdk.samtools.SAMFileWriter) FastqRecord(htsjdk.samtools.fastq.FastqRecord) CigarElement(htsjdk.samtools.CigarElement) SAMProgramRecord(htsjdk.samtools.SAMProgramRecord) SamReader(htsjdk.samtools.SamReader) Cigar(htsjdk.samtools.Cigar) FastqReader(htsjdk.samtools.fastq.FastqReader) SAMRecord(htsjdk.samtools.SAMRecord) SAMFileHeader(htsjdk.samtools.SAMFileHeader)

Example 62 with SAMFileWriter

use of htsjdk.samtools.SAMFileWriter in project jvarkit by lindenb.

the class SamFixCigar method doWork.

@Override
public int doWork(List<String> args) {
    if (this.faidx == null) {
        LOG.error("Reference was not specified.");
        return -1;
    }
    GenomicSequence genomicSequence = null;
    SamReader sfr = null;
    SAMFileWriter sfw = null;
    try {
        this.indexedFastaSequenceFile = new IndexedFastaSequenceFile(faidx);
        sfr = openSamReader(oneFileOrNull(args));
        final SAMFileHeader header = sfr.getFileHeader();
        sfw = this.writingBamArgs.setReferenceFile(this.faidx).openSAMFileWriter(outputFile, header, true);
        final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(header);
        final List<CigarElement> newCigar = new ArrayList<CigarElement>();
        final SAMRecordIterator iter = sfr.iterator();
        while (iter.hasNext()) {
            final SAMRecord rec = progress.watch(iter.next());
            Cigar cigar = rec.getCigar();
            byte[] bases = rec.getReadBases();
            if (rec.getReadUnmappedFlag() || cigar == null || cigar.getCigarElements().isEmpty() || bases == null) {
                sfw.addAlignment(rec);
                continue;
            }
            if (genomicSequence == null || genomicSequence.getSAMSequenceRecord().getSequenceIndex() != rec.getReferenceIndex()) {
                genomicSequence = new GenomicSequence(indexedFastaSequenceFile, rec.getReferenceName());
            }
            newCigar.clear();
            int refPos1 = rec.getAlignmentStart();
            int readPos0 = 0;
            for (final CigarElement ce : cigar.getCigarElements()) {
                final CigarOperator op = ce.getOperator();
                if (op.equals(CigarOperator.M)) {
                    for (int i = 0; i < ce.getLength(); ++i) {
                        char c1 = Character.toUpperCase((char) bases[readPos0]);
                        char c2 = Character.toUpperCase(refPos1 - 1 < genomicSequence.length() ? genomicSequence.charAt(refPos1 - 1) : '*');
                        if (c2 == 'N' || c1 == c2) {
                            newCigar.add(new CigarElement(1, CigarOperator.EQ));
                        } else {
                            newCigar.add(new CigarElement(1, CigarOperator.X));
                        }
                        refPos1++;
                        readPos0++;
                    }
                } else {
                    newCigar.add(ce);
                    if (op.consumesReadBases())
                        readPos0 += ce.getLength();
                    if (op.consumesReferenceBases())
                        refPos1 += ce.getLength();
                }
            }
            int i = 0;
            while (i < newCigar.size()) {
                final CigarOperator op1 = newCigar.get(i).getOperator();
                final int length1 = newCigar.get(i).getLength();
                if (i + 1 < newCigar.size() && newCigar.get(i + 1).getOperator() == op1) {
                    final CigarOperator op2 = newCigar.get(i + 1).getOperator();
                    int length2 = newCigar.get(i + 1).getLength();
                    newCigar.set(i, new CigarElement(length1 + length2, op2));
                    newCigar.remove(i + 1);
                } else {
                    ++i;
                }
            }
            cigar = new Cigar(newCigar);
            // info("changed "+rec.getCigarString()+" to "+newCigarStr+" "+rec.getReadName()+" "+rec.getReadString());
            rec.setCigar(cigar);
            sfw.addAlignment(rec);
        }
        progress.finish();
        return RETURN_OK;
    } catch (Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(this.indexedFastaSequenceFile);
        CloserUtil.close(sfr);
        CloserUtil.close(sfw);
    }
}
Also used : SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) GenomicSequence(com.github.lindenb.jvarkit.util.picard.GenomicSequence) SAMFileWriter(htsjdk.samtools.SAMFileWriter) ArrayList(java.util.ArrayList) CigarOperator(htsjdk.samtools.CigarOperator) CigarElement(htsjdk.samtools.CigarElement) IndexedFastaSequenceFile(htsjdk.samtools.reference.IndexedFastaSequenceFile) SamReader(htsjdk.samtools.SamReader) Cigar(htsjdk.samtools.Cigar) SAMRecord(htsjdk.samtools.SAMRecord) SAMFileHeader(htsjdk.samtools.SAMFileHeader)

Example 63 with SAMFileWriter

use of htsjdk.samtools.SAMFileWriter in project jvarkit by lindenb.

the class BWAMemNOp method doWork.

@Override
public int doWork(List<String> args) {
    SamReader r = null;
    SAMFileWriter w = null;
    try {
        r = super.openSamReader(oneFileOrNull(args));
        SAMFileHeader header = r.getFileHeader();
        OtherCanonicalAlignFactory ocaf = new OtherCanonicalAlignFactory(header);
        w = writingBamArgs.openSAMFileWriter(outputFile, header, true);
        SAMRecordFactory samRecordFactory = new DefaultSAMRecordFactory();
        SAMRecordIterator iter = r.iterator();
        while (iter.hasNext()) {
            SAMRecord rec = iter.next();
            if (rec.getSupplementaryAlignmentFlag()) {
                continue;
            }
            if (rec.getReadUnmappedFlag()) {
                if (!print_only_spit_read)
                    w.addAlignment(rec);
                continue;
            }
            Cigar cigar1 = rec.getCigar();
            if (cigar1 == null || cigar1.isEmpty() || !(cigar1.getCigarElement(cigar1.numCigarElements() - 1).getOperator().equals(CigarOperator.S) || cigar1.getCigarElement(0).getOperator().equals(CigarOperator.S))) // last or first is soft clipping
            {
                if (!print_only_spit_read)
                    w.addAlignment(rec);
                continue;
            }
            rec.getAlignmentStart();
            List<OtherCanonicalAlign> xps = ocaf.getXPAligns(rec);
            if (xps.isEmpty()) {
                if (!print_only_spit_read)
                    w.addAlignment(rec);
                continue;
            }
            boolean found_one = false;
            for (OtherCanonicalAlign xp : xps) {
                if (!rec.getReferenceName().equals(xp.getReferenceName()))
                    continue;
                if (xp.getReadNegativeStrandFlag() != rec.getReadNegativeStrandFlag())
                    continue;
                Cigar cigar2 = xp.getCigar();
                if (cigar2 == null || cigar2.isEmpty()) {
                    continue;
                }
                SAMRecord newrec = null;
                List<CigarEvt> L1 = null;
                List<CigarEvt> L2 = null;
                if (cigar1.getCigarElement(cigar1.numCigarElements() - 1).getOperator().equals(CigarOperator.S) && cigar1.getCigarElement(cigar1.numCigarElements() - 1).getLength() >= this.min_soft_clip_length && cigar2.getCigarElement(0).getOperator().equals(CigarOperator.S) && cigar2.getCigarElement(0).getLength() >= this.min_soft_clip_length && rec.getAlignmentEnd() < xp.getAlignmentStart()) {
                    newrec = samRecordFactory.createSAMRecord(header);
                    int ref1 = rec.getAlignmentStart();
                    newrec.setAlignmentStart(ref1);
                    L1 = cigarEvents(0, ref1, cigar1);
                    L2 = cigarEvents(0, xp.getAlignmentStart(), cigar2);
                } else if (cigar2.getCigarElement(cigar2.numCigarElements() - 1).getOperator().equals(CigarOperator.S) && cigar2.getCigarElement(cigar2.numCigarElements() - 1).getLength() >= this.min_soft_clip_length && cigar1.getCigarElement(0).getOperator().equals(CigarOperator.S) && cigar1.getCigarElement(0).getLength() >= this.min_soft_clip_length && xp.getAlignmentEnd() < rec.getAlignmentStart()) {
                    newrec = samRecordFactory.createSAMRecord(header);
                    int ref1 = xp.getAlignmentStart();
                    newrec.setAlignmentStart(ref1);
                    L1 = cigarEvents(0, ref1, cigar2);
                    L2 = cigarEvents(0, rec.getAlignmentStart(), cigar1);
                }
                if (newrec == null)
                    continue;
                newrec.setFlags(rec.getFlags());
                newrec.setReadName(rec.getReadName());
                newrec.setReadBases(rec.getReadBases());
                newrec.setMappingQuality(rec.getMappingQuality());
                newrec.setReferenceIndex(rec.getReferenceIndex());
                newrec.setBaseQualities(rec.getBaseQualities());
                if (found_one) {
                    newrec.setNotPrimaryAlignmentFlag(true);
                }
                found_one = true;
                for (SAMTagAndValue tav : rec.getAttributes()) {
                    if (tav.tag.equals(ocaf.getAttributeKey()))
                        continue;
                    if (tav.tag.equals("NM"))
                        continue;
                    newrec.setAttribute(tav.tag, tav.value);
                }
                if (rec.getReadPairedFlag() && !rec.getMateUnmappedFlag()) {
                    newrec.setMateAlignmentStart(rec.getMateAlignmentStart());
                    newrec.setMateReferenceIndex(rec.getMateReferenceIndex());
                    newrec.setInferredInsertSize(rec.getInferredInsertSize());
                }
                while (!L1.isEmpty() && (L1.get(L1.size() - 1).op.equals(CigarOperator.S) || L1.get(L1.size() - 1).op.equals(CigarOperator.D) || L1.get(L1.size() - 1).op.equals(CigarOperator.H))) {
                    L1.remove(L1.size() - 1);
                }
                while (!L2.isEmpty() && L2.get(0).read0 <= L1.get(L1.size() - 1).read0) {
                    L2.remove(0);
                }
                List<CigarElement> cigarElements = new ArrayList<CigarElement>();
                int i = 0;
                while (i < L1.size()) {
                    int j = i + 1;
                    while (j < L1.size() && L1.get(i).op.equals(L1.get(j).op)) {
                        j++;
                    }
                    cigarElements.add(new CigarElement(j - i, L1.get(i).op));
                    i = j;
                }
                // add 'N'
                cigarElements.add(new CigarElement((L2.get(0).ref1 - L1.get(L1.size() - 1).ref1) - 1, CigarOperator.N));
                i = 0;
                while (i < L2.size()) {
                    int j = i + 1;
                    while (j < L2.size() && L2.get(i).op.equals(L2.get(j).op)) {
                        j++;
                    }
                    cigarElements.add(new CigarElement(j - i, L2.get(i).op));
                    i = j;
                }
                // cleanup : case where  'S' is close to 'N'
                i = 0;
                while (i + 1 < cigarElements.size()) {
                    CigarElement ce1 = cigarElements.get(i);
                    CigarElement ce2 = cigarElements.get(i + 1);
                    if (i > 0 && ce1.getOperator().equals(CigarOperator.S) && ce2.getOperator().equals(CigarOperator.N)) {
                        cigarElements.set(i, new CigarElement(ce1.getLength(), CigarOperator.X));
                    } else if (i + 2 < cigarElements.size() && ce1.getOperator().equals(CigarOperator.N) && ce2.getOperator().equals(CigarOperator.S)) {
                        cigarElements.set(i + 1, new CigarElement(ce2.getLength(), CigarOperator.X));
                    }
                    i++;
                }
                newrec.setCigar(new Cigar(cigarElements));
                List<SAMValidationError> validations = newrec.isValid();
                if (validations != null) {
                    for (SAMValidationError err : validations) {
                        LOG.warning(err.getType() + ":" + err.getMessage());
                    }
                }
                w.addAlignment(newrec);
            }
            if (!found_one) {
                if (!print_only_spit_read)
                    w.addAlignment(rec);
            }
        }
        iter.close();
        return 0;
    } catch (Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(r);
        CloserUtil.close(w);
    }
}
Also used : SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) OtherCanonicalAlign(com.github.lindenb.jvarkit.util.picard.OtherCanonicalAlign) SAMFileWriter(htsjdk.samtools.SAMFileWriter) ArrayList(java.util.ArrayList) OtherCanonicalAlignFactory(com.github.lindenb.jvarkit.util.picard.OtherCanonicalAlignFactory) DefaultSAMRecordFactory(htsjdk.samtools.DefaultSAMRecordFactory) CigarElement(htsjdk.samtools.CigarElement) SamReader(htsjdk.samtools.SamReader) SAMValidationError(htsjdk.samtools.SAMValidationError) Cigar(htsjdk.samtools.Cigar) SAMRecord(htsjdk.samtools.SAMRecord) DefaultSAMRecordFactory(htsjdk.samtools.DefaultSAMRecordFactory) SAMRecordFactory(htsjdk.samtools.SAMRecordFactory) SAMFileHeader(htsjdk.samtools.SAMFileHeader) SAMTagAndValue(htsjdk.samtools.SAMRecord.SAMTagAndValue)

Example 64 with SAMFileWriter

use of htsjdk.samtools.SAMFileWriter in project jvarkit by lindenb.

the class FindMyVirus method doWork.

@Override
public int doWork(List<String> args) {
    if (virusNames.isEmpty()) {
        LOG.error("no virus name");
        return -1;
    }
    SamReader sfr = null;
    SAMFileWriter[] sfwArray = new SAMFileWriter[CAT.values().length];
    try {
        sfr = openSamReader(oneFileOrNull(args));
        SAMFileHeader header = sfr.getFileHeader();
        for (CAT category : CAT.values()) {
            LOG.info("Opening " + category);
            SAMFileHeader header2 = header.clone();
            header2.addComment("Category:" + category.name());
            header2.addComment("Description:" + category.getDescription());
            SAMProgramRecord rec = header2.createProgramRecord();
            rec.setCommandLine(this.getProgramCommandLine());
            rec.setProgramName(getProgramName());
            rec.setProgramVersion(getVersion());
            rec.setAttribute("CAT", category.name());
            File outputFile = new File(this.outputFile.getParentFile(), this.outputFile.getName() + "." + category.name() + ".bam");
            LOG.info("Opening " + outputFile);
            File countFile = new File(outputFile.getParentFile(), outputFile.getName() + "." + category.name() + ".count.txt");
            SAMFileWriter sfw = writingBamArgs.openSAMFileWriter(outputFile, header2, true);
            sfw = new SAMFileWriterCount(sfw, countFile, category);
            sfwArray[category.ordinal()] = sfw;
        }
        SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(header.getSequenceDictionary());
        OtherCanonicalAlignFactory xpAlignFactory = new OtherCanonicalAlignFactory(header);
        SAMRecordIterator iter = sfr.iterator();
        while (iter.hasNext()) {
            SAMRecord rec = iter.next();
            progress.watch(rec);
            CAT category = null;
            List<OtherCanonicalAlign> xpList = Collections.emptyList();
            if (category == null && !rec.getReadPairedFlag()) {
                category = CAT.unpaired;
            }
            if (category == null && rec.isSecondaryOrSupplementary()) {
                category = CAT.secondary;
            }
            if (category == null && rec.getReadFailsVendorQualityCheckFlag()) {
                category = CAT.failsqual;
            }
            if (category == null && rec.getDuplicateReadFlag()) {
                category = CAT.duplicate;
            }
            if (category == null && rec.getReadUnmappedFlag()) {
                category = CAT.unmapped;
            }
            if (category == null) {
                xpList = xpAlignFactory.getXPAligns(rec);
            }
            boolean xp_containsVirus = false;
            boolean xp_containsChrom = false;
            for (OtherCanonicalAlign xpa : xpList) {
                if (virusNames.contains(xpa.getReferenceName())) {
                    xp_containsVirus = true;
                } else {
                    xp_containsChrom = true;
                }
            }
            /* both reads mapped on ref */
            if (category == null && !rec.getReadUnmappedFlag() && !rec.getMateUnmappedFlag() && !virusNames.contains(rec.getReferenceName()) && !virusNames.contains(rec.getMateReferenceName())) {
                if (!xp_containsVirus) {
                    category = CAT.both_ref;
                } else {
                    category = CAT.ref_and_virus_spliced;
                }
            }
            /*  pair(unmapped,mapped on reference) */
            if (category == null && ((!rec.getReadUnmappedFlag() && rec.getMateUnmappedFlag() && !virusNames.contains(rec.getReferenceName())) || (rec.getReadUnmappedFlag() && !rec.getMateUnmappedFlag() && !virusNames.contains(rec.getMateReferenceName())))) {
                if (!xp_containsVirus) {
                    category = CAT.ref_orphan;
                } else {
                    category = CAT.ref_and_virus_spliced;
                }
            }
            /* both reads mapped on virus */
            if (category == null && !rec.getReadUnmappedFlag() && !rec.getMateUnmappedFlag() && virusNames.contains(rec.getReferenceName()) && virusNames.contains(rec.getMateReferenceName())) {
                if (!xp_containsChrom) {
                    category = CAT.both_virus;
                } else {
                    category = CAT.ref_and_virus_spliced;
                }
            }
            if (category == null && ((!rec.getReadUnmappedFlag() && rec.getMateUnmappedFlag() && virusNames.contains(rec.getReferenceName())) || (rec.getReadUnmappedFlag() && !rec.getMateUnmappedFlag() && virusNames.contains(rec.getMateReferenceName())))) {
                if (!xp_containsChrom) {
                    category = CAT.virus_orphan;
                } else {
                    category = CAT.ref_and_virus_spliced;
                }
            }
            if (category == null && !rec.getReadUnmappedFlag() && !rec.getMateUnmappedFlag() && ((virusNames.contains(rec.getReferenceName()) && !virusNames.contains(rec.getMateReferenceName())) || (!virusNames.contains(rec.getReferenceName()) && virusNames.contains(rec.getMateReferenceName())))) {
                category = CAT.ref_and_virus;
            }
            /*dispatch */
            if (category == null) {
                LOG.warning("Not handled: " + rec);
                category = CAT.undetermined;
            }
            sfwArray[category.ordinal()].addAlignment(rec);
        }
        for (SAMFileWriter sfw : sfwArray) {
            LOG.info("Closing " + sfw);
            sfw.close();
        }
        return 0;
    } catch (Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        LOG.info("Closing");
        CloserUtil.close(sfr);
        CloserUtil.close(sfwArray);
    }
}
Also used : SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) OtherCanonicalAlign(com.github.lindenb.jvarkit.util.picard.OtherCanonicalAlign) SAMFileWriter(htsjdk.samtools.SAMFileWriter) OtherCanonicalAlignFactory(com.github.lindenb.jvarkit.util.picard.OtherCanonicalAlignFactory) SAMProgramRecord(htsjdk.samtools.SAMProgramRecord) SamReader(htsjdk.samtools.SamReader) SAMRecord(htsjdk.samtools.SAMRecord) SAMFileHeader(htsjdk.samtools.SAMFileHeader) File(java.io.File)

Example 65 with SAMFileWriter

use of htsjdk.samtools.SAMFileWriter in project jvarkit by lindenb.

the class SplitByTile method doWork.

@Override
public int doWork(final List<String> args) {
    if (OUTPUT == null || !OUTPUT.contains(TILEWORD) || !OUTPUT.endsWith(".bam")) {
        LOG.error("Bad OUPUT name " + OUTPUT + ". must contain " + TILEWORD + " and ends with .bam");
        return -1;
    }
    SamReader samReader = null;
    final Map<Integer, SAMFileWriter> tile2writer = new HashMap<Integer, SAMFileWriter>();
    final Pattern colon = Pattern.compile("[\\:]");
    SAMRecordIterator iter = null;
    try {
        samReader = super.openSamReader(oneFileOrNull(args));
        final SAMFileHeader header = samReader.getFileHeader();
        iter = samReader.iterator();
        while (iter.hasNext()) {
            final SAMRecord rec = iter.next();
            final String[] tokens = colon.split(rec.getReadName(), 6);
            if (tokens.length < 5) {
                LOG.error("Cannot get the 6th field in " + rec.getReadName());
                samReader.close();
                samReader = null;
                return -1;
            }
            int tile = -1;
            try {
                tile = Integer.parseInt(tokens[4]);
            } catch (Exception e) {
                tile = -1;
            }
            if (tile < 0) {
                LOG.error("Bad tile in read: " + rec.getReadName());
                samReader.close();
                samReader = null;
                return -1;
            }
            SAMFileWriter sfw = tile2writer.get(tile);
            if (sfw == null) {
                final File outFile = new File(OUTPUT.replaceAll(TILEWORD, String.valueOf(tile)));
                LOG.info("create output for " + outFile);
                if (outFile.getParentFile() != null) {
                    outFile.getParentFile().mkdirs();
                }
                sfw = this.writingBamArgs.openSAMFileWriter(outFile, header, true);
                tile2writer.put(tile, sfw);
            }
            sfw.addAlignment(rec);
        }
        for (final SAMFileWriter sfw : tile2writer.values()) {
            sfw.close();
        }
    } catch (final Exception e) {
        LOG.error(e);
        return -1;
    } finally {
        if (iter != null)
            iter.close();
        CloserUtil.close(samReader);
    }
    return 0;
}
Also used : Pattern(java.util.regex.Pattern) SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) HashMap(java.util.HashMap) SAMFileWriter(htsjdk.samtools.SAMFileWriter) SamReader(htsjdk.samtools.SamReader) SAMRecord(htsjdk.samtools.SAMRecord) SAMFileHeader(htsjdk.samtools.SAMFileHeader) File(java.io.File)

Aggregations

SAMFileWriter (htsjdk.samtools.SAMFileWriter)76 SAMRecord (htsjdk.samtools.SAMRecord)63 SAMFileHeader (htsjdk.samtools.SAMFileHeader)55 SamReader (htsjdk.samtools.SamReader)55 SAMRecordIterator (htsjdk.samtools.SAMRecordIterator)46 File (java.io.File)40 SAMFileWriterFactory (htsjdk.samtools.SAMFileWriterFactory)27 SAMSequenceDictionaryProgress (com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress)25 IOException (java.io.IOException)22 ArrayList (java.util.ArrayList)20 SAMReadGroupRecord (htsjdk.samtools.SAMReadGroupRecord)14 Cigar (htsjdk.samtools.Cigar)13 SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary)13 CigarElement (htsjdk.samtools.CigarElement)12 SamReaderFactory (htsjdk.samtools.SamReaderFactory)12 SAMSequenceRecord (htsjdk.samtools.SAMSequenceRecord)10 Interval (htsjdk.samtools.util.Interval)9 PrintWriter (java.io.PrintWriter)9 List (java.util.List)9 HashMap (java.util.HashMap)8