Search in sources :

Example 21 with IndexedFastaSequenceFile

use of htsjdk.samtools.reference.IndexedFastaSequenceFile in project jvarkit by lindenb.

the class SamSlop method doWork.

@Override
public int doWork(List<String> args) {
    if (this.faidx == null) {
        LOG.error("Reference was not specified.");
        return -1;
    }
    if (this.defaultQual.length() != 1) {
        LOG.error("default quality should have length==1 " + this.defaultQual);
    }
    GenomicSequence genomicSequence = null;
    SamReader sfr = null;
    SAMFileWriter sfw = null;
    IndexedFastaSequenceFile indexedFastaSequenceFile = null;
    final char defaultQUAL = this.defaultQual.charAt(0);
    try {
        final String inputName = oneFileOrNull(args);
        LOG.info("Loading reference");
        indexedFastaSequenceFile = new IndexedFastaSequenceFile(faidx);
        sfr = openSamReader(inputName);
        final SAMFileHeader header = sfr.getFileHeader();
        header.setSortOrder(SortOrder.unsorted);
        sfw = writingBamArgs.openSAMFileWriter(outputFile, header, true);
        final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(header);
        final SAMRecordIterator iter = sfr.iterator();
        while (iter.hasNext()) {
            final SAMRecord rec = progress.watch(iter.next());
            final Cigar cigar = rec.getCigar();
            if (rec.getReadUnmappedFlag() || cigar == null || cigar.isEmpty() || rec.getReadBases() == SAMRecord.NULL_SEQUENCE || (this.extend5 <= 0 && this.extend3 <= 0)) {
                sfw.addAlignment(rec);
                continue;
            }
            final StringBuilder sbs = new StringBuilder(rec.getReadString());
            final StringBuilder sbq = new StringBuilder(rec.getBaseQualityString());
            if (genomicSequence == null || genomicSequence.getSAMSequenceRecord().getSequenceIndex() != rec.getReferenceIndex()) {
                genomicSequence = new GenomicSequence(indexedFastaSequenceFile, rec.getReferenceName());
            }
            final int refPos1 = (this.removeClip ? rec.getAlignmentStart() : rec.getUnclippedStart());
            final int endAlignmend1 = (this.removeClip ? rec.getAlignmentEnd() : rec.getUnclippedEnd());
            final List<CigarElement> cl = new ArrayList<>(cigar.getCigarElements());
            if (!this.removeClip) {
                // replace clip S/H by match M
                for (int i = 0; i < cl.size(); ++i) {
                    final CigarElement ce = cl.get(i);
                    if (ce.getOperator() == CigarOperator.S || ce.getOperator() == CigarOperator.H) {
                        cl.set(i, new CigarElement(ce.getLength(), CigarOperator.M));
                    }
                }
            }
            if (this.extend5 > 0 && refPos1 > 1) {
                if (this.removeClip) {
                    // /remove hard + soft clip 5'
                    while (!cl.isEmpty()) {
                        // first
                        final CigarElement ce = cl.get(0);
                        // not a clip
                        if (!(ce.getOperator() == CigarOperator.S || ce.getOperator() == CigarOperator.H)) {
                            break;
                        }
                        if (ce.getOperator() == CigarOperator.S) {
                            sbs.replace(0, ce.getLength(), "");
                            sbq.replace(0, ce.getLength(), "");
                        }
                        // remove first
                        cl.remove(0);
                    }
                }
                final StringBuilder prefix = new StringBuilder(this.extend5);
                // /append + soft clip 5'
                for (int i = 0; i < this.extend5; ++i) {
                    int x1 = (refPos1 - 1) - i;
                    // break if out of genome
                    if (x1 < 1)
                        break;
                    prefix.insert(0, genomicSequence.charAt(x1 - 1));
                }
                sbs.insert(0, prefix.toString());
                // preprend quality
                for (int i = 0; i < prefix.length(); ++i) sbq.insert(0, defaultQUAL);
                // prepend cigar
                cl.add(0, new CigarElement(prefix.length(), CigarOperator.M));
                // update start pos
                rec.setAlignmentStart(refPos1 - prefix.length());
            }
            if (this.extend3 > 0 && rec.getAlignmentEnd() < genomicSequence.length()) {
                if (this.removeClip) {
                    // /remove hard + soft clip 3'
                    while (!cl.isEmpty()) {
                        // last
                        final CigarElement ce = cl.get(cl.size() - 1);
                        // not a clip
                        if (!(ce.getOperator() == CigarOperator.S || ce.getOperator() == CigarOperator.H)) {
                            break;
                        }
                        if (ce.getOperator() == CigarOperator.S) {
                            sbs.setLength(sbs.length() - ce.getLength());
                            sbq.setLength(sbq.length() - ce.getLength());
                        }
                        // remove last
                        cl.remove(cl.size() - 1);
                    }
                }
                int extend = 0;
                for (int pos1 = endAlignmend1 + 1; pos1 <= (endAlignmend1 + this.extend3) && pos1 <= genomicSequence.length(); ++pos1) {
                    sbs.append(genomicSequence.charAt(pos1 - 1));
                    sbq.append(defaultQUAL);
                    ++extend;
                }
                // append cigar
                cl.add(new CigarElement(extend, CigarOperator.M));
            }
            // simplify cigar
            int idx = 0;
            while (idx + 1 < cl.size()) {
                final CigarElement ce1 = cl.get(idx);
                final CigarElement ce2 = cl.get(idx + 1);
                if (ce1.getOperator() == ce2.getOperator()) {
                    cl.set(idx, new CigarElement(ce1.getLength() + ce2.getLength(), ce1.getOperator()));
                    cl.remove(idx + 1);
                } else {
                    idx++;
                }
            }
            rec.setCigar(new Cigar(cl));
            rec.setReadString(sbs.toString());
            rec.setBaseQualityString(sbq.toString());
            final List<SAMValidationError> errors = rec.isValid();
            if (errors != null && !errors.isEmpty()) {
                for (SAMValidationError err : errors) {
                    LOG.error(err.getMessage());
                }
            }
            // info("changed "+rec.getCigarString()+" to "+newCigarStr+" "+rec.getReadName()+" "+rec.getReadString());
            sfw.addAlignment(rec);
        }
        progress.finish();
        return RETURN_OK;
    } catch (Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(indexedFastaSequenceFile);
        CloserUtil.close(sfr);
        CloserUtil.close(sfw);
    }
}
Also used : SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) GenomicSequence(com.github.lindenb.jvarkit.util.picard.GenomicSequence) SAMFileWriter(htsjdk.samtools.SAMFileWriter) ArrayList(java.util.ArrayList) CigarElement(htsjdk.samtools.CigarElement) IndexedFastaSequenceFile(htsjdk.samtools.reference.IndexedFastaSequenceFile) SamReader(htsjdk.samtools.SamReader) SAMValidationError(htsjdk.samtools.SAMValidationError) Cigar(htsjdk.samtools.Cigar) SAMRecord(htsjdk.samtools.SAMRecord) SAMFileHeader(htsjdk.samtools.SAMFileHeader)

Example 22 with IndexedFastaSequenceFile

use of htsjdk.samtools.reference.IndexedFastaSequenceFile in project jvarkit by lindenb.

the class VcfToBam method doWork.

@Override
public int doWork(final List<String> args) {
    if (this.faidx == null) {
        LOG.error("No REF defined");
        return -1;
    }
    if (readSize <= 0) {
        LOG.error("bad read size");
        return -1;
    }
    if (fragmentSize <= readSize * 2) {
        LOG.error("bad fragment size");
        return -1;
    }
    VcfIterator iter = null;
    try {
        this.indexedFastaSequenceFile = new IndexedFastaSequenceFile(this.faidx);
        iter = super.openVcfIterator(super.oneFileOrNull(args));
        run(iter);
        return 0;
    } catch (final Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(iter);
    }
}
Also used : VcfIterator(com.github.lindenb.jvarkit.util.vcf.VcfIterator) IndexedFastaSequenceFile(htsjdk.samtools.reference.IndexedFastaSequenceFile) IOException(java.io.IOException)

Example 23 with IndexedFastaSequenceFile

use of htsjdk.samtools.reference.IndexedFastaSequenceFile in project jvarkit by lindenb.

the class Sam2Tsv method doWork.

@Override
public int doWork(final List<String> args) {
    if (printAlignment) {
        L1 = new StringBuilder();
        L2 = new StringBuilder();
        L3 = new StringBuilder();
    }
    SamReader samFileReader = null;
    try {
        if (refFile != null) {
            this.indexedFastaSequenceFile = new IndexedFastaSequenceFile(refFile);
        }
        this.out = openFileOrStdoutAsPrintWriter(outputFile);
        this.out.println("#READ_NAME\tFLAG\tCHROM\tREAD_POS\tBASE\tQUAL\tREF_POS\tREF\tOP");
        samFileReader = openSamReader(oneFileOrNull(args));
        scan(samFileReader);
        samFileReader.close();
        samFileReader = null;
        this.out.flush();
        this.out.close();
        this.out = null;
        return RETURN_OK;
    } catch (final Exception e) {
        LOG.error(e);
        return -1;
    } finally {
        CloserUtil.close(this.indexedFastaSequenceFile);
        CloserUtil.close(samFileReader);
        CloserUtil.close(out);
        L1 = null;
        L2 = null;
        L3 = null;
    }
}
Also used : SamReader(htsjdk.samtools.SamReader) IndexedFastaSequenceFile(htsjdk.samtools.reference.IndexedFastaSequenceFile)

Example 24 with IndexedFastaSequenceFile

use of htsjdk.samtools.reference.IndexedFastaSequenceFile in project gatk by broadinstitute.

the class ReadUtilsUnitTest method testReadWithNsRefIndexInDeletion.

@Test
public void testReadWithNsRefIndexInDeletion() throws FileNotFoundException {
    final IndexedFastaSequenceFile seq = new CachingIndexedFastaSequenceFile(new File(exampleReference));
    final SAMFileHeader header = ArtificialReadUtils.createArtificialSamHeader(seq.getSequenceDictionary());
    final int readLength = 76;
    final GATKRead read = ArtificialReadUtils.createArtificialRead(header, "myRead", 0, 8975, readLength);
    read.setBases(Utils.dupBytes((byte) 'A', readLength));
    read.setBaseQualities(Utils.dupBytes((byte) 30, readLength));
    read.setCigar("3M414N1D73M");
    final int result = ReadUtils.getReadCoordinateForReferenceCoordinateUpToEndOfRead(read, 9392, ReadUtils.ClippingTail.LEFT_TAIL);
    Assert.assertEquals(result, 2);
}
Also used : CachingIndexedFastaSequenceFile(org.broadinstitute.hellbender.utils.fasta.CachingIndexedFastaSequenceFile) SAMFileHeader(htsjdk.samtools.SAMFileHeader) CachingIndexedFastaSequenceFile(org.broadinstitute.hellbender.utils.fasta.CachingIndexedFastaSequenceFile) File(java.io.File) IndexedFastaSequenceFile(htsjdk.samtools.reference.IndexedFastaSequenceFile) CachingIndexedFastaSequenceFile(org.broadinstitute.hellbender.utils.fasta.CachingIndexedFastaSequenceFile) IndexedFastaSequenceFile(htsjdk.samtools.reference.IndexedFastaSequenceFile) BaseTest(org.broadinstitute.hellbender.utils.test.BaseTest) Test(org.testng.annotations.Test)

Example 25 with IndexedFastaSequenceFile

use of htsjdk.samtools.reference.IndexedFastaSequenceFile in project gatk by broadinstitute.

the class ReadUtilsUnitTest method testReadWithNsRefAfterDeletion.

@Test
public void testReadWithNsRefAfterDeletion() throws FileNotFoundException {
    final IndexedFastaSequenceFile seq = new CachingIndexedFastaSequenceFile(new File(exampleReference));
    final SAMFileHeader header = ArtificialReadUtils.createArtificialSamHeader(seq.getSequenceDictionary());
    final int readLength = 76;
    final GATKRead read = ArtificialReadUtils.createArtificialRead(header, "myRead", 0, 8975, readLength);
    read.setBases(Utils.dupBytes((byte) 'A', readLength));
    read.setBaseQualities(Utils.dupBytes((byte) 30, readLength));
    read.setCigar("3M414N1D73M");
    final int result = ReadUtils.getReadCoordinateForReferenceCoordinateUpToEndOfRead(read, 9393, ReadUtils.ClippingTail.LEFT_TAIL);
    Assert.assertEquals(result, 3);
}
Also used : CachingIndexedFastaSequenceFile(org.broadinstitute.hellbender.utils.fasta.CachingIndexedFastaSequenceFile) SAMFileHeader(htsjdk.samtools.SAMFileHeader) CachingIndexedFastaSequenceFile(org.broadinstitute.hellbender.utils.fasta.CachingIndexedFastaSequenceFile) File(java.io.File) IndexedFastaSequenceFile(htsjdk.samtools.reference.IndexedFastaSequenceFile) CachingIndexedFastaSequenceFile(org.broadinstitute.hellbender.utils.fasta.CachingIndexedFastaSequenceFile) IndexedFastaSequenceFile(htsjdk.samtools.reference.IndexedFastaSequenceFile) BaseTest(org.broadinstitute.hellbender.utils.test.BaseTest) Test(org.testng.annotations.Test)

Aggregations

IndexedFastaSequenceFile (htsjdk.samtools.reference.IndexedFastaSequenceFile)57 File (java.io.File)34 SamReader (htsjdk.samtools.SamReader)22 SAMRecord (htsjdk.samtools.SAMRecord)20 GenomicSequence (com.github.lindenb.jvarkit.util.picard.GenomicSequence)16 SAMFileHeader (htsjdk.samtools.SAMFileHeader)16 ArrayList (java.util.ArrayList)16 IOException (java.io.IOException)15 SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary)14 SAMRecordIterator (htsjdk.samtools.SAMRecordIterator)13 SamReaderFactory (htsjdk.samtools.SamReaderFactory)12 SAMSequenceDictionaryProgress (com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress)11 CigarElement (htsjdk.samtools.CigarElement)11 SAMSequenceRecord (htsjdk.samtools.SAMSequenceRecord)11 List (java.util.List)11 FileNotFoundException (java.io.FileNotFoundException)10 BufferedReader (java.io.BufferedReader)9 Collectors (java.util.stream.Collectors)9 Cigar (htsjdk.samtools.Cigar)8 CigarOperator (htsjdk.samtools.CigarOperator)7