Search in sources :

Example 36 with FastqRecord

use of htsjdk.samtools.fastq.FastqRecord in project jvarkit by lindenb.

the class PadEmptyFastq method copyTo.

private void copyTo(final FastqReader r, final FastqWriter w) {
    int padLength = this.N;
    long nReads = 0L;
    long nFill = 0L;
    String fillN = null;
    String fillQ = null;
    r.setValidationStringency(ValidationStringency.LENIENT);
    while (r.hasNext()) {
        FastqRecord rec = r.next();
        if (++nReads % 1E6 == 0) {
            LOG.info("Read " + nReads + " reads. empty reads=" + nFill);
        }
        if (rec.getReadString().isEmpty()) {
            ++nFill;
            if (padLength < 1) {
                padLength = DEFAULT_LENGTH;
            }
            if (fillN == null) {
                StringBuilder b1 = new StringBuilder();
                while (b1.length() < padLength) b1.append("N");
                fillN = b1.toString();
                fillQ = fillN.replace('N', '#');
            }
            rec = new FastqRecord(rec.getReadName(), fillN, rec.getBaseQualityHeader(), fillQ);
        } else if (padLength < 1) {
            padLength = rec.getReadString().length();
        }
        w.write(rec);
    }
    LOG.info("Done. Read " + nReads + " reads. empty reads=" + nFill);
}
Also used : FastqRecord(htsjdk.samtools.fastq.FastqRecord)

Example 37 with FastqRecord

use of htsjdk.samtools.fastq.FastqRecord in project jvarkit by lindenb.

the class SamRetrieveSeqAndQual method doWork.

@Override
public int doWork(List<String> args) {
    FastqReader[] fastqReaders = null;
    SamReader samReader = null;
    SAMFileWriter samWriter = null;
    SAMRecordIterator iter = null;
    try {
        if (fastqFin == null) {
            LOG.error("undefined fastq file");
            return -1;
        } else {
            LOG.info("opening " + fastqFin);
            FastqReader r1 = new FastqReader(fastqFin);
            if (fastqRin == null) {
                fastqReaders = new FastqReader[] { r1 };
            } else {
                LOG.info("opening " + fastqRin);
                FastqReader r2 = new FastqReader(fastqRin);
                fastqReaders = new FastqReader[] { r1, r2 };
            }
        }
        samReader = super.openSamReader(oneFileOrNull(args));
        final SAMFileHeader.SortOrder sortOrder = samReader.getFileHeader().getSortOrder();
        if (sortOrder == null) {
            LOG.warning("undefined sort order read are in the sam order");
        } else if (!sortOrder.equals(SAMFileHeader.SortOrder.queryname)) {
            LOG.error("Bad Sort Order. Sort this input on read name");
            return -1;
        }
        SAMFileHeader header = samReader.getFileHeader().clone();
        SAMProgramRecord prg = header.createProgramRecord();
        prg.setCommandLine(this.getProgramCommandLine());
        prg.setProgramName(this.getProgramName());
        prg.setProgramVersion(this.getVersion());
        samWriter = writingBamArgs.openSAMFileWriter(bamOut, header, true);
        iter = samReader.iterator();
        SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(header.getSequenceDictionary());
        FastqRecord[] currFastq = new FastqRecord[] { null, null };
        while (iter.hasNext()) {
            SAMRecord rec = progress.watch(iter.next());
            String readName = rec.getReadName();
            int fastq_index = 0;
            if (rec.getReadPairedFlag()) {
                if (fastqReaders.length != 2) {
                    LOG.error("Not paired but number of fastq!=2");
                    return -1;
                }
                fastq_index = (rec.getFirstOfPairFlag() ? 0 : 1);
            } else {
                if (fastqReaders.length != 1) {
                    LOG.error("Not paired but number of fastq!=1");
                    return -1;
                }
                fastq_index = 0;
            }
            if (sortOrder == SAMFileHeader.SortOrder.queryname) {
                while (currFastq[fastq_index] == null || normalizeFastqName(currFastq[fastq_index].getReadName()).compareTo(readName) < 0) {
                    if (!fastqReaders[fastq_index].hasNext()) {
                        LOG.error("Read Missing for " + readName);
                        return -1;
                    }
                    currFastq[fastq_index] = fastqReaders[fastq_index].next();
                    if (normalizeFastqName(currFastq[fastq_index].getReadName()).compareTo(readName) > 0) {
                        LOG.error("Read Missing for " + readName);
                        return -1;
                    }
                }
            } else {
                if (!fastqReaders[fastq_index].hasNext()) {
                    LOG.error("Read Missing for " + readName);
                    return -1;
                }
                currFastq[fastq_index] = fastqReaders[fastq_index].next();
            }
            if (normalizeFastqName(currFastq[fastq_index].getReadName()).compareTo(readName) != 0) {
                LOG.error("Read Missing/Error for " + readName + " current:" + currFastq[fastq_index].getReadName());
                return -1;
            }
            String fastqBases = currFastq[fastq_index].getReadString();
            String fastqQuals = currFastq[fastq_index].getBaseQualityString();
            /* handle orientation */
            if (!rec.getReadUnmappedFlag() && rec.getReadNegativeStrandFlag()) {
                fastqBases = AcidNucleics.reverseComplement(fastqBases);
                StringBuilder sb = new StringBuilder(fastqQuals.length());
                for (int i = fastqQuals.length() - 1; i >= 0; --i) sb.append(fastqQuals.charAt(i));
                fastqQuals = sb.toString();
            }
            /* remove hard clip */
            Cigar cigar = rec.getCigar();
            if (cigar != null) {
                List<CigarElement> ceList = cigar.getCigarElements();
                if (!ceList.isEmpty()) {
                    CigarElement ce = ceList.get(ceList.size() - 1);
                    if (ce.getOperator() == CigarOperator.HARD_CLIP) {
                        fastqBases = fastqBases.substring(0, fastqBases.length() - ce.getLength());
                        fastqQuals = fastqQuals.substring(0, fastqQuals.length() - ce.getLength());
                    }
                    ce = ceList.get(0);
                    if (ce.getOperator() == CigarOperator.HARD_CLIP) {
                        fastqBases = fastqBases.substring(ce.getLength());
                        fastqQuals = fastqQuals.substring(ce.getLength());
                    }
                }
            }
            rec.setBaseQualityString(fastqQuals);
            rec.setReadString(fastqBases);
            samWriter.addAlignment(rec);
        }
        progress.finish();
        return 0;
    } catch (Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        if (fastqReaders != null)
            for (FastqReader r : fastqReaders) CloserUtil.close(r);
        CloserUtil.close(iter);
        CloserUtil.close(samReader);
        CloserUtil.close(samWriter);
    }
}
Also used : SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) SAMFileWriter(htsjdk.samtools.SAMFileWriter) FastqRecord(htsjdk.samtools.fastq.FastqRecord) CigarElement(htsjdk.samtools.CigarElement) SAMProgramRecord(htsjdk.samtools.SAMProgramRecord) SamReader(htsjdk.samtools.SamReader) Cigar(htsjdk.samtools.Cigar) FastqReader(htsjdk.samtools.fastq.FastqReader) SAMRecord(htsjdk.samtools.SAMRecord) SAMFileHeader(htsjdk.samtools.SAMFileHeader)

Example 38 with FastqRecord

use of htsjdk.samtools.fastq.FastqRecord in project jvarkit by lindenb.

the class SamExtractClip method run.

private void run(final SamReader r, final FastqWriter out) {
    int[] startend = new int[2];
    final SAMFileHeader header = r.getFileHeader();
    // w=swf.make(header, System.out);
    SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(header);
    SAMRecordIterator it = r.iterator();
    while (it.hasNext()) {
        final SAMRecord rec = progress.watch(it.next());
        if (rec.getReadUnmappedFlag())
            continue;
        if (rec.isSecondaryOrSupplementary())
            continue;
        if (rec.getDuplicateReadFlag())
            continue;
        final Cigar cigar = rec.getCigar();
        if (cigar == null || cigar.isEmpty())
            continue;
        String suffix = "";
        if (rec.getReadPairedFlag()) {
            suffix = (rec.getFirstOfPairFlag() ? "/1" : "/2");
        }
        startend[0] = 0;
        startend[1] = rec.getReadLength();
        boolean found = false;
        for (int side = 0; side < 2; ++side) {
            final CigarElement ce = cigar.getCigarElement(side == 0 ? 0 : cigar.numCigarElements() - 1);
            if (!ce.getOperator().equals(CigarOperator.S))
                continue;
            if (ce.getLength() < min_clip_length)
                continue;
            found = true;
            final String clippedSeq;
            final String clippedQual;
            if (side == 0) {
                startend[0] = ce.getLength();
                clippedSeq = rec.getReadString().substring(0, startend[0]);
                clippedQual = rec.getBaseQualityString().substring(0, startend[0]);
            } else {
                startend[1] = rec.getReadLength() - ce.getLength();
                clippedSeq = rec.getReadString().substring(startend[1]);
                clippedQual = rec.getBaseQualityString().substring(startend[1]);
            }
            out.write(new FastqRecord(rec.getReadName() + suffix + ";" + side + ";" + rec.getReferenceName() + ";" + rec.getAlignmentStart() + ";" + rec.getFlags() + ";" + rec.getCigarString() + ";" + (side == 0 ? "5'" : "3'"), clippedSeq, "", clippedQual));
        }
        if (!found)
            continue;
        String bases = rec.getReadString();
        String qual = rec.getBaseQualityString();
        if (rec.getReadNegativeStrandFlag()) {
            bases = AcidNucleics.reverseComplement(bases);
            qual = new StringBuilder(qual).reverse().toString();
        }
        if (print_original_read) {
            out.write(new FastqRecord(rec.getReadName() + suffix, bases, "", qual));
        }
        if (print_clipped_read) {
            out.write(new FastqRecord(rec.getReadName() + suffix + ":clipped", bases.substring(startend[0], startend[1]), "", qual.substring(startend[0], startend[1])));
        }
    }
    it.close();
    progress.finish();
}
Also used : Cigar(htsjdk.samtools.Cigar) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) SAMRecord(htsjdk.samtools.SAMRecord) FastqRecord(htsjdk.samtools.fastq.FastqRecord) SAMFileHeader(htsjdk.samtools.SAMFileHeader) CigarElement(htsjdk.samtools.CigarElement)

Example 39 with FastqRecord

use of htsjdk.samtools.fastq.FastqRecord in project jvarkit by lindenb.

the class AbstractFastqReader method next.

public FastqRecord next() {
    if (!hasNext()) {
        throw new NoSuchElementException("next() called when !hasNext()");
    }
    final FastqRecord rec = nextRecord;
    nextRecord = null;
    _has_next_called = false;
    seqHeader = null;
    return rec;
}
Also used : FastqRecord(htsjdk.samtools.fastq.FastqRecord) NoSuchElementException(java.util.NoSuchElementException)

Example 40 with FastqRecord

use of htsjdk.samtools.fastq.FastqRecord in project jvarkit by lindenb.

the class FourLinesFastqReaderTest method test2.

public void test2() throws Exception {
    final String fastqs = "@IL31_4368:1:1:996:8507/2\n" + "\n" + "+\n" + "\n" + "@IL31_4368:1:1:996:21421/2\n" + "CAAAAACTTTCACTTTACCTGCCGGGTTTCCCAGTTTACATTCCACTGTTTGAC\n" + "+\n" + ">DBDDB,B9BAA4AAB7BB?7BBB=91;+*@;5<87+*=/*@@?9=73=.7)7*\n";
    FourLinesFastqReader r = new FourLinesFastqReader(new ByteArrayInputStream(fastqs.getBytes()));
    r.setValidationStringency(ValidationStringency.LENIENT);
    Assert.assertTrue(r.hasNext());
    FastqRecord rec = r.next();
    Assert.assertNotNull(rec);
    Assert.assertTrue(rec.getBaseQualityString().isEmpty());
    Assert.assertTrue(rec.getReadString().isEmpty());
    Assert.assertTrue(r.hasNext());
    rec = r.next();
    Assert.assertNotNull(rec);
    Assert.assertFalse(rec.getBaseQualityString().isEmpty());
    Assert.assertFalse(rec.getReadString().isEmpty());
    Assert.assertFalse(r.hasNext());
    r.close();
}
Also used : ByteArrayInputStream(java.io.ByteArrayInputStream) FastqRecord(htsjdk.samtools.fastq.FastqRecord)

Aggregations

FastqRecord (htsjdk.samtools.fastq.FastqRecord)40 SAMRecord (htsjdk.samtools.SAMRecord)16 Test (org.junit.Test)12 FastqReader (htsjdk.samtools.fastq.FastqReader)8 File (java.io.File)6 SAMFileHeader (htsjdk.samtools.SAMFileHeader)4 FastqReader (com.github.lindenb.jvarkit.util.picard.FastqReader)3 FourLinesFastqReader (com.github.lindenb.jvarkit.util.picard.FourLinesFastqReader)3 SAMFileWriter (htsjdk.samtools.SAMFileWriter)3 IOException (java.io.IOException)3 Test (org.testng.annotations.Test)3 BufferedReferenceSequenceFile (au.edu.wehi.idsv.picard.BufferedReferenceSequenceFile)2 ChimericAlignment (au.edu.wehi.idsv.sam.ChimericAlignment)2 SAMSequenceDictionaryProgress (com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress)2 Cigar (htsjdk.samtools.Cigar)2 CigarElement (htsjdk.samtools.CigarElement)2 SAMFileWriterFactory (htsjdk.samtools.SAMFileWriterFactory)2 SAMRecordIterator (htsjdk.samtools.SAMRecordIterator)2 CommandLineProgramTest (org.broadinstitute.hellbender.CommandLineProgramTest)2 ProgressLogger (org.broadinstitute.hellbender.utils.runtime.ProgressLogger)2