Search in sources:

Example 1 with FastqPairedWriter

use of com.github.lindenb.jvarkit.fastq.FastqPairedWriter in project jvarkit by lindenb.

the class FastqSplitInterleaved method doWork.

/**
 * Reads FASTQ record pairs from a single input and writes each pair to the
 * paired writer opened on {@code fileA} and {@code fileB}.
 *
 * <p>The input is the value returned by {@code oneFileOrNull(args)}; when that
 * value is {@code null} the pairs are read from standard input.
 *
 * @param args command line arguments, forwarded to {@code oneFileOrNull}
 * @return 0 on success, -1 if both output files are equal or if any error is
 *         raised while reading, writing or closing
 */
@Override
public int doWork(final List<String> args) {
    if (this.fileA.equals(this.fileB)) {
        LOG.error("R1 file==R2.file.");
        return -1;
    }
    try {
        final String input = oneFileOrNull(args);
        final FastqPairedReaderFactory fqprf = new FastqPairedReaderFactory().setValidateReadNames(this.validate_read_names);
        final FastqPairedWriterFactory fqwf = new FastqPairedWriterFactory().setCreateMd5(this.write_md5).setAsyncIo(this.with_asynio);
        // try-with-resources closes each resource exactly once (the previous code
        // closed them explicitly and then a second time in 'finally'); a failure
        // while closing is still routed to the catch block below.
        // The reader is declared first so the input is opened before the output files.
        try (CloseableIterator<FastqRecordPair> iter1 = (input == null ? fqprf.open(stdin()) : fqprf.open(Paths.get(input)));
            FastqPairedWriter pairedWriter = fqwf.open(fileA, fileB)) {
            while (iter1.hasNext()) {
                pairedWriter.write(iter1.next());
            }
        }
        return 0;
    } catch (final Throwable err) {
        LOG.error(err);
        return -1;
    }
}
Also used : FastqRecordPair(com.github.lindenb.jvarkit.fastq.FastqRecordPair) FastqPairedWriter(com.github.lindenb.jvarkit.fastq.FastqPairedWriter) FastqPairedReaderFactory(com.github.lindenb.jvarkit.fastq.FastqPairedReaderFactory) FastqPairedWriterFactory(com.github.lindenb.jvarkit.fastq.FastqPairedWriterFactory)

Example 2 with FastqPairedWriter

use of com.github.lindenb.jvarkit.fastq.FastqPairedWriter in project jvarkit by lindenb.

the class BamToFastq method processInput.

/**
 * Converts the alignments of a coordinate-sorted input into FASTQ records.
 *
 * <p>Records are dispatched as follows:
 * <ul>
 * <li>secondary/supplementary alignments are skipped;</li>
 * <li>reads without the 'paired' flag go to the single-end writer;</li>
 * <li>a read whose mate is the very next record (same name, one of them unmapped)
 *     is written immediately as a pair;</li>
 * <li>reads that are unmapped, have an unmapped mate, map to another contig than
 *     their mate, or have an insert size larger than {@code distance} are pushed
 *     into a query-name sorting collection and paired at the end;</li>
 * <li>remaining reads are kept in an in-memory buffer until their mate shows up
 *     or until they are further than {@code distance} from the current record.</li>
 * </ul>
 *
 * @param header the header of the input; its sort order must be 'coordinate'
 * @param iter0 the alignments; the iterator is closed before this method returns
 * @return 0 on success, -1 on error
 */
@Override
protected int processInput(final SAMFileHeader header, final CloseableIterator<SAMRecord> iter0) {
    final Comparator<SAMRecord> queryNameComparator = (A, B) -> A.getReadName().compareTo(B.getReadName());
    SortingCollection<SAMRecord> sortingSAMRecord = null;
    final ArrayList<SAMRecord> buffer = new ArrayList<>(50_000);
    final CountIn<FastqRecord> singleCounter = new CountIn<>("single-end");
    final CountIn<FastqRecord> unpairedCounter = new CountIn<>("unpaired");
    final CountIn<FastqRecord> pairedCounter = new CountIn<>("paired");
    final CountIn<SAMRecord> sortingCounter = new CountIn<>("sorting");
    final PeekableIterator<SAMRecord> iter = new PeekableIterator<>(iter0);
    try {
        if (!SAMFileHeader.SortOrder.coordinate.equals(header.getSortOrder())) {
            LOG.error("Input is not sorted on coordinate. got : " + header.getSortOrder());
            return -1;
        }
        if (singleFastq != null)
            FastqUtils.validateFastqFilename(singleFastq);
        if (unpairedFile1 != null)
            FastqUtils.validateFastqFilename(unpairedFile1);
        if (unpairedFile2 != null)
            FastqUtils.validateFastqFilename(unpairedFile2);
        try (FastqWriter singleEndWriter = this.singleFastq == null ? new NullFastqWriter() : new BasicFastqWriter(this.singleFastq);
            FastqWriter unpairedWriter1 = this.unpairedFile1 == null ? new NullFastqWriter() : new BasicFastqWriter(this.unpairedFile1);
            FastqWriter unpairedWriter2 = this.unpairedFile2 == null ? new NullFastqWriter() : new BasicFastqWriter(this.unpairedFile2);
            FastqPairedWriter R1R2writer = openFastqPairedWriter()) {
            sortingSAMRecord = SortingCollection.newInstance(SAMRecord.class, new BAMRecordCodec(header), queryNameComparator, sortingCollection.getMaxRecordsInRam(), sortingCollection.getTmpPaths());
            sortingSAMRecord.setDestructiveIteration(true);
            while (iter.hasNext()) {
                final SAMRecord rec = iter.next();
                if (rec.isSecondaryOrSupplementary())
                    continue;
                if (!rec.getReadPairedFlag()) {
                    singleEndWriter.write(singleCounter.apply(toFastq(rec)));
                    continue;
                }
                // fast path: one of the mates is unmapped and the mate is the next record
                if ((rec.getReadUnmappedFlag() || rec.getMateUnmappedFlag()) && iter.hasNext()) {
                    final SAMRecord rec2 = iter.peek();
                    if (!rec2.isSecondaryOrSupplementary() && queryNameComparator.compare(rec, rec2) == 0) {
                        if (rec2.getFirstOfPairFlag() && rec.getSecondOfPairFlag()) {
                            // consume the peeked mate
                            iter.next();
                            R1R2writer.write(pairedCounter.apply(toFastq(rec2)), pairedCounter.apply(toFastq(rec)));
                            continue;
                        } else if (rec.getFirstOfPairFlag() && rec2.getSecondOfPairFlag()) {
                            // consume the peeked mate
                            iter.next();
                            R1R2writer.write(pairedCounter.apply(toFastq(rec)), pairedCounter.apply(toFastq(rec2)));
                            continue;
                        }
                    }
                }
                // mate cannot be found in the local buffer: defer to the query-name sort
                if (rec.getReadUnmappedFlag() || rec.getMateUnmappedFlag() || !rec.getReferenceName().equals(rec.getMateReferenceName()) || Math.abs(rec.getInferredInsertSize()) > this.distance) {
                    sortingSAMRecord.add(sortingCounter.apply(rec));
                    continue;
                }
                // flush buffered records that belong to a previous contig
                while (!buffer.isEmpty() && !buffer.get(0).getReferenceName().equals(rec.getReferenceName())) {
                    sortingSAMRecord.add(sortingCounter.apply(buffer.remove(0)));
                }
                // flush buffered records that are now too far behind the current position
                while (!buffer.isEmpty() && (rec.getAlignmentStart() - buffer.get(0).getAlignmentStart()) > this.distance) {
                    sortingSAMRecord.add(sortingCounter.apply(buffer.remove(0)));
                }
                // the mate is expected later in the stream: wait for it
                if (rec.getAlignmentStart() < rec.getMateAlignmentStart()) {
                    buffer.add(rec);
                    continue;
                }
                // look for the mate in the buffer; stop once past the mate's position
                SAMRecord mate = null;
                int i = 0;
                while (i < buffer.size()) {
                    final SAMRecord rec2 = buffer.get(i);
                    if (queryNameComparator.compare(rec2, rec) == 0) {
                        mate = rec2;
                        buffer.remove(i);
                        break;
                    }
                    if (rec2.getAlignmentStart() > rec.getMateAlignmentStart()) {
                        break;
                    }
                    ++i;
                }
                if (mate == null) {
                    (rec.getFirstOfPairFlag() ? unpairedWriter1 : unpairedWriter2).write(unpairedCounter.apply(toFastq(rec)));
                } else if (mate.getFirstOfPairFlag() && rec.getSecondOfPairFlag()) {
                    R1R2writer.write(pairedCounter.apply(toFastq(mate)), pairedCounter.apply(toFastq(rec)));
                } else if (rec.getFirstOfPairFlag() && mate.getSecondOfPairFlag()) {
                    R1R2writer.write(pairedCounter.apply(toFastq(rec)), pairedCounter.apply(toFastq(mate)));
                } else {
                    // same name but inconsistent first/second flags: write both as unpaired
                    (rec.getFirstOfPairFlag() ? unpairedWriter1 : unpairedWriter2).write(unpairedCounter.apply(toFastq(rec)));
                    (mate.getFirstOfPairFlag() ? unpairedWriter1 : unpairedWriter2).write(unpairedCounter.apply(toFastq(mate)));
                }
            }
            // end while
            // whatever remains in the buffer never met its mate locally
            for (final SAMRecord rec : buffer) {
                sortingSAMRecord.add(sortingCounter.apply(rec));
            }
            buffer.clear();
            sortingSAMRecord.doneAdding();
            // second pass: records sharing the same read name are grouped together
            try (CloseableIterator<SAMRecord> iter2 = sortingSAMRecord.iterator()) {
                try (EqualIterator<SAMRecord> eq = new EqualIterator<>(iter2, queryNameComparator)) {
                    while (eq.hasNext()) {
                        final List<SAMRecord> L = eq.next();
                        if (L.size() == 2) {
                            if (L.get(0).getFirstOfPairFlag() && L.get(1).getSecondOfPairFlag()) {
                                R1R2writer.write(pairedCounter.apply(toFastq(L.get(0))), pairedCounter.apply(toFastq(L.get(1))));
                            } else if (L.get(1).getFirstOfPairFlag() && L.get(0).getSecondOfPairFlag()) {
                                R1R2writer.write(pairedCounter.apply(toFastq(L.get(1))), pairedCounter.apply(toFastq(L.get(0))));
                            } else {
                                (L.get(0).getFirstOfPairFlag() ? unpairedWriter1 : unpairedWriter2).write(unpairedCounter.apply(toFastq(L.get(0))));
                                (L.get(1).getFirstOfPairFlag() ? unpairedWriter1 : unpairedWriter2).write(unpairedCounter.apply(toFastq(L.get(1))));
                            }
                        } else {
                            for (SAMRecord rec2 : L) {
                                (rec2.getFirstOfPairFlag() ? unpairedWriter1 : unpairedWriter2).write(unpairedCounter.apply(toFastq(rec2)));
                            }
                        }
                    }
                }
            }
        }
        // normal path: release the collection and clear the reference first so that
        // the 'finally' block below does not clean it up a second time
        final SortingCollection<SAMRecord> finished = sortingSAMRecord;
        sortingSAMRecord = null;
        finished.cleanup();
        unpairedCounter.log();
        singleCounter.log();
        pairedCounter.log();
        return 0;
    } catch (final Throwable err) {
        LOG.error(err);
        return -1;
    } finally {
        // error path: the sorting collection was created but never cleaned up;
        // do it here so an exception does not leave its temporary data behind
        if (sortingSAMRecord != null) {
            try {
                sortingSAMRecord.cleanup();
            } catch (final Throwable cleanupErr) {
                LOG.error(cleanupErr);
            }
        }
        iter.close();
    }
}
Also used : CloseableIterator(htsjdk.samtools.util.CloseableIterator) SequenceUtil(htsjdk.samtools.util.SequenceUtil) MultiBamLauncher(com.github.lindenb.jvarkit.jcommander.MultiBamLauncher) Program(com.github.lindenb.jvarkit.util.jcommander.Program) Parameter(com.beust.jcommander.Parameter) UnaryOperator(java.util.function.UnaryOperator) SAMFileHeader(htsjdk.samtools.SAMFileHeader) ParametersDelegate(com.beust.jcommander.ParametersDelegate) ArrayList(java.util.ArrayList) DistanceParser(com.github.lindenb.jvarkit.util.bio.DistanceParser) NoSplitter(com.github.lindenb.jvarkit.util.jcommander.NoSplitter) StringUtil(htsjdk.samtools.util.StringUtil) FastqPairedWriter(com.github.lindenb.jvarkit.fastq.FastqPairedWriter) BAMRecordCodec(htsjdk.samtools.BAMRecordCodec) PeekableIterator(htsjdk.samtools.util.PeekableIterator) SortingCollection(htsjdk.samtools.util.SortingCollection) FastqUtils(com.github.lindenb.jvarkit.fastq.FastqUtils) EqualIterator(com.github.lindenb.jvarkit.iterator.EqualIterator) Logger(com.github.lindenb.jvarkit.util.log.Logger) IOException(java.io.IOException) BasicFastqWriter(htsjdk.samtools.fastq.BasicFastqWriter) FastqPairedWriterFactory(com.github.lindenb.jvarkit.fastq.FastqPairedWriterFactory) File(java.io.File) SAMRecord(htsjdk.samtools.SAMRecord) FastqRecord(htsjdk.samtools.fastq.FastqRecord) List(java.util.List) FastqWriter(htsjdk.samtools.fastq.FastqWriter) StringUtils(com.github.lindenb.jvarkit.lang.StringUtils) Comparator(java.util.Comparator)

Example 3 with FastqPairedWriter

use of com.github.lindenb.jvarkit.fastq.FastqPairedWriter in project jvarkit by lindenb.

the class OnePassFastqLauncher method runPairedEnd.

/**
 * Opens the paired FASTQ output and delegates the processing of the pairs to
 * {@code runPairedEnd(iter, writer)}.
 *
 * <p>Output selection:
 * <ul>
 * <li>{@code outputFile1} and {@code outputFile2} set: one writer on both files;</li>
 * <li>only {@code outputFile1} set: one writer on that single file;</li>
 * <li>none set: the pairs are written to standard output;</li>
 * <li>only {@code outputFile2} set: rejected as a bad output declaration.</li>
 * </ul>
 *
 * @param iter the pairs of FASTQ records to process
 * @return the value returned by {@code runPairedEnd(iter, writer)}, or -1 on error
 * @throws IOException kept for interface compatibility; errors raised here are
 *         logged and reported through the return value
 */
@Override
protected int runPairedEnd(final CloseableIterator<FastqRecordPair> iter) throws IOException {
    try {
        final FastqPairedWriterFactory fpwf = new FastqPairedWriterFactory();
        fpwf.setCreateMd5(this.write_md5);
        final FastqPairedWriter opened;
        if (outputFile1 != null && outputFile2 != null) {
            opened = fpwf.open(outputFile1, outputFile2);
        } else if (outputFile1 != null) {
            opened = fpwf.open(outputFile1);
        } else if (outputFile2 == null) {
            opened = fpwf.open(new PrintStream(new BufferedOutputStream(stdout())));
        } else {
            getLogger().error("bad output declaration.");
            return -1;
        }
        // The writer is closed exactly once (the previous code closed it after the
        // run and again in 'finally'); a failure while closing is caught below and
        // reported as -1 instead of escaping from the 'finally' block.
        try (FastqPairedWriter fws = opened) {
            return runPairedEnd(iter, fws);
        }
    } catch (final Throwable err) {
        getLogger().error(err);
        return -1;
    }
}
Also used : PrintStream(java.io.PrintStream) FastqPairedWriter(com.github.lindenb.jvarkit.fastq.FastqPairedWriter) FastqPairedWriterFactory(com.github.lindenb.jvarkit.fastq.FastqPairedWriterFactory) BufferedOutputStream(java.io.BufferedOutputStream)

Example 4 with FastqPairedWriter

use of com.github.lindenb.jvarkit.fastq.FastqPairedWriter in project jvarkit by lindenb.

the class FastqSplit method openPairedWriter.

/**
 * Opens the paired writer of the {@code i}-th output chunk and appends the
 * name(s) of the created file(s) to the manifest.
 *
 * <p>File names are built by replacing {@code TAG} in {@code basename} with the
 * 1-based, zero-padded chunk number followed by {@code .R12} (interleaved
 * output) or {@code .R1} / {@code .R2} (two files, written tab-separated on one
 * manifest line). Missing parent directories are created.
 *
 * @param i 0-based index of the chunk
 * @param manifest where the generated file names are printed
 * @return the writer opened on the new file(s)
 * @throws IOException declared for the writer factory — TODO confirm which call raises it
 */
private FastqPairedWriter openPairedWriter(int i, PrintWriter manifest) throws IOException {
    final FastqPairedWriterFactory factory = new FastqPairedWriterFactory().setCreateMd5(write_md5).setAsyncIo(with_asynio);
    final int chunkNumber = i + 1;
    if (!this.output_is_interleaved) {
        // one file per side of the pair
        final File[] mates = new File[2];
        int side = 0;
        while (side < mates.length) {
            final File target = new File(this.basename.replace(TAG, String.format("%09d.R%d", chunkNumber, (side + 1))));
            final File directory = target.getParentFile();
            if (directory != null) {
                directory.mkdirs();
            }
            mates[side] = target;
            side++;
        }
        final FastqPairedWriter splitWriter = factory.open(mates[0], mates[1]);
        manifest.print(mates[0]);
        manifest.print("\t");
        manifest.println(mates[1]);
        return splitWriter;
    }
    // both sides of the pair go to the same file
    final File interleaved = new File(this.basename.replace(TAG, String.format("%09d.R12", chunkNumber)));
    final File directory = interleaved.getParentFile();
    if (directory != null) {
        directory.mkdirs();
    }
    final FastqPairedWriter interleavedWriter = factory.open(interleaved);
    manifest.println(interleaved);
    return interleavedWriter;
}
Also used : FastqPairedWriter(com.github.lindenb.jvarkit.fastq.FastqPairedWriter) FastqPairedWriterFactory(com.github.lindenb.jvarkit.fastq.FastqPairedWriterFactory) File(java.io.File)

Example 5 with FastqPairedWriter

use of com.github.lindenb.jvarkit.fastq.FastqPairedWriter in project jvarkit by lindenb.

the class FastqSplit method doWork.

/**
 * Splits FASTQ input into several output files.
 *
 * <p>Exactly one of the two modes must be selected:
 * <ul>
 * <li>{@code per_file_number > 0}: a new output is started every
 *     {@code per_file_number} records (or pairs);</li>
 * <li>{@code split_number > 0}: records (or pairs) are distributed round-robin
 *     over {@code split_number} outputs.</li>
 * </ul>
 * The input is handled as paired when two arguments are given, or when
 * {@code input_is_interleaved} is set with zero or one argument; otherwise zero
 * or one argument is read as single-end FASTQ. The name of every created file is
 * printed to the manifest.
 *
 * @param args the input file(s); an empty list means standard input
 * @return 0 on success, -1 on bad options, bad arguments or any error
 */
@Override
public int doWork(final List<String> args) {
    if (this.per_file_number < 1 && this.split_number < 1) {
        LOG.error("Option -n or -s are undefined");
        return -1;
    }
    if (this.per_file_number > 0 && this.split_number > 0) {
        LOG.error("Both Options -n and -s are defined");
        return -1;
    }
    if (!this.basename.contains(TAG)) {
        LOG.error("basename doesn't contain " + TAG + ": " + basename);
        return -1;
    }
    PrintWriter manifest = null;
    try {
        if (this.manifestPath == null) {
            manifest = new PrintWriter(new NullOuputStream());
        } else {
            manifest = super.openPathOrStdoutAsPrintWriter(this.manifestPath);
        }
        if (args.size() == 2 || (this.input_is_interleaved && (args.isEmpty() || args.size() == 1))) {
            /* paired input */
            final List<FastqPairedWriter> fastqWriters = new ArrayList<>();
            FastqPairedWriter previous = null;
            int count_files = 0;
            long n = 0L;
            try (final CloseableIterator<FastqRecordPair> iter = new FastqPairedReaderFactory().setValidateReadNames(this.validate_read_names).open(args)) {
                while (iter.hasNext()) {
                    final FastqRecordPair pair = iter.next();
                    final FastqPairedWriter w;
                    /* split by number of reads per file */
                    if (this.per_file_number > 0) {
                        if (previous == null || n % this.per_file_number == 0) {
                            if (previous != null)
                                previous.close();
                            previous = openPairedWriter(count_files, manifest);
                            count_files++;
                            n = 0L;
                        }
                        w = previous;
                    } else /* split by file */
                    {
                        final int idx = (int) (n % this.split_number);
                        if (idx >= fastqWriters.size()) {
                            w = openPairedWriter(idx, manifest);
                            fastqWriters.add(w);
                        } else {
                            w = fastqWriters.get(idx);
                        }
                    }
                    w.write(pair);
                    n++;
                }
            }
            if (previous != null)
                previous.close();
            for (final FastqPairedWriter w : fastqWriters) w.close();
        } else if (args.isEmpty() || args.size() == 1) {
            /* single-end input */
            if (this.output_is_interleaved) {
                LOG.error("Cannot set output is interleaved if input is not paired.");
                return -1;
            }
            final List<FastqWriter> fastqWriters = new ArrayList<>();
            FastqWriter previous = null;
            long n = 0L;
            int count_files = 0;
            // try-with-resources: the reader is also closed when the loop fails
            try (FastqReader iter = args.size() == 1 ? new FastqReader(new File(args.get(0))) : new FastqReader(IOUtils.openStreamForBufferedReader(stdin()))) {
                while (iter.hasNext()) {
                    final FastqRecord rec = iter.next();
                    final FastqWriter w;
                    /* split by number of reads per file */
                    if (this.per_file_number > 0) {
                        if (previous == null || n % this.per_file_number == 0) {
                            if (previous != null)
                                previous.close();
                            previous = this.openSingleWriter(count_files, manifest);
                            count_files++;
                            n = 0L;
                        }
                        w = previous;
                    } else /* split by file */
                    {
                        final int idx = (int) (n % this.split_number);
                        if (idx >= fastqWriters.size()) {
                            w = this.openSingleWriter(idx, manifest);
                            fastqWriters.add(w);
                        } else {
                            w = fastqWriters.get(idx);
                        }
                    }
                    w.write(rec);
                    n++;
                }
            }
            // the last chunk of the 'reads per file' mode was never closed here,
            // unlike in the paired branch above
            if (previous != null)
                previous.close();
            for (final FastqWriter w : fastqWriters) w.close();
        } else {
            LOG.error("Illegal number of arguments.");
            return -1;
        }
        manifest.flush();
        manifest.close();
        return 0;
    } catch (final Throwable err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(manifest);
    }
}
Also used : ArrayList(java.util.ArrayList) FastqRecord(htsjdk.samtools.fastq.FastqRecord) FastqPairedReaderFactory(com.github.lindenb.jvarkit.fastq.FastqPairedReaderFactory) FastqRecordPair(com.github.lindenb.jvarkit.fastq.FastqRecordPair) FastqPairedWriter(com.github.lindenb.jvarkit.fastq.FastqPairedWriter) FastqReader(htsjdk.samtools.fastq.FastqReader) FastqWriter(htsjdk.samtools.fastq.FastqWriter) NullOuputStream(com.github.lindenb.jvarkit.io.NullOuputStream) ArrayList(java.util.ArrayList) List(java.util.List) File(java.io.File) PrintWriter(java.io.PrintWriter)

Aggregations

FastqPairedWriter (com.github.lindenb.jvarkit.fastq.FastqPairedWriter)5 FastqPairedWriterFactory (com.github.lindenb.jvarkit.fastq.FastqPairedWriterFactory)4 File (java.io.File)3 FastqPairedReaderFactory (com.github.lindenb.jvarkit.fastq.FastqPairedReaderFactory)2 FastqRecordPair (com.github.lindenb.jvarkit.fastq.FastqRecordPair)2 FastqRecord (htsjdk.samtools.fastq.FastqRecord)2 FastqWriter (htsjdk.samtools.fastq.FastqWriter)2 ArrayList (java.util.ArrayList)2 List (java.util.List)2 Parameter (com.beust.jcommander.Parameter)1 ParametersDelegate (com.beust.jcommander.ParametersDelegate)1 FastqUtils (com.github.lindenb.jvarkit.fastq.FastqUtils)1 NullOuputStream (com.github.lindenb.jvarkit.io.NullOuputStream)1 EqualIterator (com.github.lindenb.jvarkit.iterator.EqualIterator)1 MultiBamLauncher (com.github.lindenb.jvarkit.jcommander.MultiBamLauncher)1 StringUtils (com.github.lindenb.jvarkit.lang.StringUtils)1 DistanceParser (com.github.lindenb.jvarkit.util.bio.DistanceParser)1 NoSplitter (com.github.lindenb.jvarkit.util.jcommander.NoSplitter)1 Program (com.github.lindenb.jvarkit.util.jcommander.Program)1 Logger (com.github.lindenb.jvarkit.util.log.Logger)1