Search in sources :

Example 1 with VCFBuffer

Use of com.github.lindenb.jvarkit.util.vcf.VCFBuffer in the project jvarkit by lindenb.

The class ForkVcf, method doWork.

/**
 * Splits the input VCF into {@code number_of_files} output VCF files.
 *
 * <p>The configuration is validated first: the output file name must contain
 * {@code REPLACE_GROUPID} and end with {@code .vcf} or {@code .vcf.gz}, and
 * {@code number_of_files} must be strictly positive.
 *
 * <p>Two strategies are supported:
 * <ul>
 *   <li>when {@code split_by_chunk} is false, all output files are opened up front and
 *       variants are distributed round-robin among them;</li>
 *   <li>otherwise every variant is first stored in a {@code VCFBuffer} to count them, then
 *       written back as consecutive chunks of {@code ceil(count / number_of_files)}
 *       variants (at least 1); any output file left over is created with a header only.</li>
 * </ul>
 *
 * <p>The path of every output file is printed to the manifest writer; when
 * {@code manifestFile} is null the manifest is written to a {@code NullOuputStream}.
 *
 * @param args command-line arguments, forwarded to {@code oneFileOrNull} to select the input
 * @return {@code RETURN_OK} on success, {@code -1} on invalid configuration or if any
 *         exception is raised while splitting
 */
@Override
public int doWork(List<String> args) {
    if (this.outputFile == null || !this.outputFile.getName().contains(REPLACE_GROUPID)) {
        LOG.error("Output file pattern undefined or doesn't contain " + REPLACE_GROUPID + " : " + this.outputFile);
        return -1;
    }
    if (!(this.outputFile.getName().endsWith(".vcf") || this.outputFile.getName().endsWith(".vcf.gz"))) {
        LOG.error("output file must end with '.vcf' or '.vcf.gz'");
        return -1;
    }
    if (this.number_of_files <= 0) {
        LOG.error("Bad value for number of files:" + this.number_of_files);
        return -1;
    }
    VcfIterator in = null;
    // Iterator over the buffered variants; declared here so that 'finally' can release it
    // when an exception interrupts the chunked write.
    VcfIterator bufferedIter = null;
    PrintWriter manifestWriter = null;
    final List<SplitGroup> groups = new ArrayList<>();
    VCFBuffer vcfBuffer = null;
    try {
        in = openVcfIterator(oneFileOrNull(args));
        manifestWriter = (this.manifestFile == null ? new PrintWriter(new NullOuputStream()) : IOUtils.openFileForPrintWriter(this.manifestFile));
        final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(in.getHeader());
        if (!this.split_by_chunk) {
            // Round-robin mode: open every output first, then deal the variants out in turn.
            while (groups.size() < this.number_of_files) {
                final SplitGroup sg = new SplitGroup(groups.size() + 1);
                sg.open(in.getHeader());
                manifestWriter.println(sg.getFile().getPath());
                groups.add(sg);
            }
            // The index is kept inside [0, number_of_files) instead of growing without bound:
            // an ever-increasing int would overflow to a negative value after 2^31 variants
            // and make the remainder (and thus the list index) negative.
            int idx = 0;
            while (in.hasNext()) {
                final VariantContext ctx = progress.watch(in.next());
                groups.get(idx)._writer.add(ctx);
                idx = (idx + 1) % this.number_of_files;
            }
            in.close();
        } else {
            // Chunk mode: buffer everything once to learn the total number of variants.
            long count_variants = 0;
            vcfBuffer = new VCFBuffer(this.maxRecordsInRam, this.tmpDir);
            vcfBuffer.writeHeader(in.getHeader());
            while (in.hasNext()) {
                final VariantContext ctx = progress.watch(in.next());
                vcfBuffer.add(ctx);
                ++count_variants;
            }
            in.close();
            final long variant_per_file = Math.max(1L, (long) Math.ceil(count_variants / (double) this.number_of_files));
            LOG.info("done buffering. n=" + count_variants + " now forking " + variant_per_file + " variants for " + this.number_of_files + " files.");
            bufferedIter = vcfBuffer.iterator();
            long count_ctx = 0L;
            while (bufferedIter.hasNext()) {
                // Start a new output file for the first variant and whenever the current one is full.
                if (groups.isEmpty() || count_ctx >= variant_per_file) {
                    if (!groups.isEmpty()) {
                        groups.get(groups.size() - 1).close();
                    }
                    final SplitGroup last = new SplitGroup(groups.size() + 1);
                    // NOTE(review): the header is requested from 'in' after in.close();
                    // this presumably relies on the header being retained - confirm.
                    last.open(in.getHeader());
                    manifestWriter.println(last.getFile().getPath());
                    groups.add(last);
                    count_ctx = 0;
                }
                final VariantContext ctx = bufferedIter.next();
                groups.get(groups.size() - 1)._writer.add(ctx);
                count_ctx++;
            }
            bufferedIter.close();
            // Already closed: prevent a second close in 'finally', after the buffer is disposed.
            bufferedIter = null;
            vcfBuffer.close();
            vcfBuffer.dispose();
            vcfBuffer = null;
            // save remaining empty VCFs
            while (groups.size() < this.number_of_files) {
                LOG.info("creating empty vcf");
                final SplitGroup sg = new SplitGroup(groups.size() + 1);
                sg.open(in.getHeader());
                manifestWriter.println(sg.getFile().getPath());
                sg.close();
                groups.add(sg);
            }
        }
        progress.finish();
        for (final SplitGroup g : groups) {
            g.close();
        }
        manifestWriter.flush();
        manifestWriter.close();
        manifestWriter = null;
        return RETURN_OK;
    } catch (final Exception err) {
        LOG.error(err);
        // On failure, close every output opened so far and delete it.
        for (final SplitGroup g : groups) {
            CloserUtil.close(g);
            if (in != null) {
                g.getFile().delete();
            }
        }
        return -1;
    } finally {
        // Release the buffer iterator before disposing of the buffer it reads from.
        CloserUtil.close(bufferedIter);
        if (vcfBuffer != null) {
            vcfBuffer.dispose();
        }
        CloserUtil.close(in);
        IOUtils.flush(manifestWriter);
        CloserUtil.close(manifestWriter);
    }
}
Also used : SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) ArrayList(java.util.ArrayList) VariantContext(htsjdk.variant.variantcontext.VariantContext) IOException(java.io.IOException) RuntimeIOException(htsjdk.samtools.util.RuntimeIOException) VcfIterator(com.github.lindenb.jvarkit.util.vcf.VcfIterator) BufferedReader(java.io.BufferedReader) VCFBuffer(com.github.lindenb.jvarkit.util.vcf.VCFBuffer) NullOuputStream(com.github.lindenb.jvarkit.io.NullOuputStream) PrintWriter(java.io.PrintWriter)

Aggregations

NullOuputStream (com.github.lindenb.jvarkit.io.NullOuputStream)1 SAMSequenceDictionaryProgress (com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress)1 VCFBuffer (com.github.lindenb.jvarkit.util.vcf.VCFBuffer)1 VcfIterator (com.github.lindenb.jvarkit.util.vcf.VcfIterator)1 RuntimeIOException (htsjdk.samtools.util.RuntimeIOException)1 VariantContext (htsjdk.variant.variantcontext.VariantContext)1 BufferedReader (java.io.BufferedReader)1 IOException (java.io.IOException)1 PrintWriter (java.io.PrintWriter)1 ArrayList (java.util.ArrayList)1