Search in sources :

Example 6 with EqualRangeIterator

use of com.github.lindenb.jvarkit.util.iterator.EqualRangeIterator in project jvarkit by lindenb.

the class VcfGeneSplitter method doVcfToVcf.

/**
 * Splits the input VCF by key and writes one VCF per key, plus a manifest,
 * into a zip archive.
 *
 * <p>Every variant line is stored once per key returned by
 * {@code getVariantKeys} in a disk-backed sorting collection. Lines sharing
 * the same key are then grouped and written as a
 * {@code <baseZipDir>/<key>.vcf} zip entry. A tab-delimited report
 * (contig, 0-based start, end, key, number of variants) is accumulated in a
 * temporary file and finally stored as {@code <baseZipDir>/manifest.bed}.
 *
 * @param inputName  URI of the VCF to read, or {@code null} to read stdin
 * @param outputFile the zip file to create
 * @return {@code RETURN_OK} on success, {@code -1} if any exception occurred
 */
@Override
protected int doVcfToVcf(String inputName, File outputFile) {
    SortingCollection<KeyAndLine> sortingcollection = null;
    BufferedReader in = null;
    FileOutputStream fos = null;
    ZipOutputStream zout = null;
    CloseableIterator<KeyAndLine> iter = null;
    EqualRangeIterator<KeyAndLine> eqiter = null;
    PrintWriter pw = null;
    File tmpReportFile = null;
    try {
        in = inputName == null ? IOUtils.openStreamForBufferedReader(stdin()) : IOUtils.openURIForBufferedReading(inputName);
        final VCFUtils.CodecAndHeader cah = VCFUtils.parseHeader(in);
        /**
         * find splitter by name
         */
        final VepPredictionParser vepPredictionParser = new VepPredictionParserFactory().header(cah.header).get();
        sortingcollection = SortingCollection.newInstance(KeyAndLine.class, new KeyAndLineCodec(), new KeyAndLineComparator(), this.writingSortingCollection.getMaxRecordsInRam(), this.writingSortingCollection.getTmpPaths());
        sortingcollection.setDestructiveIteration(true);
        // read variants
        final SAMSequenceDictionaryProgress progess = new SAMSequenceDictionaryProgress(cah.header);
        String line;
        while ((line = in.readLine()) != null) {
            final VariantContext ctx = progess.watch(cah.codec.decode(line));
            // no check for ctx.isFiltered here, we do this later.
            for (final String key : this.getVariantKeys(vepPredictionParser, ctx)) {
                sortingcollection.add(new KeyAndLine(key, line));
            }
        }
        // the progress logger is finished exactly once, right after reading
        progess.finish();
        sortingcollection.doneAdding();
        LOG.info("creating zip " + outputFile);
        fos = new FileOutputStream(outputFile);
        zout = new ZipOutputStream(fos);
        tmpReportFile = File.createTempFile("_tmp.", ".txt", writingSortingCollection.getTmpDirectories().get(0));
        // safety net only: the file is deleted explicitly in the finally block
        tmpReportFile.deleteOnExit();
        pw = IOUtils.openFileForPrintWriter(tmpReportFile);
        pw.println("#chrom\tstart\tend\tkey\tCount_Variants");
        iter = sortingcollection.iterator();
        // group consecutive records sharing the same key
        eqiter = new EqualRangeIterator<>(iter, (o1, o2) -> o1.key.compareTo(o2.key));
        while (eqiter.hasNext()) {
            final List<KeyAndLine> buffer = eqiter.next();
            final KeyAndLine first = buffer.get(0);
            LOG.info(first.key);
            final List<VariantContext> variants = new ArrayList<>(buffer.size());
            String contig = null;
            int chromStart = Integer.MAX_VALUE;
            int chromEnd = 0;
            for (final KeyAndLine kal : buffer) {
                final VariantContext ctx = cah.codec.decode(kal.ctx);
                variants.add(ctx);
                contig = ctx.getContig();
                chromStart = Math.min(chromStart, ctx.getStart());
                chromEnd = Math.max(chromEnd, ctx.getEnd());
            }
            pw.println(contig + "\t" + (chromStart - 1) + // -1 for bed compatibility
            "\t" + chromEnd + "\t" + first.key + "\t" + variants.size());
            // save vcf file
            final ZipEntry ze = new ZipEntry(this.baseZipDir + "/" + first.key + ".vcf");
            zout.putNextEntry(ze);
            final VariantContextWriter out = VCFUtils.createVariantContextWriterToOutputStream(IOUtils.uncloseableOutputStream(zout));
            final VCFHeader header2 = addMetaData(new VCFHeader(cah.header));
            header2.addMetaDataLine(new VCFHeaderLine("VcfGeneSplitter.Name", String.valueOf(first.key)));
            out.writeHeader(header2);
            for (final VariantContext ctx : variants) {
                out.add(ctx);
            }
            // closing the writer is safe: the zip stream is wrapped by IOUtils.uncloseableOutputStream
            out.close();
            zout.closeEntry();
        }
        eqiter.close();
        eqiter = null;
        iter.close();
        iter = null;
        LOG.info("saving report");
        pw.flush();
        pw.close();
        final ZipEntry entry = new ZipEntry(this.baseZipDir + "/manifest.bed");
        zout.putNextEntry(entry);
        IOUtils.copyTo(tmpReportFile, zout);
        zout.closeEntry();
        zout.finish();
        zout.close();
        zout = null;
        return RETURN_OK;
    } catch (final Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(eqiter);
        CloserUtil.close(iter);
        if (sortingcollection != null)
            sortingcollection.cleanup();
        CloserUtil.close(in);
        // non-null only on the failure path: release the zip stream before its underlying file stream
        CloserUtil.close(zout);
        CloserUtil.close(fos);
        CloserUtil.close(pw);
        // the report now lives in the zip (or the run failed): drop the temporary copy right away
        if (tmpReportFile != null)
            tmpReportFile.delete();
    }
}
Also used : VCFHeaderLine(htsjdk.variant.vcf.VCFHeaderLine) VCFUtils(com.github.lindenb.jvarkit.util.vcf.VCFUtils) ZipEntry(java.util.zip.ZipEntry) ArrayList(java.util.ArrayList) VariantContext(htsjdk.variant.variantcontext.VariantContext) EqualRangeIterator(com.github.lindenb.jvarkit.util.iterator.EqualRangeIterator) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) VCFHeader(htsjdk.variant.vcf.VCFHeader) PrintWriter(java.io.PrintWriter) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) IOException(java.io.IOException) VepPredictionParser(com.github.lindenb.jvarkit.util.vcf.predictions.VepPredictionParser) ZipOutputStream(java.util.zip.ZipOutputStream) FileOutputStream(java.io.FileOutputStream) BufferedReader(java.io.BufferedReader) File(java.io.File) VepPredictionParserFactory(com.github.lindenb.jvarkit.util.vcf.predictions.VepPredictionParserFactory)

Example 7 with EqualRangeIterator

use of com.github.lindenb.jvarkit.util.iterator.EqualRangeIterator in project jvarkit by lindenb.

the class VCFCompareGT method doWork.

/**
 * Compares the genotypes of the same samples across several VCF files.
 *
 * <p>Every called genotype of every input file is turned into a
 * {@code Variant} record and pushed into a disk-backed sorting collection.
 * Records are then grouped with {@code PosComparator}; for each group one
 * output variant is written with one genotype per (sample, file) pair,
 * named {@code <sample>_<fileIndex>}. The FORMAT fields {@code GCH}
 * (genotype differs between files) and {@code GNW} (sample not seen in
 * every file) and the INFO field {@code GDF} (samples with a difference)
 * describe the comparison.
 *
 * @param arguments paths of the VCF files (or lists of files) to compare
 * @return {@code 0} on success, {@code -1} if no VCF was given or an
 *         exception occurred
 */
@Override
public int doWork(final List<String> arguments) {
    final List<File> inputVcfFiles = new ArrayList<>(IOUtil.unrollFiles(arguments.stream().map(F -> new File(F)).collect(Collectors.toCollection(HashSet::new)), ".vcf", "vcf.gz"));
    if (inputVcfFiles.isEmpty()) {
        LOG.error("VCF missing.");
        return -1;
    }
    final VariantComparator varcmp = new VariantComparator();
    SortingCollection<Variant> variants = null;
    final Set<String> sampleNames = new LinkedHashSet<>();
    try {
        variants = SortingCollection.newInstance(Variant.class, new VariantCodec(), varcmp, writingSortingCollection.getMaxRecordsInRam(), writingSortingCollection.getTmpPaths());
        variants.setDestructiveIteration(true);
        final Set<VCFHeaderLine> metaData = new HashSet<VCFHeaderLine>();
        metaData.add(new VCFHeaderLine(getClass().getSimpleName(), "version:" + getVersion() + " command:" + getProgramCommandLine()));
        for (int i = 0; i < inputVcfFiles.size(); ++i) {
            final File vcfFile = inputVcfFiles.get(i);
            LOG.info("Opening " + vcfFile);
            // try-with-resources: the reader and its iterator are released even if decoding fails
            try (VCFFileReader vcfFileReader = new VCFFileReader(vcfFile, false);
                CloseableIterator<VariantContext> iter = vcfFileReader.iterator()) {
                final VCFHeader header = vcfFileReader.getFileHeader();
                sampleNames.addAll(header.getSampleNamesInOrder());
                metaData.add(new VCFHeaderLine(getClass().getSimpleName() + "_" + ((i) + 1), "File: " + vcfFile.getPath()));
                long nLines = 0;
                while (iter.hasNext()) {
                    final VariantContext var = iter.next();
                    if (nLines++ % 10000 == 0) {
                        LOG.info(vcfFile + " " + nLines);
                    }
                    if (!var.isVariant())
                        continue;
                    if (!var.hasGenotypes())
                        continue;
                    for (final Genotype genotype : var.getGenotypes()) {
                        if (!genotype.isAvailable())
                            continue;
                        if (!genotype.isCalled())
                            continue;
                        if (genotype.isNoCall())
                            continue;
                        // only haploid and diploid genotypes are compared
                        final List<Allele> alleles = genotype.getAlleles();
                        if (alleles == null)
                            continue;
                        if (alleles.size() != 1 && alleles.size() != 2)
                            continue;
                        final Variant rec = new Variant();
                        rec.file_index = i + 1;
                        rec.sampleName = genotype.getSampleName();
                        rec.chrom = var.getContig();
                        rec.start = var.getStart();
                        rec.end = var.getEnd();
                        rec.ref = var.getReference().getDisplayString();
                        if (var.hasID()) {
                            rec.id = var.getID();
                        }
                        if (genotype.hasDP()) {
                            rec.dp = genotype.getDP();
                        }
                        if (genotype.hasGQ()) {
                            rec.gq = genotype.getGQ();
                        }
                        if (alleles.size() == 1) {
                            rec.a1 = alleles.get(0).getDisplayString().toUpperCase();
                            rec.a2 = rec.a1;
                        } else {
                            rec.a1 = alleles.get(0).getDisplayString().toUpperCase();
                            rec.a2 = alleles.get(1).getDisplayString().toUpperCase();
                            // store the two alleles in lexical order so that A/T equals T/A
                            if (rec.a1.compareTo(rec.a2) > 0) {
                                final String tmp = rec.a2;
                                rec.a2 = rec.a1;
                                rec.a1 = tmp;
                            }
                        }
                        variants.add(rec);
                    }
                }
            }
        }
        variants.doneAdding();
        LOG.info("Done Adding");
        // one output sample per (input sample, input file) pair
        final Set<String> newSampleNames = new HashSet<>();
        for (int i = 0; i < inputVcfFiles.size(); ++i) {
            for (final String sample : sampleNames) {
                newSampleNames.add(sample + "_" + ((i) + 1));
            }
        }
        final String GenpotypeChangedKey = "GCH";
        final String GenpotypeCreated = "GNW";
        final String GenpotypeDiff = "GDF";
        metaData.add(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_KEY, 1, VCFHeaderLineType.String, "Genotype"));
        metaData.add(new VCFFormatHeaderLine("DP", 1, VCFHeaderLineType.Integer, "Depth"));
        metaData.add(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_QUALITY_KEY, 1, VCFHeaderLineType.Integer, "Qual"));
        metaData.add(new VCFFormatHeaderLine(GenpotypeChangedKey, 1, VCFHeaderLineType.Integer, "Changed Genotype"));
        metaData.add(new VCFFormatHeaderLine(GenpotypeCreated, 1, VCFHeaderLineType.Integer, "Genotype Created/Deleted"));
        metaData.add(new VCFInfoHeaderLine(GenpotypeDiff, VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.String, "Samples with Genotype Difference"));
        final VCFHeader header = new VCFHeader(metaData, new ArrayList<String>(newSampleNames));
        // the writer is closed by try-with-resources, also when building a variant fails
        try (VariantContextWriter w = super.openVariantContextWriter(outputFile)) {
            w.writeHeader(header);
            final PosComparator posCompare = new PosComparator();
            final EqualRangeIterator<Variant> iter = new EqualRangeIterator<>(variants.iterator(), posCompare);
            try {
                while (iter.hasNext()) {
                    final List<Variant> row = iter.next();
                    /**
                     * samples whose genotype is not the same in every file
                     */
                    final Set<String> samplesModified = new TreeSet<>();
                    /**
                     * samples that were not seen once per input file
                     */
                    final Set<String> samplesCreates = new TreeSet<>();
                    final Counter<String> samplesSeen = new Counter<>();
                    for (int x = 0; x < row.size(); ++x) {
                        final Variant var1 = row.get(x);
                        samplesSeen.incr(var1.sampleName);
                        for (int y = x + 1; y < row.size(); ++y) {
                            final Variant var2 = row.get(y);
                            if (!var2.sampleName.equals(var1.sampleName))
                                continue;
                            if (var1.a1.equals(var2.a1) && var1.a2.equals(var2.a2))
                                continue;
                            samplesModified.add(var1.sampleName);
                        }
                    }
                    for (final String sampleName : samplesSeen.keySet()) {
                        if (samplesSeen.count(sampleName) != inputVcfFiles.size()) {
                            samplesCreates.add(sampleName);
                        }
                    }
                    final Variant first = row.get(0);
                    final Set<Allele> alleles = new HashSet<>();
                    alleles.add(Allele.create(first.ref, true));
                    for (final Variant var : row) {
                        alleles.add(Allele.create(var.a1, var.a1.equalsIgnoreCase(var.ref)));
                        alleles.add(Allele.create(var.a2, var.a2.equalsIgnoreCase(var.ref)));
                    }
                    final VariantContextBuilder b = new VariantContextBuilder(getClass().getName(), first.chrom, first.start, first.end, alleles);
                    // build genotypes
                    final List<Genotype> genotypes = new ArrayList<Genotype>();
                    for (final Variant var : row) {
                        // alleles for this genotype
                        final List<Allele> galleles = new ArrayList<Allele>();
                        galleles.add(Allele.create(var.a1, var.a1.equalsIgnoreCase(var.ref)));
                        galleles.add(Allele.create(var.a2, var.a2.equalsIgnoreCase(var.ref)));
                        final GenotypeBuilder gb = new GenotypeBuilder();
                        gb.DP(var.dp);
                        gb.alleles(galleles);
                        gb.name(var.sampleName + "_" + var.file_index);
                        gb.GQ(var.gq);
                        gb.attribute(GenpotypeChangedKey, samplesModified.contains(var.sampleName) ? 1 : 0);
                        gb.attribute(GenpotypeCreated, samplesCreates.contains(var.sampleName) ? 1 : 0);
                        genotypes.add(gb.make());
                    }
                    b.genotypes(genotypes);
                    b.id(first.id);
                    final boolean unchanged = samplesModified.isEmpty() && samplesCreates.isEmpty();
                    if (!unchanged) {
                        final Set<String> set2 = new TreeSet<String>(samplesModified);
                        set2.addAll(samplesCreates);
                        b.attribute(GenpotypeDiff, set2.toArray());
                    }
                    if (!only_print_modified || !unchanged) {
                        w.add(b.make());
                    }
                }
            } finally {
                iter.close();
            }
        }
    } catch (final Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        if (variants != null)
            try {
                variants.cleanup();
            } catch (final Exception ignored) {
                // best effort: failing to remove temporary files must not change the exit status
            }
    }
    return 0;
}
Also used : Genotype(htsjdk.variant.variantcontext.Genotype) VCFHeaderLine(htsjdk.variant.vcf.VCFHeaderLine) DataInputStream(java.io.DataInputStream) CloseableIterator(htsjdk.samtools.util.CloseableIterator) Allele(htsjdk.variant.variantcontext.Allele) Program(com.github.lindenb.jvarkit.util.jcommander.Program) IOUtil(htsjdk.samtools.util.IOUtil) Parameter(com.beust.jcommander.Parameter) VCFFileReader(htsjdk.variant.vcf.VCFFileReader) VCFHeader(htsjdk.variant.vcf.VCFHeader) TreeSet(java.util.TreeSet) ParametersDelegate(com.beust.jcommander.ParametersDelegate) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) DataOutputStream(java.io.DataOutputStream) AbstractDataCodec(com.github.lindenb.jvarkit.util.picard.AbstractDataCodec) Launcher(com.github.lindenb.jvarkit.util.jcommander.Launcher) LinkedHashSet(java.util.LinkedHashSet) VCFConstants(htsjdk.variant.vcf.VCFConstants) SortingCollection(htsjdk.samtools.util.SortingCollection) VCFHeaderLineType(htsjdk.variant.vcf.VCFHeaderLineType) Counter(com.github.lindenb.jvarkit.util.Counter) GenotypeBuilder(htsjdk.variant.variantcontext.GenotypeBuilder) Logger(com.github.lindenb.jvarkit.util.log.Logger) Set(java.util.Set) IOException(java.io.IOException) Collectors(java.util.stream.Collectors) File(java.io.File) List(java.util.List) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) EqualRangeIterator(com.github.lindenb.jvarkit.util.iterator.EqualRangeIterator) VCFInfoHeaderLine(htsjdk.variant.vcf.VCFInfoHeaderLine) VariantContext(htsjdk.variant.variantcontext.VariantContext) VCFFormatHeaderLine(htsjdk.variant.vcf.VCFFormatHeaderLine) Comparator(java.util.Comparator) VCFHeaderLineCount(htsjdk.variant.vcf.VCFHeaderLineCount) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) LinkedHashSet(java.util.LinkedHashSet) VCFHeaderLine(htsjdk.variant.vcf.VCFHeaderLine) ArrayList(java.util.ArrayList) VCFFileReader(htsjdk.variant.vcf.VCFFileReader) 
VariantContext(htsjdk.variant.variantcontext.VariantContext) EqualRangeIterator(com.github.lindenb.jvarkit.util.iterator.EqualRangeIterator) Counter(com.github.lindenb.jvarkit.util.Counter) TreeSet(java.util.TreeSet) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) VCFHeader(htsjdk.variant.vcf.VCFHeader) HashSet(java.util.HashSet) LinkedHashSet(java.util.LinkedHashSet) VCFFormatHeaderLine(htsjdk.variant.vcf.VCFFormatHeaderLine) Genotype(htsjdk.variant.variantcontext.Genotype) GenotypeBuilder(htsjdk.variant.variantcontext.GenotypeBuilder) VCFInfoHeaderLine(htsjdk.variant.vcf.VCFInfoHeaderLine) IOException(java.io.IOException) Allele(htsjdk.variant.variantcontext.Allele) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) File(java.io.File)

Example 8 with EqualRangeIterator

use of com.github.lindenb.jvarkit.util.iterator.EqualRangeIterator in project jvarkit by lindenb.

the class VcfIn method scanFileSorted.

/**
 * Scans a user VCF and a database VCF in parallel and flags each user
 * variant depending on whether it is found in the database.
 *
 * <p>User variants are grouped with the tid/pos comparator built from the
 * user VCF dictionary. For each group the database variants returned by
 * {@code equalRangeDbIter.next(...)} for the group's first variant are
 * fetched, and every user variant is passed to {@code addVariant} with
 * {@code keep == true} when a database variant satisfies both
 * {@code sameContext} and {@code allUserAltFoundInDatabase}.
 *
 * @param vcw            destination writer; closed before returning
 * @param databaseVcfUri URI of the database VCF
 * @param userVcfIn      iterator over the user VCF; closed before returning
 * @return {@code RETURN_OK} on success, {@code -1} if the user VCF has no
 *         sequence dictionary or an exception occurred
 */
private int scanFileSorted(final VariantContextWriter vcw, final String databaseVcfUri, final VcfIterator userVcfIn) {
    EqualRangeVcfIterator equalRangeDbIter = null;
    EqualRangeIterator<VariantContext> equalRangeUserVcf = null;
    try {
        final VCFHeader header = new VCFHeader(userVcfIn.getHeader());
        final SAMSequenceDictionary userVcfDict = header.getSequenceDictionary();
        // a dictionary is required to build the comparator below
        if (userVcfDict == null) {
            LOG.error(JvarkitException.VcfDictionaryMissing.getMessage("user file"));
            return -1;
        }
        final Comparator<VariantContext> userVcfComparator = VCFUtils.createTidPosComparator(userVcfDict);
        equalRangeDbIter = new EqualRangeVcfIterator(VCFUtils.createVcfIterator(databaseVcfUri), userVcfComparator);
        this.addMetaData(header);
        vcw.writeHeader(header);
        final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(userVcfDict).logger(LOG);
        equalRangeUserVcf = new EqualRangeIterator<>(userVcfIn, userVcfComparator);
        while (equalRangeUserVcf.hasNext()) {
            final List<VariantContext> ctxList = equalRangeUserVcf.next();
            progress.watch(ctxList.get(0));
            // database variants matching the first user variant of this group
            final List<VariantContext> dbContexes = new ArrayList<VariantContext>(equalRangeDbIter.next(ctxList.get(0)));
            for (final VariantContext userCtx : ctxList) {
                final boolean keep = dbContexes.stream().filter(V -> sameContext(userCtx, V)).anyMatch(V -> allUserAltFoundInDatabase(userCtx, V));
                addVariant(vcw, userCtx, keep);
            }
            // stop early when the output can no longer be written
            if (vcw.checkError())
                break;
        }
        return RETURN_OK;
    } catch (final Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        // closed here rather than in the try block so that it is also released on failure
        CloserUtil.close(equalRangeUserVcf);
        CloserUtil.close(equalRangeDbIter);
        CloserUtil.close(userVcfIn);
        CloserUtil.close(vcw);
    }
}
Also used : VCFUtils(com.github.lindenb.jvarkit.util.vcf.VCFUtils) Allele(htsjdk.variant.variantcontext.Allele) CloseableIterator(htsjdk.samtools.util.CloseableIterator) Program(com.github.lindenb.jvarkit.util.jcommander.Program) Parameter(com.beust.jcommander.Parameter) VCFFileReader(htsjdk.variant.vcf.VCFFileReader) VCFHeader(htsjdk.variant.vcf.VCFHeader) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) Launcher(com.github.lindenb.jvarkit.util.jcommander.Launcher) TabixVcfFileReader(com.github.lindenb.jvarkit.util.vcf.TabixVcfFileReader) CloserUtil(htsjdk.samtools.util.CloserUtil) VCFFilterHeaderLine(htsjdk.variant.vcf.VCFFilterHeaderLine) Iterator(java.util.Iterator) Logger(com.github.lindenb.jvarkit.util.log.Logger) VcfIterator(com.github.lindenb.jvarkit.util.vcf.VcfIterator) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) Set(java.util.Set) JvarkitException(com.github.lindenb.jvarkit.lang.JvarkitException) File(java.io.File) List(java.util.List) EqualRangeIterator(com.github.lindenb.jvarkit.util.iterator.EqualRangeIterator) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) VariantContext(htsjdk.variant.variantcontext.VariantContext) Comparator(java.util.Comparator) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) ArrayList(java.util.ArrayList) VariantContext(htsjdk.variant.variantcontext.VariantContext) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) JvarkitException(com.github.lindenb.jvarkit.lang.JvarkitException) VCFHeader(htsjdk.variant.vcf.VCFHeader)

Example 9 with EqualRangeIterator

use of com.github.lindenb.jvarkit.util.iterator.EqualRangeIterator in project jvarkit by lindenb.

the class VCFComposite method doWork.

/**
 * Scans a VCF contig by contig and hands the variants of each gene to the
 * selected disease model.
 *
 * <p>When {@code listModels} is set, the names and descriptions of the
 * available model types are printed and nothing else is done. Otherwise
 * the pedigree is loaded (it must contain at least one affected and one
 * unaffected sample) and the VCF is read line by line. Accepted variants
 * are stored once per gene key (from the ANN and VEP predictions, prefixed
 * with the contig) in a sorting collection; each time the contig changes,
 * or at end of input, the collection is grouped by gene and every group is
 * passed to {@code model.scan}.
 *
 * @param args zero or one input VCF; stdin is read when none is given
 * @return {@code 0} on success, {@code -1} on error
 */
@Override
public int doWork(final List<String> args) {
    PrintWriter out = null;
    LineIterator r = null;
    SortingCollection<GeneAndVariant> sorting = null;
    try {
        out = super.openFileOrStdoutAsPrintWriter(this.outputFile);
        if (listModels) {
            for (final Type t : Type.values()) {
                out.println(t.name());
                out.println("\t" + t.getDescription());
            }
            out.flush();
            return 0;
        }
        this.pedigree = Pedigree.newParser().parse(pedigreeFile);
        if (this.pedigree.getAffected().isEmpty()) {
            LOG.error("No Affected sample in " + this.pedigreeFile);
            return -1;
        }
        if (this.pedigree.getUnaffected().isEmpty()) {
            LOG.error("No Unaffected sample in " + this.pedigreeFile);
            return -1;
        }
        final DiseaseModel model = this.createModel();
        final String inputName = super.oneFileOrNull(args);
        r = (inputName == null ? IOUtils.openStreamForLineIterator(stdin()) : IOUtils.openURIForLineIterator(inputName));
        final VCFCodec codec = new VCFCodec();
        final VCFHeader header = (VCFHeader) codec.readActualHeader(r);
        final AnnPredictionParser annParser = new AnnPredictionParserFactory(header).get();
        final VepPredictionParser vepParser = new VepPredictionParserFactory(header).get();
        String prevContig = null;
        for (; ; ) {
            String line;
            final VariantContext ctx;
            if (r.hasNext()) {
                line = r.next();
                ctx = codec.decode(line);
            } else {
                // end of input: ctx == null triggers the final dump below
                line = null;
                ctx = null;
            }
            if (ctx == null || !ctx.getContig().equals(prevContig)) {
                // contig changed (or end of input): process what was collected for the previous contig
                if (sorting != null) {
                    LOG.debug("Dump contig " + prevContig);
                    sorting.doneAdding();
                    final CloseableIterator<GeneAndVariant> iter2 = sorting.iterator();
                    final EqualRangeIterator<GeneAndVariant> eqiter = new EqualRangeIterator<>(iter2, (A, B) -> A.gene.compareTo(B.gene));
                    try {
                        while (eqiter.hasNext()) {
                            final List<GeneAndVariant> variants = eqiter.next();
                            model.scan(variants.get(0).gene, variants.stream().map(L -> codec.decode(L.ctxLine)).collect(Collectors.toList()), out);
                        }
                    } finally {
                        // release the iterators even if the model fails on one gene
                        eqiter.close();
                        iter2.close();
                    }
                    sorting.cleanup();
                }
                sorting = null;
                if (ctx == null)
                    break;
                prevContig = ctx.getContig();
            }
            if (!ctx.isVariant())
                continue;
            if (!acceptFiltered && ctx.isFiltered())
                continue;
            if (!acceptID && ctx.hasID())
                continue;
            if (!model.accept(ctx))
                continue;
            // gene keys are prefixed with the contig
            final Set<String> geneKeys = new HashSet<>();
            for (final AnnPredictionParser.AnnPrediction pred : annParser.getPredictions(ctx)) {
                geneKeys.addAll(pred.getGeneKeys().stream().map(S -> ctx.getContig() + "_" + S).collect(Collectors.toSet()));
            }
            for (final VepPredictionParser.VepPrediction pred : vepParser.getPredictions(ctx)) {
                geneKeys.addAll(pred.getGeneKeys().stream().map(S -> ctx.getContig() + "_" + S).collect(Collectors.toSet()));
            }
            if (sorting == null) {
                // created lazily: one sorting collection per contig, ordered by gene then by VCF line
                sorting = SortingCollection.newInstance(GeneAndVariant.class, new GeneAndVariantCodec(), (A, B) -> {
                    int i = A.gene.compareTo(B.gene);
                    if (i != 0)
                        return i;
                    return A.ctxLine.compareTo(B.ctxLine);
                }, this.writingSortingCollection.getMaxRecordsInRam(), this.writingSortingCollection.getTmpPaths());
                sorting.setDestructiveIteration(true);
            }
            for (final String gk : geneKeys) {
                final GeneAndVariant gav = new GeneAndVariant();
                gav.gene = gk;
                gav.ctxLine = line;
                sorting.add(gav);
            }
        }
        out.flush();
        out.close();
        out = null;
        return 0;
    } catch (Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        // non-null only when the scan was interrupted: remove the temporary files
        if (sorting != null)
            sorting.cleanup();
        CloserUtil.close(r);
        CloserUtil.close(out);
    }
}
Also used : AnnPredictionParser(com.github.lindenb.jvarkit.util.vcf.predictions.AnnPredictionParser) Genotype(htsjdk.variant.variantcontext.Genotype) DataInputStream(java.io.DataInputStream) CloseableIterator(htsjdk.samtools.util.CloseableIterator) LineIterator(htsjdk.tribble.readers.LineIterator) Program(com.github.lindenb.jvarkit.util.jcommander.Program) Parameter(com.beust.jcommander.Parameter) VCFHeader(htsjdk.variant.vcf.VCFHeader) AnnPredictionParser(com.github.lindenb.jvarkit.util.vcf.predictions.AnnPredictionParser) ParametersDelegate(com.beust.jcommander.ParametersDelegate) HashSet(java.util.HashSet) ContigPosRef(com.github.lindenb.jvarkit.util.vcf.ContigPosRef) DataOutputStream(java.io.DataOutputStream) AbstractDataCodec(com.github.lindenb.jvarkit.util.picard.AbstractDataCodec) Pedigree(com.github.lindenb.jvarkit.util.Pedigree) IOUtils(com.github.lindenb.jvarkit.io.IOUtils) Launcher(com.github.lindenb.jvarkit.util.jcommander.Launcher) VepPredictionParser(com.github.lindenb.jvarkit.util.vcf.predictions.VepPredictionParser) VCFCodec(htsjdk.variant.vcf.VCFCodec) VepPredictionParserFactory(com.github.lindenb.jvarkit.util.vcf.predictions.VepPredictionParserFactory) CloserUtil(htsjdk.samtools.util.CloserUtil) PrintWriter(java.io.PrintWriter) SortingCollection(htsjdk.samtools.util.SortingCollection) Predicate(java.util.function.Predicate) Logger(com.github.lindenb.jvarkit.util.log.Logger) Set(java.util.Set) IOException(java.io.IOException) Collectors(java.util.stream.Collectors) File(java.io.File) List(java.util.List) EqualRangeIterator(com.github.lindenb.jvarkit.util.iterator.EqualRangeIterator) AnnPredictionParserFactory(com.github.lindenb.jvarkit.util.vcf.predictions.AnnPredictionParserFactory) VariantContext(htsjdk.variant.variantcontext.VariantContext) VariantContext(htsjdk.variant.variantcontext.VariantContext) LineIterator(htsjdk.tribble.readers.LineIterator) EqualRangeIterator(com.github.lindenb.jvarkit.util.iterator.EqualRangeIterator) 
AnnPredictionParserFactory(com.github.lindenb.jvarkit.util.vcf.predictions.AnnPredictionParserFactory) VCFHeader(htsjdk.variant.vcf.VCFHeader) PrintWriter(java.io.PrintWriter) HashSet(java.util.HashSet) VCFCodec(htsjdk.variant.vcf.VCFCodec) IOException(java.io.IOException) VepPredictionParser(com.github.lindenb.jvarkit.util.vcf.predictions.VepPredictionParser) VepPredictionParserFactory(com.github.lindenb.jvarkit.util.vcf.predictions.VepPredictionParserFactory)

Example 10 with EqualRangeIterator

use of com.github.lindenb.jvarkit.util.iterator.EqualRangeIterator in project jvarkit by lindenb.

the class VcfLoopOverGenes method doWork.

/**
 * Entry point of the tool. The behaviour depends on whether {@code this.geneFile} is set.
 *
 * <p><b>No gene file:</b> the VCF (whole file, or {@code this.regionStr} when it is not blank)
 * is scanned and every variant is turned into one or more {@code GeneLoc} records according to
 * {@code this.splitMethod} (prediction annotations, a sliding window of N variants, or a sliding
 * window over the contig). The records are sorted, grouped with {@code this.compareGeneName}
 * and one tab-delimited line per group is printed to {@code this.outputFile} (or stdout):
 * contig, 0-based start, end, generated identifier, name, source type, number of records.
 *
 * <p><b>Gene file given:</b> each line of that BED-like file is used to query the VCF. Variants
 * are restricted to the predictions matching the gene name (for ANN/VEP source types), written
 * to one VCF per line inside the output archive, and, when {@code this.exec} is not empty, an
 * external command is run for that VCF, possibly in a thread pool of {@code this.nJobs} workers.
 *
 * @param args command line arguments; the VCF path is obtained through
 *             {@code oneAndOnlyOneFile(args)}
 * @return 0 on success, a non-zero value (-1) on any error, including a failed external command
 */
@SuppressWarnings("resource")
@Override
public int doWork(final List<String> args) {
    PrintWriter pw = null;
    // hoisted out of the 'gene file' branch so that it can be released in 'finally'
    PrintWriter manifest = null;
    VCFFileReader vcfFileReader = null;
    CloseableIterator<VariantContext> iter = null;
    CloseableIterator<GeneLoc> iter2 = null;
    BufferedReader br = null;
    ArchiveFactory archive = null;
    // hoisted out of the 'gene file' branch so that worker threads can be stopped in 'finally'
    ExecutorService executorService = null;
    try {
        final File vcf = new File(oneAndOnlyOneFile(args));
        // an index is required as soon as the VCF is going to be queried by interval
        vcfFileReader = new VCFFileReader(vcf, (this.geneFile != null || !StringUtil.isBlank(this.regionStr)));
        this.dictionary = vcfFileReader.getFileHeader().getSequenceDictionary();
        if (this.dictionary == null) {
            throw new JvarkitException.VcfDictionaryMissing(vcf);
        }
        final VcfTools tools = new VcfTools(vcfFileReader.getFileHeader());
        // make sure a non-empty prefix ends with a dot
        if (!this.prefix.isEmpty() && !this.prefix.endsWith(".")) {
            this.prefix += ".";
        }
        if (this.geneFile == null) {
            /* ---- mode 1 : build the list of genes / windows from the VCF ---- */
            final SortingCollection<GeneLoc> sortingCollection = SortingCollection.newInstance(GeneLoc.class, new GeneLocCodec(), (A, B) -> A.compareTo(B), this.writingSortingCollection.getMaxRecordsInRam(), this.writingSortingCollection.getTmpPaths());
            sortingCollection.setDestructiveIteration(true);
            if (StringUtil.isBlank(this.regionStr)) {
                iter = vcfFileReader.iterator();
            } else {
                final IntervalParser parser = new IntervalParser(this.dictionary);
                parser.setContigNameIsWholeContig(true);
                final Interval interval = parser.parse(this.regionStr);
                if (interval == null) {
                    LOG.error("Cannot parse interval " + this.regionStr);
                    return -1;
                }
                iter = vcfFileReader.query(interval.getContig(), interval.getStart(), interval.getEnd());
            }
            final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(vcfFileReader.getFileHeader()).logger(LOG);
            if (this.splitMethod.equals(SplitMethod.Annotations)) {
                /* one record per non-blank identifier found in the ANN and VEP predictions */
                while (iter.hasNext()) {
                    final VariantContext ctx = progress.watch(iter.next());
                    for (final AnnPredictionParser.AnnPrediction pred : tools.getAnnPredictionParser().getPredictions(ctx)) {
                        if (this.snpEffNoIntergenic && pred.isIntergenicRegion()) {
                            continue;
                        }
                        if (!StringUtil.isBlank(pred.getGeneName())) {
                            sortingCollection.add(create(ctx, pred.getGeneName(), SourceType.ANN_GeneName));
                        }
                        if (!StringUtil.isBlank(pred.getGeneId())) {
                            sortingCollection.add(create(ctx, pred.getGeneId(), SourceType.ANN_GeneID));
                        }
                        if (!StringUtil.isBlank(pred.getFeatureId())) {
                            sortingCollection.add(create(ctx, pred.getFeatureId(), SourceType.ANN_FeatureID));
                        }
                    }
                    for (final VepPredictionParser.VepPrediction pred : tools.getVepPredictionParser().getPredictions(ctx)) {
                        if (!StringUtil.isBlank(pred.getGene())) {
                            sortingCollection.add(create(ctx, pred.getGene(), SourceType.VEP_Gene));
                        }
                        if (!StringUtil.isBlank(pred.getFeature())) {
                            sortingCollection.add(create(ctx, pred.getFeature(), SourceType.VEP_Feature));
                        }
                        if (!StringUtil.isBlank(pred.getSymbol())) {
                            sortingCollection.add(create(ctx, pred.getSymbol(), SourceType.VEP_Symbol));
                        }
                        if (!StringUtil.isBlank(pred.getHgncId())) {
                            sortingCollection.add(create(ctx, pred.getHgncId(), SourceType.VEP_HgncId));
                        }
                    }
                }
            } else if (this.splitMethod.equals(SplitMethod.VariantSlidingWindow)) {
                /* split VCF per sliding window of variants */
                if (this.variantsWinCount < 1) {
                    LOG.error("Bad value for variantsWinCount");
                    return -1;
                }
                if (this.variantsWinShift < 1 || this.variantsWinShift > this.variantsWinCount) {
                    LOG.error("Bad value for variantsWinShift");
                    return -1;
                }
                final List<VariantContext> buffer = new ArrayList<>(this.variantsWinCount);
                /**
                 * routine to dump buffer into sorting collection
                 */
                final Runnable dumpBuffer = () -> {
                    if (buffer.isEmpty())
                        return;
                    final String contig = buffer.get(0).getContig();
                    final int chromStart = buffer.stream().mapToInt(CTX -> CTX.getStart()).min().getAsInt();
                    // use last of start too
                    final int chromEnd0 = buffer.stream().mapToInt(CTX -> CTX.getStart()).max().getAsInt();
                    // final int chromEnd1 = buffer.stream().mapToInt(CTX->CTX.getEnd()).max().getAsInt();
                    final String identifier = contig + "_" + String.format(NUM_FORMAT, chromStart) + "_" + String.format(NUM_FORMAT, chromEnd0);
                    for (final VariantContext ctx : buffer) {
                        sortingCollection.add(create(ctx, identifier, SourceType.SlidingVariants));
                    }
                };
                while (iter.hasNext()) {
                    VariantContext ctx = progress.watch(iter.next());
                    /* reduce the memory footprint for this context */
                    ctx = new VariantContextBuilder(ctx).genotypes(Collections.emptyList()).unfiltered().rmAttributes(new ArrayList<>(ctx.getAttributes().keySet())).make();
                    // a window never spans two contigs
                    if (!buffer.isEmpty() && !buffer.get(0).getContig().equals(ctx.getContig())) {
                        dumpBuffer.run();
                        buffer.clear();
                    }
                    buffer.add(ctx);
                    if (buffer.size() >= this.variantsWinCount) {
                        dumpBuffer.run();
                        // slide the window: drop the 'variantsWinShift' oldest variants
                        final int fromIndex = Math.min(this.variantsWinShift, buffer.size());
                        buffer.subList(0, fromIndex).clear();
                    }
                }
                dumpBuffer.run();
                buffer.clear();
            } else if (this.splitMethod.equals(SplitMethod.ContigSlidingWindow)) {
                /* split VCF per sliding window of fixed length over the contig */
                if (this.contigWinLength < 1) {
                    LOG.error("Bad value for contigWinCount");
                    return -1;
                }
                if (this.contigWinShift < 1 || this.contigWinShift > this.contigWinLength) {
                    LOG.error("Bad value for contigWinShift");
                    return -1;
                }
                while (iter.hasNext()) {
                    VariantContext ctx = progress.watch(iter.next());
                    /* reduce the memory footprint for this context */
                    ctx = new VariantContextBuilder(ctx).genotypes(Collections.emptyList()).unfiltered().rmAttributes(new ArrayList<>(ctx.getAttributes().keySet())).make();
                    // enumerate every window [start, start + length] whose start is <= variant start
                    // and keep those reaching the variant
                    int start = 0;
                    while (start <= ctx.getStart()) {
                        if (start + this.contigWinLength >= ctx.getStart()) {
                            final int chromStart = start;
                            final int chromEnd0 = start + this.contigWinLength;
                            final String identifier = ctx.getContig() + "_" + String.format(NUM_FORMAT, chromStart) + "_" + String.format(NUM_FORMAT, chromEnd0);
                            sortingCollection.add(create(ctx, identifier, SourceType.SlidingContig));
                        }
                        start += this.contigWinShift;
                    }
                }
            } else {
                throw new IllegalStateException("No such method: " + this.splitMethod);
            }
            sortingCollection.doneAdding();
            progress.finish();
            iter.close();
            iter = null;
            pw = super.openFileOrStdoutAsPrintWriter(this.outputFile);
            iter2 = sortingCollection.iterator();
            // group consecutive records that are equal according to compareGeneName
            final EqualRangeIterator<GeneLoc> eqiter = new EqualRangeIterator<>(iter2, this.compareGeneName);
            int geneIdentifierId = 0;
            while (eqiter.hasNext()) {
                final List<GeneLoc> gene = eqiter.next();
                pw.print(gene.get(0).contig);
                pw.print('\t');
                // -1 for BED
                pw.print(gene.stream().mapToInt(G -> G.start).min().getAsInt() - 1);
                pw.print('\t');
                pw.print(gene.stream().mapToInt(G -> G.end).max().getAsInt());
                pw.print('\t');
                pw.print(this.prefix + String.format("%09d", ++geneIdentifierId));
                pw.print('\t');
                pw.print(gene.get(0).geneName);
                pw.print('\t');
                pw.print(gene.get(0).sourceType);
                pw.print('\t');
                pw.print(gene.size());
                pw.println();
            }
            pw.flush();
            pw.close();
            pw = null;
            eqiter.close();
            iter2.close();
            iter2 = null;
            sortingCollection.cleanup();
        } else {
            /* ---- mode 2 : extract one VCF per line of the gene file ---- */
            if (this.nJobs < 1) {
                this.nJobs = Math.max(1, Runtime.getRuntime().availableProcessors());
                LOG.info("setting njobs to " + this.nJobs);
            }
            final List<Future<Integer>> futureResults;
            if (this.nJobs > 1) {
                executorService = new ThreadPoolExecutor(this.nJobs, this.nJobs, 0L, TimeUnit.MILLISECONDS, new LinkedBlockingQueue<Runnable>());
                futureResults = new ArrayList<>();
            } else {
                executorService = null;
                futureResults = Collections.emptyList();
            }
            if (this.outputFile == null) {
                LOG.error("When scanning a VCF with " + this.geneFile + ". Output file must be defined");
                // stop here: everything below dereferences this.outputFile
                return -1;
            }
            if (!this.exec.isEmpty()) {
                if (this.outputFile.getName().endsWith(".zip")) {
                    LOG.error("Cannot execute " + this.exec + " when saving to a zip.");
                    return -1;
                }
            }
            archive = ArchiveFactory.open(this.outputFile);
            manifest = this.deleteAfterCommand && !this.exec.isEmpty() ? // all files will be deleted, no manifest needed
            new PrintWriter(new NullOuputStream()) : archive.openWriter(this.prefix + "manifest.txt");
            br = IOUtils.openFileForBufferedReading(this.geneFile);
            final BedLineCodec bedCodec = new BedLineCodec();
            for (; ; ) {
                // poll the pending tasks and stop as soon as one of them was cancelled or failed
                if (!futureResults.isEmpty()) {
                    int i = 0;
                    while (i < futureResults.size()) {
                        final Future<Integer> r = futureResults.get(i);
                        if (r.isCancelled()) {
                            LOG.error("Task was canceled. Break.");
                            return -1;
                        } else if (r.isDone()) {
                            futureResults.remove(i);
                            int rez = r.get();
                            if (rez != 0) {
                                LOG.error("Task Failed (" + rez + "). Break");
                                return -1;
                            }
                        } else {
                            i++;
                        }
                    }
                }
                final String line = br.readLine();
                if (line == null)
                    break;
                if (line.startsWith("#") || line.isEmpty())
                    continue;
                final BedLine bedLine = bedCodec.decode(line);
                if (bedLine == null)
                    continue;
                // ID
                final String geneIdentifier = bedLine.get(3);
                // name
                final String geneName = bedLine.get(4);
                final SourceType sourceType = SourceType.valueOf(bedLine.get(5));
                final String filename = geneIdentifier;
                final String outputVcfName = (filename.startsWith(this.prefix) ? "" : this.prefix) + filename + ".vcf" + (this.compress ? ".gz" : "");
                LOG.info(bedLine.getContig() + ":" + bedLine.getStart() + "-" + bedLine.getEnd() + " length :" + (bedLine.getEnd() - bedLine.getStart()));
                if (bedLine.getEnd() - bedLine.getStart() > 1E6) {
                    LOG.warn("That's a large region ! " + bedLine);
                }
                // both are created lazily, when the first variant to be saved is found
                OutputStream vcfOutputStream = null;
                VariantContextWriter vw = null;
                int countVariants = 0;
                final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(vcfFileReader.getFileHeader()).logger(LOG).prefix(geneName + " " + bedLine.getContig() + ":" + bedLine.getStart() + "-" + bedLine.getEnd());
                iter = vcfFileReader.query(bedLine.getContig(), bedLine.getStart(), bedLine.getEnd());
                while (iter.hasNext()) {
                    VariantContext ctx = progress.watch(iter.next());
                    switch(sourceType) {
                        case SlidingVariants:
                            {
                                // nothing
                                break;
                            }
                        case SlidingContig:
                            {
                                // nothing
                                break;
                            }
                        case ANN_GeneName:
                        case ANN_FeatureID:
                        case ANN_GeneID:
                            {
                                // keep only the ANN predictions whose identifier is this gene
                                final List<String> preds = new ArrayList<>();
                                for (final AnnPredictionParser.AnnPrediction pred : tools.getAnnPredictionParser().getPredictions(ctx)) {
                                    final String predictionIdentifier;
                                    switch(sourceType) {
                                        case ANN_GeneName:
                                            predictionIdentifier = pred.getGeneName();
                                            break;
                                        case ANN_FeatureID:
                                            predictionIdentifier = pred.getFeatureId();
                                            break;
                                        case ANN_GeneID:
                                            predictionIdentifier = pred.getGeneId();
                                            break;
                                        default:
                                            throw new IllegalStateException(bedLine.toString());
                                    }
                                    if (StringUtil.isBlank(predictionIdentifier))
                                        continue;
                                    if (!geneName.equals(predictionIdentifier))
                                        continue;
                                    preds.add(pred.getOriginalAttributeAsString());
                                }
                                if (preds.isEmpty()) {
                                    // variant overlaps the region but is not annotated for this gene
                                    ctx = null;
                                } else {
                                    ctx = new VariantContextBuilder(ctx).rmAttribute(tools.getAnnPredictionParser().getTag()).attribute(tools.getAnnPredictionParser().getTag(), preds).make();
                                }
                                break;
                            }
                        case VEP_Gene:
                        case VEP_Feature:
                        case VEP_Symbol:
                        case VEP_HgncId:
                            {
                                // keep only the VEP predictions whose identifier is this gene
                                final List<String> preds = new ArrayList<>();
                                for (final VepPredictionParser.VepPrediction pred : tools.getVepPredictions(ctx)) {
                                    final String predictionIdentifier;
                                    switch(sourceType) {
                                        case VEP_Gene:
                                            predictionIdentifier = pred.getGene();
                                            break;
                                        case VEP_Feature:
                                            predictionIdentifier = pred.getFeature();
                                            break;
                                        case VEP_Symbol:
                                            predictionIdentifier = pred.getSymbol();
                                            break;
                                        case VEP_HgncId:
                                            predictionIdentifier = pred.getHgncId();
                                            break;
                                        default:
                                            throw new IllegalStateException(bedLine.toString());
                                    }
                                    if (StringUtil.isBlank(predictionIdentifier))
                                        continue;
                                    if (!geneName.equals(predictionIdentifier))
                                        continue;
                                    preds.add(pred.getOriginalAttributeAsString());
                                }
                                if (preds.isEmpty()) {
                                    // variant overlaps the region but is not annotated for this gene
                                    ctx = null;
                                } else {
                                    ctx = new VariantContextBuilder(ctx).rmAttribute(tools.getVepPredictionParser().getTag()).attribute(tools.getVepPredictionParser().getTag(), preds).make();
                                }
                                break;
                            }
                        default:
                            throw new IllegalStateException(bedLine.toString());
                    }
                    if (ctx == null)
                        continue;
                    if (vcfOutputStream == null) {
                        LOG.info(filename);
                        manifest.println(outputVcfName);
                        final VCFHeader header = new VCFHeader(vcfFileReader.getFileHeader());
                        header.addMetaDataLine(new VCFHeaderLine(VCF_HEADER_SPLITKEY, filename));
                        vcfOutputStream = archive.openOuputStream(outputVcfName);
                        vw = VCFUtils.createVariantContextWriterToOutputStream(vcfOutputStream);
                        vw.writeHeader(header);
                    }
                    countVariants++;
                    vw.add(ctx);
                    if (countVariants % 1000 == 0) {
                        LOG.info("Loading : " + geneIdentifier + " N=" + countVariants);
                    }
                }
                progress.finish();
                LOG.info(geneIdentifier + " N=" + countVariants);
                if (vcfOutputStream != null) {
                    vw.close();
                    vcfOutputStream.flush();
                    vcfOutputStream.close();
                    vw = null;
                    if (!this.exec.isEmpty()) {
                        /* run the user command on the VCF that was just written */
                        final Callable<Integer> callable = () -> {
                            final File vcfOutFile = new File(this.outputFile, outputVcfName);
                            IOUtil.assertFileIsReadable(vcfOutFile);
                            final String vcfPath = vcfOutFile.getPath();
                            final StringTokenizer st = new StringTokenizer(this.exec);
                            final List<String> command = new ArrayList<>(1 + st.countTokens());
                            while (st.hasMoreTokens()) {
                                // literal replace(): the substituted values (paths, gene names...)
                                // may contain '$' or '\' which replaceAll() would interpret
                                String token = st.nextToken().replace("__PREFIX__", this.prefix).replace("__CONTIG__", bedLine.getContig()).replace("__CHROM__", bedLine.getContig()).replace("__ID__", geneIdentifier).replace("__NAME__", geneName).replace("__START__", String.valueOf(bedLine.getStart())).replace("__END__", String.valueOf(bedLine.getEnd())).replace("__SOURCE__", sourceType.name()).replace("__VCF__", vcfPath);
                                command.add(token);
                            }
                            LOG.info(command.stream().map(S -> "'" + S + "'").collect(Collectors.joining(" ")));
                            final ProcessBuilder pb = new ProcessBuilder(command);
                            pb.redirectErrorStream(true);
                            final Process p = pb.start();
                            // forward the (merged) output of the process to our stdout
                            final Thread stdoutThread = new Thread(() -> {
                                try {
                                    InputStream in = p.getInputStream();
                                    IOUtils.copyTo(in, stdout());
                                } catch (Exception err) {
                                    LOG.error(err);
                                }
                            });
                            stdoutThread.start();
                            int exitValue = p.waitFor();
                            // make sure the whole output was forwarded before going on
                            stdoutThread.join();
                            if (exitValue != 0) {
                                LOG.error("Command failed (" + exitValue + "):" + String.join(" ", command));
                                return -1;
                            } else {
                                if (deleteAfterCommand) {
                                    if (!vcfOutFile.delete()) {
                                        LOG.warn("Cannot delete " + vcfOutFile);
                                    }
                                }
                                return 0;
                            }
                        };
                        if (executorService != null) {
                            final Future<Integer> rez = executorService.submit(callable);
                            futureResults.add(rez);
                        } else {
                            final int ret = callable.call();
                            if (ret != 0) {
                                LOG.error("Error with process (" + ret + ")");
                                return ret;
                            }
                        }
                    }
                } else {
                    manifest.println("#" + filename);
                    LOG.warn("No Variant Found for " + line);
                }
                iter.close();
                iter = null;
            }
            if (executorService != null) {
                LOG.info("shutdown");
                executorService.shutdown();
                executorService.awaitTermination(365, TimeUnit.DAYS);
            }
            // tasks that ended after the last poll have not been checked yet
            for (final Future<Integer> r : futureResults) {
                if (r.isCancelled()) {
                    LOG.error("Task was canceled. Break.");
                    return -1;
                }
                final int rez = r.get();
                if (rez != 0) {
                    LOG.error("Task Failed (" + rez + "). Break");
                    return -1;
                }
            }
            br.close();
            br = null;
            manifest.close();
            manifest = null;
            archive.close();
            archive = null;
            LOG.info("Done");
        }
        vcfFileReader.close();
        vcfFileReader = null;
        return 0;
    } catch (Exception e) {
        if (e instanceof InterruptedException) {
            // keep the interrupt status visible to the caller
            Thread.currentThread().interrupt();
        }
        LOG.error(e);
        return -1;
    } finally {
        {
            if (executorService != null) {
                // no-op after a normal termination; on error paths it stops the
                // worker threads, which would otherwise stay alive
                executorService.shutdownNow();
            }
            CloserUtil.close(iter2);
            CloserUtil.close(iter);
            CloserUtil.close(pw);
            CloserUtil.close(vcfFileReader);
            CloserUtil.close(br);
            // the manifest is written into the archive: close it first
            CloserUtil.close(manifest);
            CloserUtil.close(archive);
        }
    }
}
Also used : Program(com.github.lindenb.jvarkit.util.jcommander.Program) IOUtil(htsjdk.samtools.util.IOUtil) VCFFileReader(htsjdk.variant.vcf.VCFFileReader) VCFHeader(htsjdk.variant.vcf.VCFHeader) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) IntervalParser(com.github.lindenb.jvarkit.util.bio.IntervalParser) Future(java.util.concurrent.Future) DataOutputStream(java.io.DataOutputStream) StringUtil(htsjdk.samtools.util.StringUtil) AbstractDataCodec(com.github.lindenb.jvarkit.util.picard.AbstractDataCodec) CloserUtil(htsjdk.samtools.util.CloserUtil) PrintWriter(java.io.PrintWriter) Logger(com.github.lindenb.jvarkit.util.log.Logger) LinkedBlockingQueue(java.util.concurrent.LinkedBlockingQueue) Collectors(java.util.stream.Collectors) JvarkitException(com.github.lindenb.jvarkit.lang.JvarkitException) List(java.util.List) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) VcfTools(com.github.lindenb.jvarkit.util.vcf.VcfTools) VariantContext(htsjdk.variant.variantcontext.VariantContext) BedLine(com.github.lindenb.jvarkit.util.bio.bed.BedLine) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) VCFHeaderLine(htsjdk.variant.vcf.VCFHeaderLine) DataInputStream(java.io.DataInputStream) VCFUtils(com.github.lindenb.jvarkit.util.vcf.VCFUtils) CloseableIterator(htsjdk.samtools.util.CloseableIterator) ThreadPoolExecutor(java.util.concurrent.ThreadPoolExecutor) Parameter(com.beust.jcommander.Parameter) NullOuputStream(com.github.lindenb.jvarkit.io.NullOuputStream) AnnPredictionParser(com.github.lindenb.jvarkit.util.vcf.predictions.AnnPredictionParser) BedLineCodec(com.github.lindenb.jvarkit.util.bio.bed.BedLineCodec) Callable(java.util.concurrent.Callable) Function(java.util.function.Function) ParametersDelegate(com.beust.jcommander.ParametersDelegate) ArrayList(java.util.ArrayList) Interval(htsjdk.samtools.util.Interval) StringTokenizer(java.util.StringTokenizer) 
IOUtils(com.github.lindenb.jvarkit.io.IOUtils) Launcher(com.github.lindenb.jvarkit.util.jcommander.Launcher) VepPredictionParser(com.github.lindenb.jvarkit.util.vcf.predictions.VepPredictionParser) ExecutorService(java.util.concurrent.ExecutorService) OutputStream(java.io.OutputStream) SortingCollection(htsjdk.samtools.util.SortingCollection) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) IOException(java.io.IOException) File(java.io.File) TimeUnit(java.util.concurrent.TimeUnit) EqualRangeIterator(com.github.lindenb.jvarkit.util.iterator.EqualRangeIterator) BufferedReader(java.io.BufferedReader) Comparator(java.util.Comparator) Collections(java.util.Collections) ArchiveFactory(com.github.lindenb.jvarkit.io.ArchiveFactory) InputStream(java.io.InputStream) VCFHeaderLine(htsjdk.variant.vcf.VCFHeaderLine) DataOutputStream(java.io.DataOutputStream) OutputStream(java.io.OutputStream) ArrayList(java.util.ArrayList) LinkedBlockingQueue(java.util.concurrent.LinkedBlockingQueue) EqualRangeIterator(com.github.lindenb.jvarkit.util.iterator.EqualRangeIterator) List(java.util.List) ArrayList(java.util.ArrayList) BedLineCodec(com.github.lindenb.jvarkit.util.bio.bed.BedLineCodec) BedLine(com.github.lindenb.jvarkit.util.bio.bed.BedLine) VcfTools(com.github.lindenb.jvarkit.util.vcf.VcfTools) File(java.io.File) Interval(htsjdk.samtools.util.Interval) AnnPredictionParser(com.github.lindenb.jvarkit.util.vcf.predictions.AnnPredictionParser) VCFFileReader(htsjdk.variant.vcf.VCFFileReader) VariantContext(htsjdk.variant.variantcontext.VariantContext) NullOuputStream(com.github.lindenb.jvarkit.io.NullOuputStream) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) VCFHeader(htsjdk.variant.vcf.VCFHeader) PrintWriter(java.io.PrintWriter) ArchiveFactory(com.github.lindenb.jvarkit.io.ArchiveFactory) IntervalParser(com.github.lindenb.jvarkit.util.bio.IntervalParser) 
SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) DataInputStream(java.io.DataInputStream) InputStream(java.io.InputStream) JvarkitException(com.github.lindenb.jvarkit.lang.JvarkitException) IOException(java.io.IOException) StringTokenizer(java.util.StringTokenizer) VepPredictionParser(com.github.lindenb.jvarkit.util.vcf.predictions.VepPredictionParser) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) BufferedReader(java.io.BufferedReader) ExecutorService(java.util.concurrent.ExecutorService) Future(java.util.concurrent.Future) ThreadPoolExecutor(java.util.concurrent.ThreadPoolExecutor)

Aggregations

EqualRangeIterator (com.github.lindenb.jvarkit.util.iterator.EqualRangeIterator)10 IOException (java.io.IOException)9 VariantContext (htsjdk.variant.variantcontext.VariantContext)8 VCFHeader (htsjdk.variant.vcf.VCFHeader)8 File (java.io.File)8 Parameter (com.beust.jcommander.Parameter)7 Launcher (com.github.lindenb.jvarkit.util.jcommander.Launcher)7 Program (com.github.lindenb.jvarkit.util.jcommander.Program)7 Logger (com.github.lindenb.jvarkit.util.log.Logger)7 CloseableIterator (htsjdk.samtools.util.CloseableIterator)7 List (java.util.List)7 AbstractDataCodec (com.github.lindenb.jvarkit.util.picard.AbstractDataCodec)6 SAMSequenceDictionaryProgress (com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress)6 SortingCollection (htsjdk.samtools.util.SortingCollection)6 DataInputStream (java.io.DataInputStream)6 DataOutputStream (java.io.DataOutputStream)6 ArrayList (java.util.ArrayList)6 Comparator (java.util.Comparator)6 ParametersDelegate (com.beust.jcommander.ParametersDelegate)5 IOUtils (com.github.lindenb.jvarkit.io.IOUtils)5