Search in sources :

Example 31 with VCFHeader

use of htsjdk.variant.vcf.VCFHeader in project jvarkit by lindenb.

the class TestUtils method createRandomPedigreeFromFile.

protected File createRandomPedigreeFromFile(final String vcfFile) throws IOException {
    class TmpSample {

        String fam = "F1";

        String name;

        String father = "0";

        String mother = "0";

        int sex = 0;

        int status = 0;
    }
    final File vcfIn = new File(vcfFile);
    final VCFFileReader r = new VCFFileReader(vcfIn, false);
    final VCFHeader header = r.getFileHeader();
    r.close();
    final List<String> samples = new ArrayList<>(header.getSampleNamesInOrder());
    final List<TmpSample> ped = new ArrayList<>();
    if (samples.isEmpty())
        return null;
    while (!samples.isEmpty()) {
        final TmpSample indi = new TmpSample();
        indi.name = samples.remove(0);
        indi.fam = ped.stream().filter(P -> P.father.equals(indi.name) || P.mother.equals(indi.name)).map(P -> P.fam).findFirst().orElse("F" + this.random.nextInt());
        if (ped.stream().anyMatch(P -> P.father.equals(indi.name))) {
            indi.sex = 1;
        } else if (ped.stream().anyMatch(P -> P.mother.equals(indi.name))) {
            indi.sex = 2;
        } else {
            indi.sex = this.random.nextBoolean() ? 1 : 2;
        }
        if (random.nextBoolean()) {
            final List<String> remain = new ArrayList<>(samples);
            if (!remain.isEmpty()) {
                indi.father = remain.remove(0);
            } else {
                indi.father = "0";
            }
            if (!remain.isEmpty()) {
                indi.mother = remain.remove(0);
            } else {
                indi.mother = "0";
            }
        } else {
            indi.father = "0";
            indi.mother = "0";
        }
        indi.status = this.random.nextInt(2);
        ped.add(indi);
    }
    final File pedFile = createTmpFile(".ped");
    final PrintWriter pw = new PrintWriter(pedFile);
    for (TmpSample ts : ped) {
        pw.print(ts.fam);
        pw.print('\t');
        pw.print(ts.name);
        pw.print('\t');
        pw.print(ts.father);
        pw.print('\t');
        pw.print(ts.mother);
        pw.print('\t');
        pw.print(ts.sex);
        pw.print('\t');
        pw.print(ts.status);
        pw.println();
    }
    pw.flush();
    pw.close();
    return pedFile;
}
Also used : Arrays(java.util.Arrays) AfterGroups(org.testng.annotations.AfterGroups) IOUtil(htsjdk.samtools.util.IOUtil) VCFFileReader(htsjdk.variant.vcf.VCFFileReader) VCFHeader(htsjdk.variant.vcf.VCFHeader) SamFiles(htsjdk.samtools.SamFiles) Random(java.util.Random) Test(org.testng.annotations.Test) JFXPanel(javafx.embed.swing.JFXPanel) SAMFileHeader(htsjdk.samtools.SAMFileHeader) SortOrder(htsjdk.samtools.SAMFileHeader.SortOrder) Application(javafx.application.Application) DefaultSAMRecordFactory(htsjdk.samtools.DefaultSAMRecordFactory) Vector(java.util.Vector) BeforeGroups(org.testng.annotations.BeforeGroups) ImageIO(javax.imageio.ImageIO) SAXParser(javax.xml.parsers.SAXParser) FastaSequenceIndexCreator(htsjdk.samtools.reference.FastaSequenceIndexCreator) Path(java.nio.file.Path) ZipEntry(java.util.zip.ZipEntry) CloserUtil(htsjdk.samtools.util.CloserUtil) PrintWriter(java.io.PrintWriter) Predicate(java.util.function.Predicate) JfxLauncher(com.github.lindenb.jvarkit.util.jcommander.JfxLauncher) SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) BeforeClass(org.testng.annotations.BeforeClass) SAMFileWriter(htsjdk.samtools.SAMFileWriter) SAMRecord(htsjdk.samtools.SAMRecord) AfterTest(org.testng.annotations.AfterTest) SAMSequenceDictionaryCodec(htsjdk.samtools.SAMSequenceDictionaryCodec) Platform(javafx.application.Platform) CountDownLatch(java.util.concurrent.CountDownLatch) ReferenceSequenceFileFactory(htsjdk.samtools.reference.ReferenceSequenceFileFactory) List(java.util.List) Stream(java.util.stream.Stream) VariantContext(htsjdk.variant.variantcontext.VariantContext) Pattern(java.util.regex.Pattern) SamReaderFactory(htsjdk.samtools.SamReaderFactory) BeforeSuite(org.testng.annotations.BeforeSuite) FilenameFilter(java.io.FilenameFilter) CloseableIterator(htsjdk.samtools.util.CloseableIterator) ZipInputStream(java.util.zip.ZipInputStream) DataProvider(org.testng.annotations.DataProvider) SAXParserFactory(javax.xml.parsers.SAXParserFactory) Function(java.util.function.Function) ValidationStringency(htsjdk.samtools.ValidationStringency) ArrayList(java.util.ArrayList) NcbiApiKey(com.github.lindenb.jvarkit.util.ncbi.NcbiApiKey) Interval(htsjdk.samtools.util.Interval) BeforeTest(org.testng.annotations.BeforeTest) Assert(org.testng.Assert) IOUtils(com.github.lindenb.jvarkit.io.IOUtils) SAMRecordFactory(htsjdk.samtools.SAMRecordFactory) SAMSequenceDictionaryExtractor(htsjdk.variant.utils.SAMSequenceDictionaryExtractor) AfterClass(org.testng.annotations.AfterClass) SAMFileWriterFactory(htsjdk.samtools.SAMFileWriterFactory) Files(java.nio.file.Files) BufferedWriter(java.io.BufferedWriter) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) IOException(java.io.IOException) FileInputStream(java.io.FileInputStream) SamReader(htsjdk.samtools.SamReader) File(java.io.File) DefaultHandler(org.xml.sax.helpers.DefaultHandler) Stage(javafx.stage.Stage) Paths(java.nio.file.Paths) BAMIndex(htsjdk.samtools.BAMIndex) BufferedReader(java.io.BufferedReader) AfterSuite(org.testng.annotations.AfterSuite) SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) VCFFileReader(htsjdk.variant.vcf.VCFFileReader) ArrayList(java.util.ArrayList) VCFHeader(htsjdk.variant.vcf.VCFHeader) File(java.io.File) PrintWriter(java.io.PrintWriter)

Example 32 with VCFHeader

use of htsjdk.variant.vcf.VCFHeader in project jvarkit by lindenb.

the class VcfBiomart method doVcfToVcf.

@Override
protected int doVcfToVcf(final String inputName, final VcfIterator iter, final VariantContextWriter out) {
    HttpGet httpGet = null;
    final Pattern tab = Pattern.compile("[\t]");
    try {
        final TransformerFactory factory = TransformerFactory.newInstance();
        final Transformer transformer = factory.newTransformer();
        // transformer.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "yes");
        final VCFHeader header = iter.getHeader();
        StringBuilder desc = new StringBuilder("Biomart query. Format: ");
        desc.append(this.attributes.stream().map(S -> this.printLabels ? S + "|" + S : S).collect(Collectors.joining("|")));
        header.addMetaDataLine(new VCFHeaderLine(getClass().getSimpleName() + "CmdLine", String.valueOf(getProgramCommandLine())));
        header.addMetaDataLine(new VCFHeaderLine(getClass().getSimpleName() + "Version", String.valueOf(getVersion())));
        header.addMetaDataLine(new VCFInfoHeaderLine(this.TAG, VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.String, desc.toString()));
        out.writeHeader(header);
        final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(header).logger(LOG);
        while (iter.hasNext()) {
            final VariantContext ctx = progress.watch(iter.next());
            final VariantContextBuilder vcb = new VariantContextBuilder(ctx);
            vcb.rmAttribute(this.TAG);
            this.filterColumnContig.set(ctx.getContig());
            this.filterColumnStart.set(String.valueOf(ctx.getStart()));
            this.filterColumnEnd.set(String.valueOf(ctx.getEnd()));
            final StringWriter domToStr = new StringWriter();
            transformer.transform(new DOMSource(this.domQuery), new StreamResult(domToStr));
            final URIBuilder builder = new URIBuilder(this.serviceUrl);
            builder.addParameter("query", domToStr.toString());
            // System.err.println("\nwget -O - 'http://grch37.ensembl.org/biomart/martservice?query="+escapedQuery+"'\n");
            // escapedQuery = URLEncoder.encode(escapedQuery,"UTF-8");
            httpGet = new HttpGet(builder.build());
            final CloseableHttpResponse httpResponse = httpClient.execute(httpGet);
            int responseCode = httpResponse.getStatusLine().getStatusCode();
            if (responseCode != 200) {
                throw new RuntimeIOException("Response code was not 200. Detected response was " + responseCode);
            }
            InputStream response = httpResponse.getEntity().getContent();
            if (this.teeResponse) {
                response = new TeeInputStream(response, stderr(), false);
            }
            final BufferedReader br = new BufferedReader(new InputStreamReader(response));
            final Set<String> infoAtts = br.lines().filter(L -> !StringUtil.isBlank(L)).filter(L -> !L.equals("[success]")).map(L -> tab.split(L)).map(T -> {
                final StringBuilder sb = new StringBuilder();
                for (int i = 0; i < this.attributes.size(); i++) {
                    if (i > 0)
                        sb.append("|");
                    if (this.printLabels)
                        sb.append(escapeInfo(this.attributes.get(i))).append("|");
                    sb.append(i < T.length ? escapeInfo(T[i]) : "");
                }
                return sb.toString();
            }).collect(Collectors.toCollection(LinkedHashSet::new));
            CloserUtil.close(br);
            CloserUtil.close(response);
            CloserUtil.close(httpResponse);
            if (!infoAtts.isEmpty()) {
                vcb.attribute(this.TAG, new ArrayList<>(infoAtts));
            }
            out.add(vcb.make());
        }
        progress.finish();
        return 0;
    } catch (final Exception err) {
        LOG.error(err);
        throw new RuntimeIOException(err);
    }
}
Also used : VCFHeaderLine(htsjdk.variant.vcf.VCFHeaderLine) Transformer(javax.xml.transform.Transformer) DOMSource(javax.xml.transform.dom.DOMSource) VCFUtils(com.github.lindenb.jvarkit.util.vcf.VCFUtils) Program(com.github.lindenb.jvarkit.util.jcommander.Program) Parameter(com.beust.jcommander.Parameter) VCFHeader(htsjdk.variant.vcf.VCFHeader) StreamResult(javax.xml.transform.stream.StreamResult) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) Attr(org.w3c.dom.Attr) ArrayList(java.util.ArrayList) StringUtil(htsjdk.samtools.util.StringUtil) RuntimeIOException(htsjdk.samtools.util.RuntimeIOException) Document(org.w3c.dom.Document) CloseableHttpResponse(org.apache.http.client.methods.CloseableHttpResponse) Node(org.w3c.dom.Node) Launcher(com.github.lindenb.jvarkit.util.jcommander.Launcher) LinkedHashSet(java.util.LinkedHashSet) CloserUtil(htsjdk.samtools.util.CloserUtil) TeeInputStream(com.github.lindenb.jvarkit.io.TeeInputStream) VCFHeaderLineType(htsjdk.variant.vcf.VCFHeaderLineType) CloseableHttpClient(org.apache.http.impl.client.CloseableHttpClient) URIBuilder(org.apache.http.client.utils.URIBuilder) StringWriter(java.io.StringWriter) Logger(com.github.lindenb.jvarkit.util.log.Logger) VcfIterator(com.github.lindenb.jvarkit.util.vcf.VcfIterator) Set(java.util.Set) InputStreamReader(java.io.InputStreamReader) Collectors(java.util.stream.Collectors) File(java.io.File) List(java.util.List) Element(org.w3c.dom.Element) HttpGet(org.apache.http.client.methods.HttpGet) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) DocumentBuilder(javax.xml.parsers.DocumentBuilder) VCFInfoHeaderLine(htsjdk.variant.vcf.VCFInfoHeaderLine) VariantContext(htsjdk.variant.variantcontext.VariantContext) BufferedReader(java.io.BufferedReader) Pattern(java.util.regex.Pattern) DocumentBuilderFactory(javax.xml.parsers.DocumentBuilderFactory) TransformerFactory(javax.xml.transform.TransformerFactory) VCFHeaderLineCount(htsjdk.variant.vcf.VCFHeaderLineCount) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) HttpClients(org.apache.http.impl.client.HttpClients) InputStream(java.io.InputStream) VCFHeaderLine(htsjdk.variant.vcf.VCFHeaderLine) DOMSource(javax.xml.transform.dom.DOMSource) Transformer(javax.xml.transform.Transformer) HttpGet(org.apache.http.client.methods.HttpGet) VariantContext(htsjdk.variant.variantcontext.VariantContext) StringWriter(java.io.StringWriter) CloseableHttpResponse(org.apache.http.client.methods.CloseableHttpResponse) VCFHeader(htsjdk.variant.vcf.VCFHeader) Pattern(java.util.regex.Pattern) RuntimeIOException(htsjdk.samtools.util.RuntimeIOException) TransformerFactory(javax.xml.transform.TransformerFactory) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) StreamResult(javax.xml.transform.stream.StreamResult) InputStreamReader(java.io.InputStreamReader) TeeInputStream(com.github.lindenb.jvarkit.io.TeeInputStream) InputStream(java.io.InputStream) TeeInputStream(com.github.lindenb.jvarkit.io.TeeInputStream) VCFInfoHeaderLine(htsjdk.variant.vcf.VCFInfoHeaderLine) RuntimeIOException(htsjdk.samtools.util.RuntimeIOException) URIBuilder(org.apache.http.client.utils.URIBuilder) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) BufferedReader(java.io.BufferedReader)

Example 33 with VCFHeader

use of htsjdk.variant.vcf.VCFHeader in project jvarkit by lindenb.

the class VcfEnsemblVepRest method doVcfToVcf.

@Override
protected int doVcfToVcf(final String inputName, final VcfIterator vcfIn, final VariantContextWriter out) {
    try {
        final VCFHeader header = vcfIn.getHeader();
        final List<VariantContext> buffer = new ArrayList<>(this.batchSize + 1);
        final VCFHeader h2 = new VCFHeader(header);
        addMetaData(h2);
        for (final String tag : this.outputTags) {
            h2.addMetaDataLine(new VCFInfoHeaderLine(tag, VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.String, "__CUSTOM_DESCRIPTION__" + tag));
        }
        out.writeHeader(h2);
        final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(header).logger(LOG);
        for (; ; ) {
            VariantContext ctx = null;
            if (vcfIn.hasNext()) {
                buffer.add((ctx = progress.watch(vcfIn.next())));
            }
            if (ctx == null || buffer.size() >= this.batchSize) {
                if (!buffer.isEmpty()) {
                    final Map<String, EnsVepPrediction> input2pred = vep(buffer);
                    for (final VariantContext ctx2 : buffer) {
                        final String inputStr = createInputContext(ctx2);
                        final EnsVepPrediction pred = input2pred.get(inputStr);
                        final VariantContextBuilder vcb = new VariantContextBuilder(ctx2);
                        for (final String tag : this.outputTags) {
                            vcb.rmAttribute(tag);
                        }
                        if (pred == null) {
                            LOG.info("No Annotation found for " + inputStr);
                            out.add(vcb.make());
                            continue;
                        }
                        for (final String tag : this.outputTags) {
                            final Set<String> info = pred.tag2infoLines.get(tag);
                            if (info == null || info.isEmpty())
                                continue;
                            vcb.attribute(tag, new ArrayList<>(info));
                        }
                        out.add(vcb.make());
                    }
                // end of loop over variants
                }
                // end of if buffer is not empty
                if (ctx == null)
                    break;
                buffer.clear();
            }
        }
        progress.finish();
        return RETURN_OK;
    } catch (final Exception err) {
        LOG.error(err);
        return -1;
    }
}
Also used : SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) ArrayList(java.util.ArrayList) VariantContext(htsjdk.variant.variantcontext.VariantContext) VCFInfoHeaderLine(htsjdk.variant.vcf.VCFInfoHeaderLine) TransformerException(javax.xml.transform.TransformerException) RuntimeIOException(htsjdk.samtools.util.RuntimeIOException) IOException(java.io.IOException) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) VCFHeader(htsjdk.variant.vcf.VCFHeader)

Example 34 with VCFHeader

use of htsjdk.variant.vcf.VCFHeader in project jvarkit by lindenb.

the class VcfEnsemblReg method annotate.

private void annotate(Track track, File inf, File outf) throws IOException {
    boolean contained = false;
    LOG.info("Processing " + track.id + " (" + track.shortLabel + ") " + track.url);
    VcfIterator in = VCFUtils.createVcfIteratorFromFile(inf);
    VCFHeader header = in.getHeader();
    VCFInfoHeaderLine info = null;
    SeekableStream sstream = SeekableStreamFactory.getInstance().getStreamFor(track.url);
    BBFileReader bigFile = new BBFileReader(track.url.toString(), new SeekableStreamAdaptor(sstream));
    VariantContextWriter w1 = VCFUtils.createVariantContextWriter(outf);
    if (bigFile.isBigWigFile()) {
        info = new VCFInfoHeaderLine(track.id, 1, VCFHeaderLineType.Float, String.valueOf(track.longLabel) + " " + track.url);
    } else {
        info = new VCFInfoHeaderLine(track.id, 1, VCFHeaderLineType.String, String.valueOf(track.longLabel) + " " + track.url);
    }
    header.addMetaDataLine(info);
    w1.writeHeader(in.getHeader());
    while (in.hasNext()) {
        VariantContext ctx = in.next();
        String chrom = ctx.getContig();
        if (!chrom.startsWith("chr"))
            chrom = "chr" + chrom;
        if (!chrom.matches("(chrX|chrY|chr[0-9]|chr1[0-9]|chr2[12])")) {
            w1.add(ctx);
        } else if (bigFile.isBigWigFile()) {
            BigWigIterator iter = bigFile.getBigWigIterator(chrom, ctx.getStart() - 1, chrom, ctx.getStart(), contained);
            Float wigValue = null;
            while (iter != null && iter.hasNext() && wigValue == null) {
                WigItem item = iter.next();
                wigValue = item.getWigValue();
            }
            if (wigValue == null) {
                w1.add(ctx);
                continue;
            }
            VariantContextBuilder vcb = new VariantContextBuilder(ctx);
            vcb.attribute(track.id, wigValue);
            w1.add(vcb.make());
        } else {
            BigBedIterator iter = bigFile.getBigBedIterator(chrom, ctx.getStart() - 1, chrom, ctx.getStart(), contained);
            Set<String> bedValues = new HashSet<String>();
            while (iter != null && iter.hasNext()) {
                BedFeature item = iter.next();
                String[] rest = item.getRestOfFields();
                if (rest == null || rest.length != 6) {
                    System.err.println(track.id + " " + Arrays.toString(item.getRestOfFields()));
                    continue;
                }
                String color = null;
                if (track.parent != null) {
                    if (track.parent.startsWith("Segway_17SegmentationSummaries")) {
                        color = segway_17SegmentationSummaries(rest[5]);
                    } else if (track.parent.startsWith("ProjectedSegments")) {
                        color = projectedSegments(rest[5]);
                    } else if (track.parent.startsWith("RegBuildOverview")) {
                        color = regBuildOverview(rest[5]);
                    } else if (track.parent.startsWith("Segway_17CellSegments")) {
                        color = segway_17CellSegments(rest[5]);
                    } else {
                        System.err.println("Unknown parent:" + track.parent);
                    }
                }
                if (color == null)
                    continue;
                bedValues.add(rest[0] + "|" + color);
            }
            if (bedValues.isEmpty()) {
                w1.add(ctx);
                continue;
            }
            StringBuilder sb = new StringBuilder();
            for (String s : bedValues) {
                if (sb.length() != 0)
                    sb.append(",");
                sb.append(s);
            }
            VariantContextBuilder vcb = new VariantContextBuilder(ctx);
            vcb.attribute(track.id, sb.toString());
            w1.add(vcb.make());
        }
    }
    sstream.close();
    in.close();
    w1.close();
}
Also used : HashSet(java.util.HashSet) Set(java.util.Set) SeekableStream(htsjdk.samtools.seekablestream.SeekableStream) VariantContext(htsjdk.variant.variantcontext.VariantContext) BigBedIterator(org.broad.igv.bbfile.BigBedIterator) BedFeature(org.broad.igv.bbfile.BedFeature) VCFInfoHeaderLine(htsjdk.variant.vcf.VCFInfoHeaderLine) WigItem(org.broad.igv.bbfile.WigItem) VcfIterator(com.github.lindenb.jvarkit.util.vcf.VcfIterator) SeekableStreamAdaptor(com.github.lindenb.jvarkit.util.igv.SeekableStreamAdaptor) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) BBFileReader(org.broad.igv.bbfile.BBFileReader) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) VCFHeader(htsjdk.variant.vcf.VCFHeader) BigWigIterator(org.broad.igv.bbfile.BigWigIterator)

Example 35 with VCFHeader

use of htsjdk.variant.vcf.VCFHeader in project jvarkit by lindenb.

the class VcfEpistatis01 method doWork.

@Override
public int doWork(final List<String> args) {
    if (this.number_of_jobs < 1) {
        LOG.error("bad number of jobs");
        return -1;
    }
    try {
        final int variantsCount;
        final List<VariantContext> inMemoryVariants;
        final File vcfFile = new File(oneAndOnlyOneFile(args));
        final File tmpIndexFile;
        if (vcfFile.equals(this.outputFile)) {
            LOG.error("input == output");
            return -1;
        }
        VCFFileReader vcfFileReader = new VCFFileReader(vcfFile, false);
        final VCFHeader header = vcfFileReader.getFileHeader();
        final Pedigree pedigree;
        if (this.pedigreeFile != null) {
            pedigree = new Pedigree.Parser().parse(this.pedigreeFile);
        } else {
            pedigree = new Pedigree.Parser().parse(header);
        }
        pedigree.verifyPersonsHaveUniqueNames();
        final Map<String, Integer> sample2index = header.getSampleNameToOffset();
        final int[] caseIndexes = pedigree.getAffected().stream().filter(P -> sample2index.containsKey(P.getId())).mapToInt(P -> sample2index.get(P.getId())).sorted().toArray();
        final int[] ctrlIndexes = new ArrayList<>(pedigree.getUnaffected()).stream().filter(P -> sample2index.containsKey(P.getId())).mapToInt(P -> sample2index.get(P.getId())).sorted().toArray();
        if (caseIndexes.length == 0 || ctrlIndexes.length == 0) {
            LOG.error("empty ped or no case/ctrl");
            vcfFileReader.close();
            return -1;
        }
        if (this.load_variants_in_memory) {
            LOG.info("loading variants in memory");
            tmpIndexFile = null;
            final CloseableIterator<VariantContext> iter2 = vcfFileReader.iterator();
            inMemoryVariants = Collections.unmodifiableList(iter2.stream().filter(this.variantFilter).filter(// should fix https://github.com/samtools/htsjdk/issues/1026 ?
            V -> V.getGenotypes().stream().filter(G -> G.isCalled()).count() > 0).collect(Collectors.toList()));
            variantsCount = inMemoryVariants.size();
            iter2.close();
        } else {
            tmpIndexFile = File.createTempFile("epistatsis", VcfOffsetsIndexFactory.INDEX_EXTENSION);
            tmpIndexFile.deleteOnExit();
            new VcfOffsetsIndexFactory().setLogger(LOG).setPredicate(variantFilter).indexVcfFile(vcfFile, tmpIndexFile);
            final VcfList tmpList = VcfList.fromFile(vcfFile, tmpIndexFile);
            variantsCount = tmpList.size();
            tmpList.close();
            inMemoryVariants = null;
        }
        vcfFileReader.close();
        LOG.info("Number of variants: " + variantsCount);
        Result bestResult = null;
        int x = this.start_index_at;
        while (x + 1 < variantsCount) {
            final List<Runner> runners = new Vector<>(this.number_of_jobs);
            while (x + 1 < variantsCount && runners.size() < this.number_of_jobs) {
                LOG.info("starting " + x + "/" + variantsCount);
                runners.add(new Runner(inMemoryVariants == null ? VcfList.fromFile(vcfFile, tmpIndexFile) : new Vector<>(inMemoryVariants), x, caseIndexes, ctrlIndexes));
                ++x;
            }
            final ExecutorService execSvc;
            if (this.number_of_jobs == 1) {
                execSvc = null;
            } else {
                execSvc = Executors.newFixedThreadPool(this.number_of_jobs);
                ;
            }
            if (this.number_of_jobs == 1) {
                runners.get(0).call();
            } else {
                execSvc.invokeAll(runners);
            }
            if (execSvc != null) {
                execSvc.shutdown();
                execSvc.awaitTermination(10000L, TimeUnit.DAYS);
                execSvc.shutdownNow();
            }
            runners.stream().mapToLong(R -> R.duration).min().ifPresent(D -> {
                LOG.info("That took " + (D / 1000f) + " seconds.");
            });
            for (final Runner r : runners) {
                final Result rez = r.result;
                if (rez == null)
                    continue;
                if (bestResult == null || bestResult.score < rez.score) {
                    bestResult = rez;
                    if (this.output_score) {
                        final PrintWriter pw = super.openFileOrStdoutAsPrintWriter(this.outputFile);
                        pw.println(bestResult.score + "\t" + bestResult.toString());
                        pw.flush();
                        pw.close();
                    } else {
                        final VariantContextWriter w = openVariantContextWriter(this.outputFile);
                        final VCFHeader header2 = new VCFHeader(header);
                        header2.addMetaDataLine(new VCFHeaderLine(VcfEpistatis01.class.getName(), bestResult.toString()));
                        w.writeHeader(header2);
                        w.add(bestResult.ctx1);
                        w.add(bestResult.ctx2);
                        w.close();
                    }
                }
            }
            LOG.info("best: " + bestResult);
        }
        if (tmpIndexFile != null)
            tmpIndexFile.delete();
        return 0;
    } catch (final Exception err) {
        err.printStackTrace();
        LOG.error(err);
        return -1;
    } finally {
    }
}
Also used : Genotype(htsjdk.variant.variantcontext.Genotype) VCFHeaderLine(htsjdk.variant.vcf.VCFHeaderLine) CloseableIterator(htsjdk.samtools.util.CloseableIterator) Allele(htsjdk.variant.variantcontext.Allele) JexlVariantPredicate(com.github.lindenb.jvarkit.util.vcf.JexlVariantPredicate) Parameter(com.beust.jcommander.Parameter) VCFFileReader(htsjdk.variant.vcf.VCFFileReader) VCFHeader(htsjdk.variant.vcf.VCFHeader) Callable(java.util.concurrent.Callable) Function(java.util.function.Function) ArrayList(java.util.ArrayList) Vector(java.util.Vector) Pedigree(com.github.lindenb.jvarkit.util.Pedigree) Map(java.util.Map) Launcher(com.github.lindenb.jvarkit.util.jcommander.Launcher) ExecutorService(java.util.concurrent.ExecutorService) CloserUtil(htsjdk.samtools.util.CloserUtil) PrintWriter(java.io.PrintWriter) Predicate(java.util.function.Predicate) Logger(com.github.lindenb.jvarkit.util.log.Logger) Collectors(java.util.stream.Collectors) File(java.io.File) Executors(java.util.concurrent.Executors) VcfOffsetsIndexFactory(com.github.lindenb.jvarkit.tools.vcflist.VcfOffsetsIndexFactory) TimeUnit(java.util.concurrent.TimeUnit) List(java.util.List) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) VariantContext(htsjdk.variant.variantcontext.VariantContext) Collections(java.util.Collections) VcfList(com.github.lindenb.jvarkit.tools.vcflist.VcfList) VCFHeaderLine(htsjdk.variant.vcf.VCFHeaderLine) VCFFileReader(htsjdk.variant.vcf.VCFFileReader) VariantContext(htsjdk.variant.variantcontext.VariantContext) VcfOffsetsIndexFactory(com.github.lindenb.jvarkit.tools.vcflist.VcfOffsetsIndexFactory) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) VCFHeader(htsjdk.variant.vcf.VCFHeader) Vector(java.util.Vector) PrintWriter(java.io.PrintWriter) Pedigree(com.github.lindenb.jvarkit.util.Pedigree) ExecutorService(java.util.concurrent.ExecutorService) File(java.io.File) VcfList(com.github.lindenb.jvarkit.tools.vcflist.VcfList)

Aggregations

VCFHeader (htsjdk.variant.vcf.VCFHeader)182 VariantContext (htsjdk.variant.variantcontext.VariantContext)113 File (java.io.File)93 ArrayList (java.util.ArrayList)79 VariantContextWriter (htsjdk.variant.variantcontext.writer.VariantContextWriter)73 VCFHeaderLine (htsjdk.variant.vcf.VCFHeaderLine)64 VariantContextBuilder (htsjdk.variant.variantcontext.VariantContextBuilder)63 HashSet (java.util.HashSet)60 SAMSequenceDictionaryProgress (com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress)58 IOException (java.io.IOException)55 VCFInfoHeaderLine (htsjdk.variant.vcf.VCFInfoHeaderLine)52 SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary)49 Genotype (htsjdk.variant.variantcontext.Genotype)48 Allele (htsjdk.variant.variantcontext.Allele)47 VCFFileReader (htsjdk.variant.vcf.VCFFileReader)47 List (java.util.List)44 Set (java.util.Set)38 VcfIterator (com.github.lindenb.jvarkit.util.vcf.VcfIterator)36 CloserUtil (htsjdk.samtools.util.CloserUtil)35 Collectors (java.util.stream.Collectors)34