Search in sources :

Example 51 with VariantContextWriter

use of htsjdk.variant.variantcontext.writer.VariantContextWriter in project jvarkit by lindenb.

the class VCFUtils method convertVCFHeaderToList.

/**
 * convert a VCF header to List of String. Created for serialization
 */
public static List<String> convertVCFHeaderToList(final VCFHeader header) {
    final ByteArrayOutputStream baos = new ByteArrayOutputStream(1000);
    final VariantContextWriter vcw = createVariantContextWriterToOutputStream(baos);
    vcw.writeHeader(header);
    vcw.close();
    return Arrays.asList(new String(baos.toByteArray()).split("\n"));
}
Also used : ByteArrayOutputStream(java.io.ByteArrayOutputStream) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter)

Example 52 with VariantContextWriter

use of htsjdk.variant.variantcontext.writer.VariantContextWriter in project jvarkit by lindenb.

the class VcfMultiToOneAlleleTest method createVcf.

private List<VariantContext> createVcf(final String params, final List<Genotype> genotypes) throws IOException {
    Set<VCFHeaderLine> metaData = new HashSet<>();
    VCFStandardHeaderLines.addStandardFormatLines(metaData, true, "GT", "DP");
    VCFStandardHeaderLines.addStandardInfoLines(metaData, true, "AF", "AN", "AC", "DP");
    final VCFHeader vcfheader = new VCFHeader(metaData, genotypes.stream().map(G -> G.getSampleName()).sorted().collect(Collectors.toSet()));
    final VariantAttributesRecalculator calc = new VariantAttributesRecalculator();
    calc.setHeader(vcfheader);
    final File vcfOut = super.createTmpFile(".vcf");
    final File vcfOut2 = super.createTmpFile(".vcf");
    VariantContextBuilder vcb = new VariantContextBuilder();
    vcb.chr("1");
    vcb.start(1);
    vcb.stop(1);
    vcb.alleles(genotypes.stream().flatMap(G -> G.getAlleles().stream()).collect(Collectors.toSet()));
    vcb.genotypes(genotypes);
    final VariantContextWriter w = VCFUtils.createVariantContextWriter(vcfOut);
    w.writeHeader(vcfheader);
    w.add(calc.apply(vcb.make()));
    w.close();
    Assert.assertEquals(new VcfMultiToOneAllele().instanceMain(newCmd().add("-o").add(vcfOut2).split(params).add(vcfOut).make()), 0);
    super.assertIsVcf(vcfOut2);
    return super.variantStream(vcfOut2).collect(Collectors.toList());
}
Also used : Genotype(htsjdk.variant.variantcontext.Genotype) VCFHeaderLine(htsjdk.variant.vcf.VCFHeaderLine) VCFUtils(com.github.lindenb.jvarkit.util.vcf.VCFUtils) Allele(htsjdk.variant.variantcontext.Allele) Arrays(java.util.Arrays) GenotypeBuilder(htsjdk.variant.variantcontext.GenotypeBuilder) VCFStandardHeaderLines(htsjdk.variant.vcf.VCFStandardHeaderLines) VCFHeader(htsjdk.variant.vcf.VCFHeader) Set(java.util.Set) IOException(java.io.IOException) Test(org.testng.annotations.Test) Collectors(java.util.stream.Collectors) File(java.io.File) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) List(java.util.List) Assert(org.testng.Assert) VariantAttributesRecalculator(com.github.lindenb.jvarkit.util.vcf.VariantAttributesRecalculator) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) VariantContext(htsjdk.variant.variantcontext.VariantContext) TestUtils(com.github.lindenb.jvarkit.tools.tests.TestUtils) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) VCFHeaderLine(htsjdk.variant.vcf.VCFHeaderLine) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) VariantAttributesRecalculator(com.github.lindenb.jvarkit.util.vcf.VariantAttributesRecalculator) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) VCFHeader(htsjdk.variant.vcf.VCFHeader) File(java.io.File) HashSet(java.util.HashSet)

Example 53 with VariantContextWriter

use of htsjdk.variant.variantcontext.writer.VariantContextWriter in project jvarkit by lindenb.

the class VcfBurdenFisherHTest method test01.

@Test(dataProvider = "all-vcf-files")
public void test01(final String inputFile) throws IOException {
    final VCFFileReader r0 = new VCFFileReader(new File(inputFile), false);
    final VCFHeader vcfheader = r0.getFileHeader();
    if (vcfheader.getNGenotypeSamples() < 2) {
        r0.close();
        return;
    }
    final CloseableIterator<VariantContext> iter = r0.iterator();
    final File inputVcf = super.createTmpFile(".vcf");
    final VariantContextWriter w = VCFUtils.createVariantContextWriter(inputVcf);
    w.writeHeader(vcfheader);
    iter.stream().filter(V -> V.getAlleles().size() == 2).forEach(V -> w.add(V));
    iter.close();
    w.close();
    r0.close();
    final File ped = super.createRandomPedigreeFromFile(inputVcf.getPath());
    if (ped == null) {
        Reporter.getCurrentTestResult().setAttribute("warn", "No Pedigree for " + inputFile);
        return;
    }
    final File output = super.createTmpFile(".vcf");
    Assert.assertEquals(new VcfBurdenFisherH().instanceMain(newCmd().add("-o", output, "--pedigree", ped, inputFile).make()), 0);
    assertIsVcf(output);
}
Also used : VCFUtils(com.github.lindenb.jvarkit.util.vcf.VCFUtils) CloseableIterator(htsjdk.samtools.util.CloseableIterator) Assert(org.testng.Assert) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) VCFFileReader(htsjdk.variant.vcf.VCFFileReader) VCFHeader(htsjdk.variant.vcf.VCFHeader) VariantContext(htsjdk.variant.variantcontext.VariantContext) IOException(java.io.IOException) Reporter(org.testng.Reporter) Test(org.testng.annotations.Test) TestUtils(com.github.lindenb.jvarkit.tools.tests.TestUtils) File(java.io.File) VCFFileReader(htsjdk.variant.vcf.VCFFileReader) VariantContext(htsjdk.variant.variantcontext.VariantContext) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) VCFHeader(htsjdk.variant.vcf.VCFHeader) File(java.io.File) Test(org.testng.annotations.Test)

Example 54 with VariantContextWriter

use of htsjdk.variant.variantcontext.writer.VariantContextWriter in project jvarkit by lindenb.

the class VcfCalledWithAnotherMethod method doVcfToVcf.

protected int doVcfToVcf(final String inputName, final VcfIterator in, final VariantContextWriter out) {
    final List<ExternalVcf> externalVcfs = new ArrayList<>();
    try {
        final VCFHeader header = in.getHeader();
        this.dictionary = header.getSequenceDictionary();
        /**
         * open primitive input
         */
        if (this.dictionary == null) {
            LOG.error("no dictionary in input");
            return -1;
        }
        final Set<File> samtoolsFiles = new HashSet<>();
        this.externalVcfStrs.stream().filter(S -> S.endsWith(".list")).map(S -> Paths.get(S)).forEach(P -> {
            try {
                samtoolsFiles.addAll(Files.readAllLines(P).stream().filter(L -> !(L.startsWith("#") || L.trim().isEmpty())).map(S -> new File(S)).collect(Collectors.toSet()));
            } catch (final Exception err) {
                throw new RuntimeIOException(err);
            }
        });
        samtoolsFiles.addAll(this.externalVcfStrs.stream().filter(S -> !S.endsWith(".list")).map(S -> new File(S)).collect(Collectors.toSet()));
        externalVcfs.addAll(samtoolsFiles.stream().map(F -> new ExternalVcf(F)).collect(Collectors.toList()));
        /**
         * check for uniq keys
         */
        final Set<String> uniqKeys = new HashSet<>();
        for (final ExternalVcf extvcf : externalVcfs) {
            int n = 0;
            for (; ; ) {
                final String newkey = extvcf.key + (n == 0 ? "" : "_" + n);
                if (!uniqKeys.contains(newkey)) {
                    extvcf.key = newkey;
                    uniqKeys.add(newkey);
                    break;
                }
                ++n;
            }
        }
        final VCFHeader h2 = new VCFHeader(header);
        for (final ExternalVcf extvcf : externalVcfs) {
            h2.addMetaDataLine(new VCFHeaderLine("otherVcf:" + extvcf.key, extvcf.vcfFile.getPath()));
        }
        final VCFFilterHeaderLine variantNotFoundElsewhereFILTER = (filterNameVariantNotFoundElseWhere.isEmpty() ? null : new VCFFilterHeaderLine(filterNameVariantNotFoundElseWhere, "Variant Was not found in other VCFs: " + externalVcfs.stream().map(S -> S.vcfFile.getPath()).collect(Collectors.joining(", "))));
        if (variantNotFoundElsewhereFILTER != null) {
            h2.addMetaDataLine(variantNotFoundElsewhereFILTER);
        }
        final VCFInfoHeaderLine variantFoundElseWhereKeys = new VCFInfoHeaderLine(this.infoFoundKey, VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.String, "Variant was found in the VCFs designed by those keys");
        h2.addMetaDataLine(variantFoundElseWhereKeys);
        final VCFInfoHeaderLine variantFoundElseWhereCount = new VCFInfoHeaderLine(this.infoFoundKey, 1, VCFHeaderLineType.Integer, "Number of times the Variant was found  in the VCFs");
        h2.addMetaDataLine(variantFoundElseWhereCount);
        final VCFFormatHeaderLine genotypeCountSame = new VCFFormatHeaderLine(this.formatCountSame, 1, VCFHeaderLineType.Integer, "Number of times the Genotype was found the same in other VCFs");
        h2.addMetaDataLine(genotypeCountSame);
        final VCFFormatHeaderLine genotypeCountDiscordant = new VCFFormatHeaderLine(this.formatCountDiscordant, 1, VCFHeaderLineType.Integer, "Number of times the Genotype was found dicordant in other VCFs");
        h2.addMetaDataLine(genotypeCountDiscordant);
        super.addMetaData(h2);
        final VariantContextWriter w = super.openVariantContextWriter(outputFile);
        w.writeHeader(h2);
        final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(this.dictionary);
        while (in.hasNext()) {
            final VariantContext ctx = progress.watch(in.next());
            final List<GenotypeCount> genotypeCounts = ctx.getGenotypes().stream().map(G -> new GenotypeCount(G)).collect(Collectors.toList());
            final VariantContextBuilder vcb = new VariantContextBuilder(ctx);
            final Set<String> variantFoundInOtherVcfs = new HashSet<>();
            for (final ExternalVcf extvcf : externalVcfs) {
                final List<VariantContext> otherVariants = extvcf.get(ctx);
                if (otherVariants.stream().filter(CTX2 -> {
                    if (ctx.getAlternateAlleles().isEmpty())
                        return true;
                    for (final Allele a : ctx.getAlternateAlleles()) {
                        if (CTX2.hasAllele(a))
                            return true;
                    }
                    return false;
                }).findAny().isPresent()) {
                    variantFoundInOtherVcfs.add(extvcf.key);
                }
                for (final GenotypeCount gt : genotypeCounts) {
                    for (final VariantContext otherVariant : otherVariants) {
                        final Genotype otherGt = otherVariant.getGenotype(gt.gt.getSampleName());
                        if (otherGt == null)
                            continue;
                        if (gt.gt.sameGenotype(otherGt) || (this.noCallSameAsHomRef && ((gt.gt.isNoCall() && otherGt.isHomRef()) || (gt.gt.isHomRef() && otherGt.isNoCall())))) {
                            gt.countSame++;
                        } else {
                            gt.countDiscordant++;
                        }
                    }
                }
            }
            vcb.genotypes(genotypeCounts.stream().map(G -> {
                final GenotypeBuilder gb = new GenotypeBuilder(G.gt);
                gb.attribute(genotypeCountSame.getID(), G.countSame);
                gb.attribute(genotypeCountDiscordant.getID(), G.countDiscordant);
                return gb.make();
            }).collect(Collectors.toList()));
            vcb.attribute(variantFoundElseWhereCount.getID(), variantFoundInOtherVcfs.size());
            if (variantFoundInOtherVcfs.isEmpty()) {
                if (variantNotFoundElsewhereFILTER != null) {
                    vcb.filter(variantNotFoundElsewhereFILTER.getID());
                }
            } else {
                if (variantNotFoundElsewhereFILTER != null && !ctx.isFiltered()) {
                    vcb.passFilters();
                }
                vcb.attribute(variantFoundElseWhereKeys.getID(), new ArrayList<>(variantFoundInOtherVcfs));
            }
            w.add(vcb.make());
        }
        progress.finish();
        while (!externalVcfs.isEmpty()) externalVcfs.remove(0).close();
        return RETURN_OK;
    } catch (Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        while (!externalVcfs.isEmpty()) externalVcfs.remove(0).close();
    }
}
Also used : Genotype(htsjdk.variant.variantcontext.Genotype) VCFHeaderLine(htsjdk.variant.vcf.VCFHeaderLine) CloseableIterator(htsjdk.samtools.util.CloseableIterator) Allele(htsjdk.variant.variantcontext.Allele) VCFUtils(com.github.lindenb.jvarkit.util.vcf.VCFUtils) SequenceUtil(htsjdk.samtools.util.SequenceUtil) Program(com.github.lindenb.jvarkit.util.jcommander.Program) Parameter(com.beust.jcommander.Parameter) VCFFileReader(htsjdk.variant.vcf.VCFFileReader) VCFHeader(htsjdk.variant.vcf.VCFHeader) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) Function(java.util.function.Function) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) RuntimeIOException(htsjdk.samtools.util.RuntimeIOException) Launcher(com.github.lindenb.jvarkit.util.jcommander.Launcher) VCFFilterHeaderLine(htsjdk.variant.vcf.VCFFilterHeaderLine) VCFHeaderLineType(htsjdk.variant.vcf.VCFHeaderLineType) GenotypeBuilder(htsjdk.variant.variantcontext.GenotypeBuilder) Files(java.nio.file.Files) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) VcfIterator(com.github.lindenb.jvarkit.util.vcf.VcfIterator) Logger(com.github.lindenb.jvarkit.util.log.Logger) Set(java.util.Set) Collectors(java.util.stream.Collectors) JvarkitException(com.github.lindenb.jvarkit.lang.JvarkitException) File(java.io.File) List(java.util.List) Paths(java.nio.file.Paths) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) VCFInfoHeaderLine(htsjdk.variant.vcf.VCFInfoHeaderLine) VariantContext(htsjdk.variant.variantcontext.VariantContext) VCFFormatHeaderLine(htsjdk.variant.vcf.VCFFormatHeaderLine) PeekIterator(htsjdk.samtools.util.PeekIterator) VCFHeaderLineCount(htsjdk.variant.vcf.VCFHeaderLineCount) Comparator(java.util.Comparator) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) VCFHeaderLine(htsjdk.variant.vcf.VCFHeaderLine) ArrayList(java.util.ArrayList) VariantContext(htsjdk.variant.variantcontext.VariantContext) VCFFilterHeaderLine(htsjdk.variant.vcf.VCFFilterHeaderLine) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) VCFHeader(htsjdk.variant.vcf.VCFHeader) HashSet(java.util.HashSet) VCFFormatHeaderLine(htsjdk.variant.vcf.VCFFormatHeaderLine) RuntimeIOException(htsjdk.samtools.util.RuntimeIOException) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) Genotype(htsjdk.variant.variantcontext.Genotype) GenotypeBuilder(htsjdk.variant.variantcontext.GenotypeBuilder) RuntimeIOException(htsjdk.samtools.util.RuntimeIOException) JvarkitException(com.github.lindenb.jvarkit.lang.JvarkitException) VCFInfoHeaderLine(htsjdk.variant.vcf.VCFInfoHeaderLine) Allele(htsjdk.variant.variantcontext.Allele) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) File(java.io.File)

Example 55 with VariantContextWriter

use of htsjdk.variant.variantcontext.writer.VariantContextWriter in project jvarkit by lindenb.

the class VcfGetVariantByIndex method doWork.

public int doWork(List<String> args) {
    if (this.fileListOfIndexes == null) {
        LOG.error("undefined list of indexes");
        return -1;
    }
    if (args.size() != 1) {
        LOG.error("Expected only one vcf file on input");
        return -1;
    }
    File vcfFile = new File(args.get(0));
    VariantContextWriter w = null;
    IndexFile indexFile = null;
    BufferedReader r = null;
    String line;
    try {
        LOG.info("Opening " + vcfFile);
        if (vcfFile.getName().endsWith(".vcf.gz")) {
            indexFile = new BGZIndexFile(vcfFile);
        } else if (vcfFile.getName().endsWith(".vcf")) {
            indexFile = new RandomAccessIndexFile(vcfFile);
        } else {
            LOG.error("Not a .vcf or .vcf.gz file: " + vcfFile);
            return -1;
        }
        indexFile.open();
        w = super.openVariantContextWriter(outputFile);
        w.writeHeader(indexFile.getHeader());
        r = IOUtils.openFileForBufferedReading(fileListOfIndexes);
        while ((line = r.readLine()) != null) {
            if (line.isEmpty() || line.startsWith("#"))
                continue;
            long ith;
            try {
                ith = Long.parseLong(line);
            } catch (Exception e) {
                LOG.error("Bad index in " + line + " ignoring");
                continue;
            }
            // 0-based index
            ith--;
            if (ith < 0 || ith >= indexFile.size()) {
                LOG.error("Index out of bound in " + line + " ignoring");
                continue;
            }
            String varStr = indexFile.getLine(ith);
            w.add(indexFile.getCodec().decode(varStr));
        }
    } catch (Exception e) {
        LOG.error(e);
        return -1;
    } finally {
        CloserUtil.close(indexFile);
        CloserUtil.close(w);
        CloserUtil.close(r);
    }
    return 0;
}
Also used : BufferedReader(java.io.BufferedReader) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) RandomAccessFile(java.io.RandomAccessFile) File(java.io.File) IOException(java.io.IOException)

Aggregations

VariantContextWriter (htsjdk.variant.variantcontext.writer.VariantContextWriter)132 File (java.io.File)67 VCFHeader (htsjdk.variant.vcf.VCFHeader)65 VariantContext (htsjdk.variant.variantcontext.VariantContext)60 SAMSequenceDictionaryProgress (com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress)53 ArrayList (java.util.ArrayList)41 IOException (java.io.IOException)38 VariantContextBuilder (htsjdk.variant.variantcontext.VariantContextBuilder)35 VCFHeaderLine (htsjdk.variant.vcf.VCFHeaderLine)34 HashSet (java.util.HashSet)32 List (java.util.List)29 SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary)28 Allele (htsjdk.variant.variantcontext.Allele)28 Collectors (java.util.stream.Collectors)27 VcfIterator (com.github.lindenb.jvarkit.util.vcf.VcfIterator)26 VCFFileReader (htsjdk.variant.vcf.VCFFileReader)26 Parameter (com.beust.jcommander.Parameter)24 Logger (com.github.lindenb.jvarkit.util.log.Logger)24 Launcher (com.github.lindenb.jvarkit.util.jcommander.Launcher)23 Program (com.github.lindenb.jvarkit.util.jcommander.Program)23