Search in sources :

Example 21 with VCFReader

use of htsjdk.variant.vcf.VCFReader in project jvarkit by lindenb.

the class VcfVcf method doVcfToVcf.

@Override
protected int doVcfToVcf(String inputName, VCFIterator r, VariantContextWriter w) {
    try {
        CloseableIterator<VariantContext> iter = null;
        LOG.info("opening file: " + this.TABIX);
        VCFReader tabix = VCFReaderFactory.makeDefault().open(this.TABIX, true);
        VCFHeader header3 = tabix.getHeader();
        VCFHeader header1 = r.getHeader();
        VCFHeader h2 = new VCFHeader(header1.getMetaDataInInputOrder(), header1.getSampleNamesInOrder());
        for (String infoId : this.INFO_IDS) {
            VCFInfoHeaderLine vihl = header3.getInfoHeaderLine(infoId);
            if (vihl == null) {
                LOG.warn("Not INFO=" + infoId + " in " + TABIX);
                continue;
            }
            if (h2.getInfoHeaderLine(infoId) != null) {
                LOG.warn("Input already contains INFO=" + vihl);
            }
            h2.addMetaDataLine(vihl);
        }
        if (ALT_CONFLICT_FLAG != null) {
            h2.addMetaDataLine(new VCFInfoHeaderLine(ALT_CONFLICT_FLAG, 1, VCFHeaderLineType.Flag, "conflict ALT allele with " + this.TABIX));
        }
        w.writeHeader(h2);
        while (r.hasNext()) {
            VariantContext ctx1 = r.next();
            VariantContextBuilder vcb = new VariantContextBuilder(ctx1);
            String BEST_ID = null;
            boolean best_id_match_alt = false;
            List<VariantContext> variantsList = new ArrayList<VariantContext>();
            iter = tabix.query(ctx1.getContig(), Math.max(0, ctx1.getStart() - 1), (ctx1.getEnd() + 1));
            while (iter.hasNext()) {
                VariantContext ctx3 = iter.next();
                if (!ctx3.getContig().equals(ctx1.getContig()))
                    continue;
                if (ctx3.getStart() != ctx1.getStart())
                    continue;
                if (ctx3.getEnd() != ctx1.getEnd())
                    continue;
                if (ctx1.getReference().equals(ctx3.getReference()) && ctx1.getAlternateAlleles().equals(ctx3.getAlternateAlleles())) {
                    variantsList.clear();
                    variantsList.add(ctx3);
                    break;
                } else {
                    variantsList.add(ctx3);
                }
            }
            CloserUtil.close(iter);
            iter = null;
            for (VariantContext ctx3 : variantsList) {
                if (this.REF_ALLELE_MATTERS && !ctx1.getReference().equals(ctx3.getReference())) {
                    continue;
                }
                if (this.ALT_ALLELES_MATTERS && !ctx1.getAlternateAlleles().equals(ctx3.getAlternateAlleles())) {
                    continue;
                }
                if (ctx3.getID() != null && this.REPLACE_ID) {
                    if (BEST_ID != null && best_id_match_alt) {
                    // nothing
                    } else {
                        BEST_ID = ctx3.getID();
                        best_id_match_alt = ctx1.getAlternateAlleles().equals(ctx3.getAlternateAlleles());
                    }
                }
                for (String id : this.INFO_IDS) {
                    Object info3 = ctx3.getAttribute(id);
                    if (info3 == null) {
                        continue;
                    }
                    Object info1 = ctx1.getAttribute(id);
                    if (info1 != null && !this.REPLACE_INFO_FIELD) {
                        continue;
                    }
                    vcb.attribute(id, info3);
                }
                if (ALT_CONFLICT_FLAG != null && !ctx1.getAlternateAlleles().equals(ctx3.getAlternateAlleles())) {
                    vcb.attribute(ALT_CONFLICT_FLAG, true);
                }
            }
            if (BEST_ID != null) {
                vcb.id(BEST_ID);
            }
            w.add(vcb.make());
        }
        tabix.close();
        return 0;
    } catch (Exception err) {
        LOG.error(err);
        return -1;
    }
}
Also used : VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) VCFReader(htsjdk.variant.vcf.VCFReader) ArrayList(java.util.ArrayList) VariantContext(htsjdk.variant.variantcontext.VariantContext) VCFHeader(htsjdk.variant.vcf.VCFHeader) VCFInfoHeaderLine(htsjdk.variant.vcf.VCFInfoHeaderLine) IOException(java.io.IOException)

Example 22 with VCFReader

use of htsjdk.variant.vcf.VCFReader in project jvarkit by lindenb.

the class VcfPeekAf method beforeVcf.

@Override
protected int beforeVcf() {
    final List<AFPeeker> all_peekers = new ArrayList<>();
    all_peekers.add(new InfoAcAnPeeker());
    all_peekers.add(new InfoAfPeeker());
    all_peekers.add(new GtPeeker());
    all_peekers.add(new CustomInfoPeeker());
    this.indexedVcfFileReader = null;
    if (this.buffer_size < 1) {
        LOG.error("bad buffer-size");
        return -1;
    }
    try {
        if (this.list_peekers) {
            try (PrintWriter out = super.openPathOrStdoutAsPrintWriter(this.outputFile)) {
                for (final AFPeeker p : all_peekers) out.println(p.getName() + "\n\t" + p.getDescription());
                out.flush();
            }
            System.exit(0);
        }
        if (StringUtils.isBlank(this.peekerName)) {
            LOG.error("peeker name is empty");
            return -1;
        }
        this.peeker = all_peekers.stream().filter(P -> P.getName().equals(this.peekerName)).findFirst().orElse(null);
        if (this.peeker == null) {
            LOG.error("peeker " + this.peekerName + " not found in " + all_peekers.stream().map(P -> P.getName()).collect(Collectors.joining(";")));
            return -1;
        }
        final VCFReader reader0 = VCFReaderFactory.makeDefault().open(this.resourceVcfFile, true);
        this.indexedVcfFileReader = new BufferedVCFReader(reader0, this.buffer_size);
        this.peeker.initialize(this.indexedVcfFileReader.getHeader());
        this.indexedVcfFileReader.setSimplifier(peeker::sanitize);
        return 0;
    } catch (final Throwable err) {
        LOG.error(err);
        return -1;
    }
}
Also used : CloseableIterator(htsjdk.samtools.util.CloseableIterator) Allele(htsjdk.variant.variantcontext.Allele) ContigNameConverter(com.github.lindenb.jvarkit.util.bio.fasta.ContigNameConverter) Program(com.github.lindenb.jvarkit.util.jcommander.Program) VCFIterator(htsjdk.variant.vcf.VCFIterator) Parameter(com.beust.jcommander.Parameter) VCFHeader(htsjdk.variant.vcf.VCFHeader) OptionalDouble(java.util.OptionalDouble) BufferedVCFReader(com.github.lindenb.jvarkit.variant.vcf.BufferedVCFReader) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) DistanceParser(com.github.lindenb.jvarkit.util.bio.DistanceParser) NoSplitter(com.github.lindenb.jvarkit.util.jcommander.NoSplitter) Map(java.util.Map) VCFReaderFactory(com.github.lindenb.jvarkit.variant.vcf.VCFReaderFactory) Path(java.nio.file.Path) CloserUtil(htsjdk.samtools.util.CloserUtil) VCFConstants(htsjdk.variant.vcf.VCFConstants) PrintWriter(java.io.PrintWriter) VCFFilterHeaderLine(htsjdk.variant.vcf.VCFFilterHeaderLine) VCFHeaderLineType(htsjdk.variant.vcf.VCFHeaderLineType) Logger(com.github.lindenb.jvarkit.util.log.Logger) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) ProgressFactory(com.github.lindenb.jvarkit.util.log.ProgressFactory) VCFReader(htsjdk.variant.vcf.VCFReader) Set(java.util.Set) IOException(java.io.IOException) JVarkitVersion(com.github.lindenb.jvarkit.util.JVarkitVersion) Collectors(java.util.stream.Collectors) List(java.util.List) FractionConverter(com.github.lindenb.jvarkit.jcommander.converter.FractionConverter) StringUtils(com.github.lindenb.jvarkit.lang.StringUtils) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) VCFInfoHeaderLine(htsjdk.variant.vcf.VCFInfoHeaderLine) VariantContext(htsjdk.variant.variantcontext.VariantContext) OnePassVcfLauncher(com.github.lindenb.jvarkit.jcommander.OnePassVcfLauncher) VCFHeaderLineCount(htsjdk.variant.vcf.VCFHeaderLineCount) Collections(java.util.Collections) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) ArrayList(java.util.ArrayList) BufferedVCFReader(com.github.lindenb.jvarkit.variant.vcf.BufferedVCFReader) VCFReader(htsjdk.variant.vcf.VCFReader) PrintWriter(java.io.PrintWriter) BufferedVCFReader(com.github.lindenb.jvarkit.variant.vcf.BufferedVCFReader)

Example 23 with VCFReader

use of htsjdk.variant.vcf.VCFReader in project jvarkit by lindenb.

the class FixVcfMissingGenotypesTest method test01.

@Test
public void test01() throws Exception {
    final TestSupport support = new TestSupport();
    try {
        Path nocallvcffile = support.createTmpPath(".vcf");
        VCFReader r1 = VCFReaderFactory.makeDefault().open(Paths.get(support.resource("rotavirus_rf.vcf.gz")), false);
        final VariantContextWriter vcw = VCFUtils.createVariantContextWriter(nocallvcffile.toFile());
        vcw.writeHeader(r1.getHeader());
        r1.iterator().stream().forEach(V -> {
            VariantContextBuilder vcb = new VariantContextBuilder(V);
            vcb.genotypes(V.getGenotypes().stream().map(G -> GenotypeBuilder.createMissing(G.getSampleName(), 2)).collect(Collectors.toList()));
            vcw.add(vcb.make());
        });
        vcw.close();
        r1.close();
        final Path output = support.createTmpPath(".vcf");
        final FixVcfMissingGenotypes cmd = new FixVcfMissingGenotypes();
        Assert.assertEquals(0, cmd.instanceMain(new String[] { "-B", support.resource("S1.bam"), "-B", support.resource("S2.bam"), "-B", support.resource("S3.bam"), "-B", support.resource("S4.bam"), "-B", support.resource("S5.bam"), "-o", output.toString(), nocallvcffile.toString() }));
        support.assertIsVcf(output);
        Assert.assertFalse(support.variantStream(output).flatMap(V -> V.getGenotypes().stream()).noneMatch(G -> G.isHomRef()), "at least one GT should be 0/0");
    } finally {
        support.removeTmpFiles();
    }
}
Also used : Path(java.nio.file.Path) VCFUtils(com.github.lindenb.jvarkit.util.vcf.VCFUtils) GenotypeBuilder(htsjdk.variant.variantcontext.GenotypeBuilder) VCFReader(htsjdk.variant.vcf.VCFReader) Test(org.testng.annotations.Test) Collectors(java.util.stream.Collectors) Assert(org.testng.Assert) Paths(java.nio.file.Paths) AlsoTest(com.github.lindenb.jvarkit.tests.AlsoTest) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) TestSupport(com.github.lindenb.jvarkit.tools.tests.TestSupport) VCFReaderFactory(com.github.lindenb.jvarkit.variant.vcf.VCFReaderFactory) Path(java.nio.file.Path) VCFUtilsTest(com.github.lindenb.jvarkit.util.vcf.VCFUtilsTest) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) VCFReader(htsjdk.variant.vcf.VCFReader) TestSupport(com.github.lindenb.jvarkit.tools.tests.TestSupport) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) Test(org.testng.annotations.Test) AlsoTest(com.github.lindenb.jvarkit.tests.AlsoTest) VCFUtilsTest(com.github.lindenb.jvarkit.util.vcf.VCFUtilsTest)

Example 24 with VCFReader

use of htsjdk.variant.vcf.VCFReader in project jvarkit by lindenb.

the class VcfBurdenMAFTest method test01.

@Test
public void test01() throws IOException {
    try {
        final String inputFile = support.resource("rotavirus_rf.vcf.gz");
        try (final VCFReader r0 = VCFReaderFactory.makeDefault().open(Paths.get(inputFile), false)) {
            final VCFHeader vcfheader = r0.getHeader();
            if (vcfheader.getNGenotypeSamples() < 1) {
                return;
            }
        }
        final Path ped = support.createRandomPedigreeFromFile(inputFile);
        if (ped == null) {
            Reporter.getCurrentTestResult().setAttribute("warn", "No Pedigree for " + inputFile);
            return;
        }
        final Path output = support.createTmpPath(".vcf");
        Assert.assertEquals(new VcfBurdenMAF().instanceMain(new String[] { "-o", output.toString(), "--max-af", "5/100", "--pedigree", ped.toString(), inputFile.toString() }), 0);
        support.assertIsVcf(output);
    } finally {
        support.removeTmpFiles();
    }
}
Also used : Path(java.nio.file.Path) VCFReader(htsjdk.variant.vcf.VCFReader) VCFHeader(htsjdk.variant.vcf.VCFHeader) AlsoTest(com.github.lindenb.jvarkit.tests.AlsoTest) Test(org.testng.annotations.Test) LauncherTest(com.github.lindenb.jvarkit.util.jcommander.LauncherTest)

Example 25 with VCFReader

use of htsjdk.variant.vcf.VCFReader in project jvarkit by lindenb.

the class VcfCutSamplesTest method test01.

@Test(dataProvider = "src01")
public void test01(final String vcfpath) throws IOException {
    try {
        VCFReader r = VCFReaderFactory.makeDefault().open(Paths.get(vcfpath), false);
        final List<String> samples = new ArrayList<>(r.getHeader().getSampleNamesInOrder());
        r.close();
        if (samples.isEmpty())
            return;
        Collections.shuffle(samples, support.random);
        final List<String> keep = samples.subList(0, support.random.nextInt(samples.size()));
        final Path vcfOut = support.createTmpPath(".vcf");
        List<String> args = new ArrayList<>();
        args.add("-o");
        args.add(vcfOut.toString());
        for (String s : keep) {
            args.add("-S");
            args.add(s);
        }
        args.add(vcfpath);
        Assert.assertEquals(new VcfCutSamples().instanceMain(args), 0);
        support.assertIsVcf(vcfOut);
        r = VCFReaderFactory.makeDefault().open(vcfOut, false);
        Assert.assertEquals(new TreeSet<>(keep), new TreeSet<>(r.getHeader().getSampleNamesInOrder()));
        r.close();
    } finally {
        support.removeTmpFiles();
    }
}
Also used : Path(java.nio.file.Path) VCFReader(htsjdk.variant.vcf.VCFReader) ArrayList(java.util.ArrayList) Test(org.testng.annotations.Test) LauncherTest(com.github.lindenb.jvarkit.util.jcommander.LauncherTest) AlsoTest(com.github.lindenb.jvarkit.tests.AlsoTest) VCFUtilsTest(com.github.lindenb.jvarkit.util.vcf.VCFUtilsTest)

Aggregations

VCFReader (htsjdk.variant.vcf.VCFReader)60 VariantContext (htsjdk.variant.variantcontext.VariantContext)45 Path (java.nio.file.Path)41 VCFHeader (htsjdk.variant.vcf.VCFHeader)37 VCFReaderFactory (com.github.lindenb.jvarkit.variant.vcf.VCFReaderFactory)34 CloseableIterator (htsjdk.samtools.util.CloseableIterator)32 SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary)31 Collectors (java.util.stream.Collectors)30 ArrayList (java.util.ArrayList)29 List (java.util.List)29 Logger (com.github.lindenb.jvarkit.util.log.Logger)28 Parameter (com.beust.jcommander.Parameter)27 Program (com.github.lindenb.jvarkit.util.jcommander.Program)26 Launcher (com.github.lindenb.jvarkit.util.jcommander.Launcher)24 Set (java.util.Set)22 VariantContextWriter (htsjdk.variant.variantcontext.writer.VariantContextWriter)21 IOException (java.io.IOException)21 CloserUtil (htsjdk.samtools.util.CloserUtil)20 VariantContextBuilder (htsjdk.variant.variantcontext.VariantContextBuilder)20 StringUtils (com.github.lindenb.jvarkit.lang.StringUtils)19