Search in sources :

Example 11 with VCFCodec

use of htsjdk.variant.vcf.VCFCodec in project jvarkit by lindenb.

the class JfxNgs method doMenuIndexVcf.

/**
 * Lets the user pick one or more VCF files and builds an index for each one.
 *
 * Files whose name ends with ".vcf.gz" get a tabix index, files whose name
 * ends with ".vcf" get a linear tribble index. Processing stops at the first
 * file that cannot be indexed (unknown extension, index already present, or
 * any exception raised while indexing); the problem is reported in a dialog.
 *
 * @param owner window used as the parent of the file chooser and error dialogs
 */
private void doMenuIndexVcf(final Window owner) {
    final FileChooser chooser = newFileChooser();
    chooser.getExtensionFilters().addAll(VcfStage.EXTENSION_FILTERS);
    final List<File> selection = chooser.showOpenMultipleDialog(owner);
    // the chooser returns null when the dialog is dismissed without a selection
    if (selection == null) {
        return;
    }
    for (final File vcf : selection) {
        updateLastDir(vcf);
        final String filename = vcf.getName();
        final boolean bgzipped = filename.endsWith(".vcf.gz");
        if (!bgzipped && !filename.endsWith(".vcf")) {
            showExceptionDialog(owner, "Cannot index file " + vcf);
            return;
        }

        // where the index is expected to live, next to the input file
        final File indexFile;
        if (bgzipped) {
            LOG.info("writing tabix index for " + vcf);
            indexFile = new File(vcf.getAbsolutePath() + TabixUtils.STANDARD_INDEX_EXTENSION);
        } else {
            LOG.info("writing tribble index for " + vcf);
            indexFile = new File(vcf.getAbsolutePath() + Tribble.STANDARD_INDEX_EXTENSION);
        }

        try {
            // never overwrite an existing index
            if (indexFile.exists()) {
                throw new IOException((bgzipped ? "Tabix index " : "Tribble index ") + indexFile + " already exists.");
            }
            if (bgzipped) {
                final TabixIndex tabix = IndexFactory.createTabixIndex(vcf, new VCFCodec(), (SAMSequenceDictionary) null);
                tabix.write(indexFile);
            } else {
                final Index linear = IndexFactory.createIndex(vcf, new VCFCodec(), IndexType.LINEAR);
                linear.writeBasedOnFeatureFile(vcf);
            }
            final Alert done = new Alert(AlertType.CONFIRMATION, "Done. ?", ButtonType.OK);
            done.showAndWait();
        } catch (final Exception err) {
            showExceptionDialog(owner, err);
            return;
        }
    }
}
Also used : VCFCodec(htsjdk.variant.vcf.VCFCodec) FileChooser(javafx.stage.FileChooser) TabixIndex(htsjdk.tribble.index.tabix.TabixIndex) Alert(javafx.scene.control.Alert) BAMIndex(htsjdk.samtools.BAMIndex) TabixIndex(htsjdk.tribble.index.tabix.TabixIndex) Index(htsjdk.tribble.index.Index) IOException(java.io.IOException) File(java.io.File) ScriptException(javax.script.ScriptException) BackingStoreException(java.util.prefs.BackingStoreException) IOException(java.io.IOException)

Example 12 with VCFCodec

use of htsjdk.variant.vcf.VCFCodec in project jvarkit by lindenb.

the class VcfOffsetsIndexFactory method indexVcfFile.

/**
 * Indexes a VCF file by recording the offset of every accepted variant line.
 *
 * The index starts with the MAGIC bytes, followed by one long per variant:
 * the reader position captured just before the variant line was read. For a
 * ".vcf.gz" file the position comes from the BlockCompressedInputStream, for
 * a ".vcf" file it comes from the AsciiLineReader. When an acceptVariant
 * predicate is set, only the variants it accepts are recorded.
 *
 * @param vcfFile the VCF file to index; its name must end with ".vcf" or ".vcf.gz"
 * @param indexFile the file the index is written to
 * @return indexFile
 * @throws IOException on read/write failure
 * @throws IllegalArgumentException if vcfFile has neither extension
 * @throws JvarkitException.FileFormatError if no "#CHROM" header line precedes the
 *         first variant, or the file contains no header at all
 */
public File indexVcfFile(final File vcfFile, final File indexFile) throws IOException {
    LOG.info("indexing " + vcfFile);
    IOUtil.assertFileIsReadable(vcfFile);
    DataOutputStream daos = null;
    BlockCompressedInputStream bgzin = null;
    AsciiLineReader ascii = null;
    final VCFCodec codec = new VCFCodec();
    SAMSequenceDictionaryProgress progress = null;
    try {
        // buffered: one writeLong() is issued per variant, which would otherwise
        // hit the underlying FileOutputStream eight bytes at a time
        daos = new DataOutputStream(new java.io.BufferedOutputStream(new FileOutputStream(indexFile)));
        daos.write(MAGIC);
        // exactly one of bgzin / ascii is non-null after this point
        if (vcfFile.getName().endsWith(".vcf.gz")) {
            bgzin = new BlockCompressedInputStream(vcfFile);
            ascii = null;
        } else if (vcfFile.getName().endsWith(".vcf")) {
            bgzin = null;
            ascii = new AsciiLineReader(new FileInputStream(vcfFile));
        } else {
            throw new IllegalArgumentException("not a vcf.gz or vcf file: " + vcfFile);
        }
        final List<String> headerLines = new ArrayList<>();
        for (; ; ) {
            // the offset must be captured BEFORE the line is consumed
            final long offset = (ascii == null ? bgzin.getPosition() : ascii.getPosition());
            final String line = (ascii == null ? bgzin.readLine() : ascii.readLine());
            if (line == null)
                break;
            if (line.startsWith("#")) {
                headerLines.add(line);
                if (line.startsWith("#CHROM")) {
                    // header is complete: feed the collected lines to the codec so
                    // that it can decode the variant lines that follow
                    codec.readHeader(new LineIterator() {

                        int i = 0;

                        @Override
                        public String next() {
                            final String s = headerLines.get(i);
                            i++;
                            return s;
                        }

                        @Override
                        public boolean hasNext() {
                            return i < headerLines.size();
                        }

                        @Override
                        public String peek() {
                            return i < headerLines.size() ? headerLines.get(i) : null;
                        }
                    });
                    final VCFHeader header = VCFUtils.parseHeader(headerLines).header;
                    progress = new SAMSequenceDictionaryProgress(header);
                    progress.logger(this.logger == null ? LOG : this.logger);
                    progress.setLogPrefix("indexing");
                }
                continue;
            }
            // progress is only created once the "#CHROM" line has been seen
            if (progress == null) {
                throw new JvarkitException.FileFormatError("no vcf header in " + vcfFile);
            }
            final VariantContext ctx = codec.decode(line);
            progress.watch(ctx);
            if (this.acceptVariant != null) {
                if (!acceptVariant.test(ctx))
                    continue;
            }
            daos.writeLong(offset);
        }
        // also reached when the file is empty or holds no "#CHROM" line
        if (progress == null) {
            throw new JvarkitException.FileFormatError("no vcf header in " + vcfFile);
        }
        progress.finish();
        // flush/close explicitly so that a write failure is reported to the caller
        daos.flush();
        daos.close();
        return indexFile;
    } finally {
        CloserUtil.close(ascii);
        CloserUtil.close(bgzin);
        CloserUtil.close(daos);
    }
}
Also used : VCFCodec(htsjdk.variant.vcf.VCFCodec) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) AsciiLineReader(htsjdk.tribble.readers.AsciiLineReader) DataOutputStream(java.io.DataOutputStream) ArrayList(java.util.ArrayList) VariantContext(htsjdk.variant.variantcontext.VariantContext) IOException(java.io.IOException) LineIterator(htsjdk.tribble.readers.LineIterator) FileInputStream(java.io.FileInputStream) FileOutputStream(java.io.FileOutputStream) VCFHeader(htsjdk.variant.vcf.VCFHeader) BlockCompressedInputStream(htsjdk.samtools.util.BlockCompressedInputStream)

Example 13 with VCFCodec

use of htsjdk.variant.vcf.VCFCodec in project jvarkit by lindenb.

the class VCFUtils method findCodecFromLines.

/**
 * Chooses a VCF codec from the header lines.
 *
 * The lines are scanned in order for a "key=value" line (an optional leading
 * "##" is ignored for the key test) whose key is a VCF format string. The
 * first such line that maps to a handled version decides the codec.
 *
 * @param list header lines to inspect
 * @return a VCF3Codec for VCF 3.2/3.3, a VCFCodec for VCF 4.0/4.1/4.2,
 *         otherwise the codec returned by createDefaultVCFCodec()
 */
public static AbstractVCFCodec findCodecFromLines(final List<String> list) {
    for (final String line : list) {
        final String stripped = line.startsWith("##") ? line.substring(2) : line;
        final int equalsAt = stripped.indexOf('=');
        if (equalsAt == -1) {
            continue;
        }
        final String key = stripped.substring(0, equalsAt);
        if (!VCFHeaderVersion.isFormatString(key)) {
            continue;
        }
        // the version lookup is given the untouched line, not the stripped one
        final VCFHeaderVersion version = VCFHeaderVersion.getHeaderVersion(line);
        if (version == null) {
            continue;
        }
        if (version == VCFHeaderVersion.VCF3_2 || version == VCFHeaderVersion.VCF3_3) {
            return new VCF3Codec();
        }
        if (version == VCFHeaderVersion.VCF4_0 || version == VCFHeaderVersion.VCF4_1 || version == VCFHeaderVersion.VCF4_2) {
            return new VCFCodec();
        }
        // any other version: keep scanning the remaining lines
    }
    return createDefaultVCFCodec();
}
Also used : AbstractVCFCodec(htsjdk.variant.vcf.AbstractVCFCodec) VCFCodec(htsjdk.variant.vcf.VCFCodec) VCF3Codec(htsjdk.variant.vcf.VCF3Codec) VCFHeaderVersion(htsjdk.variant.vcf.VCFHeaderVersion)

Example 14 with VCFCodec

use of htsjdk.variant.vcf.VCFCodec in project gatk by broadinstitute.

the class GenomicsDBImportIntegrationTest method testPreserveContigOrderingInHeader.

/**
 * Imports two GVCFs into a GenomicsDB workspace and checks that the sequence
 * dictionary read back from GenomicsDB equals the one of the first input GVCF.
 */
@Test
public void testPreserveContigOrderingInHeader() throws IOException {
    final String workspace = createTempDir("testPreserveContigOrderingInHeader-").getAbsolutePath() + "/workspace";
    final String firstGvcf = GENOMICSDB_TEST_DIR + "testHeaderContigLineSorting1.g.vcf";
    final String secondGvcf = GENOMICSDB_TEST_DIR + "testHeaderContigLineSorting2.g.vcf";
    writeToGenomicsDB(Arrays.asList(firstGvcf, secondGvcf), new SimpleInterval("chr20", 17959479, 17959479), workspace, 0, false, 0);

    final String vidmapPath = new File(workspace, GenomicsDBConstants.DEFAULT_VIDMAP_FILE_NAME).getAbsolutePath();
    final String callsetMapPath = new File(workspace, GenomicsDBConstants.DEFAULT_CALLSETMAP_FILE_NAME).getAbsolutePath();

    try (final GenomicsDBFeatureReader<VariantContext, PositionalBufferedStream> dbReader =
                 new GenomicsDBFeatureReader<>(vidmapPath, callsetMapPath, workspace, GenomicsDBConstants.DEFAULT_ARRAY_NAME, b38_reference_20_21, null, new BCF2Codec());
         final AbstractFeatureReader<VariantContext, LineIterator> gvcfReader =
                 AbstractFeatureReader.getFeatureReader(firstGvcf, new VCFCodec(), true)) {
        final VCFHeader dbHeader = (VCFHeader) dbReader.getHeader();
        final SAMSequenceDictionary dbDictionary = dbHeader.getSequenceDictionary();
        final VCFHeader gvcfHeader = (VCFHeader) gvcfReader.getHeader();
        final SAMSequenceDictionary gvcfDictionary = gvcfHeader.getSequenceDictionary();
        Assert.assertEquals(dbDictionary, gvcfDictionary, "Sequence dictionary from GenomicsDB does not match original sequence dictionary from input GVCF");
    }
}
Also used : VCFCodec(htsjdk.variant.vcf.VCFCodec) VariantContext(htsjdk.variant.variantcontext.VariantContext) LineIterator(htsjdk.tribble.readers.LineIterator) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) GenomicsDBFeatureReader(com.intel.genomicsdb.GenomicsDBFeatureReader) PositionalBufferedStream(htsjdk.tribble.readers.PositionalBufferedStream) BCF2Codec(htsjdk.variant.bcf2.BCF2Codec) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval) VCFHeader(htsjdk.variant.vcf.VCFHeader) File(java.io.File) BaseTest(org.broadinstitute.hellbender.utils.test.BaseTest) Test(org.testng.annotations.Test) CommandLineProgramTest(org.broadinstitute.hellbender.CommandLineProgramTest)

Example 15 with VCFCodec

use of htsjdk.variant.vcf.VCFCodec in project gatk by broadinstitute.

the class GenotypeGVCFsIntegrationTest method getVariantContexts.

/**
 * Returns a list of VariantContext records from a VCF file.
 *
 * The header is consumed first, then every remaining line is decoded in
 * file order. The underlying file stream is closed before returning, whether
 * decoding succeeds or fails.
 *
 * @param vcfFile VCF file
 * @return list of VariantContext records
 * @throws IOException if the file does not exist or can not be opened
 */
private static List<VariantContext> getVariantContexts(final File vcfFile) throws IOException {
    final VCFCodec codec = new VCFCodec();
    final List<VariantContext> VCs = new ArrayList<>();
    // try-with-resources: the stream was previously never closed
    try (final FileInputStream s = new FileInputStream(vcfFile)) {
        final LineIterator lineIteratorVCF = codec.makeSourceFromStream(new PositionalBufferedStream(s));
        // the header must be read before any variant line can be decoded
        codec.readHeader(lineIteratorVCF);
        while (lineIteratorVCF.hasNext()) {
            final String line = lineIteratorVCF.next();
            Assert.assertFalse(line == null);
            VCs.add(codec.decode(line));
        }
    }
    return VCs;
}
Also used : VCFCodec(htsjdk.variant.vcf.VCFCodec) PositionalBufferedStream(htsjdk.tribble.readers.PositionalBufferedStream) ArrayList(java.util.ArrayList) VariantContext(htsjdk.variant.variantcontext.VariantContext) LineIterator(htsjdk.tribble.readers.LineIterator) FileInputStream(java.io.FileInputStream)

Aggregations

VCFCodec (htsjdk.variant.vcf.VCFCodec)21 LineIterator (htsjdk.tribble.readers.LineIterator)12 VariantContext (htsjdk.variant.variantcontext.VariantContext)12 VCFHeader (htsjdk.variant.vcf.VCFHeader)11 File (java.io.File)11 IOException (java.io.IOException)8 ArrayList (java.util.ArrayList)6 VCFFileReader (htsjdk.variant.vcf.VCFFileReader)5 PositionalBufferedStream (htsjdk.tribble.readers.PositionalBufferedStream)4 SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary)3 Index (htsjdk.tribble.index.Index)3 VisibleForTesting (com.google.common.annotations.VisibleForTesting)2 StructuralVariantFactory (com.hartwig.hmftools.common.variant.structural.StructuralVariantFactory)2 GenomicsDBFeatureReader (com.intel.genomicsdb.GenomicsDBFeatureReader)2 BlockCompressedInputStream (htsjdk.samtools.util.BlockCompressedInputStream)2 BlockCompressedOutputStream (htsjdk.samtools.util.BlockCompressedOutputStream)2 CloseableIterator (htsjdk.samtools.util.CloseableIterator)2 AbstractFeatureReader (htsjdk.tribble.AbstractFeatureReader)2 FeatureReader (htsjdk.tribble.FeatureReader)2 BCF2Codec (htsjdk.variant.bcf2.BCF2Codec)2