Search in sources :

Example 26 with VCFHeader

use of htsjdk.variant.vcf.VCFHeader in project ASCIIGenome by dariober.

the class Utils method getVCFHeader.

/**
 * Get the VCFHeader from the given source, which can be a URL or a local file.
 *
 * <p>The reader opened to fetch the header is closed before returning, also when
 * reading the header fails. Both readers are opened with the index-required flag
 * set to {@code false}.
 *
 * @param source URL or local path of the VCF file.
 * @return the header read from {@code source}.
 * @throws MalformedURLException if {@code source} is handled as a URL but
 *         {@code new URL(source)} rejects it.
 */
public static VCFHeader getVCFHeader(String source) throws MalformedURLException {
    VCFHeader vcfHeader;
    if (Utils.urlFileExists(source)) {
        URL url = new URL(source);
        AbstractFeatureReader<VariantContext, LineIterator> reader = AbstractFeatureReader.getFeatureReader(url.toExternalForm(), new VCFCodec(), false);
        try {
            vcfHeader = (VCFHeader) reader.getHeader();
        } finally {
            try {
                reader.close();
            } catch (java.io.IOException ignored) {
                // Best effort: either the header has already been read or a more
                // relevant exception is propagating. A failed close of a read-only
                // reader is not actionable, and the declared throws clause must
                // stay unchanged for existing callers.
            }
        }
    } else {
        // Set requiredIndex false!
        try (VCFFileReader reader = new VCFFileReader(new File(source), false)) {
            vcfHeader = reader.getFileHeader();
        }
    }
    return vcfHeader;
}
Also used : VCFCodec(htsjdk.variant.vcf.VCFCodec) VCFFileReader(htsjdk.variant.vcf.VCFFileReader) VariantContext(htsjdk.variant.variantcontext.VariantContext) VCFHeader(htsjdk.variant.vcf.VCFHeader) LineIterator(htsjdk.tribble.readers.LineIterator) File(java.io.File) IndexedFastaSequenceFile(htsjdk.samtools.reference.IndexedFastaSequenceFile) URL(java.net.URL)

Example 27 with VCFHeader

use of htsjdk.variant.vcf.VCFHeader in project ASCIIGenome by dariober.

the class MakeTabixIndex method blockCompressAndIndex.

/**
 * Block compress input file and create associated tabix index.
 *
 * <p>Line handling, in order:
 * <ul>
 *   <li>empty lines and lines starting with {@code "track "} are skipped;</li>
 *   <li>reading stops at a line starting with {@code "##FASTA"};</li>
 *   <li>other lines starting with {@code "#"} are copied to the output without being indexed;</li>
 *   <li>for non-VCF formats a dummy line, built from the first record, is indexed and
 *       written ahead of that record;</li>
 *   <li>every remaining line is added to the index and written to the output.</li>
 * </ul>
 * Records that fail with an exception are skipped with a warning on stderr, unless the
 * failure is an out-of-order record, which aborts the whole operation.
 *
 * <p>The output stream and the input iterator are closed on every exit path.
 *
 * @param intab input file or URI to compress and index.
 * @param bgzfOut destination of the block compressed output; the index is written based on this file.
 * @param fmt tabix format of the input.
 * @throws IOException if writing the compressed output or the index fails.
 * @throws InvalidRecordException if records are not sorted as required by the index creator.
 */
private void blockCompressAndIndex(String intab, File bgzfOut, TabixFormat fmt) throws IOException, InvalidRecordException {
    LineIterator lin = utils.IOUtils.openURIForLineIterator(intab);
    try (BlockCompressedOutputStream writer = new BlockCompressedOutputStream(bgzfOut)) {
        long filePosition = writer.getFilePointer();
        TabixIndexCreator indexCreator = new TabixIndexCreator(fmt);
        boolean first = true;
        // This is relevant to vcf files only: Prepare header and codec
        // ------------------------------------------------------------
        VCFHeader vcfHeader = null;
        VCFCodec vcfCodec = null;
        if (fmt.equals(TabixFormat.VCF)) {
            try (VCFFileReader vcfr = new VCFFileReader(new File(intab), false)) {
                vcfHeader = vcfr.getFileHeader();
            } catch (MalformedFeatureFile e) {
                // Input cannot be read as a VCF file: fall back to an empty header.
                vcfHeader = new VCFHeader();
            }
            vcfCodec = new VCFCodec();
            vcfCodec.setVCFHeader(vcfHeader, Utils.getVCFHeaderVersion(vcfHeader));
        }
        // ------------------------------------------------------------
        int nWarnings = 10;
        while (lin.hasNext()) {
            String line = lin.next().trim();
            try {
                if (line.isEmpty() || line.startsWith("track ")) {
                    continue;
                }
                // Must be tested before the generic "#" check below, otherwise this
                // branch is never reached and the FASTA section gets processed as records.
                if (line.startsWith("##FASTA")) {
                    break;
                }
                if (line.startsWith("#")) {
                    writer.write((line + "\n").getBytes());
                    filePosition = writer.getFilePointer();
                    continue;
                }
                if (first && !fmt.equals(TabixFormat.VCF)) {
                    String dummy = this.makeDummyLine(line, fmt);
                    addLineToIndex(dummy, indexCreator, filePosition, fmt, null, null);
                    writer.write(dummy.getBytes());
                    writer.write('\n');
                    filePosition = writer.getFilePointer();
                    first = false;
                }
                addLineToIndex(line, indexCreator, filePosition, fmt, vcfHeader, vcfCodec);
                writer.write(line.getBytes());
                writer.write('\n');
                filePosition = writer.getFilePointer();
            } catch (Exception e) {
                // Exceptions may carry no message; guard against a NullPointerException here.
                String message = e.getMessage();
                if (message != null && (message.contains("added out sequence of order") || message.contains("Features added out of order"))) {
                    // Get a string marker for out-of-order from htsjdk/tribble/index/tabix/TabixIndexCreator.java
                    throw new InvalidRecordException();
                }
                if (nWarnings >= 0) {
                    System.err.println("Warning: " + message + ". Skipping:\n" + line);
                }
                if (nWarnings == 0) {
                    System.err.println("Additional warnings will not be show.");
                }
                nWarnings--;
            }
        }
        writer.flush();
        Index index = indexCreator.finalizeIndex(writer.getFilePointer());
        index.writeBasedOnFeatureFile(bgzfOut);
    } finally {
        // The writer is closed by try-with-resources before this point.
        CloserUtil.close(lin);
    }
}
Also used : VCFCodec(htsjdk.variant.vcf.VCFCodec) BlockCompressedOutputStream(htsjdk.samtools.util.BlockCompressedOutputStream) VCFFileReader(htsjdk.variant.vcf.VCFFileReader) TabixIndexCreator(htsjdk.tribble.index.tabix.TabixIndexCreator) Index(htsjdk.tribble.index.Index) MalformedFeatureFile(htsjdk.tribble.TribbleException.MalformedFeatureFile) LineIterator(htsjdk.tribble.readers.LineIterator) InvalidRecordException(exceptions.InvalidRecordException) SQLException(java.sql.SQLException) IOException(java.io.IOException) VCFHeader(htsjdk.variant.vcf.VCFHeader) MalformedFeatureFile(htsjdk.tribble.TribbleException.MalformedFeatureFile) File(java.io.File) InvalidRecordException(exceptions.InvalidRecordException)

Example 28 with VCFHeader

use of htsjdk.variant.vcf.VCFHeader in project gridss by PapenfussLab.

the class ProcessingContext method getVariantContextWriter.

/**
 * Opens a VCF writer on the given file and writes the header to it.
 * The header is created from an empty metadata set and this context's {@code categories},
 * then extended by {@code GridssVcfConstants.addHeaders} and given the sequence
 * dictionary of the reference.
 * The caller is responsible for closing the returned {@link VariantContextWriter}.
 * @param file output file
 * @param createIndex passed on to the writer builder of this context
 * @return opened output VCF stream with the header already written
 */
@Override
public VariantContextWriter getVariantContextWriter(File file, boolean createIndex) {
    final VariantContextWriter writer = getVariantContextWriterBuilder(file, createIndex).build();
    final VCFHeader header = new VCFHeader(Collections.emptySet(), categories);
    GridssVcfConstants.addHeaders(header);
    header.setSequenceDictionary(getReference().getSequenceDictionary());
    writer.writeHeader(header);
    return writer;
}
Also used : VariantContextWriterBuilder(htsjdk.variant.variantcontext.writer.VariantContextWriterBuilder) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) VCFHeader(htsjdk.variant.vcf.VCFHeader)

Example 29 with VCFHeader

use of htsjdk.variant.vcf.VCFHeader in project jvarkit by lindenb.

the class KnimeVariantHelper method forEachVariants.

/**
 * Opens a VCF file, initializes this helper with its header and returns the
 * variants as a sequential stream.
 *
 * <p>The returned stream holds the open reader: the caller must close the stream
 * (for example with try-with-resources) to release the iterator and the reader.
 * If anything fails before the stream is returned, the reader is closed here.
 *
 * @param vcfFile path of the VCF file to read; no index is required
 * @return a stream over the variants of the file
 * @throws IOException declared for callers; see {@code init} for the conditions
 */
public Stream<VariantContext> forEachVariants(final String vcfFile) throws IOException {
    final File file = new File(vcfFile);
    IOUtil.assertFileIsReadable(file);
    final VCFFileReader r = new VCFFileReader(file, false);
    boolean streamCreated = false;
    try {
        final VCFHeader header = r.getFileHeader();
        this.init(header);
        final CloseableIterator<VariantContext> iter = r.iterator();
        final Iterable<VariantContext> iterable = () -> iter;
        final Stream<VariantContext> stream = StreamSupport.stream(iterable.spliterator(), false).onClose(() -> {
            CloserUtil.close(iter);
            CloserUtil.close(r);
        });
        streamCreated = true;
        return stream;
    } finally {
        // Ownership of the reader passes to the stream only once it exists;
        // on any earlier failure nobody else would close it.
        if (!streamCreated) {
            CloserUtil.close(r);
        }
    }
}
Also used : VCFFileReader(htsjdk.variant.vcf.VCFFileReader) VariantContext(htsjdk.variant.variantcontext.VariantContext) VCFHeader(htsjdk.variant.vcf.VCFHeader) File(java.io.File)

Example 30 with VCFHeader

use of htsjdk.variant.vcf.VCFHeader in project jvarkit by lindenb.

the class KnimeVariantHelper method processVcfMulti.

/**
 * Process the VCF file: every input variant is passed to {@code fun} and each
 * variant of the returned list is written to a new output VCF file.
 *
 * <p>The output extension is {@code .vcf.gz} or {@code .vcf} depending on
 * {@code forceSuffix} and, when no suffix is forced, on whether the input name ends with
 * {@code .gz}. Roughly every 10 seconds the working directory is checked for a file named
 * {@code STOP}; when present, processing is aborted.
 *
 * <p>On failure the output file and its index file are deleted if they exist.
 * NOTE(review): when the stop file is detected during the loop, the output is deleted by
 * the cleanup but its path is still returned; confirm callers expect that.
 *
 * @param vcfIn input file name or URL
 * @param fun function mapping one variant to the variants to write; a {@code null}
 *        result writes nothing for that variant
 * @return the output file name
 * @throws IllegalArgumentException if {@code vcfIn} is null
 * @throws IOException if a stop file already exists, or wrapping any exception raised
 *         while processing (including a missing sequence dictionary)
 */
public String processVcfMulti(final String vcfIn, final Function<VariantContext, List<VariantContext>> fun) throws IOException {
    this.lastVariantCount = 0;
    if (vcfIn == null) {
        final String msg = "Vcf Input URI/FIle is null.";
        LOG.error(msg);
        throw new IllegalArgumentException(msg);
    }
    File outVcfFile = null;
    File outVcfIndexFile = null;
    final File STOP_FILE = new File(this.workfingDirectory, "STOP");
    if (STOP_FILE.exists()) {
        final String msg = "There is a stop file in " + STOP_FILE;
        LOG.error(msg);
        throw new IOException(msg);
    }
    boolean fail_flag = false;
    VcfIterator iter = null;
    VariantContextWriter variantContextWriter = null;
    try {
        IOUtil.assertDirectoryIsReadable(this.workfingDirectory);
        IOUtil.assertDirectoryIsWritable(this.workfingDirectory);
        if (!IOUtil.isUrl(vcfIn)) {
            IOUtil.assertFileIsReadable(new File(vcfIn));
        }
        final String extension;
        if (this.forceSuffix.equals(ForceSuffix.ForceTabix)) {
            extension = ".vcf.gz";
        } else if (this.forceSuffix.equals(ForceSuffix.ForceTribble)) {
            extension = ".vcf";
        } else if (vcfIn.endsWith(".gz")) {
            extension = ".vcf.gz";
        } else {
            extension = ".vcf";
        }
        final String filename = this.createOutputFile(vcfIn, extension);
        // Block compressed output is indexed with tabix, plain VCF with tribble.
        // The name is only used to delete the index on failure, so it must match
        // the index that belongs to the chosen output extension.
        final String indexFilename;
        if (extension.endsWith(".gz")) {
            indexFilename = filename + TabixUtils.STANDARD_INDEX_EXTENSION;
        } else {
            indexFilename = filename + Tribble.STANDARD_INDEX_EXTENSION;
        }
        outVcfFile = new File(filename);
        outVcfIndexFile = new File(indexFilename);
        LOG.info("opening " + vcfIn);
        iter = VCFUtils.createVcfIterator(vcfIn);
        super.init(iter.getHeader());
        final VCFHeader vcfHeader2;
        if (this.getExtraVcfHeaderLines().isEmpty()) {
            vcfHeader2 = iter.getHeader();
        } else {
            vcfHeader2 = new VCFHeader(iter.getHeader());
            for (final VCFHeaderLine extra : this.getExtraVcfHeaderLines()) {
                vcfHeader2.addMetaDataLine(extra);
            }
            // clear vcf header line now they 've been added to the header.
            this.getExtraVcfHeaderLines().clear();
        }
        final SAMSequenceDictionary dict = this.getHeader().getSequenceDictionary();
        if (dict == null) {
            final String msg = "There is no dictionary (##contig lines) in " + vcfIn + " but they are required.";
            LOG.error(msg);
            throw new IllegalArgumentException(msg);
        }
        final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(dict);
        progress.setLogPrefix(this.filePrefix);
        LOG.info("writing " + outVcfFile + ". Emergency stop file is " + STOP_FILE);
        variantContextWriter = this.variantContextWriterBuilder.setOutputFile(outVcfFile).setReferenceDictionary(dict).build();
        long lastTick = System.currentTimeMillis();
        variantContextWriter.writeHeader(vcfHeader2);
        while (iter.hasNext()) {
            final VariantContext ctx = progress.watch(iter.next());
            final List<VariantContext> array = fun.apply(ctx);
            if (array != null) {
                for (final VariantContext ctx2 : array) {
                    variantContextWriter.add(ctx2);
                    this.lastVariantCount++;
                }
            }
            // check STOP File, at most once every 10 seconds
            final long now = System.currentTimeMillis();
            if ((now - lastTick) > 10 * 1000) {
                lastTick = now;
                if (STOP_FILE.exists()) {
                    LOG.warn("STOP FILE detected " + STOP_FILE + " Aborting.");
                    fail_flag = true;
                    break;
                }
            }
        }
        progress.finish();
        // Close explicitly and null the references so the finally block does not close twice.
        iter.close();
        iter = null;
        variantContextWriter.close();
        variantContextWriter = null;
        return outVcfFile.getPath();
    } catch (final Exception err) {
        fail_flag = true;
        LOG.error(err);
        throw new IOException(err);
    } finally {
        CloserUtil.close(iter);
        CloserUtil.close(variantContextWriter);
        if (fail_flag) {
            // Remove partial output so that a failed run leaves nothing behind.
            if (outVcfFile != null && outVcfFile.exists()) {
                LOG.warn("deleting " + outVcfFile);
                outVcfFile.delete();
            }
            if (outVcfIndexFile != null && outVcfIndexFile.exists()) {
                LOG.warn("deleting " + outVcfIndexFile);
                outVcfIndexFile.delete();
            }
        }
    }
}
Also used : VCFHeaderLine(htsjdk.variant.vcf.VCFHeaderLine) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) VariantContext(htsjdk.variant.variantcontext.VariantContext) IOException(java.io.IOException) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) IOException(java.io.IOException) VcfIterator(com.github.lindenb.jvarkit.util.vcf.VcfIterator) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) VCFHeader(htsjdk.variant.vcf.VCFHeader) File(java.io.File)

Aggregations

VCFHeader (htsjdk.variant.vcf.VCFHeader)182 VariantContext (htsjdk.variant.variantcontext.VariantContext)113 File (java.io.File)93 ArrayList (java.util.ArrayList)79 VariantContextWriter (htsjdk.variant.variantcontext.writer.VariantContextWriter)73 VCFHeaderLine (htsjdk.variant.vcf.VCFHeaderLine)64 VariantContextBuilder (htsjdk.variant.variantcontext.VariantContextBuilder)63 HashSet (java.util.HashSet)60 SAMSequenceDictionaryProgress (com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress)58 IOException (java.io.IOException)55 VCFInfoHeaderLine (htsjdk.variant.vcf.VCFInfoHeaderLine)52 SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary)49 Genotype (htsjdk.variant.variantcontext.Genotype)48 Allele (htsjdk.variant.variantcontext.Allele)47 VCFFileReader (htsjdk.variant.vcf.VCFFileReader)47 List (java.util.List)44 Set (java.util.Set)38 VcfIterator (com.github.lindenb.jvarkit.util.vcf.VcfIterator)36 CloserUtil (htsjdk.samtools.util.CloserUtil)35 Collectors (java.util.stream.Collectors)34