Search in sources:

Example 1 with BufferedVCFReader

Use of com.github.lindenb.jvarkit.variant.vcf.BufferedVCFReader in project jvarkit by lindenb.

In the class BamPhased01, method beforeSam:

/**
 * Validates the user options and opens the VCF resources used by this tool.
 *
 * <p>The checks are, in order:
 * <ul>
 *   <li>{@code XTAG} must be empty or exactly two characters long;</li>
 *   <li>a two-character {@code XTAG} must start with {@code 'X'};</li>
 *   <li>{@code num_supporting_variants} must be at least 2.</li>
 * </ul>
 * When all checks pass, {@code vcfFile} is opened through {@code VCFReaderFactory}, wrapped in a
 * {@code BufferedVCFReader} sized with {@code buffSizeInBp}, and {@code simplify} is installed as
 * the reader's simplifier.
 *
 * @return 0 on success, -1 when one of the options is rejected
 */
@Override
protected int beforeSam() {
    final int tagLength = this.XTAG.length();
    if (tagLength != 0 && tagLength != 2) {
        // message previously read "empty of length==2", which misdescribed the accepted values
        LOG.error("tag should be empty or of length==2 but got " + this.XTAG);
        return -1;
    }
    if (tagLength == 2 && !this.XTAG.startsWith("X")) {
        LOG.error("tag should start with 'X' but got " + this.XTAG);
        return -1;
    }
    if (this.num_supporting_variants < 2) {
        LOG.error("Bad number of supporting variant (should be >=2) " + this.num_supporting_variants);
        return -1;
    }
    // NOTE(review): the 'true' argument presumably requires an index on the VCF — confirm against VCFReaderFactory.
    this.vcfReader = VCFReaderFactory.makeDefault().open(this.vcfFile, true);
    this.bufferedVCFReader = new BufferedVCFReader(this.vcfReader, this.buffSizeInBp);
    this.bufferedVCFReader.setSimplifier(V -> simplify(V));
    return 0;
}
Also used : BufferedVCFReader(com.github.lindenb.jvarkit.variant.vcf.BufferedVCFReader)

Example 2 with BufferedVCFReader

Use of com.github.lindenb.jvarkit.variant.vcf.BufferedVCFReader in project jvarkit by lindenb.

In the class VcfPeekAf, method beforeVcf:

/**
 * Prepares the allele-frequency peeker and the buffered resource VCF reader.
 *
 * <p>Steps performed:
 * <ol>
 *   <li>builds the list of the four available peekers;</li>
 *   <li>resets {@code indexedVcfFileReader} and rejects a {@code buffer_size} below 1;</li>
 *   <li>when {@code list_peekers} is set, prints each peeker's name and description to
 *       {@code outputFile} (or stdout) and terminates the JVM with status 0;</li>
 *   <li>selects the peeker whose name equals {@code peekerName};</li>
 *   <li>opens {@code resourceVcfFile}, wraps it in a {@code BufferedVCFReader}, initializes the
 *       peeker with the reader's header and installs the peeker's {@code sanitize} method as
 *       simplifier.</li>
 * </ol>
 *
 * @return 0 on success, -1 on invalid options or when any {@code Throwable} is raised during setup
 */
@Override
protected int beforeVcf() {
    final List<AFPeeker> availablePeekers = new ArrayList<>();
    availablePeekers.add(new InfoAcAnPeeker());
    availablePeekers.add(new InfoAfPeeker());
    availablePeekers.add(new GtPeeker());
    availablePeekers.add(new CustomInfoPeeker());

    this.indexedVcfFileReader = null;
    if (this.buffer_size < 1) {
        LOG.error("bad buffer-size");
        return -1;
    }

    try {
        // "list" mode: dump the catalog of peekers and stop the program right away.
        if (this.list_peekers) {
            try (PrintWriter catalog = super.openPathOrStdoutAsPrintWriter(this.outputFile)) {
                for (final AFPeeker entry : availablePeekers) {
                    catalog.println(entry.getName() + "\n\t" + entry.getDescription());
                }
                catalog.flush();
            }
            System.exit(0);
        }

        if (StringUtils.isBlank(this.peekerName)) {
            LOG.error("peeker name is empty");
            return -1;
        }

        // First peeker with a matching name wins; null when none matches.
        AFPeeker selected = null;
        for (final AFPeeker candidate : availablePeekers) {
            if (candidate.getName().equals(this.peekerName)) {
                selected = candidate;
                break;
            }
        }
        this.peeker = selected;
        if (this.peeker == null) {
            final String knownNames = availablePeekers.stream()
                    .map(item -> item.getName())
                    .collect(Collectors.joining(";"));
            LOG.error("peeker " + this.peekerName + " not found in " + knownNames);
            return -1;
        }

        final VCFReader resourceReader = VCFReaderFactory.makeDefault().open(this.resourceVcfFile, true);
        this.indexedVcfFileReader = new BufferedVCFReader(resourceReader, this.buffer_size);
        this.peeker.initialize(this.indexedVcfFileReader.getHeader());
        this.indexedVcfFileReader.setSimplifier(this.peeker::sanitize);
        return 0;
    } catch (final Throwable err) {
        LOG.error(err);
        return -1;
    }
}
Also used : CloseableIterator(htsjdk.samtools.util.CloseableIterator) Allele(htsjdk.variant.variantcontext.Allele) ContigNameConverter(com.github.lindenb.jvarkit.util.bio.fasta.ContigNameConverter) Program(com.github.lindenb.jvarkit.util.jcommander.Program) VCFIterator(htsjdk.variant.vcf.VCFIterator) Parameter(com.beust.jcommander.Parameter) VCFHeader(htsjdk.variant.vcf.VCFHeader) OptionalDouble(java.util.OptionalDouble) BufferedVCFReader(com.github.lindenb.jvarkit.variant.vcf.BufferedVCFReader) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) DistanceParser(com.github.lindenb.jvarkit.util.bio.DistanceParser) NoSplitter(com.github.lindenb.jvarkit.util.jcommander.NoSplitter) Map(java.util.Map) VCFReaderFactory(com.github.lindenb.jvarkit.variant.vcf.VCFReaderFactory) Path(java.nio.file.Path) CloserUtil(htsjdk.samtools.util.CloserUtil) VCFConstants(htsjdk.variant.vcf.VCFConstants) PrintWriter(java.io.PrintWriter) VCFFilterHeaderLine(htsjdk.variant.vcf.VCFFilterHeaderLine) VCFHeaderLineType(htsjdk.variant.vcf.VCFHeaderLineType) Logger(com.github.lindenb.jvarkit.util.log.Logger) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) ProgressFactory(com.github.lindenb.jvarkit.util.log.ProgressFactory) VCFReader(htsjdk.variant.vcf.VCFReader) Set(java.util.Set) IOException(java.io.IOException) JVarkitVersion(com.github.lindenb.jvarkit.util.JVarkitVersion) Collectors(java.util.stream.Collectors) List(java.util.List) FractionConverter(com.github.lindenb.jvarkit.jcommander.converter.FractionConverter) StringUtils(com.github.lindenb.jvarkit.lang.StringUtils) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) VCFInfoHeaderLine(htsjdk.variant.vcf.VCFInfoHeaderLine) VariantContext(htsjdk.variant.variantcontext.VariantContext) OnePassVcfLauncher(com.github.lindenb.jvarkit.jcommander.OnePassVcfLauncher) VCFHeaderLineCount(htsjdk.variant.vcf.VCFHeaderLineCount) Collections(java.util.Collections) 
VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) ArrayList(java.util.ArrayList) BufferedVCFReader(com.github.lindenb.jvarkit.variant.vcf.BufferedVCFReader) VCFReader(htsjdk.variant.vcf.VCFReader) PrintWriter(java.io.PrintWriter) BufferedVCFReader(com.github.lindenb.jvarkit.variant.vcf.BufferedVCFReader)

Example 3 with BufferedVCFReader

Use of com.github.lindenb.jvarkit.variant.vcf.BufferedVCFReader in project jvarkit by lindenb.

In the class Biostar9501110, method beforeSam:

/**
 * Checks the options, opens the buffered VCF reader and delegates to the parent hook.
 *
 * <p>Rejects {@code min_num_variants} below 1. A non-blank {@code attribute} must be exactly two
 * characters long and start with {@code "X"}. After validation, {@code vcfFile} is opened,
 * wrapped in a {@code BufferedVCFReader} sized with {@code buffSizeInBp}, {@code simplify} is
 * installed as simplifier, and {@code use_clip} is forwarded to {@code findVariantInSamRecord}.
 *
 * @return -1 when an option is invalid, otherwise the value returned by {@code super.beforeSam()}
 */
@Override
protected int beforeSam() {
    if (this.min_num_variants < 1) {
        LOG.error("--min-variants < 1");
        return -1;
    }

    // A blank attribute is accepted as-is; a non-blank one must look like "X?".
    final boolean attributeProvided = !StringUtils.isBlank(this.attribute);
    if (attributeProvided && !(this.attribute.length() == 2 && this.attribute.startsWith("X"))) {
        LOG.error("attribute should have length==2 and start with X but got " + this.attribute + ".");
        return -1;
    }

    this.vcfReader = VCFReaderFactory.makeDefault().open(this.vcfFile, true);
    this.bufferedVCFReader = new BufferedVCFReader(this.vcfReader, this.buffSizeInBp);
    this.bufferedVCFReader.setSimplifier(variant -> simplify(variant));

    this.findVariantInSamRecord.setUseClip(this.use_clip);
    return super.beforeSam();
}
Also used : BufferedVCFReader(com.github.lindenb.jvarkit.variant.vcf.BufferedVCFReader)

Example 4 with BufferedVCFReader

Use of com.github.lindenb.jvarkit.variant.vcf.BufferedVCFReader in project jvarkit by lindenb.

In the class VcfGnomad, method beforeVcf:

/**
 * Opens the gnomAD VCF and resolves the INFO fields requested by the user.
 *
 * <p>The reader for {@code gnomadPath} is wrapped in a {@code BufferedVCFReader} sized with
 * {@code gnomadBufferSize}, and a contig-name converter is built from the dictionary of its
 * header. Each token of {@code infoFieldStr} (split on spaces, commas, semicolons and tabs) is
 * then matched case-insensitively against the INFO header lines; the header's own ID is recorded
 * in {@code gnomad_info_af_attributes}. Finally a simplifier is installed that drops genotypes
 * and every INFO attribute that was not requested.
 *
 * @return 0 on success; -1 when the VCF cannot be opened, a requested field is not declared in
 *     the header, or no field was requested
 */
@Override
protected int beforeVcf() {
    try {
        final VCFReader rawReader = VCFReaderFactory.makeDefault().open(this.gnomadPath, true);
        this.gnomadReader = new BufferedVCFReader(rawReader, this.gnomadBufferSize);
        this.ctgNameConverter = ContigNameConverter.fromOneDictionary(
                SequenceDictionaryUtils.extractRequired(rawReader.getHeader()));
    } catch (final Throwable err) {
        LOG.error(err);
        return -1;
    }

    final VCFHeader gnomadHeader = this.gnomadReader.getHeader();
    final String[] requestedFields = this.infoFieldStr.split("[ ,;\t]+");
    for (final String field : requestedFields) {
        if (StringUtils.isBlank(field)) {
            continue;
        }

        // Case-insensitive lookup: the first header line whose ID matches the user's spelling.
        VCFInfoHeaderLine info = null;
        for (final VCFInfoHeaderLine candidate : gnomadHeader.getInfoHeaderLines()) {
            if (candidate.getID().equalsIgnoreCase(field)) {
                info = candidate;
                break;
            }
        }
        if (info == null) {
            LOG.error("field INFO/" + field + " is undefined in " + this.gnomadPath);
            return -1;
        }

        final String headerId = info.getID();
        if (!field.equals(headerId)) {
            LOG.warn("changed user field INFO/" + field + " to INFO/" + info.getID());
        }
        if (info.getCountType() != VCFHeaderLineCount.A) {
            LOG.warn("field INFO/" + field + " count-type is not 'A' but " + info.getCountType());
        }
        this.gnomad_info_af_attributes.add(headerId);
    }

    if (this.gnomad_info_af_attributes.isEmpty()) {
        LOG.error("No INFO attribute defined");
        return -1;
    }

    /* do not keep those INFO in memory */
    final List<String> removeAtt = this.gnomadReader.getHeader().getInfoHeaderLines().stream()
            .map(line -> line.getID())
            .filter(id -> !this.gnomad_info_af_attributes.contains(id))
            .collect(Collectors.toList());
    this.gnomadReader.setSimplifier(
            variant -> new VariantContextBuilder(variant).rmAttributes(removeAtt).noGenotypes().make());
    return 0;
}
Also used : CloseableIterator(htsjdk.samtools.util.CloseableIterator) Allele(htsjdk.variant.variantcontext.Allele) Arrays(java.util.Arrays) ContigNameConverter(com.github.lindenb.jvarkit.util.bio.fasta.ContigNameConverter) Program(com.github.lindenb.jvarkit.util.jcommander.Program) VCFIterator(htsjdk.variant.vcf.VCFIterator) Parameter(com.beust.jcommander.Parameter) VCFHeader(htsjdk.variant.vcf.VCFHeader) BufferedVCFReader(com.github.lindenb.jvarkit.variant.vcf.BufferedVCFReader) UnaryOperator(java.util.function.UnaryOperator) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) DistanceParser(com.github.lindenb.jvarkit.util.bio.DistanceParser) NoSplitter(com.github.lindenb.jvarkit.util.jcommander.NoSplitter) StringUtil(htsjdk.samtools.util.StringUtil) VCFReaderFactory(com.github.lindenb.jvarkit.variant.vcf.VCFReaderFactory) Path(java.nio.file.Path) VCFConstants(htsjdk.variant.vcf.VCFConstants) SimpleInterval(com.github.lindenb.jvarkit.samtools.util.SimpleInterval) SequenceDictionaryUtils(com.github.lindenb.jvarkit.util.bio.SequenceDictionaryUtils) Locatable(htsjdk.samtools.util.Locatable) VCFFilterHeaderLine(htsjdk.variant.vcf.VCFFilterHeaderLine) VCFHeaderLineType(htsjdk.variant.vcf.VCFHeaderLineType) Logger(com.github.lindenb.jvarkit.util.log.Logger) VCFReader(htsjdk.variant.vcf.VCFReader) Set(java.util.Set) JVarkitVersion(com.github.lindenb.jvarkit.util.JVarkitVersion) Collectors(java.util.stream.Collectors) Objects(java.util.Objects) List(java.util.List) FractionConverter(com.github.lindenb.jvarkit.jcommander.converter.FractionConverter) StringUtils(com.github.lindenb.jvarkit.lang.StringUtils) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) VCFInfoHeaderLine(htsjdk.variant.vcf.VCFInfoHeaderLine) VariantContext(htsjdk.variant.variantcontext.VariantContext) OnePassVcfLauncher(com.github.lindenb.jvarkit.jcommander.OnePassVcfLauncher) VCFHeaderLineCount(htsjdk.variant.vcf.VCFHeaderLineCount) Collections(java.util.Collections) 
VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) BufferedVCFReader(com.github.lindenb.jvarkit.variant.vcf.BufferedVCFReader) VCFReader(htsjdk.variant.vcf.VCFReader) VCFHeader(htsjdk.variant.vcf.VCFHeader) VCFInfoHeaderLine(htsjdk.variant.vcf.VCFInfoHeaderLine) BufferedVCFReader(com.github.lindenb.jvarkit.variant.vcf.BufferedVCFReader)

Example 5 with BufferedVCFReader

Use of com.github.lindenb.jvarkit.variant.vcf.BufferedVCFReader in project jvarkit by lindenb.

In the class VcfGnomadExomeVsGenome, method beforeVcf:

/**
 * Opens the exome and genome VCFs as buffered readers and checks that they share one dictionary.
 *
 * <p>Both readers are sized with {@code gnomadBufferSize} and use the same simplifier, which
 * removes genotypes, the ID and all attributes from each variant. The contig-name converter is
 * built from the exome header's dictionary. The exome and genome dictionaries are compared with
 * {@code SequenceUtil.assertSequenceDictionariesEqual}.
 *
 * @return -1 when any {@code Throwable} is raised while opening or checking the readers,
 *     otherwise the value returned by {@code super.beforeVcf()}
 */
@Override
protected int beforeVcf() {
    // Only the variant's core fields are kept once genotypes, ID and attributes are removed.
    final UnaryOperator<VariantContext> stripAnnotations = variant -> {
        final VariantContextBuilder builder = new VariantContextBuilder(variant);
        return builder.noGenotypes().noID().attributes(Collections.emptyMap()).make();
    };

    try {
        // Exome side.
        final VCFReader exomeVcf = VCFReaderFactory.makeDefault().open(this.exomePath, true);
        this.exomeReader = new BufferedVCFReader(exomeVcf, this.gnomadBufferSize);
        this.exomeReader.setSimplifier(stripAnnotations);
        this.ctgNameConverter = ContigNameConverter.fromOneDictionary(
                SequenceDictionaryUtils.extractRequired(exomeVcf.getHeader()));
        final SAMSequenceDictionary exomeDict = SequenceDictionaryUtils.extractRequired(exomeVcf.getHeader());

        // Genome side; must use the same sequence dictionary as the exome file.
        final VCFReader genomeVcf = VCFReaderFactory.makeDefault().open(this.genomePath, true);
        this.genomeReader = new BufferedVCFReader(genomeVcf, this.gnomadBufferSize);
        this.genomeReader.setSimplifier(stripAnnotations);
        final SAMSequenceDictionary genomeDict = SequenceDictionaryUtils.extractRequired(genomeVcf.getHeader());
        SequenceUtil.assertSequenceDictionariesEqual(exomeDict, genomeDict);
    } catch (final Throwable err) {
        LOG.error(err);
        return -1;
    }
    return super.beforeVcf();
}
Also used : CloseableIterator(htsjdk.samtools.util.CloseableIterator) Allele(htsjdk.variant.variantcontext.Allele) SequenceUtil(htsjdk.samtools.util.SequenceUtil) ContigNameConverter(com.github.lindenb.jvarkit.util.bio.fasta.ContigNameConverter) Program(com.github.lindenb.jvarkit.util.jcommander.Program) VCFIterator(htsjdk.variant.vcf.VCFIterator) Parameter(com.beust.jcommander.Parameter) VCFHeader(htsjdk.variant.vcf.VCFHeader) AcidNucleics(com.github.lindenb.jvarkit.util.bio.AcidNucleics) BufferedVCFReader(com.github.lindenb.jvarkit.variant.vcf.BufferedVCFReader) UnaryOperator(java.util.function.UnaryOperator) DistanceParser(com.github.lindenb.jvarkit.util.bio.DistanceParser) VCFReaderFactory(com.github.lindenb.jvarkit.variant.vcf.VCFReaderFactory) Path(java.nio.file.Path) SimpleInterval(com.github.lindenb.jvarkit.samtools.util.SimpleInterval) SequenceDictionaryUtils(com.github.lindenb.jvarkit.util.bio.SequenceDictionaryUtils) VCFFilterHeaderLine(htsjdk.variant.vcf.VCFFilterHeaderLine) Logger(com.github.lindenb.jvarkit.util.log.Logger) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) VCFReader(htsjdk.variant.vcf.VCFReader) Set(java.util.Set) JVarkitVersion(com.github.lindenb.jvarkit.util.JVarkitVersion) Collectors(java.util.stream.Collectors) StringUtils(com.github.lindenb.jvarkit.lang.StringUtils) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) VariantContext(htsjdk.variant.variantcontext.VariantContext) OnePassVcfLauncher(com.github.lindenb.jvarkit.jcommander.OnePassVcfLauncher) Collections(java.util.Collections) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) BufferedVCFReader(com.github.lindenb.jvarkit.variant.vcf.BufferedVCFReader) VCFReader(htsjdk.variant.vcf.VCFReader) VariantContext(htsjdk.variant.variantcontext.VariantContext) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) 
BufferedVCFReader(com.github.lindenb.jvarkit.variant.vcf.BufferedVCFReader)

Aggregations

BufferedVCFReader (com.github.lindenb.jvarkit.variant.vcf.BufferedVCFReader)7 Parameter (com.beust.jcommander.Parameter)4 OnePassVcfLauncher (com.github.lindenb.jvarkit.jcommander.OnePassVcfLauncher)4 StringUtils (com.github.lindenb.jvarkit.lang.StringUtils)4 JVarkitVersion (com.github.lindenb.jvarkit.util.JVarkitVersion)4 DistanceParser (com.github.lindenb.jvarkit.util.bio.DistanceParser)4 ContigNameConverter (com.github.lindenb.jvarkit.util.bio.fasta.ContigNameConverter)4 Program (com.github.lindenb.jvarkit.util.jcommander.Program)4 Logger (com.github.lindenb.jvarkit.util.log.Logger)4 VCFReaderFactory (com.github.lindenb.jvarkit.variant.vcf.VCFReaderFactory)4 CloseableIterator (htsjdk.samtools.util.CloseableIterator)4 Allele (htsjdk.variant.variantcontext.Allele)4 VariantContext (htsjdk.variant.variantcontext.VariantContext)4 VariantContextBuilder (htsjdk.variant.variantcontext.VariantContextBuilder)4 VariantContextWriter (htsjdk.variant.variantcontext.writer.VariantContextWriter)4 VCFFilterHeaderLine (htsjdk.variant.vcf.VCFFilterHeaderLine)4 VCFHeader (htsjdk.variant.vcf.VCFHeader)4 VCFIterator (htsjdk.variant.vcf.VCFIterator)4 VCFReader (htsjdk.variant.vcf.VCFReader)4 Path (java.nio.file.Path)4