Search in sources :

Example 66 with VCFHeader

use of htsjdk.variant.vcf.VCFHeader in project jvarkit by lindenb.

In class ConvertVcfChromosomes, method doVcfToVcf:

/**
 * Copies a VCF stream to {@code out}, renaming the contig of every variant with a
 * {@link ContigNameConverter} loaded from {@code mappingFile}.
 *
 * <p>The output header carries every metadata line of the input header except the
 * contig lines; when the input header has a sequence dictionary, the converted
 * dictionary is set on the output header instead. Variants whose contig the converter
 * maps to {@code null} are dropped, and a warning is logged once per such contig
 * (no new warnings once 1000 distinct unknown contigs have been recorded).
 *
 * @param inputName name of the input (not used by this method)
 * @param iterin    source of the header and the variants
 * @param out       writer receiving the new header and the renamed variants
 * @return always 0
 */
@Override
protected int doVcfToVcf(String inputName, VcfIterator iterin, VariantContextWriter out) {
    final ContigNameConverter converter = ContigNameConverter.fromFile(mappingFile);
    converter.setOnNotFound(this.onNotFound);

    final VCFHeader srcHeader = iterin.getHeader();
    // Keep every metadata line except the contig declarations.
    final VCFHeader destHeader = new VCFHeader(
            srcHeader.getMetaDataInInputOrder()
                    .stream()
                    .filter(line -> !line.getKey().equals(VCFHeader.CONTIG_KEY))
                    .collect(Collectors.toSet()),
            srcHeader.getSampleNamesInOrder());
    if (srcHeader.getSequenceDictionary() != null) {
        destHeader.setSequenceDictionary(converter.convertDictionary(srcHeader.getSequenceDictionary()));
    }
    out.writeHeader(destHeader);

    // Contigs already reported as unknown, so each one is logged only once.
    final Set<String> reported = new HashSet<>();
    while (iterin.hasNext()) {
        final VariantContext variant = iterin.next();
        final String contig = variant.getContig();
        final String renamed = converter.apply(contig);
        if (renamed != null) {
            out.add(new VariantContextBuilder(variant).chr(renamed).make());
            continue;
        }
        // Unknown chromosome: the variant is skipped, with a bounded number of warnings.
        if (reported.size() >= 1000 || reported.contains(contig)) {
            continue;
        }
        LOG.warn("Cannot find contig for " + contig);
        reported.add(contig);
    }
    return 0;
}
Also used : VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) VariantContext(htsjdk.variant.variantcontext.VariantContext) VCFHeader(htsjdk.variant.vcf.VCFHeader) ContigNameConverter(com.github.lindenb.jvarkit.util.bio.fasta.ContigNameConverter) HashSet(java.util.HashSet)

Example 67 with VCFHeader

use of htsjdk.variant.vcf.VCFHeader in project jvarkit by lindenb.

In class DownSampleVcf, method doVcfToVcf:

/**
 * Writes a random sample of at most {@code reservoir_size} variants from {@code in}
 * to {@code out}, using reservoir sampling (Algorithm R).
 *
 * <p>The first {@code reservoir_size} variants fill the buffer. After that, the n-th
 * variant read replaces a random buffered variant with probability
 * {@code reservoir_size / n}, so every variant of the input has the same chance of
 * being kept. The previous implementation replaced a buffered variant for every new
 * record, which over-represented variants located near the end of the input.
 *
 * <p>The random generator is seeded with {@code seed}, or with the current time when
 * {@code seed} is -1. When {@code reservoir_size} is 0 the input is not read and only
 * the header is written. Variants are written in buffer order, which is not
 * necessarily the input order once replacements have happened.
 *
 * @param inputName name of the input (not used by this method)
 * @param in        source of the header and the variants
 * @param out       writer receiving the header and the sampled variants
 * @return always 0
 */
@Override
protected int doVcfToVcf(final String inputName, final VcfIterator in, final VariantContextWriter out) {
    final Random rand = new Random(this.seed == -1L ? System.currentTimeMillis() : this.seed);
    final List<VariantContext> buffer = new ArrayList<>(this.reservoir_size);
    final VCFHeader h2 = new VCFHeader(in.getHeader());
    super.addMetaData(h2);
    final SAMSequenceDictionaryProgress progess = new SAMSequenceDictionaryProgress(in.getHeader()).logger(LOG);
    out.writeHeader(h2);
    if (this.reservoir_size != 0) {
        // Number of variants read so far; long because large VCFs can exceed the int range.
        long seen = 0L;
        while (in.hasNext()) {
            final VariantContext ctx = progess.watch(in.next());
            ++seen;
            if (buffer.size() < this.reservoir_size) {
                buffer.add(ctx);
            } else {
                // Draw a slot in [0, seen): it falls inside the buffer with
                // probability buffer.size()/seen, which keeps the sample uniform.
                final long slot = (long) (rand.nextDouble() * seen);
                if (slot < buffer.size()) {
                    buffer.set((int) slot, ctx);
                }
            }
        }
    }
    buffer.forEach(out::add);
    progess.finish();
    return 0;
}
Also used : Random(java.util.Random) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) ArrayList(java.util.ArrayList) VariantContext(htsjdk.variant.variantcontext.VariantContext) VCFHeader(htsjdk.variant.vcf.VCFHeader)

Example 68 with VCFHeader

use of htsjdk.variant.vcf.VCFHeader in project jvarkit by lindenb.

In class VcfJaspar, method doVcfToVcf:

/**
 * Annotates each variant with the JASPAR matrices that score well on a window of the
 * reference sequence ending at or spanning the variant start.
 *
 * <p>An INFO header line {@code JASPAR} is added to the output header. For every
 * variant and every matrix of {@code jasparDb}, each window of the matrix length whose
 * 0-based start lies in {@code [max(0, start - length), start)} and that fits inside
 * the contig sequence is scored on the forward strand, then on the reverse complement.
 * The first strand whose score is positive and at least
 * {@code matrix.max() * fraction_of_max} produces a hit formatted as
 * {@code (Name|length|Score/1000|pos|strand)}; the other strand is then not reported
 * for that window. Variants without any hit are written unchanged.
 *
 * @param inputName name of the input (not used by this method)
 * @param in        source of the header and the variants
 * @param out       writer receiving the header and the annotated variants
 * @return {@code RETURN_OK}
 */
@Override
protected int doVcfToVcf(String inputName, VcfIterator in, VariantContextWriter out) {
    final String ATT = "JASPAR";
    final VCFHeader header = new VCFHeader(in.getHeader());
    addMetaData(header);
    header.addMetaDataLine(new VCFInfoHeaderLine(ATT, VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.String, "Jaspar pattern overlapping: Format: (Name|length|Score/1000|pos|strand)"));
    out.writeHeader(header);

    // Sequence of the contig currently being processed; reloaded when the contig changes.
    GenomicSequence contigSequence = null;
    while (in.hasNext()) {
        final VariantContext variant = in.next();
        final boolean sameContig = contigSequence != null
                && contigSequence.getChrom().equals(variant.getContig());
        if (!sameContig) {
            LOG.info("Loading sequence " + variant.getContig());
            contigSequence = new GenomicSequence(this.indexedFastaSequenceFile, variant.getContig());
        }

        final Set<String> matches = new HashSet<String>();
        for (final Matrix matrix : this.jasparDb) {
            final int firstWindow = Math.max(0, variant.getStart() - matrix.length());
            for (int y = firstWindow; y < variant.getStart() && y + matrix.length() <= contigSequence.length(); ++y) {
                final CharSequence forward = new SubSequence(contigSequence, y, y + matrix.length());
                final CharSequence revcomp = new RevCompCharSequence(forward);
                // Try the forward strand first, then the reverse complement.
                for (int strand = 0; strand < 2; ++strand) {
                    final boolean plusStrand = (strand == 0);
                    final double score = matrix.score(plusStrand ? forward : revcomp);
                    // Negated ">=" (rather than "<") so that a NaN score is rejected as well.
                    if (score <= 0 || !(score >= matrix.max() * this.fraction_of_max)) {
                        continue;
                    }
                    final String hit = new StringBuilder("(")
                            .append(matrix.getName().replaceAll("[ \t;=]+", "/"))
                            .append("|")
                            .append(matrix.length())
                            .append("|")
                            .append((int) (1000.0 * (score / matrix.max())))
                            .append("|")
                            .append(y + 1)
                            .append("|")
                            .append(plusStrand ? '+' : '-')
                            .append(")")
                            .toString();
                    matches.add(hit);
                    // One hit per window: stop after the first strand that passes.
                    break;
                }
            }
        }

        if (matches.isEmpty()) {
            out.add(variant);
        } else {
            final String[] values = matches.toArray(new String[matches.size()]);
            out.add(new VariantContextBuilder(variant).attribute(ATT, values).make());
        }
    }
    return RETURN_OK;
}
Also used : GenomicSequence(com.github.lindenb.jvarkit.util.picard.GenomicSequence) RevCompCharSequence(com.github.lindenb.jvarkit.util.bio.RevCompCharSequence) VariantContext(htsjdk.variant.variantcontext.VariantContext) VCFInfoHeaderLine(htsjdk.variant.vcf.VCFInfoHeaderLine) SubSequence(com.github.lindenb.jvarkit.lang.SubSequence) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) RevCompCharSequence(com.github.lindenb.jvarkit.util.bio.RevCompCharSequence) VCFHeader(htsjdk.variant.vcf.VCFHeader) HashSet(java.util.HashSet)

Example 69 with VCFHeader

use of htsjdk.variant.vcf.VCFHeader in project jvarkit by lindenb.

In class AnnPredictionParserFactory, method createDefaultParser:

/**
 * Creates a parser without a real VCF header.
 *
 * <p>A temporary header holding a single INFO line (unbounded count, type String,
 * empty description) is built and handed to the parser together with {@code getTag()}.
 * The INFO line is declared under the same tag that is passed to the parser; the
 * previous code always declared it under {@code AnnPredictionParser.getDefaultTag()},
 * so a factory configured with another tag produced a header with no line for it.
 * NOTE(review): this assumes the parser looks the tag up in the header it is given;
 * confirm against the AnnPredictionParser constructor.
 *
 * @return a parser bound to the temporary header and to {@code getTag()}
 */
public AnnPredictionParser createDefaultParser() {
    final String tag = getTag();
    // Fall back to the default tag for the header line when no tag is configured.
    final String headerTag = (tag == null ? AnnPredictionParser.getDefaultTag() : tag);
    final VCFHeader tmpheader = new VCFHeader();
    final VCFInfoHeaderLine info = new VCFInfoHeaderLine(headerTag, VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.String, "");
    tmpheader.addMetaDataLine(info);
    return new AnnPredictionParser(tmpheader, tag);
}
Also used : VCFHeader(htsjdk.variant.vcf.VCFHeader) VCFInfoHeaderLine(htsjdk.variant.vcf.VCFInfoHeaderLine)

Example 70 with VCFHeader

use of htsjdk.variant.vcf.VCFHeader in project jvarkit by lindenb.

In class VcfMultiToOneAlleleTest, method createVcf:

/**
 * Builds a one-variant VCF from the given genotypes, runs {@code VcfMultiToOneAllele}
 * on it and returns the variants of the resulting file.
 *
 * <p>The input variant is located at 1:1-1, its alleles are the distinct alleles found
 * in the genotypes, and it goes through {@code VariantAttributesRecalculator} before
 * being written. The header declares the standard GT/DP FORMAT lines and AF/AN/AC/DP
 * INFO lines. Sample names are declared in sorted order, without duplicates; the
 * previous code sorted them and then collected them into an unordered set, which
 * discarded the ordering.
 *
 * @param params    extra command-line arguments for the tool, split by the command builder
 * @param genotypes genotypes of the single input variant
 * @return the variants read back from the tool's output file
 * @throws IOException declared for the file-handling helpers called here
 */
private List<VariantContext> createVcf(final String params, final List<Genotype> genotypes) throws IOException {
    final Set<VCFHeaderLine> metaData = new HashSet<>();
    VCFStandardHeaderLines.addStandardFormatLines(metaData, true, "GT", "DP");
    VCFStandardHeaderLines.addStandardInfoLines(metaData, true, "AF", "AN", "AC", "DP");
    // distinct() keeps the de-duplication the former toSet() provided; the list keeps the sort.
    final List<String> sampleNames = genotypes.stream().map(G -> G.getSampleName()).distinct().sorted().collect(Collectors.toList());
    final VCFHeader vcfheader = new VCFHeader(metaData, sampleNames);
    final VariantAttributesRecalculator calc = new VariantAttributesRecalculator();
    calc.setHeader(vcfheader);
    final File vcfOut = super.createTmpFile(".vcf");
    final File vcfOut2 = super.createTmpFile(".vcf");
    final VariantContextBuilder vcb = new VariantContextBuilder();
    vcb.chr("1");
    vcb.start(1);
    vcb.stop(1);
    vcb.alleles(genotypes.stream().flatMap(G -> G.getAlleles().stream()).collect(Collectors.toSet()));
    vcb.genotypes(genotypes);
    // try-with-resources closes the writer even when writing the header or the variant fails.
    try (VariantContextWriter w = VCFUtils.createVariantContextWriter(vcfOut)) {
        w.writeHeader(vcfheader);
        w.add(calc.apply(vcb.make()));
    }
    Assert.assertEquals(new VcfMultiToOneAllele().instanceMain(newCmd().add("-o").add(vcfOut2).split(params).add(vcfOut).make()), 0);
    super.assertIsVcf(vcfOut2);
    return super.variantStream(vcfOut2).collect(Collectors.toList());
}
Also used : Genotype(htsjdk.variant.variantcontext.Genotype) VCFHeaderLine(htsjdk.variant.vcf.VCFHeaderLine) VCFUtils(com.github.lindenb.jvarkit.util.vcf.VCFUtils) Allele(htsjdk.variant.variantcontext.Allele) Arrays(java.util.Arrays) GenotypeBuilder(htsjdk.variant.variantcontext.GenotypeBuilder) VCFStandardHeaderLines(htsjdk.variant.vcf.VCFStandardHeaderLines) VCFHeader(htsjdk.variant.vcf.VCFHeader) Set(java.util.Set) IOException(java.io.IOException) Test(org.testng.annotations.Test) Collectors(java.util.stream.Collectors) File(java.io.File) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) List(java.util.List) Assert(org.testng.Assert) VariantAttributesRecalculator(com.github.lindenb.jvarkit.util.vcf.VariantAttributesRecalculator) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) VariantContext(htsjdk.variant.variantcontext.VariantContext) TestUtils(com.github.lindenb.jvarkit.tools.tests.TestUtils) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) VCFHeaderLine(htsjdk.variant.vcf.VCFHeaderLine) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) VariantAttributesRecalculator(com.github.lindenb.jvarkit.util.vcf.VariantAttributesRecalculator) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) VCFHeader(htsjdk.variant.vcf.VCFHeader) File(java.io.File) HashSet(java.util.HashSet)

Aggregations

VCFHeader (htsjdk.variant.vcf.VCFHeader)182 VariantContext (htsjdk.variant.variantcontext.VariantContext)113 File (java.io.File)93 ArrayList (java.util.ArrayList)79 VariantContextWriter (htsjdk.variant.variantcontext.writer.VariantContextWriter)73 VCFHeaderLine (htsjdk.variant.vcf.VCFHeaderLine)64 VariantContextBuilder (htsjdk.variant.variantcontext.VariantContextBuilder)63 HashSet (java.util.HashSet)60 SAMSequenceDictionaryProgress (com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress)58 IOException (java.io.IOException)55 VCFInfoHeaderLine (htsjdk.variant.vcf.VCFInfoHeaderLine)52 SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary)49 Genotype (htsjdk.variant.variantcontext.Genotype)48 Allele (htsjdk.variant.variantcontext.Allele)47 VCFFileReader (htsjdk.variant.vcf.VCFFileReader)47 List (java.util.List)44 Set (java.util.Set)38 VcfIterator (com.github.lindenb.jvarkit.util.vcf.VcfIterator)36 CloserUtil (htsjdk.samtools.util.CloserUtil)35 Collectors (java.util.stream.Collectors)34