Search in sources :

Example 1 with VariantAttributesRecalculator

use of com.github.lindenb.jvarkit.util.vcf.VariantAttributesRecalculator in project jvarkit by lindenb.

the class VcfMultiToOneAlleleTest method createVcf.

private List<VariantContext> createVcf(final String params, final List<Genotype> genotypes) throws IOException {
    Set<VCFHeaderLine> metaData = new HashSet<>();
    VCFStandardHeaderLines.addStandardFormatLines(metaData, true, "GT", "DP");
    VCFStandardHeaderLines.addStandardInfoLines(metaData, true, "AF", "AN", "AC", "DP");
    final VCFHeader vcfheader = new VCFHeader(metaData, genotypes.stream().map(G -> G.getSampleName()).sorted().collect(Collectors.toSet()));
    final VariantAttributesRecalculator calc = new VariantAttributesRecalculator();
    calc.setHeader(vcfheader);
    final File vcfOut = super.createTmpFile(".vcf");
    final File vcfOut2 = super.createTmpFile(".vcf");
    VariantContextBuilder vcb = new VariantContextBuilder();
    vcb.chr("1");
    vcb.start(1);
    vcb.stop(1);
    vcb.alleles(genotypes.stream().flatMap(G -> G.getAlleles().stream()).collect(Collectors.toSet()));
    vcb.genotypes(genotypes);
    final VariantContextWriter w = VCFUtils.createVariantContextWriter(vcfOut);
    w.writeHeader(vcfheader);
    w.add(calc.apply(vcb.make()));
    w.close();
    Assert.assertEquals(new VcfMultiToOneAllele().instanceMain(newCmd().add("-o").add(vcfOut2).split(params).add(vcfOut).make()), 0);
    super.assertIsVcf(vcfOut2);
    return super.variantStream(vcfOut2).collect(Collectors.toList());
}
Also used : Genotype(htsjdk.variant.variantcontext.Genotype) VCFHeaderLine(htsjdk.variant.vcf.VCFHeaderLine) VCFUtils(com.github.lindenb.jvarkit.util.vcf.VCFUtils) Allele(htsjdk.variant.variantcontext.Allele) Arrays(java.util.Arrays) GenotypeBuilder(htsjdk.variant.variantcontext.GenotypeBuilder) VCFStandardHeaderLines(htsjdk.variant.vcf.VCFStandardHeaderLines) VCFHeader(htsjdk.variant.vcf.VCFHeader) Set(java.util.Set) IOException(java.io.IOException) Test(org.testng.annotations.Test) Collectors(java.util.stream.Collectors) File(java.io.File) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) List(java.util.List) Assert(org.testng.Assert) VariantAttributesRecalculator(com.github.lindenb.jvarkit.util.vcf.VariantAttributesRecalculator) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) VariantContext(htsjdk.variant.variantcontext.VariantContext) TestUtils(com.github.lindenb.jvarkit.tools.tests.TestUtils) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) VCFHeaderLine(htsjdk.variant.vcf.VCFHeaderLine) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) VariantAttributesRecalculator(com.github.lindenb.jvarkit.util.vcf.VariantAttributesRecalculator) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) VCFHeader(htsjdk.variant.vcf.VCFHeader) File(java.io.File) HashSet(java.util.HashSet)

Example 2 with VariantAttributesRecalculator

use of com.github.lindenb.jvarkit.util.vcf.VariantAttributesRecalculator in project jvarkit by lindenb.

the class VcfSimulator method doWork.

@Override
public int doWork(final List<String> args) {
    if (this.indexedFastaSequenceFile == null) {
        LOG.error("Reference is undefined");
        return -1;
    }
    if (!args.isEmpty()) {
        LOG.error("too many arguments");
        return -1;
    }
    if (this.randomSeed == -1L) {
        this.random = new Random(System.currentTimeMillis());
    } else {
        this.random = new Random(this.randomSeed);
    }
    if (this.numSamples < 0) {
        this.numSamples = 1 + this.random.nextInt(10);
    }
    while (this.samples.size() < numSamples) this.samples.add("SAMPLE" + (1 + this.samples.size()));
    VariantContextWriter writer = null;
    PrintStream pw = null;
    try {
        final Set<VCFHeaderLine> metaData = new HashSet<VCFHeaderLine>();
        VCFStandardHeaderLines.addStandardFormatLines(metaData, true, "GT", "DP");
        VCFStandardHeaderLines.addStandardInfoLines(metaData, true, "AF", "AN", "AC", "DP");
        VariantAttributesRecalculator calc = new VariantAttributesRecalculator();
        final VCFHeader header = new VCFHeader(metaData, this.samples);
        header.setSequenceDictionary(this.indexedFastaSequenceFile.getSequenceDictionary());
        calc.setHeader(header);
        pw = super.openFileOrStdoutAsPrintStream(this.outputFile);
        writer = VCFUtils.createVariantContextWriterToOutputStream(pw);
        writer.writeHeader(header);
        long countVariantsSoFar = 0;
        for (; ; ) {
            if (pw.checkError())
                break;
            if (this.numberOfVariants >= 0 && countVariantsSoFar >= this.numberOfVariants)
                break;
            for (final SAMSequenceRecord ssr : this.indexedFastaSequenceFile.getSequenceDictionary().getSequences()) {
                if (pw.checkError())
                    break;
                final GenomicSequence genomic = new GenomicSequence(this.indexedFastaSequenceFile, ssr.getSequenceName());
                for (int pos = 1; pos <= ssr.getSequenceLength(); ++pos) {
                    if (pw.checkError())
                        break;
                    char REF = Character.toUpperCase(genomic.charAt(pos - 1));
                    if (REF == 'N')
                        continue;
                    char ALT = 'N';
                    switch(REF) {
                        case 'A':
                            ALT = "TGC".charAt(random.nextInt(3));
                            break;
                        case 'T':
                            ALT = "AGC".charAt(random.nextInt(3));
                            break;
                        case 'G':
                            ALT = "TAC".charAt(random.nextInt(3));
                            break;
                        case 'C':
                            ALT = "TGA".charAt(random.nextInt(3));
                            break;
                        default:
                            ALT = 'N';
                    }
                    if (ALT == 'N')
                        continue;
                    final Allele refAllele = Allele.create((byte) REF, true);
                    Allele altAllele = Allele.create((byte) ALT, false);
                    final VariantContextBuilder cb = new VariantContextBuilder();
                    cb.chr(genomic.getChrom());
                    cb.start(pos);
                    cb.stop(pos);
                    List<Genotype> genotypes = new ArrayList<Genotype>(samples.size());
                    for (String sample : samples) {
                        final Allele a1 = (random.nextBoolean() ? refAllele : altAllele);
                        final Allele a2 = (random.nextBoolean() ? refAllele : altAllele);
                        GenotypeBuilder gb = new GenotypeBuilder(sample, Arrays.asList(a1, a2));
                        if (random.nextBoolean()) {
                            gb = new GenotypeBuilder(sample, Arrays.asList(a1, a2));
                            gb.DP(1 + random.nextInt(50));
                        }
                        genotypes.add(gb.make());
                    }
                    cb.genotypes(genotypes);
                    cb.alleles(Arrays.asList(refAllele, altAllele));
                    writer.add(calc.apply(cb.make()));
                    countVariantsSoFar++;
                }
            }
        }
        writer.close();
        writer = null;
        pw.flush();
        pw.close();
        pw = null;
        return 0;
    } catch (final Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(this.indexedFastaSequenceFile);
        CloserUtil.close(pw);
        CloserUtil.close(writer);
    }
}
Also used : PrintStream(java.io.PrintStream) VCFHeaderLine(htsjdk.variant.vcf.VCFHeaderLine) GenomicSequence(com.github.lindenb.jvarkit.util.picard.GenomicSequence) ArrayList(java.util.ArrayList) Genotype(htsjdk.variant.variantcontext.Genotype) SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) GenotypeBuilder(htsjdk.variant.variantcontext.GenotypeBuilder) Allele(htsjdk.variant.variantcontext.Allele) Random(java.util.Random) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) VariantAttributesRecalculator(com.github.lindenb.jvarkit.util.vcf.VariantAttributesRecalculator) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) VCFHeader(htsjdk.variant.vcf.VCFHeader) HashSet(java.util.HashSet)

Aggregations

VariantAttributesRecalculator (com.github.lindenb.jvarkit.util.vcf.VariantAttributesRecalculator)2 Allele (htsjdk.variant.variantcontext.Allele)2 Genotype (htsjdk.variant.variantcontext.Genotype)2 GenotypeBuilder (htsjdk.variant.variantcontext.GenotypeBuilder)2 VariantContextBuilder (htsjdk.variant.variantcontext.VariantContextBuilder)2 VariantContextWriter (htsjdk.variant.variantcontext.writer.VariantContextWriter)2 VCFHeader (htsjdk.variant.vcf.VCFHeader)2 VCFHeaderLine (htsjdk.variant.vcf.VCFHeaderLine)2 ArrayList (java.util.ArrayList)2 HashSet (java.util.HashSet)2 TestUtils (com.github.lindenb.jvarkit.tools.tests.TestUtils)1 GenomicSequence (com.github.lindenb.jvarkit.util.picard.GenomicSequence)1 VCFUtils (com.github.lindenb.jvarkit.util.vcf.VCFUtils)1 SAMSequenceRecord (htsjdk.samtools.SAMSequenceRecord)1 VariantContext (htsjdk.variant.variantcontext.VariantContext)1 VCFStandardHeaderLines (htsjdk.variant.vcf.VCFStandardHeaderLines)1 File (java.io.File)1 IOException (java.io.IOException)1 PrintStream (java.io.PrintStream)1 Arrays (java.util.Arrays)1