Search in sources :

Example 61 with Allele

use of htsjdk.variant.variantcontext.Allele in project jvarkit by lindenb.

the class EigenInfoAnnotator method getAnnotations.

@Override
public Map<String, Object> getAnnotations(final VariantContext ctx) {
    if (!ctx.isVariant())
        return Collections.emptyMap();
    final int contig = contig2eigen(ctx.getContig());
    if (contig < 1)
        return Collections.emptyMap();
    try {
        if (this.codingFeatureReader == null) {
            this.codingFeatureReader = new TabixFeatureReader<>(new File(this.eigenDirectory, getTabixPrefix() + "coding_annot_04092016.tab.bgz").getPath(), new CodingFeatureCodec());
        }
        if (this.prev_contig == -1 || prev_contig != contig) {
            CloserUtil.close(this.nonCodingFeatureReader);
            this.nonCodingFeatureReader = new TabixFeatureReader<>(getNonCodingFileForContig(contig).getPath(), new NonCodingFeatureCodec());
            this.prev_contig = contig;
        }
        final Map<Allele, NonCodingFeature> alt2nonCoding = new HashMap<>();
        CloseableTribbleIterator<NonCodingFeature> iter1 = this.nonCodingFeatureReader.query(String.valueOf(contig), ctx.getStart(), ctx.getEnd());
        while (iter1.hasNext()) {
            NonCodingFeature feat = iter1.next();
            if (feat == null || !feat.accept(ctx))
                continue;
            alt2nonCoding.put(feat.alt, feat);
        }
        iter1.close();
        final Map<Allele, CodingFeature> alt2coding = new HashMap<>();
        CloseableTribbleIterator<CodingFeature> iter2 = this.codingFeatureReader.query(String.valueOf(contig), ctx.getStart(), ctx.getEnd());
        while (iter2.hasNext()) {
            CodingFeature feat = iter2.next();
            if (feat == null || !feat.accept(ctx))
                continue;
            alt2coding.put(feat.alt, feat);
        }
        iter2.close();
        if (alt2nonCoding.isEmpty() && alt2coding.isEmpty())
            return Collections.emptyMap();
        final List<Allele> alternateAlleles = ctx.getAlternateAlleles();
        final Map<String, Object> map = new HashMap<>();
        for (int side = 0; side < 2; ++side) {
            for (int i = 0; i < (side == 0 ? noncodingheaderlines.size() : codingheaderlines.size()); ++i) {
                final VCFInfoHeaderLine vihl = (side == 0 ? noncodingheaderlines.get(i) : codingheaderlines.get(i));
                final List<Object> atts = new ArrayList<>(alternateAlleles.size());
                boolean found_one = false;
                for (int altn = 0; altn < alternateAlleles.size(); ++altn) {
                    final Allele alt = alternateAlleles.get(altn);
                    final AbstractFeature feat = (side == 0 ? alt2nonCoding.get(alt) : alt2coding.get(alt));
                    if (feat == null) {
                        atts.add(".");
                        continue;
                    }
                    final String token = feat.get(4 + i);
                    if (token == null || token.isEmpty() || token.equals(".")) {
                        atts.add(".");
                        continue;
                    } else if (vihl.getType() == VCFHeaderLineType.String) {
                        found_one = true;
                        atts.add(token.replace(',', '|'));
                    } else {
                        try {
                            Float dbl = new Float(token);
                            atts.add(dbl);
                            found_one = true;
                        } catch (NumberFormatException err) {
                            throw new IOException("Cannot cast " + token + " to float for " + vihl);
                        }
                    }
                }
                if (!found_one) {
                // nothing
                } else if (vihl.getCountType() == VCFHeaderLineCount.R) {
                    map.put(vihl.getID(), new ArrayList<>(new LinkedHashSet<>(atts)));
                } else {
                    if (atts.size() != ctx.getAlternateAlleles().size()) {
                        System.err.println("Number of eigen data!=number of ALTS");
                    }
                    map.put(vihl.getID(), atts);
                }
            }
        }
        return map;
    } catch (final IOException err) {
        throw new RuntimeException(err);
    }
}
Also used : HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) IOException(java.io.IOException) VCFInfoHeaderLine(htsjdk.variant.vcf.VCFInfoHeaderLine) Allele(htsjdk.variant.variantcontext.Allele) File(java.io.File)

Example 62 with Allele

use of htsjdk.variant.variantcontext.Allele in project jvarkit by lindenb.

the class VCFFixIndels method doVcfToVcf.

@Override
protected int doVcfToVcf(String inputName, VcfIterator r, VariantContextWriter w) {
    long nChanged = 0L;
    final String TAG = "INDELFIXED";
    final VCFHeader header = r.getHeader();
    final VCFHeader h2 = new VCFHeader(header);
    addMetaData(h2);
    h2.addMetaDataLine(new VCFInfoHeaderLine(TAG, 1, VCFHeaderLineType.String, "Fix Indels for @SolenaLS (position|alleles...)"));
    w.writeHeader(h2);
    final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(header);
    while (r.hasNext()) {
        boolean somethingChanged = false;
        VariantContext ctx = progress.watch(r.next());
        /* map old allele to new allele */
        Map<Allele, Allele> original2modified = new HashMap<Allele, Allele>();
        /* did we found a strange allele (symbolic, etc ) */
        boolean strange_allele_flag = false;
        for (final Allele a : ctx.getAlleles()) {
            original2modified.put(a, a);
            if (a.isSymbolic() || a.isNoCall()) {
                strange_allele_flag = true;
                break;
            }
        }
        if (strange_allele_flag || original2modified.size() < 2) /* at least 2 alleles: REF+ ALT */
        {
            w.add(ctx);
            continue;
        }
        /* record chromStart if we need to shift to the right */
        int chromStart = ctx.getStart();
        /* trim right then left  */
        for (int side = 0; side < 2; ++side) {
            boolean done = false;
            while (!done) {
                boolean ok_side = true;
                /* common nucleotide at end/start */
                Character targetChar = null;
                done = true;
                // scan side
                Set<Allele> keys = new HashSet<>(original2modified.keySet());
                for (Allele k : keys) {
                    Allele newAllele = original2modified.get(k);
                    if (newAllele.isSymbolic()) {
                        ok_side = false;
                        break;
                    }
                    String baseString = newAllele.getBaseString().trim().toUpperCase();
                    if (baseString.length() < 2) {
                        ok_side = false;
                        break;
                    }
                    /* first or last char or all sequences
						 * side==0 : right
						 * side==1 : left
						 * */
                    Character baseChar = (side == 0 ? baseString.charAt(baseString.length() - 1) : baseString.charAt(0));
                    if (targetChar == null) {
                        targetChar = baseChar;
                    } else if (!targetChar.equals(baseChar)) {
                        /* doesn't end with same nucleotide */
                        ok_side = false;
                        break;
                    }
                }
                /* ok we can shift all alleles */
                if (ok_side && targetChar != null) {
                    done = false;
                    somethingChanged = true;
                    for (Allele k : keys) {
                        Allele newAllele = original2modified.get(k);
                        String baseString = newAllele.getBaseString().trim().toUpperCase();
                        if (// remove last nucleotide
                        side == 0) {
                            newAllele = Allele.create(baseString.substring(0, baseString.length() - 1), newAllele.isReference());
                        } else {
                            newAllele = Allele.create(baseString.substring(1), newAllele.isReference());
                        }
                        original2modified.put(k, newAllele);
                    }
                    if (side == 1)
                        chromStart++;
                }
            }
        /* end of while done */
        }
        if (!somethingChanged) {
            w.add(ctx);
            continue;
        }
        final VariantContextBuilder b = new VariantContextBuilder(ctx);
        b.start(chromStart);
        Allele newRef = original2modified.get(ctx.getReference());
        b.stop(chromStart + newRef.getBaseString().length() - 1);
        b.alleles(original2modified.values());
        List<Genotype> genotypes = new ArrayList<>();
        for (final String sample : header.getSampleNamesInOrder()) {
            final Genotype g = ctx.getGenotype(sample);
            if (!g.isCalled()) {
                genotypes.add(g);
                continue;
            }
            final GenotypeBuilder gb = new GenotypeBuilder(g);
            final List<Allele> aL = new ArrayList<>();
            for (final Allele a : g.getAlleles()) {
                aL.add(original2modified.get(a));
            }
            gb.alleles(aL);
            genotypes.add(gb.make());
        }
        StringBuilder tagContent = new StringBuilder();
        tagContent.append(String.valueOf(ctx.getStart()));
        for (Allele a : ctx.getAlleles()) {
            tagContent.append("|");
            tagContent.append(a.toString());
        }
        b.attribute(TAG, tagContent.toString());
        b.genotypes(genotypes);
        w.add(b.make());
        ++nChanged;
        if (w.checkError())
            break;
    }
    progress.finish();
    LOG.info("indels changed:" + nChanged);
    return RETURN_OK;
}
Also used : SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) VariantContext(htsjdk.variant.variantcontext.VariantContext) Genotype(htsjdk.variant.variantcontext.Genotype) GenotypeBuilder(htsjdk.variant.variantcontext.GenotypeBuilder) VCFInfoHeaderLine(htsjdk.variant.vcf.VCFInfoHeaderLine) Allele(htsjdk.variant.variantcontext.Allele) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) VCFHeader(htsjdk.variant.vcf.VCFHeader) HashSet(java.util.HashSet)

Example 63 with Allele

use of htsjdk.variant.variantcontext.Allele in project jvarkit by lindenb.

the class VCFComm method doWork.

@Override
public int doWork(final List<String> args) {
    CloseableIterator<LineAndFile> iter = null;
    SortingCollection<LineAndFile> variants = null;
    VariantContextWriter w = null;
    try {
        if (args.isEmpty()) {
            LOG.error("Illegal number of arguments");
            return -1;
        }
        Set<VCFHeaderLine> metaData = new HashSet<VCFHeaderLine>();
        variants = SortingCollection.newInstance(LineAndFile.class, new LineAndFileCodec(), new LineAndFileComparator(), super.sortingCollectionArgs.getMaxRecordsInRam(), super.sortingCollectionArgs.getTmpPaths());
        variants.setDestructiveIteration(true);
        /**
         * new sample names in the output vcf: one  sample per file
         */
        final Map<Integer, String> fileid2sampleName = new TreeMap<>();
        /**
         * samples names as they appear in the original VCF headers
         */
        final Counter<String> countInputSamples = new Counter<String>();
        /**
         * dicts
         */
        final List<SAMSequenceDictionary> all_dictionaries = new ArrayList<>();
        for (final String vcffilename : IOUtils.unrollFiles(args)) {
            LOG.info("Reading from " + vcffilename);
            final Input input = super.put(variants, vcffilename);
            String sampleName = vcffilename;
            if (sampleName.endsWith(".vcf.gz")) {
                sampleName = sampleName.substring(0, sampleName.length() - 7);
            } else if (sampleName.endsWith(".vcf.gz")) {
                sampleName = sampleName.substring(0, sampleName.length() - 4);
            }
            int slash = sampleName.lastIndexOf(File.separatorChar);
            if (slash != -1)
                sampleName = sampleName.substring(slash + 1);
            int suffix = 1;
            // loop until we find a uniq name
            for (; ; ) {
                final String key = sampleName + (suffix == 1 ? "" : "_" + suffix);
                if (fileid2sampleName.values().contains(key)) {
                    suffix++;
                    continue;
                }
                fileid2sampleName.put(input.file_id, key);
                metaData.add(new VCFHeaderLine(key, vcffilename));
                break;
            }
            for (final String sname : input.codecAndHeader.header.getSampleNamesInOrder()) {
                countInputSamples.incr(sname);
            }
            all_dictionaries.add(input.codecAndHeader.header.getSequenceDictionary());
        }
        variants.doneAdding();
        /**
         * unique sample name, if any present in all VCF
         */
        Optional<String> unqueSampleName = Optional.empty();
        if (countInputSamples.getCountCategories() == 1 && countInputSamples.count(countInputSamples.keySet().iterator().next()) == fileid2sampleName.size()) {
            unqueSampleName = Optional.of(countInputSamples.keySet().iterator().next());
            LOG.info("Unique sample name is " + unqueSampleName.get());
        }
        VCFStandardHeaderLines.addStandardFormatLines(metaData, true, VCFConstants.DEPTH_KEY, VCFConstants.GENOTYPE_QUALITY_KEY, VCFConstants.GENOTYPE_KEY, VCFConstants.GENOTYPE_FILTER_KEY);
        VCFStandardHeaderLines.addStandardInfoLines(metaData, true, VCFConstants.DEPTH_KEY, VCFConstants.ALLELE_COUNT_KEY, VCFConstants.ALLELE_NUMBER_KEY);
        metaData.add(new VCFHeaderLine(getClass().getSimpleName(), "version:" + getVersion() + " command:" + getProgramCommandLine()));
        final VCFFilterHeaderLine variantNotCalledInAllVcf = new VCFFilterHeaderLine("NotCalledEveryWhere", "Variant was NOT called in all input VCF");
        metaData.add(variantNotCalledInAllVcf);
        final VCFFilterHeaderLine variantWasFiltered = new VCFFilterHeaderLine("VariantWasFiltered", "At least one variant was filtered");
        metaData.add(variantWasFiltered);
        final VCFFormatHeaderLine variantQUALFormat = new VCFFormatHeaderLine("VCQUAL", 1, VCFHeaderLineType.Float, "Variant Quality");
        metaData.add(variantQUALFormat);
        metaData.add(new VCFFormatHeaderLine(VCFConstants.ALLELE_NUMBER_KEY, 1, VCFHeaderLineType.Integer, "Number of allle in the src vcf"));
        metaData.add(new VCFFormatHeaderLine(VCFConstants.ALLELE_COUNT_KEY, 1, VCFHeaderLineType.Integer, "Number of ALT alllele"));
        final VCFInfoHeaderLine foundInCountVcfInfo = new VCFInfoHeaderLine("NVCF", 1, VCFHeaderLineType.Integer, "Number of VCF this variant was found");
        metaData.add(foundInCountVcfInfo);
        final VCFInfoHeaderLine variantTypesInfo = new VCFInfoHeaderLine("VTYPES", VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.String, "Distinct Variants type");
        metaData.add(variantTypesInfo);
        final VCFFilterHeaderLine multipleTypeFilters = new VCFFilterHeaderLine("DiscordantTypes", "Discordant types at this position");
        metaData.add(multipleTypeFilters);
        final VCFFormatHeaderLine variantTypeFormat = new VCFFormatHeaderLine("VTYPE", 1, VCFHeaderLineType.String, "Variant Type");
        metaData.add(variantTypeFormat);
        final VCFFilterHeaderLine uniqueVariantDiscordantGTFilter;
        if (unqueSampleName.isPresent()) {
            metaData.add(new VCFHeaderLine("UniqSample", unqueSampleName.get()));
            uniqueVariantDiscordantGTFilter = new VCFFilterHeaderLine("DiscordantGenotypeForUniqSample", "Genotype Dicordant for for sample " + unqueSampleName.get());
            metaData.add(uniqueVariantDiscordantGTFilter);
        } else {
            uniqueVariantDiscordantGTFilter = null;
        }
        final VCFHeader header = new VCFHeader(metaData, new ArrayList<>(fileid2sampleName.values()));
        if (// all have a dict
        !normalize_chr && !all_dictionaries.contains(null)) {
            SAMSequenceDictionary thedict = null;
            for (int x = 0; x < all_dictionaries.size(); ++x) {
                SAMSequenceDictionary d = all_dictionaries.get(x);
                if (thedict == null) {
                    thedict = d;
                } else if (!SequenceUtil.areSequenceDictionariesEqual(d, thedict)) {
                    thedict = null;
                    break;
                }
            }
            if (thedict != null)
                header.setSequenceDictionary(thedict);
        }
        w = super.openVariantContextWriter(super.outputFile);
        w.writeHeader(header);
        final List<LineAndFile> row = new ArrayList<LineAndFile>(super.inputs.size());
        final Comparator<LineAndFile> posCompare = (A, B) -> A.getContigPosRef().compareTo(B.getContigPosRef());
        iter = variants.iterator();
        for (; ; ) {
            LineAndFile rec = null;
            if (iter.hasNext()) {
                rec = iter.next();
            }
            if (rec == null || (!row.isEmpty() && posCompare.compare(row.get(0), rec) != 0)) {
                if (!row.isEmpty()) {
                    final VariantContext first = row.get(0).getContext();
                    /* in which file id we find this variant */
                    Set<Integer> fileids_for_variant = row.stream().map(LAF -> LAF.fileIdx).collect(Collectors.toSet());
                    // see with HAS multiple chrom/pos/ref but different alt
                    if (row.size() != fileids_for_variant.size()) {
                        for (; ; ) {
                            boolean ok = true;
                            for (int x = 0; ok && x + 1 < row.size(); ++x) {
                                final VariantContext ctxx = row.get(x).getContext();
                                final List<Allele> altsx = ctxx.getAlternateAlleles();
                                for (int y = x + 1; ok && y < row.size(); ++y) {
                                    if (row.get(x).fileIdx != row.get(y).fileIdx)
                                        continue;
                                    final VariantContext ctxy = row.get(y).getContext();
                                    final List<Allele> altsy = ctxy.getAlternateAlleles();
                                    if (altsx.equals(altsy))
                                        continue;
                                    if (!ctxx.isVariant() && ctxy.isVariant()) {
                                        row.remove(x);
                                    } else if (ctxx.isVariant() && !ctxy.isVariant()) {
                                        row.remove(y);
                                    } else if (!ctxx.isSNP() && ctxy.isSNP()) {
                                        row.remove(x);
                                    } else if (ctxx.isSNP() && !ctxy.isSNP()) {
                                        row.remove(y);
                                    } else if (altsx.size() > altsy.size()) {
                                        row.remove(x);
                                    } else if (altsx.size() < altsy.size()) {
                                        row.remove(y);
                                    } else {
                                        row.remove(y);
                                    }
                                    ok = false;
                                    break;
                                }
                            }
                            if (ok)
                                break;
                        }
                        fileids_for_variant = row.stream().map(LAF -> LAF.fileIdx).collect(Collectors.toSet());
                    }
                    if (row.size() != fileids_for_variant.size()) {
                        LOG.error("There are some duplicated variants at the position " + new ContigPosRef(first) + " in the same vcf file");
                        for (final LineAndFile laf : row) {
                            LOG.error("File [" + laf.fileIdx + "]" + fileid2sampleName.get(laf.fileIdx));
                            LOG.error("\t" + laf.getContigPosRef());
                        }
                        row.clear();
                    } else {
                        final Set<Allele> alleles = row.stream().flatMap(R -> R.getContext().getAlleles().stream()).collect(Collectors.toSet());
                        final VariantContextBuilder vcb = new VariantContextBuilder(getClass().getName(), first.getContig(), first.getStart(), first.getEnd(), alleles);
                        final Set<String> filters = new HashSet<>();
                        final Set<VariantContext.Type> variantContextTypes = new HashSet<>();
                        final List<Genotype> genotypes = new ArrayList<Genotype>();
                        for (final LineAndFile laf : row) {
                            if (laf.getContext().isFiltered())
                                filters.add(variantWasFiltered.getID());
                            variantContextTypes.add(laf.getContext().getType());
                            final GenotypeBuilder gbuilder = new GenotypeBuilder();
                            gbuilder.name(fileid2sampleName.get(laf.fileIdx));
                            if (unqueSampleName.isPresent()) {
                                final Genotype g0 = laf.getContext().getGenotype(unqueSampleName.get());
                                if (g0 == null) {
                                    iter.close();
                                    w.close();
                                    throw new IllegalStateException("Cannot find genotype for " + unqueSampleName.get());
                                }
                                if (g0.hasDP())
                                    gbuilder.DP(g0.getDP());
                                if (g0.hasGQ())
                                    gbuilder.GQ(g0.getGQ());
                                gbuilder.alleles(g0.getAlleles());
                            } else {
                                gbuilder.alleles(Arrays.asList(first.getReference(), first.getReference()));
                                if (laf.getContext().hasAttribute(VCFConstants.DEPTH_KEY)) {
                                    gbuilder.DP(laf.getContext().getAttributeAsInt(VCFConstants.DEPTH_KEY, 0));
                                }
                            }
                            if (laf.getContext().isFiltered()) {
                                gbuilder.filter("VCFFILTERED");
                            }
                            if (laf.getContext().hasLog10PError()) {
                                gbuilder.attribute(variantQUALFormat.getID(), laf.getContext().getPhredScaledQual());
                            }
                            gbuilder.attribute(VCFConstants.ALLELE_NUMBER_KEY, laf.getContext().getGenotypes().stream().flatMap(G -> G.getAlleles().stream()).filter(A -> !A.isNoCall()).count());
                            gbuilder.attribute(VCFConstants.ALLELE_COUNT_KEY, laf.getContext().getGenotypes().stream().flatMap(G -> G.getAlleles().stream()).filter(A -> !(A.isReference() || A.isNoCall())).count());
                            gbuilder.attribute(variantTypeFormat.getID(), laf.getContext().getType().name());
                            genotypes.add(gbuilder.make());
                        }
                        final String id = String.join(";", row.stream().map(LAF -> LAF.getContext()).filter(V -> V.hasID()).map(V -> V.getID()).collect(Collectors.toSet()));
                        if (!id.isEmpty())
                            vcb.id(id);
                        vcb.genotypes(genotypes);
                        if (unqueSampleName.isPresent()) {
                            boolean all_same = true;
                            for (int x = 0; all_same && x + 1 < genotypes.size(); ++x) {
                                if (!genotypes.get(x).isCalled())
                                    continue;
                                for (int y = x + 1; all_same && y < genotypes.size(); ++y) {
                                    if (!genotypes.get(y).isCalled())
                                        continue;
                                    if (!genotypes.get(x).sameGenotype(genotypes.get(y), true)) {
                                        all_same = false;
                                        break;
                                    }
                                }
                            }
                            if (!all_same)
                                filters.add(uniqueVariantDiscordantGTFilter.getID());
                        }
                        // Add AN
                        vcb.attribute(VCFConstants.ALLELE_NUMBER_KEY, genotypes.stream().filter(G -> G.isCalled()).mapToInt(G -> G.getAlleles().size()).sum());
                        if (!variantContextTypes.isEmpty()) {
                            vcb.attribute(variantTypesInfo.getID(), new ArrayList<>(variantContextTypes.stream().map(T -> T.name()).collect(Collectors.toSet())));
                            if (variantContextTypes.size() > 1) {
                                filters.add(multipleTypeFilters.getID());
                            }
                        }
                        vcb.attribute(foundInCountVcfInfo.getID(), fileids_for_variant.size());
                        boolean print = true;
                        if (row.size() == super.inputs.size() && ignore_everywhere) {
                            print = false;
                        }
                        if (fileids_for_variant.size() != fileid2sampleName.size()) {
                            filters.add(variantNotCalledInAllVcf.getID());
                            if (only_everywhere) {
                                print = false;
                            }
                        }
                        vcb.filters(filters);
                        if (print) {
                            w.add(vcb.make());
                        }
                    }
                    row.clear();
                }
                if (rec == null)
                    break;
            }
            row.add(rec);
        }
        iter.close();
        iter = null;
        w.close();
        w = null;
        return 0;
    } catch (Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(iter);
        CloserUtil.close(w);
        try {
            if (variants != null)
                variants.cleanup();
        } catch (Exception err) {
        }
    }
}
Also used : Genotype(htsjdk.variant.variantcontext.Genotype) VCFHeaderLine(htsjdk.variant.vcf.VCFHeaderLine) CloseableIterator(htsjdk.samtools.util.CloseableIterator) Allele(htsjdk.variant.variantcontext.Allele) Arrays(java.util.Arrays) SequenceUtil(htsjdk.samtools.util.SequenceUtil) Program(com.github.lindenb.jvarkit.util.jcommander.Program) Parameter(com.beust.jcommander.Parameter) VCFStandardHeaderLines(htsjdk.variant.vcf.VCFStandardHeaderLines) VCFHeader(htsjdk.variant.vcf.VCFHeader) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) ContigPosRef(com.github.lindenb.jvarkit.util.vcf.ContigPosRef) Map(java.util.Map) IOUtils(com.github.lindenb.jvarkit.io.IOUtils) CloserUtil(htsjdk.samtools.util.CloserUtil) VCFConstants(htsjdk.variant.vcf.VCFConstants) Counter(com.github.lindenb.jvarkit.util.Counter) SortingCollection(htsjdk.samtools.util.SortingCollection) VCFFilterHeaderLine(htsjdk.variant.vcf.VCFFilterHeaderLine) VCFHeaderLineType(htsjdk.variant.vcf.VCFHeaderLineType) GenotypeBuilder(htsjdk.variant.variantcontext.GenotypeBuilder) Logger(com.github.lindenb.jvarkit.util.log.Logger) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) Set(java.util.Set) Collectors(java.util.stream.Collectors) File(java.io.File) List(java.util.List) TreeMap(java.util.TreeMap) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) VCFInfoHeaderLine(htsjdk.variant.vcf.VCFInfoHeaderLine) Optional(java.util.Optional) VariantContext(htsjdk.variant.variantcontext.VariantContext) VCFFormatHeaderLine(htsjdk.variant.vcf.VCFFormatHeaderLine) Pattern(java.util.regex.Pattern) Comparator(java.util.Comparator) VCFHeaderLineCount(htsjdk.variant.vcf.VCFHeaderLineCount) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) VCFHeaderLine(htsjdk.variant.vcf.VCFHeaderLine) ArrayList(java.util.ArrayList) VariantContext(htsjdk.variant.variantcontext.VariantContext) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) Counter(com.github.lindenb.jvarkit.util.Counter) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) VCFFilterHeaderLine(htsjdk.variant.vcf.VCFFilterHeaderLine) VCFHeader(htsjdk.variant.vcf.VCFHeader) HashSet(java.util.HashSet) VCFFormatHeaderLine(htsjdk.variant.vcf.VCFFormatHeaderLine) ContigPosRef(com.github.lindenb.jvarkit.util.vcf.ContigPosRef) Genotype(htsjdk.variant.variantcontext.Genotype) GenotypeBuilder(htsjdk.variant.variantcontext.GenotypeBuilder) TreeMap(java.util.TreeMap) VCFInfoHeaderLine(htsjdk.variant.vcf.VCFInfoHeaderLine) Allele(htsjdk.variant.variantcontext.Allele) VCFHeaderLineType(htsjdk.variant.vcf.VCFHeaderLineType) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder)

Example 64 with Allele

use of htsjdk.variant.variantcontext.Allele in project jvarkit by lindenb.

the class VcfCompareCallersOneSample method doWork.

@Override
public int doWork(List<String> args) {
    File inputFile = null;
    List<EqualRangeVcfIterator> listChallengers = new ArrayList<>();
    VariantContextWriter vcw = null;
    VcfIterator in = null;
    try {
        in = super.openVcfIterator(oneFileOrNull(args));
        VCFHeader header = in.getHeader();
        if (header.getNGenotypeSamples() != 1) {
            LOG.error("vcf.must.have.only.one.sample");
            return -1;
        }
        VCFHeader h2 = new VCFHeader(header);
        h2.addMetaDataLine(new VCFHeaderLine(getClass().getSimpleName() + "CmdLine", String.valueOf(getProgramCommandLine())));
        h2.addMetaDataLine(new VCFHeaderLine(getClass().getSimpleName() + "Version", String.valueOf(getVersion())));
        h2.addMetaDataLine(new VCFHeaderLine(getClass().getSimpleName() + "HtsJdkVersion", HtsjdkVersion.getVersion()));
        h2.addMetaDataLine(new VCFHeaderLine(getClass().getSimpleName() + "HtsJdkHome", HtsjdkVersion.getHome()));
        SAMSequenceDictionary dict = header.getSequenceDictionary();
        if (dict == null) {
            LOG.error("no.dict.in.vcf");
            return -1;
        }
        Comparator<VariantContext> ctxComparator = VCFUtils.createTidPosComparator(dict);
        /* load files to be challenged */
        for (File cf : this.challengerVcf) {
            // do not challenge vs itself
            if (inputFile != null && inputFile.equals(cf)) {
                LOG.error("Ignoring challenger (self): " + cf);
                continue;
            }
            VcfIterator cin = VCFUtils.createVcfIteratorFromFile(cf);
            VCFHeader ch = cin.getHeader();
            if (ch.getNGenotypeSamples() != 1) {
                LOG.warning("vcf.must.have.only.one.sample");
                cin.close();
                continue;
            }
            if (!header.getSampleNamesInOrder().get(0).equals(ch.getSampleNamesInOrder().get(0))) {
                LOG.warning("Ignoring " + cf + " because not the same sample.");
                cin.close();
                continue;
            }
            SAMSequenceDictionary hdict = ch.getSequenceDictionary();
            if (hdict == null || !SequenceUtil.areSequenceDictionariesEqual(dict, hdict)) {
                LOG.error("not.the.same.sequence.dictionaries");
                return -1;
            }
            listChallengers.add(new EqualRangeVcfIterator(cin, ctxComparator));
        }
        vcw = super.openVariantContextWriter(outputFile);
        vcw.writeHeader(h2);
        SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(dict);
        VariantContext prev_ctx = null;
        while (in.hasNext() && !vcw.checkError()) {
            VariantContext ctx = progress.watch(in.next());
            // check input order
            if (prev_ctx != null && ctxComparator.compare(prev_ctx, ctx) > 0) {
                LOG.error("bad sort order : got\n\t" + prev_ctx + "\nbefore\n\t" + ctx + "\n");
                return -1;
            }
            prev_ctx = ctx;
            int countInOtherFiles = 0;
            for (EqualRangeVcfIterator citer : listChallengers) {
                boolean foundInThatFile = false;
                List<VariantContext> ctxChallenging = citer.next(ctx);
                for (VariantContext ctx2 : ctxChallenging) {
                    if (!ctx2.getReference().equals(ctx.getReference()))
                        continue;
                    boolean ok = true;
                    if (!this.ignoreAlternate) {
                        Set<Allele> myAlt = new HashSet<Allele>(ctx.getAlternateAlleles());
                        myAlt.removeAll(ctx2.getAlternateAlleles());
                        if (!myAlt.isEmpty())
                            ok = false;
                    }
                    if (ok) {
                        foundInThatFile = true;
                        break;
                    }
                }
                countInOtherFiles += (foundInThatFile ? 1 : 0);
            }
            if (countInOtherFiles >= minCountInclusive && countInOtherFiles <= maxCountInclusive) {
                vcw.add(ctx);
            }
        }
        progress.finish();
        return 0;
    } catch (Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(vcw);
        CloserUtil.close(listChallengers);
        CloserUtil.close(in);
    }
}
Also used : VCFHeaderLine(htsjdk.variant.vcf.VCFHeaderLine) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) ArrayList(java.util.ArrayList) VariantContext(htsjdk.variant.variantcontext.VariantContext) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) VcfIterator(com.github.lindenb.jvarkit.util.vcf.VcfIterator) Allele(htsjdk.variant.variantcontext.Allele) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) VCFHeader(htsjdk.variant.vcf.VCFHeader) File(java.io.File) HashSet(java.util.HashSet)

Example 65 with Allele

use of htsjdk.variant.variantcontext.Allele in project jvarkit by lindenb.

the class FindAVariation method report.

private void report(final File f, final VCFHeader header, final VariantContext ctx, final Mutation mpos) {
    final GenotypesContext genotypes = ctx.getGenotypes();
    if (genotypes == null || genotypes.isEmpty()) {
        reportPos(f, header, ctx);
        out.println();
    } else {
        VCFFormatHeaderLine DP4header = header.getFormatHeaderLine("DP4");
        if (DP4header != null && !(DP4header.getType().equals(VCFHeaderLineType.Integer) && DP4header.getCount() == 4)) {
            DP4header = null;
        }
        for (int i = 0; i < genotypes.size(); ++i) {
            Genotype g = genotypes.get(i);
            if (!g.isCalled() && this.hideNoCall)
                continue;
            if (g.isHomRef() && this.hideHomRef)
                continue;
            reportPos(f, header, ctx);
            out.print('\t');
            out.print(g.getSampleName());
            out.print('\t');
            out.print(g.getType());
            out.print('\t');
            List<Allele> alleles = g.getAlleles();
            for (int na = 0; na < alleles.size(); ++na) {
                if (na > 0)
                    out.print(" ");
                out.print(alleles.get(na).getDisplayString());
            }
            if (DP4header != null) {
                final Object dp4 = g.getExtendedAttribute("DP4");
                if (dp4 != null) {
                    out.print('\t');
                    // it's a String not an int[] ??
                    out.print(String.valueOf(dp4));
                }
            }
            out.println();
        }
    }
}
Also used : Allele(htsjdk.variant.variantcontext.Allele) GenotypesContext(htsjdk.variant.variantcontext.GenotypesContext) Genotype(htsjdk.variant.variantcontext.Genotype) VCFFormatHeaderLine(htsjdk.variant.vcf.VCFFormatHeaderLine)

Aggregations

Allele (htsjdk.variant.variantcontext.Allele)157 VariantContext (htsjdk.variant.variantcontext.VariantContext)82 Genotype (htsjdk.variant.variantcontext.Genotype)72 VariantContextBuilder (htsjdk.variant.variantcontext.VariantContextBuilder)66 ArrayList (java.util.ArrayList)50 Test (org.testng.annotations.Test)48 VCFHeader (htsjdk.variant.vcf.VCFHeader)42 GenotypeBuilder (htsjdk.variant.variantcontext.GenotypeBuilder)37 File (java.io.File)31 Collectors (java.util.stream.Collectors)31 HashSet (java.util.HashSet)30 VariantContextWriter (htsjdk.variant.variantcontext.writer.VariantContextWriter)29 IOException (java.io.IOException)28 VCFHeaderLine (htsjdk.variant.vcf.VCFHeaderLine)26 VCFInfoHeaderLine (htsjdk.variant.vcf.VCFInfoHeaderLine)25 VCFConstants (htsjdk.variant.vcf.VCFConstants)22 VCFFormatHeaderLine (htsjdk.variant.vcf.VCFFormatHeaderLine)22 List (java.util.List)22 SAMSequenceDictionaryProgress (com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress)21 SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary)20