Search in sources :

Example 1 with StructuralVariantType

use of htsjdk.variant.variantcontext.StructuralVariantType in project jvarkit by lindenb.

the class LumpyMoreSamples method doWork.

@Override
public int doWork(final List<String> args) {
    VcfIterator r = null;
    VariantContextWriter vcw = null;
    final Map<String, SamReader> sample2samreaders = new HashMap<>();
    try {
        r = super.openVcfIterator(oneFileOrNull(args));
        final VCFHeader headerIn = r.getHeader();
        final SAMSequenceDictionary dict = headerIn.getSequenceDictionary();
        if (dict == null) {
            LOG.error(JvarkitException.VcfDictionaryMissing.getMessage("input vcf"));
            return -1;
        }
        final SamReaderFactory samReaderFactory = SamReaderFactory.makeDefault().validationStringency(ValidationStringency.LENIENT);
        IOUtil.slurpLines(this.bamFileList).stream().forEach(F -> {
            if (F.trim().isEmpty())
                return;
            final SamReader sr = samReaderFactory.open(SamInputResource.of(F));
            final SAMFileHeader samHeader = sr.getFileHeader();
            final SAMSequenceDictionary dict2 = samHeader.getSequenceDictionary();
            if (dict2 == null) {
                throw new JvarkitException.BamDictionaryMissing(F);
            }
            if (!SequenceUtil.areSequenceDictionariesEqual(dict, dict2)) {
                throw new JvarkitException.DictionariesAreNotTheSame(dict, dict2);
            }
            for (final SAMReadGroupRecord rg : samHeader.getReadGroups()) {
                final String sample = rg.getSample();
                if (StringUtil.isBlank(sample))
                    continue;
                final SamReader reader = sample2samreaders.get(sample);
                if (reader == null) {
                    sample2samreaders.put(sample, reader);
                } else if (reader == sr) {
                    continue;
                } else {
                    throw new JvarkitException.UserError("more than one sample per bam:" + F);
                }
            }
        });
        final Set<String> inVcfSampleNames = new HashSet<>(headerIn.getSampleNamesInOrder());
        final Set<String> outVcfSampleNames = new HashSet<>(inVcfSampleNames);
        outVcfSampleNames.addAll(sample2samreaders.keySet());
        final VCFHeader headerOut = new VCFHeader(headerIn.getMetaDataInInputOrder(), outVcfSampleNames);
        final VCFFormatHeaderLine SU2 = new VCFFormatHeaderLine("SU2", 1, VCFHeaderLineType.Integer, "Number of pieces of evidence supporting the variant");
        final VCFFormatHeaderLine PE2 = new VCFFormatHeaderLine("PE2", 1, VCFHeaderLineType.Integer, "Number of split reads supporting the variant");
        final VCFFormatHeaderLine SR2 = new VCFFormatHeaderLine("SR2", 1, VCFHeaderLineType.Integer, "Number of paired-end reads supporting the variant");
        headerOut.addMetaDataLine(SU2);
        headerOut.addMetaDataLine(PE2);
        headerOut.addMetaDataLine(SR2);
        vcw = super.openVariantContextWriter(this.outputFile);
        vcw.writeHeader(headerOut);
        while (r.hasNext()) {
            final VariantContext ctx = r.next();
            final StructuralVariantType sttype = ctx.getStructuralVariantType();
            if (sttype == null)
                continue;
            final int tid = dict.getSequenceIndex(ctx.getContig());
            final Map<String, Genotype> genotypeMap = new HashMap<>();
            ctx.getGenotypes().stream().forEach(G -> genotypeMap.put(G.getSampleName(), G));
            for (final String sample : sample2samreaders.keySet()) {
                final SamReader samReader = sample2samreaders.get(sample);
                final SupportingReads sr = new SupportingReads();
                switch(sttype) {
                    case DEL:
                        {
                            int pos = ctx.getStart();
                            int[] ci = confidenceIntervalPos(ctx);
                            final QueryInterval left = new QueryInterval(tid, pos - ci[0], pos + ci[1]);
                            int end = ctx.getEnd();
                            ci = confidenceIntervalEnd(ctx);
                            final QueryInterval right = new QueryInterval(tid, end - ci[0], end + ci[1]);
                            for (final SAMRecord rec : extractSupportingReads(ctx, sample, samReader, new QueryInterval[] { left, right })) {
                                final Cigar cigar = rec.getCigar();
                                if (cigar.isLeftClipped()) {
                                    final QueryInterval qi = new QueryInterval(tid, rec.getUnclippedStart(), rec.getStart() - 1);
                                    if (qi.overlaps(left)) {
                                        sr.splitReads++;
                                        if (rec.getReadPairedFlag())
                                            sr.pairedReads++;
                                    }
                                }
                                if (cigar.isRightClipped()) {
                                    final QueryInterval qi = new QueryInterval(tid, rec.getEnd() + 1, rec.getUnclippedEnd());
                                    if (qi.overlaps(right)) {
                                        sr.splitReads++;
                                        if (rec.getReadPairedFlag())
                                            sr.pairedReads++;
                                    }
                                }
                            }
                            break;
                        }
                    default:
                        break;
                }
                final GenotypeBuilder gb;
                if (genotypeMap.containsKey(sample)) {
                    gb = new GenotypeBuilder(genotypeMap.get(sample));
                } else {
                    gb = new GenotypeBuilder(sample, Arrays.asList(Allele.NO_CALL, Allele.NO_CALL));
                }
                gb.attribute(SR2.getID(), sr.splitReads);
                gb.attribute(PE2.getID(), sr.pairedReads);
                gb.attribute(SU2.getID(), 0);
                genotypeMap.put(sample, gb.make());
            }
            final VariantContextBuilder vcb = new VariantContextBuilder(ctx);
            // add missing samples.
            for (final String sampleName : outVcfSampleNames) {
                if (genotypeMap.containsKey(sampleName))
                    continue;
                genotypeMap.put(sampleName, new GenotypeBuilder(sampleName, Arrays.asList(Allele.NO_CALL, Allele.NO_CALL)).make());
            }
            vcb.genotypes(genotypeMap.values());
            vcw.add(vcb.make());
        }
        r.close();
        r = null;
        sample2samreaders.values().stream().forEach(R -> CloserUtil.close(R));
        LOG.info("done");
        return 0;
    } catch (final Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(r);
    }
}
Also used : HashMap(java.util.HashMap) SAMReadGroupRecord(htsjdk.samtools.SAMReadGroupRecord) VariantContext(htsjdk.variant.variantcontext.VariantContext) QueryInterval(htsjdk.samtools.QueryInterval) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) SamReader(htsjdk.samtools.SamReader) VcfIterator(com.github.lindenb.jvarkit.util.vcf.VcfIterator) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) VCFHeader(htsjdk.variant.vcf.VCFHeader) HashSet(java.util.HashSet) VCFFormatHeaderLine(htsjdk.variant.vcf.VCFFormatHeaderLine) SamReaderFactory(htsjdk.samtools.SamReaderFactory) Genotype(htsjdk.variant.variantcontext.Genotype) GenotypeBuilder(htsjdk.variant.variantcontext.GenotypeBuilder) JvarkitException(com.github.lindenb.jvarkit.lang.JvarkitException) JvarkitException(com.github.lindenb.jvarkit.lang.JvarkitException) Cigar(htsjdk.samtools.Cigar) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) SAMRecord(htsjdk.samtools.SAMRecord) SAMFileHeader(htsjdk.samtools.SAMFileHeader) StructuralVariantType(htsjdk.variant.variantcontext.StructuralVariantType)

Aggregations

JvarkitException (com.github.lindenb.jvarkit.lang.JvarkitException)1 VcfIterator (com.github.lindenb.jvarkit.util.vcf.VcfIterator)1 Cigar (htsjdk.samtools.Cigar)1 QueryInterval (htsjdk.samtools.QueryInterval)1 SAMFileHeader (htsjdk.samtools.SAMFileHeader)1 SAMReadGroupRecord (htsjdk.samtools.SAMReadGroupRecord)1 SAMRecord (htsjdk.samtools.SAMRecord)1 SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary)1 SamReader (htsjdk.samtools.SamReader)1 SamReaderFactory (htsjdk.samtools.SamReaderFactory)1 Genotype (htsjdk.variant.variantcontext.Genotype)1 GenotypeBuilder (htsjdk.variant.variantcontext.GenotypeBuilder)1 StructuralVariantType (htsjdk.variant.variantcontext.StructuralVariantType)1 VariantContext (htsjdk.variant.variantcontext.VariantContext)1 VariantContextBuilder (htsjdk.variant.variantcontext.VariantContextBuilder)1 VariantContextWriter (htsjdk.variant.variantcontext.writer.VariantContextWriter)1 VCFFormatHeaderLine (htsjdk.variant.vcf.VCFFormatHeaderLine)1 VCFHeader (htsjdk.variant.vcf.VCFHeader)1 HashMap (java.util.HashMap)1 HashSet (java.util.HashSet)1