
Example 81 with CigarElement

use of htsjdk.samtools.CigarElement in project jvarkit by lindenb.

the class SamFindClippedRegions method doWork.

/*private static boolean closeTo(int pos1,int pos2, int max)
		{
		return Math.abs(pos2-pos1)<=max;
		}*/
/*
	private static boolean same(char c1,char c2)
		{
		if(c1=='N' || c2=='N') return false;
		return Character.toUpperCase(c1)==Character.toUpperCase(c2);
		}*/
@Override
public int doWork(List<String> args) {
    int readLength = 150;
    if (args.isEmpty()) {
        LOG.error("illegal.number.of.arguments");
        return -1;
    }
    List<Input> inputs = new ArrayList<Input>();
    VariantContextWriter w = null;
    // SAMFileWriter w=null;
    try {
        SAMSequenceDictionary dict = null;
        /* create input, collect sample names */
        Map<String, Input> sample2input = new HashMap<String, Input>();
        for (final String filename : args) {
            Input input = new Input(new File(filename));
            // input.index=inputs.size();
            inputs.add(input);
            if (sample2input.containsKey(input.sampleName)) {
                LOG.error("Duplicate sample " + input.sampleName + " in " + input.bamFile + " and " + sample2input.get(input.sampleName).bamFile);
                return -1;
            }
            sample2input.put(input.sampleName, input);
            if (dict == null) {
                dict = input.header.getSequenceDictionary();
            } else if (!SequenceUtil.areSequenceDictionariesEqual(dict, input.header.getSequenceDictionary())) {
                LOG.error("Found more than one dictint sequence dictionary");
                return -1;
            }
        }
        LOG.info("Sample N= " + sample2input.size());
        /* create merged iterator */
        List<SAMFileHeader> headers = new ArrayList<SAMFileHeader>(sample2input.size());
        for (Input input : inputs) headers.add(input.header);
        SamFileHeaderMerger headerMerger = new SamFileHeaderMerger(SortOrder.coordinate, headers, true);
        List<SamReader> readers = new ArrayList<SamReader>(sample2input.size());
        for (Input input : inputs) readers.add(input.samFileReaderScan);
        MergingSamRecordIterator merginIter = new MergingSamRecordIterator(headerMerger, readers, true);
        Allele reference_allele = Allele.create("N", true);
        Allele[] alternate_alleles = new Allele[] { Allele.create("<CLIP5>", false), Allele.create("<CLIP3>", false) };
        Set<VCFHeaderLine> vcfHeaderLines = new HashSet<VCFHeaderLine>();
        for (Allele alt : alternate_alleles) {
            vcfHeaderLines.add(new VCFSimpleHeaderLine("<ID=" + alt.getDisplayString() + ",Description=\"StructVar\">", VCFHeaderVersion.VCF4_1, VCFConstants.ALT_HEADER_START.substring(2), Arrays.asList("ID", "Description")));
        }
        vcfHeaderLines.add(new VCFInfoHeaderLine("COUNT_SAMPLES", 1, VCFHeaderLineType.Integer, "Number of samples with depth>=" + this.min_depth));
        vcfHeaderLines.add(new VCFInfoHeaderLine(VCFConstants.DEPTH_KEY, 1, VCFHeaderLineType.Integer, "Approximate read depth."));
        vcfHeaderLines.add(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_KEY, 1, VCFHeaderLineType.String, "Genotype"));
        vcfHeaderLines.add(new VCFFormatHeaderLine(VCFConstants.DEPTH_KEY, 1, VCFHeaderLineType.Integer, "Approximate read depth"));
        vcfHeaderLines.add(new VCFHeaderLine(getClass().getSimpleName() + "CmdLine", String.valueOf(getProgramCommandLine())));
        vcfHeaderLines.add(new VCFHeaderLine(getClass().getSimpleName() + "Version", String.valueOf(getVersion())));
        for (int side = 0; side < 2; ++side) {
            vcfHeaderLines.add(new VCFFormatHeaderLine("CN" + (side == 0 ? 5 : 3), 1, VCFHeaderLineType.Integer, "count clipped in " + (side == 0 ? 5 : 3) + "'"));
        }
        if (dict != null) {
            vcfHeaderLines.addAll(VCFUtils.samSequenceDictToVCFContigHeaderLine(dict));
        }
        VCFHeader vcfHeader = new VCFHeader(vcfHeaderLines, sample2input.keySet());
        w = VCFUtils.createVariantContextWriterToStdout();
        w.writeHeader(vcfHeader);
        final IntervalTreeMap<Boolean> intervals = new IntervalTreeMap<>();
        // w=swf.make(header, System.out);
        SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(dict);
        if (bedFile != null) {
            final BedLineCodec bedLineCodec = new BedLineCodec();
            LOG.info("Reading " + bedFile);
            BufferedReader r = IOUtils.openFileForBufferedReading(bedFile);
            String line;
            while ((line = r.readLine()) != null) {
                BedLine bedLine = bedLineCodec.decode(line);
                if (bedLine == null)
                    continue;
                if (dict != null && dict.getSequence(bedLine.getContig()) == null) {
                    LOG.warning("undefined chromosome  in " + bedFile + " " + line);
                    continue;
                }
                intervals.put(bedLine.toInterval(), true);
            }
            CloserUtil.close(r);
        }
        LinkedList<SAMRecord> buffer = new LinkedList<SAMRecord>();
        final Predicate<SAMRecord> filterSamRecords = new Predicate<SAMRecord>() {

            @Override
            public boolean test(SAMRecord rec) {
                if (rec.getReadUnmappedFlag())
                    return false;
                if (rec.isSecondaryOrSupplementary())
                    return false;
                if (rec.getDuplicateReadFlag())
                    return false;
                if (rec.getReadFailsVendorQualityCheckFlag())
                    return false;
                Cigar cigar = rec.getCigar();
                if (cigar == null || cigar.numCigarElements() < 2)
                    return false;
                boolean found_S = false;
                for (int side = 0; side < 2; ++side) {
                    CigarElement ce = cigar.getCigarElement(side == 0 ? 0 : cigar.numCigarElements() - 1);
                    // read must be clipped on 5' or 3' with a good length
                    if (!ce.getOperator().equals(CigarOperator.S))
                        continue;
                    found_S = true;
                    break;
                }
                if (!found_S)
                    return false;
                SAMReadGroupRecord g = rec.getReadGroup();
                if (g == null || g.getSample() == null || g.getSample().isEmpty())
                    return false;
                return true;
            }
        };
        final FilteringIterator<SAMRecord> forwardIterator = new FilteringIterator<SAMRecord>(merginIter, filterSamRecords);
        for (; ; ) {
            SAMRecord rec = null;
            if (forwardIterator.hasNext()) {
                rec = forwardIterator.next();
                progress.watch(rec);
                if (intervals != null && !intervals.containsOverlapping(new Interval(rec.getReferenceName(), rec.getAlignmentStart(), rec.getAlignmentEnd())))
                    continue;
            }
            // need to flush buffer ?
            if (rec == null || (!buffer.isEmpty() && !buffer.getLast().getReferenceIndex().equals(rec.getReferenceIndex())) || (!buffer.isEmpty() && buffer.getLast().getUnclippedEnd() + readLength < rec.getUnclippedStart())) {
                if (!buffer.isEmpty()) {
                    int chromStart = buffer.getFirst().getUnclippedStart();
                    int chromEnd = buffer.getFirst().getUnclippedEnd();
                    for (SAMRecord sam : buffer) {
                        chromStart = Math.min(chromStart, sam.getUnclippedStart());
                        chromEnd = Math.max(chromEnd, sam.getUnclippedEnd());
                    }
                    final int winShift = 5;
                    for (int pos = chromStart; pos + winShift <= chromEnd; pos += winShift) {
                        int[] count_big_clip = new int[] { 0, 0 };
                        // int max_depth[]=new int[]{0,0};
                        List<Genotype> genotypes = new ArrayList<Genotype>();
                        Set<Allele> all_alleles = new HashSet<Allele>();
                        all_alleles.add(reference_allele);
                        boolean found_one_depth_ok = false;
                        int sum_depth = 0;
                        int samples_with_high_depth = 0;
                        for (String sample : sample2input.keySet()) {
                            GenotypeBuilder gb = new GenotypeBuilder(sample);
                            int[] count_clipped = new int[] { 0, 0 };
                            Set<Allele> sample_alleles = new HashSet<Allele>(3);
                            for (int side = 0; side < 2; ++side) {
                                for (SAMRecord sam : buffer) {
                                    if (!sam.getReadGroup().getSample().equals(sample))
                                        continue;
                                    Cigar cigar = sam.getCigar();
                                    CigarElement ce = cigar.getCigarElement(side == 0 ? 0 : cigar.numCigarElements() - 1);
                                    if (!ce.getOperator().equals(CigarOperator.S))
                                        continue;
                                    int clipStart = (side == 0 ? sam.getUnclippedStart() : sam.getAlignmentEnd() + 1);
                                    int clipEnd = (side == 0 ? sam.getAlignmentStart() - 1 : sam.getUnclippedEnd());
                                    if ((pos + winShift < clipStart || pos > clipEnd))
                                        continue;
                                    count_clipped[side]++;
                                    if (ce.getLength() >= this.min_clip_length) {
                                        count_big_clip[side]++;
                                    }
                                    sample_alleles.add(alternate_alleles[side]);
                                    gb.attribute("CN" + (side == 0 ? 5 : 3), count_clipped[side]);
                                }
                            }
                            // if(!(found_one_big_clip[0] || found_one_big_clip[1])) continue;
                            if (count_clipped[0] + count_clipped[1] == 0)
                                continue;
                            if ((count_clipped[0] + count_clipped[1]) > min_depth) {
                                found_one_depth_ok = true;
                                ++samples_with_high_depth;
                            }
                            sum_depth += (count_clipped[0] + count_clipped[1]);
                            gb.alleles(new ArrayList<Allele>(sample_alleles));
                            all_alleles.addAll(sample_alleles);
                            gb.DP(count_clipped[0] + count_clipped[1]);
                            genotypes.add(gb.make());
                        }
                        if (all_alleles.size() == 1) {
                            // all homozygotes
                            continue;
                        }
                        if (!found_one_depth_ok) {
                            continue;
                        }
                        if (!(count_big_clip[0] >= 1 || count_big_clip[1] >= 1)) {
                            continue;
                        }
                        Map<String, Object> atts = new HashMap<String, Object>();
                        atts.put("COUNT_SAMPLES", samples_with_high_depth);
                        atts.put(VCFConstants.DEPTH_KEY, sum_depth);
                        VariantContextBuilder vcb = new VariantContextBuilder();
                        vcb.chr(buffer.getFirst().getReferenceName());
                        vcb.start(pos);
                        vcb.stop(pos + winShift);
                        vcb.alleles(all_alleles);
                        vcb.attributes(atts);
                        vcb.genotypes(genotypes);
                        w.add(vcb.make());
                    }
                    buffer.clear();
                }
                if (rec == null) {
                    break;
                }
            }
            buffer.add(rec);
        }
        merginIter.close();
        progress.finish();
        return 0;
    } catch (Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        for (Input input : inputs) {
            CloserUtil.close(input);
        }
    }
}
Also used : VCFHeaderLine(htsjdk.variant.vcf.VCFHeaderLine) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) VCFSimpleHeaderLine(htsjdk.variant.vcf.VCFSimpleHeaderLine) Predicate(java.util.function.Predicate) HashSet(java.util.HashSet) Genotype(htsjdk.variant.variantcontext.Genotype) GenotypeBuilder(htsjdk.variant.variantcontext.GenotypeBuilder) CigarElement(htsjdk.samtools.CigarElement) LinkedList(java.util.LinkedList) BedLineCodec(com.github.lindenb.jvarkit.util.bio.bed.BedLineCodec) BedLine(com.github.lindenb.jvarkit.util.bio.bed.BedLine) SAMRecord(htsjdk.samtools.SAMRecord) SAMFileHeader(htsjdk.samtools.SAMFileHeader) File(java.io.File) Interval(htsjdk.samtools.util.Interval) MergingSamRecordIterator(htsjdk.samtools.MergingSamRecordIterator) SAMReadGroupRecord(htsjdk.samtools.SAMReadGroupRecord) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) SamReader(htsjdk.samtools.SamReader) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) VCFHeader(htsjdk.variant.vcf.VCFHeader) VCFFormatHeaderLine(htsjdk.variant.vcf.VCFFormatHeaderLine) SamFileHeaderMerger(htsjdk.samtools.SamFileHeaderMerger) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) VCFInfoHeaderLine(htsjdk.variant.vcf.VCFInfoHeaderLine) IOException(java.io.IOException) Allele(htsjdk.variant.variantcontext.Allele) Cigar(htsjdk.samtools.Cigar) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) BufferedReader(java.io.BufferedReader) IntervalTreeMap(htsjdk.samtools.util.IntervalTreeMap)
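
The filter above comes down to a single question about the CigarElements: is the read soft-clipped at its 5' or 3' end? A standalone sketch of that check, assuming only htsjdk on the classpath (the class and method names ClipCheck / isSoftClippedAtEitherEnd and the minLength parameter are illustrative, not part of jvarkit; the tool itself applies its min_clip_length threshold later, per sample):

import htsjdk.samtools.Cigar;
import htsjdk.samtools.CigarElement;
import htsjdk.samtools.CigarOperator;
import htsjdk.samtools.SAMRecord;

public final class ClipCheck {

    /** true if the mapped read starts or ends with a soft clip of at least minLength bases */
    public static boolean isSoftClippedAtEitherEnd(final SAMRecord rec, final int minLength) {
        if (rec.getReadUnmappedFlag()) {
            return false;
        }
        final Cigar cigar = rec.getCigar();
        if (cigar == null || cigar.numCigarElements() < 2) {
            return false;
        }
        // only the first and last elements of a valid cigar can be soft clips
        final CigarElement first = cigar.getCigarElement(0);
        final CigarElement last = cigar.getCigarElement(cigar.numCigarElements() - 1);
        return (first.getOperator() == CigarOperator.S && first.getLength() >= minLength)
            || (last.getOperator() == CigarOperator.S && last.getLength() >= minLength);
    }
}

A predicate such as rec -> ClipCheck.isSoftClippedAtEitherEnd(rec, 1) could then be plugged into the same FilteringIterator pattern used in the example.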

Example 82 with CigarElement

use of htsjdk.samtools.CigarElement in project jvarkit by lindenb.

the class ReadClipper method clip.

public SAMRecord clip(final SAMRecord rec, final Interval fragment) {
    if (rec.getReadUnmappedFlag()) {
        return rec;
    }
    if (!fragment.getContig().equals(rec.getContig())) {
        return rec;
    }
    if (rec.getAlignmentEnd() < fragment.getStart()) {
        return rec;
    }
    if (rec.getAlignmentStart() > fragment.getEnd()) {
        return rec;
    }
    Cigar cigar = rec.getCigar();
    if (cigar == null) {
        LOG.warning("cigar missing in " + rec);
        return rec;
    }
    final List<BaseOp> bases = new ArrayList<>(cigar.getCigarElements().stream().mapToInt(C -> C.getLength()).sum());
    // expand cigar
    int refPos = rec.getUnclippedStart();
    for (final CigarElement ce : cigar.getCigarElements()) {
        final CigarOperator op = ce.getOperator();
        if (op.equals(CigarOperator.P))
            continue;
        for (int x = 0; x < ce.getLength(); ++x) {
            bases.add(new BaseOp(op, op.consumesReferenceBases() || op.isClipping() ? refPos : -1));
            if (op.consumesReferenceBases() || op.isClipping()) {
                refPos++;
            }
        }
    }
    /* 5' side */
    int newStart = rec.getAlignmentStart();
    int x = 0;
    while (x < bases.size()) {
        final BaseOp b = bases.get(x);
        if (b.refPos != -1 && b.isMatch() && b.refPos >= fragment.getStart()) {
            newStart = b.refPos;
            break;
        } else if (b.isDeletion()) {
            bases.remove(x);
            continue;
        } else if (!b.op.isClipping()) {
            b.op = CigarOperator.S;
            ++x;
        } else {
            ++x;
        }
    }
    /* 3' side */
    x = bases.size() - 1;
    while (x >= 0) {
        final BaseOp b = bases.get(x);
        if (b.refPos != -1 && b.isMatch() && b.refPos <= fragment.getEnd()) {
            break;
        } else if (b.isDeletion()) {
            bases.remove(x);
            --x;
            continue;
        } else if (!b.op.isClipping()) {
            b.op = CigarOperator.S;
            --x;
        } else {
            --x;
        }
    }
    // build new cigar
    boolean found_M = false;
    final List<CigarElement> newcigarlist = new ArrayList<>();
    x = 0;
    while (x < bases.size()) {
        final CigarOperator op = bases.get(x).op;
        if (op.equals(CigarOperator.M) || op.equals(CigarOperator.EQ)) {
            found_M = true;
        }
        int x2 = x;
        while (x2 < bases.size() && op.equals(bases.get(x2).op)) {
            ++x2;
        }
        newcigarlist.add(new CigarElement(x2 - x, op));
        x = x2;
    }
    if (!found_M) {
        SAMUtils.makeReadUnmappedWithOriginalTags(rec);
        if (this.programGroup != null) {
            rec.setAttribute("PG", programGroup);
        }
        return rec;
    }
    cigar = new Cigar(newcigarlist);
    rec.setCigar(cigar);
    rec.setAlignmentStart(newStart);
    if (this.programGroup != null) {
        rec.setAttribute("PG", programGroup);
    }
    return rec;
}
Also used : Cigar(htsjdk.samtools.Cigar) ArrayList(java.util.ArrayList) CigarOperator(htsjdk.samtools.CigarOperator) CigarElement(htsjdk.samtools.CigarElement)
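
The heart of ReadClipper is run-length re-encoding: the cigar is expanded to one operator per base, bases outside the fragment are rewritten to soft clips, and runs of identical operators are collapsed back into CigarElements (the while loop building newcigarlist above). A minimal sketch of just the collapse step, assuming htsjdk only (class and method names CigarCompress / collapse are illustrative):

import htsjdk.samtools.Cigar;
import htsjdk.samtools.CigarElement;
import htsjdk.samtools.CigarOperator;
import java.util.ArrayList;
import java.util.List;

public final class CigarCompress {

    /** collapse a per-base list of operators back into a Cigar, e.g. [M,M,M,S,S] becomes 3M2S */
    public static Cigar collapse(final List<CigarOperator> perBase) {
        final List<CigarElement> elements = new ArrayList<>();
        int i = 0;
        while (i < perBase.size()) {
            final CigarOperator op = perBase.get(i);
            int j = i;
            // extend the run of identical operators
            while (j < perBase.size() && perBase.get(j) == op) {
                ++j;
            }
            elements.add(new CigarElement(j - i, op));
            i = j;
        }
        return new Cigar(elements);
    }
}

The jvarkit code does the same thing inline while also tracking reference positions so it can recompute the new alignment start.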

Example 83 with CigarElement

use of htsjdk.samtools.CigarElement in project jvarkit by lindenb.

the class SamSlop method doWork.

@Override
public int doWork(List<String> args) {
    if (this.faidx == null) {
        LOG.error("Reference was not specified.");
        return -1;
    }
    if (this.defaultQual.length() != 1) {
        LOG.error("default quality should have length==1 " + this.defaultQual);
        return -1;
    }
    GenomicSequence genomicSequence = null;
    SamReader sfr = null;
    SAMFileWriter sfw = null;
    IndexedFastaSequenceFile indexedFastaSequenceFile = null;
    final char defaultQUAL = this.defaultQual.charAt(0);
    try {
        final String inputName = oneFileOrNull(args);
        LOG.info("Loading reference");
        indexedFastaSequenceFile = new IndexedFastaSequenceFile(faidx);
        sfr = openSamReader(inputName);
        final SAMFileHeader header = sfr.getFileHeader();
        header.setSortOrder(SortOrder.unsorted);
        sfw = writingBamArgs.openSAMFileWriter(outputFile, header, true);
        final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(header);
        final SAMRecordIterator iter = sfr.iterator();
        while (iter.hasNext()) {
            final SAMRecord rec = progress.watch(iter.next());
            final Cigar cigar = rec.getCigar();
            if (rec.getReadUnmappedFlag() || cigar == null || cigar.isEmpty() || rec.getReadBases() == SAMRecord.NULL_SEQUENCE || (this.extend5 <= 0 && this.extend3 <= 0)) {
                sfw.addAlignment(rec);
                continue;
            }
            final StringBuilder sbs = new StringBuilder(rec.getReadString());
            final StringBuilder sbq = new StringBuilder(rec.getBaseQualityString());
            if (genomicSequence == null || genomicSequence.getSAMSequenceRecord().getSequenceIndex() != rec.getReferenceIndex()) {
                genomicSequence = new GenomicSequence(indexedFastaSequenceFile, rec.getReferenceName());
            }
            final int refPos1 = (this.removeClip ? rec.getAlignmentStart() : rec.getUnclippedStart());
            final int endAlignment1 = (this.removeClip ? rec.getAlignmentEnd() : rec.getUnclippedEnd());
            final List<CigarElement> cl = new ArrayList<>(cigar.getCigarElements());
            if (!this.removeClip) {
                // replace clip S/H by match M
                for (int i = 0; i < cl.size(); ++i) {
                    final CigarElement ce = cl.get(i);
                    if (ce.getOperator() == CigarOperator.S || ce.getOperator() == CigarOperator.H) {
                        cl.set(i, new CigarElement(ce.getLength(), CigarOperator.M));
                    }
                }
            }
            if (this.extend5 > 0 && refPos1 > 1) {
                if (this.removeClip) {
                    // remove hard + soft clip 5'
                    while (!cl.isEmpty()) {
                        // first
                        final CigarElement ce = cl.get(0);
                        // not a clip
                        if (!(ce.getOperator() == CigarOperator.S || ce.getOperator() == CigarOperator.H)) {
                            break;
                        }
                        if (ce.getOperator() == CigarOperator.S) {
                            sbs.replace(0, ce.getLength(), "");
                            sbq.replace(0, ce.getLength(), "");
                        }
                        // remove first
                        cl.remove(0);
                    }
                }
                final StringBuilder prefix = new StringBuilder(this.extend5);
                // append soft clip 5'
                for (int i = 0; i < this.extend5; ++i) {
                    int x1 = (refPos1 - 1) - i;
                    // break if out of genome
                    if (x1 < 1)
                        break;
                    prefix.insert(0, genomicSequence.charAt(x1 - 1));
                }
                sbs.insert(0, prefix.toString());
                // prepend quality
                for (int i = 0; i < prefix.length(); ++i) sbq.insert(0, defaultQUAL);
                // prepend cigar
                cl.add(0, new CigarElement(prefix.length(), CigarOperator.M));
                // update start pos
                rec.setAlignmentStart(refPos1 - prefix.length());
            }
            if (this.extend3 > 0 && rec.getAlignmentEnd() < genomicSequence.length()) {
                if (this.removeClip) {
                    // remove hard + soft clip 3'
                    while (!cl.isEmpty()) {
                        // last
                        final CigarElement ce = cl.get(cl.size() - 1);
                        // not a clip
                        if (!(ce.getOperator() == CigarOperator.S || ce.getOperator() == CigarOperator.H)) {
                            break;
                        }
                        if (ce.getOperator() == CigarOperator.S) {
                            sbs.setLength(sbs.length() - ce.getLength());
                            sbq.setLength(sbq.length() - ce.getLength());
                        }
                        // remove last
                        cl.remove(cl.size() - 1);
                    }
                }
                int extend = 0;
                for (int pos1 = endAlignment1 + 1; pos1 <= (endAlignment1 + this.extend3) && pos1 <= genomicSequence.length(); ++pos1) {
                    sbs.append(genomicSequence.charAt(pos1 - 1));
                    sbq.append(defaultQUAL);
                    ++extend;
                }
                // append cigar
                cl.add(new CigarElement(extend, CigarOperator.M));
            }
            // simplify cigar
            int idx = 0;
            while (idx + 1 < cl.size()) {
                final CigarElement ce1 = cl.get(idx);
                final CigarElement ce2 = cl.get(idx + 1);
                if (ce1.getOperator() == ce2.getOperator()) {
                    cl.set(idx, new CigarElement(ce1.getLength() + ce2.getLength(), ce1.getOperator()));
                    cl.remove(idx + 1);
                } else {
                    idx++;
                }
            }
            rec.setCigar(new Cigar(cl));
            rec.setReadString(sbs.toString());
            rec.setBaseQualityString(sbq.toString());
            final List<SAMValidationError> errors = rec.isValid();
            if (errors != null && !errors.isEmpty()) {
                for (SAMValidationError err : errors) {
                    LOG.error(err.getMessage());
                }
            }
            // info("changed "+rec.getCigarString()+" to "+newCigarStr+" "+rec.getReadName()+" "+rec.getReadString());
            sfw.addAlignment(rec);
        }
        progress.finish();
        return RETURN_OK;
    } catch (Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(indexedFastaSequenceFile);
        CloserUtil.close(sfr);
        CloserUtil.close(sfw);
    }
}
Also used : SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) GenomicSequence(com.github.lindenb.jvarkit.util.picard.GenomicSequence) SAMFileWriter(htsjdk.samtools.SAMFileWriter) ArrayList(java.util.ArrayList) CigarElement(htsjdk.samtools.CigarElement) IndexedFastaSequenceFile(htsjdk.samtools.reference.IndexedFastaSequenceFile) SamReader(htsjdk.samtools.SamReader) SAMValidationError(htsjdk.samtools.SAMValidationError) Cigar(htsjdk.samtools.Cigar) SAMRecord(htsjdk.samtools.SAMRecord) SAMFileHeader(htsjdk.samtools.SAMFileHeader)
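
After extending the read, SamSlop merges neighbouring cigar elements that share an operator so that, for example, a clip turned into 10M followed by the original 90M becomes 100M. The same merge as a standalone helper, assuming htsjdk only (class and method names CigarMerge / mergeAdjacent are illustrative):

import htsjdk.samtools.Cigar;
import htsjdk.samtools.CigarElement;
import java.util.ArrayList;
import java.util.List;

public final class CigarMerge {

    /** merge consecutive elements having the same operator, e.g. 5M 3M 2I becomes 8M 2I */
    public static Cigar mergeAdjacent(final Cigar cigar) {
        final List<CigarElement> out = new ArrayList<>();
        for (final CigarElement ce : cigar.getCigarElements()) {
            final int lastIdx = out.size() - 1;
            if (!out.isEmpty() && out.get(lastIdx).getOperator() == ce.getOperator()) {
                // grow the previous element instead of appending a duplicate operator
                out.set(lastIdx, new CigarElement(out.get(lastIdx).getLength() + ce.getLength(), ce.getOperator()));
            } else {
                out.add(ce);
            }
        }
        return new Cigar(out);
    }
}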

Example 84 with CigarElement

use of htsjdk.samtools.CigarElement in project gatk-protected by broadinstitute.

the class ReadThreadingGraph method mergeDanglingHead.

/**
     * Actually merge the dangling head if possible
     *
     * @param danglingHeadMergeResult   the result from generating a Cigar for the dangling head against the reference
     * @return 1 if merge was successful, 0 otherwise
     */
@VisibleForTesting
int mergeDanglingHead(final DanglingChainMergeHelper danglingHeadMergeResult) {
    final List<CigarElement> elements = danglingHeadMergeResult.cigar.getCigarElements();
    final CigarElement firstElement = elements.get(0);
    Utils.validateArg(firstElement.getOperator() == CigarOperator.M, "The first Cigar element must be an M");
    final int indexesToMerge = bestPrefixMatch(danglingHeadMergeResult.referencePathString, danglingHeadMergeResult.danglingPathString, firstElement.getLength());
    if (indexesToMerge <= 0) {
        return 0;
    }
    // we can't push back the reference path
    if (indexesToMerge >= danglingHeadMergeResult.referencePath.size() - 1) {
        return 0;
    }
    // but we can manipulate the dangling path if we need to
    if (indexesToMerge >= danglingHeadMergeResult.danglingPath.size() && !extendDanglingPathAgainstReference(danglingHeadMergeResult, indexesToMerge - danglingHeadMergeResult.danglingPath.size() + 2)) {
        return 0;
    }
    addEdge(danglingHeadMergeResult.referencePath.get(indexesToMerge + 1), danglingHeadMergeResult.danglingPath.get(indexesToMerge), ((MyEdgeFactory) getEdgeFactory()).createEdge(false, 1));
    return 1;
}
Also used : CigarElement(htsjdk.samtools.CigarElement) VisibleForTesting(com.google.common.annotations.VisibleForTesting)
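
The only CigarElement logic here is reading the leading element of the Smith-Waterman cigar and insisting it is an M before using its length to bound the prefix match. A GATK-independent sketch of that guard (the class and method names are illustrative, and a plain IllegalArgumentException stands in for GATK's Utils.validateArg):

import htsjdk.samtools.Cigar;
import htsjdk.samtools.CigarElement;
import htsjdk.samtools.CigarOperator;

public final class DanglingHeadCigars {

    /** length of the leading M element; throws if the cigar does not start with an M */
    public static int leadingMatchLength(final Cigar cigar) {
        final CigarElement first = cigar.getCigarElements().get(0);
        if (first.getOperator() != CigarOperator.M) {
            throw new IllegalArgumentException("The first Cigar element must be an M, got " + first.getOperator());
        }
        return first.getLength();
    }
}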

Example 85 with CigarElement

use of htsjdk.samtools.CigarElement in project gatk by broadinstitute.

the class EventMap method processCigarForInitialEvents.

protected void processCigarForInitialEvents() {
    final Cigar cigar = haplotype.getCigar();
    final byte[] alignment = haplotype.getBases();
    int refPos = haplotype.getAlignmentStartHapwrtRef();
    if (refPos < 0) {
        // Protection against SW failures
        return;
    }
    final List<VariantContext> proposedEvents = new ArrayList<>();
    int alignmentPos = 0;
    for (int cigarIndex = 0; cigarIndex < cigar.numCigarElements(); cigarIndex++) {
        final CigarElement ce = cigar.getCigarElement(cigarIndex);
        final int elementLength = ce.getLength();
        switch(ce.getOperator()) {
            case I:
                {
                    if (refPos > 0) {
                        // protect against trying to create insertions/deletions at the beginning of a contig
                        final List<Allele> insertionAlleles = new ArrayList<>();
                        final int insertionStart = refLoc.getStart() + refPos - 1;
                        final byte refByte = ref[refPos - 1];
                        if (BaseUtils.isRegularBase(refByte)) {
                            insertionAlleles.add(Allele.create(refByte, true));
                        }
                        if (cigarIndex == 0 || cigarIndex == cigar.numCigarElements() - 1) {
                        // if the insertion isn't completely resolved in the haplotype, skip it
                        // note this used to emit SYMBOLIC_UNASSEMBLED_EVENT_ALLELE but that seems dangerous
                        } else {
                            byte[] insertionBases = {};
                            // add the padding base
                            insertionBases = ArrayUtils.add(insertionBases, ref[refPos - 1]);
                            insertionBases = ArrayUtils.addAll(insertionBases, Arrays.copyOfRange(alignment, alignmentPos, alignmentPos + elementLength));
                            if (BaseUtils.isAllRegularBases(insertionBases)) {
                                insertionAlleles.add(Allele.create(insertionBases, false));
                            }
                        }
                        if (insertionAlleles.size() == 2) {
                            // found a proper ref and alt allele
                            proposedEvents.add(new VariantContextBuilder(sourceNameToAdd, refLoc.getContig(), insertionStart, insertionStart, insertionAlleles).make());
                        }
                    }
                    alignmentPos += elementLength;
                    break;
                }
            case S:
                {
                    alignmentPos += elementLength;
                    break;
                }
            case D:
                {
                    if (refPos > 0) {
                        // protect against trying to create insertions/deletions at the beginning of a contig
                        // add padding base
                        final byte[] deletionBases = Arrays.copyOfRange(ref, refPos - 1, refPos + elementLength);
                        final List<Allele> deletionAlleles = new ArrayList<>();
                        final int deletionStart = refLoc.getStart() + refPos - 1;
                        final byte refByte = ref[refPos - 1];
                        if (BaseUtils.isRegularBase(refByte) && BaseUtils.isAllRegularBases(deletionBases)) {
                            deletionAlleles.add(Allele.create(deletionBases, true));
                            deletionAlleles.add(Allele.create(refByte, false));
                            proposedEvents.add(new VariantContextBuilder(sourceNameToAdd, refLoc.getContig(), deletionStart, deletionStart + elementLength, deletionAlleles).make());
                        }
                    }
                    refPos += elementLength;
                    break;
                }
            case M:
            case EQ:
            case X:
                {
                    for (int iii = 0; iii < elementLength; iii++) {
                        final byte refByte = ref[refPos];
                        final byte altByte = alignment[alignmentPos];
                        if (refByte != altByte) {
                            // SNP!
                            if (BaseUtils.isRegularBase(refByte) && BaseUtils.isRegularBase(altByte)) {
                                final List<Allele> snpAlleles = new ArrayList<>();
                                snpAlleles.add(Allele.create(refByte, true));
                                snpAlleles.add(Allele.create(altByte, false));
                                proposedEvents.add(new VariantContextBuilder(sourceNameToAdd, refLoc.getContig(), refLoc.getStart() + refPos, refLoc.getStart() + refPos, snpAlleles).make());
                            }
                        }
                        refPos++;
                        alignmentPos++;
                    }
                    break;
                }
            case N:
            case H:
            case P:
            default:
                throw new GATKException("Unsupported cigar operator created during SW alignment: " + ce.getOperator());
        }
    }
    for (final VariantContext proposedEvent : proposedEvents) addVC(proposedEvent, true);
}
Also used : Cigar(htsjdk.samtools.Cigar) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) VariantContext(htsjdk.variant.variantcontext.VariantContext) GATKException(org.broadinstitute.hellbender.exceptions.GATKException) CigarElement(htsjdk.samtools.CigarElement)
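
The switch above is, at its core, a coordinate walk: every CigarElement advances the reference offset and/or the haplotype offset according to its operator, and variant events are emitted along the way. A compact sketch of just that bookkeeping, driven by CigarOperator's consumesReferenceBases()/consumesReadBases() flags (the class name CigarWalk is illustrative; the GATK method enumerates I, D, S and M/EQ/X explicitly because it also has to build the alleles):

import htsjdk.samtools.Cigar;
import htsjdk.samtools.CigarElement;
import htsjdk.samtools.CigarOperator;

public final class CigarWalk {

    /** print the reference and read offsets at which each cigar element starts */
    public static void walk(final Cigar cigar) {
        int refOffset = 0;
        int readOffset = 0;
        for (final CigarElement ce : cigar.getCigarElements()) {
            final CigarOperator op = ce.getOperator();
            System.out.println(op + " len=" + ce.getLength() + " refOffset=" + refOffset + " readOffset=" + readOffset);
            // advance each coordinate only if the operator consumes it
            if (op.consumesReferenceBases()) {
                refOffset += ce.getLength();
            }
            if (op.consumesReadBases()) {
                readOffset += ce.getLength();
            }
        }
    }
}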

Aggregations

CigarElement (htsjdk.samtools.CigarElement): 164
Cigar (htsjdk.samtools.Cigar): 97
ArrayList (java.util.ArrayList): 50
SAMRecord (htsjdk.samtools.SAMRecord): 49
CigarOperator (htsjdk.samtools.CigarOperator): 34
SAMRecordIterator (htsjdk.samtools.SAMRecordIterator): 32
SamReader (htsjdk.samtools.SamReader): 31
SAMFileHeader (htsjdk.samtools.SAMFileHeader): 24
SAMSequenceDictionaryProgress (com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress): 22
Test (org.testng.annotations.Test): 19
SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary): 17
BaseTest (org.broadinstitute.hellbender.utils.test.BaseTest): 17
File (java.io.File): 16
SAMFileWriter (htsjdk.samtools.SAMFileWriter): 15
SAMReadGroupRecord (htsjdk.samtools.SAMReadGroupRecord): 14
Interval (htsjdk.samtools.util.Interval): 14
IOException (java.io.IOException): 14
VisibleForTesting (com.google.common.annotations.VisibleForTesting): 13
List (java.util.List): 13
GATKException (org.broadinstitute.hellbender.exceptions.GATKException): 13