Search in sources :

Example 31 with SAMReadGroupRecord

use of htsjdk.samtools.SAMReadGroupRecord in project jvarkit by lindenb.

the class FindAllCoverageAtPosition method scan.

private void scan(final BufferedReader in, final Set<Mutation> mutations) throws Exception {
    final String DEFAULT_SAMPLE_NAME = "(undefined)";
    String line;
    while ((line = in.readLine()) != null) {
        if (out.checkError())
            break;
        if (line.isEmpty() || line.startsWith("#"))
            continue;
        File f = new File(line);
        if (!f.exists())
            continue;
        if (!f.isFile())
            continue;
        if (!f.canRead())
            continue;
        String filename = f.getName();
        if (filename.endsWith(".cram")) {
            LOG.warn("Sorry CRAM is not supported " + filename);
            continue;
        }
        if (!filename.endsWith(".bam"))
            continue;
        SamReader samReader = null;
        SAMRecordIterator iter = null;
        try {
            samReader = this.samReaderFactory.open(f);
            if (!samReader.hasIndex()) {
                LOG.warn("no index for " + f);
                continue;
            }
            final SAMFileHeader header = samReader.getFileHeader();
            for (final Mutation src : mutations) {
                final Map<String, CigarAndBases> sample2count = new TreeMap<String, CigarAndBases>();
                for (SAMReadGroupRecord rg : header.getReadGroups()) {
                    if (rg != null) {
                        String sn = this.groupBy.apply(rg);
                        if (sn != null && !sn.trim().isEmpty()) {
                            sample2count.put(sn, new CigarAndBases());
                        }
                    }
                }
                if (sample2count.isEmpty()) {
                    sample2count.put(DEFAULT_SAMPLE_NAME, new CigarAndBases());
                }
                final Mutation m = convertFromSamHeader(f, header, src);
                if (m == null)
                    continue;
                iter = samReader.query(m.chrom, m.pos - 1, m.pos + 1, false);
                while (iter.hasNext()) {
                    final SAMRecord rec = iter.next();
                    if (rec.getReadUnmappedFlag())
                        continue;
                    if (this.filter.filterOut(rec))
                        continue;
                    final Cigar cigar = rec.getCigar();
                    if (cigar == null)
                        continue;
                    final String readString = rec.getReadString().toUpperCase();
                    String sampleName = DEFAULT_SAMPLE_NAME;
                    final SAMReadGroupRecord rg = rec.getReadGroup();
                    if (rg != null) {
                        String sn = groupBy.apply(rg);
                        if (!StringUtil.isBlank(sn)) {
                            sampleName = sn;
                        }
                    }
                    CigarAndBases counter = sample2count.get(sampleName);
                    if (counter == null) {
                        counter = new CigarAndBases();
                        sample2count.put(sampleName, counter);
                    }
                    int ref = rec.getUnclippedStart();
                    int readPos = 0;
                    for (int k = 0; k < cigar.numCigarElements() && ref < m.pos + 1; ++k) {
                        final CigarElement ce = cigar.getCigarElement(k);
                        final CigarOperator op = ce.getOperator();
                        switch(op) {
                            case P:
                                break;
                            case I:
                                {
                                    if (ref == m.pos) {
                                        counter.operators.incr(op);
                                        counter.bases.incr(INSERTION_CHAR);
                                    }
                                    readPos += ce.getLength();
                                    break;
                                }
                            case D:
                            case N:
                            case M:
                            case X:
                            case EQ:
                            case H:
                            case S:
                                {
                                    for (int i = 0; i < ce.getLength(); ++i) {
                                        if (ref == m.pos) {
                                            counter.operators.incr(op);
                                            switch(op) {
                                                case M:
                                                case X:
                                                case EQ:
                                                    counter.bases.incr(readString.charAt(readPos));
                                                    break;
                                                case D:
                                                case N:
                                                    counter.bases.incr(DELETION_CHAR);
                                                    break;
                                                default:
                                                    break;
                                            }
                                            break;
                                        }
                                        if (op.consumesReadBases())
                                            ++readPos;
                                        ref++;
                                    }
                                    break;
                                }
                            default:
                                throw new RuntimeException("unknown operator:" + op);
                        }
                    }
                }
                iter.close();
                iter = null;
                for (final String sample : sample2count.keySet()) {
                    final CigarAndBases counter = sample2count.get(sample);
                    out.print(f);
                    out.print('\t');
                    out.print(m.chrom);
                    out.print('\t');
                    out.print(m.pos);
                    if (this.indexedFastaSequenceFile != null) {
                        out.print('\t');
                        out.print(getReferenceAt(m.chrom, m.pos));
                    }
                    out.print('\t');
                    out.print(sample);
                    out.print('\t');
                    out.print(counter.operators.count(CigarOperator.M) + counter.operators.count(CigarOperator.EQ) + counter.operators.count(CigarOperator.X));
                    for (final CigarOperator op : CigarOperator.values()) {
                        out.print('\t');
                        out.print(counter.operators.count(op));
                    }
                    for (char c : BASES_To_PRINT) {
                        out.print('\t');
                        out.print(counter.bases.count(c));
                    }
                    out.println();
                }
            }
        // end of loop over mutations
        } catch (final Exception err) {
            LOG.error(err);
            throw err;
        } finally {
            CloserUtil.close(iter);
            CloserUtil.close(samReader);
        }
    }
}
Also used : SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) SAMReadGroupRecord(htsjdk.samtools.SAMReadGroupRecord) CigarOperator(htsjdk.samtools.CigarOperator) TreeMap(java.util.TreeMap) CigarElement(htsjdk.samtools.CigarElement) SamReader(htsjdk.samtools.SamReader) Cigar(htsjdk.samtools.Cigar) SAMRecord(htsjdk.samtools.SAMRecord) SAMFileHeader(htsjdk.samtools.SAMFileHeader) File(java.io.File) IndexedFastaSequenceFile(htsjdk.samtools.reference.IndexedFastaSequenceFile)

Example 32 with SAMReadGroupRecord

use of htsjdk.samtools.SAMReadGroupRecord in project jvarkit by lindenb.

the class FixVcfMissingGenotypes method fetchDP.

/**
 * return DP at given position
 */
private int fetchDP(final VariantContext ctx, final String sample, List<SamReader> samReaders) {
    int depth = 0;
    if (samReaders == null)
        samReaders = Collections.emptyList();
    for (final SamReader sr : samReaders) {
        final SAMRecordIterator iter = sr.query(ctx.getContig(), ctx.getStart(), ctx.getEnd(), false);
        while (iter.hasNext()) {
            final SAMRecord rec = iter.next();
            if (rec.getReadUnmappedFlag())
                continue;
            if (filter.filterOut(rec))
                continue;
            final SAMReadGroupRecord rg = rec.getReadGroup();
            if (!sample.equals(rg.getSample()))
                continue;
            final Cigar cigar = rec.getCigar();
            if (cigar == null)
                continue;
            int refPos = rec.getAlignmentStart();
            for (final CigarElement ce : cigar.getCigarElements()) {
                if (refPos > ctx.getEnd())
                    break;
                if (!ce.getOperator().consumesReferenceBases())
                    continue;
                if (ce.getOperator().consumesReadBases()) {
                    for (int n = 0; n < ce.getLength(); ++n) {
                        if (refPos + n < ctx.getStart())
                            continue;
                        if (refPos + n > ctx.getEnd())
                            break;
                        depth++;
                    }
                }
                refPos += ce.getLength();
            }
        }
        iter.close();
    }
    depth /= (1 + ctx.getEnd() - ctx.getStart());
    return depth;
}
Also used : SamReader(htsjdk.samtools.SamReader) Cigar(htsjdk.samtools.Cigar) SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) SAMRecord(htsjdk.samtools.SAMRecord) SAMReadGroupRecord(htsjdk.samtools.SAMReadGroupRecord) CigarElement(htsjdk.samtools.CigarElement)

Example 33 with SAMReadGroupRecord

use of htsjdk.samtools.SAMReadGroupRecord in project jvarkit by lindenb.

the class GBrowserHtml method doWork.

@Override
public int doWork(List<String> args) {
    final int DEFAULT_EXTEND_INTERVAL = 0;
    SamReader samReader = null;
    ZipOutputStream zout = null;
    BufferedReader bufReader = null;
    PrintWriter paramsWriter = null;
    JsonWriter paramsJsonWriter = null;
    String line;
    IndexedFastaSequenceFile faidx = null;
    long snapshot_id = 0L;
    final String inputName = oneFileOrNull(args);
    try {
        final SamJsonWriterFactory samJsonWriterFactory = SamJsonWriterFactory.newInstance().printHeader(true).printAttributes(false).printMate(true).printReadQualities(false).closeStreamAtEnd(false);
        if (this.outputFile != null) {
            if (!this.outputFile.getName().endsWith(".zip")) {
                LOG.error("Output file should end with *.zip");
                return -1;
            }
        }
        bufReader = (inputName == null ? IOUtils.openStreamForBufferedReader(stdin()) : IOUtils.openURIForBufferedReading(inputName));
        final File tmpFile1 = File.createTempFile("gbrowse.", ".tmp");
        tmpFile1.deleteOnExit();
        final File tmpFile2 = File.createTempFile("gbrowse.", ".tmp");
        tmpFile2.deleteOnExit();
        paramsWriter = new PrintWriter(tmpFile2);
        paramsWriter.print("var config=");
        paramsWriter.flush();
        paramsJsonWriter = new JsonWriter(paramsWriter);
        paramsJsonWriter.beginArray();
        zout = new ZipOutputStream(super.openFileOrStdoutAsStream(this.outputFile));
        File bamFile = null;
        File faidxFile = null;
        String sampleName = null;
        int extend_interval = DEFAULT_EXTEND_INTERVAL;
        String title = null;
        Interval interval = null;
        while ((line = bufReader.readLine()) != null) {
            LOG.info(line);
            if (line.isEmpty() || line.startsWith("#"))
                continue;
            final int eq = line.indexOf("=");
            final String key = (eq == -1 ? "" : line.substring(0, eq).toLowerCase().trim());
            final String value = (eq == -1 ? "" : line.substring(eq + 1).trim());
            if (key.equals("bam")) {
                if (samReader != null)
                    samReader.close();
                samReader = null;
                bamFile = (value.isEmpty() ? null : new File(value));
            } else if (key.equals("sample")) {
                sampleName = (value.isEmpty() ? null : value);
            } else if (key.equals("title")) {
                title = (value.isEmpty() ? null : value);
            } else if (key.equals("ref") || key.equals("fasta")) {
                if (faidx != null)
                    faidx.close();
                faidx = null;
                faidxFile = (value.isEmpty() ? null : new File(value));
            } else if (key.equals("extend")) {
                extend_interval = (value.isEmpty() ? DEFAULT_EXTEND_INTERVAL : Integer.parseInt(value));
            } else if (key.equals("position") || key.equals("location") || key.equals("interval") || key.equals("goto")) {
                Pattern pat1 = Pattern.compile("([^\\:]+)\\:([\\d,]+)");
                Matcher matcher = pat1.matcher(value);
                if (matcher.matches()) {
                    String c = matcher.group(1);
                    int pos = Integer.parseInt(matcher.group(2).replaceAll("[,]", ""));
                    interval = new Interval(c, Math.max(1, pos - extend_interval), pos + extend_interval);
                    continue;
                }
                pat1 = Pattern.compile("([^\\:]+)\\:([\\d,]+)\\-([\\d,]+)");
                matcher = pat1.matcher(value);
                if (matcher.matches()) {
                    String c = matcher.group(1);
                    int B = Integer.parseInt(matcher.group(2).replaceAll("[,]", ""));
                    int E = Integer.parseInt(matcher.group(3).replaceAll("[,]", ""));
                    if (B > E) {
                        LOG.error("bad interval :" + line);
                        return -1;
                    }
                    interval = new Interval(c, Math.max(1, B - extend_interval), E + extend_interval);
                    continue;
                }
                pat1 = Pattern.compile("([^\\:]+)\\:([\\d,]+)\\+([\\d,]+)");
                matcher = pat1.matcher(value);
                if (matcher.matches()) {
                    String c = matcher.group(1);
                    int B = Integer.parseInt(matcher.group(2).replaceAll("[,]", ""));
                    int x = Integer.parseInt(matcher.group(3).replaceAll("[,]", ""));
                    interval = new Interval(c, Math.max(1, B - (x + extend_interval)), B + (x + extend_interval));
                    continue;
                }
                LOG.error("bad interval :" + line);
                return -1;
            } else if (line.toLowerCase().equals("snapshot")) {
                if (interval == null) {
                    LOG.error("No interval defined!");
                    continue;
                }
                if (bamFile == null) {
                    LOG.error("No BAM file defined!");
                    continue;
                }
                ++snapshot_id;
                LOG.info("open samFile " + bamFile);
                if (samReader == null) {
                    samReader = super.createSamReaderFactory().open(bamFile);
                }
                FileWriter jsonFileWriter = new FileWriter(tmpFile1);
                JsonWriter jsw = new JsonWriter(jsonFileWriter);
                jsw.beginObject();
                jsw.name("interval");
                jsw.beginObject();
                jsw.name("contig");
                jsw.value(interval.getContig());
                jsw.name("start");
                jsw.value(interval.getStart());
                jsw.name("end");
                jsw.value(interval.getEnd());
                jsw.endObject();
                if (faidxFile != null) {
                    if (faidx == null) {
                        faidx = new IndexedFastaSequenceFile(faidxFile);
                    }
                    ReferenceSequence dna = faidx.getSubsequenceAt(interval.getContig(), interval.getStart(), interval.getEnd());
                    jsw.name("reference");
                    jsw.value(dna.getBaseString());
                }
                jsw.name("sam");
                SAMFileWriter samFileWriter = samJsonWriterFactory.open(samReader.getFileHeader(), jsw);
                SAMRecordIterator samRecIter = samReader.queryOverlapping(interval.getContig(), interval.getStart(), interval.getEnd());
                while (samRecIter.hasNext()) {
                    final SAMRecord rec = samRecIter.next();
                    if (sampleName != null && !sampleName.isEmpty()) {
                        SAMReadGroupRecord srg = rec.getReadGroup();
                        if (srg == null)
                            continue;
                        if (!sampleName.equals(srg.getSample()))
                            continue;
                    }
                    if (rec.getReadUnmappedFlag())
                        continue;
                    samFileWriter.addAlignment(rec);
                }
                samRecIter.close();
                samFileWriter.close();
                jsw.endObject();
                jsw.flush();
                jsw.close();
                jsonFileWriter.close();
                ZipEntry entry = new ZipEntry(this.prefix + "_snapshot." + String.format("%05d", snapshot_id) + ".json");
                zout.putNextEntry(entry);
                IOUtils.copyTo(tmpFile1, zout);
                zout.closeEntry();
                tmpFile1.delete();
                paramsJsonWriter.beginObject();
                paramsJsonWriter.name("title");
                paramsJsonWriter.value(title == null ? interval.getContig() + ":" + interval.getStart() + "-" + interval.getEnd() + (sampleName == null ? "" : " " + sampleName) : title);
                paramsJsonWriter.name("interval");
                paramsJsonWriter.beginObject();
                paramsJsonWriter.name("contig");
                paramsJsonWriter.value(interval.getContig());
                paramsJsonWriter.name("start");
                paramsJsonWriter.value(interval.getStart());
                paramsJsonWriter.name("end");
                paramsJsonWriter.value(interval.getEnd());
                paramsJsonWriter.endObject();
                if (sampleName != null && !sampleName.isEmpty()) {
                    paramsJsonWriter.name("sample");
                    paramsJsonWriter.value(sampleName);
                }
                paramsJsonWriter.name("href");
                paramsJsonWriter.value("_snapshot." + String.format("%05d", snapshot_id) + ".json");
                paramsJsonWriter.endObject();
            } else if (line.toLowerCase().equals("exit") || line.toLowerCase().equals("quit")) {
                break;
            }
        }
        bufReader.close();
        bufReader = null;
        for (final String jssrc : new String[] { "gbrowse", "hershey", "samtools", "com.github.lindenb.jvarkit.tools.misc.GBrowserHtml" }) {
            InputStream is = this.getClass().getResourceAsStream("/META-INF/js/" + jssrc + ".js");
            if (is == null) {
                LOG.error("Cannot read resource /META-INF/js/" + jssrc + ".js");
                return -1;
            }
            ZipEntry entry = new ZipEntry(this.prefix + jssrc + ".js");
            entry.setComment("JAVASCRIPT SOURCE " + jssrc);
            zout.putNextEntry(entry);
            IOUtils.copyTo(is, zout);
            CloserUtil.close(is);
            zout.closeEntry();
        }
        // save params.js
        paramsJsonWriter.endArray();
        paramsJsonWriter.flush();
        paramsWriter.println(";");
        paramsWriter.flush();
        paramsJsonWriter.close();
        paramsWriter.close();
        zout.putNextEntry(new ZipEntry(this.prefix + "config.js"));
        IOUtils.copyTo(tmpFile2, zout);
        zout.closeEntry();
        tmpFile2.delete();
        // save index.html
        zout.putNextEntry(new ZipEntry(this.prefix + "index.html"));
        XMLOutputFactory xof = XMLOutputFactory.newFactory();
        XMLStreamWriter w = xof.createXMLStreamWriter(zout, "UTF-8");
        w.writeStartElement("html");
        w.writeStartElement("head");
        w.writeEmptyElement("meta");
        w.writeAttribute("http-equiv", "Content-Type");
        w.writeAttribute("content", "text/html; charset=utf-8");
        w.writeEmptyElement("meta");
        w.writeAttribute("http-equiv", "author");
        w.writeAttribute("content", "Pierre Lindenbaum Phd @yokofakun");
        w.writeStartElement("title");
        w.writeCharacters(getProgramName() + ":" + getVersion());
        // title
        w.writeEndElement();
        w.writeStartElement("style");
        w.writeAttribute("type", "text/css");
        w.writeCharacters("body	{ color:rgb(50,50,50); margin:20px; padding:20px; font: 12pt Arial, Helvetica, sans-serif; }\n");
        w.writeCharacters("label { text-align:right;	 }\n");
        w.writeCharacters("button	{ border: 1px solid; background-image:-moz-linear-gradient( top, gray, lightgray ); }\n");
        w.writeCharacters("canvas { image-rendering:auto;}\n");
        w.writeCharacters(".me 	{ padding-top:100px; font-size:80%; }\n");
        // style
        w.writeEndElement();
        for (final String src : new String[] { "samtools", "gbrowse", "hershey", "config", "com.github.lindenb.jvarkit.tools.misc.GBrowserHtml" }) {
            w.writeStartElement("script");
            w.writeAttribute("type", "text/javascript");
            w.writeAttribute("language", "text/javascript");
            w.writeAttribute("src", src + ".js");
            w.writeCharacters("");
            // script
            w.writeEndElement();
        }
        // head
        w.writeEndElement();
        w.writeStartElement("body");
        w.writeStartElement("div");
        w.writeStartElement("button");
        w.writeAttribute("onclick", "changemenu(-1)");
        w.writeCharacters("prev");
        // button
        w.writeEndElement();
        w.writeStartElement("select");
        w.writeAttribute("id", "menu");
        // menu
        w.writeEndElement();
        w.writeStartElement("button");
        w.writeAttribute("onclick", "changemenu(+1)");
        w.writeCharacters("next");
        // button
        w.writeEndElement();
        // div
        w.writeEndElement();
        w.writeStartElement("div");
        w.writeAttribute("id", "flags");
        // div
        w.writeEndElement();
        w.writeStartElement("div");
        w.writeAttribute("style", "text-align:center;");
        w.writeStartElement("div");
        w.writeAttribute("style", "font-size:200%;margin:10px;");
        w.writeAttribute("id", "browserTitle");
        // div
        w.writeEndElement();
        w.writeStartElement("div");
        w.writeEmptyElement("canvas");
        w.writeAttribute("id", "canvasdoc");
        w.writeAttribute("width", "100");
        w.writeAttribute("height", "100");
        // div
        w.writeEndElement();
        // div
        w.writeEndElement();
        w.writeEmptyElement("hr");
        w.writeStartElement("div");
        w.writeAttribute("class", "me");
        w.writeCharacters("Pierre Lindenbaum PhD. ");
        w.writeStartElement("a");
        w.writeAttribute("href", "https://github.com/lindenb/jvarkit");
        w.writeCharacters("https://github.com/lindenb/jvarkit");
        w.writeEndElement();
        w.writeCharacters(". Tested with Firefox 45.0");
        w.writeEndElement();
        // body
        w.writeEndElement();
        // html
        w.writeEndElement();
        w.flush();
        w.close();
        w = null;
        zout.closeEntry();
        zout.finish();
        zout.close();
        return RETURN_OK;
    } catch (Throwable err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(paramsJsonWriter);
        CloserUtil.close(paramsWriter);
        CloserUtil.close(zout);
        CloserUtil.close(bufReader);
        CloserUtil.close(samReader);
        CloserUtil.close(faidx);
    }
}
Also used : XMLOutputFactory(javax.xml.stream.XMLOutputFactory) SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) Matcher(java.util.regex.Matcher) SamJsonWriterFactory(com.github.lindenb.jvarkit.util.samtools.SamJsonWriterFactory) FileWriter(java.io.FileWriter) SAMFileWriter(htsjdk.samtools.SAMFileWriter) SAMReadGroupRecord(htsjdk.samtools.SAMReadGroupRecord) ZipEntry(java.util.zip.ZipEntry) ReferenceSequence(htsjdk.samtools.reference.ReferenceSequence) IndexedFastaSequenceFile(htsjdk.samtools.reference.IndexedFastaSequenceFile) SamReader(htsjdk.samtools.SamReader) XMLStreamWriter(javax.xml.stream.XMLStreamWriter) PrintWriter(java.io.PrintWriter) Pattern(java.util.regex.Pattern) SAMFileWriter(htsjdk.samtools.SAMFileWriter) InputStream(java.io.InputStream) JsonWriter(com.google.gson.stream.JsonWriter) ZipOutputStream(java.util.zip.ZipOutputStream) SAMRecord(htsjdk.samtools.SAMRecord) BufferedReader(java.io.BufferedReader) File(java.io.File) IndexedFastaSequenceFile(htsjdk.samtools.reference.IndexedFastaSequenceFile) Interval(htsjdk.samtools.util.Interval)

Example 34 with SAMReadGroupRecord

use of htsjdk.samtools.SAMReadGroupRecord in project jvarkit by lindenb.

the class LumpyMoreSamples method doWork.

@Override
public int doWork(final List<String> args) {
    VcfIterator r = null;
    VariantContextWriter vcw = null;
    final Map<String, SamReader> sample2samreaders = new HashMap<>();
    try {
        r = super.openVcfIterator(oneFileOrNull(args));
        final VCFHeader headerIn = r.getHeader();
        final SAMSequenceDictionary dict = headerIn.getSequenceDictionary();
        if (dict == null) {
            LOG.error(JvarkitException.VcfDictionaryMissing.getMessage("input vcf"));
            return -1;
        }
        final SamReaderFactory samReaderFactory = SamReaderFactory.makeDefault().validationStringency(ValidationStringency.LENIENT);
        IOUtil.slurpLines(this.bamFileList).stream().forEach(F -> {
            if (F.trim().isEmpty())
                return;
            final SamReader sr = samReaderFactory.open(SamInputResource.of(F));
            final SAMFileHeader samHeader = sr.getFileHeader();
            final SAMSequenceDictionary dict2 = samHeader.getSequenceDictionary();
            if (dict2 == null) {
                throw new JvarkitException.BamDictionaryMissing(F);
            }
            if (!SequenceUtil.areSequenceDictionariesEqual(dict, dict2)) {
                throw new JvarkitException.DictionariesAreNotTheSame(dict, dict2);
            }
            for (final SAMReadGroupRecord rg : samHeader.getReadGroups()) {
                final String sample = rg.getSample();
                if (StringUtil.isBlank(sample))
                    continue;
                final SamReader reader = sample2samreaders.get(sample);
                if (reader == null) {
                    sample2samreaders.put(sample, reader);
                } else if (reader == sr) {
                    continue;
                } else {
                    throw new JvarkitException.UserError("more than one sample per bam:" + F);
                }
            }
        });
        final Set<String> inVcfSampleNames = new HashSet<>(headerIn.getSampleNamesInOrder());
        final Set<String> outVcfSampleNames = new HashSet<>(inVcfSampleNames);
        outVcfSampleNames.addAll(sample2samreaders.keySet());
        final VCFHeader headerOut = new VCFHeader(headerIn.getMetaDataInInputOrder(), outVcfSampleNames);
        final VCFFormatHeaderLine SU2 = new VCFFormatHeaderLine("SU2", 1, VCFHeaderLineType.Integer, "Number of pieces of evidence supporting the variant");
        final VCFFormatHeaderLine PE2 = new VCFFormatHeaderLine("PE2", 1, VCFHeaderLineType.Integer, "Number of split reads supporting the variant");
        final VCFFormatHeaderLine SR2 = new VCFFormatHeaderLine("SR2", 1, VCFHeaderLineType.Integer, "Number of paired-end reads supporting the variant");
        headerOut.addMetaDataLine(SU2);
        headerOut.addMetaDataLine(PE2);
        headerOut.addMetaDataLine(SR2);
        vcw = super.openVariantContextWriter(this.outputFile);
        vcw.writeHeader(headerOut);
        while (r.hasNext()) {
            final VariantContext ctx = r.next();
            final StructuralVariantType sttype = ctx.getStructuralVariantType();
            if (sttype == null)
                continue;
            final int tid = dict.getSequenceIndex(ctx.getContig());
            final Map<String, Genotype> genotypeMap = new HashMap<>();
            ctx.getGenotypes().stream().forEach(G -> genotypeMap.put(G.getSampleName(), G));
            for (final String sample : sample2samreaders.keySet()) {
                final SamReader samReader = sample2samreaders.get(sample);
                final SupportingReads sr = new SupportingReads();
                switch(sttype) {
                    case DEL:
                        {
                            int pos = ctx.getStart();
                            int[] ci = confidenceIntervalPos(ctx);
                            final QueryInterval left = new QueryInterval(tid, pos - ci[0], pos + ci[1]);
                            int end = ctx.getEnd();
                            ci = confidenceIntervalEnd(ctx);
                            final QueryInterval right = new QueryInterval(tid, end - ci[0], end + ci[1]);
                            for (final SAMRecord rec : extractSupportingReads(ctx, sample, samReader, new QueryInterval[] { left, right })) {
                                final Cigar cigar = rec.getCigar();
                                if (cigar.isLeftClipped()) {
                                    final QueryInterval qi = new QueryInterval(tid, rec.getUnclippedStart(), rec.getStart() - 1);
                                    if (qi.overlaps(left)) {
                                        sr.splitReads++;
                                        if (rec.getReadPairedFlag())
                                            sr.pairedReads++;
                                    }
                                }
                                if (cigar.isRightClipped()) {
                                    final QueryInterval qi = new QueryInterval(tid, rec.getEnd() + 1, rec.getUnclippedEnd());
                                    if (qi.overlaps(right)) {
                                        sr.splitReads++;
                                        if (rec.getReadPairedFlag())
                                            sr.pairedReads++;
                                    }
                                }
                            }
                            break;
                        }
                    default:
                        break;
                }
                final GenotypeBuilder gb;
                if (genotypeMap.containsKey(sample)) {
                    gb = new GenotypeBuilder(genotypeMap.get(sample));
                } else {
                    gb = new GenotypeBuilder(sample, Arrays.asList(Allele.NO_CALL, Allele.NO_CALL));
                }
                gb.attribute(SR2.getID(), sr.splitReads);
                gb.attribute(PE2.getID(), sr.pairedReads);
                gb.attribute(SU2.getID(), 0);
                genotypeMap.put(sample, gb.make());
            }
            final VariantContextBuilder vcb = new VariantContextBuilder(ctx);
            // add missing samples.
            for (final String sampleName : outVcfSampleNames) {
                if (genotypeMap.containsKey(sampleName))
                    continue;
                genotypeMap.put(sampleName, new GenotypeBuilder(sampleName, Arrays.asList(Allele.NO_CALL, Allele.NO_CALL)).make());
            }
            vcb.genotypes(genotypeMap.values());
            vcw.add(vcb.make());
        }
        r.close();
        r = null;
        sample2samreaders.values().stream().forEach(R -> CloserUtil.close(R));
        LOG.info("done");
        return 0;
    } catch (final Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(r);
    }
}
Also used : HashMap(java.util.HashMap) SAMReadGroupRecord(htsjdk.samtools.SAMReadGroupRecord) VariantContext(htsjdk.variant.variantcontext.VariantContext) QueryInterval(htsjdk.samtools.QueryInterval) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) SamReader(htsjdk.samtools.SamReader) VcfIterator(com.github.lindenb.jvarkit.util.vcf.VcfIterator) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) VCFHeader(htsjdk.variant.vcf.VCFHeader) HashSet(java.util.HashSet) VCFFormatHeaderLine(htsjdk.variant.vcf.VCFFormatHeaderLine) SamReaderFactory(htsjdk.samtools.SamReaderFactory) Genotype(htsjdk.variant.variantcontext.Genotype) GenotypeBuilder(htsjdk.variant.variantcontext.GenotypeBuilder) JvarkitException(com.github.lindenb.jvarkit.lang.JvarkitException) JvarkitException(com.github.lindenb.jvarkit.lang.JvarkitException) Cigar(htsjdk.samtools.Cigar) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) SAMRecord(htsjdk.samtools.SAMRecord) SAMFileHeader(htsjdk.samtools.SAMFileHeader) StructuralVariantType(htsjdk.variant.variantcontext.StructuralVariantType)

Example 35 with SAMReadGroupRecord

use of htsjdk.samtools.SAMReadGroupRecord in project jvarkit by lindenb.

the class LumpyMoreSamples method extractSupportingReads.

private List<SAMRecord> extractSupportingReads(final VariantContext ctx, final String sample, final SamReader reader, QueryInterval[] intervals) {
    intervals = QueryInterval.optimizeIntervals(intervals);
    final List<SAMRecord> L = new ArrayList<>();
    final CloseableIterator<SAMRecord> iter = reader.query(intervals, false);
    while (iter.hasNext()) {
        final SAMRecord rec = iter.next();
        if (rec.getReadUnmappedFlag())
            continue;
        if (rec.getDuplicateReadFlag())
            continue;
        if (rec.getCigar() == null || rec.getCigar().isEmpty())
            continue;
        final SAMReadGroupRecord rg = rec.getReadGroup();
        if (rg == null)
            continue;
        if (!sample.equals(rg.getSample()))
            continue;
        L.add(rec);
    }
    iter.close();
    return L;
}
Also used : SAMRecord(htsjdk.samtools.SAMRecord) SAMReadGroupRecord(htsjdk.samtools.SAMReadGroupRecord) ArrayList(java.util.ArrayList)

Aggregations

SAMReadGroupRecord (htsjdk.samtools.SAMReadGroupRecord)81 SAMFileHeader (htsjdk.samtools.SAMFileHeader)48 SAMRecord (htsjdk.samtools.SAMRecord)33 Test (org.testng.annotations.Test)31 SamReader (htsjdk.samtools.SamReader)29 BaseTest (org.broadinstitute.hellbender.utils.test.BaseTest)26 File (java.io.File)23 ArrayList (java.util.ArrayList)22 SAMRecordIterator (htsjdk.samtools.SAMRecordIterator)20 GATKRead (org.broadinstitute.hellbender.utils.read.GATKRead)20 HashMap (java.util.HashMap)18 CigarElement (htsjdk.samtools.CigarElement)17 Cigar (htsjdk.samtools.Cigar)16 HashSet (java.util.HashSet)16 SAMFileWriter (htsjdk.samtools.SAMFileWriter)15 SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary)15 CigarOperator (htsjdk.samtools.CigarOperator)14 IOException (java.io.IOException)14 SAMSequenceDictionaryProgress (com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress)13 List (java.util.List)12