Search in sources :

Example 86 with SamReader

use of htsjdk.samtools.SamReader in project jvarkit by lindenb.

the class LumpyMoreSamples method doWork.

@Override
public int doWork(final List<String> args) {
    VcfIterator r = null;
    VariantContextWriter vcw = null;
    final Map<String, SamReader> sample2samreaders = new HashMap<>();
    try {
        r = super.openVcfIterator(oneFileOrNull(args));
        final VCFHeader headerIn = r.getHeader();
        final SAMSequenceDictionary dict = headerIn.getSequenceDictionary();
        if (dict == null) {
            LOG.error(JvarkitException.VcfDictionaryMissing.getMessage("input vcf"));
            return -1;
        }
        final SamReaderFactory samReaderFactory = SamReaderFactory.makeDefault().validationStringency(ValidationStringency.LENIENT);
        IOUtil.slurpLines(this.bamFileList).stream().forEach(F -> {
            if (F.trim().isEmpty())
                return;
            final SamReader sr = samReaderFactory.open(SamInputResource.of(F));
            final SAMFileHeader samHeader = sr.getFileHeader();
            final SAMSequenceDictionary dict2 = samHeader.getSequenceDictionary();
            if (dict2 == null) {
                throw new JvarkitException.BamDictionaryMissing(F);
            }
            if (!SequenceUtil.areSequenceDictionariesEqual(dict, dict2)) {
                throw new JvarkitException.DictionariesAreNotTheSame(dict, dict2);
            }
            for (final SAMReadGroupRecord rg : samHeader.getReadGroups()) {
                final String sample = rg.getSample();
                if (StringUtil.isBlank(sample))
                    continue;
                final SamReader reader = sample2samreaders.get(sample);
                if (reader == null) {
                    sample2samreaders.put(sample, reader);
                } else if (reader == sr) {
                    continue;
                } else {
                    throw new JvarkitException.UserError("more than one sample per bam:" + F);
                }
            }
        });
        final Set<String> inVcfSampleNames = new HashSet<>(headerIn.getSampleNamesInOrder());
        final Set<String> outVcfSampleNames = new HashSet<>(inVcfSampleNames);
        outVcfSampleNames.addAll(sample2samreaders.keySet());
        final VCFHeader headerOut = new VCFHeader(headerIn.getMetaDataInInputOrder(), outVcfSampleNames);
        final VCFFormatHeaderLine SU2 = new VCFFormatHeaderLine("SU2", 1, VCFHeaderLineType.Integer, "Number of pieces of evidence supporting the variant");
        final VCFFormatHeaderLine PE2 = new VCFFormatHeaderLine("PE2", 1, VCFHeaderLineType.Integer, "Number of split reads supporting the variant");
        final VCFFormatHeaderLine SR2 = new VCFFormatHeaderLine("SR2", 1, VCFHeaderLineType.Integer, "Number of paired-end reads supporting the variant");
        headerOut.addMetaDataLine(SU2);
        headerOut.addMetaDataLine(PE2);
        headerOut.addMetaDataLine(SR2);
        vcw = super.openVariantContextWriter(this.outputFile);
        vcw.writeHeader(headerOut);
        while (r.hasNext()) {
            final VariantContext ctx = r.next();
            final StructuralVariantType sttype = ctx.getStructuralVariantType();
            if (sttype == null)
                continue;
            final int tid = dict.getSequenceIndex(ctx.getContig());
            final Map<String, Genotype> genotypeMap = new HashMap<>();
            ctx.getGenotypes().stream().forEach(G -> genotypeMap.put(G.getSampleName(), G));
            for (final String sample : sample2samreaders.keySet()) {
                final SamReader samReader = sample2samreaders.get(sample);
                final SupportingReads sr = new SupportingReads();
                switch(sttype) {
                    case DEL:
                        {
                            int pos = ctx.getStart();
                            int[] ci = confidenceIntervalPos(ctx);
                            final QueryInterval left = new QueryInterval(tid, pos - ci[0], pos + ci[1]);
                            int end = ctx.getEnd();
                            ci = confidenceIntervalEnd(ctx);
                            final QueryInterval right = new QueryInterval(tid, end - ci[0], end + ci[1]);
                            for (final SAMRecord rec : extractSupportingReads(ctx, sample, samReader, new QueryInterval[] { left, right })) {
                                final Cigar cigar = rec.getCigar();
                                if (cigar.isLeftClipped()) {
                                    final QueryInterval qi = new QueryInterval(tid, rec.getUnclippedStart(), rec.getStart() - 1);
                                    if (qi.overlaps(left)) {
                                        sr.splitReads++;
                                        if (rec.getReadPairedFlag())
                                            sr.pairedReads++;
                                    }
                                }
                                if (cigar.isRightClipped()) {
                                    final QueryInterval qi = new QueryInterval(tid, rec.getEnd() + 1, rec.getUnclippedEnd());
                                    if (qi.overlaps(right)) {
                                        sr.splitReads++;
                                        if (rec.getReadPairedFlag())
                                            sr.pairedReads++;
                                    }
                                }
                            }
                            break;
                        }
                    default:
                        break;
                }
                final GenotypeBuilder gb;
                if (genotypeMap.containsKey(sample)) {
                    gb = new GenotypeBuilder(genotypeMap.get(sample));
                } else {
                    gb = new GenotypeBuilder(sample, Arrays.asList(Allele.NO_CALL, Allele.NO_CALL));
                }
                gb.attribute(SR2.getID(), sr.splitReads);
                gb.attribute(PE2.getID(), sr.pairedReads);
                gb.attribute(SU2.getID(), 0);
                genotypeMap.put(sample, gb.make());
            }
            final VariantContextBuilder vcb = new VariantContextBuilder(ctx);
            // add missing samples.
            for (final String sampleName : outVcfSampleNames) {
                if (genotypeMap.containsKey(sampleName))
                    continue;
                genotypeMap.put(sampleName, new GenotypeBuilder(sampleName, Arrays.asList(Allele.NO_CALL, Allele.NO_CALL)).make());
            }
            vcb.genotypes(genotypeMap.values());
            vcw.add(vcb.make());
        }
        r.close();
        r = null;
        sample2samreaders.values().stream().forEach(R -> CloserUtil.close(R));
        LOG.info("done");
        return 0;
    } catch (final Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(r);
    }
}
Also used : HashMap(java.util.HashMap) SAMReadGroupRecord(htsjdk.samtools.SAMReadGroupRecord) VariantContext(htsjdk.variant.variantcontext.VariantContext) QueryInterval(htsjdk.samtools.QueryInterval) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) SamReader(htsjdk.samtools.SamReader) VcfIterator(com.github.lindenb.jvarkit.util.vcf.VcfIterator) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) VCFHeader(htsjdk.variant.vcf.VCFHeader) HashSet(java.util.HashSet) VCFFormatHeaderLine(htsjdk.variant.vcf.VCFFormatHeaderLine) SamReaderFactory(htsjdk.samtools.SamReaderFactory) Genotype(htsjdk.variant.variantcontext.Genotype) GenotypeBuilder(htsjdk.variant.variantcontext.GenotypeBuilder) JvarkitException(com.github.lindenb.jvarkit.lang.JvarkitException) JvarkitException(com.github.lindenb.jvarkit.lang.JvarkitException) Cigar(htsjdk.samtools.Cigar) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) SAMRecord(htsjdk.samtools.SAMRecord) SAMFileHeader(htsjdk.samtools.SAMFileHeader) StructuralVariantType(htsjdk.variant.variantcontext.StructuralVariantType)

Example 87 with SamReader

use of htsjdk.samtools.SamReader in project jvarkit by lindenb.

the class BWAMemDigest method doWork.

@Override
public int doWork(List<String> args) {
    IntervalTreeMap<Boolean> ignore = null;
    DefaultMemOuput output = new DefaultMemOuput();
    final float limitcigar = 0.15f;
    SamReader r = null;
    try {
        r = super.openSamReader(oneFileOrNull(args));
        SAMFileHeader header = r.getFileHeader();
        if (IGNORE_BED != null) {
            LOG.info("open " + IGNORE_BED);
            ignore = new IntervalTreeMap<>();
            BufferedReader in = IOUtils.openFileForBufferedReading(IGNORE_BED);
            String line;
            while ((line = in.readLine()) != null) {
                if (line.isEmpty() || line.startsWith("#"))
                    continue;
                String[] tokens = line.split("[\t]");
                if (tokens.length < 3)
                    continue;
                if (ignore.put(new Interval(tokens[0], Math.max(1, Integer.parseInt(tokens[1]) - (1 + IGNORE_EXTEND)), Integer.parseInt(tokens[2]) + IGNORE_EXTEND), Boolean.TRUE)) {
                    LOG.warn("BED:ignoring " + line);
                }
            }
            in.close();
        }
        OtherCanonicalAlignFactory xPalignFactory = new OtherCanonicalAlignFactory(header);
        SAMRecordIterator iter = r.iterator();
        long readNum = 0L;
        while (iter.hasNext()) {
            SAMRecord record = iter.next();
            ++readNum;
            if (!record.getReadPairedFlag())
                continue;
            if (record.getProperPairFlag())
                continue;
            if (record.getReadFailsVendorQualityCheckFlag())
                continue;
            if (record.getDuplicateReadFlag())
                continue;
            if (record.getReadUnmappedFlag())
                continue;
            if (ignore != null && ignore.containsOverlapping(new Interval(record.getReferenceName(), record.getAlignmentStart(), record.getAlignmentEnd()))) {
                LOG.info("ignore " + record);
                continue;
            }
            float countM = 0f;
            float countS = 0f;
            for (CigarElement c : record.getCigar().getCigarElements()) {
                switch(c.getOperator()) {
                    case M:
                        countM += c.getLength();
                        break;
                    case S:
                        countS += c.getLength();
                        break;
                    default:
                        break;
                }
            }
            if (countM > 10 && ((countS / countM) > (1f - limitcigar) && (countS / countM) < (1f + limitcigar))) {
                output.insertion(record, readNum, countS, countM);
            }
            for (OtherCanonicalAlign xp : xPalignFactory.getXPAligns(record)) {
                if (ignore != null && ignore.containsOverlapping(new Interval(xp.getReferenceName(), xp.getAlignmentStart(), xp.getAlignmentStart()))) {
                    LOG.info("ignore " + record);
                    continue;
                }
                output.xp(record, readNum, xp);
            }
            if (record.getMateUnmappedFlag()) {
                output.orphan(record, readNum);
                continue;
            }
            if (ignore != null && ignore.containsOverlapping(new Interval(record.getMateReferenceName(), record.getMateAlignmentStart(), record.getMateAlignmentStart()))) {
                LOG.info("ignore " + record);
                continue;
            }
            if (record.getReferenceIndex() == record.getMateReferenceIndex()) {
                output.deletion(record, readNum);
            } else {
                output.transloc(record, readNum);
            }
        }
    } catch (Exception e) {
        e.printStackTrace();
        LOG.error(e);
        return -1;
    } finally {
        CloserUtil.close(r);
        CloserUtil.close(output);
    }
    return 0;
}
Also used : SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) OtherCanonicalAlign(com.github.lindenb.jvarkit.util.picard.OtherCanonicalAlign) OtherCanonicalAlignFactory(com.github.lindenb.jvarkit.util.picard.OtherCanonicalAlignFactory) CigarElement(htsjdk.samtools.CigarElement) IOException(java.io.IOException) SamReader(htsjdk.samtools.SamReader) SAMRecord(htsjdk.samtools.SAMRecord) BufferedReader(java.io.BufferedReader) SAMFileHeader(htsjdk.samtools.SAMFileHeader) Interval(htsjdk.samtools.util.Interval)

Example 88 with SamReader

use of htsjdk.samtools.SamReader in project jvarkit by lindenb.

the class BamClipToInsertion method doWork.

@Override
public int doWork(final List<String> args) {
    SAMRecordIterator iter = null;
    SamReader sfr = null;
    SAMFileWriter sfw = null;
    try {
        sfr = super.openSamReader(oneFileOrNull(args));
        final SAMFileHeader header1 = sfr.getFileHeader();
        if (header1 == null) {
            LOG.error("File header missing");
            return -1;
        }
        if (header1.getSortOrder() != SortOrder.coordinate) {
            LOG.error("Input is not sorted on coordinate.");
            return -1;
        }
        final SAMFileHeader header2 = header1.clone();
        header2.addComment(getProgramName() + ":" + getVersion() + ":" + getProgramCommandLine());
        header2.setSortOrder(SortOrder.unsorted);
        sfw = this.writingBamArgs.openSAMFileWriter(output, header2, true);
        final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(header1);
        iter = sfr.iterator();
        String curContig = null;
        LinkedList<SamAndCigar> buffer = new LinkedList<>();
        for (; ; ) {
            SAMRecord rec = null;
            if (iter.hasNext()) {
                rec = progress.watch(iter.next());
                // ignore unmapped reads
                if (rec.getReadUnmappedFlag()) {
                    sfw.addAlignment(rec);
                    continue;
                }
            }
            if (rec == null || (curContig != null && !curContig.equals(rec.getReferenceName()))) {
                for (final SamAndCigar r : buffer) sfw.addAlignment(r.getSAMRecord());
                buffer.clear();
                // we're done
                if (rec == null)
                    break;
                curContig = rec.getReferenceName();
            }
            final SamAndCigar sac = new SamAndCigar(rec);
            if (!sac.containsIorS) {
                sfw.addAlignment(rec);
                continue;
            }
            buffer.add(sac);
            int i = 0;
            while (i < buffer.size()) {
                final SamAndCigar ri = buffer.get(i);
                if (ri.getSAMRecord().getUnclippedEnd() < rec.getUnclippedStart()) {
                    for (int j = 0; j < buffer.size(); ++j) {
                        if (i == j)
                            continue;
                        ri.merge(buffer.get(j));
                    }
                    sfw.addAlignment(ri.build());
                    buffer.remove(i);
                } else {
                    ++i;
                }
            }
        }
        progress.finish();
        LOG.info("done");
        return RETURN_OK;
    } catch (Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(iter);
        CloserUtil.close(sfr);
        CloserUtil.close(sfw);
    }
}
Also used : SamReader(htsjdk.samtools.SamReader) SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) SAMFileWriter(htsjdk.samtools.SAMFileWriter) SAMRecord(htsjdk.samtools.SAMRecord) SAMFileHeader(htsjdk.samtools.SAMFileHeader) LinkedList(java.util.LinkedList)

Example 89 with SamReader

use of htsjdk.samtools.SamReader in project jvarkit by lindenb.

the class BamTile method doWork.

@Override
public int doWork(final List<String> args) {
    SAMRecordIterator iter = null;
    SamReader sfr = null;
    SAMFileWriter sfw = null;
    try {
        sfr = openSamReader(oneFileOrNull(args));
        final SAMFileHeader header1 = sfr.getFileHeader();
        if (header1 == null) {
            LOG.error("File header missing");
            return -1;
        }
        if (header1.getSortOrder() != SAMFileHeader.SortOrder.coordinate) {
            LOG.error("File header not sorted on coordinate");
            return -1;
        }
        final SAMFileHeader header2 = header1.clone();
        header2.addComment("BamTile:" + getVersion() + ":" + getProgramCommandLine());
        sfw = this.writingBamArgs.openSAMFileWriter(this.outputFile, header2, true);
        final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(header1);
        iter = sfr.iterator();
        final LinkedList<SAMRecord> buffer = new LinkedList<>();
        for (; ; ) {
            SAMRecord rec = null;
            if (iter.hasNext()) {
                rec = progress.watch(iter.next());
                if (rec.getReadUnmappedFlag())
                    continue;
                if (this.filterOut.filterOut(rec))
                    continue;
                if (!buffer.isEmpty()) {
                    final SAMRecord last = buffer.getLast();
                    if (this.no_overlap) {
                        if (last.getReferenceIndex() == rec.getReferenceIndex() && last.getEnd() >= rec.getStart()) {
                            continue;
                        }
                    } else {
                        if (last.getReferenceIndex() == rec.getReferenceIndex() && last.getAlignmentStart() <= rec.getAlignmentStart() && last.getAlignmentEnd() >= rec.getAlignmentEnd()) {
                            continue;
                        }
                    }
                }
            }
            if (rec == null || (!buffer.isEmpty() && buffer.getLast().getReferenceIndex() != rec.getReferenceIndex())) {
                while (!buffer.isEmpty()) {
                    sfw.addAlignment(buffer.removeFirst());
                }
                if (rec == null)
                    break;
            }
            buffer.add(rec);
            if (!this.no_overlap && buffer.size() > 2) {
                final int index = buffer.size();
                final SAMRecord prev = buffer.get(index - 3);
                final SAMRecord curr = buffer.get(index - 2);
                final SAMRecord next = buffer.get(index - 1);
                if (prev.getAlignmentEnd() >= next.getAlignmentStart() || curr.getAlignmentEnd() <= prev.getAlignmentEnd()) {
                    buffer.remove(index - 2);
                } else if (curr.getAlignmentStart() == prev.getAlignmentStart() && curr.getAlignmentEnd() > prev.getAlignmentEnd()) {
                    buffer.remove(index - 3);
                }
            }
            while (buffer.size() > 3) {
                sfw.addAlignment(buffer.removeFirst());
            }
        }
        progress.finish();
        sfw.close();
        sfw = null;
        iter.close();
        iter = null;
        sfr.close();
        sfr = null;
        LOG.info("done");
        return RETURN_OK;
    } catch (final Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(iter);
        CloserUtil.close(sfr);
        CloserUtil.close(sfw);
    }
}
Also used : SamReader(htsjdk.samtools.SamReader) SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) SAMFileWriter(htsjdk.samtools.SAMFileWriter) SAMRecord(htsjdk.samtools.SAMRecord) SAMFileHeader(htsjdk.samtools.SAMFileHeader) LinkedList(java.util.LinkedList)

Example 90 with SamReader

use of htsjdk.samtools.SamReader in project jvarkit by lindenb.

the class BamToSql method doWork.

@Override
public int doWork(List<String> args) {
    if (this.faidxFile == null) {
        LOG.error("ref sequence faidx not defined");
        return -1;
    }
    SAMRecordIterator iter = null;
    SamReader sfr = null;
    PrintWriter out = null;
    GenomicSequence genomicSequence = null;
    IndexedFastaSequenceFile indexedFastaSequenceFile = null;
    args = new ArrayList<String>(IOUtils.unrollFiles(args));
    try {
        out = super.openFileOrStdoutAsPrintWriter(this.outputFile);
        indexedFastaSequenceFile = new IndexedFastaSequenceFile(this.faidxFile);
        out.println("CREATE TABLE IF NOT EXISTS SamFile");
        out.println("(");
        out.println("id INTEGER PRIMARY KEY,");
        out.println("filename TEXT");
        out.println(");");
        out.println("CREATE TABLE IF NOT EXISTS Dictionary");
        out.println("(");
        out.println("id INTEGER PRIMARY KEY,");
        out.println("name TEXT NOT NULL,");
        out.println("length INT NOT NULL,");
        out.println("tid INT NOT NULL,");
        out.println("samfile_id INT NOT NULL,");
        out.println("FOREIGN KEY(samfile_id) REFERENCES SamFile(id)");
        out.println(");");
        out.println("CREATE TABLE IF NOT EXISTS ReadGroup");
        out.println("(");
        out.println("id INTEGER PRIMARY KEY,");
        out.println("groupId TEXT NOT NULL,");
        out.println("sample TEXT NOT NULL,");
        out.println("samfile_id INT NOT NULL,");
        out.println("FOREIGN KEY(samfile_id) REFERENCES SamFile(id)");
        out.println(");");
        out.println("CREATE TABLE IF NOT EXISTS Read");
        out.println("(");
        out.println("id INTEGER PRIMARY KEY,");
        out.println("name TEXT NOT NULL,");
        out.println("flag INTEGER NOT NULL,");
        if (this.printflag) {
            for (final SAMFlag flg : SAMFlag.values()) {
                out.println(flg.name() + " INTEGER NOT NULL,");
            }
        }
        out.println("rname TEXT,");
        out.println("pos INTEGER,");
        out.println("mapq INTEGER NOT NULL,");
        out.println("cigar TEXT,");
        out.println("rnext TEXT,");
        out.println("pnext INTEGER,");
        out.println("tlen INTEGER,");
        out.println("sequence TEXT NOT NULL,");
        out.println("qualities TEXT NOT NULL,");
        out.println("samfile_id INT NOT NULL,");
        out.println("group_id INT,");
        out.println("FOREIGN KEY(samfile_id) REFERENCES SamFile(id),");
        out.println("FOREIGN KEY(group_id) REFERENCES ReadGroup(id)");
        out.println(");");
        out.println("CREATE TABLE IF NOT EXISTS Cigar");
        out.println("(");
        out.println("id INTEGER PRIMARY KEY,");
        out.println("read_pos INT ,");
        out.println("read_base TEXT,");
        out.println("read_qual INT ,");
        out.println("ref_pos INT ,");
        out.println("ref_base TEXT,");
        out.println("operator TEXT NOT NULL,");
        out.println("read_id INT NOT NULL,");
        out.println("FOREIGN KEY(read_id) REFERENCES Read(id)");
        out.println(");");
        out.println("begin transaction;");
        int samIndex = 0;
        do {
            final String inputName;
            if (samIndex == 0 && args.isEmpty()) {
                sfr = openSamReader(null);
                inputName = "<stdin>";
            } else {
                inputName = args.get(samIndex);
                sfr = openSamReader(inputName);
            }
            final SAMFileHeader header1 = sfr.getFileHeader();
            if (header1 == null) {
                throw new JvarkitException.FileFormatError("File header missing");
            }
            final SAMSequenceDictionary dict = header1.getSequenceDictionary();
            if (dict == null) {
                throw new JvarkitException.DictionaryMissing("No Dictionary in input");
            }
            final IntervalParser intervalParser = new IntervalParser(dict);
            final Interval userInterval;
            iter = null;
            if (this.regionStr == null || this.regionStr.isEmpty()) {
                LOG.warn("You're currently scanning the whole BAM ???!!!");
                iter = sfr.iterator();
                userInterval = null;
            } else {
                userInterval = intervalParser.parse(this.regionStr);
                if (userInterval == null) {
                    throw new JvarkitException.UserError("cannot parse interval " + this.regionStr);
                }
                iter = sfr.query(userInterval.getContig(), userInterval.getStart(), userInterval.getEnd(), false);
            }
            out.println(String.join(" ", "insert into SamFile(filename) values(", quote(inputName), ");"));
            for (int i = 0; i < dict.size(); ++i) {
                final SAMSequenceRecord ssr = dict.getSequence(i);
                out.println("insert into Dictionary(name,length,tid,samfile_id) select " + quote(inputName) + "," + ssr.getSequenceLength() + "," + i + ",max(id) from SamFile;");
            }
            for (final SAMReadGroupRecord g : header1.getReadGroups()) {
                out.println("insert into ReadGroup(groupId,sample,samfile_id) select " + quote(g.getId()) + "," + quote(g.getSample()) + "," + "max(id) from SamFile;");
            }
            final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(header1);
            while (iter.hasNext()) {
                final SAMRecord rec = progress.watch(iter.next());
                final StringBuilder sql = new StringBuilder();
                sql.append("insert into Read(" + "name,flag,");
                if (this.printflag) {
                    for (final SAMFlag flg : SAMFlag.values()) {
                        sql.append(flg.name()).append(",");
                    }
                }
                sql.append("rname,pos,mapq,cigar,rnext,pnext,tlen,sequence,qualities,group_id,samfile_id) select ");
                sql.append(quote(rec.getReadName())).append(",");
                sql.append(rec.getFlags()).append(",");
                if (this.printflag) {
                    for (final SAMFlag flg : SAMFlag.values()) {
                        sql.append(flg.isSet(rec.getFlags()) ? 1 : 0);
                        sql.append(",");
                    }
                }
                if (rec.getReferenceName() == null || rec.getReferenceName().equals(SAMRecord.NO_ALIGNMENT_REFERENCE_NAME)) {
                    sql.append("NULL,NULL");
                } else {
                    sql.append(quote(rec.getReferenceName()));
                    sql.append(",");
                    sql.append(rec.getAlignmentStart());
                }
                sql.append(",");
                sql.append(rec.getMappingQuality());
                sql.append(",");
                // cigar
                if (rec.getCigarString() == null || rec.getCigarString().equals(SAMRecord.NO_ALIGNMENT_CIGAR)) {
                    sql.append("NULL");
                } else {
                    sql.append(quote(rec.getCigarString()));
                }
                sql.append(",");
                // rnext
                if (rec.getMateReferenceName() == null || rec.getMateReferenceName().equals(SAMRecord.NO_ALIGNMENT_REFERENCE_NAME)) {
                    sql.append("NULL,NULL");
                } else {
                    sql.append(quote(rec.getMateReferenceName()));
                    sql.append(",");
                    sql.append(rec.getMateAlignmentStart());
                }
                sql.append(",");
                // tlen
                sql.append(rec.getInferredInsertSize());
                sql.append(",");
                // sequence
                sql.append(quote(rec.getReadString()));
                sql.append(",");
                // qualities
                sql.append(quote(rec.getBaseQualityString()));
                sql.append(",");
                if (rec.getReadGroup() == null) {
                    sql.append("NULL");
                } else {
                    sql.append("G.id");
                }
                sql.append(",F.id FROM SamFile as F");
                if (rec.getReadGroup() != null) {
                    sql.append(" , ReadGroup as G where G.groupId=").append(quote(rec.getReadGroup().getId())).append(" and F.id = G.samfile_id ");
                }
                sql.append("  ORDER BY F.id DESC LIMIT 1;");
                out.println(sql.toString());
                if (this.printcigar && !rec.getReadUnmappedFlag() && rec.getCigar() != null) {
                    if (genomicSequence == null || !genomicSequence.getChrom().equals(rec.getReferenceName())) {
                        genomicSequence = new GenomicSequence(indexedFastaSequenceFile, rec.getReferenceName());
                    }
                    int ref = rec.getUnclippedStart();
                    final byte[] bases = rec.getReadBases();
                    final byte[] quals = rec.getBaseQualities();
                    int read = 0;
                    for (final CigarElement ce : rec.getCigar()) {
                        final CigarOperator op = ce.getOperator();
                        if (op.equals(CigarOperator.P))
                            continue;
                        for (int i = 0; i < ce.getLength(); ++i) {
                            sql.setLength(0);
                            boolean in_user_interval = true;
                            sql.append("insert into Cigar(operator,read_pos,read_base,read_qual,ref_pos,ref_base,read_id) ");
                            sql.append("select '");
                            sql.append(op.name());
                            sql.append("',");
                            if (userInterval != null && !(rec.getReferenceName().equals(userInterval.getContig()) && ref >= userInterval.getStart() && ref <= userInterval.getEnd())) {
                                in_user_interval = false;
                            }
                            switch(op) {
                                case I:
                                    {
                                        sql.append(read);
                                        sql.append(",");
                                        sql.append("'" + (char) bases[read] + "',");
                                        sql.append("" + quals[read] + "");
                                        sql.append(",");
                                        sql.append("NULL,NULL");
                                        read++;
                                        break;
                                    }
                                case D:
                                case N:
                                case // yes H (hard clip)
                                H:
                                    {
                                        sql.append("NULL,NULL,NULL,");
                                        sql.append(ref);
                                        sql.append(",'");
                                        sql.append((ref < 1 || ref - 1 >= genomicSequence.length()) ? '*' : genomicSequence.charAt(ref - 1));
                                        sql.append("'");
                                        ref++;
                                        break;
                                    }
                                case M:
                                case X:
                                case EQ:
                                case // yes S, soft clip
                                S:
                                    {
                                        sql.append(read);
                                        sql.append(",");
                                        sql.append("'" + (char) bases[read] + "',");
                                        sql.append("" + quals[read] + "");
                                        sql.append(",");
                                        sql.append(ref);
                                        sql.append(",'");
                                        sql.append((ref < 1 || ref - 1 >= genomicSequence.length()) ? '*' : genomicSequence.charAt(ref - 1));
                                        sql.append("'");
                                        ref++;
                                        read++;
                                        break;
                                    }
                                default:
                                    throw new IllegalStateException();
                            }
                            sql.append(", id from Read ORDER BY id DESC LIMIT 1;");
                            if (in_user_interval)
                                out.println(sql.toString());
                        }
                    }
                }
            }
            iter.close();
            iter = null;
            sfr.close();
            sfr = null;
            progress.finish();
            samIndex++;
        } while (samIndex < args.size());
        out.println("COMMIT;");
        out.flush();
        out.close();
        LOG.info("done");
        return 0;
    } catch (Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(iter);
        CloserUtil.close(sfr);
        CloserUtil.close(out);
        CloserUtil.close(indexedFastaSequenceFile);
    }
}
Also used : SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) SAMReadGroupRecord(htsjdk.samtools.SAMReadGroupRecord) SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) IndexedFastaSequenceFile(htsjdk.samtools.reference.IndexedFastaSequenceFile) SamReader(htsjdk.samtools.SamReader) PrintWriter(java.io.PrintWriter) IntervalParser(com.github.lindenb.jvarkit.util.bio.IntervalParser) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) SAMFlag(htsjdk.samtools.SAMFlag) GenomicSequence(com.github.lindenb.jvarkit.util.picard.GenomicSequence) CigarOperator(htsjdk.samtools.CigarOperator) CigarElement(htsjdk.samtools.CigarElement) JvarkitException(com.github.lindenb.jvarkit.lang.JvarkitException) SAMRecord(htsjdk.samtools.SAMRecord) SAMFileHeader(htsjdk.samtools.SAMFileHeader) Interval(htsjdk.samtools.util.Interval)

Aggregations

SamReader (htsjdk.samtools.SamReader)211 SAMRecord (htsjdk.samtools.SAMRecord)137 File (java.io.File)111 SAMRecordIterator (htsjdk.samtools.SAMRecordIterator)89 SAMFileHeader (htsjdk.samtools.SAMFileHeader)83 IOException (java.io.IOException)71 SamReaderFactory (htsjdk.samtools.SamReaderFactory)65 ArrayList (java.util.ArrayList)63 SAMFileWriter (htsjdk.samtools.SAMFileWriter)58 SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary)44 SAMSequenceDictionaryProgress (com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress)42 List (java.util.List)39 CigarElement (htsjdk.samtools.CigarElement)32 IndexedFastaSequenceFile (htsjdk.samtools.reference.IndexedFastaSequenceFile)32 HashMap (java.util.HashMap)31 Cigar (htsjdk.samtools.Cigar)30 SAMReadGroupRecord (htsjdk.samtools.SAMReadGroupRecord)30 PrintWriter (java.io.PrintWriter)27 Interval (htsjdk.samtools.util.Interval)26 HashSet (java.util.HashSet)26