Search in sources :

Example 36 with SAMReadGroupRecord

use of htsjdk.samtools.SAMReadGroupRecord in project jvarkit by lindenb.

the class BamToSql method doWork.

/**
 * Writes the content of one or more SAM/BAM inputs as a stream of SQL statements.
 *
 * <p>The output starts with {@code CREATE TABLE IF NOT EXISTS} statements for the
 * tables {@code SamFile}, {@code Dictionary}, {@code ReadGroup}, {@code Read} and
 * {@code Cigar}, followed by a single transaction holding one {@code insert} per
 * input file, per dictionary sequence, per read group, per read and (when
 * {@code printcigar} is set) per aligned/clipped base.</p>
 *
 * @param args input file names (expanded through {@code IOUtils.unrollFiles});
 *             when empty the reader is opened with a {@code null} name and the
 *             input is labelled {@code <stdin>}
 * @return 0 on success, -1 when the reference is undefined or any exception occurs
 */
@Override
public int doWork(List<String> args) {
    if (this.faidxFile == null) {
        LOG.error("ref sequence faidx not defined");
        return -1;
    }
    SAMRecordIterator iter = null;
    SamReader sfr = null;
    PrintWriter out = null;
    // cached reference contig, reloaded only when the contig of the current read changes
    GenomicSequence genomicSequence = null;
    IndexedFastaSequenceFile indexedFastaSequenceFile = null;
    args = new ArrayList<String>(IOUtils.unrollFiles(args));
    try {
        out = super.openFileOrStdoutAsPrintWriter(this.outputFile);
        indexedFastaSequenceFile = new IndexedFastaSequenceFile(this.faidxFile);
        /* schema */
        out.println("CREATE TABLE IF NOT EXISTS SamFile");
        out.println("(");
        out.println("id INTEGER PRIMARY KEY,");
        out.println("filename TEXT");
        out.println(");");
        out.println("CREATE TABLE IF NOT EXISTS Dictionary");
        out.println("(");
        out.println("id INTEGER PRIMARY KEY,");
        out.println("name TEXT NOT NULL,");
        out.println("length INT NOT NULL,");
        out.println("tid INT NOT NULL,");
        out.println("samfile_id INT NOT NULL,");
        out.println("FOREIGN KEY(samfile_id) REFERENCES SamFile(id)");
        out.println(");");
        out.println("CREATE TABLE IF NOT EXISTS ReadGroup");
        out.println("(");
        out.println("id INTEGER PRIMARY KEY,");
        out.println("groupId TEXT NOT NULL,");
        out.println("sample TEXT NOT NULL,");
        out.println("samfile_id INT NOT NULL,");
        out.println("FOREIGN KEY(samfile_id) REFERENCES SamFile(id)");
        out.println(");");
        out.println("CREATE TABLE IF NOT EXISTS Read");
        out.println("(");
        out.println("id INTEGER PRIMARY KEY,");
        out.println("name TEXT NOT NULL,");
        out.println("flag INTEGER NOT NULL,");
        if (this.printflag) {
            // one extra column per SAM flag, named after the enum constant
            for (final SAMFlag flg : SAMFlag.values()) {
                out.println(flg.name() + " INTEGER NOT NULL,");
            }
        }
        out.println("rname TEXT,");
        out.println("pos INTEGER,");
        out.println("mapq INTEGER NOT NULL,");
        out.println("cigar TEXT,");
        out.println("rnext TEXT,");
        out.println("pnext INTEGER,");
        out.println("tlen INTEGER,");
        out.println("sequence TEXT NOT NULL,");
        out.println("qualities TEXT NOT NULL,");
        out.println("samfile_id INT NOT NULL,");
        out.println("group_id INT,");
        out.println("FOREIGN KEY(samfile_id) REFERENCES SamFile(id),");
        out.println("FOREIGN KEY(group_id) REFERENCES ReadGroup(id)");
        out.println(");");
        out.println("CREATE TABLE IF NOT EXISTS Cigar");
        out.println("(");
        out.println("id INTEGER PRIMARY KEY,");
        out.println("read_pos INT ,");
        out.println("read_base TEXT,");
        out.println("read_qual INT ,");
        out.println("ref_pos INT ,");
        out.println("ref_base TEXT,");
        out.println("operator TEXT NOT NULL,");
        out.println("read_id INT NOT NULL,");
        out.println("FOREIGN KEY(read_id) REFERENCES Read(id)");
        out.println(");");
        out.println("begin transaction;");
        int samIndex = 0;
        // do/while so that the stdin case (no argument) is processed exactly once
        do {
            final String inputName;
            if (samIndex == 0 && args.isEmpty()) {
                sfr = openSamReader(null);
                inputName = "<stdin>";
            } else {
                inputName = args.get(samIndex);
                sfr = openSamReader(inputName);
            }
            final SAMFileHeader header1 = sfr.getFileHeader();
            if (header1 == null) {
                throw new JvarkitException.FileFormatError("File header missing");
            }
            final SAMSequenceDictionary dict = header1.getSequenceDictionary();
            if (dict == null) {
                throw new JvarkitException.DictionaryMissing("No Dictionary in input");
            }
            final IntervalParser intervalParser = new IntervalParser(dict);
            final Interval userInterval;
            iter = null;
            if (this.regionStr == null || this.regionStr.isEmpty()) {
                LOG.warn("You're currently scanning the whole BAM ???!!!");
                iter = sfr.iterator();
                userInterval = null;
            } else {
                userInterval = intervalParser.parse(this.regionStr);
                if (userInterval == null) {
                    throw new JvarkitException.UserError("cannot parse interval " + this.regionStr);
                }
                iter = sfr.query(userInterval.getContig(), userInterval.getStart(), userInterval.getEnd(), false);
            }
            out.println(String.join(" ", "insert into SamFile(filename) values(", quote(inputName), ");"));
            for (int i = 0; i < dict.size(); ++i) {
                final SAMSequenceRecord ssr = dict.getSequence(i);
                // the 'name' column holds the sequence name (it used to receive the input file name)
                out.println("insert into Dictionary(name,length,tid,samfile_id) select " + quote(ssr.getSequenceName()) + "," + ssr.getSequenceLength() + "," + i + ",max(id) from SamFile;");
            }
            for (final SAMReadGroupRecord g : header1.getReadGroups()) {
                out.println("insert into ReadGroup(groupId,sample,samfile_id) select " + quote(g.getId()) + "," + quote(g.getSample()) + "," + "max(id) from SamFile;");
            }
            final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(header1);
            while (iter.hasNext()) {
                final SAMRecord rec = progress.watch(iter.next());
                final StringBuilder sql = new StringBuilder();
                sql.append("insert into Read(" + "name,flag,");
                if (this.printflag) {
                    for (final SAMFlag flg : SAMFlag.values()) {
                        sql.append(flg.name()).append(",");
                    }
                }
                sql.append("rname,pos,mapq,cigar,rnext,pnext,tlen,sequence,qualities,group_id,samfile_id) select ");
                sql.append(quote(rec.getReadName())).append(",");
                sql.append(rec.getFlags()).append(",");
                if (this.printflag) {
                    for (final SAMFlag flg : SAMFlag.values()) {
                        sql.append(flg.isSet(rec.getFlags()) ? 1 : 0);
                        sql.append(",");
                    }
                }
                // rname,pos
                if (rec.getReferenceName() == null || rec.getReferenceName().equals(SAMRecord.NO_ALIGNMENT_REFERENCE_NAME)) {
                    sql.append("NULL,NULL");
                } else {
                    sql.append(quote(rec.getReferenceName()));
                    sql.append(",");
                    sql.append(rec.getAlignmentStart());
                }
                sql.append(",");
                // mapq
                sql.append(rec.getMappingQuality());
                sql.append(",");
                // cigar
                if (rec.getCigarString() == null || rec.getCigarString().equals(SAMRecord.NO_ALIGNMENT_CIGAR)) {
                    sql.append("NULL");
                } else {
                    sql.append(quote(rec.getCigarString()));
                }
                sql.append(",");
                // rnext,pnext
                if (rec.getMateReferenceName() == null || rec.getMateReferenceName().equals(SAMRecord.NO_ALIGNMENT_REFERENCE_NAME)) {
                    sql.append("NULL,NULL");
                } else {
                    sql.append(quote(rec.getMateReferenceName()));
                    sql.append(",");
                    sql.append(rec.getMateAlignmentStart());
                }
                sql.append(",");
                // tlen
                sql.append(rec.getInferredInsertSize());
                sql.append(",");
                // sequence
                sql.append(quote(rec.getReadString()));
                sql.append(",");
                // qualities
                sql.append(quote(rec.getBaseQualityString()));
                sql.append(",");
                // group_id: resolved by joining on ReadGroup when the read has a group
                if (rec.getReadGroup() == null) {
                    sql.append("NULL");
                } else {
                    sql.append("G.id");
                }
                sql.append(",F.id FROM SamFile as F");
                if (rec.getReadGroup() != null) {
                    sql.append(" , ReadGroup as G where G.groupId=").append(quote(rec.getReadGroup().getId())).append(" and F.id = G.samfile_id ");
                }
                // the most recently inserted SamFile row is the current input
                sql.append("  ORDER BY F.id DESC LIMIT 1;");
                out.println(sql.toString());
                if (this.printcigar && !rec.getReadUnmappedFlag() && rec.getCigar() != null) {
                    if (genomicSequence == null || !genomicSequence.getChrom().equals(rec.getReferenceName())) {
                        genomicSequence = new GenomicSequence(indexedFastaSequenceFile, rec.getReferenceName());
                    }
                    // 'ref' walks the reference from the unclipped start, 'read' walks the read bases
                    int ref = rec.getUnclippedStart();
                    final byte[] bases = rec.getReadBases();
                    final byte[] quals = rec.getBaseQualities();
                    int read = 0;
                    for (final CigarElement ce : rec.getCigar()) {
                        final CigarOperator op = ce.getOperator();
                        if (op.equals(CigarOperator.P))
                            continue;
                        for (int i = 0; i < ce.getLength(); ++i) {
                            // the builder is reused for every Cigar row
                            sql.setLength(0);
                            boolean in_user_interval = true;
                            sql.append("insert into Cigar(operator,read_pos,read_base,read_qual,ref_pos,ref_base,read_id) ");
                            sql.append("select '");
                            sql.append(op.name());
                            sql.append("',");
                            // the positions are still advanced below, but rows outside the user interval are not printed
                            if (userInterval != null && !(rec.getReferenceName().equals(userInterval.getContig()) && ref >= userInterval.getStart() && ref <= userInterval.getEnd())) {
                                in_user_interval = false;
                            }
                            switch(op) {
                                case I:
                                    {
                                        // consumes the read only: no reference position/base
                                        sql.append(read);
                                        sql.append(",");
                                        sql.append("'" + (char) bases[read] + "',");
                                        sql.append("" + quals[read] + "");
                                        sql.append(",");
                                        sql.append("NULL,NULL");
                                        read++;
                                        break;
                                    }
                                case D:
                                case N:
                                case H: // yes H (hard clip)
                                    {
                                        // consumes the reference only: no read position/base/quality
                                        sql.append("NULL,NULL,NULL,");
                                        sql.append(ref);
                                        sql.append(",'");
                                        // '*' when the position falls outside the reference sequence
                                        sql.append((ref < 1 || ref - 1 >= genomicSequence.length()) ? '*' : genomicSequence.charAt(ref - 1));
                                        sql.append("'");
                                        ref++;
                                        break;
                                    }
                                case M:
                                case X:
                                case EQ:
                                case S: // yes S, soft clip
                                    {
                                        // consumes both the read and the reference
                                        sql.append(read);
                                        sql.append(",");
                                        sql.append("'" + (char) bases[read] + "',");
                                        sql.append("" + quals[read] + "");
                                        sql.append(",");
                                        sql.append(ref);
                                        sql.append(",'");
                                        sql.append((ref < 1 || ref - 1 >= genomicSequence.length()) ? '*' : genomicSequence.charAt(ref - 1));
                                        sql.append("'");
                                        ref++;
                                        read++;
                                        break;
                                    }
                                default:
                                    throw new IllegalStateException();
                            }
                            // attach the row to the Read inserted just above
                            sql.append(", id from Read ORDER BY id DESC LIMIT 1;");
                            if (in_user_interval)
                                out.println(sql.toString());
                        }
                    }
                }
            }
            iter.close();
            iter = null;
            sfr.close();
            sfr = null;
            progress.finish();
            samIndex++;
        } while (samIndex < args.size());
        out.println("COMMIT;");
        out.flush();
        out.close();
        LOG.info("done");
        return 0;
    } catch (Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(iter);
        CloserUtil.close(sfr);
        CloserUtil.close(out);
        CloserUtil.close(indexedFastaSequenceFile);
    }
}
Also used : SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) SAMReadGroupRecord(htsjdk.samtools.SAMReadGroupRecord) SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) IndexedFastaSequenceFile(htsjdk.samtools.reference.IndexedFastaSequenceFile) SamReader(htsjdk.samtools.SamReader) PrintWriter(java.io.PrintWriter) IntervalParser(com.github.lindenb.jvarkit.util.bio.IntervalParser) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) SAMFlag(htsjdk.samtools.SAMFlag) GenomicSequence(com.github.lindenb.jvarkit.util.picard.GenomicSequence) CigarOperator(htsjdk.samtools.CigarOperator) CigarElement(htsjdk.samtools.CigarElement) JvarkitException(com.github.lindenb.jvarkit.lang.JvarkitException) SAMRecord(htsjdk.samtools.SAMRecord) SAMFileHeader(htsjdk.samtools.SAMFileHeader) Interval(htsjdk.samtools.util.Interval)

Example 37 with SAMReadGroupRecord

use of htsjdk.samtools.SAMReadGroupRecord in project jvarkit by lindenb.

the class SamAddPI method doWork.

/**
 * Copies a SAM/BAM input to the output while setting the predicted median
 * insert size of the read groups in the header.
 *
 * <p>The first reads are spooled to a temporary BAM while the absolute inferred
 * insert sizes of eligible reads are collected per read group. The header is then
 * updated with the median of each non-empty collection, the temporary BAM is
 * replayed into the output, and the rest of the input is appended.</p>
 *
 * @param args zero or one input file, resolved through {@code oneFileOrNull}
 * @return {@code RETURN_OK} on success, -1 if any exception occurs
 */
@Override
public int doWork(final List<String> args) {
    final Map<String, List<Integer>> rg2insertsize = new HashMap<>();
    SamReader sfr = null;
    SamReader sfrTmp = null;
    File tmpBam = null;
    SAMFileWriter tmpBamWriter = null;
    SAMFileWriter outWriter = null;
    CloseableIterator<SAMRecord> iter = null;
    CloseableIterator<SAMRecord> iterTmp = null;
    try {
        sfr = openSamReader(oneFileOrNull(args));
        SAMFileHeader header = sfr.getFileHeader();
        for (final SAMReadGroupRecord rg : header.getReadGroups()) {
            // keep an existing value unless the user asked to overwrite it
            if (!overwrite_existing && rg.getPredictedMedianInsertSize() != null) {
                continue;
            }
            rg2insertsize.put(rg.getId(), new ArrayList<>(num_read_to_test < 1L ? 10000 : num_read_to_test));
        }
        tmpBam = File.createTempFile("__addpi", ".bam");
        tmpBamWriter = this.writingBamArgs.openSAMFileWriter(tmpBam, header, true);
        iter = sfr.iterator();
        int n_processed = 0;
        final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(header).logger(LOG);
        // a negative num_read_to_test means: no limit
        while (iter.hasNext() && (this.num_read_to_test < 0 || n_processed < this.num_read_to_test)) {
            final SAMRecord rec = progress.watch(iter.next());
            // every scanned read is spooled, whether or not it contributes to the statistics
            tmpBamWriter.addAlignment(rec);
            final SAMReadGroupRecord rg = rec.getReadGroup();
            // a read without read group cannot contribute to a per-group estimate
            if (rg == null)
                continue;
            final List<Integer> insertlist = rg2insertsize.get(rg.getId());
            if (insertlist == null)
                continue;
            if (rec.getReadUnmappedFlag())
                continue;
            if (!rec.getReadPairedFlag())
                continue;
            // only the first read of a pair is used, so each pair is counted once
            if (!rec.getFirstOfPairFlag())
                continue;
            if (rec.getMateUnmappedFlag())
                continue;
            if (this.samRecordFilter.filterOut(rec))
                continue;
            final int len = rec.getInferredInsertSize();
            if (len == 0)
                continue;
            insertlist.add(Math.abs(len));
            ++n_processed;
        }
        tmpBamWriter.close();
        tmpBamWriter = null;
        // reopen tmp file
        sfrTmp = super.createSamReaderFactory().open(tmpBam);
        iterTmp = sfrTmp.iterator();
        // update the predicted median insert size of each read group
        for (final SAMReadGroupRecord rg : header.getReadGroups()) {
            final List<Integer> insertlist = rg2insertsize.get(rg.getId());
            if (insertlist == null || insertlist.isEmpty())
                continue;
            rg.setPredictedMedianInsertSize((int) Percentile.median().evaluate(insertlist.stream().mapToDouble(I -> I.doubleValue())));
        }
        header.addComment("Processed with " + getClass().getSimpleName() + " " + getProgramCommandLine());
        outWriter = this.writingBamArgs.openSAMFileWriter(this.outputFile, header, true);
        // replay the spooled reads first so the original order is preserved
        while (iterTmp.hasNext()) {
            outWriter.addAlignment(iterTmp.next());
        }
        iterTmp.close();
        iterTmp = null;
        sfrTmp.close();
        sfrTmp = null;
        tmpBam.delete();
        // finish writing original input
        while (iter.hasNext()) {
            outWriter.addAlignment(progress.watch(iter.next()));
        }
        progress.finish();
        iter.close();
        iter = null;
        sfr.close();
        sfr = null;
        outWriter.close();
        // already closed: prevents a second close() in the finally block
        outWriter = null;
        return RETURN_OK;
    } catch (Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(tmpBamWriter);
        // release the temporary reader before deleting the file it reads
        CloserUtil.close(iterTmp);
        CloserUtil.close(sfrTmp);
        if (tmpBam != null)
            tmpBam.delete();
        CloserUtil.close(outWriter);
        CloserUtil.close(iter);
        CloserUtil.close(sfr);
    }
}
Also used : CloseableIterator(htsjdk.samtools.util.CloseableIterator) Program(com.github.lindenb.jvarkit.util.jcommander.Program) Parameter(com.beust.jcommander.Parameter) Logger(com.github.lindenb.jvarkit.util.log.Logger) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) HashMap(java.util.HashMap) SAMFileWriter(htsjdk.samtools.SAMFileWriter) SamReader(htsjdk.samtools.SamReader) SAMFileHeader(htsjdk.samtools.SAMFileHeader) Percentile(com.github.lindenb.jvarkit.math.stats.Percentile) File(java.io.File) SAMRecord(htsjdk.samtools.SAMRecord) ParametersDelegate(com.beust.jcommander.ParametersDelegate) ArrayList(java.util.ArrayList) SamRecordFilter(htsjdk.samtools.filter.SamRecordFilter) List(java.util.List) SamRecordJEXLFilter(com.github.lindenb.jvarkit.util.samtools.SamRecordJEXLFilter) SAMReadGroupRecord(htsjdk.samtools.SAMReadGroupRecord) Map(java.util.Map) Launcher(com.github.lindenb.jvarkit.util.jcommander.Launcher) CloserUtil(htsjdk.samtools.util.CloserUtil) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) HashMap(java.util.HashMap) SAMFileWriter(htsjdk.samtools.SAMFileWriter) SAMReadGroupRecord(htsjdk.samtools.SAMReadGroupRecord) SamReader(htsjdk.samtools.SamReader) SAMRecord(htsjdk.samtools.SAMRecord) ArrayList(java.util.ArrayList) List(java.util.List) SAMFileHeader(htsjdk.samtools.SAMFileHeader) File(java.io.File)

Example 38 with SAMReadGroupRecord

use of htsjdk.samtools.SAMReadGroupRecord in project jvarkit by lindenb.

the class SamFindClippedRegions method doWork.

/*private static boolean closeTo(int pos1,int pos2, int max)
		{
		return Math.abs(pos2-pos1)<=max;
		}*/
/*
	private static boolean same(char c1,char c2)
		{
		if(c1=='N' || c2=='N') return false;
		return Character.toUpperCase(c1)==Character.toUpperCase(c2);
		}*/
/**
 * Scans several BAM files together and writes, as VCF on standard output,
 * the 5bp windows where soft-clipped reads accumulate.
 *
 * <p>Each input must carry a distinct sample name and all inputs must share the
 * same sequence dictionary. Filtered reads are buffered while they stay on the
 * same contig and within {@code readLength} bases of the previous read; when the
 * buffer is flushed, every window is genotyped per sample with the symbolic
 * alleles {@code <CLIP5>} / {@code <CLIP3>}.</p>
 *
 * @param args the BAM file names; must not be empty
 * @return 0 on success, -1 on bad arguments or if any exception occurs
 */
@Override
public int doWork(List<String> args) {
    // maximum gap, added to the unclipped end of the last buffered read, before the buffer is flushed
    int readLength = 150;
    if (args.isEmpty()) {
        LOG.error("illegal.number.of.arguments");
        return -1;
    }
    List<Input> inputs = new ArrayList<Input>();
    VariantContextWriter w = null;
    // SAMFileWriter w=null;
    try {
        SAMSequenceDictionary dict = null;
        /* create input, collect sample names */
        Map<String, Input> sample2input = new HashMap<String, Input>();
        for (final String filename : args) {
            Input input = new Input(new File(filename));
            // input.index=inputs.size();
            inputs.add(input);
            if (sample2input.containsKey(input.sampleName)) {
                LOG.error("Duplicate sample " + input.sampleName + " in " + input.bamFile + " and " + sample2input.get(input.sampleName).bamFile);
                return -1;
            }
            sample2input.put(input.sampleName, input);
            if (dict == null) {
                dict = input.header.getSequenceDictionary();
            } else if (!SequenceUtil.areSequenceDictionariesEqual(dict, input.header.getSequenceDictionary())) {
                LOG.error("Found more than one dictint sequence dictionary");
                return -1;
            }
        }
        LOG.info("Sample N= " + sample2input.size());
        /* create merged iterator */
        List<SAMFileHeader> headers = new ArrayList<SAMFileHeader>(sample2input.size());
        for (Input input : inputs) headers.add(input.header);
        SamFileHeaderMerger headerMerger = new SamFileHeaderMerger(SortOrder.coordinate, headers, true);
        List<SamReader> readers = new ArrayList<SamReader>(sample2input.size());
        for (Input input : inputs) readers.add(input.samFileReaderScan);
        MergingSamRecordIterator merginIter = new MergingSamRecordIterator(headerMerger, readers, true);
        /* VCF header */
        Allele reference_allele = Allele.create("N", true);
        // index 0 is the 5' clip allele, index 1 the 3' clip allele (same convention as 'side' below)
        Allele[] alternate_alleles = new Allele[] { Allele.create("<CLIP5>", false), Allele.create("<CLIP3>", false) };
        Set<VCFHeaderLine> vcfHeaderLines = new HashSet<VCFHeaderLine>();
        for (Allele alt : alternate_alleles) {
            vcfHeaderLines.add(new VCFSimpleHeaderLine("<ID=" + alt.getDisplayString() + ",Description=\"StructVar\">", VCFHeaderVersion.VCF4_1, VCFConstants.ALT_HEADER_START.substring(2), Arrays.asList("ID", "Description")));
        }
        vcfHeaderLines.add(new VCFInfoHeaderLine("COUNT_SAMPLES", 1, VCFHeaderLineType.Integer, "Number of samples with  depth>=" + this.min_depth));
        vcfHeaderLines.add(new VCFInfoHeaderLine(VCFConstants.DEPTH_KEY, 1, VCFHeaderLineType.Integer, "Approximate read depth."));
        vcfHeaderLines.add(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_KEY, 1, VCFHeaderLineType.String, "Genotype"));
        vcfHeaderLines.add(new VCFFormatHeaderLine(VCFConstants.DEPTH_KEY, 1, VCFHeaderLineType.Integer, "Approximate read depth"));
        vcfHeaderLines.add(new VCFHeaderLine(getClass().getSimpleName() + "CmdLine", String.valueOf(getProgramCommandLine())));
        vcfHeaderLines.add(new VCFHeaderLine(getClass().getSimpleName() + "Version", String.valueOf(getVersion())));
        for (int side = 0; side < 2; ++side) {
            vcfHeaderLines.add(new VCFFormatHeaderLine("CN" + (side == 0 ? 5 : 3), 1, VCFHeaderLineType.Integer, "count clipped in " + (side == 0 ? 5 : 3) + "'"));
        }
        if (dict != null) {
            vcfHeaderLines.addAll(VCFUtils.samSequenceDictToVCFContigHeaderLine(dict));
        }
        VCFHeader vcfHeader = new VCFHeader(vcfHeaderLines, sample2input.keySet());
        w = VCFUtils.createVariantContextWriterToStdout();
        w.writeHeader(vcfHeader);
        final IntervalTreeMap<Boolean> intervals = new IntervalTreeMap<>();
        // w=swf.make(header, System.out);
        SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(dict);
        if (bedFile != null) {
            final BedLineCodec bedLineCodec = new BedLineCodec();
            LOG.info("Reading " + bedFile);
            BufferedReader r = IOUtils.openFileForBufferedReading(bedFile);
            try {
                String line;
                while ((line = r.readLine()) != null) {
                    BedLine bedLine = bedLineCodec.decode(line);
                    if (bedLine == null)
                        continue;
                    if (dict != null && dict.getSequence(bedLine.getContig()) == null) {
                        LOG.warning("undefined chromosome  in " + bedFile + " " + line);
                        continue;
                    }
                    intervals.put(bedLine.toInterval(), true);
                }
            } finally {
                // closed even when decoding or reading fails
                CloserUtil.close(r);
            }
        }
        LinkedList<SAMRecord> buffer = new LinkedList<SAMRecord>();
        // keeps mapped, primary, non-duplicate, QC-passing reads that are soft-clipped
        // at one end at least and whose read group declares a sample
        final Predicate<SAMRecord> filterSamRecords = new Predicate<SAMRecord>() {

            @Override
            public boolean test(SAMRecord rec) {
                if (rec.getReadUnmappedFlag())
                    return false;
                if (rec.isSecondaryOrSupplementary())
                    return false;
                if (rec.getDuplicateReadFlag())
                    return false;
                if (rec.getReadFailsVendorQualityCheckFlag())
                    return false;
                Cigar cigar = rec.getCigar();
                if (cigar == null || cigar.numCigarElements() < 2)
                    return false;
                boolean found_S = false;
                for (int side = 0; side < 2; ++side) {
                    CigarElement ce = cigar.getCigarElement(side == 0 ? 0 : cigar.numCigarElements() - 1);
                    // read must be clipped on 5' or 3'
                    if (!ce.getOperator().equals(CigarOperator.S))
                        continue;
                    found_S = true;
                    break;
                }
                if (!found_S)
                    return false;
                SAMReadGroupRecord g = rec.getReadGroup();
                if (g == null || g.getSample() == null || g.getSample().isEmpty())
                    return false;
                return true;
            }
        };
        final FilteringIterator<SAMRecord> forwardIterator = new FilteringIterator<SAMRecord>(merginIter, filterSamRecords);
        for (; ; ) {
            // rec stays null once the input is exhausted: this triggers the final flush
            SAMRecord rec = null;
            if (forwardIterator.hasNext()) {
                rec = forwardIterator.next();
                progress.watch(rec);
                // restrict to the BED regions only when a BED file was given: 'intervals' is
                // never null, and an empty map used to discard every read
                if (bedFile != null && !intervals.containsOverlapping(new Interval(rec.getReferenceName(), rec.getAlignmentStart(), rec.getAlignmentEnd())))
                    continue;
            }
            // need to flush buffer ?
            if (rec == null || (!buffer.isEmpty() && !buffer.getLast().getReferenceIndex().equals(rec.getReferenceIndex())) || (!buffer.isEmpty() && buffer.getLast().getUnclippedEnd() + readLength < rec.getUnclippedStart())) {
                if (!buffer.isEmpty()) {
                    // bounds of the buffered reads, clipped bases included
                    int chromStart = buffer.getFirst().getUnclippedStart();
                    int chromEnd = buffer.getFirst().getUnclippedEnd();
                    for (SAMRecord sam : buffer) {
                        chromStart = Math.min(chromStart, sam.getUnclippedStart());
                        chromEnd = Math.max(chromEnd, sam.getUnclippedEnd());
                    }
                    final int winShift = 5;
                    for (int pos = chromStart; pos + winShift <= chromEnd; pos += winShift) {
                        // per side, number of clips in this window reaching min_clip_length, all samples together
                        int[] count_big_clip = new int[] { 0, 0 };
                        // int max_depth[]=new int[]{0,0};
                        List<Genotype> genotypes = new ArrayList<Genotype>();
                        Set<Allele> all_alleles = new HashSet<Allele>();
                        all_alleles.add(reference_allele);
                        boolean found_one_depth_ok = false;
                        int sum_depth = 0;
                        int samples_with_high_depth = 0;
                        for (String sample : sample2input.keySet()) {
                            GenotypeBuilder gb = new GenotypeBuilder(sample);
                            int[] count_clipped = new int[] { 0, 0 };
                            Set<Allele> sample_alleles = new HashSet<Allele>(3);
                            for (int side = 0; side < 2; ++side) {
                                for (SAMRecord sam : buffer) {
                                    if (!sam.getReadGroup().getSample().equals(sample))
                                        continue;
                                    Cigar cigar = sam.getCigar();
                                    CigarElement ce = cigar.getCigarElement(side == 0 ? 0 : cigar.numCigarElements() - 1);
                                    if (!ce.getOperator().equals(CigarOperator.S))
                                        continue;
                                    // genomic span covered by the clipped bases on this side
                                    int clipStart = (side == 0 ? sam.getUnclippedStart() : sam.getAlignmentEnd() + 1);
                                    int clipEnd = (side == 0 ? sam.getAlignmentStart() - 1 : sam.getUnclippedEnd());
                                    if ((pos + winShift < clipStart || pos > clipEnd))
                                        continue;
                                    count_clipped[side]++;
                                    if (ce.getLength() >= this.min_clip_length) {
                                        count_big_clip[side]++;
                                    }
                                    sample_alleles.add(alternate_alleles[side]);
                                    gb.attribute("CN" + (side == 0 ? 5 : 3), count_clipped[side]);
                                }
                            }
                            // if(!(found_one_big_clip[0] || found_one_big_clip[1])) continue;
                            if (count_clipped[0] + count_clipped[1] == 0)
                                continue;
                            // NOTE(review): the COUNT_SAMPLES header line says "depth>=" but this
                            // comparison is strict; confirm which one is intended
                            if ((count_clipped[0] + count_clipped[1]) > min_depth) {
                                found_one_depth_ok = true;
                                ++samples_with_high_depth;
                            }
                            sum_depth += (count_clipped[0] + count_clipped[1]);
                            gb.alleles(new ArrayList<Allele>(sample_alleles));
                            all_alleles.addAll(sample_alleles);
                            gb.DP(count_clipped[0] + count_clipped[1]);
                            genotypes.add(gb.make());
                        }
                        if (all_alleles.size() == 1) {
                            // all homozygotes
                            continue;
                        }
                        if (!found_one_depth_ok) {
                            continue;
                        }
                        if (!(count_big_clip[0] >= 1 || count_big_clip[1] >= 1)) {
                            continue;
                        }
                        Map<String, Object> atts = new HashMap<String, Object>();
                        atts.put("COUNT_SAMPLES", samples_with_high_depth);
                        atts.put(VCFConstants.DEPTH_KEY, sum_depth);
                        VariantContextBuilder vcb = new VariantContextBuilder();
                        vcb.chr(buffer.getFirst().getReferenceName());
                        vcb.start(pos);
                        vcb.stop(pos + winShift);
                        vcb.alleles(all_alleles);
                        vcb.attributes(atts);
                        vcb.genotypes(genotypes);
                        w.add(vcb.make());
                    }
                    buffer.clear();
                }
                if (rec == null) {
                    break;
                }
            }
            buffer.add(rec);
        }
        merginIter.close();
        progress.finish();
        return 0;
    } catch (Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        // the VCF writer was never closed before
        CloserUtil.close(w);
        for (Input input : inputs) {
            CloserUtil.close(input);
        }
    }
}
Also used : VCFHeaderLine(htsjdk.variant.vcf.VCFHeaderLine) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) VCFSimpleHeaderLine(htsjdk.variant.vcf.VCFSimpleHeaderLine) Predicate(java.util.function.Predicate) HashSet(java.util.HashSet) Genotype(htsjdk.variant.variantcontext.Genotype) GenotypeBuilder(htsjdk.variant.variantcontext.GenotypeBuilder) CigarElement(htsjdk.samtools.CigarElement) LinkedList(java.util.LinkedList) BedLineCodec(com.github.lindenb.jvarkit.util.bio.bed.BedLineCodec) BedLine(com.github.lindenb.jvarkit.util.bio.bed.BedLine) SAMRecord(htsjdk.samtools.SAMRecord) SAMFileHeader(htsjdk.samtools.SAMFileHeader) File(java.io.File) Interval(htsjdk.samtools.util.Interval) MergingSamRecordIterator(htsjdk.samtools.MergingSamRecordIterator) SAMReadGroupRecord(htsjdk.samtools.SAMReadGroupRecord) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) SamReader(htsjdk.samtools.SamReader) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) VCFHeader(htsjdk.variant.vcf.VCFHeader) VCFFormatHeaderLine(htsjdk.variant.vcf.VCFFormatHeaderLine) SamFileHeaderMerger(htsjdk.samtools.SamFileHeaderMerger) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) VCFInfoHeaderLine(htsjdk.variant.vcf.VCFInfoHeaderLine) IOException(java.io.IOException) Allele(htsjdk.variant.variantcontext.Allele) Cigar(htsjdk.samtools.Cigar) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) BufferedReader(java.io.BufferedReader) IntervalTreeMap(htsjdk.samtools.util.IntervalTreeMap)

Example 39 with SAMReadGroupRecord

use of htsjdk.samtools.SAMReadGroupRecord in project jvarkit by lindenb.

the class NgsFilesScanner method readBam.

/**
 * Builds a small XML fragment describing a BAM file and hands it to
 * {@code put(f, xml)}.
 *
 * The fragment is a {@code <bam>} element containing whatever
 * {@code writeFile} emits for the file, followed by a {@code <samples>}
 * element holding one {@code <sample>} child per distinct, non-empty sample
 * name found in the header's read groups.
 *
 * Files that cannot be read are skipped without any output. Any exception
 * raised while reading the header or writing the XML is logged as a warning
 * and nothing is stored for that file.
 *
 * @param f the BAM file to inspect
 */
@Override
protected void readBam(final File f) {
    if (!f.canRead()) {
        return;
    }
    SamReader samReader = null;
    try {
        final StringWriter xmlBuffer = new StringWriter();
        final XMLStreamWriter xml = XMLOutputFactory.newFactory()
                .createXMLStreamWriter(new StreamResult(xmlBuffer));
        xml.writeStartElement("bam");
        writeFile(xml, f);
        samReader = super.openSamReader(f.getPath());
        final SAMFileHeader header = samReader.getFileHeader();
        xml.writeStartElement("samples");
        if (header != null && header.getReadGroups() != null) {
            final Set<String> emitted = new HashSet<String>();
            for (final SAMReadGroupRecord readGroup : header.getReadGroups()) {
                final String sampleName = readGroup.getSample();
                final boolean named = sampleName != null && !sampleName.isEmpty();
                // Set.add returns false when the name was already recorded,
                // so each sample is written at most once.
                if (named && emitted.add(sampleName)) {
                    xml.writeStartElement("sample");
                    xml.writeCharacters(sampleName);
                    xml.writeEndElement();
                }
            }
        }
        // close <samples>
        xml.writeEndElement();
        // close <bam>
        xml.writeEndElement();
        xml.flush();
        xml.close();
        xmlBuffer.flush();
        put(f, xmlBuffer.toString());
    } catch (Exception e) {
        LOG.warning(e);
    } finally {
        CloserUtil.close(samReader);
    }
}
Also used : SamReader(htsjdk.samtools.SamReader) XMLOutputFactory(javax.xml.stream.XMLOutputFactory) StringWriter(java.io.StringWriter) StreamResult(javax.xml.transform.stream.StreamResult) XMLStreamWriter(javax.xml.stream.XMLStreamWriter) SAMReadGroupRecord(htsjdk.samtools.SAMReadGroupRecord) SAMFileHeader(htsjdk.samtools.SAMFileHeader) XMLStreamException(javax.xml.stream.XMLStreamException) IOException(java.io.IOException) HashSet(java.util.HashSet)

Example 40 with SAMReadGroupRecord

use of htsjdk.samtools.SAMReadGroupRecord in project jvarkit by lindenb.

the class NgsFilesSummary method readBam.

/**
 * Reports the sample(s) declared in a BAM file's read groups.
 *
 * For every read group in the header, one line is printed via
 * {@code print(sample, InfoType.BAM, f)}; a read group whose sample name is
 * blank is reported as {@code _NO_SAMPLE_RG_}. When the header is missing or
 * declares no read group at all, a single {@code _NO_READ_GROUP_} line is
 * printed instead.
 *
 * Files that cannot be read are skipped without any output. Any exception
 * raised while opening or reading the file is logged as a warning.
 *
 * @param f the BAM file to inspect
 */
@Override
protected void readBam(final File f) {
    if (!f.canRead()) {
        return;
    }
    SamReader r = null;
    try {
        r = super.openSamReader(f.getPath());
        final SAMFileHeader h = r.getFileHeader();
        // The read groups must be NON-empty to be listed; the previous test
        // was inverted, which made the loop dead code and reported every BAM
        // that had read groups as "_NO_READ_GROUP_".
        if (h != null && h.getReadGroups() != null && !h.getReadGroups().isEmpty()) {
            for (final SAMReadGroupRecord rg : h.getReadGroups()) {
                String sample = rg.getSample();
                if (StringUtil.isBlank(sample)) {
                    sample = "_NO_SAMPLE_RG_";
                }
                print(sample, InfoType.BAM, f);
            }
        } else {
            print("_NO_READ_GROUP_", InfoType.BAM, f);
        }
    } catch (final Exception e) {
        LOG.warning(e);
    } finally {
        CloserUtil.close(r);
    }
}
Also used : SamReader(htsjdk.samtools.SamReader) SAMReadGroupRecord(htsjdk.samtools.SAMReadGroupRecord) SAMFileHeader(htsjdk.samtools.SAMFileHeader) IOException(java.io.IOException)

Aggregations

SAMReadGroupRecord (htsjdk.samtools.SAMReadGroupRecord)81 SAMFileHeader (htsjdk.samtools.SAMFileHeader)48 SAMRecord (htsjdk.samtools.SAMRecord)33 Test (org.testng.annotations.Test)31 SamReader (htsjdk.samtools.SamReader)29 BaseTest (org.broadinstitute.hellbender.utils.test.BaseTest)26 File (java.io.File)23 ArrayList (java.util.ArrayList)22 SAMRecordIterator (htsjdk.samtools.SAMRecordIterator)20 GATKRead (org.broadinstitute.hellbender.utils.read.GATKRead)20 HashMap (java.util.HashMap)18 CigarElement (htsjdk.samtools.CigarElement)17 Cigar (htsjdk.samtools.Cigar)16 HashSet (java.util.HashSet)16 SAMFileWriter (htsjdk.samtools.SAMFileWriter)15 SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary)15 CigarOperator (htsjdk.samtools.CigarOperator)14 IOException (java.io.IOException)14 SAMSequenceDictionaryProgress (com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress)13 List (java.util.List)12