Search in sources :

Example 91 with SamReader

use of htsjdk.samtools.SamReader in project jvarkit by lindenb.

The class SamAddPI, method doWork:

/**
 * Computes the median insert size per read-group from the first reads of the input,
 * writes it back as the read-group's predicted median insert size (PI), then emits
 * the (buffered) records followed by the remainder of the input.
 *
 * @param args zero or one input BAM/SAM path (null/empty means stdin)
 * @return RETURN_OK on success, -1 on error
 */
@Override
public int doWork(final List<String> args) {
    // insert sizes collected per read-group id; a group is present only if its PI may be (re)set
    final Map<String, List<Integer>> rg2insertsize = new HashMap<>();
    SamReader sfr = null;
    SamReader sfrTmp = null;
    File tmpBam = null;
    SAMFileWriter tmpBamWriter = null;
    SAMFileWriter outWriter = null;
    CloseableIterator<SAMRecord> iter = null;
    CloseableIterator<SAMRecord> iterTmp = null;
    try {
        sfr = openSamReader(oneFileOrNull(args));
        final SAMFileHeader header = sfr.getFileHeader();
        for (final SAMReadGroupRecord rg : header.getReadGroups()) {
            // keep an existing PI value unless the user explicitly asked to overwrite it
            if (!overwrite_existing && rg.getPredictedMedianInsertSize() != null) {
                continue;
            }
            rg2insertsize.put(rg.getId(), new ArrayList<>(num_read_to_test < 1L ? 10000 : num_read_to_test));
        }
        // the leading records must be buffered to a temporary BAM because the header
        // (with the new PI values) can only be written once sampling is complete
        tmpBam = File.createTempFile("__addpi", ".bam");
        tmpBamWriter = this.writingBamArgs.openSAMFileWriter(tmpBam, header, true);
        iter = sfr.iterator();
        int n_processed = 0;
        final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(header).logger(LOG);
        while (iter.hasNext() && (this.num_read_to_test < 0 || n_processed < this.num_read_to_test)) {
            final SAMRecord rec = progress.watch(iter.next());
            tmpBamWriter.addAlignment(rec);
            final SAMReadGroupRecord rg = rec.getReadGroup();
            if (rg == null)
                continue; // record without a read-group: nothing to collect
            final List<Integer> insertlist = rg2insertsize.get(rg.getId());
            if (insertlist == null)
                continue;
            // only properly mapped first-of-pair reads contribute an insert size
            if (rec.getReadUnmappedFlag())
                continue;
            if (!rec.getReadPairedFlag())
                continue;
            if (!rec.getFirstOfPairFlag())
                continue;
            if (rec.getMateUnmappedFlag())
                continue;
            if (this.samRecordFilter.filterOut(rec))
                continue;
            final int len = rec.getInferredInsertSize();
            if (len == 0)
                continue;
            insertlist.add(Math.abs(len));
            ++n_processed;
        }
        tmpBamWriter.close();
        tmpBamWriter = null;
        // reopen tmp file
        sfrTmp = super.createSamReaderFactory().open(tmpBam);
        iterTmp = sfrTmp.iterator();
        // update predicted median insert size in each eligible read-group
        for (final SAMReadGroupRecord rg : header.getReadGroups()) {
            final List<Integer> insertlist = rg2insertsize.get(rg.getId());
            if (insertlist == null || insertlist.isEmpty())
                continue;
            rg.setPredictedMedianInsertSize((int) Percentile.median().evaluate(insertlist.stream().mapToDouble(I -> I.doubleValue())));
        }
        header.addComment("Processed with " + getClass().getSimpleName() + " " + getProgramCommandLine());
        outWriter = this.writingBamArgs.openSAMFileWriter(this.outputFile, header, true);
        // replay the buffered records under the updated header
        while (iterTmp.hasNext()) {
            outWriter.addAlignment(iterTmp.next());
        }
        iterTmp.close();
        iterTmp = null;
        sfrTmp.close();
        sfrTmp = null;
        tmpBam.delete();
        // finish writing original input
        while (iter.hasNext()) {
            outWriter.addAlignment(progress.watch(iter.next()));
        }
        progress.finish();
        iter.close();
        iter = null;
        sfr.close();
        sfr = null;
        outWriter.close();
        return RETURN_OK;
    } catch (Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        // close everything that may still be open after an exception
        CloserUtil.close(iter);
        CloserUtil.close(iterTmp);
        CloserUtil.close(sfrTmp);
        CloserUtil.close(tmpBamWriter);
        if (tmpBam != null)
            tmpBam.delete();
        CloserUtil.close(outWriter);
        CloserUtil.close(sfr);
    }
}
Also used : CloseableIterator(htsjdk.samtools.util.CloseableIterator) Program(com.github.lindenb.jvarkit.util.jcommander.Program) Parameter(com.beust.jcommander.Parameter) Logger(com.github.lindenb.jvarkit.util.log.Logger) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) HashMap(java.util.HashMap) SAMFileWriter(htsjdk.samtools.SAMFileWriter) SamReader(htsjdk.samtools.SamReader) SAMFileHeader(htsjdk.samtools.SAMFileHeader) Percentile(com.github.lindenb.jvarkit.math.stats.Percentile) File(java.io.File) SAMRecord(htsjdk.samtools.SAMRecord) ParametersDelegate(com.beust.jcommander.ParametersDelegate) ArrayList(java.util.ArrayList) SamRecordFilter(htsjdk.samtools.filter.SamRecordFilter) List(java.util.List) SamRecordJEXLFilter(com.github.lindenb.jvarkit.util.samtools.SamRecordJEXLFilter) SAMReadGroupRecord(htsjdk.samtools.SAMReadGroupRecord) Map(java.util.Map) Launcher(com.github.lindenb.jvarkit.util.jcommander.Launcher) CloserUtil(htsjdk.samtools.util.CloserUtil) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) HashMap(java.util.HashMap) SAMFileWriter(htsjdk.samtools.SAMFileWriter) SAMReadGroupRecord(htsjdk.samtools.SAMReadGroupRecord) SamReader(htsjdk.samtools.SamReader) SAMRecord(htsjdk.samtools.SAMRecord) ArrayList(java.util.ArrayList) List(java.util.List) SAMFileHeader(htsjdk.samtools.SAMFileHeader) File(java.io.File)

Example 92 with SamReader

use of htsjdk.samtools.SamReader in project jvarkit by lindenb.

The class SamClipIndelFraction, method doWork:

/**
 * Scans a BAM/SAM and reports, per read, the distribution of clipped /
 * inserted / deleted base counts (selected by {@code type}), plus summary
 * counters, to the output as a tab-delimited table.
 *
 * @param args zero or one input BAM/SAM path (null/empty means stdin)
 * @return 0 on success, -1 on error
 */
@Override
public int doWork(final List<String> args) {
    SamReader sfr = null;
    SAMRecordIterator iter = null;
    PrintWriter pw = null;
    try {
        sfr = openSamReader(oneFileOrNull(args));
        pw = super.openFileOrStdoutAsPrintWriter(outputFile);
        long total_bases_count = 0L;
        long count_clipped_reads = 0L;
        long count_clipped_left_reads = 0L;
        long count_clipped_right_reads = 0L;
        long count_unclipped_reads = 0L;
        long count_unmapped_reads = 0L;
        final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(sfr.getFileHeader()).logger(LOG);
        // histogram: length (per selected 'type') -> number of reads
        final Counter<Integer> counter = new Counter<>();
        iter = sfr.iterator();
        while (iter.hasNext()) {
            final SAMRecord record = progress.watch(iter.next());
            if (record.getReadUnmappedFlag()) {
                ++count_unmapped_reads;
                continue;
            }
            final Cigar cigar = record.getCigar();
            int left_clip_length = 0;
            int right_clip_length = 0;
            int deletion_N_length = 0;
            int deletion_D_length = 0;
            int insertion_length = 0;
            // true while we have not yet passed a non-clip operator:
            // clips seen before that point belong to the 5' side of the alignment
            boolean onLeft = true;
            for (int i = 0; i < cigar.numCigarElements(); ++i) {
                final CigarElement ce = cigar.getCigarElement(i);
                final CigarOperator op = ce.getOperator();
                switch(op) {
                    case N:
                        {
                            onLeft = false;
                            // BUGFIX: 'N' (skipped region) was previously added to deletion_D_length
                            deletion_N_length += ce.getLength();
                            total_bases_count += ce.getLength();
                            break;
                        }
                    case D:
                        {
                            onLeft = false;
                            // BUGFIX: 'D' (deletion) was previously added to deletion_N_length
                            deletion_D_length += ce.getLength();
                            total_bases_count += ce.getLength();
                            break;
                        }
                    case I:
                        {
                            onLeft = false;
                            insertion_length += ce.getLength();
                            total_bases_count += ce.getLength();
                            break;
                        }
                    case S:
                    case H:
                        {
                            // attribute the clip to the read's 5'/3' end, taking strand into account
                            if (onLeft) {
                                if (record.getReadNegativeStrandFlag()) {
                                    right_clip_length += ce.getLength();
                                } else {
                                    left_clip_length += ce.getLength();
                                }
                            } else {
                                if (record.getReadNegativeStrandFlag()) {
                                    left_clip_length += ce.getLength();
                                } else {
                                    right_clip_length += ce.getLength();
                                }
                            }
                            total_bases_count += ce.getLength();
                            break;
                        }
                    default:
                        {
                            onLeft = false;
                            if (op.consumesReadBases()) {
                                total_bases_count += ce.getLength();
                            }
                            break;
                        }
                }
            }
            if (left_clip_length + right_clip_length == 0) {
                count_unclipped_reads++;
            } else {
                if (left_clip_length > 0)
                    count_clipped_left_reads++;
                if (right_clip_length > 0)
                    count_clipped_right_reads++;
                count_clipped_reads++;
            }
            switch(type) {
                case leftclip:
                    counter.incr(left_clip_length);
                    break;
                case rightclip:
                    counter.incr(right_clip_length);
                    break;
                case allclip:
                    counter.incr(left_clip_length + right_clip_length);
                    break;
                case deletion:
                    counter.incr(deletion_D_length + deletion_N_length);
                    break;
                case insert:
                    counter.incr(insertion_length);
                    break;
                default:
                    LOG.error("Bad type: " + type);
                    return -1;
            }
        }
        progress.finish();
        pw.println("##UNMAPPED_READS=" + count_unmapped_reads);
        pw.println("##MAPPED_READS=" + (count_clipped_reads + count_unclipped_reads));
        pw.println("##CLIPPED_READS=" + count_clipped_reads);
        pw.println("##CLIPPED_READS_5_PRIME=" + count_clipped_left_reads);
        pw.println("##CLIPPED_READS_3_PRIME=" + count_clipped_right_reads);
        pw.println("##UNCLIPPED_READS=" + count_unclipped_reads);
        pw.println("##COUNT_BASES=" + total_bases_count);
        pw.print("#");
        switch(type) {
            case leftclip:
                pw.print("CLIP_5_PRIME");
                break;
            case rightclip:
                pw.print("CLIP_3_PRIME");
                break;
            case allclip:
                pw.print("CLIP");
                break;
            case deletion:
                pw.print("DELETION");
                break;
            case insert:
                pw.print("INSERTION");
                break;
            default:
                LOG.error("Bad type: " + type);
                return -1;
        }
        pw.println("\tCOUNT\tFRACTION_OF_MAPPED_READS");
        for (final Integer size : new TreeSet<Integer>(counter.keySet())) {
            pw.print(size);
            pw.print('\t');
            pw.print(counter.count(size));
            pw.print('\t');
            // BUGFIX: denominator was 'count_unclipped_reads + count_unclipped_reads';
            // the column is a fraction of MAPPED reads = clipped + unclipped
            pw.println(counter.count(size) / (double) (count_clipped_reads + count_unclipped_reads));
        }
        pw.flush();
        pw.close();
        pw = null;
        return 0;
    } catch (final Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(iter);
        CloserUtil.close(sfr);
        CloserUtil.close(pw);
    }
}
Also used : SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) CigarOperator(htsjdk.samtools.CigarOperator) CigarElement(htsjdk.samtools.CigarElement) SamReader(htsjdk.samtools.SamReader) Counter(com.github.lindenb.jvarkit.util.Counter) Cigar(htsjdk.samtools.Cigar) TreeSet(java.util.TreeSet) SAMRecord(htsjdk.samtools.SAMRecord) PrintWriter(java.io.PrintWriter)

Example 93 with SamReader

use of htsjdk.samtools.SamReader in project jvarkit by lindenb.

The class BamLiftOver, method doWork:

/**
 * Lifts over BAM alignments (read and mate positions) to a new genome build
 * using a UCSC chain file, writing an unsorted BAM with the new coordinates.
 * The original position is recorded in the custom "LO" attribute.
 * Returns RETURN_OK on success, -1 on error.
 */
@Override
public int doWork(final List<String> args) {
    // fall back to htsjdk's default minMatch when the user value is unset/invalid
    final double minMatch = (this.userMinMatch <= 0.0 ? LiftOver.DEFAULT_LIFTOVER_MINMATCH : this.userMinMatch);
    if (this.liftOverFile == null) {
        LOG.error("LiftOver file is undefined.");
        return -1;
    }
    if (this.faidx == null) {
        LOG.error("New Sequence Dictionary file is undefined.");
        return -1;
    }
    SAMRecordIterator iter = null;
    SamReader sfr = null;
    SAMFileWriter sfw = null;
    try {
        LOG.info("Reading " + liftOverFile);
        LiftOver liftOver = new LiftOver(liftOverFile);
        liftOver.setLiftOverMinMatch(minMatch);
        // dictionary of the TARGET build; lifted contigs must exist in it
        final SAMSequenceDictionary newDict = SAMSequenceDictionaryExtractor.extractDictionary(faidx);
        sfr = super.openSamReader(oneFileOrNull(args));
        final SAMFileHeader headerIn = sfr.getFileHeader();
        final SAMFileHeader headerOut = headerIn.clone();
        // lifted coordinates are no longer in input order
        headerOut.setSortOrder(SortOrder.unsorted);
        sfw = this.writingBamArgs.openSAMFileWriter(outputFile, headerOut, true);
        iter = sfr.iterator();
        while (iter.hasNext()) {
            final SAMRecord rec = iter.next();
            // work on a clone so the original record stays intact for reference below
            final SAMRecord copy = (SAMRecord) rec.clone();
            copy.setHeader(headerOut);
            // sb accumulates the original locus description, stored in the "LO" tag
            final StringBuilder sb = new StringBuilder();
            if (!rec.getReadUnmappedFlag()) {
                final String chrom = rec.getReferenceName();
                int pos = rec.getAlignmentStart();
                // lift only the start position (1-bp interval), keeping strand
                final Interval interval = liftOver.liftOver(new Interval(chrom, pos, pos, rec.getReadNegativeStrandFlag(), null));
                if (interval != null) {
                    sb.append(chrom + ":" + pos + ":" + (rec.getReadNegativeStrandFlag() ? "-" : "+"));
                    final SAMSequenceRecord ssr = newDict.getSequence(interval.getContig());
                    if (ssr == null) {
                        sfr.close();
                        sfr = null;
                        LOG.error("the chromosome " + interval.getContig() + " is undefined in the sequence dict.");
                        return -1;
                    }
                    copy.setReferenceName(ssr.getSequenceName());
                    copy.setReferenceIndex(ssr.getSequenceIndex());
                    copy.setAlignmentStart(interval.getStart());
                    copy.setReadNegativeStrandFlag(interval.isNegativeStrand());
                    // if the liftover flipped the strand, reverse-complement the bases
                    // and reverse the quality string to match
                    if (rec.getReadNegativeStrandFlag() != copy.getReadNegativeStrandFlag()) {
                        copy.setReadString(AcidNucleics.reverseComplement(rec.getReadString()));
                        byte[] qual = rec.getBaseQualities();
                        byte[] quals2 = new byte[qual.length];
                        for (int i = 0; i < qual.length; ++i) {
                            quals2[i] = qual[(qual.length - 1) - i];
                        }
                        copy.setBaseQualities(quals2);
                    }
                } else {
                    // no mapping in the new build: mark the record unmapped
                    sb.append(".");
                    SAMUtils.makeReadUnmapped(copy);
                }
            }
            // same procedure for the mate's position, appended after '/'
            if (rec.getReadPairedFlag() && !rec.getMateUnmappedFlag()) {
                sb.append("/");
                String chrom = rec.getMateReferenceName();
                int pos = rec.getMateAlignmentStart();
                final Interval interval = liftOver.liftOver(new Interval(chrom, pos, pos, rec.getMateNegativeStrandFlag(), null));
                if (interval != null) {
                    sb.append(chrom + ":" + pos + ":" + (rec.getMateNegativeStrandFlag() ? "-" : "+"));
                    final SAMSequenceRecord ssr = newDict.getSequence(interval.getContig());
                    if (ssr == null) {
                        sfr.close();
                        sfr = null;
                        LOG.error("the chromosome " + interval.getContig() + " is undefined in the sequence dict.");
                        return -1;
                    }
                    copy.setMateReferenceName(ssr.getSequenceName());
                    copy.setMateReferenceIndex(ssr.getSequenceIndex());
                    copy.setMateAlignmentStart(interval.getStart());
                    copy.setMateNegativeStrandFlag(interval.isNegativeStrand());
                    if (!copy.getReadUnmappedFlag() && copy.getReferenceIndex() == copy.getMateReferenceIndex()) // && copy.getReadNegativeStrandFlag()!=copy.getMateNegativeStrandFlag()
                    {
                    // don't change ?
                    } else {
                        // read/mate now on different contigs: pair geometry is no longer valid
                        copy.setProperPairFlag(false);
                        copy.setInferredInsertSize(0);
                    }
                } else {
                    sb.append(".");
                    SAMUtils.makeReadUnmapped(copy);
                }
            }
            if (sb.length() > 0)
                copy.setAttribute("LO", sb.toString());
            sfw.addAlignment(copy);
        }
        return RETURN_OK;
    } catch (Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(iter);
        CloserUtil.close(sfr);
        CloserUtil.close(sfw);
    }
}
Also used : LiftOver(htsjdk.samtools.liftover.LiftOver) SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) SAMFileWriter(htsjdk.samtools.SAMFileWriter) SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) SamReader(htsjdk.samtools.SamReader) SAMRecord(htsjdk.samtools.SAMRecord) SAMFileHeader(htsjdk.samtools.SAMFileHeader) Interval(htsjdk.samtools.util.Interval)

Example 94 with SamReader

use of htsjdk.samtools.SamReader in project jvarkit by lindenb.

The class SamExtractClip, method doWork:

/**
 * Extracts clipped sequences from one or more BAM/SAM inputs (or stdin) and
 * writes them as FASTQ to {@code outputFile} or stdout.
 *
 * @param args input BAM/SAM paths; empty means read from stdin
 * @return RETURN_OK on success, -1 on error
 */
@Override
public int doWork(final List<String> args) {
    SamReader r = null;
    BasicFastqWriter out = null;
    try {
        if (this.outputFile != null) {
            LOG.info("writing to " + this.outputFile);
            out = new BasicFastqWriter(this.outputFile);
        } else {
            LOG.info("writing to stdout");
            out = new BasicFastqWriter(stdout());
        }
        if (args.isEmpty()) {
            LOG.info("Reading from stdin");
            r = createSamReaderFactory().open(SamInputResource.of(stdin()));
            run(r, out);
            r.close();
            r = null; // BUGFIX: null after close (as in the loop below) so 'finally' won't close twice
        } else {
            for (final String filename : args) {
                LOG.info("Reading from " + filename);
                r = createSamReaderFactory().open(SamInputResource.of(filename));
                run(r, out);
                r.close();
                r = null;
            }
        }
        out.flush();
        return RETURN_OK;
    } catch (final Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        // safety net: only non-null if an exception interrupted the flow above
        CloserUtil.close(r);
        CloserUtil.close(out);
    }
}
Also used : SamReader(htsjdk.samtools.SamReader) BasicFastqWriter(htsjdk.samtools.fastq.BasicFastqWriter)

Example 95 with SamReader

use of htsjdk.samtools.SamReader in project jvarkit by lindenb.

The class SamFindClippedRegions, method doWork:

/*private static boolean closeTo(int pos1,int pos2, int max)
		{
		return Math.abs(pos2-pos1)<=max;
		}*/
/*
	private static boolean same(char c1,char c2)
		{
		if(c1=='N' || c2=='N') return false;
		return Character.toUpperCase(c1)==Character.toUpperCase(c2);
		}*/
/**
 * Scans one or more coordinate-sorted BAMs (one sample each) for soft-clipped
 * read clusters and emits candidate structural-variant sites as VCF records
 * (&lt;CLIP5&gt;/&lt;CLIP3&gt; symbolic alleles) on stdout.
 *
 * @param args one or more input BAM paths (required)
 * @return 0 on success, -1 on error
 */
@Override
public int doWork(List<String> args) {
    // assumed max read length: used to decide when the record buffer can be flushed
    int readLength = 150;
    if (args.isEmpty()) {
        LOG.error("illegal.number.of.arguments");
        return -1;
    }
    List<Input> inputs = new ArrayList<Input>();
    VariantContextWriter w = null;
    MergingSamRecordIterator merginIter = null;
    try {
        SAMSequenceDictionary dict = null;
        /* create input, collect sample names */
        Map<String, Input> sample2input = new HashMap<String, Input>();
        for (final String filename : args) {
            Input input = new Input(new File(filename));
            inputs.add(input);
            if (sample2input.containsKey(input.sampleName)) {
                LOG.error("Duplicate sample " + input.sampleName + " in " + input.bamFile + " and " + sample2input.get(input.sampleName).bamFile);
                return -1;
            }
            sample2input.put(input.sampleName, input);
            // all inputs must share one sequence dictionary
            if (dict == null) {
                dict = input.header.getSequenceDictionary();
            } else if (!SequenceUtil.areSequenceDictionariesEqual(dict, input.header.getSequenceDictionary())) {
                LOG.error("Found more than one distinct sequence dictionary");
                return -1;
            }
        }
        LOG.info("Sample N= " + sample2input.size());
        /* create merged iterator over all inputs, coordinate-sorted */
        List<SAMFileHeader> headers = new ArrayList<SAMFileHeader>(sample2input.size());
        for (Input input : inputs) headers.add(input.header);
        SamFileHeaderMerger headerMerger = new SamFileHeaderMerger(SortOrder.coordinate, headers, true);
        List<SamReader> readers = new ArrayList<SamReader>(sample2input.size());
        for (Input input : inputs) readers.add(input.samFileReaderScan);
        merginIter = new MergingSamRecordIterator(headerMerger, readers, true);
        Allele reference_allele = Allele.create("N", true);
        Allele[] alternate_alleles = new Allele[] { Allele.create("<CLIP5>", false), Allele.create("<CLIP3>", false) };
        Set<VCFHeaderLine> vcfHeaderLines = new HashSet<VCFHeaderLine>();
        for (Allele alt : alternate_alleles) {
            vcfHeaderLines.add(new VCFSimpleHeaderLine("<ID=" + alt.getDisplayString() + ",Description=\"StructVar\">", VCFHeaderVersion.VCF4_1, VCFConstants.ALT_HEADER_START.substring(2), Arrays.asList("ID", "Description")));
        }
        vcfHeaderLines.add(new VCFInfoHeaderLine("COUNT_SAMPLES", 1, VCFHeaderLineType.Integer, "Number of samples with  depth>=" + this.min_depth));
        vcfHeaderLines.add(new VCFInfoHeaderLine(VCFConstants.DEPTH_KEY, 1, VCFHeaderLineType.Integer, "Approximate read depth."));
        vcfHeaderLines.add(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_KEY, 1, VCFHeaderLineType.String, "Genotype"));
        vcfHeaderLines.add(new VCFFormatHeaderLine(VCFConstants.DEPTH_KEY, 1, VCFHeaderLineType.Integer, "Approximate read depth"));
        vcfHeaderLines.add(new VCFHeaderLine(getClass().getSimpleName() + "CmdLine", String.valueOf(getProgramCommandLine())));
        vcfHeaderLines.add(new VCFHeaderLine(getClass().getSimpleName() + "Version", String.valueOf(getVersion())));
        for (int side = 0; side < 2; ++side) {
            vcfHeaderLines.add(new VCFFormatHeaderLine("CN" + (side == 0 ? 5 : 3), 1, VCFHeaderLineType.Integer, "count clipped in " + (side == 0 ? 5 : 3) + "'"));
        }
        if (dict != null) {
            vcfHeaderLines.addAll(VCFUtils.samSequenceDictToVCFContigHeaderLine(dict));
        }
        VCFHeader vcfHeader = new VCFHeader(vcfHeaderLines, sample2input.keySet());
        w = VCFUtils.createVariantContextWriterToStdout();
        w.writeHeader(vcfHeader);
        // optional restriction to BED intervals; empty when no BED was given
        final IntervalTreeMap<Boolean> intervals = new IntervalTreeMap<>();
        SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(dict);
        if (bedFile != null) {
            final BedLineCodec bedLineCodec = new BedLineCodec();
            LOG.info("Reading " + bedFile);
            BufferedReader r = IOUtils.openFileForBufferedReading(bedFile);
            String line;
            while ((line = r.readLine()) != null) {
                BedLine bedLine = bedLineCodec.decode(line);
                if (bedLine == null)
                    continue;
                if (dict != null && dict.getSequence(bedLine.getContig()) == null) {
                    LOG.warning("undefined chromosome  in " + bedFile + " " + line);
                    continue;
                }
                intervals.put(bedLine.toInterval(), true);
            }
            CloserUtil.close(r);
        }
        // buffer of nearby clipped reads forming the current cluster
        LinkedList<SAMRecord> buffer = new LinkedList<SAMRecord>();
        // keep only primary, non-duplicate, QC-passed reads that are soft-clipped
        // at either end and carry a read-group with a sample name
        final Predicate<SAMRecord> filterSamRecords = new Predicate<SAMRecord>() {

            @Override
            public boolean test(SAMRecord rec) {
                if (rec.getReadUnmappedFlag())
                    return false;
                if (rec.isSecondaryOrSupplementary())
                    return false;
                if (rec.getDuplicateReadFlag())
                    return false;
                if (rec.getReadFailsVendorQualityCheckFlag())
                    return false;
                Cigar cigar = rec.getCigar();
                if (cigar == null || cigar.numCigarElements() < 2)
                    return false;
                boolean found_S = false;
                for (int side = 0; side < 2; ++side) {
                    CigarElement ce = cigar.getCigarElement(side == 0 ? 0 : cigar.numCigarElements() - 1);
                    // read must be clipped on 5' or 3' with a good length
                    if (!ce.getOperator().equals(CigarOperator.S))
                        continue;
                    found_S = true;
                    break;
                }
                if (!found_S)
                    return false;
                SAMReadGroupRecord g = rec.getReadGroup();
                if (g == null || g.getSample() == null || g.getSample().isEmpty())
                    return false;
                return true;
            }
        };
        final FilteringIterator<SAMRecord> forwardIterator = new FilteringIterator<SAMRecord>(merginIter, filterSamRecords);
        for (; ; ) {
            SAMRecord rec = null;
            if (forwardIterator.hasNext()) {
                rec = forwardIterator.next();
                progress.watch(rec);
                // BUGFIX: was 'intervals != null' which is always true, so with no BED file
                // the empty tree rejected every record and no variant was ever emitted
                if (bedFile != null && !intervals.containsOverlapping(new Interval(rec.getReferenceName(), rec.getAlignmentStart(), rec.getAlignmentEnd())))
                    continue;
            }
            // need to flush buffer ? (end of input, new contig, or gap larger than a read)
            if (rec == null || (!buffer.isEmpty() && !buffer.getLast().getReferenceIndex().equals(rec.getReferenceIndex())) || (!buffer.isEmpty() && buffer.getLast().getUnclippedEnd() + readLength < rec.getUnclippedStart())) {
                if (!buffer.isEmpty()) {
                    int chromStart = buffer.getFirst().getUnclippedStart();
                    int chromEnd = buffer.getFirst().getUnclippedEnd();
                    for (SAMRecord sam : buffer) {
                        chromStart = Math.min(chromStart, sam.getUnclippedStart());
                        chromEnd = Math.max(chromEnd, sam.getUnclippedEnd());
                    }
                    // slide a small window over the cluster and genotype each position
                    final int winShift = 5;
                    for (int pos = chromStart; pos + winShift <= chromEnd; pos += winShift) {
                        int[] count_big_clip = new int[] { 0, 0 };
                        List<Genotype> genotypes = new ArrayList<Genotype>();
                        Set<Allele> all_alleles = new HashSet<Allele>();
                        all_alleles.add(reference_allele);
                        boolean found_one_depth_ok = false;
                        int sum_depth = 0;
                        int samples_with_high_depth = 0;
                        for (String sample : sample2input.keySet()) {
                            GenotypeBuilder gb = new GenotypeBuilder(sample);
                            int[] count_clipped = new int[] { 0, 0 };
                            Set<Allele> sample_alleles = new HashSet<Allele>(3);
                            for (int side = 0; side < 2; ++side) {
                                for (SAMRecord sam : buffer) {
                                    if (!sam.getReadGroup().getSample().equals(sample))
                                        continue;
                                    Cigar cigar = sam.getCigar();
                                    CigarElement ce = cigar.getCigarElement(side == 0 ? 0 : cigar.numCigarElements() - 1);
                                    if (!ce.getOperator().equals(CigarOperator.S))
                                        continue;
                                    // genomic span covered by this clip (5' side=0, 3' side=1)
                                    int clipStart = (side == 0 ? sam.getUnclippedStart() : sam.getAlignmentEnd() + 1);
                                    int clipEnd = (side == 0 ? sam.getAlignmentStart() - 1 : sam.getUnclippedEnd());
                                    if ((pos + winShift < clipStart || pos > clipEnd))
                                        continue;
                                    count_clipped[side]++;
                                    if (ce.getLength() >= this.min_clip_length) {
                                        count_big_clip[side]++;
                                    }
                                    sample_alleles.add(alternate_alleles[side]);
                                    gb.attribute("CN" + (side == 0 ? 5 : 3), count_clipped[side]);
                                }
                            }
                            if (count_clipped[0] + count_clipped[1] == 0)
                                continue;
                            // '>=' matches the COUNT_SAMPLES header description ("depth>=")
                            if ((count_clipped[0] + count_clipped[1]) >= min_depth) {
                                found_one_depth_ok = true;
                                ++samples_with_high_depth;
                            }
                            sum_depth += (count_clipped[0] + count_clipped[1]);
                            gb.alleles(new ArrayList<Allele>(sample_alleles));
                            all_alleles.addAll(sample_alleles);
                            gb.DP(count_clipped[0] + count_clipped[1]);
                            genotypes.add(gb.make());
                        }
                        if (all_alleles.size() == 1) {
                            // all homozygotes
                            continue;
                        }
                        if (!found_one_depth_ok) {
                            continue;
                        }
                        // require at least one clip longer than min_clip_length
                        if (!(count_big_clip[0] >= 1 || count_big_clip[1] >= 1)) {
                            continue;
                        }
                        Map<String, Object> atts = new HashMap<String, Object>();
                        atts.put("COUNT_SAMPLES", samples_with_high_depth);
                        atts.put(VCFConstants.DEPTH_KEY, sum_depth);
                        VariantContextBuilder vcb = new VariantContextBuilder();
                        vcb.chr(buffer.getFirst().getReferenceName());
                        vcb.start(pos);
                        vcb.stop(pos + winShift);
                        vcb.alleles(all_alleles);
                        vcb.attributes(atts);
                        vcb.genotypes(genotypes);
                        w.add(vcb.make());
                    }
                    buffer.clear();
                }
                if (rec == null) {
                    break;
                }
            }
            buffer.add(rec);
        }
        merginIter.close();
        merginIter = null;
        progress.finish();
        w.close();
        w = null;
        return 0;
    } catch (Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        // BUGFIX: the merged iterator and the VCF writer were never closed on error
        CloserUtil.close(merginIter);
        CloserUtil.close(w);
        for (Input input : inputs) {
            CloserUtil.close(input);
        }
    }
}
Also used : VCFHeaderLine(htsjdk.variant.vcf.VCFHeaderLine) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) VCFSimpleHeaderLine(htsjdk.variant.vcf.VCFSimpleHeaderLine) Predicate(java.util.function.Predicate) HashSet(java.util.HashSet) Genotype(htsjdk.variant.variantcontext.Genotype) GenotypeBuilder(htsjdk.variant.variantcontext.GenotypeBuilder) CigarElement(htsjdk.samtools.CigarElement) LinkedList(java.util.LinkedList) BedLineCodec(com.github.lindenb.jvarkit.util.bio.bed.BedLineCodec) BedLine(com.github.lindenb.jvarkit.util.bio.bed.BedLine) SAMRecord(htsjdk.samtools.SAMRecord) SAMFileHeader(htsjdk.samtools.SAMFileHeader) File(java.io.File) Interval(htsjdk.samtools.util.Interval) MergingSamRecordIterator(htsjdk.samtools.MergingSamRecordIterator) SAMReadGroupRecord(htsjdk.samtools.SAMReadGroupRecord) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) SamReader(htsjdk.samtools.SamReader) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) VCFHeader(htsjdk.variant.vcf.VCFHeader) VCFFormatHeaderLine(htsjdk.variant.vcf.VCFFormatHeaderLine) SamFileHeaderMerger(htsjdk.samtools.SamFileHeaderMerger) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) VCFInfoHeaderLine(htsjdk.variant.vcf.VCFInfoHeaderLine) IOException(java.io.IOException) Allele(htsjdk.variant.variantcontext.Allele) Cigar(htsjdk.samtools.Cigar) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) BufferedReader(java.io.BufferedReader) IntervalTreeMap(htsjdk.samtools.util.IntervalTreeMap)

Aggregations

SamReader (htsjdk.samtools.SamReader)211 SAMRecord (htsjdk.samtools.SAMRecord)137 File (java.io.File)111 SAMRecordIterator (htsjdk.samtools.SAMRecordIterator)89 SAMFileHeader (htsjdk.samtools.SAMFileHeader)83 IOException (java.io.IOException)71 SamReaderFactory (htsjdk.samtools.SamReaderFactory)65 ArrayList (java.util.ArrayList)63 SAMFileWriter (htsjdk.samtools.SAMFileWriter)58 SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary)44 SAMSequenceDictionaryProgress (com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress)42 List (java.util.List)39 CigarElement (htsjdk.samtools.CigarElement)32 IndexedFastaSequenceFile (htsjdk.samtools.reference.IndexedFastaSequenceFile)32 HashMap (java.util.HashMap)31 Cigar (htsjdk.samtools.Cigar)30 SAMReadGroupRecord (htsjdk.samtools.SAMReadGroupRecord)30 PrintWriter (java.io.PrintWriter)27 Interval (htsjdk.samtools.util.Interval)26 HashSet (java.util.HashSet)26