Search in sources :

Example 1 with ForwardPeekIterator

use of com.github.lindenb.jvarkit.util.iterator.ForwardPeekIterator in project jvarkit by lindenb.

the class SamTranslocations method doWork.

@Override
public int doWork(final List<String> args) {
    if (this.max_distance <= 0) {
        LOG.error("max_distance <=0 (" + max_distance + ")");
        return -1;
    }
    PrintWriter pw = null;
    ConcatSam.ConcatSamIterator samIter = null;
    ForwardPeekIterator<SAMRecord> forwardPeekIterator = null;
    try {
        samIter = new ConcatSam.Factory().addInterval(this.region_str).open(args);
        final SAMSequenceDictionary dict = samIter.getFileHeader().getSequenceDictionary();
        if (dict.size() < 2) {
            LOG.error("Not enough contigs in sequence dictionary. Expected at least 2.");
            return -1;
        }
        forwardPeekIterator = new ForwardPeekIteratorImpl<>(new FilterIterator<>(samIter, rec -> {
            if (rec.getReadUnmappedFlag())
                return false;
            if (!rec.getReadPairedFlag())
                return false;
            if (rec.getMateUnmappedFlag())
                return false;
            final int tid1 = rec.getReferenceIndex();
            final int tid2 = rec.getMateReferenceIndex();
            if (tid1 == tid2)
                return false;
            if (this.samRecordFilter.filterOut(rec))
                return false;
            return true;
        }));
        pw = openFileOrStdoutAsPrintWriter(this.outputFile);
        pw.print("#chrom1");
        pw.print('\t');
        pw.print("chrom1-start");
        pw.print('\t');
        pw.print("chrom1-end");
        pw.print('\t');
        pw.print("middle1\tstrand_plus_before_mid1_count\tstrand_plus_before_mid1_percent\tstrand_minus_after_mid1_count\tstrand_minus_after_mid1_percent");
        pw.print('\t');
        pw.print("chrom2");
        pw.print('\t');
        pw.print("chrom2-start");
        pw.print('\t');
        pw.print("chrom2-end");
        pw.print('\t');
        pw.print("middle2\tstrand_plus_before_mid2_count\tstrand_plus_before_mid2_percent\tstrand_minus_after_mid2_count\tstrand_minus_after_mid2_percent");
        pw.print('\t');
        pw.print("count-reads");
        pw.print('\t');
        pw.print("count-clipped");
        pw.print('\t');
        pw.print("partition");
        pw.println();
        final Map<String, PartitionState> partition2state = new HashMap<>();
        final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(dict).logger(LOG);
        while (forwardPeekIterator.hasNext()) {
            final SAMRecord rec = progress.watch(forwardPeekIterator.next());
            final String partition = this.samRecordPartition.getPartion(rec, "N/A");
            PartitionState partitionState = partition2state.get(partition);
            if (partitionState == null) {
                partitionState = new PartitionState(partition);
                partition2state.put(partition, partitionState);
            }
            if (partitionState.last_rec != null) {
                if (partitionState.last_rec == rec) {
                    // reset last
                    partitionState.last_rec = null;
                }
                continue;
            }
            // searching for -->
            if (rec.getReadNegativeStrandFlag())
                continue;
            // find forward records
            final List<SAMRecord> positiveStrandList = new ArrayList<>();
            positiveStrandList.add(rec);
            int x = 0;
            for (; ; ) {
                final SAMRecord rec2 = forwardPeekIterator.peek(x);
                if (rec2 == null) {
                    break;
                }
                if (!rec.getReferenceIndex().equals(rec2.getReferenceIndex())) {
                    break;
                }
                if (rec2.getStart() - rec.getEnd() > this.max_distance) {
                    break;
                }
                if (rec2.getReadNegativeStrandFlag() || Math.abs(rec2.getEnd() - rec.getEnd()) > this.fully_distance || !rec2.getMateReferenceName().equals(rec.getMateReferenceName())) {
                    ++x;
                    continue;
                }
                positiveStrandList.add(rec2);
                partitionState.last_rec = rec2;
                ++x;
            }
            if (positiveStrandList.size() < this.min_count_forward) {
                partitionState.last_rec = null;
                continue;
            }
            // find reverse records
            x = 0;
            final List<SAMRecord> negativeStrandList = new ArrayList<>();
            for (; ; ) {
                final SAMRecord rec2 = forwardPeekIterator.peek(x);
                if (rec2 == null) {
                    break;
                }
                if (!rec.getReferenceIndex().equals(rec2.getReferenceIndex())) {
                    break;
                }
                if (rec2.getStart() - rec.getEnd() > this.max_distance) {
                    break;
                }
                if (!rec2.getReadNegativeStrandFlag() || (rec2.getStart() < rec.getEnd() && (rec.getEnd() - rec2.getStart()) > this.fully_distance) || !rec2.getMateReferenceName().equals(rec.getMateReferenceName())) {
                    ++x;
                    continue;
                }
                negativeStrandList.add(rec2);
                ++x;
            }
            if (negativeStrandList.size() < this.min_count_forward) {
                partitionState.last_rec = null;
                continue;
            }
            final int start1 = (int) Percentile.median().evaluate(positiveStrandList.stream().mapToInt(R -> R.getEnd()));
            final int end1 = (int) Percentile.median().evaluate(negativeStrandList.stream().mapToInt(R -> R.getStart()));
            final int mid1 = (start1 + end1) / 2;
            final int mid2 = (int) Percentile.median().evaluate(negativeStrandList.stream().mapToInt(R -> R.getMateAlignmentStart()));
            pw.print(rec.getReferenceName());
            pw.print('\t');
            pw.print(positiveStrandList.stream().mapToInt(R -> R.getEnd()).max().getAsInt());
            pw.print('\t');
            pw.print(negativeStrandList.stream().mapToInt(R -> R.getStart()).min().getAsInt());
            pw.print('\t');
            pw.print(mid1);
            pw.print('\t');
            pw.print(positiveStrandList.stream().filter(R -> R.getEnd() <= mid1).count());
            pw.print('\t');
            pw.print((int) (100.0 * (positiveStrandList.stream().filter(R -> R.getEnd() <= mid1).count()) / positiveStrandList.size()));
            pw.print('\t');
            pw.print(negativeStrandList.stream().filter(R -> R.getStart() >= mid1).count());
            pw.print('\t');
            pw.print((int) (100.0 * (negativeStrandList.stream().filter(R -> R.getStart() >= mid1).count()) / positiveStrandList.size()));
            pw.print('\t');
            pw.print(rec.getMateReferenceName());
            pw.print('\t');
            pw.print(negativeStrandList.stream().mapToInt(R -> R.getMateAlignmentStart()).min().getAsInt());
            pw.print('\t');
            pw.print(negativeStrandList.stream().mapToInt(R -> R.getMateAlignmentStart()).max().getAsInt());
            pw.print('\t');
            pw.print(mid2);
            pw.print('\t');
            pw.print(negativeStrandList.stream().filter(R -> R.getMateAlignmentStart() <= mid2).count());
            pw.print('\t');
            pw.print((int) (100.0 * (negativeStrandList.stream().filter(R -> R.getMateAlignmentStart() <= mid2).count()) / negativeStrandList.size()));
            pw.print('\t');
            pw.print(negativeStrandList.stream().filter(R -> R.getMateAlignmentStart() >= mid2).count());
            pw.print('\t');
            pw.print((int) (100.0 * (negativeStrandList.stream().filter(R -> R.getMateAlignmentStart() >= mid2).count()) / negativeStrandList.size()));
            pw.print('\t');
            pw.print(positiveStrandList.size() + negativeStrandList.size());
            pw.print('\t');
            pw.print(positiveStrandList.stream().filter(R -> R.getCigar() != null && R.getCigar().isClipped()).count() + negativeStrandList.stream().filter(R -> R.getCigar() != null && R.getCigar().isClipped()).count());
            pw.print('\t');
            pw.print(partition);
            pw.println();
        }
        progress.finish();
        forwardPeekIterator.close();
        forwardPeekIterator = null;
        samIter.close();
        samIter = null;
        pw.flush();
        pw.close();
        pw = null;
        return 0;
    } catch (final Throwable err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(forwardPeekIterator);
        CloserUtil.close(samIter);
        CloserUtil.close(pw);
    }
}
Also used : PrintWriter(java.io.PrintWriter) Program(com.github.lindenb.jvarkit.util.jcommander.Program) Parameter(com.beust.jcommander.Parameter) FilterIterator(com.github.lindenb.jvarkit.util.iterator.FilterIterator) Logger(com.github.lindenb.jvarkit.util.log.Logger) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) HashMap(java.util.HashMap) SAMRecordPartition(com.github.lindenb.jvarkit.util.samtools.SAMRecordPartition) Percentile(com.github.lindenb.jvarkit.math.stats.Percentile) IntervalParser(com.github.lindenb.jvarkit.util.bio.IntervalParser) ForwardPeekIterator(com.github.lindenb.jvarkit.util.iterator.ForwardPeekIterator) File(java.io.File) SAMRecord(htsjdk.samtools.SAMRecord) ArrayList(java.util.ArrayList) ForwardPeekIteratorImpl(com.github.lindenb.jvarkit.util.iterator.ForwardPeekIteratorImpl) SamRecordFilter(htsjdk.samtools.filter.SamRecordFilter) List(java.util.List) SamRecordJEXLFilter(com.github.lindenb.jvarkit.util.samtools.SamRecordJEXLFilter) Map(java.util.Map) ConcatSam(com.github.lindenb.jvarkit.tools.misc.ConcatSam) Launcher(com.github.lindenb.jvarkit.util.jcommander.Launcher) CloserUtil(htsjdk.samtools.util.CloserUtil) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) SAMRecord(htsjdk.samtools.SAMRecord) FilterIterator(com.github.lindenb.jvarkit.util.iterator.FilterIterator) ConcatSam(com.github.lindenb.jvarkit.tools.misc.ConcatSam) PrintWriter(java.io.PrintWriter)

Aggregations

Parameter (com.beust.jcommander.Parameter)1 Percentile (com.github.lindenb.jvarkit.math.stats.Percentile)1 ConcatSam (com.github.lindenb.jvarkit.tools.misc.ConcatSam)1 IntervalParser (com.github.lindenb.jvarkit.util.bio.IntervalParser)1 FilterIterator (com.github.lindenb.jvarkit.util.iterator.FilterIterator)1 ForwardPeekIterator (com.github.lindenb.jvarkit.util.iterator.ForwardPeekIterator)1 ForwardPeekIteratorImpl (com.github.lindenb.jvarkit.util.iterator.ForwardPeekIteratorImpl)1 Launcher (com.github.lindenb.jvarkit.util.jcommander.Launcher)1 Program (com.github.lindenb.jvarkit.util.jcommander.Program)1 Logger (com.github.lindenb.jvarkit.util.log.Logger)1 SAMSequenceDictionaryProgress (com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress)1 SAMRecordPartition (com.github.lindenb.jvarkit.util.samtools.SAMRecordPartition)1 SamRecordJEXLFilter (com.github.lindenb.jvarkit.util.samtools.SamRecordJEXLFilter)1 SAMRecord (htsjdk.samtools.SAMRecord)1 SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary)1 SamRecordFilter (htsjdk.samtools.filter.SamRecordFilter)1 CloserUtil (htsjdk.samtools.util.CloserUtil)1 File (java.io.File)1 PrintWriter (java.io.PrintWriter)1 ArrayList (java.util.ArrayList)1