use of com.github.lindenb.jvarkit.util.iterator.ForwardPeekIterator in project jvarkit by lindenb.
the class SamTranslocations method doWork.
@Override
public int doWork(final List<String> args) {
if (this.max_distance <= 0) {
LOG.error("max_distance <=0 (" + max_distance + ")");
return -1;
}
PrintWriter pw = null;
ConcatSam.ConcatSamIterator samIter = null;
ForwardPeekIterator<SAMRecord> forwardPeekIterator = null;
try {
samIter = new ConcatSam.Factory().addInterval(this.region_str).open(args);
final SAMSequenceDictionary dict = samIter.getFileHeader().getSequenceDictionary();
if (dict.size() < 2) {
LOG.error("Not enough contigs in sequence dictionary. Expected at least 2.");
return -1;
}
forwardPeekIterator = new ForwardPeekIteratorImpl<>(new FilterIterator<>(samIter, rec -> {
if (rec.getReadUnmappedFlag())
return false;
if (!rec.getReadPairedFlag())
return false;
if (rec.getMateUnmappedFlag())
return false;
final int tid1 = rec.getReferenceIndex();
final int tid2 = rec.getMateReferenceIndex();
if (tid1 == tid2)
return false;
if (this.samRecordFilter.filterOut(rec))
return false;
return true;
}));
pw = openFileOrStdoutAsPrintWriter(this.outputFile);
pw.print("#chrom1");
pw.print('\t');
pw.print("chrom1-start");
pw.print('\t');
pw.print("chrom1-end");
pw.print('\t');
pw.print("middle1\tstrand_plus_before_mid1_count\tstrand_plus_before_mid1_percent\tstrand_minus_after_mid1_count\tstrand_minus_after_mid1_percent");
pw.print('\t');
pw.print("chrom2");
pw.print('\t');
pw.print("chrom2-start");
pw.print('\t');
pw.print("chrom2-end");
pw.print('\t');
pw.print("middle2\tstrand_plus_before_mid2_count\tstrand_plus_before_mid2_percent\tstrand_minus_after_mid2_count\tstrand_minus_after_mid2_percent");
pw.print('\t');
pw.print("count-reads");
pw.print('\t');
pw.print("count-clipped");
pw.print('\t');
pw.print("partition");
pw.println();
final Map<String, PartitionState> partition2state = new HashMap<>();
final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(dict).logger(LOG);
while (forwardPeekIterator.hasNext()) {
final SAMRecord rec = progress.watch(forwardPeekIterator.next());
final String partition = this.samRecordPartition.getPartion(rec, "N/A");
PartitionState partitionState = partition2state.get(partition);
if (partitionState == null) {
partitionState = new PartitionState(partition);
partition2state.put(partition, partitionState);
}
if (partitionState.last_rec != null) {
if (partitionState.last_rec == rec) {
// reset last
partitionState.last_rec = null;
}
continue;
}
// searching for -->
if (rec.getReadNegativeStrandFlag())
continue;
// find forward records
final List<SAMRecord> positiveStrandList = new ArrayList<>();
positiveStrandList.add(rec);
int x = 0;
for (; ; ) {
final SAMRecord rec2 = forwardPeekIterator.peek(x);
if (rec2 == null) {
break;
}
if (!rec.getReferenceIndex().equals(rec2.getReferenceIndex())) {
break;
}
if (rec2.getStart() - rec.getEnd() > this.max_distance) {
break;
}
if (rec2.getReadNegativeStrandFlag() || Math.abs(rec2.getEnd() - rec.getEnd()) > this.fully_distance || !rec2.getMateReferenceName().equals(rec.getMateReferenceName())) {
++x;
continue;
}
positiveStrandList.add(rec2);
partitionState.last_rec = rec2;
++x;
}
if (positiveStrandList.size() < this.min_count_forward) {
partitionState.last_rec = null;
continue;
}
// find reverse records
x = 0;
final List<SAMRecord> negativeStrandList = new ArrayList<>();
for (; ; ) {
final SAMRecord rec2 = forwardPeekIterator.peek(x);
if (rec2 == null) {
break;
}
if (!rec.getReferenceIndex().equals(rec2.getReferenceIndex())) {
break;
}
if (rec2.getStart() - rec.getEnd() > this.max_distance) {
break;
}
if (!rec2.getReadNegativeStrandFlag() || (rec2.getStart() < rec.getEnd() && (rec.getEnd() - rec2.getStart()) > this.fully_distance) || !rec2.getMateReferenceName().equals(rec.getMateReferenceName())) {
++x;
continue;
}
negativeStrandList.add(rec2);
++x;
}
if (negativeStrandList.size() < this.min_count_forward) {
partitionState.last_rec = null;
continue;
}
final int start1 = (int) Percentile.median().evaluate(positiveStrandList.stream().mapToInt(R -> R.getEnd()));
final int end1 = (int) Percentile.median().evaluate(negativeStrandList.stream().mapToInt(R -> R.getStart()));
final int mid1 = (start1 + end1) / 2;
final int mid2 = (int) Percentile.median().evaluate(negativeStrandList.stream().mapToInt(R -> R.getMateAlignmentStart()));
pw.print(rec.getReferenceName());
pw.print('\t');
pw.print(positiveStrandList.stream().mapToInt(R -> R.getEnd()).max().getAsInt());
pw.print('\t');
pw.print(negativeStrandList.stream().mapToInt(R -> R.getStart()).min().getAsInt());
pw.print('\t');
pw.print(mid1);
pw.print('\t');
pw.print(positiveStrandList.stream().filter(R -> R.getEnd() <= mid1).count());
pw.print('\t');
pw.print((int) (100.0 * (positiveStrandList.stream().filter(R -> R.getEnd() <= mid1).count()) / positiveStrandList.size()));
pw.print('\t');
pw.print(negativeStrandList.stream().filter(R -> R.getStart() >= mid1).count());
pw.print('\t');
pw.print((int) (100.0 * (negativeStrandList.stream().filter(R -> R.getStart() >= mid1).count()) / positiveStrandList.size()));
pw.print('\t');
pw.print(rec.getMateReferenceName());
pw.print('\t');
pw.print(negativeStrandList.stream().mapToInt(R -> R.getMateAlignmentStart()).min().getAsInt());
pw.print('\t');
pw.print(negativeStrandList.stream().mapToInt(R -> R.getMateAlignmentStart()).max().getAsInt());
pw.print('\t');
pw.print(mid2);
pw.print('\t');
pw.print(negativeStrandList.stream().filter(R -> R.getMateAlignmentStart() <= mid2).count());
pw.print('\t');
pw.print((int) (100.0 * (negativeStrandList.stream().filter(R -> R.getMateAlignmentStart() <= mid2).count()) / negativeStrandList.size()));
pw.print('\t');
pw.print(negativeStrandList.stream().filter(R -> R.getMateAlignmentStart() >= mid2).count());
pw.print('\t');
pw.print((int) (100.0 * (negativeStrandList.stream().filter(R -> R.getMateAlignmentStart() >= mid2).count()) / negativeStrandList.size()));
pw.print('\t');
pw.print(positiveStrandList.size() + negativeStrandList.size());
pw.print('\t');
pw.print(positiveStrandList.stream().filter(R -> R.getCigar() != null && R.getCigar().isClipped()).count() + negativeStrandList.stream().filter(R -> R.getCigar() != null && R.getCigar().isClipped()).count());
pw.print('\t');
pw.print(partition);
pw.println();
}
progress.finish();
forwardPeekIterator.close();
forwardPeekIterator = null;
samIter.close();
samIter = null;
pw.flush();
pw.close();
pw = null;
return 0;
} catch (final Throwable err) {
LOG.error(err);
return -1;
} finally {
CloserUtil.close(forwardPeekIterator);
CloserUtil.close(samIter);
CloserUtil.close(pw);
}
}
Aggregations