Use of com.github.lindenb.jvarkit.iterator.EqualIterator in project jvarkit by lindenb.
The class BamToHaplotypes, method processInput.
/**
 * Scans every usable read of the input, collects the resulting {@code Haplotype}
 * records into a disk-backed sorting collection, then writes one tab-delimited
 * line per distinct haplotype together with the number of times it was observed.
 *
 * <p>In paired mode, records sharing the same read name are grouped; unmapped,
 * secondary and supplementary records are skipped, and R1/R2 are scanned together
 * only when both are present and map to the same contig. Otherwise each mapped
 * record is scanned on its own.
 *
 * @param headerIn header of the input; supplies the sequence dictionary and the
 *                 sample name (first non-blank read-group sample, else "SAMPLE")
 * @param iter0    iterator over the input records
 * @return 0 on success, -1 if anything failed (the error is logged)
 */
@Override
protected int processInput(final SAMFileHeader headerIn, final CloseableIterator<SAMRecord> iter0) {
    SortingCollection<Haplotype> sorting = null;
    try {
        final SAMSequenceDictionary dict = SequenceDictionaryUtils.extractRequired(headerIn);
        final String sample = headerIn.getReadGroups().stream()
                .map(rg -> rg.getSample())
                .filter(s -> !StringUtils.isBlank(s))
                .findFirst()
                .orElse("SAMPLE");
        sorting = SortingCollection.newInstance(
                Haplotype.class,
                new HaplotypeCodec(),
                (a, b) -> a.compareTo(b),
                this.writingSortingCollection.getMaxRecordsInRam(),
                this.writingSortingCollection.getTmpPaths());

        if (this.paired_mode) {
            // NOTE(review): grouping by read name presumably requires the input to be
            // query-name sorted/grouped — confirm against EqualIterator's contract.
            try (EqualIterator<SAMRecord> iter = new EqualIterator<>(iter0,
                    (a, b) -> a.getReadName().compareTo(b.getReadName()))) {
                while (iter.hasNext()) {
                    SAMRecord r1 = null;
                    SAMRecord r2 = null;
                    for (final SAMRecord rec : iter.next()) {
                        if (rec.getReadUnmappedFlag() || rec.isSecondaryOrSupplementary()) {
                            continue;
                        }
                        if (!rec.getReadPairedFlag()) {
                            scanVariants(dict, Collections.singletonList(rec), sorting);
                        } else if (r1 == null && rec.getFirstOfPairFlag()) {
                            r1 = rec;
                        } else if (r2 == null && rec.getSecondOfPairFlag()) {
                            r2 = rec;
                        }
                        // any further record with the same name/flag is ignored
                    }
                    if (r1 != null && r2 != null && r1.contigsMatch(r2)) {
                        // both mates on the same contig: one haplotype for the pair
                        scanVariants(dict, Arrays.asList(r1, r2), sorting);
                    } else {
                        // orphan mate(s), or mates on different contigs: scan separately
                        if (r1 != null) {
                            scanVariants(dict, Collections.singletonList(r1), sorting);
                        }
                        if (r2 != null) {
                            scanVariants(dict, Collections.singletonList(r2), sorting);
                        }
                    }
                }
            }
        } else {
            while (iter0.hasNext()) {
                final SAMRecord rec = iter0.next();
                if (rec.getReadUnmappedFlag()) {
                    continue;
                }
                scanVariants(dict, Collections.singletonList(rec), sorting);
            }
        }

        sorting.doneAdding();
        sorting.setDestructiveIteration(true);

        // Resources are closed in reverse order: writer, peekable wrapper, sorted iterator.
        try (CloseableIterator<Haplotype> iter = sorting.iterator();
             PeekableIterator<Haplotype> peek = new PeekableIterator<>(iter);
             PrintWriter out = super.openPathOrStdoutAsPrintWriter(this.outputFile)) {
            out.println("#CHROM\tSTART\tEND\tSAMPLE\tN-HAPLOTYPES\tN-VARIANTS\t(POS\\tALT)+");
            while (peek.hasNext()) {
                final Haplotype hap = peek.next();
                // equal haplotypes are adjacent after sorting: count the run
                int n = 1;
                while (peek.hasNext() && hap.equals(peek.peek())) {
                    peek.next(); // consume the duplicate
                    n++;
                }
                out.print(dict.getSequence(hap.tid).getContig());
                out.print("\t");
                out.print(hap.getStart());
                out.print("\t");
                out.print(hap.getEnd());
                out.print("\t");
                out.print(sample);
                out.print("\t");
                out.print(n);
                out.print("\t");
                out.print(hap.changes.size());
                for (final Change c : hap.changes) {
                    out.print("\t");
                    out.print(c.pos1);
                    out.print("\t");
                    out.print((char) c.alt);
                }
                out.println();
            }
            out.flush();
            // PrintWriter never throws on write failure; surface it explicitly
            // so a truncated output is not reported as a success.
            if (out.checkError()) {
                LOG.error("I/O error while writing haplotypes");
                return -1;
            }
        }
        return 0;
    } catch (final Throwable err) {
        LOG.error(err);
        return -1;
    } finally {
        // always release the temporary files, including on the error path
        if (sorting != null) {
            sorting.cleanup();
        }
    }
}
Aggregations