Search in sources :

Example 6 with EqualIterator

use of com.github.lindenb.jvarkit.iterator.EqualIterator in project jvarkit by lindenb.

the class BamToHaplotypes method processInput.

@Override
protected int processInput(final SAMFileHeader headerIn, final CloseableIterator<SAMRecord> iter0) {
    SortingCollection<Haplotype> sorting = null;
    try {
        final SAMSequenceDictionary dict = SequenceDictionaryUtils.extractRequired(headerIn);
        final String sample = headerIn.getReadGroups().stream().map(RG -> RG.getSample()).filter(R -> !StringUtils.isBlank(R)).findFirst().orElse("SAMPLE");
        sorting = SortingCollection.newInstance(Haplotype.class, new HaplotypeCodec(), (A, B) -> A.compareTo(B), this.writingSortingCollection.getMaxRecordsInRam(), this.writingSortingCollection.getTmpPaths());
        if (this.paired_mode) {
            try (EqualIterator<SAMRecord> iter = new EqualIterator<>(iter0, (A, B) -> A.getReadName().compareTo(B.getReadName()))) {
                while (iter.hasNext()) {
                    final LinkedList<SAMRecord> buffer = new LinkedList<>(iter.next());
                    SAMRecord R1 = null;
                    SAMRecord R2 = null;
                    while (!buffer.isEmpty()) {
                        final SAMRecord rec = buffer.pop();
                        if (rec.getReadUnmappedFlag() || rec.isSecondaryOrSupplementary()) {
                            continue;
                        } else if (!rec.getReadPairedFlag()) {
                            scanVariants(dict, Collections.singletonList(rec), sorting);
                        } else if (R1 == null && rec.getFirstOfPairFlag()) {
                            R1 = rec;
                        } else if (R2 == null && rec.getSecondOfPairFlag()) {
                            R2 = rec;
                        } else {
                            continue;
                        }
                    }
                    if (R1 != null && R2 != null) {
                        if (R1.contigsMatch(R2)) {
                            scanVariants(dict, Arrays.asList(R1, R2), sorting);
                        } else {
                            scanVariants(dict, Collections.singletonList(R1), sorting);
                            scanVariants(dict, Collections.singletonList(R2), sorting);
                        }
                    } else if (R1 != null && R2 == null) {
                        scanVariants(dict, Collections.singletonList(R1), sorting);
                    } else if (R2 != null && R1 == null) {
                        scanVariants(dict, Collections.singletonList(R2), sorting);
                    }
                }
            }
        } else {
            while (iter0.hasNext()) {
                final SAMRecord rec = iter0.next();
                if (rec.getReadUnmappedFlag()) {
                    continue;
                }
                scanVariants(dict, Collections.singletonList(rec), sorting);
            }
        }
        sorting.doneAdding();
        sorting.setDestructiveIteration(true);
        try (CloseableIterator<Haplotype> iter = sorting.iterator()) {
            PeekableIterator<Haplotype> peek = new PeekableIterator<Haplotype>(iter);
            try (PrintWriter out = super.openPathOrStdoutAsPrintWriter(this.outputFile)) {
                out.println("#CHROM\tSTART\tEND\tSAMPLE\tN-HAPLOTYPES\tN-VARIANTS\t(POS\\tALT)+");
                while (peek.hasNext()) {
                    int n = 1;
                    final Haplotype hap = peek.next();
                    while (peek.hasNext()) {
                        final Haplotype hap2 = peek.peek();
                        if (!hap.equals(hap2))
                            break;
                        // consumme
                        peek.next();
                        n++;
                    }
                    out.print(dict.getSequence(hap.tid).getContig());
                    out.print("\t");
                    out.print(hap.getStart());
                    out.print("\t");
                    out.print(hap.getEnd());
                    out.print("\t");
                    out.print(sample);
                    out.print("\t");
                    out.print(n);
                    out.print("\t");
                    out.print(hap.changes.size());
                    for (Change c : hap.changes) {
                        out.print("\t");
                        out.print(c.pos1);
                        out.print("\t");
                        out.print((char) c.alt);
                    }
                    out.println();
                }
                out.flush();
            }
            peek.close();
        }
        sorting.cleanup();
        return 0;
    } catch (Throwable err) {
        LOG.error(err);
        return -1;
    }
}
Also used : DataInputStream(java.io.DataInputStream) CloseableIterator(htsjdk.samtools.util.CloseableIterator) Allele(htsjdk.variant.variantcontext.Allele) Arrays(java.util.Arrays) MultiBamLauncher(com.github.lindenb.jvarkit.jcommander.MultiBamLauncher) Program(com.github.lindenb.jvarkit.util.jcommander.Program) Parameter(com.beust.jcommander.Parameter) BufferedVCFReader(com.github.lindenb.jvarkit.variant.vcf.BufferedVCFReader) SAMFileHeader(htsjdk.samtools.SAMFileHeader) ParametersDelegate(com.beust.jcommander.ParametersDelegate) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) AlignmentBlock(htsjdk.samtools.AlignmentBlock) DistanceParser(com.github.lindenb.jvarkit.util.bio.DistanceParser) NoSplitter(com.github.lindenb.jvarkit.util.jcommander.NoSplitter) DataOutputStream(java.io.DataOutputStream) AbstractDataCodec(com.github.lindenb.jvarkit.util.picard.AbstractDataCodec) RuntimeIOException(htsjdk.samtools.util.RuntimeIOException) VCFReaderFactory(com.github.lindenb.jvarkit.variant.vcf.VCFReaderFactory) LinkedList(java.util.LinkedList) Path(java.nio.file.Path) PeekableIterator(htsjdk.samtools.util.PeekableIterator) CloserUtil(htsjdk.samtools.util.CloserUtil) PrintWriter(java.io.PrintWriter) SequenceDictionaryUtils(com.github.lindenb.jvarkit.util.bio.SequenceDictionaryUtils) SortingCollection(htsjdk.samtools.util.SortingCollection) EqualIterator(com.github.lindenb.jvarkit.iterator.EqualIterator) Logger(com.github.lindenb.jvarkit.util.log.Logger) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) VCFReader(htsjdk.variant.vcf.VCFReader) Set(java.util.Set) IOException(java.io.IOException) EOFException(java.io.EOFException) JvarkitException(com.github.lindenb.jvarkit.lang.JvarkitException) SAMRecord(htsjdk.samtools.SAMRecord) List(java.util.List) StringUtils(com.github.lindenb.jvarkit.lang.StringUtils) VariantContext(htsjdk.variant.variantcontext.VariantContext) SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) Collections(java.util.Collections) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) EqualIterator(com.github.lindenb.jvarkit.iterator.EqualIterator) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) LinkedList(java.util.LinkedList) SAMRecord(htsjdk.samtools.SAMRecord) PeekableIterator(htsjdk.samtools.util.PeekableIterator) PrintWriter(java.io.PrintWriter)

Aggregations

EqualIterator (com.github.lindenb.jvarkit.iterator.EqualIterator)6 SAMRecord (htsjdk.samtools.SAMRecord)5 List (java.util.List)5 Parameter (com.beust.jcommander.Parameter)4 StringUtils (com.github.lindenb.jvarkit.lang.StringUtils)4 Program (com.github.lindenb.jvarkit.util.jcommander.Program)4 Logger (com.github.lindenb.jvarkit.util.log.Logger)4 SAMFileHeader (htsjdk.samtools.SAMFileHeader)4 CloseableIterator (htsjdk.samtools.util.CloseableIterator)4 IOException (java.io.IOException)4 ArrayList (java.util.ArrayList)4 ParametersDelegate (com.beust.jcommander.ParametersDelegate)3 MultiBamLauncher (com.github.lindenb.jvarkit.jcommander.MultiBamLauncher)3 DistanceParser (com.github.lindenb.jvarkit.util.bio.DistanceParser)3 SequenceDictionaryUtils (com.github.lindenb.jvarkit.util.bio.SequenceDictionaryUtils)3 NoSplitter (com.github.lindenb.jvarkit.util.jcommander.NoSplitter)3 SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary)3 SimpleInterval (com.github.lindenb.jvarkit.samtools.util.SimpleInterval)2 BufferedVCFReader (com.github.lindenb.jvarkit.variant.vcf.BufferedVCFReader)2 VCFReaderFactory (com.github.lindenb.jvarkit.variant.vcf.VCFReaderFactory)2