Search in sources :

Example 1 with EqualIterator

use of com.github.lindenb.jvarkit.iterator.EqualIterator in project jvarkit by lindenb.

the class Biostar489074 method processInput.

@Override
protected int processInput(final SAMFileHeader header, final CloseableIterator<SAMRecord> iter0) {
    VariantContextWriter out = null;
    GenomicSequence genome = null;
    SortingCollection<Call> sorting = null;
    try {
        this.indexedFastaRef = ReferenceSequenceFileFactory.getReferenceSequenceFile(getRequiredReferencePath());
        if (!(header.getSortOrder().equals(SAMFileHeader.SortOrder.unsorted) || header.getSortOrder().equals(SAMFileHeader.SortOrder.queryname))) {
            getLogger().error("input should be sorted with 'samtools sort -n' or 'samtools collate' but got " + header.getSortOrder());
            return -1;
        }
        final SAMSequenceDictionary dict = SequenceDictionaryUtils.extractRequired(header);
        this.writingVariantsDelegate.dictionary(dict);
        sorting = SortingCollection.newInstance(Call.class, new CallCodec(), (A, B) -> A.compare2(B), this.writingSortingCollection.getMaxRecordsInRam(), this.writingSortingCollection.getTmpPaths());
        sorting.setDestructiveIteration(true);
        final List<String> samples = header.getReadGroups().stream().map(R -> this.groupBy.apply(R)).filter(S -> !StringUtils.isBlank(S)).sorted().collect(Collectors.toSet()).stream().collect(Collectors.toList());
        final Map<String, Integer> rgid2idx = new HashMap<>();
        for (final SAMReadGroupRecord rg : header.getReadGroups()) {
            final String sn = this.groupBy.apply(rg);
            if (StringUtils.isBlank(sn))
                continue;
            int idx = samples.indexOf(sn);
            if (idx == -1)
                continue;
            rgid2idx.put(rg.getId(), idx);
        }
        try (CloseableIterator<List<SAMRecord>> iter = new EqualIterator<>(iter0, (A, B) -> A.getReadName().compareTo(B.getReadName()))) {
            while (iter.hasNext()) {
                final List<SAMRecord> buffer = iter.next();
                int read1_idx = -1;
                int read2_idx = -1;
                for (int i = 0; i < buffer.size(); i++) {
                    final SAMRecord rec = buffer.get(i);
                    if (!rec.getReadPairedFlag())
                        continue;
                    if (rec.getReadUnmappedFlag())
                        continue;
                    if (rec.getMateUnmappedFlag())
                        continue;
                    if (rec.isSecondaryOrSupplementary())
                        continue;
                    if (rec.getReadBases() == SAMRecord.NULL_SEQUENCE)
                        continue;
                    if (rec.getFirstOfPairFlag()) {
                        read1_idx = i;
                    } else if (rec.getSecondOfPairFlag()) {
                        read2_idx = i;
                    }
                }
                if (read1_idx == -1 || read2_idx == -1 || read1_idx == read2_idx)
                    continue;
                final SAMRecord rec1a = buffer.get(read1_idx);
                final SAMRecord rec2a = buffer.get(read2_idx);
                if (!rec1a.overlaps(rec2a))
                    continue;
                final int chromStart = Math.max(rec1a.getStart(), rec2a.getStart());
                final int chromEnd = Math.min(rec1a.getEnd(), rec2a.getEnd());
                if (chromStart > chromEnd)
                    continue;
                final Integer sampleid = rgid2idx.get(rec1a.getReadGroup().getId());
                if (sampleid == null)
                    continue;
                if (genome == null || !genome.contigsMatch(rec1a)) {
                    genome = new GenomicSequence(this.indexedFastaRef, rec1a.getContig());
                }
                final Set<Call> calls1 = new HashSet<>();
                final Set<Call> calls2 = new HashSet<>();
                for (int side = 0; side < 2; side++) {
                    final SAMRecord rec = (side == 0 ? rec1a : rec2a);
                    final Set<Call> calls = (side == 0 ? calls1 : calls2);
                    final byte[] bases = rec.getReadBases();
                    for (AlignmentBlock ab : rec.getAlignmentBlocks()) {
                        for (int n = 0; n < ab.getLength(); ++n) {
                            final int ref = ab.getReferenceStart() + n;
                            if (ref < chromStart)
                                continue;
                            if (ref > chromEnd)
                                break;
                            if (ref < 0 || ref >= genome.length())
                                continue;
                            // 0 based
                            final byte refBase = (byte) Character.toUpperCase(genome.charAt(ref - 1));
                            if (!AcidNucleics.isATGC(refBase))
                                continue;
                            // 0 based
                            final byte readBase = (byte) Character.toUpperCase(bases[(ab.getReadStart() - 1) + n]);
                            if (readBase == refBase)
                                continue;
                            final Call call = new Call();
                            call.sampleid = sampleid;
                            call.tid = rec1a.getReferenceIndex().intValue();
                            call.ref = refBase;
                            call.alt = readBase;
                            call.pos = ref;
                            calls.add(call);
                        }
                    }
                }
                calls1.retainAll(calls2);
                if (calls1.isEmpty())
                    continue;
                for (final Call c : calls1) {
                    sorting.add(c);
                }
            }
            sorting.doneAdding();
            out = this.writingVariantsDelegate.dictionary(dict).open(this.outputFile);
            final Set<VCFHeaderLine> metaData = new HashSet<>();
            VCFStandardHeaderLines.addStandardInfoLines(metaData, true, VCFConstants.DEPTH_KEY);
            VCFStandardHeaderLines.addStandardInfoLines(metaData, true, VCFConstants.ALLELE_COUNT_KEY);
            VCFStandardHeaderLines.addStandardInfoLines(metaData, true, VCFConstants.ALLELE_NUMBER_KEY);
            VCFStandardHeaderLines.addStandardInfoLines(metaData, true, VCFConstants.ALLELE_FREQUENCY_KEY);
            VCFStandardHeaderLines.addStandardFormatLines(metaData, true, VCFConstants.GENOTYPE_KEY);
            VCFStandardHeaderLines.addStandardFormatLines(metaData, true, VCFConstants.DEPTH_KEY);
            VCFStandardHeaderLines.addStandardFormatLines(metaData, true, VCFConstants.GENOTYPE_ALLELE_DEPTHS);
            final VCFHeader header2 = new VCFHeader(metaData, samples);
            header2.setSequenceDictionary(dict);
            JVarkitVersion.getInstance().addMetaData(this, header2);
            out.writeHeader(header2);
            try (CloseableIterator<Call> iter1 = sorting.iterator()) {
                try (EqualRangeIterator<Call> iter2 = new EqualRangeIterator<>(iter1, (A, B) -> A.compare1(B))) {
                    while (iter2.hasNext()) {
                        final List<Call> calls = iter2.next();
                        if (calls.isEmpty())
                            continue;
                        final Call first = calls.get(0);
                        final Set<Allele> altAllelesSet = calls.stream().map(A -> Allele.create(A.alt, false)).collect(Collectors.toSet());
                        final List<Allele> altAllelesList = new ArrayList<>(altAllelesSet);
                        final List<Allele> vcAlleles = new ArrayList<>(altAllelesList.size() + 1);
                        vcAlleles.add(Allele.create(first.ref, true));
                        vcAlleles.addAll(altAllelesList);
                        final List<Genotype> genotypes = new ArrayList<>(samples.size());
                        int DP = 0;
                        for (int i = 0; i < samples.size(); i++) {
                            final String sn = samples.get(i);
                            final Counter<Allele> allele2count = new Counter<>();
                            for (Call c : calls) {
                                if (c.sampleid != i)
                                    continue;
                                allele2count.incr(Allele.create(c.alt, false));
                            }
                            Genotype gt;
                            if (allele2count.isEmpty()) {
                                gt = GenotypeBuilder.createMissing(sn, this.ploidy);
                            } else {
                                final int[] array = new int[vcAlleles.size()];
                                for (int j = 0; j < vcAlleles.size(); j++) {
                                    array[j] = (int) allele2count.count(vcAlleles.get(j));
                                }
                                final GenotypeBuilder gb = new GenotypeBuilder(sn, new ArrayList<>(allele2count.keySet()));
                                gb.AD(array);
                                gb.DP((int) allele2count.getTotal());
                                DP += (int) allele2count.getTotal();
                                gt = gb.make();
                            }
                            genotypes.add(gt);
                        }
                        final VariantContextBuilder vcb = new VariantContextBuilder(null, dict.getSequence(first.tid).getContig(), first.pos, first.pos, vcAlleles);
                        vcb.attribute(VCFConstants.DEPTH_KEY, DP);
                        vcb.genotypes(genotypes);
                        out.add(vcb.make());
                    }
                }
            }
        }
        sorting.cleanup();
        out.close();
        out = null;
        out = null;
        this.indexedFastaRef.close();
        this.indexedFastaRef = null;
        return 0;
    } catch (final Throwable err) {
        getLogger().error(err);
        return -1;
    } finally {
        CloserUtil.close(out);
        CloserUtil.close(this.indexedFastaRef);
        ;
    }
}
Also used : WritingVariantsDelegate(com.github.lindenb.jvarkit.variant.variantcontext.writer.WritingVariantsDelegate) Genotype(htsjdk.variant.variantcontext.Genotype) VCFHeaderLine(htsjdk.variant.vcf.VCFHeaderLine) DataInputStream(java.io.DataInputStream) CloseableIterator(htsjdk.samtools.util.CloseableIterator) Allele(htsjdk.variant.variantcontext.Allele) MultiBamLauncher(com.github.lindenb.jvarkit.jcommander.MultiBamLauncher) Program(com.github.lindenb.jvarkit.util.jcommander.Program) Parameter(com.beust.jcommander.Parameter) VCFStandardHeaderLines(htsjdk.variant.vcf.VCFStandardHeaderLines) VCFHeader(htsjdk.variant.vcf.VCFHeader) AcidNucleics(com.github.lindenb.jvarkit.util.bio.AcidNucleics) HashMap(java.util.HashMap) SAMRecordPartition(com.github.lindenb.jvarkit.util.samtools.SAMRecordPartition) GenomicSequence(com.github.lindenb.jvarkit.util.picard.GenomicSequence) SAMFileHeader(htsjdk.samtools.SAMFileHeader) ParametersDelegate(com.beust.jcommander.ParametersDelegate) ArrayList(java.util.ArrayList) ReferenceSequenceFile(htsjdk.samtools.reference.ReferenceSequenceFile) HashSet(java.util.HashSet) AlignmentBlock(htsjdk.samtools.AlignmentBlock) DataOutputStream(java.io.DataOutputStream) AbstractDataCodec(com.github.lindenb.jvarkit.util.picard.AbstractDataCodec) Map(java.util.Map) Path(java.nio.file.Path) CloserUtil(htsjdk.samtools.util.CloserUtil) VCFConstants(htsjdk.variant.vcf.VCFConstants) Counter(com.github.lindenb.jvarkit.util.Counter) SequenceDictionaryUtils(com.github.lindenb.jvarkit.util.bio.SequenceDictionaryUtils) SortingCollection(htsjdk.samtools.util.SortingCollection) EqualIterator(com.github.lindenb.jvarkit.iterator.EqualIterator) GenotypeBuilder(htsjdk.variant.variantcontext.GenotypeBuilder) Logger(com.github.lindenb.jvarkit.util.log.Logger) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) Set(java.util.Set) IOException(java.io.IOException) JVarkitVersion(com.github.lindenb.jvarkit.util.JVarkitVersion) EOFException(java.io.EOFException) Collectors(java.util.stream.Collectors) SAMRecord(htsjdk.samtools.SAMRecord) ReferenceSequenceFileFactory(htsjdk.samtools.reference.ReferenceSequenceFileFactory) List(java.util.List) SAMReadGroupRecord(htsjdk.samtools.SAMReadGroupRecord) StringUtils(com.github.lindenb.jvarkit.lang.StringUtils) EqualRangeIterator(com.github.lindenb.jvarkit.util.iterator.EqualRangeIterator) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) VCFHeaderLine(htsjdk.variant.vcf.VCFHeaderLine) HashMap(java.util.HashMap) SAMReadGroupRecord(htsjdk.samtools.SAMReadGroupRecord) ArrayList(java.util.ArrayList) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) EqualRangeIterator(com.github.lindenb.jvarkit.util.iterator.EqualRangeIterator) Counter(com.github.lindenb.jvarkit.util.Counter) ArrayList(java.util.ArrayList) List(java.util.List) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) VCFHeader(htsjdk.variant.vcf.VCFHeader) HashSet(java.util.HashSet) GenomicSequence(com.github.lindenb.jvarkit.util.picard.GenomicSequence) Genotype(htsjdk.variant.variantcontext.Genotype) EqualIterator(com.github.lindenb.jvarkit.iterator.EqualIterator) GenotypeBuilder(htsjdk.variant.variantcontext.GenotypeBuilder) Allele(htsjdk.variant.variantcontext.Allele) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) SAMRecord(htsjdk.samtools.SAMRecord) AlignmentBlock(htsjdk.samtools.AlignmentBlock)

Example 2 with EqualIterator

use of com.github.lindenb.jvarkit.iterator.EqualIterator in project jvarkit by lindenb.

the class BamToFastq method processInput.

@Override
protected int processInput(final SAMFileHeader header, final CloseableIterator<SAMRecord> iter0) {
    final Comparator<SAMRecord> queryNameComparator = (A, B) -> A.getReadName().compareTo(B.getReadName());
    SortingCollection<SAMRecord> sortingSAMRecord = null;
    final ArrayList<SAMRecord> buffer = new ArrayList<>(50_000);
    final CountIn<FastqRecord> singleCounter = new CountIn<>("single-end");
    final CountIn<FastqRecord> unpairedCounter = new CountIn<>("unpaired");
    final CountIn<FastqRecord> pairedCounter = new CountIn<>("paired");
    final CountIn<SAMRecord> sortingCounter = new CountIn<>("sorting");
    final PeekableIterator<SAMRecord> iter = new PeekableIterator<>(iter0);
    try {
        if (!SAMFileHeader.SortOrder.coordinate.equals(header.getSortOrder())) {
            LOG.error("Input is not sorted on coordinate. got : " + header.getSortOrder());
            return -1;
        }
        if (singleFastq != null)
            FastqUtils.validateFastqFilename(singleFastq);
        if (unpairedFile1 != null)
            FastqUtils.validateFastqFilename(unpairedFile1);
        if (unpairedFile2 != null)
            FastqUtils.validateFastqFilename(unpairedFile2);
        try (FastqWriter singleEndWriter = this.singleFastq == null ? new NullFastqWriter() : new BasicFastqWriter(this.singleFastq);
            FastqWriter unpairedWriter1 = this.unpairedFile1 == null ? new NullFastqWriter() : new BasicFastqWriter(this.unpairedFile1);
            FastqWriter unpairedWriter2 = this.unpairedFile2 == null ? new NullFastqWriter() : new BasicFastqWriter(this.unpairedFile2);
            FastqPairedWriter R1R2writer = openFastqPairedWriter()) {
            sortingSAMRecord = SortingCollection.newInstance(SAMRecord.class, new BAMRecordCodec(header), queryNameComparator, sortingCollection.getMaxRecordsInRam(), sortingCollection.getTmpPaths());
            sortingSAMRecord.setDestructiveIteration(true);
            while (iter.hasNext()) {
                final SAMRecord rec = iter.next();
                if (rec.isSecondaryOrSupplementary())
                    continue;
                if (!rec.getReadPairedFlag()) {
                    singleEndWriter.write(singleCounter.apply(toFastq(rec)));
                    continue;
                }
                if ((rec.getReadUnmappedFlag() || rec.getMateUnmappedFlag()) && iter.hasNext()) {
                    final SAMRecord rec2 = iter.peek();
                    if (!rec2.isSecondaryOrSupplementary() && queryNameComparator.compare(rec, rec2) == 0) {
                        if (rec2.getFirstOfPairFlag() && rec.getSecondOfPairFlag()) {
                            // consumme
                            iter.next();
                            R1R2writer.write(pairedCounter.apply(toFastq(rec2)), pairedCounter.apply(toFastq(rec)));
                            continue;
                        } else if (rec.getFirstOfPairFlag() && rec2.getSecondOfPairFlag()) {
                            // consumme
                            iter.next();
                            R1R2writer.write(pairedCounter.apply(toFastq(rec)), pairedCounter.apply(toFastq(rec2)));
                            continue;
                        }
                    }
                }
                if (rec.getReadUnmappedFlag() || rec.getMateUnmappedFlag() || !rec.getReferenceName().equals(rec.getMateReferenceName()) || Math.abs(rec.getInferredInsertSize()) > this.distance) {
                    sortingSAMRecord.add(sortingCounter.apply(rec));
                    continue;
                }
                while (!buffer.isEmpty() && !buffer.get(0).getReferenceName().equals(rec.getReferenceName())) {
                    sortingSAMRecord.add(sortingCounter.apply(buffer.remove(0)));
                }
                while (!buffer.isEmpty() && (rec.getAlignmentStart() - buffer.get(0).getAlignmentStart()) > this.distance) {
                    sortingSAMRecord.add(sortingCounter.apply(buffer.remove(0)));
                }
                if (rec.getAlignmentStart() < rec.getMateAlignmentStart()) {
                    buffer.add(rec);
                    continue;
                }
                SAMRecord mate = null;
                int i = 0;
                while (i < buffer.size()) {
                    final SAMRecord rec2 = buffer.get(i);
                    if (queryNameComparator.compare(rec2, rec) == 0) {
                        mate = rec2;
                        buffer.remove(i);
                        break;
                    }
                    if (rec2.getAlignmentStart() > rec.getMateAlignmentStart()) {
                        break;
                    }
                    ++i;
                }
                if (mate == null) {
                    (rec.getFirstOfPairFlag() ? unpairedWriter1 : unpairedWriter2).write(unpairedCounter.apply(toFastq(rec)));
                } else if (mate.getFirstOfPairFlag() && rec.getSecondOfPairFlag()) {
                    R1R2writer.write(pairedCounter.apply(toFastq(mate)), pairedCounter.apply(toFastq(rec)));
                } else if (rec.getFirstOfPairFlag() && mate.getSecondOfPairFlag()) {
                    R1R2writer.write(pairedCounter.apply(toFastq(rec)), pairedCounter.apply(toFastq(mate)));
                } else {
                    (rec.getFirstOfPairFlag() ? unpairedWriter1 : unpairedWriter2).write(unpairedCounter.apply(toFastq(rec)));
                    (mate.getFirstOfPairFlag() ? unpairedWriter1 : unpairedWriter2).write(unpairedCounter.apply(toFastq(mate)));
                }
            }
            // end while
            for (final SAMRecord rec : buffer) {
                sortingSAMRecord.add(sortingCounter.apply(rec));
            }
            buffer.clear();
            sortingSAMRecord.doneAdding();
            try (CloseableIterator<SAMRecord> iter2 = sortingSAMRecord.iterator()) {
                try (EqualIterator<SAMRecord> eq = new EqualIterator<>(iter2, queryNameComparator)) {
                    while (eq.hasNext()) {
                        final List<SAMRecord> L = eq.next();
                        if (L.size() == 2) {
                            if (L.get(0).getFirstOfPairFlag() && L.get(1).getSecondOfPairFlag()) {
                                R1R2writer.write(pairedCounter.apply(toFastq(L.get(0))), pairedCounter.apply(toFastq(L.get(1))));
                            } else if (L.get(1).getFirstOfPairFlag() && L.get(0).getSecondOfPairFlag()) {
                                R1R2writer.write(pairedCounter.apply(toFastq(L.get(1))), pairedCounter.apply(toFastq(L.get(0))));
                            } else {
                                (L.get(0).getFirstOfPairFlag() ? unpairedWriter1 : unpairedWriter2).write(unpairedCounter.apply(toFastq(L.get(0))));
                                (L.get(1).getFirstOfPairFlag() ? unpairedWriter1 : unpairedWriter2).write(unpairedCounter.apply(toFastq(L.get(1))));
                            }
                        } else {
                            for (SAMRecord rec2 : L) {
                                (rec2.getFirstOfPairFlag() ? unpairedWriter1 : unpairedWriter2).write(unpairedCounter.apply(toFastq(rec2)));
                            }
                        }
                    }
                }
            }
        }
        sortingSAMRecord.cleanup();
        unpairedCounter.log();
        singleCounter.log();
        pairedCounter.log();
        return 0;
    } catch (final Throwable err) {
        LOG.error(err);
        return -1;
    } finally {
        iter.close();
    }
}
Also used : CloseableIterator(htsjdk.samtools.util.CloseableIterator) SequenceUtil(htsjdk.samtools.util.SequenceUtil) MultiBamLauncher(com.github.lindenb.jvarkit.jcommander.MultiBamLauncher) Program(com.github.lindenb.jvarkit.util.jcommander.Program) Parameter(com.beust.jcommander.Parameter) UnaryOperator(java.util.function.UnaryOperator) SAMFileHeader(htsjdk.samtools.SAMFileHeader) ParametersDelegate(com.beust.jcommander.ParametersDelegate) ArrayList(java.util.ArrayList) DistanceParser(com.github.lindenb.jvarkit.util.bio.DistanceParser) NoSplitter(com.github.lindenb.jvarkit.util.jcommander.NoSplitter) StringUtil(htsjdk.samtools.util.StringUtil) FastqPairedWriter(com.github.lindenb.jvarkit.fastq.FastqPairedWriter) BAMRecordCodec(htsjdk.samtools.BAMRecordCodec) PeekableIterator(htsjdk.samtools.util.PeekableIterator) SortingCollection(htsjdk.samtools.util.SortingCollection) FastqUtils(com.github.lindenb.jvarkit.fastq.FastqUtils) EqualIterator(com.github.lindenb.jvarkit.iterator.EqualIterator) Logger(com.github.lindenb.jvarkit.util.log.Logger) IOException(java.io.IOException) BasicFastqWriter(htsjdk.samtools.fastq.BasicFastqWriter) FastqPairedWriterFactory(com.github.lindenb.jvarkit.fastq.FastqPairedWriterFactory) File(java.io.File) SAMRecord(htsjdk.samtools.SAMRecord) FastqRecord(htsjdk.samtools.fastq.FastqRecord) List(java.util.List) FastqWriter(htsjdk.samtools.fastq.FastqWriter) StringUtils(com.github.lindenb.jvarkit.lang.StringUtils) Comparator(java.util.Comparator) ArrayList(java.util.ArrayList) FastqRecord(htsjdk.samtools.fastq.FastqRecord) EqualIterator(com.github.lindenb.jvarkit.iterator.EqualIterator) BAMRecordCodec(htsjdk.samtools.BAMRecordCodec) FastqPairedWriter(com.github.lindenb.jvarkit.fastq.FastqPairedWriter) SAMRecord(htsjdk.samtools.SAMRecord) BasicFastqWriter(htsjdk.samtools.fastq.BasicFastqWriter) FastqWriter(htsjdk.samtools.fastq.FastqWriter) PeekableIterator(htsjdk.samtools.util.PeekableIterator) BasicFastqWriter(htsjdk.samtools.fastq.BasicFastqWriter)

Example 3 with EqualIterator

use of com.github.lindenb.jvarkit.iterator.EqualIterator in project jvarkit by lindenb.

the class BamPhased01 method scanIterator.

@Override
protected void scanIterator(final SAMFileHeader headerIn, final CloseableIterator<SAMRecord> iter0, final SAMFileWriter sfw) {
    if (this.paired_mode) {
        try (EqualIterator<SAMRecord> iter = new EqualIterator<>(iter0, (A, B) -> A.getReadName().compareTo(B.getReadName()))) {
            while (iter.hasNext()) {
                final LinkedList<SAMRecord> buffer = new LinkedList<>(iter.next());
                SAMRecord R1 = null;
                SAMRecord R2 = null;
                while (!buffer.isEmpty()) {
                    final SAMRecord rec = buffer.pop();
                    if (rec.getReadUnmappedFlag()) {
                        failingSAMRecord(rec, sfw);
                    } else if (!rec.getReadPairedFlag() || rec.isSecondaryOrSupplementary()) {
                        scanVariants(Collections.singletonList(rec), sfw);
                    } else if (R1 == null && rec.getFirstOfPairFlag()) {
                        R1 = rec;
                    } else if (R2 == null && rec.getSecondOfPairFlag()) {
                        R2 = rec;
                    } else {
                        failingSAMRecord(rec, sfw);
                    }
                }
                if (R1 != null && R2 != null) {
                    if (R1.contigsMatch(R2)) {
                        scanVariants(Arrays.asList(R1, R2), sfw);
                    } else {
                        scanVariants(Collections.singletonList(R1), sfw);
                        scanVariants(Collections.singletonList(R2), sfw);
                    }
                } else if (R1 != null && R2 == null) {
                    scanVariants(Collections.singletonList(R1), sfw);
                } else if (R2 != null && R1 == null) {
                    scanVariants(Collections.singletonList(R2), sfw);
                }
            }
        }
    } else {
        while (iter0.hasNext()) {
            final SAMRecord rec = iter0.next();
            if (rec.getReadUnmappedFlag()) {
                failingSAMRecord(rec, sfw);
                continue;
            }
            scanVariants(Collections.singletonList(rec), sfw);
        }
    }
}
Also used : SAMRecord(htsjdk.samtools.SAMRecord) EqualIterator(com.github.lindenb.jvarkit.iterator.EqualIterator) LinkedList(java.util.LinkedList)

Example 4 with EqualIterator

use of com.github.lindenb.jvarkit.iterator.EqualIterator in project jvarkit by lindenb.

the class Biostar480685 method doWork.

@Override
public int doWork(final List<String> args) {
    SamReader in = null;
    SAMFileWriter out = null;
    try {
        final SamReaderFactory srf = super.createSamReaderFactory();
        if (this.faidx != null) {
            srf.referenceSequence(this.faidx);
            writingBamArgs.setReferencePath(this.faidx);
        }
        final String input = oneFileOrNull(args);
        if (input == null) {
            in = srf.open(SamInputResource.of(stdin()));
        } else {
            in = srf.open(SamInputResource.of(input));
        }
        final SAMFileHeader header = in.getFileHeader();
        if (!(header.getSortOrder().equals(SAMFileHeader.SortOrder.unsorted) || header.getSortOrder().equals(SAMFileHeader.SortOrder.queryname))) {
            LOG.error("input should be sorted with 'samtools sort -n' or 'samtools collate' but got " + header.getSortOrder());
            return -1;
        }
        final ReadClipper clipper = new ReadClipper();
        header.setSortOrder(SAMFileHeader.SortOrder.unsorted);
        final SAMProgramRecord prg = header.createProgramRecord();
        prg.setCommandLine(this.getProgramCommandLine());
        prg.setProgramName(this.getProgramName());
        prg.setProgramVersion(this.getGitHash());
        JVarkitVersion.getInstance().addMetaData(this, header);
        out = this.writingBamArgs.openSamWriter(this.outputFile, header, true);
        try (CloseableIterator<List<SAMRecord>> iter = new EqualIterator<>(in.iterator(), (A, B) -> A.getReadName().compareTo(B.getReadName()))) {
            while (iter.hasNext()) {
                final List<SAMRecord> buffer = iter.next();
                int read1_idx = -1;
                int read2_idx = -1;
                for (int i = 0; i < buffer.size(); i++) {
                    final SAMRecord rec = buffer.get(i);
                    if (!rec.getReadPairedFlag())
                        continue;
                    if (rec.getReadUnmappedFlag())
                        continue;
                    if (rec.getMateUnmappedFlag())
                        continue;
                    if (rec.isSecondaryOrSupplementary())
                        continue;
                    if (rec.getFirstOfPairFlag()) {
                        read1_idx = i;
                    } else if (rec.getSecondOfPairFlag()) {
                        read2_idx = i;
                    }
                }
                if (read1_idx == -1 || read2_idx == -1 || read1_idx == read2_idx)
                    continue;
                final SAMRecord rec1a = buffer.get(read1_idx);
                final SAMRecord rec2a = buffer.get(read2_idx);
                if (!rec1a.overlaps(rec2a))
                    continue;
                final int chromStart = Math.max(rec1a.getStart(), rec2a.getStart());
                final int chromEnd = Math.min(rec1a.getEnd(), rec2a.getEnd());
                if (chromStart > chromEnd)
                    continue;
                final SimpleInterval rgn = new SimpleInterval(rec1a.getContig(), chromStart, chromEnd);
                final SAMRecord rec1b = clipper.clip(rec1a, rgn);
                if (rec1b == null || rec1b.getReadUnmappedFlag())
                    continue;
                final SAMRecord rec2b = clipper.clip(rec2a, rgn);
                if (rec2b == null || rec2b.getReadUnmappedFlag())
                    continue;
                rec1b.setAttribute("PG", prg.getId());
                rec2b.setAttribute("PG", prg.getId());
                rec1b.setAlignmentStart(chromStart);
                rec1b.setMateAlignmentStart(rec2b.getAlignmentStart());
                rec2b.setAlignmentStart(chromStart);
                rec2b.setMateAlignmentStart(rec1b.getAlignmentStart());
                rec1b.setAttribute("MC", rec2b.getCigarString());
                rec2b.setAttribute("MC", rec1b.getCigarString());
                rec1b.setAttribute("NM", null);
                rec2b.setAttribute("NM", null);
                buffer.set(read1_idx, rec1b);
                buffer.set(read2_idx, rec2b);
                for (SAMRecord rec : buffer) {
                    out.addAlignment(rec);
                }
            }
        }
        in.close();
        in = null;
        out.close();
        out = null;
        return 0;
    } catch (final Throwable err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(in);
        CloserUtil.close(out);
    }
}
Also used : SamReaderFactory(htsjdk.samtools.SamReaderFactory) SAMFileWriter(htsjdk.samtools.SAMFileWriter) EqualIterator(com.github.lindenb.jvarkit.iterator.EqualIterator) SAMProgramRecord(htsjdk.samtools.SAMProgramRecord) ReadClipper(com.github.lindenb.jvarkit.tools.pcr.ReadClipper) SamReader(htsjdk.samtools.SamReader) SAMRecord(htsjdk.samtools.SAMRecord) List(java.util.List) SimpleInterval(com.github.lindenb.jvarkit.samtools.util.SimpleInterval) SAMFileHeader(htsjdk.samtools.SAMFileHeader)

Example 5 with EqualIterator

use of com.github.lindenb.jvarkit.iterator.EqualIterator in project jvarkit by lindenb.

the class SetFileTools method fromBed.

private int fromBed(final List<String> args) throws IOException {
    final String input = oneFileOrNull(args);
    final Function<BedLine, String> bed2name = bed -> {
        if (bed.getColumnCount() < 4)
            throw new IllegalArgumentException("Expected 4 columns but got " + bed);
        return bed.get(3);
    };
    try (BufferedReader br = super.openBufferedReader(input)) {
        try (BedLineReader blr = new BedLineReader(br, input)) {
            blr.setValidationStringency(validationStringency);
            blr.setContigNameConverter(this.theCtgConverter);
            try (PrintWriter pw = super.openPathOrStdoutAsPrintWriter(this.outputFile)) {
                final EqualIterator<BedLine> iter = new EqualIterator<BedLine>(blr.stream().iterator(), (A, B) -> bed2name.apply(A).compareTo(bed2name.apply(B)));
                while (iter.hasNext()) {
                    final List<BedLine> lines = iter.next();
                    final List<Locatable> L = sortAndMerge(lines);
                    pw.print(bed2name.apply(lines.get(0)));
                    for (int i = 0; i < L.size(); i++) {
                        pw.print(i == 0 ? "\t" : ",");
                        final Locatable rec = L.get(i);
                        pw.print(noChr(rec.getContig()));
                        pw.print(":");
                        pw.print(rec.getStart());
                        pw.print("-");
                        pw.print(rec.getEnd());
                    }
                    pw.println();
                }
                iter.close();
                pw.flush();
            }
        }
    }
    return 0;
}
Also used : Program(com.github.lindenb.jvarkit.util.jcommander.Program) VCFHeader(htsjdk.variant.vcf.VCFHeader) UnaryOperator(java.util.function.UnaryOperator) DistanceParser(com.github.lindenb.jvarkit.util.bio.DistanceParser) StringUtil(htsjdk.samtools.util.StringUtil) DiscreteMedian(com.github.lindenb.jvarkit.math.DiscreteMedian) Path(java.nio.file.Path) PrintWriter(java.io.PrintWriter) SimpleInterval(com.github.lindenb.jvarkit.samtools.util.SimpleInterval) SequenceDictionaryUtils(com.github.lindenb.jvarkit.util.bio.SequenceDictionaryUtils) SetFileRecord(com.github.lindenb.jvarkit.setfile.SetFileRecord) IntervalTreeMap(htsjdk.samtools.util.IntervalTreeMap) Logger(com.github.lindenb.jvarkit.util.log.Logger) Set(java.util.Set) AbstractCloseableIterator(com.github.lindenb.jvarkit.iterator.AbstractCloseableIterator) Collectors(java.util.stream.Collectors) List(java.util.List) StringUtils(com.github.lindenb.jvarkit.lang.StringUtils) VariantContext(htsjdk.variant.variantcontext.VariantContext) BedLine(com.github.lindenb.jvarkit.util.bio.bed.BedLine) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) SetFileReaderFactory(com.github.lindenb.jvarkit.setfile.SetFileReaderFactory) IntervalExtender(com.github.lindenb.jvarkit.samtools.util.IntervalExtender) CloseableIterator(htsjdk.samtools.util.CloseableIterator) ContigNameConverter(com.github.lindenb.jvarkit.util.bio.fasta.ContigNameConverter) Parameter(com.beust.jcommander.Parameter) BufferedVCFReader(com.github.lindenb.jvarkit.variant.vcf.BufferedVCFReader) Function(java.util.function.Function) ValidationStringency(htsjdk.samtools.ValidationStringency) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) BedLineReader(com.github.lindenb.jvarkit.bed.BedLineReader) ContigDictComparator(com.github.lindenb.jvarkit.util.samtools.ContigDictComparator) NoSplitter(com.github.lindenb.jvarkit.util.jcommander.NoSplitter) RuntimeIOException(htsjdk.samtools.util.RuntimeIOException) IOUtils(com.github.lindenb.jvarkit.io.IOUtils) Launcher(com.github.lindenb.jvarkit.util.jcommander.Launcher) VCFReaderFactory(com.github.lindenb.jvarkit.variant.vcf.VCFReaderFactory) LinkedList(java.util.LinkedList) Counter(com.github.lindenb.jvarkit.util.Counter) Locatable(htsjdk.samtools.util.Locatable) EqualIterator(com.github.lindenb.jvarkit.iterator.EqualIterator) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) IOException(java.io.IOException) Paths(java.nio.file.Paths) BufferedReader(java.io.BufferedReader) SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) Comparator(java.util.Comparator) Collections(java.util.Collections) ArchiveFactory(com.github.lindenb.jvarkit.io.ArchiveFactory) BedLineReader(com.github.lindenb.jvarkit.bed.BedLineReader) EqualIterator(com.github.lindenb.jvarkit.iterator.EqualIterator) BedLine(com.github.lindenb.jvarkit.util.bio.bed.BedLine) BufferedReader(java.io.BufferedReader) PrintWriter(java.io.PrintWriter) Locatable(htsjdk.samtools.util.Locatable)

Aggregations

EqualIterator (com.github.lindenb.jvarkit.iterator.EqualIterator)6 SAMRecord (htsjdk.samtools.SAMRecord)5 List (java.util.List)5 Parameter (com.beust.jcommander.Parameter)4 StringUtils (com.github.lindenb.jvarkit.lang.StringUtils)4 Program (com.github.lindenb.jvarkit.util.jcommander.Program)4 Logger (com.github.lindenb.jvarkit.util.log.Logger)4 SAMFileHeader (htsjdk.samtools.SAMFileHeader)4 CloseableIterator (htsjdk.samtools.util.CloseableIterator)4 IOException (java.io.IOException)4 ArrayList (java.util.ArrayList)4 ParametersDelegate (com.beust.jcommander.ParametersDelegate)3 MultiBamLauncher (com.github.lindenb.jvarkit.jcommander.MultiBamLauncher)3 DistanceParser (com.github.lindenb.jvarkit.util.bio.DistanceParser)3 SequenceDictionaryUtils (com.github.lindenb.jvarkit.util.bio.SequenceDictionaryUtils)3 NoSplitter (com.github.lindenb.jvarkit.util.jcommander.NoSplitter)3 SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary)3 SimpleInterval (com.github.lindenb.jvarkit.samtools.util.SimpleInterval)2 BufferedVCFReader (com.github.lindenb.jvarkit.variant.vcf.BufferedVCFReader)2 VCFReaderFactory (com.github.lindenb.jvarkit.variant.vcf.VCFReaderFactory)2