Search in sources :

Example 36 with CloseableIterator

use of htsjdk.samtools.util.CloseableIterator in project jvarkit by lindenb.

the class Biostar489074 method processInput.

@Override
protected int processInput(final SAMFileHeader header, final CloseableIterator<SAMRecord> iter0) {
    VariantContextWriter out = null;
    GenomicSequence genome = null;
    SortingCollection<Call> sorting = null;
    try {
        this.indexedFastaRef = ReferenceSequenceFileFactory.getReferenceSequenceFile(getRequiredReferencePath());
        if (!(header.getSortOrder().equals(SAMFileHeader.SortOrder.unsorted) || header.getSortOrder().equals(SAMFileHeader.SortOrder.queryname))) {
            getLogger().error("input should be sorted with 'samtools sort -n' or 'samtools collate' but got " + header.getSortOrder());
            return -1;
        }
        final SAMSequenceDictionary dict = SequenceDictionaryUtils.extractRequired(header);
        this.writingVariantsDelegate.dictionary(dict);
        sorting = SortingCollection.newInstance(Call.class, new CallCodec(), (A, B) -> A.compare2(B), this.writingSortingCollection.getMaxRecordsInRam(), this.writingSortingCollection.getTmpPaths());
        sorting.setDestructiveIteration(true);
        final List<String> samples = header.getReadGroups().stream().map(R -> this.groupBy.apply(R)).filter(S -> !StringUtils.isBlank(S)).sorted().collect(Collectors.toSet()).stream().collect(Collectors.toList());
        final Map<String, Integer> rgid2idx = new HashMap<>();
        for (final SAMReadGroupRecord rg : header.getReadGroups()) {
            final String sn = this.groupBy.apply(rg);
            if (StringUtils.isBlank(sn))
                continue;
            int idx = samples.indexOf(sn);
            if (idx == -1)
                continue;
            rgid2idx.put(rg.getId(), idx);
        }
        try (CloseableIterator<List<SAMRecord>> iter = new EqualIterator<>(iter0, (A, B) -> A.getReadName().compareTo(B.getReadName()))) {
            while (iter.hasNext()) {
                final List<SAMRecord> buffer = iter.next();
                int read1_idx = -1;
                int read2_idx = -1;
                for (int i = 0; i < buffer.size(); i++) {
                    final SAMRecord rec = buffer.get(i);
                    if (!rec.getReadPairedFlag())
                        continue;
                    if (rec.getReadUnmappedFlag())
                        continue;
                    if (rec.getMateUnmappedFlag())
                        continue;
                    if (rec.isSecondaryOrSupplementary())
                        continue;
                    if (rec.getReadBases() == SAMRecord.NULL_SEQUENCE)
                        continue;
                    if (rec.getFirstOfPairFlag()) {
                        read1_idx = i;
                    } else if (rec.getSecondOfPairFlag()) {
                        read2_idx = i;
                    }
                }
                if (read1_idx == -1 || read2_idx == -1 || read1_idx == read2_idx)
                    continue;
                final SAMRecord rec1a = buffer.get(read1_idx);
                final SAMRecord rec2a = buffer.get(read2_idx);
                if (!rec1a.overlaps(rec2a))
                    continue;
                final int chromStart = Math.max(rec1a.getStart(), rec2a.getStart());
                final int chromEnd = Math.min(rec1a.getEnd(), rec2a.getEnd());
                if (chromStart > chromEnd)
                    continue;
                final Integer sampleid = rgid2idx.get(rec1a.getReadGroup().getId());
                if (sampleid == null)
                    continue;
                if (genome == null || !genome.contigsMatch(rec1a)) {
                    genome = new GenomicSequence(this.indexedFastaRef, rec1a.getContig());
                }
                final Set<Call> calls1 = new HashSet<>();
                final Set<Call> calls2 = new HashSet<>();
                for (int side = 0; side < 2; side++) {
                    final SAMRecord rec = (side == 0 ? rec1a : rec2a);
                    final Set<Call> calls = (side == 0 ? calls1 : calls2);
                    final byte[] bases = rec.getReadBases();
                    for (AlignmentBlock ab : rec.getAlignmentBlocks()) {
                        for (int n = 0; n < ab.getLength(); ++n) {
                            final int ref = ab.getReferenceStart() + n;
                            if (ref < chromStart)
                                continue;
                            if (ref > chromEnd)
                                break;
                            if (ref < 0 || ref >= genome.length())
                                continue;
                            // 0 based
                            final byte refBase = (byte) Character.toUpperCase(genome.charAt(ref - 1));
                            if (!AcidNucleics.isATGC(refBase))
                                continue;
                            // 0 based
                            final byte readBase = (byte) Character.toUpperCase(bases[(ab.getReadStart() - 1) + n]);
                            if (readBase == refBase)
                                continue;
                            final Call call = new Call();
                            call.sampleid = sampleid;
                            call.tid = rec1a.getReferenceIndex().intValue();
                            call.ref = refBase;
                            call.alt = readBase;
                            call.pos = ref;
                            calls.add(call);
                        }
                    }
                }
                calls1.retainAll(calls2);
                if (calls1.isEmpty())
                    continue;
                for (final Call c : calls1) {
                    sorting.add(c);
                }
            }
            sorting.doneAdding();
            out = this.writingVariantsDelegate.dictionary(dict).open(this.outputFile);
            final Set<VCFHeaderLine> metaData = new HashSet<>();
            VCFStandardHeaderLines.addStandardInfoLines(metaData, true, VCFConstants.DEPTH_KEY);
            VCFStandardHeaderLines.addStandardInfoLines(metaData, true, VCFConstants.ALLELE_COUNT_KEY);
            VCFStandardHeaderLines.addStandardInfoLines(metaData, true, VCFConstants.ALLELE_NUMBER_KEY);
            VCFStandardHeaderLines.addStandardInfoLines(metaData, true, VCFConstants.ALLELE_FREQUENCY_KEY);
            VCFStandardHeaderLines.addStandardFormatLines(metaData, true, VCFConstants.GENOTYPE_KEY);
            VCFStandardHeaderLines.addStandardFormatLines(metaData, true, VCFConstants.DEPTH_KEY);
            VCFStandardHeaderLines.addStandardFormatLines(metaData, true, VCFConstants.GENOTYPE_ALLELE_DEPTHS);
            final VCFHeader header2 = new VCFHeader(metaData, samples);
            header2.setSequenceDictionary(dict);
            JVarkitVersion.getInstance().addMetaData(this, header2);
            out.writeHeader(header2);
            try (CloseableIterator<Call> iter1 = sorting.iterator()) {
                try (EqualRangeIterator<Call> iter2 = new EqualRangeIterator<>(iter1, (A, B) -> A.compare1(B))) {
                    while (iter2.hasNext()) {
                        final List<Call> calls = iter2.next();
                        if (calls.isEmpty())
                            continue;
                        final Call first = calls.get(0);
                        final Set<Allele> altAllelesSet = calls.stream().map(A -> Allele.create(A.alt, false)).collect(Collectors.toSet());
                        final List<Allele> altAllelesList = new ArrayList<>(altAllelesSet);
                        final List<Allele> vcAlleles = new ArrayList<>(altAllelesList.size() + 1);
                        vcAlleles.add(Allele.create(first.ref, true));
                        vcAlleles.addAll(altAllelesList);
                        final List<Genotype> genotypes = new ArrayList<>(samples.size());
                        int DP = 0;
                        for (int i = 0; i < samples.size(); i++) {
                            final String sn = samples.get(i);
                            final Counter<Allele> allele2count = new Counter<>();
                            for (Call c : calls) {
                                if (c.sampleid != i)
                                    continue;
                                allele2count.incr(Allele.create(c.alt, false));
                            }
                            Genotype gt;
                            if (allele2count.isEmpty()) {
                                gt = GenotypeBuilder.createMissing(sn, this.ploidy);
                            } else {
                                final int[] array = new int[vcAlleles.size()];
                                for (int j = 0; j < vcAlleles.size(); j++) {
                                    array[j] = (int) allele2count.count(vcAlleles.get(j));
                                }
                                final GenotypeBuilder gb = new GenotypeBuilder(sn, new ArrayList<>(allele2count.keySet()));
                                gb.AD(array);
                                gb.DP((int) allele2count.getTotal());
                                DP += (int) allele2count.getTotal();
                                gt = gb.make();
                            }
                            genotypes.add(gt);
                        }
                        final VariantContextBuilder vcb = new VariantContextBuilder(null, dict.getSequence(first.tid).getContig(), first.pos, first.pos, vcAlleles);
                        vcb.attribute(VCFConstants.DEPTH_KEY, DP);
                        vcb.genotypes(genotypes);
                        out.add(vcb.make());
                    }
                }
            }
        }
        sorting.cleanup();
        out.close();
        out = null;
        out = null;
        this.indexedFastaRef.close();
        this.indexedFastaRef = null;
        return 0;
    } catch (final Throwable err) {
        getLogger().error(err);
        return -1;
    } finally {
        CloserUtil.close(out);
        CloserUtil.close(this.indexedFastaRef);
        ;
    }
}
Also used : WritingVariantsDelegate(com.github.lindenb.jvarkit.variant.variantcontext.writer.WritingVariantsDelegate) Genotype(htsjdk.variant.variantcontext.Genotype) VCFHeaderLine(htsjdk.variant.vcf.VCFHeaderLine) DataInputStream(java.io.DataInputStream) CloseableIterator(htsjdk.samtools.util.CloseableIterator) Allele(htsjdk.variant.variantcontext.Allele) MultiBamLauncher(com.github.lindenb.jvarkit.jcommander.MultiBamLauncher) Program(com.github.lindenb.jvarkit.util.jcommander.Program) Parameter(com.beust.jcommander.Parameter) VCFStandardHeaderLines(htsjdk.variant.vcf.VCFStandardHeaderLines) VCFHeader(htsjdk.variant.vcf.VCFHeader) AcidNucleics(com.github.lindenb.jvarkit.util.bio.AcidNucleics) HashMap(java.util.HashMap) SAMRecordPartition(com.github.lindenb.jvarkit.util.samtools.SAMRecordPartition) GenomicSequence(com.github.lindenb.jvarkit.util.picard.GenomicSequence) SAMFileHeader(htsjdk.samtools.SAMFileHeader) ParametersDelegate(com.beust.jcommander.ParametersDelegate) ArrayList(java.util.ArrayList) ReferenceSequenceFile(htsjdk.samtools.reference.ReferenceSequenceFile) HashSet(java.util.HashSet) AlignmentBlock(htsjdk.samtools.AlignmentBlock) DataOutputStream(java.io.DataOutputStream) AbstractDataCodec(com.github.lindenb.jvarkit.util.picard.AbstractDataCodec) Map(java.util.Map) Path(java.nio.file.Path) CloserUtil(htsjdk.samtools.util.CloserUtil) VCFConstants(htsjdk.variant.vcf.VCFConstants) Counter(com.github.lindenb.jvarkit.util.Counter) SequenceDictionaryUtils(com.github.lindenb.jvarkit.util.bio.SequenceDictionaryUtils) SortingCollection(htsjdk.samtools.util.SortingCollection) EqualIterator(com.github.lindenb.jvarkit.iterator.EqualIterator) GenotypeBuilder(htsjdk.variant.variantcontext.GenotypeBuilder) Logger(com.github.lindenb.jvarkit.util.log.Logger) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) Set(java.util.Set) IOException(java.io.IOException) JVarkitVersion(com.github.lindenb.jvarkit.util.JVarkitVersion) EOFException(java.io.EOFException) Collectors(java.util.stream.Collectors) SAMRecord(htsjdk.samtools.SAMRecord) ReferenceSequenceFileFactory(htsjdk.samtools.reference.ReferenceSequenceFileFactory) List(java.util.List) SAMReadGroupRecord(htsjdk.samtools.SAMReadGroupRecord) StringUtils(com.github.lindenb.jvarkit.lang.StringUtils) EqualRangeIterator(com.github.lindenb.jvarkit.util.iterator.EqualRangeIterator) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) VCFHeaderLine(htsjdk.variant.vcf.VCFHeaderLine) HashMap(java.util.HashMap) SAMReadGroupRecord(htsjdk.samtools.SAMReadGroupRecord) ArrayList(java.util.ArrayList) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) EqualRangeIterator(com.github.lindenb.jvarkit.util.iterator.EqualRangeIterator) Counter(com.github.lindenb.jvarkit.util.Counter) ArrayList(java.util.ArrayList) List(java.util.List) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) VCFHeader(htsjdk.variant.vcf.VCFHeader) HashSet(java.util.HashSet) GenomicSequence(com.github.lindenb.jvarkit.util.picard.GenomicSequence) Genotype(htsjdk.variant.variantcontext.Genotype) EqualIterator(com.github.lindenb.jvarkit.iterator.EqualIterator) GenotypeBuilder(htsjdk.variant.variantcontext.GenotypeBuilder) Allele(htsjdk.variant.variantcontext.Allele) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) SAMRecord(htsjdk.samtools.SAMRecord) AlignmentBlock(htsjdk.samtools.AlignmentBlock)

Example 37 with CloseableIterator

use of htsjdk.samtools.util.CloseableIterator in project jvarkit by lindenb.

the class Biostar352930 method doWork.

@Override
public int doWork(final List<String> args) {
    SamReader samReader = null;
    SAMFileWriter samFileWriter = null;
    try {
        long count_rescued_seq = 0L;
        long count_rescued_qual = 0L;
        final SamReaderFactory srf = super.createSamReaderFactory().validationStringency(ValidationStringency.SILENT);
        final String input = oneFileOrNull(args);
        if (input == null) {
            samReader = srf.open(SamInputResource.of(stdin()));
        } else {
            samReader = srf.open(SamInputResource.of(input));
        }
        final SAMFileHeader header = samReader.getFileHeader();
        if (header.getSortOrder() != SAMFileHeader.SortOrder.queryname) {
            LOG.error("Expected SAM input to be sorted on " + SAMFileHeader.SortOrder.queryname + " but got " + header.getSortOrder());
            return -1;
        }
        JVarkitVersion.getInstance().addMetaData(this, header);
        samFileWriter = this.writingBamArgs.openSAMFileWriter(this.outputFile, header, true);
        final CloseableIterator<SAMRecord> iter = samReader.iterator();
        final Comparator<SAMRecord> readNameCmp = ReadNameSortMethod.picard.get();
        final Iterator<List<SAMRecord>> eq_range = new EqualRangeIterator<>(iter, readNameCmp);
        final Predicate<SAMRecord> noCigarOrNoHardClip = R -> R.getCigar() == null || R.getCigar().getCigarElements().stream().noneMatch(C -> C.getOperator().equals(CigarOperator.H));
        while (eq_range.hasNext()) {
            final List<SAMRecord> records = eq_range.next();
            for (int choice = 0; choice < 3; ++choice) {
                final Predicate<SAMRecord> selectRead;
                switch(choice) {
                    case 0:
                        selectRead = R -> !R.getReadPairedFlag();
                        break;
                    case 1:
                        selectRead = R -> R.getReadPairedFlag() && R.getFirstOfPairFlag();
                        break;
                    case 2:
                        selectRead = R -> R.getReadPairedFlag() && R.getSecondOfPairFlag();
                        break;
                    default:
                        throw new IllegalStateException();
                }
                // get the strand+ SEQUENCE string
                final String unclippedseq = records.stream().filter(selectRead).filter(noCigarOrNoHardClip).filter(R -> !R.getReadString().equals(SAMRecord.NULL_SEQUENCE_STRING)).map(R -> {
                    if (R.getReadNegativeStrandFlag()) {
                        return SequenceUtil.reverseComplement(R.getReadString());
                    } else {
                        return R.getReadString();
                    }
                }).findFirst().orElse(null);
                // get the strand+ QUAL string
                final String unclippedqual = records.stream().filter(selectRead).filter(noCigarOrNoHardClip).filter(R -> !R.getBaseQualityString().equals(SAMRecord.NULL_QUALS_STRING)).map(R -> {
                    if (R.getReadNegativeStrandFlag()) {
                        return new StringBuilder(R.getBaseQualityString()).reverse().toString();
                    } else {
                        return R.getBaseQualityString();
                    }
                }).findFirst().orElse(null);
                // loop over the record and fix
                for (final SAMRecord rec : records) {
                    if (!selectRead.test(rec))
                        continue;
                    String clippedseq = unclippedseq;
                    String clippedqual = unclippedqual;
                    if (rec.getReadUnmappedFlag() && rec.getCigar() != null && rec.getCigar().numCigarElements() > 1) {
                        final Cigar cigar = rec.getCigar();
                        CigarElement ce = cigar.getLastCigarElement();
                        if (ce.getOperator().equals(CigarOperator.H)) {
                            clippedseq = clippedseq.substring(0, clippedseq.length() - ce.getLength());
                            clippedqual = clippedqual.substring(0, clippedqual.length() - ce.getLength());
                        }
                        ce = cigar.getFirstCigarElement();
                        if (ce.getOperator().equals(CigarOperator.H)) {
                            clippedseq = clippedseq.substring(ce.getLength());
                            clippedqual = clippedqual.substring(ce.getLength());
                        }
                    }
                    if (!StringUtil.isBlank(clippedseq) && rec.getReadString().equals(SAMRecord.NULL_SEQUENCE_STRING)) {
                        if (rec.getReadNegativeStrandFlag()) {
                            rec.setReadString(SequenceUtil.reverseComplement(clippedseq));
                        } else {
                            rec.setReadString(clippedseq);
                        }
                        ++count_rescued_seq;
                    }
                    if (!StringUtil.isBlank(clippedqual) && rec.getBaseQualityString().equals(SAMRecord.NULL_QUALS_STRING)) {
                        if (rec.getReadNegativeStrandFlag()) {
                            rec.setBaseQualityString(new StringBuilder(clippedqual).reverse().toString());
                        } else {
                            rec.setBaseQualityString(clippedqual);
                        }
                        ++count_rescued_qual;
                    }
                }
            }
            for (final SAMRecord R : records) samFileWriter.addAlignment(R);
        }
        CloserUtil.close(eq_range);
        iter.close();
        samFileWriter.close();
        samFileWriter = null;
        samReader.close();
        samReader = null;
        LOG.info("Done : rescued seq: " + count_rescued_seq + " rescued qual: " + count_rescued_qual);
        return 0;
    } catch (final Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(samReader);
        CloserUtil.close(samFileWriter);
    }
}
Also used : Cigar(htsjdk.samtools.Cigar) CloseableIterator(htsjdk.samtools.util.CloseableIterator) SequenceUtil(htsjdk.samtools.util.SequenceUtil) Program(com.github.lindenb.jvarkit.util.jcommander.Program) ReadNameSortMethod(com.github.lindenb.jvarkit.util.samtools.ReadNameSortMethod) Parameter(com.beust.jcommander.Parameter) CigarElement(htsjdk.samtools.CigarElement) CigarOperator(htsjdk.samtools.CigarOperator) SAMFileHeader(htsjdk.samtools.SAMFileHeader) ValidationStringency(htsjdk.samtools.ValidationStringency) ParametersDelegate(com.beust.jcommander.ParametersDelegate) StringUtil(htsjdk.samtools.util.StringUtil) Launcher(com.github.lindenb.jvarkit.util.jcommander.Launcher) CloserUtil(htsjdk.samtools.util.CloserUtil) Iterator(java.util.Iterator) Predicate(java.util.function.Predicate) Logger(com.github.lindenb.jvarkit.util.log.Logger) IOException(java.io.IOException) JVarkitVersion(com.github.lindenb.jvarkit.util.JVarkitVersion) SAMFileWriter(htsjdk.samtools.SAMFileWriter) SamReader(htsjdk.samtools.SamReader) File(java.io.File) SAMRecord(htsjdk.samtools.SAMRecord) List(java.util.List) SamInputResource(htsjdk.samtools.SamInputResource) EqualRangeIterator(com.github.lindenb.jvarkit.util.iterator.EqualRangeIterator) Comparator(java.util.Comparator) SamReaderFactory(htsjdk.samtools.SamReaderFactory) SamReaderFactory(htsjdk.samtools.SamReaderFactory) SAMFileWriter(htsjdk.samtools.SAMFileWriter) EqualRangeIterator(com.github.lindenb.jvarkit.util.iterator.EqualRangeIterator) CigarElement(htsjdk.samtools.CigarElement) IOException(java.io.IOException) SamReader(htsjdk.samtools.SamReader) Cigar(htsjdk.samtools.Cigar) SAMRecord(htsjdk.samtools.SAMRecord) List(java.util.List) SAMFileHeader(htsjdk.samtools.SAMFileHeader)

Example 38 with CloseableIterator

use of htsjdk.samtools.util.CloseableIterator in project jvarkit by lindenb.

the class VCFShuffle method doVcfToVcf.

@Override
protected int doVcfToVcf(final String inputName, final VCFIterator in, final VariantContextWriter out) {
    SortingCollection<RLine> shuffled = null;
    try {
        final Random random = new Random(this.seed);
        final VCFHeader header = in.getHeader();
        final VCFEncoder vcfEncoder = new VCFEncoder(header, false, false);
        shuffled = SortingCollection.newInstance(RLine.class, new RLineCodec(), (o1, o2) -> {
            final int i = Long.compare(o1.rand, o2.rand);
            if (i != 0)
                return i;
            return o1.line.compareTo(o2.line);
        }, this.writingSortingCollection.getMaxRecordsInRam(), this.writingSortingCollection.getTmpPaths());
        shuffled.setDestructiveIteration(true);
        while (in.hasNext()) {
            final RLine rLine = new RLine();
            rLine.rand = random.nextLong();
            rLine.line = vcfEncoder.encode(in.next());
            shuffled.add(rLine);
        }
        shuffled.doneAdding();
        JVarkitVersion.getInstance().addMetaData(this, header);
        out.writeHeader(header);
        final VCFCodec vcfCodec = new VCFCodec();
        vcfCodec.setVCFHeader(header, VCFHeaderVersion.VCF4_3);
        try (final CloseableIterator<RLine> iter = shuffled.iterator()) {
            while (iter.hasNext()) {
                final VariantContext ctx = vcfCodec.decode(iter.next().line);
                out.add(ctx);
            }
        }
        shuffled.cleanup();
        shuffled = null;
        return 0;
    } catch (final Throwable err) {
        LOG.error(err);
        return -1;
    } finally {
        if (shuffled != null)
            shuffled.cleanup();
        CloserUtil.close(shuffled);
    }
}
Also used : DataInputStream(java.io.DataInputStream) CloseableIterator(htsjdk.samtools.util.CloseableIterator) SortingCollection(htsjdk.samtools.util.SortingCollection) VCFIterator(htsjdk.variant.vcf.VCFIterator) Program(com.github.lindenb.jvarkit.util.jcommander.Program) Parameter(com.beust.jcommander.Parameter) VCFHeader(htsjdk.variant.vcf.VCFHeader) Logger(com.github.lindenb.jvarkit.util.log.Logger) IOException(java.io.IOException) Random(java.util.Random) VCFEncoder(htsjdk.variant.vcf.VCFEncoder) JVarkitVersion(com.github.lindenb.jvarkit.util.JVarkitVersion) VCFHeaderVersion(htsjdk.variant.vcf.VCFHeaderVersion) EOFException(java.io.EOFException) ParametersDelegate(com.beust.jcommander.ParametersDelegate) DataOutputStream(java.io.DataOutputStream) AbstractDataCodec(com.github.lindenb.jvarkit.util.picard.AbstractDataCodec) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) VariantContext(htsjdk.variant.variantcontext.VariantContext) OnePassVcfLauncher(com.github.lindenb.jvarkit.jcommander.OnePassVcfLauncher) VCFCodec(htsjdk.variant.vcf.VCFCodec) CloserUtil(htsjdk.samtools.util.CloserUtil) VCFCodec(htsjdk.variant.vcf.VCFCodec) VCFEncoder(htsjdk.variant.vcf.VCFEncoder) Random(java.util.Random) VariantContext(htsjdk.variant.variantcontext.VariantContext) VCFHeader(htsjdk.variant.vcf.VCFHeader)

Example 39 with CloseableIterator

use of htsjdk.samtools.util.CloseableIterator in project jvarkit by lindenb.

the class SamReadLengthDistribution method openSamIterator.

/**
 * create input SAMRecord iterator
 */
private CloseableIterator<SAMRecord> openSamIterator(final SamReader sr) {
    if (this.regionFiles != null) {
        this.regionFiles.dictionary(SequenceDictionaryUtils.extractRequired(sr.getFileHeader()));
        if (!sr.hasIndex()) {
            final List<Interval> L = this.regionFiles.stream().map(x -> new Interval(x)).collect(Collectors.toList());
            final SAMRecordIterator st0 = sr.iterator();
            return new FilteringSamIterator(st0, new IntervalFilter(L, sr.getFileHeader()));
        } else {
            return sr.query(this.regionFiles.optimizedQueryIntervals(), false);
        }
    }
    return sr.iterator();
}
Also used : Cigar(htsjdk.samtools.Cigar) CloseableIterator(htsjdk.samtools.util.CloseableIterator) Program(com.github.lindenb.jvarkit.util.jcommander.Program) Parameter(com.beust.jcommander.Parameter) IntervalFilter(htsjdk.samtools.filter.IntervalFilter) IntervalListProvider(com.github.lindenb.jvarkit.samtools.util.IntervalListProvider) SAMRecordPartition(com.github.lindenb.jvarkit.util.samtools.SAMRecordPartition) Interval(htsjdk.samtools.util.Interval) NoSplitter(com.github.lindenb.jvarkit.util.jcommander.NoSplitter) Map(java.util.Map) IOUtils(com.github.lindenb.jvarkit.io.IOUtils) Launcher(com.github.lindenb.jvarkit.util.jcommander.Launcher) DiscreteMedian(com.github.lindenb.jvarkit.math.DiscreteMedian) Path(java.nio.file.Path) PrintWriter(java.io.PrintWriter) SequenceDictionaryUtils(com.github.lindenb.jvarkit.util.bio.SequenceDictionaryUtils) FilteringSamIterator(htsjdk.samtools.filter.FilteringSamIterator) SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) Logger(com.github.lindenb.jvarkit.util.log.Logger) IOException(java.io.IOException) SamReader(htsjdk.samtools.SamReader) Collectors(java.util.stream.Collectors) SAMRecord(htsjdk.samtools.SAMRecord) List(java.util.List) SamInputResource(htsjdk.samtools.SamInputResource) TreeMap(java.util.TreeMap) StringUtils(com.github.lindenb.jvarkit.lang.StringUtils) RangeOfIntegers(com.github.lindenb.jvarkit.math.RangeOfIntegers) SamReaderFactory(htsjdk.samtools.SamReaderFactory) SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) IntervalFilter(htsjdk.samtools.filter.IntervalFilter) FilteringSamIterator(htsjdk.samtools.filter.FilteringSamIterator) Interval(htsjdk.samtools.util.Interval)

Example 40 with CloseableIterator

use of htsjdk.samtools.util.CloseableIterator in project jvarkit by lindenb.

the class BamStage method reloadData.

@Override
void reloadData() {
    updateStatusBar(AlertType.NONE, "");
    final int max_items = super.maxItemsLimitSpinner.getValue();
    final List<SAMRecord> L = new ArrayList<SAMRecord>(max_items);
    final String location = this.gotoField.getText().trim();
    final CloseableIterator<SAMRecord> iter;
    final java.util.function.Predicate<SAMRecord> recFilter = makeFlagPredicate();
    Long canvasFirstRecordGenomicIndex = null;
    Long canvasLastRecordGenomicIndex = null;
    try {
        if (location.isEmpty()) {
            iter = this.getBamFile().iterator();
        } else if (location.equalsIgnoreCase("unmapped")) {
            iter = this.getBamFile().queryUnmapped();
        } else {
            final Interval interval = parseInterval(location);
            if (interval == null) {
                iter = null;
            } else {
                iter = this.getBamFile().iterator(interval.getContig(), interval.getStart(), interval.getEnd());
            }
        }
    } catch (final IOException err) {
        err.printStackTrace();
        JfxNgs.showExceptionDialog(this, err);
        return;
    }
    Optional<BamJavascripFilter> bamjsfilter = Optional.empty();
    if (this.owner.javascriptCompiler.isPresent() && !this.javascriptArea.getText().trim().isEmpty()) {
        try {
            bamjsfilter = Optional.of(new BamJavascripFilter(this.getBamFile().getHeader(), Optional.of(this.owner.javascriptCompiler.get().compile(this.javascriptArea.getText()))));
        } catch (final Exception err) {
            LOG.warning(err.getMessage());
            updateStatusBar(AlertType.ERROR, err);
            bamjsfilter = Optional.empty();
        }
    }
    final Map<ContigPos, Pileup> pos2pileup = new TreeMap<>();
    final Function<ContigPos, Pileup> getpileup = new Function<ContigPos, Pileup>() {

        @Override
        public Pileup apply(ContigPos t) {
            Pileup p = pos2pileup.get(t);
            if (p == null) {
                p = new Pileup(t.contig, t.position);
                pos2pileup.put(t, p);
            }
            return p;
        }
    };
    int count_items = 0;
    while (iter != null && iter.hasNext() && count_items < max_items) {
        final SAMRecord rec = iter.next();
        ++count_items;
        if (bamjsfilter.isPresent()) {
            if (bamjsfilter.get().eval(rec) == null)
                continue;
        }
        if (!recFilter.test(rec))
            continue;
        L.add(rec);
        /* get bounds for canvas genmic browser */
        if (!rec.getReadUnmappedFlag() && rec.getCigar() != null) {
            final long endIndex = convertContigPosToGenomicIndex(rec.getContig(), rec.getUnclippedEnd());
            if (canvasFirstRecordGenomicIndex == null) {
                canvasFirstRecordGenomicIndex = convertContigPosToGenomicIndex(rec.getContig(), rec.getUnclippedStart());
                canvasLastRecordGenomicIndex = endIndex;
            } else if (canvasLastRecordGenomicIndex < endIndex) {
                canvasLastRecordGenomicIndex = endIndex;
            }
        }
        /* FILL pileup */
        if (!rec.getReadUnmappedFlag() && rec.getCigar() != null) {
            int refpos = rec.getUnclippedStart();
            int readpos = 0;
            final byte[] bases = rec.getReadBases();
            final byte[] quals = rec.getOriginalBaseQualities();
            /**
             * function getting the ith base
             */
            final Function<Integer, Character> getBaseAt = new Function<Integer, Character>() {

                @Override
                public Character apply(Integer readPos) {
                    char c;
                    if (bases == null || bases.length <= readPos) {
                        return '?';
                    } else {
                        c = (char) bases[readPos];
                    }
                    c = (rec.getReadNegativeStrandFlag() ? Character.toLowerCase(c) : Character.toUpperCase(c));
                    return c;
                }
            };
            /**
             * function getting the ith base quality
             */
            final Function<Integer, Character> getQualAt = new Function<Integer, Character>() {

                @Override
                public Character apply(Integer readPos) {
                    char c;
                    if (quals == null || quals.length <= readPos) {
                        return '#';
                    } else {
                        c = (char) quals[readPos];
                    }
                    return c;
                }
            };
            for (final CigarElement ce : rec.getCigar()) {
                switch(ce.getOperator()) {
                    case P:
                        break;
                    case N:
                    case D:
                        {
                            refpos += ce.getLength();
                            break;
                        }
                    case H:
                        {
                            for (int i = 0; i < ce.getLength(); ++i) {
                                final Pileup p = getpileup.apply(new ContigPos(rec.getContig(), refpos));
                                p.watch('-', '#', ce.getOperator());
                                ++refpos;
                            }
                            break;
                        }
                    case I:
                        {
                            for (int i = 0; i < ce.getLength(); ++i) {
                                final Pileup p = getpileup.apply(new ContigPos(rec.getContig(), refpos));
                                p.watch('<', getQualAt.apply(readpos), ce.getOperator());
                                readpos++;
                            }
                            break;
                        }
                    case S:
                        for (int i = 0; i < ce.getLength(); ++i) {
                            final Pileup p = getpileup.apply(new ContigPos(rec.getContig(), refpos));
                            p.watch('-', getQualAt.apply(readpos), ce.getOperator());
                            ++readpos;
                            ++refpos;
                        }
                        break;
                    case EQ:
                    case X:
                    case M:
                        for (int i = 0; i < ce.getLength(); ++i) {
                            final Pileup p = getpileup.apply(new ContigPos(rec.getContig(), refpos));
                            p.watch(getBaseAt.apply(readpos), getQualAt.apply(readpos), ce.getOperator());
                            ++readpos;
                            ++refpos;
                        }
                        break;
                }
            }
        }
    }
    if (iter != null)
        iter.close();
    this.canvasScrolVInCoverage.setMin(0);
    final int max_depth = pos2pileup.values().stream().map(P -> P.depth()).max((A, B) -> (A.compareTo(B))).orElse(0);
    this.canvasScrolVInCoverage.setMax(max_depth + 1);
    this.canvasScrolVInCoverage.setValue(0);
    this.recordTable.getItems().setAll(L);
    this.pileupTable.getItems().setAll(pos2pileup.values());
    if (canvasFirstRecordGenomicIndex != null && canvasLastRecordGenomicIndex != null && canvasFirstRecordGenomicIndex.longValue() < canvasLastRecordGenomicIndex.longValue()) {
        this.canvasScrollHGenomicLoc.setMin(canvasFirstRecordGenomicIndex);
        this.canvasScrollHGenomicLoc.setMax(canvasLastRecordGenomicIndex);
        // this.canvasScrollHGenomicLoc.setUnitIncrement(1);
        // this.canvasScrollHGenomicLoc.setBlockIncrement(Math.max(1.0,Math.min( canvasLastRecordGenomicIndex-canvasFirstRecordGenomicIndex, this.canvas.getWidth())));
        this.canvasScrollHGenomicLoc.setValue(canvasFirstRecordGenomicIndex);
    } else {
        this.canvasScrollHGenomicLoc.setMin(0);
        this.canvasScrollHGenomicLoc.setMax(0);
        this.canvasScrollHGenomicLoc.setValue(0);
        this.canvasScrollHGenomicLoc.setBlockIncrement(0);
        this.canvasScrollHGenomicLoc.setUnitIncrement(1);
    }
    if (!this.recordTable.getItems().isEmpty()) {
        final int last_index = this.recordTable.getItems().size() - 1;
        if (!this.recordTable.getItems().get(0).getReadUnmappedFlag() && !this.recordTable.getItems().get(last_index).getReadUnmappedFlag()) {
            super.seqDictionaryCanvas.setItemsInterval(new ContigPos(this.recordTable.getItems().get(0).getContig(), this.recordTable.getItems().get(0).getStart()), new ContigPos(this.recordTable.getItems().get(last_index).getContig(), this.recordTable.getItems().get(last_index).getEnd()));
            if (this.recordTable.getItems().get(0).getContig().equals(this.recordTable.getItems().get(last_index).getContig())) {
                this.gotoField.setText(this.recordTable.getItems().get(0).getContig() + ":" + this.recordTable.getItems().get(0).getStart() + "-" + this.recordTable.getItems().get(last_index).getEnd());
            }
        } else {
            super.seqDictionaryCanvas.setItemsInterval(null, null);
        }
    } else {
        super.seqDictionaryCanvas.setItemsInterval(null, null);
    }
    /* show hide columns for paired end data if no paired data found */
    {
        final boolean contains_paired = this.recordTable.getItems().stream().anyMatch(S -> S.getReadPairedFlag());
        for (final TableColumn<SAMRecord, ?> tc : this.pairedEndColumns) tc.setVisible(contains_paired);
    }
    repaintCanvas();
}
Also used : Arrays(java.util.Arrays) VCFHeader(htsjdk.variant.vcf.VCFHeader) CigarOperator(htsjdk.samtools.CigarOperator) ChartFactory(com.github.lindenb.jvarkit.tools.vcfviewgui.chart.ChartFactory) VariantContextChartFactory(com.github.lindenb.jvarkit.tools.vcfviewgui.chart.VariantContextChartFactory) ScrollPane(javafx.scene.control.ScrollPane) TabPane(javafx.scene.control.TabPane) Map(java.util.Map) CigarOpPerPositionChartFactory(com.github.lindenb.jvarkit.tools.vcfviewgui.chart.CigarOpPerPositionChartFactory) ScriptException(javax.script.ScriptException) CloserUtil(htsjdk.samtools.util.CloserUtil) Rectangle2D(javafx.geometry.Rectangle2D) SplitPane(javafx.scene.control.SplitPane) GraphicsContext(javafx.scene.canvas.GraphicsContext) SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) Event(javafx.event.Event) Set(java.util.Set) SAMFileWriter(htsjdk.samtools.SAMFileWriter) Screen(javafx.stage.Screen) SAMTagAndValue(htsjdk.samtools.SAMRecord.SAMTagAndValue) FastqRecord(htsjdk.samtools.fastq.FastqRecord) Platform(javafx.application.Platform) Separator(javafx.scene.control.Separator) Stream(java.util.stream.Stream) SAMReadGroupRecord(htsjdk.samtools.SAMReadGroupRecord) FlowPane(javafx.scene.layout.FlowPane) SAMFlag(htsjdk.samtools.SAMFlag) BorderPane(javafx.scene.layout.BorderPane) SamFlagsChartFactory(com.github.lindenb.jvarkit.tools.vcfviewgui.chart.SamFlagsChartFactory) CloseableIterator(htsjdk.samtools.util.CloseableIterator) FXCollections(javafx.collections.FXCollections) LogCloseableIterator(com.github.lindenb.jvarkit.tools.vcfviewgui.NgsStage.LogCloseableIterator) TextFlow(javafx.scene.text.TextFlow) Supplier(java.util.function.Supplier) ArrayList(java.util.ArrayList) Color(javafx.scene.paint.Color) SAMFileWriterFactory(htsjdk.samtools.SAMFileWriterFactory) StringWriter(java.io.StringWriter) CheckBox(javafx.scene.control.CheckBox) IOException(java.io.IOException) File(java.io.File) Menu(javafx.scene.control.Menu) FileChooser(javafx.stage.FileChooser) SamInputResource(htsjdk.samtools.SamInputResource) TreeMap(java.util.TreeMap) Tab(javafx.scene.control.Tab) CompiledScript(javax.script.CompiledScript) ObservableValue(javafx.beans.value.ObservableValue) EventHandler(javafx.event.EventHandler) Button(javafx.scene.control.Button) CigarElement(htsjdk.samtools.CigarElement) CheckMenuItem(javafx.scene.control.CheckMenuItem) OverrunStyle(javafx.scene.control.OverrunStyle) VBox(javafx.scene.layout.VBox) SAMFileHeader(htsjdk.samtools.SAMFileHeader) BasesPerPositionChartFactory(com.github.lindenb.jvarkit.tools.vcfviewgui.chart.BasesPerPositionChartFactory) ComboBox(javafx.scene.control.ComboBox) AlertType(javafx.scene.control.Alert.AlertType) ContextMenu(javafx.scene.control.ContextMenu) WindowEvent(javafx.stage.WindowEvent) TableView(javafx.scene.control.TableView) ReadLengthChartFactory(com.github.lindenb.jvarkit.tools.vcfviewgui.chart.ReadLengthChartFactory) Orientation(javafx.geometry.Orientation) Alert(javafx.scene.control.Alert) MenuItem(javafx.scene.control.MenuItem) Predicate(java.util.function.Predicate) Logger(com.github.lindenb.jvarkit.util.log.Logger) Font(javafx.scene.text.Font) Chart(javafx.scene.chart.Chart) Collectors(java.util.stream.Collectors) SAMRecord(htsjdk.samtools.SAMRecord) SeparatorMenuItem(javafx.scene.control.SeparatorMenuItem) Text(javafx.scene.text.Text) SimpleBindings(javax.script.SimpleBindings) QualityPerPositionChartFactory(com.github.lindenb.jvarkit.tools.vcfviewgui.chart.QualityPerPositionChartFactory) List(java.util.List) SAMProgramRecord(htsjdk.samtools.SAMProgramRecord) Optional(java.util.Optional) VariantContext(htsjdk.variant.variantcontext.VariantContext) Pattern(java.util.regex.Pattern) SamReaderFactory(htsjdk.samtools.SamReaderFactory) FastqReader(htsjdk.samtools.fastq.FastqReader) ReadQualityChartFactory(com.github.lindenb.jvarkit.tools.vcfviewgui.chart.ReadQualityChartFactory) Cigar(htsjdk.samtools.Cigar) Scene(javafx.scene.Scene) SequenceUtil(htsjdk.samtools.util.SequenceUtil) SAMUtils(htsjdk.samtools.SAMUtils) TextArea(javafx.scene.control.TextArea) ButtonType(javafx.scene.control.ButtonType) HashMap(java.util.HashMap) GCPercentChartFactory(com.github.lindenb.jvarkit.tools.vcfviewgui.chart.GCPercentChartFactory) MapqChartFactory(com.github.lindenb.jvarkit.tools.vcfviewgui.chart.MapqChartFactory) Function(java.util.function.Function) ValidationStringency(htsjdk.samtools.ValidationStringency) SAMTextHeaderCodec(htsjdk.samtools.SAMTextHeaderCodec) TableColumn(javafx.scene.control.TableColumn) HashSet(java.util.HashSet) SAMTag(htsjdk.samtools.SAMTag) Interval(htsjdk.samtools.util.Interval) TableCell(javafx.scene.control.TableCell) Insets(javafx.geometry.Insets) Tooltip(javafx.scene.control.Tooltip) Locatable(htsjdk.samtools.util.Locatable) Label(javafx.scene.control.Label) MenuBar(javafx.scene.control.MenuBar) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) JfxHershey(com.github.lindenb.jvarkit.util.hershey.JfxHershey) ReadChartFactory(com.github.lindenb.jvarkit.tools.vcfviewgui.chart.ReadChartFactory) SamReader(htsjdk.samtools.SamReader) ScrollEvent(javafx.scene.input.ScrollEvent) ActionEvent(javafx.event.ActionEvent) Stage(javafx.stage.Stage) SpinnerValueFactory(javafx.scene.control.SpinnerValueFactory) ChangeListener(javafx.beans.value.ChangeListener) ExtensionFilter(javafx.stage.FileChooser.ExtensionFilter) Collections(java.util.Collections) ScrollBar(javafx.scene.control.ScrollBar) ArrayList(java.util.ArrayList) Function(java.util.function.Function) IOException(java.io.IOException) TreeMap(java.util.TreeMap) CigarElement(htsjdk.samtools.CigarElement) TableColumn(javafx.scene.control.TableColumn) ScriptException(javax.script.ScriptException) IOException(java.io.IOException) SAMRecord(htsjdk.samtools.SAMRecord) Interval(htsjdk.samtools.util.Interval)

Aggregations

CloseableIterator (htsjdk.samtools.util.CloseableIterator)103 List (java.util.List)86 Logger (com.github.lindenb.jvarkit.util.log.Logger)85 Parameter (com.beust.jcommander.Parameter)82 Program (com.github.lindenb.jvarkit.util.jcommander.Program)78 ArrayList (java.util.ArrayList)73 Collectors (java.util.stream.Collectors)71 SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary)69 Path (java.nio.file.Path)69 Launcher (com.github.lindenb.jvarkit.util.jcommander.Launcher)66 CloserUtil (htsjdk.samtools.util.CloserUtil)64 Set (java.util.Set)64 VCFHeader (htsjdk.variant.vcf.VCFHeader)59 VariantContext (htsjdk.variant.variantcontext.VariantContext)54 IOException (java.io.IOException)53 SequenceDictionaryUtils (com.github.lindenb.jvarkit.util.bio.SequenceDictionaryUtils)51 SAMRecord (htsjdk.samtools.SAMRecord)51 VariantContextWriter (htsjdk.variant.variantcontext.writer.VariantContextWriter)51 IOUtils (com.github.lindenb.jvarkit.io.IOUtils)50 StringUtils (com.github.lindenb.jvarkit.lang.StringUtils)49