Search in sources :

Example 1 with SamRecordIntervalIteratorFactory

use of htsjdk.samtools.util.SamRecordIntervalIteratorFactory in project ASCIIGenome by dariober.

The method iterator of the class SamLocusIterator.

/**
 * Prepares the underlying record stream and returns this object as the iterator.
 *
 * <p>The record source is restricted to {@code intervals} when they are set, otherwise
 * every record of {@code samReader} is read. When {@code samFilters} is set, the source
 * is additionally wrapped in a {@link FilteringIterator} driven by an
 * {@link AggregateFilter} over those filters. The result is stored in
 * {@code samIterator} as a {@link PeekableIterator}.
 *
 * @return this instance
 * @throws IllegalStateException if {@code samIterator} has already been initialised,
 *         i.e. this method was called before
 */
public Iterator<LocusInfo> iterator() {
    // Single-use guard: a non-null samIterator means we were already here.
    if (samIterator != null) {
        throw new IllegalStateException("Cannot call iterator() more than once on SamLocusIterator");
    }

    // Step 1: choose the raw record source.
    final CloseableIterator<SAMRecord> rawRecords = (intervals == null)
            ? samReader.iterator()
            : new SamRecordIntervalIteratorFactory().makeSamRecordIntervalIterator(samReader, intervals, useIndex);

    // Step 2: optionally layer the configured filters on top of it.
    final CloseableIterator<SAMRecord> records = (samFilters == null)
            ? rawRecords
            : new FilteringIterator(rawRecords, new AggregateFilter(samFilters));

    // Step 3: keep a peekable view for the locus accumulation logic.
    samIterator = new PeekableIterator<SAMRecord>(records);
    return this;
}
Also used : AggregateFilter(htsjdk.samtools.filter.AggregateFilter) SAMRecord(htsjdk.samtools.SAMRecord) FilteringIterator(htsjdk.samtools.filter.FilteringIterator) SamRecordIntervalIteratorFactory(htsjdk.samtools.util.SamRecordIntervalIteratorFactory)

Example 2 with SamRecordIntervalIteratorFactory

use of htsjdk.samtools.util.SamRecordIntervalIteratorFactory in project jvarkit by lindenb.

The method doWork of the class ImpactOfDuplicates.

/**
 * Scans every input alignment file, collects one {@code Duplicate} entry per retained
 * read pair, then writes a tab-delimited report.
 *
 * <p>For each input file (opened with lenient validation):
 * <ul>
 *   <li>its sequence dictionary is appended to {@code samFileDicts};</li>
 *   <li>records are read either from the whole file or, when {@code BEDFILE} is set,
 *       only from the unique intervals listed in that file (the index is not used);</li>
 *   <li>a record is kept only if it is mapped, paired, on the same reference as its
 *       mate, flagged as a proper pair and is the first of the pair;</li>
 *   <li>kept records must be ordered by reference index, then alignment start;
 *       otherwise an {@link IOException} is raised (and reported as a failure);</li>
 *   <li>for each kept record a {@code Duplicate} is stored with the file index, the
 *       smaller of the read/mate starts, the reference index and the absolute
 *       inferred insert size.</li>
 * </ul>
 *
 * <p>Once all files are read, the sorted entries are grouped by
 * {@code compareChromPosSize} and each group is emitted via
 * {@code dumpDuplicatesBuffer}.
 *
 * @param args paths of the input alignment files
 * @return {@code 0} on success, {@code -1} if any exception was raised (it is logged)
 */
@Override
public int doWork(final List<String> args) {
    CloseableIterator<Duplicate> dupIter = null;
    final List<File> inputs = args.stream().map(File::new).collect(Collectors.toList());
    try {
        this.duplicates = SortingCollection.newInstance(
                Duplicate.class,
                new DuplicateCodec(),
                (final Duplicate o1, final Duplicate o2) -> o1.compareTo(o2),
                this.sortingCollectionArgs.getMaxRecordsInRam(),
                this.sortingCollectionArgs.getTmpPaths());
        for (this.bamIndex = 0; this.bamIndex < inputs.size(); this.bamIndex++) {
            int prev_tid = -1;
            int prev_pos = -1;
            long nLines = 0L;
            final File inFile = inputs.get(this.bamIndex);
            LOG.info("Processing " + inFile);
            IOUtil.assertFileIsReadable(inFile);
            // try-with-resources guarantees the reader is closed even if closing the
            // record iterator (inner finally) fails.
            try (SamReader samReader = SamReaderFactory.make().validationStringency(ValidationStringency.LENIENT).open(inFile)) {
                CloseableIterator<SAMRecord> iter = null;
                try {
                    final SAMFileHeader header = samReader.getFileHeader();
                    this.samFileDicts.add(header.getSequenceDictionary());
                    if (BEDFILE == null) {
                        iter = samReader.iterator();
                    } else {
                        IntervalList intervalList = new IntervalList(header);
                        // The BED reader is closed even when a line fails to parse.
                        try (BufferedReader in = new BufferedReader(new FileReader(BEDFILE))) {
                            String line;
                            while ((line = in.readLine()) != null) {
                                if (line.isEmpty() || line.startsWith("#")) {
                                    continue;
                                }
                                final String[] tokens = line.split("[\t]");
                                // Start column is shifted by one (BED starts are presumably
                                // 0-based while Interval expects 1-based coordinates).
                                final Interval interval = new Interval(tokens[0], 1 + Integer.parseInt(tokens[1]), Integer.parseInt(tokens[2]));
                                intervalList.add(interval);
                            }
                        }
                        intervalList = intervalList.sorted();
                        final List<Interval> uniqueIntervals = IntervalList.getUniqueIntervals(intervalList, false);
                        final SamRecordIntervalIteratorFactory sriif = new SamRecordIntervalIteratorFactory();
                        iter = sriif.makeSamRecordIntervalIterator(samReader, uniqueIntervals, false);
                    }
                    while (iter.hasNext()) {
                        final SAMRecord rec = iter.next();
                        if (rec.getReadUnmappedFlag()) {
                            continue;
                        }
                        if (!rec.getReadPairedFlag()) {
                            continue;
                        }
                        // Compare the indexes by value: the getters return boxed Integers,
                        // so '!=' would be an identity test and could reject same-contig
                        // pairs whose index lies outside the Integer cache.
                        final Integer refIndex = rec.getReferenceIndex();
                        if (!refIndex.equals(rec.getMateReferenceIndex())) {
                            continue;
                        }
                        if (!rec.getProperPairFlag()) {
                            continue;
                        }
                        if (!rec.getFirstOfPairFlag()) {
                            continue;
                        }
                        final int tid = refIndex;
                        final int pos = rec.getAlignmentStart();
                        // Sort-order check against the previously kept record.
                        if (prev_tid != -1
                                && (prev_tid > tid || (prev_tid == tid && prev_pos > pos))) {
                            throw new IOException("Bad sort order from " + rec);
                        }
                        // Always remember BOTH coordinates so the check stays valid after
                        // moving on to the next reference sequence.
                        prev_tid = tid;
                        prev_pos = pos;
                        if ((++nLines) % 1000000 == 0) {
                            LOG.info("In " + inFile + " N=" + nLines);
                        }
                        final Duplicate dup = new Duplicate();
                        dup.bamIndex = this.bamIndex;
                        dup.pos = Math.min(pos, rec.getMateAlignmentStart());
                        dup.tid = tid;
                        dup.size = Math.abs(rec.getInferredInsertSize());
                        this.duplicates.add(dup);
                    }
                } finally {
                    if (iter != null) {
                        iter.close();
                    }
                }
            }
            LOG.info("done " + inFile);
        }
        // Loop done, now scan the duplicates.
        LOG.info("doneAdding");
        this.duplicates.doneAdding();
        this.out = super.openFileOrStdoutAsPrintStream(outputFile);
        out.print("#INTERVAL\tMAX\tMEAN");
        for (int i = 0; i < inputs.size(); ++i) {
            out.print('\t');
            out.print(inputs.get(i));
        }
        out.println();
        dupIter = this.duplicates.iterator();
        while (dupIter.hasNext()) {
            final Duplicate dup = dupIter.next();
            // A change of (chrom, pos, size) closes the current group: flush it before
            // starting the next one with this entry.
            if (!this.duplicatesBuffer.isEmpty() && dup.compareChromPosSize(this.duplicatesBuffer.get(0)) != 0) {
                dumpDuplicatesBuffer(inputs);
            }
            this.duplicatesBuffer.add(dup);
        }
        dumpDuplicatesBuffer(inputs);
        LOG.info("end iterator");
        out.flush();
    } catch (Exception e) {
        LOG.error(e);
        return -1;
    } finally {
        if (dupIter != null) {
            dupIter.close();
        }
        // Close the report on both the success and the failure path.
        if (this.out != null) {
            this.out.close();
        }
        LOG.info("cleaning duplicates");
        // May still be null if creating the sorting collection itself failed.
        if (this.duplicates != null) {
            this.duplicates.cleanup();
        }
    }
    return 0;
}
Also used : RuntimeEOFException(htsjdk.samtools.util.RuntimeEOFException) CloseableIterator(htsjdk.samtools.util.CloseableIterator) Arrays(java.util.Arrays) Program(com.github.lindenb.jvarkit.util.jcommander.Program) Parameter(com.beust.jcommander.Parameter) IOUtil(htsjdk.samtools.util.IOUtil) SAMFileHeader(htsjdk.samtools.SAMFileHeader) ValidationStringency(htsjdk.samtools.ValidationStringency) ParametersDelegate(com.beust.jcommander.ParametersDelegate) ArrayList(java.util.ArrayList) Interval(htsjdk.samtools.util.Interval) Launcher(com.github.lindenb.jvarkit.util.jcommander.Launcher) PrintStream(java.io.PrintStream) SortingCollection(htsjdk.samtools.util.SortingCollection) BinaryCodec(htsjdk.samtools.util.BinaryCodec) Logger(com.github.lindenb.jvarkit.util.log.Logger) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) IntervalList(htsjdk.samtools.util.IntervalList) IOException(java.io.IOException) SamReader(htsjdk.samtools.SamReader) Collectors(java.util.stream.Collectors) File(java.io.File) SAMRecord(htsjdk.samtools.SAMRecord) List(java.util.List) SamRecordIntervalIteratorFactory(htsjdk.samtools.util.SamRecordIntervalIteratorFactory) BufferedReader(java.io.BufferedReader) FileReader(java.io.FileReader) Comparator(java.util.Comparator) SamReaderFactory(htsjdk.samtools.SamReaderFactory) IOException(java.io.IOException) SamRecordIntervalIteratorFactory(htsjdk.samtools.util.SamRecordIntervalIteratorFactory) RuntimeEOFException(htsjdk.samtools.util.RuntimeEOFException) IOException(java.io.IOException) Comparator(java.util.Comparator) SamReader(htsjdk.samtools.SamReader) IntervalList(htsjdk.samtools.util.IntervalList) SAMRecord(htsjdk.samtools.SAMRecord) BufferedReader(java.io.BufferedReader) FileReader(java.io.FileReader) SAMFileHeader(htsjdk.samtools.SAMFileHeader) File(java.io.File) Interval(htsjdk.samtools.util.Interval)

Aggregations

SAMRecord (htsjdk.samtools.SAMRecord)2 SamRecordIntervalIteratorFactory (htsjdk.samtools.util.SamRecordIntervalIteratorFactory)2 Parameter (com.beust.jcommander.Parameter)1 ParametersDelegate (com.beust.jcommander.ParametersDelegate)1 Launcher (com.github.lindenb.jvarkit.util.jcommander.Launcher)1 Program (com.github.lindenb.jvarkit.util.jcommander.Program)1 Logger (com.github.lindenb.jvarkit.util.log.Logger)1 SAMFileHeader (htsjdk.samtools.SAMFileHeader)1 SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary)1 SamReader (htsjdk.samtools.SamReader)1 SamReaderFactory (htsjdk.samtools.SamReaderFactory)1 ValidationStringency (htsjdk.samtools.ValidationStringency)1 AggregateFilter (htsjdk.samtools.filter.AggregateFilter)1 FilteringIterator (htsjdk.samtools.filter.FilteringIterator)1 BinaryCodec (htsjdk.samtools.util.BinaryCodec)1 CloseableIterator (htsjdk.samtools.util.CloseableIterator)1 IOUtil (htsjdk.samtools.util.IOUtil)1 Interval (htsjdk.samtools.util.Interval)1 IntervalList (htsjdk.samtools.util.IntervalList)1 RuntimeEOFException (htsjdk.samtools.util.RuntimeEOFException)1