Search in sources :

Example 6 with ProgressLogger

use of htsjdk.samtools.util.ProgressLogger in project gridss by PapenfussLab.

the class SubsetToMissing method doWork.

@Override
protected int doWork() {
    long stop = Long.MAX_VALUE;
    if (STOP_AFTER != null && (long) STOP_AFTER > 0) {
        stop = STOP_AFTER;
    }
    log.debug("Setting language-neutral locale");
    java.util.Locale.setDefault(Locale.ROOT);
    if (TMP_DIR == null || TMP_DIR.size() == 0) {
        TMP_DIR = Lists.newArrayList(new File("."));
    }
    SamReaderFactory factory = SamReaderFactory.makeDefault().validationStringency(ValidationStringency.SILENT);
    SamReader input = factory.open(INPUT);
    Iterator<SAMRecord> intputit = new AsyncBufferedIterator<SAMRecord>(input.iterator(), 2, 16384);
    SAMFileWriter out = new SAMFileWriterFactory().makeSAMOrBAMWriter(input.getFileHeader(), true, OUTPUT);
    LongSet hashtable;
    if (PREALLOCATE != null) {
        log.info("Preallocating hash table");
        hashtable = new LongOpenHashBigSet(PREALLOCATE);
    } else {
        hashtable = new LongOpenHashBigSet();
    }
    for (File file : LOOKUP) {
        log.info("Loading lookup hashes for " + file.getAbsolutePath());
        SamReader lookup = factory.open(file);
        AsyncBufferedIterator<SAMRecord> it = new AsyncBufferedIterator<SAMRecord>(lookup.iterator(), 2, 16384);
        File cache = new File(file.getAbsolutePath() + ".SubsetToMissing.cache");
        if (cache.exists()) {
            log.info("Loading lookup hashes from cache");
            long n = stop;
            DataInputStream dis = null;
            try {
                long loadCount = 0;
                dis = new DataInputStream(new BufferedInputStream(new FileInputStream(cache)));
                while (n-- > 0) {
                    hashtable.add(dis.readLong());
                    if (loadCount % 10000000 == 0) {
                        log.info(String.format("Loaded %d from cache", loadCount));
                    }
                }
            } catch (EOFException e) {
                try {
                    if (dis != null)
                        dis.close();
                } catch (IOException e1) {
                    log.error(e1);
                }
            } catch (IOException e) {
                log.error(e);
            }
        } else {
            long n = stop;
            ProgressLoggingSAMRecordIterator loggedit = new ProgressLoggingSAMRecordIterator(it, new ProgressLogger(log));
            try {
                DataOutputStream dos = new DataOutputStream(new BufferedOutputStream(new FileOutputStream(cache)));
                while (loggedit.hasNext() && n-- > 0) {
                    long recordhash = hash(loggedit.next());
                    hashtable.add(recordhash);
                    dos.writeLong(recordhash);
                }
                dos.close();
            } catch (Exception e) {
                log.error(e, "Failed to load lookup. Running with partial results");
            }
            loggedit.close();
        }
        it.close();
    }
    long filtered = 0;
    log.info("Processing input");
    intputit = new ProgressLoggingSAMRecordIterator(intputit, new ProgressLogger(log));
    long n = stop;
    while (intputit.hasNext() && n-- > 0) {
        SAMRecord r = intputit.next();
        if (!hashtable.contains(hash(r))) {
            out.addAlignment(r);
        } else {
            filtered++;
            if (filtered % 1000000 == 0) {
                log.info(String.format("Filtered %d reads", filtered));
            }
        }
    }
    log.info("Closing output");
    out.close();
    return 0;
}
Also used : SamReaderFactory(htsjdk.samtools.SamReaderFactory) SAMFileWriter(htsjdk.samtools.SAMFileWriter) DataOutputStream(java.io.DataOutputStream) LongSet(it.unimi.dsi.fastutil.longs.LongSet) SAMFileWriterFactory(htsjdk.samtools.SAMFileWriterFactory) ProgressLogger(htsjdk.samtools.util.ProgressLogger) IOException(java.io.IOException) DataInputStream(java.io.DataInputStream) FileInputStream(java.io.FileInputStream) IOException(java.io.IOException) EOFException(java.io.EOFException) SamReader(htsjdk.samtools.SamReader) LongOpenHashBigSet(it.unimi.dsi.fastutil.longs.LongOpenHashBigSet) BufferedInputStream(java.io.BufferedInputStream) SAMRecord(htsjdk.samtools.SAMRecord) FileOutputStream(java.io.FileOutputStream) EOFException(java.io.EOFException) ProgressLoggingSAMRecordIterator(au.edu.wehi.idsv.ProgressLoggingSAMRecordIterator) File(java.io.File) BufferedOutputStream(java.io.BufferedOutputStream)

Example 7 with ProgressLogger

use of htsjdk.samtools.util.ProgressLogger in project gridss by PapenfussLab.

the class AssemblyEvidenceSource method getAllAssemblies_single_threaded.

@SuppressWarnings("unused")
private Iterator<SAMRecord> getAllAssemblies_single_threaded() {
    ProgressLogger progressLog = new ProgressLogger(log);
    List<Iterator<SAMRecord>> list = new ArrayList<>();
    for (BreakendDirection direction : BreakendDirection.values()) {
        CloseableIterator<DirectedEvidence> it = mergedIterator(source, false);
        Iterator<DirectedEvidence> throttledIt = throttled(it);
        ProgressLoggingDirectedEvidenceIterator<DirectedEvidence> loggedIt = new ProgressLoggingDirectedEvidenceIterator<>(getContext(), throttledIt, progressLog);
        Iterator<SAMRecord> evidenceIt = new PositionalAssembler(getContext(), this, new SequentialIdGenerator("asm"), loggedIt, direction);
        list.add(evidenceIt);
    }
    return Iterators.concat(list.iterator());
}
Also used : PositionalAssembler(au.edu.wehi.idsv.debruijn.positional.PositionalAssembler) ArrayList(java.util.ArrayList) ProgressLogger(htsjdk.samtools.util.ProgressLogger) SAMRecord(htsjdk.samtools.SAMRecord) CloseableIterator(htsjdk.samtools.util.CloseableIterator) Iterator(java.util.Iterator)

Aggregations

ProgressLogger (htsjdk.samtools.util.ProgressLogger)7 SAMRecord (htsjdk.samtools.SAMRecord)6 AsyncBufferedIterator (au.edu.wehi.idsv.util.AsyncBufferedIterator)4 SamReader (htsjdk.samtools.SamReader)4 ArrayList (java.util.ArrayList)4 ProgressLoggingSAMRecordIterator (au.edu.wehi.idsv.ProgressLoggingSAMRecordIterator)2 SAMRecordIterator (htsjdk.samtools.SAMRecordIterator)2 SamReaderFactory (htsjdk.samtools.SamReaderFactory)2 File (java.io.File)2 IOException (java.io.IOException)2 DirectedBreakpoint (au.edu.wehi.idsv.DirectedBreakpoint)1 IdsvVariantContext (au.edu.wehi.idsv.IdsvVariantContext)1 PositionalAssembler (au.edu.wehi.idsv.debruijn.positional.PositionalAssembler)1 ReferenceLookup (au.edu.wehi.idsv.picard.ReferenceLookup)1 TwoBitBufferedReferenceSequenceFile (au.edu.wehi.idsv.picard.TwoBitBufferedReferenceSequenceFile)1 NmTagIterator (au.edu.wehi.idsv.sam.NmTagIterator)1 TemplateTagsIterator (au.edu.wehi.idsv.sam.TemplateTagsIterator)1 SAMFileHeader (htsjdk.samtools.SAMFileHeader)1 SortOrder (htsjdk.samtools.SAMFileHeader.SortOrder)1 SAMFileWriter (htsjdk.samtools.SAMFileWriter)1