Search in sources :

Example 1 with ProgressLoggingSAMRecordIterator

use of au.edu.wehi.idsv.ProgressLoggingSAMRecordIterator in project gridss by PapenfussLab.

The method doWork of the class ReadsToBedpe.

/**
 * Converts every record read from {@code INPUT} into text lines via
 * {@code asBedPe(dict, record)} and writes them to {@code OUTPUT}.
 *
 * <p>{@code asBedPe} yields a list of lines per record; {@code null} entries in that list are
 * skipped and every other entry is written followed by a single {@code '\n'}.
 *
 * @return 0 on success, -1 if an {@link IOException} was raised while reading or writing
 */
@Override
protected int doWork() {
    log.debug("Setting language-neutral locale");
    java.util.Locale.setDefault(Locale.ROOT);
    validateParameters();
    SamReaderFactory readerFactory = SamReaderFactory.make();
    try (SamReader reader = readerFactory.open(INPUT)) {
        SAMFileHeader header = reader.getFileHeader();
        SAMSequenceDictionary dict = header.getSequenceDictionary();
        // Resources are closed in reverse order: the writer first, then the buffered iterator.
        try (CloseableIterator<SAMRecord> rawit = new AsyncBufferedIterator<SAMRecord>(reader.iterator(), 3, 64);
                BufferedWriter writer = new BufferedWriter(new FileWriter(OUTPUT))) {
            ProgressLoggingSAMRecordIterator logit = new ProgressLoggingSAMRecordIterator(rawit, new ProgressLogger(log));
            Iterator<List<String>> it = Iterators.transform(logit, r -> asBedPe(dict, r));
            // long rather than int so the counter cannot wrap on very large inputs.
            long recordCount = 0;
            while (it.hasNext()) {
                for (String line : it.next()) {
                    if (line != null) {
                        writer.write(line);
                        writer.write('\n');
                    }
                }
                recordCount++;
                // Flush every 1000 records. This check used to sit after the loop, where it
                // ran only once at the very end and never flushed during processing.
                if (recordCount % 1000 == 0) {
                    writer.flush();
                }
            }
        }
    } catch (IOException e) {
        log.error(e);
        return -1;
    }
    return 0;
}
Also used : SamReaderFactory(htsjdk.samtools.SamReaderFactory) FileWriter(java.io.FileWriter) AsyncBufferedIterator(au.edu.wehi.idsv.util.AsyncBufferedIterator) ProgressLogger(htsjdk.samtools.util.ProgressLogger) IOException(java.io.IOException) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) DirectedBreakpoint(au.edu.wehi.idsv.DirectedBreakpoint) BufferedWriter(java.io.BufferedWriter) SamReader(htsjdk.samtools.SamReader) SAMRecord(htsjdk.samtools.SAMRecord) ArrayList(java.util.ArrayList) List(java.util.List) ProgressLoggingSAMRecordIterator(au.edu.wehi.idsv.ProgressLoggingSAMRecordIterator) SAMFileHeader(htsjdk.samtools.SAMFileHeader)

Example 2 with ProgressLoggingSAMRecordIterator

use of au.edu.wehi.idsv.ProgressLoggingSAMRecordIterator in project gridss by PapenfussLab.

The method doWork of the class SubsetToMissing.

/**
 * Copies to {@code OUTPUT} every record of {@code INPUT} whose {@code hash(record)} is not
 * present in the set of hashes collected from the {@code LOOKUP} files.
 *
 * <p>When {@code STOP_AFTER} is set to a positive value it caps, independently, the number of
 * hashes loaded per lookup file and the number of input records processed.
 *
 * @return 0 on success, -1 if closing the input reader raised an {@link IOException}
 */
@Override
protected int doWork() {
    long stop = Long.MAX_VALUE;
    if (STOP_AFTER != null && (long) STOP_AFTER > 0) {
        stop = STOP_AFTER;
    }
    log.debug("Setting language-neutral locale");
    java.util.Locale.setDefault(Locale.ROOT);
    if (TMP_DIR == null || TMP_DIR.size() == 0) {
        TMP_DIR = Lists.newArrayList(new File("."));
    }
    SamReaderFactory factory = SamReaderFactory.makeDefault().validationStringency(ValidationStringency.SILENT);
    // try-with-resources closes the writer, the buffered iterator and the reader (in that
    // order) even when processing fails part-way through.
    try (SamReader input = factory.open(INPUT);
            AsyncBufferedIterator<SAMRecord> inputit = new AsyncBufferedIterator<SAMRecord>(input.iterator(), 2, 16384);
            SAMFileWriter out = new SAMFileWriterFactory().makeSAMOrBAMWriter(input.getFileHeader(), true, OUTPUT)) {
        LongSet hashtable;
        if (PREALLOCATE != null) {
            log.info("Preallocating hash table");
            hashtable = new LongOpenHashBigSet(PREALLOCATE);
        } else {
            hashtable = new LongOpenHashBigSet();
        }
        for (File file : LOOKUP) {
            loadLookupHashes(factory, file, hashtable, stop);
        }
        long filtered = 0;
        log.info("Processing input");
        Iterator<SAMRecord> loggedInput = new ProgressLoggingSAMRecordIterator(inputit, new ProgressLogger(log));
        long n = stop;
        while (loggedInput.hasNext() && n-- > 0) {
            SAMRecord r = loggedInput.next();
            if (!hashtable.contains(hash(r))) {
                out.addAlignment(r);
            } else {
                filtered++;
                if (filtered % 1000000 == 0) {
                    log.info(String.format("Filtered %d reads", filtered));
                }
            }
        }
        log.info("Closing output");
    } catch (IOException e) {
        // Only closing the input reader can raise a checked IOException here.
        log.error(e);
        return -1;
    }
    return 0;
}

/**
 * Adds the record hashes for one lookup file to {@code hashtable}, reading them from the
 * {@code <file>.SubsetToMissing.cache} file when it exists and otherwise computing them
 * from the lookup file (and creating that cache as a side effect).
 *
 * @param factory   reader factory used to open the lookup file when no cache exists
 * @param file      lookup file whose record hashes are wanted
 * @param hashtable destination set; hashes are added to it
 * @param stop      maximum number of hashes to load from this lookup file
 */
private void loadLookupHashes(SamReaderFactory factory, File file, LongSet hashtable, long stop) {
    log.info("Loading lookup hashes for " + file.getAbsolutePath());
    File cache = new File(file.getAbsolutePath() + ".SubsetToMissing.cache");
    if (cache.exists()) {
        // The lookup file itself is not opened on this path: its records are not needed.
        readHashesFromCache(cache, hashtable, stop);
    } else {
        buildHashesAndCache(factory, file, cache, hashtable, stop);
    }
}

/**
 * Reads up to {@code stop} hashes from {@code cache} into {@code hashtable}. Reaching the
 * end of the cache file is the normal way for the read to finish.
 */
private void readHashesFromCache(File cache, LongSet hashtable, long stop) {
    log.info("Loading lookup hashes from cache");
    try (DataInputStream dis = new DataInputStream(new BufferedInputStream(new FileInputStream(cache)))) {
        long loadCount = 0;
        for (long n = stop; n > 0; n--) {
            hashtable.add(dis.readLong());
            // Count each hash so the progress line is emitted once per 10 million entries
            // instead of on every iteration.
            loadCount++;
            if (loadCount % 10000000 == 0) {
                log.info(String.format("Loaded %d from cache", loadCount));
            }
        }
    } catch (EOFException ignored) {
        // Expected: the whole cache has been consumed.
    } catch (IOException e) {
        log.error(e);
    }
}

/**
 * Hashes up to {@code stop} records of {@code file}, adding each hash to {@code hashtable}
 * and appending it to a newly created {@code cache} file. On failure the hashes gathered so
 * far are kept and the incomplete cache file is removed.
 */
private void buildHashesAndCache(SamReaderFactory factory, File file, File cache, LongSet hashtable, long stop) {
    try (SamReader lookup = factory.open(file);
            AsyncBufferedIterator<SAMRecord> it = new AsyncBufferedIterator<SAMRecord>(lookup.iterator(), 2, 16384)) {
        ProgressLoggingSAMRecordIterator loggedit = new ProgressLoggingSAMRecordIterator(it, new ProgressLogger(log));
        try (DataOutputStream dos = new DataOutputStream(new BufferedOutputStream(new FileOutputStream(cache)))) {
            long n = stop;
            while (loggedit.hasNext() && n-- > 0) {
                long recordhash = hash(loggedit.next());
                hashtable.add(recordhash);
                dos.writeLong(recordhash);
            }
        } catch (Exception e) {
            log.error(e, "Failed to load lookup. Running with partial results");
            // The stream is already closed here. Remove the truncated cache so a later run
            // does not load it as if it were complete.
            if (cache.exists() && !cache.delete()) {
                log.error("Unable to delete incomplete cache " + cache.getAbsolutePath());
            }
        } finally {
            loggedit.close();
        }
    } catch (IOException e) {
        // Raised only when closing the lookup reader; the hashes are already loaded.
        log.error(e);
    }
}
Also used : SamReaderFactory(htsjdk.samtools.SamReaderFactory) SAMFileWriter(htsjdk.samtools.SAMFileWriter) DataOutputStream(java.io.DataOutputStream) LongSet(it.unimi.dsi.fastutil.longs.LongSet) SAMFileWriterFactory(htsjdk.samtools.SAMFileWriterFactory) ProgressLogger(htsjdk.samtools.util.ProgressLogger) IOException(java.io.IOException) DataInputStream(java.io.DataInputStream) FileInputStream(java.io.FileInputStream) IOException(java.io.IOException) EOFException(java.io.EOFException) SamReader(htsjdk.samtools.SamReader) LongOpenHashBigSet(it.unimi.dsi.fastutil.longs.LongOpenHashBigSet) BufferedInputStream(java.io.BufferedInputStream) SAMRecord(htsjdk.samtools.SAMRecord) FileOutputStream(java.io.FileOutputStream) EOFException(java.io.EOFException) ProgressLoggingSAMRecordIterator(au.edu.wehi.idsv.ProgressLoggingSAMRecordIterator) File(java.io.File) BufferedOutputStream(java.io.BufferedOutputStream)

Aggregations

ProgressLoggingSAMRecordIterator (au.edu.wehi.idsv.ProgressLoggingSAMRecordIterator)2 SAMRecord (htsjdk.samtools.SAMRecord)2 SamReader (htsjdk.samtools.SamReader)2 SamReaderFactory (htsjdk.samtools.SamReaderFactory)2 ProgressLogger (htsjdk.samtools.util.ProgressLogger)2 IOException (java.io.IOException)2 DirectedBreakpoint (au.edu.wehi.idsv.DirectedBreakpoint)1 AsyncBufferedIterator (au.edu.wehi.idsv.util.AsyncBufferedIterator)1 SAMFileHeader (htsjdk.samtools.SAMFileHeader)1 SAMFileWriter (htsjdk.samtools.SAMFileWriter)1 SAMFileWriterFactory (htsjdk.samtools.SAMFileWriterFactory)1 SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary)1 LongOpenHashBigSet (it.unimi.dsi.fastutil.longs.LongOpenHashBigSet)1 LongSet (it.unimi.dsi.fastutil.longs.LongSet)1 BufferedInputStream (java.io.BufferedInputStream)1 BufferedOutputStream (java.io.BufferedOutputStream)1 BufferedWriter (java.io.BufferedWriter)1 DataInputStream (java.io.DataInputStream)1 DataOutputStream (java.io.DataOutputStream)1 EOFException (java.io.EOFException)1