Use of au.edu.wehi.idsv.ProgressLoggingSAMRecordIterator in project gridss by PapenfussLab:
the doWork method of the ReadsToBedpe class.
@Override
protected int doWork() {
	log.debug("Setting language-neutral locale");
	java.util.Locale.setDefault(Locale.ROOT);
	validateParameters();
	SamReaderFactory readerFactory = SamReaderFactory.make();
	try {
		try (SamReader reader = readerFactory.open(INPUT)) {
			SAMFileHeader header = reader.getFileHeader();
			SAMSequenceDictionary dict = header.getSequenceDictionary();
			// Read ahead asynchronously so parsing and conversion overlap with disk I/O.
			try (CloseableIterator<SAMRecord> rawit = new AsyncBufferedIterator<SAMRecord>(reader.iterator(), 3, 64)) {
				ProgressLoggingSAMRecordIterator logit = new ProgressLoggingSAMRecordIterator(rawit, new ProgressLogger(log));
				// Each record converts to zero or more BEDPE lines (nulls are skipped below).
				Iterator<List<String>> it = Iterators.transform(logit, r -> asBedPe(dict, r));
				int i = 0;
				try (BufferedWriter writer = new BufferedWriter(new FileWriter(OUTPUT))) {
					while (it.hasNext()) {
						for (String line : it.next()) {
							if (line != null) {
								writer.write(line);
								writer.write('\n');
							}
						}
						i++;
						// BUGFIX: this periodic flush was previously placed after the
						// while loop, so it ran at most once instead of every 1000 records.
						if (i % 1000 == 0) {
							writer.flush();
						}
					}
				}
			}
		}
	} catch (IOException e) {
		log.error(e);
		return -1;
	}
	return 0;
}
Use of au.edu.wehi.idsv.ProgressLoggingSAMRecordIterator in project gridss by PapenfussLab:
the doWork method of the SubsetToMissing class.
@Override
protected int doWork() {
	// Cap on records read from each source; Long.MAX_VALUE means no limit.
	long stop = Long.MAX_VALUE;
	if (STOP_AFTER != null && (long) STOP_AFTER > 0) {
		stop = STOP_AFTER;
	}
	log.debug("Setting language-neutral locale");
	java.util.Locale.setDefault(Locale.ROOT);
	if (TMP_DIR == null || TMP_DIR.size() == 0) {
		TMP_DIR = Lists.newArrayList(new File("."));
	}
	SamReaderFactory factory = SamReaderFactory.makeDefault().validationStringency(ValidationStringency.SILENT);
	SamReader input = factory.open(INPUT);
	Iterator<SAMRecord> inputit = new AsyncBufferedIterator<SAMRecord>(input.iterator(), 2, 16384);
	SAMFileWriter out = new SAMFileWriterFactory().makeSAMOrBAMWriter(input.getFileHeader(), true, OUTPUT);
	// Hashes of every read present in the LOOKUP files; input reads whose hash
	// is in this set are filtered out.
	LongSet hashtable;
	if (PREALLOCATE != null) {
		log.info("Preallocating hash table");
		hashtable = new LongOpenHashBigSet(PREALLOCATE);
	} else {
		hashtable = new LongOpenHashBigSet();
	}
	for (File file : LOOKUP) {
		log.info("Loading lookup hashes for " + file.getAbsolutePath());
		File cache = new File(file.getAbsolutePath() + ".SubsetToMissing.cache");
		if (cache.exists()) {
			// BUGFIX: previously the lookup SamReader and its async iterator were
			// opened (spawning a reader) even when the cache made them unnecessary,
			// and the SamReader itself was never closed.
			loadHashesFromCache(hashtable, cache, stop);
		} else {
			buildHashCache(factory, hashtable, file, cache, stop);
		}
	}
	long filtered = 0;
	log.info("Processing input");
	inputit = new ProgressLoggingSAMRecordIterator(inputit, new ProgressLogger(log));
	long n = stop;
	while (inputit.hasNext() && n-- > 0) {
		SAMRecord r = inputit.next();
		if (!hashtable.contains(hash(r))) {
			out.addAlignment(r);
		} else {
			filtered++;
			if (filtered % 1000000 == 0) {
				log.info(String.format("Filtered %d reads", filtered));
			}
		}
	}
	log.info("Closing output");
	out.close();
	return 0;
}

/**
 * Loads precomputed read hashes from an on-disk cache file into the given set.
 * The cache is a flat stream of raw longs; end-of-file terminates loading.
 *
 * @param hashtable set to add the loaded hashes to
 * @param cache cache file previously written by {@link #buildHashCache}
 * @param stop maximum number of hashes to load
 */
private void loadHashesFromCache(LongSet hashtable, File cache, long stop) {
	log.info("Loading lookup hashes from cache");
	long n = stop;
	long loadCount = 0;
	// BUGFIX: try-with-resources closes the stream on every path; previously it
	// was only closed in the EOFException handler and leaked on other outcomes.
	try (DataInputStream dis = new DataInputStream(new BufferedInputStream(new FileInputStream(cache)))) {
		while (n-- > 0) {
			hashtable.add(dis.readLong());
			// BUGFIX: loadCount was never incremented, so "Loaded 0 from cache"
			// was logged for every single record.
			loadCount++;
			if (loadCount % 10000000 == 0) {
				log.info(String.format("Loaded %d from cache", loadCount));
			}
		}
	} catch (EOFException e) {
		// Expected: reached the end of the cache file.
	} catch (IOException e) {
		log.error(e);
	}
}

/**
 * Hashes every read in the given lookup file, adding each hash to the set and
 * writing it to a cache file so subsequent runs can skip re-reading the BAM.
 *
 * @param factory reader factory to open the lookup file with
 * @param hashtable set to add the computed hashes to
 * @param file lookup SAM/BAM file
 * @param cache cache file to write the hashes to
 * @param stop maximum number of records to hash
 */
private void buildHashCache(SamReaderFactory factory, LongSet hashtable, File file, File cache, long stop) {
	long n = stop;
	try (SamReader lookup = factory.open(file);
			AsyncBufferedIterator<SAMRecord> it = new AsyncBufferedIterator<SAMRecord>(lookup.iterator(), 2, 16384);
			ProgressLoggingSAMRecordIterator loggedit = new ProgressLoggingSAMRecordIterator(it, new ProgressLogger(log))) {
		try (DataOutputStream dos = new DataOutputStream(new BufferedOutputStream(new FileOutputStream(cache)))) {
			while (loggedit.hasNext() && n-- > 0) {
				long recordhash = hash(loggedit.next());
				hashtable.add(recordhash);
				dos.writeLong(recordhash);
			}
		} catch (Exception e) {
			log.error(e, "Failed to load lookup. Running with partial results");
			// BUGFIX: a partially written cache would be silently loaded as
			// complete on the next run; delete it so the lookup is rebuilt.
			cache.delete();
		}
	} catch (IOException e) {
		log.error(e);
	}
}
Aggregations