use of htsjdk.samtools.SamReaderFactory in project polyGembler by c-zhou.
the class TenXSamtools method runSort.
private void runSort() {
// TODO Auto-generated method stub
final SamReaderFactory factory = SamReaderFactory.makeDefault().enable(SamReaderFactory.Option.INCLUDE_SOURCE_IN_RECORDS, SamReaderFactory.Option.VALIDATE_CRC_CHECKSUMS).validationStringency(ValidationStringency.SILENT);
final SamReader inputSam = factory.open(new File(this.bam_in));
final SAMFileHeader sort_header = inputSam.getFileHeader();
switch(this.sort_order) {
case coordinate:
sort_header.setSortOrder(SortOrder.coordinate);
break;
case queryname:
sort_header.setSortOrder(SortOrder.queryname);
break;
case barcode:
sort_header.setSortOrder(SortOrder.unknown);
break;
}
SAMRecordIterator iter = inputSam.iterator();
long record_inCount = 0;
SAMRecord[] buff = new SAMRecord[this.batch_size];
int k = 0;
SAMRecord temp = iter.hasNext() ? iter.next() : null;
this.initial_thread_pool();
while (temp != null) {
buff[k++] = temp;
record_inCount++;
temp = iter.hasNext() ? iter.next() : null;
if (k == this.batch_size || temp == null) {
executor.submit(new Runnable() {
private SAMRecord[] records;
@Override
public void run() {
// TODO Auto-generated method stub
try {
Arrays.sort(records, comprator);
final SAMFileWriter outputSam;
synchronized (lock) {
outputSam = new SAMFileWriterFactory().makeSAMOrBAMWriter(sort_header, true, new File(bam_out + String.format("%08d", batch++)));
}
int count = 0;
for (SAMRecord record : records) {
if (record != null) {
count++;
outputSam.addAlignment(record);
}
}
outputSam.close();
synchronized (lock) {
record_count += count;
}
myLogger.info("[" + Thread.currentThread().getName() + "] " + record_count + " records processed.");
} catch (Exception e) {
Thread t = Thread.currentThread();
t.getUncaughtExceptionHandler().uncaughtException(t, e);
e.printStackTrace();
executor.shutdown();
System.exit(1);
}
}
public Runnable init(SAMRecord[] buff) {
// TODO Auto-generated method stub
this.records = buff;
return (this);
}
}.init(buff));
k = 0;
buff = new SAMRecord[this.batch_size];
}
}
iter.close();
try {
inputSam.close();
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
myLogger.info(record_inCount + " records read from " + this.bam_in);
this.waitFor();
// merge all batches
myLogger.info("Merge " + batch + " files.");
final SAMFileWriter outputSam = new SAMFileWriterFactory().makeSAMOrBAMWriter(sort_header, true, new File(this.bam_out));
final SamReader[] batchSam = new SamReader[batch];
final SAMRecordIterator[] iterSam = new SAMRecordIterator[batch];
final boolean[] reachFileEnd = new boolean[batch];
final TreeMap<SAMRecord, Integer> treeMap = new TreeMap<SAMRecord, Integer>(this.comprator);
for (int i = 0; i != batch; i++) {
batchSam[i] = factory.open(new File(this.bam_out + String.format("%08d", i)));
iterSam[i] = batchSam[i].iterator();
if (iterSam[i].hasNext())
treeMap.put(iterSam[i].next(), i);
reachFileEnd[i] = !iterSam[i].hasNext();
}
Entry<SAMRecord, Integer> firstEntry;
int bch, nReachFileEnd = 0;
for (boolean b : reachFileEnd) if (b)
nReachFileEnd++;
long record_outCount = 0;
while (!treeMap.isEmpty()) {
firstEntry = treeMap.pollFirstEntry();
outputSam.addAlignment(firstEntry.getKey());
record_outCount++;
bch = firstEntry.getValue();
if (!reachFileEnd[bch]) {
treeMap.put(iterSam[bch].next(), bch);
if (!iterSam[bch].hasNext()) {
reachFileEnd[bch] = true;
nReachFileEnd++;
}
}
if (treeMap.isEmpty() && nReachFileEnd != batch) {
for (int i = 0; i != batch; i++) {
if (!reachFileEnd[i]) {
treeMap.put(iterSam[i].next(), i);
if (!iterSam[i].hasNext()) {
reachFileEnd[i] = true;
nReachFileEnd++;
}
}
}
}
}
try {
outputSam.close();
for (int i = 0; i != batch; i++) {
iterSam[i].close();
batchSam[i].close();
new File(this.bam_out + String.format("%08d", i)).delete();
}
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
myLogger.info(record_outCount + " records written to " + this.bam_out);
}
use of htsjdk.samtools.SamReaderFactory in project polyGembler by c-zhou.
the class AssemblyGraph method validateAssemblyGraph.
private static void validateAssemblyGraph(String query_file, String bam_in) {
final SamReaderFactory factory = SamReaderFactory.makeDefault().enable(SamReaderFactory.Option.INCLUDE_SOURCE_IN_RECORDS, SamReaderFactory.Option.VALIDATE_CRC_CHECKSUMS).validationStringency(ValidationStringency.SILENT);
qry_seqs = Sequence.parseFastaFileWithRevCmpAsMap(query_file);
final SamReader in1 = factory.open(new File(bam_in));
SAMRecordIterator iter1 = in1.iterator();
final List<SAMRecord> buffered_records = new ArrayList<SAMRecord>();
SAMRecord tmp_record1 = iter1.hasNext() ? iter1.next() : null;
String record_id = tmp_record1.getReadName();
SAMRecord r1, r2;
int i1, i2;
StringBuilder oos = new StringBuilder();
String source, sink;
final BFSShortestPath<Integer, DefaultWeightedEdge> bfs = new BFSShortestPath<Integer, DefaultWeightedEdge>(assembly_graph);
while (tmp_record1 != null) {
buffered_records.clear();
buffered_records.add(tmp_record1);
while ((tmp_record1 = iter1.hasNext() ? iter1.next() : null) != null && tmp_record1.getReadName().equals(record_id)) {
buffered_records.add(tmp_record1);
}
if (buffered_records.size() == 1) {
if (tmp_record1 != null)
record_id = tmp_record1.getReadName();
continue;
}
// sort records by head clipping size
buffered_records.sort(new Comparator<SAMRecord>() {
@Override
public int compare(SAMRecord r0, SAMRecord r1) {
// TODO Auto-generated method stub
return Integer.compare(head_clip(r0), head_clip(r1));
}
private int head_clip(SAMRecord r) {
CigarElement cigar = r.getReadNegativeStrandFlag() ? r.getCigar().getLastCigarElement() : r.getCigar().getFirstCigarElement();
return cigar.getOperator().isClipping() ? cigar.getLength() : 0;
}
});
// calculate distance for adjacent alignment record pairs
r1 = buffered_records.get(0);
// for writing
oos.setLength(0);
oos.append(record_id);
oos.append("[");
oos.append(buffered_records.size());
oos.append(", ");
oos.append(readLength(r1));
// find reference index
source = r1.getReferenceName() + (r1.getReadNegativeStrandFlag() ? "'" : "");
i1 = contig_coordinate.getKey(source);
oos.append(", ");
oos.append(source);
oos.append("] ");
for (int i = 1; i < buffered_records.size(); i++) {
r2 = buffered_records.get(i);
sink = r2.getReferenceName() + (r2.getReadNegativeStrandFlag() ? "'" : "");
i2 = contig_coordinate.getKey(sink);
oos.append(", (");
GraphPath<Integer, DefaultWeightedEdge> e = bfs.getPath(i1, i2);
oos.append(sink);
oos.append(",");
oos.append(e == null ? "Inf" : e.getLength());
oos.append(",");
oos.append(pathLength(e));
oos.append(")");
i1 = i2;
}
oos.append("\n");
System.out.println(oos.toString());
if (tmp_record1 != null)
record_id = tmp_record1.getReadName();
}
try {
iter1.close();
in1.close();
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}
use of htsjdk.samtools.SamReaderFactory in project gridss by PapenfussLab.
the class ReadsToBedpe method doWork.
@Override
protected int doWork() {
log.debug("Setting language-neutral locale");
java.util.Locale.setDefault(Locale.ROOT);
validateParameters();
SamReaderFactory readerFactory = SamReaderFactory.make();
try {
try (SamReader reader = readerFactory.open(INPUT)) {
SAMFileHeader header = reader.getFileHeader();
SAMSequenceDictionary dict = header.getSequenceDictionary();
// ExecutorService threadpool = Executors.newFixedThreadPool(WORKER_THREADS, new ThreadFactoryBuilder().setDaemon(false).setNameFormat("Worker-%d").build());
try (CloseableIterator<SAMRecord> rawit = new AsyncBufferedIterator<SAMRecord>(reader.iterator(), 3, 64)) {
ProgressLoggingSAMRecordIterator logit = new ProgressLoggingSAMRecordIterator(rawit, new ProgressLogger(log));
// ParallelTransformIterator<SAMRecord, List<String>> it = new ParallelTransformIterator<>(logit, r -> asBedPe(dict, r), 16 + 2 * WORKER_THREADS, threadpool);
Iterator<List<String>> it = Iterators.transform(logit, r -> asBedPe(dict, r));
int i = 0;
try (BufferedWriter writer = new BufferedWriter(new FileWriter(OUTPUT))) {
while (it.hasNext()) {
for (String line : it.next()) {
if (line != null) {
writer.write(line);
writer.write('\n');
}
}
i++;
}
if (i % 1000 == 0) {
writer.flush();
}
}
}
}
} catch (IOException e) {
log.error(e);
return -1;
}
return 0;
}
use of htsjdk.samtools.SamReaderFactory in project gridss by PapenfussLab.
the class SoftClipsToSplitReads method doWork.
@Override
protected int doWork() {
log.debug("Setting language-neutral locale");
java.util.Locale.setDefault(Locale.ROOT);
validateParameters();
GenomicProcessingContext pc = new GenomicProcessingContext(getFileSystemContext(), REFERENCE_SEQUENCE, getReference());
pc.setCommandLineProgram(this);
pc.setFilterDuplicates(IGNORE_DUPLICATES);
SplitReadRealigner realigner = new SplitReadRealigner(pc);
realigner.setMinSoftClipLength(MIN_CLIP_LENGTH);
realigner.setMinSoftClipQuality(MIN_CLIP_QUAL);
realigner.setProcessSecondaryAlignments(PROCESS_SECONDARY_ALIGNMENTS);
realigner.setWorkerThreads(WORKER_THREADS);
try {
SamReaderFactory readerFactory = SamReaderFactory.make();
SAMFileWriterFactory writerFactory = new SAMFileWriterFactory();
if (ALIGNER_STREAMING) {
ExternalProcessStreamingAligner aligner = new ExternalProcessStreamingAligner(readerFactory, ALIGNER_COMMAND_LINE, REFERENCE_SEQUENCE, WORKER_THREADS);
realigner.createSupplementaryAlignments(aligner, INPUT, OUTPUT);
} else {
ExternalProcessFastqAligner aligner = new ExternalProcessFastqAligner(readerFactory, writerFactory, ALIGNER_COMMAND_LINE);
realigner.createSupplementaryAlignments(aligner, INPUT, OUTPUT);
}
} catch (IOException e) {
log.error(e);
return -1;
}
return 0;
}
use of htsjdk.samtools.SamReaderFactory in project gridss by PapenfussLab.
the class SubsetToMissing method doWork.
@Override
protected int doWork() {
long stop = Long.MAX_VALUE;
if (STOP_AFTER != null && (long) STOP_AFTER > 0) {
stop = STOP_AFTER;
}
log.debug("Setting language-neutral locale");
java.util.Locale.setDefault(Locale.ROOT);
if (TMP_DIR == null || TMP_DIR.size() == 0) {
TMP_DIR = Lists.newArrayList(new File("."));
}
SamReaderFactory factory = SamReaderFactory.makeDefault().validationStringency(ValidationStringency.SILENT);
SamReader input = factory.open(INPUT);
Iterator<SAMRecord> intputit = new AsyncBufferedIterator<SAMRecord>(input.iterator(), 2, 16384);
SAMFileWriter out = new SAMFileWriterFactory().makeSAMOrBAMWriter(input.getFileHeader(), true, OUTPUT);
LongSet hashtable;
if (PREALLOCATE != null) {
log.info("Preallocating hash table");
hashtable = new LongOpenHashBigSet(PREALLOCATE);
} else {
hashtable = new LongOpenHashBigSet();
}
for (File file : LOOKUP) {
log.info("Loading lookup hashes for " + file.getAbsolutePath());
SamReader lookup = factory.open(file);
AsyncBufferedIterator<SAMRecord> it = new AsyncBufferedIterator<SAMRecord>(lookup.iterator(), 2, 16384);
File cache = new File(file.getAbsolutePath() + ".SubsetToMissing.cache");
if (cache.exists()) {
log.info("Loading lookup hashes from cache");
long n = stop;
DataInputStream dis = null;
try {
long loadCount = 0;
dis = new DataInputStream(new BufferedInputStream(new FileInputStream(cache)));
while (n-- > 0) {
hashtable.add(dis.readLong());
if (loadCount % 10000000 == 0) {
log.info(String.format("Loaded %d from cache", loadCount));
}
}
} catch (EOFException e) {
try {
if (dis != null)
dis.close();
} catch (IOException e1) {
log.error(e1);
}
} catch (IOException e) {
log.error(e);
}
} else {
long n = stop;
ProgressLoggingSAMRecordIterator loggedit = new ProgressLoggingSAMRecordIterator(it, new ProgressLogger(log));
try {
DataOutputStream dos = new DataOutputStream(new BufferedOutputStream(new FileOutputStream(cache)));
while (loggedit.hasNext() && n-- > 0) {
long recordhash = hash(loggedit.next());
hashtable.add(recordhash);
dos.writeLong(recordhash);
}
dos.close();
} catch (Exception e) {
log.error(e, "Failed to load lookup. Running with partial results");
}
loggedit.close();
}
it.close();
}
long filtered = 0;
log.info("Processing input");
intputit = new ProgressLoggingSAMRecordIterator(intputit, new ProgressLogger(log));
long n = stop;
while (intputit.hasNext() && n-- > 0) {
SAMRecord r = intputit.next();
if (!hashtable.contains(hash(r))) {
out.addAlignment(r);
} else {
filtered++;
if (filtered % 1000000 == 0) {
log.info(String.format("Filtered %d reads", filtered));
}
}
}
log.info("Closing output");
out.close();
return 0;
}
Aggregations