Use of htsjdk.samtools.SAMFileWriterFactory in project polyGembler by c-zhou:
the class TenXMoleculeStats, method runBCSortedStats.
/**
 * Streams a barcode (BX-tag) sorted BAM file, groups consecutive records that
 * share the same BX barcode, and submits each group to the thread pool as a
 * {@code MoleculeConstructor} task. Per-barcode read counts are accumulated in
 * {@code readsOnBarcode}, and a human-readable summary is written to
 * {@code out_stats + ".summary"} after all workers finish.
 *
 * <p>Side effects: opens {@code in_bam} for reading; opens {@code oos}
 * (text stats) and {@code oob} (BAM output) writers used by the worker tasks;
 * resets {@code mappedReadsOnChromosome}.
 *
 * <p>NOTE(review): the input is assumed to be sorted by BX tag — records with
 * the same barcode must be consecutive; verify upstream sorting.
 */
public void runBCSortedStats() {
	final SamReader inputSam = factory.open(new File(this.in_bam));
	final SAMSequenceDictionary seqDic = inputSam.getFileHeader().getSequenceDictionary();
	mappedReadsOnChromosome = new long[seqDic.size()];
	oos = Utils.getBufferedWriter(out_stats + ".txt");
	oob = new SAMFileWriterFactory().makeSAMOrBAMWriter(inputSam.getFileHeader(), true, new File(out_stats + ".bam"));
	SAMRecordIterator iter = inputSam.iterator();
	// FIX: guard the first read. The original called iter.next() unconditionally,
	// which throws NoSuchElementException on an empty input file.
	SAMRecord record = iter.hasNext() ? iter.next() : null;
	String bc_str;
	this.initial_thread_pool();
	while (record != null) {
		List<SAMRecord> bc_records = new ArrayList<SAMRecord>();
		bc_str = record.getStringAttribute("BX");
		this.processRecord(bc_records, record);
		// Advance past records that carry no BX tag; they are still handed to
		// processRecord (for counting) and collected into the current group.
		while (bc_str == null) {
			record = iter.hasNext() ? iter.next() : null;
			if (record == null)
				break;
			this.processRecord(bc_records, record);
			bc_str = record.getStringAttribute("BX");
		}
		if (bc_str == null)
			continue; // input exhausted without another barcode; outer loop exits
		// Collect the run of consecutive records sharing this barcode.
		while (true) {
			record = iter.hasNext() ? iter.next() : null;
			if (record == null)
				break;
			if (bc_str.equals(record.getStringAttribute("BX"))) {
				this.processRecord(bc_records, record);
			} else
				break; // first record of the next barcode; outer loop restarts from it
		}
		if (bc_records.isEmpty())
			continue;
		// FIX: key the count on the group's barcode. The original used
		// bc_records.get(0).getStringAttribute("BX"), but the first collected
		// record may lack a BX tag, producing a null key that merged (and
		// overwrote) counts of unrelated groups.
		readsOnBarcode.put(bc_str, bc_records.size());
		executor.submit(new MoleculeConstructor(bc_records));
	}
	try {
		iter.close();
		inputSam.close();
		// Block until all MoleculeConstructor tasks complete before closing
		// the writers they share.
		this.waitFor();
		oos.close();
		oob.close();
		// Write the run summary: global counters, per-chromosome counts,
		// then per-barcode counts.
		BufferedWriter bw_summary = Utils.getBufferedWriter(this.out_stats + ".summary");
		bw_summary.write("##Reads mapped: " + mappedReads + "\n");
		bw_summary.write("##Reads unmapped: " + unmappedReads + "\n");
		bw_summary.write("##Reads duplicated: " + duplicatedReads + "\n");
		bw_summary.write("##Reads bad pairs: " + badpaired + "\n");
		bw_summary.write("##Reads by pseudo-chromosomes:\n");
		for (int i = 0; i < mappedReadsOnChromosome.length; i++) {
			bw_summary.write(" #" + seqDic.getSequence(i).getSequenceName() + " " + mappedReadsOnChromosome[i] + "\n");
		}
		bw_summary.write("##Reads by barcodes:\n");
		for (Map.Entry<String, Integer> entry : readsOnBarcode.entrySet()) {
			bw_summary.write(" #" + entry.getKey() + " " + entry.getValue() + "\n");
		}
		bw_summary.close();
	} catch (IOException e) {
		e.printStackTrace();
	}
	myLogger.info("Completed. " + counter + " bam records processed.");
}
Use of htsjdk.samtools.SAMFileWriterFactory in project polyGembler by c-zhou:
the class TenXSamtools, method runSort.
/**
 * Sorts {@code bam_in} into {@code bam_out} according to {@code sort_order}.
 *
 * <p>Two phases:
 * <ol>
 *   <li>Read records into fixed-size buffers of {@code batch_size}; each full
 *       buffer is sorted by {@code comprator} on a worker thread and written
 *       to a numbered intermediate file ({@code bam_out + "%08d"}).</li>
 *   <li>K-way merge the intermediate files into the final output, then
 *       delete them.</li>
 * </ol>
 *
 * <p>FIX: the merge previously used a {@code TreeMap<SAMRecord, Integer>}
 * keyed on the records themselves. Whenever the heads of two batch files
 * compared as equal under the comparator (e.g. identical coordinates),
 * {@code put} overwrote one entry and a record was silently dropped. The
 * merge now keeps one head record per file and scans for the minimum, which
 * tolerates equal records and needs no end-of-file bookkeeping.
 */
private void runSort() {
	final SamReaderFactory factory = SamReaderFactory.makeDefault().enable(SamReaderFactory.Option.INCLUDE_SOURCE_IN_RECORDS, SamReaderFactory.Option.VALIDATE_CRC_CHECKSUMS).validationStringency(ValidationStringency.SILENT);
	final SamReader inputSam = factory.open(new File(this.bam_in));
	final SAMFileHeader sort_header = inputSam.getFileHeader();
	// Record the requested ordering in the output header; "barcode" has no
	// htsjdk SortOrder equivalent, so it is written as unknown.
	switch (this.sort_order) {
		case coordinate:
			sort_header.setSortOrder(SortOrder.coordinate);
			break;
		case queryname:
			sort_header.setSortOrder(SortOrder.queryname);
			break;
		case barcode:
			sort_header.setSortOrder(SortOrder.unknown);
			break;
	}
	SAMRecordIterator iter = inputSam.iterator();
	long record_inCount = 0;
	SAMRecord[] buff = new SAMRecord[this.batch_size];
	int k = 0;
	SAMRecord temp = iter.hasNext() ? iter.next() : null;
	this.initial_thread_pool();
	while (temp != null) {
		buff[k++] = temp;
		record_inCount++;
		temp = iter.hasNext() ? iter.next() : null;
		if (k == this.batch_size || temp == null) {
			// Hand the filled buffer to a worker: sort it and write a
			// numbered intermediate batch file. The buffer reference must be
			// captured in a final local because buff is reassigned below.
			final SAMRecord[] records = buff;
			executor.submit(new Runnable() {
				@Override
				public void run() {
					try {
						Arrays.sort(records, comprator);
						final SAMFileWriter outputSam;
						// batch is shared mutable state: reserve the next
						// batch number under the lock.
						synchronized (lock) {
							outputSam = new SAMFileWriterFactory().makeSAMOrBAMWriter(sort_header, true, new File(bam_out + String.format("%08d", batch++)));
						}
						int count = 0;
						for (SAMRecord record : records) {
							// Trailing nulls occur in the final, partially
							// filled buffer.
							if (record != null) {
								count++;
								outputSam.addAlignment(record);
							}
						}
						outputSam.close();
						// FIX: read the updated total under the same lock that
						// guards the increment, so the log line is consistent.
						final long processed;
						synchronized (lock) {
							record_count += count;
							processed = record_count;
						}
						myLogger.info("[" + Thread.currentThread().getName() + "] " + processed + " records processed.");
					} catch (Exception e) {
						Thread t = Thread.currentThread();
						t.getUncaughtExceptionHandler().uncaughtException(t, e);
						e.printStackTrace();
						executor.shutdown();
						System.exit(1);
					}
				}
			});
			k = 0;
			buff = new SAMRecord[this.batch_size];
		}
	}
	iter.close();
	try {
		inputSam.close();
	} catch (IOException e) {
		e.printStackTrace();
	}
	myLogger.info(record_inCount + " records read from " + this.bam_in);
	// Wait for all batch writers; after this point batch is stable.
	this.waitFor();
	// ---- phase 2: k-way merge of the sorted batch files ----
	myLogger.info("Merge " + batch + " files.");
	final SAMFileWriter outputSam = new SAMFileWriterFactory().makeSAMOrBAMWriter(sort_header, true, new File(this.bam_out));
	final SamReader[] batchSam = new SamReader[batch];
	final SAMRecordIterator[] iterSam = new SAMRecordIterator[batch];
	// Current head record of each batch file; null once that file is drained.
	final SAMRecord[] heads = new SAMRecord[batch];
	for (int i = 0; i != batch; i++) {
		batchSam[i] = factory.open(new File(this.bam_out + String.format("%08d", i)));
		iterSam[i] = batchSam[i].iterator();
		heads[i] = iterSam[i].hasNext() ? iterSam[i].next() : null;
	}
	long record_outCount = 0;
	while (true) {
		// Pick the smallest head; ties are fine — equal records are emitted
		// one after another instead of being lost.
		int min = -1;
		for (int i = 0; i != batch; i++) {
			if (heads[i] != null && (min < 0 || comprator.compare(heads[i], heads[min]) < 0))
				min = i;
		}
		if (min < 0)
			break; // all batch files drained
		outputSam.addAlignment(heads[min]);
		record_outCount++;
		heads[min] = iterSam[min].hasNext() ? iterSam[min].next() : null;
	}
	try {
		outputSam.close();
		// Close and delete the intermediate batch files.
		for (int i = 0; i != batch; i++) {
			iterSam[i].close();
			batchSam[i].close();
			new File(this.bam_out + String.format("%08d", i)).delete();
		}
	} catch (IOException e) {
		e.printStackTrace();
	}
	myLogger.info(record_outCount + " records written to " + this.bam_out);
}
Use of htsjdk.samtools.SAMFileWriterFactory in project gridss by PapenfussLab:
the class IntermediateFilesTest, method createBAM.
/**
 * Writes the given records to a BAM/SAM file (with index) using the supplied
 * header. When the header declares coordinate sort order, records are first
 * coordinate-sorted through a disk-backed {@code SortingCollection} spilling
 * into the test folder; otherwise they are written in the given order.
 */
public void createBAM(File file, SAMFileHeader header, SAMRecord... data) {
	final SAMFileWriter writer = new SAMFileWriterFactory().setCreateIndex(true).makeSAMOrBAMWriter(header, true, file);
	if (header.getSortOrder() != SortOrder.coordinate) {
		// Unsorted/queryname output: pass records straight through.
		for (SAMRecord rec : data) {
			writer.addAlignment(rec);
		}
		writer.close();
		return;
	}
	// Coordinate-sorted output: stage records in a sorting collection so the
	// writer receives them in coordinate order.
	final SortingCollection<SAMRecord> sorter = SortingCollection.newInstance(SAMRecord.class, new BAMRecordCodec(header), new SAMRecordCoordinateComparator(), 100000000, testFolder.getRoot());
	for (SAMRecord rec : data) {
		sorter.add(rec);
	}
	sorter.doneAdding();
	for (SAMRecord sorted : sorter) {
		writer.addAlignment(sorted);
	}
	writer.close();
}
Use of htsjdk.samtools.SAMFileWriterFactory in project gridss by PapenfussLab:
the class VcfBreakendToReadPair, method writeVisualisationBam.
/**
 * Converts breakpoint variants from a VCF into SAM records for visualisation,
 * splitting them into two BAMs: passing calls go to {@code bam}, filtered
 * calls to {@code bamFiltered}.
 *
 * <p>Records are first written to working files, then post-processed by
 * {@code FixMate} into the final outputs; the working files are deleted in
 * the finally block regardless of success.
 *
 * @param pc          processing context supplying the writer factory and header
 * @param vcf         input VCF of structural variant calls
 * @param bam         output BAM for non-filtered breakpoint records
 * @param bamFiltered output BAM for filtered breakpoint records
 * @throws IOException on I/O failure while writing or fixing the BAMs
 */
public void writeVisualisationBam(GenomicProcessingContext pc, File vcf, File bam, File bamFiltered) throws IOException {
File working = FileSystemContext.getWorkingFileFor(bam);
File workingFiltered = FileSystemContext.getWorkingFileFor(bamFiltered);
VCFFileReader vcfReader = new VCFFileReader(vcf, false);
CloseableIterator<VariantContext> it = vcfReader.iterator();
SAMFileWriter writer = null;
SAMFileWriter writerFiltered = null;
try {
SAMFileWriterFactory factory = pc.getSamFileWriterFactory(true);
SAMFileHeader header = pc.getBasicSamHeader();
// presorted=false: records are emitted in VCF order, not coordinate order
writer = factory.makeSAMOrBAMWriter(header, false, working);
writerFiltered = factory.makeSAMOrBAMWriter(header, false, workingFiltered);
while (it.hasNext()) {
IdsvVariantContext variant = IdsvVariantContext.create(pc, null, it.next());
// Only breakpoint-type variants are converted; other variant types are skipped.
if (variant instanceof VariantContextDirectedBreakpoint) {
VariantContextDirectedBreakpoint bp = (VariantContextDirectedBreakpoint) variant;
if (bp.isFiltered()) {
writerFiltered.addAlignment(bp.asSamRecord(header));
} else {
writer.addAlignment(bp.asSamRecord(header));
}
}
}
// Close explicitly before FixMate reads the working files; the finally
// block's CloserUtil calls are then no-ops for these writers.
writer.close();
writerFiltered.close();
// Correct mate pairing since asSAMRecord() does not factor in mate anchor cigar
new FixMate().fix(working, bam);
new FixMate().fix(workingFiltered, bamFiltered);
} finally {
// Best-effort cleanup on both success and failure paths.
CloserUtil.close(writer);
CloserUtil.close(writerFiltered);
CloserUtil.close(it);
CloserUtil.close(vcfReader);
FileHelper.delete(working, true);
FileHelper.delete(workingFiltered, true);
}
}
Use of htsjdk.samtools.SAMFileWriterFactory in project gridss by PapenfussLab:
the class MultipleSamFileCommandLineProgram, method ensureSequenceDictionary.
/**
 * Ensures a sequence dictionary ({@code .dict}) file exists alongside the
 * given reference FASTA, generating one via Picard's
 * {@code CreateSequenceDictionary} when it is missing.
 *
 * @param fa reference FASTA file
 * @throws IOException if the reference cannot be read
 */
public static void ensureSequenceDictionary(File fa) throws IOException {
	File output = new File(fa.getAbsolutePath() + ".dict");
	try (ReferenceSequenceFile reference = ReferenceSequenceFileFactory.getReferenceSequenceFile(fa)) {
		SAMSequenceDictionary dictionary = reference.getSequenceDictionary();
		if (dictionary != null) {
			// Dictionary already present; nothing to do.
			log.debug("Found sequence dictionary for ", fa);
			return;
		}
		log.info("Attempting to generate missing sequence dictionary for ", fa);
		try {
			final SAMSequenceDictionary sequences = new CreateSequenceDictionary().makeSequenceDictionary(fa);
			final SAMFileHeader samHeader = new SAMFileHeader();
			samHeader.setSequenceDictionary(sequences);
			// Opening and immediately closing the writer persists the
			// header-only dictionary file; the empty body is intentional.
			try (SAMFileWriter samWriter = new SAMFileWriterFactory().makeSAMWriter(samHeader, false, output)) {
			}
			log.info("Created sequence dictionary ", output);
		} catch (Exception e) {
			log.error("Missing sequence dictionary for ", fa, " and creation of sequencing dictionary failed.", "Please run the Picard tools CreateSequenceDictionary utility to create", output);
			throw e;
		}
	}
}
Aggregations