use of htsjdk.samtools.SAMFileWriter in project polyGembler by c-zhou.
the class SamFileSplit method run.
/**
 * Splits every {@code *.bam} file found in the {@code bam_in} directory into one BAM per
 * file found in the {@code bed_in} directory.
 *
 * For each file in {@code bed_in} a sub-directory of {@code bam_out} is created, named after
 * the file with a trailing {@code .bed} removed. The first whitespace-separated column of
 * every line in that file is taken as a reference sequence name. Each input BAM is then
 * processed by one task on {@code executor}: records whose reference name is listed are
 * written to the matching sub-directory under the input BAM's file name, with a header
 * restricted to the listed sequences. Records on unlisted references are dropped.
 *
 * Any failure inside a task is reported and terminates the JVM with exit code 1.
 *
 * @throws IllegalArgumentException if {@code bed_in} or {@code bam_in} cannot be listed
 *         as a directory
 */
@Override
public void run() {
    Utils.makeOutputDir(bam_out);
    // File.listFiles() returns null when the path is not a directory or cannot be read.
    final File[] beds = new File(bed_in).listFiles();
    if (beds == null) {
        throw new IllegalArgumentException("Cannot list files in directory: " + bed_in);
    }
    final String[] out_prefix = new String[beds.length];
    for (int i = 0; i < beds.length; i++) {
        // The dot is escaped so that only a literal ".bed" suffix is removed.
        out_prefix[i] = bam_out + "/" + beds[i].getName().replaceAll("\\.bed$", "");
        Utils.makeOutputDir(out_prefix[i]);
    }
    final File[] bams = new File(bam_in).listFiles(new FilenameFilter() {
        @Override
        public boolean accept(File dir, String name) {
            return name.endsWith(".bam");
        }
    });
    if (bams == null) {
        throw new IllegalArgumentException("Cannot list files in directory: " + bam_in);
    }
    this.initial_thread_pool();
    for (final File bam : bams) {
        executor.submit(new Runnable() {
            @Override
            public void run() {
                try {
                    final SamReaderFactory factory = SamReaderFactory.makeDefault().enable(SamReaderFactory.Option.INCLUDE_SOURCE_IN_RECORDS, SamReaderFactory.Option.VALIDATE_CRC_CHECKSUMS).validationStringency(ValidationStringency.SILENT);
                    final SamReader inputSam = factory.open(bam);
                    final SAMFileHeader header = inputSam.getFileHeader();
                    final SAMRecordIterator iter = inputSam.iterator();
                    final SAMSequenceDictionary seqdic = header.getSequenceDictionary();
                    final SAMFileWriter[] outputSam = new SAMFileWriter[beds.length];
                    final SAMSequenceDictionary[] seqdics = new SAMSequenceDictionary[beds.length];
                    // reference name -> index of the BED file (and output writer) that lists it;
                    // a name listed in several BED files maps to the last one read
                    final Map<String, Integer> outMap = new HashMap<String, Integer>();
                    final String out = bam.getName();
                    for (int i = 0; i < beds.length; i++) {
                        final Set<String> bed_seq = new HashSet<String>();
                        final BufferedReader br = new BufferedReader(new FileReader(beds[i]));
                        try {
                            String line;
                            while ((line = br.readLine()) != null) {
                                final String name = line.split("\\s+")[0];
                                bed_seq.add(name);
                                outMap.put(name, i);
                            }
                        } finally {
                            // release the file handle even when reading fails
                            br.close();
                        }
                        // Build a header that keeps only the sequences listed in this BED file.
                        final SAMFileHeader header_i = new SAMFileHeader();
                        final SAMSequenceDictionary seqdic_i = new SAMSequenceDictionary();
                        header_i.setAttribute("VN", header.getAttribute("VN"));
                        header_i.setAttribute("SO", header.getAttribute("SO"));
                        for (SAMSequenceRecord seq : seqdic.getSequences()) {
                            if (bed_seq.contains(seq.getSequenceName())) {
                                seqdic_i.addSequence(seq);
                            }
                        }
                        header_i.setSequenceDictionary(seqdic_i);
                        for (SAMReadGroupRecord rg : header.getReadGroups()) {
                            header_i.addReadGroup(rg);
                        }
                        for (SAMProgramRecord pg : header.getProgramRecords()) {
                            header_i.addProgramRecord(pg);
                        }
                        outputSam[i] = new SAMFileWriterFactory().makeSAMOrBAMWriter(header_i, true, new File(out_prefix[i] + "/" + out));
                        seqdics[i] = seqdic_i;
                    }
                    while (iter.hasNext()) {
                        final SAMRecord rec = iter.next();
                        final String ref = rec.getReferenceName();
                        final Integer f = outMap.get(ref);
                        if (f != null) {
                            // NOTE(review): only the record's own reference index is remapped to
                            // the reduced dictionary; mate reference fields are left untouched —
                            // confirm this is intended.
                            rec.setReferenceIndex(seqdics[f].getSequenceIndex(ref));
                            outputSam[f].addAlignment(rec);
                        }
                    }
                    iter.close();
                    inputSam.close();
                    for (int i = 0; i < outputSam.length; i++) {
                        outputSam[i].close();
                    }
                    myLogger.info(out + " return true");
                } catch (Exception e) {
                    Thread t = Thread.currentThread();
                    t.getUncaughtExceptionHandler().uncaughtException(t, e);
                    e.printStackTrace();
                    executor.shutdown();
                    System.exit(1);
                }
            }
        });
    }
    this.waitFor();
}
use of htsjdk.samtools.SAMFileWriter in project polyGembler by c-zhou.
the class TenXSamtools method runSort.
/**
 * Sorts the records of {@code bam_in} into {@code bam_out} in two phases.
 *
 * Phase 1 reads the input in chunks of {@code batch_size} records. Each chunk is handed to
 * {@code executor}, sorted with {@code comprator} and written to a temporary file named
 * {@code bam_out} followed by an 8-digit batch number taken from {@code batch}.
 *
 * Phase 2 waits for all chunk tasks, then merges the temporary files into {@code bam_out}
 * and deletes them. The merge keeps the smallest pending record of every batch in a priority
 * queue. Records that compare equal under {@code comprator} are all kept, and ties are
 * broken by batch number. (A map keyed by the record would silently drop a record whenever
 * two batches offer comparator-equal records.)
 *
 * The header's sort order is set from {@code sort_order}; {@code barcode} is written as
 * {@code SortOrder.unknown}.
 */
private void runSort() {
    final SamReaderFactory factory = SamReaderFactory.makeDefault().enable(SamReaderFactory.Option.INCLUDE_SOURCE_IN_RECORDS, SamReaderFactory.Option.VALIDATE_CRC_CHECKSUMS).validationStringency(ValidationStringency.SILENT);
    final SamReader inputSam = factory.open(new File(this.bam_in));
    final SAMFileHeader sort_header = inputSam.getFileHeader();
    switch(this.sort_order) {
        case coordinate:
            sort_header.setSortOrder(SortOrder.coordinate);
            break;
        case queryname:
            sort_header.setSortOrder(SortOrder.queryname);
            break;
        case barcode:
            sort_header.setSortOrder(SortOrder.unknown);
            break;
    }
    SAMRecordIterator iter = inputSam.iterator();
    long record_inCount = 0;
    SAMRecord[] buff = new SAMRecord[this.batch_size];
    int k = 0;
    SAMRecord temp = iter.hasNext() ? iter.next() : null;
    this.initial_thread_pool();
    while (temp != null) {
        buff[k++] = temp;
        record_inCount++;
        temp = iter.hasNext() ? iter.next() : null;
        if (k == this.batch_size || temp == null) {
            // Trim a partially filled last chunk so that the comparator is never asked to
            // compare the unused (null) tail slots.
            final SAMRecord[] records = (k == buff.length) ? buff : Arrays.copyOf(buff, k);
            executor.submit(new Runnable() {
                @Override
                public void run() {
                    try {
                        Arrays.sort(records, comprator);
                        final SAMFileWriter outputSam;
                        // batch numbers are handed out under the lock so file names are unique
                        synchronized (lock) {
                            outputSam = new SAMFileWriterFactory().makeSAMOrBAMWriter(sort_header, true, new File(bam_out + String.format("%08d", batch++)));
                        }
                        int count = 0;
                        for (SAMRecord record : records) {
                            if (record != null) {
                                count++;
                                outputSam.addAlignment(record);
                            }
                        }
                        outputSam.close();
                        long processed;
                        synchronized (lock) {
                            record_count += count;
                            // read the running total while still holding the lock
                            processed = record_count;
                        }
                        myLogger.info("[" + Thread.currentThread().getName() + "] " + processed + " records processed.");
                    } catch (Exception e) {
                        Thread t = Thread.currentThread();
                        t.getUncaughtExceptionHandler().uncaughtException(t, e);
                        e.printStackTrace();
                        executor.shutdown();
                        System.exit(1);
                    }
                }
            });
            k = 0;
            buff = new SAMRecord[this.batch_size];
        }
    }
    iter.close();
    try {
        inputSam.close();
    } catch (IOException e) {
        e.printStackTrace();
    }
    myLogger.info(record_inCount + " records read from " + this.bam_in);
    this.waitFor();
    // merge all batches
    myLogger.info("Merge " + batch + " files.");
    final SAMFileWriter outputSam = new SAMFileWriterFactory().makeSAMOrBAMWriter(sort_header, true, new File(this.bam_out));
    final SamReader[] batchSam = new SamReader[batch];
    final SAMRecordIterator[] iterSam = new SAMRecordIterator[batch];
    // Holds at most one pending record per batch, ordered by comprator and then by batch
    // number. Unlike a map keyed by the record, the queue keeps comparator-equal records.
    final java.util.PriorityQueue<Entry<SAMRecord, Integer>> pending = new java.util.PriorityQueue<Entry<SAMRecord, Integer>>(Math.max(1, batch), new java.util.Comparator<Entry<SAMRecord, Integer>>() {
        @Override
        public int compare(Entry<SAMRecord, Integer> a, Entry<SAMRecord, Integer> b) {
            final int c = comprator.compare(a.getKey(), b.getKey());
            return c != 0 ? c : a.getValue().compareTo(b.getValue());
        }
    });
    for (int i = 0; i != batch; i++) {
        batchSam[i] = factory.open(new File(this.bam_out + String.format("%08d", i)));
        iterSam[i] = batchSam[i].iterator();
        if (iterSam[i].hasNext()) {
            pending.add(new java.util.AbstractMap.SimpleImmutableEntry<SAMRecord, Integer>(iterSam[i].next(), i));
        }
    }
    long record_outCount = 0;
    while (!pending.isEmpty()) {
        final Entry<SAMRecord, Integer> head = pending.poll();
        outputSam.addAlignment(head.getKey());
        record_outCount++;
        // refill from the batch the written record came from
        final int bch = head.getValue();
        if (iterSam[bch].hasNext()) {
            pending.add(new java.util.AbstractMap.SimpleImmutableEntry<SAMRecord, Integer>(iterSam[bch].next(), bch));
        }
    }
    try {
        outputSam.close();
        for (int i = 0; i != batch; i++) {
            iterSam[i].close();
            batchSam[i].close();
            new File(this.bam_out + String.format("%08d", i)).delete();
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
    myLogger.info(record_outCount + " records written to " + this.bam_out);
}
use of htsjdk.samtools.SAMFileWriter in project gridss by PapenfussLab.
the class IntermediateFilesTest method createBAM.
/**
 * Writes the given records to {@code file} using {@code header}, with index creation enabled
 * on the writer factory.
 *
 * When the header's sort order is {@code coordinate}, the records are first sorted with a
 * {@code SAMRecordCoordinateComparator} through a {@code SortingCollection} whose temporary
 * directory is {@code testFolder.getRoot()}. Otherwise they are written in the order given.
 * The writer is closed, and the sorting collection cleaned up, even when writing fails.
 *
 * @param file   destination SAM/BAM file
 * @param header header to write; its sort order decides whether records are pre-sorted
 * @param data   records to write
 */
public void createBAM(File file, SAMFileHeader header, SAMRecord... data) {
    SAMFileWriter writer = new SAMFileWriterFactory().setCreateIndex(true).makeSAMOrBAMWriter(header, true, file);
    try {
        if (header.getSortOrder() == SortOrder.coordinate) {
            SortingCollection<SAMRecord> presort = SortingCollection.newInstance(SAMRecord.class, new BAMRecordCodec(header), new SAMRecordCoordinateComparator(), 100000000, testFolder.getRoot());
            try {
                for (SAMRecord r : data) {
                    presort.add(r);
                }
                presort.doneAdding();
                for (SAMRecord r : presort) {
                    writer.addAlignment(r);
                }
            } finally {
                // remove any temporary files the collection may have created
                presort.cleanup();
            }
        } else {
            for (SAMRecord r : data) {
                writer.addAlignment(r);
            }
        }
    } finally {
        // do not leave the output file handle open when adding a record fails
        writer.close();
    }
}
use of htsjdk.samtools.SAMFileWriter in project gridss by PapenfussLab.
the class SplitReadRealigner method mergeSupplementaryAlignment.
/**
 * Merges the realigned split-read records in {@code aligned} with the records of
 * {@code input} and writes the combined result to {@code output}.
 *
 * Two working files are produced by the iterator-level overload of this method: one for the
 * records of the input file and one for the supplementary alignments. If the input header
 * declares a sort order other than {@code unsorted}, the supplementary file is re-sorted to
 * that order before both working files are merged into {@code output}. All working files are
 * registered in {@code tmpFiles}.
 *
 * @param input   original SAM/BAM file
 * @param aligned files holding the realigned records
 * @param output  destination file
 * @throws IOException if closing a reader or a file operation fails
 */
private void mergeSupplementaryAlignment(File input, List<File> aligned, File output) throws IOException {
    log.info("Merging split read alignments for ", output);
    File suppWorking = FileSystemContext.getWorkingFileFor(output, "gridss.tmp.SplitReadAligner.sa.");
    File inputWorking = FileSystemContext.getWorkingFileFor(output);
    tmpFiles.add(suppWorking);
    tmpFiles.add(inputWorking);
    List<SamReader> alignedReaders = new ArrayList<>();
    List<PeekingIterator<SAMRecord>> alignedIterators = new ArrayList<>();
    SAMFileHeader header;
    try (SamReader inputReader = readerFactory.open(input)) {
        header = inputReader.getFileHeader();
        // Open every realigned file up front; reader and iterator are tracked separately so
        // that both can be released in the finally block.
        for (File alignedFile : aligned) {
            SamReader alignedReader = readerFactory.open(alignedFile);
            alignedReaders.add(alignedReader);
            NmTagIterator tagged = new NmTagIterator(alignedReader.iterator(), pc.getReference());
            alignedIterators.add(new AsyncBufferedIterator<>(tagged, alignedFile.getName()));
        }
        // Resources are closed in reverse order of declaration: iterator, then the
        // supplementary writer, then the input writer.
        try (SAMFileWriter inputWriter = writerFactory.makeSAMOrBAMWriter(header, true, inputWorking);
                SAMFileWriter suppWriter = writerFactory.makeSAMOrBAMWriter(header, false, suppWorking);
                AsyncBufferedIterator<SAMRecord> inputIt = new AsyncBufferedIterator<>(new NmTagIterator(inputReader.iterator(), pc.getReference()), input.getName())) {
            mergeSupplementaryAlignment(inputIt, alignedIterators, inputWriter, suppWriter);
        }
    } finally {
        for (Iterator<SAMRecord> alignedIt : alignedIterators) {
            CloserUtil.close(alignedIt);
        }
        for (SamReader alignedReader : alignedReaders) {
            alignedReader.close();
        }
    }
    SortOrder order = header.getSortOrder();
    boolean needsSort = order != null && order != SortOrder.unsorted;
    if (needsSort) {
        // The supplementary file was written unsorted; bring it to the input's order.
        File suppSorted = FileSystemContext.getWorkingFileFor(output, "gridss.tmp.SplitReadAligner.sorted.sa.");
        tmpFiles.add(suppSorted);
        SAMFileUtil.sort(pc.getFileSystemContext(), suppWorking, suppSorted, order);
        FileHelper.move(suppSorted, suppWorking, true);
    }
    SAMFileUtil.merge(ImmutableList.of(inputWorking, suppWorking), output);
}
use of htsjdk.samtools.SAMFileWriter in project gridss by PapenfussLab.
the class VcfBreakendToReadPair method writeVisualisationBam.
/**
 * Converts the breakpoint calls of a VCF into SAM records for visualisation.
 *
 * Every variant that is a {@code VariantContextDirectedBreakpoint} is turned into a SAM
 * record; filtered breakpoints go to {@code bamFiltered}, all others to {@code bam}. Records
 * are first written to working files, which are then passed through {@code FixMate} to
 * produce the final outputs. The working files are deleted afterwards, whether or not the
 * conversion succeeded.
 *
 * @param pc          processing context supplying the writer factory and the SAM header
 * @param vcf         VCF file to read
 * @param bam         output for breakpoints that are not filtered
 * @param bamFiltered output for filtered breakpoints
 * @throws IOException declared by the original signature
 */
public void writeVisualisationBam(GenomicProcessingContext pc, File vcf, File bam, File bamFiltered) throws IOException {
    File working = FileSystemContext.getWorkingFileFor(bam);
    File workingFiltered = FileSystemContext.getWorkingFileFor(bamFiltered);
    // Everything that needs closing is opened inside the try so that a failure part-way
    // through setup still releases what has been opened so far.
    VCFFileReader vcfReader = null;
    CloseableIterator<VariantContext> it = null;
    SAMFileWriter writer = null;
    SAMFileWriter writerFiltered = null;
    try {
        vcfReader = new VCFFileReader(vcf, false);
        it = vcfReader.iterator();
        SAMFileWriterFactory factory = pc.getSamFileWriterFactory(true);
        SAMFileHeader header = pc.getBasicSamHeader();
        writer = factory.makeSAMOrBAMWriter(header, false, working);
        writerFiltered = factory.makeSAMOrBAMWriter(header, false, workingFiltered);
        while (it.hasNext()) {
            IdsvVariantContext variant = IdsvVariantContext.create(pc, null, it.next());
            if (variant instanceof VariantContextDirectedBreakpoint) {
                VariantContextDirectedBreakpoint bp = (VariantContextDirectedBreakpoint) variant;
                if (bp.isFiltered()) {
                    writerFiltered.addAlignment(bp.asSamRecord(header));
                } else {
                    writer.addAlignment(bp.asSamRecord(header));
                }
            }
        }
        // The working files must be complete before FixMate reads them. Clearing the
        // reference after a successful close keeps the finally block from closing twice.
        writer.close();
        writer = null;
        writerFiltered.close();
        writerFiltered = null;
        // Correct mate pairing since asSAMRecord() does not factor in mate anchor cigar
        new FixMate().fix(working, bam);
        new FixMate().fix(workingFiltered, bamFiltered);
    } finally {
        CloserUtil.close(writer);
        CloserUtil.close(writerFiltered);
        CloserUtil.close(it);
        CloserUtil.close(vcfReader);
        FileHelper.delete(working, true);
        FileHelper.delete(workingFiltered, true);
    }
}
Aggregations