Use of htsjdk.samtools.SamReader in project hmftools by hartwigmedical — class BamSlicerApplication, method sliceFromURLs.
private static void sliceFromURLs(@NotNull final URL indexUrl, @NotNull final URL bamUrl, @NotNull final CommandLine cmd) throws IOException {
    // Slice a remote BAM over HTTP: download its index, work out which chunks cover
    // the requested BED regions (and, optionally, the unmapped reads at the tail of
    // the file), then copy just those records into the output BAM.
    //
    // @param indexUrl URL of the .bai index for the remote BAM
    // @param bamUrl   URL of the remote BAM to slice
    // @param cmd      parsed command line; supplies the OUTPUT path and slicing options
    // @throws IOException on download, read, or write failure
    final File indexFile = downloadIndex(indexUrl);
    indexFile.deleteOnExit();
    // try-with-resources so reader, writer and cachingReader are closed even when
    // slicing throws (the original closed them only on the success path, leaking
    // all three on any exception).
    try (final SamReader reader = SamReaderFactory.makeDefault().open(SamInputResource.of(bamUrl).index(indexFile));
            final SAMFileWriter writer = new SAMFileWriterFactory().setCreateIndex(true)
                    .makeBAMWriter(reader.getFileHeader(), true, new File(cmd.getOptionValue(OUTPUT)))) {
        final BAMIndex bamIndex = new DiskBasedBAMFileIndex(indexFile, reader.getFileHeader().getSequenceDictionary(), false);
        final Optional<Pair<QueryInterval[], BAMFileSpan>> queryIntervalsAndSpan = queryIntervalsAndSpan(reader, bamIndex, cmd);
        // Unmapped reads sit at the end of a coordinate-sorted BAM; locate that region
        // from the index plus the remote file size reported via Content-Length.
        final Optional<Chunk> unmappedChunk = getUnmappedChunk(bamIndex, HttpUtils.getHeaderField(bamUrl, "Content-Length"), cmd);
        final List<Chunk> sliceChunks = sliceChunks(queryIntervalsAndSpan, unmappedChunk);
        // Second reader pre-caches exactly the chunks we will touch, so the per-record
        // iteration below does not issue one HTTP range request per read.
        try (final SamReader cachingReader = createCachingReader(indexFile, bamUrl, cmd, sliceChunks)) {
            queryIntervalsAndSpan.ifPresent(pair -> {
                LOGGER.info("Slicing bam on bed regions...");
                final CloseableIterator<SAMRecord> bedIterator = getIterator(cachingReader, pair.getKey(), pair.getValue().toCoordinateArray());
                writeToSlice(writer, bedIterator);
                LOGGER.info("Done writing bed slices.");
            });
            unmappedChunk.ifPresent(chunk -> {
                LOGGER.info("Slicing unmapped reads...");
                final CloseableIterator<SAMRecord> unmappedIterator = cachingReader.queryUnmapped();
                writeToSlice(writer, unmappedIterator);
                LOGGER.info("Done writing unmapped reads.");
            });
        }
    }
}
Use of htsjdk.samtools.SamReader in project polyGembler by c-zhou — class SamFileExtract, method run.
@Override
public void run() {
    // Copy from bam_in to bam_out only the alignments whose reference sequence is
    // listed (first whitespace-delimited column) in the BED file bed_in, rewriting
    // the header so its sequence dictionary contains just the kept sequences.
    final SamReaderFactory factory = SamReaderFactory.makeDefault()
            .enable(SamReaderFactory.Option.INCLUDE_SOURCE_IN_RECORDS, SamReaderFactory.Option.VALIDATE_CRC_CHECKSUMS)
            .validationStringency(ValidationStringency.SILENT);

    // Sequence names to keep: first column of each BED line.
    final Set<String> extract = new HashSet<String>();
    try (BufferedReader br = new BufferedReader(new FileReader(new File(bed_in)))) {
        String line;
        while ((line = br.readLine()) != null) extract.add(line.split("\\s+")[0]);
    } catch (IOException e) {
        // A missing or unreadable BED file is fatal for this tool.
        e.printStackTrace();
        System.exit(1);
    }

    // try-with-resources guarantees reader, iterator and writer are closed even when
    // iteration throws (the original leaked all three on any exception mid-copy).
    try (final SamReader inputSam = factory.open(new File(bam_in))) {
        final SAMFileHeader header = inputSam.getFileHeader();
        final SAMSequenceDictionary seqdic = header.getSequenceDictionary();

        // Build the output header: copy version/sort-order attributes, the kept subset
        // of the sequence dictionary, and all read-group and program records.
        final SAMFileHeader header_out = new SAMFileHeader();
        header_out.setAttribute("VN", header.getAttribute("VN"));
        header_out.setAttribute("SO", header.getAttribute("SO"));
        final SAMSequenceDictionary seqdic_out = new SAMSequenceDictionary();
        for (SAMSequenceRecord seq : seqdic.getSequences())
            if (extract.contains(seq.getSequenceName())) seqdic_out.addSequence(seq);
        header_out.setSequenceDictionary(seqdic_out);
        for (SAMReadGroupRecord rg : header.getReadGroups()) header_out.addReadGroup(rg);
        for (SAMProgramRecord pg : header.getProgramRecords()) header_out.addProgramRecord(pg);

        try (final SAMRecordIterator iter = inputSam.iterator();
                final SAMFileWriter outputSam = new SAMFileWriterFactory().makeSAMOrBAMWriter(header_out, true, new File(bam_out))) {
            while (iter.hasNext()) {
                final SAMRecord rec = iter.next();
                if (extract.contains(rec.getReferenceName())) outputSam.addAlignment(rec);
            }
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
    System.err.println(bam_in + " return true");
}
Use of htsjdk.samtools.SamReader in project polyGembler by c-zhou — class SamFileFilter, method run.
@Override
// Filters every *.bam file in input_dir against (chromosome, end-position) intervals
// read from filter_file, writing the surviving records to a same-named BAM in
// output_dir. One worker task is submitted per input BAM.
public void run() {
// TODO Auto-generated method stub
// List all BAM files in the input directory.
File folder = new File(input_dir);
File[] listOfFiles = folder.listFiles(new FilenameFilter() {
@Override
public boolean accept(File dir, String name) {
return name.endsWith(".bam");
}
});
int bam_n = listOfFiles.length;
this.initial_thread_pool();
for (int i = 0; i < bam_n; i++) {
executor.submit(new Runnable() {
private String bam_file;
@Override
public void run() {
// TODO Auto-generated method stub
try {
final SamReaderFactory factory = SamReaderFactory.makeDefault().enable(SamReaderFactory.Option.INCLUDE_SOURCE_IN_RECORDS, SamReaderFactory.Option.VALIDATE_CRC_CHECKSUMS).validationStringency(ValidationStringency.SILENT);
final SamReader inputSam = factory.open(new File(input_dir + "/" + bam_file));
// Output BAM reuses the input header unchanged.
final SAMFileWriter outputSam = new SAMFileWriterFactory().makeSAMOrBAMWriter(inputSam.getFileHeader(), true, new File(output_dir + "/" + bam_file));
final BufferedReader filter_br = Utils.getBufferedReader(filter_file);
SAMRecordIterator iter = inputSam.iterator();
// Each filter line is "<chr> <end_pos>"; an empty filter file means
// nothing is kept, so close everything and bail out immediately.
String line = filter_br.readLine();
if (line == null) {
filter_br.close();
iter.close();
inputSam.close();
outputSam.close();
return;
}
String[] s = line.split("\\s+");
String chr = s[0];
long end_pos = Long.parseLong(s[1]);
SAMRecord samr;
// Walk records and filter intervals in lockstep. NOTE(review): this
// assumes both the BAM and the filter file are sorted in the same
// chromosome/coordinate order — confirm against the producers.
while (iter.hasNext()) {
samr = iter.next();
// Record falls entirely inside the current interval: keep it.
if (samr.getReferenceName().equals(chr) && samr.getAlignmentEnd() < end_pos) {
outputSam.addAlignment(samr);
continue;
}
// Record is past the current interval (different chromosome or starts
// beyond end_pos): advance the filter to catch up with this record.
if (!samr.getReferenceName().equals(chr) || samr.getAlignmentStart() > end_pos) {
line = null;
while ((line = filter_br.readLine()) != null && end_pos < samr.getAlignmentStart()) {
s = line.split("\\s");
chr = s[0];
end_pos = Long.parseLong(s[1]);
}
if (line != null) {
// Re-test the record against the newly loaded interval.
if (samr.getReferenceName().equals(chr) && samr.getAlignmentEnd() < end_pos)
outputSam.addAlignment(samr);
} else {
// Filter exhausted: everything from here on is kept unfiltered.
outputSam.addAlignment(samr);
while (iter.hasNext()) outputSam.addAlignment(iter.next());
break;
}
}
}
filter_br.close();
iter.close();
inputSam.close();
outputSam.close();
} catch (Exception e) {
// Fatal: report through the thread's handler and abort the whole run.
Thread t = Thread.currentThread();
t.getUncaughtExceptionHandler().uncaughtException(t, e);
e.printStackTrace();
executor.shutdown();
System.exit(1);
}
}
// Captures the BAM file name for this task; returns this so the instance
// can be built and submitted in one expression.
public Runnable init(String bam_file) {
// TODO Auto-generated method stub
this.bam_file = bam_file;
return this;
}
}.init(listOfFiles[i].getName()));
// NOTE(review): this throw executes unconditionally on the FIRST loop
// iteration, immediately after submitting the first task — so only one BAM
// is ever submitted and the method always aborts here. It looks like
// leftover debug/kill-switch code; confirm intent before removing it.
throw new RuntimeException("incorrect tools!!!!");
}
this.waitFor();
}
Use of htsjdk.samtools.SamReader in project polyGembler by c-zhou — class Consensus, method parseDataLibrary.
private void parseDataLibrary(String[] args) {
    // Parse the per-library command-line triples — BAM path, insert-size threshold,
    // link weight — keyed by the library index embedded in each option name, then
    // populate the parallel arrays bam_list / ins_thres / link_w / read_paired.
    final Map<Integer, String> bamByLib = new HashMap<Integer, String>();
    final Map<Integer, Integer> insByLib = new HashMap<Integer, Integer>();
    final Map<Integer, Integer> weightByLib = new HashMap<Integer, Integer>();
    for (int a = 0; a < args.length; a++) {
        final String token = args[a];
        // Each option pattern captures the library index in group(1); the option's
        // value is the following argument.
        if (token.matches(bam_lib)) {
            final Matcher m = bam_pat.matcher(token);
            m.find();
            bamByLib.put(Integer.parseInt(m.group(1)), args[++a]);
        }
        if (token.matches(ins_lib)) {
            final Matcher m = ins_pat.matcher(token);
            m.find();
            insByLib.put(Integer.parseInt(m.group(1)), Integer.parseInt(args[++a]));
        }
        if (token.matches(w_lib)) {
            final Matcher m = w_pat.matcher(token);
            m.find();
            weightByLib.put(Integer.parseInt(m.group(1)), Integer.parseInt(args[++a]));
        }
    }
    final int n = bamByLib.size();
    // Every BAM library needs a weight; insert sizes are checked per-library below.
    if (weightByLib.size() != n) {
        printUsage();
        throw new IllegalArgumentException("Library parameters do not match!!!");
    }
    this.bam_list = new String[n];
    this.ins_thres = new int[n];
    this.link_w = new double[n];
    this.read_paired = new boolean[n];
    int idx = 0;
    for (Integer lib : bamByLib.keySet()) {
        bam_list[idx] = bamByLib.get(lib);
        // Peek at the first mapped record to decide whether this library is paired-end.
        final SamReader reader = factory.open(new File(bam_list[idx]));
        final SAMRecordIterator it = reader.iterator();
        while (it.hasNext()) {
            final SAMRecord rec = it.next();
            if (!rec.getReadUnmappedFlag()) {
                if (rec.getReadPairedFlag())
                    read_paired[idx] = true;
                break;
            }
        }
        try {
            it.close();
            reader.close();
        } catch (IOException e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
        }
        // Paired libraries must supply an insert-size threshold; all must have a weight.
        if (!insByLib.containsKey(lib) && read_paired[idx] || !weightByLib.containsKey(lib)) {
            printUsage();
            throw new IllegalArgumentException("Library parameters do not match!!!");
        }
        ins_thres[idx] = read_paired[idx] ? insByLib.get(lib) : Integer.MAX_VALUE;
        link_w[idx] = 1.0 / weightByLib.get(lib);
        idx++;
    }
}
Use of htsjdk.samtools.SamReader in project polyGembler by c-zhou — class SamToTaxa, method distributeSortedBam.
// Demultiplexes a tag-sorted BAM into one BAM per taxon. Read names are numeric tag
// ids; the index file maps each tag to the taxa (and per-taxon counts) that carry it.
// Records are dispatched to a thread pool in blocks; per-taxon Object locks serialize
// concurrent writes to each output writer.
private void distributeSortedBam() {
// TODO Auto-generated method stub
long start = System.currentTimeMillis();
try {
File indexFile = new File(myIndexFileName);
myLogger.info("Using the following index file:");
myLogger.info(indexFile.getAbsolutePath());
File samFile = new File(mySamFileName);
myLogger.info("Using the following BAM file:");
myLogger.info(samFile.getAbsolutePath());
final SamReaderFactory factory = SamReaderFactory.makeDefault().enable(SamReaderFactory.Option.INCLUDE_SOURCE_IN_RECORDS, SamReaderFactory.Option.VALIDATE_CRC_CHECKSUMS).validationStringency(ValidationStringency.SILENT);
final SamReader inputSam = factory.open(samFile);
header_sam = inputSam.getFileHeader();
// Output BAMs are written in dispatch order, not coordinate order.
header_sam.setSortOrder(SortOrder.unsorted);
indexReader = Utils.getBufferedReader(indexFile, 65536);
// First index line: whitespace-separated header whose columns 1..N are taxa names.
String line = indexReader.readLine();
String[] samples = line.split("\\s+");
// One writer (with its own read group) and one lock object per taxon.
for (int i = 1; i < samples.length; i++) {
String taxa = samples[i];
SAMFileHeader header = header_sam.clone();
SAMReadGroupRecord rg = new SAMReadGroupRecord(taxa);
rg.setSample(taxa);
header.addReadGroup(rg);
bam_writers.put(taxa, new SAMFileWriterFactory().makeSAMOrBAMWriter(header, true, new File(myOutputDir + System.getProperty("file.separator") + taxa + ".bam")));
locks.put(taxa, new Object());
}
SAMRecordIterator iter = inputSam.iterator();
// cache() presumably loads the next window of the tag index into indexMap and
// advances cursor — TODO confirm against the cache() implementation.
cache();
this.initial_thread_pool();
final int block = 10000;
int bS = 0;
SAMRecord[] Qs = new SAMRecord[block];
SAMRecord temp = iter.next();
int k = 0;
allReads = 0;
long tag;
// Fill Qs with up to `block` records, then hand the array to a worker. A block is
// also flushed early when the next record's tag id reaches `cursor`, i.e. falls
// outside the currently cached index window.
while (temp != null) {
allReads++;
Qs[k] = temp;
temp = iter.hasNext() ? iter.next() : null;
k++;
// Read names are parsed as numeric tag ids; 0 is a sentinel for end-of-input.
tag = temp == null ? 0 : Long.parseLong(temp.getReadName());
if (k == block || temp == null || tag >= cursor) {
executor.submit(new Runnable() {
private SAMRecord[] sam;
private int block_i;
@Override
public void run() {
// TODO Auto-generated method stub
long tag;
Tag tagObj;
String taxa;
for (int i = 0; i < sam.length; i++) {
try {
// A partially filled block is null-terminated.
if (sam[i] == null)
break;
tag = Long.parseLong(sam[i].getReadName());
tagObj = indexMap.get(tag);
int l = tagObj.taxa.length;
// Total occurrences of this tag across all taxa.
long cov = 0;
for (int t = 0; t < l; t++) cov += tagObj.count[t];
// Skip tags with abnormally high coverage (likely repeats/artifacts).
if (cov > maxCov) {
myLogger.info("?tag id, " + tag + "::tag sequence, " + sam[i].getReadString() + "::aligned to " + sam[i].getReferenceName() + ":" + sam[i].getAlignmentStart() + "-" + sam[i].getAlignmentEnd() + "::omitted due to abnormal high coverage, " + cov);
continue;
}
// Emit the record once per original occurrence in each taxon,
// retagged with that taxon's read group. NOTE(review): sam[i] is
// mutated (RG) and shared across taxa iterations — safe only
// because addAlignment happens under the taxon lock before the
// next setAttribute; confirm writers copy or serialize records.
for (int t = 0; t < l; t++) {
taxa = tagObj.taxa[t];
sam[i].setAttribute("RG", taxa);
int p = tagObj.count[t];
synchronized (locks.get(taxa)) {
for (int r = 0; r < p; r++) bam_writers.get(taxa).addAlignment(sam[i]);
}
}
} catch (Exception e) {
// Fatal: surface the error and abort the whole program.
Thread t = Thread.currentThread();
t.getUncaughtExceptionHandler().uncaughtException(t, e);
e.printStackTrace();
executor.shutdown();
System.exit(1);
}
}
// Progress log every millionth tag (block-granular).
if (block * (block_i + 1) % 1000000 == 0)
myLogger.info("Tag processed: " + block * (block_i + 1));
}
// Captures the record block and its ordinal; returns this for inline submit.
public Runnable init(SAMRecord[] sam, int bS) {
this.sam = sam;
this.block_i = bS;
return (this);
}
}.init(Qs, bS));
k = 0;
// Fresh array for the next block — the worker still owns the old one.
Qs = new SAMRecord[block];
bS++;
// When the cached index window is exhausted (or input ends), drain all workers
// before loading the next window, so indexMap is never swapped under a worker.
if (temp == null || tag >= cursor) {
this.waitFor();
if (temp != null) {
cache();
this.initial_thread_pool();
}
}
}
}
iter.close();
inputSam.close();
indexReader.close();
// executor.awaitTermination(365, TimeUnit.DAYS);
for (String key : bam_writers.keySet()) bam_writers.get(key).close();
myLogger.info("Total number of reads in lane=" + allReads);
myLogger.info("Process took " + (System.currentTimeMillis() - start) / 1000 + " seconds.");
} catch (Exception e) {
e.printStackTrace();
}
}
Aggregations