use of htsjdk.samtools.SAMFileWriter in project gridss by PapenfussLab.
the class ExternalProcessFastqAligner method align.
@Override
public void align(final File fastq, final File output, final File reference, final int threads) throws IOException {
    // Substitute the FASTQ path, reference path and thread count into the command-line template
    List<String> commandline = template.stream()
            .map(s -> String.format(s, fastq.getAbsolutePath(), reference.getAbsolutePath(), threads))
            .collect(Collectors.toList());
    String commandlinestr = commandline.stream().collect(Collectors.joining(" "));
    log.info("Invoking external aligner");
    log.info(commandlinestr);
    Process aligner = new ProcessBuilder(commandline)
            .redirectError(Redirect.INHERIT)
            .directory(output.getParentFile())
            .start();
    // Stream the aligner's SAM output from its stdout straight into the output writer
    final SamReader reader = readerFactory.open(SamInputResource.of(aligner.getInputStream()));
    final SAMFileHeader header = reader.getFileHeader();
    try (final SAMFileWriter writer = writerFactory.makeWriter(header, false, output, reference)) {
        final SAMRecordIterator it = reader.iterator();
        while (it.hasNext()) {
            writer.addAlignment(it.next());
        }
    }
    ExternalProcessHelper.shutdownAligner(aligner, commandlinestr, reference);
}
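Each element of the command-line template is passed through String.format with the FASTQ path, reference path and thread count as positional arguments (%1$s, %2$s, %3$d). A minimal sketch of wiring up such an aligner is shown below; the bwa template, file names and constructor signature are assumptions for illustration, not the exact gridss configuration.

// Sketch only: positional placeholders %1$s = fastq, %2$s = reference, %3$d = threads (assumed layout)
List<String> template = Arrays.asList("bwa", "mem", "-t", "%3$d", "%2$s", "%1$s");
SamReaderFactory readerFactory = SamReaderFactory.makeDefault().validationStringency(ValidationStringency.SILENT);
SAMFileWriterFactory writerFactory = new SAMFileWriterFactory();
// Constructor arguments assumed; check the gridss source for the actual signature
ExternalProcessFastqAligner aligner = new ExternalProcessFastqAligner(readerFactory, writerFactory, template);
aligner.align(new File("reads.fq"), new File("aligned.bam"), new File("ref.fa"), 4);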
use of htsjdk.samtools.SAMFileWriter in project gridss by PapenfussLab.
the class MultipleSamFileCommandLineProgram method ensureSequenceDictionary.
public static void ensureSequenceDictionary(File fa) throws IOException {
    File output = new File(fa.getAbsolutePath() + ".dict");
    try (ReferenceSequenceFile reference = ReferenceSequenceFileFactory.getReferenceSequenceFile(fa)) {
        SAMSequenceDictionary dictionary = reference.getSequenceDictionary();
        if (dictionary == null) {
            log.info("Attempting to generate missing sequence dictionary for ", fa);
            try {
                final SAMSequenceDictionary sequences = new CreateSequenceDictionary().makeSequenceDictionary(fa);
                final SAMFileHeader samHeader = new SAMFileHeader();
                samHeader.setSequenceDictionary(sequences);
                try (SAMFileWriter samWriter = new SAMFileWriterFactory().makeSAMWriter(samHeader, false, output)) {
                    // Intentionally empty: writing no records produces a header-only file, which is exactly the .dict format
                }
            } catch (Exception e) {
                log.error("Missing sequence dictionary for ", fa, " and creation of sequence dictionary failed.", "Please run the Picard tools CreateSequenceDictionary utility to create", output);
                throw e;
            }
            log.info("Created sequence dictionary ", output);
        } else {
            log.debug("Found sequence dictionary for ", fa);
        }
    }
}
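A caller would normally invoke this once per reference FASTA before opening anything that requires the dictionary. A minimal usage sketch, with a placeholder file name:

// Ensure reference.fa.dict exists, then read the dictionary back
File fasta = new File("reference.fa");
MultipleSamFileCommandLineProgram.ensureSequenceDictionary(fasta);
try (ReferenceSequenceFile ref = ReferenceSequenceFileFactory.getReferenceSequenceFile(fasta)) {
    // non-null if the dictionary already existed or was successfully created above
    SAMSequenceDictionary dict = ref.getSequenceDictionary();
}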
use of htsjdk.samtools.SAMFileWriter in project gridss by PapenfussLab.
the class SubsetToMissing method doWork.
@Override
protected int doWork() {
    long stop = Long.MAX_VALUE;
    if (STOP_AFTER != null && (long) STOP_AFTER > 0) {
        stop = STOP_AFTER;
    }
    log.debug("Setting language-neutral locale");
    java.util.Locale.setDefault(Locale.ROOT);
    if (TMP_DIR == null || TMP_DIR.size() == 0) {
        TMP_DIR = Lists.newArrayList(new File("."));
    }
    SamReaderFactory factory = SamReaderFactory.makeDefault().validationStringency(ValidationStringency.SILENT);
    SamReader input = factory.open(INPUT);
    Iterator<SAMRecord> intputit = new AsyncBufferedIterator<SAMRecord>(input.iterator(), 2, 16384);
    SAMFileWriter out = new SAMFileWriterFactory().makeSAMOrBAMWriter(input.getFileHeader(), true, OUTPUT);
    LongSet hashtable;
    if (PREALLOCATE != null) {
        log.info("Preallocating hash table");
        hashtable = new LongOpenHashBigSet(PREALLOCATE);
    } else {
        hashtable = new LongOpenHashBigSet();
    }
    for (File file : LOOKUP) {
        log.info("Loading lookup hashes for " + file.getAbsolutePath());
        SamReader lookup = factory.open(file);
        AsyncBufferedIterator<SAMRecord> it = new AsyncBufferedIterator<SAMRecord>(lookup.iterator(), 2, 16384);
        File cache = new File(file.getAbsolutePath() + ".SubsetToMissing.cache");
        if (cache.exists()) {
            log.info("Loading lookup hashes from cache");
            long n = stop;
            DataInputStream dis = null;
            try {
                long loadCount = 0;
                dis = new DataInputStream(new BufferedInputStream(new FileInputStream(cache)));
                while (n-- > 0) {
                    hashtable.add(dis.readLong());
                    loadCount++;
                    if (loadCount % 10000000 == 0) {
                        log.info(String.format("Loaded %d from cache", loadCount));
                    }
                }
            } catch (EOFException e) {
                // end of cache file reached; this is the normal termination path
                try {
                    if (dis != null)
                        dis.close();
                } catch (IOException e1) {
                    log.error(e1);
                }
            } catch (IOException e) {
                log.error(e);
            }
        } else {
            long n = stop;
            ProgressLoggingSAMRecordIterator loggedit = new ProgressLoggingSAMRecordIterator(it, new ProgressLogger(log));
            try {
                DataOutputStream dos = new DataOutputStream(new BufferedOutputStream(new FileOutputStream(cache)));
                while (loggedit.hasNext() && n-- > 0) {
                    long recordhash = hash(loggedit.next());
                    hashtable.add(recordhash);
                    dos.writeLong(recordhash);
                }
                dos.close();
            } catch (Exception e) {
                log.error(e, "Failed to load lookup. Running with partial results");
            }
            loggedit.close();
        }
        it.close();
    }
    long filtered = 0;
    log.info("Processing input");
    intputit = new ProgressLoggingSAMRecordIterator(intputit, new ProgressLogger(log));
    long n = stop;
    while (intputit.hasNext() && n-- > 0) {
        SAMRecord r = intputit.next();
        if (!hashtable.contains(hash(r))) {
            out.addAlignment(r);
        } else {
            filtered++;
            if (filtered % 1000000 == 0) {
                log.info(String.format("Filtered %d reads", filtered));
            }
        }
    }
    log.info("Closing output");
    out.close();
    return 0;
}
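The hash helper used above is not part of this excerpt. The sketch below shows one plausible way to map a record to a 64-bit key for the LongSet, using the read name and pairing flags; it is an illustration only and may differ from the actual gridss implementation.

// Hypothetical hash(SAMRecord): illustration only, not the gridss implementation
private static long hash(SAMRecord r) {
    long h = r.getReadName().hashCode();
    // distinguish first and second reads of a pair
    h = 31 * h + (r.getReadPairedFlag() && r.getSecondOfPairFlag() ? 2 : 1);
    // murmur-style finalizer to spread the low-entropy int hash across 64 bits
    h ^= (h >>> 33);
    h *= 0xff51afd7ed558ccdL;
    h ^= (h >>> 33);
    return h;
}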
use of htsjdk.samtools.SAMFileWriter in project gridss by PapenfussLab.
the class SAMFileUtil method merge.
/**
 * Merges a set of SAM files into a single file.
 * The SAM header is taken from the first input file.
 * @param input input files. All input files must have the same sort order.
 * @param output output file.
 * @param readerFactory factory used to open each input SAM/BAM file.
 * @param writerFactory factory used to create the merged output writer.
 * @throws IOException if an input cannot be read or the merged output cannot be written.
 */
public static void merge(Collection<File> input, File output, SamReaderFactory readerFactory, SAMFileWriterFactory writerFactory) throws IOException {
    if (input == null)
        throw new IllegalArgumentException("input is null");
    File tmpFile = gridss.Defaults.OUTPUT_TO_TEMP_FILE ? FileSystemContext.getWorkingFileFor(output, "gridss.tmp.merging.SAMFileUtil.") : output;
    Map<SamReader, AsyncBufferedIterator<SAMRecord>> map = new HashMap<>(input.size());
    SAMFileHeader header = null;
    try {
        for (File in : input) {
            SamReader r = readerFactory.open(in);
            SAMFileHeader currentHeader = r.getFileHeader();
            if (header == null) {
                header = currentHeader;
            }
            if (header.getSortOrder() != null && currentHeader.getSortOrder() != null && header.getSortOrder() != currentHeader.getSortOrder()) {
                throw new IllegalArgumentException(String.format("Sort order %s of %s does not match %s of %s", currentHeader.getSortOrder(), in, header.getSortOrder(), input.iterator().next()));
            }
            map.put(r, new AsyncBufferedIterator<>(r.iterator(), in.getName()));
        }
        try (SAMFileWriter writer = writerFactory.makeSAMOrBAMWriter(header, true, tmpFile)) {
            // n-way merge: pull records from the per-file iterators via the sort-order-aware queue
            Queue<PeekingIterator<SAMRecord>> queue = createMergeQueue(header.getSortOrder());
            for (PeekingIterator<SAMRecord> it : map.values()) {
                if (it.hasNext()) {
                    queue.add(it);
                }
            }
            while (!queue.isEmpty()) {
                PeekingIterator<SAMRecord> it = queue.poll();
                SAMRecord r = it.next();
                writer.addAlignment(r);
                if (it.hasNext()) {
                    queue.add(it);
                }
            }
        }
        for (Entry<SamReader, AsyncBufferedIterator<SAMRecord>> entry : map.entrySet()) {
            CloserUtil.close(entry.getValue());
            CloserUtil.close(entry.getKey());
        }
        if (tmpFile != output) {
            FileHelper.move(tmpFile, output, true);
        }
    } finally {
        for (Entry<SamReader, AsyncBufferedIterator<SAMRecord>> entry : map.entrySet()) {
            CloserUtil.close(entry.getValue());
            CloserUtil.close(entry.getKey());
        }
        if (tmpFile != output && tmpFile.exists()) {
            FileHelper.delete(tmpFile, true);
        }
    }
}
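A typical call site, using the default htsjdk factories (file names are placeholders):

// Merge two BAMs with matching sort order into a single output
SamReaderFactory readerFactory = SamReaderFactory.makeDefault().validationStringency(ValidationStringency.SILENT);
SAMFileWriterFactory writerFactory = new SAMFileWriterFactory();
SAMFileUtil.merge(
        Arrays.asList(new File("sample1.bam"), new File("sample2.bam")),
        new File("merged.bam"),
        readerFactory,
        writerFactory);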
use of htsjdk.samtools.SAMFileWriter in project jvarkit by lindenb.
the class PcrClipReads method run.
private int run(final SamReader reader) {
    final SAMFileHeader header1 = reader.getFileHeader();
    final SAMFileHeader header2 = header1.clone();
    Optional<SAMProgramRecord> samProgramRecord = Optional.empty();
    if (this.programId) {
        final SAMProgramRecord spr = header2.createProgramRecord();
        samProgramRecord = Optional.of(spr);
        spr.setProgramName(PcrClipReads.class.getSimpleName());
        spr.setProgramVersion(this.getGitHash());
        spr.setCommandLine(getProgramCommandLine().replace('\t', ' '));
    }
    header2.addComment(getProgramName() + " " + getVersion() + ": Processed with " + getProgramCommandLine());
    header2.setSortOrder(SortOrder.unsorted);
    SAMFileWriter sw = null;
    SAMRecordIterator iter = null;
    try {
        sw = this.writingBamArgs.openSAMFileWriter(outputFile, header2, false);
        final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(header1).logger(LOG);
        iter = reader.iterator();
        while (iter.hasNext()) {
            SAMRecord rec = progress.watch(iter.next());
            if (this.onlyFlag != -1 && (rec.getFlags() & this.onlyFlag) != 0) {
                sw.addAlignment(rec);
                continue;
            }
            if (rec.getReadUnmappedFlag()) {
                sw.addAlignment(rec);
                continue;
            }
            final Interval fragment = findInterval(rec);
            if (fragment == null) {
                rec.setMappingQuality(0);
                sw.addAlignment(rec);
                continue;
            }
            // strand is '-' and overlap in 5' of PCR fragment
            if (rec.getReadNegativeStrandFlag() && fragment.getStart() < rec.getAlignmentStart() && rec.getAlignmentStart() < fragment.getEnd()) {
                rec.setMappingQuality(0);
                sw.addAlignment(rec);
                continue;
            }
            // strand is '+' and overlap in 3' of PCR fragment
            if (!rec.getReadNegativeStrandFlag() && fragment.getStart() < rec.getAlignmentEnd() && rec.getAlignmentEnd() < fragment.getEnd()) {
                rec.setMappingQuality(0);
                sw.addAlignment(rec);
                continue;
            }
            // contained in PCR fragment
            if (rec.getAlignmentStart() >= fragment.getStart() && rec.getAlignmentEnd() <= fragment.getEnd()) {
                sw.addAlignment(rec);
                continue;
            }
            final ReadClipper readClipper = new ReadClipper();
            if (samProgramRecord.isPresent()) {
                readClipper.setProgramGroup(samProgramRecord.get().getId());
            }
            rec = readClipper.clip(rec, fragment);
            sw.addAlignment(rec);
        }
        progress.finish();
        return 0;
    } catch (final Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(iter);
        CloserUtil.close(sw);
    }
}
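Because header2 is marked unsorted, the clipped output usually needs a coordinate sort before it can be indexed. A minimal sketch with htsjdk (file names are placeholders); passing presorted=false with a coordinate-sorted header makes the writer perform the sort itself:

// Re-sort the clipped BAM by coordinate and index it
try (SamReader in = SamReaderFactory.makeDefault().open(new File("clipped.bam"))) {
    SAMFileHeader hdr = in.getFileHeader().clone();
    hdr.setSortOrder(SAMFileHeader.SortOrder.coordinate);
    try (SAMFileWriter w = new SAMFileWriterFactory()
            .setCreateIndex(true)
            .makeSAMOrBAMWriter(hdr, false, new File("clipped.sorted.bam"))) {
        for (SAMRecord rec : in) {
            w.addAlignment(rec);
        }
    }
} catch (IOException e) {
    throw new RuntimeException(e);
}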