Use of htsjdk.samtools.SAMFileWriterFactory in project gridss by PapenfussLab.
Class SoftClipsToSplitReads, method doWork.
/**
 * Runs the split-read realignment of INPUT into OUTPUT.
 *
 * <p>Sets the default locale to {@code Locale.ROOT}, validates the command-line
 * parameters, configures a {@code SplitReadRealigner} from the program options,
 * and then creates supplementary alignments using either a streaming aligner
 * (when ALIGNER_STREAMING is set) or a FASTQ-based external aligner.
 *
 * @return 0 on success, -1 if an {@code IOException} was raised while realigning
 */
@Override
protected int doWork() {
    log.debug("Setting language-neutral locale");
    Locale.setDefault(Locale.ROOT);
    validateParameters();

    // Processing context shared by the realigner.
    GenomicProcessingContext context =
            new GenomicProcessingContext(getFileSystemContext(), REFERENCE_SEQUENCE, getReference());
    context.setCommandLineProgram(this);
    context.setFilterDuplicates(IGNORE_DUPLICATES);

    // Copy the command-line options onto the realigner.
    SplitReadRealigner splitRealigner = new SplitReadRealigner(context);
    splitRealigner.setMinSoftClipLength(MIN_CLIP_LENGTH);
    splitRealigner.setMinSoftClipQuality(MIN_CLIP_QUAL);
    splitRealigner.setProcessSecondaryAlignments(PROCESS_SECONDARY_ALIGNMENTS);
    splitRealigner.setWorkerThreads(WORKER_THREADS);

    try {
        SamReaderFactory samReaders = SamReaderFactory.make();
        SAMFileWriterFactory samWriters = new SAMFileWriterFactory();
        if (ALIGNER_STREAMING) {
            ExternalProcessStreamingAligner streamingAligner = new ExternalProcessStreamingAligner(
                    samReaders, ALIGNER_COMMAND_LINE, REFERENCE_SEQUENCE, WORKER_THREADS);
            splitRealigner.createSupplementaryAlignments(streamingAligner, INPUT, OUTPUT);
        } else {
            ExternalProcessFastqAligner fastqAligner = new ExternalProcessFastqAligner(
                    samReaders, samWriters, ALIGNER_COMMAND_LINE);
            splitRealigner.createSupplementaryAlignments(fastqAligner, INPUT, OUTPUT);
        }
        return 0;
    } catch (IOException e) {
        log.error(e);
        return -1;
    }
}
Use of htsjdk.samtools.SAMFileWriterFactory in project gridss by PapenfussLab.
Class SubsetToMissing, method doWork.
/**
 * Writes to OUTPUT every record of INPUT whose hash is not present in the set
 * of hashes collected from the LOOKUP files.
 *
 * <p>For each lookup file the record hashes are read from a sibling
 * {@code <file>.SubsetToMissing.cache} file when it exists; otherwise they are
 * computed from the lookup records and written to that cache file as they are
 * computed. When STOP_AFTER is a positive value it caps the number of hashes
 * loaded per lookup file and the number of input records processed.
 *
 * @return always 0
 */
@Override
protected int doWork() {
    long stop = Long.MAX_VALUE;
    if (STOP_AFTER != null && (long) STOP_AFTER > 0) {
        stop = STOP_AFTER;
    }
    log.debug("Setting language-neutral locale");
    java.util.Locale.setDefault(Locale.ROOT);
    if (TMP_DIR == null || TMP_DIR.size() == 0) {
        TMP_DIR = Lists.newArrayList(new File("."));
    }
    SamReaderFactory factory = SamReaderFactory.makeDefault().validationStringency(ValidationStringency.SILENT);
    SamReader input = factory.open(INPUT);
    Iterator<SAMRecord> inputIt = new AsyncBufferedIterator<SAMRecord>(input.iterator(), 2, 16384);
    SAMFileWriter out = new SAMFileWriterFactory().makeSAMOrBAMWriter(input.getFileHeader(), true, OUTPUT);
    LongSet hashtable;
    if (PREALLOCATE != null) {
        log.info("Preallocating hash table");
        hashtable = new LongOpenHashBigSet(PREALLOCATE);
    } else {
        hashtable = new LongOpenHashBigSet();
    }
    for (File file : LOOKUP) {
        log.info("Loading lookup hashes for " + file.getAbsolutePath());
        // NOTE(review): the lookup SamReader itself is never closed here; only the
        // iterator wrapping it is. Confirm whether closing the iterator releases the reader.
        SamReader lookup = factory.open(file);
        AsyncBufferedIterator<SAMRecord> it = new AsyncBufferedIterator<SAMRecord>(lookup.iterator(), 2, 16384);
        File cache = new File(file.getAbsolutePath() + ".SubsetToMissing.cache");
        if (cache.exists()) {
            log.info("Loading lookup hashes from cache");
            long n = stop;
            long loadCount = 0;
            // try-with-resources closes the cache stream on every exit path: end of file,
            // STOP_AFTER limit reached, or an I/O failure.
            try (DataInputStream dis = new DataInputStream(new BufferedInputStream(new FileInputStream(cache)))) {
                while (n-- > 0) {
                    hashtable.add(dis.readLong());
                    loadCount++;
                    if (loadCount % 10000000 == 0) {
                        log.info(String.format("Loaded %d from cache", loadCount));
                    }
                }
            } catch (EOFException ignored) {
                // Expected: readLong() signals the end of the cache file with EOFException.
            } catch (IOException e) {
                log.error(e);
            }
        } else {
            long n = stop;
            ProgressLoggingSAMRecordIterator loggedit = new ProgressLoggingSAMRecordIterator(it, new ProgressLogger(log));
            // NOTE(review): when STOP_AFTER ends this loop early, or an exception interrupts it,
            // the cache file holds only the hashes written so far and will be reused by later runs.
            try (DataOutputStream dos = new DataOutputStream(new BufferedOutputStream(new FileOutputStream(cache)))) {
                while (loggedit.hasNext() && n-- > 0) {
                    long recordhash = hash(loggedit.next());
                    hashtable.add(recordhash);
                    dos.writeLong(recordhash);
                }
            } catch (Exception e) {
                // Best effort: continue with whatever hashes were collected before the failure.
                log.error(e, "Failed to load lookup. Running with partial results");
            }
            loggedit.close();
        }
        it.close();
    }
    long filtered = 0;
    log.info("Processing input");
    inputIt = new ProgressLoggingSAMRecordIterator(inputIt, new ProgressLogger(log));
    long n = stop;
    while (inputIt.hasNext() && n-- > 0) {
        SAMRecord r = inputIt.next();
        if (!hashtable.contains(hash(r))) {
            // Record is missing from every lookup file: keep it.
            out.addAlignment(r);
        } else {
            filtered++;
            if (filtered % 1000000 == 0) {
                log.info(String.format("Filtered %d reads", filtered));
            }
        }
    }
    log.info("Closing output");
    out.close();
    return 0;
}
Use of htsjdk.samtools.SAMFileWriterFactory in project jvarkit by lindenb.
Class Biostar78400Test, method test01.
/**
 * Runs Biostar78400 on a BAM holding a single unmapped read and checks the output.
 *
 * <p>The read name starts with {@code HS20001259127:1}, and the XML passed with
 * {@code -x} declares group {@code X1} (sample {@code S1}) under that flowcell
 * name and lane index. The test expects the tool to exit with 0, the output
 * header to contain read groups, and the only output record to carry read
 * group {@code X1} with sample {@code S1}.
 *
 * @throws IOException if a temporary file cannot be written or the output cannot be read
 */
@Test
public void test01() throws IOException {
    final String flowcell = "HS20001259127";
    final String lane = "1";

    // Build the input BAM containing one unmapped read.
    final File inputBam = createTmpFile(".bam");
    final SAMFileHeader header = new SAMFileHeader();
    header.setSortOrder(SortOrder.unsorted);
    final SAMFileWriter bamWriter = new SAMFileWriterFactory().makeBAMWriter(header, true, inputBam);
    final SAMRecord inputRead = new DefaultSAMRecordFactory().createSAMRecord(header);
    inputRead.setReadName(flowcell + ":" + lane + ":1210:15640:52255");
    inputRead.setReadString("GAATTC");
    inputRead.setBaseQualityString("222222");
    SAMUtils.makeReadUnmapped(inputRead);
    bamWriter.addAlignment(inputRead);
    bamWriter.close();
    assertIsValidBam(inputBam);

    // Write the read-group description consumed through the -x option.
    final String readGroupsXml = "<?xml version=\"1.0\"?><read-groups>"
            + "<flowcell name=\"" + flowcell + "\"><lane index=\"" + lane + "\">"
            + "<group ID=\"X1\"><library>L1</library><platform>P1</platform>"
            + "<sample>S1</sample><platformunit>PU1</platformunit>"
            + "<center>C1</center><description>blabla</description></group>"
            + "</lane></flowcell><flowcell name=\"HS20001259128\">"
            + "<lane index=\"2\"><group ID=\"x2\"><library>L2</library>"
            + "<platform>P2</platform><sample>S2</sample><platformunit>PU1</platformunit>"
            + "<center>C1</center><description>blabla</description></group></lane>"
            + "</flowcell></read-groups>";
    final File xmlFile = createTmpFile(".xml");
    final PrintWriter xmlWriter = new PrintWriter(xmlFile);
    xmlWriter.println(readGroupsXml);
    xmlWriter.flush();
    xmlWriter.close();
    assertIsXml(xmlFile);

    // Run the tool and require a zero exit status.
    final File outputBam = createTmpFile(".bam");
    Assert.assertEquals(
            new Biostar78400().instanceMain(
                    newCmd().add("-o").add(outputBam).add("-x").add(xmlFile).add(inputBam).make()),
            0);

    // Inspect the output: header read groups first, then the single record.
    final SamReader reader = SamReaderFactory.makeDefault()
            .validationStringency(ValidationStringency.LENIENT)
            .open(outputBam);
    final SAMFileHeader outputHeader = reader.getFileHeader();
    Assert.assertTrue(outputHeader != null);
    Assert.assertTrue(outputHeader.getReadGroups() != null);
    Assert.assertFalse(outputHeader.getReadGroups().isEmpty());

    final SAMRecordIterator records = reader.iterator();
    Assert.assertTrue(records.hasNext());
    final SAMRecord outputRead = records.next();
    final SAMReadGroupRecord readGroup = outputRead.getReadGroup();
    Assert.assertNotNull(readGroup);
    Assert.assertEquals(readGroup.getId(), "X1");
    Assert.assertEquals(readGroup.getSample(), "S1");
    Assert.assertFalse(records.hasNext());
    records.close();
    reader.close();
}
Aggregations