use of htsjdk.samtools.SAMFileWriter in project jvarkit by lindenb.
the class BlastToSam method doWork.
@Override
public int doWork(List<String> args) {
if (this.faidx == null || !this.faidx.exists() || !this.faidx.isFile()) {
LOG.error("Option -R was not defined or dictionary missing");
return -1;
}
final boolean interleaved_input = this.EXPECTED_SIZE > 0;
final int maxRecordsInRam = 5000;
SAMFileWriter sfw = null;
XMLEventReader rx = null;
final SAMFileWriterFactory sfwf = new SAMFileWriterFactory();
sfwf.setCreateIndex(false);
sfwf.setMaxRecordsInRam(maxRecordsInRam);
sfwf.setCreateMd5File(false);
sfwf.setUseAsyncIo(false);
final SAMFileHeader header = new SAMFileHeader();
try {
LOG.info("opening " + faidx);
this.dictionary = SAMSequenceDictionaryExtractor.extractDictionary(faidx);
header.setSortOrder(SortOrder.unsorted);
header.setSequenceDictionary(this.dictionary);
final JAXBContext jc = JAXBContext.newInstance("gov.nih.nlm.ncbi.blast");
this.unmarshaller = jc.createUnmarshaller();
final XMLInputFactory xmlInputFactory = XMLInputFactory.newFactory();
xmlInputFactory.setProperty(XMLInputFactory.IS_NAMESPACE_AWARE, Boolean.FALSE);
xmlInputFactory.setProperty(XMLInputFactory.IS_COALESCING, Boolean.TRUE);
xmlInputFactory.setProperty(XMLInputFactory.IS_REPLACING_ENTITY_REFERENCES, Boolean.TRUE);
xmlInputFactory.setProperty(XMLInputFactory.IS_SUPPORTING_EXTERNAL_ENTITIES, Boolean.FALSE);
xmlInputFactory.setXMLResolver(new XMLResolver() {
@Override
public Object resolveEntity(String arg0, String arg1, String arg2, String arg3) throws XMLStreamException {
LOG.info("resolveEntity:" + arg0 + "/" + arg1 + "/" + arg2);
return null;
}
});
final String inputName = oneFileOrNull(args);
if (inputName == null) {
LOG.info("Reading from stdin");
rx = xmlInputFactory.createXMLEventReader(stdin());
} else if (args.size() == 1) {
LOG.info("Reading from " + inputName);
rx = xmlInputFactory.createXMLEventReader(IOUtils.openURIForBufferedReading(inputName));
} else {
LOG.error("Illegal number of args");
return -1;
}
final SAMProgramRecord prg2 = header.createProgramRecord();
fillHeader(rx, prg2);
final SAMProgramRecord prg1 = header.createProgramRecord();
prg1.setCommandLine(getProgramCommandLine());
prg1.setProgramVersion(getVersion());
prg1.setProgramName(getProgramName());
prg1.setPreviousProgramGroupId(prg2.getId());
final SAMReadGroupRecord rg1 = new SAMReadGroupRecord("g1");
rg1.setLibrary("blast");
rg1.setSample("blast");
rg1.setDescription("blast");
header.addReadGroup(rg1);
sfw = this.writingBamArgs.openSAMFileWriter(outputFile, header, true);
if (interleaved_input) {
run_paired(sfw, rx, header);
} else {
run_single(sfw, rx, header);
}
return RETURN_OK;
} catch (final Exception err) {
LOG.error(err);
return -1;
} finally {
CloserUtil.close(sfw);
CloserUtil.close(rx);
}
}
use of htsjdk.samtools.SAMFileWriter in project jvarkit by lindenb.
the class BamStage method doMenuSaveAs.
@Override
protected void doMenuSaveAs() {
final FileChooser fc = owner.newFileChooser();
fc.getExtensionFilters().addAll(EXTENSION_FILTERS);
final File saveAs = owner.updateLastDir(fc.showSaveDialog(this));
if (saveAs == null)
return;
if (!saveAs.getName().endsWith(".bam")) {
final Alert alert = new Alert(AlertType.ERROR, "Output should end with .bam", ButtonType.OK);
alert.showAndWait();
return;
}
Optional<BamJavascripFilter> bamjsfilter = Optional.empty();
if (this.owner.javascriptCompiler.isPresent() && !this.javascriptArea.getText().trim().isEmpty()) {
try {
bamjsfilter = Optional.of(new BamJavascripFilter(this.getBamFile().getHeader(), Optional.of(this.owner.javascriptCompiler.get().compile(this.javascriptArea.getText()))));
} catch (final Exception err) {
JfxNgs.showExceptionDialog(this, err);
bamjsfilter = Optional.empty();
return;
}
}
final Predicate<SAMRecord> flagfilter = makeFlagPredicate();
final SAMFileWriterFactory swf = new SAMFileWriterFactory();
swf.setCreateIndex(true);
CloseableIterator<SAMRecord> iter = null;
SAMFileWriter w = null;
try {
final SAMFileHeader h2 = this.getBamFile().getHeader().clone();
h2.addComment("Generated with JfxNgs. javascript was: " + this.javascriptArea.getText().trim().replaceAll("[\n\t\r ]+", " "));
w = swf.makeBAMWriter(h2, true, saveAs);
iter = new LogCloseableIterator(this.getBamFile().iterator(), null);
while (iter.hasNext()) {
final SAMRecord rec = iter.next();
if (!flagfilter.test(rec))
continue;
if (bamjsfilter.isPresent()) {
if (bamjsfilter.get().eval(rec) == null)
continue;
}
w.addAlignment(rec);
}
w.close();
w = null;
iter.close();
iter = null;
final Alert alert = new Alert(AlertType.CONFIRMATION, "Done", ButtonType.OK);
alert.showAndWait();
} catch (Exception err) {
JfxNgs.showExceptionDialog(this, err);
return;
} finally {
CloserUtil.close(iter);
CloserUtil.close(w);
}
}
use of htsjdk.samtools.SAMFileWriter in project jvarkit by lindenb.
the class GBrowserHtml method doWork.
@Override
public int doWork(List<String> args) {
final int DEFAULT_EXTEND_INTERVAL = 0;
SamReader samReader = null;
ZipOutputStream zout = null;
BufferedReader bufReader = null;
PrintWriter paramsWriter = null;
JsonWriter paramsJsonWriter = null;
String line;
IndexedFastaSequenceFile faidx = null;
long snapshot_id = 0L;
final String inputName = oneFileOrNull(args);
try {
final SamJsonWriterFactory samJsonWriterFactory = SamJsonWriterFactory.newInstance().printHeader(true).printAttributes(false).printMate(true).printReadQualities(false).closeStreamAtEnd(false);
if (this.outputFile != null) {
if (!this.outputFile.getName().endsWith(".zip")) {
LOG.error("Output file should end with *.zip");
return -1;
}
}
bufReader = (inputName == null ? IOUtils.openStreamForBufferedReader(stdin()) : IOUtils.openURIForBufferedReading(inputName));
final File tmpFile1 = File.createTempFile("gbrowse.", ".tmp");
tmpFile1.deleteOnExit();
final File tmpFile2 = File.createTempFile("gbrowse.", ".tmp");
tmpFile2.deleteOnExit();
paramsWriter = new PrintWriter(tmpFile2);
paramsWriter.print("var config=");
paramsWriter.flush();
paramsJsonWriter = new JsonWriter(paramsWriter);
paramsJsonWriter.beginArray();
zout = new ZipOutputStream(super.openFileOrStdoutAsStream(this.outputFile));
File bamFile = null;
File faidxFile = null;
String sampleName = null;
int extend_interval = DEFAULT_EXTEND_INTERVAL;
String title = null;
Interval interval = null;
while ((line = bufReader.readLine()) != null) {
LOG.info(line);
if (line.isEmpty() || line.startsWith("#"))
continue;
final int eq = line.indexOf("=");
final String key = (eq == -1 ? "" : line.substring(0, eq).toLowerCase().trim());
final String value = (eq == -1 ? "" : line.substring(eq + 1).trim());
if (key.equals("bam")) {
if (samReader != null)
samReader.close();
samReader = null;
bamFile = (value.isEmpty() ? null : new File(value));
} else if (key.equals("sample")) {
sampleName = (value.isEmpty() ? null : value);
} else if (key.equals("title")) {
title = (value.isEmpty() ? null : value);
} else if (key.equals("ref") || key.equals("fasta")) {
if (faidx != null)
faidx.close();
faidx = null;
faidxFile = (value.isEmpty() ? null : new File(value));
} else if (key.equals("extend")) {
extend_interval = (value.isEmpty() ? DEFAULT_EXTEND_INTERVAL : Integer.parseInt(value));
} else if (key.equals("position") || key.equals("location") || key.equals("interval") || key.equals("goto")) {
Pattern pat1 = Pattern.compile("([^\\:]+)\\:([\\d,]+)");
Matcher matcher = pat1.matcher(value);
if (matcher.matches()) {
String c = matcher.group(1);
int pos = Integer.parseInt(matcher.group(2).replaceAll("[,]", ""));
interval = new Interval(c, Math.max(1, pos - extend_interval), pos + extend_interval);
continue;
}
pat1 = Pattern.compile("([^\\:]+)\\:([\\d,]+)\\-([\\d,]+)");
matcher = pat1.matcher(value);
if (matcher.matches()) {
String c = matcher.group(1);
int B = Integer.parseInt(matcher.group(2).replaceAll("[,]", ""));
int E = Integer.parseInt(matcher.group(3).replaceAll("[,]", ""));
if (B > E) {
LOG.error("bad interval :" + line);
return -1;
}
interval = new Interval(c, Math.max(1, B - extend_interval), E + extend_interval);
continue;
}
pat1 = Pattern.compile("([^\\:]+)\\:([\\d,]+)\\+([\\d,]+)");
matcher = pat1.matcher(value);
if (matcher.matches()) {
String c = matcher.group(1);
int B = Integer.parseInt(matcher.group(2).replaceAll("[,]", ""));
int x = Integer.parseInt(matcher.group(3).replaceAll("[,]", ""));
interval = new Interval(c, Math.max(1, B - (x + extend_interval)), B + (x + extend_interval));
continue;
}
LOG.error("bad interval :" + line);
return -1;
} else if (line.toLowerCase().equals("snapshot")) {
if (interval == null) {
LOG.error("No interval defined!");
continue;
}
if (bamFile == null) {
LOG.error("No BAM file defined!");
continue;
}
++snapshot_id;
LOG.info("open samFile " + bamFile);
if (samReader == null) {
samReader = super.createSamReaderFactory().open(bamFile);
}
FileWriter jsonFileWriter = new FileWriter(tmpFile1);
JsonWriter jsw = new JsonWriter(jsonFileWriter);
jsw.beginObject();
jsw.name("interval");
jsw.beginObject();
jsw.name("contig");
jsw.value(interval.getContig());
jsw.name("start");
jsw.value(interval.getStart());
jsw.name("end");
jsw.value(interval.getEnd());
jsw.endObject();
if (faidxFile != null) {
if (faidx == null) {
faidx = new IndexedFastaSequenceFile(faidxFile);
}
ReferenceSequence dna = faidx.getSubsequenceAt(interval.getContig(), interval.getStart(), interval.getEnd());
jsw.name("reference");
jsw.value(dna.getBaseString());
}
jsw.name("sam");
SAMFileWriter samFileWriter = samJsonWriterFactory.open(samReader.getFileHeader(), jsw);
SAMRecordIterator samRecIter = samReader.queryOverlapping(interval.getContig(), interval.getStart(), interval.getEnd());
while (samRecIter.hasNext()) {
final SAMRecord rec = samRecIter.next();
if (sampleName != null && !sampleName.isEmpty()) {
SAMReadGroupRecord srg = rec.getReadGroup();
if (srg == null)
continue;
if (!sampleName.equals(srg.getSample()))
continue;
}
if (rec.getReadUnmappedFlag())
continue;
samFileWriter.addAlignment(rec);
}
samRecIter.close();
samFileWriter.close();
jsw.endObject();
jsw.flush();
jsw.close();
jsonFileWriter.close();
ZipEntry entry = new ZipEntry(this.prefix + "_snapshot." + String.format("%05d", snapshot_id) + ".json");
zout.putNextEntry(entry);
IOUtils.copyTo(tmpFile1, zout);
zout.closeEntry();
tmpFile1.delete();
paramsJsonWriter.beginObject();
paramsJsonWriter.name("title");
paramsJsonWriter.value(title == null ? interval.getContig() + ":" + interval.getStart() + "-" + interval.getEnd() + (sampleName == null ? "" : " " + sampleName) : title);
paramsJsonWriter.name("interval");
paramsJsonWriter.beginObject();
paramsJsonWriter.name("contig");
paramsJsonWriter.value(interval.getContig());
paramsJsonWriter.name("start");
paramsJsonWriter.value(interval.getStart());
paramsJsonWriter.name("end");
paramsJsonWriter.value(interval.getEnd());
paramsJsonWriter.endObject();
if (sampleName != null && !sampleName.isEmpty()) {
paramsJsonWriter.name("sample");
paramsJsonWriter.value(sampleName);
}
paramsJsonWriter.name("href");
paramsJsonWriter.value("_snapshot." + String.format("%05d", snapshot_id) + ".json");
paramsJsonWriter.endObject();
} else if (line.toLowerCase().equals("exit") || line.toLowerCase().equals("quit")) {
break;
}
}
bufReader.close();
bufReader = null;
for (final String jssrc : new String[] { "gbrowse", "hershey", "samtools", "com.github.lindenb.jvarkit.tools.misc.GBrowserHtml" }) {
InputStream is = this.getClass().getResourceAsStream("/META-INF/js/" + jssrc + ".js");
if (is == null) {
LOG.error("Cannot read resource /META-INF/js/" + jssrc + ".js");
return -1;
}
ZipEntry entry = new ZipEntry(this.prefix + jssrc + ".js");
entry.setComment("JAVASCRIPT SOURCE " + jssrc);
zout.putNextEntry(entry);
IOUtils.copyTo(is, zout);
CloserUtil.close(is);
zout.closeEntry();
}
// save params.js
paramsJsonWriter.endArray();
paramsJsonWriter.flush();
paramsWriter.println(";");
paramsWriter.flush();
paramsJsonWriter.close();
paramsWriter.close();
zout.putNextEntry(new ZipEntry(this.prefix + "config.js"));
IOUtils.copyTo(tmpFile2, zout);
zout.closeEntry();
tmpFile2.delete();
// save index.html
zout.putNextEntry(new ZipEntry(this.prefix + "index.html"));
XMLOutputFactory xof = XMLOutputFactory.newFactory();
XMLStreamWriter w = xof.createXMLStreamWriter(zout, "UTF-8");
w.writeStartElement("html");
w.writeStartElement("head");
w.writeEmptyElement("meta");
w.writeAttribute("http-equiv", "Content-Type");
w.writeAttribute("content", "text/html; charset=utf-8");
w.writeEmptyElement("meta");
w.writeAttribute("http-equiv", "author");
w.writeAttribute("content", "Pierre Lindenbaum Phd @yokofakun");
w.writeStartElement("title");
w.writeCharacters(getProgramName() + ":" + getVersion());
// title
w.writeEndElement();
w.writeStartElement("style");
w.writeAttribute("type", "text/css");
w.writeCharacters("body { color:rgb(50,50,50); margin:20px; padding:20px; font: 12pt Arial, Helvetica, sans-serif; }\n");
w.writeCharacters("label { text-align:right; }\n");
w.writeCharacters("button { border: 1px solid; background-image:-moz-linear-gradient( top, gray, lightgray ); }\n");
w.writeCharacters("canvas { image-rendering:auto;}\n");
w.writeCharacters(".me { padding-top:100px; font-size:80%; }\n");
// style
w.writeEndElement();
for (final String src : new String[] { "samtools", "gbrowse", "hershey", "config", "com.github.lindenb.jvarkit.tools.misc.GBrowserHtml" }) {
w.writeStartElement("script");
w.writeAttribute("type", "text/javascript");
w.writeAttribute("language", "text/javascript");
w.writeAttribute("src", src + ".js");
w.writeCharacters("");
// script
w.writeEndElement();
}
// head
w.writeEndElement();
w.writeStartElement("body");
w.writeStartElement("div");
w.writeStartElement("button");
w.writeAttribute("onclick", "changemenu(-1)");
w.writeCharacters("prev");
// button
w.writeEndElement();
w.writeStartElement("select");
w.writeAttribute("id", "menu");
// menu
w.writeEndElement();
w.writeStartElement("button");
w.writeAttribute("onclick", "changemenu(+1)");
w.writeCharacters("next");
// button
w.writeEndElement();
// div
w.writeEndElement();
w.writeStartElement("div");
w.writeAttribute("id", "flags");
// div
w.writeEndElement();
w.writeStartElement("div");
w.writeAttribute("style", "text-align:center;");
w.writeStartElement("div");
w.writeAttribute("style", "font-size:200%;margin:10px;");
w.writeAttribute("id", "browserTitle");
// div
w.writeEndElement();
w.writeStartElement("div");
w.writeEmptyElement("canvas");
w.writeAttribute("id", "canvasdoc");
w.writeAttribute("width", "100");
w.writeAttribute("height", "100");
// div
w.writeEndElement();
// div
w.writeEndElement();
w.writeEmptyElement("hr");
w.writeStartElement("div");
w.writeAttribute("class", "me");
w.writeCharacters("Pierre Lindenbaum PhD. ");
w.writeStartElement("a");
w.writeAttribute("href", "https://github.com/lindenb/jvarkit");
w.writeCharacters("https://github.com/lindenb/jvarkit");
w.writeEndElement();
w.writeCharacters(". Tested with Firefox 45.0");
w.writeEndElement();
// body
w.writeEndElement();
// html
w.writeEndElement();
w.flush();
w.close();
w = null;
zout.closeEntry();
zout.finish();
zout.close();
return RETURN_OK;
} catch (Throwable err) {
LOG.error(err);
return -1;
} finally {
CloserUtil.close(paramsJsonWriter);
CloserUtil.close(paramsWriter);
CloserUtil.close(zout);
CloserUtil.close(bufReader);
CloserUtil.close(samReader);
CloserUtil.close(faidx);
}
}
use of htsjdk.samtools.SAMFileWriter in project jvarkit by lindenb.
the class BamClipToInsertion method doWork.
@Override
public int doWork(final List<String> args) {
SAMRecordIterator iter = null;
SamReader sfr = null;
SAMFileWriter sfw = null;
try {
sfr = super.openSamReader(oneFileOrNull(args));
final SAMFileHeader header1 = sfr.getFileHeader();
if (header1 == null) {
LOG.error("File header missing");
return -1;
}
if (header1.getSortOrder() != SortOrder.coordinate) {
LOG.error("Input is not sorted on coordinate.");
return -1;
}
final SAMFileHeader header2 = header1.clone();
header2.addComment(getProgramName() + ":" + getVersion() + ":" + getProgramCommandLine());
header2.setSortOrder(SortOrder.unsorted);
sfw = this.writingBamArgs.openSAMFileWriter(output, header2, true);
final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(header1);
iter = sfr.iterator();
String curContig = null;
LinkedList<SamAndCigar> buffer = new LinkedList<>();
for (; ; ) {
SAMRecord rec = null;
if (iter.hasNext()) {
rec = progress.watch(iter.next());
// ignore unmapped reads
if (rec.getReadUnmappedFlag()) {
sfw.addAlignment(rec);
continue;
}
}
if (rec == null || (curContig != null && !curContig.equals(rec.getReferenceName()))) {
for (final SamAndCigar r : buffer) sfw.addAlignment(r.getSAMRecord());
buffer.clear();
// we're done
if (rec == null)
break;
curContig = rec.getReferenceName();
}
final SamAndCigar sac = new SamAndCigar(rec);
if (!sac.containsIorS) {
sfw.addAlignment(rec);
continue;
}
buffer.add(sac);
int i = 0;
while (i < buffer.size()) {
final SamAndCigar ri = buffer.get(i);
if (ri.getSAMRecord().getUnclippedEnd() < rec.getUnclippedStart()) {
for (int j = 0; j < buffer.size(); ++j) {
if (i == j)
continue;
ri.merge(buffer.get(j));
}
sfw.addAlignment(ri.build());
buffer.remove(i);
} else {
++i;
}
}
}
progress.finish();
LOG.info("done");
return RETURN_OK;
} catch (Exception err) {
LOG.error(err);
return -1;
} finally {
CloserUtil.close(iter);
CloserUtil.close(sfr);
CloserUtil.close(sfw);
}
}
use of htsjdk.samtools.SAMFileWriter in project jvarkit by lindenb.
the class BamTile method doWork.
@Override
public int doWork(final List<String> args) {
SAMRecordIterator iter = null;
SamReader sfr = null;
SAMFileWriter sfw = null;
try {
sfr = openSamReader(oneFileOrNull(args));
final SAMFileHeader header1 = sfr.getFileHeader();
if (header1 == null) {
LOG.error("File header missing");
return -1;
}
if (header1.getSortOrder() != SAMFileHeader.SortOrder.coordinate) {
LOG.error("File header not sorted on coordinate");
return -1;
}
final SAMFileHeader header2 = header1.clone();
header2.addComment("BamTile:" + getVersion() + ":" + getProgramCommandLine());
sfw = this.writingBamArgs.openSAMFileWriter(this.outputFile, header2, true);
final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(header1);
iter = sfr.iterator();
final LinkedList<SAMRecord> buffer = new LinkedList<>();
for (; ; ) {
SAMRecord rec = null;
if (iter.hasNext()) {
rec = progress.watch(iter.next());
if (rec.getReadUnmappedFlag())
continue;
if (this.filterOut.filterOut(rec))
continue;
if (!buffer.isEmpty()) {
final SAMRecord last = buffer.getLast();
if (this.no_overlap) {
if (last.getReferenceIndex() == rec.getReferenceIndex() && last.getEnd() >= rec.getStart()) {
continue;
}
} else {
if (last.getReferenceIndex() == rec.getReferenceIndex() && last.getAlignmentStart() <= rec.getAlignmentStart() && last.getAlignmentEnd() >= rec.getAlignmentEnd()) {
continue;
}
}
}
}
if (rec == null || (!buffer.isEmpty() && buffer.getLast().getReferenceIndex() != rec.getReferenceIndex())) {
while (!buffer.isEmpty()) {
sfw.addAlignment(buffer.removeFirst());
}
if (rec == null)
break;
}
buffer.add(rec);
if (!this.no_overlap && buffer.size() > 2) {
final int index = buffer.size();
final SAMRecord prev = buffer.get(index - 3);
final SAMRecord curr = buffer.get(index - 2);
final SAMRecord next = buffer.get(index - 1);
if (prev.getAlignmentEnd() >= next.getAlignmentStart() || curr.getAlignmentEnd() <= prev.getAlignmentEnd()) {
buffer.remove(index - 2);
} else if (curr.getAlignmentStart() == prev.getAlignmentStart() && curr.getAlignmentEnd() > prev.getAlignmentEnd()) {
buffer.remove(index - 3);
}
}
while (buffer.size() > 3) {
sfw.addAlignment(buffer.removeFirst());
}
}
progress.finish();
sfw.close();
sfw = null;
iter.close();
iter = null;
sfr.close();
sfr = null;
LOG.info("done");
return RETURN_OK;
} catch (final Exception err) {
LOG.error(err);
return -1;
} finally {
CloserUtil.close(iter);
CloserUtil.close(sfr);
CloserUtil.close(sfw);
}
}
Aggregations