Use of htsjdk.tribble.readers.TabixReader in project ASCIIGenome by dariober.
The class TrackBookmark, method addBookmark.
/**
* Add current genomic position to track.
* @throws IOException
* @throws SQLException
* @throws InvalidRecordException
* @throws ClassNotFoundException
* @throws InvalidGenomicCoordsException
*/
@Override
public void addBookmark(GenomicCoords gc, String nameForBookmark) throws IOException, ClassNotFoundException, InvalidRecordException, SQLException, InvalidGenomicCoordsException {
    // Adding a bookmark means re-preparing the bgzip file: first copy the previously
    // added bookmarks to a new tmp file, then append the current position. Then
    // bgzip-compress and index it, replacing the old bookmark file.
    File plainNew = new File(this.getWorkFilename() + ".add");
    plainNew.deleteOnExit();
    BufferedWriter wr = new BufferedWriter(new FileWriter(plainNew));
    // BGZF is gzip-conformant, so the existing file can be read with a plain GZIPInputStream.
    InputStream fileStream = new FileInputStream(this.getWorkFilename());
    Reader decoder = new InputStreamReader(new GZIPInputStream(fileStream), "UTF-8");
    BufferedReader br = new BufferedReader(decoder);
    String line;
    while ((line = br.readLine()) != null) {
        if (line.contains("\t__ignore_me__")) {
            // Hack to circumvent issue #38
            continue;
        }
        wr.write(line + "\n");
    }
    // Add the new bookmark
    wr.write(this.positionToGffLine(gc, nameForBookmark) + "\n");
    br.close();
    wr.close();
    // Recompress and index, replacing the original bgzip file
    new MakeTabixIndex(plainNew.getAbsolutePath(), new File(this.getWorkFilename()), TabixFormat.GFF);
    plainNew.delete();
    this.tabixReader = new TabixReader(this.getWorkFilename());
    // Update track.
    this.update();
}
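The decompress-filter-append step above generalizes to any gzip-backed text file. Below is a minimal, self-contained sketch of that pattern, assuming hypothetical paths bookmarks.gff.gz and bookmarks.gff.add; it works on BGZF files too, since BGZF is valid multi-member gzip:

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.FileInputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.zip.GZIPInputStream;

public class AppendToGzippedBookmarks {
    public static void main(String[] args) throws IOException {
        // Hypothetical paths; the real code derives them from getWorkFilename().
        String gzIn = "bookmarks.gff.gz";
        String plainOut = "bookmarks.gff.add";
        try (BufferedReader br = new BufferedReader(new InputStreamReader(
                 new GZIPInputStream(new FileInputStream(gzIn)), "UTF-8"));
             BufferedWriter wr = new BufferedWriter(new FileWriter(plainOut))) {
            String line;
            while ((line = br.readLine()) != null) {
                wr.write(line + "\n"); // copy the existing records
            }
            // Append one new GFF record (illustrative values only).
            wr.write("chr1\tASCIIGenome\tbookmark\t100\t200\t.\t+\t.\tID=example\n");
        }
        // The project then bgzips and indexes plainOut via MakeTabixIndex.
    }
}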
Use of htsjdk.tribble.readers.TabixReader in project jvarkit by lindenb.
The class VcfRegistryCGI, method doWork.
private void doWork(XMLStreamWriter w, final GroupFile gf) throws XMLStreamException {
    Position pos = parsePosition();
    if (pos == null)
        return;
    w.writeStartElement("div");
    w.writeStartElement("h2");
    w.writeCharacters(pos.chrom + ":" + pos.pos);
    w.writeEndElement();
    w.writeStartElement("table");
    w.writeStartElement("thead");
    w.writeStartElement("tr");
    for (String header : new String[] { "CHROM", "POS", "ID", "REF", "QUAL", "Sample", "Alleles", "DP", "GQ", "File" }) {
        w.writeStartElement("th");
        w.writeCharacters(header);
        // th
        w.writeEndElement();
    }
    // tr
    w.writeEndElement();
    // thead
    w.writeEndElement();
    w.writeStartElement("tbody");
    Set<String> samplesWithGenotypes = new HashSet<String>();
    Set<String> allSamples = new HashSet<String>();
    for (VcfFile f : getVcfFiles(gf)) {
        TabixReader tabixReader = null;
        TabixReader.Iterator iter = null;
        BlockCompressedInputStream bgzin = null;
        VCFHeader header = null;
        AbstractVCFCodec vcfCodec = VCFUtils.createDefaultVCFCodec();
        LineIterator lineIterator = null;
        // Two passes per file: pass 0 reads the VCF header, pass 1 runs the tabix query.
        for (int i = 0; i < 2; i++) {
            try {
                if (i == 0) {
                    bgzin = new BlockCompressedInputStream(f.file);
                    lineIterator = new LineIteratorImpl(new SynchronousLineReader(bgzin));
                    header = (VCFHeader) vcfCodec.readActualHeader(lineIterator);
                    allSamples.addAll(header.getGenotypeSamples());
                } else {
                    tabixReader = new TabixReader(f.file.getPath());
                    String line;
                    int[] x = tabixReader.parseReg(pos.chrom + ":" + pos.pos + "-" + pos.pos);
                    if (x != null && x.length > 2 && x[0] != -1) {
                        iter = tabixReader.query(x[0], x[1], x[2]);
                    }
                    // If the region could not be parsed, iter stays null and the loop below is skipped.
                    while (iter != null && (line = iter.next()) != null) {
                        VariantContext var = vcfCodec.decode(line);
                        for (String sample : header.getSampleNamesInOrder()) {
                            final Genotype genotype = var.getGenotype(sample);
                            if (genotype == null || !genotype.isCalled())
                                continue;
                            if (!genotype.isAvailable())
                                continue;
                            samplesWithGenotypes.add(sample);
                            w.writeStartElement("tr");
                            w.writeStartElement("td");
                            w.writeCharacters(var.getContig());
                            w.writeEndElement();
                            w.writeStartElement("td");
                            w.writeCharacters(String.valueOf(var.getStart()));
                            w.writeEndElement();
                            if (var.hasID()) {
                                w.writeStartElement("td");
                                if (var.getID().matches("rs[0-9]+")) {
                                    w.writeStartElement("a");
                                    w.writeAttribute("href", "http://www.ncbi.nlm.nih.gov/snp/" + var.getID().substring(2));
                                    w.writeCharacters(var.getID());
                                    // a
                                    w.writeEndElement();
                                } else {
                                    w.writeCharacters(var.getID());
                                }
                                // td
                                w.writeEndElement();
                            } else {
                                w.writeEmptyElement("td");
                            }
                            if (var.getReference() != null) {
                                w.writeStartElement("td");
                                w.writeCharacters(var.getReference().getBaseString());
                                w.writeEndElement();
                            } else {
                                w.writeEmptyElement("td");
                            }
                            if (var.hasLog10PError()) {
                                w.writeStartElement("td");
                                w.writeCharacters(String.valueOf((int) var.getPhredScaledQual()));
                                w.writeEndElement();
                            } else {
                                w.writeEmptyElement("td");
                            }
                            w.writeStartElement("td");
                            w.writeCharacters(sample);
                            w.writeEndElement();
                            List<Allele> alleles = genotype.getAlleles();
                            w.writeStartElement("td");
                            w.writeStartElement("span");
                            if (genotype.isHomRef()) {
                                w.writeAttribute("style", "color:green;");
                            } else if (genotype.isHomVar()) {
                                w.writeAttribute("style", "color:red;");
                            } else if (genotype.isHet()) {
                                w.writeAttribute("style", "color:blue;");
                            }
                            for (int j = 0; j < alleles.size(); ++j) {
                                if (j > 0)
                                    w.writeCharacters(genotype.isPhased() ? "|" : "/");
                                w.writeCharacters(alleles.get(j).getBaseString());
                            }
                            // span
                            w.writeEndElement();
                            w.writeEndElement();
                            if (genotype.hasDP()) {
                                w.writeStartElement("td");
                                w.writeCharacters(String.valueOf(genotype.getDP()));
                                w.writeEndElement();
                            } else {
                                w.writeEmptyElement("td");
                            }
                            if (genotype.hasGQ()) {
                                w.writeStartElement("td");
                                w.writeCharacters(String.valueOf(genotype.getGQ()));
                                w.writeEndElement();
                            } else {
                                w.writeEmptyElement("td");
                            }
                            w.writeStartElement("td");
                            w.writeCharacters(f.file.getName());
                            w.writeEndElement();
                            // tr
                            w.writeEndElement();
                            w.flush();
                        }
                    }
                }
            } catch (Exception err) {
                w.writeComment("BOUM " + err);
                header = null;
                lastException = err;
            } finally {
                CloserUtil.close(lineIterator);
                CloserUtil.close(bgzin);
                CloserUtil.close(tabixReader);
                CloserUtil.close(iter);
            }
            // If the header could not be read, skip the query pass for this file.
            if (i == 0 && header == null)
                break;
        }
        w.flush();
    }
    // tbody
    w.writeEndElement();
    // table
    w.writeEndElement();
    allSamples.removeAll(samplesWithGenotypes);
    if (!allSamples.isEmpty()) {
        w.writeStartElement("h3");
        w.writeCharacters("Samples not found");
        w.writeEndElement();
        w.writeStartElement("ol");
        for (String sample : new TreeSet<String>(allSamples)) {
            w.writeStartElement("li");
            w.writeCharacters(sample);
            w.writeEndElement();
        }
        w.writeEndElement();
    }
    writeHTMLException(w, this.lastException);
    // div
    w.writeEndElement();
}
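Stripped of the HTML generation, the VCF access pattern in doWork is: read the header once from the BGZF stream so the codec can decode records, then run a tabix query and decode each returned line. A minimal sketch of that pattern, assuming a hypothetical indexed file variants.vcf.gz and using htsjdk's plain VCFCodec in place of jvarkit's VCFUtils.createDefaultVCFCodec():

import htsjdk.samtools.util.BlockCompressedInputStream;
import htsjdk.tribble.readers.LineIterator;
import htsjdk.tribble.readers.LineIteratorImpl;
import htsjdk.tribble.readers.SynchronousLineReader;
import htsjdk.tribble.readers.TabixReader;
import htsjdk.variant.variantcontext.VariantContext;
import htsjdk.variant.vcf.VCFCodec;
import htsjdk.variant.vcf.VCFHeader;
import java.io.File;

public class TabixVcfLookup {
    public static void main(String[] args) throws Exception {
        File vcf = new File("variants.vcf.gz"); // hypothetical indexed VCF
        VCFCodec codec = new VCFCodec();
        // Pass 1: read the header; the codec keeps it for decoding records later.
        try (BlockCompressedInputStream bgzin = new BlockCompressedInputStream(vcf)) {
            LineIterator li = new LineIteratorImpl(new SynchronousLineReader(bgzin));
            VCFHeader header = (VCFHeader) codec.readActualHeader(li);
            System.out.println("samples: " + header.getGenotypeSamples());
        }
        // Pass 2: tabix query over a single base, decoding each line returned.
        TabixReader tbx = new TabixReader(vcf.getPath());
        try {
            int[] reg = tbx.parseReg("chr1:100-100"); // {tid, beg, end}; tid == -1 if unknown
            if (reg != null && reg.length > 2 && reg[0] != -1) {
                TabixReader.Iterator iter = tbx.query(reg[0], reg[1], reg[2]);
                String line;
                while (iter != null && (line = iter.next()) != null) {
                    VariantContext ctx = codec.decode(line);
                    System.out.println(ctx.getContig() + ":" + ctx.getStart());
                }
            }
        } finally {
            tbx.close();
        }
    }
}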
Use of htsjdk.tribble.readers.TabixReader in project ASCIIGenome by dariober.
The class MakeTabixFileTest, method canCompressAndIndexSortedFile.
@Test
public void canCompressAndIndexSortedFile() throws IOException, InvalidRecordException, ClassNotFoundException, SQLException {
    String infile = "test_data/overlapped.bed";
    File outfile = new File("test_data/tmp.bed.gz");
    outfile.deleteOnExit();
    File expectedTbi = new File(outfile.getAbsolutePath() + TabixUtils.STANDARD_INDEX_EXTENSION);
    expectedTbi.deleteOnExit();
    new MakeTabixIndex(infile, outfile, TabixFormat.BED);
    assertTrue(outfile.exists());
    assertTrue(outfile.length() > 80);
    assertTrue(expectedTbi.exists());
    assertTrue(expectedTbi.length() > 80);
    TabixReader tbx = new TabixReader(outfile.getAbsolutePath());
    TabixReader.Iterator x = tbx.query("chr1", 1, 1000000);
    assertTrue(x.next().startsWith("chr1"));
}
Use of htsjdk.tribble.readers.TabixReader in project ASCIIGenome by dariober.
The class MakeTabixFileTest, method canCompressAndIndexSortedGzipFile.
@Test
public void canCompressAndIndexSortedGzipFile() throws IOException, InvalidRecordException, ClassNotFoundException, SQLException {
    String infile = "test_data/hg19_genes.gtf.gz";
    File outfile = new File("test_data/tmp2.bed.gz");
    outfile.deleteOnExit();
    File expectedTbi = new File(outfile.getAbsolutePath() + TabixUtils.STANDARD_INDEX_EXTENSION);
    expectedTbi.deleteOnExit();
    new MakeTabixIndex(infile, outfile, TabixFormat.GFF);
    assertTrue(outfile.exists());
    assertTrue(outfile.length() > 7000000);
    assertTrue(expectedTbi.exists());
    assertTrue(expectedTbi.length() > 500000);
    TabixReader tbx = new TabixReader(outfile.getAbsolutePath());
    TabixReader.Iterator x = tbx.query("chr1", 1, 1000000);
    assertTrue(x.next().startsWith("chr1"));
}
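Both tests query with explicit (chrom, start, end) arguments; TabixReader also exposes an overload that takes a samtools-style region string and parses it internally. A small sketch of that variant against the file produced above, assuming the test has already run:

import htsjdk.tribble.readers.TabixReader;

public class RegionStringQuery {
    public static void main(String[] args) throws Exception {
        TabixReader tbx = new TabixReader("test_data/tmp2.bed.gz");
        // Same records as tbx.query("chr1", 1, 1000000), parsed from one region string.
        TabixReader.Iterator it = tbx.query("chr1:1-1000000");
        String line;
        while (it != null && (line = it.next()) != null) {
            System.out.println(line);
        }
        tbx.close();
    }
}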
Use of htsjdk.tribble.readers.TabixReader in project jvarkit by lindenb.
The class VCFTabixml, method doVcfToVcf.
@Override
protected int doVcfToVcf(String inputName, VcfIterator r, VariantContextWriter w) {
    TabixReader tabixReader = null;
    try {
        LOG.info("opening BED " + BEDFILE);
        tabixReader = new TabixReader(this.BEDFILE);
        Pattern tab = Pattern.compile("[\t]");
        LOG.info("loading xslt " + STYLESHEET);
        this.stylesheet = TransformerFactory.newInstance().newTemplates(new StreamSource(STYLESHEET));
        Transformer transformer = this.stylesheet.newTransformer();
        transformer.setOutputProperty(OutputKeys.METHOD, "xml");
        final VCFHeader header = r.getHeader();
        final VCFHeader h2 = new VCFHeader(header);
        LOG.info("reading Tags " + TAGS);
        BufferedReader rT = IOUtils.openFileForBufferedReading(TAGS);
        String line;
        while ((line = rT.readLine()) != null) {
            if (!line.startsWith(VCFHeader.METADATA_INDICATOR)) {
                throw new RuntimeException("should start with " + VCFHeader.METADATA_INDICATOR + ":" + line);
            }
            if (!line.startsWith(VCFConstants.INFO_HEADER_START)) {
                throw new RuntimeException("should start with " + VCFConstants.INFO_HEADER_START + ":" + line);
            }
            // Strip the leading "##INFO=" (7 characters) before parsing the header line.
            VCFInfoHeaderLine hi = new VCFInfoHeaderLine(line.substring(7), VCFHeaderVersion.VCF4_1);
            if (hi.getCount() != 1) {
                throw new IllegalArgumentException("VCFHeaderLineCount not supported : " + hi);
            }
            switch (hi.getType()) {
                case String:
                    break;
                default:
                    throw new IllegalArgumentException("VCFHeaderLineType not supported : " + hi);
            }
            LOG.info(hi.toString());
            h2.addMetaDataLine(hi);
        }
        rT.close();
        LOG.info("writing header");
        w.writeHeader(h2);
        JAXBContext jaxbCtx = JAXBContext.newInstance(Properties.class, Property.class);
        Unmarshaller unmarshaller = jaxbCtx.createUnmarshaller();
        while (r.hasNext()) {
            VariantContext ctx = r.next();
            Map<String, Set<String>> insert = new LinkedHashMap<String, Set<String>>();
            int[] array = tabixReader.parseReg(ctx.getContig() + ":" + ctx.getStart() + "-" + (ctx.getEnd() + 1));
            TabixReader.Iterator iter = null;
            if (array != null && array.length == 3 && array[0] != -1 && array[1] >= 0 && array[2] >= 0) {
                iter = tabixReader.query(array[0], array[1], array[2]);
            } else {
                LOG.info("Cannot get " + ctx.getContig() + ":" + ctx.getStart() + "-" + (ctx.getEnd() + 1));
            }
            String line2 = null;
            while (iter != null && (line2 = iter.next()) != null) {
                String[] tokens2 = tab.split(line2, 5);
                if (tokens2.length < 4) {
                    LOG.error("[VCFTabixml] VCF. Error: not enough columns in tabix line " + line2);
                    return -1;
                }
                int chromStart = Integer.parseInt(tokens2[1]);
                int chromEnd = Integer.parseInt(tokens2[2]);
                if (chromStart + 1 != chromEnd) {
                    LOG.error("Error in " + this.BEDFILE + ": expected start+1==end in " + tokens2[0] + ":" + tokens2[1] + "-" + tokens2[2]);
                    continue;
                }
                // BED is 0-based half-open, VCF is 1-based: match single-base records only.
                if (ctx.getStart() - 1 != chromStart)
                    continue;
                transformer.setParameter("vcfchrom", ctx.getContig());
                transformer.setParameter("vcfpos", ctx.getStart());
                transformer.setParameter("vcfref", ctx.getReference().getBaseString());
                transformer.setParameter("vcfalt", ctx.getAltAlleleWithHighestAlleleCount().getBaseString());
                try {
                    StringWriter sw = new StringWriter();
                    StreamSource src = new StreamSource(new StringReader(tokens2[3]));
                    StreamResult rez = new StreamResult(sw);
                    transformer.transform(src, rez);
                    Properties props = unmarshaller.unmarshal(new StreamSource(new StringReader(sw.toString())), Properties.class).getValue();
                    for (Property p : props.getProperty()) {
                        if (p.key.isEmpty())
                            continue;
                        if (h2.getInfoHeaderLine(p.key) == null) {
                            LOG.info("ignoring key " + p.key + " you could set it to:\n" + "##INFO=<ID=" + p.key + ",Number=1,Type=String,Description=\"" + p.key + " from " + BEDFILE + "\">");
                            continue;
                        }
                        Set<String> x = insert.get(p.key);
                        if (x == null) {
                            x = new LinkedHashSet<String>();
                            insert.put(p.key, x);
                        }
                        x.add(p.value);
                    }
                } catch (Exception e) {
                    e.printStackTrace();
                    throw new RuntimeException("error", e);
                }
            }
            if (insert.isEmpty()) {
                w.add(ctx);
                continue;
            }
            VariantContextBuilder b = new VariantContextBuilder(ctx);
            for (String key : insert.keySet()) {
                for (String s2 : insert.get(key)) {
                    b.attribute(key, s2);
                    // keep at most one value per key
                    break;
                }
            }
            w.add(b.make());
        }
        return 0;
    } catch (IOException err) {
        err.printStackTrace();
        return -1;
    } catch (Throwable err) {
        err.printStackTrace();
        return -1;
    } finally {
        // Close the tabix reader on all exit paths.
        if (tabixReader != null) tabixReader.close();
    }
}
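The start/end arithmetic above mixes two coordinate conventions: BED records are 0-based and half-open, while VCF POS is 1-based. The checks chromStart + 1 == chromEnd and ctx.getStart() - 1 == chromStart follow from that. A small self-contained illustration (class and method names hypothetical):

public final class BedVcfCoords {
    // BED is 0-based half-open; VCF POS is 1-based. A BED record covering exactly one
    // base satisfies start + 1 == end, and matches VCF position pos when pos - 1 == start.
    static boolean singleBaseMatch(int bedStart, int bedEnd, int vcfPos) {
        return bedStart + 1 == bedEnd      // one-base BED interval
            && vcfPos - 1 == bedStart;     // same base in 0-based coordinates
    }

    public static void main(String[] args) {
        System.out.println(singleBaseMatch(99, 100, 100)); // true
        System.out.println(singleBaseMatch(99, 101, 100)); // false: spans two bases
    }
}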