
Example 6 with TabixReader

use of htsjdk.tribble.readers.TabixReader in project ASCIIGenome by dariober.

the class TrackBookmark method addBookmark.

/**
 * Add current genomic position to track.
 * @throws IOException
 * @throws SQLException
 * @throws InvalidRecordException
 * @throws ClassNotFoundException
 * @throws InvalidGenomicCoordsException
 */
@Override
public void addBookmark(GenomicCoords gc, String nameForBookmark) throws IOException, ClassNotFoundException, InvalidRecordException, SQLException, InvalidGenomicCoordsException {
    // Adding a bookmark means rebuilding the bgzip file. First copy the
    // previously added bookmarks to a new tmp file, then append the current
    // position. Finally bgzip and index the tmp file, replacing the old bookmark file.
    File plainNew = new File(this.getWorkFilename() + ".add");
    plainNew.deleteOnExit();
    BufferedWriter wr = new BufferedWriter(new FileWriter(plainNew));
    InputStream fileStream = new FileInputStream(this.getWorkFilename());
    Reader decoder = new InputStreamReader(new GZIPInputStream(fileStream), "UTF-8");
    BufferedReader br = new BufferedReader(decoder);
    String line;
    while ((line = br.readLine()) != null) {
        if (line.contains("\t__ignore_me__")) {
            // Hack to circumvent issue #38
            continue;
        }
        wr.write(line + "\n");
    }
    // Add the new bookmark
    wr.write(this.positionToGffLine(gc, nameForBookmark) + "\n");
    br.close();
    wr.close();
    // Recompress and index replacing the original bgzip file
    new MakeTabixIndex(plainNew.getAbsolutePath(), new File(this.getWorkFilename()), TabixFormat.GFF);
    plainNew.delete();
    this.tabixReader = new TabixReader(this.getWorkFilename());
    // Update track.
    this.update();
}
Also used : GZIPInputStream(java.util.zip.GZIPInputStream) MakeTabixIndex(sortBgzipIndex.MakeTabixIndex) InputStreamReader(java.io.InputStreamReader) TabixReader(htsjdk.tribble.readers.TabixReader) FileInputStream(java.io.FileInputStream) InputStream(java.io.InputStream) FileWriter(java.io.FileWriter) BufferedReader(java.io.BufferedReader) Reader(java.io.Reader) File(java.io.File) BufferedWriter(java.io.BufferedWriter)
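
After MakeTabixIndex rebuilds the file, the reopened tabixReader can serve region queries against the new bookmark track. A minimal read-back sketch, assuming a bgzipped, tabix-indexed GFF at bookmarks.gff.gz (an illustrative path, not part of ASCIIGenome):

import htsjdk.tribble.readers.TabixReader;
import java.io.IOException;

public class BookmarkReadBack {
    public static void main(String[] args) throws IOException {
        // Hypothetical path to a bgzipped, tabix-indexed GFF of bookmarks.
        TabixReader reader = new TabixReader("bookmarks.gff.gz");
        try {
            // Query by contig and coordinates; the iterator yields raw GFF lines.
            TabixReader.Iterator it = reader.query("chr1", 1, 1000000);
            String line;
            while ((line = it.next()) != null) {
                System.out.println(line);
            }
        } finally {
            reader.close();
        }
    }
}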

Example 7 with TabixReader

use of htsjdk.tribble.readers.TabixReader in project jvarkit by lindenb.

the class VcfRegistryCGI method doWork.

private void doWork(XMLStreamWriter w, final GroupFile gf) throws XMLStreamException {
    Position pos = parsePosition();
    if (pos == null)
        return;
    w.writeStartElement("div");
    w.writeStartElement("h2");
    w.writeCharacters(pos.chrom + ":" + pos.pos);
    w.writeEndElement();
    w.writeStartElement("table");
    w.writeStartElement("thead");
    w.writeStartElement("tr");
    for (String header : new String[] { "CHROM", "POS", "ID", "REF", "QUAL", "Sample", "Alleles", "DP", "GQ", "File" }) {
        w.writeStartElement("th");
        w.writeCharacters(header);
        // th
        w.writeEndElement();
    }
    // tr
    w.writeEndElement();
    // thead
    w.writeEndElement();
    w.writeStartElement("tbody");
    Set<String> samplesWithGenotypes = new HashSet<String>();
    Set<String> allSamples = new HashSet<String>();
    for (VcfFile f : getVcfFiles(gf)) {
        TabixReader tabixReader = null;
        TabixReader.Iterator iter = null;
        BlockCompressedInputStream bgzin = null;
        VCFHeader header = null;
        AbstractVCFCodec vcfCodec = VCFUtils.createDefaultVCFCodec();
        LineIterator lineIterator = null;
        // Two passes over each file: pass 0 reads the VCF header, pass 1 runs the tabix query.
        for (int i = 0; i < 2; i++) {
            try {
                if (i == 0) {
                    bgzin = new BlockCompressedInputStream(f.file);
                    lineIterator = new LineIteratorImpl(new SynchronousLineReader(bgzin));
                    header = (VCFHeader) vcfCodec.readActualHeader(lineIterator);
                    allSamples.addAll(header.getGenotypeSamples());
                } else {
                    tabixReader = new TabixReader(f.file.getPath());
                    String line;
                    int[] x = tabixReader.parseReg(pos.chrom + ":" + pos.pos + "-" + pos.pos);
                    if (x != null && x.length > 2 && x[0] != -1) {
                        iter = tabixReader.query(x[0], x[1], x[2]);
                    }
                    while (iter != null && (line = iter.next()) != null) {
                        VariantContext var = vcfCodec.decode(line);
                        for (String sample : header.getSampleNamesInOrder()) {
                            final Genotype genotype = var.getGenotype(sample);
                            if (genotype == null || !genotype.isCalled())
                                continue;
                            if (!genotype.isAvailable())
                                continue;
                            samplesWithGenotypes.add(sample);
                            w.writeStartElement("tr");
                            w.writeStartElement("td");
                            w.writeCharacters(var.getContig());
                            w.writeEndElement();
                            w.writeStartElement("td");
                            w.writeCharacters(String.valueOf(var.getStart()));
                            w.writeEndElement();
                            if (var.hasID()) {
                                w.writeStartElement("td");
                                if (var.getID().matches("rs[0-9]+")) {
                                    w.writeStartElement("a");
                                    w.writeAttribute("href", "http://www.ncbi.nlm.nih.gov/snp/" + var.getID().substring(2));
                                    w.writeCharacters(var.getID());
                                    // a
                                    w.writeEndElement();
                                } else {
                                    w.writeCharacters(var.getID());
                                }
                                // td
                                w.writeEndElement();
                            } else {
                                w.writeEmptyElement("td");
                            }
                            if (var.getReference() != null) {
                                w.writeStartElement("td");
                                w.writeCharacters(var.getReference().getBaseString());
                                w.writeEndElement();
                            } else {
                                w.writeEmptyElement("td");
                            }
                            if (var.hasLog10PError()) {
                                w.writeStartElement("td");
                                w.writeCharacters(String.valueOf((int) var.getPhredScaledQual()));
                                w.writeEndElement();
                            } else {
                                w.writeEmptyElement("td");
                            }
                            w.writeStartElement("td");
                            w.writeCharacters(sample);
                            w.writeEndElement();
                            List<Allele> alleles = genotype.getAlleles();
                            w.writeStartElement("td");
                            w.writeStartElement("span");
                            if (genotype.isHomRef()) {
                                w.writeAttribute("style", "color:green;");
                            } else if (genotype.isHomVar()) {
                                w.writeAttribute("style", "color:red;");
                            } else if (genotype.isHet()) {
                                w.writeAttribute("style", "color:blue;");
                            }
                            for (int j = 0; j < alleles.size(); ++j) {
                                if (j > 0)
                                    w.writeCharacters(genotype.isPhased() ? "|" : "/");
                                w.writeCharacters(alleles.get(j).getBaseString());
                            }
                            // span
                            w.writeEndElement();
                            w.writeEndElement();
                            if (genotype.hasDP()) {
                                w.writeStartElement("td");
                                w.writeCharacters(String.valueOf(genotype.getDP()));
                                w.writeEndElement();
                            } else {
                                w.writeEmptyElement("td");
                            }
                            if (genotype.hasGQ()) {
                                w.writeStartElement("td");
                                w.writeCharacters(String.valueOf(genotype.getGQ()));
                                w.writeEndElement();
                            } else {
                                w.writeEmptyElement("td");
                            }
                            w.writeStartElement("td");
                            w.writeCharacters(f.file.getName());
                            w.writeEndElement();
                            // tr
                            w.writeEndElement();
                            w.flush();
                        }
                    }
                }
            } catch (Exception err) {
                w.writeComment("BOUM " + err);
                header = null;
                lastException = err;
            } finally {
                CloserUtil.close(lineIterator);
                CloserUtil.close(bgzin);
                CloserUtil.close(tabixReader);
                CloserUtil.close(iter);
            }
            if (i == 0 && header == null)
                break;
        }
        w.flush();
    }
    // tbody
    w.writeEndElement();
    // table
    w.writeEndElement();
    allSamples.removeAll(samplesWithGenotypes);
    if (!allSamples.isEmpty()) {
        w.writeStartElement("h3");
        w.writeCharacters("Samples not found");
        w.writeEndElement();
        w.writeStartElement("ol");
        for (String sample : new TreeSet<String>(allSamples)) {
            w.writeStartElement("li");
            w.writeCharacters(sample);
            w.writeEndElement();
        }
        w.writeEndElement();
    }
    writeHTMLException(w, this.lastException);
    // div
    w.writeEndElement();
}
Also used : TabixReader(htsjdk.tribble.readers.TabixReader) VariantContext(htsjdk.variant.variantcontext.VariantContext) Genotype(htsjdk.variant.variantcontext.Genotype) AbstractVCFCodec(htsjdk.variant.vcf.AbstractVCFCodec) LineIterator(htsjdk.tribble.readers.LineIterator) XMLStreamException(javax.xml.stream.XMLStreamException) IOException(java.io.IOException) FileNotFoundException(java.io.FileNotFoundException) Allele(htsjdk.variant.variantcontext.Allele) TreeSet(java.util.TreeSet) SynchronousLineReader(htsjdk.tribble.readers.SynchronousLineReader) VCFHeader(htsjdk.variant.vcf.VCFHeader) LineIteratorImpl(htsjdk.tribble.readers.LineIteratorImpl) BlockCompressedInputStream(htsjdk.samtools.util.BlockCompressedInputStream) HashSet(java.util.HashSet)
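
Stripped of the XML writing, the lookup at the heart of this method is the parseReg/query pair. A minimal sketch of that defensive pattern, assuming a tabix-indexed VCF at calls.vcf.gz (a hypothetical path):

import htsjdk.tribble.readers.TabixReader;
import java.io.IOException;

public class RegionLookup {
    public static void main(String[] args) throws IOException {
        TabixReader tabix = new TabixReader("calls.vcf.gz"); // hypothetical file
        try {
            // parseReg returns {tid, begin, end}; tid is -1 for unknown contigs.
            int[] reg = tabix.parseReg("chr1:10000-10000");
            if (reg != null && reg.length > 2 && reg[0] != -1) {
                TabixReader.Iterator iter = tabix.query(reg[0], reg[1], reg[2]);
                String line;
                while ((line = iter.next()) != null) {
                    System.out.println(line); // raw VCF data line
                }
            }
        } finally {
            tabix.close();
        }
    }
}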

Example 8 with TabixReader

use of htsjdk.tribble.readers.TabixReader in project ASCIIGenome by dariober.

the class MakeTabixFileTest method canCompressAndIndexSortedFile.

@Test
public void canCompressAndIndexSortedFile() throws IOException, InvalidRecordException, ClassNotFoundException, SQLException {
    String infile = "test_data/overlapped.bed";
    File outfile = new File("test_data/tmp.bed.gz");
    outfile.deleteOnExit();
    File expectedTbi = new File(outfile.getAbsolutePath() + TabixUtils.STANDARD_INDEX_EXTENSION);
    expectedTbi.deleteOnExit();
    new MakeTabixIndex(infile, outfile, TabixFormat.BED);
    assertTrue(outfile.exists());
    assertTrue(outfile.length() > 80);
    assertTrue(expectedTbi.exists());
    assertTrue(expectedTbi.length() > 80);
    TabixReader tbx = new TabixReader(outfile.getAbsolutePath());
    Iterator x = tbx.query("chr1", 1, 1000000);
    assertTrue(x.next().startsWith("chr1"));
}
Also used : TabixReader(htsjdk.tribble.readers.TabixReader) CloseableIterator(htsjdk.samtools.util.CloseableIterator) Iterator(htsjdk.tribble.readers.TabixReader.Iterator) File(java.io.File) Test(org.junit.Test)
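
When a test needs more than the first record, the query iterator can be drained into a list before asserting. A small helper sketch (class and method names are illustrative), reusing the query overload from the test above:

import htsjdk.tribble.readers.TabixReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

public class TabixQueries {
    // Collect every raw line returned for the given region.
    static List<String> queryAll(TabixReader tbx, String chrom, int start, int end) throws IOException {
        List<String> lines = new ArrayList<>();
        TabixReader.Iterator it = tbx.query(chrom, start, end);
        String line;
        while ((line = it.next()) != null) {
            lines.add(line);
        }
        return lines;
    }
}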

Example 9 with TabixReader

use of htsjdk.tribble.readers.TabixReader in project ASCIIGenome by dariober.

the class MakeTabixFileTest method canCompressAndIndexSortedGzipFile.

@Test
public void canCompressAndIndexSortedGzipFile() throws IOException, InvalidRecordException, ClassNotFoundException, SQLException {
    String infile = "test_data/hg19_genes.gtf.gz";
    File outfile = new File("test_data/tmp2.bed.gz");
    outfile.deleteOnExit();
    File expectedTbi = new File(outfile.getAbsolutePath() + TabixUtils.STANDARD_INDEX_EXTENSION);
    expectedTbi.deleteOnExit();
    new MakeTabixIndex(infile, outfile, TabixFormat.GFF);
    assertTrue(outfile.exists());
    assertTrue(outfile.length() > 7000000);
    assertTrue(expectedTbi.exists());
    assertTrue(expectedTbi.length() > 500000);
    TabixReader tbx = new TabixReader(outfile.getAbsolutePath());
    Iterator x = tbx.query("chr1", 1, 1000000);
    assertTrue(x.next().startsWith("chr1"));
}
Also used : TabixReader(htsjdk.tribble.readers.TabixReader) CloseableIterator(htsjdk.samtools.util.CloseableIterator) Iterator(htsjdk.tribble.readers.TabixReader.Iterator) File(java.io.File) Test(org.junit.Test)
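
Both tests rely on the convention that the .tbi index sits next to the data file (the outfile path plus TabixUtils.STANDARD_INDEX_EXTENSION). When the index lives elsewhere, TabixReader also accepts an explicit index path through its two-argument constructor. A sketch with hypothetical paths:

import htsjdk.tribble.readers.TabixReader;
import java.io.IOException;

public class ExplicitIndex {
    public static void main(String[] args) throws IOException {
        // Hypothetical layout: data file and its index in different directories.
        TabixReader tbx = new TabixReader("data/hg19_genes.gtf.gz",
                                          "indexes/hg19_genes.gtf.gz.tbi");
        try {
            String first = tbx.query("chr1", 1, 1000000).next();
            System.out.println(first);
        } finally {
            tbx.close();
        }
    }
}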

Example 10 with TabixReader

use of htsjdk.tribble.readers.TabixReader in project jvarkit by lindenb.

the class VCFTabixml method doVcfToVcf.

@Override
protected int doVcfToVcf(String inputName, VcfIterator r, VariantContextWriter w) {
    TabixReader tabixReader = null;
    try {
        LOG.info("opening BED" + BEDFILE);
        tabixReader = new TabixReader(this.BEDFILE);
        Pattern tab = Pattern.compile("[\t]");
        LOG.info("loading xslt " + STYLESHEET);
        this.stylesheet = TransformerFactory.newInstance().newTemplates(new StreamSource(STYLESHEET));
        Transformer transformer = this.stylesheet.newTransformer();
        transformer.setOutputProperty(OutputKeys.METHOD, "xml");
        final VCFHeader header = r.getHeader();
        final VCFHeader h2 = new VCFHeader(header);
        LOG.info("reading Tags " + TAGS);
        BufferedReader rT = IOUtils.openFileForBufferedReading(TAGS);
        String line;
        while ((line = rT.readLine()) != null) {
            if (!line.startsWith(VCFHeader.METADATA_INDICATOR)) {
                throw new RuntimeException("should start with " + VCFHeader.METADATA_INDICATOR + ":" + line);
            }
            if (!line.startsWith(VCFConstants.INFO_HEADER_START)) {
                throw new RuntimeException("should start with " + VCFConstants.INFO_HEADER_START + ":" + line);
            }
            // strip the leading "##INFO=" (7 characters)
            VCFInfoHeaderLine hi = new VCFInfoHeaderLine(line.substring(7), VCFHeaderVersion.VCF4_1);
            if (hi.getCount() != 1) {
                throw new IllegalArgumentException("VCFHeaderLineCount not supported : " + hi);
            }
            switch(hi.getType()) {
                case String:
                    break;
                default:
                    throw new IllegalArgumentException("VCFHeaderLineTyoe not supported : " + hi);
            }
            LOG.info(hi.toString());
            h2.addMetaDataLine(hi);
        }
        rT.close();
        LOG.info("writing header");
        w.writeHeader(h2);
        JAXBContext jaxbCtx = JAXBContext.newInstance(Properties.class, Property.class);
        Unmarshaller unmarshaller = jaxbCtx.createUnmarshaller();
        while (r.hasNext()) {
            VariantContext ctx = r.next();
            HashMap<String, Set<String>> insert = new LinkedHashMap<String, Set<String>>();
            int[] array = tabixReader.parseReg(ctx.getContig() + ":" + (ctx.getStart()) + "-" + (ctx.getEnd() + 1));
            TabixReader.Iterator iter = null;
            if (array != null && array.length == 3 && array[0] != -1 && array[1] >= 0 && array[2] >= 0) {
                iter = tabixReader.query(array[0], array[1], array[2]);
            } else {
                LOG.info("Cannot get " + ctx.getContig() + ":" + (ctx.getStart()) + "-" + (ctx.getEnd() + 1));
            }
            String line2 = null;
            while (iter != null && (line2 = iter.next()) != null) {
                String[] tokens2 = tab.split(line2, 5);
                if (tokens2.length < 4) {
                    LOG.error("[VCFTabixml] VCF. Error not enough columns in tabix.line " + line2);
                    return -1;
                }
                int chromStart = Integer.parseInt(tokens2[1]);
                int chromEnd = Integer.parseInt(tokens2[2]);
                if (chromStart + 1 != chromEnd) {
                    LOG.error("Error in " + this.BEDFILE + " extected start+1=end int " + tokens2[0] + ":" + tokens2[1] + "-" + tokens2[2]);
                    continue;
                }
                if (ctx.getStart() - 1 != chromStart)
                    continue;
                transformer.setParameter("vcfchrom", ctx.getContig());
                transformer.setParameter("vcfpos", ctx.getStart());
                transformer.setParameter("vcfref", ctx.getReference().getBaseString());
                transformer.setParameter("vcfalt", ctx.getAltAlleleWithHighestAlleleCount().getBaseString());
                try {
                    StringWriter sw = new StringWriter();
                    StreamSource src = new StreamSource(new StringReader(tokens2[3]));
                    StreamResult rez = new StreamResult(sw);
                    transformer.transform(src, rez);
                    Properties props = unmarshaller.unmarshal(new StreamSource(new StringReader(sw.toString())), Properties.class).getValue();
                    for (Property p : props.getProperty()) {
                        if (p.key.isEmpty())
                            continue;
                        if (h2.getInfoHeaderLine(p.key) == null) {
                            LOG.info("ignoring key " + p.key + " you could set it to:\n" + "##INFO=<ID=" + p.key + ",Number=1,Type=String,Description=\"" + p.key + " from " + BEDFILE + "\">");
                            continue;
                        }
                        Set<String> x = insert.get(p.key);
                        if (x == null) {
                            x = new LinkedHashSet<String>();
                            insert.put(p.key, x);
                        }
                        x.add(p.value);
                    }
                } catch (Exception e) {
                    e.printStackTrace();
                    throw new RuntimeException("error", e);
                }
            }
            if (insert.isEmpty()) {
                w.add(ctx);
                continue;
            }
            VariantContextBuilder b = new VariantContextBuilder(ctx);
            for (String key : insert.keySet()) {
                for (String s2 : insert.get(key)) {
                    b.attribute(key, s2);
                    // limit 1
                    break;
                }
            }
            w.add(b.make());
        }
        return 0;
    } catch (IOException err) {
        err.printStackTrace();
        return -1;
    } catch (Throwable err) {
        err.printStackTrace();
        return -1;
    }
}
Also used : Transformer(javax.xml.transform.Transformer) LinkedHashSet(java.util.LinkedHashSet) Set(java.util.Set) TabixReader(htsjdk.tribble.readers.TabixReader) VariantContext(htsjdk.variant.variantcontext.VariantContext) JAXBContext(javax.xml.bind.JAXBContext) LinkedHashMap(java.util.LinkedHashMap) StringWriter(java.io.StringWriter) StringReader(java.io.StringReader) VCFHeader(htsjdk.variant.vcf.VCFHeader) Unmarshaller(javax.xml.bind.Unmarshaller) Pattern(java.util.regex.Pattern) StreamResult(javax.xml.transform.stream.StreamResult) StreamSource(javax.xml.transform.stream.StreamSource) IOException(java.io.IOException) VCFInfoHeaderLine(htsjdk.variant.vcf.VCFInfoHeaderLine) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) BufferedReader(java.io.BufferedReader)
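
Aside from the XSLT machinery, the core of the inner loop is matching a variant's 1-based POS against the 0-based, half-open BED intervals returned by tabix. A sketch of that coordinate check in isolation (helper name and query semantics are illustrative, not VCFTabixml's exact behavior):

import htsjdk.tribble.readers.TabixReader;
import java.io.IOException;

public class BedMatch {
    // Return the 4th column of a single-base BED record covering the given
    // 1-based variant position, or null if none matches.
    static String lookup(TabixReader bed, String contig, int vcfPos) throws IOException {
        TabixReader.Iterator iter = bed.query(contig, vcfPos - 1, vcfPos);
        String line;
        while ((line = iter.next()) != null) {
            String[] tokens = line.split("\t", 5);
            if (tokens.length < 4) {
                continue; // not enough columns to carry a payload
            }
            int chromStart = Integer.parseInt(tokens[1]); // BED start, 0-based
            // BED is 0-based half-open; VCF POS is 1-based.
            if (chromStart == vcfPos - 1) {
                return tokens[3];
            }
        }
        return null;
    }
}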

Aggregations

TabixReader (htsjdk.tribble.readers.TabixReader) 10
File (java.io.File) 6
Iterator (htsjdk.tribble.readers.TabixReader.Iterator) 5
CloseableIterator (htsjdk.samtools.util.CloseableIterator) 4
IOException (java.io.IOException) 4
Test (org.junit.Test) 4
BufferedReader (java.io.BufferedReader) 3
VariantContext (htsjdk.variant.variantcontext.VariantContext) 2
VCFHeader (htsjdk.variant.vcf.VCFHeader) 2
BufferedWriter (java.io.BufferedWriter) 2
FileInputStream (java.io.FileInputStream) 2
FileNotFoundException (java.io.FileNotFoundException) 2
FileWriter (java.io.FileWriter) 2
InputStream (java.io.InputStream) 2
InputStreamReader (java.io.InputStreamReader) 2
Reader (java.io.Reader) 2
GZIPInputStream (java.util.zip.GZIPInputStream) 2
MakeTabixIndex (sortBgzipIndex.MakeTabixIndex) 2
InvalidColourException (exceptions.InvalidColourException) 1
InvalidCommandLineException (exceptions.InvalidCommandLineException) 1