Search in sources :

Example 16 with Index

use of htsjdk.tribble.index.Index in project jvarkit by lindenb.

the class EvsDumpXml method doWork.

/**
 * Runs one fetcher per chromosome (1-22 and X) and writes the result as a single
 * {@code <evsData>} XML document, either to {@code outfilename} or to standard output.
 * <p>
 * When {@code doSort} is set, records are collected in a sorting collection, then
 * written one per line with consecutive duplicates skipped. When an output file is
 * used, each written record is also added to a tribble index that is saved next to
 * the output file.
 *
 * @return 0 on success, -1 if any exception was raised
 */
private int doWork() {
    // true while a file-backed output stream is open and still has to be closed by this method
    boolean closeOutputOnExit = false;
    try {
        this.xmlInputFactory = XMLInputFactory.newFactory();
        TransformerFactory factory = TransformerFactory.newInstance();
        this.transformer = factory.newTransformer();
        this.transformer.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "yes");
        if (this.doSort) {
            this.sortingCollection = SortingCollection.newInstance(String.class, new SnpStringCodec(), new SnpDataComparator(), this.writingSortingCollection.getMaxRecordsInRam(), this.writingSortingCollection.getTmpPaths());
            this.sortingCollection.setDestructiveIteration(true);
        }
        final List<Fetcher> fetchers = new ArrayList<Fetcher>(24);
        fetchers.add(fetch("1", 249250621));
        fetchers.add(fetch("2", 243199373));
        fetchers.add(fetch("3", 198022430));
        fetchers.add(fetch("4", 191154276));
        fetchers.add(fetch("5", 180915260));
        fetchers.add(fetch("6", 171115067));
        fetchers.add(fetch("7", 159138663));
        fetchers.add(fetch("8", 146364022));
        fetchers.add(fetch("9", 141213431));
        fetchers.add(fetch("10", 135534747));
        fetchers.add(fetch("11", 135006516));
        fetchers.add(fetch("12", 133851895));
        fetchers.add(fetch("13", 115169878));
        fetchers.add(fetch("14", 107349540));
        fetchers.add(fetch("15", 102531392));
        fetchers.add(fetch("16", 90354753));
        fetchers.add(fetch("17", 81195210));
        fetchers.add(fetch("18", 78077248));
        fetchers.add(fetch("19", 59128983));
        fetchers.add(fetch("20", 63025520));
        fetchers.add(fetch("21", 48129895));
        fetchers.add(fetch("22", 51304566));
        fetchers.add(fetch("X", 155270560));
        // fetch("Y",59373566); not in evs
        // fetch("M",16571);
        // total/current sizes are accumulated from the fetcher lengths
        // (presumably used for progress reporting elsewhere - confirm)
        this.genome_total_size = 0L;
        this.genome_curr_size = 0L;
        for (Fetcher fetcher : fetchers) {
            this.genome_total_size += fetcher.length;
        }
        DynamicIndexCreator indexer = null;
        if (this.outfilename != null) {
            LOG.info("Opening " + this.outfilename);
            this.outputstream = new LocationAwareOutputStream(new FileOutputStream(this.outfilename));
            closeOutputOnExit = true;
            indexer = new DynamicIndexCreator(this.outfilename, IndexFactory.IndexBalanceApproach.FOR_SEEK_TIME);
        } else {
            this.outputstream = new LocationAwareOutputStream(System.out);
        }
        // print header; bytes are encoded as UTF-8 to match the encoding declared in the XML prolog
        final String xml_header = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" + "<evsData xmlns=\"http://webservice.evs.gs.washington.edu/\">\n";
        this.outputstream.write(xml_header.getBytes(java.nio.charset.StandardCharsets.UTF_8));
        for (Fetcher fetcher : fetchers) {
            fetcher.run();
            this.genome_curr_size += fetcher.length;
        }
        if (this.sortingCollection != null) {
            SnpDataBinding snpDataBinding = new SnpDataBinding();
            this.sortingCollection.doneAdding();
            String prev = null;
            CloseableIterator<String> iter = sortingCollection.iterator();
            try {
                while (iter.hasNext()) {
                    String s = iter.next();
                    // records arrive sorted, so duplicates are adjacent
                    if (prev != null && prev.equals(s)) {
                        continue;
                    }
                    // offset of the record start, recorded before writing it
                    long position = outputstream.getPosition();
                    outputstream.write(s.getBytes(java.nio.charset.StandardCharsets.UTF_8));
                    // important SnpDataCodec needs separate lines
                    outputstream.write('\n');
                    if (indexer != null) {
                        SnpData sd = snpDataBinding.convert(s);
                        indexer.addFeature(new SnpDataFeature(sd), position);
                    }
                    prev = s;
                }
            } finally {
                // release the iterator even if writing or indexing fails
                iter.close();
            }
        }
        // the index ends where the footer starts
        long last_index = this.outputstream.getPosition();
        final String xml_footer = "</evsData>\n";
        this.outputstream.write(xml_footer.getBytes(java.nio.charset.StandardCharsets.UTF_8));
        this.outputstream.flush();
        this.outputstream.close();
        closeOutputOnExit = false;
        if (indexer != null) {
            LOG.info("Writing index");
            final Index index = indexer.finalizeIndex(last_index);
            index.writeBasedOnFeatureFile(this.outfilename);
        }
    } catch (Exception e) {
        e.printStackTrace();
        return -1;
    } finally {
        // on failure, do not leak the file handle; standard output is left untouched as before
        if (closeOutputOnExit) {
            try {
                this.outputstream.close();
            } catch (final Exception ignored) {
                // best effort: the original failure has already been reported
            }
        }
        if (this.sortingCollection != null)
            this.sortingCollection.cleanup();
    }
    return 0;
}
Also used : DynamicIndexCreator(htsjdk.tribble.index.DynamicIndexCreator) LocationAwareOutputStream(com.github.lindenb.jvarkit.io.LocationAwareOutputStream) TransformerFactory(javax.xml.transform.TransformerFactory) ArrayList(java.util.ArrayList) Index(htsjdk.tribble.index.Index) IOException(java.io.IOException) FileOutputStream(java.io.FileOutputStream) SnpData(edu.washington.gs.evs.SnpData)

Example 17 with Index

use of htsjdk.tribble.index.Index in project jvarkit by lindenb.

the class JfxNgs method doMenuIndexVcf.

/**
 * Lets the user pick one or more VCF files and writes an index for each of them:
 * a tabix index for names ending with ".vcf.gz", a tribble linear index for names
 * ending with ".vcf". Processing stops at the first file that cannot be indexed,
 * whose index already exists, or whose indexing fails.
 *
 * @param owner the window owning the file chooser and the dialogs
 */
private void doMenuIndexVcf(final Window owner) {
    final FileChooser chooser = newFileChooser();
    chooser.getExtensionFilters().addAll(VcfStage.EXTENSION_FILTERS);
    final List<File> selection = chooser.showOpenMultipleDialog(owner);
    if (selection == null) {
        // dialog was dismissed without a selection
        return;
    }
    for (final File vcf : selection) {
        updateLastDir(vcf);
        final boolean compressed = vcf.getName().endsWith(".vcf.gz");
        if (!compressed && !vcf.getName().endsWith(".vcf")) {
            showExceptionDialog(owner, "Cannot index file " + vcf);
            return;
        }
        // choose the index flavour from the file extension
        final File indexFile;
        if (compressed) {
            LOG.info("writing tabix index for " + vcf);
            indexFile = new File(vcf.getAbsolutePath() + TabixUtils.STANDARD_INDEX_EXTENSION);
        } else {
            LOG.info("writing tribble index for " + vcf);
            indexFile = new File(vcf.getAbsolutePath() + Tribble.STANDARD_INDEX_EXTENSION);
        }
        try {
            if (compressed) {
                // never overwrite an existing index
                if (indexFile.exists()) {
                    throw new IOException("Tabix index " + indexFile + " already exists.");
                }
                final TabixIndex tabix = IndexFactory.createTabixIndex(vcf, new VCFCodec(), (SAMSequenceDictionary) null);
                tabix.write(indexFile);
            } else {
                // never overwrite an existing index
                if (indexFile.exists()) {
                    throw new IOException("Tribble index " + indexFile + " already exists.");
                }
                final Index linear = IndexFactory.createIndex(vcf, new VCFCodec(), IndexType.LINEAR);
                linear.writeBasedOnFeatureFile(vcf);
            }
            final Alert done = new Alert(AlertType.CONFIRMATION, "Done. ?", ButtonType.OK);
            done.showAndWait();
        } catch (final Exception err) {
            showExceptionDialog(owner, err);
            return;
        }
    }
}
Also used : VCFCodec(htsjdk.variant.vcf.VCFCodec) FileChooser(javafx.stage.FileChooser) TabixIndex(htsjdk.tribble.index.tabix.TabixIndex) Alert(javafx.scene.control.Alert) BAMIndex(htsjdk.samtools.BAMIndex) TabixIndex(htsjdk.tribble.index.tabix.TabixIndex) Index(htsjdk.tribble.index.Index) IOException(java.io.IOException) File(java.io.File) ScriptException(javax.script.ScriptException) BackingStoreException(java.util.prefs.BackingStoreException) IOException(java.io.IOException)

Example 18 with Index

use of htsjdk.tribble.index.Index in project jvarkit by lindenb.

the class BedIndexTabix method run.

/**
 * Copies the BED lines read from {@code in} to the block-compressed
 * {@code outputFile}, optionally sorting them first, and writes a tabix index
 * for the result.
 * <p>
 * Leading header lines (as recognised by {@code BedLine.isBedHeader}) are copied
 * verbatim. Lines the BED codec decodes to {@code null} are skipped. On failure
 * the output file and its index are deleted.
 *
 * @param in the lines to index
 * @throws IOException wrapping any exception raised while writing or indexing
 */
protected void run(LineIterator in) throws IOException {
    // number of BED records written to the output, reported when done
    int bedLineCount = 0;
    File tbi = new File(outputFile.getPath() + TabixUtils.STANDARD_INDEX_EXTENSION);
    BlockCompressedOutputStream writer = null;
    SortingCollection<String> sorter = null;
    // orders lines by contig, then start, then end, then raw text
    final Comparator<String> comparator = new Comparator<String>() {

        @Override
        public int compare(String o1, String o2) {
            BedLine bed1 = bedCodec.decode(o1);
            BedLine bed2 = bedCodec.decode(o2);
            int i = bed1.getContig().compareTo(bed2.getContig());
            if (i != 0)
                return i;
            // Integer.compare cannot overflow, unlike subtraction
            i = Integer.compare(bed1.getStart(), bed2.getStart());
            if (i != 0)
                return i;
            i = Integer.compare(bed1.getEnd(), bed2.getEnd());
            if (i != 0)
                return i;
            return o1.compareTo(o2);
        }
    };
    CloseableIterator<String> iter = null;
    try {
        TabixIndexCreator indexCreator = new TabixIndexCreator(TabixFormat.BED);
        LOG.info("Opening " + outputFile);
        writer = new BlockCompressedOutputStream(this.outputFile);
        StringBuilder header = new StringBuilder();
        // consume the leading header lines without swallowing the first record
        while (in.hasNext()) {
            String h = in.peek();
            if (!BedLine.isBedHeader(h))
                break;
            header.append(in.next()).append('\n');
        }
        // write header
        if (header.length() > 0) {
            LOG.info("Writing header");
            writer.write(header.toString().getBytes());
        }
        if (this.sort) {
            LOG.info("Sorting");
            sorter = SortingCollection.newInstance(String.class, new BedDataCodec(), comparator, this.writingSortingCollection.getMaxRecordsInRam(), this.writingSortingCollection.getTmpPaths());
            while (in.hasNext()) {
                String line = in.next();
                BedLine bed = bedCodec.decode(line);
                if (bed == null)
                    continue;
                sorter.add(line);
            }
            sorter.doneAdding();
            sorter.setDestructiveIteration(true);
            iter = sorter.iterator();
            // offset of the record about to be written
            long filePosition = writer.getFilePointer();
            while (iter.hasNext()) {
                String line = iter.next();
                BedLine bed = this.bedCodec.decode(line);
                writer.write(line.getBytes());
                writer.write('\n');
                indexCreator.addFeature(bed, filePosition);
                filePosition = writer.getFilePointer();
                bedLineCount++;
            }
        } else {
            // offset of the record about to be written
            long filePosition = writer.getFilePointer();
            while (in.hasNext()) {
                String line = in.next();
                BedLine bed = this.bedCodec.decode(line);
                if (bed == null)
                    continue;
                writer.write(line.getBytes());
                writer.write('\n');
                indexCreator.addFeature(bed, filePosition);
                filePosition = writer.getFilePointer();
                bedLineCount++;
            }
        }
        writer.flush();
        LOG.info("Creating index");
        Index index = indexCreator.finalizeIndex(writer.getFilePointer());
        LOG.info("Writing index to " + tbi + " using " + index.getClass());
        index.writeBasedOnFeatureFile(this.outputFile);
        writer.close();
        writer = null;
        LOG.info("Done  N=" + bedLineCount);
    } catch (Exception e) {
        // do not leave a partial output or index behind
        if (this.outputFile.exists() && this.outputFile.isFile()) {
            LOG.warning("Deleting " + this.outputFile);
            this.outputFile.delete();
            if (tbi.exists() && tbi.isFile())
                tbi.delete();
        }
        throw new IOException(e);
    } finally {
        CloserUtil.close(iter);
        // cleanup() is called here so the sorter is released on failure as well as on success
        if (sorter != null)
            sorter.cleanup();
        CloserUtil.close(writer);
    }
}
Also used : BlockCompressedOutputStream(htsjdk.samtools.util.BlockCompressedOutputStream) TabixIndexCreator(htsjdk.tribble.index.tabix.TabixIndexCreator) Index(htsjdk.tribble.index.Index) IOException(java.io.IOException) IOException(java.io.IOException) Comparator(java.util.Comparator) BedLine(com.github.lindenb.jvarkit.util.bio.bed.BedLine) File(java.io.File)

Example 19 with Index

use of htsjdk.tribble.index.Index in project gatk by broadinstitute.

the class IndexUtilsUnitTest method testLoadTabixIndex.

/**
 * Checks that a tabix index can be loaded for each feature file supplied by
 * the "okFeatureFilesTabix" data provider.
 */
@Test(dataProvider = "okFeatureFilesTabix")
public void testLoadTabixIndex(final File featureFile) throws Exception {
    // loading must yield an index object
    Assert.assertNotNull(IndexUtils.loadTabixIndex(featureFile));
}
Also used : Index(htsjdk.tribble.index.Index) BaseTest(org.broadinstitute.hellbender.utils.test.BaseTest) Test(org.testng.annotations.Test)

Example 20 with Index

use of htsjdk.tribble.index.Index in project gatk by broadinstitute.

the class IndexUtilsUnitTest method testCheckIndexModificationTime.

/**
 * Loads the index stored next to the test VCF and runs the version and
 * modification-time check on it; the test passes when no exception is thrown.
 */
@Test
public void testCheckIndexModificationTime() throws Exception {
    final File variantsFile = new File(getToolTestDataDir(), "test_variants_for_index.vcf");
    final File indexFile = new File(getToolTestDataDir(), "test_variants_for_index.vcf.idx");
    final String indexPath = indexFile.getAbsolutePath();
    final Index loadedIndex = IndexFactory.loadIndex(indexPath);
    // expected to complete without throwing
    IndexUtils.checkIndexVersionAndModificationTime(variantsFile, indexFile, loadedIndex);
}
Also used : Index(htsjdk.tribble.index.Index) File(java.io.File) BaseTest(org.broadinstitute.hellbender.utils.test.BaseTest) Test(org.testng.annotations.Test)

Aggregations

Index (htsjdk.tribble.index.Index): 29, File (java.io.File): 20, Test (org.testng.annotations.Test): 15, TabixIndex (htsjdk.tribble.index.tabix.TabixIndex): 12, LinearIndex (htsjdk.tribble.index.linear.LinearIndex): 9, CommandLineProgramTest (org.broadinstitute.hellbender.CommandLineProgramTest): 8, IOException (java.io.IOException): 7, BaseTest (org.broadinstitute.hellbender.utils.test.BaseTest): 7, BlockCompressedOutputStream (htsjdk.samtools.util.BlockCompressedOutputStream): 3, TabixIndexCreator (htsjdk.tribble.index.tabix.TabixIndexCreator): 3, VCFCodec (htsjdk.variant.vcf.VCFCodec): 3, SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary): 2, LineIterator (htsjdk.tribble.readers.LineIterator): 2, LittleEndianOutputStream (htsjdk.tribble.util.LittleEndianOutputStream): 2, IndexFeatureFile (org.broadinstitute.hellbender.tools.IndexFeatureFile): 2, SimpleInterval (org.broadinstitute.hellbender.utils.SimpleInterval): 2, TargetCodec (org.broadinstitute.hellbender.utils.codecs.TargetCodec): 2, BeforeClass (org.testng.annotations.BeforeClass): 2, LocationAwareOutputStream (com.github.lindenb.jvarkit.io.LocationAwareOutputStream): 1, BedLine (com.github.lindenb.jvarkit.util.bio.bed.BedLine): 1