Search in sources :

Example 1 with TabixIndexCreator

use of htsjdk.tribble.index.tabix.TabixIndexCreator in project ASCIIGenome by dariober.

the class UcscFetch method blockCompressAndIndex.

/**
 * Block compress a GTF/GFF input file and create the associated tabix index.
 *
 * <p>Each input line is parsed as a {@code GtfLine}, written to the block-compressed output
 * and registered with a {@code TabixIndexCreator} configured for {@code TabixFormat.GFF}.
 * The index is written via {@code Index.writeBasedOnFeatureFile}; the path
 * {@code bgzfOut + TabixUtils.STANDARD_INDEX_EXTENSION} is the one checked for a
 * pre-existing index and registered for deletion.
 *
 * <p>The input iterator and the output stream are always released, also when parsing or
 * indexing fails part-way through.
 *
 * @param in path of the input file to compress
 * @param bgzfOut path of the block-compressed file to create
 * @param deleteOnExit if true, the output file and its index are registered for deletion
 *        when the JVM exits
 * @throws IOException if reading the input or writing the output fails
 * @throws InvalidRecordException declared by the signature; NOTE(review): not thrown
 *         directly by the code visible here, presumably raised while parsing a line - confirm
 * @throws RuntimeException if the index file already exists
 */
private void blockCompressAndIndex(String in, String bgzfOut, boolean deleteOnExit) throws IOException, InvalidRecordException {
    File inFile = new File(in);
    File outFile = new File(bgzfOut);
    LineIterator lin = null;
    BlockCompressedOutputStream writer = null;
    try {
        lin = utils.IOUtils.openURIForLineIterator(inFile.getAbsolutePath());
        writer = new BlockCompressedOutputStream(outFile);
        // Virtual file offset at which the next record will start; the index needs the
        // offset *before* the record is written.
        long filePosition = writer.getFilePointer();
        TabixIndexCreator indexCreator = new TabixIndexCreator(TabixFormat.GFF);
        while (lin.hasNext()) {
            String line = lin.next();
            GtfLine gtf = new GtfLine(line.split("\t"));
            writer.write(line.getBytes());
            writer.write('\n');
            indexCreator.addFeature(gtf, filePosition);
            filePosition = writer.getFilePointer();
        }
        writer.flush();
        File tbi = new File(bgzfOut + TabixUtils.STANDARD_INDEX_EXTENSION);
        if (tbi.exists() && tbi.isFile()) {
            // Refuse to overwrite an existing index; the writer is closed in the finally block.
            throw new RuntimeException("Index file exists: " + tbi);
        }
        Index index = indexCreator.finalizeIndex(writer.getFilePointer());
        index.writeBasedOnFeatureFile(outFile);
        // Close explicitly on the success path so that a failure to close is reported.
        writer.close();
        writer = null;
    } finally {
        // Quiet close: only reached with a non-null writer when something above failed.
        htsjdk.samtools.util.CloserUtil.close(writer);
        htsjdk.samtools.util.CloserUtil.close(lin);
    }
    if (deleteOnExit) {
        outFile.deleteOnExit();
        File idx = new File(outFile.getAbsolutePath() + TabixUtils.STANDARD_INDEX_EXTENSION);
        idx.deleteOnExit();
    }
}
Also used : GtfLine(utils.GtfLine) BlockCompressedOutputStream(htsjdk.samtools.util.BlockCompressedOutputStream) TabixIndexCreator(htsjdk.tribble.index.tabix.TabixIndexCreator) Index(htsjdk.tribble.index.Index) File(java.io.File) LineIterator(htsjdk.tribble.readers.LineIterator)

Example 2 with TabixIndexCreator

use of htsjdk.tribble.index.tabix.TabixIndexCreator in project ASCIIGenome by dariober.

the class MakeTabixIndex method blockCompressAndIndex.

/**
 * Block compress the input file and create the associated tabix index.
 *
 * <p>Processing rules, applied to each trimmed input line:
 * <ul>
 *   <li>empty lines and lines starting with {@code "track "} are skipped;</li>
 *   <li>a line starting with {@code ##FASTA} ends processing: nothing from that line onwards
 *       is written or indexed;</li>
 *   <li>other lines starting with {@code #} are copied to the output but not indexed;</li>
 *   <li>for non-VCF formats a dummy line built by {@code makeDummyLine} is written and
 *       indexed before the first feature;</li>
 *   <li>a line that fails to be indexed or written is skipped with a warning on stderr,
 *       unless the failure reports out-of-order features, which aborts the run.</li>
 * </ul>
 *
 * <p>The input iterator and the output stream are always released, also on failure.
 *
 * @param intab path or URI of the tabular input file
 * @param bgzfOut block-compressed output file to create
 * @param fmt tabix format of the input
 * @throws IOException if opening the input or writing the output/index fails
 * @throws InvalidRecordException if features are not sorted as required by the tabix indexer
 */
private void blockCompressAndIndex(String intab, File bgzfOut, TabixFormat fmt) throws IOException, InvalidRecordException {
    LineIterator lin = null;
    BlockCompressedOutputStream writer = null;
    try {
        lin = utils.IOUtils.openURIForLineIterator(intab);
        writer = new BlockCompressedOutputStream(bgzfOut);
        // Virtual file offset at which the next written record will start.
        long filePosition = writer.getFilePointer();
        TabixIndexCreator indexCreator = new TabixIndexCreator(fmt);
        boolean first = true;
        // This is relevant to vcf files only: Prepare header and codec
        // ------------------------------------------------------------
        VCFHeader vcfHeader = null;
        VCFCodec vcfCodec = null;
        if (fmt.equals(TabixFormat.VCF)) {
            try {
                VCFFileReader vcfr = new VCFFileReader(new File(intab), false);
                vcfHeader = vcfr.getFileHeader();
                vcfr.close();
            } catch (MalformedFeatureFile e) {
                // Fall back to an empty header when the file header cannot be read.
                vcfHeader = new VCFHeader();
            }
            vcfCodec = new VCFCodec();
            vcfCodec.setVCFHeader(vcfHeader, Utils.getVCFHeaderVersion(vcfHeader));
        }
        // ------------------------------------------------------------
        int nWarnings = 10;
        while (lin.hasNext()) {
            String line = lin.next().trim();
            try {
                if (line.isEmpty() || line.startsWith("track ")) {
                    continue;
                }
                // Must be tested before the generic '#' check below, otherwise it can
                // never match and the FASTA section would be treated as feature lines.
                if (line.startsWith("##FASTA")) {
                    break;
                }
                if (line.startsWith("#")) {
                    writer.write((line + "\n").getBytes());
                    filePosition = writer.getFilePointer();
                    continue;
                }
                if (first && !fmt.equals(TabixFormat.VCF)) {
                    String dummy = this.makeDummyLine(line, fmt);
                    addLineToIndex(dummy, indexCreator, filePosition, fmt, null, null);
                    writer.write(dummy.getBytes());
                    writer.write('\n');
                    filePosition = writer.getFilePointer();
                    first = false;
                }
                addLineToIndex(line, indexCreator, filePosition, fmt, vcfHeader, vcfCodec);
                writer.write(line.getBytes());
                writer.write('\n');
                filePosition = writer.getFilePointer();
            } catch (Exception e) {
                // Exceptions may carry no message; guard against a NullPointerException here.
                String msg = e.getMessage();
                if (msg != null && (msg.contains("added out sequence of order") || msg.contains("Features added out of order"))) {
                    // Get a string marker for out-of-order from htsjdk/tribble/index/tabix/TabixIndexCreator.java
                    throw new InvalidRecordException();
                }
                if (nWarnings >= 0) {
                    System.err.println("Warning: " + msg + ". Skipping:\n" + line);
                }
                if (nWarnings == 0) {
                    System.err.println("Additional warnings will not be show.");
                }
                nWarnings--;
            }
        }
        writer.flush();
        Index index = indexCreator.finalizeIndex(writer.getFilePointer());
        index.writeBasedOnFeatureFile(bgzfOut);
        // Close explicitly on the success path so that a failure to close is reported.
        writer.close();
        writer = null;
    } finally {
        // Quiet close: only reached with a non-null writer when something above failed.
        CloserUtil.close(writer);
        CloserUtil.close(lin);
    }
}
Also used : VCFCodec(htsjdk.variant.vcf.VCFCodec) BlockCompressedOutputStream(htsjdk.samtools.util.BlockCompressedOutputStream) VCFFileReader(htsjdk.variant.vcf.VCFFileReader) TabixIndexCreator(htsjdk.tribble.index.tabix.TabixIndexCreator) Index(htsjdk.tribble.index.Index) MalformedFeatureFile(htsjdk.tribble.TribbleException.MalformedFeatureFile) LineIterator(htsjdk.tribble.readers.LineIterator) InvalidRecordException(exceptions.InvalidRecordException) SQLException(java.sql.SQLException) IOException(java.io.IOException) VCFHeader(htsjdk.variant.vcf.VCFHeader) MalformedFeatureFile(htsjdk.tribble.TribbleException.MalformedFeatureFile) File(java.io.File) InvalidRecordException(exceptions.InvalidRecordException)

Example 3 with TabixIndexCreator

use of htsjdk.tribble.index.tabix.TabixIndexCreator in project jvarkit by lindenb.

the class BedIndexTabix method run.

/**
 * Block compress the BED lines read from {@code in} into {@code this.outputFile} and write
 * the associated tabix index.
 *
 * <p>Leading lines recognised by {@code BedLine.isBedHeader} are copied to the output first.
 * When {@code this.sort} is set, the remaining lines are sorted by contig, start, end and
 * finally by the raw line text before being written; otherwise they are written in input
 * order. Lines that the BED codec decodes to {@code null} are skipped.
 *
 * <p>On failure the output file and its index are deleted if they exist.
 *
 * @param in iterator over the lines of the BED input
 * @throws IOException if anything fails; the original exception is attached as the cause
 */
protected void run(LineIterator in) throws IOException {
    // Number of BED records written to the output, reported in the final log line.
    int bedLineCount = 0;
    File tbi = new File(outputFile.getPath() + TabixUtils.STANDARD_INDEX_EXTENSION);
    BlockCompressedOutputStream writer = null;
    SortingCollection<String> sorter = null;
    final Comparator<String> comparator = new Comparator<String>() {

        @Override
        public int compare(String o1, String o2) {
            BedLine bed1 = bedCodec.decode(o1);
            BedLine bed2 = bedCodec.decode(o2);
            int i = bed1.getContig().compareTo(bed2.getContig());
            if (i != 0)
                return i;
            // Integer.compare instead of subtraction: cannot overflow.
            i = Integer.compare(bed1.getStart(), bed2.getStart());
            if (i != 0)
                return i;
            i = Integer.compare(bed1.getEnd(), bed2.getEnd());
            if (i != 0)
                return i;
            // Tie-break on the raw text so that distinct lines never compare as equal.
            return o1.compareTo(o2);
        }
    };
    CloseableIterator<String> iter = null;
    try {
        TabixIndexCreator indexCreator = new TabixIndexCreator(TabixFormat.BED);
        LOG.info("Opening" + outputFile);
        writer = new BlockCompressedOutputStream(this.outputFile);
        StringBuilder header = new StringBuilder();
        while (in.hasNext()) {
            String h = in.peek();
            if (!BedLine.isBedHeader(h))
                break;
            header.append(in.next()).append('\n');
        }
        // write header
        if (header.length() > 0) {
            LOG.info("Writing header");
            writer.write(header.toString().getBytes());
        }
        if (this.sort) {
            LOG.info("Sorting");
            sorter = SortingCollection.newInstance(String.class, new BedDataCodec(), comparator, this.writingSortingCollection.getMaxRecordsInRam(), this.writingSortingCollection.getTmpPaths());
            while (in.hasNext()) {
                String line = in.next();
                BedLine bed = bedCodec.decode(line);
                // Only decodable lines are handed to the sorter, so the comparator
                // never sees a null BedLine.
                if (bed == null)
                    continue;
                sorter.add(line);
            }
            sorter.doneAdding();
            sorter.setDestructiveIteration(true);
            iter = sorter.iterator();
            long filePosition = writer.getFilePointer();
            while (iter.hasNext()) {
                String line = iter.next();
                BedLine bed = this.bedCodec.decode(line);
                writer.write(line.getBytes());
                writer.write('\n');
                indexCreator.addFeature(bed, filePosition);
                filePosition = writer.getFilePointer();
                bedLineCount++;
            }
        } else {
            long filePosition = writer.getFilePointer();
            while (in.hasNext()) {
                String line = in.next();
                BedLine bed = this.bedCodec.decode(line);
                if (bed == null)
                    continue;
                writer.write(line.getBytes());
                writer.write('\n');
                indexCreator.addFeature(bed, filePosition);
                filePosition = writer.getFilePointer();
                bedLineCount++;
            }
        }
        writer.flush();
        LOG.info("Creating index");
        Index index = indexCreator.finalizeIndex(writer.getFilePointer());
        LOG.info("Writing index to " + tbi + " using " + index.getClass());
        index.writeBasedOnFeatureFile(this.outputFile);
        writer.close();
        // Mark as closed so the finally block does not close it a second time.
        writer = null;
        LOG.info("Done  N=" + bedLineCount);
    } catch (Exception e) {
        if (this.outputFile.exists() && this.outputFile.isFile()) {
            LOG.warning("Deleting " + this.outputFile);
            this.outputFile.delete();
            if (tbi.exists() && tbi.isFile())
                tbi.delete();
        }
        throw new IOException(e);
    } finally {
        CloserUtil.close(iter);
        // Release the sorter's temporary files on both the success and the failure path.
        if (sorter != null)
            sorter.cleanup();
        CloserUtil.close(sorter);
        CloserUtil.close(writer);
    }
}
Also used : BlockCompressedOutputStream(htsjdk.samtools.util.BlockCompressedOutputStream) TabixIndexCreator(htsjdk.tribble.index.tabix.TabixIndexCreator) Index(htsjdk.tribble.index.Index) IOException(java.io.IOException) IOException(java.io.IOException) Comparator(java.util.Comparator) BedLine(com.github.lindenb.jvarkit.util.bio.bed.BedLine) File(java.io.File)

Aggregations

BlockCompressedOutputStream (htsjdk.samtools.util.BlockCompressedOutputStream)3 Index (htsjdk.tribble.index.Index)3 TabixIndexCreator (htsjdk.tribble.index.tabix.TabixIndexCreator)3 File (java.io.File)3 LineIterator (htsjdk.tribble.readers.LineIterator)2 IOException (java.io.IOException)2 BedLine (com.github.lindenb.jvarkit.util.bio.bed.BedLine)1 InvalidRecordException (exceptions.InvalidRecordException)1 MalformedFeatureFile (htsjdk.tribble.TribbleException.MalformedFeatureFile)1 VCFCodec (htsjdk.variant.vcf.VCFCodec)1 VCFFileReader (htsjdk.variant.vcf.VCFFileReader)1 VCFHeader (htsjdk.variant.vcf.VCFHeader)1 SQLException (java.sql.SQLException)1 Comparator (java.util.Comparator)1 GtfLine (utils.GtfLine)1