Search in sources :

Example 11 with Index

use of htsjdk.tribble.index.Index in project gatk by broadinstitute.

the class IndexUtils method loadTabixIndex.

/**
 * Try to load the tabix index from disk, checking for out of date indexes and old versions.
 *
 * <p>Loading is best-effort: any {@link IOException} or {@link RuntimeException} raised while
 * probing for, loading, or checking the index is reported at debug level and converted into a
 * {@code null} result rather than propagated. The {@code Utils.nonNull} argument check runs
 * before the try block, so a failure there is not converted.
 *
 * @param featureFile the feature file whose tabix index should be loaded
 * @return an Index, or null if {@code isTabix} reports false for the file's path or we're unable to load
 */
public static Index loadTabixIndex(final File featureFile) {
    Utils.nonNull(featureFile);
    try {
        final String path = featureFile.getAbsolutePath();
        final boolean isTabix = new AbstractFeatureReader.ComponentMethods().isTabix(path, null);
        if (!isTabix) {
            return null;
        }
        // The index is looked up next to the feature file, using the standard tabix extension.
        final String indexPath = ParsingUtils.appendToPath(path, TabixUtils.STANDARD_INDEX_EXTENSION);
        logger.debug("Loading tabix index from disk for file " + featureFile);
        final Index index = IndexFactory.loadIndex(indexPath);
        final File indexFile = new File(indexPath);
        checkIndexVersionAndModificationTime(featureFile, indexFile, index);
        return index;
    } catch (final IOException | RuntimeException e) {
        // Deliberately best-effort: callers only see null, so leave a trace of the cause for debugging.
        logger.debug("Unable to load tabix index for file " + featureFile + ": " + e);
        return null;
    }
}
Also used : AbstractFeatureReader(htsjdk.tribble.AbstractFeatureReader) Index(htsjdk.tribble.index.Index) IOException(java.io.IOException) File(java.io.File) IndexFeatureFile(org.broadinstitute.hellbender.tools.IndexFeatureFile)

Example 12 with Index

use of htsjdk.tribble.index.Index in project gatk by broadinstitute.

the class IndexUtils method createSequenceDictionaryFromFeatureIndex.

/**
 * Builds a sequence dictionary from the contig list stored in the index of a feature file.
 * A warning is logged first, stating that the feature file appears to contain no sequence
 * dictionary and that the associated index is being tried instead.
 * Note: the dictionary returned will not have the contig lengths filled in {@link SAMSequenceRecord#UNKNOWN_SEQUENCE_LENGTH} is used.
 * Note: this method is specifically designed for getting sequence dictionaries from indices on Feature files (tribble or tabix indices)
 *
 * @param featureFile the feature file whose index should be consulted
 * @return a SAMSequenceDictionary or null if the index cannot be loaded or there are no contigs in the index
 */
public static SAMSequenceDictionary createSequenceDictionaryFromFeatureIndex(final File featureFile) {
    Utils.nonNull(featureFile);

    final String warning = String.format(
            "Feature file \"%s\" appears to contain no sequence dictionary. "
                    + "Attempting to retrieve a sequence dictionary from the associated index file",
            featureFile.getAbsolutePath());
    logger.warn(warning);

    final Index featureIndex = loadIndex(featureFile);
    if (featureIndex == null) {
        // No usable index, so there is nothing to derive a dictionary from.
        return null;
    }
    return getSamSequenceDictionaryFromIndex(featureIndex);
}
Also used : Index(htsjdk.tribble.index.Index)

Example 13 with Index

use of htsjdk.tribble.index.Index in project gatk by broadinstitute.

the class AnnotateTargetsIntegrationTest method createTargetFile.

/**
 * Test fixture setup: generates random target intervals over the resolved reference
 * dictionary, writes them to {@code TARGET_FILE}, then builds a linear index for that file
 * and writes it to {@code TARGET_FILE_IDX}.
 *
 * @throws IOException declared for the file-writing steps (target file and index output stream)
 */
@BeforeClass
public void createTargetFile() throws IOException {
    final SAMSequenceDictionary referenceDictionary = resolveReferenceDictionary();
    final List<SimpleInterval> targetIntervals = createRandomIntervals(referenceDictionary, NUMBER_OF_TARGETS, MIN_TARGET_SIZE, MAX_TARGET_SIZE, MEAN_TARGET_SIZE, TARGET_SIZE_STDEV);
    final List<Target> targets = targetIntervals.stream().map(Target::new).collect(Collectors.toList());
    TargetWriter.writeTargetsToFile(TARGET_FILE, targets);
    final Index index = IndexFactory.createIndex(TARGET_FILE, new TargetCodec(), IndexFactory.IndexType.LINEAR);
    // try-with-resources so the index stream is closed even when index.write fails.
    try (final LittleEndianOutputStream stream = new LittleEndianOutputStream(new FileOutputStream(TARGET_FILE_IDX))) {
        index.write(stream);
    }
}
Also used : LittleEndianOutputStream(htsjdk.tribble.util.LittleEndianOutputStream) TargetCodec(org.broadinstitute.hellbender.utils.codecs.TargetCodec) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval) Index(htsjdk.tribble.index.Index) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) BeforeClass(org.testng.annotations.BeforeClass)

Example 14 with Index

use of htsjdk.tribble.index.Index in project ASCIIGenome by dariober.

the class UcscFetch method blockCompressAndIndex.

/**
 * Block compress input file and create associated tabix index. Newly created file and index are
 * deleted on exit if deleteOnExit true.
 *
 * <p>Every input line is split on tabs into a {@code GtfLine}, written to the block-compressed
 * output followed by a newline, and added to a {@code TabixFormat.GFF} index at the file
 * pointer where the line starts. The output stream and the input line iterator are released
 * on every exit path, including failures.
 *
 * @param in path of the input file to compress
 * @param bgzfOut path of the block-compressed output file; the index is written based on this file
 * @param deleteOnExit if true, the output file and its index are registered for deletion on JVM exit
 * @throws IOException
 * @throws InvalidRecordException
 * @throws RuntimeException if the index file for {@code bgzfOut} already exists as a regular file
 */
private void blockCompressAndIndex(String in, String bgzfOut, boolean deleteOnExit) throws IOException, InvalidRecordException {
    File inFile = new File(in);
    File outFile = new File(bgzfOut);
    LineIterator lin = utils.IOUtils.openURIForLineIterator(inFile.getAbsolutePath());
    // try-with-resources closes the writer on success, on the "index exists" failure, and on
    // any exception raised while reading, parsing or indexing.
    try (BlockCompressedOutputStream writer = new BlockCompressedOutputStream(outFile)) {
        long filePosition = writer.getFilePointer();
        TabixIndexCreator indexCreator = new TabixIndexCreator(TabixFormat.GFF);
        while (lin.hasNext()) {
            String line = lin.next();
            GtfLine gtf = new GtfLine(line.split("\t"));
            writer.write(line.getBytes());
            writer.write('\n');
            // The feature is indexed at the position where its line begins.
            indexCreator.addFeature(gtf, filePosition);
            filePosition = writer.getFilePointer();
        }
        writer.flush();
        File tbi = new File(bgzfOut + TabixUtils.STANDARD_INDEX_EXTENSION);
        if (tbi.exists() && tbi.isFile()) {
            throw new RuntimeException("Index file exists: " + tbi);
        }
        Index index = indexCreator.finalizeIndex(writer.getFilePointer());
        index.writeBasedOnFeatureFile(outFile);
    } finally {
        // The iterator's declared type is not Closeable; close it only when the implementation is.
        if (lin instanceof java.io.Closeable) {
            try {
                ((java.io.Closeable) lin).close();
            } catch (IOException ignored) {
                // Input has already been consumed (or a more relevant exception is in flight);
                // a failure to close the reader must not mask it.
            }
        }
    }
    if (deleteOnExit) {
        outFile.deleteOnExit();
        File idx = new File(outFile.getAbsolutePath() + TabixUtils.STANDARD_INDEX_EXTENSION);
        idx.deleteOnExit();
    }
}
Also used : GtfLine(utils.GtfLine) BlockCompressedOutputStream(htsjdk.samtools.util.BlockCompressedOutputStream) TabixIndexCreator(htsjdk.tribble.index.tabix.TabixIndexCreator) Index(htsjdk.tribble.index.Index) File(java.io.File) LineIterator(htsjdk.tribble.readers.LineIterator)

Example 15 with Index

use of htsjdk.tribble.index.Index in project ASCIIGenome by dariober.

the class MakeTabixIndex method blockCompressAndIndex.

/**
 * Block compress input file and create associated tabix index.
 *
 * <p>Line handling, after trimming:
 * <ul>
 *   <li>empty lines and lines starting with {@code "track "} are skipped;</li>
 *   <li>a line starting with {@code "##FASTA"} ends processing;</li>
 *   <li>other lines starting with {@code "#"} are copied to the output but not indexed;</li>
 *   <li>every remaining line is indexed and written. For non-VCF formats the first such line
 *       is preceded by a dummy line built by {@code makeDummyLine}.</li>
 * </ul>
 * A line that fails to index or write is reported on stderr and skipped, except that an
 * out-of-order feature aborts with {@link InvalidRecordException}.
 *
 * @param intab input file or URI to read lines from
 * @param bgzfOut block-compressed output file; the index is written based on this file
 * @param fmt tabix format used for the index
 * @throws IOException
 * @throws InvalidRecordException if features are added out of order
 */
private void blockCompressAndIndex(String intab, File bgzfOut, TabixFormat fmt) throws IOException, InvalidRecordException {
    LineIterator lin = utils.IOUtils.openURIForLineIterator(intab);
    // try-with-resources/finally: release the writer and the input on every exit path,
    // including the InvalidRecordException thrown from inside the loop.
    try (BlockCompressedOutputStream writer = new BlockCompressedOutputStream(bgzfOut)) {
        long filePosition = writer.getFilePointer();
        TabixIndexCreator indexCreator = new TabixIndexCreator(fmt);
        boolean first = true;
        // This is relevant to vcf files only: Prepare header and codec
        // ------------------------------------------------------------
        VCFHeader vcfHeader = null;
        VCFCodec vcfCodec = null;
        if (fmt.equals(TabixFormat.VCF)) {
            try (VCFFileReader vcfr = new VCFFileReader(new File(intab), false)) {
                vcfHeader = vcfr.getFileHeader();
            } catch (MalformedFeatureFile e) {
                // Fall back to an empty header when the file cannot be read as VCF.
                vcfHeader = new VCFHeader();
            }
            vcfCodec = new VCFCodec();
            vcfCodec.setVCFHeader(vcfHeader, Utils.getVCFHeaderVersion(vcfHeader));
        }
        // ------------------------------------------------------------
        int nWarnings = 10;
        while (lin.hasNext()) {
            String line = lin.next().trim();
            try {
                if (line.isEmpty() || line.startsWith("track ")) {
                    continue;
                }
                // Must be tested before the generic "#" branch below, which would otherwise
                // consume the directive and make this check unreachable.
                if (line.startsWith("##FASTA")) {
                    break;
                }
                if (line.startsWith("#")) {
                    writer.write((line + "\n").getBytes());
                    filePosition = writer.getFilePointer();
                    continue;
                }
                if (first && !fmt.equals(TabixFormat.VCF)) {
                    String dummy = this.makeDummyLine(line, fmt);
                    addLineToIndex(dummy, indexCreator, filePosition, fmt, null, null);
                    writer.write(dummy.getBytes());
                    writer.write('\n');
                    filePosition = writer.getFilePointer();
                    first = false;
                }
                addLineToIndex(line, indexCreator, filePosition, fmt, vcfHeader, vcfCodec);
                writer.write(line.getBytes());
                writer.write('\n');
                filePosition = writer.getFilePointer();
            } catch (Exception e) {
                // NOTE(review): this also catches IOException from writer.write and downgrades it
                // to a warning — confirm whether write failures should abort instead.
                // getMessage() may be null; guard so the handler itself cannot throw.
                String message = e.getMessage();
                if (message != null && (message.contains("added out sequence of order") || message.contains("Features added out of order"))) {
                    // Get a string marker for out-of-order from htsjdk/tribble/index/tabix/TabixIndexCreator.java
                    throw new InvalidRecordException();
                }
                if (nWarnings >= 0) {
                    System.err.println("Warning: " + message + ". Skipping:\n" + line);
                }
                if (nWarnings == 0) {
                    System.err.println("Additional warnings will not be show.");
                }
                nWarnings--;
            }
        }
        writer.flush();
        Index index = indexCreator.finalizeIndex(writer.getFilePointer());
        index.writeBasedOnFeatureFile(bgzfOut);
    } finally {
        CloserUtil.close(lin);
    }
}
Also used : VCFCodec(htsjdk.variant.vcf.VCFCodec) BlockCompressedOutputStream(htsjdk.samtools.util.BlockCompressedOutputStream) VCFFileReader(htsjdk.variant.vcf.VCFFileReader) TabixIndexCreator(htsjdk.tribble.index.tabix.TabixIndexCreator) Index(htsjdk.tribble.index.Index) MalformedFeatureFile(htsjdk.tribble.TribbleException.MalformedFeatureFile) LineIterator(htsjdk.tribble.readers.LineIterator) InvalidRecordException(exceptions.InvalidRecordException) SQLException(java.sql.SQLException) IOException(java.io.IOException) VCFHeader(htsjdk.variant.vcf.VCFHeader) MalformedFeatureFile(htsjdk.tribble.TribbleException.MalformedFeatureFile) File(java.io.File) InvalidRecordException(exceptions.InvalidRecordException)

Aggregations

Index (htsjdk.tribble.index.Index)29 File (java.io.File)20 Test (org.testng.annotations.Test)15 TabixIndex (htsjdk.tribble.index.tabix.TabixIndex)12 LinearIndex (htsjdk.tribble.index.linear.LinearIndex)9 CommandLineProgramTest (org.broadinstitute.hellbender.CommandLineProgramTest)8 IOException (java.io.IOException)7 BaseTest (org.broadinstitute.hellbender.utils.test.BaseTest)7 BlockCompressedOutputStream (htsjdk.samtools.util.BlockCompressedOutputStream)3 TabixIndexCreator (htsjdk.tribble.index.tabix.TabixIndexCreator)3 VCFCodec (htsjdk.variant.vcf.VCFCodec)3 SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary)2 LineIterator (htsjdk.tribble.readers.LineIterator)2 LittleEndianOutputStream (htsjdk.tribble.util.LittleEndianOutputStream)2 IndexFeatureFile (org.broadinstitute.hellbender.tools.IndexFeatureFile)2 SimpleInterval (org.broadinstitute.hellbender.utils.SimpleInterval)2 TargetCodec (org.broadinstitute.hellbender.utils.codecs.TargetCodec)2 BeforeClass (org.testng.annotations.BeforeClass)2 LocationAwareOutputStream (com.github.lindenb.jvarkit.io.LocationAwareOutputStream)1 BedLine (com.github.lindenb.jvarkit.util.bio.bed.BedLine)1