Search in sources :

Example 6 with ReferenceSequenceFile

use of htsjdk.samtools.reference.ReferenceSequenceFile in project gatk by broadinstitute.

the class IntervalUtils method getContigSizes.

/**
 * Builds a lookup from contig name to contig size for the given reference.
 *
 * @param reference The reference for the intervals.
 * @return A map of contig names with their sizes; entries are inserted in the order
 *         the locations are listed for the reference's sequence dictionary.
 */
public static Map<String, Integer> getContigSizes(final File reference) {
    // NOTE(review): the reference opened here is never closed in this method — confirm
    // whether createReference() hands ownership to the caller.
    final ReferenceSequenceFile refFile = createReference(reference);
    final Map<String, Integer> contigSizes = new LinkedHashMap<>();
    GenomeLocSortedSet.createSetFromSequenceDictionary(refFile.getSequenceDictionary())
            .toList()
            .forEach(contig -> contigSizes.put(contig.getContig(), contig.size()));
    return contigSizes;
}
Also used : ReferenceSequenceFile(htsjdk.samtools.reference.ReferenceSequenceFile)

Example 7 with ReferenceSequenceFile

use of htsjdk.samtools.reference.ReferenceSequenceFile in project gatk by broadinstitute.

the class CollectTargetedMetrics method doWork.

/**
 * Asserts that files are readable and writable, then feeds every record of INPUT
 * to a collector created by {@code makeCollector} and writes the resulting metrics
 * to OUTPUT.
 *
 * The reader and the optional reference sequence file are closed when the method
 * exits, including when validation or metric collection throws.
 *
 * @return always {@code null}
 */
@Override
protected Object doWork() {
    for (final File targetInterval : TARGET_INTERVALS) {
        IOUtil.assertFileIsReadable(targetInterval);
    }
    IOUtil.assertFileIsReadable(INPUT);
    IOUtil.assertFileIsWritable(OUTPUT);
    if (PER_TARGET_COVERAGE != null) {
        IOUtil.assertFileIsWritable(PER_TARGET_COVERAGE);
    }
    // Check the reference before the reader is configured with it, so an unreadable
    // reference is reported up front.
    if (REFERENCE_SEQUENCE != null) {
        IOUtil.assertFileIsReadable(REFERENCE_SEQUENCE);
    }
    final SamReader reader = SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE).open(INPUT);
    ReferenceSequenceFile ref = null;
    try {
        final IntervalList targetIntervals = IntervalList.fromFiles(TARGET_INTERVALS);
        // Validate that the targets and baits have the same references as the reads file
        SequenceUtil.assertSequenceDictionariesEqual(reader.getFileHeader().getSequenceDictionary(), targetIntervals.getHeader().getSequenceDictionary());
        SequenceUtil.assertSequenceDictionariesEqual(reader.getFileHeader().getSequenceDictionary(), getProbeIntervals().getHeader().getSequenceDictionary());
        if (REFERENCE_SEQUENCE != null) {
            ref = ReferenceSequenceFileFactory.getReferenceSequenceFile(REFERENCE_SEQUENCE);
            SequenceUtil.assertSequenceDictionariesEqual(reader.getFileHeader().getSequenceDictionary(), ref.getSequenceDictionary(), INPUT, REFERENCE_SEQUENCE);
        }
        final COLLECTOR collector = makeCollector(METRIC_ACCUMULATION_LEVEL, reader.getFileHeader().getReadGroups(), ref, PER_TARGET_COVERAGE, targetIntervals, getProbeIntervals(), getProbeSetName());
        final ProgressLogger progress = new ProgressLogger(logger);
        for (final SAMRecord record : reader) {
            collector.acceptRecord(record, null);
            progress.record(record);
        }
        // Write the output file
        final MetricsFile<METRIC, Integer> metrics = getMetricsFile();
        collector.finish();
        collector.addAllLevelsToFile(metrics);
        metrics.write(OUTPUT);
    } finally {
        // Release file handles on every exit path, not only on success.
        CloserUtil.close(reader);
        CloserUtil.close(ref);
    }
    return null;
}
Also used : SamReader(htsjdk.samtools.SamReader) IntervalList(htsjdk.samtools.util.IntervalList) SAMRecord(htsjdk.samtools.SAMRecord) ReferenceSequenceFile(htsjdk.samtools.reference.ReferenceSequenceFile) ProgressLogger(org.broadinstitute.hellbender.utils.runtime.ProgressLogger) MetricsFile(htsjdk.samtools.metrics.MetricsFile) File(java.io.File)

Example 8 with ReferenceSequenceFile

use of htsjdk.samtools.reference.ReferenceSequenceFile in project gatk by broadinstitute.

the class ValidateSamFile method doWork.

/**
 * Validates the SAM/BAM file given by INPUT and writes the findings to OUTPUT,
 * or to standard output when OUTPUT is not set.
 *
 * The reader, the optional reference and (when writing to a file) the output
 * writer are closed before returning; standard output is only flushed.
 *
 * @return {@code true} (boxed) when the validator reports the file as valid, {@code false} otherwise
 * @throws GATKException if the output file cannot be opened after it was asserted writable,
 *         or if closing the reader or the reference fails
 */
@Override
protected Object doWork() {
    IOUtil.assertFileIsReadable(INPUT);
    ReferenceSequenceFile reference = null;
    if (REFERENCE_SEQUENCE != null) {
        IOUtil.assertFileIsReadable(REFERENCE_SEQUENCE);
        reference = ReferenceSequenceFileFactory.getReferenceSequenceFile(REFERENCE_SEQUENCE);
    }
    final PrintWriter out;
    if (OUTPUT != null) {
        IOUtil.assertFileIsWritable(OUTPUT);
        try {
            out = new PrintWriter(OUTPUT);
        } catch (FileNotFoundException e) {
            // we already asserted this so we should not get here
            throw new GATKException("Unexpected exception", e);
        }
    } else {
        out = new PrintWriter(System.out);
    }
    try {
        final SamReaderFactory factory = SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE).validationStringency(ValidationStringency.SILENT).enable(SamReaderFactory.Option.VALIDATE_CRC_CHECKSUMS);
        // referenceToClose only exists so the (possibly null) reference is closed with the reader.
        try (final ReferenceSequenceFile referenceToClose = reference;
             final SamReader samReader = factory.open(INPUT)) {
            // Index validation is only attempted for BAM input.
            if (samReader.type() != SamReader.Type.BAM_TYPE) {
                VALIDATE_INDEX = false;
            }
            factory.setOption(SamReaderFactory.Option.CACHE_FILE_BASED_INDEXES, VALIDATE_INDEX);
            factory.reapplyOptions(samReader);
            final SamFileValidator validator = new SamFileValidator(out, MAX_OPEN_TEMP_FILES);
            validator.setErrorsToIgnore(IGNORE);
            if (IGNORE_WARNINGS) {
                validator.setIgnoreWarnings(IGNORE_WARNINGS);
            }
            if (MODE == Mode.SUMMARY) {
                validator.setVerbose(false, 0);
            } else {
                validator.setVerbose(true, MAX_OUTPUT);
            }
            if (IS_BISULFITE_SEQUENCED) {
                validator.setBisulfiteSequenced(IS_BISULFITE_SEQUENCED);
            }
            if (VALIDATE_INDEX) {
                validator.setIndexValidationStringency(BamIndexValidator.IndexValidationStringency.EXHAUSTIVE);
            }
            if (IOUtil.isRegularPath(INPUT)) {
                // Do not check termination if reading from a stream
                validator.validateBamFileTermination(INPUT);
            }
            boolean isValid = false;
            switch (MODE) {
                case SUMMARY:
                    isValid = validator.validateSamFileSummary(samReader, reference);
                    break;
                case VERBOSE:
                    isValid = validator.validateSamFileVerbose(samReader, reference);
                    break;
            }
            return isValid;
        } catch (java.io.IOException e) {
            throw new GATKException("Error closing " + INPUT + " or its reference", e);
        }
    } finally {
        out.flush();
        // Only close a writer we opened on a file; closing the System.out wrapper would close stdout.
        if (OUTPUT != null) {
            out.close();
        }
    }
}
Also used : FileNotFoundException(java.io.FileNotFoundException) ReferenceSequenceFile(htsjdk.samtools.reference.ReferenceSequenceFile) GATKException(org.broadinstitute.hellbender.exceptions.GATKException) PrintWriter(java.io.PrintWriter)

Example 9 with ReferenceSequenceFile

use of htsjdk.samtools.reference.ReferenceSequenceFile in project gatk by broadinstitute.

the class CreateSequenceDictionary method makeSequenceDictionary.

/**
 * Read all the sequences from the given reference file, and convert into SAMSequenceRecords.
 * At most NUM_SEQUENCES sequences are read. The reference file is closed before returning,
 * including when a duplicate sequence name aborts the read.
 *
 * @param referenceFile fasta or fasta.gz
 * @return SAMSequenceRecords containing info from the fasta, plus from cmd-line arguments.
 * @throws UserException.MalformedFile if a sequence name appears more than once in the reference
 * @throws java.io.UncheckedIOException if closing the reference file fails
 */
SAMSequenceDictionary makeSequenceDictionary(final File referenceFile) {
    final List<SAMSequenceRecord> ret = new ArrayList<>();
    final Set<String> sequenceNames = new HashSet<>();
    try (final ReferenceSequenceFile refSeqFile = ReferenceSequenceFileFactory.getReferenceSequenceFile(referenceFile, true)) {
        ReferenceSequence refSeq;
        for (int numSequences = 0; numSequences < NUM_SEQUENCES && (refSeq = refSeqFile.nextSequence()) != null; ++numSequences) {
            // Set.add returns false when the name is already present, i.e. a duplicate.
            if (!sequenceNames.add(refSeq.getName())) {
                throw new UserException.MalformedFile(referenceFile, "Sequence name appears more than once in reference: " + refSeq.getName());
            }
            ret.add(makeSequenceRecord(refSeq));
        }
    } catch (final java.io.IOException e) {
        throw new java.io.UncheckedIOException("Failed to close reference file " + referenceFile, e);
    }
    return new SAMSequenceDictionary(ret);
}
Also used : ArrayList(java.util.ArrayList) ReferenceSequenceFile(htsjdk.samtools.reference.ReferenceSequenceFile) ReferenceSequence(htsjdk.samtools.reference.ReferenceSequence) HashSet(java.util.HashSet)

Example 10 with ReferenceSequenceFile

use of htsjdk.samtools.reference.ReferenceSequenceFile in project gatk by broadinstitute.

the class NormalizeFasta method doWork.

/**
 * Copies every sequence of INPUT to OUTPUT as FASTA: a {@code >name} header line
 * followed by the bases, with a line break before every base whose index is a
 * non-zero multiple of LINE_LENGTH. Sequences with no bases get only the header
 * line, and a warning is logged.
 *
 * @return always {@code null}
 */
@Override
protected Object doWork() {
    IOUtil.assertFileIsReadable(INPUT);
    IOUtil.assertFileIsWritable(OUTPUT);
    Utils.validateArg(!INPUT.getAbsoluteFile().equals(OUTPUT.getAbsoluteFile()), "Input and output cannot be the same file.");
    try (final ReferenceSequenceFile fasta = ReferenceSequenceFileFactory.getReferenceSequenceFile(INPUT, TRUNCATE_SEQUENCE_NAMES_AT_WHITESPACE);
        final BufferedWriter writer = IOUtil.openFileForBufferedWriting(OUTPUT)) {
        for (ReferenceSequence record = fasta.nextSequence(); record != null; record = fasta.nextSequence()) {
            final String contigName = record.getName();
            final byte[] contigBases = record.getBases();
            try {
                // Header line
                writer.write(">");
                writer.write(contigName);
                writer.newLine();
                if (contigBases.length > 0) {
                    int position = 0;
                    for (final byte base : contigBases) {
                        // Break the line before every LINE_LENGTH-th base (never before the first)
                        if (position > 0 && position % LINE_LENGTH == 0) {
                            writer.write("\n");
                        }
                        writer.write(base);
                        position++;
                    }
                    writer.write("\n");
                } else {
                    logger.warn("Sequence " + contigName + " contains 0 bases.");
                }
            } catch (IOException writeFailure) {
                throw new RuntimeIOException("Error writing to file " + OUTPUT.getAbsolutePath(), writeFailure);
            }
        }
    } catch (IOException closeOrOpenFailure) {
        throw new RuntimeIOException(closeOrOpenFailure);
    }
    return null;
}
Also used : RuntimeIOException(htsjdk.samtools.util.RuntimeIOException) ReferenceSequenceFile(htsjdk.samtools.reference.ReferenceSequenceFile) ReferenceSequence(htsjdk.samtools.reference.ReferenceSequence) IOException(java.io.IOException) BufferedWriter(java.io.BufferedWriter)

Aggregations

ReferenceSequenceFile (htsjdk.samtools.reference.ReferenceSequenceFile): 15, File (java.io.File): 7, ReferenceSequence (htsjdk.samtools.reference.ReferenceSequence): 5, FileNotFoundException (java.io.FileNotFoundException): 4, UserException (org.broadinstitute.hellbender.exceptions.UserException): 4, CachingIndexedFastaSequenceFile (org.broadinstitute.hellbender.utils.fasta.CachingIndexedFastaSequenceFile): 4, SAMFileHeader (htsjdk.samtools.SAMFileHeader): 3, SAMFileWriter (htsjdk.samtools.SAMFileWriter): 3, SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary): 3, SimpleInterval (org.broadinstitute.hellbender.utils.SimpleInterval): 3, SamReader (htsjdk.samtools.SamReader): 2, Haplotype (org.broadinstitute.hellbender.utils.haplotype.Haplotype): 2, GATKRead (org.broadinstitute.hellbender.utils.read.GATKRead): 2, ReferenceBases (org.broadinstitute.hellbender.utils.reference.ReferenceBases): 2, BeforeClass (org.testng.annotations.BeforeClass): 2, SAMRecord (htsjdk.samtools.SAMRecord): 1, SAMRecordIterator (htsjdk.samtools.SAMRecordIterator): 1, SAMSequenceRecord (htsjdk.samtools.SAMSequenceRecord): 1, MetricsFile (htsjdk.samtools.metrics.MetricsFile): 1, IntervalList (htsjdk.samtools.util.IntervalList): 1