Use of htsjdk.samtools.reference.ReferenceSequenceFile in project gatk by broadinstitute.
Class IntervalUtils, method getContigSizes.
/**
 * Returns a map of contig names with their sizes.
 *
 * <p>The reference is opened only to read its sequence dictionary and is closed again
 * before this method returns. Entries are inserted in the order of the location list
 * built from that dictionary.
 *
 * @param reference The reference for the intervals.
 * @return A map of contig names with their sizes.
 * @throws java.io.UncheckedIOException if an I/O error occurs while closing the reference.
 */
public static Map<String, Integer> getContigSizes(final File reference) {
    final Map<String, Integer> lengths = new LinkedHashMap<>();
    // try-with-resources: the reference file handle was previously never released.
    try (final ReferenceSequenceFile referenceSequenceFile = createReference(reference)) {
        final List<GenomeLoc> locs = GenomeLocSortedSet.createSetFromSequenceDictionary(referenceSequenceFile.getSequenceDictionary()).toList();
        for (final GenomeLoc loc : locs) {
            lengths.put(loc.getContig(), loc.size());
        }
    } catch (final java.io.IOException e) {
        throw new java.io.UncheckedIOException("I/O failure on reference " + reference, e);
    }
    return lengths;
}
Use of htsjdk.samtools.reference.ReferenceSequenceFile in project gatk by broadinstitute.
Class CollectTargetedMetrics, method doWork.
/**
 * Asserts that the input files are readable and the output files are writable, then
 * builds a collector via {@code makeCollector}, feeds it every record of INPUT and
 * writes the resulting metrics to OUTPUT.
 *
 * <p>The reads file and the optional reference are closed when the method exits,
 * whether it completes normally or fails part-way through.
 *
 * @return always {@code null}.
 */
@Override
protected Object doWork() {
    for (final File targetInterval : TARGET_INTERVALS) {
        IOUtil.assertFileIsReadable(targetInterval);
    }
    IOUtil.assertFileIsReadable(INPUT);
    IOUtil.assertFileIsWritable(OUTPUT);
    if (PER_TARGET_COVERAGE != null) {
        IOUtil.assertFileIsWritable(PER_TARGET_COVERAGE);
    }
    if (REFERENCE_SEQUENCE != null) {
        // Checked before the reader is opened, because the reader is configured with this file.
        IOUtil.assertFileIsReadable(REFERENCE_SEQUENCE);
    }

    final SamReader reader = SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE).open(INPUT);
    ReferenceSequenceFile ref = null;
    try {
        final IntervalList targetIntervals = IntervalList.fromFiles(TARGET_INTERVALS);

        // Validate that the targets and baits have the same references as the reads file
        SequenceUtil.assertSequenceDictionariesEqual(reader.getFileHeader().getSequenceDictionary(), targetIntervals.getHeader().getSequenceDictionary());
        SequenceUtil.assertSequenceDictionariesEqual(reader.getFileHeader().getSequenceDictionary(), getProbeIntervals().getHeader().getSequenceDictionary());

        if (REFERENCE_SEQUENCE != null) {
            ref = ReferenceSequenceFileFactory.getReferenceSequenceFile(REFERENCE_SEQUENCE);
            SequenceUtil.assertSequenceDictionariesEqual(reader.getFileHeader().getSequenceDictionary(), ref.getSequenceDictionary(), INPUT, REFERENCE_SEQUENCE);
        }

        final COLLECTOR collector = makeCollector(METRIC_ACCUMULATION_LEVEL, reader.getFileHeader().getReadGroups(), ref, PER_TARGET_COVERAGE, targetIntervals, getProbeIntervals(), getProbeSetName());
        final ProgressLogger progress = new ProgressLogger(logger);
        for (final SAMRecord record : reader) {
            collector.acceptRecord(record, null);
            progress.record(record);
        }

        // Write the output file
        final MetricsFile<METRIC, Integer> metrics = getMetricsFile();
        collector.finish();
        collector.addAllLevelsToFile(metrics);
        metrics.write(OUTPUT);
    } finally {
        // Release both handles on every exit path; previously the reader was closed only
        // on success and the reference was never closed.
        CloserUtil.close(reader);
        CloserUtil.close(ref);
    }
    return null;
}
Use of htsjdk.samtools.reference.ReferenceSequenceFile in project gatk by broadinstitute.
Class ValidateSamFile, method doWork.
/**
 * Validates INPUT with a {@code SamFileValidator} and writes the report to OUTPUT,
 * or to standard output when OUTPUT is not set.
 *
 * <p>The report writer is flushed, and the reader, the optional reference and a
 * file-backed report writer are closed, on every exit path. A writer wrapping
 * {@code System.out} is flushed but left open.
 *
 * @return the boolean result of the validator for the selected MODE.
 */
@Override
protected Object doWork() {
    IOUtil.assertFileIsReadable(INPUT);
    if (REFERENCE_SEQUENCE != null) {
        IOUtil.assertFileIsReadable(REFERENCE_SEQUENCE);
    }

    final PrintWriter out;
    if (OUTPUT != null) {
        IOUtil.assertFileIsWritable(OUTPUT);
        try {
            out = new PrintWriter(OUTPUT);
        } catch (FileNotFoundException e) {
            // we already asserted this so we should not get here
            throw new GATKException("Unexpected exception", e);
        }
    } else {
        out = new PrintWriter(System.out);
    }

    ReferenceSequenceFile reference = null;
    SamReader samReader = null;
    try {
        if (REFERENCE_SEQUENCE != null) {
            reference = ReferenceSequenceFileFactory.getReferenceSequenceFile(REFERENCE_SEQUENCE);
        }

        final SamReaderFactory factory = SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE).validationStringency(ValidationStringency.SILENT).enable(SamReaderFactory.Option.VALIDATE_CRC_CHECKSUMS);
        samReader = factory.open(INPUT);

        // Index validation is only attempted for BAM input.
        if (samReader.type() != SamReader.Type.BAM_TYPE) {
            VALIDATE_INDEX = false;
        }
        factory.setOption(SamReaderFactory.Option.CACHE_FILE_BASED_INDEXES, VALIDATE_INDEX);
        factory.reapplyOptions(samReader);

        final SamFileValidator validator = new SamFileValidator(out, MAX_OPEN_TEMP_FILES);
        validator.setErrorsToIgnore(IGNORE);
        if (IGNORE_WARNINGS) {
            validator.setIgnoreWarnings(IGNORE_WARNINGS);
        }
        if (MODE == Mode.SUMMARY) {
            validator.setVerbose(false, 0);
        } else {
            validator.setVerbose(true, MAX_OUTPUT);
        }
        if (IS_BISULFITE_SEQUENCED) {
            validator.setBisulfiteSequenced(IS_BISULFITE_SEQUENCED);
        }
        if (VALIDATE_INDEX) {
            validator.setIndexValidationStringency(BamIndexValidator.IndexValidationStringency.EXHAUSTIVE);
        }
        if (IOUtil.isRegularPath(INPUT)) {
            // Do not check termination if reading from a stream
            validator.validateBamFileTermination(INPUT);
        }

        boolean isValid = false;
        switch (MODE) {
            case SUMMARY:
                isValid = validator.validateSamFileSummary(samReader, reference);
                break;
            case VERBOSE:
                isValid = validator.validateSamFileVerbose(samReader, reference);
                break;
        }
        return isValid;
    } finally {
        // Flush even when validation throws, so whatever was reported is not lost.
        out.flush();
        if (OUTPUT != null) {
            // Only close the writer we opened on a file; never close System.out.
            out.close();
        }
        closeQuietly(samReader);
        closeQuietly(reference);
    }
}

/** Closes the given resource if it is non-null, ignoring any failure raised by close(). */
private static void closeQuietly(final java.io.Closeable resource) {
    if (resource == null) {
        return;
    }
    try {
        resource.close();
    } catch (final java.io.IOException ignored) {
        // Best-effort cleanup: a failure to close must not mask the validation outcome.
    }
}
Use of htsjdk.samtools.reference.ReferenceSequenceFile in project gatk by broadinstitute.
Class CreateSequenceDictionary, method makeSequenceDictionary.
/**
 * Read sequences from the given reference file, up to NUM_SEQUENCES of them, and convert
 * them into SAMSequenceRecords. The reference file is closed before returning.
 *
 * @param referenceFile fasta or fasta.gz
 * @return SAMSequenceRecords containing info from the fasta, plus from cmd-line arguments.
 * @throws UserException.MalformedFile if a sequence name appears more than once.
 * @throws java.io.UncheckedIOException if an I/O error occurs while closing the reference file.
 */
SAMSequenceDictionary makeSequenceDictionary(final File referenceFile) {
    final List<SAMSequenceRecord> ret = new ArrayList<>();
    final Set<String> sequenceNames = new HashSet<>();
    // try-with-resources: the reference file handle was previously never released.
    try (final ReferenceSequenceFile refSeqFile = ReferenceSequenceFileFactory.getReferenceSequenceFile(referenceFile, true)) {
        ReferenceSequence refSeq;
        for (int numSequences = 0; numSequences < NUM_SEQUENCES && (refSeq = refSeqFile.nextSequence()) != null; ++numSequences) {
            // Set.add returns false when the name is already present.
            if (!sequenceNames.add(refSeq.getName())) {
                throw new UserException.MalformedFile(referenceFile, "Sequence name appears more than once in reference: " + refSeq.getName());
            }
            ret.add(makeSequenceRecord(refSeq));
        }
    } catch (final java.io.IOException e) {
        throw new java.io.UncheckedIOException("I/O failure on reference " + referenceFile, e);
    }
    return new SAMSequenceDictionary(ret);
}
Use of htsjdk.samtools.reference.ReferenceSequenceFile in project gatk by broadinstitute.
Class NormalizeFasta, method doWork.
/**
 * Copies every sequence of INPUT to OUTPUT, writing a {@code >name} header line followed
 * by the bases with a line break inserted after every LINE_LENGTH bases.
 * A sequence with no bases gets only its header line and a logged warning.
 *
 * @return always {@code null}.
 */
@Override
protected Object doWork() {
    IOUtil.assertFileIsReadable(INPUT);
    IOUtil.assertFileIsWritable(OUTPUT);
    final boolean sameFile = INPUT.getAbsoluteFile().equals(OUTPUT.getAbsoluteFile());
    Utils.validateArg(!sameFile, "Input and output cannot be the same file.");

    try (final ReferenceSequenceFile fasta = ReferenceSequenceFileFactory.getReferenceSequenceFile(INPUT, TRUNCATE_SEQUENCE_NAMES_AT_WHITESPACE);
         final BufferedWriter writer = IOUtil.openFileForBufferedWriting(OUTPUT)) {
        for (ReferenceSequence record = fasta.nextSequence(); record != null; record = fasta.nextSequence()) {
            final String contigName = record.getName();
            final byte[] contigBases = record.getBases();
            try {
                // Header line.
                writer.write(">");
                writer.write(contigName);
                writer.newLine();

                if (contigBases.length == 0) {
                    logger.warn("Sequence " + contigName + " contains 0 bases.");
                    continue;
                }

                // Bases, broken into lines of LINE_LENGTH.
                int offset = 0;
                for (final byte base : contigBases) {
                    if (offset > 0 && offset % LINE_LENGTH == 0) {
                        writer.write("\n");
                    }
                    writer.write(base);
                    offset++;
                }
                writer.write("\n");
            } catch (IOException writeFailure) {
                throw new RuntimeIOException("Error writing to file " + OUTPUT.getAbsolutePath(), writeFailure);
            }
        }
    } catch (IOException e) {
        throw new RuntimeIOException(e);
    }
    return null;
}
Aggregations