use of htsjdk.samtools.SAMSequenceRecord in project gatk by broadinstitute.
the class PlotSegmentedCopyRatio method doWork.
@Override
protected Object doWork() {
checkRegularReadableUserFiles();
//get sample name from input files (consistency check is performed)
final String sampleName = getSampleName();
//load contig names and lengths from the sequence dictionary into a LinkedHashMap
final SAMSequenceDictionary sequenceDictionary = ReferenceUtils.loadFastaDictionary(sequenceDictionaryFile);
Utils.validateArg(sequenceDictionary.getSequences().stream().map(SAMSequenceRecord::getSequenceName).noneMatch(n -> n.contains(CONTIG_DELIMITER)), String.format("Contig names cannot contain \"%s\".", CONTIG_DELIMITER));
final Map<String, Integer> contigLengthMap = sequenceDictionary.getSequences().stream().filter(s -> s.getSequenceLength() >= minContigLength).collect(Collectors.toMap(SAMSequenceRecord::getSequenceName, SAMSequenceRecord::getSequenceLength, (c, l) -> {
throw new IllegalArgumentException(String.format("Duplicate contig in sequence dictionary: %s", c));
}, LinkedHashMap::new));
Utils.validateArg(contigLengthMap.size() > 0, "There must be at least one contig above the threshold length in the sequence dictionary.");
logger.info("Contigs above length threshold: " + contigLengthMap.toString());
//check that contigs in input files are present in sequence dictionary and that data points are valid given lengths
validateContigs(contigLengthMap);
//generate the plots
final List<String> contigNames = new ArrayList<>(contigLengthMap.keySet());
final List<Integer> contigLengths = new ArrayList<>(contigLengthMap.values());
writeSegmentedCopyRatioPlots(sampleName, contigNames, contigLengths);
return "SUCCESS";
}
use of htsjdk.samtools.SAMSequenceRecord in project gatk by broadinstitute.
the class MultiVariantDataSource method validateAllSequenceDictionaries.
/**
* GATKTool only validates individual feature dictionaries against the reference dictionary, so cross-validate
* all of the dictionaries against each other here by ensuring that each contig found in any dictionary has the
* same length (and md5, when a value is present for that contig in both dictionaries) in every other dictionary
* in which its present.
*/
private void validateAllSequenceDictionaries() {
final Map<String, FeatureDataSource<VariantContext>> contigMap = new HashMap<>();
featureDataSources.forEach(ds -> {
final SAMSequenceDictionary dictionary = ds.getSequenceDictionary();
if (dictionary == null) {
logger.warn("A sequence dictionary is required for each input when using multiple inputs, and one could" + " not be obtained for feature input: " + ds.getName() + ". The input may not exist or may not have a valid header");
} else {
dictionary.getSequences().forEach(sourceSequence -> {
final String sourceSequenceName = sourceSequence.getSequenceName();
final FeatureDataSource<VariantContext> previousDataSource = contigMap.getOrDefault(sourceSequenceName, null);
if (previousDataSource != null) {
final SAMSequenceDictionary previousDictionary = previousDataSource.getSequenceDictionary();
final SAMSequenceRecord previousSequence = previousDictionary.getSequence(sourceSequenceName);
validateSequenceDictionaryRecords(ds.getName(), dictionary, sourceSequence, previousDataSource.getName(), previousDictionary, previousSequence);
} else {
contigMap.put(sourceSequenceName, ds);
}
});
}
});
}
use of htsjdk.samtools.SAMSequenceRecord in project gatk by broadinstitute.
the class ReorderSam method doWork.
@Override
protected Object doWork() {
IOUtil.assertFileIsReadable(INPUT);
IOUtil.assertFileIsReadable(REFERENCE_SEQUENCE);
IOUtil.assertFileIsWritable(OUTPUT);
final SamReader in = SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE).open(INPUT);
ReferenceSequenceFile reference = ReferenceSequenceFileFactory.getReferenceSequenceFile(REFERENCE_SEQUENCE);
SAMSequenceDictionary refDict = reference.getSequenceDictionary();
if (refDict == null) {
CloserUtil.close(in);
throw new UserException("No reference sequence dictionary found. Aborting. " + "You can create a sequence dictionary for the reference fasta using CreateSequenceDictionary.jar.");
}
printDictionary("SAM/BAM file", in.getFileHeader().getSequenceDictionary());
printDictionary("Reference", refDict);
Map<Integer, Integer> newOrder = buildSequenceDictionaryMap(refDict, in.getFileHeader().getSequenceDictionary());
// has to be after we create the newOrder
SAMFileHeader outHeader = ReadUtils.cloneSAMFileHeader(in.getFileHeader());
outHeader.setSequenceDictionary(refDict);
logger.info("Writing reads...");
if (in.hasIndex()) {
try (final SAMFileWriter out = createSAMWriter(OUTPUT, REFERENCE_SEQUENCE, outHeader, true)) {
// write the reads in contig order
for (final SAMSequenceRecord contig : refDict.getSequences()) {
final SAMRecordIterator it = in.query(contig.getSequenceName(), 0, 0, false);
writeReads(out, it, newOrder, contig.getSequenceName());
}
// don't forget the unmapped reads
writeReads(out, in.queryUnmapped(), newOrder, "unmapped");
}
} else {
try (final SAMFileWriter out = createSAMWriter(OUTPUT, REFERENCE_SEQUENCE, outHeader, false)) {
writeReads(out, in.iterator(), newOrder, "All reads");
}
}
// cleanup
CloserUtil.close(in);
return null;
}
use of htsjdk.samtools.SAMSequenceRecord in project gatk by broadinstitute.
the class UpdateVCFSequenceDictionary method apply.
@Override
public void apply(final VariantContext vc, final ReadsContext readsContext, final ReferenceContext ref, final FeatureContext featureContext) {
// Validate each variant against the source dictionary manually
SAMSequenceRecord samSeqRec = sourceDictionary.getSequence(vc.getContig());
if (samSeqRec == null) {
throw new CommandLineException.BadArgumentValue(String.format("The input variant file contains a variant (ID: \"%s\") with a reference to a sequence (\"%s\") " + "that is not present in the provided dictionary", vc.getID(), vc.getContig()));
} else if (vc.getEnd() > samSeqRec.getSequenceLength()) {
throw new CommandLineException.BadArgumentValue(String.format("The input variant file contains a variant (ID: \"%s\") with a reference to a sequence (\"%s\") " + "that ends at a position (%d) that exceeds the length of that sequence (%d) in the provided dictionary", vc.getID(), vc.getContig(), vc.getEnd(), samSeqRec.getSequenceLength()));
}
vcfWriter.add(vc);
}
use of htsjdk.samtools.SAMSequenceRecord in project gatk by broadinstitute.
the class IndexUtils method getSamSequenceDictionaryFromIndex.
private static SAMSequenceDictionary getSamSequenceDictionaryFromIndex(final Index index) {
final List<String> seqNames = index.getSequenceNames();
if (seqNames == null || seqNames.isEmpty()) {
return null;
}
final SAMSequenceDictionary dict = new SAMSequenceDictionary();
//use UNKNOWN_SEQUENCE_LENGTH to indicate contigs that will not be compared by length (see SequenceDictionaryUtils.sequenceRecordsAreEquivalent)
seqNames.forEach(seqName -> dict.addSequence(new SAMSequenceRecord(seqName, SAMSequenceRecord.UNKNOWN_SEQUENCE_LENGTH)));
return dict;
}
Aggregations