use of htsjdk.samtools.SAMSequenceDictionary in project gatk by broadinstitute.
the class SimpleIntervalUnitTest method expandWithinContigTestData.
@DataProvider(name = "ExpandWithinContigData")
public Object[][] expandWithinContigTestData() {
final int CONTIG_LENGTH = 10000;
final SAMSequenceDictionary dictionary = new SAMSequenceDictionary(Arrays.asList(new SAMSequenceRecord("1", CONTIG_LENGTH)));
return new Object[][] { { new SimpleInterval("1", 5, 10), 0, CONTIG_LENGTH, dictionary, new SimpleInterval("1", 5, 10) }, { new SimpleInterval("1", 5, 10), 1, CONTIG_LENGTH, dictionary, new SimpleInterval("1", 4, 11) }, { new SimpleInterval("1", 1, 10), 10, CONTIG_LENGTH, dictionary, new SimpleInterval("1", 1, 20) }, { new SimpleInterval("1", 10, 20), 10, CONTIG_LENGTH, dictionary, new SimpleInterval("1", 1, 30) }, { new SimpleInterval("1", 10, 20), 9, CONTIG_LENGTH, dictionary, new SimpleInterval("1", 1, 29) }, { new SimpleInterval("1", 30, 40), 5, CONTIG_LENGTH, dictionary, new SimpleInterval("1", 25, 45) }, { new SimpleInterval("1", CONTIG_LENGTH - 10, CONTIG_LENGTH), 10, CONTIG_LENGTH, dictionary, new SimpleInterval("1", CONTIG_LENGTH - 20, CONTIG_LENGTH) }, { new SimpleInterval("1", CONTIG_LENGTH - 20, CONTIG_LENGTH - 10), 11, CONTIG_LENGTH, dictionary, new SimpleInterval("1", CONTIG_LENGTH - 31, CONTIG_LENGTH) }, { new SimpleInterval("1", CONTIG_LENGTH - 20, CONTIG_LENGTH - 10), 10, CONTIG_LENGTH, dictionary, new SimpleInterval("1", CONTIG_LENGTH - 30, CONTIG_LENGTH) } };
}
use of htsjdk.samtools.SAMSequenceDictionary in project gatk-protected by broadinstitute.
the class PlotSegmentedCopyRatio method doWork.
@Override
protected Object doWork() {
checkRegularReadableUserFiles();
//get sample name from input files (consistency check is performed)
final String sampleName = getSampleName();
//load contig names and lengths from the sequence dictionary into a LinkedHashMap
final SAMSequenceDictionary sequenceDictionary = ReferenceUtils.loadFastaDictionary(sequenceDictionaryFile);
Utils.validateArg(sequenceDictionary.getSequences().stream().map(SAMSequenceRecord::getSequenceName).noneMatch(n -> n.contains(CONTIG_DELIMITER)), String.format("Contig names cannot contain \"%s\".", CONTIG_DELIMITER));
final Map<String, Integer> contigLengthMap = sequenceDictionary.getSequences().stream().filter(s -> s.getSequenceLength() >= minContigLength).collect(Collectors.toMap(SAMSequenceRecord::getSequenceName, SAMSequenceRecord::getSequenceLength, (c, l) -> {
throw new IllegalArgumentException(String.format("Duplicate contig in sequence dictionary: %s", c));
}, LinkedHashMap::new));
Utils.validateArg(contigLengthMap.size() > 0, "There must be at least one contig above the threshold length in the sequence dictionary.");
logger.info("Contigs above length threshold: " + contigLengthMap.toString());
//check that contigs in input files are present in sequence dictionary and that data points are valid given lengths
validateContigs(contigLengthMap);
//generate the plots
final List<String> contigNames = new ArrayList<>(contigLengthMap.keySet());
final List<Integer> contigLengths = new ArrayList<>(contigLengthMap.values());
writeSegmentedCopyRatioPlots(sampleName, contigNames, contigLengths);
return "SUCCESS";
}
use of htsjdk.samtools.SAMSequenceDictionary in project gatk by broadinstitute.
the class TandemRepeatUnitTest method testUsingVC.
@Test
public void testUsingVC() {
// - [ref] / ATC from 20-20
final String insLoc = "chr1";
final int insLocStart = 2;
final int insLocStop = 2;
final byte[] refBytes = "GTATCATCATCGGA".getBytes();
final Allele nullR = Allele.create("A", true);
final Allele atc = Allele.create("AATC", false);
// A*,ATC, context = ATC ATC ATC : (ATC)3 -> (ATC)4
final VariantContext vc = new VariantContextBuilder("foo", insLoc, insLocStart, insLocStop, Arrays.asList(nullR, atc)).make();
// we test that the interval from which the ReferenceContext is constructed does not need to exactly overlap
// the VariantContext. The annotation should be able to handle this.
final SimpleInterval interval = new SimpleInterval(insLoc, insLocStart + 3, insLocStop + 4);
final SimpleInterval interval1 = new SimpleInterval(insLoc, 1, refBytes.length);
final ReferenceBases ref1 = new ReferenceBases(refBytes, interval1);
final SAMSequenceDictionary dict = new SAMSequenceDictionary(Arrays.asList(new SAMSequenceRecord(insLoc, refBytes.length)));
final ReferenceContext ref = new ReferenceContext(ReferenceDataSource.of(ref1, dict), interval, 20, 20);
final InfoFieldAnnotation ann = new TandemRepeat();
final Map<String, Object> a = ann.annotate(ref, vc, null);
Assert.assertEquals(a.size(), 3);
Assert.assertEquals(a.get(GATKVCFConstants.STR_PRESENT_KEY), true);
Assert.assertEquals(a.get(GATKVCFConstants.REPEAT_UNIT_KEY), "ATC");
//3 repeats in the reference, 4 in the variant
Assert.assertEquals(a.get(GATKVCFConstants.REPEATS_PER_ALLELE_KEY), Arrays.asList(3, 4));
}
use of htsjdk.samtools.SAMSequenceDictionary in project gatk by broadinstitute.
the class TandemRepeatUnitTest method testUsingVCNoRepeat.
@Test
public void testUsingVCNoRepeat() {
// - [ref] / ATC from 20-20
final String insLoc = "chr1";
final int insLocStart = 6;
final int insLocStop = 6;
final byte[] refBytes = "GTATCATCATCGGA".getBytes();
final Allele nullR = Allele.create("A", true);
final Allele atc = Allele.create("AATC", false);
// A*,ATC, context = ATC ATC ATC : (ATC)3 -> (ATC)4
final VariantContext vc = new VariantContextBuilder("foo", insLoc, insLocStart, insLocStop, Arrays.asList(nullR, atc)).make();
final SimpleInterval interval = new SimpleInterval(insLoc, insLocStart, insLocStop);
final SimpleInterval interval1 = new SimpleInterval(insLoc, 1, refBytes.length);
final ReferenceBases ref1 = new ReferenceBases(refBytes, interval1);
final SAMSequenceDictionary dict = new SAMSequenceDictionary(Arrays.asList(new SAMSequenceRecord(insLoc, refBytes.length)));
final ReferenceContext ref = new ReferenceContext(ReferenceDataSource.of(ref1, dict), interval, 0, 20);
final InfoFieldAnnotation ann = new TandemRepeat();
final Map<String, Object> a = ann.annotate(ref, vc, null);
Assert.assertTrue(a.isEmpty());
}
use of htsjdk.samtools.SAMSequenceDictionary in project gatk by broadinstitute.
the class TandemRepeatUnitTest method testUsingVCNotIndel.
@Test
public void testUsingVCNotIndel() {
// - [ref] / ATC from 20-20
String insLoc = "chr1";
int insLocStart = 2;
int insLocStop = 2;
byte[] refBytes = "GTATCATCATCGGA".getBytes();
Allele nullR = Allele.create("A", true);
Allele atc = Allele.create("C", false);
// A*,ATC, context = ATC ATC ATC : (ATC)3 -> (ATC)4
VariantContext vc = new VariantContextBuilder("foo", insLoc, insLocStart, insLocStop, Arrays.asList(nullR, atc)).make();
final SimpleInterval interval = new SimpleInterval("chr1", insLocStart, insLocStop);
final String contigName = "chr1";
final SimpleInterval interval1 = new SimpleInterval(contigName, 1, refBytes.length);
final ReferenceBases ref1 = new ReferenceBases(refBytes, interval1);
final SAMSequenceDictionary dict = new SAMSequenceDictionary(Arrays.asList(new SAMSequenceRecord(contigName, refBytes.length)));
final ReferenceContext ref = new ReferenceContext(ReferenceDataSource.of(ref1, dict), interval, 0, 20);
final InfoFieldAnnotation ann = new TandemRepeat();
final Map<String, Object> a = ann.annotate(ref, vc, null);
Assert.assertTrue(a.isEmpty());
}
Aggregations