use of htsjdk.samtools.SAMSequenceRecord in project gatk by broadinstitute.
the class AnnotateTargetsIntegrationTest method createRandomIntervals.
private List<SimpleInterval> createRandomIntervals(final SAMSequenceDictionary referenceDictionary, final int numberOfIntervals, final int minIntervalSize, final int maxIntervalSize, final int meanIntervalSize, final double intervalSizeStdev) {
final List<SimpleInterval> result = new ArrayList<>(numberOfIntervals);
final int numberOfSequences = referenceDictionary.getSequences().size();
for (int i = 0; i < numberOfIntervals; i++) {
final SAMSequenceRecord contig = referenceDictionary.getSequence(RANDOM.nextInt(numberOfSequences));
final String contigName = contig.getSequenceName();
final int intervalSize = Math.min(maxIntervalSize, (int) Math.max(minIntervalSize, Math.round(RANDOM.nextDouble() * intervalSizeStdev + meanIntervalSize)));
final int intervalStart = 1 + RANDOM.nextInt(contig.getSequenceLength() - intervalSize);
final int intervalEnd = intervalStart + intervalSize - 1;
final SimpleInterval interval = new SimpleInterval(contigName, intervalStart, intervalEnd);
result.add(interval);
}
final Comparator<SimpleInterval> comparator = Comparator.comparing(SimpleInterval::getContig, (a, b) -> Integer.compare(referenceDictionary.getSequenceIndex(a), referenceDictionary.getSequenceIndex(b))).thenComparingInt(SimpleInterval::getStart).thenComparingInt(SimpleInterval::getEnd);
Collections.sort(result, comparator);
return result;
}
use of htsjdk.samtools.SAMSequenceRecord in project gatk by broadinstitute.
the class BAQUnitTest method createData1.
@DataProvider(name = "data")
public Object[][] createData1() {
List<BAQTest> params = new ArrayList<>();
SAMSequenceDictionary dict = new SAMSequenceDictionary();
dict.addSequence(new SAMSequenceRecord("1", Integer.MAX_VALUE));
params.add(new BAQTest("GCTGCTCCTGGTACTGCTGGATGAGGGCCTCGATGAAGCTAAGCTTTTTCTCCTGCTCCTGCGTGATCCGCTGCAG", "GCTGCTCCTGGTACTGCTGGATGAGGGCCTCGATGAAGCTAAGCTTTTCCTCCTGCTCCTGCGTGATCCGCTGCAG", "?BACCBDDDFFBCFFHHFIHFEIFHIGHHGHBFEIFGIIGEGIIHGGGIHHIIHIIHIIHGICCIGEII@IGIHCG", "?BACCBDDDFFBCFFHHFIHFEIFHIGHHGHBFEIFGIIGEGII410..0HIIHIIHIIHGICCIGEII@IGIHCE"));
params.add(new BAQTest("GCTTTTTCTCCTCCTG", "GCTTTTCCTCCTCCTG", "IIHGGGIHHIIHHIIH", "EI410..0HIIHHIIE"));
final String refString1 = "AAATTCAAGATTTCAAAGGCTCTTAACTGCTCAAGATAATTTTTTTTTTTTGAGACAGAGTCTTGCTGTGTTGCCCAGGCTGGAGTGCAGTGGCGTGATCTTGGCTCACTGCAAGCTCCGCCTCCCGGGTTCACGCCATTCTCCTGCCTCAGCCTCCCGAGTAGCTGGGACTACAGGCACCCACCACCACGCCTGGCCAATTTTTTTGTATTTTTAGTAGAGATAG";
final ReferenceDataSource rds1 = new ReferenceMemorySource(new ReferenceBases(refString1.getBytes(), new SimpleInterval("1", 9999807, 10000032)), dict);
// big and complex, also does a cap from 3 to 4!
params.add(new BAQTest(-3, 9999810L, "49M1I126M1I20M1I25M", refString1, "TTCAAGATTTCAAAGGCTCTTAACTGCTCAAGATAATTTTTTTTTTTTGTAGACAGAGTCTTGCTGTGTTGCCCAGGCTGGAGTGCAGTGGCGTGATCTTGGCTCACTGCAAGCTCCGCCTCCCGGGTTCACGCCATTCTCCTGCCTCAGCCTCCCGAGTAGCTGGGACTACAGGCCACCCACCACCACGCCTGGCCTAATTTTTTTGTATTTTTAGTAGAGA", ">IHFECEBDBBCBCABABAADBD?AABBACEABABC?>?B>@A@@>A?B3BBC?CBDBAABBBBBAABAABBABDACCCBCDAACBCBABBB:ABDBACBBDCCCCABCDCCBCC@@;?<B@BC;CBBBAB=;A>ACBABBBABBCA@@<?>>AAA<CA@AABBABCC?BB8@<@C<>5;<A5=A;>=64>???B>=6497<<;;<;>2?>BA@??A6<<A59", ">EHFECEBDBBCBCABABAADBD?AABBACEABABC?>?B>@A@@>A?838BC?CBDBAABBBBBAABAABBABDACCCBCDAACBCBABBB:ABDBACBBDCCCCABCDCCBCC@@;?<B@BC;CBBBAB=;A>ACBABBBABBCA@@<?>>AAA<CA@AABBABCC?BB8@<@%<>5;<A5=A;>=64>???B;86497<<;;<;>2?>BA@??A6<<A59", rds1));
final String refString2 = "CCGAGTAGCTGGGACTACAGGCACCCACCACCACGCCTGGCC";
final ReferenceDataSource rds2 = new ReferenceMemorySource(new ReferenceBases(refString2.getBytes(), new SimpleInterval("1", 9999963, 10000004)), dict);
// now changes
params.add(new BAQTest(-3, 9999966L, "36M", refString2, "AGTAGCTGGGACTACAGGCACCCACCACCACGCCTG", "A?>>@>AA?@@>A?>A@?>@>>?=>?'>?=>7=?A9", "A?>>@>AA?@@>A?>A@?>@>>?=>?'>?=>7=?A9", rds2));
final String refString3 = "CCACCACGCCTGGCCAATTTTTTTGTATTTTTAGTAGAGATA";
final ReferenceDataSource rds3 = new ReferenceMemorySource(new ReferenceBases(refString3.getBytes(), new SimpleInterval("1", 9999990, 10000031)), dict);
// raw base qualities are low -- but they shouldn't be capped
params.add(new BAQTest(-3, 9999993L, "4=13X2=3X1=4X2=4X1=2X", refString3, "CCACGCTTGGCAAAGTTTTCCGTACGTTTAGCCGAG", "33'/(7+270&4),(&&-)$&,%7$',-/61(,6?8", "33'/(7+270&4),(&&-)$&,%7$',-/61(,6?8", rds3));
List<Object[]> params2 = new ArrayList<>();
for (BAQTest x : params) params2.add(new Object[] { x });
return params2.toArray(new Object[][] {});
}
use of htsjdk.samtools.SAMSequenceRecord in project gatk by broadinstitute.
the class SequenceDictionaryUtilsUnitTest method testSequenceRecordsAreEquivalentDataProvider.
@DataProvider(name = "testSequenceRecordsAreEquivalentDataProvider")
public Object[][] testSequenceRecordsAreEquivalentDataProvider() {
final SAMSequenceRecord CHRM_HG19 = new SAMSequenceRecord("chrM", 16571);
final SAMSequenceRecord CHR_NONSTANDARD1 = new SAMSequenceRecord("NonStandard1", 8675309);
final SAMSequenceRecord CHR1_HG19_WITH_UNKNOWN_LENGTH = new SAMSequenceRecord(CHR1_HG19.getSequenceName(), SAMSequenceRecord.UNKNOWN_SEQUENCE_LENGTH);
final SAMSequenceRecord CHR1_HG19_WITH_DIFFERENT_LENGTH = new SAMSequenceRecord(CHR1_HG19.getSequenceName(), 123456);
return new Object[][] { { CHR1_HG19, CHR1_HG19, true }, { CHR1_HG19, CHRM_HG19, false }, { CHR1_HG19, CHR_NONSTANDARD1, false }, { null, null, true }, { CHR1_HG19, null, false }, { null, CHR1_HG19, false }, { CHR1_HG19, CHR1_HG19_WITH_UNKNOWN_LENGTH, true }, { CHR1_HG19, CHR1_HG19_WITH_DIFFERENT_LENGTH, false }, { CHR1_HG19_WITH_UNKNOWN_LENGTH, CHR1_HG19, true }, { CHR1_HG19_WITH_DIFFERENT_LENGTH, CHR1_HG19, false } };
}
use of htsjdk.samtools.SAMSequenceRecord in project gatk by broadinstitute.
the class SimpleIntervalUnitTest method expandWithinContigTestData.
@DataProvider(name = "ExpandWithinContigData")
public Object[][] expandWithinContigTestData() {
final int CONTIG_LENGTH = 10000;
final SAMSequenceDictionary dictionary = new SAMSequenceDictionary(Arrays.asList(new SAMSequenceRecord("1", CONTIG_LENGTH)));
return new Object[][] { { new SimpleInterval("1", 5, 10), 0, CONTIG_LENGTH, dictionary, new SimpleInterval("1", 5, 10) }, { new SimpleInterval("1", 5, 10), 1, CONTIG_LENGTH, dictionary, new SimpleInterval("1", 4, 11) }, { new SimpleInterval("1", 1, 10), 10, CONTIG_LENGTH, dictionary, new SimpleInterval("1", 1, 20) }, { new SimpleInterval("1", 10, 20), 10, CONTIG_LENGTH, dictionary, new SimpleInterval("1", 1, 30) }, { new SimpleInterval("1", 10, 20), 9, CONTIG_LENGTH, dictionary, new SimpleInterval("1", 1, 29) }, { new SimpleInterval("1", 30, 40), 5, CONTIG_LENGTH, dictionary, new SimpleInterval("1", 25, 45) }, { new SimpleInterval("1", CONTIG_LENGTH - 10, CONTIG_LENGTH), 10, CONTIG_LENGTH, dictionary, new SimpleInterval("1", CONTIG_LENGTH - 20, CONTIG_LENGTH) }, { new SimpleInterval("1", CONTIG_LENGTH - 20, CONTIG_LENGTH - 10), 11, CONTIG_LENGTH, dictionary, new SimpleInterval("1", CONTIG_LENGTH - 31, CONTIG_LENGTH) }, { new SimpleInterval("1", CONTIG_LENGTH - 20, CONTIG_LENGTH - 10), 10, CONTIG_LENGTH, dictionary, new SimpleInterval("1", CONTIG_LENGTH - 30, CONTIG_LENGTH) } };
}
use of htsjdk.samtools.SAMSequenceRecord in project gatk-protected by broadinstitute.
the class PlotSegmentedCopyRatio method doWork.
@Override
protected Object doWork() {
checkRegularReadableUserFiles();
//get sample name from input files (consistency check is performed)
final String sampleName = getSampleName();
//load contig names and lengths from the sequence dictionary into a LinkedHashMap
final SAMSequenceDictionary sequenceDictionary = ReferenceUtils.loadFastaDictionary(sequenceDictionaryFile);
Utils.validateArg(sequenceDictionary.getSequences().stream().map(SAMSequenceRecord::getSequenceName).noneMatch(n -> n.contains(CONTIG_DELIMITER)), String.format("Contig names cannot contain \"%s\".", CONTIG_DELIMITER));
final Map<String, Integer> contigLengthMap = sequenceDictionary.getSequences().stream().filter(s -> s.getSequenceLength() >= minContigLength).collect(Collectors.toMap(SAMSequenceRecord::getSequenceName, SAMSequenceRecord::getSequenceLength, (c, l) -> {
throw new IllegalArgumentException(String.format("Duplicate contig in sequence dictionary: %s", c));
}, LinkedHashMap::new));
Utils.validateArg(contigLengthMap.size() > 0, "There must be at least one contig above the threshold length in the sequence dictionary.");
logger.info("Contigs above length threshold: " + contigLengthMap.toString());
//check that contigs in input files are present in sequence dictionary and that data points are valid given lengths
validateContigs(contigLengthMap);
//generate the plots
final List<String> contigNames = new ArrayList<>(contigLengthMap.keySet());
final List<Integer> contigLengths = new ArrayList<>(contigLengthMap.values());
writeSegmentedCopyRatioPlots(sampleName, contigNames, contigLengths);
return "SUCCESS";
}
Aggregations