use of htsjdk.samtools.SAMSequenceRecord in project gatk by broadinstitute.
the class SequenceDictionaryUtilsUnitTest method generateSequenceDictionaryTestData.
/**
 * Supplies the rows for the "SequenceDictionaryDataProvider" data provider.
 *
 * Each row has six columns:
 * <ol>
 *   <li>the contig records making up the first dictionary,</li>
 *   <li>the contig records making up the second dictionary,</li>
 *   <li>the expected compatibility classification (IDENTICAL, COMMON_SUBSET, SUPERSET, ...),</li>
 *   <li>the exception class expected for that pairing, or {@code null} when none is expected,</li>
 *   <li>a boolean that the section comments below refer to as requireSuperset,</li>
 *   <li>a boolean that the section comments below refer to as checkContigOrdering.</li>
 * </ol>
 *
 * Several rows are intentionally left as exact duplicates of their neighbours so that the
 * number and order of generated test cases is unchanged.
 *
 * @return the table of test cases, one {@code Object[]} per case
 */
@DataProvider(name = "SequenceDictionaryDataProvider")
public Object[][] generateSequenceDictionaryTestData() {
    final SAMSequenceRecord CHRM_HG19 = new SAMSequenceRecord("chrM", 16571);
    final SAMSequenceRecord CHR_NONSTANDARD1 = new SAMSequenceRecord("NonStandard1", 8675309);
    final SAMSequenceRecord CHR_NONSTANDARD2 = new SAMSequenceRecord("NonStandard2", 8675308);

    // Variants of chr1 (hg19) that share its name but differ in length or carry extra attributes.
    final SAMSequenceRecord CHR1_HG19_WITH_UNKNOWN_LENGTH = new SAMSequenceRecord(CHR1_HG19.getSequenceName(), SAMSequenceRecord.UNKNOWN_SEQUENCE_LENGTH);
    final SAMSequenceRecord CHR1_HG19_WITH_DIFFERENT_LENGTH = new SAMSequenceRecord(CHR1_HG19.getSequenceName(), 123456);
    final SAMSequenceRecord CHR1_HG19_WITH_ATTRIBUTES = new SAMSequenceRecord(CHR1_HG19.getSequenceName(), CHR1_HG19.getSequenceLength());
    CHR1_HG19_WITH_ATTRIBUTES.setAttribute("M5", "0dec9660ec1efaaf33281c0d5ea2560f");
    CHR1_HG19_WITH_ATTRIBUTES.setAttribute("UR", "file:/foo/bar");

    // Named aliases for the expected exception classes, so each row states why it should fail.
    final Class<UserException.IncompatibleSequenceDictionaries> NO_COMMON_CONTIGS_EXCEPTION = UserException.IncompatibleSequenceDictionaries.class;
    final Class<UserException.IncompatibleSequenceDictionaries> UNEQUAL_COMMON_CONTIGS_EXCEPTION = UserException.IncompatibleSequenceDictionaries.class;
    final Class<UserException.LexicographicallySortedSequenceDictionary> NON_CANONICAL_HUMAN_ORDER_EXCEPTION = UserException.LexicographicallySortedSequenceDictionary.class;
    final Class<UserException.IncompatibleSequenceDictionaries> OUT_OF_ORDER_EXCEPTION = UserException.IncompatibleSequenceDictionaries.class;
    final Class<UserException.IncompatibleSequenceDictionaries> DIFFERENT_INDICES_EXCEPTION = UserException.IncompatibleSequenceDictionaries.class;

    return new Object[][] {
        // Identical dictionaries:
        { Arrays.asList(CHR1_HG19), Arrays.asList(CHR1_HG19), IDENTICAL, null, false, false },
        { Arrays.asList(CHR1_HG19), Arrays.asList(CHR1_HG19), IDENTICAL, null, false, false },
        { Arrays.asList(CHR1_HG19), Arrays.asList(CHR1_HG19), IDENTICAL, null, true, false },
        { Arrays.asList(CHR1_HG19), Arrays.asList(CHR1_HG19), IDENTICAL, null, false, true },
        { Arrays.asList(CHR1_HG19), Arrays.asList(CHR1_HG19), IDENTICAL, null, true, true },
        { Arrays.asList(CHR1_HG19, CHR2_HG19, CHR10_HG19), Arrays.asList(CHR1_HG19, CHR2_HG19, CHR10_HG19), IDENTICAL, null, false, false },
        { Arrays.asList(CHR1_B37), Arrays.asList(CHR1_B37), IDENTICAL, null, false, false },
        { Arrays.asList(CHR1_B37, CHR2_B37, CHR10_B37), Arrays.asList(CHR1_B37, CHR2_B37, CHR10_B37), IDENTICAL, null, false, false },
        { Arrays.asList(CHR1_HG19), Arrays.asList(CHR1_HG19_WITH_UNKNOWN_LENGTH), IDENTICAL, null, false, false },
        { Arrays.asList(CHR1_HG19_WITH_UNKNOWN_LENGTH), Arrays.asList(CHR1_HG19), IDENTICAL, null, false, false },

        // Dictionaries with a common subset:
        { Arrays.asList(CHR1_HG19), Arrays.asList(CHR1_HG19, CHR_NONSTANDARD1), COMMON_SUBSET, null, false, false },
        { Arrays.asList(CHR1_HG19), Arrays.asList(CHR1_HG19, CHR_NONSTANDARD1), COMMON_SUBSET, null, false, true },
        // If requireSuperset == true, we should get an exception upon COMMON_SUBSET:
        { Arrays.asList(CHRM_HG19, CHR1_HG19, CHR2_HG19), Arrays.asList(CHRM_HG19, CHR1_HG19, CHR10_HG19), COMMON_SUBSET, UserException.IncompatibleSequenceDictionaries.class, true, false },
        { Arrays.asList(CHR1_HG19, CHR_NONSTANDARD1), Arrays.asList(CHR1_HG19, CHR_NONSTANDARD2), COMMON_SUBSET, null, false, false },
        { Arrays.asList(CHR_NONSTANDARD1, CHR1_HG19), Arrays.asList(CHR_NONSTANDARD2, CHR1_HG19), COMMON_SUBSET, null, false, false },
        { Arrays.asList(CHR_NONSTANDARD1, CHR1_HG19), Arrays.asList(CHR_NONSTANDARD2, CHR1_HG19, CHRM_HG19), COMMON_SUBSET, null, false, false },
        { Arrays.asList(CHR1_HG19, CHR2_HG19, CHR10_HG19, CHR_NONSTANDARD1), Arrays.asList(CHR1_HG19, CHR2_HG19, CHR10_HG19, CHR_NONSTANDARD2), COMMON_SUBSET, null, false, false },
        { Arrays.asList(CHR_NONSTANDARD1, CHR1_HG19, CHR2_HG19, CHR10_HG19), Arrays.asList(CHR_NONSTANDARD2, CHR1_HG19, CHR2_HG19, CHR10_HG19), COMMON_SUBSET, null, false, false },
        { Arrays.asList(CHR_NONSTANDARD1, CHR1_HG19, CHR2_HG19, CHR10_HG19), Arrays.asList(CHR_NONSTANDARD2, CHR1_HG19, CHR2_HG19, CHR10_HG19, CHRM_HG19), COMMON_SUBSET, null, false, false },
        { Arrays.asList(CHR1_B37, CHR2_B37, CHR10_B37, CHR_NONSTANDARD1), Arrays.asList(CHR1_B37, CHR2_B37, CHR10_B37, CHR_NONSTANDARD2), COMMON_SUBSET, null, false, false },
        // If requireSuperset == true, we should get an exception upon COMMON_SUBSET:
        { Arrays.asList(CHR1_HG19, CHR2_HG19), Arrays.asList(CHR1_HG19, CHR2_HG19, CHR10_HG19), COMMON_SUBSET, UserException.IncompatibleSequenceDictionaries.class, true, false },
        { Arrays.asList(CHR1_HG19, CHR2_HG19, CHR10_HG19), Arrays.asList(CHR1_HG19, CHR2_HG19, CHR10_HG19, CHR_NONSTANDARD1), COMMON_SUBSET, null, false, false },
        { Arrays.asList(CHR1_HG19, CHR2_HG19), Arrays.asList(CHR1_HG19, CHR2_HG19, CHR10_HG19), COMMON_SUBSET, null, false, false },
        // If checkContigOrdering == false, ordering of the common contigs should not matter:
        { Arrays.asList(CHR1_HG19, CHR2_HG19), Arrays.asList(CHR2_HG19, CHR1_HG19, CHR10_HG19), COMMON_SUBSET, null, false, false },
        { Arrays.asList(CHR1_HG19, CHR2_HG19), Arrays.asList(CHR10_HG19, CHR2_HG19, CHR1_HG19), COMMON_SUBSET, null, false, false },
        { Arrays.asList(CHR1_HG19, CHR2_HG19), Arrays.asList(CHR2_HG19, CHR10_HG19, CHR1_HG19), COMMON_SUBSET, null, false, false },

        // Dictionaries with no common contigs:
        { Arrays.asList(CHR1_HG19), Arrays.asList(CHR2_HG19), NO_COMMON_CONTIGS, NO_COMMON_CONTIGS_EXCEPTION, false, false },
        { Arrays.asList(CHR1_HG19), Arrays.asList(CHR2_HG19), NO_COMMON_CONTIGS, NO_COMMON_CONTIGS_EXCEPTION, false, true },
        { Arrays.asList(CHR1_HG19), Arrays.asList(CHR2_HG19), NO_COMMON_CONTIGS, NO_COMMON_CONTIGS_EXCEPTION, true, false },
        { Arrays.asList(CHR1_HG19), Arrays.asList(CHR2_HG19), NO_COMMON_CONTIGS, NO_COMMON_CONTIGS_EXCEPTION, true, true },
        { Arrays.asList(CHR1_HG19), Arrays.asList(CHR1_B37), NO_COMMON_CONTIGS, NO_COMMON_CONTIGS_EXCEPTION, false, false },
        { Arrays.asList(CHR1_HG19, CHR2_HG19, CHR10_HG19), Arrays.asList(CHR1_B37, CHR2_B37, CHR10_B37), NO_COMMON_CONTIGS, NO_COMMON_CONTIGS_EXCEPTION, false, false },
        { Arrays.asList(CHR1_HG19, CHR2_HG19), Arrays.asList(CHR1_B37, CHR2_B37, CHR10_B37), NO_COMMON_CONTIGS, NO_COMMON_CONTIGS_EXCEPTION, false, false },

        // Dictionaries with unequal common contigs:
        { Arrays.asList(CHR1_HG19), Arrays.asList(CHR1_HG19_WITH_DIFFERENT_LENGTH), UNEQUAL_COMMON_CONTIGS, UNEQUAL_COMMON_CONTIGS_EXCEPTION, false, false },
        { Arrays.asList(CHR1_HG19_WITH_DIFFERENT_LENGTH), Arrays.asList(CHR1_HG19), UNEQUAL_COMMON_CONTIGS, UNEQUAL_COMMON_CONTIGS_EXCEPTION, false, false },
        { Arrays.asList(CHR1_HG19), Arrays.asList(CHR1_HG18), UNEQUAL_COMMON_CONTIGS, UNEQUAL_COMMON_CONTIGS_EXCEPTION, false, false },
        { Arrays.asList(CHR1_HG19), Arrays.asList(CHR1_HG18), UNEQUAL_COMMON_CONTIGS, UNEQUAL_COMMON_CONTIGS_EXCEPTION, true, false },
        { Arrays.asList(CHR1_HG19), Arrays.asList(CHR1_HG18), UNEQUAL_COMMON_CONTIGS, UNEQUAL_COMMON_CONTIGS_EXCEPTION, false, true },
        { Arrays.asList(CHR1_HG19), Arrays.asList(CHR1_HG18), UNEQUAL_COMMON_CONTIGS, UNEQUAL_COMMON_CONTIGS_EXCEPTION, true, true },
        { Arrays.asList(CHR1_B36), Arrays.asList(CHR1_B37), UNEQUAL_COMMON_CONTIGS, UNEQUAL_COMMON_CONTIGS_EXCEPTION, false, false },
        { Arrays.asList(CHR1_HG19, CHR2_HG19, CHR10_HG19), Arrays.asList(CHR1_HG18, CHR2_HG18, CHR10_HG18), UNEQUAL_COMMON_CONTIGS, UNEQUAL_COMMON_CONTIGS_EXCEPTION, false, false },
        { Arrays.asList(CHR1_B37, CHR2_B37, CHR10_B37), Arrays.asList(CHR1_B36, CHR2_B36, CHR10_B36), UNEQUAL_COMMON_CONTIGS, UNEQUAL_COMMON_CONTIGS_EXCEPTION, false, false },
        { Arrays.asList(CHR1_HG19, CHR2_HG19, CHR10_HG19, CHR_NONSTANDARD1), Arrays.asList(CHR1_HG18, CHR2_HG18, CHR10_HG18, CHR_NONSTANDARD2), UNEQUAL_COMMON_CONTIGS, UNEQUAL_COMMON_CONTIGS_EXCEPTION, false, false },
        { Arrays.asList(CHR_NONSTANDARD1, CHR1_HG19, CHR2_HG19, CHR10_HG19), Arrays.asList(CHR_NONSTANDARD2, CHR1_HG18, CHR2_HG18, CHR10_HG18), UNEQUAL_COMMON_CONTIGS, UNEQUAL_COMMON_CONTIGS_EXCEPTION, false, false },
        { Arrays.asList(CHR1_HG19, CHR2_HG19), Arrays.asList(CHR1_HG18, CHR2_HG18, CHR10_HG18), UNEQUAL_COMMON_CONTIGS, UNEQUAL_COMMON_CONTIGS_EXCEPTION, false, false },

        // One or both dictionaries in non-canonical human order:
        { Arrays.asList(CHR1_HG19, CHR10_HG19, CHR2_HG19), Arrays.asList(CHR1_HG19, CHR10_HG19, CHR2_HG19), NON_CANONICAL_HUMAN_ORDER, NON_CANONICAL_HUMAN_ORDER_EXCEPTION, false, true },
        { Arrays.asList(CHR1_HG19, CHR10_HG19, CHR2_HG19), Arrays.asList(CHR1_HG19, CHR10_HG19, CHR2_HG19), NON_CANONICAL_HUMAN_ORDER, NON_CANONICAL_HUMAN_ORDER_EXCEPTION, true, true },
        { Arrays.asList(CHR1_HG19, CHR10_HG19, CHR2_HG19), Arrays.asList(CHR1_HG19, CHR10_HG19, CHR2_HG19), NON_CANONICAL_HUMAN_ORDER, NON_CANONICAL_HUMAN_ORDER_EXCEPTION, false, true },
        { Arrays.asList(CHR1_HG19, CHR10_HG19, CHR2_HG19), Arrays.asList(CHR1_HG19, CHR10_HG19, CHR2_HG19), NON_CANONICAL_HUMAN_ORDER, NON_CANONICAL_HUMAN_ORDER_EXCEPTION, true, true },
        { Arrays.asList(CHR1_HG18, CHR10_HG18, CHR2_HG18), Arrays.asList(CHR1_HG18, CHR10_HG18, CHR2_HG18), NON_CANONICAL_HUMAN_ORDER, NON_CANONICAL_HUMAN_ORDER_EXCEPTION, false, true },
        { Arrays.asList(CHR1_HG19, CHR10_HG19, CHR2_HG19), Arrays.asList(CHR1_HG19, CHR2_HG19, CHR10_HG19), NON_CANONICAL_HUMAN_ORDER, NON_CANONICAL_HUMAN_ORDER_EXCEPTION, false, true },
        { Arrays.asList(CHR1_HG19, CHR2_HG19, CHR10_HG19), Arrays.asList(CHR1_HG19, CHR10_HG19, CHR2_HG19), NON_CANONICAL_HUMAN_ORDER, NON_CANONICAL_HUMAN_ORDER_EXCEPTION, false, true },
        { Arrays.asList(CHR1_B37, CHR10_B37, CHR2_B37), Arrays.asList(CHR1_B37, CHR10_B37, CHR2_B37), NON_CANONICAL_HUMAN_ORDER, NON_CANONICAL_HUMAN_ORDER_EXCEPTION, false, true },
        { Arrays.asList(CHR1_B36, CHR10_B36, CHR2_B36), Arrays.asList(CHR1_B36, CHR10_B36, CHR2_B36), NON_CANONICAL_HUMAN_ORDER, NON_CANONICAL_HUMAN_ORDER_EXCEPTION, false, true },
        // If checkContigOrdering == false, we should not get NON_CANONICAL_HUMAN_ORDER:
        { Arrays.asList(CHR1_HG19, CHR10_HG19, CHR2_HG19), Arrays.asList(CHR1_HG19, CHR10_HG19, CHR2_HG19), IDENTICAL, null, false, false },
        { Arrays.asList(CHR1_HG19, CHR10_HG19, CHR2_HG19), Arrays.asList(CHR1_HG19, CHR10_HG19, CHR2_HG19, CHR_NONSTANDARD1), COMMON_SUBSET, null, false, false },

        // Dictionaries with a common subset, but different relative ordering within that subset
        { Arrays.asList(CHR1_HG19, CHR2_HG19), Arrays.asList(CHR2_HG19, CHR1_HG19), OUT_OF_ORDER, OUT_OF_ORDER_EXCEPTION, false, true },
        { Arrays.asList(CHR1_HG19, CHR2_HG19), Arrays.asList(CHR2_HG19, CHR1_HG19), OUT_OF_ORDER, OUT_OF_ORDER_EXCEPTION, true, true },
        { Arrays.asList(CHR1_HG19, CHR2_HG19), Arrays.asList(CHR2_HG19, CHR1_HG19), OUT_OF_ORDER, OUT_OF_ORDER_EXCEPTION, false, true },
        { Arrays.asList(CHR1_HG19, CHR2_HG19), Arrays.asList(CHR2_HG19, CHR1_HG19), OUT_OF_ORDER, OUT_OF_ORDER_EXCEPTION, true, true },
        { Arrays.asList(CHRM_HG19, CHR1_HG19, CHR2_HG19), Arrays.asList(CHR2_HG19, CHR1_HG19, CHRM_HG19), OUT_OF_ORDER, OUT_OF_ORDER_EXCEPTION, false, true },
        { Arrays.asList(CHRM_HG19, CHR1_HG19, CHR2_HG19), Arrays.asList(CHRM_HG19, CHR2_HG19, CHR1_HG19), OUT_OF_ORDER, OUT_OF_ORDER_EXCEPTION, false, true },
        { Arrays.asList(CHRM_HG19, CHR1_HG19, CHR2_HG19), Arrays.asList(CHR2_HG19, CHRM_HG19, CHR1_HG19), OUT_OF_ORDER, OUT_OF_ORDER_EXCEPTION, false, true },
        { Arrays.asList(CHR1_B37, CHR2_B37), Arrays.asList(CHR2_B37, CHR1_B37), OUT_OF_ORDER, OUT_OF_ORDER_EXCEPTION, false, true },
        // If checkContigOrdering == false, we should not get OUT_OF_ORDER:
        { Arrays.asList(CHR1_HG19, CHR2_HG19), Arrays.asList(CHR2_HG19, CHR1_HG19), SUPERSET, null, false, false },
        { Arrays.asList(CHR1_HG19, CHR2_HG19), Arrays.asList(CHR2_HG19, CHR1_HG19, CHR_NONSTANDARD1), COMMON_SUBSET, null, false, false },

        // These have checkContigOrdering == true, so we expect DIFFERENT_INDICES and an exception:
        { Arrays.asList(CHRM_HG19, CHR1_HG19), Arrays.asList(CHR1_HG19), DIFFERENT_INDICES, DIFFERENT_INDICES_EXCEPTION, false, true },
        // Setting requireSuperset == true should make no difference here (we should still get DIFFERENT_INDICES and an exception):
        { Arrays.asList(CHRM_HG19, CHR1_HG19), Arrays.asList(CHR1_HG19), DIFFERENT_INDICES, DIFFERENT_INDICES_EXCEPTION, true, true },
        { Arrays.asList(CHR1_HG19, CHR2_HG19), Arrays.asList(CHRM_HG19, CHR1_HG19, CHR2_HG19), DIFFERENT_INDICES, DIFFERENT_INDICES_EXCEPTION, false, true },
        { Arrays.asList(CHR1_HG19, CHR2_HG19), Arrays.asList(CHRM_HG19, CHR1_HG19, CHR2_HG19, CHR_NONSTANDARD1), DIFFERENT_INDICES, DIFFERENT_INDICES_EXCEPTION, false, true },
        { Arrays.asList(CHR1_HG19, CHR2_HG19), Arrays.asList(CHRM_HG19, CHR_NONSTANDARD1, CHR1_HG19, CHR2_HG19), DIFFERENT_INDICES, DIFFERENT_INDICES_EXCEPTION, false, true },
        { Arrays.asList(CHR1_HG19, CHR_NONSTANDARD1, CHR2_HG19), Arrays.asList(CHR1_HG19, CHR2_HG19), DIFFERENT_INDICES, DIFFERENT_INDICES_EXCEPTION, false, true },
        { Arrays.asList(CHR1_HG19, CHR_NONSTANDARD1, CHR2_HG19, CHR10_HG19), Arrays.asList(CHR1_HG19, CHR2_HG19, CHR10_HG19), DIFFERENT_INDICES, DIFFERENT_INDICES_EXCEPTION, false, true },
        { Arrays.asList(CHR1_HG19, CHR2_HG19, CHR_NONSTANDARD1, CHRM_HG19), Arrays.asList(CHR1_HG19, CHR2_HG19, CHRM_HG19), DIFFERENT_INDICES, DIFFERENT_INDICES_EXCEPTION, false, true },
        { Arrays.asList(CHR1_HG19, CHR2_HG19, CHR_NONSTANDARD1, CHRM_HG19, CHR_NONSTANDARD2), Arrays.asList(CHR1_HG19, CHR2_HG19, CHRM_HG19, CHR_NONSTANDARD2), DIFFERENT_INDICES, DIFFERENT_INDICES_EXCEPTION, false, true },
        { Arrays.asList(CHR1_HG19, CHR_NONSTANDARD1, CHR2_HG19, CHRM_HG19, CHR_NONSTANDARD2), Arrays.asList(CHR1_HG19, CHR2_HG19, CHRM_HG19, CHR_NONSTANDARD2), DIFFERENT_INDICES, DIFFERENT_INDICES_EXCEPTION, false, true },
        // Same test cases as above, but these have checkContigOrdering == false, so we expect SUPERSET or COMMON_SUBSET instead of DIFFERENT_INDICES, and no exception:
        { Arrays.asList(CHRM_HG19, CHR1_HG19), Arrays.asList(CHR1_HG19), SUPERSET, null, false, false },
        { Arrays.asList(CHRM_HG19, CHR1_HG19), Arrays.asList(CHR1_HG19), SUPERSET, null, true, false },
        { Arrays.asList(CHR1_HG19, CHR2_HG19), Arrays.asList(CHRM_HG19, CHR1_HG19, CHR2_HG19), COMMON_SUBSET, null, false, false },
        { Arrays.asList(CHR1_HG19, CHR2_HG19), Arrays.asList(CHRM_HG19, CHR1_HG19, CHR2_HG19, CHR_NONSTANDARD1), COMMON_SUBSET, null, false, false },
        { Arrays.asList(CHR1_HG19, CHR2_HG19), Arrays.asList(CHRM_HG19, CHR_NONSTANDARD1, CHR1_HG19, CHR2_HG19), COMMON_SUBSET, null, false, false },
        { Arrays.asList(CHR1_HG19, CHR_NONSTANDARD1, CHR2_HG19), Arrays.asList(CHR1_HG19, CHR2_HG19), SUPERSET, null, false, false },
        { Arrays.asList(CHR1_HG19, CHR_NONSTANDARD1, CHR2_HG19, CHR10_HG19), Arrays.asList(CHR1_HG19, CHR2_HG19, CHR10_HG19), SUPERSET, null, false, false },
        { Arrays.asList(CHR1_HG19, CHR2_HG19, CHR_NONSTANDARD1, CHRM_HG19), Arrays.asList(CHR1_HG19, CHR2_HG19, CHRM_HG19), SUPERSET, null, false, false },
        { Arrays.asList(CHR1_HG19, CHR2_HG19, CHR_NONSTANDARD1, CHRM_HG19, CHR_NONSTANDARD2), Arrays.asList(CHR1_HG19, CHR2_HG19, CHRM_HG19, CHR_NONSTANDARD2), SUPERSET, null, false, false },
        { Arrays.asList(CHR1_HG19, CHR_NONSTANDARD1, CHR2_HG19, CHRM_HG19, CHR_NONSTANDARD2), Arrays.asList(CHR1_HG19, CHR2_HG19, CHRM_HG19, CHR_NONSTANDARD2), SUPERSET, null, false, false },

        // tests for SUPERSET
        { Arrays.asList(CHR1_HG19, CHR2_HG19), Arrays.asList(CHR1_HG19), SUPERSET, null, false, false },
        { Arrays.asList(CHR1_HG19, CHR2_HG19), Arrays.asList(CHR1_HG19), SUPERSET, null, false, true },
        { Arrays.asList(CHR1_HG19, CHR2_HG19), Arrays.asList(CHR1_HG19), SUPERSET, null, true, false },
        { Arrays.asList(CHR1_HG19, CHR2_HG19), Arrays.asList(CHR1_HG19), SUPERSET, null, true, true },
        { Arrays.asList(CHR1_HG19, CHR_NONSTANDARD1), Arrays.asList(CHR1_HG19), SUPERSET, null, false, false },
        { Arrays.asList(CHR1_HG19, CHR2_HG19, CHR10_HG19, CHR_NONSTANDARD1), Arrays.asList(CHR1_HG19, CHR2_HG19, CHR10_HG19), SUPERSET, null, false, false },
        { Arrays.asList(CHR1_HG19, CHR_NONSTANDARD1, CHR2_HG19, CHR10_HG19), Arrays.asList(CHR1_HG19, CHR2_HG19, CHR10_HG19), SUPERSET, null, false, false },
        { Arrays.asList(CHR1_HG19, CHR_NONSTANDARD1, CHR2_HG19, CHR10_HG19), Arrays.asList(CHR1_HG19, CHR2_HG19), SUPERSET, null, false, false },
        // Extended attributes should be ignored when determining whether a superset exists:
        { Arrays.asList(CHR1_HG19, CHR2_HG19), Arrays.asList(CHR1_HG19_WITH_ATTRIBUTES), SUPERSET, null, false, false },
        { Arrays.asList(CHR1_HG19, CHR2_HG19, CHR10_HG19, CHRM_HG19), Arrays.asList(CHR1_HG19_WITH_ATTRIBUTES, CHR10_HG19), SUPERSET, null, false, false }
    };
}
use of htsjdk.samtools.SAMSequenceRecord in project gatk by broadinstitute.
the class SimpleIntervalUnitTest method expandWithinContigInvalidTestData.
/**
 * Supplies the rows for the "ExpandWithinContigInvalidData" data provider.
 *
 * Every row holds the same starting interval (contig "1", positions 1-10) followed by two
 * integers and a sequence dictionary; each row makes exactly one of those trailing values
 * unusable. NOTE(review): the integer columns look like a padding and a contig length
 * consumed by an expand-within-contig test; confirm against the consuming test method.
 *
 * @return one {@code Object[]} per invalid-argument combination
 */
@DataProvider(name = "ExpandWithinContigInvalidData")
public Object[][] expandWithinContigInvalidTestData() {
    final int contigLength = 10000;

    // Dictionary that contains contig "1", the contig every starting interval lies on.
    final SAMSequenceDictionary matchingDictionary =
            new SAMSequenceDictionary(Arrays.asList(new SAMSequenceRecord("1", contigLength)));
    // Dictionary that only knows contig "2", so contig "1" cannot be found in it.
    final SAMSequenceDictionary mismatchedDictionary =
            new SAMSequenceDictionary(Arrays.asList(new SAMSequenceRecord("2", contigLength)));

    return new Object[][] {
        // negative first integer
        { new SimpleInterval("1", 1, 10), -1, contigLength, matchingDictionary },
        // zero second integer
        { new SimpleInterval("1", 1, 10), 1, 0, matchingDictionary },
        // negative second integer
        { new SimpleInterval("1", 1, 10), 1, -1, matchingDictionary },
        // missing dictionary
        { new SimpleInterval("1", 1, 10), 1, contigLength, null },
        // dictionary without the interval's contig
        { new SimpleInterval("1", 1, 10), 1, contigLength, mismatchedDictionary }
    };
}
use of htsjdk.samtools.SAMSequenceRecord in project gatk by broadinstitute.
the class IntervalUtilsUnitTest method testParseIntervalWithPeriodInContigName.
/**
 * Checks that a contig whose name contains a period ("GL000249.1") is parsed as a contig
 * and not interpreted as a file name: parsing the bare name must yield exactly one
 * location spanning positions 1 through 100 of that 100-base contig.
 */
@Test
public void testParseIntervalWithPeriodInContigName() {
    // Make sure that we don't interpret contigs with periods in their name as files
    final String dottedContig = "GL000249.1";
    final SAMSequenceDictionary sequences =
            new SAMSequenceDictionary(Arrays.asList(new SAMSequenceRecord(dottedContig, 100)));
    final GenomeLocParser locParser = new GenomeLocParser(sequences);

    final List<GenomeLoc> parsedLocs = IntervalUtils.parseIntervalArguments(locParser, dottedContig);
    Assert.assertEquals(parsedLocs.size(), 1);

    // Only look at the element once the size check has passed.
    final GenomeLoc onlyLoc = parsedLocs.get(0);
    Assert.assertEquals(onlyLoc.getContig(), dottedContig);
    Assert.assertEquals(onlyLoc.getStart(), 1);
    Assert.assertEquals(onlyLoc.getEnd(), 100);
}
use of htsjdk.samtools.SAMSequenceRecord in project gatk by broadinstitute.
the class IntervalUtilsUnitTest method testConvertSimpleIntervalToQueryInterval.
/**
 * Converts the interval 1:5-10 to a QueryInterval against a one-contig dictionary and
 * checks that the result has reference index 0 with start 5 and end 10.
 */
@Test
public void testConvertSimpleIntervalToQueryInterval() {
    // Single-contig dictionary: contig "1" is the only entry.
    final SAMSequenceDictionary singleContigDictionary =
            new SAMSequenceDictionary(Arrays.asList(new SAMSequenceRecord("1", 100)));
    final SimpleInterval source = new SimpleInterval("1", 5, 10);

    final QueryInterval converted =
            IntervalUtils.convertSimpleIntervalToQueryInterval(source, singleContigDictionary);

    Assert.assertEquals(converted.referenceIndex, 0);
    Assert.assertEquals(converted.start, 5);
    Assert.assertEquals(converted.end, 10);
}
use of htsjdk.samtools.SAMSequenceRecord in project gatk-protected by broadinstitute.
the class PlotACNVResults method doWork.
/**
 * Validates the inputs, collects the contigs at or above the minimum length from the
 * sequence dictionary, and writes the segmented allele-fraction plot for the sample.
 *
 * @return the string {@code "SUCCESS"} once the plot has been written
 * @throws IllegalArgumentException if the sequence dictionary lists the same contig name twice
 */
@Override
protected Object doWork() {
    checkRegularReadableUserFiles();
    //get sample name from input files (consistency check is performed)
    final String sampleName = getSampleName();

    //load contig names and lengths from the sequence dictionary into a LinkedHashMap
    final SAMSequenceDictionary sequenceDictionary = ReferenceUtils.loadFastaDictionary(sequenceDictionaryFile);
    Utils.validateArg(sequenceDictionary.getSequences().stream().map(SAMSequenceRecord::getSequenceName).noneMatch(n -> n.contains(CONTIG_DELIMITER)), String.format("Contig names cannot contain \"%s\".", CONTIG_DELIMITER));

    // An explicit loop is used instead of Collectors.toMap because a toMap merge function only
    // receives the two colliding values (the lengths), so it cannot name the offending contig.
    // LinkedHashMap keeps the contigs in sequence-dictionary order.
    final Map<String, Integer> contigLengthMap = new LinkedHashMap<>();
    for (final SAMSequenceRecord sequence : sequenceDictionary.getSequences()) {
        if (sequence.getSequenceLength() < minContigLength) {
            continue;
        }
        final Integer previousLength = contigLengthMap.put(sequence.getSequenceName(), sequence.getSequenceLength());
        if (previousLength != null) {
            throw new IllegalArgumentException(String.format("Duplicate contig in sequence dictionary: %s", sequence.getSequenceName()));
        }
    }
    Utils.validateArg(contigLengthMap.size() > 0, "There must be at least one contig above the threshold length in the sequence dictionary.");
    logger.info("Contigs above length threshold: " + contigLengthMap.toString());

    //check that contigs in input files are present in sequence dictionary and that data points are valid given lengths
    validateContigs(contigLengthMap);

    //generate the plots
    final List<String> contigNames = new ArrayList<>(contigLengthMap.keySet());
    final List<Integer> contigLengths = new ArrayList<>(contigLengthMap.values());
    writeSegmentedAlleleFractionPlot(sampleName, contigNames, contigLengths);
    return "SUCCESS";
}
Aggregations