Use of org.broadinstitute.hellbender.tools.exome.alleliccount.AllelicCountCollection in project gatk-protected by broadinstitute.
The class TitanFileConverter, method convertHetPulldownToTitanHetFile.
/**
 * Create a het pulldown file that is compatible with TITAN.
 *
 * @param hetPulldown readable file from one of the het pulldown tools
 * @param outputFile  output file; must not be {@code null}
 */
public static void convertHetPulldownToTitanHetFile(final File hetPulldown, final File outputFile) {
    IOUtils.canReadFile(hetPulldown);
    try {
        final AllelicCountCollection acc = new AllelicCountCollection(hetPulldown);
        final TitanAllelicCountWriter titanAllelicCountWriter = new TitanAllelicCountWriter(outputFile);
        titanAllelicCountWriter.writeAllRecords(acc.getCounts());
        titanAllelicCountWriter.close();
    } catch (final IOException ioe) {
        throw new UserException.BadInput("Bad output file: " + outputFile);
    } catch (final IllegalArgumentException iae) {
        logger.warn("Cannot convert pulldown with basic verbosity (i.e., missing ref/alt nucleotide information) to a TITAN-compatible file. An empty file will be output.");
    }
}
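A minimal caller for this converter might look like the following sketch; the file paths are hypothetical placeholders, not paths from the project.

    // Hypothetical usage sketch; both paths are placeholders.
    final File hetPulldown = new File("tumor.hets.tsv");      // produced by a het pulldown tool
    final File titanFile = new File("tumor.titan.hets.tsv");  // TITAN-compatible output
    TitanFileConverter.convertHetPulldownToTitanHetFile(hetPulldown, titanFile);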
Use of org.broadinstitute.hellbender.tools.exome.alleliccount.AllelicCountCollection in project gatk-protected by broadinstitute.
The class PlotACNVResults, method validateContigs.
private void validateContigs(final Map<String, Integer> contigLengthMap) {
    final Set<String> contigNames = contigLengthMap.keySet();

    //validate contig names and lengths in SNP counts file
    final AllelicCountCollection snpCounts = new AllelicCountCollection(snpCountsFile);
    final Set<String> snpCountsContigNames = snpCounts.getCounts().stream()
            .map(AllelicCount::getContig)
            .collect(Collectors.toSet());
    if (!contigNames.containsAll(snpCountsContigNames)) {
        logger.warn("Contigs present in the SNP counts file are missing from the sequence dictionary and will not be plotted.");
    }
    final Map<String, Integer> snpCountsContigMaxPositionMap = snpCounts.getCounts().stream()
            .filter(c -> contigNames.contains(c.getContig()))
            .collect(Collectors.toMap(AllelicCount::getContig, AllelicCount::getEnd, Integer::max));
    snpCountsContigMaxPositionMap.keySet().forEach(c -> Utils.validateArg(snpCountsContigMaxPositionMap.get(c) <= contigLengthMap.get(c),
            "Position present in the SNP-counts file exceeds contig length in the sequence dictionary."));

    //validate contig names and lengths in tangent file
    final ReadCountCollection tangent;
    try {
        tangent = ReadCountCollectionUtils.parse(tangentFile);
    } catch (final IOException e) {
        throw new UserException.CouldNotReadInputFile(tangentFile, e);
    }
    final Set<String> tangentContigNames = tangent.targets().stream()
            .map(Target::getContig)
            .collect(Collectors.toSet());
    if (!contigNames.containsAll(tangentContigNames)) {
        logger.warn("Contigs present in the tangent-normalized coverage file are missing from the sequence dictionary and will not be plotted.");
    }
    final Map<String, Integer> tangentContigMaxPositionMap = tangent.targets().stream()
            .filter(t -> contigNames.contains(t.getContig()))
            .collect(Collectors.toMap(Target::getContig, Target::getEnd, Integer::max));
    tangentContigMaxPositionMap.keySet().forEach(c -> Utils.validateArg(tangentContigMaxPositionMap.get(c) <= contigLengthMap.get(c),
            "Position present in the tangent-normalized coverage file exceeds contig length in the sequence dictionary."));

    //validate contig names and lengths in segments file
    final List<ACNVModeledSegment> segments = SegmentUtils.readACNVModeledSegmentFile(segmentsFile);
    final Set<String> segmentsContigNames = segments.stream()
            .map(ACNVModeledSegment::getContig)
            .collect(Collectors.toSet());
    if (!contigNames.containsAll(segmentsContigNames)) {
        logger.warn("Contigs present in the segments file are missing from the sequence dictionary and will not be plotted.");
    }
    final Map<String, Integer> segmentsContigMaxPositionMap = segments.stream()
            .filter(s -> contigNames.contains(s.getContig()))
            .collect(Collectors.toMap(ACNVModeledSegment::getContig, ACNVModeledSegment::getEnd, Integer::max));
    segmentsContigMaxPositionMap.keySet().forEach(c -> Utils.validateArg(segmentsContigMaxPositionMap.get(c) <= contigLengthMap.get(c),
            "Position present in the segments file exceeds contig length in the sequence dictionary."));
}
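The method above applies the same check three times to three record types. Assuming all three (AllelicCount, Target, ACNVModeledSegment) expose getContig() and getEnd() via htsjdk's Locatable interface, the pattern could be factored into a generic helper, as in this sketch (not code from the project; the helper name and fileDescription parameter are illustrative):

    // Hypothetical helper capturing the repeated validation pattern above.
    private <T extends Locatable> void validateLocatables(final List<T> locatables,
                                                          final Map<String, Integer> contigLengthMap,
                                                          final String fileDescription) {
        final Set<String> contigNames = contigLengthMap.keySet();
        final Set<String> locatableContigNames = locatables.stream()
                .map(Locatable::getContig)
                .collect(Collectors.toSet());
        if (!contigNames.containsAll(locatableContigNames)) {
            logger.warn("Contigs present in the " + fileDescription +
                    " are missing from the sequence dictionary and will not be plotted.");
        }
        // for each contig, the largest end position seen in the file
        final Map<String, Integer> maxPositionMap = locatables.stream()
                .filter(l -> contigNames.contains(l.getContig()))
                .collect(Collectors.toMap(Locatable::getContig, Locatable::getEnd, Integer::max));
        maxPositionMap.forEach((contig, maxPosition) ->
                Utils.validateArg(maxPosition <= contigLengthMap.get(contig),
                        "Position present in the " + fileDescription +
                                " exceeds contig length in the sequence dictionary."));
    }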
Use of org.broadinstitute.hellbender.tools.exome.alleliccount.AllelicCountCollection in project gatk-protected by broadinstitute.
The class PerformAlleleFractionSegmentation, method doWork.
@Override
public Object doWork() {
    final String sampleName = FilenameUtils.getBaseName(snpCountsFile.getAbsolutePath());
    final AllelicPanelOfNormals allelicPoN =
            allelicPoNFile != null ? AllelicPanelOfNormals.read(allelicPoNFile) : AllelicPanelOfNormals.EMPTY_PON;
    final AllelicCountCollection acc = new AllelicCountCollection(snpCountsFile);
    final AlleleFractionSegmenter segmenter = new AlleleFractionSegmenter(initialNumStates, acc, allelicPoN);
    final List<ModeledSegment> segments = segmenter.getModeledSegments();
    SegmentUtils.writeModeledSegmentFile(outputSegmentsFile, segments, sampleName, true);
    return "SUCCESS";
}
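For reference, a hedged sketch of driving the segmenter directly, outside the tool's doWork(); the file names and the choice of 10 initial states are illustrative assumptions, and EMPTY_PON stands in for the case with no allelic panel of normals:

    // Hypothetical standalone use; paths and the state count are placeholders.
    final AllelicCountCollection counts = new AllelicCountCollection(new File("tumor.hets.tsv"));
    final AlleleFractionSegmenter segmenter =
            new AlleleFractionSegmenter(10, counts, AllelicPanelOfNormals.EMPTY_PON);
    final List<ModeledSegment> segments = segmenter.getModeledSegments();
    SegmentUtils.writeModeledSegmentFile(new File("tumor.af.seg"), segments, "tumor", true);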
Use of org.broadinstitute.hellbender.tools.exome.alleliccount.AllelicCountCollection in project gatk-protected by broadinstitute.
The class AlleleFractionModellerUnitTest, method dataBiasCorrection.
@DataProvider(name = "biasCorrection")
public Object[][] dataBiasCorrection() {
    LoggingUtils.setLoggingLevel(Log.LogLevel.INFO);
    final AllelicCountCollection sampleNormal = new AllelicCountCollection(SAMPLE_NORMAL_FILE);
    final AllelicCountCollection sampleWithBadSNPs = new AllelicCountCollection(SAMPLE_WITH_BAD_SNPS_FILE);
    final AllelicCountCollection sampleWithEvent = new AllelicCountCollection(SAMPLE_WITH_EVENT_FILE);
    final AllelicPanelOfNormals allelicPoNNormal =
            new AllelicPanelOfNormals(new AllelicCountCollection(ALLELIC_PON_NORMAL_COUNTS_FILE));
    final AllelicPanelOfNormals allelicPoNWithBadSNPs =
            new AllelicPanelOfNormals(new AllelicCountCollection(ALLELIC_PON_WITH_BAD_SNPS_COUNTS_FILE));
    final double minorFractionExpectedInMiddleSegmentNormal = 0.5;
    final double minorFractionExpectedInMiddleSegmentWithBadSNPsAndNormalPoN = 0.4;
    final double minorFractionExpectedInMiddleSegmentWithEvent = 0.33;
    return new Object[][]{
            {sampleNormal, allelicPoNNormal, minorFractionExpectedInMiddleSegmentNormal},
            {sampleWithBadSNPs, allelicPoNNormal, minorFractionExpectedInMiddleSegmentWithBadSNPsAndNormalPoN},
            {sampleWithEvent, allelicPoNNormal, minorFractionExpectedInMiddleSegmentWithEvent},
            {sampleWithBadSNPs, allelicPoNWithBadSNPs, minorFractionExpectedInMiddleSegmentNormal}
    };
}
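A TestNG test consuming this provider would be wired as in the sketch below; the test name is hypothetical and the body is elided as a comment, since the modeller's fitting API is not shown in this snippet:

    // Hypothetical consumer of the "biasCorrection" data provider.
    @Test(dataProvider = "biasCorrection")
    public void testBiasCorrection(final AllelicCountCollection sample,
                                   final AllelicPanelOfNormals allelicPoN,
                                   final double minorFractionExpectedInMiddleSegment) {
        // ...fit the allele-fraction model to (sample, allelicPoN) and assert that the
        // inferred minor-allele fraction of the middle segment matches the expected value...
    }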
Use of org.broadinstitute.hellbender.tools.exome.alleliccount.AllelicCountCollection in project gatk-protected by broadinstitute.
The class SNPSegmenterUnitTest, method testAllelicFractionBasedSegmentation.
/**
 * Tests that segments are correctly determined using allelic counts from SNP sites.
 * Segment-mean and target-number columns from the expected segment file are not checked.
 */
@Test
public void testAllelicFractionBasedSegmentation() {
    final String sampleName = "test";
    final File snpFile = new File(TEST_SUB_DIR, "snps-simplified-for-allelic-fraction-segmentation.tsv");
    final List<AllelicCount> snpCounts = new AllelicCountCollection(snpFile).getCounts();
    final TargetCollection<AllelicCount> snps = new HashedListTargetCollection<>(snpCounts);
    final File resultFile = createTempFile("snp-segmenter-test-result", ".seg");
    SNPSegmenter.writeSegmentFile(snps, sampleName, resultFile);
    final File expectedFile = new File(TEST_SUB_DIR, "snp-segmenter-test-expected.seg");
    Assert.assertTrue(resultFile.exists(), "SNPSegmenterTest output was not written to temp file: " + resultFile);
    final List<SimpleInterval> result = SegmentUtils.readIntervalsFromSegmentFile(resultFile);
    final List<SimpleInterval> expected = SegmentUtils.readIntervalsFromSegmentFile(expectedFile);
    Assert.assertEquals(result, expected);
}
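Note that SegmentUtils.readIntervalsFromSegmentFile returns only the genomic intervals, which is why the final assertion naturally ignores the segment-mean and target-number columns mentioned in the Javadoc. If a failure diagnostic is wanted, TestNG's assertEquals accepts a message as its third argument, as in this optional variant:

    // Optional variant of the final assertion with a failure message.
    Assert.assertEquals(result, expected,
            "Segment intervals differ between " + resultFile + " and " + expectedFile);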