use of org.apache.commons.lang3.math.Fraction in project gatk-protected by broadinstitute.
the class EvaluateCopyNumberTriStateCalls method buildAndAnnotateTruthOverlappingGenotype.
private Genotype buildAndAnnotateTruthOverlappingGenotype(final String sample, final TargetCollection<Target> targets, final Genotype truthGenotype, final int truthCopyNumber, final CopyNumberTriStateAllele truthAllele, final List<Pair<VariantContext, Genotype>> calls) {
final Set<CopyNumberTriStateAllele> calledAlleles = calls.stream().map(pair -> CopyNumberTriStateAllele.valueOf(pair.getRight().getAllele(0))).collect(Collectors.toSet());
final Allele calledAllele = calledAlleles.size() == 1 ? calledAlleles.iterator().next() : Allele.NO_CALL;
final GenotypeBuilder builder = new GenotypeBuilder(sample);
// Set the call allele.
builder.alleles(Collections.singletonList(calledAllele));
// Set the truth allele.
builder.attribute(VariantEvaluationContext.TRUTH_GENOTYPE_KEY, CopyNumberTriStateAllele.ALL_ALLELES.indexOf(truthAllele));
// Annotate the genotype with the number of calls.
builder.attribute(VariantEvaluationContext.CALLED_SEGMENTS_COUNT_KEY, calls.size());
// When there is more than one qualified type of event we indicate how many.
builder.attribute(VariantEvaluationContext.CALLED_ALLELE_COUNTS_KEY, CopyNumberTriStateAllele.ALL_ALLELES.stream().mapToInt(allele -> (int) calls.stream().filter(pair -> pair.getRight().getAllele(0).equals(allele, true)).count()).toArray());
// Calculate the length in targets of the call as the sum across all calls.
builder.attribute(VariantEvaluationContext.CALLED_TARGET_COUNT_KEY, calls.stream().mapToInt(pair -> getTargetCount(targets, pair.getLeft(), pair.getRight())).sum());
// Calculate call quality-- if there is more than one overlapping call we take the maximum qual one.
builder.attribute(VariantEvaluationContext.CALL_QUALITY_KEY, calls.stream().mapToDouble(pair -> GATKProtectedVariantContextUtils.calculateGenotypeQualityFromPLs(pair.getRight())).max().orElse(0.0));
// Calculate the truth copy fraction.
builder.attribute(VariantEvaluationContext.TRUTH_COPY_FRACTION_KEY, truthGenotype.getExtendedAttribute(GS_COPY_NUMBER_FRACTION_KEY));
// Calculate the truth call quality.
final double truthQuality = calculateTruthQuality(truthGenotype, truthCopyNumber);
builder.attribute(VariantEvaluationContext.TRUTH_QUALITY_KEY, truthQuality);
// Set genotype filters:
final boolean truthPassQualityMinimum = truthQuality >= filterArguments.minimumTruthSegmentQuality;
builder.filter(truthPassQualityMinimum ? EvaluationFilter.PASS : EvaluationFilter.LowQuality.acronym);
// Calculate the evaluation class (TP, FN, etc.). Only if there is actually either a truth or a call that is not ref.
if (calledAlleles.contains(CopyNumberTriStateAllele.DEL) || calledAlleles.contains(CopyNumberTriStateAllele.DUP) || truthAllele != CopyNumberTriStateAllele.REF) {
final EvaluationClass evaluationClass;
if (calledAlleles.isEmpty() || (calledAlleles.size() == 1 && calledAlleles.contains(CopyNumberTriStateAllele.REF))) {
evaluationClass = EvaluationClass.FALSE_NEGATIVE;
} else if (calledAlleles.size() == 1) {
evaluationClass = calledAlleles.contains(truthAllele) ? EvaluationClass.TRUE_POSITIVE : truthAllele == CopyNumberTriStateAllele.REF ? EvaluationClass.FALSE_POSITIVE : /* else */
EvaluationClass.DISCORDANT_POSITIVE;
} else {
evaluationClass = truthAllele == CopyNumberTriStateAllele.REF ? EvaluationClass.FALSE_POSITIVE : EvaluationClass.MIXED_POSITIVE;
}
builder.attribute(VariantEvaluationContext.EVALUATION_CLASS_KEY, evaluationClass.acronym);
}
return builder.make();
}
use of org.apache.commons.lang3.math.Fraction in project gatk by broadinstitute.
the class EvaluateCopyNumberTriStateCalls method buildAndAnnotateTruthOverlappingGenotype.
private Genotype buildAndAnnotateTruthOverlappingGenotype(final String sample, final TargetCollection<Target> targets, final Genotype truthGenotype, final int truthCopyNumber, final CopyNumberTriStateAllele truthAllele, final List<Pair<VariantContext, Genotype>> calls) {
final Set<CopyNumberTriStateAllele> calledAlleles = calls.stream().map(pair -> CopyNumberTriStateAllele.valueOf(pair.getRight().getAllele(0))).collect(Collectors.toSet());
final Allele calledAllele = calledAlleles.size() == 1 ? calledAlleles.iterator().next() : Allele.NO_CALL;
final GenotypeBuilder builder = new GenotypeBuilder(sample);
// Set the call allele.
builder.alleles(Collections.singletonList(calledAllele));
// Set the truth allele.
builder.attribute(VariantEvaluationContext.TRUTH_GENOTYPE_KEY, CopyNumberTriStateAllele.ALL_ALLELES.indexOf(truthAllele));
// Annotate the genotype with the number of calls.
builder.attribute(VariantEvaluationContext.CALLED_SEGMENTS_COUNT_KEY, calls.size());
// When there is more than one qualified type of event we indicate how many.
builder.attribute(VariantEvaluationContext.CALLED_ALLELE_COUNTS_KEY, CopyNumberTriStateAllele.ALL_ALLELES.stream().mapToInt(allele -> (int) calls.stream().filter(pair -> pair.getRight().getAllele(0).equals(allele, true)).count()).toArray());
// Calculate the length in targets of the call as the sum across all calls.
builder.attribute(VariantEvaluationContext.CALLED_TARGET_COUNT_KEY, calls.stream().mapToInt(pair -> getTargetCount(targets, pair.getLeft(), pair.getRight())).sum());
// Calculate call quality-- if there is more than one overlapping call we take the maximum qual one.
builder.attribute(VariantEvaluationContext.CALL_QUALITY_KEY, calls.stream().mapToDouble(pair -> GATKProtectedVariantContextUtils.calculateGenotypeQualityFromPLs(pair.getRight())).max().orElse(0.0));
// Calculate the truth copy fraction.
builder.attribute(VariantEvaluationContext.TRUTH_COPY_FRACTION_KEY, truthGenotype.getExtendedAttribute(GS_COPY_NUMBER_FRACTION_KEY));
// Calculate the truth call quality.
final double truthQuality = calculateTruthQuality(truthGenotype, truthCopyNumber);
builder.attribute(VariantEvaluationContext.TRUTH_QUALITY_KEY, truthQuality);
// Set genotype filters:
final boolean truthPassQualityMinimum = truthQuality >= filterArguments.minimumTruthSegmentQuality;
builder.filter(truthPassQualityMinimum ? EvaluationFilter.PASS : EvaluationFilter.LowQuality.acronym);
// Calculate the evaluation class (TP, FN, etc.). Only if there is actually either a truth or a call that is not ref.
if (calledAlleles.contains(CopyNumberTriStateAllele.DEL) || calledAlleles.contains(CopyNumberTriStateAllele.DUP) || truthAllele != CopyNumberTriStateAllele.REF) {
final EvaluationClass evaluationClass;
if (calledAlleles.isEmpty() || (calledAlleles.size() == 1 && calledAlleles.contains(CopyNumberTriStateAllele.REF))) {
evaluationClass = EvaluationClass.FALSE_NEGATIVE;
} else if (calledAlleles.size() == 1) {
evaluationClass = calledAlleles.contains(truthAllele) ? EvaluationClass.TRUE_POSITIVE : truthAllele == CopyNumberTriStateAllele.REF ? EvaluationClass.FALSE_POSITIVE : /* else */
EvaluationClass.DISCORDANT_POSITIVE;
} else {
evaluationClass = truthAllele == CopyNumberTriStateAllele.REF ? EvaluationClass.FALSE_POSITIVE : EvaluationClass.MIXED_POSITIVE;
}
builder.attribute(VariantEvaluationContext.EVALUATION_CLASS_KEY, evaluationClass.acronym);
}
return builder.make();
}
use of org.apache.commons.lang3.math.Fraction in project gatk by broadinstitute.
the class AllelicPanelOfNormalsCreator method create.
/**
* Creates an {@link AllelicPanelOfNormals} given a site-frequency threshold;
* sites appearing in strictly less than this fraction of samples will not be included in the panel of normals.
* @param siteFrequencyThreshold site-frequency threshold
* @return an {@link AllelicPanelOfNormals} containing sites
* above the site-frequency threshold
*/
public AllelicPanelOfNormals create(final double siteFrequencyThreshold) {
logger.info("Creating allelic panel of normals...");
//used to filter on frequency
final Map<SimpleInterval, MutableInt> numberOfSamplesMap = new HashMap<>();
//store only the total counts (smaller memory footprint)
final Map<SimpleInterval, AllelicCount> totalCountsMap = new HashMap<>();
int pulldownFileCounter = 1;
final int totalNumberOfSamples = pulldownFiles.size();
for (final File pulldownFile : pulldownFiles) {
logger.info("Processing pulldown file " + pulldownFileCounter++ + "/" + totalNumberOfSamples + " (" + pulldownFile + ")...");
final AllelicCountCollection pulldownCounts = new AllelicCountCollection(pulldownFile);
for (final AllelicCount count : pulldownCounts.getCounts()) {
//update the sum of ref and alt counts at each site
final SimpleInterval site = count.getInterval();
final AllelicCount currentCountAtSite = totalCountsMap.getOrDefault(site, new AllelicCount(site, 0, 0));
final AllelicCount updatedCountAtSite = new AllelicCount(site, currentCountAtSite.getRefReadCount() + count.getRefReadCount(), currentCountAtSite.getAltReadCount() + count.getAltReadCount());
totalCountsMap.put(site, updatedCountAtSite);
//update the number of samples seen possessing each site
final MutableInt numberOfSamplesAtSite = numberOfSamplesMap.get(site);
if (numberOfSamplesAtSite == null) {
numberOfSamplesMap.put(site, new MutableInt(1));
} else {
numberOfSamplesAtSite.increment();
}
}
}
logger.info("Total number of unique sites present in samples: " + totalCountsMap.size());
//filter out sites that appear at a frequency strictly less than the provided threshold
final AllelicCountCollection totalCounts = new AllelicCountCollection();
numberOfSamplesMap.entrySet().stream().filter(e -> e.getValue().doubleValue() / totalNumberOfSamples >= siteFrequencyThreshold).map(e -> totalCountsMap.get(e.getKey())).forEach(totalCounts::add);
logger.info(String.format("Number of unique sites present in samples above site frequency = %4.3f: %d", siteFrequencyThreshold, totalCounts.getCounts().size()));
return new AllelicPanelOfNormals(totalCounts);
}
use of org.apache.commons.lang3.math.Fraction in project opencast by opencast.
the class ProbeResolutionWorkflowOperationHandler method start.
@Override
public WorkflowOperationResult start(WorkflowInstance workflowInstance, JobContext context) throws WorkflowOperationException {
logger.info("Running probe-resolution workflow operation");
final MediaPackage mediaPackage = workflowInstance.getMediaPackage();
final String sourceFlavorName = getConfig(workflowInstance, OPT_SOURCE_FLAVOR);
final MediaPackageElementFlavor sourceFlavor = MediaPackageElementFlavor.parseFlavor(sourceFlavorName);
// Ensure we have a matching track
final Track[] tracks = mediaPackage.getTracks(sourceFlavor);
if (tracks.length <= 0) {
logger.info("No tracks with specified flavor ({}).", sourceFlavorName);
return createResult(mediaPackage, Action.CONTINUE);
}
// Create mapping: resolution -> [varNames]
Map<Fraction, Set<String>> resolutionMapping = new HashMap<>();
for (String key : workflowInstance.getCurrentOperation().getConfigurationKeys()) {
if (key.startsWith(OPT_VAR_PREFIX)) {
String varName = key.substring(OPT_VAR_PREFIX.length());
for (Fraction resolution : getResolutions(getConfig(workflowInstance, key))) {
if (!resolutionMapping.containsKey(resolution)) {
resolutionMapping.put(resolution, new HashSet<String>());
}
resolutionMapping.get(resolution).add(varName);
}
}
}
// Create mapping: varName -> value
Map<String, String> valueMapping = new HashMap<>();
for (String key : workflowInstance.getCurrentOperation().getConfigurationKeys()) {
if (key.startsWith(OPT_VAL_PREFIX)) {
String varName = key.substring(OPT_VAL_PREFIX.length());
valueMapping.put(varName, getConfig(workflowInstance, key));
}
}
Map<String, String> properties = new HashMap<String, String>();
for (Track track : tracks) {
final String flavor = toVariableName(track.getFlavor());
// Check if resolution fits
if (track.hasVideo()) {
for (VideoStream video : ((TrackImpl) track).getVideo()) {
Fraction resolution = Fraction.getFraction(video.getFrameWidth(), video.getFrameHeight());
if (resolutionMapping.containsKey(resolution)) {
for (String varName : resolutionMapping.get(resolution)) {
String value = valueMapping.containsKey(varName) ? valueMapping.get(varName) : "true";
properties.put(flavor + varName, value);
}
}
}
}
}
logger.info("Finished workflow operation adding the properties: {}", properties);
return createResult(mediaPackage, properties, Action.CONTINUE, 0);
}
use of org.apache.commons.lang3.math.Fraction in project opencast by opencast.
the class AnalyzeTracksWorkflowOperationHandlerTest method testGetNearestResolution.
@Test
public void testGetNearestResolution() throws Exception {
Fraction frac43 = Fraction.getFraction(4, 3);
Fraction frac169 = Fraction.getFraction(16, 9);
List<Fraction> aspects = new ArrayList<>();
aspects.add(frac43);
aspects.add(frac169);
int[][] aspect43 = { { 640, 480 }, { 768, 576 }, { 720, 576 }, { 703, 576 }, { 720, 576 }, { 720, 480 }, { 240, 180 }, { 638, 512 }, { 704, 576 }, { 756, 576 }, { 800, 600 }, { 1024, 768 }, { 1280, 1023 }, { 1016, 768 }, { 1280, 1024 } };
int[][] aspect169 = { { 1024, 576 }, { 1280, 720 }, { 1068, 600 }, { 1248, 702 }, { 1278, 720 } };
for (int[] resArr : aspect43) {
Fraction res = Fraction.getFraction(resArr[0], resArr[1]);
Fraction aspect = operationHandler.getNearestAspectRatio(res, aspects);
logger.info("res: {} -> aspect: {} | expected 4/3", res, aspect);
assertEquals(frac43, aspect);
}
for (int[] resArr : aspect169) {
Fraction res = Fraction.getFraction(resArr[0], resArr[1]);
Fraction aspect = operationHandler.getNearestAspectRatio(res, aspects);
logger.info("res: {} -> aspect: {} | expected 16/9", res, aspect);
assertEquals(frac169, aspect);
}
}
Aggregations