use of org.apache.logging.log4j.Logger in project gatk by broadinstitute.
the class ReadCountCollectionUtils method removeTargetsWithTooManyZeros.
/**
 * Drops every target (matrix row) whose number of zero counts exceeds a tolerated maximum.
 * <p>
 * A target is kept when {@code countZeroes} applied to its row of counts yields a value
 * less than or equal to {@code maximumTargetZeros}. Kept targets preserve their original order.
 * </p>
 *
 * @param readCounts the input read counts.
 * @param maximumTargetZeros maximum number of counts equal to 0 per target tolerated.
 * @param roundToInteger forwarded unchanged to {@code countZeroes}; presumably controls whether
 *                       counts are rounded before being compared with 0 (confirm against that helper).
 * @param logger receives an informational message summarizing what was (or was not) dropped.
 * @return never {@code null}. The input {@code readCounts} instance itself when no target needs
 *         to be dropped; otherwise the result of subsetting the input to the kept targets.
 * @throws UserException.BadInput if every target would be dropped.
 */
public static ReadCountCollection removeTargetsWithTooManyZeros(final ReadCountCollection readCounts, final int maximumTargetZeros, final boolean roundToInteger, final Logger logger) {
    final RealMatrix countMatrix = readCounts.counts();

    // Walk the row indices as primitives and collect the targets that pass the zero-count filter,
    // keeping insertion (i.e. original) order.
    final Set<Target> keptTargets = IntStream.range(0, countMatrix.getRowDimension())
            .filter(row -> countZeroes(countMatrix.getRow(row), roundToInteger) <= maximumTargetZeros)
            .mapToObj(row -> readCounts.targets().get(row))
            .collect(Collectors.toCollection(LinkedHashSet::new));

    final int totalTargets = readCounts.targets().size();
    final int droppedTargets = totalTargets - keptTargets.size();

    // Nothing to drop: hand back the very same collection.
    if (droppedTargets == 0) {
        logger.info(String.format("There are no targets with large number of columns with zero counts (<= %d of %d) to drop",
                maximumTargetZeros, readCounts.columnNames().size()));
        return readCounts;
    }

    // Everything would be dropped: the input is unusable with this threshold.
    if (droppedTargets == totalTargets) {
        throw new UserException.BadInput("the number of zeros per target in the input is too large resulting "
                + "in all targets being dropped");
    }

    final double percentDropped = ((double) droppedTargets / totalTargets) * 100;
    logger.info(String.format("Some targets dropped (%d out of %d, %.2f%%) as they had too many zeros (> %d of %d).",
            droppedTargets, totalTargets, percentDropped, maximumTargetZeros, readCounts.columnNames().size()));
    return readCounts.subsetTargets(keptTargets);
}
use of org.apache.logging.log4j.Logger in project gatk by broadinstitute.
the class ReadCountCollectionUtils method removeColumnsWithTooManyZeros.
/**
 * Drops every count column whose number of zero counts exceeds a tolerated maximum.
 * <p>
 * A column is kept when {@code countZeroes} applied to that column of the count matrix yields
 * a value less than or equal to {@code maximumColumnZeros}. Kept columns preserve their original order.
 * </p>
 *
 * @param readCounts the input read counts.
 * @param maximumColumnZeros maximum number of counts equal to 0 per column tolerated.
 * @param roundToInteger forwarded unchanged to {@code countZeroes}; presumably controls whether
 *                       counts are rounded before being compared with 0 (confirm against that helper).
 * @param logger receives an informational message summarizing what was (or was not) dropped.
 * @return never {@code null}. The input {@code readCounts} instance itself when no column needs
 *         to be dropped; otherwise the result of subsetting the input to the kept columns.
 * @throws UserException.BadInput if every column would be dropped.
 */
@VisibleForTesting
public static ReadCountCollection removeColumnsWithTooManyZeros(final ReadCountCollection readCounts, final int maximumColumnZeros, final boolean roundToInteger, final Logger logger) {
    final RealMatrix countMatrix = readCounts.counts();

    // Walk the column indices as primitives and collect the names of the columns that pass the
    // zero-count filter, keeping insertion (i.e. original) order.
    final Set<String> keptColumns = IntStream.range(0, countMatrix.getColumnDimension())
            .filter(column -> countZeroes(countMatrix.getColumn(column), roundToInteger) <= maximumColumnZeros)
            .mapToObj(column -> readCounts.columnNames().get(column))
            .collect(Collectors.toCollection(LinkedHashSet::new));

    final int totalColumns = readCounts.columnNames().size();
    final int droppedColumns = totalColumns - keptColumns.size();

    // Nothing to drop: hand back the very same collection.
    if (droppedColumns == 0) {
        logger.info(String.format("There were no columns with a large number of targets with zero counts "
                + "(<= %d of %d) to drop", maximumColumnZeros, readCounts.targets().size()));
        return readCounts;
    }

    // Everything would be dropped: the input is unusable with this threshold.
    if (droppedColumns == totalColumns) {
        throw new UserException.BadInput("The number of zeros per count column is too large resulting in all count "
                + "columns to be dropped");
    }

    final double percentDropped = ((double) droppedColumns / totalColumns) * 100;
    logger.info(String.format("Some counts columns dropped (%d out of %d, %.2f%%) as they had too many targets with zeros (> %d of %d)",
            droppedColumns, totalColumns, percentDropped, maximumColumnZeros, readCounts.targets().size()));
    return readCounts.subsetColumns(keptColumns);
}
use of org.apache.logging.log4j.Logger in project gatk-protected by broadinstitute.
the class ReadCountCollectionUtils method removeTargetsWithTooManyZeros.
/**
 * Filters out targets (matrix rows) that contain more zero counts than tolerated.
 * <p>
 * For each row of the count matrix, {@code countZeroes} is evaluated; the corresponding target
 * is retained only if that value does not exceed {@code maximumTargetZeros}. Retained targets
 * keep their original relative order.
 * </p>
 *
 * @param readCounts the input read counts.
 * @param maximumTargetZeros maximum number of counts equal to 0 per target tolerated.
 * @param roundToInteger forwarded unchanged to {@code countZeroes}; presumably controls whether
 *                       counts are rounded before being compared with 0 (confirm against that helper).
 * @param logger receives an informational message summarizing what was (or was not) dropped.
 * @return never {@code null}. The input {@code readCounts} instance itself when no target needs
 *         to be dropped; otherwise the result of subsetting the input to the retained targets.
 * @throws UserException.BadInput if every target would be dropped.
 */
public static ReadCountCollection removeTargetsWithTooManyZeros(final ReadCountCollection readCounts, final int maximumTargetZeros, final boolean roundToInteger, final Logger logger) {
    final RealMatrix matrix = readCounts.counts();

    // Primitive index stream avoids boxing; LinkedHashSet preserves the original target order.
    final Set<Target> retained = IntStream.range(0, matrix.getRowDimension())
            .filter(rowIndex -> countZeroes(matrix.getRow(rowIndex), roundToInteger) <= maximumTargetZeros)
            .mapToObj(rowIndex -> readCounts.targets().get(rowIndex))
            .collect(Collectors.toCollection(LinkedHashSet::new));

    final int targetCount = readCounts.targets().size();
    final int dropCount = targetCount - retained.size();

    // Case 1: every target passes, so the input is returned untouched.
    if (dropCount == 0) {
        logger.info(String.format("There are no targets with large number of columns with zero counts (<= %d of %d) to drop",
                maximumTargetZeros, readCounts.columnNames().size()));
        return readCounts;
    }

    // Case 2: no target passes, which is reported as bad user input.
    if (dropCount == targetCount) {
        throw new UserException.BadInput("the number of zeros per target in the input is too large resulting "
                + "in all targets being dropped");
    }

    // Case 3: a strict subset passes; report how many were dropped and return the subset.
    final double dropPercentage = ((double) dropCount / targetCount) * 100;
    logger.info(String.format("Some targets dropped (%d out of %d, %.2f%%) as they had too many zeros (> %d of %d).",
            dropCount, targetCount, dropPercentage, maximumTargetZeros, readCounts.columnNames().size()));
    return readCounts.subsetTargets(retained);
}
use of org.apache.logging.log4j.Logger in project gatk-protected by broadinstitute.
the class ReadCountCollectionUtils method removeColumnsWithTooManyZeros.
/**
 * Filters out count columns that contain more zero counts than tolerated.
 * <p>
 * For each column of the count matrix, {@code countZeroes} is evaluated; the corresponding column
 * name is retained only if that value does not exceed {@code maximumColumnZeros}. Retained columns
 * keep their original relative order.
 * </p>
 *
 * @param readCounts the input read counts.
 * @param maximumColumnZeros maximum number of counts equal to 0 per column tolerated.
 * @param roundToInteger forwarded unchanged to {@code countZeroes}; presumably controls whether
 *                       counts are rounded before being compared with 0 (confirm against that helper).
 * @param logger receives an informational message summarizing what was (or was not) dropped.
 * @return never {@code null}. The input {@code readCounts} instance itself when no column needs
 *         to be dropped; otherwise the result of subsetting the input to the retained columns.
 * @throws UserException.BadInput if every column would be dropped.
 */
@VisibleForTesting
public static ReadCountCollection removeColumnsWithTooManyZeros(final ReadCountCollection readCounts, final int maximumColumnZeros, final boolean roundToInteger, final Logger logger) {
    final RealMatrix matrix = readCounts.counts();

    // Primitive index stream avoids boxing; LinkedHashSet preserves the original column order.
    final Set<String> retained = IntStream.range(0, matrix.getColumnDimension())
            .filter(columnIndex -> countZeroes(matrix.getColumn(columnIndex), roundToInteger) <= maximumColumnZeros)
            .mapToObj(columnIndex -> readCounts.columnNames().get(columnIndex))
            .collect(Collectors.toCollection(LinkedHashSet::new));

    final int columnCount = readCounts.columnNames().size();
    final int dropCount = columnCount - retained.size();

    // Case 1: every column passes, so the input is returned untouched.
    if (dropCount == 0) {
        logger.info(String.format("There were no columns with a large number of targets with zero counts "
                + "(<= %d of %d) to drop", maximumColumnZeros, readCounts.targets().size()));
        return readCounts;
    }

    // Case 2: no column passes, which is reported as bad user input.
    if (dropCount == columnCount) {
        throw new UserException.BadInput("The number of zeros per count column is too large resulting in all count "
                + "columns to be dropped");
    }

    // Case 3: a strict subset passes; report how many were dropped and return the subset.
    final double dropPercentage = ((double) dropCount / columnCount) * 100;
    logger.info(String.format("Some counts columns dropped (%d out of %d, %.2f%%) as they had too many targets with zeros (> %d of %d)",
            dropCount, columnCount, dropPercentage, maximumColumnZeros, readCounts.targets().size()));
    return readCounts.subsetColumns(retained);
}
use of org.apache.logging.log4j.Logger in project gatk-protected by broadinstitute.
the class TargetCoverageSexGenotypeCalculator method processReadCountsAndTargets.
/**
 * Prepares the raw read counts and the user target list for downstream use.
 * <dl>
 *     <dt>More than one count column</dt>
 *     <dd>Delegates to {@code ReadCountCollectionUtils.removeTotallyUncoveredTargets} and then
 *     restricts the target list to the targets still present in the resulting read counts
 *     (the order of {@code targetList} is preserved).</dd>
 *
 *     <dt>Otherwise</dt>
 *     <dd>Leaves both inputs untouched and logs an informational message that reports the total
 *     number of targets and how many have a first-column count that truncates to 0.</dd>
 * </dl>
 *
 * @param rawReadCounts raw read count collection
 * @param targetList user provided target list
 * @return pair of processed read counts and targets
 */
private ImmutablePair<ReadCountCollection, List<Target>> processReadCountsAndTargets(@Nonnull final ReadCountCollection rawReadCounts, @Nonnull final List<Target> targetList) {
    /* with a single column, uncovered targets cannot be identified: only report and pass through */
    if (rawReadCounts.columnNames().size() <= 1) {
        final long numUncoveredTargets = rawReadCounts.records().stream()
                .filter(rec -> (int) rec.getDouble(0) == 0)
                .count();
        final long numAllTargets = rawReadCounts.targets().size();
        logger.info("Since only one sample is given for genotyping, the user is responsible for asserting"
                + " the aptitude of targets. Fully uncovered (irrelevant) targets can not be automatically"
                + " identified (total targets: " + numAllTargets + ", uncovered targets: " + numUncoveredTargets + ")");
        return ImmutablePair.of(rawReadCounts, targetList);
    }

    /* remove totally uncovered targets */
    final ReadCountCollection coveredReadCounts = ReadCountCollectionUtils.removeTotallyUncoveredTargets(rawReadCounts, logger);

    /* keep only the user targets that survived, using a hash set for membership lookups */
    final Set<Target> survivingTargets = new HashSet<>(coveredReadCounts.targets());
    final List<Target> coveredTargetList = targetList.stream()
            .filter(survivingTargets::contains)
            .collect(Collectors.toList());

    return ImmutablePair.of(coveredReadCounts, coveredTargetList);
}
Aggregations