Search in sources:

Example 76 with Logger

use of org.apache.logging.log4j.Logger in project gatk by broadinstitute.

Class ReadCountCollectionUtils, method removeTargetsWithTooManyZeros.

/**
 * Drops every target (row of the count matrix) that has more zero counts than allowed.
 * <p>
 *     A target is kept when {@code countZeroes} applied to its row yields at most
 *     {@code maximumTargetZeros}. Kept targets are collected in matrix row order.
 * </p>
 *
 * @param readCounts the input read counts.
 * @param maximumTargetZeros largest number of zero counts a target may have and still be kept.
 * @param roundToInteger forwarded unchanged to {@code countZeroes}; presumably selects whether
 *   counts are rounded before being compared with zero (confirm against that helper).
 * @param logger receives an info-level summary of what, if anything, was dropped.
 * @return the very same {@code readCounts} instance when no target needs dropping, otherwise
 *   the result of {@code readCounts.subsetTargets} applied to the kept targets.
 * @throws UserException.BadInput if every target would be dropped.
 */
public static ReadCountCollection removeTargetsWithTooManyZeros(final ReadCountCollection readCounts, final int maximumTargetZeros, final boolean roundToInteger, final Logger logger) {
    final RealMatrix counts = readCounts.counts();
    // Insertion-ordered set so that the survivors stay in row order.
    final Set<Target> survivors = new LinkedHashSet<>();
    final int rowCount = counts.getRowDimension();
    for (int row = 0; row < rowCount; row++) {
        if (countZeroes(counts.getRow(row), roundToInteger) <= maximumTargetZeros) {
            survivors.add(readCounts.targets().get(row));
        }
    }
    final int totalTargets = readCounts.targets().size();
    final int droppedTargets = totalTargets - survivors.size();
    if (droppedTargets == 0) {
        // Nothing to remove: hand back the input itself rather than a copy.
        logger.info(String.format("There are no targets with large number of columns with zero counts (<= %d of %d) to drop", maximumTargetZeros, readCounts.columnNames().size()));
        return readCounts;
    }
    if (droppedTargets == totalTargets) {
        throw new UserException.BadInput("the number of zeros per target in the input is too large resulting " + "in all targets being dropped");
    }
    final double droppedPercentage = ((double) droppedTargets / totalTargets) * 100;
    logger.info(String.format("Some targets dropped (%d out of %d, %.2f%%) as they had too many zeros (> %d of %d).", droppedTargets, totalTargets, droppedPercentage, maximumTargetZeros, readCounts.columnNames().size()));
    return readCounts.subsetTargets(survivors);
}
Also used : IntStream(java.util.stream.IntStream) DefaultRealMatrixChangingVisitor(org.apache.commons.math3.linear.DefaultRealMatrixChangingVisitor) java.util(java.util) TableReader(org.broadinstitute.hellbender.utils.tsv.TableReader) MatrixSummaryUtils(org.broadinstitute.hellbender.utils.MatrixSummaryUtils) StringUtils(org.apache.commons.lang3.StringUtils) ParamUtils(org.broadinstitute.hellbender.utils.param.ParamUtils) SampleNameFinder(org.broadinstitute.hellbender.tools.exome.samplenamefinder.SampleNameFinder) DataLine(org.broadinstitute.hellbender.utils.tsv.DataLine) Median(org.apache.commons.math3.stat.descriptive.rank.Median) TableColumnCollection(org.broadinstitute.hellbender.utils.tsv.TableColumnCollection) Nonnull(javax.annotation.Nonnull) Locatable(htsjdk.samtools.util.Locatable) Array2DRowRealMatrix(org.apache.commons.math3.linear.Array2DRowRealMatrix) Predicate(java.util.function.Predicate) FastMath(org.apache.commons.math3.util.FastMath) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval) Collectors(java.util.stream.Collectors) DoubleStream(java.util.stream.DoubleStream) TableWriter(org.broadinstitute.hellbender.utils.tsv.TableWriter) Percentile(org.apache.commons.math3.stat.descriptive.rank.Percentile) Logger(org.apache.logging.log4j.Logger) UserException(org.broadinstitute.hellbender.exceptions.UserException) java.io(java.io) SetUniqueList(org.apache.commons.collections4.list.SetUniqueList) Doubles(com.google.common.primitives.Doubles) Utils(org.broadinstitute.hellbender.utils.Utils) RealMatrix(org.apache.commons.math3.linear.RealMatrix) VisibleForTesting(com.google.common.annotations.VisibleForTesting) Array2DRowRealMatrix(org.apache.commons.math3.linear.Array2DRowRealMatrix) RealMatrix(org.apache.commons.math3.linear.RealMatrix) UserException(org.broadinstitute.hellbender.exceptions.UserException)

Example 77 with Logger

use of org.apache.logging.log4j.Logger in project gatk by broadinstitute.

Class ReadCountCollectionUtils, method removeColumnsWithTooManyZeros.

/**
 * Drops every count column that has more zero counts than allowed.
 * <p>
 *     A column is kept when {@code countZeroes} applied to it yields at most
 *     {@code maximumColumnZeros}. Kept column names are collected in matrix column order.
 * </p>
 *
 * @param readCounts the input read counts.
 * @param maximumColumnZeros largest number of zero counts a column may have and still be kept.
 * @param roundToInteger forwarded unchanged to {@code countZeroes}; presumably selects whether
 *   counts are rounded before being compared with zero (confirm against that helper).
 * @param logger receives an info-level summary of what, if anything, was dropped.
 * @return the very same {@code readCounts} instance when no column needs dropping, otherwise
 *   the result of {@code readCounts.subsetColumns} applied to the kept column names.
 * @throws UserException.BadInput if every count column would be dropped.
 */
@VisibleForTesting
public static ReadCountCollection removeColumnsWithTooManyZeros(final ReadCountCollection readCounts, final int maximumColumnZeros, final boolean roundToInteger, final Logger logger) {
    final RealMatrix counts = readCounts.counts();
    // Insertion-ordered set so that the surviving names stay in column order.
    final Set<String> survivors = new LinkedHashSet<>();
    final int columnCount = counts.getColumnDimension();
    for (int column = 0; column < columnCount; column++) {
        if (countZeroes(counts.getColumn(column), roundToInteger) <= maximumColumnZeros) {
            survivors.add(readCounts.columnNames().get(column));
        }
    }
    final int totalColumns = readCounts.columnNames().size();
    final int droppedColumns = totalColumns - survivors.size();
    if (droppedColumns == 0) {
        // Nothing to remove: hand back the input itself rather than a copy.
        logger.info(String.format("There were no columns with a large number of targets with zero counts " + "(<= %d of %d) to drop", maximumColumnZeros, readCounts.targets().size()));
        return readCounts;
    }
    if (droppedColumns == totalColumns) {
        throw new UserException.BadInput("The number of zeros per count column is too large resulting in all count " + "columns to be dropped");
    }
    final double droppedPercentage = ((double) droppedColumns / totalColumns) * 100;
    logger.info(String.format("Some counts columns dropped (%d out of %d, %.2f%%) as they had too many targets with zeros (> %d of %d)", droppedColumns, totalColumns, droppedPercentage, maximumColumnZeros, readCounts.targets().size()));
    return readCounts.subsetColumns(survivors);
}
Also used : IntStream(java.util.stream.IntStream) DefaultRealMatrixChangingVisitor(org.apache.commons.math3.linear.DefaultRealMatrixChangingVisitor) java.util(java.util) TableReader(org.broadinstitute.hellbender.utils.tsv.TableReader) MatrixSummaryUtils(org.broadinstitute.hellbender.utils.MatrixSummaryUtils) StringUtils(org.apache.commons.lang3.StringUtils) ParamUtils(org.broadinstitute.hellbender.utils.param.ParamUtils) SampleNameFinder(org.broadinstitute.hellbender.tools.exome.samplenamefinder.SampleNameFinder) DataLine(org.broadinstitute.hellbender.utils.tsv.DataLine) Median(org.apache.commons.math3.stat.descriptive.rank.Median) TableColumnCollection(org.broadinstitute.hellbender.utils.tsv.TableColumnCollection) Nonnull(javax.annotation.Nonnull) Locatable(htsjdk.samtools.util.Locatable) Array2DRowRealMatrix(org.apache.commons.math3.linear.Array2DRowRealMatrix) Predicate(java.util.function.Predicate) FastMath(org.apache.commons.math3.util.FastMath) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval) Collectors(java.util.stream.Collectors) DoubleStream(java.util.stream.DoubleStream) TableWriter(org.broadinstitute.hellbender.utils.tsv.TableWriter) Percentile(org.apache.commons.math3.stat.descriptive.rank.Percentile) Logger(org.apache.logging.log4j.Logger) UserException(org.broadinstitute.hellbender.exceptions.UserException) java.io(java.io) SetUniqueList(org.apache.commons.collections4.list.SetUniqueList) Doubles(com.google.common.primitives.Doubles) Utils(org.broadinstitute.hellbender.utils.Utils) RealMatrix(org.apache.commons.math3.linear.RealMatrix) VisibleForTesting(com.google.common.annotations.VisibleForTesting) Array2DRowRealMatrix(org.apache.commons.math3.linear.Array2DRowRealMatrix) RealMatrix(org.apache.commons.math3.linear.RealMatrix) UserException(org.broadinstitute.hellbender.exceptions.UserException) VisibleForTesting(com.google.common.annotations.VisibleForTesting)

Example 78 with Logger

use of org.apache.logging.log4j.Logger in project gatk-protected by broadinstitute.

Class ReadCountCollectionUtils, method removeTargetsWithTooManyZeros.

/**
 * Drops every target (row of the count matrix) that has more zero counts than allowed.
 * <p>
 *     A target is kept when {@code countZeroes} applied to its row yields at most
 *     {@code maximumTargetZeros}. Kept targets are collected in matrix row order.
 * </p>
 *
 * @param readCounts the input read counts.
 * @param maximumTargetZeros largest number of zero counts a target may have and still be kept.
 * @param roundToInteger forwarded unchanged to {@code countZeroes}; presumably selects whether
 *   counts are rounded before being compared with zero (confirm against that helper).
 * @param logger receives an info-level summary of what, if anything, was dropped.
 * @return the very same {@code readCounts} instance when no target needs dropping, otherwise
 *   the result of {@code readCounts.subsetTargets} applied to the kept targets.
 * @throws UserException.BadInput if every target would be dropped.
 */
public static ReadCountCollection removeTargetsWithTooManyZeros(final ReadCountCollection readCounts, final int maximumTargetZeros, final boolean roundToInteger, final Logger logger) {
    final RealMatrix counts = readCounts.counts();
    // Insertion-ordered set so that the survivors stay in row order.
    final Set<Target> survivors = new LinkedHashSet<>();
    final int rowCount = counts.getRowDimension();
    for (int row = 0; row < rowCount; row++) {
        if (countZeroes(counts.getRow(row), roundToInteger) <= maximumTargetZeros) {
            survivors.add(readCounts.targets().get(row));
        }
    }
    final int totalTargets = readCounts.targets().size();
    final int droppedTargets = totalTargets - survivors.size();
    if (droppedTargets == 0) {
        // Nothing to remove: hand back the input itself rather than a copy.
        logger.info(String.format("There are no targets with large number of columns with zero counts (<= %d of %d) to drop", maximumTargetZeros, readCounts.columnNames().size()));
        return readCounts;
    }
    if (droppedTargets == totalTargets) {
        throw new UserException.BadInput("the number of zeros per target in the input is too large resulting " + "in all targets being dropped");
    }
    final double droppedPercentage = ((double) droppedTargets / totalTargets) * 100;
    logger.info(String.format("Some targets dropped (%d out of %d, %.2f%%) as they had too many zeros (> %d of %d).", droppedTargets, totalTargets, droppedPercentage, maximumTargetZeros, readCounts.columnNames().size()));
    return readCounts.subsetTargets(survivors);
}
Also used : IntStream(java.util.stream.IntStream) DefaultRealMatrixChangingVisitor(org.apache.commons.math3.linear.DefaultRealMatrixChangingVisitor) java.util(java.util) TableReader(org.broadinstitute.hellbender.utils.tsv.TableReader) MatrixSummaryUtils(org.broadinstitute.hellbender.utils.MatrixSummaryUtils) StringUtils(org.apache.commons.lang3.StringUtils) ParamUtils(org.broadinstitute.hellbender.utils.param.ParamUtils) SampleNameFinder(org.broadinstitute.hellbender.tools.exome.samplenamefinder.SampleNameFinder) DataLine(org.broadinstitute.hellbender.utils.tsv.DataLine) Median(org.apache.commons.math3.stat.descriptive.rank.Median) TableColumnCollection(org.broadinstitute.hellbender.utils.tsv.TableColumnCollection) Nonnull(javax.annotation.Nonnull) Locatable(htsjdk.samtools.util.Locatable) Array2DRowRealMatrix(org.apache.commons.math3.linear.Array2DRowRealMatrix) Predicate(java.util.function.Predicate) FastMath(org.apache.commons.math3.util.FastMath) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval) Collectors(java.util.stream.Collectors) DoubleStream(java.util.stream.DoubleStream) TableWriter(org.broadinstitute.hellbender.utils.tsv.TableWriter) Percentile(org.apache.commons.math3.stat.descriptive.rank.Percentile) Logger(org.apache.logging.log4j.Logger) UserException(org.broadinstitute.hellbender.exceptions.UserException) java.io(java.io) SetUniqueList(org.apache.commons.collections4.list.SetUniqueList) Doubles(com.google.common.primitives.Doubles) Utils(org.broadinstitute.hellbender.utils.Utils) RealMatrix(org.apache.commons.math3.linear.RealMatrix) VisibleForTesting(com.google.common.annotations.VisibleForTesting) Array2DRowRealMatrix(org.apache.commons.math3.linear.Array2DRowRealMatrix) RealMatrix(org.apache.commons.math3.linear.RealMatrix) UserException(org.broadinstitute.hellbender.exceptions.UserException)

Example 79 with Logger

use of org.apache.logging.log4j.Logger in project gatk-protected by broadinstitute.

Class ReadCountCollectionUtils, method removeColumnsWithTooManyZeros.

/**
 * Drops every count column that has more zero counts than allowed.
 * <p>
 *     A column is kept when {@code countZeroes} applied to it yields at most
 *     {@code maximumColumnZeros}. Kept column names are collected in matrix column order.
 * </p>
 *
 * @param readCounts the input read counts.
 * @param maximumColumnZeros largest number of zero counts a column may have and still be kept.
 * @param roundToInteger forwarded unchanged to {@code countZeroes}; presumably selects whether
 *   counts are rounded before being compared with zero (confirm against that helper).
 * @param logger receives an info-level summary of what, if anything, was dropped.
 * @return the very same {@code readCounts} instance when no column needs dropping, otherwise
 *   the result of {@code readCounts.subsetColumns} applied to the kept column names.
 * @throws UserException.BadInput if every count column would be dropped.
 */
@VisibleForTesting
public static ReadCountCollection removeColumnsWithTooManyZeros(final ReadCountCollection readCounts, final int maximumColumnZeros, final boolean roundToInteger, final Logger logger) {
    final RealMatrix counts = readCounts.counts();
    // Insertion-ordered set so that the surviving names stay in column order.
    final Set<String> survivors = new LinkedHashSet<>();
    final int columnCount = counts.getColumnDimension();
    for (int column = 0; column < columnCount; column++) {
        if (countZeroes(counts.getColumn(column), roundToInteger) <= maximumColumnZeros) {
            survivors.add(readCounts.columnNames().get(column));
        }
    }
    final int totalColumns = readCounts.columnNames().size();
    final int droppedColumns = totalColumns - survivors.size();
    if (droppedColumns == 0) {
        // Nothing to remove: hand back the input itself rather than a copy.
        logger.info(String.format("There were no columns with a large number of targets with zero counts " + "(<= %d of %d) to drop", maximumColumnZeros, readCounts.targets().size()));
        return readCounts;
    }
    if (droppedColumns == totalColumns) {
        throw new UserException.BadInput("The number of zeros per count column is too large resulting in all count " + "columns to be dropped");
    }
    final double droppedPercentage = ((double) droppedColumns / totalColumns) * 100;
    logger.info(String.format("Some counts columns dropped (%d out of %d, %.2f%%) as they had too many targets with zeros (> %d of %d)", droppedColumns, totalColumns, droppedPercentage, maximumColumnZeros, readCounts.targets().size()));
    return readCounts.subsetColumns(survivors);
}
Also used : IntStream(java.util.stream.IntStream) DefaultRealMatrixChangingVisitor(org.apache.commons.math3.linear.DefaultRealMatrixChangingVisitor) java.util(java.util) TableReader(org.broadinstitute.hellbender.utils.tsv.TableReader) MatrixSummaryUtils(org.broadinstitute.hellbender.utils.MatrixSummaryUtils) StringUtils(org.apache.commons.lang3.StringUtils) ParamUtils(org.broadinstitute.hellbender.utils.param.ParamUtils) SampleNameFinder(org.broadinstitute.hellbender.tools.exome.samplenamefinder.SampleNameFinder) DataLine(org.broadinstitute.hellbender.utils.tsv.DataLine) Median(org.apache.commons.math3.stat.descriptive.rank.Median) TableColumnCollection(org.broadinstitute.hellbender.utils.tsv.TableColumnCollection) Nonnull(javax.annotation.Nonnull) Locatable(htsjdk.samtools.util.Locatable) Array2DRowRealMatrix(org.apache.commons.math3.linear.Array2DRowRealMatrix) Predicate(java.util.function.Predicate) FastMath(org.apache.commons.math3.util.FastMath) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval) Collectors(java.util.stream.Collectors) DoubleStream(java.util.stream.DoubleStream) TableWriter(org.broadinstitute.hellbender.utils.tsv.TableWriter) Percentile(org.apache.commons.math3.stat.descriptive.rank.Percentile) Logger(org.apache.logging.log4j.Logger) UserException(org.broadinstitute.hellbender.exceptions.UserException) java.io(java.io) SetUniqueList(org.apache.commons.collections4.list.SetUniqueList) Doubles(com.google.common.primitives.Doubles) Utils(org.broadinstitute.hellbender.utils.Utils) RealMatrix(org.apache.commons.math3.linear.RealMatrix) VisibleForTesting(com.google.common.annotations.VisibleForTesting) Array2DRowRealMatrix(org.apache.commons.math3.linear.Array2DRowRealMatrix) RealMatrix(org.apache.commons.math3.linear.RealMatrix) UserException(org.broadinstitute.hellbender.exceptions.UserException) VisibleForTesting(com.google.common.annotations.VisibleForTesting)

Example 80 with Logger

use of org.apache.logging.log4j.Logger in project gatk-protected by broadinstitute.

Class TargetCoverageSexGenotypeCalculator, method processReadCountsAndTargets.

/**
 * Prepares the raw read counts and the user-provided target list for later use.
 * <ul>
 *     <li>When the collection has more than one count column, the read counts are passed through
 *     {@code ReadCountCollectionUtils.removeTotallyUncoveredTargets} and {@code targetList} is
 *     narrowed, keeping its order, to the targets still present in the filtered read counts.</li>
 *     <li>Otherwise both inputs are returned untouched, and an info-level message reports the
 *     total number of targets and how many have a first-column count that truncates to 0.</li>
 * </ul>
 *
 * @param rawReadCounts raw read count collection
 * @param targetList user provided target list
 * @return pair of processed read counts and targets
 */
private ImmutablePair<ReadCountCollection, List<Target>> processReadCountsAndTargets(@Nonnull final ReadCountCollection rawReadCounts, @Nonnull final List<Target> targetList) {
    if (rawReadCounts.columnNames().size() > 1) {
        /* several count columns: totally uncovered targets can be removed */
        final ReadCountCollection coveredReadCounts = ReadCountCollectionUtils.removeTotallyUncoveredTargets(rawReadCounts, logger);
        // Hash set for constant-time membership checks while filtering the user's list.
        final Set<Target> coveredTargets = new HashSet<>(coveredReadCounts.targets());
        final List<Target> retainedTargets = targetList.stream()
                .filter(coveredTargets::contains)
                .collect(Collectors.toList());
        return ImmutablePair.of(coveredReadCounts, retainedTargets);
    }

    /* a single count column: nothing is filtered, the situation is only reported */
    final long numUncoveredTargets = rawReadCounts.records().stream()
            .filter(record -> (int) record.getDouble(0) == 0)
            .count();
    final long numAllTargets = rawReadCounts.targets().size();
    logger.info("Since only one sample is given for genotyping, the user is responsible for asserting" + " the aptitude of targets. Fully uncovered (irrelevant) targets can not be automatically" + " identified (total targets: " + numAllTargets + ", uncovered targets: " + numUncoveredTargets + ")");
    return ImmutablePair.of(rawReadCounts, targetList);
}
Also used : IntStream(java.util.stream.IntStream) java.util(java.util) Collectors(java.util.stream.Collectors) ImmutablePair(org.apache.commons.lang3.tuple.ImmutablePair) PoissonDistribution(org.apache.commons.math3.distribution.PoissonDistribution) ParamUtils(org.broadinstitute.hellbender.utils.param.ParamUtils) Sets(com.google.cloud.dataflow.sdk.repackaged.com.google.common.collect.Sets) Logger(org.apache.logging.log4j.Logger) ReadCountCollection(org.broadinstitute.hellbender.tools.exome.ReadCountCollection) UserException(org.broadinstitute.hellbender.exceptions.UserException) Target(org.broadinstitute.hellbender.tools.exome.Target) Median(org.apache.commons.math3.stat.descriptive.rank.Median) ReadCountCollectionUtils(org.broadinstitute.hellbender.tools.exome.ReadCountCollectionUtils) LogManager(org.apache.logging.log4j.LogManager) Nonnull(javax.annotation.Nonnull) Target(org.broadinstitute.hellbender.tools.exome.Target) ReadCountCollection(org.broadinstitute.hellbender.tools.exome.ReadCountCollection)

Aggregations

Logger (org.apache.logging.log4j.Logger): 491; Test (org.junit.Test): 226; File (java.io.File): 80; Test (org.junit.jupiter.api.Test): 69; IOException (java.io.IOException): 34; LoggerContext (org.apache.logging.log4j.core.LoggerContext): 33; Appender (org.apache.logging.log4j.core.Appender): 32; Collectors (java.util.stream.Collectors): 30; StatusLogger (org.apache.logging.log4j.status.StatusLogger): 30; BufferedReader (java.io.BufferedReader): 29; Level (org.apache.logging.log4j.Level): 27; FileReader (java.io.FileReader): 26; Path (java.nio.file.Path): 26; CountDownLatch (java.util.concurrent.CountDownLatch): 23; Map (java.util.Map): 21; IntStream (java.util.stream.IntStream): 20; LoggerConfig (org.apache.logging.log4j.core.config.LoggerConfig): 20; java.util (java.util): 18; HashMap (java.util.HashMap): 18; Configuration (org.apache.logging.log4j.core.config.Configuration): 18