Search in sources:

Example 1 with StandardCovariateList

use of org.broadinstitute.hellbender.utils.recalibration.covariates.StandardCovariateList in project gatk by broadinstitute.

the class BaseRecalibratorSparkSharded method runPipeline.

/**
 * Runs the sharded base recalibration pipeline on a single reads input and saves the
 * resulting recalibration tables as a textual report at {@code outputTablesPath}.
 *
 * Steps, in order: load the reads header, validate the reference dictionary against the
 * reads dictionary, broadcast the header and reference dictionary, join reads with their
 * reference/variant context, compute per-partition tables, tree-aggregate them into one
 * table, finalize it, and write the report.
 *
 * @param ctx the Spark context used to read the header, broadcast values and build the RDDs
 * @throws UserException if the number of reads inputs is not exactly one
 * @throws UserException.CouldNotCreateOutputFile if saving the report raises an IOException
 */
@Override
protected void runPipeline(JavaSparkContext ctx) {
    // Exactly one reads input is accepted.
    final List<String> readsInputs = readArguments.getReadFilesNames();
    if (readsInputs.size() != 1) {
        throw new UserException("Sorry, we only support a single reads input for now.");
    }
    final String readsPath = readsInputs.get(0);
    final String referencePath = referenceArguments.getReferenceFileName();
    auth = getAuthHolder();
    final ReferenceMultiSource reference = new ReferenceMultiSource(auth, referencePath, BaseRecalibrationEngine.BQSR_REFERENCE_WINDOW_FUNCTION);

    // Header and sequence dictionaries; the reference dictionary must be compatible with the reads.
    final ReadsSparkSource readsSource = new ReadsSparkSource(ctx, readArguments.getReadValidationStringency());
    final SAMFileHeader header = readsSource.getHeader(readsPath, referencePath);
    final SAMSequenceDictionary readsDict = header.getSequenceDictionary();
    final SAMSequenceDictionary refDict = reference.getReferenceSequenceDictionary(readsDict);
    final ReadFilter bqsrReadFilter = ReadFilter.fromList(BaseRecalibrator.getStandardBQSRReadFilterList(), header);
    SequenceDictionaryUtils.validateDictionaries("reference", refDict, "reads", readsDict);

    final Broadcast<SAMFileHeader> headerBroadcast = ctx.broadcast(header);
    final Broadcast<SAMSequenceDictionary> refDictBroadcast = ctx.broadcast(refDict);

    // Use the user-specified intervals when given, otherwise cover every contig in the reads dictionary.
    final List<SimpleInterval> intervals;
    if (intervalArgumentCollection.intervalsSpecified()) {
        intervals = intervalArgumentCollection.getIntervals(readsDict);
    } else {
        intervals = IntervalUtils.getAllIntervalsForReference(readsDict);
    }

    final List<String> variantPaths = hackilyCopyFromGCSIfNecessary(knownVariants);
    final List<GATKVariant> knownSites = VariantsSource.getVariantsList(variantPaths);

    // Join reads with their reference and variant context.
    final JavaRDD<ContextShard> shards = AddContextDataToReadSparkOptimized.add(ctx, intervals, readsPath, knownSites, bqsrReadFilter, reference);

    // Run the recalibration engine on every partition.
    final BaseRecalibratorEngineSparkWrapper engine = new BaseRecalibratorEngineSparkWrapper(headerBroadcast, refDictBroadcast, bqsrArgs);
    final JavaRDD<RecalibrationTables> partitionTables = shards.mapPartitions(shardIterator -> engine.apply(shardIterator));

    // Merge the per-partition tables; the tree depth is floor(log2(#partitions)), but never below 1.
    final RecalibrationTables zeroTable = new RecalibrationTables(new StandardCovariateList(bqsrArgs, header));
    final int log2Partitions = (int) (Math.log(partitionTables.partitions().size()) / Math.log(2));
    final int treeDepth = Math.max(1, log2Partitions);
    final RecalibrationTables mergedTable = partitionTables.treeAggregate(zeroTable, RecalibrationTables::inPlaceCombine, RecalibrationTables::inPlaceCombine, treeDepth);
    BaseRecalibrationEngine.finalizeRecalibrationTables(mergedTable);

    try {
        BaseRecalibratorEngineSparkWrapper.saveTextualReport(outputTablesPath, header, mergedTable, bqsrArgs, auth);
    } catch (IOException e) {
        throw new UserException.CouldNotCreateOutputFile(new File(outputTablesPath), e);
    }
}
Also used : ContextShard(org.broadinstitute.hellbender.engine.ContextShard) ReadsSparkSource(org.broadinstitute.hellbender.engine.spark.datasources.ReadsSparkSource) GATKVariant(org.broadinstitute.hellbender.utils.variant.GATKVariant) ReferenceMultiSource(org.broadinstitute.hellbender.engine.datasources.ReferenceMultiSource) IOException(java.io.IOException) RecalibrationTables(org.broadinstitute.hellbender.utils.recalibration.RecalibrationTables) StandardCovariateList(org.broadinstitute.hellbender.utils.recalibration.covariates.StandardCovariateList) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) BaseRecalibratorEngineSparkWrapper(org.broadinstitute.hellbender.tools.spark.transforms.bqsr.BaseRecalibratorEngineSparkWrapper) ReadFilter(org.broadinstitute.hellbender.engine.filters.ReadFilter) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval) UserException(org.broadinstitute.hellbender.exceptions.UserException) SAMFileHeader(htsjdk.samtools.SAMFileHeader) File(java.io.File)

Example 2 with StandardCovariateList

use of org.broadinstitute.hellbender.utils.recalibration.covariates.StandardCovariateList in project gatk by broadinstitute.

the class BaseRecalibratorSparkFn method apply.

/**
 * Builds a recalibration report from reads paired with their context data.
 *
 * Each partition runs its own {@code BaseRecalibrationEngine} over its reads and emits one
 * {@code RecalibrationTables}; the per-partition tables are then tree-aggregated, finalized,
 * quantized and assembled into a report.
 *
 * @param readsWithContext    reads paired with their overlapping reference bases and variants
 * @param header              header passed to the recalibration engine and the covariate lists
 * @param referenceDictionary sequence dictionary used to wrap each read's reference bases
 * @param recalArgs           recalibration arguments (also supplies the quantizing levels)
 * @return the recalibration report built from the combined tables
 */
public static RecalibrationReport apply(final JavaPairRDD<GATKRead, ReadContextData> readsWithContext, final SAMFileHeader header, final SAMSequenceDictionary referenceDictionary, final RecalibrationArgumentCollection recalArgs) {
    final JavaRDD<RecalibrationTables> perPartitionTables = readsWithContext.mapPartitions(pairs -> {
        // One engine per partition; it accumulates counts over every read in the partition.
        final BaseRecalibrationEngine engine = new BaseRecalibrationEngine(recalArgs, header);
        engine.logCovariatesUsed();
        while (pairs.hasNext()) {
            final Tuple2<GATKRead, ReadContextData> pair = pairs.next();
            final ReadContextData context = pair._2();
            final Iterable<GATKVariant> overlappingVariants = context.getOverlappingVariants();
            final ReferenceBases overlappingBases = context.getOverlappingReferenceBases();
            final ReferenceDataSource inMemoryReference = new ReferenceMemorySource(overlappingBases, referenceDictionary);
            engine.processRead(pair._1(), inMemoryReference, overlappingVariants);
        }
        // Emit exactly one tables object for this partition.
        return Arrays.asList(engine.getRecalibrationTables()).iterator();
    });

    // Merge the per-partition tables; the tree depth is floor(log2(#partitions)), but never below 1.
    final RecalibrationTables zeroTable = new RecalibrationTables(new StandardCovariateList(recalArgs, header));
    final int log2Partitions = (int) (Math.log(perPartitionTables.partitions().size()) / Math.log(2));
    final int treeDepth = Math.max(1, log2Partitions);
    final RecalibrationTables merged = perPartitionTables.treeAggregate(zeroTable, RecalibrationTables::inPlaceCombine, RecalibrationTables::inPlaceCombine, treeDepth);
    BaseRecalibrationEngine.finalizeRecalibrationTables(merged);

    final QuantizationInfo quantization = new QuantizationInfo(merged, recalArgs.QUANTIZING_LEVELS);
    final StandardCovariateList reportCovariates = new StandardCovariateList(recalArgs, header);
    return RecalUtils.createRecalibrationReport(
            recalArgs.generateReportTable(reportCovariates.covariateNames()),
            quantization.generateReportTable(),
            RecalUtils.generateReportTables(merged, reportCovariates));
}
Also used : GATKRead(org.broadinstitute.hellbender.utils.read.GATKRead) GATKVariant(org.broadinstitute.hellbender.utils.variant.GATKVariant) ReferenceDataSource(org.broadinstitute.hellbender.engine.ReferenceDataSource) ReferenceMemorySource(org.broadinstitute.hellbender.engine.ReferenceMemorySource) StandardCovariateList(org.broadinstitute.hellbender.utils.recalibration.covariates.StandardCovariateList) ReadContextData(org.broadinstitute.hellbender.engine.ReadContextData) ReferenceBases(org.broadinstitute.hellbender.utils.reference.ReferenceBases)

Example 3 with StandardCovariateList

use of org.broadinstitute.hellbender.utils.recalibration.covariates.StandardCovariateList in project gatk by broadinstitute.

the class RecalibrationReport method initializeArgumentCollectionTable.

/**
 * Parses the arguments table from the GATK Report and creates a RAC object with the proper initialization values.
 *
 * Each row supplies an argument name (column "Argument") and a value. A value that is the
 * string "null", or an actual null cell, is treated as null. Rows whose argument name is
 * not handled below are ignored.
 *
 * @param table the GATKReportTable containing the arguments and its corresponding values
 * @return a RAC object properly initialized with all the objects in the table
 * @throws UserException if the table requests non-standard covariates or a SOLiD mode other
 *                       than the single supported value
 */
private static RecalibrationArgumentCollection initializeArgumentCollectionTable(GATKReportTable table) {
    final RecalibrationArgumentCollection rac = new RecalibrationArgumentCollection();
    final List<String> standardCovariateClassNames = new StandardCovariateList(rac, Collections.emptyList()).getStandardCovariateClassNames();
    for (int i = 0; i < table.getNumRows(); i++) {
        final String argument = table.get(i, "Argument").toString();
        Object value = table.get(i, RecalUtils.ARGUMENT_VALUE_COLUMN_NAME);
        // Null-safe comparison: a cell that is already null must not raise a NullPointerException here.
        if ("null".equals(value)) {
            // generic translation of null values that were printed out as strings | todo -- add this capability to the GATKReport
            value = null;
        }
        switch (argument) {
            case "covariate":
                // A null covariate value is accepted silently; only an explicit list is validated.
                if (value != null) {
                    final List<String> covs = Arrays.asList(value.toString().split(","));
                    if (!covs.equals(standardCovariateClassNames)) {
                        throw new UserException("Non-standard covariates are not supported. Only the following are supported " + standardCovariateClassNames + " but was " + covs);
                    }
                }
                break;
            case "no_standard_covs":
                if (decodeBoolean(value)) {
                    throw new UserException("Non-standard covariates are not supported. Only the following are supported " + standardCovariateClassNames + " but no_standard_covs was true");
                }
                break;
            case "solid_recal_mode": {
                final String solidRecalMode = (String) value;
                if (!RecalibrationArgumentCollection.SOLID_RECAL_MODE.equals(solidRecalMode)) {
                    throw new UserException("Solid is not supported. Only " + RecalibrationArgumentCollection.SOLID_RECAL_MODE + " is allowed as value for solid_recal_mode");
                }
                break;
            }
            case "solid_nocall_strategy": {
                final String solidNocallStrategy = (String) value;
                if (!RecalibrationArgumentCollection.SOLID_NOCALL_STRATEGY.equals(solidNocallStrategy)) {
                    throw new UserException("Solid is not supported. Only " + RecalibrationArgumentCollection.SOLID_NOCALL_STRATEGY + " is allowed as value for solid_nocall_strategy");
                }
                break;
            }
            case "mismatches_context_size":
                rac.MISMATCHES_CONTEXT_SIZE = decodeInteger(value);
                break;
            case "indels_context_size":
                rac.INDELS_CONTEXT_SIZE = decodeInteger(value);
                break;
            case "mismatches_default_quality":
                rac.MISMATCHES_DEFAULT_QUALITY = decodeByte(value);
                break;
            case "insertions_default_quality":
                rac.INSERTIONS_DEFAULT_QUALITY = decodeByte(value);
                break;
            case "deletions_default_quality":
                rac.DELETIONS_DEFAULT_QUALITY = decodeByte(value);
                break;
            case "maximum_cycle_value":
                rac.MAXIMUM_CYCLE_VALUE = decodeInteger(value);
                break;
            case "low_quality_tail":
                rac.LOW_QUAL_TAIL = decodeByte(value);
                break;
            case "default_platform":
                rac.DEFAULT_PLATFORM = (String) value;
                break;
            case "force_platform":
                rac.FORCE_PLATFORM = (String) value;
                break;
            case "quantizing_levels":
                rac.QUANTIZING_LEVELS = decodeInteger(value);
                break;
            case "recalibration_report":
                rac.existingRecalibrationReport = (value == null) ? null : new File((String) value);
                break;
            case "binary_tag_name":
                // Casting null to String yields null, so no explicit null check is needed.
                rac.BINARY_TAG_NAME = (String) value;
                break;
            default:
                // Arguments not listed above are ignored.
                break;
        }
    }
    return rac;
}
Also used : ArrayList(java.util.ArrayList) UserException(org.broadinstitute.hellbender.exceptions.UserException) StandardCovariateList(org.broadinstitute.hellbender.utils.recalibration.covariates.StandardCovariateList) File(java.io.File)

Example 4 with StandardCovariateList

use of org.broadinstitute.hellbender.utils.recalibration.covariates.StandardCovariateList in project gatk by broadinstitute.

the class RecalibrationTablesUnitTest method makeTables.

/**
 * Rebuilds the {@code covariates} and {@code tables} fixtures before each test method and
 * fills the tables via {@code fillTable}.
 */
@BeforeMethod
private void makeTables() {
    // range(1, numReadGroups) has an exclusive upper bound, so this yields
    // "readgroup1" .. "readgroup<numReadGroups - 1>".
    // NOTE(review): the tables below are sized with numReadGroups itself; confirm the
    // one-element difference is intended.
    final List<String> readGroupNames = IntStream.range(1, numReadGroups)
            .mapToObj(index -> "readgroup" + index)
            .collect(Collectors.toList());
    final RecalibrationArgumentCollection defaultArgs = new RecalibrationArgumentCollection();
    covariates = new StandardCovariateList(defaultArgs, readGroupNames);
    tables = new RecalibrationTables(covariates, numReadGroups);
    fillTable(tables);
}
Also used : IntStream(java.util.stream.IntStream) StandardCovariateList(org.broadinstitute.hellbender.utils.recalibration.covariates.StandardCovariateList) NestedIntegerArray(org.broadinstitute.hellbender.utils.collections.NestedIntegerArray) Arrays(java.util.Arrays) List(java.util.List) Assert(org.testng.Assert) BaseTest(org.broadinstitute.hellbender.utils.test.BaseTest) BeforeMethod(org.testng.annotations.BeforeMethod) Test(org.testng.annotations.Test) Covariate(org.broadinstitute.hellbender.utils.recalibration.covariates.Covariate) Collectors(java.util.stream.Collectors) StandardCovariateList(org.broadinstitute.hellbender.utils.recalibration.covariates.StandardCovariateList) BeforeMethod(org.testng.annotations.BeforeMethod)

Example 5 with StandardCovariateList

use of org.broadinstitute.hellbender.utils.recalibration.covariates.StandardCovariateList in project gatk by broadinstitute.

the class RecalUtils method generateCsv.

/**
 * Writes a collection of reports to a file in CSV format, in a form that can be
 * consumed by R scripts (such as the plot generator script).
 * <p/>
 * The covariates handed to {@code writeCsv} are those of the first report returned when
 * iterating over {@code reports.values()}; any further narrowing across reports, if it
 * happens, is done inside {@code writeCsv} and is not visible here.
 *
 * @param out the output file. It will be overwritten.
 * @param reports map where keys are the unique 'mode' (ORIGINAL, RECALIBRATED, ...)
 *                of each report and the corresponding value the report itself.
 * @throws GATKException if <code>reports</code> is empty.
 * @throws FileNotFoundException if <code>out</code> could not be created anew.
 */
public static void generateCsv(final File out, final Map<String, RecalibrationReport> reports) throws FileNotFoundException {
    // Guard clause: there must be at least one report to take the covariates from.
    if (reports.isEmpty()) {
        throw new GATKException("no reports");
    }
    writeCsv(out, reports, reports.values().iterator().next().getCovariates());
}
Also used : GATKException(org.broadinstitute.hellbender.exceptions.GATKException) StandardCovariateList(org.broadinstitute.hellbender.utils.recalibration.covariates.StandardCovariateList)

Aggregations

StandardCovariateList (org.broadinstitute.hellbender.utils.recalibration.covariates.StandardCovariateList)6 File (java.io.File)2 UserException (org.broadinstitute.hellbender.exceptions.UserException)2 GATKVariant (org.broadinstitute.hellbender.utils.variant.GATKVariant)2 SAMFileHeader (htsjdk.samtools.SAMFileHeader)1 SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary)1 IOException (java.io.IOException)1 OutputStream (java.io.OutputStream)1 PrintStream (java.io.PrintStream)1 ArrayList (java.util.ArrayList)1 Arrays (java.util.Arrays)1 List (java.util.List)1 Collectors (java.util.stream.Collectors)1 IntStream (java.util.stream.IntStream)1 ContextShard (org.broadinstitute.hellbender.engine.ContextShard)1 ReadContextData (org.broadinstitute.hellbender.engine.ReadContextData)1 ReferenceDataSource (org.broadinstitute.hellbender.engine.ReferenceDataSource)1 ReferenceMemorySource (org.broadinstitute.hellbender.engine.ReferenceMemorySource)1 ReferenceMultiSource (org.broadinstitute.hellbender.engine.datasources.ReferenceMultiSource)1 ReadFilter (org.broadinstitute.hellbender.engine.filters.ReadFilter)1