use of org.broadinstitute.hellbender.utils.recalibration.covariates.StandardCovariateList in project gatk by broadinstitute.
the class BaseRecalibratorSparkSharded method runPipeline.
/**
 * Runs base quality score recalibration over a single reads input and writes the
 * resulting recalibration tables as a textual report to {@code outputTablesPath}.
 *
 * <p>Steps, in order: load the reads header and the reference dictionary, validate the
 * two dictionaries against each other, broadcast the header and reference dictionary,
 * resolve the intervals and known variants, build the context shards, compute
 * per-partition recalibration tables, tree-aggregate them into one table, finalize it
 * and save the report.
 *
 * @param ctx the Spark context used to read the inputs and to create the broadcasts
 * @throws UserException if the number of reads inputs is not exactly one, or
 *         (as {@code UserException.CouldNotCreateOutputFile}) if saving the report
 *         fails with an {@link IOException}
 */
@Override
protected void runPipeline(JavaSparkContext ctx) {
    if (readArguments.getReadFilesNames().size() != 1) {
        throw new UserException("Sorry, we only support a single reads input for now.");
    }

    final String bam = readArguments.getReadFilesNames().get(0);
    final String referenceURL = referenceArguments.getReferenceFileName();
    auth = getAuthHolder();

    final ReferenceMultiSource rds = new ReferenceMultiSource(auth, referenceURL, BaseRecalibrationEngine.BQSR_REFERENCE_WINDOW_FUNCTION);
    final SAMFileHeader readsHeader = new ReadsSparkSource(ctx, readArguments.getReadValidationStringency()).getHeader(bam, referenceURL);
    final SAMSequenceDictionary readsDictionary = readsHeader.getSequenceDictionary();
    final SAMSequenceDictionary refDictionary = rds.getReferenceSequenceDictionary(readsDictionary);
    final ReadFilter readFilterToApply = ReadFilter.fromList(BaseRecalibrator.getStandardBQSRReadFilterList(), readsHeader);
    SequenceDictionaryUtils.validateDictionaries("reference", refDictionary, "reads", readsDictionary);

    final Broadcast<SAMFileHeader> readsHeaderBcast = ctx.broadcast(readsHeader);
    final Broadcast<SAMSequenceDictionary> refDictionaryBcast = ctx.broadcast(refDictionary);

    // Use the user-supplied intervals when present; otherwise cover every contig of the reads dictionary.
    final List<SimpleInterval> intervals;
    if (intervalArgumentCollection.intervalsSpecified()) {
        intervals = intervalArgumentCollection.getIntervals(readsHeader.getSequenceDictionary());
    } else {
        intervals = IntervalUtils.getAllIntervalsForReference(readsHeader.getSequenceDictionary());
    }

    // Known variants are passed through hackilyCopyFromGCSIfNecessary before being loaded.
    final List<GATKVariant> variants = VariantsSource.getVariantsList(hackilyCopyFromGCSIfNecessary(knownVariants));

    // get reads, reference, variants
    final JavaRDD<ContextShard> readsWithContext = AddContextDataToReadSparkOptimized.add(ctx, intervals, bam, variants, readFilterToApply, rds);

    // run BaseRecalibratorEngine.
    final BaseRecalibratorEngineSparkWrapper recal = new BaseRecalibratorEngineSparkWrapper(readsHeaderBcast, refDictionaryBcast, bqsrArgs);
    final JavaRDD<RecalibrationTables> tables = readsWithContext.mapPartitions(shards -> recal.apply(shards));

    final RecalibrationTables emptyRecalibrationTable = new RecalibrationTables(new StandardCovariateList(bqsrArgs, readsHeader));
    // Aggregation depth is floor(log2(number of partitions)), but never less than 1.
    final int aggregationDepth = Math.max(1, (int) (Math.log(tables.partitions().size()) / Math.log(2)));
    final RecalibrationTables table = tables.treeAggregate(
            emptyRecalibrationTable,
            RecalibrationTables::inPlaceCombine,
            RecalibrationTables::inPlaceCombine,
            aggregationDepth);
    BaseRecalibrationEngine.finalizeRecalibrationTables(table);

    try {
        BaseRecalibratorEngineSparkWrapper.saveTextualReport(outputTablesPath, readsHeader, table, bqsrArgs, auth);
    } catch (IOException e) {
        throw new UserException.CouldNotCreateOutputFile(new File(outputTablesPath), e);
    }
}
use of org.broadinstitute.hellbender.utils.recalibration.covariates.StandardCovariateList in project gatk by broadinstitute.
the class BaseRecalibratorSparkFn method apply.
/**
 * Builds a recalibration report from reads paired with their context data.
 *
 * <p>Each partition feeds its reads through its own {@code BaseRecalibrationEngine} and
 * emits one {@code RecalibrationTables}; the per-partition tables are then
 * tree-aggregated into a single table, finalized, and turned into a report together
 * with the argument table and the quantization table.
 *
 * @param readsWithContext    reads paired with their overlapping variants and reference bases
 * @param header              reads header used to build the engine and the covariate lists
 * @param referenceDictionary sequence dictionary handed to the in-memory reference source of each read
 * @param recalArgs           recalibration arguments (also supplies {@code QUANTIZING_LEVELS})
 * @return the recalibration report assembled from the combined tables
 */
public static RecalibrationReport apply(final JavaPairRDD<GATKRead, ReadContextData> readsWithContext, final SAMFileHeader header, final SAMSequenceDictionary referenceDictionary, final RecalibrationArgumentCollection recalArgs) {
    final JavaRDD<RecalibrationTables> unmergedTables = readsWithContext.mapPartitions(partitionReads -> {
        final BaseRecalibrationEngine engine = new BaseRecalibrationEngine(recalArgs, header);
        engine.logCovariatesUsed();
        while (partitionReads.hasNext()) {
            final Tuple2<GATKRead, ReadContextData> pair = partitionReads.next();
            final ReadContextData contextData = pair._2();
            final Iterable<GATKVariant> overlappingVariants = contextData.getOverlappingVariants();
            final ReferenceBases overlappingBases = contextData.getOverlappingReferenceBases();
            // Wrap the bases attached to this read in an in-memory reference source.
            final ReferenceDataSource readReference = new ReferenceMemorySource(overlappingBases, referenceDictionary);
            engine.processRead(pair._1(), readReference, overlappingVariants);
        }
        // One tables object per partition.
        return Arrays.asList(engine.getRecalibrationTables()).iterator();
    });

    final RecalibrationTables emptyRecalibrationTable = new RecalibrationTables(new StandardCovariateList(recalArgs, header));
    // Aggregation depth is floor(log2(number of partitions)), but never less than 1.
    final int aggregationDepth = Math.max(1, (int) (Math.log(unmergedTables.partitions().size()) / Math.log(2)));
    final RecalibrationTables combinedTables = unmergedTables.treeAggregate(
            emptyRecalibrationTable,
            RecalibrationTables::inPlaceCombine,
            RecalibrationTables::inPlaceCombine,
            aggregationDepth);
    BaseRecalibrationEngine.finalizeRecalibrationTables(combinedTables);

    final QuantizationInfo quantizationInfo = new QuantizationInfo(combinedTables, recalArgs.QUANTIZING_LEVELS);
    final StandardCovariateList covariates = new StandardCovariateList(recalArgs, header);
    return RecalUtils.createRecalibrationReport(
            recalArgs.generateReportTable(covariates.covariateNames()),
            quantizationInfo.generateReportTable(),
            RecalUtils.generateReportTables(combinedTables, covariates));
}
use of org.broadinstitute.hellbender.utils.recalibration.covariates.StandardCovariateList in project gatk by broadinstitute.
the class RecalibrationReport method initializeArgumentCollectionTable.
/**
 * Parses the arguments table from the GATK Report and creates a RAC object with the proper initialization values.
 *
 * <p>Each row is read as an (argument name, value) pair. A value that was printed as the
 * string {@code "null"} is translated back to {@code null}; a cell that is already
 * {@code null} is treated the same way instead of failing. Covariate and SOLiD related
 * rows are only validated, the remaining recognized rows are copied into the returned
 * collection, and rows with an unrecognized argument name are ignored.
 *
 * @param table the GATKReportTable containing the arguments and its corresponding values
 * @return a RAC object properly initialized with all the objects in the table
 * @throws UserException if the table lists covariates other than the standard ones, sets
 *         {@code no_standard_covs} to true, or uses a SOLiD mode/strategy other than the
 *         single supported value
 */
private static RecalibrationArgumentCollection initializeArgumentCollectionTable(GATKReportTable table) {
    final RecalibrationArgumentCollection rac = new RecalibrationArgumentCollection();
    final List<String> standardCovariateClassNames = new StandardCovariateList(rac, Collections.emptyList()).getStandardCovariateClassNames();

    for (int row = 0; row < table.getNumRows(); row++) {
        final String argument = table.get(row, "Argument").toString();
        final Object rawValue = table.get(row, RecalUtils.ARGUMENT_VALUE_COLUMN_NAME);
        // generic translation of null values that were printed out as strings | todo -- add this capability to the GATKReport
        // Constant-first comparison so that a cell that is already null does not throw.
        final Object value = "null".equals(rawValue) ? null : rawValue;

        switch (argument) {
            case "covariate": {
                // A null covariate value is skipped without validation.
                if (value != null) {
                    final List<String> covs = new ArrayList<>(Arrays.asList(value.toString().split(",")));
                    if (!covs.equals(standardCovariateClassNames)) {
                        throw new UserException("Non-standard covariates are not supported. Only the following are supported " + standardCovariateClassNames + " but was " + covs);
                    }
                }
                break;
            }
            case "no_standard_covs": {
                final boolean noStandardCovs = decodeBoolean(value);
                if (noStandardCovs) {
                    throw new UserException("Non-standard covariates are not supported. Only the following are supported " + standardCovariateClassNames + " but no_standard_covs was true");
                }
                break;
            }
            case "solid_recal_mode": {
                final String solidRecalMode = (String) value;
                if (!RecalibrationArgumentCollection.SOLID_RECAL_MODE.equals(solidRecalMode)) {
                    throw new UserException("Solid is not supported. Only " + RecalibrationArgumentCollection.SOLID_RECAL_MODE + " is allowed as value for solid_recal_mode");
                }
                break;
            }
            case "solid_nocall_strategy": {
                final String solidNocallStrategy = (String) value;
                if (!RecalibrationArgumentCollection.SOLID_NOCALL_STRATEGY.equals(solidNocallStrategy)) {
                    throw new UserException("Solid is not supported. Only " + RecalibrationArgumentCollection.SOLID_NOCALL_STRATEGY + " is allowed as value for solid_nocall_strategy");
                }
                break;
            }
            case "mismatches_context_size": {
                rac.MISMATCHES_CONTEXT_SIZE = decodeInteger(value);
                break;
            }
            case "indels_context_size": {
                rac.INDELS_CONTEXT_SIZE = decodeInteger(value);
                break;
            }
            case "mismatches_default_quality": {
                rac.MISMATCHES_DEFAULT_QUALITY = decodeByte(value);
                break;
            }
            case "insertions_default_quality": {
                rac.INSERTIONS_DEFAULT_QUALITY = decodeByte(value);
                break;
            }
            case "deletions_default_quality": {
                rac.DELETIONS_DEFAULT_QUALITY = decodeByte(value);
                break;
            }
            case "maximum_cycle_value": {
                rac.MAXIMUM_CYCLE_VALUE = decodeInteger(value);
                break;
            }
            case "low_quality_tail": {
                rac.LOW_QUAL_TAIL = decodeByte(value);
                break;
            }
            case "default_platform": {
                rac.DEFAULT_PLATFORM = (String) value;
                break;
            }
            case "force_platform": {
                rac.FORCE_PLATFORM = (String) value;
                break;
            }
            case "quantizing_levels": {
                rac.QUANTIZING_LEVELS = decodeInteger(value);
                break;
            }
            case "recalibration_report": {
                rac.existingRecalibrationReport = (value == null) ? null : new File((String) value);
                break;
            }
            case "binary_tag_name": {
                // Casting null to String yields null, so no explicit null check is needed.
                rac.BINARY_TAG_NAME = (String) value;
                break;
            }
            default: {
                // Rows with any other argument name are left unprocessed.
                break;
            }
        }
    }
    return rac;
}
use of org.broadinstitute.hellbender.utils.recalibration.covariates.StandardCovariateList in project gatk by broadinstitute.
the class RecalibrationTablesUnitTest method makeTables.
/**
 * Test fixture setup: rebuilds {@code covariates} and {@code tables} and fills the
 * tables via {@code fillTable}.
 *
 * <p>Read group names are "readgroup1", "readgroup2", ... generated from
 * {@code IntStream.range(1, numReadGroups)}.
 * NOTE(review): that range produces {@code numReadGroups - 1} names while the tables are
 * constructed with {@code numReadGroups} -- confirm the mismatch is intended.
 */
@BeforeMethod
private void makeTables() {
    final RecalibrationArgumentCollection defaultArgs = new RecalibrationArgumentCollection();
    final List<String> readGroupNames = IntStream.range(1, numReadGroups)
            .mapToObj(index -> "readgroup" + index)
            .collect(Collectors.toList());

    covariates = new StandardCovariateList(defaultArgs, readGroupNames);
    tables = new RecalibrationTables(covariates, numReadGroups);
    fillTable(tables);
}
use of org.broadinstitute.hellbender.utils.recalibration.covariates.StandardCovariateList in project gatk by broadinstitute.
the class RecalUtils method generateCsv.
/**
 * Prints out a collection of reports into a file in Csv format in a way
 * that can be used by R scripts (such as the plot generator script).
 * <p/>
 * The covariate list handed to {@code writeCsv} is taken from the first report
 * returned when iterating over the map's values.
 * NOTE(review): this was previously described as "the minimum common set from all
 * reports"; only the first report is consulted here -- confirm whether
 * {@code writeCsv} reconciles covariates across reports.
 *
 * @param out     the output file. It will be overridden.
 * @param reports map where keys are the unique 'mode' (ORIGINAL, RECALIBRATED, ...)
 *                of each report and the corresponding value the report itself.
 * @throws FileNotFoundException if <code>out</code> could not be created anew.
 * @throws GATKException if {@code reports} is empty.
 */
public static void generateCsv(final File out, final Map<String, RecalibrationReport> reports) throws FileNotFoundException {
    if (reports.isEmpty()) {
        throw new GATKException("no reports");
    }
    // The first report in iteration order supplies the covariates for the whole output.
    writeCsv(out, reports, reports.values().iterator().next().getCovariates());
}
Aggregations