Use of org.apache.sysml.conf.DMLConfig in project incubator-systemml by apache.
The class WriteCSVMR, method runJob.
public static JobReturn runJob(MRJobInstruction inst, String[] inputs, InputInfo[] inputInfos,
        long[] rlens, long[] clens, int[] brlens, int[] bclens, String csvWriteInstructions,
        int numReducers, int replication, byte[] resultIndexes, String[] outputs) throws Exception {
JobConf job = new JobConf(WriteCSVMR.class);
job.setJobName("WriteCSV-MR");
byte[] realIndexes = new byte[inputs.length];
for (byte b = 0; b < realIndexes.length; b++) realIndexes[b] = b;
//set up the input files and their format information
MRJobConfiguration.setUpMultipleInputs(job, realIndexes, inputs, inputInfos, brlens, bclens, true, ConvertTarget.CSVWRITE);
//set up the dimensions of input matrices
MRJobConfiguration.setMatricesDimensions(job, realIndexes, rlens, clens);
//set up the block size
MRJobConfiguration.setBlocksSizes(job, realIndexes, brlens, bclens);
MRJobConfiguration.setCSVWriteInstructions(job, csvWriteInstructions);
//set up the replication factor for the results
job.setInt(MRConfigurationNames.DFS_REPLICATION, replication);
//set up preferred custom serialization framework for binary block format
if (MRJobConfiguration.USE_BINARYBLOCK_SERIALIZATION)
MRJobConfiguration.addBinaryBlockSerializationFramework(job);
//set up custom map/reduce configurations
DMLConfig config = ConfigurationManager.getDMLConfig();
MRJobConfiguration.setupCustomMRConfigurations(job, config);
long maxRlen = 0;
for (long rlen : rlens) if (rlen > maxRlen) maxRlen = rlen;
//set up the number of reducers (according to output size)
int numRed = determineNumReducers(rlens, clens, config.getIntValue(DMLConfig.NUM_REDUCERS), (int) maxRlen);
job.setNumReduceTasks(numRed);
byte[] resultDimsUnknown = new byte[resultIndexes.length];
MatrixCharacteristics[] stats = new MatrixCharacteristics[resultIndexes.length];
OutputInfo[] outputInfos = new OutputInfo[outputs.length];
HashMap<Byte, Integer> indexmap = new HashMap<Byte, Integer>();
for (int i = 0; i < stats.length; i++) {
indexmap.put(resultIndexes[i], i);
resultDimsUnknown[i] = (byte) 0;
stats[i] = new MatrixCharacteristics();
outputInfos[i] = OutputInfo.CSVOutputInfo;
}
CSVWriteInstruction[] ins = MRInstructionParser.parseCSVWriteInstructions(csvWriteInstructions);
for (CSVWriteInstruction in : ins) stats[indexmap.get(in.output)].set(rlens[in.input], clens[in.input], -1, -1);
// Print the complete instruction
if (LOG.isTraceEnabled())
inst.printCompleteMRJobInstruction(stats);
//set up what matrices are needed to pass from the mapper to reducer
MRJobConfiguration.setUpOutputIndexesForMapper(job, realIndexes, "", "", csvWriteInstructions, resultIndexes);
//set up the multiple output files, and their format information
MRJobConfiguration.setUpMultipleOutputs(job, resultIndexes, resultDimsUnknown, outputs, outputInfos, true, true);
// configure mapper and the mapper output key value pairs
job.setMapperClass(CSVWriteMapper.class);
job.setMapOutputKeyClass(TaggedFirstSecondIndexes.class);
job.setMapOutputValueClass(MatrixBlock.class);
//configure reducer
job.setReducerClass(CSVWriteReducer.class);
job.setOutputKeyComparatorClass(TaggedFirstSecondIndexes.Comparator.class);
job.setPartitionerClass(TaggedFirstSecondIndexes.FirstIndexRangePartitioner.class);
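// note: range-partitioning by the first (row) index gives each reducer a contiguous
// range of rows, so the resulting CSV part files concatenate in row order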
//job.setOutputFormat(UnPaddedOutputFormat.class);
MatrixCharacteristics[] inputStats = new MatrixCharacteristics[inputs.length];
for (int i = 0; i < inputs.length; i++) {
inputStats[i] = new MatrixCharacteristics(rlens[i], clens[i], brlens[i], bclens[i]);
}
//set unique working dir
MRJobConfiguration.setUniqueWorkingDir(job);
RunningJob runjob = JobClient.runJob(job);
/* Process different counters */
Group group = runjob.getCounters().getGroup(MRJobConfiguration.NUM_NONZERO_CELLS);
for (int i = 0; i < resultIndexes.length; i++) {
// number of non-zeros
stats[i].setNonZeros(group.getCounter(Integer.toString(i)));
}
return new JobReturn(stats, outputInfos, runjob.isSuccessful());
}
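For context, a minimal sketch (not part of the project; the class name and the cap of 10 reducers are illustrative, and the global configuration is assumed to be initialized) showing how the reducer limit consulted by runJob can be read from the global DMLConfig:

import org.apache.hadoop.mapred.JobConf;
import org.apache.sysml.conf.ConfigurationManager;
import org.apache.sysml.conf.DMLConfig;

public class ReducerConfigSketch {
    public static void main(String[] args) {
        // obtain the globally registered DML configuration, as runJob does above
        DMLConfig config = ConfigurationManager.getDMLConfig();
        int maxReducers = config.getIntValue(DMLConfig.NUM_REDUCERS);
        JobConf job = new JobConf();
        // bound the reducer count by the configured maximum (10 is an arbitrary per-job estimate)
        job.setNumReduceTasks(Math.min(maxReducers, 10));
    }
}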
Use of org.apache.sysml.conf.DMLConfig in project incubator-systemml by apache.
The class NativeHelper, method init.
// Performing loading in a method instead of a static block will throw a detailed stack trace in case of fatal errors
private static void init() {
// Only Linux supported for BLAS
if (!SystemUtils.IS_OS_LINUX)
return;
// The attemptedLoading flag ensures that we don't try to load the native libraries
// again and again, especially in the parfor (hence the double-checking with synchronized).
if (!attemptedLoading) {
DMLConfig dmlConfig = ConfigurationManager.getDMLConfig();
// -------------------------------------------------------------------------------------
// We allow BLAS to be enabled or disabled or explicitly selected in one of the two ways:
// 1. DML Configuration: native.blas (boolean flag)
// 2. Environment variable: SYSTEMML_BLAS (can be set to mkl, openblas or none)
// Option 1 will be removed in later SystemML versions.
// Option 2 is useful for two reasons:
// - Developer testing of different BLAS
// - Provides fine-grained control. Certain machines could use mkl while others use openblas, etc.
String userSpecifiedBLAS = (dmlConfig == null) ? "auto" : dmlConfig.getTextValue(DMLConfig.NATIVE_BLAS).trim().toLowerCase();
if (userSpecifiedBLAS.equals("auto") || userSpecifiedBLAS.equals("mkl") || userSpecifiedBLAS.equals("openblas")) {
long start = System.nanoTime();
if (!supportedArchitectures.containsKey(SystemUtils.OS_ARCH)) {
LOG.info("Unsupported architecture for native BLAS:" + SystemUtils.OS_ARCH);
return;
}
synchronized (NativeHelper.class) {
if (!attemptedLoading) {
// If neither MKL nor OpenBLAS is available, we fall back to Java BLAS.
if (userSpecifiedBLAS.equals("auto")) {
blasType = isMKLAvailable() ? "mkl" : isOpenBLASAvailable() ? "openblas" : null;
if (blasType == null)
LOG.info("Unable to load either MKL or OpenBLAS due to " + hintOnFailures);
} else if (userSpecifiedBLAS.equals("mkl")) {
blasType = isMKLAvailable() ? "mkl" : null;
if (blasType == null)
LOG.info("Unable to load MKL due to " + hintOnFailures);
} else if (userSpecifiedBLAS.equals("openblas")) {
blasType = isOpenBLASAvailable() ? "openblas" : null;
if (blasType == null)
LOG.info("Unable to load OpenBLAS due to " + hintOnFailures);
} else {
// Only thrown at development time.
throw new RuntimeException("Unsupported BLAS: " + userSpecifiedBLAS);
}
// =============================================================================
if (blasType != null && loadLibraryHelper("libsystemml_" + blasType + "-Linux-x86_64.so")) {
String blasPathAndHint = "";
// This logic gets the list of native libraries that are loaded
if (LOG.isDebugEnabled()) {
// Only perform the checking of library paths when DEBUG is enabled to avoid runtime overhead.
try {
java.lang.reflect.Field loadedLibraryNamesField = ClassLoader.class.getDeclaredField("loadedLibraryNames");
loadedLibraryNamesField.setAccessible(true);
@SuppressWarnings("unchecked") Vector<String> libraries = (Vector<String>) loadedLibraryNamesField.get(ClassLoader.getSystemClassLoader());
LOG.debug("List of native libraries loaded:" + libraries);
for (String library : libraries) {
if (library.contains("libmkl_rt") || library.contains("libopenblas")) {
blasPathAndHint = " from the path " + library;
break;
}
}
} catch (NoSuchFieldException | SecurityException | IllegalArgumentException | IllegalAccessException e) {
LOG.debug("Error while finding list of native libraries:" + e.getMessage());
}
}
// ------------------------------------------------------------
LOG.info("Using native blas: " + blasType + blasPathAndHint);
isSystemMLLoaded = true;
}
}
}
double timeToLoadInMilliseconds = (System.nanoTime() - start) * 1e-6;
if (timeToLoadInMilliseconds > 1000)
LOG.warn("Time to load native blas: " + timeToLoadInMilliseconds + " milliseconds.");
} else {
LOG.debug("Using internal Java BLAS as native BLAS support the configuration 'native.blas'=" + userSpecifiedBLAS + ".");
}
attemptedLoading = true;
}
}
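As a standalone, hedged sketch (the class name is illustrative), the same 'native.blas' lookup that init() performs, including the null guard for an unset global configuration:

import org.apache.sysml.conf.ConfigurationManager;
import org.apache.sysml.conf.DMLConfig;

public class BlasConfigSketch {
    public static void main(String[] args) {
        DMLConfig dmlConfig = ConfigurationManager.getDMLConfig();
        // default to "auto" when no configuration has been registered, as init() does
        String blas = (dmlConfig == null) ? "auto"
                : dmlConfig.getTextValue(DMLConfig.NATIVE_BLAS).trim().toLowerCase();
        System.out.println("Requested BLAS: " + blas);
    }
}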
Use of org.apache.sysml.conf.DMLConfig in project incubator-systemml by apache.
The class DMLScript, method execute.
///////////////////////////////
// private internal interface
// (core compilation and execute)
////////
/**
* The running body of DMLScript execution. This method should be called after execution properties have been correctly set,
* and customized parameters have been put into argVals
*
* @param dmlScriptStr DML script string
* @param fnameOptConfig configuration file
* @param argVals map of argument values
* @param allArgs all command-line arguments (forwarded to the YARN app master, if one is launched)
* @param scriptType type of script (DML or PyDML)
* @throws ParseException if ParseException occurs
* @throws IOException if IOException occurs
* @throws DMLRuntimeException if DMLRuntimeException occurs
* @throws LanguageException if LanguageException occurs
* @throws HopsException if HopsException occurs
* @throws LopsException if LopsException occurs
*/
private static void execute(String dmlScriptStr, String fnameOptConfig, Map<String, String> argVals,
        String[] allArgs, ScriptType scriptType) throws ParseException, IOException,
        DMLRuntimeException, LanguageException, HopsException, LopsException {
SCRIPT_TYPE = scriptType;
//print basic time and environment info
printStartExecInfo(dmlScriptStr);
//Step 1: parse configuration files
DMLConfig dmlconf = DMLConfig.readConfigurationFile(fnameOptConfig);
ConfigurationManager.setGlobalConfig(dmlconf);
CompilerConfig cconf = OptimizerUtils.constructCompilerConfig(dmlconf);
ConfigurationManager.setGlobalConfig(cconf);
LOG.debug("\nDML config: \n" + dmlconf.getConfigInfo());
//Step 2: set local/remote memory if requested (for compile in AM context)
if (dmlconf.getBooleanValue(DMLConfig.YARN_APPMASTER)) {
DMLAppMasterUtils.setupConfigRemoteMaxMemory(dmlconf);
}
//Step 3: parse dml script
Statistics.startCompileTimer();
ParserWrapper parser = ParserFactory.createParser(scriptType);
DMLProgram prog = parser.parse(DML_FILE_PATH_ANTLR_PARSER, dmlScriptStr, argVals);
//Step 4: construct HOP DAGs (incl LVA, validate, and setup)
DMLTranslator dmlt = new DMLTranslator(prog);
dmlt.liveVariableAnalysis(prog);
dmlt.validateParseTree(prog);
dmlt.constructHops(prog);
//init working directories (before usage by following compilation steps)
initHadoopExecution(dmlconf);
//Step 5: rewrite HOP DAGs (incl IPA and memory estimates)
dmlt.rewriteHopsDAG(prog);
//Step 5.1: Generate code for the rewritten Hop dags
if (dmlconf.getBooleanValue(DMLConfig.CODEGEN)) {
SpoofCompiler.PLAN_CACHE_POLICY = PlanCachePolicy.get(dmlconf.getBooleanValue(DMLConfig.CODEGEN_PLANCACHE), dmlconf.getIntValue(DMLConfig.CODEGEN_LITERALS) == 2);
SpoofCompiler.setExecTypeSpecificJavaCompiler();
if (SpoofCompiler.INTEGRATION == IntegrationType.HOPS)
dmlt.codgenHopsDAG(prog);
}
//Step 6: construct lops (incl exec type and op selection)
dmlt.constructLops(prog);
if (LOG.isDebugEnabled()) {
LOG.debug("\n********************** LOPS DAG *******************");
dmlt.printLops(prog);
dmlt.resetLopsDAGVisitStatus(prog);
}
//Step 7: generate runtime program
Program rtprog = prog.getRuntimeProgram(dmlconf);
//Step 7.1: Generate code for the rewritten Hop dags w/o modify
if (dmlconf.getBooleanValue(DMLConfig.CODEGEN) && SpoofCompiler.INTEGRATION == IntegrationType.RUNTIME) {
dmlt.codgenHopsDAG(rtprog);
}
//Step 8: [optional global data flow optimization]
if (OptimizerUtils.isOptLevel(OptimizationLevel.O4_GLOBAL_TIME_MEMORY)) {
LOG.warn("Optimization level '" + OptimizationLevel.O4_GLOBAL_TIME_MEMORY + "' " + "is still in experimental state and not intended for production use.");
rtprog = GlobalOptimizerWrapper.optimizeProgram(prog, rtprog);
}
//launch SystemML appmaster (if requested and not already in launched AM)
if (dmlconf.getBooleanValue(DMLConfig.YARN_APPMASTER)) {
//if AM launch unsuccessful, fall back to normal execute
if (!isActiveAM() && DMLYarnClientProxy.launchDMLYarnAppmaster(dmlScriptStr, dmlconf, allArgs, rtprog))
return;
//in AM context (not failed AM launch)
if (isActiveAM())
DMLAppMasterUtils.setupProgramMappingRemoteMaxMemory(rtprog);
}
//Step 9: prepare statistics [and optional explain output]
//count number of compiled MR jobs / SP instructions
ExplainCounts counts = Explain.countDistributedOperations(rtprog);
Statistics.resetNoOfCompiledJobs(counts.numJobs);
//explain plan of program (hops or runtime)
if (EXPLAIN != ExplainType.NONE) {
LOG.info("EXPLAIN (" + EXPLAIN.toString() + "):\n" + Explain.explainMemoryBudget(counts) + "\n" + Explain.explainDegreeOfParallelism(counts) + Explain.explain(prog, rtprog, EXPLAIN));
}
Statistics.stopCompileTimer();
//double costs = CostEstimationWrapper.getTimeEstimate(rtprog, ExecutionContextFactory.createContext());
//System.out.println("Estimated costs: "+costs);
//Step 10: execute runtime program
ExecutionContext ec = null;
try {
ec = ExecutionContextFactory.createContext(rtprog);
ScriptExecutorUtils.executeRuntimeProgram(rtprog, ec, dmlconf, STATISTICS ? STATISTICS_COUNT : 0);
} finally {
if (ec != null && ec instanceof SparkExecutionContext)
((SparkExecutionContext) ec).close();
LOG.info("END DML run " + getDateTime());
//cleanup scratch_space and all working dirs
cleanupHadoopExecution(dmlconf);
}
}
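To illustrate Step 1 above in isolation, a minimal sketch (class name and command-line handling are illustrative; error handling omitted) of parsing a configuration file and registering it globally with the same calls execute() uses:

import org.apache.sysml.conf.ConfigurationManager;
import org.apache.sysml.conf.DMLConfig;

public class ConfigBootstrapSketch {
    public static void main(String[] args) throws Exception {
        // a null path falls back to the built-in default configuration
        DMLConfig dmlconf = DMLConfig.readConfigurationFile(args.length > 0 ? args[0] : null);
        ConfigurationManager.setGlobalConfig(dmlconf);
        // later compilation steps read flags such as CODEGEN and YARN_APPMASTER from this global config
        System.out.println(dmlconf.getConfigInfo());
    }
}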
Use of org.apache.sysml.conf.DMLConfig in project incubator-systemml by apache.
The class DMLScript, method cleanSystemMLWorkspace.
private static void cleanSystemMLWorkspace() throws DMLException {
try {
//read the default config
DMLConfig conf = DMLConfig.readConfigurationFile(null);
//run cleanup job to clean remote local tmp dirs
CleanupMR.runJob(conf);
//cleanup scratch space (on HDFS)
String scratch = conf.getTextValue(DMLConfig.SCRATCH_SPACE);
if (scratch != null)
MapReduceTool.deleteFileIfExistOnHDFS(scratch);
//cleanup local working dir
String localtmp = conf.getTextValue(DMLConfig.LOCAL_TMP_DIR);
if (localtmp != null)
LocalFileUtils.cleanupRcWorkingDirectory(localtmp);
} catch (Exception ex) {
throw new DMLException("Failed to run SystemML workspace cleanup.", ex);
}
}
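A small side-effect-free sketch (class name illustrative) of reading the two directory settings this cleanup relies on; either key may be unset, in which case getTextValue returns null and the corresponding cleanup step above is skipped:

import org.apache.sysml.conf.DMLConfig;

public class WorkspaceDirsSketch {
    public static void main(String[] args) throws Exception {
        // a null path loads the default configuration, as cleanSystemMLWorkspace does
        DMLConfig conf = DMLConfig.readConfigurationFile(null);
        System.out.println("scratch space (HDFS): " + conf.getTextValue(DMLConfig.SCRATCH_SPACE));
        System.out.println("local tmp dir: " + conf.getTextValue(DMLConfig.LOCAL_TMP_DIR));
    }
}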
Use of org.apache.sysml.conf.DMLConfig in project incubator-systemml by apache.
The class MLContextUtil, method setConfig.
/**
* Set SystemML configuration properties based on a configuration file.
*
* @param configFilePath
* Path to configuration file.
* @throws MLContextException
* if configuration file was not found or a parse exception
* occurred
*/
public static void setConfig(String configFilePath) {
try {
DMLConfig config = new DMLConfig(configFilePath);
ConfigurationManager.setGlobalConfig(config);
} catch (ParseException e) {
throw new MLContextException("Parse Exception when setting config", e);
} catch (FileNotFoundException e) {
throw new MLContextException("File not found (" + configFilePath + ") when setting config", e);
}
}
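Typical usage, assuming a SystemML configuration file exists at the given (illustrative) path:

// registers the file's properties globally; wraps parse and file-not-found errors in MLContextException
MLContextUtil.setConfig("/path/to/SystemML-config.xml");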