Use of org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PartitionFormat in project incubator-systemml by apache.
The class ProgramConverter, method serializeDataObject.
public static String serializeDataObject(String key, Data dat) {
    // SCHEMA: <name>|<datatype>|<valuetype>|value
    // (scalars are serialized by value, matrices by filename)
    StringBuilder sb = new StringBuilder();
    // prepare data for serialization
    String name = key;
    DataType datatype = dat.getDataType();
    ValueType valuetype = dat.getValueType();
    String value = null;
    String[] matrixMetaData = null;
    switch (datatype) {
        case SCALAR:
            ScalarObject so = (ScalarObject) dat;
            value = so.getStringValue();
            break;
        case MATRIX:
            MatrixObject mo = (MatrixObject) dat;
            MetaDataFormat md = (MetaDataFormat) dat.getMetaData();
            MatrixCharacteristics mc = md.getMatrixCharacteristics();
            value = mo.getFileName();
            PartitionFormat partFormat = (mo.getPartitionFormat() != null) ?
                new PartitionFormat(mo.getPartitionFormat(), mo.getPartitionSize()) : PartitionFormat.NONE;
            matrixMetaData = new String[9];
            matrixMetaData[0] = String.valueOf(mc.getRows());
            matrixMetaData[1] = String.valueOf(mc.getCols());
            matrixMetaData[2] = String.valueOf(mc.getRowsPerBlock());
            matrixMetaData[3] = String.valueOf(mc.getColsPerBlock());
            matrixMetaData[4] = String.valueOf(mc.getNonZeros());
            matrixMetaData[5] = InputInfo.inputInfoToString(md.getInputInfo());
            matrixMetaData[6] = OutputInfo.outputInfoToString(md.getOutputInfo());
            matrixMetaData[7] = String.valueOf(partFormat);
            matrixMetaData[8] = String.valueOf(mo.getUpdateType());
            break;
        default:
            throw new DMLRuntimeException("Unable to serialize datatype " + datatype);
    }
    // serialize data
    sb.append(name);
    sb.append(DATA_FIELD_DELIM);
    sb.append(datatype);
    sb.append(DATA_FIELD_DELIM);
    sb.append(valuetype);
    sb.append(DATA_FIELD_DELIM);
    sb.append(value);
    if (matrixMetaData != null)
        for (int i = 0; i < matrixMetaData.length; i++) {
            sb.append(DATA_FIELD_DELIM);
            sb.append(matrixMetaData[i]);
        }
    return sb.toString();
}
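As a concrete illustration of the schema above, the following minimal sketch serializes an integer scalar. It assumes DATA_FIELD_DELIM is the pipe character shown in the schema comment; the exact rendering of the enum values is not verified here.

// sketch only: serialize an int scalar bound to the (illustrative) name "iter"
ScalarObject so = new IntObject(7);
String ser = ProgramConverter.serializeDataObject("iter", so);
// expected shape: "iter|SCALAR|INT|7"
// a MATRIX entry would instead carry its filename plus the nine metadata
// fields (dims, block sizes, nnz, in/out formats, partitioning, update type)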
Use of org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PartitionFormat in project incubator-systemml by apache.
The class RemoteDPParWorkerReducer, method configure.
@Override
public void configure(JobConf job) {
    // Step 1: configure data partitioning information
    _dpf = MRJobConfiguration.getPartitioningFormat(job);
    MatrixCharacteristics mc = MRJobConfiguration.getPartitionedMatrixSize(job);
    PartitionFormat pf = new PartitionFormat(_dpf, MRJobConfiguration.getPartitioningSizeN(job));
    _rlen = (int) pf.getNumRows(mc);
    _clen = (int) pf.getNumColumns(mc);
    _brlen = mc.getRowsPerBlock();
    _bclen = mc.getColsPerBlock();
    _iterVar = MRJobConfiguration.getPartitioningItervar(job);
    _inputVar = MRJobConfiguration.getPartitioningMatrixvar(job);
    _info = MRJobConfiguration.getPartitioningOutputInfo(job);
    _tSparseCol = MRJobConfiguration.getPartitioningTransposedCol(job);
    if (_tSparseCol)
        _partition = new MatrixBlock((int) _clen, _rlen, true);
    else
        _partition = new MatrixBlock((int) _rlen, _clen, false);
    // Step 2: configure parworker
    String taskID = job.get(MRConfigurationNames.MR_TASK_ID);
    LOG.trace("configure RemoteDPParWorkerReducer " + taskID);
    try {
        _stringID = taskID;
        // extract int task ID
        _workerID = IDHandler.extractIntID(_stringID);
        // use the given job conf in the context of MR jobs
        // (for example, this config points to local fs instead of hdfs by default)
        if (!InfrastructureAnalyzer.isLocalMode(job)) {
            ConfigurationManager.setCachedJobConf(job);
        }
        // create local runtime program
        String in = MRJobConfiguration.getProgramBlocks(job);
        ParForBody body = ProgramConverter.parseParForBody(in, (int) _workerID);
        _childBlocks = body.getChildBlocks();
        _ec = body.getEc();
        _resultVars = body.getResultVariables();
        // init local cache manager
        if (!CacheableData.isCachingActive()) {
            String uuid = IDHandler.createDistributedUniqueID();
            LocalFileUtils.createWorkingDirectoryWithUUID(uuid);
            // incl. activation and cache dir creation
            // (each map task gets its own dir for simplified cleanup)
            CacheableData.initCaching(uuid);
        }
        if (!CacheableData.cacheEvictionLocalFilePrefix.contains("_")) {
            // account for local mode
            CacheableData.cacheEvictionLocalFilePrefix = CacheableData.cacheEvictionLocalFilePrefix + "_" + _workerID;
        }
        // ensure that resultvar files are not removed
        super.pinResultVariables();
        // enable/disable caching (if required)
        boolean cpCaching = MRJobConfiguration.getParforCachingConfig(job);
        if (!cpCaching)
            CacheableData.disableCaching();
        _numTasks = 0;
        _numIters = 0;
    } catch (Exception ex) {
        throw new RuntimeException(ex);
    }
    // disable parfor stat monitoring (reporting execution times via counters is not useful here)
    StatisticMonitor.disableStatMonitoring();
    // always reset statistics because counters are reported per map task (relevant in case of JVM reuse)
    if (DMLScript.STATISTICS && !InfrastructureAnalyzer.isLocalMode(job))
        Statistics.reset();
}
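The partition dimensions _rlen/_clen above follow directly from the partition format. A minimal sketch of the expected semantics, with values assumed from the PDataPartitionFormat naming rather than verified output:

// 10,000 x 100 matrix in 1,000 x 1,000 blocks, partitioned row-block-wise with N = 1,000
MatrixCharacteristics mc = new MatrixCharacteristics(10000, 100, 1000, 1000, -1);
PartitionFormat pf = new PartitionFormat(PDataPartitionFormat.ROW_BLOCK_WISE_N, 1000);
long rows = pf.getNumRows(mc);    // 1000: each partition holds N rows
long cols = pf.getNumColumns(mc); // 100:  ...and all columns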
Use of org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PartitionFormat in project incubator-systemml by apache.
The class OptimizerConstrained, method optimize.
/**
 * Main optimization procedure.
 *
 * Transformation-based heuristic (rule-based) optimization
 * (no use of the statement block sb; the program block pb is changed directly).
 */
@Override
public boolean optimize(ParForStatementBlock sb, ParForProgramBlock pb, OptTree plan, CostEstimator est, ExecutionContext ec) {
    LOG.debug("--- " + getOptMode() + " OPTIMIZER -------");
    OptNode pn = plan.getRoot();
    // early abort for empty parfor body
    if (pn.isLeaf())
        return true;
    // ANALYZE infrastructure properties
    super.analyzeProblemAndInfrastructure(pn);
    _cost = est;
    // debug and warnings output
    LOG.debug(getOptMode() + " OPT: Optimize with local_max_mem=" + toMB(_lm) + " and remote_max_mem=" + toMB(_rm) + ".");
    if (_rnk <= 0 || _rk <= 0)
        LOG.warn(getOptMode() + " OPT: Optimize for inactive cluster (num_nodes=" + _rnk + ", num_map_slots=" + _rk + ").");
    // ESTIMATE memory consumption
    ExecType oldET = pn.getExecType();
    int oldK = pn.getK();
    // serial parfor as baseline for basic mem consumption
    pn.setSerialParFor();
    double M0a = _cost.getEstimate(TestMeasure.MEMORY_USAGE, pn);
    pn.setExecType(oldET);
    pn.setK(oldK);
    LOG.debug(getOptMode() + " OPT: estimated mem (serial exec) M=" + toMB(M0a));
    // OPTIMIZE PARFOR PLAN
    // rewrite 1: data partitioning (incl. log. recompile RIX)
    HashMap<String, PartitionFormat> partitionedMatrices = new HashMap<>();
    rewriteSetDataPartitioner(pn, ec.getVariables(), partitionedMatrices, OptimizerUtils.getLocalMemBudget());
    // reestimate
    double M0b = _cost.getEstimate(TestMeasure.MEMORY_USAGE, pn);
    // rewrite 2: remove unnecessary compare matrix
    rewriteRemoveUnnecessaryCompareMatrix(pn, ec);
    // rewrite 3: result partitioning (incl. log/phy recompile LIX)
    boolean flagLIX = super.rewriteSetResultPartitioning(pn, M0b, ec.getVariables());
    // reestimate
    double M1 = _cost.getEstimate(TestMeasure.MEMORY_USAGE, pn);
    LOG.debug(getOptMode() + " OPT: estimated new mem (serial exec) M=" + toMB(M1));
    // determine memory consumption for what-if: all-cp or partitioned
    double M2 = _cost.getEstimate(TestMeasure.MEMORY_USAGE, pn, LopProperties.ExecType.CP);
    LOG.debug(getOptMode() + " OPT: estimated new mem (serial exec, all CP) M=" + toMB(M2));
    double M3 = _cost.getEstimate(TestMeasure.MEMORY_USAGE, pn, true);
    LOG.debug(getOptMode() + " OPT: estimated new mem (cond partitioning) M=" + toMB(M3));
    // rewrite 4: execution strategy
    // keep old exec mode
    PExecMode tmpmode = getPExecMode(pn);
    boolean flagRecompMR = rewriteSetExecutionStategy(pn, M0a, M1, M2, M3, flagLIX);
    // exec-type-specific rewrites
    if (pn.getExecType() == getRemoteExecType()) {
        if (M1 > _rm && M3 <= _rm) {
            // rewrite 1: data partitioning (apply conditional partitioning)
            rewriteSetDataPartitioner(pn, ec.getVariables(), partitionedMatrices, M3);
            // reestimate
            M1 = _cost.getEstimate(TestMeasure.MEMORY_USAGE, pn);
        }
        if (flagRecompMR) {
            // rewrite 5: set operations exec type
            rewriteSetOperationsExecType(pn, flagRecompMR);
            // reestimate
            M1 = _cost.getEstimate(TestMeasure.MEMORY_USAGE, pn);
        }
        // rewrite 6: data colocation
        super.rewriteDataColocation(pn, ec.getVariables());
        // rewrite 7: set partition replication factor
        super.rewriteSetPartitionReplicationFactor(pn, partitionedMatrices, ec.getVariables());
        // rewrite 8: set export replication factor
        super.rewriteSetExportReplicationFactor(pn, ec.getVariables());
        // rewrite 10: determine degree of parallelism
        rewriteSetDegreeOfParallelism(pn, M1, false);
        // rewrite 11: task partitioning
        rewriteSetTaskPartitioner(pn, false, flagLIX);
        // rewrite 12: fused data partitioning and execution
        rewriteSetFusedDataPartitioningExecution(pn, M1, flagLIX, partitionedMatrices, ec.getVariables(), tmpmode);
        // rewrite 13: transpose sparse vector operations
        super.rewriteSetTranposeSparseVectorOperations(pn, partitionedMatrices, ec.getVariables());
        // rewrite 14: set in-place result indexing
        HashSet<ResultVar> inplaceResultVars = new HashSet<>();
        super.rewriteSetInPlaceResultIndexing(pn, M1, ec.getVariables(), inplaceResultVars, ec);
        // rewrite 15: disable CP caching
        super.rewriteDisableCPCaching(pn, inplaceResultVars, ec.getVariables());
    } else { // if( pn.getExecType() == ExecType.CP )
        // rewrite 10: determine degree of parallelism
        rewriteSetDegreeOfParallelism(pn, M1, false);
        // rewrite 11: task partitioning (flagLIX always false)
        rewriteSetTaskPartitioner(pn, false, false);
        // rewrite 14: set in-place result indexing
        HashSet<ResultVar> inplaceResultVars = new HashSet<>();
        super.rewriteSetInPlaceResultIndexing(pn, M1, ec.getVariables(), inplaceResultVars, ec);
        if (!OptimizerUtils.isSparkExecutionMode()) {
            // rewrite 16: runtime piggybacking
            super.rewriteEnableRuntimePiggybacking(pn, ec.getVariables(), partitionedMatrices);
        } else {
            // rewrite 17: checkpoint injection for parfor loop body
            super.rewriteInjectSparkLoopCheckpointing(pn);
            // rewrite 18: repartition read-only inputs for zipmm
            super.rewriteInjectSparkRepartition(pn, ec.getVariables());
            // rewrite 19: eager caching for checkpoint rdds
            super.rewriteSetSparkEagerRDDCaching(pn, ec.getVariables());
        }
    }
    // rewrite 20: set result merge
    rewriteSetResultMerge(pn, ec.getVariables(), true);
    // rewrite 21: set local recompile memory budget
    super.rewriteSetRecompileMemoryBudget(pn);
    ///////
    // final rewrites for cleanup / minor improvements
    // rewrite 22: parfor (in recursive functions) to for
    super.rewriteRemoveRecursiveParFor(pn, ec.getVariables());
    // rewrite 23: parfor (par=1) to for
    super.rewriteRemoveUnnecessaryParFor(pn);
    // info optimization result
    _numEvaluatedPlans = 1;
    return true;
}
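The estimates M0a/M1/M2/M3 drive the execution-strategy decision made in rewriteSetExecutionStategy. The following is only a simplified sketch of the underlying idea, not SystemML's actual decision rule:

// illustration: run the parfor body in local CP if a serial-exec estimate fits
// the local budget; otherwise go remote, where conditional partitioning (M3)
// may be what makes the body fit the remote memory budget
static boolean runLocalCP(double M1, double M2, double M3, double localBudget, double remoteBudget) {
    if (Math.min(M1, M2) <= localBudget)
        return true;  // fits in the local JVM (possibly forcing all ops to CP)
    // remote exec; the caller applies conditional partitioning
    // when M1 > remoteBudget && M3 <= remoteBudget
    return false;
}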
Use of org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PartitionFormat in project incubator-systemml by apache.
The class DataPartitionMR, method processPartitionInstructions.
private static void processPartitionInstructions(String shuffleInst, MatrixObject[] inputMatrices, byte[] resultIndices, MatrixObject[] outputMatrices, int numReducers, int replication, MatrixCharacteristics[] sts) {
    int i = 0;
    for (String inst : shuffleInst.split(Instruction.INSTRUCTION_DELIM)) {
        if (InstructionUtils.getOpCode(inst).equalsIgnoreCase("partition")) {
            String[] parts = InstructionUtils.getInstructionParts(inst);
            int input_index = Integer.parseInt(parts[1]);
            int output_index = Integer.parseInt(parts[2]);
            MatrixObject in = inputMatrices[input_index];
            MatrixObject out = outputMatrices[findResultIndex(resultIndices, output_index)];
            PDataPartitionFormat pformat = PDataPartitionFormat.valueOf(parts[3]);
            long rlen = in.getNumRows();
            long clen = in.getNumColumns();
            long brlen = in.getNumRowsPerBlock();
            long bclen = in.getNumColumnsPerBlock();
            // derive the partition size N (rows or columns per partition) such that
            // each partition fits the distributed cache partition budget
            long N = -1;
            switch (pformat) {
                case ROW_BLOCK_WISE_N: {
                    long numRowBlocks = (long) Math.ceil(((double) DistributedCacheInput.PARTITION_SIZE) / clen / brlen);
                    N = numRowBlocks * brlen;
                    break;
                }
                case COLUMN_BLOCK_WISE_N: {
                    long numColBlocks = (long) Math.ceil(((double) DistributedCacheInput.PARTITION_SIZE) / rlen / bclen);
                    N = numColBlocks * bclen;
                    break;
                }
                default:
                    throw new DMLRuntimeException("Unsupported partition format for distributed cache input: " + pformat);
            }
            PartitionFormat pf = new PartitionFormat(pformat, (int) N);
            DataPartitioner dpart = new DataPartitionerRemoteMR(pf, -1, numReducers, replication, false, true);
            out = dpart.createPartitionedMatrixObject(in, out, true);
            sts[i] = out.getMatrixCharacteristics();
            i++;
        }
    }
}
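To make the ROW_BLOCK_WISE_N sizing concrete, here is a worked example of the computation above; the PARTITION_SIZE value is illustrative only (the real constant is defined in DistributedCacheInput):

long PARTITION_SIZE = 4_000_000;  // assumed cells per partition
long clen = 100, brlen = 1000;    // columns, and rows per block
long numRowBlocks = (long) Math.ceil((double) PARTITION_SIZE / clen / brlen); // ceil(40.0) = 40
long N = numRowBlocks * brlen;    // 40,000 rows per partition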
Use of org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PartitionFormat in project systemml by apache.
The class ProgramConverter, method parseDataObject.
/**
 * NOTE: MRJobConfiguration cannot be used for the general case because program blocks and
 * related symbol tables can be hierarchically structured.
 *
 * @param in data object as string
 * @return two-element array of {variable name, Data object}
 */
public static Object[] parseDataObject(String in) {
    Object[] ret = new Object[2];
    StringTokenizer st = new StringTokenizer(in, DATA_FIELD_DELIM);
    String name = st.nextToken();
    DataType datatype = DataType.valueOf(st.nextToken());
    ValueType valuetype = ValueType.valueOf(st.nextToken());
    String valString = st.hasMoreTokens() ? st.nextToken() : "";
    Data dat = null;
    switch (datatype) {
        case SCALAR: {
            switch (valuetype) {
                case INT:
                    dat = new IntObject(Long.parseLong(valString));
                    break;
                case DOUBLE:
                    dat = new DoubleObject(Double.parseDouble(valString));
                    break;
                case BOOLEAN:
                    dat = new BooleanObject(Boolean.parseBoolean(valString));
                    break;
                case STRING:
                    dat = new StringObject(valString);
                    break;
                default:
                    throw new DMLRuntimeException("Unable to parse valuetype " + valuetype);
            }
            break;
        }
        case MATRIX: {
            MatrixObject mo = new MatrixObject(valuetype, valString);
            long rows = Long.parseLong(st.nextToken());
            long cols = Long.parseLong(st.nextToken());
            int brows = Integer.parseInt(st.nextToken());
            int bcols = Integer.parseInt(st.nextToken());
            long nnz = Long.parseLong(st.nextToken());
            InputInfo iin = InputInfo.stringToInputInfo(st.nextToken());
            OutputInfo oin = OutputInfo.stringToOutputInfo(st.nextToken());
            PartitionFormat partFormat = PartitionFormat.valueOf(st.nextToken());
            UpdateType inplace = UpdateType.valueOf(st.nextToken());
            MatrixCharacteristics mc = new MatrixCharacteristics(rows, cols, brows, bcols, nnz);
            MetaDataFormat md = new MetaDataFormat(mc, oin, iin);
            mo.setMetaData(md);
            if (partFormat._dpf != PDataPartitionFormat.NONE)
                mo.setPartitioned(partFormat._dpf, partFormat._N);
            mo.setUpdateType(inplace);
            dat = mo;
            break;
        }
        default:
            throw new DMLRuntimeException("Unable to parse datatype " + datatype);
    }
    ret[0] = name;
    ret[1] = dat;
    return ret;
}
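Taken together with serializeDataObject shown earlier, the two methods form a textual round trip for shipping variables to remote parfor workers. A minimal sketch, assuming the pipe delimiter from the schema comment and an illustrative variable name:

Data in = new DoubleObject(3.14);
String ser = ProgramConverter.serializeDataObject("x", in); // expected: "x|SCALAR|DOUBLE|3.14"
Object[] parsed = ProgramConverter.parseDataObject(ser);
String name = (String) parsed[0];                           // "x"
DoubleObject out = (DoubleObject) parsed[1];                // holds 3.14 again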