Use of org.apache.sysml.runtime.matrix.MatrixCharacteristics in project incubator-systemml by Apache.
Class MatrixIndexingCPFileInstruction, method processInstruction.
/**
 * Executes a right-indexing instruction against a partitioned matrix.
 *
 * If the partition file addressed by the index range exists on HDFS, a new
 * matrix object pointing at that file is registered under the output name,
 * with dimensions derived from the partition format. Otherwise the partition
 * is read via the source matrix object and bound as matrix output.
 *
 * @param ec execution context providing the input matrix and receiving the output
 * @throws DMLRuntimeException if the input is not partitioned, the opcode is not
 *         the right-index opcode, or the partition format is not supported
 */
@Override
public void processInstruction(ExecutionContext ec) {
    final String op = getOpcode();
    // NOTE(review): add(1) presumably shifts the range to 1-based indexes - confirm
    final IndexRange range = getIndexRange(ec).add(1);
    final MatrixObject src = ec.getMatrixObject(input1.getName());

    // only right indexing on partitioned inputs is handled here
    if (!src.isPartitioned() || !op.equalsIgnoreCase(RightIndex.OPCODE)) {
        throw new DMLRuntimeException("Invalid opcode or index predicate for MatrixIndexingCPFileInstruction: " + instString);
    }

    final MetaDataFormat srcMeta = (MetaDataFormat) src.getMetaData();
    final MatrixCharacteristics srcMc = srcMeta.getMatrixCharacteristics();
    final String partFile = src.getPartitionFileName(range, srcMc.getRowsPerBlock(), srcMc.getColsPerBlock());

    if (!MapReduceTool.existsFileOnHDFS(partFile)) {
        // will return an empty matrix partition
        MatrixBlock partBlock = src.readMatrixPartition(range);
        ec.setMatrixOutput(output.getName(), partBlock, getExtendedOpcode());
        return;
    }

    // create output matrix object on top of the existing partition file
    MatrixObject partObj = new MatrixObject(src.getValueType(), partFile);

    // derive the dimensions of a single partition from the partition format
    final long partRows;
    final long partCols;
    switch (src.getPartitionFormat()) {
        case ROW_WISE:
            partRows = 1;
            partCols = srcMc.getCols();
            break;
        case ROW_BLOCK_WISE_N:
            partRows = src.getPartitionSize();
            partCols = srcMc.getCols();
            break;
        case COLUMN_WISE:
            partRows = srcMc.getRows();
            partCols = 1;
            break;
        case COLUMN_BLOCK_WISE_N:
            partRows = srcMc.getRows();
            partCols = src.getPartitionSize();
            break;
        default:
            throw new DMLRuntimeException("Unsupported partition format for CP_FILE " + RightIndex.OPCODE + ": " + src.getPartitionFormat());
    }
    MatrixCharacteristics partMc = new MatrixCharacteristics(partRows, partCols, srcMc.getRowsPerBlock(), srcMc.getColsPerBlock());
    partObj.setMetaData(new MetaDataFormat(partMc, srcMeta.getOutputInfo(), srcMeta.getInputInfo()));

    // put output object into symbol table
    ec.setVariable(output.getName(), partObj);
}
Use of org.apache.sysml.runtime.matrix.MatrixCharacteristics in project incubator-systemml by Apache.
Class OptimizerRuleBased, method rewriteSetSparkEagerRDDCaching.
// /////
// REWRITE set spark eager rdd caching
// /
/**
 * Collects the variables read by the parfor that qualify for eager RDD caching
 * and, if there are any, registers them on the parfor program block.
 *
 * A variable qualifies if it is a matrix object with an RDD handle, is not a
 * spark repartition variable, its RDD reports checkpoint RDD children, and the
 * estimated exact-sparsity size exceeds {@code _lm / n.getK()}. Candidates are
 * only examined in spark execution mode, for a CP parfor with more than one
 * iteration.
 *
 * @param n    parfor node of the optimizer tree
 * @param vars variable map used to look up the candidate variables
 */
protected void rewriteSetSparkEagerRDDCaching(OptNode n, LocalVariableMap vars) {
    // get program blocks of root parfor
    Object[] mapped = OptTreeConverter.getAbstractPlanMapping().getMappedProg(n.getID());
    ParForStatementBlock stmtBlock = (ParForStatementBlock) mapped[0];
    ParForProgramBlock progBlock = (ParForProgramBlock) mapped[1];

    ArrayList<String> eagerVars = new ArrayList<>();

    // spark exec mode, local parfor, at least 2 iterations
    if (OptimizerUtils.isSparkExecutionMode() && n.getExecType() == ExecType.CP && _N > 1) {
        Set<String> candidates = stmtBlock.variablesRead().getVariableNames();
        Collection<String> repartVars = progBlock.getSparkRepartitionVariables();

        for (String name : candidates) {
            Data data = vars.get(name);
            if (!(data instanceof MatrixObject)) {
                continue;
            }
            MatrixObject matrix = (MatrixObject) data;
            RDDObject rdd = matrix.getRDDHandle();
            if (rdd == null) {
                continue;
            }
            MatrixCharacteristics mc = matrix.getMatrixCharacteristics();

            // skip repartition variables
            if (repartVars != null && repartVars.contains(name)) {
                continue;
            }
            // must be a cached rdd
            if (!rdd.rHasCheckpointRDDChilds()) {
                continue;
            }
            // must be an out-of-core dataset
            if (_lm / n.getK() < OptimizerUtils.estimateSizeExactSparsity(mc)) {
                eagerVars.add(name);
            }
        }

        // apply rewrite to parfor pb
        if (!eagerVars.isEmpty()) {
            progBlock.setSparkEagerCacheVariables(eagerVars);
        }
    }

    _numEvaluatedPlans++;
    LOG.debug(getOptMode() + " OPT: rewrite 'set spark eager rdd caching' - result=" + eagerVars.size() + " (" + ProgramConverter.serializeStringCollection(eagerVars) + ")");
}
Use of org.apache.sysml.runtime.matrix.MatrixCharacteristics in project incubator-systemml by Apache.
Class DataPartitioner, method createPartitionedMatrixObject.
/**
 * Creates a partitioned matrix object based on the given input matrix object,
 * according to the specified split format. The input matrix can be in-memory
 * or still on HDFS and the partitioned output matrix is written to HDFS. The
 * created matrix object can be used transparently for obtaining the full matrix
 * or reading 1 or multiple partitions based on given index ranges.
 *
 * The input object is returned unchanged if the partition format is NONE, or if
 * {@code force} is false and both dimensions are below {@code Hop.CPThreshold}.
 * If {@code force} is false, a ROW_WISE/COLUMN_WISE format may also be switched
 * to the corresponding block-wise format; this updates the partitioner's format.
 *
 * @param in input matrix object
 * @param out output matrix object
 * @param force if false, try to optimize
 * @return partitioned matrix object
 * @throws DMLRuntimeException if removing an existing output file on HDFS fails
 */
public MatrixObject createPartitionedMatrixObject(MatrixObject in, MatrixObject out, boolean force) {
    // check for naive partitioning
    if (_format == PDataPartitionFormat.NONE) {
        return in;
    }

    // analyze input matrix object
    MetaDataFormat meta = (MetaDataFormat) in.getMetaData();
    MatrixCharacteristics mc = meta.getMatrixCharacteristics();
    InputInfo ii = meta.getInputInfo();
    OutputInfo oi = meta.getOutputInfo();
    long rows = mc.getRows();
    long cols = mc.getCols();
    int brlen = mc.getRowsPerBlock();
    int bclen = mc.getColsPerBlock();
    long nonZeros = mc.getNonZeros();
    // divide step-wise in double arithmetic; rows * cols as a long product
    // can silently overflow for very large dimensions
    double sparsity = mc.dimsKnown(true) ? ((double) nonZeros) / rows / cols : 1.0;

    // try to optimize, if format not forced
    if (!force) {
        // check lower bound of useful data partitioning
        // (both dimensions small, i.e., matrix already fits in mem)
        if (rows < Hop.CPThreshold && cols < Hop.CPThreshold) {
            return in;
        }
        // check for changing to blockwise representations
        if (_format == PDataPartitionFormat.ROW_WISE && cols < Hop.CPThreshold) {
            LOG.debug("Changing format from " + PDataPartitionFormat.ROW_WISE + " to " + PDataPartitionFormat.ROW_BLOCK_WISE + ".");
            _format = PDataPartitionFormat.ROW_BLOCK_WISE;
        }
        if (_format == PDataPartitionFormat.COLUMN_WISE && rows < Hop.CPThreshold) {
            // log the format that is actually assigned below
            LOG.debug("Changing format from " + PDataPartitionFormat.COLUMN_WISE + " to " + PDataPartitionFormat.COLUMN_BLOCK_WISE + ".");
            _format = PDataPartitionFormat.COLUMN_BLOCK_WISE;
        }
    }

    // check changing to binarycell in case of sparse cols (robustness)
    boolean convertBlock2Cell = false;
    if (ii == InputInfo.BinaryBlockInputInfo && _allowBinarycell && _format == PDataPartitionFormat.COLUMN_WISE && sparsity < SPARSITY_CELL_THRESHOLD) {
        LOG.debug("Changing partition outputinfo from binaryblock to binarycell due to sparsity=" + sparsity);
        oi = OutputInfo.BinaryCellOutputInfo;
        convertBlock2Cell = true;
    }

    // prepare filenames and cleanup if required
    String fnameNew = out.getFileName();
    try {
        MapReduceTool.deleteFileIfExistOnHDFS(fnameNew);
    } catch (Exception ex) {
        throw new DMLRuntimeException(ex);
    }

    // core partitioning (depending on subclass); uses the original input info
    partitionMatrix(in, fnameNew, ii, oi, rows, cols, brlen, bclen);

    // create output matrix object
    out.setPartitioned(_format, _n);
    MatrixCharacteristics mcNew = new MatrixCharacteristics(rows, cols, brlen, bclen);
    mcNew.setNonZeros(nonZeros);
    // after conversion, the written partitions are read back as binarycell
    if (convertBlock2Cell) {
        ii = InputInfo.BinaryCellInputInfo;
    }
    MetaDataFormat metaNew = new MetaDataFormat(mcNew, oi, ii);
    out.setMetaData(metaNew);
    return out;
}
Use of org.apache.sysml.runtime.matrix.MatrixCharacteristics in project incubator-systemml by Apache.
Class DataPartitionerRemoteMapper, method configure.
/**
 * Configures this mapper from the job configuration by instantiating the
 * partitioning mapper that matches the configured input and output info.
 *
 * @param job job configuration carrying the matrix size, input/output info,
 *            partition format, partition size and index flag
 * @throws RuntimeException if the input info is unknown (including missing),
 *         or if binary block input is combined with an unsupported output info
 */
@Override
public void configure(JobConf job) {
    MatrixCharacteristics mc = MRJobConfiguration.getPartitionedMatrixSize(job);
    InputInfo ii = MRJobConfiguration.getPartitioningInputInfo(job);
    OutputInfo oi = MRJobConfiguration.getPartitioningOutputInfo(job);
    PDataPartitionFormat pdf = MRJobConfiguration.getPartitioningFormat(job);
    int n = MRJobConfiguration.getPartitioningSizeN(job);
    boolean keepIndexes = MRJobConfiguration.getPartitioningIndexFlag(job);

    if (ii == InputInfo.TextCellInputInfo) {
        _mapper = new DataPartitionerMapperTextcell(mc.getRows(), mc.getCols(), mc.getRowsPerBlock(), mc.getColsPerBlock(), pdf, n);
    } else if (ii == InputInfo.BinaryCellInputInfo) {
        _mapper = new DataPartitionerMapperBinarycell(mc.getRows(), mc.getCols(), mc.getRowsPerBlock(), mc.getColsPerBlock(), pdf, n);
    } else if (ii == InputInfo.BinaryBlockInputInfo) {
        if (oi == OutputInfo.BinaryBlockOutputInfo) {
            _mapper = new DataPartitionerMapperBinaryblock(mc.getRows(), mc.getCols(), mc.getRowsPerBlock(), mc.getColsPerBlock(), pdf, n, keepIndexes);
        } else if (oi == OutputInfo.BinaryCellOutputInfo) {
            // fused parfor: empty output is requested whenever the job carries program blocks
            boolean outputEmpty = MRJobConfiguration.getProgramBlocks(job) != null;
            _mapper = new DataPartitionerMapperBinaryblock2Binarycell(job, mc.getRows(), mc.getCols(), mc.getRowsPerBlock(), mc.getColsPerBlock(), pdf, n, keepIndexes, outputEmpty);
        } else {
            throw new RuntimeException("Partitioning from '" + ii + "' to '" + oi + "' not supported");
        }
    } else {
        // plain concatenation is null-safe; calling toString() on a missing input
        // info would raise a NullPointerException and hide this message
        throw new RuntimeException("Unable to configure mapper with unknown input info: " + ii);
    }
}
Use of org.apache.sysml.runtime.matrix.MatrixCharacteristics in project incubator-systemml by Apache.
Class QuaternaryInstruction, method computeMatrixCharacteristics.
/**
 * Derives the output matrix characteristics of this quaternary operation and
 * writes them into {@code dimOut}. The block sizes are always taken from
 * {@code mc1}. If none of the operator's weight types is set, {@code dimOut}
 * is left untouched.
 *
 * @param mc1    characteristics of the first (main) input
 * @param mc2    characteristics of the second input
 * @param mc3    characteristics of the third input
 * @param dimOut output characteristics, updated in place
 */
public void computeMatrixCharacteristics(MatrixCharacteristics mc1, MatrixCharacteristics mc2, MatrixCharacteristics mc3, MatrixCharacteristics dimOut) {
    QuaternaryOperator qop = (QuaternaryOperator) optr;

    final boolean scalarOutput = qop.wtype1 != null || qop.wtype4 != null;
    final boolean inputShapedOutput = qop.wtype2 != null || qop.wtype5 != null;

    if (scalarOutput) {
        // wsloss/wcemm
        // output size independent of chain type (scalar)
        dimOut.set(1, 1, mc1.getRowsPerBlock(), mc1.getColsPerBlock());
    } else if (inputShapedOutput) {
        // wsigmoid/wumm
        // output size determined by main input
        dimOut.set(mc1.getRows(), mc1.getCols(), mc1.getRowsPerBlock(), mc1.getColsPerBlock());
    } else if (qop.wtype3 != null) {
        // wdivmm
        // note: cannot directly consume mc2 or mc3 for redwdivmm because rep instruction changed
        // the relevant dimensions; as a workaround the original dims are passed via nnz
        boolean mapwdivmm = _cacheU && _cacheV;
        MatrixCharacteristics factor = qop.wtype3.isLeft() ? mc3 : mc2;
        long rank;
        if (mapwdivmm) {
            rank = factor.getCols();
        } else {
            rank = factor.getNonZeros();
        }
        MatrixCharacteristics outDims = qop.wtype3.computeOutputCharacteristics(mc1.getRows(), mc1.getCols(), rank);
        dimOut.set(outDims.getRows(), outDims.getCols(), mc1.getRowsPerBlock(), mc1.getColsPerBlock());
    }
}
Aggregations