Use of org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PDataPartitionFormat in project systemml by apache.
Method configure of the class DataPartitionerRemoteMapper.
/**
 * Configures this remote mapper by instantiating the concrete partitioning
 * mapper that matches the input/output format combination read from the job.
 *
 * <p>Handled combinations: text cell input, binary cell input, and binary
 * block input with either binary block or binary cell output.
 *
 * @param job job configuration from which all partitioning settings are read
 * @throws RuntimeException if the input info is not one of the handled kinds,
 *         or if binary block input is paired with an unsupported output info
 */
@Override
public void configure(JobConf job) {
    MatrixCharacteristics mc = MRJobConfiguration.getPartitionedMatrixSize(job);
    InputInfo ii = MRJobConfiguration.getPartitioningInputInfo(job);
    OutputInfo oi = MRJobConfiguration.getPartitioningOutputInfo(job);
    PDataPartitionFormat pdf = MRJobConfiguration.getPartitioningFormat(job);
    int n = MRJobConfiguration.getPartitioningSizeN(job);
    boolean keepIndexes = MRJobConfiguration.getPartitioningIndexFlag(job);

    if (ii == InputInfo.TextCellInputInfo) {
        _mapper = new DataPartitionerMapperTextcell(mc.getRows(), mc.getCols(), mc.getRowsPerBlock(), mc.getColsPerBlock(), pdf, n);
    } else if (ii == InputInfo.BinaryCellInputInfo) {
        _mapper = new DataPartitionerMapperBinarycell(mc.getRows(), mc.getCols(), mc.getRowsPerBlock(), mc.getColsPerBlock(), pdf, n);
    } else if (ii == InputInfo.BinaryBlockInputInfo) {
        if (oi == OutputInfo.BinaryBlockOutputInfo) {
            _mapper = new DataPartitionerMapperBinaryblock(mc.getRows(), mc.getCols(), mc.getRowsPerBlock(), mc.getColsPerBlock(), pdf, n, keepIndexes);
        } else if (oi == OutputInfo.BinaryCellOutputInfo) {
            // fused parfor: empty output is requested whenever program blocks
            // are present in the job configuration
            boolean outputEmpty = MRJobConfiguration.getProgramBlocks(job) != null;
            _mapper = new DataPartitionerMapperBinaryblock2Binarycell(job, mc.getRows(), mc.getCols(), mc.getRowsPerBlock(), mc.getColsPerBlock(), pdf, n, keepIndexes, outputEmpty);
        } else {
            throw new RuntimeException("Partitioning from '" + ii + "' to '" + oi + "' not supported");
        }
    } else {
        // Concatenate ii directly rather than calling ii.toString(): a null
        // input info then produces this descriptive error instead of a
        // NullPointerException raised while building the message.
        throw new RuntimeException("Unable to configure mapper with unknown input info: " + ii);
    }
}
Use of org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PDataPartitionFormat in project systemml by apache.
Method parseInstruction of the class DataPartitionCPInstruction.
/**
 * Parses the string form of a data-partition CP instruction.
 *
 * <p>The instruction is split into fields: field 0 is the opcode, field 1 the
 * input operand, field 2 the output operand and field 3 the name of a
 * {@code PDataPartitionFormat} constant.
 *
 * @param str the encoded instruction
 * @return the parsed instruction
 * @throws DMLRuntimeException if the opcode is not "partition" (case-insensitive)
 */
public static DataPartitionCPInstruction parseInstruction(String str) {
    final String[] fields = InstructionUtils.getInstructionPartsWithValueType(str);
    InstructionUtils.checkNumFields(fields, 3);

    final String opcode = fields[0];
    final CPOperand input = new CPOperand(fields[1]);
    final CPOperand output = new CPOperand(fields[2]);
    final PDataPartitionFormat format = PDataPartitionFormat.valueOf(fields[3]);

    // The opcode is validated only after all fields have been decoded.
    if (opcode.equalsIgnoreCase("partition")) {
        return new DataPartitionCPInstruction(new Operator(true), input, format, output, opcode, str);
    }
    throw new DMLRuntimeException("Unknown opcode while parsing an DataPartitionCPInstruction: " + str);
}
Use of org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PDataPartitionFormat in project incubator-systemml by apache.
Method getSplits of the class RemoteParForColocatedNLineInputFormat.
/**
 * Computes the splits via the superclass and wraps every {@code FileSplit}
 * among them in a {@code RemoteParForColocatedFileSplit} that additionally
 * carries the partitioning file name and the partition block length.
 *
 * @param job job configuration providing the partitioning settings
 * @param numSplits split-count hint forwarded to the superclass
 * @return an array of the same length as the superclass result; entries that
 *         are not {@code FileSplit} instances are passed through unchanged
 * @throws IOException if the superclass fails to compute the splits
 */
@Override
public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException {
    final InputSplit[] baseSplits = super.getSplits(job, numSplits);

    // partitioning information taken from the job configuration
    final MatrixCharacteristics mc = MRJobConfiguration.getPartitionedMatrixSize(job);
    final PDataPartitionFormat dpf = MRJobConfiguration.getPartitioningFormat(job);
    final PartitionFormat pf = new PartitionFormat(dpf, -1);
    final int blen;
    if (pf.isRowwise()) {
        blen = (int) pf.getNumRows(mc);
    } else {
        blen = (int) pf.getNumColumns(mc);
    }
    final String fname = MRJobConfiguration.getPartitioningFilename(job);

    // wrap each file split; the instanceof test guards the cast
    final InputSplit[] wrapped = new InputSplit[baseSplits.length];
    int pos = 0;
    for (InputSplit split : baseSplits) {
        wrapped[pos] = (split instanceof FileSplit)
            ? new RemoteParForColocatedFileSplit((FileSplit) split, fname, blen)
            : split;
        pos++;
    }
    return wrapped;
}
Use of org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PDataPartitionFormat in project systemml by apache.
Method setupDistCacheFiles of the class MRBaseForCommonInstructions.
/**
 * Registers a {@code DistributedCacheInput} in {@code dcValues} for every
 * distributed-cache input index configured on the job that is not already
 * present there. Does nothing if the job has no distributed-cache input
 * indices.
 *
 * <p>Also updates the {@code isJobLocal} field from the job configuration.
 *
 * @param job job configuration describing the distributed-cache inputs
 * @throws IOException if the local cache files cannot be obtained
 */
protected void setupDistCacheFiles(JobConf job) throws IOException {
    final String dcIndexSpec = MRJobConfiguration.getDistCacheInputIndices(job);
    if (dcIndexSpec == null) {
        return;
    }

    isJobLocal = InfrastructureAnalyzer.isLocalMode(job);
    final String[] inputPaths = MRJobConfiguration.getInputPaths(job);
    final String[] dcIndices = dcIndexSpec.split(Instruction.INSTRUCTION_DELIM);
    final Path[] dcFiles = DistributedCache.getLocalCacheFiles(job);
    final PDataPartitionFormat[] inputPartitionFormats = MRJobConfiguration.getInputPartitionFormats(job);

    for (int i = 0; i < dcIndices.length; i++) {
        final byte inputIndex = Byte.parseByte(dcIndices[i]);

        // load if not already present (jvm reuse)
        if (dcValues.containsKey(inputIndex)) {
            continue;
        }

        // When the job is in local mode, files can be read from HDFS directly -- use
        // input paths as opposed to "local" paths prepared by DistributedCache.
        final Path p = isJobLocal ? new Path(inputPaths[inputIndex]) : dcFiles[i];

        final DistributedCacheInput dcInput = new DistributedCacheInput(
            p,
            MRJobConfiguration.getNumRows(job, inputIndex),
            MRJobConfiguration.getNumColumns(job, inputIndex),
            MRJobConfiguration.getNumRowsPerBlock(job, inputIndex),
            MRJobConfiguration.getNumColumnsPerBlock(job, inputIndex),
            inputPartitionFormats[inputIndex]);
        dcValues.put(inputIndex, dcInput);
    }
}
Use of org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PDataPartitionFormat in project systemml by apache.
Method processPartitionInstructions of the class DataPartitionMR.
/**
 * Executes every "partition" instruction contained in the given shuffle
 * instruction string and records the matrix characteristics of each
 * partitioned output in {@code sts}, in the order the instructions appear.
 * Instructions with any other opcode are skipped.
 *
 * @param shuffleInst instructions joined by {@code Instruction.INSTRUCTION_DELIM}
 * @param inputMatrices input matrices, addressed by the instruction's input index
 * @param resultIndices result indices used to locate the output matrix
 * @param outputMatrices output matrices, addressed via {@code findResultIndex}
 * @param numReducers number of reducers handed to the remote partitioner
 * @param replication replication factor handed to the remote partitioner
 * @param sts receives one entry per processed partition instruction, filled from index 0
 * @throws DMLRuntimeException if the partition format is neither
 *         {@code ROW_BLOCK_WISE_N} nor {@code COLUMN_BLOCK_WISE_N}
 */
private static void processPartitionInstructions(String shuffleInst, MatrixObject[] inputMatrices, byte[] resultIndices, MatrixObject[] outputMatrices, int numReducers, int replication, MatrixCharacteristics[] sts) {
    int statsPos = 0;
    final String[] instructions = shuffleInst.split(Instruction.INSTRUCTION_DELIM);
    for (String inst : instructions) {
        if (!InstructionUtils.getOpCode(inst).equalsIgnoreCase("partition")) {
            continue;
        }

        final String[] parts = InstructionUtils.getInstructionParts(inst);
        final int inputIx = Integer.parseInt(parts[1]);
        final int outputIx = Integer.parseInt(parts[2]);
        final MatrixObject source = inputMatrices[inputIx];
        final MatrixObject target = outputMatrices[findResultIndex(resultIndices, outputIx)];
        final PDataPartitionFormat pformat = PDataPartitionFormat.valueOf(parts[3]);

        final long rlen = source.getNumRows();
        final long clen = source.getNumColumns();
        final long brlen = source.getNumRowsPerBlock();
        final long bclen = source.getNumColumnsPerBlock();

        // Partition length N: the number of whole blocks derived from
        // PARTITION_SIZE (rounded up), expressed in rows or columns.
        final long partitionLen;
        if (pformat == PDataPartitionFormat.ROW_BLOCK_WISE_N) {
            final long rowBlocks = (long) Math.ceil((double) DistributedCacheInput.PARTITION_SIZE / clen / brlen);
            partitionLen = rowBlocks * brlen;
        } else if (pformat == PDataPartitionFormat.COLUMN_BLOCK_WISE_N) {
            final long colBlocks = (long) Math.ceil((double) DistributedCacheInput.PARTITION_SIZE / rlen / bclen);
            partitionLen = colBlocks * bclen;
        } else {
            throw new DMLRuntimeException("Unsupported partition format for distributed cache input: " + pformat);
        }

        final PartitionFormat pf = new PartitionFormat(pformat, (int) partitionLen);
        final DataPartitioner partitioner = new DataPartitionerRemoteMR(pf, -1, numReducers, replication, false, true);
        final MatrixObject partitioned = partitioner.createPartitionedMatrixObject(source, target, true);
        sts[statsPos] = partitioned.getMatrixCharacteristics();
        statsPos++;
    }
}
Aggregations