use of org.apache.sysml.conf.DMLConfig in project incubator-systemml by apache.
the class CMCOVMR method runJob.
public static JobReturn runJob(MRJobInstruction inst, String[] inputs, InputInfo[] inputInfos, long[] rlens, long[] clens, int[] brlens, int[] bclens, String instructionsInMapper, String cmNcomInstructions, int numReducers, int replication, byte[] resultIndexes, String[] outputs, OutputInfo[] outputInfos) throws Exception {
JobConf job = new JobConf(CMCOVMR.class);
job.setJobName("CM-COV-MR");
//whether use block representation or cell representation
MRJobConfiguration.setMatrixValueClassForCM_N_COM(job, true);
//added for handling recordreader instruction
String[] realinputs = inputs;
InputInfo[] realinputInfos = inputInfos;
long[] realrlens = rlens;
long[] realclens = clens;
int[] realbrlens = brlens;
int[] realbclens = bclens;
byte[] realIndexes = new byte[inputs.length];
for (byte b = 0; b < realIndexes.length; b++) realIndexes[b] = b;
//set up the input files and their format information
MRJobConfiguration.setUpMultipleInputs(job, realIndexes, realinputs, realinputInfos, realbrlens, realbclens, true, ConvertTarget.WEIGHTEDCELL);
//set up the dimensions of input matrices
MRJobConfiguration.setMatricesDimensions(job, realIndexes, realrlens, realclens);
//set up the block size
MRJobConfiguration.setBlocksSizes(job, realIndexes, realbrlens, realbclens);
//set up unary instructions that will perform in the mapper
MRJobConfiguration.setInstructionsInMapper(job, instructionsInMapper);
//set up the aggregate instructions that will happen in the combiner and reducer
MRJobConfiguration.setCM_N_COMInstructions(job, cmNcomInstructions);
//set up the replication factor for the results
job.setInt(MRConfigurationNames.DFS_REPLICATION, replication);
//set up custom map/reduce configurations
DMLConfig config = ConfigurationManager.getDMLConfig();
MRJobConfiguration.setupCustomMRConfigurations(job, config);
//set up what matrices are needed to pass from the mapper to reducer
HashSet<Byte> mapoutputIndexes = MRJobConfiguration.setUpOutputIndexesForMapper(job, realIndexes, instructionsInMapper, null, cmNcomInstructions, resultIndexes);
//set up the multiple output files, and their format information
MRJobConfiguration.setUpMultipleOutputs(job, resultIndexes, new byte[resultIndexes.length], outputs, outputInfos, false);
// configure mapper and the mapper output key value pairs
job.setMapperClass(CMCOVMRMapper.class);
job.setMapOutputKeyClass(TaggedFirstSecondIndexes.class);
job.setMapOutputValueClass(CM_N_COVCell.class);
job.setOutputKeyComparatorClass(TaggedFirstSecondIndexes.Comparator.class);
job.setPartitionerClass(TaggedFirstSecondIndexes.TagPartitioner.class);
//configure reducer
job.setReducerClass(CMCOVMRReducer.class);
//job.setReducerClass(PassThroughReducer.class);
MatrixCharacteristics[] stats = MRJobConfiguration.computeMatrixCharacteristics(job, realIndexes, instructionsInMapper, null, null, cmNcomInstructions, resultIndexes, mapoutputIndexes, false).stats;
//set up the number of reducers
//each output tag is a group
MRJobConfiguration.setNumReducers(job, mapoutputIndexes.size(), numReducers);
// Print the complete instruction
if (LOG.isTraceEnabled())
inst.printCompleteMRJobInstruction(stats);
// By default, the job executes in "cluster" mode.
// Determine if we can optimize and run it in "local" mode.
MatrixCharacteristics[] inputStats = new MatrixCharacteristics[inputs.length];
for (int i = 0; i < inputs.length; i++) {
inputStats[i] = new MatrixCharacteristics(rlens[i], clens[i], brlens[i], bclens[i]);
}
//set unique working dir
MRJobConfiguration.setUniqueWorkingDir(job);
RunningJob runjob = JobClient.runJob(job);
return new JobReturn(stats, outputInfos, runjob.isSuccessful());
}
use of org.apache.sysml.conf.DMLConfig in project incubator-systemml by apache.
the class MMCJMR method commonSetup.
private static MatrixCharacteristics[] commonSetup(JobConf job, boolean inBlockRepresentation, String[] inputs, InputInfo[] inputInfos, long[] rlens, long[] clens, int[] brlens, int[] bclens, String instructionsInMapper, String aggInstructionsInReducer, String aggBinInstrction, int numReducers, int replication, byte resultDimsUnknown, String output, OutputInfo outputinfo) throws Exception {
job.setJobName("MMCJ-MR");
if (numReducers <= 0)
throw new Exception("MMCJ-MR has to have at least one reduce task!");
//whether use block representation or cell representation
MRJobConfiguration.setMatrixValueClass(job, inBlockRepresentation);
byte[] realIndexes = new byte[inputs.length];
for (byte b = 0; b < realIndexes.length; b++) realIndexes[b] = b;
//set up the input files and their format information
MRJobConfiguration.setUpMultipleInputs(job, realIndexes, inputs, inputInfos, brlens, bclens, true, inBlockRepresentation ? ConvertTarget.BLOCK : ConvertTarget.CELL);
//set up the dimensions of input matrices
MRJobConfiguration.setMatricesDimensions(job, realIndexes, rlens, clens);
//set up the block size
MRJobConfiguration.setBlocksSizes(job, realIndexes, brlens, bclens);
//set up unary instructions that will perform in the mapper
MRJobConfiguration.setInstructionsInMapper(job, instructionsInMapper);
//set up the aggregate instructions that will happen in the combiner and reducer
MRJobConfiguration.setAggregateInstructions(job, aggInstructionsInReducer);
//set up the aggregate binary operation for the mmcj job
MRJobConfiguration.setAggregateBinaryInstructions(job, aggBinInstrction);
//set up the replication factor for the results
job.setInt(MRConfigurationNames.DFS_REPLICATION, replication);
//set up preferred custom serialization framework for binary block format
if (MRJobConfiguration.USE_BINARYBLOCK_SERIALIZATION)
MRJobConfiguration.addBinaryBlockSerializationFramework(job);
//set up map/reduce memory configurations (if in AM context)
DMLConfig config = ConfigurationManager.getDMLConfig();
DMLAppMasterUtils.setupMRJobRemoteMaxMemory(job, config);
//set up custom map/reduce configurations
MRJobConfiguration.setupCustomMRConfigurations(job, config);
byte[] resultIndexes = new byte[] { MRInstructionParser.parseSingleInstruction(aggBinInstrction).output };
byte[] resultDimsUnknown_Array = new byte[] { resultDimsUnknown };
// byte[] resultIndexes=new byte[]{AggregateBinaryInstruction.parseMRInstruction(aggBinInstrction).output};
//set up what matrices are needed to pass from the mapper to reducer
HashSet<Byte> mapoutputIndexes = MRJobConfiguration.setUpOutputIndexesForMapper(job, realIndexes, instructionsInMapper, aggInstructionsInReducer, aggBinInstrction, resultIndexes);
//set up the multiple output files, and their format information
MRJobConfiguration.setUpMultipleOutputs(job, resultIndexes, resultDimsUnknown_Array, new String[] { output }, new OutputInfo[] { outputinfo }, inBlockRepresentation);
// configure mapper
job.setMapperClass(MMCJMRMapper.class);
job.setMapOutputKeyClass(TaggedFirstSecondIndexes.class);
if (inBlockRepresentation)
job.setMapOutputValueClass(MatrixBlock.class);
else
job.setMapOutputValueClass(MatrixCell.class);
job.setOutputKeyComparatorClass(TaggedFirstSecondIndexes.Comparator.class);
job.setPartitionerClass(TaggedFirstSecondIndexes.FirstIndexPartitioner.class);
//configure combiner
//TODO: cannot set up combiner, because it will destroy the stable numerical algorithms
// for sum or for central moments
//if(aggInstructionsInReducer!=null && !aggInstructionsInReducer.isEmpty())
// job.setCombinerClass(MMCJMRCombiner.class);
MatrixChar_N_ReducerGroups ret = MRJobConfiguration.computeMatrixCharacteristics(job, realIndexes, instructionsInMapper, aggInstructionsInReducer, aggBinInstrction, null, resultIndexes, mapoutputIndexes, true);
//set up the number of reducers
if (AUTOMATIC_CONFIG_NUM_REDUCERS) {
int numRed = determineNumReducers(rlens, clens, numReducers, ret.numReducerGroups);
job.setNumReduceTasks(numRed);
} else
MRJobConfiguration.setNumReducers(job, ret.numReducerGroups, numReducers);
//configure reducer
// note: the alternative MMCJMRReducer is not maintained
job.setReducerClass(MMCJMRReducerWithAggregator.class);
return ret.stats;
}
use of org.apache.sysml.conf.DMLConfig in project incubator-systemml by apache.
the class MMRJMR method runJob.
public static JobReturn runJob(MRJobInstruction inst, String[] inputs, InputInfo[] inputInfos, long[] rlens, long[] clens, int[] brlens, int[] bclens, String instructionsInMapper, String aggInstructionsInReducer, String aggBinInstrctions, String otherInstructionsInReducer, int numReducers, int replication, byte[] resultIndexes, String[] outputs, OutputInfo[] outputInfos) throws Exception {
JobConf job = new JobConf(MMRJMR.class);
job.setJobName("MMRJ-MR");
if (numReducers <= 0)
throw new Exception("MMRJ-MR has to have at least one reduce task!");
// TODO: check w/ yuanyuan. This job always runs in blocked mode, and hence derivation is not necessary.
boolean inBlockRepresentation = MRJobConfiguration.deriveRepresentation(inputInfos);
//whether use block representation or cell representation
MRJobConfiguration.setMatrixValueClass(job, inBlockRepresentation);
byte[] realIndexes = new byte[inputs.length];
for (byte b = 0; b < realIndexes.length; b++) realIndexes[b] = b;
//set up the input files and their format information
MRJobConfiguration.setUpMultipleInputs(job, realIndexes, inputs, inputInfos, brlens, bclens, true, inBlockRepresentation ? ConvertTarget.BLOCK : ConvertTarget.CELL);
//set up the dimensions of input matrices
MRJobConfiguration.setMatricesDimensions(job, realIndexes, rlens, clens);
//set up the block size
MRJobConfiguration.setBlocksSizes(job, realIndexes, brlens, bclens);
//set up unary instructions that will perform in the mapper
MRJobConfiguration.setInstructionsInMapper(job, instructionsInMapper);
//set up the aggregate instructions that will happen in the combiner and reducer
MRJobConfiguration.setAggregateInstructions(job, aggInstructionsInReducer);
//set up the aggregate binary operation for the mmcj job
MRJobConfiguration.setAggregateBinaryInstructions(job, aggBinInstrctions);
//set up the instructions that will happen in the reducer, after the aggregation instrucions
MRJobConfiguration.setInstructionsInReducer(job, otherInstructionsInReducer);
//set up the replication factor for the results
job.setInt(MRConfigurationNames.DFS_REPLICATION, replication);
//set up map/reduce memory configurations (if in AM context)
DMLConfig config = ConfigurationManager.getDMLConfig();
DMLAppMasterUtils.setupMRJobRemoteMaxMemory(job, config);
//set up custom map/reduce configurations
MRJobConfiguration.setupCustomMRConfigurations(job, config);
// byte[] resultIndexes=new byte[]{AggregateBinaryInstruction.parseMRInstruction(aggBinInstrction).output};
//set up what matrices are needed to pass from the mapper to reducer
HashSet<Byte> mapoutputIndexes = MRJobConfiguration.setUpOutputIndexesForMapper(job, realIndexes, instructionsInMapper, aggInstructionsInReducer, aggBinInstrctions, resultIndexes);
MatrixChar_N_ReducerGroups ret = MRJobConfiguration.computeMatrixCharacteristics(job, realIndexes, instructionsInMapper, aggInstructionsInReducer, aggBinInstrctions, otherInstructionsInReducer, resultIndexes, mapoutputIndexes, false);
MatrixCharacteristics[] stats = ret.stats;
//set up the number of reducers
MRJobConfiguration.setNumReducers(job, ret.numReducerGroups, numReducers);
// Print the complete instruction
if (LOG.isTraceEnabled())
inst.printCompleteMRJobInstruction(stats);
byte[] dimsUnknown = new byte[resultIndexes.length];
for (int i = 0; i < resultIndexes.length; i++) {
if (stats[i].getRows() == -1 || stats[i].getCols() == -1) {
dimsUnknown[i] = (byte) 1;
} else {
dimsUnknown[i] = (byte) 0;
}
}
//set up the multiple output files, and their format information
MRJobConfiguration.setUpMultipleOutputs(job, resultIndexes, dimsUnknown, outputs, outputInfos, inBlockRepresentation);
// configure mapper
job.setMapperClass(MMRJMRMapper.class);
job.setMapOutputKeyClass(TripleIndexes.class);
if (inBlockRepresentation)
job.setMapOutputValueClass(TaggedMatrixBlock.class);
else
job.setMapOutputValueClass(TaggedMatrixCell.class);
job.setOutputKeyComparatorClass(TripleIndexes.Comparator.class);
job.setPartitionerClass(TripleIndexes.FirstTwoIndexesPartitioner.class);
//configure combiner
//TODO: cannot set up combiner, because it will destroy the stable numerical algorithms
// for sum or for central moments
// if(aggInstructionsInReducer!=null && !aggInstructionsInReducer.isEmpty())
// job.setCombinerClass(MMCJMRCombiner.class);
//configure reducer
job.setReducerClass(MMRJMRReducer.class);
// By default, the job executes in "cluster" mode.
// Determine if we can optimize and run it in "local" mode.
MatrixCharacteristics[] inputStats = new MatrixCharacteristics[inputs.length];
for (int i = 0; i < inputs.length; i++) {
inputStats[i] = new MatrixCharacteristics(rlens[i], clens[i], brlens[i], bclens[i]);
}
//set unique working dir
MRJobConfiguration.setUniqueWorkingDir(job);
RunningJob runjob = JobClient.runJob(job);
/* Process different counters */
Group group = runjob.getCounters().getGroup(MRJobConfiguration.NUM_NONZERO_CELLS);
for (int i = 0; i < resultIndexes.length; i++) {
// number of non-zeros
stats[i].setNonZeros(group.getCounter(Integer.toString(i)));
}
return new JobReturn(stats, outputInfos, runjob.isSuccessful());
}
use of org.apache.sysml.conf.DMLConfig in project incubator-systemml by apache.
the class AutomatedTestBase method checkForProcessLocalTemporaryDir.
/**
* <p>
* Checks if a process-local temporary directory exists
* in the current working directory.
* </p>
*
* @return true if a process-local temp directory is present.
*/
public boolean checkForProcessLocalTemporaryDir() {
try {
DMLConfig conf = new DMLConfig(getCurConfigFile().getPath());
StringBuilder sb = new StringBuilder();
sb.append(conf.getTextValue(DMLConfig.SCRATCH_SPACE));
sb.append(Lop.FILE_SEPARATOR);
sb.append(Lop.PROCESS_PREFIX);
sb.append(DMLScript.getUUID());
String pLocalDir = sb.toString();
return MapReduceTool.existsFileOnHDFS(pLocalDir);
} catch (Exception ex) {
ex.printStackTrace();
return true;
}
}
use of org.apache.sysml.conf.DMLConfig in project incubator-systemml by apache.
the class AutomatedTestBase method cleanupScratchSpace.
public void cleanupScratchSpace() {
try {
//parse config file
DMLConfig conf = new DMLConfig(getCurConfigFile().getPath());
// delete the scratch_space and all contents
// (prevent side effect between tests)
String dir = conf.getTextValue(DMLConfig.SCRATCH_SPACE);
MapReduceTool.deleteFileIfExistOnHDFS(dir);
} catch (Exception ex) {
//no effect on tests
return;
}
}
Aggregations