Search in sources :

Example 16 with DMLConfig

use of org.apache.sysml.conf.DMLConfig in project incubator-systemml by apache.

the class CMCOVMR method runJob.

/**
 * Configures the "CM-COV-MR" MapReduce job and submits it via {@code JobClient.runJob}.
 *
 * <p>Every input is assigned the byte index equal to its position in {@code inputs}
 * and is registered with {@code ConvertTarget.WEIGHTEDCELL} as conversion target.
 *
 * @param inst                 job instruction; only used to print the complete instruction
 *                             when trace logging is enabled
 * @param inputs               input file names
 * @param inputInfos           format information of the inputs
 * @param rlens                row dimensions of the input matrices
 * @param clens                column dimensions of the input matrices
 * @param brlens               row block sizes of the inputs
 * @param bclens               column block sizes of the inputs
 * @param instructionsInMapper instructions registered for execution in the mapper
 * @param cmNcomInstructions   CM/COV instructions registered via
 *                             {@code MRJobConfiguration.setCM_N_COMInstructions}
 * @param numReducers          requested number of reducers, passed to
 *                             {@code MRJobConfiguration.setNumReducers}
 * @param replication          value stored under {@code MRConfigurationNames.DFS_REPLICATION}
 * @param resultIndexes        indexes of the result matrices
 * @param outputs              output file names
 * @param outputInfos          format information of the outputs
 * @return a {@code JobReturn} built from the computed matrix characteristics, the given
 *         {@code outputInfos} and the success flag of the executed job
 * @throws Exception if job configuration or execution fails
 */
public static JobReturn runJob(MRJobInstruction inst, String[] inputs, InputInfo[] inputInfos, long[] rlens, long[] clens, int[] brlens, int[] bclens, String instructionsInMapper, String cmNcomInstructions, int numReducers, int replication, byte[] resultIndexes, String[] outputs, OutputInfo[] outputInfos) throws Exception {
    JobConf job = new JobConf(CMCOVMR.class);
    job.setJobName("CM-COV-MR");
    //whether use block representation or cell representation
    MRJobConfiguration.setMatrixValueClassForCM_N_COM(job, true);
    //input i is identified by byte index i
    //NOTE: the counter is a byte, so this only terminates normally for at most 127 inputs
    byte[] realIndexes = new byte[inputs.length];
    for (byte b = 0; b < realIndexes.length; b++) {
        realIndexes[b] = b;
    }
    //set up the input files and their format information
    MRJobConfiguration.setUpMultipleInputs(job, realIndexes, inputs, inputInfos, brlens, bclens, true, ConvertTarget.WEIGHTEDCELL);
    //set up the dimensions of input matrices
    MRJobConfiguration.setMatricesDimensions(job, realIndexes, rlens, clens);
    //set up the block size
    MRJobConfiguration.setBlocksSizes(job, realIndexes, brlens, bclens);
    //set up unary instructions that will perform in the mapper
    MRJobConfiguration.setInstructionsInMapper(job, instructionsInMapper);
    //set up the aggregate instructions that will happen in the combiner and reducer
    MRJobConfiguration.setCM_N_COMInstructions(job, cmNcomInstructions);
    //set up the replication factor for the results
    job.setInt(MRConfigurationNames.DFS_REPLICATION, replication);
    //set up custom map/reduce configurations
    DMLConfig config = ConfigurationManager.getDMLConfig();
    MRJobConfiguration.setupCustomMRConfigurations(job, config);
    //set up what matrices are needed to pass from the mapper to reducer
    HashSet<Byte> mapoutputIndexes = MRJobConfiguration.setUpOutputIndexesForMapper(job, realIndexes, instructionsInMapper, null, cmNcomInstructions, resultIndexes);
    //set up the multiple output files, and their format information
    //(the third argument is a zero-filled byte array with one entry per result)
    MRJobConfiguration.setUpMultipleOutputs(job, resultIndexes, new byte[resultIndexes.length], outputs, outputInfos, false);
    // configure mapper and the mapper output key value pairs
    job.setMapperClass(CMCOVMRMapper.class);
    job.setMapOutputKeyClass(TaggedFirstSecondIndexes.class);
    job.setMapOutputValueClass(CM_N_COVCell.class);
    job.setOutputKeyComparatorClass(TaggedFirstSecondIndexes.Comparator.class);
    job.setPartitionerClass(TaggedFirstSecondIndexes.TagPartitioner.class);
    //configure reducer
    job.setReducerClass(CMCOVMRReducer.class);
    MatrixCharacteristics[] stats = MRJobConfiguration.computeMatrixCharacteristics(job, realIndexes, instructionsInMapper, null, null, cmNcomInstructions, resultIndexes, mapoutputIndexes, false).stats;
    //set up the number of reducers
    //each output tag is a group
    MRJobConfiguration.setNumReducers(job, mapoutputIndexes.size(), numReducers);
    // Print the complete instruction
    if (LOG.isTraceEnabled()) {
        inst.printCompleteMRJobInstruction(stats);
    }
    //set unique working dir
    MRJobConfiguration.setUniqueWorkingDir(job);
    RunningJob runjob = JobClient.runJob(job);
    return new JobReturn(stats, outputInfos, runjob.isSuccessful());
}
Also used : DMLConfig(org.apache.sysml.conf.DMLConfig) TaggedFirstSecondIndexes(org.apache.sysml.runtime.matrix.data.TaggedFirstSecondIndexes) InputInfo(org.apache.sysml.runtime.matrix.data.InputInfo) RunningJob(org.apache.hadoop.mapred.RunningJob) JobConf(org.apache.hadoop.mapred.JobConf)

Example 17 with DMLConfig

use of org.apache.sysml.conf.DMLConfig in project incubator-systemml by apache.

the class MMCJMR method commonSetup.

/**
 * Applies the configuration shared by the "MMCJ-MR" job variants to {@code job} and
 * returns the matrix characteristics computed for the job.
 *
 * @throws Exception if {@code numReducers} is not positive, or if any configuration step fails
 */
private static MatrixCharacteristics[] commonSetup(JobConf job, boolean inBlockRepresentation, String[] inputs, InputInfo[] inputInfos, long[] rlens, long[] clens, int[] brlens, int[] bclens, String instructionsInMapper, String aggInstructionsInReducer, String aggBinInstrction, int numReducers, int replication, byte resultDimsUnknown, String output, OutputInfo outputinfo) throws Exception {
    job.setJobName("MMCJ-MR");
    if (numReducers <= 0) {
        throw new Exception("MMCJ-MR has to have at least one reduce task!");
    }

    // matrix value class depends on block vs. cell representation
    MRJobConfiguration.setMatrixValueClass(job, inBlockRepresentation);

    // the i-th input is identified by byte index i
    byte[] inputIndexes = new byte[inputs.length];
    byte idx = 0;
    while (idx < inputIndexes.length) {
        inputIndexes[idx] = idx;
        idx++;
    }

    // input files, their formats and the representation they are converted to
    ConvertTarget target = inBlockRepresentation ? ConvertTarget.BLOCK : ConvertTarget.CELL;
    MRJobConfiguration.setUpMultipleInputs(job, inputIndexes, inputs, inputInfos, brlens, bclens, true, target);
    // input matrix dimensions
    MRJobConfiguration.setMatricesDimensions(job, inputIndexes, rlens, clens);
    // input block sizes
    MRJobConfiguration.setBlocksSizes(job, inputIndexes, brlens, bclens);

    // instructions for mapper, aggregation, and the aggregate binary operation
    MRJobConfiguration.setInstructionsInMapper(job, instructionsInMapper);
    MRJobConfiguration.setAggregateInstructions(job, aggInstructionsInReducer);
    MRJobConfiguration.setAggregateBinaryInstructions(job, aggBinInstrction);

    // replication factor for the results
    job.setInt(MRConfigurationNames.DFS_REPLICATION, replication);

    // preferred custom serialization framework for binary block format
    if (MRJobConfiguration.USE_BINARYBLOCK_SERIALIZATION) {
        MRJobConfiguration.addBinaryBlockSerializationFramework(job);
    }

    // map/reduce memory configurations (if in AM context) and custom MR configurations
    DMLConfig dmlConfig = ConfigurationManager.getDMLConfig();
    DMLAppMasterUtils.setupMRJobRemoteMaxMemory(job, dmlConfig);
    MRJobConfiguration.setupCustomMRConfigurations(job, dmlConfig);

    // the single result is the output of the aggregate binary instruction
    byte aggBinOutput = MRInstructionParser.parseSingleInstruction(aggBinInstrction).output;
    byte[] outputIndexes = new byte[] { aggBinOutput };
    byte[] dimsUnknownFlags = new byte[] { resultDimsUnknown };

    // matrices that have to be passed from the mapper to the reducer
    HashSet<Byte> mapperOutputs = MRJobConfiguration.setUpOutputIndexesForMapper(job, inputIndexes, instructionsInMapper, aggInstructionsInReducer, aggBinInstrction, outputIndexes);

    // output file and its format information
    String[] outputFiles = new String[] { output };
    OutputInfo[] outputFormats = new OutputInfo[] { outputinfo };
    MRJobConfiguration.setUpMultipleOutputs(job, outputIndexes, dimsUnknownFlags, outputFiles, outputFormats, inBlockRepresentation);

    // mapper and its output key/value types
    job.setMapperClass(MMCJMRMapper.class);
    job.setMapOutputKeyClass(TaggedFirstSecondIndexes.class);
    job.setMapOutputValueClass(inBlockRepresentation ? MatrixBlock.class : MatrixCell.class);
    job.setOutputKeyComparatorClass(TaggedFirstSecondIndexes.Comparator.class);
    job.setPartitionerClass(TaggedFirstSecondIndexes.FirstIndexPartitioner.class);

    // TODO: no combiner is configured, because it would destroy the stable numerical
    // algorithms for sum or for central moments

    MatrixChar_N_ReducerGroups characteristics = MRJobConfiguration.computeMatrixCharacteristics(job, inputIndexes, instructionsInMapper, aggInstructionsInReducer, aggBinInstrction, null, outputIndexes, mapperOutputs, true);

    // number of reducers: either taken as requested or determined automatically
    if (!AUTOMATIC_CONFIG_NUM_REDUCERS) {
        MRJobConfiguration.setNumReducers(job, characteristics.numReducerGroups, numReducers);
    } else {
        int automaticReducers = determineNumReducers(rlens, clens, numReducers, characteristics.numReducerGroups);
        job.setNumReduceTasks(automaticReducers);
    }

    // reducer (note: the alternative MMCJMRReducer is not maintained)
    job.setReducerClass(MMCJMRReducerWithAggregator.class);
    return characteristics.stats;
}
Also used : MatrixChar_N_ReducerGroups(org.apache.sysml.runtime.matrix.mapred.MRJobConfiguration.MatrixChar_N_ReducerGroups) MatrixBlock(org.apache.sysml.runtime.matrix.data.MatrixBlock) DMLConfig(org.apache.sysml.conf.DMLConfig) MatrixCell(org.apache.sysml.runtime.matrix.data.MatrixCell) TaggedFirstSecondIndexes(org.apache.sysml.runtime.matrix.data.TaggedFirstSecondIndexes)

Example 18 with DMLConfig

use of org.apache.sysml.conf.DMLConfig in project incubator-systemml by apache.

the class MMRJMR method runJob.

/**
 * Configures the "MMRJ-MR" MapReduce job, submits it via {@code JobClient.runJob}, and
 * records the number of non-zeros of every result from the job counters.
 *
 * <p>Every input is assigned the byte index equal to its position in {@code inputs}.
 * Whether the job works on blocks or cells is derived from {@code inputInfos}.
 *
 * @param inst                       job instruction; only used to print the complete
 *                                   instruction when trace logging is enabled
 * @param inputs                     input file names
 * @param inputInfos                 format information of the inputs
 * @param rlens                      row dimensions of the input matrices
 * @param clens                      column dimensions of the input matrices
 * @param brlens                     row block sizes of the inputs
 * @param bclens                     column block sizes of the inputs
 * @param instructionsInMapper       instructions registered for execution in the mapper
 * @param aggInstructionsInReducer   aggregate instructions
 * @param aggBinInstrctions          aggregate binary instructions
 * @param otherInstructionsInReducer instructions executed in the reducer after the aggregation
 * @param numReducers                requested number of reducers; must be positive
 * @param replication                value stored under {@code MRConfigurationNames.DFS_REPLICATION}
 * @param resultIndexes              indexes of the result matrices
 * @param outputs                    output file names
 * @param outputInfos                format information of the outputs
 * @return a {@code JobReturn} built from the computed matrix characteristics (including
 *         non-zero counts), the given {@code outputInfos} and the success flag of the job
 * @throws Exception if {@code numReducers} is not positive, or if job configuration or
 *                   execution fails
 */
public static JobReturn runJob(MRJobInstruction inst, String[] inputs, InputInfo[] inputInfos, long[] rlens, long[] clens, int[] brlens, int[] bclens, String instructionsInMapper, String aggInstructionsInReducer, String aggBinInstrctions, String otherInstructionsInReducer, int numReducers, int replication, byte[] resultIndexes, String[] outputs, OutputInfo[] outputInfos) throws Exception {
    JobConf job = new JobConf(MMRJMR.class);
    job.setJobName("MMRJ-MR");
    if (numReducers <= 0) {
        throw new Exception("MMRJ-MR has to have at least one reduce task!");
    }
    // TODO: check w/ yuanyuan. This job always runs in blocked mode, and hence derivation is not necessary.
    boolean inBlockRepresentation = MRJobConfiguration.deriveRepresentation(inputInfos);
    //whether use block representation or cell representation
    MRJobConfiguration.setMatrixValueClass(job, inBlockRepresentation);
    //input i is identified by byte index i
    //NOTE: the counter is a byte, so this only terminates normally for at most 127 inputs
    byte[] realIndexes = new byte[inputs.length];
    for (byte b = 0; b < realIndexes.length; b++) {
        realIndexes[b] = b;
    }
    //set up the input files and their format information
    MRJobConfiguration.setUpMultipleInputs(job, realIndexes, inputs, inputInfos, brlens, bclens, true, inBlockRepresentation ? ConvertTarget.BLOCK : ConvertTarget.CELL);
    //set up the dimensions of input matrices
    MRJobConfiguration.setMatricesDimensions(job, realIndexes, rlens, clens);
    //set up the block size
    MRJobConfiguration.setBlocksSizes(job, realIndexes, brlens, bclens);
    //set up unary instructions that will perform in the mapper
    MRJobConfiguration.setInstructionsInMapper(job, instructionsInMapper);
    //set up the aggregate instructions that will happen in the combiner and reducer
    MRJobConfiguration.setAggregateInstructions(job, aggInstructionsInReducer);
    //set up the aggregate binary operation for the mmrj job
    MRJobConfiguration.setAggregateBinaryInstructions(job, aggBinInstrctions);
    //set up the instructions that will happen in the reducer, after the aggregation instructions
    MRJobConfiguration.setInstructionsInReducer(job, otherInstructionsInReducer);
    //set up the replication factor for the results
    job.setInt(MRConfigurationNames.DFS_REPLICATION, replication);
    //set up map/reduce memory configurations (if in AM context)
    DMLConfig config = ConfigurationManager.getDMLConfig();
    DMLAppMasterUtils.setupMRJobRemoteMaxMemory(job, config);
    //set up custom map/reduce configurations
    MRJobConfiguration.setupCustomMRConfigurations(job, config);
    //set up what matrices are needed to pass from the mapper to reducer
    HashSet<Byte> mapoutputIndexes = MRJobConfiguration.setUpOutputIndexesForMapper(job, realIndexes, instructionsInMapper, aggInstructionsInReducer, aggBinInstrctions, resultIndexes);
    MatrixChar_N_ReducerGroups ret = MRJobConfiguration.computeMatrixCharacteristics(job, realIndexes, instructionsInMapper, aggInstructionsInReducer, aggBinInstrctions, otherInstructionsInReducer, resultIndexes, mapoutputIndexes, false);
    MatrixCharacteristics[] stats = ret.stats;
    //set up the number of reducers
    MRJobConfiguration.setNumReducers(job, ret.numReducerGroups, numReducers);
    // Print the complete instruction
    if (LOG.isTraceEnabled()) {
        inst.printCompleteMRJobInstruction(stats);
    }
    //flag every result whose row or column count is reported as -1
    byte[] dimsUnknown = new byte[resultIndexes.length];
    for (int i = 0; i < resultIndexes.length; i++) {
        boolean unknown = stats[i].getRows() == -1 || stats[i].getCols() == -1;
        dimsUnknown[i] = (byte) (unknown ? 1 : 0);
    }
    //set up the multiple output files, and their format information
    MRJobConfiguration.setUpMultipleOutputs(job, resultIndexes, dimsUnknown, outputs, outputInfos, inBlockRepresentation);
    // configure mapper
    job.setMapperClass(MMRJMRMapper.class);
    job.setMapOutputKeyClass(TripleIndexes.class);
    if (inBlockRepresentation) {
        job.setMapOutputValueClass(TaggedMatrixBlock.class);
    } else {
        job.setMapOutputValueClass(TaggedMatrixCell.class);
    }
    job.setOutputKeyComparatorClass(TripleIndexes.Comparator.class);
    job.setPartitionerClass(TripleIndexes.FirstTwoIndexesPartitioner.class);
    //TODO: cannot set up combiner, because it will destroy the stable numerical algorithms
    // for sum or for central moments
    //configure reducer
    job.setReducerClass(MMRJMRReducer.class);
    //set unique working dir
    MRJobConfiguration.setUniqueWorkingDir(job);
    RunningJob runjob = JobClient.runJob(job);
    //the counter named after the result position i holds the non-zero count of result i
    Group group = runjob.getCounters().getGroup(MRJobConfiguration.NUM_NONZERO_CELLS);
    for (int i = 0; i < resultIndexes.length; i++) {
        stats[i].setNonZeros(group.getCounter(Integer.toString(i)));
    }
    return new JobReturn(stats, outputInfos, runjob.isSuccessful());
}
Also used : Group(org.apache.hadoop.mapred.Counters.Group) DMLConfig(org.apache.sysml.conf.DMLConfig) TripleIndexes(org.apache.sysml.runtime.matrix.data.TripleIndexes) TaggedMatrixBlock(org.apache.sysml.runtime.matrix.data.TaggedMatrixBlock) MatrixChar_N_ReducerGroups(org.apache.sysml.runtime.matrix.mapred.MRJobConfiguration.MatrixChar_N_ReducerGroups) TaggedMatrixCell(org.apache.sysml.runtime.matrix.data.TaggedMatrixCell) RunningJob(org.apache.hadoop.mapred.RunningJob) JobConf(org.apache.hadoop.mapred.JobConf)

Example 19 with DMLConfig

use of org.apache.sysml.conf.DMLConfig in project incubator-systemml by apache.

the class AutomatedTestBase method checkForProcessLocalTemporaryDir.

/**
	 * <p>
	 * Tests whether the process-local temporary directory exists. Its path is the
	 * scratch space configured in the current config file, followed by the file
	 * separator, the process prefix and the UUID of the running script.
	 * </p>
	 *
	 * @return the result of the HDFS existence check for that path; also
	 *         {@code true} if the check itself fails with an exception (the
	 *         stack trace is printed in that case).
	 */
public boolean checkForProcessLocalTemporaryDir() {
    try {
        String configPath = getCurConfigFile().getPath();
        DMLConfig dmlConfig = new DMLConfig(configPath);
        String processLocalDir = new StringBuilder()
            .append(dmlConfig.getTextValue(DMLConfig.SCRATCH_SPACE))
            .append(Lop.FILE_SEPARATOR)
            .append(Lop.PROCESS_PREFIX)
            .append(DMLScript.getUUID())
            .toString();
        return MapReduceTool.existsFileOnHDFS(processLocalDir);
    } catch (Exception failure) {
        // a failed check is reported the same way as an existing directory
        failure.printStackTrace();
        return true;
    }
}
Also used : DMLConfig(org.apache.sysml.conf.DMLConfig) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException) IOException(java.io.IOException)

Example 20 with DMLConfig

use of org.apache.sysml.conf.DMLConfig in project incubator-systemml by apache.

the class AutomatedTestBase method cleanupScratchSpace.

/**
 * Deletes the scratch space directory named in the current config file, including
 * all of its contents, to prevent side effects between tests. Any exception raised
 * while doing so is ignored.
 */
public void cleanupScratchSpace() {
    try {
        // read the scratch space location from the parsed config file
        String configPath = getCurConfigFile().getPath();
        String scratchDir = new DMLConfig(configPath).getTextValue(DMLConfig.SCRATCH_SPACE);
        MapReduceTool.deleteFileIfExistOnHDFS(scratchDir);
    } catch (Exception ignored) {
        // deliberately ignored: a failed cleanup has no effect on tests
    }
}
Also used : DMLConfig(org.apache.sysml.conf.DMLConfig) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException) IOException(java.io.IOException)

Aggregations

DMLConfig (org.apache.sysml.conf.DMLConfig)31 JobConf (org.apache.hadoop.mapred.JobConf)17 RunningJob (org.apache.hadoop.mapred.RunningJob)13 Group (org.apache.hadoop.mapred.Counters.Group)11 DMLRuntimeException (org.apache.sysml.runtime.DMLRuntimeException)11 Path (org.apache.hadoop.fs.Path)10 MatrixChar_N_ReducerGroups (org.apache.sysml.runtime.matrix.mapred.MRJobConfiguration.MatrixChar_N_ReducerGroups)7 IOException (java.io.IOException)6 DMLProgram (org.apache.sysml.parser.DMLProgram)6 DMLTranslator (org.apache.sysml.parser.DMLTranslator)5 FileSystem (org.apache.hadoop.fs.FileSystem)4 ParserWrapper (org.apache.sysml.parser.ParserWrapper)4 InputInfo (org.apache.sysml.runtime.matrix.data.InputInfo)4 HashMap (java.util.HashMap)3 LanguageException (org.apache.sysml.parser.LanguageException)3 TaggedFirstSecondIndexes (org.apache.sysml.runtime.matrix.data.TaggedFirstSecondIndexes)3 BufferedReader (java.io.BufferedReader)2 FileReader (java.io.FileReader)2 ArrayList (java.util.ArrayList)2 CompilerConfig (org.apache.sysml.conf.CompilerConfig)2