Example 21 with DMLConfig

use of org.apache.sysml.conf.DMLConfig in project incubator-systemml by apache.

the class ProgramRecompiler method generatePartitialRuntimeProgram.

public static ArrayList<ProgramBlock> generatePartitialRuntimeProgram(Program rtprog, ArrayList<StatementBlock> sbs) throws LopsException, DMLRuntimeException, IOException, HopsException {
    ArrayList<ProgramBlock> ret = new ArrayList<ProgramBlock>();
    DMLConfig config = ConfigurationManager.getDMLConfig();
    //construct lops from hops if not existing
    DMLTranslator dmlt = new DMLTranslator(sbs.get(0).getDMLProg());
    for (StatementBlock sb : sbs) {
        dmlt.constructLops(sb);
    }
    //construct runtime program from lops
    for (StatementBlock sb : sbs) {
        DMLProgram prog = sb.getDMLProg();
        ret.add(prog.createRuntimeProgramBlock(rtprog, sb, config));
    }
    return ret;
}
Also used : DMLConfig(org.apache.sysml.conf.DMLConfig) ArrayList(java.util.ArrayList) DMLProgram(org.apache.sysml.parser.DMLProgram) ForProgramBlock(org.apache.sysml.runtime.controlprogram.ForProgramBlock) IfProgramBlock(org.apache.sysml.runtime.controlprogram.IfProgramBlock) ProgramBlock(org.apache.sysml.runtime.controlprogram.ProgramBlock) WhileProgramBlock(org.apache.sysml.runtime.controlprogram.WhileProgramBlock) DMLTranslator(org.apache.sysml.parser.DMLTranslator) IfStatementBlock(org.apache.sysml.parser.IfStatementBlock) WhileStatementBlock(org.apache.sysml.parser.WhileStatementBlock) ForStatementBlock(org.apache.sysml.parser.ForStatementBlock) StatementBlock(org.apache.sysml.parser.StatementBlock)
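
For context, a hedged caller sketch (the wrapper class is hypothetical; only generatePartitialRuntimeProgram itself comes from the example above, and it picks up the active DMLConfig internally via ConfigurationManager, assuming ProgramRecompiler's usual parfor-optimizer package):

import java.util.ArrayList;
import org.apache.sysml.parser.StatementBlock;
import org.apache.sysml.runtime.controlprogram.Program;
import org.apache.sysml.runtime.controlprogram.ProgramBlock;
import org.apache.sysml.runtime.controlprogram.parfor.opt.ProgramRecompiler;

public class PartialRecompile {
    //hypothetical wrapper: regenerate runtime program blocks for a subset
    //of statement blocks, e.g. after an optimizer rewrite has changed hops
    public static ArrayList<ProgramBlock> recompile(Program rtprog, ArrayList<StatementBlock> sbs) throws Exception {
        return ProgramRecompiler.generatePartitialRuntimeProgram(rtprog, sbs);
    }
}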

Example 22 with DMLConfig

use of org.apache.sysml.conf.DMLConfig in project incubator-systemml by apache.

the class LocalFileUtils method createWorkingDirectoryWithUUID.

public static String createWorkingDirectoryWithUUID(String uuid) throws DMLRuntimeException {
    //create local tmp dir if not existing
    String dirRoot = null;
    DMLConfig conf = ConfigurationManager.getDMLConfig();
    if (conf != null)
        dirRoot = conf.getTextValue(DMLConfig.LOCAL_TMP_DIR);
    else
        dirRoot = DMLConfig.getDefaultTextValue(DMLConfig.LOCAL_TMP_DIR);
    //create shared staging dir if not existing
    if (!LocalFileUtils.createLocalFileIfNotExist(dirRoot, DMLConfig.DEFAULT_SHARED_DIR_PERMISSION)) {
        throw new DMLRuntimeException("Failed to create non-existing local working directory: " + dirRoot);
    }
    //create process specific sub tmp dir
    StringBuilder sb = new StringBuilder();
    sb.append(dirRoot);
    sb.append(Lop.FILE_SEPARATOR);
    sb.append(Lop.PROCESS_PREFIX);
    sb.append(uuid);
    sb.append(Lop.FILE_SEPARATOR);
    _workingDir = sb.toString();
    //create process-specific staging dir if not existing
    if (!LocalFileUtils.createLocalFileIfNotExist(_workingDir)) {
        throw new DMLRuntimeException("Failed to create local working directory: " + _workingDir);
    }
    return _workingDir;
}
Also used : DMLConfig(org.apache.sysml.conf.DMLConfig) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException)
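
The null check above is the recurring lookup idiom: ConfigurationManager.getDMLConfig() may return null before any configuration has been loaded, in which case the compiled-in defaults apply. A minimal sketch extracting the idiom into a reusable helper (the helper class is hypothetical; both DMLConfig calls appear verbatim above):

import org.apache.sysml.conf.ConfigurationManager;
import org.apache.sysml.conf.DMLConfig;

public class ConfigUtils {
    //hypothetical helper: resolve a text property from the active config,
    //falling back to the built-in default when none has been loaded
    public static String getTextOrDefault(String key) {
        DMLConfig conf = ConfigurationManager.getDMLConfig();
        return (conf != null) ? conf.getTextValue(key) : DMLConfig.getDefaultTextValue(key);
    }
}

With this, the directory-root lookup in createWorkingDirectoryWithUUID collapses to String dirRoot = ConfigUtils.getTextOrDefault(DMLConfig.LOCAL_TMP_DIR);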

Example 23 with DMLConfig

use of org.apache.sysml.conf.DMLConfig in project incubator-systemml by apache.

the class DataPartitionerRemoteMR method partitionMatrix.

@Override
protected void partitionMatrix(MatrixObject in, String fnameNew, InputInfo ii, OutputInfo oi, long rlen, long clen, int brlen, int bclen) throws DMLRuntimeException {
    String jobname = "ParFor-DPMR";
    long t0 = DMLScript.STATISTICS ? System.nanoTime() : 0;
    JobConf job = new JobConf(DataPartitionerRemoteMR.class);
    if (_pfid >= 0) //use in parfor
        job.setJobName(jobname + _pfid);
    else //use for partition instruction
        job.setJobName("Partition-MR");
    //maintain dml script counters
    Statistics.incrementNoOfCompiledMRJobs();
    try {
        //force writing to disk (typically not required since partitioning only applied if dataset exceeds CP size)
        //written to disk iff dirty
        in.exportData();
        Path path = new Path(in.getFileName());
        /////
        //configure the MR job
        MRJobConfiguration.setPartitioningInfo(job, rlen, clen, brlen, bclen, ii, oi, _format, _n, fnameNew, _keepIndexes);
        //set mappers, reducers, combiners
        job.setMapperClass(DataPartitionerRemoteMapper.class);
        job.setReducerClass(DataPartitionerRemoteReducer.class);
        if (oi == OutputInfo.TextCellOutputInfo) {
            //binary cell intermediates for reduced IO 
            job.setMapOutputKeyClass(LongWritable.class);
            job.setMapOutputValueClass(PairWritableCell.class);
        } else if (oi == OutputInfo.BinaryCellOutputInfo) {
            job.setMapOutputKeyClass(LongWritable.class);
            job.setMapOutputValueClass(PairWritableCell.class);
        } else if (oi == OutputInfo.BinaryBlockOutputInfo) {
            job.setMapOutputKeyClass(LongWritable.class);
            job.setMapOutputValueClass(PairWritableBlock.class);
            //check Alignment
            if ((_format == PDataPartitionFormat.ROW_BLOCK_WISE_N && rlen > _n && _n % brlen != 0) || (_format == PDataPartitionFormat.COLUMN_BLOCK_WISE_N && clen > _n && _n % bclen != 0)) {
                throw new DMLRuntimeException("Data partitioning format " + _format + " requires aligned blocks.");
            }
        }
        //set input format 
        job.setInputFormat(ii.inputFormatClass);
        //set the input path and output path 
        FileInputFormat.setInputPaths(job, path);
        //set output path
        MapReduceTool.deleteFileIfExistOnHDFS(fnameNew);
        //FileOutputFormat.setOutputPath(job, pathNew);
        job.setOutputFormat(NullOutputFormat.class);
        //////
        //set optimization parameters
        //set the number of mappers and reducers 
        //job.setNumMapTasks( _numMappers ); //use default num mappers
        long reducerGroups = -1;
        switch(_format) {
            case ROW_WISE:
                reducerGroups = rlen;
                break;
            case COLUMN_WISE:
                reducerGroups = clen;
                break;
            case ROW_BLOCK_WISE:
                reducerGroups = (rlen / brlen) + ((rlen % brlen == 0) ? 0 : 1);
                break;
            case COLUMN_BLOCK_WISE:
                reducerGroups = (clen / bclen) + ((clen % bclen == 0) ? 0 : 1);
                break;
            case ROW_BLOCK_WISE_N:
                reducerGroups = (rlen / _n) + ((rlen % _n == 0) ? 0 : 1);
                break;
            case COLUMN_BLOCK_WISE_N:
                reducerGroups = (clen / _n) + ((clen % _n == 0) ? 0 : 1);
                break;
            default:
        }
        job.setNumReduceTasks((int) Math.min(_numReducers, reducerGroups));
        //disable automatic tasks timeouts and speculative task exec
        job.setInt(MRConfigurationNames.MR_TASK_TIMEOUT, 0);
        job.setMapSpeculativeExecution(false);
        //set up preferred custom serialization framework for binary block format
        if (MRJobConfiguration.USE_BINARYBLOCK_SERIALIZATION)
            MRJobConfiguration.addBinaryBlockSerializationFramework(job);
        //enables the reuse of JVMs (multiple tasks per MR task)
        if (_jvmReuse)
            job.setNumTasksToExecutePerJvm(-1); //unlimited
        //enables compression - not conclusive for different codecs (empirically good compression ratio, but significantly slower)
        //job.set(MRConfigurationNames.MR_MAP_OUTPUT_COMPRESS, "true");
        //job.set(MRConfigurationNames.MR_MAP_OUTPUT_COMPRESS_CODEC, "org.apache.hadoop.io.compress.GzipCodec");
        //set the replication factor for the results
        job.setInt(MRConfigurationNames.DFS_REPLICATION, _replication);
        //set up map/reduce memory configurations (if in AM context)
        DMLConfig config = ConfigurationManager.getDMLConfig();
        DMLAppMasterUtils.setupMRJobRemoteMaxMemory(job, config);
        //set up custom map/reduce configurations 
        MRJobConfiguration.setupCustomMRConfigurations(job, config);
        //set the max number of retries per map task
        //  disabled job-level configuration to respect cluster configuration
        //  note: this refers to hadoop2, hence it never had effect on mr1
        //job.setInt(MRConfigurationNames.MR_MAP_MAXATTEMPTS, _max_retry);
        //set unique working dir
        MRJobConfiguration.setUniqueWorkingDir(job);
        /////
        // execute the MR job	
        JobClient.runJob(job);
        //maintain dml script counters
        Statistics.incrementNoOfExecutedMRJobs();
    } catch (Exception ex) {
        throw new DMLRuntimeException(ex);
    }
    if (DMLScript.STATISTICS && _pfid >= 0) {
        //only for parfor 
        long t1 = System.nanoTime();
        Statistics.maintainCPHeavyHitters("MR-Job_" + jobname, t1 - t0);
    }
}
Also used : Path(org.apache.hadoop.fs.Path) DMLConfig(org.apache.sysml.conf.DMLConfig) LongWritable(org.apache.hadoop.io.LongWritable) JobConf(org.apache.hadoop.mapred.JobConf) PairWritableCell(org.apache.sysml.runtime.controlprogram.parfor.util.PairWritableCell) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException)
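
Note the shared configuration tail near the end of the method: fetch the active DMLConfig, size map/reduce memory when running under a YARN application master, and forward user-defined MR properties. A hedged sketch of that step as a standalone helper (the helper is hypothetical; the calls are taken from the example above, assuming DMLAppMasterUtils lives in org.apache.sysml.yarn):

import org.apache.hadoop.mapred.JobConf;
import org.apache.sysml.conf.ConfigurationManager;
import org.apache.sysml.conf.DMLConfig;
import org.apache.sysml.runtime.matrix.mapred.MRJobConfiguration;
import org.apache.sysml.yarn.DMLAppMasterUtils;

public class MRConfigTail {
    //hypothetical helper: DMLConfig-driven job settings applied right before launch
    public static void apply(JobConf job) throws Exception {
        DMLConfig config = ConfigurationManager.getDMLConfig();
        //map/reduce memory budgets (only effective in an AM context)
        DMLAppMasterUtils.setupMRJobRemoteMaxMemory(job, config);
        //forward custom MR properties from the SystemML config file
        MRJobConfiguration.setupCustomMRConfigurations(job, config);
    }
}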

Example 24 with DMLConfig

use of org.apache.sysml.conf.DMLConfig in project incubator-systemml by apache.

the class ReblockMR method runJob.

public static JobReturn runJob(MRJobInstruction inst, String[] inputs, InputInfo[] inputInfos, long[] rlens, long[] clens, int[] brlens, int[] bclens, long[] nnz, String instructionsInMapper, String reblockInstructions, String otherInstructionsInReducer, int numReducers, int replication, boolean jvmReuse, byte[] resultIndexes, String[] outputs, OutputInfo[] outputInfos) throws Exception {
    JobConf job = new JobConf(ReblockMR.class);
    job.setJobName("Reblock-MR");
    byte[] realIndexes = new byte[inputs.length];
    for (byte b = 0; b < realIndexes.length; b++) realIndexes[b] = b;
    //set up the input files and their format information
    //(internally used input converters: text2bc for text, identity for binary inputs)
    MRJobConfiguration.setUpMultipleInputsReblock(job, realIndexes, inputs, inputInfos, brlens, bclens);
    //set up the dimensions of input matrices
    MRJobConfiguration.setMatricesDimensions(job, realIndexes, rlens, clens, nnz);
    //set up the block size
    MRJobConfiguration.setBlocksSizes(job, realIndexes, brlens, bclens);
    //set up unary instructions that will perform in the mapper
    MRJobConfiguration.setInstructionsInMapper(job, instructionsInMapper);
    //set up the aggregate instructions that will happen in the combiner and reducer
    MRJobConfiguration.setReblockInstructions(job, reblockInstructions);
    //set up the instructions that will happen in the reducer, after the aggregation instructions
    MRJobConfiguration.setInstructionsInReducer(job, otherInstructionsInReducer);
    //set up the replication factor for the results
    job.setInt(MRConfigurationNames.DFS_REPLICATION, replication);
    //disable automatic tasks timeouts and speculative task exec
    job.setInt(MRConfigurationNames.MR_TASK_TIMEOUT, 0);
    job.setMapSpeculativeExecution(false);
    //set up preferred custom serialization framework for binary block format
    if (MRJobConfiguration.USE_BINARYBLOCK_SERIALIZATION)
        MRJobConfiguration.addBinaryBlockSerializationFramework(job);
    //set up custom map/reduce configurations 
    DMLConfig config = ConfigurationManager.getDMLConfig();
    MRJobConfiguration.setupCustomMRConfigurations(job, config);
    //enable jvm reuse (based on SystemML configuration)
    if (jvmReuse)
        job.setNumTasksToExecutePerJvm(-1);
    //set up what matrices are needed to pass from the mapper to reducer
    HashSet<Byte> mapoutputIndexes = MRJobConfiguration.setUpOutputIndexesForMapper(job, realIndexes, instructionsInMapper, reblockInstructions, null, otherInstructionsInReducer, resultIndexes);
    MatrixChar_N_ReducerGroups ret = MRJobConfiguration.computeMatrixCharacteristics(job, realIndexes, instructionsInMapper, reblockInstructions, null, null, otherInstructionsInReducer, resultIndexes, mapoutputIndexes, false);
    MatrixCharacteristics[] stats = ret.stats;
    //set up the number of reducers (according to output size)
    int numRed = determineNumReducers(rlens, clens, nnz, config.getIntValue(DMLConfig.NUM_REDUCERS), ret.numReducerGroups);
    job.setNumReduceTasks(numRed);
    // Print the complete instruction
    if (LOG.isTraceEnabled())
        inst.printCompleteMRJobInstruction(stats);
    // Update resultDimsUnknown based on computed "stats"
    byte[] resultDimsUnknown = new byte[resultIndexes.length];
    for (int i = 0; i < resultIndexes.length; i++) {
        if (stats[i].getRows() == -1 || stats[i].getCols() == -1) {
            resultDimsUnknown[i] = (byte) 1;
        } else {
            resultDimsUnknown[i] = (byte) 0;
        }
    }
    //set up the multiple output files, and their format information
    MRJobConfiguration.setUpMultipleOutputs(job, resultIndexes, resultDimsUnknown, outputs, outputInfos, true, true);
    // configure mapper and the mapper output key value pairs
    job.setMapperClass(ReblockMapper.class);
    //represent key offsets for block
    job.setMapOutputKeyClass(MatrixIndexes.class);
    //binary cell/block
    job.setMapOutputValueClass(TaggedAdaptivePartialBlock.class);
    //configure reducer
    job.setReducerClass(ReblockReducer.class);
    // By default, the job executes in "cluster" mode.
    // Determine if we can optimize and run it in "local" mode.
    // at this point, both reblock_binary and reblock_text are similar
    MatrixCharacteristics[] inputStats = new MatrixCharacteristics[inputs.length];
    for (int i = 0; i < inputs.length; i++) {
        inputStats[i] = new MatrixCharacteristics(rlens[i], clens[i], brlens[i], bclens[i]);
    }
    //set unique working dir
    MRJobConfiguration.setUniqueWorkingDir(job);
    RunningJob runjob = JobClient.runJob(job);
    /* Process different counters */
    Group group = runjob.getCounters().getGroup(MRJobConfiguration.NUM_NONZERO_CELLS);
    for (int i = 0; i < resultIndexes.length; i++) {
        // number of non-zeros
        stats[i].setNonZeros(group.getCounter(Integer.toString(i)));
    //	System.out.println("result #"+resultIndexes[i]+" ===>\n"+stats[i]);
    }
    return new JobReturn(stats, outputInfos, runjob.isSuccessful());
}
Also used : Group(org.apache.hadoop.mapred.Counters.Group) DMLConfig(org.apache.sysml.conf.DMLConfig) MatrixChar_N_ReducerGroups(org.apache.sysml.runtime.matrix.mapred.MRJobConfiguration.MatrixChar_N_ReducerGroups) RunningJob(org.apache.hadoop.mapred.RunningJob) JobConf(org.apache.hadoop.mapred.JobConf)
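
Both MR examples derive the reducer count the same way: read the configured maximum from DMLConfig.NUM_REDUCERS and cap it by the number of reducer groups the output actually yields (Example 23 inlines the Math.min; ReblockMR delegates to determineNumReducers). A hedged sketch of the inline variant (the helper is hypothetical; getIntValue(DMLConfig.NUM_REDUCERS) is from the example above):

import org.apache.hadoop.mapred.JobConf;
import org.apache.sysml.conf.ConfigurationManager;
import org.apache.sysml.conf.DMLConfig;

public class ReducerSizing {
    //hypothetical helper: never schedule more reducers than reducer groups,
    //and never more than the configured maximum
    public static void setCappedReducers(JobConf job, long reducerGroups) {
        DMLConfig config = ConfigurationManager.getDMLConfig();
        int maxRed = config.getIntValue(DMLConfig.NUM_REDUCERS);
        job.setNumReduceTasks((int) Math.min(maxRed, reducerGroups));
    }
}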

Example 25 with DMLConfig

use of org.apache.sysml.conf.DMLConfig in project incubator-systemml by apache.

the class SortMR method runJob.

@SuppressWarnings({ "unchecked", "rawtypes" })
public static JobReturn runJob(MRJobInstruction inst, String input, InputInfo inputInfo, long rlen, long clen, int brlen, int bclen, String combineInst, String sortInst, int numReducers, int replication, String output, OutputInfo outputInfo, boolean valueIsWeight) throws Exception {
    boolean sortIndexes = getSortInstructionType(sortInst) == SortKeys.OperationTypes.Indexes;
    String tmpOutput = sortIndexes ? MRJobConfiguration.constructTempOutputFilename() : output;
    JobConf job = new JobConf(SortMR.class);
    job.setJobName("SortMR");
    //setup partition file
    String pfname = MRJobConfiguration.setUpSortPartitionFilename(job);
    Path partitionFile = new Path(pfname);
    URI partitionUri = new URI(partitionFile.toString());
    //setup input/output paths
    Path inputDir = new Path(input);
    inputDir = inputDir.makeQualified(inputDir.getFileSystem(job));
    SamplingSortMRInputFormat.setInputPaths(job, inputDir);
    Path outpath = new Path(tmpOutput);
    FileOutputFormat.setOutputPath(job, outpath);
    MapReduceTool.deleteFileIfExistOnHDFS(outpath, job);
    //set number of reducers (1 if local mode)
    if (!InfrastructureAnalyzer.isLocalMode(job)) {
        MRJobConfiguration.setNumReducers(job, numReducers, numReducers);
        //on cp-side qpick instructions for quantile/iqm/median (~128MB)
        if (!(getSortInstructionType(sortInst) == SortKeys.OperationTypes.Indexes))
            job.setNumReduceTasks((int) Math.max(job.getNumReduceTasks(), rlen / 10000000));
    } else //in case of local mode
        job.setNumReduceTasks(1);
    //setup input/output format
    job.setInputFormat(SamplingSortMRInputFormat.class);
    SamplingSortMRInputFormat.setTargetKeyValueClasses(job, (Class<? extends WritableComparable>) outputInfo.outputKeyClass, outputInfo.outputValueClass);
    //setup instructions and meta information
    if (combineInst != null && !combineInst.trim().isEmpty())
        job.set(COMBINE_INSTRUCTION, combineInst);
    job.set(SORT_INSTRUCTION, sortInst);
    job.setBoolean(VALUE_IS_WEIGHT, valueIsWeight);
    boolean desc = getSortInstructionDescending(sortInst);
    job.setBoolean(SORT_DECREASING, desc);
    MRJobConfiguration.setBlockSize(job, (byte) 0, brlen, bclen);
    MRJobConfiguration.setInputInfo(job, (byte) 0, inputInfo, brlen, bclen, ConvertTarget.CELL);
    int partitionWith0 = SamplingSortMRInputFormat.writePartitionFile(job, partitionFile);
    //setup mapper/reducer/partitioner/output classes
    if (getSortInstructionType(sortInst) == SortKeys.OperationTypes.Indexes) {
        MRJobConfiguration.setInputInfo(job, (byte) 0, inputInfo, brlen, bclen, ConvertTarget.CELL);
        job.setOutputFormat(OutputInfo.BinaryBlockOutputInfo.outputFormatClass);
        job.setMapperClass(IndexSortMapper.class);
        job.setReducerClass(IndexSortReducer.class);
        job.setMapOutputKeyClass(!desc ? IndexSortComparable.class : IndexSortComparableDesc.class);
        job.setMapOutputValueClass(LongWritable.class);
        job.setOutputKeyClass(MatrixIndexes.class);
        job.setOutputValueClass(MatrixBlock.class);
    } else {
        //default case: SORT w/wo weights
        MRJobConfiguration.setInputInfo(job, (byte) 0, inputInfo, brlen, bclen, ConvertTarget.CELL);
        job.setOutputFormat(CompactOutputFormat.class);
        job.setMapperClass(ValueSortMapper.class);
        job.setReducerClass(ValueSortReducer.class);
        job.setOutputKeyClass(outputInfo.outputKeyClass); //double
        job.setOutputValueClass(outputInfo.outputValueClass); //int
    }
    job.setPartitionerClass(TotalOrderPartitioner.class);
    //setup distributed cache
    DistributedCache.addCacheFile(partitionUri, job);
    DistributedCache.createSymlink(job);
    //setup replication factor
    job.setInt(MRConfigurationNames.DFS_REPLICATION, replication);
    //set up custom map/reduce configurations 
    DMLConfig config = ConfigurationManager.getDMLConfig();
    MRJobConfiguration.setupCustomMRConfigurations(job, config);
    MatrixCharacteristics[] s = new MatrixCharacteristics[1];
    s[0] = new MatrixCharacteristics(rlen, clen, brlen, bclen);
    // Print the complete instruction
    if (LOG.isTraceEnabled())
        inst.printCompleteMRJobInstruction(s);
    //set unique working dir
    MRJobConfiguration.setUniqueWorkingDir(job);
    //run mr job
    RunningJob runjob = JobClient.runJob(job);
    Group group = runjob.getCounters().getGroup(NUM_VALUES_PREFIX);
    numReducers = job.getNumReduceTasks();
    //process final meta data
    long[] counts = new long[numReducers];
    long total = 0;
    for (int i = 0; i < numReducers; i++) {
        counts[i] = group.getCounter(Integer.toString(i));
        total += counts[i];
    }
    //add missing 0s back to the results
    long missing0s = 0;
    if (total < rlen * clen) {
        if (partitionWith0 < 0)
            throw new RuntimeException("no partition contains 0, which is wrong!");
        missing0s = rlen * clen - total;
        counts[partitionWith0] += missing0s;
    } else
        partitionWith0 = -1;
    if (sortIndexes) {
        //run builtin job for shifting partially sorted blocks according to global offsets
        //we do this in this custom form since it would not fit into the current structure
        //of systemml to output two intermediates (partially sorted data, offsets) out of a 
        //single SortKeys lop
        boolean success = runjob.isSuccessful();
        if (success) {
            success = runStitchupJob(tmpOutput, rlen, clen, brlen, bclen, counts, numReducers, replication, output);
        }
        MapReduceTool.deleteFileIfExistOnHDFS(tmpOutput);
        MapReduceTool.deleteFileIfExistOnHDFS(pfname);
        return new JobReturn(s[0], OutputInfo.BinaryBlockOutputInfo, success);
    } else {
        MapReduceTool.deleteFileIfExistOnHDFS(pfname);
        return new JobReturn(s[0], counts, partitionWith0, missing0s, runjob.isSuccessful());
    }
}
Also used : Path(org.apache.hadoop.fs.Path) Group(org.apache.hadoop.mapred.Counters.Group) DMLConfig(org.apache.sysml.conf.DMLConfig) IndexSortComparableDesc(org.apache.sysml.runtime.matrix.sort.IndexSortComparableDesc) URI(java.net.URI) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException) RunningJob(org.apache.hadoop.mapred.RunningJob) IndexSortComparable(org.apache.sysml.runtime.matrix.sort.IndexSortComparable) JobConf(org.apache.hadoop.mapred.JobConf)
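
The total-order sort above hinges on the partition file: sampled split points are written to HDFS and shipped to every task through the distributed cache so TotalOrderPartitioner can load them. A hedged sketch of just that wiring (the helper is hypothetical; the cache and partitioner calls are from the example above):

import java.net.URI;
import org.apache.hadoop.filecache.DistributedCache;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.lib.TotalOrderPartitioner;

public class SortPartitionWiring {
    //hypothetical helper: attach a precomputed partition file so the
    //total-order partitioner can find it on every node
    public static void attach(JobConf job, String pfname) throws Exception {
        URI partitionUri = new URI(pfname);
        DistributedCache.addCacheFile(partitionUri, job);
        DistributedCache.createSymlink(job);
        job.setPartitionerClass(TotalOrderPartitioner.class);
    }
}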

Aggregations

DMLConfig (org.apache.sysml.conf.DMLConfig): 31
JobConf (org.apache.hadoop.mapred.JobConf): 17
RunningJob (org.apache.hadoop.mapred.RunningJob): 13
Group (org.apache.hadoop.mapred.Counters.Group): 11
DMLRuntimeException (org.apache.sysml.runtime.DMLRuntimeException): 11
Path (org.apache.hadoop.fs.Path): 10
MatrixChar_N_ReducerGroups (org.apache.sysml.runtime.matrix.mapred.MRJobConfiguration.MatrixChar_N_ReducerGroups): 7
IOException (java.io.IOException): 6
DMLProgram (org.apache.sysml.parser.DMLProgram): 6
DMLTranslator (org.apache.sysml.parser.DMLTranslator): 5
FileSystem (org.apache.hadoop.fs.FileSystem): 4
ParserWrapper (org.apache.sysml.parser.ParserWrapper): 4
InputInfo (org.apache.sysml.runtime.matrix.data.InputInfo): 4
HashMap (java.util.HashMap): 3
LanguageException (org.apache.sysml.parser.LanguageException): 3
TaggedFirstSecondIndexes (org.apache.sysml.runtime.matrix.data.TaggedFirstSecondIndexes): 3
BufferedReader (java.io.BufferedReader): 2
FileReader (java.io.FileReader): 2
ArrayList (java.util.ArrayList): 2
CompilerConfig (org.apache.sysml.conf.CompilerConfig): 2