Search in sources :

Example 41 with LocalVariableMap

use of org.apache.sysml.runtime.controlprogram.LocalVariableMap in project incubator-systemml by apache.

the class ProgramConverter method parseVariables.

/**
 * Parses the serialized symbol table of a parfor program.
 *
 * The input is expected to be wrapped in the PARFOR_VARS_BEGIN / PARFOR_VARS_END
 * markers. If nothing lies between the markers (the input is no longer than both
 * markers together), an empty variable map is returned.
 *
 * @param in serialized variables, including the begin/end markers
 * @return the deserialized variable map, never null
 */
private static LocalVariableMap parseVariables(String in) {
    final int beginLen = PARFOR_VARS_BEGIN.length();
    final int endLen = PARFOR_VARS_END.length();
    // empty input symbol table: there is no payload between the two markers
    if (in.length() <= beginLen + endLen) {
        return new LocalVariableMap();
    }
    // strip both markers and surrounding whitespace, then deserialize the payload
    String payload = in.substring(beginLen, in.length() - endLen).trim();
    return LocalVariableMap.deserialize(payload);
}
Also used : LocalVariableMap(org.apache.sysml.runtime.controlprogram.LocalVariableMap)

Example 42 with LocalVariableMap

use of org.apache.sysml.runtime.controlprogram.LocalVariableMap in project incubator-systemml by apache.

the class ProgramConverter method parseExecutionContext.

/**
 * Parses a serialized execution context of a parfor program.
 *
 * The PARFOR_EC_BEGIN / PARFOR_EC_END markers are stripped from the input; if the
 * remaining (trimmed) content equals EMPTY, no context is created.
 *
 * @param in   serialized execution context, including the begin/end markers
 * @param prog program the new execution context is created for
 * @return a new execution context holding the parsed variables, or null if the
 *         serialized context is EMPTY
 */
private static ExecutionContext parseExecutionContext(String in, Program prog) {
    String content = in.substring(PARFOR_EC_BEGIN.length(), in.length() - PARFOR_EC_END.length()).trim();
    if (content.equals(EMPTY)) {
        return null;
    }
    // parse the symbol table first, then create the context and attach the variables
    LocalVariableMap symbols = parseVariables(content);
    ExecutionContext context = ExecutionContextFactory.createContext(false, prog);
    context.setVariables(symbols);
    return context;
}
Also used : ExecutionContext(org.apache.sysml.runtime.controlprogram.context.ExecutionContext) LocalVariableMap(org.apache.sysml.runtime.controlprogram.LocalVariableMap)

Example 43 with LocalVariableMap

use of org.apache.sysml.runtime.controlprogram.LocalVariableMap in project incubator-systemml by apache.

the class RemoteDPParForMR method runJob.

/**
 * Configures and runs the MR job for a data-partitioned remote parfor
 * ("ParFor-DPEMR"), then reads the per-task results back from the result file.
 *
 * The result file is deleted before the job is configured and again after the
 * job finished (successfully or not). If the job itself fails and the final
 * cleanup fails as well, the job failure is thrown and the cleanup failure is
 * attached to it as a suppressed exception, so the root cause is not lost.
 *
 * @param pfid            parfor id, appended to the MR job name
 * @param itervar         name of the iteration variable (passed to the partitioning config)
 * @param matrixvar       name of the partitioned matrix variable (passed to the partitioning config)
 * @param program         serialized program blocks to execute in the reducers
 * @param resultFile      HDFS path used as job output; removed again before returning
 * @param input           input matrix; provides file name, dimensions and block sizes
 * @param dpf             partition format (its _dpf and _N fields are passed to the partitioning config)
 * @param oi              output info of the partitioning intermediates; must be binary block or binary cell
 * @param tSparseCol      flag passed through to the partitioning config
 * @param enableCPCaching enables/disables caching in the parfor workers
 * @param numReducers     number of reduce tasks
 * @param replication     replication factor for the results
 * @return job return object with success flag, number of tasks/iterations and result variables
 * @throws DMLRuntimeException if job configuration, execution, result reading or cleanup fails
 */
public static RemoteParForJobReturn runJob(long pfid, String itervar, String matrixvar, String program, String resultFile, MatrixObject input,
        PartitionFormat dpf, OutputInfo oi, boolean tSparseCol, // config params
        boolean enableCPCaching, int numReducers, int replication) { // opt params
    RemoteParForJobReturn ret = null;
    String jobname = "ParFor-DPEMR";
    long t0 = DMLScript.STATISTICS ? System.nanoTime() : 0;
    JobConf job;
    job = new JobConf(RemoteDPParForMR.class);
    job.setJobName(jobname + pfid);
    // maintain dml script counters
    Statistics.incrementNoOfCompiledMRJobs();
    // primary failure of the job (if any), kept so that a cleanup failure cannot mask it
    DMLRuntimeException failure = null;
    try {
        // ///
        // configure the MR job
        // set arbitrary CP program blocks that will perform in the reducers
        MRJobConfiguration.setProgramBlocks(job, program);
        // enable/disable caching
        MRJobConfiguration.setParforCachingConfig(job, enableCPCaching);
        // setup input matrix
        Path path = new Path(input.getFileName());
        long rlen = input.getNumRows();
        long clen = input.getNumColumns();
        int brlen = (int) input.getNumRowsPerBlock();
        int bclen = (int) input.getNumColumnsPerBlock();
        MRJobConfiguration.setPartitioningInfo(job, rlen, clen, brlen, bclen, InputInfo.BinaryBlockInputInfo, oi, dpf._dpf, dpf._N, input.getFileName(), itervar, matrixvar, tSparseCol);
        job.setInputFormat(InputInfo.BinaryBlockInputInfo.inputFormatClass);
        FileInputFormat.setInputPaths(job, path);
        // set mapper and reducers classes
        job.setMapperClass(DataPartitionerRemoteMapper.class);
        job.setReducerClass(RemoteDPParWorkerReducer.class);
        // set output format
        job.setOutputFormat(SequenceFileOutputFormat.class);
        // set output path
        MapReduceTool.deleteFileIfExistOnHDFS(resultFile);
        FileOutputFormat.setOutputPath(job, new Path(resultFile));
        // set the output key, value schema
        // parfor partitioning outputs (intermediates)
        job.setMapOutputKeyClass(LongWritable.class);
        if (oi == OutputInfo.BinaryBlockOutputInfo)
            job.setMapOutputValueClass(PairWritableBlock.class);
        else if (oi == OutputInfo.BinaryCellOutputInfo)
            job.setMapOutputValueClass(PairWritableCell.class);
        else
            throw new DMLRuntimeException("Unsupported intermrediate output info: " + oi);
        // parfor exec output
        job.setOutputKeyClass(LongWritable.class);
        job.setOutputValueClass(Text.class);
        // ////
        // set optimization parameters
        // set the number of mappers and reducers
        job.setNumReduceTasks(numReducers);
        // disable automatic tasks timeouts and speculative task exec
        job.setInt(MRConfigurationNames.MR_TASK_TIMEOUT, 0);
        job.setMapSpeculativeExecution(false);
        // set up preferred custom serialization framework for binary block format
        if (MRJobConfiguration.USE_BINARYBLOCK_SERIALIZATION)
            MRJobConfiguration.addBinaryBlockSerializationFramework(job);
        // set up map/reduce memory configurations (if in AM context)
        DMLConfig config = ConfigurationManager.getDMLConfig();
        DMLAppMasterUtils.setupMRJobRemoteMaxMemory(job, config);
        // set up custom map/reduce configurations
        MRJobConfiguration.setupCustomMRConfigurations(job, config);
        // disable JVM reuse: one task per JVM (-1 would mean unlimited reuse)
        job.setNumTasksToExecutePerJvm(1);
        // set the replication factor for the results
        job.setInt(MRConfigurationNames.DFS_REPLICATION, replication);
        // set the max number of retries per map task
        // note: currently disabled to use cluster config
        // job.setInt(MRConfigurationNames.MR_MAP_MAXATTEMPTS, max_retry);
        // set unique working dir
        MRJobConfiguration.setUniqueWorkingDir(job);
        // ///
        // execute the MR job
        RunningJob runjob = JobClient.runJob(job);
        // Process different counters
        Statistics.incrementNoOfExecutedMRJobs();
        Group pgroup = runjob.getCounters().getGroup(ParForProgramBlock.PARFOR_COUNTER_GROUP_NAME);
        int numTasks = (int) pgroup.getCounter(Stat.PARFOR_NUMTASKS.toString());
        int numIters = (int) pgroup.getCounter(Stat.PARFOR_NUMITERS.toString());
        // remote JVM and cache statistics are only merged when not running in local mode
        if (DMLScript.STATISTICS && !InfrastructureAnalyzer.isLocalMode()) {
            Statistics.incrementJITCompileTime(pgroup.getCounter(Stat.PARFOR_JITCOMPILE.toString()));
            Statistics.incrementJVMgcCount(pgroup.getCounter(Stat.PARFOR_JVMGC_COUNT.toString()));
            Statistics.incrementJVMgcTime(pgroup.getCounter(Stat.PARFOR_JVMGC_TIME.toString()));
            Group cgroup = runjob.getCounters().getGroup(CacheableData.CACHING_COUNTER_GROUP_NAME.toString());
            CacheStatistics.incrementMemHits((int) cgroup.getCounter(CacheStatistics.Stat.CACHE_HITS_MEM.toString()));
            CacheStatistics.incrementFSBuffHits((int) cgroup.getCounter(CacheStatistics.Stat.CACHE_HITS_FSBUFF.toString()));
            CacheStatistics.incrementFSHits((int) cgroup.getCounter(CacheStatistics.Stat.CACHE_HITS_FS.toString()));
            CacheStatistics.incrementHDFSHits((int) cgroup.getCounter(CacheStatistics.Stat.CACHE_HITS_HDFS.toString()));
            CacheStatistics.incrementFSBuffWrites((int) cgroup.getCounter(CacheStatistics.Stat.CACHE_WRITES_FSBUFF.toString()));
            CacheStatistics.incrementFSWrites((int) cgroup.getCounter(CacheStatistics.Stat.CACHE_WRITES_FS.toString()));
            CacheStatistics.incrementHDFSWrites((int) cgroup.getCounter(CacheStatistics.Stat.CACHE_WRITES_HDFS.toString()));
            CacheStatistics.incrementAcquireRTime(cgroup.getCounter(CacheStatistics.Stat.CACHE_TIME_ACQR.toString()));
            CacheStatistics.incrementAcquireMTime(cgroup.getCounter(CacheStatistics.Stat.CACHE_TIME_ACQM.toString()));
            CacheStatistics.incrementReleaseTime(cgroup.getCounter(CacheStatistics.Stat.CACHE_TIME_RLS.toString()));
            CacheStatistics.incrementExportTime(cgroup.getCounter(CacheStatistics.Stat.CACHE_TIME_EXP.toString()));
        }
        // read all files of result variables and prepare for return
        LocalVariableMap[] results = readResultFile(job, resultFile);
        ret = new RemoteParForJobReturn(runjob.isSuccessful(), numTasks, numIters, results);
    } catch (Exception ex) {
        failure = new DMLRuntimeException(ex);
        throw failure;
    } finally {
        // remove created files
        try {
            MapReduceTool.deleteFileIfExistOnHDFS(new Path(resultFile), job);
        } catch (IOException ex) {
            // never let a cleanup problem replace the actual job failure
            if (failure != null)
                failure.addSuppressed(ex);
            else
                throw new DMLRuntimeException(ex);
        }
    }
    if (DMLScript.STATISTICS) {
        long t1 = System.nanoTime();
        Statistics.maintainCPHeavyHitters("MR-Job_" + jobname, t1 - t0);
    }
    return ret;
}
Also used : Path(org.apache.hadoop.fs.Path) Group(org.apache.hadoop.mapred.Counters.Group) DMLConfig(org.apache.sysml.conf.DMLConfig) IOException(java.io.IOException) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException) PairWritableBlock(org.apache.sysml.runtime.controlprogram.parfor.util.PairWritableBlock) LocalVariableMap(org.apache.sysml.runtime.controlprogram.LocalVariableMap) RunningJob(org.apache.hadoop.mapred.RunningJob) JobConf(org.apache.hadoop.mapred.JobConf)

Example 44 with LocalVariableMap

use of org.apache.sysml.runtime.controlprogram.LocalVariableMap in project incubator-systemml by apache.

the class RemoteDPParForSpark method runJob.

/**
 * Runs a data-partitioned remote parfor as a Spark job ("ParFor-DPESP") and
 * collects the serialized results of all workers.
 *
 * @param pfid            parfor id (not used by this method)
 * @param itervar         name of the iteration variable, handed to the worker
 * @param matrixvar       name of the partitioned matrix variable
 * @param program         serialized parfor program, handed to the worker
 * @param clsMap          class map handed to the worker
 * @param resultFile      result file name (not used by this method)
 * @param input           input matrix object (not used by this method; the matrix is looked up via matrixvar)
 * @param ec              execution context; must be a SparkExecutionContext
 * @param dpf             partition format
 * @param oi              output info of the partitioned data
 * @param tSparseCol      flag handed to the worker
 * @param enableCPCaching caching flag handed to the worker
 * @param numReducers     lower bound for the number of partitions used when grouping
 * @return job return object (always flagged successful) with task/iteration counts and results
 */
public static RemoteParForJobReturn runJob(long pfid, String itervar, String matrixvar, String program, HashMap<String, byte[]> clsMap, String resultFile, MatrixObject input, ExecutionContext ec, PartitionFormat dpf, OutputInfo oi, boolean tSparseCol, boolean enableCPCaching, int numReducers) {
    final String jobLabel = "ParFor-DPESP";
    final long startNanos = DMLScript.STATISTICS ? System.nanoTime() : 0;
    SparkExecutionContext sparkCtx = (SparkExecutionContext) ec;
    JavaSparkContext jsc = sparkCtx.getSparkContext();

    // look up the partitioned matrix and its characteristics
    MatrixObject matrix = sparkCtx.getMatrixObject(matrixvar);
    MatrixCharacteristics dims = matrix.getMatrixCharacteristics();

    // input handle plus accumulators counting executed tasks and iterations
    JavaPairRDD<MatrixIndexes, MatrixBlock> blocks = sparkCtx.getBinaryBlockRDDHandleForVariable(matrixvar);
    LongAccumulator taskCounter = jsc.sc().longAccumulator("tasks");
    LongAccumulator iterCounter = jsc.sc().longAccumulator("iterations");

    // number of reducers (to avoid OOMs and reduce memory pressure):
    // at least numReducers, otherwise the smaller of preferred and data partitions
    int preferredParts = SparkUtils.getNumPreferredPartitions(dims, blocks);
    int cappedParts = Math.min(preferredParts, (int) dpf.getNumParts(dims));
    int groupingReducers = Math.max(numReducers, cappedParts);

    // core parfor datapartition-execute (w/ or w/o shuffle, depending on data characteristics);
    // the worker executes the parfor tasks, incl cleanup
    RemoteDPParForSparkWorker worker = new RemoteDPParForSparkWorker(program, clsMap, matrixvar, itervar, enableCPCaching, dims, tSparseCol, dpf, oi, taskCounter, iterCounter);
    JavaPairRDD<Long, Writable> partitioned = getPartitionedInput(sparkCtx, matrixvar, oi, dpf);
    List<Tuple2<Long, String>> serialized = (requiresGrouping(dpf, matrix) ? partitioned.groupByKey(groupingReducers) : partitioned.map(new PseudoGrouping()))
        .mapPartitionsToPair(worker)
        .collect();

    // de-serialize results and create output symbol table entries from the accumulator values
    LocalVariableMap[] results = RemoteParForUtils.getResults(serialized, LOG);
    RemoteParForJobReturn jobReturn = new RemoteParForJobReturn(true, taskCounter.value().intValue(), iterCounter.value().intValue(), results);

    // maintain statistics
    Statistics.incrementNoOfCompiledSPInst();
    Statistics.incrementNoOfExecutedSPInst();
    if (DMLScript.STATISTICS) {
        long elapsedNanos = System.nanoTime() - startNanos;
        Statistics.maintainCPHeavyHitters(jobLabel, elapsedNanos);
    }
    return jobReturn;
}
Also used : MatrixBlock(org.apache.sysml.runtime.matrix.data.MatrixBlock) MatrixObject(org.apache.sysml.runtime.controlprogram.caching.MatrixObject) MatrixIndexes(org.apache.sysml.runtime.matrix.data.MatrixIndexes) Writable(org.apache.hadoop.io.Writable) MatrixCharacteristics(org.apache.sysml.runtime.matrix.MatrixCharacteristics) LongAccumulator(org.apache.spark.util.LongAccumulator) Tuple2(scala.Tuple2) LocalVariableMap(org.apache.sysml.runtime.controlprogram.LocalVariableMap) SparkExecutionContext(org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext) JavaSparkContext(org.apache.spark.api.java.JavaSparkContext)

Example 45 with LocalVariableMap

use of org.apache.sysml.runtime.controlprogram.LocalVariableMap in project incubator-systemml by apache.

the class RemoteParForSpark method runJob.

/**
 * Runs a remote parfor as a Spark job ("ParFor-ESP"): the given tasks are
 * parallelized with one partition per task, executed by the remote workers,
 * and their serialized results are collected and de-serialized.
 *
 * @param pfid       parfor id (not used by this method)
 * @param prog       serialized parfor program, handed to the worker
 * @param clsMap     class map handed to the worker
 * @param tasks      parfor tasks to execute
 * @param ec         execution context; must be a SparkExecutionContext
 * @param cpCaching  caching flag handed to the worker
 * @param numMappers number of mappers (not used by this method)
 * @return job return object (always flagged successful) with task/iteration counts and results
 */
public static RemoteParForJobReturn runJob(long pfid, String prog, HashMap<String, byte[]> clsMap, List<Task> tasks, ExecutionContext ec, boolean cpCaching, int numMappers) {
    final String jobLabel = "ParFor-ESP";
    final long startNanos = DMLScript.STATISTICS ? System.nanoTime() : 0;
    SparkExecutionContext sparkCtx = (SparkExecutionContext) ec;
    JavaSparkContext jsc = sparkCtx.getSparkContext();

    // accumulators counting executed tasks and iterations
    LongAccumulator taskCounter = jsc.sc().longAccumulator("tasks");
    LongAccumulator iterCounter = jsc.sc().longAccumulator("iterations");

    // reset cached shared inputs for correctness in local mode
    long sparkJobId = _jobID.getNextID();
    if (InfrastructureAnalyzer.isLocalMode()) {
        RemoteParForSparkWorker.cleanupCachedVariables(sparkJobId);
    }

    // run remote_spark parfor job: create rdd of parfor tasks and execute eagerly
    // (w/o lazy evaluation to fit existing parfor framework, e.g., result merge)
    RemoteParForSparkWorker worker = new RemoteParForSparkWorker(sparkJobId, prog, clsMap, cpCaching, taskCounter, iterCounter);
    List<Tuple2<Long, String>> serialized = sc_parallelizeAndRun(jsc, tasks, worker);

    // de-serialize results and create output symbol table entries from the accumulator values
    LocalVariableMap[] results = RemoteParForUtils.getResults(serialized, LOG);
    RemoteParForJobReturn jobReturn = new RemoteParForJobReturn(true, taskCounter.value().intValue(), iterCounter.value().intValue(), results);

    // maintain statistics
    Statistics.incrementNoOfCompiledSPInst();
    Statistics.incrementNoOfExecutedSPInst();
    if (DMLScript.STATISTICS) {
        long elapsedNanos = System.nanoTime() - startNanos;
        Statistics.maintainCPHeavyHitters(jobLabel, elapsedNanos);
    }
    return jobReturn;
}

/** Parallelizes the tasks (one partition per task), runs the worker on them, and collects the output. */
private static List<Tuple2<Long, String>> sc_parallelizeAndRun(JavaSparkContext jsc, List<Task> tasks, RemoteParForSparkWorker worker) {
    return jsc.parallelize(tasks, tasks.size()).flatMapToPair(worker).collect();
}
Also used : LongAccumulator(org.apache.spark.util.LongAccumulator) Tuple2(scala.Tuple2) LocalVariableMap(org.apache.sysml.runtime.controlprogram.LocalVariableMap) SparkExecutionContext(org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext) JavaSparkContext(org.apache.spark.api.java.JavaSparkContext)

Aggregations

LocalVariableMap (org.apache.sysml.runtime.controlprogram.LocalVariableMap)64 ForStatementBlock (org.apache.sysml.parser.ForStatementBlock)19 IfStatementBlock (org.apache.sysml.parser.IfStatementBlock)19 StatementBlock (org.apache.sysml.parser.StatementBlock)19 WhileStatementBlock (org.apache.sysml.parser.WhileStatementBlock)19 FunctionStatementBlock (org.apache.sysml.parser.FunctionStatementBlock)15 ArrayList (java.util.ArrayList)13 HashMap (java.util.HashMap)12 HashSet (java.util.HashSet)11 FunctionProgramBlock (org.apache.sysml.runtime.controlprogram.FunctionProgramBlock)11 MatrixObject (org.apache.sysml.runtime.controlprogram.caching.MatrixObject)11 FunctionStatement (org.apache.sysml.parser.FunctionStatement)10 DMLRuntimeException (org.apache.sysml.runtime.DMLRuntimeException)10 ForProgramBlock (org.apache.sysml.runtime.controlprogram.ForProgramBlock)9 IfProgramBlock (org.apache.sysml.runtime.controlprogram.IfProgramBlock)9 WhileProgramBlock (org.apache.sysml.runtime.controlprogram.WhileProgramBlock)9 Path (org.apache.hadoop.fs.Path)8 ExternalFunctionStatement (org.apache.sysml.parser.ExternalFunctionStatement)8 ExecutionContext (org.apache.sysml.runtime.controlprogram.context.ExecutionContext)8 FunctionOp (org.apache.sysml.hops.FunctionOp)7