Search in sources :

Example 31 with Timing

use of org.apache.sysml.runtime.controlprogram.parfor.stat.Timing in project systemml by apache.

the class FrameDecodeTest method execDMLScriptviaJMLC.

/**
 * Runs the DML script {@code testname + ".dml"} through a JMLC connection and
 * collects the frame output "F2" of every run.
 *
 * @param testname   base name of the script, resolved under SCRIPT_DIR + TEST_DIR
 * @param F1         frame bound to input "F1" before every execution
 * @param M          frame bound to input "M"
 * @param modelReuse if true, "M" is bound once before the loop using the
 *                   three-argument setFrame overload (flag set to true);
 *                   otherwise "M" is re-bound before each execution
 * @return the "F2" frames of all runs, in execution order
 * @throws IOException wrapping any exception raised while preparing or running the script
 */
private static ArrayList<String[][]> execDMLScriptviaJMLC(String testname, String[][] F1, String[][] M, boolean modelReuse) throws IOException {
    final Timing timer = new Timing(true);
    final ArrayList<String[][]> outputs = new ArrayList<>();
    // open the JMLC connection; it is closed in the finally clause below
    final Connection connection = new Connection();
    try {
        // script-level arguments
        final HashMap<String, String> scriptArgs = new HashMap<>();
        scriptArgs.put("$TRANSFORM_SPEC", "{ \"ids\": true ,\"recode\": [ 1, 2, 3] }");

        // load the script text and precompile it with its declared inputs/outputs
        final String scriptText = connection.readScript(SCRIPT_DIR + TEST_DIR + testname + ".dml");
        final String[] inputNames = { "F1", "M" };
        final String[] outputNames = { "F2" };
        final PreparedScript prepared = connection.prepareScript(scriptText, scriptArgs, inputNames, outputNames, false);

        // with model reuse, "M" is bound a single time up front
        if (modelReuse) {
            prepared.setFrame("M", M, true);
        }

        // run the prepared script nRuns times
        for (int run = 0; run < nRuns; run++) {
            // without model reuse, "M" has to be bound again for every run
            if (!modelReuse) {
                prepared.setFrame("M", M);
            }
            prepared.setFrame("F1", F1);

            // execute and remember the output frame for later comparison
            final ResultVariables results = prepared.executeScript();
            outputs.add(results.getFrame("F2"));
        }
    } catch (Exception ex) {
        // report the failure and surface it to the caller as an IOException
        ex.printStackTrace();
        throw new IOException(ex);
    } finally {
        IOUtilFunctions.closeSilently(connection);
    }
    System.out.println("JMLC scoring w/ " + nRuns + " runs in " + timer.stop() + "ms.");
    return outputs;
}
Also used : PreparedScript(org.apache.sysml.api.jmlc.PreparedScript) HashMap(java.util.HashMap) ResultVariables(org.apache.sysml.api.jmlc.ResultVariables) ArrayList(java.util.ArrayList) Connection(org.apache.sysml.api.jmlc.Connection) IOException(java.io.IOException) Timing(org.apache.sysml.runtime.controlprogram.parfor.stat.Timing)

Example 32 with Timing

use of org.apache.sysml.runtime.controlprogram.parfor.stat.Timing in project systemml by apache.

the class ReuseModelVariablesTest method execDMLScriptviaJMLC.

/**
 * Runs the DML script {@code testname + ".dml"} through a JMLC connection and
 * collects the matrix output "predicted_y" of every run.
 *
 * @param testname   base name of the script, resolved under SCRIPT_DIR + TEST_DIR
 * @param X          input matrices; element {@code i} is bound to "X" in run {@code i},
 *                   so the list must hold at least nRuns entries
 * @param modelReuse if true, the model "W" is bound once before the loop using the
 *                   three-argument setMatrix overload (flag set to true);
 *                   otherwise "W" is re-bound before each execution
 * @return the "predicted_y" matrices of all runs, in execution order
 * @throws IOException wrapping any exception raised while preparing or running the script
 */
private static ArrayList<double[][]> execDMLScriptviaJMLC(String testname, ArrayList<double[][]> X, boolean modelReuse) throws IOException {
    final Timing timer = new Timing(true);
    final ArrayList<double[][]> predictions = new ArrayList<>();
    // open the JMLC connection; it is closed in the finally clause below
    final Connection connection = new Connection();
    try {
        // the script is parsed as DML, not PyDML
        final boolean usePyDML = false;

        // load the script text and precompile it with its declared inputs/outputs
        final String scriptText = connection.readScript(SCRIPT_DIR + TEST_DIR + testname + ".dml");
        final String[] inputNames = { "X", "W" };
        final String[] outputNames = { "predicted_y" };
        final PreparedScript prepared = connection.prepareScript(scriptText, inputNames, outputNames, usePyDML);

        // load the textual model and convert it into a rows x cols matrix
        final String modelText = connection.readScript(SCRIPT_DIR + TEST_DIR + MODEL_FILE);
        final double[][] W = connection.convertToDoubleMatrix(modelText, rows, cols);

        // with model reuse, "W" is bound a single time up front
        if (modelReuse) {
            prepared.setMatrix("W", W, true);
        }

        // run the prepared script nRuns times, one input matrix per run
        for (int run = 0; run < nRuns; run++) {
            // without model reuse, "W" has to be bound again for every run
            if (!modelReuse) {
                prepared.setMatrix("W", W);
            }
            prepared.setMatrix("X", X.get(run));

            // execute and remember the prediction for later comparison
            final ResultVariables results = prepared.executeScript();
            predictions.add(results.getMatrix("predicted_y"));
        }
    } catch (Exception ex) {
        // report the failure and surface it to the caller as an IOException
        ex.printStackTrace();
        throw new IOException(ex);
    } finally {
        IOUtilFunctions.closeSilently(connection);
    }
    System.out.println("JMLC scoring w/ " + nRuns + " runs in " + timer.stop() + "ms.");
    return predictions;
}
Also used : PreparedScript(org.apache.sysml.api.jmlc.PreparedScript) ResultVariables(org.apache.sysml.api.jmlc.ResultVariables) ArrayList(java.util.ArrayList) Connection(org.apache.sysml.api.jmlc.Connection) Timing(org.apache.sysml.runtime.controlprogram.parfor.stat.Timing) IOException(java.io.IOException)

Example 33 with Timing

use of org.apache.sysml.runtime.controlprogram.parfor.stat.Timing in project systemml by apache.

the class ParForProgramBlock method executeRemoteSparkParForDP.

/**
 * Runs this parfor through {@code RemoteDPParForSpark.runJob}, using the matrix named by
 * {@code _colocatedDPMatrix} as the partitioned job input, and merges the job's result
 * variables back into the given execution context.
 *
 * When {@code _monitor} is set, one statistic is recorded after each phase
 * (worker init, task creation, job execution, result collection).
 *
 * @param ec      execution context supplying the input matrix and receiving the results
 * @param itervar iteration variable (not referenced in this method body)
 * @param from    lower bound handed to the task partitioner
 * @param to      upper bound handed to the task partitioner
 * @param incr    increment handed to the task partitioner
 * @throws IOException declared by the signature; presumably raised by the export/job calls
 */
private void executeRemoteSparkParForDP(ExecutionContext ec, IntObject itervar, IntObject from, IntObject to, IntObject incr) throws IOException {
    // the timer only exists when monitoring is on; every use below is guarded by _monitor
    final Timing timer = _monitor ? new Timing(true) : null;

    // Step 0) check and compile to CP (if forced remote parfor)
    final boolean forcedRecompile = checkMRAndRecompileToCP(0);

    // Step 1) prepare the partitioned input matrix; this has to happen before the
    // program is serialized
    final ParForStatementBlock stmtBlock = (ParForStatementBlock) getStatementBlock();
    final MatrixObject dpMatrix = ec.getMatrixObject(_colocatedDPMatrix);
    final PartitionFormat partFormat = stmtBlock.determineDataPartitionFormat(_colocatedDPMatrix);
    // flag the matrix variable as partitioned (reverted at the end of this method)
    dpMatrix.setPartitioned(partFormat._dpf, partFormat._N);

    // Step 2) init parallel workers (serialize the program blocks)
    // NOTE: a single job is submitted and each mapper adapts filenames to its own ID;
    // the serialized string cannot be reused because variables are serialized with it.
    final ParForBody parforBody = new ParForBody(_childBlocks, _resultVars, ec);
    final HashMap<String, byte[]> classMap = new HashMap<>();
    final String serializedBody = ProgramConverter.serializeParForBody(parforBody, classMap);
    if (_monitor) {
        StatisticMonitor.putPFStat(_ID, Stat.PARFOR_INIT_PARWRK_T, timer.stop());
    }

    // Step 3) create tasks
    final TaskPartitioner taskPartitioner = createTaskPartitioner(from, to, incr);
    final String resultFileName = constructResultFileName();
    final long numIterations = taskPartitioner.getNumIterations();
    // no task list is materialized here; the iteration count doubles as the task count
    final long numCreatedTasks = numIterations;
    if (_monitor) {
        StatisticMonitor.putPFStat(_ID, Stat.PARFOR_INIT_TASKS_T, timer.stop());
    }

    // write matrices to HDFS, except the DP matrix which is the direct input of the job
    exportMatricesToHDFS(ec, _colocatedDPMatrix);

    // Step 4) submit the remote job and wait for it to finish
    // TODO runtime support for binary cell partitioning
    final OutputInfo inputInfo = OutputInfo.BinaryBlockOutputInfo;
    final RemoteParForJobReturn jobResult = RemoteDPParForSpark.runJob(_ID, _iterPredVar, _colocatedDPMatrix, serializedBody, classMap, resultFileName, dpMatrix, ec, partFormat, inputInfo, _tSparseCol, _enableCPCaching, _numThreads);
    if (_monitor) {
        StatisticMonitor.putPFStat(_ID, Stat.PARFOR_WAIT_EXEC_T, timer.stop());
    }

    // Step 5) collect the results of the parallel workers
    final int numExecutedTasks = jobResult.getNumExecutedTasks();
    final int numExecutedIterations = jobResult.getNumExecutedIterations();
    // consolidate results into the global symbol table
    consolidateAndCheckResults(ec, numIterations, numCreatedTasks, numExecutedIterations, numExecutedTasks, jobResult.getVariables());

    // undo the forced recompile from step 0, if one took place
    if (forcedRecompile) {
        releaseForcedRecompile(0);
    }
    dpMatrix.unsetPartitioned();

    if (_monitor) {
        StatisticMonitor.putPFStat(_ID, Stat.PARFOR_WAIT_RESULTS_T, timer.stop());
        StatisticMonitor.putPFStat(_ID, Stat.PARFOR_NUMTASKS, numExecutedTasks);
        StatisticMonitor.putPFStat(_ID, Stat.PARFOR_NUMITERS, numExecutedIterations);
    }
}
Also used : ParForBody(org.apache.sysml.runtime.controlprogram.parfor.ParForBody) MatrixObject(org.apache.sysml.runtime.controlprogram.caching.MatrixObject) HashMap(java.util.HashMap) OutputInfo(org.apache.sysml.runtime.matrix.data.OutputInfo) RemoteParForJobReturn(org.apache.sysml.runtime.controlprogram.parfor.RemoteParForJobReturn) ParForStatementBlock(org.apache.sysml.parser.ParForStatementBlock) Timing(org.apache.sysml.runtime.controlprogram.parfor.stat.Timing) TaskPartitioner(org.apache.sysml.runtime.controlprogram.parfor.TaskPartitioner)

Example 34 with Timing

use of org.apache.sysml.runtime.controlprogram.parfor.stat.Timing in project systemml by apache.

the class ParForProgramBlock method executeRemoteSparkParFor.

/**
 * Runs this parfor through {@code RemoteParForSpark.runJob} with an explicitly created
 * task list and merges the job's result variables back into the given execution context.
 *
 * When {@code _monitor} is set, one statistic is recorded after each phase
 * (worker init, task creation, job execution, result collection).
 *
 * @param ec      execution context whose matrices are exported and which receives the results
 * @param itervar iteration variable (not referenced in this method body)
 * @param from    lower bound handed to the task partitioner
 * @param to      upper bound handed to the task partitioner
 * @param incr    increment handed to the task partitioner
 */
private void executeRemoteSparkParFor(ExecutionContext ec, IntObject itervar, IntObject from, IntObject to, IntObject incr) {
    // the timer only exists when monitoring is on; every use below is guarded by _monitor
    final Timing timer = _monitor ? new Timing(true) : null;

    // Step 0) check and compile to CP (if forced remote parfor); the check is only
    // performed when the guard holds, otherwise the flag stays false
    // (tid = 0 because it is replaced in the remote parworker)
    final boolean forcedRecompile = FORCE_CP_ON_REMOTE_MR
        && (_optMode == POptMode.NONE || (_optMode == POptMode.CONSTRAINED && _execMode == PExecMode.REMOTE_SPARK))
        && checkMRAndRecompileToCP(0);

    // Step 1) init parallel workers (serialize the program blocks)
    // NOTE: a single job is submitted and each mapper adapts filenames to its own ID;
    // the serialized string cannot be reused because variables are serialized with it.
    final ParForBody parforBody = new ParForBody(_childBlocks, _resultVars, ec);
    final HashMap<String, byte[]> classMap = new HashMap<>();
    final String serializedBody = ProgramConverter.serializeParForBody(parforBody, classMap);
    if (_monitor) {
        StatisticMonitor.putPFStat(_ID, Stat.PARFOR_INIT_PARWRK_T, timer.stop());
    }

    // Step 2) create tasks
    final TaskPartitioner taskPartitioner = createTaskPartitioner(from, to, incr);
    final long numIterations = taskPartitioner.getNumIterations();
    // tasks are created sequentially and passed to the parfor job as its input
    final List<Task> taskList = taskPartitioner.createTasks();
    final long numCreatedTasks = taskList.size();
    if (_monitor) {
        StatisticMonitor.putPFStat(_ID, Stat.PARFOR_INIT_TASKS_T, timer.stop());
    }

    // write matrices to HDFS
    exportMatricesToHDFS(ec);

    // Step 3) submit Spark parfor job (no lazy evaluation, since collect on result)
    final RemoteParForJobReturn jobResult = RemoteParForSpark.runJob(_ID, serializedBody, classMap, taskList, ec, _enableCPCaching, _numThreads);
    if (_monitor) {
        StatisticMonitor.putPFStat(_ID, Stat.PARFOR_WAIT_EXEC_T, timer.stop());
    }

    // Step 4) collect the results of the parallel workers
    final int numExecutedTasks = jobResult.getNumExecutedTasks();
    final int numExecutedIterations = jobResult.getNumExecutedIterations();
    // consolidate results into the global symbol table
    consolidateAndCheckResults(ec, numIterations, numCreatedTasks, numExecutedIterations, numExecutedTasks, jobResult.getVariables());

    // undo the forced recompile from step 0, if one took place
    if (forcedRecompile) {
        releaseForcedRecompile(0);
    }

    if (_monitor) {
        StatisticMonitor.putPFStat(_ID, Stat.PARFOR_WAIT_RESULTS_T, timer.stop());
        StatisticMonitor.putPFStat(_ID, Stat.PARFOR_NUMTASKS, numExecutedTasks);
        StatisticMonitor.putPFStat(_ID, Stat.PARFOR_NUMITERS, numExecutedIterations);
    }
}
Also used : ParForBody(org.apache.sysml.runtime.controlprogram.parfor.ParForBody) RemoteParForJobReturn(org.apache.sysml.runtime.controlprogram.parfor.RemoteParForJobReturn) Task(org.apache.sysml.runtime.controlprogram.parfor.Task) HashMap(java.util.HashMap) Timing(org.apache.sysml.runtime.controlprogram.parfor.stat.Timing) TaskPartitioner(org.apache.sysml.runtime.controlprogram.parfor.TaskPartitioner)

Example 35 with Timing

use of org.apache.sysml.runtime.controlprogram.parfor.stat.Timing in project systemml by apache.

the class ParForProgramBlock method executeRemoteMRParForDP.

/**
 * Runs this parfor through {@code RemoteDPParForMR.runJob}, using the matrix named by
 * {@code _colocatedDPMatrix} as the partitioned job input, and merges the job's result
 * variables back into the given execution context.
 *
 * Phases:
 *   0) check and recompile MR instructions
 *   1) mark the input matrix as partitioned
 *   2) serialize the child program blocks and instructions
 *   3) set up the task partitioner and result file name
 *   4) submit the MR job and wait for its results
 *   5) collect the results from each parallel worker
 *
 * When {@code _monitor} is set, one statistic is recorded after each of the timed phases.
 *
 * @param ec      execution context supplying the input matrix and receiving the results
 * @param itervar iteration variable (not referenced in this method body)
 * @param from    lower bound handed to the task partitioner
 * @param to      upper bound handed to the task partitioner
 * @param incr    increment handed to the task partitioner
 * @throws IOException declared by the signature; presumably raised by the export/job calls
 */
private void executeRemoteMRParForDP(ExecutionContext ec, IntObject itervar, IntObject from, IntObject to, IntObject incr) throws IOException {
    // the timer only exists when monitoring is on; every use below is guarded by _monitor
    final Timing timer = _monitor ? new Timing(true) : null;

    // Step 0) check and compile to CP (if forced remote parfor)
    final boolean forcedRecompile = checkMRAndRecompileToCP(0);

    // Step 1) prepare the partitioned input matrix; this has to happen before the
    // program is serialized
    final ParForStatementBlock stmtBlock = (ParForStatementBlock) getStatementBlock();
    final MatrixObject dpMatrix = ec.getMatrixObject(_colocatedDPMatrix);
    final PartitionFormat partFormat = stmtBlock.determineDataPartitionFormat(_colocatedDPMatrix);
    // flag the matrix variable as partitioned (reverted at the end of this method)
    dpMatrix.setPartitioned(partFormat._dpf, partFormat._N);

    // Step 2) init parallel workers (serialize the program blocks)
    // NOTE: a single job is submitted and each mapper adapts filenames to its own ID;
    // the serialized string cannot be reused because variables are serialized with it.
    final ParForBody parforBody = new ParForBody(_childBlocks, _resultVars, ec);
    final String serializedBody = ProgramConverter.serializeParForBody(parforBody);
    if (_monitor) {
        StatisticMonitor.putPFStat(_ID, Stat.PARFOR_INIT_PARWRK_T, timer.stop());
    }

    // Step 3) create tasks
    final TaskPartitioner taskPartitioner = createTaskPartitioner(from, to, incr);
    final String resultFileName = constructResultFileName();
    final long numIterations = taskPartitioner.getNumIterations();
    // no task list is materialized here; the iteration count doubles as the task count
    final long numCreatedTasks = numIterations;
    if (_monitor) {
        StatisticMonitor.putPFStat(_ID, Stat.PARFOR_INIT_TASKS_T, timer.stop());
    }

    // write matrices to HDFS
    exportMatricesToHDFS(ec);

    // Step 4) submit MR job (wait for finished work)
    // Binary cell is chosen for sufficiently sparse inputs (threshold depends on the
    // partition format), binary block otherwise.
    // NOTE(review): the format is compared by reference (==) against the PartitionFormat
    // constants - confirm determineDataPartitionFormat returns those shared instances.
    final OutputInfo inputInfo;
    if ((dpMatrix.getSparsity() < 0.1 && partFormat == PartitionFormat.COLUMN_WISE)
            || (dpMatrix.getSparsity() < 0.001 && partFormat == PartitionFormat.ROW_WISE)) {
        inputInfo = OutputInfo.BinaryCellOutputInfo;
    } else {
        inputInfo = OutputInfo.BinaryBlockOutputInfo;
    }
    final RemoteParForJobReturn jobResult = RemoteDPParForMR.runJob(_ID, _iterPredVar, _colocatedDPMatrix, serializedBody, resultFileName, dpMatrix, partFormat, inputInfo, _tSparseCol, _enableCPCaching, _numThreads, _replicationDP);
    if (_monitor) {
        StatisticMonitor.putPFStat(_ID, Stat.PARFOR_WAIT_EXEC_T, timer.stop());
    }

    // Step 5) collect the results of the parallel workers
    final int numExecutedTasks = jobResult.getNumExecutedTasks();
    final int numExecutedIterations = jobResult.getNumExecutedIterations();
    // consolidate results into the global symbol table
    consolidateAndCheckResults(ec, numIterations, numCreatedTasks, numExecutedIterations, numExecutedTasks, jobResult.getVariables());

    // undo the forced recompile from step 0, if one took place
    if (forcedRecompile) {
        releaseForcedRecompile(0);
    }
    dpMatrix.unsetPartitioned();

    if (_monitor) {
        StatisticMonitor.putPFStat(_ID, Stat.PARFOR_WAIT_RESULTS_T, timer.stop());
        StatisticMonitor.putPFStat(_ID, Stat.PARFOR_NUMTASKS, numExecutedTasks);
        StatisticMonitor.putPFStat(_ID, Stat.PARFOR_NUMITERS, numExecutedIterations);
    }
}
Also used : OutputInfo(org.apache.sysml.runtime.matrix.data.OutputInfo) ParForBody(org.apache.sysml.runtime.controlprogram.parfor.ParForBody) RemoteParForJobReturn(org.apache.sysml.runtime.controlprogram.parfor.RemoteParForJobReturn) MatrixObject(org.apache.sysml.runtime.controlprogram.caching.MatrixObject) ParForStatementBlock(org.apache.sysml.parser.ParForStatementBlock) Timing(org.apache.sysml.runtime.controlprogram.parfor.stat.Timing) TaskPartitioner(org.apache.sysml.runtime.controlprogram.parfor.TaskPartitioner)

Aggregations

Timing (org.apache.sysml.runtime.controlprogram.parfor.stat.Timing): 78, IOException (java.io.IOException): 31, ArrayList (java.util.ArrayList): 29, DMLRuntimeException (org.apache.sysml.runtime.DMLRuntimeException): 25, HashMap (java.util.HashMap): 24, Connection (org.apache.sysml.api.jmlc.Connection): 17, PreparedScript (org.apache.sysml.api.jmlc.PreparedScript): 17, ResultVariables (org.apache.sysml.api.jmlc.ResultVariables): 17, MatrixBlock (org.apache.sysml.runtime.matrix.data.MatrixBlock): 17, MatrixObject (org.apache.sysml.runtime.controlprogram.caching.MatrixObject): 14, ParForStatementBlock (org.apache.sysml.parser.ParForStatementBlock): 10, TaskPartitioner (org.apache.sysml.runtime.controlprogram.parfor.TaskPartitioner): 10, ParForBody (org.apache.sysml.runtime.controlprogram.parfor.ParForBody): 8, RemoteParForJobReturn (org.apache.sysml.runtime.controlprogram.parfor.RemoteParForJobReturn): 8, LocalVariableMap (org.apache.sysml.runtime.controlprogram.LocalVariableMap): 7, ProgramBlock (org.apache.sysml.runtime.controlprogram.ProgramBlock): 7, ExecutorService (java.util.concurrent.ExecutorService): 6, Future (java.util.concurrent.Future): 6, LocalTaskQueue (org.apache.sysml.runtime.controlprogram.parfor.LocalTaskQueue): 6, Task (org.apache.sysml.runtime.controlprogram.parfor.Task): 6