Search in sources :

Example 1 with ValueType

use of org.apache.sysml.parser.Expression.ValueType in project incubator-systemml by apache.

the class ProgramConverter method serializeDataObject.

/**
 * Serializes one named data object into a single delimited string.
 * <p>
 * The result consists of the name, the data type, the value type and the value,
 * separated by {@code DATA_FIELD_DELIM}. Scalars are serialized by their string
 * value, matrices by their file name. For matrices, nine additional fields follow:
 * rows, columns, rows per block, columns per block, non-zeros, input info,
 * output info, partition format and update type.
 *
 * @param key name under which the object is serialized
 * @param dat data object to serialize (scalar or matrix)
 * @return the serialized representation
 * @throws DMLRuntimeException if the data type is neither SCALAR nor MATRIX
 */
public static String serializeDataObject(String key, Data dat) throws DMLRuntimeException {
    final DataType dt = dat.getDataType();
    final ValueType vt = dat.getValueType();
    String payload = null;
    // stays null for scalars; only matrices carry extra metadata fields
    String[] extraFields = null;
    switch(dt) {
        case SCALAR: {
            payload = ((ScalarObject) dat).getStringValue();
            break;
        }
        case MATRIX: {
            MatrixObject matrix = (MatrixObject) dat;
            MatrixFormatMetaData meta = (MatrixFormatMetaData) dat.getMetaData();
            MatrixCharacteristics dims = meta.getMatrixCharacteristics();
            payload = matrix.getFileName();
            PartitionFormat pf;
            if (matrix.getPartitionFormat() == null)
                pf = PartitionFormat.NONE;
            else
                pf = new PartitionFormat(matrix.getPartitionFormat(), matrix.getPartitionSize());
            extraFields = new String[] {
                String.valueOf(dims.getRows()),
                String.valueOf(dims.getCols()),
                String.valueOf(dims.getRowsPerBlock()),
                String.valueOf(dims.getColsPerBlock()),
                String.valueOf(dims.getNonZeros()),
                InputInfo.inputInfoToString(meta.getInputInfo()),
                OutputInfo.outputInfoToString(meta.getOutputInfo()),
                String.valueOf(pf),
                String.valueOf(matrix.getUpdateType())
            };
            break;
        }
        default:
            throw new DMLRuntimeException("Unable to serialize datatype " + dt);
    }
    // assemble the common header fields, then any matrix-specific fields
    StringBuilder out = new StringBuilder();
    out.append(key).append(DATA_FIELD_DELIM);
    out.append(dt).append(DATA_FIELD_DELIM);
    out.append(vt).append(DATA_FIELD_DELIM);
    out.append(payload);
    if (extraFields != null) {
        for (String field : extraFields) {
            out.append(DATA_FIELD_DELIM);
            out.append(field);
        }
    }
    return out.toString();
}
Also used : ScalarObject(org.apache.sysml.runtime.instructions.cp.ScalarObject) MatrixObject(org.apache.sysml.runtime.controlprogram.caching.MatrixObject) ValueType(org.apache.sysml.parser.Expression.ValueType) DataType(org.apache.sysml.parser.Expression.DataType) PartitionFormat(org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PartitionFormat) PDataPartitionFormat(org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PDataPartitionFormat) MatrixFormatMetaData(org.apache.sysml.runtime.matrix.MatrixFormatMetaData) MatrixCharacteristics(org.apache.sysml.runtime.matrix.MatrixCharacteristics) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException)

Example 2 with ValueType

use of org.apache.sysml.parser.Expression.ValueType in project incubator-systemml by apache.

the class StatementBlock method rewriteFunctionCallStatements.

/**
 * Inlines rewritable function calls in a list of statements.
 * <p>
 * Statements for which {@code isRewritableFunctionCall} returns false are copied
 * to the result unchanged. Every other statement is replaced by
 * (1) one assignment per formal input parameter that binds the call argument to a
 * prefixed copy of the parameter, (2) the statements of the function's single body
 * block rewritten with the same prefix, and (3) one assignment per output parameter
 * that binds the prefixed return variable to the caller's target.
 *
 * @param dmlProg program used to look up the called function's statement block
 * @param statements statements to scan for rewritable function calls
 * @return a new list containing the original and/or inlined statements
 * @throws LanguageException declared by this method; the validation failures below
 *         are reported through {@code raiseValidateError}
 */
public ArrayList<Statement> rewriteFunctionCallStatements(DMLProgram dmlProg, ArrayList<Statement> statements) throws LanguageException {
    ArrayList<Statement> newStatements = new ArrayList<Statement>();
    for (Statement current : statements) {
        if (isRewritableFunctionCall(current, dmlProg)) {
            // the call expression is the source of either a single or a multi assignment
            Expression sourceExpr = null;
            if (current instanceof AssignmentStatement)
                sourceExpr = ((AssignmentStatement) current).getSource();
            else
                sourceExpr = ((MultiAssignmentStatement) current).getSource();
            FunctionCallIdentifier fcall = (FunctionCallIdentifier) sourceExpr;
            FunctionStatementBlock fblock = dmlProg.getFunctionStatementBlock(fcall.getNamespace(), fcall.getName());
            // NOTE(review): the code below dereferences fblock, so this relies on
            // raiseValidateError(..., false) not returning normally -- confirm
            if (fblock == null) {
                fcall.raiseValidateError("function " + fcall.getName() + " is undefined in namespace " + fcall.getNamespace(), false);
            }
            FunctionStatement fstmt = (FunctionStatement) fblock.getStatement(0);
            // recursive inlining (no memo required because update-inplace of function statement blocks, so no redundant inlining)
            if (rIsInlineableFunction(fblock, dmlProg)) {
                fstmt.getBody().get(0).setStatements(rewriteFunctionCallStatements(dmlProg, fstmt.getBody().get(0).getStatements()));
            }
            //MB: we cannot use the hash since multiple interleaved inlined functions should be independent.
            //String prefix = new Integer(fblock.hashCode()).toString() + "_";
            // prefix applied to all formal parameter, body and return variable names of this inlined call
            String prefix = _seq.getNextID() + "_";
            if (fstmt.getBody().size() > 1) {
                sourceExpr.raiseValidateError("rewritable function can only have 1 statement block", false);
            }
            StatementBlock sblock = fstmt.getBody().get(0);
            // only too few arguments are rejected here; surplus call arguments are ignored by the loop below
            if (fcall.getParamExprs().size() < fstmt.getInputParams().size()) {
                sourceExpr.raiseValidateError("Wrong number of function parameters: " + fcall.getParamExprs().size() + ", but " + fstmt.getInputParams().size() + " expected.");
            }
            // bind each call argument to its prefixed formal parameter (positional matching)
            for (int i = 0; i < fstmt.getInputParams().size(); i++) {
                DataIdentifier currFormalParam = fstmt.getInputParams().get(i);
                // create new assignment statement
                String newFormalParameterName = prefix + currFormalParam.getName();
                DataIdentifier newTarget = new DataIdentifier(currFormalParam);
                newTarget.setName(newFormalParameterName);
                Expression currCallParam = fcall.getParamExprs().get(i).getExpr();
                //auto casting of inputs on inlining (if required)
                // a cast is inserted only for scalar, non-string targets whose value type differs from the argument's known output type
                ValueType targetVT = newTarget.getValueType();
                if (newTarget.getDataType() == DataType.SCALAR && currCallParam.getOutput() != null && targetVT != currCallParam.getOutput().getValueType() && targetVT != ValueType.STRING) {
                    currCallParam = new BuiltinFunctionExpression(BuiltinFunctionExpression.getValueTypeCastOperator(targetVT), new Expression[] { currCallParam }, newTarget.getFilename(), newTarget.getBeginLine(), newTarget.getBeginColumn(), newTarget.getEndLine(), newTarget.getEndColumn());
                }
                // create the assignment statement to bind the call parameter to formal parameter
                AssignmentStatement binding = new AssignmentStatement(newTarget, currCallParam, newTarget.getBeginLine(), newTarget.getBeginColumn(), newTarget.getEndLine(), newTarget.getEndColumn());
                newStatements.add(binding);
            }
            // append the function body with all names rewritten to the prefixed form
            for (Statement stmt : sblock._statements) {
                // rewrite the statement to use the "rewritten" name
                Statement rewrittenStmt = stmt.rewriteStatement(prefix);
                newStatements.add(rewrittenStmt);
            }
            // report functions without output parameters whose result is nevertheless assigned
            if (current instanceof AssignmentStatement) {
                if (fstmt.getOutputParams().size() == 0) {
                    AssignmentStatement as = (AssignmentStatement) current;
                    if ((as.getTargetList().size() == 1) && (as.getTargetList().get(0) != null)) {
                        raiseValidateError("Function '" + fcall.getName() + "' does not return a value but is assigned to " + as.getTargetList().get(0), true);
                    }
                }
            } else if (current instanceof MultiAssignmentStatement) {
                if (fstmt.getOutputParams().size() == 0) {
                    MultiAssignmentStatement mas = (MultiAssignmentStatement) current;
                    raiseValidateError("Function '" + fcall.getName() + "' does not return a value but is assigned to " + mas.getTargetList(), true);
                }
            }
            // handle the return values
            for (int i = 0; i < fstmt.getOutputParams().size(); i++) {
                // get the target (return parameter from function)
                DataIdentifier currReturnParam = fstmt.getOutputParams().get(i);
                String newSourceName = prefix + currReturnParam.getName();
                DataIdentifier newSource = new DataIdentifier(currReturnParam);
                newSource.setName(newSourceName);
                // get binding
                DataIdentifier newTarget = null;
                if (current instanceof AssignmentStatement) {
                    // a single assignment can receive only the first output parameter
                    if (i > 0) {
                        fstmt.raiseValidateError("Assignment statement cannot return multiple values", false);
                    }
                    AssignmentStatement as = (AssignmentStatement) current;
                    DataIdentifier targ = as.getTarget();
                    if (targ == null) {
                        // NOTE(review): newTarget stays null on this path and is dereferenced
                        // below, so this relies on raiseValidateError(..., false) not returning -- confirm
                        Expression exp = as.getSource();
                        FunctionCallIdentifier fci = (FunctionCallIdentifier) exp;
                        String functionName = fci.getName();
                        fstmt.raiseValidateError(functionName + " requires LHS value", false);
                    } else {
                        newTarget = new DataIdentifier(((AssignmentStatement) current).getTarget());
                    }
                } else {
                    // multi assignment: the i-th output parameter goes to the i-th target
                    newTarget = new DataIdentifier(((MultiAssignmentStatement) current).getTargetList().get(i));
                }
                //auto casting of outputs on inlining (always, redundant cast removed during Hop Rewrites)
                ValueType sourceVT = newSource.getValueType();
                if (newSource.getDataType() == DataType.SCALAR && sourceVT != ValueType.STRING) {
                    newSource = new BuiltinFunctionExpression(BuiltinFunctionExpression.getValueTypeCastOperator(sourceVT), new Expression[] { newSource }, newTarget.getFilename(), newTarget.getBeginLine(), newTarget.getBeginColumn(), newTarget.getEndLine(), newTarget.getEndColumn());
                }
                // create the assignment statement to bind the prefixed return variable to the caller's target
                AssignmentStatement binding = new AssignmentStatement(newTarget, newSource, newTarget.getBeginLine(), newTarget.getBeginColumn(), newTarget.getEndLine(), newTarget.getEndColumn());
                newStatements.add(binding);
            }
        } else // end if (isRewritableFunctionCall(current, dmlProg)
        {
            // not a rewritable function call: keep the statement as is
            newStatements.add(current);
        }
    }
    return newStatements;
}
Also used : ValueType(org.apache.sysml.parser.Expression.ValueType) ArrayList(java.util.ArrayList)

Example 3 with ValueType

use of org.apache.sysml.parser.Expression.ValueType in project incubator-systemml by apache.

the class ResultMergeRemoteSpark method executeParallelMerge.

/**
 * Merges the inputs into a newly created output matrix object.
 * <p>
 * If there are no inputs, the existing output object is returned as is. Otherwise
 * the merge result is attached as an RDD handle to a fresh matrix object that
 * takes over the value type, dimensions, block sizes and input/output info of the
 * old output, with a recomputed number of non-zeros.
 *
 * @param par degree of parallelism (not read by this method)
 * @return the merged matrix object, or the old output if there is nothing to merge
 * @throws DMLRuntimeException wrapping any exception raised during the merge
 */
@Override
public MatrixObject executeParallelMerge(int par) throws DMLRuntimeException {
    LOG.trace("ResultMerge (remote, spark): Execute serial merge for output " + _output.getVarName() + " (fname=" + _output.getFileName() + ")");
    // nothing to merge: return old matrix, to prevent copy
    if (_inputs == null || _inputs.length == 0)
        return _output;
    try {
        // the old output only takes part in the merge as compare matrix if it has non-zeros
        MatrixFormatMetaData oldMeta = (MatrixFormatMetaData) _output.getMetaData();
        MatrixCharacteristics oldDims = oldMeta.getMatrixCharacteristics();
        MatrixObject compare = null;
        if (oldDims.getNonZeros() != 0)
            compare = _output;
        // actual merge
        RDDObject mergedRdd = executeMerge(compare, _inputs, _output.getVarName(), oldDims.getRows(), oldDims.getCols(), oldDims.getRowsPerBlock(), oldDims.getColsPerBlock());
        // always create a new matrix object (required for nested parallelism,
        // and e.g. to prevent potential export<->read file access conflicts)
        String outName = _output.getVarName();
        MatrixObject merged = new MatrixObject(_output.getValueType(), _outputFName);
        if (outName.contains(NAME_SUFFIX))
            merged.setVarName(outName);
        else
            merged.setVarName(outName + NAME_SUFFIX);
        merged.setDataType(DataType.MATRIX);
        OutputInfo oldOutInfo = oldMeta.getOutputInfo();
        InputInfo oldInInfo = oldMeta.getInputInfo();
        MatrixCharacteristics newDims = new MatrixCharacteristics(oldDims.getRows(), oldDims.getCols(), oldDims.getRowsPerBlock(), oldDims.getColsPerBlock());
        newDims.setNonZeros(computeNonZeros(_output, convertToList(_inputs)));
        merged.setMetaData(new MatrixFormatMetaData(newDims, oldOutInfo, oldInInfo));
        merged.setRDDHandle(mergedRdd);
        return merged;
    } catch (Exception ex) {
        throw new DMLRuntimeException(ex);
    }
}
Also used : OutputInfo(org.apache.sysml.runtime.matrix.data.OutputInfo) MatrixObject(org.apache.sysml.runtime.controlprogram.caching.MatrixObject) InputInfo(org.apache.sysml.runtime.matrix.data.InputInfo) ValueType(org.apache.sysml.parser.Expression.ValueType) RDDObject(org.apache.sysml.runtime.instructions.spark.data.RDDObject) MatrixFormatMetaData(org.apache.sysml.runtime.matrix.MatrixFormatMetaData) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException) MatrixCharacteristics(org.apache.sysml.runtime.matrix.MatrixCharacteristics) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException)

Example 4 with ValueType

use of org.apache.sysml.parser.Expression.ValueType in project incubator-systemml by apache.

the class FrameReaderTextCSV method readFrameFromHDFS.

/**
 * Reads a CSV frame from the given file.
 * <p>
 * If either dimension is not positive, both dimensions are determined from the
 * file via {@code computeCSVSize} before the output frame is allocated.
 *
 * @param fname file name of the CSV input
 * @param schema value types of the columns
 * @param names column names
 * @param rlen number of rows, or a non-positive value if unknown
 * @param clen number of columns, or a non-positive value if unknown
 * @return the frame block that was read
 * @throws IOException declared for the underlying file access
 * @throws DMLRuntimeException declared for failures of the called helpers
 */
@Override
public final FrameBlock readFrameFromHDFS(String fname, ValueType[] schema, String[] names, long rlen, long clen) throws IOException, DMLRuntimeException {
    // set up job configuration, path and file system for the input
    JobConf conf = new JobConf(ConfigurationManager.getCachedJobConf());
    Path file = new Path(fname);
    FileSystem fileSystem = IOUtilFunctions.getFileSystem(file, conf);
    FileInputFormat.addInputPath(conf, file);
    // check existence and non-empty file
    checkValidInputFile(fileSystem, file);
    // unknown dimensions: derive both of them from the file
    if (!(rlen > 0 && clen > 0)) {
        Pair<Integer, Integer> dims = computeCSVSize(file, conf, fileSystem);
        rlen = dims.getKey();
        clen = dims.getValue();
    }
    // allocate the output frame from the normalized schema and column names
    ValueType[] outSchema = createOutputSchema(schema, clen);
    String[] outNames = createOutputNames(names, clen);
    FrameBlock result = createOutputFrameBlock(outSchema, outNames, rlen);
    // core read (sequential/parallel)
    readCSVFrameFromHDFS(file, conf, fileSystem, result, outSchema, outNames, rlen, clen);
    return result;
}
Also used : Path(org.apache.hadoop.fs.Path) ValueType(org.apache.sysml.parser.Expression.ValueType) FrameBlock(org.apache.sysml.runtime.matrix.data.FrameBlock) FileSystem(org.apache.hadoop.fs.FileSystem) JobConf(org.apache.hadoop.mapred.JobConf)

Example 5 with ValueType

use of org.apache.sysml.parser.Expression.ValueType in project incubator-systemml by apache.

the class FrameReaderTextCSV method readFrameFromInputStream.

/**
 * Reads a CSV frame from an input stream.
 * <p>
 * The stream is wrapped into a single input split and read in one pass. The
 * schema and column names handed to the core reader are the same normalized
 * arrays the output frame is allocated with, so the reader and the frame always
 * agree on the column layout.
 *
 * @param is input stream providing the CSV data
 * @param schema value types of the columns
 * @param names column names
 * @param rlen number of rows
 * @param clen number of columns
 * @return the frame block that was read
 * @throws IOException declared for the underlying stream access
 * @throws DMLRuntimeException declared for failures of the called helpers
 */
@Override
public FrameBlock readFrameFromInputStream(InputStream is, ValueType[] schema, String[] names, long rlen, long clen) throws IOException, DMLRuntimeException {
    //allocate output frame block
    ValueType[] lschema = createOutputSchema(schema, clen);
    String[] lnames = createOutputNames(names, clen);
    FrameBlock ret = createOutputFrameBlock(lschema, lnames, rlen);
    //core read (sequential/parallel)
    InputStreamInputFormat informat = new InputStreamInputFormat(is);
    InputSplit split = informat.getSplits(null, 1)[0];
    // pass the normalized schema/names (not the raw arguments), matching the
    // arrays used to allocate ret
    readCSVFrameFromInputSplit(split, informat, null, ret, lschema, lnames, rlen, clen, 0, true);
    return ret;
}
Also used : InputStreamInputFormat(org.apache.sysml.runtime.util.InputStreamInputFormat) ValueType(org.apache.sysml.parser.Expression.ValueType) FrameBlock(org.apache.sysml.runtime.matrix.data.FrameBlock) InputSplit(org.apache.hadoop.mapred.InputSplit)

Aggregations

ValueType (org.apache.sysml.parser.Expression.ValueType) 55, DMLRuntimeException (org.apache.sysml.runtime.DMLRuntimeException) 21, FrameBlock (org.apache.sysml.runtime.matrix.data.FrameBlock) 21, MatrixCharacteristics (org.apache.sysml.runtime.matrix.MatrixCharacteristics) 19, MatrixFormatMetaData (org.apache.sysml.runtime.matrix.MatrixFormatMetaData) 13, DataType (org.apache.sysml.parser.Expression.DataType) 11, IOException (java.io.IOException) 9, ArrayList (java.util.ArrayList) 7, MatrixObject (org.apache.sysml.runtime.controlprogram.caching.MatrixObject) 7, OutputInfo (org.apache.sysml.runtime.matrix.data.OutputInfo) 7, InputInfo (org.apache.sysml.runtime.matrix.data.InputInfo) 6, MatrixBlock (org.apache.sysml.runtime.matrix.data.MatrixBlock) 6, LongWritable (org.apache.hadoop.io.LongWritable) 5, RUNTIME_PLATFORM (org.apache.sysml.api.DMLScript.RUNTIME_PLATFORM) 5, RDDObject (org.apache.sysml.runtime.instructions.spark.data.RDDObject) 5, TestConfiguration (org.apache.sysml.test.integration.TestConfiguration) 5, Text (org.apache.hadoop.io.Text) 4, Row (org.apache.spark.sql.Row) 4, StructType (org.apache.spark.sql.types.StructType) 4, StructField (org.apache.spark.sql.types.StructField) 3