Search in sources :

Example 11 with Matrix

use of org.apache.sysml.udf.Matrix in project incubator-systemml by apache.

the class RemoveEmptyRows method execute.

/**
 * Rewrites the input matrix file so that the row indexes that actually occur
 * are renumbered consecutively, starting at 1, in order of first appearance.
 *
 * <p>Each input line is parsed as three space-separated tokens
 * {@code row col value}. The remapped cells are written to a new output file
 * and {@code _ret} is set to a matrix over that file with one row per
 * distinct input row index and the column count of the input.
 *
 * @throws RuntimeException wrapping any failure while reading, parsing or writing
 */
@Override
public void execute() {
    Matrix mat = (Matrix) this.getFunctionInput(0);
    String fnameOld = mat.getFilePath();
    // old,new rowID
    HashMap<Long, Long> keyMap = new HashMap<>();
    try {
        // prepare input
        JobConf job = new JobConf(ConfigurationManager.getCachedJobConf());
        Path path = new Path(fnameOld);
        FileSystem fs = IOUtilFunctions.getFileSystem(path, job);
        if (!fs.exists(path))
            throw new IOException("File " + fnameOld + " does not exist on HDFS.");
        FileInputFormat.addInputPath(job, path);
        TextInputFormat informat = new TextInputFormat();
        informat.configure(job);
        // prepare output
        String fnameNew = createOutputFilePathAndName(OUTPUT_FILE);
        DataOutputStream ostream = MapReduceTool.getHDFSDataOutputStream(fnameNew, true);
        // read and write if necessary
        InputSplit[] splits = informat.getSplits(job, 1);
        LongWritable key = new LongWritable();
        Text value = new Text();
        // next new row index to hand out
        long nextRowId = 1;
        try {
            // for obj reuse and preventing repeated buffer re-allocations
            StringBuilder sb = new StringBuilder();
            for (InputSplit split : splits) {
                RecordReader<LongWritable, Text> reader = informat.getRecordReader(split, job, Reporter.NULL);
                try {
                    while (reader.next(key, value)) {
                        String cellStr = value.toString().trim();
                        StringTokenizer st = new StringTokenizer(cellStr, " ");
                        // indexes are kept as long, so parse them as long; parsing as int
                        // would reject any index above Integer.MAX_VALUE
                        long row = Long.parseLong(st.nextToken());
                        long col = Long.parseLong(st.nextToken());
                        double lvalue = Double.parseDouble(st.nextToken());
                        // single lookup; assign a fresh ID on first sight of this row
                        Long rowNew = keyMap.get(row);
                        if (rowNew == null) {
                            rowNew = nextRowId++;
                            keyMap.put(row, rowNew);
                        }
                        sb.append(rowNew.longValue());
                        sb.append(' ');
                        sb.append(col);
                        sb.append(' ');
                        sb.append(lvalue);
                        sb.append('\n');
                        ostream.writeBytes(sb.toString());
                        sb.setLength(0);
                    }
                } finally {
                    if (reader != null)
                        reader.close();
                }
            }
            _ret = new Matrix(fnameNew, keyMap.size(), mat.getNumCols(), ValueType.Double);
        } finally {
            if (ostream != null)
                ostream.close();
        }
    } catch (Exception ex) {
        throw new RuntimeException("Unable to execute external function.", ex);
    }
}
Also used : Path(org.apache.hadoop.fs.Path) HashMap(java.util.HashMap) DataOutputStream(java.io.DataOutputStream) Text(org.apache.hadoop.io.Text) IOException(java.io.IOException) IOException(java.io.IOException) StringTokenizer(java.util.StringTokenizer) Matrix(org.apache.sysml.udf.Matrix) TextInputFormat(org.apache.hadoop.mapred.TextInputFormat) FileSystem(org.apache.hadoop.fs.FileSystem) LongWritable(org.apache.hadoop.io.LongWritable) JobConf(org.apache.hadoop.mapred.JobConf) InputSplit(org.apache.hadoop.mapred.InputSplit)

Example 12 with Matrix

use of org.apache.sysml.udf.Matrix in project incubator-systemml by apache.

the class RowClassMeet method execute.

/**
 * For every row of A, groups the columns (within the column range shared by
 * A and B) by the pair of integer-cast values (A[i,j], B[i,j]), and writes two
 * dense outputs: C holds a per-row group ID (starting at 1, in map iteration
 * order) and N holds the size of the group each cell belongs to. If B has a
 * single row, that row is used for every row of A.
 *
 * @throws RuntimeException wrapping DMLRuntimeException or IOException
 */
@Override
public void execute() {
    try {
        MatrixBlock A = ((Matrix) getFunctionInput(0)).getMatrixObject().acquireRead();
        MatrixBlock B = ((Matrix) getFunctionInput(1)).getMatrixObject().acquireRead();
        int nr = Math.max(A.getNumRows(), B.getNumRows());
        int nc = Math.max(A.getNumColumns(), B.getNumColumns());
        MatrixBlock C = new MatrixBlock(nr, nc, false).allocateBlock();
        MatrixBlock N = new MatrixBlock(nr, nc, false).allocateBlock();
        double[] labelOut = C.getDenseBlockValues();
        double[] countOut = N.getDenseBlockValues();
        // B is accessed through a side input; A is read directly below
        SideInput sideB = CodegenUtils.createSideInput(B);
        boolean singleRowB = (B.getNumRows() == 1);
        int sharedCols = Math.min(A.getNumColumns(), B.getNumColumns());
        HashMap<ClassLabel, IntArrayList> groups = new HashMap<>();
        int denseOff = 0;
        for (int r = 0; r < A.getNumRows(); r++, denseOff += A.getNumColumns()) {
            groups.clear();
            sideB.reset();
            int bRow = singleRowB ? 0 : r;
            if (A.isInSparseFormat()) {
                // nothing to group for an empty sparse row
                if (A.getSparseBlock() == null || A.getSparseBlock().isEmpty(r))
                    continue;
                int len = A.getSparseBlock().size(r);
                int start = A.getSparseBlock().pos(r);
                int[] colIdx = A.getSparseBlock().indexes(r);
                double[] vals = A.getSparseBlock().values(r);
                int end = start + len;
                for (int p = start; p < end; p++) {
                    // stop at the first column outside the shared range
                    if (colIdx[p] >= sharedCols)
                        break;
                    int bLabel = (int) sideB.getValue(bRow, colIdx[p]);
                    if (bLabel == 0)
                        continue;
                    ClassLabel pair = new ClassLabel((int) vals[p], bLabel);
                    IntArrayList members = groups.get(pair);
                    if (members == null) {
                        members = new IntArrayList();
                        groups.put(pair, members);
                    }
                    members.appendValue(colIdx[p]);
                }
            } else {
                double[] dense = A.getDenseBlockValues();
                // no dense values at all: leave the remaining rows untouched
                if (dense == null)
                    break;
                for (int c = 0; c < sharedCols; c++) {
                    int aLabel = (int) dense[denseOff + c];
                    int bLabel = (int) sideB.getValue(bRow, c);
                    if (aLabel == 0 || bLabel == 0)
                        continue;
                    ClassLabel pair = new ClassLabel(aLabel, bLabel);
                    IntArrayList members = groups.get(pair);
                    if (members == null) {
                        members = new IntArrayList();
                        groups.put(pair, members);
                    }
                    members.appendValue(c);
                }
            }
            // emit group IDs and group sizes for this row
            int rowOff = r * nc;
            int nextLabel = 1;
            for (Entry<ClassLabel, IntArrayList> group : groups.entrySet()) {
                int groupSize = group.getValue().size();
                int[] cols = group.getValue().extractValues();
                for (int k = 0; k < groupSize; k++) {
                    int cell = rowOff + cols[k];
                    countOut[cell] = groupSize;
                    labelOut[cell] = nextLabel;
                }
                nextLabel++;
            }
        }
        ((Matrix) getFunctionInput(0)).getMatrixObject().release();
        ((Matrix) getFunctionInput(1)).getMatrixObject().release();
        // finalize and publish both outputs
        C.recomputeNonZeros();
        C.examSparsity();
        CMat = new Matrix(createOutputFilePathAndName("TMP"), nr, nc, ValueType.Double);
        CMat.setMatrixDoubleArray(C, OutputInfo.BinaryBlockOutputInfo, InputInfo.BinaryBlockInputInfo);
        N.recomputeNonZeros();
        N.examSparsity();
        NMat = new Matrix(createOutputFilePathAndName("TMP"), nr, nc, ValueType.Double);
        NMat.setMatrixDoubleArray(N, OutputInfo.BinaryBlockOutputInfo, InputInfo.BinaryBlockInputInfo);
    } catch (DMLRuntimeException | IOException e) {
        throw new RuntimeException("Error while executing RowClassMeet", e);
    }
}
Also used : MatrixBlock(org.apache.sysml.runtime.matrix.data.MatrixBlock) HashMap(java.util.HashMap) IOException(java.io.IOException) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException) Matrix(org.apache.sysml.udf.Matrix) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException) IntArrayList(org.apache.sysml.runtime.compress.utils.IntArrayList) SideInput(org.apache.sysml.runtime.codegen.SpoofOperator.SideInput)

Example 13 with Matrix

use of org.apache.sysml.udf.Matrix in project incubator-systemml by apache.

the class SGDNesterovUpdate method execute.

/**
 * Computes one SGD-with-Nesterov-momentum step from inputs X, dX, lr, mu, v
 * and lambda (function inputs 0..5) and stores the results in
 * {@code updatedV} and {@code updatedX}:
 * <pre>
 *   v_new = mu * v - lr * dX - lr * lambda * X
 *   X_new = X - mu * v + (1 + mu) * v_new
 * </pre>
 * Element-wise loops are used when the relevant inputs are dense; otherwise
 * the work is delegated to the copy / multiplyByConstant helpers.
 *
 * @throws RuntimeException wrapping an IOException raised during execution
 */
@Override
public void execute() {
    try {
        MatrixBlock X = ((Matrix) getFunctionInput(0)).getMatrixObject().acquireRead();
        MatrixBlock dX = ((Matrix) getFunctionInput(1)).getMatrixObject().acquireRead();
        double lr = Double.parseDouble(((Scalar) getFunctionInput(2)).getValue());
        double mu = Double.parseDouble(((Scalar) getFunctionInput(3)).getValue());
        MatrixBlock v = ((Matrix) getFunctionInput(4)).getMatrixObject().acquireRead();
        double lambda = Double.parseDouble(((Scalar) getFunctionInput(5)).getValue());

        // ---- velocity update: v = mu * v - lr * dX - lr*lambda*X
        updatedV = new Matrix("tmp_" + rand.nextLong(), v.getNumRows(), v.getNumColumns(), ValueType.Double);
        MatrixBlock vOutBlock = allocateDenseMatrixBlock(updatedV);
        double[] vOut = vOutBlock.getDenseBlockValues();
        if (isDense(v) && isDense(dX) && isDense(X)) {
            double[] vIn = v.getDenseBlockValues();
            double[] grad = dX.getDenseBlockValues();
            double[] weights = X.getDenseBlockValues();
            int nonZeros = 0;
            for (int idx = 0; idx < vOut.length; idx++) {
                vOut[idx] = mu * vIn[idx] - lr * grad[idx] - lr * lambda * weights[idx];
                if (vOut[idx] != 0)
                    nonZeros++;
            }
            vOutBlock.setNonZeros(nonZeros);
        } else {
            // at least one input is not dense: build the sum term by term via the helper
            multiplyByConstant(v, mu, vOut);
            multiplyByConstant(dX, -lr, vOut);
            multiplyByConstant(X, -lr * lambda, vOut);
            vOutBlock.recomputeNonZeros();
        }
        updatedV.setMatrixDoubleArray(vOutBlock, OutputInfo.BinaryBlockOutputInfo, InputInfo.BinaryBlockInputInfo);

        // ---- parameter update: X = X - mu * v_prev + (1 + mu) * v
        updatedX = new Matrix("tmp_" + rand.nextLong(), X.getNumRows(), X.getNumColumns(), ValueType.Double);
        MatrixBlock xOutBlock = allocateDenseMatrixBlock(updatedX);
        double[] xOut = xOutBlock.getDenseBlockValues();
        if (isDense(X) && isDense(v)) {
            double[] weights = X.getDenseBlockValues();
            double[] vPrev = v.getDenseBlockValues();
            int nonZeros = 0;
            double onePlusMu = mu + 1;
            for (int idx = 0; idx < xOut.length; idx++) {
                xOut[idx] = weights[idx] - mu * vPrev[idx] + onePlusMu * vOut[idx];
                if (xOut[idx] != 0)
                    nonZeros++;
            }
            xOutBlock.setNonZeros(nonZeros);
        } else if (isDense(v)) {
            // X is not dense here: seed the output with X, then add the dense terms
            copy(X, xOut);
            double[] vPrev = v.getDenseBlockValues();
            int nonZeros = 0;
            double onePlusMu = mu + 1;
            for (int idx = 0; idx < xOut.length; idx++) {
                xOut[idx] += -mu * vPrev[idx] + onePlusMu * vOut[idx];
                if (xOut[idx] != 0)
                    nonZeros++;
            }
            xOutBlock.setNonZeros(nonZeros);
        } else {
            copy(X, xOut);
            multiplyByConstant(v, -mu, xOut);
            multiplyByConstant(vOut, 1 + mu, xOut);
            xOutBlock.recomputeNonZeros();
        }
        updatedX.setMatrixDoubleArray(xOutBlock, OutputInfo.BinaryBlockOutputInfo, InputInfo.BinaryBlockInputInfo);

        // release the three matrix inputs acquired above
        ((Matrix) getFunctionInput(0)).getMatrixObject().release();
        ((Matrix) getFunctionInput(1)).getMatrixObject().release();
        ((Matrix) getFunctionInput(4)).getMatrixObject().release();
    } catch (IOException e) {
        throw new RuntimeException("Exception while executing SGDNesterovUpdate", e);
    }
}
Also used : MatrixBlock(org.apache.sysml.runtime.matrix.data.MatrixBlock) Matrix(org.apache.sysml.udf.Matrix) IOException(java.io.IOException)

Example 14 with Matrix

use of org.apache.sysml.udf.Matrix in project systemml by apache.

the class BinningWrapper method execute.

/**
 * Derives bin boundaries from the first column of the input matrix (assumed
 * to be sorted), advancing {@code binsize} rows per bin and extending a bin
 * while the following rows repeat its end value. Each boundary is then
 * replaced by the midpoint between itself and its successor. The result is
 * stored in {@code _bins} (a {@code numbins + 1} by 1 matrix) and the number
 * of bins actually defined in {@code _defBins}.
 *
 * @throws RuntimeException wrapping any exception raised during execution
 */
@Override
public void execute() {
    try {
        // read inputs: matrix, requested bin size, maximum number of bins
        Matrix input = (Matrix) getFunctionInput(0);
        double[][] data = input.getMatrixAsDoubleArray();
        int binsize = Integer.parseInt(((Scalar) getFunctionInput(1)).getValue());
        int numbins = Integer.parseInt(((Scalar) getFunctionInput(2)).getValue());
        int numRows = (int) input.getNumRows();
        int lastRow = numRows - 1;

        double[] bounds = new double[numbins + 1];
        int pos = 0;
        int binCount = 0;
        bounds[0] = data[0][0];
        while (pos < lastRow && binCount < numbins) {
            // jump one bin ahead, clamped to the last row
            pos = Math.min(pos + binsize, lastRow);
            double upper = data[pos][0];
            bounds[binCount + 1] = upper;
            // extend the bin across rows that duplicate its end value
            while (pos < lastRow && upper == data[pos + 1][0]) {
                pos++;
            }
            binCount++;
        }

        // replace each boundary by the midpoint to its successor
        for (int b = 0; b < binCount; b++) {
            bounds[b] = (bounds[b] + bounds[b + 1]) / 2;
        }

        // publish outputs
        String outPath = createOutputFilePathAndName(OUTPUT_FILE);
        _bins = new Matrix(outPath, bounds.length, 1, ValueType.Double);
        _bins.setMatrixDoubleArray(bounds);
        _defBins = new Scalar(ScalarValueType.Integer, String.valueOf(binCount));
    } catch (Exception e) {
        throw new RuntimeException("Error executing external order function", e);
    }
}
Also used : Matrix(org.apache.sysml.udf.Matrix) Scalar(org.apache.sysml.udf.Scalar)

Example 15 with Matrix

use of org.apache.sysml.udf.Matrix in project systemml by apache.

the class CumSumProd method allocateOutput.

/**
 * Creates the output matrix handle {@code ret} over a fresh "TMP" output
 * path and a non-sparse {@code retMB} block of numRetRows x numRetCols with
 * its dense block allocated.
 */
private void allocateOutput() {
    ret = new Matrix(createOutputFilePathAndName("TMP"), numRetRows, numRetCols, ValueType.Double);
    final int rows = (int) numRetRows;
    final int cols = (int) numRetCols;
    retMB = new MatrixBlock(rows, cols, false);
    retMB.allocateDenseBlock();
}
Also used : MatrixBlock(org.apache.sysml.runtime.matrix.data.MatrixBlock) Matrix(org.apache.sysml.udf.Matrix)

Aggregations

Matrix (org.apache.sysml.udf.Matrix)33 MatrixBlock (org.apache.sysml.runtime.matrix.data.MatrixBlock)19 Scalar (org.apache.sysml.udf.Scalar)14 IOException (java.io.IOException)8 MatrixObject (org.apache.sysml.runtime.controlprogram.caching.MatrixObject)8 OutputInfo (org.apache.sysml.runtime.matrix.data.OutputInfo)6 DMLRuntimeException (org.apache.sysml.runtime.DMLRuntimeException)5 ArrayList (java.util.ArrayList)4 HashMap (java.util.HashMap)4 StringTokenizer (java.util.StringTokenizer)4 InputInfo (org.apache.sysml.runtime.matrix.data.InputInfo)4 MatrixCharacteristics (org.apache.sysml.runtime.matrix.MatrixCharacteristics)3 DataOutputStream (java.io.DataOutputStream)2 FileSystem (org.apache.hadoop.fs.FileSystem)2 Path (org.apache.hadoop.fs.Path)2 LongWritable (org.apache.hadoop.io.LongWritable)2 Text (org.apache.hadoop.io.Text)2 InputSplit (org.apache.hadoop.mapred.InputSplit)2 JobConf (org.apache.hadoop.mapred.JobConf)2 TextInputFormat (org.apache.hadoop.mapred.TextInputFormat)2