Use of org.apache.sysml.udf.Matrix in project incubator-systemml by Apache.
Class RemoveEmptyRows, method execute.
/**
 * Rewrites the text-cell matrix passed as function input 0 so that its rows are
 * numbered consecutively.
 * <p>
 * Every input line is parsed as {@code <row> <col> <value>} (space separated). Row
 * indexes are mapped to new IDs starting at 1, in the order in which they are first
 * encountered, so row indexes that never occur in the input file do not occur in the
 * output. The rewritten cells are written to a new output file, and {@code _ret} is set
 * to a matrix over that file with one row per distinct input row index and the column
 * count of the input matrix.
 *
 * @throws RuntimeException if the input file does not exist or if reading, parsing or
 *         writing fails (the original exception is attached as the cause)
 */
@Override
public void execute() {
    Matrix mat = (Matrix) this.getFunctionInput(0);
    String fnameOld = mat.getFilePath();
    // old,new rowID
    HashMap<Long, Long> keyMap = new HashMap<>();
    try {
        // prepare input
        JobConf job = new JobConf(ConfigurationManager.getCachedJobConf());
        Path path = new Path(fnameOld);
        FileSystem fs = IOUtilFunctions.getFileSystem(path, job);
        if (!fs.exists(path))
            throw new IOException("File " + fnameOld + " does not exist on HDFS.");
        FileInputFormat.addInputPath(job, path);
        TextInputFormat informat = new TextInputFormat();
        informat.configure(job);
        // prepare output
        String fnameNew = createOutputFilePathAndName(OUTPUT_FILE);
        LongWritable key = new LongWritable();
        Text value = new Text();
        long nextID = 1;
        // try-with-resources so the stream is closed on every path, including a
        // failure while computing the splits or while reading the input
        try (DataOutputStream ostream = MapReduceTool.getHDFSDataOutputStream(fnameNew, true)) {
            // read and write if necessary
            InputSplit[] splits = informat.getSplits(job, 1);
            // for obj reuse and preventing repeated buffer re-allocations
            StringBuilder sb = new StringBuilder();
            for (InputSplit split : splits) {
                RecordReader<LongWritable, Text> reader = informat.getRecordReader(split, job, Reporter.NULL);
                try {
                    while (reader.next(key, value)) {
                        String cellStr = value.toString().trim();
                        StringTokenizer st = new StringTokenizer(cellStr, " ");
                        // indexes are kept as long, so parse them as long as well
                        long row = Long.parseLong(st.nextToken());
                        long col = Long.parseLong(st.nextToken());
                        double lvalue = Double.parseDouble(st.nextToken());
                        // single lookup: assign the next free ID on first sight of a row
                        Long rowNew = keyMap.get(row);
                        if (rowNew == null) {
                            rowNew = nextID++;
                            keyMap.put(row, rowNew);
                        }
                        sb.append(rowNew.longValue());
                        sb.append(' ');
                        sb.append(col);
                        sb.append(' ');
                        sb.append(lvalue);
                        sb.append('\n');
                        ostream.writeBytes(sb.toString());
                        sb.setLength(0);
                    }
                } finally {
                    reader.close();
                }
            }
            _ret = new Matrix(fnameNew, keyMap.size(), mat.getNumCols(), ValueType.Double);
        }
    } catch (Exception ex) {
        throw new RuntimeException("Unable to execute external function.", ex);
    }
}
Use of org.apache.sysml.udf.Matrix in project incubator-systemml by Apache.
Class RowClassMeet, method execute.
/**
 * Computes, row by row, the combined classes of the labels found in the two input
 * matrices A (function input 0) and B (function input 1).
 * <p>
 * For every row, each column whose A and B values (both truncated to int) are non-zero
 * is grouped by its (A value, B value) pair. The groups of a row are numbered starting
 * at 1 in the iteration order of the hash map. Output {@code CMat} receives the group
 * number of each such cell and output {@code NMat} the number of cells in that group;
 * all other cells stay 0. Both outputs have max(rows) x max(columns) of the inputs, but
 * only the first min(columns) columns are inspected. If B has exactly one row, that row
 * is paired with every row of A.
 *
 * @throws RuntimeException if acquiring the inputs or writing the outputs fails
 */
@Override
public void execute() {
    try {
        Matrix inA = (Matrix) getFunctionInput(0);
        Matrix inB = (Matrix) getFunctionInput(1);
        MatrixBlock C;
        MatrixBlock N;
        int nr;
        int nc;
        MatrixBlock A = inA.getMatrixObject().acquireRead();
        try {
            MatrixBlock B = inB.getMatrixObject().acquireRead();
            try {
                nr = Math.max(A.getNumRows(), B.getNumRows());
                nc = Math.max(A.getNumColumns(), B.getNumColumns());
                C = new MatrixBlock(nr, nc, false).allocateBlock();
                N = new MatrixBlock(nr, nc, false).allocateBlock();
                double[] dC = C.getDenseBlockValues();
                double[] dN = N.getDenseBlockValues();
                // wrap B into a side input for efficient sparse access
                SideInput sB = CodegenUtils.createSideInput(B);
                boolean mv = (B.getNumRows() == 1);
                int numCols = Math.min(A.getNumColumns(), B.getNumColumns());
                HashMap<ClassLabel, IntArrayList> classLabelMapping = new HashMap<>();
                for (int i = 0, ai = 0; i < A.getNumRows(); i++, ai += A.getNumColumns()) {
                    classLabelMapping.clear();
                    sB.reset();
                    if (A.isInSparseFormat()) {
                        if (A.getSparseBlock() == null || A.getSparseBlock().isEmpty(i))
                            continue;
                        int alen = A.getSparseBlock().size(i);
                        int apos = A.getSparseBlock().pos(i);
                        int[] aix = A.getSparseBlock().indexes(i);
                        double[] avals = A.getSparseBlock().values(i);
                        for (int k = apos; k < apos + alen; k++) {
                            if (aix[k] >= numCols)
                                break;
                            int aval = (int) avals[k];
                            int bval = (int) sB.getValue(mv ? 0 : i, aix[k]);
                            // same zero test as the dense path, so the result does not
                            // depend on how A happens to be stored
                            if (aval != 0 && bval != 0) {
                                ClassLabel key = new ClassLabel(aval, bval);
                                IntArrayList members = classLabelMapping.get(key);
                                if (members == null) {
                                    members = new IntArrayList();
                                    classLabelMapping.put(key, members);
                                }
                                members.appendValue(aix[k]);
                            }
                        }
                    } else {
                        double[] denseBlk = A.getDenseBlockValues();
                        // no dense values allocated: nothing to label in any row
                        if (denseBlk == null)
                            break;
                        for (int j = 0; j < numCols; j++) {
                            int aVal = (int) denseBlk[ai + j];
                            int bVal = (int) sB.getValue(mv ? 0 : i, j);
                            if (aVal != 0 && bVal != 0) {
                                ClassLabel key = new ClassLabel(aVal, bVal);
                                IntArrayList members = classLabelMapping.get(key);
                                if (members == null) {
                                    members = new IntArrayList();
                                    classLabelMapping.put(key, members);
                                }
                                members.appendValue(j);
                            }
                        }
                    }
                    // write group number and group size for every member column of row i
                    int labelID = 1;
                    for (Entry<ClassLabel, IntArrayList> entry : classLabelMapping.entrySet()) {
                        int nVal = entry.getValue().size();
                        int[] list = entry.getValue().extractValues();
                        for (int k = 0, off = i * nc; k < nVal; k++) {
                            dN[off + list[k]] = nVal;
                            dC[off + list[k]] = labelID;
                        }
                        labelID++;
                    }
                }
            } finally {
                // release also on failure so the input is not left acquired
                inB.getMatrixObject().release();
            }
        } finally {
            inA.getMatrixObject().release();
        }
        // prepare outputs
        C.recomputeNonZeros();
        C.examSparsity();
        CMat = new Matrix(createOutputFilePathAndName("TMP"), nr, nc, ValueType.Double);
        CMat.setMatrixDoubleArray(C, OutputInfo.BinaryBlockOutputInfo, InputInfo.BinaryBlockInputInfo);
        N.recomputeNonZeros();
        N.examSparsity();
        NMat = new Matrix(createOutputFilePathAndName("TMP"), nr, nc, ValueType.Double);
        NMat.setMatrixDoubleArray(N, OutputInfo.BinaryBlockOutputInfo, InputInfo.BinaryBlockInputInfo);
    } catch (DMLRuntimeException | IOException e) {
        throw new RuntimeException("Error while executing RowClassMeet", e);
    }
}
Use of org.apache.sysml.udf.Matrix in project incubator-systemml by Apache.
Class SGDNesterovUpdate, method execute.
/**
 * Performs one SGD update with Nesterov momentum.
 * <p>
 * Function inputs by position: 0 = X (matrix), 1 = dX (matrix), 2 = lr (scalar),
 * 3 = mu (scalar), 4 = v (matrix), 5 = lambda (scalar). The results are stored in the
 * {@code updatedV} and {@code updatedX} outputs.
 *
 * @throws RuntimeException if writing one of the outputs fails with an I/O error
 */
@Override
public void execute() {
    try {
        Matrix inX = (Matrix) getFunctionInput(0);
        Matrix inDX = (Matrix) getFunctionInput(1);
        Matrix inV = (Matrix) getFunctionInput(4);
        // parse the scalars before acquiring any matrix so a malformed number cannot
        // leave an input acquired
        double lr = Double.parseDouble(((Scalar) getFunctionInput(2)).getValue());
        double mu = Double.parseDouble(((Scalar) getFunctionInput(3)).getValue());
        double lambda = Double.parseDouble(((Scalar) getFunctionInput(5)).getValue());
        // every successful acquireRead is paired with a release in a finally block
        MatrixBlock X = inX.getMatrixObject().acquireRead();
        try {
            MatrixBlock dX = inDX.getMatrixObject().acquireRead();
            try {
                MatrixBlock v = inV.getMatrixObject().acquireRead();
                try {
                    updateVelocityAndParameters(X, dX, v, lr, mu, lambda);
                } finally {
                    inV.getMatrixObject().release();
                }
            } finally {
                inDX.getMatrixObject().release();
            }
        } finally {
            inX.getMatrixObject().release();
        }
    } catch (IOException e) {
        throw new RuntimeException("Exception while executing SGDNesterovUpdate", e);
    }
}

/**
 * Computes the new velocity and the new parameters and stores them in
 * {@code updatedV} and {@code updatedX}:
 * <pre>
 *   v = mu * v - lr * dX - lr * lambda * X
 *   X = X - mu * v_prev + (1 + mu) * v
 * </pre>
 * Fully dense inputs are handled with explicit loops; otherwise the work is delegated
 * to the {@code multiplyByConstant} and {@code copy} helpers.
 * NOTE(review): the helpers presumably add the scaled input onto the target array -
 * confirm against their definitions.
 *
 * @throws IOException if storing one of the result blocks fails
 */
private void updateVelocityAndParameters(MatrixBlock X, MatrixBlock dX, MatrixBlock v,
        double lr, double mu, double lambda) throws IOException {
    // v = mu * v - lr * dX - lr*lambda*X
    updatedV = new Matrix("tmp_" + rand.nextLong(), v.getNumRows(), v.getNumColumns(), ValueType.Double);
    MatrixBlock updatedVMB = allocateDenseMatrixBlock(updatedV);
    double[] updatedVData = updatedVMB.getDenseBlockValues();
    if (isDense(v) && isDense(dX) && isDense(X)) {
        double[] vArr = v.getDenseBlockValues();
        double[] dXArr = dX.getDenseBlockValues();
        double[] XArr = X.getDenseBlockValues();
        int nnz = 0;
        for (int i = 0; i < updatedVData.length; i++) {
            updatedVData[i] = mu * vArr[i] - lr * dXArr[i] - lr * lambda * XArr[i];
            nnz += (updatedVData[i] != 0) ? 1 : 0;
        }
        updatedVMB.setNonZeros(nnz);
    } else {
        multiplyByConstant(v, mu, updatedVData);
        multiplyByConstant(dX, -lr, updatedVData);
        multiplyByConstant(X, -lr * lambda, updatedVData);
        updatedVMB.recomputeNonZeros();
    }
    updatedV.setMatrixDoubleArray(updatedVMB, OutputInfo.BinaryBlockOutputInfo, InputInfo.BinaryBlockInputInfo);
    // X = X - mu * v_prev + (1 + mu) * v
    updatedX = new Matrix("tmp_" + rand.nextLong(), X.getNumRows(), X.getNumColumns(), ValueType.Double);
    MatrixBlock updatedXMB = allocateDenseMatrixBlock(updatedX);
    double[] updatedXData = updatedXMB.getDenseBlockValues();
    if (isDense(X) && isDense(v)) {
        double[] XArr = X.getDenseBlockValues();
        double[] vPrevArr = v.getDenseBlockValues();
        int nnz = 0;
        double muPlus1 = mu + 1;
        for (int i = 0; i < updatedXData.length; i++) {
            updatedXData[i] = XArr[i] - mu * vPrevArr[i] + muPlus1 * updatedVData[i];
            nnz += (updatedXData[i] != 0) ? 1 : 0;
        }
        updatedXMB.setNonZeros(nnz);
    } else if (isDense(v)) {
        copy(X, updatedXData);
        double[] vPrevArr = v.getDenseBlockValues();
        int nnz = 0;
        double muPlus1 = mu + 1;
        for (int i = 0; i < updatedXData.length; i++) {
            updatedXData[i] += -mu * vPrevArr[i] + muPlus1 * updatedVData[i];
            nnz += (updatedXData[i] != 0) ? 1 : 0;
        }
        updatedXMB.setNonZeros(nnz);
    } else {
        copy(X, updatedXData);
        multiplyByConstant(v, -mu, updatedXData);
        multiplyByConstant(updatedVData, 1 + mu, updatedXData);
        updatedXMB.recomputeNonZeros();
    }
    updatedX.setMatrixDoubleArray(updatedXMB, OutputInfo.BinaryBlockOutputInfo, InputInfo.BinaryBlockInputInfo);
}
Use of org.apache.sysml.udf.Matrix in project systemml by Apache.
Class BinningWrapper, method execute.
/**
 * Derives bin boundaries from the first column of the input matrix (function input 0),
 * which is assumed to be sorted.
 * <p>
 * Function input 1 is the bin size in rows and function input 2 the maximum number of
 * bins. A bin is extended past its nominal end while the following rows hold the same
 * value as its end value, so duplicates never straddle two bins. The {@code _bins}
 * output has {@code numbins + 1} rows: the first {@code _defBins} entries are the
 * midpoints between consecutive boundaries, the entry after them is the last boundary,
 * and any remaining entries are 0. {@code _defBins} is the number of bins defined.
 *
 * @throws RuntimeException if any step fails (the original exception is the cause)
 */
@Override
public void execute() {
    try {
        // get input parameters (input matrix assumed to be sorted)
        Matrix inM = (Matrix) getFunctionInput(0);
        double[][] col = inM.getMatrixAsDoubleArray();
        int binsize = Integer.parseInt(((Scalar) getFunctionInput(1)).getValue());
        int numbins = Integer.parseInt(((Scalar) getFunctionInput(2)).getValue());
        int nrowX = (int) inM.getNumRows();
        // execute binning (extend bins for duplicates)
        double[] col_bins = new double[numbins + 1];
        int pos_col = 0;
        int bin_id = 0;
        col_bins[0] = col[0][0];
        while (pos_col < nrowX - 1 && bin_id < numbins) {
            // for all bins: step one bin width, clamped to the last row; the test is
            // written without pos_col + binsize so a huge binsize cannot overflow int
            pos_col = (binsize >= nrowX - pos_col) ? nrowX - 1 : pos_col + binsize;
            double end_val = col[pos_col][0];
            col_bins[bin_id + 1] = end_val;
            // pull all duplicates in current bin
            while (pos_col < nrowX - 1 && end_val == col[pos_col + 1][0])
                pos_col++;
            bin_id++;
        }
        // prepare results: replace each lower boundary by the midpoint of its bin
        int num_bins_defined = bin_id;
        for (int i = 0; i < num_bins_defined; i++)
            col_bins[i] = (col_bins[i] + col_bins[i + 1]) / 2;
        // create and copy output matrix
        String dir = createOutputFilePathAndName(OUTPUT_FILE);
        _bins = new Matrix(dir, col_bins.length, 1, ValueType.Double);
        _bins.setMatrixDoubleArray(col_bins);
        _defBins = new Scalar(ScalarValueType.Integer, String.valueOf(num_bins_defined));
    } catch (Exception e) {
        throw new RuntimeException("Error executing external order function", e);
    }
}
Use of org.apache.sysml.udf.Matrix in project systemml by Apache.
Class CumSumProd, method allocateOutput.
/**
 * Creates the output matrix handle {@code ret} over a fresh "TMP" output path and
 * allocates the dense result block {@code retMB}, both sized
 * {@code numRetRows} x {@code numRetCols}.
 */
private void allocateOutput() {
    final String outputPath = createOutputFilePathAndName("TMP");
    // the block constructor takes int dimensions, the matrix handle the original values
    final int blockRows = (int) numRetRows;
    final int blockCols = (int) numRetCols;
    ret = new Matrix(outputPath, numRetRows, numRetCols, ValueType.Double);
    retMB = new MatrixBlock(blockRows, blockCols, false);
    retMB.allocateDenseBlock();
}
Aggregations