Example 21 with MatrixCell

Use of org.apache.sysml.runtime.matrix.data.MatrixCell in project incubator-systemml by apache.

From class DataPartitionerLocal, method writeBinaryCellSequenceFileToHDFS.

@SuppressWarnings("deprecation")
public void writeBinaryCellSequenceFileToHDFS(JobConf job, String dir, String lpdir) throws IOException {
    long key = getKeyFromFilePath(lpdir);
    Path path = new Path(dir + "/" + key);
    FileSystem fs = IOUtilFunctions.getFileSystem(path, job);
    // beware: constructing the SequenceFile.Writer takes ca. 50ms
    SequenceFile.Writer writer = new SequenceFile.Writer(fs, job, path, MatrixIndexes.class, MatrixCell.class);
    try {
        MatrixIndexes indexes = new MatrixIndexes();
        MatrixCell cell = new MatrixCell();
        String[] fnameBlocks = new File(lpdir).list();
        for (String fnameBlock : fnameBlocks) {
            LinkedList<Cell> tmp = StagingFileUtils.readCellListFromLocal(lpdir + "/" + fnameBlock);
            for (Cell c : tmp) {
                indexes.setIndexes(c.getRow(), c.getCol());
                cell.setValue(c.getValue());
                writer.append(indexes, cell);
            }
        }
    } finally {
        IOUtilFunctions.closeSilently(writer);
    }
}
Also used : Path(org.apache.hadoop.fs.Path) MatrixIndexes(org.apache.sysml.runtime.matrix.data.MatrixIndexes) SequenceFile(org.apache.hadoop.io.SequenceFile) FileSystem(org.apache.hadoop.fs.FileSystem) MatrixCell(org.apache.sysml.runtime.matrix.data.MatrixCell) File(java.io.File) Cell(org.apache.sysml.runtime.controlprogram.parfor.util.Cell) OutputStreamWriter(java.io.OutputStreamWriter) BufferedWriter(java.io.BufferedWriter)
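
For completeness, a minimal sketch of the matching read path, i.e. scanning such a binary-cell SequenceFile back into (MatrixIndexes, MatrixCell) pairs. This is not part of the original example; the method name and the println are illustrative assumptions, while the API calls mirror those used in the snippets on this page:

@SuppressWarnings("deprecation")
public static void readBinaryCellSequenceFileFromHDFS(JobConf job, Path path) throws IOException {
    FileSystem fs = IOUtilFunctions.getFileSystem(path, job);
    SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, job);
    try {
        // reusable key/value objects; next(...) fills them in place, avoiding per-record allocation
        MatrixIndexes indexes = new MatrixIndexes();
        MatrixCell cell = new MatrixCell();
        while (reader.next(indexes, cell)) {
            System.out.println(indexes.getRowIndex() + "," + indexes.getColumnIndex() + ": " + cell.getValue());
        }
    } finally {
        IOUtilFunctions.closeSilently(reader);
    }
}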

Example 22 with MatrixCell

Use of org.apache.sysml.runtime.matrix.data.MatrixCell in project incubator-systemml by apache.

From class ResultMergeLocalFile, method mergeBinaryCellWithoutComp.

@SuppressWarnings("deprecation")
private static void mergeBinaryCellWithoutComp(String fnameNew, MatrixObject outMo, ArrayList<MatrixObject> inMO) {
    try {
        // delete target file if already exists
        MapReduceTool.deleteFileIfExistOnHDFS(fnameNew);
        if (ALLOW_COPY_CELLFILES) {
            copyAllFiles(fnameNew, inMO);
            // we're done
            return;
        }
        // actual merge
        JobConf job = new JobConf(ConfigurationManager.getCachedJobConf());
        Path path = new Path(fnameNew);
        FileSystem fs = IOUtilFunctions.getFileSystem(path, job);
        // beware: constructing the SequenceFile.Writer takes ca. 50ms
        SequenceFile.Writer out = new SequenceFile.Writer(fs, job, path, MatrixIndexes.class, MatrixCell.class);
        MatrixIndexes key = new MatrixIndexes();
        MatrixCell value = new MatrixCell();
        try {
            // read/write all inputs
            for (MatrixObject in : inMO) {
                if (LOG.isTraceEnabled())
                    LOG.trace("ResultMerge (local, file): Merge input " + in.hashCode() + " (fname=" + in.getFileName() + ") via stream merge");
                JobConf tmpJob = new JobConf(ConfigurationManager.getCachedJobConf());
                Path tmpPath = new Path(in.getFileName());
                for (Path lpath : IOUtilFunctions.getSequenceFilePaths(fs, tmpPath)) {
                    SequenceFile.Reader reader = new SequenceFile.Reader(fs, lpath, tmpJob);
                    try {
                        while (reader.next(key, value)) {
                            out.append(key, value);
                        }
                    } finally {
                        IOUtilFunctions.closeSilently(reader);
                    }
                }
            }
        } finally {
            IOUtilFunctions.closeSilently(out);
        }
    } catch (Exception ex) {
        throw new DMLRuntimeException("Unable to merge binary cell results.", ex);
    }
}
Also used : Path(org.apache.hadoop.fs.Path) MatrixObject(org.apache.sysml.runtime.controlprogram.caching.MatrixObject) MatrixIndexes(org.apache.sysml.runtime.matrix.data.MatrixIndexes) RecordReader(org.apache.hadoop.mapred.RecordReader) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException) IOException(java.io.IOException) SequenceFile(org.apache.hadoop.io.SequenceFile) FileSystem(org.apache.hadoop.fs.FileSystem) MatrixCell(org.apache.sysml.runtime.matrix.data.MatrixCell) JobConf(org.apache.hadoop.mapred.JobConf) OutputStreamWriter(java.io.OutputStreamWriter) BufferedWriter(java.io.BufferedWriter)
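
Stripped of the SystemML specifics, the core of the method above is a stream merge of several SequenceFiles into one output file. A minimal standalone sketch of that pattern, assuming the caller supplies the input paths (java.util.List import assumed):

@SuppressWarnings("deprecation")
public static void mergeSequenceFiles(JobConf job, List<Path> inputs, Path output) throws IOException {
    FileSystem fs = IOUtilFunctions.getFileSystem(output, job);
    SequenceFile.Writer out = new SequenceFile.Writer(fs, job, output, MatrixIndexes.class, MatrixCell.class);
    MatrixIndexes key = new MatrixIndexes();
    MatrixCell value = new MatrixCell();
    try {
        for (Path in : inputs) {
            SequenceFile.Reader reader = new SequenceFile.Reader(fs, in, job);
            try {
                // key/value objects are reused across all files and records
                while (reader.next(key, value))
                    out.append(key, value);
            } finally {
                IOUtilFunctions.closeSilently(reader);
            }
        }
    } finally {
        IOUtilFunctions.closeSilently(out);
    }
}

As in the original, records are appended in input order with no deduplication; correctness relies on the inputs covering disjoint cells.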

Example 23 with MatrixCell

Use of org.apache.sysml.runtime.matrix.data.MatrixCell in project incubator-systemml by apache.

From class CtableSPInstruction, method processInstruction.

@Override
public void processInstruction(ExecutionContext ec) {
    SparkExecutionContext sec = (SparkExecutionContext) ec;
    // get input rdd handle
    JavaPairRDD<MatrixIndexes, MatrixBlock> in1 = sec.getBinaryBlockRDDHandleForVariable(input1.getName());
    JavaPairRDD<MatrixIndexes, MatrixBlock> in2 = null;
    JavaPairRDD<MatrixIndexes, MatrixBlock> in3 = null;
    double scalar_input2 = -1, scalar_input3 = -1;
    Ctable.OperationTypes ctableOp = Ctable.findCtableOperationByInputDataTypes(input1.getDataType(), input2.getDataType(), input3.getDataType());
    ctableOp = _isExpand ? Ctable.OperationTypes.CTABLE_EXPAND_SCALAR_WEIGHT : ctableOp;
    MatrixCharacteristics mc1 = sec.getMatrixCharacteristics(input1.getName());
    MatrixCharacteristics mcOut = sec.getMatrixCharacteristics(output.getName());
    // First get the block sizes and then set them to -1 to allow for binary cell reblock
    int brlen = mc1.getRowsPerBlock();
    int bclen = mc1.getColsPerBlock();
    JavaPairRDD<MatrixIndexes, ArrayList<MatrixBlock>> inputMBs = null;
    JavaPairRDD<MatrixIndexes, CTableMap> ctables = null;
    JavaPairRDD<MatrixIndexes, Double> bincellsNoFilter = null;
    boolean setLineage2 = false;
    boolean setLineage3 = false;
    switch(ctableOp) {
        case CTABLE_TRANSFORM: // (VECTOR)
            // F=ctable(A,B,W)
            in2 = sec.getBinaryBlockRDDHandleForVariable(input2.getName());
            in3 = sec.getBinaryBlockRDDHandleForVariable(input3.getName());
            setLineage2 = true;
            setLineage3 = true;
            inputMBs = in1.cogroup(in2).cogroup(in3).mapToPair(new MapThreeMBIterableIntoAL());
            ctables = inputMBs.mapToPair(new PerformCTableMapSideOperation(ctableOp, scalar_input2, scalar_input3, this.instString, (SimpleOperator) _optr, _ignoreZeros));
            break;
        case CTABLE_EXPAND_SCALAR_WEIGHT: // (VECTOR)
            // F = ctable(seq,A) or F = ctable(seq,B,1)
            scalar_input3 = sec.getScalarInput(input3.getName(), input3.getValueType(), input3.isLiteral()).getDoubleValue();
            if (scalar_input3 == 1) {
                in2 = sec.getBinaryBlockRDDHandleForVariable(input2.getName());
                setLineage2 = true;
                bincellsNoFilter = in2.flatMapToPair(new ExpandScalarCtableOperation(brlen));
                break;
            }
        // intentional fall-through from CTABLE_EXPAND_SCALAR_WEIGHT when the scalar weight is not 1
        case CTABLE_TRANSFORM_SCALAR_WEIGHT: // (VECTOR/MATRIX)
            // F = ctable(A,B) or F = ctable(A,B,1)
            in2 = sec.getBinaryBlockRDDHandleForVariable(input2.getName());
            setLineage2 = true;
            scalar_input3 = sec.getScalarInput(input3.getName(), input3.getValueType(), input3.isLiteral()).getDoubleValue();
            inputMBs = in1.cogroup(in2).mapToPair(new MapTwoMBIterableIntoAL());
            ctables = inputMBs.mapToPair(new PerformCTableMapSideOperation(ctableOp, scalar_input2, scalar_input3, this.instString, (SimpleOperator) _optr, _ignoreZeros));
            break;
        case CTABLE_TRANSFORM_HISTOGRAM: // (VECTOR)
            // F=ctable(A,1) or F = ctable(A,1,1)
            scalar_input2 = sec.getScalarInput(input2.getName(), input2.getValueType(), input2.isLiteral()).getDoubleValue();
            scalar_input3 = sec.getScalarInput(input3.getName(), input3.getValueType(), input3.isLiteral()).getDoubleValue();
            inputMBs = in1.mapToPair(new MapMBIntoAL());
            ctables = inputMBs.mapToPair(new PerformCTableMapSideOperation(ctableOp, scalar_input2, scalar_input3, this.instString, (SimpleOperator) _optr, _ignoreZeros));
            break;
        case CTABLE_TRANSFORM_WEIGHTED_HISTOGRAM: // (VECTOR)
            // F=ctable(A,1,W)
            in3 = sec.getBinaryBlockRDDHandleForVariable(input3.getName());
            setLineage3 = true;
            scalar_input2 = sec.getScalarInput(input2.getName(), input2.getValueType(), input2.isLiteral()).getDoubleValue();
            inputMBs = in1.cogroup(in3).mapToPair(new MapTwoMBIterableIntoAL());
            ctables = inputMBs.mapToPair(new PerformCTableMapSideOperation(ctableOp, scalar_input2, scalar_input3, this.instString, (SimpleOperator) _optr, _ignoreZeros));
            break;
        default:
            throw new DMLRuntimeException("Encountered an invalid ctable operation (" + ctableOp + ") while executing instruction: " + this.toString());
    }
    // Now perform aggregation on ctables to get binaryCells
    if (bincellsNoFilter == null && ctables != null) {
        bincellsNoFilter = ctables.values().flatMapToPair(new ExtractBinaryCellsFromCTable());
        bincellsNoFilter = RDDAggregateUtils.sumCellsByKeyStable(bincellsNoFilter);
    } else if (!(bincellsNoFilter != null && ctables == null)) {
        throw new DMLRuntimeException("Incorrect ctable operation");
    }
    // handle known/unknown dimensions
    long outputDim1 = (_dim1Literal ? (long) Double.parseDouble(_outDim1) : (sec.getScalarInput(_outDim1, ValueType.DOUBLE, false)).getLongValue());
    long outputDim2 = (_dim2Literal ? (long) Double.parseDouble(_outDim2) : (sec.getScalarInput(_outDim2, ValueType.DOUBLE, false)).getLongValue());
    MatrixCharacteristics mcBinaryCells = null;
    boolean findDimensions = (outputDim1 == -1 && outputDim2 == -1);
    if (!findDimensions) {
        if ((outputDim1 == -1 && outputDim2 != -1) || (outputDim1 != -1 && outputDim2 == -1))
            throw new DMLRuntimeException("Incorrect output dimensions passed to TernarySPInstruction:" + outputDim1 + " " + outputDim2);
        else
            mcBinaryCells = new MatrixCharacteristics(outputDim1, outputDim2, brlen, bclen);
        // filtering according to given dimensions
        bincellsNoFilter = bincellsNoFilter.filter(new FilterCells(mcBinaryCells.getRows(), mcBinaryCells.getCols()));
    }
    // convert double values to matrix cell
    JavaPairRDD<MatrixIndexes, MatrixCell> binaryCells = bincellsNoFilter.mapToPair(new ConvertToBinaryCell());
    // find dimensions if necessary (w/ cache for reblock)
    if (findDimensions) {
        binaryCells = SparkUtils.cacheBinaryCellRDD(binaryCells);
        mcBinaryCells = SparkUtils.computeMatrixCharacteristics(binaryCells);
    }
    // store output rdd handle
    sec.setRDDHandleForVariable(output.getName(), binaryCells);
    mcOut.set(mcBinaryCells);
    // Since we are outputting binary cells, we set block sizes = -1
    mcOut.setRowsPerBlock(-1);
    mcOut.setColsPerBlock(-1);
    sec.addLineageRDD(output.getName(), input1.getName());
    if (setLineage2)
        sec.addLineageRDD(output.getName(), input2.getName());
    if (setLineage3)
        sec.addLineageRDD(output.getName(), input3.getName());
}
Also used : MatrixBlock(org.apache.sysml.runtime.matrix.data.MatrixBlock) ArrayList(java.util.ArrayList) Ctable(org.apache.sysml.lops.Ctable) MatrixCell(org.apache.sysml.runtime.matrix.data.MatrixCell) SparkExecutionContext(org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext) MatrixIndexes(org.apache.sysml.runtime.matrix.data.MatrixIndexes) MatrixCharacteristics(org.apache.sysml.runtime.matrix.MatrixCharacteristics) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException) CTableMap(org.apache.sysml.runtime.matrix.data.CTableMap)
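
The final mapToPair step wraps each aggregated (MatrixIndexes, Double) pair into a (MatrixIndexes, MatrixCell) pair. The actual ConvertToBinaryCell source is not shown on this page; a hedged sketch of what such a conversion could look like (org.apache.spark.api.java.function.PairFunction and scala.Tuple2 imports assumed):

// illustrative sketch only; the real ConvertToBinaryCell may differ in detail
private static class ConvertToBinaryCellSketch implements PairFunction<Tuple2<MatrixIndexes, Double>, MatrixIndexes, MatrixCell> {
    private static final long serialVersionUID = 1L;

    @Override
    public Tuple2<MatrixIndexes, MatrixCell> call(Tuple2<MatrixIndexes, Double> kv) throws Exception {
        // keep the cell index as key and wrap the aggregated double value into a MatrixCell
        MatrixIndexes ix = new MatrixIndexes(kv._1().getRowIndex(), kv._1().getColumnIndex());
        return new Tuple2<>(ix, new MatrixCell(kv._2()));
    }
}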

Example 24 with MatrixCell

Use of org.apache.sysml.runtime.matrix.data.MatrixCell in project incubator-systemml by apache.

From class WriterBinaryCell, method writeEmptyMatrixToHDFS.

@Override
@SuppressWarnings("deprecation")
public void writeEmptyMatrixToHDFS(String fname, long rlen, long clen, int brlen, int bclen) throws IOException, DMLRuntimeException {
    JobConf job = new JobConf(ConfigurationManager.getCachedJobConf());
    Path path = new Path(fname);
    FileSystem fs = IOUtilFunctions.getFileSystem(path, job);
    SequenceFile.Writer writer = null;
    try {
        writer = new SequenceFile.Writer(fs, job, path, MatrixIndexes.class, MatrixCell.class);
        MatrixIndexes index = new MatrixIndexes(1, 1);
        MatrixCell cell = new MatrixCell(0);
        writer.append(index, cell);
    } finally {
        IOUtilFunctions.closeSilently(writer);
    }
    IOUtilFunctions.deleteCrcFilesFromLocalFileSystem(fs, path);
}
Also used : Path(org.apache.hadoop.fs.Path) SequenceFile(org.apache.hadoop.io.SequenceFile) MatrixIndexes(org.apache.sysml.runtime.matrix.data.MatrixIndexes) FileSystem(org.apache.hadoop.fs.FileSystem) MatrixCell(org.apache.sysml.runtime.matrix.data.MatrixCell) JobConf(org.apache.hadoop.mapred.JobConf)
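
A minimal usage sketch for this method. Note that SystemML normally obtains matrix writers through a factory rather than by direct construction, so both the direct instantiation and the HDFS path below are illustrative assumptions:

// hypothetical driver code; writes a single (1,1,0) sentinel cell for an "empty" 1000 x 1000 matrix
WriterBinaryCell writer = new WriterBinaryCell();
writer.writeEmptyMatrixToHDFS("hdfs:///tmp/emptyMatrix", 1000, 1000, 1000, 1000);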

Example 25 with MatrixCell

Use of org.apache.sysml.runtime.matrix.data.MatrixCell in project incubator-systemml by apache.

From class WriterBinaryCell, method writeBinaryCellMatrixToHDFS.

@SuppressWarnings("deprecation")
protected void writeBinaryCellMatrixToHDFS(Path path, JobConf job, MatrixBlock src, long rlen, long clen, int brlen, int bclen) throws IOException {
    boolean sparse = src.isInSparseFormat();
    boolean entriesWritten = false;
    FileSystem fs = IOUtilFunctions.getFileSystem(path, job);
    SequenceFile.Writer writer = new SequenceFile.Writer(fs, job, path, MatrixIndexes.class, MatrixCell.class);
    MatrixIndexes indexes = new MatrixIndexes();
    MatrixCell cell = new MatrixCell();
    int rows = src.getNumRows();
    int cols = src.getNumColumns();
    try {
        // bound check per block
        if (rows > rlen || cols > clen) {
            throw new IOException("Matrix block [1:" + rows + ",1:" + cols + "] " + "out of overall matrix range [1:" + rlen + ",1:" + clen + "].");
        }
        if (sparse) { // SPARSE
            Iterator<IJV> iter = src.getSparseBlockIterator();
            while (iter.hasNext()) {
                IJV lcell = iter.next();
                indexes.setIndexes(lcell.getI() + 1, lcell.getJ() + 1);
                cell.setValue(lcell.getV());
                writer.append(indexes, cell);
                entriesWritten = true;
            }
        } else { // DENSE
            for (int i = 0; i < rows; i++)
                for (int j = 0; j < cols; j++) {
                double lvalue = src.getValueDenseUnsafe(i, j);
                if (lvalue != 0) { // for nnz: write only nonzero entries
                    indexes.setIndexes(i + 1, j + 1);
                    cell.setValue(lvalue);
                    writer.append(indexes, cell);
                    entriesWritten = true;
                }
            }
        }
        // handle empty result
        if (!entriesWritten) {
            writer.append(new MatrixIndexes(1, 1), new MatrixCell(0));
        }
    } finally {
        IOUtilFunctions.closeSilently(writer);
    }
}
Also used : SequenceFile(org.apache.hadoop.io.SequenceFile) IJV(org.apache.sysml.runtime.matrix.data.IJV) MatrixIndexes(org.apache.sysml.runtime.matrix.data.MatrixIndexes) FileSystem(org.apache.hadoop.fs.FileSystem) MatrixCell(org.apache.sysml.runtime.matrix.data.MatrixCell) IOException(java.io.IOException)
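
To sanity-check a file produced by this writer, one can count the appended records. The helper below is an illustrative assumption, not part of WriterBinaryCell; note that an empty result still contains the single (1,1,0) sentinel cell appended above:

@SuppressWarnings("deprecation")
public static long countBinaryCells(JobConf job, Path path) throws IOException {
    FileSystem fs = IOUtilFunctions.getFileSystem(path, job);
    SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, job);
    MatrixIndexes indexes = new MatrixIndexes();
    MatrixCell cell = new MatrixCell();
    long count = 0;
    try {
        while (reader.next(indexes, cell))
            count++;
    } finally {
        IOUtilFunctions.closeSilently(reader);
    }
    return count;
}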

Aggregations

MatrixCell (org.apache.sysml.runtime.matrix.data.MatrixCell): 35 usages
MatrixIndexes (org.apache.sysml.runtime.matrix.data.MatrixIndexes): 33 usages
SequenceFile (org.apache.hadoop.io.SequenceFile): 21 usages
FileSystem (org.apache.hadoop.fs.FileSystem): 19 usages
Path (org.apache.hadoop.fs.Path): 17 usages
DMLRuntimeException (org.apache.sysml.runtime.DMLRuntimeException): 17 usages
MatrixBlock (org.apache.sysml.runtime.matrix.data.MatrixBlock): 13 usages
IOException (java.io.IOException): 12 usages
JobConf (org.apache.hadoop.mapred.JobConf): 11 usages
MatrixCharacteristics (org.apache.sysml.runtime.matrix.MatrixCharacteristics): 11 usages
BufferedWriter (java.io.BufferedWriter): 7 usages
File (java.io.File): 7 usages
OutputStreamWriter (java.io.OutputStreamWriter): 7 usages
ArrayList (java.util.ArrayList): 6 usages
RecordReader (org.apache.hadoop.mapred.RecordReader): 6 usages
Cell (org.apache.sysml.runtime.controlprogram.parfor.util.Cell): 6 usages
IJV (org.apache.sysml.runtime.matrix.data.IJV): 5 usages
LinkedList (java.util.LinkedList): 4 usages
MatrixObject (org.apache.sysml.runtime.controlprogram.caching.MatrixObject): 4 usages
CTableMap (org.apache.sysml.runtime.matrix.data.CTableMap): 4 usages