Search in sources :

Example 16 with DenseBlock

use of org.apache.sysml.runtime.matrix.data.DenseBlock in project systemml by apache.

the class ColGroupDDC1 method computeRowSums.

public static void computeRowSums(ColGroupDDC1[] grps, MatrixBlock result, KahanFunction kplus, int rl, int ru) {
    // note: due to corrections the output might be a large dense block
    DenseBlock c = result.getDenseBlock();
    KahanObject kbuff = new KahanObject(0, 0);
    KahanPlus kplus2 = KahanPlus.getKahanPlusFnObject();
    // prepare distinct values once
    double[][] vals = new double[grps.length][];
    for (int i = 0; i < grps.length; i++) {
        // pre-aggregate all distinct values (guaranteed <=255)
        vals[i] = grps[i].sumAllValues(kplus, kbuff);
    }
    // cache-conscious row sums operations
    // iterative over codes of all groups and add to output
    // (use kahan plus not general KahanFunction for correctness in case of sqk+)
    // 16KB
    int blksz = 1024;
    double[] tmpAgg = new double[blksz];
    for (int bi = rl; bi < ru; bi += blksz) {
        Arrays.fill(tmpAgg, 0);
        // aggregate all groups
        for (int j = 0; j < grps.length; j++) {
            double[] valsj = vals[j];
            byte[] dataj = grps[j]._data;
            for (int i = bi; i < Math.min(bi + blksz, ru); i++) tmpAgg[i - bi] += valsj[dataj[i] & 0xFF];
        }
        // add partial results of all ddc groups
        for (int i = bi; i < Math.min(bi + blksz, ru); i++) {
            double[] cvals = c.values(i);
            int cix = c.pos(i);
            kbuff.set(cvals[cix], cvals[cix + 1]);
            kplus2.execute2(kbuff, tmpAgg[i - bi]);
            cvals[cix] = kbuff._sum;
            cvals[cix + 1] = kbuff._correction;
        }
    }
}
Also used : DenseBlock(org.apache.sysml.runtime.matrix.data.DenseBlock) KahanObject(org.apache.sysml.runtime.instructions.cp.KahanObject) KahanPlus(org.apache.sysml.runtime.functionobjects.KahanPlus)

Example 17 with DenseBlock

use of org.apache.sysml.runtime.matrix.data.DenseBlock in project systemml by apache.

the class ColGroupDDC1 method computeRowSums.

@Override
protected void computeRowSums(MatrixBlock result, KahanFunction kplus, int rl, int ru) {
    // note: due to corrections the output might be a large dense block
    DenseBlock c = result.getDenseBlock();
    KahanObject kbuff = new KahanObject(0, 0);
    KahanPlus kplus2 = KahanPlus.getKahanPlusFnObject();
    // pre-aggregate nnz per value tuple
    double[] vals = sumAllValues(kplus, kbuff, false);
    // for correctness in case of sqk+)
    for (int i = rl; i < ru; i++) {
        double[] cvals = c.values(i);
        int cix = c.pos(i);
        kbuff.set(cvals[cix], cvals[cix + 1]);
        kplus2.execute2(kbuff, vals[_data[i] & 0xFF]);
        cvals[cix] = kbuff._sum;
        cvals[cix + 1] = kbuff._correction;
    }
}
Also used : DenseBlock(org.apache.sysml.runtime.matrix.data.DenseBlock) KahanObject(org.apache.sysml.runtime.instructions.cp.KahanObject) KahanPlus(org.apache.sysml.runtime.functionobjects.KahanPlus)

Example 18 with DenseBlock

use of org.apache.sysml.runtime.matrix.data.DenseBlock in project systemml by apache.

the class ResultMergeLocalFile method createTextCellResultFile.

private void createTextCellResultFile(String fnameStaging, String fnameStagingCompare, String fnameNew, MetaDataFormat metadata, boolean withCompare) throws IOException, DMLRuntimeException {
    JobConf job = new JobConf(ConfigurationManager.getCachedJobConf());
    Path path = new Path(fnameNew);
    FileSystem fs = IOUtilFunctions.getFileSystem(path, job);
    MatrixCharacteristics mc = metadata.getMatrixCharacteristics();
    long rlen = mc.getRows();
    long clen = mc.getCols();
    int brlen = mc.getRowsPerBlock();
    int bclen = mc.getColsPerBlock();
    try (BufferedWriter out = new BufferedWriter(new OutputStreamWriter(fs.create(path, true)))) {
        // for obj reuse and preventing repeated buffer re-allocations
        StringBuilder sb = new StringBuilder();
        boolean written = false;
        for (long brow = 1; brow <= (long) Math.ceil(rlen / (double) brlen); brow++) for (long bcol = 1; bcol <= (long) Math.ceil(clen / (double) bclen); bcol++) {
            File dir = new File(fnameStaging + "/" + brow + "_" + bcol);
            File dir2 = new File(fnameStagingCompare + "/" + brow + "_" + bcol);
            MatrixBlock mb = null;
            long row_offset = (brow - 1) * brlen + 1;
            long col_offset = (bcol - 1) * bclen + 1;
            if (dir.exists()) {
                if (// WITH COMPARE BLOCK
                withCompare && dir2.exists()) {
                    // copy only values that are different from the original
                    String[] lnames2 = dir2.list();
                    if (// there should be exactly 1 compare block
                    lnames2.length != 1)
                        throw new DMLRuntimeException("Unable to merge results because multiple compare blocks found.");
                    mb = StagingFileUtils.readCellList2BlockFromLocal(dir2 + "/" + lnames2[0], brlen, bclen);
                    boolean appendOnly = mb.isInSparseFormat();
                    DenseBlock compare = DataConverter.convertToDenseBlock(mb, false);
                    for (String lname : dir.list()) {
                        MatrixBlock tmp = StagingFileUtils.readCellList2BlockFromLocal(dir + "/" + lname, brlen, bclen);
                        mergeWithComp(mb, tmp, compare);
                    }
                    // sort sparse and exam sparsity due to append-only
                    if (appendOnly && !_isAccum)
                        mb.sortSparseRows();
                    // change sparsity if required after
                    mb.examSparsity();
                } else // WITHOUT COMPARE BLOCK
                {
                    // copy all non-zeros from all workers
                    boolean appendOnly = false;
                    for (String lname : dir.list()) {
                        if (mb == null) {
                            mb = StagingFileUtils.readCellList2BlockFromLocal(dir + "/" + lname, brlen, bclen);
                            appendOnly = mb.isInSparseFormat();
                        } else {
                            MatrixBlock tmp = StagingFileUtils.readCellList2BlockFromLocal(dir + "/" + lname, brlen, bclen);
                            mergeWithoutComp(mb, tmp, appendOnly);
                        }
                    }
                    // sort sparse due to append-only
                    if (appendOnly && !_isAccum)
                        mb.sortSparseRows();
                    // change sparsity if required after
                    mb.examSparsity();
                }
            }
            // write the block to text cell
            if (mb != null) {
                if (mb.isInSparseFormat()) {
                    Iterator<IJV> iter = mb.getSparseBlockIterator();
                    while (iter.hasNext()) {
                        IJV lcell = iter.next();
                        sb.append(row_offset + lcell.getI());
                        sb.append(' ');
                        sb.append(col_offset + lcell.getJ());
                        sb.append(' ');
                        sb.append(lcell.getV());
                        sb.append('\n');
                        out.write(sb.toString());
                        sb.setLength(0);
                        written = true;
                    }
                } else {
                    for (int i = 0; i < brlen; i++) for (int j = 0; j < bclen; j++) {
                        double lvalue = mb.getValueDenseUnsafe(i, j);
                        if (// for nnz
                        lvalue != 0) {
                            sb.append(row_offset + i);
                            sb.append(' ');
                            sb.append(col_offset + j);
                            sb.append(' ');
                            sb.append(lvalue);
                            sb.append('\n');
                            out.write(sb.toString());
                            sb.setLength(0);
                            written = true;
                        }
                    }
                }
            }
        }
        if (!written)
            out.write(IOUtilFunctions.EMPTY_TEXT_LINE);
    }
}
Also used : Path(org.apache.hadoop.fs.Path) MatrixBlock(org.apache.sysml.runtime.matrix.data.MatrixBlock) MatrixCharacteristics(org.apache.sysml.runtime.matrix.MatrixCharacteristics) BufferedWriter(java.io.BufferedWriter) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException) DenseBlock(org.apache.sysml.runtime.matrix.data.DenseBlock) IJV(org.apache.sysml.runtime.matrix.data.IJV) FileSystem(org.apache.hadoop.fs.FileSystem) Iterator(java.util.Iterator) OutputStreamWriter(java.io.OutputStreamWriter) JobConf(org.apache.hadoop.mapred.JobConf) SequenceFile(org.apache.hadoop.io.SequenceFile) File(java.io.File)

Example 19 with DenseBlock

use of org.apache.sysml.runtime.matrix.data.DenseBlock in project systemml by apache.

the class ResultMergeLocalFile method createBinaryCellResultFile.

@SuppressWarnings("deprecation")
private void createBinaryCellResultFile(String fnameStaging, String fnameStagingCompare, String fnameNew, MetaDataFormat metadata, boolean withCompare) throws IOException, DMLRuntimeException {
    JobConf job = new JobConf(ConfigurationManager.getCachedJobConf());
    Path path = new Path(fnameNew);
    FileSystem fs = IOUtilFunctions.getFileSystem(path, job);
    MatrixCharacteristics mc = metadata.getMatrixCharacteristics();
    long rlen = mc.getRows();
    long clen = mc.getCols();
    int brlen = mc.getRowsPerBlock();
    int bclen = mc.getColsPerBlock();
    MatrixIndexes indexes = new MatrixIndexes(1, 1);
    MatrixCell cell = new MatrixCell(0);
    // beware ca 50ms
    SequenceFile.Writer out = new SequenceFile.Writer(fs, job, path, MatrixIndexes.class, MatrixCell.class);
    try {
        boolean written = false;
        for (long brow = 1; brow <= (long) Math.ceil(rlen / (double) brlen); brow++) for (long bcol = 1; bcol <= (long) Math.ceil(clen / (double) bclen); bcol++) {
            File dir = new File(fnameStaging + "/" + brow + "_" + bcol);
            File dir2 = new File(fnameStagingCompare + "/" + brow + "_" + bcol);
            MatrixBlock mb = null;
            long row_offset = (brow - 1) * brlen + 1;
            long col_offset = (bcol - 1) * bclen + 1;
            if (dir.exists()) {
                if (// WITH COMPARE BLOCK
                withCompare && dir2.exists()) {
                    // copy only values that are different from the original
                    String[] lnames2 = dir2.list();
                    if (// there should be exactly 1 compare block
                    lnames2.length != 1)
                        throw new DMLRuntimeException("Unable to merge results because multiple compare blocks found.");
                    mb = StagingFileUtils.readCellList2BlockFromLocal(dir2 + "/" + lnames2[0], brlen, bclen);
                    boolean appendOnly = mb.isInSparseFormat();
                    DenseBlock compare = DataConverter.convertToDenseBlock(mb, false);
                    for (String lname : dir.list()) {
                        MatrixBlock tmp = StagingFileUtils.readCellList2BlockFromLocal(dir + "/" + lname, brlen, bclen);
                        mergeWithComp(mb, tmp, compare);
                    }
                    // sort sparse due to append-only
                    if (appendOnly && !_isAccum)
                        mb.sortSparseRows();
                    // change sparsity if required after
                    mb.examSparsity();
                } else // WITHOUT COMPARE BLOCK
                {
                    // copy all non-zeros from all workers
                    boolean appendOnly = false;
                    for (String lname : dir.list()) {
                        if (mb == null) {
                            mb = StagingFileUtils.readCellList2BlockFromLocal(dir + "/" + lname, brlen, bclen);
                            appendOnly = mb.isInSparseFormat();
                        } else {
                            MatrixBlock tmp = StagingFileUtils.readCellList2BlockFromLocal(dir + "/" + lname, brlen, bclen);
                            mergeWithoutComp(mb, tmp, appendOnly);
                        }
                    }
                    // sort sparse due to append-only
                    if (appendOnly && !_isAccum)
                        mb.sortSparseRows();
                    // change sparsity if required after
                    mb.examSparsity();
                }
            }
            // write the block to binary cell
            if (mb != null) {
                if (mb.isInSparseFormat()) {
                    Iterator<IJV> iter = mb.getSparseBlockIterator();
                    while (iter.hasNext()) {
                        IJV lcell = iter.next();
                        indexes.setIndexes(row_offset + lcell.getI(), col_offset + lcell.getJ());
                        cell.setValue(lcell.getV());
                        out.append(indexes, cell);
                        written = true;
                    }
                } else {
                    for (int i = 0; i < brlen; i++) for (int j = 0; j < bclen; j++) {
                        double lvalue = mb.getValueDenseUnsafe(i, j);
                        if (// for nnz
                        lvalue != 0) {
                            indexes.setIndexes(row_offset + i, col_offset + j);
                            cell.setValue(lvalue);
                            out.append(indexes, cell);
                            written = true;
                        }
                    }
                }
            }
        }
        if (!written)
            out.append(indexes, cell);
    } finally {
        IOUtilFunctions.closeSilently(out);
    }
}
Also used : Path(org.apache.hadoop.fs.Path) MatrixBlock(org.apache.sysml.runtime.matrix.data.MatrixBlock) MatrixIndexes(org.apache.sysml.runtime.matrix.data.MatrixIndexes) MatrixCharacteristics(org.apache.sysml.runtime.matrix.MatrixCharacteristics) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException) DenseBlock(org.apache.sysml.runtime.matrix.data.DenseBlock) SequenceFile(org.apache.hadoop.io.SequenceFile) IJV(org.apache.sysml.runtime.matrix.data.IJV) FileSystem(org.apache.hadoop.fs.FileSystem) MatrixCell(org.apache.sysml.runtime.matrix.data.MatrixCell) Iterator(java.util.Iterator) JobConf(org.apache.hadoop.mapred.JobConf) SequenceFile(org.apache.hadoop.io.SequenceFile) File(java.io.File) OutputStreamWriter(java.io.OutputStreamWriter) BufferedWriter(java.io.BufferedWriter)

Example 20 with DenseBlock

use of org.apache.sysml.runtime.matrix.data.DenseBlock in project systemml by apache.

the class StagingFileUtils method readCellList2BlockFromLocal.

public static MatrixBlock readCellList2BlockFromLocal(String fname, int brlen, int bclen, boolean sparse) throws IOException, DMLRuntimeException {
    MatrixBlock tmp = new MatrixBlock(brlen, bclen, sparse);
    if (!sparse)
        tmp.allocateDenseBlockUnsafe(brlen, bclen);
    FileInputStream fis = new FileInputStream(fname);
    BufferedReader in = new BufferedReader(new InputStreamReader(fis));
    FastStringTokenizer st = new FastStringTokenizer(' ');
    try {
        String value = null;
        if (sparse) {
            while ((value = in.readLine()) != null) {
                // reset tokenizer
                st.reset(value);
                int row = st.nextInt();
                int col = st.nextInt();
                double lvalue = st.nextDouble();
                tmp.quickSetValue(row, col, lvalue);
            }
        } else {
            DenseBlock a = tmp.getDenseBlock();
            while ((value = in.readLine()) != null) {
                // reset tokenizer
                st.reset(value);
                int row = st.nextInt();
                int col = st.nextInt();
                double lvalue = st.nextDouble();
                a.set(row, col, lvalue);
            }
            tmp.recomputeNonZeros();
        }
    } finally {
        IOUtilFunctions.closeSilently(in);
    }
    // finally change internal representation if required
    tmp.examSparsity();
    return tmp;
}
Also used : DenseBlock(org.apache.sysml.runtime.matrix.data.DenseBlock) MatrixBlock(org.apache.sysml.runtime.matrix.data.MatrixBlock) FastStringTokenizer(org.apache.sysml.runtime.util.FastStringTokenizer) InputStreamReader(java.io.InputStreamReader) BufferedReader(java.io.BufferedReader) FileInputStream(java.io.FileInputStream)

Aggregations

DenseBlock (org.apache.sysml.runtime.matrix.data.DenseBlock)48 DMLRuntimeException (org.apache.sysml.runtime.DMLRuntimeException)22 MatrixBlock (org.apache.sysml.runtime.matrix.data.MatrixBlock)20 CompressedMatrixBlock (org.apache.sysml.runtime.compress.CompressedMatrixBlock)10 KahanPlus (org.apache.sysml.runtime.functionobjects.KahanPlus)10 KahanObject (org.apache.sysml.runtime.instructions.cp.KahanObject)10 BufferedReader (java.io.BufferedReader)6 BufferedWriter (java.io.BufferedWriter)6 File (java.io.File)6 InputStreamReader (java.io.InputStreamReader)6 OutputStreamWriter (java.io.OutputStreamWriter)6 ArrayList (java.util.ArrayList)6 ExecutorService (java.util.concurrent.ExecutorService)6 Future (java.util.concurrent.Future)6 FileSystem (org.apache.hadoop.fs.FileSystem)6 Path (org.apache.hadoop.fs.Path)6 SequenceFile (org.apache.hadoop.io.SequenceFile)6 JobConf (org.apache.hadoop.mapred.JobConf)6 MatrixCharacteristics (org.apache.sysml.runtime.matrix.MatrixCharacteristics)6 IJV (org.apache.sysml.runtime.matrix.data.IJV)6