Search in sources :

Example 31 with DenseBlock

use of org.apache.sysml.runtime.matrix.data.DenseBlock in project incubator-systemml by apache.

the class ColGroupDDC1 method computeRowSums.

/**
 * Adds the row sums of all given DDC1 column groups, for rows {@code [rl, ru)},
 * to the per-row (sum, correction) pairs held in the dense block of {@code result}.
 *
 * @param grps   column groups whose per-row byte codes are aggregated
 * @param result output block; for each row the cell at {@code pos(row)} is used as
 *               the running sum and the following cell as its Kahan correction
 * @param kplus  aggregation function used to pre-aggregate each group's distinct values
 * @param rl     first row to process (inclusive)
 * @param ru     end of the row range (exclusive)
 */
public static void computeRowSums(ColGroupDDC1[] grps, MatrixBlock result, KahanFunction kplus, int rl, int ru) {
    // note: due to corrections the output might be a large dense block
    DenseBlock out = result.getDenseBlock();
    KahanObject acc = new KahanObject(0, 0);
    KahanPlus plus = KahanPlus.getKahanPlusFnObject();
    final int numGroups = grps.length;

    // pre-aggregate the distinct values of every group exactly once
    // (one byte code per row, i.e. at most 256 table entries are addressed)
    double[][] groupSums = new double[numGroups][];
    for (int g = 0; g < numGroups; g++) {
        groupSums[g] = grps[g].sumAllValues(kplus, acc);
    }

    // process the row range in fixed-size blocks: first accumulate the
    // contributions of all groups into a small buffer, then fold that buffer
    // into the output with plain Kahan addition (not the general
    // KahanFunction, for correctness in case of sqk+)
    final int blockRows = 1024;
    double[] partial = new double[blockRows];
    int start = rl;
    while (start < ru) {
        final int end = Math.min(start + blockRows, ru);
        final int count = end - start;
        Arrays.fill(partial, 0);

        // sum up the looked-up values of all groups, group by group
        for (int g = 0; g < numGroups; g++) {
            double[] sums = groupSums[g];
            byte[] codes = grps[g]._data;
            for (int off = 0; off < count; off++) {
                // mask to read the byte code as an unsigned index
                partial[off] += sums[codes[start + off] & 0xFF];
            }
        }

        // merge the buffered partial sums into the output (sum, correction) pairs
        for (int off = 0; off < count; off++) {
            int row = start + off;
            double[] cvals = out.values(row);
            int cix = out.pos(row);
            acc.set(cvals[cix], cvals[cix + 1]);
            plus.execute2(acc, partial[off]);
            cvals[cix] = acc._sum;
            cvals[cix + 1] = acc._correction;
        }
        start += blockRows;
    }
}
Also used : DenseBlock(org.apache.sysml.runtime.matrix.data.DenseBlock) KahanObject(org.apache.sysml.runtime.instructions.cp.KahanObject) KahanPlus(org.apache.sysml.runtime.functionobjects.KahanPlus)

Example 32 with DenseBlock

use of org.apache.sysml.runtime.matrix.data.DenseBlock in project incubator-systemml by apache.

the class ColGroupOLE method computeRowSums.

/**
 * Adds this column group's contribution to the row sums of rows {@code [rl, ru)}.
 * For every touched row, the output cell at {@code c.pos(row)} is treated as the
 * running sum and the next cell as its Kahan correction; both are updated in place.
 *
 * Two strategies are used: a blocked ("cache-conscious") scan over all values per
 * row block, or a plain scan that walks each value's bitmap once.
 *
 * @param result output block whose dense block receives the (sum, correction) pairs
 * @param kplus  aggregation function used to aggregate the group's value tuples
 * @param rl     first row to process (inclusive)
 * @param ru     end of the row range (exclusive)
 */
@Override
protected final void computeRowSums(MatrixBlock result, KahanFunction kplus, int rl, int ru) {
    // note: due to corrections the output might be a large dense block
    DenseBlock c = result.getDenseBlock();
    KahanObject kbuff = new KahanObject(0, 0);
    KahanPlus kplus2 = KahanPlus.getKahanPlusFnObject();
    // number of rows covered by one bitmap segment; row offsets stored in _data
    // are relative to the start of their segment
    final int blksz = BitmapEncoder.BITMAP_BLOCK_SZ;
    final int numVals = getNumValues();
    if (ALLOW_CACHE_CONSCIOUS_ROWSUMS && LOW_LEVEL_OPT && numVals > 1 && _numRows > blksz) {
        // row-block size of the outer loop; each row block is scanned for all values
        final int blksz2 = ColGroupOffset.WRITE_CACHE_BLKSZ / 2;
        // step 1: prepare position and value arrays
        // apos[k]: current read position inside the bitmap of value k
        // (presumably positioned at the segment containing rl -- TODO confirm against skipScan)
        int[] apos = skipScan(numVals, rl);
        // aval[k]: pre-aggregated value that is added for every row listed under value k
        double[] aval = sumAllValues(kplus, kbuff, false);
        // step 2: cache conscious row sums via horizontal scans
        for (int bi = rl; bi < ru; bi += blksz2) {
            int bimax = Math.min(bi + blksz2, ru);
            // horizontal segment scan, incl pos maintenance
            for (int k = 0; k < numVals; k++) {
                int boff = _ptr[k];
                int blen = len(k);
                double val = aval[k];
                int bix = apos[k];
                // walk the segments of value k that fall into the current row block
                for (int ii = bi; ii < bimax && bix < blen; ii += blksz) {
                    // prepare length, start, and end pos
                    // segment layout: one length entry followed by 'len' row offsets
                    int len = _data[boff + bix];
                    int pos = boff + bix + 1;
                    // compute partial results
                    for (int i = 0; i < len; i++) {
                        // absolute row index = segment start + stored offset
                        int rix = ii + _data[pos + i];
                        double[] cvals = c.values(rix);
                        int cix = c.pos(rix);
                        kbuff.set(cvals[cix], cvals[cix + 1]);
                        kplus2.execute2(kbuff, val);
                        cvals[cix] = kbuff._sum;
                        cvals[cix + 1] = kbuff._correction;
                    }
                    // advance past the length entry and its offsets
                    bix += len + 1;
                }
                // remember where to resume for this value in the next row block
                apos[k] = bix;
            }
        }
    } else {
        // iterate over all values and their bitmaps
        for (int k = 0; k < numVals; k++) {
            // prepare value-to-add for entire value bitmap
            int boff = _ptr[k];
            int blen = len(k);
            double val = sumValues(k, kplus, kbuff);
            // iterate over bitmap blocks and add values
            // (a zero aggregate is skipped entirely, leaving the output cells untouched)
            if (val != 0) {
                int slen;
                int bix = skipScanVal(k, rl);
                // NOTE(review): the start offset rounds (rl + 1) down to a multiple of blksz,
                // not rl itself -- confirm this matches the position returned by skipScanVal
                for (int off = ((rl + 1) / blksz) * blksz; bix < blen && off < ru; bix += slen + 1, off += blksz) {
                    // segment length, followed by 'slen' row offsets relative to 'off'
                    slen = _data[boff + bix];
                    for (int i = 1; i <= slen; i++) {
                        int rix = off + _data[boff + bix + i];
                        double[] cvals = c.values(rix);
                        int cix = c.pos(rix);
                        kbuff.set(cvals[cix], cvals[cix + 1]);
                        kplus2.execute2(kbuff, val);
                        cvals[cix] = kbuff._sum;
                        cvals[cix + 1] = kbuff._correction;
                    }
                }
            }
        }
    }
}
Also used : DenseBlock(org.apache.sysml.runtime.matrix.data.DenseBlock) KahanObject(org.apache.sysml.runtime.instructions.cp.KahanObject) KahanPlus(org.apache.sysml.runtime.functionobjects.KahanPlus)

Example 33 with DenseBlock

use of org.apache.sysml.runtime.matrix.data.DenseBlock in project incubator-systemml by apache.

the class ReaderTextCSV method readCSVMatrixFromInputStream.

/**
 * Reads CSV lines from the given stream into {@code dest}, starting at the row
 * index held in {@code rowPos}, and returns the number of non-zero cells written.
 * Empty cells take {@code fillValue}; zero-valued cells are not written. The
 * reader (and with it the stream) is always closed before returning, including
 * when reading the header line fails.
 *
 * @param is        input stream providing the CSV text
 * @param srcInfo   source description passed to the column-count sanity check
 * @param dest      target block; written via {@code appendValue} when in sparse
 *                  format, otherwise via its dense block
 * @param rowPos    in/out row cursor; updated to the row after the last line read
 * @param rlen      not used by this method
 * @param clen      expected number of columns, passed to the column-count sanity check
 * @param brlen     not used by this method
 * @param bclen     not used by this method
 * @param hasHeader whether the input has a header line
 * @param delim     cell delimiter
 * @param fill      passed to the empty-field sanity check
 * @param fillValue value used for empty cells
 * @param first     whether this stream is the first part; the header is only
 *                  skipped when both {@code first} and {@code hasHeader} are set
 * @return number of non-zero values written to {@code dest}
 * @throws IOException if reading fails or a sanity check raises an error
 */
private static long readCSVMatrixFromInputStream(InputStream is, String srcInfo, MatrixBlock dest, MutableInt rowPos, long rlen, long clen, int brlen, int bclen, boolean hasHeader, String delim, boolean fill, double fillValue, boolean first) throws IOException {
    boolean sparse = dest.isInSparseFormat();
    String value = null;
    int row = rowPos.intValue();
    double cellValue = 0;
    long lnnz = 0;
    // NOTE(review): no charset is given, so the platform default is used -- confirm intended
    BufferedReader br = new BufferedReader(new InputStreamReader(is));
    try {
        // skip the header inside the try block so that a failing read still
        // reaches the finally clause and the reader is not leaked
        if (first && hasHeader) {
            br.readLine();
        }
        // Read the data
        if (sparse) {
            // SPARSE<-value
            while ((value = br.readLine()) != null) {
                // foreach line
                String cellStr = value.trim();
                boolean emptyValuesFound = false;
                String[] parts = IOUtilFunctions.split(cellStr, delim);
                int col = 0;
                for (String part : parts) {
                    // foreach cell
                    part = part.trim();
                    if (part.isEmpty()) {
                        emptyValuesFound = true;
                        cellValue = fillValue;
                    } else {
                        cellValue = UtilFunctions.parseToDouble(part);
                    }
                    if (cellValue != 0) {
                        dest.appendValue(row, col, cellValue);
                        lnnz++;
                    }
                    col++;
                }
                // sanity checks for empty values and number of columns
                IOUtilFunctions.checkAndRaiseErrorCSVEmptyField(cellStr, fill, emptyValuesFound);
                IOUtilFunctions.checkAndRaiseErrorCSVNumColumns(srcInfo, cellStr, parts, clen);
                row++;
            }
        } else {
            // DENSE<-value
            DenseBlock a = dest.getDenseBlock();
            while ((value = br.readLine()) != null) {
                // foreach line
                String cellStr = value.trim();
                boolean emptyValuesFound = false;
                String[] parts = IOUtilFunctions.split(cellStr, delim);
                int col = 0;
                for (String part : parts) {
                    // foreach cell
                    part = part.trim();
                    if (part.isEmpty()) {
                        emptyValuesFound = true;
                        cellValue = fillValue;
                    } else {
                        cellValue = UtilFunctions.parseToDouble(part);
                    }
                    if (cellValue != 0) {
                        a.set(row, col, cellValue);
                        lnnz++;
                    }
                    col++;
                }
                // sanity checks for empty values and number of columns
                IOUtilFunctions.checkAndRaiseErrorCSVEmptyField(cellStr, fill, emptyValuesFound);
                IOUtilFunctions.checkAndRaiseErrorCSVNumColumns(srcInfo, cellStr, parts, clen);
                row++;
            }
        }
    } finally {
        IOUtilFunctions.closeSilently(br);
    }
    // publish the new row cursor only after the whole stream was read successfully
    rowPos.setValue(row);
    return lnnz;
}
Also used : DenseBlock(org.apache.sysml.runtime.matrix.data.DenseBlock) InputStreamReader(java.io.InputStreamReader) BufferedReader(java.io.BufferedReader)

Example 34 with DenseBlock

use of org.apache.sysml.runtime.matrix.data.DenseBlock in project incubator-systemml by apache.

the class ReaderTextCell method readTextCellMatrixFromHDFS.

/**
 * Reads a matrix in text cell format (one "row col value" triple per line, with
 * 1-based indexes) from the given path into {@code dest}. Lines whose row or
 * column index is 0 are skipped.
 *
 * Any failure is reported as an {@link IOException} that carries the original
 * exception as its cause; if the last parsed cell lies outside
 * {@code [1:rlen, 1:clen]} the message names that cell.
 *
 * @param path  input path added to the job's input paths
 * @param job   job configuration used to configure the input format
 * @param dest  target block; written via {@code appendValue} when in sparse
 *              format, otherwise via its dense block
 * @param rlen  number of rows of the overall matrix, used for the range check
 * @param clen  number of columns of the overall matrix, used for the range check
 * @param brlen not used by this method
 * @param bclen not used by this method
 * @throws IOException if the input cannot be read or a cell is out of range
 */
private static void readTextCellMatrixFromHDFS(Path path, JobConf job, MatrixBlock dest, long rlen, long clen, int brlen, int bclen) throws IOException {
    boolean sparse = dest.isInSparseFormat();
    FileInputFormat.addInputPath(job, path);
    TextInputFormat informat = new TextInputFormat();
    informat.configure(job);
    InputSplit[] splits = informat.getSplits(job, 1);
    LongWritable key = new LongWritable();
    Text value = new Text();
    // kept outside the try block so the error handler can report the last parsed cell
    int row = -1;
    int col = -1;
    try {
        FastStringTokenizer st = new FastStringTokenizer(' ');
        for (InputSplit split : splits) {
            RecordReader<LongWritable, Text> reader = informat.getRecordReader(split, job, Reporter.NULL);
            try {
                if (sparse) {
                    // SPARSE<-value
                    while (reader.next(key, value)) {
                        // reinit tokenizer
                        st.reset(value.toString());
                        // convert 1-based file indexes to 0-based positions
                        row = st.nextInt() - 1;
                        col = st.nextInt() - 1;
                        if (row == -1 || col == -1) {
                            continue;
                        }
                        double lvalue = st.nextDouble();
                        dest.appendValue(row, col, lvalue);
                    }
                    dest.sortSparseRows();
                } else {
                    // DENSE<-value
                    DenseBlock a = dest.getDenseBlock();
                    while (reader.next(key, value)) {
                        // reinit tokenizer
                        st.reset(value.toString());
                        // convert 1-based file indexes to 0-based positions
                        row = st.nextInt() - 1;
                        col = st.nextInt() - 1;
                        if (row == -1 || col == -1) {
                            continue;
                        }
                        double lvalue = st.nextDouble();
                        a.set(row, col, lvalue);
                    }
                }
            } finally {
                IOUtilFunctions.closeSilently(reader);
            }
        }
    } catch (Exception ex) {
        // post-mortem error handling and bounds checking;
        // the original exception is always chained so the root cause is not lost
        if (row < 0 || row + 1 > rlen || col < 0 || col + 1 > clen) {
            throw new IOException("Matrix cell [" + (row + 1) + "," + (col + 1) + "] " + "out of overall matrix range [1:" + rlen + ",1:" + clen + "].", ex);
        } else {
            throw new IOException("Unable to read matrix in text cell format.", ex);
        }
    }
}
Also used : Text(org.apache.hadoop.io.Text) IOException(java.io.IOException) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException) IOException(java.io.IOException) DenseBlock(org.apache.sysml.runtime.matrix.data.DenseBlock) FastStringTokenizer(org.apache.sysml.runtime.util.FastStringTokenizer) TextInputFormat(org.apache.hadoop.mapred.TextInputFormat) LongWritable(org.apache.hadoop.io.LongWritable) InputSplit(org.apache.hadoop.mapred.InputSplit)

Example 35 with DenseBlock

use of org.apache.sysml.runtime.matrix.data.DenseBlock in project incubator-systemml by apache.

the class DataConverter method convertToDenseBlock.

/**
 * Returns the content of the given matrix block as a {@link DenseBlock}.
 * If the block is already dense and allocated and no deep copy is requested,
 * its own dense block is returned directly; otherwise a new block is created
 * and filled from the sparse cells or from the existing dense block.
 *
 * @param mb   input matrix block
 * @param deep if true, never return the input's own dense block
 * @return dense block holding the values of {@code mb}
 */
public static DenseBlock convertToDenseBlock(MatrixBlock mb, boolean deep) {
    final int nrow = mb.getNumRows();
    final int ncol = mb.getNumColumns();

    // share the existing dense block only for allocated dense inputs without deep copy
    final boolean reuse = !mb.isInSparseFormat() && mb.isAllocated() && !deep;
    final DenseBlock out;
    if (reuse) {
        out = mb.getDenseBlock();
    } else {
        // 0-initialized
        out = DenseBlockFactory.createDenseBlock(nrow, ncol);
    }

    // nothing to copy for empty inputs
    if (mb.isEmptyBlock(false)) {
        return out;
    }

    if (mb.isInSparseFormat()) {
        // scatter all sparse cells into the new dense block
        for (Iterator<IJV> cells = mb.getSparseBlockIterator(); cells.hasNext(); ) {
            IJV cell = cells.next();
            out.set(cell.getI(), cell.getJ(), cell.getV());
        }
    } else if (deep) {
        // dense input with deep copy: copy values into the new block
        out.set(mb.getDenseBlock());
    }
    return out;
}
Also used : DenseBlock(org.apache.sysml.runtime.matrix.data.DenseBlock) IJV(org.apache.sysml.runtime.matrix.data.IJV)

Aggregations

DenseBlock (org.apache.sysml.runtime.matrix.data.DenseBlock)48 DMLRuntimeException (org.apache.sysml.runtime.DMLRuntimeException)22 MatrixBlock (org.apache.sysml.runtime.matrix.data.MatrixBlock)20 CompressedMatrixBlock (org.apache.sysml.runtime.compress.CompressedMatrixBlock)10 KahanPlus (org.apache.sysml.runtime.functionobjects.KahanPlus)10 KahanObject (org.apache.sysml.runtime.instructions.cp.KahanObject)10 BufferedReader (java.io.BufferedReader)6 BufferedWriter (java.io.BufferedWriter)6 File (java.io.File)6 InputStreamReader (java.io.InputStreamReader)6 OutputStreamWriter (java.io.OutputStreamWriter)6 ArrayList (java.util.ArrayList)6 ExecutorService (java.util.concurrent.ExecutorService)6 Future (java.util.concurrent.Future)6 FileSystem (org.apache.hadoop.fs.FileSystem)6 Path (org.apache.hadoop.fs.Path)6 SequenceFile (org.apache.hadoop.io.SequenceFile)6 JobConf (org.apache.hadoop.mapred.JobConf)6 MatrixCharacteristics (org.apache.sysml.runtime.matrix.MatrixCharacteristics)6 IJV (org.apache.sysml.runtime.matrix.data.IJV)6