Search in sources :

Example 11 with FastStringTokenizer

use of org.apache.sysml.runtime.util.FastStringTokenizer in project incubator-systemml by apache.

the class StagingFileUtils method nextKeyMap.

/**
 * Reads key pairs line by line from {@code in} and groups them by block index.
 *
 * <p>Each line is tokenized with a space delimiter and its first two tokens are read as
 * longs ({@code row1}, {@code row2}). The pair is stored as {@code row1 -> row2} in the
 * inner map registered under the block index {@code row1 / blen}. Reading stops at end of
 * input, or right after storing the first pair whose block index is greater than
 * {@code bi} (that pair is still added to {@code map}).
 *
 * @param in   reader positioned at the next line to consume
 * @param map  target map from block index to (row1 -> row2) entries; not cleared here
 * @param bi   block index after which reading stops
 * @param blen block length used to derive the block index; must be non-zero
 * @throws NumberFormatException declared by the original signature; presumably raised by
 *                               the tokenizer on malformed numbers (verify against
 *                               FastStringTokenizer)
 * @throws IOException           if reading from {@code in} fails
 */
public static void nextKeyMap(BufferedReader in, HashMap<Integer, HashMap<Long, Long>> map, int bi, int blen) throws NumberFormatException, IOException {
    String value = null;
    FastStringTokenizer st = new FastStringTokenizer(' ');
    while ((value = in.readLine()) != null) {
        // reset tokenizer
        st.reset(value);
        long row1 = st.nextLong();
        long row2 = st.nextLong();
        // Divide in long arithmetic first and narrow afterwards: a cast binds tighter than
        // '/', so "(int) row1 / blen" would truncate row1 to int before dividing.
        int id = (int) (row1 / blen);
        HashMap<Long, Long> block = map.get(id);
        if (block == null) {
            block = new HashMap<Long, Long>();
            map.put(id, block);
        }
        block.put(row1, row2);
        if (id > bi)
            break;
    }
}
Also used : FastStringTokenizer(org.apache.sysml.runtime.util.FastStringTokenizer) HashMap(java.util.HashMap)

Example 12 with FastStringTokenizer

use of org.apache.sysml.runtime.util.FastStringTokenizer in project incubator-systemml by apache.

the class StagingFileUtils method nextSizedKeyMap.

/**
 * Clears {@code map} and refills it with at most {@code size} key pairs read from {@code in}.
 *
 * <p>Each line is tokenized with a space delimiter and its first two tokens are read as
 * longs ({@code row1}, {@code row2}). The pair is stored as {@code row1 -> row2} in the
 * inner map registered under the block index {@code row1 / blen}. Reading stops at end of
 * input or once {@code size} lines have been consumed. Note that at least one line is
 * consumed if available, even when {@code size} is not positive, because the limit is
 * checked after the pair has been stored.
 *
 * @param in   reader positioned at the next line to consume
 * @param map  target map from block index to (row1 -> row2) entries; cleared on entry
 * @param blen block length used to derive the block index; must be non-zero
 * @param size maximum number of lines to consume in this call
 * @return the number of lines consumed
 * @throws NumberFormatException declared by the original signature; presumably raised by
 *                               the tokenizer on malformed numbers (verify against
 *                               FastStringTokenizer)
 * @throws IOException           if reading from {@code in} fails
 */
public static int nextSizedKeyMap(BufferedReader in, HashMap<Integer, HashMap<Long, Long>> map, int blen, int size) throws NumberFormatException, IOException {
    map.clear();
    String value = null;
    int len = 0;
    FastStringTokenizer st = new FastStringTokenizer(' ');
    while ((value = in.readLine()) != null) {
        // reset tokenizer
        st.reset(value);
        long row1 = st.nextLong();
        long row2 = st.nextLong();
        // Divide in long arithmetic first and narrow afterwards: a cast binds tighter than
        // '/', so "(int) row1 / blen" would truncate row1 to int before dividing.
        int id = (int) (row1 / blen);
        HashMap<Long, Long> block = map.get(id);
        if (block == null) {
            block = new HashMap<Long, Long>();
            map.put(id, block);
        }
        block.put(row1, row2);
        len++;
        if (len >= size)
            break;
    }
    return len;
}
Also used : FastStringTokenizer(org.apache.sysml.runtime.util.FastStringTokenizer) HashMap(java.util.HashMap)

Example 13 with FastStringTokenizer

use of org.apache.sysml.runtime.util.FastStringTokenizer in project incubator-systemml by apache.

the class StagingFileUtils method readCellList2BlockFromLocal.

/**
 * Loads a matrix block from a local text file of cells.
 *
 * <p>Every line is tokenized with a space delimiter and read as {@code row col value}
 * (int, int, double). The row and column are passed to the block unchanged, i.e. no index
 * shift is applied here. In the sparse case values are written via
 * {@code quickSetValue}; in the dense case they are written directly into the dense
 * block and the non-zero count is recomputed afterwards. The reader is always closed, and
 * {@code examSparsity()} is invoked on the result before it is returned.
 *
 * @param fname  path of the local file to read
 * @param brlen  number of rows the block is created with
 * @param bclen  number of columns the block is created with
 * @param sparse whether the block is created in sparse representation
 * @return the populated matrix block
 * @throws IOException         if the file cannot be opened or read
 * @throws DMLRuntimeException declared by the original signature; presumably raised by the
 *                             matrix block operations (verify against MatrixBlock)
 */
public static MatrixBlock readCellList2BlockFromLocal(String fname, int brlen, int bclen, boolean sparse) throws IOException, DMLRuntimeException {
    MatrixBlock block = new MatrixBlock(brlen, bclen, sparse);
    if (!sparse)
        block.allocateDenseBlockUnsafe(brlen, bclen);
    BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(fname)));
    FastStringTokenizer tokens = new FastStringTokenizer(' ');
    try {
        if (!sparse) {
            // dense target: write cells straight into the dense block
            DenseBlock dense = block.getDenseBlock();
            for (String line = reader.readLine(); line != null; line = reader.readLine()) {
                tokens.reset(line);
                int r = tokens.nextInt();
                int c = tokens.nextInt();
                dense.set(r, c, tokens.nextDouble());
            }
            block.recomputeNonZeros();
        } else {
            // sparse target: go through the block's own setter
            for (String line = reader.readLine(); line != null; line = reader.readLine()) {
                tokens.reset(line);
                int r = tokens.nextInt();
                int c = tokens.nextInt();
                block.quickSetValue(r, c, tokens.nextDouble());
            }
        }
    } finally {
        IOUtilFunctions.closeSilently(reader);
    }
    // let the block switch its internal representation if required
    block.examSparsity();
    return block;
}
Also used : DenseBlock(org.apache.sysml.runtime.matrix.data.DenseBlock) MatrixBlock(org.apache.sysml.runtime.matrix.data.MatrixBlock) FastStringTokenizer(org.apache.sysml.runtime.util.FastStringTokenizer) InputStreamReader(java.io.InputStreamReader) BufferedReader(java.io.BufferedReader) FileInputStream(java.io.FileInputStream)

Example 14 with FastStringTokenizer

use of org.apache.sysml.runtime.util.FastStringTokenizer in project incubator-systemml by apache.

the class ReaderTextCell method readTextCellMatrixFromHDFS.

/**
 * Reads a matrix in text cell format from {@code path} into {@code dest}.
 *
 * <p>The input is obtained split by split through a {@code TextInputFormat}. Every record
 * is tokenized with a space delimiter and read as {@code row col value}; row and column
 * are decremented by one before being used as indexes into {@code dest}. Records whose
 * decremented row or column equals -1 are skipped. In the sparse case values are appended
 * and the sparse rows are sorted after each split; in the dense case values are written
 * directly into the dense block.
 *
 * <p>Bounds are not checked per cell. Instead, any exception is inspected afterwards: if
 * the last parsed cell lies outside {@code [1:rlen, 1:clen]} an out-of-range error is
 * reported, otherwise a generic read error. In both cases the original exception is
 * attached as the cause.
 *
 * @param path  location of the text cell input
 * @param job   job configuration; the input path is added to it
 * @param dest  target matrix block, expected to be allocated by the caller (TODO confirm)
 * @param rlen  overall number of rows, used for the post-mortem bounds check
 * @param clen  overall number of columns, used for the post-mortem bounds check
 * @param brlen not used by this method
 * @param bclen not used by this method
 * @throws IOException if the input cannot be read or a cell is out of range
 */
private static void readTextCellMatrixFromHDFS(Path path, JobConf job, MatrixBlock dest, long rlen, long clen, int brlen, int bclen) throws IOException {
    boolean sparse = dest.isInSparseFormat();
    FileInputFormat.addInputPath(job, path);
    TextInputFormat informat = new TextInputFormat();
    informat.configure(job);
    InputSplit[] splits = informat.getSplits(job, 1);
    LongWritable key = new LongWritable();
    Text value = new Text();
    // kept outside the try so the catch block can report the last parsed cell
    int row = -1;
    int col = -1;
    try {
        FastStringTokenizer st = new FastStringTokenizer(' ');
        for (InputSplit split : splits) {
            RecordReader<LongWritable, Text> reader = informat.getRecordReader(split, job, Reporter.NULL);
            try {
                if (sparse) {
                    // SPARSE<-value
                    while (reader.next(key, value)) {
                        // reinit tokenizer
                        st.reset(value.toString());
                        row = st.nextInt() - 1;
                        col = st.nextInt() - 1;
                        if (row == -1 || col == -1)
                            continue;
                        double lvalue = st.nextDouble();
                        dest.appendValue(row, col, lvalue);
                    }
                    dest.sortSparseRows();
                } else {
                    // DENSE<-value
                    DenseBlock a = dest.getDenseBlock();
                    while (reader.next(key, value)) {
                        // reinit tokenizer
                        st.reset(value.toString());
                        row = st.nextInt() - 1;
                        col = st.nextInt() - 1;
                        if (row == -1 || col == -1)
                            continue;
                        double lvalue = st.nextDouble();
                        a.set(row, col, lvalue);
                    }
                }
            } finally {
                IOUtilFunctions.closeSilently(reader);
            }
        }
    } catch (Exception ex) {
        // post-mortem error handling and bounds checking; always keep the original cause,
        // since row/col may still be -1 when the failure is unrelated to cell indexes
        if (row < 0 || row + 1 > rlen || col < 0 || col + 1 > clen)
            throw new IOException("Matrix cell [" + (row + 1) + "," + (col + 1) + "] " + "out of overall matrix range [1:" + rlen + ",1:" + clen + "].", ex);
        else
            throw new IOException("Unable to read matrix in text cell format.", ex);
    }
}
Also used : Text(org.apache.hadoop.io.Text) IOException(java.io.IOException) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException) IOException(java.io.IOException) DenseBlock(org.apache.sysml.runtime.matrix.data.DenseBlock) FastStringTokenizer(org.apache.sysml.runtime.util.FastStringTokenizer) TextInputFormat(org.apache.hadoop.mapred.TextInputFormat) LongWritable(org.apache.hadoop.io.LongWritable) InputSplit(org.apache.hadoop.mapred.InputSplit)

Example 15 with FastStringTokenizer

use of org.apache.sysml.runtime.util.FastStringTokenizer in project systemml by apache.

the class StagingFileUtils method nextSizedKeyMap.

/**
 * Clears {@code map} and refills it with at most {@code size} key pairs read from {@code in}.
 *
 * <p>Each line is tokenized with a space delimiter and its first two tokens are read as
 * longs ({@code row1}, {@code row2}). The pair is stored as {@code row1 -> row2} in the
 * inner map registered under the block index {@code row1 / blen}. Reading stops at end of
 * input or once {@code size} lines have been consumed. Note that at least one line is
 * consumed if available, even when {@code size} is not positive, because the limit is
 * checked after the pair has been stored.
 *
 * @param in   reader positioned at the next line to consume
 * @param map  target map from block index to (row1 -> row2) entries; cleared on entry
 * @param blen block length used to derive the block index; must be non-zero
 * @param size maximum number of lines to consume in this call
 * @return the number of lines consumed
 * @throws NumberFormatException declared by the original signature; presumably raised by
 *                               the tokenizer on malformed numbers (verify against
 *                               FastStringTokenizer)
 * @throws IOException           if reading from {@code in} fails
 */
public static int nextSizedKeyMap(BufferedReader in, HashMap<Integer, HashMap<Long, Long>> map, int blen, int size) throws NumberFormatException, IOException {
    map.clear();
    String value = null;
    int len = 0;
    FastStringTokenizer st = new FastStringTokenizer(' ');
    while ((value = in.readLine()) != null) {
        // reset tokenizer
        st.reset(value);
        long row1 = st.nextLong();
        long row2 = st.nextLong();
        // Divide in long arithmetic first and narrow afterwards: a cast binds tighter than
        // '/', so "(int) row1 / blen" would truncate row1 to int before dividing.
        int id = (int) (row1 / blen);
        HashMap<Long, Long> block = map.get(id);
        if (block == null) {
            block = new HashMap<Long, Long>();
            map.put(id, block);
        }
        block.put(row1, row2);
        len++;
        if (len >= size)
            break;
    }
    return len;
}
Also used : FastStringTokenizer(org.apache.sysml.runtime.util.FastStringTokenizer) HashMap(java.util.HashMap)

Aggregations

FastStringTokenizer (org.apache.sysml.runtime.util.FastStringTokenizer): 20, IOException (java.io.IOException): 10, DMLRuntimeException (org.apache.sysml.runtime.DMLRuntimeException): 10, BufferedReader (java.io.BufferedReader): 8, InputStreamReader (java.io.InputStreamReader): 8, LongWritable (org.apache.hadoop.io.LongWritable): 8, Text (org.apache.hadoop.io.Text): 8, LinkedList (java.util.LinkedList): 6, InputSplit (org.apache.hadoop.mapred.InputSplit): 6, TextInputFormat (org.apache.hadoop.mapred.TextInputFormat): 6, DenseBlock (org.apache.sysml.runtime.matrix.data.DenseBlock): 6, FileInputStream (java.io.FileInputStream): 4, HashMap (java.util.HashMap): 4, Path (org.apache.hadoop.fs.Path): 4, JobConf (org.apache.hadoop.mapred.JobConf): 4, Cell (org.apache.sysml.runtime.controlprogram.parfor.util.Cell): 4, MatrixCell (org.apache.sysml.runtime.matrix.data.MatrixCell): 4, File (java.io.File): 2, SequenceFile (org.apache.hadoop.io.SequenceFile): 2, ValueType (org.apache.sysml.parser.Expression.ValueType): 2