Search in sources :

Example 6 with FastStringTokenizer

use of org.apache.sysml.runtime.util.FastStringTokenizer in project incubator-systemml by apache.

the class ReaderTextCell method readRawTextCellMatrixFromInputStream.

/**
 * Reads a matrix in text cell format ("row col value" per line, space separated) from an
 * input stream into the given matrix block. Row and column indices in the input are 1-based
 * and are shifted to 0-based positions before they are written into {@code dest}; lines
 * whose row or column index is 0 are skipped.
 *
 * <p>If {@code matrixMarket} is set, the stream must start with a header line beginning with
 * "%%", optionally followed by comment lines (starting with '%') and blank lines, followed by
 * a size line whose first two fields must equal {@code rlen} and {@code clen}.
 *
 * <p>The reader wrapping {@code is} is always closed before this method returns, including
 * when header validation fails.
 *
 * @param is input stream to read from
 * @param dest target matrix block; its current sparse/dense format selects how cells are written
 * @param rlen expected number of rows
 * @param clen expected number of columns
 * @param brlen not used by this method
 * @param bclen not used by this method
 * @param matrixMarket true if the input carries a MatrixMarket header and size line
 * @throws IOException if the header or size line is missing, malformed or inconsistent with
 *         {@code rlen}/{@code clen}, or if any cell line cannot be read or applied
 */
private static void readRawTextCellMatrixFromInputStream(InputStream is, MatrixBlock dest, long rlen, long clen, int brlen, int bclen, boolean matrixMarket) throws IOException {
    BufferedReader br = new BufferedReader(new InputStreamReader(is));
    boolean sparse = dest.isInSparseFormat();
    String value = null;
    int row = -1;
    int col = -1;
    try {
        // Read the header lines, if reading from a matrixMarket file.
        // This happens inside the outer try so that the reader is closed on header errors too,
        // but outside the inner try so that header errors are not re-labelled as cell errors.
        if (matrixMarket) {
            // header line
            value = br.readLine();
            if (value == null || !value.startsWith("%%")) {
                throw new IOException("Error while reading file in MatrixMarket format. Expecting a header line, but encountered, \"" + value + "\".");
            }
            // skip until end-of-comments; blank lines are skipped as well so that charAt(0)
            // is only evaluated on non-empty lines
            do {
                value = br.readLine();
            } while (value != null && (value.trim().isEmpty() || value.charAt(0) == '%'));
            if (value == null) {
                throw new IOException("Error while reading file in MatrixMarket format. Expecting a line with matrix dimensions, but reached end of input.");
            }
            // the first line after comments is the one w/ matrix dimensions
            // validate (rlen clen nnz)
            String[] fields = value.trim().split("\\s+");
            if (fields.length < 2) {
                throw new IOException("Error while reading file in MatrixMarket format. Expecting a line with matrix dimensions, but encountered, \"" + value + "\".");
            }
            long mm_rlen;
            long mm_clen;
            try {
                mm_rlen = Long.parseLong(fields[0]);
                mm_clen = Long.parseLong(fields[1]);
            } catch (NumberFormatException ex) {
                throw new IOException("Error while reading file in MatrixMarket format. Expecting a line with matrix dimensions, but encountered, \"" + value + "\".", ex);
            }
            if (rlen != mm_rlen || clen != mm_clen) {
                throw new IOException("Unexpected matrix dimensions while reading file in MatrixMarket format. Expecting dimensions [" + rlen + " rows, " + clen + " cols] but encountered [" + mm_rlen + " rows, " + mm_clen + " cols].");
            }
        }
        try {
            FastStringTokenizer st = new FastStringTokenizer(' ');
            if (sparse) {
                // SPARSE<-value
                while ((value = br.readLine()) != null) {
                    // reinit tokenizer
                    st.reset(value);
                    row = st.nextInt() - 1;
                    col = st.nextInt() - 1;
                    // an index of 0 in the input is not a valid 1-based position; skip the line
                    if (row == -1 || col == -1)
                        continue;
                    double lvalue = st.nextDouble();
                    dest.appendValue(row, col, lvalue);
                }
                // cells were appended in input order, so rows are sorted once at the end
                dest.sortSparseRows();
            } else {
                // DENSE<-value
                DenseBlock a = dest.getDenseBlock();
                while ((value = br.readLine()) != null) {
                    // reinit tokenizer
                    st.reset(value);
                    row = st.nextInt() - 1;
                    col = st.nextInt() - 1;
                    if (row == -1 || col == -1)
                        continue;
                    double lvalue = st.nextDouble();
                    a.set(row, col, lvalue);
                }
            }
        } catch (Exception ex) {
            // post-mortem error handling and bounds checking: row/col hold the last indices
            // parsed, so an out-of-range cell can be reported precisely
            if (row < 0 || row + 1 > rlen || col < 0 || col + 1 > clen)
                throw new IOException("Matrix cell [" + (row + 1) + "," + (col + 1) + "] " + "out of overall matrix range [1:" + rlen + ",1:" + clen + "].", ex);
            else
                throw new IOException("Unable to read matrix in raw text cell format.", ex);
        }
    } finally {
        IOUtilFunctions.closeSilently(br);
    }
}
Also used : DenseBlock(org.apache.sysml.runtime.matrix.data.DenseBlock) FastStringTokenizer(org.apache.sysml.runtime.util.FastStringTokenizer) InputStreamReader(java.io.InputStreamReader) BufferedReader(java.io.BufferedReader) IOException(java.io.IOException) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException) IOException(java.io.IOException)

Example 7 with FastStringTokenizer

use of org.apache.sysml.runtime.util.FastStringTokenizer in project systemml by apache.

the class ResultMergeLocalFile method createTextCellStagingFile.

/**
 * Streams the text cell file of the given matrix object ("row col value" per line, space
 * separated) and appends its cells to the staging area in buffered batches.
 *
 * <p>Cells are collected in an in-memory list that is handed to
 * {@code appendCellBufferToStagingArea} whenever it grows beyond
 * {@code StagingFileUtils.CELL_BUFFER_SIZE}, and once more at the end of every input split.
 *
 * @param fnameStaging staging name passed through to {@code appendCellBufferToStagingArea}
 * @param mo matrix object providing the input file name and the block sizes
 * @param ID identifier passed through to {@code appendCellBufferToStagingArea}
 * @throws IOException if reading the input fails
 * @throws DMLRuntimeException declared for the staging helpers invoked here
 */
private static void createTextCellStagingFile(String fnameStaging, MatrixObject mo, long ID) throws IOException, DMLRuntimeException {
    // configure a text input format over the matrix file and obtain its splits
    JobConf conf = new JobConf(ConfigurationManager.getCachedJobConf());
    FileInputFormat.addInputPath(conf, new Path(mo.getFileName()));
    TextInputFormat textFormat = new TextInputFormat();
    textFormat.configure(conf);
    InputSplit[] inputSplits = textFormat.getSplits(conf, 1);

    // reusable record holders and the pending-cell buffer
    LinkedList<Cell> pending = new LinkedList<>();
    LongWritable offset = new LongWritable();
    Text line = new Text();

    MatrixCharacteristics dims = mo.getMatrixCharacteristics();
    int brlen = dims.getRowsPerBlock();
    int bclen = dims.getColsPerBlock();

    // NOTE MB (kept from the original author): using long row/col here reproducibly led to
    // JIT compilation errors at runtime (WINDOWS, Intel x86-64, IBM JDK 64bit/32bit). int
    // works fine, but long is required for larger matrices. Since textcell is never used for
    // result merge (hybrid/hadoop: binaryblock, singlenode: binarycell), the proposal is to
    // exclude this method via -Xjit:exclude={package.method*}(count=0,optLevel=0).
    // FIXME needs reconsideration whenever textcell is used actively
    FastStringTokenizer tokenizer = new FastStringTokenizer(' ');
    for (int i = 0; i < inputSplits.length; i++) {
        RecordReader<LongWritable, Text> records = textFormat.getRecordReader(inputSplits[i], conf, Reporter.NULL);
        try {
            while (records.next(offset, line)) {
                // parse "row col value" from the current line
                tokenizer.reset(line.toString());
                long r = tokenizer.nextLong();
                long c = tokenizer.nextLong();
                double v = Double.parseDouble(tokenizer.nextToken());
                pending.add(new Cell(r, c, v));

                // periodic flush once the buffer exceeds its capacity
                if (pending.size() <= StagingFileUtils.CELL_BUFFER_SIZE) {
                    continue;
                }
                appendCellBufferToStagingArea(fnameStaging, ID, pending, brlen, bclen);
                pending.clear();
            }
            // final flush of whatever is left from this split
            if (pending.size() > 0) {
                appendCellBufferToStagingArea(fnameStaging, ID, pending, brlen, bclen);
                pending.clear();
            }
        } finally {
            IOUtilFunctions.closeSilently(records);
        }
    }
}
Also used : Path(org.apache.hadoop.fs.Path) Text(org.apache.hadoop.io.Text) LinkedList(java.util.LinkedList) MatrixCharacteristics(org.apache.sysml.runtime.matrix.MatrixCharacteristics) FastStringTokenizer(org.apache.sysml.runtime.util.FastStringTokenizer) TextInputFormat(org.apache.hadoop.mapred.TextInputFormat) LongWritable(org.apache.hadoop.io.LongWritable) JobConf(org.apache.hadoop.mapred.JobConf) InputSplit(org.apache.hadoop.mapred.InputSplit) MatrixCell(org.apache.sysml.runtime.matrix.data.MatrixCell) Cell(org.apache.sysml.runtime.controlprogram.parfor.util.Cell)

Example 8 with FastStringTokenizer

use of org.apache.sysml.runtime.util.FastStringTokenizer in project systemml by apache.

the class StagingFileUtils method nextKeyMap.

/**
 * Reads space-separated "row1 row2" pairs line by line from {@code in} and stores them in
 * {@code map}, grouped by block id {@code row1 / blen}: the inner map of a block id maps
 * {@code row1} to {@code row2}.
 *
 * <p>Reading stops at end of input, or right after the first pair whose block id is greater
 * than {@code bi}; that pair is still stored in {@code map} before the method returns.
 *
 * @param in reader positioned at the next pair to consume
 * @param map target map from block id to (row1 -> row2) entries; updated in place
 * @param bi block id up to which pairs are consumed
 * @param blen block length used to derive the block id of a row
 * @throws NumberFormatException declared for the numeric parsing of the tokens
 * @throws IOException if reading from {@code in} fails
 */
public static void nextKeyMap(BufferedReader in, HashMap<Integer, HashMap<Long, Long>> map, int bi, int blen) throws NumberFormatException, IOException {
    String value = null;
    FastStringTokenizer st = new FastStringTokenizer(' ');
    while ((value = in.readLine()) != null) {
        // reset tokenizer
        st.reset(value);
        long row1 = st.nextLong();
        long row2 = st.nextLong();
        // divide in long arithmetic and narrow afterwards; casting row1 alone (as in
        // "(int) row1 / blen") would truncate row indices beyond the int range before dividing
        int id = (int) (row1 / blen);
        HashMap<Long, Long> blockMap = map.get(id);
        if (blockMap == null) {
            blockMap = new HashMap<Long, Long>();
            map.put(id, blockMap);
        }
        blockMap.put(row1, row2);
        // the first pair of a later block has been stored; stop here
        if (id > bi)
            break;
    }
}
Also used : FastStringTokenizer(org.apache.sysml.runtime.util.FastStringTokenizer) HashMap(java.util.HashMap)

Example 9 with FastStringTokenizer

use of org.apache.sysml.runtime.util.FastStringTokenizer in project systemml by apache.

the class StagingFileUtils method readCellList2BlockFromLocal.

/**
 * Loads a local cell-list file ("row col value" per line, space separated) into a newly
 * created matrix block of size {@code brlen} x {@code bclen}.
 *
 * <p>The parsed row and column indices are used as-is (no offset is applied). In sparse mode
 * every cell is written via {@code quickSetValue}; in dense mode the dense block is allocated
 * up front, cells are written directly into it, and the non-zero count is recomputed once all
 * lines have been read. Finally {@code examSparsity} is invoked on the block.
 *
 * @param fname path of the local file to read
 * @param brlen number of rows of the block
 * @param bclen number of columns of the block
 * @param sparse whether the block is created and filled in sparse format
 * @return the populated matrix block
 * @throws IOException if the file cannot be opened or read
 * @throws DMLRuntimeException declared for the matrix block operations invoked here
 */
public static MatrixBlock readCellList2BlockFromLocal(String fname, int brlen, int bclen, boolean sparse) throws IOException, DMLRuntimeException {
    MatrixBlock block = new MatrixBlock(brlen, bclen, sparse);
    if (!sparse) {
        block.allocateDenseBlockUnsafe(brlen, bclen);
    }

    BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(fname)));
    FastStringTokenizer tokenizer = new FastStringTokenizer(' ');
    try {
        String line;
        if (!sparse) {
            // dense: write straight into the pre-allocated dense block
            DenseBlock dense = block.getDenseBlock();
            for (line = reader.readLine(); line != null; line = reader.readLine()) {
                tokenizer.reset(line);
                int r = tokenizer.nextInt();
                int c = tokenizer.nextInt();
                dense.set(r, c, tokenizer.nextDouble());
            }
            // direct writes bypass the block's bookkeeping, so refresh the nnz count once
            block.recomputeNonZeros();
        } else {
            // sparse: go through the block's own setter
            for (line = reader.readLine(); line != null; line = reader.readLine()) {
                tokenizer.reset(line);
                int r = tokenizer.nextInt();
                int c = tokenizer.nextInt();
                block.quickSetValue(r, c, tokenizer.nextDouble());
            }
        }
    } finally {
        IOUtilFunctions.closeSilently(reader);
    }

    // finally change internal representation if required
    block.examSparsity();
    return block;
}
Also used : DenseBlock(org.apache.sysml.runtime.matrix.data.DenseBlock) MatrixBlock(org.apache.sysml.runtime.matrix.data.MatrixBlock) FastStringTokenizer(org.apache.sysml.runtime.util.FastStringTokenizer) InputStreamReader(java.io.InputStreamReader) BufferedReader(java.io.BufferedReader) FileInputStream(java.io.FileInputStream)

Example 10 with FastStringTokenizer

use of org.apache.sysml.runtime.util.FastStringTokenizer in project systemml by apache.

the class ReaderTextCell method readRawTextCellMatrixFromInputStream.

/**
 * Reads a matrix in text cell format ("row col value" per line, space separated) from an
 * input stream into the given matrix block. Row and column indices in the input are 1-based
 * and are shifted to 0-based positions before they are written into {@code dest}; lines
 * whose row or column index is 0 are skipped.
 *
 * <p>If {@code matrixMarket} is set, the stream must start with a header line beginning with
 * "%%", optionally followed by comment lines (starting with '%') and blank lines, followed by
 * a size line whose first two fields must equal {@code rlen} and {@code clen}.
 *
 * <p>The reader wrapping {@code is} is always closed before this method returns, including
 * when header validation fails.
 *
 * @param is input stream to read from
 * @param dest target matrix block; its current sparse/dense format selects how cells are written
 * @param rlen expected number of rows
 * @param clen expected number of columns
 * @param brlen not used by this method
 * @param bclen not used by this method
 * @param matrixMarket true if the input carries a MatrixMarket header and size line
 * @throws IOException if the header or size line is missing, malformed or inconsistent with
 *         {@code rlen}/{@code clen}, or if any cell line cannot be read or applied
 */
private static void readRawTextCellMatrixFromInputStream(InputStream is, MatrixBlock dest, long rlen, long clen, int brlen, int bclen, boolean matrixMarket) throws IOException {
    BufferedReader br = new BufferedReader(new InputStreamReader(is));
    boolean sparse = dest.isInSparseFormat();
    String value = null;
    int row = -1;
    int col = -1;
    try {
        // Read the header lines, if reading from a matrixMarket file.
        // This happens inside the outer try so that the reader is closed on header errors too,
        // but outside the inner try so that header errors are not re-labelled as cell errors.
        if (matrixMarket) {
            // header line
            value = br.readLine();
            if (value == null || !value.startsWith("%%")) {
                throw new IOException("Error while reading file in MatrixMarket format. Expecting a header line, but encountered, \"" + value + "\".");
            }
            // skip until end-of-comments; blank lines are skipped as well so that charAt(0)
            // is only evaluated on non-empty lines
            do {
                value = br.readLine();
            } while (value != null && (value.trim().isEmpty() || value.charAt(0) == '%'));
            if (value == null) {
                throw new IOException("Error while reading file in MatrixMarket format. Expecting a line with matrix dimensions, but reached end of input.");
            }
            // the first line after comments is the one w/ matrix dimensions
            // validate (rlen clen nnz)
            String[] fields = value.trim().split("\\s+");
            if (fields.length < 2) {
                throw new IOException("Error while reading file in MatrixMarket format. Expecting a line with matrix dimensions, but encountered, \"" + value + "\".");
            }
            long mm_rlen;
            long mm_clen;
            try {
                mm_rlen = Long.parseLong(fields[0]);
                mm_clen = Long.parseLong(fields[1]);
            } catch (NumberFormatException ex) {
                throw new IOException("Error while reading file in MatrixMarket format. Expecting a line with matrix dimensions, but encountered, \"" + value + "\".", ex);
            }
            if (rlen != mm_rlen || clen != mm_clen) {
                throw new IOException("Unexpected matrix dimensions while reading file in MatrixMarket format. Expecting dimensions [" + rlen + " rows, " + clen + " cols] but encountered [" + mm_rlen + " rows, " + mm_clen + " cols].");
            }
        }
        try {
            FastStringTokenizer st = new FastStringTokenizer(' ');
            if (sparse) {
                // SPARSE<-value
                while ((value = br.readLine()) != null) {
                    // reinit tokenizer
                    st.reset(value);
                    row = st.nextInt() - 1;
                    col = st.nextInt() - 1;
                    // an index of 0 in the input is not a valid 1-based position; skip the line
                    if (row == -1 || col == -1)
                        continue;
                    double lvalue = st.nextDouble();
                    dest.appendValue(row, col, lvalue);
                }
                // cells were appended in input order, so rows are sorted once at the end
                dest.sortSparseRows();
            } else {
                // DENSE<-value
                DenseBlock a = dest.getDenseBlock();
                while ((value = br.readLine()) != null) {
                    // reinit tokenizer
                    st.reset(value);
                    row = st.nextInt() - 1;
                    col = st.nextInt() - 1;
                    if (row == -1 || col == -1)
                        continue;
                    double lvalue = st.nextDouble();
                    a.set(row, col, lvalue);
                }
            }
        } catch (Exception ex) {
            // post-mortem error handling and bounds checking: row/col hold the last indices
            // parsed, so an out-of-range cell can be reported precisely
            if (row < 0 || row + 1 > rlen || col < 0 || col + 1 > clen)
                throw new IOException("Matrix cell [" + (row + 1) + "," + (col + 1) + "] " + "out of overall matrix range [1:" + rlen + ",1:" + clen + "].", ex);
            else
                throw new IOException("Unable to read matrix in raw text cell format.", ex);
        }
    } finally {
        IOUtilFunctions.closeSilently(br);
    }
}
Also used : DenseBlock(org.apache.sysml.runtime.matrix.data.DenseBlock) FastStringTokenizer(org.apache.sysml.runtime.util.FastStringTokenizer) InputStreamReader(java.io.InputStreamReader) BufferedReader(java.io.BufferedReader) IOException(java.io.IOException) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException) IOException(java.io.IOException)

Aggregations

FastStringTokenizer (org.apache.sysml.runtime.util.FastStringTokenizer)20 IOException (java.io.IOException)10 DMLRuntimeException (org.apache.sysml.runtime.DMLRuntimeException)10 BufferedReader (java.io.BufferedReader)8 InputStreamReader (java.io.InputStreamReader)8 LongWritable (org.apache.hadoop.io.LongWritable)8 Text (org.apache.hadoop.io.Text)8 LinkedList (java.util.LinkedList)6 InputSplit (org.apache.hadoop.mapred.InputSplit)6 TextInputFormat (org.apache.hadoop.mapred.TextInputFormat)6 DenseBlock (org.apache.sysml.runtime.matrix.data.DenseBlock)6 FileInputStream (java.io.FileInputStream)4 HashMap (java.util.HashMap)4 Path (org.apache.hadoop.fs.Path)4 JobConf (org.apache.hadoop.mapred.JobConf)4 Cell (org.apache.sysml.runtime.controlprogram.parfor.util.Cell)4 MatrixCell (org.apache.sysml.runtime.matrix.data.MatrixCell)4 File (java.io.File)2 SequenceFile (org.apache.hadoop.io.SequenceFile)2 ValueType (org.apache.sysml.parser.Expression.ValueType)2