Search in sources :

Example 6 with CSVFileFormatProperties

use of org.apache.sysml.runtime.matrix.data.CSVFileFormatProperties in project incubator-systemml by apache.

the class MatrixReaderFactory method createMatrixReader.

/**
 * Creates the matrix reader that matches the given input format.
 * <p>
 * For text cell / matrix market, CSV, and binary block inputs a parallel
 * reader is chosen when the corresponding parallel-read compiler flag is
 * enabled and the default sparse block type is MCSR; otherwise the
 * sequential reader is used. Binary cell input always uses the sequential
 * reader. CSV readers are created with default CSV format properties.
 *
 * @param iinfo input format for which a reader is requested
 * @return a reader instance for the given input format
 * @throws DMLRuntimeException if the input format is none of the handled ones
 */
public static MatrixReader createMatrixReader(InputInfo iinfo) throws DMLRuntimeException {
    // text cell and matrix market share the same readers
    if (iinfo == InputInfo.TextCellInputInfo || iinfo == InputInfo.MatrixMarketInputInfo) {
        boolean useParallel = ConfigurationManager.getCompilerConfigFlag(ConfigType.PARALLEL_CP_READ_TEXTFORMATS)
            && MatrixBlock.DEFAULT_SPARSEBLOCK == SparseBlock.Type.MCSR;
        if (useParallel) {
            return new ReaderTextCellParallel(iinfo);
        }
        return new ReaderTextCell(iinfo);
    }

    if (iinfo == InputInfo.CSVInputInfo) {
        boolean useParallel = ConfigurationManager.getCompilerConfigFlag(ConfigType.PARALLEL_CP_READ_TEXTFORMATS)
            && MatrixBlock.DEFAULT_SPARSEBLOCK == SparseBlock.Type.MCSR;
        if (useParallel) {
            return new ReaderTextCSVParallel(new CSVFileFormatProperties());
        }
        return new ReaderTextCSV(new CSVFileFormatProperties());
    }

    if (iinfo == InputInfo.BinaryCellInputInfo) {
        return new ReaderBinaryCell();
    }

    if (iinfo == InputInfo.BinaryBlockInputInfo) {
        boolean useParallel = ConfigurationManager.getCompilerConfigFlag(ConfigType.PARALLEL_CP_READ_BINARYFORMATS)
            && MatrixBlock.DEFAULT_SPARSEBLOCK == SparseBlock.Type.MCSR;
        if (useParallel) {
            return new ReaderBinaryBlockParallel(false);
        }
        return new ReaderBinaryBlock(false);
    }

    // none of the known formats matched
    throw new DMLRuntimeException("Failed to create matrix reader for unknown input info: " + InputInfo.inputInfoToString(iinfo));
}
Also used : CSVFileFormatProperties(org.apache.sysml.runtime.matrix.data.CSVFileFormatProperties) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException)

Example 7 with CSVFileFormatProperties

use of org.apache.sysml.runtime.matrix.data.CSVFileFormatProperties in project incubator-systemml by apache.

the class AutomatedTestBase method readRFrameFromHDFS.

/**
 * Reads a frame from the expected-results directory using a frame reader
 * created for the given input format, with CSV properties whose header
 * flag is set to true.
 *
 * @param fileName file name, appended to {@code baseDirectory + EXPECTED_DIR}
 * @param iinfo input format passed to the frame reader factory
 * @param md matrix characteristics supplying the number of rows and columns to read
 * @return the frame block returned by the reader
 * @throws DMLRuntimeException declared by the signature; raised by the called reader code
 * @throws IOException declared by the signature; raised by the called reader code
 */
protected static FrameBlock readRFrameFromHDFS(String fileName, InputInfo iinfo, MatrixCharacteristics md) throws DMLRuntimeException, IOException {
    // CSV properties with the header flag enabled
    CSVFileFormatProperties csvProps = new CSVFileFormatProperties();
    csvProps.setHeader(true);

    // read frame data from hdfs
    FrameReader frameReader = FrameReaderFactory.createFrameReader(iinfo, csvProps);
    String framePath = baseDirectory + EXPECTED_DIR + fileName;
    return frameReader.readFrameFromHDFS(framePath, md.getRows(), md.getCols());
}
Also used : CSVFileFormatProperties(org.apache.sysml.runtime.matrix.data.CSVFileFormatProperties) FrameReader(org.apache.sysml.runtime.io.FrameReader)

Example 8 with CSVFileFormatProperties

use of org.apache.sysml.runtime.matrix.data.CSVFileFormatProperties in project incubator-systemml by apache.

the class WriterTextCSV method writeCSVMatrixToFile.

/**
 * Writes the rows {@code [rl, ru)} of a matrix block as CSV text to the given path.
 * <p>
 * Output is assembled in a reusable {@link StringBuilder} and handed to the
 * writer in chunks of at most {@code BLOCKSIZE_J} columns so that very wide
 * rows do not require one huge string. If the properties request a header
 * and {@code rl == 0}, a header line {@code C1,C2,...} (using the configured
 * delimiter) is written first. Zero cells are written as {@code 0}, or as
 * empty fields when {@code props.isSparse()} is true.
 * <p>
 * The writer is managed by try-with-resources, so a failure while flushing
 * or closing the stream is reported to the caller as an {@link IOException}
 * instead of being silently dropped.
 *
 * @param path target file; created via {@code fs.create(path, true)}
 * @param job job configuration (not used by this method)
 * @param fs file system used to create the output stream
 * @param src matrix block to write, in sparse or dense format
 * @param rl index of the first row to write (inclusive)
 * @param ru index of the last row to write (exclusive)
 * @param props CSV format properties; defaults are used when {@code null}
 * @throws IOException if writing to, flushing, or closing the output fails
 */
protected final void writeCSVMatrixToFile(Path path, JobConf job, FileSystem fs, MatrixBlock src, int rl, int ru, CSVFileFormatProperties props) throws IOException {
    boolean sparse = src.isInSparseFormat();
    int clen = src.getNumColumns();
    //create buffered writer; closed (and thereby flushed) on exit, propagating any failure
    try (BufferedWriter br = new BufferedWriter(new OutputStreamWriter(fs.create(path, true)))) {
        //for obj reuse and preventing repeated buffer re-allocations
        StringBuilder sb = new StringBuilder();
        props = (props == null) ? new CSVFileFormatProperties() : props;
        String delim = props.getDelim();
        boolean csvsparse = props.isSparse();
        // Write header line, if needed (only for the chunk starting at row 0)
        if (props.hasHeader() && rl == 0) {
            //write row chunk-wise to prevent OOM on large number of columns
            for (int bj = 0; bj < clen; bj += BLOCKSIZE_J) {
                for (int j = bj; j < Math.min(clen, bj + BLOCKSIZE_J); j++) {
                    // column names are 1-based: C1, C2, ...
                    sb.append('C').append(j + 1);
                    if (j < clen - 1)
                        sb.append(delim);
                }
                br.write(sb.toString());
                sb.setLength(0);
            }
            sb.append('\n');
            br.write(sb.toString());
            sb.setLength(0);
        }
        // Write data lines
        if (sparse) {
            //SPARSE
            SparseBlock sblock = src.getSparseBlock();
            for (int i = rl; i < ru; i++) {
                // column index of the last non-zero written in this row (-1: none yet)
                int prev_jix = -1;
                if (sblock != null && i < sblock.numRows() && !sblock.isEmpty(i)) {
                    int pos = sblock.pos(i);
                    int alen = sblock.size(i);
                    int[] aix = sblock.indexes(i);
                    double[] avals = sblock.values(i);
                    for (int j = pos; j < pos + alen; j++) {
                        int jix = aix[j];
                        // output the zero/empty fields between the previous non-zero and this one
                        for (int j2 = prev_jix; j2 < jix - 1; j2++) {
                            if (!csvsparse)
                                sb.append('0');
                            sb.append(delim);
                            //flush buffered string
                            if (j2 % BLOCKSIZE_J == 0) {
                                br.write(sb.toString());
                                sb.setLength(0);
                            }
                        }
                        // output the value (non-zero) and flush the buffer
                        sb.append(avals[j]);
                        if (jix < clen - 1)
                            sb.append(delim);
                        br.write(sb.toString());
                        sb.setLength(0);
                        prev_jix = jix;
                    }
                }
                // Output the remaining fields after the last non-zero
                // (all clen fields in case of an empty row), chunk-wise
                for (int bj = prev_jix + 1; bj < clen; bj += BLOCKSIZE_J) {
                    for (int j = bj; j < Math.min(clen, bj + BLOCKSIZE_J); j++) {
                        if (!csvsparse)
                            sb.append('0');
                        if (j < clen - 1)
                            sb.append(delim);
                    }
                    br.write(sb.toString());
                    sb.setLength(0);
                }
                sb.append('\n');
                br.write(sb.toString());
                sb.setLength(0);
            }
        } else {
            //DENSE
            for (int i = rl; i < ru; i++) {
                //write row chunk-wise to prevent OOM on large number of columns
                for (int bj = 0; bj < clen; bj += BLOCKSIZE_J) {
                    for (int j = bj; j < Math.min(clen, bj + BLOCKSIZE_J); j++) {
                        double lvalue = src.getValueDenseUnsafe(i, j);
                        if (lvalue != 0)
                            //for nnz
                            sb.append(lvalue);
                        else if (!csvsparse)
                            sb.append('0');
                        if (j != clen - 1)
                            sb.append(delim);
                    }
                    br.write(sb.toString());
                    sb.setLength(0);
                }
                sb.append('\n');
                br.write(sb.toString());
                sb.setLength(0);
            }
        }
    }
}
Also used : CSVFileFormatProperties(org.apache.sysml.runtime.matrix.data.CSVFileFormatProperties) OutputStreamWriter(java.io.OutputStreamWriter) SparseBlock(org.apache.sysml.runtime.matrix.data.SparseBlock) BufferedWriter(java.io.BufferedWriter)

Example 9 with CSVFileFormatProperties

use of org.apache.sysml.runtime.matrix.data.CSVFileFormatProperties in project incubator-systemml by apache.

the class MapReduceTool method metaDataToString.

/**
 * Builds the metadata description of a data object as a JSON string.
 * <p>
 * The entries are added in a fixed order: data type, value type (or the
 * schema when one is given), dimensions and, for matrices, block sizes
 * (binary block output only) and number of non-zeros, the format type,
 * CSV header/delimiter settings (CSV output only), an optional description,
 * the author (system property {@code user.name}, or {@code "SystemML"} when
 * that is empty), and the creation timestamp.
 * <p>
 * Data type and value type names are lower-cased with {@code Locale.ROOT}
 * so that the result does not depend on the JVM default locale (e.g. the
 * Turkish dotless-i mapping of {@code I}).
 *
 * @param vt value type, written when {@code schema} is {@code null}
 * @param schema per-column value types; unknown entries are written as {@code *}
 * @param dt data type of the described object
 * @param mc characteristics supplying dimensions, block sizes and non-zeros
 * @param outinfo output format of the described object
 * @param formatProperties optional format properties; for CSV output, defaults
 *        are used when {@code null}
 * @return the JSON text, indented with 4 spaces
 * @throws JSONException declared by the signature; raised by the JSON object calls
 * @throws DMLRuntimeException declared by the signature; raised by called project code
 */
public static String metaDataToString(ValueType vt, ValueType[] schema, DataType dt, MatrixCharacteristics mc, OutputInfo outinfo, FileFormatProperties formatProperties) throws JSONException, DMLRuntimeException {
    // maintain order in output file
    OrderedJSONObject mtd = new OrderedJSONObject();
    //handle data type and value types (incl schema for frames)
    mtd.put(DataExpression.DATATYPEPARAM, dt.toString().toLowerCase(java.util.Locale.ROOT));
    if (schema == null) {
        mtd.put(DataExpression.VALUETYPEPARAM, vt.toString().toLowerCase(java.util.Locale.ROOT));
    } else {
        // method-local buffer, no synchronization needed
        StringBuilder schemaSB = new StringBuilder();
        for (int i = 0; i < schema.length; i++) {
            if (schema[i] == ValueType.UNKNOWN)
                schemaSB.append("*");
            else
                schemaSB.append(schema[i].toString());
            // delimiter is appended after every entry, including the last one
            schemaSB.append(DataExpression.DEFAULT_DELIM_DELIMITER);
        }
        mtd.put(DataExpression.SCHEMAPARAM, schemaSB.toString());
    }
    //handle output dimensions
    if (!dt.isScalar()) {
        mtd.put(DataExpression.READROWPARAM, mc.getRows());
        mtd.put(DataExpression.READCOLPARAM, mc.getCols());
        // handle output nnz and binary block configuration
        if (dt.isMatrix()) {
            if (outinfo == OutputInfo.BinaryBlockOutputInfo) {
                mtd.put(DataExpression.ROWBLOCKCOUNTPARAM, mc.getRowsPerBlock());
                mtd.put(DataExpression.COLUMNBLOCKCOUNTPARAM, mc.getColsPerBlock());
            }
            mtd.put(DataExpression.READNUMNONZEROPARAM, mc.getNonZeros());
        }
    }
    //handle format type and additional arguments
    mtd.put(DataExpression.FORMAT_TYPE, OutputInfo.outputInfoToStringExternal(outinfo));
    if (outinfo == OutputInfo.CSVOutputInfo) {
        CSVFileFormatProperties csvProperties = (formatProperties == null) ? new CSVFileFormatProperties() : (CSVFileFormatProperties) formatProperties;
        mtd.put(DataExpression.DELIM_HAS_HEADER_ROW, csvProperties.hasHeader());
        mtd.put(DataExpression.DELIM_DELIMITER, csvProperties.getDelim());
    }
    // optional free-text description, JSON-escaped before insertion
    if (formatProperties != null) {
        String description = formatProperties.getDescription();
        if (StringUtils.isNotEmpty(description)) {
            String jsonDescription = StringEscapeUtils.escapeJson(description);
            mtd.put(DataExpression.DESCRIPTIONPARAM, jsonDescription);
        }
    }
    // author: current user name, or a fixed fallback
    String userName = System.getProperty("user.name");
    if (StringUtils.isNotEmpty(userName)) {
        mtd.put(DataExpression.AUTHORPARAM, userName);
    } else {
        mtd.put(DataExpression.AUTHORPARAM, "SystemML");
    }
    // creation timestamp
    SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss z");
    mtd.put(DataExpression.CREATEDPARAM, sdf.format(new Date()));
    // indent with 4 spaces
    return mtd.toString(4);
}
Also used : CSVFileFormatProperties(org.apache.sysml.runtime.matrix.data.CSVFileFormatProperties) OrderedJSONObject(org.apache.wink.json4j.OrderedJSONObject) SimpleDateFormat(java.text.SimpleDateFormat) Date(java.util.Date)

Example 10 with CSVFileFormatProperties

use of org.apache.sysml.runtime.matrix.data.CSVFileFormatProperties in project incubator-systemml by apache.

the class FrameReadWriteTest method runFrameReadWriteTest.

/**
 * Builds two frames from seeded random data and passes them, together with
 * CSV properties using {@code DELIMITER} and {@code HEADER}, to
 * {@code writeAndVerifyData}.
 * <p>
 * The static text/binary parallel read-write compiler flags are set to
 * {@code parallel} for the duration of the run and restored afterwards;
 * the global compiler config is re-created after each flag change.
 * Any exception is printed and rethrown wrapped in a {@link RuntimeException}.
 *
 * @param oinfo output format passed to {@code writeAndVerifyData}
 * @param schema1 schema of the first frame; its length is the column count of the first data set
 * @param schema2 schema of the second frame; its length is the column count of the second data set
 * @param parallel value assigned to both parallel read-write flags during the test
 */
private void runFrameReadWriteTest(OutputInfo oinfo, ValueType[] schema1, ValueType[] schema2, boolean parallel) {
    // remember current flag values so they can be restored
    final boolean savedTextFlag = CompilerConfig.FLAG_PARREADWRITE_TEXT;
    final boolean savedBinaryFlag = CompilerConfig.FLAG_PARREADWRITE_BINARY;
    try {
        CompilerConfig.FLAG_PARREADWRITE_TEXT = parallel;
        CompilerConfig.FLAG_PARREADWRITE_BINARY = parallel;
        ConfigurationManager.setGlobalConfig(new CompilerConfig());

        // generate input data with fixed seeds
        double[][] dataFirst = getRandomMatrix(rows, schema1.length, -10, 10, 0.9, 2373);
        double[][] dataSecond = getRandomMatrix(rows, schema2.length, -10, 10, 0.9, 129);

        // populate the first frame
        FrameBlock firstFrame = new FrameBlock(schema1);
        initFrameData(firstFrame, dataFirst, schema1);

        // populate the second frame
        FrameBlock secondFrame = new FrameBlock(schema2);
        initFrameData(secondFrame, dataSecond, schema2);

        // CSV settings used when writing the frames
        CSVFileFormatProperties csvProps = new CSVFileFormatProperties();
        csvProps.setDelim(DELIMITER);
        csvProps.setHeader(HEADER);

        writeAndVerifyData(oinfo, firstFrame, secondFrame, csvProps);
    } catch (Exception failure) {
        failure.printStackTrace();
        throw new RuntimeException(failure);
    } finally {
        // restore the original flags and rebuild the global config
        CompilerConfig.FLAG_PARREADWRITE_TEXT = savedTextFlag;
        CompilerConfig.FLAG_PARREADWRITE_BINARY = savedBinaryFlag;
        ConfigurationManager.setGlobalConfig(new CompilerConfig());
    }
}
Also used : CSVFileFormatProperties(org.apache.sysml.runtime.matrix.data.CSVFileFormatProperties) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException) FrameBlock(org.apache.sysml.runtime.matrix.data.FrameBlock) CompilerConfig(org.apache.sysml.conf.CompilerConfig) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException) IOException(java.io.IOException)

Aggregations

CSVFileFormatProperties (org.apache.sysml.runtime.matrix.data.CSVFileFormatProperties)24 DMLRuntimeException (org.apache.sysml.runtime.DMLRuntimeException)12 FrameBlock (org.apache.sysml.runtime.matrix.data.FrameBlock)10 RUNTIME_PLATFORM (org.apache.sysml.api.DMLScript.RUNTIME_PLATFORM)8 FrameReader (org.apache.sysml.runtime.io.FrameReader)7 MatrixBlock (org.apache.sysml.runtime.matrix.data.MatrixBlock)7 MatrixCharacteristics (org.apache.sysml.runtime.matrix.MatrixCharacteristics)6 JavaPairRDD (org.apache.spark.api.java.JavaPairRDD)5 LongWritable (org.apache.hadoop.io.LongWritable)4 InputInfo (org.apache.sysml.runtime.matrix.data.InputInfo)4 Text (org.apache.hadoop.io.Text)3 JavaRDD (org.apache.spark.api.java.JavaRDD)3 MatrixIndexes (org.apache.sysml.runtime.matrix.data.MatrixIndexes)3 BufferedWriter (java.io.BufferedWriter)2 IOException (java.io.IOException)2 OutputStreamWriter (java.io.OutputStreamWriter)2 ArrayList (java.util.ArrayList)2 ValueType (org.apache.sysml.parser.Expression.ValueType)2 LongFrameToLongWritableFrameFunction (org.apache.sysml.runtime.instructions.spark.utils.FrameRDDConverterUtils.LongFrameToLongWritableFrameFunction)2 ReaderBinaryBlock (org.apache.sysml.runtime.io.ReaderBinaryBlock)2