use of org.apache.sysml.runtime.matrix.data.CSVFileFormatProperties in project incubator-systemml by apache.
the class ReblockSPInstruction method processMatrixReblockInstruction.
@SuppressWarnings("unchecked")
protected void processMatrixReblockInstruction(SparkExecutionContext sec, InputInfo iinfo) throws DMLRuntimeException {
    MatrixObject mo = sec.getMatrixObject(input1.getName());
    MatrixCharacteristics mc = sec.getMatrixCharacteristics(input1.getName());
    MatrixCharacteristics mcOut = sec.getMatrixCharacteristics(output.getName());
    if (iinfo == InputInfo.TextCellInputInfo || iinfo == InputInfo.MatrixMarketInputInfo) {
        //check JDK version (prevent Double.parseDouble contention on <JDK8)
        sec.checkAndRaiseValidationWarningJDKVersion();
        //get the input textcell rdd
        JavaPairRDD<LongWritable, Text> lines = (JavaPairRDD<LongWritable, Text>) sec.getRDDHandleForVariable(input1.getName(), iinfo);
        //convert textcell to binary block
        JavaPairRDD<MatrixIndexes, MatrixBlock> out = RDDConverterUtils.textCellToBinaryBlock(sec.getSparkContext(), lines, mcOut, outputEmptyBlocks);
        //put output RDD handle into symbol table
        sec.setRDDHandleForVariable(output.getName(), out);
        sec.addLineageRDD(output.getName(), input1.getName());
    }
    else if (iinfo == InputInfo.CSVInputInfo) {
        //HACK ALERT: delegate to CSVReblockSPInstruction until we introduce the rewrite
        //that inserts csvrblock for non-persistent reads
        //(previously: throw new DMLRuntimeException("CSVInputInfo is not supported for ReblockSPInstruction"))
        boolean hasHeader = false;
        String delim = ",";
        boolean fill = false;
        double fillValue = 0;
        //instanceof already implies a non-null check
        if (mo.getFileFormatProperties() instanceof CSVFileFormatProperties) {
            CSVFileFormatProperties props = (CSVFileFormatProperties) mo.getFileFormatProperties();
            hasHeader = props.hasHeader();
            delim = props.getDelim();
            fill = props.isFill();
            fillValue = props.getFillValue();
        }
        CSVReblockSPInstruction csvInstruction = new CSVReblockSPInstruction(null, input1, output, mcOut.getRowsPerBlock(), mcOut.getColsPerBlock(), hasHeader, delim, fill, fillValue, "csvrblk", instString);
        csvInstruction.processInstruction(sec);
        return;
    }
    else if (iinfo == InputInfo.BinaryCellInputInfo) {
        JavaPairRDD<MatrixIndexes, MatrixCell> binaryCells = (JavaPairRDD<MatrixIndexes, MatrixCell>) sec.getRDDHandleForVariable(input1.getName(), iinfo);
        JavaPairRDD<MatrixIndexes, MatrixBlock> out = RDDConverterUtils.binaryCellToBinaryBlock(sec.getSparkContext(), binaryCells, mcOut, outputEmptyBlocks);
        //put output RDD handle into symbol table
        sec.setRDDHandleForVariable(output.getName(), out);
        sec.addLineageRDD(output.getName(), input1.getName());
    }
    else if (iinfo == InputInfo.BinaryBlockInputInfo) {
        //BINARY BLOCK <- BINARY BLOCK (different sizes)
        JavaPairRDD<MatrixIndexes, MatrixBlock> in1 = sec.getBinaryBlockRDDHandleForVariable(input1.getName());
        JavaPairRDD<MatrixIndexes, MatrixBlock> out = in1.flatMapToPair(new ExtractBlockForBinaryReblock(mc, mcOut));
        out = RDDAggregateUtils.mergeByKey(out, false);
        //put output RDD handle into symbol table
        sec.setRDDHandleForVariable(output.getName(), out);
        sec.addLineageRDD(output.getName(), input1.getName());
    }
    else {
        throw new DMLRuntimeException("The given InputInfo is not implemented for ReblockSPInstruction: " + InputInfo.inputInfoToString(iinfo));
    }
}
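The CSV branch above falls back to SystemML's CSV defaults (no header, comma delimiter, no fill) whenever the MatrixObject carries no CSVFileFormatProperties. A minimal standalone sketch of that fallback pattern, using only the getters shown above; note that instanceof doubles as the null check:

    FileFormatProperties ffp = mo.getFileFormatProperties();
    boolean hasHeader = false; // defaults mirror the branch above
    String delim = ",";
    boolean fill = false;
    double fillValue = 0;
    if (ffp instanceof CSVFileFormatProperties) { // false for null, so no separate null check needed
        CSVFileFormatProperties props = (CSVFileFormatProperties) ffp;
        hasHeader = props.hasHeader();
        delim = props.getDelim();
        fill = props.isFill();
        fillValue = props.getFillValue();
    }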
use of org.apache.sysml.runtime.matrix.data.CSVFileFormatProperties in project incubator-systemml by apache.
the class WriteSPInstruction method processMatrixWriteInstruction.
protected void processMatrixWriteInstruction(SparkExecutionContext sec, String fname, OutputInfo oi) throws DMLRuntimeException, IOException {
    //get input rdd
    JavaPairRDD<MatrixIndexes, MatrixBlock> in1 = sec.getBinaryBlockRDDHandleForVariable(input1.getName());
    MatrixCharacteristics mc = sec.getMatrixCharacteristics(input1.getName());
    if (oi == OutputInfo.MatrixMarketOutputInfo || oi == OutputInfo.TextCellOutputInfo) {
        //piggyback nnz maintenance on write
        LongAccumulator aNnz = null;
        if (isInputMatrixBlock && !mc.nnzKnown()) {
            aNnz = sec.getSparkContext().sc().longAccumulator("nnz");
            in1 = in1.mapValues(new ComputeBinaryBlockNnzFunction(aNnz));
        }
        JavaRDD<String> header = null;
        if (oi == OutputInfo.MatrixMarketOutputInfo) {
            ArrayList<String> headerContainer = new ArrayList<String>(1);
            //MM header: format line, followed by number of rows, columns, and non-zeros
            String headerStr = "%%MatrixMarket matrix coordinate real general\n"
                + mc.getRows() + " " + mc.getCols() + " " + mc.getNonZeros();
            headerContainer.add(headerStr);
            header = sec.getSparkContext().parallelize(headerContainer);
        }
        JavaRDD<String> ijv = RDDConverterUtils.binaryBlockToTextCell(in1, mc);
        if (header != null)
            customSaveTextFile(header.union(ijv), fname, true);
        else
            customSaveTextFile(ijv, fname, false);
        if (isInputMatrixBlock && !mc.nnzKnown())
            mc.setNonZeros(aNnz.value());
    }
    else if (oi == OutputInfo.CSVOutputInfo) {
        JavaRDD<String> out = null;
        LongAccumulator aNnz = null;
        if (isInputMatrixBlock) {
            //piggyback nnz computation on actual write
            if (!mc.nnzKnown()) {
                aNnz = sec.getSparkContext().sc().longAccumulator("nnz");
                in1 = in1.mapValues(new ComputeBinaryBlockNnzFunction(aNnz));
            }
            out = RDDConverterUtils.binaryBlockToCsv(in1, mc, (CSVFileFormatProperties) formatProperties, true);
        }
        else {
            //this case applies when the CSV output of transform() is written out
            //TODO remove once transform over frames is supported
            @SuppressWarnings("unchecked")
            JavaPairRDD<Long, String> rdd = (JavaPairRDD<Long, String>) (sec.getMatrixObject(input1.getName())).getRDDHandle().getRDD();
            out = rdd.values();
            String sep = ",";
            boolean hasHeader = false;
            if (formatProperties != null) {
                sep = ((CSVFileFormatProperties) formatProperties).getDelim();
                hasHeader = ((CSVFileFormatProperties) formatProperties).hasHeader();
            }
            if (hasHeader) {
                //generate a positional header C1,...,Cn (one name per column)
                StringBuilder buf = new StringBuilder();
                for (int j = 1; j <= mc.getCols(); j++) {
                    if (j != 1) {
                        buf.append(sep);
                    }
                    buf.append("C" + j);
                }
                ArrayList<String> headerContainer = new ArrayList<String>(1);
                headerContainer.add(buf.toString());
                JavaRDD<String> header = sec.getSparkContext().parallelize(headerContainer);
                out = header.union(out);
            }
        }
        customSaveTextFile(out, fname, false);
        if (isInputMatrixBlock && !mc.nnzKnown())
            mc.setNonZeros(aNnz.value());
    }
    else if (oi == OutputInfo.BinaryBlockOutputInfo) {
        //piggyback nnz computation on actual write
        LongAccumulator aNnz = null;
        if (!mc.nnzKnown()) {
            aNnz = sec.getSparkContext().sc().longAccumulator("nnz");
            in1 = in1.mapValues(new ComputeBinaryBlockNnzFunction(aNnz));
        }
        //save binary block rdd on hdfs
        in1.saveAsHadoopFile(fname, MatrixIndexes.class, MatrixBlock.class, SequenceFileOutputFormat.class);
        if (!mc.nnzKnown())
            mc.setNonZeros(aNnz.value());
    }
    else {
        //unsupported formats: binarycell (not externalized)
        throw new DMLRuntimeException("Unexpected data format: " + OutputInfo.outputInfoToString(oi));
    }
    //write meta data file
    MapReduceTool.writeMetaDataFile(fname + ".mtd", ValueType.DOUBLE, mc, oi, formatProperties);
}
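The header generated for the CSV branch is purely positional (C1, C2, ..., Cn), since the original column names are not tracked at this point. A standalone sketch of that scheme; the helper name makeCsvHeader is ours, not SystemML's:

    static String makeCsvHeader(long numCols, String sep) {
        StringBuilder buf = new StringBuilder();
        for (long j = 1; j <= numCols; j++) {
            if (j != 1)
                buf.append(sep); // separator between names, not before the first
            buf.append("C").append(j);
        }
        return buf.toString();
    }

For example, makeCsvHeader(3, ",") yields "C1,C2,C3".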
use of org.apache.sysml.runtime.matrix.data.CSVFileFormatProperties in project incubator-systemml by apache.
the class WriteSPInstruction method parseInstruction.
public static WriteSPInstruction parseInstruction(String str) throws DMLRuntimeException {
    String[] parts = InstructionUtils.getInstructionPartsWithValueType(str);
    String opcode = parts[0];
    if (!opcode.equals("write")) {
        throw new DMLRuntimeException("Unsupported opcode: " + opcode);
    }
    //write instructions for csv files carry four additional operands (hasHeader, delimiter, sparse, isInputMatrixBlock)
    if (parts.length != 5 && parts.length != 9) {
        throw new DMLRuntimeException("Invalid number of operands in write instruction: " + str);
    }
    //example: SPARK°write°_mVar2·MATRIX·DOUBLE°./src/test/scripts/functions/data/out/B·SCALAR·STRING·true°matrixmarket·SCALAR·STRING·true
    CPOperand in1 = new CPOperand(parts[1]); //e.g., _mVar2·MATRIX·DOUBLE
    CPOperand in2 = new CPOperand(parts[2]);
    CPOperand in3 = new CPOperand(parts[3]);
    WriteSPInstruction inst = new WriteSPInstruction(in1, in2, in3, opcode, str);
    if (in3.getName().equalsIgnoreCase("csv")) {
        boolean hasHeader = Boolean.parseBoolean(parts[4]);
        String delim = parts[5];
        boolean sparse = Boolean.parseBoolean(parts[6]);
        FileFormatProperties formatProperties = new CSVFileFormatProperties(hasHeader, delim, sparse);
        inst.setFormatProperties(formatProperties);
        boolean isInputMB = Boolean.parseBoolean(parts[7]);
        inst.setInputMatrixBlock(isInputMB);
        inst.input4 = new CPOperand(parts[8]);
    }
    else {
        inst.setFormatProperties(new FileFormatProperties());
        inst.input4 = new CPOperand(parts[4]);
    }
    return inst;
}
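To make the CSV operand layout concrete, here is a minimal sketch that builds the same format properties from example values; the positional meaning of parts[4..8] follows the branch above, while the literal values are hypothetical:

    //hypothetical operands for a CSV write: header row, comma delimiter, dense output
    String[] csvParts = { "write", "A·MATRIX·DOUBLE", "out.csv·SCALAR·STRING·true",
        "csv·SCALAR·STRING·true", "true", ",", "false", "true", "_mVar3·MATRIX·DOUBLE" };
    FileFormatProperties props = new CSVFileFormatProperties(
        Boolean.parseBoolean(csvParts[4]), //hasHeader
        csvParts[5],                       //delimiter
        Boolean.parseBoolean(csvParts[6])); //sparse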
use of org.apache.sysml.runtime.matrix.data.CSVFileFormatProperties in project incubator-systemml by apache.
the class DataTransform method mrDataTransform.
/**
 * Main method to create and/or apply transformation metadata using MapReduce.
 *
 * @param jobinst MR job instruction
 * @param inputs array of input matrices
 * @param shuffleInst shuffle instructions
 * @param otherInst other instructions
 * @param resultIndices byte array of result indices
 * @param outputs array of output matrices
 * @param numReducers number of reducers
 * @param replication file replication factor
 * @return MR job result
 * @throws Exception if an IOException occurs
 */
public static JobReturn mrDataTransform(MRJobInstruction jobinst, MatrixObject[] inputs, String shuffleInst, String otherInst, byte[] resultIndices, MatrixObject[] outputs, int numReducers, int replication) throws Exception {
    String[] insts = shuffleInst.split(Instruction.INSTRUCTION_DELIM);
    // Parse transform instruction (the first instruction) to obtain relevant fields
    TransformOperands oprnds = new TransformOperands(insts[0], inputs[0]);
    JobConf job = new JobConf(ConfigurationManager.getCachedJobConf());
    // find the first file in alphabetical ordering of part files in directory inputPath
    String smallestFile = CSVReblockMR.findSmallestFile(job, oprnds.inputPath);
    // find column names
    FileSystem fs = IOUtilFunctions.getFileSystem(smallestFile);
    String headerLine = readHeaderLine(fs, oprnds.inputCSVProperties, smallestFile);
    HashMap<String, Integer> colNamesToIds = processColumnNames(fs, oprnds.inputCSVProperties, headerLine, smallestFile);
    String outHeader = getOutputHeader(fs, headerLine, oprnds);
    int numColumns = colNamesToIds.size();
    int numColumnsTf = 0;
    long numRowsTf = 0;
    ArrayList<Integer> csvoutputs = new ArrayList<Integer>();
    ArrayList<Integer> bboutputs = new ArrayList<Integer>();
    // divide output objects based on output format (CSV or BinaryBlock)
    for (int i = 0; i < outputs.length; i++) {
        if (outputs[i].getFileFormatProperties() != null && outputs[i].getFileFormatProperties().getFileFormat() == FileFormatProperties.FileFormat.CSV)
            csvoutputs.add(i);
        else
            bboutputs.add(i);
    }
    boolean isCSV = (csvoutputs.size() > 0);
    boolean isBB = (bboutputs.size() > 0);
    String tmpPath = MRJobConfiguration.constructTempOutputFilename();
    checkIfOutputOverlapsWithTxMtd(outputs, oprnds, isCSV, isBB, csvoutputs, bboutputs, fs);
    JobReturn retCSV = null, retBB = null;
    if (!oprnds.isApply) {
        // build specification file with column IDs instead of column names
        String specWithIDs = processSpecFile(fs, oprnds.inputPath, smallestFile, colNamesToIds, oprnds.inputCSVProperties, oprnds.spec);
        // enable GC on colNamesToIds
        colNamesToIds = null;
        // Build transformation metadata, including recode maps, bin definitions, etc.
        // Also, generate the part offsets file (counters file), which is used in csv-reblock
        String partOffsetsFile = MRJobConfiguration.constructTempOutputFilename();
        numRowsTf = GenTfMtdMR.runJob(oprnds.inputPath, oprnds.txMtdPath, specWithIDs, smallestFile, partOffsetsFile, oprnds.inputCSVProperties, numColumns, replication, outHeader);
        if (numRowsTf == 0)
            throw new DMLRuntimeException(ERROR_MSG_ZERO_ROWS);
        // store the spec with column IDs as transformation metadata
        MapReduceTool.writeStringToHDFS(specWithIDs, oprnds.txMtdPath + "/" + "spec.json");
        numColumnsTf = getNumColumnsTf(fs, outHeader, oprnds.inputCSVProperties.getDelim(), oprnds.txMtdPath);
        // Apply transformation metadata, and perform actual transformation
        if (isCSV)
            retCSV = ApplyTfCSVMR.runJob(oprnds.inputPath, specWithIDs, oprnds.txMtdPath, tmpPath, outputs[csvoutputs.get(0)].getFileName(), partOffsetsFile, oprnds.inputCSVProperties, numColumns, replication, outHeader);
        if (isBB) {
            DMLConfig conf = ConfigurationManager.getDMLConfig();
            int blockSize = conf.getIntValue(DMLConfig.DEFAULT_BLOCK_SIZE);
            CSVReblockInstruction rblk = prepDummyReblockInstruction(oprnds.inputCSVProperties, blockSize);
            AssignRowIDMRReturn ret1 = CSVReblockMR.runAssignRowIDMRJob(new String[] { oprnds.inputPath }, new InputInfo[] { InputInfo.CSVInputInfo }, new int[] { blockSize }, new int[] { blockSize }, rblk.toString(), replication, new String[] { smallestFile }, true, oprnds.inputCSVProperties.getNAStrings(), specWithIDs);
            if (ret1.rlens[0] == 0)
                throw new DMLRuntimeException(ERROR_MSG_ZERO_ROWS);
            retBB = ApplyTfBBMR.runJob(oprnds.inputPath, insts[1], otherInst, specWithIDs, oprnds.txMtdPath, tmpPath, outputs[bboutputs.get(0)].getFileName(), ret1.counterFile.toString(), oprnds.inputCSVProperties, numRowsTf, numColumns, numColumnsTf, replication, outHeader);
        }
        MapReduceTool.deleteFileIfExistOnHDFS(new Path(partOffsetsFile), job);
    }
    else {
        // enable GC on colNamesToIds
        colNamesToIds = null;
        // copy given transform metadata (applyTxPath) to the specified location (txMtdPath)
        MapReduceTool.deleteFileIfExistOnHDFS(new Path(oprnds.txMtdPath), job);
        MapReduceTool.copyFileOnHDFS(oprnds.applyTxPath, oprnds.txMtdPath);
        // path to specification file
        String specWithIDs = (oprnds.spec != null) ? oprnds.spec : MapReduceTool.readStringFromHDFSFile(oprnds.txMtdPath + "/" + "spec.json");
        numColumnsTf = getNumColumnsTf(fs, outHeader, oprnds.inputCSVProperties.getDelim(), oprnds.txMtdPath);
        if (isCSV) {
            DMLConfig conf = ConfigurationManager.getDMLConfig();
            int blockSize = conf.getIntValue(DMLConfig.DEFAULT_BLOCK_SIZE);
            CSVReblockInstruction rblk = prepDummyReblockInstruction(oprnds.inputCSVProperties, blockSize);
            AssignRowIDMRReturn ret1 = CSVReblockMR.runAssignRowIDMRJob(new String[] { oprnds.inputPath }, new InputInfo[] { InputInfo.CSVInputInfo }, new int[] { blockSize }, new int[] { blockSize }, rblk.toString(), replication, new String[] { smallestFile }, true, oprnds.inputCSVProperties.getNAStrings(), specWithIDs);
            numRowsTf = ret1.rlens[0];
            if (ret1.rlens[0] == 0)
                throw new DMLRuntimeException(ERROR_MSG_ZERO_ROWS);
            // Apply transformation metadata, and perform actual transformation
            retCSV = ApplyTfCSVMR.runJob(oprnds.inputPath, specWithIDs, oprnds.applyTxPath, tmpPath, outputs[csvoutputs.get(0)].getFileName(), ret1.counterFile.toString(), oprnds.inputCSVProperties, numColumns, replication, outHeader);
        }
        if (isBB) {
            // compute part offsets file
            CSVReblockInstruction rblk = (CSVReblockInstruction) InstructionParser.parseSingleInstruction(insts[1]);
            CSVReblockInstruction newrblk = (CSVReblockInstruction) rblk.clone((byte) 0);
            AssignRowIDMRReturn ret1 = CSVReblockMR.runAssignRowIDMRJob(new String[] { oprnds.inputPath }, new InputInfo[] { InputInfo.CSVInputInfo }, new int[] { newrblk.brlen }, new int[] { newrblk.bclen }, newrblk.toString(), replication, new String[] { smallestFile }, true, oprnds.inputCSVProperties.getNAStrings(), specWithIDs);
            numRowsTf = ret1.rlens[0];
            if (ret1.rlens[0] == 0)
                throw new DMLRuntimeException(ERROR_MSG_ZERO_ROWS);
            // apply transformation metadata, as well as reblock the resulting data
            retBB = ApplyTfBBMR.runJob(oprnds.inputPath, insts[1], otherInst, specWithIDs, oprnds.txMtdPath, tmpPath, outputs[bboutputs.get(0)].getFileName(), ret1.counterFile.toString(), oprnds.inputCSVProperties, ret1.rlens[0], ret1.clens[0], numColumnsTf, replication, outHeader);
        }
    }
    // copy auxiliary data (old and new header lines) from the temporary location to txMtdPath
    moveFilesFromTmp(fs, tmpPath, oprnds.txMtdPath);
    // generate matrix metadata file for outputs
    if (retCSV != null) {
        retCSV.getMatrixCharacteristics(0).setDimension(numRowsTf, numColumnsTf);
        // no header row in the output; reuse the input delimiter
        CSVFileFormatProperties prop = new CSVFileFormatProperties(false, oprnds.inputCSVProperties.getDelim(), false, Double.NaN, null);
        MapReduceTool.writeMetaDataFile(outputs[csvoutputs.get(0)].getFileName() + ".mtd", ValueType.DOUBLE, retCSV.getMatrixCharacteristics(0), OutputInfo.CSVOutputInfo, prop);
        return retCSV;
    }
    if (retBB != null) {
        retBB.getMatrixCharacteristics(0).setDimension(numRowsTf, numColumnsTf);
        MapReduceTool.writeMetaDataFile(outputs[bboutputs.get(0)].getFileName() + ".mtd", ValueType.DOUBLE, retBB.getMatrixCharacteristics(0), OutputInfo.BinaryBlockOutputInfo);
        return retBB;
    }
    return null;
}
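A note on the five-argument CSVFileFormatProperties constructor used for the output metadata above: judging from the getters used elsewhere on this page (hasHeader(), getDelim(), isFill(), getFillValue(), getNAStrings()), its parameters are, in order, header flag, delimiter, fill flag, fill value, and NA strings; that ordering is an inference, not confirmed here. A minimal sketch:

    //headerless output reusing a comma delimiter, no fill, no NA strings
    CSVFileFormatProperties outProps = new CSVFileFormatProperties(
        false,       //hasHeader
        ",",         //delim (mrDataTransform passes the input delimiter here)
        false,       //fill
        Double.NaN,  //fillValue
        null);       //naStrings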
use of org.apache.sysml.runtime.matrix.data.CSVFileFormatProperties in project incubator-systemml by apache.
the class MatrixReaderFactory method createMatrixReader.
public static MatrixReader createMatrixReader(ReadProperties props) throws DMLRuntimeException {
    //check valid read properties
    if (props == null)
        throw new DMLRuntimeException("Failed to create matrix reader with empty properties.");
    MatrixReader reader = null;
    InputInfo iinfo = props.inputInfo;
    if (iinfo == InputInfo.TextCellInputInfo || iinfo == InputInfo.MatrixMarketInputInfo) {
        if (ConfigurationManager.getCompilerConfigFlag(ConfigType.PARALLEL_CP_READ_TEXTFORMATS) && MatrixBlock.DEFAULT_SPARSEBLOCK == SparseBlock.Type.MCSR)
            reader = new ReaderTextCellParallel(iinfo);
        else
            reader = new ReaderTextCell(iinfo);
    }
    else if (iinfo == InputInfo.CSVInputInfo) {
        if (ConfigurationManager.getCompilerConfigFlag(ConfigType.PARALLEL_CP_READ_TEXTFORMATS) && MatrixBlock.DEFAULT_SPARSEBLOCK == SparseBlock.Type.MCSR)
            reader = new ReaderTextCSVParallel(props.formatProperties != null ? (CSVFileFormatProperties) props.formatProperties : new CSVFileFormatProperties());
        else
            reader = new ReaderTextCSV(props.formatProperties != null ? (CSVFileFormatProperties) props.formatProperties : new CSVFileFormatProperties());
    }
    else if (iinfo == InputInfo.BinaryCellInputInfo)
        reader = new ReaderBinaryCell();
    else if (iinfo == InputInfo.BinaryBlockInputInfo) {
        if (ConfigurationManager.getCompilerConfigFlag(ConfigType.PARALLEL_CP_READ_BINARYFORMATS) && MatrixBlock.DEFAULT_SPARSEBLOCK == SparseBlock.Type.MCSR)
            reader = new ReaderBinaryBlockParallel(props.localFS);
        else
            reader = new ReaderBinaryBlock(props.localFS);
    }
    else {
        throw new DMLRuntimeException("Failed to create matrix reader for unknown input info: " + InputInfo.inputInfoToString(iinfo));
    }
    return reader;
}
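A minimal usage sketch for the CSV path, assuming ReadProperties exposes a default constructor along with the public fields accessed above (inputInfo, formatProperties); the three-argument constructor is the (hasHeader, delim, sparse) variant used in parseInstruction:

    ReadProperties props = new ReadProperties(); //assumed default constructor
    props.inputInfo = InputInfo.CSVInputInfo;
    props.formatProperties = new CSVFileFormatProperties(true, ",", false); //header row, comma-delimited, dense
    MatrixReader reader = MatrixReaderFactory.createMatrixReader(props);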