Search in sources :

Example 26 with DMLRuntimeException

use of org.apache.sysml.runtime.DMLRuntimeException in project incubator-systemml by apache.

the class UaggOuterChainSPInstruction method parseInstruction.

/**
 * Builds a {@code UaggOuterChainSPInstruction} from its string encoding.
 *
 * The instruction string is split into parts: part 0 is the opcode, part 1 the
 * unary aggregate opcode, part 2 the binary operator opcode, parts 3 and 4 the
 * two input operands and part 5 the output operand.
 *
 * @param str instruction string
 * @return the parsed instruction
 * @throws DMLRuntimeException if the opcode is not {@code UAggOuterChain.OPCODE} (case-insensitive)
 */
public static UaggOuterChainSPInstruction parseInstruction(String str) throws DMLRuntimeException {
    final String[] tokens = InstructionUtils.getInstructionPartsWithValueType(str);
    final String opcode = tokens[0];

    // reject anything that is not a uagg-outer-chain instruction up front
    if (!opcode.equalsIgnoreCase(UAggOuterChain.OPCODE)) {
        throw new DMLRuntimeException("UaggOuterChainSPInstruction.parseInstruction():: Unknown opcode " + opcode);
    }

    // operators and operands, in the order they appear in the instruction
    final AggregateUnaryOperator unaryAggOp = InstructionUtils.parseBasicAggregateUnaryOperator(tokens[1]);
    final BinaryOperator binaryOp = InstructionUtils.parseBinaryOperator(tokens[2]);
    final CPOperand leftInput = new CPOperand(tokens[3]);
    final CPOperand rightInput = new CPOperand(tokens[4]);
    final CPOperand output = new CPOperand(tokens[5]);

    // the aggregation operator is derived from the unary aggregate opcode
    final String aggOpcode = InstructionUtils.deriveAggregateOperatorOpcode(tokens[1]);
    final CorrectionLocationType correctionLoc = InstructionUtils.deriveAggregateOperatorCorrectionLocation(tokens[1]);
    final String hasCorrection = Boolean.toString(correctionLoc != CorrectionLocationType.NONE);
    final AggregateOperator aggOp = InstructionUtils.parseAggregateOperator(aggOpcode, hasCorrection, correctionLoc.toString());

    return new UaggOuterChainSPInstruction(binaryOp, unaryAggOp, aggOp, leftInput, rightInput, output, opcode, str);
}
Also used : AggregateUnaryOperator(org.apache.sysml.runtime.matrix.operators.AggregateUnaryOperator) AggregateOperator(org.apache.sysml.runtime.matrix.operators.AggregateOperator) CPOperand(org.apache.sysml.runtime.instructions.cp.CPOperand) BinaryOperator(org.apache.sysml.runtime.matrix.operators.BinaryOperator) CorrectionLocationType(org.apache.sysml.lops.PartialAggregate.CorrectionLocationType) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException)

Example 27 with DMLRuntimeException

use of org.apache.sysml.runtime.DMLRuntimeException in project incubator-systemml by apache.

the class WriteSPInstruction method parseInstruction.

/**
 * Builds a {@code WriteSPInstruction} from its string encoding.
 *
 * Two layouts are accepted:
 * <ul>
 *   <li>5 parts: opcode, input1, input2, input3 (format name), input4</li>
 *   <li>9 parts (csv): opcode, input1, input2, input3 (format name), hasHeader,
 *       delimiter, sparse, isInputMatrixBlock, input4</li>
 * </ul>
 *
 * @param str instruction string
 * @return the parsed write instruction with its format properties set
 * @throws DMLRuntimeException if the opcode is not "write", if the number of parts
 *         is neither 5 nor 9, or if the format is csv but the csv-specific parts are missing
 */
public static WriteSPInstruction parseInstruction(String str) throws DMLRuntimeException {
    String[] parts = InstructionUtils.getInstructionPartsWithValueType(str);
    String opcode = parts[0];
    if (!opcode.equals("write")) {
        throw new DMLRuntimeException("Unsupported opcode");
    }
    // Write instructions for csv files include four additional parameters
    // (hasHeader, delimiter, sparse, isInputMatrixBlock)
    if (parts.length != 5 && parts.length != 9) {
        throw new DMLRuntimeException("Invalid number of operands in write instruction: " + str);
    }
    //SPARK°write°_mVar2·MATRIX·DOUBLE°./src/test/scripts/functions/data/out/B·SCALAR·STRING·true°matrixmarket·SCALAR·STRING·true
    // _mVar2·MATRIX·DOUBLE
    CPOperand in1 = new CPOperand(parts[1]);
    CPOperand in2 = new CPOperand(parts[2]);
    CPOperand in3 = new CPOperand(parts[3]);
    WriteSPInstruction inst = new WriteSPInstruction(in1, in2, in3, opcode, str);
    if (in3.getName().equalsIgnoreCase("csv")) {
        // the csv branch reads parts[4..8]; a 5-part instruction would otherwise
        // fail with an ArrayIndexOutOfBoundsException instead of a descriptive error
        if (parts.length != 9) {
            throw new DMLRuntimeException("Invalid number of operands in csv write instruction: " + str);
        }
        boolean hasHeader = Boolean.parseBoolean(parts[4]);
        String delim = parts[5];
        boolean sparse = Boolean.parseBoolean(parts[6]);
        FileFormatProperties formatProperties = new CSVFileFormatProperties(hasHeader, delim, sparse);
        inst.setFormatProperties(formatProperties);
        boolean isInputMB = Boolean.parseBoolean(parts[7]);
        inst.setInputMatrixBlock(isInputMB);
        CPOperand in4 = new CPOperand(parts[8]);
        inst.input4 = in4;
    } else {
        // all other formats: default format properties, fourth operand directly follows the format
        FileFormatProperties ffp = new FileFormatProperties();
        CPOperand in4 = new CPOperand(parts[4]);
        inst.input4 = in4;
        inst.setFormatProperties(ffp);
    }
    return inst;
}
Also used : CSVFileFormatProperties(org.apache.sysml.runtime.matrix.data.CSVFileFormatProperties) FileFormatProperties(org.apache.sysml.runtime.matrix.data.FileFormatProperties) CSVFileFormatProperties(org.apache.sysml.runtime.matrix.data.CSVFileFormatProperties) CPOperand(org.apache.sysml.runtime.instructions.cp.CPOperand) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException)

Example 28 with DMLRuntimeException

use of org.apache.sysml.runtime.DMLRuntimeException in project incubator-systemml by apache.

the class LibMatrixReorg method sort.

/**
 * Sorts the rows of {@code in} by the values of one column and writes either the
 * reordered data or the resulting 1-based row index vector into {@code out}.
 *
 * @param in input matrix block
 * @param out output matrix block (meta data and content are overwritten)
 * @param by 1-based index of the order-by column
 * @param desc if true, the order is reversed (descending)
 * @param ixret if true, the sorted 1-based row indexes are returned instead of the data
 * @return the output block {@code out}
 * @throws DMLRuntimeException if {@code by} does not refer to an existing column
 */
public static MatrixBlock sort(MatrixBlock in, MatrixBlock out, int by, boolean desc, boolean ixret) throws DMLRuntimeException {
    // collect input meta data and initialize output meta data
    final boolean sparseInput = in.isInSparseFormat();
    final int numRows = in.rlen;
    final int numCols = in.clen;
    out.sparse = (in.sparse && !ixret);
    out.nonZeros = ixret ? numRows : in.nonZeros;

    // step 1: validate the order-by column
    if (by < 1 || by > numCols) {
        throw new DMLRuntimeException("Sort configuration issue: non-existing orderby column: " + by + " (" + numRows + "x" + numCols + " input).");
    }

    // step 2a: empty input block
    if (in.isEmptyBlock(false)) {
        if (ixret) {
            // index output of an empty block is simply seq(1,n)
            out.allocateDenseBlock(false);
            for (int r = 0; r < numRows; r++) {
                out.setValueDenseUnsafe(r, 0, r + 1);
            }
        }
        return out;
    }

    // step 2b: data sort of a dense column vector needs no index vector;
    // sort the copied values directly (unstable) and reverse if descending
    if (!ixret && !sparseInput && numCols == 1) {
        out.copy(in);
        Arrays.sort(out.denseBlock);
        if (desc) {
            sortReverseDense(out);
        }
        return out;
    }

    // step 3: sort an index vector by the extracted order-by values
    final int[] order = new int[numRows];
    final double[] keys = new double[numRows];
    for (int r = 0; r < numRows; r++) {
        order[r] = r;
        keys[r] = in.quickGetValue(r, by - 1);
    }
    // unstable sort of the index vector on the extracted values
    SortUtils.sortByValue(0, numRows, keys, order);
    // flip before the stabilization pass, hence the values are flipped as well
    if (desc) {
        sortReverseDense(order);
        sortReverseDense(keys);
    }
    // stabilization pass: within every run of equal values, order the row indexes ascending
    int runStart = 0;
    while (runStart < numRows - 1) {
        final double runValue = keys[runStart];
        int runEnd = runStart + 1;
        while (runEnd < numRows && runValue == keys[runEnd]) {
            runEnd++;
        }
        if (runEnd - runStart > 1) {
            Arrays.sort(order, runStart, runEnd);
        }
        // continue directly behind the processed run
        runStart = runEnd;
    }

    // step 4: materialize the output (input is non-empty here, see step 2a)
    if (ixret) {
        // sorted 1-based row indexes
        out.allocateDenseBlock(false);
        for (int r = 0; r < numRows; r++) {
            out.setValueDenseUnsafe(r, 0, order[r] + 1);
        }
    } else if (sparseInput) {
        // sparse: place non-empty input rows at their sorted positions
        out.allocateSparseRowsBlock(false);
        for (int r = 0; r < numRows; r++) {
            final int srcRow = order[r];
            if (!in.sparseBlock.isEmpty(srcRow)) {
                // row content itself remains unchanged
                out.sparseBlock.set(r, in.sparseBlock.get(srcRow), !SHALLOW_COPY_REORG);
            }
        }
    } else {
        // dense: copy entire input rows into their sorted positions
        out.allocateDenseBlock(false);
        for (int r = 0; r < numRows; r++) {
            System.arraycopy(in.denseBlock, order[r] * numCols, out.denseBlock, r * numCols, numCols);
        }
    }
    return out;
}
Also used : DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException)

Example 29 with DMLRuntimeException

use of org.apache.sysml.runtime.DMLRuntimeException in project incubator-systemml by apache.

the class LibMatrixReorg method rexpandRows.

/**
 * Expands the values of {@code in} into rows of {@code ret}: the output has
 * {@code max} rows and {@code in.rlen} columns, and for every valid integer value
 * {@code v} in {@code [1, max]} a 1 is appended to row {@code v-1}.
 *
 * @param in input block (values are read through {@code copyColVector})
 * @param ret output block, reset to {@code max x in.rlen}
 * @param max number of output rows and largest accepted value
 * @param cast if true, values are converted via {@code UtilFunctions.toLong} before use
 * @param ignore if false, values {@code <= 0} raise an exception; if true they are skipped
 * @return the output block {@code ret}
 * @throws DMLRuntimeException on a value {@code <= 0} when {@code ignore} is false
 */
private static MatrixBlock rexpandRows(MatrixBlock in, MatrixBlock ret, int max, boolean cast, boolean ignore) throws DMLRuntimeException {
    // output meta data: max rows, one column per input row
    final int numOutRows = max;
    final int numOutCols = in.rlen;
    final long inputNnz = in.nonZeros;
    final boolean sparseOut = MatrixBlock.evalSparseFormatInMemory(numOutRows, numOutCols, inputNnz);
    ret.reset(numOutRows, numOutCols, sparseOut);

    // temporary buffers for 'buffered append w/ sorting', used to mitigate random
    // row access for large inputs; at most 1M entries of int+double (max 12MB)
    final int chunkSize = 1024 * 1024;
    final int[] offsets = new int[Math.min(chunkSize, numOutCols)];
    final double[] buffer = new double[Math.min(chunkSize, numOutCols)];

    for (int start = 0; start < numOutCols; start += chunkSize) {
        // fill and sort the append buffer for the current chunk
        final int count = Math.min(chunkSize, numOutCols - start);
        copyColVector(in, start, buffer, offsets, count);
        SortUtils.sortByValue(0, count, buffer, offsets);

        // append one entry per valid buffered value
        for (int k = 0; k < count; k++) {
            double value = buffer[k];
            if (cast) {
                value = UtilFunctions.toLong(value);
            }
            // non-positive values are an error unless explicitly ignored
            if (value <= 0 && !ignore) {
                throw new DMLRuntimeException("Invalid input value <= 0 for ignore=false: " + value);
            }
            // only integer values within [1, max] produce an output entry
            final boolean integral = (value == Math.floor(value));
            final boolean inRange = (value >= 1 && value <= max);
            if (integral && inRange) {
                ret.appendValue((int) (value - 1), start + offsets[k], 1);
            }
        }
    }

    // sparse rows were appended out of column order (chunk-wise processing w/ unstable sort)
    if (ret.isInSparseFormat()) {
        ret.sortSparseRows();
    }
    return ret;
}
Also used : DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException)

Example 30 with DMLRuntimeException

use of org.apache.sysml.runtime.DMLRuntimeException in project incubator-systemml by apache.

the class DataTransform method mrDataTransform.

/**
	 * Main method to create and/or apply transformation metadata using MapReduce.
	 * 
	 * If {@code oprnds.isApply} is false, transformation metadata is first generated
	 * (GenTfMtdMR) and then applied; otherwise existing metadata is copied from
	 * {@code applyTxPath} to {@code txMtdPath} and applied. Outputs are split into
	 * CSV and binary-block outputs; only the first output of each kind is used.
	 * 
	 * @param jobinst MR job instruction (not referenced in this method body)
	 * @param inputs array of input matrices (only the first one is used)
	 * @param shuffleInst shuffle instructions; the first is the transform instruction, the second is used for the binary-block output
	 * @param otherInst other instructions, forwarded to the binary-block apply job
	 * @param resultIndices byte array of result indices (not referenced in this method body)
	 * @param outputs array of output matrices
	 * @param numReducers number of reducers (not referenced in this method body)
	 * @param replication replication value forwarded unchanged to all MR jobs (presumably the HDFS replication factor - confirm)
	 * @return the CSV job result if a CSV output was produced, otherwise the binary-block job result, otherwise null
	 * @throws Exception on any failure of the underlying jobs or file system operations; a DMLRuntimeException
	 *         with ERROR_MSG_ZERO_ROWS is thrown if a job reports zero rows
	 */
public static JobReturn mrDataTransform(MRJobInstruction jobinst, MatrixObject[] inputs, String shuffleInst, String otherInst, byte[] resultIndices, MatrixObject[] outputs, int numReducers, int replication) throws Exception {
    String[] insts = shuffleInst.split(Instruction.INSTRUCTION_DELIM);
    // Parse transform instruction (the first instruction) to obtain relevant fields
    TransformOperands oprnds = new TransformOperands(insts[0], inputs[0]);
    JobConf job = new JobConf(ConfigurationManager.getCachedJobConf());
    // find the first file in alphabetical ordering of part files in directory inputPath
    String smallestFile = CSVReblockMR.findSmallestFile(job, oprnds.inputPath);
    // find column names
    FileSystem fs = IOUtilFunctions.getFileSystem(smallestFile);
    String headerLine = readHeaderLine(fs, oprnds.inputCSVProperties, smallestFile);
    HashMap<String, Integer> colNamesToIds = processColumnNames(fs, oprnds.inputCSVProperties, headerLine, smallestFile);
    String outHeader = getOutputHeader(fs, headerLine, oprnds);
    int numColumns = colNamesToIds.size();
    int numColumnsTf = 0;
    long numRowsTf = 0;
    ArrayList<Integer> csvoutputs = new ArrayList<Integer>();
    ArrayList<Integer> bboutputs = new ArrayList<Integer>();
    // divide output objects based on output format (CSV or BinaryBlock);
    // outputs without file format properties are treated as BinaryBlock
    for (int i = 0; i < outputs.length; i++) {
        if (outputs[i].getFileFormatProperties() != null && outputs[i].getFileFormatProperties().getFileFormat() == FileFormatProperties.FileFormat.CSV)
            csvoutputs.add(i);
        else
            bboutputs.add(i);
    }
    boolean isCSV = (csvoutputs.size() > 0);
    boolean isBB = (bboutputs.size() > 0);
    String tmpPath = MRJobConfiguration.constructTempOutputFilename();
    checkIfOutputOverlapsWithTxMtd(outputs, oprnds, isCSV, isBB, csvoutputs, bboutputs, fs);
    JobReturn retCSV = null, retBB = null;
    if (!oprnds.isApply) {
        // build specification file with column IDs instead of column names
        String specWithIDs = processSpecFile(fs, oprnds.inputPath, smallestFile, colNamesToIds, oprnds.inputCSVProperties, oprnds.spec);
        // enable GC on colNamesToIds
        colNamesToIds = null;
        // Build transformation metadata, including recode maps, bin definitions, etc.
        // Also, generate part offsets file (counters file), which is to be used in csv-reblock
        String partOffsetsFile = MRJobConfiguration.constructTempOutputFilename();
        numRowsTf = GenTfMtdMR.runJob(oprnds.inputPath, oprnds.txMtdPath, specWithIDs, smallestFile, partOffsetsFile, oprnds.inputCSVProperties, numColumns, replication, outHeader);
        if (numRowsTf == 0)
            throw new DMLRuntimeException(ERROR_MSG_ZERO_ROWS);
        // store the specFileWithIDs as transformation metadata
        MapReduceTool.writeStringToHDFS(specWithIDs, oprnds.txMtdPath + "/" + "spec.json");
        numColumnsTf = getNumColumnsTf(fs, outHeader, oprnds.inputCSVProperties.getDelim(), oprnds.txMtdPath);
        // Apply transformation metadata, and perform actual transformation
        if (isCSV)
            retCSV = ApplyTfCSVMR.runJob(oprnds.inputPath, specWithIDs, oprnds.txMtdPath, tmpPath, outputs[csvoutputs.get(0)].getFileName(), partOffsetsFile, oprnds.inputCSVProperties, numColumns, replication, outHeader);
        if (isBB) {
            // binary-block output: assign row IDs with a dummy reblock instruction
            // built from the configured default block size, then apply and reblock
            DMLConfig conf = ConfigurationManager.getDMLConfig();
            int blockSize = conf.getIntValue(DMLConfig.DEFAULT_BLOCK_SIZE);
            CSVReblockInstruction rblk = prepDummyReblockInstruction(oprnds.inputCSVProperties, blockSize);
            AssignRowIDMRReturn ret1 = CSVReblockMR.runAssignRowIDMRJob(new String[] { oprnds.inputPath }, new InputInfo[] { InputInfo.CSVInputInfo }, new int[] { blockSize }, new int[] { blockSize }, rblk.toString(), replication, new String[] { smallestFile }, true, oprnds.inputCSVProperties.getNAStrings(), specWithIDs);
            if (ret1.rlens[0] == 0)
                throw new DMLRuntimeException(ERROR_MSG_ZERO_ROWS);
            retBB = ApplyTfBBMR.runJob(oprnds.inputPath, insts[1], otherInst, specWithIDs, oprnds.txMtdPath, tmpPath, outputs[bboutputs.get(0)].getFileName(), ret1.counterFile.toString(), oprnds.inputCSVProperties, numRowsTf, numColumns, numColumnsTf, replication, outHeader);
        }
        // the part offsets file is temporary and only created in this (non-apply) branch
        MapReduceTool.deleteFileIfExistOnHDFS(new Path(partOffsetsFile), job);
    } else {
        // enable GC on colNamesToIds
        colNamesToIds = null;
        // copy given transform metadata (applyTxPath) to specified location (txMtdPath)
        MapReduceTool.deleteFileIfExistOnHDFS(new Path(oprnds.txMtdPath), job);
        MapReduceTool.copyFileOnHDFS(oprnds.applyTxPath, oprnds.txMtdPath);
        // specification: use the given spec if present, otherwise read spec.json from txMtdPath
        String specWithIDs = (oprnds.spec != null) ? oprnds.spec : MapReduceTool.readStringFromHDFSFile(oprnds.txMtdPath + "/" + "spec.json");
        numColumnsTf = getNumColumnsTf(fs, outHeader, oprnds.inputCSVProperties.getDelim(), oprnds.txMtdPath);
        if (isCSV) {
            // CSV output: the row-ID job provides the row count and the counter file
            DMLConfig conf = ConfigurationManager.getDMLConfig();
            int blockSize = conf.getIntValue(DMLConfig.DEFAULT_BLOCK_SIZE);
            CSVReblockInstruction rblk = prepDummyReblockInstruction(oprnds.inputCSVProperties, blockSize);
            AssignRowIDMRReturn ret1 = CSVReblockMR.runAssignRowIDMRJob(new String[] { oprnds.inputPath }, new InputInfo[] { InputInfo.CSVInputInfo }, new int[] { blockSize }, new int[] { blockSize }, rblk.toString(), replication, new String[] { smallestFile }, true, oprnds.inputCSVProperties.getNAStrings(), specWithIDs);
            numRowsTf = ret1.rlens[0];
            if (ret1.rlens[0] == 0)
                throw new DMLRuntimeException(ERROR_MSG_ZERO_ROWS);
            // Apply transformation metadata, and perform actual transformation
            retCSV = ApplyTfCSVMR.runJob(oprnds.inputPath, specWithIDs, oprnds.applyTxPath, tmpPath, outputs[csvoutputs.get(0)].getFileName(), ret1.counterFile.toString(), oprnds.inputCSVProperties, numColumns, replication, outHeader);
        }
        if (isBB) {
            // compute part offsets file, using the reblock instruction given as second shuffle instruction
            CSVReblockInstruction rblk = (CSVReblockInstruction) InstructionParser.parseSingleInstruction(insts[1]);
            CSVReblockInstruction newrblk = (CSVReblockInstruction) rblk.clone((byte) 0);
            AssignRowIDMRReturn ret1 = CSVReblockMR.runAssignRowIDMRJob(new String[] { oprnds.inputPath }, new InputInfo[] { InputInfo.CSVInputInfo }, new int[] { newrblk.brlen }, new int[] { newrblk.bclen }, newrblk.toString(), replication, new String[] { smallestFile }, true, oprnds.inputCSVProperties.getNAStrings(), specWithIDs);
            numRowsTf = ret1.rlens[0];
            if (ret1.rlens[0] == 0)
                throw new DMLRuntimeException(ERROR_MSG_ZERO_ROWS);
            // apply transformation metadata, as well as reblock the resulting data
            retBB = ApplyTfBBMR.runJob(oprnds.inputPath, insts[1], otherInst, specWithIDs, oprnds.txMtdPath, tmpPath, outputs[bboutputs.get(0)].getFileName(), ret1.counterFile.toString(), oprnds.inputCSVProperties, ret1.rlens[0], ret1.clens[0], numColumnsTf, replication, outHeader);
        }
    }
    // copy auxiliary data (old and new header lines) from temporary location to txMtdPath
    moveFilesFromTmp(fs, tmpPath, oprnds.txMtdPath);
    // generate matrix metadata file for outputs
    // NOTE(review): if both a CSV and a binary-block result exist, only the CSV
    // metadata file is written and the CSV result returned - confirm this is intended
    if (retCSV != null) {
        retCSV.getMatrixCharacteristics(0).setDimension(numRowsTf, numColumnsTf);
        CSVFileFormatProperties prop = new CSVFileFormatProperties(false, // use the same header as the input
        oprnds.inputCSVProperties.getDelim(), false, Double.NaN, null);
        MapReduceTool.writeMetaDataFile(outputs[csvoutputs.get(0)].getFileName() + ".mtd", ValueType.DOUBLE, retCSV.getMatrixCharacteristics(0), OutputInfo.CSVOutputInfo, prop);
        return retCSV;
    }
    if (retBB != null) {
        retBB.getMatrixCharacteristics(0).setDimension(numRowsTf, numColumnsTf);
        MapReduceTool.writeMetaDataFile(outputs[bboutputs.get(0)].getFileName() + ".mtd", ValueType.DOUBLE, retBB.getMatrixCharacteristics(0), OutputInfo.BinaryBlockOutputInfo);
        return retBB;
    }
    // neither a CSV nor a binary-block job was run
    return null;
}
Also used : AssignRowIDMRReturn(org.apache.sysml.runtime.matrix.CSVReblockMR.AssignRowIDMRReturn) Path(org.apache.hadoop.fs.Path) DMLConfig(org.apache.sysml.conf.DMLConfig) CSVFileFormatProperties(org.apache.sysml.runtime.matrix.data.CSVFileFormatProperties) CSVReblockInstruction(org.apache.sysml.runtime.instructions.mr.CSVReblockInstruction) ArrayList(java.util.ArrayList) JobReturn(org.apache.sysml.runtime.matrix.JobReturn) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException) FileSystem(org.apache.hadoop.fs.FileSystem) JobConf(org.apache.hadoop.mapred.JobConf)

Aggregations

DMLRuntimeException (org.apache.sysml.runtime.DMLRuntimeException): 502, IOException (java.io.IOException): 106, MatrixBlock (org.apache.sysml.runtime.matrix.data.MatrixBlock): 96, MatrixCharacteristics (org.apache.sysml.runtime.matrix.MatrixCharacteristics): 76, ArrayList (java.util.ArrayList): 72, MatrixObject (org.apache.sysml.runtime.controlprogram.caching.MatrixObject): 70, CPOperand (org.apache.sysml.runtime.instructions.cp.CPOperand): 44, MatrixIndexes (org.apache.sysml.runtime.matrix.data.MatrixIndexes): 37, ExecutorService (java.util.concurrent.ExecutorService): 35, Path (org.apache.hadoop.fs.Path): 35, Future (java.util.concurrent.Future): 31, MatrixFormatMetaData (org.apache.sysml.runtime.matrix.MatrixFormatMetaData): 31, Pointer (jcuda.Pointer): 25, FrameBlock (org.apache.sysml.runtime.matrix.data.FrameBlock): 25, FileSystem (org.apache.hadoop.fs.FileSystem): 22, CSRPointer (org.apache.sysml.runtime.instructions.gpu.context.CSRPointer): 21, HopsException (org.apache.sysml.hops.HopsException): 20, Operator (org.apache.sysml.runtime.matrix.operators.Operator): 20, JobConf (org.apache.hadoop.mapred.JobConf): 19, JavaPairRDD (org.apache.spark.api.java.JavaPairRDD): 19