Search in sources :

Example 1 with JSONArray

use of org.apache.wink.json4j.JSONArray in project incubator-systemml by apache.

the class DataTransform method processSpecFile.

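	/**
	 * Convert input transformation specification file with column names into a
	 * specification with corresponding column Ids. This file is sent to all the
	 * relevant MR jobs.
	 *
	 * @param fs file system
	 * @param inputPath input file path
	 * @param smallestFile file name
	 * @param colNames column names (mapping from column name to column ID)
	 * @param prop csv file format properties
	 * @param specWithNames transformation specification as a JSON string; columns are referenced by name, or by ID when the spec sets "ids" to true
	 * @return specification with column IDs, serialized as a JSON string
	 * @throws IllegalArgumentException if the specification requests an invalid combination of transformations on a column, or a number of bins that is not greater than 1
	 * @throws IOException if the specification names an unknown or unsupported transformation method
	 * @throws JSONException if JSONException occurs
	 */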
private static String processSpecFile(FileSystem fs, String inputPath, String smallestFile, HashMap<String, Integer> colNames, CSVFileFormatProperties prop, String specWithNames) throws IllegalArgumentException, IOException, JSONException {
    JSONObject inputSpec = new JSONObject(specWithNames);
    final String NAME = "name";
    final String ID = "id";
    final String METHOD = "method";
    final String VALUE = "value";
    final String MV_METHOD_MEAN = "global_mean";
    final String MV_METHOD_MODE = "global_mode";
    final String MV_METHOD_CONSTANT = "constant";
    final String BIN_METHOD_WIDTH = "equi-width";
    final String BIN_METHOD_HEIGHT = "equi-height";
    final String SCALE_METHOD_Z = "z-score";
    final String SCALE_METHOD_M = "mean-subtraction";
    final String JSON_BYPOS = "ids";
    String stmp = null;
    JSONObject entry = null;
    byte btmp = 0;
    final int[] mvList;
    int[] rcdList, dcdList, omitList;
    final int[] binList;
    final int[] scaleList;
    byte[] mvMethods = null, binMethods = null, scaleMethods = null;
    Object[] numBins = null;
    Object[] mvConstants = null;
    boolean byPositions = (inputSpec.containsKey(JSON_BYPOS) && ((Boolean) inputSpec.get(JSON_BYPOS)).booleanValue() == true);
    // Omit
    if (inputSpec.containsKey(TfUtils.TXMETHOD_OMIT)) {
        JSONArray arrtmp = (JSONArray) inputSpec.get(TfUtils.TXMETHOD_OMIT);
        omitList = new int[arrtmp.size()];
        for (int i = 0; i < arrtmp.size(); i++) {
            if (byPositions)
                omitList[i] = UtilFunctions.toInt(arrtmp.get(i));
            else {
                stmp = UtilFunctions.unquote((String) arrtmp.get(i));
                omitList[i] = colNames.get(stmp);
    } else
        omitList = null;
    // Missing value imputation
    if (inputSpec.containsKey(TfUtils.TXMETHOD_IMPUTE)) {
        JSONArray arrtmp = (JSONArray) inputSpec.get(TfUtils.TXMETHOD_IMPUTE);
        mvList = new int[arrtmp.size()];
        mvMethods = new byte[arrtmp.size()];
        mvConstants = new Object[arrtmp.size()];
        for (int i = 0; i < arrtmp.size(); i++) {
            entry = (JSONObject) arrtmp.get(i);
            if (byPositions) {
                mvList[i] = UtilFunctions.toInt(entry.get(ID));
            } else {
                stmp = UtilFunctions.unquote((String) entry.get(NAME));
                mvList[i] = colNames.get(stmp);
            stmp = UtilFunctions.unquote((String) entry.get(METHOD));
            if (stmp.equals(MV_METHOD_MEAN))
                btmp = (byte) 1;
            else if (stmp.equals(MV_METHOD_MODE))
                btmp = (byte) 2;
            else if (stmp.equals(MV_METHOD_CONSTANT))
                btmp = (byte) 3;
                throw new IOException("Unknown missing value imputation method (" + stmp + ") in transformation specification: " + specWithNames);
            mvMethods[i] = btmp;
            //txMethods.add( btmp );
            mvConstants[i] = null;
            if (entry.containsKey(VALUE))
                mvConstants[i] = entry.get(VALUE);
        Integer[] idx = new Integer[mvList.length];
        for (int i = 0; i < mvList.length; i++) idx[i] = i;
        Arrays.sort(idx, new Comparator<Integer>() {

            public int compare(Integer o1, Integer o2) {
                return (mvList[o1] - mvList[o2]);
        // rearrange mvList, mvMethods, and mvConstants according to permutation idx
        inplacePermute(mvList, mvMethods, mvConstants, idx);
    } else
        mvList = null;
    // Recoding
    if (inputSpec.containsKey(TfUtils.TXMETHOD_RECODE)) {
        JSONArray arrtmp = (JSONArray) inputSpec.get(TfUtils.TXMETHOD_RECODE);
        rcdList = new int[arrtmp.size()];
        for (int i = 0; i < arrtmp.size(); i++) {
            if (byPositions)
                rcdList[i] = UtilFunctions.toInt(arrtmp.get(i));
            else {
                stmp = UtilFunctions.unquote((String) arrtmp.get(i));
                rcdList[i] = colNames.get(stmp);
    } else
        rcdList = null;
    // Binning
    if (inputSpec.containsKey(TfUtils.TXMETHOD_BIN)) {
        JSONArray arrtmp = (JSONArray) inputSpec.get(TfUtils.TXMETHOD_BIN);
        binList = new int[arrtmp.size()];
        binMethods = new byte[arrtmp.size()];
        numBins = new Object[arrtmp.size()];
        for (int i = 0; i < arrtmp.size(); i++) {
            entry = (JSONObject) arrtmp.get(i);
            if (byPositions) {
                binList[i] = UtilFunctions.toInt(entry.get(ID));
            } else {
                stmp = UtilFunctions.unquote((String) entry.get(NAME));
                binList[i] = colNames.get(stmp);
            stmp = UtilFunctions.unquote((String) entry.get(METHOD));
            if (stmp.equals(BIN_METHOD_WIDTH))
                btmp = (byte) 1;
            else if (stmp.equals(BIN_METHOD_HEIGHT))
                throw new IOException("Equi-height binning method is not yet supported, in transformation specification: " + specWithNames);
                throw new IOException("Unknown missing value imputation method (" + stmp + ") in transformation specification: " + specWithNames);
            binMethods[i] = btmp;
            numBins[i] = entry.get(TfUtils.JSON_NBINS);
            if (((Integer) numBins[i]).intValue() <= 1)
                throw new IllegalArgumentException("Invalid transformation on column \"" + (String) entry.get(NAME) + "\". Number of bins must be greater than 1.");
        Integer[] idx = new Integer[binList.length];
        for (int i = 0; i < binList.length; i++) idx[i] = i;
        Arrays.sort(idx, new Comparator<Integer>() {

            public int compare(Integer o1, Integer o2) {
                return (binList[o1] - binList[o2]);
        // rearrange binList and binMethods according to permutation idx
        inplacePermute(binList, binMethods, numBins, idx);
    } else
        binList = null;
    // Dummycoding
    if (inputSpec.containsKey(TfUtils.TXMETHOD_DUMMYCODE)) {
        JSONArray arrtmp = (JSONArray) inputSpec.get(TfUtils.TXMETHOD_DUMMYCODE);
        dcdList = new int[arrtmp.size()];
        for (int i = 0; i < arrtmp.size(); i++) {
            if (byPositions)
                dcdList[i] = UtilFunctions.toInt(arrtmp.get(i));
            else {
                stmp = UtilFunctions.unquote((String) arrtmp.get(i));
                dcdList[i] = colNames.get(stmp);
    } else
        dcdList = null;
    // Scaling
    if (inputSpec.containsKey(TfUtils.TXMETHOD_SCALE)) {
        JSONArray arrtmp = (JSONArray) inputSpec.get(TfUtils.TXMETHOD_SCALE);
        scaleList = new int[arrtmp.size()];
        scaleMethods = new byte[arrtmp.size()];
        for (int i = 0; i < arrtmp.size(); i++) {
            entry = (JSONObject) arrtmp.get(i);
            if (byPositions) {
                scaleList[i] = UtilFunctions.toInt(entry.get(ID));
            } else {
                stmp = UtilFunctions.unquote((String) entry.get(NAME));
                scaleList[i] = colNames.get(stmp);
            stmp = UtilFunctions.unquote((String) entry.get(METHOD));
            if (stmp.equals(SCALE_METHOD_M))
                btmp = (byte) 1;
            else if (stmp.equals(SCALE_METHOD_Z))
                btmp = (byte) 2;
                throw new IOException("Unknown missing value imputation method (" + stmp + ") in transformation specification: " + specWithNames);
            scaleMethods[i] = btmp;
        Integer[] idx = new Integer[scaleList.length];
        for (int i = 0; i < scaleList.length; i++) idx[i] = i;
        Arrays.sort(idx, new Comparator<Integer>() {

            public int compare(Integer o1, Integer o2) {
                return (scaleList[o1] - scaleList[o2]);
        // rearrange scaleList and scaleMethods according to permutation idx
        inplacePermute(scaleList, scaleMethods, null, idx);
    } else
        scaleList = null;
    // --------------------------------------------------------------------------
    // Check for column IDs that are imputed with mode, but not recoded.
    // These columns have to be handled separately, because the computation of mode
    // requires the computation of distinct values (i.e., recode maps).
    ArrayList<Integer> tmpList = new ArrayList<Integer>();
    if (mvList != null)
        for (int i = 0; i < mvList.length; i++) {
            int colID = mvList[i];
            if (mvMethods[i] == 2 && (rcdList == null || Arrays.binarySearch(rcdList, colID) < 0))
    int[] mvrcdList = null;
    if (tmpList.size() > 0) {
        mvrcdList = new int[tmpList.size()];
        for (int i = 0; i < tmpList.size(); i++) mvrcdList[i] = tmpList.get(i);
    if (mvList != null)
        for (int i = 0; i < mvList.length; i++) {
            int colID = mvList[i];
            if (omitList != null && Arrays.binarySearch(omitList, colID) >= 0)
                throw new IllegalArgumentException("Invalid transformations on column ID " + colID + ". A column can not be both omitted and imputed.");
            if (mvMethods[i] == 1) {
                if (rcdList != null && Arrays.binarySearch(rcdList, colID) >= 0)
                    throw new IllegalArgumentException("Invalid transformations on column ID " + colID + ". A numeric column can not be recoded.");
                if (dcdList != null && Arrays.binarySearch(dcdList, colID) >= 0)
                    // throw an error only if the column is not binned
                    if (binList == null || Arrays.binarySearch(binList, colID) < 0)
                        throw new IllegalArgumentException("Invalid transformations on column ID " + colID + ". A numeric column can not be dummycoded.");
    if (scaleList != null)
        for (int i = 0; i < scaleList.length; i++) {
            int colID = scaleList[i];
            if (rcdList != null && Arrays.binarySearch(rcdList, colID) >= 0)
                throw new IllegalArgumentException("Invalid transformations on column ID " + colID + ". A column can not be recoded and scaled.");
            if (binList != null && Arrays.binarySearch(binList, colID) >= 0)
                throw new IllegalArgumentException("Invalid transformations on column ID " + colID + ". A column can not be binned and scaled.");
            if (dcdList != null && Arrays.binarySearch(dcdList, colID) >= 0)
                throw new IllegalArgumentException("Invalid transformations on column ID " + colID + ". A column can not be dummycoded and scaled.");
    if (rcdList != null)
        for (int i = 0; i < rcdList.length; i++) {
            int colID = rcdList[i];
            if (binList != null && Arrays.binarySearch(binList, colID) >= 0)
                throw new IllegalArgumentException("Invalid transformations on column ID " + colID + ". A column can not be recoded and binned.");
    // Check if dummycoded columns are either recoded or binned.
    // If not, add them to recode list.
    ArrayList<Integer> addToRcd = new ArrayList<Integer>();
    if (dcdList != null)
        for (int i = 0; i < dcdList.length; i++) {
            int colID = dcdList[i];
            boolean isRecoded = (rcdList != null && Arrays.binarySearch(rcdList, colID) >= 0);
            boolean isBinned = (binList != null && Arrays.binarySearch(binList, colID) >= 0);
            // If colID is neither recoded nor binned, then, add it to rcdList.
            if (!isRecoded && !isBinned)
    if (addToRcd.size() > 0) {
        int[] newRcdList = null;
        if (rcdList != null)
            newRcdList = Arrays.copyOf(rcdList, rcdList.length + addToRcd.size());
            newRcdList = new int[addToRcd.size()];
        int i = (rcdList != null ? rcdList.length : 0);
        for (int idx = 0; i < newRcdList.length; i++, idx++) newRcdList[i] = addToRcd.get(idx);
        rcdList = newRcdList;
    // -----------------------------------------------------------------------------
    // Prepare output spec
    JSONObject outputSpec = new JSONObject();
    if (omitList != null) {
        JSONObject rcdSpec = new JSONObject();
        rcdSpec.put(TfUtils.JSON_ATTRS, toJSONArray(omitList));
        outputSpec.put(TfUtils.TXMETHOD_OMIT, rcdSpec);
    if (mvList != null) {
        JSONObject mvSpec = new JSONObject();
        mvSpec.put(TfUtils.JSON_ATTRS, toJSONArray(mvList));
        mvSpec.put(TfUtils.JSON_MTHD, toJSONArray(mvMethods));
        mvSpec.put(TfUtils.JSON_CONSTS, toJSONArray(mvConstants));
        outputSpec.put(TfUtils.TXMETHOD_IMPUTE, mvSpec);
    if (rcdList != null) {
        JSONObject rcdSpec = new JSONObject();
        rcdSpec.put(TfUtils.JSON_ATTRS, toJSONArray(rcdList));
        outputSpec.put(TfUtils.TXMETHOD_RECODE, rcdSpec);
    if (binList != null) {
        JSONObject binSpec = new JSONObject();
        binSpec.put(TfUtils.JSON_ATTRS, toJSONArray(binList));
        binSpec.put(TfUtils.JSON_MTHD, toJSONArray(binMethods));
        binSpec.put(TfUtils.JSON_NBINS, toJSONArray(numBins));
        outputSpec.put(TfUtils.TXMETHOD_BIN, binSpec);
    if (dcdList != null) {
        JSONObject dcdSpec = new JSONObject();
        dcdSpec.put(TfUtils.JSON_ATTRS, toJSONArray(dcdList));
        outputSpec.put(TfUtils.TXMETHOD_DUMMYCODE, dcdSpec);
    if (scaleList != null) {
        JSONObject scaleSpec = new JSONObject();
        scaleSpec.put(TfUtils.JSON_ATTRS, toJSONArray(scaleList));
        scaleSpec.put(TfUtils.JSON_MTHD, toJSONArray(scaleMethods));
        outputSpec.put(TfUtils.TXMETHOD_SCALE, scaleSpec);
    if (mvrcdList != null) {
        JSONObject mvrcd = new JSONObject();
        mvrcd.put(TfUtils.JSON_ATTRS, toJSONArray(mvrcdList));
        outputSpec.put(TfUtils.TXMETHOD_MVRCD, mvrcd);
    // return output spec with IDs
    return outputSpec.toString();
Also used : JSONArray(org.apache.wink.json4j.JSONArray) ArrayList(java.util.ArrayList) IOException(java.io.IOException) JSONObject(org.apache.wink.json4j.JSONObject) MatrixObject(org.apache.sysml.runtime.controlprogram.caching.MatrixObject) FrameObject(org.apache.sysml.runtime.controlprogram.caching.FrameObject) JSONObject(org.apache.wink.json4j.JSONObject) RDDObject(org.apache.sysml.runtime.instructions.spark.data.RDDObject)

Example 2 with JSONArray

use of org.apache.wink.json4j.JSONArray in project incubator-systemml by apache.

the class MVImputeAgent method parseMethodsAndReplacments.

private void parseMethodsAndReplacments(JSONObject parsedSpec) throws JSONException {
    JSONArray mvspec = (JSONArray) parsedSpec.get(TfUtils.TXMETHOD_IMPUTE);
    _mvMethodList = new MVMethod[mvspec.size()];
    _replacementList = new String[mvspec.size()];
    _meanList = new KahanObject[mvspec.size()];
    _countList = new long[mvspec.size()];
    for (int i = 0; i < mvspec.size(); i++) {
        JSONObject mvobj = (JSONObject) mvspec.get(i);
        _mvMethodList[i] = MVMethod.valueOf(mvobj.get("method").toString().toUpperCase());
        if (_mvMethodList[i] == MVMethod.CONSTANT) {
            _replacementList[i] = mvobj.getString("value").toString();
        _meanList[i] = new KahanObject(0, 0);
Also used : JSONObject(org.apache.wink.json4j.JSONObject) JSONArray(org.apache.wink.json4j.JSONArray) KahanObject(org.apache.sysml.runtime.instructions.cp.KahanObject)

Example 3 with JSONArray

use of org.apache.wink.json4j.JSONArray in project incubator-systemml by apache.

the class DataExpression method parseMetaDataFileParameters.

private void parseMetaDataFileParameters(String mtdFileName, JSONObject configObject, boolean conditional) throws LanguageException {
    for (Object obj : configObject.entrySet()) {
        Entry<Object, Object> e = (Entry<Object, Object>) obj;
        Object key = e.getKey();
        Object val = e.getValue();
        boolean isValidName = false;
        for (String paramName : READ_VALID_MTD_PARAM_NAMES) {
            if (paramName.equals(key))
                isValidName = true;
        if (!isValidName) {
            //wrong parameters always rejected
            raiseValidateError("MTD file " + mtdFileName + " contains invalid parameter name: " + key, false);
        // if the read method parameter is a constant, then verify value matches MTD metadata file
        if (getVarParam(key.toString()) != null && (getVarParam(key.toString()) instanceof ConstIdentifier) && !getVarParam(key.toString()).toString().equalsIgnoreCase(val.toString())) {
            raiseValidateError("parameter " + key.toString() + " has conflicting values in read statement definition and metadata. " + "Config file value: " + val.toString() + " from MTD file.  Read statement value: " + getVarParam(key.toString()), conditional);
        } else {
            // if the read method does not specify parameter value, then add MTD metadata file value to parameter list
            if (getVarParam(key.toString()) == null) {
                if ((!key.toString().equalsIgnoreCase(DESCRIPTIONPARAM)) && (!key.toString().equalsIgnoreCase(AUTHORPARAM)) && (!key.toString().equalsIgnoreCase(CREATEDPARAM))) {
                    StringIdentifier strId = new StringIdentifier(val.toString(), this.getFilename(), this.getBeginLine(), this.getBeginColumn(), this.getEndLine(), this.getEndColumn());
                    if (key.toString().equalsIgnoreCase(DELIM_HAS_HEADER_ROW) || key.toString().equalsIgnoreCase(DELIM_FILL) || key.toString().equalsIgnoreCase(DELIM_SPARSE)) {
                        // parse these parameters as boolean values
                        BooleanIdentifier boolId = null;
                        if (strId.toString().equalsIgnoreCase("true")) {
                            boolId = new BooleanIdentifier(true, this.getFilename(), this.getBeginLine(), this.getBeginColumn(), this.getEndLine(), this.getEndColumn());
                        } else if (strId.toString().equalsIgnoreCase("false")) {
                            boolId = new BooleanIdentifier(false, this.getFilename(), this.getBeginLine(), this.getBeginColumn(), this.getEndLine(), this.getEndColumn());
                        } else {
                            raiseValidateError("Invalid value provided for '" + DELIM_HAS_HEADER_ROW + "' in metadata file '" + mtdFileName + "'. " + "Must be either TRUE or FALSE.", conditional);
                        addVarParam(key.toString(), boolId);
                    } else if (key.toString().equalsIgnoreCase(DELIM_FILL_VALUE)) {
                        // parse these parameters as numeric values
                        DoubleIdentifier doubleId = new DoubleIdentifier(Double.parseDouble(strId.toString()), this.getFilename(), this.getBeginLine(), this.getBeginColumn(), this.getEndLine(), this.getEndColumn());
                        addVarParam(key.toString(), doubleId);
                    } else if (key.toString().equalsIgnoreCase(DELIM_NA_STRINGS)) {
                        String naStrings = null;
                        if (val instanceof String) {
                            naStrings = val.toString();
                        } else {
                            StringBuilder sb = new StringBuilder();
                            JSONArray valarr = (JSONArray) val;
                            for (int naid = 0; naid < valarr.size(); naid++) {
                                sb.append((String) valarr.get(naid));
                                if (naid < valarr.size() - 1)
                            naStrings = sb.toString();
                        StringIdentifier sid = new StringIdentifier(naStrings, this.getFilename(), this.getBeginLine(), this.getBeginColumn(), this.getEndLine(), this.getEndColumn());
                        addVarParam(key.toString(), sid);
                    } else {
                        // by default, treat a parameter as a string
                        addVarParam(key.toString(), strId);
Also used : Entry(java.util.Map.Entry) JSONArray(org.apache.wink.json4j.JSONArray) JSONObject(org.apache.wink.json4j.JSONObject)

Example 4 with JSONArray

use of org.apache.wink.json4j.JSONArray in project incubator-systemml by apache.

the class ScalingTest method generateSpecFile.

// ----------------------------
private void generateSpecFile(int cols, String specFile) throws IOException, Exception {
    final String NAME = "name";
    final String METHOD = "method";
    final String SCALE_METHOD_Z = "z-score";
    final String SCALE_METHOD_M = "mean-subtraction";
    JSONObject outputSpec = new JSONObject();
    JSONArray scaleSpec = new JSONArray();
    for (int colID = 1; colID <= cols; colID++) {
        JSONObject obj = new JSONObject();
        obj.put(NAME, "V" + colID);
        if (colID <= cols / 2)
            obj.put(METHOD, SCALE_METHOD_M);
            obj.put(METHOD, SCALE_METHOD_Z);
    outputSpec.put(TfUtils.TXMETHOD_SCALE, scaleSpec);
    FileSystem fs = IOUtilFunctions.getFileSystem(specFile);
    try (BufferedWriter out = new BufferedWriter(new OutputStreamWriter(fs.create(new Path(specFile), true)))) {
Also used : Path(org.apache.hadoop.fs.Path) JSONObject(org.apache.wink.json4j.JSONObject) FileSystem(org.apache.hadoop.fs.FileSystem) JSONArray(org.apache.wink.json4j.JSONArray) OutputStreamWriter(java.io.OutputStreamWriter) BufferedWriter(java.io.BufferedWriter)

Example 5 with JSONArray

use of org.apache.wink.json4j.JSONArray in project incubator-systemml by apache.

the class TfMetaUtils method parseJsonIDList.

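	/**
	 * TODO consolidate external and internal json spec definitions
	 *
	 * @param spec transform specification as a JSON object
	 * @param colnames column names
	 * @param group name of the spec entry (transformation group) whose column list is parsed
	 * @return list of column ids (1-based when resolved from column names); empty if the spec has no entry for the group
	 * @throws JSONException if JSONException occurs
	 * @throws RuntimeException if a specified column does not exist
	 */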
public static int[] parseJsonIDList(JSONObject spec, String[] colnames, String group) throws JSONException {
    int[] colList = new int[0];
    boolean ids = spec.containsKey("ids") && spec.getBoolean("ids");
    if (spec.containsKey(group)) {
        //parse attribute-array or plain array of IDs
        JSONArray attrs = null;
        if (spec.get(group) instanceof JSONObject) {
            attrs = (JSONArray) ((JSONObject) spec.get(group)).get(TfUtils.JSON_ATTRS);
            //file-based transform outputs ids w/o id tags
            ids = true;
        } else
            attrs = (JSONArray) spec.get(group);
        //construct ID list array
        colList = new int[attrs.size()];
        for (int i = 0; i < colList.length; i++) {
            colList[i] = ids ? UtilFunctions.toInt(attrs.get(i)) : (ArrayUtils.indexOf(colnames, attrs.get(i)) + 1);
            if (colList[i] <= 0) {
                throw new RuntimeException("Specified column '" + attrs.get(i) + "' does not exist.");
        //ensure ascending order of column IDs
    return colList;
Also used : DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException) JSONObject(org.apache.wink.json4j.JSONObject) JSONArray(org.apache.wink.json4j.JSONArray)


JSONArray (org.apache.wink.json4j.JSONArray)7 JSONObject (org.apache.wink.json4j.JSONObject)7 DMLRuntimeException (org.apache.sysml.runtime.DMLRuntimeException)3 Path (org.apache.hadoop.fs.Path)2 FrameObject (org.apache.sysml.runtime.controlprogram.caching.FrameObject)2 MatrixObject (org.apache.sysml.runtime.controlprogram.caching.MatrixObject)2 RDDObject ( BufferedReader ( BufferedWriter ( IOException ( InputStreamReader ( OutputStreamWriter ( ArrayList (java.util.ArrayList)1 Entry (java.util.Map.Entry)1 FileSystem (org.apache.hadoop.fs.FileSystem)1 KahanObject (org.apache.sysml.runtime.instructions.cp.KahanObject)1