Use of org.apache.wink.json4j.JSONArray in project incubator-systemml by Apache.
Class TfMetaUtils, method parseJsonObjectIDList.
/**
 * Extracts the column IDs listed under the given group of a transform
 * specification.
 *
 * The group is expected to hold a JSON array of objects. If the spec has a
 * true "ids" flag, each object's "id" value is used directly; otherwise the
 * object's "name" value is looked up in {@code colnames} and its position
 * plus one is used as the column ID.
 *
 * @param spec transform specification
 * @param colnames column names used to resolve name-based entries
 * @param group key of the spec entry holding the column list
 * @return column IDs in ascending order; an empty array if the group is
 *         absent or is not a JSON array
 * @throws JSONException if a required attribute cannot be read from the spec
 */
public static int[] parseJsonObjectIDList(JSONObject spec, String[] colnames, String group) throws JSONException {
    final boolean useIds = spec.containsKey("ids") && spec.getBoolean("ids");

    // nothing to parse unless the group exists and is an array of column specs
    if (!spec.containsKey(group) || !(spec.get(group) instanceof JSONArray)) {
        return new int[0];
    }

    final JSONArray entries = (JSONArray) spec.get(group);
    final int count = entries.size();
    final int[] columnIds = new int[count];
    for (int pos = 0; pos < count; pos++) {
        final JSONObject entry = (JSONObject) entries.get(pos);
        final int columnId;
        if (useIds) {
            columnId = entry.getInt("id");
        } else {
            // indexOf yields -1 for unknown names, which maps to the invalid ID 0
            columnId = ArrayUtils.indexOf(colnames, entry.get("name")) + 1;
        }
        if (columnId <= 0) {
            throw new RuntimeException("Specified column '" + entry.get(useIds ? "id" : "name") + "' does not exist.");
        }
        columnIds[pos] = columnId;
    }

    // callers receive the column IDs in ascending order
    Arrays.sort(columnIds);
    return columnIds;
}
Use of org.apache.wink.json4j.JSONArray in project incubator-systemml by Apache.
Class DataTransform, method getNumColumnsTf.
/**
 * Helper function to determine the number of columns after applying
 * transformations. Note that dummycoding changes the number of columns.
 *
 * The result starts at the number of fields in the header line. For every
 * column listed under the dummycode method of {@code spec.json}, it grows
 * by (n - 1), where n is read from the column's bin metadata file if that
 * file is valid, and otherwise from its recode metadata file.
 *
 * @param fs file system
 * @param header header line
 * @param delim delimiter (treated as a literal string, not as a regex)
 * @param tfMtdPath transform metadata path
 * @return number of columns after applying transformations
 * @throws IllegalArgumentException declared for callers; not thrown directly by this method
 * @throws IOException if the specification or a metadata file cannot be read
 * @throws DMLRuntimeException if a dummycoded column ID is outside the header,
 *         or its metadata is missing, empty or malformed
 * @throws JSONException if the specification cannot be parsed or accessed
 */
private static int getNumColumnsTf(FileSystem fs, String header, String delim, String tfMtdPath) throws IllegalArgumentException, IOException, DMLRuntimeException, JSONException {
    String[] columnNames = Pattern.compile(Pattern.quote(delim)).split(header, -1);
    int ret = columnNames.length;
    JSONObject spec = null;
    try (BufferedReader br = new BufferedReader(new InputStreamReader(fs.open(new Path(tfMtdPath + "/spec.json"))))) {
        spec = JSONHelper.parse(br);
    }
    // without dummycoding the column count is unchanged
    if (!spec.containsKey(TfUtils.TXMETHOD_DUMMYCODE))
        return ret;
    // fetch relevant attribute lists
    JSONArray dcdList = (JSONArray) ((JSONObject) spec.get(TfUtils.TXMETHOD_DUMMYCODE)).get(TfUtils.JSON_ATTRS);
    // look for numBins among binned columns, falling back to recoded columns
    for (Object o : dcdList) {
        int id = UtilFunctions.toInt(o);
        // column IDs are 1-based positions into the header; reject IDs the header cannot resolve
        if (id < 1 || id > columnNames.length)
            throw new DMLRuntimeException("Dummycoded column id " + id + " is out of range; header has " + columnNames.length + " columns.");
        String colName = columnNames[id - 1];
        Path binpath = new Path(tfMtdPath + "/Bin/" + UtilFunctions.unquote(colName) + TfUtils.TXMTD_BIN_FILE_SUFFIX);
        Path rcdpath = new Path(tfMtdPath + "/Recode/" + UtilFunctions.unquote(colName) + TfUtils.TXMTD_RCD_DISTINCT_SUFFIX);
        if (TfUtils.checkValidInputFile(fs, binpath, false)) {
            int nbins = -1;
            try (BufferedReader br = new BufferedReader(new InputStreamReader(fs.open(binpath)))) {
                // readLine returns null for an empty file
                String line = br.readLine();
                if (line == null)
                    throw new DMLRuntimeException("Bin metadata file " + binpath + " for column (id=" + id + ", name=" + colName + ") is empty.");
                // the number of bins is read from the fifth field of the metadata line
                String[] fields = line.split(TfUtils.TXMTD_SEP);
                if (fields.length < 5)
                    throw new DMLRuntimeException("Bin metadata file " + binpath + " for column (id=" + id + ", name=" + colName + ") is malformed: expected at least 5 fields but found " + fields.length + ".");
                nbins = UtilFunctions.parseToInt(fields[4]);
            }
            ret += (nbins - 1);
        } else if (TfUtils.checkValidInputFile(fs, rcdpath, false)) {
            int ndistinct = -1;
            try (BufferedReader br = new BufferedReader(new InputStreamReader(fs.open(rcdpath)))) {
                // readLine returns null for an empty file
                String line = br.readLine();
                if (line == null)
                    throw new DMLRuntimeException("Recode metadata file " + rcdpath + " for column (id=" + id + ", name=" + colName + ") is empty.");
                ndistinct = UtilFunctions.parseToInt(line);
            }
            ret += (ndistinct - 1);
        } else
            throw new DMLRuntimeException("Relevant transformation metadata for column (id=" + id + ", name=" + colName + ") is not found.");
    }
    return ret;
}
Aggregations