Use of com.tencent.angel.split.SplitClassification in project angel by Tencent.
From the class DataSpliter, method dispatchSplitsUseLocation.
/**
 * Partitions the given splits into consecutive groups of at most {@code groupItemNumber}
 * splits and registers one {@link SplitClassification} per group in
 * {@code splitClassifications}, keyed by the zero-based group index.
 *
 * <p>For every group the host names reported by its splits are gathered in traversal order
 * (duplicates are kept) until {@code maxLocationLimit} entries have been collected. The
 * {@code splitNum} field is overwritten with the length of {@code splitArray}.
 *
 * @param splitArray      the splits to distribute, consumed in array order
 * @param groupNumber     not read by this method; the number of groups is derived from the
 *                        actual split count instead
 * @param groupItemNumber the maximum number of splits placed in one group
 * @throws IOException declared by this method; presumably raised while querying a split for
 *                     its locations
 */
private void dispatchSplitsUseLocation(InputSplit[] splitArray, int groupNumber, int groupItemNumber) throws IOException {
  splitNum = splitArray.length;

  // The real split count can differ from the planned one, so the group count is recomputed
  // here as ceil(splitNum / groupItemNumber) rather than taken from the caller.
  int groupCount = (splitNum + groupItemNumber - 1) / groupItemNumber;

  for (int groupIndex = 0; groupIndex < groupCount; groupIndex++) {
    // Hosts of the splits in this group, kept so the group can be scheduled close to its data.
    List<String> hosts = new ArrayList<String>(maxLocationLimit);
    List<org.apache.hadoop.mapred.InputSplit> members =
        new ArrayList<org.apache.hadoop.mapred.InputSplit>();

    int cursor = groupIndex * groupItemNumber;
    int taken = 0;
    while (taken < groupItemNumber && cursor < splitNum) {
      InputSplit current = splitArray[cursor];
      members.add(current);

      for (String host : current.getLocations()) {
        if (hosts.size() >= maxLocationLimit) {
          // Location budget for this group is exhausted.
          break;
        }
        hosts.add(host);
      }

      taken++;
      cursor++;
    }

    String[] hostArray = hosts.toArray(new String[hosts.size()]);
    splitClassifications.put(groupIndex,
        new SplitClassification(members, null, hostArray, useNewAPI));
  }
}
Aggregations