use of com.tencent.angel.split.SplitClassification in project angel by Tencent.
the class MasterClient method getWorkerGroupMetaInfo.
/**
 * Get worker group information (workers and data splits) from the master. The master is polled
 * until it reports the worker group as {@code WORKERGROUP_OK}; between two attempts the calling
 * thread sleeps for {@code WorkerContext.get().getRequestSleepTimeMS()} milliseconds.
 *
 * @return WorkerGroup worker group information; it is built with {@code null} splits when the
 *         response carries no split
 * @throws ClassNotFoundException split class not found
 * @throws IOException deserialize data splits meta failed
 * @throws ServiceException rpc failed
 * @throws InterruptedException interrupted when wait for next try
 */
public WorkerGroup getWorkerGroupMetaInfo() throws ClassNotFoundException, IOException, ServiceException, InterruptedException {
  GetWorkerGroupMetaInfoRequest request = GetWorkerGroupMetaInfoRequest.newBuilder()
      .setWorkerAttemptId(WorkerContext.get().getWorkerAttemptIdProto())
      .build();

  while (true) {
    GetWorkerGroupMetaInfoResponse response = master.getWorkerGroupMetaInfo(null, request);
    GetWorkerGroupMetaInfoResponse.WorkerGroupStatus status = response.getWorkerGroupStatus();

    // Only checked when the JVM runs with assertions enabled (-ea).
    assert (status != GetWorkerGroupMetaInfoResponse.WorkerGroupStatus.WORKERGROUP_EXITED);

    // This runs once per poll iteration: do not pay for rendering the whole response as text
    // unless debug output is actually wanted.
    if (LOG.isDebugEnabled()) {
      LOG.debug("GetWorkerGroupMetaInfoResponse response=" + response);
    }

    if (status != GetWorkerGroupMetaInfoResponse.WorkerGroupStatus.WORKERGROUP_OK) {
      // Worker group is not ready yet, wait before asking the master again
      Thread.sleep(WorkerContext.get().getRequestSleepTimeMS());
      continue;
    }

    // Deserialize data splits meta; stays null when the response holds no split
    SplitClassification splits = null;
    if (response.getWorkerGroupMeta().getSplitsCount() > 0) {
      splits = ProtobufUtil.getSplitClassification(
          response.getWorkerGroupMeta().getSplitsList(), WorkerContext.get().getConf());
    }

    // Get workers
    WorkerGroup group = new WorkerGroup(WorkerContext.get().getWorkerGroupId(), splits);
    for (WorkerMetaInfoProto worker : response.getWorkerGroupMeta().getWorkersList()) {
      group.addWorkerRef(new WorkerRef(
          worker.getWorkerLocation().getWorkerAttemptId(),
          worker.getWorkerLocation().getLocation(),
          worker.getTasksList()));
    }
    return group;
  }
}
use of com.tencent.angel.split.SplitClassification in project angel by Tencent.
the class DataSpliter method dispatchSplitsUseLocation.
/**
 * Cut the given new-API input splits into consecutive groups of at most
 * {@code groupItemNumber} splits and store one {@link SplitClassification} per group in
 * {@code splitClassifications}, keyed by the group index. For every group, up to
 * {@code maxLocationLimit} locations are collected from its splits, in split order.
 *
 * @param splitsNewAPI input splits to dispatch; its size is recorded in {@code splitNum}
 * @param groupNumber not used by this method
 * @param groupItemNumber maximum number of splits put into one group
 * @throws IOException thrown by {@code InputSplit.getLocations()}
 * @throws InterruptedException thrown by {@code InputSplit.getLocations()}
 */
private void dispatchSplitsUseLocation(List<org.apache.hadoop.mapreduce.InputSplit> splitsNewAPI, int groupNumber, int groupItemNumber) throws IOException, InterruptedException {
  splitNum = splitsNewAPI.size();

  // The real split count may differ from the expected one, so the number of groups is
  // recomputed from the actual split count (ceiling division).
  int estimatedGroupNum = (splitNum + groupItemNumber - 1) / groupItemNumber;

  for (int groupIndex = 0; groupIndex < estimatedGroupNum; groupIndex++) {
    List<org.apache.hadoop.mapreduce.InputSplit> groupSplits = new ArrayList<org.apache.hadoop.mapreduce.InputSplit>();
    // Locations are kept so the group can later be scheduled close to its data
    List<String> groupLocations = new ArrayList<String>(maxLocationLimit);

    int cursor = groupIndex * groupItemNumber;
    int taken = 0;
    while (taken < groupItemNumber && cursor < splitNum) {
      org.apache.hadoop.mapreduce.InputSplit current = splitsNewAPI.get(cursor);
      groupSplits.add(current);

      for (String location : current.getLocations()) {
        if (groupLocations.size() >= maxLocationLimit) {
          break;
        }
        groupLocations.add(location);
      }

      taken++;
      cursor++;
    }

    String[] locationArray = groupLocations.toArray(new String[groupLocations.size()]);
    splitClassifications.put(groupIndex, new SplitClassification(null, groupSplits, locationArray, true));
  }
}
use of com.tencent.angel.split.SplitClassification in project angel by Tencent.
the class DataSpliter method deserialize.
/**
 * Read data splits from an input stream and put them into {@code splitClassifications}.
 * The stream is read in this order: the split number ({@code splitNum}), the number of
 * split classifications, then for each classification its integer index followed by the
 * serialized {@link SplitClassification}.
 *
 * The stream is closed before this method returns, whether reading succeeded or failed.
 *
 * @param inputStream input stream; it is closed by this method
 * @throws IOException read from the stream or close the stream failed
 * @throws ClassNotFoundException propagated while deserializing a split classification
 */
public void deserialize(FSDataInputStream inputStream) throws IOException, ClassNotFoundException {
  try {
    splitNum = inputStream.readInt();
    int size = inputStream.readInt();
    for (int i = 0; i < size; i++) {
      int index = inputStream.readInt();
      SplitClassification split = new SplitClassification();
      split.deserialize(inputStream);
      splitClassifications.put(index, split);
    }
  } finally {
    // Close in finally so a failed read does not leak the stream
    inputStream.close();
  }
}
use of com.tencent.angel.split.SplitClassification in project angel by Tencent.
the class SerdeUtils method deSerilizeSplitProtos.
/**
 * Rebuild a {@link SplitClassification} from a list of protobuf split descriptions.
 * The configuration item "mapred.mapper.new-api" (default false) decides whether the splits
 * are deserialized as {@code org.apache.hadoop.mapreduce.InputSplit} (new API) or as
 * {@code org.apache.hadoop.mapred.InputSplit} (old API).
 *
 * @param splitInfoList protobuf split descriptions, each with a split class and serialized bytes
 * @param conf configuration, read for the API choice and handed to the split deserializer
 * @return a split classification holding only the list that matches the chosen API; the other
 *         list is passed as {@code null}
 * @throws ClassNotFoundException declared by the split deserializers
 * @throws IOException declared by the split deserializers
 */
public static SplitClassification deSerilizeSplitProtos(List<SplitInfoProto> splitInfoList, Configuration conf) throws ClassNotFoundException, IOException {
  if (!conf.getBoolean("mapred.mapper.new-api", false)) {
    // Old mapred API
    List<org.apache.hadoop.mapred.InputSplit> oldApiSplits = new ArrayList<org.apache.hadoop.mapred.InputSplit>();
    for (SplitInfoProto proto : splitInfoList) {
      oldApiSplits.add(deSerilizeOldSplit(proto.getSplitClass(), proto.getSplit().toByteArray(), conf));
    }
    return new SplitClassification(oldApiSplits, null, true);
  }

  // New mapreduce API
  List<org.apache.hadoop.mapreduce.InputSplit> newApiSplits = new ArrayList<org.apache.hadoop.mapreduce.InputSplit>();
  for (SplitInfoProto proto : splitInfoList) {
    newApiSplits.add(deSerilizeNewSplit(proto.getSplitClass(), proto.getSplit().toByteArray(), conf));
  }
  return new SplitClassification(null, newApiSplits, true);
}
use of com.tencent.angel.split.SplitClassification in project angel by Tencent.
the class SerdeUtils method deSerilizeSplits.
/**
 * Rebuild a {@link SplitClassification} from a list of {@code SplitInfo} items.
 * The configuration item "mapred.mapper.new-api" (default false) decides whether the splits
 * are deserialized as {@code org.apache.hadoop.mapreduce.InputSplit} (new API) or as
 * {@code org.apache.hadoop.mapred.InputSplit} (old API).
 *
 * @param splitInfoList serialized split descriptions
 * @param conf configuration, read for the API choice and handed to the split deserializer
 * @return a split classification holding only the list that matches the chosen API; the other
 *         list is passed as {@code null}
 * @throws ClassNotFoundException declared by the split deserializers
 * @throws IOException declared by the split deserializers
 */
public static SplitClassification deSerilizeSplits(List<SplitInfo> splitInfoList, Configuration conf) throws ClassNotFoundException, IOException {
  if (!conf.getBoolean("mapred.mapper.new-api", false)) {
    // Old mapred API
    List<org.apache.hadoop.mapred.InputSplit> oldApiSplits = new ArrayList<org.apache.hadoop.mapred.InputSplit>();
    for (SplitInfo info : splitInfoList) {
      oldApiSplits.add(deSerilizeOldSplit(info, conf));
    }
    return new SplitClassification(oldApiSplits, null, true);
  }

  // New mapreduce API
  List<org.apache.hadoop.mapreduce.InputSplit> newApiSplits = new ArrayList<org.apache.hadoop.mapreduce.InputSplit>();
  for (SplitInfo info : splitInfoList) {
    newApiSplits.add(deSerilizeNewSplit(info, conf));
  }
  return new SplitClassification(null, newApiSplits, true);
}
Aggregations