Search in sources:

Example 1 with SplitClassification

use of com.tencent.angel.split.SplitClassification in project angel by Tencent.

the class MasterClient method getWorkerGroupMetaInfo.

/**
 * Fetch the meta information of this worker's group from the master: the member workers and,
 * when present, the data splits. The master is polled in a loop, sleeping between attempts,
 * until it reports the worker group status as WORKERGROUP_OK.
 *
 * @return WorkerGroup built from the master's response
 * @throws ClassNotFoundException split class not found
 * @throws IOException            deserialize data splits meta failed
 * @throws ServiceException       rpc failed
 * @throws InterruptedException   interrupted while sleeping before the next attempt
 */
public WorkerGroup getWorkerGroupMetaInfo() throws ClassNotFoundException, IOException, ServiceException, InterruptedException {
    GetWorkerGroupMetaInfoRequest request = GetWorkerGroupMetaInfoRequest.newBuilder()
        .setWorkerAttemptId(WorkerContext.get().getWorkerAttemptIdProto())
        .build();
    for (;;) {
        GetWorkerGroupMetaInfoResponse reply = master.getWorkerGroupMetaInfo(null, request);
        // An exited group is not expected here; this is only verified when assertions are enabled
        assert (reply.getWorkerGroupStatus() != GetWorkerGroupMetaInfoResponse.WorkerGroupStatus.WORKERGROUP_EXITED);
        LOG.debug("GetWorkerGroupMetaInfoResponse response=" + reply);

        // Group not ready yet: back off, then ask the master again
        if (reply.getWorkerGroupStatus() != GetWorkerGroupMetaInfoResponse.WorkerGroupStatus.WORKERGROUP_OK) {
            Thread.sleep(WorkerContext.get().getRequestSleepTimeMS());
            continue;
        }

        // Data splits meta is deserialized only when the response carries at least one split
        SplitClassification groupSplits = response_hasSplits(reply)
            ? ProtobufUtil.getSplitClassification(reply.getWorkerGroupMeta().getSplitsList(), WorkerContext.get().getConf())
            : null;

        // Register every worker listed in the response with the group
        WorkerGroup workerGroup = new WorkerGroup(WorkerContext.get().getWorkerGroupId(), groupSplits);
        for (WorkerMetaInfoProto member : reply.getWorkerGroupMeta().getWorkersList()) {
            workerGroup.addWorkerRef(new WorkerRef(member.getWorkerLocation().getWorkerAttemptId(), member.getWorkerLocation().getLocation(), member.getTasksList()));
        }
        return workerGroup;
    }
}

/** Tell whether the worker group meta in the given response contains any data split. */
private static boolean response_hasSplits(GetWorkerGroupMetaInfoResponse reply) {
    return reply.getWorkerGroupMeta().getSplitsCount() > 0;
}
Also used : WorkerRef(com.tencent.angel.worker.WorkerRef) WorkerGroup(com.tencent.angel.worker.WorkerGroup) SplitClassification(com.tencent.angel.split.SplitClassification)

Example 2 with SplitClassification

use of com.tencent.angel.split.SplitClassification in project angel by Tencent.

the class DataSpliter method dispatchSplitsUseLocation.

/**
 * Partition the given splits, in list order, into consecutive groups of at most
 * {@code groupItemNumber} splits and record one SplitClassification per group in
 * {@code splitClassifications}, keyed by the group index. For every group the locations of its
 * splits are collected, capped at {@code maxLocationLimit} entries.
 *
 * @param splitsNewAPI    splits to dispatch
 * @param groupNumber     not read by this method
 * @param groupItemNumber maximum number of splits placed in one group
 * @throws IOException          propagated from reading a split's locations
 * @throws InterruptedException propagated from reading a split's locations
 */
private void dispatchSplitsUseLocation(List<org.apache.hadoop.mapreduce.InputSplit> splitsNewAPI, int groupNumber, int groupItemNumber) throws IOException, InterruptedException {
    final int total = splitsNewAPI.size();
    splitNum = total;
    // The actual split count may differ from the expected one, so the number of groups is
    // derived from the real split count (ceiling of total / groupItemNumber)
    final int groupCount = (total + groupItemNumber - 1) / groupItemNumber;

    for (int groupIdx = 0; groupIdx < groupCount; groupIdx++) {
        List<org.apache.hadoop.mapreduce.InputSplit> members = new ArrayList<org.apache.hadoop.mapreduce.InputSplit>();
        // Locations are remembered so the group can later be scheduled close to its data
        List<String> hosts = new ArrayList<String>(maxLocationLimit);

        int cursor = groupIdx * groupItemNumber;
        int remaining = groupItemNumber;
        while (remaining > 0 && cursor < total) {
            org.apache.hadoop.mapreduce.InputSplit current = splitsNewAPI.get(cursor);
            members.add(current);
            for (String host : current.getLocations()) {
                if (hosts.size() >= maxLocationLimit) {
                    break;
                }
                hosts.add(host);
            }
            cursor++;
            remaining--;
        }

        String[] hostArray = hosts.toArray(new String[hosts.size()]);
        splitClassifications.put(groupIdx, new SplitClassification(null, members, hostArray, true));
    }
}
Also used : ArrayList(java.util.ArrayList) SplitClassification(com.tencent.angel.split.SplitClassification)

Example 3 with SplitClassification

use of com.tencent.angel.split.SplitClassification in project angel by Tencent.

the class DataSpliter method deserialize.

/**
 * Read data splits from an input stream.
 * <p>
 * The stream is expected to contain, in order: the total split number, the number of
 * split classifications, and then for each classification its index followed by its
 * serialized content. Every classification read is stored in {@code splitClassifications}
 * under its index.
 * <p>
 * The stream is always closed before this method returns, including when reading fails.
 *
 * @param inputStream input stream, closed by this method
 * @throws IOException            reading from the stream or closing it failed
 * @throws ClassNotFoundException propagated from deserializing a split classification
 *                                (presumably a split class could not be loaded)
 */
public void deserialize(FSDataInputStream inputStream) throws IOException, ClassNotFoundException {
    try {
        splitNum = inputStream.readInt();
        int size = inputStream.readInt();
        for (int i = 0; i < size; i++) {
            int index = inputStream.readInt();
            SplitClassification split = new SplitClassification();
            split.deserialize(inputStream);
            splitClassifications.put(index, split);
        }
    } finally {
        // Close on the failure path too, so a corrupt or truncated stream does not leak the handle
        inputStream.close();
    }
}
Also used : SplitClassification(com.tencent.angel.split.SplitClassification)

Example 4 with SplitClassification

use of com.tencent.angel.split.SplitClassification in project angel by Tencent.

the class SerdeUtils method deSerilizeSplitProtos.

/**
 * Rebuild a SplitClassification from a list of split protobuf messages.
 * <p>
 * The boolean setting "mapred.mapper.new-api" (default false) selects which Hadoop API the
 * splits are deserialized for: {@code org.apache.hadoop.mapreduce.InputSplit} when true,
 * {@code org.apache.hadoop.mapred.InputSplit} otherwise.
 *
 * @param splitInfoList serialized splits, each carrying its split class and split bytes
 * @param conf          configuration read for the API switch and passed to the split deserializers
 * @return classification holding the deserialized splits in list order
 * @throws ClassNotFoundException propagated from deserializing a split
 * @throws IOException            propagated from deserializing a split
 */
public static SplitClassification deSerilizeSplitProtos(List<SplitInfoProto> splitInfoList, Configuration conf) throws ClassNotFoundException, IOException {
    if (!conf.getBoolean("mapred.mapper.new-api", false)) {
        // Old (mapred) API splits
        List<org.apache.hadoop.mapred.InputSplit> oldApiSplits = new ArrayList<org.apache.hadoop.mapred.InputSplit>();
        for (SplitInfoProto proto : splitInfoList) {
            byte[] payload = proto.getSplit().toByteArray();
            oldApiSplits.add(deSerilizeOldSplit(proto.getSplitClass(), payload, conf));
        }
        return new SplitClassification(oldApiSplits, null, true);
    }

    // New (mapreduce) API splits
    List<org.apache.hadoop.mapreduce.InputSplit> newApiSplits = new ArrayList<org.apache.hadoop.mapreduce.InputSplit>();
    for (SplitInfoProto proto : splitInfoList) {
        byte[] payload = proto.getSplit().toByteArray();
        newApiSplits.add(deSerilizeNewSplit(proto.getSplitClass(), payload, conf));
    }
    return new SplitClassification(null, newApiSplits, true);
}
Also used : ArrayList(java.util.ArrayList) SplitInfoProto(com.tencent.angel.protobuf.generated.WorkerMasterServiceProtos.SplitInfoProto) SplitClassification(com.tencent.angel.split.SplitClassification)

Example 5 with SplitClassification

use of com.tencent.angel.split.SplitClassification in project angel by Tencent.

the class SerdeUtils method deSerilizeSplits.

/**
 * Rebuild a SplitClassification from a list of SplitInfo objects.
 * <p>
 * The boolean setting "mapred.mapper.new-api" (default false) selects which Hadoop API the
 * splits are deserialized for: {@code org.apache.hadoop.mapreduce.InputSplit} when true,
 * {@code org.apache.hadoop.mapred.InputSplit} otherwise.
 *
 * @param splitInfoList serialized splits
 * @param conf          configuration read for the API switch and passed to the split deserializers
 * @return classification holding the deserialized splits in list order
 * @throws ClassNotFoundException propagated from deserializing a split
 * @throws IOException            propagated from deserializing a split
 */
public static SplitClassification deSerilizeSplits(List<SplitInfo> splitInfoList, Configuration conf) throws ClassNotFoundException, IOException {
    if (!conf.getBoolean("mapred.mapper.new-api", false)) {
        // Old (mapred) API splits
        List<org.apache.hadoop.mapred.InputSplit> oldApiSplits = new ArrayList<org.apache.hadoop.mapred.InputSplit>();
        for (SplitInfo info : splitInfoList) {
            oldApiSplits.add(deSerilizeOldSplit(info, conf));
        }
        return new SplitClassification(oldApiSplits, null, true);
    }

    // New (mapreduce) API splits
    List<org.apache.hadoop.mapreduce.InputSplit> newApiSplits = new ArrayList<org.apache.hadoop.mapreduce.InputSplit>();
    for (SplitInfo info : splitInfoList) {
        newApiSplits.add(deSerilizeNewSplit(info, conf));
    }
    return new SplitClassification(null, newApiSplits, true);
}
Also used : ArrayList(java.util.ArrayList) SplitClassification(com.tencent.angel.split.SplitClassification) SplitInfo(com.tencent.angel.split.SplitInfo)

Aggregations

SplitClassification (com.tencent.angel.split.SplitClassification): 6; ArrayList (java.util.ArrayList): 4; SplitInfoProto (com.tencent.angel.protobuf.generated.WorkerMasterServiceProtos.SplitInfoProto): 1; SplitInfo (com.tencent.angel.split.SplitInfo): 1; WorkerGroup (com.tencent.angel.worker.WorkerGroup): 1; WorkerRef (com.tencent.angel.worker.WorkerRef): 1; org.apache.hadoop.mapred (org.apache.hadoop.mapred): 1