use of com.tencent.angel.worker.task.TaskId in project angel by Tencent.
the class AMTaskManager method deserialize.
/**
 * Reads the state of all tasks from an input stream and registers them in the task map.
 *
 * <p>The stream is read as an int task count, followed for every task by an int that is
 * passed to the {@code TaskId} constructor and then the data consumed by
 * {@code AMTask.deserialize}.
 *
 * @param input the input stream to read the task states from
 * @throws IOException if reading from the stream fails
 */
public void deserialize(DataInputStream input) throws IOException {
  // Acquire the lock before entering the try block, so the finally clause only
  // releases a lock that this thread actually holds.
  writeLock.lock();
  try {
    int size = input.readInt();
    for (int i = 0; i < size; i++) {
      TaskId taskId = new TaskId(input.readInt());
      AMTask task = new AMTask(taskId, null);
      task.deserialize(input);
      LOG.info("task deserialize=" + task);
      idToTaskMap.put(taskId, task);
    }
  } finally {
    writeLock.unlock();
  }
}
use of com.tencent.angel.worker.task.TaskId in project angel by Tencent.
the class TaskCalPerfChecker method check.
/**
 * Detects slow workers by comparing per-task calculation rates with the overall average rate.
 *
 * <p>A rate is the calculation time in milliseconds per 10000 samples. Only tasks that report
 * both the sample counter and the time counter, and that have processed more than 5000000
 * samples, are candidates. A candidate is treated as slow when
 * {@code averageRate < taskRate * slowestDiscount}; the worker that the worker manager returns
 * for that task, if any, is added to the result.
 *
 * @param context the application master context supplying configuration, tasks and workers
 * @return the ids of the workers considered slow; empty if none was found
 */
@Override
public List<Id> check(AMContext context) {
  double slowestDiscount = context.getConf().getDouble(AngelConf.ANGEL_AM_TASK_SLOWEST_DISCOUNT,
      AngelConf.DEFAULT_ANGEL_AM_TASK_SLOWEST_DISCOUNT);
  LOG.info("start to check slow workers use TaskCalPerfChecker policy, slowestDiscount = "
      + slowestDiscount);

  Set<Id> slowWorkers = new HashSet<Id>();
  AMTaskManager taskManage = context.getTaskManager();
  WorkerManager workerManager = context.getWorkerManager();
  Collection<AMTask> tasks = taskManage.getTasks();

  long totalSamples = 0;
  long totalCalTimeMs = 0;
  double averageRate = 0.0;
  Map<TaskId, Double> taskIdToRateMap = new HashMap<TaskId, Double>(tasks.size());

  for (AMTask task : tasks) {
    if (task.getMetrics().containsKey(TaskCounter.TOTAL_CALCULATE_SAMPLES)
        && task.getMetrics().containsKey(TaskCounter.TOTAL_CALCULATE_TIME_MS)) {
      long sampleNum = Long.valueOf(task.getMetrics().get(TaskCounter.TOTAL_CALCULATE_SAMPLES));
      double calTimeMs = Long.valueOf(task.getMetrics().get(TaskCounter.TOTAL_CALCULATE_TIME_MS));
      LOG.info("for task " + task.getTaskId() + ", sampleNum = " + sampleNum + ", calTimeMs = "
          + calTimeMs);
      totalSamples += sampleNum;
      totalCalTimeMs += calTimeMs;

      // Only tasks with more than 5000000 processed samples get an individual rate.
      if (sampleNum > 5000000) {
        double rate = calTimeMs * 10000 / sampleNum;
        LOG.info("task " + task.getTaskId() + " calculate rate = " + rate);
        taskIdToRateMap.put(task.getTaskId(), rate);
      }
    }
  }

  if (totalSamples != 0) {
    averageRate = (double) totalCalTimeMs * 10000 / totalSamples;
  }
  LOG.info("totalSamples = " + totalSamples + ", totalCalTimeMs = " + totalCalTimeMs
      + ", average calulate time for 10000 samples = " + averageRate
      + ", the maximum calulate time for 10000 sample = " + averageRate / slowestDiscount);

  for (Map.Entry<TaskId, Double> rateEntry : taskIdToRateMap.entrySet()) {
    if (averageRate < rateEntry.getValue() * slowestDiscount) {
      // The task needs more time per 10000 samples than the allowed maximum logged above,
      // so report the threshold that was actually exceeded.
      LOG.info("task " + rateEntry.getKey() + " rate = " + rateEntry.getValue() + " is > "
          + averageRate / slowestDiscount);
      AMWorker worker = workerManager.getWorker(rateEntry.getKey());
      if (worker != null) {
        LOG.info("put worker " + worker.getId() + " to slow worker list");
        slowWorkers.add(worker.getId());
      }
    }
  }

  return new ArrayList<>(slowWorkers);
}
use of com.tencent.angel.worker.task.TaskId in project angel by Tencent.
the class ServerPartitionTest method testWriteTo.
/**
 * Starts a local Angel application, saves the {@code serverPartition} field to the file
 * "data" after updating row 6, and checks that a freshly created partition holds the same
 * row data once it has loaded that file.
 *
 * @throws Exception if starting the application, file I/O or the partition operations fail
 */
@Test
public void testWriteTo() throws Exception {
  // set basic configuration keys
  conf = new Configuration();
  conf.setBoolean("mapred.mapper.new-api", true);
  conf.setBoolean(AngelConf.ANGEL_JOB_OUTPUT_PATH_DELETEONEXIST, true);
  conf.set(AngelConf.ANGEL_TASK_USER_TASKCLASS, DummyTask.class.getName());
  // use local deploy mode and dummy dataspliter
  conf.set(AngelConf.ANGEL_DEPLOY_MODE, "LOCAL");
  conf.setBoolean(AngelConf.ANGEL_AM_USE_DUMMY_DATASPLITER, true);
  conf.set(AngelConf.ANGEL_INPUTFORMAT_CLASS, CombineTextInputFormat.class.getName());
  conf.set(AngelConf.ANGEL_SAVE_MODEL_PATH, LOCAL_FS + TMP_PATH + "/out");
  conf.set(AngelConf.ANGEL_TRAIN_DATA_PATH, LOCAL_FS + TMP_PATH + "/in");
  conf.set(AngelConf.ANGEL_LOG_PATH, LOCAL_FS + TMP_PATH + "/log");
  conf.setInt(AngelConf.ANGEL_WORKERGROUP_NUMBER, 1);
  conf.setInt(AngelConf.ANGEL_PS_NUMBER, 1);
  conf.setInt(AngelConf.ANGEL_WORKER_TASK_NUMBER, 2);

  // get an angel client
  angelClient = AngelClientFactory.get(conf);

  // add matrix
  MatrixContext mMatrix = new MatrixContext();
  mMatrix.setName("w1");
  mMatrix.setRowNum(1);
  mMatrix.setColNum(100000);
  mMatrix.setMaxRowNumInBlock(1);
  mMatrix.setMaxColNumInBlock(50000);
  mMatrix.setRowType(RowType.T_INT_DENSE);
  mMatrix.set(MatrixConf.MATRIX_OPLOG_ENABLEFILTER, "false");
  mMatrix.set(MatrixConf.MATRIX_HOGWILD, "true");
  mMatrix.set(MatrixConf.MATRIX_AVERAGE, "false");
  mMatrix.set(MatrixConf.MATRIX_OPLOG_TYPE, "DENSE_INT");
  angelClient.addMatrix(mMatrix);

  angelClient.startPSServer();
  angelClient.runTask(DummyTask.class);
  Thread.sleep(5000);

  group0Id = new WorkerGroupId(0);
  worker0Id = new WorkerId(group0Id, 0);
  worker0Attempt0Id = new WorkerAttemptId(worker0Id, 0);
  task0Id = new TaskId(0);
  task1Id = new TaskId(1);
  psId = new ParameterServerId(0);
  psAttempt0Id = new PSAttemptId(psId, 0);

  // Write the partition to disk; try-with-resources closes the file even when the
  // update or the save throws.
  try (DataOutputStream out = new DataOutputStream(new FileOutputStream("data"))) {
    // eight double values used to update row 6
    ByteBuf buf = Unpooled.buffer(16);
    buf.writeDouble(0.00);
    buf.writeDouble(1.00);
    buf.writeDouble(-1.00);
    buf.writeDouble(-2.00);
    buf.writeDouble(-5.00);
    buf.writeDouble(-6.00);
    buf.writeDouble(-7.00);
    buf.writeDouble(-8.00);
    serverPartition.getRow(6).update(RowType.T_DOUBLE_DENSE, buf, 8);
    serverPartition.save(out);
  }

  // Read the file back into a new partition; the stream is closed even when the
  // assertion or the load fails.
  try (DataInputStream in = new DataInputStream(new FileInputStream("data"))) {
    PartitionKey partitionKeyNew = new PartitionKey(2, 1, 1, 2, 8, 10);
    ServerPartition serverPartitionNew =
        new ServerPartition(partitionKeyNew, RowType.T_DOUBLE_DENSE);
    serverPartitionNew.init();
    assertNotEquals(((ServerDenseDoubleRow) serverPartition.getRow(6)).getData(),
        ((ServerDenseDoubleRow) serverPartitionNew.getRow(6)).getData());
    serverPartitionNew.load(in);
    in.close();
    assertEquals(((ServerDenseDoubleRow) serverPartition.getRow(6)).getData(),
        ((ServerDenseDoubleRow) serverPartitionNew.getRow(6)).getData());
  }

  angelClient.stop();
}
use of com.tencent.angel.worker.task.TaskId in project angel by Tencent.
the class AlgoMetricsTest method setup.
/**
 * Brings up a local Angel application that runs {@code MetricTestTask} with one int-dense
 * matrix named "w1", then creates the two task ids used by the tests.
 *
 * @throws Exception any failure during startup, rethrown after being logged
 */
@Before
public void setup() throws Exception {
  try {
    // common prefix of the output, input and log locations
    final String tmpRoot = LOCAL_FS + TMP_PATH;

    // basic job settings
    Configuration configuration = new Configuration();
    configuration.setBoolean("mapred.mapper.new-api", true);
    configuration.setBoolean(AngelConf.ANGEL_JOB_OUTPUT_PATH_DELETEONEXIST, true);

    // local deploy mode with the dummy data splitter
    configuration.set(AngelConf.ANGEL_DEPLOY_MODE, "LOCAL");
    configuration.setBoolean(AngelConf.ANGEL_AM_USE_DUMMY_DATASPLITER, true);
    configuration.set(AngelConf.ANGEL_INPUTFORMAT_CLASS,
        CombineTextInputFormat.class.getName());

    // file system locations
    configuration.set(AngelConf.ANGEL_SAVE_MODEL_PATH, tmpRoot + "/out");
    configuration.set(AngelConf.ANGEL_TRAIN_DATA_PATH, tmpRoot + "/in");
    configuration.set(AngelConf.ANGEL_LOG_PATH, tmpRoot + "/log");

    // one worker group, one parameter server, two tasks per worker
    configuration.setInt(AngelConf.ANGEL_WORKERGROUP_NUMBER, 1);
    configuration.setInt(AngelConf.ANGEL_PS_NUMBER, 1);
    configuration.setInt(AngelConf.ANGEL_WORKER_TASK_NUMBER, 2);

    // create the client for this configuration
    angelClient = AngelClientFactory.get(configuration);

    // describe and register the matrix
    MatrixContext matrix = new MatrixContext();
    matrix.setName("w1");
    matrix.setRowNum(1);
    matrix.setColNum(100000);
    matrix.setMaxRowNumInBlock(1);
    matrix.setMaxColNumInBlock(50000);
    matrix.setRowType(RowType.T_INT_DENSE);
    matrix.set(MatrixConf.MATRIX_OPLOG_ENABLEFILTER, "false");
    matrix.set(MatrixConf.MATRIX_HOGWILD, "true");
    matrix.set(MatrixConf.MATRIX_AVERAGE, "false");
    matrix.set(MatrixConf.MATRIX_OPLOG_TYPE, "DENSE_INT");
    angelClient.addMatrix(matrix);

    // start the parameter server and the task, then wait five seconds
    angelClient.startPSServer();
    angelClient.runTask(MetricTestTask.class);
    Thread.sleep(5000);

    task0Id = new TaskId(0);
    task1Id = new TaskId(1);
  } catch (Exception e) {
    LOG.error("setup failed ", e);
    throw e;
  }
}
use of com.tencent.angel.worker.task.TaskId in project angel by Tencent.
the class PeriodHATest method setup.
/**
 * Brings up a local Angel application with two parameter servers, a replication number of 2
 * and a 1000 ms push interval, registers one int-dense matrix named "w1" and creates the
 * task id used by the tests.
 *
 * @throws Exception any failure during startup, rethrown after being logged
 */
@Before
public void setup() throws Exception {
  try {
    // common prefix of the output, input and log locations
    final String tmpRoot = LOCAL_FS + TMP_PATH;

    // basic job settings
    Configuration configuration = new Configuration();
    configuration.setBoolean("mapred.mapper.new-api", true);
    configuration.setBoolean(AngelConf.ANGEL_JOB_OUTPUT_PATH_DELETEONEXIST, true);
    configuration.set(AngelConf.ANGEL_TASK_USER_TASKCLASS, DummyTask.class.getName());

    // local deploy mode with the dummy data splitter
    configuration.set(AngelConf.ANGEL_DEPLOY_MODE, "LOCAL");
    configuration.setBoolean(AngelConf.ANGEL_AM_USE_DUMMY_DATASPLITER, true);
    configuration.set(AngelConf.ANGEL_INPUTFORMAT_CLASS,
        CombineTextInputFormat.class.getName());

    // file system locations
    configuration.set(AngelConf.ANGEL_SAVE_MODEL_PATH, tmpRoot + "/out");
    configuration.set(AngelConf.ANGEL_TRAIN_DATA_PATH, tmpRoot + "/in");
    configuration.set(AngelConf.ANGEL_LOG_PATH, tmpRoot + "/log");

    // one worker group with one task, two parameter servers
    configuration.setInt(AngelConf.ANGEL_WORKERGROUP_NUMBER, 1);
    configuration.setInt(AngelConf.ANGEL_PS_NUMBER, 2);
    configuration.setInt(AngelConf.ANGEL_WORKER_TASK_NUMBER, 1);

    // parameter server HA settings
    configuration.setInt(AngelConf.ANGEL_PS_HA_REPLICATION_NUMBER, 2);
    configuration.setInt(AngelConf.ANGEL_PS_HA_PUSH_INTERVAL_MS, 1000);
    // conf.setBoolean(AngelConf.ANGEL_PS_HA_USE_EVENT_PUSH, true);
    // conf.setBoolean(AngelConf.ANGEL_PS_HA_PUSH_SYNC, true);

    // create the client for this configuration
    angelClient = AngelClientFactory.get(configuration);

    // describe and register the matrix; each block holds half of the columns
    MatrixContext matrix = new MatrixContext();
    matrix.setName("w1");
    matrix.setRowNum(1);
    matrix.setColNum(dim);
    matrix.setMaxRowNumInBlock(1);
    matrix.setMaxColNumInBlock(dim / 2);
    matrix.setRowType(RowType.T_INT_DENSE);
    matrix.set(MatrixConf.MATRIX_HOGWILD, "true");
    matrix.set(MatrixConf.MATRIX_AVERAGE, "false");
    angelClient.addMatrix(matrix);

    // start the parameter servers and the application, then wait five seconds
    angelClient.startPSServer();
    angelClient.run();
    Thread.sleep(5000);

    task0Id = new TaskId(0);
  } catch (Exception e) {
    LOG.error("setup failed ", e);
    throw e;
  }
}
Aggregations