use of com.microsoft.dhalion.core.Diagnosis in project heron by twitter.
the class DataSkewDiagnoser method diagnose.
/**
 * Looks for data skew on the component that is currently reporting back pressure.
 *
 * <p>A diagnosis is produced only when the back-pressure component also carries both a
 * wait-queue-size skew symptom and a processing-rate skew symptom. Measurements taken between
 * the previous and the current checkpoint are then inspected per instance: an instance is
 * reported when the component-wide maximum wait-queue size is less than twice the instance's
 * mean wait-queue size and the component-wide maximum execution count is less than 1.10 times
 * the instance's mean execution count.
 *
 * @param symptoms symptoms to evaluate
 * @return a collection holding at most one data-skew diagnosis; empty when the conditions
 *     above are not met
 * @throws IllegalStateException if the back-pressure symptom table has more than one entry
 */
@Override
public Collection<Diagnosis> diagnose(Collection<Symptom> symptoms) {
  Collection<Diagnosis> result = new ArrayList<>();
  SymptomsTable allSymptoms = SymptomsTable.of(symptoms);

  SymptomsTable backPressure = allSymptoms.type(SYMPTOM_COMP_BACK_PRESSURE.text());
  if (backPressure.size() > 1) {
    // TODO handle cases where multiple detectors create back pressure symptom
    throw new IllegalStateException("Multiple back-pressure symptoms case");
  } else if (backPressure.size() == 0) {
    // Without back pressure there is nothing to diagnose.
    return result;
  }

  // Only the first assignment of the back-pressure symptom is considered.
  String component = backPressure.first().assignments().iterator().next();

  SymptomsTable rateSkew = allSymptoms.type(SYMPTOM_PROCESSING_RATE_SKEW.text());
  SymptomsTable queueSkew = allSymptoms.type(SYMPTOM_WAIT_Q_SIZE_SKEW.text());

  // Both skew symptoms must be present for the same component that shows back pressure.
  if (queueSkew.assignment(component).size() == 0) {
    return result;
  }
  if (rateSkew.assignment(component).size() == 0) {
    return result;
  }

  Collection<String> skewedInstances = new ArrayList<>();
  Instant windowEnd = context.checkpoint();
  Instant windowStart = context.previousCheckpoint();
  MeasurementsTable componentMeasurements =
      context.measurements().between(windowStart, windowEnd).component(component);

  for (String instanceName : componentMeasurements.uniqueInstances()) {
    MeasurementsTable perInstance = componentMeasurements.instance(instanceName);
    double meanQueueSize = perInstance.type(METRIC_WAIT_Q_SIZE.text()).mean();
    double meanExeCount = perInstance.type(METRIC_EXE_COUNT.text()).mean();

    // The guards are written as negated '<' comparisons so that NaN values are rejected
    // exactly as a plain '<' test would reject them.
    if (!(componentMeasurements.type(METRIC_WAIT_Q_SIZE.text()).max() < meanQueueSize * 2)) {
      continue;
    }
    if (!(componentMeasurements.type(METRIC_EXE_COUNT.text()).max() < 1.10 * meanExeCount)) {
      continue;
    }

    skewedInstances.add(instanceName);
    LOG.info(String.format(
        "DataSkew: %s back-pressure, high execution count: %s and high buffer size %s",
        instanceName, meanExeCount, meanQueueSize));
  }

  if (!skewedInstances.isEmpty()) {
    result.add(new Diagnosis(DIAGNOSIS_DATA_SKEW.text(), context.checkpoint(), skewedInstances));
  }
  return result;
}
use of com.microsoft.dhalion.core.Diagnosis in project heron by twitter.
the class SlowInstanceDiagnoser method diagnose.
/**
 * Looks for slow instances on the component that is currently reporting back pressure.
 *
 * <p>A diagnosis is produced only when the back-pressure component carries a wait-queue-size
 * skew symptom and does NOT carry a processing-rate skew symptom. Measurements taken between
 * the previous and the current checkpoint are then inspected per instance: an instance is
 * reported when the component-wide maximum wait-queue size is less than twice the instance's
 * mean wait-queue size.
 *
 * @param symptoms symptoms to evaluate
 * @return a collection holding at most one slow-instance diagnosis; empty when the conditions
 *     above are not met
 * @throws IllegalStateException if the back-pressure symptom table has more than one entry
 */
@Override
public Collection<Diagnosis> diagnose(Collection<Symptom> symptoms) {
  publishingMetrics.executeDiagnoserIncr(SLOW_INSTANCE_DIAGNOSER);

  Collection<Diagnosis> result = new ArrayList<>();
  SymptomsTable allSymptoms = SymptomsTable.of(symptoms);

  SymptomsTable backPressure = allSymptoms.type(SYMPTOM_COMP_BACK_PRESSURE.text());
  if (backPressure.size() > 1) {
    // TODO handle cases where multiple detectors create back pressure symptom
    throw new IllegalStateException("Multiple back-pressure symptoms case");
  } else if (backPressure.size() == 0) {
    // Without back pressure there is nothing to diagnose.
    return result;
  }

  // Only the first assignment of the back-pressure symptom is considered.
  String component = backPressure.first().assignments().iterator().next();

  SymptomsTable rateSkew = allSymptoms.type(SYMPTOM_PROCESSING_RATE_SKEW.text());
  SymptomsTable queueSkew = allSymptoms.type(SYMPTOM_WAIT_Q_SIZE_SKEW.text());

  // Proceed only if, for the back-pressure component, a wait-queue skew symptom exists ...
  if (queueSkew.assignment(component).size() == 0) {
    return result;
  }
  // ... and no processing-rate skew symptom exists.
  if (rateSkew.assignment(component).size() > 0) {
    // TODO in a short window rate skew could exist
    return result;
  }

  Collection<String> slowInstances = new ArrayList<>();
  Instant windowEnd = context.checkpoint();
  Instant windowStart = context.previousCheckpoint();
  MeasurementsTable componentMeasurements =
      context.measurements().between(windowStart, windowEnd).component(component);

  for (String instanceName : componentMeasurements.uniqueInstances()) {
    MeasurementsTable perInstance = componentMeasurements.instance(instanceName);
    double meanQueueSize = perInstance.type(METRIC_WAIT_Q_SIZE.text()).mean();

    // The guard is written as a negated '<' comparison so that NaN values are rejected
    // exactly as a plain '<' test would reject them.
    if (!(componentMeasurements.type(METRIC_WAIT_Q_SIZE.text()).max() < meanQueueSize * 2)) {
      continue;
    }

    slowInstances.add(instanceName);
    LOG.info(String.format(
        "SLOW: %s back-pressure and high buffer size: %s and similar processing rates",
        instanceName, meanQueueSize));
  }

  if (!slowInstances.isEmpty()) {
    Instant diagnosisTime = context.checkpoint();
    result.add(new Diagnosis(DIAGNOSIS_SLOW_INSTANCE.text(), diagnosisTime, slowInstances));
  }
  return result;
}
use of com.microsoft.dhalion.core.Diagnosis in project heron by twitter.
the class ScaleUpResolver method resolve.
/**
 * Scales up a component in response to an under-provisioning diagnosis.
 *
 * <p>Only the first under-provisioning diagnosis, and the first component assigned to it, is
 * acted upon. The new parallelism comes from {@code computeScaleUpFactor}; a packing plan is
 * built for it and submitted to the scheduler. On success a {@link TopologyUpdate} event is
 * published through the event manager and returned.
 *
 * @param diagnosis diagnoses to act upon
 * @return the actions taken; an empty collection (never {@code null}) when there is no
 *     under-provisioning diagnosis, the diagnosis has no assignments, or no new packing plan
 *     could be built
 * @throws RuntimeException if the scheduler rejects the topology update request
 */
@Override
public Collection<Action> resolve(Collection<Diagnosis> diagnosis) {
  List<Action> actions = new ArrayList<>();

  DiagnosisTable table = DiagnosisTable.of(diagnosis);
  table = table.type(DIAGNOSIS_UNDER_PROVISIONING.text());
  if (table.size() == 0) {
    LOG.fine("No under-previsioning diagnosis present, ending as there's nothing to fix");
    return actions;
  }

  // Scale the first assigned component
  Diagnosis underProvisioning = table.first();
  // verify the diagnosis instance is valid
  if (underProvisioning.assignments().isEmpty()) {
    LOG.warning(String.format("Diagnosis %s is missing assignments", underProvisioning.id()));
    return actions;
  }
  String component = underProvisioning.assignments().iterator().next();

  int newParallelism = computeScaleUpFactor(component);
  Map<String, Integer> changeRequest = new HashMap<>();
  changeRequest.put(component, newParallelism);

  PackingPlan currentPackingPlan = packingPlanProvider.get();
  PackingPlan newPlan = buildNewPackingPlan(changeRequest, currentPackingPlan);
  if (newPlan == null) {
    // Report "nothing done" the same way as the other early exits: with an empty collection
    // rather than null, so callers can iterate the result without a null check.
    LOG.warning(String.format(
        "Unable to build a new packing plan for component %s, no action taken", component));
    return actions;
  }

  Scheduler.UpdateTopologyRequest updateTopologyRequest =
      Scheduler.UpdateTopologyRequest.newBuilder()
          .setCurrentPackingPlan(getSerializedPlan(currentPackingPlan))
          .setProposedPackingPlan(getSerializedPlan(newPlan))
          .build();

  LOG.info("Sending Updating topology request: " + updateTopologyRequest);
  if (!schedulerClient.updateTopology(updateTopologyRequest)) {
    throw new RuntimeException(String.format("Failed to update topology with Scheduler, "
        + "updateTopologyRequest=%s", updateTopologyRequest));
  }
  LOG.info("Scheduler updated topology successfully.");

  LOG.info("Broadcasting topology update event");
  TopologyUpdate action =
      new TopologyUpdate(context.checkpoint(), Collections.singletonList(component));
  eventManager.onEvent(action);
  actions.add(action);
  return actions;
}
use of com.microsoft.dhalion.core.Diagnosis in project heron by twitter.
the class SlowInstanceDiagnoserTest method failsIfNoBufferSizeDisparity.
/**
 * A lone back-pressure symptom (created with {@code null} assignments and no wait-queue skew
 * symptom alongside it) must not yield any diagnosis.
 */
@Test
public void failsIfNoBufferSizeDisparity() {
  Collection<Symptom> onlyBackPressure = Collections.singletonList(
      new Symptom(SYMPTOM_COMP_BACK_PRESSURE.text(), Instant.now(), null));

  Collection<Diagnosis> diagnosed = diagnoser.diagnose(onlyBackPressure);

  assertEquals(0, diagnosed.size());
}
use of com.microsoft.dhalion.core.Diagnosis in project heron by twitter.
the class SlowInstanceDiagnoserTest method diagnosis1of3SlowInstances.
/**
 * With back-pressure and wait-queue-skew symptoms on the same component, exactly one
 * slow-instance diagnosis is expected, assigned to instance "i1".
 */
@Test
public void diagnosis1of3SlowInstances() {
  // Presumably one value per instance (i1, i2, i3) -- confirm against addMeasurements.
  addMeasurements(METRIC_BACK_PRESSURE, 123, 0, 0);
  addMeasurements(METRIC_WAIT_Q_SIZE, 1000, 20, 20);
  when(context.measurements()).thenReturn(MeasurementsTable.of(measurements));

  Collection<String> componentOnly = Collections.singleton(comp);
  Collection<Symptom> input = Arrays.asList(
      new Symptom(SYMPTOM_COMP_BACK_PRESSURE.text(), now, componentOnly),
      new Symptom(SYMPTOM_WAIT_Q_SIZE_SKEW.text(), now, componentOnly));

  Collection<Diagnosis> diagnosed = diagnoser.diagnose(input);
  assertEquals(1, diagnosed.size());

  Diagnosis slowInstance = diagnosed.iterator().next();
  assertEquals(DIAGNOSIS_SLOW_INSTANCE.text(), slowInstance.type());

  Collection<String> assigned = slowInstance.assignments();
  assertEquals(1, assigned.size());
  assertEquals("i1", assigned.iterator().next());
  // TODO: also verify that the diagnosis refers to exactly one symptom.
}
Aggregations