Use of org.apache.flink.runtime.minicluster.LocalFlinkMiniCluster in project flink by apache.
The class RemoteEnvironmentITCase, method setupCluster:
@BeforeClass
public static void setupCluster() {
    try {
        Configuration config = new Configuration();
        config.setInteger(ConfigConstants.LOCAL_NUMBER_TASK_MANAGER, NUM_TM);
        config.setInteger(ConfigConstants.TASK_MANAGER_NUM_TASK_SLOTS, TM_SLOTS);
        cluster = new LocalFlinkMiniCluster(config, false);
        cluster.start();
    } catch (Exception e) {
        e.printStackTrace();
        fail("Error starting test cluster: " + e.getMessage());
    }
}
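A setup method like this is usually paired with an @AfterClass teardown that stops the mini cluster. The excerpt does not include it; a minimal sketch, assuming the same static cluster field, could look like this:

@AfterClass
public static void shutdownCluster() {
    if (cluster != null) {
        // stop all components of the mini cluster
        cluster.stop();
    }
}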
Use of org.apache.flink.runtime.minicluster.LocalFlinkMiniCluster in project flink by apache.
The class CustomSerializationITCase, method startCluster:
@BeforeClass
public static void startCluster() {
    try {
        Configuration config = new Configuration();
        config.setInteger(ConfigConstants.TASK_MANAGER_NUM_TASK_SLOTS, PARALLELISM);
        // deliberately small managed memory (the value is in megabytes)
        config.setInteger(ConfigConstants.TASK_MANAGER_MEMORY_SIZE_KEY, 30);
        cluster = new LocalFlinkMiniCluster(config, false);
        cluster.start();
    } catch (Exception e) {
        e.printStackTrace();
        fail("Failed to start test cluster: " + e.getMessage());
    }
}
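Once the cluster is running, a test can submit programs to it through a remote environment bound to the cluster's leader RPC port; the same pattern appears in TaskManagerFailureRecoveryITCase below. A minimal sketch, assuming the cluster field from the setup above:

// connect an ExecutionEnvironment to the local mini cluster
ExecutionEnvironment env = ExecutionEnvironment.createRemoteEnvironment(
        "localhost", cluster.getLeaderRPCPort());
env.setParallelism(PARALLELISM);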
Use of org.apache.flink.runtime.minicluster.LocalFlinkMiniCluster in project flink by apache.
The class LocalFlinkMiniClusterITCase, method testLocalFlinkMiniClusterWithMultipleTaskManagers:
@Test
public void testLocalFlinkMiniClusterWithMultipleTaskManagers() {
    final ActorSystem system = ActorSystem.create("Testkit", AkkaUtils.getDefaultAkkaConfig());
    LocalFlinkMiniCluster miniCluster = null;

    final int numTMs = 3;
    final int numSlots = 14;

    // gather the threads that already exist
    final Set<Thread> threadsBefore = new HashSet<>();
    {
        // Thread.activeCount() is only an estimate, so enumerate() may fill
        // fewer slots than the array has; trailing entries then remain null
        final Thread[] allThreads = new Thread[Thread.activeCount()];
        Thread.enumerate(allThreads);
        threadsBefore.addAll(Arrays.asList(allThreads));
    }

    try {
        Configuration config = new Configuration();
        config.setInteger(ConfigConstants.LOCAL_NUMBER_TASK_MANAGER, numTMs);
        config.setInteger(ConfigConstants.TASK_MANAGER_NUM_TASK_SLOTS, numSlots);

        // 'true' runs all cluster components in a single actor system
        miniCluster = new LocalFlinkMiniCluster(config, true);
        miniCluster.start();

        final ActorGateway jmGateway = miniCluster.getLeaderGateway(TestingUtils.TESTING_DURATION());

        new JavaTestKit(system) {
            {
                final ActorGateway selfGateway = new AkkaActorGateway(getRef(), null);

                new Within(TestingUtils.TESTING_DURATION()) {
                    @Override
                    protected void run() {
                        jmGateway.tell(JobManagerMessages.getRequestNumberRegisteredTaskManager(), selfGateway);
                        expectMsgEquals(TestingUtils.TESTING_DURATION(), numTMs);

                        jmGateway.tell(JobManagerMessages.getRequestTotalNumberOfSlots(), selfGateway);
                        expectMsgEquals(TestingUtils.TESTING_DURATION(), numTMs * numSlots);
                    }
                };
            }
        };
    } finally {
        if (miniCluster != null) {
            miniCluster.stop();
            miniCluster.awaitTermination();
        }
        JavaTestKit.shutdownActorSystem(system);
        system.awaitTermination();
    }

    // shut down the global execution context, to make sure it does not affect this testing
    try {
        Field f = ExecutionContextImpl.class.getDeclaredField("executor");
        f.setAccessible(true);

        Object exec = ExecutionContext$.MODULE$.global();
        ForkJoinPool executor = (ForkJoinPool) f.get(exec);
        executor.shutdownNow();
    } catch (Exception e) {
        System.err.println("Cannot test proper thread shutdown for local execution.");
        return;
    }

    // check for remaining threads
    // we need to check repeatedly for a while, because some threads shut down slowly
    long deadline = System.currentTimeMillis() + 30000;
    boolean foundThreads = true;
    String threadName = "";

    while (System.currentTimeMillis() < deadline) {
        // check that no additional threads remain
        final Thread[] threadsAfter = new Thread[Thread.activeCount()];
        Thread.enumerate(threadsAfter);

        foundThreads = false;
        for (Thread t : threadsAfter) {
            // guard against null entries left by enumerate()
            if (t != null && t.isAlive() && !threadsBefore.contains(t)) {
                // this thread was not there before. check if it is allowed
                boolean allowed = false;
                for (String prefix : ALLOWED_THREAD_PREFIXES) {
                    if (t.getName().startsWith(prefix)) {
                        allowed = true;
                        break;
                    }
                }

                if (!allowed) {
                    foundThreads = true;
                    threadName = t.toString();
                    break;
                }
            }
        }

        if (foundThreads) {
            try {
                Thread.sleep(500);
            } catch (InterruptedException ignored) {
            }
        } else {
            break;
        }
    }

    if (foundThreads) {
        fail("Thread " + threadName + " was started by the mini cluster, but not shut down");
    }
}
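The ALLOWED_THREAD_PREFIXES constant referenced in the loop above is declared elsewhere in the test class and is not part of this excerpt. A hypothetical declaration, purely to illustrate its shape (the prefixes in the actual Flink source may differ):

// hypothetical whitelist of thread-name prefixes that may legitimately
// outlive the mini cluster shutdown; the real values in Flink may differ
private static final String[] ALLOWED_THREAD_PREFIXES = {
        "ForkJoinPool.commonPool"
};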
Use of org.apache.flink.runtime.minicluster.LocalFlinkMiniCluster in project flink by apache.
The class TaskManagerFailureRecoveryITCase, method testRestartWithFailingTaskManager:
@Test
public void testRestartWithFailingTaskManager() {
    final int PARALLELISM = 4;

    LocalFlinkMiniCluster cluster = null;
    ActorSystem additionalSystem = null;

    try {
        Configuration config = new Configuration();
        config.setInteger(ConfigConstants.LOCAL_NUMBER_TASK_MANAGER, 2);
        config.setInteger(ConfigConstants.TASK_MANAGER_NUM_TASK_SLOTS, PARALLELISM);
        config.setInteger(ConfigConstants.TASK_MANAGER_MEMORY_SIZE_KEY, 16);
        config.setString(ConfigConstants.AKKA_WATCH_HEARTBEAT_INTERVAL, "500 ms");
        config.setString(ConfigConstants.AKKA_WATCH_HEARTBEAT_PAUSE, "20 s");
        config.setInteger(ConfigConstants.AKKA_WATCH_THRESHOLD, 20);

        cluster = new LocalFlinkMiniCluster(config, false);
        cluster.start();

        // for the result
        List<Long> resultCollection = new ArrayList<Long>();

        final ExecutionEnvironment env = ExecutionEnvironment.createRemoteEnvironment(
                "localhost", cluster.getLeaderRPCPort());
        env.setParallelism(PARALLELISM);
        env.setRestartStrategy(RestartStrategies.fixedDelayRestart(1, 1000));
        env.getConfig().disableSysoutLogging();

        env.generateSequence(1, 10)
                .map(new FailingMapper<Long>())
                .reduce(new ReduceFunction<Long>() {
                    @Override
                    public Long reduce(Long value1, Long value2) {
                        return value1 + value2;
                    }
                })
                .output(new LocalCollectionOutputFormat<Long>(resultCollection));

        // simple reference (atomic does not matter) to pass back an exception from the trigger thread
        final AtomicReference<Throwable> ref = new AtomicReference<Throwable>();

        // trigger the execution from a separate thread, so we are free to tamper with the
        // cluster during the execution
        Thread trigger = new Thread("program trigger") {
            @Override
            public void run() {
                try {
                    env.execute();
                } catch (Throwable t) {
                    ref.set(t);
                }
            }
        };
        trigger.setDaemon(true);
        trigger.start();

        // wait until each mapper subtask has signalled that it is running (they then block)
        for (int i = 0; i < PARALLELISM; i++) {
            FailingMapper.TASK_TO_COORD_QUEUE.take();
        }

        // bring up one more task manager and wait for it to appear
        {
            additionalSystem = cluster.startTaskManagerActorSystem(2);
            ActorRef additionalTaskManager = cluster.startTaskManager(2, additionalSystem);
            Object message = TaskManagerMessages.getNotifyWhenRegisteredAtJobManagerMessage();
            Future<Object> future = Patterns.ask(additionalTaskManager, message, 30000);
            try {
                Await.result(future, new FiniteDuration(30000, TimeUnit.MILLISECONDS));
            } catch (TimeoutException e) {
                fail("The additional TaskManager did not come up within 30 seconds");
            }
        }

        // kill the two other TaskManagers
        for (ActorRef tm : cluster.getTaskManagersAsJava()) {
            tm.tell(PoisonPill.getInstance(), null);
        }

        // wait for the next set of mappers (the recovery ones) to come online
        for (int i = 0; i < PARALLELISM; i++) {
            FailingMapper.TASK_TO_COORD_QUEUE.take();
        }

        // tell the mappers that they may continue this time
        for (int i = 0; i < PARALLELISM; i++) {
            FailingMapper.COORD_TO_TASK_QUEUE.add(new Object());
        }

        // wait for the program to finish
        trigger.join();

        if (ref.get() != null) {
            Throwable t = ref.get();
            t.printStackTrace();
            fail("Program execution caused an exception: " + t.getMessage());
        }
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    } finally {
        if (additionalSystem != null) {
            additionalSystem.shutdown();
        }
        if (cluster != null) {
            cluster.stop();
        }
    }
}
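The FailingMapper used by this test is not part of the excerpt. Judging from the two static queues the test drains and fills, each subtask announces itself to the test and then blocks until the test releases it; the first attempt is failed by killing the TaskManagers while the mappers are blocked. A minimal sketch under those assumptions, using org.apache.flink.api.common.functions.MapFunction and java.util.concurrent.LinkedBlockingQueue (the actual Flink class may differ):

// sketch of a mapper that hands control to the test: each subtask reports
// once via TASK_TO_COORD_QUEUE, then blocks on COORD_TO_TASK_QUEUE
public static class FailingMapper<T> implements MapFunction<T, T> {

    public static final BlockingQueue<Object> TASK_TO_COORD_QUEUE = new LinkedBlockingQueue<Object>();
    public static final BlockingQueue<Object> COORD_TO_TASK_QUEUE = new LinkedBlockingQueue<Object>();

    private boolean reported;

    @Override
    public T map(T value) throws Exception {
        if (!reported) {
            reported = true;
            // tell the coordinating test that this subtask is running ...
            TASK_TO_COORD_QUEUE.add(new Object());
            // ... and wait until the test permits it to continue; during the
            // first attempt the TaskManagers are killed while blocked here
            COORD_TO_TASK_QUEUE.take();
        }
        return value;
    }
}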