Use of org.apache.flink.runtime.blob.BlobKey in project flink by apache.
In class BlobLibraryCacheRecoveryITCase, method testRecoveryRegisterAndDownload:
/**
 * Tests that with {@link HighAvailabilityMode#ZOOKEEPER} distributed JARs are recoverable from any
 * participating BlobLibraryCacheManager.
 */
@Test
public void testRecoveryRegisterAndDownload() throws Exception {
    Random rand = new Random();
    BlobServer[] server = new BlobServer[2];
    InetSocketAddress[] serverAddress = new InetSocketAddress[2];
    BlobLibraryCacheManager[] libServer = new BlobLibraryCacheManager[2];
    BlobCache cache = null;
    BlobLibraryCacheManager libCache = null;

    Configuration config = new Configuration();
    config.setString(HighAvailabilityOptions.HA_MODE, "ZOOKEEPER");
    config.setString(CoreOptions.STATE_BACKEND, "FILESYSTEM");
    config.setString(HighAvailabilityOptions.HA_STORAGE_PATH, temporaryFolder.getRoot().getAbsolutePath());

    try {
        for (int i = 0; i < server.length; i++) {
            server[i] = new BlobServer(config);
            serverAddress[i] = new InetSocketAddress("localhost", server[i].getPort());
            libServer[i] = new BlobLibraryCacheManager(server[i], 3600 * 1000);
        }

        // Random data
        byte[] expected = new byte[1024];
        rand.nextBytes(expected);

        List<BlobKey> keys = new ArrayList<>(2);

        // Upload some data (libraries)
        try (BlobClient client = new BlobClient(serverAddress[0], config)) {
            // Request 1
            keys.add(client.put(expected));
            // Request 2
            keys.add(client.put(expected, 32, 256));
        }

        // The cache
        cache = new BlobCache(serverAddress[0], config);
        libCache = new BlobLibraryCacheManager(cache, 3600 * 1000);

        // Register uploaded libraries
        JobID jobId = new JobID();
        ExecutionAttemptID executionId = new ExecutionAttemptID();
        libServer[0].registerTask(jobId, executionId, keys, Collections.<URL>emptyList());

        // Verify key 1
        File f = libCache.getFile(keys.get(0));
        assertEquals(expected.length, f.length());

        try (FileInputStream fis = new FileInputStream(f)) {
            for (int i = 0; i < expected.length && fis.available() > 0; i++) {
                assertEquals(expected[i], (byte) fis.read());
            }
            assertEquals(0, fis.available());
        }

        // Shutdown cache and start with other server
        cache.shutdown();
        libCache.shutdown();

        cache = new BlobCache(serverAddress[1], config);
        libCache = new BlobLibraryCacheManager(cache, 3600 * 1000);

        // Verify key 1
        f = libCache.getFile(keys.get(0));
        assertEquals(expected.length, f.length());

        try (FileInputStream fis = new FileInputStream(f)) {
            for (int i = 0; i < expected.length && fis.available() > 0; i++) {
                assertEquals(expected[i], (byte) fis.read());
            }
            assertEquals(0, fis.available());
        }

        // Verify key 2
        f = libCache.getFile(keys.get(1));
        assertEquals(256, f.length());

        try (FileInputStream fis = new FileInputStream(f)) {
            for (int i = 0; i < 256 && fis.available() > 0; i++) {
                assertEquals(expected[32 + i], (byte) fis.read());
            }
            assertEquals(0, fis.available());
        }

        // Remove blobs again
        try (BlobClient client = new BlobClient(serverAddress[1], config)) {
            client.delete(keys.get(0));
            client.delete(keys.get(1));
        }

        // Verify everything is clean below recoveryDir/<cluster_id>
        final String clusterId = config.getString(HighAvailabilityOptions.HA_CLUSTER_ID);
        File haBlobStoreDir = new File(temporaryFolder.getRoot(), clusterId);
        File[] recoveryFiles = haBlobStoreDir.listFiles();
        assertNotNull("HA storage directory does not exist", recoveryFiles);
        assertEquals("Unclean state backend: " + Arrays.toString(recoveryFiles), 0, recoveryFiles.length);
    } finally {
        for (BlobServer s : server) {
            if (s != null) {
                s.shutdown();
            }
        }
        if (cache != null) {
            cache.shutdown();
        }
        if (libCache != null) {
            libCache.shutdown();
        }
    }
}
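The core flow of the test is: upload JARs through one BlobServer, then fetch them through a BlobCache pointed at either server, with the shared HA_STORAGE_PATH providing the recovery path. For reference, a minimal non-HA round trip using only the calls that appear in the test above; the constructor and method signatures are assumed to match the same Flink version this test is written against, and the helper name is made up for the sketch:

    // Minimal sketch of the upload/fetch round trip against a single BlobServer,
    // reusing only calls that appear in the test above (no HA configuration).
    static void blobRoundTripSketch() throws Exception {
        Configuration config = new Configuration();
        BlobServer server = new BlobServer(config);
        BlobCache cache = null;
        BlobLibraryCacheManager libCache = null;
        try {
            InetSocketAddress address = new InetSocketAddress("localhost", server.getPort());

            byte[] payload = new byte[1024];
            new Random().nextBytes(payload);

            // Upload: the returned BlobKey is the content-addressed handle for the bytes.
            BlobKey key;
            try (BlobClient client = new BlobClient(address, config)) {
                key = client.put(payload);
            }

            // Download: the library cache pulls the blob from the server on first access
            // and serves it as a local file afterwards.
            cache = new BlobCache(address, config);
            libCache = new BlobLibraryCacheManager(cache, 3600 * 1000);
            File localCopy = libCache.getFile(key);
            assertEquals(payload.length, localCopy.length());
        } finally {
            if (libCache != null) {
                libCache.shutdown();
            }
            if (cache != null) {
                cache.shutdown();
            }
            server.shutdown();
        }
    }

The HA variant in the test differs only in pointing the second BlobCache at serverAddress[1]; recovery works because, with ZOOKEEPER HA mode, both servers persist blobs under the configured HA storage path.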
Use of org.apache.flink.runtime.blob.BlobKey in project flink by apache.
In class RescalePartitionerTest, method testExecutionGraphGeneration:
@Test
public void testExecutionGraphGeneration() throws Exception {
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(4);

    // get input data
    DataStream<String> text = env.addSource(new ParallelSourceFunction<String>() {

        private static final long serialVersionUID = 7772338606389180774L;

        @Override
        public void run(SourceContext<String> ctx) throws Exception {
        }

        @Override
        public void cancel() {
        }
    }).setParallelism(2);

    DataStream<Tuple2<String, Integer>> counts = text.rescale().flatMap(new FlatMapFunction<String, Tuple2<String, Integer>>() {

        private static final long serialVersionUID = -5255930322161596829L;

        @Override
        public void flatMap(String value, Collector<Tuple2<String, Integer>> out) throws Exception {
        }
    });

    counts.rescale().print().setParallelism(2);

    JobGraph jobGraph = env.getStreamGraph().getJobGraph();

    final JobID jobId = new JobID();
    final String jobName = "Semi-Rebalance Test Job";
    final Configuration cfg = new Configuration();

    List<JobVertex> jobVertices = jobGraph.getVerticesSortedTopologicallyFromSources();

    JobVertex sourceVertex = jobVertices.get(0);
    JobVertex mapVertex = jobVertices.get(1);
    JobVertex sinkVertex = jobVertices.get(2);

    assertEquals(2, sourceVertex.getParallelism());
    assertEquals(4, mapVertex.getParallelism());
    assertEquals(2, sinkVertex.getParallelism());

    ExecutionGraph eg = new ExecutionGraph(
        TestingUtils.defaultExecutor(),
        TestingUtils.defaultExecutor(),
        jobId,
        jobName,
        cfg,
        new SerializedValue<>(new ExecutionConfig()),
        AkkaUtils.getDefaultTimeout(),
        new NoRestartStrategy(),
        new ArrayList<BlobKey>(),
        new ArrayList<URL>(),
        new Scheduler(TestingUtils.defaultExecutionContext()),
        ExecutionGraph.class.getClassLoader(),
        new UnregisteredMetricsGroup());

    try {
        eg.attachJobGraph(jobVertices);
    } catch (JobException e) {
        e.printStackTrace();
        fail("Building ExecutionGraph failed: " + e.getMessage());
    }

    ExecutionJobVertex execSourceVertex = eg.getJobVertex(sourceVertex.getID());
    ExecutionJobVertex execMapVertex = eg.getJobVertex(mapVertex.getID());
    ExecutionJobVertex execSinkVertex = eg.getJobVertex(sinkVertex.getID());

    assertEquals(0, execSourceVertex.getInputs().size());
    assertEquals(1, execMapVertex.getInputs().size());
    assertEquals(4, execMapVertex.getParallelism());

    ExecutionVertex[] mapTaskVertices = execMapVertex.getTaskVertices();

    // verify that we have each parallel input partition exactly twice, i.e. that one source
    // sends to two unique mappers
    Map<Integer, Integer> mapInputPartitionCounts = new HashMap<>();
    for (ExecutionVertex mapTaskVertex : mapTaskVertices) {
        assertEquals(1, mapTaskVertex.getNumberOfInputs());
        assertEquals(1, mapTaskVertex.getInputEdges(0).length);
        ExecutionEdge inputEdge = mapTaskVertex.getInputEdges(0)[0];
        assertEquals(sourceVertex.getID(), inputEdge.getSource().getProducer().getJobvertexId());
        int inputPartition = inputEdge.getSource().getPartitionNumber();
        if (!mapInputPartitionCounts.containsKey(inputPartition)) {
            mapInputPartitionCounts.put(inputPartition, 1);
        } else {
            mapInputPartitionCounts.put(inputPartition, mapInputPartitionCounts.get(inputPartition) + 1);
        }
    }

    assertEquals(2, mapInputPartitionCounts.size());
    for (int count : mapInputPartitionCounts.values()) {
        assertEquals(2, count);
    }

    assertEquals(1, execSinkVertex.getInputs().size());
    assertEquals(2, execSinkVertex.getParallelism());

    ExecutionVertex[] sinkTaskVertices = execSinkVertex.getTaskVertices();

    // verify each sink instance has two inputs from the map and that each map subpartition
    // only occurs in one unique input edge
    Set<Integer> mapSubpartitions = new HashSet<>();
    for (ExecutionVertex sinkTaskVertex : sinkTaskVertices) {
        assertEquals(1, sinkTaskVertex.getNumberOfInputs());
        assertEquals(2, sinkTaskVertex.getInputEdges(0).length);
        ExecutionEdge inputEdge1 = sinkTaskVertex.getInputEdges(0)[0];
        ExecutionEdge inputEdge2 = sinkTaskVertex.getInputEdges(0)[1];
        assertEquals(mapVertex.getID(), inputEdge1.getSource().getProducer().getJobvertexId());
        assertEquals(mapVertex.getID(), inputEdge2.getSource().getProducer().getJobvertexId());
        int inputPartition1 = inputEdge1.getSource().getPartitionNumber();
        assertFalse(mapSubpartitions.contains(inputPartition1));
        mapSubpartitions.add(inputPartition1);
        int inputPartition2 = inputEdge2.getSource().getPartitionNumber();
        assertFalse(mapSubpartitions.contains(inputPartition2));
        mapSubpartitions.add(inputPartition2);
    }

    assertEquals(4, mapSubpartitions.size());
}
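The assertions encode the pointwise distribution that rescale() produces: with 2 source subtasks and 4 mappers, each source partition feeds exactly 2 mappers, and with 4 mappers and 2 sinks, each sink reads 2 distinct map subpartitions. Below is a small standalone sketch of that assignment rule for the evenly divisible cases checked here; it is a hypothetical helper written for illustration, not Flink's actual POINTWISE connection code:

    // Hypothetical helper (not Flink's implementation): which upstream partitions a
    // downstream subtask reads under a pointwise (rescale) connection, assuming the
    // larger parallelism is an exact multiple of the smaller one.
    static int[] pointwiseSources(int consumerIndex, int numConsumers, int numProducers) {
        if (numProducers <= numConsumers) {
            // Fewer producers than consumers: each consumer reads exactly one partition;
            // each producer partition is shared by numConsumers / numProducers consumers.
            return new int[] { consumerIndex * numProducers / numConsumers };
        } else {
            // More producers than consumers: each consumer reads a contiguous range of partitions.
            int factor = numProducers / numConsumers;
            int[] sources = new int[factor];
            for (int i = 0; i < factor; i++) {
                sources[i] = consumerIndex * factor + i;
            }
            return sources;
        }
    }

For (numConsumers = 4, numProducers = 2) this yields mapper inputs {0}, {0}, {1}, {1}, and for (numConsumers = 2, numProducers = 4) it yields sink inputs {0, 1} and {2, 3}, which matches the per-partition counts and uniqueness checks asserted above.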