Search in sources :

Example 31 with IOManagerAsync

use of org.apache.flink.runtime.io.disk.iomanager.IOManagerAsync in project flink by apache.

In the class HashTableITCase, method testSpillingHashJoinOneRecursionValidityIntPair:

/**
 * Joins a build side of {@code NUM_KEYS * BUILD_VALS_PER_KEY} pairs with a probe side of
 * {@code NUM_KEYS * PROBE_VALS_PER_KEY} pairs using a {@link MutableHashTable} backed by
 * 896 memory pages, and validates the result.
 *
 * <p>For every probe record the test checks that exactly {@code BUILD_VALS_PER_KEY} build-side
 * matches with the same key are returned. After the join it checks that every key was seen and
 * that each key accumulated {@code PROBE_VALS_PER_KEY * BUILD_VALS_PER_KEY} matches.
 *
 * <p>The hash table is closed, its memory released, and the I/O manager shut down even when an
 * assertion fails part-way through.
 *
 * @throws IOException if the hash table fails while reading or spilling data
 */
@Test
public void testSpillingHashJoinOneRecursionValidityIntPair() throws IOException {
    final int NUM_KEYS = 1000000;
    final int BUILD_VALS_PER_KEY = 3;
    final int PROBE_VALS_PER_KEY = 10;
    // create a build input that gives 3 million pairs with 3 values sharing the same key
    MutableObjectIterator<IntPair> buildInput = new UniformIntPairGenerator(NUM_KEYS, BUILD_VALS_PER_KEY, false);
    // create a probe input that gives 10 million pairs with 10 values sharing a key
    MutableObjectIterator<IntPair> probeInput = new UniformIntPairGenerator(NUM_KEYS, PROBE_VALS_PER_KEY, true);
    // allocate the memory for the HashTable
    List<MemorySegment> memSegments;
    try {
        memSegments = this.memManager.allocatePages(MEM_OWNER, 896);
    } catch (MemoryAllocationException maex) {
        fail("Memory for the Join could not be provided.");
        return;
    }
    // create the I/O access for spilling; it is owned by this test and shut down at the end
    IOManager ioManager = new IOManagerAsync();
    try {
        // create the map for validating the results (key -> number of matches seen)
        HashMap<Integer, Long> map = new HashMap<Integer, Long>(NUM_KEYS);
        // ------------------------------------------------------------------------------------
        final MutableHashTable<IntPair, IntPair> join = new MutableHashTable<IntPair, IntPair>(this.pairBuildSideAccesssor, this.pairProbeSideAccesssor, this.pairBuildSideComparator, this.pairProbeSideComparator, this.pairComparator, memSegments, ioManager);
        join.open(buildInput, probeInput);
        try {
            IntPair record;
            final IntPair recordReuse = new IntPair();
            while (join.nextRecord()) {
                int numBuildValues = 0;
                int key = 0;
                MutableObjectIterator<IntPair> buildSide = join.getBuildSideIterator();
                if ((record = buildSide.next(recordReuse)) != null) {
                    numBuildValues = 1;
                    key = record.getKey();
                } else {
                    fail("No build side values found for a probe key.");
                }
                // count the remaining build-side matches for the current probe record
                while (buildSide.next(recordReuse) != null) {
                    numBuildValues++;
                }
                if (numBuildValues != BUILD_VALS_PER_KEY) {
                    fail("Other than 3 build values!!!");
                }
                IntPair pr = join.getCurrentProbeRecord();
                Assert.assertEquals("Probe-side key was different than build-side key.", key, pr.getKey());
                Long contained = map.get(key);
                if (contained == null) {
                    contained = Long.valueOf(numBuildValues);
                } else {
                    contained = Long.valueOf(contained.longValue() + (numBuildValues));
                }
                map.put(key, contained);
            }
        } finally {
            // always close the table and hand its memory back, even if validation failed
            join.close();
            this.memManager.release(join.getFreedMemory());
        }
        Assert.assertEquals("Wrong number of keys", NUM_KEYS, map.size());
        for (Map.Entry<Integer, Long> entry : map.entrySet()) {
            long val = entry.getValue();
            int key = entry.getKey();
            Assert.assertEquals("Wrong number of values in per-key cross product for key " + key, PROBE_VALS_PER_KEY * BUILD_VALS_PER_KEY, val);
        }
        // ------------------------------------------------------------------------------------
    } finally {
        // the locally created I/O manager would otherwise never be shut down
        ioManager.shutdown();
    }
}
Also used : MemoryAllocationException(org.apache.flink.runtime.memory.MemoryAllocationException) IOManager(org.apache.flink.runtime.io.disk.iomanager.IOManager) HashMap(java.util.HashMap) IntPair(org.apache.flink.runtime.operators.testutils.types.IntPair) MemorySegment(org.apache.flink.core.memory.MemorySegment) IOManagerAsync(org.apache.flink.runtime.io.disk.iomanager.IOManagerAsync) HashMap(java.util.HashMap) Map(java.util.Map) UniformIntPairGenerator(org.apache.flink.runtime.operators.testutils.UniformIntPairGenerator) Test(org.junit.Test)

Example 32 with IOManagerAsync

use of org.apache.flink.runtime.io.disk.iomanager.IOManagerAsync in project flink by apache.

In the class BarrierBufferTest, method setup:

/**
 * One-time fixture initialisation: creates the I/O manager stored in {@code IO_MANAGER}
 * and sets {@code SIZE_COUNTER} to 1.
 */
@BeforeClass
public static void setup() {
    final IOManagerAsync freshIoManager = new IOManagerAsync();
    IO_MANAGER = freshIoManager;
    SIZE_COUNTER = 1;
}
Also used : IOManagerAsync(org.apache.flink.runtime.io.disk.iomanager.IOManagerAsync) BeforeClass(org.junit.BeforeClass)

Example 33 with IOManagerAsync

use of org.apache.flink.runtime.io.disk.iomanager.IOManagerAsync in project flink by apache.

In the class TaskManagerComponentsStartupShutdownTest, method testComponentsStartupShutdown:

/**
 * Makes sure that all components are shut down when the TaskManager
 * actor is shut down.
 *
 * <p>The test starts JobManager and ResourceManager actors in a local actor system, builds the
 * TaskManager's memory manager, I/O manager and network environment by hand, starts a
 * TaskManager actor with them, and waits until it reports registration at the JobManager.
 * It then kills both actors, terminates the actor system, and asserts that the network
 * environment, I/O manager and memory manager all report being shut down.
 *
 * <p>Any exception is printed and turned into a test failure.
 */
@Test
public void testComponentsStartupShutdown() {
    final String[] TMP_DIR = new String[] { ConfigConstants.DEFAULT_TASK_MANAGER_TMP_PATH };
    final Time timeout = Time.seconds(100);
    final int BUFFER_SIZE = 32 * 1024;
    // configure the Akka watch heartbeat interval, pause and threshold for the local actor system
    Configuration config = new Configuration();
    config.setString(ConfigConstants.AKKA_WATCH_HEARTBEAT_INTERVAL, "200 ms");
    config.setString(ConfigConstants.AKKA_WATCH_HEARTBEAT_PAUSE, "1 s");
    config.setInteger(ConfigConstants.AKKA_WATCH_THRESHOLD, 1);
    ActorSystem actorSystem = null;
    try {
        actorSystem = AkkaUtils.createLocalActorSystem(config);
        // start the JobManager and ResourceManager actors that the TaskManager registers with
        final ActorRef jobManager = JobManager.startJobManagerActors(config, actorSystem, TestingUtils.defaultExecutor(), TestingUtils.defaultExecutor(), JobManager.class, MemoryArchivist.class)._1();
        FlinkResourceManager.startResourceManagerActors(config, actorSystem, LeaderRetrievalUtils.createLeaderRetrievalService(config, jobManager), StandaloneResourceManager.class);
        final int numberOfSlots = 1;
        // create the components for the TaskManager manually
        final TaskManagerConfiguration tmConfig = new TaskManagerConfiguration(numberOfSlots, TMP_DIR, timeout, null, Time.milliseconds(500), Time.seconds(30), Time.seconds(10), // cleanup interval
        1000000, config, // exit-jvm-on-fatal-error
        false);
        final NetworkEnvironmentConfiguration netConf = new NetworkEnvironmentConfiguration(32, BUFFER_SIZE, MemoryType.HEAP, IOManager.IOMode.SYNC, 0, 0, 2, 8, null);
        ResourceID taskManagerId = ResourceID.generate();
        final TaskManagerLocation connectionInfo = new TaskManagerLocation(taskManagerId, InetAddress.getLocalHost(), 10000);
        final MemoryManager memManager = new MemoryManager(32 * BUFFER_SIZE, 1, BUFFER_SIZE, MemoryType.HEAP, false);
        final IOManager ioManager = new IOManagerAsync(TMP_DIR);
        // the network environment takes its buffer pool size and backoff settings from netConf
        final NetworkEnvironment network = new NetworkEnvironment(new NetworkBufferPool(netConf.numNetworkBuffers(), netConf.networkBufferSize(), netConf.memoryType()), new LocalConnectionManager(), new ResultPartitionManager(), new TaskEventDispatcher(), new KvStateRegistry(), null, netConf.ioMode(), netConf.partitionRequestInitialBackoff(), netConf.partitionRequestMaxBackoff(), netConf.networkBuffersPerChannel(), netConf.extraNetworkBuffersPerGate());
        network.start();
        LeaderRetrievalService leaderRetrievalService = new StandaloneLeaderRetrievalService(jobManager.path().toString());
        MetricRegistryConfiguration metricRegistryConfiguration = MetricRegistryConfiguration.fromConfiguration(config);
        // create the task manager
        final Props tmProps = Props.create(TaskManager.class, tmConfig, taskManagerId, connectionInfo, memManager, ioManager, network, numberOfSlots, leaderRetrievalService, new MetricRegistry(metricRegistryConfiguration));
        final ActorRef taskManager = actorSystem.actorOf(tmProps);
        new JavaTestKit(actorSystem) {

            {
                // wait for the TaskManager to be registered
                // NOTE(review): 5000 seconds is an unusually long bound for this wait;
                // confirm whether milliseconds were intended
                new Within(new FiniteDuration(5000, TimeUnit.SECONDS)) {

                    @Override
                    protected void run() {
                        // ask to be notified on registration, then expect the confirmation message
                        taskManager.tell(TaskManagerMessages.getNotifyWhenRegisteredAtJobManagerMessage(), getTestActor());
                        expectMsgEquals(TaskManagerMessages.getRegisteredAtJobManagerMessage());
                    }
                };
            }
        };
        // shut down all actors and the actor system
        // kill the TaskManager and the JobManager actors
        taskManager.tell(Kill.getInstance(), ActorRef.noSender());
        jobManager.tell(Kill.getInstance(), ActorRef.noSender());
        // shut down the actor system and wait until it has terminated
        actorSystem.shutdown();
        actorSystem.awaitTermination();
        // cleared so that the finally block does not shut the system down a second time
        actorSystem = null;
        // now that the TaskManager is shut down, the components should be shut down as well
        assertTrue(network.isShutdown());
        assertTrue(ioManager.isProperlyShutDown());
        assertTrue(memManager.isShutdown());
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    } finally {
        // only reached with a live actor system if the test failed before the regular shutdown
        if (actorSystem != null) {
            actorSystem.shutdown();
        }
    }
}
Also used : ActorSystem(akka.actor.ActorSystem) KvStateRegistry(org.apache.flink.runtime.query.KvStateRegistry) MemoryArchivist(org.apache.flink.runtime.jobmanager.MemoryArchivist) MetricRegistryConfiguration(org.apache.flink.runtime.metrics.MetricRegistryConfiguration) Configuration(org.apache.flink.configuration.Configuration) TaskManagerConfiguration(org.apache.flink.runtime.taskexecutor.TaskManagerConfiguration) ActorRef(akka.actor.ActorRef) Time(org.apache.flink.api.common.time.Time) JobManager(org.apache.flink.runtime.jobmanager.JobManager) MetricRegistryConfiguration(org.apache.flink.runtime.metrics.MetricRegistryConfiguration) Props(akka.actor.Props) IOManagerAsync(org.apache.flink.runtime.io.disk.iomanager.IOManagerAsync) ResourceID(org.apache.flink.runtime.clusterframework.types.ResourceID) TaskManagerConfiguration(org.apache.flink.runtime.taskexecutor.TaskManagerConfiguration) IOManager(org.apache.flink.runtime.io.disk.iomanager.IOManager) MetricRegistry(org.apache.flink.runtime.metrics.MetricRegistry) FiniteDuration(scala.concurrent.duration.FiniteDuration) MemoryManager(org.apache.flink.runtime.memory.MemoryManager) ResultPartitionManager(org.apache.flink.runtime.io.network.partition.ResultPartitionManager) NetworkBufferPool(org.apache.flink.runtime.io.network.buffer.NetworkBufferPool) LocalConnectionManager(org.apache.flink.runtime.io.network.LocalConnectionManager) LeaderRetrievalService(org.apache.flink.runtime.leaderretrieval.LeaderRetrievalService) StandaloneLeaderRetrievalService(org.apache.flink.runtime.leaderretrieval.StandaloneLeaderRetrievalService) StandaloneLeaderRetrievalService(org.apache.flink.runtime.leaderretrieval.StandaloneLeaderRetrievalService) NetworkEnvironment(org.apache.flink.runtime.io.network.NetworkEnvironment) TaskEventDispatcher(org.apache.flink.runtime.io.network.TaskEventDispatcher) JavaTestKit(akka.testkit.JavaTestKit) Test(org.junit.Test)

Example 34 with IOManagerAsync

use of org.apache.flink.runtime.io.disk.iomanager.IOManagerAsync in project flink by apache.

In the class HashTableRecordWidthCombinations, method main:

/**
 * Exercises {@link MutableHashTable} over a grid of build-side sizes and record widths.
 *
 * <p>For every record count in {@code [3400, 3550)} and every record length in
 * {@code [270, 320)}, a hash table is built from that many tuples, all with key {@code 42L}
 * and a payload of {@code recordLen - 12} bytes, and probed with the keys {@code 0..9999}.
 * A {@link RuntimeException} whose message contains
 * {@code "exceeded maximum number of recursions"} is tolerated; anything else is a failure.
 * After each combination the table is closed and {@code checkNoTempFilesRemain} is called.
 * The I/O manager is shut down at the end in all cases.
 *
 * @param args unused
 * @throws Exception declared for callers; exceptions raised in the loop are caught,
 *         printed and reported through {@code fail}
 */
public static void main(String[] args) throws Exception {
    @SuppressWarnings("unchecked") final TypeSerializer<Tuple2<Long, byte[]>> buildSerializer = new TupleSerializer<Tuple2<Long, byte[]>>((Class<Tuple2<Long, byte[]>>) (Class<?>) Tuple2.class, new TypeSerializer<?>[] { LongSerializer.INSTANCE, BytePrimitiveArraySerializer.INSTANCE });
    final TypeSerializer<Long> probeSerializer = LongSerializer.INSTANCE;
    final TypeComparator<Tuple2<Long, byte[]>> buildComparator = new TupleComparator<Tuple2<Long, byte[]>>(new int[] { 0 }, new TypeComparator<?>[] { new LongComparator(true) }, new TypeSerializer<?>[] { LongSerializer.INSTANCE });
    final TypeComparator<Long> probeComparator = new LongComparator(true);
    // compares a probe-side Long against the key field (f0) of a build-side tuple
    final TypePairComparator<Long, Tuple2<Long, byte[]>> pairComparator = new TypePairComparator<Long, Tuple2<Long, byte[]>>() {

        private long ref;

        @Override
        public void setReference(Long reference) {
            ref = reference;
        }

        @Override
        public boolean equalToReference(Tuple2<Long, byte[]> candidate) {
            //noinspection UnnecessaryUnboxing
            return candidate.f0.longValue() == ref;
        }

        @Override
        public int compareToReference(Tuple2<Long, byte[]> candidate) {
            long x = ref;
            long y = candidate.f0;
            return (x < y) ? -1 : ((x == y) ? 0 : 1);
        }
    };
    final IOManager ioMan = new IOManagerAsync();
    try {
        final int pageSize = 32 * 1024;
        final int numSegments = 34;
        for (int num = 3400; num < 3550; num++) {
            final int numRecords = num;
            for (int recordLen = 270; recordLen < 320; recordLen++) {
                // 8 bytes and 4 bytes are subtracted from the record length to size the payload
                final byte[] payload = new byte[recordLen - 8 - 4];
                System.out.println("testing " + numRecords + " / " + recordLen);
                List<MemorySegment> memory = getMemory(numSegments, pageSize);
                // we create a hash table that thinks the records are super large. that makes it choose initially
                // a lot of memory for the partition buffers, and start with a smaller hash table. that way
                // we trigger a hash table growth early.
                MutableHashTable<Tuple2<Long, byte[]>, Long> table = new MutableHashTable<>(buildSerializer, probeSerializer, buildComparator, probeComparator, pairComparator, memory, ioMan, 16, false);
                // build side: numRecords tuples that all share the key 42
                final MutableObjectIterator<Tuple2<Long, byte[]>> buildInput = new MutableObjectIterator<Tuple2<Long, byte[]>>() {

                    private int count = 0;

                    @Override
                    public Tuple2<Long, byte[]> next(Tuple2<Long, byte[]> reuse) {
                        return next();
                    }

                    @Override
                    public Tuple2<Long, byte[]> next() {
                        if (count++ < numRecords) {
                            return new Tuple2<>(42L, payload);
                        } else {
                            return null;
                        }
                    }
                };
                // probe side: the keys 0 .. 9999
                final MutableObjectIterator<Long> probeInput = new MutableObjectIterator<Long>() {

                    // named differently from the enclosing numRecords to avoid shadowing it
                    private final long numProbeRecords = 10000;

                    private long value = 0;

                    @Override
                    public Long next(Long aLong) {
                        return next();
                    }

                    @Override
                    public Long next() {
                        if (value < numProbeRecords) {
                            return value++;
                        } else {
                            return null;
                        }
                    }
                };
                table.open(buildInput, probeInput);
                try {
                    while (table.nextRecord()) {
                        MutableObjectIterator<Tuple2<Long, byte[]>> matches = table.getBuildSideIterator();
                        while (matches.next() != null) {
                            // drain all matches; only the traversal itself is being exercised
                        }
                    }
                } catch (RuntimeException e) {
                    // getMessage() may be null; such an exception is never the tolerated one,
                    // so rethrow it instead of failing with a NullPointerException here
                    final String message = e.getMessage();
                    if (message == null || !message.contains("exceeded maximum number of recursions")) {
                        throw e;
                    }
                } finally {
                    table.close();
                }
                // make sure no temp files are left
                checkNoTempFilesRemain(ioMan);
            }
        }
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    } finally {
        ioMan.shutdown();
    }
}
Also used : MutableObjectIterator(org.apache.flink.util.MutableObjectIterator) TupleComparator(org.apache.flink.api.java.typeutils.runtime.TupleComparator) TupleSerializer(org.apache.flink.api.java.typeutils.runtime.TupleSerializer) IOManagerAsync(org.apache.flink.runtime.io.disk.iomanager.IOManagerAsync) IOManager(org.apache.flink.runtime.io.disk.iomanager.IOManager) TypePairComparator(org.apache.flink.api.common.typeutils.TypePairComparator) LongComparator(org.apache.flink.api.common.typeutils.base.LongComparator) MemorySegment(org.apache.flink.core.memory.MemorySegment) Tuple2(org.apache.flink.api.java.tuple.Tuple2) MutableHashTable(org.apache.flink.runtime.operators.hash.MutableHashTable)

Example 35 with IOManagerAsync

use of org.apache.flink.runtime.io.disk.iomanager.IOManagerAsync in project flink by apache.

In the class MassiveStringValueSorting, method testStringValueSorting:

/**
 * Sorts a generated file of strings with {@link UnilateralSortMerger} and compares the result,
 * line by line, with the output of the external {@code sort} command run under
 * {@code LC_ALL="C"} through {@code /bin/bash}.
 *
 * <p>The sorter, both readers, the I/O manager and the memory manager are released when the
 * comparison ends, whether it succeeded or not, and both temporary files are deleted.
 * Any exception is printed and reported as a test failure.
 */
public void testStringValueSorting() {
    File input = null;
    File sorted = null;
    try {
        // the source file
        input = generateFileWithStrings(300000, "http://some-uri.com/that/is/a/common/prefix/to/all");
        // the sorted file
        sorted = File.createTempFile("sorted_strings", "txt");
        String[] command = { "/bin/bash", "-c", "export LC_ALL=\"C\" && cat \"" + input.getAbsolutePath() + "\" | sort > \"" + sorted.getAbsolutePath() + "\"" };
        Process p = null;
        try {
            p = Runtime.getRuntime().exec(command);
            int retCode = p.waitFor();
            if (retCode != 0) {
                throw new Exception("Command failed with return code " + retCode);
            }
            // finished normally, so there is nothing left to destroy
            p = null;
        } finally {
            if (p != null) {
                p.destroy();
            }
        }
        // sort the data
        UnilateralSortMerger<StringValue> sorter = null;
        BufferedReader reader = null;
        BufferedReader verifyReader = null;
        // declared outside the try so that they can be shut down in the finally block
        MemoryManager mm = null;
        IOManager ioMan = null;
        try {
            mm = new MemoryManager(1024 * 1024, 1);
            ioMan = new IOManagerAsync();
            TypeSerializer<StringValue> serializer = new CopyableValueSerializer<StringValue>(StringValue.class);
            TypeComparator<StringValue> comparator = new CopyableValueComparator<StringValue>(true, StringValue.class);
            reader = new BufferedReader(new FileReader(input));
            MutableObjectIterator<StringValue> inputIterator = new StringValueReaderMutableObjectIterator(reader);
            sorter = new UnilateralSortMerger<StringValue>(mm, ioMan, inputIterator, new DummyInvokable(), new RuntimeSerializerFactory<StringValue>(serializer, StringValue.class), comparator, 1.0, 4, 0.8f, true, /* use large record handler */
            true);
            MutableObjectIterator<StringValue> sortedData = sorter.getIterator();
            reader.close();
            // verify
            verifyReader = new BufferedReader(new FileReader(sorted));
            String nextVerify;
            StringValue nextFromFlinkSort = new StringValue();
            while ((nextVerify = verifyReader.readLine()) != null) {
                nextFromFlinkSort = sortedData.next(nextFromFlinkSort);
                Assert.assertNotNull(nextFromFlinkSort);
                Assert.assertEquals(nextVerify, nextFromFlinkSort.getValue());
            }
        } finally {
            // nested so that a failure while closing one resource does not skip the others
            try {
                if (reader != null) {
                    reader.close();
                }
            } finally {
                try {
                    if (verifyReader != null) {
                        verifyReader.close();
                    }
                } finally {
                    try {
                        if (sorter != null) {
                            sorter.close();
                        }
                    } finally {
                        try {
                            if (ioMan != null) {
                                ioMan.shutdown();
                            }
                        } finally {
                            if (mm != null) {
                                mm.shutdown();
                            }
                        }
                    }
                }
            }
        }
    } catch (Exception e) {
        System.err.println(e.getMessage());
        e.printStackTrace();
        Assert.fail(e.getMessage());
    } finally {
        if (input != null) {
            //noinspection ResultOfMethodCallIgnored
            input.delete();
        }
        if (sorted != null) {
            //noinspection ResultOfMethodCallIgnored
            sorted.delete();
        }
    }
}
Also used : IOManager(org.apache.flink.runtime.io.disk.iomanager.IOManager) RuntimeSerializerFactory(org.apache.flink.api.java.typeutils.runtime.RuntimeSerializerFactory) CopyableValueSerializer(org.apache.flink.api.java.typeutils.runtime.CopyableValueSerializer) MemoryManager(org.apache.flink.runtime.memory.MemoryManager) IOException(java.io.IOException) CopyableValueComparator(org.apache.flink.api.java.typeutils.runtime.CopyableValueComparator) IOManagerAsync(org.apache.flink.runtime.io.disk.iomanager.IOManagerAsync) BufferedReader(java.io.BufferedReader) FileReader(java.io.FileReader) DummyInvokable(org.apache.flink.runtime.operators.testutils.DummyInvokable) StringValue(org.apache.flink.types.StringValue) File(java.io.File)

Aggregations

IOManagerAsync (org.apache.flink.runtime.io.disk.iomanager.IOManagerAsync)42 MemoryManager (org.apache.flink.runtime.memory.MemoryManager)33 IOManager (org.apache.flink.runtime.io.disk.iomanager.IOManager)22 Before (org.junit.Before)18 Test (org.junit.Test)16 MemorySegment (org.apache.flink.core.memory.MemorySegment)15 DummyInvokable (org.apache.flink.runtime.operators.testutils.DummyInvokable)12 Tuple2 (org.apache.flink.api.java.tuple.Tuple2)10 IOException (java.io.IOException)9 ArrayList (java.util.ArrayList)9 ExecutionConfig (org.apache.flink.api.common.ExecutionConfig)8 GenericPairComparator (org.apache.flink.api.common.typeutils.GenericPairComparator)7 TupleTypeInfo (org.apache.flink.api.java.typeutils.TupleTypeInfo)7 File (java.io.File)6 AbstractInvokable (org.apache.flink.runtime.jobgraph.tasks.AbstractInvokable)5 BufferedReader (java.io.BufferedReader)4 FileReader (java.io.FileReader)4 Random (java.util.Random)4 RuntimeSerializerFactory (org.apache.flink.api.java.typeutils.runtime.RuntimeSerializerFactory)4 IntComparator (org.apache.flink.api.common.typeutils.base.IntComparator)3