
Example 1 with Partition

Use of com.datatorrent.api.Partitioner.Partition in project apex-malhar by apache.

From the class AbstractFileInputOperatorTest, method testPartitioningStateTransferFailure.

/**
 * Tests dynamic partitioning that interrupts an ongoing read.
 * - Create 4 files with 3 records each.
 * - Create a single partition and read some records, populating the pending files in the operator.
 * - Split it into two operators.
 * - Try to emit the remaining records.
 */
@Test
public void testPartitioningStateTransferFailure() throws Exception {
    LineByLineFileInputOperator oper = new LineByLineFileInputOperator();
    oper.getScanner().setFilePatternRegexp(".*partition([\\d]*)");
    oper.setDirectory(new File(testMeta.dir).getAbsolutePath());
    oper.setScanIntervalMillis(0);
    oper.setEmitBatchSize(2);
    LineByLineFileInputOperator initialState = new Kryo().copy(oper);
    // Create 4 files with 3 records each.
    Path path = new Path(new File(testMeta.dir).getAbsolutePath());
    FileContext.getLocalFSFileContext().delete(path, true);
    int file;
    for (file = 0; file < 4; file++) {
        FileUtils.write(new File(testMeta.dir, "partition00" + file), "a\nb\nc\n");
    }
    CollectorTestSink<String> queryResults = new CollectorTestSink<String>();
    @SuppressWarnings({ "unchecked", "rawtypes" }) CollectorTestSink<Object> sink = (CollectorTestSink) queryResults;
    oper.output.setSink(sink);
    int wid = 0;
    // Read some records
    oper.setup(testMeta.context);
    for (int i = 0; i < 5; i++) {
        oper.beginWindow(wid);
        oper.emitTuples();
        oper.endWindow();
        wid++;
    }
    Assert.assertEquals("Partial tuples read ", 6, sink.collectedTuples.size());
    Assert.assertEquals(1, initialState.getCurrentPartitions());
    initialState.setPartitionCount(2);
    StatsListener.Response rsp = initialState.processStats(null);
    Assert.assertEquals(true, rsp.repartitionRequired);
    // Create partitions of the operator.
    List<Partition<AbstractFileInputOperator<String>>> partitions = Lists.newArrayList();
    partitions.add(new DefaultPartition<AbstractFileInputOperator<String>>(oper));
    // incremental capacity controlled by the partitionCount property
    Collection<Partition<AbstractFileInputOperator<String>>> newPartitions = initialState.definePartitions(partitions, new PartitioningContextImpl(null, 0));
    Assert.assertEquals(2, newPartitions.size());
    Assert.assertEquals(1, initialState.getCurrentPartitions());
    Map<Integer, Partition<AbstractFileInputOperator<String>>> m = Maps.newHashMap();
    for (Partition<AbstractFileInputOperator<String>> p : newPartitions) {
        m.put(m.size(), p);
    }
    initialState.partitioned(m);
    Assert.assertEquals(2, initialState.getCurrentPartitions());
    /* Collect all operators in a list */
    List<AbstractFileInputOperator<String>> opers = Lists.newArrayList();
    for (Partition<AbstractFileInputOperator<String>> p : newPartitions) {
        LineByLineFileInputOperator oi = (LineByLineFileInputOperator) p.getPartitionedInstance();
        oi.setup(testMeta.context);
        oi.output.setSink(sink);
        opers.add(oi);
    }
    sink.clear();
    for (int i = 0; i < 10; i++) {
        for (AbstractFileInputOperator<String> o : opers) {
            o.beginWindow(wid);
            o.emitTuples();
            o.endWindow();
        }
        wid++;
    }
    // The remaining records should now be emitted by the new partitions.
    Assert.assertEquals("Remaining tuples read ", 6, sink.collectedTuples.size());
}
Also used: Path (org.apache.hadoop.fs.Path), Partition (com.datatorrent.api.Partitioner.Partition), DefaultPartition (com.datatorrent.api.DefaultPartition), StatsListener (com.datatorrent.api.StatsListener), PartitioningContextImpl (org.apache.apex.malhar.lib.partitioner.StatelessPartitionerTest.PartitioningContextImpl), LineByLineFileInputOperator (org.apache.apex.malhar.lib.fs.LineByLineFileInputOperator), File (java.io.File), Kryo (com.esotericsoftware.kryo.Kryo), CollectorTestSink (org.apache.apex.malhar.lib.testbench.CollectorTestSink), Test (org.junit.Test)
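
Both file-input tests in this section follow the same repartitioning handshake. Distilled into a minimal sketch, using only calls that appear in the example above (oper stands in for the configured operator; this is an illustration, not a fixed recipe):

// Minimal sketch of the dynamic repartitioning sequence exercised above.
oper.setPartitionCount(2);                               // ask for two partitions
StatsListener.Response rsp = oper.processStats(null);    // operator sets repartitionRequired
if (rsp.repartitionRequired) {
    List<Partition<AbstractFileInputOperator<String>>> current = Lists.newArrayList();
    current.add(new DefaultPartition<AbstractFileInputOperator<String>>(oper));
    Collection<Partition<AbstractFileInputOperator<String>>> fresh =
        oper.definePartitions(current, new PartitioningContextImpl(null, 0));
    Map<Integer, Partition<AbstractFileInputOperator<String>>> byId = Maps.newHashMap();
    for (Partition<AbstractFileInputOperator<String>> p : fresh) {
        byId.put(byId.size(), p);                        // assign synthetic operator ids
    }
    oper.partitioned(byId);                              // notify operator of the new layout
}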

Example 2 with Partition

Use of com.datatorrent.api.Partitioner.Partition in project apex-malhar by apache.

From the class AbstractFileInputOperatorTest, method testPartitioningStateTransfer.

/**
 * Tests dynamic partitioning.
 * - Create 4 files with 3 records each.
 * - Create a single partition and read all records, populating the pending files in the operator.
 * - Split it into two operators.
 * - Try to emit records again; expected result: no records are emitted, since all files
 *   are already processed.
 * - Create another 4 files with 3 records each.
 * - Try to emit records again; expected result: total records emitted = 4 * 3 = 12.
 */
@Test
public void testPartitioningStateTransfer() throws Exception {
    LineByLineFileInputOperator oper = new LineByLineFileInputOperator();
    oper.getScanner().setFilePatternRegexp(".*partition([\\d]*)");
    oper.setDirectory(new File(testMeta.dir).getAbsolutePath());
    oper.setScanIntervalMillis(0);
    LineByLineFileInputOperator initialState = new Kryo().copy(oper);
    // Create 4 files with 3 records each.
    Path path = new Path(new File(testMeta.dir).getAbsolutePath());
    FileContext.getLocalFSFileContext().delete(path, true);
    int file;
    for (file = 0; file < 4; file++) {
        FileUtils.write(new File(testMeta.dir, "partition00" + file), "a\nb\nc\n");
    }
    CollectorTestSink<String> queryResults = new CollectorTestSink<String>();
    @SuppressWarnings({ "unchecked", "rawtypes" }) CollectorTestSink<Object> sink = (CollectorTestSink) queryResults;
    oper.output.setSink(sink);
    int wid = 0;
    // Read all records to populate processedList in operator.
    oper.setup(testMeta.context);
    for (int i = 0; i < 10; i++) {
        oper.beginWindow(wid);
        oper.emitTuples();
        oper.endWindow();
        wid++;
    }
    Assert.assertEquals("All tuples read ", 12, sink.collectedTuples.size());
    Assert.assertEquals(1, initialState.getCurrentPartitions());
    initialState.setPartitionCount(2);
    StatsListener.Response rsp = initialState.processStats(null);
    Assert.assertEquals(true, rsp.repartitionRequired);
    // Create partitions of the operator.
    List<Partition<AbstractFileInputOperator<String>>> partitions = Lists.newArrayList();
    partitions.add(new DefaultPartition<AbstractFileInputOperator<String>>(oper));
    // incremental capacity controlled by the partitionCount property
    Collection<Partition<AbstractFileInputOperator<String>>> newPartitions = initialState.definePartitions(partitions, new PartitioningContextImpl(null, 0));
    Assert.assertEquals(2, newPartitions.size());
    Assert.assertEquals(1, initialState.getCurrentPartitions());
    Map<Integer, Partition<AbstractFileInputOperator<String>>> m = Maps.newHashMap();
    for (Partition<AbstractFileInputOperator<String>> p : newPartitions) {
        m.put(m.size(), p);
    }
    initialState.partitioned(m);
    Assert.assertEquals(2, initialState.getCurrentPartitions());
    /* Collect all operators in a list */
    List<AbstractFileInputOperator<String>> opers = Lists.newArrayList();
    for (Partition<AbstractFileInputOperator<String>> p : newPartitions) {
        LineByLineFileInputOperator oi = (LineByLineFileInputOperator) p.getPartitionedInstance();
        oi.setup(testMeta.context);
        oi.output.setSink(sink);
        opers.add(oi);
    }
    sink.clear();
    for (int i = 0; i < 10; i++) {
        for (AbstractFileInputOperator<String> o : opers) {
            o.beginWindow(wid);
            o.emitTuples();
            o.endWindow();
        }
        wid++;
    }
    // No record should be read.
    Assert.assertEquals("No new tuples read ", 0, sink.collectedTuples.size());
    // Add four new files with 3 records each.
    for (; file < 8; file++) {
        FileUtils.write(new File(testMeta.dir, "partition00" + file), "a\nb\nc\n");
    }
    for (int i = 0; i < 10; i++) {
        for (AbstractFileInputOperator<String> o : opers) {
            o.beginWindow(wid);
            o.emitTuples();
            o.endWindow();
        }
        wid++;
    }
    // If all files are processed only once then number of records emitted should
    // be 12.
    Assert.assertEquals("All tuples read ", 12, sink.collectedTuples.size());
}
Also used: Path (org.apache.hadoop.fs.Path), Partition (com.datatorrent.api.Partitioner.Partition), DefaultPartition (com.datatorrent.api.DefaultPartition), StatsListener (com.datatorrent.api.StatsListener), PartitioningContextImpl (org.apache.apex.malhar.lib.partitioner.StatelessPartitionerTest.PartitioningContextImpl), LineByLineFileInputOperator (org.apache.apex.malhar.lib.fs.LineByLineFileInputOperator), File (java.io.File), Kryo (com.esotericsoftware.kryo.Kryo), CollectorTestSink (org.apache.apex.malhar.lib.testbench.CollectorTestSink), Test (org.junit.Test)
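
A hedged variation on the final check above: giving each repartitioned operator its own sink makes the per-partition split of the twelve new records observable (illustration only; the test shares a single sink across partitions):

// Attach one sink per partitioned operator instead of a shared one.
List<CollectorTestSink<Object>> sinks = Lists.newArrayList();
for (Partition<AbstractFileInputOperator<String>> p : newPartitions) {
    LineByLineFileInputOperator oi = (LineByLineFileInputOperator) p.getPartitionedInstance();
    CollectorTestSink<Object> perPartitionSink = new CollectorTestSink<Object>();
    oi.setup(testMeta.context);
    oi.output.setSink(perPartitionSink);
    sinks.add(perPartitionSink);
}
// After running the windows, the sizes of sinks.get(0).collectedTuples and
// sinks.get(1).collectedTuples should sum to 12.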

Example 3 with Partition

Use of com.datatorrent.api.Partitioner.Partition in project apex-core by apache.

From the class PhysicalPlanTest, method testDefaultRepartitioning.

@Test
public void testDefaultRepartitioning() {
    List<PartitionKeys> twoBitPartitionKeys = Arrays.asList(newPartitionKeys("11", "00"), newPartitionKeys("11", "10"), newPartitionKeys("11", "01"), newPartitionKeys("11", "11"));
    GenericTestOperator operator = new GenericTestOperator();
    Set<PartitionKeys> initialPartitionKeys = Sets.newHashSet(newPartitionKeys("1", "0"), newPartitionKeys("1", "1"));
    final ArrayList<Partition<Operator>> partitions = new ArrayList<>();
    for (PartitionKeys pks : initialPartitionKeys) {
        Map<InputPort<?>, PartitionKeys> p1Keys = new HashMap<>();
        p1Keys.put(operator.inport1, pks);
        partitions.add(new DefaultPartition<Operator>(operator, p1Keys, 1, null));
    }
    ArrayList<Partition<Operator>> lowLoadPartitions = new ArrayList<>();
    for (Partition<Operator> p : partitions) {
        lowLoadPartitions.add(new DefaultPartition<>(p.getPartitionedInstance(), p.getPartitionKeys(), -1, null));
    }
    // merge to single partition
    List<Partition<Operator>> newPartitions = Lists.newArrayList();
    Collection<Partition<Operator>> tempNewPartitions = StatelessPartitioner.repartition(lowLoadPartitions);
    newPartitions.addAll(tempNewPartitions);
    Assert.assertEquals("" + newPartitions, 1, newPartitions.size());
    Assert.assertEquals("" + newPartitions.get(0).getPartitionKeys(), 0, newPartitions.get(0).getPartitionKeys().values().iterator().next().mask);
    List<Partition<Operator>> tempList = Collections.singletonList((Partition<Operator>) new DefaultPartition<Operator>(operator, newPartitions.get(0).getPartitionKeys(), -1, null));
    tempNewPartitions = StatelessPartitioner.repartition(tempList);
    newPartitions.clear();
    newPartitions.addAll(tempNewPartitions);
    Assert.assertEquals("" + newPartitions, 1, newPartitions.size());
    // split back into two
    tempList = Collections.singletonList((Partition<Operator>) new DefaultPartition<Operator>(operator, newPartitions.get(0).getPartitionKeys(), 1, null));
    tempNewPartitions = StatelessPartitioner.repartition(tempList);
    newPartitions.clear();
    newPartitions.addAll(tempNewPartitions);
    Assert.assertEquals("" + newPartitions, 2, newPartitions.size());
    // split partitions
    tempNewPartitions = StatelessPartitioner.repartition(partitions);
    newPartitions.clear();
    newPartitions.addAll(tempNewPartitions);
    Assert.assertEquals("" + newPartitions, 4, newPartitions.size());
    Set<PartitionKeys> expectedPartitionKeys = Sets.newHashSet(twoBitPartitionKeys);
    for (Partition<?> p : newPartitions) {
        Assert.assertEquals("" + p.getPartitionKeys(), 1, p.getPartitionKeys().size());
        Assert.assertEquals("" + p.getPartitionKeys(), operator.inport1, p.getPartitionKeys().keySet().iterator().next());
        PartitionKeys pks = p.getPartitionKeys().values().iterator().next();
        expectedPartitionKeys.remove(pks);
    }
    Assert.assertTrue("" + expectedPartitionKeys, expectedPartitionKeys.isEmpty());
    // partition merge
    List<HashSet<PartitionKeys>> expectedKeysSets = Arrays.asList(Sets.newHashSet(newPartitionKeys("11", "00"), newPartitionKeys("11", "10"), newPartitionKeys("1", "1")), Sets.newHashSet(newPartitionKeys("1", "0"), newPartitionKeys("11", "01"), newPartitionKeys("11", "11")));
    for (Set<PartitionKeys> expectedKeys : expectedKeysSets) {
        List<Partition<Operator>> clonePartitions = Lists.newArrayList();
        for (PartitionKeys pks : twoBitPartitionKeys) {
            Map<InputPort<?>, PartitionKeys> p1Keys = new HashMap<>();
            p1Keys.put(operator.inport1, pks);
            int load = expectedKeys.contains(pks) ? 0 : -1;
            clonePartitions.add(new DefaultPartition<Operator>(operator, p1Keys, load, null));
        }
        tempNewPartitions = StatelessPartitioner.repartition(clonePartitions);
        newPartitions.clear();
        newPartitions.addAll(tempNewPartitions);
        Assert.assertEquals("" + newPartitions, 3, newPartitions.size());
        for (Partition<?> p : newPartitions) {
            Assert.assertEquals("" + p.getPartitionKeys(), 1, p.getPartitionKeys().size());
            Assert.assertEquals("" + p.getPartitionKeys(), operator.inport1, p.getPartitionKeys().keySet().iterator().next());
            PartitionKeys pks = p.getPartitionKeys().values().iterator().next();
            expectedKeys.remove(pks);
        }
        Assert.assertTrue("" + expectedKeys, expectedKeys.isEmpty());
    }
    // merge 2 into single partition
    lowLoadPartitions = Lists.newArrayList();
    for (Partition<?> p : partitions) {
        lowLoadPartitions.add(new DefaultPartition<Operator>(operator, p.getPartitionKeys(), -1, null));
    }
    tempNewPartitions = StatelessPartitioner.repartition(lowLoadPartitions);
    newPartitions.clear();
    newPartitions.addAll(tempNewPartitions);
    Assert.assertEquals("" + newPartitions, 1, newPartitions.size());
    for (Partition<?> p : newPartitions) {
        Assert.assertEquals("" + p.getPartitionKeys(), 1, p.getPartitionKeys().size());
        PartitionKeys pks = p.getPartitionKeys().values().iterator().next();
        Assert.assertEquals("" + pks, 0, pks.mask);
        Assert.assertEquals("" + pks, Sets.newHashSet(0), pks.partitions);
    }
}
Also used: GenericTestOperator (com.datatorrent.stram.engine.GenericTestOperator), Operator (com.datatorrent.api.Operator), TestGeneratorInputOperator (com.datatorrent.stram.engine.TestGeneratorInputOperator), TestInputOperator (com.datatorrent.stram.PartitioningTest.TestInputOperator), BaseOperator (com.datatorrent.common.util.BaseOperator), Partition (com.datatorrent.api.Partitioner.Partition), DefaultPartition (com.datatorrent.api.DefaultPartition), InputPort (com.datatorrent.api.Operator.InputPort), DefaultInputPort (com.datatorrent.api.DefaultInputPort), HashMap (java.util.HashMap), ArrayList (java.util.ArrayList), Checkpoint (com.datatorrent.stram.api.Checkpoint), PartitionKeys (com.datatorrent.api.Partitioner.PartitionKeys), HashSet (java.util.HashSet), GenericNodeTest (com.datatorrent.stram.engine.GenericNodeTest), Test (org.junit.Test), PartitioningTest (com.datatorrent.stram.PartitioningTest)
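
The newPartitionKeys helper is defined elsewhere in PhysicalPlanTest and is not shown in this snippet. A plausible reconstruction, consistent with how it is called above (a binary-string mask and a binary-string key), would be:

// Hypothetical reconstruction of the test helper: both arguments are binary strings,
// e.g. newPartitionKeys("11", "01") -> mask 0b11 with the single key 0b01.
static PartitionKeys newPartitionKeys(String mask, String key) {
    return new PartitionKeys(Integer.parseInt(mask, 2), Sets.newHashSet(Integer.parseInt(key, 2)));
}

The third constructor argument of DefaultPartition is the load indicator that StatelessPartitioner.repartition acts on, as the test demonstrates: -1 marks a partition for merging, 1 for splitting, and 0 leaves it unchanged.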

Example 4 with Partition

Use of com.datatorrent.api.Partitioner.Partition in project apex-core by apache.

From the class DefaultPartition, method assignPartitionKeys.

/**
 * Assign partition keys for the given collection of partitions and an input port of the logical operator.
 * <p>
 * The incoming stream will be partitioned by n keys, where n is the nearest power of 2 greater than or equal
 * to the number of partition instances provided. If the number of instances is not a power of 2, some of the
 * partitions will be assigned 2 keys. This logic is used for default partitioning and can be reused to
 * implement {@link Partitioner}.
 *
 * @param <T>        Type of the partitionable object
 * @param partitions
 * @param inputPort
 */
public static <T> void assignPartitionKeys(Collection<Partition<T>> partitions, InputPort<?> inputPort) {
    if (partitions.isEmpty()) {
        throw new IllegalArgumentException("partitions collection cannot be empty");
    }
    // Number of bits needed to address partitions.size() keys: the bit width of (size - 1),
    // i.e. 32 - numberOfLeadingZeros(size - 1).
    int partitionBits = (Integer.numberOfLeadingZeros(0) - Integer.numberOfLeadingZeros(partitions.size() - 1));
    int partitionMask = 0;
    if (partitionBits > 0) {
        // Integer.numberOfLeadingZeros(-1) is 0, so the shift count is -partitionBits; Java
        // masks int shift counts mod 32, making this equivalent to -1 >>> (32 - partitionBits):
        // a mask with the low partitionBits bits set.
        partitionMask = -1 >>> (Integer.numberOfLeadingZeros(-1)) - partitionBits;
    }
    Iterator<Partition<T>> iterator = partitions.iterator();
    // Deal keys 0..partitionMask round-robin; the iterator is restarted once exhausted,
    // so when the key count exceeds the partition count some partitions receive 2 keys.
    for (int i = 0; i <= partitionMask; i++) {
        Partition<?> p;
        if (iterator.hasNext()) {
            p = iterator.next();
        } else {
            iterator = partitions.iterator();
            p = iterator.next();
        }
        PartitionKeys pks = p.getPartitionKeys().get(inputPort);
        if (pks == null) {
            p.getPartitionKeys().put(inputPort, new PartitionKeys(partitionMask, Sets.newHashSet(i)));
        } else {
            pks.partitions.add(i);
        }
    }
}
Also used: Partition (com.datatorrent.api.Partitioner.Partition), PartitionKeys (com.datatorrent.api.Partitioner.PartitionKeys)
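
To make the bit arithmetic concrete, here is a small standalone illustration (not part of the source) of the values computed for three partitions:

// Worked example: key assignment for 3 partitions.
// partitionBits = 32 - Integer.numberOfLeadingZeros(3 - 1) = 2
// partitionMask = -1 >>> (32 - 2) = 0b11 = 3
// Keys 0..3 are dealt round-robin over the 3 partitions:
//   partition 0 -> {0, 3}   (the iterator wraps after key 2)
//   partition 1 -> {1}
//   partition 2 -> {2}
int size = 3;
int partitionBits = Integer.numberOfLeadingZeros(0) - Integer.numberOfLeadingZeros(size - 1); // 2
int partitionMask = partitionBits > 0 ? -1 >>> (Integer.SIZE - partitionBits) : 0;            // 3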

Example 5 with Partition

Use of com.datatorrent.api.Partitioner.Partition in project apex-malhar by apache.

From the class CouchBaseInputOperatorTest, method TestCouchBaseInputOperator.

@Test
public void TestCouchBaseInputOperator() throws Exception {
    BucketConfiguration bucketConfiguration = new BucketConfiguration();
    CouchbaseConnectionFactoryBuilder cfb = new CouchbaseConnectionFactoryBuilder();
    CouchbaseMock mockCouchbase1 = createMock("default", "", bucketConfiguration);
    CouchbaseMock mockCouchbase2 = createMock("default", "", bucketConfiguration);
    mockCouchbase1.start();
    mockCouchbase1.waitForStartup();
    List<URI> uriList = new ArrayList<URI>();
    int port1 = mockCouchbase1.getHttpPort();
    logger.debug("port is {}", port1);
    mockCouchbase2.start();
    mockCouchbase2.waitForStartup();
    int port2 = mockCouchbase2.getHttpPort();
    logger.debug("port is {}", port2);
    uriList.add(new URI("http", null, "localhost", port1, "/pools", "", ""));
    connectionFactory = cfb.buildCouchbaseConnection(uriList, bucketConfiguration.name, bucketConfiguration.password);
    client = new CouchbaseClient(connectionFactory);
    CouchBaseStore store = new CouchBaseStore();
    keyList = new ArrayList<String>();
    store.setBucket(bucketConfiguration.name);
    store.setPasswordConfig(password);
    store.setPassword(bucketConfiguration.password);
    store.setUriString("localhost:" + port1 + "," + "localhost:" + port1);
    // couchbaseBucket.getCouchServers();
    AttributeMap.DefaultAttributeMap attributeMap = new AttributeMap.DefaultAttributeMap();
    attributeMap.put(DAG.APPLICATION_ID, APP_ID);
    TestInputOperator inputOperator = new TestInputOperator();
    inputOperator.setStore(store);
    inputOperator.insertEventsInTable(10);
    CollectorTestSink<Object> sink = new CollectorTestSink<Object>();
    inputOperator.outputPort.setSink(sink);
    List<Partition<AbstractCouchBaseInputOperator<String>>> partitions = Lists.newArrayList();
    Collection<Partition<AbstractCouchBaseInputOperator<String>>> newPartitions = inputOperator.definePartitions(partitions, new PartitioningContextImpl(null, 0));
    Assert.assertEquals(2, newPartitions.size());
    for (Partition<AbstractCouchBaseInputOperator<String>> p : newPartitions) {
        Assert.assertNotSame(inputOperator, p.getPartitionedInstance());
    }
    // Collect all operators in a list
    List<AbstractCouchBaseInputOperator<String>> opers = Lists.newArrayList();
    for (Partition<AbstractCouchBaseInputOperator<String>> p : newPartitions) {
        TestInputOperator oi = (TestInputOperator) p.getPartitionedInstance();
        oi.setServerURIString("localhost:" + port1);
        oi.setStore(store);
        oi.setup(null);
        oi.outputPort.setSink(sink);
        opers.add(oi);
        port1 = port2;
    }
    sink.clear();
    int wid = 0;
    for (int i = 0; i < 10; i++) {
        for (AbstractCouchBaseInputOperator<String> o : opers) {
            o.beginWindow(wid);
            o.emitTuples();
            o.endWindow();
        }
        wid++;
    }
    Assert.assertEquals("Tuples read should be same ", 10, sink.collectedTuples.size());
    for (AbstractCouchBaseInputOperator<String> o : opers) {
        o.teardown();
    }
    mockCouchbase1.stop();
    mockCouchbase2.stop();
}
Also used: CouchbaseConnectionFactoryBuilder (com.couchbase.client.CouchbaseConnectionFactoryBuilder), ArrayList (java.util.ArrayList), URI (java.net.URI), CouchbaseClient (com.couchbase.client.CouchbaseClient), Partition (com.datatorrent.api.Partitioner.Partition), CouchbaseMock (org.couchbase.mock.CouchbaseMock), BucketConfiguration (org.couchbase.mock.BucketConfiguration), AttributeMap (com.datatorrent.api.Attribute.AttributeMap), PartitioningContextImpl (org.apache.apex.malhar.lib.partitioner.StatelessPartitionerTest.PartitioningContextImpl), CollectorTestSink (org.apache.apex.malhar.lib.testbench.CollectorTestSink), Test (org.junit.Test)

Aggregations

Partition (com.datatorrent.api.Partitioner.Partition): 11
DefaultPartition (com.datatorrent.api.DefaultPartition): 9
Test (org.junit.Test): 8
PartitioningContextImpl (org.apache.apex.malhar.lib.partitioner.StatelessPartitionerTest.PartitioningContextImpl): 7
File (java.io.File): 6
LineByLineFileInputOperator (org.apache.apex.malhar.lib.fs.LineByLineFileInputOperator): 6
Path (org.apache.hadoop.fs.Path): 6
StatsListener (com.datatorrent.api.StatsListener): 4
ArrayList (java.util.ArrayList): 4
CollectorTestSink (org.apache.apex.malhar.lib.testbench.CollectorTestSink): 4
Operator (com.datatorrent.api.Operator): 3
Checkpoint (com.datatorrent.stram.api.Checkpoint): 3
Kryo (com.esotericsoftware.kryo.Kryo): 3
Partitioner (com.datatorrent.api.Partitioner): 2
PartitionKeys (com.datatorrent.api.Partitioner.PartitionKeys): 2
HashMap (java.util.HashMap): 2
CopyOnWriteArrayList (java.util.concurrent.CopyOnWriteArrayList): 2
Configuration (org.apache.hadoop.conf.Configuration): 2
CouchbaseClient (com.couchbase.client.CouchbaseClient): 1
CouchbaseConnectionFactoryBuilder (com.couchbase.client.CouchbaseConnectionFactoryBuilder): 1