Search in sources :

Example 1 with PartitioningContextImpl

use of org.apache.apex.malhar.lib.partitioner.StatelessPartitionerTest.PartitioningContextImpl in project apex-malhar by apache.

the class AbstractFileInputOperatorTest method testPartitioningStateTransferFailure.

/**
 * Checks that repartitioning while a read is still in progress does not lose the unread input.
 * - Writes 4 files holding 3 records apiece.
 * - Drives a single operator (emit batch size 2) for 5 windows and expects 6 of the 12 records.
 * - Repartitions that operator into two instances.
 * - Drives both instances for 10 windows and expects the 6 outstanding records to be emitted.
 */
@Test
public void testPartitioningStateTransferFailure() throws Exception {
    final String inputDir = new File(testMeta.dir).getAbsolutePath();

    LineByLineFileInputOperator reader = new LineByLineFileInputOperator();
    reader.getScanner().setFilePatternRegexp(".*partition([\\d]*)");
    reader.setDirectory(inputDir);
    reader.setScanIntervalMillis(0);
    reader.setEmitBatchSize(2);
    // Copy taken before anything is read; the repartitioning calls below are made on this copy.
    LineByLineFileInputOperator template = new Kryo().copy(reader);

    // Wipe the test directory, then write 4 files of 3 records each.
    Path inputPath = new Path(inputDir);
    FileContext.getLocalFSFileContext().delete(inputPath, true);
    int fileIndex = 0;
    while (fileIndex < 4) {
        FileUtils.write(new File(testMeta.dir, "partition00" + fileIndex), "a\nb\nc\n");
        fileIndex++;
    }

    CollectorTestSink<String> typedSink = new CollectorTestSink<String>();
    @SuppressWarnings({ "unchecked", "rawtypes" })
    CollectorTestSink<Object> collector = (CollectorTestSink) typedSink;
    reader.output.setSink(collector);

    // Run only 5 windows so that part of the input is still unread.
    int windowId = 0;
    reader.setup(testMeta.context);
    for (; windowId < 5; windowId++) {
        reader.beginWindow(windowId);
        reader.emitTuples();
        reader.endWindow();
    }
    Assert.assertEquals("Partial tuples read ", 6, collector.collectedTuples.size());

    // Ask for two partitions and make sure a repartition is requested.
    Assert.assertEquals(1, template.getCurrentPartitions());
    template.setPartitionCount(2);
    StatsListener.Response response = template.processStats(null);
    Assert.assertEquals(true, response.repartitionRequired);

    // Hand the partially-read operator over as the only existing partition.
    List<Partition<AbstractFileInputOperator<String>>> existing = Lists.newArrayList();
    existing.add(new DefaultPartition<AbstractFileInputOperator<String>>(reader));
    // incremental capacity controlled partitionCount property
    Collection<Partition<AbstractFileInputOperator<String>>> repartitioned =
        template.definePartitions(existing, new PartitioningContextImpl(null, 0));
    Assert.assertEquals(2, repartitioned.size());
    // The reported partition count only changes once partitioned() has been called.
    Assert.assertEquals(1, template.getCurrentPartitions());

    Map<Integer, Partition<AbstractFileInputOperator<String>>> byId = Maps.newHashMap();
    int nextId = 0;
    for (Partition<AbstractFileInputOperator<String>> partition : repartitioned) {
        byId.put(nextId, partition);
        nextId++;
    }
    template.partitioned(byId);
    Assert.assertEquals(2, template.getCurrentPartitions());

    // Set up every new instance and point it at the shared sink.
    List<AbstractFileInputOperator<String>> instances = Lists.newArrayList();
    for (Partition<AbstractFileInputOperator<String>> partition : repartitioned) {
        LineByLineFileInputOperator instance = (LineByLineFileInputOperator) partition.getPartitionedInstance();
        instance.setup(testMeta.context);
        instance.output.setSink(collector);
        instances.add(instance);
    }

    collector.clear();
    final int stopAtWindow = windowId + 10;
    while (windowId < stopAtWindow) {
        for (AbstractFileInputOperator<String> instance : instances) {
            instance.beginWindow(windowId);
            instance.emitTuples();
            instance.endWindow();
        }
        windowId++;
    }
    // The 6 records left unread before the split must show up now.
    Assert.assertEquals("Remaining tuples read ", 6, collector.collectedTuples.size());
}
Also used : Path(org.apache.hadoop.fs.Path) Partition(com.datatorrent.api.Partitioner.Partition) DefaultPartition(com.datatorrent.api.DefaultPartition) StatsListener(com.datatorrent.api.StatsListener) PartitioningContextImpl(org.apache.apex.malhar.lib.partitioner.StatelessPartitionerTest.PartitioningContextImpl) LineByLineFileInputOperator(org.apache.apex.malhar.lib.fs.LineByLineFileInputOperator) File(java.io.File) Kryo(com.esotericsoftware.kryo.Kryo) CollectorTestSink(org.apache.apex.malhar.lib.testbench.CollectorTestSink) Test(org.junit.Test)

Example 2 with PartitioningContextImpl

use of org.apache.apex.malhar.lib.partitioner.StatelessPartitionerTest.PartitioningContextImpl in project apex-malhar by apache.

the class AbstractFileInputOperatorTest method testPartitioningStateTransfer.

/**
 * Checks that the record of already-processed files survives dynamic repartitioning.
 * - Writes 4 files holding 3 records apiece.
 * - Drives a single operator for 10 windows and expects all 12 records.
 * - Repartitions that operator into two instances.
 * - Drives both instances again and expects nothing to be emitted, because every file
 *   has already been processed.
 * - Writes another 4 files holding 3 records apiece.
 * - Drives both instances once more and expects 4 * 3 = 12 records in total.
 */
@Test
public void testPartitioningStateTransfer() throws Exception {
    final String inputDir = new File(testMeta.dir).getAbsolutePath();

    LineByLineFileInputOperator reader = new LineByLineFileInputOperator();
    reader.getScanner().setFilePatternRegexp(".*partition([\\d]*)");
    reader.setDirectory(inputDir);
    reader.setScanIntervalMillis(0);
    // Copy taken before anything is read; the repartitioning calls below are made on this copy.
    LineByLineFileInputOperator template = new Kryo().copy(reader);

    // Wipe the test directory, then write the first 4 files of 3 records each.
    Path inputPath = new Path(inputDir);
    FileContext.getLocalFSFileContext().delete(inputPath, true);
    // Kept outside the loop: the second batch of files continues the numbering.
    int fileIndex = 0;
    while (fileIndex < 4) {
        FileUtils.write(new File(testMeta.dir, "partition00" + fileIndex), "a\nb\nc\n");
        fileIndex++;
    }

    CollectorTestSink<String> typedSink = new CollectorTestSink<String>();
    @SuppressWarnings({ "unchecked", "rawtypes" })
    CollectorTestSink<Object> collector = (CollectorTestSink) typedSink;
    reader.output.setSink(collector);

    // Run enough windows for every record to be emitted before the split.
    int windowId = 0;
    reader.setup(testMeta.context);
    for (; windowId < 10; windowId++) {
        reader.beginWindow(windowId);
        reader.emitTuples();
        reader.endWindow();
    }
    Assert.assertEquals("All tuples read ", 12, collector.collectedTuples.size());

    // Ask for two partitions and make sure a repartition is requested.
    Assert.assertEquals(1, template.getCurrentPartitions());
    template.setPartitionCount(2);
    StatsListener.Response response = template.processStats(null);
    Assert.assertEquals(true, response.repartitionRequired);

    // Hand the fully-read operator over as the only existing partition.
    List<Partition<AbstractFileInputOperator<String>>> existing = Lists.newArrayList();
    existing.add(new DefaultPartition<AbstractFileInputOperator<String>>(reader));
    // incremental capacity controlled partitionCount property
    Collection<Partition<AbstractFileInputOperator<String>>> repartitioned =
        template.definePartitions(existing, new PartitioningContextImpl(null, 0));
    Assert.assertEquals(2, repartitioned.size());
    // The reported partition count only changes once partitioned() has been called.
    Assert.assertEquals(1, template.getCurrentPartitions());

    Map<Integer, Partition<AbstractFileInputOperator<String>>> byId = Maps.newHashMap();
    int nextId = 0;
    for (Partition<AbstractFileInputOperator<String>> partition : repartitioned) {
        byId.put(nextId, partition);
        nextId++;
    }
    template.partitioned(byId);
    Assert.assertEquals(2, template.getCurrentPartitions());

    // Set up every new instance and point it at the shared sink.
    List<AbstractFileInputOperator<String>> instances = Lists.newArrayList();
    for (Partition<AbstractFileInputOperator<String>> partition : repartitioned) {
        LineByLineFileInputOperator instance = (LineByLineFileInputOperator) partition.getPartitionedInstance();
        instance.setup(testMeta.context);
        instance.output.setSink(collector);
        instances.add(instance);
    }

    collector.clear();
    int stopAtWindow = windowId + 10;
    while (windowId < stopAtWindow) {
        for (AbstractFileInputOperator<String> instance : instances) {
            instance.beginWindow(windowId);
            instance.emitTuples();
            instance.endWindow();
        }
        windowId++;
    }
    // Every file was consumed before the split, so nothing may be emitted again.
    Assert.assertEquals("No new tuples read ", 0, collector.collectedTuples.size());

    // Add four new files with 3 records each, continuing the numbering at 4.
    while (fileIndex < 8) {
        FileUtils.write(new File(testMeta.dir, "partition00" + fileIndex), "a\nb\nc\n");
        fileIndex++;
    }

    stopAtWindow = windowId + 10;
    while (windowId < stopAtWindow) {
        for (AbstractFileInputOperator<String> instance : instances) {
            instance.beginWindow(windowId);
            instance.emitTuples();
            instance.endWindow();
        }
        windowId++;
    }
    // Each new file must be processed exactly once across both instances: 4 * 3 = 12 records.
    Assert.assertEquals("All tuples read ", 12, collector.collectedTuples.size());
}
Also used : Path(org.apache.hadoop.fs.Path) Partition(com.datatorrent.api.Partitioner.Partition) DefaultPartition(com.datatorrent.api.DefaultPartition) StatsListener(com.datatorrent.api.StatsListener) PartitioningContextImpl(org.apache.apex.malhar.lib.partitioner.StatelessPartitionerTest.PartitioningContextImpl) LineByLineFileInputOperator(org.apache.apex.malhar.lib.fs.LineByLineFileInputOperator) File(java.io.File) Kryo(com.esotericsoftware.kryo.Kryo) CollectorTestSink(org.apache.apex.malhar.lib.testbench.CollectorTestSink) Test(org.junit.Test)

Example 3 with PartitioningContextImpl

use of org.apache.apex.malhar.lib.partitioner.StatelessPartitionerTest.PartitioningContextImpl in project apex-malhar by apache.

the class AbstractFileInputOperatorTest method testPartitioningStateTransferInterrupted.

/**
 * Dynamic partitioning test in which the split happens while input is still unread.
 * - Writes 4 files holding 3 records apiece.
 * - Runs one operator (emit batch size 2) for 5 windows; 6 of the 12 records are expected.
 * - Splits that operator into two partitions.
 * - Runs the two partitions for 10 windows; the other 6 records are expected.
 */
@Test
public void testPartitioningStateTransferInterrupted() throws Exception {
    final String directory = new File(testMeta.dir).getAbsolutePath();

    LineByLineFileInputOperator source = new LineByLineFileInputOperator();
    source.getScanner().setFilePatternRegexp(".*partition([\\d]*)");
    source.setDirectory(directory);
    source.setScanIntervalMillis(0);
    source.setEmitBatchSize(2);
    // Snapshot of the configured operator, made before any window has run.
    LineByLineFileInputOperator snapshot = new Kryo().copy(source);

    // Clear out the directory and create 4 files with 3 records each.
    Path directoryPath = new Path(directory);
    FileContext.getLocalFSFileContext().delete(directoryPath, true);
    for (int n = 0; n < 4; n++) {
        FileUtils.write(new File(testMeta.dir, "partition00" + n), "a\nb\nc\n");
    }

    CollectorTestSink<String> stringSink = new CollectorTestSink<String>();
    @SuppressWarnings({ "unchecked", "rawtypes" })
    CollectorTestSink<Object> tuples = (CollectorTestSink) stringSink;
    source.output.setSink(tuples);

    // Read some records, but not all of them.
    int window = 0;
    source.setup(testMeta.context);
    while (window < 5) {
        source.beginWindow(window);
        source.emitTuples();
        source.endWindow();
        window++;
    }
    Assert.assertEquals("Partial tuples read ", 6, tuples.collectedTuples.size());

    // Request a second partition; the stats callback must ask for a repartition.
    Assert.assertEquals(1, snapshot.getCurrentPartitions());
    snapshot.setPartitionCount(2);
    StatsListener.Response statsResponse = snapshot.processStats(null);
    Assert.assertEquals(true, statsResponse.repartitionRequired);

    // The operator that has already read part of the input is the sole existing partition.
    List<Partition<AbstractFileInputOperator<String>>> before = Lists.newArrayList();
    before.add(new DefaultPartition<AbstractFileInputOperator<String>>(source));
    // incremental capacity controlled partitionCount property
    Collection<Partition<AbstractFileInputOperator<String>>> after =
        snapshot.definePartitions(before, new PartitioningContextImpl(null, 0));
    Assert.assertEquals(2, after.size());
    // Still 1 here: partitioned() has not been invoked yet.
    Assert.assertEquals(1, snapshot.getCurrentPartitions());

    Map<Integer, Partition<AbstractFileInputOperator<String>>> indexed = Maps.newHashMap();
    int key = 0;
    for (Partition<AbstractFileInputOperator<String>> part : after) {
        indexed.put(key++, part);
    }
    snapshot.partitioned(indexed);
    Assert.assertEquals(2, snapshot.getCurrentPartitions());

    // Collect the partitioned operators, each set up and wired to the same sink.
    List<AbstractFileInputOperator<String>> running = Lists.newArrayList();
    for (Partition<AbstractFileInputOperator<String>> part : after) {
        LineByLineFileInputOperator child = (LineByLineFileInputOperator) part.getPartitionedInstance();
        child.setup(testMeta.context);
        child.output.setSink(tuples);
        running.add(child);
    }

    tuples.clear();
    for (int round = 0; round < 10; round++, window++) {
        for (AbstractFileInputOperator<String> child : running) {
            child.beginWindow(window);
            child.emitTuples();
            child.endWindow();
        }
    }
    // Only the records that were unread at the time of the split should arrive.
    Assert.assertEquals("Remaining tuples read ", 6, tuples.collectedTuples.size());
}
Also used : Path(org.apache.hadoop.fs.Path) Partition(com.datatorrent.api.Partitioner.Partition) DefaultPartition(com.datatorrent.api.DefaultPartition) StatsListener(com.datatorrent.api.StatsListener) PartitioningContextImpl(org.apache.apex.malhar.lib.partitioner.StatelessPartitionerTest.PartitioningContextImpl) LineByLineFileInputOperator(org.apache.apex.malhar.lib.fs.LineByLineFileInputOperator) File(java.io.File) Kryo(com.esotericsoftware.kryo.Kryo) CollectorTestSink(org.apache.apex.malhar.lib.testbench.CollectorTestSink) Test(org.junit.Test)

Example 4 with PartitioningContextImpl

use of org.apache.apex.malhar.lib.partitioner.StatelessPartitionerTest.PartitioningContextImpl in project apex-malhar by apache.

the class AbstractFileInputOperatorTest method testWithCustomScanner.

/**
 * Checks that a custom scanner splits the input files evenly between two partitions.
 * <p>
 * Ten files are created, each named {@code <index>_partition_00<random>} where the leading index
 * runs from 0 to 9 (inclusive). The scanner returns this index from its getPartition method
 * (MyScanner is defined outside this method), so with 2 partitions each one should pick up
 * 10 / 2 = 5 files.
 *
 * @throws Exception if the test directory cannot be prepared or scanned
 */
@Test
public void testWithCustomScanner() throws Exception {
    LineByLineFileInputOperator oper = new LineByLineFileInputOperator();
    oper.setScanner(new MyScanner());
    oper.getScanner().setFilePatternRegexp(".*partition_([\\d]*)");
    oper.setDirectory(new File(testMeta.dir).getAbsolutePath());
    // The random suffix only varies the tail of the file name; the leading index is what differs per file.
    Random rand = new Random();
    Path path = new Path(new File(testMeta.dir).getAbsolutePath());
    // Start from an empty directory so that exactly the ten files below are present.
    FileContext.getLocalFSFileContext().delete(path, true);
    for (int file = 0; file < 10; file++) {
        FileUtils.write(new File(testMeta.dir, file + "_partition_00" + rand.nextInt(100)), "");
    }
    List<Partition<AbstractFileInputOperator<String>>> partitions = Lists.newArrayList();
    partitions.add(new DefaultPartition<AbstractFileInputOperator<String>>(oper));
    Collection<Partition<AbstractFileInputOperator<String>>> newPartitions = oper.definePartitions(partitions, new PartitioningContextImpl(null, 2));
    Assert.assertEquals(2, newPartitions.size());
    // partitioned() wasn't called
    Assert.assertEquals(1, oper.getCurrentPartitions());
    for (Partition<AbstractFileInputOperator<String>> p : newPartitions) {
        // Each partition must get its own operator and its own scanner instance.
        Assert.assertNotSame(oper, p.getPartitionedInstance());
        Assert.assertNotSame(oper.getScanner(), p.getPartitionedInstance().getScanner());
        Set<String> consumed = Sets.newHashSet();
        LinkedHashSet<Path> files = p.getPartitionedInstance().getScanner().scan(FileSystem.getLocal(new Configuration(false)), path, consumed);
        // 10 files over 2 partitions: 5 per partition (6 each could never hold for both).
        Assert.assertEquals("partition " + files, 5, files.size());
    }
}
Also used : Path(org.apache.hadoop.fs.Path) Partition(com.datatorrent.api.Partitioner.Partition) DefaultPartition(com.datatorrent.api.DefaultPartition) Configuration(org.apache.hadoop.conf.Configuration) Random(java.util.Random) PartitioningContextImpl(org.apache.apex.malhar.lib.partitioner.StatelessPartitionerTest.PartitioningContextImpl) LineByLineFileInputOperator(org.apache.apex.malhar.lib.fs.LineByLineFileInputOperator) File(java.io.File) Test(org.junit.Test)

Example 5 with PartitioningContextImpl

use of org.apache.apex.malhar.lib.partitioner.StatelessPartitionerTest.PartitioningContextImpl in project apex-malhar by apache.

the class AbstractFileInputOperatorTest method testPartitioning.

/**
 * Checks that defining two partitions splits the input files between them.
 * <p>
 * Four empty files named {@code partition000} .. {@code partition003} are created and the
 * operator is partitioned in 2. Each partition must receive a distinct operator and scanner
 * instance, and each scanner is expected to pick up 4 / 2 = 2 of the files.
 *
 * @throws Exception if the test directory cannot be prepared or scanned
 */
@Test
public void testPartitioning() throws Exception {
    LineByLineFileInputOperator oper = new LineByLineFileInputOperator();
    oper.getScanner().setFilePatternRegexp(".*partition([\\d]*)");
    oper.setDirectory(new File(testMeta.dir).getAbsolutePath());
    Path path = new Path(new File(testMeta.dir).getAbsolutePath());
    // Start from an empty directory so that exactly the four files below are present.
    FileContext.getLocalFSFileContext().delete(path, true);
    for (int file = 0; file < 4; file++) {
        FileUtils.write(new File(testMeta.dir, "partition00" + file), "");
    }
    List<Partition<AbstractFileInputOperator<String>>> partitions = Lists.newArrayList();
    partitions.add(new DefaultPartition<AbstractFileInputOperator<String>>(oper));
    Collection<Partition<AbstractFileInputOperator<String>>> newPartitions = oper.definePartitions(partitions, new PartitioningContextImpl(null, 2));
    Assert.assertEquals(2, newPartitions.size());
    // partitioned() wasn't called
    Assert.assertEquals(1, oper.getCurrentPartitions());
    for (Partition<AbstractFileInputOperator<String>> p : newPartitions) {
        // Each partition must get its own operator and its own scanner instance.
        Assert.assertNotSame(oper, p.getPartitionedInstance());
        Assert.assertNotSame(oper.getScanner(), p.getPartitionedInstance().getScanner());
        Set<String> consumed = Sets.newHashSet();
        LinkedHashSet<Path> files = p.getPartitionedInstance().getScanner().scan(FileSystem.getLocal(new Configuration(false)), path, consumed);
        // Only 4 files exist, so two partitions sharing them cannot each hold 3; an even split is 2.
        Assert.assertEquals("partition " + files, 2, files.size());
    }
}
Also used : Path(org.apache.hadoop.fs.Path) Partition(com.datatorrent.api.Partitioner.Partition) DefaultPartition(com.datatorrent.api.DefaultPartition) Configuration(org.apache.hadoop.conf.Configuration) PartitioningContextImpl(org.apache.apex.malhar.lib.partitioner.StatelessPartitionerTest.PartitioningContextImpl) LineByLineFileInputOperator(org.apache.apex.malhar.lib.fs.LineByLineFileInputOperator) File(java.io.File) Test(org.junit.Test)

Aggregations

Partition (com.datatorrent.api.Partitioner.Partition)7 PartitioningContextImpl (org.apache.apex.malhar.lib.partitioner.StatelessPartitionerTest.PartitioningContextImpl)7 Test (org.junit.Test)7 DefaultPartition (com.datatorrent.api.DefaultPartition)6 File (java.io.File)6 LineByLineFileInputOperator (org.apache.apex.malhar.lib.fs.LineByLineFileInputOperator)6 Path (org.apache.hadoop.fs.Path)6 CollectorTestSink (org.apache.apex.malhar.lib.testbench.CollectorTestSink)4 StatsListener (com.datatorrent.api.StatsListener)3 Kryo (com.esotericsoftware.kryo.Kryo)3 Configuration (org.apache.hadoop.conf.Configuration)2 CouchbaseClient (com.couchbase.client.CouchbaseClient)1 CouchbaseConnectionFactoryBuilder (com.couchbase.client.CouchbaseConnectionFactoryBuilder)1 AttributeMap (com.datatorrent.api.Attribute.AttributeMap)1 URI (java.net.URI)1 ArrayList (java.util.ArrayList)1 Random (java.util.Random)1 FSWindowDataManager (org.apache.apex.malhar.lib.wal.FSWindowDataManager)1 BucketConfiguration (org.couchbase.mock.BucketConfiguration)1 CouchbaseMock (org.couchbase.mock.CouchbaseMock)1