
Example 6 with AttributeMap

Use of com.datatorrent.api.Attribute.AttributeMap in project apex-malhar by apache.

From the class HiveMockTest, method testHDFSHiveCheckpoint.

@Test
public void testHDFSHiveCheckpoint() throws SQLException, TException {
    hiveInitializeDatabase(createStore(null));
    HiveStore hiveStore = createStore(null);
    hiveStore.setFilepath(testdir);
    HiveOperator outputOperator = new HiveOperator();
    HiveOperator newOp;
    outputOperator.setStore(hiveStore);
    ArrayList<String> hivePartitionColumns = new ArrayList<String>();
    hivePartitionColumns.add("dt");
    FSRollingTestImpl fsRolling = new FSRollingTestImpl();
    hiveInitializeDatabase(createStore(null));
    outputOperator.setHivePartitionColumns(hivePartitionColumns);
    outputOperator.setTablename(tablename);
    fsRolling.setFilePath(testdir);
    short permission = 511;
    fsRolling.setFilePermission(permission);
    fsRolling.setAlwaysWriteToTmp(false);
    fsRolling.setMaxLength(128);
    AttributeMap.DefaultAttributeMap attributeMap = new AttributeMap.DefaultAttributeMap();
    attributeMap.put(OperatorContext.PROCESSING_MODE, ProcessingMode.AT_LEAST_ONCE);
    attributeMap.put(OperatorContext.ACTIVATION_WINDOW_ID, -1L);
    attributeMap.put(DAG.APPLICATION_ID, APP_ID);
    OperatorContext context = mockOperatorContext(OPERATOR_ID, attributeMap);
    fsRolling.setup(context);
    FilePartitionMapping mapping1 = new FilePartitionMapping();
    FilePartitionMapping mapping2 = new FilePartitionMapping();
    FilePartitionMapping mapping3 = new FilePartitionMapping();
    outputOperator.setup(context);
    mapping1.setFilename(APP_ID + "/" + OPERATOR_ID + "/" + "2014-12-10" + "/" + "0-transaction.out.part.0");
    ArrayList<String> partitions1 = new ArrayList<String>();
    partitions1.add("2014-12-10");
    mapping1.setPartition(partitions1);
    mapping2.setFilename(APP_ID + "/" + OPERATOR_ID + "/" + "2014-12-11" + "/" + "0-transaction.out.part.0");
    ArrayList<String> partitions2 = new ArrayList<String>();
    partitions2.add("2014-12-11");
    mapping2.setPartition(partitions2);
    ArrayList<String> partitions3 = new ArrayList<String>();
    partitions3.add("2014-12-12");
    mapping3.setFilename(APP_ID + "/" + OPERATOR_ID + "/" + "2014-12-12" + "/" + "0-transaction.out.part.0");
    mapping3.setPartition(partitions3);
    for (int wid = 0, total = 0; wid < NUM_WINDOWS; wid++) {
        fsRolling.beginWindow(wid);
        for (int tupleCounter = 0; tupleCounter < BLAST_SIZE && total < DATABASE_SIZE; tupleCounter++, total++) {
            fsRolling.input.process("2014-12-1" + tupleCounter);
        }
        if (wid == 7) {
            fsRolling.committed(wid - 1);
            outputOperator.processTuple(mapping1);
            outputOperator.processTuple(mapping2);
        }
        fsRolling.endWindow();
        if ((wid == 6) || (wid == 9)) {
            fsRolling.beforeCheckpoint(wid);
            fsRolling.checkpointed(wid);
        }
        if (wid == 9) {
            Kryo kryo = new Kryo();
            FieldSerializer<HiveOperator> f1 = (FieldSerializer<HiveOperator>) kryo.getSerializer(HiveOperator.class);
            FieldSerializer<HiveStore> f2 = (FieldSerializer<HiveStore>) kryo.getSerializer(HiveStore.class);
            f1.setCopyTransient(false);
            f2.setCopyTransient(false);
            newOp = kryo.copy(outputOperator);
            outputOperator.teardown();
            newOp.setup(context);
            newOp.beginWindow(7);
            newOp.processTuple(mapping3);
            newOp.endWindow();
            newOp.teardown();
            break;
        }
    }
    hiveStore.connect();
    client.execute("select * from " + tablename + " where dt='2014-12-10'");
    List<String> recordsInDatePartition1 = client.fetchAll();
    client.execute("select * from " + tablename + " where dt='2014-12-11'");
    List<String> recordsInDatePartition2 = client.fetchAll();
    client.execute("select * from " + tablename + " where dt='2014-12-12'");
    List<String> recordsInDatePartition3 = client.fetchAll();
    client.execute("drop table " + tablename);
    hiveStore.disconnect();
    Assert.assertEquals(7, recordsInDatePartition1.size());
    for (int i = 0; i < recordsInDatePartition1.size(); i++) {
        LOG.debug("records in first date partition are {}", recordsInDatePartition1.get(i));
        /* Each fetched record is an Object[] holding the data value and the partition value,
           so cast it to Object[] before asserting on the partition column (second element). */
        Object record = recordsInDatePartition1.get(i);
        Object[] records = (Object[]) record;
        Assert.assertEquals("2014-12-10", records[1]);
    }
    Assert.assertEquals(7, recordsInDatePartition2.size());
    for (int i = 0; i < recordsInDatePartition2.size(); i++) {
        LOG.debug("records in second date partition are {}", recordsInDatePartition2.get(i));
        Object record = recordsInDatePartition2.get(i);
        Object[] records = (Object[]) record;
        Assert.assertEquals("2014-12-11", records[1]);
    }
    Assert.assertEquals(10, recordsInDatePartition3.size());
    for (int i = 0; i < recordsInDatePartition3.size(); i++) {
        LOG.debug("records in second date partition are {}", recordsInDatePartition3.get(i));
        Object record = recordsInDatePartition3.get(i);
        Object[] records = (Object[]) record;
        Assert.assertEquals("2014-12-12", records[1]);
    }
}
Also used : ArrayList(java.util.ArrayList) FieldSerializer(com.esotericsoftware.kryo.serializers.FieldSerializer) AttributeMap(com.datatorrent.api.Attribute.AttributeMap) OperatorContextTestHelper.mockOperatorContext(org.apache.apex.malhar.lib.helper.OperatorContextTestHelper.mockOperatorContext) OperatorContext(com.datatorrent.api.Context.OperatorContext) FilePartitionMapping(org.apache.apex.malhar.hive.AbstractFSRollingOutputOperator.FilePartitionMapping) Kryo(com.esotericsoftware.kryo.Kryo) Test(org.junit.Test)
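
The wid == 9 branch above simulates a checkpoint and restore by deep-copying the operator with Kryo rather than serializing it to disk. A minimal sketch of that idiom as a reusable helper; KryoCheckpointHelper and checkpointCopy are hypothetical names, not part of the test:

import com.esotericsoftware.kryo.Kryo;
import com.esotericsoftware.kryo.serializers.FieldSerializer;

// Hypothetical helper that clones an operator the way the test above does,
// standing in for a checkpoint + restore cycle.
public class KryoCheckpointHelper {
    @SuppressWarnings("unchecked")
    public static <T> T checkpointCopy(Kryo kryo, T operator, Class<T> type) {
        FieldSerializer<T> serializer = (FieldSerializer<T>) kryo.getSerializer(type);
        // Skip transient fields so the copy behaves like a deserialized checkpoint;
        // transient state is rebuilt when setup(context) is called on the copy.
        serializer.setCopyTransient(false);
        return kryo.copy(operator);
    }
}

The test would then call something like checkpointCopy(new Kryo(), outputOperator, HiveOperator.class), tear down the original instance, and run setup(context) on the copy before replaying the uncommitted window. Note that the test above applies the same setCopyTransient(false) setting to the HiveStore serializer as well, so the store held by the operator is cloned the same way.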

Example 7 with AttributeMap

Use of com.datatorrent.api.Attribute.AttributeMap in project apex-malhar by apache.

From the class HiveMockTest, method testInsertPOJO.

@Test
public void testInsertPOJO() throws Exception {
    HiveStore hiveStore = createStore(null);
    hiveStore.setFilepath(testdir);
    ArrayList<String> hivePartitionColumns = new ArrayList<String>();
    hivePartitionColumns.add("dt");
    ArrayList<String> hiveColumns = new ArrayList<String>();
    hiveColumns.add("col1");
    hiveInitializePOJODatabase(createStore(null));
    HiveOperator hiveOperator = new HiveOperator();
    hiveOperator.setStore(hiveStore);
    hiveOperator.setTablename(tablepojo);
    hiveOperator.setHivePartitionColumns(hivePartitionColumns);
    FSPojoToHiveOperator fsRolling = new FSPojoToHiveOperator();
    fsRolling.setFilePath(testdir);
    fsRolling.setHiveColumns(hiveColumns);
    ArrayList<FIELD_TYPE> fieldtypes = new ArrayList<FIELD_TYPE>();
    ArrayList<FIELD_TYPE> partitiontypes = new ArrayList<FIELD_TYPE>();
    fieldtypes.add(FIELD_TYPE.INTEGER);
    partitiontypes.add(FIELD_TYPE.STRING);
    fsRolling.setHiveColumnDataTypes(fieldtypes);
    fsRolling.setHivePartitionColumnDataTypes(partitiontypes);
    // ArrayList<FIELD_TYPE> partitionColumnType = new ArrayList<FIELD_TYPE>();
    // partitionColumnType.add(FIELD_TYPE.STRING);
    fsRolling.setHivePartitionColumns(hivePartitionColumns);
    // fsRolling.setHivePartitionColumnsDataTypes(partitionColumnType);
    ArrayList<String> expressions = new ArrayList<String>();
    expressions.add("getId()");
    ArrayList<String> expressionsPartitions = new ArrayList<String>();
    expressionsPartitions.add("getDate()");
    short permission = 511;
    fsRolling.setFilePermission(permission);
    fsRolling.setAlwaysWriteToTmp(false);
    fsRolling.setMaxLength(128);
    fsRolling.setExpressionsForHiveColumns(expressions);
    fsRolling.setExpressionsForHivePartitionColumns(expressionsPartitions);
    AttributeMap.DefaultAttributeMap attributeMap = new AttributeMap.DefaultAttributeMap();
    attributeMap.put(OperatorContext.PROCESSING_MODE, ProcessingMode.AT_LEAST_ONCE);
    attributeMap.put(OperatorContext.ACTIVATION_WINDOW_ID, -1L);
    attributeMap.put(DAG.APPLICATION_ID, APP_ID);
    OperatorContext context = mockOperatorContext(OPERATOR_ID, attributeMap);
    fsRolling.setup(context);
    hiveOperator.setup(context);
    FilePartitionMapping mapping1 = new FilePartitionMapping();
    FilePartitionMapping mapping2 = new FilePartitionMapping();
    mapping1.setFilename(APP_ID + "/" + OPERATOR_ID + "/" + "2014-12-11" + "/" + "0-transaction.out.part.0");
    ArrayList<String> partitions1 = new ArrayList<String>();
    partitions1.add("2014-12-11");
    mapping1.setPartition(partitions1);
    ArrayList<String> partitions2 = new ArrayList<String>();
    partitions2.add("2014-12-12");
    mapping2.setFilename(APP_ID + "/" + OPERATOR_ID + "/" + "2014-12-12" + "/" + "0-transaction.out.part.0");
    mapping2.setPartition(partitions2);
    for (int wid = 0, total = 0; wid < NUM_WINDOWS; wid++) {
        fsRolling.beginWindow(wid);
        for (int tupleCounter = 1; tupleCounter < BLAST_SIZE && total < DATABASE_SIZE; tupleCounter++, total++) {
            InnerObj innerObj = new InnerObj();
            innerObj.setId(tupleCounter);
            innerObj.setDate("2014-12-1" + tupleCounter);
            fsRolling.input.process(innerObj);
        }
        if (wid == 7) {
            fsRolling.committed(wid - 1);
            hiveOperator.processTuple(mapping1);
            hiveOperator.processTuple(mapping2);
        }
        fsRolling.endWindow();
        if (wid == 6) {
            fsRolling.beforeCheckpoint(wid);
            fsRolling.checkpointed(wid);
        }
    }
    fsRolling.teardown();
    hiveStore.connect();
    client.execute("select * from " + tablepojo + " where dt='2014-12-11'");
    List<String> recordsInDatePartition1 = client.fetchAll();
    client.execute("select * from " + tablepojo + " where dt='2014-12-12'");
    List<String> recordsInDatePartition2 = client.fetchAll();
    client.execute("drop table " + tablepojo);
    hiveStore.disconnect();
    Assert.assertEquals(7, recordsInDatePartition1.size());
    for (int i = 0; i < recordsInDatePartition1.size(); i++) {
        LOG.debug("records in first date partition are {}", recordsInDatePartition1.get(i));
        /* Each fetched record is an Object[] holding the data value and the partition value,
           so cast it to Object[] before asserting on the id and partition columns. */
        Object record = recordsInDatePartition1.get(i);
        Object[] records = (Object[]) record;
        Assert.assertEquals(1, records[0]);
        Assert.assertEquals("2014-12-11", records[1]);
    }
    Assert.assertEquals(7, recordsInDatePartition2.size());
    for (int i = 0; i < recordsInDatePartition2.size(); i++) {
        LOG.debug("records in second date partition are {}", recordsInDatePartition2.get(i));
        Object record = recordsInDatePartition2.get(i);
        Object[] records = (Object[]) record;
        Assert.assertEquals(2, records[0]);
        Assert.assertEquals("2014-12-12", records[1]);
    }
}
Also used : ArrayList(java.util.ArrayList) FIELD_TYPE(org.apache.apex.malhar.hive.FSPojoToHiveOperator.FIELD_TYPE) AttributeMap(com.datatorrent.api.Attribute.AttributeMap) OperatorContextTestHelper.mockOperatorContext(org.apache.apex.malhar.lib.helper.OperatorContextTestHelper.mockOperatorContext) OperatorContext(com.datatorrent.api.Context.OperatorContext) FilePartitionMapping(org.apache.apex.malhar.hive.AbstractFSRollingOutputOperator.FilePartitionMapping) Test(org.junit.Test)
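
The expressions registered above ("getId()" and "getDate()") are evaluated against each incoming tuple, so the POJO only needs matching getters. Below is a minimal sketch of what such a tuple class could look like; the real InnerObj is an inner class of HiveMockTest, and the field names here are assumptions:

// Hypothetical stand-in for the InnerObj tuple used above: one getter per Hive
// column expression ("getId()" -> col1, FIELD_TYPE.INTEGER) and per partition
// expression ("getDate()" -> dt, FIELD_TYPE.STRING).
public class InnerObj {
    private int id;
    private String date;

    public int getId() {
        return id;
    }

    public void setId(int id) {
        this.id = id;
    }

    public String getDate() {
        return date;
    }

    public void setDate(String date) {
        this.date = date;
    }
}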

Example 8 with AttributeMap

Use of com.datatorrent.api.Attribute.AttributeMap in project apex-malhar by apache.

From the class SplunkInputOperatorTest, method TestSplunkInputOperator.

@Test
public void TestSplunkInputOperator() {
    SplunkStore store = new SplunkStore();
    store.setHost(HOST);
    store.setPassword(PASSWORD);
    store.setPort(PORT);
    store.setUserName(USER_NAME);
    AttributeMap.DefaultAttributeMap attributeMap = new AttributeMap.DefaultAttributeMap();
    attributeMap.put(DAG.APPLICATION_ID, APP_ID);
    OperatorContext context = mockOperatorContext(OPERATOR_ID, attributeMap);
    TestInputOperator inputOperator = new TestInputOperator();
    inputOperator.setStore(store);
    inputOperator.setEarliestTime("-1000h");
    inputOperator.setLatestTime("now");
    CollectorTestSink<Object> sink = new CollectorTestSink<Object>();
    inputOperator.outputPort.setSink(sink);
    inputOperator.setup(context);
    inputOperator.beginWindow(0);
    inputOperator.emitTuples();
    inputOperator.endWindow();
    Assert.assertEquals("rows from splunk", 100, sink.collectedTuples.size());
}
Also used : AttributeMap(com.datatorrent.api.Attribute.AttributeMap) OperatorContext(com.datatorrent.api.Context.OperatorContext) OperatorContextTestHelper.mockOperatorContext(org.apache.apex.malhar.lib.helper.OperatorContextTestHelper.mockOperatorContext) CollectorTestSink(org.apache.apex.malhar.lib.testbench.CollectorTestSink) Test(org.junit.Test)
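
The attribute map and mocked context built here are the same scaffolding every example on this page uses; the Splunk test only needs DAG.APPLICATION_ID, while the Hive tests also set PROCESSING_MODE and ACTIVATION_WINDOW_ID. A minimal sketch of just that shared setup, assuming APP_ID and OPERATOR_ID are test constants as in the examples above (the test method name is hypothetical):

import com.datatorrent.api.Attribute.AttributeMap;
import com.datatorrent.api.Context.OperatorContext;
import com.datatorrent.api.DAG;
import org.junit.Test;
import static org.apache.apex.malhar.lib.helper.OperatorContextTestHelper.mockOperatorContext;

@Test
public void testOperatorWithMockContext() {
    // Build the attribute map, wrap it in a mocked OperatorContext,
    // and hand it to the operator under test.
    AttributeMap.DefaultAttributeMap attributeMap = new AttributeMap.DefaultAttributeMap();
    attributeMap.put(DAG.APPLICATION_ID, APP_ID);
    OperatorContext context = mockOperatorContext(OPERATOR_ID, attributeMap);
    // operatorUnderTest.setup(context);
    // drive a window: beginWindow(0), emitTuples(), endWindow(),
    // then assert on a CollectorTestSink attached to the output port.
}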

Example 9 with AttributeMap

Use of com.datatorrent.api.Attribute.AttributeMap in project apex-malhar by apache.

From the class CouchBaseInputOperatorTest, method TestCouchBaseInputOperator.

@Test
public void TestCouchBaseInputOperator() throws Exception {
    BucketConfiguration bucketConfiguration = new BucketConfiguration();
    CouchbaseConnectionFactoryBuilder cfb = new CouchbaseConnectionFactoryBuilder();
    CouchbaseMock mockCouchbase1 = createMock("default", "", bucketConfiguration);
    CouchbaseMock mockCouchbase2 = createMock("default", "", bucketConfiguration);
    mockCouchbase1.start();
    mockCouchbase1.waitForStartup();
    List<URI> uriList = new ArrayList<URI>();
    int port1 = mockCouchbase1.getHttpPort();
    logger.debug("port is {}", port1);
    mockCouchbase2.start();
    mockCouchbase2.waitForStartup();
    int port2 = mockCouchbase2.getHttpPort();
    logger.debug("port is {}", port2);
    uriList.add(new URI("http", null, "localhost", port1, "/pools", "", ""));
    connectionFactory = cfb.buildCouchbaseConnection(uriList, bucketConfiguration.name, bucketConfiguration.password);
    client = new CouchbaseClient(connectionFactory);
    CouchBaseStore store = new CouchBaseStore();
    keyList = new ArrayList<String>();
    store.setBucket(bucketConfiguration.name);
    store.setPasswordConfig(password);
    store.setPassword(bucketConfiguration.password);
    store.setUriString("localhost:" + port1 + "," + "localhost:" + port1);
    // couchbaseBucket.getCouchServers();
    AttributeMap.DefaultAttributeMap attributeMap = new AttributeMap.DefaultAttributeMap();
    attributeMap.put(DAG.APPLICATION_ID, APP_ID);
    TestInputOperator inputOperator = new TestInputOperator();
    inputOperator.setStore(store);
    inputOperator.insertEventsInTable(10);
    CollectorTestSink<Object> sink = new CollectorTestSink<Object>();
    inputOperator.outputPort.setSink(sink);
    List<Partition<AbstractCouchBaseInputOperator<String>>> partitions = Lists.newArrayList();
    Collection<Partition<AbstractCouchBaseInputOperator<String>>> newPartitions = inputOperator.definePartitions(partitions, new PartitioningContextImpl(null, 0));
    Assert.assertEquals(2, newPartitions.size());
    for (Partition<AbstractCouchBaseInputOperator<String>> p : newPartitions) {
        Assert.assertNotSame(inputOperator, p.getPartitionedInstance());
    }
    // Collect all operators in a list
    List<AbstractCouchBaseInputOperator<String>> opers = Lists.newArrayList();
    for (Partition<AbstractCouchBaseInputOperator<String>> p : newPartitions) {
        TestInputOperator oi = (TestInputOperator) p.getPartitionedInstance();
        oi.setServerURIString("localhost:" + port1);
        oi.setStore(store);
        oi.setup(null);
        oi.outputPort.setSink(sink);
        opers.add(oi);
        port1 = port2;
    }
    sink.clear();
    int wid = 0;
    for (int i = 0; i < 10; i++) {
        for (AbstractCouchBaseInputOperator<String> o : opers) {
            o.beginWindow(wid);
            o.emitTuples();
            o.endWindow();
        }
        wid++;
    }
    Assert.assertEquals("Tuples read should be same ", 10, sink.collectedTuples.size());
    for (AbstractCouchBaseInputOperator<String> o : opers) {
        o.teardown();
    }
    mockCouchbase1.stop();
    mockCouchbase2.stop();
}
Also used : CouchbaseConnectionFactoryBuilder(com.couchbase.client.CouchbaseConnectionFactoryBuilder) ArrayList(java.util.ArrayList) URI(java.net.URI) CouchbaseClient(com.couchbase.client.CouchbaseClient) Partition(com.datatorrent.api.Partitioner.Partition) CouchbaseMock(org.couchbase.mock.CouchbaseMock) BucketConfiguration(org.couchbase.mock.BucketConfiguration) AttributeMap(com.datatorrent.api.Attribute.AttributeMap) PartitioningContextImpl(org.apache.apex.malhar.lib.partitioner.StatelessPartitionerTest.PartitioningContextImpl) CollectorTestSink(org.apache.apex.malhar.lib.testbench.CollectorTestSink) Test(org.junit.Test)
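
Once definePartitions has produced the partitioned instances, the test drives them through streaming windows by hand. Below is a minimal sketch of that loop generalized to any list of input operators; driveWindows is a hypothetical helper name:

import java.util.List;
import com.datatorrent.api.InputOperator;

// Minimal sketch: manually drive a fixed number of streaming windows over a set
// of partitioned input operator instances, as the loop above does with `opers`.
static void driveWindows(List<? extends InputOperator> operators, int windowCount) {
    for (long windowId = 0; windowId < windowCount; windowId++) {
        for (InputOperator operator : operators) {
            operator.beginWindow(windowId);   // open the window on this partition
            operator.emitTuples();            // let the partition emit its tuples
            operator.endWindow();             // close the window before moving on
        }
    }
}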

Example 10 with AttributeMap

Use of com.datatorrent.api.Attribute.AttributeMap in project apex-malhar by apache.

From the class CouchBasePOJOTest, method TestCouchBaseInputOperator.

@Test
public void TestCouchBaseInputOperator() {
    CouchBaseWindowStore store = new CouchBaseWindowStore();
    System.setProperty("viewmode", "development");
    keyList = new ArrayList<String>();
    store.setBucket(bucket);
    store.setPassword(password);
    store.setUriString(uri);
    try {
        store.connect();
    } catch (IOException ex) {
        DTThrowable.rethrow(ex);
    }
    store.getInstance().flush();
    AttributeMap.DefaultAttributeMap attributeMap = new AttributeMap.DefaultAttributeMap();
    attributeMap.put(DAG.APPLICATION_ID, APP_ID);
    OperatorContext context = mockOperatorContext(OPERATOR_ID, attributeMap);
    TestInputOperator inputOperator = new TestInputOperator();
    inputOperator.setStore(store);
    inputOperator.setOutputClass("org.apache.apex.malhar.contrib.couchbase.TestComplexPojoInput");
    inputOperator.insertEventsInTable(2);
    try {
        Thread.sleep(10000);
    } catch (InterruptedException ex) {
        throw new RuntimeException(ex);
    }
    inputOperator.createAndFetchViewQuery1();
    try {
        Thread.sleep(1000);
    } catch (InterruptedException ex) {
        throw new RuntimeException(ex);
    }
    CollectorTestSink<Object> sink = new CollectorTestSink<Object>();
    inputOperator.outputPort.setSink(sink);
    inputOperator.setup(context);
    inputOperator.setDesignDocumentName(DESIGN_DOC_ID1);
    inputOperator.setViewName(TEST_VIEW1);
    inputOperator.beginWindow(0);
    inputOperator.emitTuples();
    inputOperator.endWindow();
    logger.debug("collected tuples are {}", sink.collectedTuples.size());
    int count = 0;
    for (Object o : sink.collectedTuples) {
        count++;
        TestComplexPojoInput object = (TestComplexPojoInput) o;
        if (count == 1) {
            Assert.assertEquals("name set in testpojo", "test", object.getName());
            Assert.assertEquals("map in testpojo", "{test=12345}", object.getMap().toString());
            Assert.assertEquals("age in testpojo", "23", object.getAge().toString());
        }
        if (count == 2) {
            Assert.assertEquals("name set in testpojo", "test1", object.getName());
            Assert.assertEquals("map in testpojo", "{test2=12345}", object.getMap().toString());
            Assert.assertEquals("age in testpojo", "12", object.getAge().toString());
        }
    }
    sink.clear();
    store.client.deleteDesignDoc(DESIGN_DOC_ID1);
    inputOperator.teardown();
}
Also used : IOException(java.io.IOException) AttributeMap(com.datatorrent.api.Attribute.AttributeMap) OperatorContextTestHelper.mockOperatorContext(org.apache.apex.malhar.lib.helper.OperatorContextTestHelper.mockOperatorContext) OperatorContext(com.datatorrent.api.Context.OperatorContext) CollectorTestSink(org.apache.apex.malhar.lib.testbench.CollectorTestSink) Test(org.junit.Test)
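
The tuples collected above are instances of the class named in setOutputClass(...), and the assertions only rely on three getters. A minimal, hypothetical sketch of what that output POJO could look like, with field types inferred from the assertions (the real TestComplexPojoInput lives in the apex-malhar contrib test sources):

import java.util.Map;

// Hypothetical sketch of the output POJO the view rows are mapped into; only the
// getters exercised by the assertions above are shown, and setters are omitted.
public class TestComplexPojoInput {
    private String name;
    private Map<String, Object> map;
    private Integer age;

    public String getName() {
        return name;
    }

    public Map<String, Object> getMap() {
        return map;
    }

    public Integer getAge() {
        return age;
    }
}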

Aggregations

AttributeMap (com.datatorrent.api.Attribute.AttributeMap): 12
OperatorContext (com.datatorrent.api.Context.OperatorContext): 10
OperatorContextTestHelper.mockOperatorContext (org.apache.apex.malhar.lib.helper.OperatorContextTestHelper.mockOperatorContext): 10
Test (org.junit.Test): 10
ArrayList (java.util.ArrayList): 7
FilePartitionMapping (org.apache.apex.malhar.hive.AbstractFSRollingOutputOperator.FilePartitionMapping): 4
CollectorTestSink (org.apache.apex.malhar.lib.testbench.CollectorTestSink): 4
CouchbaseConnectionFactoryBuilder (com.couchbase.client.CouchbaseConnectionFactoryBuilder): 2
Attribute (com.datatorrent.api.Attribute): 2
IOException (java.io.IOException): 2
URI (java.net.URI): 2
HashMap (java.util.HashMap): 2
TestPortContext (org.apache.apex.malhar.lib.helper.TestPortContext): 2
BucketConfiguration (org.couchbase.mock.BucketConfiguration): 2
CouchbaseMock (org.couchbase.mock.CouchbaseMock): 2
CouchbaseClient (com.couchbase.client.CouchbaseClient): 1
Partition (com.datatorrent.api.Partitioner.Partition): 1
Kryo (com.esotericsoftware.kryo.Kryo): 1
FieldSerializer (com.esotericsoftware.kryo.serializers.FieldSerializer): 1
ResultSet (java.sql.ResultSet): 1