Search in sources :

Example 1 with FIELD_TYPE

Use of org.apache.apex.malhar.hive.FSPojoToHiveOperator.FIELD_TYPE in the project apex-malhar by Apache.

The following example is taken from the class HiveMockTest, method testInsertPOJO.

/**
 * End-to-end test of the POJO-to-Hive pipeline: {@code FSPojoToHiveOperator} writes
 * {@code InnerObj} tuples as rolling files partitioned by date under {@code testdir},
 * then {@code HiveOperator} loads the committed files into the {@code tablepojo} Hive
 * table, and the test queries each date partition to verify the expected records.
 *
 * @throws Exception if store setup, the operator lifecycle, or a Hive query fails
 */
@Test
public void testInsertPOJO() throws Exception {
    HiveStore hiveStore = createStore(null);
    hiveStore.setFilepath(testdir);
    // Table is partitioned by a single date column "dt" and has one data column "col1".
    ArrayList<String> hivePartitionColumns = new ArrayList<String>();
    hivePartitionColumns.add("dt");
    ArrayList<String> hiveColumns = new ArrayList<String>();
    hiveColumns.add("col1");
    hiveInitializePOJODatabase(createStore(null));
    HiveOperator hiveOperator = new HiveOperator();
    hiveOperator.setStore(hiveStore);
    hiveOperator.setTablename(tablepojo);
    hiveOperator.setHivePartitionColumns(hivePartitionColumns);
    FSPojoToHiveOperator fsRolling = new FSPojoToHiveOperator();
    fsRolling.setFilePath(testdir);
    fsRolling.setHiveColumns(hiveColumns);
    // Column types mirror the POJO getters configured below: getId() -> INTEGER,
    // getDate() -> STRING (the partition value).
    ArrayList<FIELD_TYPE> fieldtypes = new ArrayList<FIELD_TYPE>();
    ArrayList<FIELD_TYPE> partitiontypes = new ArrayList<FIELD_TYPE>();
    fieldtypes.add(FIELD_TYPE.INTEGER);
    partitiontypes.add(FIELD_TYPE.STRING);
    fsRolling.setHiveColumnDataTypes(fieldtypes);
    fsRolling.setHivePartitionColumnDataTypes(partitiontypes);
    fsRolling.setHivePartitionColumns(hivePartitionColumns);
    // Getter expressions evaluated against each incoming POJO to extract the
    // column value and the partition value, respectively.
    ArrayList<String> expressions = new ArrayList<String>();
    expressions.add("getId()");
    ArrayList<String> expressionsPartitions = new ArrayList<String>();
    expressionsPartitions.add("getDate()");
    // 0777 (decimal 511): world-readable/writable/executable output files.
    short permission = 0777;
    fsRolling.setFilePermission(permission);
    fsRolling.setAlwaysWriteToTmp(false);
    fsRolling.setMaxLength(128);
    fsRolling.setExpressionsForHiveColumns(expressions);
    fsRolling.setExpressionsForHivePartitionColumns(expressionsPartitions);
    AttributeMap.DefaultAttributeMap attributeMap = new AttributeMap.DefaultAttributeMap();
    attributeMap.put(OperatorContext.PROCESSING_MODE, ProcessingMode.AT_LEAST_ONCE);
    attributeMap.put(OperatorContext.ACTIVATION_WINDOW_ID, -1L);
    attributeMap.put(DAG.APPLICATION_ID, APP_ID);
    OperatorContext context = mockOperatorContext(OPERATOR_ID, attributeMap);
    fsRolling.setup(context);
    hiveOperator.setup(context);
    // Mappings point HiveOperator at the rolled files the writer produces, one per
    // date partition; the path layout is <appId>/<operatorId>/<date>/<part file>.
    FilePartitionMapping mapping1 = new FilePartitionMapping();
    FilePartitionMapping mapping2 = new FilePartitionMapping();
    mapping1.setFilename(APP_ID + "/" + OPERATOR_ID + "/" + "2014-12-11" + "/" + "0-transaction.out.part.0");
    ArrayList<String> partitions1 = new ArrayList<String>();
    partitions1.add("2014-12-11");
    mapping1.setPartition(partitions1);
    ArrayList<String> partitions2 = new ArrayList<String>();
    partitions2.add("2014-12-12");
    mapping2.setFilename(APP_ID + "/" + OPERATOR_ID + "/" + "2014-12-12" + "/" + "0-transaction.out.part.0");
    mapping2.setPartition(partitions2);
    // Drive the operator through NUM_WINDOWS application windows. tupleCounter starts
    // at 1, so the getDate() value alternates between "2014-12-11" and "2014-12-12".
    for (int wid = 0, total = 0; wid < NUM_WINDOWS; wid++) {
        fsRolling.beginWindow(wid);
        for (int tupleCounter = 1; tupleCounter < BLAST_SIZE && total < DATABASE_SIZE; tupleCounter++, total++) {
            InnerObj innerObj = new InnerObj();
            innerObj.setId(tupleCounter);
            innerObj.setDate("2014-12-1" + tupleCounter);
            fsRolling.input.process(innerObj);
        }
        // At window 7, acknowledge the window checkpointed below (wid == 6) as
        // committed, then hand the finalized files to HiveOperator for loading.
        if (wid == 7) {
            fsRolling.committed(wid - 1);
            hiveOperator.processTuple(mapping1);
            hiveOperator.processTuple(mapping2);
        }
        fsRolling.endWindow();
        // Checkpoint window 6 so that committed(6) above has a window to finalize.
        if (wid == 6) {
            fsRolling.beforeCheckpoint(wid);
            fsRolling.checkpointed(wid);
        }
    }
    fsRolling.teardown();
    hiveStore.connect();
    client.execute("select * from " + tablepojo + " where dt='2014-12-11'");
    List<String> recordsInDatePartition1 = client.fetchAll();
    client.execute("select * from " + tablepojo + " where dt='2014-12-12'");
    List<String> recordsInDatePartition2 = client.fetchAll();
    client.execute("drop table " + tablepojo);
    hiveStore.disconnect();
    Assert.assertEquals(7, recordsInDatePartition1.size());
    for (int i = 0; i < recordsInDatePartition1.size(); i++) {
        LOG.debug("records in first date partition are {}", recordsInDatePartition1.get(i));
        /* An array containing partition and data is returned as a string record, hence we need
           to upcast it to an object first and then downcast to Object[] to use in Assert. */
        Object record = recordsInDatePartition1.get(i);
        Object[] records = (Object[]) record;
        // Every record in this partition carries id 1 and the partition date.
        Assert.assertEquals(1, records[0]);
        Assert.assertEquals("2014-12-11", records[1]);
    }
    Assert.assertEquals(7, recordsInDatePartition2.size());
    for (int i = 0; i < recordsInDatePartition2.size(); i++) {
        LOG.debug("records in second date partition are {}", recordsInDatePartition2.get(i));
        Object record = recordsInDatePartition2.get(i);
        Object[] records = (Object[]) record;
        Assert.assertEquals(2, records[0]);
        Assert.assertEquals("2014-12-12", records[1]);
    }
}
Also used : ArrayList(java.util.ArrayList) FIELD_TYPE(org.apache.apex.malhar.hive.FSPojoToHiveOperator.FIELD_TYPE) AttributeMap(com.datatorrent.api.Attribute.AttributeMap) OperatorContextTestHelper.mockOperatorContext(org.apache.apex.malhar.lib.helper.OperatorContextTestHelper.mockOperatorContext) OperatorContext(com.datatorrent.api.Context.OperatorContext) FilePartitionMapping(org.apache.apex.malhar.hive.AbstractFSRollingOutputOperator.FilePartitionMapping) Test(org.junit.Test)

Aggregations

AttributeMap (com.datatorrent.api.Attribute.AttributeMap)1 OperatorContext (com.datatorrent.api.Context.OperatorContext)1 ArrayList (java.util.ArrayList)1 FilePartitionMapping (org.apache.apex.malhar.hive.AbstractFSRollingOutputOperator.FilePartitionMapping)1 FIELD_TYPE (org.apache.apex.malhar.hive.FSPojoToHiveOperator.FIELD_TYPE)1 OperatorContextTestHelper.mockOperatorContext (org.apache.apex.malhar.lib.helper.OperatorContextTestHelper.mockOperatorContext)1 Test (org.junit.Test)1