Search in sources :

Example 46 with HCatFieldSchema

use of org.apache.hive.hcatalog.data.schema.HCatFieldSchema in project flink by apache.

the class HCatInputFormatBase method getFields.

/**
 * Restricts the output of this InputFormat to the given fields, in the given order.
 *
 * <p>Each name is looked up in the current output schema. The current output schema is then
 * replaced by one made up of only the looked-up fields, and its serialized form is written to
 * the configuration under {@code mapreduce.lib.hcat.output.schema}.
 *
 * @param fields The names of the fields to return, in the order they should be returned.
 * @return This InputFormat, so that calls can be chained.
 * @throws java.io.IOException declared by the signature; presumably raised when the schema
 *         lookup or the schema serialization fails (not visible from this method alone).
 */
public HCatInputFormatBase<T> getFields(String... fields) throws IOException {
    // collect the requested field schemas, preserving the caller's ordering
    final ArrayList<HCatFieldSchema> selected = new ArrayList<HCatFieldSchema>(fields.length);
    for (int i = 0; i < fields.length; i++) {
        final HCatFieldSchema fieldSchema = this.outputSchema.get(fields[i]);
        selected.add(fieldSchema);
    }
    // the projected schema becomes the new output schema
    this.outputSchema = new HCatSchema(selected);

    // keep the configuration in sync with the new output schema
    final String serializedSchema = HCatUtil.serialize(this.outputSchema);
    configuration.set("mapreduce.lib.hcat.output.schema", serializedSchema);
    return this;
}
Also used : HCatSchema(org.apache.hive.hcatalog.data.schema.HCatSchema) ArrayList(java.util.ArrayList) HCatFieldSchema(org.apache.hive.hcatalog.data.schema.HCatFieldSchema)

Example 47 with HCatFieldSchema

use of org.apache.hive.hcatalog.data.schema.HCatFieldSchema in project hive by apache.

the class TestHCatClientNotification method dropTable.

// TODO - Currently no way to test alter table, as this interface doesn't support alter table
@Test
public void dropTable() throws Exception {
    final String dbName = "default";
    final String tableName = "hcatdroptable";

    // Create a one-column table and drop it again right away.
    HCatTable table = new HCatTable(dbName, tableName);
    HCatFieldSchema onlyColumn = new HCatFieldSchema("onecol", TypeInfoFactory.stringTypeInfo, "");
    table.cols(Arrays.asList(onlyColumn));
    HCatCreateTableDesc createDesc = HCatCreateTableDesc.create(table).build();
    hCatClient.createTable(createDesc);
    hCatClient.dropTable(dbName, tableName, false);

    // Two notifications are expected after firstEventId; the second one must be the drop event.
    List<HCatNotificationEvent> notifications = hCatClient.getNextNotification(firstEventId, 0, null);
    assertEquals(2, notifications.size());
    HCatNotificationEvent dropEvent = notifications.get(1);

    // Check the event envelope.
    assertEquals(firstEventId + 2, dropEvent.getEventId());
    assertTrue(dropEvent.getEventTime() >= startTime);
    assertEquals(HCatConstants.HCAT_DROP_TABLE_EVENT, dropEvent.getEventType());
    assertEquals(dbName, dropEvent.getDbName());
    assertEquals(tableName, dropEvent.getTableName());

    // Check the decoded message payload.
    String rawMessage = dropEvent.getMessage();
    DropTableMessage decoded = md.getDropTableMessage(rawMessage);
    assertEquals(dbName, decoded.getDB());
    assertEquals(tableName, decoded.getTable());
    assertEquals(TableType.MANAGED_TABLE.toString(), decoded.getTableType());
}
Also used : DropTableMessage(org.apache.hive.hcatalog.messaging.DropTableMessage) HCatFieldSchema(org.apache.hive.hcatalog.data.schema.HCatFieldSchema) Test(org.junit.Test)

Example 48 with HCatFieldSchema

use of org.apache.hive.hcatalog.data.schema.HCatFieldSchema in project hive by apache.

the class TestHCatClientNotification method createTable.

/**
 * Creates a single-column table and verifies that exactly one notification is returned
 * after {@code firstEventId}, that it is a create-table event carrying the expected
 * database/table names, and that the table fetched back from the client does not differ
 * from the definition that was submitted.
 */
@Test
public void createTable() throws Exception {
    String dbName = "default";
    String tableName = "hcatcreatetable";
    HCatTable table = new HCatTable(dbName, tableName);
    table.cols(Arrays.asList(new HCatFieldSchema("onecol", TypeInfoFactory.stringTypeInfo, "")));
    hCatClient.createTable(HCatCreateTableDesc.create(table).build());
    List<HCatNotificationEvent> events = hCatClient.getNextNotification(firstEventId, 0, null);
    assertEquals(1, events.size());
    HCatNotificationEvent event = events.get(0);
    assertEquals(firstEventId + 1, event.getEventId());
    assertTrue(event.getEventTime() >= startTime);
    assertEquals(HCatConstants.HCAT_CREATE_TABLE_EVENT, event.getEventType());
    assertEquals(dbName, event.getDbName());
    // Compare against the local rather than a second copy of the literal, so the two cannot drift apart.
    assertEquals(tableName, event.getTableName());
    // Parse the message field
    CreateTableMessage createTableMessage = md.getCreateTableMessage(event.getMessage());
    assertEquals(dbName, createTableMessage.getDB());
    assertEquals(tableName, createTableMessage.getTable());
    assertEquals(TableType.MANAGED_TABLE.toString(), createTableMessage.getTableType());
    // fetch the table marked by the message and compare
    HCatTable createdTable = hCatClient.getTable(dbName, tableName);
    // assertEquals (instead of assertTrue(a.equals(b))) reports the actual diff when the check fails.
    assertEquals(HCatTable.NO_DIFF, createdTable.diff(table));
}
Also used : CreateTableMessage(org.apache.hive.hcatalog.messaging.CreateTableMessage) HCatFieldSchema(org.apache.hive.hcatalog.data.schema.HCatFieldSchema) Test(org.junit.Test)

Example 49 with HCatFieldSchema

use of org.apache.hive.hcatalog.data.schema.HCatFieldSchema in project hive by apache.

the class TestHCatClientNotification method addPartition.

@Test
public void addPartition() throws Exception {
    final String dbName = "default";
    final String tableName = "hcataddparttable";
    final String partColName = "pc";

    // Create a table with one data column and one partition column.
    HCatTable table = new HCatTable(dbName, tableName);
    HCatFieldSchema partitionColumn = new HCatFieldSchema(partColName, TypeInfoFactory.stringTypeInfo, "");
    table.partCol(partitionColumn);
    HCatFieldSchema dataColumn = new HCatFieldSchema("onecol", TypeInfoFactory.stringTypeInfo, "");
    table.cols(Arrays.asList(dataColumn));
    hCatClient.createTable(HCatCreateTableDesc.create(table).build());

    // Add a single partition to that table.
    final String partitionValue = "testpart";
    Map<String, String> spec = new HashMap<String, String>(1);
    spec.put(partColName, partitionValue);
    HCatPartition expected = new HCatPartition(table, spec, null);
    HCatAddPartitionDesc addDesc = HCatAddPartitionDesc.create(expected).build();
    hCatClient.addPartition(addDesc);

    // Two notifications are expected after firstEventId; the second one must be the add-partition event.
    List<HCatNotificationEvent> notifications = hCatClient.getNextNotification(firstEventId, 0, null);
    assertEquals(2, notifications.size());
    HCatNotificationEvent addEvent = notifications.get(1);
    assertEquals(firstEventId + 2, addEvent.getEventId());
    assertTrue(addEvent.getEventTime() >= startTime);
    assertEquals(HCatConstants.HCAT_ADD_PARTITION_EVENT, addEvent.getEventType());
    assertEquals(dbName, addEvent.getDbName());
    assertEquals(tableName, addEvent.getTableName());

    // Decode the message payload and check what it refers to.
    AddPartitionMessage decoded = md.getAddPartitionMessage(addEvent.getMessage());
    assertEquals(dbName, decoded.getDB());
    assertEquals(tableName, decoded.getTable());
    assertEquals(TableType.MANAGED_TABLE.toString(), decoded.getTableType());
    List<Map<String, String>> partitionDescs = decoded.getPartitions();

    // Fetch the partition named by the first descriptor in the message and compare it with the one submitted.
    Map<String, String> firstDesc = partitionDescs.get(0);
    HCatPartition actual = hCatClient.getPartition(dbName, tableName, firstDesc);
    assertEquals(expected.getDatabaseName(), actual.getDatabaseName());
    assertEquals(expected.getTableName(), actual.getTableName());
    assertEquals(expected.getValues(), actual.getValues());
    assertEquals(expected.getColumns(), actual.getColumns());
    assertEquals(expected.getPartColumns(), actual.getPartColumns());
    assertEquals(expected.getLocation(), actual.getLocation());
}
Also used : HashMap(java.util.HashMap) AddPartitionMessage(org.apache.hive.hcatalog.messaging.AddPartitionMessage) Map(java.util.Map) HCatFieldSchema(org.apache.hive.hcatalog.data.schema.HCatFieldSchema) Test(org.junit.Test)

Example 50 with HCatFieldSchema

use of org.apache.hive.hcatalog.data.schema.HCatFieldSchema in project hive by apache.

the class TestHCatClient method testPartitionRegistrationWithCustomSchema.

/**
 * Test that partition-definitions can be replicated between HCat-instances,
 * independently of table-metadata replication.
 * 2 identical tables are created on 2 different HCat instances ("source" and "target").
 * On the source instance,
 * 1. One partition is added with the old format ("TEXTFILE").
 * 2. The table is updated with an additional column and the data-format changed to ORC.
 * 3. Another partition is added with the new format.
 * 4. The partitions' metadata is copied to the target HCat instance, without updating the target table definition.
 * 5. The partitions' metadata is tested to be an exact replica of that on the source.
 *
 * <p>Any {@link Exception} raised inside the test body is logged and converted into a test
 * failure whose message carries only {@code getMessage()} of the exception; the full stack
 * trace is available through the log only.
 *
 * @throws Exception declared by the signature; exceptions thrown inside the body are caught
 *         and reported as a failed assertion instead of being propagated.
 */
@Test
public void testPartitionRegistrationWithCustomSchema() throws Exception {
    try {
        startReplicationTargetMetaStoreIfRequired();
        final String dbName = "myDb";
        final String tableName = "myTable";
        // Start from a clean database on the source instance.
        sourceMetaStore().dropDatabase(dbName, true, HCatClient.DropDBMode.CASCADE);
        sourceMetaStore().createDatabase(HCatCreateDBDesc.create(dbName).build());
        // Wrapped in an ArrayList because the list is copied and extended further below (see newColumnSchema).
        List<HCatFieldSchema> columnSchema = new ArrayList<HCatFieldSchema>(Arrays.asList(new HCatFieldSchema("foo", Type.INT, ""), new HCatFieldSchema("bar", Type.STRING, "")));
        List<HCatFieldSchema> partitionSchema = Arrays.asList(new HCatFieldSchema("dt", Type.STRING, ""), new HCatFieldSchema("grid", Type.STRING, ""));
        HCatTable sourceTable = new HCatTable(dbName, tableName).cols(columnSchema).partCols(partitionSchema).comment("Source table.");
        sourceMetaStore().createTable(HCatCreateTableDesc.create(sourceTable).build());
        // Verify that the sourceTable was created successfully.
        sourceTable = sourceMetaStore().getTable(dbName, tableName);
        assertNotNull("Table couldn't be queried for. ", sourceTable);
        // Partitions added now should inherit table-schema, properties, etc.
        Map<String, String> partitionSpec_1 = new HashMap<String, String>();
        partitionSpec_1.put("grid", "AB");
        partitionSpec_1.put("dt", "2011_12_31");
        HCatPartition sourcePartition_1 = new HCatPartition(sourceTable, partitionSpec_1, makePartLocation(sourceTable, partitionSpec_1));
        sourceMetaStore().addPartition(HCatAddPartitionDesc.create(sourcePartition_1).build());
        assertEquals("Unexpected number of partitions. ", 1, sourceMetaStore().getPartitions(dbName, tableName).size());
        // Verify that partition_1 was added correctly, and properties were inherited from the HCatTable.
        HCatPartition addedPartition_1 = sourceMetaStore().getPartition(dbName, tableName, partitionSpec_1);
        assertEquals("Column schema doesn't match.", sourceTable.getCols(), addedPartition_1.getColumns());
        assertEquals("InputFormat doesn't match.", sourceTable.getInputFileFormat(), addedPartition_1.getInputFormat());
        assertEquals("OutputFormat doesn't match.", sourceTable.getOutputFileFormat(), addedPartition_1.getOutputFormat());
        assertEquals("SerDe doesn't match.", sourceTable.getSerdeLib(), addedPartition_1.getSerDe());
        assertEquals("SerDe params don't match.", sourceTable.getSerdeParams(), addedPartition_1.getSerdeParams());
        // Replicate table definition.
        targetMetaStore().dropDatabase(dbName, true, HCatClient.DropDBMode.CASCADE);
        targetMetaStore().createDatabase(HCatCreateDBDesc.create(dbName).build());
        // Make a copy of the source-table, as would be done across class-loaders.
        HCatTable targetTable = targetMetaStore().deserializeTable(sourceMetaStore().serializeTable(sourceTable));
        targetMetaStore().createTable(HCatCreateTableDesc.create(targetTable).build());
        // Re-read the table from the target so the comparison uses what the target actually stored.
        targetTable = targetMetaStore().getTable(dbName, tableName);
        assertEquals("Created table doesn't match the source.", HCatTable.NO_DIFF, targetTable.diff(sourceTable));
        // Modify Table schema at the source.
        List<HCatFieldSchema> newColumnSchema = new ArrayList<HCatFieldSchema>(columnSchema);
        newColumnSchema.add(new HCatFieldSchema("goo_new", Type.DOUBLE, ""));
        Map<String, String> tableParams = new HashMap<String, String>(1);
        tableParams.put("orc.compress", "ZLIB");
        // Add a column.
        sourceTable.cols(newColumnSchema).fileFormat(// Change SerDe, File I/O formats.
        "orcfile").tblProps(tableParams).serdeParam(serdeConstants.FIELD_DELIM, Character.toString('\001'));
        sourceMetaStore().updateTableSchema(dbName, tableName, sourceTable);
        // Re-read the updated table so the next partition is built from the stored definition.
        sourceTable = sourceMetaStore().getTable(dbName, tableName);
        // Add another partition to the source.
        Map<String, String> partitionSpec_2 = new HashMap<String, String>();
        partitionSpec_2.put("grid", "AB");
        partitionSpec_2.put("dt", "2012_01_01");
        HCatPartition sourcePartition_2 = new HCatPartition(sourceTable, partitionSpec_2, makePartLocation(sourceTable, partitionSpec_2));
        sourceMetaStore().addPartition(HCatAddPartitionDesc.create(sourcePartition_2).build());
        // The source table now has 2 partitions, one in TEXTFILE, the other in ORC.
        // Test adding these partitions to the target-table *without* replicating the table-change.
        List<HCatPartition> sourcePartitions = sourceMetaStore().getPartitions(dbName, tableName);
        assertEquals("Unexpected number of source partitions.", 2, sourcePartitions.size());
        List<HCatAddPartitionDesc> addPartitionDescs = new ArrayList<HCatAddPartitionDesc>(sourcePartitions.size());
        for (HCatPartition partition : sourcePartitions) {
            addPartitionDescs.add(HCatAddPartitionDesc.create(partition).build());
        }
        targetMetaStore().addPartitions(addPartitionDescs);
        List<HCatPartition> targetPartitions = targetMetaStore().getPartitions(dbName, tableName);
        assertEquals("Expected the same number of partitions. ", sourcePartitions.size(), targetPartitions.size());
        // Partitions are compared pairwise by index, so this relies on both metastores
        // returning their partitions in the same order.
        for (int i = 0; i < targetPartitions.size(); ++i) {
            HCatPartition sourcePartition = sourcePartitions.get(i), targetPartition = targetPartitions.get(i);
            assertEquals("Column schema doesn't match.", sourcePartition.getColumns(), targetPartition.getColumns());
            assertEquals("InputFormat doesn't match.", sourcePartition.getInputFormat(), targetPartition.getInputFormat());
            assertEquals("OutputFormat doesn't match.", sourcePartition.getOutputFormat(), targetPartition.getOutputFormat());
            assertEquals("SerDe doesn't match.", sourcePartition.getSerDe(), targetPartition.getSerDe());
            assertEquals("SerDe params don't match.", sourcePartition.getSerdeParams(), targetPartition.getSerdeParams());
        }
    } catch (Exception unexpected) {
        // Log the full exception, then fail the test; only the exception message ends up in the failure text.
        LOG.error("Unexpected exception! ", unexpected);
        assertTrue("Unexpected exception! " + unexpected.getMessage(), false);
    }
}
Also used : HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) MetaException(org.apache.hadoop.hive.metastore.api.MetaException) HCatException(org.apache.hive.hcatalog.common.HCatException) IOException(java.io.IOException) HCatFieldSchema(org.apache.hive.hcatalog.data.schema.HCatFieldSchema) Test(org.junit.Test)

Aggregations

HCatFieldSchema (org.apache.hive.hcatalog.data.schema.HCatFieldSchema)61 ArrayList (java.util.ArrayList)34 Test (org.junit.Test)30 HCatException (org.apache.hive.hcatalog.common.HCatException)22 IOException (java.io.IOException)21 HCatSchema (org.apache.hive.hcatalog.data.schema.HCatSchema)21 HashMap (java.util.HashMap)19 Configuration (org.apache.hadoop.conf.Configuration)15 MetaException (org.apache.hadoop.hive.metastore.api.MetaException)15 FieldSchema (org.apache.hadoop.hive.metastore.api.FieldSchema)7 ResourceSchema (org.apache.pig.ResourceSchema)6 HCatTable (org.apache.hive.hcatalog.api.HCatTable)5 ResourceFieldSchema (org.apache.pig.ResourceSchema.ResourceFieldSchema)5 List (java.util.List)4 Map (java.util.Map)4 Properties (java.util.Properties)4 Path (org.apache.hadoop.fs.Path)4 StorageDescriptor (org.apache.hadoop.hive.metastore.api.StorageDescriptor)3 FrontendException (org.apache.pig.impl.logicalLayer.FrontendException)3 FieldSchema (org.apache.pig.impl.logicalLayer.schema.Schema.FieldSchema)3