Search in sources:

Example 1 with HCatFieldSchema

use of org.apache.hive.hcatalog.data.schema.HCatFieldSchema in project hive by apache.

the class JsonSerDe method populateRecord.

private void populateRecord(List<Object> r, JsonToken token, JsonParser p, HCatSchema s) throws IOException {
    if (token != JsonToken.FIELD_NAME) {
        throw new IOException("Field name expected");
    }
    String fieldName = p.getText();
    Integer fpos = s.getPosition(fieldName);
    if (fpos == null) {
        fpos = getPositionFromHiveInternalColumnName(fieldName);
        LOG.debug("NPE finding position for field [{}] in schema [{}]," + " attempting to check if it is an internal column name like _col0", fieldName, s);
        if (fpos == -1) {
            skipValue(p);
            // unknown field, we return. We'll continue from the next field onwards.
            return;
        }
        // The field name matched the Hive internal column-name pattern (e.g. _col0), so it
        // must also agree with the schema column at the position decoded from that name.
        if (!fieldName.equalsIgnoreCase(getHiveInternalColumnName(fpos))) {
            LOG.error("Hive internal column name {} and position " + "encoding {} for the column name are at odds", fieldName, fpos);
            throw new IOException("Hive internal column name (" + fieldName + ") and position encoding (" + fpos + ") for the column name are at odds");
        }
    // If we reached here, then we were successful at finding an alternate internal
    // column mapping, and we're about to proceed.
    }
    HCatFieldSchema hcatFieldSchema = s.getFields().get(fpos);
    Object currField = extractCurrentField(p, hcatFieldSchema, false);
    r.set(fpos, currField);
}
Also used : IOException(java.io.IOException) HCatFieldSchema(org.apache.hive.hcatalog.data.schema.HCatFieldSchema)
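
For context, populateRecord is called once per field while JsonSerDe's streaming parser walks a JSON record, resolving each field name to a schema position and slotting the extracted value into the row. Below is a minimal sketch of driving the SerDe from client code; the column layout and the sample JSON document are illustrative assumptions, not part of the example above.

// Hedged sketch: configuring HCatalog's JsonSerDe and deserializing one JSON record.
// The column names/types ("id int, name string") and the JSON payload are assumptions.
import java.util.Properties;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.serde.serdeConstants;
import org.apache.hadoop.io.Text;
import org.apache.hive.hcatalog.data.JsonSerDe;

public class JsonSerDeSketch {
    public static void main(String[] args) throws Exception {
        Properties props = new Properties();
        props.setProperty(serdeConstants.LIST_COLUMNS, "id,name");
        props.setProperty(serdeConstants.LIST_COLUMN_TYPES, "int,string");

        JsonSerDe serde = new JsonSerDe();
        serde.initialize(new Configuration(), props);

        // deserialize() walks the JSON tokens; for each FIELD_NAME token a method like
        // populateRecord() above fills the corresponding position in the row.
        Object row = serde.deserialize(new Text("{\"id\": 1, \"name\": \"alice\"}"));
        System.out.println(row);
    }
}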

Example 2 with HCatFieldSchema

use of org.apache.hive.hcatalog.data.schema.HCatFieldSchema in project hive by apache.

the class HCatBaseInputFormat method getTableSchema.

/**
   * Gets the HCatTable schema for the table specified in the HCatInputFormat.setInput call
   * on the specified job context. This information is available only after HCatInputFormat.setInput
   * has been called for a JobContext.
   * @param conf the Configuration object
   * @return the table schema
   * @throws IOException if HCatInputFormat.setInput has not been called
   *                     for the current context
   */
public static HCatSchema getTableSchema(Configuration conf) throws IOException {
    InputJobInfo inputJobInfo = getJobInfo(conf);
    HCatSchema allCols = new HCatSchema(new LinkedList<HCatFieldSchema>());
    for (HCatFieldSchema field : inputJobInfo.getTableInfo().getDataColumns().getFields()) {
        allCols.append(field);
    }
    for (HCatFieldSchema field : inputJobInfo.getTableInfo().getPartitionColumns().getFields()) {
        allCols.append(field);
    }
    return allCols;
}
Also used : HCatSchema(org.apache.hive.hcatalog.data.schema.HCatSchema) HCatFieldSchema(org.apache.hive.hcatalog.data.schema.HCatFieldSchema)
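
The Javadoc above stresses the ordering: the schema can only be read back after HCatInputFormat.setInput has stored the InputJobInfo in the job configuration. A minimal sketch of that ordering, assuming placeholder database and table names ("default", "my_table"):

// Hedged sketch: setInput must run before getTableSchema; otherwise getJobInfo()
// finds no serialized InputJobInfo in the Configuration and throws an IOException.
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hive.hcatalog.data.schema.HCatSchema;
import org.apache.hive.hcatalog.mapreduce.HCatInputFormat;

public class TableSchemaSketch {
    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration());

        // Placeholder names for this sketch.
        HCatInputFormat.setInput(job, "default", "my_table");

        // Returns the data columns followed by the partition columns, as built above.
        HCatSchema schema = HCatInputFormat.getTableSchema(job.getConfiguration());
        System.out.println(schema.getFieldNames());
    }
}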

Example 3 with HCatFieldSchema

use of org.apache.hive.hcatalog.data.schema.HCatFieldSchema in project hive by apache.

the class FileOutputCommitterContainer method constructPartition.

/**
   * Generate partition metadata object to be used to add to metadata.
   * @param context The job context.
   * @param jobInfo The OutputJobInfo.
   * @param partLocnRoot The table-equivalent location root of the partition
   *                       (temporary dir if dynamic partition, table dir if static)
   * @param dynPartPath The path of the dynamic partition that was created
   * @param partKVs The key-value pairs that form the partition
   * @param outputSchema The output schema for the partition
   * @param params The parameters to store inside the partition
   * @param table The Table metadata object under which this Partition will reside
   * @param fs FileSystem object to operate on the underlying filesystem
   * @param grpName Group name that owns the table dir
   * @param perms FsPermission that's the default permission of the table dir.
   * @return Constructed Partition metadata object
   * @throws java.io.IOException
   */
private Partition constructPartition(JobContext context, OutputJobInfo jobInfo, String partLocnRoot, String dynPartPath, Map<String, String> partKVs, HCatSchema outputSchema, Map<String, String> params, Table table, FileSystem fs, String grpName, FsPermission perms) throws IOException {
    Partition partition = new Partition();
    partition.setDbName(table.getDbName());
    partition.setTableName(table.getTableName());
    partition.setSd(new StorageDescriptor(table.getTTable().getSd()));
    List<FieldSchema> fields = new ArrayList<FieldSchema>();
    for (HCatFieldSchema fieldSchema : outputSchema.getFields()) {
        fields.add(HCatSchemaUtils.getFieldSchema(fieldSchema));
    }
    partition.getSd().setCols(fields);
    partition.setValues(FileOutputFormatContainer.getPartitionValueList(table, partKVs));
    partition.setParameters(params);
    // Sets permissions and group name on partition dirs and files.
    Path partPath;
    if (customDynamicLocationUsed) {
        partPath = new Path(dynPartPath);
    } else if (!dynamicPartitioningUsed && Boolean.parseBoolean((String) table.getProperty("EXTERNAL")) && jobInfo.getLocation() != null && jobInfo.getLocation().length() > 0) {
        // Now, we need to de-scratchify this location - i.e., get rid of any
        // _SCRATCH[\d].?[\d]+ from the location.
        String jobLocation = jobInfo.getLocation();
        String finalLocn = jobLocation.replaceAll(Path.SEPARATOR + SCRATCH_DIR_NAME + "\\d\\.?\\d+", "");
        partPath = new Path(finalLocn);
    } else {
        partPath = new Path(partLocnRoot);
        int i = 0;
        for (FieldSchema partKey : table.getPartitionKeys()) {
            if (i++ != 0) {
                // Attempt to make the path in case it does not exist before we check
                fs.mkdirs(partPath);
                applyGroupAndPerms(fs, partPath, perms, grpName, false);
            }
            partPath = constructPartialPartPath(partPath, partKey.getName().toLowerCase(), partKVs);
        }
    }
    // Apply the group and permissions to the leaf partition and files.
    // Need not bother in case of HDFS as permission is taken care of by setting UMask
    // Attempt to make the path in case it does not exist before we check
    fs.mkdirs(partPath);
    if (!ShimLoader.getHadoopShims().getHCatShim().isFileInHDFS(fs, partPath)) {
        applyGroupAndPerms(fs, partPath, perms, grpName, true);
    }
    // Set the location in the StorageDescriptor
    if (dynamicPartitioningUsed) {
        String dynamicPartitionDestination = getFinalDynamicPartitionDestination(table, partKVs, jobInfo);
        if (harProcessor.isEnabled()) {
            harProcessor.exec(context, partition, partPath);
            partition.getSd().setLocation(harProcessor.getProcessedLocation(new Path(dynamicPartitionDestination)));
        } else {
            partition.getSd().setLocation(dynamicPartitionDestination);
        }
    } else {
        partition.getSd().setLocation(partPath.toString());
    }
    return partition;
}
Also used : Path(org.apache.hadoop.fs.Path) Partition(org.apache.hadoop.hive.metastore.api.Partition) HCatFieldSchema(org.apache.hive.hcatalog.data.schema.HCatFieldSchema) FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema) StorageDescriptor(org.apache.hadoop.hive.metastore.api.StorageDescriptor) ArrayList(java.util.ArrayList)
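
One step worth isolating is the "de-scratchify" replaceAll above, which strips an intermediate _SCRATCH path component from the job's output location before it is recorded in the partition's StorageDescriptor. Here is a small sketch of that same regex on an assumed path; the warehouse path and scratch suffix are illustrative, and SCRATCH_DIR_NAME is assumed to be "_SCRATCH" as the in-code comment suggests.

// Hedged sketch: stripping a "_SCRATCH<n>.<n>" component, mirroring the replaceAll
// in constructPartition(). The example path and scratch-dir value are assumptions.
public class ScratchPathSketch {
    public static void main(String[] args) {
        String SEPARATOR = "/";               // stands in for Path.SEPARATOR
        String SCRATCH_DIR_NAME = "_SCRATCH"; // assumed value of the committer's constant

        String jobLocation = "/warehouse/my_table/_SCRATCH0.123/dt=2012_01_01";
        String finalLocn =
            jobLocation.replaceAll(SEPARATOR + SCRATCH_DIR_NAME + "\\d\\.?\\d+", "");

        // Prints: /warehouse/my_table/dt=2012_01_01
        System.out.println(finalLocn);
    }
}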

Example 4 with HCatFieldSchema

use of org.apache.hive.hcatalog.data.schema.HCatFieldSchema in project hive by apache.

the class TestHCatClient method testDropPartitionsWithPartialSpec.

@Test
public void testDropPartitionsWithPartialSpec() throws Exception {
    try {
        HCatClient client = HCatClient.create(new Configuration(hcatConf));
        final String dbName = "myDb";
        final String tableName = "myTable";
        client.dropDatabase(dbName, true, HCatClient.DropDBMode.CASCADE);
        client.createDatabase(HCatCreateDBDesc.create(dbName).build());
        List<HCatFieldSchema> columnSchema = Arrays.asList(new HCatFieldSchema("foo", Type.INT, ""), new HCatFieldSchema("bar", Type.STRING, ""));
        List<HCatFieldSchema> partitionSchema = Arrays.asList(new HCatFieldSchema("dt", Type.STRING, ""), new HCatFieldSchema("grid", Type.STRING, ""));
        HCatTable table = new HCatTable(dbName, tableName).cols(columnSchema).partCols(partitionSchema);
        client.createTable(HCatCreateTableDesc.create(table, false).build());
        // Verify that the table was created successfully.
        table = client.getTable(dbName, tableName);
        assertNotNull("Table couldn't be queried for. ", table);
        Map<String, String> partitionSpec = new HashMap<String, String>();
        partitionSpec.put("grid", "AB");
        partitionSpec.put("dt", "2011_12_31");
        client.addPartition(HCatAddPartitionDesc.create(new HCatPartition(table, partitionSpec, makePartLocation(table, partitionSpec))).build());
        partitionSpec.put("grid", "AB");
        partitionSpec.put("dt", "2012_01_01");
        client.addPartition(HCatAddPartitionDesc.create(new HCatPartition(table, partitionSpec, makePartLocation(table, partitionSpec))).build());
        partitionSpec.put("dt", "2012_01_01");
        partitionSpec.put("grid", "OB");
        client.addPartition(HCatAddPartitionDesc.create(new HCatPartition(table, partitionSpec, makePartLocation(table, partitionSpec))).build());
        partitionSpec.put("dt", "2012_01_01");
        partitionSpec.put("grid", "XB");
        client.addPartition(HCatAddPartitionDesc.create(new HCatPartition(table, partitionSpec, makePartLocation(table, partitionSpec))).build());
        Map<String, String> partialPartitionSpec = new HashMap<String, String>();
        partialPartitionSpec.put("dt", "2012_01_01");
        client.dropPartitions(dbName, tableName, partialPartitionSpec, true);
        List<HCatPartition> partitions = client.getPartitions(dbName, tableName);
        assertEquals("Unexpected number of partitions.", 1, partitions.size());
        assertArrayEquals("Mismatched partition.", new String[] { "2011_12_31", "AB" }, partitions.get(0).getValues().toArray());
        List<HCatFieldSchema> partColumns = partitions.get(0).getPartColumns();
        assertEquals(2, partColumns.size());
        assertEquals("dt", partColumns.get(0).getName());
        assertEquals("grid", partColumns.get(1).getName());
        client.dropDatabase(dbName, false, HCatClient.DropDBMode.CASCADE);
    } catch (Exception unexpected) {
        LOG.error("Unexpected exception!", unexpected);
        assertTrue("Unexpected exception! " + unexpected.getMessage(), false);
    }
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) HashMap(java.util.HashMap) MetaException(org.apache.hadoop.hive.metastore.api.MetaException) HCatException(org.apache.hive.hcatalog.common.HCatException) IOException(java.io.IOException) HCatFieldSchema(org.apache.hive.hcatalog.data.schema.HCatFieldSchema) Test(org.junit.Test)

Example 5 with HCatFieldSchema

use of org.apache.hive.hcatalog.data.schema.HCatFieldSchema in project hive by apache.

the class TestHCatClient method testPartitionSpecRegistrationWithCustomSchema.

/**
   * Test that partition-definitions can be replicated between HCat-instances,
   * independently of table-metadata replication, using PartitionSpec interfaces.
   * (This is essentially the same test as testPartitionRegistrationWithCustomSchema(),
   * transliterated to use the PartitionSpec APIs.)
   * 2 identical tables are created on 2 different HCat instances ("source" and "target").
   * On the source instance,
   * 1. One partition is added with the old format ("TEXTFILE").
   * 2. The table is updated with an additional column and the data-format changed to ORC.
   * 3. Another partition is added with the new format.
   * 4. The partitions' metadata is copied to the target HCat instance, without updating the target table definition.
   * 5. The partitions' metadata is tested to be an exact replica of that on the source.
   * @throws Exception
   */
@Test
public void testPartitionSpecRegistrationWithCustomSchema() throws Exception {
    try {
        startReplicationTargetMetaStoreIfRequired();
        HCatClient sourceMetaStore = HCatClient.create(new Configuration(hcatConf));
        final String dbName = "myDb";
        final String tableName = "myTable";
        sourceMetaStore.dropDatabase(dbName, true, HCatClient.DropDBMode.CASCADE);
        sourceMetaStore.createDatabase(HCatCreateDBDesc.create(dbName).build());
        List<HCatFieldSchema> columnSchema = new ArrayList<HCatFieldSchema>(Arrays.asList(new HCatFieldSchema("foo", Type.INT, ""), new HCatFieldSchema("bar", Type.STRING, "")));
        List<HCatFieldSchema> partitionSchema = Arrays.asList(new HCatFieldSchema("dt", Type.STRING, ""), new HCatFieldSchema("grid", Type.STRING, ""));
        HCatTable sourceTable = new HCatTable(dbName, tableName).cols(columnSchema).partCols(partitionSchema).comment("Source table.");
        sourceMetaStore.createTable(HCatCreateTableDesc.create(sourceTable).build());
        // Verify that the sourceTable was created successfully.
        sourceTable = sourceMetaStore.getTable(dbName, tableName);
        assertNotNull("Table couldn't be queried for. ", sourceTable);
        // Partitions added now should inherit table-schema, properties, etc.
        Map<String, String> partitionSpec_1 = new HashMap<String, String>();
        partitionSpec_1.put("grid", "AB");
        partitionSpec_1.put("dt", "2011_12_31");
        HCatPartition sourcePartition_1 = new HCatPartition(sourceTable, partitionSpec_1, makePartLocation(sourceTable, partitionSpec_1));
        sourceMetaStore.addPartition(HCatAddPartitionDesc.create(sourcePartition_1).build());
        assertEquals("Unexpected number of partitions. ", 1, sourceMetaStore.getPartitions(dbName, tableName).size());
        // Verify that partition_1 was added correctly, and properties were inherited from the HCatTable.
        HCatPartition addedPartition_1 = sourceMetaStore.getPartition(dbName, tableName, partitionSpec_1);
        assertEquals("Column schema doesn't match.", sourceTable.getCols(), addedPartition_1.getColumns());
        assertEquals("InputFormat doesn't match.", sourceTable.getInputFileFormat(), addedPartition_1.getInputFormat());
        assertEquals("OutputFormat doesn't match.", sourceTable.getOutputFileFormat(), addedPartition_1.getOutputFormat());
        assertEquals("SerDe doesn't match.", sourceTable.getSerdeLib(), addedPartition_1.getSerDe());
        assertEquals("SerDe params don't match.", sourceTable.getSerdeParams(), addedPartition_1.getSerdeParams());
        // Replicate table definition.
        HCatClient targetMetaStore = HCatClient.create(new Configuration(replicationTargetHCatConf));
        targetMetaStore.dropDatabase(dbName, true, HCatClient.DropDBMode.CASCADE);
        targetMetaStore.createDatabase(HCatCreateDBDesc.create(dbName).build());
        // Make a copy of the source-table, as would be done across class-loaders.
        HCatTable targetTable = targetMetaStore.deserializeTable(sourceMetaStore.serializeTable(sourceTable));
        targetMetaStore.createTable(HCatCreateTableDesc.create(targetTable).build());
        targetTable = targetMetaStore.getTable(dbName, tableName);
        assertEquals("Created table doesn't match the source.", HCatTable.NO_DIFF, targetTable.diff(sourceTable));
        // Modify Table schema at the source.
        List<HCatFieldSchema> newColumnSchema = new ArrayList<HCatFieldSchema>(columnSchema);
        newColumnSchema.add(new HCatFieldSchema("goo_new", Type.DOUBLE, ""));
        Map<String, String> tableParams = new HashMap<String, String>(1);
        tableParams.put("orc.compress", "ZLIB");
        // Add a column; change the SerDe, file I/O formats, and table properties.
        sourceTable.cols(newColumnSchema)
            .fileFormat("orcfile")
            .tblProps(tableParams)
            .serdeParam(serdeConstants.FIELD_DELIM, Character.toString('\001'));
        sourceMetaStore.updateTableSchema(dbName, tableName, sourceTable);
        sourceTable = sourceMetaStore.getTable(dbName, tableName);
        // Add another partition to the source.
        Map<String, String> partitionSpec_2 = new HashMap<String, String>();
        partitionSpec_2.put("grid", "AB");
        partitionSpec_2.put("dt", "2012_01_01");
        HCatPartition sourcePartition_2 = new HCatPartition(sourceTable, partitionSpec_2, makePartLocation(sourceTable, partitionSpec_2));
        sourceMetaStore.addPartition(HCatAddPartitionDesc.create(sourcePartition_2).build());
        // The source table now has 2 partitions, one in TEXTFILE, the other in ORC.
        // Test adding these partitions to the target-table *without* replicating the table-change.
        HCatPartitionSpec sourcePartitionSpec = sourceMetaStore.getPartitionSpecs(dbName, tableName, -1);
        assertEquals("Unexpected number of source partitions.", 2, sourcePartitionSpec.size());
        // Serialize the hcatPartitionSpec.
        List<String> partitionSpecString = sourceMetaStore.serializePartitionSpec(sourcePartitionSpec);
        // Deserialize the HCatPartitionSpec using the target HCatClient instance.
        HCatPartitionSpec targetPartitionSpec = targetMetaStore.deserializePartitionSpec(partitionSpecString);
        assertEquals("Could not add the expected number of partitions.", sourcePartitionSpec.size(), targetMetaStore.addPartitionSpec(targetPartitionSpec));
        // Retrieve partitions.
        targetPartitionSpec = targetMetaStore.getPartitionSpecs(dbName, tableName, -1);
        assertEquals("Could not retrieve the expected number of partitions.", sourcePartitionSpec.size(), targetPartitionSpec.size());
        // Assert that the source and target partitions are equivalent.
        HCatPartitionSpec.HCatPartitionIterator sourceIterator = sourcePartitionSpec.getPartitionIterator();
        HCatPartitionSpec.HCatPartitionIterator targetIterator = targetPartitionSpec.getPartitionIterator();
        while (targetIterator.hasNext()) {
            assertTrue("Fewer target partitions than source.", sourceIterator.hasNext());
            HCatPartition sourcePartition = sourceIterator.next();
            HCatPartition targetPartition = targetIterator.next();
            assertEquals("Column schema doesn't match.", sourcePartition.getColumns(), targetPartition.getColumns());
            assertEquals("InputFormat doesn't match.", sourcePartition.getInputFormat(), targetPartition.getInputFormat());
            assertEquals("OutputFormat doesn't match.", sourcePartition.getOutputFormat(), targetPartition.getOutputFormat());
            assertEquals("SerDe doesn't match.", sourcePartition.getSerDe(), targetPartition.getSerDe());
            assertEquals("SerDe params don't match.", sourcePartition.getSerdeParams(), targetPartition.getSerdeParams());
        }
    } catch (Exception unexpected) {
        LOG.error("Unexpected exception! ", unexpected);
        assertTrue("Unexpected exception! " + unexpected.getMessage(), false);
    }
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) MetaException(org.apache.hadoop.hive.metastore.api.MetaException) HCatException(org.apache.hive.hcatalog.common.HCatException) IOException(java.io.IOException) HCatFieldSchema(org.apache.hive.hcatalog.data.schema.HCatFieldSchema) Test(org.junit.Test)

Aggregations

HCatFieldSchema (org.apache.hive.hcatalog.data.schema.HCatFieldSchema): 61 uses
ArrayList (java.util.ArrayList): 34 uses
Test (org.junit.Test): 30 uses
HCatException (org.apache.hive.hcatalog.common.HCatException): 22 uses
IOException (java.io.IOException): 21 uses
HCatSchema (org.apache.hive.hcatalog.data.schema.HCatSchema): 21 uses
HashMap (java.util.HashMap): 19 uses
Configuration (org.apache.hadoop.conf.Configuration): 18 uses
MetaException (org.apache.hadoop.hive.metastore.api.MetaException): 15 uses
FieldSchema (org.apache.hadoop.hive.metastore.api.FieldSchema): 7 uses
ResourceSchema (org.apache.pig.ResourceSchema): 6 uses
HCatTable (org.apache.hive.hcatalog.api.HCatTable): 5 uses
ResourceFieldSchema (org.apache.pig.ResourceSchema.ResourceFieldSchema): 5 uses
Map (java.util.Map): 4 uses
Properties (java.util.Properties): 4 uses
Path (org.apache.hadoop.fs.Path): 4 uses
List (java.util.List): 3 uses
StorageDescriptor (org.apache.hadoop.hive.metastore.api.StorageDescriptor): 3 uses
CommandNeedRetryException (org.apache.hadoop.hive.ql.CommandNeedRetryException): 3 uses
FrontendException (org.apache.pig.impl.logicalLayer.FrontendException): 3 uses