Use of org.apache.hive.hcatalog.data.schema.HCatFieldSchema in project hive by apache: the class JsonSerDe, method populateRecord.
private void populateRecord(List<Object> r, JsonToken token, JsonParser p, HCatSchema s) throws IOException {
  if (token != JsonToken.FIELD_NAME) {
    throw new IOException("Field name expected");
  }
  String fieldName = p.getText();
  Integer fpos = s.getPosition(fieldName);
  if (fpos == null) {
    fpos = getPositionFromHiveInternalColumnName(fieldName);
    LOG.debug("NPE finding position for field [{}] in schema [{}],"
        + " attempting to check if it is an internal column name like _col0", fieldName, s);
    if (fpos == -1) {
      skipValue(p);
      // Unknown field; return and continue from the next field onwards.
      return;
    }
    // The field name matched the internal-column-name pattern (e.g. _col0); verify that the
    // name actually encodes the position we resolved for it.
    if (!fieldName.equalsIgnoreCase(getHiveInternalColumnName(fpos))) {
      LOG.error("Hive internal column name {} and position "
          + "encoding {} for the column name are at odds", fieldName, fpos);
      throw new IOException("Hive internal column name (" + fieldName
          + ") and position encoding (" + fpos + ") for the column name are at odds");
    }
    // If we reached here, then we were successful at finding an alternate internal
    // column mapping, and we're about to proceed.
  }
  HCatFieldSchema hcatFieldSchema = s.getFields().get(fpos);
  Object currField = extractCurrentField(p, hcatFieldSchema, false);
  r.set(fpos, currField);
}
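For context, the fallback above relies on Hive's internal column-name convention (_col0, _col1, ...). The helpers below are a minimal sketch of what getHiveInternalColumnName and getPositionFromHiveInternalColumnName might look like; the method bodies, the regex, and the -1 sentinel are assumptions inferred from this snippet, not the actual Hive implementation.

// Minimal sketch, not the actual Hive source: helpers assumed by the fallback above.
// Hive's internal column names follow the "_col<position>" convention (_col0, _col1, ...).
String getHiveInternalColumnName(int fpos) {
  return "_col" + fpos; // assumed mapping from position to internal name
}

int getPositionFromHiveInternalColumnName(String internalName) {
  // Assumed behaviour: parse the numeric suffix, or return -1 when the name is not of the
  // "_colN" form (the fpos == -1 case handled in populateRecord).
  java.util.regex.Matcher m = java.util.regex.Pattern.compile("_col([0-9]+)").matcher(internalName);
  return m.matches() ? Integer.parseInt(m.group(1)) : -1;
}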
Use of org.apache.hive.hcatalog.data.schema.HCatFieldSchema in project hive by apache: the class HCatBaseInputFormat, method getTableSchema.
/**
 * Gets the HCatTable schema for the table specified in the HCatInputFormat.setInput call
 * on the specified job context. This information is available only after HCatInputFormat.setInput
 * has been called for a JobContext.
 * @param conf the Configuration object
 * @return the table schema
 * @throws IOException if HCatInputFormat.setInput has not been called
 *         for the current context
 */
public static HCatSchema getTableSchema(Configuration conf) throws IOException {
  InputJobInfo inputJobInfo = getJobInfo(conf);
  HCatSchema allCols = new HCatSchema(new LinkedList<HCatFieldSchema>());
  for (HCatFieldSchema field : inputJobInfo.getTableInfo().getDataColumns().getFields()) {
    allCols.append(field);
  }
  for (HCatFieldSchema field : inputJobInfo.getTableInfo().getPartitionColumns().getFields()) {
    allCols.append(field);
  }
  return allCols;
}
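A minimal usage sketch: assuming a MapReduce driver has already registered the input table via HCatInputFormat.setInput, the combined data-plus-partition schema can be read back from the job configuration. The database and table names below are placeholders.

// Hypothetical driver-side usage; "default"/"my_table" are placeholder names.
Job job = Job.getInstance(new Configuration());
HCatInputFormat.setInput(job, "default", "my_table");
// getTableSchema returns the data columns followed by the partition columns.
HCatSchema schema = HCatBaseInputFormat.getTableSchema(job.getConfiguration());
for (HCatFieldSchema field : schema.getFields()) {
  System.out.println(field.getName() + " : " + field.getTypeString());
}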
Use of org.apache.hive.hcatalog.data.schema.HCatFieldSchema in project hive by apache: the class FileOutputCommitterContainer, method constructPartition.
/**
 * Generate partition metadata object to be used to add to metadata.
 * @param context The job context.
 * @param jobInfo The OutputJobInfo.
 * @param partLocnRoot The table-equivalent location root of the partition
 *                     (temporary dir if dynamic partition, table dir if static)
 * @param dynPartPath The path of dynamic partition which is created
 * @param partKVs The key-value pairs that form the partition
 * @param outputSchema The output schema for the partition
 * @param params The parameters to store inside the partition
 * @param table The Table metadata object under which this Partition will reside
 * @param fs FileSystem object to operate on the underlying filesystem
 * @param grpName Group name that owns the table dir
 * @param perms FsPermission that's the default permission of the table dir.
 * @return Constructed Partition metadata object
 * @throws java.io.IOException
 */
private Partition constructPartition(JobContext context, OutputJobInfo jobInfo, String partLocnRoot,
    String dynPartPath, Map<String, String> partKVs, HCatSchema outputSchema, Map<String, String> params,
    Table table, FileSystem fs, String grpName, FsPermission perms) throws IOException {
  Partition partition = new Partition();
  partition.setDbName(table.getDbName());
  partition.setTableName(table.getTableName());
  partition.setSd(new StorageDescriptor(table.getTTable().getSd()));
  List<FieldSchema> fields = new ArrayList<FieldSchema>();
  for (HCatFieldSchema fieldSchema : outputSchema.getFields()) {
    fields.add(HCatSchemaUtils.getFieldSchema(fieldSchema));
  }
  partition.getSd().setCols(fields);
  partition.setValues(FileOutputFormatContainer.getPartitionValueList(table, partKVs));
  partition.setParameters(params);
  // Sets permissions and group name on partition dirs and files.
  Path partPath;
  if (customDynamicLocationUsed) {
    partPath = new Path(dynPartPath);
  } else if (!dynamicPartitioningUsed
      && Boolean.parseBoolean((String) table.getProperty("EXTERNAL"))
      && jobInfo.getLocation() != null && jobInfo.getLocation().length() > 0) {
    // Now, we need to de-scratchify this location - i.e., get rid of any
    // _SCRATCH[\d].?[\d]+ from the location.
    String jobLocation = jobInfo.getLocation();
    String finalLocn = jobLocation.replaceAll(Path.SEPARATOR + SCRATCH_DIR_NAME + "\\d\\.?\\d+", "");
    partPath = new Path(finalLocn);
  } else {
    partPath = new Path(partLocnRoot);
    int i = 0;
    for (FieldSchema partKey : table.getPartitionKeys()) {
      if (i++ != 0) {
        // Attempt to make the path in case it does not exist before we check
        fs.mkdirs(partPath);
        applyGroupAndPerms(fs, partPath, perms, grpName, false);
      }
      partPath = constructPartialPartPath(partPath, partKey.getName().toLowerCase(), partKVs);
    }
  }
  // Apply the group and permissions to the leaf partition and files.
  // Need not bother in case of HDFS as permission is taken care of by setting UMask
  // Attempt to make the path in case it does not exist before we check
  fs.mkdirs(partPath);
  if (!ShimLoader.getHadoopShims().getHCatShim().isFileInHDFS(fs, partPath)) {
    applyGroupAndPerms(fs, partPath, perms, grpName, true);
  }
  // Set the location in the StorageDescriptor
  if (dynamicPartitioningUsed) {
    String dynamicPartitionDestination = getFinalDynamicPartitionDestination(table, partKVs, jobInfo);
    if (harProcessor.isEnabled()) {
      harProcessor.exec(context, partition, partPath);
      partition.getSd().setLocation(harProcessor.getProcessedLocation(new Path(dynamicPartitionDestination)));
    } else {
      partition.getSd().setLocation(dynamicPartitionDestination);
    }
  } else {
    partition.getSd().setLocation(partPath.toString());
  }
  return partition;
}
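To illustrate the de-scratchify step above, the snippet below applies the same replaceAll pattern to a sample location. The literal "_SCRATCH" value for SCRATCH_DIR_NAME and the sample path are assumptions made purely for illustration.

// Illustrative only: assumes SCRATCH_DIR_NAME is the literal "_SCRATCH" and uses a made-up path.
String scratchDirName = "_SCRATCH";
String jobLocation = "/warehouse/my_external_table/_SCRATCH0.5891234/dt=2012_01_01";
// Same pattern as in constructPartition: strip "/_SCRATCH<digit>[.]<digits>" from the location.
String finalLocn = jobLocation.replaceAll("/" + scratchDirName + "\\d\\.?\\d+", "");
System.out.println(finalLocn); // -> /warehouse/my_external_table/dt=2012_01_01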
Use of org.apache.hive.hcatalog.data.schema.HCatFieldSchema in project hive by apache: the class TestHCatClient, method testDropPartitionsWithPartialSpec.
@Test
public void testDropPartitionsWithPartialSpec() throws Exception {
  try {
    HCatClient client = HCatClient.create(new Configuration(hcatConf));
    final String dbName = "myDb";
    final String tableName = "myTable";
    client.dropDatabase(dbName, true, HCatClient.DropDBMode.CASCADE);
    client.createDatabase(HCatCreateDBDesc.create(dbName).build());
    List<HCatFieldSchema> columnSchema = Arrays.asList(new HCatFieldSchema("foo", Type.INT, ""),
        new HCatFieldSchema("bar", Type.STRING, ""));
    List<HCatFieldSchema> partitionSchema = Arrays.asList(new HCatFieldSchema("dt", Type.STRING, ""),
        new HCatFieldSchema("grid", Type.STRING, ""));
    HCatTable table = new HCatTable(dbName, tableName).cols(columnSchema).partCols(partitionSchema);
    client.createTable(HCatCreateTableDesc.create(table, false).build());
    // Verify that the table was created successfully.
    table = client.getTable(dbName, tableName);
    assertNotNull("Table couldn't be queried for. ", table);
    Map<String, String> partitionSpec = new HashMap<String, String>();
    partitionSpec.put("grid", "AB");
    partitionSpec.put("dt", "2011_12_31");
    client.addPartition(HCatAddPartitionDesc.create(
        new HCatPartition(table, partitionSpec, makePartLocation(table, partitionSpec))).build());
    partitionSpec.put("grid", "AB");
    partitionSpec.put("dt", "2012_01_01");
    client.addPartition(HCatAddPartitionDesc.create(
        new HCatPartition(table, partitionSpec, makePartLocation(table, partitionSpec))).build());
    partitionSpec.put("dt", "2012_01_01");
    partitionSpec.put("grid", "OB");
    client.addPartition(HCatAddPartitionDesc.create(
        new HCatPartition(table, partitionSpec, makePartLocation(table, partitionSpec))).build());
    partitionSpec.put("dt", "2012_01_01");
    partitionSpec.put("grid", "XB");
    client.addPartition(HCatAddPartitionDesc.create(
        new HCatPartition(table, partitionSpec, makePartLocation(table, partitionSpec))).build());
    Map<String, String> partialPartitionSpec = new HashMap<String, String>();
    partialPartitionSpec.put("dt", "2012_01_01");
    client.dropPartitions(dbName, tableName, partialPartitionSpec, true);
    List<HCatPartition> partitions = client.getPartitions(dbName, tableName);
    assertEquals("Unexpected number of partitions.", 1, partitions.size());
    assertArrayEquals("Mismatched partition.", new String[] { "2011_12_31", "AB" },
        partitions.get(0).getValues().toArray());
    List<HCatFieldSchema> partColumns = partitions.get(0).getPartColumns();
    assertEquals(2, partColumns.size());
    assertEquals("dt", partColumns.get(0).getName());
    assertEquals("grid", partColumns.get(1).getName());
    client.dropDatabase(dbName, false, HCatClient.DropDBMode.CASCADE);
  } catch (Exception unexpected) {
    LOG.error("Unexpected exception!", unexpected);
    assertTrue("Unexpected exception! " + unexpected.getMessage(), false);
  }
}
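The behaviour under test is that HCatClient.dropPartitions accepts a partial partition specification: supplying only dt=2012_01_01 drops every partition whose dt value matches, whatever its grid value. A condensed sketch of that call follows, reusing the test's hypothetical db/table names and assuming the final boolean is an if-exists flag.

// Condensed sketch of the partial-spec drop exercised by the test above.
Map<String, String> partialPartitionSpec = new HashMap<String, String>();
partialPartitionSpec.put("dt", "2012_01_01"); // no "grid" key, so AB, OB and XB all match
client.dropPartitions("myDb", "myTable", partialPartitionSpec, true);
// Afterwards only the dt=2011_12_31/grid=AB partition remains.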
Use of org.apache.hive.hcatalog.data.schema.HCatFieldSchema in project hive by apache: the class TestHCatClient, method testPartitionSpecRegistrationWithCustomSchema.
/**
* Test that partition-definitions can be replicated between HCat-instances,
* independently of table-metadata replication, using PartitionSpec interfaces.
* (This is essentially the same test as testPartitionRegistrationWithCustomSchema(),
* transliterated to use the PartitionSpec APIs.)
* 2 identical tables are created on 2 different HCat instances ("source" and "target").
* On the source instance,
* 1. One partition is added with the old format ("TEXTFILE").
* 2. The table is updated with an additional column and the data-format changed to ORC.
* 3. Another partition is added with the new format.
* 4. The partitions' metadata is copied to the target HCat instance, without updating the target table definition.
* 5. The partitions' metadata is tested to be an exact replica of that on the source.
* @throws Exception
*/
@Test
public void testPartitionSpecRegistrationWithCustomSchema() throws Exception {
  try {
    startReplicationTargetMetaStoreIfRequired();
    HCatClient sourceMetaStore = HCatClient.create(new Configuration(hcatConf));
    final String dbName = "myDb";
    final String tableName = "myTable";
    sourceMetaStore.dropDatabase(dbName, true, HCatClient.DropDBMode.CASCADE);
    sourceMetaStore.createDatabase(HCatCreateDBDesc.create(dbName).build());
    List<HCatFieldSchema> columnSchema = new ArrayList<HCatFieldSchema>(Arrays.asList(
        new HCatFieldSchema("foo", Type.INT, ""),
        new HCatFieldSchema("bar", Type.STRING, "")));
    List<HCatFieldSchema> partitionSchema = Arrays.asList(
        new HCatFieldSchema("dt", Type.STRING, ""),
        new HCatFieldSchema("grid", Type.STRING, ""));
    HCatTable sourceTable = new HCatTable(dbName, tableName).cols(columnSchema)
        .partCols(partitionSchema).comment("Source table.");
    sourceMetaStore.createTable(HCatCreateTableDesc.create(sourceTable).build());
    // Verify that the sourceTable was created successfully.
    sourceTable = sourceMetaStore.getTable(dbName, tableName);
    assertNotNull("Table couldn't be queried for. ", sourceTable);
    // Partitions added now should inherit table-schema, properties, etc.
    Map<String, String> partitionSpec_1 = new HashMap<String, String>();
    partitionSpec_1.put("grid", "AB");
    partitionSpec_1.put("dt", "2011_12_31");
    HCatPartition sourcePartition_1 = new HCatPartition(sourceTable, partitionSpec_1,
        makePartLocation(sourceTable, partitionSpec_1));
    sourceMetaStore.addPartition(HCatAddPartitionDesc.create(sourcePartition_1).build());
    assertEquals("Unexpected number of partitions. ", 1,
        sourceMetaStore.getPartitions(dbName, tableName).size());
    // Verify that partition_1 was added correctly, and properties were inherited from the HCatTable.
    HCatPartition addedPartition_1 = sourceMetaStore.getPartition(dbName, tableName, partitionSpec_1);
    assertEquals("Column schema doesn't match.", sourceTable.getCols(), addedPartition_1.getColumns());
    assertEquals("InputFormat doesn't match.", sourceTable.getInputFileFormat(), addedPartition_1.getInputFormat());
    assertEquals("OutputFormat doesn't match.", sourceTable.getOutputFileFormat(), addedPartition_1.getOutputFormat());
    assertEquals("SerDe doesn't match.", sourceTable.getSerdeLib(), addedPartition_1.getSerDe());
    assertEquals("SerDe params don't match.", sourceTable.getSerdeParams(), addedPartition_1.getSerdeParams());
    // Replicate table definition.
    HCatClient targetMetaStore = HCatClient.create(new Configuration(replicationTargetHCatConf));
    targetMetaStore.dropDatabase(dbName, true, HCatClient.DropDBMode.CASCADE);
    targetMetaStore.createDatabase(HCatCreateDBDesc.create(dbName).build());
    // Make a copy of the source-table, as would be done across class-loaders.
    HCatTable targetTable = targetMetaStore.deserializeTable(sourceMetaStore.serializeTable(sourceTable));
    targetMetaStore.createTable(HCatCreateTableDesc.create(targetTable).build());
    targetTable = targetMetaStore.getTable(dbName, tableName);
    assertEquals("Created table doesn't match the source.", HCatTable.NO_DIFF, targetTable.diff(sourceTable));
    // Modify Table schema at the source.
    List<HCatFieldSchema> newColumnSchema = new ArrayList<HCatFieldSchema>(columnSchema);
    newColumnSchema.add(new HCatFieldSchema("goo_new", Type.DOUBLE, ""));
    Map<String, String> tableParams = new HashMap<String, String>(1);
    tableParams.put("orc.compress", "ZLIB");
    sourceTable.cols(newColumnSchema) // Add a column.
        .fileFormat("orcfile") // Change SerDe, File I/O formats.
        .tblProps(tableParams)
        .serdeParam(serdeConstants.FIELD_DELIM, Character.toString('\001'));
    sourceMetaStore.updateTableSchema(dbName, tableName, sourceTable);
    sourceTable = sourceMetaStore.getTable(dbName, tableName);
    // Add another partition to the source.
    Map<String, String> partitionSpec_2 = new HashMap<String, String>();
    partitionSpec_2.put("grid", "AB");
    partitionSpec_2.put("dt", "2012_01_01");
    HCatPartition sourcePartition_2 = new HCatPartition(sourceTable, partitionSpec_2,
        makePartLocation(sourceTable, partitionSpec_2));
    sourceMetaStore.addPartition(HCatAddPartitionDesc.create(sourcePartition_2).build());
    // The source table now has 2 partitions, one in TEXTFILE, the other in ORC.
    // Test adding these partitions to the target-table *without* replicating the table-change.
    HCatPartitionSpec sourcePartitionSpec = sourceMetaStore.getPartitionSpecs(dbName, tableName, -1);
    assertEquals("Unexpected number of source partitions.", 2, sourcePartitionSpec.size());
    // Serialize the hcatPartitionSpec.
    List<String> partitionSpecString = sourceMetaStore.serializePartitionSpec(sourcePartitionSpec);
    // Deserialize the HCatPartitionSpec using the target HCatClient instance.
    HCatPartitionSpec targetPartitionSpec = targetMetaStore.deserializePartitionSpec(partitionSpecString);
    assertEquals("Could not add the expected number of partitions.",
        sourcePartitionSpec.size(), targetMetaStore.addPartitionSpec(targetPartitionSpec));
    // Retrieve partitions.
    targetPartitionSpec = targetMetaStore.getPartitionSpecs(dbName, tableName, -1);
    assertEquals("Could not retrieve the expected number of partitions.",
        sourcePartitionSpec.size(), targetPartitionSpec.size());
    // Assert that the source and target partitions are equivalent.
    HCatPartitionSpec.HCatPartitionIterator sourceIterator = sourcePartitionSpec.getPartitionIterator();
    HCatPartitionSpec.HCatPartitionIterator targetIterator = targetPartitionSpec.getPartitionIterator();
    while (targetIterator.hasNext()) {
      assertTrue("Fewer target partitions than source.", sourceIterator.hasNext());
      HCatPartition sourcePartition = sourceIterator.next();
      HCatPartition targetPartition = targetIterator.next();
      assertEquals("Column schema doesn't match.", sourcePartition.getColumns(), targetPartition.getColumns());
      assertEquals("InputFormat doesn't match.", sourcePartition.getInputFormat(), targetPartition.getInputFormat());
      assertEquals("OutputFormat doesn't match.", sourcePartition.getOutputFormat(), targetPartition.getOutputFormat());
      assertEquals("SerDe doesn't match.", sourcePartition.getSerDe(), targetPartition.getSerDe());
      assertEquals("SerDe params don't match.", sourcePartition.getSerdeParams(), targetPartition.getSerdeParams());
    }
  } catch (Exception unexpected) {
    LOG.error("Unexpected exception! ", unexpected);
    assertTrue("Unexpected exception! " + unexpected.getMessage(), false);
  }
}
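A note on the design shown here: because each HCatPartition carries its own storage descriptor (columns, input/output formats, SerDe and SerDe parameters), the partition metadata can be copied to the target metastore without first replicating the source table's schema change, which is exactly what the final assertions verify. A condensed sketch of the replication round trip follows, using the same variable names as the test and assuming the -1 argument means "no limit on the number of partitions".

// Condensed sketch of the cross-metastore PartitionSpec round trip from the test above.
HCatPartitionSpec sourceSpec = sourceMetaStore.getPartitionSpecs("myDb", "myTable", -1);
List<String> serialized = sourceMetaStore.serializePartitionSpec(sourceSpec);
HCatPartitionSpec replica = targetMetaStore.deserializePartitionSpec(serialized);
targetMetaStore.addPartitionSpec(replica); // registers the source partitions on the target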