Use of org.apache.hive.hcatalog.data.schema.HCatSchema in project hive by apache.
The class HCatBaseInputFormat, method getColValsNotInDataColumns.
/**
 * Gets values for fields requested by the output schema which will not be in the data.
 */
private static Map<String, Object> getColValsNotInDataColumns(HCatSchema outputSchema, PartInfo partInfo) throws HCatException {
  HCatSchema dataSchema = partInfo.getPartitionSchema();
  Map<String, Object> vals = new HashMap<String, Object>();
  for (String fieldName : outputSchema.getFieldNames()) {
    if (dataSchema.getPosition(fieldName) == null) {
      // This field is absent from the data schema, so check whether it is a partition column
      if (partInfo.getPartitionValues().containsKey(fieldName)) {
        // First, get the appropriate field schema for this field
        HCatFieldSchema fschema = outputSchema.get(fieldName);
        // For a partition key type, this will be a primitive typeinfo.
        // Obtain the relevant object inspector for this typeinfo.
        ObjectInspector oi = TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(fschema.getTypeInfo());
        // Get the appropriate object from the string representation of the value in partInfo.getPartitionValues().
        // Partition values are represented as strings, but we want the actual object type associated with the field.
        Object objVal = ObjectInspectorConverters.getConverter(PrimitiveObjectInspectorFactory.javaStringObjectInspector, oi).convert(partInfo.getPartitionValues().get(fieldName));
        vals.put(fieldName, objVal);
      } else {
        vals.put(fieldName, null);
      }
    }
  }
  return vals;
}
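The string-to-object conversion at the heart of this method can be exercised on its own. Below is a minimal, self-contained sketch (not taken from the Hive source) that converts a partition value string to a typed Java object with the same ObjectInspectorConverters pattern; the value and the int partition-key type are assumptions for illustration.

import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;

public class PartitionValueConversionSketch {
  public static void main(String[] args) {
    // Partition values arrive as strings, e.g. a hypothetical int-typed key ds=20240101.
    String rawValue = "20240101";
    // Object inspector for the partition column's declared type (int in this example).
    ObjectInspector intOI = TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(TypeInfoFactory.intTypeInfo);
    // Convert from the string representation to the typed object, as getColValsNotInDataColumns does.
    Object typed = ObjectInspectorConverters.getConverter(PrimitiveObjectInspectorFactory.javaStringObjectInspector, intOI).convert(rawValue);
    System.out.println(typed.getClass().getSimpleName() + ": " + typed); // Integer: 20240101
  }
}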
Use of org.apache.hive.hcatalog.data.schema.HCatSchema in project hive by apache.
The class HCatTableInfo, method valueOf.
/**
 * Creates an HCatTableInfo instance from the supplied Hive Table instance.
 * @param table the Table to create an instance from
 * @return HCatTableInfo
 * @throws IOException
 */
static HCatTableInfo valueOf(Table table) throws IOException {
  // Explicitly use {@link org.apache.hadoop.hive.ql.metadata.Table} when getting the schema,
  // but store {@link org.apache.hadoop.hive.metastore.api.Table} as this class is serialized
  // into the job conf.
  org.apache.hadoop.hive.ql.metadata.Table mTable = new org.apache.hadoop.hive.ql.metadata.Table(table);
  HCatSchema schema = HCatUtil.extractSchema(mTable);
  StorerInfo storerInfo = InternalUtil.extractStorerInfo(table.getSd(), table.getParameters());
  HCatSchema partitionColumns = HCatUtil.getPartitionColumns(mTable);
  return new HCatTableInfo(table.getDbName(), table.getTableName(), schema, partitionColumns, storerInfo, table);
}
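For callers outside this class, the same schema extraction can be reached through HCatUtil against a live metastore. A rough sketch, assuming a reachable metastore and a hypothetical default.my_table, built only from HCatUtil calls that appear in these snippets:

import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.IMetaStoreClient;
import org.apache.hadoop.hive.ql.metadata.Table;
import org.apache.hive.hcatalog.common.HCatUtil;
import org.apache.hive.hcatalog.data.schema.HCatSchema;

public class TableSchemaSketch {
  public static void main(String[] args) throws Exception {
    HiveConf hiveConf = new HiveConf(TableSchemaSketch.class);
    IMetaStoreClient client = HCatUtil.getHiveMetastoreClient(hiveConf);
    try {
      // Hypothetical database/table names.
      Table table = HCatUtil.getTable(client, "default", "my_table");
      // Same wrapping that valueOf relies on: derive schemas from the ql.metadata.Table.
      HCatSchema dataColumns = HCatUtil.extractSchema(table);
      HCatSchema partitionColumns = HCatUtil.getPartitionColumns(table);
      System.out.println("data columns: " + dataColumns.getFieldNames());
      System.out.println("partition columns: " + partitionColumns.getFieldNames());
    } finally {
      HCatUtil.closeHiveClientQuietly(client);
    }
  }
}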
Use of org.apache.hive.hcatalog.data.schema.HCatSchema in project hive by apache.
The class InitializeInput, method getInputJobInfo.
/**
 * Returns the given InputJobInfo after populating it with data queried from the metadata service.
 */
private static InputJobInfo getInputJobInfo(Configuration conf, InputJobInfo inputJobInfo, String locationFilter) throws Exception {
  IMetaStoreClient client = null;
  HiveConf hiveConf = null;
  try {
    if (conf != null) {
      hiveConf = HCatUtil.getHiveConf(conf);
    } else {
      hiveConf = new HiveConf(HCatInputFormat.class);
    }
    client = HCatUtil.getHiveMetastoreClient(hiveConf);
    Table table = HCatUtil.getTable(client, inputJobInfo.getDatabaseName(), inputJobInfo.getTableName());
    List<PartInfo> partInfoList = new ArrayList<PartInfo>();
    inputJobInfo.setTableInfo(HCatTableInfo.valueOf(table.getTTable()));
    if (table.getPartitionKeys().size() != 0) {
      // Partitioned table
      List<Partition> parts = client.listPartitionsByFilter(inputJobInfo.getDatabaseName(), inputJobInfo.getTableName(), inputJobInfo.getFilter(), (short) -1);
      // Default to 100,000 partitions if hcat.metastore.maxpartitions is not defined
      int maxPart = hiveConf.getInt("hcat.metastore.maxpartitions", 100000);
      if (parts != null && parts.size() > maxPart) {
        throw new HCatException(ErrorType.ERROR_EXCEED_MAXPART, "total number of partitions is " + parts.size());
      }
      // Populate partition info
      for (Partition ptn : parts) {
        HCatSchema schema = HCatUtil.extractSchema(new org.apache.hadoop.hive.ql.metadata.Partition(table, ptn));
        PartInfo partInfo = extractPartInfo(schema, ptn.getSd(), ptn.getParameters(), conf, inputJobInfo);
        partInfo.setPartitionValues(InternalUtil.createPtnKeyValueMap(table, ptn));
        partInfoList.add(partInfo);
      }
    } else {
      // Non-partitioned table
      HCatSchema schema = HCatUtil.extractSchema(table);
      PartInfo partInfo = extractPartInfo(schema, table.getTTable().getSd(), table.getParameters(), conf, inputJobInfo);
      partInfo.setPartitionValues(new HashMap<String, String>());
      partInfoList.add(partInfo);
    }
    inputJobInfo.setPartitions(partInfoList);
    return inputJobInfo;
  } finally {
    HCatUtil.closeHiveClientQuietly(client);
  }
}
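On the client side, the database name, table name, and partition filter consumed here are usually supplied via HCatInputFormat. A minimal read-side setup sketch follows; the table coordinates and filter are hypothetical, and it assumes the setInput overload that accepts a partition filter string.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hive.hcatalog.mapreduce.HCatInputFormat;

public class ReadJobSetupSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Job job = new Job(conf, "hcat read sketch");
    // Database, table, and partition filter are illustrative; the filter is passed to
    // listPartitionsByFilter in getInputJobInfo, and the number of matching partitions
    // is capped by hcat.metastore.maxpartitions (default 100,000).
    HCatInputFormat.setInput(job, "default", "my_table", "dt = \"20240101\"");
    job.setInputFormatClass(HCatInputFormat.class);
    // ... set the mapper, output format, etc., then submit the job.
  }
}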
Use of org.apache.hive.hcatalog.data.schema.HCatSchema in project hive by apache.
The class TestHCatUtil, method testGetTableSchemaWithPtnColsApi.
@Test
public void testGetTableSchemaWithPtnColsApi() throws IOException {
  // Check the schema of a table with one field & no partition keys.
  StorageDescriptor sd = new StorageDescriptor(Lists.newArrayList(new FieldSchema("username", serdeConstants.STRING_TYPE_NAME, null)), "location", "org.apache.hadoop.mapred.TextInputFormat", "org.apache.hadoop.mapred.TextOutputFormat", false, -1, new SerDeInfo(), new ArrayList<String>(), new ArrayList<Order>(), new HashMap<String, String>());
  org.apache.hadoop.hive.metastore.api.Table apiTable = new org.apache.hadoop.hive.metastore.api.Table("test_tblname", "test_dbname", "test_owner", 0, 0, 0, sd, new ArrayList<FieldSchema>(), new HashMap<String, String>(), "viewOriginalText", "viewExpandedText", TableType.EXTERNAL_TABLE.name());
  Table table = new Table(apiTable);
  List<HCatFieldSchema> expectedHCatSchema = Lists.newArrayList(new HCatFieldSchema("username", HCatFieldSchema.Type.STRING, null));
  Assert.assertEquals(new HCatSchema(expectedHCatSchema), HCatUtil.getTableSchemaWithPtnCols(table));
  // Add a partition key & ensure it's reflected in the schema.
  List<FieldSchema> partitionKeys = Lists.newArrayList(new FieldSchema("dt", serdeConstants.STRING_TYPE_NAME, null));
  table.getTTable().setPartitionKeys(partitionKeys);
  expectedHCatSchema.add(new HCatFieldSchema("dt", HCatFieldSchema.Type.STRING, null));
  Assert.assertEquals(new HCatSchema(expectedHCatSchema), HCatUtil.getTableSchemaWithPtnCols(table));
}
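The schema produced in this test can be probed the same way getColValsNotInDataColumns does above. A small standalone sketch, using only constructors and accessors already shown in these snippets:

import java.util.List;
import org.apache.hive.hcatalog.common.HCatException;
import org.apache.hive.hcatalog.data.schema.HCatFieldSchema;
import org.apache.hive.hcatalog.data.schema.HCatSchema;
import com.google.common.collect.Lists;

public class SchemaLookupSketch {
  public static void main(String[] args) throws HCatException {
    List<HCatFieldSchema> fields = Lists.newArrayList(
        new HCatFieldSchema("username", HCatFieldSchema.Type.STRING, null),
        new HCatFieldSchema("dt", HCatFieldSchema.Type.STRING, null));
    HCatSchema schema = new HCatSchema(fields);
    System.out.println(schema.getFieldNames());        // [username, dt]
    System.out.println(schema.getPosition("dt"));      // 1
    System.out.println(schema.getPosition("missing")); // null, the case handled in getColValsNotInDataColumns
  }
}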
Use of org.apache.hive.hcatalog.data.schema.HCatSchema in project hive by apache.
The class HCatMapReduceTest, method runMRCreate.
/**
 * Runs a local map-reduce job to load data from in-memory records into an HCatalog table.
 * @param partitionValues map of partition key names to values for the partition being written (may be null)
 * @param partitionColumns schema of the columns being written
 * @param records data to be written to the HCatalog table
 * @param writeCount number of records to write
 * @param assertWrite whether to assert the number of records actually written by the mappers
 * @param asSingleMapTask whether the job should run as a single map task
 * @param customDynamicPathPattern custom dynamic-partition path pattern, or null to use the default
 * @return the completed Job
 * @throws Exception
 */
Job runMRCreate(Map<String, String> partitionValues, List<HCatFieldSchema> partitionColumns, List<HCatRecord> records, int writeCount, boolean assertWrite, boolean asSingleMapTask, String customDynamicPathPattern) throws Exception {
  writeRecords = records;
  MapCreate.writeCount = 0;
  Configuration conf = new Configuration();
  Job job = new Job(conf, "hcat mapreduce write test");
  job.setJarByClass(this.getClass());
  job.setMapperClass(HCatMapReduceTest.MapCreate.class);
  // input/output settings
  job.setInputFormatClass(TextInputFormat.class);
  if (asSingleMapTask) {
    // One input path would mean only one map task
    Path path = new Path(fs.getWorkingDirectory(), "mapred/testHCatMapReduceInput");
    createInputFile(path, writeCount);
    TextInputFormat.setInputPaths(job, path);
  } else {
    // Create two input paths so that two map tasks get triggered. There could be other ways
    // to trigger two map tasks.
    Path path = new Path(fs.getWorkingDirectory(), "mapred/testHCatMapReduceInput");
    createInputFile(path, writeCount / 2);
    Path path2 = new Path(fs.getWorkingDirectory(), "mapred/testHCatMapReduceInput2");
    createInputFile(path2, (writeCount - writeCount / 2));
    TextInputFormat.setInputPaths(job, path, path2);
  }
  job.setOutputFormatClass(HCatOutputFormat.class);
  OutputJobInfo outputJobInfo = OutputJobInfo.create(dbName, tableName, partitionValues);
  if (customDynamicPathPattern != null) {
    job.getConfiguration().set(HCatConstants.HCAT_DYNAMIC_CUSTOM_PATTERN, customDynamicPathPattern);
  }
  HCatOutputFormat.setOutput(job, outputJobInfo);
  job.setMapOutputKeyClass(BytesWritable.class);
  job.setMapOutputValueClass(DefaultHCatRecord.class);
  job.setNumReduceTasks(0);
  HCatOutputFormat.setSchema(job, new HCatSchema(partitionColumns));
  boolean success = job.waitForCompletion(true);
  // Ensure counters are set when data has actually been read.
  if (partitionValues != null) {
    assertTrue(job.getCounters().getGroup("FileSystemCounters").findCounter("FILE_BYTES_READ").getValue() > 0);
  }
  if (!HCatUtil.isHadoop23()) {
    // Local mode outputcommitter hook is not invoked in Hadoop 1.x
    if (success) {
      new FileOutputCommitterContainer(job, null).commitJob(job);
    } else {
      new FileOutputCommitterContainer(job, null).abortJob(job, JobStatus.State.FAILED);
    }
  }
  if (assertWrite) {
    // we assert only if we expected to assert with this call.
    Assert.assertEquals(writeCount, MapCreate.writeCount);
  }
  if (isTableExternal()) {
    externalTableLocation = outputJobInfo.getTableInfo().getTableLocation();
  }
  return job;
}
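Outside of this test harness, the write-side sequence runMRCreate follows (setOutput, then setSchema, map-only job) looks roughly like the sketch below; the database, table, partition, and column names are assumptions for illustration.

import java.util.HashMap;
import java.util.List;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hive.hcatalog.data.DefaultHCatRecord;
import org.apache.hive.hcatalog.data.schema.HCatFieldSchema;
import org.apache.hive.hcatalog.data.schema.HCatSchema;
import org.apache.hive.hcatalog.mapreduce.HCatOutputFormat;
import org.apache.hive.hcatalog.mapreduce.OutputJobInfo;
import com.google.common.collect.Lists;

public class WriteJobSetupSketch {
  public static void main(String[] args) throws Exception {
    Job job = new Job(new Configuration(), "hcat write sketch");
    // Target table and static partition are hypothetical.
    HashMap<String, String> partitionValues = new HashMap<String, String>();
    partitionValues.put("dt", "20240101");
    HCatOutputFormat.setOutput(job, OutputJobInfo.create("default", "my_table", partitionValues));
    // Declare the schema of the records the mappers will emit.
    List<HCatFieldSchema> columns = Lists.newArrayList(
        new HCatFieldSchema("username", HCatFieldSchema.Type.STRING, null));
    HCatOutputFormat.setSchema(job, new HCatSchema(columns));
    // Map-only write, mirroring runMRCreate: the mapper emits DefaultHCatRecord values.
    job.setOutputFormatClass(HCatOutputFormat.class);
    job.setMapOutputKeyClass(BytesWritable.class);
    job.setMapOutputValueClass(DefaultHCatRecord.class);
    job.setNumReduceTasks(0);
    // ... set an input format and a mapper that produces HCatRecords, then submit.
  }
}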