Use of org.apache.hive.hcatalog.data.schema.HCatFieldSchema in project hive by apache.
The class HCatBaseInputFormat, method getColValsNotInDataColumns.
/**
* gets values for fields requested by output schema which will not be in the data
*/
private static Map<String, Object> getColValsNotInDataColumns(HCatSchema outputSchema, PartInfo partInfo) throws HCatException {
HCatSchema dataSchema = partInfo.getPartitionSchema();
Map<String, Object> vals = new HashMap<String, Object>();
for (String fieldName : outputSchema.getFieldNames()) {
if (dataSchema.getPosition(fieldName) == null) {
// the field is not in the data schema, so check whether it is a partition column
if (partInfo.getPartitionValues().containsKey(fieldName)) {
// First, get the appropriate field schema for this field
HCatFieldSchema fschema = outputSchema.get(fieldName);
// For a partition key type, this will be a primitive typeinfo.
// Obtain relevant object inspector for this typeinfo
ObjectInspector oi = TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(fschema.getTypeInfo());
// Partition values are carried as strings in partInfo.getPartitionValues();
// convert the string representation to the actual typed object for this field.
Object objVal = ObjectInspectorConverters.getConverter(PrimitiveObjectInspectorFactory.javaStringObjectInspector, oi).convert(partInfo.getPartitionValues().get(fieldName));
vals.put(fieldName, objVal);
} else {
vals.put(fieldName, null);
}
}
}
return vals;
}
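The map returned above only carries values for columns that are missing from the data. As a rough illustration of how a caller could fold those values into a record that already holds the data columns, here is a minimal sketch; the mergePartitionValues helper is a hypothetical name used for illustration and is not part of HCatBaseInputFormat.
import java.util.Map;
import org.apache.hive.hcatalog.common.HCatException;
import org.apache.hive.hcatalog.data.HCatRecord;
import org.apache.hive.hcatalog.data.schema.HCatSchema;

// Hypothetical helper: copy the partition-column values computed above into a
// record that already holds the data columns, using the output schema to locate positions.
static HCatRecord mergePartitionValues(HCatRecord dataRecord, HCatSchema outputSchema, Map<String, Object> valsNotInData) throws HCatException {
  for (Map.Entry<String, Object> e : valsNotInData.entrySet()) {
    // HCatRecord.set(fieldName, schema, value) writes the value at the field's position in the schema
    dataRecord.set(e.getKey(), outputSchema, e.getValue());
  }
  return dataRecord;
}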
Use of org.apache.hive.hcatalog.data.schema.HCatFieldSchema in project hive by apache.
The class SpecialCases, method addSpecialCasesParametersToOutputJobProperties.
/**
* Method to do any file-format specific special casing while
* instantiating a storage handler to write. Any parameters we want
* to be visible to the job are set in jobProperties, and these are then
* available to the job via the jobconf at run time.
*
* This is mostly intended to be used by StorageHandlers that wrap
* File-based OutputFormats such as FosterStorageHandler that wraps
* RCFile, ORC, etc.
*
* @param jobProperties : map to write to
* @param jobInfo : information about this output job to read from
* @param ofclass : the output format in use
*/
public static void addSpecialCasesParametersToOutputJobProperties(Map<String, String> jobProperties, OutputJobInfo jobInfo, Class<? extends OutputFormat> ofclass) {
if (ofclass == RCFileOutputFormat.class) {
// RCFile specific parameter
jobProperties.put(HiveConf.ConfVars.HIVE_RCFILE_COLUMN_NUMBER_CONF.varname, Integer.toOctalString(jobInfo.getOutputSchema().getFields().size()));
} else if (ofclass == OrcOutputFormat.class) {
// Special cases for ORC
// We need to check the table properties to see if certain parameters,
// such as compression parameters, are defined. If they are, we copy them
// to job properties so that they will be available in the jobconf at runtime.
// See HIVE-5504 for details
Map<String, String> tableProps = jobInfo.getTableInfo().getTable().getParameters();
for (OrcConf property : OrcConf.values()) {
String propName = property.getAttribute();
if (tableProps.containsKey(propName)) {
jobProperties.put(propName, tableProps.get(propName));
}
}
} else if (ofclass == AvroContainerOutputFormat.class) {
// Special cases for Avro. As with ORC, we make table properties that
// Avro is interested in available in jobconf at runtime
Map<String, String> tableProps = jobInfo.getTableInfo().getTable().getParameters();
for (AvroSerdeUtils.AvroTableProperties property : AvroSerdeUtils.AvroTableProperties.values()) {
String propName = property.getPropName();
if (tableProps.containsKey(propName)) {
String propVal = tableProps.get(propName);
jobProperties.put(propName, propVal);
}
}
Properties properties = new Properties();
properties.put("name", jobInfo.getTableName());
List<String> colNames = jobInfo.getOutputSchema().getFieldNames();
List<TypeInfo> colTypes = new ArrayList<TypeInfo>();
for (HCatFieldSchema field : jobInfo.getOutputSchema().getFields()) {
colTypes.add(TypeInfoUtils.getTypeInfoFromTypeString(field.getTypeString()));
}
String schemaLiteralProp = AvroSerdeUtils.AvroTableProperties.SCHEMA_LITERAL.getPropName();
String existingSchemaLiteral = jobProperties.get(schemaLiteralProp);
if (existingSchemaLiteral == null || existingSchemaLiteral.isEmpty()) {
jobProperties.put(schemaLiteralProp, AvroSerDe.getSchemaFromCols(properties, colNames, colTypes, null).toString());
}
} else if (ofclass == MapredParquetOutputFormat.class) {
// Handle table properties
Properties tblProperties = new Properties();
Map<String, String> tableProps = jobInfo.getTableInfo().getTable().getParameters();
for (String key : tableProps.keySet()) {
if (ParquetTableUtils.isParquetProperty(key)) {
tblProperties.put(key, tableProps.get(key));
}
}
// Handle table schema
List<String> colNames = jobInfo.getOutputSchema().getFieldNames();
List<TypeInfo> colTypes = new ArrayList<TypeInfo>();
for (HCatFieldSchema field : jobInfo.getOutputSchema().getFields()) {
colTypes.add(TypeInfoUtils.getTypeInfoFromTypeString(field.getTypeString()));
}
String parquetSchema = HiveSchemaConverter.convert(colNames, colTypes).toString();
jobProperties.put(DataWritableWriteSupport.PARQUET_HIVE_SCHEMA, parquetSchema);
jobProperties.putAll(Maps.fromProperties(tblProperties));
}
}
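As a usage sketch, the method above would typically be called while an output job is being configured, with the collected properties then copied into the job configuration so they are visible to tasks. The wrapper below is illustrative only and assumes SpecialCases is accessible from the calling code; it is not how HCatalog itself wires this up.
import java.util.HashMap;
import java.util.Map;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat;
import org.apache.hive.hcatalog.mapreduce.OutputJobInfo;

// Illustrative wrapper: gather the format-specific properties for an ORC write and copy them into a conf.
static void applyOrcSpecialCases(Configuration conf, OutputJobInfo jobInfo) {
  Map<String, String> jobProperties = new HashMap<String, String>();
  SpecialCases.addSpecialCasesParametersToOutputJobProperties(jobProperties, jobInfo, OrcOutputFormat.class);
  for (Map.Entry<String, String> e : jobProperties.entrySet()) {
    conf.set(e.getKey(), e.getValue()); // visible to the job via the jobconf at run time
  }
}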
Use of org.apache.hive.hcatalog.data.schema.HCatFieldSchema in project hive by apache.
The class TestHCatUtil, method testGetTableSchemaWithPtnColsApi.
@Test
public void testGetTableSchemaWithPtnColsApi() throws IOException {
// Check the schema of a table with one field & no partition keys.
StorageDescriptor sd = new StorageDescriptor(Lists.newArrayList(new FieldSchema("username", serdeConstants.STRING_TYPE_NAME, null)), "location", "org.apache.hadoop.mapred.TextInputFormat", "org.apache.hadoop.mapred.TextOutputFormat", false, -1, new SerDeInfo(), new ArrayList<String>(), new ArrayList<Order>(), new HashMap<String, String>());
org.apache.hadoop.hive.metastore.api.Table apiTable = new org.apache.hadoop.hive.metastore.api.Table("test_tblname", "test_dbname", "test_owner", 0, 0, 0, sd, new ArrayList<FieldSchema>(), new HashMap<String, String>(), "viewOriginalText", "viewExpandedText", TableType.EXTERNAL_TABLE.name());
Table table = new Table(apiTable);
List<HCatFieldSchema> expectedHCatSchema = Lists.newArrayList(new HCatFieldSchema("username", HCatFieldSchema.Type.STRING, null));
Assert.assertEquals(new HCatSchema(expectedHCatSchema), HCatUtil.getTableSchemaWithPtnCols(table));
// Add a partition key & ensure it is reflected in the schema.
List<FieldSchema> partitionKeys = Lists.newArrayList(new FieldSchema("dt", serdeConstants.STRING_TYPE_NAME, null));
table.getTTable().setPartitionKeys(partitionKeys);
expectedHCatSchema.add(new HCatFieldSchema("dt", HCatFieldSchema.Type.STRING, null));
Assert.assertEquals(new HCatSchema(expectedHCatSchema), HCatUtil.getTableSchemaWithPtnCols(table));
}
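The behaviour this test exercises is, roughly, that HCatUtil.getTableSchemaWithPtnCols returns the data columns followed by the partition keys. A simplified sketch of that composition (an approximation for illustration, not the actual HCatUtil implementation) could look like:
import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.ql.metadata.Table;
import org.apache.hive.hcatalog.common.HCatException;
import org.apache.hive.hcatalog.data.schema.HCatFieldSchema;
import org.apache.hive.hcatalog.data.schema.HCatSchema;
import org.apache.hive.hcatalog.data.schema.HCatSchemaUtils;

// Simplified sketch: data columns first, then partition keys appended in declaration order.
static HCatSchema tableSchemaWithPtnCols(Table table) throws HCatException {
  List<HCatFieldSchema> fields = new ArrayList<HCatFieldSchema>();
  for (FieldSchema col : table.getCols()) {
    fields.add(HCatSchemaUtils.getHCatFieldSchema(col));
  }
  for (FieldSchema ptnKey : table.getPartitionKeys()) {
    fields.add(HCatSchemaUtils.getHCatFieldSchema(ptnKey));
  }
  return new HCatSchema(fields);
}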
Use of org.apache.hive.hcatalog.data.schema.HCatFieldSchema in project hive by apache.
The class TestHCatPartitioned, method columnOrderChangeTest.
// check behavior when changing the order of columns
private void columnOrderChangeTest() throws Exception {
HCatSchema tableSchema = getTableSchema();
assertEquals(5, tableSchema.getFields().size());
partitionColumns = new ArrayList<HCatFieldSchema>();
partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c1", serdeConstants.INT_TYPE_NAME, "")));
partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c3", serdeConstants.STRING_TYPE_NAME, "")));
partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c2", serdeConstants.STRING_TYPE_NAME, "")));
writeRecords = new ArrayList<HCatRecord>();
for (int i = 0; i < 10; i++) {
List<Object> objList = new ArrayList<Object>();
objList.add(i);
objList.add("co strvalue" + i);
objList.add("co str2value" + i);
writeRecords.add(new DefaultHCatRecord(objList));
}
Map<String, String> partitionMap = new HashMap<String, String>();
partitionMap.put("part1", "p1value8");
partitionMap.put("part0", "508");
Exception exc = null;
try {
runMRCreate(partitionMap, partitionColumns, writeRecords, 10, true);
} catch (IOException e) {
exc = e;
}
assertTrue(exc != null);
assertTrue(exc instanceof HCatException);
assertEquals(ErrorType.ERROR_SCHEMA_COLUMN_MISMATCH, ((HCatException) exc).getErrorType());
partitionColumns = new ArrayList<HCatFieldSchema>();
partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c1", serdeConstants.INT_TYPE_NAME, "")));
partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c2", serdeConstants.STRING_TYPE_NAME, "")));
writeRecords = new ArrayList<HCatRecord>();
for (int i = 0; i < 10; i++) {
List<Object> objList = new ArrayList<Object>();
objList.add(i);
objList.add("co strvalue" + i);
writeRecords.add(new DefaultHCatRecord(objList));
}
runMRCreate(partitionMap, partitionColumns, writeRecords, 10, true);
if (isTableImmutable()) {
// Read should get 10 + 20 + 10 + 10 + 20 rows
runMRRead(70);
} else {
// +20 from the duplicate publish
runMRRead(90);
}
}
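The ERROR_SCHEMA_COLUMN_MISMATCH asserted above results from the writer comparing the declared columns against the table schema. A minimal sketch of that kind of positional check is shown below; it is an assumption made for illustration, not the actual HCatOutputFormat validation code.
import java.util.List;
import org.apache.hive.hcatalog.common.ErrorType;
import org.apache.hive.hcatalog.common.HCatException;
import org.apache.hive.hcatalog.data.schema.HCatFieldSchema;

// Sketch only: fail when the declared columns do not line up, by position and name, with the table columns.
static void checkColumnOrder(List<HCatFieldSchema> declared, List<HCatFieldSchema> tableCols) throws HCatException {
  for (int i = 0; i < declared.size(); i++) {
    if (i >= tableCols.size() || !declared.get(i).getName().equalsIgnoreCase(tableCols.get(i).getName())) {
      throw new HCatException(ErrorType.ERROR_SCHEMA_COLUMN_MISMATCH, "Column " + declared.get(i).getName() + " does not match the table schema at position " + i);
    }
  }
}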
Use of org.apache.hive.hcatalog.data.schema.HCatFieldSchema in project hive by apache.
The class HCatBaseStorer, method validateSchema.
/**
* This method encodes which Pig types can map to (i.e. be stored in) which HCat types.
* @throws HCatException
* @throws FrontendException
*/
private void validateSchema(FieldSchema pigField, HCatFieldSchema hcatField, Schema topLevelPigSchema, HCatSchema topLevelHCatSchema, int columnPos) throws HCatException, FrontendException {
validateAlias(pigField.alias);
byte type = pigField.type;
if (DataType.isComplex(type)) {
switch(type) {
case DataType.MAP:
if (hcatField != null) {
if (hcatField.getMapKeyType() != Type.STRING) {
throw new FrontendException("Key Type of map must be String " + hcatField, PigHCatUtil.PIG_EXCEPTION_CODE);
}
// Map values can be primitive or complex
}
break;
case DataType.BAG:
HCatSchema arrayElementSchema = hcatField == null ? null : hcatField.getArrayElementSchema();
for (FieldSchema innerField : pigField.schema.getField(0).schema.getFields()) {
validateSchema(innerField, getColFromSchema(pigField.alias, arrayElementSchema), topLevelPigSchema, topLevelHCatSchema, columnPos);
}
break;
case DataType.TUPLE:
HCatSchema structSubSchema = hcatField == null ? null : hcatField.getStructSubSchema();
for (FieldSchema innerField : pigField.schema.getFields()) {
validateSchema(innerField, getColFromSchema(pigField.alias, structSubSchema), topLevelPigSchema, topLevelHCatSchema, columnPos);
}
break;
default:
throw new FrontendException("Internal Error.", PigHCatUtil.PIG_EXCEPTION_CODE);
}
} else if (hcatField != null) {
// there is no point trying to validate further if we have no type info about target field
switch(type) {
case DataType.BIGDECIMAL:
throwTypeMismatchException(type, Lists.newArrayList(Type.DECIMAL), hcatField, columnPos);
break;
case DataType.DATETIME:
throwTypeMismatchException(type, Lists.newArrayList(Type.TIMESTAMP, Type.DATE), hcatField, columnPos);
break;
case DataType.BYTEARRAY:
throwTypeMismatchException(type, Lists.newArrayList(Type.BINARY), hcatField, columnPos);
break;
case DataType.BIGINTEGER:
throwTypeMismatchException(type, Collections.<Type>emptyList(), hcatField, columnPos);
break;
case DataType.BOOLEAN:
throwTypeMismatchException(type, Lists.newArrayList(Type.BOOLEAN), hcatField, columnPos);
break;
case DataType.CHARARRAY:
throwTypeMismatchException(type, Lists.newArrayList(Type.STRING, Type.CHAR, Type.VARCHAR), hcatField, columnPos);
break;
case DataType.DOUBLE:
throwTypeMismatchException(type, Lists.newArrayList(Type.DOUBLE), hcatField, columnPos);
break;
case DataType.FLOAT:
throwTypeMismatchException(type, Lists.newArrayList(Type.FLOAT), hcatField, columnPos);
break;
case DataType.INTEGER:
throwTypeMismatchException(type, Lists.newArrayList(Type.INT, Type.BIGINT, Type.TINYINT, Type.SMALLINT), hcatField, columnPos);
break;
case DataType.LONG:
throwTypeMismatchException(type, Lists.newArrayList(Type.BIGINT), hcatField, columnPos);
break;
default:
throw new FrontendException("'" + type + "' Pig datatype in column " + columnPos + "(0-based) is not supported by HCat", PigHCatUtil.PIG_EXCEPTION_CODE);
}
} else {
if (false) {
// this check is intentionally disabled; see HIVE-6194
throw new FrontendException("(pigSch,hcatSchema)=(" + pigField + "," + "" + hcatField + ") (topPig, topHcat)=(" + topLevelPigSchema + "," + "" + topLevelHCatSchema + ")");
}
}
}
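The helper throwTypeMismatchException is not shown in this snippet. Judging only from how it is called above, it should throw when the target HCat type is not in the supplied list of acceptable types; the reconstruction below is a hedged sketch written as if it lived alongside HCatBaseStorer (so PigHCatUtil.PIG_EXCEPTION_CODE resolves as in the original), not the actual Hive source.
import java.util.List;
import org.apache.hive.hcatalog.data.schema.HCatFieldSchema;
import org.apache.hive.hcatalog.data.schema.HCatFieldSchema.Type;
import org.apache.pig.impl.logicalLayer.FrontendException;

// Assumed reconstruction: do nothing when the HCat column type is one of the allowed targets, otherwise fail.
private static void throwTypeMismatchException(byte pigType, List<Type> allowedHCatTypes, HCatFieldSchema hcatField, int columnPos) throws FrontendException {
  if (!allowedHCatTypes.contains(hcatField.getType())) {
    throw new FrontendException("Pig type " + pigType + " in column " + columnPos + "(0-based) cannot be stored in HCat column of type " + hcatField.getType(), PigHCatUtil.PIG_EXCEPTION_CODE);
  }
}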