Use of org.apache.hadoop.hive.serde2.typeinfo.TypeInfo in project hive by apache.
The class ColumnMappings, method setHiveColumnDescription.
void setHiveColumnDescription(String serdeName, List<String> columnNames, List<TypeInfo> columnTypes) throws SerDeException {
  if (columnsMapping.length != columnNames.size()) {
    throw new SerDeException(serdeName + ": columns has " + columnNames.size() + " elements while hbase.columns.mapping has " + columnsMapping.length + " elements" + " (counting the key if implicit)");
  }
  // Check that a column family mapped without a qualifier is a Map whose key
  // extends LazyPrimitive<?, ?> and thus has type Category.PRIMITIVE
  for (int i = 0; i < columnNames.size(); i++) {
    ColumnMapping colMap = columnsMapping[i];
    colMap.columnName = columnNames.get(i);
    colMap.columnType = columnTypes.get(i);
    if (colMap.qualifierName == null && !colMap.hbaseRowKey && !colMap.hbaseTimestamp) {
      TypeInfo typeInfo = columnTypes.get(i);
      if ((typeInfo.getCategory() != ObjectInspector.Category.MAP) || (((MapTypeInfo) typeInfo).getMapKeyTypeInfo().getCategory() != ObjectInspector.Category.PRIMITIVE)) {
        throw new SerDeException(serdeName + ": hbase column family '" + colMap.familyName + "' should be mapped to Map<? extends LazyPrimitive<?, ?>,?>, that is " + "the Key for the map should be of primitive type, but is mapped to " + typeInfo.getTypeName());
      }
    }
    if (colMap.hbaseTimestamp) {
      TypeInfo typeInfo = columnTypes.get(i);
      if (!colMap.isCategory(PrimitiveCategory.TIMESTAMP) && !colMap.isCategory(PrimitiveCategory.LONG)) {
        throw new SerDeException(serdeName + ": timestamp columns should be of " + "timestamp or bigint type, but is mapped to " + typeInfo.getTypeName());
      }
    }
  }
}
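For context, a minimal standalone sketch of the inputs this method validates (not part of the Hive source; the class name and the literal type string are made up): column types arrive as TypeInfo objects parsed from a Hive type string, and a column family mapped without a qualifier must be a map whose key category is PRIMITIVE.

import java.util.List;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;

public class ColumnMappingSketch {
  public static void main(String[] args) {
    // A "cf:" style mapping expects something like map<string,int> on the Hive side.
    List<TypeInfo> columnTypes = TypeInfoUtils.getTypeInfosFromTypeString("string,map<string,int>");
    TypeInfo familyType = columnTypes.get(1);
    // Same check as above: the type must be a MAP and its key must be primitive.
    boolean validFamilyMapping =
        familyType.getCategory() == ObjectInspector.Category.MAP
            && ((MapTypeInfo) familyType).getMapKeyTypeInfo().getCategory() == ObjectInspector.Category.PRIMITIVE;
    System.out.println(familyType.getTypeName() + " valid for a column family: " + validFamilyMapping);
  }
}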
Use of org.apache.hadoop.hive.serde2.typeinfo.TypeInfo in project hive by apache.
The class VectorPartitionConversion, method isImplicitVectorColumnConversion.
public static boolean isImplicitVectorColumnConversion(TypeInfo fromTypeInfo, TypeInfo toTypeInfo) {
  if (fromTypeInfo.getCategory() == Category.PRIMITIVE && toTypeInfo.getCategory() == Category.PRIMITIVE) {
    PrimitiveCategory fromPrimitiveCategory = ((PrimitiveTypeInfo) fromTypeInfo).getPrimitiveCategory();
    PrimitiveCategory toPrimitiveCategory = ((PrimitiveTypeInfo) toTypeInfo).getPrimitiveCategory();
    PrimitiveCategory[] toPrimitiveCategories = implicitPrimitiveMap.get(fromPrimitiveCategory);
    if (toPrimitiveCategories != null) {
      for (PrimitiveCategory candidatePrimitiveCategory : toPrimitiveCategories) {
        if (candidatePrimitiveCategory == toPrimitiveCategory) {
          return true;
        }
      }
    }
    return false;
  }
  return false;
}
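The lookup above relies on implicitPrimitiveMap, whose contents are not shown here. A hypothetical sketch of that kind of structure follows; the class name and the INT-to-LONG entry are illustrative assumptions, not the actual contents of Hive's table.

import java.util.HashMap;
import java.util.Map;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;

public class ImplicitConversionSketch {
  // Map from a source primitive category to the categories it may be implicitly converted to.
  private static final Map<PrimitiveCategory, PrimitiveCategory[]> implicitMap = new HashMap<>();
  static {
    // Assumed example entry: treat widening int -> bigint as implicit.
    implicitMap.put(PrimitiveCategory.INT, new PrimitiveCategory[] {PrimitiveCategory.LONG});
  }

  public static boolean isImplicit(PrimitiveCategory from, PrimitiveCategory to) {
    PrimitiveCategory[] targets = implicitMap.get(from);
    if (targets == null) {
      return false;
    }
    for (PrimitiveCategory candidate : targets) {
      if (candidate == to) {
        return true;
      }
    }
    return false;
  }
}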
Use of org.apache.hadoop.hive.serde2.typeinfo.TypeInfo in project hive by apache.
The class FetchOperator, method getRecordReader.
private RecordReader<WritableComparable, Writable> getRecordReader() throws Exception {
  if (!iterSplits.hasNext()) {
    FetchInputFormatSplit[] splits = getNextSplits();
    if (splits == null) {
      return null;
    }
    if (!isPartitioned || convertedOI == null) {
      currSerDe = tableSerDe;
      ObjectConverter = null;
    } else {
      currSerDe = needConversion(currDesc) ? currDesc.getDeserializer(job) : tableSerDe;
      ObjectInspector inputOI = currSerDe.getObjectInspector();
      ObjectConverter = ObjectInspectorConverters.getConverter(inputOI, convertedOI);
    }
    if (isPartitioned) {
      row[1] = createPartValue(currDesc, partKeyOI);
    }
    iterSplits = Arrays.asList(splits).iterator();
    if (LOG.isDebugEnabled()) {
      LOG.debug("Creating fetchTask with deserializer typeinfo: " + currSerDe.getObjectInspector().getTypeName());
      LOG.debug("deserializer properties:\ntable properties: " + currDesc.getTableDesc().getProperties() + "\npartition properties: " + currDesc.getProperties());
    }
  }
  final FetchInputFormatSplit target = iterSplits.next();
  @SuppressWarnings("unchecked") final RecordReader<WritableComparable, Writable> reader = target.getRecordReader(job);
  if (hasVC || work.getSplitSample() != null) {
    currRecReader = new HiveRecordReader<WritableComparable, Writable>(reader, job) {
      @Override
      public boolean doNext(WritableComparable key, Writable value) throws IOException {
        // if the current position is past the shrinkedLength computed for
        // each split by table sampling, stop fetching any more (early exit)
        if (target.shrinkedLength > 0 && context.getIoCxt().getCurrentBlockStart() > target.shrinkedLength) {
          return false;
        }
        return super.doNext(key, value);
      }
    };
    ((HiveContextAwareRecordReader) currRecReader).initIOContext(target, job, target.inputFormat.getClass(), reader);
  } else {
    currRecReader = reader;
  }
  key = currRecReader.createKey();
  value = currRecReader.createValue();
  headerCount = footerCount = 0;
  return currRecReader;
}
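A minimal sketch of how a caller could drain the reader returned above; the class and method names are made up and this is not Hive's actual fetch loop. Key and value objects are created once by the reader, reused across next() calls, and each value is handed to a deserializer to produce a row object.

import java.io.IOException;
import org.apache.hadoop.hive.serde2.Deserializer;
import org.apache.hadoop.hive.serde2.SerDeException;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.mapred.RecordReader;

public class FetchLoopSketch {
  // Reads every record from one reader and deserializes it (illustrative helper only).
  static void drain(RecordReader<WritableComparable, Writable> reader, Deserializer serDe)
      throws IOException, SerDeException {
    WritableComparable key = reader.createKey();
    Writable value = reader.createValue();
    while (reader.next(key, value)) {
      Object row = serDe.deserialize(value); // raw row, before any ObjectInspector conversion
      System.out.println(row);
    }
    reader.close();
  }
}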
Use of org.apache.hadoop.hive.serde2.typeinfo.TypeInfo in project phoenix by apache.
The class PhoenixSerDe, method createLazyPhoenixInspector.
private ObjectInspector createLazyPhoenixInspector(Configuration conf, Properties tbl) throws SerDeException {
  List<String> columnNameList = Arrays.asList(tbl.getProperty(serdeConstants.LIST_COLUMNS).split(PhoenixStorageHandlerConstants.COMMA));
  List<TypeInfo> columnTypeList = TypeInfoUtils.getTypeInfosFromTypeString(tbl.getProperty(serdeConstants.LIST_COLUMN_TYPES));
  List<ObjectInspector> columnObjectInspectors = Lists.newArrayListWithExpectedSize(columnTypeList.size());
  for (TypeInfo typeInfo : columnTypeList) {
    columnObjectInspectors.add(PhoenixObjectInspectorFactory.createObjectInspector(typeInfo, serdeParams));
  }
  return LazyObjectInspectorFactory.getLazySimpleStructObjectInspector(columnNameList, columnObjectInspectors, null, serdeParams.getSeparators()[0], serdeParams, ObjectInspectorOptions.JAVA);
}
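A small self-contained sketch of the two table properties this method reads; the property values and class name are made up. LIST_COLUMNS and LIST_COLUMN_TYPES are parallel comma-separated lists, and the type string is parsed into one TypeInfo per column.

import java.util.Arrays;
import java.util.List;
import java.util.Properties;
import org.apache.hadoop.hive.serde.serdeConstants;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;

public class PhoenixColumnSketch {
  public static void main(String[] args) {
    Properties tbl = new Properties();
    tbl.setProperty(serdeConstants.LIST_COLUMNS, "id,name,created");       // assumed values
    tbl.setProperty(serdeConstants.LIST_COLUMN_TYPES, "int,string,timestamp");

    List<String> names = Arrays.asList(tbl.getProperty(serdeConstants.LIST_COLUMNS).split(","));
    List<TypeInfo> types = TypeInfoUtils.getTypeInfosFromTypeString(tbl.getProperty(serdeConstants.LIST_COLUMN_TYPES));
    for (int i = 0; i < names.size(); i++) {
      System.out.println(names.get(i) + " -> " + types.get(i).getTypeName());
    }
  }
}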
Use of org.apache.hadoop.hive.serde2.typeinfo.TypeInfo in project drill by apache.
The class HiveAbstractReader, method init.
private void init() throws ExecutionSetupException {
  final JobConf job = new JobConf(hiveConf);
  // Get the configured default partition value (hive.exec.default.partition.name)
  defaultPartitionValue = hiveConf.get(ConfVars.DEFAULTPARTITIONNAME.varname);
  Properties tableProperties;
  try {
    tableProperties = HiveUtilities.getTableMetadata(table);
    final Properties partitionProperties = (partition == null) ? tableProperties : HiveUtilities.getPartitionMetadata(partition, table);
    HiveUtilities.addConfToJob(job, partitionProperties);
    final SerDe tableSerDe = createSerDe(job, table.getSd().getSerdeInfo().getSerializationLib(), tableProperties);
    final StructObjectInspector tableOI = getStructOI(tableSerDe);
    if (partition != null) {
      partitionSerDe = createSerDe(job, partition.getSd().getSerdeInfo().getSerializationLib(), partitionProperties);
      partitionOI = getStructOI(partitionSerDe);
      finalOI = (StructObjectInspector) ObjectInspectorConverters.getConvertedOI(partitionOI, tableOI);
      partTblObjectInspectorConverter = ObjectInspectorConverters.getConverter(partitionOI, finalOI);
      job.setInputFormat(HiveUtilities.getInputFormatClass(job, partition.getSd(), table));
    } else {
      // For non-partitioned tables, there is no need to create a converter as no schema changes are expected.
      partitionSerDe = tableSerDe;
      partitionOI = tableOI;
      partTblObjectInspectorConverter = null;
      finalOI = tableOI;
      job.setInputFormat(HiveUtilities.getInputFormatClass(job, table.getSd(), table));
    }
    if (logger.isTraceEnabled()) {
      for (StructField field : finalOI.getAllStructFieldRefs()) {
        logger.trace("field in finalOI: {}", field.getClass().getName());
      }
      logger.trace("partitionSerDe class is {}", partitionSerDe.getClass().getName());
    }
    // Get the list of partition column names
    final List<String> partitionNames = Lists.newArrayList();
    for (FieldSchema field : table.getPartitionKeys()) {
      partitionNames.add(field.getName());
    }
    // We should always get the column names from the ObjectInspector. For some tables (e.g. Avro) the metastore
    // may not contain the schema; instead it is derived from other sources such as table properties or an external file.
    // The SerDe object knows how to get the schema with all the config and table properties passed in initialization.
    // The ObjectInspector created from the SerDe object has the schema.
    final StructTypeInfo sTypeInfo = (StructTypeInfo) TypeInfoUtils.getTypeInfoFromObjectInspector(finalOI);
    final List<String> tableColumnNames = sTypeInfo.getAllStructFieldNames();
    // Select the list of columns for projection pushdown into Hive SerDe readers.
    final List<Integer> columnIds = Lists.newArrayList();
    if (isStarQuery()) {
      selectedColumnNames = tableColumnNames;
      for (int i = 0; i < selectedColumnNames.size(); i++) {
        columnIds.add(i);
      }
      selectedPartitionNames = partitionNames;
    } else {
      selectedColumnNames = Lists.newArrayList();
      for (SchemaPath field : getColumns()) {
        String columnName = field.getRootSegment().getPath();
        if (partitionNames.contains(columnName)) {
          selectedPartitionNames.add(columnName);
        } else {
          columnIds.add(tableColumnNames.indexOf(columnName));
          selectedColumnNames.add(columnName);
        }
      }
    }
    ColumnProjectionUtils.appendReadColumns(job, columnIds, selectedColumnNames);
    for (String columnName : selectedColumnNames) {
      StructField fieldRef = finalOI.getStructFieldRef(columnName);
      selectedStructFieldRefs.add(fieldRef);
      ObjectInspector fieldOI = fieldRef.getFieldObjectInspector();
      TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(fieldOI.getTypeName());
      selectedColumnObjInspectors.add(fieldOI);
      selectedColumnTypes.add(typeInfo);
      selectedColumnFieldConverters.add(HiveFieldConverter.create(typeInfo, fragmentContext));
    }
    for (int i = 0; i < selectedColumnNames.size(); ++i) {
      logger.trace("inspector:typeName={}, className={}, TypeInfo: {}, converter:{}", selectedColumnObjInspectors.get(i).getTypeName(), selectedColumnObjInspectors.get(i).getClass().getName(), selectedColumnTypes.get(i).toString(), selectedColumnFieldConverters.get(i).getClass().getName());
    }
    for (int i = 0; i < table.getPartitionKeys().size(); i++) {
      FieldSchema field = table.getPartitionKeys().get(i);
      if (selectedPartitionNames.contains(field.getName())) {
        TypeInfo pType = TypeInfoUtils.getTypeInfoFromTypeString(field.getType());
        selectedPartitionTypes.add(pType);
        if (partition != null) {
          selectedPartitionValues.add(HiveUtilities.convertPartitionType(pType, partition.getValues().get(i), defaultPartitionValue));
        }
      }
    }
  } catch (Exception e) {
    throw new ExecutionSetupException("Failure while initializing Hive Reader " + this.getClass().getName(), e);
  }
  if (!empty) {
    try {
      reader = (org.apache.hadoop.mapred.RecordReader<Object, Object>) job.getInputFormat().getRecordReader(inputSplit, job, Reporter.NULL);
      logger.trace("hive reader created: {} for inputSplit {}", reader.getClass().getName(), inputSplit.toString());
    } catch (Exception e) {
      throw new ExecutionSetupException("Failed to get o.a.hadoop.mapred.RecordReader from Hive InputFormat", e);
    }
    internalInit(tableProperties, reader);
  }
}
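A short standalone sketch, with hypothetical type strings and class name, of the two TypeInfo lookups used above: partition key types arrive as plain Hive type strings on FieldSchema, while the table row schema can be viewed as a StructTypeInfo to obtain its field names, mirroring getTypeInfoFromObjectInspector(finalOI).

import java.util.List;
import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;

public class TypeInfoLookupSketch {
  public static void main(String[] args) {
    // A partition column declared as "dt string" in the metastore would yield "string" here.
    TypeInfo partType = TypeInfoUtils.getTypeInfoFromTypeString("string");
    System.out.println("partition type: " + partType.getTypeName());

    // A row schema expressed as a struct type string exposes its column names.
    StructTypeInfo rowType = (StructTypeInfo) TypeInfoUtils.getTypeInfoFromTypeString("struct<id:int,name:string>");
    List<String> fieldNames = rowType.getAllStructFieldNames();
    System.out.println("table columns: " + fieldNames);
  }
}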