Use of org.apache.hadoop.hive.serde2.Deserializer in project hive by apache.
Class MapOperator, method initEmptyInputChildren.
/*
 * This is the same as the setChildren method below, but for empty tables.
 * It takes care of the following:
 * 1. Create the right object inspector.
 * 2. Set up childrenOpToOI with that object inspector.
 * This ensures that initialization happens correctly.
 */
public void initEmptyInputChildren(List<Operator<?>> children, Configuration hconf)
    throws SerDeException, Exception {
  setChildOperators(children);
  Map<String, Configuration> tableNameToConf = cloneConfsForNestedColPruning(hconf);
  for (Operator<?> child : children) {
    TableScanOperator tsOp = (TableScanOperator) child;
    StructObjectInspector soi = null;
    PartitionDesc partDesc = conf.getAliasToPartnInfo().get(tsOp.getConf().getAlias());
    Configuration newConf = tableNameToConf.get(partDesc.getTableDesc().getTableName());
    Deserializer serde = partDesc.getTableDesc().getDeserializer();
    partDesc.setProperties(partDesc.getProperties());
    MapOpCtx opCtx = new MapOpCtx(tsOp.getConf().getAlias(), child, partDesc);
    StructObjectInspector tableRowOI = (StructObjectInspector) serde.getObjectInspector();
    initObjectInspector(newConf, opCtx, tableRowOI);
    soi = opCtx.rowObjectInspector;
    child.getParentOperators().add(this);
    childrenOpToOI.put(child, soi);
  }
}
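For context, a hedged sketch of what the childrenOpToOI map built above is typically used for: downstream initialization hands each child operator the row object inspector recorded for it. Operator.initialize(Configuration, ObjectInspector[]) is a real Hive API, but this exact loop is illustrative, not the actual MapOperator code.

// Hedged sketch: how childrenOpToOI might be consumed downstream.
// The loop is illustrative; only Operator.initialize is the real API.
for (Map.Entry<Operator<?>, StructObjectInspector> e : childrenOpToOI.entrySet()) {
  e.getKey().initialize(hconf, new ObjectInspector[] { e.getValue() }); // throws HiveException
}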
Use of org.apache.hadoop.hive.serde2.Deserializer in project hive by apache.
Class MapOperator, method getConvertedOI.
/**
 * Returns the mapping from each table descriptor to the expected table OI.
 *
 * Traverses all the partitions for a table and gets the OI for the table.
 * Note that a conversion is required if any partition OI differs from the
 * table OI. E.g., if the query references table T (partitions P1, P2),
 * P1's schema is the same as T's, and P2's schema differs from T's, then
 * conversion might be needed for both P1 and P2, since a SettableOI might
 * be needed for T.
 */
private Map<TableDesc, StructObjectInspector> getConvertedOI(Map<String, Configuration> tableToConf)
    throws HiveException {
  Map<TableDesc, StructObjectInspector> tableDescOI = new HashMap<TableDesc, StructObjectInspector>();
  Set<TableDesc> identityConverterTableDesc = new HashSet<TableDesc>();
  try {
    Map<ObjectInspector, Boolean> oiSettableProperties = new HashMap<ObjectInspector, Boolean>();
    for (Path onefile : conf.getPathToAliases().keySet()) {
      PartitionDesc pd = conf.getPathToPartitionInfo().get(onefile);
      TableDesc tableDesc = pd.getTableDesc();
      Configuration hconf = tableToConf.get(tableDesc.getTableName());
      Deserializer partDeserializer = pd.getDeserializer(hconf);
      StructObjectInspector partRawRowObjectInspector;
      boolean isAcid = AcidUtils.isTablePropertyTransactional(tableDesc.getProperties());
      if (Utilities.isSchemaEvolutionEnabled(hconf, isAcid) && Utilities.isInputFileFormatSelfDescribing(pd)) {
        // Self-describing formats evolve to the table schema, so use the table deserializer.
        Deserializer tblDeserializer = tableDesc.getDeserializer(hconf);
        partRawRowObjectInspector = (StructObjectInspector) tblDeserializer.getObjectInspector();
      } else {
        partRawRowObjectInspector = (StructObjectInspector) partDeserializer.getObjectInspector();
      }
      StructObjectInspector tblRawRowObjectInspector = tableDescOI.get(tableDesc);
      if ((tblRawRowObjectInspector == null) || (identityConverterTableDesc.contains(tableDesc))) {
        Deserializer tblDeserializer = tableDesc.getDeserializer(hconf);
        tblRawRowObjectInspector = (StructObjectInspector) ObjectInspectorConverters.getConvertedOI(
            partRawRowObjectInspector, tblDeserializer.getObjectInspector(), oiSettableProperties);
        // Track tables whose partition OIs all equal the table OI; those only
        // need an identity conversion. One mismatching partition evicts the table.
        if (identityConverterTableDesc.contains(tableDesc)) {
          if (!partRawRowObjectInspector.equals(tblRawRowObjectInspector)) {
            identityConverterTableDesc.remove(tableDesc);
          }
        } else if (partRawRowObjectInspector.equals(tblRawRowObjectInspector)) {
          identityConverterTableDesc.add(tableDesc);
        }
        tableDescOI.put(tableDesc, tblRawRowObjectInspector);
      }
    }
  } catch (Exception e) {
    throw new HiveException(e);
  }
  return tableDescOI;
}
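To see why the identity-converter bookkeeping matters, here is a hedged sketch of how a raw partition row would later be converted to the table OI. ObjectInspectorConverters.getConverter and Converter.convert are real serde2 APIs; the row variable is illustrative.

// Hedged sketch: converting a raw partition row into the table's OI shape.
// getConverter returns an identity converter when the two OIs already match,
// which is exactly the case tracked by identityConverterTableDesc above.
ObjectInspectorConverters.Converter converter =
    ObjectInspectorConverters.getConverter(partRawRowObjectInspector, tblRawRowObjectInspector);
Object rowInTableShape = converter.convert(rawRowFromPartition); // rawRowFromPartition is illustrative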
Use of org.apache.hadoop.hive.serde2.Deserializer in project hive by apache.
Class JdbcSerDe, method initialize.
/*
 * This method gets called multiple times by Hive. On some invocations the
 * properties will be empty, so we only initialize the class variables once
 * the properties are actually populated.
 *
 * @see org.apache.hadoop.hive.serde2.Deserializer#initialize(org.apache.hadoop.conf.Configuration, java.util.Properties)
 */
@Override
public void initialize(Configuration conf, Properties tbl) throws SerDeException {
  try {
    LOGGER.debug("Initializing the SerDe");
    // Hive cdh-4.3 does not provide the properties object on all calls
    if (tbl.containsKey(JdbcStorageConfig.DATABASE_TYPE.getPropertyName())) {
      Configuration tableConfig = JdbcStorageConfigManager.convertPropertiesToConfiguration(tbl);
      DatabaseAccessor dbAccessor = DatabaseAccessorFactory.getAccessor(tableConfig);
      columnNames = dbAccessor.getColumnNames(tableConfig);
      numColumns = columnNames.size();
      String[] hiveColumnNameArray = parseProperty(tbl.getProperty(serdeConstants.LIST_COLUMNS), ",");
      if (numColumns != hiveColumnNameArray.length) {
        throw new SerDeException("Expected " + numColumns + " columns. Table definition has "
            + hiveColumnNameArray.length + " columns");
      }
      List<String> hiveColumnNames = Arrays.asList(hiveColumnNameArray);
      hiveColumnTypeArray = parseProperty(tbl.getProperty(serdeConstants.LIST_COLUMN_TYPES), ":");
      if (hiveColumnTypeArray.length == 0) {
        throw new SerDeException("Received an empty Hive column type definition");
      }
      // Every column is surfaced through a string inspector; type handling comes later.
      List<ObjectInspector> fieldInspectors = new ArrayList<ObjectInspector>(numColumns);
      for (int i = 0; i < numColumns; i++) {
        fieldInspectors.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector);
      }
      objectInspector = ObjectInspectorFactory.getStandardStructObjectInspector(hiveColumnNames, fieldInspectors);
      row = new ArrayList<String>(numColumns);
    }
  } catch (Exception e) {
    LOGGER.error("Caught exception while initializing the SqlSerDe", e);
    throw new SerDeException(e);
  }
}
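For context, a hedged sketch of the table properties this method inspects. serdeConstants.LIST_COLUMNS and serdeConstants.LIST_COLUMN_TYPES are the real keys Hive passes; the "hive.sql.database.type" key is an assumption for JdbcStorageConfig.DATABASE_TYPE, and a real call would also need JDBC connection properties, since the DatabaseAccessor queries the configured database for column names.

// Hedged sketch of the properties JdbcSerDe.initialize() expects to see.
Properties tbl = new Properties();
tbl.setProperty("hive.sql.database.type", "MYSQL"); // assumed key for JdbcStorageConfig.DATABASE_TYPE
tbl.setProperty(serdeConstants.LIST_COLUMNS, "id,name");        // from the Hive table definition
tbl.setProperty(serdeConstants.LIST_COLUMN_TYPES, "int:string");
JdbcSerDe serde = new JdbcSerDe();
serde.initialize(new Configuration(), tbl); // would also need connection properties configured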
Use of org.apache.hadoop.hive.serde2.Deserializer in project hive by apache.
Class HiveInputFormat, method wrapForLlap.
public static InputFormat<WritableComparable, Writable> wrapForLlap(
    InputFormat<WritableComparable, Writable> inputFormat, Configuration conf,
    PartitionDesc part) throws HiveException {
  if (!HiveConf.getBoolVar(conf, ConfVars.LLAP_IO_ENABLED, LlapProxy.isDaemon())) {
    // LLAP not enabled, no-op.
    return inputFormat;
  }
  String ifName = inputFormat.getClass().getCanonicalName();
  boolean isSupported = inputFormat instanceof LlapWrappableInputFormatInterface;
  boolean isVectorized = Utilities.getUseVectorizedInputFileFormat(conf);
  if (!isVectorized) {
    // Pretend it's vectorized if the non-vector wrapper is enabled.
    isVectorized = HiveConf.getBoolVar(conf, ConfVars.LLAP_IO_NONVECTOR_WRAPPER_ENABLED)
        && (Utilities.getPlanPath(conf) != null);
  }
  boolean isSerdeBased = false;
  if (isVectorized && !isSupported && HiveConf.getBoolVar(conf, ConfVars.LLAP_IO_ENCODE_ENABLED)) {
    // See if we can use re-encoding to read the format through the IO elevator.
    String formatList = HiveConf.getVar(conf, ConfVars.LLAP_IO_ENCODE_FORMATS);
    if (LOG.isDebugEnabled()) {
      LOG.debug("Checking " + ifName + " against " + formatList);
    }
    String[] formats = StringUtils.getStrings(formatList);
    if (formats != null) {
      for (String format : formats) {
        // TODO: should we check isAssignableFrom?
        if (ifName.equals(format)) {
          if (LOG.isInfoEnabled()) {
            LOG.info("Using SerDe-based LLAP reader for " + ifName);
          }
          isSupported = isSerdeBased = true;
          break;
        }
      }
    }
  }
  if (!isSupported || !isVectorized) {
    if (LOG.isInfoEnabled()) {
      LOG.info("Not using llap for " + ifName + ": supported = " + isSupported
          + ", vectorized = " + isVectorized);
    }
    return inputFormat;
  }
  if (LOG.isDebugEnabled()) {
    LOG.debug("Wrapping " + ifName);
  }
  @SuppressWarnings("unchecked")
  LlapIo<VectorizedRowBatch> llapIo = LlapProxy.getIo();
  if (llapIo == null) {
    if (LOG.isInfoEnabled()) {
      LOG.info("Not using LLAP IO because it is not initialized");
    }
    return inputFormat;
  }
  Deserializer serde = null;
  if (isSerdeBased) {
    if (part == null) {
      if (LOG.isInfoEnabled()) {
        LOG.info("Not using LLAP IO because there's no partition spec for SerDe-based IF");
      }
      return inputFormat;
    }
    VectorPartitionDesc vpart = part.getVectorPartitionDesc();
    if (vpart != null) {
      VectorMapOperatorReadType old = vpart.getVectorMapOperatorReadType();
      if (old != VectorMapOperatorReadType.VECTORIZED_INPUT_FILE_FORMAT) {
        if (LOG.isInfoEnabled()) {
          LOG.info("Resetting VectorMapOperatorReadType from " + old + " for partition "
              + part.getTableName() + " " + part.getPartSpec());
        }
        vpart.setVectorMapOperatorReadType(VectorMapOperatorReadType.VECTORIZED_INPUT_FILE_FORMAT);
      }
    }
    try {
      serde = part.getDeserializer(conf);
    } catch (Exception e) {
      throw new HiveException("Error creating SerDe for LLAP IO", e);
    }
  }
  InputFormat<?, ?> wrappedIf = llapIo.getInputFormat(inputFormat, serde);
  if (wrappedIf == null) {
    // We cannot wrap; the cause is logged inside.
    return inputFormat;
  }
  return castInputFormat(wrappedIf);
}
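A hedged sketch of the call site: because every bail-out path above returns the original input format, callers can invoke wrapForLlap unconditionally. The variables baseFormat, partDesc, split, and reporter are assumed to come from the caller; getRecordReader is the standard mapred API.

// Hedged usage sketch: wrapping is a no-op whenever LLAP IO cannot be used.
InputFormat<WritableComparable, Writable> wrapped =
    HiveInputFormat.wrapForLlap(baseFormat, jobConf, partDesc);
RecordReader<WritableComparable, Writable> reader =
    wrapped.getRecordReader(split, jobConf, reporter); // split/reporter from the caller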
Use of org.apache.hadoop.hive.serde2.Deserializer in project hive by apache.
Class TestSerdeWithFieldComments, method testFieldComments.
public void testFieldComments() throws MetaException, SerDeException {
  StructObjectInspector mockSOI = mock(StructObjectInspector.class);
  when(mockSOI.getCategory()).thenReturn(ObjectInspector.Category.STRUCT);
  // A raw List sidesteps the wildcard in getAllStructFieldRefs()'s return type.
  List fieldRefs = new ArrayList<StructField>();
  // Add a field with a comment...
  fieldRefs.add(mockedStructField("first", "type name 1", "this is a comment"));
  // ... and one without.
  fieldRefs.add(mockedStructField("second", "type name 2", null));
  when(mockSOI.getAllStructFieldRefs()).thenReturn(fieldRefs);
  Deserializer mockDe = mock(Deserializer.class);
  when(mockDe.getObjectInspector()).thenReturn(mockSOI);
  List<FieldSchema> result = MetaStoreUtils.getFieldsFromDeserializer("testTable", mockDe);
  assertEquals(2, result.size());
  assertEquals("first", result.get(0).getName());
  assertEquals("this is a comment", result.get(0).getComment());
  assertEquals("second", result.get(1).getName());
  // A missing comment is filled in with the "from deserializer" placeholder.
  assertEquals("from deserializer", result.get(1).getComment());
}
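The mockedStructField helper is not shown on this page. A plausible reconstruction using Mockito follows; the StructField getters are real serde2 APIs, but the helper body is an assumption, not the verbatim test code.

// Hedged reconstruction of the helper used above.
private StructField mockedStructField(String name, String typeName, String comment) {
  ObjectInspector fieldOI = mock(ObjectInspector.class);
  when(fieldOI.getTypeName()).thenReturn(typeName);
  StructField field = mock(StructField.class);
  when(field.getFieldName()).thenReturn(name);
  when(field.getFieldObjectInspector()).thenReturn(fieldOI);
  when(field.getFieldComment()).thenReturn(comment);
  return field;
}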