use of org.apache.hadoop.hive.serde2.AbstractSerDe in project hive by apache.
the class Table method shouldStoreFieldsInMetastore.
public static boolean shouldStoreFieldsInMetastore(HiveConf conf, String serdeLib, Map<String, String> tableParams) {
if (hasMetastoreBasedSchema(conf, serdeLib)) {
return true;
}
if (HiveConf.getBoolVar(conf, ConfVars.HIVE_LEGACY_SCHEMA_FOR_ALL_SERDES)) {
return true;
}
// Table may or may not be using metastore. Only the SerDe can tell us.
AbstractSerDe deserializer = null;
try {
Class<?> clazz = conf.getClassByName(serdeLib);
if (!AbstractSerDe.class.isAssignableFrom(clazz)) {
// The default.
return true;
}
deserializer = ReflectionUtil.newInstance(conf.getClassByName(serdeLib).asSubclass(AbstractSerDe.class), conf);
} catch (Exception ex) {
LOG.warn("Cannot initialize SerDe: " + serdeLib + ", ignoring", ex);
return true;
}
return deserializer.shouldStoreFieldsInMetastore(tableParams);
}
use of org.apache.hadoop.hive.serde2.AbstractSerDe in project hive by apache.
the class DDLTask method describeTable.
/**
* Write the description of a table to a file.
*
* @param db
* The database in question.
* @param descTbl
* This is the table we're interested in.
* @return Returns 0 when execution succeeds and above 0 if it fails.
* @throws HiveException
* Throws this exception if an unexpected error occurs.
* @throws MetaException
*/
private int describeTable(Hive db, DescTableDesc descTbl) throws HiveException, MetaException {
String colPath = descTbl.getColumnPath();
String tableName = descTbl.getTableName();
// describe the table - populate the output stream
Table tbl = db.getTable(tableName, false);
if (tbl == null) {
throw new HiveException(ErrorMsg.INVALID_TABLE, tableName);
}
Partition part = null;
if (descTbl.getPartSpec() != null) {
part = db.getPartition(tbl, descTbl.getPartSpec(), false);
if (part == null) {
throw new HiveException(ErrorMsg.INVALID_PARTITION, StringUtils.join(descTbl.getPartSpec().keySet(), ','), tableName);
}
tbl = part.getTable();
}
DataOutputStream outStream = getOutputStream(descTbl.getResFile());
try {
LOG.debug("DDLTask: got data for {}", tableName);
List<FieldSchema> cols = null;
List<ColumnStatisticsObj> colStats = null;
Deserializer deserializer = tbl.getDeserializer(true);
if (deserializer instanceof AbstractSerDe) {
String errorMsgs = ((AbstractSerDe) deserializer).getConfigurationErrors();
if (errorMsgs != null && !errorMsgs.isEmpty()) {
throw new SQLException(errorMsgs);
}
}
if (colPath.equals(tableName)) {
cols = (part == null || tbl.getTableType() == TableType.VIRTUAL_VIEW) ? tbl.getCols() : part.getCols();
if (!descTbl.isFormatted()) {
cols.addAll(tbl.getPartCols());
}
if (tbl.isPartitioned() && part == null) {
// No partitioned specified for partitioned table, lets fetch all.
Map<String, String> tblProps = tbl.getParameters() == null ? new HashMap<String, String>() : tbl.getParameters();
Map<String, Long> valueMap = new HashMap<>();
Map<String, Boolean> stateMap = new HashMap<>();
for (String stat : StatsSetupConst.supportedStats) {
valueMap.put(stat, 0L);
stateMap.put(stat, true);
}
PartitionIterable parts = new PartitionIterable(db, tbl, null, conf.getIntVar(HiveConf.ConfVars.METASTORE_BATCH_RETRIEVE_MAX));
int numParts = 0;
for (Partition partition : parts) {
Map<String, String> props = partition.getParameters();
Boolean state = StatsSetupConst.areBasicStatsUptoDate(props);
for (String stat : StatsSetupConst.supportedStats) {
stateMap.put(stat, stateMap.get(stat) && state);
if (props != null && props.get(stat) != null) {
valueMap.put(stat, valueMap.get(stat) + Long.parseLong(props.get(stat)));
}
}
numParts++;
}
for (String stat : StatsSetupConst.supportedStats) {
StatsSetupConst.setBasicStatsState(tblProps, Boolean.toString(stateMap.get(stat)));
tblProps.put(stat, valueMap.get(stat).toString());
}
tblProps.put(StatsSetupConst.NUM_PARTITIONS, Integer.toString(numParts));
tbl.setParameters(tblProps);
}
} else {
if (descTbl.isFormatted()) {
// when column name is specified in describe table DDL, colPath will
// will be table_name.column_name
String colName = colPath.split("\\.")[1];
String[] dbTab = Utilities.getDbTableName(tableName);
List<String> colNames = new ArrayList<String>();
colNames.add(colName.toLowerCase());
if (null == part) {
if (tbl.isPartitioned()) {
Map<String, String> tblProps = tbl.getParameters() == null ? new HashMap<String, String>() : tbl.getParameters();
if (tbl.isPartitionKey(colNames.get(0))) {
FieldSchema partCol = tbl.getPartColByName(colNames.get(0));
cols = Collections.singletonList(partCol);
PartitionIterable parts = new PartitionIterable(db, tbl, null, conf.getIntVar(HiveConf.ConfVars.METASTORE_BATCH_RETRIEVE_MAX));
ColumnInfo ci = new ColumnInfo(partCol.getName(), TypeInfoUtils.getTypeInfoFromTypeString(partCol.getType()), null, false);
ColStatistics cs = StatsUtils.getColStatsForPartCol(ci, parts, conf);
ColumnStatisticsData data = new ColumnStatisticsData();
ColStatistics.Range r = cs.getRange();
StatObjectConverter.fillColumnStatisticsData(partCol.getType(), data, r == null ? null : r.minValue, r == null ? null : r.maxValue, r == null ? null : r.minValue, r == null ? null : r.maxValue, r == null ? null : r.minValue.toString(), r == null ? null : r.maxValue.toString(), cs.getNumNulls(), cs.getCountDistint(), null, cs.getAvgColLen(), cs.getAvgColLen(), cs.getNumTrues(), cs.getNumFalses());
ColumnStatisticsObj cso = new ColumnStatisticsObj(partCol.getName(), partCol.getType(), data);
colStats = Collections.singletonList(cso);
StatsSetupConst.setColumnStatsState(tblProps, colNames);
} else {
cols = Hive.getFieldsFromDeserializer(colPath, deserializer);
List<String> parts = db.getPartitionNames(dbTab[0].toLowerCase(), dbTab[1].toLowerCase(), (short) -1);
AggrStats aggrStats = db.getAggrColStatsFor(dbTab[0].toLowerCase(), dbTab[1].toLowerCase(), colNames, parts);
colStats = aggrStats.getColStats();
if (parts.size() == aggrStats.getPartsFound()) {
StatsSetupConst.setColumnStatsState(tblProps, colNames);
} else {
StatsSetupConst.removeColumnStatsState(tblProps, colNames);
}
}
tbl.setParameters(tblProps);
} else {
cols = Hive.getFieldsFromDeserializer(colPath, deserializer);
colStats = db.getTableColumnStatistics(dbTab[0].toLowerCase(), dbTab[1].toLowerCase(), colNames);
}
} else {
List<String> partitions = new ArrayList<String>();
partitions.add(part.getName());
cols = Hive.getFieldsFromDeserializer(colPath, deserializer);
colStats = db.getPartitionColumnStatistics(dbTab[0].toLowerCase(), dbTab[1].toLowerCase(), partitions, colNames).get(part.getName());
}
} else {
cols = Hive.getFieldsFromDeserializer(colPath, deserializer);
}
}
PrimaryKeyInfo pkInfo = null;
ForeignKeyInfo fkInfo = null;
UniqueConstraint ukInfo = null;
NotNullConstraint nnInfo = null;
DefaultConstraint dInfo = null;
CheckConstraint cInfo = null;
if (descTbl.isExt() || descTbl.isFormatted()) {
pkInfo = db.getPrimaryKeys(tbl.getDbName(), tbl.getTableName());
fkInfo = db.getForeignKeys(tbl.getDbName(), tbl.getTableName());
ukInfo = db.getUniqueConstraints(tbl.getDbName(), tbl.getTableName());
nnInfo = db.getNotNullConstraints(tbl.getDbName(), tbl.getTableName());
dInfo = db.getDefaultConstraints(tbl.getDbName(), tbl.getTableName());
cInfo = db.getCheckConstraints(tbl.getDbName(), tbl.getTableName());
}
fixDecimalColumnTypeName(cols);
// In case the query is served by HiveServer2, don't pad it with spaces,
// as HiveServer2 output is consumed by JDBC/ODBC clients.
boolean isOutputPadded = !SessionState.get().isHiveServerQuery();
formatter.describeTable(outStream, colPath, tableName, tbl, part, cols, descTbl.isFormatted(), descTbl.isExt(), isOutputPadded, colStats, pkInfo, fkInfo, ukInfo, nnInfo, dInfo, cInfo);
LOG.debug("DDLTask: written data for {}", tableName);
} catch (SQLException e) {
throw new HiveException(e, ErrorMsg.GENERIC_ERROR, tableName);
} finally {
IOUtils.closeStream(outStream);
}
return 0;
}
use of org.apache.hadoop.hive.serde2.AbstractSerDe in project hive by apache.
the class ReduceRecordSource method init.
void init(JobConf jconf, Operator<?> reducer, boolean vectorized, TableDesc keyTableDesc, TableDesc valueTableDesc, Reader reader, boolean handleGroupKey, byte tag, VectorizedRowBatchCtx batchContext, long vectorizedVertexNum, int vectorizedTestingReducerBatchSize) throws Exception {
this.vectorizedVertexNum = vectorizedVertexNum;
if (vectorizedTestingReducerBatchSize > VectorizedRowBatch.DEFAULT_SIZE) {
// For now, we don't go higher than the default batch size unless we do more work
// to verify every vectorized operator downstream can handle a larger batch size.
vectorizedTestingReducerBatchSize = VectorizedRowBatch.DEFAULT_SIZE;
}
this.vectorizedTestingReducerBatchSize = vectorizedTestingReducerBatchSize;
ObjectInspector keyObjectInspector;
this.reducer = reducer;
this.vectorized = vectorized;
this.keyTableDesc = keyTableDesc;
if (reader instanceof KeyValueReader) {
this.reader = new KeyValuesFromKeyValue((KeyValueReader) reader);
} else {
this.reader = new KeyValuesFromKeyValues((KeyValuesReader) reader);
}
this.handleGroupKey = handleGroupKey;
this.tag = tag;
try {
inputKeyDeserializer = ReflectionUtils.newInstance(keyTableDesc.getDeserializerClass(), null);
SerDeUtils.initializeSerDe(inputKeyDeserializer, null, keyTableDesc.getProperties(), null);
keyObjectInspector = inputKeyDeserializer.getObjectInspector();
if (vectorized) {
keyStructInspector = (StructObjectInspector) keyObjectInspector;
firstValueColumnOffset = keyStructInspector.getAllStructFieldRefs().size();
}
// We should initialize the SerDe with the TypeInfo when available.
this.valueTableDesc = valueTableDesc;
inputValueDeserializer = (AbstractSerDe) ReflectionUtils.newInstance(valueTableDesc.getDeserializerClass(), null);
SerDeUtils.initializeSerDe(inputValueDeserializer, null, valueTableDesc.getProperties(), null);
valueObjectInspector = inputValueDeserializer.getObjectInspector();
ArrayList<ObjectInspector> ois = new ArrayList<ObjectInspector>();
if (vectorized) {
/* vectorization only works with struct object inspectors */
valueStructInspectors = (StructObjectInspector) valueObjectInspector;
final int totalColumns = firstValueColumnOffset + valueStructInspectors.getAllStructFieldRefs().size();
rowObjectInspector = Utilities.constructVectorizedReduceRowOI(keyStructInspector, valueStructInspectors);
batch = batchContext.createVectorizedRowBatch();
// Setup vectorized deserialization for the key and value.
BinarySortableSerDe binarySortableSerDe = (BinarySortableSerDe) inputKeyDeserializer;
keyBinarySortableDeserializeToRow = new VectorDeserializeRow<BinarySortableDeserializeRead>(new BinarySortableDeserializeRead(VectorizedBatchUtil.typeInfosFromStructObjectInspector(keyStructInspector), /* useExternalBuffer */
true, binarySortableSerDe.getSortOrders(), binarySortableSerDe.getNullMarkers(), binarySortableSerDe.getNotNullMarkers()));
keyBinarySortableDeserializeToRow.init(0);
final int valuesSize = valueStructInspectors.getAllStructFieldRefs().size();
if (valuesSize > 0) {
valueLazyBinaryDeserializeToRow = new VectorDeserializeRow<LazyBinaryDeserializeRead>(new LazyBinaryDeserializeRead(VectorizedBatchUtil.typeInfosFromStructObjectInspector(valueStructInspectors), /* useExternalBuffer */
true));
valueLazyBinaryDeserializeToRow.init(firstValueColumnOffset);
// Create data buffers for value bytes column vectors.
for (int i = firstValueColumnOffset; i < batch.numCols; i++) {
ColumnVector colVector = batch.cols[i];
if (colVector instanceof BytesColumnVector) {
BytesColumnVector bytesColumnVector = (BytesColumnVector) colVector;
bytesColumnVector.initBuffer();
}
}
}
} else {
ois.add(keyObjectInspector);
ois.add(valueObjectInspector);
rowObjectInspector = ObjectInspectorFactory.getStandardStructObjectInspector(Utilities.reduceFieldNameList, ois);
}
} catch (Throwable e) {
abort = true;
if (e instanceof OutOfMemoryError) {
// Don't create a new object if we are already out of memory
throw (OutOfMemoryError) e;
} else {
throw new RuntimeException("Reduce operator initialization failed", e);
}
}
perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.TEZ_INIT_OPERATORS);
}
use of org.apache.hadoop.hive.serde2.AbstractSerDe in project hive by apache.
the class FlatRowContainer method add.
/**
* Called when loading the hashtable.
*/
public void add(MapJoinObjectSerDeContext context, BytesWritable value) throws HiveException {
AbstractSerDe serde = context.getSerDe();
// has tag => need to set later
isAliasFilterSet = !context.hasFilterTag();
if (rowLength == UNKNOWN) {
try {
rowLength = ObjectInspectorUtils.getStructSize(serde.getObjectInspector());
} catch (SerDeException ex) {
throw new HiveException("Get structure size error", ex);
}
if (rowLength == 0) {
array = EMPTY_OBJECT_ARRAY;
}
}
if (rowLength > 0) {
int rowCount = (array.length / rowLength);
listRealloc(array.length + rowLength);
read(serde, value, rowCount);
} else {
// see rowLength javadoc
--rowLength;
}
}
use of org.apache.hadoop.hive.serde2.AbstractSerDe in project hive by apache.
the class MapJoinEagerRowContainer method read.
@SuppressWarnings("unchecked")
public void read(MapJoinObjectSerDeContext context, Writable currentValue) throws SerDeException {
AbstractSerDe serde = context.getSerDe();
List<Object> value = (List<Object>) ObjectInspectorUtils.copyToStandardObject(serde.deserialize(currentValue), serde.getObjectInspector(), ObjectInspectorCopyOption.WRITABLE);
if (value == null) {
addRow(toList(EMPTY_OBJECT_ARRAY));
} else {
Object[] valuesArray = value.toArray();
if (context.hasFilterTag()) {
aliasFilter &= ((ShortWritable) valuesArray[valuesArray.length - 1]).get();
}
addRow(toList(valuesArray));
}
}
Aggregations