Use of org.apache.hadoop.hive.serde2.Deserializer in project hive by apache.
The class Table, method getFields.
public ArrayList<StructField> getFields() {
  ArrayList<StructField> fields = new ArrayList<StructField>();
  try {
    Deserializer decoder = getDeserializer();
    // Expand out all the columns of the table
    StructObjectInspector structObjectInspector = (StructObjectInspector) decoder.getObjectInspector();
    List<? extends StructField> fld_lst = structObjectInspector.getAllStructFieldRefs();
    for (StructField field : fld_lst) {
      fields.add(field);
    }
  } catch (SerDeException e) {
    throw new RuntimeException(e);
  }
  return fields;
}
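The snippet above relies on a table-level Deserializer exposing a struct-shaped ObjectInspector. A minimal standalone sketch of the same pattern, assuming a LazySimpleSerDe initialized with a hypothetical two-column schema (the class name DeserializerFieldsSketch and the column names are illustrative, not from the Hive source):

import java.util.Properties;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.serde.serdeConstants;
import org.apache.hadoop.hive.serde2.SerDeUtils;
import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;

public class DeserializerFieldsSketch {
  public static void main(String[] args) throws Exception {
    // Hypothetical table schema passed through the standard SerDe properties.
    Properties props = new Properties();
    props.setProperty(serdeConstants.LIST_COLUMNS, "id,name");
    props.setProperty(serdeConstants.LIST_COLUMN_TYPES, "int,string");
    LazySimpleSerDe serde = new LazySimpleSerDe();
    SerDeUtils.initializeSerDe(serde, new Configuration(), props, null);
    // As in getFields above: the table Deserializer yields a StructObjectInspector.
    StructObjectInspector soi = (StructObjectInspector) serde.getObjectInspector();
    for (StructField field : soi.getAllStructFieldRefs()) {
      System.out.println(field.getFieldName() + " : " + field.getFieldObjectInspector().getTypeName());
    }
  }
}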
Use of org.apache.hadoop.hive.serde2.Deserializer in project hive by apache.
The class HashTableDummyOperator, method initializeOp.
@Override
protected void initializeOp(Configuration hconf) throws HiveException {
  super.initializeOp(hconf);
  TableDesc tbl = this.getConf().getTbl();
  try {
    Deserializer serde = tbl.getDeserializerClass().newInstance();
    SerDeUtils.initializeSerDe(serde, hconf, tbl.getProperties(), null);
    this.outputObjInspector = serde.getObjectInspector();
  } catch (Exception e) {
    LOG.error("Generating output obj inspector from dummy object error", e);
    e.printStackTrace();
  }
}
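Note the contrast with Table.getFields above: there a SerDeException is rethrown as a RuntimeException, while here a failure is only logged, so a misconfigured SerDe would surface later, when the dummy operator's output ObjectInspector is actually consumed.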
Use of org.apache.hadoop.hive.serde2.Deserializer in project hive by apache.
The class DDLTask, method createTableLike.
/**
 * Create a new table like an existing table.
 *
 * @param db
 *          The database in question.
 * @param crtTbl
 *          This is the table we're creating.
 * @return Returns 0 when execution succeeds and above 0 if it fails.
 * @throws HiveException
 *           Throws this exception if an unexpected error occurs.
 */
private int createTableLike(Hive db, CreateTableLikeDesc crtTbl) throws Exception {
  // Get the existing table
  Table oldtbl = db.getTable(crtTbl.getLikeTableName());
  Table tbl;
  if (oldtbl.getTableType() == TableType.VIRTUAL_VIEW || oldtbl.getTableType() == TableType.MATERIALIZED_VIEW) {
    String targetTableName = crtTbl.getTableName();
    tbl = db.newTable(targetTableName);
    if (crtTbl.getTblProps() != null) {
      tbl.getTTable().getParameters().putAll(crtTbl.getTblProps());
    }
    tbl.setTableType(TableType.MANAGED_TABLE);
    if (crtTbl.isExternal()) {
      tbl.setProperty("EXTERNAL", "TRUE");
      tbl.setTableType(TableType.EXTERNAL_TABLE);
    }
    tbl.setFields(oldtbl.getCols());
    tbl.setPartCols(oldtbl.getPartCols());
    if (crtTbl.getDefaultSerName() == null) {
      LOG.info("Default to LazySimpleSerDe for table " + crtTbl.getTableName());
      tbl.setSerializationLib(org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe.class.getName());
    } else {
      // let's validate that the serde exists
      validateSerDe(crtTbl.getDefaultSerName());
      tbl.setSerializationLib(crtTbl.getDefaultSerName());
    }
    if (crtTbl.getDefaultSerdeProps() != null) {
      Iterator<Entry<String, String>> iter = crtTbl.getDefaultSerdeProps().entrySet().iterator();
      while (iter.hasNext()) {
        Entry<String, String> m = iter.next();
        tbl.setSerdeParam(m.getKey(), m.getValue());
      }
    }
    tbl.setInputFormatClass(crtTbl.getDefaultInputFormat());
    tbl.setOutputFormatClass(crtTbl.getDefaultOutputFormat());
    tbl.getTTable().getSd().setInputFormat(tbl.getInputFormatClass().getName());
    tbl.getTTable().getSd().setOutputFormat(tbl.getOutputFormatClass().getName());
  } else {
    tbl = oldtbl;
    // find out database name and table name of target table
    String targetTableName = crtTbl.getTableName();
    String[] names = Utilities.getDbTableName(targetTableName);
    tbl.setDbName(names[0]);
    tbl.setTableName(names[1]);
    // using old table object, hence reset the owner to current user for new table.
    tbl.setOwner(SessionState.getUserFromAuthenticator());
    if (crtTbl.getLocation() != null) {
      tbl.setDataLocation(new Path(crtTbl.getLocation()));
    } else {
      tbl.unsetDataLocation();
    }
    Class<? extends Deserializer> serdeClass = oldtbl.getDeserializerClass();
    Map<String, String> params = tbl.getParameters();
    // We should copy only those table parameters that are specified in the config.
    SerDeSpec spec = AnnotationUtils.getAnnotation(serdeClass, SerDeSpec.class);
    String paramsStr = HiveConf.getVar(conf, HiveConf.ConfVars.DDL_CTL_PARAMETERS_WHITELIST);
    Set<String> retainer = new HashSet<String>();
    // for non-native table, property storage_handler should be retained
    retainer.add(META_TABLE_STORAGE);
    if (spec != null && spec.schemaProps() != null) {
      retainer.addAll(Arrays.asList(spec.schemaProps()));
    }
    if (paramsStr != null) {
      retainer.addAll(Arrays.asList(paramsStr.split(",")));
    }
    // Retain Parquet INT96 write zone property to keep Parquet timezone bugfixes.
    if (params.get(ParquetTableUtils.PARQUET_INT96_WRITE_ZONE_PROPERTY) != null) {
      retainer.add(ParquetTableUtils.PARQUET_INT96_WRITE_ZONE_PROPERTY);
    }
    if (!retainer.isEmpty()) {
      params.keySet().retainAll(retainer);
    } else {
      params.clear();
    }
    if (crtTbl.getTblProps() != null) {
      params.putAll(crtTbl.getTblProps());
    }
    if (crtTbl.isUserStorageFormat()) {
      tbl.setInputFormatClass(crtTbl.getDefaultInputFormat());
      tbl.setOutputFormatClass(crtTbl.getDefaultOutputFormat());
      tbl.getTTable().getSd().setInputFormat(tbl.getInputFormatClass().getName());
      tbl.getTTable().getSd().setOutputFormat(tbl.getOutputFormatClass().getName());
      if (crtTbl.getDefaultSerName() == null) {
        LOG.info("Default to LazySimpleSerDe for like table " + crtTbl.getTableName());
        tbl.setSerializationLib(org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe.class.getName());
      } else {
        // let's validate that the serde exists
        validateSerDe(crtTbl.getDefaultSerName());
        tbl.setSerializationLib(crtTbl.getDefaultSerName());
      }
    }
    tbl.getTTable().setTemporary(crtTbl.isTemporary());
    if (crtTbl.isExternal()) {
      tbl.setProperty("EXTERNAL", "TRUE");
      tbl.setTableType(TableType.EXTERNAL_TABLE);
    } else {
      tbl.getParameters().remove("EXTERNAL");
    }
  }
  if (!Utilities.isDefaultNameNode(conf)) {
    // If location is specified - ensure that it is a full qualified name
    makeLocationQualified(tbl.getDbName(), tbl.getTTable().getSd(), tbl.getTableName(), conf);
  }
  if (crtTbl.getLocation() == null && !tbl.isPartitioned() && conf.getBoolVar(HiveConf.ConfVars.HIVESTATSAUTOGATHER)) {
    StatsSetupConst.setBasicStatsStateForCreateTable(tbl.getTTable().getParameters(), StatsSetupConst.TRUE);
  }
  // create the table
  db.createTable(tbl, crtTbl.getIfNotExists());
  addIfAbsentByName(new WriteEntity(tbl, WriteEntity.WriteType.DDL_NO_LOCK));
  return 0;
}
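One step in the non-view branch is easy to miss: parameters carried over from the source table are dropped unless they appear in the retainer set built from the SerDe's @SerDeSpec schema properties, the DDL_CTL_PARAMETERS_WHITELIST setting, the storage handler key, and the Parquet INT96 write zone key. A standalone sketch of just that retention step, assuming a plain comma-separated whitelist string in place of the HiveConf lookup (retainWhitelisted and the sample property keys are hypothetical):

import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;

public class ParamWhitelistSketch {
  // Keep only the whitelisted parameters; an empty whitelist drops everything.
  static void retainWhitelisted(Map<String, String> params, String whitelist) {
    Set<String> retainer = new HashSet<String>();
    if (whitelist != null && !whitelist.isEmpty()) {
      retainer.addAll(Arrays.asList(whitelist.split(",")));
    }
    if (retainer.isEmpty()) {
      params.clear();
    } else {
      params.keySet().retainAll(retainer);
    }
  }

  public static void main(String[] args) {
    Map<String, String> params = new HashMap<String, String>();
    params.put("orc.compress", "ZLIB");
    params.put("comment", "copied from the source table");
    retainWhitelisted(params, "orc.compress");
    System.out.println(params); // prints {orc.compress=ZLIB}
  }
}

Properties given explicitly in the CREATE TABLE ... LIKE statement (crtTbl.getTblProps()) are applied after the retention step, so they always override whatever survived it.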
Use of org.apache.hadoop.hive.serde2.Deserializer in project hive by apache.
The class Vectorizer, method canSpecializeReduceSink.
private boolean canSpecializeReduceSink(ReduceSinkDesc desc, boolean isTezOrSpark, VectorizationContext vContext, VectorReduceSinkInfo vectorReduceSinkInfo) throws HiveException {
  // Allocate a VectorReduceSinkDesc initially with key type NONE so EXPLAIN can report this
  // operator was vectorized, but not native. And, the conditions.
  VectorReduceSinkDesc vectorDesc = new VectorReduceSinkDesc();
  desc.setVectorDesc(vectorDesc);
  boolean isVectorizationReduceSinkNativeEnabled = HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVE_VECTORIZATION_REDUCESINK_NEW_ENABLED);
  String engine = HiveConf.getVar(hiveConf, HiveConf.ConfVars.HIVE_EXECUTION_ENGINE);
  boolean hasBuckets = desc.getBucketCols() != null && !desc.getBucketCols().isEmpty();
  boolean hasTopN = desc.getTopN() >= 0;
  boolean useUniformHash = desc.getReducerTraits().contains(UNIFORM);
  boolean hasDistinctColumns = desc.getDistinctColumnIndices().size() > 0;
  TableDesc keyTableDesc = desc.getKeySerializeInfo();
  Class<? extends Deserializer> keySerializerClass = keyTableDesc.getDeserializerClass();
  boolean isKeyBinarySortable = (keySerializerClass == org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe.class);
  TableDesc valueTableDesc = desc.getValueSerializeInfo();
  Class<? extends Deserializer> valueDeserializerClass = valueTableDesc.getDeserializerClass();
  boolean isValueLazyBinary = (valueDeserializerClass == org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe.class);
  // Remember the condition variables for EXPLAIN regardless.
  vectorDesc.setIsVectorizationReduceSinkNativeEnabled(isVectorizationReduceSinkNativeEnabled);
  vectorDesc.setEngine(engine);
  vectorDesc.setHasBuckets(hasBuckets);
  vectorDesc.setHasTopN(hasTopN);
  vectorDesc.setUseUniformHash(useUniformHash);
  vectorDesc.setHasDistinctColumns(hasDistinctColumns);
  vectorDesc.setIsKeyBinarySortable(isKeyBinarySortable);
  vectorDesc.setIsValueLazyBinary(isValueLazyBinary);
  // Many restrictions.
  if (!isVectorizationReduceSinkNativeEnabled || !isTezOrSpark || hasBuckets || hasTopN || !useUniformHash || hasDistinctColumns || !isKeyBinarySortable || !isValueLazyBinary) {
    return false;
  }
  // We are doing work here we'd normally do in VectorGroupByCommonOperator's constructor.
  // So if we later decide not to specialize, we'll just waste any scratch columns allocated...
  List<ExprNodeDesc> keysDescs = desc.getKeyCols();
  VectorExpression[] allKeyExpressions = vContext.getVectorExpressions(keysDescs);
  // Since a key expression can be a calculation and the key will go into a scratch column,
  // we need the mapping and type information.
  int[] reduceSinkKeyColumnMap = new int[allKeyExpressions.length];
  TypeInfo[] reduceSinkKeyTypeInfos = new TypeInfo[allKeyExpressions.length];
  Type[] reduceSinkKeyColumnVectorTypes = new Type[allKeyExpressions.length];
  ArrayList<VectorExpression> groupByKeyExpressionsList = new ArrayList<VectorExpression>();
  VectorExpression[] reduceSinkKeyExpressions;
  for (int i = 0; i < reduceSinkKeyColumnMap.length; i++) {
    VectorExpression ve = allKeyExpressions[i];
    reduceSinkKeyColumnMap[i] = ve.getOutputColumn();
    reduceSinkKeyTypeInfos[i] = keysDescs.get(i).getTypeInfo();
    reduceSinkKeyColumnVectorTypes[i] = VectorizationContext.getColumnVectorTypeFromTypeInfo(reduceSinkKeyTypeInfos[i]);
    if (!IdentityExpression.isColumnOnly(ve)) {
      groupByKeyExpressionsList.add(ve);
    }
  }
  if (groupByKeyExpressionsList.size() == 0) {
    reduceSinkKeyExpressions = null;
  } else {
    reduceSinkKeyExpressions = groupByKeyExpressionsList.toArray(new VectorExpression[0]);
  }
  ArrayList<ExprNodeDesc> valueDescs = desc.getValueCols();
  VectorExpression[] allValueExpressions = vContext.getVectorExpressions(valueDescs);
  int[] reduceSinkValueColumnMap = new int[valueDescs.size()];
  TypeInfo[] reduceSinkValueTypeInfos = new TypeInfo[valueDescs.size()];
  Type[] reduceSinkValueColumnVectorTypes = new Type[valueDescs.size()];
  ArrayList<VectorExpression> reduceSinkValueExpressionsList = new ArrayList<VectorExpression>();
  VectorExpression[] reduceSinkValueExpressions;
  for (int i = 0; i < valueDescs.size(); ++i) {
    VectorExpression ve = allValueExpressions[i];
    reduceSinkValueColumnMap[i] = ve.getOutputColumn();
    reduceSinkValueTypeInfos[i] = valueDescs.get(i).getTypeInfo();
    reduceSinkValueColumnVectorTypes[i] = VectorizationContext.getColumnVectorTypeFromTypeInfo(reduceSinkValueTypeInfos[i]);
    if (!IdentityExpression.isColumnOnly(ve)) {
      reduceSinkValueExpressionsList.add(ve);
    }
  }
  if (reduceSinkValueExpressionsList.size() == 0) {
    reduceSinkValueExpressions = null;
  } else {
    reduceSinkValueExpressions = reduceSinkValueExpressionsList.toArray(new VectorExpression[0]);
  }
  vectorReduceSinkInfo.setReduceSinkKeyColumnMap(reduceSinkKeyColumnMap);
  vectorReduceSinkInfo.setReduceSinkKeyTypeInfos(reduceSinkKeyTypeInfos);
  vectorReduceSinkInfo.setReduceSinkKeyColumnVectorTypes(reduceSinkKeyColumnVectorTypes);
  vectorReduceSinkInfo.setReduceSinkKeyExpressions(reduceSinkKeyExpressions);
  vectorReduceSinkInfo.setReduceSinkValueColumnMap(reduceSinkValueColumnMap);
  vectorReduceSinkInfo.setReduceSinkValueTypeInfos(reduceSinkValueTypeInfos);
  vectorReduceSinkInfo.setReduceSinkValueColumnVectorTypes(reduceSinkValueColumnVectorTypes);
  vectorReduceSinkInfo.setReduceSinkValueExpressions(reduceSinkValueExpressions);
  return true;
}
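Two conventions in this method are worth calling out. First, every disqualifying condition is recorded on vectorDesc before the combined check, so EXPLAIN can still report why the ReduceSink was not specialized even when the method returns false. Second, the key and value expression arrays are set to null whenever every expression is a plain column reference. A generic sketch of that second convention, with a hypothetical element type and predicate standing in for VectorExpression and IdentityExpression.isColumnOnly:

import java.util.ArrayList;
import java.util.List;
import java.util.function.Predicate;

public class NonIdentityFilterSketch {
  // Keep only expressions that compute something; null means "nothing extra to evaluate".
  static <T> T[] nonIdentityOrNull(T[] all, Predicate<T> isColumnOnly, T[] emptyArray) {
    List<T> computed = new ArrayList<T>();
    for (T e : all) {
      if (!isColumnOnly.test(e)) {
        computed.add(e);
      }
    }
    return computed.isEmpty() ? null : computed.toArray(emptyArray);
  }

  public static void main(String[] args) {
    String[] exprs = { "col:0", "col:1", "upper(col:2)" };
    String[] kept = nonIdentityOrNull(exprs, s -> s.startsWith("col:"), new String[0]);
    System.out.println(kept == null ? "null" : String.join(", ", kept)); // prints upper(col:2)
  }
}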
Use of org.apache.hadoop.hive.serde2.Deserializer in project hive by apache.
The class GenericColumnVectorProducer, method createReadPipeline.
@Override
public ReadPipeline createReadPipeline(Consumer<ColumnVectorBatch> consumer, FileSplit split, List<Integer> columnIds, SearchArgument sarg, String[] columnNames, QueryFragmentCounters counters, TypeDescription schema, InputFormat<?, ?> sourceInputFormat, Deserializer sourceSerDe, Reporter reporter, JobConf job, Map<Path, PartitionDesc> parts) throws IOException {
  cacheMetrics.incrCacheReadRequests();
  OrcEncodedDataConsumer edc = new OrcEncodedDataConsumer(consumer, columnIds.size(), false, counters, ioMetrics);
  SerDeFileMetadata fm;
  try {
    fm = new SerDeFileMetadata(sourceSerDe);
  } catch (SerDeException e) {
    throw new IOException(e);
  }
  edc.setFileMetadata(fm);
  // Note that we pass job config to the record reader, but use global config for LLAP IO.
  SerDeEncodedDataReader reader = new SerDeEncodedDataReader(cache, bufferManager, conf, split, columnIds, edc, job, reporter, sourceInputFormat, sourceSerDe, counters, fm.getSchema(), parts);
  edc.init(reader, reader);
  if (LlapIoImpl.LOG.isDebugEnabled()) {
    LlapIoImpl.LOG.debug("Ignoring schema: " + schema);
  }
  return edc;
}
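As the in-code comment notes, the job configuration is handed to the record reader while the LLAP IO layer keeps using the global configuration. A SerDeException raised while deriving file metadata from the source SerDe is wrapped into an IOException, and the incoming schema argument is only logged at debug level; the reader is constructed with fm.getSchema(), which was derived from the SerDe itself.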