Use of org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils.getTypeInfoFromTypeString in project hive by apache.
The class HBaseUtils, method desierliazeDbNameTableNameFromPartitionKey.
private static List<String> desierliazeDbNameTableNameFromPartitionKey(byte[] key, Configuration conf) {
  StringBuffer names = new StringBuffer();
  names.append("dbName,tableName,");
  StringBuffer types = new StringBuffer();
  types.append("string,string,");
  BinarySortableSerDe serDe = new BinarySortableSerDe();
  Properties props = new Properties();
  props.setProperty(serdeConstants.LIST_COLUMNS, names.toString());
  props.setProperty(serdeConstants.LIST_COLUMN_TYPES, types.toString());
  try {
    serDe.initialize(conf, props);
    List deserializedkeys = ((List) serDe.deserialize(new BytesWritable(key))).subList(0, 2);
    List<String> keys = new ArrayList<>();
    for (int i = 0; i < deserializedkeys.size(); i++) {
      Object deserializedKey = deserializedkeys.get(i);
      if (deserializedKey == null) {
        throw new RuntimeException("Can't have a null dbname or tablename");
      } else {
        TypeInfo inputType = TypeInfoUtils.getTypeInfoFromTypeString("string");
        ObjectInspector inputOI = TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(inputType);
        Converter converter = ObjectInspectorConverters.getConverter(inputOI, PrimitiveObjectInspectorFactory.javaStringObjectInspector);
        keys.add((String) converter.convert(deserializedKey));
      }
    }
    return keys;
  } catch (SerDeException e) {
    throw new RuntimeException("Error when deserialize key", e);
  }
}
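The Writable-to-Java conversion idiom in the loop above can be exercised on its own. The following is a minimal standalone sketch, not from the Hive source (the class name and sample value are invented): it parses the type string "string" into a TypeInfo, builds the writable-backed ObjectInspector matching what BinarySortableSerDe deserializes strings into, and converts a Text writable to a plain java.lang.String.

import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
import org.apache.hadoop.io.Text;

public class TypeInfoConvertDemo { // hypothetical demo class, not in Hive
  public static void main(String[] args) {
    // Parse a type string into a TypeInfo, exactly as the method above does.
    TypeInfo stringType = TypeInfoUtils.getTypeInfoFromTypeString("string");
    // Writable-backed inspector for the deserialized value...
    ObjectInspector writableOI =
        TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(stringType);
    // ...converted to a plain java.lang.String.
    Converter toJavaString = ObjectInspectorConverters.getConverter(
        writableOI, PrimitiveObjectInspectorFactory.javaStringObjectInspector);
    String s = (String) toJavaString.convert(new Text("default")); // sample value
    System.out.println(s); // prints: default
  }
}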
Use of org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils.getTypeInfoFromTypeString in project hive by apache.
The class HBaseUtils, method buildPartitionKey.
static byte[] buildPartitionKey(String dbName, String tableName, List<String> partTypes, List<String> partVals, boolean endPrefix) {
  Object[] components = new Object[partVals.size()];
  for (int i = 0; i < partVals.size(); i++) {
    TypeInfo expectedType = TypeInfoUtils.getTypeInfoFromTypeString(partTypes.get(i));
    ObjectInspector outputOI = TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(expectedType);
    Converter converter = ObjectInspectorConverters.getConverter(PrimitiveObjectInspectorFactory.javaStringObjectInspector, outputOI);
    components[i] = converter.convert(partVals.get(i));
  }
  return buildSerializedPartitionKey(dbName, tableName, partTypes, components, endPrefix);
}
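buildPartitionKey runs the same machinery in the opposite direction: each partition value arrives as a string and is converted to the Java object its declared column type expects before serialization. Below is a minimal sketch of one loop iteration, assuming a hypothetical int partition column (the class name and the value "42" are invented for illustration).

import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;

public class PartitionValueConvertDemo { // hypothetical demo class, not in Hive
  public static void main(String[] args) {
    // partTypes.get(i) would supply the type string; "int" is assumed here.
    TypeInfo expectedType = TypeInfoUtils.getTypeInfoFromTypeString("int");
    // A standard *Java* inspector, so the converted value is a java.lang.Integer.
    ObjectInspector outputOI = TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(expectedType);
    Converter converter = ObjectInspectorConverters.getConverter(
        PrimitiveObjectInspectorFactory.javaStringObjectInspector, outputOI);
    Object component = converter.convert("42"); // sample partition value
    System.out.println(component + " : " + component.getClass()); // 42 : class java.lang.Integer
  }
}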
Use of org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils.getTypeInfoFromTypeString in project hive by apache.
The class DDLTask, method describeTable.
/**
 * Write the description of a table to a file.
 *
 * @param db
 *          The database in question.
 * @param descTbl
 *          This is the table we're interested in.
 * @return Returns 0 when execution succeeds and above 0 if it fails.
 * @throws HiveException
 *           Throws this exception if an unexpected error occurs.
 * @throws MetaException
 */
private int describeTable(Hive db, DescTableDesc descTbl) throws HiveException, MetaException {
  String colPath = descTbl.getColumnPath();
  String tableName = descTbl.getTableName();
  // describe the table - populate the output stream
  Table tbl = db.getTable(tableName, false);
  if (tbl == null) {
    throw new HiveException(ErrorMsg.INVALID_TABLE, tableName);
  }
  Partition part = null;
  if (descTbl.getPartSpec() != null) {
    part = db.getPartition(tbl, descTbl.getPartSpec(), false);
    if (part == null) {
      throw new HiveException(ErrorMsg.INVALID_PARTITION, StringUtils.join(descTbl.getPartSpec().keySet(), ','), tableName);
    }
    tbl = part.getTable();
  }
  DataOutputStream outStream = getOutputStream(descTbl.getResFile());
  try {
    LOG.debug("DDLTask: got data for " + tbl.getTableName());
    List<FieldSchema> cols = null;
    List<ColumnStatisticsObj> colStats = null;
    Deserializer deserializer = tbl.getDeserializer(true);
    if (deserializer instanceof AbstractSerDe) {
      String errorMsgs = ((AbstractSerDe) deserializer).getConfigurationErrors();
      if (errorMsgs != null && !errorMsgs.isEmpty()) {
        throw new SQLException(errorMsgs);
      }
    }
    if (colPath.equals(tableName)) {
      cols = (part == null || tbl.getTableType() == TableType.VIRTUAL_VIEW) ? tbl.getCols() : part.getCols();
      if (!descTbl.isFormatted()) {
        cols.addAll(tbl.getPartCols());
      }
      if (tbl.isPartitioned() && part == null) {
        // No partition specified for a partitioned table; fetch all of them.
        Map<String, String> tblProps = tbl.getParameters() == null ? new HashMap<String, String>() : tbl.getParameters();
        PartitionIterable parts = new PartitionIterable(db, tbl, null, conf.getIntVar(HiveConf.ConfVars.METASTORE_BATCH_RETRIEVE_MAX));
        for (String stat : StatsSetupConst.supportedStats) {
          boolean state = true;
          long statVal = 0L;
          for (Partition partition : parts) {
            Map<String, String> props = partition.getParameters();
            state &= StatsSetupConst.areBasicStatsUptoDate(props);
            if (props != null && props.get(stat) != null) {
              statVal += Long.parseLong(props.get(stat));
            }
          }
          StatsSetupConst.setBasicStatsState(tblProps, Boolean.toString(state));
          tblProps.put(stat, String.valueOf(statVal));
        }
        tbl.setParameters(tblProps);
      }
    } else {
      if (descTbl.isFormatted()) {
        // when a column name is specified in the describe table DDL, colPath
        // will be table_name.column_name
        String colName = colPath.split("\\.")[1];
        String[] dbTab = Utilities.getDbTableName(tableName);
        List<String> colNames = new ArrayList<String>();
        colNames.add(colName.toLowerCase());
        if (null == part) {
          if (tbl.isPartitioned()) {
            Map<String, String> tblProps = tbl.getParameters() == null ? new HashMap<String, String>() : tbl.getParameters();
            if (tbl.isPartitionKey(colNames.get(0))) {
              FieldSchema partCol = tbl.getPartColByName(colNames.get(0));
              cols = Collections.singletonList(partCol);
              PartitionIterable parts = new PartitionIterable(db, tbl, null, conf.getIntVar(HiveConf.ConfVars.METASTORE_BATCH_RETRIEVE_MAX));
              ColumnInfo ci = new ColumnInfo(partCol.getName(), TypeInfoUtils.getTypeInfoFromTypeString(partCol.getType()), null, false);
              ColStatistics cs = StatsUtils.getColStatsForPartCol(ci, parts, conf);
              ColumnStatisticsData data = new ColumnStatisticsData();
              ColStatistics.Range r = cs.getRange();
              StatObjectConverter.fillColumnStatisticsData(partCol.getType(), data,
                  r == null ? null : r.minValue, r == null ? null : r.maxValue,
                  r == null ? null : r.minValue, r == null ? null : r.maxValue,
                  r == null ? null : r.minValue.toString(), r == null ? null : r.maxValue.toString(),
                  cs.getNumNulls(), cs.getCountDistint(), cs.getAvgColLen(), cs.getAvgColLen(),
                  cs.getNumTrues(), cs.getNumFalses());
              ColumnStatisticsObj cso = new ColumnStatisticsObj(partCol.getName(), partCol.getType(), data);
              colStats = Collections.singletonList(cso);
              StatsSetupConst.setColumnStatsState(tblProps, colNames);
            } else {
              cols = Hive.getFieldsFromDeserializer(colPath, deserializer);
              List<String> parts = db.getPartitionNames(dbTab[0].toLowerCase(), dbTab[1].toLowerCase(), (short) -1);
              AggrStats aggrStats = db.getAggrColStatsFor(dbTab[0].toLowerCase(), dbTab[1].toLowerCase(), colNames, parts);
              colStats = aggrStats.getColStats();
              if (parts.size() == aggrStats.getPartsFound()) {
                StatsSetupConst.setColumnStatsState(tblProps, colNames);
              } else {
                StatsSetupConst.removeColumnStatsState(tblProps, colNames);
              }
            }
            tbl.setParameters(tblProps);
          } else {
            cols = Hive.getFieldsFromDeserializer(colPath, deserializer);
            colStats = db.getTableColumnStatistics(dbTab[0].toLowerCase(), dbTab[1].toLowerCase(), colNames);
          }
        } else {
          List<String> partitions = new ArrayList<String>();
          partitions.add(part.getName());
          cols = Hive.getFieldsFromDeserializer(colPath, deserializer);
          colStats = db.getPartitionColumnStatistics(dbTab[0].toLowerCase(), dbTab[1].toLowerCase(), partitions, colNames).get(part.getName());
        }
      } else {
        cols = Hive.getFieldsFromDeserializer(colPath, deserializer);
      }
    }
    PrimaryKeyInfo pkInfo = null;
    ForeignKeyInfo fkInfo = null;
    if (descTbl.isExt() || descTbl.isFormatted()) {
      pkInfo = db.getPrimaryKeys(tbl.getDbName(), tbl.getTableName());
      fkInfo = db.getForeignKeys(tbl.getDbName(), tbl.getTableName());
    }
    fixDecimalColumnTypeName(cols);
    // In case the query is served by HiveServer2, don't pad it with spaces,
    // as HiveServer2 output is consumed by JDBC/ODBC clients.
    boolean isOutputPadded = !SessionState.get().isHiveServerQuery();
    formatter.describeTable(outStream, colPath, tableName, tbl, part, cols, descTbl.isFormatted(), descTbl.isExt(), descTbl.isPretty(), isOutputPadded, colStats, pkInfo, fkInfo);
    LOG.debug("DDLTask: written data for " + tbl.getTableName());
  } catch (SQLException e) {
    throw new HiveException(e, ErrorMsg.GENERIC_ERROR, tableName);
  } finally {
    IOUtils.closeStream(outStream);
  }
  return 0;
}
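The partition-column stats path above builds a ColumnInfo straight from FieldSchema.getType(), which works because getTypeInfoFromTypeString accepts parameterized and nested type strings as well as plain primitives. A small illustrative sketch (the class name is invented, not from Hive):

import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;

public class TypeStringParseDemo { // hypothetical demo class, not in Hive
  public static void main(String[] args) {
    // Parameterized and nested type strings both parse into structured TypeInfos.
    TypeInfo decimalType = TypeInfoUtils.getTypeInfoFromTypeString("decimal(10,2)");
    TypeInfo nestedType = TypeInfoUtils.getTypeInfoFromTypeString("map<string,array<int>>");
    System.out.println(decimalType.getTypeName()); // decimal(10,2)
    System.out.println(nestedType.getCategory());  // MAP
  }
}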
Use of org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils.getTypeInfoFromTypeString in project hive by apache.
The class BaseSemanticAnalyzer, method validatePartColumnType.
public static void validatePartColumnType(Table tbl, Map<String, String> partSpec, ASTNode astNode, HiveConf conf) throws SemanticException {
  if (!HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_TYPE_CHECK_ON_INSERT)) {
    return;
  }
  Map<ASTNode, ExprNodeDesc> astExprNodeMap = new HashMap<ASTNode, ExprNodeDesc>();
  if (!getPartExprNodeDesc(astNode, conf, astExprNodeMap)) {
    STATIC_LOG.warn("Dynamic partitioning is used; only validating " + astExprNodeMap.size() + " columns");
  }
  if (astExprNodeMap.isEmpty()) {
    // All columns are dynamic, nothing to do.
    return;
  }
  List<FieldSchema> parts = tbl.getPartitionKeys();
  Map<String, String> partCols = new HashMap<String, String>(parts.size());
  for (FieldSchema col : parts) {
    partCols.put(col.getName(), col.getType().toLowerCase());
  }
  for (Entry<ASTNode, ExprNodeDesc> astExprNodePair : astExprNodeMap.entrySet()) {
    String astKeyName = astExprNodePair.getKey().toString().toLowerCase();
    if (astExprNodePair.getKey().getType() == HiveParser.Identifier) {
      astKeyName = stripIdentifierQuotes(astKeyName);
    }
    String colType = partCols.get(astKeyName);
    ObjectInspector inputOI = TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(astExprNodePair.getValue().getTypeInfo());
    TypeInfo expectedType = TypeInfoUtils.getTypeInfoFromTypeString(colType);
    ObjectInspector outputOI = TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(expectedType);
    // Since partVal is a constant, it is safe to cast ExprNodeDesc to ExprNodeConstantDesc.
    // Its value should be in normalized format (e.g. no leading zeros in integers, dates in
    // YYYY-MM-DD format, etc.)
    Object value = ((ExprNodeConstantDesc) astExprNodePair.getValue()).getValue();
    Object convertedValue = value;
    if (!inputOI.getTypeName().equals(outputOI.getTypeName())) {
      convertedValue = ObjectInspectorConverters.getConverter(inputOI, outputOI).convert(value);
      if (convertedValue == null) {
        throw new SemanticException(ErrorMsg.PARTITION_SPEC_TYPE_MISMATCH, astKeyName, inputOI.getTypeName(), outputOI.getTypeName());
      }
      if (!convertedValue.toString().equals(value.toString())) {
        // the value might have been changed by normalization during conversion
        STATIC_LOG.warn("Partition " + astKeyName + " expects type " + outputOI.getTypeName() + " but input value is in type " + inputOI.getTypeName() + ". Convert " + value.toString() + " to " + convertedValue.toString());
      }
    }
    if (!convertedValue.toString().equals(partSpec.get(astKeyName))) {
      STATIC_LOG.warn("Partition Spec " + astKeyName + "=" + partSpec.get(astKeyName) + " has been changed to " + astKeyName + "=" + convertedValue.toString());
    }
    partSpec.put(astKeyName, convertedValue.toString());
  }
}
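The normalization warning above is easiest to see in isolation. The sketch below, with an invented class name and the invented value "007", converts a string partition value to a hypothetical int column's type and shows the string-form mismatch that validatePartColumnType logs:

import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;

public class PartSpecNormalizationDemo { // hypothetical demo class, not in Hive
  public static void main(String[] args) {
    // Assume a partition column declared as int and a user-supplied value "007".
    TypeInfo expectedType = TypeInfoUtils.getTypeInfoFromTypeString("int");
    ObjectInspector outputOI = TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(expectedType);
    Object converted = ObjectInspectorConverters.getConverter(
        PrimitiveObjectInspectorFactory.javaStringObjectInspector, outputOI).convert("007");
    // Conversion normalizes the value; the string-form mismatch triggers the warning above.
    System.out.println(converted);                          // 7
    System.out.println("007".equals(converted.toString())); // false
  }
}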
Use of org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils.getTypeInfoFromTypeString in project hive by apache.
The class SemanticAnalyzer, method handleInsertStatementSpec.
/**
 * This modifies the Select projections when the Select is part of an insert statement and
 * the insert statement specifies a column list for the target table, e.g.
 *   create table source (a int, b int);
 *   create table target (x int, y int, z int);
 *   insert into target(z,x) select * from source
 *
 * Once the * is resolved to 'a,b', this list needs to be rewritten to 'b,null,a' so that it
 * looks as if the original query had been written as
 *   insert into target select b, null, a from source
 *
 * If the target schema is not specified, this is a no-op.
 *
 * @see #handleInsertStatementSpecPhase1(ASTNode, QBParseInfo, org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.Phase1Ctx)
 * @throws SemanticException
 */
public RowResolver handleInsertStatementSpec(List<ExprNodeDesc> col_list, String dest, RowResolver outputRR, RowResolver inputRR, QB qb, ASTNode selExprList) throws SemanticException {
  // (z,x) as specified in the query
  List<String> targetTableSchema = qb.getParseInfo().getDestSchemaForClause(dest);
  if (targetTableSchema == null) {
    // no insert schema was specified
    return outputRR;
  }
  if (targetTableSchema.size() != col_list.size()) {
    Table target = qb.getMetaData().getDestTableForAlias(dest);
    Partition partition = target == null ? qb.getMetaData().getDestPartitionForAlias(dest) : null;
    throw new SemanticException(generateErrorMessage(selExprList, "Expected " + targetTableSchema.size() + " columns for " + dest + (target != null ? "/" + target.getCompleteName() : (partition != null ? "/" + partition.getCompleteName() : "")) + "; select produces " + col_list.size() + " columns"));
  }
  // e.g. map z -> expr for a
  Map<String, ExprNodeDesc> targetCol2Projection = new HashMap<String, ExprNodeDesc>();
  // e.g. map z -> ColumnInfo for a
  Map<String, ColumnInfo> targetCol2ColumnInfo = new HashMap<String, ColumnInfo>();
  int colListPos = 0;
  for (String targetCol : targetTableSchema) {
    targetCol2ColumnInfo.put(targetCol, outputRR.getColumnInfos().get(colListPos));
    targetCol2Projection.put(targetCol, col_list.get(colListPos++));
  }
  Table target = qb.getMetaData().getDestTableForAlias(dest);
  Partition partition = target == null ? qb.getMetaData().getDestPartitionForAlias(dest) : null;
  if (target == null && partition == null) {
    throw new SemanticException(generateErrorMessage(selExprList, "No table/partition found in QB metadata for dest='" + dest + "'"));
  }
  ArrayList<ExprNodeDesc> new_col_list = new ArrayList<ExprNodeDesc>();
  colListPos = 0;
  List<FieldSchema> targetTableCols = target != null ? target.getCols() : partition.getCols();
  List<String> targetTableColNames = new ArrayList<String>();
  List<TypeInfo> targetTableColTypes = new ArrayList<TypeInfo>();
  for (FieldSchema fs : targetTableCols) {
    targetTableColNames.add(fs.getName());
    targetTableColTypes.add(TypeInfoUtils.getTypeInfoFromTypeString(fs.getType()));
  }
  Map<String, String> partSpec = qb.getMetaData().getPartSpecForAlias(dest);
  if (partSpec != null) {
    // relies on consistent order via LinkedHashMap
    for (Map.Entry<String, String> partKeyVal : partSpec.entrySet()) {
      if (partKeyVal.getValue() == null) {
        // these must come after the non-partition columns
        targetTableColNames.add(partKeyVal.getKey());
        targetTableColTypes.add(TypeInfoFactory.stringTypeInfo);
      }
    }
  }
  // build the new row resolver, where missing columns are NULL-filled
  RowResolver newOutputRR = new RowResolver();
  for (int i = 0; i < targetTableColNames.size(); i++) {
    String f = targetTableColNames.get(i);
    if (targetCol2Projection.containsKey(f)) {
      // put the existing column in the new list to make sure it is in the right position
      new_col_list.add(targetCol2Projection.get(f));
      // todo: is this OK?
      ColumnInfo ci = targetCol2ColumnInfo.get(f);
      ci.setInternalName(getColumnInternalName(colListPos));
      newOutputRR.put(ci.getTabAlias(), ci.getInternalName(), ci);
    } else {
      // add new 'synthetic' columns for projections not provided by the Select
      ExprNodeDesc exp = new ExprNodeConstantDesc(targetTableColTypes.get(i), null);
      new_col_list.add(exp);
      // this column doesn't come from any table
      final String tableAlias = null;
      ColumnInfo colInfo = new ColumnInfo(getColumnInternalName(colListPos), exp.getWritableObjectInspector(), tableAlias, false);
      newOutputRR.put(colInfo.getTabAlias(), colInfo.getInternalName(), colInfo);
    }
    colListPos++;
  }
  col_list.clear();
  col_list.addAll(new_col_list);
  return newOutputRR;
}
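The 'synthetic' column branch hinges on ExprNodeConstantDesc accepting a TypeInfo together with a null value, which yields a typed NULL literal for each unmentioned target column. A minimal sketch, assuming a hypothetical int target column (the class name is invented):

import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;

public class NullColumnFillDemo { // hypothetical demo class, not in Hive
  public static void main(String[] args) {
    // fs.getType() would supply the type string; "int" is assumed here.
    TypeInfo colType = TypeInfoUtils.getTypeInfoFromTypeString("int");
    // A typed NULL literal, as inserted above for each unmentioned target column.
    ExprNodeConstantDesc nullForColumn = new ExprNodeConstantDesc(colType, null);
    System.out.println(nullForColumn.getTypeInfo().getTypeName()); // int
    System.out.println(nullForColumn.getValue());                  // null
  }
}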