use of org.apache.hadoop.hive.ql.optimizer.FieldNode in project hive by apache.
the class DataWritableReadSupport method getPrunedNestedColumns.
/**
 * Return the columns which contain the required nested attributes.
 * E.g., given struct a:<x:int, y:int> where 'x' is required and 'y' is not, the method returns
 * a pruned struct for 'a' which only contains the attribute 'x'.
 *
 * @param nestedColPaths the paths of the required nested attributes
 * @return a map from each top-level column name to the FieldNode describing its selected nested fields; the keys are all lower-cased
 */
private static Map<String, FieldNode> getPrunedNestedColumns(Set<String> nestedColPaths) {
  Map<String, FieldNode> resMap = new HashMap<>();
  if (nestedColPaths.isEmpty()) {
    return resMap;
  }
  for (String s : nestedColPaths) {
    String c = StringUtils.split(s, '.')[0].toLowerCase();
    if (!resMap.containsKey(c)) {
      FieldNode f = NestedColumnFieldPruningUtils.addNodeByPath(null, s);
      resMap.put(c, f);
    } else {
      resMap.put(c, NestedColumnFieldPruningUtils.addNodeByPath(resMap.get(c), s));
    }
  }
  return resMap;
}
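For illustration only (this sketch is not part of the Hive source), the following shows the FieldNode tree the method would build for the required paths "a.x" and "a.y.z", assuming NestedColumnFieldPruningUtils.addNodeByPath grows the tree the way it is used above:

// Sketch: build the same per-column FieldNode tree that getPrunedNestedColumns
// would place under the key "a" for the paths "a.x" and "a.y.z".
FieldNode a = NestedColumnFieldPruningUtils.addNodeByPath(null, "a.x");
a = NestedColumnFieldPruningUtils.addNodeByPath(a, "a.y.z");
// The resulting map would be { "a" -> a }, where 'a' describes the pruned struct a(x, y(z)).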
use of org.apache.hadoop.hive.ql.optimizer.FieldNode in project hive by apache.
the class DataWritableReadSupport method getProjectedSchema.
/**
 * Generate the projected schema from colIndexes and nested column paths. A column selected by
 * colIndexes that has no nested column path is added directly; otherwise a group type
 * containing only the required sub-types is built using nestedColumnPaths.
 * @param schema the original file schema
 * @param colNames the column names
 * @param colIndexes the indexes of the needed columns
 * @param nestedColumnPaths the paths for nested columns
 * @return the projected schema
 */
public static MessageType getProjectedSchema(MessageType schema, List<String> colNames,
    List<Integer> colIndexes, Set<String> nestedColumnPaths) {
  List<Type> schemaTypes = new ArrayList<Type>();
  Map<String, FieldNode> prunedCols = getPrunedNestedColumns(nestedColumnPaths);
  for (Integer i : colIndexes) {
    if (i < colNames.size()) {
      if (i < schema.getFieldCount()) {
        Type t = schema.getType(i);
        String tn = t.getName().toLowerCase();
        if (!prunedCols.containsKey(tn)) {
          schemaTypes.add(schema.getType(i));
        } else {
          if (t.isPrimitive()) {
            // For a primitive type, add it directly.
            schemaTypes.add(t);
          } else {
            // For a group type, build the projected group type with only the required leaves.
            List<Type> g = projectLeafTypes(Arrays.asList(t), Arrays.asList(prunedCols.get(tn)));
            if (!g.isEmpty()) {
              schemaTypes.addAll(g);
            }
          }
        }
      } else {
        // Prefix with '_mask_' to ensure no conflict with named columns in the file schema.
        schemaTypes.add(Types.optional(PrimitiveTypeName.BINARY).named("_mask_" + colNames.get(i)));
      }
    }
  }
  return new MessageType(schema.getName(), schemaTypes);
}
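As a rough usage sketch (the example schema and the expected output below are assumptions, not taken from the Hive tests; the usual java.util and org.apache.parquet.schema imports are implied), pruning a struct column 'a' down to its 'x' leaf could look like this:

// Hypothetical file schema with one struct column 'a' containing leaves 'x' and 'y'.
MessageType fileSchema = MessageTypeParser.parseMessageType(
    "message hive_schema { required group a { required int32 x; required int32 y; } }");
// Request column 0 ('a') but only the nested path "a.x".
MessageType pruned = DataWritableReadSupport.getProjectedSchema(
    fileSchema, Arrays.asList("a"), Arrays.asList(0), Collections.singleton("a.x"));
// Expected result: message hive_schema { required group a { required int32 x; } }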
use of org.apache.hadoop.hive.ql.optimizer.FieldNode in project hive by apache.
the class DataWritableReadSupport method projectLeafTypes.
private static List<Type> projectLeafTypes(List<Type> types, List<FieldNode> nodes) {
  List<Type> res = new ArrayList<>();
  if (nodes.isEmpty()) {
    return res;
  }
  Map<String, FieldNode> fieldMap = new HashMap<>();
  for (FieldNode n : nodes) {
    fieldMap.put(n.getFieldName().toLowerCase(), n);
  }
  for (Type type : types) {
    String tn = type.getName().toLowerCase();
    if (fieldMap.containsKey(tn)) {
      FieldNode f = fieldMap.get(tn);
      if (f.getNodes().isEmpty()) {
        // No children, so no pruning is needed.
        res.add(type);
      } else {
        if (type instanceof GroupType) {
          GroupType groupType = type.asGroupType();
          List<Type> ts = projectLeafTypes(groupType.getFields(), f.getNodes());
          GroupType g = buildProjectedGroupType(groupType, ts);
          if (g != null) {
            res.add(g);
          }
        } else {
          throw new RuntimeException("Primitive type " + f.getFieldName()
              + " should not have nested fields; it doesn't match field node " + f.toString());
        }
      }
    }
  }
  return res;
}
use of org.apache.hadoop.hive.ql.optimizer.FieldNode in project hive by apache.
the class ParquetHiveSerDe method processRawPrunedPaths.
/**
* Given a list of raw pruned paths separated by ',', return a list of merged pruned paths.
 * For instance, if 'prunedPaths' is "s.a, s, s", this returns ["s"].
*/
private static List<String> processRawPrunedPaths(String prunedPaths) {
  List<FieldNode> fieldNodes = new ArrayList<>();
  for (String p : prunedPaths.split(",")) {
    fieldNodes = FieldNode.mergeFieldNodes(fieldNodes, FieldNode.fromPath(p));
  }
  List<String> prunedPathList = new ArrayList<>();
  for (FieldNode fn : fieldNodes) {
    prunedPathList.addAll(fn.toPaths());
  }
  return prunedPathList;
}
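A minimal sketch of the merge step, using the example from the Javadoc (the whitespace trimming is added here for clarity and is not in the method above): merging "s.a", "s", and "s" collapses to the single path "s", because "s.a" is subsumed by the whole-column path "s".

// Sketch of the merge performed above.
List<FieldNode> merged = new ArrayList<>();
for (String p : "s.a, s, s".split(",")) {
  merged = FieldNode.mergeFieldNodes(merged, FieldNode.fromPath(p.trim()));
}
List<String> paths = new ArrayList<>();
for (FieldNode fn : merged) {
  paths.addAll(fn.toPaths());
}
// paths == ["s"]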
use of org.apache.hadoop.hive.ql.optimizer.FieldNode in project hive by apache.
the class RewriteQueryUsingAggregateIndexCtx method replaceTableScanProcess.
/**
 * Replaces the original TableScanOperator with a new TableScanOperator and metadata that
 * scan over the index table rather than the original table.
*
*/
private void replaceTableScanProcess(TableScanOperator scanOperator) throws SemanticException {
  RewriteQueryUsingAggregateIndexCtx rewriteQueryCtx = this;
  String alias = rewriteQueryCtx.getAlias();
  // Remove the original TableScanOperator from these data structures and add the new one.
  HashMap<String, TableScanOperator> topOps = rewriteQueryCtx.getParseContext().getTopOps();
  // Remove the original TableScanOperator.
  topOps.remove(alias);
  String indexTableName = rewriteQueryCtx.getIndexName();
  Table indexTableHandle = null;
  try {
    indexTableHandle = rewriteQueryCtx.getHiveDb().getTable(indexTableName);
  } catch (HiveException e) {
    LOG.error("Error while getting the table handle for index table.");
    LOG.error(org.apache.hadoop.util.StringUtils.stringifyException(e));
    throw new SemanticException(e.getMessage(), e);
  }
  // Construct a new descriptor for the index table scan.
  TableScanDesc indexTableScanDesc = new TableScanDesc(indexTableHandle);
  indexTableScanDesc.setGatherStats(false);
  String k = MetaStoreUtils.encodeTableName(indexTableName) + Path.SEPARATOR;
  indexTableScanDesc.setStatsAggPrefix(k);
  scanOperator.setConf(indexTableScanDesc);
  // Construct the new RowResolver for the new TableScanOperator.
  ArrayList<ColumnInfo> sigRS = new ArrayList<ColumnInfo>();
  try {
    StructObjectInspector rowObjectInspector =
        (StructObjectInspector) indexTableHandle.getDeserializer().getObjectInspector();
    StructField field = rowObjectInspector.getStructFieldRef(rewriteQueryCtx.getIndexKey());
    sigRS.add(new ColumnInfo(field.getFieldName(),
        TypeInfoUtils.getTypeInfoFromObjectInspector(field.getFieldObjectInspector()),
        indexTableName, false));
  } catch (SerDeException e) {
    LOG.error("Error while creating the RowResolver for new TableScanOperator.");
    LOG.error(org.apache.hadoop.util.StringUtils.stringifyException(e));
    throw new SemanticException(e.getMessage(), e);
  }
  RowSchema rs = new RowSchema(sigRS);
  // Set the row resolver for the new table.
  String newAlias = indexTableName;
  int index = alias.lastIndexOf(":");
  if (index >= 0) {
    newAlias = alias.substring(0, index) + ":" + indexTableName;
  }
  // The scan operator now points to the index table.
  scanOperator.getConf().setAlias(newAlias);
  scanOperator.setAlias(indexTableName);
  topOps.put(newAlias, scanOperator);
  rewriteQueryCtx.getParseContext().setTopOps(topOps);
  ColumnPrunerProcFactory.setupNeededColumns(scanOperator, rs,
      Arrays.asList(new FieldNode(rewriteQueryCtx.getIndexKey())));
}