Example 46 with TableDesc

use of org.apache.hadoop.hive.ql.plan.TableDesc in project hive by apache.

the class MapOperator method initObjectInspector.

private MapOpCtx initObjectInspector(Configuration hconf, MapOpCtx opCtx, StructObjectInspector tableRowOI) throws Exception {
    PartitionDesc pd = opCtx.partDesc;
    TableDesc td = pd.getTableDesc();
    // Use table properties in case of unpartitioned tables,
    // and the union of table properties and partition properties, with partition
    // taking precedence, in the case of partitioned tables
    Properties overlayedProps = SerDeUtils.createOverlayedProperties(td.getProperties(), pd.getProperties());
    Map<String, String> partSpec = pd.getPartSpec();
    opCtx.tableName = String.valueOf(overlayedProps.getProperty("name"));
    opCtx.partName = String.valueOf(partSpec);
    opCtx.deserializer = pd.getDeserializer(hconf);
    StructObjectInspector partRawRowObjectInspector;
    boolean isAcid = AcidUtils.isTablePropertyTransactional(td.getProperties());
    if (Utilities.isSchemaEvolutionEnabled(hconf, isAcid) && Utilities.isInputFileFormatSelfDescribing(pd)) {
        partRawRowObjectInspector = tableRowOI;
    } else {
        partRawRowObjectInspector = (StructObjectInspector) opCtx.deserializer.getObjectInspector();
    }
    opCtx.partTblObjectInspectorConverter = ObjectInspectorConverters.getConverter(partRawRowObjectInspector, tableRowOI);
    // Next check if this table has partitions and if so
    // get the list of partition names as well as allocate
    // the serdes for the partition columns
    String pcols = overlayedProps.getProperty(hive_metastoreConstants.META_TABLE_PARTITION_COLUMNS);
    if (pcols != null && pcols.length() > 0) {
        String[] partKeys = pcols.trim().split("/");
        String pcolTypes = overlayedProps.getProperty(hive_metastoreConstants.META_TABLE_PARTITION_COLUMN_TYPES);
        String[] partKeyTypes = pcolTypes.trim().split(":");
        if (partKeys.length > partKeyTypes.length) {
            throw new HiveException("Internal error : partKeys length, " + partKeys.length + " greater than partKeyTypes length, " + partKeyTypes.length);
        }
        List<String> partNames = new ArrayList<String>(partKeys.length);
        Object[] partValues = new Object[partKeys.length];
        List<ObjectInspector> partObjectInspectors = new ArrayList<ObjectInspector>(partKeys.length);
        for (int i = 0; i < partKeys.length; i++) {
            String key = partKeys[i];
            partNames.add(key);
            ObjectInspector oi = PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(TypeInfoFactory.getPrimitiveTypeInfo(partKeyTypes[i]));
            // Partitions do not exist for this table
            if (partSpec == null) {
                // for partitionless table, initialize partValue to null
                partValues[i] = null;
            } else {
                partValues[i] = ObjectInspectorConverters.getConverter(PrimitiveObjectInspectorFactory.javaStringObjectInspector, oi).convert(partSpec.get(key));
            }
            partObjectInspectors.add(oi);
        }
        opCtx.rowWithPart = new Object[] { null, partValues };
        opCtx.partObjectInspector = ObjectInspectorFactory.getStandardStructObjectInspector(partNames, partObjectInspectors);
    }
    // The op may not be a TableScan (for map joins it can be a Select); in that case the rowOI need not be amended
    if (opCtx.op instanceof TableScanOperator) {
        TableScanOperator tsOp = (TableScanOperator) opCtx.op;
        TableScanDesc tsDesc = tsOp.getConf();
        if (tsDesc != null && tsDesc.hasVirtualCols()) {
            opCtx.vcs = tsDesc.getVirtualCols();
            opCtx.vcValues = new Object[opCtx.vcs.size()];
            opCtx.vcsObjectInspector = VirtualColumn.getVCSObjectInspector(opCtx.vcs);
            if (opCtx.isPartitioned()) {
                opCtx.rowWithPartAndVC = Arrays.copyOfRange(opCtx.rowWithPart, 0, 3);
            } else {
                opCtx.rowWithPartAndVC = new Object[2];
            }
        }
    }
    if (!opCtx.hasVC() && !opCtx.isPartitioned()) {
        opCtx.rowObjectInspector = tableRowOI;
        return opCtx;
    }
    List<StructObjectInspector> inspectors = new ArrayList<StructObjectInspector>();
    inspectors.add(tableRowOI);
    if (opCtx.isPartitioned()) {
        inspectors.add(opCtx.partObjectInspector);
    }
    if (opCtx.hasVC()) {
        inspectors.add(opCtx.vcsObjectInspector);
    }
    opCtx.rowObjectInspector = ObjectInspectorFactory.getUnionStructObjectInspector(inspectors);
    return opCtx;
}
Also used : ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) ArrayList(java.util.ArrayList) TableScanDesc(org.apache.hadoop.hive.ql.plan.TableScanDesc) Properties(java.util.Properties) PartitionDesc(org.apache.hadoop.hive.ql.plan.PartitionDesc) TableDesc(org.apache.hadoop.hive.ql.plan.TableDesc)
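
The comment at the top of initObjectInspector describes the overlay rule: table properties alone for unpartitioned tables, and partition properties layered over table properties otherwise. The following is a minimal sketch of that rule using only java.util.Properties; it is not the SerDeUtils.createOverlayedProperties implementation, and the property names are invented for illustration.

import java.util.Properties;

public class OverlayedPropertiesSketch {
    // Hypothetical helper mirroring the documented behaviour: table properties
    // act as defaults, partition properties override them when both are present.
    static Properties overlay(Properties tableProps, Properties partProps) {
        Properties result = new Properties(tableProps); // table props become the defaults
        if (partProps != null) {
            result.putAll(partProps);                   // partition props take precedence
        }
        return result;
    }

    public static void main(String[] args) {
        Properties table = new Properties();
        table.setProperty("serialization.format", "1");
        table.setProperty("columns", "key,value");

        Properties part = new Properties();
        part.setProperty("serialization.format", "9"); // overrides the table-level value

        Properties overlayed = overlay(table, part);
        System.out.println(overlayed.getProperty("serialization.format")); // 9
        System.out.println(overlayed.getProperty("columns"));              // key,value
    }
}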

Example 47 with TableDesc

use of org.apache.hadoop.hive.ql.plan.TableDesc in project hive by apache.

the class JoinUtil method initSpillTables.

public static TableDesc[] initSpillTables(JoinDesc conf, boolean noFilter) {
    int tagLen = conf.getTagLength();
    Map<Byte, List<ExprNodeDesc>> exprs = conf.getExprs();
    TableDesc[] spillTableDesc = new TableDesc[tagLen];
    for (int tag = 0; tag < exprs.size(); tag++) {
        List<ExprNodeDesc> valueCols = exprs.get((byte) tag);
        int columnSize = valueCols.size();
        StringBuilder colNames = new StringBuilder();
        StringBuilder colTypes = new StringBuilder();
        if (columnSize <= 0) {
            continue;
        }
        for (int k = 0; k < columnSize; k++) {
            // any name will do, it does not matter
            String newColName = tag + "_VALUE_" + k;
            colNames.append(newColName);
            colNames.append(',');
            colTypes.append(valueCols.get(k).getTypeString());
            colTypes.append(',');
        }
        if (!noFilter) {
            colNames.append("filtered");
            colNames.append(',');
            colTypes.append(TypeInfoFactory.shortTypeInfo.getTypeName());
            colTypes.append(',');
        }
        // remove the last ','
        colNames.setLength(colNames.length() - 1);
        colTypes.setLength(colTypes.length() - 1);
        TableDesc tblDesc = new TableDesc(SequenceFileInputFormat.class, HiveSequenceFileOutputFormat.class, Utilities.makeProperties(org.apache.hadoop.hive.serde.serdeConstants.SERIALIZATION_FORMAT, "" + Utilities.ctrlaCode, org.apache.hadoop.hive.serde.serdeConstants.LIST_COLUMNS, colNames.toString(), org.apache.hadoop.hive.serde.serdeConstants.LIST_COLUMN_TYPES, colTypes.toString(), serdeConstants.SERIALIZATION_LIB, LazyBinarySerDe.class.getName()));
        spillTableDesc[tag] = tblDesc;
    }
    return spillTableDesc;
}
Also used : ArrayList(java.util.ArrayList) List(java.util.List) TableDesc(org.apache.hadoop.hive.ql.plan.TableDesc) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc)
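
The StringBuilder pattern in initSpillTables (generated column names and types joined with commas, trailing comma trimmed with setLength) can be seen in isolation in this small standalone sketch; the value types used here are invented for the example.

import java.util.Arrays;
import java.util.List;

public class SpillColumnNamesSketch {
    public static void main(String[] args) {
        int tag = 0;
        List<String> valueTypes = Arrays.asList("string", "int", "bigint"); // hypothetical column types

        StringBuilder colNames = new StringBuilder();
        StringBuilder colTypes = new StringBuilder();
        for (int k = 0; k < valueTypes.size(); k++) {
            colNames.append(tag).append("_VALUE_").append(k).append(',');
            colTypes.append(valueTypes.get(k)).append(',');
        }
        // remove the trailing ',' exactly as initSpillTables does
        colNames.setLength(colNames.length() - 1);
        colTypes.setLength(colTypes.length() - 1);

        System.out.println(colNames); // 0_VALUE_0,0_VALUE_1,0_VALUE_2
        System.out.println(colTypes); // string,int,bigint
    }
}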

Example 48 with TableDesc

use of org.apache.hadoop.hive.ql.plan.TableDesc in project hive by apache.

the class JoinUtil method getSpillSerDe.

public static AbstractSerDe getSpillSerDe(byte alias, TableDesc[] spillTableDesc, JoinDesc conf, boolean noFilter) {
    TableDesc desc = getSpillTableDesc(alias, spillTableDesc, conf, noFilter);
    if (desc == null) {
        return null;
    }
    AbstractSerDe sd = (AbstractSerDe) ReflectionUtil.newInstance(desc.getDeserializerClass(), null);
    try {
        SerDeUtils.initializeSerDe(sd, null, desc.getProperties(), null);
    } catch (SerDeException e) {
        e.printStackTrace();
        return null;
    }
    return sd;
}
Also used : TableDesc(org.apache.hadoop.hive.ql.plan.TableDesc) AbstractSerDe(org.apache.hadoop.hive.serde2.AbstractSerDe) SerDeException(org.apache.hadoop.hive.serde2.SerDeException)
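
getSpillSerDe instantiates the SerDe class recorded in the TableDesc reflectively and returns null if initialization fails. Below is a self-contained sketch of that pattern with plain JDK reflection; the SpillHandler interface and NoOpSpillHandler class are hypothetical stand-ins for Hive's AbstractSerDe, not part of the Hive API.

public class ReflectiveInstantiationSketch {
    interface SpillHandler {
        void initialize(java.util.Properties props) throws Exception;
    }

    static class NoOpSpillHandler implements SpillHandler {
        @Override
        public void initialize(java.util.Properties props) { /* nothing to do */ }
    }

    // Instantiate reflectively, initialize, and return null on any failure,
    // mirroring the null-returning error handling in getSpillSerDe.
    static SpillHandler newHandler(Class<? extends SpillHandler> clazz, java.util.Properties props) {
        try {
            SpillHandler handler = clazz.getDeclaredConstructor().newInstance();
            handler.initialize(props);
            return handler;
        } catch (Exception e) {
            e.printStackTrace();
            return null;
        }
    }

    public static void main(String[] args) {
        SpillHandler h = newHandler(NoOpSpillHandler.class, new java.util.Properties());
        System.out.println(h != null ? "initialized" : "failed");
    }
}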

Example 49 with TableDesc

use of org.apache.hadoop.hive.ql.plan.TableDesc in project hive by apache.

the class MapJoinOperator method generateMapMetaData.

public void generateMapMetaData() throws HiveException {
    try {
        TableDesc keyTableDesc = conf.getKeyTblDesc();
        AbstractSerDe keySerializer = (AbstractSerDe) ReflectionUtil.newInstance(keyTableDesc.getDeserializerClass(), null);
        SerDeUtils.initializeSerDe(keySerializer, null, keyTableDesc.getProperties(), null);
        MapJoinObjectSerDeContext keyContext = new MapJoinObjectSerDeContext(keySerializer, false);
        for (int pos = 0; pos < order.length; pos++) {
            if (pos == posBigTable) {
                continue;
            }
            TableDesc valueTableDesc;
            if (conf.getNoOuterJoin()) {
                valueTableDesc = conf.getValueTblDescs().get(pos);
            } else {
                valueTableDesc = conf.getValueFilteredTblDescs().get(pos);
            }
            AbstractSerDe valueSerDe = (AbstractSerDe) ReflectionUtil.newInstance(valueTableDesc.getDeserializerClass(), null);
            SerDeUtils.initializeSerDe(valueSerDe, null, valueTableDesc.getProperties(), null);
            MapJoinObjectSerDeContext valueContext = new MapJoinObjectSerDeContext(valueSerDe, hasFilter(pos));
            mapJoinTableSerdes[pos] = new MapJoinTableContainerSerDe(keyContext, valueContext);
        }
    } catch (SerDeException e) {
        throw new HiveException(e);
    }
}
Also used : MapJoinTableContainerSerDe(org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainerSerDe) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) MapJoinObjectSerDeContext(org.apache.hadoop.hive.ql.exec.persistence.MapJoinObjectSerDeContext) TableDesc(org.apache.hadoop.hive.ql.plan.TableDesc) AbstractSerDe(org.apache.hadoop.hive.serde2.AbstractSerDe) SerDeException(org.apache.hadoop.hive.serde2.SerDeException)
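
generateMapMetaData builds a value SerDe context for every join input except the big table, which is streamed rather than loaded into a hash table. A framework-free sketch of that positional skip, with made-up aliases and big-table position:

import java.util.ArrayList;
import java.util.List;

public class SkipBigTableSketch {
    public static void main(String[] args) {
        String[] order = { "a", "b", "c" }; // hypothetical join aliases, in position order
        int posBigTable = 1;                // hypothetical big-table position

        List<String> smallTableAliases = new ArrayList<>();
        for (int pos = 0; pos < order.length; pos++) {
            if (pos == posBigTable) {
                continue; // the big table never gets a hash-table SerDe context
            }
            smallTableAliases.add(order[pos]);
        }
        System.out.println(smallTableAliases); // [a, c]
    }
}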

Example 50 with TableDesc

use of org.apache.hadoop.hive.ql.plan.TableDesc in project hive by apache.

the class IndexWhereProcessor method process.

@Override
/**
 * Process a node of the operator tree. This matches on the rule in IndexWhereTaskDispatcher.
 */
public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx, Object... nodeOutputs) throws SemanticException {
    TableScanOperator operator = (TableScanOperator) nd;
    List<Node> opChildren = operator.getChildren();
    TableScanDesc operatorDesc = operator.getConf();
    if (operatorDesc == null || !tsToIndices.containsKey(operator)) {
        return null;
    }
    List<Index> indexes = tsToIndices.get(operator);
    ExprNodeDesc predicate = operatorDesc.getFilterExpr();
    IndexWhereProcCtx context = (IndexWhereProcCtx) procCtx;
    ParseContext pctx = context.getParseContext();
    LOG.info("Processing predicate for index optimization");
    if (predicate == null) {
        LOG.info("null predicate pushed down");
        return null;
    }
    LOG.info(predicate.getExprString());
    // check if we have tsToIndices on all partitions in this table scan
    Set<Partition> queryPartitions;
    try {
        queryPartitions = IndexUtils.checkPartitionsCoveredByIndex(operator, pctx, indexes);
        if (queryPartitions == null) {
            // partitions not covered
            return null;
        }
    } catch (HiveException e) {
        LOG.error("Fatal Error: problem accessing metastore", e);
        throw new SemanticException(e);
    }
    // we can only process MapReduce tasks to check input size
    if (!context.getCurrentTask().isMapRedTask()) {
        return null;
    }
    MapRedTask currentTask = (MapRedTask) context.getCurrentTask();
    // get potential reentrant index queries from each index
    Map<Index, HiveIndexQueryContext> queryContexts = new HashMap<Index, HiveIndexQueryContext>();
    // make sure we have an index on the table being scanned
    TableDesc tblDesc = operator.getTableDesc();
    Map<String, List<Index>> indexesByType = new HashMap<String, List<Index>>();
    for (Index indexOnTable : indexes) {
        if (indexesByType.get(indexOnTable.getIndexHandlerClass()) == null) {
            List<Index> newType = new ArrayList<Index>();
            newType.add(indexOnTable);
            indexesByType.put(indexOnTable.getIndexHandlerClass(), newType);
        } else {
            indexesByType.get(indexOnTable.getIndexHandlerClass()).add(indexOnTable);
        }
    }
    // choose index type with most tsToIndices of the same type on the table
    // TODO HIVE-2130 This would be a good place for some sort of cost based choice?
    List<Index> bestIndexes = indexesByType.values().iterator().next();
    for (List<Index> indexTypes : indexesByType.values()) {
        if (bestIndexes.size() < indexTypes.size()) {
            bestIndexes = indexTypes;
        }
    }
    // rewrite index queries for the chosen index type
    HiveIndexQueryContext tmpQueryContext = new HiveIndexQueryContext();
    tmpQueryContext.setQueryPartitions(queryPartitions);
    rewriteForIndexes(predicate, bestIndexes, pctx, currentTask, tmpQueryContext);
    List<Task<?>> indexTasks = tmpQueryContext.getQueryTasks();
    if (indexTasks != null && indexTasks.size() > 0) {
        queryContexts.put(bestIndexes.get(0), tmpQueryContext);
    }
    // choose an index rewrite to use
    if (queryContexts.size() > 0) {
        // TODO HIVE-2130 This would be a good place for some sort of cost based choice?
        Index chosenIndex = queryContexts.keySet().iterator().next();
        // modify the parse context to use indexing
        // we need to delay this until we choose one index so that we don't attempt to modify pctx multiple times
        HiveIndexQueryContext queryContext = queryContexts.get(chosenIndex);
        // prepare the map reduce job to use indexing
        MapWork work = currentTask.getWork().getMapWork();
        work.setInputformat(queryContext.getIndexInputFormat());
        work.addIndexIntermediateFile(queryContext.getIndexIntermediateFile());
        // modify inputs based on index query
        Set<ReadEntity> inputs = pctx.getSemanticInputs();
        inputs.addAll(queryContext.getAdditionalSemanticInputs());
        List<Task<?>> chosenRewrite = queryContext.getQueryTasks();
        // add dependencies so index query runs first
        insertIndexQuery(pctx, context, chosenRewrite);
    }
    return null;
}
Also used : HiveIndexQueryContext(org.apache.hadoop.hive.ql.index.HiveIndexQueryContext) TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) Task(org.apache.hadoop.hive.ql.exec.Task) MapRedTask(org.apache.hadoop.hive.ql.exec.mr.MapRedTask) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) HashMap(java.util.HashMap) Node(org.apache.hadoop.hive.ql.lib.Node) ArrayList(java.util.ArrayList) Index(org.apache.hadoop.hive.metastore.api.Index) List(java.util.List) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) SemanticException(org.apache.hadoop.hive.ql.parse.SemanticException) Partition(org.apache.hadoop.hive.ql.metadata.Partition) TableScanDesc(org.apache.hadoop.hive.ql.plan.TableScanDesc) ReadEntity(org.apache.hadoop.hive.ql.hooks.ReadEntity) MapWork(org.apache.hadoop.hive.ql.plan.MapWork) ParseContext(org.apache.hadoop.hive.ql.parse.ParseContext) TableDesc(org.apache.hadoop.hive.ql.plan.TableDesc)
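
The index-selection step above groups the available indexes by handler class and then keeps the largest group (see the TODO about cost-based choice). The group-and-pick-largest pattern on its own, with invented index names and handler classes, looks like this:

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class LargestIndexGroupSketch {
    public static void main(String[] args) {
        // hypothetical (indexName, handlerClass) pairs
        String[][] indexes = {
            { "idx1", "CompactIndexHandler" },
            { "idx2", "BitmapIndexHandler" },
            { "idx3", "CompactIndexHandler" },
        };

        // group by handler class, as IndexWhereProcessor does with indexesByType
        Map<String, List<String>> byType = new HashMap<>();
        for (String[] idx : indexes) {
            byType.computeIfAbsent(idx[1], k -> new ArrayList<>()).add(idx[0]);
        }

        // choose the handler type with the most indexes on the table
        List<String> best = byType.values().iterator().next();
        for (List<String> group : byType.values()) {
            if (best.size() < group.size()) {
                best = group;
            }
        }
        System.out.println(best); // [idx1, idx3]
    }
}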

Aggregations

TableDesc (org.apache.hadoop.hive.ql.plan.TableDesc): 93
ArrayList (java.util.ArrayList): 47
Path (org.apache.hadoop.fs.Path): 34
PartitionDesc (org.apache.hadoop.hive.ql.plan.PartitionDesc): 29
HashMap (java.util.HashMap): 26
ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc): 26
LinkedHashMap (java.util.LinkedHashMap): 23
Properties (java.util.Properties): 19
HiveException (org.apache.hadoop.hive.ql.metadata.HiveException): 19
LoadTableDesc (org.apache.hadoop.hive.ql.plan.LoadTableDesc): 18
Operator (org.apache.hadoop.hive.ql.exec.Operator): 16
TableScanOperator (org.apache.hadoop.hive.ql.exec.TableScanOperator): 16
MapWork (org.apache.hadoop.hive.ql.plan.MapWork): 16
OperatorDesc (org.apache.hadoop.hive.ql.plan.OperatorDesc): 16
JobConf (org.apache.hadoop.mapred.JobConf): 15
List (java.util.List): 14
ReduceSinkOperator (org.apache.hadoop.hive.ql.exec.ReduceSinkOperator): 14
RowSchema (org.apache.hadoop.hive.ql.exec.RowSchema): 14
SemanticException (org.apache.hadoop.hive.ql.parse.SemanticException): 11
MapredWork (org.apache.hadoop.hive.ql.plan.MapredWork): 11