
Example 91 with TableDesc

Use of org.apache.hadoop.hive.ql.plan.TableDesc in project hive by apache.

From class MapJoinTestConfig, method createMapJoinTableContainerSerDe.

public static MapJoinTableContainerSerDe createMapJoinTableContainerSerDe(MapJoinDesc mapJoinDesc) throws SerDeException {
    final Byte smallTablePos = 1;
    // UNDONE: Why do we need to specify BinarySortableSerDe explicitly here???
    TableDesc keyTableDesc = mapJoinDesc.getKeyTblDesc();
    // Keys are serialized with BinarySortableSerDe; the boolean argument below is
    // the hasFilterTag flag, which never applies to keys.
    AbstractSerDe keySerializer = (AbstractSerDe) ReflectionUtil.newInstance(BinarySortableSerDe.class, null);
    SerDeUtils.initializeSerDe(keySerializer, null, keyTableDesc.getProperties(), null);
    MapJoinObjectSerDeContext keyContext = new MapJoinObjectSerDeContext(keySerializer, false);
    TableDesc valueTableDesc;
    if (mapJoinDesc.getNoOuterJoin()) {
        valueTableDesc = mapJoinDesc.getValueTblDescs().get(smallTablePos);
    } else {
        // Outer joins carry a filter tag in the values, so use the filtered descriptors.
        valueTableDesc = mapJoinDesc.getValueFilteredTblDescs().get(smallTablePos);
    }
    AbstractSerDe valueSerDe = (AbstractSerDe) ReflectionUtil.newInstance(valueTableDesc.getDeserializerClass(), null);
    SerDeUtils.initializeSerDe(valueSerDe, null, valueTableDesc.getProperties(), null);
    MapJoinObjectSerDeContext valueContext = new MapJoinObjectSerDeContext(valueSerDe, hasFilter(mapJoinDesc, smallTablePos));
    return new MapJoinTableContainerSerDe(keyContext, valueContext);
}
Also used: MapJoinTableContainerSerDe (org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainerSerDe), BinarySortableSerDe (org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe), MapJoinObjectSerDeContext (org.apache.hadoop.hive.ql.exec.persistence.MapJoinObjectSerDeContext), TableDesc (org.apache.hadoop.hive.ql.plan.TableDesc), AbstractSerDe (org.apache.hadoop.hive.serde2.AbstractSerDe)
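
The helpers in Examples 91 and 92 compose: the MapJoinDesc built in the next example feeds directly into this factory. A minimal sketch (not from the Hive sources), assuming the caller supplies an already configured MapJoinTestDescription:

static MapJoinTableContainerSerDe buildSerDeForTest(MapJoinTestDescription testDesc) throws SerDeException {
    // createMapJoinDesc is shown in Example 92; createMapJoinTableContainerSerDe is above.
    MapJoinDesc mapJoinDesc = MapJoinTestConfig.createMapJoinDesc(testDesc);
    return MapJoinTestConfig.createMapJoinTableContainerSerDe(mapJoinDesc);
}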

Example 92 with TableDesc

Use of org.apache.hadoop.hive.ql.plan.TableDesc in project hive by apache.

From class MapJoinTestConfig, method createMapJoinDesc.

public static MapJoinDesc createMapJoinDesc(MapJoinTestDescription testDesc) {
    MapJoinDesc mapJoinDesc = new MapJoinDesc();
    mapJoinDesc.setPosBigTable(0);
    List<ExprNodeDesc> keyExpr = new ArrayList<ExprNodeDesc>();
    for (int i = 0; i < testDesc.bigTableKeyColumnNums.length; i++) {
        keyExpr.add(new ExprNodeColumnDesc(testDesc.bigTableKeyTypeInfos[i], testDesc.bigTableKeyColumnNames[i], "B", false));
    }
    Map<Byte, List<ExprNodeDesc>> keyMap = new HashMap<Byte, List<ExprNodeDesc>>();
    keyMap.put((byte) 0, keyExpr);
    List<ExprNodeDesc> smallTableExpr = new ArrayList<ExprNodeDesc>();
    for (int i = 0; i < testDesc.smallTableValueColumnNames.length; i++) {
        smallTableExpr.add(new ExprNodeColumnDesc(testDesc.smallTableValueTypeInfos[i], testDesc.smallTableValueColumnNames[i], "S", false));
    }
    // The same map doubles as the expression map: tag 0 holds the big table keys,
    // tag 1 the small table value expressions.
    keyMap.put((byte) 1, smallTableExpr);
    mapJoinDesc.setKeys(keyMap);
    mapJoinDesc.setExprs(keyMap);
    Byte[] order = new Byte[] { (byte) 0, (byte) 1 };
    mapJoinDesc.setTagOrder(order);
    mapJoinDesc.setNoOuterJoin(testDesc.vectorMapJoinVariation != VectorMapJoinVariation.OUTER);
    Map<Byte, List<ExprNodeDesc>> filterMap = new HashMap<Byte, List<ExprNodeDesc>>();
    // No filter expressions on the big table side.
    filterMap.put((byte) 0, new ArrayList<ExprNodeDesc>());
    mapJoinDesc.setFilters(filterMap);
    List<Integer> bigTableRetainColumnNumsList = intArrayToList(testDesc.bigTableRetainColumnNums);
    // For now, just small table values...
    List<Integer> smallTableRetainColumnNumsList = intArrayToList(testDesc.smallTableRetainValueColumnNums);
    Map<Byte, List<Integer>> retainListMap = new HashMap<Byte, List<Integer>>();
    retainListMap.put((byte) 0, bigTableRetainColumnNumsList);
    retainListMap.put((byte) 1, smallTableRetainColumnNumsList);
    mapJoinDesc.setRetainList(retainListMap);
    int joinDescType;
    switch(testDesc.vectorMapJoinVariation) {
        case INNER:
        case INNER_BIG_ONLY:
            joinDescType = JoinDesc.INNER_JOIN;
            break;
        case LEFT_SEMI:
            joinDescType = JoinDesc.LEFT_SEMI_JOIN;
            break;
        case OUTER:
            joinDescType = JoinDesc.LEFT_OUTER_JOIN;
            break;
        default:
            throw new RuntimeException("unknown operator variation " + testDesc.vectorMapJoinVariation);
    }
    JoinCondDesc[] conds = new JoinCondDesc[1];
    conds[0] = new JoinCondDesc(0, 1, joinDescType);
    mapJoinDesc.setConds(conds);
    TableDesc keyTableDesc = PlanUtils.getMapJoinKeyTableDesc(testDesc.hiveConf, PlanUtils.getFieldSchemasFromColumnList(keyExpr, ""));
    mapJoinDesc.setKeyTblDesc(keyTableDesc);
    TableDesc valueTableDesc = PlanUtils.getMapJoinValueTableDesc(PlanUtils.getFieldSchemasFromColumnList(smallTableExpr, ""));
    ArrayList<TableDesc> valueTableDescsList = new ArrayList<TableDesc>();
    // Position 0 is the big table, which has no small-table value descriptor.
    valueTableDescsList.add(null);
    valueTableDescsList.add(valueTableDesc);
    mapJoinDesc.setValueTblDescs(valueTableDescsList);
    mapJoinDesc.setValueFilteredTblDescs(valueTableDescsList);
    mapJoinDesc.setOutputColumnNames(Arrays.asList(testDesc.outputColumnNames));
    return mapJoinDesc;
}
Also used: MapJoinDesc (org.apache.hadoop.hive.ql.plan.MapJoinDesc), VectorMapJoinDesc (org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc), HashMap (java.util.HashMap), ArrayList (java.util.ArrayList), ExprNodeColumnDesc (org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc), List (java.util.List), ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc), TableDesc (org.apache.hadoop.hive.ql.plan.TableDesc), JoinCondDesc (org.apache.hadoop.hive.ql.plan.JoinCondDesc)
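
Example 92 relies on an intArrayToList helper that the excerpt does not show. A hedged sketch of the obvious shape; the actual MapJoinTestConfig implementation may differ:

// Boxes an int[] of column numbers into the List<Integer> the retain-list API expects.
private static List<Integer> intArrayToList(int[] nums) {
    List<Integer> result = new ArrayList<Integer>(nums.length);
    for (int num : nums) {
        result.add(num);
    }
    return result;
}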

Example 93 with TableDesc

Use of org.apache.hadoop.hive.ql.plan.TableDesc in project hive by apache.

From class TestInputOutputFormat, method createMockExecutionEnvironment.

/**
 * Create a mock execution environment that has enough detail that
 * ORC, vectorization, HiveInputFormat, and CombineHiveInputFormat don't
 * explode.
 * @param workDir a local filesystem work directory
 * @param warehouseDir a mock filesystem warehouse directory
 * @param tableName the table name
 * @param objectInspector object inspector for the row
 * @param isVectorized should run vectorized
 * @param partitions the number of partitions to create
 * @return a JobConf that contains the necessary information
 * @throws IOException
 * @throws HiveException
 */
JobConf createMockExecutionEnvironment(Path workDir, Path warehouseDir, String tableName, ObjectInspector objectInspector, boolean isVectorized, int partitions) throws IOException, HiveException {
    JobConf conf = new JobConf();
    Utilities.clearWorkMap(conf);
    conf.set("hive.exec.plan", workDir.toString());
    conf.set("mapred.job.tracker", "local");
    String isVectorizedString = Boolean.toString(isVectorized);
    conf.set("hive.vectorized.execution.enabled", isVectorizedString);
    conf.set(Utilities.VECTOR_MODE, isVectorizedString);
    conf.set(Utilities.USE_VECTORIZED_INPUT_FILE_FORMAT, isVectorizedString);
    conf.set("fs.mock.impl", MockFileSystem.class.getName());
    conf.set("mapred.mapper.class", ExecMapper.class.getName());
    Path root = new Path(warehouseDir, tableName);
    // clean out previous contents
    ((MockFileSystem) root.getFileSystem(conf)).clear();
    // build partition strings
    String[] partPath = new String[partitions];
    StringBuilder buffer = new StringBuilder();
    for (int p = 0; p < partitions; ++p) {
        partPath[p] = new Path(root, "p=" + p).toString();
        if (p != 0) {
            buffer.append(',');
        }
        buffer.append(partPath[p]);
    }
    conf.set("mapred.input.dir", buffer.toString());
    StringBuilder columnIds = new StringBuilder();
    StringBuilder columnNames = new StringBuilder();
    StringBuilder columnTypes = new StringBuilder();
    StructObjectInspector structOI = (StructObjectInspector) objectInspector;
    List<? extends StructField> fields = structOI.getAllStructFieldRefs();
    int numCols = fields.size();
    for (int i = 0; i < numCols; ++i) {
        if (i != 0) {
            columnIds.append(',');
            columnNames.append(',');
            columnTypes.append(',');
        }
        columnIds.append(i);
        columnNames.append(fields.get(i).getFieldName());
        columnTypes.append(fields.get(i).getFieldObjectInspector().getTypeName());
    }
    conf.set("hive.io.file.readcolumn.ids", columnIds.toString());
    conf.set("partition_columns", "p");
    conf.set(serdeConstants.LIST_COLUMNS, columnNames.toString());
    conf.set(serdeConstants.LIST_COLUMN_TYPES, columnTypes.toString());
    MockFileSystem fs = (MockFileSystem) warehouseDir.getFileSystem(conf);
    fs.clear();
    Properties tblProps = new Properties();
    tblProps.put("name", tableName);
    tblProps.put("serialization.lib", OrcSerde.class.getName());
    tblProps.put("columns", columnNames.toString());
    tblProps.put("columns.types", columnTypes.toString());
    TableDesc tbl = new TableDesc(OrcInputFormat.class, OrcOutputFormat.class, tblProps);
    MapWork mapWork = new MapWork();
    mapWork.setVectorMode(isVectorized);
    if (isVectorized) {
        VectorizedRowBatchCtx vectorizedRowBatchCtx = new VectorizedRowBatchCtx();
        vectorizedRowBatchCtx.init(structOI, new String[0]);
        mapWork.setVectorizedRowBatchCtx(vectorizedRowBatchCtx);
    }
    mapWork.setUseBucketizedHiveInputFormat(false);
    LinkedHashMap<Path, ArrayList<String>> aliasMap = new LinkedHashMap<>();
    ArrayList<String> aliases = new ArrayList<String>();
    aliases.add(tableName);
    LinkedHashMap<Path, PartitionDesc> partMap = new LinkedHashMap<>();
    for (int p = 0; p < partitions; ++p) {
        Path path = new Path(partPath[p]);
        aliasMap.put(path, aliases);
        LinkedHashMap<String, String> partSpec = new LinkedHashMap<String, String>();
        PartitionDesc part = new PartitionDesc(tbl, partSpec);
        if (isVectorized) {
            part.setVectorPartitionDesc(VectorPartitionDesc.createVectorizedInputFileFormat("MockInputFileFormatClassName", false));
        }
        partMap.put(path, part);
    }
    mapWork.setPathToAliases(aliasMap);
    mapWork.setPathToPartitionInfo(partMap);
    // write the plan out
    FileSystem localFs = FileSystem.getLocal(conf).getRaw();
    Path mapXml = new Path(workDir, "map.xml");
    localFs.delete(mapXml, true);
    FSDataOutputStream planStream = localFs.create(mapXml);
    SerializationUtilities.serializePlan(mapWork, planStream);
    conf.setBoolean(Utilities.HAS_MAP_WORK, true);
    planStream.close();
    return conf;
}
Also used: ArrayList (java.util.ArrayList), Properties (java.util.Properties), LinkedHashMap (java.util.LinkedHashMap), VectorizedRowBatchCtx (org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx), MapWork (org.apache.hadoop.hive.ql.plan.MapWork), PartitionDesc (org.apache.hadoop.hive.ql.plan.PartitionDesc), VectorPartitionDesc (org.apache.hadoop.hive.ql.plan.VectorPartitionDesc), TableDesc (org.apache.hadoop.hive.ql.plan.TableDesc), JobConf (org.apache.hadoop.mapred.JobConf), ExecMapper (org.apache.hadoop.hive.ql.exec.mr.ExecMapper), StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector)
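
A hedged usage sketch, not the original test body: tests in this class typically write ORC data into the mock partition directories and then ask HiveInputFormat (org.apache.hadoop.hive.ql.io.HiveInputFormat) for splits over the returned JobConf. The warehouse path, table name, inspector, and partition count below are illustrative assumptions:

// "workDir" and "inspector" are assumed to be prepared by the surrounding test.
JobConf conf = createMockExecutionEnvironment(workDir, new Path("mock:///warehouse"),
        "mocktable", inspector, false, 3);
// ... the real tests write ORC rows into each mock partition at this point ...
HiveInputFormat<?, ?> inputFormat = new HiveInputFormat<>();
InputSplit[] splits = inputFormat.getSplits(conf, 1);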

Aggregations

TableDesc (org.apache.hadoop.hive.ql.plan.TableDesc): 93 uses
ArrayList (java.util.ArrayList): 47 uses
Path (org.apache.hadoop.fs.Path): 34 uses
PartitionDesc (org.apache.hadoop.hive.ql.plan.PartitionDesc): 29 uses
HashMap (java.util.HashMap): 26 uses
ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc): 26 uses
LinkedHashMap (java.util.LinkedHashMap): 23 uses
Properties (java.util.Properties): 19 uses
HiveException (org.apache.hadoop.hive.ql.metadata.HiveException): 19 uses
LoadTableDesc (org.apache.hadoop.hive.ql.plan.LoadTableDesc): 18 uses
Operator (org.apache.hadoop.hive.ql.exec.Operator): 16 uses
TableScanOperator (org.apache.hadoop.hive.ql.exec.TableScanOperator): 16 uses
MapWork (org.apache.hadoop.hive.ql.plan.MapWork): 16 uses
OperatorDesc (org.apache.hadoop.hive.ql.plan.OperatorDesc): 16 uses
JobConf (org.apache.hadoop.mapred.JobConf): 15 uses
List (java.util.List): 14 uses
ReduceSinkOperator (org.apache.hadoop.hive.ql.exec.ReduceSinkOperator): 14 uses
RowSchema (org.apache.hadoop.hive.ql.exec.RowSchema): 14 uses
SemanticException (org.apache.hadoop.hive.ql.parse.SemanticException): 11 uses
MapredWork (org.apache.hadoop.hive.ql.plan.MapredWork): 11 uses