
Example 36 with PartitionDesc

use of org.apache.hadoop.hive.ql.plan.PartitionDesc in project hive by apache.

the class TestHiveBinarySearchRecordReader method init.

private void init() throws IOException {
    conf = new JobConf();
    resetIOContext();
    rcfReader = mock(RCFileRecordReader.class);
    when(rcfReader.next((LongWritable) anyObject(), (BytesRefArrayWritable) anyObject())).thenReturn(true);
    // Since the start is 0, and the length is 100, the first call to sync should be with the value
    // 50 so return that for getPos()
    when(rcfReader.getPos()).thenReturn(50L);
    conf.setBoolean("hive.input.format.sorted", true);
    TableDesc tblDesc = Utilities.defaultTd;
    PartitionDesc partDesc = new PartitionDesc(tblDesc, null);
    LinkedHashMap<Path, PartitionDesc> pt = new LinkedHashMap<>();
    pt.put(new Path("/tmp/testfolder"), partDesc);
    MapredWork mrwork = new MapredWork();
    mrwork.getMapWork().setPathToPartitionInfo(pt);
    Utilities.setMapRedWork(conf, mrwork, new Path("/tmp/" + System.getProperty("user.name"), "hive"));
    hiveSplit = new TestHiveInputSplit();
    hbsReader = new TestHiveRecordReader(rcfReader, conf);
    hbsReader.initIOContext(hiveSplit, conf, Class.class, rcfReader);
}
Also used : Path(org.apache.hadoop.fs.Path) MapredWork(org.apache.hadoop.hive.ql.plan.MapredWork) PartitionDesc(org.apache.hadoop.hive.ql.plan.PartitionDesc) TableDesc(org.apache.hadoop.hive.ql.plan.TableDesc) JobConf(org.apache.hadoop.mapred.JobConf) LinkedHashMap(java.util.LinkedHashMap)
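
A minimal sketch of the partitioned counterpart to the test above, which passes null as the partition spec. Everything specific here is hypothetical: the "ds" column, its value, the path, and the PartitionDescSketch class are made up for illustration; the constructor and setters are the same ones the test uses.

import java.util.LinkedHashMap;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.hive.ql.plan.MapredWork;
import org.apache.hadoop.hive.ql.plan.PartitionDesc;
import org.apache.hadoop.hive.ql.plan.TableDesc;

public class PartitionDescSketch {
    public static void main(String[] args) throws Exception {
        TableDesc tblDesc = Utilities.defaultTd;
        // Partition spec: partition column name -> value for this particular path.
        LinkedHashMap<String, String> partSpec = new LinkedHashMap<>();
        partSpec.put("ds", "2024-01-01");
        PartitionDesc partDesc = new PartitionDesc(tblDesc, partSpec);
        // Each input path maps to the PartitionDesc describing its partition.
        LinkedHashMap<Path, PartitionDesc> pathToPartitionInfo = new LinkedHashMap<>();
        pathToPartitionInfo.put(new Path("/tmp/testfolder/ds=2024-01-01"), partDesc);
        MapredWork mrwork = new MapredWork();
        mrwork.getMapWork().setPathToPartitionInfo(pathToPartitionInfo);
        System.out.println(partDesc.getPartSpec());
    }
}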

Example 37 with PartitionDesc

use of org.apache.hadoop.hive.ql.plan.PartitionDesc in project hive by apache.

the class MapOperator method initObjectInspector.

private MapOpCtx initObjectInspector(Configuration hconf, MapOpCtx opCtx, StructObjectInspector tableRowOI) throws Exception {
    PartitionDesc pd = opCtx.partDesc;
    TableDesc td = pd.getTableDesc();
    // Use table properties in case of unpartitioned tables,
    // and the union of table properties and partition properties, with partition
    // taking precedence, in the case of partitioned tables
    Properties overlayedProps = SerDeUtils.createOverlayedProperties(td.getProperties(), pd.getProperties());
    Map<String, String> partSpec = pd.getPartSpec();
    opCtx.tableName = String.valueOf(overlayedProps.getProperty("name"));
    opCtx.partName = String.valueOf(partSpec);
    opCtx.deserializer = pd.getDeserializer(hconf);
    StructObjectInspector partRawRowObjectInspector;
    boolean isAcid = AcidUtils.isTablePropertyTransactional(td.getProperties());
    if (Utilities.isSchemaEvolutionEnabled(hconf, isAcid) && Utilities.isInputFileFormatSelfDescribing(pd)) {
        partRawRowObjectInspector = tableRowOI;
    } else {
        partRawRowObjectInspector = (StructObjectInspector) opCtx.deserializer.getObjectInspector();
    }
    opCtx.partTblObjectInspectorConverter = ObjectInspectorConverters.getConverter(partRawRowObjectInspector, tableRowOI);
    // Next check if this table has partitions and if so
    // get the list of partition names as well as allocate
    // the serdes for the partition columns
    String pcols = overlayedProps.getProperty(hive_metastoreConstants.META_TABLE_PARTITION_COLUMNS);
    if (pcols != null && pcols.length() > 0) {
        String[] partKeys = pcols.trim().split("/");
        String pcolTypes = overlayedProps.getProperty(hive_metastoreConstants.META_TABLE_PARTITION_COLUMN_TYPES);
        String[] partKeyTypes = pcolTypes.trim().split(":");
        if (partKeys.length > partKeyTypes.length) {
            throw new HiveException("Internal error : partKeys length, " + partKeys.length + " greater than partKeyTypes length, " + partKeyTypes.length);
        }
        List<String> partNames = new ArrayList<String>(partKeys.length);
        Object[] partValues = new Object[partKeys.length];
        List<ObjectInspector> partObjectInspectors = new ArrayList<ObjectInspector>(partKeys.length);
        for (int i = 0; i < partKeys.length; i++) {
            String key = partKeys[i];
            partNames.add(key);
            ObjectInspector oi = PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(TypeInfoFactory.getPrimitiveTypeInfo(partKeyTypes[i]));
            // Partitions do not exist for this table
            if (partSpec == null) {
                // for partitionless table, initialize partValue to null
                partValues[i] = null;
            } else {
                partValues[i] = ObjectInspectorConverters.getConverter(PrimitiveObjectInspectorFactory.javaStringObjectInspector, oi).convert(partSpec.get(key));
            }
            partObjectInspectors.add(oi);
        }
        opCtx.rowWithPart = new Object[] { null, partValues };
        opCtx.partObjectInspector = ObjectInspectorFactory.getStandardStructObjectInspector(partNames, partObjectInspectors);
    }
    // The op may not be a TableScan (for example, a Select in a map-join subquery);
    // in that case the rowOI need not be amended
    if (opCtx.op instanceof TableScanOperator) {
        TableScanOperator tsOp = (TableScanOperator) opCtx.op;
        TableScanDesc tsDesc = tsOp.getConf();
        if (tsDesc != null && tsDesc.hasVirtualCols()) {
            opCtx.vcs = tsDesc.getVirtualCols();
            opCtx.vcValues = new Object[opCtx.vcs.size()];
            opCtx.vcsObjectInspector = VirtualColumn.getVCSObjectInspector(opCtx.vcs);
            if (opCtx.isPartitioned()) {
                opCtx.rowWithPartAndVC = Arrays.copyOfRange(opCtx.rowWithPart, 0, 3);
            } else {
                opCtx.rowWithPartAndVC = new Object[2];
            }
        }
    }
    if (!opCtx.hasVC() && !opCtx.isPartitioned()) {
        opCtx.rowObjectInspector = tableRowOI;
        return opCtx;
    }
    List<StructObjectInspector> inspectors = new ArrayList<StructObjectInspector>();
    inspectors.add(tableRowOI);
    if (opCtx.isPartitioned()) {
        inspectors.add(opCtx.partObjectInspector);
    }
    if (opCtx.hasVC()) {
        inspectors.add(opCtx.vcsObjectInspector);
    }
    opCtx.rowObjectInspector = ObjectInspectorFactory.getUnionStructObjectInspector(inspectors);
    return opCtx;
}
Also used : ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) ArrayList(java.util.ArrayList) TableScanDesc(org.apache.hadoop.hive.ql.plan.TableScanDesc) Properties(java.util.Properties) PartitionDesc(org.apache.hadoop.hive.ql.plan.PartitionDesc) TableDesc(org.apache.hadoop.hive.ql.plan.TableDesc) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector)
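
To make the property-overlay step at the top of initObjectInspector concrete, here is a small self-contained sketch. It assumes only what the method above already relies on: SerDeUtils.createOverlayedProperties lays the partition properties over the table properties (partition wins), and the partition column names and types are '/'- and ':'-separated. The table name, property values, and the OverlayedPropertiesSketch class are made up.

import java.util.Properties;

import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants;
import org.apache.hadoop.hive.serde2.SerDeUtils;

public class OverlayedPropertiesSketch {
    public static void main(String[] args) {
        Properties tableProps = new Properties();
        tableProps.setProperty("name", "default.t1");
        tableProps.setProperty(hive_metastoreConstants.META_TABLE_PARTITION_COLUMNS, "ds/hr");
        tableProps.setProperty(hive_metastoreConstants.META_TABLE_PARTITION_COLUMN_TYPES, "string:int");
        tableProps.setProperty("serialization.format", "1");

        // Partition-level properties; per the comment in initObjectInspector, these take precedence.
        Properties partProps = new Properties();
        partProps.setProperty("serialization.format", "2");

        Properties overlayed = SerDeUtils.createOverlayedProperties(tableProps, partProps);
        System.out.println(overlayed.getProperty("serialization.format")); // the partition value, "2"

        // Partition columns and their types are '/'- and ':'-separated, matching the parsing above.
        String[] partKeys = overlayed.getProperty(
                hive_metastoreConstants.META_TABLE_PARTITION_COLUMNS).trim().split("/");
        String[] partKeyTypes = overlayed.getProperty(
                hive_metastoreConstants.META_TABLE_PARTITION_COLUMN_TYPES).trim().split(":");
        System.out.println(partKeys.length + " partition columns, " + partKeyTypes.length + " types");
    }
}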

Example 38 with PartitionDesc

use of org.apache.hadoop.hive.ql.plan.PartitionDesc in project hive by apache.

the class MapOperator method initEmptyInputChildren.

/*
   * This is the same as the setChildren method below, but for empty tables.
   * It takes care of the following:
   * 1. Creating the right object inspector.
   * 2. Setting up childrenOpToOI with that object inspector,
   *    so that initialization happens correctly.
   */
public void initEmptyInputChildren(List<Operator<?>> children, Configuration hconf) throws SerDeException, Exception {
    setChildOperators(children);
    Map<String, Configuration> tableNameToConf = cloneConfsForNestedColPruning(hconf);
    for (Operator<?> child : children) {
        TableScanOperator tsOp = (TableScanOperator) child;
        StructObjectInspector soi = null;
        PartitionDesc partDesc = conf.getAliasToPartnInfo().get(tsOp.getConf().getAlias());
        Configuration newConf = tableNameToConf.get(partDesc.getTableDesc().getTableName());
        Deserializer serde = partDesc.getTableDesc().getDeserializer();
        partDesc.setProperties(partDesc.getProperties());
        MapOpCtx opCtx = new MapOpCtx(tsOp.getConf().getAlias(), child, partDesc);
        StructObjectInspector tableRowOI = (StructObjectInspector) serde.getObjectInspector();
        initObjectInspector(newConf, opCtx, tableRowOI);
        soi = opCtx.rowObjectInspector;
        child.getParentOperators().add(this);
        childrenOpToOI.put(child, soi);
    }
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) Deserializer(org.apache.hadoop.hive.serde2.Deserializer) PartitionDesc(org.apache.hadoop.hive.ql.plan.PartitionDesc) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector)

Example 39 with PartitionDesc

use of org.apache.hadoop.hive.ql.plan.PartitionDesc in project hive by apache.

the class MapOperator method getConvertedOI.

// Return the mapping from table descriptor to the expected table OI
/**
   * Traverse all the partitions for a table and get the OI for the table.
   * Note that a conversion is required if any partition OI is different
   * from the table OI. For example, if the query references table T (partitions P1, P2),
   * and P1's schema is the same as T's, whereas P2's schema is different from T's, conversion
   * might be needed for both P1 and P2, since a SettableOI might be needed for T
   */
private Map<TableDesc, StructObjectInspector> getConvertedOI(Map<String, Configuration> tableToConf) throws HiveException {
    Map<TableDesc, StructObjectInspector> tableDescOI = new HashMap<TableDesc, StructObjectInspector>();
    Set<TableDesc> identityConverterTableDesc = new HashSet<TableDesc>();
    try {
        Map<ObjectInspector, Boolean> oiSettableProperties = new HashMap<ObjectInspector, Boolean>();
        for (Path onefile : conf.getPathToAliases().keySet()) {
            PartitionDesc pd = conf.getPathToPartitionInfo().get(onefile);
            TableDesc tableDesc = pd.getTableDesc();
            Configuration hconf = tableToConf.get(tableDesc.getTableName());
            Deserializer partDeserializer = pd.getDeserializer(hconf);
            StructObjectInspector partRawRowObjectInspector;
            boolean isAcid = AcidUtils.isTablePropertyTransactional(tableDesc.getProperties());
            if (Utilities.isSchemaEvolutionEnabled(hconf, isAcid) && Utilities.isInputFileFormatSelfDescribing(pd)) {
                Deserializer tblDeserializer = tableDesc.getDeserializer(hconf);
                partRawRowObjectInspector = (StructObjectInspector) tblDeserializer.getObjectInspector();
            } else {
                partRawRowObjectInspector = (StructObjectInspector) partDeserializer.getObjectInspector();
            }
            StructObjectInspector tblRawRowObjectInspector = tableDescOI.get(tableDesc);
            if ((tblRawRowObjectInspector == null) || (identityConverterTableDesc.contains(tableDesc))) {
                Deserializer tblDeserializer = tableDesc.getDeserializer(hconf);
                tblRawRowObjectInspector = (StructObjectInspector) ObjectInspectorConverters.getConvertedOI(partRawRowObjectInspector, tblDeserializer.getObjectInspector(), oiSettableProperties);
                if (identityConverterTableDesc.contains(tableDesc)) {
                    if (!partRawRowObjectInspector.equals(tblRawRowObjectInspector)) {
                        identityConverterTableDesc.remove(tableDesc);
                    }
                } else if (partRawRowObjectInspector.equals(tblRawRowObjectInspector)) {
                    identityConverterTableDesc.add(tableDesc);
                }
                tableDescOI.put(tableDesc, tblRawRowObjectInspector);
            }
        }
    } catch (Exception e) {
        throw new HiveException(e);
    }
    return tableDescOI;
}
Also used : Path(org.apache.hadoop.fs.Path) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) Configuration(org.apache.hadoop.conf.Configuration) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) SerDeException(org.apache.hadoop.hive.serde2.SerDeException) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) Deserializer(org.apache.hadoop.hive.serde2.Deserializer) PartitionDesc(org.apache.hadoop.hive.ql.plan.PartitionDesc) TableDesc(org.apache.hadoop.hive.ql.plan.TableDesc) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) HashSet(java.util.HashSet)
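
A self-contained sketch of the conversion the javadoc above describes: rows read with a partition-level object inspector are converted to the table-level one. It reuses only calls already shown in these examples (getStandardStructObjectInspector, getConverter); the field names, the sample row, and the PartitionToTableOISketch class are made up, and the choice of java-string versus writable-string fields is just one way the two OIs can differ.

import java.util.Arrays;
import java.util.List;

import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;

public class PartitionToTableOISketch {
    public static void main(String[] args) {
        List<String> fieldNames = Arrays.asList("a", "b");

        // Partition rows arrive as plain Java strings.
        StructObjectInspector partOI = ObjectInspectorFactory.getStandardStructObjectInspector(
                fieldNames,
                Arrays.<ObjectInspector>asList(
                        PrimitiveObjectInspectorFactory.javaStringObjectInspector,
                        PrimitiveObjectInspectorFactory.javaStringObjectInspector));

        // The table-level inspector expects writable (Text) fields instead.
        StructObjectInspector tableOI = ObjectInspectorFactory.getStandardStructObjectInspector(
                fieldNames,
                Arrays.<ObjectInspector>asList(
                        PrimitiveObjectInspectorFactory.writableStringObjectInspector,
                        PrimitiveObjectInspectorFactory.writableStringObjectInspector));

        // Same construction as partTblObjectInspectorConverter in initObjectInspector above;
        // getConvertedOI performs the corresponding comparison when deciding whether an
        // identity converter is enough.
        ObjectInspectorConverters.Converter converter =
                ObjectInspectorConverters.getConverter(partOI, tableOI);
        Object converted = converter.convert(Arrays.asList("x", "y"));
        System.out.println(converted); // the same row, with fields converted to writables
    }
}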

Example 40 with PartitionDesc

use of org.apache.hadoop.hive.ql.plan.PartitionDesc in project hive by apache.

the class ProjectionPusher method pushProjectionsAndFilters.

public JobConf pushProjectionsAndFilters(JobConf jobConf, Path path) throws IOException {
    // TODO: refactor this in HIVE-6366
    updateMrWork(jobConf);
    final JobConf cloneJobConf = new JobConf(jobConf);
    final PartitionDesc part = pathToPartitionInfo.get(path);
    if ((part != null) && (part.getTableDesc() != null)) {
        Utilities.copyTableJobPropertiesToConf(part.getTableDesc(), cloneJobConf);
    }
    pushProjectionsAndFilters(cloneJobConf, path.toString(), path.toUri().getPath());
    return cloneJobConf;
}
Also used : PartitionDesc(org.apache.hadoop.hive.ql.plan.PartitionDesc) JobConf(org.apache.hadoop.mapred.JobConf)
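
A hedged usage sketch of the method above, modeled on how Hive's Parquet record reader wrappers call it (assuming the class shown is org.apache.hadoop.hive.ql.io.parquet.ProjectionPusher with its default constructor). The path is made up, and in this toy setup no MapWork is registered in the JobConf, so nothing is actually pushed; the point is the call pattern and the fact that the incoming conf is cloned rather than mutated.

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.io.parquet.ProjectionPusher;
import org.apache.hadoop.mapred.JobConf;

public class ProjectionPusherSketch {
    public static void main(String[] args) throws Exception {
        JobConf jobConf = new JobConf();
        Path splitPath = new Path("/warehouse/t1/ds=2024-01-01/000000_0");

        // Push table job properties, projections and filters for the split's directory.
        JobConf pushed = new ProjectionPusher().pushProjectionsAndFilters(jobConf, splitPath.getParent());

        // pushProjectionsAndFilters works on a clone of the incoming JobConf.
        System.out.println(pushed != jobConf);
    }
}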

Aggregations

PartitionDesc (org.apache.hadoop.hive.ql.plan.PartitionDesc): 58
Path (org.apache.hadoop.fs.Path): 47
ArrayList (java.util.ArrayList): 31
TableDesc (org.apache.hadoop.hive.ql.plan.TableDesc): 27
LinkedHashMap (java.util.LinkedHashMap): 19
HashMap (java.util.HashMap): 14
Map (java.util.Map): 13
OperatorDesc (org.apache.hadoop.hive.ql.plan.OperatorDesc): 13
JobConf (org.apache.hadoop.mapred.JobConf): 13
IOException (java.io.IOException): 11
Properties (java.util.Properties): 10
Operator (org.apache.hadoop.hive.ql.exec.Operator): 10
TableScanOperator (org.apache.hadoop.hive.ql.exec.TableScanOperator): 10
MapWork (org.apache.hadoop.hive.ql.plan.MapWork): 10
MapredWork (org.apache.hadoop.hive.ql.plan.MapredWork): 10
HiveException (org.apache.hadoop.hive.ql.metadata.HiveException): 8
Configuration (org.apache.hadoop.conf.Configuration): 7
FileSystem (org.apache.hadoop.fs.FileSystem): 7
MapJoinOperator (org.apache.hadoop.hive.ql.exec.MapJoinOperator): 7
SemanticException (org.apache.hadoop.hive.ql.parse.SemanticException): 7