
Example 56 with PartitionDesc

Use of org.apache.hadoop.hive.ql.plan.PartitionDesc in project hive by apache.

The class LlapInputFormat, method createFakeVrbCtx.

static VectorizedRowBatchCtx createFakeVrbCtx(MapWork mapWork) throws HiveException {
    // This is based on Vectorizer code, minus the validation.
    // Add all non-virtual columns from the TableScan operator.
    RowSchema rowSchema = findTsOp(mapWork).getSchema();
    final List<String> colNames = new ArrayList<String>(rowSchema.getSignature().size());
    final List<TypeInfo> colTypes = new ArrayList<TypeInfo>(rowSchema.getSignature().size());
    for (ColumnInfo c : rowSchema.getSignature()) {
        String columnName = c.getInternalName();
        if (VirtualColumn.VIRTUAL_COLUMN_NAMES.contains(columnName))
            continue;
        colNames.add(columnName);
        colTypes.add(TypeInfoUtils.getTypeInfoFromTypeString(c.getTypeName()));
    }
    // Determine the partition columns using the first partition descriptor.
    // Note - like vectorizer, this assumes partition columns go after data columns.
    int partitionColumnCount = 0;
    Iterator<Path> paths = mapWork.getPathToAliases().keySet().iterator();
    if (paths.hasNext()) {
        PartitionDesc partDesc = mapWork.getPathToPartitionInfo().get(paths.next());
        if (partDesc != null) {
            LinkedHashMap<String, String> partSpec = partDesc.getPartSpec();
            if (partSpec != null && !partSpec.isEmpty()) {
                partitionColumnCount = partSpec.size();
            }
        }
    }
    return new VectorizedRowBatchCtx(colNames.toArray(new String[colNames.size()]), colTypes.toArray(new TypeInfo[colTypes.size()]), null, partitionColumnCount, new String[0]);
}
Also used : Path(org.apache.hadoop.fs.Path) RowSchema(org.apache.hadoop.hive.ql.exec.RowSchema) ArrayList(java.util.ArrayList) ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) VectorizedRowBatchCtx(org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx) PartitionDesc(org.apache.hadoop.hive.ql.plan.PartitionDesc)
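
The partition-column count above is taken from the first path's PartitionDesc only, on the assumption that every partition of the table shares the same partition keys and that partition columns follow the data columns. Below is a minimal, self-contained sketch of that lookup using plain Java collections rather than Hive classes; the paths and partition specs are hypothetical.

import java.util.LinkedHashMap;
import java.util.Map;

public class PartitionColumnCountSketch {
    public static void main(String[] args) {
        // Hypothetical stand-in for mapWork.getPathToPartitionInfo():
        // path -> partition spec (partition column -> value).
        Map<String, LinkedHashMap<String, String>> pathToPartSpec = new LinkedHashMap<>();
        LinkedHashMap<String, String> spec = new LinkedHashMap<>();
        spec.put("ds", "2024-01-01");
        spec.put("hr", "00");
        pathToPartSpec.put("/warehouse/tbl/ds=2024-01-01/hr=00", spec);

        // Mirror the logic above: consult only the first path's descriptor.
        int partitionColumnCount = 0;
        for (LinkedHashMap<String, String> s : pathToPartSpec.values()) {
            if (s != null && !s.isEmpty()) {
                partitionColumnCount = s.size();
            }
            break; // only the first descriptor is consulted
        }
        System.out.println("partitionColumnCount = " + partitionColumnCount); // prints 2
    }
}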

Example 57 with PartitionDesc

Use of org.apache.hadoop.hive.ql.plan.PartitionDesc in project hive by apache.

The class SparkDynamicPartitionPruner, method applyFilterToPartitions.

private void applyFilterToPartitions(MapWork work, ObjectInspectorConverters.Converter converter, ExprNodeEvaluator eval, String columnName, Set<Object> values) throws HiveException {
    Object[] row = new Object[1];
    Iterator<Path> it = work.getPathToPartitionInfo().keySet().iterator();
    while (it.hasNext()) {
        Path p = it.next();
        PartitionDesc desc = work.getPathToPartitionInfo().get(p);
        Map<String, String> spec = desc.getPartSpec();
        if (spec == null) {
            throw new AssertionException("No partition spec found in dynamic pruning");
        }
        String partValueString = spec.get(columnName);
        if (partValueString == null) {
            throw new AssertionException("Could not find partition value for column: " + columnName);
        }
        Object partValue = converter.convert(partValueString);
        if (LOG.isDebugEnabled()) {
            LOG.debug("Converted partition value: " + partValue + " original (" + partValueString + ")");
        }
        row[0] = partValue;
        partValue = eval.evaluate(row);
        if (LOG.isDebugEnabled()) {
            LOG.debug("part key expr applied: " + partValue);
        }
        if (!values.contains(partValue)) {
            LOG.info("Pruning path: " + p);
            it.remove();
            work.removePathToAlias(p);
            // HIVE-12244 call currently ineffective
            work.getPartitionDescs().remove(desc);
        }
    }
}
Also used : Path(org.apache.hadoop.fs.Path) AssertionException(javolution.testing.AssertionException) PartitionDesc(org.apache.hadoop.hive.ql.plan.PartitionDesc)
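
The pruning itself reduces to a set-membership check: each path's value for the pruned partition column is converted, evaluated, and the path is removed from the map work if the result is not in the value set produced by the source side of the pruning edge. A minimal sketch of that loop with plain Java collections follows; the ObjectInspector conversion and expression evaluation are treated as identity here, and the paths, column name, and values are hypothetical.

import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.Set;

public class PartitionPruneSketch {
    public static void main(String[] args) {
        // Hypothetical path -> partition spec map, standing in for
        // work.getPathToPartitionInfo().
        Map<String, Map<String, String>> pathToPartSpec = new LinkedHashMap<>();
        pathToPartSpec.put("/tbl/ds=2024-01-01", Map.of("ds", "2024-01-01"));
        pathToPartSpec.put("/tbl/ds=2024-01-02", Map.of("ds", "2024-01-02"));

        // Values produced by the source side of the dynamic pruning edge.
        Set<Object> values = Set.of("2024-01-01");
        String columnName = "ds";

        Iterator<Map.Entry<String, Map<String, String>>> it = pathToPartSpec.entrySet().iterator();
        while (it.hasNext()) {
            Map.Entry<String, Map<String, String>> e = it.next();
            // Conversion and expression evaluation are identity in this sketch.
            Object partValue = e.getValue().get(columnName);
            if (!values.contains(partValue)) {
                System.out.println("Pruning path: " + e.getKey());
                it.remove();
            }
        }
        System.out.println("Remaining paths: " + pathToPartSpec.keySet());
    }
}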

Example 58 with PartitionDesc

Use of org.apache.hadoop.hive.ql.plan.PartitionDesc in project haivvreo by jghoman.

The class AvroSerDe, method determineCorrectProperties.

// Hive passes different properties in at different times.  If we're in a MR job,
// we'll get properties for the partition rather than the table, which will give
// us old values for the schema (if it's evolved).  Therefore, in an MR job
// we need to extract the table properties.
// Also, in join queries, multiple properties will be included, so we need
// to extract out the one appropriate to the table we're serde'ing.
private Properties determineCorrectProperties(Configuration configuration, Properties properties) {
    if ((configuration instanceof JobConf) && HaivvreoUtils.insideMRJob((JobConf) configuration)) {
        LOG.info("In MR job, extracting table-level properties");
        MapWork mapWork = Utilities.getMapWork(configuration);
        LinkedHashMap<String, PartitionDesc> a = mapWork.getAliasToPartnInfo();
        if (a.size() == 1) {
            LOG.info("Only one PartitionDesc found.  Returning that Properties");
            PartitionDesc p = a.values().iterator().next();
            TableDesc tableDesc = p.getTableDesc();
            return tableDesc.getProperties();
        } else {
            String tableName = properties.getProperty("name");
            LOG.info("Multiple PartitionDescs.  Return properties for " + tableName);
            for (Map.Entry<String, PartitionDesc> partitionDescs : a.entrySet()) {
                Properties p = partitionDescs.getValue().getTableDesc().getProperties();
                if (p.get("name").equals(tableName)) {
                    // We've found the matching table partition
                    LOG.info("Matched table name against " + partitionDescs.getKey() + ", return its properties");
                    return p;
                }
            }
            // Didn't find anything in partitions to match on.  WARN, at least.
            LOG.warn("Couldn't find any matching properties for table: " + tableName + ". Returning original properties");
        }
    }
    return properties;
}
Also used : MapWork(org.apache.hadoop.hive.ql.plan.MapWork) PartitionDesc(org.apache.hadoop.hive.ql.plan.PartitionDesc) TableDesc(org.apache.hadoop.hive.ql.plan.TableDesc) Properties(java.util.Properties) JobConf(org.apache.hadoop.mapred.JobConf) LinkedHashMap(java.util.LinkedHashMap) Map(java.util.Map)
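
The multi-table (join) branch is the subtle part: several PartitionDescs arrive and the serde picks the table-level properties whose "name" entry matches the name in the properties it was handed, falling back to the original properties otherwise. A self-contained sketch of that selection using plain java.util.Properties (no Hive or Haivvreo classes; the aliases, table names, and schema URL are hypothetical):

import java.util.LinkedHashMap;
import java.util.Map;
import java.util.Properties;

public class TablePropertiesMatchSketch {
    public static void main(String[] args) {
        // Hypothetical alias -> table-level Properties, standing in for
        // mapWork.getAliasToPartnInfo() followed by getTableDesc().getProperties().
        Map<String, Properties> aliasToTableProps = new LinkedHashMap<>();
        Properties orders = new Properties();
        orders.setProperty("name", "default.orders");
        orders.setProperty("avro.schema.url", "hdfs:///schemas/orders.avsc");
        Properties customers = new Properties();
        customers.setProperty("name", "default.customers");
        aliasToTableProps.put("o", orders);
        aliasToTableProps.put("c", customers);

        // Properties handed to the serde; only the table name matters here.
        Properties given = new Properties();
        given.setProperty("name", "default.orders");

        String tableName = given.getProperty("name");
        Properties chosen = given; // fall back to the original properties
        for (Map.Entry<String, Properties> e : aliasToTableProps.entrySet()) {
            if (tableName.equals(e.getValue().getProperty("name"))) {
                System.out.println("Matched table name against alias " + e.getKey());
                chosen = e.getValue();
                break;
            }
        }
        System.out.println("Schema URL: " + chosen.getProperty("avro.schema.url"));
    }
}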

Aggregations

PartitionDesc (org.apache.hadoop.hive.ql.plan.PartitionDesc): 58
Path (org.apache.hadoop.fs.Path): 47
ArrayList (java.util.ArrayList): 31
TableDesc (org.apache.hadoop.hive.ql.plan.TableDesc): 27
LinkedHashMap (java.util.LinkedHashMap): 19
HashMap (java.util.HashMap): 14
Map (java.util.Map): 13
OperatorDesc (org.apache.hadoop.hive.ql.plan.OperatorDesc): 13
JobConf (org.apache.hadoop.mapred.JobConf): 13
IOException (java.io.IOException): 11
Properties (java.util.Properties): 10
Operator (org.apache.hadoop.hive.ql.exec.Operator): 10
TableScanOperator (org.apache.hadoop.hive.ql.exec.TableScanOperator): 10
MapWork (org.apache.hadoop.hive.ql.plan.MapWork): 10
MapredWork (org.apache.hadoop.hive.ql.plan.MapredWork): 10
HiveException (org.apache.hadoop.hive.ql.metadata.HiveException): 8
Configuration (org.apache.hadoop.conf.Configuration): 7
FileSystem (org.apache.hadoop.fs.FileSystem): 7
MapJoinOperator (org.apache.hadoop.hive.ql.exec.MapJoinOperator): 7
SemanticException (org.apache.hadoop.hive.ql.parse.SemanticException): 7