
Example 1 with SortCol

use of org.apache.hadoop.hive.ql.optimizer.physical.BucketingSortingCtx.SortCol in project hive by apache.

From the class MoveTask, method updatePartitionBucketSortColumns:

/**
   * Alters the bucketing and/or sorting columns of the partition, provided they meet some
   * validation criteria, e.g. the number of buckets matches the number of files and the
   * columns are not partition columns.
   * @param db the Hive handle used to alter the partition in the metastore
   * @param table the table the partition belongs to
   * @param partn the partition whose bucketing/sorting metadata is updated
   * @param bucketCols the inferred bucketing columns, or null if none were inferred
   * @param numBuckets the inferred number of buckets
   * @param sortCols the inferred sort columns, or null if none were inferred
   * @throws IOException
   * @throws InvalidOperationException
   * @throws HiveException
   */
private void updatePartitionBucketSortColumns(Hive db, Table table, Partition partn, List<BucketCol> bucketCols, int numBuckets, List<SortCol> sortCols) throws IOException, InvalidOperationException, HiveException {
    boolean updateBucketCols = false;
    if (bucketCols != null) {
        FileSystem fileSys = partn.getDataLocation().getFileSystem(conf);
        FileStatus[] fileStatus = HiveStatsUtils.getFileStatusRecurse(partn.getDataLocation(), 1, fileSys);
        // Only update the bucketing metadata if the number of files matches the number of
        // buckets, i.e. exactly one file was written to each bucket.
        if (fileStatus.length == numBuckets) {
            List<String> newBucketCols = new ArrayList<String>();
            updateBucketCols = true;
            for (BucketCol bucketCol : bucketCols) {
                if (bucketCol.getIndexes().get(0) < partn.getCols().size()) {
                    newBucketCols.add(partn.getCols().get(bucketCol.getIndexes().get(0)).getName());
                } else {
                    // If the table is bucketed on a partition column, not valid for bucketing
                    updateBucketCols = false;
                    break;
                }
            }
            if (updateBucketCols) {
                partn.getBucketCols().clear();
                partn.getBucketCols().addAll(newBucketCols);
                partn.getTPartition().getSd().setNumBuckets(numBuckets);
            }
        }
    }
    boolean updateSortCols = false;
    if (sortCols != null) {
        List<Order> newSortCols = new ArrayList<Order>();
        updateSortCols = true;
        for (SortCol sortCol : sortCols) {
            if (sortCol.getIndexes().get(0) < partn.getCols().size()) {
                newSortCols.add(new Order(partn.getCols().get(sortCol.getIndexes().get(0)).getName(), sortCol.getSortOrder() == '+' ? BaseSemanticAnalyzer.HIVE_COLUMN_ORDER_ASC : BaseSemanticAnalyzer.HIVE_COLUMN_ORDER_DESC));
            } else {
                // If the table is sorted on a partition column, not valid for sorting
                updateSortCols = false;
                break;
            }
        }
        if (updateSortCols) {
            partn.getSortCols().clear();
            partn.getSortCols().addAll(newSortCols);
        }
    }
    if (updateBucketCols || updateSortCols) {
        db.alterPartition(table.getDbName(), table.getTableName(), partn, null);
    }
}
Also used : Order(org.apache.hadoop.hive.metastore.api.Order) FileStatus(org.apache.hadoop.fs.FileStatus) BucketCol(org.apache.hadoop.hive.ql.optimizer.physical.BucketingSortingCtx.BucketCol) FileSystem(org.apache.hadoop.fs.FileSystem) ArrayList(java.util.ArrayList) SortCol(org.apache.hadoop.hive.ql.optimizer.physical.BucketingSortingCtx.SortCol)
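
For orientation, the inferred BucketCol/SortCol objects only carry column positions and aliases, so the method above has to translate a position back into a partition column name and reject positions that fall on partition columns. The following is a minimal, hypothetical sketch of that translation for a single SortCol; the class and method names are invented for illustration and are not part of Hive.

import java.util.List;

import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.metastore.api.Order;
import org.apache.hadoop.hive.ql.optimizer.physical.BucketingSortingCtx.SortCol;
import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer;

final class SortColTranslationSketch {

    // Hypothetical helper mirroring the per-column logic of updatePartitionBucketSortColumns:
    // resolve the SortCol's first index against the partition's data columns and map the
    // '+'/'-' sort order character to the metastore's ASC/DESC constant.
    static Order toMetastoreOrder(SortCol sortCol, List<FieldSchema> partitionDataCols) {
        int idx = sortCol.getIndexes().get(0);
        if (idx >= partitionDataCols.size()) {
            // The position points past the data columns, i.e. at a partition column,
            // which is exactly the case the method above rejects.
            return null;
        }
        int direction = (sortCol.getSortOrder() == '+')
            ? BaseSemanticAnalyzer.HIVE_COLUMN_ORDER_ASC
            : BaseSemanticAnalyzer.HIVE_COLUMN_ORDER_DESC;
        return new Order(partitionDataCols.get(idx).getName(), direction);
    }
}

Returning null here stands in for the updateBucketCols/updateSortCols = false branches above, where the whole update is abandoned.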

Example 2 with SortCol

use of org.apache.hadoop.hive.ql.optimizer.physical.BucketingSortingCtx.SortCol in project hive by apache.

From the class BucketingSortingOpProcFactory, method findBucketingSortingColumns:

/**
   * For each expression, check if it represents a column known to be bucketed/sorted.
   *
   * The methods setBucketingColsIfComplete and setSortingColsIfComplete should be used to assign
   * the values of newBucketCols and newSortCols as the bucketing/sorting columns of this operator
   * because these arrays may contain nulls indicating that the output of this operator is not
   * bucketed/sorted.
   *
   * @param exprs - list of expressions
   * @param colInfos - list of column infos
   * @param bucketCols - list of bucketed columns from the input
   * @param sortCols - list of sorted columns from the input
   * @param newBucketCols - an array of bucket columns which should be the same length as
   *    bucketCols, updated such that the bucketed column(s) at index i in bucketCols became
   *    the bucketed column(s) at index i of newBucketCols in the output
   * @param newSortCols - an array of sort columns which should be the same length as
   *    sortCols, updated such that the sorted column(s) at index i in sortCols became
   *    the sorted column(s) at index i of newSortCols in the output
   * @param colInfosOffset - the expressions are known to be represented by column infos
   *    beginning at this index
   */
private static void findBucketingSortingColumns(List<ExprNodeDesc> exprs, List<ColumnInfo> colInfos, List<BucketCol> bucketCols, List<SortCol> sortCols, BucketCol[] newBucketCols, SortCol[] newSortCols, int colInfosOffset) {
    for (int cnt = 0; cnt < exprs.size(); cnt++) {
        ExprNodeDesc expr = exprs.get(cnt);
        // A non-column expression (e.g. the output of a UDF) voids any assumptions about
        // bucketing/sorting, so skip it.
        if (!(expr instanceof ExprNodeColumnDesc)) {
            continue;
        }
        ExprNodeColumnDesc columnExpr = (ExprNodeColumnDesc) expr;
        int colInfosIndex = cnt + colInfosOffset;
        if (newBucketCols != null) {
            int bucketIndex = indexOfColName(bucketCols, columnExpr.getColumn());
            if (bucketIndex != -1) {
                if (newBucketCols[bucketIndex] == null) {
                    newBucketCols[bucketIndex] = new BucketCol();
                }
                newBucketCols[bucketIndex].addAlias(colInfos.get(colInfosIndex).getInternalName(), colInfosIndex);
            }
        }
        if (newSortCols != null) {
            int sortIndex = indexOfColName(sortCols, columnExpr.getColumn());
            if (sortIndex != -1) {
                if (newSortCols[sortIndex] == null) {
                    newSortCols[sortIndex] = new SortCol(sortCols.get(sortIndex).getSortOrder());
                }
                newSortCols[sortIndex].addAlias(colInfos.get(colInfosIndex).getInternalName(), colInfosIndex);
            }
        }
    }
}
Also used : BucketCol(org.apache.hadoop.hive.ql.optimizer.physical.BucketingSortingCtx.BucketCol) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) SortCol(org.apache.hadoop.hive.ql.optimizer.physical.BucketingSortingCtx.SortCol) BucketSortCol(org.apache.hadoop.hive.ql.optimizer.physical.BucketingSortingCtx.BucketSortCol) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc)
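
As the javadoc above notes, newBucketCols and newSortCols may still contain nulls after this pass, and only the companion setBucketingColsIfComplete/setSortingColsIfComplete methods decide whether the operator's output can be treated as bucketed/sorted. Below is a minimal, hypothetical sketch of that completeness check; the class and method are illustrative, not the actual Hive helpers.

import java.util.Arrays;
import java.util.List;

import org.apache.hadoop.hive.ql.optimizer.physical.BucketingSortingCtx.SortCol;

final class SortColCompletenessSketch {

    // Only treat the operator's output as sorted when every input sort column was matched
    // to an output column; a remaining null means the sort order was not fully preserved.
    static List<SortCol> sortedColsIfComplete(SortCol[] newSortCols) {
        for (SortCol col : newSortCols) {
            if (col == null) {
                return null;
            }
        }
        return Arrays.asList(newSortCols);
    }
}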

Example 3 with SortCol

use of org.apache.hadoop.hive.ql.optimizer.physical.BucketingSortingCtx.SortCol in project hive by apache.

From the class BucketingSortingOpProcFactory, method extractSortCols:

static List<SortCol> extractSortCols(ReduceSinkOperator rop, List<ExprNodeDesc> outputValues) {
    String sortOrder = rop.getConf().getOrder();
    List<SortCol> sortCols = new ArrayList<SortCol>();
    ArrayList<ExprNodeDesc> keyCols = rop.getConf().getKeyCols();
    for (int i = 0; i < keyCols.size(); i++) {
        ExprNodeDesc keyCol = keyCols.get(i);
        if (!(keyCol instanceof ExprNodeColumnDesc)) {
            break;
        }
        int index = ExprNodeDescUtils.indexOf(keyCol, outputValues);
        if (index < 0) {
            break;
        }
        sortCols.add(new SortCol(((ExprNodeColumnDesc) keyCol).getColumn(), index, sortOrder.charAt(i)));
    }
    // Even if not every key column could be matched, the output is still sorted on
    // the columns seen up until now.
    return sortCols;
}
Also used : ArrayList(java.util.ArrayList) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) SortCol(org.apache.hadoop.hive.ql.optimizer.physical.BucketingSortingCtx.SortCol) BucketSortCol(org.apache.hadoop.hive.ql.optimizer.physical.BucketingSortingCtx.BucketSortCol) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc)
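
For illustration, rop.getConf().getOrder() returns one '+' or '-' character per key column, and extractSortCols pairs keyCols.get(i) with sortOrder.charAt(i). Here is a hypothetical standalone version of that pairing, using plain column names in place of ExprNodeColumnDesc; the class and method names are invented for illustration.

import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.hive.ql.optimizer.physical.BucketingSortingCtx.SortCol;

final class SortOrderPairingSketch {

    // Pair each key column name with the matching '+'/'-' character of the order string,
    // the same positional convention extractSortCols relies on.
    static List<SortCol> pair(List<String> keyColumnNames, String sortOrder) {
        List<SortCol> sortCols = new ArrayList<SortCol>();
        for (int i = 0; i < keyColumnNames.size(); i++) {
            sortCols.add(new SortCol(keyColumnNames.get(i), i, sortOrder.charAt(i)));
        }
        return sortCols;
    }
}

For example, pair(Arrays.asList("key", "value"), "+-") would yield one ascending and one descending SortCol, in key order.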

Example 4 with SortCol

use of org.apache.hadoop.hive.ql.optimizer.physical.BucketingSortingCtx.SortCol in project hive by apache.

From the class BucketingSortingOpProcFactory, method extractTraits:

static void extractTraits(BucketingSortingCtx bctx, ReduceSinkOperator rop, Operator<?> childop) throws SemanticException {
    List<ExprNodeDesc> outputValues = Collections.emptyList();
    if (childop instanceof SelectOperator) {
        SelectDesc select = ((SelectOperator) childop).getConf();
        outputValues = ExprNodeDescUtils.backtrack(select.getColList(), childop, rop);
    }
    if (outputValues.isEmpty()) {
        return;
    }
    // Go through the set of partition columns, and find their representatives in the values
    // These represent the bucketed columns
    List<BucketCol> bucketCols = extractBucketCols(rop, outputValues);
    // Go through the set of key columns, and find their representatives in the values
    // These represent the sorted columns
    List<SortCol> sortCols = extractSortCols(rop, outputValues);
    List<ColumnInfo> colInfos = childop.getSchema().getSignature();
    if (!bucketCols.isEmpty()) {
        List<BucketCol> newBucketCols = getNewBucketCols(bucketCols, colInfos);
        bctx.setBucketedCols(childop, newBucketCols);
    }
    if (!sortCols.isEmpty()) {
        List<SortCol> newSortCols = getNewSortCols(sortCols, colInfos);
        bctx.setSortedCols(childop, newSortCols);
    }
}
Also used : SelectOperator(org.apache.hadoop.hive.ql.exec.SelectOperator) BucketCol(org.apache.hadoop.hive.ql.optimizer.physical.BucketingSortingCtx.BucketCol) SortCol(org.apache.hadoop.hive.ql.optimizer.physical.BucketingSortingCtx.SortCol) BucketSortCol(org.apache.hadoop.hive.ql.optimizer.physical.BucketingSortingCtx.BucketSortCol) ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) SelectDesc(org.apache.hadoop.hive.ql.plan.SelectDesc)

Example 5 with SortCol

use of org.apache.hadoop.hive.ql.optimizer.physical.BucketingSortingCtx.SortCol in project hive by apache.

From the class MoveTask, method execute:

@Override
public int execute(DriverContext driverContext) {
    try {
        if (driverContext.getCtx().getExplainAnalyze() == AnalyzeState.RUNNING) {
            return 0;
        }
        Hive db = getHive();
        // Do any hive related operations like moving tables and files
        // to appropriate locations
        LoadFileDesc lfd = work.getLoadFileWork();
        if (lfd != null) {
            Path targetPath = lfd.getTargetDir();
            Path sourcePath = lfd.getSourcePath();
            moveFile(sourcePath, targetPath, lfd.getIsDfsDir());
        }
        // Multi-file load is for dynamic partitions when some partitions do not
        // need to merge and they can simply be moved to the target directory.
        LoadMultiFilesDesc lmfd = work.getLoadMultiFilesWork();
        if (lmfd != null) {
            boolean isDfsDir = lmfd.getIsDfsDir();
            int i = 0;
            while (i < lmfd.getSourceDirs().size()) {
                Path srcPath = lmfd.getSourceDirs().get(i);
                Path destPath = lmfd.getTargetDirs().get(i);
                FileSystem fs = destPath.getFileSystem(conf);
                if (!fs.exists(destPath.getParent())) {
                    fs.mkdirs(destPath.getParent());
                }
                moveFile(srcPath, destPath, isDfsDir);
                i++;
            }
        }
        // Next we do this for tables and partitions
        LoadTableDesc tbd = work.getLoadTableWork();
        if (tbd != null) {
            StringBuilder mesg = new StringBuilder("Loading data to table ").append(tbd.getTable().getTableName());
            if (tbd.getPartitionSpec().size() > 0) {
                mesg.append(" partition (");
                Map<String, String> partSpec = tbd.getPartitionSpec();
                for (String key : partSpec.keySet()) {
                    mesg.append(key).append('=').append(partSpec.get(key)).append(", ");
                }
                mesg.setLength(mesg.length() - 2);
                mesg.append(')');
            }
            String mesg_detail = " from " + tbd.getSourcePath();
            console.printInfo(mesg.toString(), mesg_detail);
            Table table = db.getTable(tbd.getTable().getTableName());
            if (work.getCheckFileFormat()) {
                // Get all files from the src directory
                FileStatus[] dirs;
                ArrayList<FileStatus> files;
                // source filesystem
                FileSystem srcFs;
                try {
                    srcFs = tbd.getSourcePath().getFileSystem(conf);
                    dirs = srcFs.globStatus(tbd.getSourcePath());
                    files = new ArrayList<FileStatus>();
                    for (int i = 0; (dirs != null && i < dirs.length); i++) {
                        files.addAll(Arrays.asList(srcFs.listStatus(dirs[i].getPath(), FileUtils.HIDDEN_FILES_PATH_FILTER)));
                        // The format check only needs files from one matching directory,
                        // so stop after finding at least one.
                        if (files.size() > 0) {
                            break;
                        }
                    }
                } catch (IOException e) {
                    throw new HiveException("addFiles: filesystem error in check phase", e);
                }
                // handle file format check for table level
                if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVECHECKFILEFORMAT)) {
                    boolean flag = true;
                    // dynamic partition context is null
                    if (tbd.getDPCtx() == null) {
                        if (tbd.getPartitionSpec() == null || tbd.getPartitionSpec().isEmpty()) {
                            // Check if the file format of the file matches that of the table.
                            flag = HiveFileFormatUtils.checkInputFormat(srcFs, conf, tbd.getTable().getInputFileFormatClass(), files);
                        } else {
                            // Check if the file format of the file matches that of the partition
                            Partition oldPart = db.getPartition(table, tbd.getPartitionSpec(), false);
                            if (oldPart == null) {
                                // this means we have just created a table and are specifying partition in the
                                // load statement (without pre-creating the partition), in which case lets use
                                // table input format class. inheritTableSpecs defaults to true so when a new
                                // partition is created later it will automatically inherit input format
                                // from table object
                                flag = HiveFileFormatUtils.checkInputFormat(srcFs, conf, tbd.getTable().getInputFileFormatClass(), files);
                            } else {
                                flag = HiveFileFormatUtils.checkInputFormat(srcFs, conf, oldPart.getInputFormatClass(), files);
                            }
                        }
                        if (!flag) {
                            throw new HiveException("Wrong file format. Please check the file's format.");
                        }
                    } else {
                        LOG.warn("Skipping file format check as dpCtx is not null");
                    }
                }
            }
            // Create a data container
            DataContainer dc = null;
            if (tbd.getPartitionSpec().size() == 0) {
                dc = new DataContainer(table.getTTable());
                db.loadTable(tbd.getSourcePath(), tbd.getTable().getTableName(), tbd.getReplace(), work.isSrcLocal(), isSkewedStoredAsDirs(tbd), work.getLoadTableWork().getWriteType() != AcidUtils.Operation.NOT_ACID, hasFollowingStatsTask());
                if (work.getOutputs() != null) {
                    DDLTask.addIfAbsentByName(new WriteEntity(table, getWriteType(tbd, work.getLoadTableWork().getWriteType())), work.getOutputs());
                }
            } else {
                LOG.info("Partition is: " + tbd.getPartitionSpec().toString());
                // Check if the bucketing and/or sorting columns were inferred
                List<BucketCol> bucketCols = null;
                List<SortCol> sortCols = null;
                int numBuckets = -1;
                Task task = this;
                String path = tbd.getSourcePath().toUri().toString();
                // Walk up the chain of single-parent ancestors to find the map reduce task
                // that produced this directory (either standard, local, or a merge).
                while (task.getParentTasks() != null && task.getParentTasks().size() == 1) {
                    task = (Task) task.getParentTasks().get(0);
                    // If it was a merge task or a local map reduce task, nothing can be inferred
                    if (task instanceof MergeFileTask || task instanceof MapredLocalTask) {
                        break;
                    }
                    // For a map reduce task, check what bucketing/sorting information, if any,
                    // was inferred for the directory this move task is moving.
                    if (task instanceof MapRedTask) {
                        MapredWork work = (MapredWork) task.getWork();
                        MapWork mapWork = work.getMapWork();
                        bucketCols = mapWork.getBucketedColsByDirectory().get(path);
                        sortCols = mapWork.getSortedColsByDirectory().get(path);
                        if (work.getReduceWork() != null) {
                            numBuckets = work.getReduceWork().getNumReduceTasks();
                        }
                        if (bucketCols != null || sortCols != null) {
                            // Inferred bucketing/sorting metadata should only come from the final
                            // map reduce job (the one containing the file sink
                            // operator that writes the final output).
                            assert work.isFinalMapRed();
                        }
                        break;
                    }
                    // A parent MoveTask means the data was already moved once; follow its
                    // load-file source path. Such an intermediate move occurs when the
                    // condition for merging is not met, see GenMRFileSink1.
                    if (task instanceof MoveTask) {
                        if (((MoveTask) task).getWork().getLoadFileWork() != null) {
                            path = ((MoveTask) task).getWork().getLoadFileWork().getSourcePath().toUri().toString();
                        }
                    }
                }
                // deal with dynamic partitions
                DynamicPartitionCtx dpCtx = tbd.getDPCtx();
                if (dpCtx != null && dpCtx.getNumDPCols() > 0) {
                    // dynamic partitions
                    List<LinkedHashMap<String, String>> dps = Utilities.getFullDPSpecs(conf, dpCtx);
                    console.printInfo(System.getProperty("line.separator"));
                    long startTime = System.currentTimeMillis();
                    // load the list of DP partitions and return the list of partition specs
                    // TODO: In a follow-up to HIVE-1361, we should refactor loadDynamicPartitions
                    // to use Utilities.getFullDPSpecs() to get the list of full partSpecs.
                    // After that check the number of DPs created to not exceed the limit and
                    // iterate over it and call loadPartition() here.
                    // The reason we don't do inside HIVE-1361 is the latter is large and we
                    // want to isolate any potential issue it may introduce.
                    Map<Map<String, String>, Partition> dp = db.loadDynamicPartitions(tbd.getSourcePath(), tbd.getTable().getTableName(), tbd.getPartitionSpec(), tbd.getReplace(), dpCtx.getNumDPCols(), isSkewedStoredAsDirs(tbd), work.getLoadTableWork().getWriteType() != AcidUtils.Operation.NOT_ACID, SessionState.get().getTxnMgr().getCurrentTxnId(), hasFollowingStatsTask(), work.getLoadTableWork().getWriteType());
                    // publish DP columns to its subscribers
                    if (dps != null && dps.size() > 0) {
                        pushFeed(FeedType.DYNAMIC_PARTITIONS, dp.values());
                    }
                    String loadTime = "\t Time taken to load dynamic partitions: " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds";
                    console.printInfo(loadTime);
                    LOG.info(loadTime);
                    if (dp.size() == 0 && conf.getBoolVar(HiveConf.ConfVars.HIVE_ERROR_ON_EMPTY_PARTITION)) {
                        throw new HiveException("This query creates no partitions." + " To turn off this error, set hive.error.on.empty.partition=false.");
                    }
                    startTime = System.currentTimeMillis();
                    // For each loaded partition, update any inferred metadata
                    // and put it to WriteEntity for post-exec hook.
                    for (Map.Entry<Map<String, String>, Partition> entry : dp.entrySet()) {
                        Partition partn = entry.getValue();
                        if (bucketCols != null || sortCols != null) {
                            updatePartitionBucketSortColumns(db, table, partn, bucketCols, numBuckets, sortCols);
                        }
                        WriteEntity enty = new WriteEntity(partn, getWriteType(tbd, work.getLoadTableWork().getWriteType()));
                        if (work.getOutputs() != null) {
                            DDLTask.addIfAbsentByName(enty, work.getOutputs());
                        }
                        // Dynamic partitions are not known at compile time, so also register
                        // the write entity as an output of the queryPlan here.
                        if (queryPlan.getOutputs() == null) {
                            queryPlan.setOutputs(new LinkedHashSet<WriteEntity>());
                        }
                        queryPlan.getOutputs().add(enty);
                        // update columnar lineage for each partition
                        dc = new DataContainer(table.getTTable(), partn.getTPartition());
                        // Don't set lineage on delete as we don't have all the columns
                        if (SessionState.get() != null && work.getLoadTableWork().getWriteType() != AcidUtils.Operation.DELETE && work.getLoadTableWork().getWriteType() != AcidUtils.Operation.UPDATE) {
                            SessionState.get().getLineageState().setLineage(tbd.getSourcePath(), dc, table.getCols());
                        }
                        LOG.info("\tLoading partition " + entry.getKey());
                    }
                    console.printInfo("\t Time taken for adding to write entity : " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");
                    // reset data container to prevent it being added again.
                    dc = null;
                } else {
                    // static partitions
                    List<String> partVals = MetaStoreUtils.getPvals(table.getPartCols(), tbd.getPartitionSpec());
                    db.validatePartitionNameCharacters(partVals);
                    db.loadPartition(tbd.getSourcePath(), tbd.getTable().getTableName(), tbd.getPartitionSpec(), tbd.getReplace(), tbd.getInheritTableSpecs(), isSkewedStoredAsDirs(tbd), work.isSrcLocal(), work.getLoadTableWork().getWriteType() != AcidUtils.Operation.NOT_ACID, hasFollowingStatsTask());
                    Partition partn = db.getPartition(table, tbd.getPartitionSpec(), false);
                    if (bucketCols != null || sortCols != null) {
                        updatePartitionBucketSortColumns(db, table, partn, bucketCols, numBuckets, sortCols);
                    }
                    dc = new DataContainer(table.getTTable(), partn.getTPartition());
                    // add this partition to post-execution hook
                    if (work.getOutputs() != null) {
                        DDLTask.addIfAbsentByName(new WriteEntity(partn, getWriteType(tbd, work.getLoadTableWork().getWriteType())), work.getOutputs());
                    }
                }
            }
            if (SessionState.get() != null && dc != null) {
                // If we are doing an update or a delete the number of columns in the table will not
                // match the number of columns in the file sink.  For update there will be one too many
                // (because of the ROW__ID), and in the case of the delete there will be just the
                // ROW__ID, which we don't need to worry about from a lineage perspective.
                List<FieldSchema> tableCols = null;
                switch(work.getLoadTableWork().getWriteType()) {
                    case DELETE:
                    case UPDATE:
                        // Pass an empty list as no columns will be written to the file.
                        // TODO I should be able to make this work for update
                        tableCols = new ArrayList<FieldSchema>();
                        break;
                    default:
                        tableCols = table.getCols();
                        break;
                }
                SessionState.get().getLineageState().setLineage(tbd.getSourcePath(), dc, tableCols);
            }
            releaseLocks(tbd);
        }
        return 0;
    } catch (Exception e) {
        console.printError("Failed with exception " + e.getMessage(), "\n" + StringUtils.stringifyException(e));
        setException(e);
        return (1);
    }
}
Also used : MapRedTask(org.apache.hadoop.hive.ql.exec.mr.MapRedTask) MapredLocalTask(org.apache.hadoop.hive.ql.exec.mr.MapredLocalTask) MergeFileTask(org.apache.hadoop.hive.ql.io.merge.MergeFileTask) FileStatus(org.apache.hadoop.fs.FileStatus) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) MapredLocalTask(org.apache.hadoop.hive.ql.exec.mr.MapredLocalTask) FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema) DynamicPartitionCtx(org.apache.hadoop.hive.ql.plan.DynamicPartitionCtx) LinkedHashMap(java.util.LinkedHashMap) MapRedTask(org.apache.hadoop.hive.ql.exec.mr.MapRedTask) DataContainer(org.apache.hadoop.hive.ql.hooks.LineageInfo.DataContainer) MapredWork(org.apache.hadoop.hive.ql.plan.MapredWork) FileSystem(org.apache.hadoop.fs.FileSystem) WriteEntity(org.apache.hadoop.hive.ql.hooks.WriteEntity) Path(org.apache.hadoop.fs.Path) Partition(org.apache.hadoop.hive.ql.metadata.Partition) LoadFileDesc(org.apache.hadoop.hive.ql.plan.LoadFileDesc) Table(org.apache.hadoop.hive.ql.metadata.Table) BucketCol(org.apache.hadoop.hive.ql.optimizer.physical.BucketingSortingCtx.BucketCol) SortCol(org.apache.hadoop.hive.ql.optimizer.physical.BucketingSortingCtx.SortCol) IOException(java.io.IOException) LoadMultiFilesDesc(org.apache.hadoop.hive.ql.plan.LoadMultiFilesDesc) LockException(org.apache.hadoop.hive.ql.lockmgr.LockException) InvalidOperationException(org.apache.hadoop.hive.metastore.api.InvalidOperationException) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) IOException(java.io.IOException) LoadTableDesc(org.apache.hadoop.hive.ql.plan.LoadTableDesc) Hive(org.apache.hadoop.hive.ql.metadata.Hive) MapWork(org.apache.hadoop.hive.ql.plan.MapWork) Map(java.util.Map) LinkedHashMap(java.util.LinkedHashMap) MergeFileTask(org.apache.hadoop.hive.ql.io.merge.MergeFileTask)

Aggregations

SortCol (org.apache.hadoop.hive.ql.optimizer.physical.BucketingSortingCtx.SortCol): 6 usages
BucketCol (org.apache.hadoop.hive.ql.optimizer.physical.BucketingSortingCtx.BucketCol): 4 usages
BucketSortCol (org.apache.hadoop.hive.ql.optimizer.physical.BucketingSortingCtx.BucketSortCol): 4 usages
ArrayList (java.util.ArrayList): 3 usages
ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc): 3 usages
FileStatus (org.apache.hadoop.fs.FileStatus): 2 usages
FileSystem (org.apache.hadoop.fs.FileSystem): 2 usages
ExprNodeColumnDesc (org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc): 2 usages
IOException (java.io.IOException): 1 usage
LinkedHashMap (java.util.LinkedHashMap): 1 usage
Map (java.util.Map): 1 usage
Path (org.apache.hadoop.fs.Path): 1 usage
FieldSchema (org.apache.hadoop.hive.metastore.api.FieldSchema): 1 usage
InvalidOperationException (org.apache.hadoop.hive.metastore.api.InvalidOperationException): 1 usage
Order (org.apache.hadoop.hive.metastore.api.Order): 1 usage
ColumnInfo (org.apache.hadoop.hive.ql.exec.ColumnInfo): 1 usage
SelectOperator (org.apache.hadoop.hive.ql.exec.SelectOperator): 1 usage
MapRedTask (org.apache.hadoop.hive.ql.exec.mr.MapRedTask): 1 usage
MapredLocalTask (org.apache.hadoop.hive.ql.exec.mr.MapredLocalTask): 1 usage
DataContainer (org.apache.hadoop.hive.ql.hooks.LineageInfo.DataContainer): 1 usage