
Example 11 with HiveStorageHandler

Use of org.apache.hadoop.hive.ql.metadata.HiveStorageHandler in project hive by apache.

From the class PlanUtils, method configureJobPropertiesForStorageHandler.

private static void configureJobPropertiesForStorageHandler(boolean input, TableDesc tableDesc) {
    if (tableDesc == null) {
        return;
    }
    try {
        HiveStorageHandler storageHandler = HiveUtils.getStorageHandler(Hive.get().getConf(), tableDesc.getProperties().getProperty(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_STORAGE));
        if (storageHandler != null) {
            Map<String, String> jobProperties = new LinkedHashMap<String, String>();
            Map<String, String> jobSecrets = new LinkedHashMap<String, String>();
            if (input) {
                try {
                    storageHandler.configureInputJobProperties(tableDesc, jobProperties);
                } catch (AbstractMethodError e) {
                    LOG.info("configureInputJobProperties not found " + "using configureTableJobProperties", e);
                    storageHandler.configureTableJobProperties(tableDesc, jobProperties);
                }
                try {
                    storageHandler.configureInputJobCredentials(tableDesc, jobSecrets);
                } catch (AbstractMethodError e) {
                    // ignore: older storage handlers may not implement configureInputJobCredentials
                    LOG.info("configureInputJobCredentials not found");
                }
            } else {
                try {
                    storageHandler.configureOutputJobProperties(tableDesc, jobProperties);
                } catch (AbstractMethodError e) {
                    LOG.info("configureOutputJobProperties not found" + "using configureTableJobProperties", e);
                    storageHandler.configureTableJobProperties(tableDesc, jobProperties);
                }
            }
            // for native tables, leave jobProperties unset to avoid cluttering up the plans
            if (!jobProperties.isEmpty()) {
                tableDesc.setJobProperties(jobProperties);
            }
            // same idea, only set for non-native tables
            if (!jobSecrets.isEmpty()) {
                tableDesc.setJobSecrets(jobSecrets);
            }
        }
    } catch (HiveException ex) {
        throw new RuntimeException(ex);
    }
}
Also used : HiveStorageHandler(org.apache.hadoop.hive.ql.metadata.HiveStorageHandler) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) LinkedHashMap(java.util.LinkedHashMap)
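For orientation, the private helper above is usually reached through small public wrappers. The sketch below is an assumption of how those wrappers delegate to it; the input-side name matches the PlanUtils.configureInputJobPropertiesForStorageHandler call visible in Example 12, while the output-side name is assumed to be symmetric.

// Hedged sketch of the public entry points that delegate to the private helper above.
// configureInputJobPropertiesForStorageHandler matches the call seen in Example 12;
// the output variant is assumed to mirror it.
public static void configureInputJobPropertiesForStorageHandler(TableDesc tableDesc) {
    configureJobPropertiesForStorageHandler(true, tableDesc);
}

public static void configureOutputJobPropertiesForStorageHandler(TableDesc tableDesc) {
    configureJobPropertiesForStorageHandler(false, tableDesc);
}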

Example 12 with HiveStorageHandler

Use of org.apache.hadoop.hive.ql.metadata.HiveStorageHandler in project hive by apache.

From the class Utilities, method getInputSummaryWithPool.

@VisibleForTesting
static ContentSummary getInputSummaryWithPool(final Context ctx, Set<Path> pathNeedProcess, MapWork work, long[] summary, ExecutorService executor) throws IOException {
    List<Future<?>> results = new ArrayList<Future<?>>();
    final Map<String, ContentSummary> resultMap = new ConcurrentHashMap<String, ContentSummary>();
    HiveInterruptCallback interrup = HiveInterruptUtils.add(new HiveInterruptCallback() {

        @Override
        public void interrupt() {
            for (Path path : pathNeedProcess) {
                try {
                    path.getFileSystem(ctx.getConf()).close();
                } catch (IOException ignore) {
                    LOG.debug("Failed to close filesystem", ignore);
                }
            }
            if (executor != null) {
                executor.shutdownNow();
            }
        }
    });
    try {
        Configuration conf = ctx.getConf();
        JobConf jobConf = new JobConf(conf);
        for (Path path : pathNeedProcess) {
            final Path p = path;
            final String pathStr = path.toString();
            // All threads share the same Configuration and JobConf, on the
            // assumption that they are thread safe as long as only read
            // operations are performed. This is not stated in Hadoop's javadoc,
            // but the source code clearly shows an effort to make it so, and we
            // believe the assumption holds. Revisit this code if the assumption
            // turns out to be incorrect.
            final Configuration myConf = conf;
            final JobConf myJobConf = jobConf;
            final Map<String, Operator<?>> aliasToWork = work.getAliasToWork();
            final Map<Path, ArrayList<String>> pathToAlias = work.getPathToAliases();
            final PartitionDesc partDesc = work.getPathToPartitionInfo().get(p);
            Runnable r = new Runnable() {

                @Override
                public void run() {
                    try {
                        Class<? extends InputFormat> inputFormatCls = partDesc.getInputFileFormatClass();
                        InputFormat inputFormatObj = HiveInputFormat.getInputFormatFromCache(inputFormatCls, myJobConf);
                        if (inputFormatObj instanceof ContentSummaryInputFormat) {
                            ContentSummaryInputFormat cs = (ContentSummaryInputFormat) inputFormatObj;
                            resultMap.put(pathStr, cs.getContentSummary(p, myJobConf));
                            return;
                        }
                        String metaTableStorage = null;
                        if (partDesc.getTableDesc() != null && partDesc.getTableDesc().getProperties() != null) {
                            metaTableStorage = partDesc.getTableDesc().getProperties().getProperty(hive_metastoreConstants.META_TABLE_STORAGE, null);
                        }
                        if (partDesc.getProperties() != null) {
                            metaTableStorage = partDesc.getProperties().getProperty(hive_metastoreConstants.META_TABLE_STORAGE, metaTableStorage);
                        }
                        HiveStorageHandler handler = HiveUtils.getStorageHandler(myConf, metaTableStorage);
                        if (handler instanceof InputEstimator) {
                            long total = 0;
                            TableDesc tableDesc = partDesc.getTableDesc();
                            InputEstimator estimator = (InputEstimator) handler;
                            for (String alias : HiveFileFormatUtils.doGetAliasesFromPath(pathToAlias, p)) {
                                JobConf jobConf = new JobConf(myJobConf);
                                TableScanOperator scanOp = (TableScanOperator) aliasToWork.get(alias);
                                Utilities.setColumnNameList(jobConf, scanOp, true);
                                Utilities.setColumnTypeList(jobConf, scanOp, true);
                                PlanUtils.configureInputJobPropertiesForStorageHandler(tableDesc);
                                Utilities.copyTableJobPropertiesToConf(tableDesc, jobConf);
                                total += estimator.estimate(jobConf, scanOp, -1).getTotalLength();
                            }
                            resultMap.put(pathStr, new ContentSummary(total, -1, -1));
                        } else {
                            // todo: should nullify summary for non-native tables,
                            // not to be selected as a mapjoin target
                            FileSystem fs = p.getFileSystem(myConf);
                            resultMap.put(pathStr, fs.getContentSummary(p));
                        }
                    } catch (Exception e) {
                        // We safely ignore this exception for summary data.
                        // We don't update the cache to protect it from polluting other
                        // usages. The worst case is that IOException will always be
                        // retried for another getInputSummary(), which is fine as
                        // IOException is not considered as a common case.
                        LOG.info("Cannot get size of {}. Safely ignored.", pathStr);
                    }
                }
            };
            if (executor == null) {
                r.run();
            } else {
                Future<?> result = executor.submit(r);
                results.add(result);
            }
        }
        if (executor != null) {
            for (Future<?> result : results) {
                boolean executorDone = false;
                do {
                    try {
                        result.get();
                        executorDone = true;
                    } catch (InterruptedException e) {
                        LOG.info("Interrupted when waiting threads: ", e);
                        Thread.currentThread().interrupt();
                        break;
                    } catch (ExecutionException e) {
                        throw new IOException(e);
                    }
                } while (!executorDone);
            }
            executor.shutdown();
        }
        HiveInterruptUtils.checkInterrupted();
        for (Map.Entry<String, ContentSummary> entry : resultMap.entrySet()) {
            ContentSummary cs = entry.getValue();
            summary[0] += cs.getLength();
            summary[1] += cs.getFileCount();
            summary[2] += cs.getDirectoryCount();
            ctx.addCS(entry.getKey(), cs);
            if (LOG.isInfoEnabled()) {
                LOG.info("Cache Content Summary for {} length: {} file count: {} " + " directory count: {}", entry.getKey(), cs.getLength(), cs.getFileCount(), cs.getDirectoryCount());
            }
        }
        return new ContentSummary(summary[0], summary[1], summary[2]);
    } finally {
        if (executor != null) {
            executor.shutdownNow();
        }
        HiveInterruptUtils.remove(interrup);
    }
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) ArrayList(java.util.ArrayList) ContentSummaryInputFormat(org.apache.hadoop.hive.ql.io.ContentSummaryInputFormat) FileSystem(org.apache.hadoop.fs.FileSystem) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) ExecutionException(java.util.concurrent.ExecutionException) JobConf(org.apache.hadoop.mapred.JobConf) Path(org.apache.hadoop.fs.Path) InputEstimator(org.apache.hadoop.hive.ql.metadata.InputEstimator) HiveStorageHandler(org.apache.hadoop.hive.ql.metadata.HiveStorageHandler) HiveInterruptCallback(org.apache.hadoop.hive.common.HiveInterruptCallback) IOException(java.io.IOException) SQLFeatureNotSupportedException(java.sql.SQLFeatureNotSupportedException) SQLTransientException(java.sql.SQLTransientException) SQLException(java.sql.SQLException) IOException(java.io.IOException) ExecutionException(java.util.concurrent.ExecutionException) SerDeException(org.apache.hadoop.hive.serde2.SerDeException) SemanticException(org.apache.hadoop.hive.ql.parse.SemanticException) EOFException(java.io.EOFException) FileNotFoundException(java.io.FileNotFoundException) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) SequenceFileInputFormat(org.apache.hadoop.mapred.SequenceFileInputFormat) ReworkMapredInputFormat(org.apache.hadoop.hive.ql.io.ReworkMapredInputFormat) ContentSummaryInputFormat(org.apache.hadoop.hive.ql.io.ContentSummaryInputFormat) InputFormat(org.apache.hadoop.mapred.InputFormat) FileInputFormat(org.apache.hadoop.mapred.FileInputFormat) TextInputFormat(org.apache.hadoop.mapred.TextInputFormat) OneNullRowInputFormat(org.apache.hadoop.hive.ql.io.OneNullRowInputFormat) HiveInputFormat(org.apache.hadoop.hive.ql.io.HiveInputFormat) ContentSummary(org.apache.hadoop.fs.ContentSummary) Future(java.util.concurrent.Future) PartitionDesc(org.apache.hadoop.hive.ql.plan.PartitionDesc) TableDesc(org.apache.hadoop.hive.ql.plan.TableDesc) Map(java.util.Map) LinkedHashMap(java.util.LinkedHashMap) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) HashMap(java.util.HashMap) VisibleForTesting(com.google.common.annotations.VisibleForTesting)
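The method above is annotated @VisibleForTesting, so callers normally reach it through a wrapper that builds the thread pool first. A minimal caller sketch, assuming the helper name, the pool-size cap and the thread naming (all illustrative, not the exact Hive defaults):

// Hedged caller sketch. Assumes java.util.concurrent.Executors and Guava's
// com.google.common.util.concurrent.ThreadFactoryBuilder are on the classpath.
static ContentSummary getInputSummaryExample(Context ctx, Set<Path> pathNeedProcess, MapWork work) throws IOException {
    long[] summary = new long[3];
    // The cap of 10 threads is illustrative only.
    int numThreads = Math.min(pathNeedProcess.size(), 10);
    ExecutorService executor = numThreads > 1
            ? Executors.newFixedThreadPool(numThreads,
                new ThreadFactoryBuilder().setDaemon(true).setNameFormat("Get-Input-Summary-%d").build())
            // Passing null makes getInputSummaryWithPool run each path inline on the calling thread.
            : null;
    return Utilities.getInputSummaryWithPool(ctx, pathNeedProcess, work, summary, executor);
}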

Example 13 with HiveStorageHandler

Use of org.apache.hadoop.hive.ql.metadata.HiveStorageHandler in project hive by apache.

From the class HCatBaseOutputFormat, method configureOutputStorageHandler.

/**
 * Configure the output storage handler, allowing specification of missing dynamic partition values.
 * @param jobContext the job context
 * @param dynamicPartVals dynamic partition values, in the same order as the table's dynamic partitioning keys
 * @throws IOException if the storage handler cannot be configured
 */
@SuppressWarnings("unchecked")
static void configureOutputStorageHandler(JobContext jobContext, List<String> dynamicPartVals) throws IOException {
    Configuration conf = jobContext.getConfiguration();
    try {
        OutputJobInfo jobInfo = (OutputJobInfo) HCatUtil.deserialize(conf.get(HCatConstants.HCAT_KEY_OUTPUT_INFO));
        HiveStorageHandler storageHandler = HCatUtil.getStorageHandler(jobContext.getConfiguration(), jobInfo.getTableInfo().getStorerInfo());
        Map<String, String> partitionValues = jobInfo.getPartitionValues();
        String location = jobInfo.getLocation();
        if (dynamicPartVals != null) {
            // dynamic part vals specified
            List<String> dynamicPartKeys = jobInfo.getDynamicPartitioningKeys();
            if (dynamicPartVals.size() != dynamicPartKeys.size()) {
                throw new HCatException(ErrorType.ERROR_INVALID_PARTITION_VALUES, "Unable to configure dynamic partitioning for storage handler, mismatch between" + " number of partition values obtained[" + dynamicPartVals.size() + "] and number of partition values required[" + dynamicPartKeys.size() + "]");
            }
            for (int i = 0; i < dynamicPartKeys.size(); i++) {
                partitionValues.put(dynamicPartKeys.get(i), dynamicPartVals.get(i));
            }
            // // re-home location, now that we know the rest of the partvals
            // Table table = jobInfo.getTableInfo().getTable();
            // 
            // List<String> partitionCols = new ArrayList<String>();
            // for(FieldSchema schema : table.getPartitionKeys()) {
            // partitionCols.add(schema.getName());
            // }
            jobInfo.setPartitionValues(partitionValues);
        }
        HCatUtil.configureOutputStorageHandler(storageHandler, conf, jobInfo);
    } catch (Exception e) {
        if (e instanceof HCatException) {
            throw (HCatException) e;
        } else {
            throw new HCatException(ErrorType.ERROR_INIT_STORAGE_HANDLER, e);
        }
    }
}
Also used : HiveStorageHandler(org.apache.hadoop.hive.ql.metadata.HiveStorageHandler) Configuration(org.apache.hadoop.conf.Configuration) HCatException(org.apache.hive.hcatalog.common.HCatException) HCatException(org.apache.hive.hcatalog.common.HCatException) IOException(java.io.IOException)
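For the static-partitioning case there is typically a single-argument entry point that simply passes no dynamic values; a minimal sketch, assuming such an overload exists alongside the method above:

// Hedged sketch: static-partitioning entry point, assumed to delegate with null dynamic values.
static void configureOutputStorageHandler(JobContext jobContext) throws IOException {
    configureOutputStorageHandler(jobContext, (List<String>) null);
}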

Example 14 with HiveStorageHandler

Use of org.apache.hadoop.hive.ql.metadata.HiveStorageHandler in project hive by apache.

From the class CreateTableDesc, method toTable.

public Table toTable(HiveConf conf) throws HiveException {
    String databaseName = getDatabaseName();
    String tableName = getTableName();
    if (databaseName == null || tableName.contains(".")) {
        String[] names = Utilities.getDbTableName(tableName);
        databaseName = names[0];
        tableName = names[1];
    }
    Table tbl = new Table(databaseName, tableName);
    if (getTblProps() != null) {
        tbl.getTTable().getParameters().putAll(getTblProps());
    }
    if (getPartCols() != null) {
        tbl.setPartCols(getPartCols());
    }
    if (getNumBuckets() != -1) {
        tbl.setNumBuckets(getNumBuckets());
    }
    if (getStorageHandler() != null) {
        tbl.setProperty(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_STORAGE, getStorageHandler());
    }
    HiveStorageHandler storageHandler = tbl.getStorageHandler();
    /*
     * If the user didn't specify a SerDe, we use the default.
     */
    String serDeClassName;
    if (getSerName() == null) {
        if (storageHandler == null) {
            serDeClassName = PlanUtils.getDefaultSerDe().getName();
            LOG.info("Default to " + serDeClassName + " for table " + tableName);
        } else {
            serDeClassName = storageHandler.getSerDeClass().getName();
            LOG.info("Use StorageHandler-supplied " + serDeClassName + " for table " + tableName);
        }
    } else {
        // let's validate that the serde exists
        serDeClassName = getSerName();
        DDLTask.validateSerDe(serDeClassName, conf);
    }
    tbl.setSerializationLib(serDeClassName);
    if (getFieldDelim() != null) {
        tbl.setSerdeParam(serdeConstants.FIELD_DELIM, getFieldDelim());
        tbl.setSerdeParam(serdeConstants.SERIALIZATION_FORMAT, getFieldDelim());
    }
    if (getFieldEscape() != null) {
        tbl.setSerdeParam(serdeConstants.ESCAPE_CHAR, getFieldEscape());
    }
    if (getCollItemDelim() != null) {
        tbl.setSerdeParam(serdeConstants.COLLECTION_DELIM, getCollItemDelim());
    }
    if (getMapKeyDelim() != null) {
        tbl.setSerdeParam(serdeConstants.MAPKEY_DELIM, getMapKeyDelim());
    }
    if (getLineDelim() != null) {
        tbl.setSerdeParam(serdeConstants.LINE_DELIM, getLineDelim());
    }
    if (getNullFormat() != null) {
        tbl.setSerdeParam(serdeConstants.SERIALIZATION_NULL_FORMAT, getNullFormat());
    }
    if (getSerdeProps() != null) {
        Iterator<Map.Entry<String, String>> iter = getSerdeProps().entrySet().iterator();
        while (iter.hasNext()) {
            Map.Entry<String, String> m = iter.next();
            tbl.setSerdeParam(m.getKey(), m.getValue());
        }
    }
    if (getCols() != null) {
        tbl.setFields(getCols());
    }
    if (getBucketCols() != null) {
        tbl.setBucketCols(getBucketCols());
    }
    if (getSortCols() != null) {
        tbl.setSortCols(getSortCols());
    }
    if (getComment() != null) {
        tbl.setProperty("comment", getComment());
    }
    if (getLocation() != null) {
        tbl.setDataLocation(new Path(getLocation()));
    }
    if (getSkewedColNames() != null) {
        tbl.setSkewedColNames(getSkewedColNames());
    }
    if (getSkewedColValues() != null) {
        tbl.setSkewedColValues(getSkewedColValues());
    }
    tbl.getTTable().setTemporary(isTemporary());
    tbl.setStoredAsSubDirectories(isStoredAsSubDirectories());
    tbl.setInputFormatClass(getInputFormat());
    tbl.setOutputFormatClass(getOutputFormat());
    // Only persist the input/output format to the metastore when it is explicitly specified.
    // Otherwise, load lazily via StorageHandler at query time.
    if (getInputFormat() != null && !getInputFormat().isEmpty()) {
        tbl.getTTable().getSd().setInputFormat(tbl.getInputFormatClass().getName());
    }
    if (getOutputFormat() != null && !getOutputFormat().isEmpty()) {
        tbl.getTTable().getSd().setOutputFormat(tbl.getOutputFormatClass().getName());
    }
    if (DDLTask.doesTableNeedLocation(tbl)) {
        // If location is specified - ensure that it is a full qualified name
        DDLTask.makeLocationQualified(tbl.getDbName(), tbl.getTTable().getSd(), tableName, conf);
    }
    if (isExternal()) {
        tbl.setProperty("EXTERNAL", "TRUE");
        tbl.setTableType(TableType.EXTERNAL_TABLE);
    }
    // If the sort columns are a superset of the bucketed columns, record that fact; it can later be
    // used to optimize some group-by queries. The order does not matter, as long as the bucketed
    // columns appear within the first 'n' sort columns, where 'n' is the number of bucketed columns.
    if ((tbl.getBucketCols() != null) && (tbl.getSortCols() != null)) {
        List<String> bucketCols = tbl.getBucketCols();
        List<Order> sortCols = tbl.getSortCols();
        if ((sortCols.size() > 0) && (sortCols.size() >= bucketCols.size())) {
            boolean found = true;
            Iterator<String> iterBucketCols = bucketCols.iterator();
            while (iterBucketCols.hasNext()) {
                String bucketCol = iterBucketCols.next();
                boolean colFound = false;
                for (int i = 0; i < bucketCols.size(); i++) {
                    if (bucketCol.equals(sortCols.get(i).getCol())) {
                        colFound = true;
                        break;
                    }
                }
                if (colFound == false) {
                    found = false;
                    break;
                }
            }
            if (found) {
                tbl.setProperty("SORTBUCKETCOLSPREFIX", "TRUE");
            }
        }
    }
    if (!this.isCTAS && (tbl.getPath() == null || (tbl.isEmpty() && !isExternal()))) {
        if (!tbl.isPartitioned() && conf.getBoolVar(HiveConf.ConfVars.HIVESTATSAUTOGATHER)) {
            StatsSetupConst.setStatsStateForCreateTable(tbl.getTTable().getParameters(), MetaStoreUtils.getColumnNames(tbl.getCols()), StatsSetupConst.TRUE);
        }
    } else {
        StatsSetupConst.setStatsStateForCreateTable(tbl.getTTable().getParameters(), null, StatsSetupConst.FALSE);
    }
    return tbl;
}
Also used : Path(org.apache.hadoop.fs.Path) Order(org.apache.hadoop.hive.metastore.api.Order) HiveStorageHandler(org.apache.hadoop.hive.ql.metadata.HiveStorageHandler) Table(org.apache.hadoop.hive.ql.metadata.Table) SQLCheckConstraint(org.apache.hadoop.hive.metastore.api.SQLCheckConstraint) SQLNotNullConstraint(org.apache.hadoop.hive.metastore.api.SQLNotNullConstraint) SQLUniqueConstraint(org.apache.hadoop.hive.metastore.api.SQLUniqueConstraint) SQLDefaultConstraint(org.apache.hadoop.hive.metastore.api.SQLDefaultConstraint) Map(java.util.Map)
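A hedged sketch of how toTable is typically consumed when the CREATE TABLE statement is executed; the surrounding method, its name and the error handling are assumptions, not the exact DDLTask code:

// Hypothetical caller sketch: materialize the descriptor into a metastore Table and register it.
void createTableExample(Hive db, CreateTableDesc crtTbl, HiveConf conf) throws HiveException {
    // toTable applies the SerDe, format, bucketing and stats rules shown above.
    Table tbl = crtTbl.toTable(conf);
    // getIfNotExists() is assumed to carry the IF NOT EXISTS flag from the DDL.
    db.createTable(tbl, crtTbl.getIfNotExists());
}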

Example 15 with HiveStorageHandler

Use of org.apache.hadoop.hive.ql.metadata.HiveStorageHandler in project hive by apache.

From the class CreateViewDesc, method toTable.

public Table toTable(HiveConf conf) throws HiveException {
    String[] names = Utilities.getDbTableName(getViewName());
    String databaseName = names[0];
    String tableName = names[1];
    Table tbl = new Table(databaseName, tableName);
    tbl.setViewOriginalText(getViewOriginalText());
    tbl.setViewExpandedText(getViewExpandedText());
    if (isMaterialized()) {
        tbl.setRewriteEnabled(isRewriteEnabled());
        tbl.setTableType(TableType.MATERIALIZED_VIEW);
    } else {
        tbl.setTableType(TableType.VIRTUAL_VIEW);
    }
    tbl.setSerializationLib(null);
    tbl.clearSerDeInfo();
    tbl.setFields(getSchema());
    if (getComment() != null) {
        tbl.setProperty("comment", getComment());
    }
    if (getTblProps() != null) {
        tbl.getTTable().getParameters().putAll(getTblProps());
    }
    if (getPartCols() != null) {
        tbl.setPartCols(getPartCols());
    }
    if (getInputFormat() != null) {
        tbl.setInputFormatClass(getInputFormat());
    }
    if (getOutputFormat() != null) {
        tbl.setOutputFormatClass(getOutputFormat());
    }
    if (isMaterialized()) {
        if (getLocation() != null) {
            tbl.setDataLocation(new Path(getLocation()));
        }
        if (getStorageHandler() != null) {
            tbl.setProperty(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_STORAGE, getStorageHandler());
        }
        HiveStorageHandler storageHandler = tbl.getStorageHandler();
        /*
       * If the user didn't specify a SerDe, we use the default.
       */
        String serDeClassName;
        if (getSerde() == null) {
            if (storageHandler == null) {
                serDeClassName = PlanUtils.getDefaultSerDe().getName();
                LOG.info("Default to {} for materialized view {}", serDeClassName, getViewName());
            } else {
                serDeClassName = storageHandler.getSerDeClass().getName();
                LOG.info("Use StorageHandler-supplied {} for materialized view {}", serDeClassName, getViewName());
            }
        } else {
            // let's validate that the serde exists
            serDeClassName = getSerde();
            DDLTask.validateSerDe(serDeClassName, conf);
        }
        tbl.setSerializationLib(serDeClassName);
        // To remain consistent, we need to set input and output formats both
        // at the table level and the storage handler level.
        tbl.setInputFormatClass(getInputFormat());
        tbl.setOutputFormatClass(getOutputFormat());
        if (getInputFormat() != null && !getInputFormat().isEmpty()) {
            tbl.getSd().setInputFormat(tbl.getInputFormatClass().getName());
        }
        if (getOutputFormat() != null && !getOutputFormat().isEmpty()) {
            tbl.getSd().setOutputFormat(tbl.getOutputFormatClass().getName());
        }
    }
    return tbl;
}
Also used : Path(org.apache.hadoop.fs.Path) HiveStorageHandler(org.apache.hadoop.hive.ql.metadata.HiveStorageHandler) Table(org.apache.hadoop.hive.ql.metadata.Table)
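The view case follows the same pattern; a hedged sketch of a caller, with the method name, the getIfNotExists() accessor and the error handling assumed rather than taken from DDLTask:

// Hypothetical caller sketch: build the (virtual or materialized) view Table and create it.
void createViewExample(Hive db, CreateViewDesc crtView, HiveConf conf) throws HiveException {
    Table view = crtView.toTable(conf);
    db.createTable(view, crtView.getIfNotExists());
}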

Aggregations

HiveStorageHandler (org.apache.hadoop.hive.ql.metadata.HiveStorageHandler) 18
IOException (java.io.IOException) 6
Path (org.apache.hadoop.fs.Path) 6
HiveException (org.apache.hadoop.hive.ql.metadata.HiveException) 6
Table (org.apache.hadoop.hive.ql.metadata.Table) 5
JobConf (org.apache.hadoop.mapred.JobConf) 5
Map (java.util.Map) 4
Configuration (org.apache.hadoop.conf.Configuration) 4
ArrayList (java.util.ArrayList) 3
HashMap (java.util.HashMap) 3
Properties (java.util.Properties) 3
SemanticException (org.apache.hadoop.hive.ql.parse.SemanticException) 3
HCatException (org.apache.hive.hcatalog.common.HCatException) 3
LinkedHashMap (java.util.LinkedHashMap) 2
InputFormat (org.apache.hadoop.mapred.InputFormat) 2
VisibleForTesting (com.google.common.annotations.VisibleForTesting) 1
EOFException (java.io.EOFException) 1
FileNotFoundException (java.io.FileNotFoundException) 1
SQLException (java.sql.SQLException) 1
SQLFeatureNotSupportedException (java.sql.SQLFeatureNotSupportedException) 1