Example 11 with HCatException

Use of org.apache.hive.hcatalog.common.HCatException in project hive by apache.

The class FileOutputFormatContainer, method handleDuplicatePublish.

/**
   * Handles a duplicate publish of a partition, or of data into an unpartitioned table,
   * when the table is immutable.
   *
   * For partitioned tables, fails if the partition already exists.
   * For non-partitioned tables, fails if files are already present in the table directory.
   * For dynamically partitioned publishes, does nothing; that check has to happen at RecordWriter time.
   * @param context the job
   * @param outputInfo the output info
   * @param client the metastore client
   * @param table the table being written to
   * @throws IOException
   * @throws org.apache.hadoop.hive.metastore.api.MetaException
   * @throws org.apache.thrift.TException
   */
private static void handleDuplicatePublish(JobContext context, OutputJobInfo outputInfo, IMetaStoreClient client, Table table) throws IOException, MetaException, TException, NoSuchObjectException {
    if (!table.isImmutable()) {
        return;
    }
    if (table.getPartitionKeys().size() > 0) {
        if (!outputInfo.isDynamicPartitioningUsed()) {
            List<String> partitionValues = getPartitionValueList(table, outputInfo.getPartitionValues());
            // fully-specified partition
            List<String> currentParts = client.listPartitionNames(outputInfo.getDatabaseName(), outputInfo.getTableName(), partitionValues, (short) 1);
            if (currentParts.size() > 0) {
                // no need to check further: a matching partition already exists, so fail
                throw new HCatException(ErrorType.ERROR_DUPLICATE_PARTITION);
            }
        }
    } else {
        List<String> partitionValues = getPartitionValueList(table, outputInfo.getPartitionValues());
        // non-partitioned table
        Path tablePath = new Path(table.getTTable().getSd().getLocation());
        FileSystem fs = tablePath.getFileSystem(context.getConfiguration());
        if (!MetaStoreUtils.isDirEmpty(fs, tablePath)) {
            throw new HCatException(ErrorType.ERROR_NON_EMPTY_TABLE, table.getDbName() + "." + table.getTableName());
        }
    }
}
Also used : Path(org.apache.hadoop.fs.Path) FileSystem(org.apache.hadoop.fs.FileSystem) HCatException(org.apache.hive.hcatalog.common.HCatException)
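In practice this check surfaces at job-submission time, when the output container's checkOutputSpecs() runs. Below is a minimal, hypothetical client-side sketch of telling the duplicate-publish cases apart; submitHCatWriteJob() is a made-up helper that configures OutputJobInfo/HCatOutputFormat and submits the job, and the handling is only illustrative.

private static void runPublish() throws HCatException {
    try {
        // Made-up helper: configures the HCatalog output and submits the MapReduce job.
        submitHCatWriteJob();
    } catch (HCatException e) {
        if (e.getErrorType() == ErrorType.ERROR_DUPLICATE_PARTITION) {
            // Immutable partitioned table: the static partition is already published;
            // retry with a new partition spec or drop the existing partition first.
            System.err.println("Partition already published: " + e.getMessage());
        } else if (e.getErrorType() == ErrorType.ERROR_NON_EMPTY_TABLE) {
            // Immutable non-partitioned table whose directory already contains files.
            System.err.println("Table already has data: " + e.getMessage());
        } else {
            throw e;
        }
    }
}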

Example 12 with HCatException

Use of org.apache.hive.hcatalog.common.HCatException in project hive by apache.

The class HCatInputFormatReader, method prepareRead.

@Override
public ReaderContext prepareRead() throws HCatException {
    try {
        Job job = new Job(conf);
        HCatInputFormat hcif = HCatInputFormat.setInput(job, re.getDbName(), re.getTableName(), re.getFilterString());
        ReaderContextImpl cntxt = new ReaderContextImpl();
        cntxt.setInputSplits(hcif.getSplits(ShimLoader.getHadoopShims().getHCatShim().createJobContext(job.getConfiguration(), null)));
        cntxt.setConf(job.getConfiguration());
        return cntxt;
    } catch (IOException e) {
        throw new HCatException(ErrorType.ERROR_NOT_INITIALIZED, e);
    } catch (InterruptedException e) {
        throw new HCatException(ErrorType.ERROR_NOT_INITIALIZED, e);
    }
}
Also used : HCatException(org.apache.hive.hcatalog.common.HCatException) IOException(java.io.IOException) Job(org.apache.hadoop.mapreduce.Job) HCatInputFormat(org.apache.hive.hcatalog.mapreduce.HCatInputFormat)
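prepareRead() above is the master-side half of HCatalog's data transfer (reader/writer) API. A minimal master-side sketch of how a client normally reaches it, assuming the org.apache.hive.hcatalog.data.transfer classes (ReadEntity, DataTransferFactory, HCatReader, ReaderContext) and made-up database/table names:

// Master-side sketch; "default" and "mytable" are placeholders, and config would
// typically carry metastore connection settings (e.g. hive.metastore.uris).
Map<String, String> config = new HashMap<String, String>();
ReadEntity entity = new ReadEntity.Builder()
    .withDatabase("default")
    .withTable("mytable")
    .build();
HCatReader reader = DataTransferFactory.getHCatReader(entity, config);
// This is the call shown above: it resolves splits via HCatInputFormat and wraps
// them in a serializable ReaderContext, or throws HCatException(ERROR_NOT_INITIALIZED).
ReaderContext cntxt = reader.prepareRead();

The ReaderContext is then shipped to the task side, where DataTransferFactory hands out per-split readers that return Iterator<HCatRecord>; that half is not shown here.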

Example 13 with HCatException

Use of org.apache.hive.hcatalog.common.HCatException in project hive by apache.

The class HCatOutputFormatWriter, method prepareWrite.

@Override
public WriterContext prepareWrite() throws HCatException {
    OutputJobInfo jobInfo = OutputJobInfo.create(we.getDbName(), we.getTableName(), we.getPartitionKVs());
    Job job;
    try {
        job = new Job(conf);
        HCatOutputFormat.setOutput(job, jobInfo);
        HCatOutputFormat.setSchema(job, HCatOutputFormat.getTableSchema(job.getConfiguration()));
        HCatOutputFormat outFormat = new HCatOutputFormat();
        outFormat.checkOutputSpecs(job);
        outFormat.getOutputCommitter(ShimLoader.getHadoopShims().getHCatShim().createTaskAttemptContext(job.getConfiguration(), ShimLoader.getHadoopShims().getHCatShim().createTaskAttemptID())).setupJob(job);
    } catch (IOException e) {
        throw new HCatException(ErrorType.ERROR_NOT_INITIALIZED, e);
    } catch (InterruptedException e) {
        throw new HCatException(ErrorType.ERROR_NOT_INITIALIZED, e);
    }
    WriterContextImpl cntxt = new WriterContextImpl();
    cntxt.setConf(job.getConfiguration());
    return cntxt;
}
Also used : HCatException(org.apache.hive.hcatalog.common.HCatException) OutputJobInfo(org.apache.hive.hcatalog.mapreduce.OutputJobInfo) HCatOutputFormat(org.apache.hive.hcatalog.mapreduce.HCatOutputFormat) IOException(java.io.IOException) Job(org.apache.hadoop.mapreduce.Job)
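prepareWrite() is the mirror-image master step of the same data transfer API: it runs setOutput(), setSchema(), checkOutputSpecs() and setupJob() once, and packages the resulting configuration into a WriterContext. A minimal end-to-end sketch with placeholder names, assuming WriteEntity, DataTransferFactory, HCatWriter and WriterContext from org.apache.hive.hcatalog.data.transfer; recordIterator stands in for an Iterator<HCatRecord> produced by the task:

// Master side: describe the target table and initialize the write.
Map<String, String> config = new HashMap<String, String>();
WriteEntity entity = new WriteEntity.Builder()
    .withDatabase("default")
    .withTable("mytable")
    .build();
HCatWriter master = DataTransferFactory.getHCatWriter(entity, config);
WriterContext writeCntxt = master.prepareWrite();   // the call shown above

// Task side: each task writes its records against the shared WriterContext.
HCatWriter slave = DataTransferFactory.getHCatWriter(writeCntxt);
slave.write(recordIterator);                         // placeholder Iterator<HCatRecord>

// Back on the master: commit (or abort) the write as a whole.
master.commit(writeCntxt);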

Example 14 with HCatException

Use of org.apache.hive.hcatalog.common.HCatException in project hive by apache.

The class HarOutputCommitterPostProcessor, method makeHar.

/**
   * Creates a har file from the contents of a given directory, using that directory as the archive root.
   * @param context the job context
   * @param dir the directory to archive
   * @param harFile the HAR file to create
   */
public static void makeHar(JobContext context, String dir, String harFile) throws IOException {
    //    Configuration conf = context.getConfiguration();
    //    Credentials creds = context.getCredentials();
    //    HCatUtil.logAllTokens(LOG,context);
    int lastSep = harFile.lastIndexOf(Path.SEPARATOR_CHAR);
    Path archivePath = new Path(harFile.substring(0, lastSep));
    final String[] args = { "-archiveName", harFile.substring(lastSep + 1, harFile.length()), "-p", dir, "*", archivePath.toString() };
    //    }
    try {
        Configuration newConf = new Configuration();
        FileSystem fs = archivePath.getFileSystem(newConf);
        String hadoopTokenFileLocationEnvSetting = System.getenv(HCatConstants.SYSENV_HADOOP_TOKEN_FILE_LOCATION);
        if ((hadoopTokenFileLocationEnvSetting != null) && (!hadoopTokenFileLocationEnvSetting.isEmpty())) {
            newConf.set(HCatConstants.CONF_MAPREDUCE_JOB_CREDENTIALS_BINARY, hadoopTokenFileLocationEnvSetting);
        //      LOG.info("System.getenv(\"HADOOP_TOKEN_FILE_LOCATION\") =["+  System.getenv("HADOOP_TOKEN_FILE_LOCATION")+"]");
        }
        //      for (FileStatus ds : fs.globStatus(new Path(dir, "*"))){
        //        LOG.info("src : "+ds.getPath().toUri().toString());
        //      }
        final HadoopArchives har = new HadoopArchives(newConf);
        int rc = ToolRunner.run(har, args);
        if (rc != 0) {
            throw new Exception("Har returned error code " + rc);
        }
        //      for (FileStatus hs : fs.globStatus(new Path(harFile, "*"))){
        //        LOG.info("dest : "+hs.getPath().toUri().toString());
        //      }
        //      doHarCheck(fs,harFile);
        //      LOG.info("Nuking " + dir);
        fs.delete(new Path(dir), true);
    } catch (Exception e) {
        throw new HCatException("Error creating Har [" + harFile + "] from [" + dir + "]", e);
    }
}
Also used : Path(org.apache.hadoop.fs.Path) Configuration(org.apache.hadoop.conf.Configuration) FileSystem(org.apache.hadoop.fs.FileSystem) HCatException(org.apache.hive.hcatalog.common.HCatException) HadoopArchives(org.apache.hadoop.tools.HadoopArchives) IOException(java.io.IOException)
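A minimal, hypothetical caller sketch for the method above; the warehouse paths are placeholders and the JobContext is whatever the surrounding committer already has. Because makeHar() deletes the source directory once the archive is built, this sketch places the target .har next to the source directory rather than inside it.

// Hypothetical commit-time invocation; paths are made up.
public static void archivePartition(JobContext jobContext) throws IOException {
    String partitionDir = "hdfs:///warehouse/mydb.db/pageviews/ds=2017-01-01";
    String harFile = "hdfs:///warehouse/mydb.db/pageviews/ds=2017-01-01.har";
    // Runs the HadoopArchives tool over partitionDir, then deletes partitionDir.
    HarOutputCommitterPostProcessor.makeHar(jobContext, partitionDir, harFile);
}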

Example 15 with HCatException

Use of org.apache.hive.hcatalog.common.HCatException in project hive by apache.

The class HCatOutputFormat, method setOutput.

/**
   * Set the information about the output to write for the job. This queries the metadata server
   * to find the StorageHandler to use for the table.  It throws an error if the
   * partition is already published.
   * @param conf the Configuration object
   * @param credentials the Credentials object
   * @param outputJobInfo the table output information for the job
   * @throws IOException the exception in communicating with the metadata server
   */
@SuppressWarnings("unchecked")
public static void setOutput(Configuration conf, Credentials credentials, OutputJobInfo outputJobInfo) throws IOException {
    IMetaStoreClient client = null;
    try {
        HiveConf hiveConf = HCatUtil.getHiveConf(conf);
        client = HCatUtil.getHiveMetastoreClient(hiveConf);
        Table table = HCatUtil.getTable(client, outputJobInfo.getDatabaseName(), outputJobInfo.getTableName());
        List<String> indexList = client.listIndexNames(outputJobInfo.getDatabaseName(), outputJobInfo.getTableName(), Short.MAX_VALUE);
        for (String indexName : indexList) {
            Index index = client.getIndex(outputJobInfo.getDatabaseName(), outputJobInfo.getTableName(), indexName);
            if (!index.isDeferredRebuild()) {
                throw new HCatException(ErrorType.ERROR_NOT_SUPPORTED, "Store into a table with an automatic index from Pig/Mapreduce is not supported");
            }
        }
        StorageDescriptor sd = table.getTTable().getSd();
        if (sd.isCompressed()) {
            throw new HCatException(ErrorType.ERROR_NOT_SUPPORTED, "Store into a compressed partition from Pig/Mapreduce is not supported");
        }
        if (sd.getBucketCols() != null && !sd.getBucketCols().isEmpty()) {
            throw new HCatException(ErrorType.ERROR_NOT_SUPPORTED, "Store into a partition with bucket definition from Pig/Mapreduce is not supported");
        }
        if (sd.getSortCols() != null && !sd.getSortCols().isEmpty()) {
            throw new HCatException(ErrorType.ERROR_NOT_SUPPORTED, "Store into a partition with sorted column definition from Pig/Mapreduce is not supported");
        }
        // Set up a common id hash for this job, so that when we create any temporary directory
        // later on, it is guaranteed to be unique.
        String idHash;
        if ((idHash = conf.get(HCatConstants.HCAT_OUTPUT_ID_HASH)) == null) {
            idHash = String.valueOf(Math.random());
        }
        conf.set(HCatConstants.HCAT_OUTPUT_ID_HASH, idHash);
        if (table.getTTable().getPartitionKeysSize() == 0) {
            if ((outputJobInfo.getPartitionValues() != null) && (!outputJobInfo.getPartitionValues().isEmpty())) {
                // attempt made to save partition values in non-partitioned table - throw error.
                throw new HCatException(ErrorType.ERROR_INVALID_PARTITION_VALUES, "Partition values specified for non-partitioned table");
            }
            // non-partitioned table
            outputJobInfo.setPartitionValues(new HashMap<String, String>());
        } else {
            // partitioned table, we expect partition values
            // convert user specified map to have lower case key names
            Map<String, String> valueMap = new HashMap<String, String>();
            if (outputJobInfo.getPartitionValues() != null) {
                for (Map.Entry<String, String> entry : outputJobInfo.getPartitionValues().entrySet()) {
                    valueMap.put(entry.getKey().toLowerCase(), entry.getValue());
                }
            }
            if ((outputJobInfo.getPartitionValues() == null) || (outputJobInfo.getPartitionValues().size() < table.getTTable().getPartitionKeysSize())) {
                // dynamic partition usecase - partition values were null, or not all were specified
                // need to figure out which keys are not specified.
                List<String> dynamicPartitioningKeys = new ArrayList<String>();
                boolean firstItem = true;
                for (FieldSchema fs : table.getPartitionKeys()) {
                    if (!valueMap.containsKey(fs.getName().toLowerCase())) {
                        dynamicPartitioningKeys.add(fs.getName().toLowerCase());
                    }
                }
                if (valueMap.size() + dynamicPartitioningKeys.size() != table.getTTable().getPartitionKeysSize()) {
                    // If this isn't equal, then bogus key values have been inserted, error out.
                    throw new HCatException(ErrorType.ERROR_INVALID_PARTITION_VALUES, "Invalid partition keys specified");
                }
                outputJobInfo.setDynamicPartitioningKeys(dynamicPartitioningKeys);
                String dynHash;
                if ((dynHash = conf.get(HCatConstants.HCAT_DYNAMIC_PTN_JOBID)) == null) {
                    dynHash = String.valueOf(Math.random());
                }
                conf.set(HCatConstants.HCAT_DYNAMIC_PTN_JOBID, dynHash);
                // if custom pattern is set in case of dynamic partitioning, configure custom path
                String customPattern = conf.get(HCatConstants.HCAT_DYNAMIC_CUSTOM_PATTERN);
                if (customPattern != null) {
                    HCatFileUtil.setCustomPath(customPattern, outputJobInfo);
                }
            }
            outputJobInfo.setPartitionValues(valueMap);
        }
        // To get around hbase failure on single node, see BUG-4383
        conf.set("dfs.client.read.shortcircuit", "false");
        HCatSchema tableSchema = HCatUtil.extractSchema(table);
        StorerInfo storerInfo = InternalUtil.extractStorerInfo(table.getTTable().getSd(), table.getParameters());
        List<String> partitionCols = new ArrayList<String>();
        for (FieldSchema schema : table.getPartitionKeys()) {
            partitionCols.add(schema.getName());
        }
        HiveStorageHandler storageHandler = HCatUtil.getStorageHandler(conf, storerInfo);
        //Serialize the output info into the configuration
        outputJobInfo.setTableInfo(HCatTableInfo.valueOf(table.getTTable()));
        outputJobInfo.setOutputSchema(tableSchema);
        harRequested = getHarRequested(hiveConf);
        outputJobInfo.setHarRequested(harRequested);
        maxDynamicPartitions = getMaxDynamicPartitions(hiveConf);
        outputJobInfo.setMaximumDynamicPartitions(maxDynamicPartitions);
        HCatUtil.configureOutputStorageHandler(storageHandler, conf, outputJobInfo);
        Path tblPath = new Path(table.getTTable().getSd().getLocation());
        /*  Set the umask in conf such that files/dirs get created with table-dir
      * permissions. Following three assumptions are made:
      * 1. Actual files/dirs creation is done by RecordWriter of underlying
      * output format. It is assumed that they use default permissions while creation.
      * 2. Default Permissions = FsPermission.getDefault() = 777.
      * 3. UMask is honored by underlying filesystem.
      */
        FsPermission.setUMask(conf, FsPermission.getDefault().applyUMask(tblPath.getFileSystem(conf).getFileStatus(tblPath).getPermission()));
        if (Security.getInstance().isSecurityEnabled()) {
            Security.getInstance().handleSecurity(credentials, outputJobInfo, client, conf, harRequested);
        }
    } catch (Exception e) {
        if (e instanceof HCatException) {
            throw (HCatException) e;
        } else {
            throw new HCatException(ErrorType.ERROR_SET_OUTPUT, e);
        }
    } finally {
        HCatUtil.closeHiveClientQuietly(client);
    }
}
Also used : Path(org.apache.hadoop.fs.Path) HiveStorageHandler(org.apache.hadoop.hive.ql.metadata.HiveStorageHandler) Table(org.apache.hadoop.hive.ql.metadata.Table) HashMap(java.util.HashMap) FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema) HCatException(org.apache.hive.hcatalog.common.HCatException) StorageDescriptor(org.apache.hadoop.hive.metastore.api.StorageDescriptor) ArrayList(java.util.ArrayList) Index(org.apache.hadoop.hive.metastore.api.Index) IMetaStoreClient(org.apache.hadoop.hive.metastore.IMetaStoreClient) IOException(java.io.IOException) HCatSchema(org.apache.hive.hcatalog.data.schema.HCatSchema) HiveConf(org.apache.hadoop.hive.conf.HiveConf) Map(java.util.Map)
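Most MapReduce drivers reach the method above through the Job-based convenience overload, HCatOutputFormat.setOutput(Job, OutputJobInfo), which forwards the job's Configuration and Credentials. A minimal driver sketch with placeholder names, writing into a static partition:

// Driver-side sketch; database, table and partition values are placeholders.
Configuration conf = new Configuration();
Job job = Job.getInstance(conf, "write-to-hcatalog");

Map<String, String> partitionValues = new HashMap<String, String>();
partitionValues.put("ds", "2017-01-01");   // static partition; omit keys to use dynamic partitioning

// Forwards to the setOutput(Configuration, Credentials, OutputJobInfo) shown above:
// validates indexes/compression/buckets/sort columns, resolves static vs. dynamic
// partition keys, and configures the storage handler into the job configuration.
OutputJobInfo jobInfo = OutputJobInfo.create("mydb", "pageviews", partitionValues);
HCatOutputFormat.setOutput(job, jobInfo);

// Write with the table's own schema, taken from the info setOutput() stored in the conf.
HCatOutputFormat.setSchema(job, HCatOutputFormat.getTableSchema(job.getConfiguration()));
job.setOutputFormatClass(HCatOutputFormat.class);
job.setOutputKeyClass(NullWritable.class);          // the key is typically ignored on the HCatalog write path
job.setOutputValueClass(DefaultHCatRecord.class);   // records are emitted as the value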

Aggregations

HCatException (org.apache.hive.hcatalog.common.HCatException) 52
IOException (java.io.IOException) 23
ArrayList (java.util.ArrayList) 20
MetaException (org.apache.hadoop.hive.metastore.api.MetaException) 19
TException (org.apache.thrift.TException) 14
HCatFieldSchema (org.apache.hive.hcatalog.data.schema.HCatFieldSchema) 13
HashMap (java.util.HashMap) 11
Test (org.junit.Test) 11
NoSuchObjectException (org.apache.hadoop.hive.metastore.api.NoSuchObjectException) 10
Configuration (org.apache.hadoop.conf.Configuration) 9
Path (org.apache.hadoop.fs.Path) 9
Partition (org.apache.hadoop.hive.metastore.api.Partition) 8
Table (org.apache.hadoop.hive.metastore.api.Table) 8
HCatSchema (org.apache.hive.hcatalog.data.schema.HCatSchema) 7
Job (org.apache.hadoop.mapreduce.Job) 6
FieldSchema (org.apache.hadoop.hive.metastore.api.FieldSchema) 5
FileSystem (org.apache.hadoop.fs.FileSystem) 4
HiveConf (org.apache.hadoop.hive.conf.HiveConf) 4
CommandNeedRetryException (org.apache.hadoop.hive.ql.CommandNeedRetryException) 4
Map (java.util.Map) 3