Example 16 with IMetaStoreClient

Use of org.apache.hadoop.hive.metastore.IMetaStoreClient in project hive by apache.

From class HCatOutputFormat, method setOutput.

/**
   * Set the information about the output to write for the job. This queries the metadata server
   * to find the StorageHandler to use for the table.  It throws an error if the
   * partition is already published.
   * @param conf the Configuration object
   * @param credentials the Credentials object
   * @param outputJobInfo the table output information for the job
   * @throws IOException on errors communicating with the metadata server
   */
@SuppressWarnings("unchecked")
public static void setOutput(Configuration conf, Credentials credentials, OutputJobInfo outputJobInfo) throws IOException {
    IMetaStoreClient client = null;
    try {
        HiveConf hiveConf = HCatUtil.getHiveConf(conf);
        client = HCatUtil.getHiveMetastoreClient(hiveConf);
        Table table = HCatUtil.getTable(client, outputJobInfo.getDatabaseName(), outputJobInfo.getTableName());
        List<String> indexList = client.listIndexNames(outputJobInfo.getDatabaseName(), outputJobInfo.getTableName(), Short.MAX_VALUE);
        for (String indexName : indexList) {
            Index index = client.getIndex(outputJobInfo.getDatabaseName(), outputJobInfo.getTableName(), indexName);
            if (!index.isDeferredRebuild()) {
                throw new HCatException(ErrorType.ERROR_NOT_SUPPORTED, "Store into a table with an automatic index from Pig/Mapreduce is not supported");
            }
        }
        StorageDescriptor sd = table.getTTable().getSd();
        if (sd.isCompressed()) {
            throw new HCatException(ErrorType.ERROR_NOT_SUPPORTED, "Store into a compressed partition from Pig/Mapreduce is not supported");
        }
        if (sd.getBucketCols() != null && !sd.getBucketCols().isEmpty()) {
            throw new HCatException(ErrorType.ERROR_NOT_SUPPORTED, "Store into a partition with bucket definition from Pig/Mapreduce is not supported");
        }
        if (sd.getSortCols() != null && !sd.getSortCols().isEmpty()) {
            throw new HCatException(ErrorType.ERROR_NOT_SUPPORTED, "Store into a partition with sorted column definition from Pig/Mapreduce is not supported");
        }
        // Set up a common id hash for this job, so that when we create any temporary directory
        // later on, it is guaranteed to be unique.
        String idHash;
        if ((idHash = conf.get(HCatConstants.HCAT_OUTPUT_ID_HASH)) == null) {
            idHash = String.valueOf(Math.random());
        }
        conf.set(HCatConstants.HCAT_OUTPUT_ID_HASH, idHash);
        if (table.getTTable().getPartitionKeysSize() == 0) {
            if ((outputJobInfo.getPartitionValues() != null) && (!outputJobInfo.getPartitionValues().isEmpty())) {
                // attempt made to save partition values in non-partitioned table - throw error.
                throw new HCatException(ErrorType.ERROR_INVALID_PARTITION_VALUES, "Partition values specified for non-partitioned table");
            }
            // non-partitioned table
            outputJobInfo.setPartitionValues(new HashMap<String, String>());
        } else {
            // partitioned table, we expect partition values
            // convert user specified map to have lower case key names
            Map<String, String> valueMap = new HashMap<String, String>();
            if (outputJobInfo.getPartitionValues() != null) {
                for (Map.Entry<String, String> entry : outputJobInfo.getPartitionValues().entrySet()) {
                    valueMap.put(entry.getKey().toLowerCase(), entry.getValue());
                }
            }
            if ((outputJobInfo.getPartitionValues() == null) || (outputJobInfo.getPartitionValues().size() < table.getTTable().getPartitionKeysSize())) {
                // dynamic partition usecase - partition values were null, or not all were specified
                // need to figure out which keys are not specified.
                List<String> dynamicPartitioningKeys = new ArrayList<String>();
                for (FieldSchema fs : table.getPartitionKeys()) {
                    if (!valueMap.containsKey(fs.getName().toLowerCase())) {
                        dynamicPartitioningKeys.add(fs.getName().toLowerCase());
                    }
                }
                if (valueMap.size() + dynamicPartitioningKeys.size() != table.getTTable().getPartitionKeysSize()) {
                    // If this isn't equal, then bogus key values have been inserted, error out.
                    throw new HCatException(ErrorType.ERROR_INVALID_PARTITION_VALUES, "Invalid partition keys specified");
                }
                outputJobInfo.setDynamicPartitioningKeys(dynamicPartitioningKeys);
                String dynHash;
                if ((dynHash = conf.get(HCatConstants.HCAT_DYNAMIC_PTN_JOBID)) == null) {
                    dynHash = String.valueOf(Math.random());
                }
                conf.set(HCatConstants.HCAT_DYNAMIC_PTN_JOBID, dynHash);
                // if custom pattern is set in case of dynamic partitioning, configure custom path
                String customPattern = conf.get(HCatConstants.HCAT_DYNAMIC_CUSTOM_PATTERN);
                if (customPattern != null) {
                    HCatFileUtil.setCustomPath(customPattern, outputJobInfo);
                }
            }
            outputJobInfo.setPartitionValues(valueMap);
        }
        // To get around hbase failure on single node, see BUG-4383
        conf.set("dfs.client.read.shortcircuit", "false");
        HCatSchema tableSchema = HCatUtil.extractSchema(table);
        StorerInfo storerInfo = InternalUtil.extractStorerInfo(table.getTTable().getSd(), table.getParameters());
        List<String> partitionCols = new ArrayList<String>();
        for (FieldSchema schema : table.getPartitionKeys()) {
            partitionCols.add(schema.getName());
        }
        HiveStorageHandler storageHandler = HCatUtil.getStorageHandler(conf, storerInfo);
        //Serialize the output info into the configuration
        outputJobInfo.setTableInfo(HCatTableInfo.valueOf(table.getTTable()));
        outputJobInfo.setOutputSchema(tableSchema);
        harRequested = getHarRequested(hiveConf);
        outputJobInfo.setHarRequested(harRequested);
        maxDynamicPartitions = getMaxDynamicPartitions(hiveConf);
        outputJobInfo.setMaximumDynamicPartitions(maxDynamicPartitions);
        HCatUtil.configureOutputStorageHandler(storageHandler, conf, outputJobInfo);
        Path tblPath = new Path(table.getTTable().getSd().getLocation());
        /* Set the umask in conf so that files/dirs get created with table-dir
         * permissions. The following three assumptions are made:
         * 1. Actual file/dir creation is done by the RecordWriter of the underlying
         *    output format, which is assumed to use default permissions at creation time.
         * 2. Default permissions = FsPermission.getDefault() = 777.
         * 3. The umask is honored by the underlying filesystem.
         */
        FsPermission.setUMask(conf, FsPermission.getDefault().applyUMask(tblPath.getFileSystem(conf).getFileStatus(tblPath).getPermission()));
        if (Security.getInstance().isSecurityEnabled()) {
            Security.getInstance().handleSecurity(credentials, outputJobInfo, client, conf, harRequested);
        }
    } catch (Exception e) {
        if (e instanceof HCatException) {
            throw (HCatException) e;
        } else {
            throw new HCatException(ErrorType.ERROR_SET_OUTPUT, e);
        }
    } finally {
        HCatUtil.closeHiveClientQuietly(client);
    }
}
Also used: Path(org.apache.hadoop.fs.Path) HiveStorageHandler(org.apache.hadoop.hive.ql.metadata.HiveStorageHandler) Table(org.apache.hadoop.hive.ql.metadata.Table) HashMap(java.util.HashMap) FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema) HCatException(org.apache.hive.hcatalog.common.HCatException) StorageDescriptor(org.apache.hadoop.hive.metastore.api.StorageDescriptor) ArrayList(java.util.ArrayList) Index(org.apache.hadoop.hive.metastore.api.Index) IMetaStoreClient(org.apache.hadoop.hive.metastore.IMetaStoreClient) IOException(java.io.IOException) HCatSchema(org.apache.hive.hcatalog.data.schema.HCatSchema) HiveConf(org.apache.hadoop.hive.conf.HiveConf) Map(java.util.Map)
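
For context, the pattern setOutput builds on is simple: obtain an IMetaStoreClient through HCatUtil, use it, and always release it in a finally block. The sketch below is a minimal, hedged illustration of that lifecycle; fetchTableMetadata, "mydb", and "mytable" are hypothetical names, not part of the Hive source.

// Minimal sketch of the client lifecycle used by setOutput above.
// Imports assumed: org.apache.hadoop.conf.Configuration, java.io.IOException,
// org.apache.hadoop.hive.conf.HiveConf, org.apache.hadoop.hive.metastore.IMetaStoreClient,
// org.apache.hadoop.hive.metastore.api.Table, org.apache.hive.hcatalog.common.HCatUtil
public static Table fetchTableMetadata(Configuration conf) throws IOException {
    IMetaStoreClient client = null;
    try {
        // Derive a HiveConf from the job Configuration, as setOutput does.
        HiveConf hiveConf = HCatUtil.getHiveConf(conf);
        client = HCatUtil.getHiveMetastoreClient(hiveConf);
        // getTable is a standard IMetaStoreClient call; the names are placeholders.
        return client.getTable("mydb", "mytable");
    } catch (Exception e) {
        throw new IOException("Failed to fetch table metadata", e);
    } finally {
        // Close quietly so cleanup never masks the original exception.
        HCatUtil.closeHiveClientQuietly(client);
    }
}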

Example 17 with IMetaStoreClient

Use of org.apache.hadoop.hive.metastore.IMetaStoreClient in project hive by apache.

From class CompleteDelegator, method run.

public CompleteBean run(String id, String jobStatus) throws CallbackFailedException, IOException {
    if (id == null)
        return acceptWithError("No jobid given");
    JobState state = null;
    /* We don't want to cancel the delegation token if we think the callback is
       going to be retried, for example, because the job is not complete yet. */
    boolean cancelMetastoreToken = false;
    try {
        state = new JobState(id, Main.getAppConfigInstance());
        if (state.getCompleteStatus() == null)
            failed("Job not yet complete. jobId=" + id + " Status from JobTracker=" + jobStatus, null);
        Long notified = state.getNotifiedTime();
        if (notified != null) {
            cancelMetastoreToken = true;
            return acceptWithError("Callback already run for jobId=" + id + " at " + new Date(notified));
        }
        String callback = state.getCallback();
        if (callback == null) {
            cancelMetastoreToken = true;
            return new CompleteBean("No callback registered");
        }
        try {
            doCallback(state.getId(), callback);
            cancelMetastoreToken = true;
        } catch (Exception e) {
            failed("Callback failed " + callback + " for " + id, e);
        }
        state.setNotifiedTime(System.currentTimeMillis());
        return new CompleteBean("Callback sent");
    } finally {
        if (state != null) {
            state.close();
        }
        IMetaStoreClient client = null;
        try {
            if (cancelMetastoreToken) {
                String metastoreTokenStrForm = DelegationTokenCache.getStringFormTokenCache().getDelegationToken(id);
                if (metastoreTokenStrForm != null) {
                    client = HCatUtil.getHiveMetastoreClient(new HiveConf());
                    client.cancelDelegationToken(metastoreTokenStrForm);
                    LOG.debug("Cancelled token for jobId=" + id + " status from JT=" + jobStatus);
                    DelegationTokenCache.getStringFormTokenCache().removeDelegationToken(id);
                }
            }
        } catch (Exception ex) {
            LOG.warn("Failed to cancel metastore delegation token for jobId=" + id, ex);
        } finally {
            HCatUtil.closeHiveClientQuietly(client);
        }
    }
}
Also used: JobState(org.apache.hive.hcatalog.templeton.tool.JobState) HiveConf(org.apache.hadoop.hive.conf.HiveConf) IMetaStoreClient(org.apache.hadoop.hive.metastore.IMetaStoreClient) Date(java.util.Date) IOException(java.io.IOException)

Example 18 with IMetaStoreClient

Use of org.apache.hadoop.hive.metastore.IMetaStoreClient in project hive by apache.

From class TempletonControllerJob, method buildHcatDelegationToken.

private String buildHcatDelegationToken(String user) throws IOException, InterruptedException, TException {
    final HiveConf c = new HiveConf();
    LOG.debug("Creating hive metastore delegation token for user " + user);
    final UserGroupInformation ugi = UgiFactory.getUgi(user);
    UserGroupInformation real = ugi.getRealUser();
    return real.doAs(new PrivilegedExceptionAction<String>() {

        @Override
        public String run() throws IOException, TException, InterruptedException {
            final IMetaStoreClient client = HCatUtil.getHiveMetastoreClient(c);
            return ugi.doAs(new PrivilegedExceptionAction<String>() {

                @Override
                public String run() throws IOException, TException, InterruptedException {
                    String u = ugi.getUserName();
                    return client.getDelegationToken(c.getUser(), u);
                }
            });
        }
    });
}
Also used: TException(org.apache.thrift.TException) HiveConf(org.apache.hadoop.hive.conf.HiveConf) IOException(java.io.IOException) PrivilegedExceptionAction(java.security.PrivilegedExceptionAction) IMetaStoreClient(org.apache.hadoop.hive.metastore.IMetaStoreClient) UserGroupInformation(org.apache.hadoop.security.UserGroupInformation)
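
The string returned by buildHcatDelegationToken is the same token form that CompleteDelegator (Example 17) cancels once the callback has run. As a hedged sketch of that pairing, cancellation looks like this; tokenStrForm is a placeholder for the value returned above, and the surrounding method is assumed to declare the metastore exceptions.

// tokenStrForm is assumed to hold the string returned by buildHcatDelegationToken.
IMetaStoreClient client = null;
try {
    client = HCatUtil.getHiveMetastoreClient(new HiveConf());
    // cancelDelegationToken is a standard IMetaStoreClient call.
    client.cancelDelegationToken(tokenStrForm);
} finally {
    HCatUtil.closeHiveClientQuietly(client);
}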

Example 19 with IMetaStoreClient

Use of org.apache.hadoop.hive.metastore.IMetaStoreClient in project hive by apache.

From class TestHiveClientCache, method testMultipleThreadAccess.

/**
   * Check that a *new* client is created if asked from different threads even with
   * the same hive configuration
   * @throws ExecutionException
   * @throws InterruptedException
   */
@Test
public void testMultipleThreadAccess() throws ExecutionException, InterruptedException {
    final HiveClientCache cache = new HiveClientCache(1000);
    class GetHiveClient implements Callable<IMetaStoreClient> {

        @Override
        public IMetaStoreClient call() throws IOException, MetaException, LoginException {
            return cache.get(hiveConf);
        }
    }
    ExecutorService executor = Executors.newFixedThreadPool(2);
    Callable<IMetaStoreClient> worker1 = new GetHiveClient();
    Callable<IMetaStoreClient> worker2 = new GetHiveClient();
    Future<IMetaStoreClient> clientFuture1 = executor.submit(worker1);
    Future<IMetaStoreClient> clientFuture2 = executor.submit(worker2);
    IMetaStoreClient client1 = clientFuture1.get();
    IMetaStoreClient client2 = clientFuture2.get();
    assertNotNull(client1);
    assertNotNull(client2);
    assertNotSame(client1, client2);
}
Also used: ExecutorService(java.util.concurrent.ExecutorService) IMetaStoreClient(org.apache.hadoop.hive.metastore.IMetaStoreClient) Callable(java.util.concurrent.Callable) Test(org.junit.Test)
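
A companion sketch for the single-thread case follows. It assumes the cache's intended behavior, namely that repeated get calls with the same HiveConf on one thread return the same instance; the test name and body are illustrative, not part of TestHiveClientCache (assertSame/assertNotNull from org.junit.Assert).

@Test
public void testSameThreadCacheHit() throws Exception {
    // Same timeout as the test above; the value is arbitrary for this sketch.
    final HiveClientCache cache = new HiveClientCache(1000);
    IMetaStoreClient client1 = cache.get(hiveConf);
    IMetaStoreClient client2 = cache.get(hiveConf);
    assertNotNull(client1);
    // One thread, one HiveConf: expected to be a cache hit (assumption noted above).
    assertSame(client1, client2);
}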

Example 20 with IMetaStoreClient

Use of org.apache.hadoop.hive.metastore.IMetaStoreClient in project hive by apache.

From class TestCompactor, method minorCompactWhileStreaming.

@Test
public void minorCompactWhileStreaming() throws Exception {
    String dbName = "default";
    String tblName = "cws";
    List<String> colNames = Arrays.asList("a", "b");
    String columnNamesProperty = "a,b";
    String columnTypesProperty = "int:string";
    executeStatementOnDriver("drop table if exists " + tblName, driver);
    executeStatementOnDriver("CREATE TABLE " + tblName + "(a INT, b STRING) " + //currently ACID requires table to be bucketed
    " CLUSTERED BY(a) INTO 1 BUCKETS" + " STORED AS ORC  TBLPROPERTIES ('transactional'='true')", driver);
    HiveEndPoint endPt = new HiveEndPoint(null, dbName, tblName, null);
    DelimitedInputWriter writer = new DelimitedInputWriter(new String[] { "a", "b" }, ",", endPt);
    StreamingConnection connection = endPt.newConnection(false, "UT_" + Thread.currentThread().getName());
    try {
        // Write a couple of batches
        for (int i = 0; i < 2; i++) {
            writeBatch(connection, writer, false);
        }
        // Start a third batch, but don't close it.
        writeBatch(connection, writer, true);
        // Now, compact
        TxnStore txnHandler = TxnUtils.getTxnStore(conf);
        txnHandler.compact(new CompactionRequest(dbName, tblName, CompactionType.MINOR));
        Worker t = new Worker();
        t.setThreadId((int) t.getId());
        t.setHiveConf(conf);
        AtomicBoolean stop = new AtomicBoolean(true);
        AtomicBoolean looped = new AtomicBoolean();
        t.init(stop, looped);
        t.run();
        // Find the location of the table
        IMetaStoreClient msClient = new HiveMetaStoreClient(conf);
        Table table = msClient.getTable(dbName, tblName);
        FileSystem fs = FileSystem.get(conf);
        FileStatus[] stat = fs.listStatus(new Path(table.getSd().getLocation()), AcidUtils.deltaFileFilter);
        String[] names = new String[stat.length];
        Path resultFile = null;
        for (int i = 0; i < names.length; i++) {
            names[i] = stat[i].getPath().getName();
            if (names[i].equals("delta_0000001_0000004")) {
                resultFile = stat[i].getPath();
            }
        }
        Arrays.sort(names);
        String[] expected = new String[] { "delta_0000001_0000002", "delta_0000001_0000004", "delta_0000003_0000004", "delta_0000005_0000006" };
        if (!Arrays.deepEquals(expected, names)) {
            Assert.fail("Expected: " + Arrays.toString(expected) + ", found: " + Arrays.toString(names));
        }
        checkExpectedTxnsPresent(null, new Path[] { resultFile }, columnNamesProperty, columnTypesProperty, 0, 1L, 4L);
    } finally {
        connection.close();
    }
}
Also used: Path(org.apache.hadoop.fs.Path) HiveMetaStoreClient(org.apache.hadoop.hive.metastore.HiveMetaStoreClient) Table(org.apache.hadoop.hive.metastore.api.Table) FileStatus(org.apache.hadoop.fs.FileStatus) StreamingConnection(org.apache.hive.hcatalog.streaming.StreamingConnection) IMetaStoreClient(org.apache.hadoop.hive.metastore.IMetaStoreClient) HiveEndPoint(org.apache.hive.hcatalog.streaming.HiveEndPoint) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) FileSystem(org.apache.hadoop.fs.FileSystem) DelimitedInputWriter(org.apache.hive.hcatalog.streaming.DelimitedInputWriter) TxnStore(org.apache.hadoop.hive.metastore.txn.TxnStore) CompactionRequest(org.apache.hadoop.hive.metastore.api.CompactionRequest) Test(org.junit.Test)
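
One nit in the test above: msClient is never closed. As a minimal sketch under the same setup (conf is the test's HiveConf, and AcidUtils is org.apache.hadoop.hive.ql.io.AcidUtils), the lookup-and-list step can be written with explicit cleanup:

// Locate the table and list its ACID delta directories, then release the client.
IMetaStoreClient msClient = new HiveMetaStoreClient(conf);
try {
    Table table = msClient.getTable("default", "cws");
    Path tableDir = new Path(table.getSd().getLocation());
    FileSystem fs = FileSystem.get(conf);
    // AcidUtils.deltaFileFilter matches delta_* directories created by ACID writes.
    for (FileStatus status : fs.listStatus(tableDir, AcidUtils.deltaFileFilter)) {
        System.out.println("delta dir: " + status.getPath().getName());
    }
} finally {
    msClient.close();
}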

Aggregations

IMetaStoreClient (org.apache.hadoop.hive.metastore.IMetaStoreClient) 41
TException (org.apache.thrift.TException) 12
IOException (java.io.IOException) 11
Path (org.apache.hadoop.fs.Path) 11
MetaException (org.apache.hadoop.hive.metastore.api.MetaException) 11
HiveConf (org.apache.hadoop.hive.conf.HiveConf) 10
HiveMetaStoreClient (org.apache.hadoop.hive.metastore.HiveMetaStoreClient) 10
Table (org.apache.hadoop.hive.metastore.api.Table) 10
Test (org.junit.Test) 10
FileStatus (org.apache.hadoop.fs.FileStatus) 9
FileSystem (org.apache.hadoop.fs.FileSystem) 9
AtomicBoolean (java.util.concurrent.atomic.AtomicBoolean) 8
CompactionRequest (org.apache.hadoop.hive.metastore.api.CompactionRequest) 8
TxnStore (org.apache.hadoop.hive.metastore.txn.TxnStore) 8
HiveEndPoint (org.apache.hive.hcatalog.streaming.HiveEndPoint) 8
HiveSQLException (org.apache.hive.service.cli.HiveSQLException) 7
ArrayList (java.util.ArrayList) 6
DelimitedInputWriter (org.apache.hive.hcatalog.streaming.DelimitedInputWriter) 6
StreamingConnection (org.apache.hive.hcatalog.streaming.StreamingConnection) 6
Table (org.apache.hadoop.hive.ql.metadata.Table) 5