
Example 41 with HCatException

Use of org.apache.hive.hcatalog.common.HCatException in project hive by apache.

The class HCatPartition, method toHivePartition:

// For use from HCatClient.addPartitions(), to construct a Partition from user input.
Partition toHivePartition() throws HCatException {
    Partition hivePtn = new Partition();
    hivePtn.setDbName(dbName);
    hivePtn.setTableName(tableName);
    hivePtn.setValues(values);
    hivePtn.setParameters(parameters);
    if (sd.getLocation() == null) {
        LOG.warn("Partition location is not set! Attempting to construct default partition location.");
        try {
            String partName = Warehouse.makePartName(HCatSchemaUtils.getFieldSchemas(hcatTable.getPartCols()), values);
            sd.setLocation(new Path(hcatTable.getSd().getLocation(), partName).toString());
        } catch (MetaException exception) {
            throw new HCatException("Could not construct default partition-path for " + hcatTable.getDbName() + "." + hcatTable.getTableName() + "[" + values + "]");
        }
    }
    hivePtn.setSd(sd);
    hivePtn.setCreateTime((int) (System.currentTimeMillis() / 1000));
    hivePtn.setLastAccessTimeIsSet(false);
    return hivePtn;
}
Also used: Path (org.apache.hadoop.fs.Path), Partition (org.apache.hadoop.hive.metastore.api.Partition), HCatException (org.apache.hive.hcatalog.common.HCatException), MetaException (org.apache.hadoop.hive.metastore.api.MetaException)
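
For context, a rough sketch of how this method is reached through the public HCatClient API. It assumes the four-argument HCatAddPartitionDesc.create() builder; the database, table, and partition-column names are made up for illustration. Passing a null location exercises the default-location branch above.

import java.util.HashMap;
import java.util.Map;
import org.apache.hadoop.conf.Configuration;
import org.apache.hive.hcatalog.api.HCatAddPartitionDesc;
import org.apache.hive.hcatalog.api.HCatClient;

public class AddPartitionSketch {
    public static void main(String[] args) throws Exception {
        HCatClient client = HCatClient.create(new Configuration());
        Map<String, String> partitionSpec = new HashMap<String, String>();
        // "dt" is a hypothetical partition column.
        partitionSpec.put("dt", "2016-01-01");
        // A null location lets toHivePartition() construct the default
        // partition path under the table's location.
        client.addPartition(
            HCatAddPartitionDesc.create("mydb", "mytable", null, partitionSpec).build());
        client.close();
    }
}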

Example 42 with HCatException

Use of org.apache.hive.hcatalog.common.HCatException in project hive by apache.

The class HCatTable, method toHiveTable:

Table toHiveTable() throws HCatException {
    Table newTable = new Table();
    newTable.setDbName(dbName);
    newTable.setTableName(tableName);
    if (tblProps != null) {
        newTable.setParameters(tblProps);
    }
    if (isExternal) {
        newTable.putToParameters("EXTERNAL", "TRUE");
        newTable.setTableType(TableType.EXTERNAL_TABLE.toString());
    } else {
        newTable.setTableType(TableType.MANAGED_TABLE.toString());
    }
    if (StringUtils.isNotBlank(this.comment)) {
        newTable.putToParameters("comment", comment);
    }
    newTable.setSd(sd);
    if (partCols != null) {
        ArrayList<FieldSchema> hivePtnCols = new ArrayList<FieldSchema>();
        for (HCatFieldSchema fs : partCols) {
            hivePtnCols.add(HCatSchemaUtils.getFieldSchema(fs));
        }
        newTable.setPartitionKeys(hivePtnCols);
    }
    newTable.setCreateTime((int) (System.currentTimeMillis() / 1000));
    newTable.setLastAccessTimeIsSet(false);
    try {
        // TODO: Verify that this works for systems using UGI.doAs() (e.g. Oozie).
        newTable.setOwner(owner == null ? getConf().getUser() : owner);
    } catch (Exception exception) {
        throw new HCatException("Unable to determine owner of table (" + dbName + "." + tableName + ") from HiveConf.");
    }
    return newTable;
}
Also used: Table (org.apache.hadoop.hive.metastore.api.Table), HCatFieldSchema (org.apache.hive.hcatalog.data.schema.HCatFieldSchema), FieldSchema (org.apache.hadoop.hive.metastore.api.FieldSchema), ArrayList (java.util.ArrayList), HCatException (org.apache.hive.hcatalog.common.HCatException), HiveException (org.apache.hadoop.hive.ql.metadata.HiveException)
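
For the caller's side, a minimal sketch assuming the HCatCreateTableDesc builder API; the database, table, and single column are hypothetical. The client builds an HCatTable from this descriptor, which is what exercises toHiveTable() above.

import java.util.Arrays;
import org.apache.hadoop.conf.Configuration;
import org.apache.hive.hcatalog.api.HCatClient;
import org.apache.hive.hcatalog.api.HCatCreateTableDesc;
import org.apache.hive.hcatalog.data.schema.HCatFieldSchema;

public class CreateTableSketch {
    public static void main(String[] args) throws Exception {
        HCatClient client = HCatClient.create(new Configuration());
        HCatFieldSchema idCol =
            new HCatFieldSchema("id", HCatFieldSchema.Type.INT, "row id");
        client.createTable(HCatCreateTableDesc
            .create("mydb", "mytable", Arrays.asList(idCol))
            .comment("created via HCatClient")
            // false takes the MANAGED_TABLE branch of toHiveTable().
            .isTableExternal(false)
            .build());
        client.close();
    }
}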

Example 43 with HCatException

Use of org.apache.hive.hcatalog.common.HCatException in project hive by apache.

The class MetadataJSONSerializer, method deserializePartitionSpec:

@Override
public HCatPartitionSpec deserializePartitionSpec(List<String> hcatPartitionSpecStrings) throws HCatException {
    try {
        List<PartitionSpec> partitionSpecList = new ArrayList<PartitionSpec>();
        TDeserializer deserializer = new TDeserializer(new TJSONProtocol.Factory());
        for (String stringRep : hcatPartitionSpecStrings) {
            PartitionSpec partSpec = new PartitionSpec();
            deserializer.deserialize(partSpec, stringRep, "UTF-8");
            partitionSpecList.add(partSpec);
        }
        return new HCatPartitionSpec(null, PartitionSpecProxy.Factory.get(partitionSpecList));
    } catch (TException deserializationException) {
        throw new HCatException("Failed to deserialize!", deserializationException);
    }
}
Also used: TException (org.apache.thrift.TException), TJSONProtocol (org.apache.thrift.protocol.TJSONProtocol), TDeserializer (org.apache.thrift.TDeserializer), ArrayList (java.util.ArrayList), HCatException (org.apache.hive.hcatalog.common.HCatException), PartitionSpec (org.apache.hadoop.hive.metastore.api.PartitionSpec)
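
For symmetry, a sketch of the serializing direction, assuming the Thrift TSerializer.toString(TBase, String) overload that mirrors the TDeserializer call above; this is roughly what the matching serializePartitionSpec() would do:

import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.hive.metastore.api.PartitionSpec;
import org.apache.thrift.TException;
import org.apache.thrift.TSerializer;
import org.apache.thrift.protocol.TJSONProtocol;

public class PartitionSpecJsonSketch {
    // One JSON string per PartitionSpec: the inverse of deserializePartitionSpec().
    static List<String> serialize(List<PartitionSpec> partitionSpecs) throws TException {
        TSerializer serializer = new TSerializer(new TJSONProtocol.Factory());
        List<String> stringReps = new ArrayList<String>();
        for (PartitionSpec partSpec : partitionSpecs) {
            stringReps.add(serializer.toString(partSpec, "UTF-8"));
        }
        return stringReps;
    }
}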

Example 44 with HCatException

Use of org.apache.hive.hcatalog.common.HCatException in project hive by apache.

The class FileOutputCommitterContainer, method getFinalPath:

/**
   * Find the final name of a given output file, given the output directory
   * and the work directory. For a mutable table (appends allowed), attempt
   * filenames of the form _aN until one is found that does not already exist.
   * @param file the file to move
   * @param src the source directory
   * @param dest the target directory
   * @return the final path for the specific output file
   * @throws java.io.IOException
   */
private Path getFinalPath(FileSystem fs, Path file, Path src, Path dest, final boolean immutable) throws IOException {
    URI taskOutputUri = file.toUri();
    URI relativePath = src.toUri().relativize(taskOutputUri);
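    // URI.relativize() returns its argument unchanged when it cannot compute a
    // relative path, so reference equality below detects the failure case.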
    if (taskOutputUri == relativePath) {
        throw new HCatException(ErrorType.ERROR_MOVE_FAILED, "Can not get the relative path: base = " + src + " child = " + file);
    }
    if (relativePath.getPath().length() > 0) {
        Path itemDest = new Path(dest, relativePath.getPath());
        if (!immutable) {
            String name = relativePath.getPath();
            String filetype;
            int index = name.lastIndexOf('.');
            if (index >= 0) {
                filetype = name.substring(index);
                name = name.substring(0, index);
            } else {
                filetype = "";
            }
            // Attempt to find maxAppendAttempts possible alternatives to a filename by
            // appending _a_N and seeing if that destination also clashes. If we're
            // still clashing after that, give up.
            int counter = 1;
            for (; fs.exists(itemDest) && counter < maxAppendAttempts; counter++) {
                itemDest = new Path(dest, name + (APPEND_SUFFIX + counter) + filetype);
            }
            if (counter == maxAppendAttempts) {
                throw new HCatException(ErrorType.ERROR_MOVE_FAILED, "Could not find a unique destination path for move: file = " + file + " , src = " + src + ", dest = " + dest);
            } else if (counter > APPEND_COUNTER_WARN_THRESHOLD) {
                LOG.warn("Append job used filename clash counter [" + counter + "] which is greater than warning limit [" + APPEND_COUNTER_WARN_THRESHOLD + "]. Please compact this table so that performance is not impacted." + " Please see HIVE-9381 for details.");
            }
        }
        if (LOG.isDebugEnabled()) {
            LOG.debug("FinalPath(file:" + file + ":" + src + "->" + dest + "=" + itemDest);
        }
        return itemDest;
    } else {
        return dest;
    }
}
Also used: Path (org.apache.hadoop.fs.Path), HCatException (org.apache.hive.hcatalog.common.HCatException), URI (java.net.URI)
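
The clash-avoidance loop in isolation, as a rough sketch; APPEND_SUFFIX and MAX_APPEND_ATTEMPTS are hypothetical stand-ins for the container's fields:

import java.io.IOException;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class AppendSuffixSketch {
    private static final String APPEND_SUFFIX = "_a_";
    private static final int MAX_APPEND_ATTEMPTS = 1000;

    // Returns the first destination under dest that does not exist yet,
    // or null once MAX_APPEND_ATTEMPTS candidates have all clashed.
    static Path firstFreePath(FileSystem fs, Path dest, String name, String filetype)
            throws IOException {
        Path candidate = new Path(dest, name + filetype);
        for (int counter = 1; fs.exists(candidate); counter++) {
            if (counter == MAX_APPEND_ATTEMPTS) {
                return null; // caller decides how to fail
            }
            candidate = new Path(dest, name + APPEND_SUFFIX + counter + filetype);
        }
        return candidate;
    }
}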

Example 45 with HCatException

Use of org.apache.hive.hcatalog.common.HCatException in project hive by apache.

The class FileOutputCommitterContainer, method registerPartitions:

private void registerPartitions(JobContext context) throws IOException {
    if (dynamicPartitioningUsed) {
        discoverPartitions(context);
    }
    OutputJobInfo jobInfo = HCatOutputFormat.getJobInfo(context.getConfiguration());
    Configuration conf = context.getConfiguration();
    Table table = new Table(jobInfo.getTableInfo().getTable());
    Path tblPath = new Path(table.getTTable().getSd().getLocation());
    FileSystem fs = tblPath.getFileSystem(conf);
    IMetaStoreClient client = null;
    HCatTableInfo tableInfo = jobInfo.getTableInfo();
    List<Partition> partitionsAdded = new ArrayList<Partition>();
    try {
        HiveConf hiveConf = HCatUtil.getHiveConf(conf);
        client = HCatUtil.getHiveMetastoreClient(hiveConf);
        if (table.getPartitionKeys().size() == 0) {
            // Move data from the temp directory to the actual table directory.
            // No metastore operation required.
            Path src = new Path(jobInfo.getLocation());
            moveTaskOutputs(fs, src, src, tblPath, false, table.isImmutable());
            if (!src.equals(tblPath)) {
                fs.delete(src, true);
            }
            if (table.getParameters() != null && table.getParameters().containsKey(StatsSetupConst.COLUMN_STATS_ACCURATE)) {
                table.getParameters().remove(StatsSetupConst.COLUMN_STATS_ACCURATE);
                client.alter_table(table.getDbName(), table.getTableName(), table.getTTable());
            }
            return;
        }
        StorerInfo storer = InternalUtil.extractStorerInfo(table.getTTable().getSd(), table.getParameters());
        FileStatus tblStat = fs.getFileStatus(tblPath);
        String grpName = tblStat.getGroup();
        FsPermission perms = tblStat.getPermission();
        List<Partition> partitionsToAdd = new ArrayList<Partition>();
        if (!dynamicPartitioningUsed) {
            partitionsToAdd.add(constructPartition(context, jobInfo, tblPath.toString(), null, jobInfo.getPartitionValues(), jobInfo.getOutputSchema(), getStorerParameterMap(storer), table, fs, grpName, perms));
        } else {
            for (Entry<String, Map<String, String>> entry : partitionsDiscoveredByPath.entrySet()) {
                partitionsToAdd.add(constructPartition(context, jobInfo, getPartitionRootLocation(entry.getKey(), entry.getValue().size()), entry.getKey(), entry.getValue(), jobInfo.getOutputSchema(), getStorerParameterMap(storer), table, fs, grpName, perms));
            }
        }
        ArrayList<Map<String, String>> ptnInfos = new ArrayList<Map<String, String>>();
        for (Partition ptn : partitionsToAdd) {
            ptnInfos.add(InternalUtil.createPtnKeyValueMap(new Table(tableInfo.getTable()), ptn));
        }
        // Publish the new partition(s).
        if (dynamicPartitioningUsed && harProcessor.isEnabled() && (!partitionsToAdd.isEmpty())) {
            if (!customDynamicLocationUsed) {
                Path src = new Path(ptnRootLocation);
                // check here for each dir we're copying out, to see if it
                // already exists, error out if so.
                // Also, treat dyn-writes as writes to immutable tables.
                // dryRun = true, immutable = true
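                // First call is a dry run that only detects clashes; the second
                // call performs the actual move.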
                moveTaskOutputs(fs, src, src, tblPath, true, true);
                moveTaskOutputs(fs, src, src, tblPath, false, true);
                if (!src.equals(tblPath)) {
                    fs.delete(src, true);
                }
            } else {
                moveCustomLocationTaskOutputs(fs, table, hiveConf);
            }
            try {
                updateTableSchema(client, table, jobInfo.getOutputSchema());
                LOG.info("HAR is being used. The table {} has new partitions {}.", table.getTableName(), ptnInfos);
                client.add_partitions(partitionsToAdd);
                partitionsAdded = partitionsToAdd;
            } catch (Exception e) {
                // There was an error adding partitions: roll back the filesystem copy and rethrow.
                for (Partition p : partitionsToAdd) {
                    Path ptnPath = new Path(harProcessor.getParentFSPath(new Path(p.getSd().getLocation())));
                    if (fs.exists(ptnPath)) {
                        fs.delete(ptnPath, true);
                    }
                }
                throw e;
            }
        } else {
            // no harProcessor, regular operation
            updateTableSchema(client, table, jobInfo.getOutputSchema());
            LOG.info("HAR not is not being used. The table {} has new partitions {}.", table.getTableName(), ptnInfos);
            if (partitionsToAdd.size() > 0) {
                if (!dynamicPartitioningUsed) {
                    // Move data from the temp directory to the actual table directory.
                    if (partitionsToAdd.size() > 1) {
                        throw new HCatException(ErrorType.ERROR_PUBLISHING_PARTITION, "More than one partition to publish in non-dynamic partitioning job");
                    }
                    Partition p = partitionsToAdd.get(0);
                    Path src = new Path(jobInfo.getLocation());
                    Path dest = new Path(p.getSd().getLocation());
                    moveTaskOutputs(fs, src, src, dest, true, table.isImmutable());
                    moveTaskOutputs(fs, src, src, dest, false, table.isImmutable());
                    if (!src.equals(dest)) {
                        if (src.toString().matches(".*" + Path.SEPARATOR + SCRATCH_DIR_NAME + "\\d\\.?\\d+.*")) {
                            // src is scratch directory, need to trim the part key value pairs from path
                            String diff = StringUtils.difference(src.toString(), dest.toString());
                            fs.delete(new Path(StringUtils.substringBefore(src.toString(), diff)), true);
                        } else {
                            fs.delete(src, true);
                        }
                    }
                    // Now, we check if the partition already exists. If not, we go ahead.
                    // If it does, we error out for an immutable table. For a mutable table,
                    // we check that the existing partition's InputFormat matches the current
                    // job's (i.e., the table's) InputFormat: if compatible, we skip the add;
                    // if incompatible, we error out.
                    boolean publishRequired = false;
                    try {
                        Partition existingP = client.getPartition(p.getDbName(), p.getTableName(), p.getValues());
                        if (existingP != null) {
                            if (table.isImmutable()) {
                                throw new HCatException(ErrorType.ERROR_DUPLICATE_PARTITION, "Attempted duplicate partition publish on to immutable table");
                            } else {
                                if (!existingP.getSd().getInputFormat().equals(table.getInputFormatClass().getName())) {
                                    throw new HCatException(ErrorType.ERROR_PUBLISHING_PARTITION, "Attempted partition append, where old partition format was " + existingP.getSd().getInputFormat() + " and table format was " + table.getInputFormatClass().getName());
                                }
                            }
                        } else {
                            publishRequired = true;
                        }
                    } catch (NoSuchObjectException e) {
                        // All good, no such partition exists, move on.
                        publishRequired = true;
                    }
                    if (publishRequired) {
                        client.add_partitions(partitionsToAdd);
                        partitionsAdded = partitionsToAdd;
                    }
                } else {
                    // Dynamic partitioning usecase
                    if (!customDynamicLocationUsed) {
                        Path src = new Path(ptnRootLocation);
                        // dryRun = true, immutable = true
                        moveTaskOutputs(fs, src, src, tblPath, true, true);
                        moveTaskOutputs(fs, src, src, tblPath, false, true);
                        if (!src.equals(tblPath)) {
                            fs.delete(src, true);
                        }
                    } else {
                        moveCustomLocationTaskOutputs(fs, table, hiveConf);
                    }
                    client.add_partitions(partitionsToAdd);
                    partitionsAdded = partitionsToAdd;
                }
            }
        // Apply group and permissions to the newly-added partitions,
        // so as to have their permissions mimic the table permissions.
            for (Partition p : partitionsAdded) {
                applyGroupAndPerms(fs, new Path(p.getSd().getLocation()), tblStat.getPermission(), tblStat.getGroup(), true);
            }
        }
    } catch (Exception e) {
        if (partitionsAdded.size() > 0) {
            try {
                // Publishing failed after some partitions were added; remove them from the
                // metastore.
                for (Partition p : partitionsAdded) {
                    client.dropPartition(tableInfo.getDatabaseName(), tableInfo.getTableName(), p.getValues(), true);
                }
            } catch (Exception te) {
                // Keep cause as the original exception
                throw new HCatException(ErrorType.ERROR_PUBLISHING_PARTITION, e);
            }
        }
        if (e instanceof HCatException) {
            throw (HCatException) e;
        } else {
            throw new HCatException(ErrorType.ERROR_PUBLISHING_PARTITION, e);
        }
    } finally {
        HCatUtil.closeHiveClientQuietly(client);
    }
}
Also used: Path (org.apache.hadoop.fs.Path), Partition (org.apache.hadoop.hive.metastore.api.Partition), Table (org.apache.hadoop.hive.ql.metadata.Table), FileStatus (org.apache.hadoop.fs.FileStatus), Configuration (org.apache.hadoop.conf.Configuration), ArrayList (java.util.ArrayList), HCatException (org.apache.hive.hcatalog.common.HCatException), IMetaStoreClient (org.apache.hadoop.hive.metastore.IMetaStoreClient), MetaException (org.apache.hadoop.hive.metastore.api.MetaException), TException (org.apache.thrift.TException), IOException (java.io.IOException), InvalidOperationException (org.apache.hadoop.hive.metastore.api.InvalidOperationException), NoSuchObjectException (org.apache.hadoop.hive.metastore.api.NoSuchObjectException), FileSystem (org.apache.hadoop.fs.FileSystem), HiveConf (org.apache.hadoop.hive.conf.HiveConf), FsPermission (org.apache.hadoop.fs.permission.FsPermission), HashMap (java.util.HashMap), LinkedHashMap (java.util.LinkedHashMap), Map (java.util.Map)
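
Condensed, the publish-then-rollback shape of this method is roughly the following sketch; applyPermissions() is a hypothetical stand-in for the post-publish work (permission propagation) that can still fail after the metastore call:

import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.hive.metastore.IMetaStoreClient;
import org.apache.hadoop.hive.metastore.api.Partition;
import org.apache.hive.hcatalog.common.ErrorType;
import org.apache.hive.hcatalog.common.HCatException;

public class PublishWithRollbackSketch {
    static void publish(IMetaStoreClient client, String db, String table,
            List<Partition> partitionsToAdd) throws HCatException {
        List<Partition> partitionsAdded = new ArrayList<Partition>();
        try {
            client.add_partitions(partitionsToAdd);
            partitionsAdded.addAll(partitionsToAdd);
            applyPermissions(partitionsAdded);
        } catch (Exception e) {
            try {
                // Best-effort cleanup: drop whatever made it into the metastore.
                for (Partition p : partitionsAdded) {
                    client.dropPartition(db, table, p.getValues(), true);
                }
            } catch (Exception te) {
                // Keep the original failure as the cause, as registerPartitions() does.
                throw new HCatException(ErrorType.ERROR_PUBLISHING_PARTITION, e);
            }
            throw new HCatException(ErrorType.ERROR_PUBLISHING_PARTITION, e);
        }
    }

    private static void applyPermissions(List<Partition> partitions) {
        // Hypothetical: mimic table group/permissions on new partition dirs.
    }
}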

Aggregations

HCatException (org.apache.hive.hcatalog.common.HCatException): 52 uses
IOException (java.io.IOException): 23 uses
ArrayList (java.util.ArrayList): 20 uses
MetaException (org.apache.hadoop.hive.metastore.api.MetaException): 19 uses
TException (org.apache.thrift.TException): 14 uses
HCatFieldSchema (org.apache.hive.hcatalog.data.schema.HCatFieldSchema): 13 uses
HashMap (java.util.HashMap): 11 uses
Test (org.junit.Test): 11 uses
NoSuchObjectException (org.apache.hadoop.hive.metastore.api.NoSuchObjectException): 10 uses
Configuration (org.apache.hadoop.conf.Configuration): 9 uses
Path (org.apache.hadoop.fs.Path): 9 uses
Partition (org.apache.hadoop.hive.metastore.api.Partition): 8 uses
Table (org.apache.hadoop.hive.metastore.api.Table): 8 uses
HCatSchema (org.apache.hive.hcatalog.data.schema.HCatSchema): 7 uses
Job (org.apache.hadoop.mapreduce.Job): 6 uses
FieldSchema (org.apache.hadoop.hive.metastore.api.FieldSchema): 5 uses
FileSystem (org.apache.hadoop.fs.FileSystem): 4 uses
HiveConf (org.apache.hadoop.hive.conf.HiveConf): 4 uses
CommandNeedRetryException (org.apache.hadoop.hive.ql.CommandNeedRetryException): 4 uses
Map (java.util.Map): 3 uses