Use of org.apache.hadoop.hive.ql.metadata.PartitionIterable in project hive by apache.
The class DDLTask, method generateAddMmTasks.
private List<Task<?>> generateAddMmTasks(Table tbl) throws HiveException {
// We will move all the files in the table/partition directories into the first MM
// directory, then commit the first write ID.
List<Path> srcs = new ArrayList<>(), tgts = new ArrayList<>();
long mmWriteId = 0;
try {
HiveTxnManager txnManager = SessionState.get().getTxnMgr();
if (txnManager.isTxnOpen()) {
mmWriteId = txnManager.getTableWriteId(tbl.getDbName(), tbl.getTableName());
} else {
txnManager.openTxn(new Context(conf), conf.getUser());
mmWriteId = txnManager.getTableWriteId(tbl.getDbName(), tbl.getTableName());
txnManager.commitTxn();
}
} catch (Exception e) {
String errorMessage = "FAILED: Error in acquiring locks: " + e.getMessage();
console.printError(errorMessage, "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e));
}
int stmtId = 0;
String mmDir = AcidUtils.deltaSubdir(mmWriteId, mmWriteId, stmtId);
Hive db = getHive();
if (tbl.getPartitionKeys().size() > 0) {
PartitionIterable parts = new PartitionIterable(db, tbl, null, HiveConf.getIntVar(conf, ConfVars.METASTORE_BATCH_RETRIEVE_MAX));
Iterator<Partition> partIter = parts.iterator();
while (partIter.hasNext()) {
Partition part = partIter.next();
checkMmLb(part);
Path src = part.getDataLocation(), tgt = new Path(src, mmDir);
srcs.add(src);
tgts.add(tgt);
if (Utilities.FILE_OP_LOGGER.isTraceEnabled()) {
Utilities.FILE_OP_LOGGER.trace("Will move " + src + " to " + tgt);
}
}
} else {
checkMmLb(tbl);
Path src = tbl.getDataLocation(), tgt = new Path(src, mmDir);
srcs.add(src);
tgts.add(tgt);
if (Utilities.FILE_OP_LOGGER.isTraceEnabled()) {
Utilities.FILE_OP_LOGGER.trace("Will move " + src + " to " + tgt);
}
}
// Don't set inputs and outputs - the locks have already been taken so it's pointless.
MoveWork mw = new MoveWork(null, null, null, null, false);
mw.setMultiFilesDesc(new LoadMultiFilesDesc(srcs, tgts, true, null, null));
ImportCommitWork icw = new ImportCommitWork(tbl.getDbName(), tbl.getTableName(), mmWriteId, stmtId);
Task<?> mv = TaskFactory.get(mw), ic = TaskFactory.get(icw);
mv.addDependentTask(ic);
return Lists.<Task<?>>newArrayList(mv);
}
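For orientation, the core PartitionIterable pattern in the method above is: construct the iterable with a null partial partition spec (meaning all partitions) and a metastore batch size, then walk it and pair each partition directory with its target MM delta directory. The standalone helper below is a hypothetical sketch, not part of DDLTask; it reuses only calls that already appear in the snippet and assumes a live Hive session.

import java.util.List;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
import org.apache.hadoop.hive.ql.metadata.Hive;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.metadata.Partition;
import org.apache.hadoop.hive.ql.metadata.PartitionIterable;
import org.apache.hadoop.hive.ql.metadata.Table;

// Hypothetical helper class, for illustration only.
public class MmMoveSketch {
  // Collect the source/target paths for an MM conversion, visiting partitions
  // lazily in metastore batches instead of loading them all into memory at once.
  static void collectMmMoves(HiveConf conf, Hive db, Table tbl, String mmDir,
      List<Path> srcs, List<Path> tgts) throws HiveException {
    int batchSize = HiveConf.getIntVar(conf, ConfVars.METASTORE_BATCH_RETRIEVE_MAX);
    // A null partial partition spec means "iterate over every partition of the table".
    PartitionIterable parts = new PartitionIterable(db, tbl, null, batchSize);
    for (Partition part : parts) {
      Path src = part.getDataLocation();   // existing partition directory
      Path tgt = new Path(src, mmDir);     // target delta subdirectory inside it
      srcs.add(src);
      tgts.add(tgt);
    }
  }
}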
Use of org.apache.hadoop.hive.ql.metadata.PartitionIterable in project hive by apache.
The class DDLTask, method describeTable.
/**
* Write the description of a table to a file.
*
* @param db
* The database in question.
* @param descTbl
* This is the table we're interested in.
* @return Returns 0 when execution succeeds and above 0 if it fails.
* @throws HiveException
* Throws this exception if an unexpected error occurs.
* @throws MetaException
*/
private int describeTable(Hive db, DescTableDesc descTbl) throws HiveException, MetaException {
String colPath = descTbl.getColumnPath();
String tableName = descTbl.getTableName();
// describe the table - populate the output stream
Table tbl = db.getTable(tableName, false);
if (tbl == null) {
throw new HiveException(ErrorMsg.INVALID_TABLE, tableName);
}
Partition part = null;
if (descTbl.getPartSpec() != null) {
part = db.getPartition(tbl, descTbl.getPartSpec(), false);
if (part == null) {
throw new HiveException(ErrorMsg.INVALID_PARTITION, StringUtils.join(descTbl.getPartSpec().keySet(), ','), tableName);
}
tbl = part.getTable();
}
DataOutputStream outStream = getOutputStream(descTbl.getResFile());
try {
LOG.debug("DDLTask: got data for {}", tableName);
List<FieldSchema> cols = null;
List<ColumnStatisticsObj> colStats = null;
Deserializer deserializer = tbl.getDeserializer(true);
if (deserializer instanceof AbstractSerDe) {
String errorMsgs = ((AbstractSerDe) deserializer).getConfigurationErrors();
if (errorMsgs != null && !errorMsgs.isEmpty()) {
throw new SQLException(errorMsgs);
}
}
if (colPath.equals(tableName)) {
cols = (part == null || tbl.getTableType() == TableType.VIRTUAL_VIEW) ? tbl.getCols() : part.getCols();
if (!descTbl.isFormatted()) {
cols.addAll(tbl.getPartCols());
}
if (tbl.isPartitioned() && part == null) {
// No partition specified for a partitioned table; let's fetch all.
Map<String, String> tblProps = tbl.getParameters() == null ? new HashMap<String, String>() : tbl.getParameters();
Map<String, Long> valueMap = new HashMap<>();
Map<String, Boolean> stateMap = new HashMap<>();
for (String stat : StatsSetupConst.supportedStats) {
valueMap.put(stat, 0L);
stateMap.put(stat, true);
}
PartitionIterable parts = new PartitionIterable(db, tbl, null, conf.getIntVar(HiveConf.ConfVars.METASTORE_BATCH_RETRIEVE_MAX));
int numParts = 0;
for (Partition partition : parts) {
Map<String, String> props = partition.getParameters();
Boolean state = StatsSetupConst.areBasicStatsUptoDate(props);
for (String stat : StatsSetupConst.supportedStats) {
stateMap.put(stat, stateMap.get(stat) && state);
if (props != null && props.get(stat) != null) {
valueMap.put(stat, valueMap.get(stat) + Long.parseLong(props.get(stat)));
}
}
numParts++;
}
for (String stat : StatsSetupConst.supportedStats) {
StatsSetupConst.setBasicStatsState(tblProps, Boolean.toString(stateMap.get(stat)));
tblProps.put(stat, valueMap.get(stat).toString());
}
tblProps.put(StatsSetupConst.NUM_PARTITIONS, Integer.toString(numParts));
tbl.setParameters(tblProps);
}
} else {
if (descTbl.isFormatted()) {
// when a column name is specified in the DESCRIBE TABLE DDL, colPath
// will be table_name.column_name
String colName = colPath.split("\\.")[1];
String[] dbTab = Utilities.getDbTableName(tableName);
List<String> colNames = new ArrayList<String>();
colNames.add(colName.toLowerCase());
if (null == part) {
if (tbl.isPartitioned()) {
Map<String, String> tblProps = tbl.getParameters() == null ? new HashMap<String, String>() : tbl.getParameters();
if (tbl.isPartitionKey(colNames.get(0))) {
FieldSchema partCol = tbl.getPartColByName(colNames.get(0));
cols = Collections.singletonList(partCol);
PartitionIterable parts = new PartitionIterable(db, tbl, null, conf.getIntVar(HiveConf.ConfVars.METASTORE_BATCH_RETRIEVE_MAX));
ColumnInfo ci = new ColumnInfo(partCol.getName(), TypeInfoUtils.getTypeInfoFromTypeString(partCol.getType()), null, false);
ColStatistics cs = StatsUtils.getColStatsForPartCol(ci, parts, conf);
ColumnStatisticsData data = new ColumnStatisticsData();
ColStatistics.Range r = cs.getRange();
StatObjectConverter.fillColumnStatisticsData(partCol.getType(), data,
    r == null ? null : r.minValue, r == null ? null : r.maxValue,
    r == null ? null : r.minValue, r == null ? null : r.maxValue,
    r == null ? null : r.minValue.toString(), r == null ? null : r.maxValue.toString(),
    cs.getNumNulls(), cs.getCountDistint(), null,
    cs.getAvgColLen(), cs.getAvgColLen(), cs.getNumTrues(), cs.getNumFalses());
ColumnStatisticsObj cso = new ColumnStatisticsObj(partCol.getName(), partCol.getType(), data);
colStats = Collections.singletonList(cso);
StatsSetupConst.setColumnStatsState(tblProps, colNames);
} else {
cols = Hive.getFieldsFromDeserializer(colPath, deserializer);
List<String> parts = db.getPartitionNames(dbTab[0].toLowerCase(), dbTab[1].toLowerCase(), (short) -1);
AggrStats aggrStats = db.getAggrColStatsFor(dbTab[0].toLowerCase(), dbTab[1].toLowerCase(), colNames, parts);
colStats = aggrStats.getColStats();
if (parts.size() == aggrStats.getPartsFound()) {
StatsSetupConst.setColumnStatsState(tblProps, colNames);
} else {
StatsSetupConst.removeColumnStatsState(tblProps, colNames);
}
}
tbl.setParameters(tblProps);
} else {
cols = Hive.getFieldsFromDeserializer(colPath, deserializer);
colStats = db.getTableColumnStatistics(dbTab[0].toLowerCase(), dbTab[1].toLowerCase(), colNames);
}
} else {
List<String> partitions = new ArrayList<String>();
partitions.add(part.getName());
cols = Hive.getFieldsFromDeserializer(colPath, deserializer);
colStats = db.getPartitionColumnStatistics(dbTab[0].toLowerCase(), dbTab[1].toLowerCase(), partitions, colNames).get(part.getName());
}
} else {
cols = Hive.getFieldsFromDeserializer(colPath, deserializer);
}
}
PrimaryKeyInfo pkInfo = null;
ForeignKeyInfo fkInfo = null;
UniqueConstraint ukInfo = null;
NotNullConstraint nnInfo = null;
DefaultConstraint dInfo = null;
CheckConstraint cInfo = null;
if (descTbl.isExt() || descTbl.isFormatted()) {
pkInfo = db.getPrimaryKeys(tbl.getDbName(), tbl.getTableName());
fkInfo = db.getForeignKeys(tbl.getDbName(), tbl.getTableName());
ukInfo = db.getUniqueConstraints(tbl.getDbName(), tbl.getTableName());
nnInfo = db.getNotNullConstraints(tbl.getDbName(), tbl.getTableName());
dInfo = db.getDefaultConstraints(tbl.getDbName(), tbl.getTableName());
cInfo = db.getCheckConstraints(tbl.getDbName(), tbl.getTableName());
}
fixDecimalColumnTypeName(cols);
// In case the query is served by HiveServer2, don't pad it with spaces,
// as HiveServer2 output is consumed by JDBC/ODBC clients.
boolean isOutputPadded = !SessionState.get().isHiveServerQuery();
formatter.describeTable(outStream, colPath, tableName, tbl, part, cols, descTbl.isFormatted(), descTbl.isExt(), isOutputPadded, colStats, pkInfo, fkInfo, ukInfo, nnInfo, dInfo, cInfo);
LOG.debug("DDLTask: written data for {}", tableName);
} catch (SQLException e) {
throw new HiveException(e, ErrorMsg.GENERIC_ERROR, tableName);
} finally {
IOUtils.closeStream(outStream);
}
return 0;
}
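The partitioned-table branch above is where PartitionIterable earns its keep: when DESCRIBE runs against a partitioned table with no partition spec, the table-level basic stats are rolled up from every partition. The following is a condensed, hypothetical restatement of just that roll-up, assuming db, tbl and batchSize are in scope as in the sketch above (plus the usual java.util and StatsSetupConst imports); supportedStats covers the basic stats such as row count and total size.

// Running sum per basic stat, plus a flag that stays true only if every
// partition's stats are up to date.
Map<String, Long> totals = new HashMap<>();
Map<String, Boolean> allUpToDate = new HashMap<>();
for (String stat : StatsSetupConst.supportedStats) {
  totals.put(stat, 0L);
  allUpToDate.put(stat, true);
}
int numParts = 0;
for (Partition partition : new PartitionIterable(db, tbl, null, batchSize)) {
  Map<String, String> props = partition.getParameters();
  boolean partCurrent = StatsSetupConst.areBasicStatsUptoDate(props);
  for (String stat : StatsSetupConst.supportedStats) {
    allUpToDate.put(stat, allUpToDate.get(stat) && partCurrent);
    if (props != null && props.get(stat) != null) {
      totals.put(stat, totals.get(stat) + Long.parseLong(props.get(stat)));
    }
  }
  numParts++;
}
// The aggregated values and freshness flags are then written back into the table's
// parameter map, so the formatter prints them as if the table carried the stats itself.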
Use of org.apache.hadoop.hive.ql.metadata.PartitionIterable in project hive by apache.
The class DDLTask, method dropTable.
private void dropTable(Hive db, Table tbl, DropTableDesc dropTbl) throws HiveException {
// This is a true DROP TABLE
if (tbl != null && dropTbl.getValidationRequired()) {
if (tbl.isView()) {
if (!dropTbl.getExpectView()) {
if (dropTbl.getIfExists()) {
return;
}
if (dropTbl.getExpectMaterializedView()) {
throw new HiveException("Cannot drop a view with DROP MATERIALIZED VIEW");
} else {
throw new HiveException("Cannot drop a view with DROP TABLE");
}
}
} else if (tbl.isMaterializedView()) {
if (!dropTbl.getExpectMaterializedView()) {
if (dropTbl.getIfExists()) {
return;
}
if (dropTbl.getExpectView()) {
throw new HiveException("Cannot drop a materialized view with DROP VIEW");
} else {
throw new HiveException("Cannot drop a materialized view with DROP TABLE");
}
}
} else {
if (dropTbl.getExpectView()) {
if (dropTbl.getIfExists()) {
return;
}
throw new HiveException("Cannot drop a base table with DROP VIEW");
} else if (dropTbl.getExpectMaterializedView()) {
if (dropTbl.getIfExists()) {
return;
}
throw new HiveException("Cannot drop a base table with DROP MATERIALIZED VIEW");
}
}
}
ReplicationSpec replicationSpec = dropTbl.getReplicationSpec();
if ((tbl != null) && replicationSpec.isInReplicationScope()) {
/**
* DROP TABLE FOR REPLICATION behaves differently from DROP TABLE IF EXISTS - it more closely
* matches a DROP TABLE IF OLDER THAN(x) semantic.
*
* Ideally, commands executed under the scope of replication need to be idempotent and resilient
* to repeats. What can happen, sometimes, is that a drone processing a replication task can
* have been abandoned for not returning in time, but still execute its task after a while,
* which should not result in it mucking up data that has been impressed later on. So, for example,
* if we create partition P1, followed by dropping it, followed by creating it yet again,
* the replication of that drop should not drop the newer partition if it runs after the destination
* object is already in the newer state.
*
* Thus, we check replicationSpec.allowEventReplacementInto to determine whether or not we can
* drop the object in question (it returns false if the object is newer than the event, true if not).
*
* In addition, since DROP TABLE FOR REPLICATION can result in a table not being dropped, while DROP
* TABLE will always drop the table, and the included partitions, DROP TABLE FOR REPLICATION must
* do one more thing - if it does not drop the table because the table is in a newer state, it must
* drop the partitions inside it that are older than this event. To wit, DROP TABLE FOR REPL
* acts like a recursive DROP TABLE IF OLDER.
*/
if (!replicationSpec.allowEventReplacementInto(tbl.getParameters())) {
// The table itself is newer than the event and is left alone, but drop any partitions inside it that are older.
if (tbl.isPartitioned()) {
PartitionIterable partitions = new PartitionIterable(db, tbl, null, conf.getIntVar(HiveConf.ConfVars.METASTORE_BATCH_RETRIEVE_MAX));
for (Partition p : Iterables.filter(partitions, replicationSpec.allowEventReplacementInto())) {
db.dropPartition(tbl.getDbName(), tbl.getTableName(), p.getValues(), true);
}
}
LOG.debug("DDLTask: Drop Table is skipped as table {} is newer than update", dropTbl.getTableName());
// table is newer, leave it be.
return;
}
}
// drop the table
db.dropTable(dropTbl.getTableName(), dropTbl.getIfPurge());
if (tbl != null) {
// Remove from cache if it is a materialized view
if (tbl.isMaterializedView()) {
HiveMaterializedViewsRegistry.get().dropMaterializedView(tbl);
}
// We have already locked the table in DDLSemanticAnalyzer, don't do it again here
addIfAbsentByName(new WriteEntity(tbl, WriteEntity.WriteType.DDL_NO_LOCK));
}
}
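The replication branch above shows the second common PartitionIterable idiom: because PartitionIterable is just an Iterable<Partition>, it composes directly with Guava's Iterables.filter, so only partitions the replication event is still allowed to replace (i.e. partitions older than the event) get dropped. A minimal, hypothetical restatement of that idiom, assuming db, tbl, conf and replicationSpec are in scope as in the method above:

// Drop only the partitions that are older than the replication event; newer
// partitions, and the table itself, are left untouched.
PartitionIterable partitions = new PartitionIterable(db, tbl, null,
    conf.getIntVar(HiveConf.ConfVars.METASTORE_BATCH_RETRIEVE_MAX));
for (Partition p : Iterables.filter(partitions, replicationSpec.allowEventReplacementInto())) {
  // The final argument asks the metastore to delete the partition's data as well.
  db.dropPartition(tbl.getDbName(), tbl.getTableName(), p.getValues(), true);
}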
Use of org.apache.hadoop.hive.ql.metadata.PartitionIterable in project hive by apache.
The class DropTableOperation, method execute.
@Override
public int execute() throws HiveException {
Table table = getTable();
if (table == null) {
// dropping not existing table is handled by DropTableAnalyzer
return 0;
}
if (desc.getValidationRequired()) {
if (table.isView() || table.isMaterializedView()) {
if (desc.isIfExists()) {
return 0;
} else if (table.isView()) {
throw new HiveException("Cannot drop a view with DROP TABLE");
} else {
throw new HiveException("Cannot drop a materialized view with DROP TABLE");
}
}
}
ReplicationSpec replicationSpec = desc.getReplicationSpec();
if (replicationSpec.isInReplicationScope()) {
/**
* DROP TABLE FOR REPLICATION behaves differently from DROP TABLE IF EXISTS - it more closely
* matches a DROP TABLE IF OLDER THAN(x) semantic.
*
* Ideally, commands executed under the scope of replication need to be idempotent and resilient
* to repeats. What can happen, sometimes, is that a drone processing a replication task can
* have been abandoned for not returning in time, but still execute its task after a while,
* which should not result in it mucking up data that has been impressed later on. So, for example,
* if we create partition P1, followed by dropping it, followed by creating it yet again,
* the replication of that drop should not drop the newer partition if it runs after the destination
* object is already in the newer state.
*
* Thus, we check replicationSpec.allowEventReplacementInto to determine whether or not we can
* drop the object in question (it returns false if the object is newer than the event, true if not).
*
* In addition, since DROP TABLE FOR REPLICATION can result in a table not being dropped, while DROP
* TABLE will always drop the table, and the included partitions, DROP TABLE FOR REPLICATION must
* do one more thing - if it does not drop the table because the table is in a newer state, it must
* drop the partitions inside it that are older than this event. To wit, DROP TABLE FOR REPL
* acts like a recursive DROP TABLE IF OLDER.
*/
Map<String, String> dbParams = context.getDb().getDatabase(table.getDbName()).getParameters();
if (!replicationSpec.allowEventReplacementInto(dbParams)) {
// The table itself is newer than the event and is left alone, but drop any partitions inside it that are older.
if (table.isPartitioned()) {
PartitionIterable partitions = new PartitionIterable(context.getDb(), table, null, MetastoreConf.getIntVar(context.getConf(), MetastoreConf.ConfVars.BATCH_RETRIEVE_MAX));
for (Partition p : partitions) {
if (replicationSpec.allowEventReplacementInto(dbParams)) {
context.getDb().dropPartition(table.getDbName(), table.getTableName(), p.getValues(), true);
}
}
}
LOG.debug("DDLTask: Drop Table is skipped as table {} is newer than update", desc.getTableName());
// table is newer, leave it be.
return 0;
}
}
// TODO: API w/catalog name
context.getDb().dropTable(table, desc.isPurge());
DDLUtils.addIfAbsentByName(new WriteEntity(table, WriteEntity.WriteType.DDL_NO_LOCK), context);
if (LlapHiveUtils.isLlapMode(context.getConf())) {
TableName tableName = HiveTableName.of(table);
ProactiveEviction.Request.Builder llapEvictRequestBuilder = ProactiveEviction.Request.Builder.create();
llapEvictRequestBuilder.addTable(tableName.getDb(), tableName.getTable());
ProactiveEviction.evict(context.getConf(), llapEvictRequestBuilder.build());
}
return 0;
}
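Two differences from the legacy DDLTask.dropTable above are worth noting: the batch size is now read through MetastoreConf (BATCH_RETRIEVE_MAX) instead of HiveConf's METASTORE_BATCH_RETRIEVE_MAX, and the check inside the loop consults the database-level parameters (dbParams) for every partition rather than the per-partition predicate used before. A minimal, hypothetical sketch of just the iterable construction in this newer style, with names as in the snippet:

// Same lazy, batched iteration as before, but configured through MetastoreConf so
// the batch-size setting is shared with the standalone metastore.
int batchSize = MetastoreConf.getIntVar(context.getConf(), MetastoreConf.ConfVars.BATCH_RETRIEVE_MAX);
PartitionIterable partitions = new PartitionIterable(context.getDb(), table, null, batchSize);
for (Partition p : partitions) {
  // per-partition replication check and drop, as in the method above
}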
Use of org.apache.hadoop.hive.ql.metadata.PartitionIterable in project hive by apache.
The class AlterTableSetPropertiesOperation, method generateAddMmTasks.
private List<Task<?>> generateAddMmTasks(Table table, Long writeId) throws HiveException {
// We will move all the files in the table/partition directories into the first MM
// directory, then commit the first write ID.
if (writeId == null) {
throw new HiveException("Internal error - write ID not set for MM conversion");
}
List<Path> sources = new ArrayList<>();
List<Path> targets = new ArrayList<>();
int stmtId = 0;
String mmDir = AcidUtils.deltaSubdir(writeId, writeId, stmtId);
if (!table.getPartitionKeys().isEmpty()) {
PartitionIterable parts = new PartitionIterable(context.getDb(), table, null, MetastoreConf.getIntVar(context.getConf(), MetastoreConf.ConfVars.BATCH_RETRIEVE_MAX));
for (Partition part : parts) {
checkMmLb(part);
Path source = part.getDataLocation();
Path target = new Path(source, mmDir);
sources.add(source);
targets.add(target);
Utilities.FILE_OP_LOGGER.trace("Will move " + source + " to " + target);
}
} else {
checkMmLb(table);
Path source = table.getDataLocation();
Path target = new Path(source, mmDir);
sources.add(source);
targets.add(target);
Utilities.FILE_OP_LOGGER.trace("Will move " + source + " to " + target);
}
// Don't set inputs and outputs - the locks have already been taken so it's pointless.
MoveWork mw = new MoveWork(null, null, null, null, false);
mw.setMultiFilesDesc(new LoadMultiFilesDesc(sources, targets, true, null, null));
return Lists.<Task<?>>newArrayList(TaskFactory.get(mw));
}
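For context on the directory naming that both generateAddMmTasks variants rely on: AcidUtils.deltaSubdir(writeId, writeId, stmtId) produces the single delta directory that all pre-existing files are moved into, and each source directory (table or partition) gets that delta as a child. A small, hypothetical illustration; the exact zero-padding of the name is defined by AcidUtils, so the values shown in the comments are only indicative.

// Illustration only: the MM delta subdirectory for write ID 7, statement 0, and the
// resulting target path under an example table location.
long writeId = 7L;
int stmtId = 0;
String mmDir = AcidUtils.deltaSubdir(writeId, writeId, stmtId); // e.g. "delta_0000007_0000007_0000"
Path tableDir = table.getDataLocation();                        // e.g. .../warehouse/t
Path target = new Path(tableDir, mmDir);                        // e.g. .../warehouse/t/delta_0000007_0000007_0000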