Use of org.apache.hadoop.hive.metastore.api.EnvironmentContext in project hive by apache.
The class DDLTask, method alterTableOrSinglePartition.
private int alterTableOrSinglePartition(AlterTableDesc alterTbl, Table tbl, Partition part) throws HiveException {
EnvironmentContext environmentContext = alterTbl.getEnvironmentContext();
if (environmentContext == null) {
environmentContext = new EnvironmentContext();
alterTbl.setEnvironmentContext(environmentContext);
}
// no need to update stats in alter table/partition operations
if (environmentContext.getProperties() == null || environmentContext.getProperties().get(StatsSetupConst.DO_NOT_UPDATE_STATS) == null) {
environmentContext.putToProperties(StatsSetupConst.DO_NOT_UPDATE_STATS, StatsSetupConst.TRUE);
}
if (alterTbl.getOp() == AlterTableDesc.AlterTableTypes.RENAME) {
tbl.setDbName(Utilities.getDatabaseName(alterTbl.getNewName()));
tbl.setTableName(Utilities.getTableName(alterTbl.getNewName()));
} else if (alterTbl.getOp() == AlterTableDesc.AlterTableTypes.ADDCOLS) {
StorageDescriptor sd = retrieveStorageDescriptor(tbl, part);
String serializationLib = sd.getSerdeInfo().getSerializationLib();
AvroSerdeUtils.handleAlterTableForAvro(conf, serializationLib, tbl.getTTable().getParameters());
List<FieldSchema> oldCols = (part == null ? tbl.getColsForMetastore() : part.getColsForMetastore());
List<FieldSchema> newCols = alterTbl.getNewCols();
if (serializationLib.equals("org.apache.hadoop.hive.serde.thrift.columnsetSerDe")) {
console.printInfo("Replacing columns for columnsetSerDe and changing to LazySimpleSerDe");
sd.getSerdeInfo().setSerializationLib(LazySimpleSerDe.class.getName());
sd.setCols(newCols);
} else {
// make sure the new columns do not already exist
Iterator<FieldSchema> iterNewCols = newCols.iterator();
while (iterNewCols.hasNext()) {
FieldSchema newCol = iterNewCols.next();
String newColName = newCol.getName();
Iterator<FieldSchema> iterOldCols = oldCols.iterator();
while (iterOldCols.hasNext()) {
String oldColName = iterOldCols.next().getName();
if (oldColName.equalsIgnoreCase(newColName)) {
throw new HiveException(ErrorMsg.DUPLICATE_COLUMN_NAMES, newColName);
}
}
oldCols.add(newCol);
}
sd.setCols(oldCols);
}
} else if (alterTbl.getOp() == AlterTableDesc.AlterTableTypes.RENAMECOLUMN) {
StorageDescriptor sd = retrieveStorageDescriptor(tbl, part);
String serializationLib = sd.getSerdeInfo().getSerializationLib();
AvroSerdeUtils.handleAlterTableForAvro(conf, serializationLib, tbl.getTTable().getParameters());
List<FieldSchema> oldCols = (part == null ? tbl.getColsForMetastore() : part.getColsForMetastore());
List<FieldSchema> newCols = new ArrayList<FieldSchema>();
Iterator<FieldSchema> iterOldCols = oldCols.iterator();
String oldName = alterTbl.getOldColName();
String newName = alterTbl.getNewColName();
String type = alterTbl.getNewColType();
String comment = alterTbl.getNewColComment();
boolean first = alterTbl.getFirst();
String afterCol = alterTbl.getAfterCol();
// if orc table, restrict reordering columns as it will break schema evolution
boolean isOrcSchemaEvolution = sd.getInputFormat().equals(OrcInputFormat.class.getName()) && isSchemaEvolutionEnabled(tbl);
if (isOrcSchemaEvolution && (first || (afterCol != null && !afterCol.trim().isEmpty()))) {
throw new HiveException(ErrorMsg.CANNOT_REORDER_COLUMNS, alterTbl.getOldName());
}
FieldSchema column = null;
boolean found = false;
int position = -1;
if (first) {
position = 0;
}
int i = 1;
while (iterOldCols.hasNext()) {
FieldSchema col = iterOldCols.next();
String oldColName = col.getName();
if (oldColName.equalsIgnoreCase(newName) && !oldColName.equalsIgnoreCase(oldName)) {
throw new HiveException(ErrorMsg.DUPLICATE_COLUMN_NAMES, newName);
} else if (oldColName.equalsIgnoreCase(oldName)) {
col.setName(newName);
if (type != null && !type.trim().equals("")) {
col.setType(type);
}
if (comment != null) {
col.setComment(comment);
}
found = true;
if (first || (afterCol != null && !afterCol.trim().equals(""))) {
column = col;
continue;
}
}
if (afterCol != null && !afterCol.trim().equals("") && oldColName.equalsIgnoreCase(afterCol)) {
position = i;
}
i++;
newCols.add(col);
}
// did not find the column
if (!found) {
throw new HiveException(ErrorMsg.INVALID_COLUMN, oldName);
}
// after column is not null, but we did not find it.
if ((afterCol != null && !afterCol.trim().equals("")) && position < 0) {
throw new HiveException(ErrorMsg.INVALID_COLUMN, afterCol);
}
if (position >= 0) {
newCols.add(position, column);
}
sd.setCols(newCols);
} else if (alterTbl.getOp() == AlterTableDesc.AlterTableTypes.REPLACECOLS) {
StorageDescriptor sd = retrieveStorageDescriptor(tbl, part);
// change SerDe to LazySimpleSerDe if it is columnsetSerDe
String serializationLib = sd.getSerdeInfo().getSerializationLib();
if (serializationLib.equals("org.apache.hadoop.hive.serde.thrift.columnsetSerDe")) {
console.printInfo("Replacing columns for columnsetSerDe and changing to LazySimpleSerDe");
sd.getSerdeInfo().setSerializationLib(LazySimpleSerDe.class.getName());
} else if (!serializationLib.equals(MetadataTypedColumnsetSerDe.class.getName()) && !serializationLib.equals(LazySimpleSerDe.class.getName()) && !serializationLib.equals(ColumnarSerDe.class.getName()) && !serializationLib.equals(DynamicSerDe.class.getName()) && !serializationLib.equals(ParquetHiveSerDe.class.getName()) && !serializationLib.equals(OrcSerde.class.getName())) {
throw new HiveException(ErrorMsg.CANNOT_REPLACE_COLUMNS, alterTbl.getOldName());
}
final boolean isOrcSchemaEvolution = serializationLib.equals(OrcSerde.class.getName()) && isSchemaEvolutionEnabled(tbl);
// adding columns and limited integer type promotion are supported for ORC schema evolution
if (isOrcSchemaEvolution) {
final List<FieldSchema> existingCols = sd.getCols();
final List<FieldSchema> replaceCols = alterTbl.getNewCols();
if (replaceCols.size() < existingCols.size()) {
throw new HiveException(ErrorMsg.REPLACE_CANNOT_DROP_COLUMNS, alterTbl.getOldName());
}
}
sd.setCols(alterTbl.getNewCols());
} else if (alterTbl.getOp() == AlterTableDesc.AlterTableTypes.ADDPROPS) {
if (StatsSetupConst.USER.equals(environmentContext.getProperties().get(StatsSetupConst.STATS_GENERATED))) {
environmentContext.getProperties().remove(StatsSetupConst.DO_NOT_UPDATE_STATS);
}
if (part != null) {
part.getTPartition().getParameters().putAll(alterTbl.getProps());
} else {
tbl.getTTable().getParameters().putAll(alterTbl.getProps());
}
} else if (alterTbl.getOp() == AlterTableDesc.AlterTableTypes.DROPPROPS) {
Iterator<String> keyItr = alterTbl.getProps().keySet().iterator();
if (StatsSetupConst.USER.equals(environmentContext.getProperties().get(StatsSetupConst.STATS_GENERATED))) {
// dropping a stats parameter triggers an automatic stats recompute/update
environmentContext.getProperties().remove(StatsSetupConst.DO_NOT_UPDATE_STATS);
}
while (keyItr.hasNext()) {
if (part != null) {
part.getTPartition().getParameters().remove(keyItr.next());
} else {
tbl.getTTable().getParameters().remove(keyItr.next());
}
}
} else if (alterTbl.getOp() == AlterTableDesc.AlterTableTypes.ADDSERDEPROPS) {
StorageDescriptor sd = retrieveStorageDescriptor(tbl, part);
sd.getSerdeInfo().getParameters().putAll(alterTbl.getProps());
} else if (alterTbl.getOp() == AlterTableDesc.AlterTableTypes.ADDSERDE) {
StorageDescriptor sd = retrieveStorageDescriptor(tbl, part);
String serdeName = alterTbl.getSerdeName();
String oldSerdeName = sd.getSerdeInfo().getSerializationLib();
// if orc table, restrict changing the serde as it can break schema evolution
if (isSchemaEvolutionEnabled(tbl) && oldSerdeName.equalsIgnoreCase(OrcSerde.class.getName()) && !serdeName.equalsIgnoreCase(OrcSerde.class.getName())) {
throw new HiveException(ErrorMsg.CANNOT_CHANGE_SERDE, OrcSerde.class.getSimpleName(), alterTbl.getOldName());
}
sd.getSerdeInfo().setSerializationLib(serdeName);
if ((alterTbl.getProps() != null) && (alterTbl.getProps().size() > 0)) {
sd.getSerdeInfo().getParameters().putAll(alterTbl.getProps());
}
if (part != null) {
// TODO: wtf? This doesn't do anything.
part.getTPartition().getSd().setCols(part.getTPartition().getSd().getCols());
} else {
if (Table.shouldStoreFieldsInMetastore(conf, serdeName, tbl.getParameters()) && !Table.hasMetastoreBasedSchema(conf, oldSerdeName)) {
// If the new SerDe stores its fields in the metastore but the old one does not, save the old fields so the new SerDe can operate; this may fail if some fields from the old SerDe are too long to be stored in the metastore, but there's nothing we can do.
try {
Deserializer oldSerde = MetaStoreUtils.getDeserializer(conf, tbl.getTTable(), false, oldSerdeName);
tbl.setFields(Hive.getFieldsFromDeserializer(tbl.getTableName(), oldSerde));
} catch (MetaException ex) {
throw new HiveException(ex);
}
}
}
} else if (alterTbl.getOp() == AlterTableDesc.AlterTableTypes.ADDFILEFORMAT) {
StorageDescriptor sd = retrieveStorageDescriptor(tbl, part);
// if orc table, restrict changing the file format as it can break schema evolution
if (isSchemaEvolutionEnabled(tbl) && sd.getInputFormat().equals(OrcInputFormat.class.getName()) && !alterTbl.getInputFormat().equals(OrcInputFormat.class.getName())) {
throw new HiveException(ErrorMsg.CANNOT_CHANGE_FILEFORMAT, "ORC", alterTbl.getOldName());
}
sd.setInputFormat(alterTbl.getInputFormat());
sd.setOutputFormat(alterTbl.getOutputFormat());
if (alterTbl.getSerdeName() != null) {
sd.getSerdeInfo().setSerializationLib(alterTbl.getSerdeName());
}
} else if (alterTbl.getOp() == AlterTableDesc.AlterTableTypes.ADDCLUSTERSORTCOLUMN) {
StorageDescriptor sd = retrieveStorageDescriptor(tbl, part);
// validate sort columns and bucket columns
List<String> columns = Utilities.getColumnNamesFromFieldSchema(tbl.getCols());
if (!alterTbl.isTurnOffSorting()) {
Utilities.validateColumnNames(columns, alterTbl.getBucketColumns());
}
if (alterTbl.getSortColumns() != null) {
Utilities.validateColumnNames(columns, Utilities.getColumnNamesFromSortCols(alterTbl.getSortColumns()));
}
if (alterTbl.isTurnOffSorting()) {
sd.setSortCols(new ArrayList<Order>());
} else if (alterTbl.getNumberBuckets() == -1) {
// -1 buckets means to turn off bucketing
sd.setBucketCols(new ArrayList<String>());
sd.setNumBuckets(-1);
sd.setSortCols(new ArrayList<Order>());
} else {
sd.setBucketCols(alterTbl.getBucketColumns());
sd.setNumBuckets(alterTbl.getNumberBuckets());
sd.setSortCols(alterTbl.getSortColumns());
}
} else if (alterTbl.getOp() == AlterTableDesc.AlterTableTypes.ALTERLOCATION) {
StorageDescriptor sd = retrieveStorageDescriptor(tbl, part);
String newLocation = alterTbl.getNewLocation();
try {
URI locUri = new URI(newLocation);
if (!new Path(locUri).isAbsolute()) {
throw new HiveException(ErrorMsg.BAD_LOCATION_VALUE, newLocation);
}
sd.setLocation(newLocation);
} catch (URISyntaxException e) {
throw new HiveException(e);
}
environmentContext.getProperties().remove(StatsSetupConst.DO_NOT_UPDATE_STATS);
} else if (alterTbl.getOp() == AlterTableDesc.AlterTableTypes.ADDSKEWEDBY) {
// Validation was done at compile time; no validation is needed here.
List<String> skewedColNames = null;
List<List<String>> skewedValues = null;
if (alterTbl.isTurnOffSkewed()) {
// Convert skewed table to non-skewed table.
skewedColNames = new ArrayList<String>();
skewedValues = new ArrayList<List<String>>();
} else {
skewedColNames = alterTbl.getSkewedColNames();
skewedValues = alterTbl.getSkewedColValues();
}
if (null == tbl.getSkewedInfo()) {
// Convert non-skewed table to skewed table.
SkewedInfo skewedInfo = new SkewedInfo();
skewedInfo.setSkewedColNames(skewedColNames);
skewedInfo.setSkewedColValues(skewedValues);
tbl.setSkewedInfo(skewedInfo);
} else {
tbl.setSkewedColNames(skewedColNames);
tbl.setSkewedColValues(skewedValues);
}
tbl.setStoredAsSubDirectories(alterTbl.isStoredAsSubDirectories());
} else if (alterTbl.getOp() == AlterTableDesc.AlterTableTypes.ALTERSKEWEDLOCATION) {
// process location one-by-one
Map<List<String>, String> locMaps = alterTbl.getSkewedLocations();
Set<List<String>> keys = locMaps.keySet();
for (List<String> key : keys) {
String newLocation = locMaps.get(key);
try {
URI locUri = new URI(newLocation);
if (part != null) {
List<String> slk = new ArrayList<String>(key);
part.setSkewedValueLocationMap(slk, locUri.toString());
} else {
List<String> slk = new ArrayList<String>(key);
tbl.setSkewedValueLocationMap(slk, locUri.toString());
}
} catch (URISyntaxException e) {
throw new HiveException(e);
}
}
environmentContext.getProperties().remove(StatsSetupConst.DO_NOT_UPDATE_STATS);
} else if (alterTbl.getOp() == AlterTableTypes.ALTERBUCKETNUM) {
if (part != null) {
if (part.getBucketCount() == alterTbl.getNumberBuckets()) {
return 0;
}
part.setBucketCount(alterTbl.getNumberBuckets());
} else {
if (tbl.getNumBuckets() == alterTbl.getNumberBuckets()) {
return 0;
}
tbl.setNumBuckets(alterTbl.getNumberBuckets());
}
} else {
throw new HiveException(ErrorMsg.UNSUPPORTED_ALTER_TBL_OP, alterTbl.getOp().toString());
}
return 0;
}
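The method above treats EnvironmentContext as a hint channel to the metastore: it first marks the operation with DO_NOT_UPDATE_STATS, and the branches that actually invalidate or legitimize stats (ADDPROPS/DROPPROPS with user-generated stats, ALTERLOCATION, ALTERSKEWEDLOCATION) remove that hint again so basic stats can be refreshed. Below is a minimal sketch of that default-then-override pattern; the helper names are illustrative and not part of Hive, only EnvironmentContext and StatsSetupConst calls already shown above are used, and the import paths are assumed from Hive's usual layout.

import java.util.Map;

import org.apache.hadoop.hive.common.StatsSetupConst;
import org.apache.hadoop.hive.metastore.api.EnvironmentContext;

public class AlterStatsHintSketch {

    // Hypothetical helper mirroring the default at the top of alterTableOrSinglePartition:
    // suppress stats updates unless the caller has already said otherwise.
    static EnvironmentContext withDefaultNoStatsUpdate(EnvironmentContext ctx) {
        if (ctx == null) {
            ctx = new EnvironmentContext();
        }
        Map<String, String> props = ctx.getProperties();
        if (props == null || props.get(StatsSetupConst.DO_NOT_UPDATE_STATS) == null) {
            ctx.putToProperties(StatsSetupConst.DO_NOT_UPDATE_STATS, StatsSetupConst.TRUE);
        }
        return ctx;
    }

    // Hypothetical helper for the branches that drop the hint (e.g. ALTERLOCATION),
    // leaving the metastore free to refresh basic stats.
    static void allowStatsUpdate(EnvironmentContext ctx) {
        if (ctx.getProperties() != null) {
            ctx.getProperties().remove(StatsSetupConst.DO_NOT_UPDATE_STATS);
        }
    }
}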
Use of org.apache.hadoop.hive.metastore.api.EnvironmentContext in project hive by apache.
The class DDLTask, method touch.
/**
* Rewrite the partition's metadata and force the pre/post execute hooks to
* be fired.
*
* @param db the Hive handle used to read and rewrite the metadata
* @param touchDesc descriptor naming the table and the optional partition spec to touch
* @return 0 on success
* @throws HiveException if the metadata update fails
*/
private int touch(Hive db, AlterTableSimpleDesc touchDesc) throws HiveException {
Table tbl = db.getTable(touchDesc.getTableName());
EnvironmentContext environmentContext = new EnvironmentContext();
environmentContext.putToProperties(StatsSetupConst.DO_NOT_UPDATE_STATS, StatsSetupConst.TRUE);
if (touchDesc.getPartSpec() == null) {
try {
db.alterTable(touchDesc.getTableName(), tbl, environmentContext);
} catch (InvalidOperationException e) {
throw new HiveException("Uable to update table");
}
work.getInputs().add(new ReadEntity(tbl));
addIfAbsentByName(new WriteEntity(tbl, WriteEntity.WriteType.DDL_NO_LOCK));
} else {
Partition part = db.getPartition(tbl, touchDesc.getPartSpec(), false);
if (part == null) {
throw new HiveException("Specified partition does not exist");
}
try {
db.alterPartition(touchDesc.getTableName(), part, environmentContext);
} catch (InvalidOperationException e) {
throw new HiveException(e);
}
work.getInputs().add(new ReadEntity(part));
addIfAbsentByName(new WriteEntity(part, WriteEntity.WriteType.DDL_NO_LOCK));
}
return 0;
}
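Condensed, the touch pattern is: build an EnvironmentContext that tells the metastore not to recompute basic stats, then push the unchanged metadata back so the pre/post execute hooks fire. The sketch below is a hedged reduction of the method above (read/write entity bookkeeping omitted; the method name and parameters are placeholders), using only Hive calls that appear in the excerpt.

import java.util.Map;

import org.apache.hadoop.hive.common.StatsSetupConst;
import org.apache.hadoop.hive.metastore.api.EnvironmentContext;
import org.apache.hadoop.hive.metastore.api.InvalidOperationException;
import org.apache.hadoop.hive.ql.metadata.Hive;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.metadata.Partition;
import org.apache.hadoop.hive.ql.metadata.Table;

public class TouchSketch {

    // Rewrite table or partition metadata without updating stats, mirroring DDLTask.touch.
    static void touch(Hive db, String tableName, Map<String, String> partSpec) throws HiveException {
        Table tbl = db.getTable(tableName);
        EnvironmentContext ctx = new EnvironmentContext();
        ctx.putToProperties(StatsSetupConst.DO_NOT_UPDATE_STATS, StatsSetupConst.TRUE);
        try {
            if (partSpec == null) {
                // table-level touch: metadata is written back unchanged
                db.alterTable(tableName, tbl, ctx);
            } else {
                Partition part = db.getPartition(tbl, partSpec, false);
                if (part == null) {
                    throw new HiveException("Specified partition does not exist");
                }
                db.alterPartition(tableName, part, ctx);
            }
        } catch (InvalidOperationException e) {
            throw new HiveException(e);
        }
    }
}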
Use of org.apache.hadoop.hive.metastore.api.EnvironmentContext in project hive by apache.
The class DDLSemanticAnalyzer, method analyzeAlterTableProps.
private void analyzeAlterTableProps(String[] qualified, HashMap<String, String> partSpec, ASTNode ast, boolean expectView, boolean isUnset) throws SemanticException {
String tableName = getDotName(qualified);
HashMap<String, String> mapProp = getProps((ASTNode) (ast.getChild(0)).getChild(0));
EnvironmentContext environmentContext = null;
// we need to check if the properties are valid, especially for stats.
// they might be changed via alter table .. update statistics or
// alter table .. set tblproperties. If the property is not row_count
// or raw_data_size, it cannot be changed through update statistics
boolean changeStatsSucceeded = false;
for (Entry<String, String> entry : mapProp.entrySet()) {
// make sure that we do not change anything if there is anything wrong.
if (entry.getKey().equals(StatsSetupConst.ROW_COUNT) || entry.getKey().equals(StatsSetupConst.RAW_DATA_SIZE)) {
try {
Long.parseLong(entry.getValue());
changeStatsSucceeded = true;
} catch (Exception e) {
throw new SemanticException("AlterTable " + entry.getKey() + " failed with value " + entry.getValue());
}
} else {
if (queryState.getCommandType().equals(HiveOperation.ALTERTABLE_UPDATETABLESTATS.getOperationName()) || queryState.getCommandType().equals(HiveOperation.ALTERTABLE_UPDATEPARTSTATS.getOperationName())) {
throw new SemanticException("AlterTable UpdateStats " + entry.getKey() + " failed because the only valid keys are " + StatsSetupConst.ROW_COUNT + " and " + StatsSetupConst.RAW_DATA_SIZE);
}
}
if (changeStatsSucceeded) {
environmentContext = new EnvironmentContext();
environmentContext.putToProperties(StatsSetupConst.STATS_GENERATED, StatsSetupConst.USER);
}
}
AlterTableDesc alterTblDesc = null;
if (isUnset == true) {
alterTblDesc = new AlterTableDesc(AlterTableTypes.DROPPROPS, partSpec, expectView);
if (ast.getChild(1) != null) {
alterTblDesc.setDropIfExists(true);
}
} else {
alterTblDesc = new AlterTableDesc(AlterTableTypes.ADDPROPS, partSpec, expectView);
}
alterTblDesc.setProps(mapProp);
alterTblDesc.setEnvironmentContext(environmentContext);
alterTblDesc.setOldName(tableName);
addInputsOutputsAlterTable(tableName, partSpec, alterTblDesc);
rootTasks.add(TaskFactory.get(new DDLWork(getInputs(), getOutputs(), alterTblDesc), conf));
}
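All the EnvironmentContext work in this analyzer amounts to: if the user is setting a valid row_count or raw_data_size, tag the operation as USER-generated stats so the DO_NOT_UPDATE_STATS default applied later by DDLTask gets lifted. A stripped-down sketch of that decision follows; the class and method names are illustrative, and validation of the command type plus the task wiring are omitted.

import java.util.Map;

import org.apache.hadoop.hive.common.StatsSetupConst;
import org.apache.hadoop.hive.metastore.api.EnvironmentContext;

public class UserStatsContextSketch {

    // Returns a context marking stats as USER-generated when the properties being set
    // include a parseable row_count or raw_data_size; returns null otherwise.
    static EnvironmentContext forUserStats(Map<String, String> props) {
        for (Map.Entry<String, String> entry : props.entrySet()) {
            if (entry.getKey().equals(StatsSetupConst.ROW_COUNT)
                    || entry.getKey().equals(StatsSetupConst.RAW_DATA_SIZE)) {
                Long.parseLong(entry.getValue()); // rejects non-numeric stats values
                EnvironmentContext ctx = new EnvironmentContext();
                ctx.putToProperties(StatsSetupConst.STATS_GENERATED, StatsSetupConst.USER);
                return ctx;
            }
        }
        return null;
    }
}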
Use of org.apache.hadoop.hive.metastore.api.EnvironmentContext in project hive by apache.
The class StatsTask, method aggregateStats.
private int aggregateStats(Hive db) {
StatsAggregator statsAggregator = null;
int ret = 0;
StatsCollectionContext scc = null;
EnvironmentContext environmentContext = null;
try {
// Stats setup:
final Warehouse wh = new Warehouse(conf);
if (!getWork().getNoStatsAggregator() && !getWork().isNoScanAnalyzeCommand()) {
try {
scc = getContext();
statsAggregator = createStatsAggregator(scc, conf);
} catch (HiveException e) {
if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_STATS_RELIABLE)) {
throw e;
}
console.printError(ErrorMsg.STATS_SKIPPING_BY_ERROR.getErrorCodedMsg(e.toString()));
}
}
List<Partition> partitions = getPartitionsList(db);
boolean atomic = HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_STATS_ATOMIC);
String tableFullName = table.getDbName() + "." + table.getTableName();
if (partitions == null) {
org.apache.hadoop.hive.metastore.api.Table tTable = table.getTTable();
Map<String, String> parameters = tTable.getParameters();
// an ACID table will not have accurate stats unless they are set through the analyze command.
if (work.getTableSpecs() == null && AcidUtils.isAcidTable(table)) {
StatsSetupConst.setBasicStatsState(parameters, StatsSetupConst.FALSE);
} else if (work.getTableSpecs() != null || (work.getLoadTableDesc() != null && work.getLoadTableDesc().getReplace()) || (work.getLoadFileDesc() != null && !work.getLoadFileDesc().getDestinationCreateTable().isEmpty())) {
StatsSetupConst.setBasicStatsState(parameters, StatsSetupConst.TRUE);
}
// non-partitioned tables:
if (!existStats(parameters) && atomic) {
return 0;
}
// e.g., if a file is being loaded, the old row count is no longer valid
if (work.isClearAggregatorStats()) {
// we choose to keep the invalid stats and only change the setting.
StatsSetupConst.setBasicStatsState(parameters, StatsSetupConst.FALSE);
}
updateQuickStats(wh, parameters, tTable.getSd());
if (StatsSetupConst.areBasicStatsUptoDate(parameters)) {
if (statsAggregator != null) {
String prefix = getAggregationPrefix(table, null);
updateStats(statsAggregator, parameters, prefix, atomic);
}
// write table stats to metastore
if (!getWork().getNoStatsAggregator()) {
environmentContext = new EnvironmentContext();
environmentContext.putToProperties(StatsSetupConst.STATS_GENERATED, StatsSetupConst.TASK);
}
}
getHive().alterTable(tableFullName, new Table(tTable), environmentContext);
if (conf.getBoolVar(ConfVars.TEZ_EXEC_SUMMARY)) {
console.printInfo("Table " + tableFullName + " stats: [" + toString(parameters) + ']');
}
LOG.info("Table " + tableFullName + " stats: [" + toString(parameters) + ']');
} else {
// Partitioned table:
// Need to get the old stats of the partition
// and update the table stats based on the old and new stats.
List<Partition> updates = new ArrayList<Partition>();
//Get the file status up-front for all partitions. Beneficial in cases of blob storage systems
final Map<String, FileStatus[]> fileStatusMap = new ConcurrentHashMap<String, FileStatus[]>();
int poolSize = conf.getInt(ConfVars.HIVE_MOVE_FILES_THREAD_COUNT.varname, 1);
// In case thread count is set to 0, use single thread.
poolSize = Math.max(poolSize, 1);
final ExecutorService pool = Executors.newFixedThreadPool(poolSize, new ThreadFactoryBuilder().setDaemon(true).setNameFormat("stats-updater-thread-%d").build());
final List<Future<Void>> futures = Lists.newLinkedList();
LOG.debug("Getting file stats of all partitions. threadpool size:" + poolSize);
try {
for (final Partition partn : partitions) {
final String partitionName = partn.getName();
final org.apache.hadoop.hive.metastore.api.Partition tPart = partn.getTPartition();
Map<String, String> parameters = tPart.getParameters();
if (!existStats(parameters) && atomic) {
continue;
}
futures.add(pool.submit(new Callable<Void>() {
@Override
public Void call() throws Exception {
FileStatus[] partfileStatus = wh.getFileStatusesForSD(tPart.getSd());
fileStatusMap.put(partitionName, partfileStatus);
return null;
}
}));
}
pool.shutdown();
for (Future<Void> future : futures) {
future.get();
}
} catch (InterruptedException e) {
LOG.debug("Cancelling " + futures.size() + " file stats lookup tasks");
//cancel other futures
for (Future future : futures) {
future.cancel(true);
}
// Fail the query if the stats are supposed to be reliable
if (work.isStatsReliable()) {
ret = 1;
}
} finally {
if (pool != null) {
pool.shutdownNow();
}
LOG.debug("Finished getting file stats of all partitions");
}
for (Partition partn : partitions) {
//
// get the old partition stats
//
org.apache.hadoop.hive.metastore.api.Partition tPart = partn.getTPartition();
Map<String, String> parameters = tPart.getParameters();
if (work.getTableSpecs() == null && AcidUtils.isAcidTable(table)) {
StatsSetupConst.setBasicStatsState(parameters, StatsSetupConst.FALSE);
} else if (work.getTableSpecs() != null || (work.getLoadTableDesc() != null && work.getLoadTableDesc().getReplace()) || (work.getLoadFileDesc() != null && !work.getLoadFileDesc().getDestinationCreateTable().isEmpty())) {
StatsSetupConst.setBasicStatsState(parameters, StatsSetupConst.TRUE);
}
// a partition is added to fileStatusMap only when its stats exist
if (!fileStatusMap.containsKey(partn.getName())) {
continue;
}
// e.g., if a file is being loaded, the old row count is no longer valid
if (work.isClearAggregatorStats()) {
// we choose to keep the invalid stats and only change the setting.
StatsSetupConst.setBasicStatsState(parameters, StatsSetupConst.FALSE);
}
updateQuickStats(parameters, fileStatusMap.get(partn.getName()));
if (StatsSetupConst.areBasicStatsUptoDate(parameters)) {
if (statsAggregator != null) {
String prefix = getAggregationPrefix(table, partn);
updateStats(statsAggregator, parameters, prefix, atomic);
}
if (!getWork().getNoStatsAggregator()) {
environmentContext = new EnvironmentContext();
environmentContext.putToProperties(StatsSetupConst.STATS_GENERATED, StatsSetupConst.TASK);
}
}
updates.add(new Partition(table, tPart));
if (conf.getBoolVar(ConfVars.TEZ_EXEC_SUMMARY)) {
console.printInfo("Partition " + tableFullName + partn.getSpec() + " stats: [" + toString(parameters) + ']');
}
LOG.info("Partition " + tableFullName + partn.getSpec() + " stats: [" + toString(parameters) + ']');
}
if (!updates.isEmpty()) {
db.alterPartitions(tableFullName, updates, environmentContext);
}
}
} catch (Exception e) {
console.printInfo("[Warning] could not update stats.", "Failed with exception " + e.getMessage() + "\n" + StringUtils.stringifyException(e));
// Fail the query if the stats are supposed to be reliable
if (work.isStatsReliable()) {
ret = 1;
}
} finally {
if (statsAggregator != null) {
statsAggregator.closeConnection(scc);
}
}
// a return value of 0 indicates success; anything else indicates failure
return ret;
}
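Once the aggregator has written fresh basic stats into the table parameters, the task tags the alter as TASK-generated so the metastore keeps those parameters instead of discarding or recomputing them. A minimal, hedged sketch of that final write follows; the class, method, and parameter names are placeholders, and only calls already shown above are used.

import org.apache.hadoop.hive.common.StatsSetupConst;
import org.apache.hadoop.hive.metastore.api.EnvironmentContext;
import org.apache.hadoop.hive.metastore.api.InvalidOperationException;
import org.apache.hadoop.hive.ql.metadata.Hive;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.metadata.Table;

public class TaskStatsWriteSketch {

    // Persist task-computed basic stats for a table, mirroring the end of StatsTask.aggregateStats.
    static void persist(Hive db, String tableFullName, Table tbl) throws HiveException {
        EnvironmentContext ctx = new EnvironmentContext();
        ctx.putToProperties(StatsSetupConst.STATS_GENERATED, StatsSetupConst.TASK);
        try {
            // the stats already present in tbl's parameters are written as-is
            db.alterTable(tableFullName, tbl, ctx);
        } catch (InvalidOperationException e) {
            throw new HiveException(e);
        }
    }
}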
Use of org.apache.hadoop.hive.metastore.api.EnvironmentContext in project hive by apache.
The class StatsNoJobTask, method aggregateStats.
private int aggregateStats(ExecutorService threadPool, Hive db) {
int ret = 0;
try {
Collection<Partition> partitions = null;
if (work.getPrunedPartitionList() == null) {
partitions = getPartitionsList();
} else {
partitions = work.getPrunedPartitionList().getPartitions();
}
// non-partitioned table
if (partitions == null) {
org.apache.hadoop.hive.metastore.api.Table tTable = table.getTTable();
Map<String, String> parameters = tTable.getParameters();
try {
Path dir = new Path(tTable.getSd().getLocation());
long numRows = 0;
long rawDataSize = 0;
long fileSize = 0;
long numFiles = 0;
FileSystem fs = dir.getFileSystem(conf);
FileStatus[] fileList = HiveStatsUtils.getFileStatusRecurse(dir, -1, fs);
boolean statsAvailable = false;
for (FileStatus file : fileList) {
if (!file.isDir()) {
InputFormat<?, ?> inputFormat = ReflectionUtil.newInstance(table.getInputFormatClass(), jc);
InputSplit dummySplit = new FileSplit(file.getPath(), 0, 0, new String[] { table.getDataLocation().toString() });
if (file.getLen() == 0) {
numFiles += 1;
statsAvailable = true;
} else {
org.apache.hadoop.mapred.RecordReader<?, ?> recordReader = inputFormat.getRecordReader(dummySplit, jc, Reporter.NULL);
StatsProvidingRecordReader statsRR;
if (recordReader instanceof StatsProvidingRecordReader) {
statsRR = (StatsProvidingRecordReader) recordReader;
numRows += statsRR.getStats().getRowCount();
rawDataSize += statsRR.getStats().getRawDataSize();
fileSize += file.getLen();
numFiles += 1;
statsAvailable = true;
}
recordReader.close();
}
}
}
if (statsAvailable) {
parameters.put(StatsSetupConst.ROW_COUNT, String.valueOf(numRows));
parameters.put(StatsSetupConst.RAW_DATA_SIZE, String.valueOf(rawDataSize));
parameters.put(StatsSetupConst.TOTAL_SIZE, String.valueOf(fileSize));
parameters.put(StatsSetupConst.NUM_FILES, String.valueOf(numFiles));
EnvironmentContext environmentContext = new EnvironmentContext();
environmentContext.putToProperties(StatsSetupConst.STATS_GENERATED, StatsSetupConst.TASK);
db.alterTable(tableFullName, new Table(tTable), environmentContext);
String msg = "Table " + tableFullName + " stats: [" + toString(parameters) + ']';
LOG.debug(msg);
console.printInfo(msg);
} else {
String msg = "Table " + tableFullName + " does not provide stats.";
LOG.debug(msg);
}
} catch (Exception e) {
console.printInfo("[Warning] could not update stats for " + tableFullName + ".", "Failed with exception " + e.getMessage() + "\n" + StringUtils.stringifyException(e));
}
} else {
// Partitioned table
for (Partition partn : partitions) {
threadPool.execute(new StatsCollection(partn));
}
LOG.debug("Stats collection waiting for threadpool to shutdown..");
shutdownAndAwaitTermination(threadPool);
LOG.debug("Stats collection threadpool shutdown successful.");
ret = updatePartitions(db);
}
} catch (Exception e) {
// Fail the query if the stats are supposed to be reliable
if (work.isStatsReliable()) {
ret = -1;
}
}
// a return value of 0 indicates success; anything else indicates failure
return ret;
}
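The footer-driven path ends the same way: the counters read via StatsProvidingRecordReader go straight into the table (or partition) parameters, and an EnvironmentContext tagged STATS_GENERATED = TASK tells the metastore to trust them. A hedged sketch of that parameter/context pairing follows; the class name, method name, and counter arguments are placeholders.

import java.util.Map;

import org.apache.hadoop.hive.common.StatsSetupConst;
import org.apache.hadoop.hive.metastore.api.EnvironmentContext;

public class FooterStatsSketch {

    // Record footer-derived basic stats and build the context marking them as task-generated.
    static EnvironmentContext record(Map<String, String> parameters, long numRows,
            long rawDataSize, long totalSize, long numFiles) {
        parameters.put(StatsSetupConst.ROW_COUNT, String.valueOf(numRows));
        parameters.put(StatsSetupConst.RAW_DATA_SIZE, String.valueOf(rawDataSize));
        parameters.put(StatsSetupConst.TOTAL_SIZE, String.valueOf(totalSize));
        parameters.put(StatsSetupConst.NUM_FILES, String.valueOf(numFiles));
        EnvironmentContext ctx = new EnvironmentContext();
        ctx.putToProperties(StatsSetupConst.STATS_GENERATED, StatsSetupConst.TASK);
        return ctx;
    }
}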