Use of org.apache.hadoop.hive.metastore.api.MetaException in project hive by apache.
From the class MetaStoreUtils, method getFieldsFromDeserializer:
/**
 * @param tableName the table name, optionally followed by dot-separated nested field names
 * @param deserializer the deserializer whose ObjectInspector describes the table layout
 * @return the list of fields
 * @throws SerDeException if the deserializer cannot provide an ObjectInspector
 * @throws MetaException if a nested field path cannot be resolved
 */
public static List<FieldSchema> getFieldsFromDeserializer(String tableName, Deserializer deserializer) throws SerDeException, MetaException {
  ObjectInspector oi = deserializer.getObjectInspector();
  String[] names = tableName.split("\\.");
  String last_name = names[names.length - 1];
  // names[0] is the table name itself; walk the remaining components into nested types
  for (int i = 1; i < names.length; i++) {
    if (oi instanceof StructObjectInspector) {
      StructObjectInspector soi = (StructObjectInspector) oi;
      StructField sf = soi.getStructFieldRef(names[i]);
      if (sf == null) {
        throw new MetaException("Invalid Field " + names[i]);
      } else {
        oi = sf.getFieldObjectInspector();
      }
    } else if (oi instanceof ListObjectInspector && names[i].equalsIgnoreCase("$elem$")) {
      ListObjectInspector loi = (ListObjectInspector) oi;
      oi = loi.getListElementObjectInspector();
    } else if (oi instanceof MapObjectInspector && names[i].equalsIgnoreCase("$key$")) {
      MapObjectInspector moi = (MapObjectInspector) oi;
      oi = moi.getMapKeyObjectInspector();
    } else if (oi instanceof MapObjectInspector && names[i].equalsIgnoreCase("$value$")) {
      MapObjectInspector moi = (MapObjectInspector) oi;
      oi = moi.getMapValueObjectInspector();
    } else {
      throw new MetaException("Unknown type for " + names[i]);
    }
  }
  ArrayList<FieldSchema> str_fields = new ArrayList<FieldSchema>();
  // rules on how to recurse the ObjectInspector based on its type
  if (oi.getCategory() != Category.STRUCT) {
    str_fields.add(new FieldSchema(last_name, oi.getTypeName(), FROM_SERIALIZER));
  } else {
    List<? extends StructField> fields = ((StructObjectInspector) oi).getAllStructFieldRefs();
    for (int i = 0; i < fields.size(); i++) {
      StructField structField = fields.get(i);
      String fieldName = structField.getFieldName();
      String fieldTypeName = structField.getFieldObjectInspector().getTypeName();
      String fieldComment = determineFieldComment(structField.getFieldComment());
      str_fields.add(new FieldSchema(fieldName, fieldTypeName, fieldComment));
    }
  }
  return str_fields;
}
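
A hedged caller sketch: deriving a schema through this method from a freshly configured SerDe. The LazySimpleSerDe setup and the property values below are assumptions for illustration, not part of the Hive snippet above:

import java.util.List;
import java.util.Properties;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.metastore.MetaStoreUtils;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.metastore.api.MetaException;
import org.apache.hadoop.hive.serde.serdeConstants;
import org.apache.hadoop.hive.serde2.SerDeException;
import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe;

public class SchemaFromSerDe {
  public static void main(String[] args) throws SerDeException, MetaException {
    // hypothetical two-column table definition
    Properties props = new Properties();
    props.setProperty(serdeConstants.LIST_COLUMNS, "id,payload");
    props.setProperty(serdeConstants.LIST_COLUMN_TYPES, "int,string");

    LazySimpleSerDe serde = new LazySimpleSerDe();
    serde.initialize(new Configuration(), props);

    // "my_table" resolves to the whole struct; "my_table.payload" would instead
    // drill into the payload field ("$elem$"/"$key$"/"$value$" into lists/maps)
    List<FieldSchema> fields = MetaStoreUtils.getFieldsFromDeserializer("my_table", serde);
    for (FieldSchema fs : fields) {
      System.out.println(fs.getName() + " : " + fs.getType());
    }
  }
}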
Use of org.apache.hadoop.hive.metastore.api.MetaException in project hive by apache.
From the class DruidStorageHandler, method commitCreateTable:
@Override
public void commitCreateTable(Table table) throws MetaException {
  if (MetaStoreUtils.isExternalTable(table)) {
    return;
  }
  Lifecycle lifecycle = new Lifecycle();
  LOG.info(String.format("Committing table [%s] to the druid metastore", table.getDbName()));
  final Path tableDir = getSegmentDescriptorDir();
  try {
    List<DataSegment> segmentList = DruidStorageHandlerUtils.getPublishedSegments(tableDir, getConf());
    LOG.info(String.format("Found [%d] segments under path [%s]", segmentList.size(), tableDir));
    druidSqlMetadataStorageUpdaterJobHandler.publishSegments(druidMetadataStorageTablesConfig.getSegmentsTable(), segmentList, DruidStorageHandlerUtils.JSON_MAPPER);
    final String coordinatorAddress = HiveConf.getVar(getConf(), HiveConf.ConfVars.HIVE_DRUID_COORDINATOR_DEFAULT_ADDRESS);
    int maxTries = HiveConf.getIntVar(getConf(), HiveConf.ConfVars.HIVE_DRUID_MAX_TRIES);
    final String dataSourceName = table.getParameters().get(Constants.DRUID_DATA_SOURCE);
    LOG.info(String.format("Checking load status from coordinator [%s]", coordinatorAddress));
    // check if the coordinator is up
    httpClient = makeHttpClient(lifecycle);
    try {
      lifecycle.start();
    } catch (Exception e) {
      Throwables.propagate(e);
    }
    String coordinatorResponse = null;
    try {
      coordinatorResponse = RetryUtils.retry(new Callable<String>() {
        @Override
        public String call() throws Exception {
          return DruidStorageHandlerUtils.getURL(httpClient, new URL(String.format("http://%s/status", coordinatorAddress)));
        }
      }, new Predicate<Throwable>() {
        @Override
        public boolean apply(@Nullable Throwable input) {
          // retry only on transient I/O failures
          return input instanceof IOException;
        }
      }, maxTries);
    } catch (Exception e) {
      console.printInfo("Will skip waiting for data loading");
      return;
    }
    if (Strings.isNullOrEmpty(coordinatorResponse)) {
      console.printInfo("Will skip waiting for data loading");
      return;
    }
    console.printInfo(String.format("Waiting for the loading of [%s] segments", segmentList.size()));
    long passiveWaitTimeMs = HiveConf.getLongVar(getConf(), HiveConf.ConfVars.HIVE_DRUID_PASSIVE_WAIT_TIME);
    ImmutableSet<URL> setOfUrls = FluentIterable.from(segmentList).transform(new Function<DataSegment, URL>() {
      @Override
      public URL apply(DataSegment dataSegment) {
        try {
          // Need to make sure that we are using UTC, since most Druid clusters use UTC by default
          return new URL(String.format("http://%s/druid/coordinator/v1/datasources/%s/segments/%s", coordinatorAddress, dataSourceName, DataSegment.makeDataSegmentIdentifier(dataSegment.getDataSource(), new DateTime(dataSegment.getInterval().getStartMillis(), DateTimeZone.UTC), new DateTime(dataSegment.getInterval().getEndMillis(), DateTimeZone.UTC), dataSegment.getVersion(), dataSegment.getShardSpec())));
        } catch (MalformedURLException e) {
          Throwables.propagate(e);
        }
        return null;
      }
    }).toSet();
    int numRetries = 0;
    while (numRetries++ < maxTries && !setOfUrls.isEmpty()) {
      // keep only the URLs whose segments are not loaded yet
      setOfUrls = ImmutableSet.copyOf(Sets.filter(setOfUrls, new Predicate<URL>() {
        @Override
        public boolean apply(URL input) {
          try {
            String result = DruidStorageHandlerUtils.getURL(httpClient, input);
            LOG.debug(String.format("Checking segment [%s]; response is [%s]", input, result));
            return Strings.isNullOrEmpty(result);
          } catch (IOException e) {
            LOG.error(String.format("Error while checking URL [%s]", input), e);
            return true;
          }
        }
      }));
      try {
        if (!setOfUrls.isEmpty()) {
          Thread.sleep(passiveWaitTimeMs);
        }
      } catch (InterruptedException e) {
        // restore the interrupt flag before propagating
        Thread.currentThread().interrupt();
        Throwables.propagate(e);
      }
    }
    if (!setOfUrls.isEmpty()) {
      // Not throwing an exception, since this might be a transient issue blocking loading
      console.printError(String.format("Wait time exhausted and we have [%s] out of [%s] segments not loaded yet", setOfUrls.size(), segmentList.size()));
    }
  } catch (IOException e) {
    LOG.error("Exception during commit", e);
    Throwables.propagate(e);
  } finally {
    cleanWorkingDir();
    lifecycle.stop();
  }
}
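
Stripped of the Druid and Guava machinery, the wait loop above is a bounded poll: on each pass, drop the segments that report loaded, then sleep. A self-contained sketch of that pattern under hypothetical names (plain Java, no external dependencies):

import java.util.HashSet;
import java.util.Set;

// Hypothetical, self-contained sketch of the bounded polling pattern used in
// commitCreateTable: repeatedly drop the items that report "done", then sleep.
public class BoundedPoller {

  interface StatusCheck {
    // returns true while the item is still pending
    boolean isPending(String item);
  }

  static Set<String> waitForLoad(Set<String> items, StatusCheck check,
                                 int maxTries, long waitMs) throws InterruptedException {
    Set<String> pending = new HashSet<>(items);
    int tries = 0;
    while (tries++ < maxTries && !pending.isEmpty()) {
      pending.removeIf(item -> !check.isPending(item));
      if (!pending.isEmpty()) {
        Thread.sleep(waitMs);
      }
    }
    // whatever is left was not confirmed within the retry budget;
    // the Hive code logs this instead of throwing, treating it as transient
    return pending;
  }
}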
Use of org.apache.hadoop.hive.metastore.api.MetaException in project hive by apache.
From the class Hive, method loadPartition:
/**
 * Load a directory into a Hive table partition. Alters the existing content
 * of the partition with the contents of loadPath. If the partition does not
 * exist, it is created. Files in loadPath are moved into Hive, but the
 * directory itself is not removed.
 *
 * @param loadPath
 *          directory containing files to load into the table
 * @param tbl
 *          table to be loaded
 * @param partSpec
 *          defines which partition needs to be loaded
 * @param replace
 *          if true, replace files in the partition; otherwise add files to
 *          the partition
 * @param inheritTableSpecs
 *          if true, on [re]creating the partition, take the
 *          location/inputformat/outputformat/serde details from the table spec
 * @param isSrcLocal
 *          if the source directory is LOCAL
 * @param isAcid
 *          true if this is an ACID operation
 */
public Partition loadPartition(Path loadPath, Table tbl, Map<String, String> partSpec, boolean replace, boolean inheritTableSpecs, boolean isSkewedStoreAsSubdir, boolean isSrcLocal, boolean isAcid, boolean hasFollowingStatsTask) throws HiveException {
  Path tblDataLocationPath = tbl.getDataLocation();
  try {
    Partition oldPart = getPartition(tbl, partSpec, false);
    /**
     * Move files before creating the partition, since downstream processes
     * check for the existence of the partition in metadata before accessing
     * the data. If the partition were created before the data is moved,
     * downstream waiting processes might move forward with partial data.
     */
    Path oldPartPath = (oldPart != null) ? oldPart.getDataLocation() : null;
    Path newPartPath = null;
    if (inheritTableSpecs) {
      Path partPath = new Path(tbl.getDataLocation(), Warehouse.makePartPath(partSpec));
      newPartPath = new Path(tblDataLocationPath.toUri().getScheme(), tblDataLocationPath.toUri().getAuthority(), partPath.toUri().getPath());
      if (oldPart != null) {
        /*
         * If we are moving the partition across filesystem boundaries,
         * inherit from the table properties. Otherwise (same filesystem) use
         * the original partition location.
         *
         * See HIVE-1707 and HIVE-2117 for background.
         */
        FileSystem oldPartPathFS = oldPartPath.getFileSystem(getConf());
        FileSystem loadPathFS = loadPath.getFileSystem(getConf());
        if (FileUtils.equalsFileSystem(oldPartPathFS, loadPathFS)) {
          newPartPath = oldPartPath;
        }
      }
    } else {
      newPartPath = oldPartPath;
    }
    List<Path> newFiles = null;
    PerfLogger perfLogger = SessionState.getPerfLogger();
    perfLogger.PerfLogBegin("MoveTask", "FileMoves");
    if (replace || (oldPart == null && !isAcid)) {
      replaceFiles(tbl.getPath(), loadPath, newPartPath, oldPartPath, getConf(), isSrcLocal);
    } else {
      if (conf.getBoolVar(ConfVars.FIRE_EVENTS_FOR_DML) && !tbl.isTemporary() && oldPart != null) {
        newFiles = Collections.synchronizedList(new ArrayList<Path>());
      }
      FileSystem fs = tbl.getDataLocation().getFileSystem(conf);
      Hive.copyFiles(conf, loadPath, newPartPath, fs, isSrcLocal, isAcid, newFiles);
    }
    perfLogger.PerfLogEnd("MoveTask", "FileMoves");
    Partition newTPart = oldPart != null ? oldPart : new Partition(tbl, partSpec, newPartPath);
    alterPartitionSpecInMemory(tbl, partSpec, newTPart.getTPartition(), inheritTableSpecs, newPartPath.toString());
    validatePartition(newTPart);
    if ((null != newFiles) || replace) {
      fireInsertEvent(tbl, partSpec, newFiles);
    } else {
      LOG.debug("No new files were created, and this is not a replace. Skipping generating INSERT event.");
    }
    // column stats will be inaccurate
    StatsSetupConst.clearColumnStatsState(newTPart.getParameters());
    // recreate the partition if it existed before
    if (isSkewedStoreAsSubdir) {
      org.apache.hadoop.hive.metastore.api.Partition newCreatedTpart = newTPart.getTPartition();
      SkewedInfo skewedInfo = newCreatedTpart.getSd().getSkewedInfo();
      /* Construct list bucketing location mappings from sub-directory names. */
      Map<List<String>, String> skewedColValueLocationMaps = constructListBucketingLocationMap(newPartPath, skewedInfo);
      /* Add list bucketing location mappings. */
      skewedInfo.setSkewedColValueLocationMaps(skewedColValueLocationMaps);
      newCreatedTpart.getSd().setSkewedInfo(skewedInfo);
    }
    if (!this.getConf().getBoolVar(HiveConf.ConfVars.HIVESTATSAUTOGATHER)) {
      StatsSetupConst.setBasicStatsState(newTPart.getParameters(), StatsSetupConst.FALSE);
    }
    if (oldPart == null) {
      newTPart.getTPartition().setParameters(new HashMap<String, String>());
      if (this.getConf().getBoolVar(HiveConf.ConfVars.HIVESTATSAUTOGATHER)) {
        StatsSetupConst.setBasicStatsStateForCreateTable(newTPart.getParameters(), StatsSetupConst.TRUE);
      }
      MetaStoreUtils.populateQuickStats(HiveStatsUtils.getFileStatusRecurse(newPartPath, -1, newPartPath.getFileSystem(conf)), newTPart.getParameters());
      try {
        LOG.debug("Adding new partition " + newTPart.getSpec());
        getSychronizedMSC().add_partition(newTPart.getTPartition());
      } catch (AlreadyExistsException aee) {
        // With multiple users concurrently issuing insert statements on the same partition,
        // a query may not see the partition at the time it is issued, but then discover that
        // the partition actually exists when it tries to add it to the metastore, getting
        // AlreadyExistsException because an earlier query just created it (a race condition).
        // For example, imagine such a table is created:
        //   create table T (name char(50)) partitioned by (ds string);
        // and the following two queries are launched at the same time, from different sessions:
        //   insert into table T partition (ds) values ('Bob', 'today'); -- creates the partition 'today'
        //   insert into table T partition (ds) values ('Joe', 'today'); -- will fail with AlreadyExistsException
        // In that case, we want to retry with alterPartition.
        LOG.debug("Caught AlreadyExistsException, trying to alter partition instead");
        setStatsPropAndAlterPartition(hasFollowingStatsTask, tbl, newTPart);
      }
    } else {
      setStatsPropAndAlterPartition(hasFollowingStatsTask, tbl, newTPart);
    }
    return newTPart;
  } catch (IOException e) {
    LOG.error(StringUtils.stringifyException(e));
    throw new HiveException(e);
  } catch (MetaException e) {
    LOG.error(StringUtils.stringifyException(e));
    throw new HiveException(e);
  } catch (InvalidOperationException e) {
    LOG.error(StringUtils.stringifyException(e));
    throw new HiveException(e);
  } catch (TException e) {
    LOG.error(StringUtils.stringifyException(e));
    throw new HiveException(e);
  }
}
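
The add_partition/alterPartition fallback above is a general create-then-alter-on-conflict idiom for racing writers. A minimal, self-contained sketch of just that idiom (all names hypothetical; this is not Hive's API):

import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;

// Hypothetical sketch of the create-then-alter-on-conflict idiom used when
// concurrent INSERTs race to create the same partition.
public class UpsertOnConflict {

  static class AlreadyExistsException extends Exception {}

  static final Map<String, String> STORE = new ConcurrentHashMap<>();

  static void addPartition(String key, String value) throws AlreadyExistsException {
    if (STORE.putIfAbsent(key, value) != null) {
      throw new AlreadyExistsException();
    }
  }

  static void alterPartition(String key, String value) {
    STORE.put(key, value);
  }

  static void commit(String key, String value) {
    try {
      addPartition(key, value);
    } catch (AlreadyExistsException e) {
      // another session created the partition first; fall back to altering it
      alterPartition(key, value);
    }
  }
}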
Use of org.apache.hadoop.hive.metastore.api.MetaException in project hive by apache.
From the class GenMapRedUtils, method getInputPathsForPartialScan:
public static List<Path> getInputPathsForPartialScan(TableScanOperator tableScanOp, Appendable aggregationKey) throws SemanticException {
  List<Path> inputPaths = new ArrayList<Path>();
  switch (tableScanOp.getConf().getTableMetadata().getTableSpec().specType) {
    case TABLE_ONLY:
      inputPaths.add(tableScanOp.getConf().getTableMetadata().getTableSpec().tableHandle.getPath());
      break;
    case STATIC_PARTITION:
      Partition part = tableScanOp.getConf().getTableMetadata().getTableSpec().partHandle;
      try {
        aggregationKey.append(Warehouse.makePartPath(part.getSpec()));
      } catch (MetaException e) {
        throw new SemanticException(ErrorMsg.ANALYZE_TABLE_PARTIALSCAN_AGGKEY.getMsg(part.getDataLocation().toString() + e.getMessage()));
      } catch (IOException e) {
        throw new RuntimeException(e);
      }
      inputPaths.add(part.getDataLocation());
      break;
    default:
      assert false;
  }
  return inputPaths;
}
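
The STATIC_PARTITION branch above hinges on Warehouse.makePartPath, which renders a partition spec as a path fragment and throws MetaException on an invalid spec. A small sketch of that call in isolation (the spec values are made up):

import java.util.LinkedHashMap;
import java.util.Map;
import org.apache.hadoop.hive.metastore.Warehouse;
import org.apache.hadoop.hive.metastore.api.MetaException;

public class PartPathDemo {
  public static void main(String[] args) throws MetaException {
    // LinkedHashMap keeps partition-column order, which determines the path layout
    Map<String, String> spec = new LinkedHashMap<>();
    spec.put("ds", "2008-04-08");
    spec.put("hr", "12");
    // prints something like "ds=2008-04-08/hr=12"; an invalid spec raises MetaException
    System.out.println(Warehouse.makePartPath(spec));
  }
}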
Use of org.apache.hadoop.hive.metastore.api.MetaException in project hive by apache.
From the class BinaryColumnStatsAggregator, method aggregate:
@Override
public ColumnStatisticsObj aggregate(String colName, List<String> partNames, List<ColumnStatistics> css) throws MetaException {
  ColumnStatisticsObj statsObj = null;
  BinaryColumnStatsData aggregateData = null;
  String colType = null;
  for (ColumnStatistics cs : css) {
    if (cs.getStatsObjSize() != 1) {
      throw new MetaException("The number of columns should be exactly one in aggrStats, but found " + cs.getStatsObjSize());
    }
    ColumnStatisticsObj cso = cs.getStatsObjIterator().next();
    if (statsObj == null) {
      colType = cso.getColType();
      statsObj = ColumnStatsAggregatorFactory.newColumnStaticsObj(colName, colType, cso.getStatsData().getSetField());
    }
    BinaryColumnStatsData newData = cso.getStatsData().getBinaryStats();
    if (aggregateData == null) {
      aggregateData = newData.deepCopy();
    } else {
      // max column length is the max across partitions; the average column
      // length is also combined with max(), giving a conservative upper bound
      aggregateData.setMaxColLen(Math.max(aggregateData.getMaxColLen(), newData.getMaxColLen()));
      aggregateData.setAvgColLen(Math.max(aggregateData.getAvgColLen(), newData.getAvgColLen()));
      aggregateData.setNumNulls(aggregateData.getNumNulls() + newData.getNumNulls());
    }
  }
  ColumnStatisticsData columnStatisticsData = new ColumnStatisticsData();
  columnStatisticsData.setBinaryStats(aggregateData);
  statsObj.setStatsData(columnStatisticsData);
  return statsObj;
}
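
A hedged driver sketch for the aggregator: two single-column partition statistics are built and merged. The db/table/partition names are placeholders, the thrift constructor argument order follows hive_metastore.thrift, and the aggregator's package has moved between Hive versions, so treat the imports as assumptions:

import java.util.Arrays;
import java.util.List;
import org.apache.hadoop.hive.metastore.api.*;
// package location varies across Hive versions; this is an assumption
import org.apache.hadoop.hive.metastore.hbase.stats.BinaryColumnStatsAggregator;

public class BinaryAggDemo {
  static ColumnStatistics stats(String partName, long maxLen, double avgLen, long nulls) {
    // BinaryColumnStatsData(maxColLen, avgColLen, numNulls)
    ColumnStatisticsData data = ColumnStatisticsData.binaryStats(
        new BinaryColumnStatsData(maxLen, avgLen, nulls));
    ColumnStatisticsDesc desc = new ColumnStatisticsDesc(false, "default", "t");
    desc.setPartName(partName);
    return new ColumnStatistics(desc, Arrays.asList(
        new ColumnStatisticsObj("payload", "binary", data)));
  }

  public static void main(String[] args) throws MetaException {
    List<ColumnStatistics> css = Arrays.asList(
        stats("ds=1", 10, 4.0, 1),
        stats("ds=2", 25, 6.5, 3));
    ColumnStatisticsObj merged = new BinaryColumnStatsAggregator()
        .aggregate("payload", Arrays.asList("ds=1", "ds=2"), css);
    // expected: maxColLen=25, avgColLen=6.5 (max of averages), numNulls=4
    System.out.println(merged.getStatsData().getBinaryStats());
  }
}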