Use of org.apache.hadoop.hive.metastore.api.SkewedInfo in project hive by apache.
The class MetaStoreDirectSql, method getPartitionsFromPartitionIds.
/** Should be called with the list short enough to not trip up Oracle/etc. */
private List<Partition> getPartitionsFromPartitionIds(String dbName, String tblName, Boolean isView, List<Object> partIdList) throws MetaException {
boolean doTrace = LOG.isDebugEnabled();
// 1 for comma
int idStringWidth = (int) Math.ceil(Math.log10(partIdList.size())) + 1;
int sbCapacity = partIdList.size() * idStringWidth;
// Prepare StringBuilder for "PART_ID in (...)" to use in future queries.
StringBuilder partSb = new StringBuilder(sbCapacity);
for (Object partitionId : partIdList) {
partSb.append(extractSqlLong(partitionId)).append(",");
}
String partIds = trimCommaList(partSb);
// Get most of the fields for the IDs provided.
// Assume db and table names are the same for all partitions, as provided in the arguments.
String queryText = "select \"PARTITIONS\".\"PART_ID\", \"SDS\".\"SD_ID\", \"SDS\".\"CD_ID\","
    + " \"SERDES\".\"SERDE_ID\", \"PARTITIONS\".\"CREATE_TIME\","
    + " \"PARTITIONS\".\"LAST_ACCESS_TIME\", \"SDS\".\"INPUT_FORMAT\", \"SDS\".\"IS_COMPRESSED\","
    + " \"SDS\".\"IS_STOREDASSUBDIRECTORIES\", \"SDS\".\"LOCATION\", \"SDS\".\"NUM_BUCKETS\","
    + " \"SDS\".\"OUTPUT_FORMAT\", \"SERDES\".\"NAME\", \"SERDES\".\"SLIB\" "
    + "from \"PARTITIONS\""
    + " left outer join \"SDS\" on \"PARTITIONS\".\"SD_ID\" = \"SDS\".\"SD_ID\" "
    + " left outer join \"SERDES\" on \"SDS\".\"SERDE_ID\" = \"SERDES\".\"SERDE_ID\" "
    + "where \"PART_ID\" in (" + partIds + ") order by \"PART_NAME\" asc";
long start = doTrace ? System.nanoTime() : 0;
Query query = pm.newQuery("javax.jdo.query.SQL", queryText);
List<Object[]> sqlResult = executeWithArray(query, null, queryText);
long queryTime = doTrace ? System.nanoTime() : 0;
Deadline.checkTimeout();
// Read all the fields and create partitions, SDs and serdes.
TreeMap<Long, Partition> partitions = new TreeMap<Long, Partition>();
TreeMap<Long, StorageDescriptor> sds = new TreeMap<Long, StorageDescriptor>();
TreeMap<Long, SerDeInfo> serdes = new TreeMap<Long, SerDeInfo>();
TreeMap<Long, List<FieldSchema>> colss = new TreeMap<Long, List<FieldSchema>>();
// Keep order by name, consistent with JDO.
ArrayList<Partition> orderedResult = new ArrayList<Partition>(partIdList.size());
// Prepare StringBuilder-s for "in (...)" lists to use in one-to-many queries.
StringBuilder sdSb = new StringBuilder(sbCapacity), serdeSb = new StringBuilder(sbCapacity);
// We expect that there's only one field schema.
StringBuilder colsSb = new StringBuilder(7);
tblName = tblName.toLowerCase();
dbName = dbName.toLowerCase();
for (Object[] fields : sqlResult) {
// Here comes the ugly part...
long partitionId = extractSqlLong(fields[0]);
Long sdId = extractSqlLong(fields[1]);
Long colId = extractSqlLong(fields[2]);
Long serdeId = extractSqlLong(fields[3]);
// A partition must have at least sdId and serdeId set, or nothing set if it's a view.
if (sdId == null || serdeId == null) {
if (isView == null) {
isView = isViewTable(dbName, tblName);
}
if ((sdId != null || colId != null || serdeId != null) || !isView) {
throw new MetaException("Unexpected null for one of the IDs, SD " + sdId + ", serde " + serdeId + " for a " + (isView ? "" : "non-") + " view");
}
}
Partition part = new Partition();
orderedResult.add(part);
// Set the collection fields; some code might not check presence before accessing them.
part.setParameters(new HashMap<String, String>());
part.setValues(new ArrayList<String>());
part.setDbName(dbName);
part.setTableName(tblName);
if (fields[4] != null)
part.setCreateTime(extractSqlInt(fields[4]));
if (fields[5] != null)
part.setLastAccessTime(extractSqlInt(fields[5]));
partitions.put(partitionId, part);
// Probably a view.
if (sdId == null)
continue;
assert serdeId != null;
// We assume each partition has a unique SD.
StorageDescriptor sd = new StorageDescriptor();
StorageDescriptor oldSd = sds.put(sdId, sd);
if (oldSd != null) {
throw new MetaException("Partitions reuse SDs; we don't expect that");
}
// Set the collection fields; some code might not check presence before accessing them.
sd.setSortCols(new ArrayList<Order>());
sd.setBucketCols(new ArrayList<String>());
sd.setParameters(new HashMap<String, String>());
sd.setSkewedInfo(new SkewedInfo(new ArrayList<String>(), new ArrayList<List<String>>(), new HashMap<List<String>, String>()));
sd.setInputFormat((String) fields[6]);
Boolean tmpBoolean = extractSqlBoolean(fields[7]);
if (tmpBoolean != null)
sd.setCompressed(tmpBoolean);
tmpBoolean = extractSqlBoolean(fields[8]);
if (tmpBoolean != null)
sd.setStoredAsSubDirectories(tmpBoolean);
sd.setLocation((String) fields[9]);
if (fields[10] != null)
sd.setNumBuckets(extractSqlInt(fields[10]));
sd.setOutputFormat((String) fields[11]);
sdSb.append(sdId).append(",");
part.setSd(sd);
if (colId != null) {
List<FieldSchema> cols = colss.get(colId);
// We expect that colId will be the same for all (or many) SDs.
if (cols == null) {
cols = new ArrayList<FieldSchema>();
colss.put(colId, cols);
colsSb.append(colId).append(",");
}
sd.setCols(cols);
}
// We assume each SD has a unique serde.
SerDeInfo serde = new SerDeInfo();
SerDeInfo oldSerde = serdes.put(serdeId, serde);
if (oldSerde != null) {
throw new MetaException("SDs reuse serdes; we don't expect that");
}
serde.setParameters(new HashMap<String, String>());
serde.setName((String) fields[12]);
serde.setSerializationLib((String) fields[13]);
serdeSb.append(serdeId).append(",");
sd.setSerdeInfo(serde);
Deadline.checkTimeout();
}
query.closeAll();
timingTrace(doTrace, queryText, start, queryTime);
// Now get all the one-to-many things. Start with partitions.
queryText = "select \"PART_ID\", \"PARAM_KEY\", \"PARAM_VALUE\" from \"PARTITION_PARAMS\"" + " where \"PART_ID\" in (" + partIds + ") and \"PARAM_KEY\" is not null" + " order by \"PART_ID\" asc";
loopJoinOrderedResult(partitions, queryText, 0, new ApplyFunc<Partition>() {
@Override
public void apply(Partition t, Object[] fields) {
t.putToParameters((String) fields[1], (String) fields[2]);
}
});
// Perform conversion of null map values
for (Partition t : partitions.values()) {
t.setParameters(MetaStoreUtils.trimMapNulls(t.getParameters(), convertMapNullsToEmptyStrings));
}
queryText = "select \"PART_ID\", \"PART_KEY_VAL\" from \"PARTITION_KEY_VALS\"" + " where \"PART_ID\" in (" + partIds + ") and \"INTEGER_IDX\" >= 0" + " order by \"PART_ID\" asc, \"INTEGER_IDX\" asc";
loopJoinOrderedResult(partitions, queryText, 0, new ApplyFunc<Partition>() {
@Override
public void apply(Partition t, Object[] fields) {
t.addToValues((String) fields[1]);
}
});
// Prepare the IN (...) lists for the following queries; cut off the trailing commas.
if (sdSb.length() == 0) {
assert serdeSb.length() == 0 && colsSb.length() == 0;
// No SDs, probably a view.
return orderedResult;
}
String sdIds = trimCommaList(sdSb);
String serdeIds = trimCommaList(serdeSb);
String colIds = trimCommaList(colsSb);
// Get all the stuff for SD. Don't do an empty-list check - we expect partitions to have SDs.
queryText = "select \"SD_ID\", \"PARAM_KEY\", \"PARAM_VALUE\" from \"SD_PARAMS\"" + " where \"SD_ID\" in (" + sdIds + ") and \"PARAM_KEY\" is not null" + " order by \"SD_ID\" asc";
loopJoinOrderedResult(sds, queryText, 0, new ApplyFunc<StorageDescriptor>() {
@Override
public void apply(StorageDescriptor t, Object[] fields) {
t.putToParameters((String) fields[1], (String) fields[2]);
}
});
// Perform conversion of null map values
for (StorageDescriptor t : sds.values()) {
t.setParameters(MetaStoreUtils.trimMapNulls(t.getParameters(), convertMapNullsToEmptyStrings));
}
queryText = "select \"SD_ID\", \"COLUMN_NAME\", \"SORT_COLS\".\"ORDER\"" + " from \"SORT_COLS\"" + " where \"SD_ID\" in (" + sdIds + ") and \"INTEGER_IDX\" >= 0" + " order by \"SD_ID\" asc, \"INTEGER_IDX\" asc";
loopJoinOrderedResult(sds, queryText, 0, new ApplyFunc<StorageDescriptor>() {
@Override
public void apply(StorageDescriptor t, Object[] fields) {
if (fields[2] == null)
return;
t.addToSortCols(new Order((String) fields[1], extractSqlInt(fields[2])));
}
});
queryText = "select \"SD_ID\", \"BUCKET_COL_NAME\" from \"BUCKETING_COLS\"" + " where \"SD_ID\" in (" + sdIds + ") and \"INTEGER_IDX\" >= 0" + " order by \"SD_ID\" asc, \"INTEGER_IDX\" asc";
loopJoinOrderedResult(sds, queryText, 0, new ApplyFunc<StorageDescriptor>() {
@Override
public void apply(StorageDescriptor t, Object[] fields) {
t.addToBucketCols((String) fields[1]);
}
});
// Skewed columns stuff.
queryText = "select \"SD_ID\", \"SKEWED_COL_NAME\" from \"SKEWED_COL_NAMES\"" + " where \"SD_ID\" in (" + sdIds + ") and \"INTEGER_IDX\" >= 0" + " order by \"SD_ID\" asc, \"INTEGER_IDX\" asc";
boolean hasSkewedColumns = loopJoinOrderedResult(sds, queryText, 0, new ApplyFunc<StorageDescriptor>() {
@Override
public void apply(StorageDescriptor t, Object[] fields) {
if (!t.isSetSkewedInfo())
t.setSkewedInfo(new SkewedInfo());
t.getSkewedInfo().addToSkewedColNames((String) fields[1]);
}
}) > 0;
// Assume we don't need to fetch the rest of the skewed column data if we have no columns.
if (hasSkewedColumns) {
// We are skipping the SKEWED_STRING_LIST table here, as it seems to be totally useless.
queryText = "select \"SKEWED_VALUES\".\"SD_ID_OID\"," + " \"SKEWED_STRING_LIST_VALUES\".\"STRING_LIST_ID\"," + " \"SKEWED_STRING_LIST_VALUES\".\"STRING_LIST_VALUE\" " + "from \"SKEWED_VALUES\" " + " left outer join \"SKEWED_STRING_LIST_VALUES\" on \"SKEWED_VALUES\"." + "\"STRING_LIST_ID_EID\" = \"SKEWED_STRING_LIST_VALUES\".\"STRING_LIST_ID\" " + "where \"SKEWED_VALUES\".\"SD_ID_OID\" in (" + sdIds + ") " + " and \"SKEWED_VALUES\".\"STRING_LIST_ID_EID\" is not null " + " and \"SKEWED_VALUES\".\"INTEGER_IDX\" >= 0 " + "order by \"SKEWED_VALUES\".\"SD_ID_OID\" asc, \"SKEWED_VALUES\".\"INTEGER_IDX\" asc," + " \"SKEWED_STRING_LIST_VALUES\".\"INTEGER_IDX\" asc";
loopJoinOrderedResult(sds, queryText, 0, new ApplyFunc<StorageDescriptor>() {
private Long currentListId;
private List<String> currentList;
@Override
public void apply(StorageDescriptor t, Object[] fields) throws MetaException {
if (!t.isSetSkewedInfo())
t.setSkewedInfo(new SkewedInfo());
// This is not a typical list accumulator - there is no call to finalize the last list.
// Instead we add the list to the SD first and keep a local reference so elements can be appended.
if (fields[1] == null) {
// left outer join produced a list with no values
currentList = null;
currentListId = null;
t.getSkewedInfo().addToSkewedColValues(new ArrayList<String>());
} else {
long fieldsListId = extractSqlLong(fields[1]);
if (currentListId == null || fieldsListId != currentListId) {
currentList = new ArrayList<String>();
currentListId = fieldsListId;
t.getSkewedInfo().addToSkewedColValues(currentList);
}
currentList.add((String) fields[2]);
}
}
});
// We are skipping the SKEWED_STRING_LIST table here, as it seems to be totally useless.
queryText = "select \"SKEWED_COL_VALUE_LOC_MAP\".\"SD_ID\"," + " \"SKEWED_STRING_LIST_VALUES\".STRING_LIST_ID," + " \"SKEWED_COL_VALUE_LOC_MAP\".\"LOCATION\"," + " \"SKEWED_STRING_LIST_VALUES\".\"STRING_LIST_VALUE\" " + "from \"SKEWED_COL_VALUE_LOC_MAP\"" + " left outer join \"SKEWED_STRING_LIST_VALUES\" on \"SKEWED_COL_VALUE_LOC_MAP\"." + "\"STRING_LIST_ID_KID\" = \"SKEWED_STRING_LIST_VALUES\".\"STRING_LIST_ID\" " + "where \"SKEWED_COL_VALUE_LOC_MAP\".\"SD_ID\" in (" + sdIds + ")" + " and \"SKEWED_COL_VALUE_LOC_MAP\".\"STRING_LIST_ID_KID\" is not null " + "order by \"SKEWED_COL_VALUE_LOC_MAP\".\"SD_ID\" asc," + " \"SKEWED_STRING_LIST_VALUES\".\"STRING_LIST_ID\" asc," + " \"SKEWED_STRING_LIST_VALUES\".\"INTEGER_IDX\" asc";
loopJoinOrderedResult(sds, queryText, 0, new ApplyFunc<StorageDescriptor>() {
private Long currentListId;
private List<String> currentList;
@Override
public void apply(StorageDescriptor t, Object[] fields) throws MetaException {
if (!t.isSetSkewedInfo()) {
SkewedInfo skewedInfo = new SkewedInfo();
skewedInfo.setSkewedColValueLocationMaps(new HashMap<List<String>, String>());
t.setSkewedInfo(skewedInfo);
}
Map<List<String>, String> skewMap = t.getSkewedInfo().getSkewedColValueLocationMaps();
// This is not a typical list accumulator - there is no call to finalize the last list.
// Instead the list is inserted into the map first and then extended locally.
if (fields[1] == null) {
// left outer join produced a list with no values
currentList = new ArrayList<String>();
currentListId = null;
} else {
long fieldsListId = extractSqlLong(fields[1]);
if (currentListId == null || fieldsListId != currentListId) {
currentList = new ArrayList<String>();
currentListId = fieldsListId;
} else {
// Map keys are compared by value, so remove the partially built list before mutating it; it is re-inserted below.
skewMap.remove(currentList);
}
currentList.add((String) fields[3]);
}
skewMap.put(currentList, (String) fields[2]);
}
});
}
// Get FieldSchema stuff if any.
if (!colss.isEmpty()) {
// We are skipping the CDS table here, as it seems to be totally useless.
queryText = "select \"CD_ID\", \"COMMENT\", \"COLUMN_NAME\", \"TYPE_NAME\"" + " from \"COLUMNS_V2\" where \"CD_ID\" in (" + colIds + ") and \"INTEGER_IDX\" >= 0" + " order by \"CD_ID\" asc, \"INTEGER_IDX\" asc";
loopJoinOrderedResult(colss, queryText, 0, new ApplyFunc<List<FieldSchema>>() {
@Override
public void apply(List<FieldSchema> t, Object[] fields) {
t.add(new FieldSchema((String) fields[2], (String) fields[3], (String) fields[1]));
}
});
}
// Finally, get all the stuff for serdes - just the params.
queryText = "select \"SERDE_ID\", \"PARAM_KEY\", \"PARAM_VALUE\" from \"SERDE_PARAMS\"" + " where \"SERDE_ID\" in (" + serdeIds + ") and \"PARAM_KEY\" is not null" + " order by \"SERDE_ID\" asc";
loopJoinOrderedResult(serdes, queryText, 0, new ApplyFunc<SerDeInfo>() {
@Override
public void apply(SerDeInfo t, Object[] fields) {
t.putToParameters((String) fields[1], (String) fields[2]);
}
});
// Perform conversion of null map values
for (SerDeInfo t : serdes.values()) {
t.setParameters(MetaStoreUtils.trimMapNulls(t.getParameters(), convertMapNullsToEmptyStrings));
}
return orderedResult;
}
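To make the shape of the data this method assembles concrete, here is a minimal sketch (not Hive source) of the SkewedInfo structure attached to each StorageDescriptor; the column name, value, and location are invented for illustration, and java.util imports (List, ArrayList, HashMap, Arrays) are assumed.
// Sketch only: the nested structure the direct-SQL path fills in for each SD.
SkewedInfo info = new SkewedInfo(
    new ArrayList<String>(),              // skewedColNames
    new ArrayList<List<String>>(),        // skewedColValues, one list per skewed value tuple
    new HashMap<List<String>, String>()); // skewedColValueLocationMaps
info.addToSkewedColNames("country");                       // hypothetical skewed column
info.addToSkewedColValues(Arrays.asList("US"));            // one tuple of skewed values
info.getSkewedColValueLocationMaps()
    .put(Arrays.asList("US"), "/warehouse/t/country=US");  // hypothetical location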
Use of org.apache.hadoop.hive.metastore.api.SkewedInfo in project hive by apache.
The class DDLSemanticAnalyzer, method analyzeAlterTableRenameCol.
private void analyzeAlterTableRenameCol(String[] qualified, ASTNode ast, HashMap<String, String> partSpec) throws SemanticException {
String newComment = null;
boolean first = false;
String flagCol = null;
boolean isCascade = false;
//col_old_name col_new_name column_type [COMMENT col_comment] [FIRST|AFTER column_name] [CASCADE|RESTRICT]
String oldColName = ast.getChild(0).getText();
String newColName = ast.getChild(1).getText();
String newType = getTypeStringFromAST((ASTNode) ast.getChild(2));
int childCount = ast.getChildCount();
for (int i = 3; i < childCount; i++) {
ASTNode child = (ASTNode) ast.getChild(i);
switch(child.getToken().getType()) {
case HiveParser.StringLiteral:
newComment = unescapeSQLString(child.getText());
break;
case HiveParser.TOK_ALTERTABLE_CHANGECOL_AFTER_POSITION:
flagCol = unescapeIdentifier(child.getChild(0).getText());
break;
case HiveParser.KW_FIRST:
first = true;
break;
case HiveParser.TOK_CASCADE:
isCascade = true;
break;
case HiveParser.TOK_RESTRICT:
break;
default:
throw new SemanticException("Unsupported token: " + child.getToken() + " for alter table");
}
}
/* Validate the operation of renaming a column name. */
Table tab = getTable(qualified);
SkewedInfo skewInfo = tab.getTTable().getSd().getSkewedInfo();
if ((null != skewInfo) && (null != skewInfo.getSkewedColNames()) && skewInfo.getSkewedColNames().contains(oldColName)) {
throw new SemanticException(oldColName + ErrorMsg.ALTER_TABLE_NOT_ALLOWED_RENAME_SKEWED_COLUMN.getMsg());
}
String tblName = getDotName(qualified);
AlterTableDesc alterTblDesc = new AlterTableDesc(tblName, partSpec, unescapeIdentifier(oldColName), unescapeIdentifier(newColName), newType, newComment, first, flagCol, isCascade);
addInputsOutputsAlterTable(tblName, partSpec, alterTblDesc);
rootTasks.add(TaskFactory.get(new DDLWork(getInputs(), getOutputs(), alterTblDesc), conf));
}
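The skew check above is the only part of this analyzer that touches SkewedInfo; pulled out on its own it amounts to the following sketch (the helper name isSkewedColumn is hypothetical, not part of Hive).
// Sketch: true when colName is declared as a skewed column on the table.
static boolean isSkewedColumn(org.apache.hadoop.hive.metastore.api.Table t, String colName) {
  SkewedInfo skewInfo = t.getSd().getSkewedInfo();
  return skewInfo != null && skewInfo.getSkewedColNames() != null
      && skewInfo.getSkewedColNames().contains(colName);
}
// Renaming such a column is rejected with ALTER_TABLE_NOT_ALLOWED_RENAME_SKEWED_COLUMN.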
Use of org.apache.hadoop.hive.metastore.api.SkewedInfo in project hive by apache.
The class HBaseUtils, method serializeStorageDescriptor.
/**
* Serialize a storage descriptor.
* @param sd storage descriptor to serialize
* @return serialized storage descriptor.
*/
static byte[] serializeStorageDescriptor(StorageDescriptor sd) {
HbaseMetastoreProto.StorageDescriptor.Builder builder = HbaseMetastoreProto.StorageDescriptor.newBuilder();
builder.addAllCols(convertFieldSchemaListToProto(sd.getCols()));
if (sd.getInputFormat() != null) {
builder.setInputFormat(sd.getInputFormat());
}
if (sd.getOutputFormat() != null) {
builder.setOutputFormat(sd.getOutputFormat());
}
builder.setIsCompressed(sd.isCompressed());
builder.setNumBuckets(sd.getNumBuckets());
if (sd.getSerdeInfo() != null) {
HbaseMetastoreProto.StorageDescriptor.SerDeInfo.Builder serdeBuilder = HbaseMetastoreProto.StorageDescriptor.SerDeInfo.newBuilder();
SerDeInfo serde = sd.getSerdeInfo();
if (serde.getName() != null) {
serdeBuilder.setName(serde.getName());
}
if (serde.getSerializationLib() != null) {
serdeBuilder.setSerializationLib(serde.getSerializationLib());
}
if (serde.getParameters() != null) {
serdeBuilder.setParameters(buildParameters(serde.getParameters()));
}
builder.setSerdeInfo(serdeBuilder);
}
if (sd.getBucketCols() != null) {
builder.addAllBucketCols(sd.getBucketCols());
}
if (sd.getSortCols() != null) {
List<Order> orders = sd.getSortCols();
List<HbaseMetastoreProto.StorageDescriptor.Order> protoList = new ArrayList<>(orders.size());
for (Order order : orders) {
protoList.add(HbaseMetastoreProto.StorageDescriptor.Order.newBuilder().setColumnName(order.getCol()).setOrder(order.getOrder()).build());
}
builder.addAllSortCols(protoList);
}
if (sd.getSkewedInfo() != null) {
HbaseMetastoreProto.StorageDescriptor.SkewedInfo.Builder skewBuilder = HbaseMetastoreProto.StorageDescriptor.SkewedInfo.newBuilder();
SkewedInfo skewed = sd.getSkewedInfo();
if (skewed.getSkewedColNames() != null) {
skewBuilder.addAllSkewedColNames(skewed.getSkewedColNames());
}
if (skewed.getSkewedColValues() != null) {
for (List<String> innerList : skewed.getSkewedColValues()) {
HbaseMetastoreProto.StorageDescriptor.SkewedInfo.SkewedColValueList.Builder listBuilder = HbaseMetastoreProto.StorageDescriptor.SkewedInfo.SkewedColValueList.newBuilder();
listBuilder.addAllSkewedColValue(innerList);
skewBuilder.addSkewedColValues(listBuilder);
}
}
if (skewed.getSkewedColValueLocationMaps() != null) {
for (Map.Entry<List<String>, String> e : skewed.getSkewedColValueLocationMaps().entrySet()) {
HbaseMetastoreProto.StorageDescriptor.SkewedInfo.SkewedColValueLocationMap.Builder mapBuilder = HbaseMetastoreProto.StorageDescriptor.SkewedInfo.SkewedColValueLocationMap.newBuilder();
mapBuilder.addAllKey(e.getKey());
mapBuilder.setValue(e.getValue());
skewBuilder.addSkewedColValueLocationMaps(mapBuilder);
}
}
builder.setSkewedInfo(skewBuilder);
}
builder.setStoredAsSubDirectories(sd.isStoredAsSubDirectories());
return builder.build().toByteArray();
}
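The skew-related portion of the conversion above maps Thrift's list-of-lists onto repeated protobuf messages. Isolated into a standalone helper, and using only the builder calls that already appear in the method, it looks roughly like this sketch (the method name toProto is hypothetical):
// Sketch: build just the proto SkewedInfo from a Thrift SkewedInfo.
static HbaseMetastoreProto.StorageDescriptor.SkewedInfo toProto(SkewedInfo skewed) {
  HbaseMetastoreProto.StorageDescriptor.SkewedInfo.Builder skewBuilder =
      HbaseMetastoreProto.StorageDescriptor.SkewedInfo.newBuilder();
  if (skewed.getSkewedColNames() != null) {
    skewBuilder.addAllSkewedColNames(skewed.getSkewedColNames());
  }
  if (skewed.getSkewedColValues() != null) {
    // Each inner list of values becomes one SkewedColValueList message.
    for (List<String> innerList : skewed.getSkewedColValues()) {
      skewBuilder.addSkewedColValues(
          HbaseMetastoreProto.StorageDescriptor.SkewedInfo.SkewedColValueList.newBuilder()
              .addAllSkewedColValue(innerList));
    }
  }
  if (skewed.getSkewedColValueLocationMaps() != null) {
    // Each map entry becomes one SkewedColValueLocationMap message (key list plus location).
    for (Map.Entry<List<String>, String> e : skewed.getSkewedColValueLocationMaps().entrySet()) {
      skewBuilder.addSkewedColValueLocationMaps(
          HbaseMetastoreProto.StorageDescriptor.SkewedInfo.SkewedColValueLocationMap.newBuilder()
              .addAllKey(e.getKey()).setValue(e.getValue()));
    }
  }
  return skewBuilder.build();
}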
Use of org.apache.hadoop.hive.metastore.api.SkewedInfo in project hive by apache.
The class Table, method getEmptyTable.
/**
* Initialize an empty table.
*/
public static org.apache.hadoop.hive.metastore.api.Table getEmptyTable(String databaseName, String tableName) {
StorageDescriptor sd = new StorageDescriptor();
{
sd.setSerdeInfo(new SerDeInfo());
sd.setNumBuckets(-1);
sd.setBucketCols(new ArrayList<String>());
sd.setCols(new ArrayList<FieldSchema>());
sd.setParameters(new HashMap<String, String>());
sd.setSortCols(new ArrayList<Order>());
sd.getSerdeInfo().setParameters(new HashMap<String, String>());
// We have to use MetadataTypedColumnsetSerDe because LazySimpleSerDe does
// not support a table with no columns.
sd.getSerdeInfo().setSerializationLib(MetadataTypedColumnsetSerDe.class.getName());
sd.getSerdeInfo().getParameters().put(serdeConstants.SERIALIZATION_FORMAT, "1");
sd.setInputFormat(SequenceFileInputFormat.class.getName());
sd.setOutputFormat(HiveSequenceFileOutputFormat.class.getName());
SkewedInfo skewInfo = new SkewedInfo();
skewInfo.setSkewedColNames(new ArrayList<String>());
skewInfo.setSkewedColValues(new ArrayList<List<String>>());
skewInfo.setSkewedColValueLocationMaps(new HashMap<List<String>, String>());
sd.setSkewedInfo(skewInfo);
}
org.apache.hadoop.hive.metastore.api.Table t = new org.apache.hadoop.hive.metastore.api.Table();
{
t.setSd(sd);
t.setPartitionKeys(new ArrayList<FieldSchema>());
t.setParameters(new HashMap<String, String>());
t.setTableType(TableType.MANAGED_TABLE.toString());
t.setDbName(databaseName);
t.setTableName(tableName);
t.setOwner(SessionState.getUserFromAuthenticator());
// set create time
t.setCreateTime((int) (System.currentTimeMillis() / 1000));
}
return t;
}
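A short usage sketch (the database and table names are hypothetical) showing that the skew metadata on the empty table is initialized to empty collections, so callers can populate it without null checks:
org.apache.hadoop.hive.metastore.api.Table t = Table.getEmptyTable("default", "scratch_tbl");
SkewedInfo skewInfo = t.getSd().getSkewedInfo();
assert skewInfo.getSkewedColNames().isEmpty();
assert skewInfo.getSkewedColValues().isEmpty();
assert skewInfo.getSkewedColValueLocationMaps().isEmpty();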
Use of org.apache.hadoop.hive.metastore.api.SkewedInfo in project hive by apache.
The class Hive, method loadTable.
/**
* Load a directory into a Hive Table.
* - Alters the existing content of the table with the contents of loadPath.
* - If the table does not exist, an exception is thrown.
* - Files in loadPath are moved into Hive, but the directory itself is not removed.
*
* @param loadPath
* Directory containing files to load into Table
* @param tableName
* name of table to be loaded.
* @param replace
* if true - replace files in the table, otherwise add files to table
* @param isSrcLocal
* If the source directory is LOCAL
* @param isSkewedStoreAsSubdir
* if list bucketing enabled
* @param hasFollowingStatsTask
* if there is any following stats task
* @param isAcid true if this is an ACID based write
*/
public void loadTable(Path loadPath, String tableName, boolean replace, boolean isSrcLocal, boolean isSkewedStoreAsSubdir, boolean isAcid, boolean hasFollowingStatsTask) throws HiveException {
List<Path> newFiles = null;
Table tbl = getTable(tableName);
HiveConf sessionConf = SessionState.getSessionConf();
if (conf.getBoolVar(ConfVars.FIRE_EVENTS_FOR_DML) && !tbl.isTemporary()) {
newFiles = Collections.synchronizedList(new ArrayList<Path>());
}
if (replace) {
Path tableDest = tbl.getPath();
replaceFiles(tableDest, loadPath, tableDest, tableDest, sessionConf, isSrcLocal);
} else {
FileSystem fs;
try {
fs = tbl.getDataLocation().getFileSystem(sessionConf);
copyFiles(sessionConf, loadPath, tbl.getPath(), fs, isSrcLocal, isAcid, newFiles);
} catch (IOException e) {
throw new HiveException("addFiles: filesystem error in check phase", e);
}
}
if (!this.getConf().getBoolVar(HiveConf.ConfVars.HIVESTATSAUTOGATHER)) {
StatsSetupConst.setBasicStatsState(tbl.getParameters(), StatsSetupConst.FALSE);
}
// Column stats will be inaccurate after the load, so clear their state.
StatsSetupConst.clearColumnStatsState(tbl.getParameters());
try {
if (isSkewedStoreAsSubdir) {
SkewedInfo skewedInfo = tbl.getSkewedInfo();
// Construct list bucketing location mappings from sub-directory name.
Map<List<String>, String> skewedColValueLocationMaps = constructListBucketingLocationMap(tbl.getPath(), skewedInfo);
// Add list bucketing location mappings.
skewedInfo.setSkewedColValueLocationMaps(skewedColValueLocationMaps);
}
} catch (IOException e) {
LOG.error(StringUtils.stringifyException(e));
throw new HiveException(e);
}
EnvironmentContext environmentContext = null;
if (hasFollowingStatsTask) {
environmentContext = new EnvironmentContext();
environmentContext.putToProperties(StatsSetupConst.DO_NOT_UPDATE_STATS, StatsSetupConst.TRUE);
}
try {
alterTable(tableName, tbl, environmentContext);
} catch (InvalidOperationException e) {
throw new HiveException(e);
}
fireInsertEvent(tbl, null, newFiles);
}
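For the list-bucketing branch, constructListBucketingLocationMap (not shown here) derives a map from each skewed value tuple to the sub-directory that stores it. The sketch below only illustrates the shape of that map; the paths, values, and directory names are made up, and java.util imports are assumed.
// Sketch only: the kind of mapping attached via setSkewedColValueLocationMaps.
SkewedInfo skewedInfo = new SkewedInfo(new ArrayList<String>(),
    new ArrayList<List<String>>(), new HashMap<List<String>, String>());
String tblLocation = "/warehouse/db.db/t";  // hypothetical table location
Map<List<String>, String> skewedColValueLocationMaps = new HashMap<List<String>, String>();
skewedColValueLocationMaps.put(Arrays.asList("484"), tblLocation + "/key=484");
skewedColValueLocationMaps.put(Arrays.asList("51"), tblLocation + "/key=51");
skewedInfo.setSkewedColValueLocationMaps(skewedColValueLocationMaps);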