Use of org.apache.hadoop.hive.metastore.api.SkewedInfo in project hive by apache.
From the class TestHBaseStore, method hashSd:
@Test
public void hashSd() throws Exception {
  List<FieldSchema> cols = new ArrayList<FieldSchema>();
  cols.add(new FieldSchema("col1", "int", ""));
  SerDeInfo serde = new SerDeInfo("serde", "seriallib", null);
  StorageDescriptor sd = new StorageDescriptor(cols, "file:/tmp", "input", "output", true, 0, serde, null, null, emptyParameters);
  Map<List<String>, String> map = new HashMap<List<String>, String>();
  map.put(Arrays.asList("col3"), "col4");
  SkewedInfo skew = new SkewedInfo(Arrays.asList("col1"), Arrays.asList(Arrays.asList("col2")), map);
  sd.setSkewedInfo(skew);
  MessageDigest md = MessageDigest.getInstance("MD5");
  byte[] baseHash = HBaseUtils.hashStorageDescriptor(sd, md);
  // adding a column must change the hash...
  StorageDescriptor changeSchema = new StorageDescriptor(sd);
  changeSchema.getCols().add(new FieldSchema("col2", "varchar(32)", "a comment"));
  byte[] schemaHash = HBaseUtils.hashStorageDescriptor(changeSchema, md);
  Assert.assertFalse(Arrays.equals(baseHash, schemaHash));
  // ...while changing only the location must not
  StorageDescriptor changeLocation = new StorageDescriptor(sd);
  changeLocation.setLocation("file:/somewhere/else");
  byte[] locationHash = HBaseUtils.hashStorageDescriptor(changeLocation, md);
  Assert.assertArrayEquals(baseHash, locationHash);
}
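As a side note, the SkewedInfo handed to the descriptor above can equally be assembled with the Thrift-generated setters instead of the three-argument constructor; a minimal, self-contained sketch (the wrapping class and method names are made up for the example):

import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.hadoop.hive.metastore.api.SkewedInfo;

public class SkewedInfoExample {
  public static SkewedInfo buildSkew() {
    // equivalent to: new SkewedInfo(Arrays.asList("col1"),
    //     Arrays.asList(Arrays.asList("col2")), map)
    SkewedInfo skew = new SkewedInfo();
    skew.setSkewedColNames(Arrays.asList("col1"));
    skew.setSkewedColValues(Arrays.asList(Arrays.asList("col2")));
    Map<List<String>, String> locations = new HashMap<List<String>, String>();
    locations.put(Arrays.asList("col3"), "col4");
    skew.setSkewedColValueLocationMaps(locations);
    return skew;
  }
}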
Use of org.apache.hadoop.hive.metastore.api.SkewedInfo in project hive by apache.
From the class TestHBaseStore, method skewInfo:
@Test
public void skewInfo() throws Exception {
  String tableName = "mytable";
  int startTime = (int) (System.currentTimeMillis() / 1000);
  List<FieldSchema> cols = new ArrayList<FieldSchema>();
  cols.add(new FieldSchema("col1", "int", ""));
  SerDeInfo serde = new SerDeInfo("serde", "seriallib", null);
  StorageDescriptor sd = new StorageDescriptor(cols, "file:/tmp", "input", "output", true, 0, serde, null, null, emptyParameters);
  Map<List<String>, String> map = new HashMap<List<String>, String>();
  map.put(Arrays.asList("col3"), "col4");
  SkewedInfo skew = new SkewedInfo(Arrays.asList("col1"), Arrays.asList(Arrays.asList("col2")), map);
  sd.setSkewedInfo(skew);
  Table table = new Table(tableName, "default", "me", startTime, startTime, 0, sd, null, emptyParameters, null, null, null);
  store.createTable(table);
  Table t = store.getTable("default", tableName);
  // the storage descriptor should round-trip intact
  Assert.assertEquals(1, t.getSd().getColsSize());
  Assert.assertEquals("col1", t.getSd().getCols().get(0).getName());
  Assert.assertEquals("int", t.getSd().getCols().get(0).getType());
  Assert.assertEquals("", t.getSd().getCols().get(0).getComment());
  Assert.assertEquals("serde", t.getSd().getSerdeInfo().getName());
  Assert.assertEquals("seriallib", t.getSd().getSerdeInfo().getSerializationLib());
  Assert.assertEquals("file:/tmp", t.getSd().getLocation());
  Assert.assertEquals("input", t.getSd().getInputFormat());
  Assert.assertEquals("output", t.getSd().getOutputFormat());
  Assert.assertTrue(t.getSd().isCompressed());
  Assert.assertEquals(0, t.getSd().getNumBuckets());
  Assert.assertEquals(0, t.getSd().getSortColsSize());
  Assert.assertEquals("me", t.getOwner());
  Assert.assertEquals("default", t.getDbName());
  Assert.assertEquals(tableName, t.getTableName());
  Assert.assertEquals(0, t.getParametersSize());
  // ...and so should the skew information
  skew = t.getSd().getSkewedInfo();
  Assert.assertNotNull(skew);
  Assert.assertEquals(1, skew.getSkewedColNamesSize());
  Assert.assertEquals("col1", skew.getSkewedColNames().get(0));
  Assert.assertEquals(1, skew.getSkewedColValuesSize());
  Assert.assertEquals("col2", skew.getSkewedColValues().get(0).get(0));
  Assert.assertEquals(1, skew.getSkewedColValueLocationMapsSize());
  Assert.assertEquals("col4", skew.getSkewedColValueLocationMaps().get(Arrays.asList("col3")));
}
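Reading the skew metadata back out of a fetched table follows the same accessors the assertions use; a small hedged sketch, assuming the initialized store from the test fixture (the method name dumpSkewInfo is illustrative):

// A minimal sketch: dump each skewed value list with its mapped location.
void dumpSkewInfo() throws Exception {
  Table t = store.getTable("default", "mytable");
  SkewedInfo skew = t.getSd().getSkewedInfo();
  for (List<String> skewedValue : skew.getSkewedColValues()) {
    // the location map is keyed by value lists; a value with no
    // list-bucketing directory simply maps to null
    String location = skew.getSkewedColValueLocationMaps().get(skewedValue);
    System.out.println(skewedValue + " -> " + location);
  }
}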
Use of org.apache.hadoop.hive.metastore.api.SkewedInfo in project hive by apache.
From the class DDLTask, method alterTableOrSinglePartition:
private int alterTableOrSinglePartition(AlterTableDesc alterTbl, Table tbl, Partition part) throws HiveException {
  EnvironmentContext environmentContext = alterTbl.getEnvironmentContext();
  if (environmentContext == null) {
    environmentContext = new EnvironmentContext();
    alterTbl.setEnvironmentContext(environmentContext);
  }
  // there is no need to update stats in alter table/partition operations
  if (environmentContext.getProperties() == null
      || environmentContext.getProperties().get(StatsSetupConst.DO_NOT_UPDATE_STATS) == null) {
    environmentContext.putToProperties(StatsSetupConst.DO_NOT_UPDATE_STATS, StatsSetupConst.TRUE);
  }
  if (alterTbl.getOp() == AlterTableDesc.AlterTableTypes.RENAME) {
    tbl.setDbName(Utilities.getDatabaseName(alterTbl.getNewName()));
    tbl.setTableName(Utilities.getTableName(alterTbl.getNewName()));
  } else if (alterTbl.getOp() == AlterTableDesc.AlterTableTypes.ADDCOLS) {
    StorageDescriptor sd = retrieveStorageDescriptor(tbl, part);
    String serializationLib = sd.getSerdeInfo().getSerializationLib();
    AvroSerdeUtils.handleAlterTableForAvro(conf, serializationLib, tbl.getTTable().getParameters());
    List<FieldSchema> oldCols = (part == null ? tbl.getColsForMetastore() : part.getColsForMetastore());
    List<FieldSchema> newCols = alterTbl.getNewCols();
    if (serializationLib.equals("org.apache.hadoop.hive.serde.thrift.columnsetSerDe")) {
      console.printInfo("Replacing columns for columnsetSerDe and changing to LazySimpleSerDe");
      sd.getSerdeInfo().setSerializationLib(LazySimpleSerDe.class.getName());
      sd.setCols(newCols);
    } else {
      // make sure the new columns do not already exist
      Iterator<FieldSchema> iterNewCols = newCols.iterator();
      while (iterNewCols.hasNext()) {
        FieldSchema newCol = iterNewCols.next();
        String newColName = newCol.getName();
        Iterator<FieldSchema> iterOldCols = oldCols.iterator();
        while (iterOldCols.hasNext()) {
          String oldColName = iterOldCols.next().getName();
          if (oldColName.equalsIgnoreCase(newColName)) {
            throw new HiveException(ErrorMsg.DUPLICATE_COLUMN_NAMES, newColName);
          }
        }
        oldCols.add(newCol);
      }
      sd.setCols(oldCols);
    }
  } else if (alterTbl.getOp() == AlterTableDesc.AlterTableTypes.RENAMECOLUMN) {
    StorageDescriptor sd = retrieveStorageDescriptor(tbl, part);
    String serializationLib = sd.getSerdeInfo().getSerializationLib();
    AvroSerdeUtils.handleAlterTableForAvro(conf, serializationLib, tbl.getTTable().getParameters());
    List<FieldSchema> oldCols = (part == null ? tbl.getColsForMetastore() : part.getColsForMetastore());
    List<FieldSchema> newCols = new ArrayList<FieldSchema>();
    Iterator<FieldSchema> iterOldCols = oldCols.iterator();
    String oldName = alterTbl.getOldColName();
    String newName = alterTbl.getNewColName();
    String type = alterTbl.getNewColType();
    String comment = alterTbl.getNewColComment();
    boolean first = alterTbl.getFirst();
    String afterCol = alterTbl.getAfterCol();
    // for an ORC table, restrict reordering columns as it would break schema evolution
    boolean isOrcSchemaEvolution = sd.getInputFormat().equals(OrcInputFormat.class.getName())
        && isSchemaEvolutionEnabled(tbl);
    if (isOrcSchemaEvolution && (first || (afterCol != null && !afterCol.trim().isEmpty()))) {
      throw new HiveException(ErrorMsg.CANNOT_REORDER_COLUMNS, alterTbl.getOldName());
    }
    FieldSchema column = null;
    boolean found = false;
    int position = -1;
    if (first) {
      position = 0;
    }
    int i = 1;
    while (iterOldCols.hasNext()) {
      FieldSchema col = iterOldCols.next();
      String oldColName = col.getName();
      if (oldColName.equalsIgnoreCase(newName) && !oldColName.equalsIgnoreCase(oldName)) {
        throw new HiveException(ErrorMsg.DUPLICATE_COLUMN_NAMES, newName);
      } else if (oldColName.equalsIgnoreCase(oldName)) {
        col.setName(newName);
        if (type != null && !type.trim().equals("")) {
          col.setType(type);
        }
        if (comment != null) {
          col.setComment(comment);
        }
        found = true;
        if (first || (afterCol != null && !afterCol.trim().equals(""))) {
          column = col;
          continue;
        }
      }
      if (afterCol != null && !afterCol.trim().equals("") && oldColName.equalsIgnoreCase(afterCol)) {
        position = i;
      }
      i++;
      newCols.add(col);
    }
    // the column to rename was not found
    if (!found) {
      throw new HiveException(ErrorMsg.INVALID_COLUMN, oldName);
    }
    // an after-column was specified but never found
    if ((afterCol != null && !afterCol.trim().equals("")) && position < 0) {
      throw new HiveException(ErrorMsg.INVALID_COLUMN, afterCol);
    }
    if (position >= 0) {
      newCols.add(position, column);
    }
    sd.setCols(newCols);
  } else if (alterTbl.getOp() == AlterTableDesc.AlterTableTypes.REPLACECOLS) {
    StorageDescriptor sd = retrieveStorageDescriptor(tbl, part);
    // change SerDe to LazySimpleSerDe if it is columnsetSerDe
    String serializationLib = sd.getSerdeInfo().getSerializationLib();
    if (serializationLib.equals("org.apache.hadoop.hive.serde.thrift.columnsetSerDe")) {
      console.printInfo("Replacing columns for columnsetSerDe and changing to LazySimpleSerDe");
      sd.getSerdeInfo().setSerializationLib(LazySimpleSerDe.class.getName());
    } else if (!serializationLib.equals(MetadataTypedColumnsetSerDe.class.getName())
        && !serializationLib.equals(LazySimpleSerDe.class.getName())
        && !serializationLib.equals(ColumnarSerDe.class.getName())
        && !serializationLib.equals(DynamicSerDe.class.getName())
        && !serializationLib.equals(ParquetHiveSerDe.class.getName())
        && !serializationLib.equals(OrcSerde.class.getName())) {
      throw new HiveException(ErrorMsg.CANNOT_REPLACE_COLUMNS, alterTbl.getOldName());
    }
    final boolean isOrcSchemaEvolution = serializationLib.equals(OrcSerde.class.getName())
        && isSchemaEvolutionEnabled(tbl);
    // adding columns and limited integer type promotion are supported for ORC schema evolution
    if (isOrcSchemaEvolution) {
      final List<FieldSchema> existingCols = sd.getCols();
      final List<FieldSchema> replaceCols = alterTbl.getNewCols();
      if (replaceCols.size() < existingCols.size()) {
        throw new HiveException(ErrorMsg.REPLACE_CANNOT_DROP_COLUMNS, alterTbl.getOldName());
      }
    }
    sd.setCols(alterTbl.getNewCols());
  } else if (alterTbl.getOp() == AlterTableDesc.AlterTableTypes.ADDPROPS) {
    if (StatsSetupConst.USER.equals(environmentContext.getProperties().get(StatsSetupConst.STATS_GENERATED))) {
      environmentContext.getProperties().remove(StatsSetupConst.DO_NOT_UPDATE_STATS);
    }
    if (part != null) {
      part.getTPartition().getParameters().putAll(alterTbl.getProps());
    } else {
      tbl.getTTable().getParameters().putAll(alterTbl.getProps());
    }
  } else if (alterTbl.getOp() == AlterTableDesc.AlterTableTypes.DROPPROPS) {
    Iterator<String> keyItr = alterTbl.getProps().keySet().iterator();
    if (StatsSetupConst.USER.equals(environmentContext.getProperties().get(StatsSetupConst.STATS_GENERATED))) {
      // dropping a stats parameter triggers an automatic stats recompute
      environmentContext.getProperties().remove(StatsSetupConst.DO_NOT_UPDATE_STATS);
    }
    while (keyItr.hasNext()) {
      if (part != null) {
        part.getTPartition().getParameters().remove(keyItr.next());
      } else {
        tbl.getTTable().getParameters().remove(keyItr.next());
      }
    }
  } else if (alterTbl.getOp() == AlterTableDesc.AlterTableTypes.ADDSERDEPROPS) {
    StorageDescriptor sd = retrieveStorageDescriptor(tbl, part);
    sd.getSerdeInfo().getParameters().putAll(alterTbl.getProps());
  } else if (alterTbl.getOp() == AlterTableDesc.AlterTableTypes.ADDSERDE) {
    StorageDescriptor sd = retrieveStorageDescriptor(tbl, part);
    String serdeName = alterTbl.getSerdeName();
    String oldSerdeName = sd.getSerdeInfo().getSerializationLib();
    // for an ORC table, restrict changing the SerDe as it can break schema evolution
    if (isSchemaEvolutionEnabled(tbl) && oldSerdeName.equalsIgnoreCase(OrcSerde.class.getName())
        && !serdeName.equalsIgnoreCase(OrcSerde.class.getName())) {
      throw new HiveException(ErrorMsg.CANNOT_CHANGE_SERDE, OrcSerde.class.getSimpleName(), alterTbl.getOldName());
    }
    sd.getSerdeInfo().setSerializationLib(serdeName);
    if ((alterTbl.getProps() != null) && (alterTbl.getProps().size() > 0)) {
      sd.getSerdeInfo().getParameters().putAll(alterTbl.getProps());
    }
    if (part != null) {
      // TODO: wtf? This doesn't do anything.
      part.getTPartition().getSd().setCols(part.getTPartition().getSd().getCols());
    } else {
      if (Table.shouldStoreFieldsInMetastore(conf, serdeName, tbl.getParameters())
          && !Table.hasMetastoreBasedSchema(conf, oldSerdeName)) {
        // If the new SerDe stores its fields in the metastore but the old one did not,
        // save the fields so the new SerDe can operate. Note that this may fail if some fields
        // from the old SerDe are too long to be stored in metastore, but there's nothing we can do.
        try {
          Deserializer oldSerde = MetaStoreUtils.getDeserializer(conf, tbl.getTTable(), false, oldSerdeName);
          tbl.setFields(Hive.getFieldsFromDeserializer(tbl.getTableName(), oldSerde));
        } catch (MetaException ex) {
          throw new HiveException(ex);
        }
      }
    }
  } else if (alterTbl.getOp() == AlterTableDesc.AlterTableTypes.ADDFILEFORMAT) {
    StorageDescriptor sd = retrieveStorageDescriptor(tbl, part);
    // for an ORC table, restrict changing the file format as it can break schema evolution
    if (isSchemaEvolutionEnabled(tbl) && sd.getInputFormat().equals(OrcInputFormat.class.getName())
        && !alterTbl.getInputFormat().equals(OrcInputFormat.class.getName())) {
      throw new HiveException(ErrorMsg.CANNOT_CHANGE_FILEFORMAT, "ORC", alterTbl.getOldName());
    }
    sd.setInputFormat(alterTbl.getInputFormat());
    sd.setOutputFormat(alterTbl.getOutputFormat());
    if (alterTbl.getSerdeName() != null) {
      sd.getSerdeInfo().setSerializationLib(alterTbl.getSerdeName());
    }
  } else if (alterTbl.getOp() == AlterTableDesc.AlterTableTypes.ADDCLUSTERSORTCOLUMN) {
    StorageDescriptor sd = retrieveStorageDescriptor(tbl, part);
    // validate sort columns and bucket columns
    List<String> columns = Utilities.getColumnNamesFromFieldSchema(tbl.getCols());
    if (!alterTbl.isTurnOffSorting()) {
      Utilities.validateColumnNames(columns, alterTbl.getBucketColumns());
    }
    if (alterTbl.getSortColumns() != null) {
      Utilities.validateColumnNames(columns, Utilities.getColumnNamesFromSortCols(alterTbl.getSortColumns()));
    }
    if (alterTbl.isTurnOffSorting()) {
      sd.setSortCols(new ArrayList<Order>());
    } else if (alterTbl.getNumberBuckets() == -1) {
      // -1 buckets means turn off bucketing
      sd.setBucketCols(new ArrayList<String>());
      sd.setNumBuckets(-1);
      sd.setSortCols(new ArrayList<Order>());
    } else {
      sd.setBucketCols(alterTbl.getBucketColumns());
      sd.setNumBuckets(alterTbl.getNumberBuckets());
      sd.setSortCols(alterTbl.getSortColumns());
    }
  } else if (alterTbl.getOp() == AlterTableDesc.AlterTableTypes.ALTERLOCATION) {
    StorageDescriptor sd = retrieveStorageDescriptor(tbl, part);
    String newLocation = alterTbl.getNewLocation();
    try {
      URI locUri = new URI(newLocation);
      if (!new Path(locUri).isAbsolute()) {
        throw new HiveException(ErrorMsg.BAD_LOCATION_VALUE, newLocation);
      }
      sd.setLocation(newLocation);
    } catch (URISyntaxException e) {
      throw new HiveException(e);
    }
    environmentContext.getProperties().remove(StatsSetupConst.DO_NOT_UPDATE_STATS);
  } else if (alterTbl.getOp() == AlterTableDesc.AlterTableTypes.ADDSKEWEDBY) {
    // Validation was done at compile time; none is needed here.
    List<String> skewedColNames = null;
    List<List<String>> skewedValues = null;
    if (alterTbl.isTurnOffSkewed()) {
      // convert a skewed table to a non-skewed table
      skewedColNames = new ArrayList<String>();
      skewedValues = new ArrayList<List<String>>();
    } else {
      skewedColNames = alterTbl.getSkewedColNames();
      skewedValues = alterTbl.getSkewedColValues();
    }
    if (null == tbl.getSkewedInfo()) {
      // convert a non-skewed table to a skewed table
      SkewedInfo skewedInfo = new SkewedInfo();
      skewedInfo.setSkewedColNames(skewedColNames);
      skewedInfo.setSkewedColValues(skewedValues);
      tbl.setSkewedInfo(skewedInfo);
    } else {
      tbl.setSkewedColNames(skewedColNames);
      tbl.setSkewedColValues(skewedValues);
    }
    tbl.setStoredAsSubDirectories(alterTbl.isStoredAsSubDirectories());
  } else if (alterTbl.getOp() == AlterTableDesc.AlterTableTypes.ALTERSKEWEDLOCATION) {
    // process the locations one by one
    Map<List<String>, String> locMaps = alterTbl.getSkewedLocations();
    Set<List<String>> keys = locMaps.keySet();
    for (List<String> key : keys) {
      String newLocation = locMaps.get(key);
      try {
        URI locUri = new URI(newLocation);
        if (part != null) {
          List<String> slk = new ArrayList<String>(key);
          part.setSkewedValueLocationMap(slk, locUri.toString());
        } else {
          List<String> slk = new ArrayList<String>(key);
          tbl.setSkewedValueLocationMap(slk, locUri.toString());
        }
      } catch (URISyntaxException e) {
        throw new HiveException(e);
      }
    }
    environmentContext.getProperties().remove(StatsSetupConst.DO_NOT_UPDATE_STATS);
  } else if (alterTbl.getOp() == AlterTableTypes.ALTERBUCKETNUM) {
    if (part != null) {
      if (part.getBucketCount() == alterTbl.getNumberBuckets()) {
        return 0;
      }
      part.setBucketCount(alterTbl.getNumberBuckets());
    } else {
      if (tbl.getNumBuckets() == alterTbl.getNumberBuckets()) {
        return 0;
      }
      tbl.setNumBuckets(alterTbl.getNumberBuckets());
    }
  } else {
    throw new HiveException(ErrorMsg.UNSUPPORTED_ALTER_TBL_OP, alterTbl.getOp().toString());
  }
  return 0;
}
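For readers who only care about the skew handling, the ADDSKEWEDBY branch boils down to the following hedged sketch. The free-standing helper and its name applySkew are illustrative (DDLTask does this inline); the Table and SkewedInfo calls are the same ones used above:

import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.hive.metastore.api.SkewedInfo;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.metadata.Table;

public class SkewToggleSketch {
  /**
   * Mirrors the ADDSKEWEDBY branch: turning skew off writes empty lists,
   * turning it on writes the requested names/values; a table that has never
   * been skewed first gets a fresh SkewedInfo attached.
   */
  public static void applySkew(Table tbl, boolean turnOffSkewed,
      List<String> colNames, List<List<String>> colValues) throws HiveException {
    List<String> skewedColNames = turnOffSkewed ? new ArrayList<String>() : colNames;
    List<List<String>> skewedValues = turnOffSkewed ? new ArrayList<List<String>>() : colValues;
    if (tbl.getSkewedInfo() == null) {
      SkewedInfo skewedInfo = new SkewedInfo();
      skewedInfo.setSkewedColNames(skewedColNames);
      skewedInfo.setSkewedColValues(skewedValues);
      tbl.setSkewedInfo(skewedInfo);
    } else {
      tbl.setSkewedColNames(skewedColNames);
      tbl.setSkewedColValues(skewedValues);
    }
  }
}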
Use of org.apache.hadoop.hive.metastore.api.SkewedInfo in project hive by apache.
From the class DDLTask, method showCreateTable:
private int showCreateTable(Hive db, DataOutputStream outStream, String tableName) throws HiveException {
  final String EXTERNAL = "external";
  final String TEMPORARY = "temporary";
  final String LIST_COLUMNS = "columns";
  final String TBL_COMMENT = "tbl_comment";
  final String LIST_PARTITIONS = "partitions";
  final String SORT_BUCKET = "sort_bucket";
  final String SKEWED_INFO = "tbl_skewedinfo";
  final String ROW_FORMAT = "row_format";
  final String TBL_LOCATION = "tbl_location";
  final String TBL_PROPERTIES = "tbl_properties";
  boolean needsLocation = true;
  StringBuilder createTab_str = new StringBuilder();
  Table tbl = db.getTable(tableName, false);
  List<String> duplicateProps = new ArrayList<String>();
  try {
    needsLocation = doesTableNeedLocation(tbl);
    if (tbl.isView()) {
      String createTab_stmt = "CREATE VIEW `" + tableName + "` AS " + tbl.getViewExpandedText();
      outStream.write(createTab_stmt.getBytes(StandardCharsets.UTF_8));
      return 0;
    }
    createTab_str.append("CREATE <" + TEMPORARY + "><" + EXTERNAL + ">TABLE `");
    createTab_str.append(tableName + "`(\n");
    createTab_str.append("<" + LIST_COLUMNS + ">)\n");
    createTab_str.append("<" + TBL_COMMENT + ">\n");
    createTab_str.append("<" + LIST_PARTITIONS + ">\n");
    createTab_str.append("<" + SORT_BUCKET + ">\n");
    createTab_str.append("<" + SKEWED_INFO + ">\n");
    createTab_str.append("<" + ROW_FORMAT + ">\n");
    if (needsLocation) {
      createTab_str.append("LOCATION\n");
      createTab_str.append("<" + TBL_LOCATION + ">\n");
    }
    createTab_str.append("TBLPROPERTIES (\n");
    createTab_str.append("<" + TBL_PROPERTIES + ">)\n");
    ST createTab_stmt = new ST(createTab_str.toString());
    // for cases where the table is temporary
    String tbl_temp = "";
    if (tbl.isTemporary()) {
      duplicateProps.add("TEMPORARY");
      tbl_temp = "TEMPORARY ";
    }
    // for cases where the table is external
    String tbl_external = "";
    if (tbl.getTableType() == TableType.EXTERNAL_TABLE) {
      duplicateProps.add("EXTERNAL");
      tbl_external = "EXTERNAL ";
    }
    // columns
    String tbl_columns = "";
    List<FieldSchema> cols = tbl.getCols();
    List<String> columns = new ArrayList<String>();
    for (FieldSchema col : cols) {
      String columnDesc = " `" + col.getName() + "` " + col.getType();
      if (col.getComment() != null) {
        columnDesc = columnDesc + " COMMENT '" + HiveStringUtils.escapeHiveCommand(col.getComment()) + "'";
      }
      columns.add(columnDesc);
    }
    tbl_columns = StringUtils.join(columns, ", \n");
    // table comment
    String tbl_comment = "";
    String tabComment = tbl.getProperty("comment");
    if (tabComment != null) {
      duplicateProps.add("comment");
      tbl_comment = "COMMENT '" + HiveStringUtils.escapeHiveCommand(tabComment) + "'";
    }
    // partitions
    String tbl_partitions = "";
    List<FieldSchema> partKeys = tbl.getPartitionKeys();
    if (partKeys.size() > 0) {
      tbl_partitions += "PARTITIONED BY ( \n";
      List<String> partCols = new ArrayList<String>();
      for (FieldSchema partKey : partKeys) {
        String partColDesc = " `" + partKey.getName() + "` " + partKey.getType();
        if (partKey.getComment() != null) {
          partColDesc = partColDesc + " COMMENT '" + HiveStringUtils.escapeHiveCommand(partKey.getComment()) + "'";
        }
        partCols.add(partColDesc);
      }
      tbl_partitions += StringUtils.join(partCols, ", \n");
      tbl_partitions += ")";
    }
    // clusters (buckets)
    String tbl_sort_bucket = "";
    List<String> buckCols = tbl.getBucketCols();
    if (buckCols.size() > 0) {
      duplicateProps.add("SORTBUCKETCOLSPREFIX");
      tbl_sort_bucket += "CLUSTERED BY ( \n ";
      tbl_sort_bucket += StringUtils.join(buckCols, ", \n ");
      tbl_sort_bucket += ") \n";
      List<Order> sortCols = tbl.getSortCols();
      if (sortCols.size() > 0) {
        tbl_sort_bucket += "SORTED BY ( \n";
        // sort order
        List<String> sortKeys = new ArrayList<String>();
        for (Order sortCol : sortCols) {
          String sortKeyDesc = " " + sortCol.getCol() + " ";
          if (sortCol.getOrder() == BaseSemanticAnalyzer.HIVE_COLUMN_ORDER_ASC) {
            sortKeyDesc = sortKeyDesc + "ASC";
          } else if (sortCol.getOrder() == BaseSemanticAnalyzer.HIVE_COLUMN_ORDER_DESC) {
            sortKeyDesc = sortKeyDesc + "DESC";
          }
          sortKeys.add(sortKeyDesc);
        }
        tbl_sort_bucket += StringUtils.join(sortKeys, ", \n");
        tbl_sort_bucket += ") \n";
      }
      tbl_sort_bucket += "INTO " + tbl.getNumBuckets() + " BUCKETS";
    }
    // skewed info
    StringBuilder tbl_skewedinfo = new StringBuilder();
    SkewedInfo skewedInfo = tbl.getSkewedInfo();
    if (skewedInfo != null && !skewedInfo.getSkewedColNames().isEmpty()) {
      tbl_skewedinfo.append("SKEWED BY (" + StringUtils.join(skewedInfo.getSkewedColNames(), ",") + ")\n");
      tbl_skewedinfo.append(" ON (");
      List<String> colValueList = new ArrayList<String>();
      for (List<String> colValues : skewedInfo.getSkewedColValues()) {
        colValueList.add("('" + StringUtils.join(colValues, "','") + "')");
      }
      tbl_skewedinfo.append(StringUtils.join(colValueList, ",") + ")");
      if (tbl.isStoredAsSubDirectories()) {
        tbl_skewedinfo.append("\n STORED AS DIRECTORIES");
      }
    }
    // row format (SerDe)
    StringBuilder tbl_row_format = new StringBuilder();
    StorageDescriptor sd = tbl.getTTable().getSd();
    SerDeInfo serdeInfo = sd.getSerdeInfo();
    Map<String, String> serdeParams = serdeInfo.getParameters();
    tbl_row_format.append("ROW FORMAT SERDE \n");
    tbl_row_format.append(" '" + HiveStringUtils.escapeHiveCommand(serdeInfo.getSerializationLib()) + "' \n");
    if (tbl.getStorageHandler() == null) {
      // SerDe properties
      if (MetaStoreUtils.DEFAULT_SERIALIZATION_FORMAT.equals(serdeParams.get(serdeConstants.SERIALIZATION_FORMAT))) {
        serdeParams.remove(serdeConstants.SERIALIZATION_FORMAT);
      }
      if (!serdeParams.isEmpty()) {
        appendSerdeParams(tbl_row_format, serdeParams).append(" \n");
      }
      tbl_row_format.append("STORED AS INPUTFORMAT \n '" + HiveStringUtils.escapeHiveCommand(sd.getInputFormat()) + "' \n");
      tbl_row_format.append("OUTPUTFORMAT \n '" + HiveStringUtils.escapeHiveCommand(sd.getOutputFormat()) + "'");
    } else {
      duplicateProps.add(META_TABLE_STORAGE);
      tbl_row_format.append("STORED BY \n '" + HiveStringUtils.escapeHiveCommand(tbl.getParameters().get(META_TABLE_STORAGE)) + "' \n");
      // SerDe properties
      if (!serdeParams.isEmpty()) {
        appendSerdeParams(tbl_row_format, serdeInfo.getParameters());
      }
    }
    String tbl_location = " '" + HiveStringUtils.escapeHiveCommand(sd.getLocation()) + "'";
    // table properties
    duplicateProps.addAll(Arrays.asList(StatsSetupConst.TABLE_PARAMS_STATS_KEYS));
    String tbl_properties = propertiesToString(tbl.getParameters(), duplicateProps);
    createTab_stmt.add(TEMPORARY, tbl_temp);
    createTab_stmt.add(EXTERNAL, tbl_external);
    createTab_stmt.add(LIST_COLUMNS, tbl_columns);
    createTab_stmt.add(TBL_COMMENT, tbl_comment);
    createTab_stmt.add(LIST_PARTITIONS, tbl_partitions);
    createTab_stmt.add(SORT_BUCKET, tbl_sort_bucket);
    createTab_stmt.add(SKEWED_INFO, tbl_skewedinfo);
    createTab_stmt.add(ROW_FORMAT, tbl_row_format);
    // the table location should not be printed for HBase-backed tables
    if (needsLocation) {
      createTab_stmt.add(TBL_LOCATION, tbl_location);
    }
    createTab_stmt.add(TBL_PROPERTIES, tbl_properties);
    outStream.write(createTab_stmt.render().getBytes(StandardCharsets.UTF_8));
  } catch (IOException e) {
    LOG.info("show create table: " + stringifyException(e));
    return 1;
  }
  return 0;
}
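The SKEWED BY rendering in the middle of the method is self-contained enough to lift out; a minimal sketch (the helper name renderSkewedInfo is illustrative, and commons-lang StringUtils is assumed, matching DDLTask's usage) that assembles the clause the same way the code above does:

import java.util.ArrayList;
import java.util.List;

import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.hive.metastore.api.SkewedInfo;

public class SkewedClauseSketch {
  // Produces e.g. "SKEWED BY (col1)\n ON (('col2'))"; storedAsDirs appends
  // the trailing STORED AS DIRECTORIES clause for list-bucketed tables.
  public static String renderSkewedInfo(SkewedInfo skewedInfo, boolean storedAsDirs) {
    if (skewedInfo == null || skewedInfo.getSkewedColNames().isEmpty()) {
      return "";
    }
    StringBuilder sb = new StringBuilder();
    sb.append("SKEWED BY (" + StringUtils.join(skewedInfo.getSkewedColNames(), ",") + ")\n");
    sb.append(" ON (");
    List<String> colValueList = new ArrayList<String>();
    for (List<String> colValues : skewedInfo.getSkewedColValues()) {
      colValueList.add("('" + StringUtils.join(colValues, "','") + "')");
    }
    sb.append(StringUtils.join(colValueList, ",") + ")");
    if (storedAsDirs) {
      sb.append("\n STORED AS DIRECTORIES");
    }
    return sb.toString();
  }
}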
Use of org.apache.hadoop.hive.metastore.api.SkewedInfo in project hive by apache.
From the class Hive, method loadPartition:
/**
 * Load a directory into a Hive table partition.
 *  - Alters the existing content of the partition with the contents of loadPath.
 *  - If the partition does not exist, it is created.
 *  - Files in loadPath are moved into Hive, but the directory itself is not removed.
 *
 * @param loadPath
 *          directory containing files to load into the table
 * @param tbl
 *          table to be loaded
 * @param partSpec
 *          defines which partition needs to be loaded
 * @param replace
 *          if true, replace files in the partition, otherwise add files to the partition
 * @param inheritTableSpecs
 *          if true, on [re]creating the partition, take the
 *          location/inputformat/outputformat/serde details from the table spec
 * @param isSkewedStoreAsSubdir
 *          if true, the partition is list-bucketed (skewed values stored as
 *          sub-directories), so the skewed-value location mappings are rebuilt after the load
 * @param isSrcLocal
 *          if the source directory is LOCAL
 * @param isAcid
 *          true if this is an ACID operation
 * @param hasFollowingStatsTask
 *          true if a stats task will run after this load
 */
public Partition loadPartition(Path loadPath, Table tbl, Map<String, String> partSpec, boolean replace,
    boolean inheritTableSpecs, boolean isSkewedStoreAsSubdir, boolean isSrcLocal, boolean isAcid,
    boolean hasFollowingStatsTask) throws HiveException {
  Path tblDataLocationPath = tbl.getDataLocation();
  try {
    Partition oldPart = getPartition(tbl, partSpec, false);
    /*
     * Move files before creating the partition, since downstream processes
     * check for the existence of the partition in metadata before accessing
     * the data. If the partition were created before the data is moved,
     * downstream waiting processes might move forward with partial data.
     */
    Path oldPartPath = (oldPart != null) ? oldPart.getDataLocation() : null;
    Path newPartPath = null;
    if (inheritTableSpecs) {
      Path partPath = new Path(tbl.getDataLocation(), Warehouse.makePartPath(partSpec));
      newPartPath = new Path(tblDataLocationPath.toUri().getScheme(), tblDataLocationPath.toUri().getAuthority(), partPath.toUri().getPath());
      if (oldPart != null) {
        /*
         * If we are moving the partition across filesystem boundaries,
         * inherit from the table properties. Otherwise (same filesystem) use
         * the original partition location.
         *
         * See HIVE-1707 and HIVE-2117 for background.
         */
        FileSystem oldPartPathFS = oldPartPath.getFileSystem(getConf());
        FileSystem loadPathFS = loadPath.getFileSystem(getConf());
        if (FileUtils.equalsFileSystem(oldPartPathFS, loadPathFS)) {
          newPartPath = oldPartPath;
        }
      }
    } else {
      newPartPath = oldPartPath;
    }
    List<Path> newFiles = null;
    PerfLogger perfLogger = SessionState.getPerfLogger();
    perfLogger.PerfLogBegin("MoveTask", "FileMoves");
    if (replace || (oldPart == null && !isAcid)) {
      replaceFiles(tbl.getPath(), loadPath, newPartPath, oldPartPath, getConf(), isSrcLocal);
    } else {
      if (conf.getBoolVar(ConfVars.FIRE_EVENTS_FOR_DML) && !tbl.isTemporary() && oldPart != null) {
        newFiles = Collections.synchronizedList(new ArrayList<Path>());
      }
      FileSystem fs = tbl.getDataLocation().getFileSystem(conf);
      Hive.copyFiles(conf, loadPath, newPartPath, fs, isSrcLocal, isAcid, newFiles);
    }
    perfLogger.PerfLogEnd("MoveTask", "FileMoves");
    Partition newTPart = oldPart != null ? oldPart : new Partition(tbl, partSpec, newPartPath);
    alterPartitionSpecInMemory(tbl, partSpec, newTPart.getTPartition(), inheritTableSpecs, newPartPath.toString());
    validatePartition(newTPart);
    if ((null != newFiles) || replace) {
      fireInsertEvent(tbl, partSpec, newFiles);
    } else {
      LOG.debug("No new files were created, and this is not a replace; skipping the INSERT event.");
    }
    // column stats will be inaccurate
    StatsSetupConst.clearColumnStatsState(newTPart.getParameters());
    // for a list-bucketed partition, rebuild the skewed-value location
    // mappings from the sub-directory names
    if (isSkewedStoreAsSubdir) {
      org.apache.hadoop.hive.metastore.api.Partition newCreatedTpart = newTPart.getTPartition();
      SkewedInfo skewedInfo = newCreatedTpart.getSd().getSkewedInfo();
      /* Construct list bucketing location mappings from the sub-directory names. */
      Map<List<String>, String> skewedColValueLocationMaps = constructListBucketingLocationMap(newPartPath, skewedInfo);
      /* Add the list bucketing location mappings. */
      skewedInfo.setSkewedColValueLocationMaps(skewedColValueLocationMaps);
      newCreatedTpart.getSd().setSkewedInfo(skewedInfo);
    }
    if (!this.getConf().getBoolVar(HiveConf.ConfVars.HIVESTATSAUTOGATHER)) {
      StatsSetupConst.setBasicStatsState(newTPart.getParameters(), StatsSetupConst.FALSE);
    }
    if (oldPart == null) {
      newTPart.getTPartition().setParameters(new HashMap<String, String>());
      if (this.getConf().getBoolVar(HiveConf.ConfVars.HIVESTATSAUTOGATHER)) {
        StatsSetupConst.setBasicStatsStateForCreateTable(newTPart.getParameters(), StatsSetupConst.TRUE);
      }
      MetaStoreUtils.populateQuickStats(HiveStatsUtils.getFileStatusRecurse(newPartPath, -1, newPartPath.getFileSystem(conf)), newTPart.getParameters());
      try {
        LOG.debug("Adding new partition " + newTPart.getSpec());
        getSychronizedMSC().add_partition(newTPart.getTPartition());
      } catch (AlreadyExistsException aee) {
        // Multiple users concurrently issuing insert statements on the same
        // partition has the side effect that some queries may not see the
        // partition at the time they are issued, but find it already there when
        // they try to add it to the metastore, getting AlreadyExistsException
        // because an earlier query just created it (a race condition).
        // For example, with a table created as:
        //   create table T (name char(50)) partitioned by (ds string);
        // and these two queries launched at the same time from different sessions:
        //   insert into table T partition (ds) values ('Bob', 'today'); -- creates the partition 'today'
        //   insert into table T partition (ds) values ('Joe', 'today'); -- fails with AlreadyExistsException
        // In that case, we want to retry with alterPartition.
        LOG.debug("Caught AlreadyExistsException, trying to alter partition instead");
        setStatsPropAndAlterPartition(hasFollowingStatsTask, tbl, newTPart);
      }
    } else {
      setStatsPropAndAlterPartition(hasFollowingStatsTask, tbl, newTPart);
    }
    return newTPart;
  } catch (IOException e) {
    LOG.error(StringUtils.stringifyException(e));
    throw new HiveException(e);
  } catch (MetaException e) {
    LOG.error(StringUtils.stringifyException(e));
    throw new HiveException(e);
  } catch (InvalidOperationException e) {
    LOG.error(StringUtils.stringifyException(e));
    throw new HiveException(e);
  } catch (TException e) {
    LOG.error(StringUtils.stringifyException(e));
    throw new HiveException(e);
  }
}
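For orientation, a hedged usage sketch of the signature above; the staging path, partition column, and flag values are placeholders for the example, not taken from Hive:

import java.util.HashMap;
import java.util.Map;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.metadata.Hive;
import org.apache.hadoop.hive.ql.metadata.Partition;
import org.apache.hadoop.hive.ql.metadata.Table;

public class LoadPartitionSketch {
  public static Partition load(Hive db, Table tbl) throws Exception {
    Map<String, String> partSpec = new HashMap<String, String>();
    partSpec.put("ds", "2017-01-01"); // hypothetical partition column/value
    return db.loadPartition(
        new Path("/tmp/staging"), // loadPath: directory with the files to move
        tbl,
        partSpec,
        true,   // replace existing files in the partition
        true,   // inheritTableSpecs
        false,  // isSkewedStoreAsSubdir: no list bucketing here
        false,  // isSrcLocal
        false,  // isAcid
        false); // hasFollowingStatsTask
  }
}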