Use of org.apache.hadoop.hive.ql.hooks.WriteEntity in project hive by apache.
The class DDLTask, method createTableLike.
/**
 * Create a new table like an existing table.
 *
 * @param db
 *          The database in question.
 * @param crtTbl
 *          The descriptor of the table we are creating.
 * @return Returns 0 when execution succeeds and above 0 if it fails.
 * @throws HiveException
 *           Throws this exception if an unexpected error occurs.
 */
private int createTableLike(Hive db, CreateTableLikeDesc crtTbl) throws Exception {
  // Get the existing table
  Table oldtbl = db.getTable(crtTbl.getLikeTableName());
  Table tbl;
  if (oldtbl.getTableType() == TableType.VIRTUAL_VIEW ||
      oldtbl.getTableType() == TableType.MATERIALIZED_VIEW) {
    String targetTableName = crtTbl.getTableName();
    tbl = db.newTable(targetTableName);
    if (crtTbl.getTblProps() != null) {
      tbl.getTTable().getParameters().putAll(crtTbl.getTblProps());
    }
    tbl.setTableType(TableType.MANAGED_TABLE);
    if (crtTbl.isExternal()) {
      tbl.setProperty("EXTERNAL", "TRUE");
      tbl.setTableType(TableType.EXTERNAL_TABLE);
    }
    tbl.setFields(oldtbl.getCols());
    tbl.setPartCols(oldtbl.getPartCols());
    if (crtTbl.getDefaultSerName() == null) {
      LOG.info("Default to LazySimpleSerDe for table " + crtTbl.getTableName());
      tbl.setSerializationLib(org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe.class.getName());
    } else {
      // let's validate that the serde exists
      validateSerDe(crtTbl.getDefaultSerName());
      tbl.setSerializationLib(crtTbl.getDefaultSerName());
    }
    if (crtTbl.getDefaultSerdeProps() != null) {
      Iterator<Entry<String, String>> iter = crtTbl.getDefaultSerdeProps().entrySet().iterator();
      while (iter.hasNext()) {
        Entry<String, String> m = iter.next();
        tbl.setSerdeParam(m.getKey(), m.getValue());
      }
    }
    tbl.setInputFormatClass(crtTbl.getDefaultInputFormat());
    tbl.setOutputFormatClass(crtTbl.getDefaultOutputFormat());
    tbl.getTTable().getSd().setInputFormat(tbl.getInputFormatClass().getName());
    tbl.getTTable().getSd().setOutputFormat(tbl.getOutputFormatClass().getName());
  } else {
    tbl = oldtbl;
    // find out database name and table name of target table
    String targetTableName = crtTbl.getTableName();
    String[] names = Utilities.getDbTableName(targetTableName);
    tbl.setDbName(names[0]);
    tbl.setTableName(names[1]);
    // using old table object, hence reset the owner to current user for new table.
    tbl.setOwner(SessionState.getUserFromAuthenticator());
    if (crtTbl.getLocation() != null) {
      tbl.setDataLocation(new Path(crtTbl.getLocation()));
    } else {
      tbl.unsetDataLocation();
    }
    Class<? extends Deserializer> serdeClass = oldtbl.getDeserializerClass();
    Map<String, String> params = tbl.getParameters();
    // We should copy only those table parameters that are specified in the config.
    SerDeSpec spec = AnnotationUtils.getAnnotation(serdeClass, SerDeSpec.class);
    String paramsStr = HiveConf.getVar(conf, HiveConf.ConfVars.DDL_CTL_PARAMETERS_WHITELIST);
    Set<String> retainer = new HashSet<String>();
    // for non-native table, property storage_handler should be retained
    retainer.add(META_TABLE_STORAGE);
    if (spec != null && spec.schemaProps() != null) {
      retainer.addAll(Arrays.asList(spec.schemaProps()));
    }
    if (paramsStr != null) {
      retainer.addAll(Arrays.asList(paramsStr.split(",")));
    }
    // Retain Parquet INT96 write zone property to keep Parquet timezone bugfixes.
    if (params.get(ParquetTableUtils.PARQUET_INT96_WRITE_ZONE_PROPERTY) != null) {
      retainer.add(ParquetTableUtils.PARQUET_INT96_WRITE_ZONE_PROPERTY);
    }
    if (!retainer.isEmpty()) {
      params.keySet().retainAll(retainer);
    } else {
      params.clear();
    }
    if (crtTbl.getTblProps() != null) {
      params.putAll(crtTbl.getTblProps());
    }
    if (crtTbl.isUserStorageFormat()) {
      tbl.setInputFormatClass(crtTbl.getDefaultInputFormat());
      tbl.setOutputFormatClass(crtTbl.getDefaultOutputFormat());
      tbl.getTTable().getSd().setInputFormat(tbl.getInputFormatClass().getName());
      tbl.getTTable().getSd().setOutputFormat(tbl.getOutputFormatClass().getName());
      if (crtTbl.getDefaultSerName() == null) {
        LOG.info("Default to LazySimpleSerDe for like table " + crtTbl.getTableName());
        tbl.setSerializationLib(org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe.class.getName());
      } else {
        // let's validate that the serde exists
        validateSerDe(crtTbl.getDefaultSerName());
        tbl.setSerializationLib(crtTbl.getDefaultSerName());
      }
    }
    tbl.getTTable().setTemporary(crtTbl.isTemporary());
    if (crtTbl.isExternal()) {
      tbl.setProperty("EXTERNAL", "TRUE");
      tbl.setTableType(TableType.EXTERNAL_TABLE);
    } else {
      tbl.getParameters().remove("EXTERNAL");
    }
  }
  if (!Utilities.isDefaultNameNode(conf)) {
    // If location is specified, ensure that it is a fully qualified name
    makeLocationQualified(tbl.getDbName(), tbl.getTTable().getSd(), tbl.getTableName(), conf);
  }
  if (crtTbl.getLocation() == null && !tbl.isPartitioned()
      && conf.getBoolVar(HiveConf.ConfVars.HIVESTATSAUTOGATHER)) {
    StatsSetupConst.setBasicStatsStateForCreateTable(tbl.getTTable().getParameters(), StatsSetupConst.TRUE);
  }
  // create the table
  db.createTable(tbl, crtTbl.getIfNotExists());
  addIfAbsentByName(new WriteEntity(tbl, WriteEntity.WriteType.DDL_NO_LOCK));
  return 0;
}
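
The WriteEntity created on the last lines is registered as a DDL output with WriteType.DDL_NO_LOCK, so post-execution hooks and the authorization layer see the new table without the DDL task taking an additional lock. The helper addIfAbsentByName is not shown above; the following is only a minimal sketch of the dedupe-by-name idea it suggests, where the local outputs set, the case-insensitive comparison, and the class shape are assumptions rather than the actual DDLTask implementation.

import java.util.LinkedHashSet;
import java.util.Set;

import org.apache.hadoop.hive.ql.hooks.WriteEntity;

// Hypothetical helper illustrating dedupe-by-name registration of WriteEntity outputs.
// In Hive the outputs would come from the task's DDLWork; a local set stands in for it here.
public class OutputRegistrar {

  private final Set<WriteEntity> outputs = new LinkedHashSet<>();

  public void addIfAbsentByName(WriteEntity candidate) {
    for (WriteEntity existing : outputs) {
      // Entity.getName() yields an identifier such as "dbName@tableName"
      if (existing.getName().equalsIgnoreCase(candidate.getName())) {
        return; // an output with this name is already registered
      }
    }
    outputs.add(candidate);
  }

  public Set<WriteEntity> getOutputs() {
    return outputs;
  }
}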
Use of org.apache.hadoop.hive.ql.hooks.WriteEntity in project hive by apache.
The class DDLTask, method renamePartition.
/**
 * Rename a partition in a table.
 *
 * @param db
 *          The database containing the table whose partition is renamed.
 * @param renamePartitionDesc
 *          Descriptor holding the old and new partition specs.
 * @return Returns 0 when execution succeeds and above 0 if it fails.
 * @throws HiveException
 */
private int renamePartition(Hive db, RenamePartitionDesc renamePartitionDesc) throws HiveException {
  Table tbl = db.getTable(renamePartitionDesc.getTableName());
  LinkedHashMap<String, String> oldPartSpec = renamePartitionDesc.getOldPartSpec();
  Partition oldPart = db.getPartition(tbl, oldPartSpec, false);
  if (oldPart == null) {
    String partName = FileUtils.makePartName(new ArrayList<String>(oldPartSpec.keySet()),
        new ArrayList<String>(oldPartSpec.values()));
    throw new HiveException("Rename partition: source partition [" + partName + "] does not exist.");
  }
  Partition part = db.getPartition(tbl, oldPartSpec, false);
  part.setValues(renamePartitionDesc.getNewPartSpec());
  db.renamePartition(tbl, oldPartSpec, part);
  Partition newPart = db.getPartition(tbl, renamePartitionDesc.getNewPartSpec(), false);
  work.getInputs().add(new ReadEntity(oldPart));
  // We've already obtained a lock on the table, don't lock the partition too
  addIfAbsentByName(new WriteEntity(newPart, WriteEntity.WriteType.DDL_NO_LOCK));
  return 0;
}
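
Both DDL snippets above pass WriteEntity.WriteType.DDL_NO_LOCK because the surrounding operation already holds the necessary table lock. The write type is what downstream hooks and the lock manager use to decide how an output should be treated. The sketch below only illustrates choosing between the DDL write types; the enum constants exist in Hive, but the decision logic is a simplified assumption, not Hive's actual lock handling.

import org.apache.hadoop.hive.ql.hooks.WriteEntity;

// Illustrative mapping from a DDL operation's locking needs to a WriteEntity.WriteType.
public final class DdlWriteTypes {

  private DdlWriteTypes() {
  }

  public static WriteEntity.WriteType forDdl(boolean lockAlreadyHeld, boolean needsExclusive) {
    if (lockAlreadyHeld) {
      // e.g. rename partition above: the table lock is already obtained
      return WriteEntity.WriteType.DDL_NO_LOCK;
    }
    return needsExclusive ? WriteEntity.WriteType.DDL_EXCLUSIVE : WriteEntity.WriteType.DDL_SHARED;
  }
}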
Use of org.apache.hadoop.hive.ql.hooks.WriteEntity in project incubator-atlas by apache.
The class HiveHook, method addOutputs.
private static void addOutputs(HiveMetaStoreBridge hiveBridge, HiveOperation op,
    SortedSet<WriteEntity> sortedOutputs, StringBuilder buffer,
    final Map<WriteEntity, Referenceable> refs,
    final boolean ignoreHDFSPathsInQFName) throws HiveException {
  if (refs != null) {
    Set<String> dataSetsProcessed = new LinkedHashSet<>();
    if (sortedOutputs != null) {
      for (WriteEntity output : sortedOutputs) {
        final Entity entity = output;
        if (!dataSetsProcessed.contains(output.getName().toLowerCase())) {
          // HiveOperation.QUERY type encompasses INSERT, INSERT_OVERWRITE, UPDATE, DELETE, PATH_WRITE operations
          if (addQueryType(op, (WriteEntity) entity)) {
            buffer.append(SEP);
            buffer.append(((WriteEntity) entity).getWriteType().name());
          }
          if (ignoreHDFSPathsInQFName &&
              (Type.DFS_DIR.equals(output.getType()) || Type.LOCAL_DIR.equals(output.getType()))) {
            LOG.debug("Skipping dfs dir output addition to process qualified name {} ", output.getName());
          } else if (refs.containsKey(output)) {
            if (output.getType() == Type.PARTITION || output.getType() == Type.TABLE) {
              final Date createTime = HiveMetaStoreBridge.getTableCreatedTime(
                  hiveBridge.hiveClient.getTable(output.getTable().getDbName(), output.getTable().getTableName()));
              addDataset(buffer, refs.get(output), createTime.getTime());
            } else {
              addDataset(buffer, refs.get(output));
            }
          }
          dataSetsProcessed.add(output.getName().toLowerCase());
        }
      }
    }
  }
}
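
addOutputs relies on its SortedSet<WriteEntity> argument being ordered by a shared entityComparator (visible in the following snippets) so that the generated process qualified name is deterministic. A minimal name-based comparator in that spirit could look like the sketch below; it is an assumption, and the actual comparator in Atlas' HiveHook may differ.

import java.util.Comparator;

import org.apache.hadoop.hive.ql.hooks.Entity;

// Plausible sketch: order entities by lower-cased name so SortedSet/SortedMap iteration is stable.
public class EntityNameComparator implements Comparator<Entity> {

  @Override
  public int compare(Entity left, Entity right) {
    return left.getName().toLowerCase().compareTo(right.getName().toLowerCase());
  }
}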
Use of org.apache.hadoop.hive.ql.hooks.WriteEntity in project incubator-atlas by apache.
The class HiveHook, method handleExternalTables.
private void handleExternalTables(final HiveMetaStoreBridge dgiBridge, final HiveEventContext event,
    final LinkedHashMap<Type, Referenceable> tables) throws HiveException, MalformedURLException {
  List<Referenceable> entities = new ArrayList<>();
  final WriteEntity hiveEntity = (WriteEntity) getEntityByType(event.getOutputs(), Type.TABLE);
  Table hiveTable = hiveEntity == null ? null : hiveEntity.getTable();
  // Refresh to get the correct location
  if (hiveTable != null) {
    hiveTable = dgiBridge.hiveClient.getTable(hiveTable.getDbName(), hiveTable.getTableName());
  }
  if (hiveTable != null && TableType.EXTERNAL_TABLE.equals(hiveTable.getTableType())) {
    LOG.info("Registering external table process {} ", event.getQueryStr());
    final String location = lower(hiveTable.getDataLocation().toString());
    final ReadEntity dfsEntity = new ReadEntity();
    dfsEntity.setTyp(Type.DFS_DIR);
    dfsEntity.setD(new Path(location));
    SortedMap<ReadEntity, Referenceable> hiveInputsMap = new TreeMap<ReadEntity, Referenceable>(entityComparator) {
      {
        put(dfsEntity, dgiBridge.fillHDFSDataSet(location));
      }
    };
    SortedMap<WriteEntity, Referenceable> hiveOutputsMap = new TreeMap<WriteEntity, Referenceable>(entityComparator) {
      {
        put(hiveEntity, tables.get(Type.TABLE));
      }
    };
    SortedSet<ReadEntity> sortedIps = new TreeSet<>(entityComparator);
    sortedIps.addAll(hiveInputsMap.keySet());
    SortedSet<WriteEntity> sortedOps = new TreeSet<>(entityComparator);
    sortedOps.addAll(hiveOutputsMap.keySet());
    Referenceable processReferenceable =
        getProcessReferenceable(dgiBridge, event, sortedIps, sortedOps, hiveInputsMap, hiveOutputsMap);
    entities.addAll(tables.values());
    entities.add(processReferenceable);
    event.addMessage(new HookNotification.EntityUpdateRequest(event.getUser(), entities));
  }
}
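
handleExternalTables obtains the table output through a getEntityByType helper that is not shown here. A generic sketch of such a lookup, assuming it simply returns the first entity of the requested Entity.Type (the real HiveHook helper may behave differently), is:

import java.util.Collection;

import org.apache.hadoop.hive.ql.hooks.Entity;
import org.apache.hadoop.hive.ql.hooks.Entity.Type;

// Hypothetical lookup: return the first entity of the requested type, or null if none matches.
public final class EntityLookup {

  private EntityLookup() {
  }

  public static <T extends Entity> T getEntityByType(Collection<T> entities, Type type) {
    if (entities != null) {
      for (T entity : entities) {
        if (entity.getType() == type) {
          return entity;
        }
      }
    }
    return null;
  }
}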
Use of org.apache.hadoop.hive.ql.hooks.WriteEntity in project incubator-atlas by apache.
The class HiveHook, method registerProcess.
private void registerProcess(HiveMetaStoreBridge dgiBridge, HiveEventContext event) throws AtlasHookException {
  try {
    Set<ReadEntity> inputs = event.getInputs();
    Set<WriteEntity> outputs = event.getOutputs();
    // Even explain CTAS has operation name as CREATETABLE_AS_SELECT
    if (inputs.isEmpty() && outputs.isEmpty()) {
      LOG.info("Explain statement. Skipping...");
      return;
    }
    if (event.getQueryId() == null) {
      LOG.info("Query id/plan is missing for {}", event.getQueryStr());
    }
    final SortedMap<ReadEntity, Referenceable> source = new TreeMap<>(entityComparator);
    final SortedMap<WriteEntity, Referenceable> target = new TreeMap<>(entityComparator);
    final Set<String> dataSets = new HashSet<>();
    final Set<Referenceable> entities = new LinkedHashSet<>();
    boolean isSelectQuery = isSelectQuery(event);
    // filter out select queries which do not modify data
    if (!isSelectQuery) {
      SortedSet<ReadEntity> sortedHiveInputs = new TreeSet<>(entityComparator);
      if (event.getInputs() != null) {
        sortedHiveInputs.addAll(event.getInputs());
      }
      SortedSet<WriteEntity> sortedHiveOutputs = new TreeSet<>(entityComparator);
      if (event.getOutputs() != null) {
        sortedHiveOutputs.addAll(event.getOutputs());
      }
      for (ReadEntity readEntity : sortedHiveInputs) {
        processHiveEntity(dgiBridge, event, readEntity, dataSets, source, entities);
      }
      for (WriteEntity writeEntity : sortedHiveOutputs) {
        processHiveEntity(dgiBridge, event, writeEntity, dataSets, target, entities);
      }
      if (source.size() > 0 || target.size() > 0) {
        Referenceable processReferenceable =
            getProcessReferenceable(dgiBridge, event, sortedHiveInputs, sortedHiveOutputs, source, target);
        // setup Column Lineage
        List<Referenceable> sourceList = new ArrayList<>(source.values());
        List<Referenceable> targetList = new ArrayList<>(target.values());
        List<Referenceable> colLineageProcessInstances = new ArrayList<>();
        try {
          Map<String, Referenceable> columnQNameToRef =
              ColumnLineageUtils.buildColumnReferenceableMap(sourceList, targetList);
          colLineageProcessInstances =
              createColumnLineageProcessInstances(processReferenceable, event.lineageInfo, columnQNameToRef);
        } catch (Exception e) {
          LOG.warn("Column lineage process setup failed with exception {}", e);
        }
        colLineageProcessInstances.add(0, processReferenceable);
        entities.addAll(colLineageProcessInstances);
        event.addMessage(new HookNotification.EntityUpdateRequest(event.getUser(), new ArrayList<>(entities)));
      } else {
        LOG.info("Skipped query {} since it has no getInputs() or resulting getOutputs()", event.getQueryStr());
      }
    } else {
      LOG.info("Skipped query {} for processing since it is a select query ", event.getQueryStr());
    }
  } catch (Exception e) {
    throw new AtlasHookException("HiveHook.registerProcess() failed.", e);
  }
}
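
All of the Atlas snippets ultimately receive their WriteEntity sets from Hive's post-execution hook machinery. As a usage reference, a minimal hook that just prints each output's type, name, and write type could look like the sketch below; the class name and the println logging are illustrative, and a real hook would be registered through hive.exec.post.hooks.

import java.util.Set;

import org.apache.hadoop.hive.ql.hooks.ExecuteWithHookContext;
import org.apache.hadoop.hive.ql.hooks.HookContext;
import org.apache.hadoop.hive.ql.hooks.WriteEntity;

// Minimal post-execution hook sketch: inspect the WriteEntity outputs of the finished query.
public class LoggingPostExecHook implements ExecuteWithHookContext {

  @Override
  public void run(HookContext hookContext) throws Exception {
    Set<WriteEntity> outputs = hookContext.getOutputs();
    for (WriteEntity output : outputs) {
      // Entity type (TABLE, PARTITION, DFS_DIR, ...), entity name, and how it is written
      System.out.println(output.getType() + " " + output.getName()
          + " writeType=" + output.getWriteType());
    }
  }
}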