
Example 66 with WriteEntity

Use of org.apache.hadoop.hive.ql.hooks.WriteEntity in project hive by apache.

Class DDLTask, method createTableLike.

/**
   * Create a new table like an existing table.
   *
   * @param db
   *          The Hive client used to look up the source table and create the new one.
   * @param crtTbl
   *          Descriptor of the table to create (CREATE TABLE ... LIKE).
   * @return 0 when execution succeeds, a value above 0 when it fails.
   * @throws Exception
   *           Thrown if an unexpected error occurs.
   */
private int createTableLike(Hive db, CreateTableLikeDesc crtTbl) throws Exception {
    // Get the existing table
    Table oldtbl = db.getTable(crtTbl.getLikeTableName());
    Table tbl;
    if (oldtbl.getTableType() == TableType.VIRTUAL_VIEW || oldtbl.getTableType() == TableType.MATERIALIZED_VIEW) {
        String targetTableName = crtTbl.getTableName();
        tbl = db.newTable(targetTableName);
        if (crtTbl.getTblProps() != null) {
            tbl.getTTable().getParameters().putAll(crtTbl.getTblProps());
        }
        tbl.setTableType(TableType.MANAGED_TABLE);
        if (crtTbl.isExternal()) {
            tbl.setProperty("EXTERNAL", "TRUE");
            tbl.setTableType(TableType.EXTERNAL_TABLE);
        }
        tbl.setFields(oldtbl.getCols());
        tbl.setPartCols(oldtbl.getPartCols());
        if (crtTbl.getDefaultSerName() == null) {
            LOG.info("Default to LazySimpleSerDe for table " + crtTbl.getTableName());
            tbl.setSerializationLib(org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe.class.getName());
        } else {
            // let's validate that the serde exists
            validateSerDe(crtTbl.getDefaultSerName());
            tbl.setSerializationLib(crtTbl.getDefaultSerName());
        }
        if (crtTbl.getDefaultSerdeProps() != null) {
            Iterator<Entry<String, String>> iter = crtTbl.getDefaultSerdeProps().entrySet().iterator();
            while (iter.hasNext()) {
                Entry<String, String> m = iter.next();
                tbl.setSerdeParam(m.getKey(), m.getValue());
            }
        }
        tbl.setInputFormatClass(crtTbl.getDefaultInputFormat());
        tbl.setOutputFormatClass(crtTbl.getDefaultOutputFormat());
        tbl.getTTable().getSd().setInputFormat(tbl.getInputFormatClass().getName());
        tbl.getTTable().getSd().setOutputFormat(tbl.getOutputFormatClass().getName());
    } else {
        tbl = oldtbl;
        // find out database name and table name of target table
        String targetTableName = crtTbl.getTableName();
        String[] names = Utilities.getDbTableName(targetTableName);
        tbl.setDbName(names[0]);
        tbl.setTableName(names[1]);
        // using old table object, hence reset the owner to current user for new table.
        tbl.setOwner(SessionState.getUserFromAuthenticator());
        if (crtTbl.getLocation() != null) {
            tbl.setDataLocation(new Path(crtTbl.getLocation()));
        } else {
            tbl.unsetDataLocation();
        }
        Class<? extends Deserializer> serdeClass = oldtbl.getDeserializerClass();
        Map<String, String> params = tbl.getParameters();
        // We should copy only those table parameters that are specified in the config.
        SerDeSpec spec = AnnotationUtils.getAnnotation(serdeClass, SerDeSpec.class);
        String paramsStr = HiveConf.getVar(conf, HiveConf.ConfVars.DDL_CTL_PARAMETERS_WHITELIST);
        Set<String> retainer = new HashSet<String>();
        // for non-native table, property storage_handler should be retained
        retainer.add(META_TABLE_STORAGE);
        if (spec != null && spec.schemaProps() != null) {
            retainer.addAll(Arrays.asList(spec.schemaProps()));
        }
        if (paramsStr != null) {
            retainer.addAll(Arrays.asList(paramsStr.split(",")));
        }
        // Retain Parquet INT96 write zone property to keep Parquet timezone bugfixes.
        if (params.get(ParquetTableUtils.PARQUET_INT96_WRITE_ZONE_PROPERTY) != null) {
            retainer.add(ParquetTableUtils.PARQUET_INT96_WRITE_ZONE_PROPERTY);
        }
        if (!retainer.isEmpty()) {
            params.keySet().retainAll(retainer);
        } else {
            params.clear();
        }
        if (crtTbl.getTblProps() != null) {
            params.putAll(crtTbl.getTblProps());
        }
        if (crtTbl.isUserStorageFormat()) {
            tbl.setInputFormatClass(crtTbl.getDefaultInputFormat());
            tbl.setOutputFormatClass(crtTbl.getDefaultOutputFormat());
            tbl.getTTable().getSd().setInputFormat(tbl.getInputFormatClass().getName());
            tbl.getTTable().getSd().setOutputFormat(tbl.getOutputFormatClass().getName());
            if (crtTbl.getDefaultSerName() == null) {
                LOG.info("Default to LazySimpleSerDe for like table " + crtTbl.getTableName());
                tbl.setSerializationLib(org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe.class.getName());
            } else {
                // let's validate that the serde exists
                validateSerDe(crtTbl.getDefaultSerName());
                tbl.setSerializationLib(crtTbl.getDefaultSerName());
            }
        }
        tbl.getTTable().setTemporary(crtTbl.isTemporary());
        if (crtTbl.isExternal()) {
            tbl.setProperty("EXTERNAL", "TRUE");
            tbl.setTableType(TableType.EXTERNAL_TABLE);
        } else {
            tbl.getParameters().remove("EXTERNAL");
        }
    }
    if (!Utilities.isDefaultNameNode(conf)) {
        // If location is specified - ensure that it is a full qualified name
        makeLocationQualified(tbl.getDbName(), tbl.getTTable().getSd(), tbl.getTableName(), conf);
    }
    if (crtTbl.getLocation() == null && !tbl.isPartitioned() && conf.getBoolVar(HiveConf.ConfVars.HIVESTATSAUTOGATHER)) {
        StatsSetupConst.setBasicStatsStateForCreateTable(tbl.getTTable().getParameters(), StatsSetupConst.TRUE);
    }
    // create the table
    db.createTable(tbl, crtTbl.getIfNotExists());
    addIfAbsentByName(new WriteEntity(tbl, WriteEntity.WriteType.DDL_NO_LOCK));
    return 0;
}
Also used: Path (org.apache.hadoop.fs.Path), Table (org.apache.hadoop.hive.ql.metadata.Table), LazySimpleSerDe (org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe), SerDeSpec (org.apache.hadoop.hive.serde2.SerDeSpec), Entry (java.util.Map.Entry), WriteEntity (org.apache.hadoop.hive.ql.hooks.WriteEntity), HashSet (java.util.HashSet)
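
At the end of createTableLike the new table is registered as a WriteEntity with WriteType.DDL_NO_LOCK, and addIfAbsentByName ensures the same output is not recorded twice. As a rough illustration only (the class, field, and method shown here are assumptions, not the actual DDLTask code; only Entity.getName() is taken from the real API), such a name-based de-duplication helper could look like this:

import java.util.LinkedHashSet;
import java.util.Set;

import org.apache.hadoop.hive.ql.hooks.Entity;
import org.apache.hadoop.hive.ql.hooks.WriteEntity;

public class DdlOutputs {

    // Outputs collected for the current DDL task (illustrative field).
    private final Set<WriteEntity> outputs = new LinkedHashSet<>();

    // Add the entity only if no output with the same name has been recorded yet,
    // so repeated registrations of the same table collapse into one output.
    public void addIfAbsentByName(WriteEntity toAdd) {
        for (Entity existing : outputs) {
            if (existing.getName().equalsIgnoreCase(toAdd.getName())) {
                return;
            }
        }
        outputs.add(toAdd);
    }
}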

Example 67 with WriteEntity

Use of org.apache.hadoop.hive.ql.hooks.WriteEntity in project hive by apache.

Class DDLTask, method renamePartition.

/**
   * Rename a partition in a table.
   *
   * @param db
   *          The Hive client used to look up the table and rename the partition.
   * @param renamePartitionDesc
   *          Descriptor holding the old and new partition specs.
   * @return 0 when execution succeeds, a value above 0 when it fails.
   * @throws HiveException
   *           Thrown if the source partition does not exist or the rename fails.
   */
private int renamePartition(Hive db, RenamePartitionDesc renamePartitionDesc) throws HiveException {
    Table tbl = db.getTable(renamePartitionDesc.getTableName());
    LinkedHashMap<String, String> oldPartSpec = renamePartitionDesc.getOldPartSpec();
    Partition oldPart = db.getPartition(tbl, oldPartSpec, false);
    if (oldPart == null) {
        String partName = FileUtils.makePartName(new ArrayList<String>(oldPartSpec.keySet()), new ArrayList<String>(oldPartSpec.values()));
        throw new HiveException("Rename partition: source partition [" + partName + "] does not exist.");
    }
    Partition part = db.getPartition(tbl, oldPartSpec, false);
    part.setValues(renamePartitionDesc.getNewPartSpec());
    db.renamePartition(tbl, oldPartSpec, part);
    Partition newPart = db.getPartition(tbl, renamePartitionDesc.getNewPartSpec(), false);
    work.getInputs().add(new ReadEntity(oldPart));
    // We've already obtained a lock on the table, don't lock the partition too
    addIfAbsentByName(new WriteEntity(newPart, WriteEntity.WriteType.DDL_NO_LOCK));
    return 0;
}
Also used: ReadEntity (org.apache.hadoop.hive.ql.hooks.ReadEntity), Partition (org.apache.hadoop.hive.ql.metadata.Partition), AlterTableExchangePartition (org.apache.hadoop.hive.ql.plan.AlterTableExchangePartition), Table (org.apache.hadoop.hive.ql.metadata.Table), HiveException (org.apache.hadoop.hive.ql.metadata.HiveException), WriteEntity (org.apache.hadoop.hive.ql.hooks.WriteEntity)
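
Note the split above: the old partition goes into the inputs as a ReadEntity, while the renamed partition is added as a WriteEntity with WriteType.DDL_NO_LOCK because the table lock already covers it. A hedged sketch of a post-execution hook that reacts to such no-lock DDL outputs (the hook class itself is illustrative; only the standard ExecuteWithHookContext and HookContext interfaces are assumed):

import org.apache.hadoop.hive.ql.hooks.ExecuteWithHookContext;
import org.apache.hadoop.hive.ql.hooks.HookContext;
import org.apache.hadoop.hive.ql.hooks.WriteEntity;

// Illustrative post-execution hook: reports outputs that were registered
// without taking a separate lock (WriteType.DDL_NO_LOCK), such as the renamed
// partition produced by renamePartition above.
public class DdlNoLockReportingHook implements ExecuteWithHookContext {

    @Override
    public void run(HookContext hookContext) throws Exception {
        for (WriteEntity output : hookContext.getOutputs()) {
            if (output.getWriteType() == WriteEntity.WriteType.DDL_NO_LOCK) {
                System.out.println("DDL output without a separate lock: " + output.getName());
            }
        }
    }
}

Such a hook would be registered through hive.exec.post.hooks.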

Example 68 with WriteEntity

Use of org.apache.hadoop.hive.ql.hooks.WriteEntity in project incubator-atlas by apache.

Class HiveHook, method addOutputs.

private static void addOutputs(HiveMetaStoreBridge hiveBridge, HiveOperation op, SortedSet<WriteEntity> sortedOutputs, StringBuilder buffer, final Map<WriteEntity, Referenceable> refs, final boolean ignoreHDFSPathsInQFName) throws HiveException {
    if (refs != null) {
        Set<String> dataSetsProcessed = new LinkedHashSet<>();
        if (sortedOutputs != null) {
            for (WriteEntity output : sortedOutputs) {
                final Entity entity = output;
                if (!dataSetsProcessed.contains(output.getName().toLowerCase())) {
                    //HiveOperation.QUERY type encompasses INSERT, INSERT_OVERWRITE, UPDATE, DELETE, PATH_WRITE operations
                    if (addQueryType(op, (WriteEntity) entity)) {
                        buffer.append(SEP);
                        buffer.append(((WriteEntity) entity).getWriteType().name());
                    }
                    if (ignoreHDFSPathsInQFName && (Type.DFS_DIR.equals(output.getType()) || Type.LOCAL_DIR.equals(output.getType()))) {
                        LOG.debug("Skipping dfs dir output addition to process qualified name {} ", output.getName());
                    } else if (refs.containsKey(output)) {
                        if (output.getType() == Type.PARTITION || output.getType() == Type.TABLE) {
                            final Date createTime = HiveMetaStoreBridge.getTableCreatedTime(hiveBridge.hiveClient.getTable(output.getTable().getDbName(), output.getTable().getTableName()));
                            addDataset(buffer, refs.get(output), createTime.getTime());
                        } else {
                            addDataset(buffer, refs.get(output));
                        }
                    }
                    dataSetsProcessed.add(output.getName().toLowerCase());
                }
            }
        }
    }
}
Also used: LinkedHashSet (java.util.LinkedHashSet), WriteEntity (org.apache.hadoop.hive.ql.hooks.WriteEntity), ReadEntity (org.apache.hadoop.hive.ql.hooks.ReadEntity), Entity (org.apache.hadoop.hive.ql.hooks.Entity), Date (java.util.Date)
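
addOutputs expects its SortedSet<WriteEntity> to have been built with an entityComparator that orders entities by name, which is what makes the case-insensitive de-duplication on output.getName() stable. A minimal sketch of such a comparator (the actual comparator used by HiveHook may differ):

import java.util.Comparator;

import org.apache.hadoop.hive.ql.hooks.Entity;

// Hypothetical comparator: orders Hive entities by lower-cased name, matching
// the case-insensitive handling of entity names in addOutputs.
public class EntityNameComparator implements Comparator<Entity> {

    @Override
    public int compare(Entity left, Entity right) {
        return left.getName().toLowerCase().compareTo(right.getName().toLowerCase());
    }
}

A caller would then build the set as new TreeSet<WriteEntity>(new EntityNameComparator()) before passing it in.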

Example 69 with WriteEntity

Use of org.apache.hadoop.hive.ql.hooks.WriteEntity in project incubator-atlas by apache.

Class HiveHook, method handleExternalTables.

private void handleExternalTables(final HiveMetaStoreBridge dgiBridge, final HiveEventContext event, final LinkedHashMap<Type, Referenceable> tables) throws HiveException, MalformedURLException {
    List<Referenceable> entities = new ArrayList<>();
    final WriteEntity hiveEntity = (WriteEntity) getEntityByType(event.getOutputs(), Type.TABLE);
    Table hiveTable = hiveEntity == null ? null : hiveEntity.getTable();
    //Refresh to get the correct location
    if (hiveTable != null) {
        hiveTable = dgiBridge.hiveClient.getTable(hiveTable.getDbName(), hiveTable.getTableName());
    }
    if (hiveTable != null && TableType.EXTERNAL_TABLE.equals(hiveTable.getTableType())) {
        LOG.info("Registering external table process {} ", event.getQueryStr());
        final String location = lower(hiveTable.getDataLocation().toString());
        final ReadEntity dfsEntity = new ReadEntity();
        dfsEntity.setTyp(Type.DFS_DIR);
        dfsEntity.setD(new Path(location));
        SortedMap<ReadEntity, Referenceable> hiveInputsMap = new TreeMap<ReadEntity, Referenceable>(entityComparator) {

            {
                put(dfsEntity, dgiBridge.fillHDFSDataSet(location));
            }
        };
        SortedMap<WriteEntity, Referenceable> hiveOutputsMap = new TreeMap<WriteEntity, Referenceable>(entityComparator) {

            {
                put(hiveEntity, tables.get(Type.TABLE));
            }
        };
        SortedSet<ReadEntity> sortedIps = new TreeSet<>(entityComparator);
        sortedIps.addAll(hiveInputsMap.keySet());
        SortedSet<WriteEntity> sortedOps = new TreeSet<>(entityComparator);
        sortedOps.addAll(hiveOutputsMap.keySet());
        Referenceable processReferenceable = getProcessReferenceable(dgiBridge, event, sortedIps, sortedOps, hiveInputsMap, hiveOutputsMap);
        entities.addAll(tables.values());
        entities.add(processReferenceable);
        event.addMessage(new HookNotification.EntityUpdateRequest(event.getUser(), entities));
    }
}
Also used: Path (org.apache.hadoop.fs.Path), Table (org.apache.hadoop.hive.ql.metadata.Table), ArrayList (java.util.ArrayList), TreeMap (java.util.TreeMap), ReadEntity (org.apache.hadoop.hive.ql.hooks.ReadEntity), HookNotification (org.apache.atlas.notification.hook.HookNotification), Referenceable (org.apache.atlas.typesystem.Referenceable), TreeSet (java.util.TreeSet), WriteEntity (org.apache.hadoop.hive.ql.hooks.WriteEntity)
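
The two SortedMaps above are populated with double-brace initialization, i.e. anonymous TreeMap subclasses whose instance initializer performs the put. Shown only for readability, an equivalent without the anonymous subclasses (reusing the same local variables from the method) would be:

SortedMap<ReadEntity, Referenceable> hiveInputsMap = new TreeMap<>(entityComparator);
hiveInputsMap.put(dfsEntity, dgiBridge.fillHDFSDataSet(location));

SortedMap<WriteEntity, Referenceable> hiveOutputsMap = new TreeMap<>(entityComparator);
hiveOutputsMap.put(hiveEntity, tables.get(Type.TABLE));

The double-brace form keeps the map creation and its single entry together, at the cost of an extra anonymous class per call site.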

Example 70 with WriteEntity

Use of org.apache.hadoop.hive.ql.hooks.WriteEntity in project incubator-atlas by apache.

Class HiveHook, method registerProcess.

private void registerProcess(HiveMetaStoreBridge dgiBridge, HiveEventContext event) throws AtlasHookException {
    try {
        Set<ReadEntity> inputs = event.getInputs();
        Set<WriteEntity> outputs = event.getOutputs();
        //Even explain CTAS has operation name as CREATETABLE_AS_SELECT
        if (inputs.isEmpty() && outputs.isEmpty()) {
            LOG.info("Explain statement. Skipping...");
            return;
        }
        if (event.getQueryId() == null) {
            LOG.info("Query id/plan is missing for {}", event.getQueryStr());
        }
        final SortedMap<ReadEntity, Referenceable> source = new TreeMap<>(entityComparator);
        final SortedMap<WriteEntity, Referenceable> target = new TreeMap<>(entityComparator);
        final Set<String> dataSets = new HashSet<>();
        final Set<Referenceable> entities = new LinkedHashSet<>();
        boolean isSelectQuery = isSelectQuery(event);
        // filter out select queries which do not modify data
        if (!isSelectQuery) {
            SortedSet<ReadEntity> sortedHiveInputs = new TreeSet<>(entityComparator);
            if (event.getInputs() != null) {
                sortedHiveInputs.addAll(event.getInputs());
            }
            SortedSet<WriteEntity> sortedHiveOutputs = new TreeSet<>(entityComparator);
            if (event.getOutputs() != null) {
                sortedHiveOutputs.addAll(event.getOutputs());
            }
            for (ReadEntity readEntity : sortedHiveInputs) {
                processHiveEntity(dgiBridge, event, readEntity, dataSets, source, entities);
            }
            for (WriteEntity writeEntity : sortedHiveOutputs) {
                processHiveEntity(dgiBridge, event, writeEntity, dataSets, target, entities);
            }
            if (source.size() > 0 || target.size() > 0) {
                Referenceable processReferenceable = getProcessReferenceable(dgiBridge, event, sortedHiveInputs, sortedHiveOutputs, source, target);
                // setup Column Lineage
                List<Referenceable> sourceList = new ArrayList<>(source.values());
                List<Referenceable> targetList = new ArrayList<>(target.values());
                List<Referenceable> colLineageProcessInstances = new ArrayList<>();
                try {
                    Map<String, Referenceable> columnQNameToRef = ColumnLineageUtils.buildColumnReferenceableMap(sourceList, targetList);
                    colLineageProcessInstances = createColumnLineageProcessInstances(processReferenceable, event.lineageInfo, columnQNameToRef);
                } catch (Exception e) {
                    LOG.warn("Column lineage process setup failed with exception {}", e);
                }
                colLineageProcessInstances.add(0, processReferenceable);
                entities.addAll(colLineageProcessInstances);
                event.addMessage(new HookNotification.EntityUpdateRequest(event.getUser(), new ArrayList<>(entities)));
            } else {
                LOG.info("Skipped query {} since it has no getInputs() or resulting getOutputs()", event.getQueryStr());
            }
        } else {
            LOG.info("Skipped query {} for processing since it is a select query ", event.getQueryStr());
        }
    } catch (Exception e) {
        throw new AtlasHookException("HiveHook.registerProcess() failed.", e);
    }
}
Also used: LinkedHashSet (java.util.LinkedHashSet), ArrayList (java.util.ArrayList), TreeMap (java.util.TreeMap), AtlasHookException (org.apache.atlas.hook.AtlasHookException), HiveException (org.apache.hadoop.hive.ql.metadata.HiveException), MalformedURLException (java.net.MalformedURLException), ReadEntity (org.apache.hadoop.hive.ql.hooks.ReadEntity), HookNotification (org.apache.atlas.notification.hook.HookNotification), Referenceable (org.apache.atlas.typesystem.Referenceable), TreeSet (java.util.TreeSet), WriteEntity (org.apache.hadoop.hive.ql.hooks.WriteEntity), HashSet (java.util.HashSet)
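
registerProcess works entirely off the inputs and outputs Hive collected on the query plan, re-sorted with the same entityComparator. A hedged sketch of how a hook might gather and sort those sets before handing them to logic like the above (the comparator is the hypothetical EntityNameComparator sketched earlier; only QueryPlan.getInputs() and getOutputs() are taken from the real API):

import java.util.SortedSet;
import java.util.TreeSet;

import org.apache.hadoop.hive.ql.QueryPlan;
import org.apache.hadoop.hive.ql.hooks.ReadEntity;
import org.apache.hadoop.hive.ql.hooks.WriteEntity;

public class LineageInputCollector {

    // Sketch: copy the plan's inputs and outputs into name-sorted sets, the
    // shape registerProcess expects for deterministic lineage processing.
    public static SortedSet<ReadEntity> sortedInputs(QueryPlan plan) {
        SortedSet<ReadEntity> sorted = new TreeSet<>(new EntityNameComparator());
        if (plan.getInputs() != null) {
            sorted.addAll(plan.getInputs());
        }
        return sorted;
    }

    public static SortedSet<WriteEntity> sortedOutputs(QueryPlan plan) {
        SortedSet<WriteEntity> sorted = new TreeSet<>(new EntityNameComparator());
        if (plan.getOutputs() != null) {
            sorted.addAll(plan.getOutputs());
        }
        return sorted;
    }
}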

Aggregations

WriteEntity (org.apache.hadoop.hive.ql.hooks.WriteEntity): 77 usages
ReadEntity (org.apache.hadoop.hive.ql.hooks.ReadEntity): 33 usages
Table (org.apache.hadoop.hive.ql.metadata.Table): 33 usages
HiveException (org.apache.hadoop.hive.ql.metadata.HiveException): 22 usages
Partition (org.apache.hadoop.hive.ql.metadata.Partition): 21 usages
ArrayList (java.util.ArrayList): 14 usages
AlterTableExchangePartition (org.apache.hadoop.hive.ql.plan.AlterTableExchangePartition): 12 usages
DDLWork (org.apache.hadoop.hive.ql.plan.DDLWork): 12 usages
Referenceable (org.apache.atlas.typesystem.Referenceable): 11 usages
Path (org.apache.hadoop.fs.Path): 11 usages
Test (org.junit.Test): 11 usages
QueryPlan (org.apache.hadoop.hive.ql.QueryPlan): 10 usages
Test (org.testng.annotations.Test): 9 usages
HashMap (java.util.HashMap): 8 usages
LinkedHashMap (java.util.LinkedHashMap): 8 usages
Database (org.apache.hadoop.hive.metastore.api.Database): 8 usages
FieldSchema (org.apache.hadoop.hive.metastore.api.FieldSchema): 7 usages
InvalidOperationException (org.apache.hadoop.hive.metastore.api.InvalidOperationException): 7 usages
IOException (java.io.IOException): 6 usages
Map (java.util.Map): 6 usages