
Example 1 with HiveSourceWatermarker

Use of org.apache.gobblin.data.management.conversion.hive.watermarker.HiveSourceWatermarker in project incubator-gobblin by apache.

From the class HiveTask, method executePublishQueries:

protected void executePublishQueries(QueryBasedHivePublishEntity publishEntity) {
    Set<String> cleanUpQueries = Sets.newLinkedHashSet();
    Set<String> publishQueries = Sets.newLinkedHashSet();
    List<String> directoriesToDelete = Lists.newArrayList();
    FileSystem fs = null;
    try {
        // Resolve the FileSystem for the Hive source data from the work unit state.
        fs = HiveSource.getSourceFs(workUnitState);
        // Collect staging cleanup queries and directories; they are removed in the finally block.
        if (publishEntity.getCleanupQueries() != null) {
            cleanUpQueries.addAll(publishEntity.getCleanupQueries());
        }
        if (publishEntity.getCleanupDirectories() != null) {
            directoriesToDelete.addAll(publishEntity.getCleanupDirectories());
        }
        if (publishEntity.getPublishDirectories() != null) {
            // Publish snapshot / partition directories
            Map<String, String> publishDirectories = publishEntity.getPublishDirectories();
            try {
                for (Map.Entry<String, String> publishDir : publishDirectories.entrySet()) {
                    HadoopUtils.renamePath(fs, new Path(publishDir.getKey()), new Path(publishDir.getValue()), true);
                }
            } catch (Throwable t) {
                throw Throwables.propagate(t);
            }
        }
        // Collect the publish queries to be executed over JDBC.
        if (publishEntity.getPublishQueries() != null) {
            publishQueries.addAll(publishEntity.getPublishQueries());
        }
        WorkUnitState wus = this.workUnitState;
        // Execute the publish queries, then mark the work unit as committed.
        this.hiveJdbcConnector.executeStatements(publishQueries.toArray(new String[publishQueries.size()]));
        wus.setWorkingState(WorkUnitState.WorkingState.COMMITTED);
        if (wus.getPropAsBoolean(USE_WATERMARKER_KEY, true)) {
            // Instantiate the configured HiveSourceWatermarkerFactory reflectively and record the
            // actual high watermark for this work unit.
            HiveSourceWatermarker watermarker = GobblinConstructorUtils.invokeConstructor(HiveSourceWatermarkerFactory.class, wus.getProp(HiveSource.HIVE_SOURCE_WATERMARKER_FACTORY_CLASS_KEY, HiveSource.DEFAULT_HIVE_SOURCE_WATERMARKER_FACTORY_CLASS)).createFromState(wus);
            watermarker.setActualHighWatermark(wus);
        }
    } catch (RuntimeException re) {
        throw re;
    } catch (Exception e) {
        log.error("Error in HiveMaterializer generate publish queries", e);
    } finally {
        // Always attempt to run the cleanup queries and delete the staging directories.
        try {
            this.hiveJdbcConnector.executeStatements(cleanUpQueries.toArray(new String[cleanUpQueries.size()]));
            HadoopUtils.deleteDirectories(fs, directoriesToDelete, true, true);
        } catch (RuntimeException re) {
            throw re;
        } catch (Exception e) {
            log.error("Failed to cleanup staging entities.", e);
        }
    }
}
Also used : Path(org.apache.hadoop.fs.Path) WorkUnitState(org.apache.gobblin.configuration.WorkUnitState) SQLException(java.sql.SQLException) HiveSourceWatermarker(org.apache.gobblin.data.management.conversion.hive.watermarker.HiveSourceWatermarker) FileSystem(org.apache.hadoop.fs.FileSystem) HiveSourceWatermarkerFactory(org.apache.gobblin.data.management.conversion.hive.watermarker.HiveSourceWatermarkerFactory) Map(java.util.Map)
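To make the watermark step easier to follow, here is a minimal sketch (not code from the project) that unpacks the one-line watermarker lookup above into named steps: the factory class name is read from job configuration with the source default as fallback, the factory is instantiated reflectively, and the resulting watermarker records the actual high watermark. Import paths not listed above (HiveSource, GobblinConstructorUtils) are assumptions.

import org.apache.gobblin.configuration.WorkUnitState;
import org.apache.gobblin.data.management.conversion.hive.source.HiveSource;
import org.apache.gobblin.data.management.conversion.hive.watermarker.HiveSourceWatermarker;
import org.apache.gobblin.data.management.conversion.hive.watermarker.HiveSourceWatermarkerFactory;
import org.apache.gobblin.util.reflection.GobblinConstructorUtils;

public class WatermarkerCommitSketch {

    // Sketch only: restates the watermark-commit pattern used in executePublishQueries.
    static void commitActualHighWatermark(WorkUnitState wus) {
        // Factory class name from job configuration, falling back to the HiveSource default.
        String factoryClassName = wus.getProp(HiveSource.HIVE_SOURCE_WATERMARKER_FACTORY_CLASS_KEY, HiveSource.DEFAULT_HIVE_SOURCE_WATERMARKER_FACTORY_CLASS);

        // Reflectively construct the factory, then create a watermarker bound to this state.
        HiveSourceWatermarkerFactory factory = GobblinConstructorUtils.invokeConstructor(HiveSourceWatermarkerFactory.class, factoryClassName);
        HiveSourceWatermarker watermarker = factory.createFromState(wus);

        // Record the actual high watermark once the publish queries have committed.
        watermarker.setActualHighWatermark(wus);
    }
}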

Aggregations

SQLException (java.sql.SQLException): 1
Map (java.util.Map): 1
WorkUnitState (org.apache.gobblin.configuration.WorkUnitState): 1
HiveSourceWatermarker (org.apache.gobblin.data.management.conversion.hive.watermarker.HiveSourceWatermarker): 1
HiveSourceWatermarkerFactory (org.apache.gobblin.data.management.conversion.hive.watermarker.HiveSourceWatermarkerFactory): 1
FileSystem (org.apache.hadoop.fs.FileSystem): 1
Path (org.apache.hadoop.fs.Path): 1