
Example 36 with Closer

use of com.google.common.io.Closer in project incubator-gobblin by apache.

From class Log4jConfigHelper, method updateLog4jConfiguration.

/**
 * Update the log4j configuration.
 *
 * @param targetClass the target class used to get the original log4j configuration file as a resource
 * @param log4jFileName the custom log4j configuration properties file name
 * @throws IOException if there's something wrong with updating the log4j configuration
 */
public static void updateLog4jConfiguration(Class<?> targetClass, String log4jFileName) throws IOException {
    final Closer closer = Closer.create();
    try {
        final InputStream inputStream = closer.register(targetClass.getResourceAsStream("/" + log4jFileName));
        final Properties originalProperties = new Properties();
        originalProperties.load(inputStream);
        LogManager.resetConfiguration();
        PropertyConfigurator.configure(originalProperties);
    } catch (Throwable t) {
        throw closer.rethrow(t);
    } finally {
        closer.close();
    }
}
Also used : Closer(com.google.common.io.Closer) InputStream(java.io.InputStream) Properties(java.util.Properties)
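
For context, a caller would typically invoke this helper once during startup. A minimal sketch, assuming the helper lives in org.apache.gobblin.util.logs (its location in recent Gobblin releases) and that a custom log4j-debug.properties file is bundled at the classpath root; both names are assumptions for illustration:

import java.io.IOException;

import org.apache.gobblin.util.logs.Log4jConfigHelper;

public class MyApp {
    public static void main(String[] args) throws IOException {
        // Replace the current log4j configuration with a properties file
        // found at the root of the classpath (hypothetical file name).
        Log4jConfigHelper.updateLog4jConfiguration(MyApp.class, "log4j-debug.properties");
    }
}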

Example 37 with Closer

use of com.google.common.io.Closer in project incubator-gobblin by apache.

From class HivePartitionFileSet, method generateCopyEntities.

@Override
protected Collection<CopyEntity> generateCopyEntities() throws IOException {
    try (Closer closer = Closer.create()) {
        MultiTimingEvent multiTimer = closer.register(new MultiTimingEvent(this.eventSubmitter, "PartitionCopy", true));
        int stepPriority = 0;
        String fileSet = HiveCopyEntityHelper.gson.toJson(this.partition.getValues());
        List<CopyEntity> copyEntities = Lists.newArrayList();
        stepPriority = hiveCopyEntityHelper.addSharedSteps(copyEntities, fileSet, stepPriority);
        multiTimer.nextStage(HiveCopyEntityHelper.Stages.COMPUTE_TARGETS);
        Path targetPath = hiveCopyEntityHelper.getTargetLocation(hiveCopyEntityHelper.getDataset().fs,
                hiveCopyEntityHelper.getTargetFs(), this.partition.getDataLocation(), Optional.of(this.partition));
        Partition targetPartition = getTargetPartition(this.partition, targetPath);
        multiTimer.nextStage(HiveCopyEntityHelper.Stages.EXISTING_PARTITION);
        if (this.existingTargetPartition.isPresent()) {
            hiveCopyEntityHelper.getTargetPartitions().remove(this.partition.getValues());
            try {
                checkPartitionCompatibility(targetPartition, this.existingTargetPartition.get());
            } catch (IOException ioe) {
                if (hiveCopyEntityHelper.getExistingEntityPolicy() != HiveCopyEntityHelper.ExistingEntityPolicy.REPLACE_PARTITIONS
                        && hiveCopyEntityHelper.getExistingEntityPolicy() != HiveCopyEntityHelper.ExistingEntityPolicy.REPLACE_TABLE_AND_PARTITIONS) {
                    log.error("Source and target partitions are not compatible. Aborting copy of partition " + this.partition, ioe);
                    return Lists.newArrayList();
                }
                log.warn("Source and target partitions are not compatible. Will override target partition: " + ioe.getMessage());
                log.debug("Incompatibility details: ", ioe);
                stepPriority = hiveCopyEntityHelper.addPartitionDeregisterSteps(copyEntities, fileSet, stepPriority,
                        hiveCopyEntityHelper.getTargetTable(), this.existingTargetPartition.get());
                this.existingTargetPartition = Optional.absent();
            }
        }
        multiTimer.nextStage(HiveCopyEntityHelper.Stages.PARTITION_SKIP_PREDICATE);
        if (hiveCopyEntityHelper.getFastPartitionSkip().isPresent() && hiveCopyEntityHelper.getFastPartitionSkip().get().apply(this)) {
            log.info(String.format("Skipping copy of partition %s due to fast partition skip predicate.", this.partition.getCompleteName()));
            return Lists.newArrayList();
        }
        HiveSpec partitionHiveSpec = new SimpleHiveSpec.Builder<>(targetPath)
                .withTable(HiveMetaStoreUtils.getHiveTable(hiveCopyEntityHelper.getTargetTable().getTTable()))
                .withPartition(Optional.of(HiveMetaStoreUtils.getHivePartition(targetPartition.getTPartition())))
                .build();
        HiveRegisterStep register = new HiveRegisterStep(hiveCopyEntityHelper.getTargetURI(), partitionHiveSpec, hiveCopyEntityHelper.getHiveRegProps());
        copyEntities.add(new PostPublishStep(fileSet, Maps.<String, String>newHashMap(), register, stepPriority++));
        multiTimer.nextStage(HiveCopyEntityHelper.Stages.CREATE_LOCATIONS);
        HiveLocationDescriptor sourceLocation = HiveLocationDescriptor.forPartition(this.partition, hiveCopyEntityHelper.getDataset().fs, this.properties);
        HiveLocationDescriptor desiredTargetLocation = HiveLocationDescriptor.forPartition(targetPartition, hiveCopyEntityHelper.getTargetFs(), this.properties);
        Optional<HiveLocationDescriptor> existingTargetLocation = this.existingTargetPartition.isPresent()
                ? Optional.of(HiveLocationDescriptor.forPartition(this.existingTargetPartition.get(),
                        hiveCopyEntityHelper.getTargetFs(), this.properties))
                : Optional.<HiveLocationDescriptor>absent();
        multiTimer.nextStage(HiveCopyEntityHelper.Stages.FULL_PATH_DIFF);
        HiveCopyEntityHelper.DiffPathSet diffPathSet = HiveCopyEntityHelper.fullPathDiff(sourceLocation,
                desiredTargetLocation, existingTargetLocation, Optional.<Partition>absent(), multiTimer, hiveCopyEntityHelper);
        multiTimer.nextStage(HiveCopyEntityHelper.Stages.CREATE_DELETE_UNITS);
        if (diffPathSet.pathsToDelete.size() > 0) {
            DeleteFileCommitStep deleteStep = DeleteFileCommitStep.fromPaths(hiveCopyEntityHelper.getTargetFs(), diffPathSet.pathsToDelete, hiveCopyEntityHelper.getDataset().properties);
            copyEntities.add(new PrePublishStep(fileSet, Maps.<String, String>newHashMap(), deleteStep, stepPriority++));
        }
        multiTimer.nextStage(HiveCopyEntityHelper.Stages.CREATE_COPY_UNITS);
        for (CopyableFile.Builder builder : hiveCopyEntityHelper.getCopyableFilesFromPaths(diffPathSet.filesToCopy, hiveCopyEntityHelper.getConfiguration(), Optional.of(this.partition))) {
            CopyableFile fileEntity = builder.fileSet(fileSet).checksum(new byte[0]).datasetOutputPath(desiredTargetLocation.location.toString()).build();
            this.hiveCopyEntityHelper.setCopyableFileDatasets(fileEntity);
            copyEntities.add(fileEntity);
        }
        log.info("Created {} copy entities for partition {}", copyEntities.size(), this.partition.getCompleteName());
        return copyEntities;
    }
}
Also used : Closer(com.google.common.io.Closer) Path(org.apache.hadoop.fs.Path) Partition(org.apache.hadoop.hive.ql.metadata.Partition) CopyEntity(org.apache.gobblin.data.management.copy.CopyEntity) PostPublishStep(org.apache.gobblin.data.management.copy.entities.PostPublishStep) MultiTimingEvent(org.apache.gobblin.metrics.event.MultiTimingEvent) IOException(java.io.IOException) DeleteFileCommitStep(org.apache.gobblin.util.commit.DeleteFileCommitStep) HiveRegisterStep(org.apache.gobblin.hive.HiveRegisterStep) SimpleHiveSpec(org.apache.gobblin.hive.spec.SimpleHiveSpec) CopyableFile(org.apache.gobblin.data.management.copy.CopyableFile) PrePublishStep(org.apache.gobblin.data.management.copy.entities.PrePublishStep) HiveSpec(org.apache.gobblin.hive.spec.HiveSpec) SimpleHiveSpec(org.apache.gobblin.hive.spec.SimpleHiveSpec)
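
Example 37 leans on Java 7 try-with-resources to close the Closer itself, so every resource registered along the way (here, the MultiTimingEvent) is closed automatically when the block exits, in reverse registration order. A minimal standalone sketch of the same composition, with hypothetical file names:

import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;

import com.google.common.io.Closer;

public class CloserComposition {
    public static void main(String[] args) throws IOException {
        try (Closer closer = Closer.create()) {
            // Registered resources close in reverse order when the closer closes,
            // and an exception thrown here is not masked by a later close() failure.
            InputStream first = closer.register(new FileInputStream("a.txt"));
            InputStream second = closer.register(new FileInputStream("b.txt"));
            // ... read from both streams; no explicit finally block is needed ...
        }
    }
}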

Example 38 with Closer

use of com.google.common.io.Closer in project incubator-gobblin by apache.

From class SafeDatasetCommit, method call.

@Override
public Void call() throws Exception {
    if (this.datasetState.getState() == JobState.RunningState.COMMITTED) {
        log.info(this.datasetUrn + " has been committed.");
        return null;
    }
    metricContext = Instrumented.getMetricContext(datasetState, SafeDatasetCommit.class);
    finalizeDatasetStateBeforeCommit(this.datasetState);
    Class<? extends DataPublisher> dataPublisherClass;
    try (Closer closer = Closer.create()) {
        dataPublisherClass = JobContext.getJobDataPublisherClass(this.jobContext.getJobState())
                .or((Class<? extends DataPublisher>) Class.forName(ConfigurationKeys.DEFAULT_DATA_PUBLISHER_TYPE));
        if (!canCommitDataset(datasetState)) {
            log.warn(String.format("Not committing dataset %s of job %s with commit policy %s and state %s", this.datasetUrn, this.jobContext.getJobId(), this.jobContext.getJobCommitPolicy(), this.datasetState.getState()));
            checkForUnpublishedWUHandling(this.datasetUrn, this.datasetState, dataPublisherClass, closer);
            throw new RuntimeException(String.format("Not committing dataset %s of job %s with commit policy %s and state %s", this.datasetUrn, this.jobContext.getJobId(), this.jobContext.getJobCommitPolicy(), this.datasetState.getState()));
        }
    } catch (ReflectiveOperationException roe) {
        log.error("Failed to instantiate data publisher for dataset %s of job %s.", this.datasetUrn, this.jobContext.getJobId(), roe);
        throw new RuntimeException(roe);
    } finally {
        maySubmitFailureEvent(datasetState);
    }
    if (this.isJobCancelled) {
        log.info("Executing commit steps although job is cancelled due to job commit policy: " + this.jobContext.getJobCommitPolicy());
    }
    Optional<CommitSequence.Builder> commitSequenceBuilder = Optional.absent();
    boolean canPersistStates = true;
    try (Closer closer = Closer.create()) {
        if (this.shouldCommitDataInJob) {
            log.info(String.format("Committing dataset %s of job %s with commit policy %s and state %s", this.datasetUrn, this.jobContext.getJobId(), this.jobContext.getJobCommitPolicy(), this.datasetState.getState()));
            ListMultimap<TaskFactoryWrapper, TaskState> taskStatesByFactory = groupByTaskFactory(this.datasetState);
            for (Map.Entry<TaskFactoryWrapper, Collection<TaskState>> entry : taskStatesByFactory.asMap().entrySet()) {
                TaskFactory taskFactory = entry.getKey().getTaskFactory();
                if (this.deliverySemantics == DeliverySemantics.EXACTLY_ONCE) {
                    if (taskFactory != null) {
                        throw new RuntimeException("Custom task factories do not support exactly once delivery semantics.");
                    }
                    commitSequenceBuilder = generateCommitSequenceBuilder(this.datasetState, entry.getValue());
                } else {
                    DataPublisher publisher;
                    if (taskFactory == null) {
                        publisher = DataPublisherFactory.get(dataPublisherClass.getName(), this.jobContext.getJobState(), this.jobContext.getJobBroker());
                        // If the publisher is not cacheable, this commit owns it, so register it with the closer.
                        if (!DataPublisherFactory.isPublisherCacheable(publisher)) {
                            closer.register(publisher);
                        }
                    } else {
                        // NOTE: sharing of publishers is not supported when they are instantiated through the TaskFactory.
                        // This should be revisited if sharing is required.
                        publisher = taskFactory.createDataPublisher(this.datasetState);
                    }
                    if (this.isJobCancelled) {
                        if (publisher.canBeSkipped()) {
                            log.warn(publisher.getClass() + " will be skipped.");
                        } else {
                            canPersistStates = false;
                            throw new RuntimeException("Cannot persist state upon cancellation because publisher has unfinished work and cannot be skipped.");
                        }
                    } else if (this.isMultithreaded && !publisher.isThreadSafe()) {
                        log.warn(String.format("Gobblin is set up to parallelize publishing, however the publisher %s is not thread-safe. " + "Falling back to serial publishing.", publisher.getClass().getName()));
                        safeCommitDataset(entry.getValue(), publisher);
                    } else {
                        commitDataset(entry.getValue(), publisher);
                    }
                }
            }
            this.datasetState.setState(JobState.RunningState.COMMITTED);
        } else {
            if (this.datasetState.getState() == JobState.RunningState.SUCCESSFUL) {
                this.datasetState.setState(JobState.RunningState.COMMITTED);
            }
        }
    } catch (Throwable throwable) {
        log.error(String.format("Failed to commit dataset state for dataset %s of job %s", this.datasetUrn, this.jobContext.getJobId()), throwable);
        throw new RuntimeException(throwable);
    } finally {
        try {
            finalizeDatasetState(datasetState, datasetUrn);
            maySubmitFailureEvent(datasetState);
            maySubmitLineageEvent(datasetState);
            if (commitSequenceBuilder.isPresent()) {
                buildAndExecuteCommitSequence(commitSequenceBuilder.get(), datasetState, datasetUrn);
                datasetState.setState(JobState.RunningState.COMMITTED);
            } else if (canPersistStates) {
                persistDatasetState(datasetUrn, datasetState);
            }
        } catch (IOException | RuntimeException ioe) {
            log.error(String.format("Failed to persist dataset state for dataset %s of job %s", datasetUrn, this.jobContext.getJobId()), ioe);
            throw new RuntimeException(ioe);
        }
    }
    return null;
}
Also used : Closer(com.google.common.io.Closer) FailureEventBuilder(org.apache.gobblin.metrics.event.FailureEventBuilder) LineageEventBuilder(org.apache.gobblin.metrics.event.lineage.LineageEventBuilder) IOException(java.io.IOException) DataPublisher(org.apache.gobblin.publisher.DataPublisher) TaskFactory(org.apache.gobblin.runtime.task.TaskFactory) Collection(java.util.Collection) Map(java.util.Map)
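
A detail worth noting in Example 38: the publisher is registered with the Closer only when DataPublisherFactory reports it as non-cacheable, i.e. only when this commit owns its lifecycle; cached publishers are shared and must not be closed here. A minimal sketch of that conditional-ownership pattern with a plain Closeable (all names are hypothetical):

import java.io.Closeable;
import java.io.IOException;

import com.google.common.io.Closer;

public class ConditionalOwnership {
    // Register the resource for closing only when this scope owns its lifecycle;
    // shared (cached) resources must stay open for their other users.
    static void process(Closeable resource, boolean ownedByThisScope) throws IOException {
        try (Closer closer = Closer.create()) {
            if (ownedByThisScope) {
                closer.register(resource);
            }
            // ... work with the resource ...
        }
    }
}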

Example 39 with Closer

use of com.google.common.io.Closer in project incubator-gobblin by apache.

From class SafeDatasetCommit, method generateCommitSequenceBuilder.

@SuppressWarnings("unchecked")
private Optional<CommitSequence.Builder> generateCommitSequenceBuilder(JobState.DatasetState datasetState, Collection<TaskState> taskStates) {
    try (Closer closer = Closer.create()) {
        Class<? extends CommitSequencePublisher> dataPublisherClass = (Class<? extends CommitSequencePublisher>) Class
                .forName(datasetState.getProp(ConfigurationKeys.DATA_PUBLISHER_TYPE, ConfigurationKeys.DEFAULT_DATA_PUBLISHER_TYPE));
        CommitSequencePublisher publisher = (CommitSequencePublisher) closer
                .register(DataPublisher.getInstance(dataPublisherClass, this.jobContext.getJobState()));
        publisher.publish(taskStates);
        return publisher.getCommitSequenceBuilder();
    } catch (Throwable t) {
        log.error("Failed to generate commit sequence", t);
        setTaskFailureException(datasetState.getTaskStates(), t);
        throw Throwables.propagate(t);
    }
}
Also used : Closer(com.google.common.io.Closer) CommitSequencePublisher(org.apache.gobblin.publisher.CommitSequencePublisher)
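
One caveat for newer codebases: Throwables.propagate has been deprecated since Guava 20. Against a current Guava release, the catch block's last line would usually be written as the two-step form Guava itself recommends:

// Equivalent to the deprecated Throwables.propagate(t):
Throwables.throwIfUnchecked(t);
throw new RuntimeException(t);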

Example 40 with Closer

use of com.google.common.io.Closer in project incubator-gobblin by apache.

From class Task, method publishTaskData.

private void publishTaskData() throws IOException {
    Closer closer = Closer.create();
    try {
        Class<? extends DataPublisher> dataPublisherClass = getTaskPublisherClass();
        SingleTaskDataPublisher publisher = closer.register(SingleTaskDataPublisher.getInstance(dataPublisherClass, this.taskState));
        LOG.info("Publishing data from task " + this.taskId);
        publisher.publish(this.taskState);
    } catch (ClassCastException e) {
        LOG.error(String.format("To publish data in task, the publisher class must extend %s",
                SingleTaskDataPublisher.class.getSimpleName()), e);
        this.taskState.setTaskFailureException(e);
        throw closer.rethrow(e);
    } catch (Throwable t) {
        this.taskState.setTaskFailureException(t);
        throw closer.rethrow(t);
    } finally {
        closer.close();
    }
}
Also used : Closer(com.google.common.io.Closer) SingleTaskDataPublisher(org.apache.gobblin.publisher.SingleTaskDataPublisher)
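
Examples 36 and 40 use the manual Closer idiom that predates try-with-resources (register in try, rethrow in catch, close in finally), while Examples 37 through 39 use the try-with-resources form; the two are equivalent. The skeleton of the manual form, as documented in Guava's Closer Javadoc, hinges on rethrow recording the primary exception so that a later failure in close() cannot mask it:

import java.io.IOException;

import com.google.common.io.Closer;

public class ManualCloserIdiom {
    static void run() throws IOException {
        Closer closer = Closer.create();
        try {
            // ... register and use resources here ...
        } catch (Throwable t) {
            // Records t as the primary exception so close() cannot mask it, then
            // rethrows it as an IOException, RuntimeException, or Error.
            throw closer.rethrow(t);
        } finally {
            closer.close();
        }
    }
}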

Aggregations

Closer (com.google.common.io.Closer): 200
IOException (java.io.IOException): 94
File (java.io.File): 22
Test (org.junit.Test): 18
Test (org.testng.annotations.Test): 18
Closer (org.apache.flink.shaded.guava30.com.google.common.io.Closer): 16
Path (org.apache.hadoop.fs.Path): 16
ArrayList (java.util.ArrayList): 15
Properties (java.util.Properties): 14
FileOutputStream (java.io.FileOutputStream): 13
FileInputStream (java.io.FileInputStream): 12
InputStream (java.io.InputStream): 12
Map (java.util.Map): 12
OutputStream (java.io.OutputStream): 11
UncheckedIOException (java.io.UncheckedIOException): 10
NettyShuffleEnvironment (org.apache.flink.runtime.io.network.NettyShuffleEnvironment): 9
WorkUnit (org.apache.gobblin.source.workunit.WorkUnit): 9
AlluxioException (alluxio.exception.AlluxioException): 8
InputStreamReader (java.io.InputStreamReader): 8
HashMap (java.util.HashMap): 8