Use of org.apache.gobblin.hive.policy.HiveRegistrationPolicy in the project incubator-gobblin by Apache.
Class CompactionHiveRegistrationAction, method onCompactionJobComplete.
/**
 * Registers the compacted output of a dataset with Hive once its compaction job has finished.
 *
 * <p>Nothing happens unless {@link ConfigurationKeys#HIVE_REGISTRATION_POLICY} is present in the
 * state. Otherwise every {@link HiveSpec} the configured policy produces for the dataset's
 * destination directory is registered, and, when an event submitter is available, a single event
 * carrying the dataset URN and the comma-joined registered paths is submitted.
 *
 * @param dataset the dataset whose compaction job just completed
 * @throws IOException declared by the registration / path-parsing calls made here
 */
public void onCompactionJobComplete(FileSystemDataset dataset) throws IOException {
  // Hive registration is opt-in: without a configured policy there is nothing to do.
  if (!state.contains(ConfigurationKeys.HIVE_REGISTRATION_POLICY)) {
    return;
  }

  // NOTE(review): the register obtained here is never explicitly released in this method —
  // confirm whether HiveRegister needs to be closed by the caller.
  HiveRegister register = HiveRegister.get(state);
  HiveRegistrationPolicy policy = HiveRegistrationPolicyBase.getPolicy(state);
  CompactionPathParser.CompactionParserResult parsed = new CompactionPathParser(state).parse(dataset);

  Path destination = new Path(parsed.getDstAbsoluteDir());
  List<String> registeredPaths = new ArrayList<>();
  for (HiveSpec hiveSpec : policy.getHiveSpecs(destination)) {
    register.register(hiveSpec);
    registeredPaths.add(hiveSpec.getPath().toUri().toASCIIString());
    log.info("Hive registration is done for {}", parsed.getDstAbsoluteDir());
  }

  // Event emission is optional; skip it when no submitter has been set up.
  if (eventSubmitter == null) {
    return;
  }

  String datasetUrn = dataset.datasetURN();
  String joinedPaths = Joiner.on(',').join(registeredPaths);
  Map<String, String> eventMetadataMap = ImmutableMap.of(
      CompactionSlaEventHelper.DATASET_URN, datasetUrn,
      CompactionSlaEventHelper.HIVE_REGISTRATION_PATHS, joinedPaths);
  this.eventSubmitter.submit(CompactionSlaEventHelper.COMPACTION_HIVE_REGISTRATION_EVENT, eventMetadataMap);
}
Use of org.apache.gobblin.hive.policy.HiveRegistrationPolicy in the project incubator-gobblin by Apache.
Class HiveRegistrationPublisher, method publishData.
/**
 * Computes the {@link HiveSpec}s for every publisher directory found in the given states and
 * registers each of them through {@code hiveRegister}.
 *
 * <p>HiveSpec computation is submitted to {@code hivePolicyExecutor}, one task per path;
 * registration then happens on the calling thread as the results become available.
 *
 * @param states This is a collection of TaskState. States that do not contain
 *     {@link ConfigurationKeys#PUBLISHER_DIRS} are skipped.
 * @throws IOException if a HiveSpec computation fails, or if the calling thread is interrupted
 *     while waiting for a result (the thread's interrupt status is restored before throwing)
 */
@Override
public void publishData(Collection<? extends WorkUnitState> states) throws IOException {
  CompletionService<Collection<HiveSpec>> completionService =
      new ExecutorCompletionService<>(this.hivePolicyExecutor);

  // Each state in states is task-level State, while superState is the Job-level State.
  // Using both State objects to distinguish each HiveRegistrationPolicy so that
  // they can carry task-level information to pass into Hive Partition and its corresponding Hive Table.
  // Here the runtime task-level props are appended to the state each Policy object is built from.
  // runtime.props are comma-separated props collected in runtime.
  int toRegisterPathCount = 0;
  for (State state : states) {
    if (!state.contains(ConfigurationKeys.PUBLISHER_DIRS)) {
      // Nothing was published for this state, hence nothing to register.
      continue;
    }

    // Upstream data attribute is specified, need to inject these info into the state as runtimeTableProps.
    if (this.hiveRegister.getProps().getUpstreamDataAttrName().isPresent()) {
      for (String attrName : LIST_SPLITTER_COMMA
          .splitToList(this.hiveRegister.getProps().getUpstreamDataAttrName().get())) {
        if (state.contains(attrName)) {
          state.appendToListProp(HiveMetaStoreUtils.RUNTIME_PROPS, attrName + ":" + state.getProp(attrName));
        }
      }
    }

    final HiveRegistrationPolicy policy = HiveRegistrationPolicyBase.getPolicy(state);
    for (final String path : state.getPropAsList(ConfigurationKeys.PUBLISHER_DIRS)) {
      // With dedupe enabled, a path that was already scheduled is not scheduled again.
      if (isPathDedupeEnabled && pathsToRegisterFromSingleState.contains(path)) {
        continue;
      }
      pathsToRegisterFromSingleState.add(path);
      toRegisterPathCount += 1;
      completionService.submit(new Callable<Collection<HiveSpec>>() {
        @Override
        public Collection<HiveSpec> call() throws Exception {
          // Time how long the policy takes to compute the specs for this path.
          try (Timer.Context context = metricContext.timer(HIVE_SPEC_COMPUTATION_TIMER).time()) {
            return policy.getHiveSpecs(new Path(path));
          }
        }
      });
    }
  }

  // Exactly one result is expected per submitted task; take() blocks until the next one is ready.
  for (int i = 0; i < toRegisterPathCount; i++) {
    try {
      for (HiveSpec spec : completionService.take().get()) {
        this.hiveRegister.register(spec);
      }
    } catch (InterruptedException e) {
      // Restore the interrupt status so callers further up the stack can still observe it.
      Thread.currentThread().interrupt();
      log.error("Failed to generate HiveSpec", e);
      throw new IOException(e);
    } catch (ExecutionException e) {
      log.error("Failed to generate HiveSpec", e);
      throw new IOException(e);
    }
  }
  log.info("Finished registering all HiveSpecs");
}
Aggregations