Use of org.apache.gobblin.hive.spec.HiveSpec in project incubator-gobblin by Apache.
Class HiveRegistrationPublisher, method publishData.
/**
 * Computes the {@link HiveSpec}s for the published directories of the given states and registers
 * each of them through {@code hiveRegister}.
 *
 * <p>For every state that carries {@code ConfigurationKeys.PUBLISHER_DIRS}, one computation task
 * per path is submitted to {@code hivePolicyExecutor}; the results are then drained in completion
 * order and registered on the calling thread. States without publisher dirs are skipped.
 *
 * <p>Note: when an upstream data attribute name is configured, the matching properties are
 * appended to {@code HiveMetaStoreUtils.RUNTIME_PROPS} on the passed-in state object itself
 * (the state is mutated in place, not copied).
 *
 * @param states This is a collection of TaskState.
 * @throws IOException if computing a HiveSpec fails, or if the calling thread is interrupted while
 *     waiting for a computation to finish (the interrupt flag is restored before throwing)
 */
@Override
public void publishData(Collection<? extends WorkUnitState> states) throws IOException {
  CompletionService<Collection<HiveSpec>> completionService =
      new ExecutorCompletionService<>(this.hivePolicyExecutor);

  // Each state in states is a task-level State. Runtime task-level props are injected into the
  // state handed to each HiveRegistrationPolicy so that they can be carried into the Hive
  // Partition and its corresponding Hive Table.
  // runtime.props are comma-separated props collected at runtime.
  int toRegisterPathCount = 0;
  for (State state : states) {
    if (!state.contains(ConfigurationKeys.PUBLISHER_DIRS)) {
      // Nothing was published for this state, so there is nothing to register.
      continue;
    }

    // Upstream data attribute is specified: inject this info into the state as runtime props,
    // encoded as "name:value" entries.
    if (this.hiveRegister.getProps().getUpstreamDataAttrName().isPresent()) {
      for (String attrName : LIST_SPLITTER_COMMA.splitToList(
          this.hiveRegister.getProps().getUpstreamDataAttrName().get())) {
        if (state.contains(attrName)) {
          state.appendToListProp(HiveMetaStoreUtils.RUNTIME_PROPS, attrName + ":" + state.getProp(attrName));
        }
      }
    }

    final HiveRegistrationPolicy policy = HiveRegistrationPolicyBase.getPolicy(state);
    for (final String path : state.getPropAsList(ConfigurationKeys.PUBLISHER_DIRS)) {
      // With dedupe enabled, a path that was already recorded is not submitted again.
      // NOTE(review): pathsToRegisterFromSingleState is a field that is not reset in this method,
      // so it accumulates across all states handled here — confirm this is intended.
      if (isPathDedupeEnabled && pathsToRegisterFromSingleState.contains(path)) {
        continue;
      }
      pathsToRegisterFromSingleState.add(path);
      toRegisterPathCount += 1;
      completionService.submit(new Callable<Collection<HiveSpec>>() {
        @Override
        public Collection<HiveSpec> call() throws Exception {
          try (Timer.Context context = metricContext.timer(HIVE_SPEC_COMPUTATION_TIMER).time()) {
            return policy.getHiveSpecs(new Path(path));
          }
        }
      });
    }
  }

  // Exactly toRegisterPathCount tasks were submitted, so take() is called that many times.
  for (int i = 0; i < toRegisterPathCount; i++) {
    try {
      for (HiveSpec spec : completionService.take().get()) {
        this.hiveRegister.register(spec);
      }
    } catch (InterruptedException e) {
      // Restore the interrupt flag so callers further up the stack can observe the interruption.
      Thread.currentThread().interrupt();
      log.error("Failed to generate HiveSpec", e);
      throw new IOException(e);
    } catch (ExecutionException e) {
      log.error("Failed to generate HiveSpec", e);
      throw new IOException(e);
    }
  }
  log.info("Finished registering all HiveSpecs");
}
Aggregations