Use of org.apache.gobblin.hive.spec.HiveSpec in project incubator-gobblin by apache.
In the class CompactionHiveRegistrationAction, the method onCompactionJobComplete:
public void onCompactionJobComplete(FileSystemDataset dataset) throws IOException {
  if (state.contains(ConfigurationKeys.HIVE_REGISTRATION_POLICY)) {
    HiveRegister hiveRegister = HiveRegister.get(state);
    HiveRegistrationPolicy hiveRegistrationPolicy = HiveRegistrationPolicyBase.getPolicy(state);
    CompactionPathParser.CompactionParserResult result = new CompactionPathParser(state).parse(dataset);
    List<String> paths = new ArrayList<>();
    for (HiveSpec spec : hiveRegistrationPolicy.getHiveSpecs(new Path(result.getDstAbsoluteDir()))) {
      hiveRegister.register(spec);
      paths.add(spec.getPath().toUri().toASCIIString());
    }
    log.info("Hive registration is done for {}", result.getDstAbsoluteDir());
    // submit events for hive registration
    if (eventSubmitter != null) {
      Map<String, String> eventMetadataMap = ImmutableMap.of(
          CompactionSlaEventHelper.DATASET_URN, dataset.datasetURN(),
          CompactionSlaEventHelper.HIVE_REGISTRATION_PATHS, Joiner.on(',').join(paths));
      this.eventSubmitter.submit(CompactionSlaEventHelper.COMPACTION_HIVE_REGISTRATION_EVENT, eventMetadataMap);
    }
  }
}
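The pattern above — resolve the configured policy from job state, expand a destination path into HiveSpecs, and register each one — can be lifted out on its own. Below is a minimal sketch of that flow; HiveSpecRegistrationSketch and registerAll are hypothetical names, and the try-with-resources assumes HiveRegister implements Closeable, as it does in this codebase.

import java.io.IOException;
import org.apache.gobblin.configuration.State;
import org.apache.gobblin.hive.HiveRegister;
import org.apache.gobblin.hive.policy.HiveRegistrationPolicy;
import org.apache.gobblin.hive.policy.HiveRegistrationPolicyBase;
import org.apache.gobblin.hive.spec.HiveSpec;
import org.apache.hadoop.fs.Path;

public class HiveSpecRegistrationSketch {

  // Hypothetical helper: registers every HiveSpec the configured policy derives from dataDir.
  public static void registerAll(State state, Path dataDir) throws IOException {
    // HiveRegister.get(state) instantiates the register implementation configured in the job state.
    try (HiveRegister hiveRegister = HiveRegister.get(state)) {
      HiveRegistrationPolicy policy = HiveRegistrationPolicyBase.getPolicy(state);
      for (HiveSpec spec : policy.getHiveSpecs(dataDir)) {
        // Each spec carries the database, table, and optional partition to register.
        hiveRegister.register(spec);
      }
    }
  }
}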
Use of org.apache.gobblin.hive.spec.HiveSpec in project incubator-gobblin by apache.
In the class HiveRegistrationPolicyBaseTest, the method testGetHiveSpecsWithDBFilter:
@Test
public void testGetHiveSpecsWithDBFilter() throws IOException {
  State state = new State();
  state.appendToListProp(HiveRegistrationPolicyBase.HIVE_DATABASE_NAME, "db1");
  state.appendToListProp(HiveRegistrationPolicyBase.ADDITIONAL_HIVE_DATABASE_NAMES, "db2");
  state.appendToListProp(HiveRegistrationPolicyBase.HIVE_TABLE_NAME, "tbl1");
  state.appendToListProp(HiveRegistrationPolicyBase.ADDITIONAL_HIVE_TABLE_NAMES, "tbl2,tbl3,$PRIMARY_TABLE_col");
  state.appendToListProp("db2." + HiveRegistrationPolicyBase.HIVE_TABLE_NAME, "$PRIMARY_TABLE_col,tbl4,tbl5");
  this.path = new Path(getClass().getResource("/test-hive-table").toString());

  Collection<HiveSpec> specs = new HiveRegistrationPolicyBase(state).getHiveSpecs(this.path);
  Assert.assertEquals(specs.size(), 7);

  Iterator<HiveSpec> iterator = specs.iterator();
  HiveSpec spec = iterator.next();
  examine(spec, "db1", "tbl1");
  spec = iterator.next();
  examine(spec, "db1", "tbl2");
  spec = iterator.next();
  examine(spec, "db1", "tbl3");
  spec = iterator.next();
  examine(spec, "db1", "tbl1_col");
  spec = iterator.next();
  examine(spec, "db2", "tbl1_col");
  spec = iterator.next();
  examine(spec, "db2", "tbl4");
  spec = iterator.next();
  examine(spec, "db2", "tbl5");
}
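The $PRIMARY_TABLE token in the table-name properties is substituted with the primary table name (tbl1 here), which is why the assertions expect tbl1_col in both databases. The examine helper is not shown in this excerpt; a plausible reconstruction, assuming each spec is a SimpleHiveSpec exposing its table via getTable(), would live inside the same test class:

// Plausible sketch of the examine(...) helper used in the test above (the real test
// defines its own); requires org.testng.Assert and SimpleHiveSpec on the classpath.
private void examine(HiveSpec spec, String dbName, String tableName) {
  Assert.assertEquals(spec.getClass(), SimpleHiveSpec.class);
  Assert.assertEquals(spec.getTable().getDbName(), dbName);
  Assert.assertEquals(spec.getTable().getTableName(), tableName);
}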
Use of org.apache.gobblin.hive.spec.HiveSpec in project incubator-gobblin by apache.
In the class HivePartitionFileSet, the method generateCopyEntities:
@Override
protected Collection<CopyEntity> generateCopyEntities() throws IOException {
  try (Closer closer = Closer.create()) {
    MultiTimingEvent multiTimer = closer.register(new MultiTimingEvent(this.eventSubmitter, "PartitionCopy", true));
    int stepPriority = 0;
    String fileSet = HiveCopyEntityHelper.gson.toJson(this.partition.getValues());
    List<CopyEntity> copyEntities = Lists.newArrayList();
    stepPriority = hiveCopyEntityHelper.addSharedSteps(copyEntities, fileSet, stepPriority);

    multiTimer.nextStage(HiveCopyEntityHelper.Stages.COMPUTE_TARGETS);
    Path targetPath = hiveCopyEntityHelper.getTargetLocation(hiveCopyEntityHelper.getDataset().fs,
        hiveCopyEntityHelper.getTargetFs(), this.partition.getDataLocation(), Optional.of(this.partition));
    Partition targetPartition = getTargetPartition(this.partition, targetPath);

    multiTimer.nextStage(HiveCopyEntityHelper.Stages.EXISTING_PARTITION);
    if (this.existingTargetPartition.isPresent()) {
      hiveCopyEntityHelper.getTargetPartitions().remove(this.partition.getValues());
      try {
        checkPartitionCompatibility(targetPartition, this.existingTargetPartition.get());
      } catch (IOException ioe) {
        if (hiveCopyEntityHelper.getExistingEntityPolicy() != HiveCopyEntityHelper.ExistingEntityPolicy.REPLACE_PARTITIONS
            && hiveCopyEntityHelper.getExistingEntityPolicy() != HiveCopyEntityHelper.ExistingEntityPolicy.REPLACE_TABLE_AND_PARTITIONS) {
          log.error("Source and target partitions are not compatible. Aborting copy of partition " + this.partition, ioe);
          return Lists.newArrayList();
        }
        log.warn("Source and target partitions are not compatible. Will override target partition: " + ioe.getMessage());
        log.debug("Incompatibility details: ", ioe);
        stepPriority = hiveCopyEntityHelper.addPartitionDeregisterSteps(copyEntities, fileSet, stepPriority,
            hiveCopyEntityHelper.getTargetTable(), this.existingTargetPartition.get());
        this.existingTargetPartition = Optional.absent();
      }
    }

    multiTimer.nextStage(HiveCopyEntityHelper.Stages.PARTITION_SKIP_PREDICATE);
    if (hiveCopyEntityHelper.getFastPartitionSkip().isPresent()
        && hiveCopyEntityHelper.getFastPartitionSkip().get().apply(this)) {
      log.info(String.format("Skipping copy of partition %s due to fast partition skip predicate.",
          this.partition.getCompleteName()));
      return Lists.newArrayList();
    }

    HiveSpec partitionHiveSpec = new SimpleHiveSpec.Builder<>(targetPath)
        .withTable(HiveMetaStoreUtils.getHiveTable(hiveCopyEntityHelper.getTargetTable().getTTable()))
        .withPartition(Optional.of(HiveMetaStoreUtils.getHivePartition(targetPartition.getTPartition())))
        .build();
    HiveRegisterStep register = new HiveRegisterStep(hiveCopyEntityHelper.getTargetURI(), partitionHiveSpec,
        hiveCopyEntityHelper.getHiveRegProps());
    copyEntities.add(new PostPublishStep(fileSet, Maps.<String, String>newHashMap(), register, stepPriority++));

    multiTimer.nextStage(HiveCopyEntityHelper.Stages.CREATE_LOCATIONS);
    HiveLocationDescriptor sourceLocation =
        HiveLocationDescriptor.forPartition(this.partition, hiveCopyEntityHelper.getDataset().fs, this.properties);
    HiveLocationDescriptor desiredTargetLocation =
        HiveLocationDescriptor.forPartition(targetPartition, hiveCopyEntityHelper.getTargetFs(), this.properties);
    Optional<HiveLocationDescriptor> existingTargetLocation = this.existingTargetPartition.isPresent()
        ? Optional.of(HiveLocationDescriptor.forPartition(this.existingTargetPartition.get(),
            hiveCopyEntityHelper.getTargetFs(), this.properties))
        : Optional.<HiveLocationDescriptor>absent();

    multiTimer.nextStage(HiveCopyEntityHelper.Stages.FULL_PATH_DIFF);
    HiveCopyEntityHelper.DiffPathSet diffPathSet = HiveCopyEntityHelper.fullPathDiff(sourceLocation,
        desiredTargetLocation, existingTargetLocation, Optional.<Partition>absent(), multiTimer, hiveCopyEntityHelper);

    multiTimer.nextStage(HiveCopyEntityHelper.Stages.CREATE_DELETE_UNITS);
    if (diffPathSet.pathsToDelete.size() > 0) {
      DeleteFileCommitStep deleteStep = DeleteFileCommitStep.fromPaths(hiveCopyEntityHelper.getTargetFs(),
          diffPathSet.pathsToDelete, hiveCopyEntityHelper.getDataset().properties);
      copyEntities.add(new PrePublishStep(fileSet, Maps.<String, String>newHashMap(), deleteStep, stepPriority++));
    }

    multiTimer.nextStage(HiveCopyEntityHelper.Stages.CREATE_COPY_UNITS);
    for (CopyableFile.Builder builder : hiveCopyEntityHelper.getCopyableFilesFromPaths(diffPathSet.filesToCopy,
        hiveCopyEntityHelper.getConfiguration(), Optional.of(this.partition))) {
      CopyableFile fileEntity = builder.fileSet(fileSet)
          .checksum(new byte[0])
          .datasetOutputPath(desiredTargetLocation.location.toString())
          .build();
      this.hiveCopyEntityHelper.setCopyableFileDatasets(fileEntity);
      copyEntities.add(fileEntity);
    }

    log.info("Created {} copy entities for partition {}", copyEntities.size(), this.partition.getCompleteName());
    return copyEntities;
  }
}
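The single-expression construction of partitionHiveSpec above is dense; the same SimpleHiveSpec builder pattern reads more clearly decomposed. A sketch under the same context — targetPath and targetPartition as in the method above, with targetTable standing in for hiveCopyEntityHelper.getTargetTable():

// Unrolled version of the partitionHiveSpec construction above, for readability only.
// Optional here is com.google.common.base.Optional, as used throughout this codebase.
HiveTable hiveTable = HiveMetaStoreUtils.getHiveTable(targetTable.getTTable());
HivePartition hivePartition = HiveMetaStoreUtils.getHivePartition(targetPartition.getTPartition());

HiveSpec partitionHiveSpec = new SimpleHiveSpec.Builder<>(targetPath)
    .withTable(hiveTable)               // target table metadata converted from the Thrift form
    .withPartition(Optional.of(hivePartition))
    .build();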
Use of org.apache.gobblin.hive.spec.HiveSpec in project incubator-gobblin by apache.
In the class HiveSnapshotRegistrationPolicy, the method getHiveSpecs:
/**
* @param path The root directory of snapshots. This directory may contain zero or more snapshots.
*/
@Override
public Collection<HiveSpec> getHiveSpecs(Path path) throws IOException {
  List<HiveTable> tables = getTables(path);
  if (tables.isEmpty()) {
    return ImmutableList.<HiveSpec>of();
  }
  Collection<HiveSpec> specs = Lists.newArrayList();
  for (HiveTable table : tables) {
    specs.add(new SimpleHiveSpec.Builder<>(path).withTable(table).withPartition(getPartition(path, table)).build());
  }
  return specs;
}
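A custom policy of this shape is small to write. Below is a minimal, illustrative sketch that registers a single unpartitioned table rooted at the given path; the class, database, and table names are hypothetical, and the single-method interface shape is assumed from the override above.

import java.io.IOException;
import java.util.Collection;

import com.google.common.base.Optional;
import com.google.common.collect.ImmutableList;

import org.apache.gobblin.hive.HivePartition;
import org.apache.gobblin.hive.HiveTable;
import org.apache.gobblin.hive.policy.HiveRegistrationPolicy;
import org.apache.gobblin.hive.spec.HiveSpec;
import org.apache.gobblin.hive.spec.SimpleHiveSpec;
import org.apache.hadoop.fs.Path;

// Illustrative only: derives one spec for an unpartitioned table at the given path.
public class SingleTableSnapshotPolicy implements HiveRegistrationPolicy {

  @Override
  public Collection<HiveSpec> getHiveSpecs(Path path) throws IOException {
    HiveTable table = new HiveTable.Builder()
        .withDbName("example_db")      // hypothetical database name
        .withTableName("example_tbl")  // hypothetical table name
        .build();
    table.setLocation(path.toString());
    return ImmutableList.<HiveSpec>of(
        new SimpleHiveSpec.Builder<>(path)
            .withTable(table)
            .withPartition(Optional.<HivePartition>absent()) // unpartitioned table
            .build());
  }
}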
Use of org.apache.gobblin.hive.spec.HiveSpec in project incubator-gobblin by apache.
In the class HiveRegistrationPolicyBaseTest, the method testGetHiveSpecs:
@Test
public void testGetHiveSpecs() throws IOException {
  State state = new State();
  state.appendToListProp(HiveRegistrationPolicyBase.HIVE_DATABASE_NAME, "db1");
  state.appendToListProp(HiveRegistrationPolicyBase.ADDITIONAL_HIVE_DATABASE_NAMES, "db2");
  state.appendToListProp(HiveRegistrationPolicyBase.HIVE_TABLE_NAME, "tbl1");
  state.appendToListProp(HiveRegistrationPolicyBase.ADDITIONAL_HIVE_TABLE_NAMES, "tbl2,tbl3");
  this.path = new Path(getClass().getResource("/test-hive-table").toString());

  Collection<HiveSpec> specs = new HiveRegistrationPolicyBase(state).getHiveSpecs(this.path);
  Assert.assertEquals(specs.size(), 6);

  Iterator<HiveSpec> iterator = specs.iterator();
  HiveSpec spec = iterator.next();
  examine(spec, "db1", "tbl1");
  spec = iterator.next();
  examine(spec, "db1", "tbl2");
  spec = iterator.next();
  examine(spec, "db1", "tbl3");
  spec = iterator.next();
  examine(spec, "db2", "tbl1");
  spec = iterator.next();
  examine(spec, "db2", "tbl2");
  spec = iterator.next();
  examine(spec, "db2", "tbl3");
}
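Without per-database table filters, the policy simply takes the cross product of databases and tables: {db1, db2} × {tbl1, tbl2, tbl3} yields the six specs asserted above, iterated database-major. A compact way to express the same expected order, assuming the examine helper sketched earlier:

// Expected (db, table) pairs in iteration order: the cross product, database-major.
String[][] expected = {
    {"db1", "tbl1"}, {"db1", "tbl2"}, {"db1", "tbl3"},
    {"db2", "tbl1"}, {"db2", "tbl2"}, {"db2", "tbl3"}};
Iterator<HiveSpec> it = specs.iterator();
for (String[] pair : expected) {
  examine(it.next(), pair[0], pair[1]);
}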