Search in sources :

Example 1 with HiveSpec

use of org.apache.gobblin.hive.spec.HiveSpec in project incubator-gobblin by apache.

In class CompactionHiveRegistrationAction, the method onCompactionJobComplete:

/**
 * Registers the compacted output directory with Hive after a compaction job completes,
 * then emits a tracking event listing the registered paths.
 *
 * <p>Runs only when {@code ConfigurationKeys.HIVE_REGISTRATION_POLICY} is configured in
 * {@code state}; otherwise this is a no-op.
 *
 * @param dataset the compacted dataset whose destination directory should be registered
 * @throws IOException if Hive registration fails
 */
public void onCompactionJobComplete(FileSystemDataset dataset) throws IOException {
    if (state.contains(ConfigurationKeys.HIVE_REGISTRATION_POLICY)) {
        HiveRegister hiveRegister = HiveRegister.get(state);
        HiveRegistrationPolicy hiveRegistrationPolicy = HiveRegistrationPolicyBase.getPolicy(state);
        // Derive the compaction destination directory for this dataset.
        CompactionPathParser.CompactionParserResult result = new CompactionPathParser(state).parse(dataset);
        List<String> paths = new ArrayList<>();
        // One dataset directory may map to multiple HiveSpecs (e.g. multiple db/table targets).
        for (HiveSpec spec : hiveRegistrationPolicy.getHiveSpecs(new Path(result.getDstAbsoluteDir()))) {
            hiveRegister.register(spec);
            paths.add(spec.getPath().toUri().toASCIIString());
        }
        // Fix: log once after all specs are registered. The original logged inside the
        // loop, emitting the identical message (same destination dir) once per spec.
        log.info("Hive registration is done for {}", result.getDstAbsoluteDir());
        // submit events for hive registration
        if (eventSubmitter != null) {
            Map<String, String> eventMetadataMap = ImmutableMap.of(CompactionSlaEventHelper.DATASET_URN, dataset.datasetURN(), CompactionSlaEventHelper.HIVE_REGISTRATION_PATHS, Joiner.on(',').join(paths));
            this.eventSubmitter.submit(CompactionSlaEventHelper.COMPACTION_HIVE_REGISTRATION_EVENT, eventMetadataMap);
        }
    }
}
Also used : Path(org.apache.hadoop.fs.Path) HiveRegister(org.apache.gobblin.hive.HiveRegister) ArrayList(java.util.ArrayList) CompactionPathParser(org.apache.gobblin.compaction.parser.CompactionPathParser) HiveRegistrationPolicy(org.apache.gobblin.hive.policy.HiveRegistrationPolicy) HiveSpec(org.apache.gobblin.hive.spec.HiveSpec)

Example 2 with HiveSpec

use of org.apache.gobblin.hive.spec.HiveSpec in project incubator-gobblin by apache.

In class HiveRegistrationPolicyBaseTest, the method testGetHiveSpecsWithDBFilter:

@Test
public void testGetHiveSpecsWithDBFilter() throws IOException {
    // Configure a primary db/table plus additional dbs/tables, including the
    // $PRIMARY_TABLE token and a per-database ("db2.") table-name override.
    State state = new State();
    state.appendToListProp(HiveRegistrationPolicyBase.HIVE_DATABASE_NAME, "db1");
    state.appendToListProp(HiveRegistrationPolicyBase.ADDITIONAL_HIVE_DATABASE_NAMES, "db2");
    state.appendToListProp(HiveRegistrationPolicyBase.HIVE_TABLE_NAME, "tbl1");
    state.appendToListProp(HiveRegistrationPolicyBase.ADDITIONAL_HIVE_TABLE_NAMES, "tbl2,tbl3,$PRIMARY_TABLE_col");
    state.appendToListProp("db2." + HiveRegistrationPolicyBase.HIVE_TABLE_NAME, "$PRIMARY_TABLE_col,tbl4,tbl5");
    this.path = new Path(getClass().getResource("/test-hive-table").toString());
    Collection<HiveSpec> specs = new HiveRegistrationPolicyBase(state).getHiveSpecs(this.path);
    // Expected (db, table) pairs, in the order the policy produces them.
    String[][] expected = {
        { "db1", "tbl1" },
        { "db1", "tbl2" },
        { "db1", "tbl3" },
        { "db1", "tbl1_col" },
        { "db2", "tbl1_col" },
        { "db2", "tbl4" },
        { "db2", "tbl5" }
    };
    Assert.assertEquals(specs.size(), expected.length);
    Iterator<HiveSpec> specIterator = specs.iterator();
    for (String[] dbAndTable : expected) {
        examine(specIterator.next(), dbAndTable[0], dbAndTable[1]);
    }
}
Also used : Path(org.apache.hadoop.fs.Path) State(org.apache.gobblin.configuration.State) HiveSpec(org.apache.gobblin.hive.spec.HiveSpec) SimpleHiveSpec(org.apache.gobblin.hive.spec.SimpleHiveSpec) Test(org.testng.annotations.Test)

Example 3 with HiveSpec

use of org.apache.gobblin.hive.spec.HiveSpec in project incubator-gobblin by apache.

In class HivePartitionFileSet, the method generateCopyEntities:

@Override
protected Collection<CopyEntity> generateCopyEntities() throws IOException {
    // Closer ensures the timing event is closed even if an exception escapes this method.
    try (Closer closer = Closer.create()) {
        MultiTimingEvent multiTimer = closer.register(new MultiTimingEvent(this.eventSubmitter, "PartitionCopy", true));
        int stepPriority = 0;
        // File-set id for this partition: its partition values serialized as JSON.
        String fileSet = HiveCopyEntityHelper.gson.toJson(this.partition.getValues());
        List<CopyEntity> copyEntities = Lists.newArrayList();
        // Shared (dataset-level) steps go first; addSharedSteps returns the next free priority.
        stepPriority = hiveCopyEntityHelper.addSharedSteps(copyEntities, fileSet, stepPriority);
        multiTimer.nextStage(HiveCopyEntityHelper.Stages.COMPUTE_TARGETS);
        // Compute where this partition's data should land on the target filesystem.
        Path targetPath = hiveCopyEntityHelper.getTargetLocation(hiveCopyEntityHelper.getDataset().fs, hiveCopyEntityHelper.getTargetFs(), this.partition.getDataLocation(), Optional.of(this.partition));
        Partition targetPartition = getTargetPartition(this.partition, targetPath);
        multiTimer.nextStage(HiveCopyEntityHelper.Stages.EXISTING_PARTITION);
        if (this.existingTargetPartition.isPresent()) {
            // Partition already exists on the target; take it out of the pending-target map.
            hiveCopyEntityHelper.getTargetPartitions().remove(this.partition.getValues());
            try {
                checkPartitionCompatibility(targetPartition, this.existingTargetPartition.get());
            } catch (IOException ioe) {
                // Incompatible existing partition: abort unless policy allows replacing partitions.
                if (hiveCopyEntityHelper.getExistingEntityPolicy() != HiveCopyEntityHelper.ExistingEntityPolicy.REPLACE_PARTITIONS && hiveCopyEntityHelper.getExistingEntityPolicy() != HiveCopyEntityHelper.ExistingEntityPolicy.REPLACE_TABLE_AND_PARTITIONS) {
                    log.error("Source and target partitions are not compatible. Aborting copy of partition " + this.partition, ioe);
                    return Lists.newArrayList();
                }
                // Policy permits replacement: schedule deregistration of the existing
                // target partition and forget it for the rest of this method.
                log.warn("Source and target partitions are not compatible. Will override target partition: " + ioe.getMessage());
                log.debug("Incompatibility details: ", ioe);
                stepPriority = hiveCopyEntityHelper.addPartitionDeregisterSteps(copyEntities, fileSet, stepPriority, hiveCopyEntityHelper.getTargetTable(), this.existingTargetPartition.get());
                this.existingTargetPartition = Optional.absent();
            }
        }
        multiTimer.nextStage(HiveCopyEntityHelper.Stages.PARTITION_SKIP_PREDICATE);
        // Optional user-supplied predicate that can short-circuit the copy of this partition.
        if (hiveCopyEntityHelper.getFastPartitionSkip().isPresent() && hiveCopyEntityHelper.getFastPartitionSkip().get().apply(this)) {
            log.info(String.format("Skipping copy of partition %s due to fast partition skip predicate.", this.partition.getCompleteName()));
            return Lists.newArrayList();
        }
        // Hive registration of the copied partition runs after publish (PostPublishStep).
        HiveSpec partitionHiveSpec = new SimpleHiveSpec.Builder<>(targetPath).withTable(HiveMetaStoreUtils.getHiveTable(hiveCopyEntityHelper.getTargetTable().getTTable())).withPartition(Optional.of(HiveMetaStoreUtils.getHivePartition(targetPartition.getTPartition()))).build();
        HiveRegisterStep register = new HiveRegisterStep(hiveCopyEntityHelper.getTargetURI(), partitionHiveSpec, hiveCopyEntityHelper.getHiveRegProps());
        copyEntities.add(new PostPublishStep(fileSet, Maps.<String, String>newHashMap(), register, stepPriority++));
        multiTimer.nextStage(HiveCopyEntityHelper.Stages.CREATE_LOCATIONS);
        // Describe source, desired target, and (if present) existing target locations
        // so the diff below can decide what to copy and what to delete.
        HiveLocationDescriptor sourceLocation = HiveLocationDescriptor.forPartition(this.partition, hiveCopyEntityHelper.getDataset().fs, this.properties);
        HiveLocationDescriptor desiredTargetLocation = HiveLocationDescriptor.forPartition(targetPartition, hiveCopyEntityHelper.getTargetFs(), this.properties);
        Optional<HiveLocationDescriptor> existingTargetLocation = this.existingTargetPartition.isPresent() ? Optional.of(HiveLocationDescriptor.forPartition(this.existingTargetPartition.get(), hiveCopyEntityHelper.getTargetFs(), this.properties)) : Optional.<HiveLocationDescriptor>absent();
        multiTimer.nextStage(HiveCopyEntityHelper.Stages.FULL_PATH_DIFF);
        HiveCopyEntityHelper.DiffPathSet diffPathSet = HiveCopyEntityHelper.fullPathDiff(sourceLocation, desiredTargetLocation, existingTargetLocation, Optional.<Partition>absent(), multiTimer, hiveCopyEntityHelper);
        multiTimer.nextStage(HiveCopyEntityHelper.Stages.CREATE_DELETE_UNITS);
        // Stale target files are deleted before publish (PrePublishStep).
        if (diffPathSet.pathsToDelete.size() > 0) {
            DeleteFileCommitStep deleteStep = DeleteFileCommitStep.fromPaths(hiveCopyEntityHelper.getTargetFs(), diffPathSet.pathsToDelete, hiveCopyEntityHelper.getDataset().properties);
            copyEntities.add(new PrePublishStep(fileSet, Maps.<String, String>newHashMap(), deleteStep, stepPriority++));
        }
        multiTimer.nextStage(HiveCopyEntityHelper.Stages.CREATE_COPY_UNITS);
        // One CopyableFile per file that differs between source and target.
        for (CopyableFile.Builder builder : hiveCopyEntityHelper.getCopyableFilesFromPaths(diffPathSet.filesToCopy, hiveCopyEntityHelper.getConfiguration(), Optional.of(this.partition))) {
            // checksum is intentionally empty here (new byte[0]); the file set groups entities for atomic publish.
            CopyableFile fileEntity = builder.fileSet(fileSet).checksum(new byte[0]).datasetOutputPath(desiredTargetLocation.location.toString()).build();
            this.hiveCopyEntityHelper.setCopyableFileDatasets(fileEntity);
            copyEntities.add(fileEntity);
        }
        log.info("Created {} copy entities for partition {}", copyEntities.size(), this.partition.getCompleteName());
        return copyEntities;
    }
}
Also used : Closer(com.google.common.io.Closer) Path(org.apache.hadoop.fs.Path) Partition(org.apache.hadoop.hive.ql.metadata.Partition) CopyEntity(org.apache.gobblin.data.management.copy.CopyEntity) PostPublishStep(org.apache.gobblin.data.management.copy.entities.PostPublishStep) MultiTimingEvent(org.apache.gobblin.metrics.event.MultiTimingEvent) IOException(java.io.IOException) DeleteFileCommitStep(org.apache.gobblin.util.commit.DeleteFileCommitStep) HiveRegisterStep(org.apache.gobblin.hive.HiveRegisterStep) SimpleHiveSpec(org.apache.gobblin.hive.spec.SimpleHiveSpec) CopyableFile(org.apache.gobblin.data.management.copy.CopyableFile) PrePublishStep(org.apache.gobblin.data.management.copy.entities.PrePublishStep) HiveSpec(org.apache.gobblin.hive.spec.HiveSpec) SimpleHiveSpec(org.apache.gobblin.hive.spec.SimpleHiveSpec)

Example 4 with HiveSpec

use of org.apache.gobblin.hive.spec.HiveSpec in project incubator-gobblin by apache.

In class HiveSnapshotRegistrationPolicy, the method getHiveSpecs:

/**
 * Builds one {@link HiveSpec} per table found under the given snapshot root.
 *
 * @param path The root directory of snapshots. This directory may contain zero or more snapshots.
 * @return an empty collection when no tables are found, otherwise one spec per table
 * @throws IOException if the tables or partitions cannot be resolved
 */
@Override
public Collection<HiveSpec> getHiveSpecs(Path path) throws IOException {
    List<HiveTable> hiveTables = getTables(path);
    // Nothing to register under this root.
    if (hiveTables.isEmpty()) {
        return ImmutableList.<HiveSpec>of();
    }
    Collection<HiveSpec> result = Lists.newArrayList();
    for (HiveTable hiveTable : hiveTables) {
        result.add(new SimpleHiveSpec.Builder<>(path)
            .withTable(hiveTable)
            .withPartition(getPartition(path, hiveTable))
            .build());
    }
    return result;
}
Also used : HiveTable(org.apache.gobblin.hive.HiveTable) HiveSpec(org.apache.gobblin.hive.spec.HiveSpec) SimpleHiveSpec(org.apache.gobblin.hive.spec.SimpleHiveSpec)

Example 5 with HiveSpec

use of org.apache.gobblin.hive.spec.HiveSpec in project incubator-gobblin by apache.

In class HiveRegistrationPolicyBaseTest, the method testGetHiveSpecs:

@Test
public void testGetHiveSpecs() throws IOException {
    // Primary db/table plus one additional database and two additional tables:
    // the policy should produce the full db x table cross product.
    State state = new State();
    state.appendToListProp(HiveRegistrationPolicyBase.HIVE_DATABASE_NAME, "db1");
    state.appendToListProp(HiveRegistrationPolicyBase.ADDITIONAL_HIVE_DATABASE_NAMES, "db2");
    state.appendToListProp(HiveRegistrationPolicyBase.HIVE_TABLE_NAME, "tbl1");
    state.appendToListProp(HiveRegistrationPolicyBase.ADDITIONAL_HIVE_TABLE_NAMES, "tbl2,tbl3");
    this.path = new Path(getClass().getResource("/test-hive-table").toString());
    Collection<HiveSpec> specs = new HiveRegistrationPolicyBase(state).getHiveSpecs(this.path);
    // Expected (db, table) pairs, in the order the policy produces them.
    String[][] expected = {
        { "db1", "tbl1" },
        { "db1", "tbl2" },
        { "db1", "tbl3" },
        { "db2", "tbl1" },
        { "db2", "tbl2" },
        { "db2", "tbl3" }
    };
    Assert.assertEquals(specs.size(), expected.length);
    Iterator<HiveSpec> specIterator = specs.iterator();
    for (String[] dbAndTable : expected) {
        examine(specIterator.next(), dbAndTable[0], dbAndTable[1]);
    }
}
Also used : Path(org.apache.hadoop.fs.Path) State(org.apache.gobblin.configuration.State) HiveSpec(org.apache.gobblin.hive.spec.HiveSpec) SimpleHiveSpec(org.apache.gobblin.hive.spec.SimpleHiveSpec) Test(org.testng.annotations.Test)

Aggregations

HiveSpec (org.apache.gobblin.hive.spec.HiveSpec)6 Path (org.apache.hadoop.fs.Path)5 SimpleHiveSpec (org.apache.gobblin.hive.spec.SimpleHiveSpec)4 State (org.apache.gobblin.configuration.State)3 IOException (java.io.IOException)2 HiveRegistrationPolicy (org.apache.gobblin.hive.policy.HiveRegistrationPolicy)2 Test (org.testng.annotations.Test)2 Closer (com.google.common.io.Closer)1 ArrayList (java.util.ArrayList)1 Collection (java.util.Collection)1 ExecutionException (java.util.concurrent.ExecutionException)1 ExecutorCompletionService (java.util.concurrent.ExecutorCompletionService)1 CompactionPathParser (org.apache.gobblin.compaction.parser.CompactionPathParser)1 WorkUnitState (org.apache.gobblin.configuration.WorkUnitState)1 CopyEntity (org.apache.gobblin.data.management.copy.CopyEntity)1 CopyableFile (org.apache.gobblin.data.management.copy.CopyableFile)1 PostPublishStep (org.apache.gobblin.data.management.copy.entities.PostPublishStep)1 PrePublishStep (org.apache.gobblin.data.management.copy.entities.PrePublishStep)1 HiveRegister (org.apache.gobblin.hive.HiveRegister)1 HiveRegisterStep (org.apache.gobblin.hive.HiveRegisterStep)1