Search in sources :

Example 21 with State

use of org.apache.gobblin.configuration.State in project incubator-gobblin by apache.

the class HiveMetaStoreUtils method getPartition.

/**
 * Builds a metastore {@link Partition} from the given {@link HivePartition}.
 *
 * <p>The database name, table name, partition values, parameters (derived from the partition's
 * props) and storage descriptor are copied over. The create time comes from the partition's
 * explicit create time when one is present, and otherwise from the
 * {@code HiveConstants.CREATE_TIME} prop if that prop is set. The last access time is only set
 * when the {@code HiveConstants.LAST_ACCESS_TIME} prop is set.
 *
 * @param hivePartition the partition to convert
 * @return a newly created {@link Partition} populated from {@code hivePartition}
 */
public static Partition getPartition(HivePartition hivePartition) {
    State partitionProps = hivePartition.getProps();

    Partition converted = new Partition();
    converted.setDbName(hivePartition.getDbName());
    converted.setTableName(hivePartition.getTableName());
    converted.setValues(hivePartition.getValues());
    converted.setParameters(getParameters(partitionProps));

    populateTimestamps(hivePartition, partitionProps, converted);

    converted.setSd(getStorageDescriptor(hivePartition));
    return converted;
}

/**
 * Copies the create time and last access time onto {@code target}, when they are available.
 */
private static void populateTimestamps(HivePartition source, State partitionProps, Partition target) {
    // An explicit create time on the partition wins over the CREATE_TIME prop.
    if (source.getCreateTime().isPresent()) {
        target.setCreateTime(Ints.checkedCast(source.getCreateTime().get()));
    } else if (partitionProps.contains(HiveConstants.CREATE_TIME)) {
        target.setCreateTime(partitionProps.getPropAsInt(HiveConstants.CREATE_TIME));
    }

    // The last access time is only read from the props.
    if (partitionProps.contains(HiveConstants.LAST_ACCESS_TIME)) {
        target.setLastAccessTime(partitionProps.getPropAsInt(HiveConstants.LAST_ACCESS_TIME));
    }
}
Also used : Partition(org.apache.hadoop.hive.metastore.api.Partition) HivePartition(org.apache.gobblin.hive.HivePartition) State(org.apache.gobblin.configuration.State)

Example 22 with State

use of org.apache.gobblin.configuration.State in project incubator-gobblin by apache.

the class HiveMetaStoreUtils method getStorageDescriptor.

/**
 * Builds a {@link StorageDescriptor} for the given registration unit.
 *
 * <p>Parameters (derived from the unit's storage props), columns and serde info are always set.
 * Location, input/output format, compression flag, bucket count, bucket columns and the
 * stored-as-sub-directories flag are each set only when the unit provides a value for them.
 *
 * @param unit the registration unit to describe
 * @return a newly created {@link StorageDescriptor}
 */
private static StorageDescriptor getStorageDescriptor(HiveRegistrationUnit unit) {
    StorageDescriptor descriptor = new StorageDescriptor();

    // Always-present attributes.
    descriptor.setParameters(getParameters(unit.getStorageProps()));
    descriptor.setCols(getFieldSchemas(unit));

    // Optional attributes: copied only when the unit carries a value.
    if (unit.getLocation().isPresent()) {
        descriptor.setLocation(unit.getLocation().get());
    }

    if (unit.getInputFormat().isPresent()) {
        descriptor.setInputFormat(unit.getInputFormat().get());
    }

    if (unit.getOutputFormat().isPresent()) {
        descriptor.setOutputFormat(unit.getOutputFormat().get());
    }

    if (unit.getIsCompressed().isPresent()) {
        descriptor.setCompressed(unit.getIsCompressed().get());
    }

    if (unit.getNumBuckets().isPresent()) {
        descriptor.setNumBuckets(unit.getNumBuckets().get());
    }

    if (unit.getBucketColumns().isPresent()) {
        descriptor.setBucketCols(unit.getBucketColumns().get());
    }

    if (unit.getIsStoredAsSubDirs().isPresent()) {
        descriptor.setStoredAsSubDirectories(unit.getIsStoredAsSubDirs().get());
    }

    descriptor.setSerdeInfo(getSerDeInfo(unit));
    return descriptor;
}
Also used : State(org.apache.gobblin.configuration.State) StorageDescriptor(org.apache.hadoop.hive.metastore.api.StorageDescriptor)

Example 23 with State

use of org.apache.gobblin.configuration.State in project incubator-gobblin by apache.

the class HiveRegistrationUnitComparatorTest method testCheckExistingIsSuperstate.

/**
 * Exercises {@code checkExistingIsSuperstate} across four existing/new state combinations,
 * using a fresh comparator for each check and asserting on its {@code result} field.
 */
@Test
public void testCheckExistingIsSuperstate() throws Exception {
    final String firstKey = "key1";
    final String firstValue = "value1";
    final String secondKey = "key2";
    final String secondValue = "value2";

    State existingState = new State();
    State newState = new State();

    // Both states empty.
    Assert.assertFalse(superstateCheckResult(existingState, newState));

    // New state has a key that the existing state lacks.
    newState.setProp(firstKey, firstValue);
    Assert.assertTrue(superstateCheckResult(existingState, newState));

    // Existing state has the key, but with a different value.
    existingState.setProp(firstKey, secondValue);
    Assert.assertTrue(superstateCheckResult(existingState, newState));

    // Existing state has the matching key/value plus an additional key.
    existingState.setProp(firstKey, firstValue);
    existingState.setProp(secondKey, secondValue);
    Assert.assertFalse(superstateCheckResult(existingState, newState));
}

/**
 * Runs {@code checkExistingIsSuperstate} on a brand-new comparator and returns its
 * {@code result} field afterwards.
 */
private static boolean superstateCheckResult(State existingState, State newState) throws Exception {
    HiveRegistrationUnitComparator comparator = new HiveRegistrationUnitComparator<>(null, null);
    comparator.checkExistingIsSuperstate(existingState, newState);
    return comparator.result;
}
Also used : State(org.apache.gobblin.configuration.State) Test(org.testng.annotations.Test)

Example 24 with State

use of org.apache.gobblin.configuration.State in project incubator-gobblin by apache.

the class HiveRegistrationPolicyBaseTest method testTableRegexp.

/**
 * Verifies that {@code getDatabaseOrTableName} returns the text captured by the regex group
 * ("topic") for a matching path.
 *
 * <p>Despite the method name, the lookup here is done with the
 * {@code HIVE_DATABASE_NAME} / {@code HIVE_DATABASE_REGEX} keys.
 */
@Test
public void testTableRegexp() throws IOException {
    final String groupedRegex = ".*test_bucket/(.*)/staging/.*";

    // Policy configured with the regex under the database-regex key.
    State policyState = new State();
    policyState.appendToListProp(HiveRegistrationPolicyBase.HIVE_DATABASE_REGEX, groupedRegex);
    HiveRegistrationPolicyBase policy = new HiveRegistrationPolicyBase(policyState);

    Path inputPath = new Path("s3://test_bucket/topic/staging/2017-10-21/");
    Optional<Pattern> compiledRegex = Optional.of(Pattern.compile(groupedRegex));

    String resolvedName = policy.getDatabaseOrTableName(
        inputPath,
        HiveRegistrationPolicyBase.HIVE_DATABASE_NAME,
        HiveRegistrationPolicyBase.HIVE_DATABASE_REGEX,
        compiledRegex);

    Assert.assertEquals(resolvedName, "topic");
}
Also used : Path(org.apache.hadoop.fs.Path) Pattern(java.util.regex.Pattern) State(org.apache.gobblin.configuration.State) Test(org.testng.annotations.Test)

Example 25 with State

use of org.apache.gobblin.configuration.State in project incubator-gobblin by apache.

the class HiveRegistrationPolicyBaseTest method testTableRegexpWithoutGroupShouldFail.

/**
 * Verifies that resolving a name with a regex that has no capturing group fails with an
 * {@link IllegalStateException}.
 *
 * <p>As in the sibling test, the lookup is done with the
 * {@code HIVE_DATABASE_NAME} / {@code HIVE_DATABASE_REGEX} keys.
 */
@Test(expectedExceptions = IllegalStateException.class)
public void testTableRegexpWithoutGroupShouldFail() throws IOException {
    final String ungroupedRegex = ".*test_bucket/.*/staging/.*";

    // Policy configured with the group-less regex under the database-regex key.
    State policyState = new State();
    policyState.appendToListProp(HiveRegistrationPolicyBase.HIVE_DATABASE_REGEX, ungroupedRegex);
    HiveRegistrationPolicyBase policy = new HiveRegistrationPolicyBase(policyState);

    Path inputPath = new Path("s3://test_bucket/topic/staging/2017-10-21/");
    Optional<Pattern> compiledRegex = Optional.of(Pattern.compile(ungroupedRegex));

    String resolvedName = policy.getDatabaseOrTableName(
        inputPath,
        HiveRegistrationPolicyBase.HIVE_DATABASE_NAME,
        HiveRegistrationPolicyBase.HIVE_DATABASE_REGEX,
        compiledRegex);

    // Kept from the original; the test is expected to have thrown before reaching this line.
    Assert.assertEquals(resolvedName, "topic");
}
Also used : Path(org.apache.hadoop.fs.Path) Pattern(java.util.regex.Pattern) State(org.apache.gobblin.configuration.State) Test(org.testng.annotations.Test)

Aggregations

State (org.apache.gobblin.configuration.State): 195, Test (org.testng.annotations.Test): 103, WorkUnitState (org.apache.gobblin.configuration.WorkUnitState): 74, SourceState (org.apache.gobblin.configuration.SourceState): 38, Path (org.apache.hadoop.fs.Path): 30, File (java.io.File): 20, IOException (java.io.IOException): 16, Map (java.util.Map): 14, WorkingState (org.apache.gobblin.configuration.WorkUnitState.WorkingState): 14, WorkUnit (org.apache.gobblin.source.workunit.WorkUnit): 14, TaskState (org.apache.hadoop.mapreduce.v2.api.records.TaskState): 13, Properties (java.util.Properties): 12, FinalState (org.apache.gobblin.util.FinalState): 12, Configuration (org.apache.hadoop.conf.Configuration): 12, TaskLevelPolicyCheckResults (org.apache.gobblin.qualitychecker.task.TaskLevelPolicyCheckResults): 9, Config (com.typesafe.config.Config): 8, ArrayList (java.util.ArrayList): 8, GenericRecord (org.apache.avro.generic.GenericRecord): 8, LongWatermark (org.apache.gobblin.source.extractor.extract.LongWatermark): 7, FileInputStream (java.io.FileInputStream): 6