Search in sources :

Example 1 with StateStorePredicate

use of org.apache.gobblin.metastore.predicates.StateStorePredicate in project incubator-gobblin by apache.

The class DatasetStoreDatasetFinder, method buildPredicate.

/**
 * Builds the predicate used to query the state store from the optional
 * {@code STORE_NAME_FILTER} and {@code DATASET_URN_FILTER} configuration entries.
 *
 * <ul>
 *   <li>Neither entry set: a plain {@link StateStorePredicate}.</li>
 *   <li>Only the store name set: a {@link StoreNamePredicate} for that name.</li>
 *   <li>Both set: a {@link DatasetPredicate} for the store name and dataset URN.</li>
 * </ul>
 *
 * Every predicate is created with an always-true custom filter.
 *
 * @return the predicate matching the configured filters
 * @throws IllegalArgumentException if the dataset URN filter is configured without a store name filter
 */
private StateStorePredicate buildPredicate() {
    String nameFilter = null;
    StateStorePredicate byStoreName = null;
    if (ConfigUtils.hasNonEmptyPath(this.config, STORE_NAME_FILTER)) {
        nameFilter = this.config.getString(STORE_NAME_FILTER);
        byStoreName = new StoreNamePredicate(nameFilter, entry -> true);
    }

    // No dataset filter: the store-name predicate (if any) is the final answer.
    if (!ConfigUtils.hasNonEmptyPath(this.config, DATASET_URN_FILTER)) {
        return byStoreName != null ? byStoreName : new StateStorePredicate(entry -> true);
    }

    // A dataset URN is only meaningful within a specific store.
    if (nameFilter == null) {
        throw new IllegalArgumentException(DATASET_URN_FILTER + " requires " + STORE_NAME_FILTER + " to also be defined.");
    }
    String urnFilter = this.config.getString(DATASET_URN_FILTER);
    return new DatasetPredicate(nameFilter, urnFilter, entry -> true);
}
Also used : Properties(java.util.Properties) DatasetPredicate(org.apache.gobblin.metastore.predicates.DatasetPredicate) StateStorePredicate(org.apache.gobblin.metastore.predicates.StateStorePredicate) Config(com.typesafe.config.Config) FileSystem(org.apache.hadoop.fs.FileSystem) IOException(java.io.IOException) StoreNamePredicate(org.apache.gobblin.metastore.predicates.StoreNamePredicate) ConfigUtils(org.apache.gobblin.util.ConfigUtils) Collectors(java.util.stream.Collectors) DatasetStateStoreEntryManager(org.apache.gobblin.metastore.metadata.DatasetStateStoreEntryManager) List(java.util.List) Map(java.util.Map) Configuration(org.apache.hadoop.conf.Configuration) DatasetsFinder(org.apache.gobblin.dataset.DatasetsFinder) Path(org.apache.hadoop.fs.Path) ConfigFactory(com.typesafe.config.ConfigFactory) StateStorePredicate(org.apache.gobblin.metastore.predicates.StateStorePredicate) DatasetPredicate(org.apache.gobblin.metastore.predicates.DatasetPredicate) StoreNamePredicate(org.apache.gobblin.metastore.predicates.StoreNamePredicate)

Example 2 with StateStorePredicate

use of org.apache.gobblin.metastore.predicates.StateStorePredicate in project incubator-gobblin by apache.

The class FsDatasetStateStoreTest, method testGetMetadataForTables.

/**
 * Persists several dataset states into a store rooted at a temporary directory, then checks
 * {@code getMetadataForTables} with an unfiltered predicate, a store-name predicate and dataset
 * predicates. Finally reads back and deletes the single entry matched by the last predicate.
 */
@Test
public void testGetMetadataForTables() throws Exception {
    File storeRoot = Files.createTempDir();
    storeRoot.deleteOnExit();
    FileSystem localFs = FileSystem.getLocal(new Configuration());
    FsDatasetStateStore stateStore = new FsDatasetStateStore(localFs, storeRoot.getAbsolutePath());

    // The "dataset2" state is the only one given a task state; its property is checked after read-back.
    JobState.DatasetState stateWithTask = new JobState.DatasetState("job1", "job1_id2");
    stateWithTask.setDatasetUrn("dataset2");
    stateWithTask.setId("dataset2");
    TaskState task = new TaskState();
    task.setJobId("job1_id2");
    task.setTaskId("task123");
    task.setProp("key", "value");
    stateWithTask.addTaskState(task);

    stateStore.persistDatasetState("dataset1", new JobState.DatasetState("job1", "job1_id1"));
    stateStore.persistDatasetState("dataset1", new JobState.DatasetState("job1", "job1_id2"));
    stateStore.persistDatasetState("dataset2", stateWithTask);
    stateStore.persistDatasetState("dataset1", new JobState.DatasetState("job2", "job2_id1"));
    stateStore.persistDatasetState("", new JobState.DatasetState("job3", "job3_id1"));

    // Unfiltered: 5 explicitly stored states, plus 4 current links, one per job-dataset
    List<FsDatasetStateStoreEntryManager> entries =
        stateStore.getMetadataForTables(new StateStorePredicate(x -> true));
    Assert.assertEquals(entries.size(), 9);

    // Store "job1" only: 3 explicitly stored states, plus 2 current links, one per dataset
    entries = stateStore.getMetadataForTables(new StoreNamePredicate("job1", x -> true));
    Assert.assertEquals(entries.size(), 5);

    // Store "job1", dataset "dataset1"
    entries = stateStore.getMetadataForTables(new DatasetPredicate("job1", "dataset1", x -> true));
    Assert.assertEquals(entries.size(), 3);

    // Store "job1", dataset "dataset2", restricted to the entry whose state id is the "current" suffix
    entries = stateStore.getMetadataForTables(
        new DatasetPredicate("job1", "dataset2",
            meta -> ((DatasetStateStoreEntryManager) meta).getStateId().equals(DatasetStateStore.CURRENT_DATASET_STATE_FILE_SUFFIX)));
    Assert.assertEquals(entries.size(), 1);

    DatasetStateStoreEntryManager currentEntry = entries.get(0);
    Assert.assertEquals(currentEntry.getStoreName(), "job1");
    Assert.assertEquals(currentEntry.getSanitizedDatasetUrn(), "dataset2");
    Assert.assertEquals(currentEntry.getStateId(), DatasetStateStore.CURRENT_DATASET_STATE_FILE_SUFFIX);
    Assert.assertEquals(currentEntry.getDatasetStateStore(), stateStore);

    // The persisted task state must be readable through the entry manager.
    JobState.DatasetState loadedState = (JobState.DatasetState) currentEntry.readState();
    TaskState loadedTask = loadedState.getTaskStates().get(0);
    Assert.assertEquals(loadedTask.getProp("key"), "value");

    currentEntry.delete();

    // verify it got deleted: the same query must now come back empty
    entries = stateStore.getMetadataForTables(
        new DatasetPredicate("job1", "dataset2",
            meta -> ((DatasetStateStoreEntryManager) meta).getStateId().equals(DatasetStateStore.CURRENT_DATASET_STATE_FILE_SUFFIX)));
    Assert.assertTrue(entries.isEmpty());
}
Also used : DatasetStateStore(org.apache.gobblin.metastore.DatasetStateStore) AfterClass(org.testng.annotations.AfterClass) DatasetPredicate(org.apache.gobblin.metastore.predicates.DatasetPredicate) StateStorePredicate(org.apache.gobblin.metastore.predicates.StateStorePredicate) WorkUnitState(org.apache.gobblin.configuration.WorkUnitState) FileSystem(org.apache.hadoop.fs.FileSystem) BeforeClass(org.testng.annotations.BeforeClass) FsStateStore(org.apache.gobblin.metastore.FsStateStore) IOException(java.io.IOException) Test(org.testng.annotations.Test) StoreNamePredicate(org.apache.gobblin.metastore.predicates.StoreNamePredicate) ConfigurationKeys(org.apache.gobblin.configuration.ConfigurationKeys) File(java.io.File) FsDatasetStateStoreEntryManager(org.apache.gobblin.runtime.metastore.filesystem.FsDatasetStateStoreEntryManager) DatasetStateStoreEntryManager(org.apache.gobblin.metastore.metadata.DatasetStateStoreEntryManager) List(java.util.List) Assert(org.testng.Assert) Files(com.google.common.io.Files) Map(java.util.Map) Configuration(org.apache.hadoop.conf.Configuration) Path(org.apache.hadoop.fs.Path) StateStore(org.apache.gobblin.metastore.StateStore) StoreNamePredicate(org.apache.gobblin.metastore.predicates.StoreNamePredicate) Configuration(org.apache.hadoop.conf.Configuration) FsDatasetStateStoreEntryManager(org.apache.gobblin.runtime.metastore.filesystem.FsDatasetStateStoreEntryManager) DatasetStateStoreEntryManager(org.apache.gobblin.metastore.metadata.DatasetStateStoreEntryManager) FsDatasetStateStoreEntryManager(org.apache.gobblin.runtime.metastore.filesystem.FsDatasetStateStoreEntryManager) StateStorePredicate(org.apache.gobblin.metastore.predicates.StateStorePredicate) DatasetPredicate(org.apache.gobblin.metastore.predicates.DatasetPredicate) File(java.io.File) Test(org.testng.annotations.Test)

Aggregations

IOException (java.io.IOException)2 List (java.util.List)2 Map (java.util.Map)2 DatasetStateStoreEntryManager (org.apache.gobblin.metastore.metadata.DatasetStateStoreEntryManager)2 DatasetPredicate (org.apache.gobblin.metastore.predicates.DatasetPredicate)2 StateStorePredicate (org.apache.gobblin.metastore.predicates.StateStorePredicate)2 StoreNamePredicate (org.apache.gobblin.metastore.predicates.StoreNamePredicate)2 Configuration (org.apache.hadoop.conf.Configuration)2 FileSystem (org.apache.hadoop.fs.FileSystem)2 Path (org.apache.hadoop.fs.Path)2 Files (com.google.common.io.Files)1 Config (com.typesafe.config.Config)1 ConfigFactory (com.typesafe.config.ConfigFactory)1 File (java.io.File)1 Properties (java.util.Properties)1 Collectors (java.util.stream.Collectors)1 ConfigurationKeys (org.apache.gobblin.configuration.ConfigurationKeys)1 WorkUnitState (org.apache.gobblin.configuration.WorkUnitState)1 DatasetsFinder (org.apache.gobblin.dataset.DatasetsFinder)1 DatasetStateStore (org.apache.gobblin.metastore.DatasetStateStore)1