Search in sources:

Example 1 with DatasetStateStoreEntryManager

use of org.apache.gobblin.metastore.metadata.DatasetStateStoreEntryManager in project incubator-gobblin by apache.

The class TimestampedDatasetStateStoreVersionFinder, method findDatasetVersions.

/**
 * Wraps every state-store metadata entry of the given dataset in a
 * {@link TimestampedDatasetStateStoreVersion}.
 *
 * @param dataset the dataset to inspect; it is cast to {@link DatasetStoreDataset}, so any
 *                other implementation fails with a {@link ClassCastException}
 * @return a new list holding one version per metadata entry, in the order the entries are
 *         returned by {@code getDatasetStateStoreMetadataEntries()}
 * @throws IOException declared by the signature; nothing in this body throws it directly
 */
@Override
public Collection<TimestampedDatasetStateStoreVersion> findDatasetVersions(Dataset dataset) throws IOException {
    DatasetStoreDataset stateStoreDataset = (DatasetStoreDataset) dataset;
    List<TimestampedDatasetStateStoreVersion> found = Lists.newArrayList();
    for (DatasetStateStoreEntryManager entryManager : stateStoreDataset.getDatasetStateStoreMetadataEntries()) {
        TimestampedDatasetStateStoreVersion version = new TimestampedDatasetStateStoreVersion(entryManager);
        found.add(version);
    }
    return found;
}
Also used : DatasetStateStoreEntryManager(org.apache.gobblin.metastore.metadata.DatasetStateStoreEntryManager) DatasetStoreDataset(org.apache.gobblin.metastore.DatasetStoreDataset) TimestampedDatasetStateStoreVersion(org.apache.gobblin.data.management.version.TimestampedDatasetStateStoreVersion)

Example 2 with DatasetStateStoreEntryManager

use of org.apache.gobblin.metastore.metadata.DatasetStateStoreEntryManager in project incubator-gobblin by apache.

The class FsDatasetStateStoreTest, method testGetMetadataForTables.

/**
 * Persists several dataset states into a local, temp-directory-backed store and checks that
 * {@code getMetadataForTables} returns the expected number of entries for progressively
 * narrower predicates. It then reads one entry back and deletes it through its entry manager.
 */
@Test
public void testGetMetadataForTables() throws Exception {
    // Store rooted at a throwaway local directory.
    File storeRoot = Files.createTempDir();
    storeRoot.deleteOnExit();
    FsDatasetStateStore store = new FsDatasetStateStore(FileSystem.getLocal(new Configuration()), storeRoot.getAbsolutePath());

    // The only state that carries a task state; it is read back further down.
    JobState.DatasetState stateWithTask = new JobState.DatasetState("job1", "job1_id2");
    stateWithTask.setDatasetUrn("dataset2");
    stateWithTask.setId("dataset2");
    TaskState task = new TaskState();
    task.setJobId("job1_id2");
    task.setTaskId("task123");
    task.setProp("key", "value");
    stateWithTask.addTaskState(task);

    store.persistDatasetState("dataset1", new JobState.DatasetState("job1", "job1_id1"));
    store.persistDatasetState("dataset1", new JobState.DatasetState("job1", "job1_id2"));
    store.persistDatasetState("dataset2", stateWithTask);
    store.persistDatasetState("dataset1", new JobState.DatasetState("job2", "job2_id1"));
    store.persistDatasetState("", new JobState.DatasetState("job3", "job3_id1"));

    // 5 explicitly stored states, plus 4 current links, one per job-dataset
    List<FsDatasetStateStoreEntryManager> everything = store.getMetadataForTables(new StateStorePredicate(any -> true));
    Assert.assertEquals(everything.size(), 9);

    // 3 explicitly stored states, plus 2 current links, one per dataset
    List<FsDatasetStateStoreEntryManager> job1Entries = store.getMetadataForTables(new StoreNamePredicate("job1", any -> true));
    Assert.assertEquals(job1Entries.size(), 5);

    List<FsDatasetStateStoreEntryManager> job1Dataset1Entries = store.getMetadataForTables(new DatasetPredicate("job1", "dataset1", any -> true));
    Assert.assertEquals(job1Dataset1Entries.size(), 3);

    // Keep only the job1/dataset2 entry whose state id equals CURRENT_DATASET_STATE_FILE_SUFFIX.
    List<FsDatasetStateStoreEntryManager> currentEntries = store.getMetadataForTables(new DatasetPredicate("job1", "dataset2", candidate -> ((DatasetStateStoreEntryManager) candidate).getStateId().equals(DatasetStateStore.CURRENT_DATASET_STATE_FILE_SUFFIX)));
    Assert.assertEquals(currentEntries.size(), 1);

    DatasetStateStoreEntryManager currentEntry = currentEntries.get(0);
    Assert.assertEquals(currentEntry.getStoreName(), "job1");
    Assert.assertEquals(currentEntry.getSanitizedDatasetUrn(), "dataset2");
    Assert.assertEquals(currentEntry.getStateId(), DatasetStateStore.CURRENT_DATASET_STATE_FILE_SUFFIX);
    Assert.assertEquals(currentEntry.getDatasetStateStore(), store);

    // The task property written above must survive the round trip through the store.
    JobState.DatasetState restoredState = (JobState.DatasetState) currentEntry.readState();
    TaskState restoredTask = restoredState.getTaskStates().get(0);
    Assert.assertEquals(restoredTask.getProp("key"), "value");

    currentEntry.delete();
    // verify it got deleted
    List<FsDatasetStateStoreEntryManager> afterDelete = store.getMetadataForTables(new DatasetPredicate("job1", "dataset2", candidate -> ((DatasetStateStoreEntryManager) candidate).getStateId().equals(DatasetStateStore.CURRENT_DATASET_STATE_FILE_SUFFIX)));
    Assert.assertTrue(afterDelete.isEmpty());
}
Also used : DatasetStateStore(org.apache.gobblin.metastore.DatasetStateStore) AfterClass(org.testng.annotations.AfterClass) DatasetPredicate(org.apache.gobblin.metastore.predicates.DatasetPredicate) StateStorePredicate(org.apache.gobblin.metastore.predicates.StateStorePredicate) WorkUnitState(org.apache.gobblin.configuration.WorkUnitState) FileSystem(org.apache.hadoop.fs.FileSystem) BeforeClass(org.testng.annotations.BeforeClass) FsStateStore(org.apache.gobblin.metastore.FsStateStore) IOException(java.io.IOException) Test(org.testng.annotations.Test) StoreNamePredicate(org.apache.gobblin.metastore.predicates.StoreNamePredicate) ConfigurationKeys(org.apache.gobblin.configuration.ConfigurationKeys) File(java.io.File) FsDatasetStateStoreEntryManager(org.apache.gobblin.runtime.metastore.filesystem.FsDatasetStateStoreEntryManager) DatasetStateStoreEntryManager(org.apache.gobblin.metastore.metadata.DatasetStateStoreEntryManager) List(java.util.List) Assert(org.testng.Assert) Files(com.google.common.io.Files) Map(java.util.Map) Configuration(org.apache.hadoop.conf.Configuration) Path(org.apache.hadoop.fs.Path) StateStore(org.apache.gobblin.metastore.StateStore) StoreNamePredicate(org.apache.gobblin.metastore.predicates.StoreNamePredicate) Configuration(org.apache.hadoop.conf.Configuration) FsDatasetStateStoreEntryManager(org.apache.gobblin.runtime.metastore.filesystem.FsDatasetStateStoreEntryManager) DatasetStateStoreEntryManager(org.apache.gobblin.metastore.metadata.DatasetStateStoreEntryManager) FsDatasetStateStoreEntryManager(org.apache.gobblin.runtime.metastore.filesystem.FsDatasetStateStoreEntryManager) StateStorePredicate(org.apache.gobblin.metastore.predicates.StateStorePredicate) DatasetPredicate(org.apache.gobblin.metastore.predicates.DatasetPredicate) File(java.io.File) Test(org.testng.annotations.Test)

Aggregations

DatasetStateStoreEntryManager (org.apache.gobblin.metastore.metadata.DatasetStateStoreEntryManager)2 Files (com.google.common.io.Files)1 File (java.io.File)1 IOException (java.io.IOException)1 List (java.util.List)1 Map (java.util.Map)1 ConfigurationKeys (org.apache.gobblin.configuration.ConfigurationKeys)1 WorkUnitState (org.apache.gobblin.configuration.WorkUnitState)1 TimestampedDatasetStateStoreVersion (org.apache.gobblin.data.management.version.TimestampedDatasetStateStoreVersion)1 DatasetStateStore (org.apache.gobblin.metastore.DatasetStateStore)1 DatasetStoreDataset (org.apache.gobblin.metastore.DatasetStoreDataset)1 FsStateStore (org.apache.gobblin.metastore.FsStateStore)1 StateStore (org.apache.gobblin.metastore.StateStore)1 DatasetPredicate (org.apache.gobblin.metastore.predicates.DatasetPredicate)1 StateStorePredicate (org.apache.gobblin.metastore.predicates.StateStorePredicate)1 StoreNamePredicate (org.apache.gobblin.metastore.predicates.StoreNamePredicate)1 FsDatasetStateStoreEntryManager (org.apache.gobblin.runtime.metastore.filesystem.FsDatasetStateStoreEntryManager)1 Configuration (org.apache.hadoop.conf.Configuration)1 FileSystem (org.apache.hadoop.fs.FileSystem)1 Path (org.apache.hadoop.fs.Path)1