Search in sources :

Example 1 with DatasetStateStore

use of org.apache.gobblin.metastore.DatasetStateStore in project incubator-gobblin by apache.

Class FsDatasetStateStore, method createStateStore.

/**
 * Reflectively builds a dataset state store of the requested class on top of the file system
 * named in the given configuration.
 *
 * <p>Every entry of {@code config} is first copied into a fresh Hadoop {@link Configuration},
 * which is then used to obtain the state store {@link FileSystem}.
 *
 * @param config job configuration; supplies the state store file system URI (falling back to
 *     {@code ConfigurationKeys.LOCAL_FS_URI}), the state store root directory, the listing
 *     thread pool size and the dataset URN name parser class
 * @param className name of the state store class to instantiate
 * @return the state store instance created through its longest matching constructor
 * @throws RuntimeException wrapping the {@link IOException} or
 *     {@link ReflectiveOperationException} caught while building the store
 */
protected static DatasetStateStore<JobState.DatasetState> createStateStore(Config config, String className) {
    // Copy all job configuration properties over so they are picked up by Hadoop
    Configuration hadoopConf = new Configuration();
    for (Map.Entry<String, ConfigValue> property : config.entrySet()) {
        String key = property.getKey();
        String value = property.getValue().unwrapped().toString();
        hadoopConf.set(key, value);
    }
    try {
        String fsUri = ConfigUtils.getString(config, ConfigurationKeys.STATE_STORE_FS_URI_KEY, ConfigurationKeys.LOCAL_FS_URI);
        final FileSystem fs = FileSystem.get(URI.create(fsUri), hadoopConf);
        String rootDir = config.getString(ConfigurationKeys.STATE_STORE_ROOT_DIR_KEY);
        Integer listingThreadPoolSize = ConfigUtils.getInt(config, ConfigurationKeys.THREADPOOL_SIZE_OF_LISTING_FS_DATASET_STATESTORE, ConfigurationKeys.DEFAULT_THREADPOOL_SIZE_OF_LISTING_FS_DATASET_STATESTORE);
        final String nameParserClassName = ConfigUtils.getString(config, ConfigurationKeys.DATASETURN_STATESTORE_NAME_PARSER, SimpleDatasetUrnStateStoreNameParser.class.getName());

        // Loader that instantiates one name parser per store directory, on demand
        CacheLoader<Path, DatasetUrnStateStoreNameParser> nameParserLoader = new CacheLoader<Path, DatasetUrnStateStoreNameParser>() {

            @Override
            public DatasetUrnStateStoreNameParser load(Path stateStoreDirWithStoreName) throws Exception {
                Class<?> parserClass = Class.forName(nameParserClassName);
                return (DatasetUrnStateStoreNameParser) GobblinConstructorUtils.invokeLongestConstructor(parserClass, fs, stateStoreDirWithStoreName);
            }
        };
        LoadingCache<Path, DatasetUrnStateStoreNameParser> nameParserCache = CacheBuilder.newBuilder().maximumSize(CACHE_SIZE).build(nameParserLoader);

        Class<?> storeClass = Class.forName(className);
        return (DatasetStateStore<JobState.DatasetState>) GobblinConstructorUtils.invokeLongestConstructor(storeClass, fs, rootDir, listingThreadPoolSize, nameParserCache);
    } catch (ReflectiveOperationException reflectionFailure) {
        throw new RuntimeException("Failed to instantiate " + className, reflectionFailure);
    } catch (IOException ioFailure) {
        throw new RuntimeException(ioFailure);
    }
}
Also used : Path(org.apache.hadoop.fs.Path) ConfigValue(com.typesafe.config.ConfigValue) Configuration(org.apache.hadoop.conf.Configuration) IOException(java.io.IOException) SimpleDatasetUrnStateStoreNameParser(org.apache.gobblin.metastore.nameParser.SimpleDatasetUrnStateStoreNameParser) DatasetStateStore(org.apache.gobblin.metastore.DatasetStateStore) IOException(java.io.IOException) ExecutionException(java.util.concurrent.ExecutionException) FileSystem(org.apache.hadoop.fs.FileSystem) DatasetUrnStateStoreNameParser(org.apache.gobblin.metastore.nameParser.DatasetUrnStateStoreNameParser) SimpleDatasetUrnStateStoreNameParser(org.apache.gobblin.metastore.nameParser.SimpleDatasetUrnStateStoreNameParser) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap)

Example 2 with DatasetStateStore

use of org.apache.gobblin.metastore.DatasetStateStore in project incubator-gobblin by apache.

Class JobContext, method createStateStore.

/**
 * Creates the dataset state store for a job by resolving a {@link DatasetStateStore.Factory}
 * from the configured state store type.
 *
 * <p>When {@code ConfigurationKeys.STATE_STORE_ENABLED} is present and false, the no-op state
 * store type is used. Otherwise the type is read from the dataset-specific key, then the general
 * state store key, then the default type.
 *
 * @param jobConfig the job configuration
 * @return the state store produced by the resolved factory
 * @throws IOException wrapping any checked exception raised while resolving or instantiating
 *     the factory; runtime exceptions are rethrown unchanged
 */
protected DatasetStateStore createStateStore(Config jobConfig) throws IOException {
    // Disabled only when the flag is explicitly present and set to false
    boolean stateStoreDisabled = jobConfig.hasPath(ConfigurationKeys.STATE_STORE_ENABLED) && !jobConfig.getBoolean(ConfigurationKeys.STATE_STORE_ENABLED);

    String storeType;
    if (stateStoreDisabled) {
        storeType = ConfigurationKeys.STATE_STORE_TYPE_NOOP;
    } else {
        String generalStoreType = ConfigUtils.getString(jobConfig, ConfigurationKeys.STATE_STORE_TYPE_KEY, ConfigurationKeys.DEFAULT_STATE_STORE_TYPE);
        storeType = ConfigUtils.getString(jobConfig, ConfigurationKeys.DATASET_STATE_STORE_TYPE_KEY, generalStoreType);
    }

    ClassAliasResolver<DatasetStateStore.Factory> aliasResolver = new ClassAliasResolver<>(DatasetStateStore.Factory.class);
    try {
        DatasetStateStore.Factory factory = aliasResolver.resolveClass(storeType).newInstance();
        return factory.createStateStore(jobConfig);
    } catch (RuntimeException unchecked) {
        // Unchecked failures pass through without wrapping
        throw unchecked;
    } catch (Exception checked) {
        throw new IOException(checked);
    }
}
Also used : ClassAliasResolver(org.apache.gobblin.util.ClassAliasResolver) LoggerFactory(org.slf4j.LoggerFactory) IOException(java.io.IOException) DatasetStateStore(org.apache.gobblin.metastore.DatasetStateStore) IOException(java.io.IOException) ExecutionException(java.util.concurrent.ExecutionException)

Example 3 with DatasetStateStore

use of org.apache.gobblin.metastore.DatasetStateStore in project incubator-gobblin by apache.

Class StateStoreMigrationCli, method run.

/**
 * Migrates dataset state from a source state store to a destination state store.
 *
 * <p>The migration configuration is read from the file at {@code command.path}. It must contain
 * the {@code SOURCE_KEY} and {@code DESTINATION_KEY} objects, from which the two stores are
 * built. If {@code MIGRATE_ALL_JOBS} is true, state is migrated for every store name listed by
 * the source store; otherwise only for the job named under {@code JOB_NAME_KEY}.
 *
 * @param args command line arguments; {@code args[0]} is passed to the CLI object factory as
 *     the usage string and parsing starts at index 1
 * @throws Exception if argument parsing, reading the configuration or the migration fails
 */
@Override
public void run(String[] args) throws Exception {
    CliObjectFactory<Command> factory = new ConstructorAndPublicMethodsCliObjectFactory<>(Command.class);
    Command command = factory.buildObject(args, 1, true, args[0]);
    FileSystem fs = FileSystem.get(new Configuration());
    Config config;
    // Close the stream and reader here explicitly instead of relying on the parser to release them
    try (FSDataInputStream inputStream = fs.open(command.path);
        InputStreamReader reader = new InputStreamReader(inputStream, Charset.defaultCharset())) {
        config = ConfigFactory.parseReader(reader);
    }
    Preconditions.checkNotNull(config.getObject(SOURCE_KEY));
    Preconditions.checkNotNull(config.getObject(DESTINATION_KEY));
    DatasetStateStore dstDatasetStateStore = DatasetStateStore.buildDatasetStateStore(config.getConfig(DESTINATION_KEY));
    DatasetStateStore srcDatasetStateStore = DatasetStateStore.buildDatasetStateStore(config.getConfig(SOURCE_KEY));
    // if migrating state for all jobs then list the store names (job names) and copy the current jst files
    if (ConfigUtils.getBoolean(config, MIGRATE_ALL_JOBS, Boolean.valueOf(DEFAULT_MIGRATE_ALL_JOBS))) {
        List<String> jobNames = srcDatasetStateStore.getStoreNames(Predicates.alwaysTrue());
        for (String jobName : jobNames) {
            migrateStateForJob(srcDatasetStateStore, dstDatasetStateStore, jobName, command.deleteSourceStateStore);
        }
    } else {
        Preconditions.checkNotNull(config.getString(JOB_NAME_KEY));
        migrateStateForJob(srcDatasetStateStore, dstDatasetStateStore, config.getString(JOB_NAME_KEY), command.deleteSourceStateStore);
    }
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) InputStreamReader(java.io.InputStreamReader) Config(com.typesafe.config.Config) DatasetStateStore(org.apache.gobblin.metastore.DatasetStateStore) FileSystem(org.apache.hadoop.fs.FileSystem) FSDataInputStream(org.apache.hadoop.fs.FSDataInputStream) ConstructorAndPublicMethodsCliObjectFactory(org.apache.gobblin.runtime.cli.ConstructorAndPublicMethodsCliObjectFactory)

Example 4 with DatasetStateStore

use of org.apache.gobblin.metastore.DatasetStateStore in project incubator-gobblin by apache.

Class DatasetStateCommitStep, method getDatasetStateStore.

/**
 * Returns the dataset state store for this step, creating it on first use.
 *
 * <p>The store type is read from the dataset-specific property, then the general state store
 * property, then the default type. The matching {@link DatasetStateStore.Factory} is resolved
 * by alias and builds the store from this step's properties.
 *
 * @return the previously created or newly created state store
 * @throws IOException wrapping any checked exception raised while resolving or instantiating
 *     the factory; runtime exceptions are rethrown unchanged
 */
private DatasetStateStore getDatasetStateStore() throws IOException {
    // Reuse the store if an earlier call already created it
    if (this.stateStore != null) {
        return this.stateStore;
    }

    ClassAliasResolver<DatasetStateStore.Factory> aliasResolver = new ClassAliasResolver<>(DatasetStateStore.Factory.class);
    String generalStoreType = this.props.getProp(ConfigurationKeys.STATE_STORE_TYPE_KEY, ConfigurationKeys.DEFAULT_STATE_STORE_TYPE);
    String storeType = this.props.getProp(ConfigurationKeys.DATASET_STATE_STORE_TYPE_KEY, generalStoreType);
    try {
        DatasetStateStore.Factory factory = aliasResolver.resolveClass(storeType).newInstance();
        this.stateStore = factory.createStateStore(ConfigFactory.parseProperties(this.props.getProperties()));
    } catch (RuntimeException unchecked) {
        // Unchecked failures pass through without wrapping
        throw unchecked;
    } catch (Exception checked) {
        throw new IOException(checked);
    }
    return this.stateStore;
}
Also used : ClassAliasResolver(org.apache.gobblin.util.ClassAliasResolver) ConfigFactory(com.typesafe.config.ConfigFactory) IOException(java.io.IOException) DatasetStateStore(org.apache.gobblin.metastore.DatasetStateStore) IOException(java.io.IOException)

Aggregations

DatasetStateStore (org.apache.gobblin.metastore.DatasetStateStore)4 IOException (java.io.IOException)3 ExecutionException (java.util.concurrent.ExecutionException)2 ClassAliasResolver (org.apache.gobblin.util.ClassAliasResolver)2 Configuration (org.apache.hadoop.conf.Configuration)2 FileSystem (org.apache.hadoop.fs.FileSystem)2 ImmutableMap (com.google.common.collect.ImmutableMap)1 Config (com.typesafe.config.Config)1 ConfigFactory (com.typesafe.config.ConfigFactory)1 ConfigValue (com.typesafe.config.ConfigValue)1 InputStreamReader (java.io.InputStreamReader)1 Map (java.util.Map)1 ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap)1 DatasetUrnStateStoreNameParser (org.apache.gobblin.metastore.nameParser.DatasetUrnStateStoreNameParser)1 SimpleDatasetUrnStateStoreNameParser (org.apache.gobblin.metastore.nameParser.SimpleDatasetUrnStateStoreNameParser)1 ConstructorAndPublicMethodsCliObjectFactory (org.apache.gobblin.runtime.cli.ConstructorAndPublicMethodsCliObjectFactory)1 FSDataInputStream (org.apache.hadoop.fs.FSDataInputStream)1 Path (org.apache.hadoop.fs.Path)1 LoggerFactory (org.slf4j.LoggerFactory)1