Search in sources :

Example 1 with ConfigClient

use of org.apache.gobblin.config.client.ConfigClient in project incubator-gobblin by apache.

the class ConfigStoreUtils method getTopicsFromConfigStore.

/**
 * Get topics from config store.
 *
 * <p>All topics known to {@code kafkaConsumerClient} are fetched first (whitelist {@code ".*"}, empty
 * blacklist) and then narrowed down using a config-store tag:
 * <ul>
 *   <li>If {@code GOBBLIN_CONFIG_TAGS_WHITELIST} is set, only topics found under that tag whose config has
 *       {@link KafkaSource#TOPIC_WHITELIST} set to {@code true} are returned.</li>
 *   <li>Otherwise, if {@code GOBBLIN_CONFIG_TAGS_BLACKLIST} is set, topics found under that tag whose config
 *       has {@link KafkaSource#TOPIC_BLACKLIST} set to {@code true} are removed from the result.</li>
 *   <li>If neither tag is provided, a warning is logged and all topics are returned.</li>
 * </ul>
 * When both tags are present the whitelist tag takes precedence and the blacklist tag is ignored.
 *
 * @param properties job properties holding the tag and filter settings
 * @param configStoreUri root URI of the config store; the tag path is merged onto it
 * @param kafkaConsumerClient client used to list the Kafka topics
 * @return the Kafka topics remaining after the tag-based filtering
 * @throws IllegalArgumentException if a tag is provided but {@code GOBBLIN_CONFIG_FILTER} is missing
 */
public static List<KafkaTopic> getTopicsFromConfigStore(Properties properties, String configStoreUri, GobblinKafkaConsumerClient kafkaConsumerClient) {
    ConfigClient configClient = ConfigClient.createConfigClient(VersionStabilityPolicy.WEAK_LOCAL_STABILITY);
    // Match-everything patterns: the real filtering is driven by the config store below.
    State state = new State();
    state.setProp(KafkaSource.TOPIC_WHITELIST, ".*");
    state.setProp(KafkaSource.TOPIC_BLACKLIST, StringUtils.EMPTY);
    List<KafkaTopic> allTopics = kafkaConsumerClient.getFilteredTopics(DatasetFilterUtils.getPatternList(state, KafkaSource.TOPIC_BLACKLIST), DatasetFilterUtils.getPatternList(state, KafkaSource.TOPIC_WHITELIST));
    Optional<Config> runtimeConfig = ConfigClientUtils.getOptionalRuntimeConfig(properties);
    if (properties.containsKey(GOBBLIN_CONFIG_TAGS_WHITELIST)) {
        Preconditions.checkArgument(properties.containsKey(GOBBLIN_CONFIG_FILTER), "Missing required property " + GOBBLIN_CONFIG_FILTER);
        String filterString = properties.getProperty(GOBBLIN_CONFIG_FILTER);
        Path whiteListTagUri = PathUtils.mergePaths(new Path(configStoreUri), new Path(properties.getProperty(GOBBLIN_CONFIG_TAGS_WHITELIST)));
        Set<String> whitelistedTopics = getFlaggedTopicNames(configClient, whiteListTagUri, filterString, runtimeConfig, KafkaSource.TOPIC_WHITELIST);
        return allTopics.stream().filter((KafkaTopic p) -> whitelistedTopics.contains(p.getName())).collect(Collectors.toList());
    } else if (properties.containsKey(GOBBLIN_CONFIG_TAGS_BLACKLIST)) {
        Preconditions.checkArgument(properties.containsKey(GOBBLIN_CONFIG_FILTER), "Missing required property " + GOBBLIN_CONFIG_FILTER);
        String filterString = properties.getProperty(GOBBLIN_CONFIG_FILTER);
        Path blackListTagUri = PathUtils.mergePaths(new Path(configStoreUri), new Path(properties.getProperty(GOBBLIN_CONFIG_TAGS_BLACKLIST)));
        Set<String> blacklistedTopics = getFlaggedTopicNames(configClient, blackListTagUri, filterString, runtimeConfig, KafkaSource.TOPIC_BLACKLIST);
        return allTopics.stream().filter((KafkaTopic p) -> !blacklistedTopics.contains(p.getName())).collect(Collectors.toList());
    } else {
        log.warn("None of the blacklist or whitelist tags are provided");
        return allTopics;
    }
}

/**
 * Collects the names of the topics found under {@code tagUri} whose config has {@code flagKey} set to true.
 * A set is returned so that callers can do constant-time membership checks per Kafka topic.
 */
private static Set<String> getFlaggedTopicNames(ConfigClient configClient, Path tagUri, String filterString, Optional<Config> runtimeConfig, String flagKey) {
    return ConfigStoreUtils.getTopicsURIFromConfigStore(configClient, tagUri, filterString, runtimeConfig).stream()
        // A missing flag is treated as false, i.e. the topic is not selected.
        .filter((URI u) -> ConfigUtils.getBoolean(ConfigStoreUtils.getConfig(configClient, u, runtimeConfig), flagKey, false))
        .map((URI u) -> ConfigStoreUtils.getTopicNameFromURI(u))
        .collect(Collectors.toSet());
}
Also used : Path(org.apache.hadoop.fs.Path) ConfigClient(org.apache.gobblin.config.client.ConfigClient) Config(com.typesafe.config.Config) ArrayList(java.util.ArrayList) URI(java.net.URI) State(org.apache.gobblin.configuration.State) ArrayList(java.util.ArrayList) List(java.util.List)

Example 2 with ConfigClient

use of org.apache.gobblin.config.client.ConfigClient in project incubator-gobblin by apache.

the class ConfigStoreUtils method setTopicsFromConfigStore.

/**
 * Shortlists topics from the config store based on whitelist/blacklist tags and adds their names
 * to {@code whitelist} or {@code blacklist} respectively.
 *
 * <p>Nothing happens when no config store URI can be derived from {@code properties}. The whitelist tag
 * is checked first; the blacklist tag is only considered when the whitelist tag is absent. If neither
 * tag is provided, a warning is logged and both sets are left unmodified.
 *
 * @param properties job properties holding the config store URI, tag and filter settings
 * @param blacklist set receiving topic names whose config has {@code _blacklistTopicKey} set to true
 * @param whitelist set receiving topic names whose config has {@code _whitelistTopicKey} set to true
 * @param _blacklistTopicKey boolean config key that marks a topic as blacklisted
 * @param _whitelistTopicKey boolean config key that marks a topic as whitelisted
 * @throws IllegalArgumentException if a tag is provided but {@code GOBBLIN_CONFIG_FILTER} is missing
 */
public static void setTopicsFromConfigStore(Properties properties, Set<String> blacklist, Set<String> whitelist, final String _blacklistTopicKey, final String _whitelistTopicKey) {
    Optional<String> configStoreUri = getConfigStoreUri(properties);
    if (!configStoreUri.isPresent()) {
        return;
    }
    ConfigClient configClient = ConfigClient.createConfigClient(VersionStabilityPolicy.WEAK_LOCAL_STABILITY);
    Optional<Config> runtimeConfig = ConfigClientUtils.getOptionalRuntimeConfig(properties);

    // Decide which tag drives the lookup, which per-topic flag is read, and which set is filled.
    final String tagProperty;
    final String topicFlagKey;
    final Set<String> target;
    if (properties.containsKey(GOBBLIN_CONFIG_TAGS_WHITELIST)) {
        tagProperty = GOBBLIN_CONFIG_TAGS_WHITELIST;
        topicFlagKey = _whitelistTopicKey;
        target = whitelist;
    } else if (properties.containsKey(GOBBLIN_CONFIG_TAGS_BLACKLIST)) {
        tagProperty = GOBBLIN_CONFIG_TAGS_BLACKLIST;
        topicFlagKey = _blacklistTopicKey;
        target = blacklist;
    } else {
        log.warn("None of the blacklist or whitelist tags are provided");
        return;
    }

    Preconditions.checkArgument(properties.containsKey(GOBBLIN_CONFIG_FILTER), "Missing required property " + GOBBLIN_CONFIG_FILTER);
    String filterString = properties.getProperty(GOBBLIN_CONFIG_FILTER);
    Path tagUri = PathUtils.mergePaths(new Path(configStoreUri.get()), new Path(properties.getProperty(tagProperty)));
    for (URI topicUri : getTopicsURIFromConfigStore(configClient, tagUri, filterString, runtimeConfig)) {
        // A topic is only picked up when its own config explicitly sets the flag to true.
        if (ConfigUtils.getBoolean(getConfig(configClient, topicUri, runtimeConfig), topicFlagKey, false)) {
            target.add(getTopicNameFromURI(topicUri));
        }
    }
}
Also used : Path(org.apache.hadoop.fs.Path) ConfigClient(org.apache.gobblin.config.client.ConfigClient) Config(com.typesafe.config.Config) URI(java.net.URI)

Example 3 with ConfigClient

use of org.apache.gobblin.config.client.ConfigClient in project incubator-gobblin by apache.

the class RetentionTestHelper method clean.

/**
 * Runs gobblin retention over the data generated for a single test.
 *
 * <p>{@link DatasetCleaner}, which does retention in production, can not be called directly because some
 * runtime properties such as ${testNameTempPath} have to be resolved first. That directory holds all the
 * setup data created for a test by {@link RetentionTestDataGenerator#setup()} and is unique per test.
 *
 * <p>Two kinds of retention config are supported:
 * <ul>
 *   <li>A resource whose name ends with {@code .job} is loaded as job properties; every value has
 *       ${testNameTempPath} substituted and the datasets found by {@link MultiCleanableDatasetFinder}
 *       are cleaned.</li>
 *   <li>Any other resource is parsed as a typesafe {@link Config}. The default {@link ConfigClient} used by
 *       {@link DatasetCleaner} connects to config store configs; since the configs live on the classpath
 *       here, a mock {@link ConfigClient} that always returns the parsed config is supplied instead.</li>
 * </ul>
 *
 * @param fs file system handed to the dataset finder
 * @param retentionConfigClasspathResource this is the same jobProps/config files used while running a real retention job
 * @param additionalJobPropsClasspathResource optional classpath resource with extra job properties; these are
 *        only merged into the job properties of the config-driven (non {@code .job}) flow
 * @param testNameTempPath temp path for this test where test data is generated
 */
public static void clean(FileSystem fs, Path retentionConfigClasspathResource, Optional<Path> additionalJobPropsClasspathResource, Path testNameTempPath) throws Exception {
    final ClassLoader resourceLoader = RetentionTestHelper.class.getClassLoader();

    Properties additionalJobProps = new Properties();
    if (additionalJobPropsClasspathResource.isPresent()) {
        final String additionalPropsResource = additionalJobPropsClasspathResource.get().toString();
        try (final InputStream in = resourceLoader.getResourceAsStream(additionalPropsResource)) {
            additionalJobProps.load(in);
        }
    }

    final boolean isJobFile = retentionConfigClasspathResource.getName().endsWith(".job");
    if (!isJobFile) {
        // Config-driven flow: resolve ${testNameTempPath} through a fallback config.
        Config parsedConfig = ConfigFactory.parseResources(retentionConfigClasspathResource.toString());
        Config tempPathFallback = ConfigFactory.parseMap(ImmutableMap.of("testNameTempPath", PathUtils.getPathWithoutSchemeAndAuthority(testNameTempPath).toString()));
        Config testConfig = parsedConfig.withFallback(tempPathFallback).resolve();

        // Stand-in for the config store: every lookup yields the classpath config.
        ConfigClient client = mock(ConfigClient.class);
        when(client.getConfig(any(String.class))).thenReturn(testConfig);

        Properties jobProps = new Properties();
        jobProps.setProperty(CleanableDatasetBase.SKIP_TRASH_KEY, Boolean.toString(true));
        jobProps.setProperty(ConfigurationKeys.CONFIG_MANAGEMENT_STORE_URI, "dummy");
        jobProps.setProperty(ConfigurationKeys.CONFIG_MANAGEMENT_STORE_ENABLED, "true");
        jobProps.putAll(additionalJobProps);

        // The finder class is named by the config; two constructor argument lists are offered to the helper.
        Class<?> finderClass = Class.forName(testConfig.getString(MultiCleanableDatasetFinder.DATASET_FINDER_CLASS_KEY));
        @SuppressWarnings("unchecked") DatasetsFinder<CleanableDataset> finder = (DatasetsFinder<CleanableDataset>) GobblinConstructorUtils.invokeFirstConstructor(finderClass, ImmutableList.of(fs, jobProps, testConfig, client), ImmutableList.of(fs, jobProps, client));
        for (CleanableDataset cleanable : finder.findDatasets()) {
            cleanable.clean();
        }
        return;
    }

    // Job-file flow: load the properties and substitute the placeholder in every value.
    Properties jobProps = new Properties();
    try (final InputStream in = resourceLoader.getResourceAsStream(retentionConfigClasspathResource.toString())) {
        jobProps.load(in);
        for (Entry<Object, Object> property : jobProps.entrySet()) {
            String resolvedValue = StringUtils.replace((String) property.getValue(), "${testNameTempPath}", testNameTempPath.toString());
            jobProps.put(property.getKey(), resolvedValue);
        }
    }
    for (Dataset dataset : new MultiCleanableDatasetFinder(fs, jobProps).findDatasets()) {
        ((CleanableDataset) dataset).clean();
    }
}
Also used : ConfigClient(org.apache.gobblin.config.client.ConfigClient) CleanableDataset(org.apache.gobblin.data.management.retention.dataset.CleanableDataset) InputStream(java.io.InputStream) Dataset(org.apache.gobblin.dataset.Dataset) CleanableDataset(org.apache.gobblin.data.management.retention.dataset.CleanableDataset) Config(com.typesafe.config.Config) Properties(java.util.Properties) DatasetsFinder(org.apache.gobblin.dataset.DatasetsFinder) MultiCleanableDatasetFinder(org.apache.gobblin.data.management.retention.profile.MultiCleanableDatasetFinder)

Example 4 with ConfigClient

use of org.apache.gobblin.config.client.ConfigClient in project incubator-gobblin by apache.

the class ValidationJob method runFileFormatValidation.

/**
 * Validates that partitions are in a given format
 * Partitions to be processed are picked up from the config store which are tagged.
 * Tag can be passed through key GOBBLIN_CONFIG_TAGS_WHITELIST
 * Datasets tagged by the above key will be picked up.
 * PathName will be treated as tableName and ParentPathName will be treated as dbName
 *
 * For example if the dataset uri picked up by is /data/hive/myDb/myTable
 * Then myTable is tableName and myDb is dbName
 *
 * <p>The expected format is read from {@code VALIDATION_FILE_FORMAT_KEY} and matched case-insensitively
 * against {@link HiveSerDeWrapper.BuiltInHiveSerDe}. Partitions are only collected when the config store
 * is enabled and a config store URI is set. Every problem found is accumulated in {@code throwables};
 * if any were recorded, each is logged and a {@link RuntimeException} carrying them as suppressed
 * exceptions is thrown.
 *
 * @throws IllegalArgumentException if {@code VALIDATION_FILE_FORMAT_KEY} is missing, or the config store
 *         is in use but {@code GOBBLIN_CONFIG_TAGS_WHITELIST} is missing
 * @throws RuntimeException if at least one validation failure was recorded
 */
private void runFileFormatValidation() throws IOException {
    Preconditions.checkArgument(this.props.containsKey(VALIDATION_FILE_FORMAT_KEY));
    this.configStoreUri = StringUtils.isNotBlank(this.props.getProperty(ConfigurationKeys.CONFIG_MANAGEMENT_STORE_URI)) ? Optional.of(this.props.getProperty(ConfigurationKeys.CONFIG_MANAGEMENT_STORE_URI)) : Optional.<String>absent();
    // An explicitly disabled config store overrides any configured URI.
    if (!Boolean.parseBoolean(this.props.getProperty(ConfigurationKeys.CONFIG_MANAGEMENT_STORE_ENABLED, ConfigurationKeys.DEFAULT_CONFIG_MANAGEMENT_STORE_ENABLED))) {
        this.configStoreUri = Optional.<String>absent();
    }
    List<Partition> partitions = new ArrayList<>();
    if (this.configStoreUri.isPresent()) {
        Preconditions.checkArgument(this.props.containsKey(GOBBLIN_CONFIG_TAGS_WHITELIST), "Missing required property " + GOBBLIN_CONFIG_TAGS_WHITELIST);
        String tag = this.props.getProperty(GOBBLIN_CONFIG_TAGS_WHITELIST);
        ConfigClient configClient = ConfigClient.createConfigClient(VersionStabilityPolicy.WEAK_LOCAL_STABILITY);
        Path tagUri = PathUtils.mergePaths(new Path(this.configStoreUri.get()), new Path(tag));
        try (AutoReturnableObject<IMetaStoreClient> client = pool.getClient()) {
            Collection<URI> importedBy = configClient.getImportedBy(new URI(tagUri.toString()), true);
            for (URI uri : importedBy) {
                // Last path component is the table name, its parent is the database name.
                String dbName = new Path(uri).getParent().getName();
                Table table = new Table(client.get().getTable(dbName, new Path(uri).getName()));
                for (org.apache.hadoop.hive.metastore.api.Partition partition : client.get().listPartitions(dbName, table.getTableName(), maxParts)) {
                    partitions.add(new Partition(table, partition));
                }
            }
        } catch (Exception e) {
            // Best effort: record the failure and still validate whatever partitions were collected.
            this.throwables.add(e);
        }
    }
    // The required SerDe does not depend on the partition, so resolve it once. Locale.ROOT keeps the
    // upper-casing independent of the JVM default locale (e.g. Turkish dotted/dotless i).
    String fileFormat = this.props.getProperty(VALIDATION_FILE_FORMAT_KEY);
    Optional<HiveSerDeWrapper.BuiltInHiveSerDe> hiveSerDe = Enums.getIfPresent(HiveSerDeWrapper.BuiltInHiveSerDe.class, fileFormat.toUpperCase(java.util.Locale.ROOT));
    for (Partition partition : partitions) {
        if (!shouldValidate(partition)) {
            continue;
        }
        if (!hiveSerDe.isPresent()) {
            throwables.add(new Throwable("Partition SerDe is either not supported or absent"));
            continue;
        }
        String serdeLib = partition.getTPartition().getSd().getSerdeInfo().getSerializationLib();
        if (!hiveSerDe.get().toString().equalsIgnoreCase(serdeLib)) {
            throwables.add(new Throwable("Partition " + partition.getCompleteName() + " SerDe " + serdeLib + " doesn't match with the required SerDe " + hiveSerDe.get().toString()));
        }
    }
    if (!this.throwables.isEmpty()) {
        RuntimeException failure = new RuntimeException("Validation Job Failed");
        for (Throwable e : this.throwables) {
            // Pass the throwable to the logger so the stack trace is kept, and attach it to the
            // thrown exception so callers can inspect the individual causes.
            log.error("Failed to validate due to " + e, e);
            failure.addSuppressed(e);
        }
        throw failure;
    }
}
Also used : Path(org.apache.hadoop.fs.Path) Partition(org.apache.hadoop.hive.ql.metadata.Partition) ConfigClient(org.apache.gobblin.config.client.ConfigClient) Table(org.apache.hadoop.hive.ql.metadata.Table) ArrayList(java.util.ArrayList) IMetaStoreClient(org.apache.hadoop.hive.metastore.IMetaStoreClient) URI(java.net.URI) HiveSerDeWrapper(org.apache.gobblin.hive.HiveSerDeWrapper) ParseException(java.text.ParseException) SQLException(java.sql.SQLException) UpdateNotFoundException(org.apache.gobblin.data.management.conversion.hive.provider.UpdateNotFoundException) UncheckedExecutionException(com.google.common.util.concurrent.UncheckedExecutionException) IOException(java.io.IOException)

Example 5 with ConfigClient

use of org.apache.gobblin.config.client.ConfigClient in project incubator-gobblin by apache.

the class QueryBasedSource method getTableSpecificPropsFromConfigStore.

/**
 * Loads the table specific properties of every given table from the config store.
 *
 * <p>The config of a table is read from the path built out of the configured config store URI,
 * {@code QUERY_BASED_SOURCE} and the table's dataset name, and is converted to a {@link State}.
 *
 * @param tables tables to look up
 * @param state state providing {@link ConfigurationKeys#CONFIG_MANAGEMENT_STORE_URI}
 * @return a map from each table to the state built from its config store config
 * @throws NullPointerException if the config store URI is not set in {@code state}
 * @throws RuntimeException if the config of a table can not be fetched
 */
private static Map<SourceEntity, State> getTableSpecificPropsFromConfigStore(Collection<SourceEntity> tables, State state) {
    ConfigClient client = ConfigClientCache.getClient(VersionStabilityPolicy.STRONG_LOCAL_STABILITY);
    String configStoreUri = state.getProp(ConfigurationKeys.CONFIG_MANAGEMENT_STORE_URI);
    Preconditions.checkNotNull(configStoreUri);

    Map<SourceEntity, State> propsByTable = Maps.newHashMap();
    for (SourceEntity table : tables) {
        State tableProps;
        try {
            tableProps = ConfigUtils.configToState(
                client.getConfig(PathUtils.combinePaths(configStoreUri, QUERY_BASED_SOURCE, table.getDatasetName()).toUri()));
        } catch (VersionDoesNotExistException | ConfigStoreFactoryDoesNotExistsException | ConfigStoreCreationException e) {
            // Surface config store failures as unchecked, keeping the original cause.
            throw new RuntimeException("Unable to get table config for " + table, e);
        }
        propsByTable.put(table, tableProps);
    }
    return propsByTable;
}
Also used : ConfigClient(org.apache.gobblin.config.client.ConfigClient) VersionDoesNotExistException(org.apache.gobblin.config.store.api.VersionDoesNotExistException) ConfigStoreCreationException(org.apache.gobblin.config.store.api.ConfigStoreCreationException) WorkUnitState(org.apache.gobblin.configuration.WorkUnitState) State(org.apache.gobblin.configuration.State) WorkingState(org.apache.gobblin.configuration.WorkUnitState.WorkingState) SourceState(org.apache.gobblin.configuration.SourceState) ConfigStoreFactoryDoesNotExistsException(org.apache.gobblin.config.client.api.ConfigStoreFactoryDoesNotExistsException)

Aggregations

ConfigClient (org.apache.gobblin.config.client.ConfigClient)5 Config (com.typesafe.config.Config)3 URI (java.net.URI)3 Path (org.apache.hadoop.fs.Path)3 ArrayList (java.util.ArrayList)2 State (org.apache.gobblin.configuration.State)2 UncheckedExecutionException (com.google.common.util.concurrent.UncheckedExecutionException)1 IOException (java.io.IOException)1 InputStream (java.io.InputStream)1 SQLException (java.sql.SQLException)1 ParseException (java.text.ParseException)1 List (java.util.List)1 Properties (java.util.Properties)1 ConfigStoreFactoryDoesNotExistsException (org.apache.gobblin.config.client.api.ConfigStoreFactoryDoesNotExistsException)1 ConfigStoreCreationException (org.apache.gobblin.config.store.api.ConfigStoreCreationException)1 VersionDoesNotExistException (org.apache.gobblin.config.store.api.VersionDoesNotExistException)1 SourceState (org.apache.gobblin.configuration.SourceState)1 WorkUnitState (org.apache.gobblin.configuration.WorkUnitState)1 WorkingState (org.apache.gobblin.configuration.WorkUnitState.WorkingState)1 UpdateNotFoundException (org.apache.gobblin.data.management.conversion.hive.provider.UpdateNotFoundException)1