Example usage of org.apache.gobblin.configuration.State in the Apache project incubator-gobblin.
From class RecoveryHelperTest, method testPurge:
@Test
public void testPurge() throws Exception {
  String content = "contents";

  // Set up a persist directory with a 1-hour retention policy.
  File persistDirBase = Files.createTempDir();
  persistDirBase.deleteOnExit();
  State state = new State();
  state.setProp(RecoveryHelper.PERSIST_DIR_KEY, persistDirBase.getAbsolutePath());
  state.setProp(RecoveryHelper.PERSIST_RETENTION_KEY, "1");
  RecoveryHelper recoveryHelper = new RecoveryHelper(FileSystem.getLocal(new Configuration()), state);
  File persistDir = new File(RecoveryHelper.getPersistDir(state).get().toString());
  // Fail fast if the directory cannot be created instead of getting a confusing
  // FileNotFoundException later.
  Assert.assertTrue(persistDir.mkdir(), "Could not create persist dir " + persistDir);

  // file1 is back-dated beyond the retention window and should be purged.
  File file = new File(persistDir, "file1");
  try (OutputStream os = new FileOutputStream(file)) {
    IOUtils.write(content, os);
  }
  Assert.assertTrue(file.setLastModified(System.currentTimeMillis() - TimeUnit.HOURS.toMillis(2)),
      "Could not back-date " + file);

  // file2 keeps its current mtime and should survive the purge.
  File file2 = new File(persistDir, "file2");
  try (OutputStream os2 = new FileOutputStream(file2)) {
    IOUtils.write(content, os2);
  }

  Assert.assertEquals(persistDir.listFiles().length, 2);
  recoveryHelper.purgeOldPersistedFile();
  Assert.assertEquals(persistDir.listFiles().length, 1);
}
Example usage of org.apache.gobblin.configuration.State in the Apache project incubator-gobblin.
From class RecoveryHelperTest, method testPersistFile:
@Test
public void testPersistFile() throws Exception {
  String content = "contents";

  // Create a staging file that persistFile() should move into the recovery dir.
  File stagingDir = Files.createTempDir();
  stagingDir.deleteOnExit();
  File file = new File(stagingDir, "file");
  try (OutputStream os = new FileOutputStream(file)) {
    IOUtils.write(content, os);
  }
  Assert.assertEquals(stagingDir.listFiles().length, 1);

  State state = new State();
  state.setProp(RecoveryHelper.PERSIST_DIR_KEY, this.tmpDir.getAbsolutePath());
  state.setProp(ConfigurationKeys.DATA_PUBLISHER_FINAL_DIR, "/publisher");
  File recoveryDir = new File(RecoveryHelper.getPersistDir(state).get().toUri().getPath());

  FileSystem fs = FileSystem.getLocal(new Configuration());
  CopyableFile copyableFile = CopyableFile.builder(fs,
      new FileStatus(0, false, 0, 0, 0, new Path("/file")), new Path("/dataset"),
      CopyConfiguration.builder(fs, state.getProperties())
          .preserve(PreserveAttributes.fromMnemonicString("")).build()).build();
  CopySource.setWorkUnitGuid(state, Guid.fromHasGuid(copyableFile));
  RecoveryHelper recoveryHelper = new RecoveryHelper(FileSystem.getLocal(new Configuration()), state);

  // Persisting must move (not copy) the file out of the staging dir.
  recoveryHelper.persistFile(state, copyableFile, new Path(file.getAbsolutePath()));
  Assert.assertEquals(stagingDir.listFiles().length, 0);
  Assert.assertEquals(recoveryDir.listFiles().length, 1);

  // The persisted file keeps its contents and can be found again by guid.
  File fileInRecovery = recoveryDir.listFiles()[0].listFiles()[0];
  try (FileInputStream fis = new FileInputStream(fileInRecovery)) {
    Assert.assertEquals(IOUtils.readLines(fis).get(0), content);
  }

  Optional<FileStatus> fileToRecover =
      recoveryHelper.findPersistedFile(state, copyableFile, Predicates.<FileStatus>alwaysTrue());
  Assert.assertTrue(fileToRecover.isPresent());
  Assert.assertEquals(fileToRecover.get().getPath().toUri().getPath(), fileInRecovery.getAbsolutePath());

  // A rejecting predicate must hide the persisted file.
  fileToRecover = recoveryHelper.findPersistedFile(state, copyableFile, Predicates.<FileStatus>alwaysFalse());
  Assert.assertFalse(fileToRecover.isPresent());
}
Example usage of org.apache.gobblin.configuration.State in the Apache project incubator-gobblin.
From class ConfigStoreUtils, method getTopicsFromConfigStore:
/**
 * Get topics from the config store.
 * Topics are either whitelisted or blacklisted using a tag.
 * After filtering topics via tag, each topic's config is checked: for a shortlisted topic
 * to take effect, its config must set {@code topic.whitelist} (whitelist mode) or
 * {@code topic.blacklist} (blacklist mode) to {@code true}.
 *
 * If neither tag property is provided, all topics are returned.
 */
public static List<KafkaTopic> getTopicsFromConfigStore(Properties properties, String configStoreUri, GobblinKafkaConsumerClient kafkaConsumerClient) {
  ConfigClient configClient = ConfigClient.createConfigClient(VersionStabilityPolicy.WEAK_LOCAL_STABILITY);
  // Fetch every topic from Kafka first; tag-based filtering is applied below.
  State state = new State();
  state.setProp(KafkaSource.TOPIC_WHITELIST, ".*");
  state.setProp(KafkaSource.TOPIC_BLACKLIST, StringUtils.EMPTY);
  List<KafkaTopic> allTopics = kafkaConsumerClient.getFilteredTopics(
      DatasetFilterUtils.getPatternList(state, KafkaSource.TOPIC_BLACKLIST),
      DatasetFilterUtils.getPatternList(state, KafkaSource.TOPIC_WHITELIST));
  Optional<Config> runtimeConfig = ConfigClientUtils.getOptionalRuntimeConfig(properties);

  if (properties.containsKey(GOBBLIN_CONFIG_TAGS_WHITELIST)) {
    // Whitelist mode: keep only topics tagged and explicitly whitelisted in their config.
    List<String> whitelistedTopics = getTaggedTopicNames(properties, configStoreUri, configClient,
        runtimeConfig, GOBBLIN_CONFIG_TAGS_WHITELIST, KafkaSource.TOPIC_WHITELIST);
    return allTopics.stream()
        .filter((KafkaTopic p) -> whitelistedTopics.contains(p.getName()))
        .collect(Collectors.toList());
  } else if (properties.containsKey(GOBBLIN_CONFIG_TAGS_BLACKLIST)) {
    // Blacklist mode: drop topics tagged and explicitly blacklisted in their config.
    List<String> blacklistedTopics = getTaggedTopicNames(properties, configStoreUri, configClient,
        runtimeConfig, GOBBLIN_CONFIG_TAGS_BLACKLIST, KafkaSource.TOPIC_BLACKLIST);
    return allTopics.stream()
        .filter((KafkaTopic p) -> !blacklistedTopics.contains(p.getName()))
        .collect(Collectors.toList());
  } else {
    log.warn("None of the blacklist or whitelist tags are provided");
    return allTopics;
  }
}

/**
 * Resolves the topic names tagged under {@code tagProperty} in the config store whose
 * own config sets {@code topicListKey} to {@code true}.
 *
 * @param properties job properties; must contain {@code GOBBLIN_CONFIG_FILTER} and {@code tagProperty}
 * @param configStoreUri base URI of the config store
 * @param configClient client used to query the config store
 * @param runtimeConfig optional runtime config passed through to config-store lookups
 * @param tagProperty property key holding the tag path (whitelist or blacklist tag)
 * @param topicListKey config key that must be {@code true} for a topic to be included
 * @return names of the matching topics
 */
private static List<String> getTaggedTopicNames(Properties properties, String configStoreUri,
    ConfigClient configClient, Optional<Config> runtimeConfig, String tagProperty, String topicListKey) {
  Preconditions.checkArgument(properties.containsKey(GOBBLIN_CONFIG_FILTER),
      "Missing required property " + GOBBLIN_CONFIG_FILTER);
  String filterString = properties.getProperty(GOBBLIN_CONFIG_FILTER);
  Path tagUri = PathUtils.mergePaths(new Path(configStoreUri), new Path(properties.getProperty(tagProperty)));
  List<String> topicNames = new ArrayList<>();
  ConfigStoreUtils.getTopicsURIFromConfigStore(configClient, tagUri, filterString, runtimeConfig).stream()
      .filter((URI u) -> ConfigUtils.getBoolean(
          ConfigStoreUtils.getConfig(configClient, u, runtimeConfig), topicListKey, false))
      .forEach((URI u) -> topicNames.add(ConfigStoreUtils.getTopicNameFromURI(u)));
  return topicNames;
}
Example usage of org.apache.gobblin.configuration.State in the Apache project incubator-gobblin.
From class KafkaSource, method getWorkUnitForTopicPartition:
private WorkUnit getWorkUnitForTopicPartition(KafkaPartition partition, Offsets offsets, Optional<State> topicSpecificState) {
  // Start from the job-level defaults; a topic-specific state, if present, overrides them.
  Extract.TableType effectiveTableType = tableType;
  String effectiveNamespace = extractNamespace;
  String effectiveTableName = partition.getTopicName();
  boolean effectiveFullExtract = isFullExtract;
  if (topicSpecificState.isPresent()) {
    State topicState = topicSpecificState.get();
    if (topicState.contains(ConfigurationKeys.EXTRACT_TABLE_TYPE_KEY)) {
      effectiveTableType = Extract.TableType.valueOf(topicState.getProp(ConfigurationKeys.EXTRACT_TABLE_TYPE_KEY));
    }
    effectiveNamespace = topicState.getProp(ConfigurationKeys.EXTRACT_NAMESPACE_NAME_KEY, extractNamespace);
    effectiveTableName = topicState.getProp(ConfigurationKeys.EXTRACT_TABLE_NAME_KEY, partition.getTopicName());
    effectiveFullExtract = topicState.getPropAsBoolean(ConfigurationKeys.EXTRACT_IS_FULL_KEY, isFullExtract);
  }

  Extract extract = this.createExtract(effectiveTableType, effectiveNamespace, effectiveTableName);
  if (effectiveFullExtract) {
    extract.setProp(ConfigurationKeys.EXTRACT_IS_FULL_KEY, true);
  }

  // Record the partition identity and offset range on the work unit.
  WorkUnit workUnit = WorkUnit.create(extract);
  workUnit.setProp(TOPIC_NAME, partition.getTopicName());
  addDatasetUrnOptionally(workUnit);
  workUnit.setProp(PARTITION_ID, partition.getId());
  workUnit.setProp(LEADER_ID, partition.getLeader().getId());
  workUnit.setProp(LEADER_HOSTANDPORT, partition.getLeader().getHostAndPort().toString());
  workUnit.setProp(ConfigurationKeys.WORK_UNIT_LOW_WATER_MARK_KEY, offsets.getStartOffset());
  workUnit.setProp(ConfigurationKeys.WORK_UNIT_HIGH_WATER_MARK_KEY, offsets.getLatestOffset());
  workUnit.setProp(PREVIOUS_OFFSET_FETCH_EPOCH_TIME, offsets.getPreviousOffsetFetchEpochTime());
  workUnit.setProp(OFFSET_FETCH_EPOCH_TIME, offsets.getOffsetFetchEpochTime());
  workUnit.setProp(PREVIOUS_LATEST_OFFSET, offsets.getPreviousLatestOffset());

  // Attach lineage information about the Kafka source dataset.
  DatasetDescriptor source = new DatasetDescriptor(DatasetConstants.PLATFORM_KAFKA, partition.getTopicName());
  source.addMetadata(DatasetConstants.BROKERS, kafkaBrokers);
  if (this.lineageInfo.isPresent()) {
    this.lineageInfo.get().setSource(source, workUnit);
  }

  LOG.info(String.format("Created workunit for partition %s: lowWatermark=%d, highWatermark=%d, range=%d",
      partition, offsets.getStartOffset(), offsets.getLatestOffset(),
      offsets.getLatestOffset() - offsets.getStartOffset()));
  return workUnit;
}
Example usage of org.apache.gobblin.configuration.State in the Apache project incubator-gobblin.
From class JdbcExtractorTest, method testUnsignedInt:
/**
 * Test for the metadata query to see if the check for unsigned int is present.
 */
@Test
public void testUnsignedInt() throws SchemaException {
  // Declare the concrete type directly instead of upcasting to State and
  // immediately downcasting back to WorkUnitState.
  WorkUnitState state = new WorkUnitState();
  state.setId("id");
  MysqlExtractor mysqlExtractor = new MysqlExtractor(state);

  // By default, unsigned ints are not promoted: the metadata query must not
  // reference "unsigned" but still mentions "bigint" in its column-type SQL.
  List<Command> commands = mysqlExtractor.getSchemaMetadata("db", "table");
  assertTrue(commands.get(0).getCommandType() == JdbcCommand.JdbcCommandType.QUERY);
  assertTrue(commands.get(0).getParams().get(0).contains("bigint"));
  assertTrue(commands.get(1).getCommandType() == JdbcCommand.JdbcCommandType.QUERYPARAMS);
  assertTrue(!commands.get(1).getParams().get(0).contains("unsigned"));

  // With the promotion option set, the query parameters must check "unsigned".
  state.setProp(ConfigurationKeys.SOURCE_QUERYBASED_PROMOTE_UNSIGNED_INT_TO_BIGINT, "true");
  commands = mysqlExtractor.getSchemaMetadata("db", "table");
  assertTrue(commands.get(0).getCommandType() == JdbcCommand.JdbcCommandType.QUERY);
  assertTrue(commands.get(0).getParams().get(0).contains("bigint"));
  assertTrue(commands.get(1).getCommandType() == JdbcCommand.JdbcCommandType.QUERYPARAMS);
  assertTrue(commands.get(1).getParams().get(0).contains("unsigned"));
}
Aggregations