use of org.apache.gobblin.configuration.State in project incubator-gobblin by apache.
the class AbstractSourceTest method testGetPreviousWorkUnitStatesOnFullRetryPartialCommit.
/**
 * Verifies that no previous work unit states are offered for retry when the work unit
 * retry policy is "onfull" while the job commit policy is "partial".
 */
@Test
public void testGetPreviousWorkUnitStatesOnFullRetryPartialCommit() {
  SourceState sourceState = new SourceState(new State(), this.previousWorkUnitStates);
  sourceState.setProp(ConfigurationKeys.WORK_UNIT_RETRY_POLICY_KEY, "onfull");
  sourceState.setProp(ConfigurationKeys.JOB_COMMIT_POLICY_KEY, "partial");

  // The combination configured above is expected to yield an empty retry list.
  List<WorkUnitState> statesToRetry = this.testSource.getPreviousWorkUnitStatesForRetry(sourceState);
  Assert.assertEquals(statesToRetry, Collections.EMPTY_LIST);
}
use of org.apache.gobblin.configuration.State in project incubator-gobblin by apache.
the class AbstractSourceTest method testGetPreviousWorkUnitStatesEnabledRetry.
/**
 * Verifies that, with {@code WORK_UNIT_RETRY_ENABLED_KEY} set to {@code true}, the source
 * returns the work unit states held in {@code expectedPreviousWorkUnitStates}.
 */
@Test
public void testGetPreviousWorkUnitStatesEnabledRetry() {
  SourceState sourceState = new SourceState(new State(), this.previousWorkUnitStates);
  sourceState.setProp(ConfigurationKeys.WORK_UNIT_RETRY_ENABLED_KEY, Boolean.TRUE);

  Assert.assertEquals(
      this.testSource.getPreviousWorkUnitStatesForRetry(sourceState),
      this.expectedPreviousWorkUnitStates);
}
use of org.apache.gobblin.configuration.State in project incubator-gobblin by apache.
the class EncryptionConfigParserTest method testWithWriterPrefix.
/**
 * Builds writer-encryption properties (algorithm, keystore path, keystore password) and checks
 * that {@link EncryptionConfigParser#getConfigForBranch} returns them for the given branch.
 *
 * @param numBranches total number of branches; when greater than one, each property key is
 *                    suffixed with {@code "." + branch}
 * @param branch      index of the branch whose configuration is looked up
 */
private void testWithWriterPrefix(int numBranches, int branch) {
  // Keys only carry a branch suffix when there is more than one branch.
  final String branchSuffix = (numBranches > 1) ? String.format(".%d", branch) : "";
  final String keyPrefix = EncryptionConfigParser.WRITER_ENCRYPT_PREFIX + ".";

  Properties writerProps = new Properties();
  writerProps.put(keyPrefix + EncryptionConfigParser.ENCRYPTION_ALGORITHM_KEY + branchSuffix, "any");
  writerProps.put(keyPrefix + EncryptionConfigParser.ENCRYPTION_KEYSTORE_PATH_KEY + branchSuffix, "/tmp/foobar");
  writerProps.put(keyPrefix + EncryptionConfigParser.ENCRYPTION_KEYSTORE_PASSWORD_KEY + branchSuffix, "abracadabra");

  Map<String, Object> parsedProperties = EncryptionConfigParser.getConfigForBranch(
      EncryptionConfigParser.EntityType.WRITER, new State(writerProps), numBranches, branch);

  Assert.assertNotNull(parsedProperties, "Expected parser to only return one record");
  Assert.assertEquals(EncryptionConfigParser.getEncryptionType(parsedProperties), "any");
  Assert.assertEquals(EncryptionConfigParser.getKeystorePath(parsedProperties), "/tmp/foobar");
  Assert.assertEquals(EncryptionConfigParser.getKeystorePassword(parsedProperties), "abracadabra");
}
use of org.apache.gobblin.configuration.State in project incubator-gobblin by apache.
the class CsvFileDownloader method downloadFile.
/**
 * Provides an iterator over the rows of a CSV file via OpenCSV's CSVReader.
 *
 * <p>The file stream is obtained from the extractor's file system helper, and the reader is
 * registered with the extractor's closer. Behavior is driven by the work unit state:
 * <ul>
 *   <li>If {@code DELIMITER} is set, it must resolve to a single character, which is used as
 *       the column separator; otherwise the CSVReader default is used.</li>
 *   <li>If {@code SKIP_TOP_ROWS_REGEX} is set, leading rows are skipped for as long as their
 *       first column fully matches the regex. This is useful when a CSV file starts with a
 *       variable number of comment rows. Skipping stops at the first row that has no columns
 *       or whose first column does not match.</li>
 *   <li>If the extractor reports that the first record should be skipped, one more row is
 *       dropped after the regex-based skipping.</li>
 * </ul>
 *
 * <p>NOTE(review): earlier documentation mentioned header/schema validation; no such validation
 * is performed inside this method.
 *
 * {@inheritDoc}
 * @param file path of the file to read
 * @return an iterator positioned at the first row that was not skipped
 * @throws IOException if the file stream cannot be opened (wraps FileBasedHelperException)
 * @throws IllegalArgumentException if the configured delimiter is not a single character
 * @see org.apache.gobblin.source.extractor.filebased.FileDownloader#downloadFile(java.lang.String)
 */
@SuppressWarnings("unchecked")
@Override
public Iterator<String[]> downloadFile(String file) throws IOException {
  log.info("Beginning to download file: " + file);
  final State state = fileBasedExtractor.workUnitState;
  CSVReader reader;
  try {
    if (state.contains(DELIMITER)) {
      String configuredDelimiter = state.getProp(DELIMITER);
      // A lone whitespace character (e.g. a tab) is a legitimate delimiter, but trim() would
      // erase it and make the length check below fail. Only trim values that carry padding.
      String delimiterStr = configuredDelimiter.length() == 1 ? configuredDelimiter : configuredDelimiter.trim();
      Preconditions.checkArgument(delimiterStr.length() == 1, "Delimiter should be a character.");
      char delimiter = delimiterStr.charAt(0);
      log.info("Using " + delimiter + " as a delimiter.");
      reader = this.fileBasedExtractor.getCloser().register(new CSVReader(new InputStreamReader(this.fileBasedExtractor.getFsHelper().getFileStream(file), ConfigurationKeys.DEFAULT_CHARSET_ENCODING), delimiter));
    } else {
      reader = this.fileBasedExtractor.getCloser().register(new CSVReader(new InputStreamReader(this.fileBasedExtractor.getFsHelper().getFileStream(file), ConfigurationKeys.DEFAULT_CHARSET_ENCODING)));
    }
  } catch (FileBasedHelperException e) {
    throw new IOException(e);
  }

  // Peeking lets us inspect a row without consuming it, so the first non-matching row is kept.
  PeekingIterator<String[]> iterator = Iterators.peekingIterator(reader.iterator());
  if (state.contains(SKIP_TOP_ROWS_REGEX)) {
    String regex = state.getProp(SKIP_TOP_ROWS_REGEX);
    log.info("Trying to skip with regex: " + regex);
    // Compile once up front: String.matches(regex) would recompile the pattern for every row.
    final java.util.regex.Pattern skipPattern = java.util.regex.Pattern.compile(regex);
    while (iterator.hasNext()) {
      String[] row = iterator.peek();
      // Stop at a row with no columns or at the first row whose first column does not match.
      if (row.length == 0 || !skipPattern.matcher(row[0]).matches()) {
        break;
      }
      iterator.next();
    }
  }

  if (this.fileBasedExtractor.isShouldSkipFirstRecord() && iterator.hasNext()) {
    log.info("Skipping first record");
    iterator.next();
  }
  return iterator;
}
use of org.apache.gobblin.configuration.State in project incubator-gobblin by apache.
the class HiveSerDeTest method testAvroOrcSerDes.
/**
 * This test uses Avro SerDe to deserialize data from Avro files, and uses ORC SerDe
 * to serialize them into ORC files.
 *
 * <p>Job properties are loaded from {@code serde.properties}. The test expects the source to
 * produce exactly one work unit and each extracted record to convert to exactly one writable.
 * After the writer commits, it asserts that the output file exists and then deletes the writer
 * output directory.
 *
 * @throws IOException if the properties file cannot be read or a file system operation fails
 * @throws DataRecordException if a record cannot be read from the extractor
 * @throws DataConversionException if a record cannot be converted
 */
@Test(groups = { "gobblin.serde" })
public void testAvroOrcSerDes() throws IOException, DataRecordException, DataConversionException {
  Properties properties = new Properties();
  // try-with-resources so the properties file handle is released even if load() fails.
  try (FileReader propertiesReader = new FileReader("gobblin-core/src/test/resources/serde/serde.properties")) {
    properties.load(propertiesReader);
  }
  SourceState sourceState = new SourceState(new State(properties), ImmutableList.<WorkUnitState>of());
  OldApiWritableFileSource source = new OldApiWritableFileSource();
  List<WorkUnit> workUnits = source.getWorkunits(sourceState);
  Assert.assertEquals(workUnits.size(), 1);
  WorkUnitState wus = new WorkUnitState(workUnits.get(0));
  wus.addAll(sourceState);
  Closer closer = Closer.create();
  HiveWritableHdfsDataWriter writer = null;
  try {
    OldApiWritableFileExtractor extractor = closer.register((OldApiWritableFileExtractor) source.getExtractor(wus));
    HiveSerDeConverter converter = closer.register(new HiveSerDeConverter());
    writer = closer.register((HiveWritableHdfsDataWriter) new HiveWritableHdfsDataWriterBuilder<>().withBranches(1).withWriterId("0").writeTo(Destination.of(DestinationType.HDFS, sourceState)).writeInFormat(WriterOutputFormat.ORC).build());
    converter.init(wus);
    Writable record;
    while ((record = extractor.readRecord(null)) != null) {
      Iterable<Writable> convertedRecordIterable = converter.convertRecordImpl(null, record, wus);
      // Each input record must map to exactly one output writable.
      Assert.assertEquals(Iterators.size(convertedRecordIterable.iterator()), 1);
      writer.write(convertedRecordIterable.iterator().next());
    }
  } catch (Throwable t) {
    throw closer.rethrow(t);
  } finally {
    // The writer is closed (via the closer) before it is committed.
    closer.close();
    if (writer != null) {
      writer.commit();
    }
    // NOTE(review): a failure thrown from this finally block replaces any exception raised in the try body.
    Assert.assertTrue(this.fs.exists(new Path(sourceState.getProp(ConfigurationKeys.WRITER_OUTPUT_DIR), sourceState.getProp(ConfigurationKeys.WRITER_FILE_NAME))));
    HadoopUtils.deletePath(this.fs, new Path(sourceState.getProp(ConfigurationKeys.WRITER_OUTPUT_DIR)), true);
  }
}
Aggregations