use of org.apache.gobblin.configuration.State in project incubator-gobblin by apache.
the class DatePartitionedAvroFileExtractorTest method setUp.
/**
 * Writes the Avro fixture used by this test class: parses the schema, computes the
 * record timestamps, and pushes one record per timestamp through a time-partitioned
 * writer before closing and committing it.
 *
 * @throws IOException propagated from the writer calls
 */
@BeforeClass
public void setUp() throws IOException {
  this.schema = new Schema.Parser().parse(AVRO_SCHEMA);

  // Anchor the data set six hours in the past, pinned to minute 30 of that hour.
  DateTime sixHoursAgo = new DateTime(TZ).minusHours(6);
  this.startDateTime = new DateTime(sixHoursAgo.getYear(), sixHoursAgo.getMonthOfYear(),
      sixHoursAgo.getDayOfMonth(), sixHoursAgo.getHourOfDay(), 30, 0, TZ);

  // Record 0 sits exactly on the start time. Every later record is one minute after its
  // predecessor, with the sequence resuming four hours after the start.
  recordTimestamps[0] = startDateTime.getMillis();
  DateTime cursor = startDateTime.plusHours(4);
  for (int i = 1; i < RECORD_SIZE; i++) {
    cursor = cursor.plusMinutes(1);
    recordTimestamps[i] = cursor.getMillis();
  }

  // Writer configuration: partition column, buffer, file system, directories, file naming,
  // partition path layout and the partitioner implementation.
  State writerConfig = new State();
  writerConfig.setProp(TimeBasedAvroWriterPartitioner.WRITER_PARTITION_COLUMNS, PARTITION_COLUMN_NAME);
  writerConfig.setProp(ConfigurationKeys.WRITER_BUFFER_SIZE, ConfigurationKeys.DEFAULT_BUFFER_SIZE);
  writerConfig.setProp(ConfigurationKeys.WRITER_FILE_SYSTEM_URI, ConfigurationKeys.LOCAL_FS_URI);
  writerConfig.setProp(ConfigurationKeys.WRITER_STAGING_DIR, STAGING_DIR);
  writerConfig.setProp(ConfigurationKeys.WRITER_OUTPUT_DIR, OUTPUT_DIR);
  writerConfig.setProp(ConfigurationKeys.WRITER_FILE_PATH, SOURCE_ENTITY);
  writerConfig.setProp(ConfigurationKeys.WRITER_FILE_NAME, FILE_NAME);
  writerConfig.setProp(TimeBasedWriterPartitioner.WRITER_PARTITION_PATTERN, DATE_PATTERN);
  writerConfig.setProp(TimeBasedWriterPartitioner.WRITER_PARTITION_PREFIX, PREFIX);
  writerConfig.setProp(TimeBasedWriterPartitioner.WRITER_PARTITION_SUFFIX, SUFFIX);
  writerConfig.setProp(ConfigurationKeys.WRITER_PARTITIONER_CLASS, TimeBasedAvroWriterPartitioner.class.getName());

  Destination destination = Destination.of(Destination.DestinationType.HDFS, writerConfig);
  DataWriterBuilder<Schema, GenericRecord> writerBuilder = new AvroDataWriterBuilder()
      .writeTo(destination)
      .writeInFormat(WriterOutputFormat.AVRO)
      .withWriterId("writer-1")
      .withSchema(this.schema)
      .withBranches(1)
      .forBranch(0);
  this.writer = new PartitionedDataWriter<Schema, GenericRecord>(writerBuilder, writerConfig);

  // Emit one record per prepared timestamp; only the partition column is populated.
  GenericRecordBuilder recordBuilder = new GenericRecordBuilder(this.schema);
  for (int i = 0; i < RECORD_SIZE; i++) {
    recordBuilder.set(PARTITION_COLUMN_NAME, recordTimestamps[i]);
    GenericRecord record = recordBuilder.build();
    this.writer.writeEnvelope(new RecordEnvelope<>(record));
  }

  this.writer.close();
  this.writer.commit();
}
use of org.apache.gobblin.configuration.State in project incubator-gobblin by apache.
the class QueryBasedExtractorTest method testDataPullUpperBoundsRemovedInLastWorkUnit.
/**
 * Builds a SNAPSHOT work unit flagged as the last partition, applies range predicates
 * (1, 3) to the extractor, and verifies against the full record count of 5.
 */
@Test
public void testDataPullUpperBoundsRemovedInLastWorkUnit() {
  final int expectedCount = 5;
  ArrayList<DataRecord> generated = this.generateRecords(expectedCount);

  // Work unit is the last partition of a snapshot extract.
  WorkUnit lastUnit = WorkUnit.createEmpty();
  lastUnit.setProp(Partition.IS_LAST_PARTIITON, true);
  lastUnit.setProp(ConfigurationKeys.SOURCE_QUERYBASED_EXTRACT_TYPE, "SNAPSHOT");

  WorkUnitState unitState = new WorkUnitState(lastUnit, new State());
  unitState.setId("testDataPullUpperBoundsRemovedInLastWorkUnit");

  TestQueryBasedExtractor extractor = new TestQueryBasedExtractor(unitState, generated);
  extractor.setRangePredicates(1, 3);

  // All generated records are expected despite the (1, 3) range.
  this.verify(extractor, expectedCount);
}
use of org.apache.gobblin.configuration.State in project incubator-gobblin by apache.
the class QueryBasedExtractorTest method testDataPullUpperBoundsNotRemovedInLastWorkUnit.
/**
 * Runs three scenarios against one extractor with range predicates (1, 3), each expected
 * to verify with a count of 3: a work unit that is not the last partition, a last
 * partition with a user-specified high watermark, and a last partition that carries an
 * actual high watermark value.
 */
@Test
public void testDataPullUpperBoundsNotRemovedInLastWorkUnit() {
  final int generatedCount = 5;
  ArrayList<DataRecord> generated = this.generateRecords(generatedCount);

  WorkUnit unit = WorkUnit.createEmpty();
  WorkUnitState unitState = new WorkUnitState(unit, new State());
  unitState.setId("testDataPullUpperBoundsNotRemovedInLastWorkUnit");

  // Scenario 1: the work unit is not flagged as the last partition.
  TestQueryBasedExtractor extractor = new TestQueryBasedExtractor(unitState, generated);
  extractor.setRangePredicates(1, 3);
  this.verify(extractor, 3);

  // Scenario 2: last partition, but the high watermark was supplied by the user.
  unit.setProp(Partition.IS_LAST_PARTIITON, true);
  unit.setProp(Partition.HAS_USER_SPECIFIED_HIGH_WATERMARK, true);
  extractor.reset();
  extractor.setRangePredicates(1, 3);
  this.verify(extractor, 3);

  // Scenario 3: last partition, no user-specified watermark, but
  // WORK_UNIT_STATE_ACTUAL_HIGH_WATER_MARK_KEY is already recorded on the work unit.
  unit.removeProp(Partition.HAS_USER_SPECIFIED_HIGH_WATERMARK);
  unit.setProp(ConfigurationKeys.WORK_UNIT_STATE_ACTUAL_HIGH_WATER_MARK_KEY, "3");
  extractor.reset();
  extractor.setRangePredicates(1, 3);
  this.verify(extractor, 3);
}
use of org.apache.gobblin.configuration.State in project incubator-gobblin by apache.
the class QueryBasedSourceTest method testGetTableSpecificPropsFromState.
/**
 * Checks that dataset-specific properties are looked up by source entity name: of two
 * entities, only the one whose entity name appears in the configured datasets gets a
 * property set, and that set carries the value configured for that name.
 */
@Test
public void testGetTableSpecificPropsFromState() {
  SourceState sourceState = new SourceState();
  sourceState.setProp(DatasetUtils.DATASET_SPECIFIC_PROPS, "[{\"dataset\":\"Entity1\", \"value\": 1}, {\"dataset\":\"Table2\", \"value\":2}]");

  // "Table2" is deliberately used as the table name of the first entity: the lookup must
  // match on the entity name ("Entity1"), not on the table name.
  SourceEntity matchingEntity = new SourceEntity("Entity1", "Table2");
  SourceEntity unmatchedEntity = new SourceEntity("Entity3", "Table3");
  Set<SourceEntity> candidates = ImmutableSet.of(matchingEntity, unmatchedEntity);

  Map<SourceEntity, State> propsByEntity =
      QueryBasedSource.getTableSpecificPropsFromState(candidates, sourceState);

  // Exactly one entry is expected: value 1 for the matching entity, nothing for the other.
  Assert.assertEquals(propsByEntity.size(), 1);
  Assert.assertTrue(propsByEntity.containsKey(matchingEntity));
  Assert.assertEquals(propsByEntity.get(matchingEntity).getProp("value"), "1");
}
use of org.apache.gobblin.configuration.State in project incubator-gobblin by apache.
the class QueryBasedSourceTest method testSourceEntity.
/**
 * Exercises the {@code SourceEntity} factories: creation from a bare entity name (with
 * and without a {@code $} in it) and creation from a {@code State} holding the source
 * entity, the extract table name, or only one of the two.
 */
@Test
public void testSourceEntity() {
  // Plain name: entity, destination table and dataset names are all the same.
  SourceEntity plain = SourceEntity.fromSourceEntityName("SourceEntity1");
  Assert.assertEquals(plain.getSourceEntityName(), "SourceEntity1");
  Assert.assertEquals(plain.getDestTableName(), "SourceEntity1");
  Assert.assertEquals(plain.getDatasetName(), "SourceEntity1");

  // Name containing '$': only the destination table name is expected with '_' instead.
  SourceEntity withDollar = SourceEntity.fromSourceEntityName("SourceEntity$2");
  Assert.assertEquals(withDollar.getSourceEntityName(), "SourceEntity$2");
  Assert.assertEquals(withDollar.getDestTableName(), "SourceEntity_2");
  Assert.assertEquals(withDollar.getDatasetName(), "SourceEntity$2");

  // State with both the source entity and the extract table name.
  State bothKeys = new State();
  bothKeys.setProp(ConfigurationKeys.SOURCE_ENTITY, "SourceEntity3");
  bothKeys.setProp(ConfigurationKeys.EXTRACT_TABLE_NAME_KEY, "SourceEntity3_Table");
  Optional<SourceEntity> fromBothKeys = SourceEntity.fromState(bothKeys);
  Assert.assertTrue(fromBothKeys.isPresent());
  SourceEntity resolved = fromBothKeys.get();
  Assert.assertEquals(resolved.getSourceEntityName(), "SourceEntity3");
  Assert.assertEquals(resolved.getDestTableName(), "SourceEntity3_Table");
  Assert.assertEquals(resolved.getDatasetName(), "SourceEntity3");
  Assert.assertEquals(resolved, new SourceEntity("SourceEntity3", "SourceEntity3_Table"));

  // State with only the source entity: expected to equal the name-based factory result.
  State entityOnly = new State();
  entityOnly.setProp(ConfigurationKeys.SOURCE_ENTITY, "SourceEntity$4");
  Optional<SourceEntity> fromEntityOnly = SourceEntity.fromState(entityOnly);
  Assert.assertTrue(fromEntityOnly.isPresent());
  Assert.assertEquals(fromEntityOnly.get(), SourceEntity.fromSourceEntityName("SourceEntity$4"));

  // State with only the extract table name: the table name is expected to act as the entity name.
  State tableOnly = new State();
  tableOnly.setProp(ConfigurationKeys.EXTRACT_TABLE_NAME_KEY, "Table5");
  Optional<SourceEntity> fromTableOnly = SourceEntity.fromState(tableOnly);
  Assert.assertTrue(fromTableOnly.isPresent());
  Assert.assertEquals(fromTableOnly.get(), SourceEntity.fromSourceEntityName("Table5"));
}
Aggregations