Usage of org.apache.gobblin.metadata.GobblinMetadataChangeEvent in the Apache incubator-gobblin project.
Class IcebergMetadataWriter, method writeEnvelope.
/**
 * Handles one record envelope for the table described by {@code tableSpec}.
 *
 * <p>The read lock of {@code readWriteLock} is held for the whole call. The envelope's record is
 * deep-copied into a {@link GobblinMetadataChangeEvent}. Processing stops early, with a log line,
 * when the table is rejected by {@code whitelistBlacklist} or when the record's offset is not
 * greater than the value returned by {@code getAndPersistCurrentWatermark}. Otherwise the event is
 * handed to {@code write} and the offset is stored in {@code tableCurrentWatermarkMap}.
 *
 * @param recordEnvelope envelope supplying the record and its watermark
 * @param newSpecsMap passed through unchanged to {@code write}
 * @param oldSpecsMap passed through unchanged to {@code write}
 * @param tableSpec spec whose table supplies the database and table names
 * @throws IOException declared by the signature; presumably propagated from {@code write} or the
 *     watermark lookup (their bodies are not visible here)
 */
@Override
public void writeEnvelope(RecordEnvelope<GenericRecord> recordEnvelope, Map<String, Collection<HiveSpec>> newSpecsMap, Map<String, Collection<HiveSpec>> oldSpecsMap, HiveSpec tableSpec) throws IOException {
  Lock sharedLock = readWriteLock.readLock();
  sharedLock.lock();
  try {
    GenericRecord rawRecord = recordEnvelope.getRecord();
    GobblinMetadataChangeEvent gmce =
        (GobblinMetadataChangeEvent) SpecificData.get().deepCopy(rawRecord.getSchema(), rawRecord);
    String dbName = tableSpec.getTable().getDbName();
    String tableName = tableSpec.getTable().getTableName();

    // Guard: tables outside the whitelist/blacklist selection are ignored.
    if (!whitelistBlacklist.acceptTable(dbName, tableName)) {
      log.info(String.format("Skip table %s.%s since it's not selected", tableSpec.getTable().getDbName(), tableSpec.getTable().getTableName()));
      return;
    }

    TableIdentifier tid = TableIdentifier.of(dbName, tableName);
    String topicPartition =
        tableTopicPartitionMap.computeIfAbsent(tid, key -> recordEnvelope.getWatermark().getSource());
    Long persistedWatermark = getAndPersistCurrentWatermark(tid, topicPartition);
    Long recordOffset = ((LongWatermark) recordEnvelope.getWatermark().getWatermark()).getValue();

    // Guard: only offsets strictly above the current watermark are processed.
    if (recordOffset <= persistedWatermark) {
      log.warn(String.format("Skip processing record %s since it has lower watermark", rawRecord.toString()));
      return;
    }

    TableMetadata metadata = tableMetadataMap.computeIfAbsent(tid, key -> new TableMetadata());
    if (!metadata.lowWatermark.isPresent()) {
      // No low watermark yet: the table was never registered, or an earlier error cleared it.
      // Reset it to just below the offset being processed.
      metadata.lowWatermark = Optional.of(recordOffset - 1);
      metadata.setDatasetName(gmce.getDatasetIdentifier().getNativeName());
      if (this.newPartitionEnabled && this.newPartitionTableWhitelistBlacklist.acceptTable(dbName, tableName)) {
        metadata.newPartitionColumnEnabled = true;
        // Completeness is only considered when the new-partition column is enabled for the table.
        if (this.completenessEnabled && this.completenessWhitelistBlacklist.acceptTable(dbName, tableName)) {
          metadata.completenessEnabled = true;
        }
      }
    }

    write(gmce, newSpecsMap, oldSpecsMap, tableSpec);
    tableCurrentWatermarkMap.put(tid, recordOffset);
  } finally {
    sharedLock.unlock();
  }
}
Usage of org.apache.gobblin.metadata.GobblinMetadataChangeEvent in the Apache incubator-gobblin project.
Class GobblinMCEPublisherTest, method testPublishGMCEForORC.
/**
 * Publishes a work unit state prepared by {@code setGMCEPublisherStateForOrcFile} through a
 * {@link GobblinMCEPublisher} backed by a mocked {@link GobblinMCEProducer}.
 *
 * <p>The mock's {@code sendGMCE} is stubbed to build a {@link GobblinMetadataChangeEvent} via the
 * real {@code getGobblinMetadataChangeEvent} and to assert on it: exactly one new file, its
 * qualified path, and the lower/upper bound values stored under key 1 of the file metrics.
 */
@Test
public void testPublishGMCEForORC() throws IOException {
  GobblinMCEProducer producer = Mockito.mock(GobblinMCEProducer.class);
  Mockito.doCallRealMethod().when(producer).getGobblinMetadataChangeEvent(anyMap(), anyList(), anyList(), anyMap(), any(), any());

  // The assertions run inside the stubbed sendGMCE, i.e. only when the publisher invokes it.
  Mockito.doAnswer(invocation -> {
    Object[] callArgs = invocation.getArguments();
    // NOTE(review): index 1 is the second sendGMCE argument, which the stub below matches with
    // anyList(), yet it is cast to Map<String, String> here - confirm the index is intended.
    GobblinMetadataChangeEvent gmce = producer.getGobblinMetadataChangeEvent(
        (Map<Path, Metrics>) callArgs[0],
        null,
        null,
        (Map<String, String>) callArgs[1],
        OperationType.add_files,
        SchemaSource.SCHEMAREGISTRY);
    Assert.assertEquals(gmce.getNewFiles().size(), 1);

    FileSystem fileSystem = FileSystem.get(new Configuration());
    CharsetEncoder utf8Encoder = Charset.forName("UTF-8").newEncoder();
    String expectedPath = orcFilePath.makeQualified(fileSystem.getUri(), new Path("/")).toString();

    Assert.assertEquals(gmce.getNewFiles().get(0).getFilePath(), expectedPath);
    Assert.assertEquals(
        gmce.getNewFiles().get(0).getFileMetrics().getLowerBounds().get(1).getValue(),
        utf8Encoder.encode(CharBuffer.wrap("Alyssa")));
    Assert.assertEquals(
        gmce.getNewFiles().get(0).getFileMetrics().getUpperBounds().get(1).getValue(),
        utf8Encoder.encode(CharBuffer.wrap("Bob")));
    return null;
  }).when(producer).sendGMCE(anyMap(), anyList(), anyList(), anyMap(), any(), any());

  WorkUnitState state = new WorkUnitState();
  setGMCEPublisherStateForOrcFile(state);
  // setState must run for real on the mock so the producer sees the prepared state.
  Mockito.doCallRealMethod().when(producer).setState(state);
  producer.setState(state);

  GobblinMCEPublisher publisher = new GobblinMCEPublisher(state, producer);
  publisher.publishData(Arrays.asList(state));
}
Aggregations