Use of org.apache.gobblin.hive.writer.MetadataWriter in the project incubator-gobblin by Apache.
Example from the class IcebergMetadataWriterTest, method testFaultTolerant.
@Test(dependsOnMethods = { "testWriteAddFileGMCECompleteness" }, groups = { "icebergMetadataWriterTest" })
public void testFaultTolerant() throws Exception {
  // Allow at most one dataset to accumulate errors before writeEnvelope starts throwing.
  gobblinMCEWriter.setMaxErrorDataset(1);
  // Prepend a stub writer whose writeEnvelope always fails, so every write hits the fault path.
  MetadataWriter failingWriter = Mockito.mock(MetadataWriter.class);
  Mockito.doThrow(new IOException("Test failure")).when(failingWriter).writeEnvelope(Mockito.any(), Mockito.any(), Mockito.any(), Mockito.any());
  gobblinMCEWriter.metadataWriters.add(0, failingWriter);

  GenericRecord gmceCopy = GenericData.get().deepCopy(gmce.getSchema(), gmce);
  gobblinMCEWriter.writeEnvelope(new RecordEnvelope<>(gmceCopy, watermarkAt(51L)));
  gobblinMCEWriter.writeEnvelope(new RecordEnvelope<>(gmceCopy, watermarkAt(52L)));

  // Both failed writes land on the same dataset, so the error map holds one dataset with one table entry.
  String failedDatasetPath = new File(tmpDir, "data/tracking/testIcebergTable").getAbsolutePath();
  Assert.assertEquals(gobblinMCEWriter.getDatasetErrorMap().size(), 1);
  Assert.assertEquals(gobblinMCEWriter.getDatasetErrorMap().values().iterator().next().size(), 1);
  Assert.assertEquals(gobblinMCEWriter.getDatasetErrorMap().get(failedDatasetPath).get("hivedb.testIcebergTable").lowWatermark, 50L);
  Assert.assertEquals(gobblinMCEWriter.getDatasetErrorMap().get(failedDatasetPath).get("hivedb.testIcebergTable").highWatermark, 52L);
  // Nothing is emitted until the topic is flushed.
  Assert.assertEquals(eventsSent.size(), 0);

  // Flush must not throw: the failure stays within the fault-tolerance budget.
  gobblinMCEWriter.flush();

  // Flushing emits one failure event for the earlier errors and clears the table from the error map.
  Assert.assertEquals(eventsSent.size(), 1);
  Assert.assertEquals(eventsSent.get(0).getMetadata().get(IcebergMCEMetadataKeys.FAILURE_EVENT_TABLE_NAME), "testIcebergTable");
  Assert.assertEquals(eventsSent.get(0).getMetadata().get(IcebergMCEMetadataKeys.GMCE_LOW_WATERMARK), "50");
  Assert.assertEquals(eventsSent.get(0).getMetadata().get(IcebergMCEMetadataKeys.GMCE_HIGH_WATERMARK), "52");
  Assert.assertEquals(gobblinMCEWriter.getDatasetErrorMap().values().iterator().next().size(), 0);

  // A failure on a second dataset exceeds maxErrorDataset=1, so writeEnvelope must now throw.
  gmce.getDatasetIdentifier().setNativeName("data/tracking/testFaultTolerant");
  GenericRecord genericGmce_differentDb = GenericData.get().deepCopy(gmce.getSchema(), gmce);
  Assert.expectThrows(IOException.class, () -> gobblinMCEWriter.writeEnvelope((new RecordEnvelope<>(genericGmce_differentDb, watermarkAt(54L)))));

  // Remove the failing stub so later tests in the group see the normal writer chain.
  gobblinMCEWriter.metadataWriters.remove(0);
}

// Builds a Kafka watermark on the test topic (partition 1) at the given offset.
private static KafkaStreamingExtractor.KafkaWatermark watermarkAt(long offset) {
  return new KafkaStreamingExtractor.KafkaWatermark(
      new KafkaPartition.Builder().withTopicName("GobblinMetadataChangeEvent_test").withId(1).build(),
      new LongWatermark(offset));
}
Use of org.apache.gobblin.hive.writer.MetadataWriter in the project incubator-gobblin by Apache.
Example from the class GobblinMCEWriter, method flush.
// Fault-tolerance support: flush every metadata writer for a table, and emit a GTE when a
// previously failed table flushes successfully again (its watermark moved, so data was lost).
private void flush(String dbName, String tableName) throws IOException {
  String tableString = Joiner.on(TABLE_NAME_DELIMITER).join(dbName, tableName);
  // Equal low/high GMCE watermarks mean nothing new arrived since the last flush — skip.
  if (tableOperationTypeMap.get(tableString).gmceLowWatermark == tableOperationTypeMap.get(tableString).gmceHighWatermark) {
    return;
  }
  boolean sawFailure = false;
  for (MetadataWriter writer : metadataWriters) {
    if (sawFailure) {
      // An earlier writer already failed for this table; drop this writer's buffered state too.
      writer.reset(dbName, tableName);
      continue;
    }
    try {
      writer.flush(dbName, tableName);
    } catch (IOException e) {
      sawFailure = true;
      writer.reset(dbName, tableName);
      // Records the error while within the fault-tolerance budget, otherwise rethrows.
      addOrThrowException(e, tableString, dbName, tableName);
    }
  }
  String datasetPath = tableOperationTypeMap.get(tableString).datasetPath;
  if (!sawFailure && datasetErrorMap.containsKey(datasetPath) && datasetErrorMap.get(datasetPath).containsKey(tableString)) {
    // We only want to emit a GTE when the table watermark moves. The watermark can move in two
    // ways: after a flush interval we commit a new watermark to the state store, or — as here —
    // we flush mid-interval because the table operation changed. If the error map still holds
    // this dataset, an earlier flush failed; this flush succeeded and advanced the watermark,
    // so emit a GTE to signal that some data was lost, then clear the stale error entry.
    submitFailureEvent(datasetErrorMap.get(datasetPath).get(tableString));
    this.datasetErrorMap.get(datasetPath).remove(tableString);
  }
}
Use of org.apache.gobblin.hive.writer.MetadataWriter in the project incubator-gobblin by Apache.
Example from the class GobblinMCEWriter, method write.
// Fault-tolerance support: hand the record to each metadata writer in order; once one writer
// fails for this table, reset the remaining writers instead of writing to them.
private void write(RecordEnvelope recordEnvelope, ConcurrentHashMap newSpecsMap, ConcurrentHashMap oldSpecsMap, HiveSpec spec) throws IOException {
  String dbName = spec.getTable().getDbName();
  String tableName = spec.getTable().getTableName();
  String tableString = Joiner.on(TABLE_NAME_DELIMITER).join(dbName, tableName);
  boolean sawFailure = false;
  for (MetadataWriter writer : metadataWriters) {
    if (sawFailure) {
      // A previous writer failed; discard this writer's buffered state for the table.
      writer.reset(dbName, tableName);
      continue;
    }
    try {
      writer.writeEnvelope(recordEnvelope, newSpecsMap, oldSpecsMap, spec);
    } catch (Exception e) {
      sawFailure = true;
      writer.reset(dbName, tableName);
      // Records the error while within the fault-tolerance budget, otherwise rethrows.
      addOrThrowException(e, tableString, dbName, tableName);
    }
  }
}
Aggregations