Search in sources :

Example 1 with MetadataWriter

Use of org.apache.gobblin.hive.writer.MetadataWriter in the project incubator-gobblin by Apache.

From the class IcebergMetadataWriterTest, method testFaultTolerant.

@Test(dependsOnMethods = { "testWriteAddFileGMCECompleteness" }, groups = { "icebergMetadataWriterTest" })
public void testFaultTolerant() throws Exception {
    // Verifies the fault-tolerant path of GobblinMCEWriter: when a MetadataWriter fails,
    // the failure is recorded in the dataset error map (up to maxErrorDataset datasets)
    // instead of propagating, and a failure GTE is emitted on the next flush.
    // Set fault tolerant dataset number to be 1
    gobblinMCEWriter.setMaxErrorDataset(1);
    // Add a mock writer that always throws exception so that write will fail
    MetadataWriter mockWriter = Mockito.mock(MetadataWriter.class);
    Mockito.doThrow(new IOException("Test failure")).when(mockWriter).writeEnvelope(Mockito.any(), Mockito.any(), Mockito.any(), Mockito.any());
    // Insert at index 0 so the failing writer runs first and the remaining writers get reset.
    gobblinMCEWriter.metadataWriters.add(0, mockWriter);
    GenericRecord genericGmce = GenericData.get().deepCopy(gmce.getSchema(), gmce);
    // Two failed writes for the same table; watermarks 51 and 52 should be folded into one error entry.
    gobblinMCEWriter.writeEnvelope(new RecordEnvelope<>(genericGmce, new KafkaStreamingExtractor.KafkaWatermark(new KafkaPartition.Builder().withTopicName("GobblinMetadataChangeEvent_test").withId(1).build(), new LongWatermark(51L))));
    gobblinMCEWriter.writeEnvelope(new RecordEnvelope<>(genericGmce, new KafkaStreamingExtractor.KafkaWatermark(new KafkaPartition.Builder().withTopicName("GobblinMetadataChangeEvent_test").withId(1).build(), new LongWatermark(52L))));
    // One failed dataset, one failed table under it.
    Assert.assertEquals(gobblinMCEWriter.getDatasetErrorMap().size(), 1);
    Assert.assertEquals(gobblinMCEWriter.getDatasetErrorMap().values().iterator().next().size(), 1);
    // lowWatermark 50 presumably carries over from the preceding test in dependsOnMethods — TODO confirm.
    Assert.assertEquals(gobblinMCEWriter.getDatasetErrorMap().get(new File(tmpDir, "data/tracking/testIcebergTable").getAbsolutePath()).get("hivedb.testIcebergTable").lowWatermark, 50L);
    Assert.assertEquals(gobblinMCEWriter.getDatasetErrorMap().get(new File(tmpDir, "data/tracking/testIcebergTable").getAbsolutePath()).get("hivedb.testIcebergTable").highWatermark, 52L);
    // No events sent yet since the topic has not been flushed
    Assert.assertEquals(eventsSent.size(), 0);
    // We should not see exception as we have fault tolerant
    gobblinMCEWriter.flush();
    // Since this topic has been flushed, there should be an event sent for previous failure, and the table
    // should be removed from the error map
    Assert.assertEquals(eventsSent.size(), 1);
    Assert.assertEquals(eventsSent.get(0).getMetadata().get(IcebergMCEMetadataKeys.FAILURE_EVENT_TABLE_NAME), "testIcebergTable");
    Assert.assertEquals(eventsSent.get(0).getMetadata().get(IcebergMCEMetadataKeys.GMCE_LOW_WATERMARK), "50");
    Assert.assertEquals(eventsSent.get(0).getMetadata().get(IcebergMCEMetadataKeys.GMCE_HIGH_WATERMARK), "52");
    Assert.assertEquals(gobblinMCEWriter.getDatasetErrorMap().values().iterator().next().size(), 0);
    // A failure on a second dataset exceeds maxErrorDataset(1), so the write must now throw.
    gmce.getDatasetIdentifier().setNativeName("data/tracking/testFaultTolerant");
    GenericRecord genericGmce_differentDb = GenericData.get().deepCopy(gmce.getSchema(), gmce);
    Assert.expectThrows(IOException.class, () -> gobblinMCEWriter.writeEnvelope((new RecordEnvelope<>(genericGmce_differentDb, new KafkaStreamingExtractor.KafkaWatermark(new KafkaPartition.Builder().withTopicName("GobblinMetadataChangeEvent_test").withId(1).build(), new LongWatermark(54L))))));
    // Remove the mock writer so later tests in the group see the normal writer chain.
    gobblinMCEWriter.metadataWriters.remove(0);
}
Also used : GobblinEventBuilder(org.apache.gobblin.metrics.event.GobblinEventBuilder) SchemaBuilder(org.apache.avro.SchemaBuilder) IOException(java.io.IOException) MetadataWriter(org.apache.gobblin.hive.writer.MetadataWriter) GenericRecord(org.apache.avro.generic.GenericRecord) DataFile(org.apache.gobblin.metadata.DataFile) File(java.io.File) LongWatermark(org.apache.gobblin.source.extractor.extract.LongWatermark) Test(org.testng.annotations.Test) HiveMetastoreTest(org.apache.iceberg.hive.HiveMetastoreTest)

Example 2 with MetadataWriter

Use of org.apache.gobblin.hive.writer.MetadataWriter in the project incubator-gobblin by Apache.

From the class GobblinMCEWriter, method flush.

/**
 * Flushes every registered {@link MetadataWriter} for one table with fault tolerance:
 * after the first writer fails, the remaining writers are reset instead of flushed, and
 * the failure is recorded (or rethrown) via {@code addOrThrowException}. When a flush
 * finally succeeds for a table that previously failed, a failure GTE is emitted and the
 * table's entry is cleared from the dataset error map.
 */
private void flush(String dbName, String tableName) throws IOException {
    String tableKey = Joiner.on(TABLE_NAME_DELIMITER).join(dbName, tableName);
    // Watermarks have not moved since the last flush, so there is nothing to commit.
    if (tableOperationTypeMap.get(tableKey).gmceLowWatermark == tableOperationTypeMap.get(tableKey).gmceHighWatermark) {
        return;
    }
    boolean failed = false;
    for (MetadataWriter writer : metadataWriters) {
        // Once any writer fails, downstream writers are reset rather than flushed.
        if (failed) {
            writer.reset(dbName, tableName);
            continue;
        }
        try {
            writer.flush(dbName, tableName);
        } catch (IOException e) {
            failed = true;
            // Reset the failing writer first, then record the error (may rethrow when
            // the fault-tolerance budget is exceeded).
            writer.reset(dbName, tableName);
            addOrThrowException(e, tableKey, dbName, tableName);
        }
    }
    String datasetPath = tableOperationTypeMap.get(tableKey).datasetPath;
    if (!failed && datasetErrorMap.containsKey(datasetPath) && datasetErrorMap.get(datasetPath).containsKey(tableKey)) {
        // We only want to emit GTE when the table watermark moves. There can be two scenario that watermark move, one is after one flush interval,
        // we commit new watermark to state store, anther is here, where during the flush interval, we flush table because table operation changes.
        // Under this condition, error map contains this dataset means we met error before this flush, but this time when flush succeed and
        // the watermark inside the table moves, so we want to emit GTE to indicate there is some data loss here
        submitFailureEvent(datasetErrorMap.get(datasetPath).get(tableKey));
        this.datasetErrorMap.get(datasetPath).remove(tableKey);
    }
}
Also used : IOException(java.io.IOException) MetadataWriter(org.apache.gobblin.hive.writer.MetadataWriter)

Example 3 with MetadataWriter

Use of org.apache.gobblin.hive.writer.MetadataWriter in the project incubator-gobblin by Apache.

From the class GobblinMCEWriter, method write.

/**
 * Dispatches one record envelope to every registered {@link MetadataWriter} with fault
 * tolerance: after the first writer throws, the remaining writers are reset instead of
 * written to, and the failure is recorded (or rethrown) via {@code addOrThrowException}.
 */
private void write(RecordEnvelope recordEnvelope, ConcurrentHashMap newSpecsMap, ConcurrentHashMap oldSpecsMap, HiveSpec spec) throws IOException {
    String dbName = spec.getTable().getDbName();
    String tableName = spec.getTable().getTableName();
    String tableKey = Joiner.on(TABLE_NAME_DELIMITER).join(dbName, tableName);
    boolean failed = false;
    for (MetadataWriter writer : metadataWriters) {
        // Once any writer fails, downstream writers are reset rather than written to.
        if (failed) {
            writer.reset(dbName, tableName);
            continue;
        }
        try {
            writer.writeEnvelope(recordEnvelope, newSpecsMap, oldSpecsMap, spec);
        } catch (Exception e) {
            failed = true;
            // Reset the failing writer first, then record the error (may rethrow when
            // the fault-tolerance budget is exceeded).
            writer.reset(dbName, tableName);
            addOrThrowException(e, tableKey, dbName, tableName);
        }
    }
}
Also used : MetadataWriter(org.apache.gobblin.hive.writer.MetadataWriter) IOException(java.io.IOException)

Aggregations

IOException (java.io.IOException)3 MetadataWriter (org.apache.gobblin.hive.writer.MetadataWriter)3 File (java.io.File)1 SchemaBuilder (org.apache.avro.SchemaBuilder)1 GenericRecord (org.apache.avro.generic.GenericRecord)1 DataFile (org.apache.gobblin.metadata.DataFile)1 GobblinEventBuilder (org.apache.gobblin.metrics.event.GobblinEventBuilder)1 LongWatermark (org.apache.gobblin.source.extractor.extract.LongWatermark)1 HiveMetastoreTest (org.apache.iceberg.hive.HiveMetastoreTest)1 Test (org.testng.annotations.Test)1