use of org.apache.gobblin.iceberg.GobblinMCEProducer in project incubator-gobblin by apache.
the class GobblinMCEPublisherTest method testPublishGMCEWithoutFile.
/**
 * Publishes the state prepared by {@code setGMCEPublisherStateWithoutNewFile} and checks the event
 * built from the arguments the publisher hands to {@code sendGMCE}.
 *
 * <p>The event is rebuilt with {@link OperationType#change_property} and {@link SchemaSource#NONE}
 * and is expected to list exactly one new file, no old files and no old file prefixes.
 * NOTE(review): the helper name suggests the state carries no data file, so the single "new file"
 * is presumably a placeholder created by the publisher - confirm against GobblinMCEPublisher.
 *
 * <p>NOTE(review): the assertions live inside the stubbed {@code sendGMCE}; this test does not
 * separately verify that {@code sendGMCE} was invoked.
 */
@Test(dependsOnMethods = { "testPublishGMCEForAvro" })
public void testPublishGMCEWithoutFile() throws IOException {
  GobblinMCEProducer producer = Mockito.mock(GobblinMCEProducer.class);
  // Let the mock build real events so the assertions below inspect genuine output.
  Mockito.doCallRealMethod().when(producer).getGobblinMetadataChangeEvent(anyMap(), anyList(), anyList(), anyMap(), any(), any());
  Mockito.doAnswer(new Answer<Void>() {
    @Override
    @SuppressWarnings("unchecked")
    public Void answer(InvocationOnMock invocation) throws Throwable {
      Object[] args = invocation.getArguments();
      Map<Path, Metrics> newFiles = (Map<Path, Metrics>) args[0];
      // The map-typed offset range is the fourth sendGMCE argument (index 3);
      // indexes 1 and 2 are the list-typed arguments.
      Map<String, String> offsetRange = (Map<String, String>) args[3];
      GobblinMetadataChangeEvent gmce = producer.getGobblinMetadataChangeEvent(newFiles, null, null, offsetRange, OperationType.change_property, SchemaSource.NONE);
      Assert.assertEquals(gmce.getNewFiles().size(), 1);
      Assert.assertNull(gmce.getOldFiles());
      Assert.assertNull(gmce.getOldFilePrefixes());
      Assert.assertEquals(gmce.getOperationType(), OperationType.change_property);
      return null;
    }
  }).when(producer).sendGMCE(anyMap(), anyList(), anyList(), anyMap(), any(), any());

  WorkUnitState state = new WorkUnitState();
  setGMCEPublisherStateWithoutNewFile(state);
  // setState must run for real so the mocked producer actually holds the state.
  Mockito.doCallRealMethod().when(producer).setState(state);
  producer.setState(state);

  GobblinMCEPublisher publisher = new GobblinMCEPublisher(state, producer);
  publisher.publishData(Arrays.asList(state));
}
use of org.apache.gobblin.iceberg.GobblinMCEProducer in project incubator-gobblin by apache.
the class GobblinMCEPublisherTest method testPublishGMCEForAvro.
/**
 * Publishes the state prepared by {@code setGMCEPublisherStateForAvroFile} and checks the event
 * built from the arguments the publisher hands to {@code sendGMCE}.
 *
 * <p>The event is rebuilt with {@link OperationType#add_files} and
 * {@link SchemaSource#SCHEMAREGISTRY}; it must list exactly one new file whose path equals the
 * absolute path of {@code dataFile}, qualified against the URI of the file system obtained from
 * a default {@link Configuration}.
 *
 * <p>NOTE(review): the assertions live inside the stubbed {@code sendGMCE}; this test does not
 * separately verify that {@code sendGMCE} was invoked.
 */
@Test
public void testPublishGMCEForAvro() throws IOException {
  GobblinMCEProducer producer = Mockito.mock(GobblinMCEProducer.class);
  // Let the mock build real events so the assertions below inspect genuine output.
  Mockito.doCallRealMethod().when(producer).getGobblinMetadataChangeEvent(anyMap(), anyList(), anyList(), anyMap(), any(), any());
  Mockito.doAnswer(new Answer<Void>() {
    @Override
    @SuppressWarnings("unchecked")
    public Void answer(InvocationOnMock invocation) throws Throwable {
      Object[] args = invocation.getArguments();
      Map<Path, Metrics> newFiles = (Map<Path, Metrics>) args[0];
      // The map-typed offset range is the fourth sendGMCE argument (index 3);
      // indexes 1 and 2 are the list-typed arguments.
      Map<String, String> offsetRange = (Map<String, String>) args[3];
      GobblinMetadataChangeEvent gmce = producer.getGobblinMetadataChangeEvent(newFiles, null, null, offsetRange, OperationType.add_files, SchemaSource.SCHEMAREGISTRY);
      Assert.assertEquals(gmce.getNewFiles().size(), 1);
      FileSystem fs = FileSystem.get(new Configuration());
      String expectedPath = new Path(dataFile.getAbsolutePath()).makeQualified(fs.getUri(), new Path("/")).toString();
      Assert.assertEquals(gmce.getNewFiles().get(0).getFilePath(), expectedPath);
      return null;
    }
  }).when(producer).sendGMCE(anyMap(), anyList(), anyList(), anyMap(), any(), any());

  WorkUnitState state = new WorkUnitState();
  setGMCEPublisherStateForAvroFile(state);
  // setState must run for real so the mocked producer actually holds the state.
  Mockito.doCallRealMethod().when(producer).setState(state);
  producer.setState(state);

  GobblinMCEPublisher publisher = new GobblinMCEPublisher(state, producer);
  publisher.publishData(Arrays.asList(state));
}
use of org.apache.gobblin.iceberg.GobblinMCEProducer in project incubator-gobblin by apache.
the class CleanableIcebergDataset method cleanImpl.
/**
 * Files a {@link org.apache.gobblin.metadata.GobblinMetadataChangeEvent} asking for the paths of
 * the given versions to be dropped; this method itself deletes nothing.
 * The processing of these events can be seen in {@link org.apache.gobblin.iceberg.writer.IcebergMetadataWriter}.
 *
 * <p>Every path of every deletable version is qualified against {@code fs} and collected as a
 * prefix. When no prefix results, the method returns without touching the retention config.
 * Otherwise the producer state is seeded from {@code jobProps}, overlaid with the Hive
 * registration settings from {@code retentionConfig}, and a {@code drop_files} event is sent.
 * In simulate mode the event is only logged.
 *
 * @param deletableVersions versions whose paths should be dropped
 * @param retentionConfig must define {@link ConfigurationKeys#HIVE_REGISTRATION_POLICY} and
 *        {@link HiveRegistrationPolicyBase#HIVE_DATABASE_NAME};
 *        {@link HiveRegistrationPolicyBase#ADDITIONAL_HIVE_DATABASE_NAMES} is optional
 * @throws IllegalArgumentException if a required key is missing from {@code retentionConfig}
 * @throws IOException declared for the producer calls made inside the try-with-resources block
 */
protected void cleanImpl(Collection<T> deletableVersions, Config retentionConfig) throws IOException {
  List<String> deletablePrefix = new ArrayList<>();
  for (T version : deletableVersions) {
    version.getPaths().forEach(p -> deletablePrefix.add(fs.makeQualified(p).toString()));
  }
  if (deletablePrefix.isEmpty()) {
    return;
  }

  // Name the missing key so a misconfigured retention config is diagnosable from the exception.
  Preconditions.checkArgument(retentionConfig.hasPath(ConfigurationKeys.HIVE_REGISTRATION_POLICY),
      "Retention config is missing required key %s", ConfigurationKeys.HIVE_REGISTRATION_POLICY);
  Preconditions.checkArgument(retentionConfig.hasPath(HiveRegistrationPolicyBase.HIVE_DATABASE_NAME),
      "Retention config is missing required key %s", HiveRegistrationPolicyBase.HIVE_DATABASE_NAME);

  Properties prop = new Properties();
  prop.putAll(jobProps);
  State producerState = new State(prop);

  // The same policy value is written under both the regular key and the old-files key.
  String hiveRegistrationPolicy = retentionConfig.getString(ConfigurationKeys.HIVE_REGISTRATION_POLICY);
  producerState.setProp(ConfigurationKeys.HIVE_REGISTRATION_POLICY, hiveRegistrationPolicy);
  producerState.setProp(GobblinMCEProducer.OLD_FILES_HIVE_REGISTRATION_KEY, hiveRegistrationPolicy);
  producerState.setProp(HiveRegistrationPolicyBase.HIVE_DATABASE_NAME, retentionConfig.getString(HiveRegistrationPolicyBase.HIVE_DATABASE_NAME));
  if (retentionConfig.hasPath(HiveRegistrationPolicyBase.ADDITIONAL_HIVE_DATABASE_NAMES)) {
    producerState.setProp(HiveRegistrationPolicyBase.ADDITIONAL_HIVE_DATABASE_NAMES, retentionConfig.getString(HiveRegistrationPolicyBase.ADDITIONAL_HIVE_DATABASE_NAMES));
  }
  producerState.setProp(ConfigurationKeys.DATA_PUBLISHER_DATASET_DIR, this.datasetURN());

  // Built once; both branches log the same comma-separated list.
  String joinedPrefixes = String.join(",", deletablePrefix);
  if (this.simulate) {
    log.info("In simulate mode, going to send gmce to delete path {} from icebergTable", joinedPrefixes);
    return;
  }
  try (GobblinMCEProducer producer = GobblinMCEProducer.getGobblinMCEProducer(producerState)) {
    producer.sendGMCE(null, null, deletablePrefix, null, OperationType.drop_files, SchemaSource.NONE);
    log.info("Sent gmce to delete path {} from icebergTable", joinedPrefixes);
  }
}
use of org.apache.gobblin.iceberg.GobblinMCEProducer in project incubator-gobblin by apache.
the class GobblinMCEPublisherTest method testPublishGMCEForORC.
/**
 * Publishes the state prepared by {@code setGMCEPublisherStateForOrcFile} and checks the event
 * built from the arguments the publisher hands to {@code sendGMCE}.
 *
 * <p>The event is rebuilt with {@link OperationType#add_files} and
 * {@link SchemaSource#SCHEMAREGISTRY}; it must list exactly one new file whose path equals
 * {@code orcFilePath} qualified against the URI of the file system obtained from a default
 * {@link Configuration}. The file metrics entry looked up with {@code get(1)} must have the
 * UTF-8 encoding of "Alyssa" as lower bound and of "Bob" as upper bound.
 *
 * <p>NOTE(review): the assertions live inside the stubbed {@code sendGMCE}; this test does not
 * separately verify that {@code sendGMCE} was invoked.
 */
@Test
public void testPublishGMCEForORC() throws IOException {
  GobblinMCEProducer producer = Mockito.mock(GobblinMCEProducer.class);
  // Let the mock build real events so the assertions below inspect genuine output.
  Mockito.doCallRealMethod().when(producer).getGobblinMetadataChangeEvent(anyMap(), anyList(), anyList(), anyMap(), any(), any());
  Mockito.doAnswer(new Answer<Void>() {
    @Override
    @SuppressWarnings("unchecked")
    public Void answer(InvocationOnMock invocation) throws Throwable {
      Object[] args = invocation.getArguments();
      Map<Path, Metrics> newFiles = (Map<Path, Metrics>) args[0];
      // The map-typed offset range is the fourth sendGMCE argument (index 3);
      // indexes 1 and 2 are the list-typed arguments.
      Map<String, String> offsetRange = (Map<String, String>) args[3];
      GobblinMetadataChangeEvent gmce = producer.getGobblinMetadataChangeEvent(newFiles, null, null, offsetRange, OperationType.add_files, SchemaSource.SCHEMAREGISTRY);
      Assert.assertEquals(gmce.getNewFiles().size(), 1);
      FileSystem fs = FileSystem.get(new Configuration());
      Charset charset = Charset.forName("UTF-8");
      CharsetEncoder encoder = charset.newEncoder();
      Assert.assertEquals(gmce.getNewFiles().get(0).getFilePath(), orcFilePath.makeQualified(fs.getUri(), new Path("/")).toString());
      Assert.assertEquals(gmce.getNewFiles().get(0).getFileMetrics().getLowerBounds().get(1).getValue(), encoder.encode(CharBuffer.wrap("Alyssa")));
      Assert.assertEquals(gmce.getNewFiles().get(0).getFileMetrics().getUpperBounds().get(1).getValue(), encoder.encode(CharBuffer.wrap("Bob")));
      return null;
    }
  }).when(producer).sendGMCE(anyMap(), anyList(), anyList(), anyMap(), any(), any());

  WorkUnitState state = new WorkUnitState();
  setGMCEPublisherStateForOrcFile(state);
  // setState must run for real so the mocked producer actually holds the state.
  Mockito.doCallRealMethod().when(producer).setState(state);
  producer.setState(state);

  GobblinMCEPublisher publisher = new GobblinMCEPublisher(state, producer);
  publisher.publishData(Arrays.asList(state));
}
use of org.apache.gobblin.iceberg.GobblinMCEProducer in project incubator-gobblin by apache.
the class CompactionGMCEPublishingAction method onCompactionJobComplete.
/**
 * Emits a {@code rewrite_files} metadata change event once compaction of {@code dataset} is done.
 *
 * <p>Virtual datasets are skipped. For any other dataset the publisher dataset directory
 * (destination base dir joined with the dataset name by "/") is recorded on {@code state}, the
 * event is sent with the new file metrics and a copy of the configurator's old files, and the
 * state stored under the destination directory is re-saved with {@code GMCE_EMITTED_KEY} set to
 * true. The configurator's old-file collection is cleared at the end.
 *
 * @param dataset the dataset whose compaction just finished
 * @throws IOException declared for the producer and state-store calls made here
 */
public void onCompactionJobComplete(FileSystemDataset dataset) throws IOException {
  if (dataset.isVirtual()) {
    return;
  }

  CompactionPathParser.CompactionParserResult parsed = new CompactionPathParser(state).parse(dataset);
  state.setProp(ConfigurationKeys.DATA_PUBLISHER_DATASET_DIR,
      Joiner.on("/").join(parsed.getDstBaseDir(), parsed.getDatasetName()));

  try (GobblinMCEProducer gmceProducer = GobblinMCEProducer.getGobblinMCEProducer(state)) {
    gmceProducer.sendGMCE(
        getNewFileMetrics(parsed),
        null,
        Lists.newArrayList(this.configurator.getOldFiles()),
        null,
        OperationType.rewrite_files,
        SchemaSource.NONE);
  }

  // Flag the stored compaction state so it records that the gmce has been emitted.
  Path dstDir = new Path(parsed.getDstAbsoluteDir());
  State storedState = helper.loadState(dstDir);
  storedState.setProp(GMCE_EMITTED_KEY, true);
  helper.saveState(dstDir, storedState);

  // Drop the old-file list to release memory.
  this.configurator.getOldFiles().clear();
}
Aggregations