Usage of org.apache.gobblin.metadata.types.GlobalMetadata in the Apache incubator-gobblin project.
Class BaseDataPublisherTest, method testWithFsMetricsNoPartitions.
/**
 * Publishes writer metrics from three work units that carry no partition key and then checks
 * the {@code metadata.json} written directly under the publisher's final directory.
 *
 * <p>The values handed to {@code checkMetadata} (3 and 95) match the number of files and the sum
 * of the record counts configured below (10 + 30 + 55).
 */
@Test
public void testWithFsMetricsNoPartitions() throws IOException {
  final File outputDir = Files.createTempDir();
  try {
    final String globalMetadataJson = new GlobalMetadata().toJson();

    // Job-level state: writer-metadata publishing is switched on, the final dir points at the
    // temp directory, and the explicit metadata output dir is removed.
    final State jobState = buildDefaultState(1);
    jobState.removeProp(ConfigurationKeys.DATA_PUBLISHER_METADATA_OUTPUT_DIR);
    jobState.setProp(ConfigurationKeys.DATA_PUBLISHER_FINAL_DIR, outputDir.getAbsolutePath());
    jobState.setProp(ConfigurationKeys.DATA_PUBLISHER_APPEND_EXTRACT_TO_FINAL_DIR, "false");
    jobState.setProp(ConfigurationKeys.DATA_PUBLISHER_METADATA_OUTPUT_FILE, "metadata.json");
    jobState.setProp(ConfigurationKeys.DATA_PUBLISH_WRITER_METADATA_KEY, "true");
    jobState.setProp(ConfigurationKeys.WRITER_METADATA_KEY, globalMetadataJson);

    // One work unit per writer; the array index doubles as the writer id (0, 1, 2).
    final String[] writtenFiles = { "foo1.json", "foo3.json", "foo4.json" };
    final int[] recordCounts = { 10, 30, 55 };
    final WorkUnitState[] workUnits = new WorkUnitState[writtenFiles.length];
    for (int writerId = 0; writerId < workUnits.length; writerId++) {
      WorkUnitState workUnit = new WorkUnitState();
      FsWriterMetrics writerMetrics =
          buildWriterMetrics(writtenFiles[writerId], null, writerId, recordCounts[writerId]);
      workUnit.setProp(ConfigurationKeys.WRITER_METADATA_KEY, globalMetadataJson);
      workUnit.setProp(FsDataWriter.FS_WRITER_METRICS_KEY, writerMetrics.toJson());
      // Must run after the work-unit-specific properties have been set, as in the other tests.
      addStateToWorkunit(jobState, workUnit);
      workUnits[writerId] = workUnit;
    }

    final BaseDataPublisher publisher = new BaseDataPublisher(jobState);
    publisher.publishMetadata(ImmutableList.copyOf(workUnits));

    final File publishedMetadata = new File(outputDir.getAbsolutePath(), "metadata.json");
    checkMetadata(publishedMetadata, 3, 95,
        new FsWriterMetrics.FileInfo("foo3.json", 30),
        new FsWriterMetrics.FileInfo("foo1.json", 10),
        new FsWriterMetrics.FileInfo("foo4.json", 55));
  } finally {
    FileUtils.deleteDirectory(outputDir);
  }
}
Usage of org.apache.gobblin.metadata.types.GlobalMetadata in the Apache incubator-gobblin project.
Class BaseDataPublisherTest, method testWithFsMetricsAndPartitions.
/**
 * Publishes writer metrics from three work units spread over two partitions
 * ({@code 1-2-3-4} and {@code 5-6-7-8}) and checks the {@code metadata.json} found in each
 * partition directory.
 *
 * <p>Expected totals passed to {@code checkMetadata}: 2 files / 40 records for the first
 * partition (10 + 30) and 2 files / 75 records for the second (20 + 55).
 */
@Test
public void testWithFsMetricsAndPartitions() throws IOException {
  final File outputDir = Files.createTempDir();
  try {
    final String firstPartition = "1-2-3-4";
    final String secondPartition = "5-6-7-8";

    // The partition directories are created up front, before anything is published.
    final File firstPartitionDir = new File(outputDir, firstPartition);
    firstPartitionDir.mkdir();
    final File secondPartitionDir = new File(outputDir, secondPartition);
    secondPartitionDir.mkdir();

    final String globalMetadataJson = new GlobalMetadata().toJson();

    final State jobState = buildDefaultState(1);
    jobState.removeProp(ConfigurationKeys.DATA_PUBLISHER_METADATA_OUTPUT_DIR);
    jobState.setProp(ConfigurationKeys.DATA_PUBLISHER_FINAL_DIR, outputDir.getAbsolutePath());
    jobState.setProp(ConfigurationKeys.DATA_PUBLISHER_APPEND_EXTRACT_TO_FINAL_DIR, "false");
    jobState.setProp(ConfigurationKeys.DATA_PUBLISHER_METADATA_OUTPUT_FILE, "metadata.json");
    jobState.setProp(ConfigurationKeys.DATA_PUBLISH_WRITER_METADATA_KEY, "true");
    jobState.setProp(ConfigurationKeys.WRITER_METADATA_KEY, globalMetadataJson);

    // Work unit 1: the same file name reported for both partitions.
    final WorkUnitState multiPartitionUnit = new WorkUnitState();
    final FsWriterMetrics foo1FirstPartition = buildWriterMetrics("foo1.json", firstPartition, 0, 10);
    final FsWriterMetrics foo1SecondPartition = buildWriterMetrics("foo1.json", secondPartition, 10, 20);
    multiPartitionUnit.setProp(ConfigurationKeys.WRITER_PARTITION_PATH_KEY, firstPartition);
    multiPartitionUnit.setProp(ConfigurationKeys.WRITER_PARTITION_PATH_KEY + "_0", firstPartition);
    multiPartitionUnit.setProp(ConfigurationKeys.WRITER_PARTITION_PATH_KEY + "_1", secondPartition);
    multiPartitionUnit.setProp(FsDataWriter.FS_WRITER_METRICS_KEY, foo1FirstPartition.toJson());
    // NOTE(review): the metrics key suffixes below contain a leading space (" _0" / " _1"),
    // unlike the partition-path suffixes ("_0" / "_1"), and both carry the second-partition
    // metrics. Kept exactly as-is; confirm whether this is intentional.
    multiPartitionUnit.setProp(FsDataWriter.FS_WRITER_METRICS_KEY + " _0", foo1SecondPartition.toJson());
    multiPartitionUnit.setProp(FsDataWriter.FS_WRITER_METRICS_KEY + " _1", foo1SecondPartition.toJson());
    multiPartitionUnit.setProp(ConfigurationKeys.WRITER_METADATA_KEY, globalMetadataJson);
    addStateToWorkunit(jobState, multiPartitionUnit);

    // Work unit 2: a single file in the first partition.
    final WorkUnitState firstPartitionUnit = new WorkUnitState();
    final FsWriterMetrics foo3Metrics = buildWriterMetrics("foo3.json", firstPartition, 1, 30);
    firstPartitionUnit.setProp(FsDataWriter.FS_WRITER_METRICS_KEY, foo3Metrics.toJson());
    firstPartitionUnit.setProp(ConfigurationKeys.WRITER_METADATA_KEY, globalMetadataJson);
    firstPartitionUnit.setProp(ConfigurationKeys.WRITER_PARTITION_PATH_KEY, firstPartition);
    addStateToWorkunit(jobState, firstPartitionUnit);

    // Work unit 3: a single file in the second partition.
    final WorkUnitState secondPartitionUnit = new WorkUnitState();
    final FsWriterMetrics foo4Metrics = buildWriterMetrics("foo4.json", secondPartition, 2, 55);
    secondPartitionUnit.setProp(FsDataWriter.FS_WRITER_METRICS_KEY, foo4Metrics.toJson());
    secondPartitionUnit.setProp(ConfigurationKeys.WRITER_METADATA_KEY, globalMetadataJson);
    secondPartitionUnit.setProp(ConfigurationKeys.WRITER_PARTITION_PATH_KEY, secondPartition);
    addStateToWorkunit(jobState, secondPartitionUnit);

    final BaseDataPublisher publisher = new BaseDataPublisher(jobState);
    publisher.publishMetadata(ImmutableList.of(multiPartitionUnit, firstPartitionUnit, secondPartitionUnit));

    checkMetadata(new File(firstPartitionDir, "metadata.json"), 2, 40,
        new FsWriterMetrics.FileInfo("foo3.json", 30),
        new FsWriterMetrics.FileInfo("foo1.json", 10));
    checkMetadata(new File(secondPartitionDir, "metadata.json"), 2, 75,
        new FsWriterMetrics.FileInfo("foo1.json", 20),
        new FsWriterMetrics.FileInfo("foo4.json", 55));
  } finally {
    FileUtils.deleteDirectory(outputDir);
  }
}
Usage of org.apache.gobblin.metadata.types.GlobalMetadata in the Apache incubator-gobblin project.
Class BaseDataPublisherTest, method testMergesExistingMetadata.
/**
 * Seeds the publish directory with an existing {@code metadata.json} (copied from the
 * {@code publisher/sample_metadata.json} test resource), publishes one additional work unit and
 * checks that the resulting file contains both the pre-existing entries and the new one.
 *
 * <p>The expected totals (4 files / 185 records) imply the sample resource already accounts for
 * foo1.json, foo3.json and foo4.json (10 + 30 + 55); the resource itself is not visible from
 * this test, so treat that as an assumption.
 */
@Test
public void testMergesExistingMetadata() throws IOException {
  File publishPath = Files.createTempDir();
  try {
    // Copy the metadata file from resources into the publish path. Both streams are managed by
    // try-with-resources so the classpath resource stream is closed as well as the output file.
    try (InputStream mdStream =
            this.getClass().getClassLoader().getResourceAsStream("publisher/sample_metadata.json");
        FileOutputStream fOs = new FileOutputStream(new File(publishPath, "metadata.json"))) {
      IOUtils.copy(mdStream, fOs);
    }

    State s = buildDefaultState(1);
    String md = new GlobalMetadata().toJson();
    s.removeProp(ConfigurationKeys.DATA_PUBLISHER_METADATA_OUTPUT_DIR);
    s.setProp(ConfigurationKeys.DATA_PUBLISH_WRITER_METADATA_KEY, "true");
    s.setProp(ConfigurationKeys.WRITER_METADATA_KEY, md);
    s.setProp(ConfigurationKeys.DATA_PUBLISHER_FINAL_DIR, publishPath.getAbsolutePath());
    s.setProp(ConfigurationKeys.DATA_PUBLISHER_APPEND_EXTRACT_TO_FINAL_DIR, "false");
    s.setProp(ConfigurationKeys.DATA_PUBLISHER_METADATA_OUTPUT_FILE, "metadata.json");

    // The only new work unit: one un-partitioned file with 90 records.
    WorkUnitState wuState1 = new WorkUnitState();
    FsWriterMetrics metrics1 = buildWriterMetrics("newfile.json", null, 0, 90);
    wuState1.setProp(FsDataWriter.FS_WRITER_METRICS_KEY, metrics1.toJson());
    wuState1.setProp(ConfigurationKeys.WRITER_METADATA_KEY, md);
    addStateToWorkunit(s, wuState1);

    BaseDataPublisher publisher = new BaseDataPublisher(s);
    publisher.publishMetadata(ImmutableList.of(wuState1));

    checkMetadata(new File(publishPath.getAbsolutePath(), "metadata.json"), 4, 185,
        new FsWriterMetrics.FileInfo("foo3.json", 30),
        new FsWriterMetrics.FileInfo("foo1.json", 10),
        new FsWriterMetrics.FileInfo("foo4.json", 55),
        new FsWriterMetrics.FileInfo("newfile.json", 90));
  } finally {
    FileUtils.deleteDirectory(publishPath);
  }
}
Usage of org.apache.gobblin.metadata.types.GlobalMetadata in the Apache incubator-gobblin project.
Class BaseDataPublisherTest, method testWithFsMetricsBranchesAndPartitions.
/**
 * Publishes writer metrics for a two-branch job where each branch has its own final directory
 * containing the partition directories {@code 1-2-3-4} and {@code 5-6-7-8}, then checks the
 * branch-suffixed metadata files ({@code metadata.json.0} / {@code metadata.json.1}).
 *
 * <p>Three files are verified: both partitions of branch 0 and the first partition of branch 1.
 * No work unit below reports metrics for branch 1 / partition {@code 5-6-7-8}, and that
 * location is not checked.
 */
@Test
public void testWithFsMetricsBranchesAndPartitions() throws IOException {
  File[] publishPaths = new File[] {
      Files.createTempDir(), // branch 0
      Files.createTempDir()  // branch 1
  };
  try {
    // branchPaths.get(b)[p] is the directory of partition p inside branch b's publish path.
    List<File[]> branchPaths = Arrays.stream(publishPaths)
        .map(branchPath -> new File[] { new File(branchPath, "1-2-3-4"), new File(branchPath, "5-6-7-8") })
        .collect(Collectors.toList());
    branchPaths.forEach(partitionPaths -> Arrays.stream(partitionPaths).forEach(File::mkdir));

    State s = buildDefaultState(2);
    String md = new GlobalMetadata().toJson();
    s.removeProp(ConfigurationKeys.DATA_PUBLISHER_METADATA_OUTPUT_DIR);
    // Per-branch settings use the ".<branchId>" key suffix.
    s.setProp(ConfigurationKeys.DATA_PUBLISH_WRITER_METADATA_KEY + ".0", "true");
    s.setProp(ConfigurationKeys.DATA_PUBLISH_WRITER_METADATA_KEY + ".1", "true");
    s.setProp(ConfigurationKeys.WRITER_METADATA_KEY + ".0", md);
    s.setProp(ConfigurationKeys.WRITER_METADATA_KEY + ".1", md);
    s.setProp(ConfigurationKeys.DATA_PUBLISHER_FINAL_DIR + ".0", publishPaths[0].getAbsolutePath());
    s.setProp(ConfigurationKeys.DATA_PUBLISHER_FINAL_DIR + ".1", publishPaths[1].getAbsolutePath());
    s.setProp(ConfigurationKeys.DATA_PUBLISHER_APPEND_EXTRACT_TO_FINAL_DIR, "false");
    s.setProp(ConfigurationKeys.DATA_PUBLISHER_APPEND_EXTRACT_TO_FINAL_DIR + ".0", "false");
    s.setProp(ConfigurationKeys.DATA_PUBLISHER_APPEND_EXTRACT_TO_FINAL_DIR + ".1", "false");
    s.setProp(ConfigurationKeys.DATA_PUBLISHER_METADATA_OUTPUT_FILE, "metadata.json");

    // Work unit 1 (branch 0): foo1.json reported for both partitions.
    WorkUnitState wuState1 = new WorkUnitState();
    FsWriterMetrics metrics1 = buildWriterMetrics("foo1.json", "1-2-3-4", 0, 10);
    FsWriterMetrics metrics2 = buildWriterMetrics("foo1.json", "5-6-7-8", 10, 20);
    wuState1.setProp(ConfigurationKeys.WRITER_PARTITION_PATH_KEY + ".0", "1-2-3-4");
    wuState1.setProp(FsDataWriter.FS_WRITER_METRICS_KEY + ".0", metrics1.toJson());
    wuState1.setProp(ConfigurationKeys.WRITER_PARTITION_PATH_KEY + ".0_0", "1-2-3-4");
    wuState1.setProp(FsDataWriter.FS_WRITER_METRICS_KEY + ".0_0", metrics2.toJson());
    wuState1.setProp(ConfigurationKeys.WRITER_PARTITION_PATH_KEY + ".0" + "_1", "5-6-7-8");
    wuState1.setProp(FsDataWriter.FS_WRITER_METRICS_KEY + ".0_1", metrics2.toJson());
    wuState1.setProp(ConfigurationKeys.WRITER_METADATA_KEY + ".0", md);
    addStateToWorkunit(s, wuState1);

    // Work unit 2 (branch 1): foo3.json in partition 1-2-3-4. This is the only call that uses
    // the five-argument buildWriterMetrics form.
    WorkUnitState wuState2 = new WorkUnitState();
    FsWriterMetrics metrics3 = buildWriterMetrics("foo3.json", "1-2-3-4", 1, 1, 30);
    wuState2.setProp(ConfigurationKeys.WRITER_PARTITION_PATH_KEY + ".1", "1-2-3-4");
    wuState2.setProp(ConfigurationKeys.WRITER_METADATA_KEY + ".1", md);
    wuState2.setProp(FsDataWriter.FS_WRITER_METRICS_KEY + ".1", metrics3.toJson());
    addStateToWorkunit(s, wuState2);

    // Work unit 3 (branch 0): foo4.json in partition 5-6-7-8.
    WorkUnitState wuState3 = new WorkUnitState();
    FsWriterMetrics metrics4 = buildWriterMetrics("foo4.json", "5-6-7-8", 2, 55);
    wuState3.setProp(ConfigurationKeys.WRITER_PARTITION_PATH_KEY + ".0", "5-6-7-8");
    wuState3.setProp(ConfigurationKeys.WRITER_METADATA_KEY + ".0", md);
    wuState3.setProp(FsDataWriter.FS_WRITER_METRICS_KEY + ".0", metrics4.toJson());
    addStateToWorkunit(s, wuState3);

    BaseDataPublisher publisher = new BaseDataPublisher(s);
    publisher.publishMetadata(ImmutableList.of(wuState1, wuState2, wuState3));

    checkMetadata(new File(branchPaths.get(0)[0], "metadata.json.0"), 1, 10,
        new FsWriterMetrics.FileInfo("foo1.json", 10));
    checkMetadata(new File(branchPaths.get(0)[1], "metadata.json.0"), 2, 75,
        new FsWriterMetrics.FileInfo("foo1.json", 20),
        new FsWriterMetrics.FileInfo("foo4.json", 55));
    checkMetadata(new File(branchPaths.get(1)[0], "metadata.json.1"), 1, 30,
        new FsWriterMetrics.FileInfo("foo3.json", 30));
  } finally {
    Arrays.stream(publishPaths).forEach(dir -> {
      try {
        FileUtils.deleteDirectory(dir);
      } catch (IOException e) {
        // Keep the original IOException as the cause so a cleanup failure can be diagnosed.
        throw new RuntimeException("IOError", e);
      }
    });
  }
}
Usage of org.apache.gobblin.metadata.types.GlobalMetadata in the Apache incubator-gobblin project.
Class MetadataWriterWrapper, method write.
/**
 * Hands the record's global metadata to the metadata collector and then forwards the record to
 * the wrapped writer.
 *
 * <p>If the incoming object is not a {@link RecordWithMetadata}, the collector is invoked with
 * {@code null} and the object is forwarded unchanged. Otherwise the wrapped writer receives
 * either the full {@link RecordWithMetadata} (when {@code writerDataClass} is assignable to
 * {@code RecordWithMetadata}) or only the inner record.
 *
 * @param untypedRecord the record to write, with or without metadata attached
 * @throws IOException declared for the delegated write
 */
@Override
@SuppressWarnings("unchecked")
public void write(Object untypedRecord) throws IOException {
  // Plain records carry no metadata: notify the collector with null and pass them straight on.
  if (!(untypedRecord instanceof RecordWithMetadata)) {
    metadataCollector.processMetadata(null);
    wrappedWriter.write(untypedRecord);
    return;
  }

  RecordWithMetadata typedRecord = (RecordWithMetadata) untypedRecord;
  metadataCollector.processMetadata(typedRecord.getMetadata().getGlobalMetadata());

  // Unwrap the payload unless the wrapped writer itself consumes RecordWithMetadata.
  boolean writerWantsMetadata = RecordWithMetadata.class.isAssignableFrom(writerDataClass);
  Object payload = writerWantsMetadata ? typedRecord : typedRecord.getRecord();
  wrappedWriter.write(payload);
}
Aggregations