Search in sources :

Example 6 with GlobalMetadata

use of org.apache.gobblin.metadata.types.GlobalMetadata in project incubator-gobblin by apache.

the class BaseDataPublisherTest method testWithPartitionKey.

/**
 * Publishes writer metadata for two work units that carry different partition paths and
 * checks that a {@code metadata.json} file exists inside each partition directory afterwards.
 *
 * @throws IOException declared by the test; raised by the helpers or by the temp-dir cleanup
 */
@Test
public void testWithPartitionKey() throws IOException {
    final String firstPartition = "1-2-3-4";
    final String secondPartition = "5-6-7-8";
    final String metadataFileName = "metadata.json";

    File publishRoot = Files.createTempDir();
    try {
        // One directory per partition path, created directly under the publish root.
        File firstPartitionDir = new File(publishRoot, firstPartition);
        firstPartitionDir.mkdir();
        File secondPartitionDir = new File(publishRoot, secondPartition);
        secondPartitionDir.mkdir();

        State jobState = buildDefaultState(1);
        String metadataJson = new GlobalMetadata().toJson();

        // Publisher configuration: no explicit metadata output dir, final dir is the temp root.
        jobState.removeProp(ConfigurationKeys.DATA_PUBLISHER_METADATA_OUTPUT_DIR);
        jobState.setProp(ConfigurationKeys.DATA_PUBLISH_WRITER_METADATA_KEY, "true");
        jobState.setProp(ConfigurationKeys.WRITER_METADATA_KEY, metadataJson);
        jobState.setProp(ConfigurationKeys.DATA_PUBLISHER_FINAL_DIR, publishRoot.getAbsolutePath());
        jobState.setProp(ConfigurationKeys.DATA_PUBLISHER_APPEND_EXTRACT_TO_FINAL_DIR, "false");
        jobState.setProp(ConfigurationKeys.DATA_PUBLISHER_METADATA_OUTPUT_FILE, metadataFileName);

        // Each work unit points at its own partition path and carries the same metadata JSON.
        WorkUnitState firstWorkUnit = new WorkUnitState();
        firstWorkUnit.setProp(ConfigurationKeys.WRITER_PARTITION_PATH_KEY, firstPartition);
        firstWorkUnit.setProp(ConfigurationKeys.WRITER_METADATA_KEY, metadataJson);
        addStateToWorkunit(jobState, firstWorkUnit);

        WorkUnitState secondWorkUnit = new WorkUnitState();
        secondWorkUnit.setProp(ConfigurationKeys.WRITER_PARTITION_PATH_KEY, secondPartition);
        secondWorkUnit.setProp(ConfigurationKeys.WRITER_METADATA_KEY, metadataJson);
        addStateToWorkunit(jobState, secondWorkUnit);

        BaseDataPublisher publisher = new BaseDataPublisher(jobState);
        publisher.publishMetadata(ImmutableList.of(firstWorkUnit, secondWorkUnit));

        Assert.assertTrue(new File(firstPartitionDir, metadataFileName).exists());
        Assert.assertTrue(new File(secondPartitionDir, metadataFileName).exists());
    } finally {
        // Always remove the temporary publish directory, even when an assertion fails.
        FileUtils.deleteDirectory(publishRoot);
    }
}
Also used : GlobalMetadata(org.apache.gobblin.metadata.types.GlobalMetadata) TaskState(org.apache.hadoop.mapreduce.v2.api.records.TaskState) WorkUnitState(org.apache.gobblin.configuration.WorkUnitState) State(org.apache.gobblin.configuration.State) File(java.io.File) Test(org.testng.annotations.Test)

Example 7 with GlobalMetadata

use of org.apache.gobblin.metadata.types.GlobalMetadata in project incubator-gobblin by apache.

the class GlobalMetadataCollectorTest method buildMetadataWithUrn.

/**
 * Creates a new {@link GlobalMetadata} and sets its dataset URN to the given value.
 *
 * @param urn dataset URN to store on the new metadata object
 * @return the newly created metadata object
 */
private GlobalMetadata buildMetadataWithUrn(String urn) {
    final GlobalMetadata withUrn = new GlobalMetadata();
    withUrn.setDatasetUrn(urn);
    return withUrn;
}
Also used : GlobalMetadata(org.apache.gobblin.metadata.types.GlobalMetadata)

Example 8 with GlobalMetadata

use of org.apache.gobblin.metadata.types.GlobalMetadata in project incubator-gobblin by apache.

the class GlobalMetadataCollectorTest method testMergesWithDefaults.

/**
 * Verifies that a record processed by a collector constructed with default metadata comes
 * back carrying both the default dataset URN and the record's own content type, and that
 * the merged record is the single entry reported by {@code getMetadataRecords()}.
 */
@Test
public void testMergesWithDefaults() {
    final String datasetUrn = "foo";

    // Defaults supply only the dataset URN.
    GlobalMetadata defaultMetadata = new GlobalMetadata();
    defaultMetadata.setDatasetUrn(datasetUrn);
    // NOTE(review): -1 presumably means "no cache size limit" - confirm against GlobalMetadataCollector.
    GlobalMetadataCollector collector = new GlobalMetadataCollector(defaultMetadata, -1);

    // The incoming record supplies only the content type.
    GlobalMetadata metadataRecord = new GlobalMetadata();
    metadataRecord.setContentType(CONTENT_TYPE);

    GlobalMetadata mergedRecord = collector.processMetadata(metadataRecord);

    Assert.assertEquals(mergedRecord.getDatasetUrn(), datasetUrn);
    Assert.assertEquals(mergedRecord.getContentType(), CONTENT_TYPE);
    // TestNG's assertEquals is (actual, expected); keeping that order makes a failure
    // message report the expected and found values the right way round.
    Assert.assertEquals(collector.getMetadataRecords().size(), 1);
    Assert.assertTrue(collector.getMetadataRecords().contains(mergedRecord), "Expected merged record to exist in metadata cache");
}
Also used : GlobalMetadata(org.apache.gobblin.metadata.types.GlobalMetadata) Test(org.testng.annotations.Test)

Example 9 with GlobalMetadata

use of org.apache.gobblin.metadata.types.GlobalMetadata in project incubator-gobblin by apache.

the class GlobalMetadataCollectorTest method evictsRecordsLRUBased.

/**
 * Feeds four records (the third repeating the first's content type) into a collector
 * constructed with the argument 2 and checks which records remain in
 * {@code getMetadataRecords()} afterwards.
 */
@Test
public void evictsRecordsLRUBased() {
    GlobalMetadata first = buildMetadataWithContentType(CONTENT_TYPE + "_1");
    GlobalMetadata firstAgain = buildMetadataWithContentType(first.getContentType());
    GlobalMetadata second = buildMetadataWithContentType(CONTENT_TYPE + "_2");
    GlobalMetadata third = buildMetadataWithContentType(CONTENT_TYPE + "_3");

    GlobalMetadataCollector collector = new GlobalMetadataCollector(2);

    // Two distinct records: each one is expected to be returned as new.
    Assert.assertNotNull(collector.processMetadata(first));
    Assert.assertNotNull(collector.processMetadata(second));

    // Built from the first record's content type; the test expects null for this repeat.
    Assert.assertNull(collector.processMetadata(firstAgain));

    // second should be evicted as first was more recently seen
    Assert.assertNotNull(collector.processMetadata(third));

    Set<GlobalMetadata> remaining = collector.getMetadataRecords();
    Assert.assertEquals(remaining.size(), 2);
    Assert.assertTrue(remaining.contains(first));
    Assert.assertTrue(remaining.contains(third));
}
Also used : GlobalMetadata(org.apache.gobblin.metadata.types.GlobalMetadata) Test(org.testng.annotations.Test)

Example 10 with GlobalMetadata

use of org.apache.gobblin.metadata.types.GlobalMetadata in project incubator-gobblin by apache.

the class GlobalMetadataCollectorTest method buildMetadataWithContentType.

/**
 * Creates a new {@link GlobalMetadata} and sets its content type to the given value.
 *
 * @param contentType content type to store on the new metadata object
 * @return the newly created metadata object
 */
private GlobalMetadata buildMetadataWithContentType(String contentType) {
    final GlobalMetadata withContentType = new GlobalMetadata();
    withContentType.setContentType(contentType);
    return withContentType;
}
Also used : GlobalMetadata(org.apache.gobblin.metadata.types.GlobalMetadata)

Aggregations

GlobalMetadata (org.apache.gobblin.metadata.types.GlobalMetadata): 16; Test (org.testng.annotations.Test): 9; File (java.io.File): 5; State (org.apache.gobblin.configuration.State): 5; WorkUnitState (org.apache.gobblin.configuration.WorkUnitState): 5; FsWriterMetrics (org.apache.gobblin.writer.FsWriterMetrics): 5; TaskState (org.apache.hadoop.mapreduce.v2.api.records.TaskState): 5; FileInputStream (java.io.FileInputStream): 3; FileOutputStream (java.io.FileOutputStream): 2; InputStream (java.io.InputStream): 2; ImmutableList (com.google.common.collect.ImmutableList): 1; Files (com.google.common.io.Files): 1; ConfigFactory (com.typesafe.config.ConfigFactory): 1; IOException (java.io.IOException): 1; StandardCharsets (java.nio.charset.StandardCharsets): 1; ArrayList (java.util.ArrayList): 1; Arrays (java.util.Arrays): 1; Collections (java.util.Collections): 1; List (java.util.List): 1; Map (java.util.Map): 1