Search in sources :

Example 46 with State

use of org.apache.gobblin.configuration.State in project incubator-gobblin by apache.

the class MRCompactorAvroKeyDedupJobRunnerTest method setUp.

/**
 * Builds the fixtures used by this test class.
 *
 * <p>A {@link State} carrying the job name and the should-deduplicate flag (set to
 * {@code "true"}) is attached to a {@link Dataset} whose input path is {@code /tmp}.
 * That dataset, together with the file system obtained from a default
 * {@link Configuration}, is used to create the runner under test; a new job instance
 * is stored alongside it.
 *
 * @throws IOException declared by the file system / job factory calls used here
 */
@BeforeClass
public void setUp() throws IOException {
    State jobProps = new State();
    jobProps.setProp(ConfigurationKeys.JOB_NAME_KEY, "MRCompactorAvroKeyDedupJobRunnerTest");
    jobProps.setProp(MRCompactor.COMPACTION_SHOULD_DEDUPLICATE, "true");

    Dataset compactionDataset = new Dataset.Builder().withInputPath(new Path("/tmp")).build();
    compactionDataset.setJobProps(jobProps);

    FileSystem fileSystem = FileSystem.get(new Configuration());
    this.runner = new MRCompactorAvroKeyDedupJobRunner(compactionDataset, fileSystem);
    this.job = Job.getInstance();
}
Also used : Path(org.apache.hadoop.fs.Path) Configuration(org.apache.hadoop.conf.Configuration) State(org.apache.gobblin.configuration.State) Dataset(org.apache.gobblin.compaction.dataset.Dataset) BeforeClass(org.testng.annotations.BeforeClass)

Example 47 with State

use of org.apache.gobblin.configuration.State in project incubator-gobblin by apache.

the class PinotAuditCountVerifierTest method testTier.

/**
 * Runs {@link CompactionAuditCountVerifier} against a stub dataset with three different
 * sets of per-tier audit counts and checks the {@code isSuccessful} flag of each result.
 */
@Test
public void testTier() throws Exception {
    final String topic = "randomTopic";
    final String inputDir = "/base/input";
    final String outputDir = "/base/output";
    final String inputSubDir = "hourly";
    final String outputSubDir = "hourly";
    // The stub dataset reports this same string as both its root path and its URN.
    final String datasetLocation = inputDir + topic + inputSubDir + "/2017/04/03/10";

    TestAuditCountClient auditClient = new TestAuditCountClient();
    FileSystemDataset stubDataset = new FileSystemDataset() {

        @Override
        public Path datasetRoot() {
            return new Path(datasetLocation);
        }

        @Override
        public String datasetURN() {
            return datasetLocation;
        }
    };

    // Tier names.
    State verifierProps = new State();
    verifierProps.setProp(CompactionAuditCountVerifier.PRODUCER_TIER, PRODUCER_TIER);
    verifierProps.setProp(CompactionAuditCountVerifier.ORIGIN_TIER, ORIGIN_TIER);
    verifierProps.setProp(CompactionAuditCountVerifier.GOBBLIN_TIER, GOBBLIN_TIER);
    // Compaction directory layout.
    verifierProps.setProp(MRCompactor.COMPACTION_INPUT_DIR, inputDir);
    verifierProps.setProp(MRCompactor.COMPACTION_INPUT_SUBDIR, inputSubDir);
    verifierProps.setProp(MRCompactor.COMPACTION_DEST_DIR, outputDir);
    verifierProps.setProp(MRCompactor.COMPACTION_DEST_SUBDIR, outputSubDir);
    verifierProps.setProp(MRCompactor.COMPACTION_TMP_DEST_DIR, "/tmp/compaction/verifier");
    // Time window settings.
    verifierProps.setProp(TimeBasedSubDirDatasetsFinder.COMPACTION_TIMEBASED_MAX_TIME_AGO, "3000d");
    verifierProps.setProp(TimeBasedSubDirDatasetsFinder.COMPACTION_TIMEBASED_MIN_TIME_AGO, "1d");

    CompactionAuditCountVerifier auditVerifier = new CompactionAuditCountVerifier(verifierProps, auditClient);

    // Case 1: every tier reports the same count, so verification succeeds.
    auditClient.setCounts(ImmutableMap.of(PRODUCER_TIER, 1000L, ORIGIN_TIER, 1000L, GOBBLIN_TIER, 1000L));
    Assert.assertTrue(auditVerifier.verify(stubDataset).isSuccessful);

    // Case 2: still succeeds because GOBBLIN_TIER / PRODUCER_TIER is above threshold.
    auditClient.setCounts(ImmutableMap.of(PRODUCER_TIER, 1000L, ORIGIN_TIER, 1100L, GOBBLIN_TIER, 1000L));
    Assert.assertTrue(auditVerifier.verify(stubDataset).isSuccessful);

    // Case 3: fails because GOBBLIN_TIER / (PRODUCER_TIER || ORIGIN_TIER) is below threshold.
    auditClient.setCounts(ImmutableMap.of(PRODUCER_TIER, 1100L, ORIGIN_TIER, 1100L, GOBBLIN_TIER, 1000L));
    Assert.assertFalse(auditVerifier.verify(stubDataset).isSuccessful);
}
Also used : Path(org.apache.hadoop.fs.Path) FileSystemDataset(org.apache.gobblin.dataset.FileSystemDataset) State(org.apache.gobblin.configuration.State) Test(org.testng.annotations.Test)

Example 48 with State

use of org.apache.gobblin.configuration.State in project incubator-gobblin by apache.

the class InputRecordCountHelper method writeRecordCount.

/**
 * Stores a record count in the state kept for a directory.
 *
 * <p>The state for {@code dir} is loaded, its
 * {@link CompactionSlaEventHelper#RECORD_COUNT_TOTAL} property is set to {@code count},
 * and the state is saved back to the same directory.
 *
 * <p>NOTE(review): earlier documentation names {@link InputRecordCountHelper#RECORD_COUNT_FILE}
 * as the file written; the file actually touched is decided by {@code loadState} /
 * {@code saveState} - confirm which applies.
 *
 * @param fs file system in use
 * @param dir directory whose state is updated
 * @param count value stored under the record-count-total property
 * @throws IOException declared for failures while loading or saving the state
 * @deprecated flagged as deprecated in the source; the replacement is not visible here
 */
@Deprecated
public static void writeRecordCount(FileSystem fs, Path dir, long count) throws IOException {
    State directoryState = loadState(fs, dir);
    directoryState.setProp(CompactionSlaEventHelper.RECORD_COUNT_TOTAL, count);
    saveState(fs, dir, directoryState);
}
Also used : State(org.apache.gobblin.configuration.State)

Example 49 with State

use of org.apache.gobblin.configuration.State in project incubator-gobblin by apache.

the class DataPublisherFactory method createResource.

/**
 * Creates the {@link DataPublisher} described by the key of {@code config} and wraps it
 * in a broker response.
 *
 * <p>The publisher class name and the {@link State} are both taken from the
 * {@link DataPublisherKey}. The class is loaded by name, narrowed to a
 * {@code DataPublisher} subtype, and instantiated through
 * {@link DataPublisher#getInstance}.
 *
 * @param broker the broker requesting the resource (not used by this method)
 * @param config scoped configuration view supplying the key and the scope
 * @return a {@link ResourceInstance} when {@code isPublisherCacheable} accepts the
 *         publisher, otherwise an {@link ImmediatelyInvalidResourceEntry}
 * @throws NotConfiguredException declared by the factory contract
 * @throws RuntimeException wrapping any {@link ReflectiveOperationException} raised while
 *         loading or instantiating the publisher class
 * @throws ClassCastException if the named class is not a {@code DataPublisher} subtype
 */
@Override
public SharedResourceFactoryResponse<DataPublisher> createResource(SharedResourcesBroker<S> broker, ScopedConfigView<S, DataPublisherKey> config) throws NotConfiguredException {
    try {
        DataPublisherKey key = config.getKey();
        String publisherClassName = key.getPublisherClassName();
        State state = key.getState();
        // Checked narrowing instead of an unchecked cast: a class that is not a
        // DataPublisher is rejected here, before any instance of it is constructed.
        Class<? extends DataPublisher> dataPublisherClass = Class.forName(publisherClassName).asSubclass(DataPublisher.class);
        log.info("Creating data publisher with class {} in scope {}. ", publisherClassName, config.getScope().toString());
        DataPublisher publisher = DataPublisher.getInstance(dataPublisherClass, state);
        // A cacheable publisher is returned as a regular resource instance. Any other
        // publisher is returned as an immediately-invalid entry; the partially preserved
        // original comment indicates such an entry is handed out only once from the broker.
        if (isPublisherCacheable(publisher)) {
            return new ResourceInstance<>(publisher);
        } else {
            return new ImmediatelyInvalidResourceEntry<>(publisher);
        }
    } catch (ReflectiveOperationException e) {
        throw new RuntimeException(e);
    }
}
Also used : ImmediatelyInvalidResourceEntry(org.apache.gobblin.broker.ImmediatelyInvalidResourceEntry) State(org.apache.gobblin.configuration.State) ResourceInstance(org.apache.gobblin.broker.ResourceInstance)

Example 50 with State

use of org.apache.gobblin.configuration.State in project incubator-gobblin by apache.

the class BaseDataPublisherTest method testWithFsMetricsNoPartitions.

/**
 * Publishes metadata for three work units that each carry their own writer metrics
 * (no partition key) and checks the resulting {@code metadata.json} in the publish directory.
 *
 * @throws IOException declared for the temporary-directory cleanup and helper calls
 */
@Test
public void testWithFsMetricsNoPartitions() throws IOException {
    File publishDir = Files.createTempDir();
    try {
        String publishDirPath = publishDir.getAbsolutePath();
        String globalMetadataJson = new GlobalMetadata().toJson();

        // Job-level state: no metadata output dir, writer metadata published to the final dir.
        State jobState = buildDefaultState(1);
        jobState.removeProp(ConfigurationKeys.DATA_PUBLISHER_METADATA_OUTPUT_DIR);
        jobState.setProp(ConfigurationKeys.DATA_PUBLISH_WRITER_METADATA_KEY, "true");
        jobState.setProp(ConfigurationKeys.WRITER_METADATA_KEY, globalMetadataJson);
        jobState.setProp(ConfigurationKeys.DATA_PUBLISHER_FINAL_DIR, publishDirPath);
        jobState.setProp(ConfigurationKeys.DATA_PUBLISHER_APPEND_EXTRACT_TO_FINAL_DIR, "false");
        jobState.setProp(ConfigurationKeys.DATA_PUBLISHER_METADATA_OUTPUT_FILE, "metadata.json");

        // First work unit: foo1.json.
        FsWriterMetrics firstMetrics = buildWriterMetrics("foo1.json", null, 0, 10);
        WorkUnitState firstWorkUnit = new WorkUnitState();
        firstWorkUnit.setProp(FsDataWriter.FS_WRITER_METRICS_KEY, firstMetrics.toJson());
        firstWorkUnit.setProp(ConfigurationKeys.WRITER_METADATA_KEY, globalMetadataJson);
        addStateToWorkunit(jobState, firstWorkUnit);

        // Second work unit: foo3.json.
        FsWriterMetrics secondMetrics = buildWriterMetrics("foo3.json", null, 1, 30);
        WorkUnitState secondWorkUnit = new WorkUnitState();
        secondWorkUnit.setProp(ConfigurationKeys.WRITER_METADATA_KEY, globalMetadataJson);
        secondWorkUnit.setProp(FsDataWriter.FS_WRITER_METRICS_KEY, secondMetrics.toJson());
        addStateToWorkunit(jobState, secondWorkUnit);

        // Third work unit: foo4.json.
        FsWriterMetrics thirdMetrics = buildWriterMetrics("foo4.json", null, 2, 55);
        WorkUnitState thirdWorkUnit = new WorkUnitState();
        thirdWorkUnit.setProp(ConfigurationKeys.WRITER_METADATA_KEY, globalMetadataJson);
        thirdWorkUnit.setProp(FsDataWriter.FS_WRITER_METRICS_KEY, thirdMetrics.toJson());
        addStateToWorkunit(jobState, thirdWorkUnit);

        BaseDataPublisher dataPublisher = new BaseDataPublisher(jobState);
        dataPublisher.publishMetadata(ImmutableList.of(firstWorkUnit, secondWorkUnit, thirdWorkUnit));

        File metadataFile = new File(publishDirPath, "metadata.json");
        checkMetadata(metadataFile, 3, 95, new FsWriterMetrics.FileInfo("foo3.json", 30), new FsWriterMetrics.FileInfo("foo1.json", 10), new FsWriterMetrics.FileInfo("foo4.json", 55));
    } finally {
        FileUtils.deleteDirectory(publishDir);
    }
}
Also used : GlobalMetadata(org.apache.gobblin.metadata.types.GlobalMetadata) TaskState(org.apache.hadoop.mapreduce.v2.api.records.TaskState) WorkUnitState(org.apache.gobblin.configuration.WorkUnitState) State(org.apache.gobblin.configuration.State) FsWriterMetrics(org.apache.gobblin.writer.FsWriterMetrics) File(java.io.File) Test(org.testng.annotations.Test)

Aggregations

State (org.apache.gobblin.configuration.State)195 Test (org.testng.annotations.Test)103 WorkUnitState (org.apache.gobblin.configuration.WorkUnitState)74 SourceState (org.apache.gobblin.configuration.SourceState)38 Path (org.apache.hadoop.fs.Path)30 File (java.io.File)20 IOException (java.io.IOException)16 Map (java.util.Map)14 WorkingState (org.apache.gobblin.configuration.WorkUnitState.WorkingState)14 WorkUnit (org.apache.gobblin.source.workunit.WorkUnit)14 TaskState (org.apache.hadoop.mapreduce.v2.api.records.TaskState)13 Properties (java.util.Properties)12 FinalState (org.apache.gobblin.util.FinalState)12 Configuration (org.apache.hadoop.conf.Configuration)12 TaskLevelPolicyCheckResults (org.apache.gobblin.qualitychecker.task.TaskLevelPolicyCheckResults)9 Config (com.typesafe.config.Config)8 ArrayList (java.util.ArrayList)8 GenericRecord (org.apache.avro.generic.GenericRecord)8 LongWatermark (org.apache.gobblin.source.extractor.extract.LongWatermark)7 FileInputStream (java.io.FileInputStream)6