Search in sources :

Example 91 with WorkUnitState

use of org.apache.gobblin.configuration.WorkUnitState in project incubator-gobblin by apache.

the class JdbcExtractorTest method testUnsignedInt.

/**
 * Checks the commands returned by {@code MysqlExtractor#getSchemaMetadata}: the second
 * command's first parameter must mention "unsigned" only after
 * {@code SOURCE_QUERYBASED_PROMOTE_UNSIGNED_INT_TO_BIGINT} has been set to "true".
 */
@Test
public void testUnsignedInt() throws SchemaException {
    State state = new WorkUnitState();
    state.setId("id");
    MysqlExtractor extractor = new MysqlExtractor((WorkUnitState) state);

    // Default configuration: no unsigned handling in the query parameters.
    List<Command> defaultCommands = extractor.getSchemaMetadata("db", "table");
    boolean defaultFirstIsQuery = defaultCommands.get(0).getCommandType() == JdbcCommand.JdbcCommandType.QUERY;
    assertTrue(defaultFirstIsQuery);
    boolean defaultQueryHasBigint = defaultCommands.get(0).getParams().get(0).contains("bigint");
    assertTrue(defaultQueryHasBigint);
    boolean defaultSecondIsParams = defaultCommands.get(1).getCommandType() == JdbcCommand.JdbcCommandType.QUERYPARAMS;
    assertTrue(defaultSecondIsParams);
    boolean defaultParamsHaveUnsigned = defaultCommands.get(1).getParams().get(0).contains("unsigned");
    assertTrue(!defaultParamsHaveUnsigned);

    // Enable promotion of unsigned int to bigint and regenerate the commands.
    state.setProp(ConfigurationKeys.SOURCE_QUERYBASED_PROMOTE_UNSIGNED_INT_TO_BIGINT, "true");
    List<Command> promotedCommands = extractor.getSchemaMetadata("db", "table");
    boolean promotedFirstIsQuery = promotedCommands.get(0).getCommandType() == JdbcCommand.JdbcCommandType.QUERY;
    assertTrue(promotedFirstIsQuery);
    boolean promotedQueryHasBigint = promotedCommands.get(0).getParams().get(0).contains("bigint");
    assertTrue(promotedQueryHasBigint);
    boolean promotedSecondIsParams = promotedCommands.get(1).getCommandType() == JdbcCommand.JdbcCommandType.QUERYPARAMS;
    assertTrue(promotedSecondIsParams);
    boolean promotedParamsHaveUnsigned = promotedCommands.get(1).getParams().get(0).contains("unsigned");
    assertTrue(promotedParamsHaveUnsigned);
}
Also used : Command(org.apache.gobblin.source.extractor.extract.Command) WorkUnitState(org.apache.gobblin.configuration.WorkUnitState) State(org.apache.gobblin.configuration.State) WorkUnitState(org.apache.gobblin.configuration.WorkUnitState) Test(org.testng.annotations.Test)

Example 92 with WorkUnitState

use of org.apache.gobblin.configuration.WorkUnitState in project incubator-gobblin by apache.

the class PostgresqlExtractorTest method setup.

/**
 * Builds the fixtures used by this test class: a {@code JdbcCommandOutput} holding the mock
 * result set, a {@code WorkUnitState} with id "id", and the {@code PostgresqlExtractor}
 * created from that state.
 *
 * @throws AssertionError if the mock result set cannot be built or stored; the original
 *         exception is attached as the cause
 */
@BeforeClass
public void setup() {
    output = new JdbcCommandOutput();
    try {
        output.put(new JdbcCommand(), buildMockResultSet());
    } catch (Exception e) {
        // Fail the setup explicitly and keep the triggering exception as the cause, so the
        // report shows why the mock result set could not be initialized.
        throw new AssertionError("PostgresqlExtractorTest: error initializing mock result set", e);
    }
    state = new WorkUnitState();
    state.setId("id");
    postgresqlExtractor = new PostgresqlExtractor((WorkUnitState) state);
}
Also used : WorkUnitState(org.apache.gobblin.configuration.WorkUnitState) BeforeClass(org.testng.annotations.BeforeClass)

Example 93 with WorkUnitState

use of org.apache.gobblin.configuration.WorkUnitState in project incubator-gobblin by apache.

the class JsonIntermediateToParquetGroupConverterTest method setUp.

/**
 * Loads the JSON test cases from the classpath resource at {@code RESOURCE_PATH} into
 * {@code testCases} and creates the {@code WorkUnitState} stored in {@code workUnit}.
 */
@BeforeClass
public static void setUp() {
    Type jsonObjectType = new TypeToken<JsonObject>() {
    }.getType();
    Gson jsonParser = new Gson();
    // The whole resource is parsed as a single JSON object.
    InputStreamReader resourceReader = new InputStreamReader(JsonIntermediateToParquetGroupConverter.class.getResourceAsStream(RESOURCE_PATH));
    JsonObject parsedResource = jsonParser.fromJson(resourceReader, jsonObjectType);
    testCases = parsedResource.getAsJsonObject();

    // Work unit state backed by a snapshot-only extract of test_namespace.test_table.
    SourceState sourceState = new SourceState();
    Extract snapshotExtract = sourceState.createExtract(Extract.TableType.SNAPSHOT_ONLY, "test_namespace", "test_table");
    workUnit = new WorkUnitState(sourceState.createWorkUnit(snapshotExtract));
}
Also used : MessageType(parquet.schema.MessageType) Type(java.lang.reflect.Type) SourceState(org.apache.gobblin.configuration.SourceState) InputStreamReader(java.io.InputStreamReader) WorkUnitState(org.apache.gobblin.configuration.WorkUnitState) JsonObject(com.google.gson.JsonObject) Gson(com.google.gson.Gson) BeforeClass(org.testng.annotations.BeforeClass)

Example 94 with WorkUnitState

use of org.apache.gobblin.configuration.WorkUnitState in project incubator-gobblin by apache.

the class JdbcPublisher method getStagingTables.

/**
 * Groups the given work unit states by the value of their staging table property.
 *
 * <p>The property name is obtained from
 * {@code ForkOperatorUtils.getPropertyNameForBranch(ConfigurationKeys.WRITER_STAGING_TABLE, branches, i)}.
 *
 * @param states work unit states to group
 * @param branches forwarded to {@code getPropertyNameForBranch} (presumably the number of branches)
 * @param i forwarded to {@code getPropertyNameForBranch} (presumably the branch index)
 * @return a map from staging table value to the states carrying that value, in iteration order
 * @throws NullPointerException if a state has no value for the staging table property
 */
private static Map<String, List<WorkUnitState>> getStagingTables(Collection<? extends WorkUnitState> states, int branches, int i) {
    // The property name depends only on the branch arguments, so resolve it once up front.
    String stagingTableKey = ForkOperatorUtils.getPropertyNameForBranch(ConfigurationKeys.WRITER_STAGING_TABLE, branches, i);
    Map<String, List<WorkUnitState>> stagingTables = Maps.newHashMap();
    for (WorkUnitState workUnitState : states) {
        // Name the missing property in the failure instead of raising a bare NPE.
        String stagingTable = Preconditions.checkNotNull(workUnitState.getProp(stagingTableKey), "Missing staging table property %s", stagingTableKey);
        List<WorkUnitState> existing = stagingTables.get(stagingTable);
        if (existing == null) {
            existing = Lists.newArrayList();
            stagingTables.put(stagingTable, existing);
        }
        existing.add(workUnitState);
    }
    return stagingTables;
}
Also used : WorkUnitState(org.apache.gobblin.configuration.WorkUnitState) List(java.util.List)

Example 95 with WorkUnitState

use of org.apache.gobblin.configuration.WorkUnitState in project incubator-gobblin by apache.

the class GobblinMultiTaskAttempt method runWorkUnits.

/**
 * Creates and submits one {@link Task} for every remaining {@link WorkUnit} of this attempt.
 *
 * <p>
 *   Each {@link WorkUnit} is expected to carry its task ID in the property
 *   {@link ConfigurationKeys#TASK_ID_KEY}. Work units whose task already succeeded in a prior
 *   attempt are skipped. Once all tasks are submitted, a {@code TASKS_SUBMITTED} event is emitted
 *   with the latch's registered-party count.
 * </p>
 *
 * @param countDownLatch a {@link CountUpAndDownLatch} that is counted up once per submitted task
 *                       and handed to every created task
 * @return the list of {@link Task}s that were submitted
 */
private List<Task> runWorkUnits(CountUpAndDownLatch countDownLatch) {
    List<Task> submittedTasks = Lists.newArrayList();
    while (this.workUnits.hasNext()) {
        WorkUnit currentWorkUnit = this.workUnits.next();
        String taskId = currentWorkUnit.getProp(ConfigurationKeys.TASK_ID_KEY);
        // Only tasks that did not execute successfully in a prior attempt are run again.
        if (!taskSuccessfulInPriorAttempt(taskId)) {
            countDownLatch.countUp();

            // Build the per-task state on top of the job state and a task-scoped broker.
            SubscopedBrokerBuilder<GobblinScopeTypes, ?> taskScopedBroker = this.jobBroker.newSubscopedBuilder(new TaskScopeInstance(taskId));
            WorkUnitState taskWorkUnitState = new WorkUnitState(currentWorkUnit, this.jobState, taskScopedBroker);
            taskWorkUnitState.setId(taskId);
            taskWorkUnitState.setProp(ConfigurationKeys.JOB_ID_KEY, this.jobId);
            taskWorkUnitState.setProp(ConfigurationKeys.TASK_ID_KEY, taskId);
            if (this.containerIdOptional.isPresent()) {
                taskWorkUnitState.setProp(ConfigurationKeys.TASK_ATTEMPT_ID_KEY, this.containerIdOptional.get());
            }

            // Wrap the state in a task, register it for tracking and hand it to the executor.
            Task newTask = createTaskRunnable(taskWorkUnitState, countDownLatch);
            this.taskStateTracker.registerNewTask(newTask);
            newTask.setTaskFuture(this.taskExecutor.submit(newTask));
            submittedTasks.add(newTask);
        }
    }

    // Report how many parties are registered on the latch after submission.
    EventSubmitter eventSubmitter = new EventSubmitter.Builder(JobMetrics.get(this.jobId).getMetricContext(), "gobblin.runtime").build();
    String registeredParties = Long.toString(countDownLatch.getRegisteredParties());
    eventSubmitter.submit(JobEvent.TASKS_SUBMITTED, "tasksCount", registeredParties);
    return submittedTasks;
}
Also used : GobblinScopeTypes(org.apache.gobblin.broker.gobblin_scopes.GobblinScopeTypes) WorkUnitState(org.apache.gobblin.configuration.WorkUnitState) SubscopedBrokerBuilder(org.apache.gobblin.broker.iface.SubscopedBrokerBuilder) WorkUnit(org.apache.gobblin.source.workunit.WorkUnit) TaskScopeInstance(org.apache.gobblin.broker.gobblin_scopes.TaskScopeInstance)

Aggregations

WorkUnitState (org.apache.gobblin.configuration.WorkUnitState): 222
Test (org.testng.annotations.Test): 143
State (org.apache.gobblin.configuration.State): 48
SourceState (org.apache.gobblin.configuration.SourceState): 39
WorkUnit (org.apache.gobblin.source.workunit.WorkUnit): 39
Schema (org.apache.avro.Schema): 29
Path (org.apache.hadoop.fs.Path): 26
GenericRecord (org.apache.avro.generic.GenericRecord): 19
JsonObject (com.google.gson.JsonObject): 17
ArrayList (java.util.ArrayList): 16
File (java.io.File): 14
TaskState (org.apache.hadoop.mapreduce.v2.api.records.TaskState): 12
List (java.util.List): 11
Configuration (org.apache.hadoop.conf.Configuration): 11
IOException (java.io.IOException): 10
LongWatermark (org.apache.gobblin.source.extractor.extract.LongWatermark): 10
Extract (org.apache.gobblin.source.workunit.Extract): 10
FileSystem (org.apache.hadoop.fs.FileSystem): 10
Closer (com.google.common.io.Closer): 8
JsonParser (com.google.gson.JsonParser): 8