
Example 1 with JdbcWriterCommands

Use of org.apache.gobblin.writer.commands.JdbcWriterCommands in project incubator-gobblin by apache.

The class JdbcWriterInitializer, method close().

/**
 * Drops staging tables created by this instance.
 * Truncates the staging table passed by the user.
 * {@inheritDoc}
 * @see org.apache.gobblin.initializer.Initializer#close()
 */
@Override
public void close() {
    LOG.info("Closing " + this.getClass().getSimpleName());
    try (Connection conn = createConnection()) {
        JdbcWriterCommands commands = createJdbcWriterCommands(conn);
        if (!this.createdStagingTables.isEmpty()) {
            for (String stagingTable : this.createdStagingTables) {
                LOG.info("Dropping staging table " + this.createdStagingTables);
                commands.drop(database, stagingTable);
            }
        }
        if (this.userCreatedStagingTable != null) {
            LOG.info("Truncating staging table " + this.userCreatedStagingTable);
            commands.truncate(database, this.userCreatedStagingTable);
        }
    } catch (SQLException e) {
        throw new RuntimeException("Failed to close", e);
    }
}
Also used : SQLException(java.sql.SQLException) JdbcWriterCommands(org.apache.gobblin.writer.commands.JdbcWriterCommands) Connection(java.sql.Connection) ToString(lombok.ToString)
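
The cleanup contract above (tables created by the initializer are dropped; a user-supplied staging table is only truncated) can be illustrated with a minimal, hypothetical sketch against a mocked JdbcWriterCommands. The harness below is ours, not Gobblin's; only the drop/truncate calls come from the example.

import static org.mockito.Mockito.*;

import java.sql.SQLException;
import java.util.Arrays;
import java.util.List;

import org.apache.gobblin.writer.commands.JdbcWriterCommands;

public class CleanupContractSketch {
    public static void main(String[] args) throws SQLException {
        JdbcWriterCommands commands = mock(JdbcWriterCommands.class);
        String database = "db";

        // Tables this instance created are dropped on close()...
        List<String> createdStagingTables = Arrays.asList("stage_1", "stage_2");
        for (String stagingTable : createdStagingTables) {
            commands.drop(database, stagingTable);
        }
        // ...while a staging table supplied by the user is only truncated.
        String userCreatedStagingTable = "user_stage";
        commands.truncate(database, userCreatedStagingTable);

        verify(commands).drop(database, "stage_1");
        verify(commands).drop(database, "stage_2");
        verify(commands).truncate(database, "user_stage");
        verify(commands, never()).drop(database, "user_stage");
    }
}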

Example 2 with JdbcWriterCommands

Use of org.apache.gobblin.writer.commands.JdbcWriterCommands in project incubator-gobblin by apache.

The class AvroToJdbcEntryConverterTest, method testFlattening().

@Test
public void testFlattening() throws IOException, SchemaConversionException, SQLException, URISyntaxException, DataConversionException {
    final String db = "db";
    final String table = "users";
    Map<String, JdbcType> dateColums = new HashMap<>();
    dateColums.put("date_of_birth", JdbcType.DATE);
    dateColums.put("last_modified", JdbcType.TIME);
    dateColums.put("created", JdbcType.TIMESTAMP);
    JdbcWriterCommands mockWriterCommands = mock(JdbcWriterCommands.class);
    when(mockWriterCommands.retrieveDateColumns(db, table)).thenReturn(dateColums);
    JdbcWriterCommandsFactory factory = mock(JdbcWriterCommandsFactory.class);
    when(factory.newInstance(any(State.class), any(Connection.class))).thenReturn(mockWriterCommands);
    List<JdbcEntryMetaDatum> jdbcEntryMetaData = new ArrayList<>();
    jdbcEntryMetaData.add(new JdbcEntryMetaDatum("name", JdbcType.VARCHAR));
    jdbcEntryMetaData.add(new JdbcEntryMetaDatum("favorite_number", JdbcType.VARCHAR));
    jdbcEntryMetaData.add(new JdbcEntryMetaDatum("favorite_color", JdbcType.VARCHAR));
    jdbcEntryMetaData.add(new JdbcEntryMetaDatum("date_of_birth", JdbcType.DATE));
    jdbcEntryMetaData.add(new JdbcEntryMetaDatum("last_modified", JdbcType.TIME));
    jdbcEntryMetaData.add(new JdbcEntryMetaDatum("created", JdbcType.TIMESTAMP));
    jdbcEntryMetaData.add(new JdbcEntryMetaDatum("nested1_nested1_string", JdbcType.VARCHAR));
    jdbcEntryMetaData.add(new JdbcEntryMetaDatum("nested1_nested1_int", JdbcType.INTEGER));
    jdbcEntryMetaData.add(new JdbcEntryMetaDatum("nested1_nested2_union_nested2_string", JdbcType.VARCHAR));
    jdbcEntryMetaData.add(new JdbcEntryMetaDatum("nested1_nested2_union_nested2_int", JdbcType.INTEGER));
    JdbcEntrySchema expected = new JdbcEntrySchema(jdbcEntryMetaData);
    Schema inputSchema = new Schema.Parser().parse(getClass().getResourceAsStream("/converter/pickfields_nested_with_union.avsc"));
    WorkUnitState workUnitState = new WorkUnitState();
    workUnitState.appendToListProp(JdbcPublisher.JDBC_PUBLISHER_FINAL_TABLE_NAME, table);
    AvroToJdbcEntryConverter converter = new AvroToJdbcEntryConverter(workUnitState);
    Map<String, JdbcType> dateColumnMapping = Maps.newHashMap();
    dateColumnMapping.put("date_of_birth", JdbcType.DATE);
    dateColumnMapping.put("last_modified", JdbcType.TIME);
    dateColumnMapping.put("created", JdbcType.TIMESTAMP);
    workUnitState.appendToListProp(AvroToJdbcEntryConverter.CONVERTER_AVRO_JDBC_DATE_FIELDS, new Gson().toJson(dateColumnMapping));
    JdbcEntrySchema actualSchema = converter.convertSchema(inputSchema, workUnitState);
    Assert.assertEquals(expected, actualSchema);
    try (DataFileReader<GenericRecord> srcDataFileReader = new DataFileReader<GenericRecord>(new File(getClass().getResource("/converter/pickfields_nested_with_union.avro").toURI()), new GenericDatumReader<GenericRecord>(inputSchema))) {
        List<JdbcEntryData> entries = new ArrayList<>();
        while (srcDataFileReader.hasNext()) {
            JdbcEntryData actualData = converter.convertRecord(actualSchema, srcDataFileReader.next(), workUnitState).iterator().next();
            entries.add(actualData);
        }
        final JsonSerializer<JdbcEntryDatum> datumSer = new JsonSerializer<JdbcEntryDatum>() {

            @Override
            public JsonElement serialize(JdbcEntryDatum datum, Type typeOfSrc, JsonSerializationContext context) {
                JsonObject jso = new JsonObject();
                if (datum.getVal() == null) {
                    jso.add(datum.getColumnName(), null);
                    return jso;
                }
                if (datum.getVal() instanceof Date) {
                    jso.addProperty(datum.getColumnName(), ((Date) datum.getVal()).getTime());
                } else if (datum.getVal() instanceof Timestamp) {
                    jso.addProperty(datum.getColumnName(), ((Timestamp) datum.getVal()).getTime());
                } else if (datum.getVal() instanceof Time) {
                    jso.addProperty(datum.getColumnName(), ((Time) datum.getVal()).getTime());
                } else {
                    jso.addProperty(datum.getColumnName(), datum.getVal().toString());
                }
                return jso;
            }
        };
        JsonSerializer<JdbcEntryData> serializer = new JsonSerializer<JdbcEntryData>() {

            @Override
            public JsonElement serialize(JdbcEntryData src, Type typeOfSrc, JsonSerializationContext context) {
                JsonArray arr = new JsonArray();
                for (JdbcEntryDatum datum : src) {
                    arr.add(datumSer.serialize(datum, datum.getClass(), context));
                }
                return arr;
            }
        };
        Gson gson = new GsonBuilder().registerTypeAdapter(JdbcEntryData.class, serializer).serializeNulls().create();
        JsonElement actualSerialized = gson.toJsonTree(entries);
        JsonElement expectedSerialized = new JsonParser().parse(new InputStreamReader(getClass().getResourceAsStream("/converter/pickfields_nested_with_union.json")));
        Assert.assertEquals(actualSerialized, expectedSerialized);
    }
    converter.close();
}
Also used : HashMap(java.util.HashMap) JdbcWriterCommands(org.apache.gobblin.writer.commands.JdbcWriterCommands) WorkUnitState(org.apache.gobblin.configuration.WorkUnitState) Schema(org.apache.avro.Schema) ArrayList(java.util.ArrayList) Gson(com.google.gson.Gson) JsonObject(com.google.gson.JsonObject) Time(java.sql.Time) JsonSerializer(com.google.gson.JsonSerializer) Timestamp(java.sql.Timestamp) DataFileReader(org.apache.avro.file.DataFileReader) GenericRecord(org.apache.avro.generic.GenericRecord) JdbcWriterCommandsFactory(org.apache.gobblin.writer.commands.JdbcWriterCommandsFactory) JsonParser(com.google.gson.JsonParser) InputStreamReader(java.io.InputStreamReader) GsonBuilder(com.google.gson.GsonBuilder) Connection(java.sql.Connection) Date(java.sql.Date) JsonArray(com.google.gson.JsonArray) DestinationType(org.apache.gobblin.writer.Destination.DestinationType) Type(java.lang.reflect.Type) WorkUnitState(org.apache.gobblin.configuration.WorkUnitState) State(org.apache.gobblin.configuration.State) JsonElement(com.google.gson.JsonElement) JsonSerializationContext(com.google.gson.JsonSerializationContext) File(java.io.File) Test(org.testng.annotations.Test)
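
The expected column names above (e.g. nested1_nested1_string) imply that nested record fields are flattened by joining field names with underscores. Below is a simplified, hypothetical re-implementation of just that naming scheme, records only; the real converter also flattens unions (note nested1_nested2_union_nested2_string) and maps Avro types to JDBC types.

import java.util.ArrayList;
import java.util.List;

import org.apache.avro.Schema;

public class FlattenNamesSketch {

    // Walk a record schema, joining nested field names with '_'.
    static void collect(String prefix, Schema schema, List<String> out) {
        if (schema.getType() == Schema.Type.RECORD) {
            for (Schema.Field field : schema.getFields()) {
                String name = prefix.isEmpty() ? field.name() : prefix + "_" + field.name();
                collect(name, field.schema(), out);
            }
        } else {
            out.add(prefix);
        }
    }

    public static void main(String[] args) {
        Schema schema = new Schema.Parser().parse(
            "{\"type\":\"record\",\"name\":\"user\",\"fields\":["
          + "{\"name\":\"name\",\"type\":\"string\"},"
          + "{\"name\":\"nested1\",\"type\":{\"type\":\"record\",\"name\":\"n1\",\"fields\":["
          + "{\"name\":\"nested1_string\",\"type\":\"string\"},"
          + "{\"name\":\"nested1_int\",\"type\":\"int\"}]}}]}");
        List<String> names = new ArrayList<>();
        collect("", schema, names);
        // Prints [name, nested1_nested1_string, nested1_nested1_int]
        System.out.println(names);
    }
}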

Example 3 with JdbcWriterCommands

Use of org.apache.gobblin.writer.commands.JdbcWriterCommands in project incubator-gobblin by apache.

The class AvroToJdbcEntryConverterTest, method testDateConversion().

@Test
public void testDateConversion() throws IOException, SchemaConversionException, SQLException {
    final String db = "db";
    final String table = "users";
    Map<String, JdbcType> dateColums = new HashMap<>();
    dateColums.put("date_of_birth", JdbcType.DATE);
    dateColums.put("last_modified", JdbcType.TIME);
    dateColums.put("created", JdbcType.TIMESTAMP);
    JdbcWriterCommands mockWriterCommands = mock(JdbcWriterCommands.class);
    when(mockWriterCommands.retrieveDateColumns(db, table)).thenReturn(dateColums);
    JdbcWriterCommandsFactory factory = mock(JdbcWriterCommandsFactory.class);
    when(factory.newInstance(any(State.class), any(Connection.class))).thenReturn(mockWriterCommands);
    List<JdbcEntryMetaDatum> jdbcEntryMetaData = new ArrayList<>();
    jdbcEntryMetaData.add(new JdbcEntryMetaDatum("name", JdbcType.VARCHAR));
    jdbcEntryMetaData.add(new JdbcEntryMetaDatum("favorite_number", JdbcType.VARCHAR));
    jdbcEntryMetaData.add(new JdbcEntryMetaDatum("favorite_color", JdbcType.VARCHAR));
    jdbcEntryMetaData.add(new JdbcEntryMetaDatum("date_of_birth", JdbcType.DATE));
    jdbcEntryMetaData.add(new JdbcEntryMetaDatum("last_modified", JdbcType.TIME));
    jdbcEntryMetaData.add(new JdbcEntryMetaDatum("created", JdbcType.TIMESTAMP));
    JdbcEntrySchema expected = new JdbcEntrySchema(jdbcEntryMetaData);
    Schema inputSchema = new Schema.Parser().parse(getClass().getResourceAsStream("/converter/fieldPickInput.avsc"));
    WorkUnitState workUnitState = new WorkUnitState();
    workUnitState.appendToListProp(JdbcPublisher.JDBC_PUBLISHER_FINAL_TABLE_NAME, table);
    AvroToJdbcEntryConverter converter = new AvroToJdbcEntryConverter(workUnitState);
    Map<String, JdbcType> dateColumnMapping = Maps.newHashMap();
    dateColumnMapping.put("date_of_birth", JdbcType.DATE);
    dateColumnMapping.put("last_modified", JdbcType.TIME);
    dateColumnMapping.put("created", JdbcType.TIMESTAMP);
    workUnitState.appendToListProp(AvroToJdbcEntryConverter.CONVERTER_AVRO_JDBC_DATE_FIELDS, new Gson().toJson(dateColumnMapping));
    JdbcEntrySchema actual = converter.convertSchema(inputSchema, workUnitState);
    Assert.assertEquals(expected, actual);
}
Also used : HashMap(java.util.HashMap) JdbcWriterCommands(org.apache.gobblin.writer.commands.JdbcWriterCommands) WorkUnitState(org.apache.gobblin.configuration.WorkUnitState) Schema(org.apache.avro.Schema) Connection(java.sql.Connection) ArrayList(java.util.ArrayList) Gson(com.google.gson.Gson) WorkUnitState(org.apache.gobblin.configuration.WorkUnitState) State(org.apache.gobblin.configuration.State) JdbcWriterCommandsFactory(org.apache.gobblin.writer.commands.JdbcWriterCommandsFactory) Test(org.testng.annotations.Test)
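
Both tests hand the date-column mapping to the converter through the CONVERTER_AVRO_JDBC_DATE_FIELDS property as JSON. A standalone sketch of what that property carries (our illustration; Gson serializes enum values by name by default):

import java.util.LinkedHashMap;
import java.util.Map;

import com.google.gson.Gson;

import org.apache.gobblin.converter.jdbc.JdbcType;

public class DateFieldsJsonSketch {
    public static void main(String[] args) {
        Map<String, JdbcType> dateColumnMapping = new LinkedHashMap<>();
        dateColumnMapping.put("date_of_birth", JdbcType.DATE);
        dateColumnMapping.put("last_modified", JdbcType.TIME);
        dateColumnMapping.put("created", JdbcType.TIMESTAMP);

        // Prints {"date_of_birth":"DATE","last_modified":"TIME","created":"TIMESTAMP"}
        System.out.println(new Gson().toJson(dateColumnMapping));
    }
}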

Example 4 with JdbcWriterCommands

Use of org.apache.gobblin.writer.commands.JdbcWriterCommands in project incubator-gobblin by apache.

The class AvroToJdbcEntryConverterInitializer, method initialize().

/**
 * AvroToJdbcEntryConverter needs the list of date columns that exist in the table. As we don't
 * want each converter making its own connection against the database to fetch the same
 * information, the ConverterInitializer retrieves it once here and stores it in the WorkUnit so
 * that AvroToJdbcEntryConverter can use it later.
 *
 * {@inheritDoc}
 * @see org.apache.gobblin.initializer.Initializer#initialize()
 */
@Override
public void initialize() {
    String table = Preconditions.checkNotNull(this.state.getProp(ForkOperatorUtils.getPropertyNameForBranch(JdbcPublisher.JDBC_PUBLISHER_FINAL_TABLE_NAME, this.branches, this.branchId)));
    String db = Preconditions.checkNotNull(this.state.getProp(ForkOperatorUtils.getPropertyNameForBranch(JdbcPublisher.JDBC_PUBLISHER_DATABASE_NAME, this.branches, this.branchId)));
    try (Connection conn = createConnection()) {
        JdbcWriterCommands commands = this.jdbcWriterCommandsFactory.newInstance(this.state, conn);
        Map<String, JdbcType> dateColumnMapping = commands.retrieveDateColumns(db, table);
        LOG.info("Date column mapping: " + dateColumnMapping);
        final String dateFieldsKey = ForkOperatorUtils.getPropertyNameForBranch(AvroToJdbcEntryConverter.CONVERTER_AVRO_JDBC_DATE_FIELDS, this.branches, this.branchId);
        for (WorkUnit wu : this.workUnits) {
            wu.setProp(dateFieldsKey, new Gson().toJson(dateColumnMapping));
        }
    } catch (SQLException e) {
        throw new RuntimeException(e);
    }
}
Also used : SQLException(java.sql.SQLException) JdbcWriterCommands(org.apache.gobblin.writer.commands.JdbcWriterCommands) Connection(java.sql.Connection) JdbcType(org.apache.gobblin.converter.jdbc.JdbcType) Gson(com.google.gson.Gson) WorkUnit(org.apache.gobblin.source.workunit.WorkUnit)
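
On the consuming side, each AvroToJdbcEntryConverter presumably reads the same property back instead of opening its own connection. A hedged sketch of that readback, assuming a plain Gson TypeToken deserialization (our illustration, not necessarily Gobblin's exact code):

import java.lang.reflect.Type;
import java.util.Map;

import com.google.gson.Gson;
import com.google.gson.reflect.TypeToken;

import org.apache.gobblin.converter.jdbc.JdbcType;

public class DateFieldsReadbackSketch {
    public static void main(String[] args) {
        // JSON as stored into the WorkUnit by the initializer above.
        String json = "{\"date_of_birth\":\"DATE\",\"last_modified\":\"TIME\",\"created\":\"TIMESTAMP\"}";
        Type mapType = new TypeToken<Map<String, JdbcType>>() { }.getType();
        Map<String, JdbcType> dateColumnMapping = new Gson().fromJson(json, mapType);

        // Prints TIMESTAMP; no database connection needed per converter.
        System.out.println(dateColumnMapping.get("created"));
    }
}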

Example 5 with JdbcWriterCommands

Use of org.apache.gobblin.writer.commands.JdbcWriterCommands in project incubator-gobblin by apache.

The class JdbcWriterInitializer, method initialize().

/**
 * Initializes AvroFileJdbcSource for the writer; this needs to happen in a single-threaded
 * environment. On each branch:
 * 1. Check whether the user chose to skip the staging table.
 * 1.1. If the user chose to skip the staging table and decided to replace the final table, truncate the final table.
 * 2. (The user didn't choose to skip the staging table.) Check whether the user passed a staging table.
 * 2.1. Truncate the staging table, if requested.
 * 2.2. Confirm that the staging table is empty.
 * 3. Create a staging table (at this point the user hasn't passed a staging table and isn't skipping staging).
 * 3.1. Create a staging table with a unique name.
 * 3.2. Try to drop and recreate the table to confirm that we can drop it later.
 * 4. Update the WorkUnit state with the staging table information.
 */
@Override
public void initialize() {
    try (Connection conn = createConnection()) {
        JdbcWriterCommands commands = createJdbcWriterCommands(conn);
        // 1. Check if user chose to skip the staging table
        JobCommitPolicy jobCommitPolicy = JobCommitPolicy.getCommitPolicy(this.state);
        boolean isSkipStaging = !JobCommitPolicy.COMMIT_ON_FULL_SUCCESS.equals(jobCommitPolicy);
        if (isSkipStaging) {
            LOG.info("Writer will write directly to destination table as JobCommitPolicy is " + jobCommitPolicy);
        }
        final String publishTable = getProp(this.state, JdbcPublisher.JDBC_PUBLISHER_FINAL_TABLE_NAME, this.branches, this.branchId);
        final String stagingTableKey = ForkOperatorUtils.getPropertyNameForBranch(ConfigurationKeys.WRITER_STAGING_TABLE, this.branches, this.branchId);
        String stagingTable = this.state.getProp(stagingTableKey);
        int i = -1;
        for (WorkUnit wu : this.workUnits) {
            i++;
            if (isSkipStaging) {
                LOG.info("User chose to skip staing table on branch " + this.branchId + " workunit " + i);
                wu.setProp(stagingTableKey, publishTable);
                if (i == 0) {
                    // 1.1. If user chose to skip the staging table, and user decided to replace final table, truncate final table.
                    if (getPropAsBoolean(this.state, JdbcPublisher.JDBC_PUBLISHER_REPLACE_FINAL_TABLE, this.branches, this.branchId)) {
                        LOG.info("User chose to replace final table " + publishTable + " on branch " + this.branchId + " workunit " + i);
                        commands.truncate(database, publishTable);
                    }
                }
                continue;
            }
            // 2. (User didn't choose to skip the staging table.) Check if user passed the staging table.
            if (!StringUtils.isEmpty(stagingTable)) {
                LOG.info("Staging table for branch " + this.branchId + " from user: " + stagingTable);
                wu.setProp(stagingTableKey, stagingTable);
                if (i == 0) {
                    // 2.1. Truncate staging table, if requested.
                    if (this.state.getPropAsBoolean(ForkOperatorUtils.getPropertyNameForBranch(ConfigurationKeys.WRITER_TRUNCATE_STAGING_TABLE, this.branches, this.branchId), false)) {
                        LOG.info("Truncating staging table " + stagingTable + " as requested.");
                        commands.truncate(database, stagingTable);
                    }
                    // 2.2. Confirm if staging table is empty.
                    if (!commands.isEmpty(database, stagingTable)) {
                        LOG.error("Staging table " + stagingTable + " is not empty. Failing.");
                        throw new IllegalArgumentException("Staging table " + stagingTable + " should be empty.");
                    }
                    this.userCreatedStagingTable = stagingTable;
                }
                continue;
            }
            // 3. Create staging table (At this point user hasn't passed the staging table, and not skipping staging table).
            LOG.info("Staging table has not been passed from user for branch " + this.branchId + " workunit " + i + " . Creating.");
            String createdStagingTable = createStagingTable(conn, commands);
            wu.setProp(stagingTableKey, createdStagingTable);
            this.createdStagingTables.add(createdStagingTable);
            LOG.info("Staging table " + createdStagingTable + " has been created for branchId " + this.branchId + " workunit " + i);
        }
    } catch (SQLException e) {
        throw new RuntimeException("Failed with SQL", e);
    }
}
Also used : SQLException(java.sql.SQLException) JdbcWriterCommands(org.apache.gobblin.writer.commands.JdbcWriterCommands) JobCommitPolicy(org.apache.gobblin.source.extractor.JobCommitPolicy) Connection(java.sql.Connection) ToString(lombok.ToString) WorkUnit(org.apache.gobblin.source.workunit.WorkUnit)
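
The skip-staging decision in step 1 hinges entirely on the job commit policy. A minimal sketch, assuming the standard ConfigurationKeys.JOB_COMMIT_POLICY_KEY property and that "partial" selects a policy other than COMMIT_ON_FULL_SUCCESS:

import org.apache.gobblin.configuration.ConfigurationKeys;
import org.apache.gobblin.configuration.State;
import org.apache.gobblin.source.extractor.JobCommitPolicy;

public class SkipStagingSketch {
    public static void main(String[] args) {
        State state = new State();
        // Any policy other than COMMIT_ON_FULL_SUCCESS makes the writer
        // bypass staging and write straight to the destination table.
        state.setProp(ConfigurationKeys.JOB_COMMIT_POLICY_KEY, "partial");

        JobCommitPolicy jobCommitPolicy = JobCommitPolicy.getCommitPolicy(state);
        boolean isSkipStaging = !JobCommitPolicy.COMMIT_ON_FULL_SUCCESS.equals(jobCommitPolicy);
        System.out.println("skip staging: " + isSkipStaging); // true here
    }
}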

Aggregations

Connection (java.sql.Connection): 9 usages
JdbcWriterCommands (org.apache.gobblin.writer.commands.JdbcWriterCommands): 9 usages
State (org.apache.gobblin.configuration.State): 5 usages
Test (org.testng.annotations.Test): 5 usages
Gson (com.google.gson.Gson): 4 usages
SQLException (java.sql.SQLException): 4 usages
WorkUnitState (org.apache.gobblin.configuration.WorkUnitState): 4 usages
ArrayList (java.util.ArrayList): 3 usages
HashMap (java.util.HashMap): 3 usages
Schema (org.apache.avro.Schema): 3 usages
JdbcWriterCommandsFactory (org.apache.gobblin.writer.commands.JdbcWriterCommandsFactory): 3 usages
ToString (lombok.ToString): 2 usages
JdbcEntryData (org.apache.gobblin.converter.jdbc.JdbcEntryData): 2 usages
WorkUnit (org.apache.gobblin.source.workunit.WorkUnit): 2 usages
JdbcWriter (org.apache.gobblin.writer.JdbcWriter): 2 usages
GsonBuilder (com.google.gson.GsonBuilder): 1 usage
JsonArray (com.google.gson.JsonArray): 1 usage
JsonElement (com.google.gson.JsonElement): 1 usage
JsonObject (com.google.gson.JsonObject): 1 usage
JsonParser (com.google.gson.JsonParser): 1 usage