use of org.apache.gobblin.writer.commands.JdbcWriterCommands in project incubator-gobblin by apache.
the class JdbcWriterInitializer method close.
/**
 * Drops any staging tables created by this instance.
 * Truncates the staging table passed by the user.
 * {@inheritDoc}
 * @see org.apache.gobblin.Initializer#close()
 */
@Override
public void close() {
  LOG.info("Closing " + this.getClass().getSimpleName());
  try (Connection conn = createConnection()) {
    JdbcWriterCommands commands = createJdbcWriterCommands(conn);
    if (!this.createdStagingTables.isEmpty()) {
      for (String stagingTable : this.createdStagingTables) {
LOG.info("Dropping staging table " + this.createdStagingTables);
        commands.drop(database, stagingTable);
      }
    }
    if (this.userCreatedStagingTable != null) {
      LOG.info("Truncating staging table " + this.userCreatedStagingTable);
      commands.truncate(database, this.userCreatedStagingTable);
    }
  } catch (SQLException e) {
    throw new RuntimeException("Failed to close", e);
  }
}
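The drop and truncate calls above are delegated to the JdbcWriterCommands implementation returned by createJdbcWriterCommands(conn). As a rough illustration only, a minimal sketch of the two methods used here, assuming MySQL-style SQL and a conn field held by the implementation (this is not Gobblin's actual MySqlWriterCommands code):

// Hypothetical sketch of the two JdbcWriterCommands methods used in close();
// the SQL dialect and the conn field are assumptions made for illustration.
public void drop(String database, String table) throws SQLException {
  try (Statement stmt = this.conn.createStatement()) {
    stmt.execute(String.format("DROP TABLE IF EXISTS %s.%s", database, table));
  }
}

public void truncate(String database, String table) throws SQLException {
  try (Statement stmt = this.conn.createStatement()) {
    stmt.execute(String.format("TRUNCATE TABLE %s.%s", database, table));
  }
}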
use of org.apache.gobblin.writer.commands.JdbcWriterCommands in project incubator-gobblin by apache.
the class AvroToJdbcEntryConverterTest method testFlattening.
@Test
public void testFlattening() throws IOException, SchemaConversionException, SQLException, URISyntaxException, DataConversionException {
  final String db = "db";
  final String table = "users";
  Map<String, JdbcType> dateColumns = new HashMap<>();
  dateColumns.put("date_of_birth", JdbcType.DATE);
  dateColumns.put("last_modified", JdbcType.TIME);
  dateColumns.put("created", JdbcType.TIMESTAMP);
  JdbcWriterCommands mockWriterCommands = mock(JdbcWriterCommands.class);
  when(mockWriterCommands.retrieveDateColumns(db, table)).thenReturn(dateColumns);
  JdbcWriterCommandsFactory factory = mock(JdbcWriterCommandsFactory.class);
  when(factory.newInstance(any(State.class), any(Connection.class))).thenReturn(mockWriterCommands);
  List<JdbcEntryMetaDatum> jdbcEntryMetaData = new ArrayList<>();
  jdbcEntryMetaData.add(new JdbcEntryMetaDatum("name", JdbcType.VARCHAR));
  jdbcEntryMetaData.add(new JdbcEntryMetaDatum("favorite_number", JdbcType.VARCHAR));
  jdbcEntryMetaData.add(new JdbcEntryMetaDatum("favorite_color", JdbcType.VARCHAR));
  jdbcEntryMetaData.add(new JdbcEntryMetaDatum("date_of_birth", JdbcType.DATE));
  jdbcEntryMetaData.add(new JdbcEntryMetaDatum("last_modified", JdbcType.TIME));
  jdbcEntryMetaData.add(new JdbcEntryMetaDatum("created", JdbcType.TIMESTAMP));
  jdbcEntryMetaData.add(new JdbcEntryMetaDatum("nested1_nested1_string", JdbcType.VARCHAR));
  jdbcEntryMetaData.add(new JdbcEntryMetaDatum("nested1_nested1_int", JdbcType.INTEGER));
  jdbcEntryMetaData.add(new JdbcEntryMetaDatum("nested1_nested2_union_nested2_string", JdbcType.VARCHAR));
  jdbcEntryMetaData.add(new JdbcEntryMetaDatum("nested1_nested2_union_nested2_int", JdbcType.INTEGER));
  JdbcEntrySchema expected = new JdbcEntrySchema(jdbcEntryMetaData);
  Schema inputSchema = new Schema.Parser().parse(getClass().getResourceAsStream("/converter/pickfields_nested_with_union.avsc"));
  WorkUnitState workUnitState = new WorkUnitState();
  workUnitState.appendToListProp(JdbcPublisher.JDBC_PUBLISHER_FINAL_TABLE_NAME, table);
  AvroToJdbcEntryConverter converter = new AvroToJdbcEntryConverter(workUnitState);
  Map<String, JdbcType> dateColumnMapping = Maps.newHashMap();
  dateColumnMapping.put("date_of_birth", JdbcType.DATE);
  dateColumnMapping.put("last_modified", JdbcType.TIME);
  dateColumnMapping.put("created", JdbcType.TIMESTAMP);
  workUnitState.appendToListProp(AvroToJdbcEntryConverter.CONVERTER_AVRO_JDBC_DATE_FIELDS, new Gson().toJson(dateColumnMapping));
  JdbcEntrySchema actualSchema = converter.convertSchema(inputSchema, workUnitState);
  Assert.assertEquals(expected, actualSchema);
  try (DataFileReader<GenericRecord> srcDataFileReader = new DataFileReader<GenericRecord>(
      new File(getClass().getResource("/converter/pickfields_nested_with_union.avro").toURI()),
      new GenericDatumReader<GenericRecord>(inputSchema))) {
    List<JdbcEntryData> entries = new ArrayList<>();
    while (srcDataFileReader.hasNext()) {
      JdbcEntryData actualData = converter.convertRecord(actualSchema, srcDataFileReader.next(), workUnitState).iterator().next();
      entries.add(actualData);
    }
    final JsonSerializer<JdbcEntryDatum> datumSer = new JsonSerializer<JdbcEntryDatum>() {
      @Override
      public JsonElement serialize(JdbcEntryDatum datum, Type typeOfSrc, JsonSerializationContext context) {
        JsonObject jso = new JsonObject();
        if (datum.getVal() == null) {
          jso.add(datum.getColumnName(), null);
          return jso;
        }
        if (datum.getVal() instanceof Date) {
          jso.addProperty(datum.getColumnName(), ((Date) datum.getVal()).getTime());
        } else if (datum.getVal() instanceof Timestamp) {
          jso.addProperty(datum.getColumnName(), ((Timestamp) datum.getVal()).getTime());
        } else if (datum.getVal() instanceof Time) {
          jso.addProperty(datum.getColumnName(), ((Time) datum.getVal()).getTime());
        } else {
          jso.addProperty(datum.getColumnName(), datum.getVal().toString());
        }
        return jso;
      }
    };
    JsonSerializer<JdbcEntryData> serializer = new JsonSerializer<JdbcEntryData>() {
      @Override
      public JsonElement serialize(JdbcEntryData src, Type typeOfSrc, JsonSerializationContext context) {
        JsonArray arr = new JsonArray();
        for (JdbcEntryDatum datum : src) {
          arr.add(datumSer.serialize(datum, datum.getClass(), context));
        }
        return arr;
      }
    };
    Gson gson = new GsonBuilder().registerTypeAdapter(JdbcEntryData.class, serializer).serializeNulls().create();
    JsonElement actualSerialized = gson.toJsonTree(entries);
    JsonElement expectedSerialized = new JsonParser().parse(
        new InputStreamReader(getClass().getResourceAsStream("/converter/pickfields_nested_with_union.json")));
    Assert.assertEquals(actualSerialized, expectedSerialized);
  }
  converter.close();
}
use of org.apache.gobblin.writer.commands.JdbcWriterCommands in project incubator-gobblin by apache.
the class AvroToJdbcEntryConverterTest method testDateConversion.
@Test
public void testDateConversion() throws IOException, SchemaConversionException, SQLException {
  final String db = "db";
  final String table = "users";
  Map<String, JdbcType> dateColumns = new HashMap<>();
  dateColumns.put("date_of_birth", JdbcType.DATE);
  dateColumns.put("last_modified", JdbcType.TIME);
  dateColumns.put("created", JdbcType.TIMESTAMP);
  JdbcWriterCommands mockWriterCommands = mock(JdbcWriterCommands.class);
  when(mockWriterCommands.retrieveDateColumns(db, table)).thenReturn(dateColumns);
  JdbcWriterCommandsFactory factory = mock(JdbcWriterCommandsFactory.class);
  when(factory.newInstance(any(State.class), any(Connection.class))).thenReturn(mockWriterCommands);
  List<JdbcEntryMetaDatum> jdbcEntryMetaData = new ArrayList<>();
  jdbcEntryMetaData.add(new JdbcEntryMetaDatum("name", JdbcType.VARCHAR));
  jdbcEntryMetaData.add(new JdbcEntryMetaDatum("favorite_number", JdbcType.VARCHAR));
  jdbcEntryMetaData.add(new JdbcEntryMetaDatum("favorite_color", JdbcType.VARCHAR));
  jdbcEntryMetaData.add(new JdbcEntryMetaDatum("date_of_birth", JdbcType.DATE));
  jdbcEntryMetaData.add(new JdbcEntryMetaDatum("last_modified", JdbcType.TIME));
  jdbcEntryMetaData.add(new JdbcEntryMetaDatum("created", JdbcType.TIMESTAMP));
  JdbcEntrySchema expected = new JdbcEntrySchema(jdbcEntryMetaData);
  Schema inputSchema = new Schema.Parser().parse(getClass().getResourceAsStream("/converter/fieldPickInput.avsc"));
  WorkUnitState workUnitState = new WorkUnitState();
  workUnitState.appendToListProp(JdbcPublisher.JDBC_PUBLISHER_FINAL_TABLE_NAME, table);
  AvroToJdbcEntryConverter converter = new AvroToJdbcEntryConverter(workUnitState);
  Map<String, JdbcType> dateColumnMapping = Maps.newHashMap();
  dateColumnMapping.put("date_of_birth", JdbcType.DATE);
  dateColumnMapping.put("last_modified", JdbcType.TIME);
  dateColumnMapping.put("created", JdbcType.TIMESTAMP);
  workUnitState.appendToListProp(AvroToJdbcEntryConverter.CONVERTER_AVRO_JDBC_DATE_FIELDS, new Gson().toJson(dateColumnMapping));
  JdbcEntrySchema actual = converter.convertSchema(inputSchema, workUnitState);
  Assert.assertEquals(expected, actual);
}
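Both tests mock retrieveDateColumns rather than hit a real database. For context, a plausible implementation would query the database catalog for date-like columns; the INFORMATION_SCHEMA query and type names below are assumptions for illustration, not Gobblin's actual code:

// Hypothetical sketch of retrieveDateColumns: map catalog types to JdbcType.
// The query and type names assume a MySQL-style INFORMATION_SCHEMA.
public Map<String, JdbcType> retrieveDateColumns(String database, String table) throws SQLException {
  String sql = "SELECT column_name, data_type FROM information_schema.columns "
      + "WHERE table_schema = ? AND table_name = ? AND data_type IN ('date', 'time', 'datetime', 'timestamp')";
  Map<String, JdbcType> dateColumns = new HashMap<>();
  try (PreparedStatement ps = this.conn.prepareStatement(sql)) {
    ps.setString(1, database);
    ps.setString(2, table);
    try (ResultSet rs = ps.executeQuery()) {
      while (rs.next()) {
        String name = rs.getString("column_name");
        switch (rs.getString("data_type").toLowerCase()) {
          case "date":
            dateColumns.put(name, JdbcType.DATE);
            break;
          case "time":
            dateColumns.put(name, JdbcType.TIME);
            break;
          default: // datetime / timestamp
            dateColumns.put(name, JdbcType.TIMESTAMP);
        }
      }
    }
  }
  return dateColumns;
}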
use of org.apache.gobblin.writer.commands.JdbcWriterCommands in project incubator-gobblin by apache.
the class AvroToJdbcEntryConverterInitializer method initialize.
/**
 * AvroToJdbcEntryConverter needs the list of date columns existing in the table. As we don't want each
 * converter making its own database connection to fetch the same information, this ConverterInitializer
 * retrieves it once and stores it into each WorkUnit so that AvroToJdbcEntryConverter can use it later.
 *
 * {@inheritDoc}
 * @see org.apache.gobblin.initializer.Initializer#initialize()
 */
@Override
public void initialize() {
  String table = Preconditions.checkNotNull(this.state.getProp(
      ForkOperatorUtils.getPropertyNameForBranch(JdbcPublisher.JDBC_PUBLISHER_FINAL_TABLE_NAME, this.branches, this.branchId)));
  String db = Preconditions.checkNotNull(this.state.getProp(
      ForkOperatorUtils.getPropertyNameForBranch(JdbcPublisher.JDBC_PUBLISHER_DATABASE_NAME, this.branches, this.branchId)));
  try (Connection conn = createConnection()) {
    JdbcWriterCommands commands = this.jdbcWriterCommandsFactory.newInstance(this.state, conn);
    Map<String, JdbcType> dateColumnMapping = commands.retrieveDateColumns(db, table);
    LOG.info("Date column mapping: " + dateColumnMapping);
    final String dateFieldsKey =
        ForkOperatorUtils.getPropertyNameForBranch(AvroToJdbcEntryConverter.CONVERTER_AVRO_JDBC_DATE_FIELDS, this.branches, this.branchId);
    for (WorkUnit wu : this.workUnits) {
      wu.setProp(dateFieldsKey, new Gson().toJson(dateColumnMapping));
    }
  } catch (SQLException e) {
    throw new RuntimeException(e);
  }
}
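The converter side of this handshake is not shown above. A minimal sketch of how AvroToJdbcEntryConverter could read the mapping back out of the work unit state, assuming Gson with a TypeToken and ignoring the per-branch key qualification for brevity (illustrative only, not the converter's actual code):

// Hypothetical read-side of the date-column handshake: parse the JSON written by
// the initializer back into a Map<String, JdbcType>. Assumptions noted above.
String json = workUnitState.getProp(AvroToJdbcEntryConverter.CONVERTER_AVRO_JDBC_DATE_FIELDS);
Map<String, JdbcType> dateColumns =
    new Gson().fromJson(json, new TypeToken<Map<String, JdbcType>>() { }.getType());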
use of org.apache.gobblin.writer.commands.JdbcWriterCommands in project incubator-gobblin by apache.
the class JdbcWriterInitializer method initialize.
/**
 * Initializes AvroFileJdbcSource for the writer; this needs to happen in a single-threaded environment.
 * On each branch:
 * 1. Check whether the user chose to skip the staging table.
 * 1.1. If the user chose to skip the staging table and to replace the final table, truncate the final table.
 * 2. (The user didn't choose to skip the staging table.) Check whether the user passed a staging table.
 * 2.1. Truncate the staging table, if requested.
 * 2.2. Confirm that the staging table is empty.
 * 3. Create a staging table (at this point the user hasn't passed a staging table and isn't skipping staging).
 * 3.1. Create the staging table with a unique name.
 * 3.2. Drop and recreate the table to confirm that we can drop it later.
 * 4. Update the WorkUnit state with the staging table information.
 */
@Override
public void initialize() {
  try (Connection conn = createConnection()) {
    JdbcWriterCommands commands = createJdbcWriterCommands(conn);
    // 1. Check if user chose to skip the staging table
    JobCommitPolicy jobCommitPolicy = JobCommitPolicy.getCommitPolicy(this.state);
    boolean isSkipStaging = !JobCommitPolicy.COMMIT_ON_FULL_SUCCESS.equals(jobCommitPolicy);
    if (isSkipStaging) {
      LOG.info("Writer will write directly to destination table as JobCommitPolicy is " + jobCommitPolicy);
    }
    final String publishTable = getProp(this.state, JdbcPublisher.JDBC_PUBLISHER_FINAL_TABLE_NAME, this.branches, this.branchId);
    final String stagingTableKey = ForkOperatorUtils.getPropertyNameForBranch(ConfigurationKeys.WRITER_STAGING_TABLE, this.branches, this.branchId);
    String stagingTable = this.state.getProp(stagingTableKey);
    int i = -1;
    for (WorkUnit wu : this.workUnits) {
      i++;
      if (isSkipStaging) {
LOG.info("User chose to skip staing table on branch " + this.branchId + " workunit " + i);
        wu.setProp(stagingTableKey, publishTable);
        if (i == 0) {
          // 1.1. If user chose to skip the staging table, and user decided to replace final table, truncate final table.
          if (getPropAsBoolean(this.state, JdbcPublisher.JDBC_PUBLISHER_REPLACE_FINAL_TABLE, this.branches, this.branchId)) {
            LOG.info("User chose to replace final table " + publishTable + " on branch " + this.branchId + " workunit " + i);
            commands.truncate(database, publishTable);
          }
        }
        continue;
      }
      // 2. (User didn't choose to skip the staging table.) Check if user passed the staging table.
      if (!StringUtils.isEmpty(stagingTable)) {
        LOG.info("Staging table for branch " + this.branchId + " from user: " + stagingTable);
        wu.setProp(stagingTableKey, stagingTable);
        if (i == 0) {
          // 2.1. Truncate staging table, if requested.
          if (this.state.getPropAsBoolean(ForkOperatorUtils.getPropertyNameForBranch(ConfigurationKeys.WRITER_TRUNCATE_STAGING_TABLE, this.branches, this.branchId), false)) {
            LOG.info("Truncating staging table " + stagingTable + " as requested.");
            commands.truncate(database, stagingTable);
          }
          // 2.2. Confirm if staging table is empty.
          if (!commands.isEmpty(database, stagingTable)) {
            LOG.error("Staging table " + stagingTable + " is not empty. Failing.");
            throw new IllegalArgumentException("Staging table " + stagingTable + " should be empty.");
          }
          this.userCreatedStagingTable = stagingTable;
        }
        continue;
      }
      // 3. Create staging table (At this point user hasn't passed the staging table, and not skipping staging table).
LOG.info("Staging table has not been passed from user for branch " + this.branchId + " workunit " + i + " . Creating.");
      String createdStagingTable = createStagingTable(conn, commands);
      wu.setProp(stagingTableKey, createdStagingTable);
      this.createdStagingTables.add(createdStagingTable);
      LOG.info("Staging table " + createdStagingTable + " has been created for branchId " + this.branchId + " workunit " + i);
    }
  } catch (SQLException e) {
    throw new RuntimeException("Failed with SQL", e);
  }
}
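The createStagingTable(conn, commands) helper called in step 3 is not shown in this snippet. A rough sketch of what steps 3.1 and 3.2 from the Javadoc could look like, assuming MySQL-style CREATE TABLE ... LIKE DDL and an invented naming scheme (both are assumptions; this is not the actual Gobblin helper):

// Hypothetical sketch of steps 3.1 and 3.2; the naming scheme and DDL are assumptions.
private String createStagingTableSketch(Connection conn, JdbcWriterCommands commands,
    String database, String publishTable) throws SQLException {
  String staging = "stage_" + publishTable + "_" + System.currentTimeMillis(); // assumed unique name
  String ddl = String.format("CREATE TABLE %s.%s LIKE %s.%s", database, staging, database, publishTable);
  try (Statement stmt = conn.createStatement()) {
    stmt.execute(ddl); // 3.1. create the staging table with a unique name
  }
  commands.drop(database, staging); // 3.2. confirm the table can be dropped later in close()...
  try (Statement stmt = conn.createStatement()) {
    stmt.execute(ddl); // ...then recreate it for actual use
  }
  return staging;
}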