Use of org.apache.spark.sql.execution.datasources.jdbc.JdbcOptionsInWrite in project parent by Daytime-Don-t-Know-Dark-Night.
The class ImportMySQL, method main.
public static void main(String[] args) throws SQLException, SparkException, ExecutionException, InvocationTargetException {
    SparkSession spark = SparkSession.builder().master("local[*]").getOrCreate();
    StructType schema = new StructType()
            .add("date", "string")
            .add("time", "timestamp")
            .add("amount", "double")
            .add("pay_no", "long");
    List<Row> list = Lists.newArrayList();
    for (int i = 0; i < 10; i++) {
        UUID uuid = UUID.randomUUID();
        long id = -1 * uuid.getLeastSignificantBits();
        double randomAmount = Math.random() * 10;
        double amount = Double.parseDouble(String.format("%.2f", randomAmount));
        Row row = RowFactory.create(LocalDate.now().toString(), Timestamp.valueOf(LocalDateTime.now()), amount, id);
        list.add(row);
    }
    Dataset<Row> ds = spark.createDataFrame(list, schema);
    ds.show(false);
    String uri = "jdbc:mysql://localhost:3306/test_boluo?characterEncoding=UTF-8&serverTimezone=GMT%2B8&rewriteBatchedStatements=true&user=root&password=root";
    Map<String, String> props = new Hashtable<>();
    // the url option is mandatory for JDBCOptions; without it the JdbcOptionsInWrite constructor below fails
    props.put(JDBCOptions.JDBC_URL(), uri);
    props.put(JDBCOptions.JDBC_DRIVER_CLASS(), "com.mysql.cj.jdbc.Driver");
    props.put(JDBCOptions.JDBC_TABLE_NAME(), "table_name");
    props.put(JdbcOptionsInWrite.JDBC_CREATE_TABLE_COLUMN_TYPES(), String.join(",", "date varchar(64)"));
    Seq<Tuple2<String, String>> tuple2Seq = JavaConverters.mapAsScalaMapConverter(props).asScala().toSeq();
    JdbcOptionsInWrite opt = new JdbcOptionsInWrite((scala.collection.immutable.Map<String, String>) Map$.MODULE$.apply(tuple2Seq));
    // TODO create table
    MySQL.replace(ds, opt, "1=1");
}
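For comparison, the same write can be expressed through the public DataFrameWriter JDBC path instead of constructing the internal JdbcOptionsInWrite by hand. A minimal sketch, reusing uri and the placeholder table name from the example above (not code from this project):

// Public API equivalent: Spark creates the table (honoring createTableColumnTypes) and appends the rows.
ds.write()
        .format("jdbc")
        .option("url", uri)
        .option("driver", "com.mysql.cj.jdbc.Driver")
        .option("dbtable", "table_name")
        .option("createTableColumnTypes", "date varchar(64)")
        .mode(SaveMode.Append)
        .save();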
Use of org.apache.spark.sql.execution.datasources.jdbc.JdbcOptionsInWrite in project parent by Daytime-Don-t-Know-Dark-Night.
The class Jdbcs, method execute.
@SuppressWarnings("unchecked")
public static void execute(String uri, String sql, String... sql2) throws SQLException {
    JdbcOptionsInWrite options_ = options(uri, "t");
    // run the primary statement plus any follow-up statements in a single transaction
    try (Connection conn = JdbcUtils.createConnectionFactory(options_).apply()) {
        conn.setAutoCommit(false);
        try (Statement statement = conn.createStatement()) {
            statement.execute(sql);
            for (String s : sql2) {
                statement.execute(s);
            }
        }
        conn.commit();
    }
}
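The options(uri, "t") helper on the first line belongs to the same Jdbcs class but is not part of this excerpt. A hypothetical sketch of such a helper, following the Java-to-Scala map conversion used in ImportMySQL above (the driver class is an assumption):

// Hypothetical helper, not the project's actual Jdbcs.options; shown only to make the snippet self-contained.
public static JdbcOptionsInWrite options(String uri, String table) {
    Map<String, String> props = new HashMap<>();
    props.put(JDBCOptions.JDBC_URL(), uri);
    props.put(JDBCOptions.JDBC_TABLE_NAME(), table);
    props.put(JDBCOptions.JDBC_DRIVER_CLASS(), "com.mysql.cj.jdbc.Driver"); // assumed driver
    Seq<Tuple2<String, String>> tuple2Seq = JavaConverters.mapAsScalaMapConverter(props).asScala().toSeq();
    return new JdbcOptionsInWrite((scala.collection.immutable.Map<String, String>) Map$.MODULE$.apply(tuple2Seq));
}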
Use of org.apache.spark.sql.execution.datasources.jdbc.JdbcOptionsInWrite in project parent by Daytime-Don-t-Know-Dark-Night.
The class InsertMySQL, method main.
public static void main(String[] args) throws Exception {
    SparkSession spark = SparkSession.builder().master("local[*]").getOrCreate();
    String uri = "jdbc:mysql://local.boluo.com/test_boluo?characterEncoding=UTF-8&serverTimezone=GMT%2B8&rewriteBatchedStatements=true&user=root&password=123";
    String filterPath = "file:///D:/filter";
    StructType schema = new StructType().add("name", "string");
    List<Row> list = Lists.newArrayList();
    list.add(RowFactory.create("1"));
    list.add(RowFactory.create("2"));
    list.add(RowFactory.create("3"));
    list.add(RowFactory.create("4"));
    list.add(RowFactory.create("5"));
    // Dataset<Row> ds = spark.read().format("delta").load(filterPath);
    Dataset<Row> ds = spark.createDataFrame(list, schema);
    long startTime = System.currentTimeMillis();
    String name = "测试一百万条数据"; // table name, Chinese for "test one million rows"
    JdbcOptionsInWrite opt = Jdbcs.options(uri, name, "name varchar(64)");
    // Outputs.createTable(ds, opt, "KEY(name)");
    Outputs.replace(ds, opt, "1=1");
    long endTime = System.currentTimeMillis();
    System.out.println("Elapsed time (ms): " + (endTime - startTime));
}
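One way to sanity-check the write is to read the table back through the public JDBC reader. A minimal sketch, reusing spark, uri and name from the method above (credentials repeated from the JDBC URL):

// Read the freshly written table back and print it; purely a verification step.
java.util.Properties connProps = new java.util.Properties();
connProps.setProperty("user", "root");
connProps.setProperty("password", "123");
connProps.setProperty("driver", "com.mysql.cj.jdbc.Driver");
Dataset<Row> readBack = spark.read().jdbc(uri, name, connProps);
readBack.show(false);
System.out.println("rows in " + name + ": " + readBack.count());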
Use of org.apache.spark.sql.execution.datasources.jdbc.JdbcOptionsInWrite in project parent by Daytime-Don-t-Know-Dark-Night.
The class Outputs, method createTable.
public static void createTable(Dataset<Row> rows, JdbcOptionsInWrite options, String keys, Jdbcs.ConnectConsumer consumer) throws Exception {
    JdbcDialect dialect = JdbcDialects.get(options.url());
    String keyStr = "";
    if (!Strings.isNullOrEmpty(keys)) {
        // accept either a full "(PRIMARY) KEY(...)" clause or a bare column list
        Pattern patternKey = Pattern.compile("(primary\\s+)?key\\s*\\([^)]*\\)", Pattern.CASE_INSENSITIVE);
        Matcher matcher = patternKey.matcher(keys);
        if (matcher.find()) {
            keyStr = "," + keys;
        } else {
            keyStr = ",PRIMARY KEY(" + Streams.stream(Splitter.on(",").trimResults().omitEmptyStrings().split(keys))
                    .map(i -> CharMatcher.anyOf("`").trimFrom(i))
                    .map(i -> String.format("`%s`", i))
                    .collect(Collectors.joining(",")) + ")";
        }
    }
    // custom column types: entries from createTableColumnTypes override the Spark-derived DDL types
    String strSchema = JdbcUtils.schemaString(rows, options.url(), Option.empty());
    if (!options.createTableColumnTypes().isEmpty()) {
        Pattern pat = Pattern.compile("\\s*`?\"?(.+?)`?\"?\\s+(.+)");
        Map<String, String> collect1 = Arrays.stream(strSchema.split(" , ")).collect(Collectors.toMap(i -> {
            Matcher matcher = pat.matcher(i);
            Preconditions.checkArgument(matcher.matches(), i);
            return dialect.quoteIdentifier(matcher.group(1));
        }, i -> i, (a, b) -> b, LinkedHashMap::new)); // LinkedHashMap keeps the dataset schema's column order in the DDL
        Arrays.stream(options.createTableColumnTypes().get().split(",")).forEach(i -> {
            Matcher matcher = pat.matcher(i);
            Preconditions.checkArgument(matcher.matches());
            String k = dialect.quoteIdentifier(matcher.group(1));
            String t = matcher.group(2);
            Preconditions.checkArgument(collect1.containsKey(k), String.format("%s not in %s", k, collect1));
            collect1.put(k, k + " " + t);
        });
        strSchema = Joiner.on(",").join(collect1.values());
    }
    String table = options.table();
    String createTableOptions = options.createTableOptions();
    String sql;
    if (options.driverClass().equals("org.postgresql.Driver")) {
        sql = String.format("CREATE TABLE %s (%s%s) %s", table, strSchema, keyStr, createTableOptions);
    } else {
        sql = String.format("CREATE TABLE %s (%s%s) DEFAULT CHARSET=utf8mb4 %s", table, strSchema, keyStr, createTableOptions);
    }
    try (Connection conn = JdbcUtils.createConnectionFactory(options).apply();
         Statement statement = conn.createStatement()) {
        boolean exists = JdbcUtils.tableExists(conn, options);
        if (!exists) {
            statement.setQueryTimeout(options.queryTimeout());
            statement.executeUpdate(sql);
        }
        if (Objects.nonNull(consumer)) {
            consumer.accept(statement, exists);
        }
    }
}
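The keys argument is interpreted two ways: a string that already matches the (PRIMARY)? KEY(...) pattern is appended verbatim, while a bare column list is wrapped into a PRIMARY KEY clause. A short usage sketch, assuming the ds and opt from InsertMySQL above and a four-argument call with no consumer:

// Already a key clause, appended as-is; the DDL comes out roughly as
//   CREATE TABLE <table> (`name` varchar(64),KEY(name)) DEFAULT CHARSET=utf8mb4
Outputs.createTable(ds, opt, "KEY(name)", null);
// Bare column list, wrapped into a primary key, roughly:
//   CREATE TABLE <table> (`name` varchar(64),PRIMARY KEY(`name`)) DEFAULT CHARSET=utf8mb4
Outputs.createTable(ds, opt, "name", null);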
Use of org.apache.spark.sql.execution.datasources.jdbc.JdbcOptionsInWrite in project parent by Daytime-Don-t-Know-Dark-Night.
The class Outputs, method replace.
/**
 * Clears the target table (TRUNCATE when {@code where} is null or "1=1", otherwise DELETE ... WHERE),
 * then re-inserts the dataset partition by partition using multi-row prepared statements.
 */
public static void replace(Dataset<Row> ds, JdbcOptionsInWrite opts, String where) throws SQLException, ExecutionException, SparkException, InvocationTargetException {
    if (ds != null) {
        /*ds.foreachPartition((it) -> {
            new IllegalArgumentException("");
        });*/
        // throw new UndeclaredThrowableException(new IllegalArgumentException(""));
    }
    JdbcDialect dialect = JdbcDialects.get(opts.url());
    long countBefore; // only referenced by the commented-out row-count check at the end of the method
    // clear the target rows first: full truncate for "1=1", otherwise a targeted delete
    try (Connection conn = JdbcUtils.createConnectionFactory(opts).apply();
         Statement statement = conn.createStatement()) {
        if (Objects.isNull(where) || where.equals("1=1")) {
            String sql = String.format("truncate table %s", opts.table());
            statement.executeUpdate(sql);
            System.out.println(sql);
        } else {
            String sql = String.format("delete from %s where %s", opts.table(), where);
            // countBefore -= statement.executeUpdate(sql);
            statement.executeUpdate(sql);
            System.out.println(sql);
        }
    }
    // ds.write().format("jdbc")
    //         .mode(SaveMode.Append)
    //         .options(opts.parameters())
    //         .save();
    StructType schema = ds.schema();
    String sql_ = JdbcUtils.getInsertStatement(opts.table(), schema, Option.empty(), true, dialect);
    String sql;
    if (opts.driverClass().equals("org.postgresql.Driver")) {
        sql = sql_;
        /*+ " on conflict do update " +
                Arrays.stream(schema.fieldNames())
                        .map(i -> String.format("set %s=excluded.%s", dialect.quoteIdentifier(i), dialect.quoteIdentifier(i)))
                        .collect(Collectors.joining(","));*/
    } else {
        sql = sql_ + " on duplicate key update " + Arrays.stream(schema.fieldNames())
                .map(i -> String.format("%s=values(%s)", dialect.quoteIdentifier(i), dialect.quoteIdentifier(i)))
                .collect(Collectors.joining(","));
    }
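    // For the single-column schema used in InsertMySQL above, the MySQL branch builds roughly:
    //   INSERT INTO <table> (`name`) VALUES (?) on duplicate key update `name`=values(`name`)
    // Note: this upsert variant (sql) is only consumed by the commented-out JdbcUtils.savePartition
    // call below; the hand-rolled batching that follows uses the plain insert statement (sql_).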
    // ds.cache();
    // long countDelta = ds.count();
    // Build a multi-row VALUES statement up front: the base insert plus (batchSize - 1) extra "(?,...)" groups.
    int batchSize = 5;
    String tail = sql_.substring(sql_.indexOf("(?"));
    StringBuilder sb = new StringBuilder(sql_);
    for (int k = 0; k < batchSize - 1; k++) {
        sb.append(",").append(tail);
    }
    String multiRowSql = sb.toString();
    ds.foreachPartition((rows) -> {
        // sends multi-row inserts of the form: insert into <table> (...) values (?,...),(?,...),...,(?,...)
        // Function0<Connection> connectionFactory = JdbcUtils.createConnectionFactory(opts);
        // JdbcUtils.savePartition(connectionFactory,
        //         opts.table(),
        //         JavaConverters.asScalaIteratorConverter(rows).asScala(),
        //         schema, sql, 2000,
        //         dialect, Connection.TRANSACTION_READ_UNCOMMITTED, opts);
        try (Connection conn = JdbcUtils.createConnectionFactory(opts).apply()) {
            conn.setAutoCommit(false);
            int numFields = schema.fields().length;
            List<Row> buffer = Lists.newArrayList();
            try (PreparedStatement preparedStatement = conn.prepareStatement(multiRowSql)) {
                while (rows.hasNext()) {
                    buffer.add(rows.next());
                    if (buffer.size() == batchSize) {
                        // fill all batchSize * numFields placeholders, then send one multi-row insert
                        for (int r = 0; r < batchSize; r++) {
                            for (int i = 0; i < numFields; i++) {
                                preparedStatement.setObject(r * numFields + i + 1, buffer.get(r).get(i));
                            }
                        }
                        preparedStatement.executeUpdate();
                        buffer.clear();
                    }
                }
            }
            // flush the rows left over when the partition size is not a multiple of batchSize
            if (!buffer.isEmpty()) {
                StringBuilder restSql = new StringBuilder(sql_);
                for (int k = 0; k < buffer.size() - 1; k++) {
                    restSql.append(",").append(tail);
                }
                try (PreparedStatement rest = conn.prepareStatement(restSql.toString())) {
                    for (int r = 0; r < buffer.size(); r++) {
                        for (int i = 0; i < numFields; i++) {
                            rest.setObject(r * numFields + i + 1, buffer.get(r).get(i));
                        }
                    }
                    rest.executeUpdate();
                }
            }
            conn.commit();
        }
    });
    // ds.unpersist();
    /*long countAfter;
    try (Connection conn = JdbcUtils.createConnectionFactory(opts).apply();
         Statement statement = conn.createStatement()) {
        ResultSet rs = statement.executeQuery(String.format("select count(0) from `%s`", opts.table()));
        Preconditions.checkArgument(rs.next());
        countAfter = rs.getLong(1);
    }*/
    // logger.info("before={},after={},rows={}", countBefore, countAfter, -1);
    // if (countBefore + countDelta != countAfter) {
    //     String msg = String.format("%d+%d!=%d", countBefore, countDelta, countAfter);
    //     throw new ExecutionException(new IllegalStateException(msg));
    // }
}
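Outputs.replace is not limited to full reloads: any where clause other than "1=1" turns the truncate into a targeted delete before the re-insert. A small usage sketch, assuming the ds and opt from the ImportMySQL example (whose schema has a date column); the dayDs variable and the example date are illustrative, not code from this project:

// Refresh a single day: matching rows are deleted, then the dataset for that day is inserted.
Dataset<Row> dayDs = ds.filter("`date` = '2021-06-01'");
Outputs.replace(dayDs, opt, "`date` = '2021-06-01'");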