
Example 1 with JdbcOptionsInWrite

use of org.apache.spark.sql.execution.datasources.jdbc.JdbcOptionsInWrite in project parent by Daytime-Don-t-Know-Dark-Night.

the class ImportMySQL method main.

public static void main(String[] args) throws SQLException, SparkException, ExecutionException, InvocationTargetException {
    SparkSession spark = SparkSession.builder().master("local[*]").getOrCreate();
    StructType schema = new StructType().add("date", "string").add("time", "timestamp").add("amount", "double").add("pay_no", "long");
    List<Row> list = Lists.newArrayList();
    for (int i = 0; i < 10; i++) {
        // Derive a pseudo-random id from the UUID's low bits (the sign is not guaranteed).
        UUID uuid = UUID.randomUUID();
        long id = -1 * uuid.getLeastSignificantBits();
        double randomAmount = Math.random() * 10;
        double amount = Double.parseDouble(String.format("%.2f", randomAmount));
        Row row = RowFactory.create(LocalDate.now().toString(), Timestamp.valueOf(LocalDateTime.now()), amount, id);
        list.add(row);
    }
    Dataset<Row> ds = spark.createDataFrame(list, schema);
    ds.show(false);
    String uri = "jdbc:mysql://localhost:3306/test_boluo?characterEncoding=UTF-8&serverTimezone=GMT%2B8&rewriteBatchedStatements=true&user=root&password=root";
    Map<String, String> props = new Hashtable<>();
    // JDBCOptions requires the "url" option; without it the JdbcOptionsInWrite constructor throws.
    props.put(JDBCOptions.JDBC_URL(), uri);
    props.put(JDBCOptions.JDBC_DRIVER_CLASS(), "com.mysql.cj.jdbc.Driver");
    props.put(JDBCOptions.JDBC_TABLE_NAME(), "table_name");
    // Override the generated column type for the "date" column.
    props.put(JdbcOptionsInWrite.JDBC_CREATE_TABLE_COLUMN_TYPES(), "date varchar(64)");
    // Convert the Java map to the scala.collection.immutable.Map the Scala-side constructor expects.
    Seq<Tuple2<String, String>> tuple2Seq = JavaConverters.mapAsScalaMapConverter(props).asScala().toSeq();
    JdbcOptionsInWrite opt = new JdbcOptionsInWrite((scala.collection.immutable.Map<String, String>) Map$.MODULE$.apply(tuple2Seq));
    // TODO create table
    MySQL.replace(ds, opt, "1=1");
}
Also used : SparkSession(org.apache.spark.sql.SparkSession) StructType(org.apache.spark.sql.types.StructType) Hashtable(java.util.Hashtable) Tuple2(scala.Tuple2) Row(org.apache.spark.sql.Row) UUID(java.util.UUID) JdbcOptionsInWrite(org.apache.spark.sql.execution.datasources.jdbc.JdbcOptionsInWrite)
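
The Java-to-Scala map conversion above is boilerplate that every caller repeats. A minimal sketch of a reusable helper, assuming the same Scala JavaConverters interop as the example (the name toScalaMap is ours, for illustration):

static scala.collection.immutable.Map<String, String> toScalaMap(java.util.Map<String, String> props) {
    // asScala() yields a mutable Scala view; Map$.MODULE$.apply copies it into the
    // immutable Map that the JdbcOptionsInWrite constructor expects.
    Seq<Tuple2<String, String>> seq = JavaConverters.mapAsScalaMapConverter(props).asScala().toSeq();
    return (scala.collection.immutable.Map<String, String>) Map$.MODULE$.apply(seq);
}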

Example 2 with JdbcOptionsInWrite

use of org.apache.spark.sql.execution.datasources.jdbc.JdbcOptionsInWrite in project parent by Daytime-Don-t-Know-Dark-Night.

the class Jdbcs method execute.

@SuppressWarnings("unchecked")
public static void execute(String uri, String sql, String... sql2) throws SQLException {
    // "t" is a dummy table name: only the connection-related options are used here.
    JdbcOptionsInWrite options_ = options(uri, "t");
    try (Connection conn = JdbcUtils.createConnectionFactory(options_).apply()) {
        // Run all statements in a single transaction; if one fails, the connection
        // closes without commit and the work is rolled back.
        conn.setAutoCommit(false);
        try (Statement statement = conn.createStatement()) {
            statement.execute(sql);
            for (String s : sql2) {
                statement.execute(s);
            }
        }
        conn.commit();
    }
}
Also used : JdbcOptionsInWrite(org.apache.spark.sql.execution.datasources.jdbc.JdbcOptionsInWrite)
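
A hypothetical call, running DDL plus a follow-up statement in one transaction (the table and statements below are ours, for illustration):

Jdbcs.execute("jdbc:mysql://localhost:3306/test_boluo?user=root&password=root",
        "CREATE TABLE IF NOT EXISTS demo (id BIGINT PRIMARY KEY)",
        "TRUNCATE TABLE demo");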

Example 3 with JdbcOptionsInWrite

use of org.apache.spark.sql.execution.datasources.jdbc.JdbcOptionsInWrite in project parent by Daytime-Don-t-Know-Dark-Night.

the class InsertMySQL method main.

public static void main(String[] args) throws Exception {
    SparkSession spark = SparkSession.builder().master("local[*]").getOrCreate();
    String uri = "jdbc:mysql://local.boluo.com/test_boluo?characterEncoding=UTF-8&serverTimezone=GMT%2B8&rewriteBatchedStatements=true&user=root&password=123";
    String filterPath = "file:///D:/filter";
    StructType schema = new StructType().add("name", "string");
    List<Row> list = Lists.newArrayList();
    list.add(RowFactory.create("1"));
    list.add(RowFactory.create("2"));
    list.add(RowFactory.create("3"));
    list.add(RowFactory.create("4"));
    list.add(RowFactory.create("5"));
    // Dataset<Row> ds = spark.read().format("delta").load(filterPath);
    Dataset<Row> ds = spark.createDataFrame(list, schema);
    long startTime = System.currentTimeMillis();
    // Table name means "test one million rows"; this run only writes the 5 rows above.
    String name = "测试一百万条数据";
    JdbcOptionsInWrite opt = Jdbcs.options(uri, name, "name varchar(64)");
    // Outputs.createTable(ds, opt, "KEY(name)");
    Outputs.replace(ds, opt, "1=1");
    long endTime = System.currentTimeMillis();
    System.out.println("elapsed ms: " + (endTime - startTime));
}
Also used : SparkSession(org.apache.spark.sql.SparkSession) StructType(org.apache.spark.sql.types.StructType) Row(org.apache.spark.sql.Row) JdbcOptionsInWrite(org.apache.spark.sql.execution.datasources.jdbc.JdbcOptionsInWrite)
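
Jdbcs.options is a project helper whose source is not shown on this page; judging from Example 1 it plausibly assembles the write options like the sketch below (an assumption, not the project's actual implementation; HashMap stands in for whatever map it uses):

static JdbcOptionsInWrite options(String uri, String table, String columnTypes) {
    Map<String, String> props = new HashMap<>();
    props.put(JDBCOptions.JDBC_URL(), uri);
    props.put(JDBCOptions.JDBC_DRIVER_CLASS(), "com.mysql.cj.jdbc.Driver");
    props.put(JDBCOptions.JDBC_TABLE_NAME(), table);
    props.put(JdbcOptionsInWrite.JDBC_CREATE_TABLE_COLUMN_TYPES(), columnTypes);
    Seq<Tuple2<String, String>> seq = JavaConverters.mapAsScalaMapConverter(props).asScala().toSeq();
    return new JdbcOptionsInWrite((scala.collection.immutable.Map<String, String>) Map$.MODULE$.apply(seq));
}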

Example 4 with JdbcOptionsInWrite

use of org.apache.spark.sql.execution.datasources.jdbc.JdbcOptionsInWrite in project parent by Daytime-Don-t-Know-Dark-Night.

the class Outputs method createTable.

public static void createTable(Dataset<Row> rows, JdbcOptionsInWrite options, String keys, Jdbcs.ConnectConsumer consumer) throws Exception {
    JdbcDialect dialect = JdbcDialects.get(options.url());
    String keyStr = "";
    if (!Strings.isNullOrEmpty(keys)) {
        // If keys already looks like "KEY(...)" or "PRIMARY KEY(...)", use it verbatim;
        // otherwise treat it as a comma-separated column list and build a PRIMARY KEY clause.
        Pattern patternKey = Pattern.compile("(primary\\s+)?key\\s*\\([^)]*\\)", Pattern.CASE_INSENSITIVE);
        Matcher matcher = patternKey.matcher(keys);
        if (matcher.find()) {
            keyStr = "," + keys;
        } else {
            keyStr = ",PRIMARY KEY(" + Streams.stream(Splitter.on(",").trimResults().omitEmptyStrings().split(keys)).map(i -> CharMatcher.anyOf("`").trimFrom(i)).map(i -> String.format("`%s`", i)).collect(Collectors.joining(",")) + ")";
        }
    }
    // Apply user-specified column types on top of the schema Spark derives.
    // Note: splitting on " , " relies on JdbcUtils.schemaString leaving a trailing space
    // after nullable columns; NOT NULL columns would break this parsing.
    String strSchema = JdbcUtils.schemaString(rows, options.url(), Option.empty());
    if (!options.createTableColumnTypes().isEmpty()) {
        Pattern pat = Pattern.compile("\\s*`?\"?(.+?)`?\"?\\s+(.+)");
        // LinkedHashMap keeps the original column order in the generated DDL;
        // a plain Collectors.toMap would scramble it.
        Map<String, String> collect1 = Arrays.stream(strSchema.split(" , ")).collect(Collectors.toMap(i -> {
            Matcher matcher = pat.matcher(i);
            Preconditions.checkArgument(matcher.matches(), i);
            return dialect.quoteIdentifier(matcher.group(1));
        }, i -> i, (a, b) -> b, java.util.LinkedHashMap::new));
        // NOTE: splitting on "," breaks for types that themselves contain commas, e.g. decimal(10,2).
        Arrays.stream(options.createTableColumnTypes().get().split(",")).forEach(i -> {
            Matcher matcher = pat.matcher(i);
            Preconditions.checkArgument(matcher.matches());
            String k = dialect.quoteIdentifier(matcher.group(1));
            String t = matcher.group(2);
            Preconditions.checkArgument(collect1.containsKey(k), String.format("%s not in %s", k, collect1));
            collect1.put(k, k + " " + t);
        });
        strSchema = Joiner.on(",").join(collect1.values());
    }
    String table = options.table();
    String createTableOptions = options.createTableOptions();
    String sql;
    if (options.driverClass().equals("org.postgresql.Driver")) {
        sql = String.format("CREATE TABLE %s (%s%s) %s", table, strSchema, keyStr, createTableOptions);
    } else {
        // Assumes MySQL for any non-Postgres driver and forces a utf8mb4 charset.
        sql = String.format("CREATE TABLE %s (%s%s) DEFAULT CHARSET=utf8mb4 %s", table, strSchema, keyStr, createTableOptions);
    }
    try (Connection conn = JdbcUtils.createConnectionFactory(options).apply();
        Statement statement = conn.createStatement()) {
        boolean exists = JdbcUtils.tableExists(conn, options);
        if (!exists) {
            statement.setQueryTimeout(options.queryTimeout());
            statement.executeUpdate(sql);
        }
        // Give the caller a chance to run extra statements, e.g. ALTER TABLE on an existing table.
        if (Objects.nonNull(consumer)) {
            consumer.accept(statement, exists);
        }
    }
}
Also used : java.sql(java.sql) Iterables(com.google.common.collect.Iterables) JdbcDialect(org.apache.spark.sql.jdbc.JdbcDialect) Arrays(java.util.Arrays) URISyntaxException(java.net.URISyntaxException) LoggerFactory(org.slf4j.LoggerFactory) Function0(scala.Function0) JdbcOptionsInWrite(org.apache.spark.sql.execution.datasources.jdbc.JdbcOptionsInWrite) Lists(com.google.common.collect.Lists) Matcher(java.util.regex.Matcher) Map(java.util.Map) JDBCOptions(org.apache.spark.sql.execution.datasources.jdbc.JDBCOptions) URI(java.net.URI) SparkException(org.apache.spark.SparkException) StructType(org.apache.spark.sql.types.StructType) Logger(org.slf4j.Logger) JdbcUtils(org.apache.spark.sql.execution.datasources.jdbc.JdbcUtils) com.google.common.base(com.google.common.base) IOException(java.io.IOException) Streams(com.google.common.collect.Streams) Option(scala.Option) Tuple2(scala.Tuple2) Instant(java.time.Instant) Maps(com.google.common.collect.Maps) DeltaTable(io.delta.tables.DeltaTable) Collectors(java.util.stream.Collectors) InvocationTargetException(java.lang.reflect.InvocationTargetException) Objects(java.util.Objects) ExecutionException(java.util.concurrent.ExecutionException) List(java.util.List) org.apache.spark.sql(org.apache.spark.sql) JavaConverters(scala.collection.JavaConverters) Pattern(java.util.regex.Pattern) JdbcDialects(org.apache.spark.sql.jdbc.JdbcDialects)
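
A hypothetical call matching the commented-out line in Example 3 (the four-argument signature with a null consumer is an assumption here). With createTableColumnTypes set to "name varchar(64)" and keys "KEY(name)", the generated DDL on MySQL would be roughly CREATE TABLE <table> (`name` varchar(64),KEY(name)) DEFAULT CHARSET=utf8mb4:

// Create the table if it does not exist, adding a secondary index on name;
// passing null skips the post-create consumer hook.
Outputs.createTable(ds, opt, "KEY(name)", null);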

Example 5 with JdbcOptionsInWrite

use of org.apache.spark.sql.execution.datasources.jdbc.JdbcOptionsInWrite in project parent by Daytime-Don-t-Know-Dark-Night.

the class Outputs method replace.

/**
 * Clears the target table (TRUNCATE when where is null or "1=1", DELETE otherwise),
 * then re-inserts the dataset partition by partition using multi-row INSERTs.
 */
public static void replace(Dataset<Row> ds, JdbcOptionsInWrite opts, String where) throws SQLException, ExecutionException, SparkException, InvocationTargetException {
    JdbcDialect dialect = JdbcDialects.get(opts.url());
    try (Connection conn = JdbcUtils.createConnectionFactory(opts).apply();
        Statement statement = conn.createStatement()) {
        if (Objects.isNull(where) || where.equals("1=1")) {
            String sql = String.format("truncate table %s", opts.table());
            statement.executeUpdate(sql);
            System.out.println(sql);
        } else {
            String sql = String.format("delete from %s where %s", opts.table(), where);
            statement.executeUpdate(sql);
            System.out.println(sql);
        }
    }
    // Built-in alternative, without the custom batching below:
    // ds.write().format("jdbc")
    // .mode(SaveMode.Append)
    // .options(opts.parameters())
    // .save();
    StructType schema = ds.schema();
    String sql_ = JdbcUtils.getInsertStatement(opts.table(), schema, Option.empty(), true, dialect);
    String sql;
    if (opts.driverClass().equals("org.postgresql.Driver")) {
        sql = sql_;
        /* + " on conflict do update " +
                Arrays.stream(schema.fieldNames())
                        .map(i -> String.format("set %s=excluded.%s", dialect.quoteIdentifier(i), dialect.quoteIdentifier(i)))
                        .collect(Collectors.joining(","));*/
    } else {
        // MySQL upsert variant; only referenced by the commented-out savePartition path below.
        sql = sql_ + " on duplicate key update " + Arrays.stream(schema.fieldNames()).map(i -> String.format("%s=values(%s)", dialect.quoteIdentifier(i), dialect.quoteIdentifier(i))).collect(Collectors.joining(","));
    }
    // Build a 5-row multi-value INSERT once, on the driver: tail is the "(?,?,...)" group,
    // repeated 4 more times after the statement's first group.
    String tail = sql_.substring(sql_.indexOf("(?"));
    StringBuilder sb = new StringBuilder(sql_);
    for (int k = 0; k < 5 - 1; k++) {
        sb.append(",").append(tail);
    }
    String multiSql = sb.toString();
    ds.foreachPartition((rows) -> {
        // Function0<Connection> connectionFactory = JdbcUtils.createConnectionFactory(opts);
        // JdbcUtils.savePartition(connectionFactory,
        // opts.table(),
        // JavaConverters.asScalaIteratorConverter(rows).asScala(),
        // schema, sql, 2000,
        // dialect, Connection.TRANSACTION_READ_UNCOMMITTED, opts);
        int numFields = schema.fields().length;
        try (Connection conn = JdbcUtils.createConnectionFactory(opts).apply();
            PreparedStatement multi = conn.prepareStatement(multiSql);
            PreparedStatement single = conn.prepareStatement(sql_)) {
            conn.setAutoCommit(false);
            List<Row> buffer = Lists.newArrayListWithCapacity(5);
            while (rows.hasNext()) {
                buffer.add(rows.next());
                if (buffer.size() == 5) {
                    for (int r = 0; r < 5; r++) {
                        for (int i = 0; i < numFields; i++) {
                            multi.setObject(r * numFields + i + 1, buffer.get(r).get(i));
                        }
                    }
                    multi.executeUpdate();
                    buffer.clear();
                }
            }
            // Flush the tail one row at a time when the partition size is not a
            // multiple of 5, so no rows are silently dropped.
            for (Row row : buffer) {
                for (int i = 0; i < numFields; i++) {
                    single.setObject(i + 1, row.get(i));
                }
                single.executeUpdate();
            }
            conn.commit();
        }
    });
    /* Disabled row-count verification:
    long countAfter;
    try (Connection conn = JdbcUtils.createConnectionFactory(opts).apply();
         Statement statement = conn.createStatement()) {
        ResultSet rs = statement.executeQuery(String.format("select count(0) from `%s`", opts.table()));
        Preconditions.checkArgument(rs.next());
        countAfter = rs.getLong(1);
    }
    logger.info("before={},after={},rows={}", countBefore, countAfter, -1);
    if (countBefore + countDelta != countAfter) {
        String msg = String.format("%d+%d!=%d", countBefore, countDelta, countAfter);
        throw new ExecutionException(new IllegalStateException(msg));
    } */
}
Also used : java.sql(java.sql) Iterables(com.google.common.collect.Iterables) JdbcDialect(org.apache.spark.sql.jdbc.JdbcDialect) Arrays(java.util.Arrays) URISyntaxException(java.net.URISyntaxException) LoggerFactory(org.slf4j.LoggerFactory) Function0(scala.Function0) JdbcOptionsInWrite(org.apache.spark.sql.execution.datasources.jdbc.JdbcOptionsInWrite) Lists(com.google.common.collect.Lists) Matcher(java.util.regex.Matcher) Map(java.util.Map) JDBCOptions(org.apache.spark.sql.execution.datasources.jdbc.JDBCOptions) URI(java.net.URI) SparkException(org.apache.spark.SparkException) StructType(org.apache.spark.sql.types.StructType) Logger(org.slf4j.Logger) JdbcUtils(org.apache.spark.sql.execution.datasources.jdbc.JdbcUtils) com.google.common.base(com.google.common.base) IOException(java.io.IOException) Streams(com.google.common.collect.Streams) Option(scala.Option) Tuple2(scala.Tuple2) Instant(java.time.Instant) Maps(com.google.common.collect.Maps) DeltaTable(io.delta.tables.DeltaTable) Collectors(java.util.stream.Collectors) InvocationTargetException(java.lang.reflect.InvocationTargetException) Objects(java.util.Objects) ExecutionException(java.util.concurrent.ExecutionException) List(java.util.List) org.apache.spark.sql(org.apache.spark.sql) JavaConverters(scala.collection.JavaConverters) Pattern(java.util.regex.Pattern) JdbcDialects(org.apache.spark.sql.jdbc.JdbcDialects)
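
For comparison, the commented-out write() path inside replace corresponds to Spark's built-in JDBC writer, which handles batching itself but cannot express the truncate-then-multi-row-insert flow above. A minimal sketch using the standard DataFrameWriter API (batchsize is a documented JDBC data source option):

// Plain append through Spark's JDBC data source, reusing the same options.
ds.write()
        .format("jdbc")
        .mode(SaveMode.Append)
        .options(opts.parameters())
        .option("batchsize", "1000")
        .save();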

Aggregations

JdbcOptionsInWrite (org.apache.spark.sql.execution.datasources.jdbc.JdbcOptionsInWrite)7 StructType (org.apache.spark.sql.types.StructType)6 Row (org.apache.spark.sql.Row)4 SparkSession (org.apache.spark.sql.SparkSession)4 Maps (com.google.common.collect.Maps)3 InvocationTargetException (java.lang.reflect.InvocationTargetException)3 java.sql (java.sql)3 Instant (java.time.Instant)3 List (java.util.List)3 Map (java.util.Map)3 Objects (java.util.Objects)3 ExecutionException (java.util.concurrent.ExecutionException)3 SparkException (org.apache.spark.SparkException)3 JdbcUtils (org.apache.spark.sql.execution.datasources.jdbc.JdbcUtils)3 JdbcDialect (org.apache.spark.sql.jdbc.JdbcDialect)3 JdbcDialects (org.apache.spark.sql.jdbc.JdbcDialects)3 Logger (org.slf4j.Logger)3 LoggerFactory (org.slf4j.LoggerFactory)3 Option (scala.Option)3 Tuple2 (scala.Tuple2)3