Example 1 with SparkSession$

Use of org.apache.spark.sql.SparkSession$ in project Taier by DTStack.

The runJob method of the SqlProxy class.

public void runJob(String submitSql, String appName, String logLevel, SparkConf conf) {
    if (appName == null) {
        appName = DEFAULT_APP_NAME;
    }
    SparkSession spark = SparkSession.builder().config(conf).appName(appName).enableHiveSupport().getOrCreate();
    setLogLevel(spark, logLevel);
    // Decompress the submitted SQL
    String unzipSql = ZipUtil.unzip(submitSql);
    // Do not treat semicolons inside quotes as statement separators
    Splitter splitter = new Splitter(';');
    List<String> sqlArray = splitter.splitEscaped(unzipSql);
    for (String sql : sqlArray) {
        if (sql == null || sql.trim().length() == 0) {
            continue;
        }
        logger.info("processed sql statement {}", sql);
        spark.sql(sql);
    }
    spark.close();
}
Also used : SparkSession(org.apache.spark.sql.SparkSession) Splitter(com.dtstack.taier.base.util.Splitter)
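
For context, a hedged sketch of how a caller might drive runJob. ZipUtil.zip is assumed here as the inverse of the ZipUtil.unzip call above (a hypothetical helper; Taier's real submission path may differ).

public static void main(String[] args) {
    SparkConf conf = new SparkConf().setMaster("local[*]");
    // Two statements separated by a semicolon, which the Splitter above splits on.
    String script = "CREATE TABLE t (id INT); INSERT INTO t VALUES (1)";
    // Assumption: ZipUtil.zip compresses the script the way runJob's ZipUtil.unzip expects.
    String submitSql = ZipUtil.zip(script);
    new SqlProxy().runJob(submitSql, "demo-app", "INFO", conf);
}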

Example 2 with SparkSession$

Use of org.apache.spark.sql.SparkSession$ in project coral by linkedin.

The testScalaVersionWithSparkSession method of the TransportableUDFMapTest class.

@Test
public void testScalaVersionWithSparkSession() {
    SparkSession ss = SparkSession.builder().appName(TransportableUDFMapTest.class.getSimpleName()).master("local[1]").enableHiveSupport().getOrCreate();
    Assert.assertEquals(TransportableUDFMap.getScalaVersion(), TransportableUDFMap.ScalaVersion.SCALA_2_11);
    ss.close();
}
Also used : SparkSession(org.apache.spark.sql.SparkSession) Test(org.testng.annotations.Test)
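
getScalaVersion presumably inspects the Scala runtime on the classpath; a minimal sketch of one way to do that from Java (an assumption about the approach, not coral's actual implementation; a SCALA_2_12 constant alongside SCALA_2_11 is also assumed):

// scala.util.Properties exposes static forwarders callable from Java.
String scalaVersion = scala.util.Properties.versionNumberString(); // e.g. "2.11.12"
TransportableUDFMap.ScalaVersion detected = scalaVersion.startsWith("2.12")
    ? TransportableUDFMap.ScalaVersion.SCALA_2_12
    : TransportableUDFMap.ScalaVersion.SCALA_2_11;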

Example 3 with SparkSession$

Use of org.apache.spark.sql.SparkSession$ in project iceberg by apache.

The stopSpark method of the TestSparkSchema class.

@AfterClass
public static void stopSpark() {
    SparkSession currentSpark = TestSparkSchema.spark;
    TestSparkSchema.spark = null;
    currentSpark.stop();
}
Also used : SparkSession(org.apache.spark.sql.SparkSession) AfterClass(org.junit.AfterClass)
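
This teardown (and the near-identical one in Example 4 below) assumes a @BeforeClass counterpart that populates the static spark field; a minimal sketch of that setup, noting that the real Iceberg tests also configure catalogs and a warehouse location:

@BeforeClass
public static void startSpark() {
    // Populate the static field that stopSpark() later nulls out and stops.
    TestSparkSchema.spark = SparkSession.builder().master("local[2]").getOrCreate();
}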

Example 4 with SparkSession$

Use of org.apache.spark.sql.SparkSession$ in project iceberg by apache.

The stopSpark method of the TestNameMappingProjection class.

@AfterClass
public static void stopSpark() {
    SparkSession currentSpark = TestNameMappingProjection.spark;
    // Clear the static reference before stopping so later tests cannot reuse a stopped session.
    TestNameMappingProjection.spark = null;
    currentSpark.stop();
}
Also used : SparkSession(org.apache.spark.sql.SparkSession) AfterClass(org.junit.AfterClass)

Example 5 with SparkSession$

Use of org.apache.spark.sql.SparkSession$ in project iceberg by apache.

The getPartitions method of the Spark3Util class.

/**
 * Use Spark to list all partitions in the table.
 *
 * @param spark a Spark session
 * @param rootPath the table's root location path
 * @param format format of the table's data files
 * @return all partitions of the table
 */
public static List<SparkPartition> getPartitions(SparkSession spark, Path rootPath, String format) {
    FileStatusCache fileStatusCache = FileStatusCache.getOrCreate(spark);
    Map<String, String> emptyMap = Collections.emptyMap();
    InMemoryFileIndex fileIndex = new InMemoryFileIndex(
            spark,
            JavaConverters.collectionAsScalaIterableConverter(ImmutableList.of(rootPath)).asScala().toSeq(),
            JavaConverters.mapAsScalaMapConverter(emptyMap).asScala().toMap(Predef.conforms()),
            Option.empty(),
            fileStatusCache,
            Option.empty(),
            Option.empty());
    org.apache.spark.sql.execution.datasources.PartitionSpec spec = fileIndex.partitionSpec();
    StructType schema = spec.partitionColumns();
    return JavaConverters.seqAsJavaListConverter(spec.partitions()).asJava().stream().map(partition -> {
        Map<String, String> values = Maps.newHashMap();
        JavaConverters.asJavaIterableConverter(schema).asJava().forEach(field -> {
            int fieldIndex = schema.fieldIndex(field.name());
            Object catalystValue = partition.values().get(fieldIndex, field.dataType());
            Object value = CatalystTypeConverters.convertToScala(catalystValue, field.dataType());
            values.put(field.name(), String.valueOf(value));
        });
        return new SparkPartition(values, partition.path().toString(), format);
    }).collect(Collectors.toList());
}
Also used : FileStatusCache(org.apache.spark.sql.execution.datasources.FileStatusCache) WRITE_DISTRIBUTION_MODE_RANGE(org.apache.iceberg.TableProperties.WRITE_DISTRIBUTION_MODE_RANGE) Distributions(org.apache.spark.sql.connector.iceberg.distributions.Distributions) Arrays(java.util.Arrays) DataSourceV2Relation(org.apache.spark.sql.execution.datasources.v2.DataSourceV2Relation) TypeUtil(org.apache.iceberg.types.TypeUtil) Types(org.apache.iceberg.types.Types) MetadataTableUtils(org.apache.iceberg.MetadataTableUtils) UpdateSchema(org.apache.iceberg.UpdateSchema) PartitionSpecVisitor(org.apache.iceberg.transforms.PartitionSpecVisitor) ByteBuffer(java.nio.ByteBuffer) TableOperations(org.apache.iceberg.TableOperations) TableCatalog(org.apache.spark.sql.connector.catalog.TableCatalog) SortOrder(org.apache.spark.sql.connector.iceberg.expressions.SortOrder) Map(java.util.Map) Path(org.apache.hadoop.fs.Path) StructType(org.apache.spark.sql.types.StructType) Some(scala.Some) Term(org.apache.iceberg.expressions.Term) IntegerType(org.apache.spark.sql.types.IntegerType) Seq(scala.collection.Seq) SortOrderVisitor(org.apache.iceberg.transforms.SortOrderVisitor) Set(java.util.Set) LongType(org.apache.spark.sql.types.LongType) ImmutableList(org.apache.iceberg.relocated.com.google.common.collect.ImmutableList) Schema(org.apache.iceberg.Schema) WRITE_DISTRIBUTION_MODE(org.apache.iceberg.TableProperties.WRITE_DISTRIBUTION_MODE) Collectors(java.util.stream.Collectors) Objects(java.util.Objects) Type(org.apache.iceberg.types.Type) List(java.util.List) UpdateProperties(org.apache.iceberg.UpdateProperties) ExpressionVisitors(org.apache.iceberg.expressions.ExpressionVisitors) OrderedDistribution(org.apache.spark.sql.connector.iceberg.distributions.OrderedDistribution) Expressions(org.apache.spark.sql.connector.expressions.Expressions) DistributionMode(org.apache.iceberg.DistributionMode) PartitionSpec(org.apache.iceberg.PartitionSpec) JavaConverters(scala.collection.JavaConverters) TableProperties(org.apache.iceberg.TableProperties) Transform(org.apache.spark.sql.connector.expressions.Transform) ImmutableSet(org.apache.iceberg.relocated.com.google.common.collect.ImmutableSet) Dataset(org.apache.spark.sql.Dataset) ImmutableMap(org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap) TableChange(org.apache.spark.sql.connector.catalog.TableChange) Pair(org.apache.iceberg.util.Pair) SortOrderUtil(org.apache.iceberg.util.SortOrderUtil) ParseException(org.apache.spark.sql.catalyst.parser.ParseException) BoundPredicate(org.apache.iceberg.expressions.BoundPredicate) InMemoryFileIndex(org.apache.spark.sql.execution.datasources.InMemoryFileIndex) Predef(scala.Predef) SparkPartition(org.apache.iceberg.spark.SparkTableUtil.SparkPartition) NoSuchTableException(org.apache.spark.sql.catalyst.analysis.NoSuchTableException) NullOrder(org.apache.iceberg.NullOrder) Namespace(org.apache.iceberg.catalog.Namespace) SparkSession(org.apache.spark.sql.SparkSession) CatalystTypeConverters(org.apache.spark.sql.catalyst.CatalystTypeConverters) TableIdentifier(org.apache.iceberg.catalog.TableIdentifier) Literal(org.apache.spark.sql.connector.expressions.Literal) Maps(org.apache.iceberg.relocated.com.google.common.collect.Maps) MetadataTableType(org.apache.iceberg.MetadataTableType) Row(org.apache.spark.sql.Row) Option(scala.Option) Joiner(org.apache.iceberg.relocated.com.google.common.base.Joiner) Distribution(org.apache.spark.sql.connector.iceberg.distributions.Distribution) 
Expression(org.apache.spark.sql.connector.expressions.Expression) CatalogPlugin(org.apache.spark.sql.connector.catalog.CatalogPlugin) Preconditions(org.apache.iceberg.relocated.com.google.common.base.Preconditions) UnboundPredicate(org.apache.iceberg.expressions.UnboundPredicate) Identifier(org.apache.spark.sql.connector.catalog.Identifier) ParserInterface(org.apache.spark.sql.catalyst.parser.ParserInterface) WRITE_DISTRIBUTION_MODE_NONE(org.apache.iceberg.TableProperties.WRITE_DISTRIBUTION_MODE_NONE) Collections(java.util.Collections) SparkTable(org.apache.iceberg.spark.source.SparkTable) CaseInsensitiveStringMap(org.apache.spark.sql.util.CaseInsensitiveStringMap) CatalogManager(org.apache.spark.sql.connector.catalog.CatalogManager) Table(org.apache.spark.sql.connector.catalog.Table)
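
A hedged call-site sketch for getPartitions; the warehouse path and file format below are placeholders, not values taken from the Iceberg code base:

SparkSession spark = SparkSession.builder().master("local[*]").getOrCreate();
Path tableRoot = new Path("hdfs:///warehouse/db/events"); // placeholder table location
// Lists Hive-style partition directories under the root via Spark's InMemoryFileIndex.
List<SparkPartition> partitions = Spark3Util.getPartitions(spark, tableRoot, "parquet");
partitions.forEach(p -> System.out.println(p));
spark.stop();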

Aggregations

SparkSession (org.apache.spark.sql.SparkSession): 393
Row (org.apache.spark.sql.Row): 223
JavaSparkContext (org.apache.spark.api.java.JavaSparkContext): 64
StructType (org.apache.spark.sql.types.StructType): 61
ArrayList (java.util.ArrayList): 60
Test (org.junit.jupiter.api.Test): 42
List (java.util.List): 37
Dataset (org.apache.spark.sql.Dataset): 34
StructField (org.apache.spark.sql.types.StructField): 34
IOException (java.io.IOException): 28
Tuple2 (scala.Tuple2): 26
JavaRDD (org.apache.spark.api.java.JavaRDD): 25
Collectors (java.util.stream.Collectors): 24
Path (org.apache.hadoop.fs.Path): 24
Arrays (java.util.Arrays): 20
HashSet (java.util.HashSet): 20
Map (java.util.Map): 20
SparkConf (org.apache.spark.SparkConf): 20
Schema (uk.gov.gchq.gaffer.store.schema.Schema): 19
Configuration (org.apache.hadoop.conf.Configuration): 18