
Example 1 with Map$

Use of scala.collection.immutable.Map$ in project iceberg by apache.

From class SparkTableUtil, the method getPartitions:

/**
 * Returns all partitions in the table.
 *
 * @param spark a Spark session
 * @param tableIdent a table identifier
 * @param partitionFilter partition filter, or null if no filter
 * @return all of the table's partitions
 */
public static List<SparkPartition> getPartitions(SparkSession spark, TableIdentifier tableIdent, Map<String, String> partitionFilter) {
    try {
        SessionCatalog catalog = spark.sessionState().catalog();
        CatalogTable catalogTable = catalog.getTableMetadata(tableIdent);
        Option<scala.collection.immutable.Map<String, String>> scalaPartitionFilter;
        if (partitionFilter != null && !partitionFilter.isEmpty()) {
            Builder<Tuple2<String, String>, scala.collection.immutable.Map<String, String>> builder = Map$.MODULE$.<String, String>newBuilder();
            partitionFilter.forEach((key, value) -> builder.$plus$eq(Tuple2.apply(key, value)));
            scalaPartitionFilter = Option.apply(builder.result());
        } else {
            scalaPartitionFilter = Option.empty();
        }
        Seq<CatalogTablePartition> partitions = catalog.listPartitions(tableIdent, scalaPartitionFilter).toIndexedSeq();
        return JavaConverters.seqAsJavaListConverter(partitions).asJava().stream().map(catalogPartition -> toSparkPartition(catalogPartition, catalogTable)).collect(Collectors.toList());
    } catch (NoSuchDatabaseException e) {
        throw SparkExceptionUtil.toUncheckedException(e, "Unknown table: %s. Database not found in catalog.", tableIdent);
    } catch (NoSuchTableException e) {
        throw SparkExceptionUtil.toUncheckedException(e, "Unknown table: %s. Table not found in catalog.", tableIdent);
    }
}
Also used : Objects(org.apache.iceberg.relocated.com.google.common.base.Objects) Map$(scala.collection.immutable.Map$) LoggerFactory(org.slf4j.LoggerFactory) AppendFiles(org.apache.iceberg.AppendFiles) CatalogTable(org.apache.spark.sql.catalyst.catalog.CatalogTable) Lists(org.apache.iceberg.relocated.com.google.common.collect.Lists) NameMapping(org.apache.iceberg.mapping.NameMapping) Map(java.util.Map) Configuration(org.apache.hadoop.conf.Configuration) PropertyUtil(org.apache.iceberg.util.PropertyUtil) Path(org.apache.hadoop.fs.Path) URI(java.net.URI) Expression(org.apache.spark.sql.catalyst.expressions.Expression) DataFile(org.apache.iceberg.DataFile) SessionCatalog(org.apache.spark.sql.catalyst.catalog.SessionCatalog) FlatMapFunction(org.apache.spark.api.java.function.FlatMapFunction) MapFunction(org.apache.spark.api.java.function.MapFunction) ManifestWriter(org.apache.iceberg.ManifestWriter) Some(scala.Some) LogicalPlan(org.apache.spark.sql.catalyst.plans.logical.LogicalPlan) DataFrameReader(org.apache.spark.sql.DataFrameReader) ImmutableList(org.apache.iceberg.relocated.com.google.common.collect.ImmutableList) TableIdentifier(org.apache.spark.sql.catalyst.TableIdentifier) Tuple2(scala.Tuple2) Collectors(java.util.stream.Collectors) Serializable(java.io.Serializable) Util(org.apache.iceberg.hadoop.Util) List(java.util.List) DynMethods(org.apache.iceberg.common.DynMethods) SerializableConfiguration(org.apache.iceberg.hadoop.SerializableConfiguration) PartitionSpec(org.apache.iceberg.PartitionSpec) JavaConverters(scala.collection.JavaConverters) TableProperties(org.apache.iceberg.TableProperties) org.apache.spark.sql.functions.col(org.apache.spark.sql.functions.col) Builder(scala.collection.mutable.Builder) Seq(scala.collection.immutable.Seq) AnalysisException(org.apache.spark.sql.AnalysisException) Dataset(org.apache.spark.sql.Dataset) UnresolvedAttribute(org.apache.spark.sql.catalyst.analysis.UnresolvedAttribute) JavaSparkContext(org.apache.spark.api.java.JavaSparkContext) PathFilter(org.apache.hadoop.fs.PathFilter) HadoopFileIO(org.apache.iceberg.hadoop.HadoopFileIO) MapPartitionsFunction(org.apache.spark.api.java.function.MapPartitionsFunction) OutputFile(org.apache.iceberg.io.OutputFile) ParseException(org.apache.spark.sql.catalyst.parser.ParseException) Function2(scala.Function2) ManifestFile(org.apache.iceberg.ManifestFile) ManifestFiles(org.apache.iceberg.ManifestFiles) NoSuchTableException(org.apache.spark.sql.catalyst.analysis.NoSuchTableException) NamedExpression(org.apache.spark.sql.catalyst.expressions.NamedExpression) CatalogTablePartition(org.apache.spark.sql.catalyst.catalog.CatalogTablePartition) JavaRDD(org.apache.spark.api.java.JavaRDD) SparkSession(org.apache.spark.sql.SparkSession) Logger(org.slf4j.Logger) Iterator(java.util.Iterator) MetricsConfig(org.apache.iceberg.MetricsConfig) Table(org.apache.iceberg.Table) TaskContext(org.apache.spark.TaskContext) Column(org.apache.spark.sql.Column) Maps(org.apache.iceberg.relocated.com.google.common.collect.Maps) NameMappingParser(org.apache.iceberg.mapping.NameMappingParser) IOException(java.io.IOException) MoreObjects(org.apache.iceberg.relocated.com.google.common.base.MoreObjects) MetadataTableType(org.apache.iceberg.MetadataTableType) Row(org.apache.spark.sql.Row) Option(scala.Option) FileFormat(org.apache.iceberg.FileFormat) NoSuchDatabaseException(org.apache.spark.sql.catalyst.analysis.NoSuchDatabaseException) Joiner(org.apache.iceberg.relocated.com.google.common.base.Joiner) Encoders(org.apache.spark.sql.Encoders) 
TableMigrationUtil(org.apache.iceberg.data.TableMigrationUtil) Tasks(org.apache.iceberg.util.Tasks) Preconditions(org.apache.iceberg.relocated.com.google.common.base.Preconditions) FileIO(org.apache.iceberg.io.FileIO) Collections(java.util.Collections) AbstractPartialFunction(scala.runtime.AbstractPartialFunction) CatalogTablePartition(org.apache.spark.sql.catalyst.catalog.CatalogTablePartition) NoSuchTableException(org.apache.spark.sql.catalyst.analysis.NoSuchTableException) SessionCatalog(org.apache.spark.sql.catalyst.catalog.SessionCatalog) CatalogTable(org.apache.spark.sql.catalyst.catalog.CatalogTable) NoSuchDatabaseException(org.apache.spark.sql.catalyst.analysis.NoSuchDatabaseException) Tuple2(scala.Tuple2) Map(java.util.Map)
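
The interop pattern at the heart of this example is building a scala.collection.immutable.Map from a java.util.Map through Map$.MODULE$.newBuilder(), then wrapping it in a scala.Option; the builder route keeps the result immutable, which is what the catalog API expects. Below is a minimal sketch of just that conversion; the class name, method name, and the sample partition column are illustrative and not part of the Iceberg source.

import java.util.Map;
import scala.Option;
import scala.Tuple2;
import scala.collection.immutable.Map$;
import scala.collection.mutable.Builder;

public class ScalaMapInterop {

    // Converts a possibly-null java.util.Map into an Option of an immutable Scala Map,
    // mirroring the partition-filter handling in getPartitions above.
    static Option<scala.collection.immutable.Map<String, String>> toScalaFilter(Map<String, String> javaMap) {
        if (javaMap == null || javaMap.isEmpty()) {
            return Option.empty();
        }
        Builder<Tuple2<String, String>, scala.collection.immutable.Map<String, String>> builder =
                Map$.MODULE$.<String, String>newBuilder();
        javaMap.forEach((key, value) -> builder.$plus$eq(Tuple2.apply(key, value)));
        return Option.apply(builder.result());
    }

    public static void main(String[] args) {
        Map<String, String> filter = new java.util.HashMap<>();
        filter.put("dt", "2022-01-01");               // illustrative partition column and value
        System.out.println(toScalaFilter(filter));    // Some(Map(dt -> 2022-01-01))
        System.out.println(toScalaFilter(null));      // None
    }
}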

Example 2 with Map$

Use of scala.collection.immutable.Map$ in project iceberg by apache.

From class SparkTableUtil, the method getPartitionsByFilter:

/**
 * Returns partitions that match the specified 'predicate'.
 *
 * @param spark a Spark session
 * @param tableIdent a table identifier
 * @param predicateExpr a predicate expression on partition columns
 * @return the table's partitions that match the predicate
 */
public static List<SparkPartition> getPartitionsByFilter(SparkSession spark, TableIdentifier tableIdent, Expression predicateExpr) {
    try {
        SessionCatalog catalog = spark.sessionState().catalog();
        CatalogTable catalogTable = catalog.getTableMetadata(tableIdent);
        Expression resolvedPredicateExpr;
        if (!predicateExpr.resolved()) {
            resolvedPredicateExpr = resolveAttrs(spark, tableIdent.quotedString(), predicateExpr);
        } else {
            resolvedPredicateExpr = predicateExpr;
        }
        Seq<Expression> predicates = JavaConverters.collectionAsScalaIterableConverter(ImmutableList.of(resolvedPredicateExpr)).asScala().toIndexedSeq();
        Seq<CatalogTablePartition> partitions = catalog.listPartitionsByFilter(tableIdent, predicates).toIndexedSeq();
        return JavaConverters.seqAsJavaListConverter(partitions).asJava().stream().map(catalogPartition -> toSparkPartition(catalogPartition, catalogTable)).collect(Collectors.toList());
    } catch (NoSuchDatabaseException e) {
        throw SparkExceptionUtil.toUncheckedException(e, "Unknown table: %s. Database not found in catalog.", tableIdent);
    } catch (NoSuchTableException e) {
        throw SparkExceptionUtil.toUncheckedException(e, "Unknown table: %s. Table not found in catalog.", tableIdent);
    }
}
Also used : Objects(org.apache.iceberg.relocated.com.google.common.base.Objects) Map$(scala.collection.immutable.Map$) LoggerFactory(org.slf4j.LoggerFactory) AppendFiles(org.apache.iceberg.AppendFiles) CatalogTable(org.apache.spark.sql.catalyst.catalog.CatalogTable) Lists(org.apache.iceberg.relocated.com.google.common.collect.Lists) NameMapping(org.apache.iceberg.mapping.NameMapping) Map(java.util.Map) Configuration(org.apache.hadoop.conf.Configuration) PropertyUtil(org.apache.iceberg.util.PropertyUtil) Path(org.apache.hadoop.fs.Path) URI(java.net.URI) Expression(org.apache.spark.sql.catalyst.expressions.Expression) DataFile(org.apache.iceberg.DataFile) SessionCatalog(org.apache.spark.sql.catalyst.catalog.SessionCatalog) FlatMapFunction(org.apache.spark.api.java.function.FlatMapFunction) MapFunction(org.apache.spark.api.java.function.MapFunction) ManifestWriter(org.apache.iceberg.ManifestWriter) Some(scala.Some) LogicalPlan(org.apache.spark.sql.catalyst.plans.logical.LogicalPlan) DataFrameReader(org.apache.spark.sql.DataFrameReader) ImmutableList(org.apache.iceberg.relocated.com.google.common.collect.ImmutableList) TableIdentifier(org.apache.spark.sql.catalyst.TableIdentifier) Tuple2(scala.Tuple2) Collectors(java.util.stream.Collectors) Serializable(java.io.Serializable) Util(org.apache.iceberg.hadoop.Util) List(java.util.List) DynMethods(org.apache.iceberg.common.DynMethods) SerializableConfiguration(org.apache.iceberg.hadoop.SerializableConfiguration) PartitionSpec(org.apache.iceberg.PartitionSpec) JavaConverters(scala.collection.JavaConverters) TableProperties(org.apache.iceberg.TableProperties) org.apache.spark.sql.functions.col(org.apache.spark.sql.functions.col) Builder(scala.collection.mutable.Builder) Seq(scala.collection.immutable.Seq) AnalysisException(org.apache.spark.sql.AnalysisException) Dataset(org.apache.spark.sql.Dataset) UnresolvedAttribute(org.apache.spark.sql.catalyst.analysis.UnresolvedAttribute) JavaSparkContext(org.apache.spark.api.java.JavaSparkContext) PathFilter(org.apache.hadoop.fs.PathFilter) HadoopFileIO(org.apache.iceberg.hadoop.HadoopFileIO) MapPartitionsFunction(org.apache.spark.api.java.function.MapPartitionsFunction) OutputFile(org.apache.iceberg.io.OutputFile) ParseException(org.apache.spark.sql.catalyst.parser.ParseException) Function2(scala.Function2) ManifestFile(org.apache.iceberg.ManifestFile) ManifestFiles(org.apache.iceberg.ManifestFiles) NoSuchTableException(org.apache.spark.sql.catalyst.analysis.NoSuchTableException) NamedExpression(org.apache.spark.sql.catalyst.expressions.NamedExpression) CatalogTablePartition(org.apache.spark.sql.catalyst.catalog.CatalogTablePartition) JavaRDD(org.apache.spark.api.java.JavaRDD) SparkSession(org.apache.spark.sql.SparkSession) Logger(org.slf4j.Logger) Iterator(java.util.Iterator) MetricsConfig(org.apache.iceberg.MetricsConfig) Table(org.apache.iceberg.Table) TaskContext(org.apache.spark.TaskContext) Column(org.apache.spark.sql.Column) Maps(org.apache.iceberg.relocated.com.google.common.collect.Maps) NameMappingParser(org.apache.iceberg.mapping.NameMappingParser) IOException(java.io.IOException) MoreObjects(org.apache.iceberg.relocated.com.google.common.base.MoreObjects) MetadataTableType(org.apache.iceberg.MetadataTableType) Row(org.apache.spark.sql.Row) Option(scala.Option) FileFormat(org.apache.iceberg.FileFormat) NoSuchDatabaseException(org.apache.spark.sql.catalyst.analysis.NoSuchDatabaseException) Joiner(org.apache.iceberg.relocated.com.google.common.base.Joiner) Encoders(org.apache.spark.sql.Encoders) 
TableMigrationUtil(org.apache.iceberg.data.TableMigrationUtil) Tasks(org.apache.iceberg.util.Tasks) Preconditions(org.apache.iceberg.relocated.com.google.common.base.Preconditions) FileIO(org.apache.iceberg.io.FileIO) Collections(java.util.Collections) AbstractPartialFunction(scala.runtime.AbstractPartialFunction) NoSuchDatabaseException(org.apache.spark.sql.catalyst.analysis.NoSuchDatabaseException) Expression(org.apache.spark.sql.catalyst.expressions.Expression) NamedExpression(org.apache.spark.sql.catalyst.expressions.NamedExpression) CatalogTablePartition(org.apache.spark.sql.catalyst.catalog.CatalogTablePartition) NoSuchTableException(org.apache.spark.sql.catalyst.analysis.NoSuchTableException) SessionCatalog(org.apache.spark.sql.catalyst.catalog.SessionCatalog) CatalogTable(org.apache.spark.sql.catalyst.catalog.CatalogTable)
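
The Java-to-Scala bridging in this example is for sequences rather than maps: the single predicate expression is put in a list and converted to an immutable Scala Seq via JavaConverters before being handed to listPartitionsByFilter. A minimal sketch of that conversion in isolation; the helper name and the sample elements are illustrative, not from the Iceberg source.

import java.util.Arrays;
import java.util.List;
import scala.collection.JavaConverters;
import scala.collection.immutable.Seq;

public class ScalaSeqInterop {

    // Converts a java.util.List into an immutable Scala Seq, as done above for the
    // predicate expressions passed to SessionCatalog.listPartitionsByFilter.
    static <T> Seq<T> toScalaSeq(List<T> javaList) {
        return JavaConverters.collectionAsScalaIterableConverter(javaList).asScala().toIndexedSeq();
    }

    public static void main(String[] args) {
        Seq<String> predicates = toScalaSeq(Arrays.asList("a", "b"));   // placeholder elements
        System.out.println(predicates);                                  // Vector(a, b)
    }
}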

Example 3 with Map$

Use of scala.collection.immutable.Map$ in project secor by pinterest.

From class OstrichAdminService, the method start:

public void start() {
    Duration[] defaultLatchIntervals = { Duration.apply(1, TimeUnit.MINUTES) };
    Map<String, CustomHttpHandler> handlers = mPrometheusEnabled ? new Map.Map1<>("/prometheus", new PrometheusHandler()) : Map$.MODULE$.empty();
    @SuppressWarnings("deprecation") AdminServiceFactory adminServiceFactory = new AdminServiceFactory(this.mPort, 20, List$.MODULE$.<StatsFactory>empty(), Option.<String>empty(), List$.MODULE$.<Regex>empty(), handlers, JavaConversions.asScalaBuffer(Arrays.asList(defaultLatchIntervals)).toList());
    RuntimeEnvironment runtimeEnvironment = new RuntimeEnvironment(this);
    adminServiceFactory.apply(runtimeEnvironment);
    try {
        Properties properties = new Properties();
        properties.load(this.getClass().getResource("build.properties").openStream());
        String buildRevision = properties.getProperty("build_revision", "unknown");
        LOG.info("build.properties build_revision: {}", properties.getProperty("build_revision", "unknown"));
        StatsUtil.setLabel("secor.build_revision", buildRevision);
    } catch (Throwable t) {
        LOG.error("Failed to load properties from build.properties", t);
    }
}
Also used : RuntimeEnvironment(com.twitter.ostrich.admin.RuntimeEnvironment) Duration(com.twitter.util.Duration) Properties(java.util.Properties) CustomHttpHandler(com.twitter.ostrich.admin.CustomHttpHandler) AdminServiceFactory(com.twitter.ostrich.admin.AdminServiceFactory) Map(scala.collection.immutable.Map)
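
This snippet produces a scala.collection.immutable.Map in two ways: Map$.MODULE$.empty() when Prometheus is disabled, and the specialized one-element Map.Map1 otherwise. A small self-contained sketch of that choice, with String standing in for CustomHttpHandler so it has no Ostrich dependency; names and values are illustrative, not from secor.

import scala.collection.immutable.Map;
import scala.collection.immutable.Map$;

public class ScalaMapChoice {

    // Returns an empty immutable Scala Map, or a one-element Map.Map1 when a handler is
    // present, mirroring the Prometheus handler wiring in start() above.
    static Map<String, String> handlerMap(String path, String handler) {
        return handler != null
                ? new Map.Map1<>(path, handler)
                : Map$.MODULE$.<String, String>empty();
    }

    public static void main(String[] args) {
        System.out.println(handlerMap("/prometheus", "prometheusHandler"));   // Map(/prometheus -> prometheusHandler)
        System.out.println(handlerMap("/prometheus", null));                  // Map()
    }
}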

Example 4 with Map$

Use of scala.collection.immutable.Map$ in project parent by Daytime-Don-t-Know-Dark-Night.

From class Jdbcs, the method onDuplicateUpdate:

public static String onDuplicateUpdate(StructType schema, String sql2, String... exclude) {
    Set<String> excludeSet = Arrays.stream(exclude).collect(Collectors.toSet());
    Iterable<String> cols1 = Splitter.on(",").trimResults().omitEmptyStrings().split(sql2);
    Pattern pat = Pattern.compile("([^=]+)=[^=]+$");
    Streams.stream(cols1).forEach(i -> {
        Matcher matcher = pat.matcher(i);
        Preconditions.checkArgument(matcher.matches(), "format error in " + i);
        excludeSet.add(CharMatcher.anyOf("`").trimFrom(matcher.group(1)));
    });
    List<String> cols2 = Arrays.stream(schema.fields()).filter(i -> !excludeSet.contains(i.name())).map(i -> String.format("`%s`=values(`%s`)", i.name(), i.name())).collect(Collectors.toList());
    Preconditions.checkArgument(Iterables.size(cols1) + cols2.size() + exclude.length == schema.size());
    String cols = Joiner.on(",").join(Iterables.concat(cols1, cols2));
    return "on duplicate key update " + cols;
}
Also used : java.sql(java.sql) java.util(java.util) Dataset(org.apache.spark.sql.Dataset) Map$(scala.collection.immutable.Map$) LoggerFactory(org.slf4j.LoggerFactory) Function0(scala.Function0) JdbcOptionsInWrite(org.apache.spark.sql.execution.datasources.jdbc.JdbcOptionsInWrite) Matcher(java.util.regex.Matcher) StreamSupport(java.util.stream.StreamSupport) JDBCOptions(org.apache.spark.sql.execution.datasources.jdbc.JDBCOptions) com.google.common.collect(com.google.common.collect) StructType(org.apache.spark.sql.types.StructType) Logger(org.slf4j.Logger) JdbcUtils(org.apache.spark.sql.execution.datasources.jdbc.JdbcUtils) com.google.common.base(com.google.common.base) Seq(scala.collection.Seq) Row(org.apache.spark.sql.Row) Option(scala.Option) Tuple2(scala.Tuple2) Collectors(java.util.stream.Collectors) Objects(java.util.Objects) Stream(java.util.stream.Stream) JavaConverters(scala.collection.JavaConverters) Optional(java.util.Optional) Pattern(java.util.regex.Pattern) Pattern(java.util.regex.Pattern) Matcher(java.util.regex.Matcher)
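
For context, a hypothetical call to the method above could look like the following. The schema, column names, and sql2 value are invented for illustration, and the sketch assumes the Jdbcs class shown above is on the classpath: `id` already has an explicit update expression, so only the remaining columns fall back to values(...).

import org.apache.spark.sql.types.DataTypes;
import org.apache.spark.sql.types.StructType;

public class JdbcsUsage {

    public static void main(String[] args) {
        // Hypothetical three-column schema.
        StructType schema = new StructType()
                .add("id", DataTypes.LongType)
                .add("name", DataTypes.StringType)
                .add("updated_at", DataTypes.TimestampType);

        String clause = Jdbcs.onDuplicateUpdate(schema, "`id`=`id`");
        System.out.println(clause);
        // on duplicate key update `id`=`id`,`name`=values(`name`),`updated_at`=values(`updated_at`)
    }
}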

Example 5 with Map$

Use of scala.collection.immutable.Map$ in project kylo by Teradata.

From class JdbcRelationProviderTest, the method testWithClassLoader:

/**
 * Verify creating a JdbcRelation using a custom class loader.
 */
@Test
@SuppressWarnings("unchecked")
public void testWithClassLoader() {
    // Create parameters map
    final Map<String, String> parameters = (Map<String, String>) Map$.MODULE$.<String, String>newBuilder().$plus$eq(new Tuple2<>("dbtable", "mytable")).$plus$eq(new Tuple2<>("url", "jdbc:h2:mem:spark")).result();
    // Test creating a JDBC relation
    final DataSourceResourceLoader classLoader = new DataSourceResourceLoader(Mockito.mock(SparkContext.class), Thread.currentThread().getContextClassLoader());
    final AtomicReference<JdbcRelation> relation = new AtomicReference<>();
    classLoader.runWithThreadContext(new Runnable() {

        @Override
        public void run() {
            try (final Connection conn = DriverManager.getConnection("jdbc:h2:mem:spark");
                final Statement stmt = conn.createStatement()) {
                stmt.execute("CREATE TABLE mytable (col1 VARCHAR)");
                final JdbcRelationProvider provider = new JdbcRelationProvider();
                relation.set((JdbcRelation) provider.createRelation(Mockito.mock(SQLContext.class), parameters));
            } catch (final Exception e) {
                Throwables.propagate(e);
            }
        }
    });
    Assert.assertNotNull("Expected relation to be created", relation.get());
    Assert.assertEquals(JDBCRelation.class, relation.get().getDelegate().getClass());
    Assert.assertEquals(classLoader, relation.get().getLoader());
}
Also used : DataSourceResourceLoader(com.thinkbiganalytics.kylo.catalog.spark.DataSourceResourceLoader) Statement(java.sql.Statement) Connection(java.sql.Connection) AtomicReference(java.util.concurrent.atomic.AtomicReference) SparkContext(org.apache.spark.SparkContext) Tuple2(scala.Tuple2) Map(scala.collection.immutable.Map) Test(org.junit.Test)
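
The parameters map here is assembled inline with the same Map$ builder chain seen in Example 1. Pulled out into a small helper it looks like the sketch below; the helper name and the main method are illustrative, not part of the Kylo test.

import scala.Tuple2;
import scala.collection.immutable.Map;
import scala.collection.immutable.Map$;
import scala.collection.mutable.Builder;

public class ScalaOptionsMap {

    // Builds an immutable Scala Map from key/value pairs, the same way the test above
    // assembles the JDBC relation parameters inline.
    @SafeVarargs
    static Map<String, String> options(Tuple2<String, String>... pairs) {
        Builder<Tuple2<String, String>, Map<String, String>> builder = Map$.MODULE$.<String, String>newBuilder();
        for (Tuple2<String, String> pair : pairs) {
            builder.$plus$eq(pair);
        }
        return builder.result();
    }

    public static void main(String[] args) {
        Map<String, String> parameters = options(
                new Tuple2<>("dbtable", "mytable"),
                new Tuple2<>("url", "jdbc:h2:mem:spark"));
        System.out.println(parameters);   // Map(dbtable -> mytable, url -> jdbc:h2:mem:spark)
    }
}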

Aggregations

Tuple2 (scala.Tuple2): 6
Collectors (java.util.stream.Collectors): 5
Map$ (scala.collection.immutable.Map$): 5
IOException (java.io.IOException): 4
URI (java.net.URI): 4
List (java.util.List): 4
Serializable (java.io.Serializable): 3
Collections (java.util.Collections): 3
Iterator (java.util.Iterator): 3
Map (java.util.Map): 3
Configuration (org.apache.hadoop.conf.Configuration): 3
Path (org.apache.hadoop.fs.Path): 3
PathFilter (org.apache.hadoop.fs.PathFilter): 3
AppendFiles (org.apache.iceberg.AppendFiles): 3
DataFile (org.apache.iceberg.DataFile): 3
FileFormat (org.apache.iceberg.FileFormat): 3
ManifestFile (org.apache.iceberg.ManifestFile): 3
ManifestFiles (org.apache.iceberg.ManifestFiles): 3
Dataset (org.apache.spark.sql.Dataset): 3
Row (org.apache.spark.sql.Row): 3