Example 11 with CaseInsensitiveStringMap

Use of org.apache.spark.sql.util.CaseInsensitiveStringMap in project iceberg by apache.

In class TestPathIdentifier, method before():

@Before
public void before() throws IOException {
    tableLocation = temp.newFolder();
    identifier = new PathIdentifier(tableLocation.getAbsolutePath());
    sparkCatalog = new SparkCatalog();
    sparkCatalog.initialize("test", new CaseInsensitiveStringMap(ImmutableMap.of()));
}
Also used: SparkCatalog (org.apache.iceberg.spark.SparkCatalog), PathIdentifier (org.apache.iceberg.spark.PathIdentifier), CaseInsensitiveStringMap (org.apache.spark.sql.util.CaseInsensitiveStringMap), Before (org.junit.Before)
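
CaseInsensitiveStringMap is Spark's read-only Map<String, String> wrapper that normalizes keys to lower case on lookup, which is why catalog and scan options can be supplied in any case. A minimal standalone sketch of that behavior (using java.util.Map instead of Iceberg's ImmutableMap; the warehouse path is a placeholder):

import java.util.Map;
import org.apache.spark.sql.util.CaseInsensitiveStringMap;

public class CaseInsensitiveLookup {
    public static void main(String[] args) {
        // "Path", "PATH", and "path" all resolve to the same entry.
        CaseInsensitiveStringMap options =
            new CaseInsensitiveStringMap(Map.of("Path", "/tmp/warehouse"));
        System.out.println(options.get("path"));  // prints /tmp/warehouse
        System.out.println(options.get("PATH"));  // prints /tmp/warehouse
        // The wrapper is read-only: put()/remove() throw UnsupportedOperationException.
    }
}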

Example 12 with CaseInsensitiveStringMap

Use of org.apache.spark.sql.util.CaseInsensitiveStringMap in project iceberg by apache.

In class TestFilteredScan, method testPartitionedByDataNotStartsWithFilter():

@Test
public void testPartitionedByDataNotStartsWithFilter() {
    Table table = buildPartitionedTable("partitioned_by_data", PARTITION_BY_DATA, "data_ident", "data");
    CaseInsensitiveStringMap options = new CaseInsensitiveStringMap(ImmutableMap.of("path", table.location()));
    SparkScanBuilder builder = new SparkScanBuilder(spark, TABLES.load(options.get("path")), options);
    pushFilters(builder, new Not(new StringStartsWith("data", "junc")));
    Batch scan = builder.build().toBatch();
    Assert.assertEquals(9, scan.planInputPartitions().length);
}
Also used: Not (org.apache.spark.sql.sources.Not), Table (org.apache.iceberg.Table), StringStartsWith (org.apache.spark.sql.sources.StringStartsWith), Batch (org.apache.spark.sql.connector.read.Batch), CaseInsensitiveStringMap (org.apache.spark.sql.util.CaseInsensitiveStringMap), Test (org.junit.Test)
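
The pushFilters helper is not shown in this snippet; a plausible sketch, assuming it simply hands the filters to the builder through Spark's SupportsPushDownFilters interface (which Iceberg's SparkScanBuilder implements in the Spark 3 module):

import org.apache.spark.sql.connector.read.ScanBuilder;
import org.apache.spark.sql.connector.read.SupportsPushDownFilters;
import org.apache.spark.sql.sources.Filter;

// Hypothetical reconstruction of the test helper: pushFilters(Filter[]) returns
// the filters the source could not handle, which this sketch ignores.
static void pushFilters(ScanBuilder builder, Filter... filters) {
    SupportsPushDownFilters filterable = (SupportsPushDownFilters) builder;
    filterable.pushFilters(filters);
}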

Example 13 with CaseInsensitiveStringMap

Use of org.apache.spark.sql.util.CaseInsensitiveStringMap in project iceberg by apache.

In class TestFilteredScan, method testDayPartitionedTimestampFilters():

@SuppressWarnings("checkstyle:AvoidNestedBlocks")
@Test
public void testDayPartitionedTimestampFilters() {
    Table table = buildPartitionedTable("partitioned_by_day", PARTITION_BY_DAY, "ts_day", "ts");
    CaseInsensitiveStringMap options = new CaseInsensitiveStringMap(ImmutableMap.of("path", table.location()));
    Batch unfiltered = new SparkScanBuilder(spark, TABLES.load(options.get("path")), options).build().toBatch();
    Assert.assertEquals("Unfiltered table should created 2 read tasks", 2, unfiltered.planInputPartitions().length);
    {
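        // ts < 2017-12-22: only the 2017-12-21 day partition should remain.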
        SparkScanBuilder builder = new SparkScanBuilder(spark, TABLES.load(options.get("path")), options);
        pushFilters(builder, LessThan.apply("ts", "2017-12-22T00:00:00+00:00"));
        Batch scan = builder.build().toBatch();
        InputPartition[] tasks = scan.planInputPartitions();
        Assert.assertEquals("Should create one task for 2017-12-21", 1, tasks.length);
        assertEqualsSafe(SCHEMA.asStruct(), expected(5, 6, 7, 8, 9), read(table.location(), vectorized, "ts < cast('2017-12-22 00:00:00+00:00' as timestamp)"));
    }
    {
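        // 06:00 < ts < 08:00 on 2017-12-22: both bounds fall inside one day partition.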
        SparkScanBuilder builder = new SparkScanBuilder(spark, TABLES.load(options.get("path")), options);
        pushFilters(builder, And.apply(GreaterThan.apply("ts", "2017-12-22T06:00:00+00:00"), LessThan.apply("ts", "2017-12-22T08:00:00+00:00")));
        Batch scan = builder.build().toBatch();
        InputPartition[] tasks = scan.planInputPartitions();
        Assert.assertEquals("Should create one task for 2017-12-22", 1, tasks.length);
        assertEqualsSafe(SCHEMA.asStruct(), expected(1, 2), read(table.location(), vectorized, "ts > cast('2017-12-22 06:00:00+00:00' as timestamp) and " + "ts < cast('2017-12-22 08:00:00+00:00' as timestamp)"));
    }
}
Also used: Table (org.apache.iceberg.Table), Batch (org.apache.spark.sql.connector.read.Batch), CaseInsensitiveStringMap (org.apache.spark.sql.util.CaseInsensitiveStringMap), Test (org.junit.Test)
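
The filters above are built with apply because Spark's data-source filters (LessThan, GreaterThan, And) are Scala case classes; from Java, the generated apply factory and the plain constructor are interchangeable. The same range predicate, written with constructors:

import org.apache.spark.sql.sources.And;
import org.apache.spark.sql.sources.Filter;
import org.apache.spark.sql.sources.GreaterThan;
import org.apache.spark.sql.sources.LessThan;

// Equivalent to the And.apply(GreaterThan.apply(...), LessThan.apply(...)) call above.
Filter tsRange = new And(
    new GreaterThan("ts", "2017-12-22T06:00:00+00:00"),
    new LessThan("ts", "2017-12-22T08:00:00+00:00"));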

Example 14 with CaseInsensitiveStringMap

Use of org.apache.spark.sql.util.CaseInsensitiveStringMap in project iceberg by apache.

In class TestTimestampWithoutZone, method testCreateNewTableShouldHaveTimestampWithoutZoneIcebergType():

@Test
public void testCreateNewTableShouldHaveTimestampWithoutZoneIcebergType() {
    withSQLConf(ImmutableMap.of(SparkSQLProperties.HANDLE_TIMESTAMP_WITHOUT_TIMEZONE, "true", SparkSQLProperties.USE_TIMESTAMP_WITHOUT_TIME_ZONE_IN_NEW_TABLES, "true"), () -> {
        spark.sessionState().catalogManager().currentCatalog().initialize(catalog.name(), new CaseInsensitiveStringMap(config));
        sql("INSERT INTO %s VALUES %s", tableName, rowToSqlValues(values));
        sql("CREATE TABLE %s USING iceberg AS SELECT * FROM %s", newTableName, tableName);
        Assert.assertEquals("Should have " + values.size() + " row", (long) values.size(), scalarSql("SELECT count(*) FROM %s", newTableName));
        assertEquals("Row data should match expected", sql("SELECT * FROM %s ORDER BY id", tableName), sql("SELECT * FROM %s ORDER BY id", newTableName));
        Table createdTable = validationCatalog.loadTable(TableIdentifier.of("default", newTableName));
        assertFieldsType(createdTable.schema(), Types.TimestampType.withoutZone(), "ts", "tsz");
    });
}
Also used: Table (org.apache.iceberg.Table), CaseInsensitiveStringMap (org.apache.spark.sql.util.CaseInsensitiveStringMap), Test (org.junit.Test)
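
The same CaseInsensitiveStringMap wrapping appears whenever a catalog plugin is initialized. A minimal sketch of initializing Iceberg's SparkCatalog directly, with placeholder option values (the "type" and "warehouse" keys mirror Iceberg's documented catalog options):

import java.util.Map;
import org.apache.iceberg.spark.SparkCatalog;
import org.apache.spark.sql.util.CaseInsensitiveStringMap;

// Placeholder configuration; in a real job these values come from the
// spark.sql.catalog.<name>.* session properties.
Map<String, String> config = Map.of(
    "type", "hadoop",
    "warehouse", "/tmp/warehouse");

SparkCatalog catalog = new SparkCatalog();
// CatalogPlugin.initialize(name, options): wrapping the map makes every later
// options.get(...) lookup case-insensitive.
catalog.initialize("demo", new CaseInsensitiveStringMap(config));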

Aggregations

CaseInsensitiveStringMap (org.apache.spark.sql.util.CaseInsensitiveStringMap): 14 uses
Test (org.junit.Test): 11 uses
Batch (org.apache.spark.sql.connector.read.Batch): 10 uses
Table (org.apache.iceberg.Table): 8 uses
InputPartition (org.apache.spark.sql.connector.read.InputPartition): 4 uses
StringStartsWith (org.apache.spark.sql.sources.StringStartsWith): 4 uses
PathIdentifier (org.apache.iceberg.spark.PathIdentifier): 2 uses
Not (org.apache.spark.sql.sources.Not): 2 uses
Schema (org.apache.iceberg.Schema): 1 use
Snapshot (org.apache.iceberg.Snapshot): 1 use
TableScan (org.apache.iceberg.TableScan): 1 use
Spark3Util (org.apache.iceberg.spark.Spark3Util): 1 use
SparkCatalog (org.apache.iceberg.spark.SparkCatalog): 1 use
SparkReadConf (org.apache.iceberg.spark.SparkReadConf): 1 use
NoSuchTableException (org.apache.spark.sql.catalyst.analysis.NoSuchTableException): 1 use
CatalogPlugin (org.apache.spark.sql.connector.catalog.CatalogPlugin): 1 use
Identifier (org.apache.spark.sql.connector.catalog.Identifier): 1 use
TableCatalog (org.apache.spark.sql.connector.catalog.TableCatalog): 1 use
Before (org.junit.Before): 1 use