Use of io.cdap.cdap.api.dataset.lib.Partitioning in the cdap project by cdapio.
From the class PartitioningTest, method testFieldOrder.
/**
 * Verifies that the entries of {@code getFields()} are iterated in the same order
 * in which the fields were added to the builder.
 *
 * <p>Two partitionings are built with the field names in opposite orders, so that a
 * match in the first case cannot be explained by an incidental ordering of the names.
 */
@Test
public void testFieldOrder() {
  // fields added as "1", "2", "3" must come back in exactly that order
  Partitioning ascending = Partitioning.builder()
    .addIntField("1")
    .addLongField("2")
    .addStringField("3")
    .build();
  Iterator<Map.Entry<String, FieldType>> ascendingEntries = ascending.getFields().entrySet().iterator();
  for (String expectedName : new String[] {"1", "2", "3"}) {
    Assert.assertEquals(expectedName, ascendingEntries.next().getKey());
  }
  Assert.assertFalse(ascendingEntries.hasNext());

  // the order above could have been preserved by accident; repeat with the names reversed
  Partitioning descending = Partitioning.builder()
    .addIntField("3")
    .addLongField("2")
    .addStringField("1")
    .build();
  Iterator<Map.Entry<String, FieldType>> descendingEntries = descending.getFields().entrySet().iterator();
  for (String expectedName : new String[] {"3", "2", "1"}) {
    Assert.assertEquals(expectedName, descendingEntries.next().getKey());
  }
  Assert.assertFalse(descendingEntries.hasNext());
}
Use of io.cdap.cdap.api.dataset.lib.Partitioning in the cdap project by cdapio.
From the class CreateStatementBuilderTest, method testRowSerdeCreate.
/**
 * Checks the statement produced by {@code CreateStatementBuilder} for a partitioned
 * external table that uses a row-format SerDe and is stored as TEXTFILE.
 *
 * <p>The SerDe property value passed in contains a single quote; the expected
 * statement contains that quote preceded by a backslash.
 */
@Test
public void testRowSerdeCreate() throws Exception {
  String cdapVersion = ProjectInfo.getVersion().toString();
  String expected = "CREATE EXTERNAL TABLE IF NOT EXISTS dataset_myfiles "
    + "(f1 string, f2 int, f3 double, f4 binary, f5 array<int>) COMMENT 'CDAP Dataset' "
    + "PARTITIONED BY (f1 STRING, f2 INT) "
    + "ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.RegexSerDe' "
    + "WITH SERDEPROPERTIES ('input.regex'='escapeme!\\'') "
    + "STORED AS TEXTFILE LOCATION 'hdfs://namenode/my/path' "
    + "TBLPROPERTIES ('cdap.name'='myfiles', 'cdap.version'='" + cdapVersion + "')";

  String columns = "f1 string, f2 int, f3 double, f4 binary, f5 array<int>";
  Partitioning partitionFields = Partitioning.builder()
    .addStringField("f1")
    .addIntField("f2")
    .build();

  // second constructor argument is null here; the expected table name carries no prefix
  String actual = new CreateStatementBuilder("myfiles", null, "dataset_myfiles", false)
    .setSchema(columns)
    .setLocation("hdfs://namenode/my/path")
    .setTableComment("CDAP Dataset")
    .setPartitioning(partitionFields)
    .setRowFormatSerde("org.apache.hadoop.hive.serde2.RegexSerDe",
                       ImmutableMap.of("input.regex", "escapeme!'"))
    .buildWithFileFormat("TEXTFILE");

  Assert.assertEquals(expected, actual);
}
Use of io.cdap.cdap.api.dataset.lib.Partitioning in the cdap project by caskdata.
From the class PartitioningTest, method testBuilderGetters.
/**
 * Verifies the lookups offered by a built {@link Partitioning}.
 *
 * <p>Six fields are registered, three through {@code addField(name, type)} and three
 * through the type-specific {@code addStringField}/{@code addIntField}/{@code addLongField}
 * methods. The test then checks that {@code getFieldType} returns the registered type for
 * each name and {@code null} for a name that was never added, and that the key set of
 * {@code getFields()} contains exactly the registered names.
 */
@Test
public void testBuilderGetters() {
  Partitioning partitioning = Partitioning.builder()
    .addField("a", FieldType.STRING)
    .addField("b", FieldType.INT)
    .addField("c", FieldType.LONG)
    .addStringField("d")
    .addIntField("e")
    .addLongField("f")
    .build();

  // fields registered through the generic addField(name, type)
  Assert.assertEquals(FieldType.STRING, partitioning.getFieldType("a"));
  Assert.assertEquals(FieldType.INT, partitioning.getFieldType("b"));
  Assert.assertEquals(FieldType.LONG, partitioning.getFieldType("c"));

  // fields registered through the type-specific convenience methods
  Assert.assertEquals(FieldType.STRING, partitioning.getFieldType("d"));
  Assert.assertEquals(FieldType.INT, partitioning.getFieldType("e"));
  Assert.assertEquals(FieldType.LONG, partitioning.getFieldType("f"));

  // a name that was never added has no type
  Assert.assertNull(partitioning.getFieldType("x"));

  // expected value goes first so that a failure message labels the two sides correctly
  Assert.assertEquals(ImmutableSet.of("a", "b", "c", "d", "e", "f"), partitioning.getFields().keySet());
}
Use of io.cdap.cdap.api.dataset.lib.Partitioning in the cdap project by caskdata.
From the class CreateStatementBuilderTest, method testRowDelimitedCreate.
/**
 * Checks the statement produced by {@code CreateStatementBuilder} for a partitioned
 * external table with a delimited row format (fields terminated by a comma) that is
 * stored as TEXTFILE.
 *
 * <p>The builder is constructed with {@code "abc"} as its second argument, and the
 * expected table name is {@code abc.dataset_myfiles}.
 */
@Test
public void testRowDelimitedCreate() throws Exception {
  String cdapVersion = ProjectInfo.getVersion().toString();
  String expected = "CREATE EXTERNAL TABLE IF NOT EXISTS abc.dataset_myfiles "
    + "(f1 string, f2 int, f3 double, f4 binary, f5 array<int>) COMMENT 'CDAP Dataset' "
    + "PARTITIONED BY (f1 STRING, f2 INT) "
    + "ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' "
    + "STORED AS TEXTFILE LOCATION 'hdfs://namenode/my/path' "
    + "TBLPROPERTIES ('cdap.name'='myfiles', 'cdap.version'='" + cdapVersion + "')";

  String columns = "f1 string, f2 int, f3 double, f4 binary, f5 array<int>";
  Partitioning partitionFields = Partitioning.builder()
    .addStringField("f1")
    .addIntField("f2")
    .build();

  String actual = new CreateStatementBuilder("myfiles", "abc", "dataset_myfiles", false)
    .setSchema(columns)
    .setLocation("hdfs://namenode/my/path")
    .setTableComment("CDAP Dataset")
    .setPartitioning(partitionFields)
    .setRowFormatDelimited(",", null)
    .buildWithFileFormat("TEXTFILE");

  Assert.assertEquals(expected, actual);
}
Use of io.cdap.cdap.api.dataset.lib.Partitioning in the cdap project by caskdata.
From the class CreateStatementBuilderTest, method testRowSerdeCreate.
/**
 * Builds a CREATE statement for a partitioned external table with a RegexSerDe row
 * format and compares it against the full expected statement text.
 *
 * <p>The {@code input.regex} property value supplied to the builder ends in a single
 * quote; in the expected statement that quote appears with a backslash in front of it.
 */
@Test
public void testRowSerdeCreate() throws Exception {
  // inputs handed to the builder
  String hiveColumns = "f1 string, f2 int, f3 double, f4 binary, f5 array<int>";
  Partitioning byF1AndF2 = Partitioning.builder()
    .addStringField("f1")
    .addIntField("f2")
    .build();

  String actual = new CreateStatementBuilder("myfiles", null, "dataset_myfiles", false)
    .setSchema(hiveColumns)
    .setLocation("hdfs://namenode/my/path")
    .setTableComment("CDAP Dataset")
    .setPartitioning(byF1AndF2)
    .setRowFormatSerde("org.apache.hadoop.hive.serde2.RegexSerDe",
                       ImmutableMap.of("input.regex", "escapeme!'"))
    .buildWithFileFormat("TEXTFILE");

  // the complete statement the builder is expected to emit, one clause per line
  String expected = "CREATE EXTERNAL TABLE IF NOT EXISTS dataset_myfiles "
    + "(f1 string, f2 int, f3 double, f4 binary, f5 array<int>) COMMENT 'CDAP Dataset' "
    + "PARTITIONED BY (f1 STRING, f2 INT) "
    + "ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.RegexSerDe' "
    + "WITH SERDEPROPERTIES ('input.regex'='escapeme!\\'') "
    + "STORED AS TEXTFILE LOCATION 'hdfs://namenode/my/path' "
    + "TBLPROPERTIES ('cdap.name'='myfiles', 'cdap.version'='" + ProjectInfo.getVersion().toString() + "')";

  Assert.assertEquals(expected, actual);
}
Aggregations