use of org.apache.carbondata.core.metadata.datatype.Field in project carbondata by apache.
the class CarbonReaderTest method testWriteAndReadJson.
/**
 * Writes the same JSON record {@code numRows} times through a JSON-input writer, checks that
 * exactly one fact file was produced, then reads the rows back and verifies both projected
 * columns and the total row count.
 *
 * @throws IOException propagated from directory cleanup or from the reader
 * @throws InterruptedException propagated from the reader
 */
@Test
public void testWriteAndReadJson() throws IOException, InterruptedException {
  int numRows = 100;
  String path = "./testWriteFiles";
  FileUtils.deleteDirectory(new File(path));
  try {
    String json = "{\"name\":\"bob\", \"age\":10}";
    Schema schema =
        new Schema(new Field[] { new Field("name", "string"), new Field("age", "int") });
    try {
      CarbonWriter writer = CarbonWriter.builder()
          .outputPath(path)
          .withJsonInput(schema)
          .writtenBy("AvroCarbonWriterTest")
          .build();
      for (int i = 0; i < numRows; i++) {
        writer.write(json);
      }
      writer.close();
    } catch (Exception e) {
      e.printStackTrace();
      Assert.fail(e.getMessage());
    }

    // Only fact (data) files are counted; everything else in the directory is ignored.
    File[] dataFiles = new File(path).listFiles(new FileFilter() {
      @Override
      public boolean accept(File pathname) {
        return pathname.getName().endsWith(CarbonCommonConstants.FACT_FILE_EXT);
      }
    });
    Assert.assertNotNull(dataFiles);
    Assert.assertEquals(1, dataFiles.length);

    // Read everything back and verify each row.
    CarbonReader reader = CarbonReader.builder(path, "_temp")
        .projection(new String[] { "name", "age" })
        .build();
    int rowCount = 0;
    try {
      while (reader.hasNext()) {
        Object[] row = (Object[]) reader.readNextRow();
        Assert.assertEquals("bob", row[0]);
        Assert.assertEquals(10, row[1]);
        rowCount++;
      }
    } finally {
      // Close even when an assertion above fails so the reader is not leaked.
      reader.close();
    }
    Assert.assertEquals(numRows, rowCount);
  } finally {
    // Always remove the output so a failure here cannot pollute other tests using this path.
    FileUtils.deleteDirectory(new File(path));
  }
}
use of org.apache.carbondata.core.metadata.datatype.Field in project carbondata by apache.
the class CarbonReaderTest method testReadNextRowWithRowUtil.
/**
 * Writes ten CSV rows covering string, short, int, long, double, boolean, date, timestamp,
 * decimal, varchar, string-array and float columns, reads them back with a projection derived
 * from the stored schema, and checks the columns through the {@code RowUtil} accessors.
 *
 * <p>The timestamp column (index 7) is written but not verified. The decimal input
 * {@code "12.345"} is declared as decimal(8, 2) and is expected back as {@code "12.35"}.
 */
@Test
public void testReadNextRowWithRowUtil() {
  String path = "./carbondata";
  try {
    FileUtils.deleteDirectory(new File(path));

    Field[] fields = new Field[12];
    fields[0] = new Field("stringField", DataTypes.STRING);
    fields[1] = new Field("shortField", DataTypes.SHORT);
    fields[2] = new Field("intField", DataTypes.INT);
    fields[3] = new Field("longField", DataTypes.LONG);
    fields[4] = new Field("doubleField", DataTypes.DOUBLE);
    fields[5] = new Field("boolField", DataTypes.BOOLEAN);
    fields[6] = new Field("dateField", DataTypes.DATE);
    fields[7] = new Field("timeField", DataTypes.TIMESTAMP);
    fields[8] = new Field("decimalField", DataTypes.createDecimalType(8, 2));
    fields[9] = new Field("varcharField", DataTypes.VARCHAR);
    fields[10] = new Field("arrayField", DataTypes.createArrayType(DataTypes.STRING));
    fields[11] = new Field("floatField", DataTypes.FLOAT);

    // '#' separates the elements of the array column in the CSV input below.
    Map<String, String> map = new HashMap<>();
    map.put("complex_delimiter_level_1", "#");
    CarbonWriter writer = CarbonWriter.builder()
        .outputPath(path)
        .withLoadOptions(map)
        .withCsvInput(new Schema(fields))
        .writtenBy("CarbonReaderTest")
        .build();
    for (int i = 0; i < 10; i++) {
      String[] row2 = new String[] {
          "robot" + (i % 10),
          String.valueOf(i % 10000),
          String.valueOf(i),
          String.valueOf(Long.MAX_VALUE - i),
          String.valueOf((double) i / 2),
          String.valueOf(true),
          "2019-03-02",
          "2019-02-12 03:03:34",
          "12.345",
          "varchar",
          "Hello#World#From#Carbon",
          "1.23" };
      writer.write(row2);
    }
    writer.close();

    File[] dataFiles = new File(path).listFiles(new FilenameFilter() {
      @Override
      public boolean accept(File dir, String name) {
        if (name == null) {
          return false;
        }
        return name.endsWith("carbonindex");
      }
    });
    if (dataFiles == null || dataFiles.length < 1) {
      throw new RuntimeException("Carbon index file not exists.");
    }
    Schema schema = CarbonSchemaReader.readSchema(dataFiles[0].getAbsolutePath()).asOriginOrder();

    // Build the projection from field names without a '.'; dotted names are skipped
    // (presumably child columns of complex types -- confirm against CarbonSchemaReader).
    List<String> topLevelNames = new ArrayList<>();
    for (Field field : schema.getFields()) {
      String fieldName = field.getFieldName();
      if (!fieldName.contains(".")) {
        topLevelNames.add(fieldName);
      }
    }
    String[] strings = topLevelNames.toArray(new String[0]);

    // Read data
    CarbonReader reader = CarbonReader.builder(path, "_temp").projection(strings).build();
    int rowIndex = 0;
    try {
      while (reader.hasNext()) {
        Object[] data = (Object[]) reader.readNextRow();
        // Real assertions instead of the `assert` keyword, which is skipped without -ea.
        assertEquals("robot" + rowIndex, RowUtil.getString(data, 0));
        assertEquals(rowIndex, RowUtil.getShort(data, 1));
        assertEquals(rowIndex, RowUtil.getInt(data, 2));
        assertEquals(Long.MAX_VALUE - rowIndex, RowUtil.getLong(data, 3));
        assertEquals(((double) rowIndex) / 2, RowUtil.getDouble(data, 4));
        Assert.assertTrue(RowUtil.getBoolean(data, 5));
        assertEquals("2019-03-02", RowUtil.getString(data, 6));
        Assert.assertEquals("12.35", RowUtil.getDecimal(data, 8));
        Assert.assertEquals("varchar", RowUtil.getVarchar(data, 9));
        Object[] arr = RowUtil.getArray(data, 10);
        Assert.assertEquals("Hello", arr[0]);
        Assert.assertEquals("World", arr[1]);
        Assert.assertEquals("From", arr[2]);
        Assert.assertEquals("Carbon", arr[3]);
        assertEquals((float) 1.23, RowUtil.getFloat(data, 11));
        rowIndex++;
      }
    } finally {
      // Close even when an assertion fails so the reader is not leaked.
      reader.close();
    }
    // Without this check the test would pass vacuously if the reader returned no rows.
    assertEquals(10, rowIndex);
  } catch (Throwable e) {
    e.printStackTrace();
    Assert.fail(e.getMessage());
  } finally {
    try {
      FileUtils.deleteDirectory(new File(path));
    } catch (IOException e) {
      e.printStackTrace();
      Assert.fail(e.getMessage());
    }
  }
}
use of org.apache.carbondata.core.metadata.datatype.Field in project carbondata by apache.
the class CarbonReaderTest method testReadingDateAndTimestampColumnInArrayOfStruct.
/**
 * Writes fifteen rows whose fourth column is an array of structs containing a date and a
 * timestamp member, then reads them back and verifies that both members come back as the
 * strings that were written.
 *
 * @throws IOException propagated from directory cleanup, schema reading or the reader
 */
@Test
public void testReadingDateAndTimestampColumnInArrayOfStruct() throws IOException {
  String path = "./testWriteFilesArrayStruct";
  FileUtils.deleteDirectory(new File(path));
  try {
    Field[] fields = new Field[4];
    fields[0] = new Field("id", DataTypes.STRING);
    fields[1] = new Field("source", DataTypes.STRING);
    fields[2] = new Field("usage", DataTypes.STRING);

    List<StructField> structFieldsList = new ArrayList<>();
    structFieldsList.add(new StructField("name", DataTypes.STRING));
    structFieldsList.add(new StructField("type", DataTypes.STRING));
    structFieldsList.add(new StructField("creation-date", DataTypes.DATE));
    structFieldsList.add(new StructField("creation-timestamp", DataTypes.TIMESTAMP));
    StructField structTypeByList = new StructField(
        "annotation", DataTypes.createStructType(structFieldsList), structFieldsList);
    List<StructField> list = new ArrayList<>();
    list.add(structTypeByList);
    fields[3] = new Field("annotations", "array", list);

    try {
      CarbonWriter writer = CarbonWriter.builder()
          .outputPath(path)
          .withCsvInput(new Schema(fields))
          .writtenBy("complexTest")
          .build();
      for (int i = 0; i < 15; i++) {
        // "\001" separates the two array elements, "\002" separates struct members.
        String[] row = new String[] {
            "robot" + i,
            String.valueOf(i),
            i + "." + i,
            "sunflowers" + (i % 10) + "\002" + "modelarts/image_classification" + "\002"
                + "2019-03-30" + "\002" + "2019-03-30 17:22:31" + "\001"
                + "roses" + (i % 10) + "\002" + "modelarts/image_classification" + "\002"
                + "2019-03-30" + "\002" + "2019-03-30 17:22:31" };
        writer.write(row);
      }
      writer.close();
    } catch (Exception e) {
      e.printStackTrace();
      Assert.fail(e.getMessage());
    }

    Schema schema = CarbonSchemaReader.readSchema(path).asOriginOrder();
    Assert.assertEquals(4, schema.getFieldsLength());

    try {
      CarbonReader reader = CarbonReader.builder(path)
          .projection(new String[] { "id", "source", "usage", "annotations" })
          .build();
      int rowCount = 0;
      try {
        while (reader.hasNext()) {
          Object[] row = (Object[]) reader.readNextRow();
          Assert.assertEquals(4, row.length);
          Assert.assertEquals("robot" + rowCount, row[0]);
          int value = Integer.parseInt((String) row[1]);
          float value2 = Float.parseFloat((String) row[2]);
          // Both bounds must hold; the previous `||` form was true for every number.
          Assert.assertTrue(value > -1 && value < 15);
          Assert.assertTrue(value2 > -1 && value2 < 15);
          Object[] annotations = (Object[]) row[3];
          for (int j = 0; j < annotations.length; j++) {
            Object[] annotation = (Object[]) annotations[j];
            String annotationName = (String) annotation[0];
            Assert.assertTrue(
                annotationName.contains("sunflowers") || annotationName.contains("roses"));
            Assert.assertTrue(
                ((String) annotation[1]).contains("modelarts/image_classification"));
            Assert.assertEquals("2019-03-30", annotation[2]);
            Assert.assertEquals("2019-03-30 17:22:31", annotation[3]);
          }
          rowCount++;
        }
      } finally {
        // Close even when an assertion fails so the reader is not leaked.
        reader.close();
      }
      Assert.assertEquals(15, rowCount);
    } catch (InterruptedException e) {
      // An interrupted read must fail the test instead of letting it pass silently.
      Thread.currentThread().interrupt();
      Assert.fail("Interrupted while reading: " + e.getMessage());
    }
  } finally {
    // Runs on every path, including writer and schema failures.
    FileUtils.deleteDirectory(new File(path));
  }
}
use of org.apache.carbondata.core.metadata.datatype.Field in project carbondata by apache.
the class CarbonReaderTest method testValidateQuoteCharWithProperValue.
/**
 * Verifies that a writer can be built when the {@code quotechar} load option is set to
 * {@code "#"}; any exception raised while building fails the test.
 *
 * @throws IOException propagated from removing the output directory
 */
@Test
public void testValidateQuoteCharWithProperValue() throws IOException {
  String path = "./testValidateQuoteCharWithProperValue";
  Field[] fields = new Field[2];
  fields[0] = new Field("stringField", DataTypes.STRING);
  fields[1] = new Field("varcharField", DataTypes.VARCHAR);
  Schema schema = new Schema(fields);
  // Typed map instead of the raw Map/HashMap, matching the other load-option test.
  Map<String, String> loadOptions = new HashMap<>();
  loadOptions.put("quotechar", "#");
  try {
    CarbonWriter.builder()
        .outputPath(path)
        .withLoadOptions(loadOptions)
        .withCsvInput(schema)
        .enableLocalDictionary(false)
        .writtenBy("CarbonReaderTest")
        .build();
  } catch (Exception e) {
    // One handler for every failure, so the cause is always printed and reported.
    e.printStackTrace();
    Assert.fail(e.getMessage());
  } finally {
    FileUtils.deleteDirectory(new File(path));
  }
}
use of org.apache.carbondata.core.metadata.datatype.Field in project carbondata by apache.
the class CarbonReaderTest method testReadWithFilterEqualSet.
/**
 * Writes 200 rows via {@code TestUtil.writeFilesAndVerify} and reads them back through five
 * filters, checking the content and the number of rows each filter returns:
 * an equal-to set on {@code name} (40 rows), on {@code age} (2 rows), on {@code doubleField}
 * (2 rows), a single equal-to on {@code name} (20 rows), and an OR of three equal-to
 * expressions (41 rows).
 *
 * @throws IOException propagated from directory cleanup or from a reader
 * @throws InterruptedException propagated from a reader
 */
@Test
public void testReadWithFilterEqualSet() throws IOException, InterruptedException {
  String path = "./testWriteFiles";
  FileUtils.deleteDirectory(new File(path));
  try {
    Field[] fields = new Field[3];
    fields[0] = new Field("name", DataTypes.STRING);
    fields[1] = new Field("age", DataTypes.INT);
    fields[2] = new Field("doubleField", DataTypes.DOUBLE);
    TestUtil.writeFilesAndVerify(200, new Schema(fields), path);

    // name IN ("robot7", "robot1")
    List<Object> values = new ArrayList<>();
    values.add("robot7");
    values.add("robot1");
    CarbonReader reader = CarbonReader.builder(path, "_temp")
        .projection(new String[] { "name", "age", "doubleField" })
        .filter(prepareEqualToExpressionSet("name", "String", values))
        .build();
    verifyEqualSetFilterRows(reader, 40, 7, 1);

    // age IN (1, 7)
    List<Object> values2 = new ArrayList<>();
    values2.add(1);
    values2.add(7);
    CarbonReader reader2 = CarbonReader.builder(path, "_temp")
        .projection(new String[] { "name", "age", "doubleField" })
        .filter(prepareEqualToExpressionSet("age", "int", values2))
        .build();
    verifyEqualSetFilterRows(reader2, 2, 7, 1);

    // doubleField IN (0.5, 3.5)
    List<Object> values3 = new ArrayList<>();
    values3.add(0.5);
    values3.add(3.5);
    CarbonReader reader3 = CarbonReader.builder(path, "_temp")
        .projection(new String[] { "name", "age", "doubleField" })
        .filter(prepareEqualToExpressionSet("doubleField", "double", values3))
        .build();
    verifyEqualSetFilterRows(reader3, 2, 7, 1);

    // name = "robot7"
    CarbonReader reader4 = CarbonReader.builder(path, "_temp")
        .projection(new String[] { "name", "age", "doubleField" })
        .filter(prepareEqualToExpression("name", "string", "robot7"))
        .build();
    verifyEqualSetFilterRows(reader4, 20, 7);

    // name = "robot1" OR name = "robot7" OR age = 2
    List<Expression> expressions = new ArrayList<>();
    expressions.add(prepareEqualToExpression("name", "String", "robot1"));
    expressions.add(prepareEqualToExpression("name", "String", "robot7"));
    expressions.add(prepareEqualToExpression("age", "int", "2"));
    CarbonReader reader5 = CarbonReader.builder(path, "_temp")
        .projection(new String[] { "name", "age", "doubleField" })
        .filter(prepareOrExpression(expressions))
        .build();
    verifyEqualSetFilterRows(reader5, 41, 7, 1, 2);
  } finally {
    // Always remove the output so a failure here cannot pollute other tests using this path.
    FileUtils.deleteDirectory(new File(path));
  }
}

/**
 * Drains {@code reader}, verifies every row and the total row count, and closes the reader on
 * every path.
 *
 * <p>A row is accepted when its name contains {@code "robot" + d} for one of the allowed
 * digits {@code d}; the age must then end in {@code d}, and the fractional part of the double
 * column must be 0.5 for an odd digit and 0 for an even one. This reproduces the values the
 * test previously hard-coded for robot1 and robot7 (0.5) and robot2 (0).
 *
 * @param reader reader positioned before the first row; closed before this method returns
 * @param expectedRows number of rows the reader must yield
 * @param allowedDigits robot digits permitted in the result, tried in the given order
 */
private void verifyEqualSetFilterRows(CarbonReader reader, int expectedRows, int... allowedDigits)
    throws IOException, InterruptedException {
  int rowCount = 0;
  try {
    while (reader.hasNext()) {
      Object[] row = (Object[]) reader.readNextRow();
      String name = (String) row[0];
      boolean matched = false;
      for (int digit : allowedDigits) {
        if (name.contains("robot" + digit)) {
          Assert.assertEquals(digit, ((int) row[1]) % 10);
          // Zero delta keeps the comparison exact, like the original `==` checks.
          Assert.assertEquals((digit % 2) * 0.5, ((double) row[2]) % 1, 0.0);
          matched = true;
          break;
        }
      }
      if (!matched) {
        Assert.fail("Unexpected row returned by filter: " + name);
      }
      rowCount++;
    }
  } finally {
    reader.close();
  }
  Assert.assertEquals(expectedRows, rowCount);
}
Aggregations