Example usage of org.apache.carbondata.core.metadata.datatype.Field in the Apache CarbonData project.
From class HiveCarbonUtil, method getTableInfo:
/**
 * Builds a CarbonData {@link TableInfo} from Hive table metadata.
 *
 * @param tableName        table name
 * @param databaseName     database name (also used to form the table's unique name)
 * @param location         table path on the file system
 * @param sortColumnsString comma-separated sort column names, may be null
 * @param columns          column names, parallel to {@code columnTypes}
 * @param columnTypes      Hive type strings, converted to Carbon data types
 * @param partitionColumns names of the partition columns (matched case-sensitively
 *                         against {@code columns}, as in the Hive metastore)
 * @return the populated TableInfo
 * @throws SQLException if a Hive type cannot be converted
 */
private static TableInfo getTableInfo(String tableName, String databaseName, String location,
    String sortColumnsString, String[] columns, String[] columnTypes,
    List<String> partitionColumns) throws SQLException {
  TableInfo tableInfo = new TableInfo();
  TableSchemaBuilder builder = new TableSchemaBuilder();
  builder.tableName(tableName);
  List<String> sortColumns = new ArrayList<>();
  if (sortColumnsString != null) {
    // Sort columns are normalized to lower case; all lookups below must
    // therefore use the lower-cased column name.
    sortColumns = Arrays.asList(sortColumnsString.toLowerCase().split("\\,"));
  }
  PartitionInfo partitionInfo = null;
  // Shared ordinal counter so schema ordinals stay unique across normal and
  // partition columns (partition columns are appended after the others).
  AtomicInteger integer = new AtomicInteger();
  List<StructField> partitionStructFields = new ArrayList<>();
  for (int i = 0; i < columns.length; i++) {
    DataType dataType = DataTypeUtil.convertHiveTypeToCarbon(columnTypes[i]);
    Field field = new Field(columns[i].toLowerCase(), dataType);
    if (partitionColumns.contains(columns[i])) {
      // Partition columns are collected first and added to the schema last.
      partitionStructFields.add(
          new StructField(columns[i].toLowerCase(), dataType, field.getChildren()));
    } else {
      // FIX: compare the lower-cased column name against the lower-cased sort
      // column list; previously a mixed-case column never matched and was
      // silently excluded from the sort columns (the partition loop below
      // already compared lower-cased names).
      builder.addColumn(
          new StructField(columns[i].toLowerCase(), dataType, field.getChildren()),
          integer, sortColumns.contains(columns[i].toLowerCase()), false);
    }
  }
  if (!partitionStructFields.isEmpty()) {
    List<ColumnSchema> partitionColumnSchemas = new ArrayList<>();
    for (StructField partitionStructField : partitionStructFields) {
      partitionColumnSchemas.add(builder.addColumn(partitionStructField, integer,
          sortColumns.contains(partitionStructField.getFieldName()), false));
    }
    partitionInfo = new PartitionInfo(partitionColumnSchemas, PartitionType.NATIVE_HIVE);
  }
  TableSchema tableSchema = builder.build();
  // A single empty evolution entry marks the initial schema version.
  SchemaEvolution schemaEvol = new SchemaEvolution();
  List<SchemaEvolutionEntry> schemaEvolutionEntry = new ArrayList<>();
  schemaEvolutionEntry.add(new SchemaEvolutionEntry());
  schemaEvol.setSchemaEvolutionEntryList(schemaEvolutionEntry);
  tableSchema.setSchemaEvolution(schemaEvol);
  tableSchema.setPartitionInfo(partitionInfo);
  tableInfo.setDatabaseName(databaseName);
  tableInfo.setTablePath(location);
  tableInfo.setFactTable(tableSchema);
  tableInfo.setTableUniqueName(databaseName + "_" + tableName);
  return tableInfo;
}
Example usage of org.apache.carbondata.core.metadata.datatype.Field in the Apache CarbonData project.
From class CarbonReaderTest, method testReadFilesWithProjectAllColumns:
/**
 * Writes 100 rows with a two-column schema (name STRING, age INT) and reads
 * them all back, verifying every row and the total row count.
 */
@Test
public void testReadFilesWithProjectAllColumns() throws IOException, InterruptedException {
  String path = "./testWriteFiles";
  FileUtils.deleteDirectory(new File(path));
  Field[] fields = new Field[2];
  fields[0] = new Field("name", DataTypes.STRING);
  fields[1] = new Field("age", DataTypes.INT);
  // Helper writes rows of the form ("robot<i % 10>", i) for i in [0, 100).
  TestUtil.writeFilesAndVerify(100, new Schema(fields), path);
  CarbonReader reader = CarbonReader.builder(path, "_temp").build();
  int i = 0;
  while (reader.hasNext()) {
    Object[] row = (Object[]) reader.readNextRow();
    Assert.assertEquals(("robot" + (i % 10)), row[0]);
    Assert.assertEquals(i, row[1]);
    i++;
  }
  // FIX: expected value goes first in JUnit's assertEquals; the original
  // call had the arguments reversed, which yields a misleading failure
  // message ("expected <i> but was <100>").
  Assert.assertEquals(100, i);
  reader.close();
  FileUtils.deleteDirectory(new File(path));
}
Example usage of org.apache.carbondata.core.metadata.datatype.Field in the Apache CarbonData project.
From class CarbonReaderTest, method testReadNextBatchRow:
/**
 * Writes 300 rows covering twelve data types (including an array column split
 * on '#') and reads them back in batches of 150 via readNextBatchRow,
 * checking every column of every row and that no batch exceeds the
 * configured batch size.
 */
@Test
public void testReadNextBatchRow() {
  String path = "./carbondata";
  try {
    FileUtils.deleteDirectory(new File(path));
    // Schema: one column per supported type, array column last-but-one.
    Field[] schemaFields = new Field[12];
    schemaFields[0] = new Field("stringField", DataTypes.STRING);
    schemaFields[1] = new Field("shortField", DataTypes.SHORT);
    schemaFields[2] = new Field("intField", DataTypes.INT);
    schemaFields[3] = new Field("longField", DataTypes.LONG);
    schemaFields[4] = new Field("doubleField", DataTypes.DOUBLE);
    schemaFields[5] = new Field("boolField", DataTypes.BOOLEAN);
    schemaFields[6] = new Field("dateField", DataTypes.DATE);
    schemaFields[7] = new Field("timeField", DataTypes.TIMESTAMP);
    schemaFields[8] = new Field("decimalField", DataTypes.createDecimalType(8, 2));
    schemaFields[9] = new Field("varcharField", DataTypes.VARCHAR);
    schemaFields[10] = new Field("arrayField", DataTypes.createArrayType(DataTypes.STRING));
    schemaFields[11] = new Field("floatField", DataTypes.FLOAT);
    Map<String, String> loadOptions = new HashMap<>();
    loadOptions.put("complex_delimiter_level_1", "#");
    CarbonWriter writer = CarbonWriter.builder()
        .outputPath(path)
        .withLoadOptions(loadOptions)
        .withCsvInput(new Schema(schemaFields))
        .writtenBy("CarbonReaderTest")
        .build();
    for (int rowId = 0; rowId < 300; rowId++) {
      String[] csvRow = new String[] { "robot" + (rowId % 10000), String.valueOf(rowId % 10000),
          String.valueOf(rowId), String.valueOf(Long.MAX_VALUE - rowId),
          String.valueOf((double) rowId / 2), String.valueOf(true), "2019-03-02",
          "2019-02-12 03:03:34", "12.345", "varchar", "Hello#World#From#Carbon", "1.23" };
      writer.write(csvRow);
    }
    writer.close();
    // Read data back in batches; column order differs from the write schema
    // (dictionary/sort columns come first in the projected row layout).
    int batchSize = 150;
    CarbonReader reader = CarbonReader.builder(path, "_temp").withBatch(batchSize).build();
    int i = 0;
    while (reader.hasNext()) {
      Object[] rows = reader.readNextBatchRow();
      Assert.assertTrue(rows.length <= batchSize);
      for (Object rowObj : rows) {
        Object[] data = (Object[]) rowObj;
        assert (RowUtil.getString(data, 0).equals("robot" + i));
        assertEquals(RowUtil.getString(data, 1), "2019-03-02");
        assert (RowUtil.getVarchar(data, 3).equals("varchar"));
        Object[] arr = RowUtil.getArray(data, 4);
        assert (arr[0].equals("Hello"));
        assert (arr[1].equals("World"));
        assert (arr[2].equals("From"));
        assert (arr[3].equals("Carbon"));
        assertEquals(RowUtil.getShort(data, 5), i);
        assertEquals(RowUtil.getInt(data, 6), i);
        assertEquals(RowUtil.getLong(data, 7), Long.MAX_VALUE - i);
        assertEquals(RowUtil.getDouble(data, 8), ((double) i) / 2);
        assert (RowUtil.getBoolean(data, 9));
        assert (RowUtil.getDecimal(data, 10).equals("12.35"));
        assertEquals(RowUtil.getFloat(data, 11), (float) 1.23);
        i++;
      }
      System.out.println("batch is " + i);
    }
    reader.close();
  } catch (Throwable e) {
    e.printStackTrace();
    Assert.fail(e.getMessage());
  } finally {
    try {
      FileUtils.deleteDirectory(new File(path));
    } catch (IOException e) {
      e.printStackTrace();
    }
  }
}
Example usage of org.apache.carbondata.core.metadata.datatype.Field in the Apache CarbonData project.
From class CarbonReaderTest, method testVectorReader:
/**
 * Writes 10 rows covering twelve data types (byte column instead of an
 * array) and reads them back row by row, verifying every column value.
 */
@Test
public void testVectorReader() {
  String path = "./testWriteFiles";
  try {
    FileUtils.deleteDirectory(new File(path));
    // One column per supported primitive type.
    Field[] schemaFields = new Field[12];
    schemaFields[0] = new Field("stringField", DataTypes.STRING);
    schemaFields[1] = new Field("shortField", DataTypes.SHORT);
    schemaFields[2] = new Field("intField", DataTypes.INT);
    schemaFields[3] = new Field("longField", DataTypes.LONG);
    schemaFields[4] = new Field("doubleField", DataTypes.DOUBLE);
    schemaFields[5] = new Field("boolField", DataTypes.BOOLEAN);
    schemaFields[6] = new Field("dateField", DataTypes.DATE);
    schemaFields[7] = new Field("timeField", DataTypes.TIMESTAMP);
    schemaFields[8] = new Field("decimalField", DataTypes.createDecimalType(8, 2));
    schemaFields[9] = new Field("varcharField", DataTypes.VARCHAR);
    schemaFields[10] = new Field("byteField", DataTypes.BYTE);
    schemaFields[11] = new Field("floatField", DataTypes.FLOAT);
    Map<String, String> loadOptions = new HashMap<>();
    loadOptions.put("complex_delimiter_level_1", "#");
    CarbonWriter writer = CarbonWriter.builder()
        .outputPath(path)
        .withLoadOptions(loadOptions)
        .withCsvInput(new Schema(schemaFields))
        .writtenBy("CarbonReaderTest")
        .build();
    for (int rowId = 0; rowId < 10; rowId++) {
      String[] csvRow = new String[] { "robot" + (rowId % 10), String.valueOf(rowId % 10000),
          String.valueOf(rowId), String.valueOf(Long.MAX_VALUE - rowId),
          String.valueOf((double) rowId / 2), String.valueOf(true), "2019-03-02",
          "2019-02-12 03:03:34", "12.345", "varchar", String.valueOf(rowId), "1.23" };
      writer.write(csvRow);
    }
    writer.close();
    // Read data back; projected column order differs from the write schema
    // (string/date/varchar columns are placed before the numeric ones).
    CarbonReader reader = CarbonReader.builder(path, "_temp").build();
    int i = 0;
    while (reader.hasNext()) {
      Object[] data = (Object[]) reader.readNextRow();
      assert (RowUtil.getString(data, 0).equals("robot" + i));
      assertEquals(RowUtil.getShort(data, 4), i);
      assertEquals(RowUtil.getInt(data, 5), i);
      assert (RowUtil.getLong(data, 6) == Long.MAX_VALUE - i);
      assertEquals(RowUtil.getDouble(data, 7), ((double) i) / 2);
      assert (RowUtil.getBoolean(data, 8));
      assertEquals(RowUtil.getString(data, 1), "2019-03-02");
      assert (RowUtil.getDecimal(data, 9).equals("12.35"));
      assert (RowUtil.getString(data, 3).equals("varchar"));
      assertEquals(RowUtil.getByte(data, 10), new Byte(String.valueOf(i)));
      assertEquals(RowUtil.getFloat(data, 11), new Float("1.23"));
      i++;
    }
    assert (i == 10);
    reader.close();
  } catch (Throwable e) {
    e.printStackTrace();
    Assert.fail(e.getMessage());
  } finally {
    try {
      FileUtils.deleteDirectory(new File(path));
    } catch (IOException e) {
      e.printStackTrace();
      Assert.fail(e.getMessage());
    }
  }
}
Example usage of org.apache.carbondata.core.metadata.datatype.Field in the Apache CarbonData project.
From class CarbonReaderTest, method testSdkWriteWhenArrayOfStringIsEmpty:
/**
 * Verifies the SDK writer accepts a row whose array-of-string columns are
 * empty while bad-record handling is set to FAIL (empty arrays must not be
 * treated as bad records).
 */
@Test
public void testSdkWriteWhenArrayOfStringIsEmpty() throws IOException, InvalidLoadOptionException {
  String badRecordAction =
      CarbonProperties.getInstance().getProperty(CarbonCommonConstants.CARBON_BAD_RECORDS_ACTION);
  CarbonProperties.getInstance()
      .addProperty(CarbonCommonConstants.CARBON_BAD_RECORDS_ACTION, "FAIL");
  String path = "./testSdkWriteWhenArrayOfStringIsEmpty";
  try {
    // Columns 3-5 are array columns fed empty strings; column 6 is an empty varchar.
    String[] rec = { "aaa", "bbb", "aaa@cdf.com", "", "", "mmm", "" };
    Field[] fields = new Field[7];
    fields[0] = new Field("stringField", DataTypes.STRING);
    fields[1] = new Field("varcharField", DataTypes.VARCHAR);
    fields[2] = new Field("stringField1", DataTypes.STRING);
    fields[3] = new Field("arrayField", DataTypes.createArrayType(DataTypes.STRING));
    fields[4] = new Field("arrayField1", DataTypes.createArrayType(DataTypes.STRING));
    fields[5] = new Field("arrayField2", DataTypes.createArrayType(DataTypes.STRING));
    fields[6] = new Field("varcharField1", DataTypes.VARCHAR);
    Schema schema = new Schema(fields);
    // FIX: use parameterized Map types instead of raw types.
    Map<String, String> map = new HashMap<>();
    map.put("complex_delimiter_level_1", "#");
    map.put("bad_records_logger_enable", "TRUE");
    map.put("bad_record_path", path + "/badrec");
    CarbonWriterBuilder builder = CarbonWriter.builder().outputPath(path);
    builder.withLoadOptions(map).withCsvInput(schema).enableLocalDictionary(false)
        .writtenBy("CarbonReaderTest");
    CarbonWriter writer = builder.build();
    writer.write(rec);
    writer.close();
  } finally {
    // FIX: restore the global bad-records action and remove the test output
    // even when the write fails, so a failure here cannot leak state into
    // later tests (previously both steps were skipped on exception).
    CarbonProperties.getInstance()
        .addProperty(CarbonCommonConstants.CARBON_BAD_RECORDS_ACTION, badRecordAction);
    FileUtils.deleteDirectory(new File(path));
  }
}
Aggregations