Usage example of org.apache.carbondata.core.metadata.datatype.Field in the Apache CarbonData project: the main method of the CarbonReaderExample class.
/**
 * Example entry point: writes ten sample rows (ten primitive columns plus one
 * string-array column) with {@code CarbonWriter}, then reads them back twice —
 * first with an explicit projection derived from the stored schema, then with
 * the default (full) projection. The output directory is removed before and
 * after the run.
 *
 * @param args unused command-line arguments
 */
public static void main(String[] args) {
    String path = "./testWriteFiles";
    try {
        FileUtils.deleteDirectory(new File(path));
        // Configure the default timestamp/date parse formats used by the writer.
        CarbonProperties.getInstance()
            .addProperty(CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT,
                CarbonCommonConstants.CARBON_TIMESTAMP_DEFAULT_FORMAT)
            .addProperty(CarbonCommonConstants.CARBON_DATE_FORMAT,
                CarbonCommonConstants.CARBON_DATE_DEFAULT_FORMAT);
        // Schema: 10 primitive columns plus one array<string> column at index 10.
        Field[] fields = new Field[11];
        fields[0] = new Field("stringField", DataTypes.STRING);
        fields[1] = new Field("shortField", DataTypes.SHORT);
        fields[2] = new Field("intField", DataTypes.INT);
        fields[3] = new Field("longField", DataTypes.LONG);
        fields[4] = new Field("doubleField", DataTypes.DOUBLE);
        fields[5] = new Field("boolField", DataTypes.BOOLEAN);
        fields[6] = new Field("dateField", DataTypes.DATE);
        fields[7] = new Field("timeField", DataTypes.TIMESTAMP);
        fields[8] = new Field("decimalField", DataTypes.createDecimalType(8, 2));
        fields[9] = new Field("varcharField", DataTypes.VARCHAR);
        fields[10] = new Field("arrayField", DataTypes.createArrayType(DataTypes.STRING));
        // "#" is the level-1 delimiter used to encode array values in the CSV input.
        CarbonWriter writer = CarbonWriter.builder()
            .outputPath(path)
            .withLoadOption("complex_delimiter_level_1", "#")
            .withCsvInput(new Schema(fields))
            .writtenBy("CarbonReaderExample")
            .build();
        for (int i = 0; i < 10; i++) {
            String[] row2 = new String[] { "robot" + (i % 10), String.valueOf(i % 10000), String.valueOf(i), String.valueOf(Long.MAX_VALUE - i), String.valueOf((double) i / 2), String.valueOf(true), "2019-03-02", "2019-02-12 03:03:34", "12.345", "varchar", "Hello#World#From#Carbon" };
            writer.write(row2);
        }
        writer.close();
        // Locate a carbonindex file: the schema is read from the index file.
        File[] dataFiles = new File(path).listFiles(new FilenameFilter() {
            @Override
            public boolean accept(File dir, String name) {
                if (name == null) {
                    return false;
                }
                return name.endsWith("carbonindex");
            }
        });
        if (dataFiles == null || dataFiles.length < 1) {
            throw new RuntimeException("Carbon index file not exists.");
        }
        Schema schema = CarbonSchemaReader.readSchema(dataFiles[0].getAbsolutePath()).asOriginOrder();
        // Transform the schema into a projection of all column names.
        String[] strings = new String[schema.getFields().length];
        for (int i = 0; i < schema.getFields().length; i++) {
            strings[i] = (schema.getFields())[i].getFieldName();
        }
        // Read data with an explicit projection (original column order).
        CarbonReader reader = CarbonReader.builder(path, "_temp").projection(strings).build();
        System.out.println("\nData:");
        int i = 0;
        while (reader.hasNext()) {
            Object[] row = (Object[]) reader.readNextRow();
            System.out.println(String.format("%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t", i, row[0], row[1], row[2], row[3], row[4], row[5], row[6], row[7], row[8], row[9]));
            // The array column comes back as Object[]; print and spot-check it.
            Object[] arr = (Object[]) row[10];
            for (int j = 0; j < arr.length; j++) {
                System.out.print(arr[j] + " ");
            }
            assert (arr[0].equals("Hello"));
            assert (arr[3].equals("Carbon"));
            System.out.println();
            i++;
        }
        reader.close();
        // Read data again with the default projection (note: column order differs,
        // so the array column appears at index 4 here).
        CarbonReader reader2 = CarbonReader.builder(path, "_temp").build();
        System.out.println("\nData:");
        i = 0;
        while (reader2.hasNext()) {
            Object[] row = (Object[]) reader2.readNextRow();
            System.out.print(String.format("%s\t%s\t%s\t%s\t%s\t", i, row[0], row[1], row[2], row[3]));
            Object[] arr = (Object[]) row[4];
            for (int j = 0; j < arr.length; j++) {
                System.out.print(arr[j] + " ");
            }
            System.out.println(String.format("\t%s\t%s\t%s\t%s\t%s\t%s\t", row[5], row[6], row[7], row[8], row[9], row[10]));
            i++;
        }
        reader2.close();
    } catch (Throwable e) {
        // Example-only broad catch: report the failure and fail under -ea.
        e.printStackTrace();
        // Fix: print the message BEFORE asserting, otherwise with assertions
        // enabled the AssertionError makes the println unreachable.
        System.out.println(e.getMessage());
        assert (false);
    } finally {
        try {
            FileUtils.deleteDirectory(new File(path));
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
Usage example of org.apache.carbondata.core.metadata.datatype.Field in the Apache CarbonData project: the before method of the CarbonCliTest class.
/**
 * Prepares the test store before each test: clears any previous output and
 * loads the same (name, age) data set twice so the path contains two segments.
 */
@Before
public void before() throws IOException {
    FileUtils.deleteDirectory(new File(path));
    Field[] schemaFields = {
        new Field("name", DataTypes.STRING),
        new Field("age", DataTypes.INT)
    };
    String[] sortColumns = { "name", "age" };
    // Two identical loads -> two segments under the same path.
    for (int load = 0; load < 2; load++) {
        TestUtil.writeFilesAndVerify(5000000, new Schema(schemaFields), path, sortColumns, 3, 8);
    }
}
Usage example of org.apache.carbondata.core.metadata.datatype.Field in the Apache CarbonData project: the testBinary method of the CarbonCliTest class.
/**
 * Verifies that the CLI schema summary ("-s") of a store containing a BINARY
 * column reports both the column name and its data type.
 */
@Test
public void testBinary() throws IOException, InvalidLoadOptionException {
    FileUtils.deleteDirectory(new File(pathBinary));
    Field[] schemaFields = {
        new Field("name", DataTypes.STRING),
        new Field("age", DataTypes.INT),
        new Field("binaryField", DataTypes.BINARY)
    };
    buildBinaryData(5000000, new Schema(schemaFields), pathBinary, new String[] { "name" }, 3, 8);
    // Plain summary run: exercised for side effects only; output is discarded.
    ByteArrayOutputStream captured = new ByteArrayOutputStream();
    CarbonCli.run(new String[] { "-cmd", "summary", "-p", pathBinary }, new PrintStream(captured));
    // Schema summary run ("-s"): capture and inspect the report.
    captured = new ByteArrayOutputStream();
    PrintStream sink = new PrintStream(captured);
    CarbonCli.run(new String[] { "-cmd", "summary", "-p", pathBinary, "-s" }, sink);
    String report = new String(captured.toByteArray());
    Assert.assertTrue(report.contains("binaryfield") && report.contains("BINARY"));
    FileUtils.deleteDirectory(new File(pathBinary));
}
Usage example of org.apache.carbondata.core.metadata.datatype.Field in the Apache CarbonData project: the buildTableSchema method of the CarbonWriterBuilder class.
/**
 * Builds the table schema from the given fields, registering every non-null
 * field on {@code tableSchemaBuilder} and filling {@code sortColumnsSchemaList}
 * with the column schemas of the sort columns (in sort-column order).
 *
 * @param fields                 schema fields; null entries are skipped
 * @param tableSchemaBuilder     builder that accumulates the column schemas
 * @param sortColumnsList        names of the sort columns (validated against fields)
 * @param sortColumnsSchemaList  output array, indexed by sort-column position
 * @param invertedIdxColumnsList names of inverted-index columns (validated against fields)
 * @throws RuntimeException if a sort/inverted-index column is missing from the
 *         schema, a field name is duplicated, or a sort column has an
 *         unsupported data type
 */
private void buildTableSchema(Field[] fields, TableSchemaBuilder tableSchemaBuilder, List<String> sortColumnsList, ColumnSchema[] sortColumnsSchemaList, List<String> invertedIdxColumnsList) {
    Set<String> uniqueFields = new HashSet<>();
    // a counter which will be used in case of complex array type. This valIndex will be assigned
    // to child of complex array type in the order val1, val2 so that each array type child is
    // differentiated to any level
    AtomicInteger valIndex = new AtomicInteger(0);
    // Check if any of the columns specified in sort columns are missing from schema.
    validateColumnsExist(fields, sortColumnsList, "sort columns");
    // Check if any of the columns specified in inverted index are missing from schema.
    validateColumnsExist(fields, invertedIdxColumnsList, "inverted index columns");
    for (Field field : fields) {
        if (null != field) {
            if (!uniqueFields.add(field.getFieldName())) {
                throw new RuntimeException("Duplicate column " + field.getFieldName() + " found in table schema");
            }
            // Position within the respective list, or -1 if not a member.
            int isSortColumn = sortColumnsList.indexOf(field.getFieldName());
            int isInvertedIdxColumn = invertedIdxColumnsList.indexOf(field.getFieldName());
            if (isSortColumn > -1) {
                // unsupported types for ("array", "struct", "double", "float", "decimal")
                if (field.getDataType() == DataTypes.DOUBLE || field.getDataType() == DataTypes.FLOAT || DataTypes.isDecimal(field.getDataType()) || field.getDataType().isComplexType() || field.getDataType() == DataTypes.VARCHAR || field.getDataType() == DataTypes.BINARY) {
                    String errorMsg = "sort columns not supported for array, struct, map, double, float, decimal, " + "varchar, binary";
                    throw new RuntimeException(errorMsg);
                }
            }
            if (field.getChildren() != null && field.getChildren().size() > 0) {
                // Complex types: build the concrete DataType from the children.
                if (field.getDataType().getName().equalsIgnoreCase("ARRAY")) {
                    // Loop through the inner columns and for a StructData
                    DataType complexType = DataTypes.createArrayType(field.getChildren().get(0).getDataType(), field.getChildren().get(0).getFieldName());
                    tableSchemaBuilder.addColumn(new StructField(field.getFieldName(), complexType, field.getChildren()), valIndex, false, isInvertedIdxColumn > -1);
                } else if (field.getDataType().getName().equalsIgnoreCase("STRUCT")) {
                    // Loop through the inner columns and for a StructData
                    List<StructField> structFieldsArray = new ArrayList<StructField>(field.getChildren().size());
                    for (StructField childFld : field.getChildren()) {
                        structFieldsArray.add(new StructField(childFld.getFieldName(), childFld.getDataType()));
                    }
                    DataType complexType = DataTypes.createStructType(structFieldsArray);
                    tableSchemaBuilder.addColumn(new StructField(field.getFieldName(), complexType, field.getChildren()), valIndex, false, isInvertedIdxColumn > -1);
                } else if (field.getDataType().getName().equalsIgnoreCase("MAP")) {
                    // Loop through the inner columns for MapType
                    DataType mapType = DataTypes.createMapType(((MapType) field.getDataType()).getKeyType(), field.getChildren().get(0).getDataType());
                    tableSchemaBuilder.addColumn(new StructField(field.getFieldName(), mapType, field.getChildren()), valIndex, false, isInvertedIdxColumn > -1);
                }
            } else {
                // Primitive column: record its schema so sort columns can be marked.
                ColumnSchema columnSchema = tableSchemaBuilder.addColumn(new StructField(field.getFieldName(), field.getDataType()), valIndex, isSortColumn > -1, isInvertedIdxColumn > -1);
                if (isSortColumn > -1) {
                    columnSchema.setSortColumn(true);
                    sortColumnsSchemaList[isSortColumn] = columnSchema;
                }
            }
        }
    }
}

/**
 * Throws a RuntimeException if any name in {@code columns} does not match
 * (case-insensitively) a field in {@code fields}. {@code usage} names the
 * option the columns came from, for the error message.
 */
private static void validateColumnsExist(Field[] fields, List<String> columns, String usage) {
    for (String column : columns) {
        boolean exists = false;
        for (Field field : fields) {
            if (field.getFieldName().equalsIgnoreCase(column)) {
                exists = true;
                break;
            }
        }
        if (!exists) {
            throw new RuntimeException("column: " + column + " specified in " + usage + " does not exist in schema");
        }
    }
}
Usage example of org.apache.carbondata.core.metadata.datatype.Field in the Apache CarbonData project: the setCsvHeader method of the CarbonWriterBuilder class.
/**
 * Derives the CSV header (comma-separated field names) and the column-name
 * array from the writer schema and sets both on the load model. Null entries
 * in the schema's field array are skipped.
 *
 * Fixes over the previous version: the columns array no longer carries
 * trailing nulls when some fields are null (it was sized by fields.length but
 * only partially filled), and an all-null field array no longer triggers
 * substring(0, -1).
 *
 * @param model load model to receive the CSV header and column names
 */
private void setCsvHeader(CarbonLoadModel model) {
    Field[] fields = schema.getFields();
    List<String> columnNames = new ArrayList<>(fields.length);
    for (Field field : fields) {
        if (null != field) {
            columnNames.add(field.getFieldName());
        }
    }
    model.setCsvHeader(String.join(",", columnNames));
    model.setCsvHeaderColumns(columnNames.toArray(new String[0]));
}
Aggregations