use of tech.tablesaw.api.ColumnType in project symja_android_library by axkr.
the class ColumnTypeDetector method detectColumnTypes.
/**
 * Infers a {@link ColumnType} for every column found in the supplied rows.
 *
 * <p>Only the rows selected by the sampling schedule are inspected, so a detected type can be
 * wrong for data that was never looked at. If that is the case a Parse Exception will be thrown.
 *
 * <p>The method {@code printColumnTypes()} can be used to print a list of the detected columns
 * that can be corrected and used to explicitly specify the correct column types.
 *
 * <p>The number of columns is taken from the first row. A column detected as {@code STRING} is
 * reported as {@code TEXT} instead when more than {@code STRING_COLUMN_ROW_COUNT_CUTOFF} rows
 * were read, {@code TEXT} is among {@code options.columnTypesToDetect()}, and the share of
 * distinct sampled values exceeds {@code STRING_COLUMN_CUTOFF}.
 *
 * @param rows the rows to examine, each row being an array with one field per column
 * @param options read options; {@code sample()} selects the sampling schedule
 * @return the detected types, one per column of the first row, in column order
 */
public ColumnType[] detectColumnTypes(Iterator<String[]> rows, ReadOptions options) {
  final boolean sampling = options.sample();

  // One bucket of sampled field values per column.
  final List<List<String>> samples = new ArrayList<>();

  int rowsSeen = 0;
  // Index of the next row whose fields should be collected.
  int rowToSample = 0;

  while (rows.hasNext()) {
    String[] fields = rows.next();

    // The column count is only known once the first row has been read.
    if (rowsSeen == 0) {
      for (int i = 0; i < fields.length; i++) {
        samples.add(new ArrayList<>());
      }
    }

    if (rowsSeen == rowToSample) {
      for (int col = 0; col < fields.length; col++) {
        samples.get(col).add(fields[col]);
      }
      rowToSample = sampling ? nextRow(rowToSample) : nextRowWithoutSampling(rowToSample);
    }
    rowsSeen++;
  }

  // Detect a type for each column from its collected values.
  ColumnType[] detected = new ColumnType[samples.size()];
  for (int col = 0; col < detected.length; col++) {
    List<String> values = samples.get(col);
    ColumnType type = detectType(values, options);

    if (type.equals(STRING)
        && rowsSeen > STRING_COLUMN_ROW_COUNT_CUTOFF
        && options.columnTypesToDetect().contains(TEXT)) {
      // Mostly-unique string columns are reported as TEXT.
      double distinctShare = (double) new HashSet<>(values).size() / values.size();
      if (distinctShare > STRING_COLUMN_CUTOFF) {
        type = TEXT;
      }
    }
    detected[col] = type;
  }
  return detected;
}
use of tech.tablesaw.api.ColumnType in project symja_android_library by axkr.
the class SqlResultSetReader method read.
/**
 * Returns a new table (created with {@code Table.create()}) holding the contents of the given
 * result set.
 *
 * <p>One column is created per result-set column, named after the column label. Its type is
 * resolved by {@code getColumnType} from the JDBC type, scale and precision reported by the
 * result set's metadata. Every remaining row of the result set is then appended.
 *
 * @param resultSet the result set to read; it is advanced with {@code next()} until exhausted
 * @return the populated table
 * @throws SQLException if there is a problem detected in the database
 * @throws IllegalStateException if no column type is found for a column's JDBC type
 */
public static Table read(ResultSet resultSet) throws SQLException {
  ResultSetMetaData metaData = resultSet.getMetaData();
  // Read the column count once rather than re-querying the metadata on every loop iteration
  // (the row loop would otherwise ask for it once per row).
  final int columnCount = metaData.getColumnCount();
  Table table = Table.create();

  // Setup the columns and add to the table (JDBC column indexes start at 1)
  for (int i = 1; i <= columnCount; i++) {
    ColumnType type =
        getColumnType(metaData.getColumnType(i), metaData.getScale(i), metaData.getPrecision(i));
    Preconditions.checkState(
        type != null,
        "No column type found for %s as specified for column %s",
        metaData.getColumnType(i),
        metaData.getColumnName(i));
    Column<?> newColumn = type.create(metaData.getColumnLabel(i));
    table.addColumns(newColumn);
  }

  // Add the rows
  while (resultSet.next()) {
    for (int i = 1; i <= columnCount; i++) {
      // subtract 1 because result sets originate at 1 not 0
      Column<?> column = table.column(i - 1);
      // Primitive-backed columns go through appendToColumn, which also receives the result set
      // (helper not shown here); every other column type takes the value as a plain object.
      if (column instanceof ShortColumn) {
        appendToColumn(column, resultSet, resultSet.getShort(i));
      } else if (column instanceof IntColumn) {
        appendToColumn(column, resultSet, resultSet.getInt(i));
      } else if (column instanceof LongColumn) {
        appendToColumn(column, resultSet, resultSet.getLong(i));
      } else if (column instanceof FloatColumn) {
        appendToColumn(column, resultSet, resultSet.getFloat(i));
      } else if (column instanceof DoubleColumn) {
        appendToColumn(column, resultSet, resultSet.getDouble(i));
      } else if (column instanceof BooleanColumn) {
        appendToColumn(column, resultSet, resultSet.getBoolean(i));
      } else {
        column.appendObj(resultSet.getObject(i));
      }
    }
  }
  return table;
}
use of tech.tablesaw.api.ColumnType in project symja_android_library by axkr.
the class TableSliceGroup method aggregate.
/**
 * Applies the given aggregations to the given columns. These are the apply and combine steps of
 * a split-apply-combine.
 *
 * <p>For every (column name, function) pair one result column is produced, holding the
 * function's summary of that column for each slice. A "Group" column holding the slice names is
 * filled while the first function is processed. The assembled table is passed through
 * {@code splitGroupingColumn} before being returned.
 *
 * @param functions multimap from a column name to the aggregate functions to apply to that column
 * @return the summary table, as returned by {@code splitGroupingColumn}
 */
@SuppressWarnings({"unchecked", "rawtypes"})
public Table aggregate(ListMultimap<String, AggregateFunction<?, ?>> functions) {
  Table summary = summaryTableName(sourceTable);
  StringColumn groupNames = StringColumn.create("Group");
  summary.addColumns(groupNames);

  // Slice names are recorded only during the pass for the very first function.
  boolean namesPending = true;

  for (Map.Entry<String, Collection<AggregateFunction<?, ?>>> byColumn :
      functions.asMap().entrySet()) {
    String sourceName = byColumn.getKey();

    for (AggregateFunction fn : byColumn.getValue()) {
      String resultName = aggregateColumnName(sourceName, fn.functionName());
      ColumnType resultType = fn.returnType();
      Column results = resultType.create(resultName);

      for (TableSlice slice : getSlices()) {
        Object value = fn.summarize(slice.column(sourceName));
        if (namesPending) {
          groupNames.append(slice.name());
        }
        if (!fn.returnType().equals(ColumnType.DOUBLE)) {
          results.append(value);
        } else {
          // DOUBLE results are normalised through Number.doubleValue() before appending.
          results.append(((Number) value).doubleValue());
        }
      }

      summary.addColumns(results);
      namesPending = false;
    }
  }
  return splitGroupingColumn(summary);
}
use of tech.tablesaw.api.ColumnType in project symja_android_library by axkr.
the class TableBuildingUtils method build.
/**
 * Builds a table from column names and rows of raw string cells.
 *
 * <p>Column types are detected from the rows with a {@code ColumnTypeDetector}; any type the
 * user configured through {@code options.columnTypeReadOptions()} replaces the detected one.
 * One column is created per entry of {@code columnNames} and every row is appended cell by cell.
 *
 * @param columnNames the names of the columns to create, in order
 * @param dataRows the rows to load, each an array of cell strings
 * @param options read options supplying the table name, the types to detect and any configured
 *     column types
 * @return the populated table, or an empty table with no columns when {@code dataRows} is empty
 */
public static Table build(List<String> columnNames, List<String[]> dataRows, ReadOptions options) {
  Table result = Table.create(options.tableName());
  if (dataRows.isEmpty()) {
    return result;
  }

  ColumnType[] types =
      new ColumnTypeDetector(options.columnTypesToDetect())
          .detectColumnTypes(dataRows.iterator(), options);

  // A type configured by the user wins over the detected one.
  for (int col = 0; col < types.length; col++) {
    types[col] =
        options
            .columnTypeReadOptions()
            .columnType(col, col < columnNames.size() ? columnNames.get(col) : null)
            .orElse(types[col]);
  }

  int position = 0;
  for (String name : columnNames) {
    result.addColumns(types[position].create(name));
    position++;
  }

  for (String[] row : dataRows) {
    for (int col = 0; col < result.columnCount(); col++) {
      result.column(col).appendCell(row[col]);
    }
  }
  return result;
}
use of tech.tablesaw.api.ColumnType in project symja_android_library by axkr.
the class XlsxReader method createTable.
/**
 * Builds a table from the cells of {@code sheet} that lie inside {@code tableArea}.
 *
 * <p>If {@code getHeaderNames} finds header names, {@code tableArea.startRow} is advanced by one
 * so that the header row is not read as data; otherwise the names come from
 * {@code calculateDefaultColumnNames}. Columns are created lazily: a column comes into existence
 * at its first non-blank cell, or at a blank cell when the user configured a type for it.
 * Columns that were never created are left out of the table. Each existing column is padded with
 * missing values so that it has one entry per row read so far.
 *
 * <p>A row that is not defined in the sheet ({@code sheet.getRow} returns {@code null}) is
 * handled as a row consisting only of blank cells.
 *
 * @param sheet the sheet to read from
 * @param tableArea the row and column range to read; its {@code startRow} is incremented when a
 *     header row is present
 * @param options read options supplying the table name and any configured column types
 * @return a table named {@code options.tableName() + "#" + sheet.getSheetName()}
 */
private Table createTable(Sheet sheet, TableRange tableArea, XlsxReadOptions options) {
  Optional<List<String>> optHeaderNames = getHeaderNames(sheet, tableArea);
  // A header row was found: the data starts one row further down.
  optHeaderNames.ifPresent(h -> tableArea.startRow++);
  List<String> headerNames = optHeaderNames.orElse(calculateDefaultColumnNames(tableArea));
  Table table = Table.create(options.tableName() + "#" + sheet.getSheetName());
  // One slot per header; a slot stays null until its column is created.
  List<Column<?>> columns = new ArrayList<>(Collections.nCopies(headerNames.size(), null));
  for (int rowNum = tableArea.startRow; rowNum <= tableArea.endRow; rowNum++) {
    // May be null when the row is not defined in the sheet.
    Row row = sheet.getRow(rowNum);
    for (int colNum = 0; colNum < headerNames.size(); colNum++) {
      int excelColNum = colNum + tableArea.startColumn;
      // An undefined row is treated like a row of blank cells instead of dereferencing null.
      Cell cell =
          row == null ? null : row.getCell(excelColNum, MissingCellPolicy.RETURN_BLANK_AS_NULL);
      Column<?> column = columns.get(colNum);
      String columnName = headerNames.get(colNum);
      if (cell != null) {
        if (column == null) {
          column = createColumn(colNum, columnName, sheet, excelColNum, tableArea, options);
          columns.set(colNum, column);
          // Back-fill the rows that were skipped before this column existed.
          while (column.size() < rowNum - tableArea.startRow) {
            column.appendMissing();
          }
        }
        // appendValue may hand back a different column instance; if so, keep that one.
        Column<?> altColumn = appendValue(column, cell);
        if (altColumn != null && altColumn != column) {
          column = altColumn;
          columns.set(colNum, column);
        }
      } else {
        // Blank cell: only columns with a user-configured type get an explicit missing value
        // here. The configured type is looked up once per blank cell.
        Optional<ColumnType> customType =
            options.columnTypeReadOptions().columnType(colNum, columnName);
        if (customType.isPresent()) {
          if (column == null) {
            column = customType.get().create(columnName).appendMissing();
            columns.set(colNum, column);
          } else {
            column.appendMissing();
          }
        }
      }
      if (column != null) {
        // Pad so the column has an entry for every row up to and including this one.
        while (column.size() <= rowNum - tableArea.startRow) {
          column.appendMissing();
        }
      }
    }
  }
  // Drop the slots whose column was never created.
  columns.removeAll(Collections.singleton(null));
  table.addColumns(columns.toArray(new Column<?>[columns.size()]));
  return table;
}
Aggregations