Search in sources :

Example 6 with ColumnType

use of tech.tablesaw.api.ColumnType in project symja_android_library by axkr.

the class ColumnTypeDetector method detectColumnTypes.

/**
 * Guesses a {@link ColumnType} for every column of the supplied rows.
 *
 * <p>Not every row is necessarily inspected: which rows contribute values is decided by
 * {@code nextRow(..)} when {@code options.sample()} is true and by
 * {@code nextRowWithoutSampling(..)} otherwise. The number of columns is taken from the first
 * row. Because the guess may be based on a subset of the data it can be wrong for rows that
 * were not inspected.
 *
 * <p>A column detected as {@code STRING} is reported as {@code TEXT} instead when more than
 * {@code STRING_COLUMN_ROW_COUNT_CUTOFF} rows were read, {@code TEXT} is among
 * {@code options.columnTypesToDetect()}, and the share of distinct inspected values exceeds
 * {@code STRING_COLUMN_CUTOFF}.
 *
 * @param rows the rows to inspect; the iterator is consumed completely
 * @param options read options controlling sampling and the candidate types
 * @return one detected type per column, in column order
 */
public ColumnType[] detectColumnTypes(Iterator<String[]> rows, ReadOptions options) {
    final boolean sampling = options.sample();
    // one list of inspected cell values per column, in column order
    final List<List<String>> inspectedValues = new ArrayList<>();
    int rowsRead = 0;
    // index of the next row whose values should be collected
    int rowToInspect = 0;
    for (; rows.hasNext(); rowsRead++) {
        final String[] fields = rows.next();
        if (rowsRead == 0) {
            // the first row fixes how many columns are tracked
            for (int i = 0; i < fields.length; i++) {
                inspectedValues.add(new ArrayList<>());
            }
        }
        if (rowsRead != rowToInspect) {
            continue;
        }
        for (int col = 0; col < fields.length; col++) {
            inspectedValues.get(col).add(fields[col]);
        }
        rowToInspect = sampling ? nextRow(rowToInspect) : nextRowWithoutSampling(rowToInspect);
    }
    // detection pass: one result slot per tracked column
    final ColumnType[] detected = new ColumnType[inspectedValues.size()];
    int slot = 0;
    for (List<String> values : inspectedValues) {
        ColumnType type = detectType(values, options);
        final boolean textCandidate = type.equals(STRING) && rowsRead > STRING_COLUMN_ROW_COUNT_CUTOFF && options.columnTypesToDetect().contains(TEXT);
        if (textCandidate) {
            final int distinctCount = new HashSet<String>(values).size();
            final double distinctShare = (double) distinctCount / values.size();
            if (distinctShare > STRING_COLUMN_CUTOFF) {
                type = TEXT;
            }
        }
        detected[slot++] = type;
    }
    return detected;
}
Also used : ColumnType(tech.tablesaw.api.ColumnType) ArrayList(java.util.ArrayList) CopyOnWriteArrayList(java.util.concurrent.CopyOnWriteArrayList) List(java.util.List) ArrayList(java.util.ArrayList) CopyOnWriteArrayList(java.util.concurrent.CopyOnWriteArrayList) HashSet(java.util.HashSet)

Example 7 with ColumnType

use of tech.tablesaw.api.ColumnType in project symja_android_library by axkr.

the class SqlResultSetReader method read.

/**
 * Returns a new table populated from the given result set.
 *
 * <p>One column is created per result-set column, named after its column label and typed via
 * {@code getColumnType(sqlType, scale, precision)}. Every remaining row of the result set is
 * then appended to those columns. The result set is advanced to its end but is not closed here.
 *
 * @param resultSet the result set to read; iterated with {@code next()} until exhausted
 * @return the table holding the columns and rows read from {@code resultSet}
 * @throws SQLException if there is a problem detected in the database
 * @throws IllegalStateException (via {@code Preconditions.checkState}) if no column type is
 *     found for one of the result-set columns
 */
public static Table read(ResultSet resultSet) throws SQLException {
    ResultSetMetaData metaData = resultSet.getMetaData();
    // The column count cannot change while iterating, so query the driver once instead of on
    // every loop test (previously once per cell).
    final int columnCount = metaData.getColumnCount();
    Table table = Table.create();
    // Setup the columns and add to the table
    for (int i = 1; i <= columnCount; i++) {
        final int sqlType = metaData.getColumnType(i);
        ColumnType type = getColumnType(sqlType, metaData.getScale(i), metaData.getPrecision(i));
        Preconditions.checkState(type != null, "No column type found for %s as specified for column %s", sqlType, metaData.getColumnName(i));
        Column<?> newColumn = type.create(metaData.getColumnLabel(i));
        table.addColumns(newColumn);
    }
    // Add the rows
    while (resultSet.next()) {
        for (int i = 1; i <= columnCount; i++) {
            // subtract 1 because result sets originate at 1 not 0
            Column<?> column = table.column(i - 1);
            // Primitive-backed columns are read with the matching typed getter; the result set
            // is handed to appendToColumn as well (presumably so it can detect SQL NULL --
            // confirm in appendToColumn).
            if (column instanceof ShortColumn) {
                appendToColumn(column, resultSet, resultSet.getShort(i));
            } else if (column instanceof IntColumn) {
                appendToColumn(column, resultSet, resultSet.getInt(i));
            } else if (column instanceof LongColumn) {
                appendToColumn(column, resultSet, resultSet.getLong(i));
            } else if (column instanceof FloatColumn) {
                appendToColumn(column, resultSet, resultSet.getFloat(i));
            } else if (column instanceof DoubleColumn) {
                appendToColumn(column, resultSet, resultSet.getDouble(i));
            } else if (column instanceof BooleanColumn) {
                appendToColumn(column, resultSet, resultSet.getBoolean(i));
            } else {
                // every other column type receives the driver's object representation
                column.appendObj(resultSet.getObject(i));
            }
        }
    }
    return table;
}
Also used : ResultSetMetaData(java.sql.ResultSetMetaData) LongColumn(tech.tablesaw.api.LongColumn) Table(tech.tablesaw.api.Table) ColumnType(tech.tablesaw.api.ColumnType) DoubleColumn(tech.tablesaw.api.DoubleColumn) ShortColumn(tech.tablesaw.api.ShortColumn) BooleanColumn(tech.tablesaw.api.BooleanColumn) FloatColumn(tech.tablesaw.api.FloatColumn) IntColumn(tech.tablesaw.api.IntColumn)

Example 8 with ColumnType

use of tech.tablesaw.api.ColumnType in project symja_android_library by axkr.

the class TableSliceGroup method aggregate.

/**
 * Runs the requested aggregate functions over every slice of this group and collects the
 * results in one summary table (the apply and combine steps of split-apply-combine).
 *
 * <p>The summary table starts with a {@code "Group"} string column holding each slice's name,
 * followed by one result column per (column name, function) pair. The table is passed through
 * {@code splitGroupingColumn} before being returned.
 *
 * @param functions map from column name to the aggregations to apply on that column
 * @return the summary table produced by {@code splitGroupingColumn}
 */
@SuppressWarnings({ "unchecked", "rawtypes" })
public Table aggregate(ListMultimap<String, AggregateFunction<?, ?>> functions) {
    Table summary = summaryTableName(sourceTable);
    StringColumn sliceNames = StringColumn.create("Group");
    summary.addColumns(sliceNames);
    // slice names are recorded only while the very first function is being processed
    boolean recordSliceNames = true;
    for (Map.Entry<String, Collection<AggregateFunction<?, ?>>> perColumn : functions.asMap().entrySet()) {
        final String sourceColumnName = perColumn.getKey();
        for (AggregateFunction fn : perColumn.getValue()) {
            final String resultName = aggregateColumnName(sourceColumnName, fn.functionName());
            final ColumnType resultType = fn.returnType();
            final Column aggregated = resultType.create(resultName);
            for (TableSlice slice : getSlices()) {
                final Object value = fn.summarize(slice.column(sourceColumnName));
                if (recordSliceNames) {
                    sliceNames.append(slice.name());
                }
                final boolean producesDouble = fn.returnType().equals(ColumnType.DOUBLE);
                if (!producesDouble) {
                    aggregated.append(value);
                } else {
                    // DOUBLE results are normalised through Number before being appended
                    aggregated.append(((Number) value).doubleValue());
                }
            }
            summary.addColumns(aggregated);
            recordSliceNames = false;
        }
    }
    return splitGroupingColumn(summary);
}
Also used : StringColumn(tech.tablesaw.api.StringColumn) Table(tech.tablesaw.api.Table) ColumnType(tech.tablesaw.api.ColumnType) StringColumn(tech.tablesaw.api.StringColumn) Column(tech.tablesaw.columns.Column) AggregateFunction(tech.tablesaw.aggregate.AggregateFunction) Collection(java.util.Collection) Map(java.util.Map)

Example 9 with ColumnType

use of tech.tablesaw.api.ColumnType in project symja_android_library by axkr.

the class TableBuildingUtils method build.

/**
 * Creates a table named {@code options.tableName()} from already-parsed rows.
 *
 * <p>Column types are first detected from {@code dataRows} with a {@code ColumnTypeDetector};
 * any type configured in {@code options.columnTypeReadOptions()} for a column index (and name,
 * where one exists) replaces the detected one. One column is then created per entry of
 * {@code columnNames} and every row's cells are appended to those columns.
 *
 * @param columnNames names of the columns to create, in order
 * @param dataRows the cell values, one array per row
 * @param options read options supplying the table name and column-type configuration
 * @return the populated table, or an empty table without columns when {@code dataRows} is empty
 */
public static Table build(List<String> columnNames, List<String[]> dataRows, ReadOptions options) {
    final Table result = Table.create(options.tableName());
    if (dataRows.isEmpty()) {
        return result;
    }
    final ColumnType[] columnTypes = new ColumnTypeDetector(options.columnTypesToDetect()).detectColumnTypes(dataRows.iterator(), options);
    // a user-configured type, when present, wins over the detected one
    final int namedColumnCount = columnNames.size();
    for (int col = 0; col < columnTypes.length; col++) {
        final String nameOrNull = col < namedColumnCount ? columnNames.get(col) : null;
        columnTypes[col] = options.columnTypeReadOptions().columnType(col, nameOrNull).orElse(columnTypes[col]);
    }
    // one column per supplied name, typed by position
    int position = 0;
    for (String name : columnNames) {
        result.addColumns(columnTypes[position].create(name));
        position++;
    }
    // append the cells row by row
    for (String[] cells : dataRows) {
        for (int col = 0; col < result.columnCount(); col++) {
            result.column(col).appendCell(cells[col]);
        }
    }
    return result;
}
Also used : Table(tech.tablesaw.api.Table) ColumnType(tech.tablesaw.api.ColumnType)

Example 10 with ColumnType

use of tech.tablesaw.api.ColumnType in project symja_android_library by axkr.

the class XlsxReader method createTable.

/**
 * Builds a table from the cells of {@code sheet} that lie inside {@code tableArea}.
 *
 * <p>If {@code getHeaderNames} finds header names, {@code tableArea.startRow} is advanced by
 * one (the passed-in {@code tableArea} is modified) and those names are used; otherwise the
 * names come from {@code calculateDefaultColumnNames}. Columns are created lazily: a column
 * comes into existence at the first non-blank cell in its position, or at a blank cell when a
 * column type has been configured for it in {@code options.columnTypeReadOptions()}. Columns
 * are padded with missing values so that each existing column has one entry per processed row.
 * Positions for which no column was ever created are left out of the resulting table.
 *
 * <p>A row for which {@code sheet.getRow} returns {@code null} is handled as a row consisting
 * only of blank cells instead of causing a {@code NullPointerException}.
 *
 * @param sheet the sheet to read cells from
 * @param tableArea the cell range holding the table; its {@code startRow} may be incremented
 * @param options read options supplying the table name and configured column types
 * @return a table named {@code options.tableName() + "#" + sheet.getSheetName()}
 */
private Table createTable(Sheet sheet, TableRange tableArea, XlsxReadOptions options) {
    Optional<List<String>> optHeaderNames = getHeaderNames(sheet, tableArea);
    // when header names were found, move the start of the data area down by one row
    optHeaderNames.ifPresent(h -> tableArea.startRow++);
    List<String> headerNames = optHeaderNames.orElse(calculateDefaultColumnNames(tableArea));
    Table table = Table.create(options.tableName() + "#" + sheet.getSheetName());
    // null marks a position whose column has not been created yet
    List<Column<?>> columns = new ArrayList<>(Collections.nCopies(headerNames.size(), null));
    for (int rowNum = tableArea.startRow; rowNum <= tableArea.endRow; rowNum++) {
        // Sheet.getRow returns null for a row that was never defined in the sheet
        Row row = sheet.getRow(rowNum);
        for (int colNum = 0; colNum < headerNames.size(); colNum++) {
            int excelColNum = colNum + tableArea.startColumn;
            // an undefined row is treated exactly like a row of blank cells
            Cell cell = row == null ? null : row.getCell(excelColNum, MissingCellPolicy.RETURN_BLANK_AS_NULL);
            Column<?> column = columns.get(colNum);
            String columnName = headerNames.get(colNum);
            if (cell != null) {
                if (column == null) {
                    column = createColumn(colNum, columnName, sheet, excelColNum, tableArea, options);
                    columns.set(colNum, column);
                    // back-fill the rows that were processed before this column existed
                    while (column.size() < rowNum - tableArea.startRow) {
                        column.appendMissing();
                    }
                }
                // appendValue may hand back a different column object; if so, track that one
                Column<?> altColumn = appendValue(column, cell);
                if (altColumn != null && altColumn != column) {
                    column = altColumn;
                    columns.set(colNum, column);
                }
            } else {
                // blank cell: only columns with a configured type get an explicit missing value
                Optional<ColumnType> customType = options.columnTypeReadOptions().columnType(colNum, columnName);
                if (customType.isPresent()) {
                    if (column == null) {
                        column = customType.get().create(columnName).appendMissing();
                        columns.set(colNum, column);
                    } else {
                        column.appendMissing();
                    }
                }
            }
            if (column != null) {
                // make sure the column holds an entry for the current row
                while (column.size() <= rowNum - tableArea.startRow) {
                    column.appendMissing();
                }
            }
        }
    }
    // drop positions for which no column was ever created
    columns.removeAll(Collections.singleton(null));
    table.addColumns(columns.toArray(new Column<?>[columns.size()]));
    return table;
}
Also used : Table(tech.tablesaw.api.Table) ColumnType(tech.tablesaw.api.ColumnType) ArrayList(java.util.ArrayList) LongColumn(tech.tablesaw.api.LongColumn) DoubleColumn(tech.tablesaw.api.DoubleColumn) Column(tech.tablesaw.columns.Column) ArrayList(java.util.ArrayList) List(java.util.List) Row(org.apache.poi.ss.usermodel.Row) Cell(org.apache.poi.ss.usermodel.Cell)

Aggregations

ColumnType (tech.tablesaw.api.ColumnType)17 Table (tech.tablesaw.api.Table)7 ArrayList (java.util.ArrayList)6 Column (tech.tablesaw.columns.Column)6 List (java.util.List)3 IExpr (org.matheclipse.core.interfaces.IExpr)3 Reader (java.io.Reader)2 Map (java.util.Map)2 NoSuchElementException (java.util.NoSuchElementException)2 IASTAppendable (org.matheclipse.core.interfaces.IASTAppendable)2 DoubleColumn (tech.tablesaw.api.DoubleColumn)2 IntColumn (tech.tablesaw.api.IntColumn)2 LongColumn (tech.tablesaw.api.LongColumn)2 Row (tech.tablesaw.api.Row)2 Objects (com.google.common.base.Objects)1 Strings (com.google.common.base.Strings)1 Lists (com.google.common.collect.Lists)1 Streams (com.google.common.collect.Streams)1 AbstractParser (com.univocity.parsers.common.AbstractParser)1 ResultSetMetaData (java.sql.ResultSetMetaData)1