Search in sources :

Example 11 with Column

use of tech.tablesaw.columns.Column in project symja_android_library by axkr.

the class Table method cast.

/**
 * Cast implements the 'tidy' cast operation as described in these papers by Hadley Wickham:
 *
 * <p>Cast takes a table in 'molten' format, such as is produced by the {@link #melt(List, List,
 * Boolean)} method, and returns a version in standard tidy format.
 *
 * <p>The molten table should have a StringColumn called "variable" and a column called "value".
 * The value column is read as a double column, and every unique variable name becomes a
 * DoubleColumn in the output table.
 *
 * <p>All other columns in this table are considered identifier variables. Each combination of
 * identifier variables specifies an observation, so there will be one row for each, with the
 * other variables added. If a combination has no entry for some variable, the corresponding
 * cell is set to missing.
 *
 * <p>Variable columns are returned in an arbitrary order. Use {@link #reorderColumns(String...)}
 * if column order is important.
 *
 * <p>Tidy concepts: <a
 * href="https://www.jstatsoft.org/article/view/v059i10">https://www.jstatsoft.org/article/view/v059i10</a>
 *
 * <p>Cast function details: <a
 * href="https://www.jstatsoft.org/article/view/v021i12">https://www.jstatsoft.org/article/view/v021i12</a>
 *
 * @return a new table holding one row per combination of identifier values
 */
@Beta
public Table cast() {
    StringColumn variableNames = stringColumn(MELT_VARIABLE_COLUMN_NAME);
    // Every column that is neither the variable nor the value column identifies an observation.
    List<Column<?>> idColumns = columnList.stream()
            .filter(column -> !column.name().equals(MELT_VARIABLE_COLUMN_NAME)
                    && !column.name().equals(MELT_VALUE_COLUMN_NAME))
            .collect(toList());
    Table result = Table.create(name);
    for (Column<?> idColumn : idColumns) {
        result.addColumns(idColumn.type().create(idColumn.name()));
    }
    StringColumn uniqueVariableNames = variableNames.unique();
    for (String varName : uniqueVariableNames) {
        result.addColumns(DoubleColumn.create(varName));
    }
    // One slice per distinct combination of identifier values; each slice yields one output row.
    TableSliceGroup slices = splitOn(idColumns.stream().map(Column::name).toArray(String[]::new));
    for (TableSlice slice : slices) {
        Table sliceTable = slice.asTable();
        for (Column<?> idColumn : idColumns) {
            final String idName = idColumn.name();
            final ColumnType columnType = idColumn.type();
            // All rows of a slice share the same identifier values, so row 0 is representative.
            if (columnType.equals(ColumnType.STRING)) {
                StringColumn source = (StringColumn) sliceTable.column(idName);
                StringColumn dest = (StringColumn) result.column(idName);
                dest.append(source.get(0));
            } else if (columnType.equals(ColumnType.TEXT)) {
                TextColumn source = (TextColumn) sliceTable.column(idName);
                TextColumn dest = (TextColumn) result.column(idName);
                dest.append(source.get(0));
            } else if (columnType.equals(ColumnType.INTEGER)) {
                IntColumn source = (IntColumn) sliceTable.column(idName);
                IntColumn dest = (IntColumn) result.column(idName);
                dest.append(source.get(0));
            } else if (columnType.equals(ColumnType.LONG)) {
                LongColumn source = (LongColumn) sliceTable.column(idName);
                LongColumn dest = (LongColumn) result.column(idName);
                dest.append(source.get(0));
            } else if (columnType.equals(ColumnType.SHORT)) {
                ShortColumn source = (ShortColumn) sliceTable.column(idName);
                ShortColumn dest = (ShortColumn) result.column(idName);
                dest.append(source.get(0));
            } else if (columnType.equals(ColumnType.DOUBLE)) {
                DoubleColumn source = (DoubleColumn) sliceTable.column(idName);
                DoubleColumn dest = (DoubleColumn) result.column(idName);
                dest.append(source.get(0));
            } else if (columnType.equals(ColumnType.BOOLEAN)) {
                BooleanColumn source = (BooleanColumn) sliceTable.column(idName);
                BooleanColumn dest = (BooleanColumn) result.column(idName);
                dest.append(source.get(0));
            } else if (columnType.equals(ColumnType.LOCAL_DATE)) {
                DateColumn source = (DateColumn) sliceTable.column(idName);
                DateColumn dest = (DateColumn) result.column(idName);
                dest.append(source.get(0));
            } else if (columnType.equals(ColumnType.LOCAL_DATE_TIME)) {
                DateTimeColumn source = (DateTimeColumn) sliceTable.column(idName);
                DateTimeColumn dest = (DateTimeColumn) result.column(idName);
                dest.append(source.get(0));
            } else if (columnType.equals(ColumnType.INSTANT)) {
                InstantColumn source = (InstantColumn) sliceTable.column(idName);
                InstantColumn dest = (InstantColumn) result.column(idName);
                dest.append(source.get(0));
            } else if (columnType.equals(ColumnType.LOCAL_TIME)) {
                TimeColumn source = (TimeColumn) sliceTable.column(idName);
                TimeColumn dest = (TimeColumn) result.column(idName);
                dest.append(source.get(0));
            } else {
                // Any other column type: without this fallback the id column would receive no
                // value for this row and the result table's columns would end up with
                // different lengths.
                result.column(idName).appendObj(sliceTable.column(idName).get(0));
            }
        }
        for (String varName : uniqueVariableNames) {
            DoubleColumn dest = (DoubleColumn) result.column(varName);
            Table sliceRow = sliceTable.where(sliceTable.stringColumn(MELT_VARIABLE_COLUMN_NAME).isEqualTo(varName));
            if (!sliceRow.isEmpty()) {
                // If the variable occurs more than once in the slice, only the first value is used.
                dest.append(sliceRow.doubleColumn(MELT_VALUE_COLUMN_NAME).get(0));
            } else {
                dest.appendMissing();
            }
        }
    }
    return result;
}
Also used : ClassGraph(io.github.classgraph.ClassGraph) Arrays(java.util.Arrays) DataFrameWriter(tech.tablesaw.io.DataFrameWriter) IntComparatorChain(tech.tablesaw.sorting.comparators.IntComparatorChain) AggregateFunction(tech.tablesaw.aggregate.AggregateFunction) WriterRegistry(tech.tablesaw.io.WriterRegistry) Function(java.util.function.Function) ArrayList(java.util.ArrayList) BitmapBackedSelection(tech.tablesaw.selection.BitmapBackedSelection) Selection(tech.tablesaw.selection.Selection) AggregateFunctions.countMissing(tech.tablesaw.aggregate.AggregateFunctions.countMissing) DataFrameReader(tech.tablesaw.io.DataFrameReader) QuerySupport.not(tech.tablesaw.api.QuerySupport.not) Column(tech.tablesaw.columns.Column) SortUtils(tech.tablesaw.sorting.SortUtils) IntComparator(it.unimi.dsi.fastutil.ints.IntComparator) NoSuchElementException(java.util.NoSuchElementException) ScanResult(io.github.classgraph.ScanResult) IntFunction(java.util.function.IntFunction) Iterator(java.util.Iterator) DataWriter(tech.tablesaw.io.DataWriter) Predicate(java.util.function.Predicate) Collection(java.util.Collection) Summarizer(tech.tablesaw.aggregate.Summarizer) Selection.selectNRowsAtRandom(tech.tablesaw.selection.Selection.selectNRowsAtRandom) Streams(com.google.common.collect.Streams) Ints(com.google.common.primitives.Ints) Collectors(java.util.stream.Collectors) ReaderRegistry(tech.tablesaw.io.ReaderRegistry) PivotTable(tech.tablesaw.aggregate.PivotTable) Beta(com.google.common.annotations.Beta) Consumer(java.util.function.Consumer) DataReader(tech.tablesaw.io.DataReader) tech.tablesaw.table(tech.tablesaw.table) Collectors.toList(java.util.stream.Collectors.toList) List(java.util.List) Sort(tech.tablesaw.sorting.Sort) Stream(java.util.stream.Stream) DataFrameJoiner(tech.tablesaw.joining.DataFrameJoiner) Preconditions(com.google.common.base.Preconditions) CrossTab(tech.tablesaw.aggregate.CrossTab) IntArrays(it.unimi.dsi.fastutil.ints.IntArrays) Comparator(java.util.Comparator) 
PivotTable(tech.tablesaw.aggregate.PivotTable) Column(tech.tablesaw.columns.Column) Beta(com.google.common.annotations.Beta)

Example 12 with Column

use of tech.tablesaw.columns.Column in project symja_android_library by axkr.

the class AnalyticQueryEngine method processNumberingFunctions.

/**
 * Runs every numbering function registered on the query against the given slice and stores each
 * function's current value in that function's destination column.
 *
 * <p>Rows are visited in the slice's iteration order, which is expected to be sorted already.
 * The first row of the slice always advances the function; each later row is compared with its
 * predecessor through {@code rowComparator} and is reported either as an equal row or as a next
 * row.
 *
 * @param slice the ordered slice whose rows are numbered
 * @throws IllegalArgumentException if a numbering function is configured but no row comparator
 *     (OrderBy) is available
 */
@SuppressWarnings({ "unchecked", "rawtypes" })
private void processNumberingFunctions(TableSlice slice) {
    for (FunctionCall<NumberingFunctions> call : query.getArgumentList().getNumberingFunctions().values()) {
        if (rowComparator == null) {
            throw new IllegalArgumentException("Cannot use Numbering Function without OrderBy");
        }
        NumberingFunction numbering = call.getFunction().getImplementation();
        Column<Integer> target = (Column<Integer>) destination.column(call.getDestinationColumnName());
        int previous = -1;
        for (Row row : slice) {
            final int current = row.getRowNumber();
            // The first row has no predecessor; the comparator is only consulted afterwards.
            final boolean tiesWithPrevious = current != 0
                    && rowComparator.compare(slice.mappedRowNumber(previous), slice.mappedRowNumber(current)) == 0;
            if (tiesWithPrevious) {
                numbering.addEqualRow();
            } else {
                numbering.addNextRow();
            }
            previous = current;
            // Write to the destination row that backs this slice row.
            target.set(slice.mappedRowNumber(current), numbering.getValue());
        }
    }
}
Also used : Column(tech.tablesaw.columns.Column) Row(tech.tablesaw.api.Row)

Example 13 with Column

use of tech.tablesaw.columns.Column in project symja_android_library by axkr.

the class XlsxReader method createTable.

/**
 * Builds a table from the cells of {@code sheet} that lie inside {@code tableArea}.
 *
 * <p>If header names are found, {@code tableArea.startRow} is advanced past the header row
 * (the passed-in {@code tableArea} is mutated); otherwise default column names are calculated.
 * Columns are created lazily when the first non-empty cell of a column is seen, or when the
 * read options specify an explicit type for that column. Columns that never get created are
 * left out of the resulting table.
 *
 * @param sheet the sheet to read cells from
 * @param tableArea the row/column range of the table within the sheet
 * @param options read options supplying the table name and optional per-column types
 * @return the table named {@code options.tableName() + "#" + sheet.getSheetName()}
 */
private Table createTable(Sheet sheet, TableRange tableArea, XlsxReadOptions options) {
    Optional<List<String>> optHeaderNames = getHeaderNames(sheet, tableArea);
    optHeaderNames.ifPresent(h -> tableArea.startRow++);
    // Only compute default names when no header row was found.
    List<String> headerNames = optHeaderNames.orElseGet(() -> calculateDefaultColumnNames(tableArea));
    Table table = Table.create(options.tableName() + "#" + sheet.getSheetName());
    // One slot per header; a slot stays null until its column is actually created.
    List<Column<?>> columns = new ArrayList<>(Collections.nCopies(headerNames.size(), null));
    for (int rowNum = tableArea.startRow; rowNum <= tableArea.endRow; rowNum++) {
        // getRow returns null for rows that are not defined in the sheet; such a row is
        // handled like a row consisting only of empty cells.
        Row row = sheet.getRow(rowNum);
        for (int colNum = 0; colNum < headerNames.size(); colNum++) {
            int excelColNum = colNum + tableArea.startColumn;
            Cell cell = (row == null) ? null : row.getCell(excelColNum, MissingCellPolicy.RETURN_BLANK_AS_NULL);
            Column<?> column = columns.get(colNum);
            String columnName = headerNames.get(colNum);
            if (cell != null) {
                if (column == null) {
                    column = createColumn(colNum, columnName, sheet, excelColNum, tableArea, options);
                    columns.set(colNum, column);
                    // Back-fill the rows that were skipped before this column existed.
                    while (column.size() < rowNum - tableArea.startRow) {
                        column.appendMissing();
                    }
                }
                // appendValue may hand back a different column instance; keep the new one.
                Column<?> altColumn = appendValue(column, cell);
                if (altColumn != null && altColumn != column) {
                    column = altColumn;
                    columns.set(colNum, column);
                }
            } else {
                Optional<ColumnType> customType = options.columnTypeReadOptions().columnType(colNum, columnName);
                if (column == null && customType.isPresent()) {
                    column = customType.get().create(columnName).appendMissing();
                    columns.set(colNum, column);
                } else if (customType.isPresent()) {
                    column.appendMissing();
                }
            }
            if (column != null) {
                // Pad so the column has a value for every row processed so far.
                while (column.size() <= rowNum - tableArea.startRow) {
                    column.appendMissing();
                }
            }
        }
    }
    columns.removeAll(Collections.singleton(null));
    table.addColumns(columns.toArray(new Column<?>[columns.size()]));
    return table;
}
Also used : Table(tech.tablesaw.api.Table) ColumnType(tech.tablesaw.api.ColumnType) ArrayList(java.util.ArrayList) LongColumn(tech.tablesaw.api.LongColumn) DoubleColumn(tech.tablesaw.api.DoubleColumn) Column(tech.tablesaw.columns.Column) ArrayList(java.util.ArrayList) List(java.util.List) Row(org.apache.poi.ss.usermodel.Row) Cell(org.apache.poi.ss.usermodel.Cell)

Example 14 with Column

use of tech.tablesaw.columns.Column in project symja_android_library by axkr.

the class FileReader method parseRows.

/**
 * Starts parsing {@code reader}, creates a table with one column per non-skipped header entry
 * and, unless only the header is requested, fills it with rows.
 *
 * @param options read options; supplies the table name and is passed on when adding rows
 * @param headerOnly if {@code true}, the table is returned with its columns but without rows
 * @param reader the input to parse
 * @param columnTypeReadOptions resolves the column type for a column index and name
 * @param parser the parser that is started on {@code reader}
 * @param sampleSize passed through to {@code addRows}
 * @return the populated table, or just its column structure when {@code headerOnly} is set
 */
protected Table parseRows(ReadOptions options, boolean headerOnly, Reader reader, ReadOptions.ColumnTypeReadOptions columnTypeReadOptions, AbstractParser<?> parser, int sampleSize) {
    parser.beginParsing(reader);
    Table table = Table.create(options.tableName());
    List<String> headerRow = Lists.newArrayList(getColumnNames(options, columnTypeReadOptions, parser));
    // NOTE(review): entries without a resolved type are filtered out, which would shift the
    // remaining types against headerRow - presumably every column has a type here; confirm.
    @SuppressWarnings({ "UnstableApiUsage", "OptionalGetWithoutIsPresent" })
    ColumnType[] types = Streams.mapWithIndex(
                    headerRow.stream(),
                    (columnName, idx) -> columnTypeReadOptions.columnType((int) idx, columnName))
            .filter(Optional::isPresent)
            .map(Optional::get)
            .toArray(ColumnType[]::new);
    for (int position = 0; position < types.length; position++) {
        ColumnType type = types[position];
        if (type == SKIP) {
            continue;
        }
        String columnName = cleanName(headerRow.get(position));
        if (Strings.isNullOrEmpty(columnName)) {
            // Unnamed columns get a name derived from the number of columns added so far.
            columnName = "Column " + table.columnCount();
        }
        Column<?> created = type.create(columnName);
        table.addColumns(created);
    }
    if (headerOnly) {
        return table;
    }
    String[] selectedNames = selectColumnNames(headerRow, types);
    int[] columnIndexes = new int[selectedNames.length];
    int slot = 0;
    for (String selectedName : selectedNames) {
        // get the index in the original table, which includes skipped fields
        columnIndexes[slot++] = headerRow.indexOf(selectedName);
    }
    addRows(options, types, parser, table, columnIndexes, sampleSize);
    return table;
}
Also used : Logger(org.slf4j.Logger) ColumnType(tech.tablesaw.api.ColumnType) AbstractParser(com.univocity.parsers.common.AbstractParser) Iterator(java.util.Iterator) Table(tech.tablesaw.api.Table) LoggerFactory(org.slf4j.LoggerFactory) HashMap(java.util.HashMap) Random(java.util.Random) Reader(java.io.Reader) Streams(com.google.common.collect.Streams) ArrayList(java.util.ArrayList) Strings(com.google.common.base.Strings) List(java.util.List) Lists(com.google.common.collect.Lists) AbstractColumnParser(tech.tablesaw.columns.AbstractColumnParser) Map(java.util.Map) Column(tech.tablesaw.columns.Column) SKIP(tech.tablesaw.api.ColumnType.SKIP) Optional(java.util.Optional) NoSuchElementException(java.util.NoSuchElementException) Table(tech.tablesaw.api.Table) ColumnType(tech.tablesaw.api.ColumnType) Optional(java.util.Optional)

Example 15 with Column

use of tech.tablesaw.columns.Column in project symja_android_library by axkr.

the class ASTDataset method newListOfAssociations.

/**
 * Create a <code>Dataset</code> object from a <code>List(...)</code> of associations. Each
 * association becomes one row of the <code>Dataset</code>. The left-hand side of every rule in
 * an association is used as a column name; columns appear in the order in which their names are
 * first encountered. Rules with identical names map their right-hand-side values to the same
 * column of the resulting <code>Dataset</code>.
 *
 * @param listOfAssociations the list whose arguments (from index 1 on) are all associations
 * @return {@link F#NIL} if no column name could be collected, i.e. the <code>Dataset</code>
 *     cannot be created
 */
public static IExpr newListOfAssociations(IAST listOfAssociations) {
    // 1. phase: collect the distinct column names in first-seen order
    List<String> orderedNames = new ArrayList<>();
    Set<String> seenNames = new HashSet<>();
    for (int i = 1; i < listOfAssociations.size(); i++) {
        IAssociation assoc = (IAssociation) listOfAssociations.get(i);
        for (int j = 1; j < assoc.size(); j++) {
            String columnName = assoc.getRule(j).first().toString();
            // Set.add reports whether the name is new, so one call replaces contains + add.
            if (seenNames.add(columnName)) {
                orderedNames.add(columnName);
            }
        }
    }
    if (orderedNames.isEmpty()) {
        return F.NIL;
    }

    // 2. phase: define one expression column per collected name
    Table table = Table.create();
    Column<?>[] cols = new Column<?>[orderedNames.size()];
    int next = 0;
    for (String columnName : orderedNames) {
        cols[next++] = ExprColumn.create(columnName);
    }
    table.addColumns(cols);

    // 3. phase: append one row per association and fill in its values
    for (int i = 1; i < listOfAssociations.size(); i++) {
        IAssociation assoc = (IAssociation) listOfAssociations.get(i);
        Row row = table.appendRow();
        for (int j = 1; j < assoc.size(); j++) {
            IAST rule = assoc.getRule(j);
            row.setExpr(rule.first().toString(), rule.second());
        }
    }
    return newTablesawTable(table);
}
Also used : IAssociation(org.matheclipse.core.interfaces.IAssociation) Table(tech.tablesaw.api.Table) ArrayList(java.util.ArrayList) Column(tech.tablesaw.columns.Column) ExprColumn(tech.tablesaw.api.ExprColumn) IAST(org.matheclipse.core.interfaces.IAST) Row(tech.tablesaw.api.Row) IExpr(org.matheclipse.core.interfaces.IExpr) HashSet(java.util.HashSet)

Aggregations

Column (tech.tablesaw.columns.Column)17 ArrayList (java.util.ArrayList)7 Table (tech.tablesaw.api.Table)7 ColumnType (tech.tablesaw.api.ColumnType)6 Row (tech.tablesaw.api.Row)4 List (java.util.List)3 IExpr (org.matheclipse.core.interfaces.IExpr)3 ExprColumn (tech.tablesaw.api.ExprColumn)3 Streams (com.google.common.collect.Streams)2 Collection (java.util.Collection)2 HashMap (java.util.HashMap)2 HashSet (java.util.HashSet)2 Iterator (java.util.Iterator)2 Map (java.util.Map)2 NoSuchElementException (java.util.NoSuchElementException)2 Beta (com.google.common.annotations.Beta)1 Objects (com.google.common.base.Objects)1 Preconditions (com.google.common.base.Preconditions)1 Strings (com.google.common.base.Strings)1 Lists (com.google.common.collect.Lists)1