Search in sources :

Example 1 with ColumnDescription

use of org.hillview.table.ColumnDescription in project hillview by vmware.

the class CreateColumnJSMap method createColumn.

/**
 * Creates a new column by applying the user-supplied JavaScript function
 * (this.info.jsFunction, which must define a function named "map") to every
 * row in the membership set of the table.
 *
 * @param table  Table whose rows are fed to the JavaScript function.
 * @return       A sealed column of kind this.info.outputKind, or an
 *               IntervalColumn built from two Double columns when the
 *               output kind is Interval.
 * @throws RuntimeException wrapping any exception raised while evaluating
 *               the script or converting its results.
 */
@Override
IColumn createColumn(ITable table) {
    // Context is AutoCloseable: try-with-resources releases the JS engine
    // even when the script or a conversion throws.  Everything stored in the
    // result columns is a plain Java value, so nothing escapes the context.
    try (Context context = Context.newBuilder().allowAllAccess(true).build()) {
        // Compiles the JS function
        context.eval("js", this.info.jsFunction);
        ColumnDescription outCol = new ColumnDescription(this.info.outputColumn, this.info.outputKind);
        IMutableColumn col;
        // only used for Intervals
        IMutableColumn endCol = null;
        ContentsKind kind = this.info.outputKind;
        IMembershipSet set = table.getMembershipSet();
        if (kind == ContentsKind.Interval) {
            // Both interval endpoints are stored in Double columns that share a description.
            ColumnDescription cd = new ColumnDescription("start", ContentsKind.Double);
            col = BaseColumn.create(cd, set.getMax(), set.getSize());
            endCol = BaseColumn.create(cd, set.getMax(), set.getSize());
        } else {
            col = BaseColumn.create(outCol, set.getMax(), set.getSize());
        }
        // The result is not needed here; presumably this forces the schema
        // columns to be loaded before the row snapshot reads them.
        table.getLoadedColumns(this.info.schema.getColumnNames());
        JSVirtualRowSnapshot vrs = new JSVirtualRowSnapshot(table, this.info.schema, context);
        ProxyObject vrsProxy = ProxyObject.fromMap(vrs);
        IRowIterator it = set.getIterator();
        int r = it.getNextRow();
        Value function = context.eval("js", "vrs => map(vrs)");
        assert function.canExecute();
        while (r >= 0) {
            vrs.setRow(r);
            Value value = function.execute(vrsProxy);
            // A JS null result comes back as a Value wrapper whose isNull()
            // is true, not as a Java null, so both cases must be tested.
            if (value == null || value.isNull()) {
                col.setMissing(r);
                // Keep the end column consistent with the start column for intervals.
                if (endCol != null)
                    endCol.setMissing(r);
            } else {
                switch(kind) {
                    case None:
                        throw new RuntimeException("Only null values can be stored in this column");
                    case String:
                    case Json:
                        col.set(r, value.toString());
                        break;
                    case Date:
                    case Time:
                        double timestampLocal = value.invokeMember("getTime").asDouble();
                        col.set(r, timestampLocal);
                        break;
                    case LocalDate:
                        double ts = value.invokeMember("getTime").asDouble();
                        // ts is the local time; we have to adjust for the timezone
                        // (getTimezoneOffset is in minutes, the column stores milliseconds)
                        double offset = value.invokeMember("getTimezoneOffset").asDouble();
                        col.set(r, ts - offset * 60 * 1000);
                        break;
                    case Integer:
                        col.set(r, value.asInt());
                        break;
                    case Double:
                    case Duration:
                        col.set(r, value.asDouble());
                        break;
                    case Interval:
                        // The function is expected to return a two-element array [start, end].
                        Value v0 = value.getArrayElement(0);
                        Value v1 = value.getArrayElement(1);
                        col.set(r, v0.asDouble());
                        assert endCol != null;
                        endCol.set(r, v1.asDouble());
                        break;
                    default:
                        throw new RuntimeException("Unhandled kind " + kind);
                }
            }
            r = it.getNextRow();
        }
        if (kind == ContentsKind.Interval)
            return new IntervalColumn(outCol, col, endCol);
        return col.seal();
    } catch (Exception ex) {
        throw new RuntimeException(ex);
    }
}
Also used : Context(org.graalvm.polyglot.Context) IntervalColumn(org.hillview.table.columns.IntervalColumn) ColumnDescription(org.hillview.table.ColumnDescription) ProxyObject(org.graalvm.polyglot.proxy.ProxyObject) Value(org.graalvm.polyglot.Value) JSVirtualRowSnapshot(org.hillview.table.rows.JSVirtualRowSnapshot)

Example 2 with ColumnDescription

use of org.hillview.table.ColumnDescription in project hillview by vmware.

the class CreateColumnMap method createColumn.

/**
 * Builds a String column by running this.extract over the string form of
 * every row of the input column that belongs to the table's membership set.
 *
 * @param table  Table providing the input column (this.info.inputColumn)
 *               and the set of rows to process.
 * @return       The newly populated column, named this.info.outputColumn.
 */
@Override
public IColumn createColumn(ITable table) {
    IColumn input = table.getLoadedColumn(this.info.inputColumn.name);
    ColumnDescription resultDescription =
            new ColumnDescription(this.info.outputColumn, ContentsKind.String);
    IMutableColumn result = BaseColumn.create(
            resultDescription,
            table.getMembershipSet().getMax(),
            table.getMembershipSet().getSize());
    IRowIterator rows = table.getMembershipSet().getIterator();
    // getNextRow() yields a negative index once the rows are exhausted.
    for (int row = rows.getNextRow(); row >= 0; row = rows.getNextRow()) {
        result.set(row, this.extract(input.asString(row)));
    }
    return result;
}
Also used : ColumnDescription(org.hillview.table.ColumnDescription)

Example 3 with ColumnDescription

use of org.hillview.table.ColumnDescription in project hillview by vmware.

the class CreateIntervalColumnMap method createColumn.

/**
 * Combines two existing columns of the table into a single Interval column.
 *
 * @param table  Table containing the columns named info.startColName and
 *               info.endColName.
 * @return       An IntervalColumn named info.newColName whose endpoints are
 *               the two loaded columns; both are passed through
 *               Converters.checkNull first.
 */
@Override
public IColumn createColumn(ITable table) {
    final List<IColumn> endpoints =
            table.getLoadedColumns(this.info.startColName, this.info.endColName);
    final ColumnDescription intervalDescription =
            new ColumnDescription(this.info.newColName, ContentsKind.Interval);
    // Columns come back in the order they were requested: start first, then end.
    final IColumn startColumn = endpoints.get(0);
    final IColumn endColumn = endpoints.get(1);
    return new IntervalColumn(
            intervalDescription,
            Converters.checkNull(startColumn),
            Converters.checkNull(endColumn));
}
Also used : IntervalColumn(org.hillview.table.columns.IntervalColumn) IColumn(org.hillview.table.api.IColumn) ColumnDescription(org.hillview.table.ColumnDescription)

Example 4 with ColumnDescription

use of org.hillview.table.ColumnDescription in project hillview by vmware.

the class DPPerfBenchmarks method run.

/**
 * Runs the benchmark suite over the selected datasets and prints one CSV
 * header line to standard output before any measurement is taken.
 * Three experiments exist; the first two are switched off with
 * {@code if (false)}, so only the quantization-granularity experiment runs,
 * and only for the Local dataset.
 *
 * @param datasets  Names (as produced by Dataset.toString()) of the datasets
 *                  to benchmark; datasets not listed are skipped.
 */
public void run(HashSet<String> datasets) {
    assert this.ontimeSchema != null;
    ExperimentConfig config = new ExperimentConfig();
    System.out.println("Type,Column(s),Measurements,Machines,Bucket ct,Iteration,Time (ms)");
    List<ColumnDescription> allColumns = this.ontimeSchema.getColumnDescriptions();
    for (Dataset dataset : Arrays.asList(Dataset.Cloud, Dataset.Local, Dataset.DB)) {
        if (!datasets.contains(dataset.toString())) {
            continue;
        }
        config.dataset = dataset;
        List<Integer> machineCounts = new ArrayList<>();
        if (dataset.equals(Dataset.Cloud)) {
            assert this.cloudFlights != null;
            machineCounts.addAll(this.cloudFlights.keySet());
        } else {
            // On local datasets this will always have 1 machine
            machineCounts.add(1);
        }
        if (false) {
            // Experiment 1 (disabled): vary the columns
            for (int machineCount : machineCounts) {
                config.machines = machineCount;
                for (ColumnDescription column : allColumns) {
                    this.allHistograms(column, config);
                }
                // Heatmaps are computed over each pair of adjacent columns.
                for (int index = 0; index < allColumns.size() - 1; index++) {
                    ColumnDescription first = allColumns.get(index);
                    ColumnDescription second = allColumns.get(index + 1);
                    this.allHeatmaps(first, second, config);
                }
            }
        } else if (false) {
            // Experiment 2 (disabled): vary number of buckets for some columns
            if (dataset.equals(Dataset.Local)) {
                ColumnDescription dateColumn = this.ontimeSchema.getDescription("FlightDate");
                ColumnDescription stateColumn = this.ontimeSchema.getDescription("OriginState");
                // Bucket counts are the powers of two from 1 up to 1024.
                for (int bucketCount = 1; bucketCount < 1025; bucketCount *= 2) {
                    config.bucketCount = bucketCount;
                    this.allHistograms(dateColumn, config);
                }
                for (int bucketCount = 1; bucketCount < 1025; bucketCount *= 2) {
                    config.bucketCount = bucketCount;
                    this.allHeatmaps(dateColumn, stateColumn, config);
                }
            }
        } else {
            // Experiment 3: vary quantization intervals (Local dataset only)
            if (!dataset.equals(Dataset.Local)) {
                continue;
            }
            ColumnDescription depTime = this.ontimeSchema.getDescription("DepTime");
            int[] granularities = { 1, 2, 5, 10, 20, 100 };
            PrivacySchema privacySchema = this.flightsWrapper.getPrivacySchema();
            DoubleColumnQuantization baseline =
                    (DoubleColumnQuantization) privacySchema.quantization(depTime.name);
            Converters.checkNull(baseline);
            for (int granularity : granularities) {
                // that's a lie: the machines field is reused to record the
                // granularity, it is not a machine count here
                config.machines = granularity;
                privacySchema.quantization.add(new DoubleColumnQuantization(
                        depTime.name, granularity, baseline.globalMin, baseline.globalMax));
                this.allHistograms(depTime, config);
            }
        }
    }
}
Also used : ColumnDescription(org.hillview.table.ColumnDescription) PrivacySchema(org.hillview.table.PrivacySchema) DoubleColumnQuantization(org.hillview.table.columns.DoubleColumnQuantization)

Example 5 with ColumnDescription

use of org.hillview.table.ColumnDescription in project hillview by vmware.

the class OrcFileLoader method load.

/**
 * Loads the ORC file named by this.filename as a table.
 * In lazy mode only the column descriptions and the row count are read and
 * a lazy table backed by an OrcColumnLoader is returned; otherwise all
 * columns are read immediately.
 *
 * @return  The table describing (lazy mode) or containing (eager mode) the
 *          file's data.
 * @throws RuntimeException if reading fails with an IOException, or if a
 *          schema is imposed in lazy mode and its column count differs from
 *          the ORC schema's.
 */
@Override
public ITable load() {
    try {
        this.hillviewSchema = this.lzschema.getSchema();
        Reader orcReader = OrcFile.createReader(new Path(this.filename), OrcFile.readerOptions(conf));
        this.schema = orcReader.getSchema();
        assert this.schema != null;

        if (!this.lazy) {
            // Eager mode: read every column now.
            List<IAppendableColumn> columns =
                    readColumns(orcReader, new Reader.Options(), this.hillviewSchema);
            this.close(null);
            return new Table(columns, this.filename, null);
        }

        // Lazy mode: only metadata is needed up front.
        IColumnLoader columnLoader = new OrcColumnLoader();
        List<ColumnDescription> descriptions = getDescriptions(this.schema);
        if (hillviewSchema != null) {
            // A user-supplied schema overrides the one derived from the file,
            // provided it has the same number of columns.
            List<ColumnDescription> imposed = hillviewSchema.getColumnDescriptions();
            if (imposed.size() != descriptions.size()) {
                throw new RuntimeException("Schema in JSON file does not match Orc schema");
            }
            descriptions = imposed;
        }
        long totalRows = orcReader.getNumberOfRows();
        return Table.createLazyTable(
                descriptions, Converters.toInt(totalRows), this.filename, columnLoader);
    } catch (IOException ex) {
        throw new RuntimeException(ex);
    }
}
Also used : Path(org.apache.hadoop.fs.Path) Table(org.hillview.table.Table) ColumnDescription(org.hillview.table.ColumnDescription) RecordReader(org.apache.orc.RecordReader) Reader(org.apache.orc.Reader) IOException(java.io.IOException)

Aggregations

ColumnDescription (org.hillview.table.ColumnDescription)83 Test (org.junit.Test)40 BaseTest (org.hillview.test.BaseTest)24 Table (org.hillview.table.Table)20 SQLException (java.sql.SQLException)15 JdbcConnectionInformation (org.hillview.storage.jdbc.JdbcConnectionInformation)15 JdbcDatabase (org.hillview.storage.jdbc.JdbcDatabase)15 ITable (org.hillview.table.api.ITable)14 ArrayList (java.util.ArrayList)13 Schema (org.hillview.table.Schema)12 IColumn (org.hillview.table.api.IColumn)10 IntArrayColumn (org.hillview.table.columns.IntArrayColumn)10 SmallTable (org.hillview.table.SmallTable)7 DoubleColumnQuantization (org.hillview.table.columns.DoubleColumnQuantization)6 LocalDateTime (java.time.LocalDateTime)4 LocalDataSet (org.hillview.dataset.LocalDataSet)4 ColumnSortOrientation (org.hillview.sketches.results.ColumnSortOrientation)4 RecordOrder (org.hillview.table.RecordOrder)4 IRowIterator (org.hillview.table.api.IRowIterator)4 DoubleArrayColumn (org.hillview.table.columns.DoubleArrayColumn)4