Search in sources :

Example 1 with Table

use of org.hillview.table.Table in project hillview by vmware.

the class HillviewBenchmarks method benchmarkHistogram.

/**
 * Measures the performance of histogram computations over a generated
 * column of doubles.
 *
 * Arguments, as read by this method:
 * <ul>
 *   <li>args[0] - benchmark mode, matched by the switch below</li>
 *   <li>args[1] - run count handed to runNTimes</li>
 *   <li>args[2] - host name (server modes) or comma-separated host names
 *       (client mode) for the "-nw-" modes; port 1234 is always used</li>
 *   <li>args[3] - number of local datasets created by the server modes</li>
 *   <li>args[4] - rate passed to the sketch's sampled(...) call</li>
 *   <li>args[5] - divisor applied to the base column size (100 * 1024 * 1024)</li>
 * </ul>
 *
 * The method always ends with System.exit(0); the "-nw-server" modes first
 * wait on Thread.currentThread().join(). An unrecognized mode runs nothing
 * and exits.
 *
 * @param args command-line arguments laid out as described above
 * @throws IOException declared by the signature
 * @throws InterruptedException if the join() in a "-nw-server" mode is interrupted
 */
private static void benchmarkHistogram(String[] args) throws IOException, InterruptedException {
    System.out.println(Arrays.toString(args));
    final int runCount = Integer.parseInt(args[1]);
    final int parallelism = Integer.parseInt(args[3]);
    final double samplingRate = Double.parseDouble(args[4]);
    final int scalingDivisor = Integer.parseInt(args[5]);
    HillviewLogger.instance.setLogLevel(Level.OFF);

    final int bucketCount = 40;
    final int mebi = 1024 * 1024;
    final int colSize = 100 * mebi / scalingDivisor;
    final DoubleArrayColumn column = generateDoubleArray(colSize, 100);
    final IHistogramBuckets buckets = new DoubleHistogramBuckets(column.getName(), 0, 100, bucketCount);
    final ITable table = createTable(colSize, column);
    final TableSketch<Groups<Count>> sketch = new HistogramSketch(buckets).sampled(samplingRate, 0);
    System.out.println("Bench,Time (ms),Melems/s,Percent slower");

    // NOTE(review): every remote mode reports under the same
    // "Dataset histogram (separate thread)" label as the local
    // separate-thread mode - confirm whether that is intended.
    switch (args[0]) {
        case "noseparatethread": {
            final IDataSet<ITable> local = new LocalDataSet<ITable>(table, false);
            final Runnable task = () -> local.blockingSketch(sketch);
            runNTimes(task, runCount, "Dataset histogram", colSize);
            break;
        }
        case "separatethread": {
            final IDataSet<ITable> local = new LocalDataSet<ITable>(table);
            final Runnable task = () -> local.blockingSketch(sketch);
            runNTimes(task, runCount, "Dataset histogram (separate thread)", colSize);
            break;
        }
        case "remote": {
            // Server side, bound to the loopback address.
            final HostAndPort address = HostAndPort.fromParts("127.0.0.1", 1234);
            final IDataSet<ITable> served = new ParallelDataSet<>(buildLocalDataSets(parallelism, colSize));
            new HillviewServer(address, served);
            // Client side.
            final IDataSet<ITable> remote = new RemoteDataSet<ITable>(address);
            final Runnable task = () -> remote.blockingSketch(sketch);
            runNTimes(task, runCount, "Dataset histogram (separate thread)", colSize);
            break;
        }
        case "remote-no-memoization": {
            // Server side, bound to the loopback address, memoization switched off.
            final HostAndPort address = HostAndPort.fromParts("127.0.0.1", 1234);
            final IDataSet<ITable> served = new ParallelDataSet<ITable>(buildLocalDataSets(parallelism, colSize));
            final HillviewServer server = new HillviewServer(address, served);
            server.setMemoization(false);
            // Client side.
            final IDataSet<ITable> remote = new RemoteDataSet<ITable>(address);
            final Runnable task = () -> remote.blockingSketch(sketch);
            runNTimes(task, runCount, "Dataset histogram (separate thread)", colSize);
            break;
        }
        case "remote-no-memoization-nw-server": {
            final HostAndPort address = HostAndPort.fromParts(args[2], 1234);
            // Datasets are built on a parallel stream; one line is printed per dataset.
            final List<IDataSet<ITable>> parts = IntStream.range(0, parallelism)
                    .parallel()
                    .mapToObj((i) -> {
                        System.out.println("LDS " + i + " " + parallelism);
                        return new LocalDataSet<ITable>(createTable(colSize, generateDoubleArray(colSize, 100)));
                    })
                    .collect(Collectors.toList());
            final IDataSet<ITable> served = new ParallelDataSet<ITable>(parts);
            final HillviewServer server = new HillviewServer(address, served);
            server.setMemoization(false);
            // Wait here so the server stays up.
            Thread.currentThread().join();
            break;
        }
        case "remote-nw-server": {
            final HostAndPort address = HostAndPort.fromParts(args[2], 1234);
            final IDataSet<ITable> served = new ParallelDataSet<ITable>(buildLocalDataSets(parallelism, colSize));
            new HillviewServer(address, served);
            // Wait here so the server stays up.
            Thread.currentThread().join();
            break;
        }
        case "remote-nw-client": {
            // One remote dataset per comma-separated host, each on port 1234.
            final List<IDataSet<ITable>> remotes = Arrays.stream(args[2].split(","))
                    .map(s -> s + ":1234")
                    .map(HostAndPort::fromString)
                    .map(RemoteDataSet<ITable>::new)
                    .collect(Collectors.toList());
            final IDataSet<ITable> remote = new ParallelDataSet<ITable>(remotes);
            final Runnable task = () -> remote.blockingSketch(sketch);
            runNTimes(task, runCount, "Dataset histogram (separate thread)", colSize);
            break;
        }
        default:
            break;
    }
    System.exit(0);
}

/**
 * Builds {@code count} local datasets, each wrapping a table created from
 * its own generateDoubleArray(colSize, 100) column.
 */
private static List<IDataSet<ITable>> buildLocalDataSets(final int count, final int colSize) {
    return IntStream.range(0, count)
            .mapToObj((i) -> new LocalDataSet<ITable>(createTable(colSize, generateDoubleArray(colSize, 100))))
            .collect(Collectors.toList());
}
Also used : org.hillview.sketches(org.hillview.sketches) FullMembershipSet(org.hillview.table.membership.FullMembershipSet) IntStream(java.util.stream.IntStream) LocalDataSet(org.hillview.dataset.LocalDataSet) Arrays(java.util.Arrays) RemoteDataSet(org.hillview.dataset.RemoteDataSet) org.hillview.utils(org.hillview.utils) Empty(org.hillview.dataset.api.Empty) ArrayList(java.util.ArrayList) Level(java.util.logging.Level) ClusterConfig(org.hillview.management.ClusterConfig) IDataSet(org.hillview.dataset.api.IDataSet) ITable(org.hillview.table.api.ITable) ContentsKind(org.hillview.table.api.ContentsKind) org.hillview.sketches.results(org.hillview.sketches.results) Nullable(javax.annotation.Nullable) TableSketch(org.hillview.dataset.api.TableSketch) ParallelDataSet(org.hillview.dataset.ParallelDataSet) Table(org.hillview.table.Table) IOException(java.io.IOException) IMap(org.hillview.dataset.api.IMap) Collectors(java.util.stream.Collectors) ParallelizerMap(org.hillview.maps.ParallelizerMap) SetMemoization(org.hillview.management.SetMemoization) List(java.util.List) DoubleArrayColumn(org.hillview.table.columns.DoubleArrayColumn) IColumn(org.hillview.table.api.IColumn) HillviewServer(org.hillview.dataset.remoting.HillviewServer) ColumnDescription(org.hillview.table.ColumnDescription) ParallelDataSet(org.hillview.dataset.ParallelDataSet) DoubleArrayColumn(org.hillview.table.columns.DoubleArrayColumn) RemoteDataSet(org.hillview.dataset.RemoteDataSet) HillviewServer(org.hillview.dataset.remoting.HillviewServer) LocalDataSet(org.hillview.dataset.LocalDataSet) ITable(org.hillview.table.api.ITable) IDataSet(org.hillview.dataset.api.IDataSet)

Example 2 with Table

use of org.hillview.table.Table in project hillview by vmware.

the class HillviewBenchmarks method createTable.

/**
 * Wraps a single column in a Table that uses a FullMembershipSet of
 * {@code colSize}; the two remaining Table constructor arguments are null.
 *
 * @param colSize size handed to the FullMembershipSet
 * @param col     the only column of the resulting table
 * @return the newly constructed table
 */
private static ITable createTable(final int colSize, final IColumn col) {
    final List<IColumn> singleColumn = new ArrayList<>();
    singleColumn.add(col);
    return new Table(singleColumn, new FullMembershipSet(colSize), null, null);
}
Also used : ITable(org.hillview.table.api.ITable) Table(org.hillview.table.Table) IColumn(org.hillview.table.api.IColumn) ArrayList(java.util.ArrayList) FullMembershipSet(org.hillview.table.membership.FullMembershipSet)

Example 3 with Table

use of org.hillview.table.Table in project hillview by vmware.

the class OrcFileLoader method load.

/**
 * Loads the ORC file named by this.filename as a table.
 *
 * When this.lazy is false, all columns are read up front via readColumns.
 * When it is true, a lazy table is created from the column descriptions
 * and the reader's row count, with an OrcColumnLoader attached. A schema
 * supplied by this.lzschema, if present, replaces the descriptions derived
 * from the ORC schema and must have the same number of columns.
 *
 * @return the loaded (or lazily loadable) table
 * @throws RuntimeException wrapping any IOException, or when the imposed
 *         schema's column count differs from the ORC schema's
 */
@Override
public ITable load() {
    try {
        this.hillviewSchema = this.lzschema.getSchema();
        Reader reader = OrcFile.createReader(new Path(this.filename), OrcFile.readerOptions(conf));
        this.schema = reader.getSchema();
        assert this.schema != null;

        if (!this.lazy) {
            // Eager path: materialize every column now.
            List<IAppendableColumn> columns = readColumns(reader, new Reader.Options(), this.hillviewSchema);
            this.close(null);
            return new Table(columns, this.filename, null);
        }

        // Lazy path: only describe the columns; data is fetched by the loader.
        IColumnLoader columnLoader = new OrcColumnLoader();
        List<ColumnDescription> descriptions = getDescriptions(this.schema);
        if (this.hillviewSchema != null) {
            List<ColumnDescription> imposed = this.hillviewSchema.getColumnDescriptions();
            if (imposed.size() != descriptions.size()) {
                throw new RuntimeException("Schema in JSON file does not match Orc schema");
            }
            descriptions = imposed;
        }
        long rows = reader.getNumberOfRows();
        return Table.createLazyTable(descriptions, Converters.toInt(rows), this.filename, columnLoader);
    } catch (IOException ex) {
        throw new RuntimeException(ex);
    }
}
Also used : Path(org.apache.hadoop.fs.Path) Table(org.hillview.table.Table) ColumnDescription(org.hillview.table.ColumnDescription) RecordReader(org.apache.orc.RecordReader) Reader(org.apache.orc.Reader) IOException(java.io.IOException)

Example 4 with Table

use of org.hillview.table.Table in project hillview by vmware.

the class ParquetFileLoader method load.

/**
 * Loads the Parquet file named by this.filename as a table.
 *
 * When this.lazy is false, the columns are loaded immediately through
 * loadColumns. Otherwise a lazy table is created from the file schema's
 * column descriptors and the row count, with a ParquetColumnLoader
 * attached. In both cases this.close(null) is called before returning.
 *
 * @return the loaded (or lazily loadable) table
 */
public ITable load() {
    final ParquetMetadata fileMetadata = this.metadata;
    if (!this.lazy) {
        // Eager path: read all the columns now.
        List<IColumn> columns = this.loadColumns(fileMetadata);
        this.close(null);
        return new Table(columns, this.filename, null);
    }

    // Lazy path: describe the columns and defer reading to the loader.
    ParquetColumnLoader columnLoader = new ParquetColumnLoader();
    List<ColumnDescriptor> descriptors = fileMetadata.getFileMetaData().getSchema().getColumns();
    int rowCount = this.getNumRows();
    List<ColumnDescription> descriptions = Linq.map(descriptors, ParquetFileLoader::getColumnDescription);
    Table lazyTable = Table.createLazyTable(descriptions, rowCount, this.filename, columnLoader);
    this.close(null);
    return lazyTable;
}
Also used : Table(org.hillview.table.Table) ParquetMetadata(org.apache.parquet.hadoop.metadata.ParquetMetadata) ColumnDescription(org.hillview.table.ColumnDescription) ColumnDescriptor(org.apache.parquet.column.ColumnDescriptor)

Example 5 with Table

use of org.hillview.table.Table in project hillview by vmware.

the class JsonFileLoader method load.

/**
 * Loads the JSON file named by this.filename as a table.
 *
 * The file must contain a single JSON array. Each element of the array is
 * passed to append together with the columns created from the schema. If
 * this.lazySchema yields no schema, one is guessed from the array
 * contents, which therefore must not be empty.
 *
 * The reader obtained from getFileReader is handed to this.close on every
 * exit path, including when parsing or appending throws.
 *
 * @return a table built from the appended columns
 * @throws RuntimeException if the file does not hold a JSON array, or if
 *         the array is empty and no schema is available
 */
public ITable load() {
    Schema schema = this.lazySchema.getSchema();
    Reader file = this.getFileReader();
    IAppendableColumn[] columns;
    try {
        JsonReader jReader = new JsonReader(file);
        JsonElement elem = Streams.parse(jReader);
        if (!elem.isJsonArray()) {
            throw new RuntimeException("Expected a JSON array in " + filename);
        }
        JsonArray array = elem.getAsJsonArray();
        if (schema == null) {
            // Without a schema there must be data to infer one from.
            if (array.size() == 0) {
                throw new RuntimeException("Empty JSON array in " + filename);
            }
            schema = this.guessSchema(filename, array.iterator());
        }
        columns = schema.createAppendableColumns();
        this.currentRow = 0;
        for (JsonElement e : array) {
            this.append(columns, e);
        }
    } finally {
        // Previously the reader was closed only on success, so any failure
        // above left it open.
        this.close(file);
    }
    return new Table(columns, this.filename, null);
}
Also used : JsonArray(com.google.gson.JsonArray) Table(org.hillview.table.Table) JsonElement(com.google.gson.JsonElement) LazySchema(org.hillview.table.LazySchema) Schema(org.hillview.table.Schema) JsonReader(com.google.gson.stream.JsonReader) JsonReader(com.google.gson.stream.JsonReader)

Aggregations

Table (org.hillview.table.Table)57 Test (org.junit.Test)38 BaseTest (org.hillview.test.BaseTest)36 ITable (org.hillview.table.api.ITable)33 ColumnDescription (org.hillview.table.ColumnDescription)21 SmallTable (org.hillview.table.SmallTable)15 IColumn (org.hillview.table.api.IColumn)14 ArrayList (java.util.ArrayList)10 RowSnapshot (org.hillview.table.rows.RowSnapshot)9 LocalDataSet (org.hillview.dataset.LocalDataSet)8 ColumnSortOrientation (org.hillview.sketches.results.ColumnSortOrientation)8 RecordOrder (org.hillview.table.RecordOrder)8 FindSketch (org.hillview.sketches.FindSketch)7 LazySchema (org.hillview.table.LazySchema)7 Schema (org.hillview.table.Schema)7 StringFilterDescription (org.hillview.table.filters.StringFilterDescription)7 IRowIterator (org.hillview.table.api.IRowIterator)6 VirtualRowSnapshot (org.hillview.table.rows.VirtualRowSnapshot)5 FilterMap (org.hillview.maps.FilterMap)4 OrcFileLoader (org.hillview.storage.OrcFileLoader)4