use of org.hillview.table.Table in project hillview by vmware.
the class HillviewBenchmarks method benchmarkHistogram.
/**
 * Testing the performance of histogram computations.
 *
 * Command-line layout, as read by this method:
 *   args[0]  benchmark mode (one of the case labels below);
 *   args[1]  number of timed runs;
 *   args[2]  host name (server modes) or comma-separated host list (client mode);
 *            only read by the "-nw-" modes;
 *   args[3]  number of local datasets created by the server-side modes;
 *   args[4]  rate handed to the sketch's sampled() call;
 *   args[5]  divisor applied to the base column size of 100 * 2^20 elements.
 *
 * An unrecognized mode runs nothing; the method always finishes with System.exit(0)
 * unless a server mode is blocking.
 */
private static void benchmarkHistogram(String[] args) throws IOException, InterruptedException {
    System.out.println(Arrays.toString(args));
    final int iterations = Integer.parseInt(args[1]);
    final int workers = Integer.parseInt(args[3]);
    final double samplingRate = Double.parseDouble(args[4]);
    final int scaleDivisor = Integer.parseInt(args[5]);
    HillviewLogger.instance.setLogLevel(Level.OFF);

    final int bucketCount = 40;
    final int mega = 1024 * 1024;
    final int elements = 100 * mega / scaleDivisor;

    // The local column/table are always built, even for modes that only use remote data.
    final DoubleArrayColumn column = generateDoubleArray(elements, 100);
    IHistogramBuckets buckets = new DoubleHistogramBuckets(column.getName(), 0, 100, bucketCount);
    ITable localTable = createTable(elements, column);
    TableSketch<Groups<Count>> sketch = new HistogramSketch(buckets).sampled(samplingRate, 0);
    System.out.println("Bench,Time (ms),Melems/s,Percent slower");

    // NOTE(review): several modes below reuse the "(separate thread)" label in their
    // output; kept as-is so the emitted benchmark names do not change.
    switch (args[0]) {
        case "noseparatethread": {
            final IDataSet<ITable> dataSet = new LocalDataSet<ITable>(localTable, false);
            Runnable task = () -> dataSet.blockingSketch(sketch);
            runNTimes(task, iterations, "Dataset histogram", elements);
            break;
        }
        case "separatethread": {
            final IDataSet<ITable> dataSet = new LocalDataSet<ITable>(localTable);
            Runnable task = () -> dataSet.blockingSketch(sketch);
            runNTimes(task, iterations, "Dataset histogram (separate thread)", elements);
            break;
        }
        case "remote": {
            // Server side: a parallel dataset served on the loopback address.
            final HostAndPort address = HostAndPort.fromParts("127.0.0.1", 1234);
            final List<IDataSet<ITable>> parts = IntStream.range(0, workers)
                    .mapToObj((i) -> new LocalDataSet<ITable>(createTable(elements, generateDoubleArray(elements, 100))))
                    .collect(Collectors.toList());
            final IDataSet<ITable> served = new ParallelDataSet<>(parts);
            new HillviewServer(address, served);
            // Client side.
            final IDataSet<ITable> remote = new RemoteDataSet<ITable>(address);
            Runnable task = () -> remote.blockingSketch(sketch);
            runNTimes(task, iterations, "Dataset histogram (separate thread)", elements);
            break;
        }
        case "remote-no-memoization": {
            // Server side: same as "remote", with memoization switched off.
            final HostAndPort address = HostAndPort.fromParts("127.0.0.1", 1234);
            final List<IDataSet<ITable>> parts = IntStream.range(0, workers)
                    .mapToObj((i) -> new LocalDataSet<ITable>(createTable(elements, generateDoubleArray(elements, 100))))
                    .collect(Collectors.toList());
            final IDataSet<ITable> served = new ParallelDataSet<ITable>(parts);
            final HillviewServer server = new HillviewServer(address, served);
            server.setMemoization(false);
            // Client side.
            final IDataSet<ITable> remote = new RemoteDataSet<ITable>(address);
            Runnable task = () -> remote.blockingSketch(sketch);
            runNTimes(task, iterations, "Dataset histogram (separate thread)", elements);
            break;
        }
        case "remote-no-memoization-nw-server": {
            final HostAndPort address = HostAndPort.fromParts(args[2], 1234);
            // Server side: the datasets are generated through a parallel stream.
            final List<IDataSet<ITable>> parts = IntStream.range(0, workers).parallel()
                    .mapToObj((i) -> {
                        System.out.println("LDS " + i + " " + workers);
                        return new LocalDataSet<ITable>(createTable(elements, generateDoubleArray(elements, 100)));
                    })
                    .collect(Collectors.toList());
            final IDataSet<ITable> served = new ParallelDataSet<ITable>(parts);
            final HillviewServer server = new HillviewServer(address, served);
            server.setMemoization(false);
            // Joining the current thread on itself parks it, keeping the process alive.
            Thread.currentThread().join();
            break;
        }
        case "remote-nw-server": {
            // Server side only.
            final HostAndPort address = HostAndPort.fromParts(args[2], 1234);
            final List<IDataSet<ITable>> parts = IntStream.range(0, workers)
                    .mapToObj((i) -> new LocalDataSet<ITable>(createTable(elements, generateDoubleArray(elements, 100))))
                    .collect(Collectors.toList());
            final IDataSet<ITable> served = new ParallelDataSet<ITable>(parts);
            new HillviewServer(address, served);
            // Joining the current thread on itself parks it, keeping the process alive.
            Thread.currentThread().join();
            break;
        }
        case "remote-nw-client": {
            // One remote dataset per comma-separated host in args[2], all on port 1234.
            final List<IDataSet<ITable>> remotes = Arrays.stream(args[2].split(","))
                    .map(s -> s + ":1234")
                    .map(HostAndPort::fromString)
                    .map(RemoteDataSet<ITable>::new)
                    .collect(Collectors.toList());
            // Client side.
            final IDataSet<ITable> combined = new ParallelDataSet<ITable>(remotes);
            Runnable task = () -> combined.blockingSketch(sketch);
            runNTimes(task, iterations, "Dataset histogram (separate thread)", elements);
            break;
        }
        default:
            // Unknown mode: nothing to run.
            break;
    }
    System.exit(0);
}
use of org.hillview.table.Table in project hillview by vmware.
the class HillviewBenchmarks method createTable.
/**
 * Builds a one-column table around the given column.
 *
 * @param colSize value handed to the FullMembershipSet constructor
 *                (presumably the number of rows in col; confirm against callers).
 * @param col     the only column placed in the resulting table.
 * @return a Table holding col, with the two trailing constructor arguments left null.
 */
private static ITable createTable(final int colSize, final IColumn col) {
    final List<IColumn> singleColumn = new ArrayList<>(1);
    singleColumn.add(col);
    return new Table(singleColumn, new FullMembershipSet(colSize), null, null);
}
use of org.hillview.table.Table in project hillview by vmware.
the class OrcFileLoader method load.
/**
 * Loads the ORC file named by this.filename as a table.
 *
 * When this.lazy is false, all columns are read immediately through readColumns.
 * When it is true, only column descriptions and the row count are read and a lazy
 * table backed by an OrcColumnLoader is returned. If a Hillview schema is available
 * in lazy mode, its column descriptions replace the ones derived from the ORC schema,
 * provided both have the same number of columns.
 *
 * @return the loaded (or lazily loadable) table.
 * @throws RuntimeException if the imposed schema has a different column count than
 *                          the ORC schema, or wrapping any IOException from the reader.
 */
@Override
public ITable load() {
    try {
        this.hillviewSchema = this.lzschema.getSchema();
        Reader orcReader = OrcFile.createReader(new Path(this.filename), OrcFile.readerOptions(conf));
        this.schema = orcReader.getSchema();
        assert this.schema != null;

        if (!this.lazy) {
            // Eager path: materialize every column right away.
            Reader.Options readOptions = new Reader.Options();
            List<IAppendableColumn> loadedColumns = readColumns(orcReader, readOptions, this.hillviewSchema);
            this.close(null);
            return new Table(loadedColumns, this.filename, null);
        }

        // Lazy path: describe the columns now, load their data on demand.
        IColumnLoader columnLoader = new OrcColumnLoader();
        List<ColumnDescription> descriptions = getDescriptions(this.schema);
        if (this.hillviewSchema != null) {
            List<ColumnDescription> imposed = this.hillviewSchema.getColumnDescriptions();
            if (imposed.size() != descriptions.size()) {
                throw new RuntimeException("Schema in JSON file does not match Orc schema");
            }
            descriptions = imposed;
        }
        long totalRows = orcReader.getNumberOfRows();
        return Table.createLazyTable(descriptions, Converters.toInt(totalRows), this.filename, columnLoader);
    } catch (IOException ex) {
        throw new RuntimeException(ex);
    }
}
use of org.hillview.table.Table in project hillview by vmware.
the class ParquetFileLoader method load.
/**
 * Loads the Parquet file described by this.metadata as a table.
 *
 * When this.lazy is false, the columns are loaded immediately via loadColumns.
 * When it is true, column descriptions are derived from the Parquet schema and a
 * lazy table backed by a ParquetColumnLoader is returned. Both paths call
 * this.close(null) before returning.
 *
 * @return the loaded (or lazily loadable) table.
 */
public ITable load() {
    final ParquetMetadata meta = this.metadata;

    if (!this.lazy) {
        // Eager path.
        final List<IColumn> loadedColumns = this.loadColumns(meta);
        this.close(null);
        return new Table(loadedColumns, this.filename, null);
    }

    // Lazy path: only the schema and row count are consulted here.
    final ParquetColumnLoader columnLoader = new ParquetColumnLoader();
    final List<ColumnDescriptor> descriptors = meta.getFileMetaData().getSchema().getColumns();
    final int rowCount = this.getNumRows();
    final List<ColumnDescription> descriptions = Linq.map(descriptors, ParquetFileLoader::getColumnDescription);
    final Table lazyTable = Table.createLazyTable(descriptions, rowCount, this.filename, columnLoader);
    this.close(null);
    return lazyTable;
}
use of org.hillview.table.Table in project hillview by vmware.
the class JsonFileLoader method load.
/**
 * Loads a JSON file whose top-level value is an array and turns it into a table.
 *
 * The schema comes from this.lazySchema when it provides one; otherwise it is
 * guessed from the array contents. Each array element is appended to the columns
 * through this.append.
 *
 * The reader obtained from getFileReader() is released through this.close(file)
 * on every exit path, including when parsing or validation fails, so a malformed
 * input no longer leaves the underlying reader open.
 *
 * @return a table with the columns populated from the JSON array.
 * @throws RuntimeException if the top-level JSON value is not an array, or if the
 *                          array is empty and no schema was supplied.
 */
public ITable load() {
    Schema schema = this.lazySchema.getSchema();
    Reader file = this.getFileReader();
    IAppendableColumn[] columns;
    try {
        JsonReader jReader = new JsonReader(file);
        JsonElement elem = Streams.parse(jReader);
        if (!elem.isJsonArray())
            throw new RuntimeException("Expected a JSON array in " + filename);
        JsonArray array = elem.getAsJsonArray();
        // Without a schema there is nothing to infer column types from in an empty array.
        if (array.size() == 0 && schema == null)
            throw new RuntimeException("Empty JSON array in " + filename);
        if (schema == null)
            schema = this.guessSchema(filename, array.iterator());
        columns = schema.createAppendableColumns();
        this.currentRow = 0;
        for (JsonElement e : array) this.append(columns, e);
    } finally {
        // Runs on success (before the table is built, as before) and on failure.
        this.close(file);
    }
    return new Table(columns, this.filename, null);
}
Aggregations