Use of org.apache.arrow.vector.types.pojo.Schema in the parquet-mr project (Apache) — class SchemaConverter, method fromParquet.
/**
 * Builds the Arrow equivalent of a Parquet schema, together with the
 * per-field mapping between the two representations.
 * @param parquetSchema the Parquet schema to convert
 * @return a SchemaMapping tying the generated Arrow schema to the input schema
 */
public SchemaMapping fromParquet(MessageType parquetSchema) {
// Map each Parquet field to its Arrow counterpart, then assemble the schema.
List<Type> parquetFields = parquetSchema.getFields();
List<TypeMapping> typeMappings = fromParquet(parquetFields);
return new SchemaMapping(new Schema(fields(typeMappings)), parquetSchema, typeMappings);
}
Use of org.apache.arrow.vector.types.pojo.Schema in the flink project (Apache) — class ArrowUtilsTest, method testConvertBetweenLogicalTypeAndArrowType.
@Test
public void testConvertBetweenLogicalTypeAndArrowType() {
// Convert the Flink RowType fixture into an Arrow schema and verify it
// field-by-field against the expected (name, logicalType, arrowType) tuples.
Schema schema = ArrowUtils.toArrowSchema(rowType);
List<Field> arrowFields = schema.getFields();
assertEquals(testFields.size(), arrowFields.size());
for (int idx = 0; idx < arrowFields.size(); idx++) {
Field arrowField = arrowFields.get(idx);
// f0 holds the expected field name, f2 the expected ArrowType
assertEquals(testFields.get(idx).f0, arrowField.getName());
assertEquals(testFields.get(idx).f2, arrowField.getType());
}
}
Use of org.apache.arrow.vector.types.pojo.Schema in the twister2 project (DSC-SPIDAL) — class BTAllToAll, method execute.
@Override
public void execute(Config config, JobAPI.Job job, IWorkerController workerController, IPersistentVolume persistentVolume, IVolatileVolume volatileVolume) {
// Worker entry point: builds two Arrow vectors, wraps them in a table, and
// shuffles that table to every target worker via ArrowAllToAll.
this.jobParameters = JobParameters.build(config);
// create a worker environment
this.wEnv = WorkerEnvironment.init(config, job, workerController, persistentVolume, volatileVolume);
// Distribute the configured source/target task ids fairly across workers.
LogicalPlanBuilder logicalPlanBuilder = LogicalPlanBuilder.plan(jobParameters.getSources(), jobParameters.getTargets(), wEnv).withFairDistribution();
RootAllocator rootAllocator = new RootAllocator();
// NOTE(review): vector name "fist" looks like a typo for "first" — confirm
// before changing, since the name becomes part of the transmitted schema.
IntVector intVector = new IntVector("fist", rootAllocator);
Float8Vector float8Vector = new Float8Vector("second", rootAllocator);
// Populate 1000 rows; setSafe grows the underlying buffers as needed.
for (int i = 0; i < 1000; i++) {
intVector.setSafe(i, i);
float8Vector.setSafe(i, i);
}
// Value counts must be set after writing, before the vectors are read.
intVector.setValueCount(1000);
float8Vector.setValueCount(1000);
List<Field> fieldList = Arrays.asList(intVector.getField(), float8Vector.getField());
Schema schema = new Schema(fieldList);
Table t = new ArrowTable(schema, Arrays.asList(new FieldVector[] { intVector, float8Vector }));
// The all-to-all operation ships Arrow tables between workers on a fresh edge.
allToAll = new ArrowAllToAll(wEnv.getConfig(), wEnv.getWorkerController(), logicalPlanBuilder.getSources(), logicalPlanBuilder.getTargets(), logicalPlanBuilder.build(), wEnv.getCommunicator().nextEdge(), new ArrowReceiver(), schema, rootAllocator);
// Send the same table to every target, then signal completion from each
// source task that lives on this worker.
for (int i : logicalPlanBuilder.getTargets()) {
allToAll.insert(t, i);
}
for (int s : logicalPlanBuilder.getSourcesOnThisWorker()) {
allToAll.finish(s);
}
// Busy-wait until the communication drains; isComplete() also progresses it.
while (!allToAll.isComplete()) {
// wait
}
}
Use of org.apache.arrow.vector.types.pojo.Schema in the twister2 project (DSC-SPIDAL) — class ArrowTSetSourceExample, method execute.
@Override
public void execute(WorkerEnvironment workerEnv) {
// Example pipeline: read a CSV source, sink the first column as integers into
// an Arrow file, then read that Arrow file back and log the values.
BatchEnvironment env = TSetEnvironment.initBatch(workerEnv);
Config config = env.getConfig();
// Input/output locations and sizing come from the job configuration.
String csvInputDirectory = config.getStringValue(DataObjectConstants.DINPUT_DIRECTORY);
String arrowInputDirectory = config.getStringValue(DataObjectConstants.ARROW_DIRECTORY);
String arrowFileName = config.getStringValue(DataObjectConstants.FILE_NAME);
int workers = config.getIntegerValue(DataObjectConstants.WORKERS);
int parallel = config.getIntegerValue(DataObjectConstants.PARALLELISM_VALUE);
int dsize = config.getIntegerValue(DataObjectConstants.DSIZE);
LOG.info("arrow input file:" + arrowFileName + "\t" + arrowInputDirectory + "\t" + csvInputDirectory + "\t" + workers + "\t" + parallel);
// The schema is serialized to JSON so both sink and source agree on it.
Schema schema = makeSchema();
SourceTSet<String[]> csvSource = env.createCSVSource(csvInputDirectory, dsize, parallel, "split");
// Parse column 0 of each CSV row to an int and write it via the Arrow sink.
SinkTSet<Iterator<Integer>> sinkTSet = csvSource.direct().map((MapFunc<String[], Integer>) input -> Integer.parseInt(input[0])).direct().sink(new ArrowBasedSinkFunction<>(arrowInputDirectory, arrowFileName, schema.toJson()));
// run() must complete before the Arrow source below can read the file.
env.run(sinkTSet);
// Source Function Call
env.createArrowSource(arrowInputDirectory, arrowFileName, parallel, schema.toJson()).direct().compute((ComputeFunc<Iterator<Object>, List<Integer>>) input -> {
List<Integer> integers = new ArrayList<>();
input.forEachRemaining(i -> integers.add((Integer) i));
return integers;
}).direct().forEach(s -> LOG.info("Integer Array Size:" + s.size() + "\tvalues:" + s));
}
Use of org.apache.arrow.vector.types.pojo.Schema in the twister2 project (DSC-SPIDAL) — class ArrowTSetSourceExample, method makeSchema.
/**
 * Builds the single-column Arrow schema — one nullable 32-bit signed integer
 * field named "int" — shared by the sink and the source in this example.
 */
private Schema makeSchema() {
Field intField = new Field("int", FieldType.nullable(new ArrowType.Int(32, true)), null);
// disabled: new Field("long", FieldType.nullable(new ArrowType.Int(64, true)), null)
return new Schema(ImmutableList.of(intField), null);
}
Aggregations