use of org.apache.spark.sql.types.LongType$ in project iceberg by apache.
the class Spark3Util method findWidth.
/**
 * Finds the width argument of a transform.
 *
 * <p>Scans the transform's arguments in order and returns the value of the first literal whose
 * data type is an integer or a long. Literals of other types and non-literal arguments are
 * skipped.
 *
 * @param transform the transform whose arguments are searched
 * @return the width as an {@code int}
 * @throws IllegalArgumentException if the first int/long literal is not positive, if a long
 *     literal does not fit in an {@code int}, or if no int/long literal argument is present
 */
@SuppressWarnings("unchecked")
private static int findWidth(Transform transform) {
  for (Expression expr : transform.arguments()) {
    if (expr instanceof Literal) {
      if (((Literal) expr).dataType() instanceof IntegerType) {
        Literal<Integer> lit = (Literal<Integer>) expr;
        Preconditions.checkArgument(lit.value() > 0, "Unsupported width for transform: %s", transform.describe());
        return lit.value();
      } else if (((Literal) expr).dataType() instanceof LongType) {
        Literal<Long> lit = (Literal<Long>) expr;
        long width = lit.value();
        // The upper bound is inclusive so that a long literal is accepted for exactly the same
        // values as an int literal; anything larger cannot be narrowed to int without loss.
        Preconditions.checkArgument(width > 0 && width <= Integer.MAX_VALUE, "Unsupported width for transform: %s", transform.describe());
        return (int) width;
      }
    }
  }
  throw new IllegalArgumentException("Cannot find width for transform: " + transform.describe());
}
use of org.apache.spark.sql.types.LongType$ in project OpenLineage by OpenLineage.
the class DeltaDataSourceTest method testInsertIntoDeltaSource.
/**
 * Writes a ten-row (name, age) DataFrame in {@code delta} format to a temporary directory using
 * overwrite mode, waits for execution to end, and then checks the captured lineage events: the
 * first COMPLETE event with non-empty outputs must list exactly one output dataset whose
 * namespace is {@code "file"} and whose name is the absolute path of the delta directory.
 */
@Test
public void testInsertIntoDeltaSource(@TempDir Path tempDir, SparkSession spark) throws IOException, InterruptedException, TimeoutException {
  StructField nameField = new StructField("name", StringType$.MODULE$, false, Metadata.empty());
  StructField ageField = new StructField("age", LongType$.MODULE$, false, Metadata.empty());
  StructType schema = new StructType(new StructField[] { nameField, ageField });

  List<Row> people = Arrays.asList(
      new GenericRowWithSchema(new Object[] { "john", 25L }, schema),
      new GenericRowWithSchema(new Object[] { "sam", 22L }, schema),
      new GenericRowWithSchema(new Object[] { "alicia", 35L }, schema),
      new GenericRowWithSchema(new Object[] { "bob", 47L }, schema),
      new GenericRowWithSchema(new Object[] { "jordan", 52L }, schema),
      new GenericRowWithSchema(new Object[] { "liz", 19L }, schema),
      new GenericRowWithSchema(new Object[] { "marcia", 83L }, schema),
      new GenericRowWithSchema(new Object[] { "maria", 40L }, schema),
      new GenericRowWithSchema(new Object[] { "luis", 8L }, schema),
      new GenericRowWithSchema(new Object[] { "gabriel", 30L }, schema));
  Dataset<Row> frame = spark.createDataFrame(people, schema);

  String deltaPath = tempDir.resolve("deltaData").toAbsolutePath().toString();
  frame.write()
      .format("delta")
      .option("path", deltaPath)
      .mode(SaveMode.Overwrite)
      .save();

  // Events are only inspected after execution has ended.
  StaticExecutionContextFactory.waitForExecutionEnd();

  ArgumentCaptor<RunEvent> captor = ArgumentCaptor.forClass(OpenLineage.RunEvent.class);
  Mockito.verify(SparkAgentTestExtension.OPEN_LINEAGE_SPARK_CONTEXT, Mockito.atLeast(2)).emit(captor.capture());

  Optional<RunEvent> firstCompleteWithOutputs = captor.getAllValues().stream()
      .filter(candidate -> candidate.getEventType().equals(EventType.COMPLETE))
      .filter(candidate -> !candidate.getOutputs().isEmpty())
      .findFirst();
  assertTrue(firstCompleteWithOutputs.isPresent());

  List<OpenLineage.OutputDataset> written = firstCompleteWithOutputs.get().getOutputs();
  assertEquals(1, written.size());
  assertEquals("file", written.get(0).getNamespace());
  assertEquals(deltaPath, written.get(0).getName());
}
use of org.apache.spark.sql.types.LongType$ in project spark-bigquery-connector by GoogleCloudDataproc.
the class AvroSchemaConverter method createConverterFor.
/**
 * Creates a converter for one (Spark type, Avro schema) pairing.
 *
 * <p>The returned converter is invoked with a getter and an ordinal and produces the value to
 * store for the given Avro schema. Supported pairings are checked in the order written below;
 * arrays and user-defined types recurse into this method for their element / underlying SQL type.
 *
 * @param sparkType the Spark data type of the value being read
 * @param avroType the Avro schema the value is converted for
 * @return a converter for the pairing
 * @throws IllegalArgumentException if the pairing is not one of the supported combinations
 */
static Converter createConverterFor(DataType sparkType, Schema avroType) {
  if (sparkType instanceof NullType && avroType.getType() == Schema.Type.NULL) {
    return (source, index) -> null;
  }
  if (sparkType instanceof BooleanType && avroType.getType() == Schema.Type.BOOLEAN) {
    return (source, index) -> source.getBoolean(index);
  }

  // Byte, short and int values are all widened to Long for an Avro LONG.
  if (sparkType instanceof ByteType && avroType.getType() == Schema.Type.LONG) {
    return (source, index) -> Long.valueOf(source.getByte(index));
  }
  if (sparkType instanceof ShortType && avroType.getType() == Schema.Type.LONG) {
    return (source, index) -> Long.valueOf(source.getShort(index));
  }
  if (sparkType instanceof IntegerType && avroType.getType() == Schema.Type.LONG) {
    return (source, index) -> Long.valueOf(source.getInt(index));
  }
  if (sparkType instanceof LongType && avroType.getType() == Schema.Type.LONG) {
    return (source, index) -> source.getLong(index);
  }

  // Floats are widened to Double for an Avro DOUBLE.
  if (sparkType instanceof FloatType && avroType.getType() == Schema.Type.DOUBLE) {
    return (source, index) -> Double.valueOf(source.getFloat(index));
  }
  if (sparkType instanceof DoubleType && avroType.getType() == Schema.Type.DOUBLE) {
    return (source, index) -> source.getDouble(index);
  }

  if (sparkType instanceof DecimalType && avroType.getType() == Schema.Type.BYTES) {
    DecimalType decimalType = (DecimalType) sparkType;
    return (source, index) -> {
      int precision = decimalType.precision();
      int scale = decimalType.scale();
      Decimal value = source.getDecimal(index, precision, scale);
      return DECIMAL_CONVERSIONS.toBytes(value.toJavaBigDecimal(), avroType, LogicalTypes.decimal(precision, scale));
    };
  }

  if (sparkType instanceof StringType && avroType.getType() == Schema.Type.STRING) {
    return (source, index) -> new Utf8(source.getUTF8String(index).getBytes());
  }

  if (sparkType instanceof BinaryType && avroType.getType() == Schema.Type.FIXED) {
    int expectedSize = avroType.getFixedSize();
    return (source, index) -> {
      byte[] bytes = source.getBinary(index);
      // A FIXED schema only accepts payloads of exactly its declared size.
      if (bytes.length != expectedSize) {
        throw new IllegalArgumentException(String.format("Cannot write %s bytes of binary data into FIXED Type with size of %s bytes", bytes.length, expectedSize));
      }
      return new GenericData.Fixed(avroType, bytes);
    };
  }
  if (sparkType instanceof BinaryType && avroType.getType() == Schema.Type.BYTES) {
    return (source, index) -> ByteBuffer.wrap(source.getBinary(index));
  }

  // Dates are read with getInt and timestamps with getLong, and passed through unchanged.
  if (sparkType instanceof DateType && avroType.getType() == Schema.Type.INT) {
    return (source, index) -> source.getInt(index);
  }
  if (sparkType instanceof TimestampType && avroType.getType() == Schema.Type.LONG) {
    return (source, index) -> source.getLong(index);
  }

  if (sparkType instanceof ArrayType && avroType.getType() == Schema.Type.ARRAY) {
    ArrayType arrayType = (ArrayType) sparkType;
    DataType elementType = arrayType.elementType();
    boolean nullableElements = arrayType.containsNull();
    Converter perElement = createConverterFor(elementType, resolveNullableType(avroType.getElementType(), nullableElements));
    return (source, index) -> {
      ArrayData elements = source.getArray(index);
      int count = elements.numElements();
      Object[] converted = new Object[count];
      for (int i = 0; i < count; i++) {
        // Null slots are only looked for when the array type declares it may contain nulls.
        boolean nullSlot = nullableElements && elements.isNullAt(i);
        converted[i] = nullSlot ? null : perElement.convert(elements, i);
      }
      // Arrays.asList returns a list view over the array; no copy is made.
      return java.util.Arrays.asList(converted);
    };
  }

  if (sparkType instanceof StructType && avroType.getType() == Schema.Type.RECORD) {
    StructType structType = (StructType) sparkType;
    StructConverter recordConverter = new StructConverter(structType, avroType);
    int fieldCount = structType.length();
    return (source, index) -> recordConverter.convert(source.getStruct(index, fieldCount));
  }

  // User-defined types are converted through their underlying SQL type.
  if (sparkType instanceof UserDefinedType) {
    DataType underlying = ((UserDefinedType) sparkType).sqlType();
    return createConverterFor(underlying, avroType);
  }

  throw new IllegalArgumentException(String.format("Cannot convert Catalyst type %s to Avro type %s", sparkType, avroType));
}
use of org.apache.spark.sql.types.LongType$ in project spark-bigquery-connector by GoogleCloudDataproc.
the class AvroSchemaConverter method sparkTypeToRawAvroType.
/**
 * Maps a Spark data type to an Avro schema built with the supplied type builder.
 *
 * <p>Byte, short, int and long all map to Avro long; float and double both map to Avro double.
 * Array elements and struct fields are converted through the overload of this method that also
 * takes a nullability flag.
 *
 * @param dataType the Spark type to convert
 * @param recordName the record name used when {@code dataType} is a struct
 * @param builder the Avro type builder used to create the schema
 * @return the Avro schema for {@code dataType}
 * @throws IllegalArgumentException if the type is a map, a decimal exceeding the
 *     {@code SchemaConverters.BQ_NUMERIC_PRECISION} / {@code BQ_NUMERIC_SCALE} limits, or any
 *     other type not handled here
 */
static Schema sparkTypeToRawAvroType(DataType dataType, String recordName, SchemaBuilder.TypeBuilder<Schema> builder) {
  if (dataType instanceof BinaryType) {
    return builder.bytesType();
  }

  boolean isIntegral = dataType instanceof ByteType
      || dataType instanceof ShortType
      || dataType instanceof IntegerType
      || dataType instanceof LongType;
  if (isIntegral) {
    return builder.longType();
  }

  if (dataType instanceof BooleanType) {
    return builder.booleanType();
  }

  boolean isFloatingPoint = dataType instanceof FloatType || dataType instanceof DoubleType;
  if (isFloatingPoint) {
    return builder.doubleType();
  }

  if (dataType instanceof DecimalType) {
    DecimalType decimal = (DecimalType) dataType;
    boolean fitsNumeric = decimal.precision() <= SchemaConverters.BQ_NUMERIC_PRECISION
        && decimal.scale() <= SchemaConverters.BQ_NUMERIC_SCALE;
    if (!fitsNumeric) {
      throw new IllegalArgumentException("Decimal type is too wide to fit in BigQuery Numeric format");
    }
    return LogicalTypes.decimal(decimal.precision(), decimal.scale()).addToSchema(builder.bytesType());
  }

  if (dataType instanceof StringType) {
    return builder.stringType();
  }

  if (dataType instanceof TimestampType) {
    // NOTE(review): the comment that used to sit here was a truncated fragment mentioning a team
    // adding microsecond support to their backend; presumably this encoding is provisional
    // until then - confirm against the upstream source.
    return LogicalTypes.timestampMicros().addToSchema(builder.longType());
  }

  if (dataType instanceof DateType) {
    return LogicalTypes.date().addToSchema(builder.intType());
  }

  if (dataType instanceof ArrayType) {
    ArrayType arrayType = (ArrayType) dataType;
    return builder.array().items(sparkTypeToRawAvroType(arrayType.elementType(), arrayType.containsNull(), recordName));
  }

  if (dataType instanceof StructType) {
    SchemaBuilder.FieldAssembler<Schema> assembler = builder.record(recordName).fields();
    for (StructField member : ((StructType) dataType).fields()) {
      // Each field's own name is passed on as the record name for its type.
      Schema memberSchema = sparkTypeToRawAvroType(member.dataType(), member.nullable(), member.name());
      assembler.name(member.name()).type(memberSchema).noDefault();
    }
    return assembler.endRecord();
  }

  if (dataType instanceof UserDefinedType) {
    // User-defined types are converted through their underlying SQL type.
    return sparkTypeToRawAvroType(((UserDefinedType) dataType).sqlType(), recordName, builder);
  }

  // Maps get their own dedicated message; everything else is reported by its simple string.
  if (dataType instanceof MapType) {
    throw new IllegalArgumentException(SchemaConverters.MAPTYPE_ERROR_MESSAGE);
  }
  throw new IllegalArgumentException("Data type not supported: " + dataType.simpleString());
}
use of org.apache.spark.sql.types.LongType$ in project carbondata by apache.
the class CarbonColumnVectorWrapper method convertSparkToCarbonDataType.
// TODO: this is copied from carbondata-spark-common module, use presto type instead of this
/**
 * Translates a Spark SQL data type into the corresponding CarbonData data type.
 *
 * <p>Primitive types map to the matching {@code DataTypes} constant. Decimals keep their
 * precision and scale. Arrays and structs are converted recursively, element by element and
 * field by field.
 *
 * @param dataType the Spark SQL type to translate
 * @return the corresponding CarbonData type
 * @throws UnsupportedOperationException if {@code dataType} is not one of the types handled here
 */
private org.apache.carbondata.core.metadata.datatype.DataType convertSparkToCarbonDataType(org.apache.spark.sql.types.DataType dataType) {
  if (dataType instanceof StringType) {
    return DataTypes.STRING;
  }
  if (dataType instanceof ShortType) {
    return DataTypes.SHORT;
  }
  if (dataType instanceof IntegerType) {
    return DataTypes.INT;
  }
  if (dataType instanceof LongType) {
    return DataTypes.LONG;
  }
  if (dataType instanceof DoubleType) {
    return DataTypes.DOUBLE;
  }
  if (dataType instanceof FloatType) {
    return DataTypes.FLOAT;
  }
  if (dataType instanceof DateType) {
    return DataTypes.DATE;
  }
  if (dataType instanceof BooleanType) {
    return DataTypes.BOOLEAN;
  }
  if (dataType instanceof TimestampType) {
    return DataTypes.TIMESTAMP;
  }
  if (dataType instanceof NullType) {
    return DataTypes.NULL;
  }
  if (dataType instanceof DecimalType) {
    DecimalType sparkDecimal = (DecimalType) dataType;
    return DataTypes.createDecimalType(sparkDecimal.precision(), sparkDecimal.scale());
  }
  if (dataType instanceof ArrayType) {
    // The element type is converted first; the array type is built around the result.
    org.apache.spark.sql.types.DataType sparkElement = ((ArrayType) dataType).elementType();
    return DataTypes.createArrayType(convertSparkToCarbonDataType(sparkElement));
  }
  if (dataType instanceof StructType) {
    org.apache.spark.sql.types.StructField[] sparkFields = ((StructType) dataType).fields();
    List<StructField> converted = new ArrayList<>();
    for (org.apache.spark.sql.types.StructField sparkField : sparkFields) {
      // Only the field name and its converted type are carried over.
      converted.add(new StructField(sparkField.name(), convertSparkToCarbonDataType(sparkField.dataType())));
    }
    return DataTypes.createStructType(converted);
  }
  throw new UnsupportedOperationException("getting " + dataType + " from presto");
}
Aggregations