Search in sources :

Example 1 with StringColumnStatisticsData

use of com.amazonaws.services.glue.model.StringColumnStatisticsData in project alluxio by Alluxio.

The class GlueUtilsTest, method verifyColumnStats.

/**
 * Converts the given Glue column statistics with {@code GlueUtils.toProto} and asserts that the
 * resulting proto matches the Glue object field by field.
 *
 * <p>The column name and type are always compared. When the Glue object carries statistics data,
 * each type-specific section (boolean, date, decimal, double, long, string, binary) that is
 * present is checked: first that the proto exposes that section exactly when the Glue section has
 * the fields this test treats as mandatory, then the individual values.
 *
 * @param glueColStats the Glue column statistics to convert and verify
 */
private void verifyColumnStats(ColumnStatistics glueColStats) {
    ColumnStatisticsInfo colStats = GlueUtils.toProto(glueColStats);
    assertEquals(glueColStats.getColumnName(), colStats.getColName());
    assertEquals(glueColStats.getColumnType(), colStats.getColType());

    ColumnStatisticsData glueData = glueColStats.getStatisticsData();
    // verify empty ColumnStatisticData
    if (glueData == null) {
        // The previous assertion dereferenced getStatisticsData() inside this branch, where it is
        // known to be null, so it always failed with a NullPointerException.
        // NOTE(review): expecting "no data" in the proto when Glue has no statistics data is an
        // assumption about GlueUtils.toProto -- confirm against its implementation.
        assertEquals(false, colStats.hasData());
        return;
    }

    alluxio.grpc.table.ColumnStatisticsData data = colStats.getData();

    // verify boolean
    if (glueData.getBooleanColumnStatisticsData() != null) {
        assertEquals("BOOLEAN", glueData.getType());
        BooleanColumnStatisticsData glueBoolean = glueData.getBooleanColumnStatisticsData();
        assertEquals(glueBoolean.getNumberOfFalses() != null
                && glueBoolean.getNumberOfTrues() != null
                && glueBoolean.getNumberOfNulls() != null, data.hasBooleanStats());
        if (data.hasBooleanStats()) {
            alluxio.grpc.table.BooleanColumnStatsData boolData = data.getBooleanStats();
            assertEquals(glueBoolean.getNumberOfFalses().longValue(), boolData.getNumFalses());
            assertEquals(glueBoolean.getNumberOfTrues().longValue(), boolData.getNumTrues());
            assertEquals(glueBoolean.getNumberOfNulls().longValue(), boolData.getNumNulls());
        }
    }

    // verify date
    if (glueData.getDateColumnStatisticsData() != null) {
        assertEquals("DATE", glueData.getType());
        DateColumnStatisticsData glueDate = glueData.getDateColumnStatisticsData();
        assertEquals(glueDate.getNumberOfDistinctValues() != null
                && glueDate.getNumberOfNulls() != null, data.hasDateStats());
        if (data.hasDateStats()) {
            alluxio.grpc.table.DateColumnStatsData date = data.getDateStats();
            assertEquals(glueDate.getNumberOfDistinctValues().longValue(), date.getNumDistincts());
            assertEquals(glueDate.getNumberOfNulls().longValue(), date.getNumNulls());
            // NOTE(review): getTime() is compared directly with getDaysSinceEpoch(); presumably
            // this mirrors how the converter fills the field -- confirm the intended units.
            assertEquals(glueDate.getMaximumValue() != null, date.hasHighValue());
            if (glueDate.getMaximumValue() != null) {
                assertEquals(glueDate.getMaximumValue().getTime(), date.getHighValue().getDaysSinceEpoch());
            }
            assertEquals(glueDate.getMinimumValue() != null, date.hasLowValue());
            if (glueDate.getMinimumValue() != null) {
                assertEquals(glueDate.getMinimumValue().getTime(), date.getLowValue().getDaysSinceEpoch());
            }
        }
    }

    // verify decimal
    if (glueData.getDecimalColumnStatisticsData() != null) {
        assertEquals("DECIMAL", glueData.getType());
        DecimalColumnStatisticsData glueDecimal = glueData.getDecimalColumnStatisticsData();
        assertEquals(glueDecimal.getNumberOfDistinctValues() != null
                && glueDecimal.getNumberOfNulls() != null, data.hasDecimalStats());
        if (data.hasDecimalStats()) {
            alluxio.grpc.table.DecimalColumnStatsData decimal = data.getDecimalStats();
            assertEquals(glueDecimal.getNumberOfDistinctValues().longValue(), decimal.getNumDistincts());
            assertEquals(glueDecimal.getNumberOfNulls().longValue(), decimal.getNumNulls());
            // Bounds are optional: presence must agree, and scale plus unscaled bytes must match.
            assertEquals(glueDecimal.getMaximumValue() != null, decimal.hasHighValue());
            if (glueDecimal.getMaximumValue() != null) {
                assertEquals(glueDecimal.getMaximumValue().getScale().longValue(), decimal.getHighValue().getScale());
                assertArrayEquals(glueDecimal.getMaximumValue().getUnscaledValue().array(), decimal.getHighValue().getUnscaled().toByteArray());
            }
            assertEquals(glueDecimal.getMinimumValue() != null, decimal.hasLowValue());
            if (glueDecimal.getMinimumValue() != null) {
                assertEquals(glueDecimal.getMinimumValue().getScale().longValue(), decimal.getLowValue().getScale());
                assertArrayEquals(glueDecimal.getMinimumValue().getUnscaledValue().array(), decimal.getLowValue().getUnscaled().toByteArray());
            }
        }
    }

    // verify double
    if (glueData.getDoubleColumnStatisticsData() != null) {
        assertEquals("DOUBLE", glueData.getType());
        DoubleColumnStatisticsData glueDouble = glueData.getDoubleColumnStatisticsData();
        assertEquals(glueDouble.getNumberOfDistinctValues() != null
                && glueDouble.getNumberOfNulls() != null, data.hasDoubleStats());
        if (data.hasDoubleStats()) {
            alluxio.grpc.table.DoubleColumnStatsData doubleData = data.getDoubleStats();
            assertEquals(glueDouble.getNumberOfDistinctValues().longValue(), doubleData.getNumDistincts());
            assertEquals(glueDouble.getNumberOfNulls().longValue(), doubleData.getNumNulls());
            assertEquals(glueDouble.getMaximumValue() != null, doubleData.hasHighValue());
            if (glueDouble.getMaximumValue() != null) {
                assertEquals(glueDouble.getMaximumValue().doubleValue(), doubleData.getHighValue(), 0.01);
            }
            assertEquals(glueDouble.getMinimumValue() != null, doubleData.hasLowValue());
            if (glueDouble.getMinimumValue() != null) {
                assertEquals(glueDouble.getMinimumValue().doubleValue(), doubleData.getLowValue(), 0.01);
            }
        }
    }

    // verify long
    if (glueData.getLongColumnStatisticsData() != null) {
        assertEquals("LONG", glueData.getType());
        LongColumnStatisticsData glueLong = glueData.getLongColumnStatisticsData();
        assertEquals(glueLong.getNumberOfDistinctValues() != null
                && glueLong.getNumberOfNulls() != null, data.hasLongStats());
        if (data.hasLongStats()) {
            alluxio.grpc.table.LongColumnStatsData longData = data.getLongStats();
            assertEquals(glueLong.getNumberOfDistinctValues().longValue(), longData.getNumDistincts());
            assertEquals(glueLong.getNumberOfNulls().longValue(), longData.getNumNulls());
            assertEquals(glueLong.getMaximumValue() != null, longData.hasHighValue());
            if (glueLong.getMaximumValue() != null) {
                assertEquals(glueLong.getMaximumValue().longValue(), longData.getHighValue());
            }
            assertEquals(glueLong.getMinimumValue() != null, longData.hasLowValue());
            if (glueLong.getMinimumValue() != null) {
                assertEquals(glueLong.getMinimumValue().longValue(), longData.getLowValue());
            }
        }
    }

    // verify string
    if (glueData.getStringColumnStatisticsData() != null) {
        assertEquals("STRING", glueData.getType());
        StringColumnStatisticsData glueString = glueData.getStringColumnStatisticsData();
        assertEquals(glueString.getNumberOfDistinctValues() != null
                && glueString.getNumberOfNulls() != null
                && glueString.getMaximumLength() != null
                && glueString.getAverageLength() != null, data.hasStringStats());
        if (data.hasStringStats()) {
            alluxio.grpc.table.StringColumnStatsData stringData = data.getStringStats();
            assertEquals(glueString.getNumberOfDistinctValues().longValue(), stringData.getNumDistincts());
            assertEquals(glueString.getNumberOfNulls().longValue(), stringData.getNumNulls());
            assertEquals(glueString.getMaximumLength().longValue(), stringData.getMaxColLen());
            assertEquals(glueString.getAverageLength().doubleValue(), stringData.getAvgColLen(), 0.01);
        }
    }

    // verify binary
    if (glueData.getBinaryColumnStatisticsData() != null) {
        assertEquals("BINARY", glueData.getType());
        BinaryColumnStatisticsData glueBinary = glueData.getBinaryColumnStatisticsData();
        assertEquals(glueBinary.getAverageLength() != null
                && glueBinary.getMaximumLength() != null
                && glueBinary.getNumberOfNulls() != null, data.hasBinaryStats());
        if (data.hasBinaryStats()) {
            alluxio.grpc.table.BinaryColumnStatsData binary = data.getBinaryStats();
            assertEquals(glueBinary.getAverageLength().doubleValue(), binary.getAvgColLen(), 0.01);
            assertEquals(glueBinary.getMaximumLength().longValue(), binary.getMaxColLen());
            assertEquals(glueBinary.getNumberOfNulls().longValue(), binary.getNumNulls());
        }
    }
}
Also used : BooleanColumnStatisticsData(com.amazonaws.services.glue.model.BooleanColumnStatisticsData) BinaryColumnStatisticsData(com.amazonaws.services.glue.model.BinaryColumnStatisticsData) LongColumnStatisticsData(com.amazonaws.services.glue.model.LongColumnStatisticsData) DoubleColumnStatisticsData(com.amazonaws.services.glue.model.DoubleColumnStatisticsData) DateColumnStatisticsData(com.amazonaws.services.glue.model.DateColumnStatisticsData) DecimalColumnStatisticsData(com.amazonaws.services.glue.model.DecimalColumnStatisticsData) StringColumnStatisticsData(com.amazonaws.services.glue.model.StringColumnStatisticsData) ColumnStatisticsInfo(alluxio.grpc.table.ColumnStatisticsInfo) DecimalColumnStatisticsData(com.amazonaws.services.glue.model.DecimalColumnStatisticsData) DateColumnStatisticsData(com.amazonaws.services.glue.model.DateColumnStatisticsData) LongColumnStatisticsData(com.amazonaws.services.glue.model.LongColumnStatisticsData) ColumnStatisticsData(com.amazonaws.services.glue.model.ColumnStatisticsData) BooleanColumnStatisticsData(com.amazonaws.services.glue.model.BooleanColumnStatisticsData) BinaryColumnStatisticsData(com.amazonaws.services.glue.model.BinaryColumnStatisticsData) DoubleColumnStatisticsData(com.amazonaws.services.glue.model.DoubleColumnStatisticsData) StringColumnStatisticsData(com.amazonaws.services.glue.model.StringColumnStatisticsData)

Example 2 with StringColumnStatisticsData

use of com.amazonaws.services.glue.model.StringColumnStatisticsData in project alluxio by Alluxio.

The class GlueUtilsTest, method protoColStatsWithStringData.

/**
 * Runs {@code verifyColumnStats} twice for a STRING-typed column: once when the statistics data
 * carries only its type, and once after string statistics with random values are attached.
 */
@Test
public void protoColStatsWithStringData() {
    // ColumnStatistics required fields: AnalyzedTime, ColumnName, ColumnType, StatisticsData
    ColumnStatistics columnStats = new ColumnStatistics();
    columnStats.setColumnName("colName");
    columnStats.setColumnType("colType");

    // Pass 1: statistics data that only declares its type (no string section attached yet).
    ColumnStatisticsData typedData = new ColumnStatisticsData();
    typedData.setType("STRING");
    columnStats.setStatisticsData(typedData);
    verifyColumnStats(columnStats);

    // Pass 2: attach a fully populated string section and verify again.
    // Random values are drawn in a fixed order: max length, average length, nulls, distincts.
    StringColumnStatisticsData stringStats = new StringColumnStatisticsData();
    final long maxLength = mRandom.nextLong();
    final double averageLength = mRandom.nextDouble();
    final long nullCount = mRandom.nextLong();
    final long distinctCount = mRandom.nextLong();
    stringStats.setMaximumLength(maxLength);
    stringStats.setAverageLength(averageLength);
    stringStats.setNumberOfNulls(nullCount);
    stringStats.setNumberOfDistinctValues(distinctCount);

    typedData.setStringColumnStatisticsData(stringStats);
    columnStats.setStatisticsData(typedData);
    verifyColumnStats(columnStats);
}
Also used : ColumnStatistics(com.amazonaws.services.glue.model.ColumnStatistics) StringColumnStatisticsData(com.amazonaws.services.glue.model.StringColumnStatisticsData) DecimalColumnStatisticsData(com.amazonaws.services.glue.model.DecimalColumnStatisticsData) DateColumnStatisticsData(com.amazonaws.services.glue.model.DateColumnStatisticsData) LongColumnStatisticsData(com.amazonaws.services.glue.model.LongColumnStatisticsData) ColumnStatisticsData(com.amazonaws.services.glue.model.ColumnStatisticsData) BooleanColumnStatisticsData(com.amazonaws.services.glue.model.BooleanColumnStatisticsData) BinaryColumnStatisticsData(com.amazonaws.services.glue.model.BinaryColumnStatisticsData) DoubleColumnStatisticsData(com.amazonaws.services.glue.model.DoubleColumnStatisticsData) StringColumnStatisticsData(com.amazonaws.services.glue.model.StringColumnStatisticsData) Test(org.junit.Test)

Aggregations

BinaryColumnStatisticsData (com.amazonaws.services.glue.model.BinaryColumnStatisticsData)2 BooleanColumnStatisticsData (com.amazonaws.services.glue.model.BooleanColumnStatisticsData)2 ColumnStatisticsData (com.amazonaws.services.glue.model.ColumnStatisticsData)2 DateColumnStatisticsData (com.amazonaws.services.glue.model.DateColumnStatisticsData)2 DecimalColumnStatisticsData (com.amazonaws.services.glue.model.DecimalColumnStatisticsData)2 DoubleColumnStatisticsData (com.amazonaws.services.glue.model.DoubleColumnStatisticsData)2 LongColumnStatisticsData (com.amazonaws.services.glue.model.LongColumnStatisticsData)2 StringColumnStatisticsData (com.amazonaws.services.glue.model.StringColumnStatisticsData)2 ColumnStatisticsInfo (alluxio.grpc.table.ColumnStatisticsInfo)1 ColumnStatistics (com.amazonaws.services.glue.model.ColumnStatistics)1 Test (org.junit.Test)1