Search in sources :

Example 1 with VersionParseException

use of org.apache.parquet.VersionParser.VersionParseException in project parquet-mr by apache.

the class VersionTest method testVersionParser.

/**
 * Exercises VersionParser.parse against well-formed created_by strings, strings with
 * a missing semver and/or build hash, strings with irregular spacing, and one string
 * that cannot be parsed at all.
 */
@Test
public void testVersionParser() throws Exception {
    // Fully populated strings: application, semver and build hash are all present.
    assertParsesTo("parquet-mr", "1.6.0", "abcd", "parquet-mr version 1.6.0 (build abcd)");
    assertParsesTo("parquet-mr", "1.6.22rc99-SNAPSHOT", "abcd", "parquet-mr version 1.6.22rc99-SNAPSHOT (build abcd)");

    // Input that does not follow the expected layout must raise VersionParseException.
    try {
        VersionParser.parse("unparseable string");
        fail("this should throw");
    } catch (VersionParseException expected) {
        // Intentionally empty: reaching this handler is the passing outcome.
    }

    // Semver missing: the version component is expected to be null.
    assertParsesTo("parquet-mr", null, "abcd", "parquet-mr version (build abcd)");
    assertParsesTo("parquet-mr", null, "abcd", "parquet-mr version  (build abcd)");

    // Build hash missing: the build component is expected to be null.
    assertParsesTo("parquet-mr", "1.6.0", null, "parquet-mr version 1.6.0 (build )");
    assertParsesTo("parquet-mr", "1.6.0", null, "parquet-mr version 1.6.0 (build)");
    assertParsesTo("parquet-mr", null, null, "parquet-mr version (build)");
    assertParsesTo("parquet-mr", null, null, "parquet-mr version (build )");

    // The whole "(build ...)" section is absent.
    assertParsesTo("parquet-mr", "1.6.0", null, "parquet-mr version 1.6.0");
    assertParsesTo("parquet-mr", "1.8.0rc4", null, "parquet-mr version 1.8.0rc4");
    assertParsesTo("parquet-mr", "1.8.0rc4-SNAPSHOT", null, "parquet-mr version 1.8.0rc4-SNAPSHOT");
    assertParsesTo("parquet-mr", null, null, "parquet-mr version");

    // Irregular amounts of whitespace between and inside the sections.
    assertParsesTo("parquet-mr", "1.6.0", null, "parquet-mr     version    1.6.0");
    assertParsesTo("parquet-mr", "1.8.0rc4", null, "parquet-mr     version    1.8.0rc4");
    assertParsesTo("parquet-mr", "1.8.0rc4-SNAPSHOT", null, "parquet-mr      version    1.8.0rc4-SNAPSHOT  ");
    assertParsesTo("parquet-mr", null, null, "parquet-mr      version");
    assertParsesTo("parquet-mr", "1.6.0", null, "parquet-mr version 1.6.0 (  build )");
    assertParsesTo("parquet-mr", "1.6.0", null, "parquet-mr     version 1.6.0 (    build)");
    assertParsesTo("parquet-mr", null, null, "parquet-mr     version (    build)");
    assertParsesTo("parquet-mr", null, null, "parquet-mr    version    (build    )");
}

/**
 * Asserts that parsing {@code createdBy} produces a ParsedVersion equal to one built
 * from the given components.
 *
 * @param application the expected application name
 * @param semver the expected version string, or null when none is expected
 * @param buildHash the expected build hash, or null when none is expected
 * @param createdBy the raw string handed to VersionParser.parse
 */
private void assertParsesTo(String application, String semver, String buildHash, String createdBy) throws Exception {
    assertEquals(new ParsedVersion(application, semver, buildHash), VersionParser.parse(createdBy));
}
Also used : VersionParseException(org.apache.parquet.VersionParser.VersionParseException) ParsedVersion(org.apache.parquet.VersionParser.ParsedVersion) Test(org.junit.Test)

Example 2 with VersionParseException

use of org.apache.parquet.VersionParser.VersionParseException in project parquet-mr by apache.

the class CorruptStatistics method shouldIgnoreStatistics.

/**
 * Reports whether column statistics written by the given producer should be discarded
 * because they may be corrupt (see PARQUET-251).
 *
 * <p>Only BINARY and FIXED_LEN_BYTE_ARRAY columns are ever flagged. For those, the
 * statistics are distrusted when {@code createdBy} is null or empty, when it names
 * parquet-mr without a version, when the version falls before the fix, or when the
 * string cannot be parsed. Files written by any other application are trusted.
 *
 * @param createdBy the created-by string from a file footer
 * @param columnType the type of the column that this is checking
 * @return true if the statistics may be invalid and should be ignored, false otherwise
 */
public static boolean shouldIgnoreStatistics(String createdBy, PrimitiveTypeName columnType) {
    boolean affectedType = columnType == PrimitiveTypeName.BINARY
            || columnType == PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY;
    if (!affectedType) {
        // Every other column type is outside the scope of the bug.
        return false;
    }

    if (Strings.isNullOrEmpty(createdBy)) {
        // A blank created_by may itself be a symptom of the parquet-mr releases from
        // the PARQUET-251 era (see PARQUET-297), so do not trust the statistics.
        warnOnce("Ignoring statistics because created_by is null or empty! See PARQUET-251 and PARQUET-297");
        return true;
    }

    try {
        ParsedVersion parsed = VersionParser.parse(createdBy);

        boolean writtenByParquetMr = "parquet-mr".equals(parsed.application);
        if (!writtenByParquetMr) {
            // Other writers are assumed not to share the bug.
            return false;
        }

        if (Strings.isNullOrEmpty(parsed.version)) {
            warnOnce("Ignoring statistics because created_by did not contain a semver (see PARQUET-251): " + createdBy);
            return true;
        }

        SemanticVersion createdVersion = SemanticVersion.parse(parsed.version);

        if (createdVersion.compareTo(PARQUET_251_FIXED_VERSION) >= 0) {
            // Written by a release at or after the fixed version.
            return false;
        }
        if (createdVersion.compareTo(CDH_5_PARQUET_251_FIXED_START) >= 0
                && createdVersion.compareTo(CDH_5_PARQUET_251_FIXED_END) < 0) {
            // Inside the [start, end) range that is exempted even though it sorts below
            // the fixed version (presumably CDH 5 builds carrying the fix, judging by
            // the constant names -- confirm against their definitions).
            return false;
        }

        warnOnce("Ignoring statistics because this file was created prior to " + PARQUET_251_FIXED_VERSION + ", see PARQUET-251");
        return true;
    } catch (RuntimeException e) {
        // Parsing blew up unexpectedly: record it, distrust the stats, keep going.
        warnParseErrorOnce(createdBy, e);
        return true;
    } catch (SemanticVersionParseException e) {
        // The version component was not a valid semver: record it, distrust the stats.
        warnParseErrorOnce(createdBy, e);
        return true;
    } catch (VersionParseException e) {
        // The created_by string had an unrecognised layout: record it, distrust the stats.
        warnParseErrorOnce(createdBy, e);
        return true;
    }
}
Also used : SemanticVersionParseException(org.apache.parquet.SemanticVersion.SemanticVersionParseException) VersionParseException(org.apache.parquet.VersionParser.VersionParseException) ParsedVersion(org.apache.parquet.VersionParser.ParsedVersion)

Aggregations

ParsedVersion (org.apache.parquet.VersionParser.ParsedVersion): 2, VersionParseException (org.apache.parquet.VersionParser.VersionParseException): 2, SemanticVersionParseException (org.apache.parquet.SemanticVersion.SemanticVersionParseException): 1, Test (org.junit.Test): 1