use of javax.measure.format.ParserException in project sis by apache.
the class UnitFormat method parse.
/**
* Parses a portion of the given text as an instance of {@code Unit}.
* Parsing begins at the index given by {@link ParsePosition#getIndex()}.
* After parsing, the above-cited index is updated to the first unparsed character.
*
* <p>The parsing is lenient: symbols can be products or quotients of units like “m∕s”,
* words like “meters per second”, or authority codes like {@code "urn:ogc:def:uom:EPSG::1026"}.
* The product operator can be either {@code '.'} (ASCII) or {@code '⋅'} (Unicode) character.
* Exponent after symbol can be decimal digits as in “m2” or a superscript as in “m²”.</p>
*
* <p>Note that contrary to {@link #parseObject(String, ParsePosition)}, this method never returns {@code null}.
* If an error occurs at parsing time, an unchecked {@link ParserException} is thrown.</p>
*
* @param symbols the unit symbols to parse.
* @param position on input, index of the first character to parse.
* On output, index after the last parsed character.
* @return the unit parsed from the specified symbols.
* @throws ParserException if a problem occurred while parsing the given symbols.
*/
@SuppressWarnings({ "null", "fallthrough" })
public Unit<?> parse(CharSequence symbols, final ParsePosition position) throws ParserException {
ArgumentChecks.ensureNonNull("symbols", symbols);
ArgumentChecks.ensureNonNull("position", position);
/*
* Check for authority codes (currently only EPSG, but more could be added later).
* Example: "urn:ogc:def:uom:EPSG::9001". If the unit is not an authority code
* (which is the most common case), only then we will parse the unit symbols.
*/
int end = symbols.length();
int start = CharSequences.skipLeadingWhitespaces(symbols, position.getIndex(), end);
int endOfURI = XPaths.endOfURI(symbols, start);
if (endOfURI >= 0) {
final String uom = symbols.subSequence(start, endOfURI).toString();
String code = DefinitionURI.codeOf("uom", Constants.EPSG, uom);
/*
* DefinitionURI.codeOf(…) returns 'uom' directly (provided that whitespaces were already trimmed)
* if no ':' character was found, in which case the string is assumed to be the code directly.
* This is the intended behavior for AuthorityFactory, but in the particular case of this method
* we want to try to parse as an xpointer before giving up. The reference comparison (code != uom)
* below is therefore intentional: it detects the "same instance returned" case.
*/
if (code != null && code != uom) {
NumberFormatException failure = null;
try {
final Unit<?> unit = Units.valueOfEPSG(Integer.parseInt(code));
if (unit != null) {
position.setIndex(endOfURI);
return unit;
}
} catch (NumberFormatException e) {
failure = e;
}
// Reached if the code is not an integer (failure != null) or if no unit was found for that code.
// The error index points to the code inside the URI when it can be located, otherwise to 'start'.
throw (ParserException) new ParserException(Errors.format(Errors.Keys.UnknownUnit_1, Constants.EPSG + Citations.DEFAULT_SEPARATOR + code), symbols, start + Math.max(0, uom.lastIndexOf(code))).initCause(failure);
}
/*
* Not an EPSG code. Maybe it is a URI like this example:
* http://schemas.opengis.net/iso/19139/20070417/resources/uom/gmxUom.xml#xpointer(//*[@gml:id='m'])
*
* If we find such 'uom' value, we could replace 'symbols' by that 'uom'. But it would cause a wrong
* error index to be reported in case of parsing failure. We will rather try to adjust the indices
* (and replace 'symbols' only in last resort).
*/
code = XPaths.xpointer("uom", uom);
if (code != null) {
final int base = start;
start = endOfURI - code.length();
// Walk backward from the end of the URI until the region starting at 'start' matches 'code'.
do if (--start < base) {
// Should never happen (see above comment), but we are paranoiac.
symbols = code;
start = 0;
break;
} while (!CharSequences.regionMatches(symbols, start, code));
// Restrict the scan below to the xpointer value only.
end = start + code.length();
} else {
// Not a recognized URI: parse as ordinary symbols and report the scan index at the end.
endOfURI = -1;
}
}
/*
* Split the unit around the multiplication and division operators and parse each term individually.
* Note that exponentiation needs to be kept as part of a single unit symbol.
*
* The 'start' variable is the index of the first character of the next unit term to parse.
*/
// Enumeration value: NOOP, IMPLICIT, MULTIPLY, DIVIDE.
int operation = NOOP;
Unit<?> unit = null;
boolean hasSpaces = false;
int i = start;
// 'n' is the number of chars of the code point at index 'i'; the loop advances by that amount.
scan: for (int n; i < end; i += n) {
final int c = Character.codePointAt(symbols, i);
n = Character.charCount(c);
final int next;
switch(c) {
/*
* For any character that is not an operator or parenthesis, either continue the scanning of
* characters or stop it, depending on whether the character is valid for a unit symbol or not.
* In the latter case, we consider that we reached the end of a unit symbol.
*/
default:
{
if (AbstractUnit.isSymbolChar(c)) {
if (operation == IMPLICIT) {
operation = MULTIPLY;
}
continue;
}
if (Character.isDigit(c) || Characters.isSuperScript(c)) {
continue;
}
if (Character.isSpaceChar(c)) {
// NOT Character.isWhitespace(int)
hasSpaces = true;
continue;
}
break scan;
}
/*
* Star is for exponentiation in UCUM syntax, but some symbols may use it for unit multiplication.
* We interpret the symbol as a multiplication if the characters before or after it seem to be for
* a unit symbol.
*/
case Style.EXPONENT_OR_MULTIPLY:
{
final int w = exponentOperator(symbols, i, end);
if (w < 0) {
next = MULTIPLY;
break;
}
i += w;
// else fall through.
}
case Style.EXPONENT:
{
if (operation == IMPLICIT) {
// Exponentiation after parenthesis is not yet supported.
break scan;
}
continue;
}
/*
* The period is the multiplication operator in UCUM format. According to UCUM there is no ambiguity
* with the decimal separator since unit terms should not contain floating point numbers. However
* we relax this rule in order to support scale factor of angular units (e.g. π/180). The period
* is interpreted as a decimal separator if there is a decimal digit before and after it.
*/
case '.':
if (isDecimalSeparator(symbols, i, end))
continue;
// Fall through
case '⋅':
case '×':
next = MULTIPLY;
break;
case '÷':
// Fraction slash
case '⁄':
case '/':
case '∕':
next = DIVIDE;
break;
/*
* If we find an '(' parenthesis, invoke recursively this method for the part inside parenthesis.
* The parsing should end at the ')' parenthesis since it is not a valid unit symbol. If we do not
* find that closing parenthesis, this will be considered an error.
*/
case Style.OPEN:
{
final ParsePosition sub = new ParsePosition(i + Character.charCount(c));
final Unit<?> term = parse(symbols, sub);
i = CharSequences.skipLeadingWhitespaces(symbols, sub.getIndex(), end);
if (i >= end || Character.codePointAt(symbols, i) != Style.CLOSE) {
throw new ParserException(Errors.format(Errors.Keys.NonEquilibratedParenthesis_2, symbols.subSequence(start, i), Style.CLOSE), symbols, start);
}
unit = apply(operation, unit, term);
// Default operation if there is no × or / symbols after parenthesis.
operation = IMPLICIT;
// Skip the closing parenthesis found at index 'i' (verified above to be Style.CLOSE).
// 'n' is set to 1 so that the loop increment moves past that single character.
start = i + (n = 1);
continue;
}
}
/*
* At this point, we have either a first unit to parse (NOOP), or a multiplication or division to apply
* between the previously parsed units and the next unit to parse. A special case is IMPLICIT, which is
* a multiplication without explicit × symbol after the parenthesis. The implicit multiplication can be
* overridden by an explicit × or / symbol, which is what happened if we reach this point (tip: look in
* the above 'switch' statement all cases that end with 'break', not 'break scan' or 'continue').
*/
if (operation != IMPLICIT) {
unit = apply(operation, unit, parseTerm(symbols, start, i));
}
hasSpaces = false;
operation = next;
start = i + n;
}
/*
* At this point we either found an unrecognized character or reached the end of string. We will
* parse the remaining characters as a unit and apply the pending unit operation (multiplication
* or division). But before, we need to check if the parsing should stop at the first whitespace.
* This verification assumes that spaces are allowed only in labels specified by the label(…)
* method and in resource bundles, not in labels specified by AbstractUnit.alternate(String).
*/
Unit<?> component = null;
if (hasSpaces) {
end = i;
start = CharSequences.skipLeadingWhitespaces(symbols, start, i);
// Try progressively shorter substrings, dropping the last word at each iteration.
search: while ((i = CharSequences.skipTrailingWhitespaces(symbols, start, i)) > start) {
final String uom = symbols.subSequence(start, i).toString();
if ((component = labelToUnit.get(uom)) != null)
break;
if ((component = fromName(uom)) != null)
break;
int j = i, c;
do {
c = Character.codePointBefore(symbols, j);
j -= Character.charCount(c);
if (j <= start)
break search;
} while (!Character.isWhitespace(c));
/*
* Really use Character.isWhitespace(c) above, not Character.isSpaceChar(c), because we want
* to exclude non-breaking spaces. This block should be the only place in UnitFormat class
* where we use isWhitespace(c) instead of isSpaceChar(c).
*/
// Will become the index of first space after search loop completion.
i = j;
}
/*
* At this point we did not find any user-specified label or localized name matching the substring.
* Assume that the parsing should stop at the first space, on the basis that spaces are not allowed
* in unit symbols. We make an exception if we detect that the part before the first space contains
* digits (not allowed in unit symbols either), in which case the substring may be something like
* "100 feet".
*/
if (hasDigit(symbols, start, i)) {
// Restore the full length (until the first illegal character).
i = end;
}
}
if (component == null) {
component = parseTerm(symbols, start, i);
}
unit = apply(operation, unit, component);
// When a URI was parsed, the whole URI is consumed; otherwise stop at the scan index.
position.setIndex(endOfURI >= 0 ? endOfURI : i);
return unit;
}
use of javax.measure.format.ParserException in project sis by apache.
the class EPSGDataAccess method createUnit.
/**
 * Returns the unit of measurement identified by the given EPSG code.
 * For every row found in the "Unit of Measure" table, a hard-coded unit is first requested from
 * {@link Units#valueOfEPSG(int)}. The database content is used only when no hard-coded unit exists,
 * which gives better support for non-straightforward units like <cite>sexagesimal degrees</cite>
 * (EPSG:9110 and 9111).
 *
 * <div class="note"><b>Example:</b>
 * some EPSG codes for units are:
 *
 * <table class="sis" summary="EPSG codes examples">
 * <tr><th>Code</th> <th>Description</th></tr>
 * <tr><td>9002</td> <td>decimal degree</td></tr>
 * <tr><td>9001</td> <td>metre</td></tr>
 * <tr><td>9030</td> <td>kilometre</td></tr>
 * <tr><td>1040</td> <td>second</td></tr>
 * <tr><td>1029</td> <td>year</td></tr>
 * </table></div>
 *
 * @param code value allocated by EPSG.
 * @return the unit of measurement for the given code.
 * @throws NoSuchAuthorityCodeException if the specified {@code code} was not found.
 * @throws FactoryException if the object creation failed for some other reason.
 */
@Override
public synchronized Unit<?> createUnit(final String code) throws NoSuchAuthorityCodeException, FactoryException {
    ArgumentChecks.ensureNonNull("code", code);
    Unit<?> found = null;
    try (ResultSet rows = executeQuery("Unit of Measure", "UOM_CODE", "UNIT_OF_MEAS_NAME",
            "SELECT UOM_CODE,"
            + " FACTOR_B,"
            + " FACTOR_C,"
            + " TARGET_UOM_CODE,"
            + " UNIT_OF_MEAS_NAME"
            + " FROM [Unit of Measure]"
            + " WHERE UOM_CODE = ?", code))
    {
        while (rows.next()) {
            final int sourceCode = getInteger(code, rows, 1);
            final double factorB = getOptionalDouble(rows, 2);
            final double factorC = getOptionalDouble(rows, 3);
            final int targetCode = getInteger(code, rows, 4);
            if (sourceCode == targetCode) {
                // A base unit converts to itself, so both factors must be exactly 1.
                // FACTOR_B is reported in priority when both factors are wrong.
                if (factorB != 1) {
                    throw new FactoryDataException(error().getString(
                            Errors.Keys.InconsistentAttribute_2, "FACTOR_B", factorB));
                }
                if (factorC != 1) {
                    throw new FactoryDataException(error().getString(
                            Errors.Keys.InconsistentAttribute_2, "FACTOR_C", factorC));
                }
            }
            // First choice: the list of hard-coded unit codes.
            Unit<?> candidate = Units.valueOfEPSG(sourceCode);
            if (candidate == null) {
                final Unit<?> base = Units.valueOfEPSG(targetCode);
                // A factor may be NaN if the conversion is non-linear.
                final boolean linear = !(Double.isNaN(factorB) || Double.isNaN(factorC));
                if (base != null && linear) {
                    candidate = Units.multiply(base, factorB, factorC);
                } else {
                    // Last resort: parse the unit name stored in the database.
                    try {
                        candidate = Units.valueOf(getString(code, rows, 5));
                    } catch (ParserException e) {
                        throw new FactoryDataException(error().getString(Errors.Keys.UnknownUnit_1, code), e);
                    }
                }
            }
            found = ensureSingleton(candidate, found, code);
        }
    } catch (SQLException exception) {
        throw databaseFailure(Unit.class, code, exception);
    }
    if (found == null) {
        throw noSuchAuthorityCode(Unit.class, code);
    }
    return found;
}
use of javax.measure.format.ParserException in project uom-se by unitsofmeasurement.
the class UnitFormatTest method testParseLocal.
/**
 * Verifies that parsing {@code "min"} with the local unit format ends with an
 * {@link UnsupportedOperationException}. A {@link ParserException} is reported as a test failure.
 */
@Test(expected = UnsupportedOperationException.class)
public void testParseLocal() {
    final UnitFormat localFormat = LocalUnitFormat.getInstance();
    final Unit<?> parsed;
    try {
        parsed = localFormat.parse("min");
    } catch (ParserException e) {
        fail(e.getMessage());
        return;     // Not reached at runtime; needed for definite assignment of 'parsed'.
    }
    assertEquals("min", parsed.getSymbol());
}
use of javax.measure.format.ParserException in project indriya by unitsofmeasurement.
the class LocalUnitFormat method parse.
/**
 * Parses the given character sequence, from the cursor index to the end of the sequence, as a unit.
 * Leading and trailing whitespaces are ignored. If there is nothing to parse, {@code AbstractUnit.ONE}
 * is returned and the cursor is left unchanged.
 *
 * <p>On success the cursor index is set to the end of the sequence. On failure the cursor error index
 * is set before the exception is thrown. When the failing token is known, the error index is expressed
 * in {@code csq} coordinates, i.e. it accounts for the leading whitespaces that were skipped.</p>
 *
 * @param csq the characters to parse.
 * @param cursor on input, index of the first character to parse. On output, index after the last
 *        parsed character, or error index if the parsing failed.
 * @return the parsed unit, or {@code AbstractUnit.ONE} if the text to parse is empty or blank.
 * @throws IllegalArgumentException if the parser reported a {@code TokenException}.
 * @throws ParserException if the token manager reported a {@code TokenMgrError}.
 */
public Unit<?> parse(CharSequence csq, ParsePosition cursor) throws ParserException {
    // Parsing reads the whole character sequence from the parse position.
    final int start = cursor.getIndex();
    final int end = csq.length();
    if (end <= start) {
        return AbstractUnit.ONE;
    }
    final String raw = csq.subSequence(start, end).toString();
    final String source = raw.trim();
    if (source.isEmpty()) {
        return AbstractUnit.ONE;
    }
    /*
     * The parser reads the trimmed text, so the token columns it reports are relative to 'source'.
     * Count the leading characters removed by trim() in order to convert them back to 'csq' indices.
     * The loop terminates because 'source' is not empty: 'raw' has at least one character > ' '.
     */
    int leading = 0;
    while (raw.charAt(leading) <= ' ') {
        leading++;
    }
    try {
        LocalUnitFormatParser parser = new LocalUnitFormatParser(symbolMap, new StringReader(source));
        Unit<?> result = parser.parseUnit();
        cursor.setIndex(end);
        return result;
    } catch (TokenException e) {
        if (e.currentToken != null) {
            cursor.setErrorIndex(start + leading + e.currentToken.endColumn);
        } else {
            cursor.setErrorIndex(start);
        }
        // TODO should we throw ParserException here, too?
        throw new IllegalArgumentException(e);
    } catch (TokenMgrError e) {
        cursor.setErrorIndex(start);
        throw new ParserException(e);
    }
}
use of javax.measure.format.ParserException in project indriya by unitsofmeasurement.
the class UnitFormatTest method testParseLocal.
/**
 * Verifies that parsing {@code "min"} with the local unit format ends with an
 * {@link UnsupportedOperationException}. A {@link ParserException} is reported as a test failure.
 */
@Test
public void testParseLocal() {
    final UnitFormat localFormat = LocalUnitFormat.getInstance();
    assertThrows(UnsupportedOperationException.class, () -> {
        final Unit<?> parsed;
        try {
            parsed = localFormat.parse("min");
        } catch (ParserException e) {
            fail(e.getMessage());
            return;     // Not reached at runtime; needed for definite assignment of 'parsed'.
        }
        assertEquals("min", parsed.getSymbol());
    });
}
Aggregations