Skip to content

Commit d1866c3

Browse files
committed
Merge branch '2.12'
2 parents eaabb43 + 7ca2baa commit d1866c3

File tree

3 files changed

+278
-39
lines changed

3 files changed

+278
-39
lines changed

release-notes/VERSION-2.x

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ Modules:
1010

1111
2.12.0 (not yet released)
1212

13+
#71: (yaml) Hex number as an entry of an Object causing problem(s) with binding to POJO
1314
#175: (yaml) Add `YAMLGenerator.Feature.INDENT_ARRAYS_WITH_INDICATOR` to indent by 2 spaces
1415
(requested by Jesper N; fix contributed by Damian S)
1516
#199: (csv) Empty Lists can only be String-typed in CSV
@@ -20,9 +21,10 @@ Modules:
2021
#226: (yaml) Quote 'y'/'Y'/'n'/'N' as names too (to avoid problems with Boolean keys)
2122
(requested by pnepywoda@github)
2223
#229: (yaml) Allow configuring the way "must quote" is determined for property names, String values
23-
#231: Typed object with anchor throws Already had POJO for id (note: actual
24+
#231: (yaml) Typed object with anchor throws Already had POJO for id (note: actual
2425
fix in `jackson-annotations`)
2526
(reported by almson@github)
27+
#233: (yaml) Support decoding Binary, Octal and Hex numbers as integers
2628
- Add configurability of "YAML version generator is to follow" via "YAMLFactory.builder()"
2729
- SnakeYAML 1.26 -> 1.27
2830
- Add Gradle Module Metadata (https://blog.gradle.org/alignment-with-gradle-module-metadata)

yaml/src/main/java/com/fasterxml/jackson/dataformat/yaml/YAMLParser.java

Lines changed: 175 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@
44
import java.math.BigDecimal;
55
import java.math.BigInteger;
66
import java.util.Optional;
7-
import java.util.regex.Pattern;
87

98
import org.snakeyaml.engine.v2.api.LoadSettings;
109
import org.snakeyaml.engine.v2.common.Anchor;
@@ -76,12 +75,10 @@ private Feature(boolean defaultState) {
7675
// note: does NOT include '0', handled separately
7776
// private final static Pattern PATTERN_INT = Pattern.compile("-?[1-9][0-9]*");
7877

79-
/**
80-
* We will use pattern that is bit stricter than YAML definition,
81-
* but we will still allow things like extra '_' in there.
82-
*/
83-
private final static Pattern PATTERN_FLOAT = Pattern.compile(
84-
"[-+]?([0-9][0-9_]*)?\\.[0-9]*([eE][-+][0-9]+)?");
78+
// 22-Nov-2020, tatu: Not needed as of 2.12 since SnakeYAML tags
79+
// doubles correctly
80+
// private final static Pattern PATTERN_FLOAT = Pattern.compile(
81+
// "[-+]?([0-9][0-9_]*)?\\.[0-9]*([eE][-+][0-9]+)?");
8582

8683
/*
8784
/**********************************************************************
@@ -528,25 +525,16 @@ protected Boolean _matchYAMLBoolean(String value, int len)
528525

529526
protected JsonToken _decodeNumberScalar(String value, final int len)
530527
{
531-
if ("0".equals(value)) { // special case for regexp (can't take minus etc)
532-
_numberNegative = false;
533-
_numberInt = 0;
534-
_numTypesValid = NR_INT;
535-
return JsonToken.VALUE_NUMBER_INT;
536-
}
537-
/* 05-May-2012, tatu: Turns out this is a hot spot; so let's write it
538-
* out and avoid regexp overhead...
539-
*/
528+
// 05-May-2012, tatu: Turns out this is a hot spot; so let's write it
529+
// out and avoid regexp overhead...
530+
540531
//if (PATTERN_INT.matcher(value).matches()) {
541532
int i;
542-
char sign = value.charAt(0);
543-
if (sign == '-') {
533+
char ch = value.charAt(0);
534+
if (ch == '-') {
544535
_numberNegative = true;
545-
if (len == 1) {
546-
return null;
547-
}
548536
i = 1;
549-
} else if (sign == '+') {
537+
} else if (ch == '+') {
550538
_numberNegative = false;
551539
if (len == 1) {
552540
return null;
@@ -556,42 +544,177 @@ protected JsonToken _decodeNumberScalar(String value, final int len)
556544
_numberNegative = false;
557545
i = 0;
558546
}
559-
// !!! 11-Jan-2018, tatu: Should check for binary/octal/hex/sexagesimal
560-
// as per http://yaml.org/type/int.html
547+
if (len == i) { // should not occur but play it safe
548+
return null;
549+
}
550+
// Next: either "0" ("-0" and "+0" also accepted), or non-decimal. So:
551+
if (value.charAt(i) == '0') {
552+
if (++i == len) {
553+
// can leave "_numberNegative" as is, does not matter
554+
_numberInt = 0;
555+
_numTypesValid = NR_INT;
556+
return JsonToken.VALUE_NUMBER_INT;
557+
}
558+
ch = value.charAt(i);
559+
560+
switch (ch) {
561+
case 'b': case 'B': // binary
562+
return _decodeNumberIntBinary(value, i+1, len, _numberNegative);
563+
case 'x': case 'X': // hex
564+
return _decodeNumberIntHex(value, i+1, len, _numberNegative);
565+
case '0': case '1': case '2': case '3': case '4':
566+
case '5': case '6': case '7': case '8': case '9':
567+
case '_':
568+
return _decodeNumberIntOctal(value, i, len, _numberNegative);
569+
default:
570+
}
571+
// should never occur, but in abundance of caution, let's not
572+
// throw exception but just return as String
573+
return JsonToken.VALUE_STRING;
574+
}
575+
576+
// 23-Nov-2020, tatu: will now check and support all formats EXCEPT
577+
// for 60-base; 60-base is trickier not just because decoding gets
578+
// more involved but also because it can accidentally "detect" values
579+
// that are most likely expressing either Times or IP numbers.
580+
581+
boolean underscores = false;
561582

562-
int underscores = 0;
563583
while (true) {
564584
int c = value.charAt(i);
565585
if (c > '9' || c < '0') {
566-
if (c != '_') {
586+
if (c == '_') {
587+
underscores = true;
588+
} else {
567589
break;
568590
}
569-
++underscores;
570591
}
571592
if (++i == len) {
572593
_numTypesValid = 0;
573-
if (underscores > 0) {
574-
return _cleanYamlInt(_textValue);
594+
if (underscores) {
595+
return _cleanYamlInt(value);
575596
}
576597
_cleanedTextValue = _textValue;
577598
return JsonToken.VALUE_NUMBER_INT;
578599
}
579600
}
580-
if (PATTERN_FLOAT.matcher(value).matches()) {
581-
_numTypesValid = 0;
582-
return _cleanYamlFloat(_textValue);
583-
}
601+
// 22-Nov-2020, tatu: Should not be needed; SnakeYAML does not
602+
// tag things this way...
603+
// if (PATTERN_FLOAT.matcher(value).matches()) {
604+
// _numTypesValid = 0;
605+
// return _cleanYamlFloat(_textValue);
606+
// }
584607

585608
// 25-Aug-2016, tatu: If we can't actually match it to valid number,
586-
// consider String; better than claiming there's not toekn
609+
// consider String; better than claiming there's not token
587610
return JsonToken.VALUE_STRING;
588611
}
589612

590-
protected JsonToken _decodeIntWithUnderscores(String value, final int len)
613+
// @since 2.12
614+
protected JsonToken _decodeNumberIntBinary(final String value, int i, final int origLen,
615+
boolean negative)
616+
{
617+
final String cleansed = _cleanUnderscores(value, i, origLen);
618+
int digitLen = cleansed.length();
619+
620+
if (digitLen <= 31) {
621+
int v = Integer.parseInt(cleansed, 2);
622+
if (negative) {
623+
v = -v;
624+
}
625+
_numberInt = v;
626+
_numTypesValid = NR_INT;
627+
return JsonToken.VALUE_NUMBER_INT;
628+
}
629+
if (digitLen <= 63) {
630+
return _decodeFromLong(Long.parseLong(cleansed, 2), negative,
631+
(digitLen == 32));
632+
}
633+
return _decodeFromBigInteger(new BigInteger(cleansed, 2), negative);
634+
}
635+
636+
// @since 2.12
637+
protected JsonToken _decodeNumberIntOctal(final String value, int i, final int origLen,
638+
boolean negative)
639+
{
640+
final String cleansed = _cleanUnderscores(value, i, origLen);
641+
int digitLen = cleansed.length();
642+
643+
if (digitLen <= 10) { // 30 bits
644+
int v = Integer.parseInt(cleansed, 8);
645+
if (negative) {
646+
v = -v;
647+
}
648+
_numberInt = v;
649+
_numTypesValid = NR_INT;
650+
return JsonToken.VALUE_NUMBER_INT;
651+
}
652+
if (digitLen <= 21) { // 63 bits
653+
return _decodeFromLong(Long.parseLong(cleansed, 8), negative, false);
654+
}
655+
return _decodeFromBigInteger(new BigInteger(cleansed, 8), negative);
656+
}
657+
658+
// @since 2.12
659+
protected JsonToken _decodeNumberIntHex(final String value, int i, final int origLen,
660+
boolean negative)
661+
{
662+
final String cleansed = _cleanUnderscores(value, i, origLen);
663+
int digitLen = cleansed.length();
664+
665+
if (digitLen <= 7) { // 28 bits
666+
int v = Integer.parseInt(cleansed, 16);
667+
if (negative) {
668+
v = -v;
669+
}
670+
_numberInt = v;
671+
_numTypesValid = NR_INT;
672+
return JsonToken.VALUE_NUMBER_INT;
673+
}
674+
if (digitLen <= 15) { // 60 bits
675+
return _decodeFromLong(Long.parseLong(cleansed, 16), negative,
676+
(digitLen == 8));
677+
}
678+
return _decodeFromBigInteger(new BigInteger(cleansed, 16), negative);
679+
}
680+
681+
private JsonToken _decodeFromLong(long unsignedValue, boolean negative,
682+
boolean checkIfInt)
683+
{
684+
long actualValue;
685+
686+
if (negative) {
687+
actualValue = -unsignedValue;
688+
if (checkIfInt && (actualValue >= MIN_INT_L)) {
689+
_numberInt = (int) actualValue;
690+
_numTypesValid = NR_INT;
691+
return JsonToken.VALUE_NUMBER_INT;
692+
}
693+
} else {
694+
if (checkIfInt && (unsignedValue < MAX_INT_L)) {
695+
_numberInt = (int) unsignedValue;
696+
_numTypesValid = NR_INT;
697+
return JsonToken.VALUE_NUMBER_INT;
698+
}
699+
actualValue = unsignedValue;
700+
}
701+
_numberLong = actualValue;
702+
_numTypesValid = NR_LONG;
703+
return JsonToken.VALUE_NUMBER_INT;
704+
}
705+
706+
private JsonToken _decodeFromBigInteger(BigInteger unsignedValue, boolean negative)
591707
{
708+
// Should we check for bounds here too? Let's not bother yet
709+
if (negative) {
710+
_numberBigInt = unsignedValue.negate();
711+
} else {
712+
_numberBigInt = unsignedValue;
713+
}
714+
_numTypesValid = NR_BIGINT;
592715
return JsonToken.VALUE_NUMBER_INT;
593716
}
594-
717+
595718
/*
596719
/**********************************************************
597720
/* String value handling
@@ -855,6 +978,22 @@ private JsonToken _cleanYamlInt(String str)
855978
return JsonToken.VALUE_NUMBER_INT;
856979
}
857980

981+
private String _cleanUnderscores(String str, int i, final int len)
982+
{
983+
final StringBuilder sb = new StringBuilder(len);
984+
for (; i < len; ++i) {
985+
char ch = str.charAt(i);
986+
if (ch != '_') {
987+
sb.append(ch);
988+
}
989+
}
990+
// tiny optimization: if nothing was trimmed, return String
991+
if (sb.length() == len) {
992+
return str;
993+
}
994+
return sb.toString();
995+
}
996+
858997
private JsonToken _cleanYamlFloat(String str)
859998
{
860999
// Here we do NOT yet know whether we might have underscores so check

0 commit comments

Comments
 (0)