Fix #266 (array index out of bounds in _decodeShortUnicodeValue() of SmileParser)

cowtowncoder · cowtowncoder · commit b714f0eaaf83 · 2021-03-26T13:03:48.000-07:00
diff --git a/release-notes/CREDITS-2.x b/release-notes/CREDITS-2.x
@@ -178,6 +178,8 @@ Fabian Meumertzheim (fmeum@github)
  (2.12.3)
 * Reported #263 (smile) Handle invalid chunked-binary-format length gracefully
  (2.12.3)
+* Reported #266: (smile) ArrayIndexOutOfBoundsException in SmileParser._decodeShortUnicodeValue()
+ (2.12.3)
 
 (jhhladky@github)
 
diff --git a/release-notes/VERSION-2.x b/release-notes/VERSION-2.x
@@ -22,10 +22,12 @@ Modules:
  (reported by Fabian M)
 #261 (cbor) CBORParser need to validate zero-length byte[] for BigInteger 
  (reported by Fabian M)
-#263 (smile) Handle invalid chunked-binary-format length gracefully
+#263: (smile) Handle invalid chunked-binary-format length gracefully
  (reported by Fabian M)
-#265 (smile) Allocate byte[] lazily for longer Smile binary data payloads
+#265: (smile) Allocate byte[] lazily for longer Smile binary data payloads
  (7-bit encoded) 
+#266: (smile) ArrayIndexOutOfBoundsException in SmileParser._decodeShortUnicodeValue()
+ (reported by Fabian M)
 
 2.12.2 (03-Mar-2021)
 
diff --git a/smile/src/main/java/com/fasterxml/jackson/dataformat/smile/SmileParser.java b/smile/src/main/java/com/fasterxml/jackson/dataformat/smile/SmileParser.java
@@ -335,7 +335,6 @@ protected final int _tryToLoadToHaveAtLeast(int minAvailable) throws IOException
         return 0;
     }
 
-    @SuppressWarnings("deprecation")
     @Override
     protected void _closeInput() throws IOException
     {
@@ -391,7 +390,7 @@ protected void _releaseBuffers2()
     /* JsonParser impl
     /**********************************************************
      */
-    
+
     @Override
     public JsonToken nextToken() throws IOException
     {
@@ -2311,44 +2310,53 @@ protected final String _decodeShortAsciiValue(int len) throws IOException
         return _textBuffer.setCurrentAndReturn(len);
     }
 
-    protected final String _decodeShortUnicodeValue(int len) throws IOException
+    protected final String _decodeShortUnicodeValue(final int byteLen) throws IOException
     {
-        if ((_inputEnd - _inputPtr) < len) {
-            _loadToHaveAtLeast(len);
+        if ((_inputEnd - _inputPtr) < byteLen) {
+            _loadToHaveAtLeast(byteLen);
         }
         int outPtr = 0;
         char[] outBuf = _textBuffer.emptyAndGetCurrentSegment();
         int inPtr = _inputPtr;
-        _inputPtr += len;
+        _inputPtr += byteLen;
         final int[] codes = SmileConstants.sUtf8UnitLengths;
         final byte[] inputBuf = _inputBuffer;
-        for (int end = inPtr + len; inPtr < end; ) {
-            int i = inputBuf[inPtr++] & 0xFF;
-            int code = codes[i];
-            if (code != 0) {
-                // trickiest one, need surrogate handling
-                switch (code) {
-                case 1:
-                    i = ((i & 0x1F) << 6) | (inputBuf[inPtr++] & 0x3F);
-                    break;
-                case 2:
-                    i = ((i & 0x0F) << 12)
-	                  | ((inputBuf[inPtr++] & 0x3F) << 6)
-	                  | (inputBuf[inPtr++] & 0x3F);
-                    break;
-                case 3:
-                    i = ((i & 0x07) << 18)
-	                | ((inputBuf[inPtr++] & 0x3F) << 12)
-	                | ((inputBuf[inPtr++] & 0x3F) << 6)
-	                | (inputBuf[inPtr++] & 0x3F);
-                    // note: this is the codepoint value; need to split, too
-                    i -= 0x10000;
-                    outBuf[outPtr++] = (char) (0xD800 | (i >> 10));
-                    i = 0xDC00 | (i & 0x3FF);
-                    break;
-                default: // invalid
-                    _reportError("Invalid byte "+Integer.toHexString(i)+" in short Unicode text block");
-                }
+        for (int end = inPtr + byteLen; inPtr < end; ) {
+            int i = inputBuf[inPtr++];
+            if (i >= 0) {
+                outBuf[outPtr++] = (char) i;
+                continue;
+            }
+            i &= 0xFF;
+            final int unitLen = codes[i];
+            if ((inPtr + unitLen) > end) {
+                // Last -1 to compensate for byte that was read:
+                final int firstCharOffset = byteLen - (end - inPtr) - 1;
+                return _reportTruncatedUTF8InString(byteLen, firstCharOffset, i, unitLen);
+            }
+            int i2 = inputBuf[inPtr++] & 0x3F;
+
+            switch (unitLen) {
+            case 1:
+                i = ((i & 0x1F) << 6) | i2;
+                break;
+            case 2:
+                i = ((i & 0x0F) << 12)
+                    | (i2 << 6)
+                    | (inputBuf[inPtr++] & 0x3F);
+                break;
+            case 3:// trickiest one, need surrogate handling
+                i = ((i & 0x07) << 18)
+                    | (i2 << 12)
+                    | ((inputBuf[inPtr++] & 0x3F) << 6)
+                    | (inputBuf[inPtr++] & 0x3F);
+                // note: this is the codepoint value; need to split, too
+                i -= 0x10000;
+                outBuf[outPtr++] = (char) (0xD800 | (i >> 10));
+                i = 0xDC00 | (i & 0x3FF);
+                break;
+            default: // invalid
+                _reportError("Invalid byte "+Integer.toHexString(i)+" in short Unicode text block");
             }
             outBuf[outPtr++] = (char) i;
         }        
@@ -2948,7 +2956,18 @@ protected void _reportIncompleteBinaryRead7Bit(int expLen, int actLen)
 " for Binary value (7-bit): expected %d payload bytes (from %d encoded), only decoded %d",
                 expLen, encodedLen, actLen), currentToken());
     }
-    
+
+    // Always throws (via _constructError): reports a multi-byte UTF-8 character truncated by the end of a Short Unicode String value. @since 2.12.3
+    protected String _reportTruncatedUTF8InString(int strLenBytes, int truncatedCharOffset,
+            int firstUTFByteValue, int bytesExpected)
+        throws IOException
+    {
+        throw _constructError(String.format(
+"Truncated UTF-8 character in Short Unicode String value (%d bytes): "
++"byte 0x%02X at offset #%d indicated %d more bytes needed",
+strLenBytes, firstUTFByteValue, truncatedCharOffset, bytesExpected)); // argument order follows the format specifiers, not the parameter order
+    }
+
     /*
     /**********************************************************
     /* Internal methods, other
diff --git a/smile/src/test/java/com/fasterxml/jackson/dataformat/smile/BaseTestForSmile.java b/smile/src/test/java/com/fasterxml/jackson/dataformat/smile/BaseTestForSmile.java
@@ -262,9 +262,11 @@ protected byte[] readResource(String ref)
        final byte[] buf = new byte[4000];
 
        try (InputStream in = getClass().getResourceAsStream(ref)) {
-           int len;
-           while ((len = in.read(buf)) > 0) {
-               bytes.write(buf, 0, len);
+           if (in != null) {
+               int len;
+               while ((len = in.read(buf)) > 0) {
+                   bytes.write(buf, 0, len);
+               }
            }
        } catch (IOException e) {
            throw new RuntimeException("Failed to read resource '"+ref+"': "+e);
diff --git a/smile/src/test/java/com/fasterxml/jackson/dataformat/smile/fuzz/Fuzz32168BigDecimalTest.java b/smile/src/test/java/com/fasterxml/jackson/dataformat/smile/fuzz/Fuzz32168BigDecimalTest.java
@@ -12,7 +12,6 @@ public class Fuzz32168BigDecimalTest extends BaseTestForSmile
 {
     private final ObjectMapper MAPPER = smileMapper();
 
-    // Payload:
     public void testInvalidBigDecimal() throws Exception
     {
         final byte[] input = new byte[] {
diff --git a/smile/src/test/java/com/fasterxml/jackson/dataformat/smile/fuzz/Fuzz32527ShortUnicodeTest.java b/smile/src/test/java/com/fasterxml/jackson/dataformat/smile/fuzz/Fuzz32527ShortUnicodeTest.java
@@ -0,0 +1,45 @@
+package com.fasterxml.jackson.dataformat.smile.fuzz;
+
+import com.fasterxml.jackson.core.*;
+import com.fasterxml.jackson.core.exc.StreamReadException;
+import com.fasterxml.jackson.databind.ObjectMapper;
+
+import com.fasterxml.jackson.dataformat.smile.BaseTestForSmile;
+
+public class Fuzz32527ShortUnicodeTest extends BaseTestForSmile
+{
+    private final ObjectMapper MAPPER = smileMapper();
+
+    // [dataformats-binary#266]: String value whose last UTF-8 character is cut short must fail with a StreamReadException
+    public void testInvalidShortUnicode() throws Exception
+    {
+        final byte[] input = new byte[] {
+                0x3A, 0x29, 0x0A, 0x00, // smile signature
+                (byte) 0xFA, // START_OBJECT
+                (byte) 0xC8, // short-unicode-name: 10 bytes (0x8 + 2), 6 chars
+                (byte) 0xC8, (byte) 0xC8,
+                (byte) 0xC8, (byte) 0xC8, (byte) 0xC8, 0x00,
+                0x00, (byte) 0xF3, (byte) 0xA0, (byte) 0x81,
+
+                (byte) 0x8A, // short-unicode-value: 12 bytes (0xA + 2)
+                0x00, 0x01, 0x00,
+                0x00, 0x00, 0x01, 0x01,
+                0x00, 0x00, 0x04, (byte) 0xE5, // 0xE5 sits at offset 10 and announces 2 continuation bytes
+                0x04 // ...but this is the only byte left of the 12, so the character is truncated
+        };
+        try (JsonParser p = MAPPER.createParser(input)) {
+            assertToken(JsonToken.START_OBJECT, p.nextToken());
+            assertToken(JsonToken.FIELD_NAME, p.nextToken());
+            assertEquals(6, p.currentName().length());
+            assertToken(JsonToken.VALUE_STRING, p.nextToken());
+            try {
+                String text = p.getText(); // failure is expected here, on text access, not from nextToken() above
+                fail("Should have failed, instead decoded String of "+text.length()+" chars");
+            } catch (StreamReadException e) {
+                verifyException(e, "Truncated UTF-8 character in Short Unicode String");
+                verifyException(e, "(12 bytes)");
+                verifyException(e, "byte 0xE5 at offset #10 indicated 2 more bytes needed");
+            }
+        }
+    }
+}

Original file line number	Diff line number	Diff line change
`@@ -12,7 +12,6 @@ public class Fuzz32168BigDecimalTest extends BaseTestForSmile`
`12`	`12`	`{`
`13`	`13`	`private final ObjectMapper MAPPER = smileMapper();`
`14`	`14`
`15`		`- // Payload:`
`16`	`15`	`public void testInvalidBigDecimal() throws Exception`
`17`	`16`	`{`
`18`	`17`	`final byte[] input = new byte[] {`