Skip to content

Commit ec560f3

Browse files
committed
Add a failing test for #223
1 parent 4195e6e commit ec560f3

File tree

5 files changed

+146
-39
lines changed

5 files changed

+146
-39
lines changed

src/main/java/com/fasterxml/jackson/core/JsonGenerator.java

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -146,6 +146,30 @@ public enum Feature {
146146
*/
147147
ESCAPE_NON_ASCII(false),
148148

149+
// 23-Nov-2015, tatu: for [core#223], if and when it gets implemented
150+
/**
151+
* Feature that specifies handling of UTF-8 content that contains
152+
* characters beyond BMP (Basic Multilingual Plane), which are
153+
* represented in UTF-16 (Java internal character encoding) as two
154+
* "surrogate" characters. If feature is enabled, these surrogate
155+
* pairs are separately escaped using backslash escapes; if disabled,
156+
* native output (4-byte UTF-8 sequence, or, with char-backed output
157+
* targets, writing of surrogates as is which is typically converted
158+
* by {@link java.io.Writer} into 4-byte UTF-8 sequence eventually)
159+
* is used.
160+
*<p>
161+
* Note that the original JSON specification suggests use of escaping;
162+
* but this is not correct from a standard UTF-8 handling perspective.
163+
* Because of two competing goals, this feature was added to allow either
164+
* behavior to be used, but defaulting to UTF-8 specification compliant
165+
* mode.
166+
*<p>
167+
* Feature is disabled by default.
168+
*
169+
* @since 2.7
170+
*/
171+
// ESCAPE_UTF8_SURROGATES(false),
172+
149173
// // Schema/Validity support features
150174

151175
/**

src/main/java/com/fasterxml/jackson/core/json/UTF8JsonGenerator.java

Lines changed: 30 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -747,8 +747,7 @@ public int writeBinary(Base64Variant b64variant,
747747
*/
748748

749749
@Override
750-
public void writeNumber(short s)
751-
throws IOException, JsonGenerationException
750+
public void writeNumber(short s) throws IOException
752751
{
753752
_verifyValueWrite(WRITE_NUMBER);
754753
// up to 5 digits and possible minus sign
@@ -772,8 +771,7 @@ private final void _writeQuotedShort(short s) throws IOException {
772771
}
773772

774773
@Override
775-
public void writeNumber(int i)
776-
throws IOException, JsonGenerationException
774+
public void writeNumber(int i) throws IOException
777775
{
778776
_verifyValueWrite(WRITE_NUMBER);
779777
// up to 10 digits and possible minus sign
@@ -798,8 +796,7 @@ private final void _writeQuotedInt(int i) throws IOException
798796
}
799797

800798
@Override
801-
public void writeNumber(long l)
802-
throws IOException, JsonGenerationException
799+
public void writeNumber(long l) throws IOException
803800
{
804801
_verifyValueWrite(WRITE_NUMBER);
805802
if (_cfgNumbersAsStrings) {
@@ -824,8 +821,7 @@ private final void _writeQuotedLong(long l) throws IOException
824821
}
825822

826823
@Override
827-
public void writeNumber(BigInteger value)
828-
throws IOException, JsonGenerationException
824+
public void writeNumber(BigInteger value) throws IOException
829825
{
830826
_verifyValueWrite(WRITE_NUMBER);
831827
if (value == null) {
@@ -839,13 +835,11 @@ public void writeNumber(BigInteger value)
839835

840836

841837
@Override
842-
public void writeNumber(double d)
843-
throws IOException, JsonGenerationException
838+
public void writeNumber(double d) throws IOException
844839
{
845840
if (_cfgNumbersAsStrings ||
846-
// [JACKSON-139]
847841
(((Double.isNaN(d) || Double.isInfinite(d))
848-
&& isEnabled(Feature.QUOTE_NON_NUMERIC_NUMBERS)))) {
842+
&& Feature.QUOTE_NON_NUMERIC_NUMBERS.enabledIn(_features)))) {
849843
writeString(String.valueOf(d));
850844
return;
851845
}
@@ -855,13 +849,12 @@ && isEnabled(Feature.QUOTE_NON_NUMERIC_NUMBERS)))) {
855849
}
856850

857851
@Override
858-
public void writeNumber(float f)
859-
throws IOException, JsonGenerationException
852+
public void writeNumber(float f) throws IOException
860853
{
861854
if (_cfgNumbersAsStrings ||
862855
// [JACKSON-139]
863856
(((Float.isNaN(f) || Float.isInfinite(f))
864-
&& isEnabled(Feature.QUOTE_NON_NUMERIC_NUMBERS)))) {
857+
&& Feature.QUOTE_NON_NUMERIC_NUMBERS.enabledIn(_features)))) {
865858
writeString(String.valueOf(f));
866859
return;
867860
}
@@ -871,26 +864,25 @@ && isEnabled(Feature.QUOTE_NON_NUMERIC_NUMBERS)))) {
871864
}
872865

873866
@Override
874-
public void writeNumber(BigDecimal value)
875-
throws IOException, JsonGenerationException
867+
public void writeNumber(BigDecimal value) throws IOException
876868
{
877869
// Don't really know max length for big decimal, no point checking
878870
_verifyValueWrite(WRITE_NUMBER);
879871
if (value == null) {
880872
_writeNull();
881873
} else if (_cfgNumbersAsStrings) {
882-
String raw = isEnabled(Feature.WRITE_BIGDECIMAL_AS_PLAIN) ? value.toPlainString() : value.toString();
874+
String raw = Feature.WRITE_BIGDECIMAL_AS_PLAIN.enabledIn(_features)
875+
? value.toPlainString() : value.toString();
883876
_writeQuotedRaw(raw);
884-
} else if (isEnabled(Feature.WRITE_BIGDECIMAL_AS_PLAIN)) {
877+
} else if (Feature.WRITE_BIGDECIMAL_AS_PLAIN.enabledIn(_features)) {
885878
writeRaw(value.toPlainString());
886879
} else {
887880
writeRaw(value.toString());
888881
}
889882
}
890883

891884
@Override
892-
public void writeNumber(String encodedValue)
893-
throws IOException, JsonGenerationException
885+
public void writeNumber(String encodedValue) throws IOException
894886
{
895887
_verifyValueWrite(WRITE_NUMBER);
896888
if (_cfgNumbersAsStrings) {
@@ -914,8 +906,7 @@ private final void _writeQuotedRaw(String value) throws IOException
914906
}
915907

916908
@Override
917-
public void writeBoolean(boolean state)
918-
throws IOException, JsonGenerationException
909+
public void writeBoolean(boolean state) throws IOException
919910
{
920911
_verifyValueWrite(WRITE_BOOLEAN);
921912
if ((_outputTail + 5) >= _outputEnd) {
@@ -928,8 +919,7 @@ public void writeBoolean(boolean state)
928919
}
929920

930921
@Override
931-
public void writeNull()
932-
throws IOException, JsonGenerationException
922+
public void writeNull() throws IOException
933923
{
934924
_verifyValueWrite(WRITE_NULL);
935925
_writeNull();
@@ -1918,8 +1908,7 @@ private final int _outputRawMultiByteChar(int ch, char[] cbuf, int inputOffset,
19181908
return inputOffset;
19191909
}
19201910

1921-
protected final void _outputSurrogates(int surr1, int surr2)
1922-
throws IOException
1911+
protected final void _outputSurrogates(int surr1, int surr2) throws IOException
19231912
{
19241913
int c = _decodeSurrogate(surr1, surr2);
19251914
if ((_outputTail + 4) > _outputEnd) {
@@ -1945,21 +1934,26 @@ private final int _outputMultiByteChar(int ch, int outputPtr) throws IOException
19451934
{
19461935
byte[] bbuf = _outputBuffer;
19471936
if (ch >= SURR1_FIRST && ch <= SURR2_LAST) { // yes, outside of BMP; add an escape
1948-
bbuf[outputPtr++] = BYTE_BACKSLASH;
1949-
bbuf[outputPtr++] = BYTE_u;
1950-
1951-
bbuf[outputPtr++] = HEX_CHARS[(ch >> 12) & 0xF];
1952-
bbuf[outputPtr++] = HEX_CHARS[(ch >> 8) & 0xF];
1953-
bbuf[outputPtr++] = HEX_CHARS[(ch >> 4) & 0xF];
1954-
bbuf[outputPtr++] = HEX_CHARS[ch & 0xF];
1937+
// 23-Nov-2015, tatu: As per [core#223], may or may not want escapes;
1938+
// it would be added here... but as things are, we do not have proper
1939+
// access yet...
1940+
// if (Feature.ESCAPE_UTF8_SURROGATES.enabledIn(_features)) {
1941+
bbuf[outputPtr++] = BYTE_BACKSLASH;
1942+
bbuf[outputPtr++] = BYTE_u;
1943+
1944+
bbuf[outputPtr++] = HEX_CHARS[(ch >> 12) & 0xF];
1945+
bbuf[outputPtr++] = HEX_CHARS[(ch >> 8) & 0xF];
1946+
bbuf[outputPtr++] = HEX_CHARS[(ch >> 4) & 0xF];
1947+
bbuf[outputPtr++] = HEX_CHARS[ch & 0xF];
1948+
// } else { ... }
19551949
} else {
19561950
bbuf[outputPtr++] = (byte) (0xe0 | (ch >> 12));
19571951
bbuf[outputPtr++] = (byte) (0x80 | ((ch >> 6) & 0x3f));
19581952
bbuf[outputPtr++] = (byte) (0x80 | (ch & 0x3f));
19591953
}
19601954
return outputPtr;
19611955
}
1962-
1956+
19631957
private final void _writeNull() throws IOException
19641958
{
19651959
if ((_outputTail + 4) >= _outputEnd) {
@@ -1974,8 +1968,7 @@ private final void _writeNull() throws IOException
19741968
*
19751969
* @param charToEscape Character to escape using escape sequence (\\uXXXX)
19761970
*/
1977-
private int _writeGenericEscape(int charToEscape, int outputPtr)
1978-
throws IOException
1971+
private int _writeGenericEscape(int charToEscape, int outputPtr) throws IOException
19791972
{
19801973
final byte[] bbuf = _outputBuffer;
19811974
bbuf[outputPtr++] = BYTE_BACKSLASH;

src/test/java/com/fasterxml/jackson/failing/LocationInArrayTest.java renamed to src/test/java/com/fasterxml/jackson/core/json/LocationInArrayTest.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
package com.fasterxml.jackson.failing;
1+
package com.fasterxml.jackson.core.json;
22

33
import com.fasterxml.jackson.core.*;
44

src/test/java/com/fasterxml/jackson/core/json/TestUtf8Generator.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ public void testUtf8Issue462() throws Exception
3737
p.close();
3838
}
3939

40-
// for [Issue#115]
40+
// for [core#115]
4141
public void testSurrogatesWithRaw() throws Exception
4242
{
4343
final String VALUE = quote("\ud83d\ude0c");
Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
package com.fasterxml.jackson.failing;
2+
3+
import java.io.ByteArrayOutputStream;
4+
import java.io.StringWriter;
5+
import java.io.Writer;
6+
7+
import com.fasterxml.jackson.core.BaseTest;
8+
import com.fasterxml.jackson.core.JsonFactory;
9+
import com.fasterxml.jackson.core.JsonGenerator;
10+
import com.fasterxml.jackson.core.JsonParser;
11+
import com.fasterxml.jackson.core.JsonToken;
12+
13+
public class Surrogate223Test extends BaseTest
14+
{
15+
private final JsonFactory JSON_F = new JsonFactory();
16+
17+
// for [core#223]
18+
public void testSurrogatesByteBacked() throws Exception
19+
{
20+
ByteArrayOutputStream out;
21+
JsonGenerator g;
22+
final String toQuote = new String(Character.toChars(0x1F602));
23+
assertEquals(2, toQuote.length()); // just sanity check
24+
25+
// default should be disabled:
26+
// assertFalse(JSON_F.isEnabled(JsonGenerator.Feature.ESCAPE_UTF8_SURROGATES));
27+
28+
out = new ByteArrayOutputStream();
29+
g = JSON_F.createGenerator(out);
30+
g.writeStartArray();
31+
g.writeString(toQuote);
32+
g.writeEndArray();
33+
g.close();
34+
assertEquals(2 + 2 + 4, out.size()); // brackets, quotes, 4-byte encoding
35+
36+
// Also parse back to ensure correctness
37+
JsonParser p = JSON_F.createParser(out.toByteArray());
38+
assertToken(JsonToken.START_ARRAY, p.nextToken());
39+
assertToken(JsonToken.VALUE_STRING, p.nextToken());
40+
assertToken(JsonToken.END_ARRAY, p.nextToken());
41+
p.close();
42+
43+
// but may revert back to original behavior
44+
out = new ByteArrayOutputStream();
45+
g = JSON_F.createGenerator(out);
46+
// g.enable(JsonGenerator.Feature.ESCAPE_UTF8_SURROGATES);
47+
g.writeStartArray();
48+
g.writeString(toQuote);
49+
g.writeEndArray();
50+
g.close();
51+
assertEquals(2 + 2 + 12, out.size()); // brackets, quotes, 2 x 6 byte JSON escape
52+
}
53+
54+
// for [core#223]
55+
public void testSurrogatesCharBacked() throws Exception
56+
{
57+
Writer out;
58+
JsonGenerator g;
59+
final String toQuote = new String(Character.toChars(0x1F602));
60+
assertEquals(2, toQuote.length()); // just sanity check
61+
62+
// default should be disabled:
63+
// assertFalse(JSON_F.isEnabled(JsonGenerator.Feature.ESCAPE_UTF8_SURROGATES));
64+
65+
out = new StringWriter();
66+
g = JSON_F.createGenerator(out);
67+
g.writeStartArray();
68+
g.writeString(toQuote);
69+
g.writeEndArray();
70+
g.close();
71+
assertEquals(2 + 2 + 2, out.toString().length()); // brackets, quotes, 2 chars as is
72+
73+
// Also parse back to ensure correctness
74+
JsonParser p = JSON_F.createParser(out.toString());
75+
assertToken(JsonToken.START_ARRAY, p.nextToken());
76+
assertToken(JsonToken.VALUE_STRING, p.nextToken());
77+
assertToken(JsonToken.END_ARRAY, p.nextToken());
78+
p.close();
79+
80+
// but may revert back to original behavior
81+
out = new StringWriter();
82+
g = JSON_F.createGenerator(out);
83+
// g.enable(JsonGenerator.Feature.ESCAPE_UTF8_SURROGATES);
84+
g.writeStartArray();
85+
g.writeString(toQuote);
86+
g.writeEndArray();
87+
g.close();
88+
assertEquals(2 + 2 + 12, out.toString().length()); // brackets, quotes, 2 x 6 byte JSON escape
89+
}
90+
}

0 commit comments

Comments
 (0)