Skip to content

Commit 9a627bd

Browse files
committed
Use hex escape sequences instead of octal escape sequences.
Octal escape sequences are the least used form of escape sequences, while hex escape sequences are supported almost everywhere. The only exceptions are Java, C++ and Rust.
1 parent 542b241 commit 9a627bd

File tree

8 files changed

+50
-42
lines changed

8 files changed

+50
-42
lines changed

jvm/src/test/scala/io/kaitai/struct/translators/TranslatorSpec.scala

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -407,7 +407,7 @@ class TranslatorSpec extends AnyFunSpec {
407407
GoCompiler -> "[]uint8{34, 0, 10, 64, 65, 66, 92}",
408408
JavaCompiler -> "new byte[] { 34, 0, 10, 64, 65, 66, 92 }",
409409
JavaScriptCompiler -> "new Uint8Array([34, 0, 10, 64, 65, 66, 92])",
410-
LuaCompiler -> "\"\\034\\000\\010\\064\\065\\066\\092\"",
410+
LuaCompiler -> "\"\\x22\\x00\\x0A\\x40\\x41\\x42\\x5C\"",
411411
PerlCompiler -> "pack('C*', (34, 0, 10, 64, 65, 66, 92))",
412412
PHPCompiler -> "\"\\x22\\x00\\x0A\\x40\\x41\\x42\\x5C\"",
413413
PythonCompiler -> "b\"\\x22\\x00\\x0A\\x40\\x41\\x42\\x5C\"",
@@ -420,7 +420,7 @@ class TranslatorSpec extends AnyFunSpec {
420420
GoCompiler -> "[]uint8{255, 0, 255}",
421421
JavaCompiler -> "new byte[] { -1, 0, -1 }",
422422
JavaScriptCompiler -> "new Uint8Array([255, 0, 255])",
423-
LuaCompiler -> "\"\\255\\000\\255\"",
423+
LuaCompiler -> "\"\\xFF\\x00\\xFF\"",
424424
PerlCompiler -> "pack('C*', (255, 0, 255))",
425425
PHPCompiler -> "\"\\xFF\\x00\\xFF\"",
426426
PythonCompiler -> "b\"\\xFF\\x00\\xFF\"",
@@ -435,7 +435,7 @@ class TranslatorSpec extends AnyFunSpec {
435435
GoCompiler -> "len([]uint8{0, 1, 2})",
436436
JavaCompiler -> "new byte[] { 0, 1, 2 }.length",
437437
JavaScriptCompiler -> "new Uint8Array([0, 1, 2]).length",
438-
LuaCompiler -> "#\"\\000\\001\\002\"",
438+
LuaCompiler -> "#\"\\x00\\x01\\x02\"",
439439
PerlCompiler -> "length(pack('C*', (0, 1, 2)))",
440440
PHPCompiler -> "strlen(\"\\x00\\x01\\x02\")",
441441
PythonCompiler -> "len(b\"\\x00\\x01\\x02\")",
@@ -555,14 +555,14 @@ class TranslatorSpec extends AnyFunSpec {
555555
full("\"str\\0next\"", CalcIntType, CalcStrType, ResultMap(
556556
CppCompiler -> "std::string(\"str\\000next\", 8)",
557557
CSharpCompiler -> "\"str\\0next\"",
558-
GoCompiler -> "\"str\\000next\"",
558+
GoCompiler -> "\"str\\x00next\"",
559559
JavaCompiler -> "\"str\\000next\"",
560560
JavaScriptCompiler -> "\"str\\x00next\"",
561-
LuaCompiler -> "\"str\\000next\"",
562-
PerlCompiler -> "\"str\\000next\"",
563-
PHPCompiler -> "\"str\\000next\"",
564-
PythonCompiler -> "u\"str\\000next\"",
565-
RubyCompiler -> "\"str\\000next\""
561+
LuaCompiler -> "\"str\\x00next\"",
562+
PerlCompiler -> "\"str\\x00next\"",
563+
PHPCompiler -> "\"str\\x00next\"",
564+
PythonCompiler -> "u\"str\\x00next\"",
565+
RubyCompiler -> "\"str\\x00next\""
566566
))
567567
}
568568

shared/src/main/scala/io/kaitai/struct/JSON.scala

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,6 @@ object JSON extends CommonLiterals {
2828
}
2929
}
3030

31-
/** octal escapes (which [[translators.CommonLiterals.strLiteralGenericCC]] uses by default) are not allowed in JSON */
3231
override def strLiteralGenericCC(code: Char): String = strLiteralUnicode(code)
3332

3433
def stringToJson(str: String): String =

shared/src/main/scala/io/kaitai/struct/translators/CommonLiterals.scala

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ trait CommonLiterals {
3434
/**
3535
* Handle ASCII character conversion for inlining into string literals.
3636
* Default implementation consults [[asciiCharQuoteMap]] first, then
37-
* just dumps it as is if it's a printable ASCII charcter, or calls
37+
* just dumps it as is if it's a printable ASCII character, or calls
3838
* [[strLiteralGenericCC]] if it's a control character.
3939
* @param code character code to convert into string for inclusion in
4040
* a string literal
@@ -53,18 +53,14 @@ trait CommonLiterals {
5353

5454
/**
5555
* Converts generic control character code into something that's allowed
56-
* inside a string literal. Default implementation uses octal encoding,
56+
* inside a string literal. Default implementation uses hex encoding,
5757
* which is ok for most C-derived languages.
5858
*
59-
* Note that we use strictly 3 octal digits to work around potential
60-
* problems with following decimal digits, i.e. "\0" + "2" that would be
61-
* parsed as single character "\02" = "\x02", instead of two characters
62-
* "\x00\x32".
6359
* @param code character code to represent
6460
* @return string literal representation of given code
6561
*/
6662
def strLiteralGenericCC(code: Char): String =
67-
"\\%03o".format(code.toInt)
63+
"\\x%02X".format(code.toInt)
6864

6965
/**
7066
* Converts Unicode (typically, non-ASCII) character code into something

shared/src/main/scala/io/kaitai/struct/translators/CppTranslator.scala

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -131,6 +131,21 @@ class CppTranslator(provider: TypeProvider, importListSrc: CppImportList, import
131131
}
132132
}
133133

134+
/**
135+
* Hex escapes in C++ are not limited in length, so we use octal escapes, as they are shorter.
136+
*
137+
* Note that we use strictly 3 octal digits to work around potential
138+
* problems with following decimal digits, i.e. "\0" + "2" that would be
139+
* parsed as single character "\02" = "\x02", instead of two characters
140+
* "\x00\x32".
141+
*
142+
* @see https://en.cppreference.com/w/cpp/language/escape
143+
* @param code character code to represent
144+
* @return string literal representation of given code
145+
*/
146+
override def strLiteralGenericCC(code: Char): String =
147+
"\\%03o".format(code.toInt)
148+
134149
override def genericBinOp(left: Ast.expr, op: Ast.operator, right: Ast.expr, extPrec: Int) = {
135150
(detectType(left), detectType(right), op) match {
136151
case (_: IntType, _: IntType, Ast.operator.Mod) =>

shared/src/main/scala/io/kaitai/struct/translators/JavaScriptTranslator.scala

Lines changed: 0 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -13,19 +13,6 @@ class JavaScriptTranslator(provider: TypeProvider, importList: ImportList) exten
1313
override def doByteArrayNonLiteral(elts: Seq[Ast.expr]): String =
1414
s"new Uint8Array([${elts.map(translate).mkString(", ")}])"
1515

16-
/**
17-
* JavaScript rendition of common control character that would use hex form,
18-
* not octal. "Octal" control character string literals might be accepted
19-
* in non-strict JS mode, but in strict mode only hex or unicode are ok.
20-
* Here we'll use hex, as they are shorter.
21-
*
22-
* @see https://github.com/kaitai-io/kaitai_struct/issues/279
23-
* @param code character code to represent
24-
* @return string literal representation of given code
25-
*/
26-
override def strLiteralGenericCC(code: Char): String =
27-
"\\x%02x".format(code.toInt)
28-
2916
override def genericBinOp(left: Ast.expr, op: Ast.operator, right: Ast.expr, extPrec: Int) = {
3017
(detectType(left), detectType(right), op) match {
3118
case (_: IntType, _: IntType, Ast.operator.Div) =>

shared/src/main/scala/io/kaitai/struct/translators/JavaTranslator.scala

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,21 @@ class JavaTranslator(provider: TypeProvider, importList: ImportList) extends Bas
4545
override def doByteArrayNonLiteral(elts: Seq[expr]): String =
4646
s"new byte[] { ${elts.map(translate).mkString(", ")} }"
4747

48+
/**
49+
* Java does not support two-digit hex escape sequences, so we use octal escapes, as they are shorter than the `\uXXXX` form.
50+
*
51+
* Note that we use strictly 3 octal digits to work around potential
52+
* problems with following decimal digits, i.e. "\0" + "2" that would be
53+
* parsed as single character "\02" = "\x02", instead of two characters
54+
* "\x00\x32".
55+
*
56+
* @see https://docs.oracle.com/javase/specs/jls/se7/html/jls-3.html#jls-3.10.6
57+
* @param code character code to represent
58+
* @return string literal representation of given code
59+
*/
60+
override def strLiteralGenericCC(code: Char): String =
61+
"\\%03o".format(code.toInt)
62+
4863
override def genericBinOp(left: Ast.expr, op: Ast.operator, right: Ast.expr, extPrec: Int) = {
4964
(detectType(left), detectType(right), op) match {
5065
case (_: IntType, _: IntType, Ast.operator.Mod) =>

shared/src/main/scala/io/kaitai/struct/translators/LuaTranslator.scala

Lines changed: 1 addition & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,6 @@ class LuaTranslator(provider: TypeProvider, importList: ImportList) extends Base
3939
'\b' -> "\\b",
4040
'\u000b' -> "\\v",
4141
'\f' -> "\\f",
42-
'\u001b' -> "\\027"
4342
)
4443

4544
override def strLiteralUnicode(code: Char): String =
@@ -71,7 +70,7 @@ class LuaTranslator(provider: TypeProvider, importList: ImportList) extends Base
7170
override def doArrayLiteral(t: DataType, value: Seq[Ast.expr]): String =
7271
"{" + value.map((v) => translate(v)).mkString(", ") + "}"
7372
override def doByteArrayLiteral(arr: Seq[Byte]): String =
74-
"\"" + decEscapeByteArray(arr) + "\""
73+
"\"" + Utils.hexEscapeByteArray(arr) + "\""
7574
override def doByteArrayNonLiteral(values: Seq[Ast.expr]): String =
7675
// It is assumed that every expression produces integer in the range [0; 255]
7776
"string.char(" + values.map(translate).mkString(", ") + ")"
@@ -189,14 +188,4 @@ class LuaTranslator(provider: TypeProvider, importList: ImportList) extends Base
189188
case Ast.unaryop.Not => "not"
190189
case _ => super.unaryOp(op)
191190
}
192-
193-
/**
194-
* Converts byte array (Seq[Byte]) into decimal-escaped Lua-style literal
195-
* characters (i.e. like \255).
196-
*
197-
* @param arr byte array to escape
198-
* @return array contents decimal-escaped as string
199-
*/
200-
private def decEscapeByteArray(arr: Seq[Byte]): String =
201-
arr.map((x) => "\\%03d".format(x & 0xff)).mkString
202191
}

shared/src/main/scala/io/kaitai/struct/translators/RustTranslator.scala

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,13 @@ class RustTranslator(provider: TypeProvider, config: RuntimeConfig)
3838
override def strLiteralGenericCC(code: Char): String =
3939
strLiteralUnicode(code)
4040

41+
/**
42+
* Hex escapes of the form `\xHH` in Rust allow only codes in the range 0x00 - 0x7f.
43+
*
44+
* @see https://doc.rust-lang.org/reference/tokens.html#examples
45+
* @param code character code to represent
46+
* @return string literal representation of given code
47+
*/
4148
override def strLiteralUnicode(code: Char): String =
4249
"\\u{%x}".format(code.toInt)
4350

0 commit comments

Comments
 (0)