@@ -20,6 +20,12 @@ export encoding, encodings_list, Encoding, @enc_str
20
20
21
21
# Abstract supertype of the error types declared in this file
# (`InvalidEncodingError`, `InvalidSequenceError`, `IConvError`).
# NOTE(review): not declared `<: Exception`, so `e isa Exception` is false for
# these errors — confirm whether that is intentional before changing it.
abstract type StringEncodingError end
22
22
23
+ # contiguous 1d byte arrays compatible with C `unsigned char *` API
24
# Members: a plain `Vector{UInt8}`, a `Base.FastContiguousSubArray` over a 1-d
# `UInt8` array, and the `Base.CodeUnits` of a `String` or `SubString{String}`.
# Used as the type of the `inbuf`/`outbuf` arguments of `iconv!` and of the byte
# input `a` of `decode`.
+ const ByteVector= Union{Vector{UInt8},
25
+ Base. FastContiguousSubArray{UInt8,1 ,<: Array{UInt8,1} },
26
+ Base. CodeUnits{UInt8, String}, Base. CodeUnits{UInt8, SubString{String}}}
27
# String types handled directly by `encode`; any other `AbstractString` is
# converted with `String(s)` before being encoded.
+ const ByteString = Union{String,SubString{String}}
28
+
23
29
# The specified encodings, or their combination, are not supported by iconv
24
30
struct InvalidEncodingError <: StringEncodingError
25
31
args:: Tuple{String, String}
@@ -31,7 +37,7 @@ message(::Type{InvalidEncodingError}) = "Conversion from <<1>> to <<2>> not supp
31
37
# Error type carrying a single string argument; the outer constructor defined
# right after this struct fills it with the hex form of the offending bytes.
struct InvalidSequenceError <: StringEncodingError
32
38
# one-element tuple; presumably substituted for `<<1>>` in the `message` template — TODO confirm
args:: Tuple{String}
33
39
end
34
# Convenience constructor: converts the byte sequence to a hex string with
# `bytes2hex` and wraps it in the one-element `args` tuple.
# The added (`+`) line widens the accepted input from `Vector{UInt8}` to any
# `AbstractVector{UInt8}`.
- InvalidSequenceError (seq:: Vector {UInt8} ) = InvalidSequenceError ((bytes2hex (seq),))
40
+ InvalidSequenceError (seq:: AbstractVector {UInt8} ) = InvalidSequenceError ((bytes2hex (seq),))
35
41
# Message template for `InvalidSequenceError`. `<<1>>` is a placeholder,
# presumably replaced by `args[1]` when the error is displayed — TODO confirm
# against the code that renders these messages.
message (:: Type{InvalidSequenceError} ) = " Byte sequence 0x<<1>> is invalid in source encoding or cannot be represented in target encoding"
36
42
37
43
struct IConvError <: StringEncodingError
@@ -123,7 +129,7 @@ function finalize(s::Union{StringEncoder, StringDecoder})
123
129
nothing
124
130
end
125
131
126
- function iconv! (cd:: Ptr{Nothing} , inbuf:: Vector{UInt8} , outbuf:: Vector{UInt8} ,
132
+ function iconv! (cd:: Ptr{Nothing} , inbuf:: ByteVector , outbuf:: ByteVector ,
127
133
inbufptr:: Ref{Ptr{UInt8}} , outbufptr:: Ref{Ptr{UInt8}} ,
128
134
inbytesleft:: Ref{Csize_t} , outbytesleft:: Ref{Csize_t} )
129
135
inbufptr[] = pointer (inbuf)
@@ -499,14 +505,20 @@ end
499
505
# # Functions to encode/decode strings
500
506
501
507
"""
502
- decode([T,] a::Vector {UInt8}, enc)
508
+ decode([T,] a::AbstractVector {UInt8}, enc)
503
509
504
510
Convert an array of bytes `a` representing text in encoding `enc` to a string of type `T`.
505
511
By default, a `String` is returned.
506
512
513
+ To `decode` a string `s::String` whose bytes are in a non-UTF-8 encoding, use
514
+ `decode(codeunits(s), enc)` to act on the underlying byte array.
515
+
507
516
`enc` can be specified either as a string or as an `Encoding` object.
517
+ The input data `a` can be a `Vector{UInt8}` of bytes, a contiguous
518
+ subarray thereof, or the `codeunits` of a `String` (or substring
519
+ thereof).
508
520
"""
509
- function decode (:: Type{T} , a:: Vector{UInt8} , enc:: Encoding ) where {T<: AbstractString }
521
+ function decode (:: Type{T} , a:: ByteVector , enc:: Encoding ) where {T<: AbstractString }
510
522
b = IOBuffer (a)
511
523
try
512
524
T (read (StringDecoder (b, enc, encoding (T))))
@@ -515,19 +527,19 @@ function decode(::Type{T}, a::Vector{UInt8}, enc::Encoding) where {T<:AbstractSt
515
527
end
516
528
end
517
529
518
# Convenience method: accepts the encoding as a string, wraps it in `Encoding`,
# and delegates to the `enc::Encoding` method with the same target type `T`.
# The added (`+`) line changes the byte-input type from `Vector{UInt8}` to `ByteVector`.
- decode (:: Type{T} , a:: Vector{UInt8} , enc:: AbstractString ) where {T<: AbstractString } =
530
+ decode (:: Type{T} , a:: ByteVector , enc:: AbstractString ) where {T<: AbstractString } =
519
531
decode (T, a, Encoding (enc))
520
532
521
# Two-argument form: defaults the result type to `String` and forwards to the
# three-argument `decode`.
# The added (`+`) line replaces both removed methods with a single one that
# accepts `enc` as either an `AbstractString` or an `Encoding`, and takes a
# `ByteVector` instead of a `Vector{UInt8}`.
- decode (a:: Vector{UInt8} , enc:: AbstractString ) = decode (String, a, Encoding (enc))
522
- decode (a:: Vector{UInt8} , enc:: Union{AbstractString, Encoding} ) = decode (String, a, enc)
533
+ decode (a:: ByteVector , enc:: Union{AbstractString, Encoding} ) = decode (String, a, enc)
523
534
524
535
"""
525
536
encode(s::AbstractString, enc)
526
537
527
538
Convert string `s` to an array of bytes representing text in encoding `enc`.
528
539
`enc` can be specified either as a string or as an `Encoding` object.
529
540
"""
530
- function encode (s:: AbstractString , enc:: Encoding )
541
+ encode (s:: AbstractString , enc:: Encoding ) = encode (String (s), enc)
542
+ function encode (s:: ByteString , enc:: Encoding )
531
543
b = IOBuffer ()
532
544
p = StringEncoder (b, enc, encoding (typeof (s)))
533
545
write (p, s)
0 commit comments