1
1
#include " text-encoding.h"
2
2
3
- #include <stddef.h >
3
+ #include < array >
4
4
5
- static const cc_u16l shiftjis_to_unicode_lookup [ 0x3100 ] = {
5
+ static const std::array< cc_u16l, 0x3100 > shiftjis_to_unicode_lookup = {
6
6
0x0000 , 0x0001 , 0x0002 , 0x0003 , 0x0004 , 0x0005 , 0x0006 , 0x0007 , 0x0008 , 0x0009 , 0x000A , 0x000B , 0x000C , 0x000D , 0x000E , 0x000F ,
7
7
0x0010 , 0x0011 , 0x0012 , 0x0013 , 0x0014 , 0x0015 , 0x0016 , 0x0017 , 0x0018 , 0x0019 , 0x001A , 0x001B , 0x001C , 0x001D , 0x001E , 0x001F ,
8
8
0x0020 , 0x0021 , 0x0022 , 0x0023 , 0x0024 , 0x0025 , 0x0026 , 0x0027 , 0x0028 , 0x0029 , 0x002A , 0x002B , 0x002C , 0x002D , 0x002E , 0x002F ,
@@ -796,7 +796,7 @@ cc_u16l ShiftJISToUTF32(const unsigned char* const in_buffer, cc_u8f* const byte
796
796
switch (in_buffer[0 ] & 0xF0 )
797
797
{
798
798
case 0x80 :
799
- lookup_index = 0x100 ;
799
+ lookup_index = 0x0100 ;
800
800
break ;
801
801
case 0x90 :
802
802
lookup_index = 0x1100 ;
@@ -806,52 +806,51 @@ cc_u16l ShiftJISToUTF32(const unsigned char* const in_buffer, cc_u8f* const byte
806
806
break ;
807
807
808
808
default :
809
- if (bytes_read != NULL )
809
+ if (bytes_read != nullptr )
810
810
*bytes_read = 1 ;
811
811
812
812
return shiftjis_to_unicode_lookup[in_buffer[0 ]];
813
813
}
814
814
815
- lookup_index += ((in_buffer [0 ] << 8 ) | in_buffer [1 ]) & 0xFFF ;
815
+ lookup_index += ((static_cast <cc_u16f>( in_buffer[0 ]) << 8 ) | in_buffer[1 ]) & 0xFFF ;
816
816
817
- if (bytes_read != NULL )
817
+ if (bytes_read != nullptr )
818
818
*bytes_read = 2 ;
819
819
820
820
return shiftjis_to_unicode_lookup[lookup_index];
821
821
}
822
822
823
- cc_u8f UTF32ToUTF8 (unsigned char * const out_buffer , const cc_u32f utf32_codepoint )
823
+ std::optional<std::string> UTF32ToUTF8 (const cc_u32f utf32_codepoint)
824
824
{
825
+ std::string utf8;
826
+ utf8.reserve (4 );
827
+
825
828
if (utf32_codepoint < 0x80 )
826
829
{
827
- out_buffer [0 ] = utf32_codepoint ;
828
- return 1 ;
830
+ utf8.push_back (static_cast <char >(utf32_codepoint));
829
831
}
830
832
else if (utf32_codepoint < 0x800 )
831
833
{
832
- out_buffer [0 ] = 0xC0 | utf32_codepoint >> 6 ;
833
- out_buffer [1 ] = 0x80 | (utf32_codepoint >> 0 & 0x3F );
834
- return 2 ;
834
+ utf8.push_back (static_cast <char >(0xC0 | utf32_codepoint >> 6 ));
835
+ utf8.push_back (static_cast <char >(0x80 | (utf32_codepoint >> 0 & 0x3F )));
835
836
}
836
837
else if (utf32_codepoint < 0x10000 )
837
838
{
838
- out_buffer [0 ] = 0xE0 | utf32_codepoint >> 12 ;
839
- out_buffer [1 ] = 0x80 | (utf32_codepoint >> 6 & 0x3F );
840
- out_buffer [2 ] = 0x80 | (utf32_codepoint >> 0 & 0x3F );
841
- return 3 ;
839
+ utf8.push_back (static_cast <char >(0xE0 | utf32_codepoint >> 12 ));
840
+ utf8.push_back (static_cast <char >(0x80 | (utf32_codepoint >> 6 & 0x3F )));
841
+ utf8.push_back (static_cast <char >(0x80 | (utf32_codepoint >> 0 & 0x3F )));
842
842
}
843
843
else if (utf32_codepoint < 0x110000 )
844
844
{
845
- out_buffer [0 ] = 0xF0 | utf32_codepoint >> 18 ;
846
- out_buffer [1 ] = 0x80 | (utf32_codepoint >> 12 & 0x3F );
847
- out_buffer [2 ] = 0x80 | (utf32_codepoint >> 6 & 0x3F );
848
- out_buffer [3 ] = 0x80 | (utf32_codepoint >> 0 & 0x3F );
849
- return 4 ;
845
+ utf8.push_back (static_cast <char >(0xF0 | utf32_codepoint >> 18 ));
846
+ utf8.push_back (static_cast <char >(0x80 | (utf32_codepoint >> 12 & 0x3F )));
847
+ utf8.push_back (static_cast <char >(0x80 | (utf32_codepoint >> 6 & 0x3F )));
848
+ utf8.push_back (static_cast <char >(0x80 | (utf32_codepoint >> 0 & 0x3F )));
850
849
}
851
850
else
852
851
{
853
- /* TODO: Report failure. */
854
- out_buffer [0 ] = ' ' ;
855
- return 1 ;
852
+ return std::nullopt;
856
853
}
854
+
855
+ return utf8;
857
856
}
0 commit comments