@@ -388,18 +388,19 @@ inline uint64_t lineRangeHash(uint64_t hashSeed, std::vector<wchar_t>& line, int
388
388
}
389
389
390
390
391
- uint64_t regexIgnoreLineHash (uint64_t hashSeed, const std::vector<char >& line, const CompareOptions& options)
391
+ uint64_t regexIgnoreLineHash (uint64_t hashSeed, int codepage, const std::vector<char >& line,
392
+ const CompareOptions& options)
392
393
{
393
- const intptr_t len = static_cast <intptr_t >(line.size ());
394
+ const int len = static_cast <int >(line.size ());
394
395
395
396
if (len == 0 )
396
397
return hashSeed;
397
398
398
- const int wLen = ::MultiByteToWideChar (CP_UTF8 , 0 , line.data (), static_cast < int >( len) , NULL , 0 );
399
+ const int wLen = ::MultiByteToWideChar (codepage , 0 , line.data (), len, NULL , 0 );
399
400
400
401
std::vector<wchar_t > wLine (wLen);
401
402
402
- ::MultiByteToWideChar (CP_UTF8 , 0 , line.data(), static_cast<int>( len) , wLine.data(), wLen);
403
+ ::MultiByteToWideChar (codepage , 0 , line.data(), len, wLine.data(), wLen);
403
404
404
405
#ifndef MULTITHREAD
405
406
LOGD (LOG_ALGO, " line len " + std::to_string (len) + " to wide char len " + std::to_string (wLen) + " \n " );
@@ -459,6 +460,8 @@ void getLines(DocCmpInfo& doc, const CompareOptions& options)
459
460
return ;
460
461
}
461
462
463
+ const int codepage = getCodepage (doc.view );
464
+
462
465
const intptr_t docLine = secLine + doc.section .off ;
463
466
const intptr_t lineStart = getLineStart (doc.view , docLine);
464
467
const intptr_t lineEnd = getLineEnd (doc.view , docLine);
@@ -478,12 +481,12 @@ void getLines(DocCmpInfo& doc, const CompareOptions& options)
478
481
" , view " + std::to_string (doc.view ) + " \n " );
479
482
#endif
480
483
481
- newLine.hash = regexIgnoreLineHash (newLine.hash , line, options);
484
+ newLine.hash = regexIgnoreLineHash (newLine.hash , codepage, line, options);
482
485
}
483
486
else
484
487
{
485
488
if (options.ignoreCase )
486
- toLowerCase (line);
489
+ toLowerCase (line, codepage );
487
490
488
491
for (intptr_t i = 0 ; i < lineEnd - lineStart; ++i)
489
492
{
@@ -513,19 +516,19 @@ charType getCharTypeW(wchar_t letter)
513
516
}
514
517
515
518
516
- inline void recalculateWordPos (std::vector<Word>& words, const std::vector<wchar_t >& line)
519
+ inline void recalculateWordPos (int codepage, std::vector<Word>& words, const std::vector<wchar_t >& line)
517
520
{
518
521
intptr_t bytePos = 0 ;
519
522
intptr_t currPos = 0 ;
520
523
521
524
for (auto & word : words)
522
525
{
523
526
if (currPos < word.pos )
524
- bytePos += ::WideCharToMultiByte (CP_UTF8 , 0 , line.data () + currPos, static_cast <int >(word.pos - currPos),
527
+ bytePos += ::WideCharToMultiByte (codepage , 0 , line.data () + currPos, static_cast <int >(word.pos - currPos),
525
528
NULL , 0 , NULL , NULL );
526
529
527
530
currPos = word.pos + word.len ;
528
- word.len = ::WideCharToMultiByte (CP_UTF8 , 0 , line.data () + word.pos , static_cast <int >(word.len ),
531
+ word.len = ::WideCharToMultiByte (codepage , 0 , line.data () + word.pos , static_cast <int >(word.len ),
529
532
NULL , 0 , NULL , NULL );
530
533
word.pos = bytePos;
531
534
bytePos += word.len ;
@@ -616,6 +619,8 @@ std::vector<Word> getLineWords(int view, intptr_t docLine, const CompareOptions&
616
619
{
617
620
std::vector<Word> words;
618
621
622
+ const int codepage = getCodepage (view);
623
+
619
624
const intptr_t lineStart = getLineStart (view, docLine);
620
625
const intptr_t lineEnd = getLineEnd (view, docLine);
621
626
@@ -625,11 +630,11 @@ std::vector<Word> getLineWords(int view, intptr_t docLine, const CompareOptions&
625
630
626
631
const int len = static_cast <int >(line.size ());
627
632
628
- const int wLen = ::MultiByteToWideChar (CP_UTF8 , 0 , line.data (), len, NULL , 0 );
633
+ const int wLen = ::MultiByteToWideChar (codepage , 0 , line.data (), len, NULL , 0 );
629
634
630
635
std::vector<wchar_t > wLine (wLen);
631
636
632
- ::MultiByteToWideChar (CP_UTF8 , 0 , line.data(), len, wLine.data(), wLen);
637
+ ::MultiByteToWideChar (codepage , 0 , line.data(), len, wLine.data(), wLen);
633
638
634
639
if (options.ignoreRegex )
635
640
words = getRegexIgnoreLineWords (wLine, options);
@@ -638,26 +643,26 @@ std::vector<Word> getLineWords(int view, intptr_t docLine, const CompareOptions&
638
643
639
644
// In case of UTF-16 or UTF-32 find words byte positions and lengths because Scintilla uses those
640
645
if (wLen != len)
641
- recalculateWordPos (words, wLine);
646
+ recalculateWordPos (codepage, words, wLine);
642
647
}
643
648
644
649
return words;
645
650
}
646
651
647
652
648
- inline void recalculateCharPos (std::vector<Char>& chars, const std::vector<wchar_t >& sec)
653
+ inline void recalculateCharPos (int codepage, std::vector<Char>& chars, const std::vector<wchar_t >& sec)
649
654
{
650
655
intptr_t bytePos = 0 ;
651
656
intptr_t currPos = 0 ;
652
657
653
658
for (auto & ch : chars)
654
659
{
655
660
if (currPos < ch.pos )
656
- bytePos += ::WideCharToMultiByte (CP_UTF8 , 0 , sec.data () + currPos, static_cast <int >(ch.pos - currPos),
661
+ bytePos += ::WideCharToMultiByte (codepage , 0 , sec.data () + currPos, static_cast <int >(ch.pos - currPos),
657
662
NULL , 0 , NULL , NULL );
658
663
659
664
currPos = ch.pos + 1 ;
660
- const int charLen = ::WideCharToMultiByte (CP_UTF8 , 0 , sec.data () + ch.pos , 1 , NULL , 0 , NULL , NULL );
665
+ const int charLen = ::WideCharToMultiByte (codepage , 0 , sec.data () + ch.pos , 1 , NULL , 0 , NULL , NULL );
661
666
ch.pos = bytePos;
662
667
bytePos += charLen;
663
668
}
@@ -695,23 +700,25 @@ std::vector<Char> getSectionChars(int view, intptr_t secStart, intptr_t secEnd,
695
700
696
701
if (secStart < secEnd)
697
702
{
703
+ const int codepage = getCodepage (view);
704
+
698
705
std::vector<char > sec = getText (view, secStart, secEnd);
699
706
700
707
const int len = static_cast <int >(sec.size ());
701
708
702
- const int wLen = ::MultiByteToWideChar (CP_UTF8 , 0 , sec.data (), len, NULL , 0 );
709
+ const int wLen = ::MultiByteToWideChar (codepage , 0 , sec.data (), len, NULL , 0 );
703
710
704
711
std::vector<wchar_t > wSec (wLen);
705
712
706
- ::MultiByteToWideChar (CP_UTF8 , 0 , sec.data(), len, wSec.data(), wLen);
713
+ ::MultiByteToWideChar (codepage , 0 , sec.data(), len, wSec.data(), wLen);
707
714
708
715
chars.reserve (wLen - 1 );
709
716
710
717
getSectionRangeChars (chars, wSec, 0 , wLen - 1 , options);
711
718
712
719
// In case of UTF-16 or UTF-32 find chars byte positions because Scintilla uses those
713
720
if (wLen != len)
714
- recalculateCharPos (chars, wSec);
721
+ recalculateCharPos (codepage, chars, wSec);
715
722
}
716
723
717
724
return chars;
@@ -722,17 +729,19 @@ std::vector<Char> getRegexIgnoreChars(int view, intptr_t secStart, intptr_t secE
722
729
{
723
730
std::vector<Char> chars;
724
731
732
+ const int codepage = getCodepage (view);
733
+
725
734
if (secStart < secEnd)
726
735
{
727
736
std::vector<char > sec = getText (view, secStart, secEnd);
728
737
729
738
const int len = static_cast <int >(sec.size ());
730
739
731
- const int wLen = ::MultiByteToWideChar (CP_UTF8 , 0 , sec.data (), len, NULL , 0 );
740
+ const int wLen = ::MultiByteToWideChar (codepage , 0 , sec.data (), len, NULL , 0 );
732
741
733
742
std::vector<wchar_t > wSec (wLen);
734
743
735
- ::MultiByteToWideChar (CP_UTF8 , 0 , sec.data(), len, wSec.data(), wLen);
744
+ ::MultiByteToWideChar (codepage , 0 , sec.data(), len, wSec.data(), wLen);
736
745
737
746
chars.reserve (wLen - 1 );
738
747
@@ -753,7 +762,7 @@ std::vector<Char> getRegexIgnoreChars(int view, intptr_t secStart, intptr_t secE
753
762
754
763
// In case of UTF-16 or UTF-32 find chars byte positions because Scintilla uses those
755
764
if (wLen != len)
756
- recalculateCharPos (chars, wSec);
765
+ recalculateCharPos (codepage, chars, wSec);
757
766
}
758
767
759
768
return chars;
0 commit comments