Skip to content

Commit 73b4794

Browse files
committed
Respect Scintilla codepage when doing MB-WC conversions
1 parent 0de3f86 commit 73b4794

File tree

3 files changed

+47
-26
lines changed

3 files changed

+47
-26
lines changed

src/Engine/Engine.cpp

Lines changed: 30 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -388,18 +388,19 @@ inline uint64_t lineRangeHash(uint64_t hashSeed, std::vector<wchar_t>& line, int
388388
}
389389

390390

391-
uint64_t regexIgnoreLineHash(uint64_t hashSeed, const std::vector<char>& line, const CompareOptions& options)
391+
uint64_t regexIgnoreLineHash(uint64_t hashSeed, int codepage, const std::vector<char>& line,
392+
const CompareOptions& options)
392393
{
393-
const intptr_t len = static_cast<intptr_t>(line.size());
394+
const int len = static_cast<int>(line.size());
394395

395396
if (len == 0)
396397
return hashSeed;
397398

398-
const int wLen = ::MultiByteToWideChar(CP_UTF8, 0, line.data(), static_cast<int>(len), NULL, 0);
399+
const int wLen = ::MultiByteToWideChar(codepage, 0, line.data(), len, NULL, 0);
399400

400401
std::vector<wchar_t> wLine(wLen);
401402

402-
::MultiByteToWideChar(CP_UTF8, 0, line.data(), static_cast<int>(len), wLine.data(), wLen);
403+
::MultiByteToWideChar(codepage, 0, line.data(), len, wLine.data(), wLen);
403404

404405
#ifndef MULTITHREAD
405406
LOGD(LOG_ALGO, "line len " + std::to_string(len) + " to wide char len " + std::to_string(wLen) + "\n");
@@ -459,6 +460,8 @@ void getLines(DocCmpInfo& doc, const CompareOptions& options)
459460
return;
460461
}
461462

463+
const int codepage = getCodepage(doc.view);
464+
462465
const intptr_t docLine = secLine + doc.section.off;
463466
const intptr_t lineStart = getLineStart(doc.view, docLine);
464467
const intptr_t lineEnd = getLineEnd(doc.view, docLine);
@@ -478,12 +481,12 @@ void getLines(DocCmpInfo& doc, const CompareOptions& options)
478481
", view " + std::to_string(doc.view) + "\n");
479482
#endif
480483

481-
newLine.hash = regexIgnoreLineHash(newLine.hash, line, options);
484+
newLine.hash = regexIgnoreLineHash(newLine.hash, codepage, line, options);
482485
}
483486
else
484487
{
485488
if (options.ignoreCase)
486-
toLowerCase(line);
489+
toLowerCase(line, codepage);
487490

488491
for (intptr_t i = 0; i < lineEnd - lineStart; ++i)
489492
{
@@ -513,19 +516,19 @@ charType getCharTypeW(wchar_t letter)
513516
}
514517

515518

516-
inline void recalculateWordPos(std::vector<Word>& words, const std::vector<wchar_t>& line)
519+
inline void recalculateWordPos(int codepage, std::vector<Word>& words, const std::vector<wchar_t>& line)
517520
{
518521
intptr_t bytePos = 0;
519522
intptr_t currPos = 0;
520523

521524
for (auto& word : words)
522525
{
523526
if (currPos < word.pos)
524-
bytePos += ::WideCharToMultiByte(CP_UTF8, 0, line.data() + currPos, static_cast<int>(word.pos - currPos),
527+
bytePos += ::WideCharToMultiByte(codepage, 0, line.data() + currPos, static_cast<int>(word.pos - currPos),
525528
NULL, 0, NULL, NULL);
526529

527530
currPos = word.pos + word.len;
528-
word.len = ::WideCharToMultiByte(CP_UTF8, 0, line.data() + word.pos, static_cast<int>(word.len),
531+
word.len = ::WideCharToMultiByte(codepage, 0, line.data() + word.pos, static_cast<int>(word.len),
529532
NULL, 0, NULL, NULL);
530533
word.pos = bytePos;
531534
bytePos += word.len;
@@ -616,6 +619,8 @@ std::vector<Word> getLineWords(int view, intptr_t docLine, const CompareOptions&
616619
{
617620
std::vector<Word> words;
618621

622+
const int codepage = getCodepage(view);
623+
619624
const intptr_t lineStart = getLineStart(view, docLine);
620625
const intptr_t lineEnd = getLineEnd(view, docLine);
621626

@@ -625,11 +630,11 @@ std::vector<Word> getLineWords(int view, intptr_t docLine, const CompareOptions&
625630

626631
const int len = static_cast<int>(line.size());
627632

628-
const int wLen = ::MultiByteToWideChar(CP_UTF8, 0, line.data(), len, NULL, 0);
633+
const int wLen = ::MultiByteToWideChar(codepage, 0, line.data(), len, NULL, 0);
629634

630635
std::vector<wchar_t> wLine(wLen);
631636

632-
::MultiByteToWideChar(CP_UTF8, 0, line.data(), len, wLine.data(), wLen);
637+
::MultiByteToWideChar(codepage, 0, line.data(), len, wLine.data(), wLen);
633638

634639
if (options.ignoreRegex)
635640
words = getRegexIgnoreLineWords(wLine, options);
@@ -638,26 +643,26 @@ std::vector<Word> getLineWords(int view, intptr_t docLine, const CompareOptions&
638643

639644
// If the wide-char count differs from the byte count the text contains multi-byte characters - find words byte positions and lengths because Scintilla uses those
640645
if (wLen != len)
641-
recalculateWordPos(words, wLine);
646+
recalculateWordPos(codepage, words, wLine);
642647
}
643648

644649
return words;
645650
}
646651

647652

648-
inline void recalculateCharPos(std::vector<Char>& chars, const std::vector<wchar_t>& sec)
653+
inline void recalculateCharPos(int codepage, std::vector<Char>& chars, const std::vector<wchar_t>& sec)
649654
{
650655
intptr_t bytePos = 0;
651656
intptr_t currPos = 0;
652657

653658
for (auto& ch : chars)
654659
{
655660
if (currPos < ch.pos)
656-
bytePos += ::WideCharToMultiByte(CP_UTF8, 0, sec.data() + currPos, static_cast<int>(ch.pos - currPos),
661+
bytePos += ::WideCharToMultiByte(codepage, 0, sec.data() + currPos, static_cast<int>(ch.pos - currPos),
657662
NULL, 0, NULL, NULL);
658663

659664
currPos = ch.pos + 1;
660-
const int charLen = ::WideCharToMultiByte(CP_UTF8, 0, sec.data() + ch.pos, 1, NULL, 0, NULL, NULL);
665+
const int charLen = ::WideCharToMultiByte(codepage, 0, sec.data() + ch.pos, 1, NULL, 0, NULL, NULL);
661666
ch.pos = bytePos;
662667
bytePos += charLen;
663668
}
@@ -695,23 +700,25 @@ std::vector<Char> getSectionChars(int view, intptr_t secStart, intptr_t secEnd,
695700

696701
if (secStart < secEnd)
697702
{
703+
const int codepage = getCodepage(view);
704+
698705
std::vector<char> sec = getText(view, secStart, secEnd);
699706

700707
const int len = static_cast<int>(sec.size());
701708

702-
const int wLen = ::MultiByteToWideChar(CP_UTF8, 0, sec.data(), len, NULL, 0);
709+
const int wLen = ::MultiByteToWideChar(codepage, 0, sec.data(), len, NULL, 0);
703710

704711
std::vector<wchar_t> wSec(wLen);
705712

706-
::MultiByteToWideChar(CP_UTF8, 0, sec.data(), len, wSec.data(), wLen);
713+
::MultiByteToWideChar(codepage, 0, sec.data(), len, wSec.data(), wLen);
707714

708715
chars.reserve(wLen - 1);
709716

710717
getSectionRangeChars(chars, wSec, 0, wLen - 1, options);
711718

712719
// If the wide-char count differs from the byte count the text contains multi-byte characters - find chars byte positions because Scintilla uses those
713720
if (wLen != len)
714-
recalculateCharPos(chars, wSec);
721+
recalculateCharPos(codepage, chars, wSec);
715722
}
716723

717724
return chars;
@@ -722,17 +729,19 @@ std::vector<Char> getRegexIgnoreChars(int view, intptr_t secStart, intptr_t secE
722729
{
723730
std::vector<Char> chars;
724731

732+
const int codepage = getCodepage(view);
733+
725734
if (secStart < secEnd)
726735
{
727736
std::vector<char> sec = getText(view, secStart, secEnd);
728737

729738
const int len = static_cast<int>(sec.size());
730739

731-
const int wLen = ::MultiByteToWideChar(CP_UTF8, 0, sec.data(), len, NULL, 0);
740+
const int wLen = ::MultiByteToWideChar(codepage, 0, sec.data(), len, NULL, 0);
732741

733742
std::vector<wchar_t> wSec(wLen);
734743

735-
::MultiByteToWideChar(CP_UTF8, 0, sec.data(), len, wSec.data(), wLen);
744+
::MultiByteToWideChar(codepage, 0, sec.data(), len, wSec.data(), wLen);
736745

737746
chars.reserve(wLen - 1);
738747

@@ -753,7 +762,7 @@ std::vector<Char> getRegexIgnoreChars(int view, intptr_t secStart, intptr_t secE
753762

754763
// If the wide-char count differs from the byte count the text contains multi-byte characters - find chars byte positions because Scintilla uses those
755764
if (wLen != len)
756-
recalculateCharPos(chars, wSec);
765+
recalculateCharPos(codepage, chars, wSec);
757766
}
758767

759768
return chars;

src/NppHelpers.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -535,24 +535,24 @@ std::vector<char> getText(int view, intptr_t startPos, intptr_t endPos)
535535
}
536536

537537

538-
void toLowerCase(std::vector<char>& text)
538+
void toLowerCase(std::vector<char>& text, int codepage)
539539
{
540540
const int len = static_cast<int>(text.size());
541541

542542
if (len == 0)
543543
return;
544544

545-
const int wLen = ::MultiByteToWideChar(CP_UTF8, 0, text.data(), len, NULL, 0);
545+
const int wLen = ::MultiByteToWideChar(codepage, 0, text.data(), len, NULL, 0);
546546

547547
std::vector<wchar_t> wText(wLen);
548548

549-
::MultiByteToWideChar(CP_UTF8, 0, text.data(), len, wText.data(), wLen);
549+
::MultiByteToWideChar(codepage, 0, text.data(), len, wText.data(), wLen);
550550

551551
wText.push_back(L'\0');
552552
::CharLowerW((LPWSTR)wText.data());
553553
wText.pop_back();
554554

555-
::WideCharToMultiByte(CP_UTF8, 0, wText.data(), wLen, text.data(), len, NULL, NULL);
555+
::WideCharToMultiByte(codepage, 0, wText.data(), wLen, text.data(), len, NULL, NULL);
556556
}
557557

558558

src/NppHelpers.h

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -421,6 +421,18 @@ inline int getEncoding(LRESULT buffId)
421421
}
422422

423423

424+
inline int getCodepage(int view)
425+
{
426+
return (int)CallScintilla(view, SCI_GETCODEPAGE, 0, 0);
427+
}
428+
429+
430+
inline int getCharacterSet(int view, int style = STYLE_DEFAULT)
431+
{
432+
return (int)CallScintilla(view, SCI_STYLEGETCHARACTERSET, style, 0);
433+
}
434+
435+
424436
inline intptr_t getDocId(int view)
425437
{
426438
return CallScintilla(view, SCI_GETDOCPOINTER, 0, 0);
@@ -653,7 +665,7 @@ inline void clearAnnotation(int view, intptr_t line)
653665
void clearAnnotations(int view, intptr_t startLine, intptr_t length);
654666

655667
std::vector<char> getText(int view, intptr_t startPos, intptr_t endPos);
656-
void toLowerCase(std::vector<char>& text);
668+
void toLowerCase(std::vector<char>& text, int codepage = CP_UTF8);
657669

658670
void addBlankSection(int view, intptr_t line, intptr_t length, intptr_t selectionMarkPosition = 0,
659671
const char *text = nullptr);

0 commit comments

Comments
 (0)