I have been experimenting with the Styled Text Control (STC) and was interested in how it handles UTF-8. I downloaded the web page http://kermitproject.org/utf8.html and wrote a simple test app to load that file into an STC.
I noticed that while the STC does handle much of the file OK, there are some sections where it doesn’t. For example it shows the Gothic section (lines 325 through 332) as escape codes instead of the correct characters. Web browsers and text editors I’ve tried do display these characters.
However, the real problem is that when you call the GetText() method, the text returned doesn’t match the original text from the file (as shown in the checkData() method in the example below).
Is there a way to configure the STC to handle all the data in the file correctly?
# Module: stc_utf8_test.py
import difflib
import wx
import wx.stc as stc
class STCFrame(wx.Frame):
    """Frame hosting a StyledTextCtrl filled with the contents of utf8.html.

    200 ms after construction, the text held by the control is compared with
    the text that was read from the file; differences are reported on stdout.
    """

    def __init__(self, *args, **kwds):
        """Build the frame, load the file into the control, schedule the check.

        Positional and keyword arguments are forwarded to wx.Frame after
        wx.DEFAULT_FRAME_STYLE has been OR-ed into the "style" keyword.
        """
        requested_style = kwds.get("style", 0)
        kwds["style"] = requested_style | wx.DEFAULT_FRAME_STYLE
        super().__init__(*args, **kwds)

        filename = "utf8.html"
        self.SetTitle(filename)
        self.SetSize((700, 600))

        editor = stc.StyledTextCtrl(self, wx.ID_ANY)
        self.styled_text_ctrl = editor
        # Configure margin 1 as a line-number margin (mask 0, width 40).
        editor.SetMarginType(1, stc.STC_MARGIN_NUMBER)
        editor.SetMarginMask(1, 0)
        editor.SetMarginWidth(1, 40)

        # The control is the only child and takes all available space.
        layout = wx.BoxSizer(wx.VERTICAL)
        layout.Add(editor, 1, wx.EXPAND, 0)
        self.SetSizer(layout)
        self.Layout()

        with open(filename, encoding='utf-8') as source:
            text = source.read()
        editor.SetText(text)

        # Defer the comparison; the original text is passed along so that
        # checkData can compare it with what the control returns.
        wx.CallLater(200, self.checkData, text)

    def checkData(self, text):
        """Compare the control's text with *text* and print any mismatch.

        Nothing is printed when both strings are equal. Otherwise a notice is
        printed, followed by the matching blocks that
        difflib.SequenceMatcher finds between the control's text and *text*.
        """
        current = self.styled_text_ctrl.GetText()
        if current == text:
            return

        print("Text from STC doesn't match original.")
        print("Matching blocks:")
        matcher = difflib.SequenceMatcher(None, current, text)
        for block in matcher.get_matching_blocks():
            print(' ', block)
class MyApp(wx.App):
    """Application object that creates and shows a single STCFrame."""

    def OnInit(self):
        """Create the frame, make it the top window, show it; return True."""
        frame = STCFrame(None, wx.ID_ANY, "")
        self.frame = frame
        self.SetTopWindow(frame)
        frame.Show()
        return True
if __name__ == "__main__":
    # Script entry point: build the application with wx.App's first
    # argument (redirect) set to a false value, then run the event loop.
    app = MyApp(redirect=False)
    app.MainLoop()
Environment: Python 3.8.5, wxPython 4.0.7 gtk3 (phoenix), wxWidgets 3.0.4, Linux Mint 20.