[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Eliot-dev] eliot/dic compdic.cpp
From: Olivier Teulière
Subject: [Eliot-dev] eliot/dic compdic.cpp
Date: Tue, 20 Apr 2010 20:49:59 +0000
CVSROOT: /cvsroot/eliot
Module name: eliot
Changes by: Olivier Teulière <ipkiss> 10/04/20 20:49:59
Modified files:
dic : compdic.cpp
Log message:
Fixed BOM handling
CVSWeb URLs:
http://cvs.savannah.gnu.org/viewcvs/eliot/dic/compdic.cpp?cvsroot=eliot&r1=1.16&r2=1.17
Patches:
Index: compdic.cpp
===================================================================
RCS file: /cvsroot/eliot/eliot/dic/compdic.cpp,v
retrieving revision 1.16
retrieving revision 1.17
diff -u -b -r1.16 -r1.17
--- compdic.cpp 4 Jan 2010 21:00:33 -0000 1.16
+++ compdic.cpp 20 Apr 2010 20:49:58 -0000 1.17
@@ -98,14 +98,25 @@
file.read(&buffer.front(), ioDicSize);
file.close();
+ // If there is a BOM in the file, use an offset to start reading after it
+ size_t bomOffset = 0;
+ if ((uint8_t)buffer[0] == 0xEF &&
+ (uint8_t)buffer[1] == 0xBB &&
+ (uint8_t)buffer[2] == 0xBF)
+ {
+ bomOffset = 3;
+ }
+
// Buffer for the wide characters (it will use at most as many characters
// as the utf-8 version)
wchar_t *wideBuf = new wchar_t[ioDicSize];
try
{
- unsigned int number = readFromUTF8(wideBuf, ioDicSize, &buffer.front(),
- ioDicSize, "load_uncompressed");
+ unsigned int number = readFromUTF8(wideBuf, ioDicSize,
+ (&buffer.front()) + bomOffset,
+ ioDicSize - bomOffset,
+ "load_uncompressed");
ioDicSize = number;
return wideBuf;
}
@@ -158,23 +169,11 @@
wstring letter = tokens[0];
if (letter.size() != 1)
{
- // On the first line, there could be the BOM...
- if (lineNb == 1 && tokens[0].size() > 3 &&
- (uint8_t)tokens[0][0] == 0xEF &&
- (uint8_t)tokens[0][1] == 0xBB &&
- (uint8_t)tokens[0][2] == 0xBF)
- {
- // BOM detected, remove the first char in the wide string
- letter.erase(0, 1);
- }
- else
- {
ostringstream ss;
ss << fmt(_("readLetters: Invalid letter at line %1% "
"(only one character allowed)")) % lineNb;
throw DicException(ss.str());
}
- }
// We don't support non-alphabetical characters in the dictionary
// apart from the joker '?'. For more explanations on the issue, see
[Prev in Thread] Current Thread [Next in Thread]
- [Eliot-dev] eliot/dic compdic.cpp,
Olivier Teulière <=