[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [bug-gperf] Add machine-readable output to gperf
From: |
Sei Lisa |
Subject: |
Re: [bug-gperf] Add machine-readable output to gperf |
Date: |
Mon, 21 Nov 2016 14:21:52 +0100 |
User-agent: |
Mozilla/5.0 (X11; Linux i686; rv:24.0) Gecko/20100101 Icedove/24.7.0 |
Bruno Haible wrote, On 2016-11-21 02:15:
>
> In which language is the project written?
Pascal. It's using the multi-platform Lazarus IDE, http://www.lazarus-ide.org/
If you're asking in regards to whether it allows binding to a C library, the
answer is probably yes (not sure about the portability issues that would need
to be dealt with, though). However, the aim is to avoid any recompilation every
time there's a change in the keywords, which is often. Just a gperf run should
suffice.
For reference, here's the current parser of the hash function and keywords
table (GPL3 licensed; the latter is still WIP):
procedure ReadHashFunc(GperfName: String; BuiltinsName: String);
const
ParseAssoValuesStr = ' asso_values[] =';
ParseNumEntriesStr = '#define MAX_HASH_VALUE ';
ParseAssoStr = 'str[';
ParseUseLastStr = 'len - 1]]';
ParseWordlistStr = 'wordlist[] =';
var
F: Text;
L: String;
Len: Integer;
i: Integer;
start: Integer;
NumAux: Integer;
ReadingAssoValuesTable: Boolean;
begin
If NumEntries <> 0 then
begin
FreeMem(HashTable);
end;
{ Parse the gperf C output to determine the hash function's parameters
(unfortunately, gperf doesn't have a machine-readable output mode) }
NumAux := 0;
NumKeys := 0;
ReadingAssoValuesTable := False;
IncludeLast := False;
Assign(F, GperfName);
Reset(F);
try
While not Eof(F) do
begin
ReadLn(F, L);
Len := Length(L);
if ReadingAssoValuesTable then
begin
{ asso_values[] goes into HashAux (one entry per char) }
i := 1;
While i <= Len do
begin
While (i <= Len) and not (L[i] in ['0'..'9']) do
Inc(i);
If i > Len then
break;
start := i;
While (i <= Len) and (L[i] in ['0'..'9']) do
Inc(i);
If NumAux >= 256 then
raise EParseGperf.Create('Too many entries in asso_values');
HashAux[NumAux] := StrToInt(Copy(L, start, i - start));
Inc(NumAux);
end;
if Pos('}', L) <> 0 then
begin
If NumAux <> 256 then
raise EParseGperf.Create('Too few entries in asso_values');
ReadingAssoValuesTable := False;
end;
end
else
begin
{ MAX_HASH_VALUE goes into NumEntries }
if Copy(L, 1, Length(ParseNumEntriesStr)) = ParseNumEntriesStr then
NumEntries := StrToInt(Copy(L, Length(ParseNumEntriesStr) + 1, Len))
+ 1
else if Pos(ParseAssoValuesStr, L) <> 0 then
ReadingAssoValuesTable := True
else if Pos(ParseWordlistStr, L) <> 0 then
Break
else
begin
{ Search for snippets of the form asso_values[str[n]+m].
The n goes into HashKeys (zero-terminated).
The m goes into HashOffsets. When absent (i.e. most of the time),
it means zero (it's how the gperf algorithm deals with anagrams).
IncludeLast is set to True if n is the string 'len - 1'.
Note asso_values[str[len - 1]] never has +m. }
repeat
// Dot indicates position under consideration.
// XXX indicates any text.
// .XXXstr[n]+m]
start := Pos(ParseAssoStr, L);
If start = 0 then
Break;
L := Copy(L, start + Length(ParseAssoStr), Len);
Len := Length(L);
// str[.n]+m]
If Copy(L, 1, Length(ParseUseLastStr)) = ParseUseLastStr then
// asso_values[str[.len - 1]]
// Rest is ignored, not skipped - next iteration will skip it.
IncludeLast := True
else
begin
i := 1;
// skip number
While (i <= Len) and (L[i] in ['0'..'9']) do
Inc(i);
// parse number into HashKeys
If NumKeys >= 256 then
raise EParseGperf.Create('Too many keys');
HashKeys[NumKeys] := StrToInt(Copy(L, 1, i - 1));
// str[n.]+m]
if (i >= Len - 1) or (L[i] <> ']') then
raise EParseGperf.Create('Invalid syntax in Gperf code (missing
or misplaced '']'' after ''str['')');
Inc(i);
If L[i] = '+' then
begin
Inc(i);
start := i;
// str[n]+.m]
While (i <= Len) and (L[i] in ['0'..'9']) do
Inc(i);
HashOffsets[NumKeys] := StrToInt(Copy(L, start, i - start));
end
else
begin
// str[n].]
HashOffsets[NumKeys] := 0;
end;
If (i >= Len) or (L[i] <> ']') then
raise EParseGperf.Create('Unexpected syntax in Gperf code
(missing last close bracket)');
Inc(NumKeys);
end;
until False;
end;
end
end;
finally
Close(F);
end;
If NumEntries = 0 then
raise EParseGperf.Create('Initialization failed');
// Read keywords data
HashTable := GetMem(NumEntries * SizeOf(THashTableEntry));
For i := 0 to NumEntries - 1 do
HashTable^[i].Entry := '';
Assign(F, BuiltinsName);
Reset(F);
try
While not Eof(F) do
begin
ReadLn(F, L);
{ TODO: Parse builtins.txt into HashTableEntry }
i := HashValue(PChar(L), Length(L));
If (i < 0) or (i >= NumEntries) or (HashTable^[i].Entry <> '') then
raise EParseBuiltins.Create('Mismatch between builtins.txt and
builtins.gperf, found on keyword: ''' + L + '''');
HashTable^[i].Entry := L;
end;
finally
Close(F);
end;
end;
Note parsing the C program needs elaborated code that could fail in future
versions. It could probably be simplified by using regular expressions, but my
main concern is the stability of the output.
The hash value calculator should be reasonably fast, even if not so much as a
version coming from a code generator would be:
function HashValue(P: PChar; Len: Integer): Integer; inline;
var
Key: Integer;
Sum: Integer;
i: Integer;
begin
Assert((NumEntries <> 0) and (NumKeys <> 0), 'Not Initialized');
Sum := Len;
If Len <> 0 then
begin
i := NumKeys;
Repeat
Dec(i);
Key := HashKeys[i];
If Key >= Len then
Break;
Sum := Sum + HashAux[Ord(P[Key]) + HashOffsets[i]];
until i = 0;
If IncludeLast then
Sum := Sum + HashAux[Ord(P[Len - 1])];
end;
Result := Sum;
end;
I'm going to add an optimization that would select a version without
HashOffsets when they are all zero, which is the case for the current language
keywords.
It would also be nice to be able to tell gperf to not consider the last
character, and let it determine a set of suitable keys otherwise. That would
let me avoid the last condition.
Regards,
Sei