From: Markus Armbruster
Subject: Re: [Qemu-devel] [PATCH v2 1/4] qjson: replace QString in JSONLexer with GString
Date: Wed, 25 Nov 2015 13:48:10 +0100
User-agent: Gnus/5.13 (Gnus v5.13) Emacs/24.5 (gnu/linux)

Paolo Bonzini <address@hidden> writes:

> JSONLexer only needs a simple resizable buffer.  json-streamer.c
> can allocate memory for each token instead of relying on reference
> counting of QStrings.

On its own, this doesn't really save anything; it merely decouples the
lexer's buffer from the rest of the machinery, which still allocates the
same QStrings as before.  But it enables the rest of your work.

> Signed-off-by: Paolo Bonzini <address@hidden>
> ---
>  include/qapi/qmp/json-lexer.h    |  7 +++----
>  include/qapi/qmp/json-streamer.h |  1 +
>  qobject/json-lexer.c             | 22 ++++++++--------------
>  qobject/json-streamer.c          | 10 +++++-----
>  4 files changed, 17 insertions(+), 23 deletions(-)
>
> diff --git a/include/qapi/qmp/json-lexer.h b/include/qapi/qmp/json-lexer.h
> index cdff046..584b27d 100644
> --- a/include/qapi/qmp/json-lexer.h
> +++ b/include/qapi/qmp/json-lexer.h
> @@ -14,8 +14,7 @@
>  #ifndef QEMU_JSON_LEXER_H
>  #define QEMU_JSON_LEXER_H
>  
> -#include "qapi/qmp/qstring.h"
> -#include "qapi/qmp/qlist.h"
> +#include "glib-compat.h"
>  
>  typedef enum json_token_type {
>      JSON_OPERATOR = 100,
> @@ -30,13 +29,13 @@ typedef enum json_token_type {
>  
>  typedef struct JSONLexer JSONLexer;
>  
> -typedef void (JSONLexerEmitter)(JSONLexer *, QString *, JSONTokenType, int x, int y);
> +typedef void (JSONLexerEmitter)(JSONLexer *, GString *, JSONTokenType, int x, int y);
>  
>  struct JSONLexer
>  {
>      JSONLexerEmitter *emit;
>      int state;
> -    QString *token;
> +    GString *token;
>      int x, y;
>  };
>  
> diff --git a/include/qapi/qmp/json-streamer.h b/include/qapi/qmp/json-streamer.h
> index 823f7d7..e901144 100644
> --- a/include/qapi/qmp/json-streamer.h
> +++ b/include/qapi/qmp/json-streamer.h
> @@ -14,6 +14,7 @@
>  #ifndef QEMU_JSON_STREAMER_H
>  #define QEMU_JSON_STREAMER_H
>  
> +#include <stdint.h>

I guess you need this because json-lexer.h no longer includes it via
qstring.h.  The only use appears to be uint64_t token_size, where size_t
would be more appropriate.  Follow-up cleanup.
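
For what it's worth, a minimal sketch of that follow-up (assuming
JSONMessageParser's token_size really is the only thing in this header
that wants <stdint.h>; the other fields are abridged):

    /* Hypothetical follow-up cleanup, not part of this patch: track the
     * accumulated token size as size_t, so json-streamer.h no longer
     * needs <stdint.h>.  Fields other than token_size are elided. */
    typedef struct JSONMessageParser {
        /* ... emit callback, JSONLexer lexer, counters, QList *tokens ... */
        size_t token_size;          /* was: uint64_t token_size */
    } JSONMessageParser;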

>  #include "qapi/qmp/qlist.h"
>  #include "qapi/qmp/json-lexer.h"
>  
> diff --git a/qobject/json-lexer.c b/qobject/json-lexer.c
> index b19623e..c2060f8 100644
> --- a/qobject/json-lexer.c
> +++ b/qobject/json-lexer.c
> @@ -11,12 +11,9 @@
>   *
>   */
>  
> -#include "qapi/qmp/qstring.h"
> -#include "qapi/qmp/qlist.h"
> -#include "qapi/qmp/qdict.h"
> -#include "qapi/qmp/qint.h"
>  #include "qemu-common.h"
>  #include "qapi/qmp/json-lexer.h"
> +#include <stdint.h>
>  
>  #define MAX_TOKEN_SIZE (64ULL << 20)
>  
> @@ -272,7 +269,7 @@ void json_lexer_init(JSONLexer *lexer, JSONLexerEmitter func)
>  {
>      lexer->emit = func;
>      lexer->state = IN_START;
> -    lexer->token = qstring_new();
> +    lexer->token = g_string_sized_new(3);

Where does the 3 come from?

As long as g_string_truncate() doesn't reallocate, lexer->token will
grow to hold the largest token seen so far, and stay there.

Why not MAX_TOKEN_SIZE and be done with it?  Oh, it's 64 MiB.  Ookay,
I'd call that... generous.
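
To make that concrete, a standalone GLib sketch (not part of the patch;
the 1000-byte loop is just an arbitrary stand-in for a large token):

    /* g_string_truncate() resets len but keeps the allocation, so the
     * buffer stays as large as the largest token appended so far. */
    #include <glib.h>
    #include <stdio.h>

    int main(void)
    {
        GString *token = g_string_sized_new(3);

        for (int i = 0; i < 1000; i++) {
            g_string_append_c(token, 'x');      /* simulate one big token */
        }
        printf("len=%zu allocated=%zu\n",
               (size_t)token->len, (size_t)token->allocated_len);

        g_string_truncate(token, 0);            /* what the lexer now does */
        printf("len=%zu allocated=%zu\n",       /* len 0, allocation kept */
               (size_t)token->len, (size_t)token->allocated_len);

        g_string_free(token, TRUE);
        return 0;
    }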

>      lexer->x = lexer->y = 0;
>  }
>  
> @@ -290,7 +287,7 @@ static int json_lexer_feed_char(JSONLexer *lexer, char ch, bool flush)
>          new_state = json_lexer[lexer->state][(uint8_t)ch];
>          char_consumed = !TERMINAL_NEEDED_LOOKAHEAD(lexer->state, new_state);
>          if (char_consumed) {
> -            qstring_append_chr(lexer->token, ch);
> +            g_string_append_c(lexer->token, ch);
>          }
>  
>          switch (new_state) {
> @@ -303,8 +300,7 @@ static int json_lexer_feed_char(JSONLexer *lexer, char ch, bool flush)
>              lexer->emit(lexer, lexer->token, new_state, lexer->x, lexer->y);
>              /* fall through */
>          case JSON_SKIP:
> -            QDECREF(lexer->token);
> -            lexer->token = qstring_new();
> +            g_string_truncate(lexer->token, 0);
>              new_state = IN_START;
>              break;

Before your patch, we effectively hand the QString * lexer->token off to
whatever emit() is.

Now it's a GString, and the lexer wants it back afterwards.

That works, because beneath the obfuscating abstraction, emit() is
json_message_process_token(), and you update it accordingly; see the last
patch hunk.
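
Spelled out as a hypothetical callback (the name and the copy are
illustrative, not from the patch), the new contract looks like this:

    #include "qapi/qmp/json-lexer.h"

    /* The lexer owns the GString and reuses it after emit() returns, so a
     * callback must copy any bytes it wants to keep and must not stash
     * the GString pointer itself. */
    static void example_emit(JSONLexer *lexer, GString *token,
                             JSONTokenType type, int x, int y)
    {
        char *copy = g_strndup(token->str, token->len);  /* take a copy */
        /* ... hand "copy" to whatever consumes tokens ... */
        g_free(copy);
        (void)lexer; (void)type; (void)x; (void)y;  /* unused in this sketch */
    }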

>          case IN_ERROR:
> @@ -322,8 +318,7 @@ static int json_lexer_feed_char(JSONLexer *lexer, char ch, bool flush)
>               * induce an error/flush state.
>               */
>              lexer->emit(lexer, lexer->token, JSON_ERROR, lexer->x, lexer->y);
> -            QDECREF(lexer->token);
> -            lexer->token = qstring_new();
> +            g_string_truncate(lexer->token, 0);
>              new_state = IN_START;
>              lexer->state = new_state;
>              return 0;

Likewise.

> @@ -336,10 +331,9 @@ static int json_lexer_feed_char(JSONLexer *lexer, char ch, bool flush)
>      /* Do not let a single token grow to an arbitrarily large size,
>       * this is a security consideration.
>       */
> -    if (lexer->token->length > MAX_TOKEN_SIZE) {
> +    if (lexer->token->len > MAX_TOKEN_SIZE) {
>          lexer->emit(lexer, lexer->token, lexer->state, lexer->x, lexer->y);
> -        QDECREF(lexer->token);
> -        lexer->token = qstring_new();
> +        g_string_truncate(lexer->token, 0);
>          lexer->state = IN_START;
>      }
>  

Likewise.

Preexisting: splitting long tokens with an axe like that is wrong.  If
the token is too long, we should error out.  Outside this patch's scope.
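
Just to make the alternative concrete, a sketch of erroring out instead
(outside this patch's scope; the bail-out mirrors the IN_ERROR path above):

    /* Sketch only: treat an oversized token as a lexing error rather than
     * emitting the truncated prefix as if it were a complete token. */
    if (lexer->token->len > MAX_TOKEN_SIZE) {
        lexer->emit(lexer, lexer->token, JSON_ERROR, lexer->x, lexer->y);
        g_string_truncate(lexer->token, 0);
        lexer->state = IN_START;
        return 0;                   /* bail out, like the IN_ERROR case */
    }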

> @@ -369,5 +363,5 @@ int json_lexer_flush(JSONLexer *lexer)
>  
>  void json_lexer_destroy(JSONLexer *lexer)
>  {
> -    QDECREF(lexer->token);
> +    g_string_free(lexer->token, true);
>  }
> diff --git a/qobject/json-streamer.c b/qobject/json-streamer.c
> index 1b2f9b1..f240501 100644
> --- a/qobject/json-streamer.c
> +++ b/qobject/json-streamer.c
> @@ -12,6 +12,7 @@
>   */
>  
>  #include "qapi/qmp/qlist.h"
> +#include "qapi/qmp/qstring.h"
>  #include "qapi/qmp/qint.h"
>  #include "qapi/qmp/qdict.h"
>  #include "qemu-common.h"
> @@ -21,13 +22,13 @@
>  #define MAX_TOKEN_SIZE (64ULL << 20)
>  #define MAX_NESTING (1ULL << 10)
>  
> -static void json_message_process_token(JSONLexer *lexer, QString *token, JSONTokenType type, int x, int y)
> +static void json_message_process_token(JSONLexer *lexer, GString *input, JSONTokenType type, int x, int y)
>  {
>      JSONMessageParser *parser = container_of(lexer, JSONMessageParser, lexer);
>      QDict *dict;
>  
>      if (type == JSON_OPERATOR) {
> -        switch (qstring_get_str(token)[0]) {
> +        switch (input->str[0]) {
>          case '{':
>              parser->brace_count++;
>              break;
> @@ -47,12 +48,11 @@ static void json_message_process_token(JSONLexer *lexer, QString *token, JSONTok
>  
>      dict = qdict_new();
>      qdict_put(dict, "type", qint_from_int(type));
> -    QINCREF(token);
> -    qdict_put(dict, "token", token);
> +    qdict_put(dict, "token", qstring_from_str(input->str));
>      qdict_put(dict, "x", qint_from_int(x));
>      qdict_put(dict, "y", qint_from_int(y));
>  
> -    parser->token_size += token->length;
> +    parser->token_size += input->len;
>  
>      qlist_append(parser->tokens, dict);

Before your patch: take a reference to token.

Now: copy it out.  Good.

Preexisting: storing the token's textual representation is necessary for
strings, defensible for numbers, and a total waste for everything else.
But we can tolerate some waste.  Just not the *egregious* waste this
series fixes.
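
One purely illustrative way to avoid that waste would be a flat per-token
record that keeps the text only where it matters (a sketch, not what this
series actually implements):

    /* Illustrative only: operators and keywords could get by with just the
     * type and position; strings and numbers keep their text. */
    typedef struct {
        JSONTokenType type;
        int x, y;               /* source position, as the lexer reports it */
        char *str;              /* token text, or NULL when type implies it */
    } LexedToken;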


