qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Qemu-devel] [PATCH] linux-user: add signalfd/signalfd4 syscalls


From: Peter Maydell
Subject: Re: [Qemu-devel] [PATCH] linux-user: add signalfd/signalfd4 syscalls
Date: Tue, 14 Jul 2015 21:50:47 +0100

On 29 May 2015 at 23:50, Laurent Vivier <address@hidden> wrote:
> This patch introduces a system very similar to the one used in the kernel
> to attach specific functions to a given file descriptor.
>
> In this case, we attach a specific "read()" to the fd returned by
> signalfd() to be able to byte-swap the signalfd_siginfo structure
> provided by read().
>
> This system could be also used to introduce netlink sockets.

Thanks for this patch; apologies for the delayed review.

> This patch allows to execute the example program given by
> man signalfd(2):
>
>     do { perror(msg); exit(EXIT_FAILURE); } while (0)

Your commit message seems to have lost all the lines beginning
'#' in this example program. I blame git :-)

> int
> main(int argc, char *argv[])
> {
>     sigset_t mask;
>     int sfd;
>     struct signalfd_siginfo fdsi;
>     ssize_t s;
>
>     sigemptyset(&mask);
>     sigaddset(&mask, SIGINT);
>     sigaddset(&mask, SIGQUIT);
>
>     /* Block signals so that they aren't handled
>        according to their default dispositions */
>
>     if (sigprocmask(SIG_BLOCK, &mask, NULL) == -1)
>         handle_error("sigprocmask");
>
>     sfd = signalfd(-1, &mask, 0);
>     if (sfd == -1)
>         handle_error("signalfd");
>
>     for (;;) {
>         s = read(sfd, &fdsi, sizeof(struct signalfd_siginfo));
>         if (s != sizeof(struct signalfd_siginfo))
>             handle_error("read");
>
>         if (fdsi.ssi_signo == SIGINT) {
>             printf("Got SIGINT\n");
>         } else if (fdsi.ssi_signo == SIGQUIT) {
>             printf("Got SIGQUIT\n");
>             exit(EXIT_SUCCESS);
>         } else {
>             printf("Read unexpected signal\n");
>         }
>     }
> }
>
> $ ./signalfd_demo
> ^CGot SIGINT
> ^CGot SIGINT
> ^\Got SIGQUIT
>
> Signed-off-by: Laurent Vivier <address@hidden>
> ---
>  linux-user/syscall.c | 117 
> ++++++++++++++++++++++++++++++++++++++++++++++++++-
>  1 file changed, 116 insertions(+), 1 deletion(-)
>
> diff --git a/linux-user/syscall.c b/linux-user/syscall.c
> index 1622ad6..c72c440 100644
> --- a/linux-user/syscall.c
> +++ b/linux-user/syscall.c
> @@ -60,6 +60,7 @@ int __clone2(int (*fn)(void *), void *child_stack_base,
>  #include <sys/statfs.h>
>  #include <utime.h>
>  #include <sys/sysinfo.h>
> +#include <sys/signalfd.h>
>  //#include <sys/user.h>
>  #include <netinet/ip.h>
>  #include <netinet/tcp.h>
> @@ -294,6 +295,15 @@ static bitmask_transtbl fcntl_flags_tbl[] = {
>    { 0, 0, 0, 0 }
>  };
>
> +struct target_fd_ops {
> +    abi_long (*read) (int, void *, size_t);

Maybe could use a brief comment defining the API here (for instance
that you need to return -target_errno isn't necessarily obvious).

> +};
> +
> +typedef struct target_fd_ops target_fd_ops_t;

QEMU style generally prefers CamelCase for struct and
struct typedef names.

> +
> +static int target_fd_max;
> +static target_fd_ops_t **target_fd_ops;
> +
>  static int sys_getcwd1(char *buf, size_t size)
>  {
>    if (getcwd(buf, size) == NULL) {
> @@ -4878,6 +4888,7 @@ void syscall_init(void)
>      const argtype *arg_type;
>      int size;
>      int i;
> +    struct rlimit rlim;
>
>  #define STRUCT(name, ...) thunk_register_struct(STRUCT_ ## name, #name, 
> struct_ ## name ## _def);
>  #define STRUCT_SPECIAL(name) thunk_register_struct_direct(STRUCT_ ## name, 
> #name, &struct_ ## name ## _def);
> @@ -4885,6 +4896,14 @@ void syscall_init(void)
>  #undef STRUCT
>  #undef STRUCT_SPECIAL
>
> +    /* allocate an array to store fd ops */
> +
> +    if (getrlimit(RLIMIT_NOFILE, &rlim) == 0) {
> +        target_fd_max = rlim.rlim_cur;
> +        target_fd_ops = g_malloc0(target_fd_max * sizeof(target_fd_ops_t *));
> +    }

...and what if the guest raises the rlimit after we've started?
I think it would be better to just dynamically allocate/reallocate
the table when the guest does a signalfd syscall, rather than
trying to allocate it once at startup.

> +
> +
>      /* Build target_to_host_errno_table[] table from
>       * host_to_target_errno_table[]. */
>      for (i = 0; i < ERRNO_TABLE_SIZE; i++) {
> @@ -5512,6 +5531,51 @@ static target_timer_t get_timer_id(abi_long arg)
>      return timerid;
>  }
>
> +/* signalfd siginfo conversion */
> +
> +static void
> +host_to_target_signalfd_siginfo(struct signalfd_siginfo *tinfo,
> +                                const struct signalfd_siginfo *info)
> +{
> +    int sig = host_to_target_signal(info->ssi_signo);
> +    tinfo->ssi_signo = tswap32(sig);
> +    tinfo->ssi_errno = 0; /* unused */

(a) Why not swap it anyway?
(b) the kernel seems to write something to this field:
http://lxr.free-electrons.com/source/fs/signalfd.c#L97

> +    tinfo->ssi_code = tswap32(info->ssi_code);
> +    tinfo->ssi_pid =  tswap32(info->ssi_pid);
> +    tinfo->ssi_uid =  tswap32(info->ssi_uid);
> +    tinfo->ssi_fd =  tswap32(info->ssi_fd);
> +    tinfo->ssi_tid =  tswap32(info->ssi_tid);
> +    tinfo->ssi_band =  tswap32(info->ssi_band);
> +    tinfo->ssi_overrun =  tswap32(info->ssi_overrun);
> +    tinfo->ssi_trapno =  tswap32(info->ssi_trapno);
> +    tinfo->ssi_status =  tswap32(info->ssi_status);
> +    tinfo->ssi_int =  tswap32(info->ssi_int);
> +    tinfo->ssi_ptr =  tswap64(info->ssi_ptr);
> +    tinfo->ssi_utime =  tswap64(info->ssi_utime);
> +    tinfo->ssi_stime =  tswap64(info->ssi_stime);
> +    tinfo->ssi_addr =  tswap64(info->ssi_addr);

Missing
   tinfo->ssi_addr_lsb = tswap16(info->ssi_addr_lsb);

> +}
> +
> +static abi_long target_signalfd_read(int fd, void *buf, size_t count)
> +{
> +    int ret;
> +    int i;
> +
> +    ret = get_errno(read(fd, buf, count));

The return value from read might not fit into an int...

> +    if (ret < 0) {
> +        return ret;
> +    }
> +
> +    for (i = 0; i < ret; i += sizeof(struct signalfd_siginfo)) {
> +        host_to_target_signalfd_siginfo(buf + i, buf + i);

This is implicitly assuming that none of the fields in
the target and host arrangements of signalfd_siginfo
overlap, so that all we're doing is swapping each one
in place. That's true, but it's a bit subtle...

> +    }
> +    return ret;
> +}
> +
> +static target_fd_ops_t target_signalfd_ops = {
> +    .read = target_signalfd_read,
> +};
> +
>  /* do_syscall() should always have a single exit point at the end so
>     that actions, such as logging of syscall results, can be performed.
>     All errnos that do_syscall() returns must be -TARGET_<errcode>. */
> @@ -5571,7 +5635,13 @@ abi_long do_syscall(void *cpu_env, int num, abi_long 
> arg1,
>          else {
>              if (!(p = lock_user(VERIFY_WRITE, arg2, arg3, 0)))
>                  goto efault;
> -            ret = get_errno(read(arg1, p, arg3));
> +            if (arg1 < target_fd_max &&
> +                target_fd_ops[arg1] &&
> +                target_fd_ops[arg1]->read) {
> +                ret =  target_fd_ops[arg1]->read(arg1, p, arg3);
> +            } else {
> +                ret = get_errno(read(arg1, p, arg3));
> +            }

This won't do the right thing in the case of:
 emulated-under-QEMU program A does a signalfd() with CLOEXEC clear
 program A exec()s program B, also emulated under QEMU
 program B reads its inherited signalfd descriptor

That's a bit of an edge case, of course, and practically impossible
to get right. It is mentioned specifically as working in the
signalfd manpage, though; maybe worth a comment somewhere that we
don't do the right thing.

>              unlock_user(p, arg2, ret);
>          }
>          break;
> @@ -5598,6 +5668,10 @@ abi_long do_syscall(void *cpu_env, int num, abi_long 
> arg1,
>          unlock_user(p, arg2, 0);
>          break;
>      case TARGET_NR_close:
> +        if (arg1 < target_fd_max &&
> +            target_fd_ops[arg1]) {
> +            target_fd_ops[arg1] = NULL;
> +        }

This isn't the only place an fd can get closed; for instance
dup2() can implicitly close an fd. I think you need a wider set
of changes to either (a) NULL out the entries in all the places
where we can close an fd or (b) NULL out the entries in all the
places where we can open an fd.

>          ret = get_errno(close(arg1));
>          break;
>      case TARGET_NR_brk:
> @@ -9451,6 +9525,26 @@ abi_long do_syscall(void *cpu_env, int num, abi_long 
> arg1,
>          break;
>  #endif
>  #endif
> +#if defined(TARGET_NR_signalfd4)
> +    case TARGET_NR_signalfd4:
> +        {
> +            target_sigset_t *target_mask;
> +            sigset_t mask;
> +            if (!lock_user_struct(VERIFY_READ, target_mask, arg2, 1)) {
> +                goto efault;
> +            }
> +
> +            target_to_host_sigset(&mask, target_mask);
> +
> +            ret = get_errno(signalfd(arg1, &mask, arg4));

The signalfd flags are SFD_NONBLOCK and SFD_CLOEXEC, which the
kernel defines (and asserts) to be equal to O_NONBLOCK and
O_CLOEXEC. Those are not the same for all architectures, so we
need to convert the flags argument here.

> +            if (ret >= 0) {
> +                target_fd_ops[ret] = &target_signalfd_ops;

We checked the fd against target_fds_max earlier, but we don't here?
(Moot if you take my suggestion of auto-reallocating the fd_ops
array here.)

> +            }
> +
> +            unlock_user_struct(target_mask, arg2, 0);

You might as well do the unlock as soon as you've done
the target_to_host_sigset() conversion.

> +        }
> +        break;
> +#endif
>  #if defined(CONFIG_EPOLL)
>  #if defined(TARGET_NR_epoll_create)
>      case TARGET_NR_epoll_create:
> @@ -9743,6 +9837,27 @@ abi_long do_syscall(void *cpu_env, int num, abi_long 
> arg1,
>      }
>  #endif
>
> +#if defined(TARGET_NR_signalfd)

Can we put this next to the signalfd4 code rather than separated by
other cases?

> +    case TARGET_NR_signalfd:
> +        {
> +            target_sigset_t *target_mask;
> +            sigset_t mask;
> +            if (!lock_user_struct(VERIFY_READ, target_mask, arg2, 1)) {
> +                goto efault;
> +            }
> +
> +            target_to_host_sigset(&mask, target_mask);
> +
> +            ret = get_errno(signalfd(arg1, &mask, arg3));

The syscall signalfd() has the arguments (fd, mask, sizemask),
whereas the libc function takes (fd, mask, flags), so passing arg3
here looks wrong. Should be const 0 ?

> +            if (ret >= 0) {
> +                target_fd_ops[ret] = &target_signalfd_ops;
> +            }
> +
> +            unlock_user_struct(target_mask, arg2, 0);

This is very similar to the signalfd4 implementation,
unsurprisingly. I would suggest factoring it out into a
do_signalfd4() function to be called from both places.

> +        }
> +        break;
> +#endif
> +
>  #if defined(TARGET_NR_timerfd_create) && defined(CONFIG_TIMERFD)
>      case TARGET_NR_timerfd_create:
>          ret = get_errno(timerfd_create(arg1,
> --
> 2.4.1

thanks
-- PMM



reply via email to

[Prev in Thread] Current Thread [Next in Thread]