qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Qemu-devel] [PATCH] linux-user: add signalfd/signalfd4 syscalls


From: Laurent Vivier
Subject: Re: [Qemu-devel] [PATCH] linux-user: add signalfd/signalfd4 syscalls
Date: Tue, 14 Jul 2015 23:13:28 +0200
User-agent: Mozilla/5.0 (X11; Linux x86_64; rv:38.0) Gecko/20100101 Thunderbird/38.0.1

Thank you for your comments.

I'm currently writing a v2 of this patch with what I've learned working
on the netlink interface. I will fix it accordingly all your comments.

[ Now, I can boot a PPC64 container with systemd on an x86_64 host:
https://asciinema.org/a/7sgwvok72m34v7hg08hfspg4s ]

Laurent

Le 14/07/2015 22:50, Peter Maydell a écrit :
> On 29 May 2015 at 23:50, Laurent Vivier <address@hidden> wrote:
>> This patch introduces a system very similar to the one used in the kernel
>> to attach specific functions to a given file descriptor.
>>
>> In this case, we attach a specific "read()" to the fd returned by
>> signalfd() to be able to byte-swap the signalfd_siginfo structure
>> provided by read().
>>
>> This system could be also used to introduce netlink sockets.
> 
> Thanks for this patch; apologies for the delayed review.
> 
>> This patch allows to execute the example program given by
>> man signalfd(2):
>>
>>     do { perror(msg); exit(EXIT_FAILURE); } while (0)
> 
> Your commit message seems to have lost all the lines beginning
> '#' in this example program. I blame git :-)
> 
>> int
>> main(int argc, char *argv[])
>> {
>>     sigset_t mask;
>>     int sfd;
>>     struct signalfd_siginfo fdsi;
>>     ssize_t s;
>>
>>     sigemptyset(&mask);
>>     sigaddset(&mask, SIGINT);
>>     sigaddset(&mask, SIGQUIT);
>>
>>     /* Block signals so that they aren't handled
>>        according to their default dispositions */
>>
>>     if (sigprocmask(SIG_BLOCK, &mask, NULL) == -1)
>>         handle_error("sigprocmask");
>>
>>     sfd = signalfd(-1, &mask, 0);
>>     if (sfd == -1)
>>         handle_error("signalfd");
>>
>>     for (;;) {
>>         s = read(sfd, &fdsi, sizeof(struct signalfd_siginfo));
>>         if (s != sizeof(struct signalfd_siginfo))
>>             handle_error("read");
>>
>>         if (fdsi.ssi_signo == SIGINT) {
>>             printf("Got SIGINT\n");
>>         } else if (fdsi.ssi_signo == SIGQUIT) {
>>             printf("Got SIGQUIT\n");
>>             exit(EXIT_SUCCESS);
>>         } else {
>>             printf("Read unexpected signal\n");
>>         }
>>     }
>> }
>>
>> $ ./signalfd_demo
>> ^CGot SIGINT
>> ^CGot SIGINT
>> ^\Got SIGQUIT
>>
>> Signed-off-by: Laurent Vivier <address@hidden>
>> ---
>>  linux-user/syscall.c | 117 
>> ++++++++++++++++++++++++++++++++++++++++++++++++++-
>>  1 file changed, 116 insertions(+), 1 deletion(-)
>>
>> diff --git a/linux-user/syscall.c b/linux-user/syscall.c
>> index 1622ad6..c72c440 100644
>> --- a/linux-user/syscall.c
>> +++ b/linux-user/syscall.c
>> @@ -60,6 +60,7 @@ int __clone2(int (*fn)(void *), void *child_stack_base,
>>  #include <sys/statfs.h>
>>  #include <utime.h>
>>  #include <sys/sysinfo.h>
>> +#include <sys/signalfd.h>
>>  //#include <sys/user.h>
>>  #include <netinet/ip.h>
>>  #include <netinet/tcp.h>
>> @@ -294,6 +295,15 @@ static bitmask_transtbl fcntl_flags_tbl[] = {
>>    { 0, 0, 0, 0 }
>>  };
>>
>> +struct target_fd_ops {
>> +    abi_long (*read) (int, void *, size_t);
> 
> Maybe could use a brief comment defining the API here (for instance
> that you need to return -target_errno isn't necessarily obvious).
> 
>> +};
>> +
>> +typedef struct target_fd_ops target_fd_ops_t;
> 
> QEMU style generally prefers CamelCase for struct and
> struct typedef names.
> 
>> +
>> +static int target_fd_max;
>> +static target_fd_ops_t **target_fd_ops;
>> +
>>  static int sys_getcwd1(char *buf, size_t size)
>>  {
>>    if (getcwd(buf, size) == NULL) {
>> @@ -4878,6 +4888,7 @@ void syscall_init(void)
>>      const argtype *arg_type;
>>      int size;
>>      int i;
>> +    struct rlimit rlim;
>>
>>  #define STRUCT(name, ...) thunk_register_struct(STRUCT_ ## name, #name, 
>> struct_ ## name ## _def);
>>  #define STRUCT_SPECIAL(name) thunk_register_struct_direct(STRUCT_ ## name, 
>> #name, &struct_ ## name ## _def);
>> @@ -4885,6 +4896,14 @@ void syscall_init(void)
>>  #undef STRUCT
>>  #undef STRUCT_SPECIAL
>>
>> +    /* allocate an array to store fd ops */
>> +
>> +    if (getrlimit(RLIMIT_NOFILE, &rlim) == 0) {
>> +        target_fd_max = rlim.rlim_cur;
>> +        target_fd_ops = g_malloc0(target_fd_max * sizeof(target_fd_ops_t 
>> *));
>> +    }
> 
> ...and what if the guest raises the rlimit after we've started?
> I think it would be better to just dynamically allocate/reallocate
> the table when the guest does a signalfd syscall, rather than
> trying to allocate it once at startup.
> 
>> +
>> +
>>      /* Build target_to_host_errno_table[] table from
>>       * host_to_target_errno_table[]. */
>>      for (i = 0; i < ERRNO_TABLE_SIZE; i++) {
>> @@ -5512,6 +5531,51 @@ static target_timer_t get_timer_id(abi_long arg)
>>      return timerid;
>>  }
>>
>> +/* signalfd siginfo conversion */
>> +
>> +static void
>> +host_to_target_signalfd_siginfo(struct signalfd_siginfo *tinfo,
>> +                                const struct signalfd_siginfo *info)
>> +{
>> +    int sig = host_to_target_signal(info->ssi_signo);
>> +    tinfo->ssi_signo = tswap32(sig);
>> +    tinfo->ssi_errno = 0; /* unused */
> 
> (a) Why not swap it anyway?
> (b) the kernel seems to write something to this field:
> http://lxr.free-electrons.com/source/fs/signalfd.c#L97
> 
>> +    tinfo->ssi_code = tswap32(info->ssi_code);
>> +    tinfo->ssi_pid =  tswap32(info->ssi_pid);
>> +    tinfo->ssi_uid =  tswap32(info->ssi_uid);
>> +    tinfo->ssi_fd =  tswap32(info->ssi_fd);
>> +    tinfo->ssi_tid =  tswap32(info->ssi_tid);
>> +    tinfo->ssi_band =  tswap32(info->ssi_band);
>> +    tinfo->ssi_overrun =  tswap32(info->ssi_overrun);
>> +    tinfo->ssi_trapno =  tswap32(info->ssi_trapno);
>> +    tinfo->ssi_status =  tswap32(info->ssi_status);
>> +    tinfo->ssi_int =  tswap32(info->ssi_int);
>> +    tinfo->ssi_ptr =  tswap64(info->ssi_ptr);
>> +    tinfo->ssi_utime =  tswap64(info->ssi_utime);
>> +    tinfo->ssi_stime =  tswap64(info->ssi_stime);
>> +    tinfo->ssi_addr =  tswap64(info->ssi_addr);
> 
> Missing
>    tinfo->ssi_addr_lsb = tswap16(info->ssi_addr_lsb);
> 
>> +}
>> +
>> +static abi_long target_signalfd_read(int fd, void *buf, size_t count)
>> +{
>> +    int ret;
>> +    int i;
>> +
>> +    ret = get_errno(read(fd, buf, count));
> 
> The return value from read might not fit into an int...
> 
>> +    if (ret < 0) {
>> +        return ret;
>> +    }
>> +
>> +    for (i = 0; i < ret; i += sizeof(struct signalfd_siginfo)) {
>> +        host_to_target_signalfd_siginfo(buf + i, buf + i);
> 
> This is implicitly assuming that none of the fields in
> the target and host arrangements of signalfd_siginfo
> overlap, so that all we're doing is swapping each one
> in place. That's true, but it's a bit subtle...
> 
>> +    }
>> +    return ret;
>> +}
>> +
>> +static target_fd_ops_t target_signalfd_ops = {
>> +    .read = target_signalfd_read,
>> +};
>> +
>>  /* do_syscall() should always have a single exit point at the end so
>>     that actions, such as logging of syscall results, can be performed.
>>     All errnos that do_syscall() returns must be -TARGET_<errcode>. */
>> @@ -5571,7 +5635,13 @@ abi_long do_syscall(void *cpu_env, int num, abi_long 
>> arg1,
>>          else {
>>              if (!(p = lock_user(VERIFY_WRITE, arg2, arg3, 0)))
>>                  goto efault;
>> -            ret = get_errno(read(arg1, p, arg3));
>> +            if (arg1 < target_fd_max &&
>> +                target_fd_ops[arg1] &&
>> +                target_fd_ops[arg1]->read) {
>> +                ret =  target_fd_ops[arg1]->read(arg1, p, arg3);
>> +            } else {
>> +                ret = get_errno(read(arg1, p, arg3));
>> +            }
> 
> This won't do the right thing in the case of:
>  emulated-under-QEMU program A does a signalfd() with CLOEXEC clear
>  program A exec()s program B, also emulated under QEMU
>  program B reads its inherited signalfd descriptor
> 
> That's a bit of an edge case, of course, and practically impossible
> to get right. It is mentioned specifically as working in the
> signalfd manpage, though; maybe worth a comment somewhere that we
> don't do the right thing.
> 
>>              unlock_user(p, arg2, ret);
>>          }
>>          break;
>> @@ -5598,6 +5668,10 @@ abi_long do_syscall(void *cpu_env, int num, abi_long 
>> arg1,
>>          unlock_user(p, arg2, 0);
>>          break;
>>      case TARGET_NR_close:
>> +        if (arg1 < target_fd_max &&
>> +            target_fd_ops[arg1]) {
>> +            target_fd_ops[arg1] = NULL;
>> +        }
> 
> This isn't the only place an fd can get closed; for instance
> dup2() can implicitly close an fd. I think you need a wider set
> of changes to either (a) NULL out the entries in all the places
> where we can close an fd or (b) NULL out the entries in all the
> places where we can open an fd.
> 
>>          ret = get_errno(close(arg1));
>>          break;
>>      case TARGET_NR_brk:
>> @@ -9451,6 +9525,26 @@ abi_long do_syscall(void *cpu_env, int num, abi_long 
>> arg1,
>>          break;
>>  #endif
>>  #endif
>> +#if defined(TARGET_NR_signalfd4)
>> +    case TARGET_NR_signalfd4:
>> +        {
>> +            target_sigset_t *target_mask;
>> +            sigset_t mask;
>> +            if (!lock_user_struct(VERIFY_READ, target_mask, arg2, 1)) {
>> +                goto efault;
>> +            }
>> +
>> +            target_to_host_sigset(&mask, target_mask);
>> +
>> +            ret = get_errno(signalfd(arg1, &mask, arg4));
> 
> The signalfd flags are SFD_NONBLOCK and SFD_CLOEXEC, which the
> kernel defines (and asserts) to be equal to O_NONBLOCK and
> O_CLOEXEC. Those are not the same for all architectures, so we
> need to convert the flags argument here.
> 
>> +            if (ret >= 0) {
>> +                target_fd_ops[ret] = &target_signalfd_ops;
> 
> We checked the fd against target_fds_max earlier, but we don't here?
> (Moot if you take my suggestion of auto-reallocating the fd_ops
> array here.)
> 
>> +            }
>> +
>> +            unlock_user_struct(target_mask, arg2, 0);
> 
> You might as well do the unlock as soon as you've done
> the target_to_host_sigset() conversion.
> 
>> +        }
>> +        break;
>> +#endif
>>  #if defined(CONFIG_EPOLL)
>>  #if defined(TARGET_NR_epoll_create)
>>      case TARGET_NR_epoll_create:
>> @@ -9743,6 +9837,27 @@ abi_long do_syscall(void *cpu_env, int num, abi_long 
>> arg1,
>>      }
>>  #endif
>>
>> +#if defined(TARGET_NR_signalfd)
> 
> Can we put this next to the signalfd4 code rather than separated by
> other cases?
> 
>> +    case TARGET_NR_signalfd:
>> +        {
>> +            target_sigset_t *target_mask;
>> +            sigset_t mask;
>> +            if (!lock_user_struct(VERIFY_READ, target_mask, arg2, 1)) {
>> +                goto efault;
>> +            }
>> +
>> +            target_to_host_sigset(&mask, target_mask);
>> +
>> +            ret = get_errno(signalfd(arg1, &mask, arg3));
> 
> The syscall signalfd() has the arguments (fd, mask, sizemask),
> whereas the libc function takes (fd, mask, flags), so passing arg3
> here looks wrong. Should be const 0 ?
> 
>> +            if (ret >= 0) {
>> +                target_fd_ops[ret] = &target_signalfd_ops;
>> +            }
>> +
>> +            unlock_user_struct(target_mask, arg2, 0);
> 
> This is very similar to the signalfd4 implementation,
> unsurprisingly. I would suggest factoring it out into a
> do_signalfd4() function to be called from both places.
> 
>> +        }
>> +        break;
>> +#endif
>> +
>>  #if defined(TARGET_NR_timerfd_create) && defined(CONFIG_TIMERFD)
>>      case TARGET_NR_timerfd_create:
>>          ret = get_errno(timerfd_create(arg1,
>> --
>> 2.4.1
> 
> thanks
> -- PMM
> 



reply via email to

[Prev in Thread] Current Thread [Next in Thread]