qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Qemu-devel] [PATCH 1/5] ide/atapi: make PIO read requests async


From: John Snow
Subject: Re: [Qemu-devel] [PATCH 1/5] ide/atapi: make PIO read requests async
Date: Wed, 7 Oct 2015 12:42:25 -0400
User-agent: Mozilla/5.0 (X11; Linux x86_64; rv:38.0) Gecko/20100101 Thunderbird/38.2.0


On 10/06/2015 04:46 AM, Peter Lieven wrote:
> Am 05.10.2015 um 23:15 schrieb John Snow:
>>
>> On 09/21/2015 08:25 AM, Peter Lieven wrote:
>>> PIO read requests on the ATAPI interface used to be sync blk requests.
>>> This has to siginificant drawbacks. First the main loop hangs util an
>>> I/O request is completed and secondly if the I/O request does not
>>> complete (e.g. due to an unresponsive storage) Qemu hangs completely.
>>>
>>> Signed-off-by: Peter Lieven <address@hidden>
>>> ---
>>>   hw/ide/atapi.c | 69
>>> ++++++++++++++++++++++++++++++++++++----------------------
>>>   1 file changed, 43 insertions(+), 26 deletions(-)
>>>
>>> diff --git a/hw/ide/atapi.c b/hw/ide/atapi.c
>>> index 747f466..9257e1c 100644
>>> --- a/hw/ide/atapi.c
>>> +++ b/hw/ide/atapi.c
>>> @@ -105,31 +105,51 @@ static void cd_data_to_raw(uint8_t *buf, int lba)
>>>       memset(buf, 0, 288);
>>>   }
>>>   -static int cd_read_sector(IDEState *s, int lba, uint8_t *buf, int
>>> sector_size)
>>> +static void cd_read_sector_cb(void *opaque, int ret)
>>>   {
>>> -    int ret;
>>> +    IDEState *s = opaque;
>>>   -    switch(sector_size) {
>>> -    case 2048:
>>> -        block_acct_start(blk_get_stats(s->blk), &s->acct,
>>> -                         4 * BDRV_SECTOR_SIZE, BLOCK_ACCT_READ);
>>> -        ret = blk_read(s->blk, (int64_t)lba << 2, buf, 4);
>>> -        block_acct_done(blk_get_stats(s->blk), &s->acct);
>>> -        break;
>>> -    case 2352:
>>> -        block_acct_start(blk_get_stats(s->blk), &s->acct,
>>> -                         4 * BDRV_SECTOR_SIZE, BLOCK_ACCT_READ);
>>> -        ret = blk_read(s->blk, (int64_t)lba << 2, buf + 16, 4);
>>> -        block_acct_done(blk_get_stats(s->blk), &s->acct);
>>> -        if (ret < 0)
>>> -            return ret;
>>> -        cd_data_to_raw(buf, lba);
>>> -        break;
>>> -    default:
>>> -        ret = -EIO;
>>> -        break;
>>> +    block_acct_done(blk_get_stats(s->blk), &s->acct);
>>> +
>>> +    if (ret < 0) {
>>> +        ide_atapi_io_error(s, ret);
>>> +        return;
>>> +    }
>>> +
>>> +    if (s->cd_sector_size == 2352) {
>>> +        cd_data_to_raw(s->io_buffer, s->lba);
>>>       }
>>> -    return ret;
>>> +
>>> +    s->lba++;
>>> +    s->io_buffer_index = 0;
>>> +    s->status &= ~BUSY_STAT;
>>> +
>>> +    ide_atapi_cmd_reply_end(s);
>>> +}
>>> +
>>> +static int cd_read_sector(IDEState *s, int lba, void *buf, int
>>> sector_size)
>>> +{
>>> +    if (sector_size != 2048 && sector_size != 2352) {
>>> +        return -EINVAL;
>>> +    }
>>> +
>>> +    s->iov.iov_base = buf;
>>> +    if (sector_size == 2352) {
>>> +        buf += 4;
>>> +    }
>>> +
>>> +    s->iov.iov_len = 4 * BDRV_SECTOR_SIZE;
>>> +    qemu_iovec_init_external(&s->qiov, &s->iov, 1);
>>> +
>>> +    if (blk_aio_readv(s->blk, (int64_t)lba << 2, &s->qiov, 4,
>>> +                      cd_read_sector_cb, s) == NULL) {
>>> +        return -EIO;
>>> +    }
>>> +
>>> +    block_acct_start(blk_get_stats(s->blk), &s->acct,
>>> +                     4 * BDRV_SECTOR_SIZE, BLOCK_ACCT_READ);
>>> +    s->status |= BUSY_STAT;
>>> +    return 0;
>>>   }
>>>   
>> We discussed this off-list a bit, but for upstream synchronization:
>>
>> Unfortunately, I believe making cd_read_sector here non-blocking makes
>> ide_atapi_cmd_reply_end non-blocking, and as a result makes calls to
>> s->end_transfer_func() nonblocking, which functions like ide_data_readw
>> are not prepared to cope with.
>>
>> My suggestion is to buffer an entire DRQ block of data at once
>> (byte_count_limit) to avoid the problem.
> 
> Hi John,
> 
> first of all thank you for the detailed analysis.
> 
> Is the following what you have i mind. For PIO reads > 1 sector
> it is a great improvement for the NFS backend:
> 
> diff --git a/hw/ide/atapi.c b/hw/ide/atapi.c
> index ab45495..ec2ba89 100644
> --- a/hw/ide/atapi.c
> +++ b/hw/ide/atapi.c
> @@ -117,37 +117,40 @@ static void cd_read_sector_cb(void *opaque, int ret)
>      }
> 
>      if (s->cd_sector_size == 2352) {
> -        cd_data_to_raw(s->io_buffer, s->lba);
> +        int nb_sectors = s->packet_transfer_size / 2352;
> +        while (nb_sectors--) {
> +            memmove(s->io_buffer + nb_sectors * 2352 + 4,
> +                    s->io_buffer + nb_sectors * 2048, 2048);
> +            cd_data_to_raw(s->io_buffer + nb_sectors * 2352,
> +                           s->lba + nb_sectors);
> +        }
>      }

Is this going to be correct in cases where the number of sectors we are
copying is less than the total request size? We might need to bookmark
how many sectors/bytes we're copying this go-around. Perhaps by looking
at lcyl/hcyl.

> 
> -    s->lba++;
> +    s->lba = -1;
>      s->io_buffer_index = 0;
>      s->status &= ~BUSY_STAT;
> 
>      ide_atapi_cmd_reply_end(s);
>  }
> 

Well, I might not name it cd_read_sector and cd_read_sector_cb anymore.
Perhaps cd_read_sectors[_cb].

> -static int cd_read_sector(IDEState *s, int lba, void *buf, int
> sector_size)
> +static int cd_read_sector(IDEState *s, int lba, void *buf, int
> sector_size, int nb_sectors)
>  {
>      if (sector_size != 2048 && sector_size != 2352) {
>          return -EINVAL;
>      }
> 
>      s->iov.iov_base = buf;
> -    if (sector_size == 2352) {
> -        buf += 4;
> -    }
> -
> -    s->iov.iov_len = 4 * BDRV_SECTOR_SIZE;
> +    s->iov.iov_len = nb_sectors * 2048;
>      qemu_iovec_init_external(&s->qiov, &s->iov, 1);
> 
> -    if (ide_readv_cancelable(s, (int64_t)lba << 2, &s->qiov, 4,
> -                      cd_read_sector_cb, s) == NULL) {
> +    if (ide_readv_cancelable(s, (int64_t)lba << 2,
> +                             &s->qiov, nb_sectors * 4,
> +                             cd_read_sector_cb, s) == NULL) {
>          return -EIO;
>      }
> 
>      block_acct_start(blk_get_stats(s->blk), &s->acct,
> -                     4 * BDRV_SECTOR_SIZE, BLOCK_ACCT_READ);
> +                     nb_sectors * 2048, BLOCK_ACCT_READ);
>      s->status |= BUSY_STAT;
>      return 0;
>  }
> @@ -190,7 +193,7 @@ void ide_atapi_io_error(IDEState *s, int ret)
>  /* The whole ATAPI transfer logic is handled in this function */
>  void ide_atapi_cmd_reply_end(IDEState *s)
>  {
> -    int byte_count_limit, size, ret;
> +    int byte_count_limit, size;
>  #ifdef DEBUG_IDE_ATAPI
>      printf("reply: tx_size=%d elem_tx_size=%d index=%d\n",
>             s->packet_transfer_size,
> @@ -205,14 +208,6 @@ void ide_atapi_cmd_reply_end(IDEState *s)
>          printf("status=0x%x\n", s->status);
>  #endif
>      } else {
> -        /* see if a new sector must be read */
> -        if (s->lba != -1 && s->io_buffer_index >= s->cd_sector_size) {
> -            ret = cd_read_sector(s, s->lba, s->io_buffer,
> s->cd_sector_size);
> -            if (ret < 0) {
> -                ide_atapi_io_error(s, ret);
> -            }
> -            return;
> -        }
>          if (s->elementary_transfer_size > 0) {
>              /* there are some data left to transmit in this elementary
>                 transfer */
> @@ -287,13 +282,18 @@ static void ide_atapi_cmd_reply(IDEState *s, int
> size, int max_size)
>  static void ide_atapi_cmd_read_pio(IDEState *s, int lba, int nb_sectors,
>                                     int sector_size)
>  {
> +    int ret;
>      s->lba = lba;
>      s->packet_transfer_size = nb_sectors * sector_size;
> +    assert(s->packet_transfer_size <=
> +           IDE_DMA_BUF_SECTORS * BDRV_SECTOR_SIZE + 4);
>      s->elementary_transfer_size = 0;
> -    s->io_buffer_index = sector_size;
>      s->cd_sector_size = sector_size;
> -
> -    ide_atapi_cmd_reply_end(s);
> +    ret = cd_read_sector(s, s->lba, s->io_buffer, s->cd_sector_size,
> +                         nb_sectors);
> +    if (ret < 0) {
> +        ide_atapi_io_error(s, ret);
> +    }
>  }
> 
>  static void ide_atapi_cmd_check_status(IDEState *s)
> 
> 
> Did you also have a look at the other patches?
> 
> Thanks,
> Peter

On my queue; hopefully Stefan can take a peek too, but I'll try to
review the IDE-specific bits. I imagine you want to wait to respin until
we've looked at all the patches, that's fine -- I'll try not to keep you
waiting for much longer.

--js



reply via email to

[Prev in Thread] Current Thread [Next in Thread]