[dpdk-dev] vhost: fix mmap failure as len not aligned with hugepage size

Message ID 1446162713-130100-1-git-send-email-jianfeng.tan@intel.com (mailing list archive)
State Changes Requested, archived
Headers

Commit Message

Jianfeng Tan Oct. 29, 2015, 11:51 p.m. UTC
  This patch fixes a bug under lower version linux kernel, mmap() fails when
length is not aligned with hugepage size.

Signed-off-by: Jianfeng Tan <jianfeng.tan@intel.com>
---
 lib/librte_vhost/vhost_user/virtio-net-user.c | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)
  

Comments

Changchun Ouyang Nov. 6, 2015, 1:23 a.m. UTC | #1
> From: jianfeng.tan@intel.com

> To: dev@dpdk.org

> Date: Fri, 30 Oct 2015 07:51:53 +0800

> Subject: [dpdk-dev] [PATCH] vhost: fix mmap failure as len not aligned with	hugepage size

> 

> This patch fixes a bug under lower version linux kernel, mmap() fails when

> length is not aligned with hugepage size.

> 

> Signed-off-by: Jianfeng Tan <jianfeng.tan@intel.com>

Acked-by: Changchun Ouyang <changchun.ouyang@hotmail.com>> --->  lib/librte_vhost/vhost_user/virtio-net-user.c | 12 +++++++++---

>  1 file changed, 9 insertions(+), 3 deletions(-)

> 

> diff --git a/lib/librte_vhost/vhost_user/virtio-net-user.c b/lib/librte_vhost/vhost_user/virtio-net-user.c

> index a998ad8..641561c 100644

> --- a/lib/librte_vhost/vhost_user/virtio-net-user.c

> +++ b/lib/librte_vhost/vhost_user/virtio-net-user.c

> @@ -147,6 +147,10 @@ user_set_mem_table(struct vhost_device_ctx ctx, struct VhostUserMsg *pmsg)

>  		/* This is ugly */

>  		mapped_size = memory.regions[idx].memory_size +

>  			memory.regions[idx].mmap_offset;

> +

> +		alignment = get_blk_size(pmsg->fds[idx]);

> +		mapped_size = RTE_ALIGN_CEIL(mapped_size, alignment);

> +

>  		mapped_address = (uint64_t)(uintptr_t)mmap(NULL,

>  			mapped_size,

>  			PROT_READ | PROT_WRITE, MAP_SHARED,

> @@ -154,9 +158,11 @@ user_set_mem_table(struct vhost_device_ctx ctx, struct VhostUserMsg *pmsg)

>  			0);

>  

>  		RTE_LOG(INFO, VHOST_CONFIG,

> -			"mapped region %d fd:%d to %p sz:0x%"PRIx64" off:0x%"PRIx64"\n",

> +			"mapped region %d fd:%d to:%p sz:0x%"PRIx64" "

> +			"off:0x%"PRIx64" align:0x%"PRIx64"\n",

>  			idx, pmsg->fds[idx], (void *)(uintptr_t)mapped_address,

> -			mapped_size, memory.regions[idx].mmap_offset);

> +			mapped_size, memory.regions[idx].mmap_offset,

> +			alignment);

>  

>  		if (mapped_address == (uint64_t)(uintptr_t)MAP_FAILED) {

>  			RTE_LOG(ERR, VHOST_CONFIG,

> @@ -166,7 +172,7 @@ user_set_mem_table(struct vhost_device_ctx ctx, struct VhostUserMsg *pmsg)

>  

>  		pregion_orig[idx].mapped_address = mapped_address;

>  		pregion_orig[idx].mapped_size = mapped_size;

> -		pregion_orig[idx].blksz = get_blk_size(pmsg->fds[idx]);

> +		pregion_orig[idx].blksz = alignment;

>  		pregion_orig[idx].fd = pmsg->fds[idx];

>  

>  		mapped_address +=  memory.regions[idx].mmap_offset;

> -- 

> 2.1.4

>
  
Huawei Xie Nov. 11, 2015, 3:57 a.m. UTC | #2
On 10/30/2015 2:52 PM, Jianfeng Tan wrote:
> This patch fixes a bug under lower version linux kernel, mmap() fails when
Since which version Linux hugetlbfs changes the requirement of size
alignment?
> length is not aligned with hugepage size.
>
> Signed-off-by: Jianfeng Tan <jianfeng.tan@intel.com>
> ---
>  lib/librte_vhost/vhost_user/virtio-net-user.c | 12 +++++++++---
>  1 file changed, 9 insertions(+), 3 deletions(-)
>
> diff --git a/lib/librte_vhost/vhost_user/virtio-net-user.c b/lib/librte_vhost/vhost_user/virtio-net-user.c
> index a998ad8..641561c 100644
> --- a/lib/librte_vhost/vhost_user/virtio-net-user.c
> +++ b/lib/librte_vhost/vhost_user/virtio-net-user.c
> @@ -147,6 +147,10 @@ user_set_mem_table(struct vhost_device_ctx ctx, struct VhostUserMsg *pmsg)
>  		/* This is ugly */
>  		mapped_size = memory.regions[idx].memory_size +
>  			memory.regions[idx].mmap_offset;
> +
> +		alignment = get_blk_size(pmsg->fds[idx]);
> +		mapped_size = RTE_ALIGN_CEIL(mapped_size, alignment);
Probably we could remove the alignment of mapped size in free_mem_region
as well.
   RTE_ALIGN_CEIL(
        region[idx].mapped_size, alignment)
If we are not sure, leave it as it is.
> +
>  		mapped_address = (uint64_t)(uintptr_t)mmap(NULL,
>  			mapped_size,
>  			PROT_READ | PROT_WRITE, MAP_SHARED,
> @@ -154,9 +158,11 @@ user_set_mem_table(struct vhost_device_ctx ctx, struct VhostUserMsg *pmsg)
>  			0);
>  
>  		RTE_LOG(INFO, VHOST_CONFIG,
> -			"mapped region %d fd:%d to %p sz:0x%"PRIx64" off:0x%"PRIx64"\n",
> +			"mapped region %d fd:%d to:%p sz:0x%"PRIx64" "
> +			"off:0x%"PRIx64" align:0x%"PRIx64"\n",
>  			idx, pmsg->fds[idx], (void *)(uintptr_t)mapped_address,
> -			mapped_size, memory.regions[idx].mmap_offset);
> +			mapped_size, memory.regions[idx].mmap_offset,
> +			alignment);
>  
>  		if (mapped_address == (uint64_t)(uintptr_t)MAP_FAILED) {
>  			RTE_LOG(ERR, VHOST_CONFIG,
> @@ -166,7 +172,7 @@ user_set_mem_table(struct vhost_device_ctx ctx, struct VhostUserMsg *pmsg)
>  
>  		pregion_orig[idx].mapped_address = mapped_address;
>  		pregion_orig[idx].mapped_size = mapped_size;
> -		pregion_orig[idx].blksz = get_blk_size(pmsg->fds[idx]);
> +		pregion_orig[idx].blksz = alignment;
>  		pregion_orig[idx].fd = pmsg->fds[idx];
>  
>  		mapped_address +=  memory.regions[idx].mmap_offset;
  
Jianfeng Tan Nov. 12, 2015, 2:35 a.m. UTC | #3
> -----Original Message-----
> From: Xie, Huawei
> Sent: Wednesday, November 11, 2015 11:57 AM
> To: Tan, Jianfeng; dev@dpdk.org
> Subject: Re: [dpdk-dev] [PATCH] vhost: fix mmap failure as len not aligned with
> hugepage size
> 
> On 10/30/2015 2:52 PM, Jianfeng Tan wrote:
> > This patch fixes a bug under lower version linux kernel, mmap() fails
> > when
> Since which version Linux hugetlbfs changes the requirement of size alignment?
> > length is not aligned with hugepage size.

This link shows this bug was fixed in Linux kernel commit: dab2d3dc45ae7343216635d981d43637e1cb7d45
After my check, that patch was applied to long term version 3.4.110+
So distributions using 2.6.32 and 3.2.72 need this patch to make vhost work well.
https://bugzilla.kernel.org/show_bug.cgi?id=56881


> >
> > Signed-off-by: Jianfeng Tan <jianfeng.tan@intel.com>
> > ---
> >  lib/librte_vhost/vhost_user/virtio-net-user.c | 12 +++++++++---
> >  1 file changed, 9 insertions(+), 3 deletions(-)
> >
> > diff --git a/lib/librte_vhost/vhost_user/virtio-net-user.c
> > b/lib/librte_vhost/vhost_user/virtio-net-user.c
> > index a998ad8..641561c 100644
> > --- a/lib/librte_vhost/vhost_user/virtio-net-user.c
> > +++ b/lib/librte_vhost/vhost_user/virtio-net-user.c
> > @@ -147,6 +147,10 @@ user_set_mem_table(struct vhost_device_ctx ctx,
> struct VhostUserMsg *pmsg)
> >  		/* This is ugly */
> >  		mapped_size = memory.regions[idx].memory_size +
> >  			memory.regions[idx].mmap_offset;
> > +
> > +		alignment = get_blk_size(pmsg->fds[idx]);
> > +		mapped_size = RTE_ALIGN_CEIL(mapped_size, alignment);
> Probably we could remove the alignment of mapped size in free_mem_region as
> well.

Yes, after aligning mapped_address when mmap(), this address does not need to be aligned again
when munmap(). But this will effect nothing, or incur any performance issue. I'm prone to take no
change to it.

>    RTE_ALIGN_CEIL(
>         region[idx].mapped_size, alignment) If we are not sure, leave it as it is.
> > +
> >  		mapped_address = (uint64_t)(uintptr_t)mmap(NULL,
> >  			mapped_size,
> >  			PROT_READ | PROT_WRITE, MAP_SHARED, @@ -154,9
> +158,11 @@
> > user_set_mem_table(struct vhost_device_ctx ctx, struct VhostUserMsg *pmsg)
> >  			0);
> >
> >  		RTE_LOG(INFO, VHOST_CONFIG,
> > -			"mapped region %d fd:%d to %p sz:0x%"PRIx64"
> off:0x%"PRIx64"\n",
> > +			"mapped region %d fd:%d to:%p sz:0x%"PRIx64" "
> > +			"off:0x%"PRIx64" align:0x%"PRIx64"\n",
> >  			idx, pmsg->fds[idx], (void *)(uintptr_t)mapped_address,
> > -			mapped_size, memory.regions[idx].mmap_offset);
> > +			mapped_size, memory.regions[idx].mmap_offset,
> > +			alignment);
> >
> >  		if (mapped_address == (uint64_t)(uintptr_t)MAP_FAILED) {
> >  			RTE_LOG(ERR, VHOST_CONFIG,
> > @@ -166,7 +172,7 @@ user_set_mem_table(struct vhost_device_ctx ctx,
> > struct VhostUserMsg *pmsg)
> >
> >  		pregion_orig[idx].mapped_address = mapped_address;
> >  		pregion_orig[idx].mapped_size = mapped_size;
> > -		pregion_orig[idx].blksz = get_blk_size(pmsg->fds[idx]);
> > +		pregion_orig[idx].blksz = alignment;
> >  		pregion_orig[idx].fd = pmsg->fds[idx];
> >
> >  		mapped_address +=  memory.regions[idx].mmap_offset;
  
Huawei Xie Nov. 12, 2015, 2:46 a.m. UTC | #4
On 11/12/2015 10:35 AM, Tan, Jianfeng wrote:
>
>> -----Original Message-----
>> From: Xie, Huawei
>> Sent: Wednesday, November 11, 2015 11:57 AM
>> To: Tan, Jianfeng; dev@dpdk.org
>> Subject: Re: [dpdk-dev] [PATCH] vhost: fix mmap failure as len not aligned with
>> hugepage size
>>
>> On 10/30/2015 2:52 PM, Jianfeng Tan wrote:
>>> This patch fixes a bug under lower version linux kernel, mmap() fails
>>> when
>> Since which version Linux hugetlbfs changes the requirement of size alignment?
>>> length is not aligned with hugepage size.
> This link shows this bug was fixed in Linux kernel commit: dab2d3dc45ae7343216635d981d43637e1cb7d45
> After my check, that patch was applied to long term version 3.4.110+
> So distributions using 2.6.32 and 3.2.72 need this patch to make vhost work well.
> https://bugzilla.kernel.org/show_bug.cgi?id=56881
OK, please add this in commit message, remove unnecessary RTE_ALIGN in
free_memory_region, and add comment to the code because our fix is a
workaround to kernel hugetlbfs implementation issue.
>
>>> Signed-off-by: Jianfeng Tan <jianfeng.tan@intel.com>
>>> ---
>>>  lib/librte_vhost/vhost_user/virtio-net-user.c | 12 +++++++++---
>>>  1 file changed, 9 insertions(+), 3 deletions(-)
>>>
>>> diff --git a/lib/librte_vhost/vhost_user/virtio-net-user.c
>>> b/lib/librte_vhost/vhost_user/virtio-net-user.c
>>> index a998ad8..641561c 100644
>>> --- a/lib/librte_vhost/vhost_user/virtio-net-user.c
>>> +++ b/lib/librte_vhost/vhost_user/virtio-net-user.c
>>> @@ -147,6 +147,10 @@ user_set_mem_table(struct vhost_device_ctx ctx,
>> struct VhostUserMsg *pmsg)
>>>  		/* This is ugly */
>>>  		mapped_size = memory.regions[idx].memory_size +
>>>  			memory.regions[idx].mmap_offset;
>>> +
>>> +		alignment = get_blk_size(pmsg->fds[idx]);
>>> +		mapped_size = RTE_ALIGN_CEIL(mapped_size, alignment);
>> Probably we could remove the alignment of mapped size in free_mem_region as
>> well.
> Yes, after aligning mapped_address when mmap(), this address does not need to be aligned again
> when munmap(). But this will effect nothing, or incur any performance issue. I'm prone to take no
> change to it.
>
>>    RTE_ALIGN_CEIL(
>>         region[idx].mapped_size, alignment) If we are not sure, leave it as it is.
>>> +
>>>  		mapped_address = (uint64_t)(uintptr_t)mmap(NULL,
>>>  			mapped_size,
>>>  			PROT_READ | PROT_WRITE, MAP_SHARED, @@ -154,9
>> +158,11 @@
>>> user_set_mem_table(struct vhost_device_ctx ctx, struct VhostUserMsg *pmsg)
>>>  			0);
>>>
>>>  		RTE_LOG(INFO, VHOST_CONFIG,
>>> -			"mapped region %d fd:%d to %p sz:0x%"PRIx64"
>> off:0x%"PRIx64"\n",
>>> +			"mapped region %d fd:%d to:%p sz:0x%"PRIx64" "
>>> +			"off:0x%"PRIx64" align:0x%"PRIx64"\n",
>>>  			idx, pmsg->fds[idx], (void *)(uintptr_t)mapped_address,
>>> -			mapped_size, memory.regions[idx].mmap_offset);
>>> +			mapped_size, memory.regions[idx].mmap_offset,
>>> +			alignment);
>>>
>>>  		if (mapped_address == (uint64_t)(uintptr_t)MAP_FAILED) {
>>>  			RTE_LOG(ERR, VHOST_CONFIG,
>>> @@ -166,7 +172,7 @@ user_set_mem_table(struct vhost_device_ctx ctx,
>>> struct VhostUserMsg *pmsg)
>>>
>>>  		pregion_orig[idx].mapped_address = mapped_address;
>>>  		pregion_orig[idx].mapped_size = mapped_size;
>>> -		pregion_orig[idx].blksz = get_blk_size(pmsg->fds[idx]);
>>> +		pregion_orig[idx].blksz = alignment;
>>>  		pregion_orig[idx].fd = pmsg->fds[idx];
>>>
>>>  		mapped_address +=  memory.regions[idx].mmap_offset;
>
  

Patch

diff --git a/lib/librte_vhost/vhost_user/virtio-net-user.c b/lib/librte_vhost/vhost_user/virtio-net-user.c
index a998ad8..641561c 100644
--- a/lib/librte_vhost/vhost_user/virtio-net-user.c
+++ b/lib/librte_vhost/vhost_user/virtio-net-user.c
@@ -147,6 +147,10 @@  user_set_mem_table(struct vhost_device_ctx ctx, struct VhostUserMsg *pmsg)
 		/* This is ugly */
 		mapped_size = memory.regions[idx].memory_size +
 			memory.regions[idx].mmap_offset;
+
+		alignment = get_blk_size(pmsg->fds[idx]);
+		mapped_size = RTE_ALIGN_CEIL(mapped_size, alignment);
+
 		mapped_address = (uint64_t)(uintptr_t)mmap(NULL,
 			mapped_size,
 			PROT_READ | PROT_WRITE, MAP_SHARED,
@@ -154,9 +158,11 @@  user_set_mem_table(struct vhost_device_ctx ctx, struct VhostUserMsg *pmsg)
 			0);
 
 		RTE_LOG(INFO, VHOST_CONFIG,
-			"mapped region %d fd:%d to %p sz:0x%"PRIx64" off:0x%"PRIx64"\n",
+			"mapped region %d fd:%d to:%p sz:0x%"PRIx64" "
+			"off:0x%"PRIx64" align:0x%"PRIx64"\n",
 			idx, pmsg->fds[idx], (void *)(uintptr_t)mapped_address,
-			mapped_size, memory.regions[idx].mmap_offset);
+			mapped_size, memory.regions[idx].mmap_offset,
+			alignment);
 
 		if (mapped_address == (uint64_t)(uintptr_t)MAP_FAILED) {
 			RTE_LOG(ERR, VHOST_CONFIG,
@@ -166,7 +172,7 @@  user_set_mem_table(struct vhost_device_ctx ctx, struct VhostUserMsg *pmsg)
 
 		pregion_orig[idx].mapped_address = mapped_address;
 		pregion_orig[idx].mapped_size = mapped_size;
-		pregion_orig[idx].blksz = get_blk_size(pmsg->fds[idx]);
+		pregion_orig[idx].blksz = alignment;
 		pregion_orig[idx].fd = pmsg->fds[idx];
 
 		mapped_address +=  memory.regions[idx].mmap_offset;