[dpdk-dev,v2] vhost: Only access header if offloading is supported in dequeue path

Message ID 1476171927-10134-1-git-send-email-maxime.coquelin@redhat.com (mailing list archive)
State Superseded, archived
Delegated to: Yuanhan Liu
Headers

Commit Message

Maxime Coquelin Oct. 11, 2016, 7:45 a.m. UTC
  If offloading features are not negotiated, parsing the virtio header
is not needed.

Micro-benchmark with testpmd shows that the gain is +4% with indirect
descriptors, +1% when using direct descriptors.

Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com>
---
Changes since v1:
  

Comments

Yuanhan Liu Oct. 11, 2016, 9:01 a.m. UTC | #1
On Tue, Oct 11, 2016 at 09:45:27AM +0200, Maxime Coquelin wrote:
> @@ -684,12 +699,12 @@ copy_desc_to_mbuf(struct virtio_net *dev, struct vring_desc *descs,
>  		  struct rte_mempool *mbuf_pool)
>  {
>  	struct vring_desc *desc;
> -	uint64_t desc_addr;
> +	uint64_t desc_addr = 0;
>  	uint32_t desc_avail, desc_offset;
>  	uint32_t mbuf_avail, mbuf_offset;
>  	uint32_t cpy_len;
>  	struct rte_mbuf *cur = m, *prev = m;
> -	struct virtio_net_hdr *hdr;
> +	struct virtio_net_hdr *hdr = NULL;
>  	/* A counter to avoid desc dead loop chain */
>  	uint32_t nr_desc = 1;
>  
> @@ -698,12 +713,14 @@ copy_desc_to_mbuf(struct virtio_net *dev, struct vring_desc *descs,
>  			(desc->flags & VRING_DESC_F_INDIRECT))
>  		return -1;
>  
> -	desc_addr = gpa_to_vva(dev, desc->addr);
> -	if (unlikely(!desc_addr))
> -		return -1;
> +	if (virtio_net_with_host_offload(dev)) {
> +		desc_addr = gpa_to_vva(dev, desc->addr);
> +		if (unlikely(!desc_addr))
> +			return -1;
>  
> -	hdr = (struct virtio_net_hdr *)((uintptr_t)desc_addr);
> -	rte_prefetch0(hdr);
> +		hdr = (struct virtio_net_hdr *)((uintptr_t)desc_addr);
> +		rte_prefetch0(hdr);
> +	}
>  
>  	/*
>  	 * A virtio driver normally uses at least 2 desc buffers
> @@ -720,18 +737,24 @@ copy_desc_to_mbuf(struct virtio_net *dev, struct vring_desc *descs,
>  		if (unlikely(!desc_addr))
>  			return -1;
>  
> -		rte_prefetch0((void *)(uintptr_t)desc_addr);
> -
>  		desc_offset = 0;
>  		desc_avail  = desc->len;
>  		nr_desc    += 1;
> -
> -		PRINT_PACKET(dev, (uintptr_t)desc_addr, desc->len, 0);
>  	} else {
> +		if (!desc_addr) {
> +			desc_addr = gpa_to_vva(dev, desc->addr);
> +			if (unlikely(!desc_addr))
> +				return -1;
> +		}
> +

I think this piece of code make things a bit complex. I think what you
want to achieve is, besides saving hdr prefetch, to save one call to
gpa_to_vva() for the non-ANY_LAYOUT case. Does that matter too much?

How about just saving the hdr prefetch?

	if (virtio_net_with_host_offload(dev)) {
		hdr = (struct virtio_net_hdr *)((uintptr_t)desc_addr);
		rte_prefetch0(hdr);
	}

	--yliu

>  		desc_avail  = desc->len - dev->vhost_hlen;
>  		desc_offset = dev->vhost_hlen;
>  	}
>  
> +	rte_prefetch0((void *)(uintptr_t)(desc_addr + desc_offset));
> +
> +	PRINT_PACKET(dev, (uintptr_t)(desc_addr + desc_offset), desc_avail, 0);
> +
>  	mbuf_offset = 0;
>  	mbuf_avail  = m->buf_len - RTE_PKTMBUF_HEADROOM;
>  	while (1) {
> @@ -795,7 +818,7 @@ copy_desc_to_mbuf(struct virtio_net *dev, struct vring_desc *descs,
>  	prev->data_len = mbuf_offset;
>  	m->pkt_len    += mbuf_offset;
>  
> -	if (hdr->flags != 0 || hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE)
> +	if (virtio_net_with_host_offload(dev))
>  		vhost_dequeue_offload(hdr, m);
>  
>  	return 0;
> -- 
> 2.7.4
  
Maxime Coquelin Oct. 14, 2016, 7:24 a.m. UTC | #2
On 10/11/2016 11:01 AM, Yuanhan Liu wrote:
> On Tue, Oct 11, 2016 at 09:45:27AM +0200, Maxime Coquelin wrote:
>> @@ -684,12 +699,12 @@ copy_desc_to_mbuf(struct virtio_net *dev, struct vring_desc *descs,
>>  		  struct rte_mempool *mbuf_pool)
>>  {
>>  	struct vring_desc *desc;
>> -	uint64_t desc_addr;
>> +	uint64_t desc_addr = 0;
>>  	uint32_t desc_avail, desc_offset;
>>  	uint32_t mbuf_avail, mbuf_offset;
>>  	uint32_t cpy_len;
>>  	struct rte_mbuf *cur = m, *prev = m;
>> -	struct virtio_net_hdr *hdr;
>> +	struct virtio_net_hdr *hdr = NULL;
>>  	/* A counter to avoid desc dead loop chain */
>>  	uint32_t nr_desc = 1;
>>
>> @@ -698,12 +713,14 @@ copy_desc_to_mbuf(struct virtio_net *dev, struct vring_desc *descs,
>>  			(desc->flags & VRING_DESC_F_INDIRECT))
>>  		return -1;
>>
>> -	desc_addr = gpa_to_vva(dev, desc->addr);
>> -	if (unlikely(!desc_addr))
>> -		return -1;
>> +	if (virtio_net_with_host_offload(dev)) {
>> +		desc_addr = gpa_to_vva(dev, desc->addr);
>> +		if (unlikely(!desc_addr))
>> +			return -1;
>>
>> -	hdr = (struct virtio_net_hdr *)((uintptr_t)desc_addr);
>> -	rte_prefetch0(hdr);
>> +		hdr = (struct virtio_net_hdr *)((uintptr_t)desc_addr);
>> +		rte_prefetch0(hdr);
>> +	}
>>
>>  	/*
>>  	 * A virtio driver normally uses at least 2 desc buffers
>> @@ -720,18 +737,24 @@ copy_desc_to_mbuf(struct virtio_net *dev, struct vring_desc *descs,
>>  		if (unlikely(!desc_addr))
>>  			return -1;
>>
>> -		rte_prefetch0((void *)(uintptr_t)desc_addr);
>> -
>>  		desc_offset = 0;
>>  		desc_avail  = desc->len;
>>  		nr_desc    += 1;
>> -
>> -		PRINT_PACKET(dev, (uintptr_t)desc_addr, desc->len, 0);
>>  	} else {
>> +		if (!desc_addr) {
>> +			desc_addr = gpa_to_vva(dev, desc->addr);
>> +			if (unlikely(!desc_addr))
>> +				return -1;
>> +		}
>> +
>
> I think this piece of code make things a bit complex. I think what you
> want to achieve is, besides saving hdr prefetch, to save one call to
> gpa_to_vva() for the non-ANY_LAYOUT case. Does that matter too much?
>
> How about just saving the hdr prefetch?
>
> 	if (virtio_net_with_host_offload(dev)) {
> 		hdr = (struct virtio_net_hdr *)((uintptr_t)desc_addr);
> 		rte_prefetch0(hdr);
> 	}
Oops, you reply slipped through the cracks...

You're right, it doesn't matter too much, the thing to avoid id
definitely the hdr prefetch and access.

I'm sending a v3 now.

Thanks,
Maxime
  

Patch

=================
 - Rebased
 - Fix early out check in vhost_dequeue_offload

 lib/librte_vhost/virtio_net.c | 47 ++++++++++++++++++++++++++++++++-----------
 1 file changed, 35 insertions(+), 12 deletions(-)

diff --git a/lib/librte_vhost/virtio_net.c b/lib/librte_vhost/virtio_net.c
index a59c39b..5e2fd75 100644
--- a/lib/librte_vhost/virtio_net.c
+++ b/lib/librte_vhost/virtio_net.c
@@ -548,6 +548,18 @@  rte_vhost_enqueue_burst(int vid, uint16_t queue_id,
 		return virtio_dev_rx(dev, queue_id, pkts, count);
 }
 
+static inline bool
+virtio_net_with_host_offload(struct virtio_net *dev)
+{
+	if (dev->features &
+			(VIRTIO_NET_F_CSUM | VIRTIO_NET_F_HOST_ECN |
+			 VIRTIO_NET_F_HOST_TSO4 | VIRTIO_NET_F_HOST_TSO6 |
+			 VIRTIO_NET_F_HOST_UFO))
+		return true;
+
+	return false;
+}
+
 static void
 parse_ethernet(struct rte_mbuf *m, uint16_t *l4_proto, void **l4_hdr)
 {
@@ -600,6 +612,9 @@  vhost_dequeue_offload(struct virtio_net_hdr *hdr, struct rte_mbuf *m)
 	void *l4_hdr = NULL;
 	struct tcp_hdr *tcp_hdr = NULL;
 
+	if (hdr->flags == 0 && hdr->gso_type == VIRTIO_NET_HDR_GSO_NONE)
+		return;
+
 	parse_ethernet(m, &l4_proto, &l4_hdr);
 	if (hdr->flags == VIRTIO_NET_HDR_F_NEEDS_CSUM) {
 		if (hdr->csum_start == (m->l2_len + m->l3_len)) {
@@ -684,12 +699,12 @@  copy_desc_to_mbuf(struct virtio_net *dev, struct vring_desc *descs,
 		  struct rte_mempool *mbuf_pool)
 {
 	struct vring_desc *desc;
-	uint64_t desc_addr;
+	uint64_t desc_addr = 0;
 	uint32_t desc_avail, desc_offset;
 	uint32_t mbuf_avail, mbuf_offset;
 	uint32_t cpy_len;
 	struct rte_mbuf *cur = m, *prev = m;
-	struct virtio_net_hdr *hdr;
+	struct virtio_net_hdr *hdr = NULL;
 	/* A counter to avoid desc dead loop chain */
 	uint32_t nr_desc = 1;
 
@@ -698,12 +713,14 @@  copy_desc_to_mbuf(struct virtio_net *dev, struct vring_desc *descs,
 			(desc->flags & VRING_DESC_F_INDIRECT))
 		return -1;
 
-	desc_addr = gpa_to_vva(dev, desc->addr);
-	if (unlikely(!desc_addr))
-		return -1;
+	if (virtio_net_with_host_offload(dev)) {
+		desc_addr = gpa_to_vva(dev, desc->addr);
+		if (unlikely(!desc_addr))
+			return -1;
 
-	hdr = (struct virtio_net_hdr *)((uintptr_t)desc_addr);
-	rte_prefetch0(hdr);
+		hdr = (struct virtio_net_hdr *)((uintptr_t)desc_addr);
+		rte_prefetch0(hdr);
+	}
 
 	/*
 	 * A virtio driver normally uses at least 2 desc buffers
@@ -720,18 +737,24 @@  copy_desc_to_mbuf(struct virtio_net *dev, struct vring_desc *descs,
 		if (unlikely(!desc_addr))
 			return -1;
 
-		rte_prefetch0((void *)(uintptr_t)desc_addr);
-
 		desc_offset = 0;
 		desc_avail  = desc->len;
 		nr_desc    += 1;
-
-		PRINT_PACKET(dev, (uintptr_t)desc_addr, desc->len, 0);
 	} else {
+		if (!desc_addr) {
+			desc_addr = gpa_to_vva(dev, desc->addr);
+			if (unlikely(!desc_addr))
+				return -1;
+		}
+
 		desc_avail  = desc->len - dev->vhost_hlen;
 		desc_offset = dev->vhost_hlen;
 	}
 
+	rte_prefetch0((void *)(uintptr_t)(desc_addr + desc_offset));
+
+	PRINT_PACKET(dev, (uintptr_t)(desc_addr + desc_offset), desc_avail, 0);
+
 	mbuf_offset = 0;
 	mbuf_avail  = m->buf_len - RTE_PKTMBUF_HEADROOM;
 	while (1) {
@@ -795,7 +818,7 @@  copy_desc_to_mbuf(struct virtio_net *dev, struct vring_desc *descs,
 	prev->data_len = mbuf_offset;
 	m->pkt_len    += mbuf_offset;
 
-	if (hdr->flags != 0 || hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE)
+	if (virtio_net_with_host_offload(dev))
 		vhost_dequeue_offload(hdr, m);
 
 	return 0;