[dpdk-dev,v2] i40: fix the VXLAN TSO issue

Message ID 1467865627-23524-1-git-send-email-zhe.tao@intel.com (mailing list archive)
State Superseded, archived
Delegated to: Bruce Richardson

Commit Message

Zhe Tao July 7, 2016, 4:27 a.m. UTC
  Problem:
When using the TSO + VXLAN feature in i40e, the outer UDP length field in
each of the UDP segments produced by the i40e TSO engine has the wrong
value.

Fix this problem by setting the tunnel type field in the i40e descriptor,
which was previously missing.

Fixes: 77b8301733c3 ("i40e: VXLAN Tx checksum offload")

Signed-off-by: Zhe Tao <zhe.tao@intel.com>
---
V2: Edited some comments for mbuf structure and i40e driver.

 app/test-pmd/csumonly.c      | 26 +++++++++++++++++++-------
 drivers/net/i40e/i40e_rxtx.c | 12 +++++++++---
 lib/librte_mbuf/rte_mbuf.h   | 16 +++++++++++++++-
 3 files changed, 43 insertions(+), 11 deletions(-)
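
For context, below is a minimal sketch of how an application would be expected
to tag a VXLAN-encapsulated TCP packet for TSO with this patch applied. The
helper name, flag combination and segment size are illustrative only and are
not part of the patch; the exact set of flags an application must provide is
one of the points discussed in the comments below.

/* Illustrative sketch, not part of the patch: requires rte_mbuf.h,
 * rte_ether.h, rte_ip.h, rte_udp.h and rte_tcp.h.
 */
static void
setup_vxlan_tso_example(struct rte_mbuf *m)
{
	m->ol_flags |= PKT_TX_TUNNEL_VXLAN | PKT_TX_TCP_SEG |
		       PKT_TX_OUTER_IP_CKSUM | PKT_TX_IP_CKSUM | PKT_TX_IPV4;
	m->outer_l2_len = sizeof(struct ether_hdr);
	m->outer_l3_len = sizeof(struct ipv4_hdr);
	/* per the updated l2_len comment: outer L4 + tunnel + inner L2 */
	m->l2_len = sizeof(struct udp_hdr) + 8 /* VXLAN header */ +
		    sizeof(struct ether_hdr);
	m->l3_len = sizeof(struct ipv4_hdr);
	m->l4_len = sizeof(struct tcp_hdr);
	m->tso_segsz = 1400;
}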
  

Comments

Ananyev, Konstantin July 7, 2016, 10:01 a.m. UTC | #1
Hi Tao,

> 
> Problem:
> When using the TSO + VXLAN feature in i40e, the outer UDP length fields in
> the multiple UDP segments which are TSOed by the i40e will have the
> wrong value.
> 
> Fix this problem by adding the tunnel type field in the i40e descriptor
> which was missed before.
> 
> Fixes: 77b8301733c3 ("i40e: VXLAN Tx checksum offload")
> 
> Signed-off-by: Zhe Tao <zhe.tao@intel.com>
> ---
> V2: Edited some comments for mbuf structure and i40e driver.
> 
>  app/test-pmd/csumonly.c      | 26 +++++++++++++++++++-------
>  drivers/net/i40e/i40e_rxtx.c | 12 +++++++++---
>  lib/librte_mbuf/rte_mbuf.h   | 16 +++++++++++++++-
>  3 files changed, 43 insertions(+), 11 deletions(-)
> 
> diff --git a/app/test-pmd/csumonly.c b/app/test-pmd/csumonly.c
> index ac4bd8f..d423c20 100644
> --- a/app/test-pmd/csumonly.c
> +++ b/app/test-pmd/csumonly.c
> @@ -204,7 +204,8 @@ parse_ethernet(struct ether_hdr *eth_hdr, struct testpmd_offload_info *info)
>  static void
>  parse_vxlan(struct udp_hdr *udp_hdr,
>  	    struct testpmd_offload_info *info,
> -	    uint32_t pkt_type)
> +	    uint32_t pkt_type,
> +	    uint64_t *ol_flags)
>  {
>  	struct ether_hdr *eth_hdr;
> 
> @@ -215,6 +216,7 @@ parse_vxlan(struct udp_hdr *udp_hdr,
>  		RTE_ETH_IS_TUNNEL_PKT(pkt_type) == 0)
>  		return;
> 
> +	*ol_flags |= PKT_TX_TUNNEL_VXLAN;
>  	info->is_tunnel = 1;
>  	info->outer_ethertype = info->ethertype;
>  	info->outer_l2_len = info->l2_len;
> @@ -231,7 +233,9 @@ parse_vxlan(struct udp_hdr *udp_hdr,
> 
>  /* Parse a gre header */
>  static void
> -parse_gre(struct simple_gre_hdr *gre_hdr, struct testpmd_offload_info *info)
> +parse_gre(struct simple_gre_hdr *gre_hdr,
> +	  struct testpmd_offload_info *info,
> +	  uint64_t *ol_flags)
>  {
>  	struct ether_hdr *eth_hdr;
>  	struct ipv4_hdr *ipv4_hdr;
> @@ -242,6 +246,8 @@ parse_gre(struct simple_gre_hdr *gre_hdr, struct testpmd_offload_info *info)
>  	if ((gre_hdr->flags & _htons(~GRE_SUPPORTED_FIELDS)) != 0)
>  		return;
> 
> +	*ol_flags |= PKT_TX_TUNNEL_GRE;
> +
>  	gre_len += sizeof(struct simple_gre_hdr);
> 
>  	if (gre_hdr->flags & _htons(GRE_KEY_PRESENT))
> @@ -417,7 +423,7 @@ process_inner_cksums(void *l3_hdr, const struct testpmd_offload_info *info,
>   * packet */
>  static uint64_t
>  process_outer_cksums(void *outer_l3_hdr, struct testpmd_offload_info *info,
> -	uint16_t testpmd_ol_flags)
> +	uint16_t testpmd_ol_flags, uint64_t orig_ol_flags)
>  {
>  	struct ipv4_hdr *ipv4_hdr = outer_l3_hdr;
>  	struct ipv6_hdr *ipv6_hdr = outer_l3_hdr;
> @@ -442,6 +448,9 @@ process_outer_cksums(void *outer_l3_hdr, struct testpmd_offload_info *info,
>  	 * hardware supporting it today, and no API for it. */
> 
>  	udp_hdr = (struct udp_hdr *)((char *)outer_l3_hdr + info->outer_l3_len);
> +	if ((orig_ol_flags & PKT_TX_TCP_SEG) &&
> +	    ((orig_ol_flags & PKT_TX_TUNNEL_MASK) == PKT_TX_TUNNEL_VXLAN))
> +		udp_hdr->dgram_cksum = 0;
>  	/* do not recalculate udp cksum if it was 0 */
>  	if (udp_hdr->dgram_cksum != 0) {
>  		udp_hdr->dgram_cksum = 0;
> @@ -705,15 +714,18 @@ pkt_burst_checksum_forward(struct fwd_stream *fs)
>  			if (info.l4_proto == IPPROTO_UDP) {
>  				struct udp_hdr *udp_hdr;
>  				udp_hdr = (struct udp_hdr *)((char *)l3_hdr +
> -					info.l3_len);
> -				parse_vxlan(udp_hdr, &info, m->packet_type);
> +					   info.l3_len);
> +				parse_vxlan(udp_hdr, &info, m->packet_type,
> +					    &ol_flags);
>  			} else if (info.l4_proto == IPPROTO_GRE) {
>  				struct simple_gre_hdr *gre_hdr;
>  				gre_hdr = (struct simple_gre_hdr *)
>  					((char *)l3_hdr + info.l3_len);
> -				parse_gre(gre_hdr, &info);
> +				parse_gre(gre_hdr, &info, &ol_flags);
>  			} else if (info.l4_proto == IPPROTO_IPIP) {
>  				void *encap_ip_hdr;
> +
> +				ol_flags |= PKT_TX_TUNNEL_IPIP;
>  				encap_ip_hdr = (char *)l3_hdr + info.l3_len;
>  				parse_encap_ip(encap_ip_hdr, &info);
>  			}
> @@ -745,7 +757,7 @@ pkt_burst_checksum_forward(struct fwd_stream *fs)
>  		 * processed in hardware. */
>  		if (info.is_tunnel == 1) {
>  			ol_flags |= process_outer_cksums(outer_l3_hdr, &info,
> -				testpmd_ol_flags);
> +				testpmd_ol_flags, ol_flags);
>  		}
> 
>  		/* step 4: fill the mbuf meta data (flags and header lengths) */
> diff --git a/drivers/net/i40e/i40e_rxtx.c b/drivers/net/i40e/i40e_rxtx.c
> index 049a813..4c987f2 100644
> --- a/drivers/net/i40e/i40e_rxtx.c
> +++ b/drivers/net/i40e/i40e_rxtx.c
> @@ -801,6 +801,12 @@ i40e_txd_enable_checksum(uint64_t ol_flags,
>  			union i40e_tx_offload tx_offload,
>  			uint32_t *cd_tunneling)
>  {
> +	/* Tx pkts tunnel type*/
> +	if ((ol_flags & PKT_TX_TUNNEL_MASK) == PKT_TX_TUNNEL_VXLAN)
> +		*cd_tunneling |= I40E_TXD_CTX_UDP_TUNNELING;
> +	else if ((ol_flags & PKT_TX_TUNNEL_MASK) == PKT_TX_TUNNEL_GRE)
> +		*cd_tunneling |= I40E_TXD_CTX_GRE_TUNNELING;
> +
>  	/* UDP tunneling packet TX checksum offload */
>  	if (ol_flags & PKT_TX_OUTER_IP_CKSUM) {
> 
> @@ -1510,7 +1516,8 @@ i40e_calc_context_desc(uint64_t flags)
> 
>  /* set i40e TSO context descriptor */
>  static inline uint64_t
> -i40e_set_tso_ctx(struct rte_mbuf *mbuf, union i40e_tx_offload tx_offload)
> +i40e_set_tso_ctx(struct rte_mbuf *mbuf,
> +		 union i40e_tx_offload tx_offload)
>  {
>  	uint64_t ctx_desc = 0;
>  	uint32_t cd_cmd, hdr_len, cd_tso_len;
> @@ -1521,7 +1528,7 @@ i40e_set_tso_ctx(struct rte_mbuf *mbuf, union i40e_tx_offload tx_offload)
>  	}
> 
>  	/**
> -	 * in case of tunneling packet, the outer_l2_len and
> +	 * in case of non tunneling packet, the outer_l2_len and
>  	 * outer_l3_len must be 0.
>  	 */
>  	hdr_len = tx_offload.outer_l2_len +
> @@ -1537,7 +1544,6 @@ i40e_set_tso_ctx(struct rte_mbuf *mbuf, union i40e_tx_offload tx_offload)
>  		 I40E_TXD_CTX_QW1_TSO_LEN_SHIFT) |
>  		((uint64_t)mbuf->tso_segsz <<
>  		 I40E_TXD_CTX_QW1_MSS_SHIFT);
> -
>  	return ctx_desc;
>  }
> 
> diff --git a/lib/librte_mbuf/rte_mbuf.h b/lib/librte_mbuf/rte_mbuf.h
> index 15e3a10..8eb0d33 100644
> --- a/lib/librte_mbuf/rte_mbuf.h
> +++ b/lib/librte_mbuf/rte_mbuf.h
> @@ -133,6 +133,17 @@ extern "C" {
>  /* add new TX flags here */
> 
>  /**
> + * Bits 45:48 used for the tunnel type.
> + * When doing Tx offload like TSO or checksum, the HW needs to configure the
> + * tunnel type into the HW descriptors.
> + */
> +#define PKT_TX_TUNNEL_VXLAN   (1ULL << 45)
> +#define PKT_TX_TUNNEL_GRE   (2ULL << 45)
> +#define PKT_TX_TUNNEL_IPIP    (3ULL << 45)
> +/* add new TX TUNNEL type here */
> +#define PKT_TX_TUNNEL_MASK    (0xFULL << 45)
> +
> +/**
>   * Second VLAN insertion (QinQ) flag.
>   */
>  #define PKT_TX_QINQ_PKT    (1ULL << 49)   /**< TX packet with double VLAN inserted. */
> @@ -867,7 +878,10 @@ struct rte_mbuf {
>  	union {
>  		uint64_t tx_offload;       /**< combined for easy fetch */
>  		struct {
> -			uint64_t l2_len:7; /**< L2 (MAC) Header Length. */
> +			/* L2 (MAC) Header Length if it is not a tunneling pkt.
> +			 * for tunnel it is outer L4 len+tunnel len+inner L2 len
> +			 */
> +			uint64_t l2_len:7;
>  			uint64_t l3_len:9; /**< L3 (IP) Header Length. */
>  			uint64_t l4_len:8; /**< L4 (TCP/UDP) Header Length. */
>  			uint64_t tso_segsz:16; /**< TCP TSO segment size */
> --
> 2.1.4
  
Ananyev, Konstantin July 7, 2016, 10:50 a.m. UTC | #2
Hi Tao,

Sorry hit send button too early by accident :)
 
> >
> > Problem:
> > When using the TSO + VXLAN feature in i40e, the outer UDP length fields in
> > the multiple UDP segments which are TSOed by the i40e will have the
> > wrong value.
> >
> > Fix this problem by adding the tunnel type field in the i40e descriptor
> > which was missed before.
> >
> > Fixes: 77b8301733c3 ("i40e: VXLAN Tx checksum offload")
> >
> > Signed-off-by: Zhe Tao <zhe.tao@intel.com>
> > ---
> > V2: Edited some comments for mbuf structure and i40e driver.
> >
> >  app/test-pmd/csumonly.c      | 26 +++++++++++++++++++-------
> >  drivers/net/i40e/i40e_rxtx.c | 12 +++++++++---
> >  lib/librte_mbuf/rte_mbuf.h   | 16 +++++++++++++++-
> >  3 files changed, 43 insertions(+), 11 deletions(-)
> >
> > diff --git a/app/test-pmd/csumonly.c b/app/test-pmd/csumonly.c
> > index ac4bd8f..d423c20 100644
> > --- a/app/test-pmd/csumonly.c
> > +++ b/app/test-pmd/csumonly.c
> > @@ -204,7 +204,8 @@ parse_ethernet(struct ether_hdr *eth_hdr, struct testpmd_offload_info *info)
> >  static void
> >  parse_vxlan(struct udp_hdr *udp_hdr,
> >  	    struct testpmd_offload_info *info,
> > -	    uint32_t pkt_type)
> > +	    uint32_t pkt_type,
> > +	    uint64_t *ol_flags)
> >  {
> >  	struct ether_hdr *eth_hdr;
> >
> > @@ -215,6 +216,7 @@ parse_vxlan(struct udp_hdr *udp_hdr,
> >  		RTE_ETH_IS_TUNNEL_PKT(pkt_type) == 0)
> >  		return;
> >
> > +	*ol_flags |= PKT_TX_TUNNEL_VXLAN;
> >  	info->is_tunnel = 1;
> >  	info->outer_ethertype = info->ethertype;
> >  	info->outer_l2_len = info->l2_len;
> > @@ -231,7 +233,9 @@ parse_vxlan(struct udp_hdr *udp_hdr,
> >
> >  /* Parse a gre header */
> >  static void
> > -parse_gre(struct simple_gre_hdr *gre_hdr, struct testpmd_offload_info *info)
> > +parse_gre(struct simple_gre_hdr *gre_hdr,
> > +	  struct testpmd_offload_info *info,
> > +	  uint64_t *ol_flags)
> >  {
> >  	struct ether_hdr *eth_hdr;
> >  	struct ipv4_hdr *ipv4_hdr;
> > @@ -242,6 +246,8 @@ parse_gre(struct simple_gre_hdr *gre_hdr, struct testpmd_offload_info *info)
> >  	if ((gre_hdr->flags & _htons(~GRE_SUPPORTED_FIELDS)) != 0)
> >  		return;
> >
> > +	*ol_flags |= PKT_TX_TUNNEL_GRE;
> > +
> >  	gre_len += sizeof(struct simple_gre_hdr);
> >
> >  	if (gre_hdr->flags & _htons(GRE_KEY_PRESENT))
> > @@ -417,7 +423,7 @@ process_inner_cksums(void *l3_hdr, const struct testpmd_offload_info *info,
> >   * packet */
> >  static uint64_t
> >  process_outer_cksums(void *outer_l3_hdr, struct testpmd_offload_info *info,
> > -	uint16_t testpmd_ol_flags)
> > +	uint16_t testpmd_ol_flags, uint64_t orig_ol_flags)
> >  {
> >  	struct ipv4_hdr *ipv4_hdr = outer_l3_hdr;
> >  	struct ipv6_hdr *ipv6_hdr = outer_l3_hdr;
> > @@ -442,6 +448,9 @@ process_outer_cksums(void *outer_l3_hdr, struct testpmd_offload_info *info,
> >  	 * hardware supporting it today, and no API for it. */
> >
> >  	udp_hdr = (struct udp_hdr *)((char *)outer_l3_hdr + info->outer_l3_len);
> > +	if ((orig_ol_flags & PKT_TX_TCP_SEG) &&
> > +	    ((orig_ol_flags & PKT_TX_TUNNEL_MASK) == PKT_TX_TUNNEL_VXLAN))
> > +		udp_hdr->dgram_cksum = 0;
> >  	/* do not recalculate udp cksum if it was 0 */
> >  	if (udp_hdr->dgram_cksum != 0) {
> >  		udp_hdr->dgram_cksum = 0;
> > @@ -705,15 +714,18 @@ pkt_burst_checksum_forward(struct fwd_stream *fs)
> >  			if (info.l4_proto == IPPROTO_UDP) {
> >  				struct udp_hdr *udp_hdr;
> >  				udp_hdr = (struct udp_hdr *)((char *)l3_hdr +
> > -					info.l3_len);
> > -				parse_vxlan(udp_hdr, &info, m->packet_type);
> > +					   info.l3_len);
> > +				parse_vxlan(udp_hdr, &info, m->packet_type,
> > +					    &ol_flags);
> >  			} else if (info.l4_proto == IPPROTO_GRE) {
> >  				struct simple_gre_hdr *gre_hdr;
> >  				gre_hdr = (struct simple_gre_hdr *)
> >  					((char *)l3_hdr + info.l3_len);
> > -				parse_gre(gre_hdr, &info);
> > +				parse_gre(gre_hdr, &info, &ol_flags);
> >  			} else if (info.l4_proto == IPPROTO_IPIP) {
> >  				void *encap_ip_hdr;
> > +
> > +				ol_flags |= PKT_TX_TUNNEL_IPIP;
> >  				encap_ip_hdr = (char *)l3_hdr + info.l3_len;
> >  				parse_encap_ip(encap_ip_hdr, &info);
> >  			}
> > @@ -745,7 +757,7 @@ pkt_burst_checksum_forward(struct fwd_stream *fs)
> >  		 * processed in hardware. */
> >  		if (info.is_tunnel == 1) {
> >  			ol_flags |= process_outer_cksums(outer_l3_hdr, &info,
> > -				testpmd_ol_flags);
> > +				testpmd_ol_flags, ol_flags);
> >  		}
> >
> >  		/* step 4: fill the mbuf meta data (flags and header lengths) */
> > diff --git a/drivers/net/i40e/i40e_rxtx.c b/drivers/net/i40e/i40e_rxtx.c
> > index 049a813..4c987f2 100644
> > --- a/drivers/net/i40e/i40e_rxtx.c
> > +++ b/drivers/net/i40e/i40e_rxtx.c
> > @@ -801,6 +801,12 @@ i40e_txd_enable_checksum(uint64_t ol_flags,
> >  			union i40e_tx_offload tx_offload,
> >  			uint32_t *cd_tunneling)
> >  {
> > +	/* Tx pkts tunnel type*/
> > +	if ((ol_flags & PKT_TX_TUNNEL_MASK) == PKT_TX_TUNNEL_VXLAN)
> > +		*cd_tunneling |= I40E_TXD_CTX_UDP_TUNNELING;
> > +	else if ((ol_flags & PKT_TX_TUNNEL_MASK) == PKT_TX_TUNNEL_GRE)
> > +		*cd_tunneling |= I40E_TXD_CTX_GRE_TUNNELING;
> > +

As I understand it, that fix is needed to enable TSO for tunnelling packets, correct?
For that case, should we set up EIPLEN as well, regardless of whether PKT_TX_OUTER_IP_CKSUM
is on or off?

> >  	/* UDP tunneling packet TX checksum offload */
> >  	if (ol_flags & PKT_TX_OUTER_IP_CKSUM) {
> >
> > @@ -1510,7 +1516,8 @@ i40e_calc_context_desc(uint64_t flags)
> >
> >  /* set i40e TSO context descriptor */
> >  static inline uint64_t
> > -i40e_set_tso_ctx(struct rte_mbuf *mbuf, union i40e_tx_offload tx_offload)
> > +i40e_set_tso_ctx(struct rte_mbuf *mbuf,
> > +		 union i40e_tx_offload tx_offload)
> >  {
> >  	uint64_t ctx_desc = 0;
> >  	uint32_t cd_cmd, hdr_len, cd_tso_len;
> > @@ -1521,7 +1528,7 @@ i40e_set_tso_ctx(struct rte_mbuf *mbuf, union i40e_tx_offload tx_offload)
> >  	}
> >
> >  	/**
> > -	 * in case of tunneling packet, the outer_l2_len and
> > +	 * in case of non tunneling packet, the outer_l2_len and
> >  	 * outer_l3_len must be 0.
> >  	 */
> >  	hdr_len = tx_offload.outer_l2_len +
> > @@ -1537,7 +1544,6 @@ i40e_set_tso_ctx(struct rte_mbuf *mbuf, union i40e_tx_offload tx_offload)
> >  		 I40E_TXD_CTX_QW1_TSO_LEN_SHIFT) |
> >  		((uint64_t)mbuf->tso_segsz <<
> >  		 I40E_TXD_CTX_QW1_MSS_SHIFT);
> > -
> >  	return ctx_desc;
> >  }
> >
> > diff --git a/lib/librte_mbuf/rte_mbuf.h b/lib/librte_mbuf/rte_mbuf.h
> > index 15e3a10..8eb0d33 100644
> > --- a/lib/librte_mbuf/rte_mbuf.h
> > +++ b/lib/librte_mbuf/rte_mbuf.h
> > @@ -133,6 +133,17 @@ extern "C" {
> >  /* add new TX flags here */
> >
> >  /**
> > + * Bits 45:48 used for the tunnel type.
> > + * When doing Tx offload like TSO or checksum, the HW needs to configure the
> > + * tunnel type into the HW descriptors.
> > + */
> > +#define PKT_TX_TUNNEL_VXLAN   (1ULL << 45)
> > +#define PKT_TX_TUNNEL_GRE   (2ULL << 45)
> > +#define PKT_TX_TUNNEL_IPIP    (3ULL << 45)
> > +/* add new TX TUNNEL type here */
> > +#define PKT_TX_TUNNEL_MASK    (0xFULL << 45)
> > +
> > +/**
> >   * Second VLAN insertion (QinQ) flag.
> >   */
> >  #define PKT_TX_QINQ_PKT    (1ULL << 49)   /**< TX packet with double VLAN inserted. */
> > @@ -867,7 +878,10 @@ struct rte_mbuf {
> >  	union {
> >  		uint64_t tx_offload;       /**< combined for easy fetch */
> >  		struct {
> > -			uint64_t l2_len:7; /**< L2 (MAC) Header Length. */
> > +			/* L2 (MAC) Header Length if it is not a tunneling pkt.
> > +			 * for tunnel it is outer L4 len+tunnel len+inner L2 len
> > +			 */

As a nit: that doesn't look like a doxygen-style comment to me.
Konstantin

> > +			uint64_t l2_len:7;
> >  			uint64_t l3_len:9; /**< L3 (IP) Header Length. */
> >  			uint64_t l4_len:8; /**< L4 (TCP/UDP) Header Length. */
> >  			uint64_t tso_segsz:16; /**< TCP TSO segment size */
> > --
> > 2.1.4
  
Ananyev, Konstantin July 7, 2016, 12:24 p.m. UTC | #3
> -----Original Message-----
> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Ananyev, Konstantin
> Sent: Thursday, July 07, 2016 11:51 AM
> To: Tao, Zhe; dev@dpdk.org
> Cc: Tao, Zhe; Wu, Jingjing
> Subject: Re: [dpdk-dev] [PATCH v2] i40: fix the VXLAN TSO issue
> 
> 
> Hi Tao,
> 
> Sorry hit send button too early by accident :)
> 
> > >
> > > Problem:
> > > When using the TSO + VXLAN feature in i40e, the outer UDP length fields in
> > > the multiple UDP segments which are TSOed by the i40e will have the
> > > wrong value.
> > >
> > > Fix this problem by adding the tunnel type field in the i40e descriptor
> > > which was missed before.
> > >
> > > Fixes: 77b8301733c3 ("i40e: VXLAN Tx checksum offload")
> > >
> > > Signed-off-by: Zhe Tao <zhe.tao@intel.com>
> > > ---
> > > V2: Edited some comments for mbuf structure and i40e driver.
> > >
> > >  app/test-pmd/csumonly.c      | 26 +++++++++++++++++++-------
> > >  drivers/net/i40e/i40e_rxtx.c | 12 +++++++++---
> > >  lib/librte_mbuf/rte_mbuf.h   | 16 +++++++++++++++-
> > >  3 files changed, 43 insertions(+), 11 deletions(-)
> > >
> > > diff --git a/app/test-pmd/csumonly.c b/app/test-pmd/csumonly.c
> > > index ac4bd8f..d423c20 100644
> > > --- a/app/test-pmd/csumonly.c
> > > +++ b/app/test-pmd/csumonly.c
> > > @@ -204,7 +204,8 @@ parse_ethernet(struct ether_hdr *eth_hdr, struct testpmd_offload_info *info)
> > >  static void
> > >  parse_vxlan(struct udp_hdr *udp_hdr,
> > >  	    struct testpmd_offload_info *info,
> > > -	    uint32_t pkt_type)
> > > +	    uint32_t pkt_type,
> > > +	    uint64_t *ol_flags)
> > >  {
> > >  	struct ether_hdr *eth_hdr;
> > >
> > > @@ -215,6 +216,7 @@ parse_vxlan(struct udp_hdr *udp_hdr,
> > >  		RTE_ETH_IS_TUNNEL_PKT(pkt_type) == 0)
> > >  		return;
> > >
> > > +	*ol_flags |= PKT_TX_TUNNEL_VXLAN;

Do we always have to set the tunnelling flags here?
Obviously it would mean an extra CTD per packet and might slow things down.
In fact, I think the current patch wouldn't work correctly if
TESTPMD_TX_OFFLOAD_OUTER_IP_CKSUM is not set.
So, can we do it only when TSO is enabled or outer IP checksum is enabled?
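
A minimal sketch of that suggestion in parse_vxlan(), assuming the TSO decision
and the testpmd offload flags were made visible at this point (they are not in
the current code, so the variables below are hypothetical):

	/* Hypothetical: only request the tunnel context when an offload
	 * that needs it (TSO or outer IP checksum) is enabled.
	 */
	if (tso_enabled ||
	    (testpmd_ol_flags & TESTPMD_TX_OFFLOAD_OUTER_IP_CKSUM))
		*ol_flags |= PKT_TX_TUNNEL_VXLAN;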

> > >  	info->is_tunnel = 1;
> > >  	info->outer_ethertype = info->ethertype;
> > >  	info->outer_l2_len = info->l2_len;
> > > @@ -231,7 +233,9 @@ parse_vxlan(struct udp_hdr *udp_hdr,
> > >
> > >  /* Parse a gre header */
> > >  static void
> > > -parse_gre(struct simple_gre_hdr *gre_hdr, struct testpmd_offload_info *info)
> > > +parse_gre(struct simple_gre_hdr *gre_hdr,
> > > +	  struct testpmd_offload_info *info,
> > > +	  uint64_t *ol_flags)
> > >  {
> > >  	struct ether_hdr *eth_hdr;
> > >  	struct ipv4_hdr *ipv4_hdr;
> > > @@ -242,6 +246,8 @@ parse_gre(struct simple_gre_hdr *gre_hdr, struct testpmd_offload_info *info)
> > >  	if ((gre_hdr->flags & _htons(~GRE_SUPPORTED_FIELDS)) != 0)
> > >  		return;
> > >
> > > +	*ol_flags |= PKT_TX_TUNNEL_GRE;
> > > +
> > >  	gre_len += sizeof(struct simple_gre_hdr);
> > >
> > >  	if (gre_hdr->flags & _htons(GRE_KEY_PRESENT))
> > > @@ -417,7 +423,7 @@ process_inner_cksums(void *l3_hdr, const struct testpmd_offload_info *info,
> > >   * packet */
> > >  static uint64_t
> > >  process_outer_cksums(void *outer_l3_hdr, struct testpmd_offload_info *info,
> > > -	uint16_t testpmd_ol_flags)
> > > +	uint16_t testpmd_ol_flags, uint64_t orig_ol_flags)
> > >  {
> > >  	struct ipv4_hdr *ipv4_hdr = outer_l3_hdr;
> > >  	struct ipv6_hdr *ipv6_hdr = outer_l3_hdr;
> > > @@ -442,6 +448,9 @@ process_outer_cksums(void *outer_l3_hdr, struct testpmd_offload_info *info,
> > >  	 * hardware supporting it today, and no API for it. */
> > >
> > >  	udp_hdr = (struct udp_hdr *)((char *)outer_l3_hdr + info->outer_l3_len);
> > > +	if ((orig_ol_flags & PKT_TX_TCP_SEG) &&
> > > +	    ((orig_ol_flags & PKT_TX_TUNNEL_MASK) == PKT_TX_TUNNEL_VXLAN))
> > > +		udp_hdr->dgram_cksum = 0;
> > >  	/* do not recalculate udp cksum if it was 0 */
> > >  	if (udp_hdr->dgram_cksum != 0) {
> > >  		udp_hdr->dgram_cksum = 0;
> > > @@ -705,15 +714,18 @@ pkt_burst_checksum_forward(struct fwd_stream *fs)
> > >  			if (info.l4_proto == IPPROTO_UDP) {
> > >  				struct udp_hdr *udp_hdr;
> > >  				udp_hdr = (struct udp_hdr *)((char *)l3_hdr +
> > > -					info.l3_len);
> > > -				parse_vxlan(udp_hdr, &info, m->packet_type);
> > > +					   info.l3_len);
> > > +				parse_vxlan(udp_hdr, &info, m->packet_type,
> > > +					    &ol_flags);
> > >  			} else if (info.l4_proto == IPPROTO_GRE) {
> > >  				struct simple_gre_hdr *gre_hdr;
> > >  				gre_hdr = (struct simple_gre_hdr *)
> > >  					((char *)l3_hdr + info.l3_len);
> > > -				parse_gre(gre_hdr, &info);
> > > +				parse_gre(gre_hdr, &info, &ol_flags);
> > >  			} else if (info.l4_proto == IPPROTO_IPIP) {
> > >  				void *encap_ip_hdr;
> > > +
> > > +				ol_flags |= PKT_TX_TUNNEL_IPIP;
> > >  				encap_ip_hdr = (char *)l3_hdr + info.l3_len;
> > >  				parse_encap_ip(encap_ip_hdr, &info);
> > >  			}
> > > @@ -745,7 +757,7 @@ pkt_burst_checksum_forward(struct fwd_stream *fs)
> > >  		 * processed in hardware. */
> > >  		if (info.is_tunnel == 1) {
> > >  			ol_flags |= process_outer_cksums(outer_l3_hdr, &info,
> > > -				testpmd_ol_flags);
> > > +				testpmd_ol_flags, ol_flags);
> > >  		}
> > >
> > >  		/* step 4: fill the mbuf meta data (flags and header lengths) */
> > > diff --git a/drivers/net/i40e/i40e_rxtx.c b/drivers/net/i40e/i40e_rxtx.c
> > > index 049a813..4c987f2 100644
> > > --- a/drivers/net/i40e/i40e_rxtx.c
> > > +++ b/drivers/net/i40e/i40e_rxtx.c
> > > @@ -801,6 +801,12 @@ i40e_txd_enable_checksum(uint64_t ol_flags,
> > >  			union i40e_tx_offload tx_offload,
> > >  			uint32_t *cd_tunneling)
> > >  {
> > > +	/* Tx pkts tunnel type*/
> > > +	if ((ol_flags & PKT_TX_TUNNEL_MASK) == PKT_TX_TUNNEL_VXLAN)
> > > +		*cd_tunneling |= I40E_TXD_CTX_UDP_TUNNELING;
> > > +	else if ((ol_flags & PKT_TX_TUNNEL_MASK) == PKT_TX_TUNNEL_GRE)
> > > +		*cd_tunneling |= I40E_TXD_CTX_GRE_TUNNELING;
> > > +
> 
> As I understand that fix is needed to enable TSO for tunnelling packets, correct?
> For that case, should we setup EIPLEN also, no matter is PKT_TX_OUTER_IP_CKSUM
> is on/off?

In fact, it seems we always have to set up all three: EIPLEN, MACLEN and L4TUNLEN,
whenever L4TUNT != 0.
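
A rough sketch of what that could look like in i40e_txd_enable_checksum(), using
the EIPLEN/NATLEN shift macros already defined in the i40e base code (EIPLEN is
in 4-byte units, L4TUNLEN/NATLEN in 2-byte units). This only illustrates the
point; it is not a tested fix:

	if (ol_flags & PKT_TX_TUNNEL_MASK) {
		/* outer IP header length (EIPLEN) */
		*cd_tunneling |= (tx_offload.outer_l3_len >> 2) <<
				 I40E_TXD_CTX_QW0_EXT_IPLEN_SHIFT;
		/* tunnel header length (L4TUNLEN), carried in mbuf l2_len */
		*cd_tunneling |= (tx_offload.l2_len >> 1) <<
				 I40E_TXD_CTX_QW0_NATLEN_SHIFT;
		/* MACLEN (outer_l2_len) would similarly go into td_offset */
	}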

> 
> > >  	/* UDP tunneling packet TX checksum offload */
> > >  	if (ol_flags & PKT_TX_OUTER_IP_CKSUM) {
> > >
> > > @@ -1510,7 +1516,8 @@ i40e_calc_context_desc(uint64_t flags)
> > >
> > >  /* set i40e TSO context descriptor */
> > >  static inline uint64_t
> > > -i40e_set_tso_ctx(struct rte_mbuf *mbuf, union i40e_tx_offload tx_offload)
> > > +i40e_set_tso_ctx(struct rte_mbuf *mbuf,
> > > +		 union i40e_tx_offload tx_offload)
> > >  {
> > >  	uint64_t ctx_desc = 0;
> > >  	uint32_t cd_cmd, hdr_len, cd_tso_len;
> > > @@ -1521,7 +1528,7 @@ i40e_set_tso_ctx(struct rte_mbuf *mbuf, union i40e_tx_offload tx_offload)
> > >  	}
> > >
> > >  	/**
> > > -	 * in case of tunneling packet, the outer_l2_len and
> > > +	 * in case of non tunneling packet, the outer_l2_len and
> > >  	 * outer_l3_len must be 0.
> > >  	 */
> > >  	hdr_len = tx_offload.outer_l2_len +
> > > @@ -1537,7 +1544,6 @@ i40e_set_tso_ctx(struct rte_mbuf *mbuf, union i40e_tx_offload tx_offload)
> > >  		 I40E_TXD_CTX_QW1_TSO_LEN_SHIFT) |
> > >  		((uint64_t)mbuf->tso_segsz <<
> > >  		 I40E_TXD_CTX_QW1_MSS_SHIFT);
> > > -
> > >  	return ctx_desc;
> > >  }
> > >
> > > diff --git a/lib/librte_mbuf/rte_mbuf.h b/lib/librte_mbuf/rte_mbuf.h
> > > index 15e3a10..8eb0d33 100644
> > > --- a/lib/librte_mbuf/rte_mbuf.h
> > > +++ b/lib/librte_mbuf/rte_mbuf.h
> > > @@ -133,6 +133,17 @@ extern "C" {
> > >  /* add new TX flags here */
> > >
> > >  /**
> > > + * Bits 45:48 used for the tunnel type.
> > > + * When doing Tx offload like TSO or checksum, the HW needs to configure the
> > > + * tunnel type into the HW descriptors.
> > > + */
> > > +#define PKT_TX_TUNNEL_VXLAN   (1ULL << 45)
> > > +#define PKT_TX_TUNNEL_GRE   (2ULL << 45)
> > > +#define PKT_TX_TUNNEL_IPIP    (3ULL << 45)
> > > +/* add new TX TUNNEL type here */
> > > +#define PKT_TX_TUNNEL_MASK    (0xFULL << 45)
> > > +
> > > +/**
> > >   * Second VLAN insertion (QinQ) flag.
> > >   */
> > >  #define PKT_TX_QINQ_PKT    (1ULL << 49)   /**< TX packet with double VLAN inserted. */
> > > @@ -867,7 +878,10 @@ struct rte_mbuf {
> > >  	union {
> > >  		uint64_t tx_offload;       /**< combined for easy fetch */
> > >  		struct {
> > > -			uint64_t l2_len:7; /**< L2 (MAC) Header Length. */
> > > +			/* L2 (MAC) Header Length if it is not a tunneling pkt.
> > > +			 * for tunnel it is outer L4 len+tunnel len+inner L2 len
> > > +			 */
> 
> As a nit: that doesn't look like doxygen style comment to me.
> Konstantin
> 
> > > +			uint64_t l2_len:7;
> > >  			uint64_t l3_len:9; /**< L3 (IP) Header Length. */
> > >  			uint64_t l4_len:8; /**< L4 (TCP/UDP) Header Length. */
> > >  			uint64_t tso_segsz:16; /**< TCP TSO segment size */
> > > --
> > > 2.1.4
  
Bruce Richardson July 15, 2016, 3:40 p.m. UTC | #4
Since we are now heading into RC3 for 16.07 and there are quite a number of
comments on this patch still unresolved, it's going to be deferred until
16.11, when it can get more review and discussion.

/Bruce

On Thu, Jul 07, 2016 at 12:24:43PM +0000, Ananyev, Konstantin wrote:
> 
> 
> > -----Original Message-----
> > From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Ananyev, Konstantin
> > Sent: Thursday, July 07, 2016 11:51 AM
> > To: Tao, Zhe; dev@dpdk.org
> > Cc: Tao, Zhe; Wu, Jingjing
> > Subject: Re: [dpdk-dev] [PATCH v2] i40: fix the VXLAN TSO issue
> > 
> > 
> > Hi Tao,
> > 
> > Sorry hit send button too early by accident :)
> > 
> > > >
> > > > Problem:
> > > > When using the TSO + VXLAN feature in i40e, the outer UDP length fields in
> > > > the multiple UDP segments which are TSOed by the i40e will have the
> > > > wrong value.
> > > >
> > > > Fix this problem by adding the tunnel type field in the i40e descriptor
> > > > which was missed before.
> > > >
> > > > Fixes: 77b8301733c3 ("i40e: VXLAN Tx checksum offload")
> > > >
> > > > Signed-off-by: Zhe Tao <zhe.tao@intel.com>
> > > > ---
> > > > V2: Edited some comments for mbuf structure and i40e driver.
> > > >
> > > >  app/test-pmd/csumonly.c      | 26 +++++++++++++++++++-------
> > > >  drivers/net/i40e/i40e_rxtx.c | 12 +++++++++---
> > > >  lib/librte_mbuf/rte_mbuf.h   | 16 +++++++++++++++-
> > > >  3 files changed, 43 insertions(+), 11 deletions(-)
> > > >
> > > > diff --git a/app/test-pmd/csumonly.c b/app/test-pmd/csumonly.c
> > > > index ac4bd8f..d423c20 100644
> > > > --- a/app/test-pmd/csumonly.c
> > > > +++ b/app/test-pmd/csumonly.c
> > > > @@ -204,7 +204,8 @@ parse_ethernet(struct ether_hdr *eth_hdr, struct testpmd_offload_info *info)
> > > >  static void
> > > >  parse_vxlan(struct udp_hdr *udp_hdr,
> > > >  	    struct testpmd_offload_info *info,
> > > > -	    uint32_t pkt_type)
> > > > +	    uint32_t pkt_type,
> > > > +	    uint64_t *ol_flags)
> > > >  {
> > > >  	struct ether_hdr *eth_hdr;
> > > >
> > > > @@ -215,6 +216,7 @@ parse_vxlan(struct udp_hdr *udp_hdr,
> > > >  		RTE_ETH_IS_TUNNEL_PKT(pkt_type) == 0)
> > > >  		return;
> > > >
> > > > +	*ol_flags |= PKT_TX_TUNNEL_VXLAN;
> 
> Do we always have to setup tunnelling flags here?
> Obviously it would mean an extra CTD per packet and might slowdown things.
> In fact, I think current patch wouldn't work correctly if
> TESTPMD_TX_OFFLOAD_OUTER_IP_CKSUM is not set.
> So, can we do it only when TSO is enabled or outer IP checksum is enabled?
> 
> > > >  	info->is_tunnel = 1;
> > > >  	info->outer_ethertype = info->ethertype;
> > > >  	info->outer_l2_len = info->l2_len;
> > > > @@ -231,7 +233,9 @@ parse_vxlan(struct udp_hdr *udp_hdr,
> > > >
> > > >  /* Parse a gre header */
> > > >  static void
> > > > -parse_gre(struct simple_gre_hdr *gre_hdr, struct testpmd_offload_info *info)
> > > > +parse_gre(struct simple_gre_hdr *gre_hdr,
> > > > +	  struct testpmd_offload_info *info,
> > > > +	  uint64_t *ol_flags)
> > > >  {
> > > >  	struct ether_hdr *eth_hdr;
> > > >  	struct ipv4_hdr *ipv4_hdr;
> > > > @@ -242,6 +246,8 @@ parse_gre(struct simple_gre_hdr *gre_hdr, struct testpmd_offload_info *info)
> > > >  	if ((gre_hdr->flags & _htons(~GRE_SUPPORTED_FIELDS)) != 0)
> > > >  		return;
> > > >
> > > > +	*ol_flags |= PKT_TX_TUNNEL_GRE;
> > > > +
> > > >  	gre_len += sizeof(struct simple_gre_hdr);
> > > >
> > > >  	if (gre_hdr->flags & _htons(GRE_KEY_PRESENT))
> > > > @@ -417,7 +423,7 @@ process_inner_cksums(void *l3_hdr, const struct testpmd_offload_info *info,
> > > >   * packet */
> > > >  static uint64_t
> > > >  process_outer_cksums(void *outer_l3_hdr, struct testpmd_offload_info *info,
> > > > -	uint16_t testpmd_ol_flags)
> > > > +	uint16_t testpmd_ol_flags, uint64_t orig_ol_flags)
> > > >  {
> > > >  	struct ipv4_hdr *ipv4_hdr = outer_l3_hdr;
> > > >  	struct ipv6_hdr *ipv6_hdr = outer_l3_hdr;
> > > > @@ -442,6 +448,9 @@ process_outer_cksums(void *outer_l3_hdr, struct testpmd_offload_info *info,
> > > >  	 * hardware supporting it today, and no API for it. */
> > > >
> > > >  	udp_hdr = (struct udp_hdr *)((char *)outer_l3_hdr + info->outer_l3_len);
> > > > +	if ((orig_ol_flags & PKT_TX_TCP_SEG) &&
> > > > +	    ((orig_ol_flags & PKT_TX_TUNNEL_MASK) == PKT_TX_TUNNEL_VXLAN))
> > > > +		udp_hdr->dgram_cksum = 0;
> > > >  	/* do not recalculate udp cksum if it was 0 */
> > > >  	if (udp_hdr->dgram_cksum != 0) {
> > > >  		udp_hdr->dgram_cksum = 0;
> > > > @@ -705,15 +714,18 @@ pkt_burst_checksum_forward(struct fwd_stream *fs)
> > > >  			if (info.l4_proto == IPPROTO_UDP) {
> > > >  				struct udp_hdr *udp_hdr;
> > > >  				udp_hdr = (struct udp_hdr *)((char *)l3_hdr +
> > > > -					info.l3_len);
> > > > -				parse_vxlan(udp_hdr, &info, m->packet_type);
> > > > +					   info.l3_len);
> > > > +				parse_vxlan(udp_hdr, &info, m->packet_type,
> > > > +					    &ol_flags);
> > > >  			} else if (info.l4_proto == IPPROTO_GRE) {
> > > >  				struct simple_gre_hdr *gre_hdr;
> > > >  				gre_hdr = (struct simple_gre_hdr *)
> > > >  					((char *)l3_hdr + info.l3_len);
> > > > -				parse_gre(gre_hdr, &info);
> > > > +				parse_gre(gre_hdr, &info, &ol_flags);
> > > >  			} else if (info.l4_proto == IPPROTO_IPIP) {
> > > >  				void *encap_ip_hdr;
> > > > +
> > > > +				ol_flags |= PKT_TX_TUNNEL_IPIP;
> > > >  				encap_ip_hdr = (char *)l3_hdr + info.l3_len;
> > > >  				parse_encap_ip(encap_ip_hdr, &info);
> > > >  			}
> > > > @@ -745,7 +757,7 @@ pkt_burst_checksum_forward(struct fwd_stream *fs)
> > > >  		 * processed in hardware. */
> > > >  		if (info.is_tunnel == 1) {
> > > >  			ol_flags |= process_outer_cksums(outer_l3_hdr, &info,
> > > > -				testpmd_ol_flags);
> > > > +				testpmd_ol_flags, ol_flags);
> > > >  		}
> > > >
> > > >  		/* step 4: fill the mbuf meta data (flags and header lengths) */
> > > > diff --git a/drivers/net/i40e/i40e_rxtx.c b/drivers/net/i40e/i40e_rxtx.c
> > > > index 049a813..4c987f2 100644
> > > > --- a/drivers/net/i40e/i40e_rxtx.c
> > > > +++ b/drivers/net/i40e/i40e_rxtx.c
> > > > @@ -801,6 +801,12 @@ i40e_txd_enable_checksum(uint64_t ol_flags,
> > > >  			union i40e_tx_offload tx_offload,
> > > >  			uint32_t *cd_tunneling)
> > > >  {
> > > > +	/* Tx pkts tunnel type*/
> > > > +	if ((ol_flags & PKT_TX_TUNNEL_MASK) == PKT_TX_TUNNEL_VXLAN)
> > > > +		*cd_tunneling |= I40E_TXD_CTX_UDP_TUNNELING;
> > > > +	else if ((ol_flags & PKT_TX_TUNNEL_MASK) == PKT_TX_TUNNEL_GRE)
> > > > +		*cd_tunneling |= I40E_TXD_CTX_GRE_TUNNELING;
> > > > +
> > 
> > As I understand that fix is needed to enable TSO for tunnelling packets, correct?
> > For that case, should we setup EIPLEN also, no matter is PKT_TX_OUTER_IP_CKSUM
> > is on/off?
> 
> In fact, it seems we have always to setup all 3: EIPLEN, MACLEN and L4TUNLEN,
> when L4TUNT != 0.
> 
> > 
> > > >  	/* UDP tunneling packet TX checksum offload */
> > > >  	if (ol_flags & PKT_TX_OUTER_IP_CKSUM) {
> > > >
> > > > @@ -1510,7 +1516,8 @@ i40e_calc_context_desc(uint64_t flags)
> > > >
> > > >  /* set i40e TSO context descriptor */
> > > >  static inline uint64_t
> > > > -i40e_set_tso_ctx(struct rte_mbuf *mbuf, union i40e_tx_offload tx_offload)
> > > > +i40e_set_tso_ctx(struct rte_mbuf *mbuf,
> > > > +		 union i40e_tx_offload tx_offload)
> > > >  {
> > > >  	uint64_t ctx_desc = 0;
> > > >  	uint32_t cd_cmd, hdr_len, cd_tso_len;
> > > > @@ -1521,7 +1528,7 @@ i40e_set_tso_ctx(struct rte_mbuf *mbuf, union i40e_tx_offload tx_offload)
> > > >  	}
> > > >
> > > >  	/**
> > > > -	 * in case of tunneling packet, the outer_l2_len and
> > > > +	 * in case of non tunneling packet, the outer_l2_len and
> > > >  	 * outer_l3_len must be 0.
> > > >  	 */
> > > >  	hdr_len = tx_offload.outer_l2_len +
> > > > @@ -1537,7 +1544,6 @@ i40e_set_tso_ctx(struct rte_mbuf *mbuf, union i40e_tx_offload tx_offload)
> > > >  		 I40E_TXD_CTX_QW1_TSO_LEN_SHIFT) |
> > > >  		((uint64_t)mbuf->tso_segsz <<
> > > >  		 I40E_TXD_CTX_QW1_MSS_SHIFT);
> > > > -
> > > >  	return ctx_desc;
> > > >  }
> > > >
> > > > diff --git a/lib/librte_mbuf/rte_mbuf.h b/lib/librte_mbuf/rte_mbuf.h
> > > > index 15e3a10..8eb0d33 100644
> > > > --- a/lib/librte_mbuf/rte_mbuf.h
> > > > +++ b/lib/librte_mbuf/rte_mbuf.h
> > > > @@ -133,6 +133,17 @@ extern "C" {
> > > >  /* add new TX flags here */
> > > >
> > > >  /**
> > > > + * Bits 45:48 used for the tunnel type.
> > > > + * When doing Tx offload like TSO or checksum, the HW needs to configure the
> > > > + * tunnel type into the HW descriptors.
> > > > + */
> > > > +#define PKT_TX_TUNNEL_VXLAN   (1ULL << 45)
> > > > +#define PKT_TX_TUNNEL_GRE   (2ULL << 45)
> > > > +#define PKT_TX_TUNNEL_IPIP    (3ULL << 45)
> > > > +/* add new TX TUNNEL type here */
> > > > +#define PKT_TX_TUNNEL_MASK    (0xFULL << 45)
> > > > +
> > > > +/**
> > > >   * Second VLAN insertion (QinQ) flag.
> > > >   */
> > > >  #define PKT_TX_QINQ_PKT    (1ULL << 49)   /**< TX packet with double VLAN inserted. */
> > > > @@ -867,7 +878,10 @@ struct rte_mbuf {
> > > >  	union {
> > > >  		uint64_t tx_offload;       /**< combined for easy fetch */
> > > >  		struct {
> > > > -			uint64_t l2_len:7; /**< L2 (MAC) Header Length. */
> > > > +			/* L2 (MAC) Header Length if it is not a tunneling pkt.
> > > > +			 * for tunnel it is outer L4 len+tunnel len+inner L2 len
> > > > +			 */
> > 
> > As a nit: that doesn't look like doxygen style comment to me.
> > Konstantin
> > 
> > > > +			uint64_t l2_len:7;
> > > >  			uint64_t l3_len:9; /**< L3 (IP) Header Length. */
> > > >  			uint64_t l4_len:8; /**< L4 (TCP/UDP) Header Length. */
> > > >  			uint64_t tso_segsz:16; /**< TCP TSO segment size */
> > > > --
> > > > 2.1.4
>
  
Zhe Tao July 18, 2016, 2:57 a.m. UTC | #5
On Thu, Jul 07, 2016 at 08:24:43PM +0800, Ananyev, Konstantin wrote:
> 
> 
> > -----Original Message-----
> > From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Ananyev, Konstantin
> > Sent: Thursday, July 07, 2016 11:51 AM
> > To: Tao, Zhe; dev@dpdk.org
> > Cc: Tao, Zhe; Wu, Jingjing
> > Subject: Re: [dpdk-dev] [PATCH v2] i40: fix the VXLAN TSO issue
> > 
> > 
> > > > @@ -215,6 +216,7 @@ parse_vxlan(struct udp_hdr *udp_hdr,
> > > >  		RTE_ETH_IS_TUNNEL_PKT(pkt_type) == 0)
> > > >  		return;
> > > >
> > > > +	*ol_flags |= PKT_TX_TUNNEL_VXLAN;
> 
> Do we always have to setup tunnelling flags here?
> Obviously it would mean an extra CTD per packet and might slowdown things.
This flag only carries the tunnelling type; whether a CTD is emitted depends on
whether we need to do the external IP offload or TSO, so this flag will not cause
an extra CTD.
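
In other words, the driver only emits a context descriptor when one of the
offloads that requires it is requested. A simplified sketch of that check (the
exact flag mask used by i40e_calc_context_desc() may differ):

	static inline uint8_t
	i40e_calc_context_desc(uint64_t flags)
	{
		/* illustrative mask: offloads that need a context descriptor */
		uint64_t mask = PKT_TX_OUTER_IP_CKSUM | PKT_TX_TCP_SEG |
				PKT_TX_QINQ_PKT;

		return (flags & mask) ? 1 : 0;
	}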
> In fact, I think current patch wouldn't work correctly if
> TESTPMD_TX_OFFLOAD_OUTER_IP_CKSUM is not set.
> So, can we do it only when TSO is enabled or outer IP checksum is enabled?
Good comment, and I agree with you: the application should make sure all the flags
the driver needs are set.
> 
> > > >  	info->is_tunnel = 1;
> > > >  	info->outer_ethertype = info->ethertype;
> > > >  	info->outer_l2_len = info->l2_len;
> > > > @@ -231,7 +233,9 @@ parse_vxlan(struct udp_hdr *udp_hdr,
> > > >
> > > > +				testpmd_ol_flags, ol_flags);
> > > >  		}
> > > >
> > > >  		/* step 4: fill the mbuf meta data (flags and header lengths) */
> > > > diff --git a/drivers/net/i40e/i40e_rxtx.c b/drivers/net/i40e/i40e_rxtx.c
> > > > index 049a813..4c987f2 100644
> > > > --- a/drivers/net/i40e/i40e_rxtx.c
> > > > +++ b/drivers/net/i40e/i40e_rxtx.c
> > > > @@ -801,6 +801,12 @@ i40e_txd_enable_checksum(uint64_t ol_flags,
> > > >  			union i40e_tx_offload tx_offload,
> > > >  			uint32_t *cd_tunneling)
> > > >  {
> > > > +	/* Tx pkts tunnel type*/
> > > > +	if ((ol_flags & PKT_TX_TUNNEL_MASK) == PKT_TX_TUNNEL_VXLAN)
> > > > +		*cd_tunneling |= I40E_TXD_CTX_UDP_TUNNELING;
> > > > +	else if ((ol_flags & PKT_TX_TUNNEL_MASK) == PKT_TX_TUNNEL_GRE)
> > > > +		*cd_tunneling |= I40E_TXD_CTX_GRE_TUNNELING;
> > > > +
> > 
> > As I understand that fix is needed to enable TSO for tunnelling packets, correct?
> > For that case, should we setup EIPLEN also, no matter is PKT_TX_OUTER_IP_CKSUM
> > is on/off?
> 
> In fact, it seems we have always to setup all 3: EIPLEN, MACLEN and L4TUNLEN,
> when L4TUNT != 0.
L4TUNT only indicates the tunnel type; if we don't want to do the checksum offload,
these length fields will not be set.
> 
> > 
> > > >  	/* UDP tunneling packet TX checksum offload */
> > > >  	if (ol_flags & PKT_TX_OUTER_IP_CKSUM) {
> > > >
> > > > @@ -1510,7 +1516,8 @@ i40e_calc_context_desc(uint64_t flags)
Thanks again for the comments,
I will resend the patch.
Zhe
  
Jingjing Wu Oct. 10, 2016, 3:58 a.m. UTC | #6
NACK.

This issue has already been fixed by a newer patch, which is merged:  http://dpdk.org/dev/patchwork/patch/15059/


> -----Original Message-----
> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Zhe Tao
> Sent: Thursday, July 7, 2016 12:27 PM
> To: dev@dpdk.org
> Cc: zhe.tao@intel.com; Wu, Jingjing <jingjing.wu@intel.com>
> Subject: [dpdk-dev] [PATCH v2] i40: fix the VXLAN TSO issue
> 
> Problem:
> When using the TSO + VXLAN feature in i40e, the outer UDP length fields in
> the multiple UDP segments which are TSOed by the i40e will have the wrong
> value.
> 
> Fix this problem by adding the tunnel type field in the i40e descriptor which
> was missed before.
> 
> Fixes: 77b8301733c3 ("i40e: VXLAN Tx checksum offload")
> 
> Signed-off-by: Zhe Tao <zhe.tao@intel.com>
> ---
> V2: Edited some comments for mbuf structure and i40e driver.
> 
>  app/test-pmd/csumonly.c      | 26 +++++++++++++++++++-------
>  drivers/net/i40e/i40e_rxtx.c | 12 +++++++++---
>  lib/librte_mbuf/rte_mbuf.h   | 16 +++++++++++++++-
>  3 files changed, 43 insertions(+), 11 deletions(-)
> 
> diff --git a/app/test-pmd/csumonly.c b/app/test-pmd/csumonly.c index
> ac4bd8f..d423c20 100644
> --- a/app/test-pmd/csumonly.c
> +++ b/app/test-pmd/csumonly.c
> @@ -204,7 +204,8 @@ parse_ethernet(struct ether_hdr *eth_hdr, struct
> testpmd_offload_info *info)  static void  parse_vxlan(struct udp_hdr
> *udp_hdr,
>  	    struct testpmd_offload_info *info,
> -	    uint32_t pkt_type)
> +	    uint32_t pkt_type,
> +	    uint64_t *ol_flags)
>  {
>  	struct ether_hdr *eth_hdr;
> 
> @@ -215,6 +216,7 @@ parse_vxlan(struct udp_hdr *udp_hdr,
>  		RTE_ETH_IS_TUNNEL_PKT(pkt_type) == 0)
>  		return;
> 
> +	*ol_flags |= PKT_TX_TUNNEL_VXLAN;
>  	info->is_tunnel = 1;
>  	info->outer_ethertype = info->ethertype;
>  	info->outer_l2_len = info->l2_len;
> @@ -231,7 +233,9 @@ parse_vxlan(struct udp_hdr *udp_hdr,
> 
>  /* Parse a gre header */
>  static void
> -parse_gre(struct simple_gre_hdr *gre_hdr, struct testpmd_offload_info
> *info)
> +parse_gre(struct simple_gre_hdr *gre_hdr,
> +	  struct testpmd_offload_info *info,
> +	  uint64_t *ol_flags)
>  {
>  	struct ether_hdr *eth_hdr;
>  	struct ipv4_hdr *ipv4_hdr;
> @@ -242,6 +246,8 @@ parse_gre(struct simple_gre_hdr *gre_hdr, struct
> testpmd_offload_info *info)
>  	if ((gre_hdr->flags & _htons(~GRE_SUPPORTED_FIELDS)) != 0)
>  		return;
> 
> +	*ol_flags |= PKT_TX_TUNNEL_GRE;
> +
>  	gre_len += sizeof(struct simple_gre_hdr);
> 
>  	if (gre_hdr->flags & _htons(GRE_KEY_PRESENT)) @@ -417,7 +423,7
> @@ process_inner_cksums(void *l3_hdr, const struct testpmd_offload_info
> *info,
>   * packet */
>  static uint64_t
>  process_outer_cksums(void *outer_l3_hdr, struct testpmd_offload_info
> *info,
> -	uint16_t testpmd_ol_flags)
> +	uint16_t testpmd_ol_flags, uint64_t orig_ol_flags)
>  {
>  	struct ipv4_hdr *ipv4_hdr = outer_l3_hdr;
>  	struct ipv6_hdr *ipv6_hdr = outer_l3_hdr; @@ -442,6 +448,9 @@
> process_outer_cksums(void *outer_l3_hdr, struct testpmd_offload_info
> *info,
>  	 * hardware supporting it today, and no API for it. */
> 
>  	udp_hdr = (struct udp_hdr *)((char *)outer_l3_hdr + info-
> >outer_l3_len);
> +	if ((orig_ol_flags & PKT_TX_TCP_SEG) &&
> +	    ((orig_ol_flags & PKT_TX_TUNNEL_MASK) ==
> PKT_TX_TUNNEL_VXLAN))
> +		udp_hdr->dgram_cksum = 0;
>  	/* do not recalculate udp cksum if it was 0 */
>  	if (udp_hdr->dgram_cksum != 0) {
>  		udp_hdr->dgram_cksum = 0;
> @@ -705,15 +714,18 @@ pkt_burst_checksum_forward(struct fwd_stream
> *fs)
>  			if (info.l4_proto == IPPROTO_UDP) {
>  				struct udp_hdr *udp_hdr;
>  				udp_hdr = (struct udp_hdr *)((char *)l3_hdr
> +
> -					info.l3_len);
> -				parse_vxlan(udp_hdr, &info, m-
> >packet_type);
> +					   info.l3_len);
> +				parse_vxlan(udp_hdr, &info, m-
> >packet_type,
> +					    &ol_flags);
>  			} else if (info.l4_proto == IPPROTO_GRE) {
>  				struct simple_gre_hdr *gre_hdr;
>  				gre_hdr = (struct simple_gre_hdr *)
>  					((char *)l3_hdr + info.l3_len);
> -				parse_gre(gre_hdr, &info);
> +				parse_gre(gre_hdr, &info, &ol_flags);
>  			} else if (info.l4_proto == IPPROTO_IPIP) {
>  				void *encap_ip_hdr;
> +
> +				ol_flags |= PKT_TX_TUNNEL_IPIP;
>  				encap_ip_hdr = (char *)l3_hdr + info.l3_len;
>  				parse_encap_ip(encap_ip_hdr, &info);
>  			}
> @@ -745,7 +757,7 @@ pkt_burst_checksum_forward(struct fwd_stream *fs)
>  		 * processed in hardware. */
>  		if (info.is_tunnel == 1) {
>  			ol_flags |= process_outer_cksums(outer_l3_hdr,
> &info,
> -				testpmd_ol_flags);
> +				testpmd_ol_flags, ol_flags);
>  		}
> 
>  		/* step 4: fill the mbuf meta data (flags and header lengths)
> */ diff --git a/drivers/net/i40e/i40e_rxtx.c b/drivers/net/i40e/i40e_rxtx.c
> index 049a813..4c987f2 100644
> --- a/drivers/net/i40e/i40e_rxtx.c
> +++ b/drivers/net/i40e/i40e_rxtx.c
> @@ -801,6 +801,12 @@ i40e_txd_enable_checksum(uint64_t ol_flags,
>  			union i40e_tx_offload tx_offload,
>  			uint32_t *cd_tunneling)
>  {
> +	/* Tx pkts tunnel type*/
> +	if ((ol_flags & PKT_TX_TUNNEL_MASK) == PKT_TX_TUNNEL_VXLAN)
> +		*cd_tunneling |= I40E_TXD_CTX_UDP_TUNNELING;
> +	else if ((ol_flags & PKT_TX_TUNNEL_MASK) ==
> PKT_TX_TUNNEL_GRE)
> +		*cd_tunneling |= I40E_TXD_CTX_GRE_TUNNELING;
> +
>  	/* UDP tunneling packet TX checksum offload */
>  	if (ol_flags & PKT_TX_OUTER_IP_CKSUM) {
> 
> @@ -1510,7 +1516,8 @@ i40e_calc_context_desc(uint64_t flags)
> 
>  /* set i40e TSO context descriptor */
>  static inline uint64_t
> -i40e_set_tso_ctx(struct rte_mbuf *mbuf, union i40e_tx_offload tx_offload)
> +i40e_set_tso_ctx(struct rte_mbuf *mbuf,
> +		 union i40e_tx_offload tx_offload)
>  {
>  	uint64_t ctx_desc = 0;
>  	uint32_t cd_cmd, hdr_len, cd_tso_len;
> @@ -1521,7 +1528,7 @@ i40e_set_tso_ctx(struct rte_mbuf *mbuf, union
> i40e_tx_offload tx_offload)
>  	}
> 
>  	/**
> -	 * in case of tunneling packet, the outer_l2_len and
> +	 * in case of non tunneling packet, the outer_l2_len and
>  	 * outer_l3_len must be 0.
>  	 */
>  	hdr_len = tx_offload.outer_l2_len +
> @@ -1537,7 +1544,6 @@ i40e_set_tso_ctx(struct rte_mbuf *mbuf, union
> i40e_tx_offload tx_offload)
>  		 I40E_TXD_CTX_QW1_TSO_LEN_SHIFT) |
>  		((uint64_t)mbuf->tso_segsz <<
>  		 I40E_TXD_CTX_QW1_MSS_SHIFT);
> -
>  	return ctx_desc;
>  }
> 
> diff --git a/lib/librte_mbuf/rte_mbuf.h b/lib/librte_mbuf/rte_mbuf.h index
> 15e3a10..8eb0d33 100644
> --- a/lib/librte_mbuf/rte_mbuf.h
> +++ b/lib/librte_mbuf/rte_mbuf.h
> @@ -133,6 +133,17 @@ extern "C" {
>  /* add new TX flags here */
> 
>  /**
> + * Bits 45:48 used for the tunnel type.
> + * When doing Tx offload like TSO or checksum, the HW needs to
> +configure the
> + * tunnel type into the HW descriptors.
> + */
> +#define PKT_TX_TUNNEL_VXLAN   (1ULL << 45)
> +#define PKT_TX_TUNNEL_GRE   (2ULL << 45)
> +#define PKT_TX_TUNNEL_IPIP    (3ULL << 45)
> +/* add new TX TUNNEL type here */
> +#define PKT_TX_TUNNEL_MASK    (0xFULL << 45)
> +
> +/**
>   * Second VLAN insertion (QinQ) flag.
>   */
>  #define PKT_TX_QINQ_PKT    (1ULL << 49)   /**< TX packet with double
> VLAN inserted. */
> @@ -867,7 +878,10 @@ struct rte_mbuf {
>  	union {
>  		uint64_t tx_offload;       /**< combined for easy fetch */
>  		struct {
> -			uint64_t l2_len:7; /**< L2 (MAC) Header Length. */
> +			/* L2 (MAC) Header Length if it is not a tunneling pkt.
> +			 * for tunnel it is outer L4 len+tunnel len+inner L2 len
> +			 */
> +			uint64_t l2_len:7;
>  			uint64_t l3_len:9; /**< L3 (IP) Header Length. */
>  			uint64_t l4_len:8; /**< L4 (TCP/UDP) Header Length.
> */
>  			uint64_t tso_segsz:16; /**< TCP TSO segment size */
> --
> 2.1.4
  
Yuanhan Liu Oct. 10, 2016, 4:14 a.m. UTC | #7
On Mon, Oct 10, 2016 at 03:58:57AM +0000, Wu, Jingjing wrote:
> NACK.
> 
> This fix has been done by a new one which is already merged:  http://dpdk.org/dev/patchwork/patch/15059/

Just want to let you know that you don't have to NACK an old patchset. In
the patchwork process, ideally, the patch author should mark the old
one as "Superseded" once they send out a new version.

Meanwhile, the committers can also do the marking while reviewing
patchwork. I have done that a lot, since I haven't seen anyone who submitted
virtio/vhost patches do that before :/

	--yliu
  

Patch

diff --git a/app/test-pmd/csumonly.c b/app/test-pmd/csumonly.c
index ac4bd8f..d423c20 100644
--- a/app/test-pmd/csumonly.c
+++ b/app/test-pmd/csumonly.c
@@ -204,7 +204,8 @@  parse_ethernet(struct ether_hdr *eth_hdr, struct testpmd_offload_info *info)
 static void
 parse_vxlan(struct udp_hdr *udp_hdr,
 	    struct testpmd_offload_info *info,
-	    uint32_t pkt_type)
+	    uint32_t pkt_type,
+	    uint64_t *ol_flags)
 {
 	struct ether_hdr *eth_hdr;
 
@@ -215,6 +216,7 @@  parse_vxlan(struct udp_hdr *udp_hdr,
 		RTE_ETH_IS_TUNNEL_PKT(pkt_type) == 0)
 		return;
 
+	*ol_flags |= PKT_TX_TUNNEL_VXLAN;
 	info->is_tunnel = 1;
 	info->outer_ethertype = info->ethertype;
 	info->outer_l2_len = info->l2_len;
@@ -231,7 +233,9 @@  parse_vxlan(struct udp_hdr *udp_hdr,
 
 /* Parse a gre header */
 static void
-parse_gre(struct simple_gre_hdr *gre_hdr, struct testpmd_offload_info *info)
+parse_gre(struct simple_gre_hdr *gre_hdr,
+	  struct testpmd_offload_info *info,
+	  uint64_t *ol_flags)
 {
 	struct ether_hdr *eth_hdr;
 	struct ipv4_hdr *ipv4_hdr;
@@ -242,6 +246,8 @@  parse_gre(struct simple_gre_hdr *gre_hdr, struct testpmd_offload_info *info)
 	if ((gre_hdr->flags & _htons(~GRE_SUPPORTED_FIELDS)) != 0)
 		return;
 
+	*ol_flags |= PKT_TX_TUNNEL_GRE;
+
 	gre_len += sizeof(struct simple_gre_hdr);
 
 	if (gre_hdr->flags & _htons(GRE_KEY_PRESENT))
@@ -417,7 +423,7 @@  process_inner_cksums(void *l3_hdr, const struct testpmd_offload_info *info,
  * packet */
 static uint64_t
 process_outer_cksums(void *outer_l3_hdr, struct testpmd_offload_info *info,
-	uint16_t testpmd_ol_flags)
+	uint16_t testpmd_ol_flags, uint64_t orig_ol_flags)
 {
 	struct ipv4_hdr *ipv4_hdr = outer_l3_hdr;
 	struct ipv6_hdr *ipv6_hdr = outer_l3_hdr;
@@ -442,6 +448,9 @@  process_outer_cksums(void *outer_l3_hdr, struct testpmd_offload_info *info,
 	 * hardware supporting it today, and no API for it. */
 
 	udp_hdr = (struct udp_hdr *)((char *)outer_l3_hdr + info->outer_l3_len);
+	if ((orig_ol_flags & PKT_TX_TCP_SEG) &&
+	    ((orig_ol_flags & PKT_TX_TUNNEL_MASK) == PKT_TX_TUNNEL_VXLAN))
+		udp_hdr->dgram_cksum = 0;
 	/* do not recalculate udp cksum if it was 0 */
 	if (udp_hdr->dgram_cksum != 0) {
 		udp_hdr->dgram_cksum = 0;
@@ -705,15 +714,18 @@  pkt_burst_checksum_forward(struct fwd_stream *fs)
 			if (info.l4_proto == IPPROTO_UDP) {
 				struct udp_hdr *udp_hdr;
 				udp_hdr = (struct udp_hdr *)((char *)l3_hdr +
-					info.l3_len);
-				parse_vxlan(udp_hdr, &info, m->packet_type);
+					   info.l3_len);
+				parse_vxlan(udp_hdr, &info, m->packet_type,
+					    &ol_flags);
 			} else if (info.l4_proto == IPPROTO_GRE) {
 				struct simple_gre_hdr *gre_hdr;
 				gre_hdr = (struct simple_gre_hdr *)
 					((char *)l3_hdr + info.l3_len);
-				parse_gre(gre_hdr, &info);
+				parse_gre(gre_hdr, &info, &ol_flags);
 			} else if (info.l4_proto == IPPROTO_IPIP) {
 				void *encap_ip_hdr;
+
+				ol_flags |= PKT_TX_TUNNEL_IPIP;
 				encap_ip_hdr = (char *)l3_hdr + info.l3_len;
 				parse_encap_ip(encap_ip_hdr, &info);
 			}
@@ -745,7 +757,7 @@  pkt_burst_checksum_forward(struct fwd_stream *fs)
 		 * processed in hardware. */
 		if (info.is_tunnel == 1) {
 			ol_flags |= process_outer_cksums(outer_l3_hdr, &info,
-				testpmd_ol_flags);
+				testpmd_ol_flags, ol_flags);
 		}
 
 		/* step 4: fill the mbuf meta data (flags and header lengths) */
diff --git a/drivers/net/i40e/i40e_rxtx.c b/drivers/net/i40e/i40e_rxtx.c
index 049a813..4c987f2 100644
--- a/drivers/net/i40e/i40e_rxtx.c
+++ b/drivers/net/i40e/i40e_rxtx.c
@@ -801,6 +801,12 @@  i40e_txd_enable_checksum(uint64_t ol_flags,
 			union i40e_tx_offload tx_offload,
 			uint32_t *cd_tunneling)
 {
+	/* Tx pkts tunnel type*/
+	if ((ol_flags & PKT_TX_TUNNEL_MASK) == PKT_TX_TUNNEL_VXLAN)
+		*cd_tunneling |= I40E_TXD_CTX_UDP_TUNNELING;
+	else if ((ol_flags & PKT_TX_TUNNEL_MASK) == PKT_TX_TUNNEL_GRE)
+		*cd_tunneling |= I40E_TXD_CTX_GRE_TUNNELING;
+
 	/* UDP tunneling packet TX checksum offload */
 	if (ol_flags & PKT_TX_OUTER_IP_CKSUM) {
 
@@ -1510,7 +1516,8 @@  i40e_calc_context_desc(uint64_t flags)
 
 /* set i40e TSO context descriptor */
 static inline uint64_t
-i40e_set_tso_ctx(struct rte_mbuf *mbuf, union i40e_tx_offload tx_offload)
+i40e_set_tso_ctx(struct rte_mbuf *mbuf,
+		 union i40e_tx_offload tx_offload)
 {
 	uint64_t ctx_desc = 0;
 	uint32_t cd_cmd, hdr_len, cd_tso_len;
@@ -1521,7 +1528,7 @@  i40e_set_tso_ctx(struct rte_mbuf *mbuf, union i40e_tx_offload tx_offload)
 	}
 
 	/**
-	 * in case of tunneling packet, the outer_l2_len and
+	 * in case of non tunneling packet, the outer_l2_len and
 	 * outer_l3_len must be 0.
 	 */
 	hdr_len = tx_offload.outer_l2_len +
@@ -1537,7 +1544,6 @@  i40e_set_tso_ctx(struct rte_mbuf *mbuf, union i40e_tx_offload tx_offload)
 		 I40E_TXD_CTX_QW1_TSO_LEN_SHIFT) |
 		((uint64_t)mbuf->tso_segsz <<
 		 I40E_TXD_CTX_QW1_MSS_SHIFT);
-
 	return ctx_desc;
 }
 
diff --git a/lib/librte_mbuf/rte_mbuf.h b/lib/librte_mbuf/rte_mbuf.h
index 15e3a10..8eb0d33 100644
--- a/lib/librte_mbuf/rte_mbuf.h
+++ b/lib/librte_mbuf/rte_mbuf.h
@@ -133,6 +133,17 @@  extern "C" {
 /* add new TX flags here */
 
 /**
+ * Bits 45:48 used for the tunnel type.
+ * When doing Tx offload like TSO or checksum, the HW needs to configure the
+ * tunnel type into the HW descriptors.
+ */
+#define PKT_TX_TUNNEL_VXLAN   (1ULL << 45)
+#define PKT_TX_TUNNEL_GRE   (2ULL << 45)
+#define PKT_TX_TUNNEL_IPIP    (3ULL << 45)
+/* add new TX TUNNEL type here */
+#define PKT_TX_TUNNEL_MASK    (0xFULL << 45)
+
+/**
  * Second VLAN insertion (QinQ) flag.
  */
 #define PKT_TX_QINQ_PKT    (1ULL << 49)   /**< TX packet with double VLAN inserted. */
@@ -867,7 +878,10 @@  struct rte_mbuf {
 	union {
 		uint64_t tx_offload;       /**< combined for easy fetch */
 		struct {
-			uint64_t l2_len:7; /**< L2 (MAC) Header Length. */
+			/* L2 (MAC) Header Length if it is not a tunneling pkt.
+			 * for tunnel it is outer L4 len+tunnel len+inner L2 len
+			 */
+			uint64_t l2_len:7;
 			uint64_t l3_len:9; /**< L3 (IP) Header Length. */
 			uint64_t l4_len:8; /**< L4 (TCP/UDP) Header Length. */
 			uint64_t tso_segsz:16; /**< TCP TSO segment size */