[dpdk-dev,v5] mempool: reduce rte_mempool structure size

Message ID 1460626956-12038-1-git-send-email-olivier.matz@6wind.com (mailing list archive)
State Accepted, archived
Delegated to: Thomas Monjalon

Commit Message

Olivier Matz April 14, 2016, 9:42 a.m. UTC
  From: Keith Wiles <keith.wiles@intel.com>

The rte_mempool structure is changed, which will cause an ABI change
for this structure. Providing backward compatibility is not reasonable
here, as this structure is used in multiple defines/inlines.

Allow mempool cache support to be dynamic, depending on whether the
mempool being created needs cache support. This saves about 1.5 MB of
memory used by the rte_mempool structure.

Allocating small mempools which do not require a cache can consume
large amounts of memory if you have a number of these mempools.
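
A rough, standalone sanity check of the ~1.5 MB figure above (the two
constants below are assumed defaults of this period, not read from the
tree):

/*
 * The per-lcore cache footprint is dominated by the objs[] array of
 * struct rte_mempool_cache (3 * RTE_MEMPOOL_CACHE_MAX_SIZE pointers).
 */
#include <stdio.h>

#define RTE_MEMPOOL_CACHE_MAX_SIZE 512	/* assumed default */
#define RTE_MAX_LCORE 128		/* assumed default */

int main(void)
{
	size_t per_lcore = 3 * RTE_MEMPOOL_CACHE_MAX_SIZE * sizeof(void *);
	size_t total = per_lcore * RTE_MAX_LCORE;

	printf("%zu bytes per lcore, %zu bytes total (~%.2f MB)\n",
	       per_lcore, total, total / (1024.0 * 1024.0));
	return 0;
}

With 64-bit pointers this gives 12288 bytes per lcore and roughly
1.5 MB in total, matching the saving quoted above.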

Change to be effective in release 16.07.

Signed-off-by: Keith Wiles <keith.wiles@intel.com>
Acked-by: Olivier Matz <olivier.matz@6wind.com>
---

Changes in v5:

- use RTE_PTR_ADD() instead of a cast to (char *) to fix compilation on tilera.
  The error log was:

  rte_mempool.c: In function ‘rte_mempool_xmem_create’:
  rte_mempool.c:595: error: cast increases required alignment of target type
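
For context, a rough shape of the helper (paraphrased from
rte_common.h, not a verbatim copy):

/*
 * RTE_PTR_ADD() does its arithmetic through uintptr_t and yields a
 * void *; casting the resulting void * to struct rte_mempool_cache *
 * does not trip -Wcast-align, which is why the (char *) intermediate
 * triggered the error above on the strict-alignment tilera target
 * while this form does not.
 */
#define RTE_PTR_ADD(ptr, x) ((void *)((uintptr_t)(ptr) + (x)))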


 app/test/test_mempool.c          |  4 +--
 lib/librte_mempool/rte_mempool.c | 55 ++++++++++++++++++----------------------
 lib/librte_mempool/rte_mempool.h | 29 ++++++++++-----------
 3 files changed, 40 insertions(+), 48 deletions(-)
  

Comments

Wiles, Keith April 14, 2016, 1:28 p.m. UTC | #1
>From: Keith Wiles <keith.wiles@intel.com>

[...]

>diff --git a/app/test/test_mempool.c b/app/test/test_mempool.c
>index f0f823b..10e1fa4 100644
>--- a/app/test/test_mempool.c
>+++ b/app/test/test_mempool.c
>@@ -122,8 +122,8 @@ test_mempool_basic(void)
> 		return -1;
> 
> 	printf("get private data\n");
>-	if (rte_mempool_get_priv(mp) !=
>-			(char*) mp + MEMPOOL_HEADER_SIZE(mp, mp->pg_num))
>+	if (rte_mempool_get_priv(mp) != (char *)mp +
>+			MEMPOOL_HEADER_SIZE(mp, mp->pg_num, mp->cache_size))

Should we not add the RTE_PTR_ADD() here as well?

[...]

>@@ -1293,7 +1289,8 @@ void rte_mempool_audit(const struct rte_mempool *mp);
>  */
> static inline void *rte_mempool_get_priv(struct rte_mempool *mp)
> {
>-	return (char *)mp + MEMPOOL_HEADER_SIZE(mp, mp->pg_num);
>+	return (char *)mp +
>+		MEMPOOL_HEADER_SIZE(mp, mp->pg_num, mp->cache_size);

And here?

> }

Regards,
Keith
  
Olivier Matz April 14, 2016, 1:43 p.m. UTC | #2
Hi,

On 04/14/2016 03:28 PM, Wiles, Keith wrote:
>> From: Keith Wiles <keith.wiles@intel.com>
>> --- a/app/test/test_mempool.c
>> +++ b/app/test/test_mempool.c
>> @@ -122,8 +122,8 @@ test_mempool_basic(void)
>> 		return -1;
>>
>> 	printf("get private data\n");
>> -	if (rte_mempool_get_priv(mp) !=
>> -			(char*) mp + MEMPOOL_HEADER_SIZE(mp, mp->pg_num))
>> +	if (rte_mempool_get_priv(mp) != (char *)mp +
>> +			MEMPOOL_HEADER_SIZE(mp, mp->pg_num, mp->cache_size))
>
> Should we not add the RTE_PTR_ADD() here as well?

The displayed error message was "cast increases required alignment
of target type", and in this case the alignment constraint of mp
is higher than the constraint for char * (which is 1). So I think
there is no issue here... at least I can say it compiles without error.
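
To make that concrete, a minimal sketch (not DPDK code) of when the
warning fires; the 64-byte alignment stands in for a cache-aligned
structure such as struct rte_mempool_cache:

/* Casting down to char * is fine (char * needs alignment 1); casting
 * a char * back up to a more strictly aligned type is what
 * -Wcast-align (with -Werror on tilera) complains about. */
struct aligned_cache { int len; } __attribute__((aligned(64)));

void *cast_examples(struct aligned_cache *c, unsigned long off)
{
	char *p = (char *)c;	/* no warning: alignment 64 -> 1 */
	struct aligned_cache *q =
		(struct aligned_cache *)(p + off);	/* may warn: 1 -> 64 */

	return q;
}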

Regards,
Olivier
  
Wiles, Keith April 14, 2016, 1:53 p.m. UTC | #3
>From: Keith Wiles <keith.wiles@intel.com>

[...]

>Signed-off-by: Keith Wiles <keith.wiles@intel.com>
>Acked-by: Olivier Matz <olivier.matz@6wind.com>


For the change to this patch:
Acked-by: Keith Wiles <keith.wiles@intel.com>



Regards,
Keith
  
Thomas Monjalon May 17, 2016, 5:31 a.m. UTC | #4
2016-04-14 11:42, Olivier Matz:
> From: Keith Wiles <keith.wiles@intel.com>
> 
> The rte_mempool structure is changed, which will cause an ABI change
> for this structure. Providing backward compat is not reasonable
> here as this structure is used in multiple defines/inlines.

The deprecation notice must be removed by this patch.

[...]
> +/**
>   * Calculate the size of the mempool header.
>   *
>   * @param mp
> @@ -254,9 +256,9 @@ struct rte_mempool {
>   * @param pgn
>   *   Number of pages used to store mempool objects.

A new parameter has been forgotten:
 * @param cs
 *   Size of the per-lcore cache.

>   */
> -#define	MEMPOOL_HEADER_SIZE(mp, pgn)	(sizeof(*(mp)) + \
> -	RTE_ALIGN_CEIL(((pgn) - RTE_DIM((mp)->elt_pa)) * \
> -	sizeof ((mp)->elt_pa[0]), RTE_CACHE_LINE_SIZE))
> +#define MEMPOOL_HEADER_SIZE(mp, pgn, cs) \
> +	(sizeof(*(mp)) + __PA_SIZE(mp, pgn) + (((cs) == 0) ? 0 : \
> +	(sizeof(struct rte_mempool_cache) * RTE_MAX_LCORE)))

Applied with above changes
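
For reference, with the forgotten parameter added the applied header
presumably reads as below (the @param mp wording is assumed from the
existing comment, which the diff context does not show):

/**
 * Calculate the size of the mempool header.
 *
 * @param mp
 *   Pointer to the memory pool.
 * @param pgn
 *   Number of pages used to store mempool objects.
 * @param cs
 *   Size of the per-lcore cache.
 */
#define MEMPOOL_HEADER_SIZE(mp, pgn, cs) \
	(sizeof(*(mp)) + __PA_SIZE(mp, pgn) + (((cs) == 0) ? 0 : \
	(sizeof(struct rte_mempool_cache) * RTE_MAX_LCORE)))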
  

Patch

diff --git a/app/test/test_mempool.c b/app/test/test_mempool.c
index f0f823b..10e1fa4 100644
--- a/app/test/test_mempool.c
+++ b/app/test/test_mempool.c
@@ -122,8 +122,8 @@  test_mempool_basic(void)
 		return -1;
 
 	printf("get private data\n");
-	if (rte_mempool_get_priv(mp) !=
-			(char*) mp + MEMPOOL_HEADER_SIZE(mp, mp->pg_num))
+	if (rte_mempool_get_priv(mp) != (char *)mp +
+			MEMPOOL_HEADER_SIZE(mp, mp->pg_num, mp->cache_size))
 		return -1;
 
 	printf("get physical address of an object\n");
diff --git a/lib/librte_mempool/rte_mempool.c b/lib/librte_mempool/rte_mempool.c
index f8781e1..7a0e07e 100644
--- a/lib/librte_mempool/rte_mempool.c
+++ b/lib/librte_mempool/rte_mempool.c
@@ -452,12 +452,8 @@  rte_mempool_xmem_create(const char *name, unsigned n, unsigned elt_size,
 	/* compilation-time checks */
 	RTE_BUILD_BUG_ON((sizeof(struct rte_mempool) &
 			  RTE_CACHE_LINE_MASK) != 0);
-#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
 	RTE_BUILD_BUG_ON((sizeof(struct rte_mempool_cache) &
 			  RTE_CACHE_LINE_MASK) != 0);
-	RTE_BUILD_BUG_ON((offsetof(struct rte_mempool, local_cache) &
-			  RTE_CACHE_LINE_MASK) != 0);
-#endif
 #ifdef RTE_LIBRTE_MEMPOOL_DEBUG
 	RTE_BUILD_BUG_ON((sizeof(struct rte_mempool_debug_stats) &
 			  RTE_CACHE_LINE_MASK) != 0);
@@ -527,9 +523,8 @@  rte_mempool_xmem_create(const char *name, unsigned n, unsigned elt_size,
 		 */
 		int head = sizeof(struct rte_mempool);
 		int new_size = (private_data_size + head) % page_size;
-		if (new_size) {
+		if (new_size)
 			private_data_size += page_size - new_size;
-		}
 	}
 
 	/* try to allocate tailq entry */
@@ -544,7 +539,8 @@  rte_mempool_xmem_create(const char *name, unsigned n, unsigned elt_size,
 	 * store mempool objects. Otherwise reserve a memzone that is large
 	 * enough to hold mempool header and metadata plus mempool objects.
 	 */
-	mempool_size = MEMPOOL_HEADER_SIZE(mp, pg_num) + private_data_size;
+	mempool_size = MEMPOOL_HEADER_SIZE(mp, pg_num, cache_size);
+	mempool_size += private_data_size;
 	mempool_size = RTE_ALIGN_CEIL(mempool_size, RTE_MEMPOOL_ALIGN);
 	if (vaddr == NULL)
 		mempool_size += (size_t)objsz.total_size * n;
@@ -591,8 +587,15 @@  rte_mempool_xmem_create(const char *name, unsigned n, unsigned elt_size,
 	mp->cache_flushthresh = CALC_CACHE_FLUSHTHRESH(cache_size);
 	mp->private_data_size = private_data_size;
 
+	/*
+	 * local_cache pointer is set even if cache_size is zero.
+	 * The local_cache points to just past the elt_pa[] array.
+	 */
+	mp->local_cache = (struct rte_mempool_cache *)
+		RTE_PTR_ADD(mp, MEMPOOL_HEADER_SIZE(mp, pg_num, 0));
+
 	/* calculate address of the first element for continuous mempool. */
-	obj = (char *)mp + MEMPOOL_HEADER_SIZE(mp, pg_num) +
+	obj = (char *)mp + MEMPOOL_HEADER_SIZE(mp, pg_num, cache_size) +
 		private_data_size;
 	obj = RTE_PTR_ALIGN_CEIL(obj, RTE_MEMPOOL_ALIGN);
 
@@ -606,9 +609,8 @@  rte_mempool_xmem_create(const char *name, unsigned n, unsigned elt_size,
 		mp->elt_va_start = (uintptr_t)obj;
 		mp->elt_pa[0] = mp->phys_addr +
 			(mp->elt_va_start - (uintptr_t)mp);
-
-	/* mempool elements in a separate chunk of memory. */
 	} else {
+		/* mempool elements in a separate chunk of memory. */
 		mp->elt_va_start = (uintptr_t)vaddr;
 		memcpy(mp->elt_pa, paddr, sizeof (mp->elt_pa[0]) * pg_num);
 	}
@@ -643,19 +645,15 @@  unsigned
 rte_mempool_count(const struct rte_mempool *mp)
 {
 	unsigned count;
+	unsigned lcore_id;
 
 	count = rte_ring_count(mp->ring);
 
-#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
-	{
-		unsigned lcore_id;
-		if (mp->cache_size == 0)
-			return count;
+	if (mp->cache_size == 0)
+		return count;
 
-		for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++)
-			count += mp->local_cache[lcore_id].len;
-	}
-#endif
+	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++)
+		count += mp->local_cache[lcore_id].len;
 
 	/*
 	 * due to race condition (access to len is not locked), the
@@ -670,13 +668,16 @@  rte_mempool_count(const struct rte_mempool *mp)
 static unsigned
 rte_mempool_dump_cache(FILE *f, const struct rte_mempool *mp)
 {
-#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
 	unsigned lcore_id;
 	unsigned count = 0;
 	unsigned cache_count;
 
 	fprintf(f, "  cache infos:\n");
 	fprintf(f, "    cache_size=%"PRIu32"\n", mp->cache_size);
+
+	if (mp->cache_size == 0)
+		return count;
+
 	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
 		cache_count = mp->local_cache[lcore_id].len;
 		fprintf(f, "    cache_count[%u]=%u\n", lcore_id, cache_count);
@@ -684,11 +685,6 @@  rte_mempool_dump_cache(FILE *f, const struct rte_mempool *mp)
 	}
 	fprintf(f, "    total_cache_count=%u\n", count);
 	return count;
-#else
-	RTE_SET_USED(mp);
-	fprintf(f, "  cache disabled\n");
-	return 0;
-#endif
 }
 
 #ifdef RTE_LIBRTE_MEMPOOL_DEBUG
@@ -753,13 +749,16 @@  mempool_audit_cookies(const struct rte_mempool *mp)
 #define mempool_audit_cookies(mp) do {} while(0)
 #endif
 
-#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
 /* check cookies before and after objects */
 static void
 mempool_audit_cache(const struct rte_mempool *mp)
 {
 	/* check cache size consistency */
 	unsigned lcore_id;
+
+	if (mp->cache_size == 0)
+		return;
+
 	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
 		if (mp->local_cache[lcore_id].len > mp->cache_flushthresh) {
 			RTE_LOG(CRIT, MEMPOOL, "badness on cache[%u]\n",
@@ -768,10 +767,6 @@  mempool_audit_cache(const struct rte_mempool *mp)
 		}
 	}
 }
-#else
-#define mempool_audit_cache(mp) do {} while(0)
-#endif
-
 
 /* check the consistency of mempool (size, cookies, ...) */
 void
diff --git a/lib/librte_mempool/rte_mempool.h b/lib/librte_mempool/rte_mempool.h
index 9745bf0..8595e77 100644
--- a/lib/librte_mempool/rte_mempool.h
+++ b/lib/librte_mempool/rte_mempool.h
@@ -95,7 +95,6 @@  struct rte_mempool_debug_stats {
 } __rte_cache_aligned;
 #endif
 
-#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
 /**
  * A structure that stores a per-core object cache.
  */
@@ -107,7 +106,6 @@  struct rte_mempool_cache {
 	 */
 	void *objs[RTE_MEMPOOL_CACHE_MAX_SIZE * 3]; /**< Cache objects */
 } __rte_cache_aligned;
-#endif /* RTE_MEMPOOL_CACHE_MAX_SIZE > 0 */
 
 /**
  * A structure that stores the size of mempool elements.
@@ -194,10 +192,7 @@  struct rte_mempool {
 
 	unsigned private_data_size;      /**< Size of private data. */
 
-#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
-	/** Per-lcore local cache. */
-	struct rte_mempool_cache local_cache[RTE_MAX_LCORE];
-#endif
+	struct rte_mempool_cache *local_cache; /**< Per-lcore local cache */
 
 #ifdef RTE_LIBRTE_MEMPOOL_DEBUG
 	/** Per-lcore statistics. */
@@ -247,6 +242,13 @@  struct rte_mempool {
 #endif
 
 /**
+ * Size of elt_pa array size based on number of pages. (Internal use)
+ */
+#define __PA_SIZE(mp, pgn) \
+	RTE_ALIGN_CEIL((((pgn) - RTE_DIM((mp)->elt_pa)) * \
+	sizeof((mp)->elt_pa[0])), RTE_CACHE_LINE_SIZE)
+
+/**
  * Calculate the size of the mempool header.
  *
  * @param mp
@@ -254,9 +256,9 @@  struct rte_mempool {
  * @param pgn
  *   Number of pages used to store mempool objects.
  */
-#define	MEMPOOL_HEADER_SIZE(mp, pgn)	(sizeof(*(mp)) + \
-	RTE_ALIGN_CEIL(((pgn) - RTE_DIM((mp)->elt_pa)) * \
-	sizeof ((mp)->elt_pa[0]), RTE_CACHE_LINE_SIZE))
+#define MEMPOOL_HEADER_SIZE(mp, pgn, cs) \
+	(sizeof(*(mp)) + __PA_SIZE(mp, pgn) + (((cs) == 0) ? 0 : \
+	(sizeof(struct rte_mempool_cache) * RTE_MAX_LCORE)))
 
 /**
  * Return true if the whole mempool is in contiguous memory.
@@ -755,19 +757,16 @@  static inline void __attribute__((always_inline))
 __mempool_put_bulk(struct rte_mempool *mp, void * const *obj_table,
 		    unsigned n, int is_mp)
 {
-#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
 	struct rte_mempool_cache *cache;
 	uint32_t index;
 	void **cache_objs;
 	unsigned lcore_id = rte_lcore_id();
 	uint32_t cache_size = mp->cache_size;
 	uint32_t flushthresh = mp->cache_flushthresh;
-#endif /* RTE_MEMPOOL_CACHE_MAX_SIZE > 0 */
 
 	/* increment stat now, adding in mempool always success */
 	__MEMPOOL_STAT_ADD(mp, put, n);
 
-#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
 	/* cache is not enabled or single producer or non-EAL thread */
 	if (unlikely(cache_size == 0 || is_mp == 0 ||
 		     lcore_id >= RTE_MAX_LCORE))
@@ -802,7 +801,6 @@  __mempool_put_bulk(struct rte_mempool *mp, void * const *obj_table,
 	return;
 
 ring_enqueue:
-#endif /* RTE_MEMPOOL_CACHE_MAX_SIZE > 0 */
 
 	/* push remaining objects in ring */
 #ifdef RTE_LIBRTE_MEMPOOL_DEBUG
@@ -946,7 +944,6 @@  __mempool_get_bulk(struct rte_mempool *mp, void **obj_table,
 		   unsigned n, int is_mc)
 {
 	int ret;
-#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
 	struct rte_mempool_cache *cache;
 	uint32_t index, len;
 	void **cache_objs;
@@ -992,7 +989,6 @@  __mempool_get_bulk(struct rte_mempool *mp, void **obj_table,
 	return 0;
 
 ring_dequeue:
-#endif /* RTE_MEMPOOL_CACHE_MAX_SIZE > 0 */
 
 	/* get remaining objects from ring */
 	if (is_mc)
@@ -1293,7 +1289,8 @@  void rte_mempool_audit(const struct rte_mempool *mp);
  */
 static inline void *rte_mempool_get_priv(struct rte_mempool *mp)
 {
-	return (char *)mp + MEMPOOL_HEADER_SIZE(mp, mp->pg_num);
+	return (char *)mp +
+		MEMPOOL_HEADER_SIZE(mp, mp->pg_num, mp->cache_size);
 }
 
 /**