[dpdk-dev] mempool: Reduce rte_mempool structure size

Message ID 1454454177-26743-1-git-send-email-keith.wiles@intel.com (mailing list archive)
State Superseded, archived
Headers

Commit Message

Wiles, Keith Feb. 2, 2016, 11:02 p.m. UTC
  The rte_mempool structure is changed, which will cause an ABI change
for this structure.

Allow mempool cache support to be dynamic depending on whether the
mempool being created needs cache support. Saves about 1.5M of
memory used by the rte_mempool structure. Performance does not seem
to be affected running l3fwd and the test_mempool execution passed.

Allocating small mempools which do not require cache can consume
large amounts of memory if you have a number of these mempools.

Signed-off-by: Keith Wiles <keith.wiles@intel.com>
---
 app/test/test_mempool.c          |  4 +--
 lib/librte_mempool/rte_mempool.c | 56 ++++++++++++++++++----------------------
 lib/librte_mempool/rte_mempool.h | 29 ++++++++++-----------
 3 files changed, 40 insertions(+), 49 deletions(-)
  

Comments

Ananyev, Konstantin Feb. 3, 2016, 5:11 p.m. UTC | #1
> -----Original Message-----
> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Keith Wiles
> Sent: Tuesday, February 02, 2016 11:03 PM
> To: dev@dpdk.org
> Subject: [dpdk-dev] [PATCH] mempool: Reduce rte_mempool structure size
> 
> The rte_mempool structure is changed, which will cause an ABI change
> for this structure.
> 
> Allow mempool cache support to be dynamic depending on if the
> mempool being created needs cache support. Saves about 1.5M of
> memory used by the rte_mempool structure. Performance does not seem
> to be effected running l3fwd and the test_mempool execution passed.
> 
> Allocating small mempools which do not require cache can consume
> larges amounts of memory if you have a number of these mempools.
> 
> Signed-off-by: Keith Wiles <keith.wiles@intel.com>
> ---

Acked-by: Konstantin Ananyev <konstantin.ananyev@intel.com>
  
Olivier Matz Feb. 8, 2016, 11:02 a.m. UTC | #2
Hi Keith,

Looks good, thanks. Please find some comments below.

> [PATCH] mempool: Reduce rte_mempool structure size

nit: we usually avoid uppercase letters in title

On 02/03/2016 12:02 AM, Keith Wiles wrote:
> diff --git a/lib/librte_mempool/rte_mempool.c b/lib/librte_mempool/rte_mempool.c
> index aff5f6d..bdf8e2e 100644
> --- a/lib/librte_mempool/rte_mempool.c
> +++ b/lib/librte_mempool/rte_mempool.c
> @@ -450,15 +450,11 @@ rte_mempool_xmem_create(const char *name, unsigned n, unsigned elt_size,
>  	int page_size = getpagesize();
>  
>  	/* compilation-time checks */
> +#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
>  	RTE_BUILD_BUG_ON((sizeof(struct rte_mempool) &
>  			  RTE_CACHE_LINE_MASK) != 0);
> -#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
>  	RTE_BUILD_BUG_ON((sizeof(struct rte_mempool_cache) &
>  			  RTE_CACHE_LINE_MASK) != 0);
> -	RTE_BUILD_BUG_ON((offsetof(struct rte_mempool, local_cache) &
> -			  RTE_CACHE_LINE_MASK) != 0);
> -#endif
> -#ifdef RTE_LIBRTE_MEMPOOL_DEBUG

I don't think the #ifdef RTE_LIBRTE_MEMPOOL_DEBUG should be moved.
It should only protect the checks on stats, which are enabled
in debug mode.

> @@ -194,10 +192,7 @@ struct rte_mempool {
>  
>  	unsigned private_data_size;      /**< Size of private data. */
>  
> -#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
> -	/** Per-lcore local cache. */
> -	struct rte_mempool_cache local_cache[RTE_MAX_LCORE];
> -#endif
> +	struct rte_mempool_cache *local_cache; /**< Per-lcore local cache */
>  
>  #ifdef RTE_LIBRTE_MEMPOOL_DEBUG
>  	/** Per-lcore statistics. */

As you noted in your initial mail, this changes the ABI. I
think your patch justifies the ABI change, so it should
follow the ABI change process described in
dpdk/doc/guides/contributing/versioning.rst.

From what I understand of versioning.rst, this kind of change
requires a deprecation notice first, and will be integrated in the
next version. I don't think it's easy to keep backward compatibility
in this case, especially because the rte_mempool structure is
used by several inlined functions.

Regards,
Olivier
  
Wiles, Keith Feb. 8, 2016, 3:57 p.m. UTC | #3
>Hi Keith,

>

>Looks good, thanks. Please find some comments below.

>

>> [PATCH] mempool: Reduce rte_mempool structure size

>

>nit: we usually avoid uppercase letters in title


Will make that change for v2. Why no uppercase letters in the title? It seems a bit odd to me in this case.
>

>On 02/03/2016 12:02 AM, Keith Wiles wrote:

>> diff --git a/lib/librte_mempool/rte_mempool.c b/lib/librte_mempool/rte_mempool.c

>> index aff5f6d..bdf8e2e 100644

>> --- a/lib/librte_mempool/rte_mempool.c

>> +++ b/lib/librte_mempool/rte_mempool.c

>> @@ -450,15 +450,11 @@ rte_mempool_xmem_create(const char *name, unsigned n, unsigned elt_size,

>>  	int page_size = getpagesize();

>>  

>>  	/* compilation-time checks */

>> +#ifdef RTE_LIBRTE_MEMPOOL_DEBUG

>>  	RTE_BUILD_BUG_ON((sizeof(struct rte_mempool) &

>>  			  RTE_CACHE_LINE_MASK) != 0);

>> -#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0

>>  	RTE_BUILD_BUG_ON((sizeof(struct rte_mempool_cache) &

>>  			  RTE_CACHE_LINE_MASK) != 0);

>> -	RTE_BUILD_BUG_ON((offsetof(struct rte_mempool, local_cache) &

>> -			  RTE_CACHE_LINE_MASK) != 0);

>> -#endif

>> -#ifdef RTE_LIBRTE_MEMPOOL_DEBUG

>

>I don't think the #ifdef RTE_LIBRTE_MEMPOOL_DEBUG should be moved.

>It should only protects the checks on stats which are enabled

>in debug mode.


Will make that change for v2.
>

>> @@ -194,10 +192,7 @@ struct rte_mempool {

>>  

>>  	unsigned private_data_size;      /**< Size of private data. */

>>  

>> -#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0

>> -	/** Per-lcore local cache. */

>> -	struct rte_mempool_cache local_cache[RTE_MAX_LCORE];

>> -#endif

>> +	struct rte_mempool_cache *local_cache; /**< Per-lcore local cache */

>>  

>>  #ifdef RTE_LIBRTE_MEMPOOL_DEBUG

>>  	/** Per-lcore statistics. */

>

>As you noticed it in your initial mail, this changes the ABI. I

>think your patch justifies the ABI change, so I think it should

>follow the ABI change process described in

>dpdk/doc/guides/contributing/versioning.rst.

>

>From what I understand of versioning.rst, these kind of changes

>requires a deprecation notice first, and will be integrated in

>next version. I don't think it's easy to keep a backward compat

>in this case, especially because the rte_mempool structure is

>used by several inlined functions.


I am reading the API doc and need to understand this process a bit more, but from what I can tell I need to add an #ifdef RTE_NEXT_ABI around the new structure and the old one. I am not sure where else I need to do that, as compat is a bit hard, as you stated. As for the API revision file, is there something that needs to be done in that file too?

You can reply to me directly if you like to save some bandwidth.
>

>Regards,

>Olivier

>



Regards,
Keith
  

Patch

diff --git a/app/test/test_mempool.c b/app/test/test_mempool.c
index 72f8fb6..7b479f8 100644
--- a/app/test/test_mempool.c
+++ b/app/test/test_mempool.c
@@ -122,8 +122,8 @@  test_mempool_basic(void)
 		return -1;
 
 	printf("get private data\n");
-	if (rte_mempool_get_priv(mp) !=
-			(char*) mp + MEMPOOL_HEADER_SIZE(mp, mp->pg_num))
+	if (rte_mempool_get_priv(mp) != (char *)mp +
+			MEMPOOL_HEADER_SIZE(mp, mp->pg_num, mp->cache_size))
 		return -1;
 
 	printf("get physical address of an object\n");
diff --git a/lib/librte_mempool/rte_mempool.c b/lib/librte_mempool/rte_mempool.c
index aff5f6d..bdf8e2e 100644
--- a/lib/librte_mempool/rte_mempool.c
+++ b/lib/librte_mempool/rte_mempool.c
@@ -450,15 +450,11 @@  rte_mempool_xmem_create(const char *name, unsigned n, unsigned elt_size,
 	int page_size = getpagesize();
 
 	/* compilation-time checks */
+#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
 	RTE_BUILD_BUG_ON((sizeof(struct rte_mempool) &
 			  RTE_CACHE_LINE_MASK) != 0);
-#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
 	RTE_BUILD_BUG_ON((sizeof(struct rte_mempool_cache) &
 			  RTE_CACHE_LINE_MASK) != 0);
-	RTE_BUILD_BUG_ON((offsetof(struct rte_mempool, local_cache) &
-			  RTE_CACHE_LINE_MASK) != 0);
-#endif
-#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
 	RTE_BUILD_BUG_ON((sizeof(struct rte_mempool_debug_stats) &
 			  RTE_CACHE_LINE_MASK) != 0);
 	RTE_BUILD_BUG_ON((offsetof(struct rte_mempool, stats) &
@@ -527,9 +523,8 @@  rte_mempool_xmem_create(const char *name, unsigned n, unsigned elt_size,
 		 */
 		int head = sizeof(struct rte_mempool);
 		int new_size = (private_data_size + head) % page_size;
-		if (new_size) {
+		if (new_size)
 			private_data_size += page_size - new_size;
-		}
 	}
 
 	/* try to allocate tailq entry */
@@ -544,7 +539,8 @@  rte_mempool_xmem_create(const char *name, unsigned n, unsigned elt_size,
 	 * store mempool objects. Otherwise reserve a memzone that is large
 	 * enough to hold mempool header and metadata plus mempool objects.
 	 */
-	mempool_size = MEMPOOL_HEADER_SIZE(mp, pg_num) + private_data_size;
+	mempool_size = MEMPOOL_HEADER_SIZE(mp, pg_num, cache_size);
+	mempool_size += private_data_size;
 	mempool_size = RTE_ALIGN_CEIL(mempool_size, RTE_MEMPOOL_ALIGN);
 	if (vaddr == NULL)
 		mempool_size += (size_t)objsz.total_size * n;
@@ -598,8 +594,15 @@  rte_mempool_xmem_create(const char *name, unsigned n, unsigned elt_size,
 	mp->cache_flushthresh = CALC_CACHE_FLUSHTHRESH(cache_size);
 	mp->private_data_size = private_data_size;
 
+	/*
+	 * local_cache pointer is set even if cache_size is zero.
+	 * The local_cache points to just past the elt_pa[] array.
+	 */
+	mp->local_cache = (struct rte_mempool_cache *)
+			((char *)mp + MEMPOOL_HEADER_SIZE(mp, pg_num, 0));
+
 	/* calculate address of the first element for continuous mempool. */
-	obj = (char *)mp + MEMPOOL_HEADER_SIZE(mp, pg_num) +
+	obj = (char *)mp + MEMPOOL_HEADER_SIZE(mp, pg_num, cache_size) +
 		private_data_size;
 	obj = RTE_PTR_ALIGN_CEIL(obj, RTE_MEMPOOL_ALIGN);
 
@@ -613,9 +616,8 @@  rte_mempool_xmem_create(const char *name, unsigned n, unsigned elt_size,
 		mp->elt_va_start = (uintptr_t)obj;
 		mp->elt_pa[0] = mp->phys_addr +
 			(mp->elt_va_start - (uintptr_t)mp);
-
-	/* mempool elements in a separate chunk of memory. */
 	} else {
+		/* mempool elements in a separate chunk of memory. */
 		mp->elt_va_start = (uintptr_t)vaddr;
 		memcpy(mp->elt_pa, paddr, sizeof (mp->elt_pa[0]) * pg_num);
 	}
@@ -645,19 +647,15 @@  unsigned
 rte_mempool_count(const struct rte_mempool *mp)
 {
 	unsigned count;
+	unsigned lcore_id;
 
 	count = rte_ring_count(mp->ring);
 
-#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
-	{
-		unsigned lcore_id;
-		if (mp->cache_size == 0)
-			return count;
+	if (mp->cache_size == 0)
+		return count;
 
-		for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++)
-			count += mp->local_cache[lcore_id].len;
-	}
-#endif
+	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++)
+		count += mp->local_cache[lcore_id].len;
 
 	/*
 	 * due to race condition (access to len is not locked), the
@@ -672,13 +670,15 @@  rte_mempool_count(const struct rte_mempool *mp)
 static unsigned
 rte_mempool_dump_cache(FILE *f, const struct rte_mempool *mp)
 {
-#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
 	unsigned lcore_id;
 	unsigned count = 0;
 	unsigned cache_count;
 
 	fprintf(f, "  cache infos:\n");
 	fprintf(f, "    cache_size=%"PRIu32"\n", mp->cache_size);
+	if (mp->cache_size == 0)
+		return count;
+
 	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
 		cache_count = mp->local_cache[lcore_id].len;
 		fprintf(f, "    cache_count[%u]=%u\n", lcore_id, cache_count);
@@ -686,11 +686,6 @@  rte_mempool_dump_cache(FILE *f, const struct rte_mempool *mp)
 	}
 	fprintf(f, "    total_cache_count=%u\n", count);
 	return count;
-#else
-	RTE_SET_USED(mp);
-	fprintf(f, "  cache disabled\n");
-	return 0;
-#endif
 }
 
 #ifdef RTE_LIBRTE_MEMPOOL_DEBUG
@@ -755,13 +750,16 @@  mempool_audit_cookies(const struct rte_mempool *mp)
 #define mempool_audit_cookies(mp) do {} while(0)
 #endif
 
-#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
 /* check cookies before and after objects */
 static void
 mempool_audit_cache(const struct rte_mempool *mp)
 {
 	/* check cache size consistency */
 	unsigned lcore_id;
+
+	if (mp->cache_size == 0)
+		return;
+
 	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
 		if (mp->local_cache[lcore_id].len > mp->cache_flushthresh) {
 			RTE_LOG(CRIT, MEMPOOL, "badness on cache[%u]\n",
@@ -770,10 +768,6 @@  mempool_audit_cache(const struct rte_mempool *mp)
 		}
 	}
 }
-#else
-#define mempool_audit_cache(mp) do {} while(0)
-#endif
-
 
 /* check the consistency of mempool (size, cookies, ...) */
 void
diff --git a/lib/librte_mempool/rte_mempool.h b/lib/librte_mempool/rte_mempool.h
index 6e2390a..434ef98 100644
--- a/lib/librte_mempool/rte_mempool.h
+++ b/lib/librte_mempool/rte_mempool.h
@@ -95,7 +95,6 @@  struct rte_mempool_debug_stats {
 } __rte_cache_aligned;
 #endif
 
-#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
 /**
  * A structure that stores a per-core object cache.
  */
@@ -107,7 +106,6 @@  struct rte_mempool_cache {
 	 */
 	void *objs[RTE_MEMPOOL_CACHE_MAX_SIZE * 3]; /**< Cache objects */
 } __rte_cache_aligned;
-#endif /* RTE_MEMPOOL_CACHE_MAX_SIZE > 0 */
 
 /**
  * A structure that stores the size of mempool elements.
@@ -194,10 +192,7 @@  struct rte_mempool {
 
 	unsigned private_data_size;      /**< Size of private data. */
 
-#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
-	/** Per-lcore local cache. */
-	struct rte_mempool_cache local_cache[RTE_MAX_LCORE];
-#endif
+	struct rte_mempool_cache *local_cache; /**< Per-lcore local cache */
 
 #ifdef RTE_LIBRTE_MEMPOOL_DEBUG
 	/** Per-lcore statistics. */
@@ -247,6 +242,13 @@  struct rte_mempool {
 #endif
 
 /**
+ * Size of elt_pa array size based on number of pages. (Internal use)
+ */
+#define __PA_SIZE(mp, pgn) \
+	RTE_ALIGN_CEIL((((pgn) - RTE_DIM((mp)->elt_pa)) * \
+	sizeof((mp)->elt_pa[0])), RTE_CACHE_LINE_SIZE)
+
+/**
  * Calculate the size of the mempool header.
  *
  * @param mp
@@ -254,9 +256,9 @@  struct rte_mempool {
  * @param pgn
  *   Number of pages used to store mempool objects.
  */
-#define	MEMPOOL_HEADER_SIZE(mp, pgn)	(sizeof(*(mp)) + \
-	RTE_ALIGN_CEIL(((pgn) - RTE_DIM((mp)->elt_pa)) * \
-	sizeof ((mp)->elt_pa[0]), RTE_CACHE_LINE_SIZE))
+#define MEMPOOL_HEADER_SIZE(mp, pgn, cs) \
+	(sizeof(*(mp)) + __PA_SIZE(mp, pgn) + (((cs) == 0) ? 0 : \
+	(sizeof(struct rte_mempool_cache) * RTE_MAX_LCORE)))
 
 /**
  * Return true if the whole mempool is in contiguous memory.
@@ -755,19 +757,16 @@  static inline void __attribute__((always_inline))
 __mempool_put_bulk(struct rte_mempool *mp, void * const *obj_table,
 		    unsigned n, int is_mp)
 {
-#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
 	struct rte_mempool_cache *cache;
 	uint32_t index;
 	void **cache_objs;
 	unsigned lcore_id = rte_lcore_id();
 	uint32_t cache_size = mp->cache_size;
 	uint32_t flushthresh = mp->cache_flushthresh;
-#endif /* RTE_MEMPOOL_CACHE_MAX_SIZE > 0 */
 
 	/* increment stat now, adding in mempool always success */
 	__MEMPOOL_STAT_ADD(mp, put, n);
 
-#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
 	/* cache is not enabled or single producer or non-EAL thread */
 	if (unlikely(cache_size == 0 || is_mp == 0 ||
 		     lcore_id >= RTE_MAX_LCORE))
@@ -802,7 +801,6 @@  __mempool_put_bulk(struct rte_mempool *mp, void * const *obj_table,
 	return;
 
 ring_enqueue:
-#endif /* RTE_MEMPOOL_CACHE_MAX_SIZE > 0 */
 
 	/* push remaining objects in ring */
 #ifdef RTE_LIBRTE_MEMPOOL_DEBUG
@@ -946,7 +944,6 @@  __mempool_get_bulk(struct rte_mempool *mp, void **obj_table,
 		   unsigned n, int is_mc)
 {
 	int ret;
-#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
 	struct rte_mempool_cache *cache;
 	uint32_t index, len;
 	void **cache_objs;
@@ -992,7 +989,6 @@  __mempool_get_bulk(struct rte_mempool *mp, void **obj_table,
 	return 0;
 
 ring_dequeue:
-#endif /* RTE_MEMPOOL_CACHE_MAX_SIZE > 0 */
 
 	/* get remaining objects from ring */
 	if (is_mc)
@@ -1293,7 +1289,8 @@  void rte_mempool_audit(const struct rte_mempool *mp);
  */
 static inline void *rte_mempool_get_priv(struct rte_mempool *mp)
 {
-	return (char *)mp + MEMPOOL_HEADER_SIZE(mp, mp->pg_num);
+	return (char *)mp +
+		MEMPOOL_HEADER_SIZE(mp, mp->pg_num, mp->cache_size);
 }
 
 /**