[dpdk-dev,v2] mem: command line option to delete hugepage backing files

Message ID D24BC7E2.23BFE%shesha@cisco.com (mailing list archive)
State Superseded, archived
Headers

Commit Message

shesha Sreenivasamurthy (shesha) Oct. 20, 2015, 5:29 p.m. UTC
  When an application using hugepages crashes or exits, the hugetlbfs
backing files are not cleaned up. This patch cleans up those files.
Some multi-process DPDK applications may benefit from those
backing files. Therefore, I have made this configurable so that an
application that does not need those backing files can remove them, thus
not changing the current default behavior. The application itself could
clean them up; however, the rationale for DPDK doing it is that DPDK
created the files, so it is better that DPDK unlinks them.

Signed-off-by: Shesha Sreenivasamurthy <shesha@cisco.com>
---
 lib/librte_eal/common/eal_common_options.c | 12 +++++++++
 lib/librte_eal/common/eal_internal_cfg.h   |  1 +
 lib/librte_eal/common/eal_options.h        |  2 ++
 lib/librte_eal/linuxapp/eal/eal_memory.c   | 39
++++++++++++++++++++++++++++++
 4 files changed, 54 insertions(+)

allocate
  * ALL hugepages (not just those we need), additional unmapping needs to
be done.
@@ -1289,6 +1320,14 @@ rte_eal_hugepage_init(void)
 		goto fail;
 	}
 
+	/* free the hugepage backing files */
+	if (internal_config.hugepage_unlink &&
+		unlink_hugepage_files(tmp_hp, used_hp,
+			internal_config.num_hugepage_sizes) < 0) {
+			RTE_LOG(ERR, EAL, "Unlinking hugepage backing files failed!\n");
+		goto fail;
+	}
+
 	/* free the temporary hugepage table */
 	free(tmp_hp);
 	tmp_hp = NULL;
  

Comments

Sergio Gonzalez Monroy Oct. 21, 2015, 2:25 p.m. UTC | #1
Hi Shesha,

A few things:
- For some reason the patch didn't upload properly to patchwork and the 
last bit of the patch is missing.
   The following link shows that the patch fails basic testing/compilation:
   http://dpdk.org/ml/archives/test-report/2015-October/000352.html
- You seem to be sending two copies of the same patch.
- Please update patchwork status for v1 patch. I think this is because 
there were two copies of it [patch id 7606]

On 20/10/2015 18:29, shesha Sreenivasamurthy (shesha) wrote:
> When an application using huge-pages crash or exists, the hugetlbfs
> backing files are not cleaned up. This is a patch to clean those files.
> There are multi-process DPDK applications that may be benefited by those
> backing files. Therefore, I have made that configurable so that the
> application that does not need those backing files can remove them, thus
> not changing the current default behavior. The application itself can
> clean it up, however the rationale behind DPDK cleaning it up is, DPDK
> created it and therefore, it is better it unlinks it.
>
> Signed-off-by: Shesha Sreenivasamurthy <shesha@cisco.com>
> ---
>   lib/librte_eal/common/eal_common_options.c | 12 +++++++++
>   lib/librte_eal/common/eal_internal_cfg.h   |  1 +
>   lib/librte_eal/common/eal_options.h        |  2 ++
>   lib/librte_eal/linuxapp/eal/eal_memory.c   | 39
> ++++++++++++++++++++++++++++++
>   4 files changed, 54 insertions(+)
>
> diff --git a/lib/librte_eal/common/eal_common_options.c
> b/lib/librte_eal/common/eal_common_options.c
> index 1f459ac..5fe6374 100644
> --- a/lib/librte_eal/common/eal_common_options.c
> +++ b/lib/librte_eal/common/eal_common_options.c
> @@ -79,6 +79,7 @@ eal_long_options[] = {
>   	{OPT_MASTER_LCORE,      1, NULL, OPT_MASTER_LCORE_NUM     },
>   	{OPT_NO_HPET,           0, NULL, OPT_NO_HPET_NUM          },
>   	{OPT_NO_HUGE,           0, NULL, OPT_NO_HUGE_NUM          },
> +	{OPT_HUGE_UNLINK,       0, NULL, OPT_HUGE_UNLINK_NUM      },
>   	{OPT_NO_PCI,            0, NULL, OPT_NO_PCI_NUM           },
>   	{OPT_NO_SHCONF,         0, NULL, OPT_NO_SHCONF_NUM        },
>   	{OPT_PCI_BLACKLIST,     1, NULL, OPT_PCI_BLACKLIST_NUM    },
> @@ -722,6 +723,10 @@ eal_parse_common_option(int opt, const char *optarg,
>   		conf->no_hugetlbfs = 1;
>   		break;
>   
> +	case OPT_HUGE_UNLINK_NUM:
> +		conf->hugepage_unlink = 1;
> +		break;
> +
>   	case OPT_NO_PCI_NUM:
>   		conf->no_pci = 1;
>   		break;
> @@ -856,6 +861,12 @@ eal_check_common_options(struct internal_config
> *internal_cfg)
>   		return -1;
>   	}
>   
> +	if (internal_cfg->no_hugetlbfs && internal_cfg->hugepage_unlink) {
> +		RTE_LOG(ERR, EAL, "Option --"OPT_HUGE_UNLINK" cannot "
> +			"be specified together with --"OPT_NO_HUGE"\n");
> +		return -1;
> +	}
> +
>   	if (rte_eal_devargs_type_count(RTE_DEVTYPE_WHITELISTED_PCI) != 0 &&
>   		rte_eal_devargs_type_count(RTE_DEVTYPE_BLACKLISTED_PCI) != 0) {
>   		RTE_LOG(ERR, EAL, "Options blacklist (-b) and whitelist (-w) "
> @@ -906,6 +917,7 @@ eal_common_usage(void)
>   	       "  -h, --help          This help\n"
>   	       "\nEAL options for DEBUG use only:\n"
>   	       "  --"OPT_NO_HUGE"           Use malloc instead of hugetlbfs\n"
> +	       "  --"OPT_HUGE_UNLINK"       Unlink hugepage backing file after
> initalization\n"
>   	       "  --"OPT_NO_PCI"            Disable PCI\n"
>   	       "  --"OPT_NO_HPET"           Disable HPET\n"
>   	       "  --"OPT_NO_SHCONF"         No shared config (mmap'd files)\n"
> diff --git a/lib/librte_eal/common/eal_internal_cfg.h
> b/lib/librte_eal/common/eal_internal_cfg.h
> index e2ecb0d..84b075f 100644
> --- a/lib/librte_eal/common/eal_internal_cfg.h
> +++ b/lib/librte_eal/common/eal_internal_cfg.h
> @@ -64,6 +64,7 @@ struct internal_config {
>   	volatile unsigned force_nchannel; /**< force number of channels */
>   	volatile unsigned force_nrank;    /**< force number of ranks */
>   	volatile unsigned no_hugetlbfs;   /**< true to disable hugetlbfs */
> +	volatile unsigned hugepage_unlink; /** < true to unlink backing files */
>   	volatile unsigned xen_dom0_support; /**< support app running on Xen
> Dom0*/
>   	volatile unsigned no_pci;         /**< true to disable PCI */
>   	volatile unsigned no_hpet;        /**< true to disable HPET */
> diff --git a/lib/librte_eal/common/eal_options.h
> b/lib/librte_eal/common/eal_options.h
> index f6714d9..745f38c 100644
> --- a/lib/librte_eal/common/eal_options.h
> +++ b/lib/librte_eal/common/eal_options.h
> @@ -63,6 +63,8 @@ enum {
>   	OPT_PROC_TYPE_NUM,
>   #define OPT_NO_HPET           "no-hpet"
>   	OPT_NO_HPET_NUM,
> +#define OPT_HUGE_UNLINK        "huge-unlink"
> +	OPT_HUGE_UNLINK_NUM,
>   #define OPT_NO_HUGE           "no-huge"
>   	OPT_NO_HUGE_NUM,
>   #define OPT_NO_PCI            "no-pci"
> diff --git a/lib/librte_eal/linuxapp/eal/eal_memory.c
> b/lib/librte_eal/linuxapp/eal/eal_memory.c
> index ac2745e..2b86428 100644
> --- a/lib/librte_eal/linuxapp/eal/eal_memory.c
> +++ b/lib/librte_eal/linuxapp/eal/eal_memory.c
> @@ -786,6 +786,37 @@ copy_hugepages_to_shared_mem(struct hugepage_file *
> dst, int dest_size,
>   	return 0;
>   }
>   
> +static int
> +unlink_hugepage_files(struct hugepage_file *hugepg_tbl,
> +		struct hugepage_info *hpi,
> +		unsigned num_hp_info)
> +{
> +	unsigned socket, size;
> +	int page, nrpages = 0;
> +
> +	/* get total number of hugepages */
> +	for (size = 0; size < num_hp_info; size++)
> +		for (socket = 0; socket < RTE_MAX_NUMA_NODES; socket++)
> +			nrpages += internal_config.hugepage_info[size].num_pages[socket];
> +
> +	for (size = 0; size < num_hp_info; size++) {
> +		for (socket = 0; socket < RTE_MAX_NUMA_NODES; socket++) {
> +			for (page = 0; page < nrpages; page++) {
> +				struct hugepage_file *hp = &hugepg_tbl[page];
> +				if ((hp->size == hpi[size].hugepage_sz) &&
> +						(hp->socket_id == (int) socket) &&
> +						hp->final_va != NULL) {
Is all this necessary? At this point, all unneeded hugepages have been
released and their final_va set to NULL.
I think we just need to calculate the total number of hugepages and loop
through all hugepg_tbl entries checking final_va != NULL, then unlink.

Sergio
> +						if (unlink(hp->filepath)) {
> +							RTE_LOG(WARNING, EAL, "%s(): Removing %s failed: %s\n",
> +								__func__, hp->filepath, strerror(errno));
> +					}
> +				}
> +			} /* foreach page */
> +		} /* foreach socket */
> +	} /* foreach pagesize */
> +	return 0;
> +}
> +
>   /*
>    * unmaps hugepages that are not going to be used. since we originally
> allocate
>    * ALL hugepages (not just those we need), additional unmapping needs to
> be done.
> @@ -1289,6 +1320,14 @@ rte_eal_hugepage_init(void)
>   		goto fail;
>   	}
>   
> +	/* free the hugepage backing files */
> +	if (internal_config.hugepage_unlink &&
> +		unlink_hugepage_files(tmp_hp, used_hp,
> +			internal_config.num_hugepage_sizes) < 0) {
> +			RTE_LOG(ERR, EAL, "Unlinking hugepage backing files failed!\n");
> +		goto fail;
> +	}
> +
>   	/* free the temporary hugepage table */
>   	free(tmp_hp);
>   	tmp_hp = NULL;
  

Patch

diff --git a/lib/librte_eal/common/eal_common_options.c
b/lib/librte_eal/common/eal_common_options.c
index 1f459ac..5fe6374 100644
--- a/lib/librte_eal/common/eal_common_options.c
+++ b/lib/librte_eal/common/eal_common_options.c
@@ -79,6 +79,7 @@  eal_long_options[] = {
 	{OPT_MASTER_LCORE,      1, NULL, OPT_MASTER_LCORE_NUM     },
 	{OPT_NO_HPET,           0, NULL, OPT_NO_HPET_NUM          },
 	{OPT_NO_HUGE,           0, NULL, OPT_NO_HUGE_NUM          },
+	{OPT_HUGE_UNLINK,       0, NULL, OPT_HUGE_UNLINK_NUM      },
 	{OPT_NO_PCI,            0, NULL, OPT_NO_PCI_NUM           },
 	{OPT_NO_SHCONF,         0, NULL, OPT_NO_SHCONF_NUM        },
 	{OPT_PCI_BLACKLIST,     1, NULL, OPT_PCI_BLACKLIST_NUM    },
@@ -722,6 +723,10 @@  eal_parse_common_option(int opt, const char *optarg,
 		conf->no_hugetlbfs = 1;
 		break;
 
+	case OPT_HUGE_UNLINK_NUM:
+		conf->hugepage_unlink = 1;
+		break;
+
 	case OPT_NO_PCI_NUM:
 		conf->no_pci = 1;
 		break;
@@ -856,6 +861,12 @@  eal_check_common_options(struct internal_config
*internal_cfg)
 		return -1;
 	}
 
+	if (internal_cfg->no_hugetlbfs && internal_cfg->hugepage_unlink) {
+		RTE_LOG(ERR, EAL, "Option --"OPT_HUGE_UNLINK" cannot "
+			"be specified together with --"OPT_NO_HUGE"\n");
+		return -1;
+	}
+
 	if (rte_eal_devargs_type_count(RTE_DEVTYPE_WHITELISTED_PCI) != 0 &&
 		rte_eal_devargs_type_count(RTE_DEVTYPE_BLACKLISTED_PCI) != 0) {
 		RTE_LOG(ERR, EAL, "Options blacklist (-b) and whitelist (-w) "
@@ -906,6 +917,7 @@  eal_common_usage(void)
 	       "  -h, --help          This help\n"
 	       "\nEAL options for DEBUG use only:\n"
 	       "  --"OPT_NO_HUGE"           Use malloc instead of hugetlbfs\n"
+	       "  --"OPT_HUGE_UNLINK"       Unlink hugepage backing file after
initalization\n"
 	       "  --"OPT_NO_PCI"            Disable PCI\n"
 	       "  --"OPT_NO_HPET"           Disable HPET\n"
 	       "  --"OPT_NO_SHCONF"         No shared config (mmap'd files)\n"
diff --git a/lib/librte_eal/common/eal_internal_cfg.h
b/lib/librte_eal/common/eal_internal_cfg.h
index e2ecb0d..84b075f 100644
--- a/lib/librte_eal/common/eal_internal_cfg.h
+++ b/lib/librte_eal/common/eal_internal_cfg.h
@@ -64,6 +64,7 @@  struct internal_config {
 	volatile unsigned force_nchannel; /**< force number of channels */
 	volatile unsigned force_nrank;    /**< force number of ranks */
 	volatile unsigned no_hugetlbfs;   /**< true to disable hugetlbfs */
+	volatile unsigned hugepage_unlink; /** < true to unlink backing files */
 	volatile unsigned xen_dom0_support; /**< support app running on Xen
Dom0*/
 	volatile unsigned no_pci;         /**< true to disable PCI */
 	volatile unsigned no_hpet;        /**< true to disable HPET */
diff --git a/lib/librte_eal/common/eal_options.h
b/lib/librte_eal/common/eal_options.h
index f6714d9..745f38c 100644
--- a/lib/librte_eal/common/eal_options.h
+++ b/lib/librte_eal/common/eal_options.h
@@ -63,6 +63,8 @@  enum {
 	OPT_PROC_TYPE_NUM,
 #define OPT_NO_HPET           "no-hpet"
 	OPT_NO_HPET_NUM,
+#define OPT_HUGE_UNLINK        "huge-unlink"
+	OPT_HUGE_UNLINK_NUM,
 #define OPT_NO_HUGE           "no-huge"
 	OPT_NO_HUGE_NUM,
 #define OPT_NO_PCI            "no-pci"
diff --git a/lib/librte_eal/linuxapp/eal/eal_memory.c
b/lib/librte_eal/linuxapp/eal/eal_memory.c
index ac2745e..2b86428 100644
--- a/lib/librte_eal/linuxapp/eal/eal_memory.c
+++ b/lib/librte_eal/linuxapp/eal/eal_memory.c
@@ -786,6 +786,37 @@  copy_hugepages_to_shared_mem(struct hugepage_file *
dst, int dest_size,
 	return 0;
 }
 
+static int
+unlink_hugepage_files(struct hugepage_file *hugepg_tbl,
+		struct hugepage_info *hpi,
+		unsigned num_hp_info)
+{
+	unsigned socket, size;
+	int page, nrpages = 0;
+
+	/* get total number of hugepages */
+	for (size = 0; size < num_hp_info; size++)
+		for (socket = 0; socket < RTE_MAX_NUMA_NODES; socket++)
+			nrpages += internal_config.hugepage_info[size].num_pages[socket];
+
+	for (size = 0; size < num_hp_info; size++) {
+		for (socket = 0; socket < RTE_MAX_NUMA_NODES; socket++) {
+			for (page = 0; page < nrpages; page++) {
+				struct hugepage_file *hp = &hugepg_tbl[page];
+				if ((hp->size == hpi[size].hugepage_sz) &&
+						(hp->socket_id == (int) socket) &&
+						hp->final_va != NULL) {
+						if (unlink(hp->filepath)) {
+							RTE_LOG(WARNING, EAL, "%s(): Removing %s failed: %s\n",
+								__func__, hp->filepath, strerror(errno));
+					}
+				}
+			} /* foreach page */
+		} /* foreach socket */
+	} /* foreach pagesize */
+	return 0;
+}
+
 /*
  * unmaps hugepages that are not going to be used. since we originally