[dpdk-dev,RFC] drivers: advertise kmod dependencies in pmdinfo

Message ID 1472217646-26219-1-git-send-email-olivier.matz@6wind.com (mailing list archive)
State Superseded, archived
Delegated to: Thomas Monjalon
Headers

Commit Message

Olivier Matz Aug. 26, 2016, 1:20 p.m. UTC
  Add a new macro DRIVER_REGISTER_KMOD_DEP() that allows a driver to
declare the list of kernel modules required to run properly.

Today, most PCI drivers require uio/vfio.

Signed-off-by: Olivier Matz <olivier.matz@6wind.com>
---

In this RFC, I assumed that all PCI drivers require the loading of a
uio/vfio module (except mlx*); this may be wrong.
Comments are welcome!


 buildtools/pmdinfogen/pmdinfogen.c      |  1 +
 buildtools/pmdinfogen/pmdinfogen.h      |  1 +
 drivers/crypto/qat/rte_qat_cryptodev.c  |  2 ++
 drivers/net/bnx2x/bnx2x_ethdev.c        |  4 ++++
 drivers/net/bnxt/bnxt_ethdev.c          |  2 ++
 drivers/net/cxgbe/cxgbe_ethdev.c        |  2 ++
 drivers/net/e1000/em_ethdev.c           |  2 ++
 drivers/net/e1000/igb_ethdev.c          |  4 ++++
 drivers/net/ena/ena_ethdev.c            |  2 ++
 drivers/net/enic/enic_ethdev.c          |  2 ++
 drivers/net/fm10k/fm10k_ethdev.c        |  2 ++
 drivers/net/i40e/i40e_ethdev.c          |  2 ++
 drivers/net/i40e/i40e_ethdev_vf.c       |  2 ++
 drivers/net/ixgbe/ixgbe_ethdev.c        |  4 ++++
 drivers/net/mlx4/mlx4.c                 |  2 ++
 drivers/net/mlx5/mlx5.c                 |  3 +++
 drivers/net/nfp/nfp_net.c               |  2 ++
 drivers/net/qede/qede_ethdev.c          |  4 ++++
 drivers/net/szedata2/rte_eth_szedata2.c |  2 ++
 drivers/net/thunderx/nicvf_ethdev.c     |  2 ++
 drivers/net/virtio/virtio_ethdev.c      |  2 ++
 drivers/net/vmxnet3/vmxnet3_ethdev.c    |  2 ++
 lib/librte_eal/common/include/rte_dev.h | 14 ++++++++++++++
 tools/dpdk-pmdinfo.py                   |  5 ++++-
 24 files changed, 69 insertions(+), 1 deletion(-)
  

Comments

Matej Vido Aug. 30, 2016, 8:40 a.m. UTC | #1
On 26.08.2016 15:20, Olivier Matz wrote:

> Add a new macro DRIVER_REGISTER_KMOD_DEP() that allows a driver to
> declare the list of kernel modules required to run properly.
>
> Today, most PCI drivers require uio/vfio.
>
> Signed-off-by: Olivier Matz <olivier.matz@6wind.com>
> ---
>
> In this RFC, I supposed that all PCI drivers require a the loading of a
> uio/vfio module (except mlx*), this may be wrong.
> Comments are welcome!
>
>
>   buildtools/pmdinfogen/pmdinfogen.c      |  1 +
>   buildtools/pmdinfogen/pmdinfogen.h      |  1 +
>   drivers/crypto/qat/rte_qat_cryptodev.c  |  2 ++
>   drivers/net/bnx2x/bnx2x_ethdev.c        |  4 ++++
>   drivers/net/bnxt/bnxt_ethdev.c          |  2 ++
>   drivers/net/cxgbe/cxgbe_ethdev.c        |  2 ++
>   drivers/net/e1000/em_ethdev.c           |  2 ++
>   drivers/net/e1000/igb_ethdev.c          |  4 ++++
>   drivers/net/ena/ena_ethdev.c            |  2 ++
>   drivers/net/enic/enic_ethdev.c          |  2 ++
>   drivers/net/fm10k/fm10k_ethdev.c        |  2 ++
>   drivers/net/i40e/i40e_ethdev.c          |  2 ++
>   drivers/net/i40e/i40e_ethdev_vf.c       |  2 ++
>   drivers/net/ixgbe/ixgbe_ethdev.c        |  4 ++++
>   drivers/net/mlx4/mlx4.c                 |  2 ++
>   drivers/net/mlx5/mlx5.c                 |  3 +++
>   drivers/net/nfp/nfp_net.c               |  2 ++
>   drivers/net/qede/qede_ethdev.c          |  4 ++++
>   drivers/net/szedata2/rte_eth_szedata2.c |  2 ++
>   drivers/net/thunderx/nicvf_ethdev.c     |  2 ++
>   drivers/net/virtio/virtio_ethdev.c      |  2 ++
>   drivers/net/vmxnet3/vmxnet3_ethdev.c    |  2 ++
>   lib/librte_eal/common/include/rte_dev.h | 14 ++++++++++++++
>   tools/dpdk-pmdinfo.py                   |  5 ++++-
>   24 files changed, 69 insertions(+), 1 deletion(-)
>
> diff --git a/buildtools/pmdinfogen/pmdinfogen.c b/buildtools/pmdinfogen/pmdinfogen.c
> index e1bf2e4..1e5b6f3 100644
> --- a/buildtools/pmdinfogen/pmdinfogen.c
> +++ b/buildtools/pmdinfogen/pmdinfogen.c
> @@ -269,6 +269,7 @@ struct opt_tag {
>   
>   static const struct opt_tag opt_tags[] = {
>   	{"_param_string_export", "params"},
> +	{"_kmod_dep_export", "kmod"},
>   };
>   
>   static int complete_pmd_entry(struct elf_info *info, struct pmd_driver *drv)
> diff --git a/buildtools/pmdinfogen/pmdinfogen.h b/buildtools/pmdinfogen/pmdinfogen.h
> index 1da2966..2fab2aa 100644
> --- a/buildtools/pmdinfogen/pmdinfogen.h
> +++ b/buildtools/pmdinfogen/pmdinfogen.h
> @@ -85,6 +85,7 @@ else \
>   
>   enum opt_params {
>   	PMD_PARAM_STRING = 0,
> +	PMD_KMOD_DEP,
>   	PMD_OPT_MAX
>   };
[..]
>   
>
> diff --git a/drivers/net/szedata2/rte_eth_szedata2.c b/drivers/net/szedata2/rte_eth_szedata2.c
> index 483d789..409e71f 100644
> --- a/drivers/net/szedata2/rte_eth_szedata2.c
> +++ b/drivers/net/szedata2/rte_eth_szedata2.c
> @@ -1602,3 +1602,5 @@ static struct rte_driver rte_szedata2_driver = {
>   
>   PMD_REGISTER_DRIVER(rte_szedata2_driver, RTE_SZEDATA2_DRIVER_NAME);
>   DRIVER_REGISTER_PCI_TABLE(RTE_SZEDATA2_DRIVER_NAME, rte_szedata2_pci_id_table);
> +DRIVER_REGISTER_KMOD_DEP(RTE_SZEDATA2_DRIVER_NAME,
> +	"uio,igb_uio:uio,uio_pci_generic:vfio,vfio-pci");
Hi Olivier,

szedata2 doesn't require uio/vfio modules. Instead the following lines 
could be used:

+DRIVER_REGISTER_KMOD_DEP(RTE_SZEDATA2_DRIVER_NAME,
+	"combo6core,combov3,szedata2,szedata2_cv3");


Thanks,
Matej

[..]
> diff --git a/lib/librte_eal/common/include/rte_dev.h b/lib/librte_eal/common/include/rte_dev.h
> index 95789f9..b721dc3 100644
> --- a/lib/librte_eal/common/include/rte_dev.h
> +++ b/lib/librte_eal/common/include/rte_dev.h
> @@ -203,6 +203,20 @@ RTE_STR(table)
>   static const char DRV_EXP_TAG(name, param_string_export)[] \
>   __attribute__((used)) = str
>   
> +/**
> + * Advertise the list of kernel modules required to run this driver
> + *
> + * This string list the name of kernel modules, separated by commas. The
> + * order is important. If several modules lists are possible, they are
> + * separated by colons.
> + *
> + * Example: "uio,igb_uio:uio,uio_pci_generic" means either "uio,igb_uio"
> + * or "uio,uio_pci_generic".
> + */
> +#define DRIVER_REGISTER_KMOD_DEP(name, str) \
> +static const char DRV_EXP_TAG(name, kmod_dep_export)[] \
> +__attribute__((used)) = str
> +
>   #ifdef __cplusplus
>   }
>   #endif
> diff --git a/tools/dpdk-pmdinfo.py b/tools/dpdk-pmdinfo.py
> index 3db9819..17bfed4 100755
> --- a/tools/dpdk-pmdinfo.py
> +++ b/tools/dpdk-pmdinfo.py
> @@ -312,7 +312,10 @@ class ReadElf(object):
>           global raw_output
>           global pcidb
>   
> -        optional_pmd_info = [{'id': 'params', 'tag': 'PMD PARAMETERS'}]
> +        optional_pmd_info = [
> +            {'id': 'params', 'tag': 'PMD PARAMETERS'},
> +            {'id': 'kmod', 'tag': 'PMD KMOD DEPENDENCIES'}
> +        ]
>   
>           i = mystring.index("=")
>           mystring = mystring[i + 2:]
  
Olivier Matz Aug. 30, 2016, 8:56 a.m. UTC | #2
Hi Matej,

On 08/30/2016 10:40 AM, Matej Vido wrote:
> On 26.08.2016 15:20, Olivier Matz wrote:
> 
>> Add a new macro DRIVER_REGISTER_KMOD_DEP() that allows a driver to
>> declare the list of kernel modules required to run properly.
>>
>> Today, most PCI drivers require uio/vfio.
>>
>> Signed-off-by: Olivier Matz <olivier.matz@6wind.com>
>> ---
> [..]
>>  
>> diff --git a/drivers/net/szedata2/rte_eth_szedata2.c
>> b/drivers/net/szedata2/rte_eth_szedata2.c
>> index 483d789..409e71f 100644
>> --- a/drivers/net/szedata2/rte_eth_szedata2.c
>> +++ b/drivers/net/szedata2/rte_eth_szedata2.c
>> @@ -1602,3 +1602,5 @@ static struct rte_driver rte_szedata2_driver = {
>>     PMD_REGISTER_DRIVER(rte_szedata2_driver, RTE_SZEDATA2_DRIVER_NAME);
>>   DRIVER_REGISTER_PCI_TABLE(RTE_SZEDATA2_DRIVER_NAME,
>> rte_szedata2_pci_id_table);
>> +DRIVER_REGISTER_KMOD_DEP(RTE_SZEDATA2_DRIVER_NAME,
>> +    "uio,igb_uio:uio,uio_pci_generic:vfio,vfio-pci");
> Hi Olivier,
> 
> szedata2 doesn't require uio/vfio modules. Instead the following lines
> could be used:
> 
> +DRIVER_REGISTER_KMOD_DEP(RTE_SZEDATA2_DRIVER_NAME,
> +    "combo6core,combov3,szedata2,szedata2_cv3");
> 

ok, I will update it for next revision, thanks !

Olivier
  
Neil Horman Aug. 30, 2016, 1:23 p.m. UTC | #3
On Fri, Aug 26, 2016 at 03:20:46PM +0200, Olivier Matz wrote:
> Add a new macro DRIVER_REGISTER_KMOD_DEP() that allows a driver to
> declare the list of kernel modules required to run properly.
> 
> Today, most PCI drivers require uio/vfio.
> 
> Signed-off-by: Olivier Matz <olivier.matz@6wind.com>
> 
> ---
> In this RFC, I supposed that all PCI drivers require a the loading of a
> uio/vfio module (except mlx*), this may be wrong.
> Comments are welcome!
> 
> 
>  buildtools/pmdinfogen/pmdinfogen.c      |  1 +
>  buildtools/pmdinfogen/pmdinfogen.h      |  1 +
>  drivers/crypto/qat/rte_qat_cryptodev.c  |  2 ++
>  drivers/net/bnx2x/bnx2x_ethdev.c        |  4 ++++
>  drivers/net/bnxt/bnxt_ethdev.c          |  2 ++
>  drivers/net/cxgbe/cxgbe_ethdev.c        |  2 ++
>  drivers/net/e1000/em_ethdev.c           |  2 ++
>  drivers/net/e1000/igb_ethdev.c          |  4 ++++
>  drivers/net/ena/ena_ethdev.c            |  2 ++
>  drivers/net/enic/enic_ethdev.c          |  2 ++
>  drivers/net/fm10k/fm10k_ethdev.c        |  2 ++
>  drivers/net/i40e/i40e_ethdev.c          |  2 ++
>  drivers/net/i40e/i40e_ethdev_vf.c       |  2 ++
>  drivers/net/ixgbe/ixgbe_ethdev.c        |  4 ++++
>  drivers/net/mlx4/mlx4.c                 |  2 ++
>  drivers/net/mlx5/mlx5.c                 |  3 +++
>  drivers/net/nfp/nfp_net.c               |  2 ++
>  drivers/net/qede/qede_ethdev.c          |  4 ++++
>  drivers/net/szedata2/rte_eth_szedata2.c |  2 ++
>  drivers/net/thunderx/nicvf_ethdev.c     |  2 ++
>  drivers/net/virtio/virtio_ethdev.c      |  2 ++
>  drivers/net/vmxnet3/vmxnet3_ethdev.c    |  2 ++
>  lib/librte_eal/common/include/rte_dev.h | 14 ++++++++++++++
>  tools/dpdk-pmdinfo.py                   |  5 ++++-
>  24 files changed, 69 insertions(+), 1 deletion(-)
> 

Generally speaking, I like the idea, it makes sense to me in terms of using
pmdinfo to export this information

That said, this may need to be a set of macros.  By that I mean (and correct me
if I'm wrong here) that the relationship between PMDs and kernel modules is, in
some cases, more complex than a 'requires' or 'depends' relationship.  That is
to say, some PMDs may need user space hardware access, but can use either uio OR
vfio, don't need both, and can continue to function if only one is
available.  Other PMDs may be able to use vfio or uio, but can still function
without either.  And some, as your patch implements, simply require one or the
other to function.  As such, it seems like you may want a few macros, in the form
of:

DRIVER_REGISTER_KMOD_REQUEST - List of modules to attempt loading, ignore any
failures 
DRIVER_REGISTER_KMOD_REQUIRE - List of modules required to be loaded after
request macro completes, fail if any are not loaded

That's just spitballing, mind you; there's probably a better way to do it, but the
idea is to list a set of modules you would like to have, and then create a
parsable syntax to describe the modules that need to be loaded after the request
is complete, so that you can accurately codify the situations I described above.

Neil

> diff --git a/buildtools/pmdinfogen/pmdinfogen.c b/buildtools/pmdinfogen/pmdinfogen.c
> index e1bf2e4..1e5b6f3 100644
> --- a/buildtools/pmdinfogen/pmdinfogen.c
> +++ b/buildtools/pmdinfogen/pmdinfogen.c
> @@ -269,6 +269,7 @@ struct opt_tag {
>  
>  static const struct opt_tag opt_tags[] = {
>  	{"_param_string_export", "params"},
> +	{"_kmod_dep_export", "kmod"},
>  };
>  
>  static int complete_pmd_entry(struct elf_info *info, struct pmd_driver *drv)
> diff --git a/buildtools/pmdinfogen/pmdinfogen.h b/buildtools/pmdinfogen/pmdinfogen.h
> index 1da2966..2fab2aa 100644
> --- a/buildtools/pmdinfogen/pmdinfogen.h
> +++ b/buildtools/pmdinfogen/pmdinfogen.h
> @@ -85,6 +85,7 @@ else \
>  
>  enum opt_params {
>  	PMD_PARAM_STRING = 0,
> +	PMD_KMOD_DEP,
>  	PMD_OPT_MAX
>  };
>  
> diff --git a/drivers/crypto/qat/rte_qat_cryptodev.c b/drivers/crypto/qat/rte_qat_cryptodev.c
> index 82ab047..fc62be9 100644
> --- a/drivers/crypto/qat/rte_qat_cryptodev.c
> +++ b/drivers/crypto/qat/rte_qat_cryptodev.c
> @@ -135,4 +135,6 @@ static struct rte_driver pmd_qat_drv = {
>  
>  PMD_REGISTER_DRIVER(pmd_qat_drv, CRYPTODEV_NAME_QAT_SYM_PMD);
>  DRIVER_REGISTER_PCI_TABLE(CRYPTODEV_NAME_QAT_SYM_PMD, pci_id_qat_map);
> +DRIVER_REGISTER_KMOD_DEP(CRYPTODEV_NAME_QAT_SYM_PMD,
> +	"uio,igb_uio:uio,uio_pci_generic:vfio,vfio-pci");
>  
> diff --git a/drivers/net/bnx2x/bnx2x_ethdev.c b/drivers/net/bnx2x/bnx2x_ethdev.c
> index f3ab355..ba8831a 100644
> --- a/drivers/net/bnx2x/bnx2x_ethdev.c
> +++ b/drivers/net/bnx2x/bnx2x_ethdev.c
> @@ -667,5 +667,9 @@ static struct rte_driver rte_bnx2xvf_driver = {
>  
>  PMD_REGISTER_DRIVER(rte_bnx2x_driver, bnx2x);
>  DRIVER_REGISTER_PCI_TABLE(bnx2x, pci_id_bnx2x_map);
> +DRIVER_REGISTER_KMOD_DEP(bnx2x,
> +	"uio,igb_uio:uio,uio_pci_generic:vfio,vfio-pci");
>  PMD_REGISTER_DRIVER(rte_bnx2xvf_driver, bnx2xvf);
>  DRIVER_REGISTER_PCI_TABLE(bnx2xvf, pci_id_bnx2xvf_map);
> +DRIVER_REGISTER_KMOD_DEP(bnx2xvf,
> +	"uio,igb_uio:uio,uio_pci_generic:vfio,vfio-pci");
> diff --git a/drivers/net/bnxt/bnxt_ethdev.c b/drivers/net/bnxt/bnxt_ethdev.c
> index 3795fac..5c6c7b5 100644
> --- a/drivers/net/bnxt/bnxt_ethdev.c
> +++ b/drivers/net/bnxt/bnxt_ethdev.c
> @@ -1068,3 +1068,5 @@ static struct rte_driver bnxt_pmd_drv = {
>  
>  PMD_REGISTER_DRIVER(bnxt_pmd_drv, bnxt);
>  DRIVER_REGISTER_PCI_TABLE(bnxt, bnxt_pci_id_map);
> +DRIVER_REGISTER_KMOD_DEP(bnxt,
> +	"uio,igb_uio:uio,uio_pci_generic:vfio,vfio-pci");
> diff --git a/drivers/net/cxgbe/cxgbe_ethdev.c b/drivers/net/cxgbe/cxgbe_ethdev.c
> index 9208a61..cea2741 100644
> --- a/drivers/net/cxgbe/cxgbe_ethdev.c
> +++ b/drivers/net/cxgbe/cxgbe_ethdev.c
> @@ -1068,4 +1068,6 @@ static struct rte_driver rte_cxgbe_driver = {
>  
>  PMD_REGISTER_DRIVER(rte_cxgbe_driver, cxgb4);
>  DRIVER_REGISTER_PCI_TABLE(cxgb4, cxgb4_pci_tbl);
> +DRIVER_REGISTER_KMOD_DEP(cxgb4,
> +	"uio,igb_uio:uio,uio_pci_generic:vfio,vfio-pci");
>  
> diff --git a/drivers/net/e1000/em_ethdev.c b/drivers/net/e1000/em_ethdev.c
> index ad104ed..bd0d0ea 100644
> --- a/drivers/net/e1000/em_ethdev.c
> +++ b/drivers/net/e1000/em_ethdev.c
> @@ -1806,3 +1806,5 @@ struct rte_driver em_pmd_drv = {
>  
>  PMD_REGISTER_DRIVER(em_pmd_drv, em);
>  DRIVER_REGISTER_PCI_TABLE(em, pci_id_em_map);
> +DRIVER_REGISTER_KMOD_DEP(em,
> +	"uio,igb_uio:uio,uio_pci_generic:vfio,vfio-pci");
> diff --git a/drivers/net/e1000/igb_ethdev.c b/drivers/net/e1000/igb_ethdev.c
> index 4e9e6a3..a3dfbfe 100644
> --- a/drivers/net/e1000/igb_ethdev.c
> +++ b/drivers/net/e1000/igb_ethdev.c
> @@ -5257,5 +5257,9 @@ eth_igb_configure_msix_intr(struct rte_eth_dev *dev)
>  
>  PMD_REGISTER_DRIVER(pmd_igb_drv, igb);
>  DRIVER_REGISTER_PCI_TABLE(igb, pci_id_igb_map);
> +DRIVER_REGISTER_KMOD_DEP(igb,
> +	"uio,igb_uio:uio,uio_pci_generic:vfio,vfio-pci");
>  PMD_REGISTER_DRIVER(pmd_igbvf_drv, igbvf);
>  DRIVER_REGISTER_PCI_TABLE(igbvf, pci_id_igbvf_map);
> +DRIVER_REGISTER_KMOD_DEP(igbvf,
> +	"uio,igb_uio:uio,uio_pci_generic:vfio,vfio-pci");
> diff --git a/drivers/net/ena/ena_ethdev.c b/drivers/net/ena/ena_ethdev.c
> index ac0803d..a45d60c 100644
> --- a/drivers/net/ena/ena_ethdev.c
> +++ b/drivers/net/ena/ena_ethdev.c
> @@ -1709,3 +1709,5 @@ struct rte_driver ena_pmd_drv = {
>  
>  PMD_REGISTER_DRIVER(ena_pmd_drv, ena);
>  DRIVER_REGISTER_PCI_TABLE(ena, pci_id_ena_map);
> +DRIVER_REGISTER_KMOD_DEP(ena,
> +	"uio,igb_uio:uio,uio_pci_generic:vfio,vfio-pci");
> diff --git a/drivers/net/enic/enic_ethdev.c b/drivers/net/enic/enic_ethdev.c
> index 47b07c9..a1b8abc 100644
> --- a/drivers/net/enic/enic_ethdev.c
> +++ b/drivers/net/enic/enic_ethdev.c
> @@ -642,3 +642,5 @@ static struct rte_driver rte_enic_driver = {
>  
>  PMD_REGISTER_DRIVER(rte_enic_driver, enic);
>  DRIVER_REGISTER_PCI_TABLE(enic, pci_id_enic_map);
> +DRIVER_REGISTER_KMOD_DEP(enic,
> +	"uio,igb_uio:uio,uio_pci_generic:vfio,vfio-pci");
> diff --git a/drivers/net/fm10k/fm10k_ethdev.c b/drivers/net/fm10k/fm10k_ethdev.c
> index 01f4a72..391ccd7 100644
> --- a/drivers/net/fm10k/fm10k_ethdev.c
> +++ b/drivers/net/fm10k/fm10k_ethdev.c
> @@ -3086,3 +3086,5 @@ static struct rte_driver rte_fm10k_driver = {
>  
>  PMD_REGISTER_DRIVER(rte_fm10k_driver, fm10k);
>  DRIVER_REGISTER_PCI_TABLE(fm10k, pci_id_fm10k_map);
> +DRIVER_REGISTER_KMOD_DEP(fm10k,
> +	"uio,igb_uio:uio,uio_pci_generic:vfio,vfio-pci");
> diff --git a/drivers/net/i40e/i40e_ethdev.c b/drivers/net/i40e/i40e_ethdev.c
> index d0aeb70..a1466aa 100644
> --- a/drivers/net/i40e/i40e_ethdev.c
> +++ b/drivers/net/i40e/i40e_ethdev.c
> @@ -723,6 +723,8 @@ static struct rte_driver rte_i40e_driver = {
>  
>  PMD_REGISTER_DRIVER(rte_i40e_driver, i40e);
>  DRIVER_REGISTER_PCI_TABLE(i40e, pci_id_i40e_map);
> +DRIVER_REGISTER_KMOD_DEP(i40e,
> +	"uio,igb_uio:uio,uio_pci_generic:vfio,vfio-pci");
>  
>  /*
>   * Initialize registers for flexible payload, which should be set by NVM.
> diff --git a/drivers/net/i40e/i40e_ethdev_vf.c b/drivers/net/i40e/i40e_ethdev_vf.c
> index a616ae0..61be44a 100644
> --- a/drivers/net/i40e/i40e_ethdev_vf.c
> +++ b/drivers/net/i40e/i40e_ethdev_vf.c
> @@ -1586,6 +1586,8 @@ static struct rte_driver rte_i40evf_driver = {
>  
>  PMD_REGISTER_DRIVER(rte_i40evf_driver, i40evf);
>  DRIVER_REGISTER_PCI_TABLE(i40evf, pci_id_i40evf_map);
> +DRIVER_REGISTER_KMOD_DEP(i40evf,
> +	"uio,igb_uio:uio,uio_pci_generic:vfio,vfio-pci");
>  
>  static int
>  i40evf_dev_configure(struct rte_eth_dev *dev)
> diff --git a/drivers/net/ixgbe/ixgbe_ethdev.c b/drivers/net/ixgbe/ixgbe_ethdev.c
> index fb618ef..e353d7a 100644
> --- a/drivers/net/ixgbe/ixgbe_ethdev.c
> +++ b/drivers/net/ixgbe/ixgbe_ethdev.c
> @@ -7421,5 +7421,9 @@ static struct rte_driver rte_ixgbevf_driver = {
>  
>  PMD_REGISTER_DRIVER(rte_ixgbe_driver, ixgbe);
>  DRIVER_REGISTER_PCI_TABLE(ixgbe, pci_id_ixgbe_map);
> +DRIVER_REGISTER_KMOD_DEP(ixgbe,
> +	"uio,igb_uio:uio,uio_pci_generic:vfio,vfio-pci");
>  PMD_REGISTER_DRIVER(rte_ixgbevf_driver, ixgbevf);
>  DRIVER_REGISTER_PCI_TABLE(ixgbevf, pci_id_ixgbevf_map);
> +DRIVER_REGISTER_KMOD_DEP(ixgbevf,
> +	"uio,igb_uio:uio,uio_pci_generic:vfio,vfio-pci");
> diff --git a/drivers/net/mlx4/mlx4.c b/drivers/net/mlx4/mlx4.c
> index 304c846..d8f6905 100644
> --- a/drivers/net/mlx4/mlx4.c
> +++ b/drivers/net/mlx4/mlx4.c
> @@ -5948,3 +5948,5 @@ static struct rte_driver rte_mlx4_driver = {
>  
>  PMD_REGISTER_DRIVER(rte_mlx4_driver, mlx4);
>  DRIVER_REGISTER_PCI_TABLE(mlx4, mlx4_pci_id_map);
> +DRIVER_REGISTER_KMOD_DEP(mlx4,
> +	"ib_uverbs,mlx4_core,mlx4_en,mlx4_ib");
> diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
> index d96a9af..29d7332 100644
> --- a/drivers/net/mlx5/mlx5.c
> +++ b/drivers/net/mlx5/mlx5.c
> @@ -763,3 +763,6 @@ static struct rte_driver rte_mlx5_driver = {
>  
>  PMD_REGISTER_DRIVER(rte_mlx5_driver, mlx5);
>  DRIVER_REGISTER_PCI_TABLE(mlx5, mlx5_pci_id_map);
> +DRIVER_REGISTER_KMOD_DEP(mlx5,
> +	"ptp,inet_lro,ib_sa,ib_mad,ib_netlink,ib_addr,"
> +	"ib_core,ib_uverbs,mlx5_core,mlx5_ib");
> diff --git a/drivers/net/nfp/nfp_net.c b/drivers/net/nfp/nfp_net.c
> index 82e3e4e..f4c8a39 100644
> --- a/drivers/net/nfp/nfp_net.c
> +++ b/drivers/net/nfp/nfp_net.c
> @@ -2488,6 +2488,8 @@ static struct rte_driver rte_nfp_net_driver = {
>  
>  PMD_REGISTER_DRIVER(rte_nfp_net_driver, nfp);
>  DRIVER_REGISTER_PCI_TABLE(nfp, pci_id_nfp_net_map);
> +DRIVER_REGISTER_KMOD_DEP(nfp,
> +	"uio,igb_uio:uio,uio_pci_generic:vfio,vfio-pci");
>  
>  /*
>   * Local variables:
> diff --git a/drivers/net/qede/qede_ethdev.c b/drivers/net/qede/qede_ethdev.c
> index 82e44b8..a3c6b44 100644
> --- a/drivers/net/qede/qede_ethdev.c
> +++ b/drivers/net/qede/qede_ethdev.c
> @@ -1530,5 +1530,9 @@ static struct rte_driver rte_qede_driver = {
>  
>  PMD_REGISTER_DRIVER(rte_qede_driver, qede);
>  DRIVER_REGISTER_PCI_TABLE(qede, pci_id_qede_map);
> +DRIVER_REGISTER_KMOD_DEP(qede,
> +	"uio,igb_uio:uio,uio_pci_generic:vfio,vfio-pci");
>  PMD_REGISTER_DRIVER(rte_qedevf_driver, qedevf);
>  DRIVER_REGISTER_PCI_TABLE(qedevf, pci_id_qedevf_map);
> +DRIVER_REGISTER_KMOD_DEP(qedevf,
> +	"uio,igb_uio:uio,uio_pci_generic:vfio,vfio-pci");
> diff --git a/drivers/net/szedata2/rte_eth_szedata2.c b/drivers/net/szedata2/rte_eth_szedata2.c
> index 483d789..409e71f 100644
> --- a/drivers/net/szedata2/rte_eth_szedata2.c
> +++ b/drivers/net/szedata2/rte_eth_szedata2.c
> @@ -1602,3 +1602,5 @@ static struct rte_driver rte_szedata2_driver = {
>  
>  PMD_REGISTER_DRIVER(rte_szedata2_driver, RTE_SZEDATA2_DRIVER_NAME);
>  DRIVER_REGISTER_PCI_TABLE(RTE_SZEDATA2_DRIVER_NAME, rte_szedata2_pci_id_table);
> +DRIVER_REGISTER_KMOD_DEP(RTE_SZEDATA2_DRIVER_NAME,
> +	"uio,igb_uio:uio,uio_pci_generic:vfio,vfio-pci");
> diff --git a/drivers/net/thunderx/nicvf_ethdev.c b/drivers/net/thunderx/nicvf_ethdev.c
> index 4f875c0..8c33df2 100644
> --- a/drivers/net/thunderx/nicvf_ethdev.c
> +++ b/drivers/net/thunderx/nicvf_ethdev.c
> @@ -1785,3 +1785,5 @@ static struct rte_driver rte_nicvf_driver = {
>  
>  PMD_REGISTER_DRIVER(rte_nicvf_driver, thunderx_nicvf);
>  DRIVER_REGISTER_PCI_TABLE(thunderx_nicvf, pci_id_nicvf_map);
> +DRIVER_REGISTER_KMOD_DEP(thunderx_nicvf,
> +	"uio,igb_uio:uio,uio_pci_generic:vfio,vfio-pci");
> diff --git a/drivers/net/virtio/virtio_ethdev.c b/drivers/net/virtio/virtio_ethdev.c
> index 07d6449..f65b9a4 100644
> --- a/drivers/net/virtio/virtio_ethdev.c
> +++ b/drivers/net/virtio/virtio_ethdev.c
> @@ -1570,3 +1570,5 @@ static struct rte_driver rte_virtio_driver = {
>  
>  PMD_REGISTER_DRIVER(rte_virtio_driver, virtio_net);
>  DRIVER_REGISTER_PCI_TABLE(virtio_net, pci_id_virtio_map);
> +DRIVER_REGISTER_KMOD_DEP(virtio_net,
> +	"uio,igb_uio:uio,uio_pci_generic:vfio,vfio-pci");
> diff --git a/drivers/net/vmxnet3/vmxnet3_ethdev.c b/drivers/net/vmxnet3/vmxnet3_ethdev.c
> index 5874215..d2d07ad 100644
> --- a/drivers/net/vmxnet3/vmxnet3_ethdev.c
> +++ b/drivers/net/vmxnet3/vmxnet3_ethdev.c
> @@ -955,3 +955,5 @@ static struct rte_driver rte_vmxnet3_driver = {
>  
>  PMD_REGISTER_DRIVER(rte_vmxnet3_driver, vmxnet3);
>  DRIVER_REGISTER_PCI_TABLE(vmxnet3, pci_id_vmxnet3_map);
> +DRIVER_REGISTER_KMOD_DEP(vmxnet3,
> +	"uio,igb_uio:uio,uio_pci_generic:vfio,vfio-pci");
> diff --git a/lib/librte_eal/common/include/rte_dev.h b/lib/librte_eal/common/include/rte_dev.h
> index 95789f9..b721dc3 100644
> --- a/lib/librte_eal/common/include/rte_dev.h
> +++ b/lib/librte_eal/common/include/rte_dev.h
> @@ -203,6 +203,20 @@ RTE_STR(table)
>  static const char DRV_EXP_TAG(name, param_string_export)[] \
>  __attribute__((used)) = str
>  
> +/**
> + * Advertise the list of kernel modules required to run this driver
> + *
> + * This string list the name of kernel modules, separated by commas. The
> + * order is important. If several modules lists are possible, they are
> + * separated by colons.
> + *
> + * Example: "uio,igb_uio:uio,uio_pci_generic" means either "uio,igb_uio"
> + * or "uio,uio_pci_generic".
> + */
> +#define DRIVER_REGISTER_KMOD_DEP(name, str) \
> +static const char DRV_EXP_TAG(name, kmod_dep_export)[] \
> +__attribute__((used)) = str
> +
>  #ifdef __cplusplus
>  }
>  #endif
> diff --git a/tools/dpdk-pmdinfo.py b/tools/dpdk-pmdinfo.py
> index 3db9819..17bfed4 100755
> --- a/tools/dpdk-pmdinfo.py
> +++ b/tools/dpdk-pmdinfo.py
> @@ -312,7 +312,10 @@ class ReadElf(object):
>          global raw_output
>          global pcidb
>  
> -        optional_pmd_info = [{'id': 'params', 'tag': 'PMD PARAMETERS'}]
> +        optional_pmd_info = [
> +            {'id': 'params', 'tag': 'PMD PARAMETERS'},
> +            {'id': 'kmod', 'tag': 'PMD KMOD DEPENDENCIES'}
> +        ]
>  
>          i = mystring.index("=")
>          mystring = mystring[i + 2:]
  
Olivier Matz Aug. 31, 2016, 9:21 a.m. UTC | #4
Hi Neil,

On 08/30/2016 03:23 PM, Neil Horman wrote:
> On Fri, Aug 26, 2016 at 03:20:46PM +0200, Olivier Matz wrote:
>> Add a new macro DRIVER_REGISTER_KMOD_DEP() that allows a driver to
>> declare the list of kernel modules required to run properly.
>>
>> Today, most PCI drivers require uio/vfio.
>>
>> Signed-off-by: Olivier Matz <olivier.matz@6wind.com>
>>
>> ---
>> In this RFC, I supposed that all PCI drivers require a the loading of a
>> uio/vfio module (except mlx*), this may be wrong.
>> Comments are welcome!
>>
>>
>>  buildtools/pmdinfogen/pmdinfogen.c      |  1 +
>>  buildtools/pmdinfogen/pmdinfogen.h      |  1 +
>>  drivers/crypto/qat/rte_qat_cryptodev.c  |  2 ++
>>  drivers/net/bnx2x/bnx2x_ethdev.c        |  4 ++++
>>  drivers/net/bnxt/bnxt_ethdev.c          |  2 ++
>>  drivers/net/cxgbe/cxgbe_ethdev.c        |  2 ++
>>  drivers/net/e1000/em_ethdev.c           |  2 ++
>>  drivers/net/e1000/igb_ethdev.c          |  4 ++++
>>  drivers/net/ena/ena_ethdev.c            |  2 ++
>>  drivers/net/enic/enic_ethdev.c          |  2 ++
>>  drivers/net/fm10k/fm10k_ethdev.c        |  2 ++
>>  drivers/net/i40e/i40e_ethdev.c          |  2 ++
>>  drivers/net/i40e/i40e_ethdev_vf.c       |  2 ++
>>  drivers/net/ixgbe/ixgbe_ethdev.c        |  4 ++++
>>  drivers/net/mlx4/mlx4.c                 |  2 ++
>>  drivers/net/mlx5/mlx5.c                 |  3 +++
>>  drivers/net/nfp/nfp_net.c               |  2 ++
>>  drivers/net/qede/qede_ethdev.c          |  4 ++++
>>  drivers/net/szedata2/rte_eth_szedata2.c |  2 ++
>>  drivers/net/thunderx/nicvf_ethdev.c     |  2 ++
>>  drivers/net/virtio/virtio_ethdev.c      |  2 ++
>>  drivers/net/vmxnet3/vmxnet3_ethdev.c    |  2 ++
>>  lib/librte_eal/common/include/rte_dev.h | 14 ++++++++++++++
>>  tools/dpdk-pmdinfo.py                   |  5 ++++-
>>  24 files changed, 69 insertions(+), 1 deletion(-)
>>
> 
> Generally speaking, I like the idea, it makes sense to me in terms of using
> pmdinfo to export this information
> 
> That said, This may need to be a set of macros.  By that I mean (and correct me
> if I'm wrong here), but the relationship between pmd's and kernel modules is in
> some cases, more complex than a 'requires' or 'depends' relationship.  That is
> to say, some pmd may need user space hardware access, but can use either uio OR
> vfio, but doesn't need both, and can continue to function if only one is
> available.  Other PMD's may be able to use vfio or uio, but can still function
> without either.  And some, as your patch implements, simply require one or the
> other to function.  As such it seems like you may want a few macros, in the form
> of:
> 
> DRIVER_REGISTER_KMOD_REQUEST - List of modules to attempt loading, ignore any
> failures 
> DRIVER_REGISTER_KMOD_REQUIRE - List of modules required to be loaded after
> request macro completes, fail if any are not loaded
> 
> Thats just spitballing, mind you, theres probably a better way to do it, but the
> idea is to list a set of modules you would like to have, and then create a
> parsable syntax to describe the modules that need to be loaded after the request
> is complete so that you can accurately codify the situations I described above.

Thank you for your feedback.
However, I'm not sure I'm perfectly getting what you suggest.

Do you think some PMDs could request a kernel module without really
requiring it? Do you have an example in mind?

The syntax I've submitted lets you define several lists of modules, so
that the user or the script that starts the application can decide which
kmod list is better according to the environment.

For example, most drivers will advertise
"uio,igb_uio:uio,uio_pci_generic:vfio,vfio-pci", and the user or script
will have to choose between loading:
- uio igb_uio
- uio uio_pci_generic
- vfio vfio-pci


Olivier
  
Neil Horman Aug. 31, 2016, 1:27 p.m. UTC | #5
On Wed, Aug 31, 2016 at 11:21:18AM +0200, Olivier Matz wrote:
> Hi Neil,
> 
> On 08/30/2016 03:23 PM, Neil Horman wrote:
> > On Fri, Aug 26, 2016 at 03:20:46PM +0200, Olivier Matz wrote:
> >> Add a new macro DRIVER_REGISTER_KMOD_DEP() that allows a driver to
> >> declare the list of kernel modules required to run properly.
> >>
> >> Today, most PCI drivers require uio/vfio.
> >>
> >> Signed-off-by: Olivier Matz <olivier.matz@6wind.com>
> >>
> >> ---
> >> In this RFC, I supposed that all PCI drivers require a the loading of a
> >> uio/vfio module (except mlx*), this may be wrong.
> >> Comments are welcome!
> >>
> >>
> >>  buildtools/pmdinfogen/pmdinfogen.c      |  1 +
> >>  buildtools/pmdinfogen/pmdinfogen.h      |  1 +
> >>  drivers/crypto/qat/rte_qat_cryptodev.c  |  2 ++
> >>  drivers/net/bnx2x/bnx2x_ethdev.c        |  4 ++++
> >>  drivers/net/bnxt/bnxt_ethdev.c          |  2 ++
> >>  drivers/net/cxgbe/cxgbe_ethdev.c        |  2 ++
> >>  drivers/net/e1000/em_ethdev.c           |  2 ++
> >>  drivers/net/e1000/igb_ethdev.c          |  4 ++++
> >>  drivers/net/ena/ena_ethdev.c            |  2 ++
> >>  drivers/net/enic/enic_ethdev.c          |  2 ++
> >>  drivers/net/fm10k/fm10k_ethdev.c        |  2 ++
> >>  drivers/net/i40e/i40e_ethdev.c          |  2 ++
> >>  drivers/net/i40e/i40e_ethdev_vf.c       |  2 ++
> >>  drivers/net/ixgbe/ixgbe_ethdev.c        |  4 ++++
> >>  drivers/net/mlx4/mlx4.c                 |  2 ++
> >>  drivers/net/mlx5/mlx5.c                 |  3 +++
> >>  drivers/net/nfp/nfp_net.c               |  2 ++
> >>  drivers/net/qede/qede_ethdev.c          |  4 ++++
> >>  drivers/net/szedata2/rte_eth_szedata2.c |  2 ++
> >>  drivers/net/thunderx/nicvf_ethdev.c     |  2 ++
> >>  drivers/net/virtio/virtio_ethdev.c      |  2 ++
> >>  drivers/net/vmxnet3/vmxnet3_ethdev.c    |  2 ++
> >>  lib/librte_eal/common/include/rte_dev.h | 14 ++++++++++++++
> >>  tools/dpdk-pmdinfo.py                   |  5 ++++-
> >>  24 files changed, 69 insertions(+), 1 deletion(-)
> >>
> > 
> > Generally speaking, I like the idea, it makes sense to me in terms of using
> > pmdinfo to export this information
> > 
> > That said, This may need to be a set of macros.  By that I mean (and correct me
> > if I'm wrong here), but the relationship between pmd's and kernel modules is in
> > some cases, more complex than a 'requires' or 'depends' relationship.  That is
> > to say, some pmd may need user space hardware access, but can use either uio OR
> > vfio, but doesn't need both, and can continue to function if only one is
> > available.  Other PMD's may be able to use vfio or uio, but can still function
> > without either.  And some, as your patch implements, simply require one or the
> > other to function.  As such it seems like you may want a few macros, in the form
> > of:
> > 
> > DRIVER_REGISTER_KMOD_REQUEST - List of modules to attempt loading, ignore any
> > failures 
> > DRIVER_REGISTER_KMOD_REQUIRE - List of modules required to be loaded after
> > request macro completes, fail if any are not loaded
> > 
> > Thats just spitballing, mind you, theres probably a better way to do it, but the
> > idea is to list a set of modules you would like to have, and then create a
> > parsable syntax to describe the modules that need to be loaded after the request
> > is complete so that you can accurately codify the situations I described above.
> 
> Thank you for your feedback.
> However, I'm not sure I'm perfectly getting what you suggest.
> 
> Do you think some PMDs could request a kernel module without really
> requiring it? Do you have an example in mind?
> 
Yes, that's precisely it.  The clearest example I could think of (though I'm
not sure if any PMD currently supports this) is a PMD that supports both UIO
and VFIO communication with the kernel.  Such a PMD requires that one of those
two modules be loaded, but only one (i.e. both are not required), so the case
where only the uio kernel module loads is a success case; likewise, the case where
only the vfio module loads can be treated as success.  Both loading is clearly
successful.  Only if neither loads do we have a failure case.  I'm suggesting that
the grammar that your exports define should take those cases into account.  It's
not always as simple as "I must have the following modules".

> The syntax I've submitted lets you define several lists of modules, so
> that the user or the script that starts the application can decide which
> kmod list is better according to the environment.
> 
If you have a human intervening in the module load process, sure, then it's fine.
But it seems that this particular feature that you're implementing might have
automated uses.  That is to say, the dpdk core library might be interested in
parsing this particular information to direct module autoloading, and if that's
desirable then you need to define these lists such that you can codify failure
and success conditions.

> For example, most drivers will advertise
> "uio,igb_uio:uio,uio_pci_generic:vfio,vfio-pci", and the user or script
> will have to choose between loading:
> - uio igb_uio
> - uio uio_pci_generic
> - vfio vfio-pci
> 
Oh, I see, so your list is a colon delimited list of module load sets, where at
least one set must succeed by loading all modules in its set, but the failure of
any one set isn't fatal to the process?  e.g. a string like this:

uio,igb_uio:vfio,vfio-pci

could be interpreted to mean "I must load (uio AND igb_uio) OR (vfio AND
vfio-pci)".  If the evaluation of that statement results in false, then the
operation fails; otherwise it succeeds.

If that's the case, then, apologies, we're on the same page, and this will work
just fine.

Best
Neil


> 
> Olivier
>
  
Olivier Matz Aug. 31, 2016, 1:39 p.m. UTC | #6
Hi Neil,

On 08/31/2016 03:27 PM, Neil Horman wrote:
> On Wed, Aug 31, 2016 at 11:21:18AM +0200, Olivier Matz wrote:
>> Hi Neil,
>>
>> On 08/30/2016 03:23 PM, Neil Horman wrote:
>>> On Fri, Aug 26, 2016 at 03:20:46PM +0200, Olivier Matz wrote:
>>>> Add a new macro DRIVER_REGISTER_KMOD_DEP() that allows a driver to
>>>> declare the list of kernel modules required to run properly.
>>>>
>>>> Today, most PCI drivers require uio/vfio.
>>>>
>>>> Signed-off-by: Olivier Matz <olivier.matz@6wind.com>
>>>>
>>>> ---
>>>> In this RFC, I supposed that all PCI drivers require a the loading of a
>>>> uio/vfio module (except mlx*), this may be wrong.
>>>> Comments are welcome!
>>>>
>>>>
>>>>  buildtools/pmdinfogen/pmdinfogen.c      |  1 +
>>>>  buildtools/pmdinfogen/pmdinfogen.h      |  1 +
>>>>  drivers/crypto/qat/rte_qat_cryptodev.c  |  2 ++
>>>>  drivers/net/bnx2x/bnx2x_ethdev.c        |  4 ++++
>>>>  drivers/net/bnxt/bnxt_ethdev.c          |  2 ++
>>>>  drivers/net/cxgbe/cxgbe_ethdev.c        |  2 ++
>>>>  drivers/net/e1000/em_ethdev.c           |  2 ++
>>>>  drivers/net/e1000/igb_ethdev.c          |  4 ++++
>>>>  drivers/net/ena/ena_ethdev.c            |  2 ++
>>>>  drivers/net/enic/enic_ethdev.c          |  2 ++
>>>>  drivers/net/fm10k/fm10k_ethdev.c        |  2 ++
>>>>  drivers/net/i40e/i40e_ethdev.c          |  2 ++
>>>>  drivers/net/i40e/i40e_ethdev_vf.c       |  2 ++
>>>>  drivers/net/ixgbe/ixgbe_ethdev.c        |  4 ++++
>>>>  drivers/net/mlx4/mlx4.c                 |  2 ++
>>>>  drivers/net/mlx5/mlx5.c                 |  3 +++
>>>>  drivers/net/nfp/nfp_net.c               |  2 ++
>>>>  drivers/net/qede/qede_ethdev.c          |  4 ++++
>>>>  drivers/net/szedata2/rte_eth_szedata2.c |  2 ++
>>>>  drivers/net/thunderx/nicvf_ethdev.c     |  2 ++
>>>>  drivers/net/virtio/virtio_ethdev.c      |  2 ++
>>>>  drivers/net/vmxnet3/vmxnet3_ethdev.c    |  2 ++
>>>>  lib/librte_eal/common/include/rte_dev.h | 14 ++++++++++++++
>>>>  tools/dpdk-pmdinfo.py                   |  5 ++++-
>>>>  24 files changed, 69 insertions(+), 1 deletion(-)
>>>>
>>>
>>> Generally speaking, I like the idea, it makes sense to me in terms of using
>>> pmdinfo to export this information
>>>
>>> That said, This may need to be a set of macros.  By that I mean (and correct me
>>> if I'm wrong here), but the relationship between pmd's and kernel modules is in
>>> some cases, more complex than a 'requires' or 'depends' relationship.  That is
>>> to say, some pmd may need user space hardware access, but can use either uio OR
>>> vfio, but doesn't need both, and can continue to function if only one is
>>> available.  Other PMD's may be able to use vfio or uio, but can still function
>>> without either.  And some, as your patch implements, simply require one or the
>>> other to function.  As such it seems like you may want a few macros, in the form
>>> of:
>>>
>>> DRIVER_REGISTER_KMOD_REQUEST - List of modules to attempt loading, ignore any
>>> failures 
>>> DRIVER_REGISTER_KMOD_REQUIRE - List of modules required to be loaded after
>>> request macro completes, fail if any are not loaded
>>>
>>> Thats just spitballing, mind you, theres probably a better way to do it, but the
>>> idea is to list a set of modules you would like to have, and then create a
>>> parsable syntax to describe the modules that need to be loaded after the request
>>> is complete so that you can accurately codify the situations I described above.
>>
>> Thank you for your feedback.
>> However, I'm not sure I'm perfectly getting what you suggest.
>>
>> Do you think some PMDs could request a kernel module without really
>> requiring it? Do you have an example in mind?
>>
> Yes, thats precisely it.  The most clear example I could think of (though I'm
> not sure if any pmd currently supports this), is a pmd that supports both UIO
> and VFIO communication with the kernel.  Such a PMD requires that one of those
> two modules be loaded, but only one (i.e. both are not required), so if only the
> uio kernel module loads is a success case, likewise if only the vfio module
> loads can be treated as success.  Both loading are clearly successful.  Only if
> neither load do we have a failure case.  I'm suggesting that the grammer that
> your exports define should take those cases into account.  Its not always as
> simple as "I must have the following modules"
> 
>> The syntax I've submitted lets you define several lists of modules, so
>> that the user or the script that starts the application can decide which
>> kmod list is better according to the environment.
>>
> If you have a human intervening in the module load process, sure, then its fine.
> But it seems that this particular feature that you're implemnting might have
> automated uses.  That is to say the dpdk core library might be interested in
> parsing this particular information to direct module autoloading, and if thats
> desireable then you need to define these lists such that you can codify failure
> and success conditions.
> 
>> For example, most drivers will advertise
>> "uio,igb_uio:uio,uio_pci_generic:vfio,vfio-pci", and the user or script
>> will have to choose between loading:
>> - uio igb_uio
>> - uio uio_pci_generic
>> - vfio vfio-pci
>>
> Oh, I see, so your list is a colon delimited list of module load sets, where at
> least one set must succeed by loading all modules in its set, but the failure of
> any one set isn't fatal to the process?  e.g. a string like this:
> 
> uio,igb_uio:vfio,vfio-pci
> 
> could be interpreted to mean "I must load (uio AND igb_uio) OR (vfio AND
> vfio-pci).  If the evaluation of that statement results in false, then the
> operation fails, otherwise it succedes.
> 
> If thats the case, then, apologies, we're on the same page, and this will work
> just fine.

Yep, that's the idea.

Colon and commas are the best separators I've thought about, but any
idea to make the syntax clearer is welcome ;)

Maybe a syntax like this is clearer:
  "(mod1 & mod2)|(mod3 & mod4)" ?
But it would let the user think that more complex expressions are valid,
like "(mod1 & (mod2 | mod3)) | mod4", which is probably overkill.

Regards,
Olivier
  
Fiona Trahe Sept. 1, 2016, 12:55 p.m. UTC | #7
Hi Neil and Olivier,

> -----Original Message-----
> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Olivier Matz
> Sent: Wednesday, August 31, 2016 2:40 PM
> To: Neil Horman <nhorman@tuxdriver.com>
> Cc: dev@dpdk.org; thomas.monjalon@6wind.com
> Subject: Re: [dpdk-dev] [dpdk-dev, RFC] drivers: advertise kmod dependencies
> in pmdinfo
> 
> Hi Neil,
> 
> On 08/31/2016 03:27 PM, Neil Horman wrote:
> > On Wed, Aug 31, 2016 at 11:21:18AM +0200, Olivier Matz wrote:
> >> Hi Neil,
> >>
> >> On 08/30/2016 03:23 PM, Neil Horman wrote:
> >>> On Fri, Aug 26, 2016 at 03:20:46PM +0200, Olivier Matz wrote:
> >>>> Add a new macro DRIVER_REGISTER_KMOD_DEP() that allows a driver to
> >>>> declare the list of kernel modules required to run properly.
> >>>>
> >>>> Today, most PCI drivers require uio/vfio.
> >>>>
> >>>> Signed-off-by: Olivier Matz <olivier.matz@6wind.com>
> >>>>
> >>>> ---
> >>>> In this RFC, I supposed that all PCI drivers require a the loading of a
> >>>> uio/vfio module (except mlx*), this may be wrong.
> >>>> Comments are welcome!
> >>>>
> >>>>
> >>>>  buildtools/pmdinfogen/pmdinfogen.c      |  1 +
> >>>>  buildtools/pmdinfogen/pmdinfogen.h      |  1 +
> >>>>  drivers/crypto/qat/rte_qat_cryptodev.c  |  2 ++
> >>>>  drivers/net/bnx2x/bnx2x_ethdev.c        |  4 ++++
> >>>>  drivers/net/bnxt/bnxt_ethdev.c          |  2 ++
> >>>>  drivers/net/cxgbe/cxgbe_ethdev.c        |  2 ++
> >>>>  drivers/net/e1000/em_ethdev.c           |  2 ++
> >>>>  drivers/net/e1000/igb_ethdev.c          |  4 ++++
> >>>>  drivers/net/ena/ena_ethdev.c            |  2 ++
> >>>>  drivers/net/enic/enic_ethdev.c          |  2 ++
> >>>>  drivers/net/fm10k/fm10k_ethdev.c        |  2 ++
> >>>>  drivers/net/i40e/i40e_ethdev.c          |  2 ++
> >>>>  drivers/net/i40e/i40e_ethdev_vf.c       |  2 ++
> >>>>  drivers/net/ixgbe/ixgbe_ethdev.c        |  4 ++++
> >>>>  drivers/net/mlx4/mlx4.c                 |  2 ++
> >>>>  drivers/net/mlx5/mlx5.c                 |  3 +++
> >>>>  drivers/net/nfp/nfp_net.c               |  2 ++
> >>>>  drivers/net/qede/qede_ethdev.c          |  4 ++++
> >>>>  drivers/net/szedata2/rte_eth_szedata2.c |  2 ++
> >>>>  drivers/net/thunderx/nicvf_ethdev.c     |  2 ++
> >>>>  drivers/net/virtio/virtio_ethdev.c      |  2 ++
> >>>>  drivers/net/vmxnet3/vmxnet3_ethdev.c    |  2 ++
> >>>>  lib/librte_eal/common/include/rte_dev.h | 14 ++++++++++++++
> >>>>  tools/dpdk-pmdinfo.py                   |  5 ++++-
> >>>>  24 files changed, 69 insertions(+), 1 deletion(-)
> >>>>
> >>>
> >>> Generally speaking, I like the idea, it makes sense to me in terms of using
> >>> pmdinfo to export this information
> >>>
> >>> That said, This may need to be a set of macros.  By that I mean (and correct
> me
> >>> if I'm wrong here), but the relationship between pmd's and kernel modules
> is in
> >>> some cases, more complex than a 'requires' or 'depends' relationship.  That
> is
> >>> to say, some pmd may need user space hardware access, but can use either
> uio OR
> >>> vfio, but doesn't need both, and can continue to function if only one is
> >>> available.  Other PMD's may be able to use vfio or uio, but can still function
> >>> without either.  And some, as your patch implements, simply require one or
> the
> >>> other to function.  As such it seems like you may want a few macros, in the
> form
> >>> of:
> >>>
> >>> DRIVER_REGISTER_KMOD_REQUEST - List of modules to attempt loading,
> ignore any
> >>> failures
> >>> DRIVER_REGISTER_KMOD_REQUIRE - List of modules required to be
> loaded after
> >>> request macro completes, fail if any are not loaded
> >>>
> >>> Thats just spitballing, mind you, theres probably a better way to do it, but
> the
> >>> idea is to list a set of modules you would like to have, and then create a
> >>> parsable syntax to describe the modules that need to be loaded after the
> request
> >>> is complete so that you can accurately codify the situations I described
> above.
> >>
> >> Thank you for your feedback.
> >> However, I'm not sure I'm perfectly getting what you suggest.
> >>
> >> Do you think some PMDs could request a kernel module without really
> >> requiring it? Do you have an example in mind?
> >>
> > Yes, thats precisely it.  The most clear example I could think of (though I'm
> > not sure if any pmd currently supports this), is a pmd that supports both UIO
> > and VFIO communication with the kernel.  Such a PMD requires that one of
> those
> > two modules be loaded, but only one (i.e. both are not required), so if only
> the
> > uio kernel module loads is a success case, likewise if only the vfio module
> > loads can be treated as success.  Both loading are clearly successful.  Only if
> > neither load do we have a failure case.  I'm suggesting that the grammer that
> > your exports define should take those cases into account.  Its not always as
> > simple as "I must have the following modules"
> >
> >> The syntax I've submitted lets you define several lists of modules, so
> >> that the user or the script that starts the application can decide which
> >> kmod list is better according to the environment.
> >>
> > If you have a human intervening in the module load process, sure, then its
> fine.
> > But it seems that this particular feature that you're implemnting might have
> > automated uses.  That is to say the dpdk core library might be interested in
> > parsing this particular information to direct module autoloading, and if thats
> > desireable then you need to define these lists such that you can codify failure
> > and success conditions.
> >
> >> For example, most drivers will advertise
> >> "uio,igb_uio:uio,uio_pci_generic:vfio,vfio-pci", and the user or script
> >> will have to choose between loading:
> >> - uio igb_uio
> >> - uio uio_pci_generic
> >> - vfio vfio-pci
> >>
> > Oh, I see, so your list is a colon delimited list of module load sets, where at
> > least one set must succeed by loading all modules in its set, but the failure of
> > any one set isn't fatal to the process?  e.g. a string like this:
> >
> > uio,igb_uio:vfio,vfio-pci
> >
> > could be interpreted to mean "I must load (uio AND igb_uio) OR (vfio AND
> > vfio-pci).  If the evaluation of that statement results in false, then the
> > operation fails, otherwise it succedes.
> >
> > If thats the case, then, apologies, we're on the same page, and this will work
> > just fine.
> 
> Yep, that's the idea.
> 
> Colon and commas are the best separators I've thought about, but any
> idea to make the syntax clearer is welcome ;)
> 
> Maybe a syntax like is clearer:
>   "(mod1 & mod2)|(mod3 & mod4)" ?
> But it would let the user think that more complex expressions are valid,
> like "(mod1 & (mod2 | mod3)) | mod4", which is probably overkill.
> 
> Regards,
> Olivier

This RFC seems like a good idea - and something the Intel QuickAssist PMD could benefit from.
However, the "(mod1 & mod2)" syntax can handle the QAT case better in my opinion.
i.e.
as well as needing one of 
* uio igb_uio
* uio uio_pci_generic
* vfio vfio-pci
QAT PMD also needs one of (depending on which physical device is plugged)
 * qat_dh895xcc
 * qat_c62x
 * qat_c3xxx

So the original syntax would result in a very long list of possible variations.
What really reflects the dependencies would be 
((uio & igb_uio) | (uio & uio_pci_generic) | (vfio & vfio-pci)) & (qat_dh895xcc | qat_c62x | qat_c3xxx)

Also the dependencies on a VM are different to a bare-metal installation, i.e. the qat_xxxx driver just 
needs to be loaded in the Host. So maybe this could be satisfied by a separate list?
DRIVER_REGISTER_KMOD_DEP()
DRIVER_REGISTER_KMOD_VM_DEP()

But maybe this is all too complex, and instead the feature should be considered as optional and 
not requiring all dependencies to be declared? 

Regards,
Fiona
  
Neil Horman Sept. 1, 2016, 5:35 p.m. UTC | #8
On Thu, Sep 01, 2016 at 12:55:27PM +0000, Trahe, Fiona wrote:
> Hi Neil and Olivier,
> 
> > -----Original Message-----
> > From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Olivier Matz
> > Sent: Wednesday, August 31, 2016 2:40 PM
> > To: Neil Horman <nhorman@tuxdriver.com>
> > Cc: dev@dpdk.org; thomas.monjalon@6wind.com
> > Subject: Re: [dpdk-dev] [dpdk-dev, RFC] drivers: advertise kmod dependencies
> > in pmdinfo
> > 
> > Hi Neil,
> > 
> > On 08/31/2016 03:27 PM, Neil Horman wrote:
> > > On Wed, Aug 31, 2016 at 11:21:18AM +0200, Olivier Matz wrote:
> > >> Hi Neil,
> > >>
> > >> On 08/30/2016 03:23 PM, Neil Horman wrote:
> > >>> On Fri, Aug 26, 2016 at 03:20:46PM +0200, Olivier Matz wrote:
> > >>>> Add a new macro DRIVER_REGISTER_KMOD_DEP() that allows a driver to
> > >>>> declare the list of kernel modules required to run properly.
> > >>>>
> > >>>> Today, most PCI drivers require uio/vfio.
> > >>>>
> > >>>> Signed-off-by: Olivier Matz <olivier.matz@6wind.com>
> > >>>>
> > >>>> ---
> > >>>> In this RFC, I supposed that all PCI drivers require a the loading of a
> > >>>> uio/vfio module (except mlx*), this may be wrong.
> > >>>> Comments are welcome!
> > >>>>
> > >>>>
> > >>>>  buildtools/pmdinfogen/pmdinfogen.c      |  1 +
> > >>>>  buildtools/pmdinfogen/pmdinfogen.h      |  1 +
> > >>>>  drivers/crypto/qat/rte_qat_cryptodev.c  |  2 ++
> > >>>>  drivers/net/bnx2x/bnx2x_ethdev.c        |  4 ++++
> > >>>>  drivers/net/bnxt/bnxt_ethdev.c          |  2 ++
> > >>>>  drivers/net/cxgbe/cxgbe_ethdev.c        |  2 ++
> > >>>>  drivers/net/e1000/em_ethdev.c           |  2 ++
> > >>>>  drivers/net/e1000/igb_ethdev.c          |  4 ++++
> > >>>>  drivers/net/ena/ena_ethdev.c            |  2 ++
> > >>>>  drivers/net/enic/enic_ethdev.c          |  2 ++
> > >>>>  drivers/net/fm10k/fm10k_ethdev.c        |  2 ++
> > >>>>  drivers/net/i40e/i40e_ethdev.c          |  2 ++
> > >>>>  drivers/net/i40e/i40e_ethdev_vf.c       |  2 ++
> > >>>>  drivers/net/ixgbe/ixgbe_ethdev.c        |  4 ++++
> > >>>>  drivers/net/mlx4/mlx4.c                 |  2 ++
> > >>>>  drivers/net/mlx5/mlx5.c                 |  3 +++
> > >>>>  drivers/net/nfp/nfp_net.c               |  2 ++
> > >>>>  drivers/net/qede/qede_ethdev.c          |  4 ++++
> > >>>>  drivers/net/szedata2/rte_eth_szedata2.c |  2 ++
> > >>>>  drivers/net/thunderx/nicvf_ethdev.c     |  2 ++
> > >>>>  drivers/net/virtio/virtio_ethdev.c      |  2 ++
> > >>>>  drivers/net/vmxnet3/vmxnet3_ethdev.c    |  2 ++
> > >>>>  lib/librte_eal/common/include/rte_dev.h | 14 ++++++++++++++
> > >>>>  tools/dpdk-pmdinfo.py                   |  5 ++++-
> > >>>>  24 files changed, 69 insertions(+), 1 deletion(-)
> > >>>>
> > >>>
> > >>> Generally speaking, I like the idea, it makes sense to me in terms of using
> > >>> pmdinfo to export this information
> > >>>
> > >>> That said, This may need to be a set of macros.  By that I mean (and correct
> > me
> > >>> if I'm wrong here), but the relationship between pmd's and kernel modules
> > is in
> > >>> some cases, more complex than a 'requires' or 'depends' relationship.  That
> > is
> > >>> to say, some pmd may need user space hardware access, but can use either
> > uio OR
> > >>> vfio, but doesn't need both, and can continue to function if only one is
> > >>> available.  Other PMD's may be able to use vfio or uio, but can still function
> > >>> without either.  And some, as your patch implements, simply require one or
> > the
> > >>> other to function.  As such it seems like you may want a few macros, in the
> > form
> > >>> of:
> > >>>
> > >>> DRIVER_REGISTER_KMOD_REQUEST - List of modules to attempt loading,
> > ignore any
> > >>> failures
> > >>> DRIVER_REGISTER_KMOD_REQUIRE - List of modules required to be
> > loaded after
> > >>> request macro completes, fail if any are not loaded
> > >>>
> > >>> Thats just spitballing, mind you, theres probably a better way to do it, but
> > the
> > >>> idea is to list a set of modules you would like to have, and then create a
> > >>> parsable syntax to describe the modules that need to be loaded after the
> > request
> > >>> is complete so that you can accurately codify the situations I described
> > above.
> > >>
> > >> Thank you for your feedback.
> > >> However, I'm not sure I'm perfectly getting what you suggest.
> > >>
> > >> Do you think some PMDs could request a kernel module without really
> > >> requiring it? Do you have an example in mind?
> > >>
> > > Yes, thats precisely it.  The most clear example I could think of (though I'm
> > > not sure if any pmd currently supports this), is a pmd that supports both UIO
> > > and VFIO communication with the kernel.  Such a PMD requires that one of
> > those
> > > two modules be loaded, but only one (i.e. both are not required), so if only
> > the
> > > uio kernel module loads is a success case, likewise if only the vfio module
> > > loads can be treated as success.  Both loading are clearly successful.  Only if
> > > neither load do we have a failure case.  I'm suggesting that the grammer that
> > > your exports define should take those cases into account.  Its not always as
> > > simple as "I must have the following modules"
> > >
> > >> The syntax I've submitted lets you define several lists of modules, so
> > >> that the user or the script that starts the application can decide which
> > >> kmod list is better according to the environment.
> > >>
> > > If you have a human intervening in the module load process, sure, then its
> > fine.
> > > But it seems that this particular feature that you're implemnting might have
> > > automated uses.  That is to say the dpdk core library might be interested in
> > > parsing this particular information to direct module autoloading, and if thats
> > > desireable then you need to define these lists such that you can codify failure
> > > and success conditions.
> > >
> > >> For example, most drivers will advertise
> > >> "uio,igb_uio:uio,uio_pci_generic:vfio,vfio-pci", and the user or script
> > >> will have to choose between loading:
> > >> - uio igb_uio
> > >> - uio uio_pci_generic
> > >> - vfio vfio-pci
> > >>
> > > Oh, I see, so your list is a colon delimited list of module load sets, where at
> > > least one set must succeed by loading all modules in its set, but the failure of
> > > any one set isn't fatal to the process?  e.g. a string like this:
> > >
> > > uio,igb_uio:vfio,vfio-pci
> > >
> > > could be interpreted to mean "I must load (uio AND igb_uio) OR (vfio AND
> > > vfio-pci).  If the evaluation of that statement results in false, then the
> > > operation fails, otherwise it succedes.
> > >
> > > If thats the case, then, apologies, we're on the same page, and this will work
> > > just fine.
> > 
> > Yep, that's the idea.
> > 
> > Colon and commas are the best separators I've thought about, but any
> > idea to make the syntax clearer is welcome ;)
> > 
> > Maybe a syntax like is clearer:
> >   "(mod1 & mod2)|(mod3 & mod4)" ?
> > But it would let the user think that more complex expressions are valid,
> > like "(mod1 & (mod2 | mod3)) | mod4", which is probably overkill.
> > 
> > Regards,
> > Olivier
> 
> This RFC seems like a good idea - and something the Intel QuickAssist PMD could benefit from.
> However the (mod1 & mod2) can handle the QAT case better in my opinion.
> i.e.
> as well as needing one of 
> * uio igb_uio
> * uio uio_pci_generic
> * vfio vfio-pci
> QAT PMD also needs one of (depending on which physical device is plugged)
>  * qat_dh895xcc
>  * qat_c62x
>  * qat_c3xxx
> 
> So the original syntax would result in a very long list of possible variations.
> What really reflects the dependencies would be 
> ((uio & igb_uio) | (uio & uio_pci_generic) | (vfio & vfio_pci)) & (qat_dh895xcc | qat_c62x | qat_c3xxx)
> 
Ah, I didn't consider that hardware specifics might create a use case where a
pmd must have one or more kernel modules available for hw support.  Perhaps it
is worthwhile to automate hardware support - that is to say, any module loading
script should automatically look at the pci table exported from a pmd, and, if
found, load any modules that claim support for that device:vendor tuple?  Though
that might break in the case of uio, if there are separate driver modules that
support native hardware and uio access.

> Also the dependencies on a VM are different to a bare-metal installation, i.e. the qat_xxxx driver just 
> needs to be loaded in the Host. So maybe this could be satisfied by a separate list?
> DRIVER_REGISTER_KMOD_DEP()
> DRIVER_REGISTER_KMOD_VM_DEP()
> 

This makes me a bit nervous.  Ideally, nothing should have to know if it's running
on bare metal or in a VM; we should try to avoid VM-specific macros if possible.
I'm not sure what the alternative is yet, though.

> But maybe this is all too complex, and instead the feature should be considered as optional and 
> not requiring all dependencies to be declared? 
> 
> Regards,
> Fiona
> 
>
  
Stephen Hemminger Sept. 1, 2016, 5:41 p.m. UTC | #9
On Thu, 1 Sep 2016 13:35:19 -0400
Neil Horman <nhorman@tuxdriver.com> wrote:

> On Thu, Sep 01, 2016 at 12:55:27PM +0000, Trahe, Fiona wrote:
> > Hi Neil and Olivier,
> >   
> > > -----Original Message-----
> > > From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Olivier Matz
> > > Sent: Wednesday, August 31, 2016 2:40 PM
> > > To: Neil Horman <nhorman@tuxdriver.com>
> > > Cc: dev@dpdk.org; thomas.monjalon@6wind.com
> > > Subject: Re: [dpdk-dev] [dpdk-dev, RFC] drivers: advertise kmod dependencies
> > > in pmdinfo
> > > 
> > > Hi Neil,
> > > 
> > > On 08/31/2016 03:27 PM, Neil Horman wrote:  
> > > > On Wed, Aug 31, 2016 at 11:21:18AM +0200, Olivier Matz wrote:  
> > > >> Hi Neil,
> > > >>
> > > >> On 08/30/2016 03:23 PM, Neil Horman wrote:  
> > > >>> On Fri, Aug 26, 2016 at 03:20:46PM +0200, Olivier Matz wrote:  
> > > >>>> Add a new macro DRIVER_REGISTER_KMOD_DEP() that allows a driver to
> > > >>>> declare the list of kernel modules required to run properly.
> > > >>>>
> > > >>>> Today, most PCI drivers require uio/vfio.
> > > >>>>
> > > >>>> Signed-off-by: Olivier Matz <olivier.matz@6wind.com>
> > > >>>>
> > > >>>> ---
> > > >>>> In this RFC, I supposed that all PCI drivers require a the loading of a
> > > >>>> uio/vfio module (except mlx*), this may be wrong.
> > > >>>> Comments are welcome!
> > > >>>>
> > > >>>>
> > > >>>>  buildtools/pmdinfogen/pmdinfogen.c      |  1 +
> > > >>>>  buildtools/pmdinfogen/pmdinfogen.h      |  1 +
> > > >>>>  drivers/crypto/qat/rte_qat_cryptodev.c  |  2 ++
> > > >>>>  drivers/net/bnx2x/bnx2x_ethdev.c        |  4 ++++
> > > >>>>  drivers/net/bnxt/bnxt_ethdev.c          |  2 ++
> > > >>>>  drivers/net/cxgbe/cxgbe_ethdev.c        |  2 ++
> > > >>>>  drivers/net/e1000/em_ethdev.c           |  2 ++
> > > >>>>  drivers/net/e1000/igb_ethdev.c          |  4 ++++
> > > >>>>  drivers/net/ena/ena_ethdev.c            |  2 ++
> > > >>>>  drivers/net/enic/enic_ethdev.c          |  2 ++
> > > >>>>  drivers/net/fm10k/fm10k_ethdev.c        |  2 ++
> > > >>>>  drivers/net/i40e/i40e_ethdev.c          |  2 ++
> > > >>>>  drivers/net/i40e/i40e_ethdev_vf.c       |  2 ++
> > > >>>>  drivers/net/ixgbe/ixgbe_ethdev.c        |  4 ++++
> > > >>>>  drivers/net/mlx4/mlx4.c                 |  2 ++
> > > >>>>  drivers/net/mlx5/mlx5.c                 |  3 +++
> > > >>>>  drivers/net/nfp/nfp_net.c               |  2 ++
> > > >>>>  drivers/net/qede/qede_ethdev.c          |  4 ++++
> > > >>>>  drivers/net/szedata2/rte_eth_szedata2.c |  2 ++
> > > >>>>  drivers/net/thunderx/nicvf_ethdev.c     |  2 ++
> > > >>>>  drivers/net/virtio/virtio_ethdev.c      |  2 ++
> > > >>>>  drivers/net/vmxnet3/vmxnet3_ethdev.c    |  2 ++
> > > >>>>  lib/librte_eal/common/include/rte_dev.h | 14 ++++++++++++++
> > > >>>>  tools/dpdk-pmdinfo.py                   |  5 ++++-
> > > >>>>  24 files changed, 69 insertions(+), 1 deletion(-)
> > > >>>>  
> > > >>>
> > > >>> Generally speaking, I like the idea, it makes sense to me in terms of using
> > > >>> pmdinfo to export this information
> > > >>>
> > > >>> That said, This may need to be a set of macros.  By that I mean (and correct  
> > > me  
> > > >>> if I'm wrong here), but the relationship between pmd's and kernel modules  
> > > is in  
> > > >>> some cases, more complex than a 'requires' or 'depends' relationship.  That  
> > > is  
> > > >>> to say, some pmd may need user space hardware access, but can use either  
> > > uio OR  
> > > >>> vfio, but doesn't need both, and can continue to function if only one is
> > > >>> available.  Other PMD's may be able to use vfio or uio, but can still function
> > > >>> without either.  And some, as your patch implements, simply require one or  
> > > the  
> > > >>> other to function.  As such it seems like you may want a few macros, in the  
> > > form  
> > > >>> of:
> > > >>>
> > > >>> DRIVER_REGISTER_KMOD_REQUEST - List of modules to attempt loading,  
> > > ignore any  
> > > >>> failures
> > > >>> DRIVER_REGISTER_KMOD_REQUIRE - List of modules required to be  
> > > loaded after  
> > > >>> request macro completes, fail if any are not loaded
> > > >>>
> > > >>> Thats just spitballing, mind you, theres probably a better way to do it, but  
> > > the  
> > > >>> idea is to list a set of modules you would like to have, and then create a
> > > >>> parsable syntax to describe the modules that need to be loaded after the  
> > > request  
> > > >>> is complete so that you can accurately codify the situations I described  
> > > above.  
> > > >>
> > > >> Thank you for your feedback.
> > > >> However, I'm not sure I'm perfectly getting what you suggest.
> > > >>
> > > >> Do you think some PMDs could request a kernel module without really
> > > >> requiring it? Do you have an example in mind?
> > > >>  
> > > > Yes, thats precisely it.  The most clear example I could think of (though I'm
> > > > not sure if any pmd currently supports this), is a pmd that supports both UIO
> > > > and VFIO communication with the kernel.  Such a PMD requires that one of  
> > > those  
> > > > two modules be loaded, but only one (i.e. both are not required), so if only  
> > > the  
> > > > uio kernel module loads is a success case, likewise if only the vfio module
> > > > loads can be treated as success.  Both loading are clearly successful.  Only if
> > > > neither load do we have a failure case.  I'm suggesting that the grammer that
> > > > your exports define should take those cases into account.  Its not always as
> > > > simple as "I must have the following modules"
> > > >  
> > > >> The syntax I've submitted lets you define several lists of modules, so
> > > >> that the user or the script that starts the application can decide which
> > > >> kmod list is better according to the environment.
> > > >>  
> > > > If you have a human intervening in the module load process, sure, then its  
> > > fine.  
> > > > But it seems that this particular feature that you're implemnting might have
> > > > automated uses.  That is to say the dpdk core library might be interested in
> > > > parsing this particular information to direct module autoloading, and if thats
> > > > desireable then you need to define these lists such that you can codify failure
> > > > and success conditions.
> > > >  
> > > >> For example, most drivers will advertise
> > > >> "uio,igb_uio:uio,uio_pci_generic:vfio,vfio-pci", and the user or script
> > > >> will have to choose between loading:
> > > >> - uio igb_uio
> > > >> - uio uio_pci_generic
> > > >> - vfio vfio-pci
> > > >>  
> > > > Oh, I see, so your list is a colon delimited list of module load sets, where at
> > > > least one set must succeed by loading all modules in its set, but the failure of
> > > > any one set isn't fatal to the process?  e.g. a string like this:
> > > >
> > > > uio,igb_uio:vfio,vfio-pci
> > > >
> > > > could be interpreted to mean "I must load (uio AND igb_uio) OR (vfio AND
> > > > vfio-pci).  If the evaluation of that statement results in false, then the
> > > > operation fails, otherwise it succedes.
> > > >
> > > > If thats the case, then, apologies, we're on the same page, and this will work
> > > > just fine.  
> > > 
> > > Yep, that's the idea.
> > > 
> > > Colon and commas are the best separators I've thought about, but any
> > > idea to make the syntax clearer is welcome ;)
> > > 
> > > Maybe a syntax like is clearer:
> > >   "(mod1 & mod2)|(mod3 & mod4)" ?
> > > But it would let the user think that more complex expressions are valid,
> > > like "(mod1 & (mod2 | mod3)) | mod4", which is probably overkill.
> > > 
> > > Regards,
> > > Olivier  
> > 
> > This RFC seems like a good idea - and something the Intel QuickAssist PMD could benefit from.
> > However the (mod1 & mod2) can handle the QAT case better in my opinion.
> > i.e.
> > as well as needing one of 
> > * uio igb_uio
> > * uio uio_pci_generic
> > * vfio vfio-pci
> > QAT PMD also needs one of (depending on which physical device is plugged)
> >  * qat_dh895xcc
> >  * qat_c62x
> >  * qat_c3xxx
> > 
> > So the original syntax would result in a very long list of possible variations.
> > What really reflects the dependencies would be 
> > ((uio & igb_uio) | (uio & uio_pci_generic) | (vfio & vfio_pci)) & (qat_dh895xcc | qat_c62x | qat_c3xxx)
> >   
> Ah, I didn't consider that hardware specifics might create a use case where a
> pmd must have one or more kernel modules available for hw support.  Perhaps it
> is worthwhile to automate hardware support - that is to say, any module loading
> script should automatically look at the pci table exported from a pmd, and, if
> found, load any modules that claim support for that device:vendor tuple?  Though
> that might break in the case of uio, if there are separate driver modules that
> support native hardware and uio access.

I ended up writing a script that went the other way.
First look at the hardware and load VFIO if an IOMMU is available.
Then look for any special driver needed for Xen and Hyper-V.
Lastly, fall back to loading igb_uio if there is no VFIO and a PCI device is present.

In other words, it is a system issue, not a driver issue.
  
Neil Horman Sept. 1, 2016, 7:15 p.m. UTC | #10
On Thu, Sep 01, 2016 at 10:41:22AM -0700, Stephen Hemminger wrote:
> On Thu, 1 Sep 2016 13:35:19 -0400
> Neil Horman <nhorman@tuxdriver.com> wrote:
> 
> > On Thu, Sep 01, 2016 at 12:55:27PM +0000, Trahe, Fiona wrote:
> > > Hi Neil and Olivier,
> > >   
> > > > -----Original Message-----
> > > > From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Olivier Matz
> > > > Sent: Wednesday, August 31, 2016 2:40 PM
> > > > To: Neil Horman <nhorman@tuxdriver.com>
> > > > Cc: dev@dpdk.org; thomas.monjalon@6wind.com
> > > > Subject: Re: [dpdk-dev] [dpdk-dev, RFC] drivers: advertise kmod dependencies
> > > > in pmdinfo
> > > > 
> > > > Hi Neil,
> > > > 
> > > > On 08/31/2016 03:27 PM, Neil Horman wrote:  
> > > > > On Wed, Aug 31, 2016 at 11:21:18AM +0200, Olivier Matz wrote:  
> > > > >> Hi Neil,
> > > > >>
> > > > >> On 08/30/2016 03:23 PM, Neil Horman wrote:  
> > > > >>> On Fri, Aug 26, 2016 at 03:20:46PM +0200, Olivier Matz wrote:  
> > > > >>>> Add a new macro DRIVER_REGISTER_KMOD_DEP() that allows a driver to
> > > > >>>> declare the list of kernel modules required to run properly.
> > > > >>>>
> > > > >>>> Today, most PCI drivers require uio/vfio.
> > > > >>>>
> > > > >>>> Signed-off-by: Olivier Matz <olivier.matz@6wind.com>
> > > > >>>>
> > > > >>>> ---
> > > > >>>> In this RFC, I supposed that all PCI drivers require a the loading of a
> > > > >>>> uio/vfio module (except mlx*), this may be wrong.
> > > > >>>> Comments are welcome!
> > > > >>>>
> > > > >>>>
> > > > >>>>  buildtools/pmdinfogen/pmdinfogen.c      |  1 +
> > > > >>>>  buildtools/pmdinfogen/pmdinfogen.h      |  1 +
> > > > >>>>  drivers/crypto/qat/rte_qat_cryptodev.c  |  2 ++
> > > > >>>>  drivers/net/bnx2x/bnx2x_ethdev.c        |  4 ++++
> > > > >>>>  drivers/net/bnxt/bnxt_ethdev.c          |  2 ++
> > > > >>>>  drivers/net/cxgbe/cxgbe_ethdev.c        |  2 ++
> > > > >>>>  drivers/net/e1000/em_ethdev.c           |  2 ++
> > > > >>>>  drivers/net/e1000/igb_ethdev.c          |  4 ++++
> > > > >>>>  drivers/net/ena/ena_ethdev.c            |  2 ++
> > > > >>>>  drivers/net/enic/enic_ethdev.c          |  2 ++
> > > > >>>>  drivers/net/fm10k/fm10k_ethdev.c        |  2 ++
> > > > >>>>  drivers/net/i40e/i40e_ethdev.c          |  2 ++
> > > > >>>>  drivers/net/i40e/i40e_ethdev_vf.c       |  2 ++
> > > > >>>>  drivers/net/ixgbe/ixgbe_ethdev.c        |  4 ++++
> > > > >>>>  drivers/net/mlx4/mlx4.c                 |  2 ++
> > > > >>>>  drivers/net/mlx5/mlx5.c                 |  3 +++
> > > > >>>>  drivers/net/nfp/nfp_net.c               |  2 ++
> > > > >>>>  drivers/net/qede/qede_ethdev.c          |  4 ++++
> > > > >>>>  drivers/net/szedata2/rte_eth_szedata2.c |  2 ++
> > > > >>>>  drivers/net/thunderx/nicvf_ethdev.c     |  2 ++
> > > > >>>>  drivers/net/virtio/virtio_ethdev.c      |  2 ++
> > > > >>>>  drivers/net/vmxnet3/vmxnet3_ethdev.c    |  2 ++
> > > > >>>>  lib/librte_eal/common/include/rte_dev.h | 14 ++++++++++++++
> > > > >>>>  tools/dpdk-pmdinfo.py                   |  5 ++++-
> > > > >>>>  24 files changed, 69 insertions(+), 1 deletion(-)
> > > > >>>>  
> > > > >>>
> > > > >>> Generally speaking, I like the idea, it makes sense to me in terms of using
> > > > >>> pmdinfo to export this information
> > > > >>>
> > > > >>> That said, This may need to be a set of macros.  By that I mean (and correct  
> > > > me  
> > > > >>> if I'm wrong here), but the relationship between pmd's and kernel modules  
> > > > is in  
> > > > >>> some cases, more complex than a 'requires' or 'depends' relationship.  That  
> > > > is  
> > > > >>> to say, some pmd may need user space hardware access, but can use either  
> > > > uio OR  
> > > > >>> vfio, but doesn't need both, and can continue to function if only one is
> > > > >>> available.  Other PMD's may be able to use vfio or uio, but can still function
> > > > >>> without either.  And some, as your patch implements, simply require one or  
> > > > the  
> > > > >>> other to function.  As such it seems like you may want a few macros, in the  
> > > > form  
> > > > >>> of:
> > > > >>>
> > > > >>> DRIVER_REGISTER_KMOD_REQUEST - List of modules to attempt loading,  
> > > > ignore any  
> > > > >>> failures
> > > > >>> DRIVER_REGISTER_KMOD_REQUIRE - List of modules required to be  
> > > > loaded after  
> > > > >>> request macro completes, fail if any are not loaded
> > > > >>>
> > > > >>> Thats just spitballing, mind you, theres probably a better way to do it, but  
> > > > the  
> > > > >>> idea is to list a set of modules you would like to have, and then create a
> > > > >>> parsable syntax to describe the modules that need to be loaded after the  
> > > > request  
> > > > >>> is complete so that you can accurately codify the situations I described  
> > > > above.  
> > > > >>
> > > > >> Thank you for your feedback.
> > > > >> However, I'm not sure I'm perfectly getting what you suggest.
> > > > >>
> > > > >> Do you think some PMDs could request a kernel module without really
> > > > >> requiring it? Do you have an example in mind?
> > > > >>  
> > > > > Yes, thats precisely it.  The most clear example I could think of (though I'm
> > > > > not sure if any pmd currently supports this), is a pmd that supports both UIO
> > > > > and VFIO communication with the kernel.  Such a PMD requires that one of  
> > > > those  
> > > > > two modules be loaded, but only one (i.e. both are not required), so if only  
> > > > the  
> > > > > uio kernel module loads is a success case, likewise if only the vfio module
> > > > > loads can be treated as success.  Both loading are clearly successful.  Only if
> > > > > neither load do we have a failure case.  I'm suggesting that the grammer that
> > > > > your exports define should take those cases into account.  Its not always as
> > > > > simple as "I must have the following modules"
> > > > >  
> > > > >> The syntax I've submitted lets you define several lists of modules, so
> > > > >> that the user or the script that starts the application can decide which
> > > > >> kmod list is better according to the environment.
> > > > >>  
> > > > > If you have a human intervening in the module load process, sure, then its  
> > > > fine.  
> > > > > But it seems that this particular feature that you're implemnting might have
> > > > > automated uses.  That is to say the dpdk core library might be interested in
> > > > > parsing this particular information to direct module autoloading, and if thats
> > > > > desireable then you need to define these lists such that you can codify failure
> > > > > and success conditions.
> > > > >  
> > > > >> For example, most drivers will advertise
> > > > >> "uio,igb_uio:uio,uio_pci_generic:vfio,vfio-pci", and the user or script
> > > > >> will have to choose between loading:
> > > > >> - uio igb_uio
> > > > >> - uio uio_pci_generic
> > > > >> - vfio vfio-pci
> > > > >>  
> > > > > Oh, I see, so your list is a colon delimited list of module load sets, where at
> > > > > least one set must succeed by loading all modules in its set, but the failure of
> > > > > any one set isn't fatal to the process?  e.g. a string like this:
> > > > >
> > > > > uio,igb_uio:vfio,vfio-pci
> > > > >
> > > > > could be interpreted to mean "I must load (uio AND igb_uio) OR (vfio AND
> > > > > vfio-pci).  If the evaluation of that statement results in false, then the
> > > > > operation fails, otherwise it succedes.
> > > > >
> > > > > If thats the case, then, apologies, we're on the same page, and this will work
> > > > > just fine.  
> > > > 
> > > > Yep, that's the idea.
> > > > 
> > > > Colon and commas are the best separators I've thought about, but any
> > > > idea to make the syntax clearer is welcome ;)
> > > > 
> > > > Maybe a syntax like is clearer:
> > > >   "(mod1 & mod2)|(mod3 & mod4)" ?
> > > > But it would let the user think that more complex expressions are valid,
> > > > like "(mod1 & (mod2 | mod3)) | mod4", which is probably overkill.
> > > > 
> > > > Regards,
> > > > Olivier  
> > > 
> > > This RFC seems like a good idea - and something the Intel QuickAssist PMD could benefit from.
> > > However the (mod1 & mod2) can handle the QAT case better in my opinion.
> > > i.e.
> > > as well as needing one of 
> > > * uio igb_uio
> > > * uio uio_pci_generic
> > > * vfio vfio-pci
> > > QAT PMD also needs one of (depending on which physical device is plugged)
> > >  * qat_dh895xcc
> > >  * qat_c62x
> > >  * qat_c3xxx
> > > 
> > > So the original syntax would result in a very long list of possible variations.
> > > What really reflects the dependencies would be 
> > > ((uio & igb_uio) | (uio & uio_pci_generic) | (vfio & vfio_pci)) & (qat_dh895xcc | qat_c62x | qat_c3xxx)
> > >   
> > Ah, I didn't consider that hardware specifics might create a use case where a
> > pmd must have one or more kernel modules available for hw support.  Perhaps it
> > is worthwhile to automate hardware support - that is to say, any module loading
> > script should automatically look at the pci table exported from a pmd, and, if
> > found, load any modules that claim support for that device:vendor tuple?  Though
> > that might break in the case of uio, if there are separate driver modules that
> > support native hardware and uio access.
> 
> I ended up writing a script that went the other way.
> First look at the hardware and load VFIO if IOMMU is available.
> Then look for special driver needed for Xen and HyperV
> Lastly fallback to loading igb_uio if no VFIO and PCI device present.
> 
> In other words it is a system not driver issue.
> 
That sounds like a reasonable approach, yes.
Neil

>
  
Fiona Trahe Sept. 2, 2016, 9:19 a.m. UTC | #11
> -----Original Message-----
> From: Neil Horman [mailto:nhorman@tuxdriver.com]
> Sent: Thursday, September 1, 2016 8:16 PM
> To: Stephen Hemminger <stephen@networkplumber.org>
> Cc: Trahe, Fiona <fiona.trahe@intel.com>; dev@dpdk.org; Olivier Matz
> <olivier.matz@6wind.com>; Thomas Monjalon
> <thomas.monjalon@6wind.com>
> Subject: Re: [dpdk-dev] [dpdk-dev, RFC] drivers: advertise kmod dependencies
> in pmdinfo
> 
> On Thu, Sep 01, 2016 at 10:41:22AM -0700, Stephen Hemminger wrote:
> > On Thu, 1 Sep 2016 13:35:19 -0400
> > Neil Horman <nhorman@tuxdriver.com> wrote:
> >
> > > On Thu, Sep 01, 2016 at 12:55:27PM +0000, Trahe, Fiona wrote:
> > > > Hi Neil and Olivier,
> > > >
> > > > > -----Original Message-----
> > > > > From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Olivier
> > > > > Matz
> > > > > Sent: Wednesday, August 31, 2016 2:40 PM
> > > > > To: Neil Horman <nhorman@tuxdriver.com>
> > > > > Cc: dev@dpdk.org; thomas.monjalon@6wind.com
> > > > > Subject: Re: [dpdk-dev] [dpdk-dev, RFC] drivers: advertise kmod
> > > > > dependencies in pmdinfo
> > > > >
> > > > > Hi Neil,
> > > > >
> > > > > On 08/31/2016 03:27 PM, Neil Horman wrote:
> > > > > > On Wed, Aug 31, 2016 at 11:21:18AM +0200, Olivier Matz wrote:
> > > > > >> Hi Neil,
> > > > > >>
> > > > > >> On 08/30/2016 03:23 PM, Neil Horman wrote:
> > > > > >>> On Fri, Aug 26, 2016 at 03:20:46PM +0200, Olivier Matz wrote:
> > > > > >>>> Add a new macro DRIVER_REGISTER_KMOD_DEP() that allows a
> > > > > >>>> driver to declare the list of kernel modules required to run properly.
> > > > > >>>>
> > > > > >>>> Today, most PCI drivers require uio/vfio.
> > > > > >>>>
> > > > > >>>> Signed-off-by: Olivier Matz <olivier.matz@6wind.com>
> > > > > >>>>
> > > > > >>>> ---
> > > > > >>>> In this RFC, I supposed that all PCI drivers require a the
> > > > > >>>> loading of a uio/vfio module (except mlx*), this may be wrong.
> > > > > >>>> Comments are welcome!
> > > > > >>>>
> > > > > >>>>
> > > > > >>>>  buildtools/pmdinfogen/pmdinfogen.c      |  1 +
> > > > > >>>>  buildtools/pmdinfogen/pmdinfogen.h      |  1 +
> > > > > >>>>  drivers/crypto/qat/rte_qat_cryptodev.c  |  2 ++
> > > > > >>>>  drivers/net/bnx2x/bnx2x_ethdev.c        |  4 ++++
> > > > > >>>>  drivers/net/bnxt/bnxt_ethdev.c          |  2 ++
> > > > > >>>>  drivers/net/cxgbe/cxgbe_ethdev.c        |  2 ++
> > > > > >>>>  drivers/net/e1000/em_ethdev.c           |  2 ++
> > > > > >>>>  drivers/net/e1000/igb_ethdev.c          |  4 ++++
> > > > > >>>>  drivers/net/ena/ena_ethdev.c            |  2 ++
> > > > > >>>>  drivers/net/enic/enic_ethdev.c          |  2 ++
> > > > > >>>>  drivers/net/fm10k/fm10k_ethdev.c        |  2 ++
> > > > > >>>>  drivers/net/i40e/i40e_ethdev.c          |  2 ++
> > > > > >>>>  drivers/net/i40e/i40e_ethdev_vf.c       |  2 ++
> > > > > >>>>  drivers/net/ixgbe/ixgbe_ethdev.c        |  4 ++++
> > > > > >>>>  drivers/net/mlx4/mlx4.c                 |  2 ++
> > > > > >>>>  drivers/net/mlx5/mlx5.c                 |  3 +++
> > > > > >>>>  drivers/net/nfp/nfp_net.c               |  2 ++
> > > > > >>>>  drivers/net/qede/qede_ethdev.c          |  4 ++++
> > > > > >>>>  drivers/net/szedata2/rte_eth_szedata2.c |  2 ++
> > > > > >>>>  drivers/net/thunderx/nicvf_ethdev.c     |  2 ++
> > > > > >>>>  drivers/net/virtio/virtio_ethdev.c      |  2 ++
> > > > > >>>>  drivers/net/vmxnet3/vmxnet3_ethdev.c    |  2 ++
> > > > > >>>>  lib/librte_eal/common/include/rte_dev.h | 14 ++++++++++++++
> > > > > >>>>  tools/dpdk-pmdinfo.py                   |  5 ++++-
> > > > > >>>>  24 files changed, 69 insertions(+), 1 deletion(-)
> > > > > >>>>
> > > > > >>>
> > > > > >>> Generally speaking, I like the idea, it makes sense to me in
> > > > > >>> terms of using pmdinfo to export this information
> > > > > >>>
> > > > > >>> That said, This may need to be a set of macros.  By that I
> > > > > >>> mean (and correct
> > > > > me
> > > > > >>> if I'm wrong here), but the relationship between pmd's and
> > > > > >>> kernel modules
> > > > > is in
> > > > > >>> some cases, more complex than a 'requires' or 'depends'
> > > > > >>> relationship.  That
> > > > > is
> > > > > >>> to say, some pmd may need user space hardware access, but
> > > > > >>> can use either
> > > > > uio OR
> > > > > >>> vfio, but doesn't need both, and can continue to function if
> > > > > >>> only one is available.  Other PMD's may be able to use vfio
> > > > > >>> or uio, but can still function without either.  And some, as
> > > > > >>> your patch implements, simply require one or
> > > > > the
> > > > > >>> other to function.  As such it seems like you may want a few
> > > > > >>> macros, in the
> > > > > form
> > > > > >>> of:
> > > > > >>>
> > > > > >>> DRIVER_REGISTER_KMOD_REQUEST - List of modules to attempt
> > > > > >>> loading,
> > > > > ignore any
> > > > > >>> failures
> > > > > >>> DRIVER_REGISTER_KMOD_REQUIRE - List of modules required to
> > > > > >>> be
> > > > > loaded after
> > > > > >>> request macro completes, fail if any are not loaded
> > > > > >>>
> > > > > >>> Thats just spitballing, mind you, theres probably a better
> > > > > >>> way to do it, but
> > > > > the
> > > > > >>> idea is to list a set of modules you would like to have, and
> > > > > >>> then create a parsable syntax to describe the modules that
> > > > > >>> need to be loaded after the
> > > > > request
> > > > > >>> is complete so that you can accurately codify the situations
> > > > > >>> I described
> > > > > above.
> > > > > >>
> > > > > >> Thank you for your feedback.
> > > > > >> However, I'm not sure I'm perfectly getting what you suggest.
> > > > > >>
> > > > > >> Do you think some PMDs could request a kernel module without
> > > > > >> really requiring it? Do you have an example in mind?
> > > > > >>
> > > > > > Yes, thats precisely it.  The most clear example I could think
> > > > > > of (though I'm not sure if any pmd currently supports this),
> > > > > > is a pmd that supports both UIO and VFIO communication with
> > > > > > the kernel.  Such a PMD requires that one of
> > > > > those
> > > > > > two modules be loaded, but only one (i.e. both are not
> > > > > > required), so if only
> > > > > the
> > > > > > uio kernel module loads is a success case, likewise if only
> > > > > > the vfio module loads can be treated as success.  Both loading
> > > > > > are clearly successful.  Only if neither load do we have a
> > > > > > failure case.  I'm suggesting that the grammer that your
> > > > > > exports define should take those cases into account.  Its not always as
> simple as "I must have the following modules"
> > > > > >
> > > > > >> The syntax I've submitted lets you define several lists of
> > > > > >> modules, so that the user or the script that starts the
> > > > > >> application can decide which kmod list is better according to the
> environment.
> > > > > >>
> > > > > > If you have a human intervening in the module load process,
> > > > > > sure, then its
> > > > > fine.
> > > > > > But it seems that this particular feature that you're
> > > > > > implemnting might have automated uses.  That is to say the
> > > > > > dpdk core library might be interested in parsing this
> > > > > > particular information to direct module autoloading, and if
> > > > > > thats desireable then you need to define these lists such that you can
> codify failure and success conditions.
> > > > > >
> > > > > >> For example, most drivers will advertise
> > > > > >> "uio,igb_uio:uio,uio_pci_generic:vfio,vfio-pci", and the user
> > > > > >> or script will have to choose between loading:
> > > > > >> - uio igb_uio
> > > > > >> - uio uio_pci_generic
> > > > > >> - vfio vfio-pci
> > > > > >>
> > > > > > Oh, I see, so your list is a colon delimited list of module
> > > > > > load sets, where at least one set must succeed by loading all
> > > > > > modules in its set, but the failure of any one set isn't fatal to the
> process?  e.g. a string like this:
> > > > > >
> > > > > > uio,igb_uio:vfio,vfio-pci
> > > > > >
> > > > > > could be interpreted to mean "I must load (uio AND igb_uio) OR
> > > > > > (vfio AND vfio-pci).  If the evaluation of that statement
> > > > > > results in false, then the operation fails, otherwise it succedes.
> > > > > >
> > > > > > If thats the case, then, apologies, we're on the same page,
> > > > > > and this will work just fine.
> > > > >
> > > > > Yep, that's the idea.
> > > > >
> > > > > Colon and commas are the best separators I've thought about, but
> > > > > any idea to make the syntax clearer is welcome ;)
> > > > >
> > > > > Maybe a syntax like is clearer:
> > > > >   "(mod1 & mod2)|(mod3 & mod4)" ?
> > > > > But it would let the user think that more complex expressions
> > > > > are valid, like "(mod1 & (mod2 | mod3)) | mod4", which is probably
> overkill.
> > > > >
> > > > > Regards,
> > > > > Olivier
> > > >
> > > > This RFC seems like a good idea - and something the Intel QuickAssist PMD
> could benefit from.
> > > > However the (mod1 & mod2) can handle the QAT case better in my
> opinion.
> > > > i.e.
> > > > as well as needing one of
> > > > * uio igb_uio
> > > > * uio uio_pci_generic
> > > > * vfio vfio-pci
> > > > QAT PMD also needs one of (depending on which physical device is
> > > > plugged)
> > > >  * qat_dh895xcc
> > > >  * qat_c62x
> > > >  * qat_c3xxx
> > > >
> > > > So the original syntax would result in a very long list of possible variations.
> > > > What really reflects the dependencies would be ((uio & igb_uio) |
> > > > (uio & uio_pci_generic) | (vfio & vfio_pci)) & (qat_dh895xcc |
> > > > qat_c62x | qat_c3xxx)
> > > >
> > > Ah, I didn't consider that hardware specifics might create a use
> > > case where a pmd must have one or more kernel modules available for
> > > hw support.  Perhaps it is worthwhile to automate hardware support -
> > > that is to say, any module loading script should automatically look
> > > at the pci table exported from a pmd, and, if found, load any
> > > modules that claim support for that device:vendor tuple?  Though
> > > that might break in the case of uio, if there are separate driver modules that
> support native hardware and uio access.

Actually, if the script output were intended to be used to auto-load dependent kmods,
then even the above would not suffice for the QAT driver (and presumably for other
PMDs with specific HW dependencies), i.e. the qat_dhxxxx modules have further dependencies
themselves on an intel_qat module, and there are other steps documented in the
guide which must be taken after loading the kmods.
The use case I'd addressed was for the script to identify and just throw an error where
dependent modules are missing.

I don't see a simple solution, but also don't see a strong need to find one. 
Documentation and if necessary a driver-specific script seem sufficient to me.

My conclusion is the RFC is a nice feature for some drivers, but if introduced needs 
to be optional as it doesn't handle the complexities of all drivers. 

> >
> > I ended up writing a script that went the other way.
> > First look at the hardware and load VFIO if IOMMU is available.
> > Then look for special driver needed for Xen and HyperV Lastly fallback
> > to loading igb_uio if no VFIO and PCI device present.
> >
> > In other words it is a system not driver issue.
> >
> That sounds like a reasonable approach, yes.
> Neil
> 
> >
  
Thomas Monjalon Sept. 2, 2016, 10:55 a.m. UTC | #12
2016-09-01 10:41, Stephen Hemminger:
> Neil Horman <nhorman@tuxdriver.com> wrote:
> > On Thu, Sep 01, 2016 at 12:55:27PM +0000, Trahe, Fiona wrote:
> > > From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Olivier Matz
> > > > On 08/31/2016 03:27 PM, Neil Horman wrote:  
> > > > > Oh, I see, so your list is a colon delimited list of module load sets, where at
> > > > > least one set must succeed by loading all modules in its set, but the failure of
> > > > > any one set isn't fatal to the process?  e.g. a string like this:
> > > > >
> > > > > uio,igb_uio:vfio,vfio-pci
> > > > >
> > > > > could be interpreted to mean "I must load (uio AND igb_uio) OR (vfio AND
> > > > > vfio-pci).  If the evaluation of that statement results in false, then the
> > > > > operation fails, otherwise it succedes.
> > > > >
> > > > > If thats the case, then, apologies, we're on the same page, and this will work
> > > > > just fine.  
> > > > 
> > > > Yep, that's the idea.
> > > > 
> > > > Colon and commas are the best separators I've thought about, but any
> > > > idea to make the syntax clearer is welcome ;)
> > > > 
> > > > Maybe a syntax like is clearer:
> > > >   "(mod1 & mod2)|(mod3 & mod4)" ?
> > > > But it would let the user think that more complex expressions are valid,
> > > > like "(mod1 & (mod2 | mod3)) | mod4", which is probably overkill.
> > > 
> > > This RFC seems like a good idea - and something the Intel QuickAssist PMD could benefit from.
> > > However the (mod1 & mod2) can handle the QAT case better in my opinion.
> > > i.e.
> > > as well as needing one of 
> > > * uio igb_uio
> > > * uio uio_pci_generic
> > > * vfio vfio-pci
> > > QAT PMD also needs one of (depending on which physical device is plugged)
> > >  * qat_dh895xcc
> > >  * qat_c62x
> > >  * qat_c3xxx
> > > 
> > > So the original syntax would result in a very long list of possible variations.
> > > What really reflects the dependencies would be 
> > > ((uio & igb_uio) | (uio & uio_pci_generic) | (vfio & vfio_pci)) & (qat_dh895xcc | qat_c62x | qat_c3xxx)
> > >   
> > Ah, I didn't consider that hardware specifics might create a use case where a
> > pmd must have one or more kernel modules available for hw support.  Perhaps it
> > is worthwhile to automate hardware support - that is to say, any module loading
> > script should automatically look at the pci table exported from a pmd, and, if
> > found, load any modules that claim support for that device:vendor tuple?  Though
> > that might break in the case of uio, if there are separate driver modules that
> > support native hardware and uio access.
> 
> I ended up writing a script that went the other way.
> First look at the hardware and load VFIO if IOMMU is available.
> Then look for special driver needed for Xen and HyperV
> Lastly fallback to loading igb_uio if no VFIO and PCI device present.
> 
> In other words it is a system not driver issue.

That's partly right, yes.
But some of the information needed is specific to the drivers, and
we should try to embed it for three uses:
	- give more info to the user (without digging in the doc)
	- replace info in some external system scripts that are harder to maintain
	- prepare for hotplug

Some PMDs do not use UIO or VFIO at all.
However, I agree that the requirement
	(uio & igb_uio) | (uio & uio_pci_generic) | (vfio & vfio_pci)
- and even the VFIO noiommu case - could be translated into a simple
flag, let's say "generic_device_mapping"
(unfortunately "queue_mapping" doesn't exist).

The other interesting point from Fiona is that this information is
device-related (not general for the whole driver). So we should add a device
parameter to the macro, with the ability to set a wildcard.
  
Neil Horman Sept. 2, 2016, 1:33 p.m. UTC | #13
On Fri, Sep 02, 2016 at 09:19:26AM +0000, Trahe, Fiona wrote:
> 
> 
> > -----Original Message-----
> > From: Neil Horman [mailto:nhorman@tuxdriver.com]
> > Sent: Thursday, September 1, 2016 8:16 PM
> > To: Stephen Hemminger <stephen@networkplumber.org>
> > Cc: Trahe, Fiona <fiona.trahe@intel.com>; dev@dpdk.org; Olivier Matz
> > <olivier.matz@6wind.com>; Thomas Monjalon
> > <thomas.monjalon@6wind.com>
> > Subject: Re: [dpdk-dev] [dpdk-dev, RFC] drivers: advertise kmod dependencies
> > in pmdinfo
> > 
> > On Thu, Sep 01, 2016 at 10:41:22AM -0700, Stephen Hemminger wrote:
> > > On Thu, 1 Sep 2016 13:35:19 -0400
> > > Neil Horman <nhorman@tuxdriver.com> wrote:
> > >
> > > > On Thu, Sep 01, 2016 at 12:55:27PM +0000, Trahe, Fiona wrote:
> > > > > Hi Neil and Olivier,
> > > > >
> > > > > > -----Original Message-----
> > > > > > From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Olivier
> > > > > > Matz
> > > > > > Sent: Wednesday, August 31, 2016 2:40 PM
> > > > > > To: Neil Horman <nhorman@tuxdriver.com>
> > > > > > Cc: dev@dpdk.org; thomas.monjalon@6wind.com
> > > > > > Subject: Re: [dpdk-dev] [dpdk-dev, RFC] drivers: advertise kmod
> > > > > > dependencies in pmdinfo
> > > > > >
> > > > > > Hi Neil,
> > > > > >
> > > > > > On 08/31/2016 03:27 PM, Neil Horman wrote:
> > > > > > > On Wed, Aug 31, 2016 at 11:21:18AM +0200, Olivier Matz wrote:
> > > > > > >> Hi Neil,
> > > > > > >>
> > > > > > >> On 08/30/2016 03:23 PM, Neil Horman wrote:
> > > > > > >>> On Fri, Aug 26, 2016 at 03:20:46PM +0200, Olivier Matz wrote:
> > > > > > >>>> Add a new macro DRIVER_REGISTER_KMOD_DEP() that allows a
> > > > > > >>>> driver to declare the list of kernel modules required to run properly.
> > > > > > >>>>
> > > > > > >>>> Today, most PCI drivers require uio/vfio.
> > > > > > >>>>
> > > > > > >>>> Signed-off-by: Olivier Matz <olivier.matz@6wind.com>
> > > > > > >>>>
> > > > > > >>>> ---
> > > > > > >>>> In this RFC, I supposed that all PCI drivers require a the
> > > > > > >>>> loading of a uio/vfio module (except mlx*), this may be wrong.
> > > > > > >>>> Comments are welcome!
> > > > > > >>>>
> > > > > > >>>>
> > > > > > >>>>  buildtools/pmdinfogen/pmdinfogen.c      |  1 +
> > > > > > >>>>  buildtools/pmdinfogen/pmdinfogen.h      |  1 +
> > > > > > >>>>  drivers/crypto/qat/rte_qat_cryptodev.c  |  2 ++
> > > > > > >>>>  drivers/net/bnx2x/bnx2x_ethdev.c        |  4 ++++
> > > > > > >>>>  drivers/net/bnxt/bnxt_ethdev.c          |  2 ++
> > > > > > >>>>  drivers/net/cxgbe/cxgbe_ethdev.c        |  2 ++
> > > > > > >>>>  drivers/net/e1000/em_ethdev.c           |  2 ++
> > > > > > >>>>  drivers/net/e1000/igb_ethdev.c          |  4 ++++
> > > > > > >>>>  drivers/net/ena/ena_ethdev.c            |  2 ++
> > > > > > >>>>  drivers/net/enic/enic_ethdev.c          |  2 ++
> > > > > > >>>>  drivers/net/fm10k/fm10k_ethdev.c        |  2 ++
> > > > > > >>>>  drivers/net/i40e/i40e_ethdev.c          |  2 ++
> > > > > > >>>>  drivers/net/i40e/i40e_ethdev_vf.c       |  2 ++
> > > > > > >>>>  drivers/net/ixgbe/ixgbe_ethdev.c        |  4 ++++
> > > > > > >>>>  drivers/net/mlx4/mlx4.c                 |  2 ++
> > > > > > >>>>  drivers/net/mlx5/mlx5.c                 |  3 +++
> > > > > > >>>>  drivers/net/nfp/nfp_net.c               |  2 ++
> > > > > > >>>>  drivers/net/qede/qede_ethdev.c          |  4 ++++
> > > > > > >>>>  drivers/net/szedata2/rte_eth_szedata2.c |  2 ++
> > > > > > >>>>  drivers/net/thunderx/nicvf_ethdev.c     |  2 ++
> > > > > > >>>>  drivers/net/virtio/virtio_ethdev.c      |  2 ++
> > > > > > >>>>  drivers/net/vmxnet3/vmxnet3_ethdev.c    |  2 ++
> > > > > > >>>>  lib/librte_eal/common/include/rte_dev.h | 14 ++++++++++++++
> > > > > > >>>>  tools/dpdk-pmdinfo.py                   |  5 ++++-
> > > > > > >>>>  24 files changed, 69 insertions(+), 1 deletion(-)
> > > > > > >>>>
> > > > > > >>>
> > > > > > >>> Generally speaking, I like the idea, it makes sense to me in
> > > > > > >>> terms of using pmdinfo to export this information
> > > > > > >>>
> > > > > > >>> That said, This may need to be a set of macros.  By that I
> > > > > > >>> mean (and correct
> > > > > > me
> > > > > > >>> if I'm wrong here), but the relationship between pmd's and
> > > > > > >>> kernel modules
> > > > > > is in
> > > > > > >>> some cases, more complex than a 'requires' or 'depends'
> > > > > > >>> relationship.  That
> > > > > > is
> > > > > > >>> to say, some pmd may need user space hardware access, but
> > > > > > >>> can use either
> > > > > > uio OR
> > > > > > >>> vfio, but doesn't need both, and can continue to function if
> > > > > > >>> only one is available.  Other PMD's may be able to use vfio
> > > > > > >>> or uio, but can still function without either.  And some, as
> > > > > > >>> your patch implements, simply require one or
> > > > > > the
> > > > > > >>> other to function.  As such it seems like you may want a few
> > > > > > >>> macros, in the
> > > > > > form
> > > > > > >>> of:
> > > > > > >>>
> > > > > > >>> DRIVER_REGISTER_KMOD_REQUEST - List of modules to attempt
> > > > > > >>> loading,
> > > > > > ignore any
> > > > > > >>> failures
> > > > > > >>> DRIVER_REGISTER_KMOD_REQUIRE - List of modules required to
> > > > > > >>> be
> > > > > > loaded after
> > > > > > >>> request macro completes, fail if any are not loaded
> > > > > > >>>
> > > > > > >>> Thats just spitballing, mind you, theres probably a better
> > > > > > >>> way to do it, but
> > > > > > the
> > > > > > >>> idea is to list a set of modules you would like to have, and
> > > > > > >>> then create a parsable syntax to describe the modules that
> > > > > > >>> need to be loaded after the
> > > > > > request
> > > > > > >>> is complete so that you can accurately codify the situations
> > > > > > >>> I described
> > > > > > above.
> > > > > > >>
> > > > > > >> Thank you for your feedback.
> > > > > > >> However, I'm not sure I'm perfectly getting what you suggest.
> > > > > > >>
> > > > > > >> Do you think some PMDs could request a kernel module without
> > > > > > >> really requiring it? Do you have an example in mind?
> > > > > > >>
> > > > > > > Yes, thats precisely it.  The most clear example I could think
> > > > > > > of (though I'm not sure if any pmd currently supports this),
> > > > > > > is a pmd that supports both UIO and VFIO communication with
> > > > > > > the kernel.  Such a PMD requires that one of
> > > > > > those
> > > > > > > two modules be loaded, but only one (i.e. both are not
> > > > > > > required), so if only
> > > > > > the
> > > > > > > uio kernel module loads is a success case, likewise if only
> > > > > > > the vfio module loads can be treated as success.  Both loading
> > > > > > > are clearly successful.  Only if neither load do we have a
> > > > > > > failure case.  I'm suggesting that the grammer that your
> > > > > > > exports define should take those cases into account.  Its not always as
> > simple as "I must have the following modules"
> > > > > > >
> > > > > > >> The syntax I've submitted lets you define several lists of
> > > > > > >> modules, so that the user or the script that starts the
> > > > > > >> application can decide which kmod list is better according to the
> > environment.
> > > > > > >>
> > > > > > > If you have a human intervening in the module load process,
> > > > > > > sure, then its
> > > > > > fine.
> > > > > > > But it seems that this particular feature that you're
> > > > > > > implemnting might have automated uses.  That is to say the
> > > > > > > dpdk core library might be interested in parsing this
> > > > > > > particular information to direct module autoloading, and if
> > > > > > > thats desireable then you need to define these lists such that you can
> > codify failure and success conditions.
> > > > > > >
> > > > > > >> For example, most drivers will advertise
> > > > > > >> "uio,igb_uio:uio,uio_pci_generic:vfio,vfio-pci", and the user
> > > > > > >> or script will have to choose between loading:
> > > > > > >> - uio igb_uio
> > > > > > >> - uio uio_pci_generic
> > > > > > >> - vfio vfio-pci
> > > > > > >>
> > > > > > > Oh, I see, so your list is a colon delimited list of module
> > > > > > > load sets, where at least one set must succeed by loading all
> > > > > > > modules in its set, but the failure of any one set isn't fatal to the
> > process?  e.g. a string like this:
> > > > > > >
> > > > > > > uio,igb_uio:vfio,vfio-pci
> > > > > > >
> > > > > > > could be interpreted to mean "I must load (uio AND igb_uio) OR
> > > > > > > (vfio AND vfio-pci).  If the evaluation of that statement
> > > > > > > results in false, then the operation fails, otherwise it succedes.
> > > > > > >
> > > > > > > If thats the case, then, apologies, we're on the same page,
> > > > > > > and this will work just fine.
> > > > > >
> > > > > > Yep, that's the idea.
> > > > > >
> > > > > > Colon and commas are the best separators I've thought about, but
> > > > > > any idea to make the syntax clearer is welcome ;)
> > > > > >
> > > > > > Maybe a syntax like is clearer:
> > > > > >   "(mod1 & mod2)|(mod3 & mod4)" ?
> > > > > > But it would let the user think that more complex expressions
> > > > > > are valid, like "(mod1 & (mod2 | mod3)) | mod4", which is probably
> > overkill.
> > > > > >
> > > > > > Regards,
> > > > > > Olivier
> > > > >
> > > > > This RFC seems like a good idea - and something the Intel QuickAssist PMD
> > could benefit from.
> > > > > However the (mod1 & mod2) can handle the QAT case better in my
> > opinion.
> > > > > i.e.
> > > > > as well as needing one of
> > > > > * uio igb_uio
> > > > > * uio uio_pci_generic
> > > > > * vfio vfio-pci
> > > > > QAT PMD also needs one of (depending on which physical device is
> > > > > plugged)
> > > > >  * qat_dh895xcc
> > > > >  * qat_c62x
> > > > >  * qat_c3xxx
> > > > >
> > > > > So the original syntax would result in a very long list of possible variations.
> > > > > What really reflects the dependencies would be ((uio & igb_uio) |
> > > > > (uio & uio_pci_generic) | (vfio & vfio_pci)) & (qat_dh895xcc |
> > > > > qat_c62x | qat_c3xxx)
> > > > >
> > > > Ah, I didn't consider that hardware specifics might create a use
> > > > case where a pmd must have one or more kernel modules available for
> > > > hw support.  Perhaps it is worthwhile to automate hardware support -
> > > > that is to say, any module loading script should automatically look
> > > > at the pci table exported from a pmd, and, if found, load any
> > > > modules that claim support for that device:vendor tuple?  Though
> > > > that might break in the case of uio, if there are separate driver modules that
> > support native hardware and uio access.
> 
> Actually if the script output was intended to be used to auto-load dependent kmods, 
> then even the above would not suffice for the QAT driver (and presumably for other
> PMDs with specific HW dependencies). i.e. the qat_dhxxxx modules have further dependencies 
> themselves on an intel_qat module, and there are other steps documented in the 
But any dependency chain such as what you describe is covered in the next step
of the chain.  That is to say if the qat pmd has a hardware dependency on
qat_dhxxx (or qat_cxxx, etc), and those modules depend on intel_qat, the pmd
doesn't need to know that, because qat_dhxxx and companions should all list
intel_qat as a dependency that modprobe will resolve when installing the kernel
module.

> guide which must be taken after loading the kmods. 
I'm not sure what you mean by this.  Are you referring to the qat documentation
that comes with the DPDK?  I only see three additional items there to address

1) Removing other modules when using the 01.org kernel modules

2) installation of firmware

3) Binding of the device to user space for VFIO/UIO

All three of these tasks fall outside the scope of what this macro is meant to
do.  We could try to create macros for them to export information for use in a
loading script if you like, but I wouldn't.  All three of the above items fall
in my mind under the category of administrative responsibilities.  That is to
say, they are orthogonal to defining a module dependency structure, and if
they aren't properly completed, the module dependency chain won't matter
anyway.

> The use-case I'd addressed was for the script to identify and just throw an error where 
> dependent modules are missing. 
> 

That doesn't really add much value then, since missing modules already result in
errors when the PMD tries to initialize.

> I don't see a simple solution, but also don't see a strong need to find one. 
> Documentation and if necessary a driver-specific script seem sufficient to me.
> 
> My conclusion is the RFC is a nice feature for some drivers, but if introduced needs 
> to be optional as it doesn't handle the complexities of all drivers. 
> 

I agree it's an optional export. If there are no dependencies, or if the author
simply wishes not to supply any, that's fine; the results will be in
accordance with that. But I strongly disagree that its being optional implies
that we can ignore the complexities of the dependencies that can be exported.

The more I think about it the more I like Stephens idea, possibly with some
macro assistance.  That is to say:

1) Start by loading hardware specific modules, the information for which is
already available.  You can parse the pci table that a pmd exports and match it
with the pci aliases retrieved via modinfo

2) Load a special virt driver if no hardware is found on the system in (1).
Special virt drivers might be worth tagging with a VIRT/VFIO/UIO tag export for
pmdinfo

That allows us to set aside the complexities of our dependency chain, as we can
assume hardware support modules will codify any real dependencies there, and a
VIRT tag will let us find any modules needed for hardware that is assigned into
our guest.

Neil

Neil

> > >
> > > I ended up writing a script that went the other way.
> > > First look at the hardware and load VFIO if IOMMU is available.
> > > Then look for special driver needed for Xen and HyperV Lastly fallback
> > > to loading igb_uio if no VFIO and PCI device present.
> > >
> > > In other words it is a system not driver issue.
> > >
> > That sounds like a reasonable approach, yes.
> > Neil
> > 
> > >
>
  
Fiona Trahe Sept. 2, 2016, 1:52 p.m. UTC | #14
> -----Original Message-----
> From: Neil Horman [mailto:nhorman@tuxdriver.com]
> Sent: Friday, September 2, 2016 2:33 PM
> To: Trahe, Fiona <fiona.trahe@intel.com>
> Cc: Stephen Hemminger <stephen@networkplumber.org>; dev@dpdk.org;
> Olivier Matz <olivier.matz@6wind.com>; Thomas Monjalon
> <thomas.monjalon@6wind.com>
> Subject: Re: [dpdk-dev] [dpdk-dev, RFC] drivers: advertise kmod dependencies
> in pmdinfo
> 
> On Fri, Sep 02, 2016 at 09:19:26AM +0000, Trahe, Fiona wrote:
> >
> >
> > > -----Original Message-----
> > > From: Neil Horman [mailto:nhorman@tuxdriver.com]
> > > Sent: Thursday, September 1, 2016 8:16 PM
> > > To: Stephen Hemminger <stephen@networkplumber.org>
> > > Cc: Trahe, Fiona <fiona.trahe@intel.com>; dev@dpdk.org; Olivier Matz
> > > <olivier.matz@6wind.com>; Thomas Monjalon
> > > <thomas.monjalon@6wind.com>
> > > Subject: Re: [dpdk-dev] [dpdk-dev, RFC] drivers: advertise kmod
> > > dependencies in pmdinfo
> > >
> > > On Thu, Sep 01, 2016 at 10:41:22AM -0700, Stephen Hemminger wrote:
> > > > On Thu, 1 Sep 2016 13:35:19 -0400
> > > > Neil Horman <nhorman@tuxdriver.com> wrote:
> > > >
> > > > > On Thu, Sep 01, 2016 at 12:55:27PM +0000, Trahe, Fiona wrote:
> > > > > > Hi Neil and Olivier,
> > > > > >
> > > > > > > -----Original Message-----
> > > > > > > From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Olivier
> > > > > > > Matz
> > > > > > > Sent: Wednesday, August 31, 2016 2:40 PM
> > > > > > > To: Neil Horman <nhorman@tuxdriver.com>
> > > > > > > Cc: dev@dpdk.org; thomas.monjalon@6wind.com
> > > > > > > Subject: Re: [dpdk-dev] [dpdk-dev, RFC] drivers: advertise
> > > > > > > kmod dependencies in pmdinfo
> > > > > > >
> > > > > > > Hi Neil,
> > > > > > >
> > > > > > > On 08/31/2016 03:27 PM, Neil Horman wrote:
> > > > > > > > On Wed, Aug 31, 2016 at 11:21:18AM +0200, Olivier Matz wrote:
> > > > > > > >> Hi Neil,
> > > > > > > >>
> > > > > > > >> On 08/30/2016 03:23 PM, Neil Horman wrote:
> > > > > > > >>> On Fri, Aug 26, 2016 at 03:20:46PM +0200, Olivier Matz wrote:
> > > > > > > >>>> Add a new macro DRIVER_REGISTER_KMOD_DEP() that allows
> > > > > > > >>>> a driver to declare the list of kernel modules required to run
> properly.
> > > > > > > >>>>
> > > > > > > >>>> Today, most PCI drivers require uio/vfio.
> > > > > > > >>>>
> > > > > > > >>>> Signed-off-by: Olivier Matz <olivier.matz@6wind.com>
> > > > > > > >>>>
> > > > > > > >>>> ---
> > > > > > > >>>> In this RFC, I supposed that all PCI drivers require a
> > > > > > > >>>> the loading of a uio/vfio module (except mlx*), this may be
> wrong.
> > > > > > > >>>> Comments are welcome!
> > > > > > > >>>>
> > > > > > > >>>>
> > > > > > > >>>>  buildtools/pmdinfogen/pmdinfogen.c      |  1 +
> > > > > > > >>>>  buildtools/pmdinfogen/pmdinfogen.h      |  1 +
> > > > > > > >>>>  drivers/crypto/qat/rte_qat_cryptodev.c  |  2 ++
> > > > > > > >>>>  drivers/net/bnx2x/bnx2x_ethdev.c        |  4 ++++
> > > > > > > >>>>  drivers/net/bnxt/bnxt_ethdev.c          |  2 ++
> > > > > > > >>>>  drivers/net/cxgbe/cxgbe_ethdev.c        |  2 ++
> > > > > > > >>>>  drivers/net/e1000/em_ethdev.c           |  2 ++
> > > > > > > >>>>  drivers/net/e1000/igb_ethdev.c          |  4 ++++
> > > > > > > >>>>  drivers/net/ena/ena_ethdev.c            |  2 ++
> > > > > > > >>>>  drivers/net/enic/enic_ethdev.c          |  2 ++
> > > > > > > >>>>  drivers/net/fm10k/fm10k_ethdev.c        |  2 ++
> > > > > > > >>>>  drivers/net/i40e/i40e_ethdev.c          |  2 ++
> > > > > > > >>>>  drivers/net/i40e/i40e_ethdev_vf.c       |  2 ++
> > > > > > > >>>>  drivers/net/ixgbe/ixgbe_ethdev.c        |  4 ++++
> > > > > > > >>>>  drivers/net/mlx4/mlx4.c                 |  2 ++
> > > > > > > >>>>  drivers/net/mlx5/mlx5.c                 |  3 +++
> > > > > > > >>>>  drivers/net/nfp/nfp_net.c               |  2 ++
> > > > > > > >>>>  drivers/net/qede/qede_ethdev.c          |  4 ++++
> > > > > > > >>>>  drivers/net/szedata2/rte_eth_szedata2.c |  2 ++
> > > > > > > >>>>  drivers/net/thunderx/nicvf_ethdev.c     |  2 ++
> > > > > > > >>>>  drivers/net/virtio/virtio_ethdev.c      |  2 ++
> > > > > > > >>>>  drivers/net/vmxnet3/vmxnet3_ethdev.c    |  2 ++
> > > > > > > >>>>  lib/librte_eal/common/include/rte_dev.h | 14
> ++++++++++++++
> > > > > > > >>>>  tools/dpdk-pmdinfo.py                   |  5 ++++-
> > > > > > > >>>>  24 files changed, 69 insertions(+), 1 deletion(-)
> > > > > > > >>>>
> > > > > > > >>>
> > > > > > > >>> Generally speaking, I like the idea, it makes sense to
> > > > > > > >>> me in terms of using pmdinfo to export this information
> > > > > > > >>>
> > > > > > > >>> That said, This may need to be a set of macros.  By that
> > > > > > > >>> I mean (and correct
> > > > > > > me
> > > > > > > >>> if I'm wrong here), but the relationship between pmd's
> > > > > > > >>> and kernel modules
> > > > > > > is in
> > > > > > > >>> some cases, more complex than a 'requires' or 'depends'
> > > > > > > >>> relationship.  That
> > > > > > > is
> > > > > > > >>> to say, some pmd may need user space hardware access,
> > > > > > > >>> but can use either
> > > > > > > uio OR
> > > > > > > >>> vfio, but doesn't need both, and can continue to
> > > > > > > >>> function if only one is available.  Other PMD's may be
> > > > > > > >>> able to use vfio or uio, but can still function without
> > > > > > > >>> either.  And some, as your patch implements, simply
> > > > > > > >>> require one or
> > > > > > > the
> > > > > > > >>> other to function.  As such it seems like you may want a
> > > > > > > >>> few macros, in the
> > > > > > > form
> > > > > > > >>> of:
> > > > > > > >>>
> > > > > > > >>> DRIVER_REGISTER_KMOD_REQUEST - List of modules to
> > > > > > > >>> attempt loading,
> > > > > > > ignore any
> > > > > > > >>> failures
> > > > > > > >>> DRIVER_REGISTER_KMOD_REQUIRE - List of modules required
> > > > > > > >>> to be
> > > > > > > loaded after
> > > > > > > >>> request macro completes, fail if any are not loaded
> > > > > > > >>>
> > > > > > > >>> Thats just spitballing, mind you, theres probably a
> > > > > > > >>> better way to do it, but
> > > > > > > the
> > > > > > > >>> idea is to list a set of modules you would like to have,
> > > > > > > >>> and then create a parsable syntax to describe the
> > > > > > > >>> modules that need to be loaded after the
> > > > > > > request
> > > > > > > >>> is complete so that you can accurately codify the
> > > > > > > >>> situations I described
> > > > > > > above.
> > > > > > > >>
> > > > > > > >> Thank you for your feedback.
> > > > > > > >> However, I'm not sure I'm perfectly getting what you suggest.
> > > > > > > >>
> > > > > > > >> Do you think some PMDs could request a kernel module
> > > > > > > >> without really requiring it? Do you have an example in mind?
> > > > > > > >>
> > > > > > > > Yes, thats precisely it.  The most clear example I could
> > > > > > > > think of (though I'm not sure if any pmd currently
> > > > > > > > supports this), is a pmd that supports both UIO and VFIO
> > > > > > > > communication with the kernel.  Such a PMD requires that
> > > > > > > > one of
> > > > > > > those
> > > > > > > > two modules be loaded, but only one (i.e. both are not
> > > > > > > > required), so if only
> > > > > > > the
> > > > > > > > uio kernel module loads is a success case, likewise if
> > > > > > > > only the vfio module loads can be treated as success.
> > > > > > > > Both loading are clearly successful.  Only if neither load
> > > > > > > > do we have a failure case.  I'm suggesting that the
> > > > > > > > grammer that your exports define should take those cases
> > > > > > > > into account.  Its not always as
> > > simple as "I must have the following modules"
> > > > > > > >
> > > > > > > >> The syntax I've submitted lets you define several lists
> > > > > > > >> of modules, so that the user or the script that starts
> > > > > > > >> the application can decide which kmod list is better
> > > > > > > >> according to the
> > > environment.
> > > > > > > >>
> > > > > > > > If you have a human intervening in the module load
> > > > > > > > process, sure, then its
> > > > > > > fine.
> > > > > > > > But it seems that this particular feature that you're
> > > > > > > > implemnting might have automated uses.  That is to say the
> > > > > > > > dpdk core library might be interested in parsing this
> > > > > > > > particular information to direct module autoloading, and
> > > > > > > > if thats desireable then you need to define these lists
> > > > > > > > such that you can
> > > codify failure and success conditions.
> > > > > > > >
> > > > > > > >> For example, most drivers will advertise
> > > > > > > >> "uio,igb_uio:uio,uio_pci_generic:vfio,vfio-pci", and the
> > > > > > > >> user or script will have to choose between loading:
> > > > > > > >> - uio igb_uio
> > > > > > > >> - uio uio_pci_generic
> > > > > > > >> - vfio vfio-pci
> > > > > > > >>
> > > > > > > > Oh, I see, so your list is a colon delimited list of
> > > > > > > > module load sets, where at least one set must succeed by
> > > > > > > > loading all modules in its set, but the failure of any one
> > > > > > > > set isn't fatal to the
> > > process?  e.g. a string like this:
> > > > > > > >
> > > > > > > > uio,igb_uio:vfio,vfio-pci
> > > > > > > >
> > > > > > > > could be interpreted to mean "I must load (uio AND
> > > > > > > > igb_uio) OR (vfio AND vfio-pci).  If the evaluation of
> > > > > > > > that statement results in false, then the operation fails, otherwise
> it succedes.
> > > > > > > >
> > > > > > > > If thats the case, then, apologies, we're on the same
> > > > > > > > page, and this will work just fine.
> > > > > > >
> > > > > > > Yep, that's the idea.
> > > > > > >
> > > > > > > Colon and commas are the best separators I've thought about,
> > > > > > > but any idea to make the syntax clearer is welcome ;)
> > > > > > >
> > > > > > > Maybe a syntax like is clearer:
> > > > > > >   "(mod1 & mod2)|(mod3 & mod4)" ?
> > > > > > > But it would let the user think that more complex
> > > > > > > expressions are valid, like "(mod1 & (mod2 | mod3)) | mod4",
> > > > > > > which is probably
> > > overkill.
> > > > > > >
> > > > > > > Regards,
> > > > > > > Olivier
> > > > > >
> > > > > > This RFC seems like a good idea - and something the Intel
> > > > > > QuickAssist PMD
> > > could benefit from.
> > > > > > However the (mod1 & mod2) can handle the QAT case better in my
> > > opinion.
> > > > > > i.e.
> > > > > > as well as needing one of
> > > > > > * uio igb_uio
> > > > > > * uio uio_pci_generic
> > > > > > * vfio vfio-pci
> > > > > > QAT PMD also needs one of (depending on which physical device
> > > > > > is
> > > > > > plugged)
> > > > > >  * qat_dh895xcc
> > > > > >  * qat_c62x
> > > > > >  * qat_c3xxx
> > > > > >
> > > > > > So the original syntax would result in a very long list of possible
> variations.
> > > > > > What really reflects the dependencies would be ((uio &
> > > > > > igb_uio) | (uio & uio_pci_generic) | (vfio & vfio_pci)) &
> > > > > > (qat_dh895xcc | qat_c62x | qat_c3xxx)
> > > > > >
> > > > > Ah, I didn't consider that hardware specifics might create a use
> > > > > case where a pmd must have one or more kernel modules available
> > > > > for hw support.  Perhaps it is worthwhile to automate hardware
> > > > > support - that is to say, any module loading script should
> > > > > automatically look at the pci table exported from a pmd, and, if
> > > > > found, load any modules that claim support for that
> > > > > device:vendor tuple?  Though that might break in the case of
> > > > > uio, if there are separate driver modules that
> > > support native hardware and uio access.
> >
> > Actually if the script output was intended to be used to auto-load
> > dependent kmods, then even the above would not suffice for the QAT
> > driver (and presumably for other PMDs with specific HW dependencies).
> > i.e. the qat_dhxxxx modules have further dependencies themselves on an
> > intel_qat module, and there are other steps documented in the
> But any dependency chain such as what you describe is covered in the next step
> of the chain.  That is to say if the qat pmd has a hardware dependency on
> qat_dhxxx (or qat_cxxx, etc), and those modules depend on intel_qat, the pmd
> doesn't need to know that, because qat_dhxxx and companions should all list
> intel_qat as a dependency that modprobe will resolve when installing the kernel
> module.
> 
> > guide which must be taken after loading the kmods.
> I'm not sure what you mean by this.  Are you referring to the qat
> documentation that comes with the DPDK?  I only see three additional items
> there to address
> 
> 1) Removing other modules when using the 01.org kernel modules
> 
> 2) installation of firmware
> 
> 3) Binding of the device to user space for VFIO/UIO
> 
> All three of these tasks fall outside the scope of what this macro is meant to do.
> We could try to create macros for them to export information for use in a
> loading script if you like, but I wouldn't.  All three of the above items fall in my
> mind under the category of administrative responsibilities.  That is to say, they
> are orthogonoal to defining a module dependency structure, and if they're
> arent properly completed, the module dependency chain won't matter anyway.
> 

Another manual step is documented, which must be done after insmod: 
echo 32 > /sys/bus/pci/drivers/dh895xcc/0000\:03\:00.0/sriov_numvfs
(steps will vary for different hardware types)
Which, I agree, like the others, is outside the scope of what this macro is meant to do.
So using the macro to facilitate auto-loading of modules isn't a very useful feature
for the QAT driver.

> > The use-case I'd addressed was for the script to identify and just
> > throw an error where dependent modules are missing.
> >
> 
> That doesn't really add much value then, since missing modules already result in
> errors when the PMD tries to initalize.
> 
> > I don't see a simple solution, but also don't see a strong need to find one.
> > Documentation and if necessary a driver-specific script seem sufficient to me.
> >
> > My conclusion is the RFC is a nice feature for some drivers, but if introduced
> needs
> > to be optional as it doesn't handle the complexities of all drivers.
> >
> 
> I agree its an optional export. If there are no dependencies, or if the author
> wishes to to simply not supply any, thats fine, the results will be in
> accordance with that, but I strongly disagree that its optional implies the fact
> that we can ignore the complexities of the depedencies that can be exported.
> 
> The more I think about it the more I like Stephens idea, possibly with some
> macro assistance.  That is to say:
> 
> 1) Start by loading hardware specific modules, the information for which is
> already available.  You can parse the pci table that a pmd exports and match it
> with the pci aliases retrieved via modinfo
> 
> 2) Load a special virt driver if no hardware is found on the system in (1).
> special virt drivers might be worth tagging with a VIRT/VFIO/UIO tag export for
> pmdinfo
> 
> That allows to set asside the complexities of our dependency chain, as we can
> assume hardware support modules will codify any real dependencies there, and
> a
> VIRT tag will let us find any modules needed for hardware the is assigned into
> our guest.
> 
> Neil
> 
> Neil
> 
> > > >
> > > > I ended up writing a script that went the other way.
> > > > First look at the hardware and load VFIO if IOMMU is available.
> > > > Then look for special driver needed for Xen and HyperV Lastly fallback
> > > > to loading igb_uio if no VFIO and PCI device present.
> > > >
> > > > In other words it is a system not driver issue.
> > > >
> > > That sounds like a reasonable approach, yes.
> > > Neil
> > >
> > > >
> >
  
Neil Horman Sept. 2, 2016, 2:15 p.m. UTC | #15
On Fri, Sep 02, 2016 at 01:52:46PM +0000, Trahe, Fiona wrote:
> 
> 
> > -----Original Message-----
> > From: Neil Horman [mailto:nhorman@tuxdriver.com]
> > Sent: Friday, September 2, 2016 2:33 PM
> > To: Trahe, Fiona <fiona.trahe@intel.com>
> > Cc: Stephen Hemminger <stephen@networkplumber.org>; dev@dpdk.org;
> > Olivier Matz <olivier.matz@6wind.com>; Thomas Monjalon
> > <thomas.monjalon@6wind.com>
> > Subject: Re: [dpdk-dev] [dpdk-dev, RFC] drivers: advertise kmod dependencies
> > in pmdinfo
> > 
> > On Fri, Sep 02, 2016 at 09:19:26AM +0000, Trahe, Fiona wrote:
> > >
> > >
> > > > -----Original Message-----
> > > > From: Neil Horman [mailto:nhorman@tuxdriver.com]
> > > > Sent: Thursday, September 1, 2016 8:16 PM
> > > > To: Stephen Hemminger <stephen@networkplumber.org>
> > > > Cc: Trahe, Fiona <fiona.trahe@intel.com>; dev@dpdk.org; Olivier Matz
> > > > <olivier.matz@6wind.com>; Thomas Monjalon
> > > > <thomas.monjalon@6wind.com>
> > > > Subject: Re: [dpdk-dev] [dpdk-dev, RFC] drivers: advertise kmod
> > > > dependencies in pmdinfo
> > > >
> > > > On Thu, Sep 01, 2016 at 10:41:22AM -0700, Stephen Hemminger wrote:
> > > > > On Thu, 1 Sep 2016 13:35:19 -0400
> > > > > Neil Horman <nhorman@tuxdriver.com> wrote:
> > > > >
> > > > > > On Thu, Sep 01, 2016 at 12:55:27PM +0000, Trahe, Fiona wrote:
> > > > > > > Hi Neil and Olivier,
> > > > > > >
> > > > > > > > -----Original Message-----
> > > > > > > > From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Olivier
> > > > > > > > Matz
> > > > > > > > Sent: Wednesday, August 31, 2016 2:40 PM
> > > > > > > > To: Neil Horman <nhorman@tuxdriver.com>
> > > > > > > > Cc: dev@dpdk.org; thomas.monjalon@6wind.com
> > > > > > > > Subject: Re: [dpdk-dev] [dpdk-dev, RFC] drivers: advertise
> > > > > > > > kmod dependencies in pmdinfo
> > > > > > > >
> > > > > > > > Hi Neil,
> > > > > > > >
> > > > > > > > On 08/31/2016 03:27 PM, Neil Horman wrote:
> > > > > > > > > On Wed, Aug 31, 2016 at 11:21:18AM +0200, Olivier Matz wrote:
> > > > > > > > >> Hi Neil,
> > > > > > > > >>
> > > > > > > > >> On 08/30/2016 03:23 PM, Neil Horman wrote:
> > > > > > > > >>> On Fri, Aug 26, 2016 at 03:20:46PM +0200, Olivier Matz wrote:
> > > > > > > > >>>> Add a new macro DRIVER_REGISTER_KMOD_DEP() that allows
> > > > > > > > >>>> a driver to declare the list of kernel modules required to run
> > properly.
> > > > > > > > >>>>
> > > > > > > > >>>> Today, most PCI drivers require uio/vfio.
> > > > > > > > >>>>
> > > > > > > > >>>> Signed-off-by: Olivier Matz <olivier.matz@6wind.com>
> > > > > > > > >>>>
> > > > > > > > >>>> ---
> > > > > > > > >>>> In this RFC, I supposed that all PCI drivers require a
> > > > > > > > >>>> the loading of a uio/vfio module (except mlx*), this may be
> > wrong.
> > > > > > > > >>>> Comments are welcome!
> > > > > > > > >>>>
> > > > > > > > >>>>
> > > > > > > > >>>>  buildtools/pmdinfogen/pmdinfogen.c      |  1 +
> > > > > > > > >>>>  buildtools/pmdinfogen/pmdinfogen.h      |  1 +
> > > > > > > > >>>>  drivers/crypto/qat/rte_qat_cryptodev.c  |  2 ++
> > > > > > > > >>>>  drivers/net/bnx2x/bnx2x_ethdev.c        |  4 ++++
> > > > > > > > >>>>  drivers/net/bnxt/bnxt_ethdev.c          |  2 ++
> > > > > > > > >>>>  drivers/net/cxgbe/cxgbe_ethdev.c        |  2 ++
> > > > > > > > >>>>  drivers/net/e1000/em_ethdev.c           |  2 ++
> > > > > > > > >>>>  drivers/net/e1000/igb_ethdev.c          |  4 ++++
> > > > > > > > >>>>  drivers/net/ena/ena_ethdev.c            |  2 ++
> > > > > > > > >>>>  drivers/net/enic/enic_ethdev.c          |  2 ++
> > > > > > > > >>>>  drivers/net/fm10k/fm10k_ethdev.c        |  2 ++
> > > > > > > > >>>>  drivers/net/i40e/i40e_ethdev.c          |  2 ++
> > > > > > > > >>>>  drivers/net/i40e/i40e_ethdev_vf.c       |  2 ++
> > > > > > > > >>>>  drivers/net/ixgbe/ixgbe_ethdev.c        |  4 ++++
> > > > > > > > >>>>  drivers/net/mlx4/mlx4.c                 |  2 ++
> > > > > > > > >>>>  drivers/net/mlx5/mlx5.c                 |  3 +++
> > > > > > > > >>>>  drivers/net/nfp/nfp_net.c               |  2 ++
> > > > > > > > >>>>  drivers/net/qede/qede_ethdev.c          |  4 ++++
> > > > > > > > >>>>  drivers/net/szedata2/rte_eth_szedata2.c |  2 ++
> > > > > > > > >>>>  drivers/net/thunderx/nicvf_ethdev.c     |  2 ++
> > > > > > > > >>>>  drivers/net/virtio/virtio_ethdev.c      |  2 ++
> > > > > > > > >>>>  drivers/net/vmxnet3/vmxnet3_ethdev.c    |  2 ++
> > > > > > > > >>>>  lib/librte_eal/common/include/rte_dev.h | 14
> > ++++++++++++++
> > > > > > > > >>>>  tools/dpdk-pmdinfo.py                   |  5 ++++-
> > > > > > > > >>>>  24 files changed, 69 insertions(+), 1 deletion(-)
> > > > > > > > >>>>
> > > > > > > > >>>
> > > > > > > > >>> Generally speaking, I like the idea, it makes sense to
> > > > > > > > >>> me in terms of using pmdinfo to export this information
> > > > > > > > >>>
> > > > > > > > >>> That said, This may need to be a set of macros.  By that
> > > > > > > > >>> I mean (and correct
> > > > > > > > me
> > > > > > > > >>> if I'm wrong here), but the relationship between pmd's
> > > > > > > > >>> and kernel modules
> > > > > > > > is in
> > > > > > > > >>> some cases, more complex than a 'requires' or 'depends'
> > > > > > > > >>> relationship.  That
> > > > > > > > is
> > > > > > > > >>> to say, some pmd may need user space hardware access,
> > > > > > > > >>> but can use either
> > > > > > > > uio OR
> > > > > > > > >>> vfio, but doesn't need both, and can continue to
> > > > > > > > >>> function if only one is available.  Other PMD's may be
> > > > > > > > >>> able to use vfio or uio, but can still function without
> > > > > > > > >>> either.  And some, as your patch implements, simply
> > > > > > > > >>> require one or
> > > > > > > > the
> > > > > > > > >>> other to function.  As such it seems like you may want a
> > > > > > > > >>> few macros, in the
> > > > > > > > form
> > > > > > > > >>> of:
> > > > > > > > >>>
> > > > > > > > >>> DRIVER_REGISTER_KMOD_REQUEST - List of modules to
> > > > > > > > >>> attempt loading,
> > > > > > > > ignore any
> > > > > > > > >>> failures
> > > > > > > > >>> DRIVER_REGISTER_KMOD_REQUIRE - List of modules required
> > > > > > > > >>> to be
> > > > > > > > loaded after
> > > > > > > > >>> request macro completes, fail if any are not loaded
> > > > > > > > >>>
> > > > > > > > >>> Thats just spitballing, mind you, theres probably a
> > > > > > > > >>> better way to do it, but
> > > > > > > > the
> > > > > > > > >>> idea is to list a set of modules you would like to have,
> > > > > > > > >>> and then create a parsable syntax to describe the
> > > > > > > > >>> modules that need to be loaded after the
> > > > > > > > request
> > > > > > > > >>> is complete so that you can accurately codify the
> > > > > > > > >>> situations I described
> > > > > > > > above.
> > > > > > > > >>
> > > > > > > > >> Thank you for your feedback.
> > > > > > > > >> However, I'm not sure I'm perfectly getting what you suggest.
> > > > > > > > >>
> > > > > > > > >> Do you think some PMDs could request a kernel module
> > > > > > > > >> without really requiring it? Do you have an example in mind?
> > > > > > > > >>
> > > > > > > > > Yes, thats precisely it.  The most clear example I could
> > > > > > > > > think of (though I'm not sure if any pmd currently
> > > > > > > > > supports this), is a pmd that supports both UIO and VFIO
> > > > > > > > > communication with the kernel.  Such a PMD requires that
> > > > > > > > > one of
> > > > > > > > those
> > > > > > > > > two modules be loaded, but only one (i.e. both are not
> > > > > > > > > required), so if only
> > > > > > > > the
> > > > > > > > > uio kernel module loads is a success case, likewise if
> > > > > > > > > only the vfio module loads can be treated as success.
> > > > > > > > > Both loading are clearly successful.  Only if neither load
> > > > > > > > > do we have a failure case.  I'm suggesting that the
> > > > > > > > > > grammar that your exports define should take those cases
> > > > > > > > > into account.  Its not always as
> > > > simple as "I must have the following modules"
> > > > > > > > >
> > > > > > > > >> The syntax I've submitted lets you define several lists
> > > > > > > > >> of modules, so that the user or the script that starts
> > > > > > > > >> the application can decide which kmod list is better
> > > > > > > > >> according to the
> > > > environment.
> > > > > > > > >>
> > > > > > > > > If you have a human intervening in the module load
> > > > > > > > > process, sure, then its
> > > > > > > > fine.
> > > > > > > > > But it seems that this particular feature that you're
> > > > > > > > > > implementing might have automated uses.  That is to say the
> > > > > > > > > > dpdk core library might be interested in parsing this
> > > > > > > > > > particular information to direct module autoloading, and
> > > > > > > > > > if that's desirable then you need to define these lists
> > > > > > > > > such that you can
> > > > codify failure and success conditions.
> > > > > > > > >
> > > > > > > > >> For example, most drivers will advertise
> > > > > > > > >> "uio,igb_uio:uio,uio_pci_generic:vfio,vfio-pci", and the
> > > > > > > > >> user or script will have to choose between loading:
> > > > > > > > >> - uio igb_uio
> > > > > > > > >> - uio uio_pci_generic
> > > > > > > > >> - vfio vfio-pci
> > > > > > > > >>
> > > > > > > > > Oh, I see, so your list is a colon delimited list of
> > > > > > > > > module load sets, where at least one set must succeed by
> > > > > > > > > loading all modules in its set, but the failure of any one
> > > > > > > > > set isn't fatal to the
> > > > process?  e.g. a string like this:
> > > > > > > > >
> > > > > > > > > uio,igb_uio:vfio,vfio-pci
> > > > > > > > >
> > > > > > > > > could be interpreted to mean "I must load (uio AND
> > > > > > > > > igb_uio) OR (vfio AND vfio-pci).  If the evaluation of
> > > > > > > > > that statement results in false, then the operation fails, otherwise
> > it succeeds.
> > > > > > > > >
> > > > > > > > > If thats the case, then, apologies, we're on the same
> > > > > > > > > page, and this will work just fine.
> > > > > > > >
> > > > > > > > Yep, that's the idea.
> > > > > > > >
> > > > > > > > Colon and commas are the best separators I've thought about,
> > > > > > > > but any idea to make the syntax clearer is welcome ;)
> > > > > > > >
> > > > > > > > Maybe a syntax like is clearer:
> > > > > > > >   "(mod1 & mod2)|(mod3 & mod4)" ?
> > > > > > > > But it would let the user think that more complex
> > > > > > > > expressions are valid, like "(mod1 & (mod2 | mod3)) | mod4",
> > > > > > > > which is probably
> > > > overkill.
> > > > > > > >
> > > > > > > > Regards,
> > > > > > > > Olivier
> > > > > > >
> > > > > > > This RFC seems like a good idea - and something the Intel
> > > > > > > QuickAssist PMD
> > > > could benefit from.
> > > > > > > However the (mod1 & mod2) can handle the QAT case better in my
> > > > opinion.
> > > > > > > i.e.
> > > > > > > as well as needing one of
> > > > > > > * uio igb_uio
> > > > > > > * uio uio_pci_generic
> > > > > > > * vfio vfio-pci
> > > > > > > QAT PMD also needs one of (depending on which physical device
> > > > > > > is
> > > > > > > plugged)
> > > > > > >  * qat_dh895xcc
> > > > > > >  * qat_c62x
> > > > > > >  * qat_c3xxx
> > > > > > >
> > > > > > > So the original syntax would result in a very long list of possible
> > variations.
> > > > > > > What really reflects the dependencies would be ((uio &
> > > > > > > igb_uio) | (uio & uio_pci_generic) | (vfio & vfio_pci)) &
> > > > > > > (qat_dh895xcc | qat_c62x | qat_c3xxx)
> > > > > > >
> > > > > > Ah, I didn't consider that hardware specifics might create a use
> > > > > > case where a pmd must have one or more kernel modules available
> > > > > > for hw support.  Perhaps it is worthwhile to automate hardware
> > > > > > support - that is to say, any module loading script should
> > > > > > automatically look at the pci table exported from a pmd, and, if
> > > > > > found, load any modules that claim support for that
> > > > > > device:vendor tuple?  Though that might break in the case of
> > > > > > uio, if there are separate driver modules that
> > > > support native hardware and uio access.
> > >
> > > Actually if the script output was intended to be used to auto-load
> > > dependent kmods, then even the above would not suffice for the QAT
> > > driver (and presumably for other PMDs with specific HW dependencies).
> > > i.e. the qat_dhxxxx modules have further dependencies themselves on an
> > > intel_qat module, and there are other steps documented in the
> > But any dependency chain such as what you describe is covered in the next step
> > of the chain.  That is to say if the qat pmd has a hardware dependency on
> > qat_dhxxx (or qat_cxxx, etc), and those modules depend on intel_qat, the pmd
> > doesn't need to know that, because qat_dhxxx and companions should all list
> > intel_qat as a dependency that modprobe will resolve when installing the kernel
> > module.
> > 
> > > guide which must be taken after loading the kmods.
> > I'm not sure what you mean by this.  Are you referring to the qat
> > documentation that comes with the DPDK?  I only see three additional items
> > there to address
> > 
> > 1) Removing other modules when using the 01.org kernel modules
> > 
> > 2) installation of firmware
> > 
> > 3) Binding of the device to user space for VFIO/UIO
> > 
> > All three of these tasks fall outside the scope of what this macro is meant to do.
> > We could try to create macros for them to export information for use in a
> > loading script if you like, but I wouldn't.  All three of the above items fall in my
> > mind under the category of administrative responsibilities.  That is to say, they
> > are orthogonal to defining a module dependency structure, and if they
> > aren't properly completed, the module dependency chain won't matter anyway.
> > 
> 
> Another manual step is documented, which must be done after insmod: 
> echo 32 > /sys/bus/pci/drivers/dh895xcc/0000\:03\:00.0/sriov_numvfs
> (steps will vary for different hardware types)
> Which I agree like the others are outside the scope of what this macro is meant to do.
> So using the macro to facilitate auto-loading of modules isn't a very useful feature
> for the QAT driver.
> 
Ok, but by that logic then, defining any sort of dependency chain isn't really
useful at all.  If you need a custom per-driver script to load the right
modules and set their parameters correctly, you may as well just codify the
dependencies in the script, and leave the pmd out of it entirely.

Note I don't agree with that stance; it just seems to me like the most
reasonable conclusion to draw from your argument.  If your intent is to just
provide a hint to some hardware-specific script as to what modules to load, and
the script then needs to still know independently that it has to set some
site/usecase specific parameters based on which modules were loaded, you may as
well just move the dependency map into that script so its all in one place.

> > > The use-case I'd addressed was for the script to identify and just
> > > throw an error where dependent modules are missing.
> > >
> > 
> > That doesn't really add much value then, since missing modules already result in
> > errors when the PMD tries to initialize.
> > 
> > > I don't see a simple solution, but also don't see a strong need to find one.
> > > Documentation and if necessary a driver-specific script seem sufficient to me.
> > >
> > > My conclusion is the RFC is a nice feature for some drivers, but if introduced
> > needs
> > > to be optional as it doesn't handle the complexities of all drivers.
> > >
> > 
> > I agree its an optional export. If there are no dependencies, or if the author
> > wishes to simply not supply any, that's fine, the results will be in
> > accordance with that, but I strongly disagree that its being optional implies the fact
> > that we can ignore the complexities of the dependencies that can be exported.
> > 
> > The more I think about it the more I like Stephens idea, possibly with some
> > macro assistance.  That is to say:
> > 
> > 1) Start by loading hardware specific modules, the information for which is
> > already available.  You can parse the pci table that a pmd exports and match it
> > with the pci aliases retrieved via modinfo
> > 
> > 2) Load a special virt driver if no hardware is found on the system in (1).
> > special virt drivers might be worth tagging with a VIRT/VFIO/UIO tag export for
> > pmdinfo
> > 
> > That allows us to set aside the complexities of our dependency chain, as we can
> > assume hardware support modules will codify any real dependencies there, and
> > a
> > VIRT tag will let us find any modules needed for hardware that is assigned into
> > our guest.
> > 
> > Neil
> > 
> > Neil
> > 
> > > > >
> > > > > I ended up writing a script that went the other way.
> > > > > First look at the hardware and load VFIO if IOMMU is available.
> > > > > Then look for special driver needed for Xen and HyperV Lastly fallback
> > > > > to loading igb_uio if no VFIO and PCI device present.
> > > > >
> > > > > In other words it is a system not driver issue.
> > > > >
> > > > That sounds like a reasonable approach, yes.
> > > > Neil
> > > >
> > > > >
> > >
>
  
Olivier Matz Sept. 2, 2016, 4:13 p.m. UTC | #16
Hi,

Thank you everyone for all these comments. I'll try to summarize them here:

a- the kmod information should be per-device instead of per-driver
(modalias-like)
b- there is no need to specify kmod dependencies (i.e. "uio_pci_generic"
is enough, not "uio & uio_pci_generic") since it is available via modinfo
c- the syntax with commas and colons is not ideal, we have no better
consensus for now, but maybe "&" and "|" are better.
d- the information provided is not complete:
   - some drivers require module parameters or writing to /sys
   - choosing between one kmod or another is a system choice
  -> this info could be optional for this reason
e- for usual drivers having a "<generic-uio>" tag instead of a kmod list
may be more simple (I'm not sure of it though)


So we could either:

- drop this RFC, and like today everyone should handle the loading of
kmods on their own

- keep the approach of this RFC, enhance it with a- b- c- e-

   Examples of strings specifying device wildcards:
     /* kmod list for all pci devices */
     "pci:v*:d*:sv*:sd*:vfio|uio_pci_generic|igb_uio"
     /* if we need device-specific modules: */
     "pci:v8086:d1234:sv*:sd*:some_kmod"
     "pci:v8086:d4321:sv*:sd*:some_other_kmod"

   this is not a perfect solution, but I think it would make the
   development of a kmod autoload script easier.


Any other idea?

Thanks,
Olivier
  

Patch

diff --git a/buildtools/pmdinfogen/pmdinfogen.c b/buildtools/pmdinfogen/pmdinfogen.c
index e1bf2e4..1e5b6f3 100644
--- a/buildtools/pmdinfogen/pmdinfogen.c
+++ b/buildtools/pmdinfogen/pmdinfogen.c
@@ -269,6 +269,7 @@  struct opt_tag {
 
 static const struct opt_tag opt_tags[] = {
 	{"_param_string_export", "params"},
+	{"_kmod_dep_export", "kmod"},
 };
 
 static int complete_pmd_entry(struct elf_info *info, struct pmd_driver *drv)
diff --git a/buildtools/pmdinfogen/pmdinfogen.h b/buildtools/pmdinfogen/pmdinfogen.h
index 1da2966..2fab2aa 100644
--- a/buildtools/pmdinfogen/pmdinfogen.h
+++ b/buildtools/pmdinfogen/pmdinfogen.h
@@ -85,6 +85,7 @@  else \
 
 enum opt_params {
 	PMD_PARAM_STRING = 0,
+	PMD_KMOD_DEP,
 	PMD_OPT_MAX
 };
 
diff --git a/drivers/crypto/qat/rte_qat_cryptodev.c b/drivers/crypto/qat/rte_qat_cryptodev.c
index 82ab047..fc62be9 100644
--- a/drivers/crypto/qat/rte_qat_cryptodev.c
+++ b/drivers/crypto/qat/rte_qat_cryptodev.c
@@ -135,4 +135,6 @@  static struct rte_driver pmd_qat_drv = {
 
 PMD_REGISTER_DRIVER(pmd_qat_drv, CRYPTODEV_NAME_QAT_SYM_PMD);
 DRIVER_REGISTER_PCI_TABLE(CRYPTODEV_NAME_QAT_SYM_PMD, pci_id_qat_map);
+DRIVER_REGISTER_KMOD_DEP(CRYPTODEV_NAME_QAT_SYM_PMD,
+	"uio,igb_uio:uio,uio_pci_generic:vfio,vfio-pci");
 
diff --git a/drivers/net/bnx2x/bnx2x_ethdev.c b/drivers/net/bnx2x/bnx2x_ethdev.c
index f3ab355..ba8831a 100644
--- a/drivers/net/bnx2x/bnx2x_ethdev.c
+++ b/drivers/net/bnx2x/bnx2x_ethdev.c
@@ -667,5 +667,9 @@  static struct rte_driver rte_bnx2xvf_driver = {
 
 PMD_REGISTER_DRIVER(rte_bnx2x_driver, bnx2x);
 DRIVER_REGISTER_PCI_TABLE(bnx2x, pci_id_bnx2x_map);
+DRIVER_REGISTER_KMOD_DEP(bnx2x,
+	"uio,igb_uio:uio,uio_pci_generic:vfio,vfio-pci");
 PMD_REGISTER_DRIVER(rte_bnx2xvf_driver, bnx2xvf);
 DRIVER_REGISTER_PCI_TABLE(bnx2xvf, pci_id_bnx2xvf_map);
+DRIVER_REGISTER_KMOD_DEP(bnx2xvf,
+	"uio,igb_uio:uio,uio_pci_generic:vfio,vfio-pci");
diff --git a/drivers/net/bnxt/bnxt_ethdev.c b/drivers/net/bnxt/bnxt_ethdev.c
index 3795fac..5c6c7b5 100644
--- a/drivers/net/bnxt/bnxt_ethdev.c
+++ b/drivers/net/bnxt/bnxt_ethdev.c
@@ -1068,3 +1068,5 @@  static struct rte_driver bnxt_pmd_drv = {
 
 PMD_REGISTER_DRIVER(bnxt_pmd_drv, bnxt);
 DRIVER_REGISTER_PCI_TABLE(bnxt, bnxt_pci_id_map);
+DRIVER_REGISTER_KMOD_DEP(bnxt,
+	"uio,igb_uio:uio,uio_pci_generic:vfio,vfio-pci");
diff --git a/drivers/net/cxgbe/cxgbe_ethdev.c b/drivers/net/cxgbe/cxgbe_ethdev.c
index 9208a61..cea2741 100644
--- a/drivers/net/cxgbe/cxgbe_ethdev.c
+++ b/drivers/net/cxgbe/cxgbe_ethdev.c
@@ -1068,4 +1068,6 @@  static struct rte_driver rte_cxgbe_driver = {
 
 PMD_REGISTER_DRIVER(rte_cxgbe_driver, cxgb4);
 DRIVER_REGISTER_PCI_TABLE(cxgb4, cxgb4_pci_tbl);
+DRIVER_REGISTER_KMOD_DEP(cxgb4,
+	"uio,igb_uio:uio,uio_pci_generic:vfio,vfio-pci");
 
diff --git a/drivers/net/e1000/em_ethdev.c b/drivers/net/e1000/em_ethdev.c
index ad104ed..bd0d0ea 100644
--- a/drivers/net/e1000/em_ethdev.c
+++ b/drivers/net/e1000/em_ethdev.c
@@ -1806,3 +1806,5 @@  struct rte_driver em_pmd_drv = {
 
 PMD_REGISTER_DRIVER(em_pmd_drv, em);
 DRIVER_REGISTER_PCI_TABLE(em, pci_id_em_map);
+DRIVER_REGISTER_KMOD_DEP(em,
+	"uio,igb_uio:uio,uio_pci_generic:vfio,vfio-pci");
diff --git a/drivers/net/e1000/igb_ethdev.c b/drivers/net/e1000/igb_ethdev.c
index 4e9e6a3..a3dfbfe 100644
--- a/drivers/net/e1000/igb_ethdev.c
+++ b/drivers/net/e1000/igb_ethdev.c
@@ -5257,5 +5257,9 @@  eth_igb_configure_msix_intr(struct rte_eth_dev *dev)
 
 PMD_REGISTER_DRIVER(pmd_igb_drv, igb);
 DRIVER_REGISTER_PCI_TABLE(igb, pci_id_igb_map);
+DRIVER_REGISTER_KMOD_DEP(igb,
+	"uio,igb_uio:uio,uio_pci_generic:vfio,vfio-pci");
 PMD_REGISTER_DRIVER(pmd_igbvf_drv, igbvf);
 DRIVER_REGISTER_PCI_TABLE(igbvf, pci_id_igbvf_map);
+DRIVER_REGISTER_KMOD_DEP(igbvf,
+	"uio,igb_uio:uio,uio_pci_generic:vfio,vfio-pci");
diff --git a/drivers/net/ena/ena_ethdev.c b/drivers/net/ena/ena_ethdev.c
index ac0803d..a45d60c 100644
--- a/drivers/net/ena/ena_ethdev.c
+++ b/drivers/net/ena/ena_ethdev.c
@@ -1709,3 +1709,5 @@  struct rte_driver ena_pmd_drv = {
 
 PMD_REGISTER_DRIVER(ena_pmd_drv, ena);
 DRIVER_REGISTER_PCI_TABLE(ena, pci_id_ena_map);
+DRIVER_REGISTER_KMOD_DEP(ena,
+	"uio,igb_uio:uio,uio_pci_generic:vfio,vfio-pci");
diff --git a/drivers/net/enic/enic_ethdev.c b/drivers/net/enic/enic_ethdev.c
index 47b07c9..a1b8abc 100644
--- a/drivers/net/enic/enic_ethdev.c
+++ b/drivers/net/enic/enic_ethdev.c
@@ -642,3 +642,5 @@  static struct rte_driver rte_enic_driver = {
 
 PMD_REGISTER_DRIVER(rte_enic_driver, enic);
 DRIVER_REGISTER_PCI_TABLE(enic, pci_id_enic_map);
+DRIVER_REGISTER_KMOD_DEP(enic,
+	"uio,igb_uio:uio,uio_pci_generic:vfio,vfio-pci");
diff --git a/drivers/net/fm10k/fm10k_ethdev.c b/drivers/net/fm10k/fm10k_ethdev.c
index 01f4a72..391ccd7 100644
--- a/drivers/net/fm10k/fm10k_ethdev.c
+++ b/drivers/net/fm10k/fm10k_ethdev.c
@@ -3086,3 +3086,5 @@  static struct rte_driver rte_fm10k_driver = {
 
 PMD_REGISTER_DRIVER(rte_fm10k_driver, fm10k);
 DRIVER_REGISTER_PCI_TABLE(fm10k, pci_id_fm10k_map);
+DRIVER_REGISTER_KMOD_DEP(fm10k,
+	"uio,igb_uio:uio,uio_pci_generic:vfio,vfio-pci");
diff --git a/drivers/net/i40e/i40e_ethdev.c b/drivers/net/i40e/i40e_ethdev.c
index d0aeb70..a1466aa 100644
--- a/drivers/net/i40e/i40e_ethdev.c
+++ b/drivers/net/i40e/i40e_ethdev.c
@@ -723,6 +723,8 @@  static struct rte_driver rte_i40e_driver = {
 
 PMD_REGISTER_DRIVER(rte_i40e_driver, i40e);
 DRIVER_REGISTER_PCI_TABLE(i40e, pci_id_i40e_map);
+DRIVER_REGISTER_KMOD_DEP(i40e,
+	"uio,igb_uio:uio,uio_pci_generic:vfio,vfio-pci");
 
 /*
  * Initialize registers for flexible payload, which should be set by NVM.
diff --git a/drivers/net/i40e/i40e_ethdev_vf.c b/drivers/net/i40e/i40e_ethdev_vf.c
index a616ae0..61be44a 100644
--- a/drivers/net/i40e/i40e_ethdev_vf.c
+++ b/drivers/net/i40e/i40e_ethdev_vf.c
@@ -1586,6 +1586,8 @@  static struct rte_driver rte_i40evf_driver = {
 
 PMD_REGISTER_DRIVER(rte_i40evf_driver, i40evf);
 DRIVER_REGISTER_PCI_TABLE(i40evf, pci_id_i40evf_map);
+DRIVER_REGISTER_KMOD_DEP(i40evf,
+	"uio,igb_uio:uio,uio_pci_generic:vfio,vfio-pci");
 
 static int
 i40evf_dev_configure(struct rte_eth_dev *dev)
diff --git a/drivers/net/ixgbe/ixgbe_ethdev.c b/drivers/net/ixgbe/ixgbe_ethdev.c
index fb618ef..e353d7a 100644
--- a/drivers/net/ixgbe/ixgbe_ethdev.c
+++ b/drivers/net/ixgbe/ixgbe_ethdev.c
@@ -7421,5 +7421,9 @@  static struct rte_driver rte_ixgbevf_driver = {
 
 PMD_REGISTER_DRIVER(rte_ixgbe_driver, ixgbe);
 DRIVER_REGISTER_PCI_TABLE(ixgbe, pci_id_ixgbe_map);
+DRIVER_REGISTER_KMOD_DEP(ixgbe,
+	"uio,igb_uio:uio,uio_pci_generic:vfio,vfio-pci");
 PMD_REGISTER_DRIVER(rte_ixgbevf_driver, ixgbevf);
 DRIVER_REGISTER_PCI_TABLE(ixgbevf, pci_id_ixgbevf_map);
+DRIVER_REGISTER_KMOD_DEP(ixgbevf,
+	"uio,igb_uio:uio,uio_pci_generic:vfio,vfio-pci");
diff --git a/drivers/net/mlx4/mlx4.c b/drivers/net/mlx4/mlx4.c
index 304c846..d8f6905 100644
--- a/drivers/net/mlx4/mlx4.c
+++ b/drivers/net/mlx4/mlx4.c
@@ -5948,3 +5948,5 @@  static struct rte_driver rte_mlx4_driver = {
 
 PMD_REGISTER_DRIVER(rte_mlx4_driver, mlx4);
 DRIVER_REGISTER_PCI_TABLE(mlx4, mlx4_pci_id_map);
+DRIVER_REGISTER_KMOD_DEP(mlx4,
+	"ib_uverbs,mlx4_core,mlx4_en,mlx4_ib");
diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index d96a9af..29d7332 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -763,3 +763,6 @@  static struct rte_driver rte_mlx5_driver = {
 
 PMD_REGISTER_DRIVER(rte_mlx5_driver, mlx5);
 DRIVER_REGISTER_PCI_TABLE(mlx5, mlx5_pci_id_map);
+DRIVER_REGISTER_KMOD_DEP(mlx5,
+	"ptp,inet_lro,ib_sa,ib_mad,ib_netlink,ib_addr,"
+	"ib_core,ib_uverbs,mlx5_core,mlx5_ib");
diff --git a/drivers/net/nfp/nfp_net.c b/drivers/net/nfp/nfp_net.c
index 82e3e4e..f4c8a39 100644
--- a/drivers/net/nfp/nfp_net.c
+++ b/drivers/net/nfp/nfp_net.c
@@ -2488,6 +2488,8 @@  static struct rte_driver rte_nfp_net_driver = {
 
 PMD_REGISTER_DRIVER(rte_nfp_net_driver, nfp);
 DRIVER_REGISTER_PCI_TABLE(nfp, pci_id_nfp_net_map);
+DRIVER_REGISTER_KMOD_DEP(nfp,
+	"uio,igb_uio:uio,uio_pci_generic:vfio,vfio-pci");
 
 /*
  * Local variables:
diff --git a/drivers/net/qede/qede_ethdev.c b/drivers/net/qede/qede_ethdev.c
index 82e44b8..a3c6b44 100644
--- a/drivers/net/qede/qede_ethdev.c
+++ b/drivers/net/qede/qede_ethdev.c
@@ -1530,5 +1530,9 @@  static struct rte_driver rte_qede_driver = {
 
 PMD_REGISTER_DRIVER(rte_qede_driver, qede);
 DRIVER_REGISTER_PCI_TABLE(qede, pci_id_qede_map);
+DRIVER_REGISTER_KMOD_DEP(qede,
+	"uio,igb_uio:uio,uio_pci_generic:vfio,vfio-pci");
 PMD_REGISTER_DRIVER(rte_qedevf_driver, qedevf);
 DRIVER_REGISTER_PCI_TABLE(qedevf, pci_id_qedevf_map);
+DRIVER_REGISTER_KMOD_DEP(qedevf,
+	"uio,igb_uio:uio,uio_pci_generic:vfio,vfio-pci");
diff --git a/drivers/net/szedata2/rte_eth_szedata2.c b/drivers/net/szedata2/rte_eth_szedata2.c
index 483d789..409e71f 100644
--- a/drivers/net/szedata2/rte_eth_szedata2.c
+++ b/drivers/net/szedata2/rte_eth_szedata2.c
@@ -1602,3 +1602,5 @@  static struct rte_driver rte_szedata2_driver = {
 
 PMD_REGISTER_DRIVER(rte_szedata2_driver, RTE_SZEDATA2_DRIVER_NAME);
 DRIVER_REGISTER_PCI_TABLE(RTE_SZEDATA2_DRIVER_NAME, rte_szedata2_pci_id_table);
+DRIVER_REGISTER_KMOD_DEP(RTE_SZEDATA2_DRIVER_NAME,
+	"uio,igb_uio:uio,uio_pci_generic:vfio,vfio-pci");
diff --git a/drivers/net/thunderx/nicvf_ethdev.c b/drivers/net/thunderx/nicvf_ethdev.c
index 4f875c0..8c33df2 100644
--- a/drivers/net/thunderx/nicvf_ethdev.c
+++ b/drivers/net/thunderx/nicvf_ethdev.c
@@ -1785,3 +1785,5 @@  static struct rte_driver rte_nicvf_driver = {
 
 PMD_REGISTER_DRIVER(rte_nicvf_driver, thunderx_nicvf);
 DRIVER_REGISTER_PCI_TABLE(thunderx_nicvf, pci_id_nicvf_map);
+DRIVER_REGISTER_KMOD_DEP(thunderx_nicvf,
+	"uio,igb_uio:uio,uio_pci_generic:vfio,vfio-pci");
diff --git a/drivers/net/virtio/virtio_ethdev.c b/drivers/net/virtio/virtio_ethdev.c
index 07d6449..f65b9a4 100644
--- a/drivers/net/virtio/virtio_ethdev.c
+++ b/drivers/net/virtio/virtio_ethdev.c
@@ -1570,3 +1570,5 @@  static struct rte_driver rte_virtio_driver = {
 
 PMD_REGISTER_DRIVER(rte_virtio_driver, virtio_net);
 DRIVER_REGISTER_PCI_TABLE(virtio_net, pci_id_virtio_map);
+DRIVER_REGISTER_KMOD_DEP(virtio_net,
+	"uio,igb_uio:uio,uio_pci_generic:vfio,vfio-pci");
diff --git a/drivers/net/vmxnet3/vmxnet3_ethdev.c b/drivers/net/vmxnet3/vmxnet3_ethdev.c
index 5874215..d2d07ad 100644
--- a/drivers/net/vmxnet3/vmxnet3_ethdev.c
+++ b/drivers/net/vmxnet3/vmxnet3_ethdev.c
@@ -955,3 +955,5 @@  static struct rte_driver rte_vmxnet3_driver = {
 
 PMD_REGISTER_DRIVER(rte_vmxnet3_driver, vmxnet3);
 DRIVER_REGISTER_PCI_TABLE(vmxnet3, pci_id_vmxnet3_map);
+DRIVER_REGISTER_KMOD_DEP(vmxnet3,
+	"uio,igb_uio:uio,uio_pci_generic:vfio,vfio-pci");
diff --git a/lib/librte_eal/common/include/rte_dev.h b/lib/librte_eal/common/include/rte_dev.h
index 95789f9..b721dc3 100644
--- a/lib/librte_eal/common/include/rte_dev.h
+++ b/lib/librte_eal/common/include/rte_dev.h
@@ -203,6 +203,20 @@  RTE_STR(table)
 static const char DRV_EXP_TAG(name, param_string_export)[] \
 __attribute__((used)) = str
 
+/**
+ * Advertise the list of kernel modules required to run this driver
+ *
+ * This string lists the names of kernel modules, separated by commas. The
+ * order is important. If several module lists are possible, they are
+ * separated by colons.
+ *
+ * Example: "uio,igb_uio:uio,uio_pci_generic" means either "uio,igb_uio"
+ * or "uio,uio_pci_generic".
+ */
+#define DRIVER_REGISTER_KMOD_DEP(name, str) \
+static const char DRV_EXP_TAG(name, kmod_dep_export)[] \
+__attribute__((used)) = str
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/tools/dpdk-pmdinfo.py b/tools/dpdk-pmdinfo.py
index 3db9819..17bfed4 100755
--- a/tools/dpdk-pmdinfo.py
+++ b/tools/dpdk-pmdinfo.py
@@ -312,7 +312,10 @@  class ReadElf(object):
         global raw_output
         global pcidb
 
-        optional_pmd_info = [{'id': 'params', 'tag': 'PMD PARAMETERS'}]
+        optional_pmd_info = [
+            {'id': 'params', 'tag': 'PMD PARAMETERS'},
+            {'id': 'kmod', 'tag': 'PMD KMOD DEPENDENCIES'}
+        ]
 
         i = mystring.index("=")
         mystring = mystring[i + 2:]