[dpdk-dev,v1,11/12] lpm/arm: implement rte_lpm_lookupx4 using rte_lpm_lookup_bulk on non-x86
Commit Message
From: Vlastimil Kosar <kosar@rehivetech.com>
LPM function rte_lpm_lookupx4() uses i686/x86_64 SIMD intrinsics. Therefore,
the function is reimplemented using non-vector operations for non-x86
architectures. In the future, each architecture should have vectorized code.
This patch includes rudimentary emulation of intrinsic functions _mm_set_epi32(),
_mm_loadu_si128() and _mm_load_si128() for easy portability of existing
applications.
LPM now builds on ARM.
FIXME: to be reworked
Signed-off-by: Vlastimil Kosar <kosar@rehivetech.com>
Signed-off-by: Jan Viktorin <viktorin@rehivetech.com>
---
config/defconfig_arm-armv7-a-linuxapp-gcc | 1 -
lib/librte_lpm/rte_lpm.h | 71 +++++++++++++++++++++++++++++++
2 files changed, 71 insertions(+), 1 deletion(-)
@@ -56,7 +56,6 @@ CONFIG_XMM_SIZE=16
# fails to compile on ARM
CONFIG_RTE_LIBRTE_ACL=n
-CONFIG_RTE_LIBRTE_LPM=n
# cannot use those on ARM
CONFIG_RTE_KNI_KMOD=n
@@ -47,7 +47,9 @@
#include <rte_byteorder.h>
#include <rte_memory.h>
#include <rte_common.h>
+#if defined(RTE_ARCH_X86_64) || defined(RTE_ARCH_I686)
#include <rte_vect.h>
+#endif
#ifdef __cplusplus
extern "C" {
@@ -369,6 +371,7 @@ rte_lpm_lookup_bulk_func(const struct rte_lpm *lpm, const uint32_t * ips,
return 0;
}
+#if defined(RTE_ARCH_X86_64) || defined(RTE_ARCH_I686)
/* Mask four results. */
#define RTE_LPM_MASKX4_RES UINT64_C(0x00ff00ff00ff00ff)
@@ -483,6 +486,74 @@ rte_lpm_lookupx4(const struct rte_lpm *lpm, __m128i ip, uint16_t hop[4],
hop[2] = (tbl[2] & RTE_LPM_LOOKUP_SUCCESS) ? (uint8_t)tbl[2] : defv;
hop[3] = (tbl[3] & RTE_LPM_LOOKUP_SUCCESS) ? (uint8_t)tbl[3] : defv;
}
+#else
+// TODO: this code should be reworked.
+
+typedef struct { /* stand-in for the x86 __m128i type, used in the non-x86 branch */
+ union uint128 { /* two overlapping views of the same 16 bytes */
+ uint8_t uint8[16]; /* byte view */
+ uint32_t uint32[4]; /* four 32-bit lanes; rte_lpm_lookupx4() reads its IPs from here */
+ } val;
+} __m128i;
+
+/**
+ * Portable stand-in for the x86 _mm_set_epi32() intrinsic.
+ *
+ * As with the x86 intrinsic, the arguments are listed from the highest
+ * 32-bit lane down to the lowest, i.e. the LAST argument ends up in lane 0.
+ * Keeping this order is what allows code written for x86 to produce the
+ * same lane layout when built against this emulation.
+ *
+ * @param v0
+ *   Value stored in lane 3 (val.uint32[3]).
+ * @param v1
+ *   Value stored in lane 2 (val.uint32[2]).
+ * @param v2
+ *   Value stored in lane 1 (val.uint32[1]).
+ * @param v3
+ *   Value stored in lane 0 (val.uint32[0]).
+ * @return
+ *   A vector holding the four values.
+ */
+static inline __m128i
+_mm_set_epi32(uint32_t v0, uint32_t v1, uint32_t v2, uint32_t v3)
+{
+	__m128i res;
+
+	res.val.uint32[3] = v0;
+	res.val.uint32[2] = v1;
+	res.val.uint32[1] = v2;
+	res.val.uint32[0] = v3;
+	return res;
+}
+
+/**
+ * Portable stand-in for the x86 _mm_loadu_si128() intrinsic (unaligned load).
+ *
+ * The source is read one byte at a time, so the address does not have to
+ * satisfy the alignment of __m128i.
+ *
+ * @param v
+ *   Address of the 16 bytes to load; it is only read, never written.
+ * @return
+ *   A vector holding a copy of the 16 bytes found at v.
+ */
+static inline __m128i
+_mm_loadu_si128(const __m128i *v)
+{
+	const uint8_t *src = (const uint8_t *)v;
+	__m128i res;
+	unsigned int i;
+
+	/* Byte-wise copy: a direct *v would require v to be suitably aligned. */
+	for (i = 0; i < sizeof(res.val.uint8); i++) {
+		res.val.uint8[i] = src[i];
+	}
+	return res;
+}
+
+/**
+ * Portable stand-in for the x86 _mm_load_si128() intrinsic (aligned load).
+ *
+ * The pointer is dereferenced directly, so it must point to a valid,
+ * suitably aligned __m128i object.
+ *
+ * @param v
+ *   Address of the vector to load; it is only read, never written.
+ * @return
+ *   A copy of the vector stored at v.
+ */
+static inline __m128i
+_mm_load_si128(const __m128i *v)
+{
+	return *v;
+}
+
+/**
+ * Look up four IP addresses in an LPM table (non-vectorized fallback).
+ *
+ * The four addresses are handed to rte_lpm_lookup_bulk() in one call, and
+ * each raw result is then reduced to either a next hop or the default value.
+ *
+ * @param lpm
+ *   LPM object handle
+ * @param ip
+ *   Four IPs to be looked up in the LPM table
+ * @param hop
+ *   Next hop of the most specific rule found for each IP (valid on lookup
+ *   hit only). This is a 4-element array of two-byte values.
+ *   If the lookup was successful for a given IP, the least significant byte
+ *   of the corresponding element is the actual next hop and the most
+ *   significant byte is zero.
+ *   If the lookup for a given IP failed, the corresponding element contains
+ *   the default value, see the description of the next parameter.
+ * @param defv
+ *   Default value to store into the corresponding element of the hop[]
+ *   array if the lookup fails.
+ */
+static inline void
+rte_lpm_lookupx4(const struct rte_lpm *lpm, __m128i ip, uint16_t hop[4],
+	uint16_t defv)
+{
+	unsigned int lane;
+
+	rte_lpm_lookup_bulk(lpm, ip.val.uint32, hop, 4);
+
+	/* Keep only the low byte on a hit, substitute defv on a miss. */
+	for (lane = 0; lane < 4; lane++) {
+		if (hop[lane] & RTE_LPM_LOOKUP_SUCCESS)
+			hop[lane] = (uint8_t)hop[lane];
+		else
+			hop[lane] = defv;
+	}
+}
+#endif
#ifdef __cplusplus
}