#ifndef PCAPPP_DPDK_DEVICE #define PCAPPP_DPDK_DEVICE // GCOVR_EXCL_START #include #include #include #include "MacAddress.h" #include "SystemUtils.h" #include "Device.h" #include "MBufRawPacket.h" /** * @file * This file and DpdkDeviceList.h provide PcapPlusPlus C++ wrapper for DPDK (stands for data-plan development kit). What is * DPDK? as quoting from http://dpdk.org: "DPDK is a set of libraries and drivers for fast packet processing... These libraries can be used to: * receive and send packets within the minimum number of CPU cycles (usually less than 80 cycles)... develop fast packet capture algorithms * (tcpdump-like)... run third-party fast path stacks... Some packet processing functions have been benchmarked up to hundreds million * frames per second, using 64-byte packets with a PCIe NIC"
* As the DPDK API is written in C, PcapPlusPlus wraps its main functionality in easy-to-use C++ classes which should have minimal effect
 * on performance and packet processing rate. In addition it brings DPDK to the PcapPlusPlus framework and API so you can use DPDK
 * together with other PcapPlusPlus features such as packet parsing and editing, etc.
* So how does DPDK basically work? In order to boost packet processing performance on a commodity server DPDK bypasses the Linux kernel.
 * All the packet processing activity happens in user space, so packets are delivered from NIC hardware queues directly
 * to user-space shared memory without going through the kernel. In addition DPDK uses polling instead of handling interrupts for each
 * arrived packet (as interrupts create some delays). Other methods DPDK implements to boost packet processing are: using Hugepages to
 * decrease the size of TLB that results in a much faster virtual to physical page conversion, thread affinity to bind threads to a
 * specific core, lock-free user-space multi-core synchronization using ring data structures, and NUMA awareness to avoid expensive data
 * transfers between sockets.
* Not every NIC supports kernel-bypass capabilities, so DPDK cannot work with every NIC. The list of supported NICs is on DPDK's web-site:
 * http://dpdk.org/doc/nics . For each such NIC the DPDK framework provides a module called a poll-mode-driver (PMD in short) that
 * enables this NIC to work with DPDK. PcapPlusPlus wasn't tested with most PMDs but all of them should theoretically work as
 * PcapPlusPlus doesn't change the PMD behavior
* DPDK has another basic data-structure called mbuf. An mbuf is DPDK's wrapper struct for network packets. When working with packets
 * in DPDK you actually work with mbufs. The mbuf contains the packet data (obviously) but also some metadata on the packet such
 * as the DPDK port it was captured on, packet ref-count (which allows it to be referenced by several objects), etc. One important
 * concept is that DPDK doesn't allocate mbufs on-the-fly but uses mbuf pools. These pools are allocated on application startup and
 * used throughout the application. The goal of this, of course, is increasing packet processing performance as allocating memory has
 * its cost. So pool size is important and varies between applications. For example: an application that stores packets in memory
 * has to have a large pool of mbufs so it doesn't run out of mbufs. PcapPlusPlus enables choosing the pool size at startup
*
* PcapPlusPlus main wrapper classes for DPDK are:
 *    - DpdkDevice - a class that wraps a DPDK port and provides all capabilities of receiving and sending packets to this port
 *    - DpdkDeviceList - a singleton class that initializes the DPDK infrastructure and creates DpdkDevice instances for all available ports.
 *      In addition it allows starting and stopping of worker threads
 *    - MBufRawPacket - a child class of RawPacket which customizes it for working with mbuf
 *    - In addition PcapPlusPlus provides a script to initialize DPDK prerequisites: setup_dpdk.py. This is an easy-to-use script
 *      that sets up huge-pages, loads the DPDK kernel module and sets up the NICs that will be used by DPDK. This script must run before an
 *      application that uses DPDK runs. If you forget to run it the application will fail with an appropriate error as a reminder
 *
 * DPDK initialization using PcapPlusPlus:
 *    - Before application runs: run the setup_dpdk.py script
 *    - On application startup call DpdkDeviceList#initDpdk() static method to initialize DPDK infrastructure and DpdkDevice instances
 *    - Open the relevant DpdkDevice(s)
 *    - Send & receive packets...
 */

// Forward declarations only: these structs are declared here but not defined in this header
struct rte_mbuf;
struct rte_mempool;
struct rte_eth_conf;
struct rte_eth_dev_tx_buffer;

/**
 * \namespace pcpp
 * \brief The main namespace for the PcapPlusPlus lib
 */
namespace pcpp
{

	// Length of the per-RX-queue statistics array in DpdkDevice::DpdkDeviceStats
#define DPDK_MAX_RX_QUEUES 16
	// TX counterpart of DPDK_MAX_RX_QUEUES
#define DPDK_MAX_TX_QUEUES 16

	// Sentinel value assigned to DpdkDevice::RSS_DEFAULT
#define PCPP_RSS_HASH_MAGIC_NUMBER 0x123456

	class DpdkDeviceList;
	class DpdkDevice;

	/**
	 * An enum describing all PMD (poll mode driver) types supported by DPDK.
	 * For more info about these PMDs please visit the DPDK web-site
	 */
	enum DpdkPMDType {
		/** Unknown PMD type */
		PMD_UNKNOWN,
		/** Link Bonding for 1GbE and 10GbE ports to allow the aggregation of multiple (slave) NICs into a single logical interface */
		PMD_BOND,
		/** Intel E1000 PMD */
		PMD_E1000EM,
		/** Intel 1GbE PMD */
		PMD_IGB,
		/** Intel 1GbE virtual function PMD */
		PMD_IGBVF,
		/** Cisco enic (UCS Virtual Interface Card) PMD */
		PMD_ENIC,
		/** Intel fm10k PMD */
		PMD_FM10K,
		/** Intel 40GbE PMD */
		PMD_I40E,
		/** Intel 40GbE virtual function PMD */
		PMD_I40EVF,
		/** Intel 10GbE PMD */
		PMD_IXGBE,
		/** Intel 10GbE virtual function PMD */
		PMD_IXGBEVF,
		/** Mellanox ConnectX-3, ConnectX-3 Pro PMD */
		PMD_MLX4,
		/** Null PMD */
		PMD_NULL,
		/** pcap file PMD */
		PMD_PCAP,
		/** ring-based (memory) PMD */
		PMD_RING,
		/** VirtIO PMD */
		PMD_VIRTIO,
		/** VMWare VMXNET3 PMD */
		PMD_VMXNET3,
		/** Xen Project PMD */
		PMD_XENVIRT,
		/** AF_PACKET PMD */
		PMD_AF_PACKET
	};

	/**
	 * @typedef OnDpdkPacketsArriveCallback
	 * A callback that is called when a burst of packets is captured by DpdkDevice
	 * @param[in] packets A pointer to an array of MBufRawPacket
	 * @param[in] numOfPackets The length of the array
	 * @param[in] threadId The thread/core ID that captured the packets
	 * @param[in] device A pointer to the DpdkDevice that captured the packets
	 * @param[in] userCookie The user cookie assigned by the user in DpdkDevice#startCaptureSingleThread() or DpdkDevice#startCaptureMultiThreads
	 */
	typedef void (*OnDpdkPacketsArriveCallback)(MBufRawPacket* packets, uint32_t numOfPackets, uint8_t threadId, DpdkDevice* device, void* userCookie);

	/**
	 * @class DpdkDevice
	 * Encapsulates a DPDK port and enables receiving and sending packets using DPDK as well as getting interface info & status, packet
	 * statistics, etc. This class has no public c'tor as it's constructed by DpdkDeviceList during initialization.
*
	 * __RX/TX queues__: modern NICs provide hardware load-balancing for packets. This means that each packet received by the NIC is hashed
	 * by one or more parameters (IP address, port, etc.) and goes into one of several RX queues provided by the NIC. This enables
	 * applications to work in a multi-core environment where each core can read packets from different RX queue(s). Same goes for TX
	 * queues: it's possible to write packets to different TX queues and the NIC takes care of sending them to the network.
	 * Different NICs provide a different number of RX and TX queues. DPDK supports this capability and enables the user to open the
	 * DPDK port (DpdkDevice) with a single or multiple RX and TX queues. When receiving packets the user can decide which RX queue
	 * to read from, and when transmitting packets the user can decide which TX queue to send them to. RX/TX queues are configured
	 * when opening the DpdkDevice (see openMultiQueues())
* * __Capturing packets__: there are two ways to capture packets using DpdkDevice: * - using worker threads (see DpdkDeviceList#startDpdkWorkerThreads() ). When using this method the worker should use the * DpdkDevice#receivePackets() methods to get packets from the DpdkDevice * - by setting a callback which is invoked each time a burst of packets arrives. For more details see * DpdkDevice#startCaptureSingleThread() * * __Sending packets:__ DpdkDevice has various methods for sending packets. They enable sending raw packets, parsed packets, etc. * for all opened TX queues. Also, DPDK provides an option to buffer TX packets and send them only when reaching a certain threshold (you * can read more about it here: http://dpdk.org/doc/api/rte__ethdev_8h.html#a0e941a74ae1b1b886764bc282458d946). DpdkDevice supports that * option as well. See DpdkDevice#sendPackets()
*
	 * __Get interface info__: DpdkDevice provides all kinds of information on the interface/device such as MAC address, MTU, link status,
	 * PCI address, PMD (poll-mode-driver) used for this port, etc. In addition it provides RX/TX statistics when receiving or sending
	 * packets
*
	 * __Known limitations:__
	 *    - BPF filters are currently not supported by this device (as opposed to other PcapPlusPlus device types). This means that the
	 *      device cannot filter packets before they get to the user
	 *    - It's not possible to set or change NIC load-balancing method. DPDK provides this capability but it's still not
	 *      supported by DpdkDevice
	 */
	class DpdkDevice : public IDevice
	{
		friend class DpdkDeviceList;
		friend class MBufRawPacket;

	public:

		/**
		 * The RSS (Receive Side Scaling) hash functions available in DPDK. Each regular enumerator is a single bit so several
		 * of them can be OR-ed together into a mask. Notice that not every PMD supports every hash function
		 */
		enum DpdkRssHashFunction
		{
			/** No RSS */
			RSS_NONE               = 0,
			/** IPv4 based flow */
			RSS_IPV4               = 0x1,
			/** Fragmented IPv4 based flow */
			RSS_FRAG_IPV4          = 0x2,
			/** Non-fragmented IPv4 + TCP flow */
			RSS_NONFRAG_IPV4_TCP   = 0x4,
			/** Non-fragmented IPv4 + UDP flow */
			RSS_NONFRAG_IPV4_UDP   = 0x8,
			/** Non-fragmented IPv4 + SCTP flow */
			RSS_NONFRAG_IPV4_SCTP  = 0x10,
			/** Non-fragmented IPv4 + non TCP/UDP/SCTP flow */
			RSS_NONFRAG_IPV4_OTHER = 0x20,
			/** IPv6 based flow */
			RSS_IPV6               = 0x40,
			/** Fragmented IPv6 based flow */
			RSS_FRAG_IPV6          = 0x80,
			/** Non-fragmented IPv6 + TCP flow */
			RSS_NONFRAG_IPV6_TCP   = 0x100,
			/** Non-fragmented IPv6 + UDP flow */
			RSS_NONFRAG_IPV6_UDP   = 0x200,
			/** Non-fragmented IPv6 + SCTP flow */
			RSS_NONFRAG_IPV6_SCTP  = 0x400,
			/** Non-fragmented IPv6 + non TCP/UDP/SCTP flow */
			RSS_NONFRAG_IPV6_OTHER = 0x800,
			/** L2 payload based flow */
			RSS_L2_PAYLOAD         = 0x1000,
			/** IPv6 Ex based flow */
			RSS_IPV6_EX            = 0x2000,
			/** IPv6 + TCP Ex based flow */
			RSS_IPV6_TCP_EX        = 0x4000,
			/** IPv6 + UDP Ex based flow */
			RSS_IPV6_UDP_EX        = 0x8000,
			/** Consider device port number as a flow differentiator */
			RSS_PORT               = 0x10000,
			/** VXLAN protocol based flow */
			RSS_VXLAN              = 0x20000,
			/** GENEVE protocol based flow */
			RSS_GENEVE             = 0x40000,
			/** NVGRE protocol based flow */
			RSS_NVGRE              = 0x80000,
			/** All RSS functions supported by the device */
			RSS_ALL_SUPPORTED      = -1,
			/** A default set of RSS functions supported by the device */
			RSS_DEFAULT            = PCPP_RSS_HASH_MAGIC_NUMBER
		};

		/**
		 * @struct DpdkDeviceConfiguration
		 * User-tunable parameters applied when a DpdkDevice is opened. Every parameter has a default value, so callers
		 * don't have to set any of them or understand their exact effect
		 */
		struct DpdkDeviceConfiguration
		{
			/**
			 * Number of descriptors DPDK creates for each RX queue. DPDK uses these descriptors for receiving packets
			 * from the network into that RX queue
			 */
			uint16_t receiveDescriptorsNumber;

			/**
			 * Number of descriptors DPDK creates for each TX queue. DPDK uses these descriptors for transmitting packets
			 * to the network through that TX queue
			 */
			uint16_t transmitDescriptorsNumber;

			/**
			 * The TX buffer flush timeout in millisecond (only relevant if sending packets using DPDK TX buffer mechanism).
			 * A value of zero means no timeout.
			 * NOTE(review): the c'tor documentation below describes this value in usec - confirm which unit is correct
			 */
			uint16_t flushTxBufferTimeout;

			/**
			 * DPDK can activate the Receive Side Scaling (RSS) feature to distribute traffic between the RX queues.
			 * This member points to an array holding the RSS key used for hashing specific header fields of received packets.
			 * The length of the array is given by rssKeyLength below.
			 * Supplying a NULL value causes a default random hash key to be used by the device driver
			 */
			uint8_t* rssKey;

			/**
			 * The length in bytes of the array pointed to by rssKey.
			 * This length will be checked in i40e only. Others assume 40 bytes to be used.
			 */
			uint8_t rssKeyLength;

			/**
			 * Selects the types of packets RSS hashing is applied to. The value is a mask composed of the hash functions
			 * described in the DpdkRssHashFunction enum. A value of zero disables the RSS feature.
			 * A value of -1 enables all hash functions supported by this PMD
			 */
			uint64_t rssHashFunction;

			/**
			 * A c'tor for this struct
			 * @param[in] receiveDescriptorsNumber An optional parameter for defining the number of RX descriptors that will be allocated
			 * for each RX queue. Default value is 128
			 * @param[in] transmitDescriptorsNumber An optional parameter for defining the number of TX descriptors that will be allocated
			 * for each TX queue. Default value is 512
			 * @param[in] flushTxBufferTimeout An optional parameter for setting TX buffer timeout in usec. Default value is 100 usec
			 * @param[in] rssHashFunction The types of packets to which the RSS hashing must be applied. The value provided here
			 * should be a mask composed of hash functions described in DpdkRssHashFunction enum. The default value is RSS_DEFAULT.
			 * @param[in] rssKey A pointer to an array holding the RSS key to use for hashing specific header of received packets. If not
			 * specified, there is a default key defined inside DpdkDevice
			 * @param[in] rssKeyLength The length in bytes of the array pointed by rssKey. Default value is the length of default rssKey
			 */
			explicit DpdkDeviceConfiguration(uint16_t receiveDescriptorsNumber = 128,
					uint16_t transmitDescriptorsNumber = 512,
					uint16_t flushTxBufferTimeout = 100,
					uint64_t rssHashFunction = RSS_DEFAULT,
					uint8_t* rssKey = DpdkDevice::m_RSSKey,
					uint8_t rssKeyLength = 40)
				// initializers are listed in member declaration order; each one copies the same-named parameter
				: receiveDescriptorsNumber(receiveDescriptorsNumber),
				  transmitDescriptorsNumber(transmitDescriptorsNumber),
				  flushTxBufferTimeout(flushTxBufferTimeout),
				  rssKey(rssKey),
				  rssKeyLength(rssKeyLength),
				  rssHashFunction(rssHashFunction)
			{
			}
		};

		/**
		 * @struct LinkStatus
		 * A struct that contains the link status of a DpdkDevice (DPDK port).
Returned from DpdkDevice#getLinkStatus() */ struct LinkStatus { /** Enum for describing link duplex */ enum LinkDuplex { /** Full duplex */ FULL_DUPLEX, /** Half duplex */ HALF_DUPLEX }; /** True if link is up, false if it's down */ bool linkUp; /** Link speed in Mbps (for example: 10Gbe will show 10000) */ int linkSpeedMbps; /** Link duplex (half/full duplex) */ LinkDuplex linkDuplex; }; /** * @struct RxTxStats * A container for RX/TX statistics */ struct RxTxStats { /** Total number of packets */ uint64_t packets; /** Total number of successfully received bytes */ uint64_t bytes; /** Packets per second */ uint64_t packetsPerSec; /** Bytes per second */ uint64_t bytesPerSec; }; /** * @struct DpdkDeviceStats * A container for DpdkDevice statistics */ struct DpdkDeviceStats { /** DpdkDevice ID */ uint8_t devId; /** The timestamp of when the stats were written */ timespec timestamp; /** RX statistics per RX queue */ RxTxStats rxStats[DPDK_MAX_RX_QUEUES]; /** TX statistics per TX queue */ RxTxStats txStats[DPDK_MAX_RX_QUEUES]; /** RX statistics, aggregated for all RX queues */ RxTxStats aggregatedRxStats; /** TX statistics, aggregated for all TX queues */ RxTxStats aggregatedTxStats; /** Total number of RX packets dropped by H/W because there are no available buffers (i.e RX queues are full) */ uint64_t rxPacketsDroppedByHW; /** Total number of erroneous packets */ uint64_t rxErroneousPackets; /** Total number of RX mbuf allocation failures */ uint64_t rxMbufAlocFailed; }; virtual ~DpdkDevice(); /** * @return The device ID (DPDK port ID) */ int getDeviceId() const { return m_Id; } /** * @return The device name which is in the format of 'DPDK_[PORT-ID]' */ std::string getDeviceName() const { return m_DeviceName; } /** * @return The MAC address of the device (DPDK port) */ MacAddress getMacAddress() const { return m_MacAddress; } /** * @return The name of the PMD (poll mode driver) DPDK is using for this device. 
You can read about PMDs in the DPDK documentation:
		 * http://dpdk.org/doc/guides/prog_guide/poll_mode_drv.html
		 */
		std::string getPMDName() const { return m_PMDName; }

		/**
		 * @return The enum type of the PMD (poll mode driver) DPDK is using for this device (see DpdkPMDType).
		 * You can read about PMDs in the DPDK documentation:
		 * http://dpdk.org/doc/guides/prog_guide/poll_mode_drv.html
		 */
		DpdkPMDType getPMDType() const { return m_PMDType; }

		/**
		 * @return The PCI address of the device
		 */
		std::string getPciAddress() const { return m_PciAddress; }

		/**
		 * @return The device's maximum transmission unit (MTU) in bytes
		 */
		uint16_t getMtu() const { return m_DeviceMtu; }

		/**
		 * Set a new maximum transmission unit (MTU) for this device
		 * @param[in] newMtu The new MTU in bytes
		 * @return True if MTU was set successfully, false if operation failed or if PMD doesn't support changing the MTU
		 */
		bool setMtu(uint16_t newMtu);

		/**
		 * @return True if this device is a virtual interface (such as VMXNET3, 1G/10G virtual function, etc.), false otherwise
		 */
		bool isVirtual() const;

		/**
		 * Get the link status (link up/down, link speed and link duplex)
		 * @param[out] linkStatus A reference to the object the result shall be written to
		 */
		void getLinkStatus(LinkStatus& linkStatus) const;

		/**
		 * @return The core ID used in this context
		 */
		uint32_t getCurrentCoreId() const;

		/**
		 * @return The number of RX queues currently opened for this device (as configured in openMultiQueues() )
		 */
		uint16_t getNumOfOpenedRxQueues() const { return m_NumOfRxQueuesOpened; }

		/**
		 * @return The number of TX queues currently opened for this device (as configured in openMultiQueues() )
		 */
		uint16_t getNumOfOpenedTxQueues() const { return m_NumOfTxQueuesOpened; }

		/**
		 * @return The total number of RX queues available on this device
		 */
		uint16_t getTotalNumOfRxQueues() const { return m_TotalAvailableRxQueues; }

		/**
		 * @return The total number of TX queues available on this device
		 */
		uint16_t getTotalNumOfTxQueues() const { return m_TotalAvailableTxQueues; }
/**
		 * Receive raw packets from the network
		 * @param[out] rawPacketsArr A vector where all received packets will be written into
		 * @param[in] rxQueueId The RX queue to receive packets from
		 * @return The number of packets received. If an error occurred 0 will be returned and the error will be printed to log
		 */
		uint16_t receivePackets(MBufRawPacketVector& rawPacketsArr, uint16_t rxQueueId) const;

		/**
		 * Receive raw packets from the network. Please notice that in terms of performance, this is the best method to use
		 * for receiving packets because out of all receivePackets overloads this method requires the least overhead and is
		 * almost as efficient as receiving packets directly through DPDK. So if performance is a critical factor in your
		 * application, please use this method
		 * @param[out] rawPacketsArr A pointer to an array of MBufRawPacket pointers where all received packets will be written into. The array is expected to
		 * be allocated by the user and its length should be provided in rawPacketArrLength. Number of packets received will be returned.
		 * Notice it's the user's responsibility to free the array and its content when done using it
		 * @param[in] rawPacketArrLength The length of MBufRawPacket pointers array (passed by value, so it's an input only)
		 * @param[in] rxQueueId The RX queue to receive packets from
		 * @return The number of packets received. If an error occurred 0 will be returned and the error will be printed to log
		 */
		uint16_t receivePackets(MBufRawPacket** rawPacketsArr, uint16_t rawPacketArrLength, uint16_t rxQueueId) const;

		/**
		 * Receive parsed packets from the network
		 * @param[out] packetsArr A pointer to an allocated array of Packet pointers where all received packets will be written into. The array is expected to
		 * be allocated by the user and its length should be provided in packetsArrLength. Number of packets received will be returned.
		 * Notice it's the user's responsibility to free the array and its content when done using it
		 * @param[in] packetsArrLength The length of Packet pointers array (passed by value, so it's an input only)
		 * @param[in] rxQueueId The RX queue to receive packets from
		 * @return The number of packets received. If an error occurred 0 will be returned and the error will be printed to log
		 */
		uint16_t receivePackets(Packet** packetsArr, uint16_t packetsArrLength, uint16_t rxQueueId) const;

		/**
		 * Send an array of MBufRawPacket to the network. Please notice the following:
* - In terms of performance, this is the best method to use for sending packets because out of all sendPackets overloads
		 *   this method requires the least overhead and is almost as efficient as sending the packets directly through DPDK. So if performance
		 *   is a critical factor in your application, please use this method
		 * - If the number of packets to send is higher than 64 this method will run multiple iterations of sending packets to DPDK, each
		 *   iteration of 64 packets
		 * - If the number of packets to send is higher than a threshold of 80% of total TX descriptors (which is typically around 400 packets),
		 *   then after reaching this threshold there is a built-in 0.2 sec sleep to let the TX descriptors clean
		 * - The mbufs used in this method aren't freed by this method, they will be transparently freed by DPDK
		 *
		 * @param[in] rawPacketsArr A pointer to an array of MBufRawPacket pointers
		 * @param[in] arrLength The length of the array
		 * @param[in] txQueueId An optional parameter which indicates which TX queue the packets will be sent to. The default is
		 * TX queue 0
		 * @param[in] useTxBuffer A flag which indicates whether to use TX buffer mechanism or not. To read more about DPDK's
		 * TX buffer mechanism please refer to DpdkDevice class description. Default value is false (don't use this mechanism)
		 * @return The number of packets actually and successfully sent. If device is not opened or TX queue isn't open, 0 will be returned.
		 * Also, if TX buffer is being used and packets are buffered, some or all may not be actually sent
		 */
		uint16_t sendPackets(MBufRawPacket** rawPacketsArr, uint16_t arrLength, uint16_t txQueueId = 0, bool useTxBuffer = false);

		/**
		 * Send an array of parsed packets to the network. Please notice the following:
* - If some or all of the packets contain raw packets which aren't of type MBufRawPacket, new temp MBufRawPacket instances
		 *   will be created and packet data will be copied to them. This is necessary to allocate mbufs which will store the data to be sent.
		 *   If performance is a critical factor please make sure you send parsed packets that contain only raw packets of type MBufRawPacket
		 * - If the number of packets to send is higher than 64 this method will run multiple iterations of sending packets to DPDK, each
		 *   iteration of 64 packets
		 * - If the number of packets to send is higher than a threshold of 80% of total TX descriptors (which is typically around 400 packets),
		 *   then after reaching this threshold there is a built-in 0.2 sec sleep to let the TX descriptors clean
		 * - The mbufs used or allocated in this method aren't freed by this method, they will be transparently freed by DPDK
		 *
		 * @param[in] packetsArr A pointer to an array of parsed packet pointers
		 * @param[in] arrLength The length of the array
		 * @param[in] txQueueId An optional parameter which indicates which TX queue the packets will be sent to. The default is
		 * TX queue 0
		 * @param[in] useTxBuffer A flag which indicates whether to use TX buffer mechanism or not. To read more about DPDK's
		 * TX buffer mechanism please refer to DpdkDevice class description. Default value is false (don't use this mechanism)
		 * @return The number of packets actually and successfully sent. If device is not opened or TX queue isn't open, 0 will be returned.
		 * Also, if TX buffer is being used and packets are buffered, some or all may not be actually sent
		 */
		uint16_t sendPackets(Packet** packetsArr, uint16_t arrLength, uint16_t txQueueId = 0, bool useTxBuffer = false);

		/**
		 * Send a vector of MBufRawPacket pointers to the network. Please notice the following:
* - If the number of packets to send is higher than 64 this method will run multiple iterations of sending packets to DPDK, each
		 *   iteration of 64 packets
		 * - If the number of packets to send is higher than a threshold of 80% of total TX descriptors (which is typically around 400 packets),
		 *   then after reaching this threshold there is a built-in 0.2 sec sleep to let the TX descriptors clean
		 * - The mbufs used in this method aren't freed by this method, they will be transparently freed by DPDK
		 *
		 * @param[in] rawPacketsVec The vector of raw packets
		 * @param[in] txQueueId An optional parameter which indicates which TX queue the packets will be sent to. The default is
		 * TX queue 0
		 * @param[in] useTxBuffer A flag which indicates whether to use TX buffer mechanism or not. To read more about DPDK's
		 * TX buffer mechanism please refer to DpdkDevice class description. Default value is false (don't use this mechanism)
		 * @return The number of packets actually and successfully sent. If device is not opened or TX queue isn't open, 0 will be returned.
		 * Also, if TX buffer is being used and packets are buffered, some or all may not be actually sent
		 */
		uint16_t sendPackets(MBufRawPacketVector& rawPacketsVec, uint16_t txQueueId = 0, bool useTxBuffer = false);

		/**
		 * Send a vector of RawPacket pointers to the network. Please notice the following:
* - If some or all of the raw packets aren't of type MBufRawPacket, new temp MBufRawPacket instances will be created
		 *   and packet data will be copied to them. This is necessary to allocate mbufs which will store the data to be sent. If
		 *   performance is a critical factor please make sure you send only raw packets of type MBufRawPacket (or use the sendPackets overload
		 *   that sends MBufRawPacketVector)
		 * - If the number of packets to send is higher than 64 this method will run multiple iterations of sending packets to DPDK, each
		 *   iteration of 64 packets
		 * - If the number of packets to send is higher than a threshold of 80% of total TX descriptors (which is typically around 400 packets),
		 *   then after reaching this threshold there is a built-in 0.2 sec sleep to let the TX descriptors clean
		 * - The mbufs used or allocated in this method aren't freed by this method, they will be transparently freed by DPDK
		 *
		 * @param[in] rawPacketsVec The vector of raw packets
		 * @param[in] txQueueId An optional parameter which indicates which TX queue the packets will be sent to. The default is
		 * TX queue 0
		 * @param[in] useTxBuffer A flag which indicates whether to use TX buffer mechanism or not. To read more about DPDK's
		 * TX buffer mechanism please refer to DpdkDevice class description. Default value is false (don't use this mechanism)
		 * @return The number of packets actually and successfully sent. If device is not opened or TX queue isn't open, 0 will be returned.
		 * Also, if TX buffer is being used and packets are buffered, some or all may not be actually sent
		 */
		uint16_t sendPackets(RawPacketVector& rawPacketsVec, uint16_t txQueueId = 0, bool useTxBuffer = false);

		/**
		 * Send a raw packet to the network. Please notice that if the raw packet isn't of type MBufRawPacket, a new temp MBufRawPacket
		 * will be created and the data will be copied to it. This is necessary to allocate an mbuf which will store the data to be sent.
		 * If performance is a critical factor please make sure you send a raw packet of type MBufRawPacket. Please also notice that the
		 * mbuf used or allocated in this method isn't freed by this method, it will be transparently freed by DPDK
		 * @param[in] rawPacket The raw packet to send
		 * @param[in] txQueueId An optional parameter which indicates which TX queue the packet will be sent to. The default is
		 * TX queue 0
		 * @param[in] useTxBuffer A flag which indicates whether to use TX buffer mechanism or not. To read more about DPDK's
		 * TX buffer mechanism please refer to DpdkDevice class description. Default value is false (don't use this mechanism)
		 * @return True if packet was sent successfully or false if device is not opened, TX queue isn't opened, or if the packet wasn't
		 * sent for any other reason.
Please notice that when using TX buffers the packet may be buffered and not sent immediately, which
		 * may also result in returning false
		 */
		bool sendPacket(RawPacket& rawPacket, uint16_t txQueueId = 0, bool useTxBuffer = false);

		/**
		 * Send a MBufRawPacket to the network. Please notice that the mbuf used in this method isn't freed by this method, it will be
		 * transparently freed by DPDK
		 * @param[in] rawPacket The MBufRawPacket to send
		 * @param[in] txQueueId An optional parameter which indicates which TX queue the packet will be sent to. The default is
		 * TX queue 0
		 * @param[in] useTxBuffer A flag which indicates whether to use TX buffer mechanism or not. To read more about DPDK's
		 * TX buffer mechanism please refer to DpdkDevice class description. Default value is false (don't use this mechanism)
		 * @return True if packet was sent successfully or false if device is not opened, TX queue isn't opened, or if the packet wasn't
		 * sent for any other reason. Please notice that when using TX buffers the packet may be buffered and not sent immediately, which
		 * may also result in returning false
		 */
		bool sendPacket(MBufRawPacket& rawPacket, uint16_t txQueueId = 0, bool useTxBuffer = false);

		/**
		 * Send a parsed packet to the network. Please notice that the mbuf used or allocated in this method isn't freed by this method,
		 * it will be transparently freed by DPDK
		 * @param[in] packet The parsed packet to send. Please notice that if the packet contains a raw packet which isn't of type
		 * MBufRawPacket, a new temp MBufRawPacket will be created and the data will be copied to it. This is necessary to
		 * allocate an mbuf which will store the data to be sent. If performance is a critical factor please make sure you send a
		 * parsed packet that contains a raw packet of type MBufRawPacket
		 * @param[in] txQueueId An optional parameter which indicates which TX queue the packet will be sent on.
The default is TX queue 0
		 * @param[in] useTxBuffer A flag which indicates whether to use TX buffer mechanism or not. To read more about DPDK's
		 * TX buffer mechanism please refer to DpdkDevice class description. Default value is false (don't use this mechanism)
		 * @return True if packet was sent successfully or false if device is not opened, TX queue isn't opened, or if the packet wasn't
		 * sent for any other reason. Please notice that when using TX buffers the packet may be buffered and not sent immediately, which
		 * may also result in returning false
		 */
		bool sendPacket(Packet& packet, uint16_t txQueueId = 0, bool useTxBuffer = false);

		/**
		 * Overridden method from IPcapDevice. __BPF filters are currently not implemented for DpdkDevice__
		 * NOTE(review): this class is declared as deriving from IDevice - confirm which base class actually declares setFilter
		 * @param[in] filter Not used in this method
		 * @return Always false with a "Filters aren't supported in DPDK device" error message
		 */
		bool setFilter(GeneralFilter& filter);

		/**
		 * Overridden method from IPcapDevice. __BPF filters are currently not implemented for DpdkDevice__
		 * NOTE(review): this class is declared as deriving from IDevice - confirm which base class actually declares setFilter
		 * @param[in] filterAsString Not used in this method
		 * @return Always false with a "Filters aren't supported in DPDK device" error message
		 */
		bool setFilter(std::string filterAsString);

		/**
		 * Open the DPDK device. Notice opening the device only makes it ready to use, it doesn't start packet capturing. This method initializes RX and TX queues,
		 * configures the DPDK port and starts it. Call close() to close the device. The device is opened in promiscuous mode
		 * @param[in] numOfRxQueuesToOpen Number of RX queues to setup. This number must be smaller than or equal to the return value of getTotalNumOfRxQueues()
		 * @param[in] numOfTxQueuesToOpen Number of TX queues to setup. This number must be smaller than or equal to the return value of getTotalNumOfTxQueues()
		 * @param[in] config Optional parameter for defining special port configuration parameters such as number of receive/transmit descriptors.
If not set the default * parameters will be set (see DpdkDeviceConfiguration) * @return True if the device was opened successfully, false if device is already opened, if RX/TX queues configuration failed or of DPDK port * configuration and startup failed */ bool openMultiQueues(uint16_t numOfRxQueuesToOpen, uint16_t numOfTxQueuesToOpen, const DpdkDeviceConfiguration& config = DpdkDeviceConfiguration()); /** * There are two ways to capture packets using DpdkDevice: one of them is using worker threads (see DpdkDeviceList#startDpdkWorkerThreads() ) and * the other way is setting a callback which is invoked each time a burst of packets is captured. This method implements the second way. * After invoking this method the DpdkDevice enters capture mode and starts capturing packets. * This method assumes there is only 1 RX queue opened for this device, otherwise an error is returned. It then allocates a core and creates 1 thread * that runs in an endless loop and tries to capture packets using DPDK. Each time a burst of packets is captured the user callback is invoked with the user * cookie as a parameter. This loop continues until stopCapture() is called. Notice: since the callback is invoked for every packet burst * using this method can be slower than using worker threads. On the other hand, it's a simpler way comparing to worker threads * @param[in] onPacketsArrive The user callback which will be invoked each time a packet burst is captured by the device * @param[in] onPacketsArriveUserCookie The user callback is invoked with this cookie as a parameter. It can be used to pass * information from the user application to the callback * @return True if capture thread started successfully or false if device is already in capture mode, number of opened RX queues isn't equal * to 1, if the method couldn't find an available core to allocate for the capture thread, or if thread invocation failed. 
In * all of these cases an appropriate error message will be printed */ bool startCaptureSingleThread(OnDpdkPacketsArriveCallback onPacketsArrive, void* onPacketsArriveUserCookie); /** * This method does what startCaptureSingleThread() does, but with more than one RX queue / capturing thread. It's called * with a core mask as a parameter and creates a packet capture thread on every core in the mask. Each capturing thread is assigned a specific * RX queue. This method assumes all cores in the core-mask are available and there are enough opened RX queues to match each thread. * If these assumptions do not hold an error is returned. After invoking all threads, all of them run in an endless loop * and try to capture packets from their designated RX queues. Each time a burst of packets is captured the callback is invoked with the user * cookie and the thread ID that captured the packets * @param[in] onPacketsArrive The user callback which will be invoked each time a burst of packets is captured by the device * @param[in] onPacketsArriveUserCookie The user callback is invoked with this cookie as a parameter. It can be used to pass * information from the user application to the callback * @param[in] coreMask The core-mask for creating the capture threads * @return True if all capture threads started successfully, or false if the device is already in capture mode, not all cores in the core-mask are * available to DPDK, there are not enough opened RX queues to match all cores in the core-mask, or thread invocation failed. 
In * all of these cases an appropriate error message will be printed */ bool startCaptureMultiThreads(OnDpdkPacketsArriveCallback onPacketsArrive, void* onPacketsArriveUserCookie, CoreMask coreMask); /** * If the device is in capture mode (started by invoking startCaptureSingleThread() or startCaptureMultiThreads()), this method * stops all capturing threads and sets the device to non-capturing mode */ void stopCapture(); /** * @return The number of free mbufs in the device's mbuf pool */ int getAmountOfFreeMbufs() const; /** * @return The number of mbufs currently in use in the device's mbuf pool */ int getAmountOfMbufsInUse() const; /** * Retrieve RX/TX statistics from the device * @param[out] stats A reference to a DpdkDeviceStats object into which the statistics will be written */ void getStatistics(DpdkDeviceStats& stats) const; /** * Clear device statistics */ void clearStatistics(); /** * DPDK supports an option to buffer TX packets and send them only when reaching a certain threshold. This method enables * the user to flush a TX buffer for certain TX queue and send the packets stored in it (you can read about it here: * http://dpdk.org/doc/api/rte__ethdev_8h.html#a0e941a74ae1b1b886764bc282458d946). It has the option to flush only * when timeout that was set in DpdkDeviceConfiguration#flushTxBufferTimeout expired or flush immediately regardless * of the timeout. The usage of this method can be in the main loop where you can call this method once every a couple * of iterations to make sure TX buffers are flushed * @param[in] flushOnlyIfTimeoutExpired When set to true, flush will happen only if the timeout defined in * DpdkDeviceConfiguration#flushTxBufferTimeout expired. If set to false flush will happen immediately. Default value * is false * @param[in] txQueueId The TX queue ID to flush its buffer. 
Default is 0 * @return The number of packets sent after buffer was flushed */ uint16_t flushTxBuffer(bool flushOnlyIfTimeoutExpired = false, uint16_t txQueueId = 0); /** * Check whether a specific RSS hash function is supported by this device (PMD) * @param[in] rssHF RSS hash function to check * @return True if this hash function is supported, false otherwise */ bool isDeviceSupportRssHashFunction(DpdkRssHashFunction rssHF) const; /** * Check whether a mask of RSS hash functions is supported by this device (PMD) * @param[in] rssHFMask RSS hash functions mask to check. This mask should be built from values in DpdkRssHashFunction enum * @return True if all hash functions in this mask are supported, false otherwise */ bool isDeviceSupportRssHashFunction(uint64_t rssHFMask) const; /** * @return A mask of all RSS hash functions supported by this device (PMD). This mask is built from values in DpdkRssHashFunction enum. * Value of zero means RSS is not supported by this device */ uint64_t getSupportedRssHashFunctions() const; /** * @return The RSS hash function mask configured for this device (PMD) */ uint64_t getConfiguredRssHashFunction() const; /** * Translate RSS hash function mask to a list of their string representation * @param rssHFMask RSS hash function mask * @return RSS hash functions as strings */ std::vector rssHashFunctionMaskToString(uint64_t rssHFMask) const; //overridden methods /** * Overridden method from IPcapDevice. It calls openMultiQueues() with 1 RX queue and 1 TX queue. * Notice opening the device only makes it ready to use, it doesn't start packet capturing. The device is opened in promiscuous mode * @return True if the device was opened successfully, false if device is already opened, if RX/TX queues configuration failed or of DPDK port * configuration and startup failed */ bool open() { return openMultiQueues(1, 1); }; /** * Close the DpdkDevice. 
When device is closed it's not possible work with it */ void close(); private: struct DpdkCoreConfiguration { int RxQueueId; bool IsCoreInUse; void clear() { RxQueueId = -1; IsCoreInUse = false; } DpdkCoreConfiguration() : RxQueueId(-1), IsCoreInUse(false) {} }; DpdkDevice(int port, uint32_t mBufPoolSize); bool initMemPool(struct rte_mempool*& memPool, const char* mempoolName, uint32_t mBufPoolSize); bool configurePort(uint8_t numOfRxQueues, uint8_t numOfTxQueues); bool initQueues(uint8_t numOfRxQueuesToInit, uint8_t numOfTxQueuesToInit); bool startDevice(); static int dpdkCaptureThreadMain(void* ptr); void clearCoreConfiguration(); bool initCoreConfigurationByCoreMask(CoreMask coreMask); int getCoresInUseCount() const; void setDeviceInfo(); typedef rte_mbuf* (*PacketIterator)(void* packetStorage, int index); uint16_t sendPacketsInner(uint16_t txQueueId, void* packetStorage, PacketIterator iter, int arrLength, bool useTxBuffer); uint64_t convertRssHfToDpdkRssHf(uint64_t rssHF) const; uint64_t convertDpdkRssHfToRssHf(uint64_t dpdkRssHF) const; std::string m_DeviceName; DpdkPMDType m_PMDType; std::string m_PMDName; std::string m_PciAddress; DpdkDeviceConfiguration m_Config; int m_Id; MacAddress m_MacAddress; uint16_t m_DeviceMtu; struct rte_mempool* m_MBufMempool; struct rte_eth_dev_tx_buffer** m_TxBuffers; uint64_t m_TxBufferDrainTsc; uint64_t* m_TxBufferLastDrainTsc; DpdkCoreConfiguration m_CoreConfiguration[MAX_NUM_OF_CORES]; uint16_t m_TotalAvailableRxQueues; uint16_t m_TotalAvailableTxQueues; uint16_t m_NumOfRxQueuesOpened; uint16_t m_NumOfTxQueuesOpened; OnDpdkPacketsArriveCallback m_OnPacketsArriveCallback; void* m_OnPacketsArriveUserCookie; bool m_StopThread; bool m_WasOpened; // RSS key used by the NIC for load balancing the packets between cores static uint8_t m_RSSKey[40]; mutable DpdkDeviceStats m_PrevStats; }; } // namespace pcpp // GCOVR_EXCL_STOP #endif /* PCAPPP_DPDK_DEVICE */