access/udp: use pf_read rather than pf_block

With pf_block, there are, for each received packet:
- one heap allocation for the block_t,
- one memory copy from the block_t into the downstream filter/demux (normally the prefetch filter) in the stream core, and
- one heap free in the stream core.

And it gets worse if the packet size exceeds the MRU (1316 bytes).

In practice, the read size (from the prefetch filter) is almost always much larger than the packet size. Using pf_read, we eliminate both the heap manipulations and the memory copies, both for sanely sized and insanely sized fragmented packets.

In the corner case that the read size is actually small, this uses a circular buffer and incurs one memory copy. That is still faster than the pf_block logic.

access/udp: use pf_read rather than pf_block
With pf_block, there are, for each received packet:
- one heap allocation for the block_t,
- one memory copy from the block_t into the downstream filter/demux (normally the prefetch filter) in the stream core, and
- one heap free in the stream core.

And it gets worse if the packet size exceeds the MRU (1316 bytes).

In practice, the read size (from the prefetch filter) is almost always much larger than the packet size. Using pf_read, we eliminate both the heap manipulations and the memory copies, both for sanely sized and insanely sized fragmented packets.

In the corner case that the read size is actually small, this uses a circular buffer and incurs one memory copy. That is still faster than the pf_block logic.
7afbace5 · Rémi Denis-Courmont · afbaaea4 · 7afbace5
Commit 7afbace5 authored 5 years ago by Rémi Denis-Courmont
--- a/modules/access/udp.c
+++ b/modules/access/udp.c
@@ -49,12 +49,18 @@
 # include <sys/uio.h>
 #endif

-typedef struct
-{
+/* Buffer can be max theoretical datagram content minus anticipated MTU.
+ * IPv6 headers are larger than IPv4, ignore IPv6 jumbograms.
+ */
+#define MRU 65507u
+
+typedef struct {
    int fd;
    int timeout;
-    size_t mtu;
-    block_t *overflow_block;
+
+    size_t length;
+    char *offset;
+    char buf[MRU];
 } access_sys_t;

 static int Control(stream_t *access, int query, va_list args)
@@ -78,30 +84,19 @@ static int Control(stream_t *access, int query, va_list args)
    return VLC_SUCCESS;
 }

-static block_t *BlockUDP(stream_t *access, bool *restrict eof)
+static ssize_t Read(stream_t *access, void *buf, size_t len)
 {
    access_sys_t *sys = access->p_sys;

-    block_t *pkt = block_Alloc(sys->mtu);
-    if (unlikely(pkt == NULL)) {
-        /* OOM - dequeue and discard one packet */
-        char dummy;
-        recv(sys->fd, &dummy, 1, 0);
-        return NULL;
-    }
-
+    if (sys->length > 0) {
+        if (len > sys->length)
+            len = sys->length;

-    struct iovec iov[] = {{
-        .iov_base = pkt->p_buffer,
-        .iov_len = sys->mtu,
-    },{
-        .iov_base = sys->overflow_block->p_buffer,
-        .iov_len = sys->overflow_block->i_buffer,
-    }};
-    struct msghdr msg = {
-        .msg_iov = iov,
-        .msg_iovlen = 2,
-    };
+        memcpy(buf, sys->offset, len);
+        sys->offset += len;
+        sys->length -= len;
+        return len;
+    }

    struct pollfd ufd[1];

@@ -111,40 +106,31 @@ static block_t *BlockUDP(stream_t *access, bool *restrict eof)
    switch (vlc_poll_i11e(ufd, 1, sys->timeout)) {
        case 0:
            msg_Err(access, "receive time-out");
-            *eof = true;
-            /* fall through */
+            return 0;
        case -1:
-            goto skip;
-    }
-
-    ssize_t len = recvmsg(sys->fd, &msg, 0);
-
-    if (len < 0) {
-skip:
-        block_Release(pkt);
-        return NULL;
+            return -1;
    }

-    /* Received more than mtu amount,
-     * we should gather blocks and increase mtu
-     * and allocate new overflow block.  See Open()
-     */
-    if (unlikely((size_t)len > sys->mtu)) {
-        msg_Warn(access, "%zd bytes packet received (MTU was %zu), adjusting mtu",
-                len, sys->mtu);
-        block_t *gather_block = sys->overflow_block;
-
-        sys->overflow_block = block_Alloc(65507 - len);
+    struct iovec iov[] = {
+        { .iov_base = buf,      .iov_len = len, },
+        { .iov_base = sys->buf, .iov_len = MRU, },
+    };
+    struct msghdr msg = {
+        .msg_iov = iov,
+        .msg_iovlen = ARRAY_SIZE(iov),
+    };
+    ssize_t val = recvmsg(sys->fd, &msg, 0);

-        gather_block->i_buffer = len - sys->mtu;
-        pkt->p_next = gather_block;
-        pkt = block_ChainGather(pkt);
+    if (val <= 0) /* empty (0 bytes) payload does *not* mean EOF here */
+        return -1;

-        sys->mtu = len;
-    }else
-        pkt->i_buffer = len;
+    if (unlikely((size_t)val > len)) {
+        sys->offset = sys->buf;
+        sys->length = val - len;
+        val = len;
+    }

-    return pkt;
+    return val;
 }

 /*****************************************************************************
@@ -162,19 +148,12 @@ static int Open( vlc_object_t *p_this )
    if( unlikely( sys == NULL ) )
        return VLC_ENOMEM;

-    sys->mtu = 7 * 188;
-
-    /* Overflow can be max theoretical datagram content less anticipated MTU,
-     *  IPv6 headers are larger than IPv4, ignore IPv6 jumbograms
-     */
-    sys->overflow_block = block_Alloc(65507 - sys->mtu);
-    if( unlikely( sys->overflow_block == NULL ) )
-        return VLC_ENOMEM;
-
+    sys->length = 0;
    p_access->p_sys = sys;
-
-    /* Set up p_access */
-    ACCESS_SET_CALLBACKS( NULL, BlockUDP, Control, NULL );
+    p_access->pf_read = Read;
+    p_access->pf_block = NULL;
+    p_access->pf_control = Control;
+    p_access->pf_seek = NULL;

    char *psz_name = strdup( p_access->psz_location );
    char *psz_parser;
@@ -249,8 +228,6 @@ static void Close( vlc_object_t *p_this )
 {
    stream_t     *p_access = (stream_t*)p_this;
    access_sys_t *sys = p_access->p_sys;
-    if( sys->overflow_block )
-        block_Release( sys->overflow_block );

    net_Close( sys->fd );
 }