 #include <net/dst.h>
 #include <net/sock.h>
 #include <net/checksum.h>
+#include <net/gro.h>
 #include <net/gso.h>
 #include <net/hotdata.h>
 #include <net/ip6_checksum.h>
 static struct kmem_cache *skbuff_ext_cache __ro_after_init;
 #endif
 
-#define SKB_SMALL_HEAD_SIZE SKB_HEAD_ALIGN(MAX_TCP_HEADER)
+#define GRO_MAX_HEAD_PAD (GRO_MAX_HEAD + NET_SKB_PAD + NET_IP_ALIGN)
+#define SKB_SMALL_HEAD_SIZE SKB_HEAD_ALIGN(max(MAX_TCP_HEADER, \
+					       GRO_MAX_HEAD_PAD))
 
 /* We want SKB_SMALL_HEAD_CACHE_SIZE to not be a power of two.
  * This should ensure that SKB_SMALL_HEAD_HEADROOM is a unique
@@ -220,67 +223,9 @@ static void skb_under_panic(struct sk_buff *skb, unsigned int sz, void *addr)
 #define NAPI_SKB_CACHE_BULK	16
 #define NAPI_SKB_CACHE_HALF	(NAPI_SKB_CACHE_SIZE / 2)
 
-#if PAGE_SIZE == SZ_4K
-
-#define NAPI_HAS_SMALL_PAGE_FRAG	1
-#define NAPI_SMALL_PAGE_PFMEMALLOC(nc)	((nc).pfmemalloc)
-
-/* specialized page frag allocator using a single order 0 page
- * and slicing it into 1K sized fragment. Constrained to systems
- * with a very limited amount of 1K fragments fitting a single
- * page - to avoid excessive truesize underestimation
- */
-
-struct page_frag_1k {
-	void *va;
-	u16 offset;
-	bool pfmemalloc;
-};
-
-static void *page_frag_alloc_1k(struct page_frag_1k *nc, gfp_t gfp)
-{
-	struct page *page;
-	int offset;
-
-	offset = nc->offset - SZ_1K;
-	if (likely(offset >= 0))
-		goto use_frag;
-
-	page = alloc_pages_node(NUMA_NO_NODE, gfp, 0);
-	if (!page)
-		return NULL;
-
-	nc->va = page_address(page);
-	nc->pfmemalloc = page_is_pfmemalloc(page);
-	offset = PAGE_SIZE - SZ_1K;
-	page_ref_add(page, offset / SZ_1K);
-
-use_frag:
-	nc->offset = offset;
-	return nc->va + offset;
-}
-#else
-
-/* the small page is actually unused in this build; add dummy helpers
- * to please the compiler and avoid later preprocessor's conditionals
- */
-#define NAPI_HAS_SMALL_PAGE_FRAG	0
-#define NAPI_SMALL_PAGE_PFMEMALLOC(nc)	false
-
-struct page_frag_1k {
-};
-
-static void *page_frag_alloc_1k(struct page_frag_1k *nc, gfp_t gfp_mask)
-{
-	return NULL;
-}
-
-#endif
-
 struct napi_alloc_cache {
 	local_lock_t bh_lock;
 	struct page_frag_cache page;
-	struct page_frag_1k page_small;
 	unsigned int skb_count;
 	void *skb_cache[NAPI_SKB_CACHE_SIZE];
 };
@@ -290,23 +235,6 @@ static DEFINE_PER_CPU(struct napi_alloc_cache, napi_alloc_cache) = {
 	.bh_lock = INIT_LOCAL_LOCK(bh_lock),
 };
 
-/* Double check that napi_get_frags() allocates skbs with
- * skb->head being backed by slab, not a page fragment.
- * This is to make sure bug fixed in 3226b158e67c
- * ("net: avoid 32 x truesize under-estimation for tiny skbs")
- * does not accidentally come back.
- */
-void napi_get_frags_check(struct napi_struct *napi)
-{
-	struct sk_buff *skb;
-
-	local_bh_disable();
-	skb = napi_get_frags(napi);
-	WARN_ON_ONCE(!NAPI_HAS_SMALL_PAGE_FRAG && skb && skb->head_frag);
-	napi_free_frags(napi);
-	local_bh_enable();
-}
-
 void *__napi_alloc_frag_align(unsigned int fragsz, unsigned int align_mask)
 {
 	struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache);
@@ -736,7 +664,7 @@ struct sk_buff *__netdev_alloc_skb(struct net_device *dev, unsigned int len,
 	/* If requested length is either too small or too big,
 	 * we use kmalloc() for skb->head allocation.
 	 */
-	if (len <= SKB_WITH_OVERHEAD(1024) ||
+	if (len <= SKB_WITH_OVERHEAD(SKB_SMALL_HEAD_CACHE_SIZE) ||
 	    len > SKB_WITH_OVERHEAD(PAGE_SIZE) ||
 	    (gfp_mask & (__GFP_DIRECT_RECLAIM | GFP_DMA))) {
 		skb = __alloc_skb(len, gfp_mask, SKB_ALLOC_RX, NUMA_NO_NODE);
@@ -813,10 +741,8 @@ struct sk_buff *napi_alloc_skb(struct napi_struct *napi, unsigned int len)
 
 	/* If requested length is either too small or too big,
 	 * we use kmalloc() for skb->head allocation.
-	 * When the small frag allocator is available, prefer it over kmalloc
-	 * for small fragments
 	 */
-	if ((!NAPI_HAS_SMALL_PAGE_FRAG && len <= SKB_WITH_OVERHEAD(1024)) ||
+	if (len <= SKB_WITH_OVERHEAD(SKB_SMALL_HEAD_CACHE_SIZE) ||
 	    len > SKB_WITH_OVERHEAD(PAGE_SIZE) ||
 	    (gfp_mask & (__GFP_DIRECT_RECLAIM | GFP_DMA))) {
 		skb = __alloc_skb(len, gfp_mask, SKB_ALLOC_RX | SKB_ALLOC_NAPI,
@@ -826,32 +752,16 @@ struct sk_buff *napi_alloc_skb(struct napi_struct *napi, unsigned int len)
 		goto skb_success;
 	}
 
+	len = SKB_HEAD_ALIGN(len);
+
 	if (sk_memalloc_socks())
 		gfp_mask |= __GFP_MEMALLOC;
 
 	local_lock_nested_bh(&napi_alloc_cache.bh_lock);
 	nc = this_cpu_ptr(&napi_alloc_cache);
-	if (NAPI_HAS_SMALL_PAGE_FRAG && len <= SKB_WITH_OVERHEAD(1024)) {
-		/* we are artificially inflating the allocation size, but
-		 * that is not as bad as it may look like, as:
-		 * - 'len' less than GRO_MAX_HEAD makes little sense
-		 * - On most systems, larger 'len' values lead to fragment
-		 *   size above 512 bytes
-		 * - kmalloc would use the kmalloc-1k slab for such values
-		 * - Builds with smaller GRO_MAX_HEAD will very likely do
-		 *   little networking, as that implies no WiFi and no
-		 *   tunnels support, and 32 bits arches.
-		 */
-		len = SZ_1K;
 
-		data = page_frag_alloc_1k(&nc->page_small, gfp_mask);
-		pfmemalloc = NAPI_SMALL_PAGE_PFMEMALLOC(nc->page_small);
-	} else {
-		len = SKB_HEAD_ALIGN(len);
-
-		data = page_frag_alloc(&nc->page, len, gfp_mask);
-		pfmemalloc = page_frag_cache_is_pfmemalloc(&nc->page);
-	}
+	data = page_frag_alloc(&nc->page, len, gfp_mask);
+	pfmemalloc = page_frag_cache_is_pfmemalloc(&nc->page);
 	local_unlock_nested_bh(&napi_alloc_cache.bh_lock);
 
 	if (unlikely(!data))
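
For illustration only: a rough userspace sketch (not part of the patch, and not kernel code) of the head-allocation decision napi_alloc_skb() makes after this change. Requested lengths that fit the dedicated small-head cache, lengths larger than one page can hold, or constrained gfp masks fall back to __alloc_skb(); everything else is carved from the per-CPU page_frag cache. The constants and helpers below (PAGE_SIZE_SKETCH, SHINFO_SIZE_SKETCH, SMALL_HEAD_CACHE_SKETCH, skb_with_overhead(), head_source()) are assumed placeholders, not the config-dependent kernel values or APIs.

/*
 * Standalone sketch of the post-patch branch in napi_alloc_skb().
 * All numbers are illustrative assumptions, not kernel constants.
 */
#include <stdbool.h>
#include <stdio.h>

#define PAGE_SIZE_SKETCH	4096u	/* assumed PAGE_SIZE */
#define SHINFO_SIZE_SKETCH	320u	/* assumed aligned sizeof(struct skb_shared_info) */
#define SMALL_HEAD_CACHE_SKETCH	704u	/* assumed SKB_SMALL_HEAD_CACHE_SIZE */

/* mirrors SKB_WITH_OVERHEAD(): payload left once shared_info is reserved */
static unsigned int skb_with_overhead(unsigned int x)
{
	return x - SHINFO_SIZE_SKETCH;
}

static const char *head_source(unsigned int len, bool constrained_gfp)
{
	if (len <= skb_with_overhead(SMALL_HEAD_CACHE_SKETCH) ||
	    len > skb_with_overhead(PAGE_SIZE_SKETCH) ||
	    constrained_gfp)
		return "__alloc_skb (kmalloc/slab head)";
	return "per-CPU page_frag cache";
}

int main(void)
{
	unsigned int lens[] = { 128, 512, 1500, 4000 };

	for (unsigned int i = 0; i < sizeof(lens) / sizeof(lens[0]); i++)
		printf("len=%u -> %s\n", lens[i], head_source(lens[i], false));
	return 0;
}

Running the sketch simply prints which allocation path each sample length would take under those assumed sizes; with the real, larger SKB_SMALL_HEAD_SIZE (now covering GRO_MAX_HEAD plus padding), more small receive heads land in the dedicated small-head cache instead of the removed 1K page-fragment path.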