]>
Commit | Line | Data |
---|---|---|
e263cd49 | 1 | /* |
605d52e6 | 2 | * QEMU TX packets abstractions |
e263cd49 DF |
3 | * |
4 | * Copyright (c) 2012 Ravello Systems LTD (http://ravellosystems.com) | |
5 | * | |
6 | * Developed by Daynix Computing LTD (http://www.daynix.com) | |
7 | * | |
8 | * Authors: | |
9 | * Dmitry Fleytman <[email protected]> | |
10 | * Tamir Shomer <[email protected]> | |
11 | * Yan Vugenfirer <[email protected]> | |
12 | * | |
13 | * This work is licensed under the terms of the GNU GPL, version 2 or later. | |
14 | * See the COPYING file in the top-level directory. | |
15 | * | |
16 | */ | |
17 | ||
e9abfcb5 | 18 | #include "qemu/osdep.h" |
605d52e6 | 19 | #include "net_tx_pkt.h" |
e263cd49 | 20 | #include "net/eth.h" |
e263cd49 DF |
21 | #include "net/checksum.h" |
22 | #include "net/tap.h" | |
23 | #include "net/net.h" | |
edf5ca5d | 24 | #include "hw/pci/pci_device.h" |
e263cd49 DF |
25 | |
/* Indices of the fixed leading entries of NetTxPkt::vec. */
enum {
    NET_TX_PKT_VHDR_FRAG     = 0, /* virtio-net header (virt_hdr) */
    NET_TX_PKT_L2HDR_FRAG    = 1, /* L2 header buffer (l2_hdr) */
    NET_TX_PKT_L3HDR_FRAG    = 2, /* L3 header buffer (l3_hdr) */
    NET_TX_PKT_PL_START_FRAG = 3  /* first payload fragment */
};
32 | ||
33 | /* TX packet private context */ | |
605d52e6 | 34 | struct NetTxPkt { |
11171010 DF |
35 | PCIDevice *pci_dev; |
36 | ||
e263cd49 DF |
37 | struct virtio_net_hdr virt_hdr; |
38 | bool has_virt_hdr; | |
39 | ||
40 | struct iovec *raw; | |
41 | uint32_t raw_frags; | |
42 | uint32_t max_raw_frags; | |
43 | ||
44 | struct iovec *vec; | |
45 | ||
46 | uint8_t l2_hdr[ETH_MAX_L2_HDR_LEN]; | |
eb700029 | 47 | uint8_t l3_hdr[ETH_MAX_IP_DGRAM_LEN]; |
e263cd49 DF |
48 | |
49 | uint32_t payload_len; | |
50 | ||
51 | uint32_t payload_frags; | |
52 | uint32_t max_payload_frags; | |
53 | ||
54 | uint16_t hdr_len; | |
55 | eth_pkt_types_e packet_type; | |
56 | uint8_t l4proto; | |
eb700029 DF |
57 | |
58 | bool is_loopback; | |
e263cd49 DF |
59 | }; |
60 | ||
11171010 DF |
61 | void net_tx_pkt_init(struct NetTxPkt **pkt, PCIDevice *pci_dev, |
62 | uint32_t max_frags, bool has_virt_hdr) | |
e263cd49 | 63 | { |
605d52e6 | 64 | struct NetTxPkt *p = g_malloc0(sizeof *p); |
e263cd49 | 65 | |
11171010 DF |
66 | p->pci_dev = pci_dev; |
67 | ||
47882fa4 | 68 | p->vec = g_new(struct iovec, max_frags + NET_TX_PKT_PL_START_FRAG); |
e263cd49 | 69 | |
47882fa4 | 70 | p->raw = g_new(struct iovec, max_frags); |
e263cd49 DF |
71 | |
72 | p->max_payload_frags = max_frags; | |
73 | p->max_raw_frags = max_frags; | |
74 | p->has_virt_hdr = has_virt_hdr; | |
605d52e6 DF |
75 | p->vec[NET_TX_PKT_VHDR_FRAG].iov_base = &p->virt_hdr; |
76 | p->vec[NET_TX_PKT_VHDR_FRAG].iov_len = | |
e263cd49 | 77 | p->has_virt_hdr ? sizeof p->virt_hdr : 0; |
605d52e6 | 78 | p->vec[NET_TX_PKT_L2HDR_FRAG].iov_base = &p->l2_hdr; |
eb700029 | 79 | p->vec[NET_TX_PKT_L3HDR_FRAG].iov_base = &p->l3_hdr; |
e263cd49 DF |
80 | |
81 | *pkt = p; | |
82 | } | |
83 | ||
605d52e6 | 84 | void net_tx_pkt_uninit(struct NetTxPkt *pkt) |
e263cd49 DF |
85 | { |
86 | if (pkt) { | |
87 | g_free(pkt->vec); | |
88 | g_free(pkt->raw); | |
89 | g_free(pkt); | |
90 | } | |
91 | } | |
92 | ||
eb700029 | 93 | void net_tx_pkt_update_ip_hdr_checksum(struct NetTxPkt *pkt) |
e263cd49 DF |
94 | { |
95 | uint16_t csum; | |
e263cd49 | 96 | assert(pkt); |
e263cd49 | 97 | struct ip_header *ip_hdr; |
605d52e6 | 98 | ip_hdr = pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_base; |
e263cd49 | 99 | |
e263cd49 | 100 | ip_hdr->ip_len = cpu_to_be16(pkt->payload_len + |
605d52e6 | 101 | pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_len); |
e263cd49 | 102 | |
e263cd49 DF |
103 | ip_hdr->ip_sum = 0; |
104 | csum = net_raw_checksum((uint8_t *)ip_hdr, | |
605d52e6 | 105 | pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_len); |
e263cd49 | 106 | ip_hdr->ip_sum = cpu_to_be16(csum); |
eb700029 DF |
107 | } |
108 | ||
109 | void net_tx_pkt_update_ip_checksums(struct NetTxPkt *pkt) | |
110 | { | |
111 | uint16_t csum; | |
112 | uint32_t cntr, cso; | |
113 | assert(pkt); | |
114 | uint8_t gso_type = pkt->virt_hdr.gso_type & ~VIRTIO_NET_HDR_GSO_ECN; | |
115 | void *ip_hdr = pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_base; | |
116 | ||
117 | if (pkt->payload_len + pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_len > | |
118 | ETH_MAX_IP_DGRAM_LEN) { | |
119 | return; | |
120 | } | |
121 | ||
122 | if (gso_type == VIRTIO_NET_HDR_GSO_TCPV4 || | |
123 | gso_type == VIRTIO_NET_HDR_GSO_UDP) { | |
124 | /* Calculate IP header checksum */ | |
125 | net_tx_pkt_update_ip_hdr_checksum(pkt); | |
126 | ||
127 | /* Calculate IP pseudo header checksum */ | |
128 | cntr = eth_calc_ip4_pseudo_hdr_csum(ip_hdr, pkt->payload_len, &cso); | |
129 | csum = cpu_to_be16(~net_checksum_finish(cntr)); | |
130 | } else if (gso_type == VIRTIO_NET_HDR_GSO_TCPV6) { | |
131 | /* Calculate IP pseudo header checksum */ | |
132 | cntr = eth_calc_ip6_pseudo_hdr_csum(ip_hdr, pkt->payload_len, | |
133 | IP_PROTO_TCP, &cso); | |
134 | csum = cpu_to_be16(~net_checksum_finish(cntr)); | |
135 | } else { | |
136 | return; | |
137 | } | |
e263cd49 | 138 | |
605d52e6 | 139 | iov_from_buf(&pkt->vec[NET_TX_PKT_PL_START_FRAG], pkt->payload_frags, |
e263cd49 DF |
140 | pkt->virt_hdr.csum_offset, &csum, sizeof(csum)); |
141 | } | |
142 | ||
605d52e6 | 143 | static void net_tx_pkt_calculate_hdr_len(struct NetTxPkt *pkt) |
e263cd49 | 144 | { |
605d52e6 DF |
145 | pkt->hdr_len = pkt->vec[NET_TX_PKT_L2HDR_FRAG].iov_len + |
146 | pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_len; | |
e263cd49 DF |
147 | } |
148 | ||
605d52e6 | 149 | static bool net_tx_pkt_parse_headers(struct NetTxPkt *pkt) |
e263cd49 DF |
150 | { |
151 | struct iovec *l2_hdr, *l3_hdr; | |
152 | size_t bytes_read; | |
153 | size_t full_ip6hdr_len; | |
154 | uint16_t l3_proto; | |
155 | ||
156 | assert(pkt); | |
157 | ||
605d52e6 DF |
158 | l2_hdr = &pkt->vec[NET_TX_PKT_L2HDR_FRAG]; |
159 | l3_hdr = &pkt->vec[NET_TX_PKT_L3HDR_FRAG]; | |
e263cd49 DF |
160 | |
161 | bytes_read = iov_to_buf(pkt->raw, pkt->raw_frags, 0, l2_hdr->iov_base, | |
162 | ETH_MAX_L2_HDR_LEN); | |
a7278b36 DR |
163 | if (bytes_read < sizeof(struct eth_header)) { |
164 | l2_hdr->iov_len = 0; | |
165 | return false; | |
166 | } | |
167 | ||
168 | l2_hdr->iov_len = sizeof(struct eth_header); | |
169 | switch (be16_to_cpu(PKT_GET_ETH_HDR(l2_hdr->iov_base)->h_proto)) { | |
170 | case ETH_P_VLAN: | |
171 | l2_hdr->iov_len += sizeof(struct vlan_header); | |
172 | break; | |
173 | case ETH_P_DVLAN: | |
174 | l2_hdr->iov_len += 2 * sizeof(struct vlan_header); | |
175 | break; | |
176 | } | |
177 | ||
178 | if (bytes_read < l2_hdr->iov_len) { | |
e263cd49 | 179 | l2_hdr->iov_len = 0; |
eb700029 DF |
180 | l3_hdr->iov_len = 0; |
181 | pkt->packet_type = ETH_PKT_UCAST; | |
e263cd49 | 182 | return false; |
eb700029 DF |
183 | } else { |
184 | l2_hdr->iov_len = ETH_MAX_L2_HDR_LEN; | |
185 | l2_hdr->iov_len = eth_get_l2_hdr_length(l2_hdr->iov_base); | |
186 | pkt->packet_type = get_eth_packet_type(l2_hdr->iov_base); | |
e263cd49 DF |
187 | } |
188 | ||
eb700029 | 189 | l3_proto = eth_get_l3_proto(l2_hdr, 1, l2_hdr->iov_len); |
e263cd49 DF |
190 | |
191 | switch (l3_proto) { | |
192 | case ETH_P_IP: | |
e263cd49 DF |
193 | bytes_read = iov_to_buf(pkt->raw, pkt->raw_frags, l2_hdr->iov_len, |
194 | l3_hdr->iov_base, sizeof(struct ip_header)); | |
195 | ||
196 | if (bytes_read < sizeof(struct ip_header)) { | |
197 | l3_hdr->iov_len = 0; | |
198 | return false; | |
199 | } | |
200 | ||
201 | l3_hdr->iov_len = IP_HDR_GET_LEN(l3_hdr->iov_base); | |
e263cd49 | 202 | |
eb700029 | 203 | if (l3_hdr->iov_len < sizeof(struct ip_header)) { |
e263cd49 DF |
204 | l3_hdr->iov_len = 0; |
205 | return false; | |
206 | } | |
eb700029 | 207 | |
4f51e1d3 | 208 | pkt->l4proto = IP_HDR_GET_P(l3_hdr->iov_base); |
eb700029 DF |
209 | |
210 | if (IP_HDR_GET_LEN(l3_hdr->iov_base) != sizeof(struct ip_header)) { | |
211 | /* copy optional IPv4 header data if any*/ | |
212 | bytes_read = iov_to_buf(pkt->raw, pkt->raw_frags, | |
213 | l2_hdr->iov_len + sizeof(struct ip_header), | |
214 | l3_hdr->iov_base + sizeof(struct ip_header), | |
215 | l3_hdr->iov_len - sizeof(struct ip_header)); | |
216 | if (bytes_read < l3_hdr->iov_len - sizeof(struct ip_header)) { | |
217 | l3_hdr->iov_len = 0; | |
218 | return false; | |
219 | } | |
220 | } | |
221 | ||
e263cd49 DF |
222 | break; |
223 | ||
224 | case ETH_P_IPV6: | |
eb700029 DF |
225 | { |
226 | eth_ip6_hdr_info hdrinfo; | |
227 | ||
e263cd49 | 228 | if (!eth_parse_ipv6_hdr(pkt->raw, pkt->raw_frags, l2_hdr->iov_len, |
eb700029 | 229 | &hdrinfo)) { |
e263cd49 DF |
230 | l3_hdr->iov_len = 0; |
231 | return false; | |
232 | } | |
233 | ||
eb700029 DF |
234 | pkt->l4proto = hdrinfo.l4proto; |
235 | full_ip6hdr_len = hdrinfo.full_hdr_len; | |
236 | ||
237 | if (full_ip6hdr_len > ETH_MAX_IP_DGRAM_LEN) { | |
238 | l3_hdr->iov_len = 0; | |
239 | return false; | |
240 | } | |
e263cd49 DF |
241 | |
242 | bytes_read = iov_to_buf(pkt->raw, pkt->raw_frags, l2_hdr->iov_len, | |
243 | l3_hdr->iov_base, full_ip6hdr_len); | |
244 | ||
245 | if (bytes_read < full_ip6hdr_len) { | |
246 | l3_hdr->iov_len = 0; | |
247 | return false; | |
248 | } else { | |
249 | l3_hdr->iov_len = full_ip6hdr_len; | |
250 | } | |
251 | break; | |
eb700029 | 252 | } |
e263cd49 DF |
253 | default: |
254 | l3_hdr->iov_len = 0; | |
255 | break; | |
256 | } | |
257 | ||
605d52e6 | 258 | net_tx_pkt_calculate_hdr_len(pkt); |
e263cd49 DF |
259 | return true; |
260 | } | |
261 | ||
eb700029 | 262 | static void net_tx_pkt_rebuild_payload(struct NetTxPkt *pkt) |
e263cd49 | 263 | { |
eb700029 | 264 | pkt->payload_len = iov_size(pkt->raw, pkt->raw_frags) - pkt->hdr_len; |
605d52e6 | 265 | pkt->payload_frags = iov_copy(&pkt->vec[NET_TX_PKT_PL_START_FRAG], |
e263cd49 DF |
266 | pkt->max_payload_frags, |
267 | pkt->raw, pkt->raw_frags, | |
eb700029 DF |
268 | pkt->hdr_len, pkt->payload_len); |
269 | } | |
e263cd49 | 270 | |
eb700029 DF |
/*
 * Parse the headers of the raw fragments and, on success, rebuild the
 * payload part of the outgoing vector. Returns false when header parsing
 * fails; the payload is left untouched in that case.
 */
bool net_tx_pkt_parse(struct NetTxPkt *pkt)
{
    if (!net_tx_pkt_parse_headers(pkt)) {
        return false;
    }

    net_tx_pkt_rebuild_payload(pkt);
    return true;
}
280 | ||
605d52e6 | 281 | struct virtio_net_hdr *net_tx_pkt_get_vhdr(struct NetTxPkt *pkt) |
e263cd49 DF |
282 | { |
283 | assert(pkt); | |
284 | return &pkt->virt_hdr; | |
285 | } | |
286 | ||
605d52e6 | 287 | static uint8_t net_tx_pkt_get_gso_type(struct NetTxPkt *pkt, |
e263cd49 DF |
288 | bool tso_enable) |
289 | { | |
290 | uint8_t rc = VIRTIO_NET_HDR_GSO_NONE; | |
291 | uint16_t l3_proto; | |
292 | ||
eb700029 | 293 | l3_proto = eth_get_l3_proto(&pkt->vec[NET_TX_PKT_L2HDR_FRAG], 1, |
605d52e6 | 294 | pkt->vec[NET_TX_PKT_L2HDR_FRAG].iov_len); |
e263cd49 DF |
295 | |
296 | if (!tso_enable) { | |
297 | goto func_exit; | |
298 | } | |
299 | ||
605d52e6 | 300 | rc = eth_get_gso_type(l3_proto, pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_base, |
e263cd49 DF |
301 | pkt->l4proto); |
302 | ||
303 | func_exit: | |
304 | return rc; | |
305 | } | |
306 | ||
605d52e6 | 307 | void net_tx_pkt_build_vheader(struct NetTxPkt *pkt, bool tso_enable, |
e263cd49 DF |
308 | bool csum_enable, uint32_t gso_size) |
309 | { | |
310 | struct tcp_hdr l4hdr; | |
311 | assert(pkt); | |
312 | ||
313 | /* csum has to be enabled if tso is. */ | |
314 | assert(csum_enable || !tso_enable); | |
315 | ||
605d52e6 | 316 | pkt->virt_hdr.gso_type = net_tx_pkt_get_gso_type(pkt, tso_enable); |
e263cd49 DF |
317 | |
318 | switch (pkt->virt_hdr.gso_type & ~VIRTIO_NET_HDR_GSO_ECN) { | |
319 | case VIRTIO_NET_HDR_GSO_NONE: | |
320 | pkt->virt_hdr.hdr_len = 0; | |
321 | pkt->virt_hdr.gso_size = 0; | |
322 | break; | |
323 | ||
324 | case VIRTIO_NET_HDR_GSO_UDP: | |
eb700029 | 325 | pkt->virt_hdr.gso_size = gso_size; |
e263cd49 DF |
326 | pkt->virt_hdr.hdr_len = pkt->hdr_len + sizeof(struct udp_header); |
327 | break; | |
328 | ||
329 | case VIRTIO_NET_HDR_GSO_TCPV4: | |
330 | case VIRTIO_NET_HDR_GSO_TCPV6: | |
605d52e6 | 331 | iov_to_buf(&pkt->vec[NET_TX_PKT_PL_START_FRAG], pkt->payload_frags, |
e263cd49 DF |
332 | 0, &l4hdr, sizeof(l4hdr)); |
333 | pkt->virt_hdr.hdr_len = pkt->hdr_len + l4hdr.th_off * sizeof(uint32_t); | |
eb700029 | 334 | pkt->virt_hdr.gso_size = gso_size; |
e263cd49 DF |
335 | break; |
336 | ||
337 | default: | |
dfc6f865 | 338 | g_assert_not_reached(); |
e263cd49 DF |
339 | } |
340 | ||
341 | if (csum_enable) { | |
342 | switch (pkt->l4proto) { | |
343 | case IP_PROTO_TCP: | |
344 | pkt->virt_hdr.flags = VIRTIO_NET_HDR_F_NEEDS_CSUM; | |
345 | pkt->virt_hdr.csum_start = pkt->hdr_len; | |
346 | pkt->virt_hdr.csum_offset = offsetof(struct tcp_hdr, th_sum); | |
347 | break; | |
348 | case IP_PROTO_UDP: | |
349 | pkt->virt_hdr.flags = VIRTIO_NET_HDR_F_NEEDS_CSUM; | |
350 | pkt->virt_hdr.csum_start = pkt->hdr_len; | |
351 | pkt->virt_hdr.csum_offset = offsetof(struct udp_hdr, uh_sum); | |
352 | break; | |
353 | default: | |
354 | break; | |
355 | } | |
356 | } | |
357 | } | |
358 | ||
eb700029 DF |
359 | void net_tx_pkt_setup_vlan_header_ex(struct NetTxPkt *pkt, |
360 | uint16_t vlan, uint16_t vlan_ethtype) | |
e263cd49 DF |
361 | { |
362 | bool is_new; | |
363 | assert(pkt); | |
364 | ||
eb700029 DF |
365 | eth_setup_vlan_headers_ex(pkt->vec[NET_TX_PKT_L2HDR_FRAG].iov_base, |
366 | vlan, vlan_ethtype, &is_new); | |
e263cd49 DF |
367 | |
368 | /* update l2hdrlen */ | |
369 | if (is_new) { | |
370 | pkt->hdr_len += sizeof(struct vlan_header); | |
605d52e6 | 371 | pkt->vec[NET_TX_PKT_L2HDR_FRAG].iov_len += |
e263cd49 DF |
372 | sizeof(struct vlan_header); |
373 | } | |
374 | } | |
375 | ||
605d52e6 | 376 | bool net_tx_pkt_add_raw_fragment(struct NetTxPkt *pkt, hwaddr pa, |
e263cd49 DF |
377 | size_t len) |
378 | { | |
379 | hwaddr mapped_len = 0; | |
380 | struct iovec *ventry; | |
381 | assert(pkt); | |
035e69b0 MMC |
382 | |
383 | if (pkt->raw_frags >= pkt->max_raw_frags) { | |
384 | return false; | |
385 | } | |
e263cd49 DF |
386 | |
387 | if (!len) { | |
388 | return true; | |
389 | } | |
390 | ||
391 | ventry = &pkt->raw[pkt->raw_frags]; | |
392 | mapped_len = len; | |
393 | ||
11171010 DF |
394 | ventry->iov_base = pci_dma_map(pkt->pci_dev, pa, |
395 | &mapped_len, DMA_DIRECTION_TO_DEVICE); | |
e263cd49 | 396 | |
eb700029 DF |
397 | if ((ventry->iov_base != NULL) && (len == mapped_len)) { |
398 | ventry->iov_len = mapped_len; | |
399 | pkt->raw_frags++; | |
400 | return true; | |
401 | } else { | |
e263cd49 DF |
402 | return false; |
403 | } | |
eb700029 | 404 | } |
e263cd49 | 405 | |
eb700029 DF |
406 | bool net_tx_pkt_has_fragments(struct NetTxPkt *pkt) |
407 | { | |
408 | return pkt->raw_frags > 0; | |
e263cd49 DF |
409 | } |
410 | ||
605d52e6 | 411 | eth_pkt_types_e net_tx_pkt_get_packet_type(struct NetTxPkt *pkt) |
e263cd49 DF |
412 | { |
413 | assert(pkt); | |
414 | ||
415 | return pkt->packet_type; | |
416 | } | |
417 | ||
605d52e6 | 418 | size_t net_tx_pkt_get_total_len(struct NetTxPkt *pkt) |
e263cd49 DF |
419 | { |
420 | assert(pkt); | |
421 | ||
422 | return pkt->hdr_len + pkt->payload_len; | |
423 | } | |
424 | ||
/*
 * Print a one-line summary of @pkt to stdout. Compiled to a no-op unless
 * NET_TX_PKT_DEBUG is defined.
 */
void net_tx_pkt_dump(struct NetTxPkt *pkt)
{
#ifdef NET_TX_PKT_DEBUG
    assert(pkt);

    /* iov_len is a size_t, so it must be printed with %zu, not %lu */
    printf("TX PKT: hdr_len: %d, pkt_type: 0x%X, l2hdr_len: %zu, "
        "l3hdr_len: %zu, payload_len: %u\n", pkt->hdr_len, pkt->packet_type,
        pkt->vec[NET_TX_PKT_L2HDR_FRAG].iov_len,
        pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_len, pkt->payload_len);
#endif
}
436 | ||
605d52e6 | 437 | void net_tx_pkt_reset(struct NetTxPkt *pkt) |
e263cd49 DF |
438 | { |
439 | int i; | |
440 | ||
441 | /* no assert, as reset can be called before tx_pkt_init */ | |
442 | if (!pkt) { | |
443 | return; | |
444 | } | |
445 | ||
446 | memset(&pkt->virt_hdr, 0, sizeof(pkt->virt_hdr)); | |
447 | ||
e263cd49 | 448 | assert(pkt->vec); |
eb700029 | 449 | |
e263cd49 DF |
450 | pkt->payload_len = 0; |
451 | pkt->payload_frags = 0; | |
452 | ||
283f0a05 TH |
453 | if (pkt->max_raw_frags > 0) { |
454 | assert(pkt->raw); | |
455 | for (i = 0; i < pkt->raw_frags; i++) { | |
456 | assert(pkt->raw[i].iov_base); | |
457 | pci_dma_unmap(pkt->pci_dev, pkt->raw[i].iov_base, | |
458 | pkt->raw[i].iov_len, DMA_DIRECTION_TO_DEVICE, 0); | |
459 | } | |
e263cd49 DF |
460 | } |
461 | pkt->raw_frags = 0; | |
462 | ||
463 | pkt->hdr_len = 0; | |
e263cd49 DF |
464 | pkt->l4proto = 0; |
465 | } | |
466 | ||
605d52e6 | 467 | static void net_tx_pkt_do_sw_csum(struct NetTxPkt *pkt) |
e263cd49 | 468 | { |
605d52e6 | 469 | struct iovec *iov = &pkt->vec[NET_TX_PKT_L2HDR_FRAG]; |
e263cd49 DF |
470 | uint32_t csum_cntr; |
471 | uint16_t csum = 0; | |
eb700029 | 472 | uint32_t cso; |
e263cd49 | 473 | /* num of iovec without vhdr */ |
605d52e6 | 474 | uint32_t iov_len = pkt->payload_frags + NET_TX_PKT_PL_START_FRAG - 1; |
e263cd49 | 475 | uint16_t csl; |
e263cd49 | 476 | size_t csum_offset = pkt->virt_hdr.csum_start + pkt->virt_hdr.csum_offset; |
9a8d9492 | 477 | uint16_t l3_proto = eth_get_l3_proto(iov, 1, iov->iov_len); |
e263cd49 DF |
478 | |
479 | /* Put zero to checksum field */ | |
480 | iov_from_buf(iov, iov_len, csum_offset, &csum, sizeof csum); | |
481 | ||
482 | /* Calculate L4 TCP/UDP checksum */ | |
483 | csl = pkt->payload_len; | |
484 | ||
9a8d9492 A |
485 | csum_cntr = 0; |
486 | cso = 0; | |
e263cd49 | 487 | /* add pseudo header to csum */ |
9a8d9492 A |
488 | if (l3_proto == ETH_P_IP) { |
489 | csum_cntr = eth_calc_ip4_pseudo_hdr_csum( | |
490 | pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_base, | |
491 | csl, &cso); | |
492 | } else if (l3_proto == ETH_P_IPV6) { | |
493 | csum_cntr = eth_calc_ip6_pseudo_hdr_csum( | |
494 | pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_base, | |
495 | csl, pkt->l4proto, &cso); | |
496 | } | |
eb700029 DF |
497 | |
498 | /* data checksum */ | |
499 | csum_cntr += | |
500 | net_checksum_add_iov(iov, iov_len, pkt->virt_hdr.csum_start, csl, cso); | |
e263cd49 DF |
501 | |
502 | /* Put the checksum obtained into the packet */ | |
0dacea92 | 503 | csum = cpu_to_be16(net_checksum_finish_nozero(csum_cntr)); |
e263cd49 DF |
504 | iov_from_buf(iov, iov_len, csum_offset, &csum, sizeof csum); |
505 | } | |
506 | ||
/* Layout of the header entries at the start of a software fragment. */
enum {
    NET_TX_PKT_FRAGMENT_L2_HDR_POS = 0, /* L2 header entry */
    NET_TX_PKT_FRAGMENT_L3_HDR_POS = 1, /* L3 header entry */
    NET_TX_PKT_FRAGMENT_HEADER_NUM = 2  /* number of header entries */
};
512 | ||
605d52e6 | 513 | #define NET_MAX_FRAG_SG_LIST (64) |
e263cd49 | 514 | |
605d52e6 | 515 | static size_t net_tx_pkt_fetch_fragment(struct NetTxPkt *pkt, |
e263cd49 DF |
516 | int *src_idx, size_t *src_offset, struct iovec *dst, int *dst_idx) |
517 | { | |
518 | size_t fetched = 0; | |
519 | struct iovec *src = pkt->vec; | |
520 | ||
605d52e6 | 521 | *dst_idx = NET_TX_PKT_FRAGMENT_HEADER_NUM; |
e263cd49 | 522 | |
eb700029 | 523 | while (fetched < IP_FRAG_ALIGN_SIZE(pkt->virt_hdr.gso_size)) { |
e263cd49 DF |
524 | |
525 | /* no more place in fragment iov */ | |
605d52e6 | 526 | if (*dst_idx == NET_MAX_FRAG_SG_LIST) { |
e263cd49 DF |
527 | break; |
528 | } | |
529 | ||
530 | /* no more data in iovec */ | |
605d52e6 | 531 | if (*src_idx == (pkt->payload_frags + NET_TX_PKT_PL_START_FRAG)) { |
e263cd49 DF |
532 | break; |
533 | } | |
534 | ||
535 | ||
536 | dst[*dst_idx].iov_base = src[*src_idx].iov_base + *src_offset; | |
537 | dst[*dst_idx].iov_len = MIN(src[*src_idx].iov_len - *src_offset, | |
eb700029 | 538 | IP_FRAG_ALIGN_SIZE(pkt->virt_hdr.gso_size) - fetched); |
e263cd49 DF |
539 | |
540 | *src_offset += dst[*dst_idx].iov_len; | |
541 | fetched += dst[*dst_idx].iov_len; | |
542 | ||
543 | if (*src_offset == src[*src_idx].iov_len) { | |
544 | *src_offset = 0; | |
545 | (*src_idx)++; | |
546 | } | |
547 | ||
548 | (*dst_idx)++; | |
549 | } | |
550 | ||
551 | return fetched; | |
552 | } | |
553 | ||
eb700029 DF |
554 | static inline void net_tx_pkt_sendv(struct NetTxPkt *pkt, |
555 | NetClientState *nc, const struct iovec *iov, int iov_cnt) | |
556 | { | |
557 | if (pkt->is_loopback) { | |
8c552542 | 558 | qemu_receive_packet_iov(nc, iov, iov_cnt); |
eb700029 DF |
559 | } else { |
560 | qemu_sendv_packet(nc, iov, iov_cnt); | |
561 | } | |
562 | } | |
563 | ||
605d52e6 | 564 | static bool net_tx_pkt_do_sw_fragmentation(struct NetTxPkt *pkt, |
e263cd49 DF |
565 | NetClientState *nc) |
566 | { | |
605d52e6 | 567 | struct iovec fragment[NET_MAX_FRAG_SG_LIST]; |
e263cd49 DF |
568 | size_t fragment_len = 0; |
569 | bool more_frags = false; | |
570 | ||
571 | /* some pointers for shorter code */ | |
572 | void *l2_iov_base, *l3_iov_base; | |
573 | size_t l2_iov_len, l3_iov_len; | |
605d52e6 | 574 | int src_idx = NET_TX_PKT_PL_START_FRAG, dst_idx; |
e263cd49 DF |
575 | size_t src_offset = 0; |
576 | size_t fragment_offset = 0; | |
577 | ||
605d52e6 DF |
578 | l2_iov_base = pkt->vec[NET_TX_PKT_L2HDR_FRAG].iov_base; |
579 | l2_iov_len = pkt->vec[NET_TX_PKT_L2HDR_FRAG].iov_len; | |
580 | l3_iov_base = pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_base; | |
581 | l3_iov_len = pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_len; | |
e263cd49 DF |
582 | |
583 | /* Copy headers */ | |
605d52e6 DF |
584 | fragment[NET_TX_PKT_FRAGMENT_L2_HDR_POS].iov_base = l2_iov_base; |
585 | fragment[NET_TX_PKT_FRAGMENT_L2_HDR_POS].iov_len = l2_iov_len; | |
586 | fragment[NET_TX_PKT_FRAGMENT_L3_HDR_POS].iov_base = l3_iov_base; | |
587 | fragment[NET_TX_PKT_FRAGMENT_L3_HDR_POS].iov_len = l3_iov_len; | |
e263cd49 DF |
588 | |
589 | ||
590 | /* Put as much data as possible and send */ | |
591 | do { | |
605d52e6 | 592 | fragment_len = net_tx_pkt_fetch_fragment(pkt, &src_idx, &src_offset, |
e263cd49 DF |
593 | fragment, &dst_idx); |
594 | ||
595 | more_frags = (fragment_offset + fragment_len < pkt->payload_len); | |
596 | ||
597 | eth_setup_ip4_fragmentation(l2_iov_base, l2_iov_len, l3_iov_base, | |
598 | l3_iov_len, fragment_len, fragment_offset, more_frags); | |
599 | ||
600 | eth_fix_ip4_checksum(l3_iov_base, l3_iov_len); | |
601 | ||
eb700029 | 602 | net_tx_pkt_sendv(pkt, nc, fragment, dst_idx); |
e263cd49 DF |
603 | |
604 | fragment_offset += fragment_len; | |
605 | ||
ead315e4 | 606 | } while (fragment_len && more_frags); |
e263cd49 DF |
607 | |
608 | return true; | |
609 | } | |
610 | ||
605d52e6 | 611 | bool net_tx_pkt_send(struct NetTxPkt *pkt, NetClientState *nc) |
e263cd49 DF |
612 | { |
613 | assert(pkt); | |
614 | ||
615 | if (!pkt->has_virt_hdr && | |
616 | pkt->virt_hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) { | |
605d52e6 | 617 | net_tx_pkt_do_sw_csum(pkt); |
e263cd49 DF |
618 | } |
619 | ||
620 | /* | |
621 | * Since underlying infrastructure does not support IP datagrams longer | |
622 | * than 64K we should drop such packets and don't even try to send | |
623 | */ | |
624 | if (VIRTIO_NET_HDR_GSO_NONE != pkt->virt_hdr.gso_type) { | |
625 | if (pkt->payload_len > | |
626 | ETH_MAX_IP_DGRAM_LEN - | |
605d52e6 | 627 | pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_len) { |
e263cd49 DF |
628 | return false; |
629 | } | |
630 | } | |
631 | ||
632 | if (pkt->has_virt_hdr || | |
633 | pkt->virt_hdr.gso_type == VIRTIO_NET_HDR_GSO_NONE) { | |
e219d309 | 634 | net_tx_pkt_fix_ip6_payload_len(pkt); |
eb700029 | 635 | net_tx_pkt_sendv(pkt, nc, pkt->vec, |
605d52e6 | 636 | pkt->payload_frags + NET_TX_PKT_PL_START_FRAG); |
e263cd49 DF |
637 | return true; |
638 | } | |
639 | ||
605d52e6 | 640 | return net_tx_pkt_do_sw_fragmentation(pkt, nc); |
e263cd49 | 641 | } |
eb700029 DF |
642 | |
643 | bool net_tx_pkt_send_loopback(struct NetTxPkt *pkt, NetClientState *nc) | |
644 | { | |
645 | bool res; | |
646 | ||
647 | pkt->is_loopback = true; | |
648 | res = net_tx_pkt_send(pkt, nc); | |
649 | pkt->is_loopback = false; | |
650 | ||
651 | return res; | |
652 | } | |
e219d309 A |
653 | |
654 | void net_tx_pkt_fix_ip6_payload_len(struct NetTxPkt *pkt) | |
655 | { | |
656 | struct iovec *l2 = &pkt->vec[NET_TX_PKT_L2HDR_FRAG]; | |
657 | if (eth_get_l3_proto(l2, 1, l2->iov_len) == ETH_P_IPV6) { | |
658 | struct ip6_header *ip6 = (struct ip6_header *) pkt->l3_hdr; | |
659 | /* | |
660 | * TODO: if qemu would support >64K packets - add jumbo option check | |
661 | * something like that: | |
662 | * 'if (ip6->ip6_plen == 0 && !has_jumbo_option(ip6)) {' | |
663 | */ | |
664 | if (ip6->ip6_plen == 0) { | |
665 | if (pkt->payload_len <= ETH_MAX_IP_DGRAM_LEN) { | |
666 | ip6->ip6_plen = htons(pkt->payload_len); | |
667 | } | |
668 | /* | |
669 | * TODO: if qemu would support >64K packets | |
670 | * add jumbo option for packets greater then 65,535 bytes | |
671 | */ | |
672 | } | |
673 | } | |
674 | } |