]>
Commit | Line | Data |
---|---|---|
e4d5639d AS |
1 | /* |
2 | * Helpers for getting linearized buffers from iov / filling buffers into iovs | |
3 | * | |
4 | * Copyright IBM, Corp. 2007, 2008 | |
5 | * Copyright (C) 2010 Red Hat, Inc. | |
6 | * | |
7 | * Author(s): | |
8 | * Anthony Liguori <[email protected]> | |
9 | * Amit Shah <[email protected]> | |
2278a69e | 10 | * Michael Tokarev <[email protected]> |
e4d5639d AS |
11 | * |
12 | * This work is licensed under the terms of the GNU GPL, version 2. See | |
13 | * the COPYING file in the top-level directory. | |
6b620ca3 PB |
14 | * |
15 | * Contributions after 2012-01-13 are licensed under the terms of the | |
16 | * GNU GPL, version 2 or (at your option) any later version. | |
e4d5639d AS |
17 | */ |
18 | ||
1de7afc9 | 19 | #include "qemu/iov.h" |
cc99c6f5 | 20 | #include "qemu/sockets.h" |
25e5e4c7 | 21 | |
/*
 * Scatter up to `bytes' bytes from the linear buffer `buf' into the vector
 * `iov' (of `iov_cnt' elements), starting `offset' bytes into the vector.
 *
 * Copying stops early when the vector has no more room.  Returns the number
 * of bytes actually copied.  Asserts when `offset' lies beyond the end of
 * the vector.
 */
size_t iov_from_buf(const struct iovec *iov, unsigned int iov_cnt,
                    size_t offset, const void *buf, size_t bytes)
{
    const char *src = buf;
    size_t done = 0;
    unsigned int i;

    for (i = 0; (offset || done < bytes) && i < iov_cnt; i++) {
        size_t avail, len;

        if (offset >= iov[i].iov_len) {
            /* This element lies entirely before the start position. */
            offset -= iov[i].iov_len;
            continue;
        }
        avail = iov[i].iov_len - offset;
        len = bytes - done < avail ? bytes - done : avail;
        if (len) {
            /* Only touch `buf' when there is data to move, so that a NULL
             * buffer with bytes == 0 never reaches memcpy(). */
            memcpy((char *)iov[i].iov_base + offset, src + done, len);
            done += len;
        }
        offset = 0;
    }
    assert(offset == 0);
    return done;
}
fa6111f2 | 40 | |
/*
 * Gather up to `bytes' bytes from the vector `iov' (of `iov_cnt' elements),
 * starting `offset' bytes into the vector, into the linear buffer `buf'.
 *
 * Copying stops early when the vector is exhausted.  Returns the number of
 * bytes actually copied.  Asserts when `offset' lies beyond the end of the
 * vector.
 */
size_t iov_to_buf(const struct iovec *iov, const unsigned int iov_cnt,
                  size_t offset, void *buf, size_t bytes)
{
    char *dst = buf;
    size_t done = 0;
    unsigned int i;

    for (i = 0; (offset || done < bytes) && i < iov_cnt; i++) {
        size_t avail, len;

        if (offset >= iov[i].iov_len) {
            /* This element lies entirely before the start position. */
            offset -= iov[i].iov_len;
            continue;
        }
        avail = iov[i].iov_len - offset;
        len = bytes - done < avail ? bytes - done : avail;
        if (len) {
            /* Only touch `buf' when there is data to move, so that a NULL
             * buffer with bytes == 0 never reaches memcpy(). */
            memcpy(dst + done, (const char *)iov[i].iov_base + offset, len);
            done += len;
        }
        offset = 0;
    }
    assert(offset == 0);
    return done;
}
59 | ||
/*
 * Fill up to `bytes' bytes of the vector `iov' (of `iov_cnt' elements) with
 * the byte value `fillc', starting `offset' bytes into the vector.
 *
 * Filling stops early when the vector is exhausted.  Returns the number of
 * bytes actually set.  Asserts when `offset' lies beyond the end of the
 * vector.
 */
size_t iov_memset(const struct iovec *iov, const unsigned int iov_cnt,
                  size_t offset, int fillc, size_t bytes)
{
    size_t done = 0;
    unsigned int i;

    for (i = 0; (offset || done < bytes) && i < iov_cnt; i++) {
        size_t avail, len;

        if (offset >= iov[i].iov_len) {
            /* This element lies entirely before the start position. */
            offset -= iov[i].iov_len;
            continue;
        }
        avail = iov[i].iov_len - offset;
        len = bytes - done < avail ? bytes - done : avail;
        /* Byte-pointer arithmetic keeps this valid ISO C. */
        memset((char *)iov[i].iov_base + offset, fillc, len);
        done += len;
        offset = 0;
    }
    assert(offset == 0);
    return done;
}
78 | ||
/*
 * Return the total number of bytes described by the `iov_cnt' elements of
 * `iov', i.e. the sum of their iov_len fields.
 */
size_t iov_size(const struct iovec *iov, const unsigned int iov_cnt)
{
    size_t total = 0;
    unsigned int idx;

    /* Summation order does not matter; walk from the back. */
    for (idx = iov_cnt; idx-- > 0; ) {
        total += iov[idx].iov_len;
    }
    return total;
}
3a1dca94 | 90 | |
/*
 * Helper function for iov_send_recv().
 *
 * Sends (do_send == true) or receives (do_send == false) the `iov_cnt'
 * elements of `iov' over `sockfd'.  Returns the number of bytes transferred,
 * 0 if the underlying call transferred nothing, or -1 on error when no data
 * was processed at all.  EINTR is retried internally.
 */
static ssize_t
do_send_recv(int sockfd, struct iovec *iov, unsigned iov_cnt, bool do_send)
{
#ifdef CONFIG_POSIX
    ssize_t ret;
    struct msghdr msg;
    memset(&msg, 0, sizeof(msg));
    msg.msg_iov = iov;
    msg.msg_iovlen = iov_cnt;
    do {
        ret = do_send
            ? sendmsg(sockfd, &msg, 0)
            : recvmsg(sockfd, &msg, 0);
    } while (ret < 0 && errno == EINTR);
    return ret;
#else
    /* else send piece-by-piece */
    /*XXX Note: windows has WSASend() and WSARecv() */
    unsigned i = 0;
    ssize_t ret = 0;
    size_t off = 0;     /* bytes of iov[i] already transferred */
    while (i < iov_cnt) {
        ssize_t r = do_send
            ? send(sockfd, (char *)iov[i].iov_base + off,
                   iov[i].iov_len - off, 0)
            : recv(sockfd, (char *)iov[i].iov_base + off,
                   iov[i].iov_len - off, 0);
        if (r > 0) {
            ret += r;
            off += r;
            if (off < iov[i].iov_len) {
                /* Short transfer: finish this element before moving on,
                 * otherwise its tail would be skipped and the byte count
                 * returned would not describe a contiguous range. */
                continue;
            }
        } else if (!r) {
            break;
        } else if (errno == EINTR) {
            continue;
        } else {
            /* else it is some "other" error,
             * only return if there was no data processed. */
            if (ret == 0) {
                ret = -1;
            }
            break;
        }
        off = 0;
        i++;
    }
    return ret;
#endif
}
135 | ||
/*
 * Send or receive `bytes' bytes of the vector `iov', starting `offset'
 * bytes into it, over `sockfd', calling do_send_recv() as many times as
 * needed.
 *
 * The vector is modified temporarily around each call so that it describes
 * exactly the requested window; the changes are undone before this function
 * looks at the result.
 *
 * Returns the total number of bytes transferred.  On error returns -1,
 * except that EAGAIN after some data has already been transferred returns
 * that partial total.  A receive that yields 0 ends the loop early.
 */
ssize_t iov_send_recv(int sockfd, struct iovec *iov, unsigned iov_cnt,
                      size_t offset, size_t bytes,
                      bool do_send)
{
    ssize_t total = 0;

    while (bytes > 0) {
        unsigned nr;
        size_t remain;
        ssize_t ret;

        /* Locate the start: drop every leading element that `offset'
         * covers completely. */
        nr = 0;
        while (nr < iov_cnt && offset >= iov[nr].iov_len) {
            offset -= iov[nr].iov_len;
            nr++;
        }

        /* Running off the end would only be valid for bytes == 0, and the
         * loop condition has excluded that. */
        assert(nr < iov_cnt);
        iov += nr;
        iov_cnt -= nr;

        if (offset) {
            /* Trim the remaining `offset' bytes off the (now) first
             * element; restored below. */
            iov[0].iov_base = (char *)iov[0].iov_base + offset;
            iov[0].iov_len -= offset;
        }

        /* Locate the end: count the elements that fit entirely into the
         * `bytes' still wanted. */
        remain = bytes;
        nr = 0;
        while (nr < iov_cnt && iov[nr].iov_len <= remain) {
            remain -= iov[nr].iov_len;
            nr++;
        }

        if (remain) {
            /* The window ends inside element `nr': shorten it for the
             * duration of the call and put the length back right after. */
            size_t saved_len;

            assert(nr < iov_cnt);
            assert(iov[nr].iov_len > remain);
            saved_len = iov[nr].iov_len;
            iov[nr].iov_len = remain;
            ret = do_send_recv(sockfd, iov, nr + 1, do_send);
            iov[nr].iov_len = saved_len;
        } else {
            ret = do_send_recv(sockfd, iov, nr, do_send);
        }

        if (offset) {
            iov[0].iov_base = (char *)iov[0].iov_base - offset;
            iov[0].iov_len += offset;
        }

        if (ret < 0) {
            assert(errno != EINTR);
            return (errno == EAGAIN && total > 0) ? total : -1;
        }

        if (ret == 0 && !do_send) {
            /* recv returns 0 when the peer has performed an orderly
             * shutdown. */
            break;
        }

        /* Advance the window for the next round. */
        offset += ret;
        total += ret;
        bytes -= ret;
    }

    return total;
}
210 | ||
211 | ||
/*
 * Linearize at most `limit' bytes of the vector `iov' (of `iov_cnt'
 * elements) into a temporary buffer and pass that buffer, together with
 * `fp' and `prefix', to qemu_hexdump().
 */
void iov_hexdump(const struct iovec *iov, const unsigned int iov_cnt,
                 FILE *fp, const char *prefix, size_t limit)
{
    /* Use the shared helper instead of an open-coded sum with a signed
     * index compared against the unsigned element count. */
    size_t size = iov_size(iov, iov_cnt);
    char *buf;

    if (size > limit) {
        size = limit;
    }
    buf = g_malloc(size);
    iov_to_buf(iov, iov_cnt, 0, buf, size);
    qemu_hexdump(buf, fp, prefix, size);
    g_free(buf);
}
0191253c | 228 | |
/*
 * Build in `dst_iov' (capacity `dst_iov_cnt' elements) a vector describing
 * the window of `iov' that starts `offset' bytes in and is at most `bytes'
 * bytes long.  Only the element descriptors are produced; no data is
 * copied.  Zero-length source elements are skipped.
 *
 * Returns the number of elements written to `dst_iov'.  Asserts when
 * `offset' lies beyond the end of `iov'.
 */
unsigned iov_copy(struct iovec *dst_iov, unsigned int dst_iov_cnt,
                  const struct iovec *iov, unsigned int iov_cnt,
                  size_t offset, size_t bytes)
{
    unsigned int i = 0, j = 0;

    /* Consume `offset' first, independently of `bytes' and of the room in
     * dst_iov, so that a valid offset never trips the assertion merely
     * because nothing is to be copied. */
    while (i < iov_cnt && offset >= iov[i].iov_len) {
        offset -= iov[i].iov_len;
        i++;
    }
    /* offset beyond end of iov */
    assert(i < iov_cnt || offset == 0);

    for (; i < iov_cnt && j < dst_iov_cnt && bytes; i++) {
        size_t avail = iov[i].iov_len - offset;
        size_t len;

        if (avail == 0) {
            /* Empty source element: nothing to describe. */
            continue;
        }
        len = bytes < avail ? bytes : avail;

        dst_iov[j].iov_base = (char *)iov[i].iov_base + offset;
        dst_iov[j].iov_len = len;
        j++;
        bytes -= len;
        offset = 0;
    }
    return j;
}
f563a5d7 | 251 | |
0191253c PB |
252 | /* io vectors */ |
253 | ||
254 | void qemu_iovec_init(QEMUIOVector *qiov, int alloc_hint) | |
255 | { | |
256 | qiov->iov = g_malloc(alloc_hint * sizeof(struct iovec)); | |
257 | qiov->niov = 0; | |
258 | qiov->nalloc = alloc_hint; | |
259 | qiov->size = 0; | |
260 | } | |
261 | ||
262 | void qemu_iovec_init_external(QEMUIOVector *qiov, struct iovec *iov, int niov) | |
263 | { | |
264 | int i; | |
265 | ||
266 | qiov->iov = iov; | |
267 | qiov->niov = niov; | |
268 | qiov->nalloc = -1; | |
269 | qiov->size = 0; | |
270 | for (i = 0; i < niov; i++) | |
271 | qiov->size += iov[i].iov_len; | |
272 | } | |
273 | ||
274 | void qemu_iovec_add(QEMUIOVector *qiov, void *base, size_t len) | |
275 | { | |
276 | assert(qiov->nalloc != -1); | |
277 | ||
278 | if (qiov->niov == qiov->nalloc) { | |
279 | qiov->nalloc = 2 * qiov->nalloc + 1; | |
280 | qiov->iov = g_realloc(qiov->iov, qiov->nalloc * sizeof(struct iovec)); | |
281 | } | |
282 | qiov->iov[qiov->niov].iov_base = base; | |
283 | qiov->iov[qiov->niov].iov_len = len; | |
284 | qiov->size += len; | |
285 | ++qiov->niov; | |
286 | } | |
287 | ||
288 | /* | |
530c0bbd | 289 | * Concatenates (partial) iovecs from src_iov to the end of dst. |
0191253c PB |
290 | * It starts copying after skipping `soffset' bytes at the |
291 | * beginning of src and adds individual vectors from src to | |
292 | * dst copies up to `sbytes' bytes total, or up to the end | |
530c0bbd | 293 | * of src_iov if it comes first. This way, it is okay to specify |
0191253c PB |
294 | * very large value for `sbytes' to indicate "up to the end |
295 | * of src". | |
296 | * Only vector pointers are processed, not the actual data buffers. | |
297 | */ | |
519661ee PB |
298 | size_t qemu_iovec_concat_iov(QEMUIOVector *dst, |
299 | struct iovec *src_iov, unsigned int src_cnt, | |
300 | size_t soffset, size_t sbytes) | |
0191253c PB |
301 | { |
302 | int i; | |
303 | size_t done; | |
facf98ad AK |
304 | |
305 | if (!sbytes) { | |
519661ee | 306 | return 0; |
facf98ad | 307 | } |
0191253c | 308 | assert(dst->nalloc != -1); |
530c0bbd SH |
309 | for (i = 0, done = 0; done < sbytes && i < src_cnt; i++) { |
310 | if (soffset < src_iov[i].iov_len) { | |
311 | size_t len = MIN(src_iov[i].iov_len - soffset, sbytes - done); | |
312 | qemu_iovec_add(dst, src_iov[i].iov_base + soffset, len); | |
0191253c PB |
313 | done += len; |
314 | soffset = 0; | |
315 | } else { | |
530c0bbd | 316 | soffset -= src_iov[i].iov_len; |
0191253c PB |
317 | } |
318 | } | |
530c0bbd | 319 | assert(soffset == 0); /* offset beyond end of src */ |
519661ee PB |
320 | |
321 | return done; | |
530c0bbd SH |
322 | } |
323 | ||
324 | /* | |
325 | * Concatenates (partial) iovecs from src to the end of dst. | |
326 | * It starts copying after skipping `soffset' bytes at the | |
327 | * beginning of src and adds individual vectors from src to | |
328 | * dst copies up to `sbytes' bytes total, or up to the end | |
329 | * of src if it comes first. This way, it is okay to specify | |
330 | * very large value for `sbytes' to indicate "up to the end | |
331 | * of src". | |
332 | * Only vector pointers are processed, not the actual data buffers. | |
333 | */ | |
334 | void qemu_iovec_concat(QEMUIOVector *dst, | |
335 | QEMUIOVector *src, size_t soffset, size_t sbytes) | |
336 | { | |
337 | qemu_iovec_concat_iov(dst, src->iov, src->niov, soffset, sbytes); | |
0191253c PB |
338 | } |
339 | ||
43f35cb5 PL |
340 | /* |
341 | * Check if the contents of the iovecs are all zero | |
342 | */ | |
343 | bool qemu_iovec_is_zero(QEMUIOVector *qiov) | |
344 | { | |
345 | int i; | |
346 | for (i = 0; i < qiov->niov; i++) { | |
347 | size_t offs = QEMU_ALIGN_DOWN(qiov->iov[i].iov_len, 4 * sizeof(long)); | |
348 | uint8_t *ptr = qiov->iov[i].iov_base; | |
349 | if (offs && !buffer_is_zero(qiov->iov[i].iov_base, offs)) { | |
350 | return false; | |
351 | } | |
352 | for (; offs < qiov->iov[i].iov_len; offs++) { | |
353 | if (ptr[offs]) { | |
354 | return false; | |
355 | } | |
356 | } | |
357 | } | |
358 | return true; | |
359 | } | |
360 | ||
0191253c PB |
361 | void qemu_iovec_destroy(QEMUIOVector *qiov) |
362 | { | |
363 | assert(qiov->nalloc != -1); | |
364 | ||
365 | qemu_iovec_reset(qiov); | |
366 | g_free(qiov->iov); | |
367 | qiov->nalloc = 0; | |
368 | qiov->iov = NULL; | |
369 | } | |
370 | ||
371 | void qemu_iovec_reset(QEMUIOVector *qiov) | |
372 | { | |
373 | assert(qiov->nalloc != -1); | |
374 | ||
375 | qiov->niov = 0; | |
376 | qiov->size = 0; | |
377 | } | |
378 | ||
379 | size_t qemu_iovec_to_buf(QEMUIOVector *qiov, size_t offset, | |
380 | void *buf, size_t bytes) | |
381 | { | |
382 | return iov_to_buf(qiov->iov, qiov->niov, offset, buf, bytes); | |
383 | } | |
384 | ||
385 | size_t qemu_iovec_from_buf(QEMUIOVector *qiov, size_t offset, | |
386 | const void *buf, size_t bytes) | |
387 | { | |
388 | return iov_from_buf(qiov->iov, qiov->niov, offset, buf, bytes); | |
389 | } | |
390 | ||
391 | size_t qemu_iovec_memset(QEMUIOVector *qiov, size_t offset, | |
392 | int fillc, size_t bytes) | |
393 | { | |
394 | return iov_memset(qiov->iov, qiov->niov, offset, fillc, bytes); | |
395 | } | |
d0277635 | 396 | |
f70d7f7e BC |
397 | /** |
398 | * Check that I/O vector contents are identical | |
399 | * | |
400 | * The IO vectors must have the same structure (same length of all parts). | |
401 | * A typical usage is to compare vectors created with qemu_iovec_clone(). | |
402 | * | |
403 | * @a: I/O vector | |
404 | * @b: I/O vector | |
405 | * @ret: Offset to first mismatching byte or -1 if match | |
406 | */ | |
407 | ssize_t qemu_iovec_compare(QEMUIOVector *a, QEMUIOVector *b) | |
408 | { | |
409 | int i; | |
410 | ssize_t offset = 0; | |
411 | ||
412 | assert(a->niov == b->niov); | |
413 | for (i = 0; i < a->niov; i++) { | |
414 | size_t len = 0; | |
415 | uint8_t *p = (uint8_t *)a->iov[i].iov_base; | |
416 | uint8_t *q = (uint8_t *)b->iov[i].iov_base; | |
417 | ||
418 | assert(a->iov[i].iov_len == b->iov[i].iov_len); | |
419 | while (len < a->iov[i].iov_len && *p++ == *q++) { | |
420 | len++; | |
421 | } | |
422 | ||
423 | offset += len; | |
424 | ||
425 | if (len != a->iov[i].iov_len) { | |
426 | return offset; | |
427 | } | |
428 | } | |
429 | return -1; | |
430 | } | |
431 | ||
/* Bookkeeping record for one source element while qemu_iovec_clone() sorts
 * the source vector. */
typedef struct {
    int src_index;              /* position of the element in the source */
    struct iovec *src_iov;      /* the source element itself */
    void *dest_base;            /* base address chosen for the clone */
} IOVectorSortElem;
437 | ||
438 | static int sortelem_cmp_src_base(const void *a, const void *b) | |
439 | { | |
440 | const IOVectorSortElem *elem_a = a; | |
441 | const IOVectorSortElem *elem_b = b; | |
442 | ||
443 | /* Don't overflow */ | |
444 | if (elem_a->src_iov->iov_base < elem_b->src_iov->iov_base) { | |
445 | return -1; | |
446 | } else if (elem_a->src_iov->iov_base > elem_b->src_iov->iov_base) { | |
447 | return 1; | |
448 | } else { | |
449 | return 0; | |
450 | } | |
451 | } | |
452 | ||
453 | static int sortelem_cmp_src_index(const void *a, const void *b) | |
454 | { | |
455 | const IOVectorSortElem *elem_a = a; | |
456 | const IOVectorSortElem *elem_b = b; | |
457 | ||
458 | return elem_a->src_index - elem_b->src_index; | |
459 | } | |
460 | ||
461 | /** | |
462 | * Copy contents of I/O vector | |
463 | * | |
464 | * The relative relationships of overlapping iovecs are preserved. This is | |
465 | * necessary to ensure identical semantics in the cloned I/O vector. | |
466 | */ | |
467 | void qemu_iovec_clone(QEMUIOVector *dest, const QEMUIOVector *src, void *buf) | |
468 | { | |
469 | IOVectorSortElem sortelems[src->niov]; | |
470 | void *last_end; | |
471 | int i; | |
472 | ||
473 | /* Sort by source iovecs by base address */ | |
474 | for (i = 0; i < src->niov; i++) { | |
475 | sortelems[i].src_index = i; | |
476 | sortelems[i].src_iov = &src->iov[i]; | |
477 | } | |
478 | qsort(sortelems, src->niov, sizeof(sortelems[0]), sortelem_cmp_src_base); | |
479 | ||
480 | /* Allocate buffer space taking into account overlapping iovecs */ | |
481 | last_end = NULL; | |
482 | for (i = 0; i < src->niov; i++) { | |
483 | struct iovec *cur = sortelems[i].src_iov; | |
484 | ptrdiff_t rewind = 0; | |
485 | ||
486 | /* Detect overlap */ | |
487 | if (last_end && last_end > cur->iov_base) { | |
488 | rewind = last_end - cur->iov_base; | |
489 | } | |
490 | ||
491 | sortelems[i].dest_base = buf - rewind; | |
492 | buf += cur->iov_len - MIN(rewind, cur->iov_len); | |
493 | last_end = MAX(cur->iov_base + cur->iov_len, last_end); | |
494 | } | |
495 | ||
496 | /* Sort by source iovec index and build destination iovec */ | |
497 | qsort(sortelems, src->niov, sizeof(sortelems[0]), sortelem_cmp_src_index); | |
498 | for (i = 0; i < src->niov; i++) { | |
499 | qemu_iovec_add(dest, sortelems[i].dest_base, src->iov[i].iov_len); | |
500 | } | |
501 | } | |
502 | ||
/*
 * Drop up to `bytes' bytes from the front of the vector.
 *
 * Elements that are consumed completely are skipped by advancing `*iov'
 * and decrementing `*iov_cnt'; an element that is consumed only partly has
 * its base advanced and its length reduced in place.  Returns the number
 * of bytes dropped, which is less than `bytes' when the vector is shorter.
 */
size_t iov_discard_front(struct iovec **iov, unsigned int *iov_cnt,
                         size_t bytes)
{
    struct iovec *vec = *iov;
    size_t discarded = 0;

    while (*iov_cnt > 0) {
        if (vec->iov_len > bytes) {
            /* The cut falls inside this element: trim it and stop. */
            vec->iov_base = (char *)vec->iov_base + bytes;
            vec->iov_len -= bytes;
            discarded += bytes;
            break;
        }

        /* The whole element goes away. */
        discarded += vec->iov_len;
        bytes -= vec->iov_len;
        --*iov_cnt;
        vec++;
    }

    *iov = vec;
    return discarded;
}
525 | ||
/*
 * Drop up to `bytes' bytes from the back of the vector.
 *
 * Trailing elements that are consumed completely are removed by
 * decrementing `*iov_cnt'; an element that is consumed only partly has its
 * length reduced in place.  Returns the number of bytes dropped, which is
 * less than `bytes' when the vector is shorter.
 */
size_t iov_discard_back(struct iovec *iov, unsigned int *iov_cnt,
                        size_t bytes)
{
    size_t discarded = 0;

    while (*iov_cnt > 0) {
        struct iovec *last = &iov[*iov_cnt - 1];

        if (last->iov_len > bytes) {
            /* The cut falls inside this element: shorten it and stop. */
            last->iov_len -= bytes;
            discarded += bytes;
            break;
        }

        /* The whole element goes away. */
        bytes -= last->iov_len;
        discarded += last->iov_len;
        --*iov_cnt;
    }

    return discarded;
}