]>
Commit | Line | Data |
---|---|---|
6e02c38d AL |
1 | /* |
2 | * Virtio Block Device | |
3 | * | |
4 | * Copyright IBM, Corp. 2007 | |
5 | * | |
6 | * Authors: | |
7 | * Anthony Liguori <[email protected]> | |
8 | * | |
9 | * This work is licensed under the terms of the GNU GPL, version 2. See | |
10 | * the COPYING file in the top-level directory. | |
11 | * | |
12 | */ | |
13 | ||
869a5c6d AL |
14 | #include <qemu-common.h> |
15 | #include <sysemu.h> | |
6e02c38d AL |
16 | #include "virtio-blk.h" |
17 | #include "block_int.h" | |
18 | ||
19 | typedef struct VirtIOBlock | |
20 | { | |
21 | VirtIODevice vdev; | |
22 | BlockDriverState *bs; | |
23 | VirtQueue *vq; | |
869a5c6d | 24 | void *rq; |
6e02c38d AL |
25 | } VirtIOBlock; |
26 | ||
27 | static VirtIOBlock *to_virtio_blk(VirtIODevice *vdev) | |
28 | { | |
29 | return (VirtIOBlock *)vdev; | |
30 | } | |
31 | ||
32 | typedef struct VirtIOBlockReq | |
33 | { | |
34 | VirtIOBlock *dev; | |
35 | VirtQueueElement elem; | |
36 | struct virtio_blk_inhdr *in; | |
37 | struct virtio_blk_outhdr *out; | |
38 | size_t size; | |
39 | uint8_t *buffer; | |
869a5c6d | 40 | struct VirtIOBlockReq *next; |
6e02c38d AL |
41 | } VirtIOBlockReq; |
42 | ||
869a5c6d AL |
43 | static void virtio_blk_req_complete(VirtIOBlockReq *req, int status) |
44 | { | |
45 | VirtIOBlock *s = req->dev; | |
46 | ||
47 | req->in->status = status; | |
48 | virtqueue_push(s->vq, &req->elem, req->size + sizeof(*req->in)); | |
49 | virtio_notify(&s->vdev, s->vq); | |
50 | ||
51 | qemu_free(req->buffer); | |
52 | qemu_free(req); | |
53 | } | |
54 | ||
55 | static int virtio_blk_handle_write_error(VirtIOBlockReq *req, int error) | |
56 | { | |
57 | BlockInterfaceErrorAction action = drive_get_onerror(req->dev->bs); | |
58 | VirtIOBlock *s = req->dev; | |
59 | ||
60 | if (action == BLOCK_ERR_IGNORE) | |
61 | return 0; | |
62 | ||
63 | if ((error == ENOSPC && action == BLOCK_ERR_STOP_ENOSPC) | |
64 | || action == BLOCK_ERR_STOP_ANY) { | |
65 | req->next = s->rq; | |
66 | s->rq = req; | |
67 | vm_stop(0); | |
68 | } else { | |
69 | virtio_blk_req_complete(req, VIRTIO_BLK_S_IOERR); | |
70 | } | |
71 | ||
72 | return 1; | |
73 | } | |
74 | ||
6e02c38d AL |
75 | static void virtio_blk_rw_complete(void *opaque, int ret) |
76 | { | |
77 | VirtIOBlockReq *req = opaque; | |
6e02c38d AL |
78 | |
79 | /* Copy read data to the guest */ | |
80 | if (!ret && !(req->out->type & VIRTIO_BLK_T_OUT)) { | |
81 | size_t offset = 0; | |
82 | int i; | |
83 | ||
84 | for (i = 0; i < req->elem.in_num - 1; i++) { | |
85 | size_t len; | |
86 | ||
87 | /* Be pretty defensive wrt malicious guests */ | |
88 | len = MIN(req->elem.in_sg[i].iov_len, | |
89 | req->size - offset); | |
90 | ||
91 | memcpy(req->elem.in_sg[i].iov_base, | |
92 | req->buffer + offset, | |
93 | len); | |
94 | offset += len; | |
95 | } | |
869a5c6d AL |
96 | } else if (ret && (req->out->type & VIRTIO_BLK_T_OUT)) { |
97 | if (virtio_blk_handle_write_error(req, -ret)) | |
98 | return; | |
6e02c38d AL |
99 | } |
100 | ||
869a5c6d AL |
101 | virtio_blk_req_complete(req, VIRTIO_BLK_S_OK); |
102 | } | |
6e02c38d | 103 | |
869a5c6d AL |
104 | static VirtIOBlockReq *virtio_blk_alloc_request(VirtIOBlock *s) |
105 | { | |
106 | VirtIOBlockReq *req = qemu_mallocz(sizeof(*req)); | |
487414f1 | 107 | req->dev = s; |
869a5c6d | 108 | return req; |
6e02c38d AL |
109 | } |
110 | ||
111 | static VirtIOBlockReq *virtio_blk_get_request(VirtIOBlock *s) | |
112 | { | |
869a5c6d | 113 | VirtIOBlockReq *req = virtio_blk_alloc_request(s); |
6e02c38d | 114 | |
869a5c6d AL |
115 | if (req != NULL) { |
116 | if (!virtqueue_pop(s->vq, &req->elem)) { | |
117 | qemu_free(req); | |
118 | return NULL; | |
119 | } | |
6e02c38d AL |
120 | } |
121 | ||
122 | return req; | |
123 | } | |
124 | ||
869a5c6d AL |
125 | static int virtio_blk_handle_write(VirtIOBlockReq *req) |
126 | { | |
127 | if (!req->buffer) { | |
128 | size_t offset = 0; | |
129 | int i; | |
130 | ||
131 | for (i = 1; i < req->elem.out_num; i++) | |
132 | req->size += req->elem.out_sg[i].iov_len; | |
133 | ||
134 | req->buffer = qemu_memalign(512, req->size); | |
135 | if (req->buffer == NULL) { | |
136 | qemu_free(req); | |
137 | return -1; | |
138 | } | |
139 | ||
140 | /* We copy the data from the SG list to avoid splitting up the request. | |
141 | This helps performance a lot until we can pass full sg lists as AIO | |
142 | operations */ | |
143 | for (i = 1; i < req->elem.out_num; i++) { | |
144 | size_t len; | |
145 | ||
146 | len = MIN(req->elem.out_sg[i].iov_len, | |
147 | req->size - offset); | |
148 | memcpy(req->buffer + offset, | |
149 | req->elem.out_sg[i].iov_base, | |
150 | len); | |
151 | offset += len; | |
152 | } | |
153 | } | |
154 | ||
155 | bdrv_aio_write(req->dev->bs, req->out->sector, req->buffer, req->size / 512, | |
156 | virtio_blk_rw_complete, req); | |
157 | return 0; | |
158 | } | |
159 | ||
6e02c38d AL |
160 | static void virtio_blk_handle_output(VirtIODevice *vdev, VirtQueue *vq) |
161 | { | |
162 | VirtIOBlock *s = to_virtio_blk(vdev); | |
163 | VirtIOBlockReq *req; | |
164 | ||
165 | while ((req = virtio_blk_get_request(s))) { | |
166 | int i; | |
167 | ||
168 | if (req->elem.out_num < 1 || req->elem.in_num < 1) { | |
169 | fprintf(stderr, "virtio-blk missing headers\n"); | |
170 | exit(1); | |
171 | } | |
172 | ||
173 | if (req->elem.out_sg[0].iov_len < sizeof(*req->out) || | |
174 | req->elem.in_sg[req->elem.in_num - 1].iov_len < sizeof(*req->in)) { | |
175 | fprintf(stderr, "virtio-blk header not in correct element\n"); | |
176 | exit(1); | |
177 | } | |
178 | ||
179 | req->out = (void *)req->elem.out_sg[0].iov_base; | |
180 | req->in = (void *)req->elem.in_sg[req->elem.in_num - 1].iov_base; | |
181 | ||
182 | if (req->out->type & VIRTIO_BLK_T_SCSI_CMD) { | |
183 | unsigned int len = sizeof(*req->in); | |
184 | ||
185 | req->in->status = VIRTIO_BLK_S_UNSUPP; | |
186 | virtqueue_push(vq, &req->elem, len); | |
187 | virtio_notify(vdev, vq); | |
188 | qemu_free(req); | |
189 | } else if (req->out->type & VIRTIO_BLK_T_OUT) { | |
869a5c6d | 190 | if (virtio_blk_handle_write(req) < 0) |
6e02c38d | 191 | break; |
6e02c38d AL |
192 | } else { |
193 | for (i = 0; i < req->elem.in_num - 1; i++) | |
194 | req->size += req->elem.in_sg[i].iov_len; | |
195 | ||
196 | req->buffer = qemu_memalign(512, req->size); | |
197 | if (req->buffer == NULL) { | |
198 | qemu_free(req); | |
199 | break; | |
200 | } | |
201 | ||
202 | bdrv_aio_read(s->bs, req->out->sector, | |
203 | req->buffer, | |
204 | req->size / 512, | |
205 | virtio_blk_rw_complete, | |
206 | req); | |
207 | } | |
208 | } | |
209 | /* | |
210 | * FIXME: Want to check for completions before returning to guest mode, | |
211 | * so cached reads and writes are reported as quickly as possible. But | |
212 | * that should be done in the generic block layer. | |
213 | */ | |
214 | } | |
215 | ||
869a5c6d AL |
216 | static void virtio_blk_dma_restart_cb(void *opaque, int running, int reason) |
217 | { | |
218 | VirtIOBlock *s = opaque; | |
219 | VirtIOBlockReq *req = s->rq; | |
220 | ||
221 | if (!running) | |
222 | return; | |
223 | ||
224 | s->rq = NULL; | |
225 | ||
226 | while (req) { | |
227 | virtio_blk_handle_write(req); | |
228 | req = req->next; | |
229 | } | |
230 | } | |
231 | ||
6e02c38d AL |
232 | static void virtio_blk_reset(VirtIODevice *vdev) |
233 | { | |
234 | /* | |
235 | * This should cancel pending requests, but can't do nicely until there | |
236 | * are per-device request lists. | |
237 | */ | |
238 | qemu_aio_flush(); | |
239 | } | |
240 | ||
241 | static void virtio_blk_update_config(VirtIODevice *vdev, uint8_t *config) | |
242 | { | |
243 | VirtIOBlock *s = to_virtio_blk(vdev); | |
244 | struct virtio_blk_config blkcfg; | |
245 | uint64_t capacity; | |
246 | int cylinders, heads, secs; | |
247 | ||
248 | bdrv_get_geometry(s->bs, &capacity); | |
249 | bdrv_get_geometry_hint(s->bs, &cylinders, &heads, &secs); | |
250 | stq_raw(&blkcfg.capacity, capacity); | |
251 | stl_raw(&blkcfg.seg_max, 128 - 2); | |
252 | stw_raw(&blkcfg.cylinders, cylinders); | |
253 | blkcfg.heads = heads; | |
254 | blkcfg.sectors = secs; | |
255 | memcpy(config, &blkcfg, sizeof(blkcfg)); | |
256 | } | |
257 | ||
258 | static uint32_t virtio_blk_get_features(VirtIODevice *vdev) | |
259 | { | |
260 | return (1 << VIRTIO_BLK_F_SEG_MAX | 1 << VIRTIO_BLK_F_GEOMETRY); | |
261 | } | |
262 | ||
263 | static void virtio_blk_save(QEMUFile *f, void *opaque) | |
264 | { | |
265 | VirtIOBlock *s = opaque; | |
869a5c6d AL |
266 | VirtIOBlockReq *req = s->rq; |
267 | ||
6e02c38d | 268 | virtio_save(&s->vdev, f); |
869a5c6d AL |
269 | |
270 | while (req) { | |
271 | qemu_put_sbyte(f, 1); | |
272 | qemu_put_buffer(f, (unsigned char*)&req->elem, sizeof(req->elem)); | |
273 | req = req->next; | |
274 | } | |
275 | qemu_put_sbyte(f, 0); | |
6e02c38d AL |
276 | } |
277 | ||
278 | static int virtio_blk_load(QEMUFile *f, void *opaque, int version_id) | |
279 | { | |
280 | VirtIOBlock *s = opaque; | |
281 | ||
869a5c6d | 282 | if (version_id != 2) |
6e02c38d AL |
283 | return -EINVAL; |
284 | ||
285 | virtio_load(&s->vdev, f); | |
869a5c6d AL |
286 | while (qemu_get_sbyte(f)) { |
287 | VirtIOBlockReq *req = virtio_blk_alloc_request(s); | |
288 | qemu_get_buffer(f, (unsigned char*)&req->elem, sizeof(req->elem)); | |
289 | req->next = s->rq; | |
290 | s->rq = req->next; | |
291 | } | |
6e02c38d AL |
292 | |
293 | return 0; | |
294 | } | |
295 | ||
9b32d5a5 | 296 | void *virtio_blk_init(PCIBus *bus, BlockDriverState *bs) |
6e02c38d AL |
297 | { |
298 | VirtIOBlock *s; | |
299 | int cylinders, heads, secs; | |
300 | static int virtio_blk_id; | |
301 | ||
9b32d5a5 AL |
302 | s = (VirtIOBlock *)virtio_init_pci(bus, "virtio-blk", |
303 | PCI_VENDOR_ID_REDHAT_QUMRANET, | |
304 | PCI_DEVICE_ID_VIRTIO_BLOCK, | |
99b3718e AL |
305 | PCI_VENDOR_ID_REDHAT_QUMRANET, |
306 | VIRTIO_ID_BLOCK, | |
173a543b | 307 | PCI_CLASS_STORAGE_OTHER, 0x00, |
6e02c38d AL |
308 | sizeof(struct virtio_blk_config), sizeof(VirtIOBlock)); |
309 | if (!s) | |
310 | return NULL; | |
311 | ||
312 | s->vdev.get_config = virtio_blk_update_config; | |
313 | s->vdev.get_features = virtio_blk_get_features; | |
314 | s->vdev.reset = virtio_blk_reset; | |
315 | s->bs = bs; | |
869a5c6d | 316 | s->rq = NULL; |
6e02c38d AL |
317 | bdrv_guess_geometry(s->bs, &cylinders, &heads, &secs); |
318 | bdrv_set_geometry_hint(s->bs, cylinders, heads, secs); | |
319 | ||
320 | s->vq = virtio_add_queue(&s->vdev, 128, virtio_blk_handle_output); | |
321 | ||
869a5c6d AL |
322 | qemu_add_vm_change_state_handler(virtio_blk_dma_restart_cb, s); |
323 | register_savevm("virtio-blk", virtio_blk_id++, 2, | |
6e02c38d AL |
324 | virtio_blk_save, virtio_blk_load, s); |
325 | ||
326 | return s; | |
327 | } |