* 'stable/vmalloc-3.2' of git://git.kernel.org/pub/scm/linux/kernel/git/konrad/xen:
  net: xen-netback: use API provided by xenbus module to map rings
  block: xen-blkback: use API provided by xenbus module to map rings
  xen: use generic functions instead of xen_{alloc, free}_vm_area()
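For context, both backend conversions in this pull move from open-coded grant-table mapping to the ring helpers that the xenbus module already exports. Their declarations in this era's include/xen/xenbus.h look roughly like the sketch below (signatures hedged; later kernels extended them for multi-page rings):

/* Map the grant-referenced ring page from the other end into a freshly
 * allocated vm area; on success *vaddr points at the mapping. */
int xenbus_map_ring_valloc(struct xenbus_device *dev,
                           int gnt_ref, void **vaddr);

/* Undo the above: unmap the grant and free the vm area. */
int xenbus_unmap_ring_vfree(struct xenbus_device *dev, void *vaddr);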
#ifndef __XEN_BLKIF__BACKEND__COMMON_H__
#define __XEN_BLKIF__BACKEND__COMMON_H__
-#include <linux/version.h>
#include <linux/module.h>
#include <linux/interrupt.h>
#include <linux/slab.h>
/* i386 protocol version */
#pragma pack(push, 4)
+
+struct blkif_x86_32_request_rw {
+ blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */
+ struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+};
+
+struct blkif_x86_32_request_discard {
+ blkif_sector_t sector_number;/* start sector idx on disk */
+ uint64_t nr_sectors;
+};
+
struct blkif_x86_32_request {
uint8_t operation; /* BLKIF_OP_??? */
uint8_t nr_segments; /* number of segments */
blkif_vdev_t handle; /* only for read/write requests */
uint64_t id; /* private guest value, echoed in resp */
- blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */
- struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+ union {
+ struct blkif_x86_32_request_rw rw;
+ struct blkif_x86_32_request_discard discard;
+ } u;
};
struct blkif_x86_32_response {
uint64_t id; /* copied from request */
#pragma pack(pop)
/* x86_64 protocol version */
+
+struct blkif_x86_64_request_rw {
+ blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */
+ struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+};
+
+struct blkif_x86_64_request_discard {
+ blkif_sector_t sector_number;/* start sector idx on disk */
+ uint64_t nr_sectors;
+};
+
struct blkif_x86_64_request {
uint8_t operation; /* BLKIF_OP_??? */
uint8_t nr_segments; /* number of segments */
blkif_vdev_t handle; /* only for read/write requests */
uint64_t __attribute__((__aligned__(8))) id;
- blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */
- struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+ union {
+ struct blkif_x86_64_request_rw rw;
+ struct blkif_x86_64_request_discard discard;
+ } u;
};
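Since both unions are sized by the r/w variant, the wire layout of read/write requests is unchanged; discard merely overlays the segment array. A minimal userspace check of the 32-bit layout, with the typedefs, segment struct, and the segment count of 11 assumed from the canonical blkif headers:

#include <stdint.h>
#include <stddef.h>
#include <stdio.h>

typedef uint64_t blkif_sector_t; /* assumed, per xen/interface/io/blkif.h */
typedef uint16_t blkif_vdev_t;   /* assumed */
typedef uint32_t grant_ref_t;    /* assumed */
#define BLKIF_MAX_SEGMENTS_PER_REQUEST 11 /* assumed */

struct blkif_request_segment {
    grant_ref_t gref;
    uint8_t first_sect, last_sect;
};

#pragma pack(push, 4) /* i386 protocol: 4-byte alignment, as in the header */
struct blkif_x86_32_request_rw {
    blkif_sector_t sector_number;
    struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
};
struct blkif_x86_32_request_discard {
    blkif_sector_t sector_number;
    uint64_t nr_sectors;
};
struct blkif_x86_32_request {
    uint8_t operation;
    uint8_t nr_segments;
    blkif_vdev_t handle;
    uint64_t id;
    union {
        struct blkif_x86_32_request_rw rw;
        struct blkif_x86_32_request_discard discard;
    } u;
};
#pragma pack(pop)

int main(void)
{
    /* u begins where sector_number used to, so r/w requests stay
     * bit-for-bit identical to the pre-union layout. */
    printf("offsetof(u) = %zu, sizeof(request) = %zu\n",
           offsetof(struct blkif_x86_32_request, u),
           sizeof(struct blkif_x86_32_request));
    return 0;
}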
struct blkif_x86_64_response {
uint64_t __attribute__((__aligned__(8))) id;
BLKIF_PROTOCOL_X86_64 = 3,
};
+enum blkif_backend_type {
+ BLKIF_BACKEND_PHY = 1,
+ BLKIF_BACKEND_FILE = 2,
+};
+
struct xen_vbd {
/* What the domain refers to this vbd as. */
blkif_vdev_t handle;
unsigned int irq;
/* Comms information. */
enum blkif_protocol blk_protocol;
+ enum blkif_backend_type blk_backend_type;
union blkif_back_rings blk_rings;
- struct vm_struct *blk_ring_area;
+ void *blk_ring;
/* The VBD attached to this interface. */
struct xen_vbd vbd;
/* Back pointer to the backend_info. */
atomic_t refcnt;
wait_queue_head_t wq;
+ /* for barrier (drain) requests */
+ struct completion drain_complete;
+ atomic_t drain;
/* One thread per one blkif. */
struct task_struct *xenblkd;
unsigned int waiting_reqs;
int st_wr_req;
int st_oo_req;
int st_f_req;
+ int st_ds_req;
int st_rd_sect;
int st_wr_sect;
wait_queue_head_t waiting_to_free;
-
- grant_handle_t shmem_handle;
- grant_ref_t shmem_ref;
};
struct phys_req {
unsigned short dev;
- unsigned short nr_sects;
+ blkif_sector_t nr_sects;
struct block_device *bdev;
blkif_sector_t sector_number;
};
int xen_blkbk_flush_diskcache(struct xenbus_transaction xbt,
struct backend_info *be, int state);
+int xen_blkbk_barrier(struct xenbus_transaction xbt,
+ struct backend_info *be, int state);
struct xenbus_device *xen_blkbk_xenbus(struct backend_info *be);
static inline void blkif_get_x86_32_req(struct blkif_request *dst,
dst->nr_segments = src->nr_segments;
dst->handle = src->handle;
dst->id = src->id;
- dst->u.rw.sector_number = src->sector_number;
- barrier();
- if (n > dst->nr_segments)
- n = dst->nr_segments;
- for (i = 0; i < n; i++)
- dst->u.rw.seg[i] = src->seg[i];
+ switch (src->operation) {
+ case BLKIF_OP_READ:
+ case BLKIF_OP_WRITE:
+ case BLKIF_OP_WRITE_BARRIER:
+ case BLKIF_OP_FLUSH_DISKCACHE:
+ dst->u.rw.sector_number = src->u.rw.sector_number;
+ barrier();
+ if (n > dst->nr_segments)
+ n = dst->nr_segments;
+ for (i = 0; i < n; i++)
+ dst->u.rw.seg[i] = src->u.rw.seg[i];
+ break;
+ case BLKIF_OP_DISCARD:
+ dst->u.discard.sector_number = src->u.discard.sector_number;
+ dst->u.discard.nr_sectors = src->u.discard.nr_sectors;
+ break;
+ default:
+ break;
+ }
}
static inline void blkif_get_x86_64_req(struct blkif_request *dst,
dst->nr_segments = src->nr_segments;
dst->handle = src->handle;
dst->id = src->id;
- dst->u.rw.sector_number = src->sector_number;
- barrier();
- if (n > dst->nr_segments)
- n = dst->nr_segments;
- for (i = 0; i < n; i++)
- dst->u.rw.seg[i] = src->seg[i];
+ switch (src->operation) {
+ case BLKIF_OP_READ:
+ case BLKIF_OP_WRITE:
+ case BLKIF_OP_WRITE_BARRIER:
+ case BLKIF_OP_FLUSH_DISKCACHE:
+ dst->u.rw.sector_number = src->u.rw.sector_number;
+ barrier();
+ if (n > dst->nr_segments)
+ n = dst->nr_segments;
+ for (i = 0; i < n; i++)
+ dst->u.rw.seg[i] = src->u.rw.seg[i];
+ break;
+ case BLKIF_OP_DISCARD:
+ dst->u.discard.sector_number = src->u.discard.sector_number;
+ dst->u.discard.nr_sectors = src->u.discard.nr_sectors;
+ break;
+ default:
+ break;
+ }
}
#endif /* __XEN_BLKIF__BACKEND__COMMON_H__ */
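These translators are picked per ring in the backend's request loop; a sketch of the dispatch (ring and index names are illustrative, modeled on __do_block_io_op()):

struct blkif_request req;

switch (blkif->blk_protocol) {
case BLKIF_PROTOCOL_NATIVE:
    memcpy(&req, RING_GET_REQUEST(&blk_rings->native, rc), sizeof(req));
    break;
case BLKIF_PROTOCOL_X86_32:
    blkif_get_x86_32_req(&req, RING_GET_REQUEST(&blk_rings->x86_32, rc));
    break;
case BLKIF_PROTOCOL_X86_64:
    blkif_get_x86_64_req(&req, RING_GET_REQUEST(&blk_rings->x86_64, rc));
    break;
default:
    BUG();
}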
spin_lock_init(&blkif->blk_ring_lock);
atomic_set(&blkif->refcnt, 1);
init_waitqueue_head(&blkif->wq);
+ init_completion(&blkif->drain_complete);
+ atomic_set(&blkif->drain, 0);
blkif->st_print = jiffies;
init_waitqueue_head(&blkif->waiting_to_free);
return blkif;
}
- static int map_frontend_page(struct xen_blkif *blkif, unsigned long shared_page)
- {
- struct gnttab_map_grant_ref op;
-
- gnttab_set_map_op(&op, (unsigned long)blkif->blk_ring_area->addr,
- GNTMAP_host_map, shared_page, blkif->domid);
-
- if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1))
- BUG();
-
- if (op.status) {
- DPRINTK("Grant table operation failure !\n");
- return op.status;
- }
-
- blkif->shmem_ref = shared_page;
- blkif->shmem_handle = op.handle;
-
- return 0;
- }
-
- static void unmap_frontend_page(struct xen_blkif *blkif)
- {
- struct gnttab_unmap_grant_ref op;
-
- gnttab_set_unmap_op(&op, (unsigned long)blkif->blk_ring_area->addr,
- GNTMAP_host_map, blkif->shmem_handle);
-
- if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1))
- BUG();
- }
-
static int xen_blkif_map(struct xen_blkif *blkif, unsigned long shared_page,
unsigned int evtchn)
{
if (blkif->irq)
return 0;
- blkif->blk_ring_area = alloc_vm_area(PAGE_SIZE);
- if (!blkif->blk_ring_area)
- return -ENOMEM;
-
- err = map_frontend_page(blkif, shared_page);
- if (err) {
- free_vm_area(blkif->blk_ring_area);
+ err = xenbus_map_ring_valloc(blkif->be->dev, shared_page, &blkif->blk_ring);
+ if (err < 0)
return err;
- }
switch (blkif->blk_protocol) {
case BLKIF_PROTOCOL_NATIVE:
{
struct blkif_sring *sring;
- sring = (struct blkif_sring *)blkif->blk_ring_area->addr;
+ sring = (struct blkif_sring *)blkif->blk_ring;
BACK_RING_INIT(&blkif->blk_rings.native, sring, PAGE_SIZE);
break;
}
case BLKIF_PROTOCOL_X86_32:
{
struct blkif_x86_32_sring *sring_x86_32;
- sring_x86_32 = (struct blkif_x86_32_sring *)blkif->blk_ring_area->addr;
+ sring_x86_32 = (struct blkif_x86_32_sring *)blkif->blk_ring;
BACK_RING_INIT(&blkif->blk_rings.x86_32, sring_x86_32, PAGE_SIZE);
break;
}
case BLKIF_PROTOCOL_X86_64:
{
struct blkif_x86_64_sring *sring_x86_64;
- sring_x86_64 = (struct blkif_x86_64_sring *)blkif->blk_ring_area->addr;
+ sring_x86_64 = (struct blkif_x86_64_sring *)blkif->blk_ring;
BACK_RING_INIT(&blkif->blk_rings.x86_64, sring_x86_64, PAGE_SIZE);
break;
}
xen_blkif_be_int, 0,
"blkif-backend", blkif);
if (err < 0) {
- unmap_frontend_page(blkif);
- free_vm_area(blkif->blk_ring_area);
+ xenbus_unmap_ring_vfree(blkif->be->dev, blkif->blk_ring);
blkif->blk_rings.common.sring = NULL;
return err;
}
}
if (blkif->blk_rings.common.sring) {
- unmap_frontend_page(blkif);
- free_vm_area(blkif->blk_ring_area);
+ xenbus_unmap_ring_vfree(blkif->be->dev, blkif->blk_ring);
blkif->blk_rings.common.sring = NULL;
}
}
VBD_SHOW(rd_req, "%d\n", be->blkif->st_rd_req);
VBD_SHOW(wr_req, "%d\n", be->blkif->st_wr_req);
VBD_SHOW(f_req, "%d\n", be->blkif->st_f_req);
+VBD_SHOW(ds_req, "%d\n", be->blkif->st_ds_req);
VBD_SHOW(rd_sect, "%d\n", be->blkif->st_rd_sect);
VBD_SHOW(wr_sect, "%d\n", be->blkif->st_wr_sect);
&dev_attr_rd_req.attr,
&dev_attr_wr_req.attr,
&dev_attr_f_req.attr,
+ &dev_attr_ds_req.attr,
&dev_attr_rd_sect.attr,
&dev_attr_wr_sect.attr,
NULL
return err;
}
+int xen_blkbk_discard(struct xenbus_transaction xbt, struct backend_info *be)
+{
+ struct xenbus_device *dev = be->dev;
+ struct xen_blkif *blkif = be->blkif;
+ char *type;
+ int err;
+ int state = 0;
+
+ type = xenbus_read(XBT_NIL, dev->nodename, "type", NULL);
+ if (!IS_ERR(type)) {
+ if (strncmp(type, "file", 4) == 0) {
+ state = 1;
+ blkif->blk_backend_type = BLKIF_BACKEND_FILE;
+ }
+ if (strncmp(type, "phy", 3) == 0) {
+ struct block_device *bdev = be->blkif->vbd.bdev;
+ struct request_queue *q = bdev_get_queue(bdev);
+ if (blk_queue_discard(q)) {
+ err = xenbus_printf(xbt, dev->nodename,
+ "discard-granularity", "%u",
+ q->limits.discard_granularity);
+ if (err) {
+ xenbus_dev_fatal(dev, err,
+ "writing discard-granularity");
+ goto kfree;
+ }
+ err = xenbus_printf(xbt, dev->nodename,
+ "discard-alignment", "%u",
+ q->limits.discard_alignment);
+ if (err) {
+ xenbus_dev_fatal(dev, err,
+ "writing discard-alignment");
+ goto kfree;
+ }
+ state = 1;
+ blkif->blk_backend_type = BLKIF_BACKEND_PHY;
+ }
+ }
+ } else {
+ err = PTR_ERR(type);
+ xenbus_dev_fatal(dev, err, "reading type");
+ goto out;
+ }
+
+ err = xenbus_printf(xbt, dev->nodename, "feature-discard",
+ "%d", state);
+ if (err)
+ xenbus_dev_fatal(dev, err, "writing feature-discard");
+kfree:
+ kfree(type);
+out:
+ return err;
+}
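On the frontend side these keys would be probed before ever issuing BLKIF_OP_DISCARD; a hedged sketch using xenbus_gather() (the info->xbdev->otherend path is illustrative):

int discard = 0;
unsigned int granularity = 0, alignment = 0;

err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
                    "feature-discard", "%d", &discard, NULL);
if (!err && discard)
    xenbus_gather(XBT_NIL, info->xbdev->otherend,
                  "discard-granularity", "%u", &granularity,
                  "discard-alignment", "%u", &alignment, NULL);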
+int xen_blkbk_barrier(struct xenbus_transaction xbt,
+ struct backend_info *be, int state)
+{
+ struct xenbus_device *dev = be->dev;
+ int err;
+
+ err = xenbus_printf(xbt, dev->nodename, "feature-barrier",
+ "%d", state);
+ if (err)
+ xenbus_dev_fatal(dev, err, "writing feature-barrier");
+
+ return err;
+}
+
/*
* Entry point to this code when a new device is created. Allocate the basic
* structures, and watch the store waiting for the hotplug scripts to tell us
if (err)
goto abort;
+ err = xen_blkbk_discard(xbt, be);
+
+ /* If we can't advertise it, that is OK. */
+ err = xen_blkbk_barrier(xbt, be, be->blkif->vbd.flush_support);
+
err = xenbus_printf(xbt, dev->nodename, "sectors", "%llu",
(unsigned long long)vbd_sz(&be->blkif->vbd));
if (err) {
#define MAX_PENDING_REQS 256
+/* Discriminate from any valid pending_idx value. */
+#define INVALID_PENDING_IDX 0xFFFF
+
#define MAX_BUFFER_OFFSET PAGE_SIZE
/* extra field used in struct page */
u16 flags);
static inline unsigned long idx_to_pfn(struct xen_netbk *netbk,
- unsigned int idx)
+ u16 idx)
{
return page_to_pfn(netbk->mmap_pages[idx]);
}
static inline unsigned long idx_to_kaddr(struct xen_netbk *netbk,
- unsigned int idx)
+ u16 idx)
{
return (unsigned long)pfn_to_kaddr(idx_to_pfn(netbk, idx));
}
sizeof(struct iphdr) + MAX_IPOPTLEN + \
sizeof(struct tcphdr) + MAX_TCP_OPTION_SPACE)
+static u16 frag_get_pending_idx(skb_frag_t *frag)
+{
+ return (u16)frag->page_offset;
+}
+
+static void frag_set_pending_idx(skb_frag_t *frag, u16 pending_idx)
+{
+ frag->page_offset = pending_idx;
+}
+
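The stash works because page_offset is a 32-bit field in this era's skb_frag_t, MAX_PENDING_REQS is 256 (so every valid index fits in a u16), and 0xFFFF can therefore never collide with a real index. A throwaway userspace model, with skb_frag_t mocked down to the one field the trick uses:

#include <stdint.h>
#include <assert.h>

#define INVALID_PENDING_IDX 0xFFFF

typedef struct { uint32_t page_offset; } skb_frag_t; /* mocked */

static uint16_t frag_get_pending_idx(skb_frag_t *frag)
{
    return (uint16_t)frag->page_offset;
}

static void frag_set_pending_idx(skb_frag_t *frag, uint16_t pending_idx)
{
    frag->page_offset = pending_idx;
}

int main(void)
{
    skb_frag_t frag;

    frag_set_pending_idx(&frag, 42);           /* stash: no page bound yet */
    assert(frag_get_pending_idx(&frag) == 42); /* restored losslessly */
    frag_set_pending_idx(&frag, INVALID_PENDING_IDX);
    assert(frag_get_pending_idx(&frag) == INVALID_PENDING_IDX);
    return 0;
}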
static inline pending_ring_idx_t pending_index(unsigned i)
{
return i & (MAX_PENDING_REQS-1);
count++;
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
- unsigned long size = skb_shinfo(skb)->frags[i].size;
+ unsigned long size = skb_frag_size(&skb_shinfo(skb)->frags[i]);
unsigned long bytes;
while (size > 0) {
BUG_ON(copy_off > MAX_BUFFER_OFFSET);
for (i = 0; i < nr_frags; i++) {
netbk_gop_frag_copy(vif, skb, npo,
- skb_shinfo(skb)->frags[i].page,
- skb_shinfo(skb)->frags[i].size,
+ skb_frag_page(&skb_shinfo(skb)->frags[i]),
+ skb_frag_size(&skb_shinfo(skb)->frags[i]),
skb_shinfo(skb)->frags[i].page_offset,
&head);
}
static struct page *xen_netbk_alloc_page(struct xen_netbk *netbk,
struct sk_buff *skb,
- unsigned long pending_idx)
+ u16 pending_idx)
{
struct page *page;
page = alloc_page(GFP_KERNEL|__GFP_COLD);
{
struct skb_shared_info *shinfo = skb_shinfo(skb);
skb_frag_t *frags = shinfo->frags;
- unsigned long pending_idx = *((u16 *)skb->data);
+ u16 pending_idx = *((u16 *)skb->data);
int i, start;
/* Skip first skb fragment if it is on same page as header fragment. */
- start = ((unsigned long)shinfo->frags[0].page == pending_idx);
+ start = (frag_get_pending_idx(&shinfo->frags[0]) == pending_idx);
for (i = start; i < shinfo->nr_frags; i++, txp++) {
struct page *page;
memcpy(&pending_tx_info[pending_idx].req, txp, sizeof(*txp));
xenvif_get(vif);
pending_tx_info[pending_idx].vif = vif;
- frags[i].page = (void *)pending_idx;
+ frag_set_pending_idx(&frags[i], pending_idx);
}
return gop;
struct gnttab_copy **gopp)
{
struct gnttab_copy *gop = *gopp;
- int pending_idx = *((u16 *)skb->data);
+ u16 pending_idx = *((u16 *)skb->data);
struct pending_tx_info *pending_tx_info = netbk->pending_tx_info;
struct xenvif *vif = pending_tx_info[pending_idx].vif;
struct xen_netif_tx_request *txp;
}
/* Skip first skb fragment if it is on same page as header fragment. */
- start = ((unsigned long)shinfo->frags[0].page == pending_idx);
+ start = (frag_get_pending_idx(&shinfo->frags[0]) == pending_idx);
for (i = start; i < nr_frags; i++) {
int j, newerr;
pending_ring_idx_t index;
- pending_idx = (unsigned long)shinfo->frags[i].page;
+ pending_idx = frag_get_pending_idx(&shinfo->frags[i]);
/* Check error status: if okay then remember grant handle. */
newerr = (++gop)->status;
pending_idx = *((u16 *)skb->data);
xen_netbk_idx_release(netbk, pending_idx);
for (j = start; j < i; j++) {
- pending_idx = (unsigned long)shinfo->frags[i].page;
+ pending_idx = frag_get_pending_idx(&shinfo->frags[i]);
xen_netbk_idx_release(netbk, pending_idx);
}
for (i = 0; i < nr_frags; i++) {
skb_frag_t *frag = shinfo->frags + i;
struct xen_netif_tx_request *txp;
- unsigned long pending_idx;
+ struct page *page;
+ u16 pending_idx;
- pending_idx = (unsigned long)frag->page;
+ pending_idx = frag_get_pending_idx(frag);
txp = &netbk->pending_tx_info[pending_idx].req;
- frag->page = virt_to_page(idx_to_kaddr(netbk, pending_idx));
- frag->size = txp->size;
- frag->page_offset = txp->offset;
-
+ page = virt_to_page(idx_to_kaddr(netbk, pending_idx));
+ __skb_fill_page_desc(skb, i, page, txp->offset, txp->size);
skb->len += txp->size;
skb->data_len += txp->size;
skb->truesize += txp->size;
skb_shinfo(skb)->nr_frags = ret;
if (data_len < txreq.size) {
skb_shinfo(skb)->nr_frags++;
- skb_shinfo(skb)->frags[0].page =
- (void *)(unsigned long)pending_idx;
+ frag_set_pending_idx(&skb_shinfo(skb)->frags[0],
+ pending_idx);
} else {
- /* Discriminate from any valid pending_idx value. */
- skb_shinfo(skb)->frags[0].page = (void *)~0UL;
+ frag_set_pending_idx(&skb_shinfo(skb)->frags[0],
+ INVALID_PENDING_IDX);
}
__skb_queue_tail(&netbk->tx_queue, skb);
void xen_netbk_unmap_frontend_rings(struct xenvif *vif)
{
- struct gnttab_unmap_grant_ref op;
-
- if (vif->tx.sring) {
- gnttab_set_unmap_op(&op, (unsigned long)vif->tx_comms_area->addr,
- GNTMAP_host_map, vif->tx_shmem_handle);
-
- if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1))
- BUG();
- }
-
- if (vif->rx.sring) {
- gnttab_set_unmap_op(&op, (unsigned long)vif->rx_comms_area->addr,
- GNTMAP_host_map, vif->rx_shmem_handle);
-
- if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1))
- BUG();
- }
- if (vif->rx_comms_area)
- free_vm_area(vif->rx_comms_area);
- if (vif->tx_comms_area)
- free_vm_area(vif->tx_comms_area);
+ if (vif->tx.sring)
+ xenbus_unmap_ring_vfree(xenvif_to_xenbus_device(vif),
+ vif->tx.sring);
+ if (vif->rx.sring)
+ xenbus_unmap_ring_vfree(xenvif_to_xenbus_device(vif),
+ vif->rx.sring);
}
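The pair is symmetric, and the mapping function below leans on that: its err: path simply calls the unmap helper above, which skips any ring that never got mapped. The caller (xenvif_connect(), sketched; surrounding code not shown in this digest) only has to propagate the error:

err = xen_netbk_map_frontend_rings(vif, tx_ring_ref, rx_ring_ref);
if (err < 0)
    goto err; /* both rings already unwound by the helper */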
int xen_netbk_map_frontend_rings(struct xenvif *vif,
grant_ref_t tx_ring_ref,
grant_ref_t rx_ring_ref)
{
- struct gnttab_map_grant_ref op;
+ void *addr;
struct xen_netif_tx_sring *txs;
struct xen_netif_rx_sring *rxs;
int err = -ENOMEM;
- vif->tx_comms_area = alloc_vm_area(PAGE_SIZE);
- if (vif->tx_comms_area == NULL)
+ err = xenbus_map_ring_valloc(xenvif_to_xenbus_device(vif),
+ tx_ring_ref, &addr);
+ if (err)
goto err;
- vif->rx_comms_area = alloc_vm_area(PAGE_SIZE);
- if (vif->rx_comms_area == NULL)
- goto err;
-
- gnttab_set_map_op(&op, (unsigned long)vif->tx_comms_area->addr,
- GNTMAP_host_map, tx_ring_ref, vif->domid);
-
- if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1))
- BUG();
-
- if (op.status) {
- netdev_warn(vif->dev,
- "failed to map tx ring. err=%d status=%d\n",
- err, op.status);
- err = op.status;
- goto err;
- }
-
- vif->tx_shmem_ref = tx_ring_ref;
- vif->tx_shmem_handle = op.handle;
-
- txs = (struct xen_netif_tx_sring *)vif->tx_comms_area->addr;
+ txs = (struct xen_netif_tx_sring *)addr;
BACK_RING_INIT(&vif->tx, txs, PAGE_SIZE);
- gnttab_set_map_op(&op, (unsigned long)vif->rx_comms_area->addr,
- GNTMAP_host_map, rx_ring_ref, vif->domid);
-
- if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1))
- BUG();
-
- if (op.status) {
- netdev_warn(vif->dev,
- "failed to map rx ring. err=%d status=%d\n",
- err, op.status);
- err = op.status;
+ err = xenbus_map_ring_valloc(xenvif_to_xenbus_device(vif),
+ rx_ring_ref, &addr);
+ if (err)
goto err;
- }
-
- vif->rx_shmem_ref = rx_ring_ref;
- vif->rx_shmem_handle = op.handle;
- vif->rx_req_cons_peek = 0;
- rxs = (struct xen_netif_rx_sring *)vif->rx_comms_area->addr;
+ rxs = (struct xen_netif_rx_sring *)addr;
BACK_RING_INIT(&vif->rx, rxs, PAGE_SIZE);
+ vif->rx_req_cons_peek = 0;
+
return 0;
err:
#include <xen/interface/grant_table.h>
#include <asm/xen/hypervisor.h>
- #include <asm/xen/grant_table.h>
#include <xen/features.h>
#define gnttab_map_vaddr(map) ((void *)(map.host_virt_addr))
int gnttab_map_refs(struct gnttab_map_grant_ref *map_ops,
+ struct gnttab_map_grant_ref *kmap_ops,
struct page **pages, unsigned int count);
int gnttab_unmap_refs(struct gnttab_unmap_grant_ref *unmap_ops,
struct page **pages, unsigned int count);
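The new kmap_ops argument lets a caller hand in a parallel batch of map operations for kernel-address mappings; callers that do not need one pass NULL. A sketch of the common case (modeled on xen-blkback's segment mapping; variable names illustrative):

/* map: array of nseg gnttab_map_grant_ref ops; pages: backing pages */
ret = gnttab_map_refs(map, NULL, pages, nseg);
BUG_ON(ret);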