]>
Commit | Line | Data |
---|---|---|
7df20f2d SD |
1 | /* |
2 | * Intel MIC Platform Software Stack (MPSS) | |
3 | * | |
4 | * This file is provided under a dual BSD/GPLv2 license. When using or | |
5 | * redistributing this file, you may do so under either license. | |
6 | * | |
7 | * GPL LICENSE SUMMARY | |
8 | * | |
9 | * Copyright(c) 2014 Intel Corporation. | |
10 | * | |
11 | * This program is free software; you can redistribute it and/or modify | |
12 | * it under the terms of version 2 of the GNU General Public License as | |
13 | * published by the Free Software Foundation. | |
14 | * | |
15 | * This program is distributed in the hope that it will be useful, but | |
16 | * WITHOUT ANY WARRANTY; without even the implied warranty of | |
17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
18 | * General Public License for more details. | |
19 | * | |
20 | * BSD LICENSE | |
21 | * | |
22 | * Copyright(c) 2014 Intel Corporation. | |
23 | * | |
24 | * Redistribution and use in source and binary forms, with or without | |
25 | * modification, are permitted provided that the following conditions | |
26 | * are met: | |
27 | * | |
28 | * * Redistributions of source code must retain the above copyright | |
29 | * notice, this list of conditions and the following disclaimer. | |
30 | * * Redistributions in binary form must reproduce the above copyright | |
31 | * notice, this list of conditions and the following disclaimer in | |
32 | * the documentation and/or other materials provided with the | |
33 | * distribution. | |
34 | * * Neither the name of Intel Corporation nor the names of its | |
35 | * contributors may be used to endorse or promote products derived | |
36 | * from this software without specific prior written permission. | |
37 | * | |
38 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
39 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
40 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
41 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | |
42 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
43 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |
44 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |
45 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |
46 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
47 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
48 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
49 | * | |
50 | * Intel SCIF driver. | |
51 | * | |
52 | */ | |
53 | #ifndef __SCIF_H__ | |
54 | #define __SCIF_H__ | |
55 | ||
56 | #include <linux/types.h> | |
57 | #include <linux/poll.h> | |
d3d912eb | 58 | #include <linux/device.h> |
7df20f2d SD |
59 | #include <linux/scif_ioctl.h> |
60 | ||
61 | #define SCIF_ACCEPT_SYNC 1 | |
62 | #define SCIF_SEND_BLOCK 1 | |
63 | #define SCIF_RECV_BLOCK 1 | |
64 | ||
65 | enum { | |
66 | SCIF_PROT_READ = (1 << 0), | |
67 | SCIF_PROT_WRITE = (1 << 1) | |
68 | }; | |
69 | ||
70 | enum { | |
71 | SCIF_MAP_FIXED = 0x10, | |
72 | SCIF_MAP_KERNEL = 0x20, | |
73 | }; | |
74 | ||
75 | enum { | |
76 | SCIF_FENCE_INIT_SELF = (1 << 0), | |
77 | SCIF_FENCE_INIT_PEER = (1 << 1), | |
78 | SCIF_SIGNAL_LOCAL = (1 << 4), | |
79 | SCIF_SIGNAL_REMOTE = (1 << 5) | |
80 | }; | |
81 | ||
82 | enum { | |
83 | SCIF_RMA_USECPU = (1 << 0), | |
84 | SCIF_RMA_USECACHE = (1 << 1), | |
85 | SCIF_RMA_SYNC = (1 << 2), | |
86 | SCIF_RMA_ORDERED = (1 << 3) | |
87 | }; | |
88 | ||
89 | /* End of SCIF Admin Reserved Ports */ | |
90 | #define SCIF_ADMIN_PORT_END 1024 | |
91 | ||
92 | /* End of SCIF Reserved Ports */ | |
93 | #define SCIF_PORT_RSVD 1088 | |
94 | ||
95 | typedef struct scif_endpt *scif_epd_t; | |
a44f2630 SD |
96 | typedef struct scif_pinned_pages *scif_pinned_pages_t; |
97 | ||
98 | /** | |
99 | * struct scif_range - SCIF registered range used in kernel mode | |
100 | * @cookie: cookie used internally by SCIF | |
101 | * @nr_pages: number of pages of PAGE_SIZE | |
102 | * @prot_flags: R/W protection | |
103 | * @phys_addr: Array of bus addresses | |
104 | * @va: Array of kernel virtual addresses backed by the pages in the phys_addr | |
105 | * array. The va is populated only when called on the host for a remote | |
106 | * SCIF connection on MIC. This is required to support the use case of DMA | |
107 | * between MIC and another device which is not a SCIF node e.g., an IB or | |
108 | * ethernet NIC. | |
109 | */ | |
110 | struct scif_range { | |
111 | void *cookie; | |
112 | int nr_pages; | |
113 | int prot_flags; | |
114 | dma_addr_t *phys_addr; | |
115 | void __iomem **va; | |
116 | }; | |
7df20f2d | 117 | |
b7f94441 AD |
118 | /** |
119 | * struct scif_pollepd - SCIF endpoint to be monitored via scif_poll | |
120 | * @epd: SCIF endpoint | |
121 | * @events: requested events | |
122 | * @revents: returned events | |
123 | */ | |
124 | struct scif_pollepd { | |
125 | scif_epd_t epd; | |
126 | short events; | |
127 | short revents; | |
128 | }; | |
129 | ||
d3d912eb AD |
130 | /** |
131 | * struct scif_peer_dev - representation of a peer SCIF device | |
132 | * | |
133 | * Peer devices show up as PCIe devices for the mgmt node but not the cards. | |
134 | * The mgmt node discovers all the cards on the PCIe bus and informs the other | |
135 | * cards about their peers. Upon notification of a peer a node adds a peer | |
136 | * device to the peer bus to maintain symmetry in the way devices are | |
137 | * discovered across all nodes in the SCIF network. | |
138 | * | |
139 | * @dev: underlying device | |
140 | * @dnode: The destination node which this device will communicate with. | |
141 | */ | |
142 | struct scif_peer_dev { | |
143 | struct device dev; | |
144 | u8 dnode; | |
145 | }; | |
146 | ||
147 | /** | |
148 | * struct scif_client - representation of a SCIF client | |
149 | * @name: client name | |
150 | * @probe: client method called when a peer device is registered | |
151 | * @remove: client method called when a peer device is unregistered | |
152 | * @si: subsys_interface used internally for implementing SCIF clients | |
153 | */ | |
154 | struct scif_client { | |
155 | const char *name; | |
156 | void (*probe)(struct scif_peer_dev *spdev); | |
157 | void (*remove)(struct scif_peer_dev *spdev); | |
158 | struct subsys_interface si; | |
159 | }; | |
160 | ||
7df20f2d SD |
161 | #define SCIF_OPEN_FAILED ((scif_epd_t)-1) |
162 | #define SCIF_REGISTER_FAILED ((off_t)-1) | |
163 | #define SCIF_MMAP_FAILED ((void *)-1) | |
164 | ||
165 | /** | |
166 | * scif_open() - Create an endpoint | |
167 | * | |
168 | * Return: | |
169 | * Upon successful completion, scif_open() returns an endpoint descriptor to | |
170 | * be used in subsequent SCIF function calls to refer to that endpoint; | |
171 | * otherwise in user mode SCIF_OPEN_FAILED (that is ((scif_epd_t)-1)) is | |
172 | * returned and errno is set to indicate the error; in kernel mode a NULL | |
173 | * scif_epd_t is returned. | |
174 | * | |
175 | * Errors: | |
176 | * ENOMEM - Insufficient kernel memory was available | |
177 | */ | |
178 | scif_epd_t scif_open(void); | |
179 | ||
180 | /** | |
181 | * scif_bind() - Bind an endpoint to a port | |
182 | * @epd: endpoint descriptor | |
183 | * @pn: port number | |
184 | * | |
185 | * scif_bind() binds endpoint epd to port pn, where pn is a port number on the | |
186 | * local node. If pn is zero, a port number greater than or equal to | |
187 | * SCIF_PORT_RSVD is assigned and returned. Each endpoint may be bound to | |
188 | * exactly one local port. Ports less than 1024 when requested can only be bound | |
189 | * by system (or root) processes or by processes executed by privileged users. | |
190 | * | |
191 | * Return: | |
192 | * Upon successful completion, scif_bind() returns the port number to which epd | |
193 | * is bound; otherwise in user mode -1 is returned and errno is set to | |
194 | * indicate the error; in kernel mode the negative of one of the following | |
195 | * errors is returned. | |
196 | * | |
197 | * Errors: | |
198 | * EBADF, ENOTTY - epd is not a valid endpoint descriptor | |
199 | * EINVAL - the endpoint or the port is already bound | |
200 | * EISCONN - The endpoint is already connected | |
201 | * ENOSPC - No port number available for assignment | |
202 | * EACCES - The port requested is protected and the user is not the superuser | |
203 | */ | |
204 | int scif_bind(scif_epd_t epd, u16 pn); | |
205 | ||
206 | /** | |
207 | * scif_listen() - Listen for connections on an endpoint | |
208 | * @epd: endpoint descriptor | |
209 | * @backlog: maximum pending connection requests | |
210 | * | |
211 | * scif_listen() marks the endpoint epd as a listening endpoint - that is, as | |
212 | * an endpoint that will be used to accept incoming connection requests. Once | |
213 | * so marked, the endpoint is said to be in the listening state and may not be | |
214 | * used as the endpoint of a connection. | |
215 | * | |
216 | * The endpoint, epd, must have been bound to a port. | |
217 | * | |
218 | * The backlog argument defines the maximum length to which the queue of | |
219 | * pending connections for epd may grow. If a connection request arrives when | |
220 | * the queue is full, the client may receive an error with an indication that | |
221 | * the connection was refused. | |
222 | * | |
223 | * Return: | |
224 | * Upon successful completion, scif_listen() returns 0; otherwise in user mode | |
225 | * -1 is returned and errno is set to indicate the error; in kernel mode the | |
226 | * negative of one of the following errors is returned. | |
227 | * | |
228 | * Errors: | |
229 | * EBADF, ENOTTY - epd is not a valid endpoint descriptor | |
230 | * EINVAL - the endpoint is not bound to a port | |
231 | * EISCONN - The endpoint is already connected or listening | |
232 | */ | |
233 | int scif_listen(scif_epd_t epd, int backlog); | |
234 | ||
235 | /** | |
236 | * scif_connect() - Initiate a connection on a port | |
237 | * @epd: endpoint descriptor | |
238 | * @dst: global id of port to which to connect | |
239 | * | |
240 | * The scif_connect() function requests the connection of endpoint epd to remote | |
241 | * port dst. If the connection is successful, a peer endpoint, bound to dst, is | |
242 | * created on node dst.node. On successful return, the connection is complete. | |
243 | * | |
244 | * If the endpoint epd has not already been bound to a port, scif_connect() | |
245 | * will bind it to an unused local port. | |
246 | * | |
247 | * A connection is terminated when an endpoint of the connection is closed, | |
248 | * either explicitly by scif_close(), or when a process that owns one of the | |
249 | * endpoints of the connection is terminated. | |
250 | * | |
251 | * In user space, scif_connect() supports an asynchronous connection mode | |
252 | * if the application has set the O_NONBLOCK flag on the endpoint via the | |
253 | * fcntl() system call. With this flag set, the calling process does | |
254 | * not wait during scif_connect(). | |
255 | * | |
256 | * Return: | |
257 | * Upon successful completion, scif_connect() returns the port ID to which the | |
258 | * endpoint, epd, is bound; otherwise in user mode -1 is returned and errno is | |
259 | * set to indicate the error; in kernel mode the negative of one of the | |
260 | * following errors is returned. | |
261 | * | |
262 | * Errors: | |
263 | * EBADF, ENOTTY - epd is not a valid endpoint descriptor | |
264 | * ECONNREFUSED - The destination was not listening for connections or refused | |
265 | * the connection request | |
266 | * EINVAL - dst.port is not a valid port ID | |
267 | * EISCONN - The endpoint is already connected | |
268 | * ENOMEM - No buffer space is available | |
269 | * ENODEV - The destination node does not exist, or the node is lost, or existed | |
270 | * but is not currently in the network since it may have crashed | |
271 | * ENOSPC - No port number available for assignment | |
272 | * EOPNOTSUPP - The endpoint is listening and cannot be connected | |
273 | */ | |
274 | int scif_connect(scif_epd_t epd, struct scif_port_id *dst); | |
275 | ||
276 | /** | |
277 | * scif_accept() - Accept a connection on an endpoint | |
278 | * @epd: endpoint descriptor | |
279 | * @peer: global id of port to which connected | |
280 | * @newepd: new connected endpoint descriptor | |
281 | * @flags: flags | |
282 | * | |
283 | * The scif_accept() call extracts the first connection request from the queue | |
284 | * of pending connections for the port on which epd is listening. scif_accept() | |
285 | * creates a new endpoint, bound to the same port as epd, and allocates a new | |
286 | * SCIF endpoint descriptor, returned in newepd, for the endpoint. The new | |
287 | * endpoint is connected to the endpoint through which the connection was | |
288 | * requested. epd is unaffected by this call, and remains in the listening | |
289 | * state. | |
290 | * | |
291 | * On successful return, peer holds the global port identifier (node id and | |
292 | * local port number) of the port which requested the connection. | |
293 | * | |
294 | * A connection is terminated when an endpoint of the connection is closed, | |
295 | * either explicitly by scif_close(), or when a process that owns one of the | |
296 | * endpoints of the connection is terminated. | |
297 | * | |
298 | * The number of connections that can (subsequently) be accepted on epd is only | |
299 | * limited by system resources (memory). | |
300 | * | |
301 | * The flags argument is formed by OR'ing together zero or more of the | |
302 | * following values. | |
303 | * SCIF_ACCEPT_SYNC - block until a connection request is presented. If | |
304 | * SCIF_ACCEPT_SYNC is not in flags, and no pending | |
305 | * connections are present on the queue, scif_accept() | |
306 | * fails with an EAGAIN error | |
307 | * | |
308 | * In user mode, the select() and poll() functions can be used to determine | |
309 | * when there is a connection request. In kernel mode, the scif_poll() | |
310 | * function may be used for this purpose. A readable event will be delivered | |
311 | * when a connection is requested. | |
312 | * | |
313 | * Return: | |
314 | * Upon successful completion, scif_accept() returns 0; otherwise in user mode | |
315 | * -1 is returned and errno is set to indicate the error; in kernel mode the | |
316 | * negative of one of the following errors is returned. | |
317 | * | |
318 | * Errors: | |
319 | * EAGAIN - SCIF_ACCEPT_SYNC is not set and no connections are present to be | |
320 | * accepted or SCIF_ACCEPT_SYNC is not set and remote node failed to complete | |
321 | * its connection request | |
322 | * EBADF, ENOTTY - epd is not a valid endpoint descriptor | |
323 | * EINTR - Interrupted function | |
324 | * EINVAL - epd is not a listening endpoint, or flags is invalid, or peer is | |
325 | * NULL, or newepd is NULL | |
326 | * ENODEV - The requesting node is lost, or existed but is not currently in the | |
327 | * network since it may have crashed | |
328 | * ENOMEM - Not enough space | |
329 | * ENOENT - Secondary part of epd registration failed | |
330 | */ | |
331 | int scif_accept(scif_epd_t epd, struct scif_port_id *peer, scif_epd_t | |
332 | *newepd, int flags); | |
333 | ||
334 | /** | |
335 | * scif_close() - Close an endpoint | |
336 | * @epd: endpoint descriptor | |
337 | * | |
338 | * scif_close() closes an endpoint and performs necessary teardown of | |
339 | * facilities associated with that endpoint. | |
340 | * | |
341 | * If epd is a listening endpoint then it will no longer accept connection | |
342 | * requests on the port to which it is bound. Any pending connection requests | |
343 | * are rejected. | |
344 | * | |
345 | * If epd is a connected endpoint, then its peer endpoint is also closed. RMAs | |
346 | * which are in-process through epd or its peer endpoint will complete before | |
347 | * scif_close() returns. Registered windows of the local and peer endpoints are | |
348 | * released as if scif_unregister() was called against each window. | |
349 | * | |
350 | * Closing a SCIF endpoint does not affect local registered memory mapped by | |
351 | * a SCIF endpoint on a remote node. The local memory remains mapped by the peer | |
352 | * SCIF endpoint until explicitly removed by calling munmap(..) by the peer. | |
353 | * | |
354 | * If the peer endpoint's receive queue is not empty at the time that epd is | |
355 | * closed, then the peer endpoint can be passed as the endpoint parameter to | |
356 | * scif_recv() until the receive queue is empty. | |
357 | * | |
358 | * epd is freed and may no longer be accessed. | |
359 | * | |
360 | * Return: | |
361 | * Upon successful completion, scif_close() returns 0; otherwise in user mode | |
362 | * -1 is returned and errno is set to indicate the error; in kernel mode the | |
363 | * negative of one of the following errors is returned. | |
364 | * | |
365 | * Errors: | |
366 | * EBADF, ENOTTY - epd is not a valid endpoint descriptor | |
367 | */ | |
368 | int scif_close(scif_epd_t epd); | |
369 | ||
370 | /** | |
371 | * scif_send() - Send a message | |
372 | * @epd: endpoint descriptor | |
373 | * @msg: message buffer address | |
374 | * @len: message length | |
375 | * @flags: blocking mode flags | |
376 | * | |
377 | * scif_send() sends data to the peer of endpoint epd. Up to len bytes of data | |
378 | * are copied from memory starting at address msg. On successful execution the | |
379 | * return value of scif_send() is the number of bytes that were sent, and is | |
380 | * zero if no bytes were sent because len was zero. scif_send() may be called | |
381 | * only when the endpoint is in a connected state. | |
382 | * | |
383 | * If a scif_send() call is non-blocking, then it sends only those bytes which | |
384 | * can be sent without waiting, up to a maximum of len bytes. | |
385 | * | |
386 | * If a scif_send() call is blocking, then it normally returns after sending | |
387 | * all len bytes. If a blocking call is interrupted or the connection is | |
388 | * reset, the call is considered successful if some bytes were sent or len is | |
389 | * zero, otherwise the call is considered unsuccessful. | |
390 | * | |
391 | * In user mode, the select() and poll() functions can be used to determine | |
392 | * when the send queue is not full. In kernel mode, the scif_poll() function | |
393 | * may be used for this purpose. | |
394 | * | |
395 | * It is recommended that scif_send()/scif_recv() only be used for short | |
396 | * control-type message communication between SCIF endpoints. The SCIF RMA | |
397 | * APIs are expected to provide better performance for transfer sizes of | |
398 | * 1024 bytes or longer for the current MIC hardware and software | |
399 | * implementation. | |
400 | * | |
401 | * scif_send() will block until the entire message is sent if SCIF_SEND_BLOCK | |
402 | * is passed as the flags argument. | |
403 | * | |
404 | * Return: | |
405 | * Upon successful completion, scif_send() returns the number of bytes sent; | |
406 | * otherwise in user mode -1 is returned and errno is set to indicate the | |
407 | * error; in kernel mode the negative of one of the following errors is | |
408 | * returned. | |
409 | * | |
410 | * Errors: | |
411 | * EBADF, ENOTTY - epd is not a valid endpoint descriptor | |
412 | * ECONNRESET - Connection reset by peer | |
7df20f2d SD |
413 | * EINVAL - flags is invalid, or len is negative |
414 | * ENODEV - The remote node is lost, or existed but is not currently in the | |
415 | * network since it may have crashed | |
416 | * ENOMEM - Not enough space | |
417 | * ENOTCONN - The endpoint is not connected | |
418 | */ | |
419 | int scif_send(scif_epd_t epd, void *msg, int len, int flags); | |
420 | ||
421 | /** | |
422 | * scif_recv() - Receive a message | |
423 | * @epd: endpoint descriptor | |
424 | * @msg: message buffer address | |
425 | * @len: message buffer length | |
426 | * @flags: blocking mode flags | |
427 | * | |
428 | * scif_recv() receives data from the peer of endpoint epd. Up to len bytes of | |
429 | * data are copied to memory starting at address msg. On successful execution | |
430 | * the return value of scif_recv() is the number of bytes that were received, | |
431 | * and is zero if no bytes were received because len was zero. scif_recv() may | |
432 | * be called only when the endpoint is in a connected state. | |
433 | * | |
434 | * If a scif_recv() call is non-blocking, then it receives only those bytes | |
435 | * which can be received without waiting, up to a maximum of len bytes. | |
436 | * | |
437 | * If a scif_recv() call is blocking, then it normally returns after receiving | |
438 | * all len bytes. If the blocking call was interrupted due to a disconnection, | |
439 | * subsequent calls to scif_recv() will copy all bytes received up to the point | |
440 | * of disconnection. | |
441 | * | |
442 | * In user mode, the select() and poll() functions can be used to determine | |
443 | * when data is available to be received. In kernel mode, the scif_poll() | |
444 | * function may be used for this purpose. | |
445 | * | |
446 | * It is recommended that scif_send()/scif_recv() only be used for short | |
447 | * control-type message communication between SCIF endpoints. The SCIF RMA | |
448 | * APIs are expected to provide better performance for transfer sizes of | |
449 | * 1024 bytes or longer for the current MIC hardware and software | |
450 | * implementation. | |
451 | * | |
452 | * scif_recv() will block until the entire message is received if | |
453 | * SCIF_RECV_BLOCK is passed as the flags argument. | |
454 | * | |
455 | * Return: | |
456 | * Upon successful completion, scif_recv() returns the number of bytes | |
457 | * received; otherwise in user mode -1 is returned and errno is set to | |
458 | * indicate the error; in kernel mode the negative of one of the following | |
459 | * errors is returned. | |
460 | * | |
461 | * Errors: | |
462 | * EAGAIN - The destination node is returning from a low power state | |
463 | * EBADF, ENOTTY - epd is not a valid endpoint descriptor | |
464 | * ECONNRESET - Connection reset by peer | |
7df20f2d SD |
465 | * EINVAL - flags is invalid, or len is negative |
466 | * ENODEV - The remote node is lost, or existed but is not currently in the | |
467 | * network since it may have crashed | |
468 | * ENOMEM - Not enough space | |
469 | * ENOTCONN - The endpoint is not connected | |
470 | */ | |
471 | int scif_recv(scif_epd_t epd, void *msg, int len, int flags); | |
472 | ||
473 | /** | |
474 | * scif_register() - Mark a memory region for remote access. | |
475 | * @epd: endpoint descriptor | |
476 | * @addr: starting virtual address | |
477 | * @len: length of range | |
478 | * @offset: offset of window | |
479 | * @prot_flags: read/write protection flags | |
480 | * @map_flags: mapping flags | |
481 | * | |
482 | * The scif_register() function opens a window, a range of whole pages of the | |
483 | * registered address space of the endpoint epd, starting at offset po and | |
484 | * continuing for len bytes. The value of po, further described below, is a | |
485 | * function of the parameters offset and len, and the value of map_flags. Each | |
486 | * page of the window represents the physical memory page which backs the | |
487 | * corresponding page of the range of virtual address pages starting at addr | |
488 | * and continuing for len bytes. addr and len are constrained to be multiples | |
489 | * of the page size. A successful scif_register() call returns po. | |
490 | * | |
491 | * When SCIF_MAP_FIXED is set in the map_flags argument, po will be offset | |
492 | * exactly, and offset is constrained to be a multiple of the page size. The | |
493 | * mapping established by scif_register() will not replace any existing | |
494 | * registration; an error is returned if any page within the range [offset, | |
495 | * offset + len - 1] intersects an existing window. | |
496 | * | |
497 | * When SCIF_MAP_FIXED is not set, the implementation uses offset in an | |
498 | * implementation-defined manner to arrive at po. The po value so chosen will | |
499 | * be an area of the registered address space that the implementation deems | |
500 | * suitable for a mapping of len bytes. An offset value of 0 is interpreted as | |
501 | * granting the implementation complete freedom in selecting po, subject to | |
502 | * constraints described below. A non-zero value of offset is taken to be a | |
503 | * suggestion of an offset near which the mapping should be placed. When the | |
504 | * implementation selects a value for po, it does not replace any extant | |
505 | * window. In all cases, po will be a multiple of the page size. | |
506 | * | |
507 | * The physical pages which are so represented by a window are available for | |
508 | * access in calls to mmap(), scif_readfrom(), scif_writeto(), | |
509 | * scif_vreadfrom(), and scif_vwriteto(). While a window is registered, the | |
510 | * physical pages represented by the window will not be reused by the memory | |
511 | * subsystem for any other purpose. Note that the same physical page may be | |
512 | * represented by multiple windows. | |
513 | * | |
514 | * Subsequent operations which change the memory pages to which virtual | |
515 | * addresses are mapped (such as mmap(), munmap()) have no effect on | |
516 | * existing windows. | |
517 | * | |
518 | * If the process will fork(), it is recommended that the registered | |
519 | * virtual address range be marked with MADV_DONTFORK. Doing so will prevent | |
520 | * problems due to copy-on-write semantics. | |
521 | * | |
522 | * The prot_flags argument is formed by OR'ing together one or more of the | |
523 | * following values. | |
524 | * SCIF_PROT_READ - allow read operations from the window | |
525 | * SCIF_PROT_WRITE - allow write operations to the window | |
526 | * | |
7df20f2d SD |
527 | * Return: |
528 | * Upon successful completion, scif_register() returns the offset at which the | |
529 | * mapping was placed (po); otherwise in user mode SCIF_REGISTER_FAILED (that | |
530 | * is (off_t)-1) is returned and errno is set to indicate the error; in | |
531 | * kernel mode the negative of one of the following errors is returned. | |
532 | * | |
533 | * Errors: | |
534 | * EADDRINUSE - SCIF_MAP_FIXED is set in map_flags, and pages in the range | |
535 | * [offset, offset + len -1] are already registered | |
536 | * EAGAIN - The mapping could not be performed due to lack of resources | |
537 | * EBADF, ENOTTY - epd is not a valid endpoint descriptor | |
538 | * ECONNRESET - Connection reset by peer | |
7df20f2d SD |
539 | * EINVAL - map_flags is invalid, or prot_flags is invalid, or SCIF_MAP_FIXED is |
540 | * set in map_flags, and offset is not a multiple of the page size, or addr is not a | |
541 | * multiple of the page size, or len is not a multiple of the page size, or is | |
542 | * 0, or offset is negative | |
543 | * ENODEV - The remote node is lost, or existed but is not currently in the | |
544 | * network since it may have crashed | |
545 | * ENOMEM - Not enough space | |
546 | * ENOTCONN - The endpoint is not connected | |
547 | */ | |
548 | off_t scif_register(scif_epd_t epd, void *addr, size_t len, off_t offset, | |
549 | int prot_flags, int map_flags); | |
550 | ||
551 | /** | |
552 | * scif_unregister() - Release a memory region previously registered for remote access. | |
553 | * @epd: endpoint descriptor | |
554 | * @offset: start of range to unregister | |
555 | * @len: length of range to unregister | |
556 | * | |
557 | * The scif_unregister() function closes those previously registered windows | |
558 | * which are entirely within the range [offset, offset + len - 1]. It is an | |
559 | * error to specify a range which intersects only a subrange of a window. | |
560 | * | |
561 | * On a successful return, pages within the window may no longer be specified | |
562 | * in calls to mmap(), scif_readfrom(), scif_writeto(), scif_vreadfrom(), | |
563 | * scif_vwriteto(), scif_get_pages(), and scif_fence_signal(). The window, | |
564 | * however, continues to exist until all previous references against it are | |
565 | * removed. A window is referenced if there is a mapping to it created by | |
566 | * mmap(), or if scif_get_pages() was called against the window | |
567 | * (and the pages have not been returned via scif_put_pages()). A window is | |
568 | * also referenced while an RMA, in which some range of the window is a source | |
569 | * or destination, is in progress. Finally a window is referenced while some | |
570 | * offset in that window was specified to scif_fence_signal(), and the RMAs | |
571 | * marked by that call to scif_fence_signal() have not completed. While a | |
572 | * window is in this state, its registered address space pages are not | |
573 | * available for use in a new registered window. | |
574 | * | |
575 | * When all such references to the window have been removed, its references to | |
576 | * all the physical pages which it represents are removed. Similarly, the | |
577 | * registered address space pages of the window become available for | |
578 | * registration in a new window. | |
579 | * | |
580 | * Return: | |
581 | * Upon successful completion, scif_unregister() returns 0; otherwise in user | |
582 | * mode -1 is returned and errno is set to indicate the error; in kernel mode | |
583 | * the negative of one of the following errors is returned. In the event of an | |
584 | * error, no windows are unregistered. | |
585 | * | |
586 | * Errors: | |
587 | * EBADF, ENOTTY - epd is not a valid endpoint descriptor | |
588 | * ECONNRESET - Connection reset by peer | |
589 | * EINVAL - the range [offset, offset + len - 1] intersects a subrange of a | |
590 | * window, or offset is negative | |
591 | * ENODEV - The remote node is lost, or existed but is not currently in the | |
592 | * network since it may have crashed | |
593 | * ENOTCONN - The endpoint is not connected | |
594 | * ENXIO - Offsets in the range [offset, offset + len - 1] are invalid for the | |
595 | * registered address space of epd | |
596 | */ | |
597 | int scif_unregister(scif_epd_t epd, off_t offset, size_t len); | |
598 | ||
599 | /** | |
600 | * scif_readfrom() - Copy from a remote address space | |
601 | * @epd: endpoint descriptor | |
602 | * @loffset: offset in local registered address space to | |
603 | * which to copy | |
604 | * @len: length of range to copy | |
605 | * @roffset: offset in remote registered address space | |
606 | * from which to copy | |
607 | * @rma_flags: transfer mode flags | |
608 | * | |
609 | * scif_readfrom() copies len bytes from the remote registered address space of | |
610 | * the peer of endpoint epd, starting at the offset roffset to the local | |
611 | * registered address space of epd, starting at the offset loffset. | |
612 | * | |
613 | * Each of the specified ranges [loffset, loffset + len - 1] and [roffset, | |
614 | * roffset + len - 1] must be within some registered window or windows of the | |
615 | * local and remote nodes. A range may intersect multiple registered windows, | |
616 | * but only if those windows are contiguous in the registered address space. | |
617 | * | |
618 | * If rma_flags includes SCIF_RMA_USECPU, then the data is copied using | |
619 | * programmed read/writes. Otherwise the data is copied using DMA. If | |
620 | * rma_flags includes SCIF_RMA_SYNC, then scif_readfrom() will return after | |
621 | * the transfer is complete. Otherwise, the transfer may be performed | |
622 | * asynchronously. The completion order of any two asynchronous RMA operations | |
623 | * is non-deterministic. The synchronization functions, scif_fence_mark()/ | |
624 | * scif_fence_wait() and scif_fence_signal(), can be used to synchronize to | |
625 | * the completion of asynchronous RMA operations on the same endpoint. | |
626 | * | |
627 | * The DMA transfer of individual bytes is not guaranteed to complete in | |
628 | * address order. If rma_flags includes SCIF_RMA_ORDERED, then the last | |
629 | * cacheline or partial cacheline of the source range will become visible on | |
630 | * the destination node after all other transferred data in the source | |
631 | * range has become visible on the destination node. | |
632 | * | |
633 | * The optimal DMA performance will likely be realized if both | |
634 | * loffset and roffset are cacheline aligned (are a multiple of 64). Lower | |
635 | * performance will likely be realized if loffset and roffset are not | |
636 | * cacheline aligned but are separated by some multiple of 64. The lowest level | |
637 | * of performance is likely if loffset and roffset are not separated by a | |
638 | * multiple of 64. | |
639 | * | |
640 | * The rma_flags argument is formed by ORing together zero or more of the | |
641 | * following values. | |
642 | * SCIF_RMA_USECPU - perform the transfer using the CPU, otherwise use the DMA | |
643 | * engine. | |
644 | * SCIF_RMA_SYNC - perform the transfer synchronously, returning after the | |
645 | * transfer has completed. Passing this flag results in the | |
646 | * current implementation busy waiting and consuming CPU cycles | |
647 | * while the DMA transfer is in progress for best performance by | |
648 | * avoiding the interrupt latency. | |
649 | * SCIF_RMA_ORDERED - ensure that the last cacheline or partial cacheline of | |
650 | * the source range becomes visible on the destination node | |
651 | * after all other transferred data in the source range has | |
652 | * become visible on the destination | |
653 | * | |
654 | * Return: | |
655 | * Upon successful completion, scif_readfrom() returns 0; otherwise in user | |
656 | * mode -1 is returned and errno is set to indicate the error; in kernel mode | |
657 | * the negative of one of the following errors is returned. | |
658 | * | |
659 | * Errors: | |
660 | * EACCES - Attempt to write to a read-only range | |
661 | * EBADF, ENOTTY - epd is not a valid endpoint descriptor | |
662 | * ECONNRESET - Connection reset by peer | |
663 | * EINVAL - rma_flags is invalid | |
664 | * ENODEV - The remote node is lost or existed, but is not currently in the | |
665 | * network since it may have crashed | |
666 | * ENOTCONN - The endpoint is not connected | |
667 | * ENXIO - The range [loffset, loffset + len - 1] is invalid for the registered | |
668 | * address space of epd, or, The range [roffset, roffset + len - 1] is invalid | |
669 | * for the registered address space of the peer of epd, or loffset or roffset | |
670 | * is negative | |
671 | */ | |
672 | int scif_readfrom(scif_epd_t epd, off_t loffset, size_t len, off_t | |
673 | roffset, int rma_flags); | |
674 | ||
675 | /** | |
676 | * scif_writeto() - Copy to a remote address space | |
677 | * @epd: endpoint descriptor | |
678 | * @loffset: offset in local registered address space | |
679 | * from which to copy | |
680 | * @len: length of range to copy | |
681 | * @roffset: offset in remote registered address space to | |
682 | * which to copy | |
683 | * @rma_flags: transfer mode flags | |
684 | * | |
685 | * scif_writeto() copies len bytes from the local registered address space of | |
686 | * epd, starting at the offset loffset to the remote registered address space | |
687 | * of the peer of endpoint epd, starting at the offset roffset. | |
688 | * | |
689 | * Each of the specified ranges [loffset, loffset + len - 1] and [roffset, | |
690 | * roffset + len - 1] must be within some registered window or windows of the | |
691 | * local and remote nodes. A range may intersect multiple registered windows, | |
692 | * but only if those windows are contiguous in the registered address space. | |
693 | * | |
694 | * If rma_flags includes SCIF_RMA_USECPU, then the data is copied using | |
695 | * programmed read/writes. Otherwise the data is copied using DMA. If rma_- | |
696 | * flags includes SCIF_RMA_SYNC, then scif_writeto() will return after the | |
697 | * transfer is complete. Otherwise, the transfer may be performed asynchron- | |
698 | * ously. The order in which any two asynchronous RMA operations complete | |
699 | * is non-deterministic. The synchronization functions, scif_fence_mark()/ | |
700 | * scif_fence_wait() and scif_fence_signal(), can be used to synchronize to | |
701 | * the completion of asynchronous RMA operations on the same endpoint. | |
702 | * | |
703 | * The DMA transfer of individual bytes is not guaranteed to complete in | |
704 | * address order. If rma_flags includes SCIF_RMA_ORDERED, then the last | |
705 | * cacheline or partial cacheline of the source range will become visible on | |
706 | * the destination node after all other transferred data in the source | |
707 | * range has become visible on the destination node. | |
708 | * | |
709 | * The optimal DMA performance will likely be realized if both | |
710 | * loffset and roffset are cacheline aligned (are a multiple of 64). Lower | |
711 | * performance will likely be realized if loffset and roffset are not cacheline | |
712 | * aligned but are separated by some multiple of 64. The lowest level of | |
713 | * performance is likely if loffset and roffset are not separated by a multiple | |
714 | * of 64. | |
715 | * | |
716 | * The rma_flags argument is formed by ORing together zero or more of the | |
717 | * following values. | |
718 | * SCIF_RMA_USECPU - perform the transfer using the CPU, otherwise use the DMA | |
719 | * engine. | |
720 | * SCIF_RMA_SYNC - perform the transfer synchronously, returning after the | |
721 | * transfer has completed. Passing this flag results in the | |
722 | * current implementation busy waiting and consuming CPU cycles | |
723 | * while the DMA transfer is in progress for best performance by | |
724 | * avoiding the interrupt latency. | |
725 | * SCIF_RMA_ORDERED - ensure that the last cacheline or partial cacheline of | |
726 | * the source range becomes visible on the destination node | |
727 | * after all other transferred data in the source range has | |
728 | * become visible on the destination | |
729 | * | |
730 | * Return: | |
731 | * Upon successful completion, scif_writeto() returns 0; otherwise in user | |
732 | * mode -1 is returned and errno is set to indicate the error; in kernel mode | |
733 | * the negative of one of the following errors is returned. | |
734 | * | |
735 | * Errors: | |
736 | * EACCES - Attempt to write to a read-only range | |
737 | * EBADF, ENOTTY - epd is not a valid endpoint descriptor | |
738 | * ECONNRESET - Connection reset by peer | |
739 | * EINVAL - rma_flags is invalid | |
740 | * ENODEV - The remote node is lost or existed, but is not currently in the | |
741 | * network since it may have crashed | |
742 | * ENOTCONN - The endpoint is not connected | |
743 | * ENXIO - The range [loffset, loffset + len - 1] is invalid for the registered | |
744 | * address space of epd, or, The range [roffset, roffset + len - 1] is invalid | |
745 | * for the registered address space of the peer of epd, or loffset or roffset | |
746 | * is negative | |
747 | */ | |
748 | int scif_writeto(scif_epd_t epd, off_t loffset, size_t len, off_t | |
749 | roffset, int rma_flags); | |
750 | ||
751 | /** | |
752 | * scif_vreadfrom() - Copy from a remote address space | |
753 | * @epd: endpoint descriptor | |
754 | * @addr: address to which to copy | |
755 | * @len: length of range to copy | |
756 | * @roffset: offset in remote registered address space | |
757 | * from which to copy | |
758 | * @rma_flags: transfer mode flags | |
759 | * | |
760 | * scif_vreadfrom() copies len bytes from the remote registered address | |
761 | * space of the peer of endpoint epd, starting at the offset roffset, to local | |
762 | * memory, starting at addr. | |
763 | * | |
764 | * The specified range [roffset, roffset + len - 1] must be within some | |
765 | * registered window or windows of the remote nodes. The range may | |
766 | * intersect multiple registered windows, but only if those windows are | |
767 | * contiguous in the registered address space. | |
768 | * | |
769 | * If rma_flags includes SCIF_RMA_USECPU, then the data is copied using | |
770 | * programmed read/writes. Otherwise the data is copied using DMA. If rma_- | |
771 | * flags includes SCIF_RMA_SYNC, then scif_vreadfrom() will return after the | |
772 | * transfer is complete. Otherwise, the transfer may be performed asynchron- | |
773 | * ously. The order in which any two asynchronous RMA operations complete | |
774 | * is non-deterministic. The synchronization functions, scif_fence_mark()/ | |
775 | * scif_fence_wait() and scif_fence_signal(), can be used to synchronize to | |
776 | * the completion of asynchronous RMA operations on the same endpoint. | |
777 | * | |
778 | * The DMA transfer of individual bytes is not guaranteed to complete in | |
779 | * address order. If rma_flags includes SCIF_RMA_ORDERED, then the last | |
780 | * cacheline or partial cacheline of the source range will become visible on | |
781 | * the destination node after all other transferred data in the source | |
782 | * range has become visible on the destination node. | |
783 | * | |
784 | * If rma_flags includes SCIF_RMA_USECACHE, then the physical pages which back | |
785 | * the specified local memory range may remain in a pinned state even after | |
786 | * the specified transfer completes. This may reduce overhead if some or all of | |
787 | * the same virtual address range is referenced in a subsequent call of | |
788 | * scif_vreadfrom() or scif_vwriteto(). | |
789 | * | |
790 | * The optimal DMA performance will likely be realized if both | |
791 | * addr and roffset are cacheline aligned (are a multiple of 64). Lower | |
792 | * performance will likely be realized if addr and roffset are not | |
793 | * cacheline aligned but are separated by some multiple of 64. The lowest level | |
794 | * of performance is likely if addr and roffset are not separated by a | |
795 | * multiple of 64. | |
796 | * | |
797 | * The rma_flags argument is formed by ORing together zero or more of the | |
798 | * following values. | |
799 | * SCIF_RMA_USECPU - perform the transfer using the CPU, otherwise use the DMA | |
800 | * engine. | |
801 | * SCIF_RMA_USECACHE - enable registration caching | |
802 | * SCIF_RMA_SYNC - perform the transfer synchronously, returning after the | |
803 | * transfer has completed. Passing this flag results in the | |
804 | * current implementation busy waiting and consuming CPU cycles | |
805 | * while the DMA transfer is in progress for best performance by | |
806 | * avoiding the interrupt latency. | |
807 | * SCIF_RMA_ORDERED - ensure that the last cacheline or partial cacheline of | |
808 | * the source range becomes visible on the destination node | |
809 | * after all other transferred data in the source range has | |
810 | * become visible on the destination | |
811 | * | |
812 | * Return: | |
813 | * Upon successful completion, scif_vreadfrom() returns 0; otherwise in user | |
814 | * mode -1 is returned and errno is set to indicate the error; in kernel mode | |
815 | * the negative of one of the following errors is returned. | |
816 | * | |
817 | * Errors: | |
818 | * EACCES - Attempt to write to a read-only range | |
819 | * EBADF, ENOTTY - epd is not a valid endpoint descriptor | |
820 | * ECONNRESET - Connection reset by peer | |
7df20f2d SD |
821 | * EINVAL - rma_flags is invalid |
822 | * ENODEV - The remote node is lost or existed, but is not currently in the | |
823 | * network since it may have crashed | |
824 | * ENOTCONN - The endpoint is not connected | |
825 | * ENXIO - Offsets in the range [roffset, roffset + len - 1] are invalid for the | |
826 | * registered address space of the peer of epd | |
827 | */ | |
828 | int scif_vreadfrom(scif_epd_t epd, void *addr, size_t len, off_t roffset, | |
829 | int rma_flags); | |
830 | ||
831 | /** | |
832 | * scif_vwriteto() - Copy to a remote address space | |
833 | * @epd: endpoint descriptor | |
834 | * @addr: address from which to copy | |
835 | * @len: length of range to copy | |
836 | * @roffset: offset in remote registered address space to | |
837 | * which to copy | |
838 | * @rma_flags: transfer mode flags | |
839 | * | |
840 | * scif_vwriteto() copies len bytes from the local memory, starting at addr, to | |
841 | * the remote registered address space of the peer of endpoint epd, starting at | |
842 | * the offset roffset. | |
843 | * | |
844 | * The specified range [roffset, roffset + len - 1] must be within some | |
845 | * registered window or windows of the remote nodes. The range may intersect | |
846 | * multiple registered windows, but only if those windows are contiguous in the | |
847 | * registered address space. | |
848 | * | |
849 | * If rma_flags includes SCIF_RMA_USECPU, then the data is copied using | |
850 | * programmed read/writes. Otherwise the data is copied using DMA. If rma_- | |
851 | * flags includes SCIF_RMA_SYNC, then scif_vwriteto() will return after the | |
852 | * transfer is complete. Otherwise, the transfer may be performed asynchron- | |
853 | * ously. The order in which any two asynchronous RMA operations complete | |
854 | * is non-deterministic. The synchronization functions, scif_fence_mark()/ | |
855 | * scif_fence_wait() and scif_fence_signal(), can be used to synchronize to | |
856 | * the completion of asynchronous RMA operations on the same endpoint. | |
857 | * | |
858 | * The DMA transfer of individual bytes is not guaranteed to complete in | |
859 | * address order. If rma_flags includes SCIF_RMA_ORDERED, then the last | |
860 | * cacheline or partial cacheline of the source range will become visible on | |
861 | * the destination node after all other transferred data in the source | |
862 | * range has become visible on the destination node. | |
863 | * | |
864 | * If rma_flags includes SCIF_RMA_USECACHE, then the physical pages which back | |
865 | * the specified local memory range may remain in a pinned state even after | |
866 | * the specified transfer completes. This may reduce overhead if some or all of | |
867 | * the same virtual address range is referenced in a subsequent call of | |
868 | * scif_vreadfrom() or scif_vwriteto(). | |
869 | * | |
870 | * The optimal DMA performance will likely be realized if both | |
871 | * addr and roffset are cacheline aligned (are a multiple of 64). Lower | |
872 | * performance will likely be realized if addr and roffset are not cacheline | |
873 | * aligned but are separated by some multiple of 64. The lowest level of | |
874 | * performance is likely if addr and roffset are not separated by a multiple of | |
875 | * 64. | |
876 | * | |
877 | * The rma_flags argument is formed by ORing together zero or more of the | |
878 | * following values. | |
879 | * SCIF_RMA_USECPU - perform the transfer using the CPU, otherwise use the DMA | |
880 | * engine. | |
881 | * SCIF_RMA_USECACHE - allow registration caching | |
882 | * SCIF_RMA_SYNC - perform the transfer synchronously, returning after the | |
883 | * transfer has completed. Passing this flag results in the | |
884 | * current implementation busy waiting and consuming CPU cycles | |
885 | * while the DMA transfer is in progress for best performance by | |
886 | * avoiding the interrupt latency. | |
887 | * SCIF_RMA_ORDERED - ensure that the last cacheline or partial cacheline of | |
888 | * the source range becomes visible on the destination node | |
889 | * after all other transferred data in the source range has | |
890 | * become visible on the destination | |
891 | * | |
892 | * Return: | |
893 | * Upon successful completion, scif_vwriteto() returns 0; otherwise in user | |
894 | * mode -1 is returned and errno is set to indicate the error; in kernel mode | |
895 | * the negative of one of the following errors is returned. | |
896 | * | |
897 | * Errors: | |
898 | * EACCES - Attempt to write to a read-only range | |
899 | * EBADF, ENOTTY - epd is not a valid endpoint descriptor | |
900 | * ECONNRESET - Connection reset by peer | |
7df20f2d SD |
901 | * EINVAL - rma_flags is invalid |
902 | * ENODEV - The remote node is lost or existed, but is not currently in the | |
903 | * network since it may have crashed | |
904 | * ENOTCONN - The endpoint is not connected | |
905 | * ENXIO - Offsets in the range [roffset, roffset + len - 1] are invalid for the | |
906 | * registered address space of the peer of epd | |
907 | */ | |
908 | int scif_vwriteto(scif_epd_t epd, void *addr, size_t len, off_t roffset, | |
909 | int rma_flags); | |
910 | ||
911 | /** | |
912 | * scif_fence_mark() - Mark previously issued RMAs | |
913 | * @epd: endpoint descriptor | |
914 | * @flags: control flags | |
915 | * @mark: marked value returned as output. | |
916 | * | |
917 | * scif_fence_mark() returns after marking the current set of all uncompleted | |
918 | * RMAs initiated through the endpoint epd or the current set of all | |
919 | * uncompleted RMAs initiated through the peer of endpoint epd. The RMAs are | |
920 | * marked with a value returned at mark. The application may subsequently call | |
921 | * scif_fence_wait(), passing the value returned at mark, to await completion | |
922 | * of all RMAs so marked. | |
923 | * | |
924 | * The flags argument has exactly one of the following values. | |
925 | * SCIF_FENCE_INIT_SELF - RMA operations initiated through endpoint | |
926 | * epd are marked | |
927 | * SCIF_FENCE_INIT_PEER - RMA operations initiated through the peer | |
928 | * of endpoint epd are marked | |
929 | * | |
930 | * Return: | |
931 | * Upon successful completion, scif_fence_mark() returns 0; otherwise in user | |
932 | * mode -1 is returned and errno is set to indicate the error; in kernel mode | |
933 | * the negative of one of the following errors is returned. | |
934 | * | |
935 | * Errors: | |
936 | * EBADF, ENOTTY - epd is not a valid endpoint descriptor | |
937 | * ECONNRESET - Connection reset by peer | |
938 | * EINVAL - flags is invalid | |
939 | * ENODEV - The remote node is lost or existed, but is not currently in the | |
940 | * network since it may have crashed | |
941 | * ENOTCONN - The endpoint is not connected | |
942 | * ENOMEM - Insufficient kernel memory was available | |
943 | */ | |
944 | int scif_fence_mark(scif_epd_t epd, int flags, int *mark); | |
945 | ||
946 | /** | |
947 | * scif_fence_wait() - Wait for completion of marked RMAs | |
948 | * @epd: endpoint descriptor | |
949 | * @mark: mark request | |
950 | * | |
951 | * scif_fence_wait() returns after all RMAs marked with mark have completed. | |
952 | * The value passed in mark must have been obtained in a previous call to | |
953 | * scif_fence_mark(). | |
954 | * | |
955 | * Return: | |
956 | * Upon successful completion, scif_fence_wait() returns 0; otherwise in user | |
957 | * mode -1 is returned and errno is set to indicate the error; in kernel mode | |
958 | * the negative of one of the following errors is returned. | |
959 | * | |
960 | * Errors: | |
961 | * EBADF, ENOTTY - epd is not a valid endpoint descriptor | |
962 | * ECONNRESET - Connection reset by peer | |
963 | * ENODEV - The remote node is lost or existed, but is not currently in the | |
964 | * network since it may have crashed | |
965 | * ENOTCONN - The endpoint is not connected | |
966 | * ENOMEM - Insufficient kernel memory was available | |
967 | */ | |
968 | int scif_fence_wait(scif_epd_t epd, int mark); | |
969 | ||
970 | /** | |
971 | * scif_fence_signal() - Request a memory update on completion of RMAs | |
972 | * @epd: endpoint descriptor | |
973 | * @loff: local offset | |
974 | * @lval: local value to write to loff | |
975 | * @roff: remote offset | |
976 | * @rval: remote value to write to roff | |
977 | * @flags: flags | |
978 | * | |
979 | * scif_fence_signal() returns after marking the current set of all uncompleted | |
980 | * RMAs initiated through the endpoint epd or marking the current set of all | |
981 | * uncompleted RMAs initiated through the peer of endpoint epd. | |
982 | * | |
983 | * If flags includes SCIF_SIGNAL_LOCAL, then on completion of the RMAs in the | |
984 | * marked set, lval is written to memory at the address corresponding to offset | |
985 | * loff in the local registered address space of epd. loff must be within a | |
986 | * registered window. If flags includes SCIF_SIGNAL_REMOTE, then on completion | |
987 | * of the RMAs in the marked set, rval is written to memory at the address | |
988 | * corresponding to offset roff in the remote registered address space of epd. | |
989 | * roff must be within a remote registered window of the peer of epd. Note | |
990 | * that any specified offset must be DWORD (4 byte / 32 bit) aligned. | |
991 | * | |
992 | * The flags argument is formed by OR'ing together the following. | |
993 | * Exactly one of the following values. | |
994 | * SCIF_FENCE_INIT_SELF - RMA operations initiated through endpoint | |
995 | * epd are marked | |
996 | * SCIF_FENCE_INIT_PEER - RMA operations initiated through the peer | |
997 | * of endpoint epd are marked | |
998 | * One or more of the following values. | |
999 | * SCIF_SIGNAL_LOCAL - On completion of the marked set of RMAs, write lval to | |
1000 | * memory at the address corresponding to offset loff in the local | |
1001 | * registered address space of epd. | |
1002 | * SCIF_SIGNAL_REMOTE - On completion of the marked set of RMAs, write rval to | |
1003 | * memory at the address corresponding to offset roff in the remote | |
1004 | * registered address space of epd. | |
1005 | * | |
1006 | * Return: | |
1007 | * Upon successful completion, scif_fence_signal() returns 0; otherwise in | |
1008 | * user mode -1 is returned and errno is set to indicate the error; in kernel | |
1009 | * mode the negative of one of the following errors is returned. | |
1010 | * | |
1011 | * Errors: | |
1012 | * EBADF, ENOTTY - epd is not a valid endpoint descriptor | |
1013 | * ECONNRESET - Connection reset by peer | |
1014 | * EINVAL - flags is invalid, or loff or roff are not DWORD aligned | |
1015 | * ENODEV - The remote node is lost or existed, but is not currently in the | |
1016 | * network since it may have crashed | |
1017 | * ENOTCONN - The endpoint is not connected | |
1018 | * ENXIO - loff is invalid for the registered address space of epd, or roff is | |
1019 | * invalid for the registered address space of the peer of epd | |
1020 | */ | |
1021 | int scif_fence_signal(scif_epd_t epd, off_t loff, u64 lval, off_t roff, | |
1022 | u64 rval, int flags); | |
1023 | ||
1024 | /** | |
1025 | * scif_get_node_ids() - Return information about online nodes | |
1026 | * @nodes: array in which to return online node IDs | |
1027 | * @len: number of entries in the nodes array | |
1028 | * @self: address to place the node ID of the local node | |
1029 | * | |
1030 | * scif_get_node_ids() fills in the nodes array with up to len node IDs of the | |
1031 | * nodes in the SCIF network. If there is not enough space in nodes, as | |
1032 | * indicated by the len parameter, only len node IDs are returned in nodes. The | |
1033 | * return value of scif_get_node_ids() is the total number of nodes currently in | |
1034 | * the SCIF network. By checking the return value against the len parameter, | |
1035 | * the user may determine if enough space for nodes was allocated. | |
1036 | * | |
1037 | * The node ID of the local node is returned at self. | |
1038 | * | |
1039 | * Return: | |
1040 | * Upon successful completion, scif_get_node_ids() returns the actual number of | |
1041 | * online nodes in the SCIF network including 'self'; otherwise in user mode | |
1042 | * -1 is returned and errno is set to indicate the error; in kernel mode no | |
1043 | * errors are returned. | |
a44f2630 SD |
1044 | */ |
1045 | int scif_get_node_ids(u16 *nodes, int len, u16 *self); | |
1046 | ||
1047 | /** | |
1048 | * scif_pin_pages() - Pin a set of pages | |
1049 | * @addr: Virtual address of range to pin | |
1050 | * @len: Length of range to pin | |
1051 | * @prot_flags: Page protection flags | |
1052 | * @map_flags: Page classification flags | |
1053 | * @pinned_pages: Handle to pinned pages | |
1054 | * | |
1055 | * scif_pin_pages() pins (locks in physical memory) the physical pages which | |
1056 | * back the range of virtual address pages starting at addr and continuing for | |
1057 | * len bytes. addr and len are constrained to be multiples of the page size. A | |
1058 | * successful scif_pin_pages() call returns a handle to pinned_pages which may | |
1059 | * be used in subsequent calls to scif_register_pinned_pages(). | |
1060 | * | |
1061 | * The pages will remain pinned as long as there is a reference against the | |
1062 | * scif_pinned_pages_t value returned by scif_pin_pages() and until | |
1063 | * scif_unpin_pages() is called, passing the scif_pinned_pages_t value. A | |
1064 | * reference is added to a scif_pinned_pages_t value each time a window is | |
1065 | * created by calling scif_register_pinned_pages() and passing the | |
1066 | * scif_pinned_pages_t value. A reference is removed from a | |
1067 | * scif_pinned_pages_t value each time such a window is deleted. | |
1068 | * | |
1069 | * Subsequent operations which change the memory pages to which virtual | |
1070 | * addresses are mapped (such as mmap(), munmap()) have no effect on the | |
1071 | * scif_pinned_pages_t value or windows created against it. | |
1072 | * | |
1073 | * If the process will fork(), it is recommended that the registered | |
1074 | * virtual address range be marked with MADV_DONTFORK. Doing so will prevent | |
1075 | * problems due to copy-on-write semantics. | |
1076 | * | |
1077 | * The prot_flags argument is formed by OR'ing together one or more of the | |
1078 | * following values. | |
1079 | * SCIF_PROT_READ - allow read operations against the pages | |
1080 | * SCIF_PROT_WRITE - allow write operations against the pages | |
1081 | * The map_flags argument can be set as SCIF_MAP_KERNEL to interpret addr as a | |
1082 | * kernel space address. By default, addr is interpreted as a user space | |
1083 | * address. | |
1084 | * | |
1085 | * Return: | |
1086 | * Upon successful completion, scif_pin_pages() returns 0; otherwise the | |
1087 | * negative of one of the following errors is returned. | |
7df20f2d SD |
1088 | * |
1089 | * Errors: | |
a44f2630 SD |
1090 | * EINVAL - prot_flags is invalid, map_flags is invalid, or offset is negative |
1091 | * ENOMEM - Not enough space | |
7df20f2d | 1092 | */ |
a44f2630 SD |
1093 | int scif_pin_pages(void *addr, size_t len, int prot_flags, int map_flags, |
1094 | scif_pinned_pages_t *pinned_pages); | |
1095 | ||
1096 | /** | |
1097 | * scif_unpin_pages() - Unpin a set of pages | |
1098 | * @pinned_pages: Handle to pinned pages to be unpinned | |
1099 | * | |
1100 | * scif_unpin_pages() prevents scif_register_pinned_pages() from registering new | |
1101 | * windows against pinned_pages. The physical pages represented by pinned_pages | |
1102 | * will remain pinned until all windows previously registered against | |
1103 | * pinned_pages are deleted (the window is scif_unregister()'d and all | |
1104 | * references to the window are removed (see scif_unregister())). | |
1105 | * | |
1106 | * pinned_pages must have been obtained from a previous call to | |
1107 | * scif_pin_pages(). After calling scif_unpin_pages(), it is an error to pass | |
1108 | * pinned_pages to scif_register_pinned_pages(). | |
1109 | * | |
1110 | * Return: | |
1111 | * Upon successful completion, scif_unpin_pages() returns 0; otherwise the | |
1112 | * negative of one of the following errors is returned. | |
1113 | * | |
1114 | * Errors: | |
1115 | * EINVAL - pinned_pages is not valid | |
1116 | */ | |
1117 | int scif_unpin_pages(scif_pinned_pages_t pinned_pages); | |
1118 | ||
1119 | /** | |
1120 | * scif_register_pinned_pages() - Mark a memory region for remote access. | |
1121 | * @epd: endpoint descriptor | |
1122 | * @pinned_pages: Handle to pinned pages | |
1123 | * @offset: Registered address space offset | |
1124 | * @map_flags: Flags which control where pages are mapped | |
1125 | * | |
1126 | * The scif_register_pinned_pages() function opens a window, a range of whole | |
1127 | * pages of the registered address space of the endpoint epd, starting at | |
1128 | * offset po. The value of po, further described below, is a function of the | |
1129 | * parameters offset and pinned_pages, and the value of map_flags. Each page of | |
1130 | * the window represents a corresponding physical memory page of the range | |
1131 | * represented by pinned_pages; the length of the window is the same as the | |
1132 | * length of range represented by pinned_pages. A successful | |
1133 | * scif_register_pinned_pages() call returns po as the return value. | |
1134 | * | |
1135 | * When SCIF_MAP_FIXED is set in the map_flags argument, po will be offset | |
1136 | * exactly, and offset is constrained to be a multiple of the page size. The | |
1137 | * mapping established by scif_register_pinned_pages() will not replace any | |
1138 | * existing registration; an error is returned if any page of the new window | |
1139 | * would intersect an existing window. | |
1140 | * | |
1141 | * When SCIF_MAP_FIXED is not set, the implementation uses offset in an | |
1142 | * implementation-defined manner to arrive at po. The po so chosen will be an | |
1143 | * area of the registered address space that the implementation deems suitable | |
1144 | * for a mapping of the required size. An offset value of 0 is interpreted as | |
1145 | * granting the implementation complete freedom in selecting po, subject to | |
1146 | * constraints described below. A non-zero value of offset is taken to be a | |
1147 | * suggestion of an offset near which the mapping should be placed. When the | |
1148 | * implementation selects a value for po, it does not replace any extant | |
1149 | * window. In all cases, po will be a multiple of the page size. | |
1150 | * | |
1151 | * The physical pages which are so represented by a window are available for | |
1152 | * access in calls to scif_get_pages(), scif_readfrom(), scif_writeto(), | |
1153 | * scif_vreadfrom(), and scif_vwriteto(). While a window is registered, the | |
1154 | * physical pages represented by the window will not be reused by the memory | |
1155 | * subsystem for any other purpose. Note that the same physical page may be | |
1156 | * represented by multiple windows. | |
1157 | * | |
1158 | * Windows created by scif_register_pinned_pages() are unregistered by | |
1159 | * scif_unregister(). | |
1160 | * | |
1161 | * The map_flags argument can be set to SCIF_MAP_FIXED which interprets a | |
1162 | * fixed offset. | |
1163 | * | |
1164 | * Return: | |
1165 | * Upon successful completion, scif_register_pinned_pages() returns the offset | |
1166 | * at which the mapping was placed (po); otherwise the negative of one of the | |
1167 | * following errors is returned. | |
1168 | * | |
1169 | * Errors: | |
1170 | * EADDRINUSE - SCIF_MAP_FIXED is set in map_flags and pages in the new window | |
1171 | * would intersect an existing window | |
1172 | * EAGAIN - The mapping could not be performed due to lack of resources | |
1173 | * ECONNRESET - Connection reset by peer | |
1174 | * EINVAL - map_flags is invalid, or SCIF_MAP_FIXED is set in map_flags, and | |
1175 | * offset is not a multiple of the page size, or offset is negative | |
1176 | * ENODEV - The remote node is lost or existed, but is not currently in the | |
1177 | * network since it may have crashed | |
1178 | * ENOMEM - Not enough space | |
1179 | * ENOTCONN - The endpoint is not connected | |
1180 | */ | |
1181 | off_t scif_register_pinned_pages(scif_epd_t epd, | |
1182 | scif_pinned_pages_t pinned_pages, | |
1183 | off_t offset, int map_flags); | |
1184 | ||
1185 | /** | |
1186 | * scif_get_pages() - Add references to remote registered pages | |
1187 | * @epd: endpoint descriptor | |
1188 | * @offset: remote registered offset | |
1189 | * @len: length of range of pages | |
1190 | * @pages: returned scif_range structure | |
1191 | * | |
1192 | * scif_get_pages() returns the addresses of the physical pages represented by | |
1193 | * those pages of the registered address space of the peer of epd, starting at | |
1194 | * offset and continuing for len bytes. offset and len are constrained to be | |
1195 | * multiples of the page size. | |
1196 | * | |
1197 | * All of the pages in the specified range [offset, offset + len - 1] must be | |
1198 | * within a single window of the registered address space of the peer of epd. | |
1199 | * | |
1200 | * The addresses are returned as a virtually contiguous array pointed to by the | |
1201 | * phys_addr component of the scif_range structure whose address is returned in | |
1202 | * pages. The nr_pages component of scif_range is the length of the array. The | |
1203 | * prot_flags component of scif_range holds the protection flag value passed | |
1204 | * when the pages were registered. | |
1205 | * | |
1206 | * Each physical page whose address is returned by scif_get_pages() remains | |
1207 | * available and will not be released for reuse until the scif_range structure | |
1208 | * is returned in a call to scif_put_pages(). The scif_range structure returned | |
1209 | * by scif_get_pages() must not be modified. | |
1210 | * | |
1211 | * It is an error to call scif_close() on an endpoint while a scif_range | |
1212 | * structure obtained from it has not been returned to scif_put_pages(). | |
1213 | * | |
1214 | * Return: | |
1215 | * Upon successful completion, scif_get_pages() returns 0; otherwise the | |
1216 | * negative of one of the following errors is returned. | |
1217 | * Errors: | |
1218 | * ECONNRESET - Connection reset by peer. | |
1219 | * EINVAL - offset is not a multiple of the page size, or offset is negative, or | |
1220 | * len is not a multiple of the page size | |
1221 | * ENODEV - The remote node is lost, or existed but is not currently in the | |
1222 | * network since it may have crashed | |
1223 | * ENOTCONN - The endpoint is not connected | |
1224 | * ENXIO - Offsets in the range [offset, offset + len - 1] are invalid | |
1225 | * for the registered address space of the peer epd | |
1226 | */ | |
1227 | int scif_get_pages(scif_epd_t epd, off_t offset, size_t len, | |
1228 | struct scif_range **pages); | |
1229 | ||
1230 | /** | |
1231 | * scif_put_pages() - Remove references from remote registered pages | |
1232 | * @pages: pages to be returned | |
1233 | * | |
1234 | * scif_put_pages() releases a scif_range structure previously obtained by | |
1235 | * calling scif_get_pages(). The physical pages represented by pages may | |
1236 | * be reused when the window which represented those pages is unregistered. | |
1237 | * Therefore, those pages must not be accessed after calling scif_put_pages(). | |
1238 | * | |
1239 | * Return: | |
1240 | * Upon successful completion, scif_put_pages() returns 0; otherwise the | |
1241 | * negative of one of the following errors is returned. | |
1242 | * Errors: | |
1243 | * EINVAL - pages does not point to a valid scif_range structure, or | |
1244 | * the scif_range structure pointed to by pages was already returned | |
1245 | * ENODEV - The remote node is lost, or existed but is not currently in the | |
1246 | * network since it may have crashed | |
1247 | * ENOTCONN - The endpoint is not connected | |
1248 | */ | |
1249 | int scif_put_pages(struct scif_range *pages); | |
7df20f2d | 1250 | |
b7f94441 AD |
1251 | /** |
1252 | * scif_poll() - Wait for some event on an endpoint | |
1253 | * @epds: Array of endpoint descriptors | |
1254 | * @nepds: Length of epds | |
1255 | * @timeout: Upper limit on time for which scif_poll() will block | |
1256 | * | |
1257 | * scif_poll() waits for one of a set of endpoints to become ready to perform | |
1258 | * an I/O operation. | |
1259 | * | |
1260 | * The epds argument specifies the endpoint descriptors to be examined and the | |
1261 | * events of interest for each endpoint descriptor. epds is a pointer to an | |
1262 | * array with one member for each open endpoint descriptor of interest. | |
1263 | * | |
1264 | * The number of items in the epds array is specified in nepds. The epd field | |
1265 | * of scif_pollepd is an endpoint descriptor of an open endpoint. The field | |
1266 | * events is a bitmask specifying the events which the application is | |
1267 | * interested in. The field revents is an output parameter, filled by the | |
1268 | * kernel with the events that actually occurred. The bits returned in revents | |
1269 | * can include any of those specified in events, or one of the values POLLERR, | |
1270 | * POLLHUP, or POLLNVAL. (These three bits are meaningless in the events | |
1271 | * field, and will be set in the revents field whenever the corresponding | |
1272 | * condition is true.) | |
1273 | * | |
1274 | * If none of the events requested (and no error) has occurred for any of the | |
1275 | * endpoint descriptors, then scif_poll() blocks until one of the events occurs. | |
1276 | * | |
1277 | * The timeout argument specifies an upper limit on the time for which | |
1278 | * scif_poll() will block, in milliseconds. Specifying a negative value in | |
1279 | * timeout means an infinite timeout. | |
1280 | * | |
1281 | * The following bits may be set in events and returned in revents. | |
1282 | * POLLIN - Data may be received without blocking. For a connected | |
1283 | * endpoint, this means that scif_recv() may be called without blocking. For a | |
1284 | * listening endpoint, this means that scif_accept() may be called without | |
1285 | * blocking. | |
1286 | * POLLOUT - Data may be sent without blocking. For a connected endpoint, this | |
1287 | * means that scif_send() may be called without blocking. POLLOUT may also be | |
1288 | * used to block waiting for a non-blocking connect to complete. This bit value | |
1289 | * has no meaning for a listening endpoint and is ignored if specified. | |
1290 | * | |
1291 | * The following bits are only returned in revents, and are ignored if set in | |
1292 | * events. | |
1293 | * POLLERR - An error occurred on the endpoint | |
1294 | * POLLHUP - The connection to the peer endpoint was disconnected | |
1295 | * POLLNVAL - The specified endpoint descriptor is invalid. | |
1296 | * | |
1297 | * Return: | |
1298 | * Upon successful completion, scif_poll() returns a non-negative value. A | |
1299 | * positive value indicates the total number of endpoint descriptors that have | |
1300 | * been selected (that is, endpoint descriptors for which the revents member is | |
1301 | * non-zero). A value of 0 indicates that the call timed out and no endpoint | |
1302 | * descriptors have been selected. Otherwise in user mode -1 is returned and | |
1303 | * errno is set to indicate the error; in kernel mode the negative of one of | |
1304 | * the following errors is returned. | |
1305 | * | |
1306 | * Errors: | |
1307 | * EINTR - A signal occurred before any requested event | |
1308 | * EINVAL - The nepds argument is greater than {OPEN_MAX} | |
1309 | * ENOMEM - There was no space to allocate file descriptor tables | |
1310 | */ | |
1311 | int scif_poll(struct scif_pollepd *epds, unsigned int nepds, long timeout); | |
1312 | ||
d3d912eb AD |
1313 | /** |
1314 | * scif_client_register() - Register a SCIF client | |
1315 | * @client: client to be registered | |
1316 | * | |
1317 | * scif_client_register() registers a SCIF client. The probe() method | |
1318 | * of the client is called when SCIF peer devices come online and the | |
1319 | * remove() method is called when the peer devices disappear. | |
1320 | * | |
1321 | * Return: | |
1322 | * Upon successful completion, scif_client_register() returns a non-negative | |
1323 | * value. Otherwise the return value is the same as subsys_interface_register() | |
1324 | * in the kernel. | |
1325 | */ | |
1326 | int scif_client_register(struct scif_client *client); | |
1327 | ||
1328 | /** | |
1329 | * scif_client_unregister() - Unregister a SCIF client | |
1330 | * @client: client to be unregistered | |
1331 | * | |
1332 | * scif_client_unregister() unregisters a SCIF client. | |
1333 | * | |
1334 | * Return: | |
1335 | * None | |
1336 | */ | |
1337 | void scif_client_unregister(struct scif_client *client); | |
1338 | ||
7df20f2d | 1339 | #endif /* __SCIF_H__ */ |