]>
Commit | Line | Data |
---|---|---|
d1a4c0b3 GC |
1 | #include <net/tcp.h> |
2 | #include <net/tcp_memcontrol.h> | |
3 | #include <net/sock.h> | |
3dc43e3e GC |
4 | #include <net/ip.h> |
5 | #include <linux/nsproxy.h> | |
d1a4c0b3 GC |
6 | #include <linux/memcontrol.h> |
7 | #include <linux/module.h> | |
8 | ||
1d62e436 | 9 | int tcp_init_cgroup(struct mem_cgroup *memcg, struct cgroup_subsys *ss) |
d1a4c0b3 GC |
10 | { |
11 | /* | |
12 | * The root cgroup does not use res_counters, but rather, | |
13 | * rely on the data already collected by the network | |
14 | * subsystem | |
15 | */ | |
16 | struct res_counter *res_parent = NULL; | |
17 | struct cg_proto *cg_proto, *parent_cg; | |
d1a4c0b3 GC |
18 | struct mem_cgroup *parent = parent_mem_cgroup(memcg); |
19 | ||
20 | cg_proto = tcp_prot.proto_cgroup(memcg); | |
21 | if (!cg_proto) | |
6bc10349 | 22 | return 0; |
d1a4c0b3 | 23 | |
2e685cad EB |
24 | cg_proto->sysctl_mem[0] = sysctl_tcp_mem[0]; |
25 | cg_proto->sysctl_mem[1] = sysctl_tcp_mem[1]; | |
26 | cg_proto->sysctl_mem[2] = sysctl_tcp_mem[2]; | |
27 | cg_proto->memory_pressure = 0; | |
28 | cg_proto->memcg = memcg; | |
d1a4c0b3 GC |
29 | |
30 | parent_cg = tcp_prot.proto_cgroup(parent); | |
31 | if (parent_cg) | |
2e685cad | 32 | res_parent = &parent_cg->memory_allocated; |
d1a4c0b3 | 33 | |
2e685cad EB |
34 | res_counter_init(&cg_proto->memory_allocated, res_parent); |
35 | percpu_counter_init(&cg_proto->sockets_allocated, 0); | |
d1a4c0b3 | 36 | |
6bc10349 | 37 | return 0; |
d1a4c0b3 GC |
38 | } |
39 | EXPORT_SYMBOL(tcp_init_cgroup); | |
40 | ||
1d62e436 | 41 | void tcp_destroy_cgroup(struct mem_cgroup *memcg) |
d1a4c0b3 | 42 | { |
d1a4c0b3 | 43 | struct cg_proto *cg_proto; |
d1a4c0b3 GC |
44 | |
45 | cg_proto = tcp_prot.proto_cgroup(memcg); | |
46 | if (!cg_proto) | |
47 | return; | |
48 | ||
2e685cad | 49 | percpu_counter_destroy(&cg_proto->sockets_allocated); |
d1a4c0b3 GC |
50 | } |
51 | EXPORT_SYMBOL(tcp_destroy_cgroup); | |
3aaabe23 GC |
52 | |
53 | static int tcp_update_limit(struct mem_cgroup *memcg, u64 val) | |
54 | { | |
3aaabe23 | 55 | struct cg_proto *cg_proto; |
3aaabe23 GC |
56 | int i; |
57 | int ret; | |
58 | ||
59 | cg_proto = tcp_prot.proto_cgroup(memcg); | |
60 | if (!cg_proto) | |
61 | return -EINVAL; | |
62 | ||
6de5a8bf SZ |
63 | if (val > RES_COUNTER_MAX) |
64 | val = RES_COUNTER_MAX; | |
3aaabe23 | 65 | |
2e685cad | 66 | ret = res_counter_set_limit(&cg_proto->memory_allocated, val); |
3aaabe23 GC |
67 | if (ret) |
68 | return ret; | |
69 | ||
70 | for (i = 0; i < 3; i++) | |
2e685cad EB |
71 | cg_proto->sysctl_mem[i] = min_t(long, val >> PAGE_SHIFT, |
72 | sysctl_tcp_mem[i]); | |
3aaabe23 | 73 | |
6de5a8bf | 74 | if (val == RES_COUNTER_MAX) |
3f134619 | 75 | clear_bit(MEMCG_SOCK_ACTIVE, &cg_proto->flags); |
6de5a8bf | 76 | else if (val != RES_COUNTER_MAX) { |
3f134619 GC |
77 | /* |
78 | * The active bit needs to be written after the static_key | |
79 | * update. This is what guarantees that the socket activation | |
80 | * function is the last one to run. See sock_update_memcg() for | |
81 | * details, and note that we don't mark any socket as belonging | |
82 | * to this memcg until that flag is up. | |
83 | * | |
84 | * We need to do this, because static_keys will span multiple | |
85 | * sites, but we can't control their order. If we mark a socket | |
86 | * as accounted, but the accounting functions are not patched in | |
87 | * yet, we'll lose accounting. | |
88 | * | |
89 | * We never race with the readers in sock_update_memcg(), | |
90 | * because when this value change, the code to process it is not | |
91 | * patched in yet. | |
92 | * | |
93 | * The activated bit is used to guarantee that no two writers | |
94 | * will do the update in the same memcg. Without that, we can't | |
95 | * properly shutdown the static key. | |
96 | */ | |
97 | if (!test_and_set_bit(MEMCG_SOCK_ACTIVATED, &cg_proto->flags)) | |
98 | static_key_slow_inc(&memcg_socket_limit_enabled); | |
99 | set_bit(MEMCG_SOCK_ACTIVE, &cg_proto->flags); | |
100 | } | |
3aaabe23 GC |
101 | |
102 | return 0; | |
103 | } | |
104 | ||
182446d0 | 105 | static int tcp_cgroup_write(struct cgroup_subsys_state *css, struct cftype *cft, |
4d3bb511 | 106 | char *buffer) |
3aaabe23 | 107 | { |
182446d0 | 108 | struct mem_cgroup *memcg = mem_cgroup_from_css(css); |
3aaabe23 GC |
109 | unsigned long long val; |
110 | int ret = 0; | |
111 | ||
112 | switch (cft->private) { | |
113 | case RES_LIMIT: | |
114 | /* see memcontrol.c */ | |
115 | ret = res_counter_memparse_write_strategy(buffer, &val); | |
116 | if (ret) | |
117 | break; | |
118 | ret = tcp_update_limit(memcg, val); | |
119 | break; | |
120 | default: | |
121 | ret = -EINVAL; | |
122 | break; | |
123 | } | |
124 | return ret; | |
125 | } | |
126 | ||
127 | static u64 tcp_read_stat(struct mem_cgroup *memcg, int type, u64 default_val) | |
128 | { | |
3aaabe23 GC |
129 | struct cg_proto *cg_proto; |
130 | ||
131 | cg_proto = tcp_prot.proto_cgroup(memcg); | |
132 | if (!cg_proto) | |
133 | return default_val; | |
134 | ||
2e685cad | 135 | return res_counter_read_u64(&cg_proto->memory_allocated, type); |
3aaabe23 GC |
136 | } |
137 | ||
5a6dd343 GC |
138 | static u64 tcp_read_usage(struct mem_cgroup *memcg) |
139 | { | |
5a6dd343 GC |
140 | struct cg_proto *cg_proto; |
141 | ||
142 | cg_proto = tcp_prot.proto_cgroup(memcg); | |
143 | if (!cg_proto) | |
144 | return atomic_long_read(&tcp_memory_allocated) << PAGE_SHIFT; | |
145 | ||
2e685cad | 146 | return res_counter_read_u64(&cg_proto->memory_allocated, RES_USAGE); |
5a6dd343 GC |
147 | } |
148 | ||
182446d0 | 149 | static u64 tcp_cgroup_read(struct cgroup_subsys_state *css, struct cftype *cft) |
3aaabe23 | 150 | { |
182446d0 | 151 | struct mem_cgroup *memcg = mem_cgroup_from_css(css); |
3aaabe23 GC |
152 | u64 val; |
153 | ||
154 | switch (cft->private) { | |
155 | case RES_LIMIT: | |
6de5a8bf | 156 | val = tcp_read_stat(memcg, RES_LIMIT, RES_COUNTER_MAX); |
3aaabe23 | 157 | break; |
5a6dd343 GC |
158 | case RES_USAGE: |
159 | val = tcp_read_usage(memcg); | |
160 | break; | |
ffea59e5 | 161 | case RES_FAILCNT: |
0850f0f5 GC |
162 | case RES_MAX_USAGE: |
163 | val = tcp_read_stat(memcg, cft->private, 0); | |
ffea59e5 | 164 | break; |
3aaabe23 GC |
165 | default: |
166 | BUG(); | |
167 | } | |
168 | return val; | |
169 | } | |
170 | ||
182446d0 | 171 | static int tcp_cgroup_reset(struct cgroup_subsys_state *css, unsigned int event) |
ffea59e5 GC |
172 | { |
173 | struct mem_cgroup *memcg; | |
ffea59e5 GC |
174 | struct cg_proto *cg_proto; |
175 | ||
182446d0 | 176 | memcg = mem_cgroup_from_css(css); |
ffea59e5 GC |
177 | cg_proto = tcp_prot.proto_cgroup(memcg); |
178 | if (!cg_proto) | |
179 | return 0; | |
ffea59e5 GC |
180 | |
181 | switch (event) { | |
0850f0f5 | 182 | case RES_MAX_USAGE: |
2e685cad | 183 | res_counter_reset_max(&cg_proto->memory_allocated); |
0850f0f5 | 184 | break; |
ffea59e5 | 185 | case RES_FAILCNT: |
2e685cad | 186 | res_counter_reset_failcnt(&cg_proto->memory_allocated); |
ffea59e5 GC |
187 | break; |
188 | } | |
189 | ||
190 | return 0; | |
191 | } | |
192 | ||
676f7c8f TH |
193 | static struct cftype tcp_files[] = { |
194 | { | |
195 | .name = "kmem.tcp.limit_in_bytes", | |
196 | .write_string = tcp_cgroup_write, | |
197 | .read_u64 = tcp_cgroup_read, | |
198 | .private = RES_LIMIT, | |
199 | }, | |
200 | { | |
201 | .name = "kmem.tcp.usage_in_bytes", | |
202 | .read_u64 = tcp_cgroup_read, | |
203 | .private = RES_USAGE, | |
204 | }, | |
205 | { | |
206 | .name = "kmem.tcp.failcnt", | |
207 | .private = RES_FAILCNT, | |
208 | .trigger = tcp_cgroup_reset, | |
209 | .read_u64 = tcp_cgroup_read, | |
210 | }, | |
211 | { | |
212 | .name = "kmem.tcp.max_usage_in_bytes", | |
213 | .private = RES_MAX_USAGE, | |
214 | .trigger = tcp_cgroup_reset, | |
215 | .read_u64 = tcp_cgroup_read, | |
216 | }, | |
6bc10349 | 217 | { } /* terminate */ |
676f7c8f | 218 | }; |
6bc10349 TH |
219 | |
220 | static int __init tcp_memcontrol_init(void) | |
221 | { | |
073219e9 | 222 | WARN_ON(cgroup_add_cftypes(&memory_cgrp_subsys, tcp_files)); |
6bc10349 TH |
223 | return 0; |
224 | } | |
225 | __initcall(tcp_memcontrol_init); |