GitHub Repository: torvalds/linux
Path: blob/master/drivers/block/drbd/drbd_nl.c
1
// SPDX-License-Identifier: GPL-2.0-only
2
/*
3
drbd_nl.c
4
5
This file is part of DRBD by Philipp Reisner and Lars Ellenberg.
6
7
Copyright (C) 2001-2008, LINBIT Information Technologies GmbH.
8
Copyright (C) 1999-2008, Philipp Reisner <[email protected]>.
9
Copyright (C) 2002-2008, Lars Ellenberg <[email protected]>.
10
11
12
*/
13
14
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
15
16
#include <linux/module.h>
17
#include <linux/drbd.h>
18
#include <linux/in.h>
19
#include <linux/fs.h>
20
#include <linux/file.h>
21
#include <linux/slab.h>
22
#include <linux/blkpg.h>
23
#include <linux/cpumask.h>
24
#include "drbd_int.h"
25
#include "drbd_protocol.h"
26
#include "drbd_req.h"
27
#include "drbd_state_change.h"
28
#include <linux/unaligned.h>
29
#include <linux/drbd_limits.h>
30
#include <linux/kthread.h>
31
32
#include <net/genetlink.h>
33
34
/* .doit */
35
// int drbd_adm_create_resource(struct sk_buff *skb, struct genl_info *info);
36
// int drbd_adm_delete_resource(struct sk_buff *skb, struct genl_info *info);
37
38
int drbd_adm_new_minor(struct sk_buff *skb, struct genl_info *info);
39
int drbd_adm_del_minor(struct sk_buff *skb, struct genl_info *info);
40
41
int drbd_adm_new_resource(struct sk_buff *skb, struct genl_info *info);
42
int drbd_adm_del_resource(struct sk_buff *skb, struct genl_info *info);
43
int drbd_adm_down(struct sk_buff *skb, struct genl_info *info);
44
45
int drbd_adm_set_role(struct sk_buff *skb, struct genl_info *info);
46
int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info);
47
int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info);
48
int drbd_adm_detach(struct sk_buff *skb, struct genl_info *info);
49
int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info);
50
int drbd_adm_net_opts(struct sk_buff *skb, struct genl_info *info);
51
int drbd_adm_resize(struct sk_buff *skb, struct genl_info *info);
52
int drbd_adm_start_ov(struct sk_buff *skb, struct genl_info *info);
53
int drbd_adm_new_c_uuid(struct sk_buff *skb, struct genl_info *info);
54
int drbd_adm_disconnect(struct sk_buff *skb, struct genl_info *info);
55
int drbd_adm_invalidate(struct sk_buff *skb, struct genl_info *info);
56
int drbd_adm_invalidate_peer(struct sk_buff *skb, struct genl_info *info);
57
int drbd_adm_pause_sync(struct sk_buff *skb, struct genl_info *info);
58
int drbd_adm_resume_sync(struct sk_buff *skb, struct genl_info *info);
59
int drbd_adm_suspend_io(struct sk_buff *skb, struct genl_info *info);
60
int drbd_adm_resume_io(struct sk_buff *skb, struct genl_info *info);
61
int drbd_adm_outdate(struct sk_buff *skb, struct genl_info *info);
62
int drbd_adm_resource_opts(struct sk_buff *skb, struct genl_info *info);
63
int drbd_adm_get_status(struct sk_buff *skb, struct genl_info *info);
64
int drbd_adm_get_timeout_type(struct sk_buff *skb, struct genl_info *info);
65
/* .dumpit */
66
int drbd_adm_get_status_all(struct sk_buff *skb, struct netlink_callback *cb);
67
int drbd_adm_dump_resources(struct sk_buff *skb, struct netlink_callback *cb);
68
int drbd_adm_dump_devices(struct sk_buff *skb, struct netlink_callback *cb);
69
int drbd_adm_dump_devices_done(struct netlink_callback *cb);
70
int drbd_adm_dump_connections(struct sk_buff *skb, struct netlink_callback *cb);
71
int drbd_adm_dump_connections_done(struct netlink_callback *cb);
72
int drbd_adm_dump_peer_devices(struct sk_buff *skb, struct netlink_callback *cb);
73
int drbd_adm_dump_peer_devices_done(struct netlink_callback *cb);
74
int drbd_adm_get_initial_state(struct sk_buff *skb, struct netlink_callback *cb);
75
76
#include <linux/drbd_genl_api.h>
77
#include "drbd_nla.h"
78
#include <linux/genl_magic_func.h>
79
80
static atomic_t drbd_genl_seq = ATOMIC_INIT(2); /* two. */
81
static atomic_t notify_genl_seq = ATOMIC_INIT(2); /* two. */
82
83
DEFINE_MUTEX(notification_mutex);
84
85
/* used with bdev_file_open_by_path, to claim our meta data device(s) */
86
static char *drbd_m_holder = "Hands off! this is DRBD's meta data device.";
87
88
static void drbd_adm_send_reply(struct sk_buff *skb, struct genl_info *info)
89
{
90
genlmsg_end(skb, genlmsg_data(nlmsg_data(nlmsg_hdr(skb))));
91
if (genlmsg_reply(skb, info))
92
pr_err("error sending genl reply\n");
93
}
94
95
/* Used on a fresh "drbd_adm_prepare"d reply_skb, this cannot fail: The only
96
* reason it could fail was no space in skb, and there are 4k available. */
97
static int drbd_msg_put_info(struct sk_buff *skb, const char *info)
98
{
99
struct nlattr *nla;
100
int err = -EMSGSIZE;
101
102
if (!info || !info[0])
103
return 0;
104
105
nla = nla_nest_start_noflag(skb, DRBD_NLA_CFG_REPLY);
106
if (!nla)
107
return err;
108
109
err = nla_put_string(skb, T_info_text, info);
110
if (err) {
111
nla_nest_cancel(skb, nla);
112
return err;
113
} else
114
nla_nest_end(skb, nla);
115
return 0;
116
}
117
118
__printf(2, 3)
119
static int drbd_msg_sprintf_info(struct sk_buff *skb, const char *fmt, ...)
120
{
121
va_list args;
122
struct nlattr *nla, *txt;
123
int err = -EMSGSIZE;
124
int len;
125
126
nla = nla_nest_start_noflag(skb, DRBD_NLA_CFG_REPLY);
127
if (!nla)
128
return err;
129
130
txt = nla_reserve(skb, T_info_text, 256);
131
if (!txt) {
132
nla_nest_cancel(skb, nla);
133
return err;
134
}
135
va_start(args, fmt);
136
len = vscnprintf(nla_data(txt), 256, fmt, args);
137
va_end(args);
138
139
/* maybe: retry with larger reserve, if truncated */
140
txt->nla_len = nla_attr_size(len+1);
141
nlmsg_trim(skb, (char*)txt + NLA_ALIGN(txt->nla_len));
142
nla_nest_end(skb, nla);
143
144
return 0;
145
}
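/*
 * Example of the reserve-then-trim pattern above: nla_reserve() claims a
 * fixed 256-byte T_info_text attribute, vscnprintf() formats into it, and
 * the attribute is then shrunk to the actual string length (plus NUL)
 * before nlmsg_trim() hands the unused tail back to the skb.  A 22
 * character message thus occupies nla_attr_size(23) bytes instead of the
 * full reservation.
 */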
146
147
/* This would be a good candidate for a "pre_doit" hook,
148
* and per-family private info->pointers.
149
* But we need to stay compatible with older kernels.
150
* If it returns successfully, adm_ctx members are valid.
151
*
152
* At this point, we still rely on the global genl_lock().
153
* If we want to avoid that, and allow "genl_family.parallel_ops", we may need
154
* to add additional synchronization against object destruction/modification.
155
*/
156
#define DRBD_ADM_NEED_MINOR 1
157
#define DRBD_ADM_NEED_RESOURCE 2
158
#define DRBD_ADM_NEED_CONNECTION 4
159
static int drbd_adm_prepare(struct drbd_config_context *adm_ctx,
160
struct sk_buff *skb, struct genl_info *info, unsigned flags)
161
{
162
struct drbd_genlmsghdr *d_in = genl_info_userhdr(info);
163
const u8 cmd = info->genlhdr->cmd;
164
int err;
165
166
memset(adm_ctx, 0, sizeof(*adm_ctx));
167
168
/* genl_rcv_msg only checks for CAP_NET_ADMIN on "GENL_ADMIN_PERM" :( */
169
if (cmd != DRBD_ADM_GET_STATUS && !capable(CAP_NET_ADMIN))
170
return -EPERM;
171
172
adm_ctx->reply_skb = genlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
173
if (!adm_ctx->reply_skb) {
174
err = -ENOMEM;
175
goto fail;
176
}
177
178
adm_ctx->reply_dh = genlmsg_put_reply(adm_ctx->reply_skb,
179
info, &drbd_genl_family, 0, cmd);
180
/* Putting a few bytes into a fresh skb of >= 4k will always succeed.
181
* but anyways */
182
if (!adm_ctx->reply_dh) {
183
err = -ENOMEM;
184
goto fail;
185
}
186
187
adm_ctx->reply_dh->minor = d_in->minor;
188
adm_ctx->reply_dh->ret_code = NO_ERROR;
189
190
adm_ctx->volume = VOLUME_UNSPECIFIED;
191
if (info->attrs[DRBD_NLA_CFG_CONTEXT]) {
192
struct nlattr *nla;
193
/* parse and validate only */
194
err = drbd_cfg_context_from_attrs(NULL, info);
195
if (err)
196
goto fail;
197
198
/* It was present, and valid,
199
* copy it over to the reply skb. */
200
err = nla_put_nohdr(adm_ctx->reply_skb,
201
info->attrs[DRBD_NLA_CFG_CONTEXT]->nla_len,
202
info->attrs[DRBD_NLA_CFG_CONTEXT]);
203
if (err)
204
goto fail;
205
206
/* and assign stuff to the adm_ctx */
207
nla = nested_attr_tb[__nla_type(T_ctx_volume)];
208
if (nla)
209
adm_ctx->volume = nla_get_u32(nla);
210
nla = nested_attr_tb[__nla_type(T_ctx_resource_name)];
211
if (nla)
212
adm_ctx->resource_name = nla_data(nla);
213
adm_ctx->my_addr = nested_attr_tb[__nla_type(T_ctx_my_addr)];
214
adm_ctx->peer_addr = nested_attr_tb[__nla_type(T_ctx_peer_addr)];
215
if ((adm_ctx->my_addr &&
216
nla_len(adm_ctx->my_addr) > sizeof(adm_ctx->connection->my_addr)) ||
217
(adm_ctx->peer_addr &&
218
nla_len(adm_ctx->peer_addr) > sizeof(adm_ctx->connection->peer_addr))) {
219
err = -EINVAL;
220
goto fail;
221
}
222
}
223
224
adm_ctx->minor = d_in->minor;
225
adm_ctx->device = minor_to_device(d_in->minor);
226
227
/* We are protected by the global genl_lock().
228
* But we may explicitly drop it/retake it in drbd_adm_set_role(),
229
* so make sure this object stays around. */
230
if (adm_ctx->device)
231
kref_get(&adm_ctx->device->kref);
232
233
if (adm_ctx->resource_name) {
234
adm_ctx->resource = drbd_find_resource(adm_ctx->resource_name);
235
}
236
237
if (!adm_ctx->device && (flags & DRBD_ADM_NEED_MINOR)) {
238
drbd_msg_put_info(adm_ctx->reply_skb, "unknown minor");
239
return ERR_MINOR_INVALID;
240
}
241
if (!adm_ctx->resource && (flags & DRBD_ADM_NEED_RESOURCE)) {
242
drbd_msg_put_info(adm_ctx->reply_skb, "unknown resource");
243
if (adm_ctx->resource_name)
244
return ERR_RES_NOT_KNOWN;
245
return ERR_INVALID_REQUEST;
246
}
247
248
if (flags & DRBD_ADM_NEED_CONNECTION) {
249
if (adm_ctx->resource) {
250
drbd_msg_put_info(adm_ctx->reply_skb, "no resource name expected");
251
return ERR_INVALID_REQUEST;
252
}
253
if (adm_ctx->device) {
254
drbd_msg_put_info(adm_ctx->reply_skb, "no minor number expected");
255
return ERR_INVALID_REQUEST;
256
}
257
if (adm_ctx->my_addr && adm_ctx->peer_addr)
258
adm_ctx->connection = conn_get_by_addrs(nla_data(adm_ctx->my_addr),
259
nla_len(adm_ctx->my_addr),
260
nla_data(adm_ctx->peer_addr),
261
nla_len(adm_ctx->peer_addr));
262
if (!adm_ctx->connection) {
263
drbd_msg_put_info(adm_ctx->reply_skb, "unknown connection");
264
return ERR_INVALID_REQUEST;
265
}
266
}
267
268
/* some more paranoia, if the request was over-determined */
269
if (adm_ctx->device && adm_ctx->resource &&
270
adm_ctx->device->resource != adm_ctx->resource) {
271
pr_warn("request: minor=%u, resource=%s; but that minor belongs to resource %s\n",
272
adm_ctx->minor, adm_ctx->resource->name,
273
adm_ctx->device->resource->name);
274
drbd_msg_put_info(adm_ctx->reply_skb, "minor exists in different resource");
275
return ERR_INVALID_REQUEST;
276
}
277
if (adm_ctx->device &&
278
adm_ctx->volume != VOLUME_UNSPECIFIED &&
279
adm_ctx->volume != adm_ctx->device->vnr) {
280
pr_warn("request: minor=%u, volume=%u; but that minor is volume %u in %s\n",
281
adm_ctx->minor, adm_ctx->volume,
282
adm_ctx->device->vnr, adm_ctx->device->resource->name);
283
drbd_msg_put_info(adm_ctx->reply_skb, "minor exists as different volume");
284
return ERR_INVALID_REQUEST;
285
}
286
287
/* still, provide adm_ctx->resource always, if possible. */
288
if (!adm_ctx->resource) {
289
adm_ctx->resource = adm_ctx->device ? adm_ctx->device->resource
290
: adm_ctx->connection ? adm_ctx->connection->resource : NULL;
291
if (adm_ctx->resource)
292
kref_get(&adm_ctx->resource->kref);
293
}
294
295
return NO_ERROR;
296
297
fail:
298
nlmsg_free(adm_ctx->reply_skb);
299
adm_ctx->reply_skb = NULL;
300
return err;
301
}
302
303
static int drbd_adm_finish(struct drbd_config_context *adm_ctx,
304
struct genl_info *info, int retcode)
305
{
306
if (adm_ctx->device) {
307
kref_put(&adm_ctx->device->kref, drbd_destroy_device);
308
adm_ctx->device = NULL;
309
}
310
if (adm_ctx->connection) {
311
kref_put(&adm_ctx->connection->kref, &drbd_destroy_connection);
312
adm_ctx->connection = NULL;
313
}
314
if (adm_ctx->resource) {
315
kref_put(&adm_ctx->resource->kref, drbd_destroy_resource);
316
adm_ctx->resource = NULL;
317
}
318
319
if (!adm_ctx->reply_skb)
320
return -ENOMEM;
321
322
adm_ctx->reply_dh->ret_code = retcode;
323
drbd_adm_send_reply(adm_ctx->reply_skb, info);
324
return 0;
325
}
326
327
static void setup_khelper_env(struct drbd_connection *connection, char **envp)
328
{
329
char *afs;
330
331
/* FIXME: A future version will not allow this case. */
332
if (connection->my_addr_len == 0 || connection->peer_addr_len == 0)
333
return;
334
335
switch (((struct sockaddr *)&connection->peer_addr)->sa_family) {
336
case AF_INET6:
337
afs = "ipv6";
338
snprintf(envp[4], 60, "DRBD_PEER_ADDRESS=%pI6",
339
&((struct sockaddr_in6 *)&connection->peer_addr)->sin6_addr);
340
break;
341
case AF_INET:
342
afs = "ipv4";
343
snprintf(envp[4], 60, "DRBD_PEER_ADDRESS=%pI4",
344
&((struct sockaddr_in *)&connection->peer_addr)->sin_addr);
345
break;
346
default:
347
afs = "ssocks";
348
snprintf(envp[4], 60, "DRBD_PEER_ADDRESS=%pI4",
349
&((struct sockaddr_in *)&connection->peer_addr)->sin_addr);
350
}
351
snprintf(envp[3], 20, "DRBD_PEER_AF=%s", afs);
352
}
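/*
 * Illustration of the environment prepared above (example values): for an
 * IPv4 peer at 10.0.0.2, the two scratch slots provided by the callers
 * become
 *   envp[3] = "DRBD_PEER_AF=ipv4"
 *   envp[4] = "DRBD_PEER_ADDRESS=10.0.0.2"
 * in addition to the fixed HOME, TERM and PATH entries.
 */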
353
354
int drbd_khelper(struct drbd_device *device, char *cmd)
355
{
356
char *envp[] = { "HOME=/",
357
"TERM=linux",
358
"PATH=/sbin:/usr/sbin:/bin:/usr/bin",
359
(char[20]) { }, /* address family */
360
(char[60]) { }, /* address */
361
NULL };
362
char mb[14];
363
char *argv[] = {drbd_usermode_helper, cmd, mb, NULL };
364
struct drbd_connection *connection = first_peer_device(device)->connection;
365
struct sib_info sib;
366
int ret;
367
368
if (current == connection->worker.task)
369
set_bit(CALLBACK_PENDING, &connection->flags);
370
371
snprintf(mb, 14, "minor-%d", device_to_minor(device));
372
setup_khelper_env(connection, envp);
373
374
/* The helper may take some time.
375
* write out any unsynced meta data changes now */
376
drbd_md_sync(device);
377
378
drbd_info(device, "helper command: %s %s %s\n", drbd_usermode_helper, cmd, mb);
379
sib.sib_reason = SIB_HELPER_PRE;
380
sib.helper_name = cmd;
381
drbd_bcast_event(device, &sib);
382
notify_helper(NOTIFY_CALL, device, connection, cmd, 0);
383
ret = call_usermodehelper(drbd_usermode_helper, argv, envp, UMH_WAIT_PROC);
384
if (ret)
385
drbd_warn(device, "helper command: %s %s %s exit code %u (0x%x)\n",
386
drbd_usermode_helper, cmd, mb,
387
(ret >> 8) & 0xff, ret);
388
else
389
drbd_info(device, "helper command: %s %s %s exit code %u (0x%x)\n",
390
drbd_usermode_helper, cmd, mb,
391
(ret >> 8) & 0xff, ret);
392
sib.sib_reason = SIB_HELPER_POST;
393
sib.helper_exit_code = ret;
394
drbd_bcast_event(device, &sib);
395
notify_helper(NOTIFY_RESPONSE, device, connection, cmd, ret);
396
397
if (current == connection->worker.task)
398
clear_bit(CALLBACK_PENDING, &connection->flags);
399
400
if (ret < 0) /* Ignore any ERRNOs we got. */
401
ret = 0;
402
403
return ret;
404
}
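/*
 * Note on the exit code handling above: call_usermodehelper() with
 * UMH_WAIT_PROC returns the helper's wait status, so (ret >> 8) & 0xff
 * extracts the exit code that is logged and reported; a helper exiting
 * with status 5, for example, yields ret == 0x500 and a logged exit code
 * of 5.
 */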
405
406
enum drbd_peer_state conn_khelper(struct drbd_connection *connection, char *cmd)
407
{
408
char *envp[] = { "HOME=/",
409
"TERM=linux",
410
"PATH=/sbin:/usr/sbin:/bin:/usr/bin",
411
(char[20]) { }, /* address family */
412
(char[60]) { }, /* address */
413
NULL };
414
char *resource_name = connection->resource->name;
415
char *argv[] = {drbd_usermode_helper, cmd, resource_name, NULL };
416
int ret;
417
418
setup_khelper_env(connection, envp);
419
conn_md_sync(connection);
420
421
drbd_info(connection, "helper command: %s %s %s\n", drbd_usermode_helper, cmd, resource_name);
422
/* TODO: conn_bcast_event() ?? */
423
notify_helper(NOTIFY_CALL, NULL, connection, cmd, 0);
424
425
ret = call_usermodehelper(drbd_usermode_helper, argv, envp, UMH_WAIT_PROC);
426
if (ret)
427
drbd_warn(connection, "helper command: %s %s %s exit code %u (0x%x)\n",
428
drbd_usermode_helper, cmd, resource_name,
429
(ret >> 8) & 0xff, ret);
430
else
431
drbd_info(connection, "helper command: %s %s %s exit code %u (0x%x)\n",
432
drbd_usermode_helper, cmd, resource_name,
433
(ret >> 8) & 0xff, ret);
434
/* TODO: conn_bcast_event() ?? */
435
notify_helper(NOTIFY_RESPONSE, NULL, connection, cmd, ret);
436
437
if (ret < 0) /* Ignore any ERRNOs we got. */
438
ret = 0;
439
440
return ret;
441
}
442
443
static enum drbd_fencing_p highest_fencing_policy(struct drbd_connection *connection)
444
{
445
enum drbd_fencing_p fp = FP_NOT_AVAIL;
446
struct drbd_peer_device *peer_device;
447
int vnr;
448
449
rcu_read_lock();
450
idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
451
struct drbd_device *device = peer_device->device;
452
if (get_ldev_if_state(device, D_CONSISTENT)) {
453
struct disk_conf *disk_conf =
454
rcu_dereference(peer_device->device->ldev->disk_conf);
455
fp = max_t(enum drbd_fencing_p, fp, disk_conf->fencing);
456
put_ldev(device);
457
}
458
}
459
rcu_read_unlock();
460
461
return fp;
462
}
463
464
static bool resource_is_supended(struct drbd_resource *resource)
465
{
466
return resource->susp || resource->susp_fen || resource->susp_nod;
467
}
468
469
bool conn_try_outdate_peer(struct drbd_connection *connection)
470
{
471
struct drbd_resource * const resource = connection->resource;
472
unsigned int connect_cnt;
473
union drbd_state mask = { };
474
union drbd_state val = { };
475
enum drbd_fencing_p fp;
476
char *ex_to_string;
477
int r;
478
479
spin_lock_irq(&resource->req_lock);
480
if (connection->cstate >= C_WF_REPORT_PARAMS) {
481
drbd_err(connection, "Expected cstate < C_WF_REPORT_PARAMS\n");
482
spin_unlock_irq(&resource->req_lock);
483
return false;
484
}
485
486
connect_cnt = connection->connect_cnt;
487
spin_unlock_irq(&resource->req_lock);
488
489
fp = highest_fencing_policy(connection);
490
switch (fp) {
491
case FP_NOT_AVAIL:
492
drbd_warn(connection, "Not fencing peer, I'm not even Consistent myself.\n");
493
spin_lock_irq(&resource->req_lock);
494
if (connection->cstate < C_WF_REPORT_PARAMS) {
495
_conn_request_state(connection,
496
(union drbd_state) { { .susp_fen = 1 } },
497
(union drbd_state) { { .susp_fen = 0 } },
498
CS_VERBOSE | CS_HARD | CS_DC_SUSP);
499
/* We are no longer suspended due to the fencing policy.
500
* We may still be suspended due to the on-no-data-accessible policy.
501
* If that was OND_IO_ERROR, fail pending requests. */
502
if (!resource_is_supended(resource))
503
_tl_restart(connection, CONNECTION_LOST_WHILE_PENDING);
504
}
505
/* Else: in case we raced with a connection handshake,
506
* let the handshake figure out if we maybe can RESEND,
507
* and do not resume/fail pending requests here.
508
* Worst case is we stay suspended for now, which may be
509
* resolved by either re-establishing the replication link, or
510
* the next link failure, or eventually the administrator. */
511
spin_unlock_irq(&resource->req_lock);
512
return false;
513
514
case FP_DONT_CARE:
515
return true;
516
default: ;
517
}
518
519
r = conn_khelper(connection, "fence-peer");
520
521
switch ((r>>8) & 0xff) {
522
case P_INCONSISTENT: /* peer is inconsistent */
523
ex_to_string = "peer is inconsistent or worse";
524
mask.pdsk = D_MASK;
525
val.pdsk = D_INCONSISTENT;
526
break;
527
case P_OUTDATED: /* peer got outdated, or was already outdated */
528
ex_to_string = "peer was fenced";
529
mask.pdsk = D_MASK;
530
val.pdsk = D_OUTDATED;
531
break;
532
case P_DOWN: /* peer was down */
533
if (conn_highest_disk(connection) == D_UP_TO_DATE) {
534
/* we will(have) create(d) a new UUID anyways... */
535
ex_to_string = "peer is unreachable, assumed to be dead";
536
mask.pdsk = D_MASK;
537
val.pdsk = D_OUTDATED;
538
} else {
539
ex_to_string = "peer unreachable, doing nothing since disk != UpToDate";
540
}
541
break;
542
case P_PRIMARY: /* Peer is primary, voluntarily outdate myself.
543
* This is useful when an unconnected R_SECONDARY is asked to
544
* become R_PRIMARY, but finds the other peer being active. */
545
ex_to_string = "peer is active";
546
drbd_warn(connection, "Peer is primary, outdating myself.\n");
547
mask.disk = D_MASK;
548
val.disk = D_OUTDATED;
549
break;
550
case P_FENCING:
551
/* THINK: do we need to handle this
552
* like case 4, or more like case 5? */
553
if (fp != FP_STONITH)
554
drbd_err(connection, "fence-peer() = 7 && fencing != Stonith !!!\n");
555
ex_to_string = "peer was stonithed";
556
mask.pdsk = D_MASK;
557
val.pdsk = D_OUTDATED;
558
break;
559
default:
560
/* The script is broken ... */
561
drbd_err(connection, "fence-peer helper broken, returned %d\n", (r>>8)&0xff);
562
return false; /* Eventually leave IO frozen */
563
}
564
565
drbd_info(connection, "fence-peer helper returned %d (%s)\n",
566
(r>>8) & 0xff, ex_to_string);
567
568
/* Not using
569
conn_request_state(connection, mask, val, CS_VERBOSE);
570
here, because we might have been able to re-establish the connection in the
571
meantime. */
572
spin_lock_irq(&resource->req_lock);
573
if (connection->cstate < C_WF_REPORT_PARAMS && !test_bit(STATE_SENT, &connection->flags)) {
574
if (connection->connect_cnt != connect_cnt)
575
/* In case the connection was established and dropped
576
while the fence-peer handler was running, ignore it */
577
drbd_info(connection, "Ignoring fence-peer exit code\n");
578
else
579
_conn_request_state(connection, mask, val, CS_VERBOSE);
580
}
581
spin_unlock_irq(&resource->req_lock);
582
583
return conn_highest_pdsk(connection) <= D_OUTDATED;
584
}
585
586
static int _try_outdate_peer_async(void *data)
587
{
588
struct drbd_connection *connection = (struct drbd_connection *)data;
589
590
conn_try_outdate_peer(connection);
591
592
kref_put(&connection->kref, drbd_destroy_connection);
593
return 0;
594
}
595
596
void conn_try_outdate_peer_async(struct drbd_connection *connection)
597
{
598
struct task_struct *opa;
599
600
kref_get(&connection->kref);
601
/* We may have just sent a signal to this thread
602
* to get it out of some blocking network function.
603
* Clear signals; otherwise kthread_run(), which internally uses
604
* wait_on_completion_killable(), will mistake our pending signal
605
* for a new fatal signal and fail. */
606
flush_signals(current);
607
opa = kthread_run(_try_outdate_peer_async, connection, "drbd_async_h");
608
if (IS_ERR(opa)) {
609
drbd_err(connection, "out of mem, failed to invoke fence-peer helper\n");
610
kref_put(&connection->kref, drbd_destroy_connection);
611
}
612
}
613
614
enum drbd_state_rv
615
drbd_set_role(struct drbd_device *const device, enum drbd_role new_role, int force)
616
{
617
struct drbd_peer_device *const peer_device = first_peer_device(device);
618
struct drbd_connection *const connection = peer_device ? peer_device->connection : NULL;
619
const int max_tries = 4;
620
enum drbd_state_rv rv = SS_UNKNOWN_ERROR;
621
struct net_conf *nc;
622
int try = 0;
623
int forced = 0;
624
union drbd_state mask, val;
625
626
if (new_role == R_PRIMARY) {
627
struct drbd_connection *connection;
628
629
/* Detect dead peers as soon as possible. */
630
631
rcu_read_lock();
632
for_each_connection(connection, device->resource)
633
request_ping(connection);
634
rcu_read_unlock();
635
}
636
637
mutex_lock(device->state_mutex);
638
639
mask.i = 0; mask.role = R_MASK;
640
val.i = 0; val.role = new_role;
641
642
while (try++ < max_tries) {
643
rv = _drbd_request_state_holding_state_mutex(device, mask, val, CS_WAIT_COMPLETE);
644
645
/* in case we first succeeded to outdate,
646
* but now suddenly could establish a connection */
647
if (rv == SS_CW_FAILED_BY_PEER && mask.pdsk != 0) {
648
val.pdsk = 0;
649
mask.pdsk = 0;
650
continue;
651
}
652
653
if (rv == SS_NO_UP_TO_DATE_DISK && force &&
654
(device->state.disk < D_UP_TO_DATE &&
655
device->state.disk >= D_INCONSISTENT)) {
656
mask.disk = D_MASK;
657
val.disk = D_UP_TO_DATE;
658
forced = 1;
659
continue;
660
}
661
662
if (rv == SS_NO_UP_TO_DATE_DISK &&
663
device->state.disk == D_CONSISTENT && mask.pdsk == 0) {
664
D_ASSERT(device, device->state.pdsk == D_UNKNOWN);
665
666
if (conn_try_outdate_peer(connection)) {
667
val.disk = D_UP_TO_DATE;
668
mask.disk = D_MASK;
669
}
670
continue;
671
}
672
673
if (rv == SS_NOTHING_TO_DO)
674
goto out;
675
if (rv == SS_PRIMARY_NOP && mask.pdsk == 0) {
676
if (!conn_try_outdate_peer(connection) && force) {
677
drbd_warn(device, "Forced into split brain situation!\n");
678
mask.pdsk = D_MASK;
679
val.pdsk = D_OUTDATED;
680
681
}
682
continue;
683
}
684
if (rv == SS_TWO_PRIMARIES) {
685
/* Maybe the peer is detected as dead very soon...
686
retry at most once more in this case. */
687
if (try < max_tries) {
688
int timeo;
689
try = max_tries - 1;
690
rcu_read_lock();
691
nc = rcu_dereference(connection->net_conf);
692
timeo = nc ? (nc->ping_timeo + 1) * HZ / 10 : 1;
693
rcu_read_unlock();
694
schedule_timeout_interruptible(timeo);
695
}
696
continue;
697
}
698
if (rv < SS_SUCCESS) {
699
rv = _drbd_request_state(device, mask, val,
700
CS_VERBOSE + CS_WAIT_COMPLETE);
701
if (rv < SS_SUCCESS)
702
goto out;
703
}
704
break;
705
}
706
707
if (rv < SS_SUCCESS)
708
goto out;
709
710
if (forced)
711
drbd_warn(device, "Forced to consider local data as UpToDate!\n");
712
713
/* Wait until nothing is on the fly :) */
714
wait_event(device->misc_wait, atomic_read(&device->ap_pending_cnt) == 0);
715
716
/* FIXME also wait for all pending P_BARRIER_ACK? */
717
718
if (new_role == R_SECONDARY) {
719
if (get_ldev(device)) {
720
device->ldev->md.uuid[UI_CURRENT] &= ~(u64)1;
721
put_ldev(device);
722
}
723
} else {
724
mutex_lock(&device->resource->conf_update);
725
nc = connection->net_conf;
726
if (nc)
727
nc->discard_my_data = 0; /* without copy; single bit op is atomic */
728
mutex_unlock(&device->resource->conf_update);
729
730
if (get_ldev(device)) {
731
if (((device->state.conn < C_CONNECTED ||
732
device->state.pdsk <= D_FAILED)
733
&& device->ldev->md.uuid[UI_BITMAP] == 0) || forced)
734
drbd_uuid_new_current(device);
735
736
device->ldev->md.uuid[UI_CURRENT] |= (u64)1;
737
put_ldev(device);
738
}
739
}
740
741
/* writeout of activity log covered areas of the bitmap
742
* to stable storage done in after state change already */
743
744
if (device->state.conn >= C_WF_REPORT_PARAMS) {
745
/* if this was forced, we should consider sync */
746
if (forced)
747
drbd_send_uuids(peer_device);
748
drbd_send_current_state(peer_device);
749
}
750
751
drbd_md_sync(device);
752
set_disk_ro(device->vdisk, new_role == R_SECONDARY);
753
kobject_uevent(&disk_to_dev(device->vdisk)->kobj, KOBJ_CHANGE);
754
out:
755
mutex_unlock(device->state_mutex);
756
return rv;
757
}
758
759
static const char *from_attrs_err_to_txt(int err)
760
{
761
return err == -ENOMSG ? "required attribute missing" :
762
err == -EOPNOTSUPP ? "unknown mandatory attribute" :
763
err == -EEXIST ? "can not change invariant setting" :
764
"invalid attribute value";
765
}
766
767
int drbd_adm_set_role(struct sk_buff *skb, struct genl_info *info)
768
{
769
struct drbd_config_context adm_ctx;
770
struct set_role_parms parms;
771
int err;
772
enum drbd_ret_code retcode;
773
enum drbd_state_rv rv;
774
775
retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
776
if (!adm_ctx.reply_skb)
777
return retcode;
778
if (retcode != NO_ERROR)
779
goto out;
780
781
memset(&parms, 0, sizeof(parms));
782
if (info->attrs[DRBD_NLA_SET_ROLE_PARMS]) {
783
err = set_role_parms_from_attrs(&parms, info);
784
if (err) {
785
retcode = ERR_MANDATORY_TAG;
786
drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
787
goto out;
788
}
789
}
790
genl_unlock();
791
mutex_lock(&adm_ctx.resource->adm_mutex);
792
793
if (info->genlhdr->cmd == DRBD_ADM_PRIMARY)
794
rv = drbd_set_role(adm_ctx.device, R_PRIMARY, parms.assume_uptodate);
795
else
796
rv = drbd_set_role(adm_ctx.device, R_SECONDARY, 0);
797
798
mutex_unlock(&adm_ctx.resource->adm_mutex);
799
genl_lock();
800
drbd_adm_finish(&adm_ctx, info, rv);
801
return 0;
802
out:
803
drbd_adm_finish(&adm_ctx, info, retcode);
804
return 0;
805
}
806
807
/* Initializes the md.*_offset members, so we are able to find
808
* the on disk meta data.
809
*
810
* We currently have two possible layouts:
811
* external:
812
* |----------- md_size_sect ------------------|
813
* [ 4k superblock ][ activity log ][ Bitmap ]
814
* | al_offset == 8 |
815
* | bm_offset = al_offset + X |
816
* ==> bitmap sectors = md_size_sect - bm_offset
817
*
818
* internal:
819
* |----------- md_size_sect ------------------|
820
* [data.....][ Bitmap ][ activity log ][ 4k superblock ]
821
* | al_offset < 0 |
822
* | bm_offset = al_offset - Y |
823
* ==> bitmap sectors = Y = al_offset - bm_offset
824
*
825
* Activity log size used to be fixed 32kB,
826
* but is about to become configurable.
827
*/
828
static void drbd_md_set_sector_offsets(struct drbd_device *device,
829
struct drbd_backing_dev *bdev)
830
{
831
sector_t md_size_sect = 0;
832
unsigned int al_size_sect = bdev->md.al_size_4k * 8;
833
834
bdev->md.md_offset = drbd_md_ss(bdev);
835
836
switch (bdev->md.meta_dev_idx) {
837
default:
838
/* v07 style fixed size indexed meta data */
839
bdev->md.md_size_sect = MD_128MB_SECT;
840
bdev->md.al_offset = MD_4kB_SECT;
841
bdev->md.bm_offset = MD_4kB_SECT + al_size_sect;
842
break;
843
case DRBD_MD_INDEX_FLEX_EXT:
844
/* just occupy the full device; unit: sectors */
845
bdev->md.md_size_sect = drbd_get_capacity(bdev->md_bdev);
846
bdev->md.al_offset = MD_4kB_SECT;
847
bdev->md.bm_offset = MD_4kB_SECT + al_size_sect;
848
break;
849
case DRBD_MD_INDEX_INTERNAL:
850
case DRBD_MD_INDEX_FLEX_INT:
851
/* al size is still fixed */
852
bdev->md.al_offset = -al_size_sect;
853
/* we need (slightly less than) ~ this much bitmap sectors: */
854
md_size_sect = drbd_get_capacity(bdev->backing_bdev);
855
md_size_sect = ALIGN(md_size_sect, BM_SECT_PER_EXT);
856
md_size_sect = BM_SECT_TO_EXT(md_size_sect);
857
md_size_sect = ALIGN(md_size_sect, 8);
858
859
/* plus the "drbd meta data super block",
860
* and the activity log; */
861
md_size_sect += MD_4kB_SECT + al_size_sect;
862
863
bdev->md.md_size_sect = md_size_sect;
864
/* bitmap offset is adjusted by 'super' block size */
865
bdev->md.bm_offset = -md_size_sect + MD_4kB_SECT;
866
break;
867
}
868
}
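/*
 * Worked example for the internal layout described above, assuming the
 * default 32 kB activity log (md.al_size_4k == 8, so al_size_sect == 64):
 * the 4 kB superblock sits at the end of the backing device (md_offset as
 * returned by drbd_md_ss()), the activity log immediately in front of it
 * at al_offset == -64 sectors, and the bitmap in front of that at
 * bm_offset == -md_size_sect + MD_4kB_SECT, leaving
 * md_size_sect - al_size_sect - MD_4kB_SECT sectors for the bitmap itself.
 */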
869
870
/* input size is expected to be in KB */
871
char *ppsize(char *buf, unsigned long long size)
872
{
873
/* Needs 9 bytes at max including trailing NUL:
874
* -1ULL ==> "16384 EB" */
875
static char units[] = { 'K', 'M', 'G', 'T', 'P', 'E' };
876
int base = 0;
877
while (size >= 10000 && base < sizeof(units)-1) {
878
/* shift + round */
879
size = (size >> 10) + !!(size & (1<<9));
880
base++;
881
}
882
sprintf(buf, "%u %cB", (unsigned)size, units[base]);
883
884
return buf;
885
}
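/*
 * Example: ppsize(buf, 1048576), i.e. 1 GiB expressed in KB, prints
 * "1024 MB": 1048576 >= 10000, so one shift by 10 (with rounding on bit 9)
 * yields 1024 and the unit advances from 'K' to 'M'.
 */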
886
887
/* there is still a theoretical deadlock when called from receiver
888
* on a D_INCONSISTENT R_PRIMARY:
889
* remote READ does inc_ap_bio, receiver would need to receive answer
890
* packet from remote to dec_ap_bio again.
891
* receiver receive_sizes(), comes here,
892
* waits for ap_bio_cnt == 0. -> deadlock.
893
* but this cannot happen, actually, because:
894
* R_PRIMARY D_INCONSISTENT, and peer's disk is unreachable
895
* (not connected, or bad/no disk on peer):
896
* see drbd_fail_request_early, ap_bio_cnt is zero.
897
* R_PRIMARY D_INCONSISTENT, and C_SYNC_TARGET:
898
* peer may not initiate a resize.
899
*/
900
/* Note these are not to be confused with
901
* drbd_adm_suspend_io/drbd_adm_resume_io,
902
* which are (sub) state changes triggered by admin (drbdsetup),
903
* and can be long lived.
904
* This changes a device->flag, is triggered by drbd internals,
905
* and should be short-lived. */
906
/* It needs to be a counter, since multiple threads might
907
independently suspend and resume IO. */
908
void drbd_suspend_io(struct drbd_device *device)
909
{
910
atomic_inc(&device->suspend_cnt);
911
if (drbd_suspended(device))
912
return;
913
wait_event(device->misc_wait, !atomic_read(&device->ap_bio_cnt));
914
}
915
916
void drbd_resume_io(struct drbd_device *device)
917
{
918
if (atomic_dec_and_test(&device->suspend_cnt))
919
wake_up(&device->misc_wait);
920
}
921
922
/*
923
* drbd_determine_dev_size() - Sets the right device size obeying all constraints
924
* @device: DRBD device.
925
*
926
* Returns 0 on success, negative return values indicate errors.
927
* You should call drbd_md_sync() after calling this function.
928
*/
929
enum determine_dev_size
930
drbd_determine_dev_size(struct drbd_device *device, enum dds_flags flags, struct resize_parms *rs) __must_hold(local)
931
{
932
struct md_offsets_and_sizes {
933
u64 last_agreed_sect;
934
u64 md_offset;
935
s32 al_offset;
936
s32 bm_offset;
937
u32 md_size_sect;
938
939
u32 al_stripes;
940
u32 al_stripe_size_4k;
941
} prev;
942
sector_t u_size, size;
943
struct drbd_md *md = &device->ldev->md;
944
void *buffer;
945
946
int md_moved, la_size_changed;
947
enum determine_dev_size rv = DS_UNCHANGED;
948
949
/* We may change the on-disk offsets of our meta data below. Lock out
950
* anything that may cause meta data IO, to avoid acting on incomplete
951
* layout changes or scribbling over meta data that is in the process
952
* of being moved.
953
*
954
* Move is not exactly correct, btw, currently we have all our meta
955
* data in core memory, to "move" it we just write it all out, there
956
* are no reads. */
957
drbd_suspend_io(device);
958
buffer = drbd_md_get_buffer(device, __func__); /* Lock meta-data IO */
959
if (!buffer) {
960
drbd_resume_io(device);
961
return DS_ERROR;
962
}
963
964
/* remember current offset and sizes */
965
prev.last_agreed_sect = md->la_size_sect;
966
prev.md_offset = md->md_offset;
967
prev.al_offset = md->al_offset;
968
prev.bm_offset = md->bm_offset;
969
prev.md_size_sect = md->md_size_sect;
970
prev.al_stripes = md->al_stripes;
971
prev.al_stripe_size_4k = md->al_stripe_size_4k;
972
973
if (rs) {
974
/* rs is non NULL if we should change the AL layout only */
975
md->al_stripes = rs->al_stripes;
976
md->al_stripe_size_4k = rs->al_stripe_size / 4;
977
md->al_size_4k = (u64)rs->al_stripes * rs->al_stripe_size / 4;
978
}
979
980
drbd_md_set_sector_offsets(device, device->ldev);
981
982
rcu_read_lock();
983
u_size = rcu_dereference(device->ldev->disk_conf)->disk_size;
984
rcu_read_unlock();
985
size = drbd_new_dev_size(device, device->ldev, u_size, flags & DDSF_FORCED);
986
987
if (size < prev.last_agreed_sect) {
988
if (rs && u_size == 0) {
989
/* Remove "rs &&" later. This check should always be active, but
990
right now the receiver expects the permissive behavior */
991
drbd_warn(device, "Implicit shrink not allowed. "
992
"Use --size=%llus for explicit shrink.\n",
993
(unsigned long long)size);
994
rv = DS_ERROR_SHRINK;
995
}
996
if (u_size > size)
997
rv = DS_ERROR_SPACE_MD;
998
if (rv != DS_UNCHANGED)
999
goto err_out;
1000
}
1001
1002
if (get_capacity(device->vdisk) != size ||
1003
drbd_bm_capacity(device) != size) {
1004
int err;
1005
err = drbd_bm_resize(device, size, !(flags & DDSF_NO_RESYNC));
1006
if (unlikely(err)) {
1007
/* currently there is only one error: ENOMEM! */
1008
size = drbd_bm_capacity(device);
1009
if (size == 0) {
1010
drbd_err(device, "OUT OF MEMORY! "
1011
"Could not allocate bitmap!\n");
1012
} else {
1013
drbd_err(device, "BM resizing failed. "
1014
"Leaving size unchanged\n");
1015
}
1016
rv = DS_ERROR;
1017
}
1018
/* racy, see comments above. */
1019
drbd_set_my_capacity(device, size);
1020
md->la_size_sect = size;
1021
}
1022
if (rv <= DS_ERROR)
1023
goto err_out;
1024
1025
la_size_changed = (prev.last_agreed_sect != md->la_size_sect);
1026
1027
md_moved = prev.md_offset != md->md_offset
1028
|| prev.md_size_sect != md->md_size_sect;
1029
1030
if (la_size_changed || md_moved || rs) {
1031
u32 prev_flags;
1032
1033
/* We do some synchronous IO below, which may take some time.
1034
* Clear the timer, to avoid scary "timer expired!" messages,
1035
* "Superblock" is written out at least twice below, anyways. */
1036
timer_delete(&device->md_sync_timer);
1037
1038
/* We won't change the "al-extents" setting, we just may need
1039
* to move the on-disk location of the activity log ringbuffer.
1040
* Lock for transaction is good enough, it may well be "dirty"
1041
* or even "starving". */
1042
wait_event(device->al_wait, lc_try_lock_for_transaction(device->act_log));
1043
1044
/* mark current on-disk bitmap and activity log as unreliable */
1045
prev_flags = md->flags;
1046
md->flags |= MDF_FULL_SYNC | MDF_AL_DISABLED;
1047
drbd_md_write(device, buffer);
1048
1049
drbd_al_initialize(device, buffer);
1050
1051
drbd_info(device, "Writing the whole bitmap, %s\n",
1052
la_size_changed && md_moved ? "size changed and md moved" :
1053
la_size_changed ? "size changed" : "md moved");
1054
/* next line implicitly does drbd_suspend_io()+drbd_resume_io() */
1055
drbd_bitmap_io(device, md_moved ? &drbd_bm_write_all : &drbd_bm_write,
1056
"size changed", BM_LOCKED_MASK, NULL);
1057
1058
/* on-disk bitmap and activity log is authoritative again
1059
* (unless there was an IO error meanwhile...) */
1060
md->flags = prev_flags;
1061
drbd_md_write(device, buffer);
1062
1063
if (rs)
1064
drbd_info(device, "Changed AL layout to al-stripes = %d, al-stripe-size-kB = %d\n",
1065
md->al_stripes, md->al_stripe_size_4k * 4);
1066
}
1067
1068
if (size > prev.last_agreed_sect)
1069
rv = prev.last_agreed_sect ? DS_GREW : DS_GREW_FROM_ZERO;
1070
if (size < prev.last_agreed_sect)
1071
rv = DS_SHRUNK;
1072
1073
if (0) {
1074
err_out:
1075
/* restore previous offset and sizes */
1076
md->la_size_sect = prev.last_agreed_sect;
1077
md->md_offset = prev.md_offset;
1078
md->al_offset = prev.al_offset;
1079
md->bm_offset = prev.bm_offset;
1080
md->md_size_sect = prev.md_size_sect;
1081
md->al_stripes = prev.al_stripes;
1082
md->al_stripe_size_4k = prev.al_stripe_size_4k;
1083
md->al_size_4k = (u64)prev.al_stripes * prev.al_stripe_size_4k;
1084
}
1085
lc_unlock(device->act_log);
1086
wake_up(&device->al_wait);
1087
drbd_md_put_buffer(device);
1088
drbd_resume_io(device);
1089
1090
return rv;
1091
}
1092
1093
sector_t
1094
drbd_new_dev_size(struct drbd_device *device, struct drbd_backing_dev *bdev,
1095
sector_t u_size, int assume_peer_has_space)
1096
{
1097
sector_t p_size = device->p_size; /* partner's disk size. */
1098
sector_t la_size_sect = bdev->md.la_size_sect; /* last agreed size. */
1099
sector_t m_size; /* my size */
1100
sector_t size = 0;
1101
1102
m_size = drbd_get_max_capacity(bdev);
1103
1104
if (device->state.conn < C_CONNECTED && assume_peer_has_space) {
1105
drbd_warn(device, "Resize while not connected was forced by the user!\n");
1106
p_size = m_size;
1107
}
1108
1109
if (p_size && m_size) {
1110
size = min_t(sector_t, p_size, m_size);
1111
} else {
1112
if (la_size_sect) {
1113
size = la_size_sect;
1114
if (m_size && m_size < size)
1115
size = m_size;
1116
if (p_size && p_size < size)
1117
size = p_size;
1118
} else {
1119
if (m_size)
1120
size = m_size;
1121
if (p_size)
1122
size = p_size;
1123
}
1124
}
1125
1126
if (size == 0)
1127
drbd_err(device, "Both nodes diskless!\n");
1128
1129
if (u_size) {
1130
if (u_size > size)
1131
drbd_err(device, "Requested disk size is too big (%lu > %lu)\n",
1132
(unsigned long)u_size>>1, (unsigned long)size>>1);
1133
else
1134
size = u_size;
1135
}
1136
1137
return size;
1138
}
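/*
 * Example of the size negotiation above: with a 200 GiB local backing
 * device (m_size), a peer reporting 100 GiB (p_size) and no user-requested
 * size, the result is the minimum, 100 GiB.  If the peer size is unknown
 * (p_size == 0) and resize was not forced, the last agreed size is used,
 * capped by whatever size is known, so the device is not grown while the
 * peer cannot confirm it.  A non-zero u_size smaller than that result
 * overrides it; a larger one is ignored after logging that the requested
 * size is too big.
 */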
1139
1140
/*
1141
* drbd_check_al_size() - Ensures that the AL is of the right size
1142
* @device: DRBD device.
1143
*
1144
* Returns -EBUSY if current al lru is still used, -ENOMEM when allocation
1145
* failed, and 0 on success. You should call drbd_md_sync() after you called
1146
* this function.
1147
*/
1148
static int drbd_check_al_size(struct drbd_device *device, struct disk_conf *dc)
1149
{
1150
struct lru_cache *n, *t;
1151
struct lc_element *e;
1152
unsigned int in_use;
1153
int i;
1154
1155
if (device->act_log &&
1156
device->act_log->nr_elements == dc->al_extents)
1157
return 0;
1158
1159
in_use = 0;
1160
t = device->act_log;
1161
n = lc_create("act_log", drbd_al_ext_cache, AL_UPDATES_PER_TRANSACTION,
1162
dc->al_extents, sizeof(struct lc_element), 0);
1163
1164
if (n == NULL) {
1165
drbd_err(device, "Cannot allocate act_log lru!\n");
1166
return -ENOMEM;
1167
}
1168
spin_lock_irq(&device->al_lock);
1169
if (t) {
1170
for (i = 0; i < t->nr_elements; i++) {
1171
e = lc_element_by_index(t, i);
1172
if (e->refcnt)
1173
drbd_err(device, "refcnt(%d)==%d\n",
1174
e->lc_number, e->refcnt);
1175
in_use += e->refcnt;
1176
}
1177
}
1178
if (!in_use)
1179
device->act_log = n;
1180
spin_unlock_irq(&device->al_lock);
1181
if (in_use) {
1182
drbd_err(device, "Activity log still in use!\n");
1183
lc_destroy(n);
1184
return -EBUSY;
1185
} else {
1186
lc_destroy(t);
1187
}
1188
drbd_md_mark_dirty(device); /* we changed device->act_log->nr_elements */
1189
return 0;
1190
}
1191
1192
static unsigned int drbd_max_peer_bio_size(struct drbd_device *device)
1193
{
1194
/*
1195
* We may ignore peer limits if the peer is modern enough. From 8.3.8
1196
* onwards the peer can use multiple BIOs for a single peer_request.
1197
*/
1198
if (device->state.conn < C_WF_REPORT_PARAMS)
1199
return device->peer_max_bio_size;
1200
1201
if (first_peer_device(device)->connection->agreed_pro_version < 94)
1202
return min(device->peer_max_bio_size, DRBD_MAX_SIZE_H80_PACKET);
1203
1204
/*
1205
* Correct old drbd (up to 8.3.7) if it believes it can do more than
1206
* 32KiB.
1207
*/
1208
if (first_peer_device(device)->connection->agreed_pro_version == 94)
1209
return DRBD_MAX_SIZE_H80_PACKET;
1210
1211
/*
1212
* drbd 8.3.8 onwards, before 8.4.0
1213
*/
1214
if (first_peer_device(device)->connection->agreed_pro_version < 100)
1215
return DRBD_MAX_BIO_SIZE_P95;
1216
return DRBD_MAX_BIO_SIZE;
1217
}
1218
1219
static unsigned int drbd_max_discard_sectors(struct drbd_connection *connection)
1220
{
1221
/* when we introduced REQ_WRITE_SAME support, we also bumped
1222
* our maximum supported batch bio size used for discards. */
1223
if (connection->agreed_features & DRBD_FF_WSAME)
1224
return DRBD_MAX_BBIO_SECTORS;
1225
/* before, with DRBD <= 8.4.6, we only allowed up to one AL_EXTENT_SIZE. */
1226
return AL_EXTENT_SIZE >> 9;
1227
}
1228
1229
static bool drbd_discard_supported(struct drbd_connection *connection,
1230
struct drbd_backing_dev *bdev)
1231
{
1232
if (bdev && !bdev_max_discard_sectors(bdev->backing_bdev))
1233
return false;
1234
1235
if (connection->cstate >= C_CONNECTED &&
1236
!(connection->agreed_features & DRBD_FF_TRIM)) {
1237
drbd_info(connection,
1238
"peer DRBD too old, does not support TRIM: disabling discards\n");
1239
return false;
1240
}
1241
1242
return true;
1243
}
1244
1245
/* This is the workaround for "bio would need to, but cannot, be split" */
1246
static unsigned int drbd_backing_dev_max_segments(struct drbd_device *device)
1247
{
1248
unsigned int max_segments;
1249
1250
rcu_read_lock();
1251
max_segments = rcu_dereference(device->ldev->disk_conf)->max_bio_bvecs;
1252
rcu_read_unlock();
1253
1254
if (!max_segments)
1255
return BLK_MAX_SEGMENTS;
1256
return max_segments;
1257
}
1258
1259
void drbd_reconsider_queue_parameters(struct drbd_device *device,
1260
struct drbd_backing_dev *bdev, struct o_qlim *o)
1261
{
1262
struct drbd_connection *connection =
1263
first_peer_device(device)->connection;
1264
struct request_queue * const q = device->rq_queue;
1265
unsigned int now = queue_max_hw_sectors(q) << 9;
1266
struct queue_limits lim;
1267
struct request_queue *b = NULL;
1268
unsigned int new;
1269
1270
if (bdev) {
1271
b = bdev->backing_bdev->bd_disk->queue;
1272
1273
device->local_max_bio_size =
1274
queue_max_hw_sectors(b) << SECTOR_SHIFT;
1275
}
1276
1277
/*
1278
* We may later detach and re-attach on a disconnected Primary. Avoid
1279
* decreasing the value in this case.
1280
*
1281
* We want to store what we know the peer DRBD can handle, not what the
1282
* peer IO backend can handle.
1283
*/
1284
new = min3(DRBD_MAX_BIO_SIZE, device->local_max_bio_size,
1285
max(drbd_max_peer_bio_size(device), device->peer_max_bio_size));
1286
if (new != now) {
1287
if (device->state.role == R_PRIMARY && new < now)
1288
drbd_err(device, "ASSERT FAILED new < now; (%u < %u)\n",
1289
new, now);
1290
drbd_info(device, "max BIO size = %u\n", new);
1291
}
1292
1293
lim = queue_limits_start_update(q);
1294
if (bdev) {
1295
blk_set_stacking_limits(&lim);
1296
lim.max_segments = drbd_backing_dev_max_segments(device);
1297
} else {
1298
lim.max_segments = BLK_MAX_SEGMENTS;
1299
}
1300
1301
lim.max_hw_sectors = new >> SECTOR_SHIFT;
1302
lim.seg_boundary_mask = PAGE_SIZE - 1;
1303
1304
/*
1305
* We don't care for the granularity, really.
1306
*
1307
* Stacking limits below should fix it for the local device. Whether or
1308
* not it is a suitable granularity on the remote device is not our
1309
* problem, really. If you care, you need to use devices with similar
1310
* topology on all peers.
1311
*/
1312
if (drbd_discard_supported(connection, bdev)) {
1313
lim.discard_granularity = 512;
1314
lim.max_hw_discard_sectors =
1315
drbd_max_discard_sectors(connection);
1316
} else {
1317
lim.discard_granularity = 0;
1318
lim.max_hw_discard_sectors = 0;
1319
}
1320
1321
if (bdev)
1322
blk_stack_limits(&lim, &b->limits, 0);
1323
1324
/*
1325
* If we can handle "zeroes" efficiently on the protocol, we want to do
1326
* that, even if our backend does not announce max_write_zeroes_sectors
1327
* itself.
1328
*/
1329
if (connection->agreed_features & DRBD_FF_WZEROES)
1330
lim.max_write_zeroes_sectors = DRBD_MAX_BBIO_SECTORS;
1331
else
1332
lim.max_write_zeroes_sectors = 0;
1333
lim.max_hw_wzeroes_unmap_sectors = 0;
1334
1335
if ((lim.discard_granularity >> SECTOR_SHIFT) >
1336
lim.max_hw_discard_sectors) {
1337
lim.discard_granularity = 0;
1338
lim.max_hw_discard_sectors = 0;
1339
}
1340
1341
if (queue_limits_commit_update(q, &lim))
1342
drbd_err(device, "setting new queue limits failed\n");
1343
}
1344
1345
/* Starts the worker thread */
1346
static void conn_reconfig_start(struct drbd_connection *connection)
1347
{
1348
drbd_thread_start(&connection->worker);
1349
drbd_flush_workqueue(&connection->sender_work);
1350
}
1351
1352
/* if still unconfigured, stops worker again. */
1353
static void conn_reconfig_done(struct drbd_connection *connection)
1354
{
1355
bool stop_threads;
1356
spin_lock_irq(&connection->resource->req_lock);
1357
stop_threads = conn_all_vols_unconf(connection) &&
1358
connection->cstate == C_STANDALONE;
1359
spin_unlock_irq(&connection->resource->req_lock);
1360
if (stop_threads) {
1361
/* ack_receiver thread and ack_sender workqueue are implicitly
1362
* stopped by receiver in conn_disconnect() */
1363
drbd_thread_stop(&connection->receiver);
1364
drbd_thread_stop(&connection->worker);
1365
}
1366
}
1367
1368
/* Make sure IO is suspended before calling this function. */
1369
static void drbd_suspend_al(struct drbd_device *device)
1370
{
1371
int s = 0;
1372
1373
if (!lc_try_lock(device->act_log)) {
1374
drbd_warn(device, "Failed to lock al in drbd_suspend_al()\n");
1375
return;
1376
}
1377
1378
drbd_al_shrink(device);
1379
spin_lock_irq(&device->resource->req_lock);
1380
if (device->state.conn < C_CONNECTED)
1381
s = !test_and_set_bit(AL_SUSPENDED, &device->flags);
1382
spin_unlock_irq(&device->resource->req_lock);
1383
lc_unlock(device->act_log);
1384
1385
if (s)
1386
drbd_info(device, "Suspended AL updates\n");
1387
}
1388
1389
1390
static bool should_set_defaults(struct genl_info *info)
1391
{
1392
struct drbd_genlmsghdr *dh = genl_info_userhdr(info);
1393
1394
return 0 != (dh->flags & DRBD_GENL_F_SET_DEFAULTS);
1395
}
1396
1397
static unsigned int drbd_al_extents_max(struct drbd_backing_dev *bdev)
1398
{
1399
/* This is limited by 16 bit "slot" numbers,
1400
* and by available on-disk context storage.
1401
*
1402
* Also (u16)~0 is special (denotes a "free" extent).
1403
*
1404
* One transaction occupies one 4kB on-disk block,
1405
* we have n such blocks in the on disk ring buffer,
1406
* the "current" transaction may fail (n-1),
1407
* and there are 919 context slot numbers per transaction.
1408
*
1409
* 72 transaction blocks amounts to more than 2**16 context slots,
1410
* so cap there first.
1411
*/
1412
const unsigned int max_al_nr = DRBD_AL_EXTENTS_MAX;
1413
const unsigned int sufficient_on_disk =
1414
(max_al_nr + AL_CONTEXT_PER_TRANSACTION -1)
1415
/AL_CONTEXT_PER_TRANSACTION;
1416
1417
unsigned int al_size_4k = bdev->md.al_size_4k;
1418
1419
if (al_size_4k > sufficient_on_disk)
1420
return max_al_nr;
1421
1422
return (al_size_4k - 1) * AL_CONTEXT_PER_TRANSACTION;
1423
}
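/*
 * Worked example, using the 919 context slots per transaction mentioned
 * above: with the historical 32 kB activity log (al_size_4k == 8), we are
 * well below the ~72 block cap, so the limit evaluates to
 * (8 - 1) * AL_CONTEXT_PER_TRANSACTION == 6433 al-extents.
 */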
1424
1425
static bool write_ordering_changed(struct disk_conf *a, struct disk_conf *b)
1426
{
1427
return a->disk_barrier != b->disk_barrier ||
1428
a->disk_flushes != b->disk_flushes ||
1429
a->disk_drain != b->disk_drain;
1430
}
1431
1432
static void sanitize_disk_conf(struct drbd_device *device, struct disk_conf *disk_conf,
1433
struct drbd_backing_dev *nbc)
1434
{
1435
struct block_device *bdev = nbc->backing_bdev;
1436
1437
if (disk_conf->al_extents < DRBD_AL_EXTENTS_MIN)
1438
disk_conf->al_extents = DRBD_AL_EXTENTS_MIN;
1439
if (disk_conf->al_extents > drbd_al_extents_max(nbc))
1440
disk_conf->al_extents = drbd_al_extents_max(nbc);
1441
1442
if (!bdev_max_discard_sectors(bdev)) {
1443
if (disk_conf->rs_discard_granularity) {
1444
disk_conf->rs_discard_granularity = 0; /* disable feature */
1445
drbd_info(device, "rs_discard_granularity feature disabled\n");
1446
}
1447
}
1448
1449
if (disk_conf->rs_discard_granularity) {
1450
int orig_value = disk_conf->rs_discard_granularity;
1451
sector_t discard_size = bdev_max_discard_sectors(bdev) << 9;
1452
unsigned int discard_granularity = bdev_discard_granularity(bdev);
1453
int remainder;
1454
1455
if (discard_granularity > disk_conf->rs_discard_granularity)
1456
disk_conf->rs_discard_granularity = discard_granularity;
1457
1458
remainder = disk_conf->rs_discard_granularity %
1459
discard_granularity;
1460
disk_conf->rs_discard_granularity += remainder;
1461
1462
if (disk_conf->rs_discard_granularity > discard_size)
1463
disk_conf->rs_discard_granularity = discard_size;
1464
1465
if (disk_conf->rs_discard_granularity != orig_value)
1466
drbd_info(device, "rs_discard_granularity changed to %d\n",
1467
disk_conf->rs_discard_granularity);
1468
}
1469
}
1470
1471
static int disk_opts_check_al_size(struct drbd_device *device, struct disk_conf *dc)
1472
{
1473
int err = -EBUSY;
1474
1475
if (device->act_log &&
1476
device->act_log->nr_elements == dc->al_extents)
1477
return 0;
1478
1479
drbd_suspend_io(device);
1480
/* If IO completion is currently blocked, we would likely wait
1481
* "forever" for the activity log to become unused. So we don't. */
1482
if (atomic_read(&device->ap_bio_cnt))
1483
goto out;
1484
1485
wait_event(device->al_wait, lc_try_lock(device->act_log));
1486
drbd_al_shrink(device);
1487
err = drbd_check_al_size(device, dc);
1488
lc_unlock(device->act_log);
1489
wake_up(&device->al_wait);
1490
out:
1491
drbd_resume_io(device);
1492
return err;
1493
}
1494
1495
int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info)
1496
{
1497
struct drbd_config_context adm_ctx;
1498
enum drbd_ret_code retcode;
1499
struct drbd_device *device;
1500
struct disk_conf *new_disk_conf, *old_disk_conf;
1501
struct fifo_buffer *old_plan = NULL, *new_plan = NULL;
1502
int err;
1503
unsigned int fifo_size;
1504
1505
retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
1506
if (!adm_ctx.reply_skb)
1507
return retcode;
1508
if (retcode != NO_ERROR)
1509
goto finish;
1510
1511
device = adm_ctx.device;
1512
mutex_lock(&adm_ctx.resource->adm_mutex);
1513
1514
/* we also need a disk
1515
* to change the options on */
1516
if (!get_ldev(device)) {
1517
retcode = ERR_NO_DISK;
1518
goto out;
1519
}
1520
1521
new_disk_conf = kmalloc(sizeof(struct disk_conf), GFP_KERNEL);
1522
if (!new_disk_conf) {
1523
retcode = ERR_NOMEM;
1524
goto fail;
1525
}
1526
1527
mutex_lock(&device->resource->conf_update);
1528
old_disk_conf = device->ldev->disk_conf;
1529
*new_disk_conf = *old_disk_conf;
1530
if (should_set_defaults(info))
1531
set_disk_conf_defaults(new_disk_conf);
1532
1533
err = disk_conf_from_attrs_for_change(new_disk_conf, info);
1534
if (err && err != -ENOMSG) {
1535
retcode = ERR_MANDATORY_TAG;
1536
drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
1537
goto fail_unlock;
1538
}
1539
1540
if (!expect(device, new_disk_conf->resync_rate >= 1))
1541
new_disk_conf->resync_rate = 1;
1542
1543
sanitize_disk_conf(device, new_disk_conf, device->ldev);
1544
1545
if (new_disk_conf->c_plan_ahead > DRBD_C_PLAN_AHEAD_MAX)
1546
new_disk_conf->c_plan_ahead = DRBD_C_PLAN_AHEAD_MAX;
1547
1548
fifo_size = (new_disk_conf->c_plan_ahead * 10 * SLEEP_TIME) / HZ;
1549
if (fifo_size != device->rs_plan_s->size) {
1550
new_plan = fifo_alloc(fifo_size);
1551
if (!new_plan) {
1552
drbd_err(device, "kmalloc of fifo_buffer failed");
1553
retcode = ERR_NOMEM;
1554
goto fail_unlock;
1555
}
1556
}
1557
1558
err = disk_opts_check_al_size(device, new_disk_conf);
1559
if (err) {
1560
/* Could be just "busy". Ignore?
1561
* Introduce dedicated error code? */
1562
drbd_msg_put_info(adm_ctx.reply_skb,
1563
"Try again without changing current al-extents setting");
1564
retcode = ERR_NOMEM;
1565
goto fail_unlock;
1566
}
1567
1568
lock_all_resources();
1569
retcode = drbd_resync_after_valid(device, new_disk_conf->resync_after);
1570
if (retcode == NO_ERROR) {
1571
rcu_assign_pointer(device->ldev->disk_conf, new_disk_conf);
1572
drbd_resync_after_changed(device);
1573
}
1574
unlock_all_resources();
1575
1576
if (retcode != NO_ERROR)
1577
goto fail_unlock;
1578
1579
if (new_plan) {
1580
old_plan = device->rs_plan_s;
1581
rcu_assign_pointer(device->rs_plan_s, new_plan);
1582
}
1583
1584
mutex_unlock(&device->resource->conf_update);
1585
1586
if (new_disk_conf->al_updates)
1587
device->ldev->md.flags &= ~MDF_AL_DISABLED;
1588
else
1589
device->ldev->md.flags |= MDF_AL_DISABLED;
1590
1591
if (new_disk_conf->md_flushes)
1592
clear_bit(MD_NO_FUA, &device->flags);
1593
else
1594
set_bit(MD_NO_FUA, &device->flags);
1595
1596
if (write_ordering_changed(old_disk_conf, new_disk_conf))
1597
drbd_bump_write_ordering(device->resource, NULL, WO_BDEV_FLUSH);
1598
1599
if (old_disk_conf->discard_zeroes_if_aligned !=
1600
new_disk_conf->discard_zeroes_if_aligned)
1601
drbd_reconsider_queue_parameters(device, device->ldev, NULL);
1602
1603
drbd_md_sync(device);
1604
1605
if (device->state.conn >= C_CONNECTED) {
1606
struct drbd_peer_device *peer_device;
1607
1608
for_each_peer_device(peer_device, device)
1609
drbd_send_sync_param(peer_device);
1610
}
1611
1612
kvfree_rcu_mightsleep(old_disk_conf);
1613
kfree(old_plan);
1614
mod_timer(&device->request_timer, jiffies + HZ);
1615
goto success;
1616
1617
fail_unlock:
1618
mutex_unlock(&device->resource->conf_update);
1619
fail:
1620
kfree(new_disk_conf);
1621
kfree(new_plan);
1622
success:
1623
put_ldev(device);
1624
out:
1625
mutex_unlock(&adm_ctx.resource->adm_mutex);
1626
finish:
1627
drbd_adm_finish(&adm_ctx, info, retcode);
1628
return 0;
1629
}
1630
1631
static struct file *open_backing_dev(struct drbd_device *device,
1632
const char *bdev_path, void *claim_ptr, bool do_bd_link)
1633
{
1634
struct file *file;
1635
int err = 0;
1636
1637
file = bdev_file_open_by_path(bdev_path, BLK_OPEN_READ | BLK_OPEN_WRITE,
1638
claim_ptr, NULL);
1639
if (IS_ERR(file)) {
1640
drbd_err(device, "open(\"%s\") failed with %ld\n",
1641
bdev_path, PTR_ERR(file));
1642
return file;
1643
}
1644
1645
if (!do_bd_link)
1646
return file;
1647
1648
err = bd_link_disk_holder(file_bdev(file), device->vdisk);
1649
if (err) {
1650
fput(file);
1651
drbd_err(device, "bd_link_disk_holder(\"%s\", ...) failed with %d\n",
1652
bdev_path, err);
1653
file = ERR_PTR(err);
1654
}
1655
return file;
1656
}
1657
1658
static int open_backing_devices(struct drbd_device *device,
1659
struct disk_conf *new_disk_conf,
1660
struct drbd_backing_dev *nbc)
1661
{
1662
struct file *file;
1663
1664
file = open_backing_dev(device, new_disk_conf->backing_dev, device,
1665
true);
1666
if (IS_ERR(file))
1667
return ERR_OPEN_DISK;
1668
nbc->backing_bdev = file_bdev(file);
1669
nbc->backing_bdev_file = file;
1670
1671
/*
1672
* meta_dev_idx >= 0: external fixed size, possibly multiple
1673
* drbd sharing one meta device. TODO in that case, paranoia
1674
* check that [md_bdev, meta_dev_idx] is not yet used by some
1675
* other drbd minor! (if you use drbd.conf + drbdadm, that
1676
* should check it for you already; but if you don't, or
1677
* someone fooled it, we need to double check here)
1678
*/
1679
file = open_backing_dev(device, new_disk_conf->meta_dev,
1680
/* claim ptr: device, if claimed exclusively; shared drbd_m_holder,
1681
* if potentially shared with other drbd minors */
1682
(new_disk_conf->meta_dev_idx < 0) ? (void*)device : (void*)drbd_m_holder,
1683
/* avoid double bd_claim_by_disk() for the same (source,target) tuple,
1684
* as would happen with internal metadata. */
1685
(new_disk_conf->meta_dev_idx != DRBD_MD_INDEX_FLEX_INT &&
1686
new_disk_conf->meta_dev_idx != DRBD_MD_INDEX_INTERNAL));
1687
if (IS_ERR(file))
1688
return ERR_OPEN_MD_DISK;
1689
nbc->md_bdev = file_bdev(file);
1690
nbc->f_md_bdev = file;
1691
return NO_ERROR;
1692
}
1693
1694
static void close_backing_dev(struct drbd_device *device,
1695
struct file *bdev_file, bool do_bd_unlink)
1696
{
1697
if (!bdev_file)
1698
return;
1699
if (do_bd_unlink)
1700
bd_unlink_disk_holder(file_bdev(bdev_file), device->vdisk);
1701
fput(bdev_file);
1702
}
1703
1704
void drbd_backing_dev_free(struct drbd_device *device, struct drbd_backing_dev *ldev)
1705
{
1706
if (ldev == NULL)
1707
return;
1708
1709
close_backing_dev(device, ldev->f_md_bdev,
1710
ldev->md_bdev != ldev->backing_bdev);
1711
close_backing_dev(device, ldev->backing_bdev_file, true);
1712
1713
kfree(ldev->disk_conf);
1714
kfree(ldev);
1715
}
1716
1717
int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
1718
{
1719
struct drbd_config_context adm_ctx;
1720
struct drbd_device *device;
1721
struct drbd_peer_device *peer_device;
1722
struct drbd_connection *connection;
1723
int err;
1724
enum drbd_ret_code retcode;
1725
enum determine_dev_size dd;
1726
sector_t max_possible_sectors;
1727
sector_t min_md_device_sectors;
1728
struct drbd_backing_dev *nbc = NULL; /* new_backing_conf */
1729
struct disk_conf *new_disk_conf = NULL;
1730
struct lru_cache *resync_lru = NULL;
1731
struct fifo_buffer *new_plan = NULL;
1732
union drbd_state ns, os;
1733
enum drbd_state_rv rv;
1734
struct net_conf *nc;
1735
1736
retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
1737
if (!adm_ctx.reply_skb)
1738
return retcode;
1739
if (retcode != NO_ERROR)
1740
goto finish;
1741
1742
device = adm_ctx.device;
1743
mutex_lock(&adm_ctx.resource->adm_mutex);
1744
peer_device = first_peer_device(device);
1745
connection = peer_device->connection;
1746
conn_reconfig_start(connection);
1747
1748
/* if you want to reconfigure, please tear down first */
1749
if (device->state.disk > D_DISKLESS) {
1750
retcode = ERR_DISK_CONFIGURED;
1751
goto fail;
1752
}
1753
/* It may just now have detached because of IO error. Make sure
1754
* drbd_ldev_destroy is done already, we may end up here very fast,
1755
* e.g. if someone calls attach from the on-io-error handler,
1756
* to realize a "hot spare" feature (not that I'd recommend that) */
1757
wait_event(device->misc_wait, !test_bit(GOING_DISKLESS, &device->flags));
1758
1759
/* make sure there is no leftover from previous force-detach attempts */
1760
clear_bit(FORCE_DETACH, &device->flags);
1761
clear_bit(WAS_IO_ERROR, &device->flags);
1762
clear_bit(WAS_READ_ERROR, &device->flags);
1763
1764
/* and no leftover from previously aborted resync or verify, either */
1765
device->rs_total = 0;
1766
device->rs_failed = 0;
1767
atomic_set(&device->rs_pending_cnt, 0);
1768
1769
/* allocation not in the IO path, drbdsetup context */
1770
nbc = kzalloc(sizeof(struct drbd_backing_dev), GFP_KERNEL);
1771
if (!nbc) {
1772
retcode = ERR_NOMEM;
1773
goto fail;
1774
}
1775
spin_lock_init(&nbc->md.uuid_lock);
1776
1777
new_disk_conf = kzalloc(sizeof(struct disk_conf), GFP_KERNEL);
1778
if (!new_disk_conf) {
1779
retcode = ERR_NOMEM;
1780
goto fail;
1781
}
1782
nbc->disk_conf = new_disk_conf;
1783
1784
set_disk_conf_defaults(new_disk_conf);
1785
err = disk_conf_from_attrs(new_disk_conf, info);
1786
if (err) {
1787
retcode = ERR_MANDATORY_TAG;
1788
drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
1789
goto fail;
1790
}
1791
1792
if (new_disk_conf->c_plan_ahead > DRBD_C_PLAN_AHEAD_MAX)
1793
new_disk_conf->c_plan_ahead = DRBD_C_PLAN_AHEAD_MAX;
1794
1795
new_plan = fifo_alloc((new_disk_conf->c_plan_ahead * 10 * SLEEP_TIME) / HZ);
1796
if (!new_plan) {
1797
retcode = ERR_NOMEM;
1798
goto fail;
1799
}
1800
1801
if (new_disk_conf->meta_dev_idx < DRBD_MD_INDEX_FLEX_INT) {
1802
retcode = ERR_MD_IDX_INVALID;
1803
goto fail;
1804
}
1805
1806
rcu_read_lock();
1807
nc = rcu_dereference(connection->net_conf);
1808
if (nc) {
1809
if (new_disk_conf->fencing == FP_STONITH && nc->wire_protocol == DRBD_PROT_A) {
1810
rcu_read_unlock();
1811
retcode = ERR_STONITH_AND_PROT_A;
1812
goto fail;
1813
}
1814
}
1815
rcu_read_unlock();
1816
1817
retcode = open_backing_devices(device, new_disk_conf, nbc);
1818
if (retcode != NO_ERROR)
1819
goto fail;
1820
1821
if ((nbc->backing_bdev == nbc->md_bdev) !=
1822
(new_disk_conf->meta_dev_idx == DRBD_MD_INDEX_INTERNAL ||
1823
new_disk_conf->meta_dev_idx == DRBD_MD_INDEX_FLEX_INT)) {
1824
retcode = ERR_MD_IDX_INVALID;
1825
goto fail;
1826
}
1827
1828
resync_lru = lc_create("resync", drbd_bm_ext_cache,
1829
1, 61, sizeof(struct bm_extent),
1830
offsetof(struct bm_extent, lce));
1831
if (!resync_lru) {
1832
retcode = ERR_NOMEM;
1833
goto fail;
1834
}
1835
1836
/* Read our meta data super block early.
1837
* This also sets other on-disk offsets. */
1838
retcode = drbd_md_read(device, nbc);
1839
if (retcode != NO_ERROR)
1840
goto fail;
1841
1842
sanitize_disk_conf(device, new_disk_conf, nbc);
1843
1844
if (drbd_get_max_capacity(nbc) < new_disk_conf->disk_size) {
1845
drbd_err(device, "max capacity %llu smaller than disk size %llu\n",
1846
(unsigned long long) drbd_get_max_capacity(nbc),
1847
(unsigned long long) new_disk_conf->disk_size);
1848
retcode = ERR_DISK_TOO_SMALL;
1849
goto fail;
1850
}
1851
1852
if (new_disk_conf->meta_dev_idx < 0) {
1853
max_possible_sectors = DRBD_MAX_SECTORS_FLEX;
1854
/* at least one MB, otherwise it does not make sense */
1855
min_md_device_sectors = (2<<10);
1856
} else {
1857
max_possible_sectors = DRBD_MAX_SECTORS;
1858
min_md_device_sectors = MD_128MB_SECT * (new_disk_conf->meta_dev_idx + 1);
1859
}
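/*
 * Editor's worked example (assuming the usual definition of MD_128MB_SECT
 * as 128 MiB expressed in 512-byte sectors, i.e. 262144): with an external
 * meta device and meta_dev_idx == 2, index slots 0..2 must fit, so
 * min_md_device_sectors = 262144 * 3 = 786432 sectors (384 MiB).  For
 * internal/flexible meta data only the 1 MiB lower bound of 2048 sectors
 * is enforced at this point.
 */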
1860
1861
if (drbd_get_capacity(nbc->md_bdev) < min_md_device_sectors) {
1862
retcode = ERR_MD_DISK_TOO_SMALL;
1863
drbd_warn(device, "refusing attach: md-device too small, "
1864
"at least %llu sectors needed for this meta-disk type\n",
1865
(unsigned long long) min_md_device_sectors);
1866
goto fail;
1867
}
1868
1869
/* Make sure the new disk is big enough
1870
* (we may currently be R_PRIMARY with no local disk...) */
1871
if (drbd_get_max_capacity(nbc) < get_capacity(device->vdisk)) {
1872
retcode = ERR_DISK_TOO_SMALL;
1873
goto fail;
1874
}
1875
1876
nbc->known_size = drbd_get_capacity(nbc->backing_bdev);
1877
1878
if (nbc->known_size > max_possible_sectors) {
1879
drbd_warn(device, "==> truncating very big lower level device "
1880
"to currently maximum possible %llu sectors <==\n",
1881
(unsigned long long) max_possible_sectors);
1882
if (new_disk_conf->meta_dev_idx >= 0)
1883
drbd_warn(device, "==>> using internal or flexible "
1884
"meta data may help <<==\n");
1885
}
1886
1887
drbd_suspend_io(device);
1888
/* also wait for the last barrier ack. */
1889
/* FIXME see also https://daiquiri.linbit/cgi-bin/bugzilla/show_bug.cgi?id=171
1890
* We need a way to either ignore barrier acks for barriers sent before a device
1891
* was attached, or a way to wait for all pending barrier acks to come in.
1892
* As barriers are counted per resource,
1893
* we'd need to suspend io on all devices of a resource.
1894
*/
1895
wait_event(device->misc_wait, !atomic_read(&device->ap_pending_cnt) || drbd_suspended(device));
1896
/* and for any other previously queued work */
1897
drbd_flush_workqueue(&connection->sender_work);
1898
1899
rv = _drbd_request_state(device, NS(disk, D_ATTACHING), CS_VERBOSE);
1900
retcode = (enum drbd_ret_code)rv;
1901
drbd_resume_io(device);
1902
if (rv < SS_SUCCESS)
1903
goto fail;
1904
1905
if (!get_ldev_if_state(device, D_ATTACHING))
1906
goto force_diskless;
1907
1908
if (!device->bitmap) {
1909
if (drbd_bm_init(device)) {
1910
retcode = ERR_NOMEM;
1911
goto force_diskless_dec;
1912
}
1913
}
1914
1915
if (device->state.pdsk != D_UP_TO_DATE && device->ed_uuid &&
1916
(device->state.role == R_PRIMARY || device->state.peer == R_PRIMARY) &&
1917
(device->ed_uuid & ~((u64)1)) != (nbc->md.uuid[UI_CURRENT] & ~((u64)1))) {
1918
drbd_err(device, "Can only attach to data with current UUID=%016llX\n",
1919
(unsigned long long)device->ed_uuid);
1920
retcode = ERR_DATA_NOT_CURRENT;
1921
goto force_diskless_dec;
1922
}
1923
1924
/* Since we are diskless, fix the activity log first... */
1925
if (drbd_check_al_size(device, new_disk_conf)) {
1926
retcode = ERR_NOMEM;
1927
goto force_diskless_dec;
1928
}
1929
1930
/* Prevent shrinking of consistent devices ! */
1931
{
1932
unsigned long long nsz = drbd_new_dev_size(device, nbc, nbc->disk_conf->disk_size, 0);
1933
unsigned long long eff = nbc->md.la_size_sect;
1934
if (drbd_md_test_flag(nbc, MDF_CONSISTENT) && nsz < eff) {
1935
if (nsz == nbc->disk_conf->disk_size) {
1936
drbd_warn(device, "truncating a consistent device during attach (%llu < %llu)\n", nsz, eff);
1937
} else {
1938
drbd_warn(device, "refusing to truncate a consistent device (%llu < %llu)\n", nsz, eff);
1939
drbd_msg_sprintf_info(adm_ctx.reply_skb,
1940
"To-be-attached device has last effective > current size, and is consistent\n"
1941
"(%llu > %llu sectors). Refusing to attach.", eff, nsz);
1942
retcode = ERR_IMPLICIT_SHRINK;
1943
goto force_diskless_dec;
1944
}
1945
}
1946
}
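/*
 * Editor's illustration of the check above (made-up numbers): if the meta
 * data records a last effective size of 20971520 sectors (10 GiB, "eff")
 * but drbd_new_dev_size() now only yields 16777216 sectors (8 GiB, "nsz")
 * and MDF_CONSISTENT is set, the attach is refused with
 * ERR_IMPLICIT_SHRINK, unless disk_size was explicitly configured to
 * exactly that smaller value, in which case we only warn and proceed.
 */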
1947
1948
lock_all_resources();
1949
retcode = drbd_resync_after_valid(device, new_disk_conf->resync_after);
1950
if (retcode != NO_ERROR) {
1951
unlock_all_resources();
1952
goto force_diskless_dec;
1953
}
1954
1955
/* Reset the "barriers don't work" bits here, then force meta data to
1956
* be written, to ensure we determine if barriers are supported. */
1957
if (new_disk_conf->md_flushes)
1958
clear_bit(MD_NO_FUA, &device->flags);
1959
else
1960
set_bit(MD_NO_FUA, &device->flags);
1961
1962
/* Point of no return reached.
1963
* Devices and memory are no longer released by error cleanup below.
1964
* Now the device takes over responsibility, and the state engine should
1965
* clean it up somewhere. */
1966
D_ASSERT(device, device->ldev == NULL);
1967
device->ldev = nbc;
1968
device->resync = resync_lru;
1969
device->rs_plan_s = new_plan;
1970
nbc = NULL;
1971
resync_lru = NULL;
1972
new_disk_conf = NULL;
1973
new_plan = NULL;
1974
1975
drbd_resync_after_changed(device);
1976
drbd_bump_write_ordering(device->resource, device->ldev, WO_BDEV_FLUSH);
1977
unlock_all_resources();
1978
1979
if (drbd_md_test_flag(device->ldev, MDF_CRASHED_PRIMARY))
1980
set_bit(CRASHED_PRIMARY, &device->flags);
1981
else
1982
clear_bit(CRASHED_PRIMARY, &device->flags);
1983
1984
if (drbd_md_test_flag(device->ldev, MDF_PRIMARY_IND) &&
1985
!(device->state.role == R_PRIMARY && device->resource->susp_nod))
1986
set_bit(CRASHED_PRIMARY, &device->flags);
1987
1988
device->send_cnt = 0;
1989
device->recv_cnt = 0;
1990
device->read_cnt = 0;
1991
device->writ_cnt = 0;
1992
1993
drbd_reconsider_queue_parameters(device, device->ldev, NULL);
1994
1995
/* If I am currently not R_PRIMARY,
1996
* but meta data primary indicator is set,
1997
* I just now recover from a hard crash,
1998
* and have been R_PRIMARY before that crash.
1999
*
2000
* Now, if I had no connection before that crash
2001
* (have been degraded R_PRIMARY), chances are that
2002
* I won't find my peer now either.
2003
*
2004
* In that case, and _only_ in that case,
2005
* we use the degr-wfc-timeout instead of the default,
2006
* so we can automatically recover from a crash of a
2007
* degraded but active "cluster" after a certain timeout.
2008
*/
2009
clear_bit(USE_DEGR_WFC_T, &device->flags);
2010
if (device->state.role != R_PRIMARY &&
2011
drbd_md_test_flag(device->ldev, MDF_PRIMARY_IND) &&
2012
!drbd_md_test_flag(device->ldev, MDF_CONNECTED_IND))
2013
set_bit(USE_DEGR_WFC_T, &device->flags);
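/*
 * Editor's summary, not from the original source: the net effect is that
 * USE_DEGR_WFC_T is set only when the local meta data carries
 * MDF_PRIMARY_IND but not MDF_CONNECTED_IND while we are not currently
 * Primary ourselves, which is exactly the "crashed while degraded Primary"
 * case described above; the wait-for-connection logic then applies
 * degr-wfc-timeout instead of the normal wfc-timeout.
 */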
2014
2015
dd = drbd_determine_dev_size(device, 0, NULL);
2016
if (dd <= DS_ERROR) {
2017
retcode = ERR_NOMEM_BITMAP;
2018
goto force_diskless_dec;
2019
} else if (dd == DS_GREW)
2020
set_bit(RESYNC_AFTER_NEG, &device->flags);
2021
2022
if (drbd_md_test_flag(device->ldev, MDF_FULL_SYNC) ||
2023
(test_bit(CRASHED_PRIMARY, &device->flags) &&
2024
drbd_md_test_flag(device->ldev, MDF_AL_DISABLED))) {
2025
drbd_info(device, "Assuming that all blocks are out of sync "
2026
"(aka FullSync)\n");
2027
if (drbd_bitmap_io(device, &drbd_bmio_set_n_write,
2028
"set_n_write from attaching", BM_LOCKED_MASK,
2029
NULL)) {
2030
retcode = ERR_IO_MD_DISK;
2031
goto force_diskless_dec;
2032
}
2033
} else {
2034
if (drbd_bitmap_io(device, &drbd_bm_read,
2035
"read from attaching", BM_LOCKED_MASK,
2036
NULL)) {
2037
retcode = ERR_IO_MD_DISK;
2038
goto force_diskless_dec;
2039
}
2040
}
2041
2042
if (_drbd_bm_total_weight(device) == drbd_bm_bits(device))
2043
drbd_suspend_al(device); /* IO is still suspended here... */
2044
2045
spin_lock_irq(&device->resource->req_lock);
2046
os = drbd_read_state(device);
2047
ns = os;
2048
/* If MDF_CONSISTENT is not set go into inconsistent state,
2049
otherwise investigate MDF_WAS_UP_TO_DATE...
2050
If MDF_WAS_UP_TO_DATE is not set go into D_OUTDATED disk state,
2051
otherwise into D_CONSISTENT state.
2052
*/
2053
if (drbd_md_test_flag(device->ldev, MDF_CONSISTENT)) {
2054
if (drbd_md_test_flag(device->ldev, MDF_WAS_UP_TO_DATE))
2055
ns.disk = D_CONSISTENT;
2056
else
2057
ns.disk = D_OUTDATED;
2058
} else {
2059
ns.disk = D_INCONSISTENT;
2060
}
2061
2062
if (drbd_md_test_flag(device->ldev, MDF_PEER_OUT_DATED))
2063
ns.pdsk = D_OUTDATED;
2064
2065
rcu_read_lock();
2066
if (ns.disk == D_CONSISTENT &&
2067
(ns.pdsk == D_OUTDATED || rcu_dereference(device->ldev->disk_conf)->fencing == FP_DONT_CARE))
2068
ns.disk = D_UP_TO_DATE;
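/*
 * Editor's summary of the resulting disk state (illustrative):
 *   !MDF_CONSISTENT                        -> D_INCONSISTENT
 *    MDF_CONSISTENT, !MDF_WAS_UP_TO_DATE   -> D_OUTDATED
 *    MDF_CONSISTENT,  MDF_WAS_UP_TO_DATE   -> D_CONSISTENT, promoted to
 *      D_UP_TO_DATE if the peer is already known to be outdated or
 *      fencing is set to "dont-care".
 */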
2069
2070
/* All tests on MDF_PRIMARY_IND, MDF_CONNECTED_IND,
2071
MDF_CONSISTENT and MDF_WAS_UP_TO_DATE must happen before
2072
this point, because drbd_request_state() modifies these
2073
flags. */
2074
2075
if (rcu_dereference(device->ldev->disk_conf)->al_updates)
2076
device->ldev->md.flags &= ~MDF_AL_DISABLED;
2077
else
2078
device->ldev->md.flags |= MDF_AL_DISABLED;
2079
2080
rcu_read_unlock();
2081
2082
/* In case we are C_CONNECTED postpone any decision on the new disk
2083
state until after the negotiation phase. */
2084
if (device->state.conn == C_CONNECTED) {
2085
device->new_state_tmp.i = ns.i;
2086
ns.i = os.i;
2087
ns.disk = D_NEGOTIATING;
2088
2089
/* We expect to receive up-to-date UUIDs soon.
2090
To avoid a race in receive_state, free p_uuid while
2091
holding req_lock. I.e. atomic with the state change */
2092
kfree(device->p_uuid);
2093
device->p_uuid = NULL;
2094
}
2095
2096
rv = _drbd_set_state(device, ns, CS_VERBOSE, NULL);
2097
spin_unlock_irq(&device->resource->req_lock);
2098
2099
if (rv < SS_SUCCESS)
2100
goto force_diskless_dec;
2101
2102
mod_timer(&device->request_timer, jiffies + HZ);
2103
2104
if (device->state.role == R_PRIMARY)
2105
device->ldev->md.uuid[UI_CURRENT] |= (u64)1;
2106
else
2107
device->ldev->md.uuid[UI_CURRENT] &= ~(u64)1;
2108
2109
drbd_md_mark_dirty(device);
2110
drbd_md_sync(device);
2111
2112
kobject_uevent(&disk_to_dev(device->vdisk)->kobj, KOBJ_CHANGE);
2113
put_ldev(device);
2114
conn_reconfig_done(connection);
2115
mutex_unlock(&adm_ctx.resource->adm_mutex);
2116
drbd_adm_finish(&adm_ctx, info, retcode);
2117
return 0;
2118
2119
force_diskless_dec:
2120
put_ldev(device);
2121
force_diskless:
2122
drbd_force_state(device, NS(disk, D_DISKLESS));
2123
drbd_md_sync(device);
2124
fail:
2125
conn_reconfig_done(connection);
2126
if (nbc) {
2127
close_backing_dev(device, nbc->f_md_bdev,
2128
nbc->md_bdev != nbc->backing_bdev);
2129
close_backing_dev(device, nbc->backing_bdev_file, true);
2130
kfree(nbc);
2131
}
2132
kfree(new_disk_conf);
2133
lc_destroy(resync_lru);
2134
kfree(new_plan);
2135
mutex_unlock(&adm_ctx.resource->adm_mutex);
2136
finish:
2137
drbd_adm_finish(&adm_ctx, info, retcode);
2138
return 0;
2139
}
2140
2141
static int adm_detach(struct drbd_device *device, int force)
2142
{
2143
if (force) {
2144
set_bit(FORCE_DETACH, &device->flags);
2145
drbd_force_state(device, NS(disk, D_FAILED));
2146
return SS_SUCCESS;
2147
}
2148
2149
return drbd_request_detach_interruptible(device);
2150
}
2151
2152
/* Detaching the disk is a process in multiple stages. First we need to lock
2153
* out application IO, in-flight IO, IO stuck in drbd_al_begin_io.
2154
* Then we transition to D_DISKLESS, and wait for put_ldev() to return all
2155
* internal references as well.
2156
* Only then have we finally detached. */
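/*
 * Editor's rough staging of the above (illustrative only):
 *   1. drbd_adm_detach() -> adm_detach() requests a disk state change,
 *      either forced (drbd_force_state(disk = D_FAILED)) or gracefully and
 *      interruptibly via drbd_request_detach_interruptible().
 *   2. The state engine drains application IO and in-flight local IO.
 *   3. The disk state moves on to D_DISKLESS; once put_ldev() has dropped
 *      the remaining internal references, the backing device is closed.
 */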
2157
int drbd_adm_detach(struct sk_buff *skb, struct genl_info *info)
2158
{
2159
struct drbd_config_context adm_ctx;
2160
enum drbd_ret_code retcode;
2161
struct detach_parms parms = { };
2162
int err;
2163
2164
retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
2165
if (!adm_ctx.reply_skb)
2166
return retcode;
2167
if (retcode != NO_ERROR)
2168
goto out;
2169
2170
if (info->attrs[DRBD_NLA_DETACH_PARMS]) {
2171
err = detach_parms_from_attrs(&parms, info);
2172
if (err) {
2173
retcode = ERR_MANDATORY_TAG;
2174
drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
2175
goto out;
2176
}
2177
}
2178
2179
mutex_lock(&adm_ctx.resource->adm_mutex);
2180
retcode = adm_detach(adm_ctx.device, parms.force_detach);
2181
mutex_unlock(&adm_ctx.resource->adm_mutex);
2182
out:
2183
drbd_adm_finish(&adm_ctx, info, retcode);
2184
return 0;
2185
}
2186
2187
static bool conn_resync_running(struct drbd_connection *connection)
2188
{
2189
struct drbd_peer_device *peer_device;
2190
bool rv = false;
2191
int vnr;
2192
2193
rcu_read_lock();
2194
idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
2195
struct drbd_device *device = peer_device->device;
2196
if (device->state.conn == C_SYNC_SOURCE ||
2197
device->state.conn == C_SYNC_TARGET ||
2198
device->state.conn == C_PAUSED_SYNC_S ||
2199
device->state.conn == C_PAUSED_SYNC_T) {
2200
rv = true;
2201
break;
2202
}
2203
}
2204
rcu_read_unlock();
2205
2206
return rv;
2207
}
2208
2209
static bool conn_ov_running(struct drbd_connection *connection)
2210
{
2211
struct drbd_peer_device *peer_device;
2212
bool rv = false;
2213
int vnr;
2214
2215
rcu_read_lock();
2216
idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
2217
struct drbd_device *device = peer_device->device;
2218
if (device->state.conn == C_VERIFY_S ||
2219
device->state.conn == C_VERIFY_T) {
2220
rv = true;
2221
break;
2222
}
2223
}
2224
rcu_read_unlock();
2225
2226
return rv;
2227
}
2228
2229
static enum drbd_ret_code
2230
_check_net_options(struct drbd_connection *connection, struct net_conf *old_net_conf, struct net_conf *new_net_conf)
2231
{
2232
struct drbd_peer_device *peer_device;
2233
int i;
2234
2235
if (old_net_conf && connection->cstate == C_WF_REPORT_PARAMS && connection->agreed_pro_version < 100) {
2236
if (new_net_conf->wire_protocol != old_net_conf->wire_protocol)
2237
return ERR_NEED_APV_100;
2238
2239
if (new_net_conf->two_primaries != old_net_conf->two_primaries)
2240
return ERR_NEED_APV_100;
2241
2242
if (strcmp(new_net_conf->integrity_alg, old_net_conf->integrity_alg))
2243
return ERR_NEED_APV_100;
2244
}
2245
2246
if (!new_net_conf->two_primaries &&
2247
conn_highest_role(connection) == R_PRIMARY &&
2248
conn_highest_peer(connection) == R_PRIMARY)
2249
return ERR_NEED_ALLOW_TWO_PRI;
2250
2251
if (new_net_conf->two_primaries &&
2252
(new_net_conf->wire_protocol != DRBD_PROT_C))
2253
return ERR_NOT_PROTO_C;
2254
2255
idr_for_each_entry(&connection->peer_devices, peer_device, i) {
2256
struct drbd_device *device = peer_device->device;
2257
if (get_ldev(device)) {
2258
enum drbd_fencing_p fp = rcu_dereference(device->ldev->disk_conf)->fencing;
2259
put_ldev(device);
2260
if (new_net_conf->wire_protocol == DRBD_PROT_A && fp == FP_STONITH)
2261
return ERR_STONITH_AND_PROT_A;
2262
}
2263
if (device->state.role == R_PRIMARY && new_net_conf->discard_my_data)
2264
return ERR_DISCARD_IMPOSSIBLE;
2265
}
2266
2267
if (new_net_conf->on_congestion != OC_BLOCK && new_net_conf->wire_protocol != DRBD_PROT_A)
2268
return ERR_CONG_NOT_PROTO_A;
2269
2270
return NO_ERROR;
2271
}
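/*
 * Editor's illustration of the constraints checked above, using the usual
 * drbd.conf keyword names (assumed, not taken from this file):
 * "allow-two-primaries yes" is only accepted together with "protocol C";
 * any "on-congestion" policy other than "block" requires "protocol A";
 * and combining "protocol A" with "fencing resource-and-stonith" is
 * rejected as ERR_STONITH_AND_PROT_A.  Changing the protocol, the
 * two-primaries setting or the integrity algorithm on a live connection
 * additionally needs both peers to speak protocol version 100 or newer.
 */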
2272
2273
static enum drbd_ret_code
2274
check_net_options(struct drbd_connection *connection, struct net_conf *new_net_conf)
2275
{
2276
enum drbd_ret_code rv;
2277
struct drbd_peer_device *peer_device;
2278
int i;
2279
2280
rcu_read_lock();
2281
rv = _check_net_options(connection, rcu_dereference(connection->net_conf), new_net_conf);
2282
rcu_read_unlock();
2283
2284
/* connection->peer_devices protected by genl_lock() here */
2285
idr_for_each_entry(&connection->peer_devices, peer_device, i) {
2286
struct drbd_device *device = peer_device->device;
2287
if (!device->bitmap) {
2288
if (drbd_bm_init(device))
2289
return ERR_NOMEM;
2290
}
2291
}
2292
2293
return rv;
2294
}
2295
2296
struct crypto {
2297
struct crypto_shash *verify_tfm;
2298
struct crypto_shash *csums_tfm;
2299
struct crypto_shash *cram_hmac_tfm;
2300
struct crypto_shash *integrity_tfm;
2301
};
2302
2303
static int
2304
alloc_shash(struct crypto_shash **tfm, char *tfm_name, int err_alg)
2305
{
2306
if (!tfm_name[0])
2307
return NO_ERROR;
2308
2309
*tfm = crypto_alloc_shash(tfm_name, 0, 0);
2310
if (IS_ERR(*tfm)) {
2311
*tfm = NULL;
2312
return err_alg;
2313
}
2314
2315
return NO_ERROR;
2316
}
2317
2318
static enum drbd_ret_code
2319
alloc_crypto(struct crypto *crypto, struct net_conf *new_net_conf)
2320
{
2321
char hmac_name[CRYPTO_MAX_ALG_NAME];
2322
enum drbd_ret_code rv;
2323
2324
rv = alloc_shash(&crypto->csums_tfm, new_net_conf->csums_alg,
2325
ERR_CSUMS_ALG);
2326
if (rv != NO_ERROR)
2327
return rv;
2328
rv = alloc_shash(&crypto->verify_tfm, new_net_conf->verify_alg,
2329
ERR_VERIFY_ALG);
2330
if (rv != NO_ERROR)
2331
return rv;
2332
rv = alloc_shash(&crypto->integrity_tfm, new_net_conf->integrity_alg,
2333
ERR_INTEGRITY_ALG);
2334
if (rv != NO_ERROR)
2335
return rv;
2336
if (new_net_conf->cram_hmac_alg[0] != 0) {
2337
snprintf(hmac_name, CRYPTO_MAX_ALG_NAME, "hmac(%s)",
2338
new_net_conf->cram_hmac_alg);
2339
2340
rv = alloc_shash(&crypto->cram_hmac_tfm, hmac_name,
2341
ERR_AUTH_ALG);
2342
}
2343
2344
return rv;
2345
}
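/*
 * Editor's example (illustrative): with cram_hmac_alg set to "sha256" the
 * snprintf() above builds the crypto API template name "hmac(sha256)",
 * which crypto_alloc_shash() then instantiates.  An algorithm name the
 * kernel cannot provide is reported back as the matching ERR_*_ALG return
 * code rather than as a raw errno.
 */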
2346
2347
static void free_crypto(struct crypto *crypto)
2348
{
2349
crypto_free_shash(crypto->cram_hmac_tfm);
2350
crypto_free_shash(crypto->integrity_tfm);
2351
crypto_free_shash(crypto->csums_tfm);
2352
crypto_free_shash(crypto->verify_tfm);
2353
}
2354
2355
int drbd_adm_net_opts(struct sk_buff *skb, struct genl_info *info)
2356
{
2357
struct drbd_config_context adm_ctx;
2358
enum drbd_ret_code retcode;
2359
struct drbd_connection *connection;
2360
struct net_conf *old_net_conf, *new_net_conf = NULL;
2361
int err;
2362
int ovr; /* online verify running */
2363
int rsr; /* re-sync running */
2364
struct crypto crypto = { };
2365
2366
retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_CONNECTION);
2367
if (!adm_ctx.reply_skb)
2368
return retcode;
2369
if (retcode != NO_ERROR)
2370
goto finish;
2371
2372
connection = adm_ctx.connection;
2373
mutex_lock(&adm_ctx.resource->adm_mutex);
2374
2375
new_net_conf = kzalloc(sizeof(struct net_conf), GFP_KERNEL);
2376
if (!new_net_conf) {
2377
retcode = ERR_NOMEM;
2378
goto out;
2379
}
2380
2381
conn_reconfig_start(connection);
2382
2383
mutex_lock(&connection->data.mutex);
2384
mutex_lock(&connection->resource->conf_update);
2385
old_net_conf = connection->net_conf;
2386
2387
if (!old_net_conf) {
2388
drbd_msg_put_info(adm_ctx.reply_skb, "net conf missing, try connect");
2389
retcode = ERR_INVALID_REQUEST;
2390
goto fail;
2391
}
2392
2393
*new_net_conf = *old_net_conf;
2394
if (should_set_defaults(info))
2395
set_net_conf_defaults(new_net_conf);
2396
2397
err = net_conf_from_attrs_for_change(new_net_conf, info);
2398
if (err && err != -ENOMSG) {
2399
retcode = ERR_MANDATORY_TAG;
2400
drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
2401
goto fail;
2402
}
2403
2404
retcode = check_net_options(connection, new_net_conf);
2405
if (retcode != NO_ERROR)
2406
goto fail;
2407
2408
/* re-sync running */
2409
rsr = conn_resync_running(connection);
2410
if (rsr && strcmp(new_net_conf->csums_alg, old_net_conf->csums_alg)) {
2411
retcode = ERR_CSUMS_RESYNC_RUNNING;
2412
goto fail;
2413
}
2414
2415
/* online verify running */
2416
ovr = conn_ov_running(connection);
2417
if (ovr && strcmp(new_net_conf->verify_alg, old_net_conf->verify_alg)) {
2418
retcode = ERR_VERIFY_RUNNING;
2419
goto fail;
2420
}
2421
2422
retcode = alloc_crypto(&crypto, new_net_conf);
2423
if (retcode != NO_ERROR)
2424
goto fail;
2425
2426
rcu_assign_pointer(connection->net_conf, new_net_conf);
2427
2428
if (!rsr) {
2429
crypto_free_shash(connection->csums_tfm);
2430
connection->csums_tfm = crypto.csums_tfm;
2431
crypto.csums_tfm = NULL;
2432
}
2433
if (!ovr) {
2434
crypto_free_shash(connection->verify_tfm);
2435
connection->verify_tfm = crypto.verify_tfm;
2436
crypto.verify_tfm = NULL;
2437
}
2438
2439
crypto_free_shash(connection->integrity_tfm);
2440
connection->integrity_tfm = crypto.integrity_tfm;
2441
if (connection->cstate >= C_WF_REPORT_PARAMS && connection->agreed_pro_version >= 100)
2442
/* Do this without trying to take connection->data.mutex again. */
2443
__drbd_send_protocol(connection, P_PROTOCOL_UPDATE);
2444
2445
crypto_free_shash(connection->cram_hmac_tfm);
2446
connection->cram_hmac_tfm = crypto.cram_hmac_tfm;
2447
2448
mutex_unlock(&connection->resource->conf_update);
2449
mutex_unlock(&connection->data.mutex);
2450
kvfree_rcu_mightsleep(old_net_conf);
2451
2452
if (connection->cstate >= C_WF_REPORT_PARAMS) {
2453
struct drbd_peer_device *peer_device;
2454
int vnr;
2455
2456
idr_for_each_entry(&connection->peer_devices, peer_device, vnr)
2457
drbd_send_sync_param(peer_device);
2458
}
2459
2460
goto done;
2461
2462
fail:
2463
mutex_unlock(&connection->resource->conf_update);
2464
mutex_unlock(&connection->data.mutex);
2465
free_crypto(&crypto);
2466
kfree(new_net_conf);
2467
done:
2468
conn_reconfig_done(connection);
2469
out:
2470
mutex_unlock(&adm_ctx.resource->adm_mutex);
2471
finish:
2472
drbd_adm_finish(&adm_ctx, info, retcode);
2473
return 0;
2474
}
2475
2476
static void connection_to_info(struct connection_info *info,
2477
struct drbd_connection *connection)
2478
{
2479
info->conn_connection_state = connection->cstate;
2480
info->conn_role = conn_highest_peer(connection);
2481
}
2482
2483
static void peer_device_to_info(struct peer_device_info *info,
2484
struct drbd_peer_device *peer_device)
2485
{
2486
struct drbd_device *device = peer_device->device;
2487
2488
info->peer_repl_state =
2489
max_t(enum drbd_conns, C_WF_REPORT_PARAMS, device->state.conn);
2490
info->peer_disk_state = device->state.pdsk;
2491
info->peer_resync_susp_user = device->state.user_isp;
2492
info->peer_resync_susp_peer = device->state.peer_isp;
2493
info->peer_resync_susp_dependency = device->state.aftr_isp;
2494
}
2495
2496
int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info)
2497
{
2498
struct connection_info connection_info;
2499
enum drbd_notification_type flags;
2500
unsigned int peer_devices = 0;
2501
struct drbd_config_context adm_ctx;
2502
struct drbd_peer_device *peer_device;
2503
struct net_conf *old_net_conf, *new_net_conf = NULL;
2504
struct crypto crypto = { };
2505
struct drbd_resource *resource;
2506
struct drbd_connection *connection;
2507
enum drbd_ret_code retcode;
2508
enum drbd_state_rv rv;
2509
int i;
2510
int err;
2511
2512
retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_RESOURCE);
2513
2514
if (!adm_ctx.reply_skb)
2515
return retcode;
2516
if (retcode != NO_ERROR)
2517
goto out;
2518
if (!(adm_ctx.my_addr && adm_ctx.peer_addr)) {
2519
drbd_msg_put_info(adm_ctx.reply_skb, "connection endpoint(s) missing");
2520
retcode = ERR_INVALID_REQUEST;
2521
goto out;
2522
}
2523
2524
/* No need for _rcu here. All reconfiguration is
2525
* strictly serialized on genl_lock(). We are protected against
2526
* concurrent reconfiguration/addition/deletion */
2527
for_each_resource(resource, &drbd_resources) {
2528
for_each_connection(connection, resource) {
2529
if (nla_len(adm_ctx.my_addr) == connection->my_addr_len &&
2530
!memcmp(nla_data(adm_ctx.my_addr), &connection->my_addr,
2531
connection->my_addr_len)) {
2532
retcode = ERR_LOCAL_ADDR;
2533
goto out;
2534
}
2535
2536
if (nla_len(adm_ctx.peer_addr) == connection->peer_addr_len &&
2537
!memcmp(nla_data(adm_ctx.peer_addr), &connection->peer_addr,
2538
connection->peer_addr_len)) {
2539
retcode = ERR_PEER_ADDR;
2540
goto out;
2541
}
2542
}
2543
}
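/*
 * Editor's example (illustrative): a connect request that reuses the local
 * address:port of any existing connection, in any resource, is rejected
 * here with ERR_LOCAL_ADDR, and a reused peer address:port with
 * ERR_PEER_ADDR, before any connection state is modified.
 */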
2544
2545
mutex_lock(&adm_ctx.resource->adm_mutex);
2546
connection = first_connection(adm_ctx.resource);
2547
conn_reconfig_start(connection);
2548
2549
if (connection->cstate > C_STANDALONE) {
2550
retcode = ERR_NET_CONFIGURED;
2551
goto fail;
2552
}
2553
2554
/* allocation not in the IO path, drbdsetup / netlink process context */
2555
new_net_conf = kzalloc(sizeof(*new_net_conf), GFP_KERNEL);
2556
if (!new_net_conf) {
2557
retcode = ERR_NOMEM;
2558
goto fail;
2559
}
2560
2561
set_net_conf_defaults(new_net_conf);
2562
2563
err = net_conf_from_attrs(new_net_conf, info);
2564
if (err && err != -ENOMSG) {
2565
retcode = ERR_MANDATORY_TAG;
2566
drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
2567
goto fail;
2568
}
2569
2570
retcode = check_net_options(connection, new_net_conf);
2571
if (retcode != NO_ERROR)
2572
goto fail;
2573
2574
retcode = alloc_crypto(&crypto, new_net_conf);
2575
if (retcode != NO_ERROR)
2576
goto fail;
2577
2578
((char *)new_net_conf->shared_secret)[SHARED_SECRET_MAX-1] = 0;
2579
2580
drbd_flush_workqueue(&connection->sender_work);
2581
2582
mutex_lock(&adm_ctx.resource->conf_update);
2583
old_net_conf = connection->net_conf;
2584
if (old_net_conf) {
2585
retcode = ERR_NET_CONFIGURED;
2586
mutex_unlock(&adm_ctx.resource->conf_update);
2587
goto fail;
2588
}
2589
rcu_assign_pointer(connection->net_conf, new_net_conf);
2590
2591
conn_free_crypto(connection);
2592
connection->cram_hmac_tfm = crypto.cram_hmac_tfm;
2593
connection->integrity_tfm = crypto.integrity_tfm;
2594
connection->csums_tfm = crypto.csums_tfm;
2595
connection->verify_tfm = crypto.verify_tfm;
2596
2597
connection->my_addr_len = nla_len(adm_ctx.my_addr);
2598
memcpy(&connection->my_addr, nla_data(adm_ctx.my_addr), connection->my_addr_len);
2599
connection->peer_addr_len = nla_len(adm_ctx.peer_addr);
2600
memcpy(&connection->peer_addr, nla_data(adm_ctx.peer_addr), connection->peer_addr_len);
2601
2602
idr_for_each_entry(&connection->peer_devices, peer_device, i) {
2603
peer_devices++;
2604
}
2605
2606
connection_to_info(&connection_info, connection);
2607
flags = (peer_devices--) ? NOTIFY_CONTINUES : 0;
2608
mutex_lock(&notification_mutex);
2609
notify_connection_state(NULL, 0, connection, &connection_info, NOTIFY_CREATE | flags);
2610
idr_for_each_entry(&connection->peer_devices, peer_device, i) {
2611
struct peer_device_info peer_device_info;
2612
2613
peer_device_to_info(&peer_device_info, peer_device);
2614
flags = (peer_devices--) ? NOTIFY_CONTINUES : 0;
2615
notify_peer_device_state(NULL, 0, peer_device, &peer_device_info, NOTIFY_CREATE | flags);
2616
}
2617
mutex_unlock(&notification_mutex);
2618
mutex_unlock(&adm_ctx.resource->conf_update);
2619
2620
rcu_read_lock();
2621
idr_for_each_entry(&connection->peer_devices, peer_device, i) {
2622
struct drbd_device *device = peer_device->device;
2623
device->send_cnt = 0;
2624
device->recv_cnt = 0;
2625
}
2626
rcu_read_unlock();
2627
2628
rv = conn_request_state(connection, NS(conn, C_UNCONNECTED), CS_VERBOSE);
2629
2630
conn_reconfig_done(connection);
2631
mutex_unlock(&adm_ctx.resource->adm_mutex);
2632
drbd_adm_finish(&adm_ctx, info, rv);
2633
return 0;
2634
2635
fail:
2636
free_crypto(&crypto);
2637
kfree(new_net_conf);
2638
2639
conn_reconfig_done(connection);
2640
mutex_unlock(&adm_ctx.resource->adm_mutex);
2641
out:
2642
drbd_adm_finish(&adm_ctx, info, retcode);
2643
return 0;
2644
}
2645
2646
static enum drbd_state_rv conn_try_disconnect(struct drbd_connection *connection, bool force)
2647
{
2648
enum drbd_conns cstate;
2649
enum drbd_state_rv rv;
2650
2651
repeat:
2652
rv = conn_request_state(connection, NS(conn, C_DISCONNECTING),
2653
force ? CS_HARD : 0);
2654
2655
switch (rv) {
2656
case SS_NOTHING_TO_DO:
2657
break;
2658
case SS_ALREADY_STANDALONE:
2659
return SS_SUCCESS;
2660
case SS_PRIMARY_NOP:
2661
/* Our state checking code wants to see the peer outdated. */
2662
rv = conn_request_state(connection, NS2(conn, C_DISCONNECTING, pdsk, D_OUTDATED), 0);
2663
2664
if (rv == SS_OUTDATE_WO_CONN) /* lost connection before graceful disconnect succeeded */
2665
rv = conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_VERBOSE);
2666
2667
break;
2668
case SS_CW_FAILED_BY_PEER:
2669
spin_lock_irq(&connection->resource->req_lock);
2670
cstate = connection->cstate;
2671
spin_unlock_irq(&connection->resource->req_lock);
2672
if (cstate <= C_WF_CONNECTION)
2673
goto repeat;
2674
/* The peer probably wants to see us outdated. */
2675
rv = conn_request_state(connection, NS2(conn, C_DISCONNECTING,
2676
disk, D_OUTDATED), 0);
2677
if (rv == SS_IS_DISKLESS || rv == SS_LOWER_THAN_OUTDATED) {
2678
rv = conn_request_state(connection, NS(conn, C_DISCONNECTING),
2679
CS_HARD);
2680
}
2681
break;
2682
default:;
2683
/* no special handling necessary */
2684
}
2685
2686
if (rv >= SS_SUCCESS) {
2687
enum drbd_state_rv rv2;
2688
/* No one else can reconfigure the network while I am here.
2689
* The state handling only uses drbd_thread_stop_nowait(),
2690
* we want to really wait here until the receiver is no more.
2691
*/
2692
drbd_thread_stop(&connection->receiver);
2693
2694
/* Race breaker. This additional state change request may be
2695
* necessary, if this was a forced disconnect during a receiver
2696
* restart. We may have "killed" the receiver thread just
2697
* after drbd_receiver() returned. Typically, we should be
2698
* C_STANDALONE already, now, and this becomes a no-op.
2699
*/
2700
rv2 = conn_request_state(connection, NS(conn, C_STANDALONE),
2701
CS_VERBOSE | CS_HARD);
2702
if (rv2 < SS_SUCCESS)
2703
drbd_err(connection,
2704
"unexpected rv2=%d in conn_try_disconnect()\n",
2705
rv2);
2706
/* Unlike in DRBD 9, the state engine has generated
2707
* NOTIFY_DESTROY events before clearing connection->net_conf. */
2708
}
2709
return rv;
2710
}
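/*
 * Editor's sketch (illustrative): a successful disconnect thus moves the
 * connection to C_DISCONNECTING (possibly outdating one side on the way),
 * stops the receiver thread synchronously, and finally issues a forced
 * request to C_STANDALONE as a race breaker, which is normally a no-op
 * because the state engine already reached C_STANDALONE on its own.
 */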
2711
2712
int drbd_adm_disconnect(struct sk_buff *skb, struct genl_info *info)
2713
{
2714
struct drbd_config_context adm_ctx;
2715
struct disconnect_parms parms;
2716
struct drbd_connection *connection;
2717
enum drbd_state_rv rv;
2718
enum drbd_ret_code retcode;
2719
int err;
2720
2721
retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_CONNECTION);
2722
if (!adm_ctx.reply_skb)
2723
return retcode;
2724
if (retcode != NO_ERROR)
2725
goto fail;
2726
2727
connection = adm_ctx.connection;
2728
memset(&parms, 0, sizeof(parms));
2729
if (info->attrs[DRBD_NLA_DISCONNECT_PARMS]) {
2730
err = disconnect_parms_from_attrs(&parms, info);
2731
if (err) {
2732
retcode = ERR_MANDATORY_TAG;
2733
drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
2734
goto fail;
2735
}
2736
}
2737
2738
mutex_lock(&adm_ctx.resource->adm_mutex);
2739
rv = conn_try_disconnect(connection, parms.force_disconnect);
2740
mutex_unlock(&adm_ctx.resource->adm_mutex);
2741
if (rv < SS_SUCCESS) {
2742
drbd_adm_finish(&adm_ctx, info, rv);
2743
return 0;
2744
}
2745
retcode = NO_ERROR;
2746
fail:
2747
drbd_adm_finish(&adm_ctx, info, retcode);
2748
return 0;
2749
}
2750
2751
void resync_after_online_grow(struct drbd_device *device)
2752
{
2753
int iass; /* I am sync source */
2754
2755
drbd_info(device, "Resync of new storage after online grow\n");
2756
if (device->state.role != device->state.peer)
2757
iass = (device->state.role == R_PRIMARY);
2758
else
2759
iass = test_bit(RESOLVE_CONFLICTS, &first_peer_device(device)->connection->flags);
2760
2761
if (iass)
2762
drbd_start_resync(device, C_SYNC_SOURCE);
2763
else
2764
_drbd_request_state(device, NS(conn, C_WF_SYNC_UUID), CS_VERBOSE + CS_SERIALIZE);
2765
}
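/*
 * Editor's note (illustrative): after an online grow exactly one side
 * becomes sync source for the newly added area.  If the roles differ, the
 * Primary takes that part; if both sides have the same role, the tie is
 * broken by the connection's RESOLVE_CONFLICTS flag, and the node that is
 * not sync source simply waits in C_WF_SYNC_UUID for the peer to start.
 */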
2766
2767
int drbd_adm_resize(struct sk_buff *skb, struct genl_info *info)
2768
{
2769
struct drbd_config_context adm_ctx;
2770
struct disk_conf *old_disk_conf, *new_disk_conf = NULL;
2771
struct resize_parms rs;
2772
struct drbd_device *device;
2773
enum drbd_ret_code retcode;
2774
enum determine_dev_size dd;
2775
bool change_al_layout = false;
2776
enum dds_flags ddsf;
2777
sector_t u_size;
2778
int err;
2779
2780
retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
2781
if (!adm_ctx.reply_skb)
2782
return retcode;
2783
if (retcode != NO_ERROR)
2784
goto finish;
2785
2786
mutex_lock(&adm_ctx.resource->adm_mutex);
2787
device = adm_ctx.device;
2788
if (!get_ldev(device)) {
2789
retcode = ERR_NO_DISK;
2790
goto fail;
2791
}
2792
2793
memset(&rs, 0, sizeof(struct resize_parms));
2794
rs.al_stripes = device->ldev->md.al_stripes;
2795
rs.al_stripe_size = device->ldev->md.al_stripe_size_4k * 4;
2796
if (info->attrs[DRBD_NLA_RESIZE_PARMS]) {
2797
err = resize_parms_from_attrs(&rs, info);
2798
if (err) {
2799
retcode = ERR_MANDATORY_TAG;
2800
drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
2801
goto fail_ldev;
2802
}
2803
}
2804
2805
if (device->state.conn > C_CONNECTED) {
2806
retcode = ERR_RESIZE_RESYNC;
2807
goto fail_ldev;
2808
}
2809
2810
if (device->state.role == R_SECONDARY &&
2811
device->state.peer == R_SECONDARY) {
2812
retcode = ERR_NO_PRIMARY;
2813
goto fail_ldev;
2814
}
2815
2816
if (rs.no_resync && first_peer_device(device)->connection->agreed_pro_version < 93) {
2817
retcode = ERR_NEED_APV_93;
2818
goto fail_ldev;
2819
}
2820
2821
rcu_read_lock();
2822
u_size = rcu_dereference(device->ldev->disk_conf)->disk_size;
2823
rcu_read_unlock();
2824
if (u_size != (sector_t)rs.resize_size) {
2825
new_disk_conf = kmalloc(sizeof(struct disk_conf), GFP_KERNEL);
2826
if (!new_disk_conf) {
2827
retcode = ERR_NOMEM;
2828
goto fail_ldev;
2829
}
2830
}
2831
2832
if (device->ldev->md.al_stripes != rs.al_stripes ||
2833
device->ldev->md.al_stripe_size_4k != rs.al_stripe_size / 4) {
2834
u32 al_size_k = rs.al_stripes * rs.al_stripe_size;
2835
2836
if (al_size_k > (16 * 1024 * 1024)) {
2837
retcode = ERR_MD_LAYOUT_TOO_BIG;
2838
goto fail_ldev;
2839
}
2840
2841
if (al_size_k < MD_32kB_SECT/2) {
2842
retcode = ERR_MD_LAYOUT_TOO_SMALL;
2843
goto fail_ldev;
2844
}
2845
2846
if (device->state.conn != C_CONNECTED && !rs.resize_force) {
2847
retcode = ERR_MD_LAYOUT_CONNECTED;
2848
goto fail_ldev;
2849
}
2850
2851
change_al_layout = true;
2852
}
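/*
 * Editor's worked example (made-up values): requesting 4 AL stripes of
 * 128 KiB each gives al_size_k = 4 * 128 = 512 KiB, which lies within the
 * accepted range of 32 KiB (MD_32kB_SECT/2, assuming MD_32kB_SECT is
 * 32 KiB expressed in sectors) to 16 GiB (16 * 1024 * 1024 KiB); a layout
 * change is additionally only allowed while connected, or when the force
 * flag (resize_force) is given.
 */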
2853
2854
if (device->ldev->known_size != drbd_get_capacity(device->ldev->backing_bdev))
2855
device->ldev->known_size = drbd_get_capacity(device->ldev->backing_bdev);
2856
2857
if (new_disk_conf) {
2858
mutex_lock(&device->resource->conf_update);
2859
old_disk_conf = device->ldev->disk_conf;
2860
*new_disk_conf = *old_disk_conf;
2861
new_disk_conf->disk_size = (sector_t)rs.resize_size;
2862
rcu_assign_pointer(device->ldev->disk_conf, new_disk_conf);
2863
mutex_unlock(&device->resource->conf_update);
2864
kvfree_rcu_mightsleep(old_disk_conf);
2865
new_disk_conf = NULL;
2866
}
2867
2868
ddsf = (rs.resize_force ? DDSF_FORCED : 0) | (rs.no_resync ? DDSF_NO_RESYNC : 0);
2869
dd = drbd_determine_dev_size(device, ddsf, change_al_layout ? &rs : NULL);
2870
drbd_md_sync(device);
2871
put_ldev(device);
2872
if (dd == DS_ERROR) {
2873
retcode = ERR_NOMEM_BITMAP;
2874
goto fail;
2875
} else if (dd == DS_ERROR_SPACE_MD) {
2876
retcode = ERR_MD_LAYOUT_NO_FIT;
2877
goto fail;
2878
} else if (dd == DS_ERROR_SHRINK) {
2879
retcode = ERR_IMPLICIT_SHRINK;
2880
goto fail;
2881
}
2882
2883
if (device->state.conn == C_CONNECTED) {
2884
if (dd == DS_GREW)
2885
set_bit(RESIZE_PENDING, &device->flags);
2886
2887
drbd_send_uuids(first_peer_device(device));
2888
drbd_send_sizes(first_peer_device(device), 1, ddsf);
2889
}
2890
2891
fail:
2892
mutex_unlock(&adm_ctx.resource->adm_mutex);
2893
finish:
2894
drbd_adm_finish(&adm_ctx, info, retcode);
2895
return 0;
2896
2897
fail_ldev:
2898
put_ldev(device);
2899
kfree(new_disk_conf);
2900
goto fail;
2901
}
2902
2903
int drbd_adm_resource_opts(struct sk_buff *skb, struct genl_info *info)
2904
{
2905
struct drbd_config_context adm_ctx;
2906
enum drbd_ret_code retcode;
2907
struct res_opts res_opts;
2908
int err;
2909
2910
retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_RESOURCE);
2911
if (!adm_ctx.reply_skb)
2912
return retcode;
2913
if (retcode != NO_ERROR)
2914
goto fail;
2915
2916
res_opts = adm_ctx.resource->res_opts;
2917
if (should_set_defaults(info))
2918
set_res_opts_defaults(&res_opts);
2919
2920
err = res_opts_from_attrs(&res_opts, info);
2921
if (err && err != -ENOMSG) {
2922
retcode = ERR_MANDATORY_TAG;
2923
drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
2924
goto fail;
2925
}
2926
2927
mutex_lock(&adm_ctx.resource->adm_mutex);
2928
err = set_resource_options(adm_ctx.resource, &res_opts);
2929
if (err) {
2930
retcode = ERR_INVALID_REQUEST;
2931
if (err == -ENOMEM)
2932
retcode = ERR_NOMEM;
2933
}
2934
mutex_unlock(&adm_ctx.resource->adm_mutex);
2935
2936
fail:
2937
drbd_adm_finish(&adm_ctx, info, retcode);
2938
return 0;
2939
}
2940
2941
int drbd_adm_invalidate(struct sk_buff *skb, struct genl_info *info)
2942
{
2943
struct drbd_config_context adm_ctx;
2944
struct drbd_device *device;
2945
int retcode; /* enum drbd_ret_code rsp. enum drbd_state_rv */
2946
2947
retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
2948
if (!adm_ctx.reply_skb)
2949
return retcode;
2950
if (retcode != NO_ERROR)
2951
goto out;
2952
2953
device = adm_ctx.device;
2954
if (!get_ldev(device)) {
2955
retcode = ERR_NO_DISK;
2956
goto out;
2957
}
2958
2959
mutex_lock(&adm_ctx.resource->adm_mutex);
2960
2961
/* If there is still bitmap IO pending, probably because of a previous
2962
* resync just being finished, wait for it before requesting a new resync.
2963
* Also wait for its after_state_ch(). */
2964
drbd_suspend_io(device);
2965
wait_event(device->misc_wait, !test_bit(BITMAP_IO, &device->flags));
2966
drbd_flush_workqueue(&first_peer_device(device)->connection->sender_work);
2967
2968
/* If we happen to be C_STANDALONE R_SECONDARY, just change to
2969
* D_INCONSISTENT, and set all bits in the bitmap. Otherwise,
2970
* try to start a resync handshake as sync target for full sync.
2971
*/
2972
if (device->state.conn == C_STANDALONE && device->state.role == R_SECONDARY) {
2973
retcode = drbd_request_state(device, NS(disk, D_INCONSISTENT));
2974
if (retcode >= SS_SUCCESS) {
2975
if (drbd_bitmap_io(device, &drbd_bmio_set_n_write,
2976
"set_n_write from invalidate", BM_LOCKED_MASK, NULL))
2977
retcode = ERR_IO_MD_DISK;
2978
}
2979
} else
2980
retcode = drbd_request_state(device, NS(conn, C_STARTING_SYNC_T));
2981
drbd_resume_io(device);
2982
mutex_unlock(&adm_ctx.resource->adm_mutex);
2983
put_ldev(device);
2984
out:
2985
drbd_adm_finish(&adm_ctx, info, retcode);
2986
return 0;
2987
}
2988
2989
static int drbd_adm_simple_request_state(struct sk_buff *skb, struct genl_info *info,
2990
union drbd_state mask, union drbd_state val)
2991
{
2992
struct drbd_config_context adm_ctx;
2993
enum drbd_ret_code retcode;
2994
2995
retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
2996
if (!adm_ctx.reply_skb)
2997
return retcode;
2998
if (retcode != NO_ERROR)
2999
goto out;
3000
3001
mutex_lock(&adm_ctx.resource->adm_mutex);
3002
retcode = drbd_request_state(adm_ctx.device, mask, val);
3003
mutex_unlock(&adm_ctx.resource->adm_mutex);
3004
out:
3005
drbd_adm_finish(&adm_ctx, info, retcode);
3006
return 0;
3007
}
3008
3009
static int drbd_bmio_set_susp_al(struct drbd_device *device,
3010
struct drbd_peer_device *peer_device) __must_hold(local)
3011
{
3012
int rv;
3013
3014
rv = drbd_bmio_set_n_write(device, peer_device);
3015
drbd_suspend_al(device);
3016
return rv;
3017
}
3018
3019
int drbd_adm_invalidate_peer(struct sk_buff *skb, struct genl_info *info)
3020
{
3021
struct drbd_config_context adm_ctx;
3022
int retcode; /* drbd_ret_code, drbd_state_rv */
3023
struct drbd_device *device;
3024
3025
retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
3026
if (!adm_ctx.reply_skb)
3027
return retcode;
3028
if (retcode != NO_ERROR)
3029
goto out;
3030
3031
device = adm_ctx.device;
3032
if (!get_ldev(device)) {
3033
retcode = ERR_NO_DISK;
3034
goto out;
3035
}
3036
3037
mutex_lock(&adm_ctx.resource->adm_mutex);
3038
3039
/* If there is still bitmap IO pending, probably because of a previous
3040
* resync just being finished, wait for it before requesting a new resync.
3041
* Also wait for its after_state_ch(). */
3042
drbd_suspend_io(device);
3043
wait_event(device->misc_wait, !test_bit(BITMAP_IO, &device->flags));
3044
drbd_flush_workqueue(&first_peer_device(device)->connection->sender_work);
3045
3046
/* If we happen to be C_STANDALONE R_PRIMARY, just set all bits
3047
* in the bitmap. Otherwise, try to start a resync handshake
3048
* as sync source for full sync.
3049
*/
3050
if (device->state.conn == C_STANDALONE && device->state.role == R_PRIMARY) {
3051
/* The peer will get a resync upon connect anyway. Just make that
3052
into a full resync. */
3053
retcode = drbd_request_state(device, NS(pdsk, D_INCONSISTENT));
3054
if (retcode >= SS_SUCCESS) {
3055
if (drbd_bitmap_io(device, &drbd_bmio_set_susp_al,
3056
"set_n_write from invalidate_peer",
3057
BM_LOCKED_SET_ALLOWED, NULL))
3058
retcode = ERR_IO_MD_DISK;
3059
}
3060
} else
3061
retcode = drbd_request_state(device, NS(conn, C_STARTING_SYNC_S));
3062
drbd_resume_io(device);
3063
mutex_unlock(&adm_ctx.resource->adm_mutex);
3064
put_ldev(device);
3065
out:
3066
drbd_adm_finish(&adm_ctx, info, retcode);
3067
return 0;
3068
}
3069
3070
int drbd_adm_pause_sync(struct sk_buff *skb, struct genl_info *info)
3071
{
3072
struct drbd_config_context adm_ctx;
3073
enum drbd_ret_code retcode;
3074
3075
retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
3076
if (!adm_ctx.reply_skb)
3077
return retcode;
3078
if (retcode != NO_ERROR)
3079
goto out;
3080
3081
mutex_lock(&adm_ctx.resource->adm_mutex);
3082
if (drbd_request_state(adm_ctx.device, NS(user_isp, 1)) == SS_NOTHING_TO_DO)
3083
retcode = ERR_PAUSE_IS_SET;
3084
mutex_unlock(&adm_ctx.resource->adm_mutex);
3085
out:
3086
drbd_adm_finish(&adm_ctx, info, retcode);
3087
return 0;
3088
}
3089
3090
int drbd_adm_resume_sync(struct sk_buff *skb, struct genl_info *info)
3091
{
3092
struct drbd_config_context adm_ctx;
3093
union drbd_dev_state s;
3094
enum drbd_ret_code retcode;
3095
3096
retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
3097
if (!adm_ctx.reply_skb)
3098
return retcode;
3099
if (retcode != NO_ERROR)
3100
goto out;
3101
3102
mutex_lock(&adm_ctx.resource->adm_mutex);
3103
if (drbd_request_state(adm_ctx.device, NS(user_isp, 0)) == SS_NOTHING_TO_DO) {
3104
s = adm_ctx.device->state;
3105
if (s.conn == C_PAUSED_SYNC_S || s.conn == C_PAUSED_SYNC_T) {
3106
retcode = s.aftr_isp ? ERR_PIC_AFTER_DEP :
3107
s.peer_isp ? ERR_PIC_PEER_DEP : ERR_PAUSE_IS_CLEAR;
3108
} else {
3109
retcode = ERR_PAUSE_IS_CLEAR;
3110
}
3111
}
3112
mutex_unlock(&adm_ctx.resource->adm_mutex);
3113
out:
3114
drbd_adm_finish(&adm_ctx, info, retcode);
3115
return 0;
3116
}
3117
3118
int drbd_adm_suspend_io(struct sk_buff *skb, struct genl_info *info)
3119
{
3120
return drbd_adm_simple_request_state(skb, info, NS(susp, 1));
3121
}
3122
3123
int drbd_adm_resume_io(struct sk_buff *skb, struct genl_info *info)
3124
{
3125
struct drbd_config_context adm_ctx;
3126
struct drbd_device *device;
3127
int retcode; /* enum drbd_ret_code rsp. enum drbd_state_rv */
3128
3129
retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
3130
if (!adm_ctx.reply_skb)
3131
return retcode;
3132
if (retcode != NO_ERROR)
3133
goto out;
3134
3135
mutex_lock(&adm_ctx.resource->adm_mutex);
3136
device = adm_ctx.device;
3137
if (test_bit(NEW_CUR_UUID, &device->flags)) {
3138
if (get_ldev_if_state(device, D_ATTACHING)) {
3139
drbd_uuid_new_current(device);
3140
put_ldev(device);
3141
} else {
3142
/* This is effectively a multi-stage "forced down".
3143
* The NEW_CUR_UUID bit is supposedly only set if we
3144
* lost the replication connection, and are configured
3145
* to freeze IO and wait for some fence-peer handler.
3146
* So we still don't have a replication connection.
3147
* And now we don't have a local disk either. After
3148
* resume, we will fail all pending and new IO, because
3149
* we don't have any data anymore. Which means we will
3150
* eventually be able to terminate all users of this
3151
* device, and then take it down. By bumping the
3152
* "effective" data uuid, we make sure that you really
3153
* need to tear down before you reconfigure, we will
3154
* then refuse to re-connect or re-attach (because no
3155
* matching real data uuid exists).
3156
*/
3157
u64 val;
3158
get_random_bytes(&val, sizeof(u64));
3159
drbd_set_ed_uuid(device, val);
3160
drbd_warn(device, "Resumed without access to data; please tear down before attempting to re-configure.\n");
3161
}
3162
clear_bit(NEW_CUR_UUID, &device->flags);
3163
}
3164
drbd_suspend_io(device);
3165
retcode = drbd_request_state(device, NS3(susp, 0, susp_nod, 0, susp_fen, 0));
3166
if (retcode == SS_SUCCESS) {
3167
if (device->state.conn < C_CONNECTED)
3168
tl_clear(first_peer_device(device)->connection);
3169
if (device->state.disk == D_DISKLESS || device->state.disk == D_FAILED)
3170
tl_restart(first_peer_device(device)->connection, FAIL_FROZEN_DISK_IO);
3171
}
3172
drbd_resume_io(device);
3173
mutex_unlock(&adm_ctx.resource->adm_mutex);
3174
out:
3175
drbd_adm_finish(&adm_ctx, info, retcode);
3176
return 0;
3177
}
3178
3179
int drbd_adm_outdate(struct sk_buff *skb, struct genl_info *info)
3180
{
3181
return drbd_adm_simple_request_state(skb, info, NS(disk, D_OUTDATED));
3182
}
3183
3184
static int nla_put_drbd_cfg_context(struct sk_buff *skb,
3185
struct drbd_resource *resource,
3186
struct drbd_connection *connection,
3187
struct drbd_device *device)
3188
{
3189
struct nlattr *nla;
3190
nla = nla_nest_start_noflag(skb, DRBD_NLA_CFG_CONTEXT);
3191
if (!nla)
3192
goto nla_put_failure;
3193
if (device &&
3194
nla_put_u32(skb, T_ctx_volume, device->vnr))
3195
goto nla_put_failure;
3196
if (nla_put_string(skb, T_ctx_resource_name, resource->name))
3197
goto nla_put_failure;
3198
if (connection) {
3199
if (connection->my_addr_len &&
3200
nla_put(skb, T_ctx_my_addr, connection->my_addr_len, &connection->my_addr))
3201
goto nla_put_failure;
3202
if (connection->peer_addr_len &&
3203
nla_put(skb, T_ctx_peer_addr, connection->peer_addr_len, &connection->peer_addr))
3204
goto nla_put_failure;
3205
}
3206
nla_nest_end(skb, nla);
3207
return 0;
3208
3209
nla_put_failure:
3210
if (nla)
3211
nla_nest_cancel(skb, nla);
3212
return -EMSGSIZE;
3213
}
3214
3215
/*
3216
* The generic netlink dump callbacks are called outside the genl_lock(), so
3217
* they cannot use the simple attribute parsing code which uses global
3218
* attribute tables.
3219
*/
3220
static struct nlattr *find_cfg_context_attr(const struct nlmsghdr *nlh, int attr)
3221
{
3222
const unsigned hdrlen = GENL_HDRLEN + GENL_MAGIC_FAMILY_HDRSZ;
3223
const int maxtype = ARRAY_SIZE(drbd_cfg_context_nl_policy) - 1;
3224
struct nlattr *nla;
3225
3226
nla = nla_find(nlmsg_attrdata(nlh, hdrlen), nlmsg_attrlen(nlh, hdrlen),
3227
DRBD_NLA_CFG_CONTEXT);
3228
if (!nla)
3229
return NULL;
3230
return drbd_nla_find_nested(maxtype, nla, __nla_type(attr));
3231
}
3232
3233
static void resource_to_info(struct resource_info *, struct drbd_resource *);
3234
3235
int drbd_adm_dump_resources(struct sk_buff *skb, struct netlink_callback *cb)
3236
{
3237
struct drbd_genlmsghdr *dh;
3238
struct drbd_resource *resource;
3239
struct resource_info resource_info;
3240
struct resource_statistics resource_statistics;
3241
int err;
3242
3243
rcu_read_lock();
3244
if (cb->args[0]) {
3245
for_each_resource_rcu(resource, &drbd_resources)
3246
if (resource == (struct drbd_resource *)cb->args[0])
3247
goto found_resource;
3248
err = 0; /* resource was probably deleted */
3249
goto out;
3250
}
3251
resource = list_entry(&drbd_resources,
3252
struct drbd_resource, resources);
3253
3254
found_resource:
3255
list_for_each_entry_continue_rcu(resource, &drbd_resources, resources) {
3256
goto put_result;
3257
}
3258
err = 0;
3259
goto out;
3260
3261
put_result:
3262
dh = genlmsg_put(skb, NETLINK_CB(cb->skb).portid,
3263
cb->nlh->nlmsg_seq, &drbd_genl_family,
3264
NLM_F_MULTI, DRBD_ADM_GET_RESOURCES);
3265
err = -ENOMEM;
3266
if (!dh)
3267
goto out;
3268
dh->minor = -1U;
3269
dh->ret_code = NO_ERROR;
3270
err = nla_put_drbd_cfg_context(skb, resource, NULL, NULL);
3271
if (err)
3272
goto out;
3273
err = res_opts_to_skb(skb, &resource->res_opts, !capable(CAP_SYS_ADMIN));
3274
if (err)
3275
goto out;
3276
resource_to_info(&resource_info, resource);
3277
err = resource_info_to_skb(skb, &resource_info, !capable(CAP_SYS_ADMIN));
3278
if (err)
3279
goto out;
3280
resource_statistics.res_stat_write_ordering = resource->write_ordering;
3281
err = resource_statistics_to_skb(skb, &resource_statistics, !capable(CAP_SYS_ADMIN));
3282
if (err)
3283
goto out;
3284
cb->args[0] = (long)resource;
3285
genlmsg_end(skb, dh);
3286
err = 0;
3287
3288
out:
3289
rcu_read_unlock();
3290
if (err)
3291
return err;
3292
return skb->len;
3293
}
3294
3295
static void device_to_statistics(struct device_statistics *s,
3296
struct drbd_device *device)
3297
{
3298
memset(s, 0, sizeof(*s));
3299
s->dev_upper_blocked = !may_inc_ap_bio(device);
3300
if (get_ldev(device)) {
3301
struct drbd_md *md = &device->ldev->md;
3302
u64 *history_uuids = (u64 *)s->history_uuids;
3303
int n;
3304
3305
spin_lock_irq(&md->uuid_lock);
3306
s->dev_current_uuid = md->uuid[UI_CURRENT];
3307
BUILD_BUG_ON(sizeof(s->history_uuids) < UI_HISTORY_END - UI_HISTORY_START + 1);
3308
for (n = 0; n < UI_HISTORY_END - UI_HISTORY_START + 1; n++)
3309
history_uuids[n] = md->uuid[UI_HISTORY_START + n];
3310
for (; n < HISTORY_UUIDS; n++)
3311
history_uuids[n] = 0;
3312
s->history_uuids_len = HISTORY_UUIDS;
3313
spin_unlock_irq(&md->uuid_lock);
3314
3315
s->dev_disk_flags = md->flags;
3316
put_ldev(device);
3317
}
3318
s->dev_size = get_capacity(device->vdisk);
3319
s->dev_read = device->read_cnt;
3320
s->dev_write = device->writ_cnt;
3321
s->dev_al_writes = device->al_writ_cnt;
3322
s->dev_bm_writes = device->bm_writ_cnt;
3323
s->dev_upper_pending = atomic_read(&device->ap_bio_cnt);
3324
s->dev_lower_pending = atomic_read(&device->local_cnt);
3325
s->dev_al_suspended = test_bit(AL_SUSPENDED, &device->flags);
3326
s->dev_exposed_data_uuid = device->ed_uuid;
3327
}
3328
3329
static int put_resource_in_arg0(struct netlink_callback *cb, int holder_nr)
3330
{
3331
if (cb->args[0]) {
3332
struct drbd_resource *resource =
3333
(struct drbd_resource *)cb->args[0];
3334
kref_put(&resource->kref, drbd_destroy_resource);
3335
}
3336
3337
return 0;
3338
}
3339
3340
int drbd_adm_dump_devices_done(struct netlink_callback *cb)
{
3341
return put_resource_in_arg0(cb, 7);
3342
}
3343
3344
static void device_to_info(struct device_info *, struct drbd_device *);
3345
3346
int drbd_adm_dump_devices(struct sk_buff *skb, struct netlink_callback *cb)
3347
{
3348
struct nlattr *resource_filter;
3349
struct drbd_resource *resource;
3350
struct drbd_device *device;
3351
int minor, err, retcode;
3352
struct drbd_genlmsghdr *dh;
3353
struct device_info device_info;
3354
struct device_statistics device_statistics;
3355
struct idr *idr_to_search;
3356
3357
resource = (struct drbd_resource *)cb->args[0];
3358
if (!cb->args[0] && !cb->args[1]) {
3359
resource_filter = find_cfg_context_attr(cb->nlh, T_ctx_resource_name);
3360
if (resource_filter) {
3361
retcode = ERR_RES_NOT_KNOWN;
3362
resource = drbd_find_resource(nla_data(resource_filter));
3363
if (!resource)
3364
goto put_result;
3365
cb->args[0] = (long)resource;
3366
}
3367
}
3368
3369
rcu_read_lock();
3370
minor = cb->args[1];
3371
idr_to_search = resource ? &resource->devices : &drbd_devices;
3372
device = idr_get_next(idr_to_search, &minor);
3373
if (!device) {
3374
err = 0;
3375
goto out;
3376
}
3377
idr_for_each_entry_continue(idr_to_search, device, minor) {
3378
retcode = NO_ERROR;
3379
goto put_result; /* only one iteration */
3380
}
3381
err = 0;
3382
goto out; /* no more devices */
3383
3384
put_result:
3385
dh = genlmsg_put(skb, NETLINK_CB(cb->skb).portid,
3386
cb->nlh->nlmsg_seq, &drbd_genl_family,
3387
NLM_F_MULTI, DRBD_ADM_GET_DEVICES);
3388
err = -ENOMEM;
3389
if (!dh)
3390
goto out;
3391
dh->ret_code = retcode;
3392
dh->minor = -1U;
3393
if (retcode == NO_ERROR) {
3394
dh->minor = device->minor;
3395
err = nla_put_drbd_cfg_context(skb, device->resource, NULL, device);
3396
if (err)
3397
goto out;
3398
if (get_ldev(device)) {
3399
struct disk_conf *disk_conf =
3400
rcu_dereference(device->ldev->disk_conf);
3401
3402
err = disk_conf_to_skb(skb, disk_conf, !capable(CAP_SYS_ADMIN));
3403
put_ldev(device);
3404
if (err)
3405
goto out;
3406
}
3407
device_to_info(&device_info, device);
3408
err = device_info_to_skb(skb, &device_info, !capable(CAP_SYS_ADMIN));
3409
if (err)
3410
goto out;
3411
3412
device_to_statistics(&device_statistics, device);
3413
err = device_statistics_to_skb(skb, &device_statistics, !capable(CAP_SYS_ADMIN));
3414
if (err)
3415
goto out;
3416
cb->args[1] = minor + 1;
3417
}
3418
genlmsg_end(skb, dh);
3419
err = 0;
3420
3421
out:
3422
rcu_read_unlock();
3423
if (err)
3424
return err;
3425
return skb->len;
3426
}
3427
3428
int drbd_adm_dump_connections_done(struct netlink_callback *cb)
3429
{
3430
return put_resource_in_arg0(cb, 6);
3431
}
3432
3433
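/* Kept in cb->args[1] by drbd_adm_dump_connections(): either dump only the
* filtered resource, or iterate over all resources. */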
enum { SINGLE_RESOURCE, ITERATE_RESOURCES };
3434
3435
int drbd_adm_dump_connections(struct sk_buff *skb, struct netlink_callback *cb)
3436
{
3437
struct nlattr *resource_filter;
3438
struct drbd_resource *resource = NULL, *next_resource;
3439
struct drbd_connection *connection;
3440
int err = 0, retcode;
3441
struct drbd_genlmsghdr *dh;
3442
struct connection_info connection_info;
3443
struct connection_statistics connection_statistics;
3444
3445
rcu_read_lock();
3446
resource = (struct drbd_resource *)cb->args[0];
3447
if (!cb->args[0]) {
3448
resource_filter = find_cfg_context_attr(cb->nlh, T_ctx_resource_name);
3449
if (resource_filter) {
3450
retcode = ERR_RES_NOT_KNOWN;
3451
resource = drbd_find_resource(nla_data(resource_filter));
3452
if (!resource)
3453
goto put_result;
3454
cb->args[0] = (long)resource;
3455
cb->args[1] = SINGLE_RESOURCE;
3456
}
3457
}
3458
if (!resource) {
3459
if (list_empty(&drbd_resources))
3460
goto out;
3461
resource = list_first_entry(&drbd_resources, struct drbd_resource, resources);
3462
kref_get(&resource->kref);
3463
cb->args[0] = (long)resource;
3464
cb->args[1] = ITERATE_RESOURCES;
3465
}
3466
3467
next_resource:
3468
rcu_read_unlock();
3469
mutex_lock(&resource->conf_update);
3470
rcu_read_lock();
3471
if (cb->args[2]) {
3472
for_each_connection_rcu(connection, resource)
3473
if (connection == (struct drbd_connection *)cb->args[2])
3474
goto found_connection;
3475
/* connection was probably deleted */
3476
goto no_more_connections;
3477
}
3478
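/* Make connection point to the list head (not a real entry), so that the
* _continue_rcu loop below starts at the first connection. */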
connection = list_entry(&resource->connections, struct drbd_connection, connections);
3479
3480
found_connection:
3481
list_for_each_entry_continue_rcu(connection, &resource->connections, connections) {
3482
if (!has_net_conf(connection))
3483
continue;
3484
retcode = NO_ERROR;
3485
goto put_result; /* only one iteration */
3486
}
3487
3488
no_more_connections:
3489
if (cb->args[1] == ITERATE_RESOURCES) {
3490
for_each_resource_rcu(next_resource, &drbd_resources) {
3491
if (next_resource == resource)
3492
goto found_resource;
3493
}
3494
/* resource was probably deleted */
3495
}
3496
goto out;
3497
3498
found_resource:
3499
list_for_each_entry_continue_rcu(next_resource, &drbd_resources, resources) {
3500
mutex_unlock(&resource->conf_update);
3501
kref_put(&resource->kref, drbd_destroy_resource);
3502
resource = next_resource;
3503
kref_get(&resource->kref);
3504
cb->args[0] = (long)resource;
3505
cb->args[2] = 0;
3506
goto next_resource;
3507
}
3508
goto out; /* no more resources */
3509
3510
put_result:
3511
dh = genlmsg_put(skb, NETLINK_CB(cb->skb).portid,
3512
cb->nlh->nlmsg_seq, &drbd_genl_family,
3513
NLM_F_MULTI, DRBD_ADM_GET_CONNECTIONS);
3514
err = -ENOMEM;
3515
if (!dh)
3516
goto out;
3517
dh->ret_code = retcode;
3518
dh->minor = -1U;
3519
if (retcode == NO_ERROR) {
3520
struct net_conf *net_conf;
3521
3522
err = nla_put_drbd_cfg_context(skb, resource, connection, NULL);
3523
if (err)
3524
goto out;
3525
net_conf = rcu_dereference(connection->net_conf);
3526
if (net_conf) {
3527
err = net_conf_to_skb(skb, net_conf, !capable(CAP_SYS_ADMIN));
3528
if (err)
3529
goto out;
3530
}
3531
connection_to_info(&connection_info, connection);
3532
err = connection_info_to_skb(skb, &connection_info, !capable(CAP_SYS_ADMIN));
3533
if (err)
3534
goto out;
3535
connection_statistics.conn_congested = test_bit(NET_CONGESTED, &connection->flags);
3536
err = connection_statistics_to_skb(skb, &connection_statistics, !capable(CAP_SYS_ADMIN));
3537
if (err)
3538
goto out;
3539
cb->args[2] = (long)connection;
3540
}
3541
genlmsg_end(skb, dh);
3542
err = 0;
3543
3544
out:
3545
rcu_read_unlock();
3546
if (resource)
3547
mutex_unlock(&resource->conf_update);
3548
if (err)
3549
return err;
3550
return skb->len;
3551
}
3552
3553
enum mdf_peer_flag {
3554
MDF_PEER_CONNECTED = 1 << 0,
3555
MDF_PEER_OUTDATED = 1 << 1,
3556
MDF_PEER_FENCING = 1 << 2,
3557
MDF_PEER_FULL_SYNC = 1 << 3,
3558
};
3559
3560
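/* Sample the replication counters of @peer_device. Only the bitmap UUID is
* read under md->uuid_lock; the other fields are sampled locklessly. */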
static void peer_device_to_statistics(struct peer_device_statistics *s,
3561
struct drbd_peer_device *peer_device)
3562
{
3563
struct drbd_device *device = peer_device->device;
3564
3565
memset(s, 0, sizeof(*s));
3566
s->peer_dev_received = device->recv_cnt;
3567
s->peer_dev_sent = device->send_cnt;
3568
s->peer_dev_pending = atomic_read(&device->ap_pending_cnt) +
3569
atomic_read(&device->rs_pending_cnt);
3570
s->peer_dev_unacked = atomic_read(&device->unacked_cnt);
3571
s->peer_dev_out_of_sync = drbd_bm_total_weight(device) << (BM_BLOCK_SHIFT - 9);
3572
s->peer_dev_resync_failed = device->rs_failed << (BM_BLOCK_SHIFT - 9);
3573
if (get_ldev(device)) {
3574
struct drbd_md *md = &device->ldev->md;
3575
3576
spin_lock_irq(&md->uuid_lock);
3577
s->peer_dev_bitmap_uuid = md->uuid[UI_BITMAP];
3578
spin_unlock_irq(&md->uuid_lock);
3579
s->peer_dev_flags =
3580
(drbd_md_test_flag(device->ldev, MDF_CONNECTED_IND) ?
3581
MDF_PEER_CONNECTED : 0) +
3582
(drbd_md_test_flag(device->ldev, MDF_CONSISTENT) &&
3583
!drbd_md_test_flag(device->ldev, MDF_WAS_UP_TO_DATE) ?
3584
MDF_PEER_OUTDATED : 0) +
3585
/* FIXME: MDF_PEER_FENCING? */
3586
(drbd_md_test_flag(device->ldev, MDF_FULL_SYNC) ?
3587
MDF_PEER_FULL_SYNC : 0);
3588
put_ldev(device);
3589
}
3590
}
3591
3592
int drbd_adm_dump_peer_devices_done(struct netlink_callback *cb)
3593
{
3594
return put_resource_in_arg0(cb, 9);
3595
}
3596
3597
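/* Emit at most one peer device per call. cb->args[0] caches the resource
* filter, cb->args[1] the current minor, cb->args[2] the last peer device
* reported. */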
int drbd_adm_dump_peer_devices(struct sk_buff *skb, struct netlink_callback *cb)
3598
{
3599
struct nlattr *resource_filter;
3600
struct drbd_resource *resource;
3601
struct drbd_device *device;
3602
struct drbd_peer_device *peer_device = NULL;
3603
int minor, err, retcode;
3604
struct drbd_genlmsghdr *dh;
3605
struct idr *idr_to_search;
3606
3607
resource = (struct drbd_resource *)cb->args[0];
3608
if (!cb->args[0] && !cb->args[1]) {
3609
resource_filter = find_cfg_context_attr(cb->nlh, T_ctx_resource_name);
3610
if (resource_filter) {
3611
retcode = ERR_RES_NOT_KNOWN;
3612
resource = drbd_find_resource(nla_data(resource_filter));
3613
if (!resource)
3614
goto put_result;
3615
}
3616
cb->args[0] = (long)resource;
3617
}
3618
3619
rcu_read_lock();
3620
minor = cb->args[1];
3621
idr_to_search = resource ? &resource->devices : &drbd_devices;
3622
device = idr_find(idr_to_search, minor);
3623
if (!device) {
3624
next_device:
3625
minor++;
3626
cb->args[2] = 0;
3627
device = idr_get_next(idr_to_search, &minor);
3628
if (!device) {
3629
err = 0;
3630
goto out;
3631
}
3632
}
3633
if (cb->args[2]) {
3634
for_each_peer_device(peer_device, device)
3635
if (peer_device == (struct drbd_peer_device *)cb->args[2])
3636
goto found_peer_device;
3637
/* peer device was probably deleted */
3638
goto next_device;
3639
}
3640
/* Make peer_device point to the list head (not the first entry). */
3641
peer_device = list_entry(&device->peer_devices, struct drbd_peer_device, peer_devices);
3642
3643
found_peer_device:
3644
list_for_each_entry_continue_rcu(peer_device, &device->peer_devices, peer_devices) {
3645
if (!has_net_conf(peer_device->connection))
3646
continue;
3647
retcode = NO_ERROR;
3648
goto put_result; /* only one iteration */
3649
}
3650
goto next_device;
3651
3652
put_result:
3653
dh = genlmsg_put(skb, NETLINK_CB(cb->skb).portid,
3654
cb->nlh->nlmsg_seq, &drbd_genl_family,
3655
NLM_F_MULTI, DRBD_ADM_GET_PEER_DEVICES);
3656
err = -ENOMEM;
3657
if (!dh)
3658
goto out;
3659
dh->ret_code = retcode;
3660
dh->minor = -1U;
3661
if (retcode == NO_ERROR) {
3662
struct peer_device_info peer_device_info;
3663
struct peer_device_statistics peer_device_statistics;
3664
3665
dh->minor = minor;
3666
err = nla_put_drbd_cfg_context(skb, device->resource, peer_device->connection, device);
3667
if (err)
3668
goto out;
3669
peer_device_to_info(&peer_device_info, peer_device);
3670
err = peer_device_info_to_skb(skb, &peer_device_info, !capable(CAP_SYS_ADMIN));
3671
if (err)
3672
goto out;
3673
peer_device_to_statistics(&peer_device_statistics, peer_device);
3674
err = peer_device_statistics_to_skb(skb, &peer_device_statistics, !capable(CAP_SYS_ADMIN));
3675
if (err)
3676
goto out;
3677
cb->args[1] = minor;
3678
cb->args[2] = (long)peer_device;
3679
}
3680
genlmsg_end(skb, dh);
3681
err = 0;
3682
3683
out:
3684
rcu_read_unlock();
3685
if (err)
3686
return err;
3687
return skb->len;
3688
}
3689
/*
3690
* Return the connection of @resource if @resource has exactly one connection.
3691
*/
3692
static struct drbd_connection *the_only_connection(struct drbd_resource *resource)
3693
{
3694
struct list_head *connections = &resource->connections;
3695
3696
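/* A list with exactly one entry has that entry's ->next pointing back at the head. */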
if (list_empty(connections) || connections->next->next != connections)
3697
return NULL;
3698
return list_first_entry(&resource->connections, struct drbd_connection, connections);
3699
}
3700
3701
static int nla_put_status_info(struct sk_buff *skb, struct drbd_device *device,
3702
const struct sib_info *sib)
3703
{
3704
struct drbd_resource *resource = device->resource;
3705
struct state_info *si = NULL; /* for sizeof(si->member); */
3706
struct nlattr *nla;
3707
int got_ldev;
3708
int err = 0;
3709
int exclude_sensitive;
3710
3711
/* If sib != NULL, this is drbd_bcast_event, which anyone can listen
3712
* to. So we better exclude_sensitive information.
3713
*
3714
* If sib == NULL, this is drbd_adm_get_status, executed synchronously
3715
* in the context of the requesting user process. Exclude sensitive
3716
* information, unless current has superuser.
3717
*
3718
* NOTE: for drbd_adm_get_status_all(), this is a netlink dump, and
3719
* relies on the current implementation of netlink_dump(), which
3720
* executes the dump callback successively from netlink_recvmsg(),
3721
* always in the context of the receiving process */
3722
exclude_sensitive = sib || !capable(CAP_SYS_ADMIN);
3723
3724
got_ldev = get_ldev(device);
3725
3726
/* We still need to add connection name and volume number information.
3727
* Minor number is in drbd_genlmsghdr. */
3728
if (nla_put_drbd_cfg_context(skb, resource, the_only_connection(resource), device))
3729
goto nla_put_failure;
3730
3731
if (res_opts_to_skb(skb, &device->resource->res_opts, exclude_sensitive))
3732
goto nla_put_failure;
3733
3734
rcu_read_lock();
3735
if (got_ldev) {
3736
struct disk_conf *disk_conf;
3737
3738
disk_conf = rcu_dereference(device->ldev->disk_conf);
3739
err = disk_conf_to_skb(skb, disk_conf, exclude_sensitive);
3740
}
3741
if (!err) {
3742
struct net_conf *nc;
3743
3744
nc = rcu_dereference(first_peer_device(device)->connection->net_conf);
3745
if (nc)
3746
err = net_conf_to_skb(skb, nc, exclude_sensitive);
3747
}
3748
rcu_read_unlock();
3749
if (err)
3750
goto nla_put_failure;
3751
3752
nla = nla_nest_start_noflag(skb, DRBD_NLA_STATE_INFO);
3753
if (!nla)
3754
goto nla_put_failure;
3755
if (nla_put_u32(skb, T_sib_reason, sib ? sib->sib_reason : SIB_GET_STATUS_REPLY) ||
3756
nla_put_u32(skb, T_current_state, device->state.i) ||
3757
nla_put_u64_0pad(skb, T_ed_uuid, device->ed_uuid) ||
3758
nla_put_u64_0pad(skb, T_capacity, get_capacity(device->vdisk)) ||
3759
nla_put_u64_0pad(skb, T_send_cnt, device->send_cnt) ||
3760
nla_put_u64_0pad(skb, T_recv_cnt, device->recv_cnt) ||
3761
nla_put_u64_0pad(skb, T_read_cnt, device->read_cnt) ||
3762
nla_put_u64_0pad(skb, T_writ_cnt, device->writ_cnt) ||
3763
nla_put_u64_0pad(skb, T_al_writ_cnt, device->al_writ_cnt) ||
3764
nla_put_u64_0pad(skb, T_bm_writ_cnt, device->bm_writ_cnt) ||
3765
nla_put_u32(skb, T_ap_bio_cnt, atomic_read(&device->ap_bio_cnt)) ||
3766
nla_put_u32(skb, T_ap_pending_cnt, atomic_read(&device->ap_pending_cnt)) ||
3767
nla_put_u32(skb, T_rs_pending_cnt, atomic_read(&device->rs_pending_cnt)))
3768
goto nla_put_failure;
3769
3770
if (got_ldev) {
3771
int err;
3772
3773
spin_lock_irq(&device->ldev->md.uuid_lock);
3774
err = nla_put(skb, T_uuids, sizeof(si->uuids), device->ldev->md.uuid);
3775
spin_unlock_irq(&device->ldev->md.uuid_lock);
3776
3777
if (err)
3778
goto nla_put_failure;
3779
3780
if (nla_put_u32(skb, T_disk_flags, device->ldev->md.flags) ||
3781
nla_put_u64_0pad(skb, T_bits_total, drbd_bm_bits(device)) ||
3782
nla_put_u64_0pad(skb, T_bits_oos,
3783
drbd_bm_total_weight(device)))
3784
goto nla_put_failure;
3785
if (C_SYNC_SOURCE <= device->state.conn &&
3786
C_PAUSED_SYNC_T >= device->state.conn) {
3787
if (nla_put_u64_0pad(skb, T_bits_rs_total,
3788
device->rs_total) ||
3789
nla_put_u64_0pad(skb, T_bits_rs_failed,
3790
device->rs_failed))
3791
goto nla_put_failure;
3792
}
3793
}
3794
3795
if (sib) {
3796
switch (sib->sib_reason) {
3797
case SIB_SYNC_PROGRESS:
3798
case SIB_GET_STATUS_REPLY:
3799
break;
3800
case SIB_STATE_CHANGE:
3801
if (nla_put_u32(skb, T_prev_state, sib->os.i) ||
3802
nla_put_u32(skb, T_new_state, sib->ns.i))
3803
goto nla_put_failure;
3804
break;
3805
case SIB_HELPER_POST:
3806
if (nla_put_u32(skb, T_helper_exit_code,
3807
sib->helper_exit_code))
3808
goto nla_put_failure;
3809
fallthrough;
3810
case SIB_HELPER_PRE:
3811
if (nla_put_string(skb, T_helper, sib->helper_name))
3812
goto nla_put_failure;
3813
break;
3814
}
3815
}
3816
nla_nest_end(skb, nla);
3817
3818
if (0)
3819
nla_put_failure:
3820
err = -EMSGSIZE;
3821
if (got_ldev)
3822
put_ldev(device);
3823
return err;
3824
}
3825
3826
int drbd_adm_get_status(struct sk_buff *skb, struct genl_info *info)
3827
{
3828
struct drbd_config_context adm_ctx;
3829
enum drbd_ret_code retcode;
3830
int err;
3831
3832
retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
3833
if (!adm_ctx.reply_skb)
3834
return retcode;
3835
if (retcode != NO_ERROR)
3836
goto out;
3837
3838
err = nla_put_status_info(adm_ctx.reply_skb, adm_ctx.device, NULL);
3839
if (err) {
3840
nlmsg_free(adm_ctx.reply_skb);
3841
return err;
3842
}
3843
out:
3844
drbd_adm_finish(&adm_ctx, info, retcode);
3845
return 0;
3846
}
3847
3848
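/* Dump worker for drbd_adm_get_status_all(): emits at most one volume per
* call (or one entry for a resource without volumes), keeping the iteration
* position in cb->args[0] (resource) and cb->args[1] (volume). */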
static int get_one_status(struct sk_buff *skb, struct netlink_callback *cb)
3849
{
3850
struct drbd_device *device;
3851
struct drbd_genlmsghdr *dh;
3852
struct drbd_resource *pos = (struct drbd_resource *)cb->args[0];
3853
struct drbd_resource *resource = NULL;
3854
struct drbd_resource *tmp;
3855
unsigned volume = cb->args[1];
3856
3857
/* Open coded, deferred, iteration:
3858
* for_each_resource_safe(resource, tmp, &drbd_resources) {
3859
* connection = "first connection of resource or undefined";
3860
* idr_for_each_entry(&resource->devices, device, i) {
3861
* ...
3862
* }
3863
* }
3864
* where resource is cb->args[0];
3865
* and i is cb->args[1];
3866
*
3867
* cb->args[2] indicates if we shall loop over all resources,
3868
* or just dump all volumes of a single resource.
3869
*
3870
* This may miss entries inserted after this dump started,
3871
* or entries deleted before they are reached.
3872
*
3873
* We need to make sure the device won't disappear while
3874
* we are looking at it, and revalidate our iterators
3875
* on each iteration.
3876
*/
3877
3878
/* synchronize with conn_create()/drbd_destroy_connection() */
3879
rcu_read_lock();
3880
/* revalidate iterator position */
3881
for_each_resource_rcu(tmp, &drbd_resources) {
3882
if (pos == NULL) {
3883
/* first iteration */
3884
pos = tmp;
3885
resource = pos;
3886
break;
3887
}
3888
if (tmp == pos) {
3889
resource = pos;
3890
break;
3891
}
3892
}
3893
if (resource) {
3894
next_resource:
3895
device = idr_get_next(&resource->devices, &volume);
3896
if (!device) {
3897
/* No more volumes to dump on this resource.
3898
* Advance resource iterator. */
3899
pos = list_entry_rcu(resource->resources.next,
3900
struct drbd_resource, resources);
3901
/* Did we dump any volume of this resource yet? */
3902
if (volume != 0) {
3903
/* If we reached the end of the list,
3904
* or only a single resource dump was requested,
3905
* we are done. */
3906
if (&pos->resources == &drbd_resources || cb->args[2])
3907
goto out;
3908
volume = 0;
3909
resource = pos;
3910
goto next_resource;
3911
}
3912
}
3913
3914
dh = genlmsg_put(skb, NETLINK_CB(cb->skb).portid,
3915
cb->nlh->nlmsg_seq, &drbd_genl_family,
3916
NLM_F_MULTI, DRBD_ADM_GET_STATUS);
3917
if (!dh)
3918
goto out;
3919
3920
if (!device) {
3921
/* This is a connection without a single volume.
3922
* Surprisingly enough, it may have a network
3923
* configuration. */
3924
struct drbd_connection *connection;
3925
3926
dh->minor = -1U;
3927
dh->ret_code = NO_ERROR;
3928
connection = the_only_connection(resource);
3929
if (nla_put_drbd_cfg_context(skb, resource, connection, NULL))
3930
goto cancel;
3931
if (connection) {
3932
struct net_conf *nc;
3933
3934
nc = rcu_dereference(connection->net_conf);
3935
if (nc && net_conf_to_skb(skb, nc, 1) != 0)
3936
goto cancel;
3937
}
3938
goto done;
3939
}
3940
3941
D_ASSERT(device, device->vnr == volume);
3942
D_ASSERT(device, device->resource == resource);
3943
3944
dh->minor = device_to_minor(device);
3945
dh->ret_code = NO_ERROR;
3946
3947
if (nla_put_status_info(skb, device, NULL)) {
3948
cancel:
3949
genlmsg_cancel(skb, dh);
3950
goto out;
3951
}
3952
done:
3953
genlmsg_end(skb, dh);
3954
}
3955
3956
out:
3957
rcu_read_unlock();
3958
/* where to start the next iteration */
3959
cb->args[0] = (long)pos;
3960
cb->args[1] = (pos == resource) ? volume + 1 : 0;
3961
3962
/* No more resources/volumes/minors found results in an empty skb.
3963
* Which will terminate the dump. */
3964
return skb->len;
3965
}
3966
3967
/*
3968
* Request status of all resources, or of all volumes within a single resource.
3969
*
3970
* This is a dump, as the answer may not fit in a single reply skb otherwise.
3971
* Which means we cannot use the family->attrbuf or other such members, because
3972
* dump is NOT protected by the genl_lock(). During dump, we only have access
3973
* to the incoming skb, and need to opencode "parsing" of the nlattr payload.
3974
*
3975
* Once things are setup properly, we call into get_one_status().
3976
*/
3977
int drbd_adm_get_status_all(struct sk_buff *skb, struct netlink_callback *cb)
3978
{
3979
const unsigned hdrlen = GENL_HDRLEN + GENL_MAGIC_FAMILY_HDRSZ;
3980
struct nlattr *nla;
3981
const char *resource_name;
3982
struct drbd_resource *resource;
3983
int maxtype;
3984
3985
/* Is this a followup call? */
3986
if (cb->args[0]) {
3987
/* ... of a single resource dump,
3988
* and the resource iterator has been advanced already? */
3989
if (cb->args[2] && cb->args[2] != cb->args[0])
3990
return 0; /* DONE. */
3991
goto dump;
3992
}
3993
3994
/* First call (from netlink_dump_start). We need to figure out
3995
* which resource(s) the user wants us to dump. */
3996
nla = nla_find(nlmsg_attrdata(cb->nlh, hdrlen),
3997
nlmsg_attrlen(cb->nlh, hdrlen),
3998
DRBD_NLA_CFG_CONTEXT);
3999
4000
/* No explicit context given. Dump all. */
4001
if (!nla)
4002
goto dump;
4003
maxtype = ARRAY_SIZE(drbd_cfg_context_nl_policy) - 1;
4004
nla = drbd_nla_find_nested(maxtype, nla, __nla_type(T_ctx_resource_name));
4005
if (IS_ERR(nla))
4006
return PTR_ERR(nla);
4007
/* context given, but no name present? */
4008
if (!nla)
4009
return -EINVAL;
4010
resource_name = nla_data(nla);
4011
if (!*resource_name)
4012
return -ENODEV;
4013
resource = drbd_find_resource(resource_name);
4014
if (!resource)
4015
return -ENODEV;
4016
4017
kref_put(&resource->kref, drbd_destroy_resource); /* get_one_status() revalidates the resource */
4018
4019
/* prime iterators, and set "filter" mode mark:
4020
* only dump this resource. */
4021
cb->args[0] = (long)resource;
4022
/* cb->args[1] = 0; passed in this way. */
4023
cb->args[2] = (long)resource;
4024
4025
dump:
4026
return get_one_status(skb, cb);
4027
}
4028
4029
int drbd_adm_get_timeout_type(struct sk_buff *skb, struct genl_info *info)
4030
{
4031
struct drbd_config_context adm_ctx;
4032
enum drbd_ret_code retcode;
4033
struct timeout_parms tp;
4034
int err;
4035
4036
retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
4037
if (!adm_ctx.reply_skb)
4038
return retcode;
4039
if (retcode != NO_ERROR)
4040
goto out;
4041
4042
tp.timeout_type =
4043
adm_ctx.device->state.pdsk == D_OUTDATED ? UT_PEER_OUTDATED :
4044
test_bit(USE_DEGR_WFC_T, &adm_ctx.device->flags) ? UT_DEGRADED :
4045
UT_DEFAULT;
4046
4047
err = timeout_parms_to_priv_skb(adm_ctx.reply_skb, &tp);
4048
if (err) {
4049
nlmsg_free(adm_ctx.reply_skb);
4050
return err;
4051
}
4052
out:
4053
drbd_adm_finish(&adm_ctx, info, retcode);
4054
return 0;
4055
}
4056
4057
int drbd_adm_start_ov(struct sk_buff *skb, struct genl_info *info)
4058
{
4059
struct drbd_config_context adm_ctx;
4060
struct drbd_device *device;
4061
enum drbd_ret_code retcode;
4062
struct start_ov_parms parms;
4063
4064
retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
4065
if (!adm_ctx.reply_skb)
4066
return retcode;
4067
if (retcode != NO_ERROR)
4068
goto out;
4069
4070
device = adm_ctx.device;
4071
4072
/* resume from last known position, if possible */
4073
parms.ov_start_sector = device->ov_start_sector;
4074
parms.ov_stop_sector = ULLONG_MAX;
4075
if (info->attrs[DRBD_NLA_START_OV_PARMS]) {
4076
int err = start_ov_parms_from_attrs(&parms, info);
4077
if (err) {
4078
retcode = ERR_MANDATORY_TAG;
4079
drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
4080
goto out;
4081
}
4082
}
4083
mutex_lock(&adm_ctx.resource->adm_mutex);
4084
4085
/* w_make_ov_request expects position to be aligned */
4086
device->ov_start_sector = parms.ov_start_sector & ~(BM_SECT_PER_BIT-1);
4087
device->ov_stop_sector = parms.ov_stop_sector;
4088
4089
/* If there is still bitmap IO pending, e.g. previous resync or verify
4090
* just being finished, wait for it before requesting a new resync. */
4091
drbd_suspend_io(device);
4092
wait_event(device->misc_wait, !test_bit(BITMAP_IO, &device->flags));
4093
retcode = drbd_request_state(device, NS(conn, C_VERIFY_S));
4094
drbd_resume_io(device);
4095
4096
mutex_unlock(&adm_ctx.resource->adm_mutex);
4097
out:
4098
drbd_adm_finish(&adm_ctx, info, retcode);
4099
return 0;
4100
}
4101
4102
4103
int drbd_adm_new_c_uuid(struct sk_buff *skb, struct genl_info *info)
4104
{
4105
struct drbd_config_context adm_ctx;
4106
struct drbd_device *device;
4107
enum drbd_ret_code retcode;
4108
int skip_initial_sync = 0;
4109
int err;
4110
struct new_c_uuid_parms args;
4111
4112
retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
4113
if (!adm_ctx.reply_skb)
4114
return retcode;
4115
if (retcode != NO_ERROR)
4116
goto out_nolock;
4117
4118
device = adm_ctx.device;
4119
memset(&args, 0, sizeof(args));
4120
if (info->attrs[DRBD_NLA_NEW_C_UUID_PARMS]) {
4121
err = new_c_uuid_parms_from_attrs(&args, info);
4122
if (err) {
4123
retcode = ERR_MANDATORY_TAG;
4124
drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
4125
goto out_nolock;
4126
}
4127
}
4128
4129
mutex_lock(&adm_ctx.resource->adm_mutex);
4130
mutex_lock(device->state_mutex); /* Protects us against serialized state changes. */
4131
4132
if (!get_ldev(device)) {
4133
retcode = ERR_NO_DISK;
4134
goto out;
4135
}
4136
4137
/* this is "skip initial sync", assume to be clean */
4138
if (device->state.conn == C_CONNECTED &&
4139
first_peer_device(device)->connection->agreed_pro_version >= 90 &&
4140
device->ldev->md.uuid[UI_CURRENT] == UUID_JUST_CREATED && args.clear_bm) {
4141
drbd_info(device, "Preparing to skip initial sync\n");
4142
skip_initial_sync = 1;
4143
} else if (device->state.conn != C_STANDALONE) {
4144
retcode = ERR_CONNECTED;
4145
goto out_dec;
4146
}
4147
4148
drbd_uuid_set(device, UI_BITMAP, 0); /* Rotate UI_BITMAP to History 1, etc... */
4149
drbd_uuid_new_current(device); /* New current, previous to UI_BITMAP */
4150
4151
if (args.clear_bm) {
4152
err = drbd_bitmap_io(device, &drbd_bmio_clear_n_write,
4153
"clear_n_write from new_c_uuid", BM_LOCKED_MASK, NULL);
4154
if (err) {
4155
drbd_err(device, "Writing bitmap failed with %d\n", err);
4156
retcode = ERR_IO_MD_DISK;
4157
}
4158
if (skip_initial_sync) {
4159
drbd_send_uuids_skip_initial_sync(first_peer_device(device));
4160
_drbd_uuid_set(device, UI_BITMAP, 0);
4161
drbd_print_uuids(device, "cleared bitmap UUID");
4162
spin_lock_irq(&device->resource->req_lock);
4163
_drbd_set_state(_NS2(device, disk, D_UP_TO_DATE, pdsk, D_UP_TO_DATE),
4164
CS_VERBOSE, NULL);
4165
spin_unlock_irq(&device->resource->req_lock);
4166
}
4167
}
4168
4169
drbd_md_sync(device);
4170
out_dec:
4171
put_ldev(device);
4172
out:
4173
mutex_unlock(device->state_mutex);
4174
mutex_unlock(&adm_ctx.resource->adm_mutex);
4175
out_nolock:
4176
drbd_adm_finish(&adm_ctx, info, retcode);
4177
return 0;
4178
}
4179
4180
static enum drbd_ret_code
4181
drbd_check_resource_name(struct drbd_config_context *adm_ctx)
4182
{
4183
const char *name = adm_ctx->resource_name;
4184
if (!name || !name[0]) {
4185
drbd_msg_put_info(adm_ctx->reply_skb, "resource name missing");
4186
return ERR_MANDATORY_TAG;
4187
}
4188
/* if we want to use these in sysfs/configfs/debugfs some day,
4189
* we must not allow slashes */
4190
if (strchr(name, '/')) {
4191
drbd_msg_put_info(adm_ctx->reply_skb, "invalid resource name");
4192
return ERR_INVALID_REQUEST;
4193
}
4194
return NO_ERROR;
4195
}
4196
4197
static void resource_to_info(struct resource_info *info,
4198
struct drbd_resource *resource)
4199
{
4200
info->res_role = conn_highest_role(first_connection(resource));
4201
info->res_susp = resource->susp;
4202
info->res_susp_nod = resource->susp_nod;
4203
info->res_susp_fen = resource->susp_fen;
4204
}
4205
4206
int drbd_adm_new_resource(struct sk_buff *skb, struct genl_info *info)
4207
{
4208
struct drbd_connection *connection;
4209
struct drbd_config_context adm_ctx;
4210
enum drbd_ret_code retcode;
4211
struct res_opts res_opts;
4212
int err;
4213
4214
retcode = drbd_adm_prepare(&adm_ctx, skb, info, 0);
4215
if (!adm_ctx.reply_skb)
4216
return retcode;
4217
if (retcode != NO_ERROR)
4218
goto out;
4219
4220
set_res_opts_defaults(&res_opts);
4221
err = res_opts_from_attrs(&res_opts, info);
4222
if (err && err != -ENOMSG) {
4223
retcode = ERR_MANDATORY_TAG;
4224
drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
4225
goto out;
4226
}
4227
4228
retcode = drbd_check_resource_name(&adm_ctx);
4229
if (retcode != NO_ERROR)
4230
goto out;
4231
4232
if (adm_ctx.resource) {
4233
if (info->nlhdr->nlmsg_flags & NLM_F_EXCL) {
4234
retcode = ERR_INVALID_REQUEST;
4235
drbd_msg_put_info(adm_ctx.reply_skb, "resource exists");
4236
}
4237
/* else: still NO_ERROR */
4238
goto out;
4239
}
4240
4241
/* not yet safe for genl_family.parallel_ops */
4242
mutex_lock(&resources_mutex);
4243
connection = conn_create(adm_ctx.resource_name, &res_opts);
4244
mutex_unlock(&resources_mutex);
4245
4246
if (connection) {
4247
struct resource_info resource_info;
4248
4249
mutex_lock(&notification_mutex);
4250
resource_to_info(&resource_info, connection->resource);
4251
notify_resource_state(NULL, 0, connection->resource,
4252
&resource_info, NOTIFY_CREATE);
4253
mutex_unlock(&notification_mutex);
4254
} else
4255
retcode = ERR_NOMEM;
4256
4257
out:
4258
drbd_adm_finish(&adm_ctx, info, retcode);
4259
return 0;
4260
}
4261
4262
static void device_to_info(struct device_info *info,
4263
struct drbd_device *device)
4264
{
4265
info->dev_disk_state = device->state.disk;
4266
}
4267
4268
4269
int drbd_adm_new_minor(struct sk_buff *skb, struct genl_info *info)
4270
{
4271
struct drbd_config_context adm_ctx;
4272
struct drbd_genlmsghdr *dh = genl_info_userhdr(info);
4273
enum drbd_ret_code retcode;
4274
4275
retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_RESOURCE);
4276
if (!adm_ctx.reply_skb)
4277
return retcode;
4278
if (retcode != NO_ERROR)
4279
goto out;
4280
4281
if (dh->minor > MINORMASK) {
4282
drbd_msg_put_info(adm_ctx.reply_skb, "requested minor out of range");
4283
retcode = ERR_INVALID_REQUEST;
4284
goto out;
4285
}
4286
if (adm_ctx.volume > DRBD_VOLUME_MAX) {
4287
drbd_msg_put_info(adm_ctx.reply_skb, "requested volume id out of range");
4288
retcode = ERR_INVALID_REQUEST;
4289
goto out;
4290
}
4291
4292
/* drbd_adm_prepare made sure already
4293
* that first_peer_device(device)->connection and device->vnr match the request. */
4294
if (adm_ctx.device) {
4295
if (info->nlhdr->nlmsg_flags & NLM_F_EXCL)
4296
retcode = ERR_MINOR_OR_VOLUME_EXISTS;
4297
/* else: still NO_ERROR */
4298
goto out;
4299
}
4300
4301
mutex_lock(&adm_ctx.resource->adm_mutex);
4302
retcode = drbd_create_device(&adm_ctx, dh->minor);
4303
if (retcode == NO_ERROR) {
4304
struct drbd_device *device;
4305
struct drbd_peer_device *peer_device;
4306
struct device_info info;
4307
unsigned int peer_devices = 0;
4308
enum drbd_notification_type flags;
4309
4310
device = minor_to_device(dh->minor);
4311
for_each_peer_device(peer_device, device) {
4312
if (!has_net_conf(peer_device->connection))
4313
continue;
4314
peer_devices++;
4315
}
4316
4317
device_to_info(&info, device);
4318
mutex_lock(&notification_mutex);
4319
flags = (peer_devices--) ? NOTIFY_CONTINUES : 0;
4320
notify_device_state(NULL, 0, device, &info, NOTIFY_CREATE | flags);
4321
for_each_peer_device(peer_device, device) {
4322
struct peer_device_info peer_device_info;
4323
4324
if (!has_net_conf(peer_device->connection))
4325
continue;
4326
peer_device_to_info(&peer_device_info, peer_device);
4327
flags = (peer_devices--) ? NOTIFY_CONTINUES : 0;
4328
notify_peer_device_state(NULL, 0, peer_device, &peer_device_info,
4329
NOTIFY_CREATE | flags);
4330
}
4331
mutex_unlock(&notification_mutex);
4332
}
4333
mutex_unlock(&adm_ctx.resource->adm_mutex);
4334
out:
4335
drbd_adm_finish(&adm_ctx, info, retcode);
4336
return 0;
4337
}
4338
4339
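/* A minor may only be deleted while its disk is Diskless and its role is
* Secondary; otherwise ERR_MINOR_CONFIGURED is returned. */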
static enum drbd_ret_code adm_del_minor(struct drbd_device *device)
4340
{
4341
struct drbd_peer_device *peer_device;
4342
4343
if (device->state.disk == D_DISKLESS &&
4344
/* no need to be device->state.conn == C_STANDALONE &&
4345
* we may want to delete a minor from a live replication group.
4346
*/
4347
device->state.role == R_SECONDARY) {
4348
struct drbd_connection *connection =
4349
first_connection(device->resource);
4350
4351
_drbd_request_state(device, NS(conn, C_WF_REPORT_PARAMS),
4352
CS_VERBOSE + CS_WAIT_COMPLETE);
4353
4354
/* If the state engine hasn't stopped the sender thread yet, we
4355
* need to flush the sender work queue before generating the
4356
* DESTROY events here. */
4357
if (get_t_state(&connection->worker) == RUNNING)
4358
drbd_flush_workqueue(&connection->sender_work);
4359
4360
mutex_lock(&notification_mutex);
4361
for_each_peer_device(peer_device, device) {
4362
if (!has_net_conf(peer_device->connection))
4363
continue;
4364
notify_peer_device_state(NULL, 0, peer_device, NULL,
4365
NOTIFY_DESTROY | NOTIFY_CONTINUES);
4366
}
4367
notify_device_state(NULL, 0, device, NULL, NOTIFY_DESTROY);
4368
mutex_unlock(&notification_mutex);
4369
4370
drbd_delete_device(device);
4371
return NO_ERROR;
4372
} else
4373
return ERR_MINOR_CONFIGURED;
4374
}
4375
4376
int drbd_adm_del_minor(struct sk_buff *skb, struct genl_info *info)
4377
{
4378
struct drbd_config_context adm_ctx;
4379
enum drbd_ret_code retcode;
4380
4381
retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
4382
if (!adm_ctx.reply_skb)
4383
return retcode;
4384
if (retcode != NO_ERROR)
4385
goto out;
4386
4387
mutex_lock(&adm_ctx.resource->adm_mutex);
4388
retcode = adm_del_minor(adm_ctx.device);
4389
mutex_unlock(&adm_ctx.resource->adm_mutex);
4390
out:
4391
drbd_adm_finish(&adm_ctx, info, retcode);
4392
return 0;
4393
}
4394
4395
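/* A resource may only be deleted once all its connections are StandAlone
* and it no longer contains any volumes. */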
static int adm_del_resource(struct drbd_resource *resource)
4396
{
4397
struct drbd_connection *connection;
4398
4399
for_each_connection(connection, resource) {
4400
if (connection->cstate > C_STANDALONE)
4401
return ERR_NET_CONFIGURED;
4402
}
4403
if (!idr_is_empty(&resource->devices))
4404
return ERR_RES_IN_USE;
4405
4406
/* The state engine has stopped the sender thread, so we don't
4407
* need to flush the sender work queue before generating the
4408
* DESTROY event here. */
4409
mutex_lock(&notification_mutex);
4410
notify_resource_state(NULL, 0, resource, NULL, NOTIFY_DESTROY);
4411
mutex_unlock(&notification_mutex);
4412
4413
mutex_lock(&resources_mutex);
4414
list_del_rcu(&resource->resources);
4415
mutex_unlock(&resources_mutex);
4416
/* Make sure all threads have actually stopped: state handling only
4417
* does drbd_thread_stop_nowait(). */
4418
list_for_each_entry(connection, &resource->connections, connections)
4419
drbd_thread_stop(&connection->worker);
4420
synchronize_rcu();
4421
drbd_free_resource(resource);
4422
return NO_ERROR;
4423
}
4424
4425
int drbd_adm_down(struct sk_buff *skb, struct genl_info *info)
4426
{
4427
struct drbd_config_context adm_ctx;
4428
struct drbd_resource *resource;
4429
struct drbd_connection *connection;
4430
struct drbd_device *device;
4431
int retcode; /* enum drbd_ret_code rsp. enum drbd_state_rv */
4432
unsigned i;
4433
4434
retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_RESOURCE);
4435
if (!adm_ctx.reply_skb)
4436
return retcode;
4437
if (retcode != NO_ERROR)
4438
goto finish;
4439
4440
resource = adm_ctx.resource;
4441
mutex_lock(&resource->adm_mutex);
4442
/* demote */
4443
for_each_connection(connection, resource) {
4444
struct drbd_peer_device *peer_device;
4445
4446
idr_for_each_entry(&connection->peer_devices, peer_device, i) {
4447
retcode = drbd_set_role(peer_device->device, R_SECONDARY, 0);
4448
if (retcode < SS_SUCCESS) {
4449
drbd_msg_put_info(adm_ctx.reply_skb, "failed to demote");
4450
goto out;
4451
}
4452
}
4453
4454
retcode = conn_try_disconnect(connection, 0);
4455
if (retcode < SS_SUCCESS) {
4456
drbd_msg_put_info(adm_ctx.reply_skb, "failed to disconnect");
4457
goto out;
4458
}
4459
}
4460
4461
/* detach */
4462
idr_for_each_entry(&resource->devices, device, i) {
4463
retcode = adm_detach(device, 0);
4464
if (retcode < SS_SUCCESS || retcode > NO_ERROR) {
4465
drbd_msg_put_info(adm_ctx.reply_skb, "failed to detach");
4466
goto out;
4467
}
4468
}
4469
4470
/* delete volumes */
4471
idr_for_each_entry(&resource->devices, device, i) {
4472
retcode = adm_del_minor(device);
4473
if (retcode != NO_ERROR) {
4474
/* "can not happen" */
4475
drbd_msg_put_info(adm_ctx.reply_skb, "failed to delete volume");
4476
goto out;
4477
}
4478
}
4479
4480
retcode = adm_del_resource(resource);
4481
out:
4482
mutex_unlock(&resource->adm_mutex);
4483
finish:
4484
drbd_adm_finish(&adm_ctx, info, retcode);
4485
return 0;
4486
}
4487
4488
int drbd_adm_del_resource(struct sk_buff *skb, struct genl_info *info)
4489
{
4490
struct drbd_config_context adm_ctx;
4491
struct drbd_resource *resource;
4492
enum drbd_ret_code retcode;
4493
4494
retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_RESOURCE);
4495
if (!adm_ctx.reply_skb)
4496
return retcode;
4497
if (retcode != NO_ERROR)
4498
goto finish;
4499
resource = adm_ctx.resource;
4500
4501
mutex_lock(&resource->adm_mutex);
4502
retcode = adm_del_resource(resource);
4503
mutex_unlock(&resource->adm_mutex);
4504
finish:
4505
drbd_adm_finish(&adm_ctx, info, retcode);
4506
return 0;
4507
}
4508
4509
void drbd_bcast_event(struct drbd_device *device, const struct sib_info *sib)
4510
{
4511
struct sk_buff *msg;
4512
struct drbd_genlmsghdr *d_out;
4513
unsigned seq;
4514
int err = -ENOMEM;
4515
4516
seq = atomic_inc_return(&drbd_genl_seq);
4517
msg = genlmsg_new(NLMSG_GOODSIZE, GFP_NOIO);
4518
if (!msg)
4519
goto failed;
4520
4521
err = -EMSGSIZE;
4522
d_out = genlmsg_put(msg, 0, seq, &drbd_genl_family, 0, DRBD_EVENT);
4523
if (!d_out) /* cannot happen, but anyway. */
4524
goto nla_put_failure;
4525
d_out->minor = device_to_minor(device);
4526
d_out->ret_code = NO_ERROR;
4527
4528
if (nla_put_status_info(msg, device, sib))
4529
goto nla_put_failure;
4530
genlmsg_end(msg, d_out);
4531
err = drbd_genl_multicast_events(msg, GFP_NOWAIT);
4532
/* msg has been consumed or freed in netlink_broadcast() */
4533
if (err && err != -ESRCH)
4534
goto failed;
4535
4536
return;
4537
4538
nla_put_failure:
4539
nlmsg_free(msg);
4540
failed:
4541
drbd_err(device, "Error %d while broadcasting event. "
4542
"Event seq:%u sib_reason:%u\n",
4543
err, seq, sib->sib_reason);
4544
}
4545
4546
static int nla_put_notification_header(struct sk_buff *msg,
4547
enum drbd_notification_type type)
4548
{
4549
struct drbd_notification_header nh = {
4550
.nh_type = type,
4551
};
4552
4553
return drbd_notification_header_to_skb(msg, &nh, true);
4554
}
4555
4556
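/* With skb == NULL, allocate a new message and multicast it to the events
* group; otherwise append the notification to the skb of an ongoing dump. */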
int notify_resource_state(struct sk_buff *skb,
4557
unsigned int seq,
4558
struct drbd_resource *resource,
4559
struct resource_info *resource_info,
4560
enum drbd_notification_type type)
4561
{
4562
struct resource_statistics resource_statistics;
4563
struct drbd_genlmsghdr *dh;
4564
bool multicast = false;
4565
int err;
4566
4567
if (!skb) {
4568
seq = atomic_inc_return(&notify_genl_seq);
4569
skb = genlmsg_new(NLMSG_GOODSIZE, GFP_NOIO);
4570
err = -ENOMEM;
4571
if (!skb)
4572
goto failed;
4573
multicast = true;
4574
}
4575
4576
err = -EMSGSIZE;
4577
dh = genlmsg_put(skb, 0, seq, &drbd_genl_family, 0, DRBD_RESOURCE_STATE);
4578
if (!dh)
4579
goto nla_put_failure;
4580
dh->minor = -1U;
4581
dh->ret_code = NO_ERROR;
4582
if (nla_put_drbd_cfg_context(skb, resource, NULL, NULL) ||
4583
nla_put_notification_header(skb, type) ||
4584
((type & ~NOTIFY_FLAGS) != NOTIFY_DESTROY &&
4585
resource_info_to_skb(skb, resource_info, true)))
4586
goto nla_put_failure;
4587
resource_statistics.res_stat_write_ordering = resource->write_ordering;
4588
err = resource_statistics_to_skb(skb, &resource_statistics, !capable(CAP_SYS_ADMIN));
4589
if (err)
4590
goto nla_put_failure;
4591
genlmsg_end(skb, dh);
4592
if (multicast) {
4593
err = drbd_genl_multicast_events(skb, GFP_NOWAIT);
4594
/* skb has been consumed or freed in netlink_broadcast() */
4595
if (err && err != -ESRCH)
4596
goto failed;
4597
}
4598
return 0;
4599
4600
nla_put_failure:
4601
nlmsg_free(skb);
4602
failed:
4603
drbd_err(resource, "Error %d while broadcasting event. Event seq:%u\n",
4604
err, seq);
4605
return err;
4606
}
4607
4608
int notify_device_state(struct sk_buff *skb,
4609
unsigned int seq,
4610
struct drbd_device *device,
4611
struct device_info *device_info,
4612
enum drbd_notification_type type)
4613
{
4614
struct device_statistics device_statistics;
4615
struct drbd_genlmsghdr *dh;
4616
bool multicast = false;
4617
int err;
4618
4619
if (!skb) {
4620
seq = atomic_inc_return(&notify_genl_seq);
4621
skb = genlmsg_new(NLMSG_GOODSIZE, GFP_NOIO);
4622
err = -ENOMEM;
4623
if (!skb)
4624
goto failed;
4625
multicast = true;
4626
}
4627
4628
err = -EMSGSIZE;
4629
dh = genlmsg_put(skb, 0, seq, &drbd_genl_family, 0, DRBD_DEVICE_STATE);
4630
if (!dh)
4631
goto nla_put_failure;
4632
dh->minor = device->minor;
4633
dh->ret_code = NO_ERROR;
4634
if (nla_put_drbd_cfg_context(skb, device->resource, NULL, device) ||
4635
nla_put_notification_header(skb, type) ||
4636
((type & ~NOTIFY_FLAGS) != NOTIFY_DESTROY &&
4637
device_info_to_skb(skb, device_info, true)))
4638
goto nla_put_failure;
4639
device_to_statistics(&device_statistics, device);
4640
device_statistics_to_skb(skb, &device_statistics, !capable(CAP_SYS_ADMIN));
4641
genlmsg_end(skb, dh);
4642
if (multicast) {
4643
err = drbd_genl_multicast_events(skb, GFP_NOWAIT);
4644
/* skb has been consumed or freed in netlink_broadcast() */
4645
if (err && err != -ESRCH)
4646
goto failed;
4647
}
4648
return 0;
4649
4650
nla_put_failure:
4651
nlmsg_free(skb);
4652
failed:
4653
drbd_err(device, "Error %d while broadcasting event. Event seq:%u\n",
4654
err, seq);
4655
return err;
4656
}
4657
4658
int notify_connection_state(struct sk_buff *skb,
4659
unsigned int seq,
4660
struct drbd_connection *connection,
4661
struct connection_info *connection_info,
4662
enum drbd_notification_type type)
4663
{
4664
struct connection_statistics connection_statistics;
4665
struct drbd_genlmsghdr *dh;
4666
bool multicast = false;
4667
int err;
4668
4669
if (!skb) {
4670
seq = atomic_inc_return(&notify_genl_seq);
4671
skb = genlmsg_new(NLMSG_GOODSIZE, GFP_NOIO);
4672
err = -ENOMEM;
4673
if (!skb)
4674
goto failed;
4675
multicast = true;
4676
}
4677
4678
err = -EMSGSIZE;
4679
dh = genlmsg_put(skb, 0, seq, &drbd_genl_family, 0, DRBD_CONNECTION_STATE);
4680
if (!dh)
4681
goto nla_put_failure;
4682
dh->minor = -1U;
4683
dh->ret_code = NO_ERROR;
4684
if (nla_put_drbd_cfg_context(skb, connection->resource, connection, NULL) ||
4685
nla_put_notification_header(skb, type) ||
4686
((type & ~NOTIFY_FLAGS) != NOTIFY_DESTROY &&
4687
connection_info_to_skb(skb, connection_info, true)))
4688
goto nla_put_failure;
4689
connection_statistics.conn_congested = test_bit(NET_CONGESTED, &connection->flags);
4690
connection_statistics_to_skb(skb, &connection_statistics, !capable(CAP_SYS_ADMIN));
4691
genlmsg_end(skb, dh);
4692
if (multicast) {
4693
err = drbd_genl_multicast_events(skb, GFP_NOWAIT);
4694
/* skb has been consumed or freed in netlink_broadcast() */
4695
if (err && err != -ESRCH)
4696
goto failed;
4697
}
4698
return 0;
4699
4700
nla_put_failure:
4701
nlmsg_free(skb);
4702
failed:
4703
drbd_err(connection, "Error %d while broadcasting event. Event seq:%u\n",
4704
err, seq);
4705
return err;
4706
}
4707
4708
int notify_peer_device_state(struct sk_buff *skb,
4709
unsigned int seq,
4710
struct drbd_peer_device *peer_device,
4711
struct peer_device_info *peer_device_info,
4712
enum drbd_notification_type type)
4713
{
4714
struct peer_device_statistics peer_device_statistics;
4715
struct drbd_resource *resource = peer_device->device->resource;
4716
struct drbd_genlmsghdr *dh;
4717
bool multicast = false;
4718
int err;
4719
4720
if (!skb) {
4721
seq = atomic_inc_return(&notify_genl_seq);
4722
skb = genlmsg_new(NLMSG_GOODSIZE, GFP_NOIO);
4723
err = -ENOMEM;
4724
if (!skb)
4725
goto failed;
4726
multicast = true;
4727
}
4728
4729
err = -EMSGSIZE;
4730
dh = genlmsg_put(skb, 0, seq, &drbd_genl_family, 0, DRBD_PEER_DEVICE_STATE);
4731
if (!dh)
4732
goto nla_put_failure;
4733
dh->minor = -1U;
4734
dh->ret_code = NO_ERROR;
4735
if (nla_put_drbd_cfg_context(skb, resource, peer_device->connection, peer_device->device) ||
4736
nla_put_notification_header(skb, type) ||
4737
((type & ~NOTIFY_FLAGS) != NOTIFY_DESTROY &&
4738
peer_device_info_to_skb(skb, peer_device_info, true)))
4739
goto nla_put_failure;
4740
peer_device_to_statistics(&peer_device_statistics, peer_device);
4741
peer_device_statistics_to_skb(skb, &peer_device_statistics, !capable(CAP_SYS_ADMIN));
4742
genlmsg_end(skb, dh);
4743
if (multicast) {
4744
err = drbd_genl_multicast_events(skb, GFP_NOWAIT);
4745
/* skb has been consumed or freed in netlink_broadcast() */
4746
if (err && err != -ESRCH)
4747
goto failed;
4748
}
4749
return 0;
4750
4751
nla_put_failure:
4752
nlmsg_free(skb);
4753
failed:
4754
drbd_err(peer_device, "Error %d while broadcasting event. Event seq:%u\n",
4755
err, seq);
4756
return err;
4757
}
4758
4759
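/* Broadcast a DRBD_HELPER event carrying the helper name and status. */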
void notify_helper(enum drbd_notification_type type,
4760
struct drbd_device *device, struct drbd_connection *connection,
4761
const char *name, int status)
4762
{
4763
struct drbd_resource *resource = device ? device->resource : connection->resource;
4764
struct drbd_helper_info helper_info;
4765
unsigned int seq = atomic_inc_return(&notify_genl_seq);
4766
struct sk_buff *skb = NULL;
4767
struct drbd_genlmsghdr *dh;
4768
int err;
4769
4770
strscpy(helper_info.helper_name, name, sizeof(helper_info.helper_name));
4771
helper_info.helper_name_len = min(strlen(name), sizeof(helper_info.helper_name));
4772
helper_info.helper_status = status;
4773
4774
skb = genlmsg_new(NLMSG_GOODSIZE, GFP_NOIO);
4775
err = -ENOMEM;
4776
if (!skb)
4777
goto fail;
4778
4779
err = -EMSGSIZE;
4780
dh = genlmsg_put(skb, 0, seq, &drbd_genl_family, 0, DRBD_HELPER);
4781
if (!dh)
4782
goto fail;
4783
dh->minor = device ? device->minor : -1;
4784
dh->ret_code = NO_ERROR;
4785
mutex_lock(&notification_mutex);
4786
if (nla_put_drbd_cfg_context(skb, resource, connection, device) ||
4787
nla_put_notification_header(skb, type) ||
4788
drbd_helper_info_to_skb(skb, &helper_info, true))
4789
goto unlock_fail;
4790
genlmsg_end(skb, dh);
4791
err = drbd_genl_multicast_events(skb, GFP_NOWAIT);
4792
skb = NULL;
4793
/* skb has been consumed or freed in netlink_broadcast() */
4794
if (err && err != -ESRCH)
4795
goto unlock_fail;
4796
mutex_unlock(&notification_mutex);
4797
return;
4798
4799
unlock_fail:
4800
mutex_unlock(&notification_mutex);
4801
fail:
4802
nlmsg_free(skb);
4803
drbd_err(resource, "Error %d while broadcasting event. Event seq:%u\n",
4804
err, seq);
4805
}
4806
4807
static int notify_initial_state_done(struct sk_buff *skb, unsigned int seq)
4808
{
4809
struct drbd_genlmsghdr *dh;
4810
int err;
4811
4812
err = -EMSGSIZE;
4813
dh = genlmsg_put(skb, 0, seq, &drbd_genl_family, 0, DRBD_INITIAL_STATE_DONE);
4814
if (!dh)
4815
goto nla_put_failure;
4816
dh->minor = -1U;
4817
dh->ret_code = NO_ERROR;
4818
if (nla_put_notification_header(skb, NOTIFY_EXISTS))
4819
goto nla_put_failure;
4820
genlmsg_end(skb, dh);
4821
return 0;
4822
4823
nla_put_failure:
4824
nlmsg_free(skb);
4825
pr_err("Error %d sending event. Event seq:%u\n", err, seq);
4826
return err;
4827
}
4828
4829
static void free_state_changes(struct list_head *list)
4830
{
4831
while (!list_empty(list)) {
4832
struct drbd_state_change *state_change =
4833
list_first_entry(list, struct drbd_state_change, list);
4834
list_del(&state_change->list);
4835
forget_state_change(state_change);
4836
}
4837
}
4838
4839
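/* One notification for the resource itself, one per connection, one per
* device, and one per (device, connection) pair. */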
static unsigned int notifications_for_state_change(struct drbd_state_change *state_change)
4840
{
4841
return 1 +
4842
state_change->n_connections +
4843
state_change->n_devices +
4844
state_change->n_devices * state_change->n_connections;
4845
}
4846
4847
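/* Emit one NOTIFY_EXISTS notification per call. cb->args[5] counts down the
* remaining calls; when it reaches 1, the DRBD_INITIAL_STATE_DONE marker is
* sent instead. */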
static int get_initial_state(struct sk_buff *skb, struct netlink_callback *cb)
4848
{
4849
struct drbd_state_change *state_change = (struct drbd_state_change *)cb->args[0];
4850
unsigned int seq = cb->args[2];
4851
unsigned int n;
4852
enum drbd_notification_type flags = 0;
4853
int err = 0;
4854
4855
/* There is no need for taking notification_mutex here: it doesn't
4856
matter if the initial state events mix with later state change
4857
events; we can always tell the events apart by the NOTIFY_EXISTS
4858
flag. */
4859
4860
cb->args[5]--;
4861
if (cb->args[5] == 1) {
4862
err = notify_initial_state_done(skb, seq);
4863
goto out;
4864
}
4865
n = cb->args[4]++;
4866
if (cb->args[4] < cb->args[3])
4867
flags |= NOTIFY_CONTINUES;
4868
if (n < 1) {
4869
err = notify_resource_state_change(skb, seq, state_change->resource,
4870
NOTIFY_EXISTS | flags);
4871
goto next;
4872
}
4873
n--;
4874
if (n < state_change->n_connections) {
4875
err = notify_connection_state_change(skb, seq, &state_change->connections[n],
4876
NOTIFY_EXISTS | flags);
4877
goto next;
4878
}
4879
n -= state_change->n_connections;
4880
if (n < state_change->n_devices) {
4881
err = notify_device_state_change(skb, seq, &state_change->devices[n],
4882
NOTIFY_EXISTS | flags);
4883
goto next;
4884
}
4885
n -= state_change->n_devices;
4886
if (n < state_change->n_devices * state_change->n_connections) {
4887
err = notify_peer_device_state_change(skb, seq, &state_change->peer_devices[n],
4888
NOTIFY_EXISTS | flags);
4889
goto next;
4890
}
4891
4892
next:
4893
if (cb->args[4] == cb->args[3]) {
4894
struct drbd_state_change *next_state_change =
4895
list_entry(state_change->list.next,
4896
struct drbd_state_change, list);
4897
cb->args[0] = (long)next_state_change;
4898
cb->args[3] = notifications_for_state_change(next_state_change);
4899
cb->args[4] = 0;
4900
}
4901
out:
4902
if (err)
4903
return err;
4904
else
4905
return skb->len;
4906
}
4907
4908
int drbd_adm_get_initial_state(struct sk_buff *skb, struct netlink_callback *cb)
4909
{
4910
struct drbd_resource *resource;
4911
LIST_HEAD(head);
4912
4913
if (cb->args[5] >= 1) {
4914
if (cb->args[5] > 1)
4915
return get_initial_state(skb, cb);
4916
if (cb->args[0]) {
4917
struct drbd_state_change *state_change =
4918
(struct drbd_state_change *)cb->args[0];
4919
4920
/* connect list to head */
4921
list_add(&head, &state_change->list);
4922
free_state_changes(&head);
4923
}
4924
return 0;
4925
}
4926
4927
cb->args[5] = 2; /* number of iterations */
4928
mutex_lock(&resources_mutex);
4929
for_each_resource(resource, &drbd_resources) {
4930
struct drbd_state_change *state_change;
4931
4932
state_change = remember_old_state(resource, GFP_KERNEL);
4933
if (!state_change) {
4934
if (!list_empty(&head))
4935
free_state_changes(&head);
4936
mutex_unlock(&resources_mutex);
4937
return -ENOMEM;
4938
}
4939
copy_old_to_new_state_change(state_change);
4940
list_add_tail(&state_change->list, &head);
4941
cb->args[5] += notifications_for_state_change(state_change);
4942
}
4943
mutex_unlock(&resources_mutex);
4944
4945
if (!list_empty(&head)) {
4946
struct drbd_state_change *state_change =
4947
list_entry(head.next, struct drbd_state_change, list);
4948
cb->args[0] = (long)state_change;
4949
cb->args[3] = notifications_for_state_change(state_change);
4950
list_del(&head); /* detach list from head */
4951
}
4952
4953
cb->args[2] = cb->nlh->nlmsg_seq;
4954
return get_initial_state(skb, cb);
4955
}
4956
4957