Loading...
1// SPDX-License-Identifier: GPL-2.0-only
2/*
3 * Copyright (c) 2010, Microsoft Corporation.
4 *
5 * Authors:
6 * Haiyang Zhang <haiyangz@microsoft.com>
7 * Hank Janssen <hjanssen@microsoft.com>
8 */
9#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
10
11#include <linux/kernel.h>
12#include <linux/init.h>
13#include <linux/module.h>
14#include <linux/slab.h>
15#include <linux/sysctl.h>
16#include <linux/reboot.h>
17#include <linux/hyperv.h>
18#include <linux/clockchips.h>
19#include <linux/ptp_clock_kernel.h>
20#include <asm/mshyperv.h>
21
22#include "hyperv_vmbus.h"
23
24#define SD_MAJOR 3
25#define SD_MINOR 0
26#define SD_MINOR_1 1
27#define SD_MINOR_2 2
28#define SD_VERSION_3_1 (SD_MAJOR << 16 | SD_MINOR_1)
29#define SD_VERSION_3_2 (SD_MAJOR << 16 | SD_MINOR_2)
30#define SD_VERSION (SD_MAJOR << 16 | SD_MINOR)
31
32#define SD_MAJOR_1 1
33#define SD_VERSION_1 (SD_MAJOR_1 << 16 | SD_MINOR)
34
35#define TS_MAJOR 4
36#define TS_MINOR 0
37#define TS_VERSION (TS_MAJOR << 16 | TS_MINOR)
38
39#define TS_MAJOR_1 1
40#define TS_VERSION_1 (TS_MAJOR_1 << 16 | TS_MINOR)
41
42#define TS_MAJOR_3 3
43#define TS_VERSION_3 (TS_MAJOR_3 << 16 | TS_MINOR)
44
45#define HB_MAJOR 3
46#define HB_MINOR 0
47#define HB_VERSION (HB_MAJOR << 16 | HB_MINOR)
48
49#define HB_MAJOR_1 1
50#define HB_VERSION_1 (HB_MAJOR_1 << 16 | HB_MINOR)
51
52static int sd_srv_version;
53static int ts_srv_version;
54static int hb_srv_version;
55
56#define SD_VER_COUNT 4
57static const int sd_versions[] = {
58 SD_VERSION_3_2,
59 SD_VERSION_3_1,
60 SD_VERSION,
61 SD_VERSION_1
62};
63
64#define TS_VER_COUNT 3
65static const int ts_versions[] = {
66 TS_VERSION,
67 TS_VERSION_3,
68 TS_VERSION_1
69};
70
71#define HB_VER_COUNT 2
72static const int hb_versions[] = {
73 HB_VERSION,
74 HB_VERSION_1
75};
76
77#define FW_VER_COUNT 2
78static const int fw_versions[] = {
79 UTIL_FW_VERSION,
80 UTIL_WS2K8_FW_VERSION
81};
82
83/*
84 * Send the "hibernate" udev event in a thread context.
85 */
86struct hibernate_work_context {
87 struct work_struct work;
88 struct hv_device *dev;
89};
90
91static struct hibernate_work_context hibernate_context;
92static bool hibernation_supported;
93
94static void send_hibernate_uevent(struct work_struct *work)
95{
96 char *uevent_env[2] = { "EVENT=hibernate", NULL };
97 struct hibernate_work_context *ctx;
98
99 ctx = container_of(work, struct hibernate_work_context, work);
100
101 kobject_uevent_env(&ctx->dev->device.kobj, KOBJ_CHANGE, uevent_env);
102
103 pr_info("Sent hibernation uevent\n");
104}
105
106static int hv_shutdown_init(struct hv_util_service *srv)
107{
108 struct vmbus_channel *channel = srv->channel;
109
110 INIT_WORK(&hibernate_context.work, send_hibernate_uevent);
111 hibernate_context.dev = channel->device_obj;
112
113 hibernation_supported = hv_is_hibernation_supported();
114
115 return 0;
116}
117
118static void shutdown_onchannelcallback(void *context);
119static struct hv_util_service util_shutdown = {
120 .util_cb = shutdown_onchannelcallback,
121 .util_init = hv_shutdown_init,
122};
123
124static int hv_timesync_init(struct hv_util_service *srv);
125static int hv_timesync_pre_suspend(void);
126static void hv_timesync_deinit(void);
127
128static void timesync_onchannelcallback(void *context);
129static struct hv_util_service util_timesynch = {
130 .util_cb = timesync_onchannelcallback,
131 .util_init = hv_timesync_init,
132 .util_pre_suspend = hv_timesync_pre_suspend,
133 .util_deinit = hv_timesync_deinit,
134};
135
136static void heartbeat_onchannelcallback(void *context);
137static struct hv_util_service util_heartbeat = {
138 .util_cb = heartbeat_onchannelcallback,
139};
140
141static struct hv_util_service util_kvp = {
142 .util_cb = hv_kvp_onchannelcallback,
143 .util_init = hv_kvp_init,
144 .util_pre_suspend = hv_kvp_pre_suspend,
145 .util_pre_resume = hv_kvp_pre_resume,
146 .util_deinit = hv_kvp_deinit,
147};
148
149static struct hv_util_service util_vss = {
150 .util_cb = hv_vss_onchannelcallback,
151 .util_init = hv_vss_init,
152 .util_pre_suspend = hv_vss_pre_suspend,
153 .util_pre_resume = hv_vss_pre_resume,
154 .util_deinit = hv_vss_deinit,
155};
156
157static struct hv_util_service util_fcopy = {
158 .util_cb = hv_fcopy_onchannelcallback,
159 .util_init = hv_fcopy_init,
160 .util_pre_suspend = hv_fcopy_pre_suspend,
161 .util_pre_resume = hv_fcopy_pre_resume,
162 .util_deinit = hv_fcopy_deinit,
163};
164
165static void perform_shutdown(struct work_struct *dummy)
166{
167 orderly_poweroff(true);
168}
169
170static void perform_restart(struct work_struct *dummy)
171{
172 orderly_reboot();
173}
174
175/*
176 * Perform the shutdown operation in a thread context.
177 */
178static DECLARE_WORK(shutdown_work, perform_shutdown);
179
180/*
181 * Perform the restart operation in a thread context.
182 */
183static DECLARE_WORK(restart_work, perform_restart);
184
185static void shutdown_onchannelcallback(void *context)
186{
187 struct vmbus_channel *channel = context;
188 struct work_struct *work = NULL;
189 u32 recvlen;
190 u64 requestid;
191 u8 *shut_txf_buf = util_shutdown.recv_buffer;
192
193 struct shutdown_msg_data *shutdown_msg;
194
195 struct icmsg_hdr *icmsghdrp;
196
197 if (vmbus_recvpacket(channel, shut_txf_buf, HV_HYP_PAGE_SIZE, &recvlen, &requestid)) {
198 pr_err_ratelimited("Shutdown request received. Could not read into shut txf buf\n");
199 return;
200 }
201
202 if (!recvlen)
203 return;
204
205 /* Ensure recvlen is big enough to read header data */
206 if (recvlen < ICMSG_HDR) {
207 pr_err_ratelimited("Shutdown request received. Packet length too small: %d\n",
208 recvlen);
209 return;
210 }
211
212 icmsghdrp = (struct icmsg_hdr *)&shut_txf_buf[sizeof(struct vmbuspipe_hdr)];
213
214 if (icmsghdrp->icmsgtype == ICMSGTYPE_NEGOTIATE) {
215 if (vmbus_prep_negotiate_resp(icmsghdrp,
216 shut_txf_buf, recvlen,
217 fw_versions, FW_VER_COUNT,
218 sd_versions, SD_VER_COUNT,
219 NULL, &sd_srv_version)) {
220 pr_info("Shutdown IC version %d.%d\n",
221 sd_srv_version >> 16,
222 sd_srv_version & 0xFFFF);
223 }
224 } else if (icmsghdrp->icmsgtype == ICMSGTYPE_SHUTDOWN) {
225 /* Ensure recvlen is big enough to contain shutdown_msg_data struct */
226 if (recvlen < ICMSG_HDR + sizeof(struct shutdown_msg_data)) {
227 pr_err_ratelimited("Invalid shutdown msg data. Packet length too small: %u\n",
228 recvlen);
229 return;
230 }
231
232 shutdown_msg = (struct shutdown_msg_data *)&shut_txf_buf[ICMSG_HDR];
233
234 /*
235 * shutdown_msg->flags can be 0(shut down), 2(reboot),
236 * or 4(hibernate). It may bitwise-OR 1, which means
237 * performing the request by force. Linux always tries
238 * to perform the request by force.
239 */
240 switch (shutdown_msg->flags) {
241 case 0:
242 case 1:
243 icmsghdrp->status = HV_S_OK;
244 work = &shutdown_work;
245 pr_info("Shutdown request received - graceful shutdown initiated\n");
246 break;
247 case 2:
248 case 3:
249 icmsghdrp->status = HV_S_OK;
250 work = &restart_work;
251 pr_info("Restart request received - graceful restart initiated\n");
252 break;
253 case 4:
254 case 5:
255 pr_info("Hibernation request received\n");
256 icmsghdrp->status = hibernation_supported ?
257 HV_S_OK : HV_E_FAIL;
258 if (hibernation_supported)
259 work = &hibernate_context.work;
260 break;
261 default:
262 icmsghdrp->status = HV_E_FAIL;
263 pr_info("Shutdown request received - Invalid request\n");
264 break;
265 }
266 } else {
267 icmsghdrp->status = HV_E_FAIL;
268 pr_err_ratelimited("Shutdown request received. Invalid msg type: %d\n",
269 icmsghdrp->icmsgtype);
270 }
271
272 icmsghdrp->icflags = ICMSGHDRFLAG_TRANSACTION
273 | ICMSGHDRFLAG_RESPONSE;
274
275 vmbus_sendpacket(channel, shut_txf_buf,
276 recvlen, requestid,
277 VM_PKT_DATA_INBAND, 0);
278
279 if (work)
280 schedule_work(work);
281}
282
283/*
284 * Set the host time in a process context.
285 */
286static struct work_struct adj_time_work;
287
288/*
289 * The last time sample, received from the host. PTP device responds to
290 * requests by using this data and the current partition-wide time reference
291 * count.
292 */
293static struct {
294 u64 host_time;
295 u64 ref_time;
296 spinlock_t lock;
297} host_ts;
298
299static bool timesync_implicit;
300
301module_param(timesync_implicit, bool, 0644);
302MODULE_PARM_DESC(timesync_implicit, "If set treat SAMPLE as SYNC when clock is behind");
303
304static inline u64 reftime_to_ns(u64 reftime)
305{
306 return (reftime - WLTIMEDELTA) * 100;
307}
308
309/*
310 * Hard coded threshold for host timesync delay: 600 seconds
311 */
312static const u64 HOST_TIMESYNC_DELAY_THRESH = 600 * (u64)NSEC_PER_SEC;
313
314static int hv_get_adj_host_time(struct timespec64 *ts)
315{
316 u64 newtime, reftime, timediff_adj;
317 unsigned long flags;
318 int ret = 0;
319
320 spin_lock_irqsave(&host_ts.lock, flags);
321 reftime = hv_read_reference_counter();
322
323 /*
324 * We need to let the caller know that last update from host
325 * is older than the max allowable threshold. clock_gettime()
326 * and PTP ioctl do not have a documented error that we could
327 * return for this specific case. Use ESTALE to report this.
328 */
329 timediff_adj = reftime - host_ts.ref_time;
330 if (timediff_adj * 100 > HOST_TIMESYNC_DELAY_THRESH) {
331 pr_warn_once("TIMESYNC IC: Stale time stamp, %llu nsecs old\n",
332 (timediff_adj * 100));
333 ret = -ESTALE;
334 }
335
336 newtime = host_ts.host_time + timediff_adj;
337 *ts = ns_to_timespec64(reftime_to_ns(newtime));
338 spin_unlock_irqrestore(&host_ts.lock, flags);
339
340 return ret;
341}
342
343static void hv_set_host_time(struct work_struct *work)
344{
345
346 struct timespec64 ts;
347
348 if (!hv_get_adj_host_time(&ts))
349 do_settimeofday64(&ts);
350}
351
352/*
353 * Due to a bug on Hyper-V hosts, the sync flag may not always be sent on resume.
354 * Force a sync if the guest is behind.
355 */
356static inline bool hv_implicit_sync(u64 host_time)
357{
358 struct timespec64 new_ts;
359 struct timespec64 threshold_ts;
360
361 new_ts = ns_to_timespec64(reftime_to_ns(host_time));
362 ktime_get_real_ts64(&threshold_ts);
363
364 threshold_ts.tv_sec += 5;
365
366 /*
367 * If guest behind the host by 5 or more seconds.
368 */
369 if (timespec64_compare(&new_ts, &threshold_ts) >= 0)
370 return true;
371
372 return false;
373}
374
375/*
376 * Synchronize time with host after reboot, restore, etc.
377 *
378 * ICTIMESYNCFLAG_SYNC flag bit indicates reboot, restore events of the VM.
379 * After reboot the flag ICTIMESYNCFLAG_SYNC is included in the first time
380 * message after the timesync channel is opened. Since the hv_utils module is
381 * loaded after hv_vmbus, the first message is usually missed. This bit is
382 * considered a hard request to discipline the clock.
383 *
384 * ICTIMESYNCFLAG_SAMPLE bit indicates a time sample from host. This is
385 * typically used as a hint to the guest. The guest is under no obligation
386 * to discipline the clock.
387 */
388static inline void adj_guesttime(u64 hosttime, u64 reftime, u8 adj_flags)
389{
390 unsigned long flags;
391 u64 cur_reftime;
392
393 /*
394 * Save the adjusted time sample from the host and the snapshot
395 * of the current system time.
396 */
397 spin_lock_irqsave(&host_ts.lock, flags);
398
399 cur_reftime = hv_read_reference_counter();
400 host_ts.host_time = hosttime;
401 host_ts.ref_time = cur_reftime;
402
403 /*
404 * TimeSync v4 messages contain reference time (guest's Hyper-V
405 * clocksource read when the time sample was generated), we can
406 * improve the precision by adding the delta between now and the
407 * time of generation. For older protocols we set
408 * reftime == cur_reftime on call.
409 */
410 host_ts.host_time += (cur_reftime - reftime);
411
412 spin_unlock_irqrestore(&host_ts.lock, flags);
413
414 /* Schedule work to do do_settimeofday64() */
415 if ((adj_flags & ICTIMESYNCFLAG_SYNC) ||
416 (timesync_implicit && hv_implicit_sync(host_ts.host_time)))
417 schedule_work(&adj_time_work);
418}
419
420/*
421 * Time Sync Channel message handler.
422 */
423static void timesync_onchannelcallback(void *context)
424{
425 struct vmbus_channel *channel = context;
426 u32 recvlen;
427 u64 requestid;
428 struct icmsg_hdr *icmsghdrp;
429 struct ictimesync_data *timedatap;
430 struct ictimesync_ref_data *refdata;
431 u8 *time_txf_buf = util_timesynch.recv_buffer;
432
433 /*
434 * Drain the ring buffer and use the last packet to update
435 * host_ts
436 */
437 while (1) {
438 int ret = vmbus_recvpacket(channel, time_txf_buf,
439 HV_HYP_PAGE_SIZE, &recvlen,
440 &requestid);
441 if (ret) {
442 pr_err_ratelimited("TimeSync IC pkt recv failed (Err: %d)\n",
443 ret);
444 break;
445 }
446
447 if (!recvlen)
448 break;
449
450 /* Ensure recvlen is big enough to read header data */
451 if (recvlen < ICMSG_HDR) {
452 pr_err_ratelimited("Timesync request received. Packet length too small: %d\n",
453 recvlen);
454 break;
455 }
456
457 icmsghdrp = (struct icmsg_hdr *)&time_txf_buf[
458 sizeof(struct vmbuspipe_hdr)];
459
460 if (icmsghdrp->icmsgtype == ICMSGTYPE_NEGOTIATE) {
461 if (vmbus_prep_negotiate_resp(icmsghdrp,
462 time_txf_buf, recvlen,
463 fw_versions, FW_VER_COUNT,
464 ts_versions, TS_VER_COUNT,
465 NULL, &ts_srv_version)) {
466 pr_info("TimeSync IC version %d.%d\n",
467 ts_srv_version >> 16,
468 ts_srv_version & 0xFFFF);
469 }
470 } else if (icmsghdrp->icmsgtype == ICMSGTYPE_TIMESYNC) {
471 if (ts_srv_version > TS_VERSION_3) {
472 /* Ensure recvlen is big enough to read ictimesync_ref_data */
473 if (recvlen < ICMSG_HDR + sizeof(struct ictimesync_ref_data)) {
474 pr_err_ratelimited("Invalid ictimesync ref data. Length too small: %u\n",
475 recvlen);
476 break;
477 }
478 refdata = (struct ictimesync_ref_data *)&time_txf_buf[ICMSG_HDR];
479
480 adj_guesttime(refdata->parenttime,
481 refdata->vmreferencetime,
482 refdata->flags);
483 } else {
484 /* Ensure recvlen is big enough to read ictimesync_data */
485 if (recvlen < ICMSG_HDR + sizeof(struct ictimesync_data)) {
486 pr_err_ratelimited("Invalid ictimesync data. Length too small: %u\n",
487 recvlen);
488 break;
489 }
490 timedatap = (struct ictimesync_data *)&time_txf_buf[ICMSG_HDR];
491
492 adj_guesttime(timedatap->parenttime,
493 hv_read_reference_counter(),
494 timedatap->flags);
495 }
496 } else {
497 icmsghdrp->status = HV_E_FAIL;
498 pr_err_ratelimited("Timesync request received. Invalid msg type: %d\n",
499 icmsghdrp->icmsgtype);
500 }
501
502 icmsghdrp->icflags = ICMSGHDRFLAG_TRANSACTION
503 | ICMSGHDRFLAG_RESPONSE;
504
505 vmbus_sendpacket(channel, time_txf_buf,
506 recvlen, requestid,
507 VM_PKT_DATA_INBAND, 0);
508 }
509}
510
511/*
512 * Heartbeat functionality.
513 * Every two seconds, Hyper-V send us a heartbeat request message.
514 * we respond to this message, and Hyper-V knows we are alive.
515 */
516static void heartbeat_onchannelcallback(void *context)
517{
518 struct vmbus_channel *channel = context;
519 u32 recvlen;
520 u64 requestid;
521 struct icmsg_hdr *icmsghdrp;
522 struct heartbeat_msg_data *heartbeat_msg;
523 u8 *hbeat_txf_buf = util_heartbeat.recv_buffer;
524
525 while (1) {
526
527 if (vmbus_recvpacket(channel, hbeat_txf_buf, HV_HYP_PAGE_SIZE,
528 &recvlen, &requestid)) {
529 pr_err_ratelimited("Heartbeat request received. Could not read into hbeat txf buf\n");
530 return;
531 }
532
533 if (!recvlen)
534 break;
535
536 /* Ensure recvlen is big enough to read header data */
537 if (recvlen < ICMSG_HDR) {
538 pr_err_ratelimited("Heartbeat request received. Packet length too small: %d\n",
539 recvlen);
540 break;
541 }
542
543 icmsghdrp = (struct icmsg_hdr *)&hbeat_txf_buf[
544 sizeof(struct vmbuspipe_hdr)];
545
546 if (icmsghdrp->icmsgtype == ICMSGTYPE_NEGOTIATE) {
547 if (vmbus_prep_negotiate_resp(icmsghdrp,
548 hbeat_txf_buf, recvlen,
549 fw_versions, FW_VER_COUNT,
550 hb_versions, HB_VER_COUNT,
551 NULL, &hb_srv_version)) {
552
553 pr_info("Heartbeat IC version %d.%d\n",
554 hb_srv_version >> 16,
555 hb_srv_version & 0xFFFF);
556 }
557 } else if (icmsghdrp->icmsgtype == ICMSGTYPE_HEARTBEAT) {
558 /*
559 * Ensure recvlen is big enough to read seq_num. Reserved area is not
560 * included in the check as the host may not fill it up entirely
561 */
562 if (recvlen < ICMSG_HDR + sizeof(u64)) {
563 pr_err_ratelimited("Invalid heartbeat msg data. Length too small: %u\n",
564 recvlen);
565 break;
566 }
567 heartbeat_msg = (struct heartbeat_msg_data *)&hbeat_txf_buf[ICMSG_HDR];
568
569 heartbeat_msg->seq_num += 1;
570 } else {
571 icmsghdrp->status = HV_E_FAIL;
572 pr_err_ratelimited("Heartbeat request received. Invalid msg type: %d\n",
573 icmsghdrp->icmsgtype);
574 }
575
576 icmsghdrp->icflags = ICMSGHDRFLAG_TRANSACTION
577 | ICMSGHDRFLAG_RESPONSE;
578
579 vmbus_sendpacket(channel, hbeat_txf_buf,
580 recvlen, requestid,
581 VM_PKT_DATA_INBAND, 0);
582 }
583}
584
585#define HV_UTIL_RING_SEND_SIZE VMBUS_RING_SIZE(3 * HV_HYP_PAGE_SIZE)
586#define HV_UTIL_RING_RECV_SIZE VMBUS_RING_SIZE(3 * HV_HYP_PAGE_SIZE)
587
588static int util_probe(struct hv_device *dev,
589 const struct hv_vmbus_device_id *dev_id)
590{
591 struct hv_util_service *srv =
592 (struct hv_util_service *)dev_id->driver_data;
593 int ret;
594
595 srv->recv_buffer = kmalloc(HV_HYP_PAGE_SIZE * 4, GFP_KERNEL);
596 if (!srv->recv_buffer)
597 return -ENOMEM;
598 srv->channel = dev->channel;
599 if (srv->util_init) {
600 ret = srv->util_init(srv);
601 if (ret) {
602 ret = -ENODEV;
603 goto error1;
604 }
605 }
606
607 /*
608 * The set of services managed by the util driver are not performance
609 * critical and do not need batched reading. Furthermore, some services
610 * such as KVP can only handle one message from the host at a time.
611 * Turn off batched reading for all util drivers before we open the
612 * channel.
613 */
614 set_channel_read_mode(dev->channel, HV_CALL_DIRECT);
615
616 hv_set_drvdata(dev, srv);
617
618 ret = vmbus_open(dev->channel, HV_UTIL_RING_SEND_SIZE,
619 HV_UTIL_RING_RECV_SIZE, NULL, 0, srv->util_cb,
620 dev->channel);
621 if (ret)
622 goto error;
623
624 return 0;
625
626error:
627 if (srv->util_deinit)
628 srv->util_deinit();
629error1:
630 kfree(srv->recv_buffer);
631 return ret;
632}
633
634static void util_remove(struct hv_device *dev)
635{
636 struct hv_util_service *srv = hv_get_drvdata(dev);
637
638 if (srv->util_deinit)
639 srv->util_deinit();
640 vmbus_close(dev->channel);
641 kfree(srv->recv_buffer);
642}
643
644/*
645 * When we're in util_suspend(), all the userspace processes have been frozen
646 * (refer to hibernate() -> freeze_processes()). The userspace is thawed only
647 * after the whole resume procedure, including util_resume(), finishes.
648 */
649static int util_suspend(struct hv_device *dev)
650{
651 struct hv_util_service *srv = hv_get_drvdata(dev);
652 int ret = 0;
653
654 if (srv->util_pre_suspend) {
655 ret = srv->util_pre_suspend();
656 if (ret)
657 return ret;
658 }
659
660 vmbus_close(dev->channel);
661
662 return 0;
663}
664
665static int util_resume(struct hv_device *dev)
666{
667 struct hv_util_service *srv = hv_get_drvdata(dev);
668 int ret = 0;
669
670 if (srv->util_pre_resume) {
671 ret = srv->util_pre_resume();
672 if (ret)
673 return ret;
674 }
675
676 ret = vmbus_open(dev->channel, HV_UTIL_RING_SEND_SIZE,
677 HV_UTIL_RING_RECV_SIZE, NULL, 0, srv->util_cb,
678 dev->channel);
679 return ret;
680}
681
682static const struct hv_vmbus_device_id id_table[] = {
683 /* Shutdown guid */
684 { HV_SHUTDOWN_GUID,
685 .driver_data = (unsigned long)&util_shutdown
686 },
687 /* Time synch guid */
688 { HV_TS_GUID,
689 .driver_data = (unsigned long)&util_timesynch
690 },
691 /* Heartbeat guid */
692 { HV_HEART_BEAT_GUID,
693 .driver_data = (unsigned long)&util_heartbeat
694 },
695 /* KVP guid */
696 { HV_KVP_GUID,
697 .driver_data = (unsigned long)&util_kvp
698 },
699 /* VSS GUID */
700 { HV_VSS_GUID,
701 .driver_data = (unsigned long)&util_vss
702 },
703 /* File copy GUID */
704 { HV_FCOPY_GUID,
705 .driver_data = (unsigned long)&util_fcopy
706 },
707 { },
708};
709
710MODULE_DEVICE_TABLE(vmbus, id_table);
711
712/* The one and only one */
713static struct hv_driver util_drv = {
714 .name = "hv_utils",
715 .id_table = id_table,
716 .probe = util_probe,
717 .remove = util_remove,
718 .suspend = util_suspend,
719 .resume = util_resume,
720 .driver = {
721 .probe_type = PROBE_PREFER_ASYNCHRONOUS,
722 },
723};
724
725static int hv_ptp_enable(struct ptp_clock_info *info,
726 struct ptp_clock_request *request, int on)
727{
728 return -EOPNOTSUPP;
729}
730
731static int hv_ptp_settime(struct ptp_clock_info *p, const struct timespec64 *ts)
732{
733 return -EOPNOTSUPP;
734}
735
736static int hv_ptp_adjfine(struct ptp_clock_info *ptp, long delta)
737{
738 return -EOPNOTSUPP;
739}
740static int hv_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta)
741{
742 return -EOPNOTSUPP;
743}
744
745static int hv_ptp_gettime(struct ptp_clock_info *info, struct timespec64 *ts)
746{
747 return hv_get_adj_host_time(ts);
748}
749
750static struct ptp_clock_info ptp_hyperv_info = {
751 .name = "hyperv",
752 .enable = hv_ptp_enable,
753 .adjtime = hv_ptp_adjtime,
754 .adjfine = hv_ptp_adjfine,
755 .gettime64 = hv_ptp_gettime,
756 .settime64 = hv_ptp_settime,
757 .owner = THIS_MODULE,
758};
759
760static struct ptp_clock *hv_ptp_clock;
761
762static int hv_timesync_init(struct hv_util_service *srv)
763{
764 spin_lock_init(&host_ts.lock);
765
766 INIT_WORK(&adj_time_work, hv_set_host_time);
767
768 /*
769 * ptp_clock_register() returns NULL when CONFIG_PTP_1588_CLOCK is
770 * disabled but the driver is still useful without the PTP device
771 * as it still handles the ICTIMESYNCFLAG_SYNC case.
772 */
773 hv_ptp_clock = ptp_clock_register(&ptp_hyperv_info, NULL);
774 if (IS_ERR_OR_NULL(hv_ptp_clock)) {
775 pr_err("cannot register PTP clock: %d\n",
776 PTR_ERR_OR_ZERO(hv_ptp_clock));
777 hv_ptp_clock = NULL;
778 }
779
780 return 0;
781}
782
783static void hv_timesync_cancel_work(void)
784{
785 cancel_work_sync(&adj_time_work);
786}
787
788static int hv_timesync_pre_suspend(void)
789{
790 hv_timesync_cancel_work();
791 return 0;
792}
793
794static void hv_timesync_deinit(void)
795{
796 if (hv_ptp_clock)
797 ptp_clock_unregister(hv_ptp_clock);
798
799 hv_timesync_cancel_work();
800}
801
802static int __init init_hyperv_utils(void)
803{
804 pr_info("Registering HyperV Utility Driver\n");
805
806 return vmbus_driver_register(&util_drv);
807}
808
809static void exit_hyperv_utils(void)
810{
811 pr_info("De-Registered HyperV Utility Driver\n");
812
813 vmbus_driver_unregister(&util_drv);
814}
815
816module_init(init_hyperv_utils);
817module_exit(exit_hyperv_utils);
818
819MODULE_DESCRIPTION("Hyper-V Utilities");
820MODULE_LICENSE("GPL");
1// SPDX-License-Identifier: GPL-2.0-only
2/*
3 * Copyright (c) 2010, Microsoft Corporation.
4 *
5 * Authors:
6 * Haiyang Zhang <haiyangz@microsoft.com>
7 * Hank Janssen <hjanssen@microsoft.com>
8 */
9#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
10
11#include <linux/kernel.h>
12#include <linux/init.h>
13#include <linux/module.h>
14#include <linux/slab.h>
15#include <linux/sysctl.h>
16#include <linux/reboot.h>
17#include <linux/hyperv.h>
18#include <linux/clockchips.h>
19#include <linux/ptp_clock_kernel.h>
20#include <clocksource/hyperv_timer.h>
21#include <asm/mshyperv.h>
22
23#include "hyperv_vmbus.h"
24
25#define SD_MAJOR 3
26#define SD_MINOR 0
27#define SD_VERSION (SD_MAJOR << 16 | SD_MINOR)
28
29#define SD_MAJOR_1 1
30#define SD_VERSION_1 (SD_MAJOR_1 << 16 | SD_MINOR)
31
32#define TS_MAJOR 4
33#define TS_MINOR 0
34#define TS_VERSION (TS_MAJOR << 16 | TS_MINOR)
35
36#define TS_MAJOR_1 1
37#define TS_VERSION_1 (TS_MAJOR_1 << 16 | TS_MINOR)
38
39#define TS_MAJOR_3 3
40#define TS_VERSION_3 (TS_MAJOR_3 << 16 | TS_MINOR)
41
42#define HB_MAJOR 3
43#define HB_MINOR 0
44#define HB_VERSION (HB_MAJOR << 16 | HB_MINOR)
45
46#define HB_MAJOR_1 1
47#define HB_VERSION_1 (HB_MAJOR_1 << 16 | HB_MINOR)
48
49static int sd_srv_version;
50static int ts_srv_version;
51static int hb_srv_version;
52
53#define SD_VER_COUNT 2
54static const int sd_versions[] = {
55 SD_VERSION,
56 SD_VERSION_1
57};
58
59#define TS_VER_COUNT 3
60static const int ts_versions[] = {
61 TS_VERSION,
62 TS_VERSION_3,
63 TS_VERSION_1
64};
65
66#define HB_VER_COUNT 2
67static const int hb_versions[] = {
68 HB_VERSION,
69 HB_VERSION_1
70};
71
72#define FW_VER_COUNT 2
73static const int fw_versions[] = {
74 UTIL_FW_VERSION,
75 UTIL_WS2K8_FW_VERSION
76};
77
78static void shutdown_onchannelcallback(void *context);
79static struct hv_util_service util_shutdown = {
80 .util_cb = shutdown_onchannelcallback,
81};
82
83static int hv_timesync_init(struct hv_util_service *srv);
84static void hv_timesync_deinit(void);
85
86static void timesync_onchannelcallback(void *context);
87static struct hv_util_service util_timesynch = {
88 .util_cb = timesync_onchannelcallback,
89 .util_init = hv_timesync_init,
90 .util_deinit = hv_timesync_deinit,
91};
92
93static void heartbeat_onchannelcallback(void *context);
94static struct hv_util_service util_heartbeat = {
95 .util_cb = heartbeat_onchannelcallback,
96};
97
98static struct hv_util_service util_kvp = {
99 .util_cb = hv_kvp_onchannelcallback,
100 .util_init = hv_kvp_init,
101 .util_deinit = hv_kvp_deinit,
102};
103
104static struct hv_util_service util_vss = {
105 .util_cb = hv_vss_onchannelcallback,
106 .util_init = hv_vss_init,
107 .util_deinit = hv_vss_deinit,
108};
109
110static struct hv_util_service util_fcopy = {
111 .util_cb = hv_fcopy_onchannelcallback,
112 .util_init = hv_fcopy_init,
113 .util_deinit = hv_fcopy_deinit,
114};
115
116static void perform_shutdown(struct work_struct *dummy)
117{
118 orderly_poweroff(true);
119}
120
121/*
122 * Perform the shutdown operation in a thread context.
123 */
124static DECLARE_WORK(shutdown_work, perform_shutdown);
125
126static void shutdown_onchannelcallback(void *context)
127{
128 struct vmbus_channel *channel = context;
129 u32 recvlen;
130 u64 requestid;
131 bool execute_shutdown = false;
132 u8 *shut_txf_buf = util_shutdown.recv_buffer;
133
134 struct shutdown_msg_data *shutdown_msg;
135
136 struct icmsg_hdr *icmsghdrp;
137
138 vmbus_recvpacket(channel, shut_txf_buf,
139 PAGE_SIZE, &recvlen, &requestid);
140
141 if (recvlen > 0) {
142 icmsghdrp = (struct icmsg_hdr *)&shut_txf_buf[
143 sizeof(struct vmbuspipe_hdr)];
144
145 if (icmsghdrp->icmsgtype == ICMSGTYPE_NEGOTIATE) {
146 if (vmbus_prep_negotiate_resp(icmsghdrp, shut_txf_buf,
147 fw_versions, FW_VER_COUNT,
148 sd_versions, SD_VER_COUNT,
149 NULL, &sd_srv_version)) {
150 pr_info("Shutdown IC version %d.%d\n",
151 sd_srv_version >> 16,
152 sd_srv_version & 0xFFFF);
153 }
154 } else {
155 shutdown_msg =
156 (struct shutdown_msg_data *)&shut_txf_buf[
157 sizeof(struct vmbuspipe_hdr) +
158 sizeof(struct icmsg_hdr)];
159
160 switch (shutdown_msg->flags) {
161 case 0:
162 case 1:
163 icmsghdrp->status = HV_S_OK;
164 execute_shutdown = true;
165
166 pr_info("Shutdown request received -"
167 " graceful shutdown initiated\n");
168 break;
169 default:
170 icmsghdrp->status = HV_E_FAIL;
171 execute_shutdown = false;
172
173 pr_info("Shutdown request received -"
174 " Invalid request\n");
175 break;
176 }
177 }
178
179 icmsghdrp->icflags = ICMSGHDRFLAG_TRANSACTION
180 | ICMSGHDRFLAG_RESPONSE;
181
182 vmbus_sendpacket(channel, shut_txf_buf,
183 recvlen, requestid,
184 VM_PKT_DATA_INBAND, 0);
185 }
186
187 if (execute_shutdown == true)
188 schedule_work(&shutdown_work);
189}
190
191/*
192 * Set the host time in a process context.
193 */
194static struct work_struct adj_time_work;
195
196/*
197 * The last time sample, received from the host. PTP device responds to
198 * requests by using this data and the current partition-wide time reference
199 * count.
200 */
201static struct {
202 u64 host_time;
203 u64 ref_time;
204 spinlock_t lock;
205} host_ts;
206
207static struct timespec64 hv_get_adj_host_time(void)
208{
209 struct timespec64 ts;
210 u64 newtime, reftime;
211 unsigned long flags;
212
213 spin_lock_irqsave(&host_ts.lock, flags);
214 reftime = hyperv_cs->read(hyperv_cs);
215 newtime = host_ts.host_time + (reftime - host_ts.ref_time);
216 ts = ns_to_timespec64((newtime - WLTIMEDELTA) * 100);
217 spin_unlock_irqrestore(&host_ts.lock, flags);
218
219 return ts;
220}
221
222static void hv_set_host_time(struct work_struct *work)
223{
224 struct timespec64 ts = hv_get_adj_host_time();
225
226 do_settimeofday64(&ts);
227}
228
229/*
230 * Synchronize time with host after reboot, restore, etc.
231 *
232 * ICTIMESYNCFLAG_SYNC flag bit indicates reboot, restore events of the VM.
233 * After reboot the flag ICTIMESYNCFLAG_SYNC is included in the first time
234 * message after the timesync channel is opened. Since the hv_utils module is
235 * loaded after hv_vmbus, the first message is usually missed. This bit is
236 * considered a hard request to discipline the clock.
237 *
238 * ICTIMESYNCFLAG_SAMPLE bit indicates a time sample from host. This is
239 * typically used as a hint to the guest. The guest is under no obligation
240 * to discipline the clock.
241 */
242static inline void adj_guesttime(u64 hosttime, u64 reftime, u8 adj_flags)
243{
244 unsigned long flags;
245 u64 cur_reftime;
246
247 /*
248 * Save the adjusted time sample from the host and the snapshot
249 * of the current system time.
250 */
251 spin_lock_irqsave(&host_ts.lock, flags);
252
253 cur_reftime = hyperv_cs->read(hyperv_cs);
254 host_ts.host_time = hosttime;
255 host_ts.ref_time = cur_reftime;
256
257 /*
258 * TimeSync v4 messages contain reference time (guest's Hyper-V
259 * clocksource read when the time sample was generated), we can
260 * improve the precision by adding the delta between now and the
261 * time of generation. For older protocols we set
262 * reftime == cur_reftime on call.
263 */
264 host_ts.host_time += (cur_reftime - reftime);
265
266 spin_unlock_irqrestore(&host_ts.lock, flags);
267
268 /* Schedule work to do do_settimeofday64() */
269 if (adj_flags & ICTIMESYNCFLAG_SYNC)
270 schedule_work(&adj_time_work);
271}
272
273/*
274 * Time Sync Channel message handler.
275 */
276static void timesync_onchannelcallback(void *context)
277{
278 struct vmbus_channel *channel = context;
279 u32 recvlen;
280 u64 requestid;
281 struct icmsg_hdr *icmsghdrp;
282 struct ictimesync_data *timedatap;
283 struct ictimesync_ref_data *refdata;
284 u8 *time_txf_buf = util_timesynch.recv_buffer;
285
286 vmbus_recvpacket(channel, time_txf_buf,
287 PAGE_SIZE, &recvlen, &requestid);
288
289 if (recvlen > 0) {
290 icmsghdrp = (struct icmsg_hdr *)&time_txf_buf[
291 sizeof(struct vmbuspipe_hdr)];
292
293 if (icmsghdrp->icmsgtype == ICMSGTYPE_NEGOTIATE) {
294 if (vmbus_prep_negotiate_resp(icmsghdrp, time_txf_buf,
295 fw_versions, FW_VER_COUNT,
296 ts_versions, TS_VER_COUNT,
297 NULL, &ts_srv_version)) {
298 pr_info("TimeSync IC version %d.%d\n",
299 ts_srv_version >> 16,
300 ts_srv_version & 0xFFFF);
301 }
302 } else {
303 if (ts_srv_version > TS_VERSION_3) {
304 refdata = (struct ictimesync_ref_data *)
305 &time_txf_buf[
306 sizeof(struct vmbuspipe_hdr) +
307 sizeof(struct icmsg_hdr)];
308
309 adj_guesttime(refdata->parenttime,
310 refdata->vmreferencetime,
311 refdata->flags);
312 } else {
313 timedatap = (struct ictimesync_data *)
314 &time_txf_buf[
315 sizeof(struct vmbuspipe_hdr) +
316 sizeof(struct icmsg_hdr)];
317 adj_guesttime(timedatap->parenttime,
318 hyperv_cs->read(hyperv_cs),
319 timedatap->flags);
320 }
321 }
322
323 icmsghdrp->icflags = ICMSGHDRFLAG_TRANSACTION
324 | ICMSGHDRFLAG_RESPONSE;
325
326 vmbus_sendpacket(channel, time_txf_buf,
327 recvlen, requestid,
328 VM_PKT_DATA_INBAND, 0);
329 }
330}
331
332/*
333 * Heartbeat functionality.
334 * Every two seconds, Hyper-V send us a heartbeat request message.
335 * we respond to this message, and Hyper-V knows we are alive.
336 */
337static void heartbeat_onchannelcallback(void *context)
338{
339 struct vmbus_channel *channel = context;
340 u32 recvlen;
341 u64 requestid;
342 struct icmsg_hdr *icmsghdrp;
343 struct heartbeat_msg_data *heartbeat_msg;
344 u8 *hbeat_txf_buf = util_heartbeat.recv_buffer;
345
346 while (1) {
347
348 vmbus_recvpacket(channel, hbeat_txf_buf,
349 PAGE_SIZE, &recvlen, &requestid);
350
351 if (!recvlen)
352 break;
353
354 icmsghdrp = (struct icmsg_hdr *)&hbeat_txf_buf[
355 sizeof(struct vmbuspipe_hdr)];
356
357 if (icmsghdrp->icmsgtype == ICMSGTYPE_NEGOTIATE) {
358 if (vmbus_prep_negotiate_resp(icmsghdrp,
359 hbeat_txf_buf,
360 fw_versions, FW_VER_COUNT,
361 hb_versions, HB_VER_COUNT,
362 NULL, &hb_srv_version)) {
363
364 pr_info("Heartbeat IC version %d.%d\n",
365 hb_srv_version >> 16,
366 hb_srv_version & 0xFFFF);
367 }
368 } else {
369 heartbeat_msg =
370 (struct heartbeat_msg_data *)&hbeat_txf_buf[
371 sizeof(struct vmbuspipe_hdr) +
372 sizeof(struct icmsg_hdr)];
373
374 heartbeat_msg->seq_num += 1;
375 }
376
377 icmsghdrp->icflags = ICMSGHDRFLAG_TRANSACTION
378 | ICMSGHDRFLAG_RESPONSE;
379
380 vmbus_sendpacket(channel, hbeat_txf_buf,
381 recvlen, requestid,
382 VM_PKT_DATA_INBAND, 0);
383 }
384}
385
386static int util_probe(struct hv_device *dev,
387 const struct hv_vmbus_device_id *dev_id)
388{
389 struct hv_util_service *srv =
390 (struct hv_util_service *)dev_id->driver_data;
391 int ret;
392
393 srv->recv_buffer = kmalloc(PAGE_SIZE * 4, GFP_KERNEL);
394 if (!srv->recv_buffer)
395 return -ENOMEM;
396 srv->channel = dev->channel;
397 if (srv->util_init) {
398 ret = srv->util_init(srv);
399 if (ret) {
400 ret = -ENODEV;
401 goto error1;
402 }
403 }
404
405 /*
406 * The set of services managed by the util driver are not performance
407 * critical and do not need batched reading. Furthermore, some services
408 * such as KVP can only handle one message from the host at a time.
409 * Turn off batched reading for all util drivers before we open the
410 * channel.
411 */
412 set_channel_read_mode(dev->channel, HV_CALL_DIRECT);
413
414 hv_set_drvdata(dev, srv);
415
416 ret = vmbus_open(dev->channel, 4 * PAGE_SIZE, 4 * PAGE_SIZE, NULL, 0,
417 srv->util_cb, dev->channel);
418 if (ret)
419 goto error;
420
421 return 0;
422
423error:
424 if (srv->util_deinit)
425 srv->util_deinit();
426error1:
427 kfree(srv->recv_buffer);
428 return ret;
429}
430
431static int util_remove(struct hv_device *dev)
432{
433 struct hv_util_service *srv = hv_get_drvdata(dev);
434
435 if (srv->util_deinit)
436 srv->util_deinit();
437 vmbus_close(dev->channel);
438 kfree(srv->recv_buffer);
439
440 return 0;
441}
442
443static const struct hv_vmbus_device_id id_table[] = {
444 /* Shutdown guid */
445 { HV_SHUTDOWN_GUID,
446 .driver_data = (unsigned long)&util_shutdown
447 },
448 /* Time synch guid */
449 { HV_TS_GUID,
450 .driver_data = (unsigned long)&util_timesynch
451 },
452 /* Heartbeat guid */
453 { HV_HEART_BEAT_GUID,
454 .driver_data = (unsigned long)&util_heartbeat
455 },
456 /* KVP guid */
457 { HV_KVP_GUID,
458 .driver_data = (unsigned long)&util_kvp
459 },
460 /* VSS GUID */
461 { HV_VSS_GUID,
462 .driver_data = (unsigned long)&util_vss
463 },
464 /* File copy GUID */
465 { HV_FCOPY_GUID,
466 .driver_data = (unsigned long)&util_fcopy
467 },
468 { },
469};
470
471MODULE_DEVICE_TABLE(vmbus, id_table);
472
473/* The one and only one */
474static struct hv_driver util_drv = {
475 .name = "hv_utils",
476 .id_table = id_table,
477 .probe = util_probe,
478 .remove = util_remove,
479 .driver = {
480 .probe_type = PROBE_PREFER_ASYNCHRONOUS,
481 },
482};
483
/*
 * .enable callback of ptp_hyperv_info.  Not supported: the arguments are
 * ignored and the call always fails with -EOPNOTSUPP.
 */
static int hv_ptp_enable(struct ptp_clock_info *info,
			 struct ptp_clock_request *request, int on)
{
	return -EOPNOTSUPP;
}
489
/*
 * .settime64 callback of ptp_hyperv_info.  Not supported: the arguments are
 * ignored and the call always fails with -EOPNOTSUPP.
 */
static int hv_ptp_settime(struct ptp_clock_info *p, const struct timespec64 *ts)
{
	return -EOPNOTSUPP;
}
494
495static int hv_ptp_adjfreq(struct ptp_clock_info *ptp, s32 delta)
496{
497 return -EOPNOTSUPP;
498}
499static int hv_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta)
500{
501 return -EOPNOTSUPP;
502}
503
504static int hv_ptp_gettime(struct ptp_clock_info *info, struct timespec64 *ts)
505{
506 *ts = hv_get_adj_host_time();
507
508 return 0;
509}
510
511static struct ptp_clock_info ptp_hyperv_info = {
512 .name = "hyperv",
513 .enable = hv_ptp_enable,
514 .adjtime = hv_ptp_adjtime,
515 .adjfreq = hv_ptp_adjfreq,
516 .gettime64 = hv_ptp_gettime,
517 .settime64 = hv_ptp_settime,
518 .owner = THIS_MODULE,
519};
520
/*
 * Clock handle obtained from ptp_clock_register() in hv_timesync_init();
 * left NULL when registration fails or returns NULL.
 */
static struct ptp_clock *hv_ptp_clock;
522
523static int hv_timesync_init(struct hv_util_service *srv)
524{
525 /* TimeSync requires Hyper-V clocksource. */
526 if (!hyperv_cs)
527 return -ENODEV;
528
529 spin_lock_init(&host_ts.lock);
530
531 INIT_WORK(&adj_time_work, hv_set_host_time);
532
533 /*
534 * ptp_clock_register() returns NULL when CONFIG_PTP_1588_CLOCK is
535 * disabled but the driver is still useful without the PTP device
536 * as it still handles the ICTIMESYNCFLAG_SYNC case.
537 */
538 hv_ptp_clock = ptp_clock_register(&ptp_hyperv_info, NULL);
539 if (IS_ERR_OR_NULL(hv_ptp_clock)) {
540 pr_err("cannot register PTP clock: %ld\n",
541 PTR_ERR(hv_ptp_clock));
542 hv_ptp_clock = NULL;
543 }
544
545 return 0;
546}
547
548static void hv_timesync_deinit(void)
549{
550 if (hv_ptp_clock)
551 ptp_clock_unregister(hv_ptp_clock);
552 cancel_work_sync(&adj_time_work);
553}
554
555static int __init init_hyperv_utils(void)
556{
557 pr_info("Registering HyperV Utility Driver\n");
558
559 return vmbus_driver_register(&util_drv);
560}
561
562static void exit_hyperv_utils(void)
563{
564 pr_info("De-Registered HyperV Utility Driver\n");
565
566 vmbus_driver_unregister(&util_drv);
567}
568
569module_init(init_hyperv_utils);
570module_exit(exit_hyperv_utils);
571
572MODULE_DESCRIPTION("Hyper-V Utilities");
573MODULE_LICENSE("GPL");