1/*
2 * hosting zSeries kernel virtual machines
3 *
4 * Copyright IBM Corp. 2008, 2009
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License (version 2 only)
8 * as published by the Free Software Foundation.
9 *
10 * Author(s): Carsten Otte <cotte@de.ibm.com>
11 * Christian Borntraeger <borntraeger@de.ibm.com>
12 * Heiko Carstens <heiko.carstens@de.ibm.com>
13 * Christian Ehrhardt <ehrhardt@de.ibm.com>
14 * Jason J. Herne <jjherne@us.ibm.com>
15 */
16
17#include <linux/compiler.h>
18#include <linux/err.h>
19#include <linux/fs.h>
20#include <linux/hrtimer.h>
21#include <linux/init.h>
22#include <linux/kvm.h>
23#include <linux/kvm_host.h>
24#include <linux/module.h>
25#include <linux/random.h>
26#include <linux/slab.h>
27#include <linux/timer.h>
28#include <linux/vmalloc.h>
29#include <asm/asm-offsets.h>
30#include <asm/lowcore.h>
31#include <asm/etr.h>
32#include <asm/pgtable.h>
33#include <asm/gmap.h>
34#include <asm/nmi.h>
35#include <asm/switch_to.h>
36#include <asm/isc.h>
37#include <asm/sclp.h>
38#include "kvm-s390.h"
39#include "gaccess.h"
40
41#define KMSG_COMPONENT "kvm-s390"
42#undef pr_fmt
43#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
44
45#define CREATE_TRACE_POINTS
46#include "trace.h"
47#include "trace-s390.h"
48
49#define MEM_OP_MAX_SIZE 65536 /* Maximum transfer size for KVM_S390_MEM_OP */
50#define LOCAL_IRQS 32
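/*
 * Worst-case buffer for the KVM_S390_{GET,SET}_IRQ_STATE ioctls: roughly one
 * pending interrupt per possible remote VCPU (e.g. SIGP emergency/external
 * call) plus all LOCAL_IRQS local interrupt types (a conservative estimate).
 */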
51#define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
52 (KVM_MAX_VCPUS + LOCAL_IRQS))
53
54#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
55
56struct kvm_stats_debugfs_item debugfs_entries[] = {
57 { "userspace_handled", VCPU_STAT(exit_userspace) },
58 { "exit_null", VCPU_STAT(exit_null) },
59 { "exit_validity", VCPU_STAT(exit_validity) },
60 { "exit_stop_request", VCPU_STAT(exit_stop_request) },
61 { "exit_external_request", VCPU_STAT(exit_external_request) },
62 { "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
63 { "exit_instruction", VCPU_STAT(exit_instruction) },
64 { "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
65 { "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
66 { "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
67 { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
68 { "halt_wakeup", VCPU_STAT(halt_wakeup) },
69 { "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
70 { "instruction_lctl", VCPU_STAT(instruction_lctl) },
71 { "instruction_stctl", VCPU_STAT(instruction_stctl) },
72 { "instruction_stctg", VCPU_STAT(instruction_stctg) },
73 { "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
74 { "deliver_external_call", VCPU_STAT(deliver_external_call) },
75 { "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
76 { "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) },
77 { "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
78 { "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
79 { "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
80 { "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
81 { "exit_wait_state", VCPU_STAT(exit_wait_state) },
82 { "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
83 { "instruction_stidp", VCPU_STAT(instruction_stidp) },
84 { "instruction_spx", VCPU_STAT(instruction_spx) },
85 { "instruction_stpx", VCPU_STAT(instruction_stpx) },
86 { "instruction_stap", VCPU_STAT(instruction_stap) },
87 { "instruction_storage_key", VCPU_STAT(instruction_storage_key) },
88 { "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
89 { "instruction_stsch", VCPU_STAT(instruction_stsch) },
90 { "instruction_chsc", VCPU_STAT(instruction_chsc) },
91 { "instruction_essa", VCPU_STAT(instruction_essa) },
92 { "instruction_stsi", VCPU_STAT(instruction_stsi) },
93 { "instruction_stfl", VCPU_STAT(instruction_stfl) },
94 { "instruction_tprot", VCPU_STAT(instruction_tprot) },
95 { "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
96 { "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
97 { "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
98 { "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
99 { "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
100 { "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
101 { "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
102 { "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
103 { "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
104 { "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
105 { "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
106 { "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
107 { "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
108 { "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
109 { "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
110 { "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
111 { "diagnose_10", VCPU_STAT(diagnose_10) },
112 { "diagnose_44", VCPU_STAT(diagnose_44) },
113 { "diagnose_9c", VCPU_STAT(diagnose_9c) },
114 { "diagnose_258", VCPU_STAT(diagnose_258) },
115 { "diagnose_308", VCPU_STAT(diagnose_308) },
116 { "diagnose_500", VCPU_STAT(diagnose_500) },
117 { NULL }
118};
119
120/* upper facilities limit for kvm */
121unsigned long kvm_s390_fac_list_mask[] = {
122 0xffe6fffbfcfdfc40UL,
123 0x005e800000000000UL,
124};
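/*
 * This mask acts as a whitelist: kvm_arch_init_vm() ANDs it with the host's
 * STFLE facility bits, so guests are only offered facilities that are both
 * installed on the machine and allowed here.
 */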
125
126unsigned long kvm_s390_fac_list_mask_size(void)
127{
128 BUILD_BUG_ON(ARRAY_SIZE(kvm_s390_fac_list_mask) > S390_ARCH_FAC_MASK_SIZE_U64);
129 return ARRAY_SIZE(kvm_s390_fac_list_mask);
130}
131
132static struct gmap_notifier gmap_notifier;
133debug_info_t *kvm_s390_dbf;
134
135/* Section: not file related */
136int kvm_arch_hardware_enable(void)
137{
138 /* every s390 is virtualization enabled ;-) */
139 return 0;
140}
141
142static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address);
143
144/*
145 * This callback is executed during stop_machine(). All CPUs are therefore
146 * temporarily stopped. In order not to change guest behavior, we have to
147 * disable preemption whenever we touch the epoch of kvm and the VCPUs,
148 * so a CPU won't be stopped while calculating with the epoch.
149 */
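/*
 * Conceptually the guest observes host TOD + epoch. When the host clock is
 * stepped by *delta, subtracting the same delta from every epoch (and moving
 * a running CPU timer start value by +*delta) keeps guest time unchanged.
 */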
150static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
151 void *v)
152{
153 struct kvm *kvm;
154 struct kvm_vcpu *vcpu;
155 int i;
156 unsigned long long *delta = v;
157
158 list_for_each_entry(kvm, &vm_list, vm_list) {
159 kvm->arch.epoch -= *delta;
160 kvm_for_each_vcpu(i, vcpu, kvm) {
161 vcpu->arch.sie_block->epoch -= *delta;
162 if (vcpu->arch.cputm_enabled)
163 vcpu->arch.cputm_start += *delta;
164 }
165 }
166 return NOTIFY_OK;
167}
168
169static struct notifier_block kvm_clock_notifier = {
170 .notifier_call = kvm_clock_sync,
171};
172
173int kvm_arch_hardware_setup(void)
174{
175 gmap_notifier.notifier_call = kvm_gmap_notifier;
176 gmap_register_ipte_notifier(&gmap_notifier);
177 atomic_notifier_chain_register(&s390_epoch_delta_notifier,
178 &kvm_clock_notifier);
179 return 0;
180}
181
182void kvm_arch_hardware_unsetup(void)
183{
184 gmap_unregister_ipte_notifier(&gmap_notifier);
185 atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
186 &kvm_clock_notifier);
187}
188
189int kvm_arch_init(void *opaque)
190{
191 kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
192 if (!kvm_s390_dbf)
193 return -ENOMEM;
194
195 if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) {
196 debug_unregister(kvm_s390_dbf);
197 return -ENOMEM;
198 }
199
200 /* Register floating interrupt controller interface. */
201 return kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
202}
203
204void kvm_arch_exit(void)
205{
206 debug_unregister(kvm_s390_dbf);
207}
208
209/* Section: device related */
210long kvm_arch_dev_ioctl(struct file *filp,
211 unsigned int ioctl, unsigned long arg)
212{
213 if (ioctl == KVM_S390_ENABLE_SIE)
214 return s390_enable_sie();
215 return -EINVAL;
216}
217
218int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
219{
220 int r;
221
222 switch (ext) {
223 case KVM_CAP_S390_PSW:
224 case KVM_CAP_S390_GMAP:
225 case KVM_CAP_SYNC_MMU:
226#ifdef CONFIG_KVM_S390_UCONTROL
227 case KVM_CAP_S390_UCONTROL:
228#endif
229 case KVM_CAP_ASYNC_PF:
230 case KVM_CAP_SYNC_REGS:
231 case KVM_CAP_ONE_REG:
232 case KVM_CAP_ENABLE_CAP:
233 case KVM_CAP_S390_CSS_SUPPORT:
234 case KVM_CAP_IOEVENTFD:
235 case KVM_CAP_DEVICE_CTRL:
236 case KVM_CAP_ENABLE_CAP_VM:
237 case KVM_CAP_S390_IRQCHIP:
238 case KVM_CAP_VM_ATTRIBUTES:
239 case KVM_CAP_MP_STATE:
240 case KVM_CAP_S390_INJECT_IRQ:
241 case KVM_CAP_S390_USER_SIGP:
242 case KVM_CAP_S390_USER_STSI:
243 case KVM_CAP_S390_SKEYS:
244 case KVM_CAP_S390_IRQ_STATE:
245 r = 1;
246 break;
247 case KVM_CAP_S390_MEM_OP:
248 r = MEM_OP_MAX_SIZE;
249 break;
250 case KVM_CAP_NR_VCPUS:
251 case KVM_CAP_MAX_VCPUS:
252 r = sclp.has_esca ? KVM_S390_ESCA_CPU_SLOTS
253 : KVM_S390_BSCA_CPU_SLOTS;
254 break;
255 case KVM_CAP_NR_MEMSLOTS:
256 r = KVM_USER_MEM_SLOTS;
257 break;
258 case KVM_CAP_S390_COW:
259 r = MACHINE_HAS_ESOP;
260 break;
261 case KVM_CAP_S390_VECTOR_REGISTERS:
262 r = MACHINE_HAS_VX;
263 break;
264 case KVM_CAP_S390_RI:
265 r = test_facility(64);
266 break;
267 default:
268 r = 0;
269 }
270 return r;
271}
272
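/*
 * Propagate dirty bits from the host page tables into the memslot's dirty
 * bitmap. Called from kvm_vm_ioctl_get_dirty_log() with slots_lock held.
 */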
273static void kvm_s390_sync_dirty_log(struct kvm *kvm,
274 struct kvm_memory_slot *memslot)
275{
276 gfn_t cur_gfn, last_gfn;
277 unsigned long address;
278 struct gmap *gmap = kvm->arch.gmap;
279
280 /* Loop over all guest pages */
281 last_gfn = memslot->base_gfn + memslot->npages;
282 for (cur_gfn = memslot->base_gfn; cur_gfn <= last_gfn; cur_gfn++) {
283 address = gfn_to_hva_memslot(memslot, cur_gfn);
284
285 if (test_and_clear_guest_dirty(gmap->mm, address))
286 mark_page_dirty(kvm, cur_gfn);
287 if (fatal_signal_pending(current))
288 return;
289 cond_resched();
290 }
291}
292
293/* Section: vm related */
294static void sca_del_vcpu(struct kvm_vcpu *vcpu);
295
296/*
297 * Get (and clear) the dirty memory log for a memory slot.
298 */
299int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
300 struct kvm_dirty_log *log)
301{
302 int r;
303 unsigned long n;
304 struct kvm_memslots *slots;
305 struct kvm_memory_slot *memslot;
306 int is_dirty = 0;
307
308 mutex_lock(&kvm->slots_lock);
309
310 r = -EINVAL;
311 if (log->slot >= KVM_USER_MEM_SLOTS)
312 goto out;
313
314 slots = kvm_memslots(kvm);
315 memslot = id_to_memslot(slots, log->slot);
316 r = -ENOENT;
317 if (!memslot->dirty_bitmap)
318 goto out;
319
320 kvm_s390_sync_dirty_log(kvm, memslot);
321 r = kvm_get_dirty_log(kvm, log, &is_dirty);
322 if (r)
323 goto out;
324
325 /* Clear the dirty log */
326 if (is_dirty) {
327 n = kvm_dirty_bitmap_bytes(memslot);
328 memset(memslot->dirty_bitmap, 0, n);
329 }
330 r = 0;
331out:
332 mutex_unlock(&kvm->slots_lock);
333 return r;
334}
335
336static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
337{
338 int r;
339
340 if (cap->flags)
341 return -EINVAL;
342
343 switch (cap->cap) {
344 case KVM_CAP_S390_IRQCHIP:
345 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
346 kvm->arch.use_irqchip = 1;
347 r = 0;
348 break;
349 case KVM_CAP_S390_USER_SIGP:
350 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
351 kvm->arch.user_sigp = 1;
352 r = 0;
353 break;
354 case KVM_CAP_S390_VECTOR_REGISTERS:
355 mutex_lock(&kvm->lock);
356 if (atomic_read(&kvm->online_vcpus)) {
357 r = -EBUSY;
358 } else if (MACHINE_HAS_VX) {
359 set_kvm_facility(kvm->arch.model.fac_mask, 129);
360 set_kvm_facility(kvm->arch.model.fac_list, 129);
361 r = 0;
362 } else
363 r = -EINVAL;
364 mutex_unlock(&kvm->lock);
365 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
366 r ? "(not available)" : "(success)");
367 break;
368 case KVM_CAP_S390_RI:
369 r = -EINVAL;
370 mutex_lock(&kvm->lock);
371 if (atomic_read(&kvm->online_vcpus)) {
372 r = -EBUSY;
373 } else if (test_facility(64)) {
374 set_kvm_facility(kvm->arch.model.fac_mask, 64);
375 set_kvm_facility(kvm->arch.model.fac_list, 64);
376 r = 0;
377 }
378 mutex_unlock(&kvm->lock);
379 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
380 r ? "(not available)" : "(success)");
381 break;
382 case KVM_CAP_S390_USER_STSI:
383 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
384 kvm->arch.user_stsi = 1;
385 r = 0;
386 break;
387 default:
388 r = -EINVAL;
389 break;
390 }
391 return r;
392}
393
394static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
395{
396 int ret;
397
398 switch (attr->attr) {
399 case KVM_S390_VM_MEM_LIMIT_SIZE:
400 ret = 0;
401 VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
402 kvm->arch.mem_limit);
403 if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
404 ret = -EFAULT;
405 break;
406 default:
407 ret = -ENXIO;
408 break;
409 }
410 return ret;
411}
412
413static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
414{
415 int ret;
416 unsigned int idx;
417 switch (attr->attr) {
418 case KVM_S390_VM_MEM_ENABLE_CMMA:
419 /* enable CMMA only for z10 and later (EDAT_1) */
420 ret = -EINVAL;
421 if (!MACHINE_IS_LPAR || !MACHINE_HAS_EDAT1)
422 break;
423
424 ret = -EBUSY;
425 VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
426 mutex_lock(&kvm->lock);
427 if (atomic_read(&kvm->online_vcpus) == 0) {
428 kvm->arch.use_cmma = 1;
429 ret = 0;
430 }
431 mutex_unlock(&kvm->lock);
432 break;
433 case KVM_S390_VM_MEM_CLR_CMMA:
434 ret = -EINVAL;
435 if (!kvm->arch.use_cmma)
436 break;
437
438 VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
439 mutex_lock(&kvm->lock);
440 idx = srcu_read_lock(&kvm->srcu);
441 s390_reset_cmma(kvm->arch.gmap->mm);
442 srcu_read_unlock(&kvm->srcu, idx);
443 mutex_unlock(&kvm->lock);
444 ret = 0;
445 break;
446 case KVM_S390_VM_MEM_LIMIT_SIZE: {
447 unsigned long new_limit;
448
449 if (kvm_is_ucontrol(kvm))
450 return -EINVAL;
451
452 if (get_user(new_limit, (u64 __user *)attr->addr))
453 return -EFAULT;
454
455 if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
456 new_limit > kvm->arch.mem_limit)
457 return -E2BIG;
458
459 if (!new_limit)
460 return -EINVAL;
461
462 /* gmap_alloc takes last usable address */
463 if (new_limit != KVM_S390_NO_MEM_LIMIT)
464 new_limit -= 1;
465
466 ret = -EBUSY;
467 mutex_lock(&kvm->lock);
468 if (atomic_read(&kvm->online_vcpus) == 0) {
469 /* gmap_alloc will round the limit up */
470 struct gmap *new = gmap_alloc(current->mm, new_limit);
471
472 if (!new) {
473 ret = -ENOMEM;
474 } else {
475 gmap_free(kvm->arch.gmap);
476 new->private = kvm;
477 kvm->arch.gmap = new;
478 ret = 0;
479 }
480 }
481 mutex_unlock(&kvm->lock);
482 VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
483 VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
484 (void *) kvm->arch.gmap->asce);
485 break;
486 }
487 default:
488 ret = -ENXIO;
489 break;
490 }
491 return ret;
492}
493
494static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
495
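/*
 * Toggle AES/DEA protected-key wrapping for the whole VM. Every VCPU's
 * crypto control block settings are refreshed and the VCPU is kicked out of
 * SIE so the new wrapping key state takes effect immediately.
 */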
496static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
497{
498 struct kvm_vcpu *vcpu;
499 int i;
500
501 if (!test_kvm_facility(kvm, 76))
502 return -EINVAL;
503
504 mutex_lock(&kvm->lock);
505 switch (attr->attr) {
506 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
507 get_random_bytes(
508 kvm->arch.crypto.crycb->aes_wrapping_key_mask,
509 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
510 kvm->arch.crypto.aes_kw = 1;
511 VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
512 break;
513 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
514 get_random_bytes(
515 kvm->arch.crypto.crycb->dea_wrapping_key_mask,
516 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
517 kvm->arch.crypto.dea_kw = 1;
518 VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
519 break;
520 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
521 kvm->arch.crypto.aes_kw = 0;
522 memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
523 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
524 VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
525 break;
526 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
527 kvm->arch.crypto.dea_kw = 0;
528 memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
529 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
530 VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
531 break;
532 default:
533 mutex_unlock(&kvm->lock);
534 return -ENXIO;
535 }
536
537 kvm_for_each_vcpu(i, vcpu, kvm) {
538 kvm_s390_vcpu_crypto_setup(vcpu);
539 exit_sie(vcpu);
540 }
541 mutex_unlock(&kvm->lock);
542 return 0;
543}
544
545static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
546{
547 u8 gtod_high;
548
549	if (copy_from_user(&gtod_high, (void __user *)attr->addr,
550 sizeof(gtod_high)))
551 return -EFAULT;
552
553 if (gtod_high != 0)
554 return -EINVAL;
555 VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);
556
557 return 0;
558}
559
560static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
561{
562 u64 gtod;
563
564	if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
565 return -EFAULT;
566
567 kvm_s390_set_tod_clock(kvm, gtod);
568 VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod);
569 return 0;
570}
571
572static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
573{
574 int ret;
575
576 if (attr->flags)
577 return -EINVAL;
578
579 switch (attr->attr) {
580 case KVM_S390_VM_TOD_HIGH:
581 ret = kvm_s390_set_tod_high(kvm, attr);
582 break;
583 case KVM_S390_VM_TOD_LOW:
584 ret = kvm_s390_set_tod_low(kvm, attr);
585 break;
586 default:
587 ret = -ENXIO;
588 break;
589 }
590 return ret;
591}
592
593static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
594{
595 u8 gtod_high = 0;
596
597	if (copy_to_user((void __user *)attr->addr, &gtod_high,
598 sizeof(gtod_high)))
599 return -EFAULT;
600 VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);
601
602 return 0;
603}
604
605static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
606{
607 u64 gtod;
608
609 gtod = kvm_s390_get_tod_clock_fast(kvm);
610	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
611 return -EFAULT;
612 VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);
613
614 return 0;
615}
616
617static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
618{
619 int ret;
620
621 if (attr->flags)
622 return -EINVAL;
623
624 switch (attr->attr) {
625 case KVM_S390_VM_TOD_HIGH:
626 ret = kvm_s390_get_tod_high(kvm, attr);
627 break;
628 case KVM_S390_VM_TOD_LOW:
629 ret = kvm_s390_get_tod_low(kvm, attr);
630 break;
631 default:
632 ret = -ENXIO;
633 break;
634 }
635 return ret;
636}
637
638static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
639{
640 struct kvm_s390_vm_cpu_processor *proc;
641 int ret = 0;
642
643 mutex_lock(&kvm->lock);
644 if (atomic_read(&kvm->online_vcpus)) {
645 ret = -EBUSY;
646 goto out;
647 }
648 proc = kzalloc(sizeof(*proc), GFP_KERNEL);
649 if (!proc) {
650 ret = -ENOMEM;
651 goto out;
652 }
653 if (!copy_from_user(proc, (void __user *)attr->addr,
654 sizeof(*proc))) {
655 memcpy(&kvm->arch.model.cpu_id, &proc->cpuid,
656 sizeof(struct cpuid));
657 kvm->arch.model.ibc = proc->ibc;
658 memcpy(kvm->arch.model.fac_list, proc->fac_list,
659 S390_ARCH_FAC_LIST_SIZE_BYTE);
660 } else
661 ret = -EFAULT;
662 kfree(proc);
663out:
664 mutex_unlock(&kvm->lock);
665 return ret;
666}
667
668static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
669{
670 int ret = -ENXIO;
671
672 switch (attr->attr) {
673 case KVM_S390_VM_CPU_PROCESSOR:
674 ret = kvm_s390_set_processor(kvm, attr);
675 break;
676 }
677 return ret;
678}
679
680static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
681{
682 struct kvm_s390_vm_cpu_processor *proc;
683 int ret = 0;
684
685 proc = kzalloc(sizeof(*proc), GFP_KERNEL);
686 if (!proc) {
687 ret = -ENOMEM;
688 goto out;
689 }
690 memcpy(&proc->cpuid, &kvm->arch.model.cpu_id, sizeof(struct cpuid));
691 proc->ibc = kvm->arch.model.ibc;
692 memcpy(&proc->fac_list, kvm->arch.model.fac_list,
693 S390_ARCH_FAC_LIST_SIZE_BYTE);
694 if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
695 ret = -EFAULT;
696 kfree(proc);
697out:
698 return ret;
699}
700
701static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
702{
703 struct kvm_s390_vm_cpu_machine *mach;
704 int ret = 0;
705
706 mach = kzalloc(sizeof(*mach), GFP_KERNEL);
707 if (!mach) {
708 ret = -ENOMEM;
709 goto out;
710 }
711 get_cpu_id((struct cpuid *) &mach->cpuid);
712 mach->ibc = sclp.ibc;
713 memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
714 S390_ARCH_FAC_LIST_SIZE_BYTE);
715 memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
716 S390_ARCH_FAC_LIST_SIZE_BYTE);
717 if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
718 ret = -EFAULT;
719 kfree(mach);
720out:
721 return ret;
722}
723
724static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
725{
726 int ret = -ENXIO;
727
728 switch (attr->attr) {
729 case KVM_S390_VM_CPU_PROCESSOR:
730 ret = kvm_s390_get_processor(kvm, attr);
731 break;
732 case KVM_S390_VM_CPU_MACHINE:
733 ret = kvm_s390_get_machine(kvm, attr);
734 break;
735 }
736 return ret;
737}
738
739static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
740{
741 int ret;
742
743 switch (attr->group) {
744 case KVM_S390_VM_MEM_CTRL:
745 ret = kvm_s390_set_mem_control(kvm, attr);
746 break;
747 case KVM_S390_VM_TOD:
748 ret = kvm_s390_set_tod(kvm, attr);
749 break;
750 case KVM_S390_VM_CPU_MODEL:
751 ret = kvm_s390_set_cpu_model(kvm, attr);
752 break;
753 case KVM_S390_VM_CRYPTO:
754 ret = kvm_s390_vm_set_crypto(kvm, attr);
755 break;
756 default:
757 ret = -ENXIO;
758 break;
759 }
760
761 return ret;
762}
763
764static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
765{
766 int ret;
767
768 switch (attr->group) {
769 case KVM_S390_VM_MEM_CTRL:
770 ret = kvm_s390_get_mem_control(kvm, attr);
771 break;
772 case KVM_S390_VM_TOD:
773 ret = kvm_s390_get_tod(kvm, attr);
774 break;
775 case KVM_S390_VM_CPU_MODEL:
776 ret = kvm_s390_get_cpu_model(kvm, attr);
777 break;
778 default:
779 ret = -ENXIO;
780 break;
781 }
782
783 return ret;
784}
785
786static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
787{
788 int ret;
789
790 switch (attr->group) {
791 case KVM_S390_VM_MEM_CTRL:
792 switch (attr->attr) {
793 case KVM_S390_VM_MEM_ENABLE_CMMA:
794 case KVM_S390_VM_MEM_CLR_CMMA:
795 case KVM_S390_VM_MEM_LIMIT_SIZE:
796 ret = 0;
797 break;
798 default:
799 ret = -ENXIO;
800 break;
801 }
802 break;
803 case KVM_S390_VM_TOD:
804 switch (attr->attr) {
805 case KVM_S390_VM_TOD_LOW:
806 case KVM_S390_VM_TOD_HIGH:
807 ret = 0;
808 break;
809 default:
810 ret = -ENXIO;
811 break;
812 }
813 break;
814 case KVM_S390_VM_CPU_MODEL:
815 switch (attr->attr) {
816 case KVM_S390_VM_CPU_PROCESSOR:
817 case KVM_S390_VM_CPU_MACHINE:
818 ret = 0;
819 break;
820 default:
821 ret = -ENXIO;
822 break;
823 }
824 break;
825 case KVM_S390_VM_CRYPTO:
826 switch (attr->attr) {
827 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
828 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
829 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
830 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
831 ret = 0;
832 break;
833 default:
834 ret = -ENXIO;
835 break;
836 }
837 break;
838 default:
839 ret = -ENXIO;
840 break;
841 }
842
843 return ret;
844}
845
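/*
 * Export one storage key byte per guest page. The architected key layout
 * (ACC, F, R and C bits) is passed through unchanged; the lowest order bit
 * is reserved and is rejected on the set path below.
 */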
846static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
847{
848 uint8_t *keys;
849 uint64_t hva;
850 unsigned long curkey;
851 int i, r = 0;
852
853 if (args->flags != 0)
854 return -EINVAL;
855
856 /* Is this guest using storage keys? */
857 if (!mm_use_skey(current->mm))
858 return KVM_S390_GET_SKEYS_NONE;
859
860 /* Enforce sane limit on memory allocation */
861 if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
862 return -EINVAL;
863
864 keys = kmalloc_array(args->count, sizeof(uint8_t),
865 GFP_KERNEL | __GFP_NOWARN);
866 if (!keys)
867 keys = vmalloc(sizeof(uint8_t) * args->count);
868 if (!keys)
869 return -ENOMEM;
870
871 for (i = 0; i < args->count; i++) {
872 hva = gfn_to_hva(kvm, args->start_gfn + i);
873 if (kvm_is_error_hva(hva)) {
874 r = -EFAULT;
875 goto out;
876 }
877
878 curkey = get_guest_storage_key(current->mm, hva);
879 if (IS_ERR_VALUE(curkey)) {
880 r = curkey;
881 goto out;
882 }
883 keys[i] = curkey;
884 }
885
886 r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
887 sizeof(uint8_t) * args->count);
888 if (r)
889 r = -EFAULT;
890out:
891 kvfree(keys);
892 return r;
893}
894
895static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
896{
897 uint8_t *keys;
898 uint64_t hva;
899 int i, r = 0;
900
901 if (args->flags != 0)
902 return -EINVAL;
903
904 /* Enforce sane limit on memory allocation */
905 if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
906 return -EINVAL;
907
908 keys = kmalloc_array(args->count, sizeof(uint8_t),
909 GFP_KERNEL | __GFP_NOWARN);
910 if (!keys)
911 keys = vmalloc(sizeof(uint8_t) * args->count);
912 if (!keys)
913 return -ENOMEM;
914
915 r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
916 sizeof(uint8_t) * args->count);
917 if (r) {
918 r = -EFAULT;
919 goto out;
920 }
921
922 /* Enable storage key handling for the guest */
923 r = s390_enable_skey();
924 if (r)
925 goto out;
926
927 for (i = 0; i < args->count; i++) {
928 hva = gfn_to_hva(kvm, args->start_gfn + i);
929 if (kvm_is_error_hva(hva)) {
930 r = -EFAULT;
931 goto out;
932 }
933
934 /* Lowest order bit is reserved */
935 if (keys[i] & 0x01) {
936 r = -EINVAL;
937 goto out;
938 }
939
940 r = set_guest_storage_key(current->mm, hva,
941 (unsigned long)keys[i], 0);
942 if (r)
943 goto out;
944 }
945out:
946 kvfree(keys);
947 return r;
948}
949
950long kvm_arch_vm_ioctl(struct file *filp,
951 unsigned int ioctl, unsigned long arg)
952{
953 struct kvm *kvm = filp->private_data;
954 void __user *argp = (void __user *)arg;
955 struct kvm_device_attr attr;
956 int r;
957
958 switch (ioctl) {
959 case KVM_S390_INTERRUPT: {
960 struct kvm_s390_interrupt s390int;
961
962 r = -EFAULT;
963 if (copy_from_user(&s390int, argp, sizeof(s390int)))
964 break;
965 r = kvm_s390_inject_vm(kvm, &s390int);
966 break;
967 }
968 case KVM_ENABLE_CAP: {
969 struct kvm_enable_cap cap;
970 r = -EFAULT;
971 if (copy_from_user(&cap, argp, sizeof(cap)))
972 break;
973 r = kvm_vm_ioctl_enable_cap(kvm, &cap);
974 break;
975 }
976 case KVM_CREATE_IRQCHIP: {
977 struct kvm_irq_routing_entry routing;
978
979 r = -EINVAL;
980 if (kvm->arch.use_irqchip) {
981 /* Set up dummy routing. */
982 memset(&routing, 0, sizeof(routing));
983 r = kvm_set_irq_routing(kvm, &routing, 0, 0);
984 }
985 break;
986 }
987 case KVM_SET_DEVICE_ATTR: {
988 r = -EFAULT;
989 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
990 break;
991 r = kvm_s390_vm_set_attr(kvm, &attr);
992 break;
993 }
994 case KVM_GET_DEVICE_ATTR: {
995 r = -EFAULT;
996 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
997 break;
998 r = kvm_s390_vm_get_attr(kvm, &attr);
999 break;
1000 }
1001 case KVM_HAS_DEVICE_ATTR: {
1002 r = -EFAULT;
1003 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1004 break;
1005 r = kvm_s390_vm_has_attr(kvm, &attr);
1006 break;
1007 }
1008 case KVM_S390_GET_SKEYS: {
1009 struct kvm_s390_skeys args;
1010
1011 r = -EFAULT;
1012 if (copy_from_user(&args, argp,
1013 sizeof(struct kvm_s390_skeys)))
1014 break;
1015 r = kvm_s390_get_skeys(kvm, &args);
1016 break;
1017 }
1018 case KVM_S390_SET_SKEYS: {
1019 struct kvm_s390_skeys args;
1020
1021 r = -EFAULT;
1022 if (copy_from_user(&args, argp,
1023 sizeof(struct kvm_s390_skeys)))
1024 break;
1025 r = kvm_s390_set_skeys(kvm, &args);
1026 break;
1027 }
1028 default:
1029 r = -ENOTTY;
1030 }
1031
1032 return r;
1033}
1034
1035static int kvm_s390_query_ap_config(u8 *config)
1036{
1037 u32 fcn_code = 0x04000000UL;
1038 u32 cc = 0;
1039
1040 memset(config, 0, 128);
1041 asm volatile(
1042 "lgr 0,%1\n"
1043 "lgr 2,%2\n"
1044 ".long 0xb2af0000\n" /* PQAP(QCI) */
1045 "0: ipm %0\n"
1046 "srl %0,28\n"
1047 "1:\n"
1048 EX_TABLE(0b, 1b)
1049 : "+r" (cc)
1050 : "r" (fcn_code), "r" (config)
1051 : "cc", "0", "2", "memory"
1052 );
1053
1054 return cc;
1055}
1056
1057static int kvm_s390_apxa_installed(void)
1058{
1059 u8 config[128];
1060 int cc;
1061
1062 if (test_facility(12)) {
1063 cc = kvm_s390_query_ap_config(config);
1064
1065 if (cc)
1066 pr_err("PQAP(QCI) failed with cc=%d", cc);
1067 else
1068 return config[0] & 0x40;
1069 }
1070
1071 return 0;
1072}
1073
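/*
 * Advertise a format-2 CRYCB when the APXA facility (extended AP addressing,
 * per kvm_s390_apxa_installed() above) is present, format 1 otherwise.
 */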
1074static void kvm_s390_set_crycb_format(struct kvm *kvm)
1075{
1076 kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
1077
1078 if (kvm_s390_apxa_installed())
1079 kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
1080 else
1081 kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
1082}
1083
1084static void kvm_s390_get_cpu_id(struct cpuid *cpu_id)
1085{
1086 get_cpu_id(cpu_id);
1087 cpu_id->version = 0xff;
1088}
1089
1090static void kvm_s390_crypto_init(struct kvm *kvm)
1091{
1092 if (!test_kvm_facility(kvm, 76))
1093 return;
1094
1095 kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
1096 kvm_s390_set_crycb_format(kvm);
1097
1098 /* Enable AES/DEA protected key functions by default */
1099 kvm->arch.crypto.aes_kw = 1;
1100 kvm->arch.crypto.dea_kw = 1;
1101 get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
1102 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
1103 get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
1104 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
1105}
1106
1107static void sca_dispose(struct kvm *kvm)
1108{
1109 if (kvm->arch.use_esca)
1110 free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
1111 else
1112 free_page((unsigned long)(kvm->arch.sca));
1113 kvm->arch.sca = NULL;
1114}
1115
1116int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
1117{
1118 int i, rc;
1119 char debug_name[16];
1120 static unsigned long sca_offset;
1121
1122 rc = -EINVAL;
1123#ifdef CONFIG_KVM_S390_UCONTROL
1124 if (type & ~KVM_VM_S390_UCONTROL)
1125 goto out_err;
1126 if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
1127 goto out_err;
1128#else
1129 if (type)
1130 goto out_err;
1131#endif
1132
1133 rc = s390_enable_sie();
1134 if (rc)
1135 goto out_err;
1136
1137 rc = -ENOMEM;
1138
1139 kvm->arch.use_esca = 0; /* start with basic SCA */
1140 rwlock_init(&kvm->arch.sca_lock);
1141 kvm->arch.sca = (struct bsca_block *) get_zeroed_page(GFP_KERNEL);
1142 if (!kvm->arch.sca)
1143 goto out_err;
1144 spin_lock(&kvm_lock);
1145 sca_offset += 16;
1146 if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
1147 sca_offset = 0;
1148 kvm->arch.sca = (struct bsca_block *)
1149 ((char *) kvm->arch.sca + sca_offset);
1150 spin_unlock(&kvm_lock);
1151
1152 sprintf(debug_name, "kvm-%u", current->pid);
1153
1154 kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
1155 if (!kvm->arch.dbf)
1156 goto out_err;
1157
1158 kvm->arch.sie_page2 =
1159 (struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
1160 if (!kvm->arch.sie_page2)
1161 goto out_err;
1162
1163 /* Populate the facility mask initially. */
1164 memcpy(kvm->arch.model.fac_mask, S390_lowcore.stfle_fac_list,
1165 S390_ARCH_FAC_LIST_SIZE_BYTE);
1166 for (i = 0; i < S390_ARCH_FAC_LIST_SIZE_U64; i++) {
1167 if (i < kvm_s390_fac_list_mask_size())
1168 kvm->arch.model.fac_mask[i] &= kvm_s390_fac_list_mask[i];
1169 else
1170 kvm->arch.model.fac_mask[i] = 0UL;
1171 }
1172
1173 /* Populate the facility list initially. */
1174 kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
1175 memcpy(kvm->arch.model.fac_list, kvm->arch.model.fac_mask,
1176 S390_ARCH_FAC_LIST_SIZE_BYTE);
1177
1178 kvm_s390_get_cpu_id(&kvm->arch.model.cpu_id);
1179 kvm->arch.model.ibc = sclp.ibc & 0x0fff;
1180
1181 kvm_s390_crypto_init(kvm);
1182
1183 spin_lock_init(&kvm->arch.float_int.lock);
1184 for (i = 0; i < FIRQ_LIST_COUNT; i++)
1185 INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
1186 init_waitqueue_head(&kvm->arch.ipte_wq);
1187 mutex_init(&kvm->arch.ipte_mutex);
1188
1189 debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
1190 VM_EVENT(kvm, 3, "vm created with type %lu", type);
1191
1192 if (type & KVM_VM_S390_UCONTROL) {
1193 kvm->arch.gmap = NULL;
1194 kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
1195 } else {
1196 if (sclp.hamax == U64_MAX)
1197 kvm->arch.mem_limit = TASK_MAX_SIZE;
1198 else
1199 kvm->arch.mem_limit = min_t(unsigned long, TASK_MAX_SIZE,
1200 sclp.hamax + 1);
1201 kvm->arch.gmap = gmap_alloc(current->mm, kvm->arch.mem_limit - 1);
1202 if (!kvm->arch.gmap)
1203 goto out_err;
1204 kvm->arch.gmap->private = kvm;
1205 kvm->arch.gmap->pfault_enabled = 0;
1206 }
1207
1208 kvm->arch.css_support = 0;
1209 kvm->arch.use_irqchip = 0;
1210 kvm->arch.epoch = 0;
1211
1212 spin_lock_init(&kvm->arch.start_stop_lock);
1213 KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
1214
1215 return 0;
1216out_err:
1217 free_page((unsigned long)kvm->arch.sie_page2);
1218 debug_unregister(kvm->arch.dbf);
1219 sca_dispose(kvm);
1220 KVM_EVENT(3, "creation of vm failed: %d", rc);
1221 return rc;
1222}
1223
1224void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
1225{
1226 VCPU_EVENT(vcpu, 3, "%s", "free cpu");
1227 trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
1228 kvm_s390_clear_local_irqs(vcpu);
1229 kvm_clear_async_pf_completion_queue(vcpu);
1230 if (!kvm_is_ucontrol(vcpu->kvm))
1231 sca_del_vcpu(vcpu);
1232
1233 if (kvm_is_ucontrol(vcpu->kvm))
1234 gmap_free(vcpu->arch.gmap);
1235
1236 if (vcpu->kvm->arch.use_cmma)
1237 kvm_s390_vcpu_unsetup_cmma(vcpu);
1238 free_page((unsigned long)(vcpu->arch.sie_block));
1239
1240 kvm_vcpu_uninit(vcpu);
1241 kmem_cache_free(kvm_vcpu_cache, vcpu);
1242}
1243
1244static void kvm_free_vcpus(struct kvm *kvm)
1245{
1246 unsigned int i;
1247 struct kvm_vcpu *vcpu;
1248
1249 kvm_for_each_vcpu(i, vcpu, kvm)
1250 kvm_arch_vcpu_destroy(vcpu);
1251
1252 mutex_lock(&kvm->lock);
1253 for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
1254 kvm->vcpus[i] = NULL;
1255
1256 atomic_set(&kvm->online_vcpus, 0);
1257 mutex_unlock(&kvm->lock);
1258}
1259
1260void kvm_arch_destroy_vm(struct kvm *kvm)
1261{
1262 kvm_free_vcpus(kvm);
1263 sca_dispose(kvm);
1264 debug_unregister(kvm->arch.dbf);
1265 free_page((unsigned long)kvm->arch.sie_page2);
1266 if (!kvm_is_ucontrol(kvm))
1267 gmap_free(kvm->arch.gmap);
1268 kvm_s390_destroy_adapters(kvm);
1269 kvm_s390_clear_float_irqs(kvm);
1270 KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
1271}
1272
1273/* Section: vcpu related */
1274static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
1275{
1276 vcpu->arch.gmap = gmap_alloc(current->mm, -1UL);
1277 if (!vcpu->arch.gmap)
1278 return -ENOMEM;
1279 vcpu->arch.gmap->private = vcpu->kvm;
1280
1281 return 0;
1282}
1283
1284static void sca_del_vcpu(struct kvm_vcpu *vcpu)
1285{
1286 read_lock(&vcpu->kvm->arch.sca_lock);
1287 if (vcpu->kvm->arch.use_esca) {
1288 struct esca_block *sca = vcpu->kvm->arch.sca;
1289
1290 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
1291 sca->cpu[vcpu->vcpu_id].sda = 0;
1292 } else {
1293 struct bsca_block *sca = vcpu->kvm->arch.sca;
1294
1295 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
1296 sca->cpu[vcpu->vcpu_id].sda = 0;
1297 }
1298 read_unlock(&vcpu->kvm->arch.sca_lock);
1299}
1300
1301static void sca_add_vcpu(struct kvm_vcpu *vcpu)
1302{
1303 read_lock(&vcpu->kvm->arch.sca_lock);
1304 if (vcpu->kvm->arch.use_esca) {
1305 struct esca_block *sca = vcpu->kvm->arch.sca;
1306
1307 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
1308 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
1309 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
1310 vcpu->arch.sie_block->ecb2 |= 0x04U;
1311 set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
1312 } else {
1313 struct bsca_block *sca = vcpu->kvm->arch.sca;
1314
1315 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
1316 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
1317 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
1318 set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
1319 }
1320 read_unlock(&vcpu->kvm->arch.sca_lock);
1321}
1322
1323/* Basic SCA to Extended SCA data copy routines */
1324static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
1325{
1326 d->sda = s->sda;
1327 d->sigp_ctrl.c = s->sigp_ctrl.c;
1328 d->sigp_ctrl.scn = s->sigp_ctrl.scn;
1329}
1330
1331static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
1332{
1333 int i;
1334
1335 d->ipte_control = s->ipte_control;
1336 d->mcn[0] = s->mcn;
1337 for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
1338 sca_copy_entry(&d->cpu[i], &s->cpu[i]);
1339}
1340
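/*
 * Replace the basic SCA with an extended SCA: all VCPUs are blocked and
 * sca_lock is held for writing while the entries are copied and every SIE
 * block is repointed at the new origin (with the ESCA bit set in ecb2).
 */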
1341static int sca_switch_to_extended(struct kvm *kvm)
1342{
1343 struct bsca_block *old_sca = kvm->arch.sca;
1344 struct esca_block *new_sca;
1345 struct kvm_vcpu *vcpu;
1346 unsigned int vcpu_idx;
1347 u32 scaol, scaoh;
1348
1349 new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);
1350 if (!new_sca)
1351 return -ENOMEM;
1352
1353 scaoh = (u32)((u64)(new_sca) >> 32);
1354 scaol = (u32)(u64)(new_sca) & ~0x3fU;
1355
1356 kvm_s390_vcpu_block_all(kvm);
1357 write_lock(&kvm->arch.sca_lock);
1358
1359 sca_copy_b_to_e(new_sca, old_sca);
1360
1361 kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
1362 vcpu->arch.sie_block->scaoh = scaoh;
1363 vcpu->arch.sie_block->scaol = scaol;
1364 vcpu->arch.sie_block->ecb2 |= 0x04U;
1365 }
1366 kvm->arch.sca = new_sca;
1367 kvm->arch.use_esca = 1;
1368
1369 write_unlock(&kvm->arch.sca_lock);
1370 kvm_s390_vcpu_unblock_all(kvm);
1371
1372 free_page((unsigned long)old_sca);
1373
1374 VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
1375 old_sca, kvm->arch.sca);
1376 return 0;
1377}
1378
1379static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
1380{
1381 int rc;
1382
1383 if (id < KVM_S390_BSCA_CPU_SLOTS)
1384 return true;
1385 if (!sclp.has_esca)
1386 return false;
1387
1388 mutex_lock(&kvm->lock);
1389 rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
1390 mutex_unlock(&kvm->lock);
1391
1392 return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
1393}
1394
1395int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
1396{
1397 vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
1398 kvm_clear_async_pf_completion_queue(vcpu);
1399 vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
1400 KVM_SYNC_GPRS |
1401 KVM_SYNC_ACRS |
1402 KVM_SYNC_CRS |
1403 KVM_SYNC_ARCH0 |
1404 KVM_SYNC_PFAULT;
1405 if (test_kvm_facility(vcpu->kvm, 64))
1406 vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
1407 /* fprs can be synchronized via vrs, even if the guest has no vx. With
1408 * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
1409 */
1410 if (MACHINE_HAS_VX)
1411 vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
1412 else
1413 vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
1414
1415 if (kvm_is_ucontrol(vcpu->kvm))
1416 return __kvm_ucontrol_vcpu_init(vcpu);
1417
1418 return 0;
1419}
1420
1421/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
1422static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1423{
1424 WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
1425 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
1426 vcpu->arch.cputm_start = get_tod_clock_fast();
1427 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
1428}
1429
1430/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
1431static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1432{
1433 WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
1434 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
1435 vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
1436 vcpu->arch.cputm_start = 0;
1437 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
1438}
1439
1440/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
1441static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1442{
1443 WARN_ON_ONCE(vcpu->arch.cputm_enabled);
1444 vcpu->arch.cputm_enabled = true;
1445 __start_cpu_timer_accounting(vcpu);
1446}
1447
1448/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
1449static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1450{
1451 WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
1452 __stop_cpu_timer_accounting(vcpu);
1453 vcpu->arch.cputm_enabled = false;
1454}
1455
1456static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1457{
1458 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
1459 __enable_cpu_timer_accounting(vcpu);
1460 preempt_enable();
1461}
1462
1463static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1464{
1465 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
1466 __disable_cpu_timer_accounting(vcpu);
1467 preempt_enable();
1468}
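
/*
 * Summary of the lazy accounting above: while accounting is active,
 * cputm_start holds the TOD at which the VCPU started consuming time and the
 * observed timer is sie_block->cputm - (current TOD - cputm_start); this
 * difference is folded into cputm when accounting stops.
 */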
1469
1470/* set the cpu timer - may only be called from the VCPU thread itself */
1471void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
1472{
1473 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
1474 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
1475 if (vcpu->arch.cputm_enabled)
1476 vcpu->arch.cputm_start = get_tod_clock_fast();
1477 vcpu->arch.sie_block->cputm = cputm;
1478 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
1479 preempt_enable();
1480}
1481
1482/* update and get the cpu timer - can also be called from other VCPU threads */
1483__u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
1484{
1485 unsigned int seq;
1486 __u64 value;
1487
1488 if (unlikely(!vcpu->arch.cputm_enabled))
1489 return vcpu->arch.sie_block->cputm;
1490
1491 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
1492 do {
1493 seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
1494 /*
1495 * If the writer would ever execute a read in the critical
1496 * section, e.g. in irq context, we have a deadlock.
1497 */
1498 WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
1499 value = vcpu->arch.sie_block->cputm;
1500 /* if cputm_start is 0, accounting is being started/stopped */
1501 if (likely(vcpu->arch.cputm_start))
1502 value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
1503 } while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
1504 preempt_enable();
1505 return value;
1506}
1507
1508void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
1509{
1510 /* Save host register state */
1511 save_fpu_regs();
1512 vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
1513 vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
1514
1515 if (MACHINE_HAS_VX)
1516 current->thread.fpu.regs = vcpu->run->s.regs.vrs;
1517 else
1518 current->thread.fpu.regs = vcpu->run->s.regs.fprs;
1519 current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
1520 if (test_fp_ctl(current->thread.fpu.fpc))
1521 /* User space provided an invalid FPC, let's clear it */
1522 current->thread.fpu.fpc = 0;
1523
1524 save_access_regs(vcpu->arch.host_acrs);
1525 restore_access_regs(vcpu->run->s.regs.acrs);
1526 gmap_enable(vcpu->arch.gmap);
1527 atomic_or(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
1528 if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
1529 __start_cpu_timer_accounting(vcpu);
1530 vcpu->cpu = cpu;
1531}
1532
1533void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
1534{
1535 vcpu->cpu = -1;
1536 if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
1537 __stop_cpu_timer_accounting(vcpu);
1538 atomic_andnot(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
1539 gmap_disable(vcpu->arch.gmap);
1540
1541 /* Save guest register state */
1542 save_fpu_regs();
1543 vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
1544
1545 /* Restore host register state */
1546 current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
1547 current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
1548
1549 save_access_regs(vcpu->run->s.regs.acrs);
1550 restore_access_regs(vcpu->arch.host_acrs);
1551}
1552
1553static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
1554{
1555 /* this equals initial cpu reset in pop, but we don't switch to ESA */
1556 vcpu->arch.sie_block->gpsw.mask = 0UL;
1557 vcpu->arch.sie_block->gpsw.addr = 0UL;
1558 kvm_s390_set_prefix(vcpu, 0);
1559 kvm_s390_set_cpu_timer(vcpu, 0);
1560 vcpu->arch.sie_block->ckc = 0UL;
1561 vcpu->arch.sie_block->todpr = 0;
1562 memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
1563 vcpu->arch.sie_block->gcr[0] = 0xE0UL;
1564 vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
1565 /* make sure the new fpc will be lazily loaded */
1566 save_fpu_regs();
1567 current->thread.fpu.fpc = 0;
1568 vcpu->arch.sie_block->gbea = 1;
1569 vcpu->arch.sie_block->pp = 0;
1570 vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
1571 kvm_clear_async_pf_completion_queue(vcpu);
1572 if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
1573 kvm_s390_vcpu_stop(vcpu);
1574 kvm_s390_clear_local_irqs(vcpu);
1575}
1576
1577void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
1578{
1579 mutex_lock(&vcpu->kvm->lock);
1580 preempt_disable();
1581 vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
1582 preempt_enable();
1583 mutex_unlock(&vcpu->kvm->lock);
1584 if (!kvm_is_ucontrol(vcpu->kvm)) {
1585 vcpu->arch.gmap = vcpu->kvm->arch.gmap;
1586 sca_add_vcpu(vcpu);
1587 }
1588
1589}
1590
1591static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
1592{
1593 if (!test_kvm_facility(vcpu->kvm, 76))
1594 return;
1595
1596 vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
1597
1598 if (vcpu->kvm->arch.crypto.aes_kw)
1599 vcpu->arch.sie_block->ecb3 |= ECB3_AES;
1600 if (vcpu->kvm->arch.crypto.dea_kw)
1601 vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
1602
1603 vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
1604}
1605
1606void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
1607{
1608 free_page(vcpu->arch.sie_block->cbrlo);
1609 vcpu->arch.sie_block->cbrlo = 0;
1610}
1611
1612int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
1613{
1614 vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
1615 if (!vcpu->arch.sie_block->cbrlo)
1616 return -ENOMEM;
1617
1618 vcpu->arch.sie_block->ecb2 |= 0x80;
1619 vcpu->arch.sie_block->ecb2 &= ~0x08;
1620 return 0;
1621}
1622
1623static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
1624{
1625 struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
1626
1627 vcpu->arch.cpu_id = model->cpu_id;
1628 vcpu->arch.sie_block->ibc = model->ibc;
1629 if (test_kvm_facility(vcpu->kvm, 7))
1630 vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
1631}
1632
1633int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
1634{
1635 int rc = 0;
1636
1637 atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
1638 CPUSTAT_SM |
1639 CPUSTAT_STOPPED);
1640
1641 if (test_kvm_facility(vcpu->kvm, 78))
1642 atomic_or(CPUSTAT_GED2, &vcpu->arch.sie_block->cpuflags);
1643 else if (test_kvm_facility(vcpu->kvm, 8))
1644 atomic_or(CPUSTAT_GED, &vcpu->arch.sie_block->cpuflags);
1645
1646 kvm_s390_vcpu_setup_model(vcpu);
1647
1648 vcpu->arch.sie_block->ecb = 6;
1649 if (test_kvm_facility(vcpu->kvm, 50) && test_kvm_facility(vcpu->kvm, 73))
1650 vcpu->arch.sie_block->ecb |= 0x10;
1651
1652 vcpu->arch.sie_block->ecb2 = 8;
1653 vcpu->arch.sie_block->eca = 0xC1002000U;
1654 if (sclp.has_siif)
1655 vcpu->arch.sie_block->eca |= 1;
1656 if (sclp.has_sigpif)
1657 vcpu->arch.sie_block->eca |= 0x10000000U;
1658 if (test_kvm_facility(vcpu->kvm, 64))
1659 vcpu->arch.sie_block->ecb3 |= 0x01;
1660 if (test_kvm_facility(vcpu->kvm, 129)) {
1661 vcpu->arch.sie_block->eca |= 0x00020000;
1662 vcpu->arch.sie_block->ecd |= 0x20000000;
1663 }
1664 vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
1665 vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
1666
1667 if (vcpu->kvm->arch.use_cmma) {
1668 rc = kvm_s390_vcpu_setup_cmma(vcpu);
1669 if (rc)
1670 return rc;
1671 }
1672 hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
1673 vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
1674
1675 kvm_s390_vcpu_crypto_setup(vcpu);
1676
1677 return rc;
1678}
1679
1680struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
1681 unsigned int id)
1682{
1683 struct kvm_vcpu *vcpu;
1684 struct sie_page *sie_page;
1685 int rc = -EINVAL;
1686
1687 if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
1688 goto out;
1689
1690 rc = -ENOMEM;
1691
1692 vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
1693 if (!vcpu)
1694 goto out;
1695
1696 sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
1697 if (!sie_page)
1698 goto out_free_cpu;
1699
1700 vcpu->arch.sie_block = &sie_page->sie_block;
1701 vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
1702
1703 vcpu->arch.sie_block->icpua = id;
1704 spin_lock_init(&vcpu->arch.local_int.lock);
1705 vcpu->arch.local_int.float_int = &kvm->arch.float_int;
1706 vcpu->arch.local_int.wq = &vcpu->wq;
1707 vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
1708 seqcount_init(&vcpu->arch.cputm_seqcount);
1709
1710 rc = kvm_vcpu_init(vcpu, kvm, id);
1711 if (rc)
1712 goto out_free_sie_block;
1713 VM_EVENT(kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", id, vcpu,
1714 vcpu->arch.sie_block);
1715 trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);
1716
1717 return vcpu;
1718out_free_sie_block:
1719 free_page((unsigned long)(vcpu->arch.sie_block));
1720out_free_cpu:
1721 kmem_cache_free(kvm_vcpu_cache, vcpu);
1722out:
1723 return ERR_PTR(rc);
1724}
1725
1726int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
1727{
1728 return kvm_s390_vcpu_has_irq(vcpu, 0);
1729}
1730
1731void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
1732{
1733 atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
1734 exit_sie(vcpu);
1735}
1736
1737void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
1738{
1739 atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
1740}
1741
1742static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
1743{
1744 atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
1745 exit_sie(vcpu);
1746}
1747
1748static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
1749{
1750 atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
1751}
1752
1753/*
1754 * Kick a guest cpu out of SIE and wait until SIE is not running.
1755 * If the CPU is not running (e.g. waiting as idle) the function will
1756 * return immediately. */
1757void exit_sie(struct kvm_vcpu *vcpu)
1758{
1759 atomic_or(CPUSTAT_STOP_INT, &vcpu->arch.sie_block->cpuflags);
1760 while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
1761 cpu_relax();
1762}
1763
1764/* Kick a guest cpu out of SIE to process a request synchronously */
1765void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
1766{
1767 kvm_make_request(req, vcpu);
1768 kvm_s390_vcpu_request(vcpu);
1769}
1770
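/*
 * ipte notifier callback: an invalidation hit a mapped guest page. The
 * prefix area spans two consecutive 4K pages, hence the compare with the
 * 0x1000 bit masked out; affected VCPUs get KVM_REQ_MMU_RELOAD so that the
 * notifier is re-armed in kvm_s390_handle_requests().
 */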
1771static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address)
1772{
1773 int i;
1774 struct kvm *kvm = gmap->private;
1775 struct kvm_vcpu *vcpu;
1776
1777 kvm_for_each_vcpu(i, vcpu, kvm) {
1778 /* match against both prefix pages */
1779 if (kvm_s390_get_prefix(vcpu) == (address & ~0x1000UL)) {
1780 VCPU_EVENT(vcpu, 2, "gmap notifier for %lx", address);
1781 kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
1782 }
1783 }
1784}
1785
1786int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
1787{
1788 /* kvm common code refers to this, but never calls it */
1789 BUG();
1790 return 0;
1791}
1792
1793static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
1794 struct kvm_one_reg *reg)
1795{
1796 int r = -EINVAL;
1797
1798 switch (reg->id) {
1799 case KVM_REG_S390_TODPR:
1800 r = put_user(vcpu->arch.sie_block->todpr,
1801 (u32 __user *)reg->addr);
1802 break;
1803 case KVM_REG_S390_EPOCHDIFF:
1804 r = put_user(vcpu->arch.sie_block->epoch,
1805 (u64 __user *)reg->addr);
1806 break;
1807 case KVM_REG_S390_CPU_TIMER:
1808 r = put_user(kvm_s390_get_cpu_timer(vcpu),
1809 (u64 __user *)reg->addr);
1810 break;
1811 case KVM_REG_S390_CLOCK_COMP:
1812 r = put_user(vcpu->arch.sie_block->ckc,
1813 (u64 __user *)reg->addr);
1814 break;
1815 case KVM_REG_S390_PFTOKEN:
1816 r = put_user(vcpu->arch.pfault_token,
1817 (u64 __user *)reg->addr);
1818 break;
1819 case KVM_REG_S390_PFCOMPARE:
1820 r = put_user(vcpu->arch.pfault_compare,
1821 (u64 __user *)reg->addr);
1822 break;
1823 case KVM_REG_S390_PFSELECT:
1824 r = put_user(vcpu->arch.pfault_select,
1825 (u64 __user *)reg->addr);
1826 break;
1827 case KVM_REG_S390_PP:
1828 r = put_user(vcpu->arch.sie_block->pp,
1829 (u64 __user *)reg->addr);
1830 break;
1831 case KVM_REG_S390_GBEA:
1832 r = put_user(vcpu->arch.sie_block->gbea,
1833 (u64 __user *)reg->addr);
1834 break;
1835 default:
1836 break;
1837 }
1838
1839 return r;
1840}
1841
1842static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
1843 struct kvm_one_reg *reg)
1844{
1845 int r = -EINVAL;
1846 __u64 val;
1847
1848 switch (reg->id) {
1849 case KVM_REG_S390_TODPR:
1850 r = get_user(vcpu->arch.sie_block->todpr,
1851 (u32 __user *)reg->addr);
1852 break;
1853 case KVM_REG_S390_EPOCHDIFF:
1854 r = get_user(vcpu->arch.sie_block->epoch,
1855 (u64 __user *)reg->addr);
1856 break;
1857 case KVM_REG_S390_CPU_TIMER:
1858 r = get_user(val, (u64 __user *)reg->addr);
1859 if (!r)
1860 kvm_s390_set_cpu_timer(vcpu, val);
1861 break;
1862 case KVM_REG_S390_CLOCK_COMP:
1863 r = get_user(vcpu->arch.sie_block->ckc,
1864 (u64 __user *)reg->addr);
1865 break;
1866 case KVM_REG_S390_PFTOKEN:
1867 r = get_user(vcpu->arch.pfault_token,
1868 (u64 __user *)reg->addr);
1869 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
1870 kvm_clear_async_pf_completion_queue(vcpu);
1871 break;
1872 case KVM_REG_S390_PFCOMPARE:
1873 r = get_user(vcpu->arch.pfault_compare,
1874 (u64 __user *)reg->addr);
1875 break;
1876 case KVM_REG_S390_PFSELECT:
1877 r = get_user(vcpu->arch.pfault_select,
1878 (u64 __user *)reg->addr);
1879 break;
1880 case KVM_REG_S390_PP:
1881 r = get_user(vcpu->arch.sie_block->pp,
1882 (u64 __user *)reg->addr);
1883 break;
1884 case KVM_REG_S390_GBEA:
1885 r = get_user(vcpu->arch.sie_block->gbea,
1886 (u64 __user *)reg->addr);
1887 break;
1888 default:
1889 break;
1890 }
1891
1892 return r;
1893}
1894
1895static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
1896{
1897 kvm_s390_vcpu_initial_reset(vcpu);
1898 return 0;
1899}
1900
1901int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
1902{
1903	memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
1904 return 0;
1905}
1906
1907int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
1908{
1909	memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
1910 return 0;
1911}
1912
1913int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
1914 struct kvm_sregs *sregs)
1915{
1916 memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
1917 memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
1918 restore_access_regs(vcpu->run->s.regs.acrs);
1919 return 0;
1920}
1921
1922int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
1923 struct kvm_sregs *sregs)
1924{
1925 memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
1926 memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
1927 return 0;
1928}
1929
1930int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
1931{
1932 /* make sure the new values will be lazily loaded */
1933 save_fpu_regs();
1934 if (test_fp_ctl(fpu->fpc))
1935 return -EINVAL;
1936 current->thread.fpu.fpc = fpu->fpc;
1937 if (MACHINE_HAS_VX)
1938 convert_fp_to_vx(current->thread.fpu.vxrs, (freg_t *)fpu->fprs);
1939 else
1940 memcpy(current->thread.fpu.fprs, &fpu->fprs, sizeof(fpu->fprs));
1941 return 0;
1942}
1943
1944int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
1945{
1946 /* make sure we have the latest values */
1947 save_fpu_regs();
1948 if (MACHINE_HAS_VX)
1949 convert_vx_to_fp((freg_t *)fpu->fprs, current->thread.fpu.vxrs);
1950 else
1951 memcpy(fpu->fprs, current->thread.fpu.fprs, sizeof(fpu->fprs));
1952 fpu->fpc = current->thread.fpu.fpc;
1953 return 0;
1954}
1955
1956static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
1957{
1958 int rc = 0;
1959
1960 if (!is_vcpu_stopped(vcpu))
1961 rc = -EBUSY;
1962 else {
1963 vcpu->run->psw_mask = psw.mask;
1964 vcpu->run->psw_addr = psw.addr;
1965 }
1966 return rc;
1967}
1968
1969int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
1970 struct kvm_translation *tr)
1971{
1972 return -EINVAL; /* not implemented yet */
1973}
1974
1975#define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
1976 KVM_GUESTDBG_USE_HW_BP | \
1977 KVM_GUESTDBG_ENABLE)
1978
1979int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
1980 struct kvm_guest_debug *dbg)
1981{
1982 int rc = 0;
1983
1984 vcpu->guest_debug = 0;
1985 kvm_s390_clear_bp_data(vcpu);
1986
1987 if (dbg->control & ~VALID_GUESTDBG_FLAGS)
1988 return -EINVAL;
1989
1990 if (dbg->control & KVM_GUESTDBG_ENABLE) {
1991 vcpu->guest_debug = dbg->control;
1992 /* enforce guest PER */
1993 atomic_or(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
1994
1995 if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
1996 rc = kvm_s390_import_bp_data(vcpu, dbg);
1997 } else {
1998 atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
1999 vcpu->arch.guestdbg.last_bp = 0;
2000 }
2001
2002 if (rc) {
2003 vcpu->guest_debug = 0;
2004 kvm_s390_clear_bp_data(vcpu);
2005 atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2006 }
2007
2008 return rc;
2009}
2010
2011int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
2012 struct kvm_mp_state *mp_state)
2013{
2014 /* CHECK_STOP and LOAD are not supported yet */
2015 return is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
2016 KVM_MP_STATE_OPERATING;
2017}
2018
2019int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
2020 struct kvm_mp_state *mp_state)
2021{
2022 int rc = 0;
2023
2024 /* user space knows about this interface - let it control the state */
2025 vcpu->kvm->arch.user_cpu_state_ctrl = 1;
2026
2027 switch (mp_state->mp_state) {
2028 case KVM_MP_STATE_STOPPED:
2029 kvm_s390_vcpu_stop(vcpu);
2030 break;
2031 case KVM_MP_STATE_OPERATING:
2032 kvm_s390_vcpu_start(vcpu);
2033 break;
2034 case KVM_MP_STATE_LOAD:
2035 case KVM_MP_STATE_CHECK_STOP:
2036 /* fall through - CHECK_STOP and LOAD are not supported yet */
2037 default:
2038 rc = -ENXIO;
2039 }
2040
2041 return rc;
2042}
2043
2044static bool ibs_enabled(struct kvm_vcpu *vcpu)
2045{
2046 return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_IBS;
2047}
2048
2049static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
2050{
2051retry:
2052 kvm_s390_vcpu_request_handled(vcpu);
2053 if (!vcpu->requests)
2054 return 0;
2055 /*
2056 * We use MMU_RELOAD just to re-arm the ipte notifier for the
2057 * guest prefix page. gmap_ipte_notify will wait on the ptl lock.
2058 * This ensures that the ipte instruction for this request has
2059 * already finished. We might race against a second unmapper that
2060 * wants to set the blocking bit. Let's just retry the request loop.
2061 */
2062 if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
2063 int rc;
2064 rc = gmap_ipte_notify(vcpu->arch.gmap,
2065 kvm_s390_get_prefix(vcpu),
2066 PAGE_SIZE * 2);
2067 if (rc)
2068 return rc;
2069 goto retry;
2070 }
2071
2072 if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
2073 vcpu->arch.sie_block->ihcpu = 0xffff;
2074 goto retry;
2075 }
2076
2077 if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
2078 if (!ibs_enabled(vcpu)) {
2079 trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
2080 atomic_or(CPUSTAT_IBS,
2081 &vcpu->arch.sie_block->cpuflags);
2082 }
2083 goto retry;
2084 }
2085
2086 if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
2087 if (ibs_enabled(vcpu)) {
2088 trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
2089 atomic_andnot(CPUSTAT_IBS,
2090 &vcpu->arch.sie_block->cpuflags);
2091 }
2092 goto retry;
2093 }
2094
2095 /* nothing to do, just clear the request */
2096 clear_bit(KVM_REQ_UNHALT, &vcpu->requests);
2097
2098 return 0;
2099}
2100
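/*
 * Set the guest TOD clock for the whole VM.  The epoch is kept as the
 * offset between the requested guest TOD and the current host TOD and is
 * copied into every SIE block while all VCPUs are blocked, so that they
 * all observe the new clock value consistently.
 */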
2101void kvm_s390_set_tod_clock(struct kvm *kvm, u64 tod)
2102{
2103 struct kvm_vcpu *vcpu;
2104 int i;
2105
2106 mutex_lock(&kvm->lock);
2107 preempt_disable();
2108 kvm->arch.epoch = tod - get_tod_clock();
2109 kvm_s390_vcpu_block_all(kvm);
2110 kvm_for_each_vcpu(i, vcpu, kvm)
2111 vcpu->arch.sie_block->epoch = kvm->arch.epoch;
2112 kvm_s390_vcpu_unblock_all(kvm);
2113 preempt_enable();
2114 mutex_unlock(&kvm->lock);
2115}
2116
2117/**
2118 * kvm_arch_fault_in_page - fault-in guest page if necessary
2119 * @vcpu: The corresponding virtual cpu
2120 * @gpa: Guest physical address
2121 * @writable: Whether the page should be writable or not
2122 *
2123 * Make sure that a guest page has been faulted-in on the host.
2124 *
2125 * Return: Zero on success, negative error code otherwise.
2126 */
2127long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
2128{
2129 return gmap_fault(vcpu->arch.gmap, gpa,
2130 writable ? FAULT_FLAG_WRITE : 0);
2131}
2132
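/*
 * Notify the guest about an async page fault: a PFAULT_INIT interrupt is
 * injected into the faulting VCPU when the page is missing, and a floating
 * PFAULT_DONE interrupt is injected into the VM once the page has been
 * resolved.  The token lets the guest match the two notifications.
 */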
2133static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
2134 unsigned long token)
2135{
2136 struct kvm_s390_interrupt inti;
2137 struct kvm_s390_irq irq;
2138
2139 if (start_token) {
2140 irq.u.ext.ext_params2 = token;
2141 irq.type = KVM_S390_INT_PFAULT_INIT;
2142 WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
2143 } else {
2144 inti.type = KVM_S390_INT_PFAULT_DONE;
2145 inti.parm64 = token;
2146 WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
2147 }
2148}
2149
2150void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
2151 struct kvm_async_pf *work)
2152{
2153 trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
2154 __kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
2155}
2156
2157void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
2158 struct kvm_async_pf *work)
2159{
2160 trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
2161 __kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
2162}
2163
2164void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
2165 struct kvm_async_pf *work)
2166{
2167 /* s390 will always inject the page directly */
2168}
2169
2170bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
2171{
2172 /*
2173 * s390 will always inject the page directly,
2174 * but we still want check_async_completion to clean up
2175 */
2176 return true;
2177}
2178
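/*
 * Check whether a host fault can be handled asynchronously as a guest
 * pfault.  Every precondition below must hold (valid pfault token,
 * matching PSW mask bits, external interrupts not disabled, no interrupt
 * pending, the relevant subclass mask set in CR0 and pfault enabled on
 * the gmap); otherwise the caller falls back to synchronous fault-in.
 */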
2179static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
2180{
2181 hva_t hva;
2182 struct kvm_arch_async_pf arch;
2183 int rc;
2184
2185 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2186 return 0;
2187 if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
2188 vcpu->arch.pfault_compare)
2189 return 0;
2190 if (psw_extint_disabled(vcpu))
2191 return 0;
2192 if (kvm_s390_vcpu_has_irq(vcpu, 0))
2193 return 0;
2194 if (!(vcpu->arch.sie_block->gcr[0] & 0x200ul))
2195 return 0;
2196 if (!vcpu->arch.gmap->pfault_enabled)
2197 return 0;
2198
2199 hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
2200 hva += current->thread.gmap_addr & ~PAGE_MASK;
2201 if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
2202 return 0;
2203
2204 rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
2205 return rc;
2206}
2207
2208static int vcpu_pre_run(struct kvm_vcpu *vcpu)
2209{
2210 int rc, cpuflags;
2211
2212 /*
2213 * On s390 notifications for arriving pages will be delivered directly
2214 * to the guest but the housekeeping for completed pfaults is
2215 * handled outside the worker.
2216 */
2217 kvm_check_async_pf_completion(vcpu);
2218
2219 vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
2220 vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
2221
2222 if (need_resched())
2223 schedule();
2224
2225 if (test_cpu_flag(CIF_MCCK_PENDING))
2226 s390_handle_mcck();
2227
2228 if (!kvm_is_ucontrol(vcpu->kvm)) {
2229 rc = kvm_s390_deliver_pending_interrupts(vcpu);
2230 if (rc)
2231 return rc;
2232 }
2233
2234 rc = kvm_s390_handle_requests(vcpu);
2235 if (rc)
2236 return rc;
2237
2238 if (guestdbg_enabled(vcpu)) {
2239 kvm_s390_backup_guest_per_regs(vcpu);
2240 kvm_s390_patch_guest_per_regs(vcpu);
2241 }
2242
2243 vcpu->arch.sie_block->icptcode = 0;
2244 cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
2245 VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
2246 trace_kvm_s390_sie_enter(vcpu, cpuflags);
2247
2248 return 0;
2249}
2250
2251static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
2252{
2253 struct kvm_s390_pgm_info pgm_info = {
2254 .code = PGM_ADDRESSING,
2255 };
2256 u8 opcode, ilen;
2257 int rc;
2258
2259 VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
2260 trace_kvm_s390_sie_fault(vcpu);
2261
2262 /*
2263 * We want to inject an addressing exception, which is defined as a
2264 * suppressing or terminating exception. However, since we came here
2265 * by a DAT access exception, the PSW still points to the faulting
2266 * instruction since DAT exceptions are nullifying. So we've got
2267 * to look up the current opcode to get the length of the instruction
2268 * to be able to forward the PSW.
2269 */
2270 rc = read_guest_instr(vcpu, &opcode, 1);
2271 ilen = insn_length(opcode);
2272 if (rc < 0) {
2273 return rc;
2274 } else if (rc) {
2275 /* Instruction-Fetching Exceptions - we can't detect the ilen.
2276 * Forward by arbitrary ilc, injection will take care of
2277 * nullification if necessary.
2278 */
2279 pgm_info = vcpu->arch.pgm;
2280 ilen = 4;
2281 }
2282 pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
2283 kvm_s390_forward_psw(vcpu, ilen);
2284 return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
2285}
2286
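/*
 * Post-process a SIE exit: intercepts are handed to the intercept
 * handlers, host page faults are either turned into async pfaults or
 * faulted in synchronously, ucontrol faults are reported to userspace,
 * and everything else ends up as an addressing exception for the guest.
 */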
2287static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
2288{
2289 VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
2290 vcpu->arch.sie_block->icptcode);
2291 trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
2292
2293 if (guestdbg_enabled(vcpu))
2294 kvm_s390_restore_guest_per_regs(vcpu);
2295
2296 vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
2297 vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
2298
2299 if (vcpu->arch.sie_block->icptcode > 0) {
2300 int rc = kvm_handle_sie_intercept(vcpu);
2301
2302 if (rc != -EOPNOTSUPP)
2303 return rc;
2304 vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
2305 vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
2306 vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
2307 vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
2308 return -EREMOTE;
2309 } else if (exit_reason != -EFAULT) {
2310 vcpu->stat.exit_null++;
2311 return 0;
2312 } else if (kvm_is_ucontrol(vcpu->kvm)) {
2313 vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
2314 vcpu->run->s390_ucontrol.trans_exc_code =
2315 current->thread.gmap_addr;
2316 vcpu->run->s390_ucontrol.pgm_code = 0x10;
2317 return -EREMOTE;
2318 } else if (current->thread.gmap_pfault) {
2319 trace_kvm_s390_major_guest_pfault(vcpu);
2320 current->thread.gmap_pfault = 0;
2321 if (kvm_arch_setup_async_pf(vcpu))
2322 return 0;
2323 return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
2324 }
2325 return vcpu_post_run_fault_in_sie(vcpu);
2326}
2327
2328static int __vcpu_run(struct kvm_vcpu *vcpu)
2329{
2330 int rc, exit_reason;
2331
2332 /*
2333 * We try to hold kvm->srcu during most of vcpu_run (except when run-
2334 * ning the guest), so that memslots (and other stuff) are protected
2335 */
2336 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
2337
2338 do {
2339 rc = vcpu_pre_run(vcpu);
2340 if (rc)
2341 break;
2342
2343 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
2344 /*
2345 * As PF_VCPU will be used in the fault handler, there must be
2346 * no uaccess between guest_enter and guest_exit.
2347 */
2348 local_irq_disable();
2349 __kvm_guest_enter();
2350 __disable_cpu_timer_accounting(vcpu);
2351 local_irq_enable();
2352 exit_reason = sie64a(vcpu->arch.sie_block,
2353 vcpu->run->s.regs.gprs);
2354 local_irq_disable();
2355 __enable_cpu_timer_accounting(vcpu);
2356 __kvm_guest_exit();
2357 local_irq_enable();
2358 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
2359
2360 rc = vcpu_post_run(vcpu, exit_reason);
2361 } while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
2362
2363 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
2364 return rc;
2365}
2366
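/*
 * sync_regs/store_regs copy the register state that is shared with
 * userspace via struct kvm_run into and out of the SIE block around each
 * KVM_RUN.  Only the pieces flagged in kvm_dirty_regs are loaded on entry;
 * everything is written back on exit.
 */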
2367static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2368{
2369 vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
2370 vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
2371 if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
2372 kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
2373 if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
2374 memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
2375 /* some control register changes require a tlb flush */
2376 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
2377 }
2378 if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
2379 kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
2380 vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
2381 vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
2382 vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
2383 vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
2384 }
2385 if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
2386 vcpu->arch.pfault_token = kvm_run->s.regs.pft;
2387 vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
2388 vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
2389 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2390 kvm_clear_async_pf_completion_queue(vcpu);
2391 }
2392 kvm_run->kvm_dirty_regs = 0;
2393}
2394
2395static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2396{
2397 kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
2398 kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
2399 kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
2400 memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
2401 kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
2402 kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
2403 kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
2404 kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
2405 kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
2406 kvm_run->s.regs.pft = vcpu->arch.pfault_token;
2407 kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
2408 kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
2409}
2410
2411int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2412{
2413 int rc;
2414 sigset_t sigsaved;
2415
2416 if (guestdbg_exit_pending(vcpu)) {
2417 kvm_s390_prepare_debug_exit(vcpu);
2418 return 0;
2419 }
2420
2421 if (vcpu->sigset_active)
2422 sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
2423
2424 if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
2425 kvm_s390_vcpu_start(vcpu);
2426 } else if (is_vcpu_stopped(vcpu)) {
2427 pr_err_ratelimited("can't run stopped vcpu %d\n",
2428 vcpu->vcpu_id);
2429 return -EINVAL;
2430 }
2431
2432 sync_regs(vcpu, kvm_run);
2433 enable_cpu_timer_accounting(vcpu);
2434
2435 might_fault();
2436 rc = __vcpu_run(vcpu);
2437
2438 if (signal_pending(current) && !rc) {
2439 kvm_run->exit_reason = KVM_EXIT_INTR;
2440 rc = -EINTR;
2441 }
2442
2443 if (guestdbg_exit_pending(vcpu) && !rc) {
2444 kvm_s390_prepare_debug_exit(vcpu);
2445 rc = 0;
2446 }
2447
2448 if (rc == -EREMOTE) {
2449 /* userspace support is needed, kvm_run has been prepared */
2450 rc = 0;
2451 }
2452
2453 disable_cpu_timer_accounting(vcpu);
2454 store_regs(vcpu, kvm_run);
2455
2456 if (vcpu->sigset_active)
2457 sigprocmask(SIG_SETMASK, &sigsaved, NULL);
2458
2459 vcpu->stat.exit_userspace++;
2460 return rc;
2461}
2462
2463/*
2464 * store status at address
2465 * we have two special cases:
2466 * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
2467 * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
2468 */
2469int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
2470{
2471 unsigned char archmode = 1;
2472 freg_t fprs[NUM_FPRS];
2473 unsigned int px;
2474 u64 clkcomp, cputm;
2475 int rc;
2476
2477 px = kvm_s390_get_prefix(vcpu);
2478 if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
2479 if (write_guest_abs(vcpu, 163, &archmode, 1))
2480 return -EFAULT;
2481 gpa = 0;
2482 } else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
2483 if (write_guest_real(vcpu, 163, &archmode, 1))
2484 return -EFAULT;
2485 gpa = px;
2486 } else
2487 gpa -= __LC_FPREGS_SAVE_AREA;
2488
2489 /* manually convert vector registers if necessary */
2490 if (MACHINE_HAS_VX) {
2491 convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
2492 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
2493 fprs, 128);
2494 } else {
2495 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
2496 vcpu->run->s.regs.fprs, 128);
2497 }
2498 rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
2499 vcpu->run->s.regs.gprs, 128);
2500 rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
2501 &vcpu->arch.sie_block->gpsw, 16);
2502 rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
2503 &px, 4);
2504 rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
2505 &vcpu->run->s.regs.fpc, 4);
2506 rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
2507 &vcpu->arch.sie_block->todpr, 4);
2508 cputm = kvm_s390_get_cpu_timer(vcpu);
2509 rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
2510 &cputm, 8);
2511 clkcomp = vcpu->arch.sie_block->ckc >> 8;
2512 rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
2513 &clkcomp, 8);
2514 rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
2515 &vcpu->run->s.regs.acrs, 64);
2516 rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
2517 &vcpu->arch.sie_block->gcr, 128);
2518 return rc ? -EFAULT : 0;
2519}
2520
2521int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
2522{
2523 /*
2524 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
2525 * copying in vcpu load/put. Let's update our copies before we save
2526 * them into the save area.
2527 */
2528 save_fpu_regs();
2529 vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
2530 save_access_regs(vcpu->run->s.regs.acrs);
2531
2532 return kvm_s390_store_status_unloaded(vcpu, addr);
2533}
2534
2535/*
2536 * store additional status at address
2537 */
2538int kvm_s390_store_adtl_status_unloaded(struct kvm_vcpu *vcpu,
2539 unsigned long gpa)
2540{
2541 /* Only bits 0-53 are used for address formation */
2542 if (!(gpa & ~0x3ff))
2543 return 0;
2544
2545 return write_guest_abs(vcpu, gpa & ~0x3ff,
2546 (void *)&vcpu->run->s.regs.vrs, 512);
2547}
2548
2549int kvm_s390_vcpu_store_adtl_status(struct kvm_vcpu *vcpu, unsigned long addr)
2550{
2551 if (!test_kvm_facility(vcpu->kvm, 129))
2552 return 0;
2553
2554 /*
2555 * The guest VXRS are in the host VXRS due to the lazy
2556 * copying in vcpu load/put. We can simply call save_fpu_regs()
2557 * to save the current register state because we are in the
2558 * middle of a load/put cycle.
2559 *
2560 * Let's update our copies before we save it into the save area.
2561 */
2562 save_fpu_regs();
2563
2564 return kvm_s390_store_adtl_status_unloaded(vcpu, addr);
2565}
2566
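/*
 * Helpers for toggling the IBS facility, which this file uses to speed up
 * a lone running VCPU.  Each helper cancels a pending request for the
 * opposite state and queues a synced request that is picked up by
 * kvm_s390_handle_requests() before the next SIE entry.
 */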
2567static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
2568{
2569 kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
2570 kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
2571}
2572
2573static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
2574{
2575 unsigned int i;
2576 struct kvm_vcpu *vcpu;
2577
2578 kvm_for_each_vcpu(i, vcpu, kvm) {
2579 __disable_ibs_on_vcpu(vcpu);
2580 }
2581}
2582
2583static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
2584{
2585 kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
2586 kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
2587}
2588
2589void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
2590{
2591 int i, online_vcpus, started_vcpus = 0;
2592
2593 if (!is_vcpu_stopped(vcpu))
2594 return;
2595
2596 trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
2597 /* Only one cpu at a time may enter/leave the STOPPED state. */
2598 spin_lock(&vcpu->kvm->arch.start_stop_lock);
2599 online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
2600
2601 for (i = 0; i < online_vcpus; i++) {
2602 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
2603 started_vcpus++;
2604 }
2605
2606 if (started_vcpus == 0) {
2607 /* we're the only active VCPU -> speed it up */
2608 __enable_ibs_on_vcpu(vcpu);
2609 } else if (started_vcpus == 1) {
2610 /*
2611 * As we are starting a second VCPU, we have to disable
2612 * the IBS facility on all VCPUs to remove potentially
2613 * outstanding ENABLE requests.
2614 */
2615 __disable_ibs_on_all_vcpus(vcpu->kvm);
2616 }
2617
2618 atomic_andnot(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
2619 /*
2620 * Another VCPU might have used IBS while we were offline.
2621 * Let's play safe and flush the VCPU at startup.
2622 */
2623 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
2624 spin_unlock(&vcpu->kvm->arch.start_stop_lock);
2625 return;
2626}
2627
2628void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
2629{
2630 int i, online_vcpus, started_vcpus = 0;
2631 struct kvm_vcpu *started_vcpu = NULL;
2632
2633 if (is_vcpu_stopped(vcpu))
2634 return;
2635
2636 trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
2637 /* Only one cpu at a time may enter/leave the STOPPED state. */
2638 spin_lock(&vcpu->kvm->arch.start_stop_lock);
2639 online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
2640
2641 /* SIGP STOP and SIGP STOP AND STORE STATUS have been fully processed */
2642 kvm_s390_clear_stop_irq(vcpu);
2643
2644 atomic_or(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
2645 __disable_ibs_on_vcpu(vcpu);
2646
2647 for (i = 0; i < online_vcpus; i++) {
2648 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
2649 started_vcpus++;
2650 started_vcpu = vcpu->kvm->vcpus[i];
2651 }
2652 }
2653
2654 if (started_vcpus == 1) {
2655 /*
2656 * As we only have one VCPU left, we want to enable the
2657 * IBS facility for that VCPU to speed it up.
2658 */
2659 __enable_ibs_on_vcpu(started_vcpu);
2660 }
2661
2662 spin_unlock(&vcpu->kvm->arch.start_stop_lock);
2663 return;
2664}
2665
2666static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
2667 struct kvm_enable_cap *cap)
2668{
2669 int r;
2670
2671 if (cap->flags)
2672 return -EINVAL;
2673
2674 switch (cap->cap) {
2675 case KVM_CAP_S390_CSS_SUPPORT:
2676 if (!vcpu->kvm->arch.css_support) {
2677 vcpu->kvm->arch.css_support = 1;
2678 VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
2679 trace_kvm_s390_enable_css(vcpu->kvm);
2680 }
2681 r = 0;
2682 break;
2683 default:
2684 r = -EINVAL;
2685 break;
2686 }
2687 return r;
2688}
2689
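/*
 * Back end for the KVM_S390_MEM_OP vcpu ioctl: read or write guest memory
 * through the logical (access-register qualified) address space, or only
 * check accessibility when KVM_S390_MEMOP_F_CHECK_ONLY is set.  A rough
 * userspace sketch (vcpu_fd, guest_addr, buffer and len are placeholders,
 * error handling omitted):
 *
 *	struct kvm_s390_mem_op mop = {
 *		.gaddr = guest_addr,
 *		.buf   = (__u64)(unsigned long)buffer,
 *		.size  = len,
 *		.op    = KVM_S390_MEMOP_LOGICAL_READ,
 *	};
 *	ioctl(vcpu_fd, KVM_S390_MEM_OP, &mop);
 */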
2690static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
2691 struct kvm_s390_mem_op *mop)
2692{
2693 void __user *uaddr = (void __user *)mop->buf;
2694 void *tmpbuf = NULL;
2695 int r, srcu_idx;
2696 const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
2697 | KVM_S390_MEMOP_F_CHECK_ONLY;
2698
2699 if (mop->flags & ~supported_flags)
2700 return -EINVAL;
2701
2702 if (mop->size > MEM_OP_MAX_SIZE)
2703 return -E2BIG;
2704
2705 if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
2706 tmpbuf = vmalloc(mop->size);
2707 if (!tmpbuf)
2708 return -ENOMEM;
2709 }
2710
2711 srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
2712
2713 switch (mop->op) {
2714 case KVM_S390_MEMOP_LOGICAL_READ:
2715 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
2716 r = check_gva_range(vcpu, mop->gaddr, mop->ar,
2717 mop->size, GACC_FETCH);
2718 break;
2719 }
2720 r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
2721 if (r == 0) {
2722 if (copy_to_user(uaddr, tmpbuf, mop->size))
2723 r = -EFAULT;
2724 }
2725 break;
2726 case KVM_S390_MEMOP_LOGICAL_WRITE:
2727 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
2728 r = check_gva_range(vcpu, mop->gaddr, mop->ar,
2729 mop->size, GACC_STORE);
2730 break;
2731 }
2732 if (copy_from_user(tmpbuf, uaddr, mop->size)) {
2733 r = -EFAULT;
2734 break;
2735 }
2736 r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
2737 break;
2738 default:
2739 r = -EINVAL;
2740 }
2741
2742 srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
2743
2744 if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
2745 kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
2746
2747 vfree(tmpbuf);
2748 return r;
2749}
2750
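/*
 * Main vcpu ioctl dispatcher.  As an illustration (sketch only, vcpu_fd
 * and new_cpu_timer are placeholders, error handling omitted), userspace
 * could set the guest cpu timer through the one-reg interface handled
 * below:
 *
 *	__u64 val = new_cpu_timer;
 *	struct kvm_one_reg reg = {
 *		.id   = KVM_REG_S390_CPU_TIMER,
 *		.addr = (__u64)(unsigned long)&val,
 *	};
 *	ioctl(vcpu_fd, KVM_SET_ONE_REG, &reg);
 */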
2751long kvm_arch_vcpu_ioctl(struct file *filp,
2752 unsigned int ioctl, unsigned long arg)
2753{
2754 struct kvm_vcpu *vcpu = filp->private_data;
2755 void __user *argp = (void __user *)arg;
2756 int idx;
2757 long r;
2758
2759 switch (ioctl) {
2760 case KVM_S390_IRQ: {
2761 struct kvm_s390_irq s390irq;
2762
2763 r = -EFAULT;
2764 if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
2765 break;
2766 r = kvm_s390_inject_vcpu(vcpu, &s390irq);
2767 break;
2768 }
2769 case KVM_S390_INTERRUPT: {
2770 struct kvm_s390_interrupt s390int;
2771 struct kvm_s390_irq s390irq;
2772
2773 r = -EFAULT;
2774 if (copy_from_user(&s390int, argp, sizeof(s390int)))
2775 break;
2776 if (s390int_to_s390irq(&s390int, &s390irq))
2777 return -EINVAL;
2778 r = kvm_s390_inject_vcpu(vcpu, &s390irq);
2779 break;
2780 }
2781 case KVM_S390_STORE_STATUS:
2782 idx = srcu_read_lock(&vcpu->kvm->srcu);
2783 r = kvm_s390_vcpu_store_status(vcpu, arg);
2784 srcu_read_unlock(&vcpu->kvm->srcu, idx);
2785 break;
2786 case KVM_S390_SET_INITIAL_PSW: {
2787 psw_t psw;
2788
2789 r = -EFAULT;
2790 if (copy_from_user(&psw, argp, sizeof(psw)))
2791 break;
2792 r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
2793 break;
2794 }
2795 case KVM_S390_INITIAL_RESET:
2796 r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
2797 break;
2798 case KVM_SET_ONE_REG:
2799 case KVM_GET_ONE_REG: {
2800 struct kvm_one_reg reg;
2801 r = -EFAULT;
2802 if (copy_from_user(&reg, argp, sizeof(reg)))
2803 break;
2804 if (ioctl == KVM_SET_ONE_REG)
2805 r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
2806 else
2807 r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
2808 break;
2809 }
2810#ifdef CONFIG_KVM_S390_UCONTROL
2811 case KVM_S390_UCAS_MAP: {
2812 struct kvm_s390_ucas_mapping ucasmap;
2813
2814 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
2815 r = -EFAULT;
2816 break;
2817 }
2818
2819 if (!kvm_is_ucontrol(vcpu->kvm)) {
2820 r = -EINVAL;
2821 break;
2822 }
2823
2824 r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
2825 ucasmap.vcpu_addr, ucasmap.length);
2826 break;
2827 }
2828 case KVM_S390_UCAS_UNMAP: {
2829 struct kvm_s390_ucas_mapping ucasmap;
2830
2831 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
2832 r = -EFAULT;
2833 break;
2834 }
2835
2836 if (!kvm_is_ucontrol(vcpu->kvm)) {
2837 r = -EINVAL;
2838 break;
2839 }
2840
2841 r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
2842 ucasmap.length);
2843 break;
2844 }
2845#endif
2846 case KVM_S390_VCPU_FAULT: {
2847 r = gmap_fault(vcpu->arch.gmap, arg, 0);
2848 break;
2849 }
2850 case KVM_ENABLE_CAP:
2851 {
2852 struct kvm_enable_cap cap;
2853 r = -EFAULT;
2854 if (copy_from_user(&cap, argp, sizeof(cap)))
2855 break;
2856 r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
2857 break;
2858 }
2859 case KVM_S390_MEM_OP: {
2860 struct kvm_s390_mem_op mem_op;
2861
2862 if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
2863 r = kvm_s390_guest_mem_op(vcpu, &mem_op);
2864 else
2865 r = -EFAULT;
2866 break;
2867 }
2868 case KVM_S390_SET_IRQ_STATE: {
2869 struct kvm_s390_irq_state irq_state;
2870
2871 r = -EFAULT;
2872 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
2873 break;
2874 if (irq_state.len > VCPU_IRQS_MAX_BUF ||
2875 irq_state.len == 0 ||
2876 irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
2877 r = -EINVAL;
2878 break;
2879 }
2880 r = kvm_s390_set_irq_state(vcpu,
2881 (void __user *) irq_state.buf,
2882 irq_state.len);
2883 break;
2884 }
2885 case KVM_S390_GET_IRQ_STATE: {
2886 struct kvm_s390_irq_state irq_state;
2887
2888 r = -EFAULT;
2889 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
2890 break;
2891 if (irq_state.len == 0) {
2892 r = -EINVAL;
2893 break;
2894 }
2895 r = kvm_s390_get_irq_state(vcpu,
2896 (__u8 __user *) irq_state.buf,
2897 irq_state.len);
2898 break;
2899 }
2900 default:
2901 r = -ENOTTY;
2902 }
2903 return r;
2904}
2905
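/*
 * For user controlled virtual machines the SIE control block can be
 * mmap()ed by userspace at KVM_S390_SIE_PAGE_OFFSET; any other fault on
 * the vcpu file mapping results in SIGBUS.
 */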
2906int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
2907{
2908#ifdef CONFIG_KVM_S390_UCONTROL
2909 if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
2910 && (kvm_is_ucontrol(vcpu->kvm))) {
2911 vmf->page = virt_to_page(vcpu->arch.sie_block);
2912 get_page(vmf->page);
2913 return 0;
2914 }
2915#endif
2916 return VM_FAULT_SIGBUS;
2917}
2918
2919int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
2920 unsigned long npages)
2921{
2922 return 0;
2923}
2924
2925/* Section: memory related */
2926int kvm_arch_prepare_memory_region(struct kvm *kvm,
2927 struct kvm_memory_slot *memslot,
2928 const struct kvm_userspace_memory_region *mem,
2929 enum kvm_mr_change change)
2930{
2931 /* A few sanity checks. Memory slots have to start and end at a segment
2932 boundary (1MB). The memory in userland may be fragmented into various
2933 different vmas. It is okay to mmap() and munmap() stuff in this slot
2934 after doing this call at any time */
2935
2936 if (mem->userspace_addr & 0xffffful)
2937 return -EINVAL;
2938
2939 if (mem->memory_size & 0xffffful)
2940 return -EINVAL;
2941
2942 if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
2943 return -EINVAL;
2944
2945 return 0;
2946}
2947
2948void kvm_arch_commit_memory_region(struct kvm *kvm,
2949 const struct kvm_userspace_memory_region *mem,
2950 const struct kvm_memory_slot *old,
2951 const struct kvm_memory_slot *new,
2952 enum kvm_mr_change change)
2953{
2954 int rc;
2955
2956 /* If the basics of the memslot do not change, we do not want
2957 * to update the gmap. Every update causes several unnecessary
2958 * segment translation exceptions. This is usually handled just
2959 * fine by the normal fault handler + gmap, but it will also
2960 * cause faults on the prefix page of running guest CPUs.
2961 */
2962 if (old->userspace_addr == mem->userspace_addr &&
2963 old->base_gfn * PAGE_SIZE == mem->guest_phys_addr &&
2964 old->npages * PAGE_SIZE == mem->memory_size)
2965 return;
2966
2967 rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
2968 mem->guest_phys_addr, mem->memory_size);
2969 if (rc)
2970 pr_warn("failed to commit memory region\n");
2971 return;
2972}
2973
2974static int __init kvm_s390_init(void)
2975{
2976 if (!sclp.has_sief2) {
2977 pr_info("SIE not available\n");
2978 return -ENODEV;
2979 }
2980
2981 return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
2982}
2983
2984static void __exit kvm_s390_exit(void)
2985{
2986 kvm_exit();
2987}
2988
2989module_init(kvm_s390_init);
2990module_exit(kvm_s390_exit);
2991
2992/*
2993 * Enable autoloading of the kvm module.
2994 * Note that we add the module alias here instead of virt/kvm/kvm_main.c
2995 * since x86 takes a different approach.
2996 */
2997#include <linux/miscdevice.h>
2998MODULE_ALIAS_MISCDEV(KVM_MINOR);
2999MODULE_ALIAS("devname:kvm");
1// SPDX-License-Identifier: GPL-2.0
2/*
3 * hosting IBM Z kernel virtual machines (s390x)
4 *
5 * Copyright IBM Corp. 2008, 2020
6 *
7 * Author(s): Carsten Otte <cotte@de.ibm.com>
8 * Christian Borntraeger <borntraeger@de.ibm.com>
9 * Christian Ehrhardt <ehrhardt@de.ibm.com>
10 * Jason J. Herne <jjherne@us.ibm.com>
11 */
12
13#define KMSG_COMPONENT "kvm-s390"
14#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
15
16#include <linux/compiler.h>
17#include <linux/err.h>
18#include <linux/fs.h>
19#include <linux/hrtimer.h>
20#include <linux/init.h>
21#include <linux/kvm.h>
22#include <linux/kvm_host.h>
23#include <linux/mman.h>
24#include <linux/module.h>
25#include <linux/moduleparam.h>
26#include <linux/random.h>
27#include <linux/slab.h>
28#include <linux/timer.h>
29#include <linux/vmalloc.h>
30#include <linux/bitmap.h>
31#include <linux/sched/signal.h>
32#include <linux/string.h>
33#include <linux/pgtable.h>
34#include <linux/mmu_notifier.h>
35
36#include <asm/asm-offsets.h>
37#include <asm/lowcore.h>
38#include <asm/stp.h>
39#include <asm/gmap.h>
40#include <asm/nmi.h>
41#include <asm/switch_to.h>
42#include <asm/isc.h>
43#include <asm/sclp.h>
44#include <asm/cpacf.h>
45#include <asm/timex.h>
46#include <asm/ap.h>
47#include <asm/uv.h>
48#include <asm/fpu/api.h>
49#include "kvm-s390.h"
50#include "gaccess.h"
51#include "pci.h"
52
53#define CREATE_TRACE_POINTS
54#include "trace.h"
55#include "trace-s390.h"
56
57#define MEM_OP_MAX_SIZE 65536 /* Maximum transfer size for KVM_S390_MEM_OP */
58#define LOCAL_IRQS 32
59#define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
60 (KVM_MAX_VCPUS + LOCAL_IRQS))
61
62const struct _kvm_stats_desc kvm_vm_stats_desc[] = {
63 KVM_GENERIC_VM_STATS(),
64 STATS_DESC_COUNTER(VM, inject_io),
65 STATS_DESC_COUNTER(VM, inject_float_mchk),
66 STATS_DESC_COUNTER(VM, inject_pfault_done),
67 STATS_DESC_COUNTER(VM, inject_service_signal),
68 STATS_DESC_COUNTER(VM, inject_virtio),
69 STATS_DESC_COUNTER(VM, aen_forward)
70};
71
72const struct kvm_stats_header kvm_vm_stats_header = {
73 .name_size = KVM_STATS_NAME_SIZE,
74 .num_desc = ARRAY_SIZE(kvm_vm_stats_desc),
75 .id_offset = sizeof(struct kvm_stats_header),
76 .desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE,
77 .data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE +
78 sizeof(kvm_vm_stats_desc),
79};
80
81const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = {
82 KVM_GENERIC_VCPU_STATS(),
83 STATS_DESC_COUNTER(VCPU, exit_userspace),
84 STATS_DESC_COUNTER(VCPU, exit_null),
85 STATS_DESC_COUNTER(VCPU, exit_external_request),
86 STATS_DESC_COUNTER(VCPU, exit_io_request),
87 STATS_DESC_COUNTER(VCPU, exit_external_interrupt),
88 STATS_DESC_COUNTER(VCPU, exit_stop_request),
89 STATS_DESC_COUNTER(VCPU, exit_validity),
90 STATS_DESC_COUNTER(VCPU, exit_instruction),
91 STATS_DESC_COUNTER(VCPU, exit_pei),
92 STATS_DESC_COUNTER(VCPU, halt_no_poll_steal),
93 STATS_DESC_COUNTER(VCPU, instruction_lctl),
94 STATS_DESC_COUNTER(VCPU, instruction_lctlg),
95 STATS_DESC_COUNTER(VCPU, instruction_stctl),
96 STATS_DESC_COUNTER(VCPU, instruction_stctg),
97 STATS_DESC_COUNTER(VCPU, exit_program_interruption),
98 STATS_DESC_COUNTER(VCPU, exit_instr_and_program),
99 STATS_DESC_COUNTER(VCPU, exit_operation_exception),
100 STATS_DESC_COUNTER(VCPU, deliver_ckc),
101 STATS_DESC_COUNTER(VCPU, deliver_cputm),
102 STATS_DESC_COUNTER(VCPU, deliver_external_call),
103 STATS_DESC_COUNTER(VCPU, deliver_emergency_signal),
104 STATS_DESC_COUNTER(VCPU, deliver_service_signal),
105 STATS_DESC_COUNTER(VCPU, deliver_virtio),
106 STATS_DESC_COUNTER(VCPU, deliver_stop_signal),
107 STATS_DESC_COUNTER(VCPU, deliver_prefix_signal),
108 STATS_DESC_COUNTER(VCPU, deliver_restart_signal),
109 STATS_DESC_COUNTER(VCPU, deliver_program),
110 STATS_DESC_COUNTER(VCPU, deliver_io),
111 STATS_DESC_COUNTER(VCPU, deliver_machine_check),
112 STATS_DESC_COUNTER(VCPU, exit_wait_state),
113 STATS_DESC_COUNTER(VCPU, inject_ckc),
114 STATS_DESC_COUNTER(VCPU, inject_cputm),
115 STATS_DESC_COUNTER(VCPU, inject_external_call),
116 STATS_DESC_COUNTER(VCPU, inject_emergency_signal),
117 STATS_DESC_COUNTER(VCPU, inject_mchk),
118 STATS_DESC_COUNTER(VCPU, inject_pfault_init),
119 STATS_DESC_COUNTER(VCPU, inject_program),
120 STATS_DESC_COUNTER(VCPU, inject_restart),
121 STATS_DESC_COUNTER(VCPU, inject_set_prefix),
122 STATS_DESC_COUNTER(VCPU, inject_stop_signal),
123 STATS_DESC_COUNTER(VCPU, instruction_epsw),
124 STATS_DESC_COUNTER(VCPU, instruction_gs),
125 STATS_DESC_COUNTER(VCPU, instruction_io_other),
126 STATS_DESC_COUNTER(VCPU, instruction_lpsw),
127 STATS_DESC_COUNTER(VCPU, instruction_lpswe),
128 STATS_DESC_COUNTER(VCPU, instruction_pfmf),
129 STATS_DESC_COUNTER(VCPU, instruction_ptff),
130 STATS_DESC_COUNTER(VCPU, instruction_sck),
131 STATS_DESC_COUNTER(VCPU, instruction_sckpf),
132 STATS_DESC_COUNTER(VCPU, instruction_stidp),
133 STATS_DESC_COUNTER(VCPU, instruction_spx),
134 STATS_DESC_COUNTER(VCPU, instruction_stpx),
135 STATS_DESC_COUNTER(VCPU, instruction_stap),
136 STATS_DESC_COUNTER(VCPU, instruction_iske),
137 STATS_DESC_COUNTER(VCPU, instruction_ri),
138 STATS_DESC_COUNTER(VCPU, instruction_rrbe),
139 STATS_DESC_COUNTER(VCPU, instruction_sske),
140 STATS_DESC_COUNTER(VCPU, instruction_ipte_interlock),
141 STATS_DESC_COUNTER(VCPU, instruction_stsi),
142 STATS_DESC_COUNTER(VCPU, instruction_stfl),
143 STATS_DESC_COUNTER(VCPU, instruction_tb),
144 STATS_DESC_COUNTER(VCPU, instruction_tpi),
145 STATS_DESC_COUNTER(VCPU, instruction_tprot),
146 STATS_DESC_COUNTER(VCPU, instruction_tsch),
147 STATS_DESC_COUNTER(VCPU, instruction_sie),
148 STATS_DESC_COUNTER(VCPU, instruction_essa),
149 STATS_DESC_COUNTER(VCPU, instruction_sthyi),
150 STATS_DESC_COUNTER(VCPU, instruction_sigp_sense),
151 STATS_DESC_COUNTER(VCPU, instruction_sigp_sense_running),
152 STATS_DESC_COUNTER(VCPU, instruction_sigp_external_call),
153 STATS_DESC_COUNTER(VCPU, instruction_sigp_emergency),
154 STATS_DESC_COUNTER(VCPU, instruction_sigp_cond_emergency),
155 STATS_DESC_COUNTER(VCPU, instruction_sigp_start),
156 STATS_DESC_COUNTER(VCPU, instruction_sigp_stop),
157 STATS_DESC_COUNTER(VCPU, instruction_sigp_stop_store_status),
158 STATS_DESC_COUNTER(VCPU, instruction_sigp_store_status),
159 STATS_DESC_COUNTER(VCPU, instruction_sigp_store_adtl_status),
160 STATS_DESC_COUNTER(VCPU, instruction_sigp_arch),
161 STATS_DESC_COUNTER(VCPU, instruction_sigp_prefix),
162 STATS_DESC_COUNTER(VCPU, instruction_sigp_restart),
163 STATS_DESC_COUNTER(VCPU, instruction_sigp_init_cpu_reset),
164 STATS_DESC_COUNTER(VCPU, instruction_sigp_cpu_reset),
165 STATS_DESC_COUNTER(VCPU, instruction_sigp_unknown),
166 STATS_DESC_COUNTER(VCPU, instruction_diagnose_10),
167 STATS_DESC_COUNTER(VCPU, instruction_diagnose_44),
168 STATS_DESC_COUNTER(VCPU, instruction_diagnose_9c),
169 STATS_DESC_COUNTER(VCPU, diag_9c_ignored),
170 STATS_DESC_COUNTER(VCPU, diag_9c_forward),
171 STATS_DESC_COUNTER(VCPU, instruction_diagnose_258),
172 STATS_DESC_COUNTER(VCPU, instruction_diagnose_308),
173 STATS_DESC_COUNTER(VCPU, instruction_diagnose_500),
174 STATS_DESC_COUNTER(VCPU, instruction_diagnose_other),
175 STATS_DESC_COUNTER(VCPU, pfault_sync)
176};
177
178const struct kvm_stats_header kvm_vcpu_stats_header = {
179 .name_size = KVM_STATS_NAME_SIZE,
180 .num_desc = ARRAY_SIZE(kvm_vcpu_stats_desc),
181 .id_offset = sizeof(struct kvm_stats_header),
182 .desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE,
183 .data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE +
184 sizeof(kvm_vcpu_stats_desc),
185};
186
187/* allow nested virtualization in KVM (if enabled by user space) */
188static int nested;
189module_param(nested, int, S_IRUGO);
190MODULE_PARM_DESC(nested, "Nested virtualization support");
191
192/* allow 1m huge page guest backing, if !nested */
193static int hpage;
194module_param(hpage, int, 0444);
195MODULE_PARM_DESC(hpage, "1m huge page backing support");
196
197/* maximum percentage of steal time for polling. >100 is treated like 100 */
198static u8 halt_poll_max_steal = 10;
199module_param(halt_poll_max_steal, byte, 0644);
200MODULE_PARM_DESC(halt_poll_max_steal, "Maximum percentage of steal time to allow polling");
201
202/* if set to true, the GISA will be initialized and used if available */
203static bool use_gisa = true;
204module_param(use_gisa, bool, 0644);
205MODULE_PARM_DESC(use_gisa, "Use the GISA if the host supports it.");
206
207/* maximum diag9c forwarding per second */
208unsigned int diag9c_forwarding_hz;
209module_param(diag9c_forwarding_hz, uint, 0644);
210MODULE_PARM_DESC(diag9c_forwarding_hz, "Maximum diag9c forwarding per second, 0 to turn off");
211
212/*
213 * allow asynchronous deinit for protected guests; enable by default since
214 * the feature is opt-in anyway
215 */
216static int async_destroy = 1;
217module_param(async_destroy, int, 0444);
218MODULE_PARM_DESC(async_destroy, "Asynchronous destroy for protected guests");
219
220/*
221 * For now we handle at most 16 double words as this is what the s390 base
222 * kernel handles and stores in the prefix page. If we ever need to go beyond
223 * this, it requires code changes, but the external uapi can stay.
224 */
225#define SIZE_INTERNAL 16
226
227/*
228 * Base feature mask that defines default mask for facilities. Consists of the
229 * defines in FACILITIES_KVM and the non-hypervisor managed bits.
230 */
231static unsigned long kvm_s390_fac_base[SIZE_INTERNAL] = { FACILITIES_KVM };
232/*
233 * Extended feature mask. Consists of the defines in FACILITIES_KVM_CPUMODEL
234 * and defines the facilities that can be enabled via a cpu model.
235 */
236static unsigned long kvm_s390_fac_ext[SIZE_INTERNAL] = { FACILITIES_KVM_CPUMODEL };
237
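/*
 * Number of double words of the facility lists that KVM manages.  The
 * BUILD_BUG_ONs make sure the internal size never exceeds what the
 * architecture and the stfle buffer can hold.
 */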
238static unsigned long kvm_s390_fac_size(void)
239{
240 BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_MASK_SIZE_U64);
241 BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_LIST_SIZE_U64);
242 BUILD_BUG_ON(SIZE_INTERNAL * sizeof(unsigned long) >
243 sizeof(stfle_fac_list));
244
245 return SIZE_INTERNAL;
246}
247
248/* available cpu features supported by kvm */
249static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
250/* available subfunctions indicated via query / "test bit" */
251static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;
252
253static struct gmap_notifier gmap_notifier;
254static struct gmap_notifier vsie_gmap_notifier;
255debug_info_t *kvm_s390_dbf;
256debug_info_t *kvm_s390_dbf_uv;
257
258/* Section: not file related */
259int kvm_arch_hardware_enable(void)
260{
261 /* every s390 is virtualization enabled ;-) */
262 return 0;
263}
264
265int kvm_arch_check_processor_compat(void *opaque)
266{
267 return 0;
268}
269
270/* forward declarations */
271static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
272 unsigned long end);
273static int sca_switch_to_extended(struct kvm *kvm);
274
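/*
 * The guest TOD is the host TOD plus the epoch stored in the SIE block.
 * When the host TOD jumps by delta (e.g. after a clock sync event), the
 * epoch has to be adjusted by -delta so that the guest clock does not
 * change; epdx carries the overflow for the multiple-epoch facility.
 */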
275static void kvm_clock_sync_scb(struct kvm_s390_sie_block *scb, u64 delta)
276{
277 u8 delta_idx = 0;
278
279 /*
280 * The TOD jumps by delta, we have to compensate this by adding
281 * -delta to the epoch.
282 */
283 delta = -delta;
284
285 /* sign-extension - we're adding to signed values below */
286 if ((s64)delta < 0)
287 delta_idx = -1;
288
289 scb->epoch += delta;
290 if (scb->ecd & ECD_MEF) {
291 scb->epdx += delta_idx;
292 if (scb->epoch < delta)
293 scb->epdx += 1;
294 }
295}
296
297/*
298 * This callback is executed during stop_machine(). All CPUs are therefore
299 * temporarily stopped. In order not to change guest behavior, we have to
300 * disable preemption whenever we touch the epoch of kvm and the VCPUs,
301 * so a CPU won't be stopped while calculating with the epoch.
302 */
303static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
304 void *v)
305{
306 struct kvm *kvm;
307 struct kvm_vcpu *vcpu;
308 unsigned long i;
309 unsigned long long *delta = v;
310
311 list_for_each_entry(kvm, &vm_list, vm_list) {
312 kvm_for_each_vcpu(i, vcpu, kvm) {
313 kvm_clock_sync_scb(vcpu->arch.sie_block, *delta);
314 if (i == 0) {
315 kvm->arch.epoch = vcpu->arch.sie_block->epoch;
316 kvm->arch.epdx = vcpu->arch.sie_block->epdx;
317 }
318 if (vcpu->arch.cputm_enabled)
319 vcpu->arch.cputm_start += *delta;
320 if (vcpu->arch.vsie_block)
321 kvm_clock_sync_scb(vcpu->arch.vsie_block,
322 *delta);
323 }
324 }
325 return NOTIFY_OK;
326}
327
328static struct notifier_block kvm_clock_notifier = {
329 .notifier_call = kvm_clock_sync,
330};
331
332int kvm_arch_hardware_setup(void *opaque)
333{
334 gmap_notifier.notifier_call = kvm_gmap_notifier;
335 gmap_register_pte_notifier(&gmap_notifier);
336 vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
337 gmap_register_pte_notifier(&vsie_gmap_notifier);
338 atomic_notifier_chain_register(&s390_epoch_delta_notifier,
339 &kvm_clock_notifier);
340 return 0;
341}
342
343void kvm_arch_hardware_unsetup(void)
344{
345 gmap_unregister_pte_notifier(&gmap_notifier);
346 gmap_unregister_pte_notifier(&vsie_gmap_notifier);
347 atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
348 &kvm_clock_notifier);
349}
350
351static void allow_cpu_feat(unsigned long nr)
352{
353 set_bit_inv(nr, kvm_s390_available_cpu_feat);
354}
355
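/*
 * Use the "test bit" form of PERFORM LOCKED OPERATION (function code with
 * bit 0x100 set) to query whether a given PLO function code is available;
 * condition code 0 means it is.
 */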
356static inline int plo_test_bit(unsigned char nr)
357{
358 unsigned long function = (unsigned long)nr | 0x100;
359 int cc;
360
361 asm volatile(
362 " lgr 0,%[function]\n"
363 /* Parameter registers are ignored for "test bit" */
364 " plo 0,0,0,0(0)\n"
365 " ipm %0\n"
366 " srl %0,28\n"
367 : "=d" (cc)
368 : [function] "d" (function)
369 : "cc", "0");
370 return cc == 0;
371}
372
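/*
 * Execute the query function (GR0 == 0) of the instruction given by
 * opcode and store the returned availability mask at the buffer addressed
 * by GR1.  Used for instructions such as SORTL and DFLTCC below.
 */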
373static __always_inline void __insn32_query(unsigned int opcode, u8 *query)
374{
375 asm volatile(
376 " lghi 0,0\n"
377 " lgr 1,%[query]\n"
378 /* Parameter registers are ignored */
379 " .insn rrf,%[opc] << 16,2,4,6,0\n"
380 :
381 : [query] "d" ((unsigned long)query), [opc] "i" (opcode)
382 : "cc", "memory", "0", "1");
383}
384
385#define INSN_SORTL 0xb938
386#define INSN_DFLTCC 0xb939
387
388static void kvm_s390_cpu_feat_init(void)
389{
390 int i;
391
392 for (i = 0; i < 256; ++i) {
393 if (plo_test_bit(i))
394 kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
395 }
396
397 if (test_facility(28)) /* TOD-clock steering */
398 ptff(kvm_s390_available_subfunc.ptff,
399 sizeof(kvm_s390_available_subfunc.ptff),
400 PTFF_QAF);
401
402 if (test_facility(17)) { /* MSA */
403 __cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
404 kvm_s390_available_subfunc.kmac);
405 __cpacf_query(CPACF_KMC, (cpacf_mask_t *)
406 kvm_s390_available_subfunc.kmc);
407 __cpacf_query(CPACF_KM, (cpacf_mask_t *)
408 kvm_s390_available_subfunc.km);
409 __cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
410 kvm_s390_available_subfunc.kimd);
411 __cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
412 kvm_s390_available_subfunc.klmd);
413 }
414 if (test_facility(76)) /* MSA3 */
415 __cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
416 kvm_s390_available_subfunc.pckmo);
417 if (test_facility(77)) { /* MSA4 */
418 __cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
419 kvm_s390_available_subfunc.kmctr);
420 __cpacf_query(CPACF_KMF, (cpacf_mask_t *)
421 kvm_s390_available_subfunc.kmf);
422 __cpacf_query(CPACF_KMO, (cpacf_mask_t *)
423 kvm_s390_available_subfunc.kmo);
424 __cpacf_query(CPACF_PCC, (cpacf_mask_t *)
425 kvm_s390_available_subfunc.pcc);
426 }
427 if (test_facility(57)) /* MSA5 */
428 __cpacf_query(CPACF_PRNO, (cpacf_mask_t *)
429 kvm_s390_available_subfunc.ppno);
430
431 if (test_facility(146)) /* MSA8 */
432 __cpacf_query(CPACF_KMA, (cpacf_mask_t *)
433 kvm_s390_available_subfunc.kma);
434
435 if (test_facility(155)) /* MSA9 */
436 __cpacf_query(CPACF_KDSA, (cpacf_mask_t *)
437 kvm_s390_available_subfunc.kdsa);
438
439 if (test_facility(150)) /* SORTL */
440 __insn32_query(INSN_SORTL, kvm_s390_available_subfunc.sortl);
441
442 if (test_facility(151)) /* DFLTCC */
443 __insn32_query(INSN_DFLTCC, kvm_s390_available_subfunc.dfltcc);
444
445 if (MACHINE_HAS_ESOP)
446 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
447 /*
448 * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
449 * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
450 */
451 if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
452 !test_facility(3) || !nested)
453 return;
454 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
455 if (sclp.has_64bscao)
456 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
457 if (sclp.has_siif)
458 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
459 if (sclp.has_gpere)
460 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
461 if (sclp.has_gsls)
462 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
463 if (sclp.has_ib)
464 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
465 if (sclp.has_cei)
466 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
467 if (sclp.has_ibs)
468 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
469 if (sclp.has_kss)
470 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS);
471 /*
472 * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
473 * all skey handling functions read/set the skey from the PGSTE
474 * instead of the real storage key.
475 *
476 * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will make
477 * pages being detected as preserved although they are resident.
478 *
479 * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
480 * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
481 *
482 * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
483 * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
484 * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
485 *
486 * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
487 * cannot easily shadow the SCA because of the ipte lock.
488 */
489}
490
491int kvm_arch_init(void *opaque)
492{
493 int rc = -ENOMEM;
494
495 kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
496 if (!kvm_s390_dbf)
497 return -ENOMEM;
498
499 kvm_s390_dbf_uv = debug_register("kvm-uv", 32, 1, 7 * sizeof(long));
500 if (!kvm_s390_dbf_uv)
501 goto out;
502
503 if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view) ||
504 debug_register_view(kvm_s390_dbf_uv, &debug_sprintf_view))
505 goto out;
506
507 kvm_s390_cpu_feat_init();
508
509 /* Register floating interrupt controller interface. */
510 rc = kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
511 if (rc) {
512 pr_err("A FLIC registration call failed with rc=%d\n", rc);
513 goto out;
514 }
515
516 if (IS_ENABLED(CONFIG_VFIO_PCI_ZDEV_KVM)) {
517 rc = kvm_s390_pci_init();
518 if (rc) {
519 pr_err("Unable to allocate AIFT for PCI\n");
520 goto out;
521 }
522 }
523
524 rc = kvm_s390_gib_init(GAL_ISC);
525 if (rc)
526 goto out;
527
528 return 0;
529
530out:
531 kvm_arch_exit();
532 return rc;
533}
534
535void kvm_arch_exit(void)
536{
537 kvm_s390_gib_destroy();
538 if (IS_ENABLED(CONFIG_VFIO_PCI_ZDEV_KVM))
539 kvm_s390_pci_exit();
540 debug_unregister(kvm_s390_dbf);
541 debug_unregister(kvm_s390_dbf_uv);
542}
543
544/* Section: device related */
545long kvm_arch_dev_ioctl(struct file *filp,
546 unsigned int ioctl, unsigned long arg)
547{
548 if (ioctl == KVM_S390_ENABLE_SIE)
549 return s390_enable_sie();
550 return -EINVAL;
551}
552
553int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
554{
555 int r;
556
557 switch (ext) {
558 case KVM_CAP_S390_PSW:
559 case KVM_CAP_S390_GMAP:
560 case KVM_CAP_SYNC_MMU:
561#ifdef CONFIG_KVM_S390_UCONTROL
562 case KVM_CAP_S390_UCONTROL:
563#endif
564 case KVM_CAP_ASYNC_PF:
565 case KVM_CAP_SYNC_REGS:
566 case KVM_CAP_ONE_REG:
567 case KVM_CAP_ENABLE_CAP:
568 case KVM_CAP_S390_CSS_SUPPORT:
569 case KVM_CAP_IOEVENTFD:
570 case KVM_CAP_DEVICE_CTRL:
571 case KVM_CAP_S390_IRQCHIP:
572 case KVM_CAP_VM_ATTRIBUTES:
573 case KVM_CAP_MP_STATE:
574 case KVM_CAP_IMMEDIATE_EXIT:
575 case KVM_CAP_S390_INJECT_IRQ:
576 case KVM_CAP_S390_USER_SIGP:
577 case KVM_CAP_S390_USER_STSI:
578 case KVM_CAP_S390_SKEYS:
579 case KVM_CAP_S390_IRQ_STATE:
580 case KVM_CAP_S390_USER_INSTR0:
581 case KVM_CAP_S390_CMMA_MIGRATION:
582 case KVM_CAP_S390_AIS:
583 case KVM_CAP_S390_AIS_MIGRATION:
584 case KVM_CAP_S390_VCPU_RESETS:
585 case KVM_CAP_SET_GUEST_DEBUG:
586 case KVM_CAP_S390_DIAG318:
587 case KVM_CAP_S390_MEM_OP_EXTENSION:
588 r = 1;
589 break;
590 case KVM_CAP_SET_GUEST_DEBUG2:
591 r = KVM_GUESTDBG_VALID_MASK;
592 break;
593 case KVM_CAP_S390_HPAGE_1M:
594 r = 0;
595 if (hpage && !kvm_is_ucontrol(kvm))
596 r = 1;
597 break;
598 case KVM_CAP_S390_MEM_OP:
599 r = MEM_OP_MAX_SIZE;
600 break;
601 case KVM_CAP_NR_VCPUS:
602 case KVM_CAP_MAX_VCPUS:
603 case KVM_CAP_MAX_VCPU_ID:
604 r = KVM_S390_BSCA_CPU_SLOTS;
605 if (!kvm_s390_use_sca_entries())
606 r = KVM_MAX_VCPUS;
607 else if (sclp.has_esca && sclp.has_64bscao)
608 r = KVM_S390_ESCA_CPU_SLOTS;
609 if (ext == KVM_CAP_NR_VCPUS)
610 r = min_t(unsigned int, num_online_cpus(), r);
611 break;
612 case KVM_CAP_S390_COW:
613 r = MACHINE_HAS_ESOP;
614 break;
615 case KVM_CAP_S390_VECTOR_REGISTERS:
616 r = MACHINE_HAS_VX;
617 break;
618 case KVM_CAP_S390_RI:
619 r = test_facility(64);
620 break;
621 case KVM_CAP_S390_GS:
622 r = test_facility(133);
623 break;
624 case KVM_CAP_S390_BPB:
625 r = test_facility(82);
626 break;
627 case KVM_CAP_S390_PROTECTED_ASYNC_DISABLE:
628 r = async_destroy && is_prot_virt_host();
629 break;
630 case KVM_CAP_S390_PROTECTED:
631 r = is_prot_virt_host();
632 break;
633 case KVM_CAP_S390_PROTECTED_DUMP: {
634 u64 pv_cmds_dump[] = {
635 BIT_UVC_CMD_DUMP_INIT,
636 BIT_UVC_CMD_DUMP_CONFIG_STOR_STATE,
637 BIT_UVC_CMD_DUMP_CPU,
638 BIT_UVC_CMD_DUMP_COMPLETE,
639 };
640 int i;
641
642 r = is_prot_virt_host();
643
644 for (i = 0; i < ARRAY_SIZE(pv_cmds_dump); i++) {
645 if (!test_bit_inv(pv_cmds_dump[i],
646 (unsigned long *)&uv_info.inst_calls_list)) {
647 r = 0;
648 break;
649 }
650 }
651 break;
652 }
653 case KVM_CAP_S390_ZPCI_OP:
654 r = kvm_s390_pci_interp_allowed();
655 break;
656 case KVM_CAP_S390_CPU_TOPOLOGY:
657 r = test_facility(11);
658 break;
659 default:
660 r = 0;
661 }
662 return r;
663}
664
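/*
 * Walk the memslot in 1 MB (segment) steps and transfer the per-segment
 * dirty bits collected by the gmap into KVM's dirty bitmap.
 */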
665void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
666{
667 int i;
668 gfn_t cur_gfn, last_gfn;
669 unsigned long gaddr, vmaddr;
670 struct gmap *gmap = kvm->arch.gmap;
671 DECLARE_BITMAP(bitmap, _PAGE_ENTRIES);
672
673 /* Loop over all guest segments */
674 cur_gfn = memslot->base_gfn;
675 last_gfn = memslot->base_gfn + memslot->npages;
676 for (; cur_gfn <= last_gfn; cur_gfn += _PAGE_ENTRIES) {
677 gaddr = gfn_to_gpa(cur_gfn);
678 vmaddr = gfn_to_hva_memslot(memslot, cur_gfn);
679 if (kvm_is_error_hva(vmaddr))
680 continue;
681
682 bitmap_zero(bitmap, _PAGE_ENTRIES);
683 gmap_sync_dirty_log_pmd(gmap, bitmap, gaddr, vmaddr);
684 for (i = 0; i < _PAGE_ENTRIES; i++) {
685 if (test_bit(i, bitmap))
686 mark_page_dirty(kvm, cur_gfn + i);
687 }
688
689 if (fatal_signal_pending(current))
690 return;
691 cond_resched();
692 }
693}
694
695/* Section: vm related */
696static void sca_del_vcpu(struct kvm_vcpu *vcpu);
697
698/*
699 * Get (and clear) the dirty memory log for a memory slot.
700 */
701int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
702 struct kvm_dirty_log *log)
703{
704 int r;
705 unsigned long n;
706 struct kvm_memory_slot *memslot;
707 int is_dirty;
708
709 if (kvm_is_ucontrol(kvm))
710 return -EINVAL;
711
712 mutex_lock(&kvm->slots_lock);
713
714 r = -EINVAL;
715 if (log->slot >= KVM_USER_MEM_SLOTS)
716 goto out;
717
718 r = kvm_get_dirty_log(kvm, log, &is_dirty, &memslot);
719 if (r)
720 goto out;
721
722 /* Clear the dirty log */
723 if (is_dirty) {
724 n = kvm_dirty_bitmap_bytes(memslot);
725 memset(memslot->dirty_bitmap, 0, n);
726 }
727 r = 0;
728out:
729 mutex_unlock(&kvm->slots_lock);
730 return r;
731}
732
733static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
734{
735 unsigned long i;
736 struct kvm_vcpu *vcpu;
737
738 kvm_for_each_vcpu(i, vcpu, kvm) {
739 kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
740 }
741}
742
743int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
744{
745 int r;
746
747 if (cap->flags)
748 return -EINVAL;
749
750 switch (cap->cap) {
751 case KVM_CAP_S390_IRQCHIP:
752 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
753 kvm->arch.use_irqchip = 1;
754 r = 0;
755 break;
756 case KVM_CAP_S390_USER_SIGP:
757 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
758 kvm->arch.user_sigp = 1;
759 r = 0;
760 break;
761 case KVM_CAP_S390_VECTOR_REGISTERS:
762 mutex_lock(&kvm->lock);
763 if (kvm->created_vcpus) {
764 r = -EBUSY;
765 } else if (MACHINE_HAS_VX) {
766 set_kvm_facility(kvm->arch.model.fac_mask, 129);
767 set_kvm_facility(kvm->arch.model.fac_list, 129);
768 if (test_facility(134)) {
769 set_kvm_facility(kvm->arch.model.fac_mask, 134);
770 set_kvm_facility(kvm->arch.model.fac_list, 134);
771 }
772 if (test_facility(135)) {
773 set_kvm_facility(kvm->arch.model.fac_mask, 135);
774 set_kvm_facility(kvm->arch.model.fac_list, 135);
775 }
776 if (test_facility(148)) {
777 set_kvm_facility(kvm->arch.model.fac_mask, 148);
778 set_kvm_facility(kvm->arch.model.fac_list, 148);
779 }
780 if (test_facility(152)) {
781 set_kvm_facility(kvm->arch.model.fac_mask, 152);
782 set_kvm_facility(kvm->arch.model.fac_list, 152);
783 }
784 if (test_facility(192)) {
785 set_kvm_facility(kvm->arch.model.fac_mask, 192);
786 set_kvm_facility(kvm->arch.model.fac_list, 192);
787 }
788 r = 0;
789 } else
790 r = -EINVAL;
791 mutex_unlock(&kvm->lock);
792 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
793 r ? "(not available)" : "(success)");
794 break;
795 case KVM_CAP_S390_RI:
796 r = -EINVAL;
797 mutex_lock(&kvm->lock);
798 if (kvm->created_vcpus) {
799 r = -EBUSY;
800 } else if (test_facility(64)) {
801 set_kvm_facility(kvm->arch.model.fac_mask, 64);
802 set_kvm_facility(kvm->arch.model.fac_list, 64);
803 r = 0;
804 }
805 mutex_unlock(&kvm->lock);
806 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
807 r ? "(not available)" : "(success)");
808 break;
809 case KVM_CAP_S390_AIS:
810 mutex_lock(&kvm->lock);
811 if (kvm->created_vcpus) {
812 r = -EBUSY;
813 } else {
814 set_kvm_facility(kvm->arch.model.fac_mask, 72);
815 set_kvm_facility(kvm->arch.model.fac_list, 72);
816 r = 0;
817 }
818 mutex_unlock(&kvm->lock);
819 VM_EVENT(kvm, 3, "ENABLE: AIS %s",
820 r ? "(not available)" : "(success)");
821 break;
822 case KVM_CAP_S390_GS:
823 r = -EINVAL;
824 mutex_lock(&kvm->lock);
825 if (kvm->created_vcpus) {
826 r = -EBUSY;
827 } else if (test_facility(133)) {
828 set_kvm_facility(kvm->arch.model.fac_mask, 133);
829 set_kvm_facility(kvm->arch.model.fac_list, 133);
830 r = 0;
831 }
832 mutex_unlock(&kvm->lock);
833 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s",
834 r ? "(not available)" : "(success)");
835 break;
836 case KVM_CAP_S390_HPAGE_1M:
837 mutex_lock(&kvm->lock);
838 if (kvm->created_vcpus)
839 r = -EBUSY;
840 else if (!hpage || kvm->arch.use_cmma || kvm_is_ucontrol(kvm))
841 r = -EINVAL;
842 else {
843 r = 0;
844 mmap_write_lock(kvm->mm);
845 kvm->mm->context.allow_gmap_hpage_1m = 1;
846 mmap_write_unlock(kvm->mm);
847 /*
848 * We might have to create fake 4k page
849			 * tables. To avoid the hardware working on
850			 * stale PGSTEs, we emulate these instructions.
851 */
852 kvm->arch.use_skf = 0;
853 kvm->arch.use_pfmfi = 0;
854 }
855 mutex_unlock(&kvm->lock);
856 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_HPAGE %s",
857 r ? "(not available)" : "(success)");
858 break;
859 case KVM_CAP_S390_USER_STSI:
860 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
861 kvm->arch.user_stsi = 1;
862 r = 0;
863 break;
864 case KVM_CAP_S390_USER_INSTR0:
865 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
866 kvm->arch.user_instr0 = 1;
867 icpt_operexc_on_all_vcpus(kvm);
868 r = 0;
869 break;
870 case KVM_CAP_S390_CPU_TOPOLOGY:
871 r = -EINVAL;
872 mutex_lock(&kvm->lock);
873 if (kvm->created_vcpus) {
874 r = -EBUSY;
875 } else if (test_facility(11)) {
876 set_kvm_facility(kvm->arch.model.fac_mask, 11);
877 set_kvm_facility(kvm->arch.model.fac_list, 11);
878 r = 0;
879 }
880 mutex_unlock(&kvm->lock);
881 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_CPU_TOPOLOGY %s",
882 r ? "(not available)" : "(success)");
883 break;
884 default:
885 r = -EINVAL;
886 break;
887 }
888 return r;
889}
890
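/*
 * Read access to the KVM_S390_VM_MEM_CTRL attribute group; currently
 * only the configured guest memory limit can be queried.
 */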
891static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
892{
893 int ret;
894
895 switch (attr->attr) {
896 case KVM_S390_VM_MEM_LIMIT_SIZE:
897 ret = 0;
898 VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
899 kvm->arch.mem_limit);
900 if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
901 ret = -EFAULT;
902 break;
903 default:
904 ret = -ENXIO;
905 break;
906 }
907 return ret;
908}
909
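/*
 * Write access to the KVM_S390_VM_MEM_CTRL attribute group: enable CMMA,
 * reset all CMMA states, or set a new memory limit. Changing the limit
 * replaces the guest address space (gmap) and is therefore only allowed
 * before the first vCPU is created.
 */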
910static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
911{
912 int ret;
913 unsigned int idx;
914 switch (attr->attr) {
915 case KVM_S390_VM_MEM_ENABLE_CMMA:
916 ret = -ENXIO;
917 if (!sclp.has_cmma)
918 break;
919
920 VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
921 mutex_lock(&kvm->lock);
922 if (kvm->created_vcpus)
923 ret = -EBUSY;
924 else if (kvm->mm->context.allow_gmap_hpage_1m)
925 ret = -EINVAL;
926 else {
927 kvm->arch.use_cmma = 1;
928 /* Not compatible with cmma. */
929 kvm->arch.use_pfmfi = 0;
930 ret = 0;
931 }
932 mutex_unlock(&kvm->lock);
933 break;
934 case KVM_S390_VM_MEM_CLR_CMMA:
935 ret = -ENXIO;
936 if (!sclp.has_cmma)
937 break;
938 ret = -EINVAL;
939 if (!kvm->arch.use_cmma)
940 break;
941
942 VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
943 mutex_lock(&kvm->lock);
944 idx = srcu_read_lock(&kvm->srcu);
945 s390_reset_cmma(kvm->arch.gmap->mm);
946 srcu_read_unlock(&kvm->srcu, idx);
947 mutex_unlock(&kvm->lock);
948 ret = 0;
949 break;
950 case KVM_S390_VM_MEM_LIMIT_SIZE: {
951 unsigned long new_limit;
952
953 if (kvm_is_ucontrol(kvm))
954 return -EINVAL;
955
956 if (get_user(new_limit, (u64 __user *)attr->addr))
957 return -EFAULT;
958
959 if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
960 new_limit > kvm->arch.mem_limit)
961 return -E2BIG;
962
963 if (!new_limit)
964 return -EINVAL;
965
966 /* gmap_create takes last usable address */
967 if (new_limit != KVM_S390_NO_MEM_LIMIT)
968 new_limit -= 1;
969
970 ret = -EBUSY;
971 mutex_lock(&kvm->lock);
972 if (!kvm->created_vcpus) {
973 /* gmap_create will round the limit up */
974 struct gmap *new = gmap_create(current->mm, new_limit);
975
976 if (!new) {
977 ret = -ENOMEM;
978 } else {
979 gmap_remove(kvm->arch.gmap);
980 new->private = kvm;
981 kvm->arch.gmap = new;
982 ret = 0;
983 }
984 }
985 mutex_unlock(&kvm->lock);
986 VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
987 VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
988 (void *) kvm->arch.gmap->asce);
989 break;
990 }
991 default:
992 ret = -ENXIO;
993 break;
994 }
995 return ret;
996}
997
998static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
999
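/*
 * Re-apply the current crypto configuration to every vCPU. All vCPUs are
 * blocked while their control blocks are updated and are forced out of
 * the VSIE handler so that the shadow CRYCBs are rebuilt as well.
 */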
1000void kvm_s390_vcpu_crypto_reset_all(struct kvm *kvm)
1001{
1002 struct kvm_vcpu *vcpu;
1003 unsigned long i;
1004
1005 kvm_s390_vcpu_block_all(kvm);
1006
1007 kvm_for_each_vcpu(i, vcpu, kvm) {
1008 kvm_s390_vcpu_crypto_setup(vcpu);
1009 /* recreate the shadow crycb by leaving the VSIE handler */
1010 kvm_s390_sync_request(KVM_REQ_VSIE_RESTART, vcpu);
1011 }
1012
1013 kvm_s390_vcpu_unblock_all(kvm);
1014}
1015
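/*
 * Handle the KVM_S390_VM_CRYPTO attribute group: generate or clear the
 * AES/DEA wrapping key masks and toggle AP instruction interpretation
 * (APIE). Every change is propagated to all vCPUs via
 * kvm_s390_vcpu_crypto_reset_all().
 */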
1016static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
1017{
1018 mutex_lock(&kvm->lock);
1019 switch (attr->attr) {
1020 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
1021 if (!test_kvm_facility(kvm, 76)) {
1022 mutex_unlock(&kvm->lock);
1023 return -EINVAL;
1024 }
1025 get_random_bytes(
1026 kvm->arch.crypto.crycb->aes_wrapping_key_mask,
1027 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
1028 kvm->arch.crypto.aes_kw = 1;
1029 VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
1030 break;
1031 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
1032 if (!test_kvm_facility(kvm, 76)) {
1033 mutex_unlock(&kvm->lock);
1034 return -EINVAL;
1035 }
1036 get_random_bytes(
1037 kvm->arch.crypto.crycb->dea_wrapping_key_mask,
1038 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
1039 kvm->arch.crypto.dea_kw = 1;
1040 VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
1041 break;
1042 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
1043 if (!test_kvm_facility(kvm, 76)) {
1044 mutex_unlock(&kvm->lock);
1045 return -EINVAL;
1046 }
1047 kvm->arch.crypto.aes_kw = 0;
1048 memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
1049 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
1050 VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
1051 break;
1052 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
1053 if (!test_kvm_facility(kvm, 76)) {
1054 mutex_unlock(&kvm->lock);
1055 return -EINVAL;
1056 }
1057 kvm->arch.crypto.dea_kw = 0;
1058 memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
1059 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
1060 VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
1061 break;
1062 case KVM_S390_VM_CRYPTO_ENABLE_APIE:
1063 if (!ap_instructions_available()) {
1064 mutex_unlock(&kvm->lock);
1065 return -EOPNOTSUPP;
1066 }
1067 kvm->arch.crypto.apie = 1;
1068 break;
1069 case KVM_S390_VM_CRYPTO_DISABLE_APIE:
1070 if (!ap_instructions_available()) {
1071 mutex_unlock(&kvm->lock);
1072 return -EOPNOTSUPP;
1073 }
1074 kvm->arch.crypto.apie = 0;
1075 break;
1076 default:
1077 mutex_unlock(&kvm->lock);
1078 return -ENXIO;
1079 }
1080
1081 kvm_s390_vcpu_crypto_reset_all(kvm);
1082 mutex_unlock(&kvm->lock);
1083 return 0;
1084}
1085
1086static void kvm_s390_vcpu_pci_setup(struct kvm_vcpu *vcpu)
1087{
1088 /* Only set the ECB bits after guest requests zPCI interpretation */
1089 if (!vcpu->kvm->arch.use_zpci_interp)
1090 return;
1091
1092 vcpu->arch.sie_block->ecb2 |= ECB2_ZPCI_LSI;
1093 vcpu->arch.sie_block->ecb3 |= ECB3_AISII + ECB3_AISI;
1094}
1095
1096void kvm_s390_vcpu_pci_enable_interp(struct kvm *kvm)
1097{
1098 struct kvm_vcpu *vcpu;
1099 unsigned long i;
1100
1101 lockdep_assert_held(&kvm->lock);
1102
1103 if (!kvm_s390_pci_interp_allowed())
1104 return;
1105
1106 /*
1107 * If host is configured for PCI and the necessary facilities are
1108 * available, turn on interpretation for the life of this guest
1109 */
1110 kvm->arch.use_zpci_interp = 1;
1111
1112 kvm_s390_vcpu_block_all(kvm);
1113
1114 kvm_for_each_vcpu(i, vcpu, kvm) {
1115 kvm_s390_vcpu_pci_setup(vcpu);
1116 kvm_s390_sync_request(KVM_REQ_VSIE_RESTART, vcpu);
1117 }
1118
1119 kvm_s390_vcpu_unblock_all(kvm);
1120}
1121
1122static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
1123{
1124 unsigned long cx;
1125 struct kvm_vcpu *vcpu;
1126
1127 kvm_for_each_vcpu(cx, vcpu, kvm)
1128 kvm_s390_sync_request(req, vcpu);
1129}
1130
1131/*
1132 * Must be called with kvm->srcu held to avoid races on memslots, and with
1133 * kvm->slots_lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
1134 */
1135static int kvm_s390_vm_start_migration(struct kvm *kvm)
1136{
1137 struct kvm_memory_slot *ms;
1138 struct kvm_memslots *slots;
1139 unsigned long ram_pages = 0;
1140 int bkt;
1141
1142 /* migration mode already enabled */
1143 if (kvm->arch.migration_mode)
1144 return 0;
1145 slots = kvm_memslots(kvm);
1146 if (!slots || kvm_memslots_empty(slots))
1147 return -EINVAL;
1148
1149 if (!kvm->arch.use_cmma) {
1150 kvm->arch.migration_mode = 1;
1151 return 0;
1152 }
1153 /* mark all the pages in active slots as dirty */
1154 kvm_for_each_memslot(ms, bkt, slots) {
1155 if (!ms->dirty_bitmap)
1156 return -EINVAL;
1157 /*
1158 * The second half of the bitmap is only used on x86,
1159 * and would be wasted otherwise, so we put it to good
1160 * use here to keep track of the state of the storage
1161 * attributes.
1162 */
1163 memset(kvm_second_dirty_bitmap(ms), 0xff, kvm_dirty_bitmap_bytes(ms));
1164 ram_pages += ms->npages;
1165 }
1166 atomic64_set(&kvm->arch.cmma_dirty_pages, ram_pages);
1167 kvm->arch.migration_mode = 1;
1168 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
1169 return 0;
1170}
1171
1172/*
1173 * Must be called with kvm->slots_lock to avoid races with ourselves and
1174 * kvm_s390_vm_start_migration.
1175 */
1176static int kvm_s390_vm_stop_migration(struct kvm *kvm)
1177{
1178 /* migration mode already disabled */
1179 if (!kvm->arch.migration_mode)
1180 return 0;
1181 kvm->arch.migration_mode = 0;
1182 if (kvm->arch.use_cmma)
1183 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
1184 return 0;
1185}
1186
1187static int kvm_s390_vm_set_migration(struct kvm *kvm,
1188 struct kvm_device_attr *attr)
1189{
1190 int res = -ENXIO;
1191
1192 mutex_lock(&kvm->slots_lock);
1193 switch (attr->attr) {
1194 case KVM_S390_VM_MIGRATION_START:
1195 res = kvm_s390_vm_start_migration(kvm);
1196 break;
1197 case KVM_S390_VM_MIGRATION_STOP:
1198 res = kvm_s390_vm_stop_migration(kvm);
1199 break;
1200 default:
1201 break;
1202 }
1203 mutex_unlock(&kvm->slots_lock);
1204
1205 return res;
1206}
1207
1208static int kvm_s390_vm_get_migration(struct kvm *kvm,
1209 struct kvm_device_attr *attr)
1210{
1211 u64 mig = kvm->arch.migration_mode;
1212
1213 if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
1214 return -ENXIO;
1215
1216 if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))
1217 return -EFAULT;
1218 return 0;
1219}
1220
1221static void __kvm_s390_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod);
1222
1223static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
1224{
1225 struct kvm_s390_vm_tod_clock gtod;
1226
1227	if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
1228 return -EFAULT;
1229
1230 if (!test_kvm_facility(kvm, 139) && gtod.epoch_idx)
1231 return -EINVAL;
1232	__kvm_s390_set_tod_clock(kvm, &gtod);
1233
1234 VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx",
1235 gtod.epoch_idx, gtod.tod);
1236
1237 return 0;
1238}
1239
1240static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
1241{
1242 u8 gtod_high;
1243
1244	if (copy_from_user(&gtod_high, (void __user *)attr->addr,
1245 sizeof(gtod_high)))
1246 return -EFAULT;
1247
1248 if (gtod_high != 0)
1249 return -EINVAL;
1250 VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);
1251
1252 return 0;
1253}
1254
1255static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1256{
1257 struct kvm_s390_vm_tod_clock gtod = { 0 };
1258
1259	if (copy_from_user(&gtod.tod, (void __user *)attr->addr,
1260 sizeof(gtod.tod)))
1261 return -EFAULT;
1262
1263	__kvm_s390_set_tod_clock(kvm, &gtod);
1264 VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod.tod);
1265 return 0;
1266}
1267
1268static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1269{
1270 int ret;
1271
1272 if (attr->flags)
1273 return -EINVAL;
1274
1275 mutex_lock(&kvm->lock);
1276 /*
1277 * For protected guests, the TOD is managed by the ultravisor, so trying
1278 * to change it will never bring the expected results.
1279 */
1280 if (kvm_s390_pv_is_protected(kvm)) {
1281 ret = -EOPNOTSUPP;
1282 goto out_unlock;
1283 }
1284
1285 switch (attr->attr) {
1286 case KVM_S390_VM_TOD_EXT:
1287 ret = kvm_s390_set_tod_ext(kvm, attr);
1288 break;
1289 case KVM_S390_VM_TOD_HIGH:
1290 ret = kvm_s390_set_tod_high(kvm, attr);
1291 break;
1292 case KVM_S390_VM_TOD_LOW:
1293 ret = kvm_s390_set_tod_low(kvm, attr);
1294 break;
1295 default:
1296 ret = -ENXIO;
1297 break;
1298 }
1299
1300out_unlock:
1301 mutex_unlock(&kvm->lock);
1302 return ret;
1303}
1304
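/*
 * Read the host TOD clock and convert it to the guest view by adding the
 * guest epoch. With the multiple-epoch facility (139) the guest epoch
 * index is derived as well; a carry into the epoch index is added when
 * the epoch addition wrapped the TOD value.
 */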
1305static void kvm_s390_get_tod_clock(struct kvm *kvm,
1306 struct kvm_s390_vm_tod_clock *gtod)
1307{
1308 union tod_clock clk;
1309
1310 preempt_disable();
1311
1312 store_tod_clock_ext(&clk);
1313
1314 gtod->tod = clk.tod + kvm->arch.epoch;
1315 gtod->epoch_idx = 0;
1316 if (test_kvm_facility(kvm, 139)) {
1317 gtod->epoch_idx = clk.ei + kvm->arch.epdx;
1318 if (gtod->tod < clk.tod)
1319 gtod->epoch_idx += 1;
1320 }
1321
1322 preempt_enable();
1323}
1324
1325static int kvm_s390_get_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
1326{
1327 struct kvm_s390_vm_tod_clock gtod;
1328
1329	memset(&gtod, 0, sizeof(gtod));
1330	kvm_s390_get_tod_clock(kvm, &gtod);
1331	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
1332 return -EFAULT;
1333
1334 VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x, TOD base: 0x%llx",
1335 gtod.epoch_idx, gtod.tod);
1336 return 0;
1337}
1338
1339static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
1340{
1341 u8 gtod_high = 0;
1342
1343	if (copy_to_user((void __user *)attr->addr, &gtod_high,
1344 sizeof(gtod_high)))
1345 return -EFAULT;
1346 VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);
1347
1348 return 0;
1349}
1350
1351static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1352{
1353 u64 gtod;
1354
1355 gtod = kvm_s390_get_tod_clock_fast(kvm);
1356	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
1357 return -EFAULT;
1358 VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);
1359
1360 return 0;
1361}
1362
1363static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1364{
1365 int ret;
1366
1367 if (attr->flags)
1368 return -EINVAL;
1369
1370 switch (attr->attr) {
1371 case KVM_S390_VM_TOD_EXT:
1372 ret = kvm_s390_get_tod_ext(kvm, attr);
1373 break;
1374 case KVM_S390_VM_TOD_HIGH:
1375 ret = kvm_s390_get_tod_high(kvm, attr);
1376 break;
1377 case KVM_S390_VM_TOD_LOW:
1378 ret = kvm_s390_get_tod_low(kvm, attr);
1379 break;
1380 default:
1381 ret = -ENXIO;
1382 break;
1383 }
1384 return ret;
1385}
1386
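/*
 * Set the guest CPU model (cpuid, IBC and facility list) from user space.
 * The requested IBC value is clamped to the range reported by the SCLP
 * (lowest_ibc..unblocked_ibc). Only allowed before vCPU creation.
 */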
1387static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1388{
1389 struct kvm_s390_vm_cpu_processor *proc;
1390 u16 lowest_ibc, unblocked_ibc;
1391 int ret = 0;
1392
1393 mutex_lock(&kvm->lock);
1394 if (kvm->created_vcpus) {
1395 ret = -EBUSY;
1396 goto out;
1397 }
1398 proc = kzalloc(sizeof(*proc), GFP_KERNEL_ACCOUNT);
1399 if (!proc) {
1400 ret = -ENOMEM;
1401 goto out;
1402 }
1403 if (!copy_from_user(proc, (void __user *)attr->addr,
1404 sizeof(*proc))) {
1405 kvm->arch.model.cpuid = proc->cpuid;
1406 lowest_ibc = sclp.ibc >> 16 & 0xfff;
1407 unblocked_ibc = sclp.ibc & 0xfff;
1408 if (lowest_ibc && proc->ibc) {
1409 if (proc->ibc > unblocked_ibc)
1410 kvm->arch.model.ibc = unblocked_ibc;
1411 else if (proc->ibc < lowest_ibc)
1412 kvm->arch.model.ibc = lowest_ibc;
1413 else
1414 kvm->arch.model.ibc = proc->ibc;
1415 }
1416 memcpy(kvm->arch.model.fac_list, proc->fac_list,
1417 S390_ARCH_FAC_LIST_SIZE_BYTE);
1418 VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1419 kvm->arch.model.ibc,
1420 kvm->arch.model.cpuid);
1421 VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1422 kvm->arch.model.fac_list[0],
1423 kvm->arch.model.fac_list[1],
1424 kvm->arch.model.fac_list[2]);
1425 } else
1426 ret = -EFAULT;
1427 kfree(proc);
1428out:
1429 mutex_unlock(&kvm->lock);
1430 return ret;
1431}
1432
1433static int kvm_s390_set_processor_feat(struct kvm *kvm,
1434 struct kvm_device_attr *attr)
1435{
1436 struct kvm_s390_vm_cpu_feat data;
1437
1438 if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
1439 return -EFAULT;
1440 if (!bitmap_subset((unsigned long *) data.feat,
1441 kvm_s390_available_cpu_feat,
1442 KVM_S390_VM_CPU_FEAT_NR_BITS))
1443 return -EINVAL;
1444
1445 mutex_lock(&kvm->lock);
1446 if (kvm->created_vcpus) {
1447 mutex_unlock(&kvm->lock);
1448 return -EBUSY;
1449 }
1450 bitmap_from_arr64(kvm->arch.cpu_feat, data.feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
1451 mutex_unlock(&kvm->lock);
1452 VM_EVENT(kvm, 3, "SET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1453 data.feat[0],
1454 data.feat[1],
1455 data.feat[2]);
1456 return 0;
1457}
1458
1459static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
1460 struct kvm_device_attr *attr)
1461{
1462 mutex_lock(&kvm->lock);
1463 if (kvm->created_vcpus) {
1464 mutex_unlock(&kvm->lock);
1465 return -EBUSY;
1466 }
1467
1468 if (copy_from_user(&kvm->arch.model.subfuncs, (void __user *)attr->addr,
1469 sizeof(struct kvm_s390_vm_cpu_subfunc))) {
1470 mutex_unlock(&kvm->lock);
1471 return -EFAULT;
1472 }
1473 mutex_unlock(&kvm->lock);
1474
1475 VM_EVENT(kvm, 3, "SET: guest PLO subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1476 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
1477 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
1478 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
1479 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
1480 VM_EVENT(kvm, 3, "SET: guest PTFF subfunc 0x%16.16lx.%16.16lx",
1481 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
1482 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
1483 VM_EVENT(kvm, 3, "SET: guest KMAC subfunc 0x%16.16lx.%16.16lx",
1484 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
1485 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
1486 VM_EVENT(kvm, 3, "SET: guest KMC subfunc 0x%16.16lx.%16.16lx",
1487 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
1488 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
1489 VM_EVENT(kvm, 3, "SET: guest KM subfunc 0x%16.16lx.%16.16lx",
1490 ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
1491 ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
1492 VM_EVENT(kvm, 3, "SET: guest KIMD subfunc 0x%16.16lx.%16.16lx",
1493 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
1494 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
1495 VM_EVENT(kvm, 3, "SET: guest KLMD subfunc 0x%16.16lx.%16.16lx",
1496 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
1497 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
1498 VM_EVENT(kvm, 3, "SET: guest PCKMO subfunc 0x%16.16lx.%16.16lx",
1499 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
1500 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
1501 VM_EVENT(kvm, 3, "SET: guest KMCTR subfunc 0x%16.16lx.%16.16lx",
1502 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
1503 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
1504 VM_EVENT(kvm, 3, "SET: guest KMF subfunc 0x%16.16lx.%16.16lx",
1505 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
1506 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
1507 VM_EVENT(kvm, 3, "SET: guest KMO subfunc 0x%16.16lx.%16.16lx",
1508 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
1509 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
1510 VM_EVENT(kvm, 3, "SET: guest PCC subfunc 0x%16.16lx.%16.16lx",
1511 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
1512 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
1513 VM_EVENT(kvm, 3, "SET: guest PPNO subfunc 0x%16.16lx.%16.16lx",
1514 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
1515 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
1516 VM_EVENT(kvm, 3, "SET: guest KMA subfunc 0x%16.16lx.%16.16lx",
1517 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
1518 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
1519 VM_EVENT(kvm, 3, "SET: guest KDSA subfunc 0x%16.16lx.%16.16lx",
1520 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
1521 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
1522 VM_EVENT(kvm, 3, "SET: guest SORTL subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1523 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
1524 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
1525 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
1526 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
1527 VM_EVENT(kvm, 3, "SET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1528 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
1529 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
1530 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
1531 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);
1532
1533 return 0;
1534}
1535
1536static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1537{
1538 int ret = -ENXIO;
1539
1540 switch (attr->attr) {
1541 case KVM_S390_VM_CPU_PROCESSOR:
1542 ret = kvm_s390_set_processor(kvm, attr);
1543 break;
1544 case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1545 ret = kvm_s390_set_processor_feat(kvm, attr);
1546 break;
1547 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1548 ret = kvm_s390_set_processor_subfunc(kvm, attr);
1549 break;
1550 }
1551 return ret;
1552}
1553
1554static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1555{
1556 struct kvm_s390_vm_cpu_processor *proc;
1557 int ret = 0;
1558
1559 proc = kzalloc(sizeof(*proc), GFP_KERNEL_ACCOUNT);
1560 if (!proc) {
1561 ret = -ENOMEM;
1562 goto out;
1563 }
1564 proc->cpuid = kvm->arch.model.cpuid;
1565 proc->ibc = kvm->arch.model.ibc;
1566 memcpy(&proc->fac_list, kvm->arch.model.fac_list,
1567 S390_ARCH_FAC_LIST_SIZE_BYTE);
1568 VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1569 kvm->arch.model.ibc,
1570 kvm->arch.model.cpuid);
1571 VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1572 kvm->arch.model.fac_list[0],
1573 kvm->arch.model.fac_list[1],
1574 kvm->arch.model.fac_list[2]);
1575 if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
1576 ret = -EFAULT;
1577 kfree(proc);
1578out:
1579 return ret;
1580}
1581
1582static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
1583{
1584 struct kvm_s390_vm_cpu_machine *mach;
1585 int ret = 0;
1586
1587 mach = kzalloc(sizeof(*mach), GFP_KERNEL_ACCOUNT);
1588 if (!mach) {
1589 ret = -ENOMEM;
1590 goto out;
1591 }
1592 get_cpu_id((struct cpuid *) &mach->cpuid);
1593 mach->ibc = sclp.ibc;
1594 memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
1595 S390_ARCH_FAC_LIST_SIZE_BYTE);
1596 memcpy((unsigned long *)&mach->fac_list, stfle_fac_list,
1597 sizeof(stfle_fac_list));
1598 VM_EVENT(kvm, 3, "GET: host ibc: 0x%4.4x, host cpuid: 0x%16.16llx",
1599 kvm->arch.model.ibc,
1600 kvm->arch.model.cpuid);
1601 VM_EVENT(kvm, 3, "GET: host facmask: 0x%16.16llx.%16.16llx.%16.16llx",
1602 mach->fac_mask[0],
1603 mach->fac_mask[1],
1604 mach->fac_mask[2]);
1605 VM_EVENT(kvm, 3, "GET: host faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1606 mach->fac_list[0],
1607 mach->fac_list[1],
1608 mach->fac_list[2]);
1609 if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
1610 ret = -EFAULT;
1611 kfree(mach);
1612out:
1613 return ret;
1614}
1615
1616static int kvm_s390_get_processor_feat(struct kvm *kvm,
1617 struct kvm_device_attr *attr)
1618{
1619 struct kvm_s390_vm_cpu_feat data;
1620
1621 bitmap_to_arr64(data.feat, kvm->arch.cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
1622 if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1623 return -EFAULT;
1624 VM_EVENT(kvm, 3, "GET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1625 data.feat[0],
1626 data.feat[1],
1627 data.feat[2]);
1628 return 0;
1629}
1630
1631static int kvm_s390_get_machine_feat(struct kvm *kvm,
1632 struct kvm_device_attr *attr)
1633{
1634 struct kvm_s390_vm_cpu_feat data;
1635
1636 bitmap_to_arr64(data.feat, kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
1637 if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1638 return -EFAULT;
1639 VM_EVENT(kvm, 3, "GET: host feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1640 data.feat[0],
1641 data.feat[1],
1642 data.feat[2]);
1643 return 0;
1644}
1645
1646static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
1647 struct kvm_device_attr *attr)
1648{
1649 if (copy_to_user((void __user *)attr->addr, &kvm->arch.model.subfuncs,
1650 sizeof(struct kvm_s390_vm_cpu_subfunc)))
1651 return -EFAULT;
1652
1653 VM_EVENT(kvm, 3, "GET: guest PLO subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1654 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
1655 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
1656 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
1657 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
1658 VM_EVENT(kvm, 3, "GET: guest PTFF subfunc 0x%16.16lx.%16.16lx",
1659 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
1660 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
1661 VM_EVENT(kvm, 3, "GET: guest KMAC subfunc 0x%16.16lx.%16.16lx",
1662 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
1663 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
1664 VM_EVENT(kvm, 3, "GET: guest KMC subfunc 0x%16.16lx.%16.16lx",
1665 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
1666 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
1667 VM_EVENT(kvm, 3, "GET: guest KM subfunc 0x%16.16lx.%16.16lx",
1668 ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
1669 ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
1670 VM_EVENT(kvm, 3, "GET: guest KIMD subfunc 0x%16.16lx.%16.16lx",
1671 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
1672 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
1673 VM_EVENT(kvm, 3, "GET: guest KLMD subfunc 0x%16.16lx.%16.16lx",
1674 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
1675 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
1676 VM_EVENT(kvm, 3, "GET: guest PCKMO subfunc 0x%16.16lx.%16.16lx",
1677 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
1678 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
1679 VM_EVENT(kvm, 3, "GET: guest KMCTR subfunc 0x%16.16lx.%16.16lx",
1680 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
1681 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
1682 VM_EVENT(kvm, 3, "GET: guest KMF subfunc 0x%16.16lx.%16.16lx",
1683 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
1684 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
1685 VM_EVENT(kvm, 3, "GET: guest KMO subfunc 0x%16.16lx.%16.16lx",
1686 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
1687 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
1688 VM_EVENT(kvm, 3, "GET: guest PCC subfunc 0x%16.16lx.%16.16lx",
1689 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
1690 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
1691 VM_EVENT(kvm, 3, "GET: guest PPNO subfunc 0x%16.16lx.%16.16lx",
1692 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
1693 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
1694 VM_EVENT(kvm, 3, "GET: guest KMA subfunc 0x%16.16lx.%16.16lx",
1695 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
1696 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
1697 VM_EVENT(kvm, 3, "GET: guest KDSA subfunc 0x%16.16lx.%16.16lx",
1698 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
1699 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
1700 VM_EVENT(kvm, 3, "GET: guest SORTL subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1701 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
1702 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
1703 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
1704 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
1705 VM_EVENT(kvm, 3, "GET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1706 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
1707 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
1708 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
1709 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);
1710
1711 return 0;
1712}
1713
1714static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
1715 struct kvm_device_attr *attr)
1716{
1717 if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
1718 sizeof(struct kvm_s390_vm_cpu_subfunc)))
1719 return -EFAULT;
1720
1721 VM_EVENT(kvm, 3, "GET: host PLO subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1722 ((unsigned long *) &kvm_s390_available_subfunc.plo)[0],
1723 ((unsigned long *) &kvm_s390_available_subfunc.plo)[1],
1724 ((unsigned long *) &kvm_s390_available_subfunc.plo)[2],
1725 ((unsigned long *) &kvm_s390_available_subfunc.plo)[3]);
1726 VM_EVENT(kvm, 3, "GET: host PTFF subfunc 0x%16.16lx.%16.16lx",
1727 ((unsigned long *) &kvm_s390_available_subfunc.ptff)[0],
1728 ((unsigned long *) &kvm_s390_available_subfunc.ptff)[1]);
1729 VM_EVENT(kvm, 3, "GET: host KMAC subfunc 0x%16.16lx.%16.16lx",
1730 ((unsigned long *) &kvm_s390_available_subfunc.kmac)[0],
1731 ((unsigned long *) &kvm_s390_available_subfunc.kmac)[1]);
1732 VM_EVENT(kvm, 3, "GET: host KMC subfunc 0x%16.16lx.%16.16lx",
1733 ((unsigned long *) &kvm_s390_available_subfunc.kmc)[0],
1734 ((unsigned long *) &kvm_s390_available_subfunc.kmc)[1]);
1735 VM_EVENT(kvm, 3, "GET: host KM subfunc 0x%16.16lx.%16.16lx",
1736 ((unsigned long *) &kvm_s390_available_subfunc.km)[0],
1737 ((unsigned long *) &kvm_s390_available_subfunc.km)[1]);
1738 VM_EVENT(kvm, 3, "GET: host KIMD subfunc 0x%16.16lx.%16.16lx",
1739 ((unsigned long *) &kvm_s390_available_subfunc.kimd)[0],
1740 ((unsigned long *) &kvm_s390_available_subfunc.kimd)[1]);
1741 VM_EVENT(kvm, 3, "GET: host KLMD subfunc 0x%16.16lx.%16.16lx",
1742 ((unsigned long *) &kvm_s390_available_subfunc.klmd)[0],
1743 ((unsigned long *) &kvm_s390_available_subfunc.klmd)[1]);
1744 VM_EVENT(kvm, 3, "GET: host PCKMO subfunc 0x%16.16lx.%16.16lx",
1745 ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[0],
1746 ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[1]);
1747 VM_EVENT(kvm, 3, "GET: host KMCTR subfunc 0x%16.16lx.%16.16lx",
1748 ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[0],
1749 ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[1]);
1750 VM_EVENT(kvm, 3, "GET: host KMF subfunc 0x%16.16lx.%16.16lx",
1751 ((unsigned long *) &kvm_s390_available_subfunc.kmf)[0],
1752 ((unsigned long *) &kvm_s390_available_subfunc.kmf)[1]);
1753 VM_EVENT(kvm, 3, "GET: host KMO subfunc 0x%16.16lx.%16.16lx",
1754 ((unsigned long *) &kvm_s390_available_subfunc.kmo)[0],
1755 ((unsigned long *) &kvm_s390_available_subfunc.kmo)[1]);
1756 VM_EVENT(kvm, 3, "GET: host PCC subfunc 0x%16.16lx.%16.16lx",
1757 ((unsigned long *) &kvm_s390_available_subfunc.pcc)[0],
1758 ((unsigned long *) &kvm_s390_available_subfunc.pcc)[1]);
1759 VM_EVENT(kvm, 3, "GET: host PPNO subfunc 0x%16.16lx.%16.16lx",
1760 ((unsigned long *) &kvm_s390_available_subfunc.ppno)[0],
1761 ((unsigned long *) &kvm_s390_available_subfunc.ppno)[1]);
1762 VM_EVENT(kvm, 3, "GET: host KMA subfunc 0x%16.16lx.%16.16lx",
1763 ((unsigned long *) &kvm_s390_available_subfunc.kma)[0],
1764 ((unsigned long *) &kvm_s390_available_subfunc.kma)[1]);
1765 VM_EVENT(kvm, 3, "GET: host KDSA subfunc 0x%16.16lx.%16.16lx",
1766 ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[0],
1767 ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[1]);
1768 VM_EVENT(kvm, 3, "GET: host SORTL subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1769 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[0],
1770 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[1],
1771 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[2],
1772 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[3]);
1773 VM_EVENT(kvm, 3, "GET: host DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1774 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[0],
1775 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[1],
1776 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[2],
1777 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[3]);
1778
1779 return 0;
1780}
1781
1782static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1783{
1784 int ret = -ENXIO;
1785
1786 switch (attr->attr) {
1787 case KVM_S390_VM_CPU_PROCESSOR:
1788 ret = kvm_s390_get_processor(kvm, attr);
1789 break;
1790 case KVM_S390_VM_CPU_MACHINE:
1791 ret = kvm_s390_get_machine(kvm, attr);
1792 break;
1793 case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1794 ret = kvm_s390_get_processor_feat(kvm, attr);
1795 break;
1796 case KVM_S390_VM_CPU_MACHINE_FEAT:
1797 ret = kvm_s390_get_machine_feat(kvm, attr);
1798 break;
1799 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1800 ret = kvm_s390_get_processor_subfunc(kvm, attr);
1801 break;
1802 case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1803 ret = kvm_s390_get_machine_subfunc(kvm, attr);
1804 break;
1805 }
1806 return ret;
1807}
1808
1809/**
1810 * kvm_s390_update_topology_change_report - update CPU topology change report
1811 * @kvm: guest KVM description
1812 * @val: set or clear the MTCR bit
1813 *
1814 * Updates the Multiprocessor Topology-Change-Report bit to signal
1815 * the guest with a topology change.
1816 * This is only relevant if the topology facility is present.
1817 *
1818 * The SCA version, bsca or esca, doesn't matter as the offset is the same.
1819 */
1820static void kvm_s390_update_topology_change_report(struct kvm *kvm, bool val)
1821{
1822 union sca_utility new, old;
1823 struct bsca_block *sca;
1824
1825 read_lock(&kvm->arch.sca_lock);
1826 sca = kvm->arch.sca;
1827 do {
1828 old = READ_ONCE(sca->utility);
1829 new = old;
1830 new.mtcr = val;
1831 } while (cmpxchg(&sca->utility.val, old.val, new.val) != old.val);
1832 read_unlock(&kvm->arch.sca_lock);
1833}
1834
1835static int kvm_s390_set_topo_change_indication(struct kvm *kvm,
1836 struct kvm_device_attr *attr)
1837{
1838 if (!test_kvm_facility(kvm, 11))
1839 return -ENXIO;
1840
1841 kvm_s390_update_topology_change_report(kvm, !!attr->attr);
1842 return 0;
1843}
1844
1845static int kvm_s390_get_topo_change_indication(struct kvm *kvm,
1846 struct kvm_device_attr *attr)
1847{
1848 u8 topo;
1849
1850 if (!test_kvm_facility(kvm, 11))
1851 return -ENXIO;
1852
1853 read_lock(&kvm->arch.sca_lock);
1854 topo = ((struct bsca_block *)kvm->arch.sca)->utility.mtcr;
1855 read_unlock(&kvm->arch.sca_lock);
1856
1857 return put_user(topo, (u8 __user *)attr->addr);
1858}
1859
1860static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1861{
1862 int ret;
1863
1864 switch (attr->group) {
1865 case KVM_S390_VM_MEM_CTRL:
1866 ret = kvm_s390_set_mem_control(kvm, attr);
1867 break;
1868 case KVM_S390_VM_TOD:
1869 ret = kvm_s390_set_tod(kvm, attr);
1870 break;
1871 case KVM_S390_VM_CPU_MODEL:
1872 ret = kvm_s390_set_cpu_model(kvm, attr);
1873 break;
1874 case KVM_S390_VM_CRYPTO:
1875 ret = kvm_s390_vm_set_crypto(kvm, attr);
1876 break;
1877 case KVM_S390_VM_MIGRATION:
1878 ret = kvm_s390_vm_set_migration(kvm, attr);
1879 break;
1880 case KVM_S390_VM_CPU_TOPOLOGY:
1881 ret = kvm_s390_set_topo_change_indication(kvm, attr);
1882 break;
1883 default:
1884 ret = -ENXIO;
1885 break;
1886 }
1887
1888 return ret;
1889}
1890
1891static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1892{
1893 int ret;
1894
1895 switch (attr->group) {
1896 case KVM_S390_VM_MEM_CTRL:
1897 ret = kvm_s390_get_mem_control(kvm, attr);
1898 break;
1899 case KVM_S390_VM_TOD:
1900 ret = kvm_s390_get_tod(kvm, attr);
1901 break;
1902 case KVM_S390_VM_CPU_MODEL:
1903 ret = kvm_s390_get_cpu_model(kvm, attr);
1904 break;
1905 case KVM_S390_VM_MIGRATION:
1906 ret = kvm_s390_vm_get_migration(kvm, attr);
1907 break;
1908 case KVM_S390_VM_CPU_TOPOLOGY:
1909 ret = kvm_s390_get_topo_change_indication(kvm, attr);
1910 break;
1911 default:
1912 ret = -ENXIO;
1913 break;
1914 }
1915
1916 return ret;
1917}
1918
1919static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1920{
1921 int ret;
1922
1923 switch (attr->group) {
1924 case KVM_S390_VM_MEM_CTRL:
1925 switch (attr->attr) {
1926 case KVM_S390_VM_MEM_ENABLE_CMMA:
1927 case KVM_S390_VM_MEM_CLR_CMMA:
1928 ret = sclp.has_cmma ? 0 : -ENXIO;
1929 break;
1930 case KVM_S390_VM_MEM_LIMIT_SIZE:
1931 ret = 0;
1932 break;
1933 default:
1934 ret = -ENXIO;
1935 break;
1936 }
1937 break;
1938 case KVM_S390_VM_TOD:
1939 switch (attr->attr) {
1940 case KVM_S390_VM_TOD_LOW:
1941 case KVM_S390_VM_TOD_HIGH:
1942 ret = 0;
1943 break;
1944 default:
1945 ret = -ENXIO;
1946 break;
1947 }
1948 break;
1949 case KVM_S390_VM_CPU_MODEL:
1950 switch (attr->attr) {
1951 case KVM_S390_VM_CPU_PROCESSOR:
1952 case KVM_S390_VM_CPU_MACHINE:
1953 case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1954 case KVM_S390_VM_CPU_MACHINE_FEAT:
1955 case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1956 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1957 ret = 0;
1958 break;
1959 default:
1960 ret = -ENXIO;
1961 break;
1962 }
1963 break;
1964 case KVM_S390_VM_CRYPTO:
1965 switch (attr->attr) {
1966 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
1967 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
1968 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
1969 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
1970 ret = 0;
1971 break;
1972 case KVM_S390_VM_CRYPTO_ENABLE_APIE:
1973 case KVM_S390_VM_CRYPTO_DISABLE_APIE:
1974 ret = ap_instructions_available() ? 0 : -ENXIO;
1975 break;
1976 default:
1977 ret = -ENXIO;
1978 break;
1979 }
1980 break;
1981 case KVM_S390_VM_MIGRATION:
1982 ret = 0;
1983 break;
1984 case KVM_S390_VM_CPU_TOPOLOGY:
1985 ret = test_kvm_facility(kvm, 11) ? 0 : -ENXIO;
1986 break;
1987 default:
1988 ret = -ENXIO;
1989 break;
1990 }
1991
1992 return ret;
1993}
1994
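/*
 * KVM_S390_GET_SKEYS: read the storage keys of args->count guest frames
 * starting at args->start_gfn and copy them to user space. Returns
 * KVM_S390_GET_SKEYS_NONE if the guest has never used storage keys.
 */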
1995static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1996{
1997 uint8_t *keys;
1998 uint64_t hva;
1999 int srcu_idx, i, r = 0;
2000
2001 if (args->flags != 0)
2002 return -EINVAL;
2003
2004 /* Is this guest using storage keys? */
2005 if (!mm_uses_skeys(current->mm))
2006 return KVM_S390_GET_SKEYS_NONE;
2007
2008 /* Enforce sane limit on memory allocation */
2009 if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
2010 return -EINVAL;
2011
2012 keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL_ACCOUNT);
2013 if (!keys)
2014 return -ENOMEM;
2015
2016 mmap_read_lock(current->mm);
2017 srcu_idx = srcu_read_lock(&kvm->srcu);
2018 for (i = 0; i < args->count; i++) {
2019 hva = gfn_to_hva(kvm, args->start_gfn + i);
2020 if (kvm_is_error_hva(hva)) {
2021 r = -EFAULT;
2022 break;
2023 }
2024
2025 r = get_guest_storage_key(current->mm, hva, &keys[i]);
2026 if (r)
2027 break;
2028 }
2029 srcu_read_unlock(&kvm->srcu, srcu_idx);
2030 mmap_read_unlock(current->mm);
2031
2032 if (!r) {
2033 r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
2034 sizeof(uint8_t) * args->count);
2035 if (r)
2036 r = -EFAULT;
2037 }
2038
2039 kvfree(keys);
2040 return r;
2041}
2042
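/*
 * KVM_S390_SET_SKEYS: enable storage key handling for the guest and set
 * the storage keys of args->count guest frames starting at
 * args->start_gfn. Faults encountered while setting a key are resolved
 * with fixup_user_fault().
 */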
2043static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
2044{
2045 uint8_t *keys;
2046 uint64_t hva;
2047 int srcu_idx, i, r = 0;
2048 bool unlocked;
2049
2050 if (args->flags != 0)
2051 return -EINVAL;
2052
2053 /* Enforce sane limit on memory allocation */
2054 if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
2055 return -EINVAL;
2056
2057 keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL_ACCOUNT);
2058 if (!keys)
2059 return -ENOMEM;
2060
2061 r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
2062 sizeof(uint8_t) * args->count);
2063 if (r) {
2064 r = -EFAULT;
2065 goto out;
2066 }
2067
2068 /* Enable storage key handling for the guest */
2069 r = s390_enable_skey();
2070 if (r)
2071 goto out;
2072
2073 i = 0;
2074 mmap_read_lock(current->mm);
2075 srcu_idx = srcu_read_lock(&kvm->srcu);
2076 while (i < args->count) {
2077 unlocked = false;
2078 hva = gfn_to_hva(kvm, args->start_gfn + i);
2079 if (kvm_is_error_hva(hva)) {
2080 r = -EFAULT;
2081 break;
2082 }
2083
2084 /* Lowest order bit is reserved */
2085 if (keys[i] & 0x01) {
2086 r = -EINVAL;
2087 break;
2088 }
2089
2090 r = set_guest_storage_key(current->mm, hva, keys[i], 0);
2091 if (r) {
2092 r = fixup_user_fault(current->mm, hva,
2093 FAULT_FLAG_WRITE, &unlocked);
2094 if (r)
2095 break;
2096 }
2097 if (!r)
2098 i++;
2099 }
2100 srcu_read_unlock(&kvm->srcu, srcu_idx);
2101 mmap_read_unlock(current->mm);
2102out:
2103 kvfree(keys);
2104 return r;
2105}
2106
2107/*
2108 * Base address and length must be sent at the start of each block, therefore
2109 * it's cheaper to send some clean data, as long as it's less than the size of
2110 * two longs.
2111 */
2112#define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *))
2113/* for consistency */
2114#define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)
2115
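/*
 * Peek variant of KVM_S390_GET_CMMA_BITS: copy the CMMA values of
 * consecutive guest frames starting at args->start_gfn, without
 * consulting or modifying the dirty bitmap.
 */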
2116static int kvm_s390_peek_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
2117 u8 *res, unsigned long bufsize)
2118{
2119 unsigned long pgstev, hva, cur_gfn = args->start_gfn;
2120
2121 args->count = 0;
2122 while (args->count < bufsize) {
2123 hva = gfn_to_hva(kvm, cur_gfn);
2124 /*
2125 * We return an error if the first value was invalid, but we
2126 * return successfully if at least one value was copied.
2127 */
2128 if (kvm_is_error_hva(hva))
2129 return args->count ? 0 : -EFAULT;
2130 if (get_pgste(kvm->mm, hva, &pgstev) < 0)
2131 pgstev = 0;
2132 res[args->count++] = (pgstev >> 24) & 0x43;
2133 cur_gfn++;
2134 }
2135
2136 return 0;
2137}
2138
2139static struct kvm_memory_slot *gfn_to_memslot_approx(struct kvm_memslots *slots,
2140 gfn_t gfn)
2141{
2142 return ____gfn_to_memslot(slots, gfn, true);
2143}
2144
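/*
 * Find the guest frame number of the next set bit in the per-memslot
 * "second" dirty bitmap, starting the search at cur_gfn. The search
 * wraps around to the first memslot once the end of the gfn tree is
 * reached.
 */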
2145static unsigned long kvm_s390_next_dirty_cmma(struct kvm_memslots *slots,
2146 unsigned long cur_gfn)
2147{
2148 struct kvm_memory_slot *ms = gfn_to_memslot_approx(slots, cur_gfn);
2149 unsigned long ofs = cur_gfn - ms->base_gfn;
2150 struct rb_node *mnode = &ms->gfn_node[slots->node_idx];
2151
2152 if (ms->base_gfn + ms->npages <= cur_gfn) {
2153 mnode = rb_next(mnode);
2154 /* If we are above the highest slot, wrap around */
2155 if (!mnode)
2156 mnode = rb_first(&slots->gfn_tree);
2157
2158 ms = container_of(mnode, struct kvm_memory_slot, gfn_node[slots->node_idx]);
2159 ofs = 0;
2160 }
2161 ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, ofs);
2162 while (ofs >= ms->npages && (mnode = rb_next(mnode))) {
2163 ms = container_of(mnode, struct kvm_memory_slot, gfn_node[slots->node_idx]);
2164 ofs = find_first_bit(kvm_second_dirty_bitmap(ms), ms->npages);
2165 }
2166 return ms->base_gfn + ofs;
2167}
2168
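/*
 * Non-peek variant of KVM_S390_GET_CMMA_BITS: walk the second dirty
 * bitmap starting at args->start_gfn, copy the CMMA value of every
 * visited frame and clear its dirty bit, stopping at the end of memory,
 * at the end of the buffer, or when the next dirty frame is more than
 * KVM_S390_MAX_BIT_DISTANCE away.
 */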
2169static int kvm_s390_get_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
2170 u8 *res, unsigned long bufsize)
2171{
2172 unsigned long mem_end, cur_gfn, next_gfn, hva, pgstev;
2173 struct kvm_memslots *slots = kvm_memslots(kvm);
2174 struct kvm_memory_slot *ms;
2175
2176 if (unlikely(kvm_memslots_empty(slots)))
2177 return 0;
2178
2179 cur_gfn = kvm_s390_next_dirty_cmma(slots, args->start_gfn);
2180 ms = gfn_to_memslot(kvm, cur_gfn);
2181 args->count = 0;
2182 args->start_gfn = cur_gfn;
2183 if (!ms)
2184 return 0;
2185 next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
2186 mem_end = kvm_s390_get_gfn_end(slots);
2187
2188 while (args->count < bufsize) {
2189 hva = gfn_to_hva(kvm, cur_gfn);
2190 if (kvm_is_error_hva(hva))
2191 return 0;
2192 /* Decrement only if we actually flipped the bit to 0 */
2193 if (test_and_clear_bit(cur_gfn - ms->base_gfn, kvm_second_dirty_bitmap(ms)))
2194 atomic64_dec(&kvm->arch.cmma_dirty_pages);
2195 if (get_pgste(kvm->mm, hva, &pgstev) < 0)
2196 pgstev = 0;
2197 /* Save the value */
2198 res[args->count++] = (pgstev >> 24) & 0x43;
2199 /* If the next bit is too far away, stop. */
2200 if (next_gfn > cur_gfn + KVM_S390_MAX_BIT_DISTANCE)
2201 return 0;
2202 /* If we reached the previous "next", find the next one */
2203 if (cur_gfn == next_gfn)
2204 next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
2205 /* Reached the end of memory or of the buffer, stop */
2206 if ((next_gfn >= mem_end) ||
2207 (next_gfn - args->start_gfn >= bufsize))
2208 return 0;
2209 cur_gfn++;
2210 /* Reached the end of the current memslot, take the next one. */
2211 if (cur_gfn - ms->base_gfn >= ms->npages) {
2212 ms = gfn_to_memslot(kvm, cur_gfn);
2213 if (!ms)
2214 return 0;
2215 }
2216 }
2217 return 0;
2218}
2219
2220/*
2221 * This function searches for the next page with dirty CMMA attributes, and
2222 * saves the attributes in the buffer up to either the end of the buffer or
2223 * until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found;
2224 * no trailing clean bytes are saved.
2225 * In case no dirty bits were found, or if CMMA was not enabled or used, the
2226 * output buffer will indicate 0 as length.
2227 */
2228static int kvm_s390_get_cmma_bits(struct kvm *kvm,
2229 struct kvm_s390_cmma_log *args)
2230{
2231 unsigned long bufsize;
2232 int srcu_idx, peek, ret;
2233 u8 *values;
2234
2235 if (!kvm->arch.use_cmma)
2236 return -ENXIO;
2237 /* Invalid/unsupported flags were specified */
2238 if (args->flags & ~KVM_S390_CMMA_PEEK)
2239 return -EINVAL;
2240 /* Migration mode query, and we are not doing a migration */
2241 peek = !!(args->flags & KVM_S390_CMMA_PEEK);
2242 if (!peek && !kvm->arch.migration_mode)
2243 return -EINVAL;
2244 /* CMMA is disabled or was not used, or the buffer has length zero */
2245 bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
2246 if (!bufsize || !kvm->mm->context.uses_cmm) {
2247 memset(args, 0, sizeof(*args));
2248 return 0;
2249 }
2250 /* We are not peeking, and there are no dirty pages */
2251 if (!peek && !atomic64_read(&kvm->arch.cmma_dirty_pages)) {
2252 memset(args, 0, sizeof(*args));
2253 return 0;
2254 }
2255
2256 values = vmalloc(bufsize);
2257 if (!values)
2258 return -ENOMEM;
2259
2260 mmap_read_lock(kvm->mm);
2261 srcu_idx = srcu_read_lock(&kvm->srcu);
2262 if (peek)
2263 ret = kvm_s390_peek_cmma(kvm, args, values, bufsize);
2264 else
2265 ret = kvm_s390_get_cmma(kvm, args, values, bufsize);
2266 srcu_read_unlock(&kvm->srcu, srcu_idx);
2267 mmap_read_unlock(kvm->mm);
2268
2269 if (kvm->arch.migration_mode)
2270 args->remaining = atomic64_read(&kvm->arch.cmma_dirty_pages);
2271 else
2272 args->remaining = 0;
2273
2274 if (copy_to_user((void __user *)args->values, values, args->count))
2275 ret = -EFAULT;
2276
2277 vfree(values);
2278 return ret;
2279}
2280
2281/*
2282 * This function sets the CMMA attributes for the given pages. If the input
2283 * buffer has zero length, no action is taken, otherwise the attributes are
2284 * set and the mm->context.uses_cmm flag is set.
2285 */
2286static int kvm_s390_set_cmma_bits(struct kvm *kvm,
2287 const struct kvm_s390_cmma_log *args)
2288{
2289 unsigned long hva, mask, pgstev, i;
2290 uint8_t *bits;
2291 int srcu_idx, r = 0;
2292
2293 mask = args->mask;
2294
2295 if (!kvm->arch.use_cmma)
2296 return -ENXIO;
2297 /* invalid/unsupported flags */
2298 if (args->flags != 0)
2299 return -EINVAL;
2300 /* Enforce sane limit on memory allocation */
2301 if (args->count > KVM_S390_CMMA_SIZE_MAX)
2302 return -EINVAL;
2303 /* Nothing to do */
2304 if (args->count == 0)
2305 return 0;
2306
2307 bits = vmalloc(array_size(sizeof(*bits), args->count));
2308 if (!bits)
2309 return -ENOMEM;
2310
2311 r = copy_from_user(bits, (void __user *)args->values, args->count);
2312 if (r) {
2313 r = -EFAULT;
2314 goto out;
2315 }
2316
2317 mmap_read_lock(kvm->mm);
2318 srcu_idx = srcu_read_lock(&kvm->srcu);
2319 for (i = 0; i < args->count; i++) {
2320 hva = gfn_to_hva(kvm, args->start_gfn + i);
2321 if (kvm_is_error_hva(hva)) {
2322 r = -EFAULT;
2323 break;
2324 }
2325
2326 pgstev = bits[i];
2327 pgstev = pgstev << 24;
2328 mask &= _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT;
2329 set_pgste_bits(kvm->mm, hva, mask, pgstev);
2330 }
2331 srcu_read_unlock(&kvm->srcu, srcu_idx);
2332 mmap_read_unlock(kvm->mm);
2333
2334 if (!kvm->mm->context.uses_cmm) {
2335 mmap_write_lock(kvm->mm);
2336 kvm->mm->context.uses_cmm = 1;
2337 mmap_write_unlock(kvm->mm);
2338 }
2339out:
2340 vfree(bits);
2341 return r;
2342}
2343
2344/**
2345 * kvm_s390_cpus_from_pv - Convert all protected vCPUs in a protected VM to
2346 * non protected.
2347 * @kvm: the VM whose protected vCPUs are to be converted
2348 * @rc: return value for the RC field of the UVC (in case of error)
2349 * @rrc: return value for the RRC field of the UVC (in case of error)
2350 *
2351 * Does not stop in case of error, tries to convert as many
2352 * CPUs as possible. In case of error, the RC and RRC of the first error are
2353 * returned.
2354 *
2355 * Return: 0 in case of success, otherwise -EIO
2356 */
2357int kvm_s390_cpus_from_pv(struct kvm *kvm, u16 *rc, u16 *rrc)
2358{
2359 struct kvm_vcpu *vcpu;
2360 unsigned long i;
2361 u16 _rc, _rrc;
2362 int ret = 0;
2363
2364 /*
2365 * We ignore failures and try to destroy as many CPUs as possible.
2366 * At the same time we must not free the assigned resources when
2367	 * this fails, as the ultravisor still has access to that memory.
2368 * So kvm_s390_pv_destroy_cpu can leave a "wanted" memory leak
2369 * behind.
2370 * We want to return the first failure rc and rrc, though.
2371 */
2372 kvm_for_each_vcpu(i, vcpu, kvm) {
2373 mutex_lock(&vcpu->mutex);
2374 if (kvm_s390_pv_destroy_cpu(vcpu, &_rc, &_rrc) && !ret) {
2375 *rc = _rc;
2376 *rrc = _rrc;
2377 ret = -EIO;
2378 }
2379 mutex_unlock(&vcpu->mutex);
2380 }
2381 /* Ensure that we re-enable gisa if the non-PV guest used it but the PV guest did not. */
2382 if (use_gisa)
2383 kvm_s390_gisa_enable(kvm);
2384 return ret;
2385}
2386
2387/**
2388 * kvm_s390_cpus_to_pv - Convert all non-protected vCPUs in a protected VM
2389 * to protected.
2390 * @kvm: the VM whose protected vCPUs are to be converted
2391 * @rc: return value for the RC field of the UVC (in case of error)
2392 * @rrc: return value for the RRC field of the UVC (in case of error)
2393 *
2394 * Tries to undo the conversion in case of error.
2395 *
2396 * Return: 0 in case of success, otherwise -EIO
2397 */
2398static int kvm_s390_cpus_to_pv(struct kvm *kvm, u16 *rc, u16 *rrc)
2399{
2400 unsigned long i;
2401 int r = 0;
2402 u16 dummy;
2403
2404 struct kvm_vcpu *vcpu;
2405
2406 /* Disable the GISA if the ultravisor does not support AIV. */
2407 if (!test_bit_inv(BIT_UV_FEAT_AIV, &uv_info.uv_feature_indications))
2408 kvm_s390_gisa_disable(kvm);
2409
2410 kvm_for_each_vcpu(i, vcpu, kvm) {
2411 mutex_lock(&vcpu->mutex);
2412 r = kvm_s390_pv_create_cpu(vcpu, rc, rrc);
2413 mutex_unlock(&vcpu->mutex);
2414 if (r)
2415 break;
2416 }
2417 if (r)
2418 kvm_s390_cpus_from_pv(kvm, &dummy, &dummy);
2419 return r;
2420}
2421
2422/*
2423 * Here we provide user space with a direct interface to query UV
2424 * related data like UV maxima and available features as well as
2425 * feature specific data.
2426 *
2427 * To facilitate future extension of the data structures we'll try to
2428 * write data up to the maximum requested length.
2429 */
2430static ssize_t kvm_s390_handle_pv_info(struct kvm_s390_pv_info *info)
2431{
2432 ssize_t len_min;
2433
2434 switch (info->header.id) {
2435 case KVM_PV_INFO_VM: {
2436 len_min = sizeof(info->header) + sizeof(info->vm);
2437
2438 if (info->header.len_max < len_min)
2439 return -EINVAL;
2440
2441 memcpy(info->vm.inst_calls_list,
2442 uv_info.inst_calls_list,
2443 sizeof(uv_info.inst_calls_list));
2444
2445 /* It's max cpuid not max cpus, so it's off by one */
2446 info->vm.max_cpus = uv_info.max_guest_cpu_id + 1;
2447 info->vm.max_guests = uv_info.max_num_sec_conf;
2448 info->vm.max_guest_addr = uv_info.max_sec_stor_addr;
2449 info->vm.feature_indication = uv_info.uv_feature_indications;
2450
2451 return len_min;
2452 }
2453 case KVM_PV_INFO_DUMP: {
2454 len_min = sizeof(info->header) + sizeof(info->dump);
2455
2456 if (info->header.len_max < len_min)
2457 return -EINVAL;
2458
2459 info->dump.dump_cpu_buffer_len = uv_info.guest_cpu_stor_len;
2460 info->dump.dump_config_mem_buffer_per_1m = uv_info.conf_dump_storage_state_len;
2461 info->dump.dump_config_finalize_len = uv_info.conf_dump_finalize_len;
2462 return len_min;
2463 }
2464 default:
2465 return -EINVAL;
2466 }
2467}
2468
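/*
 * Handle the KVM_PV_DUMP subcommands: KVM_PV_DUMP_INIT blocks all vCPUs
 * and starts a configuration dump, KVM_PV_DUMP_CONFIG_STOR_STATE dumps
 * the storage state, and KVM_PV_DUMP_COMPLETE finalizes the dump.
 */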
2469static int kvm_s390_pv_dmp(struct kvm *kvm, struct kvm_pv_cmd *cmd,
2470 struct kvm_s390_pv_dmp dmp)
2471{
2472 int r = -EINVAL;
2473 void __user *result_buff = (void __user *)dmp.buff_addr;
2474
2475 switch (dmp.subcmd) {
2476 case KVM_PV_DUMP_INIT: {
2477 if (kvm->arch.pv.dumping)
2478 break;
2479
2480 /*
2481 * Block SIE entry as concurrent dump UVCs could lead
2482		 * to validity intercepts.
2483 */
2484 kvm_s390_vcpu_block_all(kvm);
2485
2486 r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2487 UVC_CMD_DUMP_INIT, &cmd->rc, &cmd->rrc);
2488 KVM_UV_EVENT(kvm, 3, "PROTVIRT DUMP INIT: rc %x rrc %x",
2489 cmd->rc, cmd->rrc);
2490 if (!r) {
2491 kvm->arch.pv.dumping = true;
2492 } else {
2493 kvm_s390_vcpu_unblock_all(kvm);
2494 r = -EINVAL;
2495 }
2496 break;
2497 }
2498 case KVM_PV_DUMP_CONFIG_STOR_STATE: {
2499 if (!kvm->arch.pv.dumping)
2500 break;
2501
2502 /*
2503 * gaddr is an output parameter since we might stop
2504 * early. As dmp will be copied back in our caller, we
2505 * don't need to do it ourselves.
2506 */
2507 r = kvm_s390_pv_dump_stor_state(kvm, result_buff, &dmp.gaddr, dmp.buff_len,
2508 &cmd->rc, &cmd->rrc);
2509 break;
2510 }
2511 case KVM_PV_DUMP_COMPLETE: {
2512 if (!kvm->arch.pv.dumping)
2513 break;
2514
2515 r = -EINVAL;
2516 if (dmp.buff_len < uv_info.conf_dump_finalize_len)
2517 break;
2518
2519 r = kvm_s390_pv_dump_complete(kvm, result_buff,
2520 &cmd->rc, &cmd->rrc);
2521 break;
2522 }
2523 default:
2524 r = -ENOTTY;
2525 break;
2526 }
2527
2528 return r;
2529}
2530
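/*
 * Dispatcher for the KVM_S390_PV_COMMAND ioctl used to control protected
 * (secure execution) guests. All subcommands run under kvm->lock, except
 * KVM_PV_ASYNC_CLEANUP_PERFORM, which must be callable without it.
 */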
2531static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd)
2532{
2533 const bool need_lock = (cmd->cmd != KVM_PV_ASYNC_CLEANUP_PERFORM);
2534 void __user *argp = (void __user *)cmd->data;
2535 int r = 0;
2536 u16 dummy;
2537
2538 if (need_lock)
2539 mutex_lock(&kvm->lock);
2540
2541 switch (cmd->cmd) {
2542 case KVM_PV_ENABLE: {
2543 r = -EINVAL;
2544 if (kvm_s390_pv_is_protected(kvm))
2545 break;
2546
2547 /*
2548 * FMT 4 SIE needs esca. As we never switch back to bsca from
2549 * esca, we need no cleanup in the error cases below
2550 */
2551 r = sca_switch_to_extended(kvm);
2552 if (r)
2553 break;
2554
2555 mmap_write_lock(current->mm);
2556 r = gmap_mark_unmergeable();
2557 mmap_write_unlock(current->mm);
2558 if (r)
2559 break;
2560
2561 r = kvm_s390_pv_init_vm(kvm, &cmd->rc, &cmd->rrc);
2562 if (r)
2563 break;
2564
2565 r = kvm_s390_cpus_to_pv(kvm, &cmd->rc, &cmd->rrc);
2566 if (r)
2567 kvm_s390_pv_deinit_vm(kvm, &dummy, &dummy);
2568
2569 /* we need to block service interrupts from now on */
2570 set_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
2571 break;
2572 }
2573 case KVM_PV_ASYNC_CLEANUP_PREPARE:
2574 r = -EINVAL;
2575 if (!kvm_s390_pv_is_protected(kvm) || !async_destroy)
2576 break;
2577
2578 r = kvm_s390_cpus_from_pv(kvm, &cmd->rc, &cmd->rrc);
2579 /*
2580 * If a CPU could not be destroyed, destroy VM will also fail.
2581 * There is no point in trying to destroy it. Instead return
2582 * the rc and rrc from the first CPU that failed destroying.
2583 */
2584 if (r)
2585 break;
2586 r = kvm_s390_pv_set_aside(kvm, &cmd->rc, &cmd->rrc);
2587
2588 /* no need to block service interrupts any more */
2589 clear_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
2590 break;
2591 case KVM_PV_ASYNC_CLEANUP_PERFORM:
2592 r = -EINVAL;
2593 if (!async_destroy)
2594 break;
2595 /* kvm->lock must not be held; this is asserted inside the function. */
2596 r = kvm_s390_pv_deinit_aside_vm(kvm, &cmd->rc, &cmd->rrc);
2597 break;
2598 case KVM_PV_DISABLE: {
2599 r = -EINVAL;
2600 if (!kvm_s390_pv_is_protected(kvm))
2601 break;
2602
2603 r = kvm_s390_cpus_from_pv(kvm, &cmd->rc, &cmd->rrc);
2604 /*
2605 * If a CPU could not be destroyed, destroy VM will also fail.
2606 * There is no point in trying to destroy it. Instead return
2607 * the rc and rrc from the first CPU that failed destroying.
2608 */
2609 if (r)
2610 break;
2611 r = kvm_s390_pv_deinit_cleanup_all(kvm, &cmd->rc, &cmd->rrc);
2612
2613 /* no need to block service interrupts any more */
2614 clear_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
2615 break;
2616 }
2617 case KVM_PV_SET_SEC_PARMS: {
2618 struct kvm_s390_pv_sec_parm parms = {};
2619 void *hdr;
2620
2621 r = -EINVAL;
2622 if (!kvm_s390_pv_is_protected(kvm))
2623 break;
2624
2625 r = -EFAULT;
2626 if (copy_from_user(&parms, argp, sizeof(parms)))
2627 break;
2628
2629 /* Currently restricted to 8KB */
2630 r = -EINVAL;
2631 if (parms.length > PAGE_SIZE * 2)
2632 break;
2633
2634 r = -ENOMEM;
2635 hdr = vmalloc(parms.length);
2636 if (!hdr)
2637 break;
2638
2639 r = -EFAULT;
2640 if (!copy_from_user(hdr, (void __user *)parms.origin,
2641 parms.length))
2642 r = kvm_s390_pv_set_sec_parms(kvm, hdr, parms.length,
2643 &cmd->rc, &cmd->rrc);
2644
2645 vfree(hdr);
2646 break;
2647 }
2648 case KVM_PV_UNPACK: {
2649 struct kvm_s390_pv_unp unp = {};
2650
2651 r = -EINVAL;
2652 if (!kvm_s390_pv_is_protected(kvm) || !mm_is_protected(kvm->mm))
2653 break;
2654
2655 r = -EFAULT;
2656 if (copy_from_user(&unp, argp, sizeof(unp)))
2657 break;
2658
2659 r = kvm_s390_pv_unpack(kvm, unp.addr, unp.size, unp.tweak,
2660 &cmd->rc, &cmd->rrc);
2661 break;
2662 }
2663 case KVM_PV_VERIFY: {
2664 r = -EINVAL;
2665 if (!kvm_s390_pv_is_protected(kvm))
2666 break;
2667
2668 r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2669 UVC_CMD_VERIFY_IMG, &cmd->rc, &cmd->rrc);
2670 KVM_UV_EVENT(kvm, 3, "PROTVIRT VERIFY: rc %x rrc %x", cmd->rc,
2671 cmd->rrc);
2672 break;
2673 }
2674 case KVM_PV_PREP_RESET: {
2675 r = -EINVAL;
2676 if (!kvm_s390_pv_is_protected(kvm))
2677 break;
2678
2679 r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2680 UVC_CMD_PREPARE_RESET, &cmd->rc, &cmd->rrc);
2681 KVM_UV_EVENT(kvm, 3, "PROTVIRT PREP RESET: rc %x rrc %x",
2682 cmd->rc, cmd->rrc);
2683 break;
2684 }
2685 case KVM_PV_UNSHARE_ALL: {
2686 r = -EINVAL;
2687 if (!kvm_s390_pv_is_protected(kvm))
2688 break;
2689
2690 r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2691 UVC_CMD_SET_UNSHARE_ALL, &cmd->rc, &cmd->rrc);
2692 KVM_UV_EVENT(kvm, 3, "PROTVIRT UNSHARE: rc %x rrc %x",
2693 cmd->rc, cmd->rrc);
2694 break;
2695 }
2696 case KVM_PV_INFO: {
2697 struct kvm_s390_pv_info info = {};
2698 ssize_t data_len;
2699
2700 /*
2701 * No need to check the VM protection here.
2702 *
2703 * Maybe user space wants to query some of the data
2704 * when the VM is still unprotected. If we see the
2705 * need to fence a new data command we can still
2706 * return an error in the info handler.
2707 */
2708
2709 r = -EFAULT;
2710 if (copy_from_user(&info, argp, sizeof(info.header)))
2711 break;
2712
2713 r = -EINVAL;
2714 if (info.header.len_max < sizeof(info.header))
2715 break;
2716
2717 data_len = kvm_s390_handle_pv_info(&info);
2718 if (data_len < 0) {
2719 r = data_len;
2720 break;
2721 }
2722 /*
2723 * If a data command struct is extended (multiple
2724 * times) this can be used to determine how much of it
2725 * is valid.
2726 */
2727 info.header.len_written = data_len;
2728
2729 r = -EFAULT;
2730 if (copy_to_user(argp, &info, data_len))
2731 break;
2732
2733 r = 0;
2734 break;
2735 }
2736 case KVM_PV_DUMP: {
2737 struct kvm_s390_pv_dmp dmp;
2738
2739 r = -EINVAL;
2740 if (!kvm_s390_pv_is_protected(kvm))
2741 break;
2742
2743 r = -EFAULT;
2744 if (copy_from_user(&dmp, argp, sizeof(dmp)))
2745 break;
2746
2747 r = kvm_s390_pv_dmp(kvm, cmd, dmp);
2748 if (r)
2749 break;
2750
2751 if (copy_to_user(argp, &dmp, sizeof(dmp))) {
2752 r = -EFAULT;
2753 break;
2754 }
2755
2756 break;
2757 }
2758 default:
2759 r = -ENOTTY;
2760 }
2761 if (need_lock)
2762 mutex_unlock(&kvm->lock);
2763
2764 return r;
2765}
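/*
 * Illustrative only: protected virtualization is driven from userspace
 * through the KVM_S390_PV_COMMAND vm ioctl, which lands in
 * kvm_s390_handle_pv() above. A rough sketch for turning a VM into a
 * protected VM (uapi struct kvm_pv_cmd; error handling omitted):
 *
 *	struct kvm_pv_cmd pv = { .cmd = KVM_PV_ENABLE };
 *	ioctl(vm_fd, KVM_S390_PV_COMMAND, &pv);
 *	// on failure, pv.rc/pv.rrc carry the Ultravisor return codes
 */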
2766
2767static bool access_key_invalid(u8 access_key)
2768{
2769 return access_key > 0xf;
2770}
2771
2772static int kvm_s390_vm_mem_op(struct kvm *kvm, struct kvm_s390_mem_op *mop)
2773{
2774 void __user *uaddr = (void __user *)mop->buf;
2775 u64 supported_flags;
2776 void *tmpbuf = NULL;
2777 int r, srcu_idx;
2778
2779 supported_flags = KVM_S390_MEMOP_F_SKEY_PROTECTION
2780 | KVM_S390_MEMOP_F_CHECK_ONLY;
2781 if (mop->flags & ~supported_flags || !mop->size)
2782 return -EINVAL;
2783 if (mop->size > MEM_OP_MAX_SIZE)
2784 return -E2BIG;
2785 /*
2786	 * This is technically a heuristic only: if kvm->lock is not
2787	 * taken, it is not guaranteed that the VM is/remains non-protected.
2788	 * This is ok from a kernel perspective; wrongdoing is detected
2789	 * on the access, -EFAULT is returned, and the VM may crash the
2790	 * next time it accesses the memory in question.
2791	 * There is no sane use case for doing a mode switch and a memop
2792	 * on two different CPUs at the same time.
2793 */
2794 if (kvm_s390_pv_get_handle(kvm))
2795 return -EINVAL;
2796 if (mop->flags & KVM_S390_MEMOP_F_SKEY_PROTECTION) {
2797 if (access_key_invalid(mop->key))
2798 return -EINVAL;
2799 } else {
2800 mop->key = 0;
2801 }
2802 if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
2803 tmpbuf = vmalloc(mop->size);
2804 if (!tmpbuf)
2805 return -ENOMEM;
2806 }
2807
2808 srcu_idx = srcu_read_lock(&kvm->srcu);
2809
2810 if (kvm_is_error_gpa(kvm, mop->gaddr)) {
2811 r = PGM_ADDRESSING;
2812 goto out_unlock;
2813 }
2814
2815 switch (mop->op) {
2816 case KVM_S390_MEMOP_ABSOLUTE_READ: {
2817 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
2818 r = check_gpa_range(kvm, mop->gaddr, mop->size, GACC_FETCH, mop->key);
2819 } else {
2820 r = access_guest_abs_with_key(kvm, mop->gaddr, tmpbuf,
2821 mop->size, GACC_FETCH, mop->key);
2822 if (r == 0) {
2823 if (copy_to_user(uaddr, tmpbuf, mop->size))
2824 r = -EFAULT;
2825 }
2826 }
2827 break;
2828 }
2829 case KVM_S390_MEMOP_ABSOLUTE_WRITE: {
2830 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
2831 r = check_gpa_range(kvm, mop->gaddr, mop->size, GACC_STORE, mop->key);
2832 } else {
2833 if (copy_from_user(tmpbuf, uaddr, mop->size)) {
2834 r = -EFAULT;
2835 break;
2836 }
2837 r = access_guest_abs_with_key(kvm, mop->gaddr, tmpbuf,
2838 mop->size, GACC_STORE, mop->key);
2839 }
2840 break;
2841 }
2842 default:
2843 r = -EINVAL;
2844 }
2845
2846out_unlock:
2847 srcu_read_unlock(&kvm->srcu, srcu_idx);
2848
2849 vfree(tmpbuf);
2850 return r;
2851}
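/*
 * Illustrative only: a minimal sketch of how userspace might drive the
 * vm memop handler above via the KVM_S390_MEM_OP vm ioctl. Field names
 * follow the uapi struct kvm_s390_mem_op; the values are made up for the
 * example and error handling is omitted.
 *
 *	struct kvm_s390_mem_op mop = {
 *		.gaddr = 0x10000,			// guest absolute address
 *		.size  = 512,				// <= MEM_OP_MAX_SIZE
 *		.op    = KVM_S390_MEMOP_ABSOLUTE_READ,
 *		.buf   = (__u64)(unsigned long)buffer,	// userspace buffer
 *		.flags = KVM_S390_MEMOP_F_SKEY_PROTECTION,
 *		.key   = 6,				// access key 0..15
 *	};
 *	ioctl(vm_fd, KVM_S390_MEM_OP, &mop);
 */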
2852
2853long kvm_arch_vm_ioctl(struct file *filp,
2854 unsigned int ioctl, unsigned long arg)
2855{
2856 struct kvm *kvm = filp->private_data;
2857 void __user *argp = (void __user *)arg;
2858 struct kvm_device_attr attr;
2859 int r;
2860
2861 switch (ioctl) {
2862 case KVM_S390_INTERRUPT: {
2863 struct kvm_s390_interrupt s390int;
2864
2865 r = -EFAULT;
2866 if (copy_from_user(&s390int, argp, sizeof(s390int)))
2867 break;
2868 r = kvm_s390_inject_vm(kvm, &s390int);
2869 break;
2870 }
2871 case KVM_CREATE_IRQCHIP: {
2872 struct kvm_irq_routing_entry routing;
2873
2874 r = -EINVAL;
2875 if (kvm->arch.use_irqchip) {
2876 /* Set up dummy routing. */
2877 memset(&routing, 0, sizeof(routing));
2878 r = kvm_set_irq_routing(kvm, &routing, 0, 0);
2879 }
2880 break;
2881 }
2882 case KVM_SET_DEVICE_ATTR: {
2883 r = -EFAULT;
2884 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2885 break;
2886 r = kvm_s390_vm_set_attr(kvm, &attr);
2887 break;
2888 }
2889 case KVM_GET_DEVICE_ATTR: {
2890 r = -EFAULT;
2891 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2892 break;
2893 r = kvm_s390_vm_get_attr(kvm, &attr);
2894 break;
2895 }
2896 case KVM_HAS_DEVICE_ATTR: {
2897 r = -EFAULT;
2898 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2899 break;
2900 r = kvm_s390_vm_has_attr(kvm, &attr);
2901 break;
2902 }
2903 case KVM_S390_GET_SKEYS: {
2904 struct kvm_s390_skeys args;
2905
2906 r = -EFAULT;
2907 if (copy_from_user(&args, argp,
2908 sizeof(struct kvm_s390_skeys)))
2909 break;
2910 r = kvm_s390_get_skeys(kvm, &args);
2911 break;
2912 }
2913 case KVM_S390_SET_SKEYS: {
2914 struct kvm_s390_skeys args;
2915
2916 r = -EFAULT;
2917 if (copy_from_user(&args, argp,
2918 sizeof(struct kvm_s390_skeys)))
2919 break;
2920 r = kvm_s390_set_skeys(kvm, &args);
2921 break;
2922 }
2923 case KVM_S390_GET_CMMA_BITS: {
2924 struct kvm_s390_cmma_log args;
2925
2926 r = -EFAULT;
2927 if (copy_from_user(&args, argp, sizeof(args)))
2928 break;
2929 mutex_lock(&kvm->slots_lock);
2930 r = kvm_s390_get_cmma_bits(kvm, &args);
2931 mutex_unlock(&kvm->slots_lock);
2932 if (!r) {
2933 r = copy_to_user(argp, &args, sizeof(args));
2934 if (r)
2935 r = -EFAULT;
2936 }
2937 break;
2938 }
2939 case KVM_S390_SET_CMMA_BITS: {
2940 struct kvm_s390_cmma_log args;
2941
2942 r = -EFAULT;
2943 if (copy_from_user(&args, argp, sizeof(args)))
2944 break;
2945 mutex_lock(&kvm->slots_lock);
2946 r = kvm_s390_set_cmma_bits(kvm, &args);
2947 mutex_unlock(&kvm->slots_lock);
2948 break;
2949 }
2950 case KVM_S390_PV_COMMAND: {
2951 struct kvm_pv_cmd args;
2952
2953 /* protvirt means user cpu state */
2954 kvm_s390_set_user_cpu_state_ctrl(kvm);
2955 r = 0;
2956 if (!is_prot_virt_host()) {
2957 r = -EINVAL;
2958 break;
2959 }
2960 if (copy_from_user(&args, argp, sizeof(args))) {
2961 r = -EFAULT;
2962 break;
2963 }
2964 if (args.flags) {
2965 r = -EINVAL;
2966 break;
2967 }
2968 /* must be called without kvm->lock */
2969 r = kvm_s390_handle_pv(kvm, &args);
2970 if (copy_to_user(argp, &args, sizeof(args))) {
2971 r = -EFAULT;
2972 break;
2973 }
2974 break;
2975 }
2976 case KVM_S390_MEM_OP: {
2977 struct kvm_s390_mem_op mem_op;
2978
2979 if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
2980 r = kvm_s390_vm_mem_op(kvm, &mem_op);
2981 else
2982 r = -EFAULT;
2983 break;
2984 }
2985 case KVM_S390_ZPCI_OP: {
2986 struct kvm_s390_zpci_op args;
2987
2988 r = -EINVAL;
2989 if (!IS_ENABLED(CONFIG_VFIO_PCI_ZDEV_KVM))
2990 break;
2991 if (copy_from_user(&args, argp, sizeof(args))) {
2992 r = -EFAULT;
2993 break;
2994 }
2995 r = kvm_s390_pci_zpci_op(kvm, &args);
2996 break;
2997 }
2998 default:
2999 r = -ENOTTY;
3000 }
3001
3002 return r;
3003}
3004
3005static int kvm_s390_apxa_installed(void)
3006{
3007 struct ap_config_info info;
3008
3009 if (ap_instructions_available()) {
3010 if (ap_qci(&info) == 0)
3011 return info.apxa;
3012 }
3013
3014 return 0;
3015}
3016
3017/*
3018 * The format of the crypto control block (CRYCB) is specified in the 3 low
3019 * order bits of the CRYCB designation (CRYCBD) field as follows:
3020 * Format 0: Neither the message security assist extension 3 (MSAX3) nor the
3021 * AP extended addressing (APXA) facility are installed.
3022 * Format 1: The APXA facility is not installed but the MSAX3 facility is.
3023 * Format 2: Both the APXA and MSAX3 facilities are installed.
3024 */
3025static void kvm_s390_set_crycb_format(struct kvm *kvm)
3026{
3027 kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
3028
3029 /* Clear the CRYCB format bits - i.e., set format 0 by default */
3030 kvm->arch.crypto.crycbd &= ~(CRYCB_FORMAT_MASK);
3031
3032 /* Check whether MSAX3 is installed */
3033 if (!test_kvm_facility(kvm, 76))
3034 return;
3035
3036 if (kvm_s390_apxa_installed())
3037 kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
3038 else
3039 kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
3040}
3041
3042/*
3043 * kvm_arch_crypto_set_masks
3044 *
3045 * @kvm: pointer to the target guest's KVM struct containing the crypto masks
3046 * to be set.
3047 * @apm: the mask identifying the accessible AP adapters
3048 * @aqm: the mask identifying the accessible AP domains
3049 * @adm: the mask identifying the accessible AP control domains
3050 *
3051 * Set the masks that identify the adapters, domains and control domains to
3052 * which the KVM guest is granted access.
3053 *
3054 * Note: The kvm->lock mutex must be locked by the caller before invoking this
3055 * function.
3056 */
3057void kvm_arch_crypto_set_masks(struct kvm *kvm, unsigned long *apm,
3058 unsigned long *aqm, unsigned long *adm)
3059{
3060 struct kvm_s390_crypto_cb *crycb = kvm->arch.crypto.crycb;
3061
3062 kvm_s390_vcpu_block_all(kvm);
3063
3064 switch (kvm->arch.crypto.crycbd & CRYCB_FORMAT_MASK) {
3065 case CRYCB_FORMAT2: /* APCB1 use 256 bits */
3066 memcpy(crycb->apcb1.apm, apm, 32);
3067 VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx %016lx %016lx %016lx",
3068 apm[0], apm[1], apm[2], apm[3]);
3069 memcpy(crycb->apcb1.aqm, aqm, 32);
3070 VM_EVENT(kvm, 3, "SET CRYCB: aqm %016lx %016lx %016lx %016lx",
3071 aqm[0], aqm[1], aqm[2], aqm[3]);
3072 memcpy(crycb->apcb1.adm, adm, 32);
3073 VM_EVENT(kvm, 3, "SET CRYCB: adm %016lx %016lx %016lx %016lx",
3074 adm[0], adm[1], adm[2], adm[3]);
3075 break;
3076 case CRYCB_FORMAT1:
3077 case CRYCB_FORMAT0: /* Fall through both use APCB0 */
3078 memcpy(crycb->apcb0.apm, apm, 8);
3079 memcpy(crycb->apcb0.aqm, aqm, 2);
3080 memcpy(crycb->apcb0.adm, adm, 2);
3081 VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx aqm %04x adm %04x",
3082 apm[0], *((unsigned short *)aqm),
3083 *((unsigned short *)adm));
3084 break;
3085 default: /* Can not happen */
3086 break;
3087 }
3088
3089 /* recreate the shadow crycb for each vcpu */
3090 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
3091 kvm_s390_vcpu_unblock_all(kvm);
3092}
3093EXPORT_SYMBOL_GPL(kvm_arch_crypto_set_masks);
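/*
 * Illustrative only: kvm_arch_crypto_set_masks() is exported for use by
 * modules that pass AP queues through to a guest (e.g. the vfio_ap
 * driver). Per the kernel-doc above, a caller is expected to hold
 * kvm->lock, roughly along these lines (the mask variables are
 * placeholders for this sketch):
 *
 *	mutex_lock(&kvm->lock);
 *	kvm_arch_crypto_set_masks(kvm, apm, aqm, adm);
 *	mutex_unlock(&kvm->lock);
 */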
3094
3095/*
3096 * kvm_arch_crypto_clear_masks
3097 *
3098 * @kvm: pointer to the target guest's KVM struct containing the crypto masks
3099 * to be cleared.
3100 *
3101 * Clear the masks that identify the adapters, domains and control domains to
3102 * which the KVM guest is granted access.
3103 *
3104 * Note: The kvm->lock mutex must be locked by the caller before invoking this
3105 * function.
3106 */
3107void kvm_arch_crypto_clear_masks(struct kvm *kvm)
3108{
3109 kvm_s390_vcpu_block_all(kvm);
3110
3111 memset(&kvm->arch.crypto.crycb->apcb0, 0,
3112 sizeof(kvm->arch.crypto.crycb->apcb0));
3113 memset(&kvm->arch.crypto.crycb->apcb1, 0,
3114 sizeof(kvm->arch.crypto.crycb->apcb1));
3115
3116 VM_EVENT(kvm, 3, "%s", "CLR CRYCB:");
3117 /* recreate the shadow crycb for each vcpu */
3118 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
3119 kvm_s390_vcpu_unblock_all(kvm);
3120}
3121EXPORT_SYMBOL_GPL(kvm_arch_crypto_clear_masks);
3122
3123static u64 kvm_s390_get_initial_cpuid(void)
3124{
3125 struct cpuid cpuid;
3126
3127 get_cpu_id(&cpuid);
3128 cpuid.version = 0xff;
3129 return *((u64 *) &cpuid);
3130}
3131
3132static void kvm_s390_crypto_init(struct kvm *kvm)
3133{
3134 kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
3135 kvm_s390_set_crycb_format(kvm);
3136 init_rwsem(&kvm->arch.crypto.pqap_hook_rwsem);
3137
3138 if (!test_kvm_facility(kvm, 76))
3139 return;
3140
3141 /* Enable AES/DEA protected key functions by default */
3142 kvm->arch.crypto.aes_kw = 1;
3143 kvm->arch.crypto.dea_kw = 1;
3144 get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
3145 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
3146 get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
3147 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
3148}
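/*
 * Note on the randomization above: the AES/DEA wrapping key masks are
 * freshly randomized for every VM. The intent is that each guest ends up
 * with its own effective wrapping keys, so protected (wrapped) keys
 * generated inside one guest are not usable in another guest or on the
 * host.
 */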
3149
3150static void sca_dispose(struct kvm *kvm)
3151{
3152 if (kvm->arch.use_esca)
3153 free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
3154 else
3155 free_page((unsigned long)(kvm->arch.sca));
3156 kvm->arch.sca = NULL;
3157}
3158
3159void kvm_arch_free_vm(struct kvm *kvm)
3160{
3161 if (IS_ENABLED(CONFIG_VFIO_PCI_ZDEV_KVM))
3162 kvm_s390_pci_clear_list(kvm);
3163
3164 __kvm_arch_free_vm(kvm);
3165}
3166
3167int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
3168{
3169 gfp_t alloc_flags = GFP_KERNEL_ACCOUNT;
3170 int i, rc;
3171 char debug_name[16];
3172 static unsigned long sca_offset;
3173
3174 rc = -EINVAL;
3175#ifdef CONFIG_KVM_S390_UCONTROL
3176 if (type & ~KVM_VM_S390_UCONTROL)
3177 goto out_err;
3178 if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
3179 goto out_err;
3180#else
3181 if (type)
3182 goto out_err;
3183#endif
3184
3185 rc = s390_enable_sie();
3186 if (rc)
3187 goto out_err;
3188
3189 rc = -ENOMEM;
3190
3191 if (!sclp.has_64bscao)
3192 alloc_flags |= GFP_DMA;
3193 rwlock_init(&kvm->arch.sca_lock);
3194 /* start with basic SCA */
3195 kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
3196 if (!kvm->arch.sca)
3197 goto out_err;
3198 mutex_lock(&kvm_lock);
3199 sca_offset += 16;
3200 if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
3201 sca_offset = 0;
3202 kvm->arch.sca = (struct bsca_block *)
3203 ((char *) kvm->arch.sca + sca_offset);
3204 mutex_unlock(&kvm_lock);
3205
3206 sprintf(debug_name, "kvm-%u", current->pid);
3207
3208 kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
3209 if (!kvm->arch.dbf)
3210 goto out_err;
3211
3212 BUILD_BUG_ON(sizeof(struct sie_page2) != 4096);
3213 kvm->arch.sie_page2 =
3214 (struct sie_page2 *) get_zeroed_page(GFP_KERNEL_ACCOUNT | GFP_DMA);
3215 if (!kvm->arch.sie_page2)
3216 goto out_err;
3217
3218 kvm->arch.sie_page2->kvm = kvm;
3219 kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
3220
3221 for (i = 0; i < kvm_s390_fac_size(); i++) {
3222 kvm->arch.model.fac_mask[i] = stfle_fac_list[i] &
3223 (kvm_s390_fac_base[i] |
3224 kvm_s390_fac_ext[i]);
3225 kvm->arch.model.fac_list[i] = stfle_fac_list[i] &
3226 kvm_s390_fac_base[i];
3227 }
3228 kvm->arch.model.subfuncs = kvm_s390_available_subfunc;
3229
3230 /* we are always in czam mode - even on pre z14 machines */
3231 set_kvm_facility(kvm->arch.model.fac_mask, 138);
3232 set_kvm_facility(kvm->arch.model.fac_list, 138);
3233 /* we emulate STHYI in kvm */
3234 set_kvm_facility(kvm->arch.model.fac_mask, 74);
3235 set_kvm_facility(kvm->arch.model.fac_list, 74);
3236 if (MACHINE_HAS_TLB_GUEST) {
3237 set_kvm_facility(kvm->arch.model.fac_mask, 147);
3238 set_kvm_facility(kvm->arch.model.fac_list, 147);
3239 }
3240
3241 if (css_general_characteristics.aiv && test_facility(65))
3242 set_kvm_facility(kvm->arch.model.fac_mask, 65);
3243
3244 kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
3245 kvm->arch.model.ibc = sclp.ibc & 0x0fff;
3246
3247 kvm_s390_crypto_init(kvm);
3248
3249 if (IS_ENABLED(CONFIG_VFIO_PCI_ZDEV_KVM)) {
3250 mutex_lock(&kvm->lock);
3251 kvm_s390_pci_init_list(kvm);
3252 kvm_s390_vcpu_pci_enable_interp(kvm);
3253 mutex_unlock(&kvm->lock);
3254 }
3255
3256 mutex_init(&kvm->arch.float_int.ais_lock);
3257 spin_lock_init(&kvm->arch.float_int.lock);
3258 for (i = 0; i < FIRQ_LIST_COUNT; i++)
3259 INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
3260 init_waitqueue_head(&kvm->arch.ipte_wq);
3261 mutex_init(&kvm->arch.ipte_mutex);
3262
3263 debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
3264 VM_EVENT(kvm, 3, "vm created with type %lu", type);
3265
3266 if (type & KVM_VM_S390_UCONTROL) {
3267 kvm->arch.gmap = NULL;
3268 kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
3269 } else {
3270 if (sclp.hamax == U64_MAX)
3271 kvm->arch.mem_limit = TASK_SIZE_MAX;
3272 else
3273 kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX,
3274 sclp.hamax + 1);
3275 kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
3276 if (!kvm->arch.gmap)
3277 goto out_err;
3278 kvm->arch.gmap->private = kvm;
3279 kvm->arch.gmap->pfault_enabled = 0;
3280 }
3281
3282 kvm->arch.use_pfmfi = sclp.has_pfmfi;
3283 kvm->arch.use_skf = sclp.has_skey;
3284 spin_lock_init(&kvm->arch.start_stop_lock);
3285 kvm_s390_vsie_init(kvm);
3286 if (use_gisa)
3287 kvm_s390_gisa_init(kvm);
3288 INIT_LIST_HEAD(&kvm->arch.pv.need_cleanup);
3289 kvm->arch.pv.set_aside = NULL;
3290 KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
3291
3292 return 0;
3293out_err:
3294 free_page((unsigned long)kvm->arch.sie_page2);
3295 debug_unregister(kvm->arch.dbf);
3296 sca_dispose(kvm);
3297 KVM_EVENT(3, "creation of vm failed: %d", rc);
3298 return rc;
3299}
3300
3301void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
3302{
3303 u16 rc, rrc;
3304
3305 VCPU_EVENT(vcpu, 3, "%s", "free cpu");
3306 trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
3307 kvm_s390_clear_local_irqs(vcpu);
3308 kvm_clear_async_pf_completion_queue(vcpu);
3309 if (!kvm_is_ucontrol(vcpu->kvm))
3310 sca_del_vcpu(vcpu);
3311 kvm_s390_update_topology_change_report(vcpu->kvm, 1);
3312
3313 if (kvm_is_ucontrol(vcpu->kvm))
3314 gmap_remove(vcpu->arch.gmap);
3315
3316 if (vcpu->kvm->arch.use_cmma)
3317 kvm_s390_vcpu_unsetup_cmma(vcpu);
3318	/* We cannot hold the vcpu mutex here, we are already dying */
3319 if (kvm_s390_pv_cpu_get_handle(vcpu))
3320 kvm_s390_pv_destroy_cpu(vcpu, &rc, &rrc);
3321 free_page((unsigned long)(vcpu->arch.sie_block));
3322}
3323
3324void kvm_arch_destroy_vm(struct kvm *kvm)
3325{
3326 u16 rc, rrc;
3327
3328 kvm_destroy_vcpus(kvm);
3329 sca_dispose(kvm);
3330 kvm_s390_gisa_destroy(kvm);
3331 /*
3332 * We are already at the end of life and kvm->lock is not taken.
3333 * This is ok as the file descriptor is closed by now and nobody
3334 * can mess with the pv state.
3335 */
3336 kvm_s390_pv_deinit_cleanup_all(kvm, &rc, &rrc);
3337 /*
3338 * Remove the mmu notifier only when the whole KVM VM is torn down,
3339 * and only if one was registered to begin with. If the VM is
3340	 * currently not protected, but has previously been protected,
3341 * then it's possible that the notifier is still registered.
3342 */
3343 if (kvm->arch.pv.mmu_notifier.ops)
3344 mmu_notifier_unregister(&kvm->arch.pv.mmu_notifier, kvm->mm);
3345
3346 debug_unregister(kvm->arch.dbf);
3347 free_page((unsigned long)kvm->arch.sie_page2);
3348 if (!kvm_is_ucontrol(kvm))
3349 gmap_remove(kvm->arch.gmap);
3350 kvm_s390_destroy_adapters(kvm);
3351 kvm_s390_clear_float_irqs(kvm);
3352 kvm_s390_vsie_destroy(kvm);
3353 KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
3354}
3355
3356/* Section: vcpu related */
3357static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
3358{
3359 vcpu->arch.gmap = gmap_create(current->mm, -1UL);
3360 if (!vcpu->arch.gmap)
3361 return -ENOMEM;
3362 vcpu->arch.gmap->private = vcpu->kvm;
3363
3364 return 0;
3365}
3366
3367static void sca_del_vcpu(struct kvm_vcpu *vcpu)
3368{
3369 if (!kvm_s390_use_sca_entries())
3370 return;
3371 read_lock(&vcpu->kvm->arch.sca_lock);
3372 if (vcpu->kvm->arch.use_esca) {
3373 struct esca_block *sca = vcpu->kvm->arch.sca;
3374
3375 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
3376 sca->cpu[vcpu->vcpu_id].sda = 0;
3377 } else {
3378 struct bsca_block *sca = vcpu->kvm->arch.sca;
3379
3380 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
3381 sca->cpu[vcpu->vcpu_id].sda = 0;
3382 }
3383 read_unlock(&vcpu->kvm->arch.sca_lock);
3384}
3385
3386static void sca_add_vcpu(struct kvm_vcpu *vcpu)
3387{
3388 if (!kvm_s390_use_sca_entries()) {
3389 phys_addr_t sca_phys = virt_to_phys(vcpu->kvm->arch.sca);
3390
3391 /* we still need the basic sca for the ipte control */
3392 vcpu->arch.sie_block->scaoh = sca_phys >> 32;
3393 vcpu->arch.sie_block->scaol = sca_phys;
3394 return;
3395 }
3396 read_lock(&vcpu->kvm->arch.sca_lock);
3397 if (vcpu->kvm->arch.use_esca) {
3398 struct esca_block *sca = vcpu->kvm->arch.sca;
3399 phys_addr_t sca_phys = virt_to_phys(sca);
3400
3401 sca->cpu[vcpu->vcpu_id].sda = virt_to_phys(vcpu->arch.sie_block);
3402 vcpu->arch.sie_block->scaoh = sca_phys >> 32;
3403 vcpu->arch.sie_block->scaol = sca_phys & ESCA_SCAOL_MASK;
3404 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
3405 set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
3406 } else {
3407 struct bsca_block *sca = vcpu->kvm->arch.sca;
3408 phys_addr_t sca_phys = virt_to_phys(sca);
3409
3410 sca->cpu[vcpu->vcpu_id].sda = virt_to_phys(vcpu->arch.sie_block);
3411 vcpu->arch.sie_block->scaoh = sca_phys >> 32;
3412 vcpu->arch.sie_block->scaol = sca_phys;
3413 set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
3414 }
3415 read_unlock(&vcpu->kvm->arch.sca_lock);
3416}
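/*
 * The SCA origin is a physical address that does not fit into a single
 * 32-bit SIE control block field, so it is split: scaoh holds the bits
 * above bit 31 and scaol the lower 32 bits (masked with ESCA_SCAOL_MASK
 * for the extended SCA, as done above).
 */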
3417
3418/* Basic SCA to Extended SCA data copy routines */
3419static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
3420{
3421 d->sda = s->sda;
3422 d->sigp_ctrl.c = s->sigp_ctrl.c;
3423 d->sigp_ctrl.scn = s->sigp_ctrl.scn;
3424}
3425
3426static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
3427{
3428 int i;
3429
3430 d->ipte_control = s->ipte_control;
3431 d->mcn[0] = s->mcn;
3432 for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
3433 sca_copy_entry(&d->cpu[i], &s->cpu[i]);
3434}
3435
3436static int sca_switch_to_extended(struct kvm *kvm)
3437{
3438 struct bsca_block *old_sca = kvm->arch.sca;
3439 struct esca_block *new_sca;
3440 struct kvm_vcpu *vcpu;
3441 unsigned long vcpu_idx;
3442 u32 scaol, scaoh;
3443 phys_addr_t new_sca_phys;
3444
3445 if (kvm->arch.use_esca)
3446 return 0;
3447
3448 new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL_ACCOUNT | __GFP_ZERO);
3449 if (!new_sca)
3450 return -ENOMEM;
3451
3452 new_sca_phys = virt_to_phys(new_sca);
3453 scaoh = new_sca_phys >> 32;
3454 scaol = new_sca_phys & ESCA_SCAOL_MASK;
3455
3456 kvm_s390_vcpu_block_all(kvm);
3457 write_lock(&kvm->arch.sca_lock);
3458
3459 sca_copy_b_to_e(new_sca, old_sca);
3460
3461 kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
3462 vcpu->arch.sie_block->scaoh = scaoh;
3463 vcpu->arch.sie_block->scaol = scaol;
3464 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
3465 }
3466 kvm->arch.sca = new_sca;
3467 kvm->arch.use_esca = 1;
3468
3469 write_unlock(&kvm->arch.sca_lock);
3470 kvm_s390_vcpu_unblock_all(kvm);
3471
3472 free_page((unsigned long)old_sca);
3473
3474 VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
3475 old_sca, kvm->arch.sca);
3476 return 0;
3477}
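/*
 * Note on the locking above: all VCPUs are blocked and kicked out of SIE
 * and the sca_lock is held for writing while the SCA pointers in every
 * SIE control block are rewritten, so no CPU can enter SIE with a stale
 * basic SCA reference. The old basic SCA is only freed afterwards.
 */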
3478
3479static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
3480{
3481 int rc;
3482
3483 if (!kvm_s390_use_sca_entries()) {
3484 if (id < KVM_MAX_VCPUS)
3485 return true;
3486 return false;
3487 }
3488 if (id < KVM_S390_BSCA_CPU_SLOTS)
3489 return true;
3490 if (!sclp.has_esca || !sclp.has_64bscao)
3491 return false;
3492
3493 rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
3494
3495 return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
3496}
3497
3498/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
3499static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3500{
3501 WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
3502 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
3503 vcpu->arch.cputm_start = get_tod_clock_fast();
3504 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
3505}
3506
3507/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
3508static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3509{
3510 WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
3511 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
3512 vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
3513 vcpu->arch.cputm_start = 0;
3514 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
3515}
3516
3517/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
3518static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3519{
3520 WARN_ON_ONCE(vcpu->arch.cputm_enabled);
3521 vcpu->arch.cputm_enabled = true;
3522 __start_cpu_timer_accounting(vcpu);
3523}
3524
3525/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
3526static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3527{
3528 WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
3529 __stop_cpu_timer_accounting(vcpu);
3530 vcpu->arch.cputm_enabled = false;
3531}
3532
3533static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3534{
3535 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3536 __enable_cpu_timer_accounting(vcpu);
3537 preempt_enable();
3538}
3539
3540static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3541{
3542 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3543 __disable_cpu_timer_accounting(vcpu);
3544 preempt_enable();
3545}
3546
3547/* set the cpu timer - may only be called from the VCPU thread itself */
3548void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
3549{
3550 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3551 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
3552 if (vcpu->arch.cputm_enabled)
3553 vcpu->arch.cputm_start = get_tod_clock_fast();
3554 vcpu->arch.sie_block->cputm = cputm;
3555 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
3556 preempt_enable();
3557}
3558
3559/* update and get the cpu timer - can also be called from other VCPU threads */
3560__u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
3561{
3562 unsigned int seq;
3563 __u64 value;
3564
3565 if (unlikely(!vcpu->arch.cputm_enabled))
3566 return vcpu->arch.sie_block->cputm;
3567
3568 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3569 do {
3570 seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
3571 /*
3572 * If the writer would ever execute a read in the critical
3573 * section, e.g. in irq context, we have a deadlock.
3574 */
3575 WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
3576 value = vcpu->arch.sie_block->cputm;
3577 /* if cputm_start is 0, accounting is being started/stopped */
3578 if (likely(vcpu->arch.cputm_start))
3579 value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
3580 } while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
3581 preempt_enable();
3582 return value;
3583}
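/*
 * In other words, while accounting is enabled and running, the value
 * returned above is
 *
 *	value = sie_block->cputm - (get_tod_clock_fast() - cputm_start)
 *
 * i.e. the stored timer minus the host time elapsed since accounting was
 * (re)started, with the seqcount retry loop guarding against a concurrent
 * update by the VCPU thread.
 */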
3584
3585void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
3586{
3587
3588 gmap_enable(vcpu->arch.enabled_gmap);
3589 kvm_s390_set_cpuflags(vcpu, CPUSTAT_RUNNING);
3590 if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
3591 __start_cpu_timer_accounting(vcpu);
3592 vcpu->cpu = cpu;
3593}
3594
3595void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
3596{
3597 vcpu->cpu = -1;
3598 if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
3599 __stop_cpu_timer_accounting(vcpu);
3600 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_RUNNING);
3601 vcpu->arch.enabled_gmap = gmap_get_enabled();
3602 gmap_disable(vcpu->arch.enabled_gmap);
3603
3604}
3605
3606void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
3607{
3608 mutex_lock(&vcpu->kvm->lock);
3609 preempt_disable();
3610 vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
3611 vcpu->arch.sie_block->epdx = vcpu->kvm->arch.epdx;
3612 preempt_enable();
3613 mutex_unlock(&vcpu->kvm->lock);
3614 if (!kvm_is_ucontrol(vcpu->kvm)) {
3615 vcpu->arch.gmap = vcpu->kvm->arch.gmap;
3616 sca_add_vcpu(vcpu);
3617 }
3618 if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
3619 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
3620 /* make vcpu_load load the right gmap on the first trigger */
3621 vcpu->arch.enabled_gmap = vcpu->arch.gmap;
3622}
3623
3624static bool kvm_has_pckmo_subfunc(struct kvm *kvm, unsigned long nr)
3625{
3626 if (test_bit_inv(nr, (unsigned long *)&kvm->arch.model.subfuncs.pckmo) &&
3627 test_bit_inv(nr, (unsigned long *)&kvm_s390_available_subfunc.pckmo))
3628 return true;
3629 return false;
3630}
3631
3632static bool kvm_has_pckmo_ecc(struct kvm *kvm)
3633{
3634 /* At least one ECC subfunction must be present */
3635 return kvm_has_pckmo_subfunc(kvm, 32) ||
3636 kvm_has_pckmo_subfunc(kvm, 33) ||
3637 kvm_has_pckmo_subfunc(kvm, 34) ||
3638 kvm_has_pckmo_subfunc(kvm, 40) ||
3639 kvm_has_pckmo_subfunc(kvm, 41);
3640
3641}
3642
3643static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
3644{
3645 /*
3646 * If the AP instructions are not being interpreted and the MSAX3
3647 * facility is not configured for the guest, there is nothing to set up.
3648 */
3649 if (!vcpu->kvm->arch.crypto.apie && !test_kvm_facility(vcpu->kvm, 76))
3650 return;
3651
3652 vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
3653 vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
3654 vcpu->arch.sie_block->eca &= ~ECA_APIE;
3655 vcpu->arch.sie_block->ecd &= ~ECD_ECC;
3656
3657 if (vcpu->kvm->arch.crypto.apie)
3658 vcpu->arch.sie_block->eca |= ECA_APIE;
3659
3660 /* Set up protected key support */
3661 if (vcpu->kvm->arch.crypto.aes_kw) {
3662 vcpu->arch.sie_block->ecb3 |= ECB3_AES;
3663 /* ecc is also wrapped with AES key */
3664 if (kvm_has_pckmo_ecc(vcpu->kvm))
3665 vcpu->arch.sie_block->ecd |= ECD_ECC;
3666 }
3667
3668 if (vcpu->kvm->arch.crypto.dea_kw)
3669 vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
3670}
3671
3672void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
3673{
3674 free_page((unsigned long)phys_to_virt(vcpu->arch.sie_block->cbrlo));
3675 vcpu->arch.sie_block->cbrlo = 0;
3676}
3677
3678int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
3679{
3680 void *cbrlo_page = (void *)get_zeroed_page(GFP_KERNEL_ACCOUNT);
3681
3682 if (!cbrlo_page)
3683 return -ENOMEM;
3684
3685 vcpu->arch.sie_block->cbrlo = virt_to_phys(cbrlo_page);
3686 return 0;
3687}
3688
3689static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
3690{
3691 struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
3692
3693 vcpu->arch.sie_block->ibc = model->ibc;
3694 if (test_kvm_facility(vcpu->kvm, 7))
3695 vcpu->arch.sie_block->fac = virt_to_phys(model->fac_list);
3696}
3697
3698static int kvm_s390_vcpu_setup(struct kvm_vcpu *vcpu)
3699{
3700 int rc = 0;
3701 u16 uvrc, uvrrc;
3702
3703 atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
3704 CPUSTAT_SM |
3705 CPUSTAT_STOPPED);
3706
3707 if (test_kvm_facility(vcpu->kvm, 78))
3708 kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED2);
3709 else if (test_kvm_facility(vcpu->kvm, 8))
3710 kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED);
3711
3712 kvm_s390_vcpu_setup_model(vcpu);
3713
3714 /* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
3715 if (MACHINE_HAS_ESOP)
3716 vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT;
3717 if (test_kvm_facility(vcpu->kvm, 9))
3718 vcpu->arch.sie_block->ecb |= ECB_SRSI;
3719 if (test_kvm_facility(vcpu->kvm, 11))
3720 vcpu->arch.sie_block->ecb |= ECB_PTF;
3721 if (test_kvm_facility(vcpu->kvm, 73))
3722 vcpu->arch.sie_block->ecb |= ECB_TE;
3723 if (!kvm_is_ucontrol(vcpu->kvm))
3724 vcpu->arch.sie_block->ecb |= ECB_SPECI;
3725
3726 if (test_kvm_facility(vcpu->kvm, 8) && vcpu->kvm->arch.use_pfmfi)
3727 vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI;
3728 if (test_kvm_facility(vcpu->kvm, 130))
3729 vcpu->arch.sie_block->ecb2 |= ECB2_IEP;
3730 vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI;
3731 if (sclp.has_cei)
3732 vcpu->arch.sie_block->eca |= ECA_CEI;
3733 if (sclp.has_ib)
3734 vcpu->arch.sie_block->eca |= ECA_IB;
3735 if (sclp.has_siif)
3736 vcpu->arch.sie_block->eca |= ECA_SII;
3737 if (sclp.has_sigpif)
3738 vcpu->arch.sie_block->eca |= ECA_SIGPI;
3739 if (test_kvm_facility(vcpu->kvm, 129)) {
3740 vcpu->arch.sie_block->eca |= ECA_VX;
3741 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
3742 }
3743 if (test_kvm_facility(vcpu->kvm, 139))
3744 vcpu->arch.sie_block->ecd |= ECD_MEF;
3745 if (test_kvm_facility(vcpu->kvm, 156))
3746 vcpu->arch.sie_block->ecd |= ECD_ETOKENF;
3747 if (vcpu->arch.sie_block->gd) {
3748 vcpu->arch.sie_block->eca |= ECA_AIV;
3749 VCPU_EVENT(vcpu, 3, "AIV gisa format-%u enabled for cpu %03u",
3750 vcpu->arch.sie_block->gd & 0x3, vcpu->vcpu_id);
3751 }
3752 vcpu->arch.sie_block->sdnxo = virt_to_phys(&vcpu->run->s.regs.sdnx) | SDNXC;
3753 vcpu->arch.sie_block->riccbd = virt_to_phys(&vcpu->run->s.regs.riccb);
3754
3755 if (sclp.has_kss)
3756 kvm_s390_set_cpuflags(vcpu, CPUSTAT_KSS);
3757 else
3758 vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
3759
3760 if (vcpu->kvm->arch.use_cmma) {
3761 rc = kvm_s390_vcpu_setup_cmma(vcpu);
3762 if (rc)
3763 return rc;
3764 }
3765 hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
3766 vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
3767
3768 vcpu->arch.sie_block->hpid = HPID_KVM;
3769
3770 kvm_s390_vcpu_crypto_setup(vcpu);
3771
3772 kvm_s390_vcpu_pci_setup(vcpu);
3773
3774 mutex_lock(&vcpu->kvm->lock);
3775 if (kvm_s390_pv_is_protected(vcpu->kvm)) {
3776 rc = kvm_s390_pv_create_cpu(vcpu, &uvrc, &uvrrc);
3777 if (rc)
3778 kvm_s390_vcpu_unsetup_cmma(vcpu);
3779 }
3780 mutex_unlock(&vcpu->kvm->lock);
3781
3782 return rc;
3783}
3784
3785int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id)
3786{
3787 if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
3788 return -EINVAL;
3789 return 0;
3790}
3791
3792int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
3793{
3794 struct sie_page *sie_page;
3795 int rc;
3796
3797 BUILD_BUG_ON(sizeof(struct sie_page) != 4096);
3798 sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL_ACCOUNT);
3799 if (!sie_page)
3800 return -ENOMEM;
3801
3802 vcpu->arch.sie_block = &sie_page->sie_block;
3803 vcpu->arch.sie_block->itdba = virt_to_phys(&sie_page->itdb);
3804
3805 /* the real guest size will always be smaller than msl */
3806 vcpu->arch.sie_block->mso = 0;
3807 vcpu->arch.sie_block->msl = sclp.hamax;
3808
3809 vcpu->arch.sie_block->icpua = vcpu->vcpu_id;
3810 spin_lock_init(&vcpu->arch.local_int.lock);
3811 vcpu->arch.sie_block->gd = kvm_s390_get_gisa_desc(vcpu->kvm);
3812 seqcount_init(&vcpu->arch.cputm_seqcount);
3813
3814 vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
3815 kvm_clear_async_pf_completion_queue(vcpu);
3816 vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
3817 KVM_SYNC_GPRS |
3818 KVM_SYNC_ACRS |
3819 KVM_SYNC_CRS |
3820 KVM_SYNC_ARCH0 |
3821 KVM_SYNC_PFAULT |
3822 KVM_SYNC_DIAG318;
3823 kvm_s390_set_prefix(vcpu, 0);
3824 if (test_kvm_facility(vcpu->kvm, 64))
3825 vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
3826 if (test_kvm_facility(vcpu->kvm, 82))
3827 vcpu->run->kvm_valid_regs |= KVM_SYNC_BPBC;
3828 if (test_kvm_facility(vcpu->kvm, 133))
3829 vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB;
3830 if (test_kvm_facility(vcpu->kvm, 156))
3831 vcpu->run->kvm_valid_regs |= KVM_SYNC_ETOKEN;
3832 /* fprs can be synchronized via vrs, even if the guest has no vx. With
3833 * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
3834 */
3835 if (MACHINE_HAS_VX)
3836 vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
3837 else
3838 vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
3839
3840 if (kvm_is_ucontrol(vcpu->kvm)) {
3841 rc = __kvm_ucontrol_vcpu_init(vcpu);
3842 if (rc)
3843 goto out_free_sie_block;
3844 }
3845
3846 VM_EVENT(vcpu->kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK",
3847 vcpu->vcpu_id, vcpu, vcpu->arch.sie_block);
3848 trace_kvm_s390_create_vcpu(vcpu->vcpu_id, vcpu, vcpu->arch.sie_block);
3849
3850 rc = kvm_s390_vcpu_setup(vcpu);
3851 if (rc)
3852 goto out_ucontrol_uninit;
3853
3854 kvm_s390_update_topology_change_report(vcpu->kvm, 1);
3855 return 0;
3856
3857out_ucontrol_uninit:
3858 if (kvm_is_ucontrol(vcpu->kvm))
3859 gmap_remove(vcpu->arch.gmap);
3860out_free_sie_block:
3861 free_page((unsigned long)(vcpu->arch.sie_block));
3862 return rc;
3863}
3864
3865int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
3866{
3867 clear_bit(vcpu->vcpu_idx, vcpu->kvm->arch.gisa_int.kicked_mask);
3868 return kvm_s390_vcpu_has_irq(vcpu, 0);
3869}
3870
3871bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
3872{
3873 return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE);
3874}
3875
3876void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
3877{
3878 atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
3879 exit_sie(vcpu);
3880}
3881
3882void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
3883{
3884 atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
3885}
3886
3887static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
3888{
3889 atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
3890 exit_sie(vcpu);
3891}
3892
3893bool kvm_s390_vcpu_sie_inhibited(struct kvm_vcpu *vcpu)
3894{
3895 return atomic_read(&vcpu->arch.sie_block->prog20) &
3896 (PROG_BLOCK_SIE | PROG_REQUEST);
3897}
3898
3899static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
3900{
3901 atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
3902}
3903
3904/*
3905 * Kick a guest cpu out of (v)SIE and wait until (v)SIE is not running.
3906 * If the CPU is not running (e.g. waiting as idle) the function will
3907 * return immediately. */
3908void exit_sie(struct kvm_vcpu *vcpu)
3909{
3910 kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOP_INT);
3911 kvm_s390_vsie_kick(vcpu);
3912 while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
3913 cpu_relax();
3914}
3915
3916/* Kick a guest cpu out of SIE to process a request synchronously */
3917void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
3918{
3919 __kvm_make_request(req, vcpu);
3920 kvm_s390_vcpu_request(vcpu);
3921}
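/*
 * Typical usage: queue a request and synchronously kick the target VCPU
 * out of SIE so the request is handled before the next guest instruction,
 * e.g. as done by the gmap notifier below:
 *
 *	kvm_s390_sync_request(KVM_REQ_REFRESH_GUEST_PREFIX, vcpu);
 */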
3922
3923static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
3924 unsigned long end)
3925{
3926 struct kvm *kvm = gmap->private;
3927 struct kvm_vcpu *vcpu;
3928 unsigned long prefix;
3929 unsigned long i;
3930
3931 if (gmap_is_shadow(gmap))
3932 return;
3933 if (start >= 1UL << 31)
3934 /* We are only interested in prefix pages */
3935 return;
3936 kvm_for_each_vcpu(i, vcpu, kvm) {
3937 /* match against both prefix pages */
3938 prefix = kvm_s390_get_prefix(vcpu);
3939 if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
3940 VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
3941 start, end);
3942 kvm_s390_sync_request(KVM_REQ_REFRESH_GUEST_PREFIX, vcpu);
3943 }
3944 }
3945}
3946
3947bool kvm_arch_no_poll(struct kvm_vcpu *vcpu)
3948{
3949 /* do not poll with more than halt_poll_max_steal percent of steal time */
3950 if (S390_lowcore.avg_steal_timer * 100 / (TICK_USEC << 12) >=
3951 READ_ONCE(halt_poll_max_steal)) {
3952 vcpu->stat.halt_no_poll_steal++;
3953 return true;
3954 }
3955 return false;
3956}
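/*
 * Background for the check above (an informal sketch): avg_steal_timer is
 * accumulated in CPU-timer/TOD units per tick, and one microsecond
 * corresponds to 4096 (1 << 12) such units, so TICK_USEC << 12 is the
 * length of one tick in the same units. The quotient is therefore the
 * steal time as a percentage of a tick, which is compared against
 * halt_poll_max_steal.
 */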
3957
3958int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
3959{
3960 /* kvm common code refers to this, but never calls it */
3961 BUG();
3962 return 0;
3963}
3964
3965static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
3966 struct kvm_one_reg *reg)
3967{
3968 int r = -EINVAL;
3969
3970 switch (reg->id) {
3971 case KVM_REG_S390_TODPR:
3972 r = put_user(vcpu->arch.sie_block->todpr,
3973 (u32 __user *)reg->addr);
3974 break;
3975 case KVM_REG_S390_EPOCHDIFF:
3976 r = put_user(vcpu->arch.sie_block->epoch,
3977 (u64 __user *)reg->addr);
3978 break;
3979 case KVM_REG_S390_CPU_TIMER:
3980 r = put_user(kvm_s390_get_cpu_timer(vcpu),
3981 (u64 __user *)reg->addr);
3982 break;
3983 case KVM_REG_S390_CLOCK_COMP:
3984 r = put_user(vcpu->arch.sie_block->ckc,
3985 (u64 __user *)reg->addr);
3986 break;
3987 case KVM_REG_S390_PFTOKEN:
3988 r = put_user(vcpu->arch.pfault_token,
3989 (u64 __user *)reg->addr);
3990 break;
3991 case KVM_REG_S390_PFCOMPARE:
3992 r = put_user(vcpu->arch.pfault_compare,
3993 (u64 __user *)reg->addr);
3994 break;
3995 case KVM_REG_S390_PFSELECT:
3996 r = put_user(vcpu->arch.pfault_select,
3997 (u64 __user *)reg->addr);
3998 break;
3999 case KVM_REG_S390_PP:
4000 r = put_user(vcpu->arch.sie_block->pp,
4001 (u64 __user *)reg->addr);
4002 break;
4003 case KVM_REG_S390_GBEA:
4004 r = put_user(vcpu->arch.sie_block->gbea,
4005 (u64 __user *)reg->addr);
4006 break;
4007 default:
4008 break;
4009 }
4010
4011 return r;
4012}
4013
4014static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
4015 struct kvm_one_reg *reg)
4016{
4017 int r = -EINVAL;
4018 __u64 val;
4019
4020 switch (reg->id) {
4021 case KVM_REG_S390_TODPR:
4022 r = get_user(vcpu->arch.sie_block->todpr,
4023 (u32 __user *)reg->addr);
4024 break;
4025 case KVM_REG_S390_EPOCHDIFF:
4026 r = get_user(vcpu->arch.sie_block->epoch,
4027 (u64 __user *)reg->addr);
4028 break;
4029 case KVM_REG_S390_CPU_TIMER:
4030 r = get_user(val, (u64 __user *)reg->addr);
4031 if (!r)
4032 kvm_s390_set_cpu_timer(vcpu, val);
4033 break;
4034 case KVM_REG_S390_CLOCK_COMP:
4035 r = get_user(vcpu->arch.sie_block->ckc,
4036 (u64 __user *)reg->addr);
4037 break;
4038 case KVM_REG_S390_PFTOKEN:
4039 r = get_user(vcpu->arch.pfault_token,
4040 (u64 __user *)reg->addr);
4041 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
4042 kvm_clear_async_pf_completion_queue(vcpu);
4043 break;
4044 case KVM_REG_S390_PFCOMPARE:
4045 r = get_user(vcpu->arch.pfault_compare,
4046 (u64 __user *)reg->addr);
4047 break;
4048 case KVM_REG_S390_PFSELECT:
4049 r = get_user(vcpu->arch.pfault_select,
4050 (u64 __user *)reg->addr);
4051 break;
4052 case KVM_REG_S390_PP:
4053 r = get_user(vcpu->arch.sie_block->pp,
4054 (u64 __user *)reg->addr);
4055 break;
4056 case KVM_REG_S390_GBEA:
4057 r = get_user(vcpu->arch.sie_block->gbea,
4058 (u64 __user *)reg->addr);
4059 break;
4060 default:
4061 break;
4062 }
4063
4064 return r;
4065}
4066
4067static void kvm_arch_vcpu_ioctl_normal_reset(struct kvm_vcpu *vcpu)
4068{
4069 vcpu->arch.sie_block->gpsw.mask &= ~PSW_MASK_RI;
4070 vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
4071 memset(vcpu->run->s.regs.riccb, 0, sizeof(vcpu->run->s.regs.riccb));
4072
4073 kvm_clear_async_pf_completion_queue(vcpu);
4074 if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
4075 kvm_s390_vcpu_stop(vcpu);
4076 kvm_s390_clear_local_irqs(vcpu);
4077}
4078
4079static void kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
4080{
4081 /* Initial reset is a superset of the normal reset */
4082 kvm_arch_vcpu_ioctl_normal_reset(vcpu);
4083
4084 /*
4085	 * This equals the initial cpu reset in the PoP, but we don't switch to ESA.
4086	 * We reset not only the internal data, but also ...
4087 */
4088 vcpu->arch.sie_block->gpsw.mask = 0;
4089 vcpu->arch.sie_block->gpsw.addr = 0;
4090 kvm_s390_set_prefix(vcpu, 0);
4091 kvm_s390_set_cpu_timer(vcpu, 0);
4092 vcpu->arch.sie_block->ckc = 0;
4093 memset(vcpu->arch.sie_block->gcr, 0, sizeof(vcpu->arch.sie_block->gcr));
4094 vcpu->arch.sie_block->gcr[0] = CR0_INITIAL_MASK;
4095 vcpu->arch.sie_block->gcr[14] = CR14_INITIAL_MASK;
4096
4097 /* ... the data in sync regs */
4098 memset(vcpu->run->s.regs.crs, 0, sizeof(vcpu->run->s.regs.crs));
4099 vcpu->run->s.regs.ckc = 0;
4100 vcpu->run->s.regs.crs[0] = CR0_INITIAL_MASK;
4101 vcpu->run->s.regs.crs[14] = CR14_INITIAL_MASK;
4102 vcpu->run->psw_addr = 0;
4103 vcpu->run->psw_mask = 0;
4104 vcpu->run->s.regs.todpr = 0;
4105 vcpu->run->s.regs.cputm = 0;
4106 vcpu->run->s.regs.ckc = 0;
4107 vcpu->run->s.regs.pp = 0;
4108 vcpu->run->s.regs.gbea = 1;
4109 vcpu->run->s.regs.fpc = 0;
4110 /*
4111 * Do not reset these registers in the protected case, as some of
4112	 * them are overlaid and they are not accessible in this case
4113 * anyway.
4114 */
4115 if (!kvm_s390_pv_cpu_is_protected(vcpu)) {
4116 vcpu->arch.sie_block->gbea = 1;
4117 vcpu->arch.sie_block->pp = 0;
4118 vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
4119 vcpu->arch.sie_block->todpr = 0;
4120 }
4121}
4122
4123static void kvm_arch_vcpu_ioctl_clear_reset(struct kvm_vcpu *vcpu)
4124{
4125 struct kvm_sync_regs *regs = &vcpu->run->s.regs;
4126
4127 /* Clear reset is a superset of the initial reset */
4128 kvm_arch_vcpu_ioctl_initial_reset(vcpu);
4129
4130	memset(&regs->gprs, 0, sizeof(regs->gprs));
4131	memset(&regs->vrs, 0, sizeof(regs->vrs));
4132	memset(&regs->acrs, 0, sizeof(regs->acrs));
4133	memset(&regs->gscb, 0, sizeof(regs->gscb));
4134
4135 regs->etoken = 0;
4136 regs->etoken_extension = 0;
4137}
4138
4139int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
4140{
4141 vcpu_load(vcpu);
4142	memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
4143 vcpu_put(vcpu);
4144 return 0;
4145}
4146
4147int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
4148{
4149 vcpu_load(vcpu);
4150	memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
4151 vcpu_put(vcpu);
4152 return 0;
4153}
4154
4155int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
4156 struct kvm_sregs *sregs)
4157{
4158 vcpu_load(vcpu);
4159
4160 memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
4161 memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
4162
4163 vcpu_put(vcpu);
4164 return 0;
4165}
4166
4167int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
4168 struct kvm_sregs *sregs)
4169{
4170 vcpu_load(vcpu);
4171
4172 memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
4173 memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
4174
4175 vcpu_put(vcpu);
4176 return 0;
4177}
4178
4179int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
4180{
4181 int ret = 0;
4182
4183 vcpu_load(vcpu);
4184
4185 if (test_fp_ctl(fpu->fpc)) {
4186 ret = -EINVAL;
4187 goto out;
4188 }
4189 vcpu->run->s.regs.fpc = fpu->fpc;
4190 if (MACHINE_HAS_VX)
4191 convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
4192 (freg_t *) fpu->fprs);
4193 else
4194 memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
4195
4196out:
4197 vcpu_put(vcpu);
4198 return ret;
4199}
4200
4201int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
4202{
4203 vcpu_load(vcpu);
4204
4205 /* make sure we have the latest values */
4206 save_fpu_regs();
4207 if (MACHINE_HAS_VX)
4208 convert_vx_to_fp((freg_t *) fpu->fprs,
4209 (__vector128 *) vcpu->run->s.regs.vrs);
4210 else
4211 memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
4212 fpu->fpc = vcpu->run->s.regs.fpc;
4213
4214 vcpu_put(vcpu);
4215 return 0;
4216}
4217
4218static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
4219{
4220 int rc = 0;
4221
4222 if (!is_vcpu_stopped(vcpu))
4223 rc = -EBUSY;
4224 else {
4225 vcpu->run->psw_mask = psw.mask;
4226 vcpu->run->psw_addr = psw.addr;
4227 }
4228 return rc;
4229}
4230
4231int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
4232 struct kvm_translation *tr)
4233{
4234 return -EINVAL; /* not implemented yet */
4235}
4236
4237#define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
4238 KVM_GUESTDBG_USE_HW_BP | \
4239 KVM_GUESTDBG_ENABLE)
4240
4241int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
4242 struct kvm_guest_debug *dbg)
4243{
4244 int rc = 0;
4245
4246 vcpu_load(vcpu);
4247
4248 vcpu->guest_debug = 0;
4249 kvm_s390_clear_bp_data(vcpu);
4250
4251 if (dbg->control & ~VALID_GUESTDBG_FLAGS) {
4252 rc = -EINVAL;
4253 goto out;
4254 }
4255 if (!sclp.has_gpere) {
4256 rc = -EINVAL;
4257 goto out;
4258 }
4259
4260 if (dbg->control & KVM_GUESTDBG_ENABLE) {
4261 vcpu->guest_debug = dbg->control;
4262 /* enforce guest PER */
4263 kvm_s390_set_cpuflags(vcpu, CPUSTAT_P);
4264
4265 if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
4266 rc = kvm_s390_import_bp_data(vcpu, dbg);
4267 } else {
4268 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
4269 vcpu->arch.guestdbg.last_bp = 0;
4270 }
4271
4272 if (rc) {
4273 vcpu->guest_debug = 0;
4274 kvm_s390_clear_bp_data(vcpu);
4275 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
4276 }
4277
4278out:
4279 vcpu_put(vcpu);
4280 return rc;
4281}
4282
4283int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
4284 struct kvm_mp_state *mp_state)
4285{
4286 int ret;
4287
4288 vcpu_load(vcpu);
4289
4290 /* CHECK_STOP and LOAD are not supported yet */
4291 ret = is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
4292 KVM_MP_STATE_OPERATING;
4293
4294 vcpu_put(vcpu);
4295 return ret;
4296}
4297
4298int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
4299 struct kvm_mp_state *mp_state)
4300{
4301 int rc = 0;
4302
4303 vcpu_load(vcpu);
4304
4305 /* user space knows about this interface - let it control the state */
4306 kvm_s390_set_user_cpu_state_ctrl(vcpu->kvm);
4307
4308 switch (mp_state->mp_state) {
4309 case KVM_MP_STATE_STOPPED:
4310 rc = kvm_s390_vcpu_stop(vcpu);
4311 break;
4312 case KVM_MP_STATE_OPERATING:
4313 rc = kvm_s390_vcpu_start(vcpu);
4314 break;
4315 case KVM_MP_STATE_LOAD:
4316 if (!kvm_s390_pv_cpu_is_protected(vcpu)) {
4317 rc = -ENXIO;
4318 break;
4319 }
4320 rc = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_OPR_LOAD);
4321 break;
4322 case KVM_MP_STATE_CHECK_STOP:
4323 fallthrough; /* CHECK_STOP and LOAD are not supported yet */
4324 default:
4325 rc = -ENXIO;
4326 }
4327
4328 vcpu_put(vcpu);
4329 return rc;
4330}
4331
4332static bool ibs_enabled(struct kvm_vcpu *vcpu)
4333{
4334 return kvm_s390_test_cpuflags(vcpu, CPUSTAT_IBS);
4335}
4336
4337static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
4338{
4339retry:
4340 kvm_s390_vcpu_request_handled(vcpu);
4341 if (!kvm_request_pending(vcpu))
4342 return 0;
4343 /*
4344 * If the guest prefix changed, re-arm the ipte notifier for the
4345 * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
4346 * This ensures that the ipte instruction for this request has
4347 * already finished. We might race against a second unmapper that
4348	 * wants to set the blocking bit. Let's just retry the request loop.
4349 */
4350 if (kvm_check_request(KVM_REQ_REFRESH_GUEST_PREFIX, vcpu)) {
4351 int rc;
4352 rc = gmap_mprotect_notify(vcpu->arch.gmap,
4353 kvm_s390_get_prefix(vcpu),
4354 PAGE_SIZE * 2, PROT_WRITE);
4355 if (rc) {
4356 kvm_make_request(KVM_REQ_REFRESH_GUEST_PREFIX, vcpu);
4357 return rc;
4358 }
4359 goto retry;
4360 }
4361
4362 if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
4363 vcpu->arch.sie_block->ihcpu = 0xffff;
4364 goto retry;
4365 }
4366
4367 if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
4368 if (!ibs_enabled(vcpu)) {
4369 trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
4370 kvm_s390_set_cpuflags(vcpu, CPUSTAT_IBS);
4371 }
4372 goto retry;
4373 }
4374
4375 if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
4376 if (ibs_enabled(vcpu)) {
4377 trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
4378 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_IBS);
4379 }
4380 goto retry;
4381 }
4382
4383 if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
4384 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
4385 goto retry;
4386 }
4387
4388 if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) {
4389 /*
4390 * Disable CMM virtualization; we will emulate the ESSA
4391 * instruction manually, in order to provide additional
4392 * functionalities needed for live migration.
4393 */
4394 vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA;
4395 goto retry;
4396 }
4397
4398 if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) {
4399 /*
4400 * Re-enable CMM virtualization if CMMA is available and
4401 * CMM has been used.
4402 */
4403 if ((vcpu->kvm->arch.use_cmma) &&
4404 (vcpu->kvm->mm->context.uses_cmm))
4405 vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
4406 goto retry;
4407 }
4408
4409 /* we left the vsie handler, nothing to do, just clear the request */
4410 kvm_clear_request(KVM_REQ_VSIE_RESTART, vcpu);
4411
4412 return 0;
4413}
4414
4415static void __kvm_s390_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod)
4416{
4417 struct kvm_vcpu *vcpu;
4418 union tod_clock clk;
4419 unsigned long i;
4420
4421 preempt_disable();
4422
4423 store_tod_clock_ext(&clk);
4424
4425 kvm->arch.epoch = gtod->tod - clk.tod;
4426 kvm->arch.epdx = 0;
4427 if (test_kvm_facility(kvm, 139)) {
4428 kvm->arch.epdx = gtod->epoch_idx - clk.ei;
4429 if (kvm->arch.epoch > gtod->tod)
4430 kvm->arch.epdx -= 1;
4431 }
4432
4433 kvm_s390_vcpu_block_all(kvm);
4434 kvm_for_each_vcpu(i, vcpu, kvm) {
4435 vcpu->arch.sie_block->epoch = kvm->arch.epoch;
4436 vcpu->arch.sie_block->epdx = kvm->arch.epdx;
4437 }
4438
4439 kvm_s390_vcpu_unblock_all(kvm);
4440 preempt_enable();
4441}
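/*
 * Note on the epoch index handling above: the guest TOD is the host TOD
 * plus (epdx, epoch) taken as a 128-bit value. The low part is computed
 * as gtod->tod - clk.tod; if the result is larger than gtod->tod, the
 * 64-bit subtraction wrapped (a borrow occurred), so the epoch index
 * difference is decremented by one to account for it.
 */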
4442
4443int kvm_s390_try_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod)
4444{
4445 if (!mutex_trylock(&kvm->lock))
4446 return 0;
4447 __kvm_s390_set_tod_clock(kvm, gtod);
4448 mutex_unlock(&kvm->lock);
4449 return 1;
4450}
4451
4452/**
4453 * kvm_arch_fault_in_page - fault-in guest page if necessary
4454 * @vcpu: The corresponding virtual cpu
4455 * @gpa: Guest physical address
4456 * @writable: Whether the page should be writable or not
4457 *
4458 * Make sure that a guest page has been faulted-in on the host.
4459 *
4460 * Return: Zero on success, negative error code otherwise.
4461 */
4462long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
4463{
4464 return gmap_fault(vcpu->arch.gmap, gpa,
4465 writable ? FAULT_FLAG_WRITE : 0);
4466}
4467
4468static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
4469 unsigned long token)
4470{
4471 struct kvm_s390_interrupt inti;
4472 struct kvm_s390_irq irq;
4473
4474 if (start_token) {
4475 irq.u.ext.ext_params2 = token;
4476 irq.type = KVM_S390_INT_PFAULT_INIT;
4477 WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
4478 } else {
4479 inti.type = KVM_S390_INT_PFAULT_DONE;
4480 inti.parm64 = token;
4481 WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
4482 }
4483}
4484
4485bool kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
4486 struct kvm_async_pf *work)
4487{
4488 trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
4489 __kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
4490
4491 return true;
4492}
4493
4494void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
4495 struct kvm_async_pf *work)
4496{
4497 trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
4498 __kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
4499}
4500
4501void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
4502 struct kvm_async_pf *work)
4503{
4504 /* s390 will always inject the page directly */
4505}
4506
4507bool kvm_arch_can_dequeue_async_page_present(struct kvm_vcpu *vcpu)
4508{
4509 /*
4510 * s390 will always inject the page directly,
4511	 * but we still want check_async_completion to clean up
4512 */
4513 return true;
4514}
4515
4516static bool kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
4517{
4518 hva_t hva;
4519 struct kvm_arch_async_pf arch;
4520
4521 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
4522 return false;
4523 if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
4524 vcpu->arch.pfault_compare)
4525 return false;
4526 if (psw_extint_disabled(vcpu))
4527 return false;
4528 if (kvm_s390_vcpu_has_irq(vcpu, 0))
4529 return false;
4530 if (!(vcpu->arch.sie_block->gcr[0] & CR0_SERVICE_SIGNAL_SUBMASK))
4531 return false;
4532 if (!vcpu->arch.gmap->pfault_enabled)
4533 return false;
4534
4535 hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
4536 hva += current->thread.gmap_addr & ~PAGE_MASK;
4537 if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
4538 return false;
4539
4540 return kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
4541}
4542
4543static int vcpu_pre_run(struct kvm_vcpu *vcpu)
4544{
4545 int rc, cpuflags;
4546
4547 /*
4548	 * On s390, notifications for arriving pages are delivered directly
4549	 * to the guest, but the housekeeping for completed pfaults is
4550	 * handled outside the worker.
4551 */
4552 kvm_check_async_pf_completion(vcpu);
4553
4554 vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
4555 vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
4556
4557 if (need_resched())
4558 schedule();
4559
4560 if (!kvm_is_ucontrol(vcpu->kvm)) {
4561 rc = kvm_s390_deliver_pending_interrupts(vcpu);
4562 if (rc)
4563 return rc;
4564 }
4565
4566 rc = kvm_s390_handle_requests(vcpu);
4567 if (rc)
4568 return rc;
4569
4570 if (guestdbg_enabled(vcpu)) {
4571 kvm_s390_backup_guest_per_regs(vcpu);
4572 kvm_s390_patch_guest_per_regs(vcpu);
4573 }
4574
4575 clear_bit(vcpu->vcpu_idx, vcpu->kvm->arch.gisa_int.kicked_mask);
4576
4577 vcpu->arch.sie_block->icptcode = 0;
4578 cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
4579 VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
4580 trace_kvm_s390_sie_enter(vcpu, cpuflags);
4581
4582 return 0;
4583}
4584
4585static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
4586{
4587 struct kvm_s390_pgm_info pgm_info = {
4588 .code = PGM_ADDRESSING,
4589 };
4590 u8 opcode, ilen;
4591 int rc;
4592
4593 VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
4594 trace_kvm_s390_sie_fault(vcpu);
4595
4596 /*
4597 * We want to inject an addressing exception, which is defined as a
4598 * suppressing or terminating exception. However, since we came here
4599 * by a DAT access exception, the PSW still points to the faulting
4600 * instruction since DAT exceptions are nullifying. So we've got
4601 * to look up the current opcode to get the length of the instruction
4602 * to be able to forward the PSW.
4603 */
4604 rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1);
4605 ilen = insn_length(opcode);
4606 if (rc < 0) {
4607 return rc;
4608 } else if (rc) {
4609 /* Instruction-Fetching Exceptions - we can't detect the ilen.
4610 * Forward by arbitrary ilc, injection will take care of
4611 * nullification if necessary.
4612 */
4613 pgm_info = vcpu->arch.pgm;
4614 ilen = 4;
4615 }
4616 pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
4617 kvm_s390_forward_psw(vcpu, ilen);
4618 return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
4619}
4620
4621static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
4622{
4623 struct mcck_volatile_info *mcck_info;
4624 struct sie_page *sie_page;
4625
4626 VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
4627 vcpu->arch.sie_block->icptcode);
4628 trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
4629
4630 if (guestdbg_enabled(vcpu))
4631 kvm_s390_restore_guest_per_regs(vcpu);
4632
4633 vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
4634 vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
4635
4636 if (exit_reason == -EINTR) {
4637 VCPU_EVENT(vcpu, 3, "%s", "machine check");
4638 sie_page = container_of(vcpu->arch.sie_block,
4639 struct sie_page, sie_block);
4640 mcck_info = &sie_page->mcck_info;
4641 kvm_s390_reinject_machine_check(vcpu, mcck_info);
4642 return 0;
4643 }
4644
4645 if (vcpu->arch.sie_block->icptcode > 0) {
4646 int rc = kvm_handle_sie_intercept(vcpu);
4647
4648 if (rc != -EOPNOTSUPP)
4649 return rc;
4650 vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
4651 vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
4652 vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
4653 vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
4654 return -EREMOTE;
4655 } else if (exit_reason != -EFAULT) {
4656 vcpu->stat.exit_null++;
4657 return 0;
4658 } else if (kvm_is_ucontrol(vcpu->kvm)) {
4659 vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
4660 vcpu->run->s390_ucontrol.trans_exc_code =
4661 current->thread.gmap_addr;
4662 vcpu->run->s390_ucontrol.pgm_code = 0x10;
4663 return -EREMOTE;
4664 } else if (current->thread.gmap_pfault) {
4665 trace_kvm_s390_major_guest_pfault(vcpu);
4666 current->thread.gmap_pfault = 0;
4667 if (kvm_arch_setup_async_pf(vcpu))
4668 return 0;
4669 vcpu->stat.pfault_sync++;
4670 return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
4671 }
4672 return vcpu_post_run_fault_in_sie(vcpu);
4673}
4674
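/*
 * Inner run loop: vcpu_pre_run(), the SIE entry via sie64a() (with the
 * protected-virt register copying around it) and vcpu_post_run(), repeated
 * until an error occurs, a signal is pending or a guest debug exit is
 * requested.
 */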
4675#define PSW_INT_MASK (PSW_MASK_EXT | PSW_MASK_IO | PSW_MASK_MCHECK)
4676static int __vcpu_run(struct kvm_vcpu *vcpu)
4677{
4678 int rc, exit_reason;
4679 struct sie_page *sie_page = (struct sie_page *)vcpu->arch.sie_block;
4680
4681 /*
4682	 * We try to hold kvm->srcu during most of vcpu_run (except while
4683	 * running the guest), so that memslots (and other stuff) are protected.
4684 */
4685 kvm_vcpu_srcu_read_lock(vcpu);
4686
4687 do {
4688 rc = vcpu_pre_run(vcpu);
4689 if (rc)
4690 break;
4691
4692 kvm_vcpu_srcu_read_unlock(vcpu);
4693 /*
4694		 * As PF_VCPU will be used in the fault handler, there should be
4695		 * no uaccess between guest_enter and guest_exit.
4696 */
4697 local_irq_disable();
4698 guest_enter_irqoff();
4699 __disable_cpu_timer_accounting(vcpu);
4700 local_irq_enable();
4701 if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4702 memcpy(sie_page->pv_grregs,
4703 vcpu->run->s.regs.gprs,
4704 sizeof(sie_page->pv_grregs));
4705 }
4706 if (test_cpu_flag(CIF_FPU))
4707 load_fpu_regs();
4708 exit_reason = sie64a(vcpu->arch.sie_block,
4709 vcpu->run->s.regs.gprs);
4710 if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4711 memcpy(vcpu->run->s.regs.gprs,
4712 sie_page->pv_grregs,
4713 sizeof(sie_page->pv_grregs));
4714 /*
4715 * We're not allowed to inject interrupts on intercepts
4716 * that leave the guest state in an "in-between" state
4717 * where the next SIE entry will do a continuation.
4718 * Fence interrupts in our "internal" PSW.
4719 */
4720 if (vcpu->arch.sie_block->icptcode == ICPT_PV_INSTR ||
4721 vcpu->arch.sie_block->icptcode == ICPT_PV_PREF) {
4722 vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK;
4723 }
4724 }
4725 local_irq_disable();
4726 __enable_cpu_timer_accounting(vcpu);
4727 guest_exit_irqoff();
4728 local_irq_enable();
4729 kvm_vcpu_srcu_read_lock(vcpu);
4730
4731 rc = vcpu_post_run(vcpu, exit_reason);
4732 } while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
4733
4734 kvm_vcpu_srcu_read_unlock(vcpu);
4735 return rc;
4736}
4737
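/*
 * Sync the parts of the register state that are only accessible for
 * non-protected ("fmt2") guests from kvm_run into the SIE control block;
 * protected guests get a reduced sync in sync_regs() instead.
 */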
4738static void sync_regs_fmt2(struct kvm_vcpu *vcpu)
4739{
4740 struct kvm_run *kvm_run = vcpu->run;
4741 struct runtime_instr_cb *riccb;
4742 struct gs_cb *gscb;
4743
4744 riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
4745 gscb = (struct gs_cb *) &kvm_run->s.regs.gscb;
4746 vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
4747 vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
4748 if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
4749 vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
4750 vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
4751 vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
4752 }
4753 if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
4754 vcpu->arch.pfault_token = kvm_run->s.regs.pft;
4755 vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
4756 vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
4757 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
4758 kvm_clear_async_pf_completion_queue(vcpu);
4759 }
4760 if (kvm_run->kvm_dirty_regs & KVM_SYNC_DIAG318) {
4761 vcpu->arch.diag318_info.val = kvm_run->s.regs.diag318;
4762 vcpu->arch.sie_block->cpnc = vcpu->arch.diag318_info.cpnc;
4763 VCPU_EVENT(vcpu, 3, "setting cpnc to %d", vcpu->arch.diag318_info.cpnc);
4764 }
4765 /*
4766 * If userspace sets the riccb (e.g. after migration) to a valid state,
4767 * we should enable RI here instead of doing the lazy enablement.
4768 */
4769 if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
4770 test_kvm_facility(vcpu->kvm, 64) &&
4771 riccb->v &&
4772 !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) {
4773 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)");
4774 vcpu->arch.sie_block->ecb3 |= ECB3_RI;
4775 }
4776 /*
4777 * If userspace sets the gscb (e.g. after migration) to non-zero,
4778 * we should enable GS here instead of doing the lazy enablement.
4779 */
4780 if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) &&
4781 test_kvm_facility(vcpu->kvm, 133) &&
4782 gscb->gssm &&
4783 !vcpu->arch.gs_enabled) {
4784 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)");
4785 vcpu->arch.sie_block->ecb |= ECB_GS;
4786 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
4787 vcpu->arch.gs_enabled = 1;
4788 }
4789 if ((kvm_run->kvm_dirty_regs & KVM_SYNC_BPBC) &&
4790 test_kvm_facility(vcpu->kvm, 82)) {
4791 vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
4792 vcpu->arch.sie_block->fpf |= kvm_run->s.regs.bpbc ? FPF_BPBC : 0;
4793 }
4794 if (MACHINE_HAS_GS) {
4795 preempt_disable();
4796 __ctl_set_bit(2, 4);
4797 if (current->thread.gs_cb) {
4798 vcpu->arch.host_gscb = current->thread.gs_cb;
4799 save_gs_cb(vcpu->arch.host_gscb);
4800 }
4801 if (vcpu->arch.gs_enabled) {
4802 current->thread.gs_cb = (struct gs_cb *)
4803 &vcpu->run->s.regs.gscb;
4804 restore_gs_cb(current->thread.gs_cb);
4805 }
4806 preempt_enable();
4807 }
4808 /* SIE will load etoken directly from SDNX and therefore kvm_run */
4809}
4810
4811static void sync_regs(struct kvm_vcpu *vcpu)
4812{
4813 struct kvm_run *kvm_run = vcpu->run;
4814
4815 if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
4816 kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
4817 if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
4818 memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
4819 /* some control register changes require a tlb flush */
4820 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
4821 }
4822 if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
4823 kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
4824 vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
4825 }
4826 save_access_regs(vcpu->arch.host_acrs);
4827 restore_access_regs(vcpu->run->s.regs.acrs);
4828 /* save host (userspace) fprs/vrs */
4829 save_fpu_regs();
4830 vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
4831 vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
4832 if (MACHINE_HAS_VX)
4833 current->thread.fpu.regs = vcpu->run->s.regs.vrs;
4834 else
4835 current->thread.fpu.regs = vcpu->run->s.regs.fprs;
4836 current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
4837 if (test_fp_ctl(current->thread.fpu.fpc))
4838 /* User space provided an invalid FPC, let's clear it */
4839 current->thread.fpu.fpc = 0;
4840
4841 /* Sync fmt2 only data */
4842 if (likely(!kvm_s390_pv_cpu_is_protected(vcpu))) {
4843 sync_regs_fmt2(vcpu);
4844 } else {
4845 /*
4846 * In several places we have to modify our internal view to
4847 * not do things that are disallowed by the ultravisor. For
4848 * example we must not inject interrupts after specific exits
4849 * (e.g. 112 prefix page not secure). We do this by turning
4850 * off the machine check, external and I/O interrupt bits
4851 * of our PSW copy. To avoid getting validity intercepts, we
4852		 * only accept the condition code from userspace.
4853 */
4854 vcpu->arch.sie_block->gpsw.mask &= ~PSW_MASK_CC;
4855 vcpu->arch.sie_block->gpsw.mask |= kvm_run->psw_mask &
4856 PSW_MASK_CC;
4857 }
4858
4859 kvm_run->kvm_dirty_regs = 0;
4860}
4861
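/*
 * Counterpart of sync_regs_fmt2(): copy the non-protected-only state from the
 * SIE control block back into kvm_run after the run loop.
 */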
4862static void store_regs_fmt2(struct kvm_vcpu *vcpu)
4863{
4864 struct kvm_run *kvm_run = vcpu->run;
4865
4866 kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
4867 kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
4868 kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
4869 kvm_run->s.regs.bpbc = (vcpu->arch.sie_block->fpf & FPF_BPBC) == FPF_BPBC;
4870 kvm_run->s.regs.diag318 = vcpu->arch.diag318_info.val;
4871 if (MACHINE_HAS_GS) {
4872 preempt_disable();
4873 __ctl_set_bit(2, 4);
4874 if (vcpu->arch.gs_enabled)
4875 save_gs_cb(current->thread.gs_cb);
4876 current->thread.gs_cb = vcpu->arch.host_gscb;
4877 restore_gs_cb(vcpu->arch.host_gscb);
4878 if (!vcpu->arch.host_gscb)
4879 __ctl_clear_bit(2, 4);
4880 vcpu->arch.host_gscb = NULL;
4881 preempt_enable();
4882 }
4883 /* SIE will save etoken directly into SDNX and therefore kvm_run */
4884}
4885
4886static void store_regs(struct kvm_vcpu *vcpu)
4887{
4888 struct kvm_run *kvm_run = vcpu->run;
4889
4890 kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
4891 kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
4892 kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
4893 memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
4894 kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
4895 kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
4896 kvm_run->s.regs.pft = vcpu->arch.pfault_token;
4897 kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
4898 kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
4899 save_access_regs(vcpu->run->s.regs.acrs);
4900 restore_access_regs(vcpu->arch.host_acrs);
4901 /* Save guest register state */
4902 save_fpu_regs();
4903 vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
4904 /* Restore will be done lazily at return */
4905 current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
4906 current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
4907 if (likely(!kvm_s390_pv_cpu_is_protected(vcpu)))
4908 store_regs_fmt2(vcpu);
4909}
4910
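/*
 * kvm_arch_vcpu_ioctl_run() backs the KVM_RUN ioctl. A rough sketch of the
 * userspace side (dev_kvm_fd, vcpu_fd, entry and mask are placeholders, error
 * handling omitted):
 *
 *	size = ioctl(dev_kvm_fd, KVM_GET_VCPU_MMAP_SIZE, 0);
 *	run = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, vcpu_fd, 0);
 *	run->psw_addr = entry;
 *	run->psw_mask = mask;
 *	run->kvm_dirty_regs = KVM_SYNC_PREFIX;
 *	ret = ioctl(vcpu_fd, KVM_RUN, 0);
 *	switch (run->exit_reason) { ... }
 *
 * psw_addr/psw_mask and the kvm_dirty_regs bits are consumed by sync_regs()
 * before the SIE entry; store_regs() writes the state back before returning
 * to userspace.
 */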
4911int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
4912{
4913 struct kvm_run *kvm_run = vcpu->run;
4914 int rc;
4915
4916 /*
4917 * Running a VM while dumping always has the potential to
4918 * produce inconsistent dump data. But for PV vcpus a SIE
4919 * entry while dumping could also lead to a fatal validity
4920 * intercept which we absolutely want to avoid.
4921 */
4922 if (vcpu->kvm->arch.pv.dumping)
4923 return -EINVAL;
4924
4925 if (kvm_run->immediate_exit)
4926 return -EINTR;
4927
4928 if (kvm_run->kvm_valid_regs & ~KVM_SYNC_S390_VALID_FIELDS ||
4929 kvm_run->kvm_dirty_regs & ~KVM_SYNC_S390_VALID_FIELDS)
4930 return -EINVAL;
4931
4932 vcpu_load(vcpu);
4933
4934 if (guestdbg_exit_pending(vcpu)) {
4935 kvm_s390_prepare_debug_exit(vcpu);
4936 rc = 0;
4937 goto out;
4938 }
4939
4940 kvm_sigset_activate(vcpu);
4941
4942 /*
4943	 * No need to check the return value of vcpu_start: it can only fail
4944	 * for protvirt, and protvirt implies user controlled cpu state.
4945 */
4946 if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
4947 kvm_s390_vcpu_start(vcpu);
4948 } else if (is_vcpu_stopped(vcpu)) {
4949 pr_err_ratelimited("can't run stopped vcpu %d\n",
4950 vcpu->vcpu_id);
4951 rc = -EINVAL;
4952 goto out;
4953 }
4954
4955 sync_regs(vcpu);
4956 enable_cpu_timer_accounting(vcpu);
4957
4958 might_fault();
4959 rc = __vcpu_run(vcpu);
4960
4961 if (signal_pending(current) && !rc) {
4962 kvm_run->exit_reason = KVM_EXIT_INTR;
4963 rc = -EINTR;
4964 }
4965
4966 if (guestdbg_exit_pending(vcpu) && !rc) {
4967 kvm_s390_prepare_debug_exit(vcpu);
4968 rc = 0;
4969 }
4970
4971 if (rc == -EREMOTE) {
4972 /* userspace support is needed, kvm_run has been prepared */
4973 rc = 0;
4974 }
4975
4976 disable_cpu_timer_accounting(vcpu);
4977 store_regs(vcpu);
4978
4979 kvm_sigset_deactivate(vcpu);
4980
4981 vcpu->stat.exit_userspace++;
4982out:
4983 vcpu_put(vcpu);
4984 return rc;
4985}
4986
4987/*
4988 * store status at address
4989 * we have two special cases:
4990 * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
4991 * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
4992 */
4993int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
4994{
4995 unsigned char archmode = 1;
4996 freg_t fprs[NUM_FPRS];
4997 unsigned int px;
4998 u64 clkcomp, cputm;
4999 int rc;
5000
5001 px = kvm_s390_get_prefix(vcpu);
5002 if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
5003 if (write_guest_abs(vcpu, 163, &archmode, 1))
5004 return -EFAULT;
5005 gpa = 0;
5006 } else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
5007 if (write_guest_real(vcpu, 163, &archmode, 1))
5008 return -EFAULT;
5009 gpa = px;
5010 } else
5011 gpa -= __LC_FPREGS_SAVE_AREA;
5012
5013 /* manually convert vector registers if necessary */
5014 if (MACHINE_HAS_VX) {
5015 convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
5016 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
5017 fprs, 128);
5018 } else {
5019 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
5020 vcpu->run->s.regs.fprs, 128);
5021 }
5022 rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
5023 vcpu->run->s.regs.gprs, 128);
5024 rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
5025 &vcpu->arch.sie_block->gpsw, 16);
5026 rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
5027 &px, 4);
5028 rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
5029 &vcpu->run->s.regs.fpc, 4);
5030 rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
5031 &vcpu->arch.sie_block->todpr, 4);
5032 cputm = kvm_s390_get_cpu_timer(vcpu);
5033 rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
5034 &cputm, 8);
5035 clkcomp = vcpu->arch.sie_block->ckc >> 8;
5036 rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
5037 &clkcomp, 8);
5038 rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
5039 &vcpu->run->s.regs.acrs, 64);
5040 rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
5041 &vcpu->arch.sie_block->gcr, 128);
5042 return rc ? -EFAULT : 0;
5043}
5044
5045int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
5046{
5047 /*
5048 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
5049 * switch in the run ioctl. Let's update our copies before we save
5050	 * them into the save area.
5051 */
5052 save_fpu_regs();
5053 vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
5054 save_access_regs(vcpu->run->s.regs.acrs);
5055
5056 return kvm_s390_store_status_unloaded(vcpu, addr);
5057}
5058
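/*
 * IBS is only beneficial while a single vCPU is running; these helpers queue
 * the corresponding enable/disable requests, the policy is implemented in
 * kvm_s390_vcpu_start() and kvm_s390_vcpu_stop() below.
 */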
5059static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
5060{
5061 kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
5062 kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
5063}
5064
5065static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
5066{
5067 unsigned long i;
5068 struct kvm_vcpu *vcpu;
5069
5070 kvm_for_each_vcpu(i, vcpu, kvm) {
5071 __disable_ibs_on_vcpu(vcpu);
5072 }
5073}
5074
5075static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
5076{
5077 if (!sclp.has_ibs)
5078 return;
5079 kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
5080 kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
5081}
5082
5083int kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
5084{
5085 int i, online_vcpus, r = 0, started_vcpus = 0;
5086
5087 if (!is_vcpu_stopped(vcpu))
5088 return 0;
5089
5090 trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
5091 /* Only one cpu at a time may enter/leave the STOPPED state. */
5092 spin_lock(&vcpu->kvm->arch.start_stop_lock);
5093 online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
5094
5095 /* Let's tell the UV that we want to change into the operating state */
5096 if (kvm_s390_pv_cpu_is_protected(vcpu)) {
5097 r = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_OPR);
5098 if (r) {
5099 spin_unlock(&vcpu->kvm->arch.start_stop_lock);
5100 return r;
5101 }
5102 }
5103
5104 for (i = 0; i < online_vcpus; i++) {
5105 if (!is_vcpu_stopped(kvm_get_vcpu(vcpu->kvm, i)))
5106 started_vcpus++;
5107 }
5108
5109 if (started_vcpus == 0) {
5110 /* we're the only active VCPU -> speed it up */
5111 __enable_ibs_on_vcpu(vcpu);
5112 } else if (started_vcpus == 1) {
5113 /*
5114 * As we are starting a second VCPU, we have to disable
5115 * the IBS facility on all VCPUs to remove potentially
5116 * outstanding ENABLE requests.
5117 */
5118 __disable_ibs_on_all_vcpus(vcpu->kvm);
5119 }
5120
5121 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_STOPPED);
5122 /*
5123 * The real PSW might have changed due to a RESTART interpreted by the
5124 * ultravisor. We block all interrupts and let the next sie exit
5125 * refresh our view.
5126 */
5127 if (kvm_s390_pv_cpu_is_protected(vcpu))
5128 vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK;
5129 /*
5130 * Another VCPU might have used IBS while we were offline.
5131 * Let's play safe and flush the VCPU at startup.
5132 */
5133 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
5134 spin_unlock(&vcpu->kvm->arch.start_stop_lock);
5135 return 0;
5136}
5137
5138int kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
5139{
5140 int i, online_vcpus, r = 0, started_vcpus = 0;
5141 struct kvm_vcpu *started_vcpu = NULL;
5142
5143 if (is_vcpu_stopped(vcpu))
5144 return 0;
5145
5146 trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
5147 /* Only one cpu at a time may enter/leave the STOPPED state. */
5148 spin_lock(&vcpu->kvm->arch.start_stop_lock);
5149 online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
5150
5151 /* Let's tell the UV that we want to change into the stopped state */
5152 if (kvm_s390_pv_cpu_is_protected(vcpu)) {
5153 r = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_STP);
5154 if (r) {
5155 spin_unlock(&vcpu->kvm->arch.start_stop_lock);
5156 return r;
5157 }
5158 }
5159
5160 /*
5161 * Set the VCPU to STOPPED and THEN clear the interrupt flag,
5162 * now that the SIGP STOP and SIGP STOP AND STORE STATUS orders
5163 * have been fully processed. This will ensure that the VCPU
5164 * is kept BUSY if another VCPU is inquiring with SIGP SENSE.
5165 */
5166 kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOPPED);
5167 kvm_s390_clear_stop_irq(vcpu);
5168
5169 __disable_ibs_on_vcpu(vcpu);
5170
5171 for (i = 0; i < online_vcpus; i++) {
5172 struct kvm_vcpu *tmp = kvm_get_vcpu(vcpu->kvm, i);
5173
5174 if (!is_vcpu_stopped(tmp)) {
5175 started_vcpus++;
5176 started_vcpu = tmp;
5177 }
5178 }
5179
5180 if (started_vcpus == 1) {
5181 /*
5182 * As we only have one VCPU left, we want to enable the
5183 * IBS facility for that VCPU to speed it up.
5184 */
5185 __enable_ibs_on_vcpu(started_vcpu);
5186 }
5187
5188 spin_unlock(&vcpu->kvm->arch.start_stop_lock);
5189 return 0;
5190}
5191
5192static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
5193 struct kvm_enable_cap *cap)
5194{
5195 int r;
5196
5197 if (cap->flags)
5198 return -EINVAL;
5199
5200 switch (cap->cap) {
5201 case KVM_CAP_S390_CSS_SUPPORT:
5202 if (!vcpu->kvm->arch.css_support) {
5203 vcpu->kvm->arch.css_support = 1;
5204 VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
5205 trace_kvm_s390_enable_css(vcpu->kvm);
5206 }
5207 r = 0;
5208 break;
5209 default:
5210 r = -EINVAL;
5211 break;
5212 }
5213 return r;
5214}
5215
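/*
 * Handle KVM_S390_MEMOP_SIDA_READ/WRITE for a protected vCPU: validate the
 * offset/size pair against the SIDA size and copy the data between the SIDA
 * and the user buffer.
 */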
5216static long kvm_s390_vcpu_sida_op(struct kvm_vcpu *vcpu,
5217 struct kvm_s390_mem_op *mop)
5218{
5219 void __user *uaddr = (void __user *)mop->buf;
5220 void *sida_addr;
5221 int r = 0;
5222
5223 if (mop->flags || !mop->size)
5224 return -EINVAL;
5225 if (mop->size + mop->sida_offset < mop->size)
5226 return -EINVAL;
5227 if (mop->size + mop->sida_offset > sida_size(vcpu->arch.sie_block))
5228 return -E2BIG;
5229 if (!kvm_s390_pv_cpu_is_protected(vcpu))
5230 return -EINVAL;
5231
5232 sida_addr = (char *)sida_addr(vcpu->arch.sie_block) + mop->sida_offset;
5233
5234 switch (mop->op) {
5235 case KVM_S390_MEMOP_SIDA_READ:
5236 if (copy_to_user(uaddr, sida_addr, mop->size))
5237 r = -EFAULT;
5238
5239 break;
5240 case KVM_S390_MEMOP_SIDA_WRITE:
5241 if (copy_from_user(sida_addr, uaddr, mop->size))
5242 r = -EFAULT;
5243 break;
5244 }
5245 return r;
5246}
5247
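/*
 * Handle the logical read/write variants of the vCPU KVM_S390_MEM_OP ioctl.
 * A minimal sketch of a caller, assuming an already created vcpu_fd and a
 * local buffer (both placeholders):
 *
 *	struct kvm_s390_mem_op op = {
 *		.gaddr = guest_addr,
 *		.size  = len,
 *		.op    = KVM_S390_MEMOP_LOGICAL_READ,
 *		.buf   = (__u64)(unsigned long)buffer,
 *		.ar    = 0,
 *	};
 *	ret = ioctl(vcpu_fd, KVM_S390_MEM_OP, &op);
 *
 * With KVM_S390_MEMOP_F_CHECK_ONLY only the access is checked and no data is
 * copied; KVM_S390_MEMOP_F_INJECT_EXCEPTION turns an access error into a
 * program interrupt for the guest.
 */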
5248static long kvm_s390_vcpu_mem_op(struct kvm_vcpu *vcpu,
5249 struct kvm_s390_mem_op *mop)
5250{
5251 void __user *uaddr = (void __user *)mop->buf;
5252 void *tmpbuf = NULL;
5253 int r = 0;
5254 const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
5255 | KVM_S390_MEMOP_F_CHECK_ONLY
5256 | KVM_S390_MEMOP_F_SKEY_PROTECTION;
5257
5258 if (mop->flags & ~supported_flags || mop->ar >= NUM_ACRS || !mop->size)
5259 return -EINVAL;
5260 if (mop->size > MEM_OP_MAX_SIZE)
5261 return -E2BIG;
5262 if (kvm_s390_pv_cpu_is_protected(vcpu))
5263 return -EINVAL;
5264 if (mop->flags & KVM_S390_MEMOP_F_SKEY_PROTECTION) {
5265 if (access_key_invalid(mop->key))
5266 return -EINVAL;
5267 } else {
5268 mop->key = 0;
5269 }
5270 if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
5271 tmpbuf = vmalloc(mop->size);
5272 if (!tmpbuf)
5273 return -ENOMEM;
5274 }
5275
5276 switch (mop->op) {
5277 case KVM_S390_MEMOP_LOGICAL_READ:
5278 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
5279 r = check_gva_range(vcpu, mop->gaddr, mop->ar, mop->size,
5280 GACC_FETCH, mop->key);
5281 break;
5282 }
5283 r = read_guest_with_key(vcpu, mop->gaddr, mop->ar, tmpbuf,
5284 mop->size, mop->key);
5285 if (r == 0) {
5286 if (copy_to_user(uaddr, tmpbuf, mop->size))
5287 r = -EFAULT;
5288 }
5289 break;
5290 case KVM_S390_MEMOP_LOGICAL_WRITE:
5291 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
5292 r = check_gva_range(vcpu, mop->gaddr, mop->ar, mop->size,
5293 GACC_STORE, mop->key);
5294 break;
5295 }
5296 if (copy_from_user(tmpbuf, uaddr, mop->size)) {
5297 r = -EFAULT;
5298 break;
5299 }
5300 r = write_guest_with_key(vcpu, mop->gaddr, mop->ar, tmpbuf,
5301 mop->size, mop->key);
5302 break;
5303 }
5304
5305 if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
5306 kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
5307
5308 vfree(tmpbuf);
5309 return r;
5310}
5311
5312static long kvm_s390_vcpu_memsida_op(struct kvm_vcpu *vcpu,
5313 struct kvm_s390_mem_op *mop)
5314{
5315 int r, srcu_idx;
5316
5317 srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
5318
5319 switch (mop->op) {
5320 case KVM_S390_MEMOP_LOGICAL_READ:
5321 case KVM_S390_MEMOP_LOGICAL_WRITE:
5322 r = kvm_s390_vcpu_mem_op(vcpu, mop);
5323 break;
5324 case KVM_S390_MEMOP_SIDA_READ:
5325 case KVM_S390_MEMOP_SIDA_WRITE:
5326 /* we are locked against sida going away by the vcpu->mutex */
5327 r = kvm_s390_vcpu_sida_op(vcpu, mop);
5328 break;
5329 default:
5330 r = -EINVAL;
5331 }
5332
5333 srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
5334 return r;
5335}
5336
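/*
 * Asynchronous vcpu ioctls are dispatched without taking the vcpu mutex; on
 * s390 this covers interrupt injection via KVM_S390_IRQ and the older
 * KVM_S390_INTERRUPT interface.
 */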
5337long kvm_arch_vcpu_async_ioctl(struct file *filp,
5338 unsigned int ioctl, unsigned long arg)
5339{
5340 struct kvm_vcpu *vcpu = filp->private_data;
5341 void __user *argp = (void __user *)arg;
5342
5343 switch (ioctl) {
5344 case KVM_S390_IRQ: {
5345 struct kvm_s390_irq s390irq;
5346
5347 if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
5348 return -EFAULT;
5349 return kvm_s390_inject_vcpu(vcpu, &s390irq);
5350 }
5351 case KVM_S390_INTERRUPT: {
5352 struct kvm_s390_interrupt s390int;
5353 struct kvm_s390_irq s390irq = {};
5354
5355 if (copy_from_user(&s390int, argp, sizeof(s390int)))
5356 return -EFAULT;
5357 if (s390int_to_s390irq(&s390int, &s390irq))
5358 return -EINVAL;
5359 return kvm_s390_inject_vcpu(vcpu, &s390irq);
5360 }
5361 }
5362 return -ENOIOCTLCMD;
5363}
5364
5365static int kvm_s390_handle_pv_vcpu_dump(struct kvm_vcpu *vcpu,
5366 struct kvm_pv_cmd *cmd)
5367{
5368 struct kvm_s390_pv_dmp dmp;
5369 void *data;
5370 int ret;
5371
5372 /* Dump initialization is a prerequisite */
5373 if (!vcpu->kvm->arch.pv.dumping)
5374 return -EINVAL;
5375
5376 if (copy_from_user(&dmp, (__u8 __user *)cmd->data, sizeof(dmp)))
5377 return -EFAULT;
5378
5379 /* We only handle this subcmd right now */
5380 if (dmp.subcmd != KVM_PV_DUMP_CPU)
5381 return -EINVAL;
5382
5383	/* The CPU dump length is the same as the cpu storage donated at CPU creation. */
5384 if (dmp.buff_len != uv_info.guest_cpu_stor_len)
5385 return -EINVAL;
5386
5387 data = kvzalloc(uv_info.guest_cpu_stor_len, GFP_KERNEL);
5388 if (!data)
5389 return -ENOMEM;
5390
5391 ret = kvm_s390_pv_dump_cpu(vcpu, data, &cmd->rc, &cmd->rrc);
5392
5393 VCPU_EVENT(vcpu, 3, "PROTVIRT DUMP CPU %d rc %x rrc %x",
5394 vcpu->vcpu_id, cmd->rc, cmd->rrc);
5395
5396 if (ret)
5397 ret = -EINVAL;
5398
5399 /* On success copy over the dump data */
5400 if (!ret && copy_to_user((__u8 __user *)dmp.buff_addr, data, uv_info.guest_cpu_stor_len))
5401 ret = -EFAULT;
5402
5403 kvfree(data);
5404 return ret;
5405}
5406
5407long kvm_arch_vcpu_ioctl(struct file *filp,
5408 unsigned int ioctl, unsigned long arg)
5409{
5410 struct kvm_vcpu *vcpu = filp->private_data;
5411 void __user *argp = (void __user *)arg;
5412 int idx;
5413 long r;
5414 u16 rc, rrc;
5415
5416 vcpu_load(vcpu);
5417
5418 switch (ioctl) {
5419 case KVM_S390_STORE_STATUS:
5420 idx = srcu_read_lock(&vcpu->kvm->srcu);
5421 r = kvm_s390_store_status_unloaded(vcpu, arg);
5422 srcu_read_unlock(&vcpu->kvm->srcu, idx);
5423 break;
5424 case KVM_S390_SET_INITIAL_PSW: {
5425 psw_t psw;
5426
5427 r = -EFAULT;
5428 if (copy_from_user(&psw, argp, sizeof(psw)))
5429 break;
5430 r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
5431 break;
5432 }
5433 case KVM_S390_CLEAR_RESET:
5434 r = 0;
5435 kvm_arch_vcpu_ioctl_clear_reset(vcpu);
5436 if (kvm_s390_pv_cpu_is_protected(vcpu)) {
5437 r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
5438 UVC_CMD_CPU_RESET_CLEAR, &rc, &rrc);
5439 VCPU_EVENT(vcpu, 3, "PROTVIRT RESET CLEAR VCPU: rc %x rrc %x",
5440 rc, rrc);
5441 }
5442 break;
5443 case KVM_S390_INITIAL_RESET:
5444 r = 0;
5445 kvm_arch_vcpu_ioctl_initial_reset(vcpu);
5446 if (kvm_s390_pv_cpu_is_protected(vcpu)) {
5447 r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
5448 UVC_CMD_CPU_RESET_INITIAL,
5449 &rc, &rrc);
5450 VCPU_EVENT(vcpu, 3, "PROTVIRT RESET INITIAL VCPU: rc %x rrc %x",
5451 rc, rrc);
5452 }
5453 break;
5454 case KVM_S390_NORMAL_RESET:
5455 r = 0;
5456 kvm_arch_vcpu_ioctl_normal_reset(vcpu);
5457 if (kvm_s390_pv_cpu_is_protected(vcpu)) {
5458 r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
5459 UVC_CMD_CPU_RESET, &rc, &rrc);
5460 VCPU_EVENT(vcpu, 3, "PROTVIRT RESET NORMAL VCPU: rc %x rrc %x",
5461 rc, rrc);
5462 }
5463 break;
5464 case KVM_SET_ONE_REG:
5465 case KVM_GET_ONE_REG: {
5466 struct kvm_one_reg reg;
5467 r = -EINVAL;
5468 if (kvm_s390_pv_cpu_is_protected(vcpu))
5469 break;
5470 r = -EFAULT;
5471 if (copy_from_user(®, argp, sizeof(reg)))
5472 break;
5473 if (ioctl == KVM_SET_ONE_REG)
5474 r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, ®);
5475 else
5476 r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, ®);
5477 break;
5478 }
5479#ifdef CONFIG_KVM_S390_UCONTROL
5480 case KVM_S390_UCAS_MAP: {
5481 struct kvm_s390_ucas_mapping ucasmap;
5482
5483 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
5484 r = -EFAULT;
5485 break;
5486 }
5487
5488 if (!kvm_is_ucontrol(vcpu->kvm)) {
5489 r = -EINVAL;
5490 break;
5491 }
5492
5493 r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
5494 ucasmap.vcpu_addr, ucasmap.length);
5495 break;
5496 }
5497 case KVM_S390_UCAS_UNMAP: {
5498 struct kvm_s390_ucas_mapping ucasmap;
5499
5500 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
5501 r = -EFAULT;
5502 break;
5503 }
5504
5505 if (!kvm_is_ucontrol(vcpu->kvm)) {
5506 r = -EINVAL;
5507 break;
5508 }
5509
5510 r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
5511 ucasmap.length);
5512 break;
5513 }
5514#endif
5515 case KVM_S390_VCPU_FAULT: {
5516 r = gmap_fault(vcpu->arch.gmap, arg, 0);
5517 break;
5518 }
5519 case KVM_ENABLE_CAP:
5520 {
5521 struct kvm_enable_cap cap;
5522 r = -EFAULT;
5523 if (copy_from_user(&cap, argp, sizeof(cap)))
5524 break;
5525 r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
5526 break;
5527 }
5528 case KVM_S390_MEM_OP: {
5529 struct kvm_s390_mem_op mem_op;
5530
5531 if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
5532 r = kvm_s390_vcpu_memsida_op(vcpu, &mem_op);
5533 else
5534 r = -EFAULT;
5535 break;
5536 }
5537 case KVM_S390_SET_IRQ_STATE: {
5538 struct kvm_s390_irq_state irq_state;
5539
5540 r = -EFAULT;
5541 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
5542 break;
5543 if (irq_state.len > VCPU_IRQS_MAX_BUF ||
5544 irq_state.len == 0 ||
5545 irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
5546 r = -EINVAL;
5547 break;
5548 }
5549 /* do not use irq_state.flags, it will break old QEMUs */
5550 r = kvm_s390_set_irq_state(vcpu,
5551 (void __user *) irq_state.buf,
5552 irq_state.len);
5553 break;
5554 }
5555 case KVM_S390_GET_IRQ_STATE: {
5556 struct kvm_s390_irq_state irq_state;
5557
5558 r = -EFAULT;
5559 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
5560 break;
5561 if (irq_state.len == 0) {
5562 r = -EINVAL;
5563 break;
5564 }
5565 /* do not use irq_state.flags, it will break old QEMUs */
5566 r = kvm_s390_get_irq_state(vcpu,
5567 (__u8 __user *) irq_state.buf,
5568 irq_state.len);
5569 break;
5570 }
5571 case KVM_S390_PV_CPU_COMMAND: {
5572 struct kvm_pv_cmd cmd;
5573
5574 r = -EINVAL;
5575 if (!is_prot_virt_host())
5576 break;
5577
5578 r = -EFAULT;
5579 if (copy_from_user(&cmd, argp, sizeof(cmd)))
5580 break;
5581
5582 r = -EINVAL;
5583 if (cmd.flags)
5584 break;
5585
5586 /* We only handle this cmd right now */
5587 if (cmd.cmd != KVM_PV_DUMP)
5588 break;
5589
5590 r = kvm_s390_handle_pv_vcpu_dump(vcpu, &cmd);
5591
5592 /* Always copy over UV rc / rrc data */
5593 if (copy_to_user((__u8 __user *)argp, &cmd.rc,
5594 sizeof(cmd.rc) + sizeof(cmd.rrc)))
5595 r = -EFAULT;
5596 break;
5597 }
5598 default:
5599 r = -ENOTTY;
5600 }
5601
5602 vcpu_put(vcpu);
5603 return r;
5604}
5605
5606vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
5607{
5608#ifdef CONFIG_KVM_S390_UCONTROL
5609 if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
5610 && (kvm_is_ucontrol(vcpu->kvm))) {
5611 vmf->page = virt_to_page(vcpu->arch.sie_block);
5612 get_page(vmf->page);
5613 return 0;
5614 }
5615#endif
5616 return VM_FAULT_SIGBUS;
5617}
5618
5619bool kvm_arch_irqchip_in_kernel(struct kvm *kvm)
5620{
5621 return true;
5622}
5623
5624/* Section: memory related */
5625int kvm_arch_prepare_memory_region(struct kvm *kvm,
5626 const struct kvm_memory_slot *old,
5627 struct kvm_memory_slot *new,
5628 enum kvm_mr_change change)
5629{
5630 gpa_t size;
5631
5632 /* When we are protected, we should not change the memory slots */
5633 if (kvm_s390_pv_get_handle(kvm))
5634 return -EINVAL;
5635
5636 if (change == KVM_MR_DELETE || change == KVM_MR_FLAGS_ONLY)
5637 return 0;
5638
5639	/* A few sanity checks. Memory slots have to start and end on a segment
5640	   boundary (1MB). The memory in userland may be fragmented into various
5641	   different vmas. It is okay to mmap() and munmap() memory in this slot
5642	   at any time after doing this call. */
5643
5644 if (new->userspace_addr & 0xffffful)
5645 return -EINVAL;
5646
5647 size = new->npages * PAGE_SIZE;
5648 if (size & 0xffffful)
5649 return -EINVAL;
5650
5651 if ((new->base_gfn * PAGE_SIZE) + size > kvm->arch.mem_limit)
5652 return -EINVAL;
5653
5654 return 0;
5655}
5656
5657void kvm_arch_commit_memory_region(struct kvm *kvm,
5658 struct kvm_memory_slot *old,
5659 const struct kvm_memory_slot *new,
5660 enum kvm_mr_change change)
5661{
5662 int rc = 0;
5663
5664 switch (change) {
5665 case KVM_MR_DELETE:
5666 rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
5667 old->npages * PAGE_SIZE);
5668 break;
5669 case KVM_MR_MOVE:
5670 rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
5671 old->npages * PAGE_SIZE);
5672 if (rc)
5673 break;
5674 fallthrough;
5675 case KVM_MR_CREATE:
5676 rc = gmap_map_segment(kvm->arch.gmap, new->userspace_addr,
5677 new->base_gfn * PAGE_SIZE,
5678 new->npages * PAGE_SIZE);
5679 break;
5680 case KVM_MR_FLAGS_ONLY:
5681 break;
5682 default:
5683 WARN(1, "Unknown KVM MR CHANGE: %d\n", change);
5684 }
5685 if (rc)
5686 pr_warn("failed to commit memory region\n");
5687 return;
5688}
5689
5690static inline unsigned long nonhyp_mask(int i)
5691{
5692 unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;
5693
5694 return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
5695}
5696
5697static int __init kvm_s390_init(void)
5698{
5699 int i;
5700
5701 if (!sclp.has_sief2) {
5702 pr_info("SIE is not available\n");
5703 return -ENODEV;
5704 }
5705
5706 if (nested && hpage) {
5707 pr_info("A KVM host that supports nesting cannot back its KVM guests with huge pages\n");
5708 return -EINVAL;
5709 }
5710
5711 for (i = 0; i < 16; i++)
5712 kvm_s390_fac_base[i] |=
5713 stfle_fac_list[i] & nonhyp_mask(i);
5714
5715 return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
5716}
5717
5718static void __exit kvm_s390_exit(void)
5719{
5720 kvm_exit();
5721}
5722
5723module_init(kvm_s390_init);
5724module_exit(kvm_s390_exit);
5725
5726/*
5727 * Enable autoloading of the kvm module.
5728 * Note that we add the module alias here instead of virt/kvm/kvm_main.c
5729 * since x86 takes a different approach.
5730 */
5731#include <linux/miscdevice.h>
5732MODULE_ALIAS_MISCDEV(KVM_MINOR);
5733MODULE_ALIAS("devname:kvm");