1/*
2 * hosting zSeries kernel virtual machines
3 *
4 * Copyright IBM Corp. 2008, 2009
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License (version 2 only)
8 * as published by the Free Software Foundation.
9 *
10 * Author(s): Carsten Otte <cotte@de.ibm.com>
11 * Christian Borntraeger <borntraeger@de.ibm.com>
12 * Heiko Carstens <heiko.carstens@de.ibm.com>
13 * Christian Ehrhardt <ehrhardt@de.ibm.com>
14 * Jason J. Herne <jjherne@us.ibm.com>
15 */
16
17#include <linux/compiler.h>
18#include <linux/err.h>
19#include <linux/fs.h>
20#include <linux/hrtimer.h>
21#include <linux/init.h>
22#include <linux/kvm.h>
23#include <linux/kvm_host.h>
24#include <linux/mman.h>
25#include <linux/module.h>
26#include <linux/random.h>
27#include <linux/slab.h>
28#include <linux/timer.h>
29#include <linux/vmalloc.h>
30#include <linux/bitmap.h>
31#include <asm/asm-offsets.h>
32#include <asm/lowcore.h>
33#include <asm/stp.h>
34#include <asm/pgtable.h>
35#include <asm/gmap.h>
36#include <asm/nmi.h>
37#include <asm/switch_to.h>
38#include <asm/isc.h>
39#include <asm/sclp.h>
40#include <asm/cpacf.h>
41#include <asm/timex.h>
42#include "kvm-s390.h"
43#include "gaccess.h"
44
45#define KMSG_COMPONENT "kvm-s390"
46#undef pr_fmt
47#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
48
49#define CREATE_TRACE_POINTS
50#include "trace.h"
51#include "trace-s390.h"
52
53#define MEM_OP_MAX_SIZE 65536 /* Maximum transfer size for KVM_S390_MEM_OP */
54#define LOCAL_IRQS 32
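/*
 * Worst-case buffer size for the KVM_S390_{GET,SET}_IRQ_STATE vcpu ioctls:
 * roughly one pending emergency signal per possible sending VCPU plus the
 * remaining local interrupt types.
 */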
55#define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
56 (KVM_MAX_VCPUS + LOCAL_IRQS))
57
58#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
59
60struct kvm_stats_debugfs_item debugfs_entries[] = {
61 { "userspace_handled", VCPU_STAT(exit_userspace) },
62 { "exit_null", VCPU_STAT(exit_null) },
63 { "exit_validity", VCPU_STAT(exit_validity) },
64 { "exit_stop_request", VCPU_STAT(exit_stop_request) },
65 { "exit_external_request", VCPU_STAT(exit_external_request) },
66 { "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
67 { "exit_instruction", VCPU_STAT(exit_instruction) },
68 { "exit_pei", VCPU_STAT(exit_pei) },
69 { "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
70 { "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
71 { "exit_operation_exception", VCPU_STAT(exit_operation_exception) },
72 { "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
73 { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
74 { "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
75 { "halt_wakeup", VCPU_STAT(halt_wakeup) },
76 { "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
77 { "instruction_lctl", VCPU_STAT(instruction_lctl) },
78 { "instruction_stctl", VCPU_STAT(instruction_stctl) },
79 { "instruction_stctg", VCPU_STAT(instruction_stctg) },
80 { "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
81 { "deliver_external_call", VCPU_STAT(deliver_external_call) },
82 { "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
83 { "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) },
84 { "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
85 { "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
86 { "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
87 { "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
88 { "exit_wait_state", VCPU_STAT(exit_wait_state) },
89 { "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
90 { "instruction_stidp", VCPU_STAT(instruction_stidp) },
91 { "instruction_spx", VCPU_STAT(instruction_spx) },
92 { "instruction_stpx", VCPU_STAT(instruction_stpx) },
93 { "instruction_stap", VCPU_STAT(instruction_stap) },
94 { "instruction_storage_key", VCPU_STAT(instruction_storage_key) },
95 { "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
96 { "instruction_stsch", VCPU_STAT(instruction_stsch) },
97 { "instruction_chsc", VCPU_STAT(instruction_chsc) },
98 { "instruction_essa", VCPU_STAT(instruction_essa) },
99 { "instruction_stsi", VCPU_STAT(instruction_stsi) },
100 { "instruction_stfl", VCPU_STAT(instruction_stfl) },
101 { "instruction_tprot", VCPU_STAT(instruction_tprot) },
102 { "instruction_sthyi", VCPU_STAT(instruction_sthyi) },
103 { "instruction_sie", VCPU_STAT(instruction_sie) },
104 { "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
105 { "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
106 { "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
107 { "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
108 { "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
109 { "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
110 { "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
111 { "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
112 { "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
113 { "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
114 { "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
115 { "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
116 { "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
117 { "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
118 { "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
119 { "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
120 { "diagnose_10", VCPU_STAT(diagnose_10) },
121 { "diagnose_44", VCPU_STAT(diagnose_44) },
122 { "diagnose_9c", VCPU_STAT(diagnose_9c) },
123 { "diagnose_258", VCPU_STAT(diagnose_258) },
124 { "diagnose_308", VCPU_STAT(diagnose_308) },
125 { "diagnose_500", VCPU_STAT(diagnose_500) },
126 { NULL }
127};
128
129/* allow nested virtualization in KVM (if enabled by user space) */
130static int nested;
131module_param(nested, int, S_IRUGO);
132MODULE_PARM_DESC(nested, "Nested virtualization support");
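/*
 * The parameter is read-only at runtime (S_IRUGO), so nested SIE support
 * has to be requested when loading the module, e.g.:
 *
 *	modprobe kvm nested=1
 */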
133
134/* upper facilities limit for kvm */
135unsigned long kvm_s390_fac_list_mask[16] = { FACILITIES_KVM };
136
137unsigned long kvm_s390_fac_list_mask_size(void)
138{
139 BUILD_BUG_ON(ARRAY_SIZE(kvm_s390_fac_list_mask) > S390_ARCH_FAC_MASK_SIZE_U64);
140 return ARRAY_SIZE(kvm_s390_fac_list_mask);
141}
142
143/* available cpu features supported by kvm */
144static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
145/* available subfunctions indicated via query / "test bit" */
146static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;
147
148static struct gmap_notifier gmap_notifier;
149static struct gmap_notifier vsie_gmap_notifier;
150debug_info_t *kvm_s390_dbf;
151
152/* Section: not file related */
153int kvm_arch_hardware_enable(void)
154{
155 /* every s390 is virtualization enabled ;-) */
156 return 0;
157}
158
159static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
160 unsigned long end);
161
162/*
163 * This callback is executed during stop_machine(). All CPUs are therefore
164 * temporarily stopped. In order not to change guest behavior, we have to
165 * disable preemption whenever we touch the epoch of kvm and the VCPUs,
166 * so a CPU won't be stopped while calculating with the epoch.
167 */
168static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
169 void *v)
170{
171 struct kvm *kvm;
172 struct kvm_vcpu *vcpu;
173 int i;
174 unsigned long long *delta = v;
175
176 list_for_each_entry(kvm, &vm_list, vm_list) {
177 kvm->arch.epoch -= *delta;
178 kvm_for_each_vcpu(i, vcpu, kvm) {
179 vcpu->arch.sie_block->epoch -= *delta;
180 if (vcpu->arch.cputm_enabled)
181 vcpu->arch.cputm_start += *delta;
182 if (vcpu->arch.vsie_block)
183 vcpu->arch.vsie_block->epoch -= *delta;
184 }
185 }
186 return NOTIFY_OK;
187}
188
189static struct notifier_block kvm_clock_notifier = {
190 .notifier_call = kvm_clock_sync,
191};
192
193int kvm_arch_hardware_setup(void)
194{
195 gmap_notifier.notifier_call = kvm_gmap_notifier;
196 gmap_register_pte_notifier(&gmap_notifier);
197 vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
198 gmap_register_pte_notifier(&vsie_gmap_notifier);
199 atomic_notifier_chain_register(&s390_epoch_delta_notifier,
200 &kvm_clock_notifier);
201 return 0;
202}
203
204void kvm_arch_hardware_unsetup(void)
205{
206 gmap_unregister_pte_notifier(&gmap_notifier);
207 gmap_unregister_pte_notifier(&vsie_gmap_notifier);
208 atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
209 &kvm_clock_notifier);
210}
211
212static void allow_cpu_feat(unsigned long nr)
213{
214 set_bit_inv(nr, kvm_s390_available_cpu_feat);
215}
216
217static inline int plo_test_bit(unsigned char nr)
218{
219 register unsigned long r0 asm("0") = (unsigned long) nr | 0x100;
220 int cc = 3; /* subfunction not available */
221
222 asm volatile(
223 /* Parameter registers are ignored for "test bit" */
224 " plo 0,0,0,0(0)\n"
225 " ipm %0\n"
226 " srl %0,28\n"
227 : "=d" (cc)
228 : "d" (r0)
229 : "cc");
230 return cc == 0;
231}
232
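/*
 * Probe which subfunctions (PLO, PTFF, CPACF queries) and which SIE
 * interpretation facilities the host provides, so they can be offered to
 * guests via the cpu model interface. The vSIE related CPU features below
 * are only advertised when the "nested" module parameter is set.
 */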
233static void kvm_s390_cpu_feat_init(void)
234{
235 int i;
236
237 for (i = 0; i < 256; ++i) {
238 if (plo_test_bit(i))
239 kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
240 }
241
242 if (test_facility(28)) /* TOD-clock steering */
243 ptff(kvm_s390_available_subfunc.ptff,
244 sizeof(kvm_s390_available_subfunc.ptff),
245 PTFF_QAF);
246
247 if (test_facility(17)) { /* MSA */
248 __cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
249 kvm_s390_available_subfunc.kmac);
250 __cpacf_query(CPACF_KMC, (cpacf_mask_t *)
251 kvm_s390_available_subfunc.kmc);
252 __cpacf_query(CPACF_KM, (cpacf_mask_t *)
253 kvm_s390_available_subfunc.km);
254 __cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
255 kvm_s390_available_subfunc.kimd);
256 __cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
257 kvm_s390_available_subfunc.klmd);
258 }
259 if (test_facility(76)) /* MSA3 */
260 __cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
261 kvm_s390_available_subfunc.pckmo);
262 if (test_facility(77)) { /* MSA4 */
263 __cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
264 kvm_s390_available_subfunc.kmctr);
265 __cpacf_query(CPACF_KMF, (cpacf_mask_t *)
266 kvm_s390_available_subfunc.kmf);
267 __cpacf_query(CPACF_KMO, (cpacf_mask_t *)
268 kvm_s390_available_subfunc.kmo);
269 __cpacf_query(CPACF_PCC, (cpacf_mask_t *)
270 kvm_s390_available_subfunc.pcc);
271 }
272 if (test_facility(57)) /* MSA5 */
273 __cpacf_query(CPACF_PPNO, (cpacf_mask_t *)
274 kvm_s390_available_subfunc.ppno);
275
276 if (MACHINE_HAS_ESOP)
277 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
278 /*
279 * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
280 * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
281 */
282 if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
283 !test_facility(3) || !nested)
284 return;
285 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
286 if (sclp.has_64bscao)
287 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
288 if (sclp.has_siif)
289 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
290 if (sclp.has_gpere)
291 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
292 if (sclp.has_gsls)
293 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
294 if (sclp.has_ib)
295 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
296 if (sclp.has_cei)
297 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
298 if (sclp.has_ibs)
299 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
300 /*
301 * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
302 * all skey handling functions read/set the skey from the PGSTE
303 * instead of the real storage key.
304 *
305 * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will cause
306 * pages to be detected as preserved although they are resident.
307 *
308 * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
309 * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
310 *
311 * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
312 * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
313 * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
314 *
315 * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
316 * cannot easily shadow the SCA because of the ipte lock.
317 */
318}
319
320int kvm_arch_init(void *opaque)
321{
322 kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
323 if (!kvm_s390_dbf)
324 return -ENOMEM;
325
326 if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) {
327 debug_unregister(kvm_s390_dbf);
328 return -ENOMEM;
329 }
330
331 kvm_s390_cpu_feat_init();
332
333 /* Register floating interrupt controller interface. */
334 return kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
335}
336
337void kvm_arch_exit(void)
338{
339 debug_unregister(kvm_s390_dbf);
340}
341
342/* Section: device related */
343long kvm_arch_dev_ioctl(struct file *filp,
344 unsigned int ioctl, unsigned long arg)
345{
346 if (ioctl == KVM_S390_ENABLE_SIE)
347 return s390_enable_sie();
348 return -EINVAL;
349}
350
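/*
 * Handles KVM_CHECK_EXTENSION for both the system and the VM file
 * descriptor. A minimal userspace sketch (hypothetical vm_fd):
 *
 *	int max = ioctl(vm_fd, KVM_CHECK_EXTENSION, KVM_CAP_S390_MEM_OP);
 *	// -> MEM_OP_MAX_SIZE for kernels with this handler
 */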
351int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
352{
353 int r;
354
355 switch (ext) {
356 case KVM_CAP_S390_PSW:
357 case KVM_CAP_S390_GMAP:
358 case KVM_CAP_SYNC_MMU:
359#ifdef CONFIG_KVM_S390_UCONTROL
360 case KVM_CAP_S390_UCONTROL:
361#endif
362 case KVM_CAP_ASYNC_PF:
363 case KVM_CAP_SYNC_REGS:
364 case KVM_CAP_ONE_REG:
365 case KVM_CAP_ENABLE_CAP:
366 case KVM_CAP_S390_CSS_SUPPORT:
367 case KVM_CAP_IOEVENTFD:
368 case KVM_CAP_DEVICE_CTRL:
369 case KVM_CAP_ENABLE_CAP_VM:
370 case KVM_CAP_S390_IRQCHIP:
371 case KVM_CAP_VM_ATTRIBUTES:
372 case KVM_CAP_MP_STATE:
373 case KVM_CAP_S390_INJECT_IRQ:
374 case KVM_CAP_S390_USER_SIGP:
375 case KVM_CAP_S390_USER_STSI:
376 case KVM_CAP_S390_SKEYS:
377 case KVM_CAP_S390_IRQ_STATE:
378 case KVM_CAP_S390_USER_INSTR0:
379 r = 1;
380 break;
381 case KVM_CAP_S390_MEM_OP:
382 r = MEM_OP_MAX_SIZE;
383 break;
384 case KVM_CAP_NR_VCPUS:
385 case KVM_CAP_MAX_VCPUS:
386 r = KVM_S390_BSCA_CPU_SLOTS;
387 if (!kvm_s390_use_sca_entries())
388 r = KVM_MAX_VCPUS;
389 else if (sclp.has_esca && sclp.has_64bscao)
390 r = KVM_S390_ESCA_CPU_SLOTS;
391 break;
392 case KVM_CAP_NR_MEMSLOTS:
393 r = KVM_USER_MEM_SLOTS;
394 break;
395 case KVM_CAP_S390_COW:
396 r = MACHINE_HAS_ESOP;
397 break;
398 case KVM_CAP_S390_VECTOR_REGISTERS:
399 r = MACHINE_HAS_VX;
400 break;
401 case KVM_CAP_S390_RI:
402 r = test_facility(64);
403 break;
404 default:
405 r = 0;
406 }
407 return r;
408}
409
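/*
 * Walk every page of the memslot and transfer the dirty state recorded in
 * the host/gmap page tables into the KVM dirty bitmap of that slot.
 */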
410static void kvm_s390_sync_dirty_log(struct kvm *kvm,
411 struct kvm_memory_slot *memslot)
412{
413 gfn_t cur_gfn, last_gfn;
414 unsigned long address;
415 struct gmap *gmap = kvm->arch.gmap;
416
417 /* Loop over all guest pages */
418 last_gfn = memslot->base_gfn + memslot->npages;
419 for (cur_gfn = memslot->base_gfn; cur_gfn <= last_gfn; cur_gfn++) {
420 address = gfn_to_hva_memslot(memslot, cur_gfn);
421
422 if (test_and_clear_guest_dirty(gmap->mm, address))
423 mark_page_dirty(kvm, cur_gfn);
424 if (fatal_signal_pending(current))
425 return;
426 cond_resched();
427 }
428}
429
430/* Section: vm related */
431static void sca_del_vcpu(struct kvm_vcpu *vcpu);
432
433/*
434 * Get (and clear) the dirty memory log for a memory slot.
435 */
436int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
437 struct kvm_dirty_log *log)
438{
439 int r;
440 unsigned long n;
441 struct kvm_memslots *slots;
442 struct kvm_memory_slot *memslot;
443 int is_dirty = 0;
444
445 if (kvm_is_ucontrol(kvm))
446 return -EINVAL;
447
448 mutex_lock(&kvm->slots_lock);
449
450 r = -EINVAL;
451 if (log->slot >= KVM_USER_MEM_SLOTS)
452 goto out;
453
454 slots = kvm_memslots(kvm);
455 memslot = id_to_memslot(slots, log->slot);
456 r = -ENOENT;
457 if (!memslot->dirty_bitmap)
458 goto out;
459
460 kvm_s390_sync_dirty_log(kvm, memslot);
461 r = kvm_get_dirty_log(kvm, log, &is_dirty);
462 if (r)
463 goto out;
464
465 /* Clear the dirty log */
466 if (is_dirty) {
467 n = kvm_dirty_bitmap_bytes(memslot);
468 memset(memslot->dirty_bitmap, 0, n);
469 }
470 r = 0;
471out:
472 mutex_unlock(&kvm->slots_lock);
473 return r;
474}
475
476static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
477{
478 unsigned int i;
479 struct kvm_vcpu *vcpu;
480
481 kvm_for_each_vcpu(i, vcpu, kvm) {
482 kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
483 }
484}
485
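/*
 * VM capabilities are switched on with KVM_ENABLE_CAP on the VM fd, e.g.
 * (sketch, hypothetical vm_fd):
 *
 *	struct kvm_enable_cap cap = { .cap = KVM_CAP_S390_USER_SIGP };
 *	ioctl(vm_fd, KVM_ENABLE_CAP, &cap);
 */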
486static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
487{
488 int r;
489
490 if (cap->flags)
491 return -EINVAL;
492
493 switch (cap->cap) {
494 case KVM_CAP_S390_IRQCHIP:
495 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
496 kvm->arch.use_irqchip = 1;
497 r = 0;
498 break;
499 case KVM_CAP_S390_USER_SIGP:
500 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
501 kvm->arch.user_sigp = 1;
502 r = 0;
503 break;
504 case KVM_CAP_S390_VECTOR_REGISTERS:
505 mutex_lock(&kvm->lock);
506 if (kvm->created_vcpus) {
507 r = -EBUSY;
508 } else if (MACHINE_HAS_VX) {
509 set_kvm_facility(kvm->arch.model.fac_mask, 129);
510 set_kvm_facility(kvm->arch.model.fac_list, 129);
511 r = 0;
512 } else
513 r = -EINVAL;
514 mutex_unlock(&kvm->lock);
515 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
516 r ? "(not available)" : "(success)");
517 break;
518 case KVM_CAP_S390_RI:
519 r = -EINVAL;
520 mutex_lock(&kvm->lock);
521 if (kvm->created_vcpus) {
522 r = -EBUSY;
523 } else if (test_facility(64)) {
524 set_kvm_facility(kvm->arch.model.fac_mask, 64);
525 set_kvm_facility(kvm->arch.model.fac_list, 64);
526 r = 0;
527 }
528 mutex_unlock(&kvm->lock);
529 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
530 r ? "(not available)" : "(success)");
531 break;
532 case KVM_CAP_S390_USER_STSI:
533 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
534 kvm->arch.user_stsi = 1;
535 r = 0;
536 break;
537 case KVM_CAP_S390_USER_INSTR0:
538 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
539 kvm->arch.user_instr0 = 1;
540 icpt_operexc_on_all_vcpus(kvm);
541 r = 0;
542 break;
543 default:
544 r = -EINVAL;
545 break;
546 }
547 return r;
548}
549
550static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
551{
552 int ret;
553
554 switch (attr->attr) {
555 case KVM_S390_VM_MEM_LIMIT_SIZE:
556 ret = 0;
557 VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
558 kvm->arch.mem_limit);
559 if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
560 ret = -EFAULT;
561 break;
562 default:
563 ret = -ENXIO;
564 break;
565 }
566 return ret;
567}
568
569static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
570{
571 int ret;
572 unsigned int idx;
573 switch (attr->attr) {
574 case KVM_S390_VM_MEM_ENABLE_CMMA:
575 ret = -ENXIO;
576 if (!sclp.has_cmma)
577 break;
578
579 ret = -EBUSY;
580 VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
581 mutex_lock(&kvm->lock);
582 if (!kvm->created_vcpus) {
583 kvm->arch.use_cmma = 1;
584 ret = 0;
585 }
586 mutex_unlock(&kvm->lock);
587 break;
588 case KVM_S390_VM_MEM_CLR_CMMA:
589 ret = -ENXIO;
590 if (!sclp.has_cmma)
591 break;
592 ret = -EINVAL;
593 if (!kvm->arch.use_cmma)
594 break;
595
596 VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
597 mutex_lock(&kvm->lock);
598 idx = srcu_read_lock(&kvm->srcu);
599 s390_reset_cmma(kvm->arch.gmap->mm);
600 srcu_read_unlock(&kvm->srcu, idx);
601 mutex_unlock(&kvm->lock);
602 ret = 0;
603 break;
604 case KVM_S390_VM_MEM_LIMIT_SIZE: {
605 unsigned long new_limit;
606
607 if (kvm_is_ucontrol(kvm))
608 return -EINVAL;
609
610 if (get_user(new_limit, (u64 __user *)attr->addr))
611 return -EFAULT;
612
613 if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
614 new_limit > kvm->arch.mem_limit)
615 return -E2BIG;
616
617 if (!new_limit)
618 return -EINVAL;
619
620 /* gmap_create takes last usable address */
621 if (new_limit != KVM_S390_NO_MEM_LIMIT)
622 new_limit -= 1;
623
624 ret = -EBUSY;
625 mutex_lock(&kvm->lock);
626 if (!kvm->created_vcpus) {
627 /* gmap_create will round the limit up */
628 struct gmap *new = gmap_create(current->mm, new_limit);
629
630 if (!new) {
631 ret = -ENOMEM;
632 } else {
633 gmap_remove(kvm->arch.gmap);
634 new->private = kvm;
635 kvm->arch.gmap = new;
636 ret = 0;
637 }
638 }
639 mutex_unlock(&kvm->lock);
640 VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
641 VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
642 (void *) kvm->arch.gmap->asce);
643 break;
644 }
645 default:
646 ret = -ENXIO;
647 break;
648 }
649 return ret;
650}
651
652static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
653
654static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
655{
656 struct kvm_vcpu *vcpu;
657 int i;
658
659 if (!test_kvm_facility(kvm, 76))
660 return -EINVAL;
661
662 mutex_lock(&kvm->lock);
663 switch (attr->attr) {
664 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
665 get_random_bytes(
666 kvm->arch.crypto.crycb->aes_wrapping_key_mask,
667 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
668 kvm->arch.crypto.aes_kw = 1;
669 VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
670 break;
671 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
672 get_random_bytes(
673 kvm->arch.crypto.crycb->dea_wrapping_key_mask,
674 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
675 kvm->arch.crypto.dea_kw = 1;
676 VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
677 break;
678 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
679 kvm->arch.crypto.aes_kw = 0;
680 memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
681 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
682 VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
683 break;
684 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
685 kvm->arch.crypto.dea_kw = 0;
686 memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
687 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
688 VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
689 break;
690 default:
691 mutex_unlock(&kvm->lock);
692 return -ENXIO;
693 }
694
695 kvm_for_each_vcpu(i, vcpu, kvm) {
696 kvm_s390_vcpu_crypto_setup(vcpu);
697 exit_sie(vcpu);
698 }
699 mutex_unlock(&kvm->lock);
700 return 0;
701}
702
703static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
704{
705 u8 gtod_high;
706
707 if (copy_from_user(&gtod_high, (void __user *)attr->addr,
708 sizeof(gtod_high)))
709 return -EFAULT;
710
711 if (gtod_high != 0)
712 return -EINVAL;
713 VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);
714
715 return 0;
716}
717
718static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
719{
720 u64 gtod;
721
722 if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
723 return -EFAULT;
724
725 kvm_s390_set_tod_clock(kvm, gtod);
726 VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod);
727 return 0;
728}
729
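/*
 * The guest TOD clock is configured via the VM device attribute interface,
 * e.g. (sketch, hypothetical vm_fd and tod variable):
 *
 *	struct kvm_device_attr attr = {
 *		.group = KVM_S390_VM_TOD,
 *		.attr  = KVM_S390_VM_TOD_LOW,
 *		.addr  = (__u64)&tod,
 *	};
 *	ioctl(vm_fd, KVM_SET_DEVICE_ATTR, &attr);
 */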
730static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
731{
732 int ret;
733
734 if (attr->flags)
735 return -EINVAL;
736
737 switch (attr->attr) {
738 case KVM_S390_VM_TOD_HIGH:
739 ret = kvm_s390_set_tod_high(kvm, attr);
740 break;
741 case KVM_S390_VM_TOD_LOW:
742 ret = kvm_s390_set_tod_low(kvm, attr);
743 break;
744 default:
745 ret = -ENXIO;
746 break;
747 }
748 return ret;
749}
750
751static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
752{
753 u8 gtod_high = 0;
754
755 if (copy_to_user((void __user *)attr->addr, &gtod_high,
756 sizeof(gtod_high)))
757 return -EFAULT;
758 VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);
759
760 return 0;
761}
762
763static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
764{
765 u64 gtod;
766
767 gtod = kvm_s390_get_tod_clock_fast(kvm);
768 if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
769 return -EFAULT;
770 VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);
771
772 return 0;
773}
774
775static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
776{
777 int ret;
778
779 if (attr->flags)
780 return -EINVAL;
781
782 switch (attr->attr) {
783 case KVM_S390_VM_TOD_HIGH:
784 ret = kvm_s390_get_tod_high(kvm, attr);
785 break;
786 case KVM_S390_VM_TOD_LOW:
787 ret = kvm_s390_get_tod_low(kvm, attr);
788 break;
789 default:
790 ret = -ENXIO;
791 break;
792 }
793 return ret;
794}
795
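/*
 * The IBC value requested by userspace is clamped to the range the machine
 * supports: no lower than the lowest IBC and no higher than the unblocked
 * IBC reported by SCLP.
 */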
796static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
797{
798 struct kvm_s390_vm_cpu_processor *proc;
799 u16 lowest_ibc, unblocked_ibc;
800 int ret = 0;
801
802 mutex_lock(&kvm->lock);
803 if (kvm->created_vcpus) {
804 ret = -EBUSY;
805 goto out;
806 }
807 proc = kzalloc(sizeof(*proc), GFP_KERNEL);
808 if (!proc) {
809 ret = -ENOMEM;
810 goto out;
811 }
812 if (!copy_from_user(proc, (void __user *)attr->addr,
813 sizeof(*proc))) {
814 kvm->arch.model.cpuid = proc->cpuid;
815 lowest_ibc = sclp.ibc >> 16 & 0xfff;
816 unblocked_ibc = sclp.ibc & 0xfff;
817 if (lowest_ibc && proc->ibc) {
818 if (proc->ibc > unblocked_ibc)
819 kvm->arch.model.ibc = unblocked_ibc;
820 else if (proc->ibc < lowest_ibc)
821 kvm->arch.model.ibc = lowest_ibc;
822 else
823 kvm->arch.model.ibc = proc->ibc;
824 }
825 memcpy(kvm->arch.model.fac_list, proc->fac_list,
826 S390_ARCH_FAC_LIST_SIZE_BYTE);
827 } else
828 ret = -EFAULT;
829 kfree(proc);
830out:
831 mutex_unlock(&kvm->lock);
832 return ret;
833}
834
835static int kvm_s390_set_processor_feat(struct kvm *kvm,
836 struct kvm_device_attr *attr)
837{
838 struct kvm_s390_vm_cpu_feat data;
839 int ret = -EBUSY;
840
841 if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
842 return -EFAULT;
843 if (!bitmap_subset((unsigned long *) data.feat,
844 kvm_s390_available_cpu_feat,
845 KVM_S390_VM_CPU_FEAT_NR_BITS))
846 return -EINVAL;
847
848 mutex_lock(&kvm->lock);
849 if (!atomic_read(&kvm->online_vcpus)) {
850 bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
851 KVM_S390_VM_CPU_FEAT_NR_BITS);
852 ret = 0;
853 }
854 mutex_unlock(&kvm->lock);
855 return ret;
856}
857
858static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
859 struct kvm_device_attr *attr)
860{
861 /*
862 * Once supported by kernel + hw, we have to store the subfunctions
863 * in kvm->arch and remember that user space configured them.
864 */
865 return -ENXIO;
866}
867
868static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
869{
870 int ret = -ENXIO;
871
872 switch (attr->attr) {
873 case KVM_S390_VM_CPU_PROCESSOR:
874 ret = kvm_s390_set_processor(kvm, attr);
875 break;
876 case KVM_S390_VM_CPU_PROCESSOR_FEAT:
877 ret = kvm_s390_set_processor_feat(kvm, attr);
878 break;
879 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
880 ret = kvm_s390_set_processor_subfunc(kvm, attr);
881 break;
882 }
883 return ret;
884}
885
886static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
887{
888 struct kvm_s390_vm_cpu_processor *proc;
889 int ret = 0;
890
891 proc = kzalloc(sizeof(*proc), GFP_KERNEL);
892 if (!proc) {
893 ret = -ENOMEM;
894 goto out;
895 }
896 proc->cpuid = kvm->arch.model.cpuid;
897 proc->ibc = kvm->arch.model.ibc;
898 memcpy(&proc->fac_list, kvm->arch.model.fac_list,
899 S390_ARCH_FAC_LIST_SIZE_BYTE);
900 if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
901 ret = -EFAULT;
902 kfree(proc);
903out:
904 return ret;
905}
906
907static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
908{
909 struct kvm_s390_vm_cpu_machine *mach;
910 int ret = 0;
911
912 mach = kzalloc(sizeof(*mach), GFP_KERNEL);
913 if (!mach) {
914 ret = -ENOMEM;
915 goto out;
916 }
917 get_cpu_id((struct cpuid *) &mach->cpuid);
918 mach->ibc = sclp.ibc;
919 memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
920 S390_ARCH_FAC_LIST_SIZE_BYTE);
921 memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
922 sizeof(S390_lowcore.stfle_fac_list));
923 if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
924 ret = -EFAULT;
925 kfree(mach);
926out:
927 return ret;
928}
929
930static int kvm_s390_get_processor_feat(struct kvm *kvm,
931 struct kvm_device_attr *attr)
932{
933 struct kvm_s390_vm_cpu_feat data;
934
935 bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
936 KVM_S390_VM_CPU_FEAT_NR_BITS);
937 if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
938 return -EFAULT;
939 return 0;
940}
941
942static int kvm_s390_get_machine_feat(struct kvm *kvm,
943 struct kvm_device_attr *attr)
944{
945 struct kvm_s390_vm_cpu_feat data;
946
947 bitmap_copy((unsigned long *) data.feat,
948 kvm_s390_available_cpu_feat,
949 KVM_S390_VM_CPU_FEAT_NR_BITS);
950 if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
951 return -EFAULT;
952 return 0;
953}
954
955static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
956 struct kvm_device_attr *attr)
957{
958 /*
959 * Once we can actually configure subfunctions (kernel + hw support),
960 * we have to check if they were already set by user space, if so copy
961 * them from kvm->arch.
962 */
963 return -ENXIO;
964}
965
966static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
967 struct kvm_device_attr *attr)
968{
969 if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
970 sizeof(struct kvm_s390_vm_cpu_subfunc)))
971 return -EFAULT;
972 return 0;
973}
974static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
975{
976 int ret = -ENXIO;
977
978 switch (attr->attr) {
979 case KVM_S390_VM_CPU_PROCESSOR:
980 ret = kvm_s390_get_processor(kvm, attr);
981 break;
982 case KVM_S390_VM_CPU_MACHINE:
983 ret = kvm_s390_get_machine(kvm, attr);
984 break;
985 case KVM_S390_VM_CPU_PROCESSOR_FEAT:
986 ret = kvm_s390_get_processor_feat(kvm, attr);
987 break;
988 case KVM_S390_VM_CPU_MACHINE_FEAT:
989 ret = kvm_s390_get_machine_feat(kvm, attr);
990 break;
991 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
992 ret = kvm_s390_get_processor_subfunc(kvm, attr);
993 break;
994 case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
995 ret = kvm_s390_get_machine_subfunc(kvm, attr);
996 break;
997 }
998 return ret;
999}
1000
1001static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1002{
1003 int ret;
1004
1005 switch (attr->group) {
1006 case KVM_S390_VM_MEM_CTRL:
1007 ret = kvm_s390_set_mem_control(kvm, attr);
1008 break;
1009 case KVM_S390_VM_TOD:
1010 ret = kvm_s390_set_tod(kvm, attr);
1011 break;
1012 case KVM_S390_VM_CPU_MODEL:
1013 ret = kvm_s390_set_cpu_model(kvm, attr);
1014 break;
1015 case KVM_S390_VM_CRYPTO:
1016 ret = kvm_s390_vm_set_crypto(kvm, attr);
1017 break;
1018 default:
1019 ret = -ENXIO;
1020 break;
1021 }
1022
1023 return ret;
1024}
1025
1026static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1027{
1028 int ret;
1029
1030 switch (attr->group) {
1031 case KVM_S390_VM_MEM_CTRL:
1032 ret = kvm_s390_get_mem_control(kvm, attr);
1033 break;
1034 case KVM_S390_VM_TOD:
1035 ret = kvm_s390_get_tod(kvm, attr);
1036 break;
1037 case KVM_S390_VM_CPU_MODEL:
1038 ret = kvm_s390_get_cpu_model(kvm, attr);
1039 break;
1040 default:
1041 ret = -ENXIO;
1042 break;
1043 }
1044
1045 return ret;
1046}
1047
1048static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1049{
1050 int ret;
1051
1052 switch (attr->group) {
1053 case KVM_S390_VM_MEM_CTRL:
1054 switch (attr->attr) {
1055 case KVM_S390_VM_MEM_ENABLE_CMMA:
1056 case KVM_S390_VM_MEM_CLR_CMMA:
1057 ret = sclp.has_cmma ? 0 : -ENXIO;
1058 break;
1059 case KVM_S390_VM_MEM_LIMIT_SIZE:
1060 ret = 0;
1061 break;
1062 default:
1063 ret = -ENXIO;
1064 break;
1065 }
1066 break;
1067 case KVM_S390_VM_TOD:
1068 switch (attr->attr) {
1069 case KVM_S390_VM_TOD_LOW:
1070 case KVM_S390_VM_TOD_HIGH:
1071 ret = 0;
1072 break;
1073 default:
1074 ret = -ENXIO;
1075 break;
1076 }
1077 break;
1078 case KVM_S390_VM_CPU_MODEL:
1079 switch (attr->attr) {
1080 case KVM_S390_VM_CPU_PROCESSOR:
1081 case KVM_S390_VM_CPU_MACHINE:
1082 case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1083 case KVM_S390_VM_CPU_MACHINE_FEAT:
1084 case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1085 ret = 0;
1086 break;
1087 /* configuring subfunctions is not supported yet */
1088 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1089 default:
1090 ret = -ENXIO;
1091 break;
1092 }
1093 break;
1094 case KVM_S390_VM_CRYPTO:
1095 switch (attr->attr) {
1096 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
1097 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
1098 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
1099 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
1100 ret = 0;
1101 break;
1102 default:
1103 ret = -ENXIO;
1104 break;
1105 }
1106 break;
1107 default:
1108 ret = -ENXIO;
1109 break;
1110 }
1111
1112 return ret;
1113}
1114
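/*
 * KVM_S390_GET_SKEYS copies one storage key per guest page into a user
 * buffer; KVM_S390_GET_SKEYS_NONE is returned as long as the guest has
 * never made use of storage keys (skey handling not yet enabled).
 */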
1115static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1116{
1117 uint8_t *keys;
1118 uint64_t hva;
1119 int i, r = 0;
1120
1121 if (args->flags != 0)
1122 return -EINVAL;
1123
1124 /* Is this guest using storage keys? */
1125 if (!mm_use_skey(current->mm))
1126 return KVM_S390_GET_SKEYS_NONE;
1127
1128 /* Enforce sane limit on memory allocation */
1129 if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1130 return -EINVAL;
1131
1132 keys = kmalloc_array(args->count, sizeof(uint8_t),
1133 GFP_KERNEL | __GFP_NOWARN);
1134 if (!keys)
1135 keys = vmalloc(sizeof(uint8_t) * args->count);
1136 if (!keys)
1137 return -ENOMEM;
1138
1139 down_read(&current->mm->mmap_sem);
1140 for (i = 0; i < args->count; i++) {
1141 hva = gfn_to_hva(kvm, args->start_gfn + i);
1142 if (kvm_is_error_hva(hva)) {
1143 r = -EFAULT;
1144 break;
1145 }
1146
1147 r = get_guest_storage_key(current->mm, hva, &keys[i]);
1148 if (r)
1149 break;
1150 }
1151 up_read(&current->mm->mmap_sem);
1152
1153 if (!r) {
1154 r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
1155 sizeof(uint8_t) * args->count);
1156 if (r)
1157 r = -EFAULT;
1158 }
1159
1160 kvfree(keys);
1161 return r;
1162}
1163
1164static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1165{
1166 uint8_t *keys;
1167 uint64_t hva;
1168 int i, r = 0;
1169
1170 if (args->flags != 0)
1171 return -EINVAL;
1172
1173 /* Enforce sane limit on memory allocation */
1174 if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1175 return -EINVAL;
1176
1177 keys = kmalloc_array(args->count, sizeof(uint8_t),
1178 GFP_KERNEL | __GFP_NOWARN);
1179 if (!keys)
1180 keys = vmalloc(sizeof(uint8_t) * args->count);
1181 if (!keys)
1182 return -ENOMEM;
1183
1184 r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
1185 sizeof(uint8_t) * args->count);
1186 if (r) {
1187 r = -EFAULT;
1188 goto out;
1189 }
1190
1191 /* Enable storage key handling for the guest */
1192 r = s390_enable_skey();
1193 if (r)
1194 goto out;
1195
1196 down_read(&current->mm->mmap_sem);
1197 for (i = 0; i < args->count; i++) {
1198 hva = gfn_to_hva(kvm, args->start_gfn + i);
1199 if (kvm_is_error_hva(hva)) {
1200 r = -EFAULT;
1201 break;
1202 }
1203
1204 /* Lowest order bit is reserved */
1205 if (keys[i] & 0x01) {
1206 r = -EINVAL;
1207 break;
1208 }
1209
1210 r = set_guest_storage_key(current->mm, hva, keys[i], 0);
1211 if (r)
1212 break;
1213 }
1214 up_read(&current->mm->mmap_sem);
1215out:
1216 kvfree(keys);
1217 return r;
1218}
1219
1220long kvm_arch_vm_ioctl(struct file *filp,
1221 unsigned int ioctl, unsigned long arg)
1222{
1223 struct kvm *kvm = filp->private_data;
1224 void __user *argp = (void __user *)arg;
1225 struct kvm_device_attr attr;
1226 int r;
1227
1228 switch (ioctl) {
1229 case KVM_S390_INTERRUPT: {
1230 struct kvm_s390_interrupt s390int;
1231
1232 r = -EFAULT;
1233 if (copy_from_user(&s390int, argp, sizeof(s390int)))
1234 break;
1235 r = kvm_s390_inject_vm(kvm, &s390int);
1236 break;
1237 }
1238 case KVM_ENABLE_CAP: {
1239 struct kvm_enable_cap cap;
1240 r = -EFAULT;
1241 if (copy_from_user(&cap, argp, sizeof(cap)))
1242 break;
1243 r = kvm_vm_ioctl_enable_cap(kvm, &cap);
1244 break;
1245 }
1246 case KVM_CREATE_IRQCHIP: {
1247 struct kvm_irq_routing_entry routing;
1248
1249 r = -EINVAL;
1250 if (kvm->arch.use_irqchip) {
1251 /* Set up dummy routing. */
1252 memset(&routing, 0, sizeof(routing));
1253 r = kvm_set_irq_routing(kvm, &routing, 0, 0);
1254 }
1255 break;
1256 }
1257 case KVM_SET_DEVICE_ATTR: {
1258 r = -EFAULT;
1259 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1260 break;
1261 r = kvm_s390_vm_set_attr(kvm, &attr);
1262 break;
1263 }
1264 case KVM_GET_DEVICE_ATTR: {
1265 r = -EFAULT;
1266 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1267 break;
1268 r = kvm_s390_vm_get_attr(kvm, &attr);
1269 break;
1270 }
1271 case KVM_HAS_DEVICE_ATTR: {
1272 r = -EFAULT;
1273 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1274 break;
1275 r = kvm_s390_vm_has_attr(kvm, &attr);
1276 break;
1277 }
1278 case KVM_S390_GET_SKEYS: {
1279 struct kvm_s390_skeys args;
1280
1281 r = -EFAULT;
1282 if (copy_from_user(&args, argp,
1283 sizeof(struct kvm_s390_skeys)))
1284 break;
1285 r = kvm_s390_get_skeys(kvm, &args);
1286 break;
1287 }
1288 case KVM_S390_SET_SKEYS: {
1289 struct kvm_s390_skeys args;
1290
1291 r = -EFAULT;
1292 if (copy_from_user(&args, argp,
1293 sizeof(struct kvm_s390_skeys)))
1294 break;
1295 r = kvm_s390_set_skeys(kvm, &args);
1296 break;
1297 }
1298 default:
1299 r = -ENOTTY;
1300 }
1301
1302 return r;
1303}
1304
1305static int kvm_s390_query_ap_config(u8 *config)
1306{
1307 u32 fcn_code = 0x04000000UL;
1308 u32 cc = 0;
1309
1310 memset(config, 0, 128);
1311 asm volatile(
1312 "lgr 0,%1\n"
1313 "lgr 2,%2\n"
1314 ".long 0xb2af0000\n" /* PQAP(QCI) */
1315 "0: ipm %0\n"
1316 "srl %0,28\n"
1317 "1:\n"
1318 EX_TABLE(0b, 1b)
1319 : "+r" (cc)
1320 : "r" (fcn_code), "r" (config)
1321 : "cc", "0", "2", "memory"
1322 );
1323
1324 return cc;
1325}
1326
1327static int kvm_s390_apxa_installed(void)
1328{
1329 u8 config[128];
1330 int cc;
1331
1332 if (test_facility(12)) {
1333 cc = kvm_s390_query_ap_config(config);
1334
1335 if (cc)
1336 pr_err("PQAP(QCI) failed with cc=%d", cc);
1337 else
1338 return config[0] & 0x40;
1339 }
1340
1341 return 0;
1342}
1343
1344static void kvm_s390_set_crycb_format(struct kvm *kvm)
1345{
1346 kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
1347
1348 if (kvm_s390_apxa_installed())
1349 kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
1350 else
1351 kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
1352}
1353
1354static u64 kvm_s390_get_initial_cpuid(void)
1355{
1356 struct cpuid cpuid;
1357
1358 get_cpu_id(&cpuid);
1359 cpuid.version = 0xff;
1360 return *((u64 *) &cpuid);
1361}
1362
1363static void kvm_s390_crypto_init(struct kvm *kvm)
1364{
1365 if (!test_kvm_facility(kvm, 76))
1366 return;
1367
1368 kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
1369 kvm_s390_set_crycb_format(kvm);
1370
1371 /* Enable AES/DEA protected key functions by default */
1372 kvm->arch.crypto.aes_kw = 1;
1373 kvm->arch.crypto.dea_kw = 1;
1374 get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
1375 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
1376 get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
1377 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
1378}
1379
1380static void sca_dispose(struct kvm *kvm)
1381{
1382 if (kvm->arch.use_esca)
1383 free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
1384 else
1385 free_page((unsigned long)(kvm->arch.sca));
1386 kvm->arch.sca = NULL;
1387}
1388
1389int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
1390{
1391 gfp_t alloc_flags = GFP_KERNEL;
1392 int i, rc;
1393 char debug_name[16];
1394 static unsigned long sca_offset;
1395
1396 rc = -EINVAL;
1397#ifdef CONFIG_KVM_S390_UCONTROL
1398 if (type & ~KVM_VM_S390_UCONTROL)
1399 goto out_err;
1400 if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
1401 goto out_err;
1402#else
1403 if (type)
1404 goto out_err;
1405#endif
1406
1407 rc = s390_enable_sie();
1408 if (rc)
1409 goto out_err;
1410
1411 rc = -ENOMEM;
1412
1413 ratelimit_state_init(&kvm->arch.sthyi_limit, 5 * HZ, 500);
1414
1415 kvm->arch.use_esca = 0; /* start with basic SCA */
1416 if (!sclp.has_64bscao)
1417 alloc_flags |= GFP_DMA;
1418 rwlock_init(&kvm->arch.sca_lock);
1419 kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
1420 if (!kvm->arch.sca)
1421 goto out_err;
1422 spin_lock(&kvm_lock);
1423 sca_offset += 16;
1424 if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
1425 sca_offset = 0;
1426 kvm->arch.sca = (struct bsca_block *)
1427 ((char *) kvm->arch.sca + sca_offset);
1428 spin_unlock(&kvm_lock);
1429
1430 sprintf(debug_name, "kvm-%u", current->pid);
1431
1432 kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
1433 if (!kvm->arch.dbf)
1434 goto out_err;
1435
1436 kvm->arch.sie_page2 =
1437 (struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
1438 if (!kvm->arch.sie_page2)
1439 goto out_err;
1440
1441 /* Populate the facility mask initially. */
1442 memcpy(kvm->arch.model.fac_mask, S390_lowcore.stfle_fac_list,
1443 sizeof(S390_lowcore.stfle_fac_list));
1444 for (i = 0; i < S390_ARCH_FAC_LIST_SIZE_U64; i++) {
1445 if (i < kvm_s390_fac_list_mask_size())
1446 kvm->arch.model.fac_mask[i] &= kvm_s390_fac_list_mask[i];
1447 else
1448 kvm->arch.model.fac_mask[i] = 0UL;
1449 }
1450
1451 /* Populate the facility list initially. */
1452 kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
1453 memcpy(kvm->arch.model.fac_list, kvm->arch.model.fac_mask,
1454 S390_ARCH_FAC_LIST_SIZE_BYTE);
1455
1456 set_kvm_facility(kvm->arch.model.fac_mask, 74);
1457 set_kvm_facility(kvm->arch.model.fac_list, 74);
1458
1459 kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
1460 kvm->arch.model.ibc = sclp.ibc & 0x0fff;
1461
1462 kvm_s390_crypto_init(kvm);
1463
1464 spin_lock_init(&kvm->arch.float_int.lock);
1465 for (i = 0; i < FIRQ_LIST_COUNT; i++)
1466 INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
1467 init_waitqueue_head(&kvm->arch.ipte_wq);
1468 mutex_init(&kvm->arch.ipte_mutex);
1469
1470 debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
1471 VM_EVENT(kvm, 3, "vm created with type %lu", type);
1472
1473 if (type & KVM_VM_S390_UCONTROL) {
1474 kvm->arch.gmap = NULL;
1475 kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
1476 } else {
1477 if (sclp.hamax == U64_MAX)
1478 kvm->arch.mem_limit = TASK_MAX_SIZE;
1479 else
1480 kvm->arch.mem_limit = min_t(unsigned long, TASK_MAX_SIZE,
1481 sclp.hamax + 1);
1482 kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
1483 if (!kvm->arch.gmap)
1484 goto out_err;
1485 kvm->arch.gmap->private = kvm;
1486 kvm->arch.gmap->pfault_enabled = 0;
1487 }
1488
1489 kvm->arch.css_support = 0;
1490 kvm->arch.use_irqchip = 0;
1491 kvm->arch.epoch = 0;
1492
1493 spin_lock_init(&kvm->arch.start_stop_lock);
1494 kvm_s390_vsie_init(kvm);
1495 KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
1496
1497 return 0;
1498out_err:
1499 free_page((unsigned long)kvm->arch.sie_page2);
1500 debug_unregister(kvm->arch.dbf);
1501 sca_dispose(kvm);
1502 KVM_EVENT(3, "creation of vm failed: %d", rc);
1503 return rc;
1504}
1505
1506bool kvm_arch_has_vcpu_debugfs(void)
1507{
1508 return false;
1509}
1510
1511int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu)
1512{
1513 return 0;
1514}
1515
1516void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
1517{
1518 VCPU_EVENT(vcpu, 3, "%s", "free cpu");
1519 trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
1520 kvm_s390_clear_local_irqs(vcpu);
1521 kvm_clear_async_pf_completion_queue(vcpu);
1522 if (!kvm_is_ucontrol(vcpu->kvm))
1523 sca_del_vcpu(vcpu);
1524
1525 if (kvm_is_ucontrol(vcpu->kvm))
1526 gmap_remove(vcpu->arch.gmap);
1527
1528 if (vcpu->kvm->arch.use_cmma)
1529 kvm_s390_vcpu_unsetup_cmma(vcpu);
1530 free_page((unsigned long)(vcpu->arch.sie_block));
1531
1532 kvm_vcpu_uninit(vcpu);
1533 kmem_cache_free(kvm_vcpu_cache, vcpu);
1534}
1535
1536static void kvm_free_vcpus(struct kvm *kvm)
1537{
1538 unsigned int i;
1539 struct kvm_vcpu *vcpu;
1540
1541 kvm_for_each_vcpu(i, vcpu, kvm)
1542 kvm_arch_vcpu_destroy(vcpu);
1543
1544 mutex_lock(&kvm->lock);
1545 for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
1546 kvm->vcpus[i] = NULL;
1547
1548 atomic_set(&kvm->online_vcpus, 0);
1549 mutex_unlock(&kvm->lock);
1550}
1551
1552void kvm_arch_destroy_vm(struct kvm *kvm)
1553{
1554 kvm_free_vcpus(kvm);
1555 sca_dispose(kvm);
1556 debug_unregister(kvm->arch.dbf);
1557 free_page((unsigned long)kvm->arch.sie_page2);
1558 if (!kvm_is_ucontrol(kvm))
1559 gmap_remove(kvm->arch.gmap);
1560 kvm_s390_destroy_adapters(kvm);
1561 kvm_s390_clear_float_irqs(kvm);
1562 kvm_s390_vsie_destroy(kvm);
1563 KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
1564}
1565
1566/* Section: vcpu related */
1567static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
1568{
1569 vcpu->arch.gmap = gmap_create(current->mm, -1UL);
1570 if (!vcpu->arch.gmap)
1571 return -ENOMEM;
1572 vcpu->arch.gmap->private = vcpu->kvm;
1573
1574 return 0;
1575}
1576
1577static void sca_del_vcpu(struct kvm_vcpu *vcpu)
1578{
1579 if (!kvm_s390_use_sca_entries())
1580 return;
1581 read_lock(&vcpu->kvm->arch.sca_lock);
1582 if (vcpu->kvm->arch.use_esca) {
1583 struct esca_block *sca = vcpu->kvm->arch.sca;
1584
1585 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
1586 sca->cpu[vcpu->vcpu_id].sda = 0;
1587 } else {
1588 struct bsca_block *sca = vcpu->kvm->arch.sca;
1589
1590 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
1591 sca->cpu[vcpu->vcpu_id].sda = 0;
1592 }
1593 read_unlock(&vcpu->kvm->arch.sca_lock);
1594}
1595
1596static void sca_add_vcpu(struct kvm_vcpu *vcpu)
1597{
1598 if (!kvm_s390_use_sca_entries()) {
1599 struct bsca_block *sca = vcpu->kvm->arch.sca;
1600
1601 /* we still need the basic sca for the ipte control */
1602 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
1603 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
1604 }
1605 read_lock(&vcpu->kvm->arch.sca_lock);
1606 if (vcpu->kvm->arch.use_esca) {
1607 struct esca_block *sca = vcpu->kvm->arch.sca;
1608
1609 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
1610 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
1611 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
1612 vcpu->arch.sie_block->ecb2 |= 0x04U;
1613 set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
1614 } else {
1615 struct bsca_block *sca = vcpu->kvm->arch.sca;
1616
1617 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
1618 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
1619 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
1620 set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
1621 }
1622 read_unlock(&vcpu->kvm->arch.sca_lock);
1623}
1624
1625/* Basic SCA to Extended SCA data copy routines */
1626static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
1627{
1628 d->sda = s->sda;
1629 d->sigp_ctrl.c = s->sigp_ctrl.c;
1630 d->sigp_ctrl.scn = s->sigp_ctrl.scn;
1631}
1632
1633static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
1634{
1635 int i;
1636
1637 d->ipte_control = s->ipte_control;
1638 d->mcn[0] = s->mcn;
1639 for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
1640 sca_copy_entry(&d->cpu[i], &s->cpu[i]);
1641}
1642
1643static int sca_switch_to_extended(struct kvm *kvm)
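/*
 * Convert a VM from the basic SCA to the extended SCA, which provides more
 * VCPU slots (248 instead of 64). All VCPUs are blocked while their SCA
 * origin and the ESCA bit in ECB2 are rewritten; afterwards the old basic
 * SCA is freed.
 */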
1644{
1645 struct bsca_block *old_sca = kvm->arch.sca;
1646 struct esca_block *new_sca;
1647 struct kvm_vcpu *vcpu;
1648 unsigned int vcpu_idx;
1649 u32 scaol, scaoh;
1650
1651 new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);
1652 if (!new_sca)
1653 return -ENOMEM;
1654
1655 scaoh = (u32)((u64)(new_sca) >> 32);
1656 scaol = (u32)(u64)(new_sca) & ~0x3fU;
1657
1658 kvm_s390_vcpu_block_all(kvm);
1659 write_lock(&kvm->arch.sca_lock);
1660
1661 sca_copy_b_to_e(new_sca, old_sca);
1662
1663 kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
1664 vcpu->arch.sie_block->scaoh = scaoh;
1665 vcpu->arch.sie_block->scaol = scaol;
1666 vcpu->arch.sie_block->ecb2 |= 0x04U;
1667 }
1668 kvm->arch.sca = new_sca;
1669 kvm->arch.use_esca = 1;
1670
1671 write_unlock(&kvm->arch.sca_lock);
1672 kvm_s390_vcpu_unblock_all(kvm);
1673
1674 free_page((unsigned long)old_sca);
1675
1676 VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
1677 old_sca, kvm->arch.sca);
1678 return 0;
1679}
1680
1681static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
1682{
1683 int rc;
1684
1685 if (!kvm_s390_use_sca_entries()) {
1686 if (id < KVM_MAX_VCPUS)
1687 return true;
1688 return false;
1689 }
1690 if (id < KVM_S390_BSCA_CPU_SLOTS)
1691 return true;
1692 if (!sclp.has_esca || !sclp.has_64bscao)
1693 return false;
1694
1695 mutex_lock(&kvm->lock);
1696 rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
1697 mutex_unlock(&kvm->lock);
1698
1699 return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
1700}
1701
1702int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
1703{
1704 vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
1705 kvm_clear_async_pf_completion_queue(vcpu);
1706 vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
1707 KVM_SYNC_GPRS |
1708 KVM_SYNC_ACRS |
1709 KVM_SYNC_CRS |
1710 KVM_SYNC_ARCH0 |
1711 KVM_SYNC_PFAULT;
1712 kvm_s390_set_prefix(vcpu, 0);
1713 if (test_kvm_facility(vcpu->kvm, 64))
1714 vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
1715 /* fprs can be synchronized via vrs, even if the guest has no vx. With
1716 * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
1717 */
1718 if (MACHINE_HAS_VX)
1719 vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
1720 else
1721 vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
1722
1723 if (kvm_is_ucontrol(vcpu->kvm))
1724 return __kvm_ucontrol_vcpu_init(vcpu);
1725
1726 return 0;
1727}
1728
1729/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
1730static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1731{
1732 WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
1733 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
1734 vcpu->arch.cputm_start = get_tod_clock_fast();
1735 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
1736}
1737
1738/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
1739static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1740{
1741 WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
1742 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
1743 vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
1744 vcpu->arch.cputm_start = 0;
1745 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
1746}
1747
1748/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
1749static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1750{
1751 WARN_ON_ONCE(vcpu->arch.cputm_enabled);
1752 vcpu->arch.cputm_enabled = true;
1753 __start_cpu_timer_accounting(vcpu);
1754}
1755
1756/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
1757static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1758{
1759 WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
1760 __stop_cpu_timer_accounting(vcpu);
1761 vcpu->arch.cputm_enabled = false;
1762}
1763
1764static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1765{
1766 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
1767 __enable_cpu_timer_accounting(vcpu);
1768 preempt_enable();
1769}
1770
1771static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1772{
1773 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
1774 __disable_cpu_timer_accounting(vcpu);
1775 preempt_enable();
1776}
1777
1778/* set the cpu timer - may only be called from the VCPU thread itself */
1779void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
1780{
1781 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
1782 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
1783 if (vcpu->arch.cputm_enabled)
1784 vcpu->arch.cputm_start = get_tod_clock_fast();
1785 vcpu->arch.sie_block->cputm = cputm;
1786 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
1787 preempt_enable();
1788}
1789
1790/* update and get the cpu timer - can also be called from other VCPU threads */
1791__u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
1792{
1793 unsigned int seq;
1794 __u64 value;
1795
1796 if (unlikely(!vcpu->arch.cputm_enabled))
1797 return vcpu->arch.sie_block->cputm;
1798
1799 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
1800 do {
1801 seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
1802 /*
1803 * If the writer would ever execute a read in the critical
1804 * section, e.g. in irq context, we have a deadlock.
1805 */
1806 WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
1807 value = vcpu->arch.sie_block->cputm;
1808 /* if cputm_start is 0, accounting is being started/stopped */
1809 if (likely(vcpu->arch.cputm_start))
1810 value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
1811 } while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
1812 preempt_enable();
1813 return value;
1814}
1815
1816void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
1817{
1818
1819 gmap_enable(vcpu->arch.enabled_gmap);
1820 atomic_or(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
1821 if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
1822 __start_cpu_timer_accounting(vcpu);
1823 vcpu->cpu = cpu;
1824}
1825
1826void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
1827{
1828 vcpu->cpu = -1;
1829 if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
1830 __stop_cpu_timer_accounting(vcpu);
1831 atomic_andnot(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
1832 vcpu->arch.enabled_gmap = gmap_get_enabled();
1833 gmap_disable(vcpu->arch.enabled_gmap);
1834
1835}
1836
1837static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
1838{
1839 /* this equals initial cpu reset in pop, but we don't switch to ESA */
1840 vcpu->arch.sie_block->gpsw.mask = 0UL;
1841 vcpu->arch.sie_block->gpsw.addr = 0UL;
1842 kvm_s390_set_prefix(vcpu, 0);
1843 kvm_s390_set_cpu_timer(vcpu, 0);
1844 vcpu->arch.sie_block->ckc = 0UL;
1845 vcpu->arch.sie_block->todpr = 0;
1846 memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
1847 vcpu->arch.sie_block->gcr[0] = 0xE0UL;
1848 vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
1849 /* make sure the new fpc will be lazily loaded */
1850 save_fpu_regs();
1851 current->thread.fpu.fpc = 0;
1852 vcpu->arch.sie_block->gbea = 1;
1853 vcpu->arch.sie_block->pp = 0;
1854 vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
1855 kvm_clear_async_pf_completion_queue(vcpu);
1856 if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
1857 kvm_s390_vcpu_stop(vcpu);
1858 kvm_s390_clear_local_irqs(vcpu);
1859}
1860
1861void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
1862{
1863 mutex_lock(&vcpu->kvm->lock);
1864 preempt_disable();
1865 vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
1866 preempt_enable();
1867 mutex_unlock(&vcpu->kvm->lock);
1868 if (!kvm_is_ucontrol(vcpu->kvm)) {
1869 vcpu->arch.gmap = vcpu->kvm->arch.gmap;
1870 sca_add_vcpu(vcpu);
1871 }
1872 if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
1873 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
1874 /* make vcpu_load load the right gmap on the first trigger */
1875 vcpu->arch.enabled_gmap = vcpu->arch.gmap;
1876}
1877
1878static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
1879{
1880 if (!test_kvm_facility(vcpu->kvm, 76))
1881 return;
1882
1883 vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
1884
1885 if (vcpu->kvm->arch.crypto.aes_kw)
1886 vcpu->arch.sie_block->ecb3 |= ECB3_AES;
1887 if (vcpu->kvm->arch.crypto.dea_kw)
1888 vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
1889
1890 vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
1891}
1892
1893void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
1894{
1895 free_page(vcpu->arch.sie_block->cbrlo);
1896 vcpu->arch.sie_block->cbrlo = 0;
1897}
1898
1899int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
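/*
 * ECB2 bit 0x80 enables CMMA (ESSA) interpretation by the SIE, with the
 * cbrlo page below collecting the ESSA results; PFMF interpretation
 * (bit 0x08) is kept switched off while CMMA is in use.
 */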
1900{
1901 vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
1902 if (!vcpu->arch.sie_block->cbrlo)
1903 return -ENOMEM;
1904
1905 vcpu->arch.sie_block->ecb2 |= 0x80;
1906 vcpu->arch.sie_block->ecb2 &= ~0x08;
1907 return 0;
1908}
1909
1910static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
1911{
1912 struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
1913
1914 vcpu->arch.sie_block->ibc = model->ibc;
1915 if (test_kvm_facility(vcpu->kvm, 7))
1916 vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
1917}
1918
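/*
 * The numeric constants below are SIE control block bits: ecb 0x02 host
 * protection interruption, 0x04 SRS interpretation, 0x10 transactional
 * execution; ecb2 0x08 PFMF interpretation; eca 0x80000000 CEI,
 * 0x40000000 IB, 0x10000000 SIGP interpretation, 0x00020000 vector
 * support (together with ecd 0x20000000 host register management),
 * 0x1 SIIF.
 */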
1919int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
1920{
1921 int rc = 0;
1922
1923 atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
1924 CPUSTAT_SM |
1925 CPUSTAT_STOPPED);
1926
1927 if (test_kvm_facility(vcpu->kvm, 78))
1928 atomic_or(CPUSTAT_GED2, &vcpu->arch.sie_block->cpuflags);
1929 else if (test_kvm_facility(vcpu->kvm, 8))
1930 atomic_or(CPUSTAT_GED, &vcpu->arch.sie_block->cpuflags);
1931
1932 kvm_s390_vcpu_setup_model(vcpu);
1933
1934 /* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
1935 if (MACHINE_HAS_ESOP)
1936 vcpu->arch.sie_block->ecb |= 0x02;
1937 if (test_kvm_facility(vcpu->kvm, 9))
1938 vcpu->arch.sie_block->ecb |= 0x04;
1939 if (test_kvm_facility(vcpu->kvm, 73))
1940 vcpu->arch.sie_block->ecb |= 0x10;
1941
1942 if (test_kvm_facility(vcpu->kvm, 8) && sclp.has_pfmfi)
1943 vcpu->arch.sie_block->ecb2 |= 0x08;
1944 vcpu->arch.sie_block->eca = 0x1002000U;
1945 if (sclp.has_cei)
1946 vcpu->arch.sie_block->eca |= 0x80000000U;
1947 if (sclp.has_ib)
1948 vcpu->arch.sie_block->eca |= 0x40000000U;
1949 if (sclp.has_siif)
1950 vcpu->arch.sie_block->eca |= 1;
1951 if (sclp.has_sigpif)
1952 vcpu->arch.sie_block->eca |= 0x10000000U;
1953 if (test_kvm_facility(vcpu->kvm, 129)) {
1954 vcpu->arch.sie_block->eca |= 0x00020000;
1955 vcpu->arch.sie_block->ecd |= 0x20000000;
1956 }
1957 vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
1958 vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
1959
1960 if (vcpu->kvm->arch.use_cmma) {
1961 rc = kvm_s390_vcpu_setup_cmma(vcpu);
1962 if (rc)
1963 return rc;
1964 }
1965 hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
1966 vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
1967
1968 kvm_s390_vcpu_crypto_setup(vcpu);
1969
1970 return rc;
1971}
1972
1973struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
1974 unsigned int id)
1975{
1976 struct kvm_vcpu *vcpu;
1977 struct sie_page *sie_page;
1978 int rc = -EINVAL;
1979
1980 if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
1981 goto out;
1982
1983 rc = -ENOMEM;
1984
1985 vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
1986 if (!vcpu)
1987 goto out;
1988
1989 sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
1990 if (!sie_page)
1991 goto out_free_cpu;
1992
1993 vcpu->arch.sie_block = &sie_page->sie_block;
1994 vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
1995
1996 /* the real guest size will always be smaller than msl */
1997 vcpu->arch.sie_block->mso = 0;
1998 vcpu->arch.sie_block->msl = sclp.hamax;
1999
2000 vcpu->arch.sie_block->icpua = id;
2001 spin_lock_init(&vcpu->arch.local_int.lock);
2002 vcpu->arch.local_int.float_int = &kvm->arch.float_int;
2003 vcpu->arch.local_int.wq = &vcpu->wq;
2004 vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
2005 seqcount_init(&vcpu->arch.cputm_seqcount);
2006
2007 rc = kvm_vcpu_init(vcpu, kvm, id);
2008 if (rc)
2009 goto out_free_sie_block;
2010 VM_EVENT(kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", id, vcpu,
2011 vcpu->arch.sie_block);
2012 trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);
2013
2014 return vcpu;
2015out_free_sie_block:
2016 free_page((unsigned long)(vcpu->arch.sie_block));
2017out_free_cpu:
2018 kmem_cache_free(kvm_vcpu_cache, vcpu);
2019out:
2020 return ERR_PTR(rc);
2021}
2022
2023int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
2024{
2025 return kvm_s390_vcpu_has_irq(vcpu, 0);
2026}
2027
2028void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
2029{
2030 atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
2031 exit_sie(vcpu);
2032}
2033
2034void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
2035{
2036 atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
2037}
2038
2039static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
2040{
2041 atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
2042 exit_sie(vcpu);
2043}
2044
2045static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
2046{
2047 atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
2048}
2049
2050/*
2051 * Kick a guest cpu out of SIE and wait until SIE is not running.
2052 * If the CPU is not running (e.g. waiting as idle) the function will
2053 * return immediately. */
2054void exit_sie(struct kvm_vcpu *vcpu)
2055{
2056 atomic_or(CPUSTAT_STOP_INT, &vcpu->arch.sie_block->cpuflags);
2057 while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
2058 cpu_relax();
2059}
2060
2061/* Kick a guest cpu out of SIE to process a request synchronously */
2062void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
2063{
2064 kvm_make_request(req, vcpu);
2065 kvm_s390_vcpu_request(vcpu);
2066}
2067
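/*
 * gmap invalidation callback: if the invalidated range overlaps a vcpu's
 * prefix area (two pages starting at the prefix), request an MMU reload so
 * the ipte notifier gets re-armed for that vcpu. With the prefix area being
 * [prefix, prefix + 2 * PAGE_SIZE - 1], the overlap check is
 *	prefix <= end && start <= prefix + 2 * PAGE_SIZE - 1
 */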
2068static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
2069 unsigned long end)
2070{
2071 struct kvm *kvm = gmap->private;
2072 struct kvm_vcpu *vcpu;
2073 unsigned long prefix;
2074 int i;
2075
2076 if (gmap_is_shadow(gmap))
2077 return;
2078 if (start >= 1UL << 31)
2079 /* We are only interested in prefix pages */
2080 return;
2081 kvm_for_each_vcpu(i, vcpu, kvm) {
2082 /* match against both prefix pages */
2083 prefix = kvm_s390_get_prefix(vcpu);
2084 if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
2085 VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
2086 start, end);
2087 kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
2088 }
2089 }
2090}
2091
2092int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
2093{
2094 /* kvm common code refers to this, but never calls it */
2095 BUG();
2096 return 0;
2097}
2098
2099static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
2100 struct kvm_one_reg *reg)
2101{
2102 int r = -EINVAL;
2103
2104 switch (reg->id) {
2105 case KVM_REG_S390_TODPR:
2106 r = put_user(vcpu->arch.sie_block->todpr,
2107 (u32 __user *)reg->addr);
2108 break;
2109 case KVM_REG_S390_EPOCHDIFF:
2110 r = put_user(vcpu->arch.sie_block->epoch,
2111 (u64 __user *)reg->addr);
2112 break;
2113 case KVM_REG_S390_CPU_TIMER:
2114 r = put_user(kvm_s390_get_cpu_timer(vcpu),
2115 (u64 __user *)reg->addr);
2116 break;
2117 case KVM_REG_S390_CLOCK_COMP:
2118 r = put_user(vcpu->arch.sie_block->ckc,
2119 (u64 __user *)reg->addr);
2120 break;
2121 case KVM_REG_S390_PFTOKEN:
2122 r = put_user(vcpu->arch.pfault_token,
2123 (u64 __user *)reg->addr);
2124 break;
2125 case KVM_REG_S390_PFCOMPARE:
2126 r = put_user(vcpu->arch.pfault_compare,
2127 (u64 __user *)reg->addr);
2128 break;
2129 case KVM_REG_S390_PFSELECT:
2130 r = put_user(vcpu->arch.pfault_select,
2131 (u64 __user *)reg->addr);
2132 break;
2133 case KVM_REG_S390_PP:
2134 r = put_user(vcpu->arch.sie_block->pp,
2135 (u64 __user *)reg->addr);
2136 break;
2137 case KVM_REG_S390_GBEA:
2138 r = put_user(vcpu->arch.sie_block->gbea,
2139 (u64 __user *)reg->addr);
2140 break;
2141 default:
2142 break;
2143 }
2144
2145 return r;
2146}
2147
2148static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
2149 struct kvm_one_reg *reg)
2150{
2151 int r = -EINVAL;
2152 __u64 val;
2153
2154 switch (reg->id) {
2155 case KVM_REG_S390_TODPR:
2156 r = get_user(vcpu->arch.sie_block->todpr,
2157 (u32 __user *)reg->addr);
2158 break;
2159 case KVM_REG_S390_EPOCHDIFF:
2160 r = get_user(vcpu->arch.sie_block->epoch,
2161 (u64 __user *)reg->addr);
2162 break;
2163 case KVM_REG_S390_CPU_TIMER:
2164 r = get_user(val, (u64 __user *)reg->addr);
2165 if (!r)
2166 kvm_s390_set_cpu_timer(vcpu, val);
2167 break;
2168 case KVM_REG_S390_CLOCK_COMP:
2169 r = get_user(vcpu->arch.sie_block->ckc,
2170 (u64 __user *)reg->addr);
2171 break;
2172 case KVM_REG_S390_PFTOKEN:
2173 r = get_user(vcpu->arch.pfault_token,
2174 (u64 __user *)reg->addr);
2175 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2176 kvm_clear_async_pf_completion_queue(vcpu);
2177 break;
2178 case KVM_REG_S390_PFCOMPARE:
2179 r = get_user(vcpu->arch.pfault_compare,
2180 (u64 __user *)reg->addr);
2181 break;
2182 case KVM_REG_S390_PFSELECT:
2183 r = get_user(vcpu->arch.pfault_select,
2184 (u64 __user *)reg->addr);
2185 break;
2186 case KVM_REG_S390_PP:
2187 r = get_user(vcpu->arch.sie_block->pp,
2188 (u64 __user *)reg->addr);
2189 break;
2190 case KVM_REG_S390_GBEA:
2191 r = get_user(vcpu->arch.sie_block->gbea,
2192 (u64 __user *)reg->addr);
2193 break;
2194 default:
2195 break;
2196 }
2197
2198 return r;
2199}
2200
2201static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
2202{
2203 kvm_s390_vcpu_initial_reset(vcpu);
2204 return 0;
2205}
2206
2207int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2208{
2209	memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
2210 return 0;
2211}
2212
2213int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2214{
2215	memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
2216 return 0;
2217}
2218
2219int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
2220 struct kvm_sregs *sregs)
2221{
2222 memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
2223 memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
2224 return 0;
2225}
2226
2227int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
2228 struct kvm_sregs *sregs)
2229{
2230 memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
2231 memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
2232 return 0;
2233}
2234
2235int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2236{
2237 if (test_fp_ctl(fpu->fpc))
2238 return -EINVAL;
2239 vcpu->run->s.regs.fpc = fpu->fpc;
2240 if (MACHINE_HAS_VX)
2241 convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
2242 (freg_t *) fpu->fprs);
2243 else
2244 memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
2245 return 0;
2246}
2247
2248int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2249{
2250 /* make sure we have the latest values */
2251 save_fpu_regs();
2252 if (MACHINE_HAS_VX)
2253 convert_vx_to_fp((freg_t *) fpu->fprs,
2254 (__vector128 *) vcpu->run->s.regs.vrs);
2255 else
2256 memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
2257 fpu->fpc = vcpu->run->s.regs.fpc;
2258 return 0;
2259}
2260
2261static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
2262{
2263 int rc = 0;
2264
2265 if (!is_vcpu_stopped(vcpu))
2266 rc = -EBUSY;
2267 else {
2268 vcpu->run->psw_mask = psw.mask;
2269 vcpu->run->psw_addr = psw.addr;
2270 }
2271 return rc;
2272}
2273
2274int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
2275 struct kvm_translation *tr)
2276{
2277 return -EINVAL; /* not implemented yet */
2278}
2279
2280#define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
2281 KVM_GUESTDBG_USE_HW_BP | \
2282 KVM_GUESTDBG_ENABLE)
2283
2284int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
2285 struct kvm_guest_debug *dbg)
2286{
2287 int rc = 0;
2288
2289 vcpu->guest_debug = 0;
2290 kvm_s390_clear_bp_data(vcpu);
2291
2292 if (dbg->control & ~VALID_GUESTDBG_FLAGS)
2293 return -EINVAL;
2294 if (!sclp.has_gpere)
2295 return -EINVAL;
2296
2297 if (dbg->control & KVM_GUESTDBG_ENABLE) {
2298 vcpu->guest_debug = dbg->control;
2299 /* enforce guest PER */
2300 atomic_or(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2301
2302 if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
2303 rc = kvm_s390_import_bp_data(vcpu, dbg);
2304 } else {
2305 atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2306 vcpu->arch.guestdbg.last_bp = 0;
2307 }
2308
2309 if (rc) {
2310 vcpu->guest_debug = 0;
2311 kvm_s390_clear_bp_data(vcpu);
2312 atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2313 }
2314
2315 return rc;
2316}
2317
2318int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
2319 struct kvm_mp_state *mp_state)
2320{
2321 /* CHECK_STOP and LOAD are not supported yet */
2322 return is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
2323 KVM_MP_STATE_OPERATING;
2324}
2325
2326int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
2327 struct kvm_mp_state *mp_state)
2328{
2329 int rc = 0;
2330
2331 /* user space knows about this interface - let it control the state */
2332 vcpu->kvm->arch.user_cpu_state_ctrl = 1;
2333
2334 switch (mp_state->mp_state) {
2335 case KVM_MP_STATE_STOPPED:
2336 kvm_s390_vcpu_stop(vcpu);
2337 break;
2338 case KVM_MP_STATE_OPERATING:
2339 kvm_s390_vcpu_start(vcpu);
2340 break;
2341 case KVM_MP_STATE_LOAD:
2342 case KVM_MP_STATE_CHECK_STOP:
2343 /* fall through - CHECK_STOP and LOAD are not supported yet */
2344 default:
2345 rc = -ENXIO;
2346 }
2347
2348 return rc;
2349}
2350
2351static bool ibs_enabled(struct kvm_vcpu *vcpu)
2352{
2353 return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_IBS;
2354}
2355
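/*
 * Process pending vcpu requests before (re-)entering SIE:
 *   MMU_RELOAD   - re-protect the two prefix pages to re-arm the notifier
 *   TLB_FLUSH    - invalidate the cached host CPU id (ihcpu)
 *   ENABLE_IBS / DISABLE_IBS - toggle CPUSTAT_IBS
 *   ICPT_OPEREXC - intercept operation exceptions
 * The loop restarts after each handled request until none are left.
 */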
2356static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
2357{
2358retry:
2359 kvm_s390_vcpu_request_handled(vcpu);
2360 if (!vcpu->requests)
2361 return 0;
2362 /*
2363 * We use MMU_RELOAD just to re-arm the ipte notifier for the
2364 * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
2365 * This ensures that the ipte instruction for this request has
2366 * already finished. We might race against a second unmapper that
2367	 * wants to set the blocking bit. Let's just retry the request loop.
2368 */
2369 if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
2370 int rc;
2371 rc = gmap_mprotect_notify(vcpu->arch.gmap,
2372 kvm_s390_get_prefix(vcpu),
2373 PAGE_SIZE * 2, PROT_WRITE);
2374 if (rc) {
2375 kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
2376 return rc;
2377 }
2378 goto retry;
2379 }
2380
2381 if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
2382 vcpu->arch.sie_block->ihcpu = 0xffff;
2383 goto retry;
2384 }
2385
2386 if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
2387 if (!ibs_enabled(vcpu)) {
2388 trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
2389 atomic_or(CPUSTAT_IBS,
2390 &vcpu->arch.sie_block->cpuflags);
2391 }
2392 goto retry;
2393 }
2394
2395 if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
2396 if (ibs_enabled(vcpu)) {
2397 trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
2398 atomic_andnot(CPUSTAT_IBS,
2399 &vcpu->arch.sie_block->cpuflags);
2400 }
2401 goto retry;
2402 }
2403
2404 if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
2405 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
2406 goto retry;
2407 }
2408
2409 /* nothing to do, just clear the request */
2410 clear_bit(KVM_REQ_UNHALT, &vcpu->requests);
2411
2412 return 0;
2413}
2414
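/*
 * The guest TOD is kept as an epoch delta relative to the host TOD:
 *	epoch = guest_tod - get_tod_clock()
 * so a later read of the guest clock is get_tod_clock() + epoch. All vcpus
 * are blocked out of SIE while the new epoch is distributed.
 */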
2415void kvm_s390_set_tod_clock(struct kvm *kvm, u64 tod)
2416{
2417 struct kvm_vcpu *vcpu;
2418 int i;
2419
2420 mutex_lock(&kvm->lock);
2421 preempt_disable();
2422 kvm->arch.epoch = tod - get_tod_clock();
2423 kvm_s390_vcpu_block_all(kvm);
2424 kvm_for_each_vcpu(i, vcpu, kvm)
2425 vcpu->arch.sie_block->epoch = kvm->arch.epoch;
2426 kvm_s390_vcpu_unblock_all(kvm);
2427 preempt_enable();
2428 mutex_unlock(&kvm->lock);
2429}
2430
2431/**
2432 * kvm_arch_fault_in_page - fault-in guest page if necessary
2433 * @vcpu: The corresponding virtual cpu
2434 * @gpa: Guest physical address
2435 * @writable: Whether the page should be writable or not
2436 *
2437 * Make sure that a guest page has been faulted-in on the host.
2438 *
2439 * Return: Zero on success, negative error code otherwise.
2440 */
2441long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
2442{
2443 return gmap_fault(vcpu->arch.gmap, gpa,
2444 writable ? FAULT_FLAG_WRITE : 0);
2445}
2446
2447static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
2448 unsigned long token)
2449{
2450 struct kvm_s390_interrupt inti;
2451 struct kvm_s390_irq irq;
2452
2453 if (start_token) {
2454 irq.u.ext.ext_params2 = token;
2455 irq.type = KVM_S390_INT_PFAULT_INIT;
2456 WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
2457 } else {
2458 inti.type = KVM_S390_INT_PFAULT_DONE;
2459 inti.parm64 = token;
2460 WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
2461 }
2462}
2463
2464void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
2465 struct kvm_async_pf *work)
2466{
2467 trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
2468 __kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
2469}
2470
2471void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
2472 struct kvm_async_pf *work)
2473{
2474 trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
2475 __kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
2476}
2477
2478void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
2479 struct kvm_async_pf *work)
2480{
2481 /* s390 will always inject the page directly */
2482}
2483
2484bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
2485{
2486 /*
2487 * s390 will always inject the page directly,
2488	 * but we still want check_async_completion to clean up
2489 */
2490 return true;
2491}
2492
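/*
 * Only set up an async page fault if the guest has enabled
 * program-controlled pfault handling: a valid token, a PSW that matches the
 * pfault compare/select mask, external interrupts enabled, no other
 * interrupt pending, the 0x200 subclass bit set in CR0 (assumed to be the
 * service-signal subclass used by pfault) and pfault enabled on the gmap.
 * The 8-byte token is read from guest real storage at pfault_token.
 * Otherwise the caller falls back to synchronous fault-in.
 */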
2493static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
2494{
2495 hva_t hva;
2496 struct kvm_arch_async_pf arch;
2497 int rc;
2498
2499 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2500 return 0;
2501 if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
2502 vcpu->arch.pfault_compare)
2503 return 0;
2504 if (psw_extint_disabled(vcpu))
2505 return 0;
2506 if (kvm_s390_vcpu_has_irq(vcpu, 0))
2507 return 0;
2508 if (!(vcpu->arch.sie_block->gcr[0] & 0x200ul))
2509 return 0;
2510 if (!vcpu->arch.gmap->pfault_enabled)
2511 return 0;
2512
2513 hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
2514 hva += current->thread.gmap_addr & ~PAGE_MASK;
2515 if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
2516 return 0;
2517
2518 rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
2519 return rc;
2520}
2521
2522static int vcpu_pre_run(struct kvm_vcpu *vcpu)
2523{
2524 int rc, cpuflags;
2525
2526 /*
2527 * On s390 notifications for arriving pages will be delivered directly
2528	 * to the guest but the housekeeping for completed pfaults is
2529 * handled outside the worker.
2530 */
2531 kvm_check_async_pf_completion(vcpu);
2532
2533 vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
2534 vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
2535
2536 if (need_resched())
2537 schedule();
2538
2539 if (test_cpu_flag(CIF_MCCK_PENDING))
2540 s390_handle_mcck();
2541
2542 if (!kvm_is_ucontrol(vcpu->kvm)) {
2543 rc = kvm_s390_deliver_pending_interrupts(vcpu);
2544 if (rc)
2545 return rc;
2546 }
2547
2548 rc = kvm_s390_handle_requests(vcpu);
2549 if (rc)
2550 return rc;
2551
2552 if (guestdbg_enabled(vcpu)) {
2553 kvm_s390_backup_guest_per_regs(vcpu);
2554 kvm_s390_patch_guest_per_regs(vcpu);
2555 }
2556
2557 vcpu->arch.sie_block->icptcode = 0;
2558 cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
2559 VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
2560 trace_kvm_s390_sie_enter(vcpu, cpuflags);
2561
2562 return 0;
2563}
2564
2565static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
2566{
2567 struct kvm_s390_pgm_info pgm_info = {
2568 .code = PGM_ADDRESSING,
2569 };
2570 u8 opcode, ilen;
2571 int rc;
2572
2573 VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
2574 trace_kvm_s390_sie_fault(vcpu);
2575
2576 /*
2577 * We want to inject an addressing exception, which is defined as a
2578 * suppressing or terminating exception. However, since we came here
2579 * by a DAT access exception, the PSW still points to the faulting
2580 * instruction since DAT exceptions are nullifying. So we've got
2581 * to look up the current opcode to get the length of the instruction
2582 * to be able to forward the PSW.
2583 */
2584 rc = read_guest_instr(vcpu, &opcode, 1);
2585 ilen = insn_length(opcode);
2586 if (rc < 0) {
2587 return rc;
2588 } else if (rc) {
2589 /* Instruction-Fetching Exceptions - we can't detect the ilen.
2590 * Forward by arbitrary ilc, injection will take care of
2591 * nullification if necessary.
2592 */
2593 pgm_info = vcpu->arch.pgm;
2594 ilen = 4;
2595 }
2596 pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
2597 kvm_s390_forward_psw(vcpu, ilen);
2598 return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
2599}
2600
2601static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
2602{
2603 VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
2604 vcpu->arch.sie_block->icptcode);
2605 trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
2606
2607 if (guestdbg_enabled(vcpu))
2608 kvm_s390_restore_guest_per_regs(vcpu);
2609
2610 vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
2611 vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
2612
2613 if (vcpu->arch.sie_block->icptcode > 0) {
2614 int rc = kvm_handle_sie_intercept(vcpu);
2615
2616 if (rc != -EOPNOTSUPP)
2617 return rc;
2618 vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
2619 vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
2620 vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
2621 vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
2622 return -EREMOTE;
2623 } else if (exit_reason != -EFAULT) {
2624 vcpu->stat.exit_null++;
2625 return 0;
2626 } else if (kvm_is_ucontrol(vcpu->kvm)) {
2627 vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
2628 vcpu->run->s390_ucontrol.trans_exc_code =
2629 current->thread.gmap_addr;
2630 vcpu->run->s390_ucontrol.pgm_code = 0x10;
2631 return -EREMOTE;
2632 } else if (current->thread.gmap_pfault) {
2633 trace_kvm_s390_major_guest_pfault(vcpu);
2634 current->thread.gmap_pfault = 0;
2635 if (kvm_arch_setup_async_pf(vcpu))
2636 return 0;
2637 return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
2638 }
2639 return vcpu_post_run_fault_in_sie(vcpu);
2640}
2641
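/*
 * The inner run loop: do pre-run housekeeping, enter SIE via sie64a() with
 * interrupts disabled around the guest_enter/guest_exit accounting, keep the
 * software cpu-timer accounting switched off while inside SIE, then let
 * vcpu_post_run() translate the exit. Loop until a signal or guest-debug
 * exit is pending or a non-zero rc asks for a return to userspace.
 */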
2642static int __vcpu_run(struct kvm_vcpu *vcpu)
2643{
2644 int rc, exit_reason;
2645
2646 /*
2647	 * We try to hold kvm->srcu during most of vcpu_run (except when
2648	 * running the guest), so that memslots (and other stuff) are protected
2649 */
2650 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
2651
2652 do {
2653 rc = vcpu_pre_run(vcpu);
2654 if (rc)
2655 break;
2656
2657 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
2658 /*
2659		 * As PF_VCPU will be used in the fault handler, there must be
2660		 * no uaccess between guest_enter and guest_exit.
2661 */
2662 local_irq_disable();
2663 guest_enter_irqoff();
2664 __disable_cpu_timer_accounting(vcpu);
2665 local_irq_enable();
2666 exit_reason = sie64a(vcpu->arch.sie_block,
2667 vcpu->run->s.regs.gprs);
2668 local_irq_disable();
2669 __enable_cpu_timer_accounting(vcpu);
2670 guest_exit_irqoff();
2671 local_irq_enable();
2672 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
2673
2674 rc = vcpu_post_run(vcpu, exit_reason);
2675 } while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
2676
2677 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
2678 return rc;
2679}
2680
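/*
 * Copy the state userspace handed us in kvm_run into the vcpu / SIE block.
 * The PSW is always taken over; the other fields are only taken over when
 * the corresponding KVM_SYNC_* bit is set in kvm_dirty_regs. Host access
 * and floating point registers are saved and the guest ones made current
 * for the lazy FPU/VX switch; an invalid guest FPC is replaced by 0.
 */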
2681static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2682{
2683 vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
2684 vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
2685 if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
2686 kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
2687 if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
2688 memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
2689 /* some control register changes require a tlb flush */
2690 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
2691 }
2692 if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
2693 kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
2694 vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
2695 vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
2696 vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
2697 vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
2698 }
2699 if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
2700 vcpu->arch.pfault_token = kvm_run->s.regs.pft;
2701 vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
2702 vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
2703 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2704 kvm_clear_async_pf_completion_queue(vcpu);
2705 }
2706 /*
2707 * If userspace sets the riccb (e.g. after migration) to a valid state,
2708 * we should enable RI here instead of doing the lazy enablement.
2709 */
2710 if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
2711 test_kvm_facility(vcpu->kvm, 64)) {
2712 struct runtime_instr_cb *riccb =
2713 (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
2714
2715 if (riccb->valid)
2716 vcpu->arch.sie_block->ecb3 |= 0x01;
2717 }
2718 save_access_regs(vcpu->arch.host_acrs);
2719 restore_access_regs(vcpu->run->s.regs.acrs);
2720 /* save host (userspace) fprs/vrs */
2721 save_fpu_regs();
2722 vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
2723 vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
2724 if (MACHINE_HAS_VX)
2725 current->thread.fpu.regs = vcpu->run->s.regs.vrs;
2726 else
2727 current->thread.fpu.regs = vcpu->run->s.regs.fprs;
2728 current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
2729 if (test_fp_ctl(current->thread.fpu.fpc))
2730 /* User space provided an invalid FPC, let's clear it */
2731 current->thread.fpu.fpc = 0;
2732
2733 kvm_run->kvm_dirty_regs = 0;
2734}
2735
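/*
 * Counterpart of sync_regs(): write the current guest state back into
 * kvm_run for userspace and restore the host access and floating point
 * register context.
 */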
2736static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2737{
2738 kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
2739 kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
2740 kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
2741 memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
2742 kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
2743 kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
2744 kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
2745 kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
2746 kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
2747 kvm_run->s.regs.pft = vcpu->arch.pfault_token;
2748 kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
2749 kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
2750 save_access_regs(vcpu->run->s.regs.acrs);
2751 restore_access_regs(vcpu->arch.host_acrs);
2752 /* Save guest register state */
2753 save_fpu_regs();
2754 vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
2755 /* Restore will be done lazily at return */
2756 current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
2757 current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
2758
2759}
2760
2761int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2762{
2763 int rc;
2764 sigset_t sigsaved;
2765
2766 if (guestdbg_exit_pending(vcpu)) {
2767 kvm_s390_prepare_debug_exit(vcpu);
2768 return 0;
2769 }
2770
2771 if (vcpu->sigset_active)
2772 sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
2773
2774 if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
2775 kvm_s390_vcpu_start(vcpu);
2776 } else if (is_vcpu_stopped(vcpu)) {
2777 pr_err_ratelimited("can't run stopped vcpu %d\n",
2778 vcpu->vcpu_id);
2779 return -EINVAL;
2780 }
2781
2782 sync_regs(vcpu, kvm_run);
2783 enable_cpu_timer_accounting(vcpu);
2784
2785 might_fault();
2786 rc = __vcpu_run(vcpu);
2787
2788 if (signal_pending(current) && !rc) {
2789 kvm_run->exit_reason = KVM_EXIT_INTR;
2790 rc = -EINTR;
2791 }
2792
2793 if (guestdbg_exit_pending(vcpu) && !rc) {
2794 kvm_s390_prepare_debug_exit(vcpu);
2795 rc = 0;
2796 }
2797
2798 if (rc == -EREMOTE) {
2799 /* userspace support is needed, kvm_run has been prepared */
2800 rc = 0;
2801 }
2802
2803 disable_cpu_timer_accounting(vcpu);
2804 store_regs(vcpu, kvm_run);
2805
2806 if (vcpu->sigset_active)
2807 sigprocmask(SIG_SETMASK, &sigsaved, NULL);
2808
2809 vcpu->stat.exit_userspace++;
2810 return rc;
2811}
2812
2813/*
2814 * store status at address
2815 * we have two special cases:
2816 * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
2817 * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
2818 */
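/*
 * For a regular address, gpa is rebased by __LC_FPREGS_SAVE_AREA so the
 * lowcore save-area offsets can be reused directly below. The clock
 * comparator is stored shifted right by 8 bits, which is assumed to match
 * the architected store-status layout.
 */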
2819int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
2820{
2821 unsigned char archmode = 1;
2822 freg_t fprs[NUM_FPRS];
2823 unsigned int px;
2824 u64 clkcomp, cputm;
2825 int rc;
2826
2827 px = kvm_s390_get_prefix(vcpu);
2828 if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
2829 if (write_guest_abs(vcpu, 163, &archmode, 1))
2830 return -EFAULT;
2831 gpa = 0;
2832 } else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
2833 if (write_guest_real(vcpu, 163, &archmode, 1))
2834 return -EFAULT;
2835 gpa = px;
2836 } else
2837 gpa -= __LC_FPREGS_SAVE_AREA;
2838
2839 /* manually convert vector registers if necessary */
2840 if (MACHINE_HAS_VX) {
2841 convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
2842 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
2843 fprs, 128);
2844 } else {
2845 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
2846 vcpu->run->s.regs.fprs, 128);
2847 }
2848 rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
2849 vcpu->run->s.regs.gprs, 128);
2850 rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
2851 &vcpu->arch.sie_block->gpsw, 16);
2852 rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
2853 &px, 4);
2854 rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
2855 &vcpu->run->s.regs.fpc, 4);
2856 rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
2857 &vcpu->arch.sie_block->todpr, 4);
2858 cputm = kvm_s390_get_cpu_timer(vcpu);
2859 rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
2860 &cputm, 8);
2861 clkcomp = vcpu->arch.sie_block->ckc >> 8;
2862 rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
2863 &clkcomp, 8);
2864 rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
2865 &vcpu->run->s.regs.acrs, 64);
2866 rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
2867 &vcpu->arch.sie_block->gcr, 128);
2868 return rc ? -EFAULT : 0;
2869}
2870
2871int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
2872{
2873 /*
2874 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
2875 * switch in the run ioctl. Let's update our copies before we save
2876 * it into the save area
2877	 * them into the save area
2878 save_fpu_regs();
2879 vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
2880 save_access_regs(vcpu->run->s.regs.acrs);
2881
2882 return kvm_s390_store_status_unloaded(vcpu, addr);
2883}
2884
2885static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
2886{
2887 kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
2888 kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
2889}
2890
2891static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
2892{
2893 unsigned int i;
2894 struct kvm_vcpu *vcpu;
2895
2896 kvm_for_each_vcpu(i, vcpu, kvm) {
2897 __disable_ibs_on_vcpu(vcpu);
2898 }
2899}
2900
2901static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
2902{
2903 if (!sclp.has_ibs)
2904 return;
2905 kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
2906 kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
2907}
2908
2909void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
2910{
2911 int i, online_vcpus, started_vcpus = 0;
2912
2913 if (!is_vcpu_stopped(vcpu))
2914 return;
2915
2916 trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
2917 /* Only one cpu at a time may enter/leave the STOPPED state. */
2918 spin_lock(&vcpu->kvm->arch.start_stop_lock);
2919 online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
2920
2921 for (i = 0; i < online_vcpus; i++) {
2922 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
2923 started_vcpus++;
2924 }
2925
2926 if (started_vcpus == 0) {
2927 /* we're the only active VCPU -> speed it up */
2928 __enable_ibs_on_vcpu(vcpu);
2929 } else if (started_vcpus == 1) {
2930 /*
2931 * As we are starting a second VCPU, we have to disable
2932 * the IBS facility on all VCPUs to remove potentially
2933		 * outstanding ENABLE requests.
2934 */
2935 __disable_ibs_on_all_vcpus(vcpu->kvm);
2936 }
2937
2938 atomic_andnot(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
2939 /*
2940 * Another VCPU might have used IBS while we were offline.
2941 * Let's play safe and flush the VCPU at startup.
2942 */
2943 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
2944 spin_unlock(&vcpu->kvm->arch.start_stop_lock);
2945 return;
2946}
2947
2948void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
2949{
2950 int i, online_vcpus, started_vcpus = 0;
2951 struct kvm_vcpu *started_vcpu = NULL;
2952
2953 if (is_vcpu_stopped(vcpu))
2954 return;
2955
2956 trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
2957 /* Only one cpu at a time may enter/leave the STOPPED state. */
2958 spin_lock(&vcpu->kvm->arch.start_stop_lock);
2959 online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
2960
2961	/* SIGP STOP and SIGP STOP AND STORE STATUS have been fully processed */
2962 kvm_s390_clear_stop_irq(vcpu);
2963
2964 atomic_or(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
2965 __disable_ibs_on_vcpu(vcpu);
2966
2967 for (i = 0; i < online_vcpus; i++) {
2968 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
2969 started_vcpus++;
2970 started_vcpu = vcpu->kvm->vcpus[i];
2971 }
2972 }
2973
2974 if (started_vcpus == 1) {
2975 /*
2976 * As we only have one VCPU left, we want to enable the
2977 * IBS facility for that VCPU to speed it up.
2978 */
2979 __enable_ibs_on_vcpu(started_vcpu);
2980 }
2981
2982 spin_unlock(&vcpu->kvm->arch.start_stop_lock);
2983 return;
2984}
2985
2986static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
2987 struct kvm_enable_cap *cap)
2988{
2989 int r;
2990
2991 if (cap->flags)
2992 return -EINVAL;
2993
2994 switch (cap->cap) {
2995 case KVM_CAP_S390_CSS_SUPPORT:
2996 if (!vcpu->kvm->arch.css_support) {
2997 vcpu->kvm->arch.css_support = 1;
2998 VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
2999 trace_kvm_s390_enable_css(vcpu->kvm);
3000 }
3001 r = 0;
3002 break;
3003 default:
3004 r = -EINVAL;
3005 break;
3006 }
3007 return r;
3008}
3009
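/*
 * Back end for the KVM_S390_MEM_OP vcpu ioctl: reads or writes up to
 * MEM_OP_MAX_SIZE bytes of guest logical memory through a vmalloc'ed bounce
 * buffer, or, with KVM_S390_MEMOP_F_CHECK_ONLY, merely checks the guest
 * address range. With KVM_S390_MEMOP_F_INJECT_EXCEPTION a positive access
 * error is turned into a program interrupt for the guest.
 */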
3010static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
3011 struct kvm_s390_mem_op *mop)
3012{
3013 void __user *uaddr = (void __user *)mop->buf;
3014 void *tmpbuf = NULL;
3015 int r, srcu_idx;
3016 const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
3017 | KVM_S390_MEMOP_F_CHECK_ONLY;
3018
3019 if (mop->flags & ~supported_flags)
3020 return -EINVAL;
3021
3022 if (mop->size > MEM_OP_MAX_SIZE)
3023 return -E2BIG;
3024
3025 if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
3026 tmpbuf = vmalloc(mop->size);
3027 if (!tmpbuf)
3028 return -ENOMEM;
3029 }
3030
3031 srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3032
3033 switch (mop->op) {
3034 case KVM_S390_MEMOP_LOGICAL_READ:
3035 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
3036 r = check_gva_range(vcpu, mop->gaddr, mop->ar,
3037 mop->size, GACC_FETCH);
3038 break;
3039 }
3040 r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
3041 if (r == 0) {
3042 if (copy_to_user(uaddr, tmpbuf, mop->size))
3043 r = -EFAULT;
3044 }
3045 break;
3046 case KVM_S390_MEMOP_LOGICAL_WRITE:
3047 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
3048 r = check_gva_range(vcpu, mop->gaddr, mop->ar,
3049 mop->size, GACC_STORE);
3050 break;
3051 }
3052 if (copy_from_user(tmpbuf, uaddr, mop->size)) {
3053 r = -EFAULT;
3054 break;
3055 }
3056 r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
3057 break;
3058 default:
3059 r = -EINVAL;
3060 }
3061
3062 srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
3063
3064 if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
3065 kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
3066
3067 vfree(tmpbuf);
3068 return r;
3069}
3070
3071long kvm_arch_vcpu_ioctl(struct file *filp,
3072 unsigned int ioctl, unsigned long arg)
3073{
3074 struct kvm_vcpu *vcpu = filp->private_data;
3075 void __user *argp = (void __user *)arg;
3076 int idx;
3077 long r;
3078
3079 switch (ioctl) {
3080 case KVM_S390_IRQ: {
3081 struct kvm_s390_irq s390irq;
3082
3083 r = -EFAULT;
3084 if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
3085 break;
3086 r = kvm_s390_inject_vcpu(vcpu, &s390irq);
3087 break;
3088 }
3089 case KVM_S390_INTERRUPT: {
3090 struct kvm_s390_interrupt s390int;
3091 struct kvm_s390_irq s390irq;
3092
3093 r = -EFAULT;
3094 if (copy_from_user(&s390int, argp, sizeof(s390int)))
3095 break;
3096 if (s390int_to_s390irq(&s390int, &s390irq))
3097 return -EINVAL;
3098 r = kvm_s390_inject_vcpu(vcpu, &s390irq);
3099 break;
3100 }
3101 case KVM_S390_STORE_STATUS:
3102 idx = srcu_read_lock(&vcpu->kvm->srcu);
3103 r = kvm_s390_vcpu_store_status(vcpu, arg);
3104 srcu_read_unlock(&vcpu->kvm->srcu, idx);
3105 break;
3106 case KVM_S390_SET_INITIAL_PSW: {
3107 psw_t psw;
3108
3109 r = -EFAULT;
3110 if (copy_from_user(&psw, argp, sizeof(psw)))
3111 break;
3112 r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
3113 break;
3114 }
3115 case KVM_S390_INITIAL_RESET:
3116 r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
3117 break;
3118 case KVM_SET_ONE_REG:
3119 case KVM_GET_ONE_REG: {
3120 struct kvm_one_reg reg;
3121 r = -EFAULT;
3122		if (copy_from_user(&reg, argp, sizeof(reg)))
3123			break;
3124		if (ioctl == KVM_SET_ONE_REG)
3125			r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
3126		else
3127			r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
3128 break;
3129 }
3130#ifdef CONFIG_KVM_S390_UCONTROL
3131 case KVM_S390_UCAS_MAP: {
3132 struct kvm_s390_ucas_mapping ucasmap;
3133
3134 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
3135 r = -EFAULT;
3136 break;
3137 }
3138
3139 if (!kvm_is_ucontrol(vcpu->kvm)) {
3140 r = -EINVAL;
3141 break;
3142 }
3143
3144 r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
3145 ucasmap.vcpu_addr, ucasmap.length);
3146 break;
3147 }
3148 case KVM_S390_UCAS_UNMAP: {
3149 struct kvm_s390_ucas_mapping ucasmap;
3150
3151 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
3152 r = -EFAULT;
3153 break;
3154 }
3155
3156 if (!kvm_is_ucontrol(vcpu->kvm)) {
3157 r = -EINVAL;
3158 break;
3159 }
3160
3161 r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
3162 ucasmap.length);
3163 break;
3164 }
3165#endif
3166 case KVM_S390_VCPU_FAULT: {
3167 r = gmap_fault(vcpu->arch.gmap, arg, 0);
3168 break;
3169 }
3170 case KVM_ENABLE_CAP:
3171 {
3172 struct kvm_enable_cap cap;
3173 r = -EFAULT;
3174 if (copy_from_user(&cap, argp, sizeof(cap)))
3175 break;
3176 r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
3177 break;
3178 }
3179 case KVM_S390_MEM_OP: {
3180 struct kvm_s390_mem_op mem_op;
3181
3182 if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
3183 r = kvm_s390_guest_mem_op(vcpu, &mem_op);
3184 else
3185 r = -EFAULT;
3186 break;
3187 }
3188 case KVM_S390_SET_IRQ_STATE: {
3189 struct kvm_s390_irq_state irq_state;
3190
3191 r = -EFAULT;
3192 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
3193 break;
3194 if (irq_state.len > VCPU_IRQS_MAX_BUF ||
3195 irq_state.len == 0 ||
3196 irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
3197 r = -EINVAL;
3198 break;
3199 }
3200 r = kvm_s390_set_irq_state(vcpu,
3201 (void __user *) irq_state.buf,
3202 irq_state.len);
3203 break;
3204 }
3205 case KVM_S390_GET_IRQ_STATE: {
3206 struct kvm_s390_irq_state irq_state;
3207
3208 r = -EFAULT;
3209 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
3210 break;
3211 if (irq_state.len == 0) {
3212 r = -EINVAL;
3213 break;
3214 }
3215 r = kvm_s390_get_irq_state(vcpu,
3216 (__u8 __user *) irq_state.buf,
3217 irq_state.len);
3218 break;
3219 }
3220 default:
3221 r = -ENOTTY;
3222 }
3223 return r;
3224}
3225
3226int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
3227{
3228#ifdef CONFIG_KVM_S390_UCONTROL
3229 if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
3230 && (kvm_is_ucontrol(vcpu->kvm))) {
3231 vmf->page = virt_to_page(vcpu->arch.sie_block);
3232 get_page(vmf->page);
3233 return 0;
3234 }
3235#endif
3236 return VM_FAULT_SIGBUS;
3237}
3238
3239int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
3240 unsigned long npages)
3241{
3242 return 0;
3243}
3244
3245/* Section: memory related */
3246int kvm_arch_prepare_memory_region(struct kvm *kvm,
3247 struct kvm_memory_slot *memslot,
3248 const struct kvm_userspace_memory_region *mem,
3249 enum kvm_mr_change change)
3250{
3251	/* A few sanity checks. Memory slots have to start and end at a segment
3252	   boundary (1 MB). The memory in userland may be fragmented into various
3253	   different vmas. It is okay to mmap() and munmap() stuff in this slot
3254	   after doing this call at any time */
3255
3256 if (mem->userspace_addr & 0xffffful)
3257 return -EINVAL;
3258
3259 if (mem->memory_size & 0xffffful)
3260 return -EINVAL;
3261
3262 if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
3263 return -EINVAL;
3264
3265 return 0;
3266}
3267
3268void kvm_arch_commit_memory_region(struct kvm *kvm,
3269 const struct kvm_userspace_memory_region *mem,
3270 const struct kvm_memory_slot *old,
3271 const struct kvm_memory_slot *new,
3272 enum kvm_mr_change change)
3273{
3274 int rc;
3275
3276 /* If the basics of the memslot do not change, we do not want
3277 * to update the gmap. Every update causes several unnecessary
3278 * segment translation exceptions. This is usually handled just
3279 * fine by the normal fault handler + gmap, but it will also
3280 * cause faults on the prefix page of running guest CPUs.
3281 */
3282 if (old->userspace_addr == mem->userspace_addr &&
3283 old->base_gfn * PAGE_SIZE == mem->guest_phys_addr &&
3284 old->npages * PAGE_SIZE == mem->memory_size)
3285 return;
3286
3287 rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
3288 mem->guest_phys_addr, mem->memory_size);
3289 if (rc)
3290 pr_warn("failed to commit memory region\n");
3291 return;
3292}
3293
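/*
 * Extract the i-th 2-bit field (counting from the most significant end,
 * assuming sclp.hmfai is 32 bits wide) and turn it into a mask for the
 * non-hypervisor-managed facility bits, 16 facilities per step:
 *	nonhyp_fai = 0 -> 0x0000ffffffffffff (48 facility bits)
 *	nonhyp_fai = 1 -> 0x00000000ffffffff
 *	nonhyp_fai = 2 -> 0x000000000000ffff
 *	nonhyp_fai = 3 -> 0
 */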
3294static inline unsigned long nonhyp_mask(int i)
3295{
3296 unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;
3297
3298 return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
3299}
3300
3301void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
3302{
3303 vcpu->valid_wakeup = false;
3304}
3305
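/*
 * Module init: refuse to load when SIE (sclp.has_sief2) is not available,
 * then extend kvm_s390_fac_list_mask by the host STFLE facilities that are
 * not hypervisor managed according to nonhyp_mask().
 */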
3306static int __init kvm_s390_init(void)
3307{
3308 int i;
3309
3310 if (!sclp.has_sief2) {
3311 pr_info("SIE not available\n");
3312 return -ENODEV;
3313 }
3314
3315 for (i = 0; i < 16; i++)
3316 kvm_s390_fac_list_mask[i] |=
3317 S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i);
3318
3319 return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
3320}
3321
3322static void __exit kvm_s390_exit(void)
3323{
3324 kvm_exit();
3325}
3326
3327module_init(kvm_s390_init);
3328module_exit(kvm_s390_exit);
3329
3330/*
3331 * Enable autoloading of the kvm module.
3332 * Note that we add the module alias here instead of virt/kvm/kvm_main.c
3333 * since x86 takes a different approach.
3334 */
3335#include <linux/miscdevice.h>
3336MODULE_ALIAS_MISCDEV(KVM_MINOR);
3337MODULE_ALIAS("devname:kvm");
1// SPDX-License-Identifier: GPL-2.0
2/*
3 * hosting IBM Z kernel virtual machines (s390x)
4 *
5 * Copyright IBM Corp. 2008, 2020
6 *
7 * Author(s): Carsten Otte <cotte@de.ibm.com>
8 * Christian Borntraeger <borntraeger@de.ibm.com>
9 * Christian Ehrhardt <ehrhardt@de.ibm.com>
10 * Jason J. Herne <jjherne@us.ibm.com>
11 */
12
13#define KMSG_COMPONENT "kvm-s390"
14#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
15
16#include <linux/compiler.h>
17#include <linux/err.h>
18#include <linux/fs.h>
19#include <linux/hrtimer.h>
20#include <linux/init.h>
21#include <linux/kvm.h>
22#include <linux/kvm_host.h>
23#include <linux/mman.h>
24#include <linux/module.h>
25#include <linux/moduleparam.h>
26#include <linux/random.h>
27#include <linux/slab.h>
28#include <linux/timer.h>
29#include <linux/vmalloc.h>
30#include <linux/bitmap.h>
31#include <linux/sched/signal.h>
32#include <linux/string.h>
33#include <linux/pgtable.h>
34#include <linux/mmu_notifier.h>
35
36#include <asm/access-regs.h>
37#include <asm/asm-offsets.h>
38#include <asm/lowcore.h>
39#include <asm/stp.h>
40#include <asm/gmap.h>
41#include <asm/nmi.h>
42#include <asm/isc.h>
43#include <asm/sclp.h>
44#include <asm/cpacf.h>
45#include <asm/timex.h>
46#include <asm/asm.h>
47#include <asm/fpu.h>
48#include <asm/ap.h>
49#include <asm/uv.h>
50#include "kvm-s390.h"
51#include "gaccess.h"
52#include "pci.h"
53
54#define CREATE_TRACE_POINTS
55#include "trace.h"
56#include "trace-s390.h"
57
58#define MEM_OP_MAX_SIZE 65536 /* Maximum transfer size for KVM_S390_MEM_OP */
59#define LOCAL_IRQS 32
60#define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
61 (KVM_MAX_VCPUS + LOCAL_IRQS))
62
63const struct _kvm_stats_desc kvm_vm_stats_desc[] = {
64 KVM_GENERIC_VM_STATS(),
65 STATS_DESC_COUNTER(VM, inject_io),
66 STATS_DESC_COUNTER(VM, inject_float_mchk),
67 STATS_DESC_COUNTER(VM, inject_pfault_done),
68 STATS_DESC_COUNTER(VM, inject_service_signal),
69 STATS_DESC_COUNTER(VM, inject_virtio),
70 STATS_DESC_COUNTER(VM, aen_forward),
71 STATS_DESC_COUNTER(VM, gmap_shadow_reuse),
72 STATS_DESC_COUNTER(VM, gmap_shadow_create),
73 STATS_DESC_COUNTER(VM, gmap_shadow_r1_entry),
74 STATS_DESC_COUNTER(VM, gmap_shadow_r2_entry),
75 STATS_DESC_COUNTER(VM, gmap_shadow_r3_entry),
76 STATS_DESC_COUNTER(VM, gmap_shadow_sg_entry),
77 STATS_DESC_COUNTER(VM, gmap_shadow_pg_entry),
78};
79
80const struct kvm_stats_header kvm_vm_stats_header = {
81 .name_size = KVM_STATS_NAME_SIZE,
82 .num_desc = ARRAY_SIZE(kvm_vm_stats_desc),
83 .id_offset = sizeof(struct kvm_stats_header),
84 .desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE,
85 .data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE +
86 sizeof(kvm_vm_stats_desc),
87};
88
89const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = {
90 KVM_GENERIC_VCPU_STATS(),
91 STATS_DESC_COUNTER(VCPU, exit_userspace),
92 STATS_DESC_COUNTER(VCPU, exit_null),
93 STATS_DESC_COUNTER(VCPU, exit_external_request),
94 STATS_DESC_COUNTER(VCPU, exit_io_request),
95 STATS_DESC_COUNTER(VCPU, exit_external_interrupt),
96 STATS_DESC_COUNTER(VCPU, exit_stop_request),
97 STATS_DESC_COUNTER(VCPU, exit_validity),
98 STATS_DESC_COUNTER(VCPU, exit_instruction),
99 STATS_DESC_COUNTER(VCPU, exit_pei),
100 STATS_DESC_COUNTER(VCPU, halt_no_poll_steal),
101 STATS_DESC_COUNTER(VCPU, instruction_lctl),
102 STATS_DESC_COUNTER(VCPU, instruction_lctlg),
103 STATS_DESC_COUNTER(VCPU, instruction_stctl),
104 STATS_DESC_COUNTER(VCPU, instruction_stctg),
105 STATS_DESC_COUNTER(VCPU, exit_program_interruption),
106 STATS_DESC_COUNTER(VCPU, exit_instr_and_program),
107 STATS_DESC_COUNTER(VCPU, exit_operation_exception),
108 STATS_DESC_COUNTER(VCPU, deliver_ckc),
109 STATS_DESC_COUNTER(VCPU, deliver_cputm),
110 STATS_DESC_COUNTER(VCPU, deliver_external_call),
111 STATS_DESC_COUNTER(VCPU, deliver_emergency_signal),
112 STATS_DESC_COUNTER(VCPU, deliver_service_signal),
113 STATS_DESC_COUNTER(VCPU, deliver_virtio),
114 STATS_DESC_COUNTER(VCPU, deliver_stop_signal),
115 STATS_DESC_COUNTER(VCPU, deliver_prefix_signal),
116 STATS_DESC_COUNTER(VCPU, deliver_restart_signal),
117 STATS_DESC_COUNTER(VCPU, deliver_program),
118 STATS_DESC_COUNTER(VCPU, deliver_io),
119 STATS_DESC_COUNTER(VCPU, deliver_machine_check),
120 STATS_DESC_COUNTER(VCPU, exit_wait_state),
121 STATS_DESC_COUNTER(VCPU, inject_ckc),
122 STATS_DESC_COUNTER(VCPU, inject_cputm),
123 STATS_DESC_COUNTER(VCPU, inject_external_call),
124 STATS_DESC_COUNTER(VCPU, inject_emergency_signal),
125 STATS_DESC_COUNTER(VCPU, inject_mchk),
126 STATS_DESC_COUNTER(VCPU, inject_pfault_init),
127 STATS_DESC_COUNTER(VCPU, inject_program),
128 STATS_DESC_COUNTER(VCPU, inject_restart),
129 STATS_DESC_COUNTER(VCPU, inject_set_prefix),
130 STATS_DESC_COUNTER(VCPU, inject_stop_signal),
131 STATS_DESC_COUNTER(VCPU, instruction_epsw),
132 STATS_DESC_COUNTER(VCPU, instruction_gs),
133 STATS_DESC_COUNTER(VCPU, instruction_io_other),
134 STATS_DESC_COUNTER(VCPU, instruction_lpsw),
135 STATS_DESC_COUNTER(VCPU, instruction_lpswe),
136 STATS_DESC_COUNTER(VCPU, instruction_lpswey),
137 STATS_DESC_COUNTER(VCPU, instruction_pfmf),
138 STATS_DESC_COUNTER(VCPU, instruction_ptff),
139 STATS_DESC_COUNTER(VCPU, instruction_sck),
140 STATS_DESC_COUNTER(VCPU, instruction_sckpf),
141 STATS_DESC_COUNTER(VCPU, instruction_stidp),
142 STATS_DESC_COUNTER(VCPU, instruction_spx),
143 STATS_DESC_COUNTER(VCPU, instruction_stpx),
144 STATS_DESC_COUNTER(VCPU, instruction_stap),
145 STATS_DESC_COUNTER(VCPU, instruction_iske),
146 STATS_DESC_COUNTER(VCPU, instruction_ri),
147 STATS_DESC_COUNTER(VCPU, instruction_rrbe),
148 STATS_DESC_COUNTER(VCPU, instruction_sske),
149 STATS_DESC_COUNTER(VCPU, instruction_ipte_interlock),
150 STATS_DESC_COUNTER(VCPU, instruction_stsi),
151 STATS_DESC_COUNTER(VCPU, instruction_stfl),
152 STATS_DESC_COUNTER(VCPU, instruction_tb),
153 STATS_DESC_COUNTER(VCPU, instruction_tpi),
154 STATS_DESC_COUNTER(VCPU, instruction_tprot),
155 STATS_DESC_COUNTER(VCPU, instruction_tsch),
156 STATS_DESC_COUNTER(VCPU, instruction_sie),
157 STATS_DESC_COUNTER(VCPU, instruction_essa),
158 STATS_DESC_COUNTER(VCPU, instruction_sthyi),
159 STATS_DESC_COUNTER(VCPU, instruction_sigp_sense),
160 STATS_DESC_COUNTER(VCPU, instruction_sigp_sense_running),
161 STATS_DESC_COUNTER(VCPU, instruction_sigp_external_call),
162 STATS_DESC_COUNTER(VCPU, instruction_sigp_emergency),
163 STATS_DESC_COUNTER(VCPU, instruction_sigp_cond_emergency),
164 STATS_DESC_COUNTER(VCPU, instruction_sigp_start),
165 STATS_DESC_COUNTER(VCPU, instruction_sigp_stop),
166 STATS_DESC_COUNTER(VCPU, instruction_sigp_stop_store_status),
167 STATS_DESC_COUNTER(VCPU, instruction_sigp_store_status),
168 STATS_DESC_COUNTER(VCPU, instruction_sigp_store_adtl_status),
169 STATS_DESC_COUNTER(VCPU, instruction_sigp_arch),
170 STATS_DESC_COUNTER(VCPU, instruction_sigp_prefix),
171 STATS_DESC_COUNTER(VCPU, instruction_sigp_restart),
172 STATS_DESC_COUNTER(VCPU, instruction_sigp_init_cpu_reset),
173 STATS_DESC_COUNTER(VCPU, instruction_sigp_cpu_reset),
174 STATS_DESC_COUNTER(VCPU, instruction_sigp_unknown),
175 STATS_DESC_COUNTER(VCPU, instruction_diagnose_10),
176 STATS_DESC_COUNTER(VCPU, instruction_diagnose_44),
177 STATS_DESC_COUNTER(VCPU, instruction_diagnose_9c),
178 STATS_DESC_COUNTER(VCPU, diag_9c_ignored),
179 STATS_DESC_COUNTER(VCPU, diag_9c_forward),
180 STATS_DESC_COUNTER(VCPU, instruction_diagnose_258),
181 STATS_DESC_COUNTER(VCPU, instruction_diagnose_308),
182 STATS_DESC_COUNTER(VCPU, instruction_diagnose_500),
183 STATS_DESC_COUNTER(VCPU, instruction_diagnose_other),
184 STATS_DESC_COUNTER(VCPU, pfault_sync)
185};
186
187const struct kvm_stats_header kvm_vcpu_stats_header = {
188 .name_size = KVM_STATS_NAME_SIZE,
189 .num_desc = ARRAY_SIZE(kvm_vcpu_stats_desc),
190 .id_offset = sizeof(struct kvm_stats_header),
191 .desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE,
192 .data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE +
193 sizeof(kvm_vcpu_stats_desc),
194};
195
196/* allow nested virtualization in KVM (if enabled by user space) */
197static int nested;
198module_param(nested, int, S_IRUGO);
199MODULE_PARM_DESC(nested, "Nested virtualization support");
200
201/* allow 1m huge page guest backing, if !nested */
202static int hpage;
203module_param(hpage, int, 0444);
204MODULE_PARM_DESC(hpage, "1m huge page backing support");
205
206/* maximum percentage of steal time for polling. >100 is treated like 100 */
207static u8 halt_poll_max_steal = 10;
208module_param(halt_poll_max_steal, byte, 0644);
209MODULE_PARM_DESC(halt_poll_max_steal, "Maximum percentage of steal time to allow polling");
210
211/* if set to true, the GISA will be initialized and used if available */
212static bool use_gisa = true;
213module_param(use_gisa, bool, 0644);
214MODULE_PARM_DESC(use_gisa, "Use the GISA if the host supports it.");
215
216/* maximum diag9c forwarding per second */
217unsigned int diag9c_forwarding_hz;
218module_param(diag9c_forwarding_hz, uint, 0644);
219MODULE_PARM_DESC(diag9c_forwarding_hz, "Maximum diag9c forwarding per second, 0 to turn off");
220
221/*
222 * allow asynchronous deinit for protected guests; enable by default since
223 * the feature is opt-in anyway
224 */
225static int async_destroy = 1;
226module_param(async_destroy, int, 0444);
227MODULE_PARM_DESC(async_destroy, "Asynchronous destroy for protected guests");
228
229/*
230 * For now we handle at most 16 double words as this is what the s390 base
231 * kernel handles and stores in the prefix page. If we ever need to go beyond
232 * this, the code needs to change, but the external uapi can stay.
233 */
234#define SIZE_INTERNAL 16
235
236/*
237 * Base feature mask that defines default mask for facilities. Consists of the
238 * defines in FACILITIES_KVM and the non-hypervisor managed bits.
239 */
240static unsigned long kvm_s390_fac_base[SIZE_INTERNAL] = { FACILITIES_KVM };
241/*
242 * Extended feature mask. Consists of the defines in FACILITIES_KVM_CPUMODEL
243 * and defines the facilities that can be enabled via a cpu model.
244 */
245static unsigned long kvm_s390_fac_ext[SIZE_INTERNAL] = { FACILITIES_KVM_CPUMODEL };
246
247static unsigned long kvm_s390_fac_size(void)
248{
249 BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_MASK_SIZE_U64);
250 BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_LIST_SIZE_U64);
251 BUILD_BUG_ON(SIZE_INTERNAL * sizeof(unsigned long) >
252 sizeof(stfle_fac_list));
253
254 return SIZE_INTERNAL;
255}
256
257/* available cpu features supported by kvm */
258static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
259/* available subfunctions indicated via query / "test bit" */
260static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;
261
262static struct gmap_notifier gmap_notifier;
263static struct gmap_notifier vsie_gmap_notifier;
264debug_info_t *kvm_s390_dbf;
265debug_info_t *kvm_s390_dbf_uv;
266
267/* Section: not file related */
268/* forward declarations */
269static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
270 unsigned long end);
271static int sca_switch_to_extended(struct kvm *kvm);
272
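/*
 * Apply a host TOD steering delta to one SIE control block: the epoch is
 * adjusted by -delta, and when the multiple-epoch facility is in use
 * (ECD_MEF) the epoch index (epdx) is corrected for sign extension and
 * carry so that the combined epoch/epoch-index value stays consistent.
 */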
273static void kvm_clock_sync_scb(struct kvm_s390_sie_block *scb, u64 delta)
274{
275 u8 delta_idx = 0;
276
277 /*
278	 * The TOD jumps by delta; we have to compensate for this by adding
279 * -delta to the epoch.
280 */
281 delta = -delta;
282
283 /* sign-extension - we're adding to signed values below */
284 if ((s64)delta < 0)
285 delta_idx = -1;
286
287 scb->epoch += delta;
288 if (scb->ecd & ECD_MEF) {
289 scb->epdx += delta_idx;
290 if (scb->epoch < delta)
291 scb->epdx += 1;
292 }
293}
294
295/*
296 * This callback is executed during stop_machine(). All CPUs are therefore
297 * temporarily stopped. In order not to change guest behavior, we have to
298 * disable preemption whenever we touch the epoch of kvm and the VCPUs,
299 * so a CPU won't be stopped while calculating with the epoch.
300 */
301static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
302 void *v)
303{
304 struct kvm *kvm;
305 struct kvm_vcpu *vcpu;
306 unsigned long i;
307 unsigned long long *delta = v;
308
309 list_for_each_entry(kvm, &vm_list, vm_list) {
310 kvm_for_each_vcpu(i, vcpu, kvm) {
311 kvm_clock_sync_scb(vcpu->arch.sie_block, *delta);
312 if (i == 0) {
313 kvm->arch.epoch = vcpu->arch.sie_block->epoch;
314 kvm->arch.epdx = vcpu->arch.sie_block->epdx;
315 }
316 if (vcpu->arch.cputm_enabled)
317 vcpu->arch.cputm_start += *delta;
318 if (vcpu->arch.vsie_block)
319 kvm_clock_sync_scb(vcpu->arch.vsie_block,
320 *delta);
321 }
322 }
323 return NOTIFY_OK;
324}
325
326static struct notifier_block kvm_clock_notifier = {
327 .notifier_call = kvm_clock_sync,
328};
329
330static void allow_cpu_feat(unsigned long nr)
331{
332 set_bit_inv(nr, kvm_s390_available_cpu_feat);
333}
334
335static inline int plo_test_bit(unsigned char nr)
336{
337 unsigned long function = (unsigned long)nr | 0x100;
338 int cc;
339
340 asm volatile(
341 " lgr 0,%[function]\n"
342 /* Parameter registers are ignored for "test bit" */
343 " plo 0,0,0,0(0)\n"
344 CC_IPM(cc)
345 : CC_OUT(cc, cc)
346 : [function] "d" (function)
347 : CC_CLOBBER_LIST("0"));
348 return CC_TRANSFORM(cc) == 0;
349}
350
351static __always_inline void pfcr_query(u8 (*query)[16])
352{
353 asm volatile(
354 " lghi 0,0\n"
355 " .insn rsy,0xeb0000000016,0,0,%[query]\n"
356 : [query] "=QS" (*query)
357 :
358 : "cc", "0");
359}
360
361static __always_inline void __sortl_query(u8 (*query)[32])
362{
363 asm volatile(
364 " lghi 0,0\n"
365 " la 1,%[query]\n"
366 /* Parameter registers are ignored */
367 " .insn rre,0xb9380000,2,4\n"
368 : [query] "=R" (*query)
369 :
370 : "cc", "0", "1");
371}
372
373static __always_inline void __dfltcc_query(u8 (*query)[32])
374{
375 asm volatile(
376 " lghi 0,0\n"
377 " la 1,%[query]\n"
378 /* Parameter registers are ignored */
379 " .insn rrf,0xb9390000,2,4,6,0\n"
380 : [query] "=R" (*query)
381 :
382 : "cc", "0", "1");
383}
384
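/*
 * Probe which optional subfunctions and SIE features the host provides:
 * PLO test bits, PTFF, the CPACF query functions for the various MSA
 * levels, SORTL, DFLTCC and PFCR results are collected in
 * kvm_s390_available_subfunc, and the KVM_S390_VM_CPU_FEAT_* bits are
 * derived from SCLP/STFLE. The SIE-related features are only advertised
 * when nested=1 and the prerequisites listed below are present.
 */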
385static void __init kvm_s390_cpu_feat_init(void)
386{
387 int i;
388
389 for (i = 0; i < 256; ++i) {
390 if (plo_test_bit(i))
391 kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
392 }
393
394 if (test_facility(28)) /* TOD-clock steering */
395 ptff(kvm_s390_available_subfunc.ptff,
396 sizeof(kvm_s390_available_subfunc.ptff),
397 PTFF_QAF);
398
399 if (test_facility(17)) { /* MSA */
400 __cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
401 kvm_s390_available_subfunc.kmac);
402 __cpacf_query(CPACF_KMC, (cpacf_mask_t *)
403 kvm_s390_available_subfunc.kmc);
404 __cpacf_query(CPACF_KM, (cpacf_mask_t *)
405 kvm_s390_available_subfunc.km);
406 __cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
407 kvm_s390_available_subfunc.kimd);
408 __cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
409 kvm_s390_available_subfunc.klmd);
410 }
411 if (test_facility(76)) /* MSA3 */
412 __cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
413 kvm_s390_available_subfunc.pckmo);
414 if (test_facility(77)) { /* MSA4 */
415 __cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
416 kvm_s390_available_subfunc.kmctr);
417 __cpacf_query(CPACF_KMF, (cpacf_mask_t *)
418 kvm_s390_available_subfunc.kmf);
419 __cpacf_query(CPACF_KMO, (cpacf_mask_t *)
420 kvm_s390_available_subfunc.kmo);
421 __cpacf_query(CPACF_PCC, (cpacf_mask_t *)
422 kvm_s390_available_subfunc.pcc);
423 }
424 if (test_facility(57)) /* MSA5 */
425 __cpacf_query(CPACF_PRNO, (cpacf_mask_t *)
426 kvm_s390_available_subfunc.ppno);
427
428 if (test_facility(146)) /* MSA8 */
429 __cpacf_query(CPACF_KMA, (cpacf_mask_t *)
430 kvm_s390_available_subfunc.kma);
431
432 if (test_facility(155)) /* MSA9 */
433 __cpacf_query(CPACF_KDSA, (cpacf_mask_t *)
434 kvm_s390_available_subfunc.kdsa);
435
436 if (test_facility(150)) /* SORTL */
437 __sortl_query(&kvm_s390_available_subfunc.sortl);
438
439 if (test_facility(151)) /* DFLTCC */
440 __dfltcc_query(&kvm_s390_available_subfunc.dfltcc);
441
442 if (test_facility(201)) /* PFCR */
443 pfcr_query(&kvm_s390_available_subfunc.pfcr);
444
445 if (MACHINE_HAS_ESOP)
446 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
447 /*
448 * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
449 * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
450 */
451 if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
452 !test_facility(3) || !nested)
453 return;
454 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
455 if (sclp.has_64bscao)
456 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
457 if (sclp.has_siif)
458 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
459 if (sclp.has_gpere)
460 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
461 if (sclp.has_gsls)
462 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
463 if (sclp.has_ib)
464 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
465 if (sclp.has_cei)
466 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
467 if (sclp.has_ibs)
468 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
469 if (sclp.has_kss)
470 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS);
471 /*
472 * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
473 * all skey handling functions read/set the skey from the PGSTE
474 * instead of the real storage key.
475 *
 476 * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will cause
 477 * pages to be detected as preserved although they are resident.
478 *
479 * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
480 * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
481 *
482 * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
483 * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
484 * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
485 *
486 * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
487 * cannot easily shadow the SCA because of the ipte lock.
488 */
489}
490
491static int __init __kvm_s390_init(void)
492{
493 int rc = -ENOMEM;
494
495 kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
496 if (!kvm_s390_dbf)
497 return -ENOMEM;
498
499 kvm_s390_dbf_uv = debug_register("kvm-uv", 32, 1, 7 * sizeof(long));
500 if (!kvm_s390_dbf_uv)
501 goto err_kvm_uv;
502
503 if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view) ||
504 debug_register_view(kvm_s390_dbf_uv, &debug_sprintf_view))
505 goto err_debug_view;
506
507 kvm_s390_cpu_feat_init();
508
509 /* Register floating interrupt controller interface. */
510 rc = kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
511 if (rc) {
512 pr_err("A FLIC registration call failed with rc=%d\n", rc);
513 goto err_flic;
514 }
515
516 if (IS_ENABLED(CONFIG_VFIO_PCI_ZDEV_KVM)) {
517 rc = kvm_s390_pci_init();
518 if (rc) {
519 pr_err("Unable to allocate AIFT for PCI\n");
520 goto err_pci;
521 }
522 }
523
524 rc = kvm_s390_gib_init(GAL_ISC);
525 if (rc)
526 goto err_gib;
527
528 gmap_notifier.notifier_call = kvm_gmap_notifier;
529 gmap_register_pte_notifier(&gmap_notifier);
530 vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
531 gmap_register_pte_notifier(&vsie_gmap_notifier);
532 atomic_notifier_chain_register(&s390_epoch_delta_notifier,
533 &kvm_clock_notifier);
534
535 return 0;
536
537err_gib:
538 if (IS_ENABLED(CONFIG_VFIO_PCI_ZDEV_KVM))
539 kvm_s390_pci_exit();
540err_pci:
541err_flic:
542err_debug_view:
543 debug_unregister(kvm_s390_dbf_uv);
544err_kvm_uv:
545 debug_unregister(kvm_s390_dbf);
546 return rc;
547}
548
549static void __kvm_s390_exit(void)
550{
551 gmap_unregister_pte_notifier(&gmap_notifier);
552 gmap_unregister_pte_notifier(&vsie_gmap_notifier);
553 atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
554 &kvm_clock_notifier);
555
556 kvm_s390_gib_destroy();
557 if (IS_ENABLED(CONFIG_VFIO_PCI_ZDEV_KVM))
558 kvm_s390_pci_exit();
559 debug_unregister(kvm_s390_dbf);
560 debug_unregister(kvm_s390_dbf_uv);
561}
562
563/* Section: device related */
564long kvm_arch_dev_ioctl(struct file *filp,
565 unsigned int ioctl, unsigned long arg)
566{
567 if (ioctl == KVM_S390_ENABLE_SIE)
568 return s390_enable_sie();
569 return -EINVAL;
570}
571
572int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
573{
574 int r;
575
576 switch (ext) {
577 case KVM_CAP_S390_PSW:
578 case KVM_CAP_S390_GMAP:
579 case KVM_CAP_SYNC_MMU:
580#ifdef CONFIG_KVM_S390_UCONTROL
581 case KVM_CAP_S390_UCONTROL:
582#endif
583 case KVM_CAP_ASYNC_PF:
584 case KVM_CAP_SYNC_REGS:
585 case KVM_CAP_ONE_REG:
586 case KVM_CAP_ENABLE_CAP:
587 case KVM_CAP_S390_CSS_SUPPORT:
588 case KVM_CAP_IOEVENTFD:
589 case KVM_CAP_S390_IRQCHIP:
590 case KVM_CAP_VM_ATTRIBUTES:
591 case KVM_CAP_MP_STATE:
592 case KVM_CAP_IMMEDIATE_EXIT:
593 case KVM_CAP_S390_INJECT_IRQ:
594 case KVM_CAP_S390_USER_SIGP:
595 case KVM_CAP_S390_USER_STSI:
596 case KVM_CAP_S390_SKEYS:
597 case KVM_CAP_S390_IRQ_STATE:
598 case KVM_CAP_S390_USER_INSTR0:
599 case KVM_CAP_S390_CMMA_MIGRATION:
600 case KVM_CAP_S390_AIS:
601 case KVM_CAP_S390_AIS_MIGRATION:
602 case KVM_CAP_S390_VCPU_RESETS:
603 case KVM_CAP_SET_GUEST_DEBUG:
604 case KVM_CAP_S390_DIAG318:
605 case KVM_CAP_IRQFD_RESAMPLE:
606 r = 1;
607 break;
608 case KVM_CAP_SET_GUEST_DEBUG2:
609 r = KVM_GUESTDBG_VALID_MASK;
610 break;
611 case KVM_CAP_S390_HPAGE_1M:
612 r = 0;
613 if (hpage && !(kvm && kvm_is_ucontrol(kvm)))
614 r = 1;
615 break;
616 case KVM_CAP_S390_MEM_OP:
617 r = MEM_OP_MAX_SIZE;
618 break;
619 case KVM_CAP_S390_MEM_OP_EXTENSION:
620 /*
621 * Flag bits indicating which extensions are supported.
622 * If r > 0, the base extension must also be supported/indicated,
623 * in order to maintain backwards compatibility.
624 */
625 r = KVM_S390_MEMOP_EXTENSION_CAP_BASE |
626 KVM_S390_MEMOP_EXTENSION_CAP_CMPXCHG;
627 break;
628 case KVM_CAP_NR_VCPUS:
629 case KVM_CAP_MAX_VCPUS:
630 case KVM_CAP_MAX_VCPU_ID:
631 r = KVM_S390_BSCA_CPU_SLOTS;
632 if (!kvm_s390_use_sca_entries())
633 r = KVM_MAX_VCPUS;
634 else if (sclp.has_esca && sclp.has_64bscao)
635 r = KVM_S390_ESCA_CPU_SLOTS;
636 if (ext == KVM_CAP_NR_VCPUS)
637 r = min_t(unsigned int, num_online_cpus(), r);
638 break;
639 case KVM_CAP_S390_COW:
640 r = MACHINE_HAS_ESOP;
641 break;
642 case KVM_CAP_S390_VECTOR_REGISTERS:
643 r = test_facility(129);
644 break;
645 case KVM_CAP_S390_RI:
646 r = test_facility(64);
647 break;
648 case KVM_CAP_S390_GS:
649 r = test_facility(133);
650 break;
651 case KVM_CAP_S390_BPB:
652 r = test_facility(82);
653 break;
654 case KVM_CAP_S390_PROTECTED_ASYNC_DISABLE:
655 r = async_destroy && is_prot_virt_host();
656 break;
657 case KVM_CAP_S390_PROTECTED:
658 r = is_prot_virt_host();
659 break;
660 case KVM_CAP_S390_PROTECTED_DUMP: {
661 u64 pv_cmds_dump[] = {
662 BIT_UVC_CMD_DUMP_INIT,
663 BIT_UVC_CMD_DUMP_CONFIG_STOR_STATE,
664 BIT_UVC_CMD_DUMP_CPU,
665 BIT_UVC_CMD_DUMP_COMPLETE,
666 };
667 int i;
668
669 r = is_prot_virt_host();
670
671 for (i = 0; i < ARRAY_SIZE(pv_cmds_dump); i++) {
672 if (!test_bit_inv(pv_cmds_dump[i],
673 (unsigned long *)&uv_info.inst_calls_list)) {
674 r = 0;
675 break;
676 }
677 }
678 break;
679 }
680 case KVM_CAP_S390_ZPCI_OP:
681 r = kvm_s390_pci_interp_allowed();
682 break;
683 case KVM_CAP_S390_CPU_TOPOLOGY:
684 r = test_facility(11);
685 break;
686 default:
687 r = 0;
688 }
689 return r;
690}
691
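/*
 * Transfer the dirty state tracked in the gmap/PGSTEs into KVM's dirty
 * bitmap for this memslot, one segment (_PAGE_ENTRIES pages) at a time.
 * Userspace later retrieves the result via KVM_GET_DIRTY_LOG.
 */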
692void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
693{
694 int i;
695 gfn_t cur_gfn, last_gfn;
696 unsigned long gaddr, vmaddr;
697 struct gmap *gmap = kvm->arch.gmap;
698 DECLARE_BITMAP(bitmap, _PAGE_ENTRIES);
699
700 /* Loop over all guest segments */
701 cur_gfn = memslot->base_gfn;
702 last_gfn = memslot->base_gfn + memslot->npages;
703 for (; cur_gfn <= last_gfn; cur_gfn += _PAGE_ENTRIES) {
704 gaddr = gfn_to_gpa(cur_gfn);
705 vmaddr = gfn_to_hva_memslot(memslot, cur_gfn);
706 if (kvm_is_error_hva(vmaddr))
707 continue;
708
709 bitmap_zero(bitmap, _PAGE_ENTRIES);
710 gmap_sync_dirty_log_pmd(gmap, bitmap, gaddr, vmaddr);
711 for (i = 0; i < _PAGE_ENTRIES; i++) {
712 if (test_bit(i, bitmap))
713 mark_page_dirty(kvm, cur_gfn + i);
714 }
715
716 if (fatal_signal_pending(current))
717 return;
718 cond_resched();
719 }
720}
721
722/* Section: vm related */
723static void sca_del_vcpu(struct kvm_vcpu *vcpu);
724
725/*
726 * Get (and clear) the dirty memory log for a memory slot.
727 */
728int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
729 struct kvm_dirty_log *log)
730{
731 int r;
732 unsigned long n;
733 struct kvm_memory_slot *memslot;
734 int is_dirty;
735
736 if (kvm_is_ucontrol(kvm))
737 return -EINVAL;
738
739 mutex_lock(&kvm->slots_lock);
740
741 r = -EINVAL;
742 if (log->slot >= KVM_USER_MEM_SLOTS)
743 goto out;
744
745 r = kvm_get_dirty_log(kvm, log, &is_dirty, &memslot);
746 if (r)
747 goto out;
748
749 /* Clear the dirty log */
750 if (is_dirty) {
751 n = kvm_dirty_bitmap_bytes(memslot);
752 memset(memslot->dirty_bitmap, 0, n);
753 }
754 r = 0;
755out:
756 mutex_unlock(&kvm->slots_lock);
757 return r;
758}
759
760static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
761{
762 unsigned long i;
763 struct kvm_vcpu *vcpu;
764
765 kvm_for_each_vcpu(i, vcpu, kvm) {
766 kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
767 }
768}
769
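/*
 * VM capabilities are turned on through the KVM_ENABLE_CAP ioctl on the VM
 * file descriptor. A minimal userspace sketch (vm_fd is assumed to be an
 * already created VM fd, it is not defined in this file):
 *
 *	struct kvm_enable_cap cap = {
 *		.cap = KVM_CAP_S390_USER_SIGP,
 *	};
 *	if (ioctl(vm_fd, KVM_ENABLE_CAP, &cap))
 *		perror("KVM_ENABLE_CAP");
 */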
770int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
771{
772 int r;
773
774 if (cap->flags)
775 return -EINVAL;
776
777 switch (cap->cap) {
778 case KVM_CAP_S390_IRQCHIP:
779 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
780 kvm->arch.use_irqchip = 1;
781 r = 0;
782 break;
783 case KVM_CAP_S390_USER_SIGP:
784 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
785 kvm->arch.user_sigp = 1;
786 r = 0;
787 break;
788 case KVM_CAP_S390_VECTOR_REGISTERS:
789 mutex_lock(&kvm->lock);
790 if (kvm->created_vcpus) {
791 r = -EBUSY;
792 } else if (cpu_has_vx()) {
793 set_kvm_facility(kvm->arch.model.fac_mask, 129);
794 set_kvm_facility(kvm->arch.model.fac_list, 129);
795 if (test_facility(134)) {
796 set_kvm_facility(kvm->arch.model.fac_mask, 134);
797 set_kvm_facility(kvm->arch.model.fac_list, 134);
798 }
799 if (test_facility(135)) {
800 set_kvm_facility(kvm->arch.model.fac_mask, 135);
801 set_kvm_facility(kvm->arch.model.fac_list, 135);
802 }
803 if (test_facility(148)) {
804 set_kvm_facility(kvm->arch.model.fac_mask, 148);
805 set_kvm_facility(kvm->arch.model.fac_list, 148);
806 }
807 if (test_facility(152)) {
808 set_kvm_facility(kvm->arch.model.fac_mask, 152);
809 set_kvm_facility(kvm->arch.model.fac_list, 152);
810 }
811 if (test_facility(192)) {
812 set_kvm_facility(kvm->arch.model.fac_mask, 192);
813 set_kvm_facility(kvm->arch.model.fac_list, 192);
814 }
815 if (test_facility(198)) {
816 set_kvm_facility(kvm->arch.model.fac_mask, 198);
817 set_kvm_facility(kvm->arch.model.fac_list, 198);
818 }
819 if (test_facility(199)) {
820 set_kvm_facility(kvm->arch.model.fac_mask, 199);
821 set_kvm_facility(kvm->arch.model.fac_list, 199);
822 }
823 r = 0;
824 } else
825 r = -EINVAL;
826 mutex_unlock(&kvm->lock);
827 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
828 r ? "(not available)" : "(success)");
829 break;
830 case KVM_CAP_S390_RI:
831 r = -EINVAL;
832 mutex_lock(&kvm->lock);
833 if (kvm->created_vcpus) {
834 r = -EBUSY;
835 } else if (test_facility(64)) {
836 set_kvm_facility(kvm->arch.model.fac_mask, 64);
837 set_kvm_facility(kvm->arch.model.fac_list, 64);
838 r = 0;
839 }
840 mutex_unlock(&kvm->lock);
841 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
842 r ? "(not available)" : "(success)");
843 break;
844 case KVM_CAP_S390_AIS:
845 mutex_lock(&kvm->lock);
846 if (kvm->created_vcpus) {
847 r = -EBUSY;
848 } else {
849 set_kvm_facility(kvm->arch.model.fac_mask, 72);
850 set_kvm_facility(kvm->arch.model.fac_list, 72);
851 r = 0;
852 }
853 mutex_unlock(&kvm->lock);
854 VM_EVENT(kvm, 3, "ENABLE: AIS %s",
855 r ? "(not available)" : "(success)");
856 break;
857 case KVM_CAP_S390_GS:
858 r = -EINVAL;
859 mutex_lock(&kvm->lock);
860 if (kvm->created_vcpus) {
861 r = -EBUSY;
862 } else if (test_facility(133)) {
863 set_kvm_facility(kvm->arch.model.fac_mask, 133);
864 set_kvm_facility(kvm->arch.model.fac_list, 133);
865 r = 0;
866 }
867 mutex_unlock(&kvm->lock);
868 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s",
869 r ? "(not available)" : "(success)");
870 break;
871 case KVM_CAP_S390_HPAGE_1M:
872 mutex_lock(&kvm->lock);
873 if (kvm->created_vcpus)
874 r = -EBUSY;
875 else if (!hpage || kvm->arch.use_cmma || kvm_is_ucontrol(kvm))
876 r = -EINVAL;
877 else {
878 r = 0;
879 mmap_write_lock(kvm->mm);
880 kvm->mm->context.allow_gmap_hpage_1m = 1;
881 mmap_write_unlock(kvm->mm);
882 /*
883 * We might have to create fake 4k page
 884 * tables. To prevent the hardware from working
 885 * on stale PGSTEs, we emulate these instructions.
886 */
887 kvm->arch.use_skf = 0;
888 kvm->arch.use_pfmfi = 0;
889 }
890 mutex_unlock(&kvm->lock);
891 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_HPAGE %s",
892 r ? "(not available)" : "(success)");
893 break;
894 case KVM_CAP_S390_USER_STSI:
895 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
896 kvm->arch.user_stsi = 1;
897 r = 0;
898 break;
899 case KVM_CAP_S390_USER_INSTR0:
900 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
901 kvm->arch.user_instr0 = 1;
902 icpt_operexc_on_all_vcpus(kvm);
903 r = 0;
904 break;
905 case KVM_CAP_S390_CPU_TOPOLOGY:
906 r = -EINVAL;
907 mutex_lock(&kvm->lock);
908 if (kvm->created_vcpus) {
909 r = -EBUSY;
910 } else if (test_facility(11)) {
911 set_kvm_facility(kvm->arch.model.fac_mask, 11);
912 set_kvm_facility(kvm->arch.model.fac_list, 11);
913 r = 0;
914 }
915 mutex_unlock(&kvm->lock);
916 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_CPU_TOPOLOGY %s",
917 r ? "(not available)" : "(success)");
918 break;
919 default:
920 r = -EINVAL;
921 break;
922 }
923 return r;
924}
925
926static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
927{
928 int ret;
929
930 switch (attr->attr) {
931 case KVM_S390_VM_MEM_LIMIT_SIZE:
932 ret = 0;
933 VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
934 kvm->arch.mem_limit);
935 if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
936 ret = -EFAULT;
937 break;
938 default:
939 ret = -ENXIO;
940 break;
941 }
942 return ret;
943}
944
945static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
946{
947 int ret;
948 unsigned int idx;
949 switch (attr->attr) {
950 case KVM_S390_VM_MEM_ENABLE_CMMA:
951 ret = -ENXIO;
952 if (!sclp.has_cmma)
953 break;
954
955 VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
956 mutex_lock(&kvm->lock);
957 if (kvm->created_vcpus)
958 ret = -EBUSY;
959 else if (kvm->mm->context.allow_gmap_hpage_1m)
960 ret = -EINVAL;
961 else {
962 kvm->arch.use_cmma = 1;
963 /* Not compatible with cmma. */
964 kvm->arch.use_pfmfi = 0;
965 ret = 0;
966 }
967 mutex_unlock(&kvm->lock);
968 break;
969 case KVM_S390_VM_MEM_CLR_CMMA:
970 ret = -ENXIO;
971 if (!sclp.has_cmma)
972 break;
973 ret = -EINVAL;
974 if (!kvm->arch.use_cmma)
975 break;
976
977 VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
978 mutex_lock(&kvm->lock);
979 idx = srcu_read_lock(&kvm->srcu);
980 s390_reset_cmma(kvm->arch.gmap->mm);
981 srcu_read_unlock(&kvm->srcu, idx);
982 mutex_unlock(&kvm->lock);
983 ret = 0;
984 break;
985 case KVM_S390_VM_MEM_LIMIT_SIZE: {
986 unsigned long new_limit;
987
988 if (kvm_is_ucontrol(kvm))
989 return -EINVAL;
990
991 if (get_user(new_limit, (u64 __user *)attr->addr))
992 return -EFAULT;
993
994 if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
995 new_limit > kvm->arch.mem_limit)
996 return -E2BIG;
997
998 if (!new_limit)
999 return -EINVAL;
1000
1001 /* gmap_create takes last usable address */
1002 if (new_limit != KVM_S390_NO_MEM_LIMIT)
1003 new_limit -= 1;
1004
1005 ret = -EBUSY;
1006 mutex_lock(&kvm->lock);
1007 if (!kvm->created_vcpus) {
1008 /* gmap_create will round the limit up */
1009 struct gmap *new = gmap_create(current->mm, new_limit);
1010
1011 if (!new) {
1012 ret = -ENOMEM;
1013 } else {
1014 gmap_remove(kvm->arch.gmap);
1015 new->private = kvm;
1016 kvm->arch.gmap = new;
1017 ret = 0;
1018 }
1019 }
1020 mutex_unlock(&kvm->lock);
1021 VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
1022 VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
1023 (void *) kvm->arch.gmap->asce);
1024 break;
1025 }
1026 default:
1027 ret = -ENXIO;
1028 break;
1029 }
1030 return ret;
1031}
1032
1033static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
1034
1035void kvm_s390_vcpu_crypto_reset_all(struct kvm *kvm)
1036{
1037 struct kvm_vcpu *vcpu;
1038 unsigned long i;
1039
1040 kvm_s390_vcpu_block_all(kvm);
1041
1042 kvm_for_each_vcpu(i, vcpu, kvm) {
1043 kvm_s390_vcpu_crypto_setup(vcpu);
1044 /* recreate the shadow crycb by leaving the VSIE handler */
1045 kvm_s390_sync_request(KVM_REQ_VSIE_RESTART, vcpu);
1046 }
1047
1048 kvm_s390_vcpu_unblock_all(kvm);
1049}
1050
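/*
 * KVM_S390_VM_CRYPTO attribute handling: the wrapping key masks live in the
 * CRYCB that is referenced by the SIE control block, so after any change all
 * vcpus are blocked and their crypto setup (including any vsie shadow crycb)
 * is refreshed via kvm_s390_vcpu_crypto_reset_all().
 */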
1051static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
1052{
1053 mutex_lock(&kvm->lock);
1054 switch (attr->attr) {
1055 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
1056 if (!test_kvm_facility(kvm, 76)) {
1057 mutex_unlock(&kvm->lock);
1058 return -EINVAL;
1059 }
1060 get_random_bytes(
1061 kvm->arch.crypto.crycb->aes_wrapping_key_mask,
1062 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
1063 kvm->arch.crypto.aes_kw = 1;
1064 VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
1065 break;
1066 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
1067 if (!test_kvm_facility(kvm, 76)) {
1068 mutex_unlock(&kvm->lock);
1069 return -EINVAL;
1070 }
1071 get_random_bytes(
1072 kvm->arch.crypto.crycb->dea_wrapping_key_mask,
1073 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
1074 kvm->arch.crypto.dea_kw = 1;
1075 VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
1076 break;
1077 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
1078 if (!test_kvm_facility(kvm, 76)) {
1079 mutex_unlock(&kvm->lock);
1080 return -EINVAL;
1081 }
1082 kvm->arch.crypto.aes_kw = 0;
1083 memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
1084 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
1085 VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
1086 break;
1087 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
1088 if (!test_kvm_facility(kvm, 76)) {
1089 mutex_unlock(&kvm->lock);
1090 return -EINVAL;
1091 }
1092 kvm->arch.crypto.dea_kw = 0;
1093 memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
1094 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
1095 VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
1096 break;
1097 case KVM_S390_VM_CRYPTO_ENABLE_APIE:
1098 if (!ap_instructions_available()) {
1099 mutex_unlock(&kvm->lock);
1100 return -EOPNOTSUPP;
1101 }
1102 kvm->arch.crypto.apie = 1;
1103 break;
1104 case KVM_S390_VM_CRYPTO_DISABLE_APIE:
1105 if (!ap_instructions_available()) {
1106 mutex_unlock(&kvm->lock);
1107 return -EOPNOTSUPP;
1108 }
1109 kvm->arch.crypto.apie = 0;
1110 break;
1111 default:
1112 mutex_unlock(&kvm->lock);
1113 return -ENXIO;
1114 }
1115
1116 kvm_s390_vcpu_crypto_reset_all(kvm);
1117 mutex_unlock(&kvm->lock);
1118 return 0;
1119}
1120
1121static void kvm_s390_vcpu_pci_setup(struct kvm_vcpu *vcpu)
1122{
1123 /* Only set the ECB bits after guest requests zPCI interpretation */
1124 if (!vcpu->kvm->arch.use_zpci_interp)
1125 return;
1126
1127 vcpu->arch.sie_block->ecb2 |= ECB2_ZPCI_LSI;
1128 vcpu->arch.sie_block->ecb3 |= ECB3_AISII + ECB3_AISI;
1129}
1130
1131void kvm_s390_vcpu_pci_enable_interp(struct kvm *kvm)
1132{
1133 struct kvm_vcpu *vcpu;
1134 unsigned long i;
1135
1136 lockdep_assert_held(&kvm->lock);
1137
1138 if (!kvm_s390_pci_interp_allowed())
1139 return;
1140
1141 /*
 1142 * If the host is configured for PCI and the necessary facilities are
 1143 * available, turn on interpretation for the life of this guest.
1144 */
1145 kvm->arch.use_zpci_interp = 1;
1146
1147 kvm_s390_vcpu_block_all(kvm);
1148
1149 kvm_for_each_vcpu(i, vcpu, kvm) {
1150 kvm_s390_vcpu_pci_setup(vcpu);
1151 kvm_s390_sync_request(KVM_REQ_VSIE_RESTART, vcpu);
1152 }
1153
1154 kvm_s390_vcpu_unblock_all(kvm);
1155}
1156
1157static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
1158{
1159 unsigned long cx;
1160 struct kvm_vcpu *vcpu;
1161
1162 kvm_for_each_vcpu(cx, vcpu, kvm)
1163 kvm_s390_sync_request(req, vcpu);
1164}
1165
1166/*
1167 * Must be called with kvm->srcu held to avoid races on memslots, and with
1168 * kvm->slots_lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
1169 */
1170static int kvm_s390_vm_start_migration(struct kvm *kvm)
1171{
1172 struct kvm_memory_slot *ms;
1173 struct kvm_memslots *slots;
1174 unsigned long ram_pages = 0;
1175 int bkt;
1176
1177 /* migration mode already enabled */
1178 if (kvm->arch.migration_mode)
1179 return 0;
1180 slots = kvm_memslots(kvm);
1181 if (!slots || kvm_memslots_empty(slots))
1182 return -EINVAL;
1183
1184 if (!kvm->arch.use_cmma) {
1185 kvm->arch.migration_mode = 1;
1186 return 0;
1187 }
1188 /* mark all the pages in active slots as dirty */
1189 kvm_for_each_memslot(ms, bkt, slots) {
1190 if (!ms->dirty_bitmap)
1191 return -EINVAL;
1192 /*
1193 * The second half of the bitmap is only used on x86,
1194 * and would be wasted otherwise, so we put it to good
1195 * use here to keep track of the state of the storage
1196 * attributes.
1197 */
1198 memset(kvm_second_dirty_bitmap(ms), 0xff, kvm_dirty_bitmap_bytes(ms));
1199 ram_pages += ms->npages;
1200 }
1201 atomic64_set(&kvm->arch.cmma_dirty_pages, ram_pages);
1202 kvm->arch.migration_mode = 1;
1203 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
1204 return 0;
1205}
1206
1207/*
1208 * Must be called with kvm->slots_lock to avoid races with ourselves and
1209 * kvm_s390_vm_start_migration.
1210 */
1211static int kvm_s390_vm_stop_migration(struct kvm *kvm)
1212{
1213 /* migration mode already disabled */
1214 if (!kvm->arch.migration_mode)
1215 return 0;
1216 kvm->arch.migration_mode = 0;
1217 if (kvm->arch.use_cmma)
1218 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
1219 return 0;
1220}
1221
1222static int kvm_s390_vm_set_migration(struct kvm *kvm,
1223 struct kvm_device_attr *attr)
1224{
1225 int res = -ENXIO;
1226
1227 mutex_lock(&kvm->slots_lock);
1228 switch (attr->attr) {
1229 case KVM_S390_VM_MIGRATION_START:
1230 res = kvm_s390_vm_start_migration(kvm);
1231 break;
1232 case KVM_S390_VM_MIGRATION_STOP:
1233 res = kvm_s390_vm_stop_migration(kvm);
1234 break;
1235 default:
1236 break;
1237 }
1238 mutex_unlock(&kvm->slots_lock);
1239
1240 return res;
1241}
1242
1243static int kvm_s390_vm_get_migration(struct kvm *kvm,
1244 struct kvm_device_attr *attr)
1245{
1246 u64 mig = kvm->arch.migration_mode;
1247
1248 if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
1249 return -ENXIO;
1250
1251 if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))
1252 return -EFAULT;
1253 return 0;
1254}
1255
1256static void __kvm_s390_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod);
1257
1258static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
1259{
1260 struct kvm_s390_vm_tod_clock gtod;
1261
 1262 if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
1263 return -EFAULT;
1264
1265 if (!test_kvm_facility(kvm, 139) && gtod.epoch_idx)
1266 return -EINVAL;
 1267 __kvm_s390_set_tod_clock(kvm, &gtod);
1268
1269 VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx",
1270 gtod.epoch_idx, gtod.tod);
1271
1272 return 0;
1273}
1274
1275static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
1276{
1277 u8 gtod_high;
1278
 1279 if (copy_from_user(&gtod_high, (void __user *)attr->addr,
1280 sizeof(gtod_high)))
1281 return -EFAULT;
1282
1283 if (gtod_high != 0)
1284 return -EINVAL;
1285 VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);
1286
1287 return 0;
1288}
1289
1290static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1291{
1292 struct kvm_s390_vm_tod_clock gtod = { 0 };
1293
 1294 if (copy_from_user(&gtod.tod, (void __user *)attr->addr,
1295 sizeof(gtod.tod)))
1296 return -EFAULT;
1297
 1298 __kvm_s390_set_tod_clock(kvm, &gtod);
1299 VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod.tod);
1300 return 0;
1301}
1302
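/*
 * The TOD attributes are driven through KVM_SET_DEVICE_ATTR on the VM fd.
 * A minimal userspace sketch (vm_fd and the chosen tod value are
 * assumptions, not defined in this file):
 *
 *	__u64 tod = 0;
 *	struct kvm_device_attr attr = {
 *		.group = KVM_S390_VM_TOD,
 *		.attr = KVM_S390_VM_TOD_LOW,
 *		.addr = (__u64)&tod,
 *	};
 *	ioctl(vm_fd, KVM_SET_DEVICE_ATTR, &attr);
 */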
1303static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1304{
1305 int ret;
1306
1307 if (attr->flags)
1308 return -EINVAL;
1309
1310 mutex_lock(&kvm->lock);
1311 /*
1312 * For protected guests, the TOD is managed by the ultravisor, so trying
1313 * to change it will never bring the expected results.
1314 */
1315 if (kvm_s390_pv_is_protected(kvm)) {
1316 ret = -EOPNOTSUPP;
1317 goto out_unlock;
1318 }
1319
1320 switch (attr->attr) {
1321 case KVM_S390_VM_TOD_EXT:
1322 ret = kvm_s390_set_tod_ext(kvm, attr);
1323 break;
1324 case KVM_S390_VM_TOD_HIGH:
1325 ret = kvm_s390_set_tod_high(kvm, attr);
1326 break;
1327 case KVM_S390_VM_TOD_LOW:
1328 ret = kvm_s390_set_tod_low(kvm, attr);
1329 break;
1330 default:
1331 ret = -ENXIO;
1332 break;
1333 }
1334
1335out_unlock:
1336 mutex_unlock(&kvm->lock);
1337 return ret;
1338}
1339
1340static void kvm_s390_get_tod_clock(struct kvm *kvm,
1341 struct kvm_s390_vm_tod_clock *gtod)
1342{
1343 union tod_clock clk;
1344
1345 preempt_disable();
1346
1347 store_tod_clock_ext(&clk);
1348
1349 gtod->tod = clk.tod + kvm->arch.epoch;
1350 gtod->epoch_idx = 0;
1351 if (test_kvm_facility(kvm, 139)) {
1352 gtod->epoch_idx = clk.ei + kvm->arch.epdx;
1353 if (gtod->tod < clk.tod)
1354 gtod->epoch_idx += 1;
1355 }
1356
1357 preempt_enable();
1358}
1359
1360static int kvm_s390_get_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
1361{
1362 struct kvm_s390_vm_tod_clock gtod;
1363
 1364 memset(&gtod, 0, sizeof(gtod));
 1365 kvm_s390_get_tod_clock(kvm, &gtod);
 1366 if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
1367 return -EFAULT;
1368
1369 VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x, TOD base: 0x%llx",
1370 gtod.epoch_idx, gtod.tod);
1371 return 0;
1372}
1373
1374static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
1375{
1376 u8 gtod_high = 0;
1377
 1378 if (copy_to_user((void __user *)attr->addr, &gtod_high,
1379 sizeof(gtod_high)))
1380 return -EFAULT;
1381 VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);
1382
1383 return 0;
1384}
1385
1386static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1387{
1388 u64 gtod;
1389
1390 gtod = kvm_s390_get_tod_clock_fast(kvm);
 1391 if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
1392 return -EFAULT;
1393 VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);
1394
1395 return 0;
1396}
1397
1398static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1399{
1400 int ret;
1401
1402 if (attr->flags)
1403 return -EINVAL;
1404
1405 switch (attr->attr) {
1406 case KVM_S390_VM_TOD_EXT:
1407 ret = kvm_s390_get_tod_ext(kvm, attr);
1408 break;
1409 case KVM_S390_VM_TOD_HIGH:
1410 ret = kvm_s390_get_tod_high(kvm, attr);
1411 break;
1412 case KVM_S390_VM_TOD_LOW:
1413 ret = kvm_s390_get_tod_low(kvm, attr);
1414 break;
1415 default:
1416 ret = -ENXIO;
1417 break;
1418 }
1419 return ret;
1420}
1421
1422static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1423{
1424 struct kvm_s390_vm_cpu_processor *proc;
1425 u16 lowest_ibc, unblocked_ibc;
1426 int ret = 0;
1427
1428 mutex_lock(&kvm->lock);
1429 if (kvm->created_vcpus) {
1430 ret = -EBUSY;
1431 goto out;
1432 }
1433 proc = kzalloc(sizeof(*proc), GFP_KERNEL_ACCOUNT);
1434 if (!proc) {
1435 ret = -ENOMEM;
1436 goto out;
1437 }
1438 if (!copy_from_user(proc, (void __user *)attr->addr,
1439 sizeof(*proc))) {
1440 kvm->arch.model.cpuid = proc->cpuid;
1441 lowest_ibc = sclp.ibc >> 16 & 0xfff;
1442 unblocked_ibc = sclp.ibc & 0xfff;
1443 if (lowest_ibc && proc->ibc) {
1444 if (proc->ibc > unblocked_ibc)
1445 kvm->arch.model.ibc = unblocked_ibc;
1446 else if (proc->ibc < lowest_ibc)
1447 kvm->arch.model.ibc = lowest_ibc;
1448 else
1449 kvm->arch.model.ibc = proc->ibc;
1450 }
1451 memcpy(kvm->arch.model.fac_list, proc->fac_list,
1452 S390_ARCH_FAC_LIST_SIZE_BYTE);
1453 VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1454 kvm->arch.model.ibc,
1455 kvm->arch.model.cpuid);
1456 VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1457 kvm->arch.model.fac_list[0],
1458 kvm->arch.model.fac_list[1],
1459 kvm->arch.model.fac_list[2]);
1460 } else
1461 ret = -EFAULT;
1462 kfree(proc);
1463out:
1464 mutex_unlock(&kvm->lock);
1465 return ret;
1466}
1467
1468static int kvm_s390_set_processor_feat(struct kvm *kvm,
1469 struct kvm_device_attr *attr)
1470{
1471 struct kvm_s390_vm_cpu_feat data;
1472
1473 if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
1474 return -EFAULT;
1475 if (!bitmap_subset((unsigned long *) data.feat,
1476 kvm_s390_available_cpu_feat,
1477 KVM_S390_VM_CPU_FEAT_NR_BITS))
1478 return -EINVAL;
1479
1480 mutex_lock(&kvm->lock);
1481 if (kvm->created_vcpus) {
1482 mutex_unlock(&kvm->lock);
1483 return -EBUSY;
1484 }
1485 bitmap_from_arr64(kvm->arch.cpu_feat, data.feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
1486 mutex_unlock(&kvm->lock);
1487 VM_EVENT(kvm, 3, "SET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1488 data.feat[0],
1489 data.feat[1],
1490 data.feat[2]);
1491 return 0;
1492}
1493
1494static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
1495 struct kvm_device_attr *attr)
1496{
1497 mutex_lock(&kvm->lock);
1498 if (kvm->created_vcpus) {
1499 mutex_unlock(&kvm->lock);
1500 return -EBUSY;
1501 }
1502
1503 if (copy_from_user(&kvm->arch.model.subfuncs, (void __user *)attr->addr,
1504 sizeof(struct kvm_s390_vm_cpu_subfunc))) {
1505 mutex_unlock(&kvm->lock);
1506 return -EFAULT;
1507 }
1508 mutex_unlock(&kvm->lock);
1509
1510 VM_EVENT(kvm, 3, "SET: guest PLO subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1511 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
1512 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
1513 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
1514 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
1515 VM_EVENT(kvm, 3, "SET: guest PTFF subfunc 0x%16.16lx.%16.16lx",
1516 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
1517 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
1518 VM_EVENT(kvm, 3, "SET: guest KMAC subfunc 0x%16.16lx.%16.16lx",
1519 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
1520 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
1521 VM_EVENT(kvm, 3, "SET: guest KMC subfunc 0x%16.16lx.%16.16lx",
1522 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
1523 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
1524 VM_EVENT(kvm, 3, "SET: guest KM subfunc 0x%16.16lx.%16.16lx",
1525 ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
1526 ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
1527 VM_EVENT(kvm, 3, "SET: guest KIMD subfunc 0x%16.16lx.%16.16lx",
1528 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
1529 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
1530 VM_EVENT(kvm, 3, "SET: guest KLMD subfunc 0x%16.16lx.%16.16lx",
1531 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
1532 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
1533 VM_EVENT(kvm, 3, "SET: guest PCKMO subfunc 0x%16.16lx.%16.16lx",
1534 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
1535 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
1536 VM_EVENT(kvm, 3, "SET: guest KMCTR subfunc 0x%16.16lx.%16.16lx",
1537 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
1538 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
1539 VM_EVENT(kvm, 3, "SET: guest KMF subfunc 0x%16.16lx.%16.16lx",
1540 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
1541 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
1542 VM_EVENT(kvm, 3, "SET: guest KMO subfunc 0x%16.16lx.%16.16lx",
1543 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
1544 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
1545 VM_EVENT(kvm, 3, "SET: guest PCC subfunc 0x%16.16lx.%16.16lx",
1546 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
1547 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
1548 VM_EVENT(kvm, 3, "SET: guest PPNO subfunc 0x%16.16lx.%16.16lx",
1549 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
1550 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
1551 VM_EVENT(kvm, 3, "SET: guest KMA subfunc 0x%16.16lx.%16.16lx",
1552 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
1553 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
1554 VM_EVENT(kvm, 3, "SET: guest KDSA subfunc 0x%16.16lx.%16.16lx",
1555 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
1556 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
1557 VM_EVENT(kvm, 3, "SET: guest SORTL subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1558 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
1559 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
1560 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
1561 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
1562 VM_EVENT(kvm, 3, "SET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1563 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
1564 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
1565 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
1566 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);
 1567 VM_EVENT(kvm, 3, "SET: guest PFCR subfunc 0x%16.16lx.%16.16lx",
 1568 ((unsigned long *) &kvm->arch.model.subfuncs.pfcr)[0],
 1569 ((unsigned long *) &kvm->arch.model.subfuncs.pfcr)[1]);
1570
1571 return 0;
1572}
1573
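/*
 * Mask of UV feature indications that may be passed through to the guest:
 * currently only the AP and AP-interrupt related bits. The compound literal
 * keeps the bit positions in sync with struct kvm_s390_vm_cpu_uv_feat.
 */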
1574#define KVM_S390_VM_CPU_UV_FEAT_GUEST_MASK \
1575( \
1576 ((struct kvm_s390_vm_cpu_uv_feat){ \
1577 .ap = 1, \
1578 .ap_intr = 1, \
1579 }) \
1580 .feat \
1581)
1582
1583static int kvm_s390_set_uv_feat(struct kvm *kvm, struct kvm_device_attr *attr)
1584{
1585 struct kvm_s390_vm_cpu_uv_feat __user *ptr = (void __user *)attr->addr;
1586 unsigned long data, filter;
1587
1588 filter = uv_info.uv_feature_indications & KVM_S390_VM_CPU_UV_FEAT_GUEST_MASK;
1589 if (get_user(data, &ptr->feat))
1590 return -EFAULT;
1591 if (!bitmap_subset(&data, &filter, KVM_S390_VM_CPU_UV_FEAT_NR_BITS))
1592 return -EINVAL;
1593
1594 mutex_lock(&kvm->lock);
1595 if (kvm->created_vcpus) {
1596 mutex_unlock(&kvm->lock);
1597 return -EBUSY;
1598 }
1599 kvm->arch.model.uv_feat_guest.feat = data;
1600 mutex_unlock(&kvm->lock);
1601
1602 VM_EVENT(kvm, 3, "SET: guest UV-feat: 0x%16.16lx", data);
1603
1604 return 0;
1605}
1606
1607static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1608{
1609 int ret = -ENXIO;
1610
1611 switch (attr->attr) {
1612 case KVM_S390_VM_CPU_PROCESSOR:
1613 ret = kvm_s390_set_processor(kvm, attr);
1614 break;
1615 case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1616 ret = kvm_s390_set_processor_feat(kvm, attr);
1617 break;
1618 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1619 ret = kvm_s390_set_processor_subfunc(kvm, attr);
1620 break;
1621 case KVM_S390_VM_CPU_PROCESSOR_UV_FEAT_GUEST:
1622 ret = kvm_s390_set_uv_feat(kvm, attr);
1623 break;
1624 }
1625 return ret;
1626}
1627
1628static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1629{
1630 struct kvm_s390_vm_cpu_processor *proc;
1631 int ret = 0;
1632
1633 proc = kzalloc(sizeof(*proc), GFP_KERNEL_ACCOUNT);
1634 if (!proc) {
1635 ret = -ENOMEM;
1636 goto out;
1637 }
1638 proc->cpuid = kvm->arch.model.cpuid;
1639 proc->ibc = kvm->arch.model.ibc;
1640 memcpy(&proc->fac_list, kvm->arch.model.fac_list,
1641 S390_ARCH_FAC_LIST_SIZE_BYTE);
1642 VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1643 kvm->arch.model.ibc,
1644 kvm->arch.model.cpuid);
1645 VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1646 kvm->arch.model.fac_list[0],
1647 kvm->arch.model.fac_list[1],
1648 kvm->arch.model.fac_list[2]);
1649 if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
1650 ret = -EFAULT;
1651 kfree(proc);
1652out:
1653 return ret;
1654}
1655
1656static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
1657{
1658 struct kvm_s390_vm_cpu_machine *mach;
1659 int ret = 0;
1660
1661 mach = kzalloc(sizeof(*mach), GFP_KERNEL_ACCOUNT);
1662 if (!mach) {
1663 ret = -ENOMEM;
1664 goto out;
1665 }
1666 get_cpu_id((struct cpuid *) &mach->cpuid);
1667 mach->ibc = sclp.ibc;
1668 memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
1669 S390_ARCH_FAC_LIST_SIZE_BYTE);
1670 memcpy((unsigned long *)&mach->fac_list, stfle_fac_list,
1671 sizeof(stfle_fac_list));
1672 VM_EVENT(kvm, 3, "GET: host ibc: 0x%4.4x, host cpuid: 0x%16.16llx",
1673 kvm->arch.model.ibc,
1674 kvm->arch.model.cpuid);
1675 VM_EVENT(kvm, 3, "GET: host facmask: 0x%16.16llx.%16.16llx.%16.16llx",
1676 mach->fac_mask[0],
1677 mach->fac_mask[1],
1678 mach->fac_mask[2]);
1679 VM_EVENT(kvm, 3, "GET: host faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1680 mach->fac_list[0],
1681 mach->fac_list[1],
1682 mach->fac_list[2]);
1683 if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
1684 ret = -EFAULT;
1685 kfree(mach);
1686out:
1687 return ret;
1688}
1689
1690static int kvm_s390_get_processor_feat(struct kvm *kvm,
1691 struct kvm_device_attr *attr)
1692{
1693 struct kvm_s390_vm_cpu_feat data;
1694
1695 bitmap_to_arr64(data.feat, kvm->arch.cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
1696 if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1697 return -EFAULT;
1698 VM_EVENT(kvm, 3, "GET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1699 data.feat[0],
1700 data.feat[1],
1701 data.feat[2]);
1702 return 0;
1703}
1704
1705static int kvm_s390_get_machine_feat(struct kvm *kvm,
1706 struct kvm_device_attr *attr)
1707{
1708 struct kvm_s390_vm_cpu_feat data;
1709
1710 bitmap_to_arr64(data.feat, kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
1711 if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1712 return -EFAULT;
1713 VM_EVENT(kvm, 3, "GET: host feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1714 data.feat[0],
1715 data.feat[1],
1716 data.feat[2]);
1717 return 0;
1718}
1719
1720static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
1721 struct kvm_device_attr *attr)
1722{
1723 if (copy_to_user((void __user *)attr->addr, &kvm->arch.model.subfuncs,
1724 sizeof(struct kvm_s390_vm_cpu_subfunc)))
1725 return -EFAULT;
1726
1727 VM_EVENT(kvm, 3, "GET: guest PLO subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1728 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
1729 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
1730 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
1731 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
1732 VM_EVENT(kvm, 3, "GET: guest PTFF subfunc 0x%16.16lx.%16.16lx",
1733 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
1734 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
1735 VM_EVENT(kvm, 3, "GET: guest KMAC subfunc 0x%16.16lx.%16.16lx",
1736 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
1737 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
1738 VM_EVENT(kvm, 3, "GET: guest KMC subfunc 0x%16.16lx.%16.16lx",
1739 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
1740 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
1741 VM_EVENT(kvm, 3, "GET: guest KM subfunc 0x%16.16lx.%16.16lx",
1742 ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
1743 ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
1744 VM_EVENT(kvm, 3, "GET: guest KIMD subfunc 0x%16.16lx.%16.16lx",
1745 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
1746 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
1747 VM_EVENT(kvm, 3, "GET: guest KLMD subfunc 0x%16.16lx.%16.16lx",
1748 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
1749 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
1750 VM_EVENT(kvm, 3, "GET: guest PCKMO subfunc 0x%16.16lx.%16.16lx",
1751 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
1752 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
1753 VM_EVENT(kvm, 3, "GET: guest KMCTR subfunc 0x%16.16lx.%16.16lx",
1754 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
1755 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
1756 VM_EVENT(kvm, 3, "GET: guest KMF subfunc 0x%16.16lx.%16.16lx",
1757 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
1758 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
1759 VM_EVENT(kvm, 3, "GET: guest KMO subfunc 0x%16.16lx.%16.16lx",
1760 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
1761 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
1762 VM_EVENT(kvm, 3, "GET: guest PCC subfunc 0x%16.16lx.%16.16lx",
1763 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
1764 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
1765 VM_EVENT(kvm, 3, "GET: guest PPNO subfunc 0x%16.16lx.%16.16lx",
1766 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
1767 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
1768 VM_EVENT(kvm, 3, "GET: guest KMA subfunc 0x%16.16lx.%16.16lx",
1769 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
1770 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
1771 VM_EVENT(kvm, 3, "GET: guest KDSA subfunc 0x%16.16lx.%16.16lx",
1772 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
1773 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
1774 VM_EVENT(kvm, 3, "GET: guest SORTL subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1775 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
1776 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
1777 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
1778 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
1779 VM_EVENT(kvm, 3, "GET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1780 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
1781 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
1782 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
1783 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);
 1784 VM_EVENT(kvm, 3, "GET: guest PFCR subfunc 0x%16.16lx.%16.16lx",
 1785 ((unsigned long *) &kvm->arch.model.subfuncs.pfcr)[0],
 1786 ((unsigned long *) &kvm->arch.model.subfuncs.pfcr)[1]);
1787
1788 return 0;
1789}
1790
1791static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
1792 struct kvm_device_attr *attr)
1793{
1794 if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
1795 sizeof(struct kvm_s390_vm_cpu_subfunc)))
1796 return -EFAULT;
1797
1798 VM_EVENT(kvm, 3, "GET: host PLO subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1799 ((unsigned long *) &kvm_s390_available_subfunc.plo)[0],
1800 ((unsigned long *) &kvm_s390_available_subfunc.plo)[1],
1801 ((unsigned long *) &kvm_s390_available_subfunc.plo)[2],
1802 ((unsigned long *) &kvm_s390_available_subfunc.plo)[3]);
1803 VM_EVENT(kvm, 3, "GET: host PTFF subfunc 0x%16.16lx.%16.16lx",
1804 ((unsigned long *) &kvm_s390_available_subfunc.ptff)[0],
1805 ((unsigned long *) &kvm_s390_available_subfunc.ptff)[1]);
1806 VM_EVENT(kvm, 3, "GET: host KMAC subfunc 0x%16.16lx.%16.16lx",
1807 ((unsigned long *) &kvm_s390_available_subfunc.kmac)[0],
1808 ((unsigned long *) &kvm_s390_available_subfunc.kmac)[1]);
1809 VM_EVENT(kvm, 3, "GET: host KMC subfunc 0x%16.16lx.%16.16lx",
1810 ((unsigned long *) &kvm_s390_available_subfunc.kmc)[0],
1811 ((unsigned long *) &kvm_s390_available_subfunc.kmc)[1]);
1812 VM_EVENT(kvm, 3, "GET: host KM subfunc 0x%16.16lx.%16.16lx",
1813 ((unsigned long *) &kvm_s390_available_subfunc.km)[0],
1814 ((unsigned long *) &kvm_s390_available_subfunc.km)[1]);
1815 VM_EVENT(kvm, 3, "GET: host KIMD subfunc 0x%16.16lx.%16.16lx",
1816 ((unsigned long *) &kvm_s390_available_subfunc.kimd)[0],
1817 ((unsigned long *) &kvm_s390_available_subfunc.kimd)[1]);
1818 VM_EVENT(kvm, 3, "GET: host KLMD subfunc 0x%16.16lx.%16.16lx",
1819 ((unsigned long *) &kvm_s390_available_subfunc.klmd)[0],
1820 ((unsigned long *) &kvm_s390_available_subfunc.klmd)[1]);
1821 VM_EVENT(kvm, 3, "GET: host PCKMO subfunc 0x%16.16lx.%16.16lx",
1822 ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[0],
1823 ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[1]);
1824 VM_EVENT(kvm, 3, "GET: host KMCTR subfunc 0x%16.16lx.%16.16lx",
1825 ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[0],
1826 ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[1]);
1827 VM_EVENT(kvm, 3, "GET: host KMF subfunc 0x%16.16lx.%16.16lx",
1828 ((unsigned long *) &kvm_s390_available_subfunc.kmf)[0],
1829 ((unsigned long *) &kvm_s390_available_subfunc.kmf)[1]);
1830 VM_EVENT(kvm, 3, "GET: host KMO subfunc 0x%16.16lx.%16.16lx",
1831 ((unsigned long *) &kvm_s390_available_subfunc.kmo)[0],
1832 ((unsigned long *) &kvm_s390_available_subfunc.kmo)[1]);
1833 VM_EVENT(kvm, 3, "GET: host PCC subfunc 0x%16.16lx.%16.16lx",
1834 ((unsigned long *) &kvm_s390_available_subfunc.pcc)[0],
1835 ((unsigned long *) &kvm_s390_available_subfunc.pcc)[1]);
1836 VM_EVENT(kvm, 3, "GET: host PPNO subfunc 0x%16.16lx.%16.16lx",
1837 ((unsigned long *) &kvm_s390_available_subfunc.ppno)[0],
1838 ((unsigned long *) &kvm_s390_available_subfunc.ppno)[1]);
1839 VM_EVENT(kvm, 3, "GET: host KMA subfunc 0x%16.16lx.%16.16lx",
1840 ((unsigned long *) &kvm_s390_available_subfunc.kma)[0],
1841 ((unsigned long *) &kvm_s390_available_subfunc.kma)[1]);
1842 VM_EVENT(kvm, 3, "GET: host KDSA subfunc 0x%16.16lx.%16.16lx",
1843 ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[0],
1844 ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[1]);
1845 VM_EVENT(kvm, 3, "GET: host SORTL subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1846 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[0],
1847 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[1],
1848 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[2],
1849 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[3]);
1850 VM_EVENT(kvm, 3, "GET: host DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1851 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[0],
1852 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[1],
1853 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[2],
1854 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[3]);
1855 VM_EVENT(kvm, 3, "GET: host PFCR subfunc 0x%16.16lx.%16.16lx",
1856 ((unsigned long *) &kvm_s390_available_subfunc.pfcr)[0],
1857 ((unsigned long *) &kvm_s390_available_subfunc.pfcr)[1]);
1858
1859 return 0;
1860}
1861
1862static int kvm_s390_get_processor_uv_feat(struct kvm *kvm, struct kvm_device_attr *attr)
1863{
1864 struct kvm_s390_vm_cpu_uv_feat __user *dst = (void __user *)attr->addr;
1865 unsigned long feat = kvm->arch.model.uv_feat_guest.feat;
1866
1867 if (put_user(feat, &dst->feat))
1868 return -EFAULT;
1869 VM_EVENT(kvm, 3, "GET: guest UV-feat: 0x%16.16lx", feat);
1870
1871 return 0;
1872}
1873
1874static int kvm_s390_get_machine_uv_feat(struct kvm *kvm, struct kvm_device_attr *attr)
1875{
1876 struct kvm_s390_vm_cpu_uv_feat __user *dst = (void __user *)attr->addr;
1877 unsigned long feat;
1878
1879 BUILD_BUG_ON(sizeof(*dst) != sizeof(uv_info.uv_feature_indications));
1880
1881 feat = uv_info.uv_feature_indications & KVM_S390_VM_CPU_UV_FEAT_GUEST_MASK;
1882 if (put_user(feat, &dst->feat))
1883 return -EFAULT;
 1884 VM_EVENT(kvm, 3, "GET: host UV-feat: 0x%16.16lx", feat);
1885
1886 return 0;
1887}
1888
1889static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1890{
1891 int ret = -ENXIO;
1892
1893 switch (attr->attr) {
1894 case KVM_S390_VM_CPU_PROCESSOR:
1895 ret = kvm_s390_get_processor(kvm, attr);
1896 break;
1897 case KVM_S390_VM_CPU_MACHINE:
1898 ret = kvm_s390_get_machine(kvm, attr);
1899 break;
1900 case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1901 ret = kvm_s390_get_processor_feat(kvm, attr);
1902 break;
1903 case KVM_S390_VM_CPU_MACHINE_FEAT:
1904 ret = kvm_s390_get_machine_feat(kvm, attr);
1905 break;
1906 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1907 ret = kvm_s390_get_processor_subfunc(kvm, attr);
1908 break;
1909 case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1910 ret = kvm_s390_get_machine_subfunc(kvm, attr);
1911 break;
1912 case KVM_S390_VM_CPU_PROCESSOR_UV_FEAT_GUEST:
1913 ret = kvm_s390_get_processor_uv_feat(kvm, attr);
1914 break;
1915 case KVM_S390_VM_CPU_MACHINE_UV_FEAT_GUEST:
1916 ret = kvm_s390_get_machine_uv_feat(kvm, attr);
1917 break;
1918 }
1919 return ret;
1920}
1921
1922/**
1923 * kvm_s390_update_topology_change_report - update CPU topology change report
1924 * @kvm: guest KVM description
1925 * @val: set or clear the MTCR bit
1926 *
 1927 * Updates the Multiprocessor Topology-Change-Report bit to signal
 1928 * a topology change to the guest.
1929 * This is only relevant if the topology facility is present.
1930 *
1931 * The SCA version, bsca or esca, doesn't matter as offset is the same.
1932 */
1933static void kvm_s390_update_topology_change_report(struct kvm *kvm, bool val)
1934{
1935 union sca_utility new, old;
1936 struct bsca_block *sca;
1937
1938 read_lock(&kvm->arch.sca_lock);
1939 sca = kvm->arch.sca;
1940 old = READ_ONCE(sca->utility);
1941 do {
1942 new = old;
1943 new.mtcr = val;
1944 } while (!try_cmpxchg(&sca->utility.val, &old.val, new.val));
1945 read_unlock(&kvm->arch.sca_lock);
1946}
1947
1948static int kvm_s390_set_topo_change_indication(struct kvm *kvm,
1949 struct kvm_device_attr *attr)
1950{
1951 if (!test_kvm_facility(kvm, 11))
1952 return -ENXIO;
1953
1954 kvm_s390_update_topology_change_report(kvm, !!attr->attr);
1955 return 0;
1956}
1957
1958static int kvm_s390_get_topo_change_indication(struct kvm *kvm,
1959 struct kvm_device_attr *attr)
1960{
1961 u8 topo;
1962
1963 if (!test_kvm_facility(kvm, 11))
1964 return -ENXIO;
1965
1966 read_lock(&kvm->arch.sca_lock);
1967 topo = ((struct bsca_block *)kvm->arch.sca)->utility.mtcr;
1968 read_unlock(&kvm->arch.sca_lock);
1969
1970 return put_user(topo, (u8 __user *)attr->addr);
1971}
1972
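/*
 * Dispatchers for the KVM_SET/GET/HAS_DEVICE_ATTR VM ioctls. A userspace
 * sketch that probes for CPU topology support (vm_fd is an assumption, not
 * defined in this file):
 *
 *	struct kvm_device_attr attr = {
 *		.group = KVM_S390_VM_CPU_TOPOLOGY,
 *	};
 *	int supported = (ioctl(vm_fd, KVM_HAS_DEVICE_ATTR, &attr) == 0);
 */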
1973static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1974{
1975 int ret;
1976
1977 switch (attr->group) {
1978 case KVM_S390_VM_MEM_CTRL:
1979 ret = kvm_s390_set_mem_control(kvm, attr);
1980 break;
1981 case KVM_S390_VM_TOD:
1982 ret = kvm_s390_set_tod(kvm, attr);
1983 break;
1984 case KVM_S390_VM_CPU_MODEL:
1985 ret = kvm_s390_set_cpu_model(kvm, attr);
1986 break;
1987 case KVM_S390_VM_CRYPTO:
1988 ret = kvm_s390_vm_set_crypto(kvm, attr);
1989 break;
1990 case KVM_S390_VM_MIGRATION:
1991 ret = kvm_s390_vm_set_migration(kvm, attr);
1992 break;
1993 case KVM_S390_VM_CPU_TOPOLOGY:
1994 ret = kvm_s390_set_topo_change_indication(kvm, attr);
1995 break;
1996 default:
1997 ret = -ENXIO;
1998 break;
1999 }
2000
2001 return ret;
2002}
2003
2004static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
2005{
2006 int ret;
2007
2008 switch (attr->group) {
2009 case KVM_S390_VM_MEM_CTRL:
2010 ret = kvm_s390_get_mem_control(kvm, attr);
2011 break;
2012 case KVM_S390_VM_TOD:
2013 ret = kvm_s390_get_tod(kvm, attr);
2014 break;
2015 case KVM_S390_VM_CPU_MODEL:
2016 ret = kvm_s390_get_cpu_model(kvm, attr);
2017 break;
2018 case KVM_S390_VM_MIGRATION:
2019 ret = kvm_s390_vm_get_migration(kvm, attr);
2020 break;
2021 case KVM_S390_VM_CPU_TOPOLOGY:
2022 ret = kvm_s390_get_topo_change_indication(kvm, attr);
2023 break;
2024 default:
2025 ret = -ENXIO;
2026 break;
2027 }
2028
2029 return ret;
2030}
2031
2032static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
2033{
2034 int ret;
2035
2036 switch (attr->group) {
2037 case KVM_S390_VM_MEM_CTRL:
2038 switch (attr->attr) {
2039 case KVM_S390_VM_MEM_ENABLE_CMMA:
2040 case KVM_S390_VM_MEM_CLR_CMMA:
2041 ret = sclp.has_cmma ? 0 : -ENXIO;
2042 break;
2043 case KVM_S390_VM_MEM_LIMIT_SIZE:
2044 ret = 0;
2045 break;
2046 default:
2047 ret = -ENXIO;
2048 break;
2049 }
2050 break;
2051 case KVM_S390_VM_TOD:
2052 switch (attr->attr) {
2053 case KVM_S390_VM_TOD_LOW:
2054 case KVM_S390_VM_TOD_HIGH:
2055 ret = 0;
2056 break;
2057 default:
2058 ret = -ENXIO;
2059 break;
2060 }
2061 break;
2062 case KVM_S390_VM_CPU_MODEL:
2063 switch (attr->attr) {
2064 case KVM_S390_VM_CPU_PROCESSOR:
2065 case KVM_S390_VM_CPU_MACHINE:
2066 case KVM_S390_VM_CPU_PROCESSOR_FEAT:
2067 case KVM_S390_VM_CPU_MACHINE_FEAT:
2068 case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
2069 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
2070 case KVM_S390_VM_CPU_MACHINE_UV_FEAT_GUEST:
2071 case KVM_S390_VM_CPU_PROCESSOR_UV_FEAT_GUEST:
2072 ret = 0;
2073 break;
2074 default:
2075 ret = -ENXIO;
2076 break;
2077 }
2078 break;
2079 case KVM_S390_VM_CRYPTO:
2080 switch (attr->attr) {
2081 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
2082 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
2083 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
2084 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
2085 ret = 0;
2086 break;
2087 case KVM_S390_VM_CRYPTO_ENABLE_APIE:
2088 case KVM_S390_VM_CRYPTO_DISABLE_APIE:
2089 ret = ap_instructions_available() ? 0 : -ENXIO;
2090 break;
2091 default:
2092 ret = -ENXIO;
2093 break;
2094 }
2095 break;
2096 case KVM_S390_VM_MIGRATION:
2097 ret = 0;
2098 break;
2099 case KVM_S390_VM_CPU_TOPOLOGY:
2100 ret = test_kvm_facility(kvm, 11) ? 0 : -ENXIO;
2101 break;
2102 default:
2103 ret = -ENXIO;
2104 break;
2105 }
2106
2107 return ret;
2108}
2109
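/*
 * KVM_S390_GET_SKEYS / KVM_S390_SET_SKEYS: userspace passes a guest frame
 * range (start_gfn, count) and a buffer address (skeydata_addr) holding one
 * storage key byte per frame. GET returns KVM_S390_GET_SKEYS_NONE when the
 * guest does not use storage keys at all.
 */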
2110static int kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
2111{
2112 uint8_t *keys;
2113 uint64_t hva;
2114 int srcu_idx, i, r = 0;
2115
2116 if (args->flags != 0)
2117 return -EINVAL;
2118
2119 /* Is this guest using storage keys? */
2120 if (!mm_uses_skeys(current->mm))
2121 return KVM_S390_GET_SKEYS_NONE;
2122
2123 /* Enforce sane limit on memory allocation */
2124 if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
2125 return -EINVAL;
2126
2127 keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL_ACCOUNT);
2128 if (!keys)
2129 return -ENOMEM;
2130
2131 mmap_read_lock(current->mm);
2132 srcu_idx = srcu_read_lock(&kvm->srcu);
2133 for (i = 0; i < args->count; i++) {
2134 hva = gfn_to_hva(kvm, args->start_gfn + i);
2135 if (kvm_is_error_hva(hva)) {
2136 r = -EFAULT;
2137 break;
2138 }
2139
2140 r = get_guest_storage_key(current->mm, hva, &keys[i]);
2141 if (r)
2142 break;
2143 }
2144 srcu_read_unlock(&kvm->srcu, srcu_idx);
2145 mmap_read_unlock(current->mm);
2146
2147 if (!r) {
2148 r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
2149 sizeof(uint8_t) * args->count);
2150 if (r)
2151 r = -EFAULT;
2152 }
2153
2154 kvfree(keys);
2155 return r;
2156}
2157
2158static int kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
2159{
2160 uint8_t *keys;
2161 uint64_t hva;
2162 int srcu_idx, i, r = 0;
2163 bool unlocked;
2164
2165 if (args->flags != 0)
2166 return -EINVAL;
2167
2168 /* Enforce sane limit on memory allocation */
2169 if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
2170 return -EINVAL;
2171
2172 keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL_ACCOUNT);
2173 if (!keys)
2174 return -ENOMEM;
2175
2176 r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
2177 sizeof(uint8_t) * args->count);
2178 if (r) {
2179 r = -EFAULT;
2180 goto out;
2181 }
2182
2183 /* Enable storage key handling for the guest */
2184 r = s390_enable_skey();
2185 if (r)
2186 goto out;
2187
2188 i = 0;
2189 mmap_read_lock(current->mm);
2190 srcu_idx = srcu_read_lock(&kvm->srcu);
2191 while (i < args->count) {
2192 unlocked = false;
2193 hva = gfn_to_hva(kvm, args->start_gfn + i);
2194 if (kvm_is_error_hva(hva)) {
2195 r = -EFAULT;
2196 break;
2197 }
2198
2199 /* Lowest order bit is reserved */
2200 if (keys[i] & 0x01) {
2201 r = -EINVAL;
2202 break;
2203 }
2204
2205 r = set_guest_storage_key(current->mm, hva, keys[i], 0);
2206 if (r) {
2207 r = fixup_user_fault(current->mm, hva,
2208 FAULT_FLAG_WRITE, &unlocked);
2209 if (r)
2210 break;
2211 }
2212 if (!r)
2213 i++;
2214 }
2215 srcu_read_unlock(&kvm->srcu, srcu_idx);
2216 mmap_read_unlock(current->mm);
2217out:
2218 kvfree(keys);
2219 return r;
2220}
2221
2222/*
 2223 * Base address and length must be sent at the start of each block, so it is
 2224 * cheaper to send some clean data, as long as it is less than the size of
 2225 * two longs.
2226 */
2227#define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *))
2228/* for consistency */
2229#define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)
2230
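/*
 * Used for the peek variant of KVM_S390_GET_CMMA_BITS: walk the guest frames
 * starting at start_gfn and report one state byte per page, taken from the
 * PGSTE of the backing host page. The 0x43 mask keeps only the bits that are
 * meaningful to userspace for CMMA migration; the rest of the PGSTE is not
 * exposed.
 */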
2231static int kvm_s390_peek_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
2232 u8 *res, unsigned long bufsize)
2233{
2234 unsigned long pgstev, hva, cur_gfn = args->start_gfn;
2235
2236 args->count = 0;
2237 while (args->count < bufsize) {
2238 hva = gfn_to_hva(kvm, cur_gfn);
2239 /*
2240 * We return an error if the first value was invalid, but we
2241 * return successfully if at least one value was copied.
2242 */
2243 if (kvm_is_error_hva(hva))
2244 return args->count ? 0 : -EFAULT;
2245 if (get_pgste(kvm->mm, hva, &pgstev) < 0)
2246 pgstev = 0;
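 /* Keep only the usage state and NODAT bits, i.e. the same PGSTE bits
 * that kvm_s390_set_cmma_bits() accepts on the set side.
 */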
2247 res[args->count++] = (pgstev >> 24) & 0x43;
2248 cur_gfn++;
2249 }
2250
2251 return 0;
2252}
2253
2254static struct kvm_memory_slot *gfn_to_memslot_approx(struct kvm_memslots *slots,
2255 gfn_t gfn)
2256{
2257 return ____gfn_to_memslot(slots, gfn, true);
2258}
2259
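/*
 * Return the guest frame number of the next page whose CMMA dirty bit is
 * set, searching from cur_gfn across the gfn-ordered memslots. If no dirty
 * page is found, the returned gfn lies at or beyond the end of guest memory.
 */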
2260static unsigned long kvm_s390_next_dirty_cmma(struct kvm_memslots *slots,
2261 unsigned long cur_gfn)
2262{
2263 struct kvm_memory_slot *ms = gfn_to_memslot_approx(slots, cur_gfn);
2264 unsigned long ofs = cur_gfn - ms->base_gfn;
2265 struct rb_node *mnode = &ms->gfn_node[slots->node_idx];
2266
2267 if (ms->base_gfn + ms->npages <= cur_gfn) {
2268 mnode = rb_next(mnode);
2269 /* If we are above the highest slot, wrap around */
2270 if (!mnode)
2271 mnode = rb_first(&slots->gfn_tree);
2272
2273 ms = container_of(mnode, struct kvm_memory_slot, gfn_node[slots->node_idx]);
2274 ofs = 0;
2275 }
2276
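 /*
 * cur_gfn may fall into a gap below the approximated slot; in that
 * case start scanning at the first page of that slot.
 */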
2277 if (cur_gfn < ms->base_gfn)
2278 ofs = 0;
2279
2280 ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, ofs);
2281 while (ofs >= ms->npages && (mnode = rb_next(mnode))) {
2282 ms = container_of(mnode, struct kvm_memory_slot, gfn_node[slots->node_idx]);
2283 ofs = find_first_bit(kvm_second_dirty_bitmap(ms), ms->npages);
2284 }
2285 return ms->base_gfn + ofs;
2286}
2287
2288static int kvm_s390_get_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
2289 u8 *res, unsigned long bufsize)
2290{
2291 unsigned long mem_end, cur_gfn, next_gfn, hva, pgstev;
2292 struct kvm_memslots *slots = kvm_memslots(kvm);
2293 struct kvm_memory_slot *ms;
2294
2295 if (unlikely(kvm_memslots_empty(slots)))
2296 return 0;
2297
2298 cur_gfn = kvm_s390_next_dirty_cmma(slots, args->start_gfn);
2299 ms = gfn_to_memslot(kvm, cur_gfn);
2300 args->count = 0;
2301 args->start_gfn = cur_gfn;
2302 if (!ms)
2303 return 0;
2304 next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
2305 mem_end = kvm_s390_get_gfn_end(slots);
2306
2307 while (args->count < bufsize) {
2308 hva = gfn_to_hva(kvm, cur_gfn);
2309 if (kvm_is_error_hva(hva))
2310 return 0;
2311 /* Decrement only if we actually flipped the bit to 0 */
2312 if (test_and_clear_bit(cur_gfn - ms->base_gfn, kvm_second_dirty_bitmap(ms)))
2313 atomic64_dec(&kvm->arch.cmma_dirty_pages);
2314 if (get_pgste(kvm->mm, hva, &pgstev) < 0)
2315 pgstev = 0;
2316 /* Save the value */
2317 res[args->count++] = (pgstev >> 24) & 0x43;
2318 /* If the next bit is too far away, stop. */
2319 if (next_gfn > cur_gfn + KVM_S390_MAX_BIT_DISTANCE)
2320 return 0;
2321 /* If we reached the previous "next", find the next one */
2322 if (cur_gfn == next_gfn)
2323 next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
2324 /* Reached the end of memory or of the buffer, stop */
2325 if ((next_gfn >= mem_end) ||
2326 (next_gfn - args->start_gfn >= bufsize))
2327 return 0;
2328 cur_gfn++;
2329 /* Reached the end of the current memslot, take the next one. */
2330 if (cur_gfn - ms->base_gfn >= ms->npages) {
2331 ms = gfn_to_memslot(kvm, cur_gfn);
2332 if (!ms)
2333 return 0;
2334 }
2335 }
2336 return 0;
2337}
2338
2339/*
2340 * This function searches for the next page with dirty CMMA attributes, and
2341 * saves the attributes in the buffer up to either the end of the buffer or
2342 * until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found;
2343 * no trailing clean bytes are saved.
2344 * In case no dirty bits were found, or if CMMA was not enabled or used, the
2345 * output buffer will indicate 0 as length.
2346 */
2347static int kvm_s390_get_cmma_bits(struct kvm *kvm,
2348 struct kvm_s390_cmma_log *args)
2349{
2350 unsigned long bufsize;
2351 int srcu_idx, peek, ret;
2352 u8 *values;
2353
2354 if (!kvm->arch.use_cmma)
2355 return -ENXIO;
2356 /* Invalid/unsupported flags were specified */
2357 if (args->flags & ~KVM_S390_CMMA_PEEK)
2358 return -EINVAL;
2359 /* Migration mode query, and we are not doing a migration */
2360 peek = !!(args->flags & KVM_S390_CMMA_PEEK);
2361 if (!peek && !kvm->arch.migration_mode)
2362 return -EINVAL;
2363 /* CMMA is disabled or was not used, or the buffer has length zero */
2364 bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
2365 if (!bufsize || !kvm->mm->context.uses_cmm) {
2366 memset(args, 0, sizeof(*args));
2367 return 0;
2368 }
2369 /* We are not peeking, and there are no dirty pages */
2370 if (!peek && !atomic64_read(&kvm->arch.cmma_dirty_pages)) {
2371 memset(args, 0, sizeof(*args));
2372 return 0;
2373 }
2374
2375 values = vmalloc(bufsize);
2376 if (!values)
2377 return -ENOMEM;
2378
2379 mmap_read_lock(kvm->mm);
2380 srcu_idx = srcu_read_lock(&kvm->srcu);
2381 if (peek)
2382 ret = kvm_s390_peek_cmma(kvm, args, values, bufsize);
2383 else
2384 ret = kvm_s390_get_cmma(kvm, args, values, bufsize);
2385 srcu_read_unlock(&kvm->srcu, srcu_idx);
2386 mmap_read_unlock(kvm->mm);
2387
2388 if (kvm->arch.migration_mode)
2389 args->remaining = atomic64_read(&kvm->arch.cmma_dirty_pages);
2390 else
2391 args->remaining = 0;
2392
2393 if (copy_to_user((void __user *)args->values, values, args->count))
2394 ret = -EFAULT;
2395
2396 vfree(values);
2397 return ret;
2398}
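/*
 * Illustrative sketch only: one way userspace might drive this path via the
 * KVM_S390_GET_CMMA_BITS vm ioctl, e.g. during migration. "vm_fd" and the
 * handle_cmma_values() consumer are assumptions of the example, not kernel
 * symbols.
 *
 *	__u8 buf[4096];
 *	struct kvm_s390_cmma_log log = {
 *		.start_gfn = 0,
 *		.count = sizeof(buf),
 *		.flags = KVM_S390_CMMA_PEEK,
 *		.values = (__u64)(unsigned long)buf,
 *	};
 *	if (ioctl(vm_fd, KVM_S390_GET_CMMA_BITS, &log) == 0)
 *		handle_cmma_values(log.start_gfn, buf, log.count);
 */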
2399
2400/*
2401 * This function sets the CMMA attributes for the given pages. If the input
2402 * buffer has zero length, no action is taken, otherwise the attributes are
2403 * set and the mm->context.uses_cmm flag is set.
2404 */
2405static int kvm_s390_set_cmma_bits(struct kvm *kvm,
2406 const struct kvm_s390_cmma_log *args)
2407{
2408 unsigned long hva, mask, pgstev, i;
2409 uint8_t *bits;
2410 int srcu_idx, r = 0;
2411
2412 mask = args->mask;
2413
2414 if (!kvm->arch.use_cmma)
2415 return -ENXIO;
2416 /* invalid/unsupported flags */
2417 if (args->flags != 0)
2418 return -EINVAL;
2419 /* Enforce sane limit on memory allocation */
2420 if (args->count > KVM_S390_CMMA_SIZE_MAX)
2421 return -EINVAL;
2422 /* Nothing to do */
2423 if (args->count == 0)
2424 return 0;
2425
2426 bits = vmalloc(array_size(sizeof(*bits), args->count));
2427 if (!bits)
2428 return -ENOMEM;
2429
2430 r = copy_from_user(bits, (void __user *)args->values, args->count);
2431 if (r) {
2432 r = -EFAULT;
2433 goto out;
2434 }
2435
2436 mmap_read_lock(kvm->mm);
2437 srcu_idx = srcu_read_lock(&kvm->srcu);
2438 for (i = 0; i < args->count; i++) {
2439 hva = gfn_to_hva(kvm, args->start_gfn + i);
2440 if (kvm_is_error_hva(hva)) {
2441 r = -EFAULT;
2442 break;
2443 }
2444
2445 pgstev = bits[i];
2446 pgstev = pgstev << 24;
2447 mask &= _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT;
2448 set_pgste_bits(kvm->mm, hva, mask, pgstev);
2449 }
2450 srcu_read_unlock(&kvm->srcu, srcu_idx);
2451 mmap_read_unlock(kvm->mm);
2452
2453 if (!kvm->mm->context.uses_cmm) {
2454 mmap_write_lock(kvm->mm);
2455 kvm->mm->context.uses_cmm = 1;
2456 mmap_write_unlock(kvm->mm);
2457 }
2458out:
2459 vfree(bits);
2460 return r;
2461}
2462
2463/**
2464 * kvm_s390_cpus_from_pv - Convert all protected vCPUs in a protected VM to
2465 * non-protected.
2466 * @kvm: the VM whose protected vCPUs are to be converted
2467 * @rc: return value for the RC field of the UVC (in case of error)
2468 * @rrc: return value for the RRC field of the UVC (in case of error)
2469 *
2470 * Does not stop in case of error; it tries to convert as many
2471 * CPUs as possible. In case of error, the RC and RRC of the first error
2472 * are returned.
2473 *
2474 * Return: 0 in case of success, otherwise -EIO
2475 */
2476int kvm_s390_cpus_from_pv(struct kvm *kvm, u16 *rc, u16 *rrc)
2477{
2478 struct kvm_vcpu *vcpu;
2479 unsigned long i;
2480 u16 _rc, _rrc;
2481 int ret = 0;
2482
2483 /*
2484 * We ignore failures and try to destroy as many CPUs as possible.
2485 * At the same time we must not free the assigned resources when
2486 * this fails, as the ultravisor still has access to that memory.
2487 * So kvm_s390_pv_destroy_cpu can leave an intentional ("wanted")
2488 * memory leak behind.
2489 * We want to return the first failure rc and rrc, though.
2490 */
2491 kvm_for_each_vcpu(i, vcpu, kvm) {
2492 mutex_lock(&vcpu->mutex);
2493 if (kvm_s390_pv_destroy_cpu(vcpu, &_rc, &_rrc) && !ret) {
2494 *rc = _rc;
2495 *rrc = _rrc;
2496 ret = -EIO;
2497 }
2498 mutex_unlock(&vcpu->mutex);
2499 }
2500 /* Ensure that we re-enable gisa if the non-PV guest used it but the PV guest did not. */
2501 if (use_gisa)
2502 kvm_s390_gisa_enable(kvm);
2503 return ret;
2504}
2505
2506/**
2507 * kvm_s390_cpus_to_pv - Convert all non-protected vCPUs in a protected VM
2508 * to protected.
2509 * @kvm: the VM whose protected vCPUs are to be converted
2510 * @rc: return value for the RC field of the UVC (in case of error)
2511 * @rrc: return value for the RRC field of the UVC (in case of error)
2512 *
2513 * Tries to undo the conversion in case of error.
2514 *
2515 * Return: 0 in case of success, otherwise -EIO
2516 */
2517static int kvm_s390_cpus_to_pv(struct kvm *kvm, u16 *rc, u16 *rrc)
2518{
2519 unsigned long i;
2520 int r = 0;
2521 u16 dummy;
2522
2523 struct kvm_vcpu *vcpu;
2524
2525 /* Disable the GISA if the ultravisor does not support AIV. */
2526 if (!uv_has_feature(BIT_UV_FEAT_AIV))
2527 kvm_s390_gisa_disable(kvm);
2528
2529 kvm_for_each_vcpu(i, vcpu, kvm) {
2530 mutex_lock(&vcpu->mutex);
2531 r = kvm_s390_pv_create_cpu(vcpu, rc, rrc);
2532 mutex_unlock(&vcpu->mutex);
2533 if (r)
2534 break;
2535 }
2536 if (r)
2537 kvm_s390_cpus_from_pv(kvm, &dummy, &dummy);
2538 return r;
2539}
2540
2541/*
2542 * Here we provide user space with a direct interface to query UV
2543 * related data like UV maxima and available features as well as
2544 * feature specific data.
2545 *
2546 * To facilitate future extension of the data structures we'll try to
2547 * write data up to the maximum requested length.
2548 */
2549static ssize_t kvm_s390_handle_pv_info(struct kvm_s390_pv_info *info)
2550{
2551 ssize_t len_min;
2552
2553 switch (info->header.id) {
2554 case KVM_PV_INFO_VM: {
2555 len_min = sizeof(info->header) + sizeof(info->vm);
2556
2557 if (info->header.len_max < len_min)
2558 return -EINVAL;
2559
2560 memcpy(info->vm.inst_calls_list,
2561 uv_info.inst_calls_list,
2562 sizeof(uv_info.inst_calls_list));
2563
2564 /* It's the max cpu id, not the max number of cpus, so it's off by one */
2565 info->vm.max_cpus = uv_info.max_guest_cpu_id + 1;
2566 info->vm.max_guests = uv_info.max_num_sec_conf;
2567 info->vm.max_guest_addr = uv_info.max_sec_stor_addr;
2568 info->vm.feature_indication = uv_info.uv_feature_indications;
2569
2570 return len_min;
2571 }
2572 case KVM_PV_INFO_DUMP: {
2573 len_min = sizeof(info->header) + sizeof(info->dump);
2574
2575 if (info->header.len_max < len_min)
2576 return -EINVAL;
2577
2578 info->dump.dump_cpu_buffer_len = uv_info.guest_cpu_stor_len;
2579 info->dump.dump_config_mem_buffer_per_1m = uv_info.conf_dump_storage_state_len;
2580 info->dump.dump_config_finalize_len = uv_info.conf_dump_finalize_len;
2581 return len_min;
2582 }
2583 default:
2584 return -EINVAL;
2585 }
2586}
2587
2588static int kvm_s390_pv_dmp(struct kvm *kvm, struct kvm_pv_cmd *cmd,
2589 struct kvm_s390_pv_dmp dmp)
2590{
2591 int r = -EINVAL;
2592 void __user *result_buff = (void __user *)dmp.buff_addr;
2593
2594 switch (dmp.subcmd) {
2595 case KVM_PV_DUMP_INIT: {
2596 if (kvm->arch.pv.dumping)
2597 break;
2598
2599 /*
2600 * Block SIE entry, as concurrent dump UVCs could lead
2601 * to validity intercepts.
2602 */
2603 kvm_s390_vcpu_block_all(kvm);
2604
2605 r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2606 UVC_CMD_DUMP_INIT, &cmd->rc, &cmd->rrc);
2607 KVM_UV_EVENT(kvm, 3, "PROTVIRT DUMP INIT: rc %x rrc %x",
2608 cmd->rc, cmd->rrc);
2609 if (!r) {
2610 kvm->arch.pv.dumping = true;
2611 } else {
2612 kvm_s390_vcpu_unblock_all(kvm);
2613 r = -EINVAL;
2614 }
2615 break;
2616 }
2617 case KVM_PV_DUMP_CONFIG_STOR_STATE: {
2618 if (!kvm->arch.pv.dumping)
2619 break;
2620
2621 /*
2622 * gaddr is an output parameter since we might stop
2623 * early. As dmp will be copied back in our caller, we
2624 * don't need to do it ourselves.
2625 */
2626 r = kvm_s390_pv_dump_stor_state(kvm, result_buff, &dmp.gaddr, dmp.buff_len,
2627 &cmd->rc, &cmd->rrc);
2628 break;
2629 }
2630 case KVM_PV_DUMP_COMPLETE: {
2631 if (!kvm->arch.pv.dumping)
2632 break;
2633
2634 r = -EINVAL;
2635 if (dmp.buff_len < uv_info.conf_dump_finalize_len)
2636 break;
2637
2638 r = kvm_s390_pv_dump_complete(kvm, result_buff,
2639 &cmd->rc, &cmd->rrc);
2640 break;
2641 }
2642 default:
2643 r = -ENOTTY;
2644 break;
2645 }
2646
2647 return r;
2648}
2649
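/*
 * Dispatcher for the KVM_S390_PV_COMMAND ioctl. All subcommands run under
 * kvm->lock, except KVM_PV_ASYNC_CLEANUP_PERFORM, which must be callable
 * without it (see the comment at that case below).
 */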
2650static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd)
2651{
2652 const bool need_lock = (cmd->cmd != KVM_PV_ASYNC_CLEANUP_PERFORM);
2653 void __user *argp = (void __user *)cmd->data;
2654 int r = 0;
2655 u16 dummy;
2656
2657 if (need_lock)
2658 mutex_lock(&kvm->lock);
2659
2660 switch (cmd->cmd) {
2661 case KVM_PV_ENABLE: {
2662 r = -EINVAL;
2663 if (kvm_s390_pv_is_protected(kvm))
2664 break;
2665
2666 /*
2667 * FMT 4 SIE needs esca. As we never switch back to bsca from
2668 * esca, we need no cleanup in the error cases below.
2669 */
2670 r = sca_switch_to_extended(kvm);
2671 if (r)
2672 break;
2673
2674 r = s390_disable_cow_sharing();
2675 if (r)
2676 break;
2677
2678 r = kvm_s390_pv_init_vm(kvm, &cmd->rc, &cmd->rrc);
2679 if (r)
2680 break;
2681
2682 r = kvm_s390_cpus_to_pv(kvm, &cmd->rc, &cmd->rrc);
2683 if (r)
2684 kvm_s390_pv_deinit_vm(kvm, &dummy, &dummy);
2685
2686 /* we need to block service interrupts from now on */
2687 set_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
2688 break;
2689 }
2690 case KVM_PV_ASYNC_CLEANUP_PREPARE:
2691 r = -EINVAL;
2692 if (!kvm_s390_pv_is_protected(kvm) || !async_destroy)
2693 break;
2694
2695 r = kvm_s390_cpus_from_pv(kvm, &cmd->rc, &cmd->rrc);
2696 /*
2697 * If a CPU could not be destroyed, destroying the VM will also fail.
2698 * There is no point in trying that; instead return
2699 * the rc and rrc from the first CPU that failed to be destroyed.
2700 */
2701 if (r)
2702 break;
2703 r = kvm_s390_pv_set_aside(kvm, &cmd->rc, &cmd->rrc);
2704
2705 /* no need to block service interrupts any more */
2706 clear_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
2707 break;
2708 case KVM_PV_ASYNC_CLEANUP_PERFORM:
2709 r = -EINVAL;
2710 if (!async_destroy)
2711 break;
2712 /* kvm->lock must not be held; this is asserted inside the function. */
2713 r = kvm_s390_pv_deinit_aside_vm(kvm, &cmd->rc, &cmd->rrc);
2714 break;
2715 case KVM_PV_DISABLE: {
2716 r = -EINVAL;
2717 if (!kvm_s390_pv_is_protected(kvm))
2718 break;
2719
2720 r = kvm_s390_cpus_from_pv(kvm, &cmd->rc, &cmd->rrc);
2721 /*
2722 * If a CPU could not be destroyed, destroying the VM will also fail.
2723 * There is no point in trying that; instead return
2724 * the rc and rrc from the first CPU that failed to be destroyed.
2725 */
2726 if (r)
2727 break;
2728 r = kvm_s390_pv_deinit_cleanup_all(kvm, &cmd->rc, &cmd->rrc);
2729
2730 /* no need to block service interrupts any more */
2731 clear_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
2732 break;
2733 }
2734 case KVM_PV_SET_SEC_PARMS: {
2735 struct kvm_s390_pv_sec_parm parms = {};
2736 void *hdr;
2737
2738 r = -EINVAL;
2739 if (!kvm_s390_pv_is_protected(kvm))
2740 break;
2741
2742 r = -EFAULT;
2743 if (copy_from_user(&parms, argp, sizeof(parms)))
2744 break;
2745
2746 /* Currently restricted to 8KB */
2747 r = -EINVAL;
2748 if (parms.length > PAGE_SIZE * 2)
2749 break;
2750
2751 r = -ENOMEM;
2752 hdr = vmalloc(parms.length);
2753 if (!hdr)
2754 break;
2755
2756 r = -EFAULT;
2757 if (!copy_from_user(hdr, (void __user *)parms.origin,
2758 parms.length))
2759 r = kvm_s390_pv_set_sec_parms(kvm, hdr, parms.length,
2760 &cmd->rc, &cmd->rrc);
2761
2762 vfree(hdr);
2763 break;
2764 }
2765 case KVM_PV_UNPACK: {
2766 struct kvm_s390_pv_unp unp = {};
2767
2768 r = -EINVAL;
2769 if (!kvm_s390_pv_is_protected(kvm) || !mm_is_protected(kvm->mm))
2770 break;
2771
2772 r = -EFAULT;
2773 if (copy_from_user(&unp, argp, sizeof(unp)))
2774 break;
2775
2776 r = kvm_s390_pv_unpack(kvm, unp.addr, unp.size, unp.tweak,
2777 &cmd->rc, &cmd->rrc);
2778 break;
2779 }
2780 case KVM_PV_VERIFY: {
2781 r = -EINVAL;
2782 if (!kvm_s390_pv_is_protected(kvm))
2783 break;
2784
2785 r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2786 UVC_CMD_VERIFY_IMG, &cmd->rc, &cmd->rrc);
2787 KVM_UV_EVENT(kvm, 3, "PROTVIRT VERIFY: rc %x rrc %x", cmd->rc,
2788 cmd->rrc);
2789 break;
2790 }
2791 case KVM_PV_PREP_RESET: {
2792 r = -EINVAL;
2793 if (!kvm_s390_pv_is_protected(kvm))
2794 break;
2795
2796 r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2797 UVC_CMD_PREPARE_RESET, &cmd->rc, &cmd->rrc);
2798 KVM_UV_EVENT(kvm, 3, "PROTVIRT PREP RESET: rc %x rrc %x",
2799 cmd->rc, cmd->rrc);
2800 break;
2801 }
2802 case KVM_PV_UNSHARE_ALL: {
2803 r = -EINVAL;
2804 if (!kvm_s390_pv_is_protected(kvm))
2805 break;
2806
2807 r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2808 UVC_CMD_SET_UNSHARE_ALL, &cmd->rc, &cmd->rrc);
2809 KVM_UV_EVENT(kvm, 3, "PROTVIRT UNSHARE: rc %x rrc %x",
2810 cmd->rc, cmd->rrc);
2811 break;
2812 }
2813 case KVM_PV_INFO: {
2814 struct kvm_s390_pv_info info = {};
2815 ssize_t data_len;
2816
2817 /*
2818 * No need to check the VM protection here.
2819 *
2820 * Maybe user space wants to query some of the data
2821 * when the VM is still unprotected. If we see the
2822 * need to fence a new data command we can still
2823 * return an error in the info handler.
2824 */
2825
2826 r = -EFAULT;
2827 if (copy_from_user(&info, argp, sizeof(info.header)))
2828 break;
2829
2830 r = -EINVAL;
2831 if (info.header.len_max < sizeof(info.header))
2832 break;
2833
2834 data_len = kvm_s390_handle_pv_info(&info);
2835 if (data_len < 0) {
2836 r = data_len;
2837 break;
2838 }
2839 /*
2840 * If a data command struct is extended (multiple
2841 * times) this can be used to determine how much of it
2842 * is valid.
2843 */
2844 info.header.len_written = data_len;
2845
2846 r = -EFAULT;
2847 if (copy_to_user(argp, &info, data_len))
2848 break;
2849
2850 r = 0;
2851 break;
2852 }
2853 case KVM_PV_DUMP: {
2854 struct kvm_s390_pv_dmp dmp;
2855
2856 r = -EINVAL;
2857 if (!kvm_s390_pv_is_protected(kvm))
2858 break;
2859
2860 r = -EFAULT;
2861 if (copy_from_user(&dmp, argp, sizeof(dmp)))
2862 break;
2863
2864 r = kvm_s390_pv_dmp(kvm, cmd, dmp);
2865 if (r)
2866 break;
2867
2868 if (copy_to_user(argp, &dmp, sizeof(dmp))) {
2869 r = -EFAULT;
2870 break;
2871 }
2872
2873 break;
2874 }
2875 default:
2876 r = -ENOTTY;
2877 }
2878 if (need_lock)
2879 mutex_unlock(&kvm->lock);
2880
2881 return r;
2882}
2883
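/*
 * Validation shared by all vm memops: reject flags outside @supported_flags,
 * a zero size or a size above MEM_OP_MAX_SIZE, and an access key above 0xf
 * when key protection is requested; without key protection the key is cleared.
 */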
2884static int mem_op_validate_common(struct kvm_s390_mem_op *mop, u64 supported_flags)
2885{
2886 if (mop->flags & ~supported_flags || !mop->size)
2887 return -EINVAL;
2888 if (mop->size > MEM_OP_MAX_SIZE)
2889 return -E2BIG;
2890 if (mop->flags & KVM_S390_MEMOP_F_SKEY_PROTECTION) {
2891 if (mop->key > 0xf)
2892 return -EINVAL;
2893 } else {
2894 mop->key = 0;
2895 }
2896 return 0;
2897}
2898
2899static int kvm_s390_vm_mem_op_abs(struct kvm *kvm, struct kvm_s390_mem_op *mop)
2900{
2901 void __user *uaddr = (void __user *)mop->buf;
2902 enum gacc_mode acc_mode;
2903 void *tmpbuf = NULL;
2904 int r, srcu_idx;
2905
2906 r = mem_op_validate_common(mop, KVM_S390_MEMOP_F_SKEY_PROTECTION |
2907 KVM_S390_MEMOP_F_CHECK_ONLY);
2908 if (r)
2909 return r;
2910
2911 if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
2912 tmpbuf = vmalloc(mop->size);
2913 if (!tmpbuf)
2914 return -ENOMEM;
2915 }
2916
2917 srcu_idx = srcu_read_lock(&kvm->srcu);
2918
2919 if (!kvm_is_gpa_in_memslot(kvm, mop->gaddr)) {
2920 r = PGM_ADDRESSING;
2921 goto out_unlock;
2922 }
2923
2924 acc_mode = mop->op == KVM_S390_MEMOP_ABSOLUTE_READ ? GACC_FETCH : GACC_STORE;
2925 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
2926 r = check_gpa_range(kvm, mop->gaddr, mop->size, acc_mode, mop->key);
2927 goto out_unlock;
2928 }
2929 if (acc_mode == GACC_FETCH) {
2930 r = access_guest_abs_with_key(kvm, mop->gaddr, tmpbuf,
2931 mop->size, GACC_FETCH, mop->key);
2932 if (r)
2933 goto out_unlock;
2934 if (copy_to_user(uaddr, tmpbuf, mop->size))
2935 r = -EFAULT;
2936 } else {
2937 if (copy_from_user(tmpbuf, uaddr, mop->size)) {
2938 r = -EFAULT;
2939 goto out_unlock;
2940 }
2941 r = access_guest_abs_with_key(kvm, mop->gaddr, tmpbuf,
2942 mop->size, GACC_STORE, mop->key);
2943 }
2944
2945out_unlock:
2946 srcu_read_unlock(&kvm->srcu, srcu_idx);
2947
2948 vfree(tmpbuf);
2949 return r;
2950}
2951
2952static int kvm_s390_vm_mem_op_cmpxchg(struct kvm *kvm, struct kvm_s390_mem_op *mop)
2953{
2954 void __user *uaddr = (void __user *)mop->buf;
2955 void __user *old_addr = (void __user *)mop->old_addr;
2956 union {
2957 __uint128_t quad;
2958 char raw[sizeof(__uint128_t)];
2959 } old = { .quad = 0}, new = { .quad = 0 };
2960 unsigned int off_in_quad = sizeof(new) - mop->size;
2961 int r, srcu_idx;
2962 bool success;
2963
2964 r = mem_op_validate_common(mop, KVM_S390_MEMOP_F_SKEY_PROTECTION);
2965 if (r)
2966 return r;
2967 /*
2968 * This validates off_in_quad; the operand occupies the low-order bytes
2969 * of the big-endian 16-byte quad. Checking that size is a power of two
2970 * is not necessary, as cmpxchg_guest_abs_with_key takes care of that.
2971 */
2972 if (mop->size > sizeof(new))
2973 return -EINVAL;
2974 if (copy_from_user(&new.raw[off_in_quad], uaddr, mop->size))
2975 return -EFAULT;
2976 if (copy_from_user(&old.raw[off_in_quad], old_addr, mop->size))
2977 return -EFAULT;
2978
2979 srcu_idx = srcu_read_lock(&kvm->srcu);
2980
2981 if (!kvm_is_gpa_in_memslot(kvm, mop->gaddr)) {
2982 r = PGM_ADDRESSING;
2983 goto out_unlock;
2984 }
2985
2986 r = cmpxchg_guest_abs_with_key(kvm, mop->gaddr, mop->size, &old.quad,
2987 new.quad, mop->key, &success);
2988 if (!success && copy_to_user(old_addr, &old.raw[off_in_quad], mop->size))
2989 r = -EFAULT;
2990
2991out_unlock:
2992 srcu_read_unlock(&kvm->srcu, srcu_idx);
2993 return r;
2994}
2995
2996static int kvm_s390_vm_mem_op(struct kvm *kvm, struct kvm_s390_mem_op *mop)
2997{
2998 /*
2999 * This is technically a heuristic only: since kvm->lock is not
3000 * taken, it is not guaranteed that the vm is/remains non-protected.
3001 * This is ok from a kernel perspective; wrongdoing is detected
3002 * on the access, -EFAULT is returned and the vm may crash the
3003 * next time it accesses the memory in question.
3004 * There is no sane use case for switching protection and doing a
3005 * memop on two different CPUs at the same time.
3006 */
3007 if (kvm_s390_pv_get_handle(kvm))
3008 return -EINVAL;
3009
3010 switch (mop->op) {
3011 case KVM_S390_MEMOP_ABSOLUTE_READ:
3012 case KVM_S390_MEMOP_ABSOLUTE_WRITE:
3013 return kvm_s390_vm_mem_op_abs(kvm, mop);
3014 case KVM_S390_MEMOP_ABSOLUTE_CMPXCHG:
3015 return kvm_s390_vm_mem_op_cmpxchg(kvm, mop);
3016 default:
3017 return -EINVAL;
3018 }
3019}
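/*
 * Illustrative sketch only: a minimal userspace read of guest absolute
 * memory through the KVM_S390_MEM_OP vm ioctl handled above; "vm_fd" and
 * the chosen address and size are assumptions of the example.
 *
 *	__u8 data[256];
 *	struct kvm_s390_mem_op op = {
 *		.gaddr = 0x10000,
 *		.size = sizeof(data),
 *		.op = KVM_S390_MEMOP_ABSOLUTE_READ,
 *		.buf = (__u64)(unsigned long)data,
 *	};
 *	int rc = ioctl(vm_fd, KVM_S390_MEM_OP, &op);
 */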
3020
3021int kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg)
3022{
3023 struct kvm *kvm = filp->private_data;
3024 void __user *argp = (void __user *)arg;
3025 struct kvm_device_attr attr;
3026 int r;
3027
3028 switch (ioctl) {
3029 case KVM_S390_INTERRUPT: {
3030 struct kvm_s390_interrupt s390int;
3031
3032 r = -EFAULT;
3033 if (copy_from_user(&s390int, argp, sizeof(s390int)))
3034 break;
3035 r = kvm_s390_inject_vm(kvm, &s390int);
3036 break;
3037 }
3038 case KVM_CREATE_IRQCHIP: {
3039 r = -EINVAL;
3040 if (kvm->arch.use_irqchip)
3041 r = 0;
3042 break;
3043 }
3044 case KVM_SET_DEVICE_ATTR: {
3045 r = -EFAULT;
3046 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
3047 break;
3048 r = kvm_s390_vm_set_attr(kvm, &attr);
3049 break;
3050 }
3051 case KVM_GET_DEVICE_ATTR: {
3052 r = -EFAULT;
3053 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
3054 break;
3055 r = kvm_s390_vm_get_attr(kvm, &attr);
3056 break;
3057 }
3058 case KVM_HAS_DEVICE_ATTR: {
3059 r = -EFAULT;
3060 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
3061 break;
3062 r = kvm_s390_vm_has_attr(kvm, &attr);
3063 break;
3064 }
3065 case KVM_S390_GET_SKEYS: {
3066 struct kvm_s390_skeys args;
3067
3068 r = -EFAULT;
3069 if (copy_from_user(&args, argp,
3070 sizeof(struct kvm_s390_skeys)))
3071 break;
3072 r = kvm_s390_get_skeys(kvm, &args);
3073 break;
3074 }
3075 case KVM_S390_SET_SKEYS: {
3076 struct kvm_s390_skeys args;
3077
3078 r = -EFAULT;
3079 if (copy_from_user(&args, argp,
3080 sizeof(struct kvm_s390_skeys)))
3081 break;
3082 r = kvm_s390_set_skeys(kvm, &args);
3083 break;
3084 }
3085 case KVM_S390_GET_CMMA_BITS: {
3086 struct kvm_s390_cmma_log args;
3087
3088 r = -EFAULT;
3089 if (copy_from_user(&args, argp, sizeof(args)))
3090 break;
3091 mutex_lock(&kvm->slots_lock);
3092 r = kvm_s390_get_cmma_bits(kvm, &args);
3093 mutex_unlock(&kvm->slots_lock);
3094 if (!r) {
3095 r = copy_to_user(argp, &args, sizeof(args));
3096 if (r)
3097 r = -EFAULT;
3098 }
3099 break;
3100 }
3101 case KVM_S390_SET_CMMA_BITS: {
3102 struct kvm_s390_cmma_log args;
3103
3104 r = -EFAULT;
3105 if (copy_from_user(&args, argp, sizeof(args)))
3106 break;
3107 mutex_lock(&kvm->slots_lock);
3108 r = kvm_s390_set_cmma_bits(kvm, &args);
3109 mutex_unlock(&kvm->slots_lock);
3110 break;
3111 }
3112 case KVM_S390_PV_COMMAND: {
3113 struct kvm_pv_cmd args;
3114
3115 /* protvirt means user cpu state */
3116 kvm_s390_set_user_cpu_state_ctrl(kvm);
3117 r = 0;
3118 if (!is_prot_virt_host()) {
3119 r = -EINVAL;
3120 break;
3121 }
3122 if (copy_from_user(&args, argp, sizeof(args))) {
3123 r = -EFAULT;
3124 break;
3125 }
3126 if (args.flags) {
3127 r = -EINVAL;
3128 break;
3129 }
3130 /* must be called without kvm->lock */
3131 r = kvm_s390_handle_pv(kvm, &args);
3132 if (copy_to_user(argp, &args, sizeof(args))) {
3133 r = -EFAULT;
3134 break;
3135 }
3136 break;
3137 }
3138 case KVM_S390_MEM_OP: {
3139 struct kvm_s390_mem_op mem_op;
3140
3141 if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
3142 r = kvm_s390_vm_mem_op(kvm, &mem_op);
3143 else
3144 r = -EFAULT;
3145 break;
3146 }
3147 case KVM_S390_ZPCI_OP: {
3148 struct kvm_s390_zpci_op args;
3149
3150 r = -EINVAL;
3151 if (!IS_ENABLED(CONFIG_VFIO_PCI_ZDEV_KVM))
3152 break;
3153 if (copy_from_user(&args, argp, sizeof(args))) {
3154 r = -EFAULT;
3155 break;
3156 }
3157 r = kvm_s390_pci_zpci_op(kvm, &args);
3158 break;
3159 }
3160 default:
3161 r = -ENOTTY;
3162 }
3163
3164 return r;
3165}
3166
3167static int kvm_s390_apxa_installed(void)
3168{
3169 struct ap_config_info info;
3170
3171 if (ap_instructions_available()) {
3172 if (ap_qci(&info) == 0)
3173 return info.apxa;
3174 }
3175
3176 return 0;
3177}
3178
3179/*
3180 * The format of the crypto control block (CRYCB) is specified in the 3 low
3181 * order bits of the CRYCB designation (CRYCBD) field as follows:
3182 * Format 0: Neither the message security assist extension 3 (MSAX3) nor the
3183 * AP extended addressing (APXA) facility are installed.
3184 * Format 1: The APXA facility is not installed but the MSAX3 facility is.
3185 * Format 2: Both the APXA and MSAX3 facilities are installed.
3186 */
3187static void kvm_s390_set_crycb_format(struct kvm *kvm)
3188{
3189 kvm->arch.crypto.crycbd = virt_to_phys(kvm->arch.crypto.crycb);
3190
3191 /* Clear the CRYCB format bits - i.e., set format 0 by default */
3192 kvm->arch.crypto.crycbd &= ~(CRYCB_FORMAT_MASK);
3193
3194 /* Check whether MSAX3 is installed */
3195 if (!test_kvm_facility(kvm, 76))
3196 return;
3197
3198 if (kvm_s390_apxa_installed())
3199 kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
3200 else
3201 kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
3202}
3203
3204/*
3205 * kvm_arch_crypto_set_masks
3206 *
3207 * @kvm: pointer to the target guest's KVM struct containing the crypto masks
3208 * to be set.
3209 * @apm: the mask identifying the accessible AP adapters
3210 * @aqm: the mask identifying the accessible AP domains
3211 * @adm: the mask identifying the accessible AP control domains
3212 *
3213 * Set the masks that identify the adapters, domains and control domains to
3214 * which the KVM guest is granted access.
3215 *
3216 * Note: The kvm->lock mutex must be locked by the caller before invoking this
3217 * function.
3218 */
3219void kvm_arch_crypto_set_masks(struct kvm *kvm, unsigned long *apm,
3220 unsigned long *aqm, unsigned long *adm)
3221{
3222 struct kvm_s390_crypto_cb *crycb = kvm->arch.crypto.crycb;
3223
3224 kvm_s390_vcpu_block_all(kvm);
3225
3226 switch (kvm->arch.crypto.crycbd & CRYCB_FORMAT_MASK) {
3227 case CRYCB_FORMAT2: /* APCB1 uses 256 bits */
3228 memcpy(crycb->apcb1.apm, apm, 32);
3229 VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx %016lx %016lx %016lx",
3230 apm[0], apm[1], apm[2], apm[3]);
3231 memcpy(crycb->apcb1.aqm, aqm, 32);
3232 VM_EVENT(kvm, 3, "SET CRYCB: aqm %016lx %016lx %016lx %016lx",
3233 aqm[0], aqm[1], aqm[2], aqm[3]);
3234 memcpy(crycb->apcb1.adm, adm, 32);
3235 VM_EVENT(kvm, 3, "SET CRYCB: adm %016lx %016lx %016lx %016lx",
3236 adm[0], adm[1], adm[2], adm[3]);
3237 break;
3238 case CRYCB_FORMAT1:
3239 case CRYCB_FORMAT0: /* Fall through both use APCB0 */
3240 memcpy(crycb->apcb0.apm, apm, 8);
3241 memcpy(crycb->apcb0.aqm, aqm, 2);
3242 memcpy(crycb->apcb0.adm, adm, 2);
3243 VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx aqm %04x adm %04x",
3244 apm[0], *((unsigned short *)aqm),
3245 *((unsigned short *)adm));
3246 break;
3247 default: /* Cannot happen */
3248 break;
3249 }
3250
3251 /* recreate the shadow crycb for each vcpu */
3252 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
3253 kvm_s390_vcpu_unblock_all(kvm);
3254}
3255EXPORT_SYMBOL_GPL(kvm_arch_crypto_set_masks);
3256
3257/*
3258 * kvm_arch_crypto_clear_masks
3259 *
3260 * @kvm: pointer to the target guest's KVM struct containing the crypto masks
3261 * to be cleared.
3262 *
3263 * Clear the masks that identify the adapters, domains and control domains to
3264 * which the KVM guest is granted access.
3265 *
3266 * Note: The kvm->lock mutex must be locked by the caller before invoking this
3267 * function.
3268 */
3269void kvm_arch_crypto_clear_masks(struct kvm *kvm)
3270{
3271 kvm_s390_vcpu_block_all(kvm);
3272
3273 memset(&kvm->arch.crypto.crycb->apcb0, 0,
3274 sizeof(kvm->arch.crypto.crycb->apcb0));
3275 memset(&kvm->arch.crypto.crycb->apcb1, 0,
3276 sizeof(kvm->arch.crypto.crycb->apcb1));
3277
3278 VM_EVENT(kvm, 3, "%s", "CLR CRYCB:");
3279 /* recreate the shadow crycb for each vcpu */
3280 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
3281 kvm_s390_vcpu_unblock_all(kvm);
3282}
3283EXPORT_SYMBOL_GPL(kvm_arch_crypto_clear_masks);
3284
3285static u64 kvm_s390_get_initial_cpuid(void)
3286{
3287 struct cpuid cpuid;
3288
3289 get_cpu_id(&cpuid);
3290 cpuid.version = 0xff;
3291 return *((u64 *) &cpuid);
3292}
3293
3294static void kvm_s390_crypto_init(struct kvm *kvm)
3295{
3296 kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
3297 kvm_s390_set_crycb_format(kvm);
3298 init_rwsem(&kvm->arch.crypto.pqap_hook_rwsem);
3299
3300 if (!test_kvm_facility(kvm, 76))
3301 return;
3302
3303 /* Enable AES/DEA protected key functions by default */
3304 kvm->arch.crypto.aes_kw = 1;
3305 kvm->arch.crypto.dea_kw = 1;
3306 get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
3307 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
3308 get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
3309 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
3310}
3311
3312static void sca_dispose(struct kvm *kvm)
3313{
3314 if (kvm->arch.use_esca)
3315 free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
3316 else
3317 free_page((unsigned long)(kvm->arch.sca));
3318 kvm->arch.sca = NULL;
3319}
3320
3321void kvm_arch_free_vm(struct kvm *kvm)
3322{
3323 if (IS_ENABLED(CONFIG_VFIO_PCI_ZDEV_KVM))
3324 kvm_s390_pci_clear_list(kvm);
3325
3326 __kvm_arch_free_vm(kvm);
3327}
3328
3329int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
3330{
3331 gfp_t alloc_flags = GFP_KERNEL_ACCOUNT;
3332 int i, rc;
3333 char debug_name[16];
3334 static unsigned long sca_offset;
3335
3336 rc = -EINVAL;
3337#ifdef CONFIG_KVM_S390_UCONTROL
3338 if (type & ~KVM_VM_S390_UCONTROL)
3339 goto out_err;
3340 if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
3341 goto out_err;
3342#else
3343 if (type)
3344 goto out_err;
3345#endif
3346
3347 rc = s390_enable_sie();
3348 if (rc)
3349 goto out_err;
3350
3351 rc = -ENOMEM;
3352
3353 if (!sclp.has_64bscao)
3354 alloc_flags |= GFP_DMA;
3355 rwlock_init(&kvm->arch.sca_lock);
3356 /* start with basic SCA */
3357 kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
3358 if (!kvm->arch.sca)
3359 goto out_err;
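 /*
 * Stagger each new VM's basic SCA by 16 bytes within its page; the offset
 * is shared across VM creations and wraps once a bsca_block would no longer
 * fit below the page boundary.
 */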
3360 mutex_lock(&kvm_lock);
3361 sca_offset += 16;
3362 if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
3363 sca_offset = 0;
3364 kvm->arch.sca = (struct bsca_block *)
3365 ((char *) kvm->arch.sca + sca_offset);
3366 mutex_unlock(&kvm_lock);
3367
3368 sprintf(debug_name, "kvm-%u", current->pid);
3369
3370 kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
3371 if (!kvm->arch.dbf)
3372 goto out_err;
3373
3374 BUILD_BUG_ON(sizeof(struct sie_page2) != 4096);
3375 kvm->arch.sie_page2 =
3376 (struct sie_page2 *) get_zeroed_page(GFP_KERNEL_ACCOUNT | GFP_DMA);
3377 if (!kvm->arch.sie_page2)
3378 goto out_err;
3379
3380 kvm->arch.sie_page2->kvm = kvm;
3381 kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
3382
3383 for (i = 0; i < kvm_s390_fac_size(); i++) {
3384 kvm->arch.model.fac_mask[i] = stfle_fac_list[i] &
3385 (kvm_s390_fac_base[i] |
3386 kvm_s390_fac_ext[i]);
3387 kvm->arch.model.fac_list[i] = stfle_fac_list[i] &
3388 kvm_s390_fac_base[i];
3389 }
3390 kvm->arch.model.subfuncs = kvm_s390_available_subfunc;
3391
3392 /* we are always in czam mode - even on pre z14 machines */
3393 set_kvm_facility(kvm->arch.model.fac_mask, 138);
3394 set_kvm_facility(kvm->arch.model.fac_list, 138);
3395 /* we emulate STHYI in kvm */
3396 set_kvm_facility(kvm->arch.model.fac_mask, 74);
3397 set_kvm_facility(kvm->arch.model.fac_list, 74);
3398 if (MACHINE_HAS_TLB_GUEST) {
3399 set_kvm_facility(kvm->arch.model.fac_mask, 147);
3400 set_kvm_facility(kvm->arch.model.fac_list, 147);
3401 }
3402
3403 if (css_general_characteristics.aiv && test_facility(65))
3404 set_kvm_facility(kvm->arch.model.fac_mask, 65);
3405
3406 kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
3407 kvm->arch.model.ibc = sclp.ibc & 0x0fff;
3408
3409 kvm->arch.model.uv_feat_guest.feat = 0;
3410
3411 kvm_s390_crypto_init(kvm);
3412
3413 if (IS_ENABLED(CONFIG_VFIO_PCI_ZDEV_KVM)) {
3414 mutex_lock(&kvm->lock);
3415 kvm_s390_pci_init_list(kvm);
3416 kvm_s390_vcpu_pci_enable_interp(kvm);
3417 mutex_unlock(&kvm->lock);
3418 }
3419
3420 mutex_init(&kvm->arch.float_int.ais_lock);
3421 spin_lock_init(&kvm->arch.float_int.lock);
3422 for (i = 0; i < FIRQ_LIST_COUNT; i++)
3423 INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
3424 init_waitqueue_head(&kvm->arch.ipte_wq);
3425 mutex_init(&kvm->arch.ipte_mutex);
3426
3427 debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
3428 VM_EVENT(kvm, 3, "vm created with type %lu", type);
3429
3430 if (type & KVM_VM_S390_UCONTROL) {
3431 kvm->arch.gmap = NULL;
3432 kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
3433 } else {
3434 if (sclp.hamax == U64_MAX)
3435 kvm->arch.mem_limit = TASK_SIZE_MAX;
3436 else
3437 kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX,
3438 sclp.hamax + 1);
3439 kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
3440 if (!kvm->arch.gmap)
3441 goto out_err;
3442 kvm->arch.gmap->private = kvm;
3443 kvm->arch.gmap->pfault_enabled = 0;
3444 }
3445
3446 kvm->arch.use_pfmfi = sclp.has_pfmfi;
3447 kvm->arch.use_skf = sclp.has_skey;
3448 spin_lock_init(&kvm->arch.start_stop_lock);
3449 kvm_s390_vsie_init(kvm);
3450 if (use_gisa)
3451 kvm_s390_gisa_init(kvm);
3452 INIT_LIST_HEAD(&kvm->arch.pv.need_cleanup);
3453 kvm->arch.pv.set_aside = NULL;
3454 KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
3455
3456 return 0;
3457out_err:
3458 free_page((unsigned long)kvm->arch.sie_page2);
3459 debug_unregister(kvm->arch.dbf);
3460 sca_dispose(kvm);
3461 KVM_EVENT(3, "creation of vm failed: %d", rc);
3462 return rc;
3463}
3464
3465void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
3466{
3467 u16 rc, rrc;
3468
3469 VCPU_EVENT(vcpu, 3, "%s", "free cpu");
3470 trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
3471 kvm_s390_clear_local_irqs(vcpu);
3472 kvm_clear_async_pf_completion_queue(vcpu);
3473 if (!kvm_is_ucontrol(vcpu->kvm))
3474 sca_del_vcpu(vcpu);
3475 kvm_s390_update_topology_change_report(vcpu->kvm, 1);
3476
3477 if (kvm_is_ucontrol(vcpu->kvm))
3478 gmap_remove(vcpu->arch.gmap);
3479
3480 if (vcpu->kvm->arch.use_cmma)
3481 kvm_s390_vcpu_unsetup_cmma(vcpu);
3482 /* We cannot hold the vcpu mutex here; we are already dying */
3483 if (kvm_s390_pv_cpu_get_handle(vcpu))
3484 kvm_s390_pv_destroy_cpu(vcpu, &rc, &rrc);
3485 free_page((unsigned long)(vcpu->arch.sie_block));
3486}
3487
3488void kvm_arch_destroy_vm(struct kvm *kvm)
3489{
3490 u16 rc, rrc;
3491
3492 kvm_destroy_vcpus(kvm);
3493 sca_dispose(kvm);
3494 kvm_s390_gisa_destroy(kvm);
3495 /*
3496 * We are already at the end of life and kvm->lock is not taken.
3497 * This is ok as the file descriptor is closed by now and nobody
3498 * can mess with the pv state.
3499 */
3500 kvm_s390_pv_deinit_cleanup_all(kvm, &rc, &rrc);
3501 /*
3502 * Remove the mmu notifier only when the whole KVM VM is torn down,
3503 * and only if one was registered to begin with. If the VM is
3504 * currently not protected but was previously protected,
3505 * then it's possible that the notifier is still registered.
3506 */
3507 if (kvm->arch.pv.mmu_notifier.ops)
3508 mmu_notifier_unregister(&kvm->arch.pv.mmu_notifier, kvm->mm);
3509
3510 debug_unregister(kvm->arch.dbf);
3511 free_page((unsigned long)kvm->arch.sie_page2);
3512 if (!kvm_is_ucontrol(kvm))
3513 gmap_remove(kvm->arch.gmap);
3514 kvm_s390_destroy_adapters(kvm);
3515 kvm_s390_clear_float_irqs(kvm);
3516 kvm_s390_vsie_destroy(kvm);
3517 KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
3518}
3519
3520/* Section: vcpu related */
3521static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
3522{
3523 vcpu->arch.gmap = gmap_create(current->mm, -1UL);
3524 if (!vcpu->arch.gmap)
3525 return -ENOMEM;
3526 vcpu->arch.gmap->private = vcpu->kvm;
3527
3528 return 0;
3529}
3530
3531static void sca_del_vcpu(struct kvm_vcpu *vcpu)
3532{
3533 if (!kvm_s390_use_sca_entries())
3534 return;
3535 read_lock(&vcpu->kvm->arch.sca_lock);
3536 if (vcpu->kvm->arch.use_esca) {
3537 struct esca_block *sca = vcpu->kvm->arch.sca;
3538
3539 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
3540 sca->cpu[vcpu->vcpu_id].sda = 0;
3541 } else {
3542 struct bsca_block *sca = vcpu->kvm->arch.sca;
3543
3544 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
3545 sca->cpu[vcpu->vcpu_id].sda = 0;
3546 }
3547 read_unlock(&vcpu->kvm->arch.sca_lock);
3548}
3549
3550static void sca_add_vcpu(struct kvm_vcpu *vcpu)
3551{
3552 if (!kvm_s390_use_sca_entries()) {
3553 phys_addr_t sca_phys = virt_to_phys(vcpu->kvm->arch.sca);
3554
3555 /* we still need the basic sca for the ipte control */
3556 vcpu->arch.sie_block->scaoh = sca_phys >> 32;
3557 vcpu->arch.sie_block->scaol = sca_phys;
3558 return;
3559 }
3560 read_lock(&vcpu->kvm->arch.sca_lock);
3561 if (vcpu->kvm->arch.use_esca) {
3562 struct esca_block *sca = vcpu->kvm->arch.sca;
3563 phys_addr_t sca_phys = virt_to_phys(sca);
3564
3565 sca->cpu[vcpu->vcpu_id].sda = virt_to_phys(vcpu->arch.sie_block);
3566 vcpu->arch.sie_block->scaoh = sca_phys >> 32;
3567 vcpu->arch.sie_block->scaol = sca_phys & ESCA_SCAOL_MASK;
3568 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
3569 set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
3570 } else {
3571 struct bsca_block *sca = vcpu->kvm->arch.sca;
3572 phys_addr_t sca_phys = virt_to_phys(sca);
3573
3574 sca->cpu[vcpu->vcpu_id].sda = virt_to_phys(vcpu->arch.sie_block);
3575 vcpu->arch.sie_block->scaoh = sca_phys >> 32;
3576 vcpu->arch.sie_block->scaol = sca_phys;
3577 set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
3578 }
3579 read_unlock(&vcpu->kvm->arch.sca_lock);
3580}
3581
3582/* Basic SCA to Extended SCA data copy routines */
3583static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
3584{
3585 d->sda = s->sda;
3586 d->sigp_ctrl.c = s->sigp_ctrl.c;
3587 d->sigp_ctrl.scn = s->sigp_ctrl.scn;
3588}
3589
3590static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
3591{
3592 int i;
3593
3594 d->ipte_control = s->ipte_control;
3595 d->mcn[0] = s->mcn;
3596 for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
3597 sca_copy_entry(&d->cpu[i], &s->cpu[i]);
3598}
3599
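/*
 * Replace the basic SCA with an extended SCA. All vCPUs are blocked while
 * the entries are copied and every SIE block is repointed to the new SCA;
 * only then is the old basic SCA freed.
 */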
3600static int sca_switch_to_extended(struct kvm *kvm)
3601{
3602 struct bsca_block *old_sca = kvm->arch.sca;
3603 struct esca_block *new_sca;
3604 struct kvm_vcpu *vcpu;
3605 unsigned long vcpu_idx;
3606 u32 scaol, scaoh;
3607 phys_addr_t new_sca_phys;
3608
3609 if (kvm->arch.use_esca)
3610 return 0;
3611
3612 new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL_ACCOUNT | __GFP_ZERO);
3613 if (!new_sca)
3614 return -ENOMEM;
3615
3616 new_sca_phys = virt_to_phys(new_sca);
3617 scaoh = new_sca_phys >> 32;
3618 scaol = new_sca_phys & ESCA_SCAOL_MASK;
3619
3620 kvm_s390_vcpu_block_all(kvm);
3621 write_lock(&kvm->arch.sca_lock);
3622
3623 sca_copy_b_to_e(new_sca, old_sca);
3624
3625 kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
3626 vcpu->arch.sie_block->scaoh = scaoh;
3627 vcpu->arch.sie_block->scaol = scaol;
3628 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
3629 }
3630 kvm->arch.sca = new_sca;
3631 kvm->arch.use_esca = 1;
3632
3633 write_unlock(&kvm->arch.sca_lock);
3634 kvm_s390_vcpu_unblock_all(kvm);
3635
3636 free_page((unsigned long)old_sca);
3637
3638 VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
3639 old_sca, kvm->arch.sca);
3640 return 0;
3641}
3642
3643static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
3644{
3645 int rc;
3646
3647 if (!kvm_s390_use_sca_entries()) {
3648 if (id < KVM_MAX_VCPUS)
3649 return true;
3650 return false;
3651 }
3652 if (id < KVM_S390_BSCA_CPU_SLOTS)
3653 return true;
3654 if (!sclp.has_esca || !sclp.has_64bscao)
3655 return false;
3656
3657 rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
3658
3659 return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
3660}
3661
3662/* must be called with preemption disabled to protect against TOD sync and vcpu_load/put */
3663static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3664{
3665 WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
3666 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
3667 vcpu->arch.cputm_start = get_tod_clock_fast();
3668 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
3669}
3670
3671/* must be called with preemption disabled to protect against TOD sync and vcpu_load/put */
3672static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3673{
3674 WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
3675 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
3676 vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
3677 vcpu->arch.cputm_start = 0;
3678 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
3679}
3680
3681/* must be called with preemption disabled to protect against TOD sync and vcpu_load/put */
3682static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3683{
3684 WARN_ON_ONCE(vcpu->arch.cputm_enabled);
3685 vcpu->arch.cputm_enabled = true;
3686 __start_cpu_timer_accounting(vcpu);
3687}
3688
3689/* must be called with preemption disabled to protect against TOD sync and vcpu_load/put */
3690static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3691{
3692 WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
3693 __stop_cpu_timer_accounting(vcpu);
3694 vcpu->arch.cputm_enabled = false;
3695}
3696
3697static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3698{
3699 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3700 __enable_cpu_timer_accounting(vcpu);
3701 preempt_enable();
3702}
3703
3704static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3705{
3706 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3707 __disable_cpu_timer_accounting(vcpu);
3708 preempt_enable();
3709}
3710
3711/* set the cpu timer - may only be called from the VCPU thread itself */
3712void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
3713{
3714 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3715 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
3716 if (vcpu->arch.cputm_enabled)
3717 vcpu->arch.cputm_start = get_tod_clock_fast();
3718 vcpu->arch.sie_block->cputm = cputm;
3719 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
3720 preempt_enable();
3721}
3722
3723/* update and get the cpu timer - can also be called from other VCPU threads */
3724__u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
3725{
3726 unsigned int seq;
3727 __u64 value;
3728
3729 if (unlikely(!vcpu->arch.cputm_enabled))
3730 return vcpu->arch.sie_block->cputm;
3731
3732 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3733 do {
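 /*
 * raw_read_seqcount() can return an odd value while a writer is
 * active; clearing the low bit in the retry check below forces
 * another pass in that case instead of using a torn value.
 */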
3734 seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
3735 /*
3736 * If the writer would ever execute a read in the critical
3737 * section, e.g. in irq context, we have a deadlock.
3738 */
3739 WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
3740 value = vcpu->arch.sie_block->cputm;
3741 /* if cputm_start is 0, accounting is being started/stopped */
3742 if (likely(vcpu->arch.cputm_start))
3743 value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
3744 } while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
3745 preempt_enable();
3746 return value;
3747}
3748
3749void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
3750{
3751
3752 kvm_s390_set_cpuflags(vcpu, CPUSTAT_RUNNING);
3753 if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
3754 __start_cpu_timer_accounting(vcpu);
3755 vcpu->cpu = cpu;
3756}
3757
3758void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
3759{
3760 vcpu->cpu = -1;
3761 if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
3762 __stop_cpu_timer_accounting(vcpu);
3763 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_RUNNING);
3764
3765}
3766
3767void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
3768{
3769 mutex_lock(&vcpu->kvm->lock);
3770 preempt_disable();
3771 vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
3772 vcpu->arch.sie_block->epdx = vcpu->kvm->arch.epdx;
3773 preempt_enable();
3774 mutex_unlock(&vcpu->kvm->lock);
3775 if (!kvm_is_ucontrol(vcpu->kvm)) {
3776 vcpu->arch.gmap = vcpu->kvm->arch.gmap;
3777 sca_add_vcpu(vcpu);
3778 }
3779 if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
3780 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
3781}
3782
3783static bool kvm_has_pckmo_subfunc(struct kvm *kvm, unsigned long nr)
3784{
3785 if (test_bit_inv(nr, (unsigned long *)&kvm->arch.model.subfuncs.pckmo) &&
3786 test_bit_inv(nr, (unsigned long *)&kvm_s390_available_subfunc.pckmo))
3787 return true;
3788 return false;
3789}
3790
3791static bool kvm_has_pckmo_ecc(struct kvm *kvm)
3792{
3793 /* At least one ECC subfunction must be present */
3794 return kvm_has_pckmo_subfunc(kvm, 32) ||
3795 kvm_has_pckmo_subfunc(kvm, 33) ||
3796 kvm_has_pckmo_subfunc(kvm, 34) ||
3797 kvm_has_pckmo_subfunc(kvm, 40) ||
3798 kvm_has_pckmo_subfunc(kvm, 41);
3799
3800}
3801
3802static bool kvm_has_pckmo_hmac(struct kvm *kvm)
3803{
3804 /* At least one HMAC subfunction must be present */
3805 return kvm_has_pckmo_subfunc(kvm, 118) ||
3806 kvm_has_pckmo_subfunc(kvm, 122);
3807}
3808
3809static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
3810{
3811 /*
3812 * If the AP instructions are not being interpreted and the MSAX3
3813 * facility is not configured for the guest, there is nothing to set up.
3814 */
3815 if (!vcpu->kvm->arch.crypto.apie && !test_kvm_facility(vcpu->kvm, 76))
3816 return;
3817
3818 vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
3819 vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
3820 vcpu->arch.sie_block->eca &= ~ECA_APIE;
3821 vcpu->arch.sie_block->ecd &= ~(ECD_ECC | ECD_HMAC);
3822
3823 if (vcpu->kvm->arch.crypto.apie)
3824 vcpu->arch.sie_block->eca |= ECA_APIE;
3825
3826 /* Set up protected key support */
3827 if (vcpu->kvm->arch.crypto.aes_kw) {
3828 vcpu->arch.sie_block->ecb3 |= ECB3_AES;
3829 /* ECC/HMAC protected keys are also wrapped with the AES wrapping key */
3830 if (kvm_has_pckmo_ecc(vcpu->kvm))
3831 vcpu->arch.sie_block->ecd |= ECD_ECC;
3832 if (kvm_has_pckmo_hmac(vcpu->kvm))
3833 vcpu->arch.sie_block->ecd |= ECD_HMAC;
3834 }
3835
3836 if (vcpu->kvm->arch.crypto.dea_kw)
3837 vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
3838}
3839
3840void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
3841{
3842 free_page((unsigned long)phys_to_virt(vcpu->arch.sie_block->cbrlo));
3843 vcpu->arch.sie_block->cbrlo = 0;
3844}
3845
3846int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
3847{
3848 void *cbrlo_page = (void *)get_zeroed_page(GFP_KERNEL_ACCOUNT);
3849
3850 if (!cbrlo_page)
3851 return -ENOMEM;
3852
3853 vcpu->arch.sie_block->cbrlo = virt_to_phys(cbrlo_page);
3854 return 0;
3855}
3856
3857static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
3858{
3859 struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
3860
3861 vcpu->arch.sie_block->ibc = model->ibc;
3862 if (test_kvm_facility(vcpu->kvm, 7))
3863 vcpu->arch.sie_block->fac = virt_to_phys(model->fac_list);
3864}
3865
3866static int kvm_s390_vcpu_setup(struct kvm_vcpu *vcpu)
3867{
3868 int rc = 0;
3869 u16 uvrc, uvrrc;
3870
3871 atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
3872 CPUSTAT_SM |
3873 CPUSTAT_STOPPED);
3874
3875 if (test_kvm_facility(vcpu->kvm, 78))
3876 kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED2);
3877 else if (test_kvm_facility(vcpu->kvm, 8))
3878 kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED);
3879
3880 kvm_s390_vcpu_setup_model(vcpu);
3881
3882 /* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
3883 if (MACHINE_HAS_ESOP)
3884 vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT;
3885 if (test_kvm_facility(vcpu->kvm, 9))
3886 vcpu->arch.sie_block->ecb |= ECB_SRSI;
3887 if (test_kvm_facility(vcpu->kvm, 11))
3888 vcpu->arch.sie_block->ecb |= ECB_PTF;
3889 if (test_kvm_facility(vcpu->kvm, 73))
3890 vcpu->arch.sie_block->ecb |= ECB_TE;
3891 if (!kvm_is_ucontrol(vcpu->kvm))
3892 vcpu->arch.sie_block->ecb |= ECB_SPECI;
3893
3894 if (test_kvm_facility(vcpu->kvm, 8) && vcpu->kvm->arch.use_pfmfi)
3895 vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI;
3896 if (test_kvm_facility(vcpu->kvm, 130))
3897 vcpu->arch.sie_block->ecb2 |= ECB2_IEP;
3898 vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI;
3899 if (sclp.has_cei)
3900 vcpu->arch.sie_block->eca |= ECA_CEI;
3901 if (sclp.has_ib)
3902 vcpu->arch.sie_block->eca |= ECA_IB;
3903 if (sclp.has_siif)
3904 vcpu->arch.sie_block->eca |= ECA_SII;
3905 if (sclp.has_sigpif)
3906 vcpu->arch.sie_block->eca |= ECA_SIGPI;
3907 if (test_kvm_facility(vcpu->kvm, 129)) {
3908 vcpu->arch.sie_block->eca |= ECA_VX;
3909 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
3910 }
3911 if (test_kvm_facility(vcpu->kvm, 139))
3912 vcpu->arch.sie_block->ecd |= ECD_MEF;
3913 if (test_kvm_facility(vcpu->kvm, 156))
3914 vcpu->arch.sie_block->ecd |= ECD_ETOKENF;
3915 if (vcpu->arch.sie_block->gd) {
3916 vcpu->arch.sie_block->eca |= ECA_AIV;
3917 VCPU_EVENT(vcpu, 3, "AIV gisa format-%u enabled for cpu %03u",
3918 vcpu->arch.sie_block->gd & 0x3, vcpu->vcpu_id);
3919 }
3920 vcpu->arch.sie_block->sdnxo = virt_to_phys(&vcpu->run->s.regs.sdnx) | SDNXC;
3921 vcpu->arch.sie_block->riccbd = virt_to_phys(&vcpu->run->s.regs.riccb);
3922
3923 if (sclp.has_kss)
3924 kvm_s390_set_cpuflags(vcpu, CPUSTAT_KSS);
3925 else
3926 vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
3927
3928 if (vcpu->kvm->arch.use_cmma) {
3929 rc = kvm_s390_vcpu_setup_cmma(vcpu);
3930 if (rc)
3931 return rc;
3932 }
3933 hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
3934 vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
3935
3936 vcpu->arch.sie_block->hpid = HPID_KVM;
3937
3938 kvm_s390_vcpu_crypto_setup(vcpu);
3939
3940 kvm_s390_vcpu_pci_setup(vcpu);
3941
3942 mutex_lock(&vcpu->kvm->lock);
3943 if (kvm_s390_pv_is_protected(vcpu->kvm)) {
3944 rc = kvm_s390_pv_create_cpu(vcpu, &uvrc, &uvrrc);
3945 if (rc)
3946 kvm_s390_vcpu_unsetup_cmma(vcpu);
3947 }
3948 mutex_unlock(&vcpu->kvm->lock);
3949
3950 return rc;
3951}
3952
3953int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id)
3954{
3955 if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
3956 return -EINVAL;
3957 return 0;
3958}
3959
3960int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
3961{
3962 struct sie_page *sie_page;
3963 int rc;
3964
3965 BUILD_BUG_ON(sizeof(struct sie_page) != 4096);
3966 sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL_ACCOUNT);
3967 if (!sie_page)
3968 return -ENOMEM;
3969
3970 vcpu->arch.sie_block = &sie_page->sie_block;
3971 vcpu->arch.sie_block->itdba = virt_to_phys(&sie_page->itdb);
3972
3973 /* the real guest size will always be smaller than msl */
3974 vcpu->arch.sie_block->mso = 0;
3975 vcpu->arch.sie_block->msl = sclp.hamax;
3976
3977 vcpu->arch.sie_block->icpua = vcpu->vcpu_id;
3978 spin_lock_init(&vcpu->arch.local_int.lock);
3979 vcpu->arch.sie_block->gd = kvm_s390_get_gisa_desc(vcpu->kvm);
3980 seqcount_init(&vcpu->arch.cputm_seqcount);
3981
3982 vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
3983 kvm_clear_async_pf_completion_queue(vcpu);
3984 vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
3985 KVM_SYNC_GPRS |
3986 KVM_SYNC_ACRS |
3987 KVM_SYNC_CRS |
3988 KVM_SYNC_ARCH0 |
3989 KVM_SYNC_PFAULT |
3990 KVM_SYNC_DIAG318;
3991 vcpu->arch.acrs_loaded = false;
3992 kvm_s390_set_prefix(vcpu, 0);
3993 if (test_kvm_facility(vcpu->kvm, 64))
3994 vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
3995 if (test_kvm_facility(vcpu->kvm, 82))
3996 vcpu->run->kvm_valid_regs |= KVM_SYNC_BPBC;
3997 if (test_kvm_facility(vcpu->kvm, 133))
3998 vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB;
3999 if (test_kvm_facility(vcpu->kvm, 156))
4000 vcpu->run->kvm_valid_regs |= KVM_SYNC_ETOKEN;
4001 /* fprs can be synchronized via vrs, even if the guest has no vx. With
4002 * cpu_has_vx(), (load|store)_fpu_regs() will work with vrs format.
4003 */
4004 if (cpu_has_vx())
4005 vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
4006 else
4007 vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
4008
4009 if (kvm_is_ucontrol(vcpu->kvm)) {
4010 rc = __kvm_ucontrol_vcpu_init(vcpu);
4011 if (rc)
4012 goto out_free_sie_block;
4013 }
4014
4015 VM_EVENT(vcpu->kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK",
4016 vcpu->vcpu_id, vcpu, vcpu->arch.sie_block);
4017 trace_kvm_s390_create_vcpu(vcpu->vcpu_id, vcpu, vcpu->arch.sie_block);
4018
4019 rc = kvm_s390_vcpu_setup(vcpu);
4020 if (rc)
4021 goto out_ucontrol_uninit;
4022
4023 kvm_s390_update_topology_change_report(vcpu->kvm, 1);
4024 return 0;
4025
4026out_ucontrol_uninit:
4027 if (kvm_is_ucontrol(vcpu->kvm))
4028 gmap_remove(vcpu->arch.gmap);
4029out_free_sie_block:
4030 free_page((unsigned long)(vcpu->arch.sie_block));
4031 return rc;
4032}
4033
4034int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
4035{
4036 clear_bit(vcpu->vcpu_idx, vcpu->kvm->arch.gisa_int.kicked_mask);
4037 return kvm_s390_vcpu_has_irq(vcpu, 0);
4038}
4039
4040bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
4041{
4042 return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE);
4043}
4044
4045void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
4046{
4047 atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
4048 exit_sie(vcpu);
4049}
4050
4051void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
4052{
4053 atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
4054}
4055
4056static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
4057{
4058 atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
4059 exit_sie(vcpu);
4060}
4061
4062bool kvm_s390_vcpu_sie_inhibited(struct kvm_vcpu *vcpu)
4063{
4064 return atomic_read(&vcpu->arch.sie_block->prog20) &
4065 (PROG_BLOCK_SIE | PROG_REQUEST);
4066}
4067
4068static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
4069{
4070 atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
4071}
4072
4073/*
4074 * Kick a guest cpu out of (v)SIE and wait until (v)SIE is not running.
4075 * If the CPU is not running (e.g. waiting as idle) the function will
4076 * return immediately. */
4077void exit_sie(struct kvm_vcpu *vcpu)
4078{
4079 kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOP_INT);
4080 kvm_s390_vsie_kick(vcpu);
4081 while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
4082 cpu_relax();
4083}
4084
4085/* Kick a guest cpu out of SIE to process a request synchronously */
4086void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
4087{
4088 __kvm_make_request(req, vcpu);
4089 kvm_s390_vcpu_request(vcpu);
4090}
4091
4092static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
4093 unsigned long end)
4094{
4095 struct kvm *kvm = gmap->private;
4096 struct kvm_vcpu *vcpu;
4097 unsigned long prefix;
4098 unsigned long i;
4099
4100 trace_kvm_s390_gmap_notifier(start, end, gmap_is_shadow(gmap));
4101
4102 if (gmap_is_shadow(gmap))
4103 return;
4104 if (start >= 1UL << 31)
4105 /* We are only interested in prefix pages */
4106 return;
4107 kvm_for_each_vcpu(i, vcpu, kvm) {
4108 /* match against both prefix pages */
4109 prefix = kvm_s390_get_prefix(vcpu);
4110 if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
4111 VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
4112 start, end);
4113 kvm_s390_sync_request(KVM_REQ_REFRESH_GUEST_PREFIX, vcpu);
4114 }
4115 }
4116}
4117
4118bool kvm_arch_no_poll(struct kvm_vcpu *vcpu)
4119{
4120 /* do not poll with more than halt_poll_max_steal percent of steal time */
4121 if (get_lowcore()->avg_steal_timer * 100 / (TICK_USEC << 12) >=
4122 READ_ONCE(halt_poll_max_steal)) {
4123 vcpu->stat.halt_no_poll_steal++;
4124 return true;
4125 }
4126 return false;
4127}
4128
4129int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
4130{
4131 /* kvm common code refers to this, but never calls it */
4132 BUG();
4133 return 0;
4134}
4135
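/* KVM_GET_ONE_REG / KVM_SET_ONE_REG handling for the s390 specific register set */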
4136static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
4137 struct kvm_one_reg *reg)
4138{
4139 int r = -EINVAL;
4140
4141 switch (reg->id) {
4142 case KVM_REG_S390_TODPR:
4143 r = put_user(vcpu->arch.sie_block->todpr,
4144 (u32 __user *)reg->addr);
4145 break;
4146 case KVM_REG_S390_EPOCHDIFF:
4147 r = put_user(vcpu->arch.sie_block->epoch,
4148 (u64 __user *)reg->addr);
4149 break;
4150 case KVM_REG_S390_CPU_TIMER:
4151 r = put_user(kvm_s390_get_cpu_timer(vcpu),
4152 (u64 __user *)reg->addr);
4153 break;
4154 case KVM_REG_S390_CLOCK_COMP:
4155 r = put_user(vcpu->arch.sie_block->ckc,
4156 (u64 __user *)reg->addr);
4157 break;
4158 case KVM_REG_S390_PFTOKEN:
4159 r = put_user(vcpu->arch.pfault_token,
4160 (u64 __user *)reg->addr);
4161 break;
4162 case KVM_REG_S390_PFCOMPARE:
4163 r = put_user(vcpu->arch.pfault_compare,
4164 (u64 __user *)reg->addr);
4165 break;
4166 case KVM_REG_S390_PFSELECT:
4167 r = put_user(vcpu->arch.pfault_select,
4168 (u64 __user *)reg->addr);
4169 break;
4170 case KVM_REG_S390_PP:
4171 r = put_user(vcpu->arch.sie_block->pp,
4172 (u64 __user *)reg->addr);
4173 break;
4174 case KVM_REG_S390_GBEA:
4175 r = put_user(vcpu->arch.sie_block->gbea,
4176 (u64 __user *)reg->addr);
4177 break;
4178 default:
4179 break;
4180 }
4181
4182 return r;
4183}
4184
4185static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
4186 struct kvm_one_reg *reg)
4187{
4188 int r = -EINVAL;
4189 __u64 val;
4190
4191 switch (reg->id) {
4192 case KVM_REG_S390_TODPR:
4193 r = get_user(vcpu->arch.sie_block->todpr,
4194 (u32 __user *)reg->addr);
4195 break;
4196 case KVM_REG_S390_EPOCHDIFF:
4197 r = get_user(vcpu->arch.sie_block->epoch,
4198 (u64 __user *)reg->addr);
4199 break;
4200 case KVM_REG_S390_CPU_TIMER:
4201 r = get_user(val, (u64 __user *)reg->addr);
4202 if (!r)
4203 kvm_s390_set_cpu_timer(vcpu, val);
4204 break;
4205 case KVM_REG_S390_CLOCK_COMP:
4206 r = get_user(vcpu->arch.sie_block->ckc,
4207 (u64 __user *)reg->addr);
4208 break;
4209 case KVM_REG_S390_PFTOKEN:
4210 r = get_user(vcpu->arch.pfault_token,
4211 (u64 __user *)reg->addr);
4212 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
4213 kvm_clear_async_pf_completion_queue(vcpu);
4214 break;
4215 case KVM_REG_S390_PFCOMPARE:
4216 r = get_user(vcpu->arch.pfault_compare,
4217 (u64 __user *)reg->addr);
4218 break;
4219 case KVM_REG_S390_PFSELECT:
4220 r = get_user(vcpu->arch.pfault_select,
4221 (u64 __user *)reg->addr);
4222 break;
4223 case KVM_REG_S390_PP:
4224 r = get_user(vcpu->arch.sie_block->pp,
4225 (u64 __user *)reg->addr);
4226 break;
4227 case KVM_REG_S390_GBEA:
4228 r = get_user(vcpu->arch.sie_block->gbea,
4229 (u64 __user *)reg->addr);
4230 break;
4231 default:
4232 break;
4233 }
4234
4235 return r;
4236}
4237
4238static void kvm_arch_vcpu_ioctl_normal_reset(struct kvm_vcpu *vcpu)
4239{
4240 vcpu->arch.sie_block->gpsw.mask &= ~PSW_MASK_RI;
4241 vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
4242 memset(vcpu->run->s.regs.riccb, 0, sizeof(vcpu->run->s.regs.riccb));
4243
4244 kvm_clear_async_pf_completion_queue(vcpu);
4245 if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
4246 kvm_s390_vcpu_stop(vcpu);
4247 kvm_s390_clear_local_irqs(vcpu);
4248}
4249
4250static void kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
4251{
4252 /* Initial reset is a superset of the normal reset */
4253 kvm_arch_vcpu_ioctl_normal_reset(vcpu);
4254
4255 /*
4256	 * This equals the initial cpu reset in the POP, but we don't switch to ESA.
4257 * We do not only reset the internal data, but also ...
4258 */
4259 vcpu->arch.sie_block->gpsw.mask = 0;
4260 vcpu->arch.sie_block->gpsw.addr = 0;
4261 kvm_s390_set_prefix(vcpu, 0);
4262 kvm_s390_set_cpu_timer(vcpu, 0);
4263 vcpu->arch.sie_block->ckc = 0;
4264 memset(vcpu->arch.sie_block->gcr, 0, sizeof(vcpu->arch.sie_block->gcr));
4265 vcpu->arch.sie_block->gcr[0] = CR0_INITIAL_MASK;
4266 vcpu->arch.sie_block->gcr[14] = CR14_INITIAL_MASK;
4267
4268 /* ... the data in sync regs */
4269 memset(vcpu->run->s.regs.crs, 0, sizeof(vcpu->run->s.regs.crs));
4270 vcpu->run->s.regs.ckc = 0;
4271 vcpu->run->s.regs.crs[0] = CR0_INITIAL_MASK;
4272 vcpu->run->s.regs.crs[14] = CR14_INITIAL_MASK;
4273 vcpu->run->psw_addr = 0;
4274 vcpu->run->psw_mask = 0;
4275 vcpu->run->s.regs.todpr = 0;
4276 vcpu->run->s.regs.cputm = 0;
4277 vcpu->run->s.regs.ckc = 0;
4278 vcpu->run->s.regs.pp = 0;
4279 vcpu->run->s.regs.gbea = 1;
4280 vcpu->run->s.regs.fpc = 0;
4281 /*
4282 * Do not reset these registers in the protected case, as some of
4283 * them are overlaid and they are not accessible in this case
4284 * anyway.
4285 */
4286 if (!kvm_s390_pv_cpu_is_protected(vcpu)) {
4287 vcpu->arch.sie_block->gbea = 1;
4288 vcpu->arch.sie_block->pp = 0;
4289 vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
4290 vcpu->arch.sie_block->todpr = 0;
4291 }
4292}
4293
4294static void kvm_arch_vcpu_ioctl_clear_reset(struct kvm_vcpu *vcpu)
4295{
4296 struct kvm_sync_regs *regs = &vcpu->run->s.regs;
4297
4298 /* Clear reset is a superset of the initial reset */
4299 kvm_arch_vcpu_ioctl_initial_reset(vcpu);
4300
4301	memset(&regs->gprs, 0, sizeof(regs->gprs));
4302	memset(&regs->vrs, 0, sizeof(regs->vrs));
4303	memset(&regs->acrs, 0, sizeof(regs->acrs));
4304	memset(&regs->gscb, 0, sizeof(regs->gscb));
4305
4306 regs->etoken = 0;
4307 regs->etoken_extension = 0;
4308}
4309
4310int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
4311{
4312 vcpu_load(vcpu);
4313	memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
4314 vcpu_put(vcpu);
4315 return 0;
4316}
4317
4318int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
4319{
4320 vcpu_load(vcpu);
4321	memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
4322 vcpu_put(vcpu);
4323 return 0;
4324}
4325
4326int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
4327 struct kvm_sregs *sregs)
4328{
4329 vcpu_load(vcpu);
4330
4331 memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
4332 memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
4333
4334 vcpu_put(vcpu);
4335 return 0;
4336}
4337
4338int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
4339 struct kvm_sregs *sregs)
4340{
4341 vcpu_load(vcpu);
4342
4343 memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
4344 memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
4345
4346 vcpu_put(vcpu);
4347 return 0;
4348}
4349
4350int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
4351{
4352 int ret = 0;
4353
4354 vcpu_load(vcpu);
4355
4356 vcpu->run->s.regs.fpc = fpu->fpc;
4357 if (cpu_has_vx())
4358 convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
4359 (freg_t *) fpu->fprs);
4360 else
4361 memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
4362
4363 vcpu_put(vcpu);
4364 return ret;
4365}
4366
4367int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
4368{
4369 vcpu_load(vcpu);
4370
4371 if (cpu_has_vx())
4372 convert_vx_to_fp((freg_t *) fpu->fprs,
4373 (__vector128 *) vcpu->run->s.regs.vrs);
4374 else
4375 memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
4376 fpu->fpc = vcpu->run->s.regs.fpc;
4377
4378 vcpu_put(vcpu);
4379 return 0;
4380}
4381
4382static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
4383{
4384 int rc = 0;
4385
4386 if (!is_vcpu_stopped(vcpu))
4387 rc = -EBUSY;
4388 else {
4389 vcpu->run->psw_mask = psw.mask;
4390 vcpu->run->psw_addr = psw.addr;
4391 }
4392 return rc;
4393}
4394
4395int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
4396 struct kvm_translation *tr)
4397{
4398 return -EINVAL; /* not implemented yet */
4399}
4400
4401#define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
4402 KVM_GUESTDBG_USE_HW_BP | \
4403 KVM_GUESTDBG_ENABLE)
4404
4405int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
4406 struct kvm_guest_debug *dbg)
4407{
4408 int rc = 0;
4409
4410 vcpu_load(vcpu);
4411
4412 vcpu->guest_debug = 0;
4413 kvm_s390_clear_bp_data(vcpu);
4414
4415 if (dbg->control & ~VALID_GUESTDBG_FLAGS) {
4416 rc = -EINVAL;
4417 goto out;
4418 }
4419 if (!sclp.has_gpere) {
4420 rc = -EINVAL;
4421 goto out;
4422 }
4423
4424 if (dbg->control & KVM_GUESTDBG_ENABLE) {
4425 vcpu->guest_debug = dbg->control;
4426 /* enforce guest PER */
4427 kvm_s390_set_cpuflags(vcpu, CPUSTAT_P);
4428
4429 if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
4430 rc = kvm_s390_import_bp_data(vcpu, dbg);
4431 } else {
4432 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
4433 vcpu->arch.guestdbg.last_bp = 0;
4434 }
4435
4436 if (rc) {
4437 vcpu->guest_debug = 0;
4438 kvm_s390_clear_bp_data(vcpu);
4439 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
4440 }
4441
4442out:
4443 vcpu_put(vcpu);
4444 return rc;
4445}
4446
4447int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
4448 struct kvm_mp_state *mp_state)
4449{
4450 int ret;
4451
4452 vcpu_load(vcpu);
4453
4454 /* CHECK_STOP and LOAD are not supported yet */
4455 ret = is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
4456 KVM_MP_STATE_OPERATING;
4457
4458 vcpu_put(vcpu);
4459 return ret;
4460}
4461
4462int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
4463 struct kvm_mp_state *mp_state)
4464{
4465 int rc = 0;
4466
4467 vcpu_load(vcpu);
4468
4469 /* user space knows about this interface - let it control the state */
4470 kvm_s390_set_user_cpu_state_ctrl(vcpu->kvm);
4471
4472 switch (mp_state->mp_state) {
4473 case KVM_MP_STATE_STOPPED:
4474 rc = kvm_s390_vcpu_stop(vcpu);
4475 break;
4476 case KVM_MP_STATE_OPERATING:
4477 rc = kvm_s390_vcpu_start(vcpu);
4478 break;
4479 case KVM_MP_STATE_LOAD:
4480 if (!kvm_s390_pv_cpu_is_protected(vcpu)) {
4481 rc = -ENXIO;
4482 break;
4483 }
4484 rc = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_OPR_LOAD);
4485 break;
4486 case KVM_MP_STATE_CHECK_STOP:
4487 fallthrough; /* CHECK_STOP and LOAD are not supported yet */
4488 default:
4489 rc = -ENXIO;
4490 }
4491
4492 vcpu_put(vcpu);
4493 return rc;
4494}
4495
4496static bool ibs_enabled(struct kvm_vcpu *vcpu)
4497{
4498 return kvm_s390_test_cpuflags(vcpu, CPUSTAT_IBS);
4499}
4500
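/*
 * Process all pending vcpu requests (prefix refresh, TLB flush, IBS
 * enable/disable, migration start/stop, ...) before (re-)entering SIE.
 */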
4501static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
4502{
4503retry:
4504 kvm_s390_vcpu_request_handled(vcpu);
4505 if (!kvm_request_pending(vcpu))
4506 return 0;
4507 /*
4508 * If the guest prefix changed, re-arm the ipte notifier for the
4509 * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
4510 * This ensures that the ipte instruction for this request has
4511 * already finished. We might race against a second unmapper that
4512	 * wants to set the blocking bit. Let's just retry the request loop.
4513 */
4514 if (kvm_check_request(KVM_REQ_REFRESH_GUEST_PREFIX, vcpu)) {
4515 int rc;
4516 rc = gmap_mprotect_notify(vcpu->arch.gmap,
4517 kvm_s390_get_prefix(vcpu),
4518 PAGE_SIZE * 2, PROT_WRITE);
4519 if (rc) {
4520 kvm_make_request(KVM_REQ_REFRESH_GUEST_PREFIX, vcpu);
4521 return rc;
4522 }
4523 goto retry;
4524 }
4525
4526 if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
4527 vcpu->arch.sie_block->ihcpu = 0xffff;
4528 goto retry;
4529 }
4530
4531 if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
4532 if (!ibs_enabled(vcpu)) {
4533 trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
4534 kvm_s390_set_cpuflags(vcpu, CPUSTAT_IBS);
4535 }
4536 goto retry;
4537 }
4538
4539 if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
4540 if (ibs_enabled(vcpu)) {
4541 trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
4542 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_IBS);
4543 }
4544 goto retry;
4545 }
4546
4547 if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
4548 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
4549 goto retry;
4550 }
4551
4552 if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) {
4553 /*
4554 * Disable CMM virtualization; we will emulate the ESSA
4555 * instruction manually, in order to provide additional
4556 * functionalities needed for live migration.
4557 */
4558 vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA;
4559 goto retry;
4560 }
4561
4562 if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) {
4563 /*
4564 * Re-enable CMM virtualization if CMMA is available and
4565 * CMM has been used.
4566 */
4567 if ((vcpu->kvm->arch.use_cmma) &&
4568 (vcpu->kvm->mm->context.uses_cmm))
4569 vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
4570 goto retry;
4571 }
4572
4573 /* we left the vsie handler, nothing to do, just clear the request */
4574 kvm_clear_request(KVM_REQ_VSIE_RESTART, vcpu);
4575
4576 return 0;
4577}
4578
4579static void __kvm_s390_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod)
4580{
4581 struct kvm_vcpu *vcpu;
4582 union tod_clock clk;
4583 unsigned long i;
4584
4585 preempt_disable();
4586
4587 store_tod_clock_ext(&clk);
4588
4589 kvm->arch.epoch = gtod->tod - clk.tod;
4590 kvm->arch.epdx = 0;
4591 if (test_kvm_facility(kvm, 139)) {
4592 kvm->arch.epdx = gtod->epoch_idx - clk.ei;
4593 if (kvm->arch.epoch > gtod->tod)
4594 kvm->arch.epdx -= 1;
4595 }
4596
4597 kvm_s390_vcpu_block_all(kvm);
4598 kvm_for_each_vcpu(i, vcpu, kvm) {
4599 vcpu->arch.sie_block->epoch = kvm->arch.epoch;
4600 vcpu->arch.sie_block->epdx = kvm->arch.epdx;
4601 }
4602
4603 kvm_s390_vcpu_unblock_all(kvm);
4604 preempt_enable();
4605}
4606
4607int kvm_s390_try_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod)
4608{
4609 if (!mutex_trylock(&kvm->lock))
4610 return 0;
4611 __kvm_s390_set_tod_clock(kvm, gtod);
4612 mutex_unlock(&kvm->lock);
4613 return 1;
4614}
4615
4616static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
4617 unsigned long token)
4618{
4619 struct kvm_s390_interrupt inti;
4620 struct kvm_s390_irq irq;
4621
4622 if (start_token) {
4623 irq.u.ext.ext_params2 = token;
4624 irq.type = KVM_S390_INT_PFAULT_INIT;
4625 WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
4626 } else {
4627 inti.type = KVM_S390_INT_PFAULT_DONE;
4628 inti.parm64 = token;
4629 WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
4630 }
4631}
4632
4633bool kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
4634 struct kvm_async_pf *work)
4635{
4636 trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
4637 __kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
4638
4639 return true;
4640}
4641
4642void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
4643 struct kvm_async_pf *work)
4644{
4645 trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
4646 __kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
4647}
4648
4649void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
4650 struct kvm_async_pf *work)
4651{
4652 /* s390 will always inject the page directly */
4653}
4654
4655bool kvm_arch_can_dequeue_async_page_present(struct kvm_vcpu *vcpu)
4656{
4657 /*
4658 * s390 will always inject the page directly,
4659	 * but we still want check_async_completion to clean up
4660 */
4661 return true;
4662}
4663
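/*
 * Check whether the conditions for an asynchronous page fault (pfault)
 * are fulfilled and, if so, queue the async pf work for the faulting
 * guest address.
 */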
4664static bool kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
4665{
4666 hva_t hva;
4667 struct kvm_arch_async_pf arch;
4668
4669 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
4670 return false;
4671 if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
4672 vcpu->arch.pfault_compare)
4673 return false;
4674 if (psw_extint_disabled(vcpu))
4675 return false;
4676 if (kvm_s390_vcpu_has_irq(vcpu, 0))
4677 return false;
4678 if (!(vcpu->arch.sie_block->gcr[0] & CR0_SERVICE_SIGNAL_SUBMASK))
4679 return false;
4680 if (!vcpu->arch.gmap->pfault_enabled)
4681 return false;
4682
4683 hva = gfn_to_hva(vcpu->kvm, current->thread.gmap_teid.addr);
4684 if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
4685 return false;
4686
4687 return kvm_setup_async_pf(vcpu, current->thread.gmap_teid.addr * PAGE_SIZE, hva, &arch);
4688}
4689
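/*
 * Prepare the next SIE entry: handle async pf completion, deliver pending
 * interrupts, process vcpu requests and set up guest debugging (PER) if
 * requested.
 */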
4690static int vcpu_pre_run(struct kvm_vcpu *vcpu)
4691{
4692 int rc, cpuflags;
4693
4694 /*
4695 * On s390 notifications for arriving pages will be delivered directly
4696	 * to the guest, but the housekeeping for completed pfaults is
4697 * handled outside the worker.
4698 */
4699 kvm_check_async_pf_completion(vcpu);
4700
4701 vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
4702 vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
4703
4704 if (need_resched())
4705 schedule();
4706
4707 if (!kvm_is_ucontrol(vcpu->kvm)) {
4708 rc = kvm_s390_deliver_pending_interrupts(vcpu);
4709 if (rc || guestdbg_exit_pending(vcpu))
4710 return rc;
4711 }
4712
4713 rc = kvm_s390_handle_requests(vcpu);
4714 if (rc)
4715 return rc;
4716
4717 if (guestdbg_enabled(vcpu)) {
4718 kvm_s390_backup_guest_per_regs(vcpu);
4719 kvm_s390_patch_guest_per_regs(vcpu);
4720 }
4721
4722 clear_bit(vcpu->vcpu_idx, vcpu->kvm->arch.gisa_int.kicked_mask);
4723
4724 vcpu->arch.sie_block->icptcode = 0;
4725 current->thread.gmap_int_code = 0;
4726 cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
4727 VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
4728 trace_kvm_s390_sie_enter(vcpu, cpuflags);
4729
4730 return 0;
4731}
4732
4733static int vcpu_post_run_addressing_exception(struct kvm_vcpu *vcpu)
4734{
4735 struct kvm_s390_pgm_info pgm_info = {
4736 .code = PGM_ADDRESSING,
4737 };
4738 u8 opcode, ilen;
4739 int rc;
4740
4741 VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
4742 trace_kvm_s390_sie_fault(vcpu);
4743
4744 /*
4745 * We want to inject an addressing exception, which is defined as a
4746 * suppressing or terminating exception. However, since we came here
4747 * by a DAT access exception, the PSW still points to the faulting
4748 * instruction since DAT exceptions are nullifying. So we've got
4749 * to look up the current opcode to get the length of the instruction
4750 * to be able to forward the PSW.
4751 */
4752 rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1);
4753 ilen = insn_length(opcode);
4754 if (rc < 0) {
4755 return rc;
4756 } else if (rc) {
4757 /* Instruction-Fetching Exceptions - we can't detect the ilen.
4758 * Forward by arbitrary ilc, injection will take care of
4759 * nullification if necessary.
4760 */
4761 pgm_info = vcpu->arch.pgm;
4762 ilen = 4;
4763 }
4764 pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
4765 kvm_s390_forward_psw(vcpu, ilen);
4766 return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
4767}
4768
4769static int vcpu_post_run_handle_fault(struct kvm_vcpu *vcpu)
4770{
4771 unsigned int flags = 0;
4772 unsigned long gaddr;
4773 int rc = 0;
4774
4775 gaddr = current->thread.gmap_teid.addr * PAGE_SIZE;
4776 if (kvm_s390_cur_gmap_fault_is_write())
4777 flags = FAULT_FLAG_WRITE;
4778
4779 switch (current->thread.gmap_int_code & PGM_INT_CODE_MASK) {
4780 case 0:
4781 vcpu->stat.exit_null++;
4782 break;
4783 case PGM_NON_SECURE_STORAGE_ACCESS:
4784 KVM_BUG(current->thread.gmap_teid.as != PSW_BITS_AS_PRIMARY, vcpu->kvm,
4785 "Unexpected program interrupt 0x%x, TEID 0x%016lx",
4786 current->thread.gmap_int_code, current->thread.gmap_teid.val);
4787 /*
4788 * This is normal operation; a page belonging to a protected
4789 * guest has not been imported yet. Try to import the page into
4790 * the protected guest.
4791 */
4792 if (gmap_convert_to_secure(vcpu->arch.gmap, gaddr) == -EINVAL)
4793 send_sig(SIGSEGV, current, 0);
4794 break;
4795 case PGM_SECURE_STORAGE_ACCESS:
4796 case PGM_SECURE_STORAGE_VIOLATION:
4797 KVM_BUG(current->thread.gmap_teid.as != PSW_BITS_AS_PRIMARY, vcpu->kvm,
4798 "Unexpected program interrupt 0x%x, TEID 0x%016lx",
4799 current->thread.gmap_int_code, current->thread.gmap_teid.val);
4800 /*
4801 * This can happen after a reboot with asynchronous teardown;
4802 * the new guest (normal or protected) will run on top of the
4803 * previous protected guest. The old pages need to be destroyed
4804 * so the new guest can use them.
4805 */
4806 if (gmap_destroy_page(vcpu->arch.gmap, gaddr)) {
4807 /*
4808 * Either KVM messed up the secure guest mapping or the
4809 * same page is mapped into multiple secure guests.
4810 *
4811 * This exception is only triggered when a guest 2 is
4812 * running and can therefore never occur in kernel
4813 * context.
4814 */
4815 pr_warn_ratelimited("Secure storage violation (%x) in task: %s, pid %d\n",
4816 current->thread.gmap_int_code, current->comm,
4817 current->pid);
4818 send_sig(SIGSEGV, current, 0);
4819 }
4820 break;
4821 case PGM_PROTECTION:
4822 case PGM_SEGMENT_TRANSLATION:
4823 case PGM_PAGE_TRANSLATION:
4824 case PGM_ASCE_TYPE:
4825 case PGM_REGION_FIRST_TRANS:
4826 case PGM_REGION_SECOND_TRANS:
4827 case PGM_REGION_THIRD_TRANS:
4828 KVM_BUG(current->thread.gmap_teid.as != PSW_BITS_AS_PRIMARY, vcpu->kvm,
4829 "Unexpected program interrupt 0x%x, TEID 0x%016lx",
4830 current->thread.gmap_int_code, current->thread.gmap_teid.val);
4831 if (vcpu->arch.gmap->pfault_enabled) {
4832 rc = gmap_fault(vcpu->arch.gmap, gaddr, flags | FAULT_FLAG_RETRY_NOWAIT);
4833 if (rc == -EFAULT)
4834 return vcpu_post_run_addressing_exception(vcpu);
4835 if (rc == -EAGAIN) {
4836 trace_kvm_s390_major_guest_pfault(vcpu);
4837 if (kvm_arch_setup_async_pf(vcpu))
4838 return 0;
4839 vcpu->stat.pfault_sync++;
4840 } else {
4841 return rc;
4842 }
4843 }
4844 rc = gmap_fault(vcpu->arch.gmap, gaddr, flags);
4845 if (rc == -EFAULT) {
4846 if (kvm_is_ucontrol(vcpu->kvm)) {
4847 vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
4848 vcpu->run->s390_ucontrol.trans_exc_code = gaddr;
4849 vcpu->run->s390_ucontrol.pgm_code = 0x10;
4850 return -EREMOTE;
4851 }
4852 return vcpu_post_run_addressing_exception(vcpu);
4853 }
4854 break;
4855 default:
4856 KVM_BUG(1, vcpu->kvm, "Unexpected program interrupt 0x%x, TEID 0x%016lx",
4857 current->thread.gmap_int_code, current->thread.gmap_teid.val);
4858 send_sig(SIGSEGV, current, 0);
4859 break;
4860 }
4861 return rc;
4862}
4863
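/*
 * Post-process a SIE exit: reinject machine checks that were delivered
 * while running the guest, handle intercepts, or resolve the host fault
 * that caused the exit.
 */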
4864static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
4865{
4866 struct mcck_volatile_info *mcck_info;
4867 struct sie_page *sie_page;
4868 int rc;
4869
4870 VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
4871 vcpu->arch.sie_block->icptcode);
4872 trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
4873
4874 if (guestdbg_enabled(vcpu))
4875 kvm_s390_restore_guest_per_regs(vcpu);
4876
4877 vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
4878 vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
4879
4880 if (exit_reason == -EINTR) {
4881 VCPU_EVENT(vcpu, 3, "%s", "machine check");
4882 sie_page = container_of(vcpu->arch.sie_block,
4883 struct sie_page, sie_block);
4884 mcck_info = &sie_page->mcck_info;
4885 kvm_s390_reinject_machine_check(vcpu, mcck_info);
4886 return 0;
4887 }
4888
4889 if (vcpu->arch.sie_block->icptcode > 0) {
4890 rc = kvm_handle_sie_intercept(vcpu);
4891
4892 if (rc != -EOPNOTSUPP)
4893 return rc;
4894 vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
4895 vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
4896 vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
4897 vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
4898 return -EREMOTE;
4899 }
4900
4901 return vcpu_post_run_handle_fault(vcpu);
4902}
4903
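/*
 * The inner run loop: alternate between vcpu_pre_run(), the low-level
 * sie64a() entry and vcpu_post_run() until an exit to userspace is
 * required, a signal is pending or an error occurs.
 */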
4904#define PSW_INT_MASK (PSW_MASK_EXT | PSW_MASK_IO | PSW_MASK_MCHECK)
4905static int __vcpu_run(struct kvm_vcpu *vcpu)
4906{
4907 int rc, exit_reason;
4908 struct sie_page *sie_page = (struct sie_page *)vcpu->arch.sie_block;
4909
4910 /*
4911	 * We try to hold kvm->srcu during most of vcpu_run (except while
4912	 * running the guest), so that memslots (and other stuff) are protected.
4913 */
4914 kvm_vcpu_srcu_read_lock(vcpu);
4915
4916 do {
4917 rc = vcpu_pre_run(vcpu);
4918 if (rc || guestdbg_exit_pending(vcpu))
4919 break;
4920
4921 kvm_vcpu_srcu_read_unlock(vcpu);
4922 /*
4923		 * Since PF_VCPU is used in the fault handler, there must be no
4924		 * uaccess between guest_enter and guest_exit.
4925 */
4926 local_irq_disable();
4927 guest_enter_irqoff();
4928 __disable_cpu_timer_accounting(vcpu);
4929 local_irq_enable();
4930 if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4931 memcpy(sie_page->pv_grregs,
4932 vcpu->run->s.regs.gprs,
4933 sizeof(sie_page->pv_grregs));
4934 }
4935 exit_reason = sie64a(vcpu->arch.sie_block,
4936 vcpu->run->s.regs.gprs,
4937 vcpu->arch.gmap->asce);
4938 if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4939 memcpy(vcpu->run->s.regs.gprs,
4940 sie_page->pv_grregs,
4941 sizeof(sie_page->pv_grregs));
4942 /*
4943 * We're not allowed to inject interrupts on intercepts
4944 * that leave the guest state in an "in-between" state
4945 * where the next SIE entry will do a continuation.
4946 * Fence interrupts in our "internal" PSW.
4947 */
4948 if (vcpu->arch.sie_block->icptcode == ICPT_PV_INSTR ||
4949 vcpu->arch.sie_block->icptcode == ICPT_PV_PREF) {
4950 vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK;
4951 }
4952 }
4953 local_irq_disable();
4954 __enable_cpu_timer_accounting(vcpu);
4955 guest_exit_irqoff();
4956 local_irq_enable();
4957 kvm_vcpu_srcu_read_lock(vcpu);
4958
4959 rc = vcpu_post_run(vcpu, exit_reason);
4960 } while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
4961
4962 kvm_vcpu_srcu_read_unlock(vcpu);
4963 return rc;
4964}
4965
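/* Sync the register state that is only accessible for non-protected guests */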
4966static void sync_regs_fmt2(struct kvm_vcpu *vcpu)
4967{
4968 struct kvm_run *kvm_run = vcpu->run;
4969 struct runtime_instr_cb *riccb;
4970 struct gs_cb *gscb;
4971
4972 riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
4973 gscb = (struct gs_cb *) &kvm_run->s.regs.gscb;
4974 vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
4975 vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
4976 if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
4977 vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
4978 vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
4979 vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
4980 }
4981 if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
4982 vcpu->arch.pfault_token = kvm_run->s.regs.pft;
4983 vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
4984 vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
4985 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
4986 kvm_clear_async_pf_completion_queue(vcpu);
4987 }
4988 if (kvm_run->kvm_dirty_regs & KVM_SYNC_DIAG318) {
4989 vcpu->arch.diag318_info.val = kvm_run->s.regs.diag318;
4990 vcpu->arch.sie_block->cpnc = vcpu->arch.diag318_info.cpnc;
4991 VCPU_EVENT(vcpu, 3, "setting cpnc to %d", vcpu->arch.diag318_info.cpnc);
4992 }
4993 /*
4994 * If userspace sets the riccb (e.g. after migration) to a valid state,
4995 * we should enable RI here instead of doing the lazy enablement.
4996 */
4997 if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
4998 test_kvm_facility(vcpu->kvm, 64) &&
4999 riccb->v &&
5000 !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) {
5001 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)");
5002 vcpu->arch.sie_block->ecb3 |= ECB3_RI;
5003 }
5004 /*
5005 * If userspace sets the gscb (e.g. after migration) to non-zero,
5006 * we should enable GS here instead of doing the lazy enablement.
5007 */
5008 if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) &&
5009 test_kvm_facility(vcpu->kvm, 133) &&
5010 gscb->gssm &&
5011 !vcpu->arch.gs_enabled) {
5012 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)");
5013 vcpu->arch.sie_block->ecb |= ECB_GS;
5014 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
5015 vcpu->arch.gs_enabled = 1;
5016 }
5017 if ((kvm_run->kvm_dirty_regs & KVM_SYNC_BPBC) &&
5018 test_kvm_facility(vcpu->kvm, 82)) {
5019 vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
5020 vcpu->arch.sie_block->fpf |= kvm_run->s.regs.bpbc ? FPF_BPBC : 0;
5021 }
5022 if (MACHINE_HAS_GS) {
5023 preempt_disable();
5024 local_ctl_set_bit(2, CR2_GUARDED_STORAGE_BIT);
5025 if (current->thread.gs_cb) {
5026 vcpu->arch.host_gscb = current->thread.gs_cb;
5027 save_gs_cb(vcpu->arch.host_gscb);
5028 }
5029 if (vcpu->arch.gs_enabled) {
5030 current->thread.gs_cb = (struct gs_cb *)
5031 &vcpu->run->s.regs.gscb;
5032 restore_gs_cb(current->thread.gs_cb);
5033 }
5034 preempt_enable();
5035 }
5036 /* SIE will load etoken directly from SDNX and therefore kvm_run */
5037}
5038
5039static void sync_regs(struct kvm_vcpu *vcpu)
5040{
5041 struct kvm_run *kvm_run = vcpu->run;
5042
5043 if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
5044 kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
5045 if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
5046 memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
5047 /* some control register changes require a tlb flush */
5048 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
5049 }
5050 if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
5051 kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
5052 vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
5053 }
5054 save_access_regs(vcpu->arch.host_acrs);
5055 restore_access_regs(vcpu->run->s.regs.acrs);
5056 vcpu->arch.acrs_loaded = true;
5057 kvm_s390_fpu_load(vcpu->run);
5058 /* Sync fmt2 only data */
5059 if (likely(!kvm_s390_pv_cpu_is_protected(vcpu))) {
5060 sync_regs_fmt2(vcpu);
5061 } else {
5062 /*
5063 * In several places we have to modify our internal view to
5064 * not do things that are disallowed by the ultravisor. For
5065 * example we must not inject interrupts after specific exits
5066 * (e.g. 112 prefix page not secure). We do this by turning
5067 * off the machine check, external and I/O interrupt bits
5068 * of our PSW copy. To avoid getting validity intercepts, we
5069 * do only accept the condition code from userspace.
5070 */
5071 vcpu->arch.sie_block->gpsw.mask &= ~PSW_MASK_CC;
5072 vcpu->arch.sie_block->gpsw.mask |= kvm_run->psw_mask &
5073 PSW_MASK_CC;
5074 }
5075
5076 kvm_run->kvm_dirty_regs = 0;
5077}
5078
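/* Store the register state that is only accessible for non-protected guests */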
5079static void store_regs_fmt2(struct kvm_vcpu *vcpu)
5080{
5081 struct kvm_run *kvm_run = vcpu->run;
5082
5083 kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
5084 kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
5085 kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
5086 kvm_run->s.regs.bpbc = (vcpu->arch.sie_block->fpf & FPF_BPBC) == FPF_BPBC;
5087 kvm_run->s.regs.diag318 = vcpu->arch.diag318_info.val;
5088 if (MACHINE_HAS_GS) {
5089 preempt_disable();
5090 local_ctl_set_bit(2, CR2_GUARDED_STORAGE_BIT);
5091 if (vcpu->arch.gs_enabled)
5092 save_gs_cb(current->thread.gs_cb);
5093 current->thread.gs_cb = vcpu->arch.host_gscb;
5094 restore_gs_cb(vcpu->arch.host_gscb);
5095 if (!vcpu->arch.host_gscb)
5096 local_ctl_clear_bit(2, CR2_GUARDED_STORAGE_BIT);
5097 vcpu->arch.host_gscb = NULL;
5098 preempt_enable();
5099 }
5100 /* SIE will save etoken directly into SDNX and therefore kvm_run */
5101}
5102
5103static void store_regs(struct kvm_vcpu *vcpu)
5104{
5105 struct kvm_run *kvm_run = vcpu->run;
5106
5107 kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
5108 kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
5109 kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
5110 memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
5111 kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
5112 kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
5113 kvm_run->s.regs.pft = vcpu->arch.pfault_token;
5114 kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
5115 kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
5116 save_access_regs(vcpu->run->s.regs.acrs);
5117 restore_access_regs(vcpu->arch.host_acrs);
5118 vcpu->arch.acrs_loaded = false;
5119 kvm_s390_fpu_store(vcpu->run);
5120 if (likely(!kvm_s390_pv_cpu_is_protected(vcpu)))
5121 store_regs_fmt2(vcpu);
5122}
5123
5124int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
5125{
5126 struct kvm_run *kvm_run = vcpu->run;
5127 DECLARE_KERNEL_FPU_ONSTACK32(fpu);
5128 int rc;
5129
5130 /*
5131 * Running a VM while dumping always has the potential to
5132 * produce inconsistent dump data. But for PV vcpus a SIE
5133 * entry while dumping could also lead to a fatal validity
5134 * intercept which we absolutely want to avoid.
5135 */
5136 if (vcpu->kvm->arch.pv.dumping)
5137 return -EINVAL;
5138
5139 if (!vcpu->wants_to_run)
5140 return -EINTR;
5141
5142 if (kvm_run->kvm_valid_regs & ~KVM_SYNC_S390_VALID_FIELDS ||
5143 kvm_run->kvm_dirty_regs & ~KVM_SYNC_S390_VALID_FIELDS)
5144 return -EINVAL;
5145
5146 vcpu_load(vcpu);
5147
5148 if (guestdbg_exit_pending(vcpu)) {
5149 kvm_s390_prepare_debug_exit(vcpu);
5150 rc = 0;
5151 goto out;
5152 }
5153
5154 kvm_sigset_activate(vcpu);
5155
5156 /*
5157 * no need to check the return value of vcpu_start as it can only have
5158 * an error for protvirt, but protvirt means user cpu state
5159 */
5160 if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
5161 kvm_s390_vcpu_start(vcpu);
5162 } else if (is_vcpu_stopped(vcpu)) {
5163 pr_err_ratelimited("can't run stopped vcpu %d\n",
5164 vcpu->vcpu_id);
5165 rc = -EINVAL;
5166 goto out;
5167 }
5168
5169 kernel_fpu_begin(&fpu, KERNEL_FPC | KERNEL_VXR);
5170 sync_regs(vcpu);
5171 enable_cpu_timer_accounting(vcpu);
5172
5173 might_fault();
5174 rc = __vcpu_run(vcpu);
5175
5176 if (signal_pending(current) && !rc) {
5177 kvm_run->exit_reason = KVM_EXIT_INTR;
5178 rc = -EINTR;
5179 }
5180
5181 if (guestdbg_exit_pending(vcpu) && !rc) {
5182 kvm_s390_prepare_debug_exit(vcpu);
5183 rc = 0;
5184 }
5185
5186 if (rc == -EREMOTE) {
5187 /* userspace support is needed, kvm_run has been prepared */
5188 rc = 0;
5189 }
5190
5191 disable_cpu_timer_accounting(vcpu);
5192 store_regs(vcpu);
5193 kernel_fpu_end(&fpu, KERNEL_FPC | KERNEL_VXR);
5194
5195 kvm_sigset_deactivate(vcpu);
5196
5197 vcpu->stat.exit_userspace++;
5198out:
5199 vcpu_put(vcpu);
5200 return rc;
5201}
5202
5203/*
5204 * store status at address
5205 * we have two special cases:
5206 * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
5207 * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
5208 */
5209int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
5210{
5211 unsigned char archmode = 1;
5212 freg_t fprs[NUM_FPRS];
5213 unsigned int px;
5214 u64 clkcomp, cputm;
5215 int rc;
5216
5217 px = kvm_s390_get_prefix(vcpu);
5218 if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
5219 if (write_guest_abs(vcpu, 163, &archmode, 1))
5220 return -EFAULT;
5221 gpa = 0;
5222 } else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
5223 if (write_guest_real(vcpu, 163, &archmode, 1))
5224 return -EFAULT;
5225 gpa = px;
5226 } else
5227 gpa -= __LC_FPREGS_SAVE_AREA;
5228
5229 /* manually convert vector registers if necessary */
5230 if (cpu_has_vx()) {
5231 convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
5232 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
5233 fprs, 128);
5234 } else {
5235 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
5236 vcpu->run->s.regs.fprs, 128);
5237 }
5238 rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
5239 vcpu->run->s.regs.gprs, 128);
5240 rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
5241 &vcpu->arch.sie_block->gpsw, 16);
5242 rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
5243 &px, 4);
5244 rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
5245 &vcpu->run->s.regs.fpc, 4);
5246 rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
5247 &vcpu->arch.sie_block->todpr, 4);
5248 cputm = kvm_s390_get_cpu_timer(vcpu);
5249 rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
5250 &cputm, 8);
5251 clkcomp = vcpu->arch.sie_block->ckc >> 8;
5252 rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
5253 &clkcomp, 8);
5254 rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
5255 &vcpu->run->s.regs.acrs, 64);
5256 rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
5257 &vcpu->arch.sie_block->gcr, 128);
5258 return rc ? -EFAULT : 0;
5259}
5260
5261int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
5262{
5263 /*
5264 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
5265 * switch in the run ioctl. Let's update our copies before we save
5266	 * them into the save area.
5267 */
5268 kvm_s390_fpu_store(vcpu->run);
5269 save_access_regs(vcpu->run->s.regs.acrs);
5270
5271 return kvm_s390_store_status_unloaded(vcpu, addr);
5272}
5273
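/*
 * IBS is only used while a single vcpu is running; the helpers below
 * request enabling/disabling it synchronously on the target vcpu(s).
 */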
5274static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
5275{
5276 kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
5277 kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
5278}
5279
5280static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
5281{
5282 unsigned long i;
5283 struct kvm_vcpu *vcpu;
5284
5285 kvm_for_each_vcpu(i, vcpu, kvm) {
5286 __disable_ibs_on_vcpu(vcpu);
5287 }
5288}
5289
5290static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
5291{
5292 if (!sclp.has_ibs)
5293 return;
5294 kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
5295 kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
5296}
5297
5298int kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
5299{
5300 int i, online_vcpus, r = 0, started_vcpus = 0;
5301
5302 if (!is_vcpu_stopped(vcpu))
5303 return 0;
5304
5305 trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
5306 /* Only one cpu at a time may enter/leave the STOPPED state. */
5307 spin_lock(&vcpu->kvm->arch.start_stop_lock);
5308 online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
5309
5310 /* Let's tell the UV that we want to change into the operating state */
5311 if (kvm_s390_pv_cpu_is_protected(vcpu)) {
5312 r = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_OPR);
5313 if (r) {
5314 spin_unlock(&vcpu->kvm->arch.start_stop_lock);
5315 return r;
5316 }
5317 }
5318
5319 for (i = 0; i < online_vcpus; i++) {
5320 if (!is_vcpu_stopped(kvm_get_vcpu(vcpu->kvm, i)))
5321 started_vcpus++;
5322 }
5323
5324 if (started_vcpus == 0) {
5325 /* we're the only active VCPU -> speed it up */
5326 __enable_ibs_on_vcpu(vcpu);
5327 } else if (started_vcpus == 1) {
5328 /*
5329 * As we are starting a second VCPU, we have to disable
5330 * the IBS facility on all VCPUs to remove potentially
5331 * outstanding ENABLE requests.
5332 */
5333 __disable_ibs_on_all_vcpus(vcpu->kvm);
5334 }
5335
5336 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_STOPPED);
5337 /*
5338 * The real PSW might have changed due to a RESTART interpreted by the
5339 * ultravisor. We block all interrupts and let the next sie exit
5340 * refresh our view.
5341 */
5342 if (kvm_s390_pv_cpu_is_protected(vcpu))
5343 vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK;
5344 /*
5345 * Another VCPU might have used IBS while we were offline.
5346 * Let's play safe and flush the VCPU at startup.
5347 */
5348 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
5349 spin_unlock(&vcpu->kvm->arch.start_stop_lock);
5350 return 0;
5351}
5352
5353int kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
5354{
5355 int i, online_vcpus, r = 0, started_vcpus = 0;
5356 struct kvm_vcpu *started_vcpu = NULL;
5357
5358 if (is_vcpu_stopped(vcpu))
5359 return 0;
5360
5361 trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
5362 /* Only one cpu at a time may enter/leave the STOPPED state. */
5363 spin_lock(&vcpu->kvm->arch.start_stop_lock);
5364 online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
5365
5366 /* Let's tell the UV that we want to change into the stopped state */
5367 if (kvm_s390_pv_cpu_is_protected(vcpu)) {
5368 r = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_STP);
5369 if (r) {
5370 spin_unlock(&vcpu->kvm->arch.start_stop_lock);
5371 return r;
5372 }
5373 }
5374
5375 /*
5376 * Set the VCPU to STOPPED and THEN clear the interrupt flag,
5377 * now that the SIGP STOP and SIGP STOP AND STORE STATUS orders
5378 * have been fully processed. This will ensure that the VCPU
5379 * is kept BUSY if another VCPU is inquiring with SIGP SENSE.
5380 */
5381 kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOPPED);
5382 kvm_s390_clear_stop_irq(vcpu);
5383
5384 __disable_ibs_on_vcpu(vcpu);
5385
5386 for (i = 0; i < online_vcpus; i++) {
5387 struct kvm_vcpu *tmp = kvm_get_vcpu(vcpu->kvm, i);
5388
5389 if (!is_vcpu_stopped(tmp)) {
5390 started_vcpus++;
5391 started_vcpu = tmp;
5392 }
5393 }
5394
5395 if (started_vcpus == 1) {
5396 /*
5397 * As we only have one VCPU left, we want to enable the
5398 * IBS facility for that VCPU to speed it up.
5399 */
5400 __enable_ibs_on_vcpu(started_vcpu);
5401 }
5402
5403 spin_unlock(&vcpu->kvm->arch.start_stop_lock);
5404 return 0;
5405}
5406
5407static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
5408 struct kvm_enable_cap *cap)
5409{
5410 int r;
5411
5412 if (cap->flags)
5413 return -EINVAL;
5414
5415 switch (cap->cap) {
5416 case KVM_CAP_S390_CSS_SUPPORT:
5417 if (!vcpu->kvm->arch.css_support) {
5418 vcpu->kvm->arch.css_support = 1;
5419 VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
5420 trace_kvm_s390_enable_css(vcpu->kvm);
5421 }
5422 r = 0;
5423 break;
5424 default:
5425 r = -EINVAL;
5426 break;
5427 }
5428 return r;
5429}
5430
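/* KVM_S390_MEM_OP: access the SIDA of a protected vcpu */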
5431static long kvm_s390_vcpu_sida_op(struct kvm_vcpu *vcpu,
5432 struct kvm_s390_mem_op *mop)
5433{
5434 void __user *uaddr = (void __user *)mop->buf;
5435 void *sida_addr;
5436 int r = 0;
5437
5438 if (mop->flags || !mop->size)
5439 return -EINVAL;
5440 if (mop->size + mop->sida_offset < mop->size)
5441 return -EINVAL;
5442 if (mop->size + mop->sida_offset > sida_size(vcpu->arch.sie_block))
5443 return -E2BIG;
5444 if (!kvm_s390_pv_cpu_is_protected(vcpu))
5445 return -EINVAL;
5446
5447 sida_addr = (char *)sida_addr(vcpu->arch.sie_block) + mop->sida_offset;
5448
5449 switch (mop->op) {
5450 case KVM_S390_MEMOP_SIDA_READ:
5451 if (copy_to_user(uaddr, sida_addr, mop->size))
5452 r = -EFAULT;
5453
5454 break;
5455 case KVM_S390_MEMOP_SIDA_WRITE:
5456 if (copy_from_user(sida_addr, uaddr, mop->size))
5457 r = -EFAULT;
5458 break;
5459 }
5460 return r;
5461}
5462
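/*
 * KVM_S390_MEM_OP: access guest logical addresses through a bounce buffer,
 * honouring storage keys and the optional check-only and exception
 * injection flags.
 */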
5463static long kvm_s390_vcpu_mem_op(struct kvm_vcpu *vcpu,
5464 struct kvm_s390_mem_op *mop)
5465{
5466 void __user *uaddr = (void __user *)mop->buf;
5467 enum gacc_mode acc_mode;
5468 void *tmpbuf = NULL;
5469 int r;
5470
5471 r = mem_op_validate_common(mop, KVM_S390_MEMOP_F_INJECT_EXCEPTION |
5472 KVM_S390_MEMOP_F_CHECK_ONLY |
5473 KVM_S390_MEMOP_F_SKEY_PROTECTION);
5474 if (r)
5475 return r;
5476 if (mop->ar >= NUM_ACRS)
5477 return -EINVAL;
5478 if (kvm_s390_pv_cpu_is_protected(vcpu))
5479 return -EINVAL;
5480 if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
5481 tmpbuf = vmalloc(mop->size);
5482 if (!tmpbuf)
5483 return -ENOMEM;
5484 }
5485
5486 acc_mode = mop->op == KVM_S390_MEMOP_LOGICAL_READ ? GACC_FETCH : GACC_STORE;
5487 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
5488 r = check_gva_range(vcpu, mop->gaddr, mop->ar, mop->size,
5489 acc_mode, mop->key);
5490 goto out_inject;
5491 }
5492 if (acc_mode == GACC_FETCH) {
5493 r = read_guest_with_key(vcpu, mop->gaddr, mop->ar, tmpbuf,
5494 mop->size, mop->key);
5495 if (r)
5496 goto out_inject;
5497 if (copy_to_user(uaddr, tmpbuf, mop->size)) {
5498 r = -EFAULT;
5499 goto out_free;
5500 }
5501 } else {
5502 if (copy_from_user(tmpbuf, uaddr, mop->size)) {
5503 r = -EFAULT;
5504 goto out_free;
5505 }
5506 r = write_guest_with_key(vcpu, mop->gaddr, mop->ar, tmpbuf,
5507 mop->size, mop->key);
5508 }
5509
5510out_inject:
5511 if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
5512 kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
5513
5514out_free:
5515 vfree(tmpbuf);
5516 return r;
5517}
5518
5519static long kvm_s390_vcpu_memsida_op(struct kvm_vcpu *vcpu,
5520 struct kvm_s390_mem_op *mop)
5521{
5522 int r, srcu_idx;
5523
5524 srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
5525
5526 switch (mop->op) {
5527 case KVM_S390_MEMOP_LOGICAL_READ:
5528 case KVM_S390_MEMOP_LOGICAL_WRITE:
5529 r = kvm_s390_vcpu_mem_op(vcpu, mop);
5530 break;
5531 case KVM_S390_MEMOP_SIDA_READ:
5532 case KVM_S390_MEMOP_SIDA_WRITE:
5533 /* we are locked against sida going away by the vcpu->mutex */
5534 r = kvm_s390_vcpu_sida_op(vcpu, mop);
5535 break;
5536 default:
5537 r = -EINVAL;
5538 }
5539
5540 srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
5541 return r;
5542}
5543
5544long kvm_arch_vcpu_async_ioctl(struct file *filp,
5545 unsigned int ioctl, unsigned long arg)
5546{
5547 struct kvm_vcpu *vcpu = filp->private_data;
5548 void __user *argp = (void __user *)arg;
5549 int rc;
5550
5551 switch (ioctl) {
5552 case KVM_S390_IRQ: {
5553 struct kvm_s390_irq s390irq;
5554
5555 if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
5556 return -EFAULT;
5557 rc = kvm_s390_inject_vcpu(vcpu, &s390irq);
5558 break;
5559 }
5560 case KVM_S390_INTERRUPT: {
5561 struct kvm_s390_interrupt s390int;
5562 struct kvm_s390_irq s390irq = {};
5563
5564 if (copy_from_user(&s390int, argp, sizeof(s390int)))
5565 return -EFAULT;
5566 if (s390int_to_s390irq(&s390int, &s390irq))
5567 return -EINVAL;
5568 rc = kvm_s390_inject_vcpu(vcpu, &s390irq);
5569 break;
5570 }
5571 default:
5572 rc = -ENOIOCTLCMD;
5573 break;
5574 }
5575
5576 /*
5577 * To simplify single stepping of userspace-emulated instructions,
5578 * KVM_EXIT_S390_SIEIC exit sets KVM_GUESTDBG_EXIT_PENDING (see
5579 * should_handle_per_ifetch()). However, if userspace emulation injects
5580 * an interrupt, it needs to be cleared, so that KVM_EXIT_DEBUG happens
5581 * after (and not before) the interrupt delivery.
5582 */
5583 if (!rc)
5584 vcpu->guest_debug &= ~KVM_GUESTDBG_EXIT_PENDING;
5585
5586 return rc;
5587}
5588
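/* KVM_PV_DUMP on vcpu level: dump the state of a protected vcpu via the ultravisor */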
5589static int kvm_s390_handle_pv_vcpu_dump(struct kvm_vcpu *vcpu,
5590 struct kvm_pv_cmd *cmd)
5591{
5592 struct kvm_s390_pv_dmp dmp;
5593 void *data;
5594 int ret;
5595
5596 /* Dump initialization is a prerequisite */
5597 if (!vcpu->kvm->arch.pv.dumping)
5598 return -EINVAL;
5599
5600 if (copy_from_user(&dmp, (__u8 __user *)cmd->data, sizeof(dmp)))
5601 return -EFAULT;
5602
5603 /* We only handle this subcmd right now */
5604 if (dmp.subcmd != KVM_PV_DUMP_CPU)
5605 return -EINVAL;
5606
5607 /* CPU dump length is the same as create cpu storage donation. */
5608 if (dmp.buff_len != uv_info.guest_cpu_stor_len)
5609 return -EINVAL;
5610
5611 data = kvzalloc(uv_info.guest_cpu_stor_len, GFP_KERNEL);
5612 if (!data)
5613 return -ENOMEM;
5614
5615 ret = kvm_s390_pv_dump_cpu(vcpu, data, &cmd->rc, &cmd->rrc);
5616
5617 VCPU_EVENT(vcpu, 3, "PROTVIRT DUMP CPU %d rc %x rrc %x",
5618 vcpu->vcpu_id, cmd->rc, cmd->rrc);
5619
5620 if (ret)
5621 ret = -EINVAL;
5622
5623 /* On success copy over the dump data */
5624 if (!ret && copy_to_user((__u8 __user *)dmp.buff_addr, data, uv_info.guest_cpu_stor_len))
5625 ret = -EFAULT;
5626
5627 kvfree(data);
5628 return ret;
5629}
5630
5631long kvm_arch_vcpu_ioctl(struct file *filp,
5632 unsigned int ioctl, unsigned long arg)
5633{
5634 struct kvm_vcpu *vcpu = filp->private_data;
5635 void __user *argp = (void __user *)arg;
5636 int idx;
5637 long r;
5638 u16 rc, rrc;
5639
5640 vcpu_load(vcpu);
5641
5642 switch (ioctl) {
5643 case KVM_S390_STORE_STATUS:
5644 idx = srcu_read_lock(&vcpu->kvm->srcu);
5645 r = kvm_s390_store_status_unloaded(vcpu, arg);
5646 srcu_read_unlock(&vcpu->kvm->srcu, idx);
5647 break;
5648 case KVM_S390_SET_INITIAL_PSW: {
5649 psw_t psw;
5650
5651 r = -EFAULT;
5652 if (copy_from_user(&psw, argp, sizeof(psw)))
5653 break;
5654 r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
5655 break;
5656 }
5657 case KVM_S390_CLEAR_RESET:
5658 r = 0;
5659 kvm_arch_vcpu_ioctl_clear_reset(vcpu);
5660 if (kvm_s390_pv_cpu_is_protected(vcpu)) {
5661 r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
5662 UVC_CMD_CPU_RESET_CLEAR, &rc, &rrc);
5663 VCPU_EVENT(vcpu, 3, "PROTVIRT RESET CLEAR VCPU: rc %x rrc %x",
5664 rc, rrc);
5665 }
5666 break;
5667 case KVM_S390_INITIAL_RESET:
5668 r = 0;
5669 kvm_arch_vcpu_ioctl_initial_reset(vcpu);
5670 if (kvm_s390_pv_cpu_is_protected(vcpu)) {
5671 r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
5672 UVC_CMD_CPU_RESET_INITIAL,
5673 &rc, &rrc);
5674 VCPU_EVENT(vcpu, 3, "PROTVIRT RESET INITIAL VCPU: rc %x rrc %x",
5675 rc, rrc);
5676 }
5677 break;
5678 case KVM_S390_NORMAL_RESET:
5679 r = 0;
5680 kvm_arch_vcpu_ioctl_normal_reset(vcpu);
5681 if (kvm_s390_pv_cpu_is_protected(vcpu)) {
5682 r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
5683 UVC_CMD_CPU_RESET, &rc, &rrc);
5684 VCPU_EVENT(vcpu, 3, "PROTVIRT RESET NORMAL VCPU: rc %x rrc %x",
5685 rc, rrc);
5686 }
5687 break;
5688 case KVM_SET_ONE_REG:
5689 case KVM_GET_ONE_REG: {
5690 struct kvm_one_reg reg;
5691 r = -EINVAL;
5692 if (kvm_s390_pv_cpu_is_protected(vcpu))
5693 break;
5694 r = -EFAULT;
5695		if (copy_from_user(&reg, argp, sizeof(reg)))
5696			break;
5697		if (ioctl == KVM_SET_ONE_REG)
5698			r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
5699		else
5700			r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
5701 break;
5702 }
5703#ifdef CONFIG_KVM_S390_UCONTROL
5704 case KVM_S390_UCAS_MAP: {
5705 struct kvm_s390_ucas_mapping ucasmap;
5706
5707 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
5708 r = -EFAULT;
5709 break;
5710 }
5711
5712 if (!kvm_is_ucontrol(vcpu->kvm)) {
5713 r = -EINVAL;
5714 break;
5715 }
5716
5717 r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
5718 ucasmap.vcpu_addr, ucasmap.length);
5719 break;
5720 }
5721 case KVM_S390_UCAS_UNMAP: {
5722 struct kvm_s390_ucas_mapping ucasmap;
5723
5724 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
5725 r = -EFAULT;
5726 break;
5727 }
5728
5729 if (!kvm_is_ucontrol(vcpu->kvm)) {
5730 r = -EINVAL;
5731 break;
5732 }
5733
5734 r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
5735 ucasmap.length);
5736 break;
5737 }
5738#endif
5739 case KVM_S390_VCPU_FAULT: {
5740 r = gmap_fault(vcpu->arch.gmap, arg, 0);
5741 break;
5742 }
5743 case KVM_ENABLE_CAP:
5744 {
5745 struct kvm_enable_cap cap;
5746 r = -EFAULT;
5747 if (copy_from_user(&cap, argp, sizeof(cap)))
5748 break;
5749 r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
5750 break;
5751 }
5752 case KVM_S390_MEM_OP: {
5753 struct kvm_s390_mem_op mem_op;
5754
5755 if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
5756 r = kvm_s390_vcpu_memsida_op(vcpu, &mem_op);
5757 else
5758 r = -EFAULT;
5759 break;
5760 }
5761 case KVM_S390_SET_IRQ_STATE: {
5762 struct kvm_s390_irq_state irq_state;
5763
5764 r = -EFAULT;
5765 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
5766 break;
5767 if (irq_state.len > VCPU_IRQS_MAX_BUF ||
5768 irq_state.len == 0 ||
5769 irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
5770 r = -EINVAL;
5771 break;
5772 }
5773 /* do not use irq_state.flags, it will break old QEMUs */
5774 r = kvm_s390_set_irq_state(vcpu,
5775 (void __user *) irq_state.buf,
5776 irq_state.len);
5777 break;
5778 }
5779 case KVM_S390_GET_IRQ_STATE: {
5780 struct kvm_s390_irq_state irq_state;
5781
5782 r = -EFAULT;
5783 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
5784 break;
5785 if (irq_state.len == 0) {
5786 r = -EINVAL;
5787 break;
5788 }
5789 /* do not use irq_state.flags, it will break old QEMUs */
5790 r = kvm_s390_get_irq_state(vcpu,
5791 (__u8 __user *) irq_state.buf,
5792 irq_state.len);
5793 break;
5794 }
5795 case KVM_S390_PV_CPU_COMMAND: {
5796 struct kvm_pv_cmd cmd;
5797
5798 r = -EINVAL;
5799 if (!is_prot_virt_host())
5800 break;
5801
5802 r = -EFAULT;
5803 if (copy_from_user(&cmd, argp, sizeof(cmd)))
5804 break;
5805
5806 r = -EINVAL;
5807 if (cmd.flags)
5808 break;
5809
5810 /* We only handle this cmd right now */
5811 if (cmd.cmd != KVM_PV_DUMP)
5812 break;
5813
5814 r = kvm_s390_handle_pv_vcpu_dump(vcpu, &cmd);
5815
5816 /* Always copy over UV rc / rrc data */
5817 if (copy_to_user((__u8 __user *)argp, &cmd.rc,
5818 sizeof(cmd.rc) + sizeof(cmd.rrc)))
5819 r = -EFAULT;
5820 break;
5821 }
5822 default:
5823 r = -ENOTTY;
5824 }
5825
5826 vcpu_put(vcpu);
5827 return r;
5828}
5829
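/* Allow userspace of ucontrol VMs to mmap the SIE control block of a vcpu */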
5830vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
5831{
5832#ifdef CONFIG_KVM_S390_UCONTROL
5833 if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
5834 && (kvm_is_ucontrol(vcpu->kvm))) {
5835 vmf->page = virt_to_page(vcpu->arch.sie_block);
5836 get_page(vmf->page);
5837 return 0;
5838 }
5839#endif
5840 return VM_FAULT_SIGBUS;
5841}
5842
5843bool kvm_arch_irqchip_in_kernel(struct kvm *kvm)
5844{
5845 return true;
5846}
5847
5848/* Section: memory related */
5849int kvm_arch_prepare_memory_region(struct kvm *kvm,
5850 const struct kvm_memory_slot *old,
5851 struct kvm_memory_slot *new,
5852 enum kvm_mr_change change)
5853{
5854 gpa_t size;
5855
5856 if (kvm_is_ucontrol(kvm))
5857 return -EINVAL;
5858
5859 /* When we are protected, we should not change the memory slots */
5860 if (kvm_s390_pv_get_handle(kvm))
5861 return -EINVAL;
5862
5863 if (change != KVM_MR_DELETE && change != KVM_MR_FLAGS_ONLY) {
5864 /*
5865		 * A few sanity checks: memory slots have to start and end on a
5866		 * segment boundary (1 MB). The memory in userland may be
5867		 * fragmented into various different vmas. It is okay to mmap()
5868		 * and munmap() stuff in this slot after doing this call, at any time.
5869 */
5870
5871 if (new->userspace_addr & 0xffffful)
5872 return -EINVAL;
5873
5874 size = new->npages * PAGE_SIZE;
5875 if (size & 0xffffful)
5876 return -EINVAL;
5877
5878 if ((new->base_gfn * PAGE_SIZE) + size > kvm->arch.mem_limit)
5879 return -EINVAL;
5880 }
5881
5882 if (!kvm->arch.migration_mode)
5883 return 0;
5884
5885 /*
5886 * Turn off migration mode when:
5887 * - userspace creates a new memslot with dirty logging off,
5888 * - userspace modifies an existing memslot (MOVE or FLAGS_ONLY) and
5889 * dirty logging is turned off.
5890 * Migration mode expects dirty page logging being enabled to store
5891 * its dirty bitmap.
5892 */
5893 if (change != KVM_MR_DELETE &&
5894 !(new->flags & KVM_MEM_LOG_DIRTY_PAGES))
5895 WARN(kvm_s390_vm_stop_migration(kvm),
5896 "Failed to stop migration mode");
5897
5898 return 0;
5899}
5900
5901void kvm_arch_commit_memory_region(struct kvm *kvm,
5902 struct kvm_memory_slot *old,
5903 const struct kvm_memory_slot *new,
5904 enum kvm_mr_change change)
5905{
5906 int rc = 0;
5907
5908 switch (change) {
5909 case KVM_MR_DELETE:
5910 rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
5911 old->npages * PAGE_SIZE);
5912 break;
5913 case KVM_MR_MOVE:
5914 rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
5915 old->npages * PAGE_SIZE);
5916 if (rc)
5917 break;
5918 fallthrough;
5919 case KVM_MR_CREATE:
5920 rc = gmap_map_segment(kvm->arch.gmap, new->userspace_addr,
5921 new->base_gfn * PAGE_SIZE,
5922 new->npages * PAGE_SIZE);
5923 break;
5924 case KVM_MR_FLAGS_ONLY:
5925 break;
5926 default:
5927 WARN(1, "Unknown KVM MR CHANGE: %d\n", change);
5928 }
5929 if (rc)
5930 pr_warn("failed to commit memory region\n");
5931 return;
5932}
5933
5934static inline unsigned long nonhyp_mask(int i)
5935{
5936 unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;
5937
5938 return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
5939}
5940
5941static int __init kvm_s390_init(void)
5942{
5943 int i, r;
5944
5945 if (!sclp.has_sief2) {
5946 pr_info("SIE is not available\n");
5947 return -ENODEV;
5948 }
5949
5950 if (nested && hpage) {
5951 pr_info("A KVM host that supports nesting cannot back its KVM guests with huge pages\n");
5952 return -EINVAL;
5953 }
5954
5955 for (i = 0; i < 16; i++)
5956 kvm_s390_fac_base[i] |=
5957 stfle_fac_list[i] & nonhyp_mask(i);
5958
5959 r = __kvm_s390_init();
5960 if (r)
5961 return r;
5962
5963 r = kvm_init(sizeof(struct kvm_vcpu), 0, THIS_MODULE);
5964 if (r) {
5965 __kvm_s390_exit();
5966 return r;
5967 }
5968 return 0;
5969}
5970
5971static void __exit kvm_s390_exit(void)
5972{
5973 kvm_exit();
5974
5975 __kvm_s390_exit();
5976}
5977
5978module_init(kvm_s390_init);
5979module_exit(kvm_s390_exit);
5980
5981/*
5982 * Enable autoloading of the kvm module.
5983 * Note that we add the module alias here instead of virt/kvm/kvm_main.c
5984 * since x86 takes a different approach.
5985 */
5986#include <linux/miscdevice.h>
5987MODULE_ALIAS_MISCDEV(KVM_MINOR);
5988MODULE_ALIAS("devname:kvm");