// SPDX-License-Identifier: GPL-2.0
/*
 * hosting IBM Z kernel virtual machines (s390x)
 *
 * Copyright IBM Corp. 2008, 2018
 *
 * Author(s): Carsten Otte <cotte@de.ibm.com>
 *            Christian Borntraeger <borntraeger@de.ibm.com>
 *            Heiko Carstens <heiko.carstens@de.ibm.com>
 *            Christian Ehrhardt <ehrhardt@de.ibm.com>
 *            Jason J. Herne <jjherne@us.ibm.com>
 */

#include <linux/compiler.h>
#include <linux/err.h>
#include <linux/fs.h>
#include <linux/hrtimer.h>
#include <linux/init.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/mman.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/random.h>
#include <linux/slab.h>
#include <linux/timer.h>
#include <linux/vmalloc.h>
#include <linux/bitmap.h>
#include <linux/sched/signal.h>
#include <linux/string.h>

#include <asm/asm-offsets.h>
#include <asm/lowcore.h>
#include <asm/stp.h>
#include <asm/pgtable.h>
#include <asm/gmap.h>
#include <asm/nmi.h>
#include <asm/switch_to.h>
#include <asm/isc.h>
#include <asm/sclp.h>
#include <asm/cpacf.h>
#include <asm/timex.h>
#include "kvm-s390.h"
#include "gaccess.h"

#define KMSG_COMPONENT "kvm-s390"
#undef pr_fmt
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

#define CREATE_TRACE_POINTS
#include "trace.h"
#include "trace-s390.h"

#define MEM_OP_MAX_SIZE 65536	/* Maximum transfer size for KVM_S390_MEM_OP */
#define LOCAL_IRQS 32
#define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
			   (KVM_MAX_VCPUS + LOCAL_IRQS))

#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
#define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM

struct kvm_stats_debugfs_item debugfs_entries[] = {
	{ "userspace_handled", VCPU_STAT(exit_userspace) },
	{ "exit_null", VCPU_STAT(exit_null) },
	{ "exit_validity", VCPU_STAT(exit_validity) },
	{ "exit_stop_request", VCPU_STAT(exit_stop_request) },
	{ "exit_external_request", VCPU_STAT(exit_external_request) },
	{ "exit_io_request", VCPU_STAT(exit_io_request) },
	{ "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
	{ "exit_instruction", VCPU_STAT(exit_instruction) },
	{ "exit_pei", VCPU_STAT(exit_pei) },
	{ "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
	{ "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
	{ "exit_operation_exception", VCPU_STAT(exit_operation_exception) },
	{ "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
	{ "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
	{ "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
	{ "halt_wakeup", VCPU_STAT(halt_wakeup) },
	{ "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
	{ "instruction_lctl", VCPU_STAT(instruction_lctl) },
	{ "instruction_stctl", VCPU_STAT(instruction_stctl) },
	{ "instruction_stctg", VCPU_STAT(instruction_stctg) },
	{ "deliver_ckc", VCPU_STAT(deliver_ckc) },
	{ "deliver_cputm", VCPU_STAT(deliver_cputm) },
	{ "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
	{ "deliver_external_call", VCPU_STAT(deliver_external_call) },
	{ "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
	{ "deliver_virtio", VCPU_STAT(deliver_virtio) },
	{ "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
	{ "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
	{ "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
	{ "deliver_program", VCPU_STAT(deliver_program) },
	{ "deliver_io", VCPU_STAT(deliver_io) },
	{ "deliver_machine_check", VCPU_STAT(deliver_machine_check) },
	{ "exit_wait_state", VCPU_STAT(exit_wait_state) },
	{ "inject_ckc", VCPU_STAT(inject_ckc) },
	{ "inject_cputm", VCPU_STAT(inject_cputm) },
	{ "inject_external_call", VCPU_STAT(inject_external_call) },
	{ "inject_float_mchk", VM_STAT(inject_float_mchk) },
	{ "inject_emergency_signal", VCPU_STAT(inject_emergency_signal) },
	{ "inject_io", VM_STAT(inject_io) },
	{ "inject_mchk", VCPU_STAT(inject_mchk) },
	{ "inject_pfault_done", VM_STAT(inject_pfault_done) },
	{ "inject_program", VCPU_STAT(inject_program) },
	{ "inject_restart", VCPU_STAT(inject_restart) },
	{ "inject_service_signal", VM_STAT(inject_service_signal) },
	{ "inject_set_prefix", VCPU_STAT(inject_set_prefix) },
	{ "inject_stop_signal", VCPU_STAT(inject_stop_signal) },
	{ "inject_pfault_init", VCPU_STAT(inject_pfault_init) },
	{ "inject_virtio", VM_STAT(inject_virtio) },
	{ "instruction_epsw", VCPU_STAT(instruction_epsw) },
	{ "instruction_gs", VCPU_STAT(instruction_gs) },
	{ "instruction_io_other", VCPU_STAT(instruction_io_other) },
	{ "instruction_lpsw", VCPU_STAT(instruction_lpsw) },
	{ "instruction_lpswe", VCPU_STAT(instruction_lpswe) },
	{ "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
	{ "instruction_ptff", VCPU_STAT(instruction_ptff) },
	{ "instruction_stidp", VCPU_STAT(instruction_stidp) },
	{ "instruction_sck", VCPU_STAT(instruction_sck) },
	{ "instruction_sckpf", VCPU_STAT(instruction_sckpf) },
	{ "instruction_spx", VCPU_STAT(instruction_spx) },
	{ "instruction_stpx", VCPU_STAT(instruction_stpx) },
	{ "instruction_stap", VCPU_STAT(instruction_stap) },
	{ "instruction_iske", VCPU_STAT(instruction_iske) },
	{ "instruction_ri", VCPU_STAT(instruction_ri) },
	{ "instruction_rrbe", VCPU_STAT(instruction_rrbe) },
	{ "instruction_sske", VCPU_STAT(instruction_sske) },
	{ "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
	{ "instruction_essa", VCPU_STAT(instruction_essa) },
	{ "instruction_stsi", VCPU_STAT(instruction_stsi) },
	{ "instruction_stfl", VCPU_STAT(instruction_stfl) },
	{ "instruction_tb", VCPU_STAT(instruction_tb) },
	{ "instruction_tpi", VCPU_STAT(instruction_tpi) },
	{ "instruction_tprot", VCPU_STAT(instruction_tprot) },
	{ "instruction_tsch", VCPU_STAT(instruction_tsch) },
	{ "instruction_sthyi", VCPU_STAT(instruction_sthyi) },
	{ "instruction_sie", VCPU_STAT(instruction_sie) },
	{ "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
	{ "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
	{ "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
	{ "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
	{ "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
	{ "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
	{ "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
	{ "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
	{ "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
	{ "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
	{ "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
	{ "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
	{ "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
	{ "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
	{ "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
	{ "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
	{ "instruction_diag_10", VCPU_STAT(diagnose_10) },
	{ "instruction_diag_44", VCPU_STAT(diagnose_44) },
	{ "instruction_diag_9c", VCPU_STAT(diagnose_9c) },
	{ "instruction_diag_258", VCPU_STAT(diagnose_258) },
	{ "instruction_diag_308", VCPU_STAT(diagnose_308) },
	{ "instruction_diag_500", VCPU_STAT(diagnose_500) },
	{ "instruction_diag_other", VCPU_STAT(diagnose_other) },
	{ NULL }
};

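/*
 * Layout of the extended (128-bit) TOD clock as filled in by
 * get_tod_clock_ext(): one epoch-index byte, the 64-bit TOD value,
 * and seven reserved bytes.
 */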
struct kvm_s390_tod_clock_ext {
	__u8 epoch_idx;
	__u64 tod;
	__u8 reserved[7];
} __packed;

/* allow nested virtualization in KVM (if enabled by user space) */
static int nested;
module_param(nested, int, S_IRUGO);
MODULE_PARM_DESC(nested, "Nested virtualization support");


/*
 * For now we handle at most 16 double words as this is what the s390 base
 * kernel handles and stores in the prefix page. If we ever need to go beyond
 * this, code changes will be required, but the external uapi can stay.
 */
#define SIZE_INTERNAL 16

/*
 * Base feature mask that defines default mask for facilities. Consists of the
 * defines in FACILITIES_KVM and the non-hypervisor managed bits.
 */
static unsigned long kvm_s390_fac_base[SIZE_INTERNAL] = { FACILITIES_KVM };
/*
 * Extended feature mask. Consists of the defines in FACILITIES_KVM_CPUMODEL
 * and defines the facilities that can be enabled via a cpu model.
 */
static unsigned long kvm_s390_fac_ext[SIZE_INTERNAL] = { FACILITIES_KVM_CPUMODEL };

static unsigned long kvm_s390_fac_size(void)
{
	BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_MASK_SIZE_U64);
	BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_LIST_SIZE_U64);
	BUILD_BUG_ON(SIZE_INTERNAL * sizeof(unsigned long) >
		     sizeof(S390_lowcore.stfle_fac_list));

	return SIZE_INTERNAL;
}

/* available cpu features supported by kvm */
static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
/* available subfunctions indicated via query / "test bit" */
static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;

static struct gmap_notifier gmap_notifier;
static struct gmap_notifier vsie_gmap_notifier;
debug_info_t *kvm_s390_dbf;

/* Section: not file related */
int kvm_arch_hardware_enable(void)
{
	/* every s390 is virtualization enabled ;-) */
	return 0;
}

static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
			      unsigned long end);

static void kvm_clock_sync_scb(struct kvm_s390_sie_block *scb, u64 delta)
{
	u8 delta_idx = 0;

	/*
	 * The TOD jumps by delta, we have to compensate this by adding
	 * -delta to the epoch.
	 */
	delta = -delta;

	/* sign-extension - we're adding to signed values below */
	if ((s64)delta < 0)
		delta_idx = -1;

	scb->epoch += delta;
	if (scb->ecd & ECD_MEF) {
		scb->epdx += delta_idx;
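		/* an unsigned wraparound of the epoch means a carry into the epoch index */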
		if (scb->epoch < delta)
			scb->epdx += 1;
	}
}

/*
 * This callback is executed during stop_machine(). All CPUs are therefore
 * temporarily stopped. In order not to change guest behavior, we have to
 * disable preemption whenever we touch the epoch of kvm and the VCPUs,
 * so a CPU won't be stopped while calculating with the epoch.
 */
static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
			  void *v)
{
	struct kvm *kvm;
	struct kvm_vcpu *vcpu;
	int i;
	unsigned long long *delta = v;

	list_for_each_entry(kvm, &vm_list, vm_list) {
		kvm_for_each_vcpu(i, vcpu, kvm) {
			kvm_clock_sync_scb(vcpu->arch.sie_block, *delta);
			if (i == 0) {
				kvm->arch.epoch = vcpu->arch.sie_block->epoch;
				kvm->arch.epdx = vcpu->arch.sie_block->epdx;
			}
			if (vcpu->arch.cputm_enabled)
				vcpu->arch.cputm_start += *delta;
			if (vcpu->arch.vsie_block)
				kvm_clock_sync_scb(vcpu->arch.vsie_block,
						   *delta);
		}
	}
	return NOTIFY_OK;
}

static struct notifier_block kvm_clock_notifier = {
	.notifier_call = kvm_clock_sync,
};

int kvm_arch_hardware_setup(void)
{
	gmap_notifier.notifier_call = kvm_gmap_notifier;
	gmap_register_pte_notifier(&gmap_notifier);
	vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
	gmap_register_pte_notifier(&vsie_gmap_notifier);
	atomic_notifier_chain_register(&s390_epoch_delta_notifier,
				       &kvm_clock_notifier);
	return 0;
}

void kvm_arch_hardware_unsetup(void)
{
	gmap_unregister_pte_notifier(&gmap_notifier);
	gmap_unregister_pte_notifier(&vsie_gmap_notifier);
	atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
					 &kvm_clock_notifier);
}

static void allow_cpu_feat(unsigned long nr)
{
	set_bit_inv(nr, kvm_s390_available_cpu_feat);
}

static inline int plo_test_bit(unsigned char nr)
{
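	/* setting bit 0x100 in GR0 selects the PLO "test bit" function: cc == 0 if function code nr is implemented */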
	register unsigned long r0 asm("0") = (unsigned long) nr | 0x100;
	int cc;

	asm volatile(
		/* Parameter registers are ignored for "test bit" */
		"	plo	0,0,0,0(0)\n"
		"	ipm	%0\n"
		"	srl	%0,28\n"
		: "=d" (cc)
		: "d" (r0)
		: "cc");
	return cc == 0;
}

static void kvm_s390_cpu_feat_init(void)
{
	int i;

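	/* probe all 256 PLO function codes; results are packed MSB-first per byte */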
	for (i = 0; i < 256; ++i) {
		if (plo_test_bit(i))
			kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
	}

	if (test_facility(28)) /* TOD-clock steering */
		ptff(kvm_s390_available_subfunc.ptff,
		     sizeof(kvm_s390_available_subfunc.ptff),
		     PTFF_QAF);

	if (test_facility(17)) { /* MSA */
		__cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmac);
		__cpacf_query(CPACF_KMC, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmc);
		__cpacf_query(CPACF_KM, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.km);
		__cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kimd);
		__cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.klmd);
	}
	if (test_facility(76)) /* MSA3 */
		__cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.pckmo);
	if (test_facility(77)) { /* MSA4 */
		__cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmctr);
		__cpacf_query(CPACF_KMF, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmf);
		__cpacf_query(CPACF_KMO, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmo);
		__cpacf_query(CPACF_PCC, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.pcc);
	}
	if (test_facility(57)) /* MSA5 */
		__cpacf_query(CPACF_PRNO, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.ppno);

	if (test_facility(146)) /* MSA8 */
		__cpacf_query(CPACF_KMA, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kma);

	if (MACHINE_HAS_ESOP)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
	/*
	 * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
	 * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
	 */
	if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
	    !test_facility(3) || !nested)
		return;
	allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
	if (sclp.has_64bscao)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
	if (sclp.has_siif)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
	if (sclp.has_gpere)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
	if (sclp.has_gsls)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
	if (sclp.has_ib)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
	if (sclp.has_cei)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
	if (sclp.has_ibs)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
	if (sclp.has_kss)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS);
	/*
	 * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
	 * all skey handling functions read/set the skey from the PGSTE
	 * instead of the real storage key.
	 *
	 * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will make
	 * pages being detected as preserved although they are resident.
	 *
	 * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
	 * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
	 *
	 * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
	 * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
	 * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
	 *
	 * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
	 * cannot easily shadow the SCA because of the ipte lock.
	 */
}

int kvm_arch_init(void *opaque)
{
	kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
	if (!kvm_s390_dbf)
		return -ENOMEM;

	if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) {
		debug_unregister(kvm_s390_dbf);
		return -ENOMEM;
	}

	kvm_s390_cpu_feat_init();

	/* Register floating interrupt controller interface. */
	return kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
}

void kvm_arch_exit(void)
{
	debug_unregister(kvm_s390_dbf);
}

/* Section: device related */
long kvm_arch_dev_ioctl(struct file *filp,
			unsigned int ioctl, unsigned long arg)
{
	if (ioctl == KVM_S390_ENABLE_SIE)
		return s390_enable_sie();
	return -EINVAL;
}

int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
{
	int r;

	switch (ext) {
	case KVM_CAP_S390_PSW:
	case KVM_CAP_S390_GMAP:
	case KVM_CAP_SYNC_MMU:
#ifdef CONFIG_KVM_S390_UCONTROL
	case KVM_CAP_S390_UCONTROL:
#endif
	case KVM_CAP_ASYNC_PF:
	case KVM_CAP_SYNC_REGS:
	case KVM_CAP_ONE_REG:
	case KVM_CAP_ENABLE_CAP:
	case KVM_CAP_S390_CSS_SUPPORT:
	case KVM_CAP_IOEVENTFD:
	case KVM_CAP_DEVICE_CTRL:
	case KVM_CAP_ENABLE_CAP_VM:
	case KVM_CAP_S390_IRQCHIP:
	case KVM_CAP_VM_ATTRIBUTES:
	case KVM_CAP_MP_STATE:
	case KVM_CAP_IMMEDIATE_EXIT:
	case KVM_CAP_S390_INJECT_IRQ:
	case KVM_CAP_S390_USER_SIGP:
	case KVM_CAP_S390_USER_STSI:
	case KVM_CAP_S390_SKEYS:
	case KVM_CAP_S390_IRQ_STATE:
	case KVM_CAP_S390_USER_INSTR0:
	case KVM_CAP_S390_CMMA_MIGRATION:
	case KVM_CAP_S390_AIS:
	case KVM_CAP_S390_AIS_MIGRATION:
		r = 1;
		break;
	case KVM_CAP_S390_MEM_OP:
		r = MEM_OP_MAX_SIZE;
		break;
	case KVM_CAP_NR_VCPUS:
	case KVM_CAP_MAX_VCPUS:
		r = KVM_S390_BSCA_CPU_SLOTS;
		if (!kvm_s390_use_sca_entries())
			r = KVM_MAX_VCPUS;
		else if (sclp.has_esca && sclp.has_64bscao)
			r = KVM_S390_ESCA_CPU_SLOTS;
		break;
	case KVM_CAP_NR_MEMSLOTS:
		r = KVM_USER_MEM_SLOTS;
		break;
	case KVM_CAP_S390_COW:
		r = MACHINE_HAS_ESOP;
		break;
	case KVM_CAP_S390_VECTOR_REGISTERS:
		r = MACHINE_HAS_VX;
		break;
	case KVM_CAP_S390_RI:
		r = test_facility(64);
		break;
	case KVM_CAP_S390_GS:
		r = test_facility(133);
		break;
	case KVM_CAP_S390_BPB:
		r = test_facility(82);
		break;
	default:
		r = 0;
	}
	return r;
}

static void kvm_s390_sync_dirty_log(struct kvm *kvm,
				    struct kvm_memory_slot *memslot)
{
	gfn_t cur_gfn, last_gfn;
	unsigned long address;
	struct gmap *gmap = kvm->arch.gmap;

	/* Loop over all guest pages */
	last_gfn = memslot->base_gfn + memslot->npages;
	for (cur_gfn = memslot->base_gfn; cur_gfn <= last_gfn; cur_gfn++) {
		address = gfn_to_hva_memslot(memslot, cur_gfn);

		if (test_and_clear_guest_dirty(gmap->mm, address))
			mark_page_dirty(kvm, cur_gfn);
		if (fatal_signal_pending(current))
			return;
		cond_resched();
	}
}

/* Section: vm related */
static void sca_del_vcpu(struct kvm_vcpu *vcpu);

/*
 * Get (and clear) the dirty memory log for a memory slot.
 */
int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
			       struct kvm_dirty_log *log)
{
	int r;
	unsigned long n;
	struct kvm_memslots *slots;
	struct kvm_memory_slot *memslot;
	int is_dirty = 0;

	if (kvm_is_ucontrol(kvm))
		return -EINVAL;

	mutex_lock(&kvm->slots_lock);

	r = -EINVAL;
	if (log->slot >= KVM_USER_MEM_SLOTS)
		goto out;

	slots = kvm_memslots(kvm);
	memslot = id_to_memslot(slots, log->slot);
	r = -ENOENT;
	if (!memslot->dirty_bitmap)
		goto out;

	kvm_s390_sync_dirty_log(kvm, memslot);
	r = kvm_get_dirty_log(kvm, log, &is_dirty);
	if (r)
		goto out;

	/* Clear the dirty log */
	if (is_dirty) {
		n = kvm_dirty_bitmap_bytes(memslot);
		memset(memslot->dirty_bitmap, 0, n);
	}
	r = 0;
out:
	mutex_unlock(&kvm->slots_lock);
	return r;
}

static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
{
	unsigned int i;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm) {
		kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
	}
}

static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
{
	int r;

	if (cap->flags)
		return -EINVAL;

	switch (cap->cap) {
	case KVM_CAP_S390_IRQCHIP:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
		kvm->arch.use_irqchip = 1;
		r = 0;
		break;
	case KVM_CAP_S390_USER_SIGP:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
		kvm->arch.user_sigp = 1;
		r = 0;
		break;
	case KVM_CAP_S390_VECTOR_REGISTERS:
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus) {
			r = -EBUSY;
		} else if (MACHINE_HAS_VX) {
			set_kvm_facility(kvm->arch.model.fac_mask, 129);
			set_kvm_facility(kvm->arch.model.fac_list, 129);
			if (test_facility(134)) {
				set_kvm_facility(kvm->arch.model.fac_mask, 134);
				set_kvm_facility(kvm->arch.model.fac_list, 134);
			}
			if (test_facility(135)) {
				set_kvm_facility(kvm->arch.model.fac_mask, 135);
				set_kvm_facility(kvm->arch.model.fac_list, 135);
			}
			r = 0;
		} else
			r = -EINVAL;
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_RI:
		r = -EINVAL;
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus) {
			r = -EBUSY;
		} else if (test_facility(64)) {
			set_kvm_facility(kvm->arch.model.fac_mask, 64);
			set_kvm_facility(kvm->arch.model.fac_list, 64);
			r = 0;
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_AIS:
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus) {
			r = -EBUSY;
		} else {
			set_kvm_facility(kvm->arch.model.fac_mask, 72);
			set_kvm_facility(kvm->arch.model.fac_list, 72);
			r = 0;
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: AIS %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_GS:
		r = -EINVAL;
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus) {
			r = -EBUSY;
		} else if (test_facility(133)) {
			set_kvm_facility(kvm->arch.model.fac_mask, 133);
			set_kvm_facility(kvm->arch.model.fac_list, 133);
			r = 0;
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_USER_STSI:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
		kvm->arch.user_stsi = 1;
		r = 0;
		break;
	case KVM_CAP_S390_USER_INSTR0:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
		kvm->arch.user_instr0 = 1;
		icpt_operexc_on_all_vcpus(kvm);
		r = 0;
		break;
	default:
		r = -EINVAL;
		break;
	}
	return r;
}

static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->attr) {
	case KVM_S390_VM_MEM_LIMIT_SIZE:
		ret = 0;
		VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
			 kvm->arch.mem_limit);
		if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
			ret = -EFAULT;
		break;
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}

static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;
	unsigned int idx;
	switch (attr->attr) {
	case KVM_S390_VM_MEM_ENABLE_CMMA:
		ret = -ENXIO;
		if (!sclp.has_cmma)
			break;

		ret = -EBUSY;
		VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
		mutex_lock(&kvm->lock);
		if (!kvm->created_vcpus) {
			kvm->arch.use_cmma = 1;
			/* Not compatible with cmma. */
			kvm->arch.use_pfmfi = 0;
			ret = 0;
		}
		mutex_unlock(&kvm->lock);
		break;
	case KVM_S390_VM_MEM_CLR_CMMA:
		ret = -ENXIO;
		if (!sclp.has_cmma)
			break;
		ret = -EINVAL;
		if (!kvm->arch.use_cmma)
			break;

		VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
		mutex_lock(&kvm->lock);
		idx = srcu_read_lock(&kvm->srcu);
		s390_reset_cmma(kvm->arch.gmap->mm);
		srcu_read_unlock(&kvm->srcu, idx);
		mutex_unlock(&kvm->lock);
		ret = 0;
		break;
	case KVM_S390_VM_MEM_LIMIT_SIZE: {
		unsigned long new_limit;

		if (kvm_is_ucontrol(kvm))
			return -EINVAL;

		if (get_user(new_limit, (u64 __user *)attr->addr))
			return -EFAULT;

		if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
		    new_limit > kvm->arch.mem_limit)
			return -E2BIG;

		if (!new_limit)
			return -EINVAL;

		/* gmap_create takes last usable address */
		if (new_limit != KVM_S390_NO_MEM_LIMIT)
			new_limit -= 1;

		ret = -EBUSY;
		mutex_lock(&kvm->lock);
		if (!kvm->created_vcpus) {
			/* gmap_create will round the limit up */
			struct gmap *new = gmap_create(current->mm, new_limit);

			if (!new) {
				ret = -ENOMEM;
			} else {
				gmap_remove(kvm->arch.gmap);
				new->private = kvm;
				kvm->arch.gmap = new;
				ret = 0;
			}
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
		VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
			 (void *) kvm->arch.gmap->asce);
		break;
	}
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}

static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);

static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_vcpu *vcpu;
	int i;

	if (!test_kvm_facility(kvm, 76))
		return -EINVAL;

	mutex_lock(&kvm->lock);
	switch (attr->attr) {
	case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
		get_random_bytes(
			kvm->arch.crypto.crycb->aes_wrapping_key_mask,
			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
		kvm->arch.crypto.aes_kw = 1;
		VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
		get_random_bytes(
			kvm->arch.crypto.crycb->dea_wrapping_key_mask,
			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
		kvm->arch.crypto.dea_kw = 1;
		VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
		kvm->arch.crypto.aes_kw = 0;
		memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
		VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
		kvm->arch.crypto.dea_kw = 0;
		memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
		VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
		break;
	default:
		mutex_unlock(&kvm->lock);
		return -ENXIO;
	}

	kvm_for_each_vcpu(i, vcpu, kvm) {
		kvm_s390_vcpu_crypto_setup(vcpu);
		exit_sie(vcpu);
	}
	mutex_unlock(&kvm->lock);
	return 0;
}

static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
{
	int cx;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(cx, vcpu, kvm)
		kvm_s390_sync_request(req, vcpu);
}

/*
 * Must be called with kvm->srcu held to avoid races on memslots, and with
 * kvm->slots_lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
 */
static int kvm_s390_vm_start_migration(struct kvm *kvm)
{
	struct kvm_s390_migration_state *mgs;
	struct kvm_memory_slot *ms;
	/* should be the only one */
	struct kvm_memslots *slots;
	unsigned long ram_pages;
	int slotnr;

	/* migration mode already enabled */
	if (kvm->arch.migration_state)
		return 0;

	slots = kvm_memslots(kvm);
	if (!slots || !slots->used_slots)
		return -EINVAL;

	mgs = kzalloc(sizeof(*mgs), GFP_KERNEL);
	if (!mgs)
		return -ENOMEM;
	kvm->arch.migration_state = mgs;

	if (kvm->arch.use_cmma) {
		/*
		 * Get the first slot. They are reverse sorted by base_gfn, so
		 * the first slot is also the one at the end of the address
		 * space. We have verified above that at least one slot is
		 * present.
		 */
		ms = slots->memslots;
		/* round up so we only use full longs */
		ram_pages = roundup(ms->base_gfn + ms->npages, BITS_PER_LONG);
		/* allocate enough bytes to store all the bits */
		mgs->pgste_bitmap = vmalloc(ram_pages / 8);
		if (!mgs->pgste_bitmap) {
			kfree(mgs);
			kvm->arch.migration_state = NULL;
			return -ENOMEM;
		}

		mgs->bitmap_size = ram_pages;
		atomic64_set(&mgs->dirty_pages, ram_pages);
		/* mark all the pages in active slots as dirty */
		for (slotnr = 0; slotnr < slots->used_slots; slotnr++) {
			ms = slots->memslots + slotnr;
			bitmap_set(mgs->pgste_bitmap, ms->base_gfn, ms->npages);
		}

		kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
	}
	return 0;
}

/*
 * Must be called with kvm->slots_lock to avoid races with ourselves and
 * kvm_s390_vm_start_migration.
 */
static int kvm_s390_vm_stop_migration(struct kvm *kvm)
{
	struct kvm_s390_migration_state *mgs;

	/* migration mode already disabled */
	if (!kvm->arch.migration_state)
		return 0;
	mgs = kvm->arch.migration_state;
	kvm->arch.migration_state = NULL;

	if (kvm->arch.use_cmma) {
		kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
		/* We have to wait for the essa emulation to finish */
		synchronize_srcu(&kvm->srcu);
		vfree(mgs->pgste_bitmap);
	}
	kfree(mgs);
	return 0;
}

static int kvm_s390_vm_set_migration(struct kvm *kvm,
				     struct kvm_device_attr *attr)
{
	int res = -ENXIO;

	mutex_lock(&kvm->slots_lock);
	switch (attr->attr) {
	case KVM_S390_VM_MIGRATION_START:
		res = kvm_s390_vm_start_migration(kvm);
		break;
	case KVM_S390_VM_MIGRATION_STOP:
		res = kvm_s390_vm_stop_migration(kvm);
		break;
	default:
		break;
	}
	mutex_unlock(&kvm->slots_lock);

	return res;
}

static int kvm_s390_vm_get_migration(struct kvm *kvm,
				     struct kvm_device_attr *attr)
{
	u64 mig = (kvm->arch.migration_state != NULL);

	if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
		return -ENXIO;

	if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))
		return -EFAULT;
	return 0;
}

static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_tod_clock gtod;

	if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
		return -EFAULT;

	if (!test_kvm_facility(kvm, 139) && gtod.epoch_idx)
		return -EINVAL;
	kvm_s390_set_tod_clock(kvm, &gtod);

	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx",
		 gtod.epoch_idx, gtod.tod);

	return 0;
}

static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u8 gtod_high;

	if (copy_from_user(&gtod_high, (void __user *)attr->addr,
			   sizeof(gtod_high)))
		return -EFAULT;

	if (gtod_high != 0)
		return -EINVAL;
	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);

	return 0;
}

static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_tod_clock gtod = { 0 };

	if (copy_from_user(&gtod.tod, (void __user *)attr->addr,
			   sizeof(gtod.tod)))
		return -EFAULT;

	kvm_s390_set_tod_clock(kvm, &gtod);
	VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod.tod);
	return 0;
}

static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	if (attr->flags)
		return -EINVAL;

	switch (attr->attr) {
	case KVM_S390_VM_TOD_EXT:
		ret = kvm_s390_set_tod_ext(kvm, attr);
		break;
	case KVM_S390_VM_TOD_HIGH:
		ret = kvm_s390_set_tod_high(kvm, attr);
		break;
	case KVM_S390_VM_TOD_LOW:
		ret = kvm_s390_set_tod_low(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}

static void kvm_s390_get_tod_clock_ext(struct kvm *kvm,
				       struct kvm_s390_vm_tod_clock *gtod)
{
	struct kvm_s390_tod_clock_ext htod;

	preempt_disable();

	get_tod_clock_ext((char *)&htod);

	gtod->tod = htod.tod + kvm->arch.epoch;
	gtod->epoch_idx = htod.epoch_idx + kvm->arch.epdx;

	if (gtod->tod < htod.tod)
		gtod->epoch_idx += 1;

	preempt_enable();
}

static int kvm_s390_get_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_tod_clock gtod;

	memset(&gtod, 0, sizeof(gtod));

	if (test_kvm_facility(kvm, 139))
		kvm_s390_get_tod_clock_ext(kvm, &gtod);
	else
		gtod.tod = kvm_s390_get_tod_clock_fast(kvm);

	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
		return -EFAULT;

	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x, TOD base: 0x%llx",
		 gtod.epoch_idx, gtod.tod);
	return 0;
}

static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u8 gtod_high = 0;

	if (copy_to_user((void __user *)attr->addr, &gtod_high,
			 sizeof(gtod_high)))
		return -EFAULT;
	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);

	return 0;
}

static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u64 gtod;

	gtod = kvm_s390_get_tod_clock_fast(kvm);
	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
		return -EFAULT;
	VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);

	return 0;
}

static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	if (attr->flags)
		return -EINVAL;

	switch (attr->attr) {
	case KVM_S390_VM_TOD_EXT:
		ret = kvm_s390_get_tod_ext(kvm, attr);
		break;
	case KVM_S390_VM_TOD_HIGH:
		ret = kvm_s390_get_tod_high(kvm, attr);
		break;
	case KVM_S390_VM_TOD_LOW:
		ret = kvm_s390_get_tod_low(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}

static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_processor *proc;
	u16 lowest_ibc, unblocked_ibc;
	int ret = 0;

	mutex_lock(&kvm->lock);
	if (kvm->created_vcpus) {
		ret = -EBUSY;
		goto out;
	}
	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
	if (!proc) {
		ret = -ENOMEM;
		goto out;
	}
	if (!copy_from_user(proc, (void __user *)attr->addr,
			    sizeof(*proc))) {
		kvm->arch.model.cpuid = proc->cpuid;
		lowest_ibc = sclp.ibc >> 16 & 0xfff;
		unblocked_ibc = sclp.ibc & 0xfff;
		if (lowest_ibc && proc->ibc) {
			if (proc->ibc > unblocked_ibc)
				kvm->arch.model.ibc = unblocked_ibc;
			else if (proc->ibc < lowest_ibc)
				kvm->arch.model.ibc = lowest_ibc;
			else
				kvm->arch.model.ibc = proc->ibc;
		}
		memcpy(kvm->arch.model.fac_list, proc->fac_list,
		       S390_ARCH_FAC_LIST_SIZE_BYTE);
		VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
			 kvm->arch.model.ibc,
			 kvm->arch.model.cpuid);
		VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
			 kvm->arch.model.fac_list[0],
			 kvm->arch.model.fac_list[1],
			 kvm->arch.model.fac_list[2]);
	} else
		ret = -EFAULT;
	kfree(proc);
out:
	mutex_unlock(&kvm->lock);
	return ret;
}

static int kvm_s390_set_processor_feat(struct kvm *kvm,
				       struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_feat data;

	if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
		return -EFAULT;
	if (!bitmap_subset((unsigned long *) data.feat,
			   kvm_s390_available_cpu_feat,
			   KVM_S390_VM_CPU_FEAT_NR_BITS))
		return -EINVAL;

	mutex_lock(&kvm->lock);
	if (kvm->created_vcpus) {
		mutex_unlock(&kvm->lock);
		return -EBUSY;
	}
	bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
		    KVM_S390_VM_CPU_FEAT_NR_BITS);
	mutex_unlock(&kvm->lock);
	VM_EVENT(kvm, 3, "SET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
		 data.feat[0],
		 data.feat[1],
		 data.feat[2]);
	return 0;
}

static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
					  struct kvm_device_attr *attr)
{
	/*
	 * Once supported by kernel + hw, we have to store the subfunctions
	 * in kvm->arch and remember that user space configured them.
	 */
	return -ENXIO;
}

static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret = -ENXIO;

	switch (attr->attr) {
	case KVM_S390_VM_CPU_PROCESSOR:
		ret = kvm_s390_set_processor(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
		ret = kvm_s390_set_processor_feat(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
		ret = kvm_s390_set_processor_subfunc(kvm, attr);
		break;
	}
	return ret;
}

static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_processor *proc;
	int ret = 0;

	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
	if (!proc) {
		ret = -ENOMEM;
		goto out;
	}
	proc->cpuid = kvm->arch.model.cpuid;
	proc->ibc = kvm->arch.model.ibc;
	memcpy(&proc->fac_list, kvm->arch.model.fac_list,
	       S390_ARCH_FAC_LIST_SIZE_BYTE);
	VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
		 kvm->arch.model.ibc,
		 kvm->arch.model.cpuid);
	VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
		 kvm->arch.model.fac_list[0],
		 kvm->arch.model.fac_list[1],
		 kvm->arch.model.fac_list[2]);
	if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
		ret = -EFAULT;
	kfree(proc);
out:
	return ret;
}

static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_machine *mach;
	int ret = 0;

	mach = kzalloc(sizeof(*mach), GFP_KERNEL);
	if (!mach) {
		ret = -ENOMEM;
		goto out;
	}
	get_cpu_id((struct cpuid *) &mach->cpuid);
	mach->ibc = sclp.ibc;
	memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
	       S390_ARCH_FAC_LIST_SIZE_BYTE);
	memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
	       sizeof(S390_lowcore.stfle_fac_list));
	VM_EVENT(kvm, 3, "GET: host ibc: 0x%4.4x, host cpuid: 0x%16.16llx",
		 kvm->arch.model.ibc,
		 kvm->arch.model.cpuid);
	VM_EVENT(kvm, 3, "GET: host facmask: 0x%16.16llx.%16.16llx.%16.16llx",
		 mach->fac_mask[0],
		 mach->fac_mask[1],
		 mach->fac_mask[2]);
	VM_EVENT(kvm, 3, "GET: host faclist: 0x%16.16llx.%16.16llx.%16.16llx",
		 mach->fac_list[0],
		 mach->fac_list[1],
		 mach->fac_list[2]);
	if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
		ret = -EFAULT;
	kfree(mach);
out:
	return ret;
}

static int kvm_s390_get_processor_feat(struct kvm *kvm,
				       struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_feat data;

	bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
		    KVM_S390_VM_CPU_FEAT_NR_BITS);
	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
		return -EFAULT;
	VM_EVENT(kvm, 3, "GET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
		 data.feat[0],
		 data.feat[1],
		 data.feat[2]);
	return 0;
}

static int kvm_s390_get_machine_feat(struct kvm *kvm,
				     struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_feat data;

	bitmap_copy((unsigned long *) data.feat,
		    kvm_s390_available_cpu_feat,
		    KVM_S390_VM_CPU_FEAT_NR_BITS);
	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
		return -EFAULT;
	VM_EVENT(kvm, 3, "GET: host feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
		 data.feat[0],
		 data.feat[1],
		 data.feat[2]);
	return 0;
}

static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
					  struct kvm_device_attr *attr)
{
	/*
	 * Once we can actually configure subfunctions (kernel + hw support),
	 * we have to check if they were already set by user space, if so copy
	 * them from kvm->arch.
	 */
	return -ENXIO;
}

static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
					struct kvm_device_attr *attr)
{
	if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
			 sizeof(struct kvm_s390_vm_cpu_subfunc)))
		return -EFAULT;
	return 0;
}

static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret = -ENXIO;

	switch (attr->attr) {
	case KVM_S390_VM_CPU_PROCESSOR:
		ret = kvm_s390_get_processor(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MACHINE:
		ret = kvm_s390_get_machine(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
		ret = kvm_s390_get_processor_feat(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MACHINE_FEAT:
		ret = kvm_s390_get_machine_feat(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
		ret = kvm_s390_get_processor_subfunc(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
		ret = kvm_s390_get_machine_subfunc(kvm, attr);
		break;
	}
	return ret;
}

static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->group) {
	case KVM_S390_VM_MEM_CTRL:
		ret = kvm_s390_set_mem_control(kvm, attr);
		break;
	case KVM_S390_VM_TOD:
		ret = kvm_s390_set_tod(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MODEL:
		ret = kvm_s390_set_cpu_model(kvm, attr);
		break;
	case KVM_S390_VM_CRYPTO:
		ret = kvm_s390_vm_set_crypto(kvm, attr);
		break;
	case KVM_S390_VM_MIGRATION:
		ret = kvm_s390_vm_set_migration(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}

	return ret;
}

static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->group) {
	case KVM_S390_VM_MEM_CTRL:
		ret = kvm_s390_get_mem_control(kvm, attr);
		break;
	case KVM_S390_VM_TOD:
		ret = kvm_s390_get_tod(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MODEL:
		ret = kvm_s390_get_cpu_model(kvm, attr);
		break;
	case KVM_S390_VM_MIGRATION:
		ret = kvm_s390_vm_get_migration(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}

	return ret;
}

static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->group) {
	case KVM_S390_VM_MEM_CTRL:
		switch (attr->attr) {
		case KVM_S390_VM_MEM_ENABLE_CMMA:
		case KVM_S390_VM_MEM_CLR_CMMA:
			ret = sclp.has_cmma ? 0 : -ENXIO;
			break;
		case KVM_S390_VM_MEM_LIMIT_SIZE:
			ret = 0;
			break;
		default:
			ret = -ENXIO;
			break;
		}
		break;
	case KVM_S390_VM_TOD:
		switch (attr->attr) {
		case KVM_S390_VM_TOD_LOW:
		case KVM_S390_VM_TOD_HIGH:
			ret = 0;
			break;
		default:
			ret = -ENXIO;
			break;
		}
		break;
	case KVM_S390_VM_CPU_MODEL:
		switch (attr->attr) {
		case KVM_S390_VM_CPU_PROCESSOR:
		case KVM_S390_VM_CPU_MACHINE:
		case KVM_S390_VM_CPU_PROCESSOR_FEAT:
		case KVM_S390_VM_CPU_MACHINE_FEAT:
		case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
			ret = 0;
			break;
		/* configuring subfunctions is not supported yet */
		case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
		default:
			ret = -ENXIO;
			break;
		}
		break;
	case KVM_S390_VM_CRYPTO:
		switch (attr->attr) {
		case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
		case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
		case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
		case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
			ret = 0;
			break;
		default:
			ret = -ENXIO;
			break;
		}
		break;
	case KVM_S390_VM_MIGRATION:
		ret = 0;
		break;
	default:
		ret = -ENXIO;
		break;
	}

	return ret;
}

static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
{
	uint8_t *keys;
	uint64_t hva;
	int srcu_idx, i, r = 0;

	if (args->flags != 0)
		return -EINVAL;

	/* Is this guest using storage keys? */
	if (!mm_use_skey(current->mm))
		return KVM_S390_GET_SKEYS_NONE;

	/* Enforce sane limit on memory allocation */
	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
		return -EINVAL;

	keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
	if (!keys)
		return -ENOMEM;

	down_read(&current->mm->mmap_sem);
	srcu_idx = srcu_read_lock(&kvm->srcu);
	for (i = 0; i < args->count; i++) {
		hva = gfn_to_hva(kvm, args->start_gfn + i);
		if (kvm_is_error_hva(hva)) {
			r = -EFAULT;
			break;
		}

		r = get_guest_storage_key(current->mm, hva, &keys[i]);
		if (r)
			break;
	}
	srcu_read_unlock(&kvm->srcu, srcu_idx);
	up_read(&current->mm->mmap_sem);

	if (!r) {
		r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
				 sizeof(uint8_t) * args->count);
		if (r)
			r = -EFAULT;
	}

	kvfree(keys);
	return r;
}

static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
{
	uint8_t *keys;
	uint64_t hva;
	int srcu_idx, i, r = 0;

	if (args->flags != 0)
		return -EINVAL;

	/* Enforce sane limit on memory allocation */
	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
		return -EINVAL;

	keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
	if (!keys)
		return -ENOMEM;

	r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
			   sizeof(uint8_t) * args->count);
	if (r) {
		r = -EFAULT;
		goto out;
	}

	/* Enable storage key handling for the guest */
	r = s390_enable_skey();
	if (r)
		goto out;

	down_read(&current->mm->mmap_sem);
	srcu_idx = srcu_read_lock(&kvm->srcu);
	for (i = 0; i < args->count; i++) {
		hva = gfn_to_hva(kvm, args->start_gfn + i);
		if (kvm_is_error_hva(hva)) {
			r = -EFAULT;
			break;
		}

		/* Lowest order bit is reserved */
		if (keys[i] & 0x01) {
			r = -EINVAL;
			break;
		}

		r = set_guest_storage_key(current->mm, hva, keys[i], 0);
		if (r)
			break;
	}
	srcu_read_unlock(&kvm->srcu, srcu_idx);
	up_read(&current->mm->mmap_sem);
out:
	kvfree(keys);
	return r;
}

/*
 * Base address and length must be sent at the start of each block, therefore
 * it's cheaper to send some clean data, as long as it's less than the size
 * of two longs.
 */
#define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *))
/* for consistency */
#define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)

/*
 * This function searches for the next page with dirty CMMA attributes, and
 * saves the attributes in the buffer up to either the end of the buffer or
 * until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found;
 * no trailing clean bytes are saved.
 * In case no dirty bits were found, or if CMMA was not enabled or used, the
 * output buffer will indicate 0 as length.
 */
static int kvm_s390_get_cmma_bits(struct kvm *kvm,
				  struct kvm_s390_cmma_log *args)
{
	struct kvm_s390_migration_state *s = kvm->arch.migration_state;
	unsigned long bufsize, hva, pgstev, i, next, cur;
	int srcu_idx, peek, r = 0, rr;
	u8 *res;

	cur = args->start_gfn;
	i = next = pgstev = 0;

	if (unlikely(!kvm->arch.use_cmma))
		return -ENXIO;
	/* Invalid/unsupported flags were specified */
	if (args->flags & ~KVM_S390_CMMA_PEEK)
		return -EINVAL;
	/* Migration mode query, and we are not doing a migration */
	peek = !!(args->flags & KVM_S390_CMMA_PEEK);
	if (!peek && !s)
		return -EINVAL;
	/* CMMA is disabled or was not used, or the buffer has length zero */
	bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
	if (!bufsize || !kvm->mm->context.uses_cmm) {
		memset(args, 0, sizeof(*args));
		return 0;
	}

	if (!peek) {
		/* We are not peeking, and there are no dirty pages */
		if (!atomic64_read(&s->dirty_pages)) {
			memset(args, 0, sizeof(*args));
			return 0;
		}
		cur = find_next_bit(s->pgste_bitmap, s->bitmap_size,
				    args->start_gfn);
		if (cur >= s->bitmap_size)	/* nothing found, loop back */
			cur = find_next_bit(s->pgste_bitmap, s->bitmap_size, 0);
		if (cur >= s->bitmap_size) {	/* again! (very unlikely) */
			memset(args, 0, sizeof(*args));
			return 0;
		}
		next = find_next_bit(s->pgste_bitmap, s->bitmap_size, cur + 1);
	}

	res = vmalloc(bufsize);
	if (!res)
		return -ENOMEM;

	args->start_gfn = cur;

	down_read(&kvm->mm->mmap_sem);
	srcu_idx = srcu_read_lock(&kvm->srcu);
	while (i < bufsize) {
		hva = gfn_to_hva(kvm, cur);
		if (kvm_is_error_hva(hva)) {
			r = -EFAULT;
			break;
		}
		/* decrement only if we actually flipped the bit to 0 */
		if (!peek && test_and_clear_bit(cur, s->pgste_bitmap))
			atomic64_dec(&s->dirty_pages);
		r = get_pgste(kvm->mm, hva, &pgstev);
		if (r < 0)
			pgstev = 0;
		/* save the value */
		res[i++] = (pgstev >> 24) & 0x43;
		/*
		 * if the next bit is too far away, stop.
		 * if we reached the previous "next", find the next one
		 */
		if (!peek) {
			if (next > cur + KVM_S390_MAX_BIT_DISTANCE)
				break;
			if (cur == next)
				next = find_next_bit(s->pgste_bitmap,
						     s->bitmap_size, cur + 1);
			/* reached the end of the bitmap or of the buffer, stop */
			if ((next >= s->bitmap_size) ||
			    (next >= args->start_gfn + bufsize))
				break;
		}
		cur++;
	}
	srcu_read_unlock(&kvm->srcu, srcu_idx);
	up_read(&kvm->mm->mmap_sem);
	args->count = i;
	args->remaining = s ? atomic64_read(&s->dirty_pages) : 0;

	rr = copy_to_user((void __user *)args->values, res, args->count);
	if (rr)
		r = -EFAULT;

	vfree(res);
	return r;
}

/*
 * This function sets the CMMA attributes for the given pages. If the input
 * buffer has zero length, no action is taken, otherwise the attributes are
 * set and the mm->context.uses_cmm flag is set.
 */
static int kvm_s390_set_cmma_bits(struct kvm *kvm,
				  const struct kvm_s390_cmma_log *args)
{
	unsigned long hva, mask, pgstev, i;
	uint8_t *bits;
	int srcu_idx, r = 0;

	mask = args->mask;

	if (!kvm->arch.use_cmma)
		return -ENXIO;
	/* invalid/unsupported flags */
	if (args->flags != 0)
		return -EINVAL;
	/* Enforce sane limit on memory allocation */
	if (args->count > KVM_S390_CMMA_SIZE_MAX)
		return -EINVAL;
	/* Nothing to do */
	if (args->count == 0)
		return 0;

	bits = vmalloc(sizeof(*bits) * args->count);
	if (!bits)
		return -ENOMEM;

	r = copy_from_user(bits, (void __user *)args->values, args->count);
	if (r) {
		r = -EFAULT;
		goto out;
	}

	down_read(&kvm->mm->mmap_sem);
	srcu_idx = srcu_read_lock(&kvm->srcu);
	for (i = 0; i < args->count; i++) {
		hva = gfn_to_hva(kvm, args->start_gfn + i);
		if (kvm_is_error_hva(hva)) {
			r = -EFAULT;
			break;
		}

		pgstev = bits[i];
		pgstev = pgstev << 24;
		mask &= _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT;
		set_pgste_bits(kvm->mm, hva, mask, pgstev);
	}
	srcu_read_unlock(&kvm->srcu, srcu_idx);
	up_read(&kvm->mm->mmap_sem);

	if (!kvm->mm->context.uses_cmm) {
		down_write(&kvm->mm->mmap_sem);
		kvm->mm->context.uses_cmm = 1;
		up_write(&kvm->mm->mmap_sem);
	}
out:
	vfree(bits);
	return r;
}

long kvm_arch_vm_ioctl(struct file *filp,
		       unsigned int ioctl, unsigned long arg)
{
	struct kvm *kvm = filp->private_data;
	void __user *argp = (void __user *)arg;
	struct kvm_device_attr attr;
	int r;

	switch (ioctl) {
	case KVM_S390_INTERRUPT: {
		struct kvm_s390_interrupt s390int;

		r = -EFAULT;
		if (copy_from_user(&s390int, argp, sizeof(s390int)))
			break;
		r = kvm_s390_inject_vm(kvm, &s390int);
		break;
	}
	case KVM_ENABLE_CAP: {
		struct kvm_enable_cap cap;
		r = -EFAULT;
		if (copy_from_user(&cap, argp, sizeof(cap)))
			break;
		r = kvm_vm_ioctl_enable_cap(kvm, &cap);
		break;
	}
	case KVM_CREATE_IRQCHIP: {
		struct kvm_irq_routing_entry routing;

		r = -EINVAL;
		if (kvm->arch.use_irqchip) {
			/* Set up dummy routing. */
			memset(&routing, 0, sizeof(routing));
			r = kvm_set_irq_routing(kvm, &routing, 0, 0);
		}
		break;
	}
	case KVM_SET_DEVICE_ATTR: {
		r = -EFAULT;
		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
			break;
		r = kvm_s390_vm_set_attr(kvm, &attr);
		break;
	}
	case KVM_GET_DEVICE_ATTR: {
		r = -EFAULT;
		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
			break;
		r = kvm_s390_vm_get_attr(kvm, &attr);
		break;
	}
	case KVM_HAS_DEVICE_ATTR: {
		r = -EFAULT;
		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
			break;
		r = kvm_s390_vm_has_attr(kvm, &attr);
		break;
	}
	case KVM_S390_GET_SKEYS: {
		struct kvm_s390_skeys args;

		r = -EFAULT;
		if (copy_from_user(&args, argp,
				   sizeof(struct kvm_s390_skeys)))
			break;
		r = kvm_s390_get_skeys(kvm, &args);
		break;
	}
	case KVM_S390_SET_SKEYS: {
		struct kvm_s390_skeys args;

		r = -EFAULT;
		if (copy_from_user(&args, argp,
				   sizeof(struct kvm_s390_skeys)))
			break;
		r = kvm_s390_set_skeys(kvm, &args);
		break;
	}
	case KVM_S390_GET_CMMA_BITS: {
		struct kvm_s390_cmma_log args;

		r = -EFAULT;
		if (copy_from_user(&args, argp, sizeof(args)))
			break;
		mutex_lock(&kvm->slots_lock);
		r = kvm_s390_get_cmma_bits(kvm, &args);
		mutex_unlock(&kvm->slots_lock);
		if (!r) {
			r = copy_to_user(argp, &args, sizeof(args));
			if (r)
				r = -EFAULT;
		}
		break;
	}
	case KVM_S390_SET_CMMA_BITS: {
		struct kvm_s390_cmma_log args;

		r = -EFAULT;
		if (copy_from_user(&args, argp, sizeof(args)))
			break;
		mutex_lock(&kvm->slots_lock);
		r = kvm_s390_set_cmma_bits(kvm, &args);
		mutex_unlock(&kvm->slots_lock);
		break;
	}
	default:
		r = -ENOTTY;
	}

	return r;
}

static int kvm_s390_query_ap_config(u8 *config)
{
	u32 fcn_code = 0x04000000UL;
	u32 cc = 0;

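	/* GR0 carries the PQAP function code (QCI); GR2 the address of the 128-byte config area */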
	memset(config, 0, 128);
	asm volatile(
		"lgr 0,%1\n"
		"lgr 2,%2\n"
		".long 0xb2af0000\n"		/* PQAP(QCI) */
		"0: ipm %0\n"
		"srl %0,28\n"
		"1:\n"
		EX_TABLE(0b, 1b)
		: "+r" (cc)
		: "r" (fcn_code), "r" (config)
		: "cc", "0", "2", "memory"
	);

	return cc;
}

static int kvm_s390_apxa_installed(void)
{
	u8 config[128];
	int cc;

	if (test_facility(12)) {
		cc = kvm_s390_query_ap_config(config);

		if (cc)
			pr_err("PQAP(QCI) failed with cc=%d", cc);
		else
			return config[0] & 0x40;
	}

	return 0;
}

static void kvm_s390_set_crycb_format(struct kvm *kvm)
{
	kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;

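	/* use the format-2 CRYCB when the APXA facility is installed, else format 1 */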
	if (kvm_s390_apxa_installed())
		kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
	else
		kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
}

static u64 kvm_s390_get_initial_cpuid(void)
{
	struct cpuid cpuid;

	get_cpu_id(&cpuid);
	cpuid.version = 0xff;
	return *((u64 *) &cpuid);
}

static void kvm_s390_crypto_init(struct kvm *kvm)
{
	if (!test_kvm_facility(kvm, 76))
		return;

	kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
	kvm_s390_set_crycb_format(kvm);

	/* Enable AES/DEA protected key functions by default */
	kvm->arch.crypto.aes_kw = 1;
	kvm->arch.crypto.dea_kw = 1;
	get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
			 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
	get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
			 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
}

static void sca_dispose(struct kvm *kvm)
{
	if (kvm->arch.use_esca)
		free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
	else
		free_page((unsigned long)(kvm->arch.sca));
	kvm->arch.sca = NULL;
}

int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
{
	gfp_t alloc_flags = GFP_KERNEL;
	int i, rc;
	char debug_name[16];
	static unsigned long sca_offset;

	rc = -EINVAL;
#ifdef CONFIG_KVM_S390_UCONTROL
	if (type & ~KVM_VM_S390_UCONTROL)
		goto out_err;
	if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
		goto out_err;
#else
	if (type)
		goto out_err;
#endif

	rc = s390_enable_sie();
	if (rc)
		goto out_err;

	rc = -ENOMEM;

	kvm->arch.use_esca = 0; /* start with basic SCA */
	if (!sclp.has_64bscao)
		alloc_flags |= GFP_DMA;
	rwlock_init(&kvm->arch.sca_lock);
	kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
	if (!kvm->arch.sca)
		goto out_err;
1992 spin_lock(&kvm_lock);
1993 sca_offset += 16;
1994 if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
1995 sca_offset = 0;
1996 kvm->arch.sca = (struct bsca_block *)
1997 ((char *) kvm->arch.sca + sca_offset);
1998 spin_unlock(&kvm_lock);
1999
2000 sprintf(debug_name, "kvm-%u", current->pid);
2001
2002 kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
2003 if (!kvm->arch.dbf)
2004 goto out_err;
2005
2006 BUILD_BUG_ON(sizeof(struct sie_page2) != 4096);
2007 kvm->arch.sie_page2 =
2008 (struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
2009 if (!kvm->arch.sie_page2)
2010 goto out_err;
2011
2012 kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
2013
2014 for (i = 0; i < kvm_s390_fac_size(); i++) {
2015 kvm->arch.model.fac_mask[i] = S390_lowcore.stfle_fac_list[i] &
2016 (kvm_s390_fac_base[i] |
2017 kvm_s390_fac_ext[i]);
2018 kvm->arch.model.fac_list[i] = S390_lowcore.stfle_fac_list[i] &
2019 kvm_s390_fac_base[i];
2020 }
2021
2022 /* we are always in czam mode - even on pre z14 machines */
2023 set_kvm_facility(kvm->arch.model.fac_mask, 138);
2024 set_kvm_facility(kvm->arch.model.fac_list, 138);
2025 /* we emulate STHYI in kvm */
2026 set_kvm_facility(kvm->arch.model.fac_mask, 74);
2027 set_kvm_facility(kvm->arch.model.fac_list, 74);
2028 if (MACHINE_HAS_TLB_GUEST) {
2029 set_kvm_facility(kvm->arch.model.fac_mask, 147);
2030 set_kvm_facility(kvm->arch.model.fac_list, 147);
2031 }
2032
2033 kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
2034 kvm->arch.model.ibc = sclp.ibc & 0x0fff;
2035
2036 kvm_s390_crypto_init(kvm);
2037
2038 mutex_init(&kvm->arch.float_int.ais_lock);
2039 kvm->arch.float_int.simm = 0;
2040 kvm->arch.float_int.nimm = 0;
2041 spin_lock_init(&kvm->arch.float_int.lock);
2042 for (i = 0; i < FIRQ_LIST_COUNT; i++)
2043 INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
2044 init_waitqueue_head(&kvm->arch.ipte_wq);
2045 mutex_init(&kvm->arch.ipte_mutex);
2046
2047 debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
2048 VM_EVENT(kvm, 3, "vm created with type %lu", type);
2049
2050 if (type & KVM_VM_S390_UCONTROL) {
2051 kvm->arch.gmap = NULL;
2052 kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
2053 } else {
2054 if (sclp.hamax == U64_MAX)
2055 kvm->arch.mem_limit = TASK_SIZE_MAX;
2056 else
2057 kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX,
2058 sclp.hamax + 1);
2059 kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
2060 if (!kvm->arch.gmap)
2061 goto out_err;
2062 kvm->arch.gmap->private = kvm;
2063 kvm->arch.gmap->pfault_enabled = 0;
2064 }
2065
2066 kvm->arch.css_support = 0;
2067 kvm->arch.use_irqchip = 0;
2068 kvm->arch.use_pfmfi = sclp.has_pfmfi;
2069 kvm->arch.epoch = 0;
2070
2071 spin_lock_init(&kvm->arch.start_stop_lock);
2072 kvm_s390_vsie_init(kvm);
2073 kvm_s390_gisa_init(kvm);
2074 KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
2075
2076 return 0;
2077out_err:
2078 free_page((unsigned long)kvm->arch.sie_page2);
2079 debug_unregister(kvm->arch.dbf);
2080 sca_dispose(kvm);
2081 KVM_EVENT(3, "creation of vm failed: %d", rc);
2082 return rc;
2083}
2084
2085bool kvm_arch_has_vcpu_debugfs(void)
2086{
2087 return false;
2088}
2089
2090int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu)
2091{
2092 return 0;
2093}
2094
2095void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
2096{
2097 VCPU_EVENT(vcpu, 3, "%s", "free cpu");
2098 trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
2099 kvm_s390_clear_local_irqs(vcpu);
2100 kvm_clear_async_pf_completion_queue(vcpu);
2101 if (!kvm_is_ucontrol(vcpu->kvm))
2102 sca_del_vcpu(vcpu);
2103
2104 if (kvm_is_ucontrol(vcpu->kvm))
2105 gmap_remove(vcpu->arch.gmap);
2106
2107 if (vcpu->kvm->arch.use_cmma)
2108 kvm_s390_vcpu_unsetup_cmma(vcpu);
2109 free_page((unsigned long)(vcpu->arch.sie_block));
2110
2111 kvm_vcpu_uninit(vcpu);
2112 kmem_cache_free(kvm_vcpu_cache, vcpu);
2113}
2114
2115static void kvm_free_vcpus(struct kvm *kvm)
2116{
2117 unsigned int i;
2118 struct kvm_vcpu *vcpu;
2119
2120 kvm_for_each_vcpu(i, vcpu, kvm)
2121 kvm_arch_vcpu_destroy(vcpu);
2122
2123 mutex_lock(&kvm->lock);
2124 for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
2125 kvm->vcpus[i] = NULL;
2126
2127 atomic_set(&kvm->online_vcpus, 0);
2128 mutex_unlock(&kvm->lock);
2129}
2130
2131void kvm_arch_destroy_vm(struct kvm *kvm)
2132{
2133 kvm_free_vcpus(kvm);
2134 sca_dispose(kvm);
2135 debug_unregister(kvm->arch.dbf);
2136 kvm_s390_gisa_destroy(kvm);
2137 free_page((unsigned long)kvm->arch.sie_page2);
2138 if (!kvm_is_ucontrol(kvm))
2139 gmap_remove(kvm->arch.gmap);
2140 kvm_s390_destroy_adapters(kvm);
2141 kvm_s390_clear_float_irqs(kvm);
2142 kvm_s390_vsie_destroy(kvm);
2143 if (kvm->arch.migration_state) {
2144 vfree(kvm->arch.migration_state->pgste_bitmap);
2145 kfree(kvm->arch.migration_state);
2146 }
2147 KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
2148}
2149
2150/* Section: vcpu related */
2151static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
2152{
2153 vcpu->arch.gmap = gmap_create(current->mm, -1UL);
2154 if (!vcpu->arch.gmap)
2155 return -ENOMEM;
2156 vcpu->arch.gmap->private = vcpu->kvm;
2157
2158 return 0;
2159}
2160
2161static void sca_del_vcpu(struct kvm_vcpu *vcpu)
2162{
2163 if (!kvm_s390_use_sca_entries())
2164 return;
2165 read_lock(&vcpu->kvm->arch.sca_lock);
2166 if (vcpu->kvm->arch.use_esca) {
2167 struct esca_block *sca = vcpu->kvm->arch.sca;
2168
2169 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2170 sca->cpu[vcpu->vcpu_id].sda = 0;
2171 } else {
2172 struct bsca_block *sca = vcpu->kvm->arch.sca;
2173
2174 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2175 sca->cpu[vcpu->vcpu_id].sda = 0;
2176 }
2177 read_unlock(&vcpu->kvm->arch.sca_lock);
2178}
2179
2180static void sca_add_vcpu(struct kvm_vcpu *vcpu)
2181{
2182 if (!kvm_s390_use_sca_entries()) {
2183 struct bsca_block *sca = vcpu->kvm->arch.sca;
2184
2185 /* we still need the basic sca for the ipte control */
2186 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2187 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2188 return;
2189 }
2190 read_lock(&vcpu->kvm->arch.sca_lock);
2191 if (vcpu->kvm->arch.use_esca) {
2192 struct esca_block *sca = vcpu->kvm->arch.sca;
2193
2194 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2195 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2196 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
2197 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2198 set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2199 } else {
2200 struct bsca_block *sca = vcpu->kvm->arch.sca;
2201
2202 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2203 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2204 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2205 set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2206 }
2207 read_unlock(&vcpu->kvm->arch.sca_lock);
2208}
2209
2210/* Basic SCA to Extended SCA data copy routines */
2211static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
2212{
2213 d->sda = s->sda;
2214 d->sigp_ctrl.c = s->sigp_ctrl.c;
2215 d->sigp_ctrl.scn = s->sigp_ctrl.scn;
2216}
2217
2218static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
2219{
2220 int i;
2221
2222 d->ipte_control = s->ipte_control;
2223 d->mcn[0] = s->mcn;
2224 for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
2225 sca_copy_entry(&d->cpu[i], &s->cpu[i]);
2226}
2227
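/*
 * Replace the basic SCA with an extended one. All VCPUs are blocked and
 * kicked out of SIE while the new block is populated under the sca_lock
 * write lock, so no CPU can run with a stale scaoh/scaol pair; afterwards
 * every SIE block points to the new ESCA and has ECB2_ESCA set.
 */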
2228static int sca_switch_to_extended(struct kvm *kvm)
2229{
2230 struct bsca_block *old_sca = kvm->arch.sca;
2231 struct esca_block *new_sca;
2232 struct kvm_vcpu *vcpu;
2233 unsigned int vcpu_idx;
2234 u32 scaol, scaoh;
2235
2236 new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);
2237 if (!new_sca)
2238 return -ENOMEM;
2239
2240 scaoh = (u32)((u64)(new_sca) >> 32);
2241 scaol = (u32)(u64)(new_sca) & ~0x3fU;
2242
2243 kvm_s390_vcpu_block_all(kvm);
2244 write_lock(&kvm->arch.sca_lock);
2245
2246 sca_copy_b_to_e(new_sca, old_sca);
2247
2248 kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
2249 vcpu->arch.sie_block->scaoh = scaoh;
2250 vcpu->arch.sie_block->scaol = scaol;
2251 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2252 }
2253 kvm->arch.sca = new_sca;
2254 kvm->arch.use_esca = 1;
2255
2256 write_unlock(&kvm->arch.sca_lock);
2257 kvm_s390_vcpu_unblock_all(kvm);
2258
2259 free_page((unsigned long)old_sca);
2260
2261 VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
2262 old_sca, kvm->arch.sca);
2263 return 0;
2264}
2265
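/*
 * Check whether a VCPU id fits into the current SCA, switching from the
 * basic SCA (64 entries) to the extended one (248 entries) on demand.
 * The switch requires both the ESCA and the 64-bit-SCAO facilities.
 */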
2266static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
2267{
2268 int rc;
2269
2270 if (!kvm_s390_use_sca_entries()) {
2271 if (id < KVM_MAX_VCPUS)
2272 return true;
2273 return false;
2274 }
2275 if (id < KVM_S390_BSCA_CPU_SLOTS)
2276 return true;
2277 if (!sclp.has_esca || !sclp.has_64bscao)
2278 return false;
2279
2280 mutex_lock(&kvm->lock);
2281 rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
2282 mutex_unlock(&kvm->lock);
2283
2284 return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
2285}
2286
2287int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
2288{
2289 vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
2290 kvm_clear_async_pf_completion_queue(vcpu);
2291 vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
2292 KVM_SYNC_GPRS |
2293 KVM_SYNC_ACRS |
2294 KVM_SYNC_CRS |
2295 KVM_SYNC_ARCH0 |
2296 KVM_SYNC_PFAULT;
2297 kvm_s390_set_prefix(vcpu, 0);
2298 if (test_kvm_facility(vcpu->kvm, 64))
2299 vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
2300 if (test_kvm_facility(vcpu->kvm, 82))
2301 vcpu->run->kvm_valid_regs |= KVM_SYNC_BPBC;
2302 if (test_kvm_facility(vcpu->kvm, 133))
2303 vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB;
2304 /* fprs can be synchronized via vrs, even if the guest has no vx. With
2305 * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
2306 */
2307 if (MACHINE_HAS_VX)
2308 vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
2309 else
2310 vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
2311
2312 if (kvm_is_ucontrol(vcpu->kvm))
2313 return __kvm_ucontrol_vcpu_init(vcpu);
2314
2315 return 0;
2316}
2317
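/*
 * Guest CPU timer accounting: cputm_start holds the host TOD value at
 * which accounting was started, so readers can compute the current timer
 * value without stopping the clock. The writers below run with preemption
 * disabled and publish their updates via cputm_seqcount;
 * kvm_s390_get_cpu_timer() retries on that seqcount to obtain a
 * consistent cputm/cputm_start snapshot.
 */
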
2318/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2319static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2320{
2321 WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
2322 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2323 vcpu->arch.cputm_start = get_tod_clock_fast();
2324 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2325}
2326
2327/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2328static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2329{
2330 WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
2331 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2332 vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
2333 vcpu->arch.cputm_start = 0;
2334 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2335}
2336
2337/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2338static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2339{
2340 WARN_ON_ONCE(vcpu->arch.cputm_enabled);
2341 vcpu->arch.cputm_enabled = true;
2342 __start_cpu_timer_accounting(vcpu);
2343}
2344
2345/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2346static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2347{
2348 WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
2349 __stop_cpu_timer_accounting(vcpu);
2350 vcpu->arch.cputm_enabled = false;
2351}
2352
2353static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2354{
2355 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2356 __enable_cpu_timer_accounting(vcpu);
2357 preempt_enable();
2358}
2359
2360static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2361{
2362 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2363 __disable_cpu_timer_accounting(vcpu);
2364 preempt_enable();
2365}
2366
2367/* set the cpu timer - may only be called from the VCPU thread itself */
2368void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
2369{
2370 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2371 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2372 if (vcpu->arch.cputm_enabled)
2373 vcpu->arch.cputm_start = get_tod_clock_fast();
2374 vcpu->arch.sie_block->cputm = cputm;
2375 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2376 preempt_enable();
2377}
2378
2379/* update and get the cpu timer - can also be called from other VCPU threads */
2380__u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
2381{
2382 unsigned int seq;
2383 __u64 value;
2384
2385 if (unlikely(!vcpu->arch.cputm_enabled))
2386 return vcpu->arch.sie_block->cputm;
2387
2388 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2389 do {
2390 seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
2391 /*
2392 * If the writer would ever execute a read in the critical
2393 * section, e.g. in irq context, we have a deadlock.
2394 */
2395 WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
2396 value = vcpu->arch.sie_block->cputm;
2397 /* if cputm_start is 0, accounting is being started/stopped */
2398 if (likely(vcpu->arch.cputm_start))
2399 value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
2400 } while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
2401 preempt_enable();
2402 return value;
2403}
2404
2405void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
2406{
2407
2408 gmap_enable(vcpu->arch.enabled_gmap);
2409 kvm_s390_set_cpuflags(vcpu, CPUSTAT_RUNNING);
2410 if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
2411 __start_cpu_timer_accounting(vcpu);
2412 vcpu->cpu = cpu;
2413}
2414
2415void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
2416{
2417 vcpu->cpu = -1;
2418 if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
2419 __stop_cpu_timer_accounting(vcpu);
2420 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_RUNNING);
2421 vcpu->arch.enabled_gmap = gmap_get_enabled();
2422 gmap_disable(vcpu->arch.enabled_gmap);
2423
2424}
2425
2426static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
2427{
2428 /* this equals initial cpu reset in pop, but we don't switch to ESA */
2429 vcpu->arch.sie_block->gpsw.mask = 0UL;
2430 vcpu->arch.sie_block->gpsw.addr = 0UL;
2431 kvm_s390_set_prefix(vcpu, 0);
2432 kvm_s390_set_cpu_timer(vcpu, 0);
2433 vcpu->arch.sie_block->ckc = 0UL;
2434 vcpu->arch.sie_block->todpr = 0;
2435 memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
2436 vcpu->arch.sie_block->gcr[0] = 0xE0UL;
2437 vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
2438 /* make sure the new fpc will be lazily loaded */
2439 save_fpu_regs();
2440 current->thread.fpu.fpc = 0;
2441 vcpu->arch.sie_block->gbea = 1;
2442 vcpu->arch.sie_block->pp = 0;
2443 vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
2444 vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
2445 kvm_clear_async_pf_completion_queue(vcpu);
2446 if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
2447 kvm_s390_vcpu_stop(vcpu);
2448 kvm_s390_clear_local_irqs(vcpu);
2449}
2450
2451void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
2452{
2453 mutex_lock(&vcpu->kvm->lock);
2454 preempt_disable();
2455 vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
2456 vcpu->arch.sie_block->epdx = vcpu->kvm->arch.epdx;
2457 preempt_enable();
2458 mutex_unlock(&vcpu->kvm->lock);
2459 if (!kvm_is_ucontrol(vcpu->kvm)) {
2460 vcpu->arch.gmap = vcpu->kvm->arch.gmap;
2461 sca_add_vcpu(vcpu);
2462 }
2463 if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
2464 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
2465 /* make vcpu_load load the right gmap on the first trigger */
2466 vcpu->arch.enabled_gmap = vcpu->arch.gmap;
2467}
2468
2469static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
2470{
2471 if (!test_kvm_facility(vcpu->kvm, 76))
2472 return;
2473
2474 vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
2475
2476 if (vcpu->kvm->arch.crypto.aes_kw)
2477 vcpu->arch.sie_block->ecb3 |= ECB3_AES;
2478 if (vcpu->kvm->arch.crypto.dea_kw)
2479 vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
2480
2481 vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
2482}
2483
2484void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
2485{
2486 free_page(vcpu->arch.sie_block->cbrlo);
2487 vcpu->arch.sie_block->cbrlo = 0;
2488}
2489
2490int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
2491{
2492 vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
2493 if (!vcpu->arch.sie_block->cbrlo)
2494 return -ENOMEM;
2495 return 0;
2496}
2497
2498static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
2499{
2500 struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
2501
2502 vcpu->arch.sie_block->ibc = model->ibc;
2503 if (test_kvm_facility(vcpu->kvm, 7))
2504 vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
2505}
2506
2507int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
2508{
2509 int rc = 0;
2510
2511 atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
2512 CPUSTAT_SM |
2513 CPUSTAT_STOPPED);
2514
2515 if (test_kvm_facility(vcpu->kvm, 78))
2516 kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED2);
2517 else if (test_kvm_facility(vcpu->kvm, 8))
2518 kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED);
2519
2520 kvm_s390_vcpu_setup_model(vcpu);
2521
2522 /* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
2523 if (MACHINE_HAS_ESOP)
2524 vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT;
2525 if (test_kvm_facility(vcpu->kvm, 9))
2526 vcpu->arch.sie_block->ecb |= ECB_SRSI;
2527 if (test_kvm_facility(vcpu->kvm, 73))
2528 vcpu->arch.sie_block->ecb |= ECB_TE;
2529
2530 if (test_kvm_facility(vcpu->kvm, 8) && vcpu->kvm->arch.use_pfmfi)
2531 vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI;
2532 if (test_kvm_facility(vcpu->kvm, 130))
2533 vcpu->arch.sie_block->ecb2 |= ECB2_IEP;
2534 vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI;
2535 if (sclp.has_cei)
2536 vcpu->arch.sie_block->eca |= ECA_CEI;
2537 if (sclp.has_ib)
2538 vcpu->arch.sie_block->eca |= ECA_IB;
2539 if (sclp.has_siif)
2540 vcpu->arch.sie_block->eca |= ECA_SII;
2541 if (sclp.has_sigpif)
2542 vcpu->arch.sie_block->eca |= ECA_SIGPI;
2543 if (test_kvm_facility(vcpu->kvm, 129)) {
2544 vcpu->arch.sie_block->eca |= ECA_VX;
2545 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
2546 }
2547 if (test_kvm_facility(vcpu->kvm, 139))
2548 vcpu->arch.sie_block->ecd |= ECD_MEF;
2549
2550 if (vcpu->arch.sie_block->gd) {
2551 vcpu->arch.sie_block->eca |= ECA_AIV;
2552 VCPU_EVENT(vcpu, 3, "AIV gisa format-%u enabled for cpu %03u",
2553 vcpu->arch.sie_block->gd & 0x3, vcpu->vcpu_id);
2554 }
2555 vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx)
2556 | SDNXC;
2557 vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
2558
2559 if (sclp.has_kss)
2560 kvm_s390_set_cpuflags(vcpu, CPUSTAT_KSS);
2561 else
2562 vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
2563
2564 if (vcpu->kvm->arch.use_cmma) {
2565 rc = kvm_s390_vcpu_setup_cmma(vcpu);
2566 if (rc)
2567 return rc;
2568 }
2569 hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
2570 vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
2571
2572 kvm_s390_vcpu_crypto_setup(vcpu);
2573
2574 return rc;
2575}
2576
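/*
 * Create a VCPU: the SIE control block shares a dedicated zeroed page
 * with the interception TDB, the guest memory window is set to the
 * maximum (mso = 0, msl = sclp.hamax), and the GISA origin is propagated
 * into "gd" so adapter interrupts can be delivered directly where the
 * facility is available.
 */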
2577struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
2578 unsigned int id)
2579{
2580 struct kvm_vcpu *vcpu;
2581 struct sie_page *sie_page;
2582 int rc = -EINVAL;
2583
2584 if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
2585 goto out;
2586
2587 rc = -ENOMEM;
2588
2589 vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
2590 if (!vcpu)
2591 goto out;
2592
2593 BUILD_BUG_ON(sizeof(struct sie_page) != 4096);
2594 sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
2595 if (!sie_page)
2596 goto out_free_cpu;
2597
2598 vcpu->arch.sie_block = &sie_page->sie_block;
2599 vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
2600
2601 /* the real guest size will always be smaller than msl */
2602 vcpu->arch.sie_block->mso = 0;
2603 vcpu->arch.sie_block->msl = sclp.hamax;
2604
2605 vcpu->arch.sie_block->icpua = id;
2606 spin_lock_init(&vcpu->arch.local_int.lock);
2607 vcpu->arch.sie_block->gd = (u32)(u64)kvm->arch.gisa;
2608 if (vcpu->arch.sie_block->gd && sclp.has_gisaf)
2609 vcpu->arch.sie_block->gd |= GISA_FORMAT1;
2610 seqcount_init(&vcpu->arch.cputm_seqcount);
2611
2612 rc = kvm_vcpu_init(vcpu, kvm, id);
2613 if (rc)
2614 goto out_free_sie_block;
2615 VM_EVENT(kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", id, vcpu,
2616 vcpu->arch.sie_block);
2617 trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);
2618
2619 return vcpu;
2620out_free_sie_block:
2621 free_page((unsigned long)(vcpu->arch.sie_block));
2622out_free_cpu:
2623 kmem_cache_free(kvm_vcpu_cache, vcpu);
2624out:
2625 return ERR_PTR(rc);
2626}
2627
2628int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
2629{
2630 return kvm_s390_vcpu_has_irq(vcpu, 0);
2631}
2632
2633bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
2634{
2635 return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE);
2636}
2637
2638void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
2639{
2640 atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
2641 exit_sie(vcpu);
2642}
2643
2644void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
2645{
2646 atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
2647}
2648
2649static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
2650{
2651 atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
2652 exit_sie(vcpu);
2653}
2654
2655static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
2656{
2657 atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
2658}
2659
2660/*
2661 * Kick a guest cpu out of SIE and wait until SIE is not running.
2662 * If the CPU is not running (e.g. waiting as idle) the function will
2663 * return immediately.
 */
2664void exit_sie(struct kvm_vcpu *vcpu)
2665{
2666 kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOP_INT);
2667 while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
2668 cpu_relax();
2669}
2670
2671/* Kick a guest cpu out of SIE to process a request synchronously */
2672void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
2673{
2674 kvm_make_request(req, vcpu);
2675 kvm_s390_vcpu_request(vcpu);
2676}
2677
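/*
 * gmap notifier callback, invoked when host mappings of guest memory are
 * invalidated. Only the two pages backing a VCPU's prefix area are of
 * interest: if the invalidated range overlaps them, an MMU reload request
 * makes sure the prefix is mapped and the ipte notifier is re-armed
 * before that VCPU re-enters SIE.
 */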
2678static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
2679 unsigned long end)
2680{
2681 struct kvm *kvm = gmap->private;
2682 struct kvm_vcpu *vcpu;
2683 unsigned long prefix;
2684 int i;
2685
2686 if (gmap_is_shadow(gmap))
2687 return;
2688 if (start >= 1UL << 31)
2689 /* We are only interested in prefix pages */
2690 return;
2691 kvm_for_each_vcpu(i, vcpu, kvm) {
2692 /* match against both prefix pages */
2693 prefix = kvm_s390_get_prefix(vcpu);
2694 if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
2695 VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
2696 start, end);
2697 kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
2698 }
2699 }
2700}
2701
2702int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
2703{
2704 /* kvm common code refers to this, but never calls it */
2705 BUG();
2706 return 0;
2707}
2708
2709static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
2710 struct kvm_one_reg *reg)
2711{
2712 int r = -EINVAL;
2713
2714 switch (reg->id) {
2715 case KVM_REG_S390_TODPR:
2716 r = put_user(vcpu->arch.sie_block->todpr,
2717 (u32 __user *)reg->addr);
2718 break;
2719 case KVM_REG_S390_EPOCHDIFF:
2720 r = put_user(vcpu->arch.sie_block->epoch,
2721 (u64 __user *)reg->addr);
2722 break;
2723 case KVM_REG_S390_CPU_TIMER:
2724 r = put_user(kvm_s390_get_cpu_timer(vcpu),
2725 (u64 __user *)reg->addr);
2726 break;
2727 case KVM_REG_S390_CLOCK_COMP:
2728 r = put_user(vcpu->arch.sie_block->ckc,
2729 (u64 __user *)reg->addr);
2730 break;
2731 case KVM_REG_S390_PFTOKEN:
2732 r = put_user(vcpu->arch.pfault_token,
2733 (u64 __user *)reg->addr);
2734 break;
2735 case KVM_REG_S390_PFCOMPARE:
2736 r = put_user(vcpu->arch.pfault_compare,
2737 (u64 __user *)reg->addr);
2738 break;
2739 case KVM_REG_S390_PFSELECT:
2740 r = put_user(vcpu->arch.pfault_select,
2741 (u64 __user *)reg->addr);
2742 break;
2743 case KVM_REG_S390_PP:
2744 r = put_user(vcpu->arch.sie_block->pp,
2745 (u64 __user *)reg->addr);
2746 break;
2747 case KVM_REG_S390_GBEA:
2748 r = put_user(vcpu->arch.sie_block->gbea,
2749 (u64 __user *)reg->addr);
2750 break;
2751 default:
2752 break;
2753 }
2754
2755 return r;
2756}
2757
2758static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
2759 struct kvm_one_reg *reg)
2760{
2761 int r = -EINVAL;
2762 __u64 val;
2763
2764 switch (reg->id) {
2765 case KVM_REG_S390_TODPR:
2766 r = get_user(vcpu->arch.sie_block->todpr,
2767 (u32 __user *)reg->addr);
2768 break;
2769 case KVM_REG_S390_EPOCHDIFF:
2770 r = get_user(vcpu->arch.sie_block->epoch,
2771 (u64 __user *)reg->addr);
2772 break;
2773 case KVM_REG_S390_CPU_TIMER:
2774 r = get_user(val, (u64 __user *)reg->addr);
2775 if (!r)
2776 kvm_s390_set_cpu_timer(vcpu, val);
2777 break;
2778 case KVM_REG_S390_CLOCK_COMP:
2779 r = get_user(vcpu->arch.sie_block->ckc,
2780 (u64 __user *)reg->addr);
2781 break;
2782 case KVM_REG_S390_PFTOKEN:
2783 r = get_user(vcpu->arch.pfault_token,
2784 (u64 __user *)reg->addr);
2785 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2786 kvm_clear_async_pf_completion_queue(vcpu);
2787 break;
2788 case KVM_REG_S390_PFCOMPARE:
2789 r = get_user(vcpu->arch.pfault_compare,
2790 (u64 __user *)reg->addr);
2791 break;
2792 case KVM_REG_S390_PFSELECT:
2793 r = get_user(vcpu->arch.pfault_select,
2794 (u64 __user *)reg->addr);
2795 break;
2796 case KVM_REG_S390_PP:
2797 r = get_user(vcpu->arch.sie_block->pp,
2798 (u64 __user *)reg->addr);
2799 break;
2800 case KVM_REG_S390_GBEA:
2801 r = get_user(vcpu->arch.sie_block->gbea,
2802 (u64 __user *)reg->addr);
2803 break;
2804 default:
2805 break;
2806 }
2807
2808 return r;
2809}
2810
2811static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
2812{
2813 kvm_s390_vcpu_initial_reset(vcpu);
2814 return 0;
2815}
2816
2817int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2818{
2819 vcpu_load(vcpu);
2820 memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
2821 vcpu_put(vcpu);
2822 return 0;
2823}
2824
2825int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2826{
2827 vcpu_load(vcpu);
2828 memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
2829 vcpu_put(vcpu);
2830 return 0;
2831}
2832
2833int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
2834 struct kvm_sregs *sregs)
2835{
2836 vcpu_load(vcpu);
2837
2838 memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
2839 memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
2840
2841 vcpu_put(vcpu);
2842 return 0;
2843}
2844
2845int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
2846 struct kvm_sregs *sregs)
2847{
2848 vcpu_load(vcpu);
2849
2850 memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
2851 memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
2852
2853 vcpu_put(vcpu);
2854 return 0;
2855}
2856
2857int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2858{
2859 int ret = 0;
2860
2861 vcpu_load(vcpu);
2862
2863 if (test_fp_ctl(fpu->fpc)) {
2864 ret = -EINVAL;
2865 goto out;
2866 }
2867 vcpu->run->s.regs.fpc = fpu->fpc;
2868 if (MACHINE_HAS_VX)
2869 convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
2870 (freg_t *) fpu->fprs);
2871 else
2872 memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
2873
2874out:
2875 vcpu_put(vcpu);
2876 return ret;
2877}
2878
2879int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2880{
2881 vcpu_load(vcpu);
2882
2883 /* make sure we have the latest values */
2884 save_fpu_regs();
2885 if (MACHINE_HAS_VX)
2886 convert_vx_to_fp((freg_t *) fpu->fprs,
2887 (__vector128 *) vcpu->run->s.regs.vrs);
2888 else
2889 memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
2890 fpu->fpc = vcpu->run->s.regs.fpc;
2891
2892 vcpu_put(vcpu);
2893 return 0;
2894}
2895
2896static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
2897{
2898 int rc = 0;
2899
2900 if (!is_vcpu_stopped(vcpu))
2901 rc = -EBUSY;
2902 else {
2903 vcpu->run->psw_mask = psw.mask;
2904 vcpu->run->psw_addr = psw.addr;
2905 }
2906 return rc;
2907}
2908
2909int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
2910 struct kvm_translation *tr)
2911{
2912 return -EINVAL; /* not implemented yet */
2913}
2914
2915#define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
2916 KVM_GUESTDBG_USE_HW_BP | \
2917 KVM_GUESTDBG_ENABLE)
2918
2919int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
2920 struct kvm_guest_debug *dbg)
2921{
2922 int rc = 0;
2923
2924 vcpu_load(vcpu);
2925
2926 vcpu->guest_debug = 0;
2927 kvm_s390_clear_bp_data(vcpu);
2928
2929 if (dbg->control & ~VALID_GUESTDBG_FLAGS) {
2930 rc = -EINVAL;
2931 goto out;
2932 }
2933 if (!sclp.has_gpere) {
2934 rc = -EINVAL;
2935 goto out;
2936 }
2937
2938 if (dbg->control & KVM_GUESTDBG_ENABLE) {
2939 vcpu->guest_debug = dbg->control;
2940 /* enforce guest PER */
2941 kvm_s390_set_cpuflags(vcpu, CPUSTAT_P);
2942
2943 if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
2944 rc = kvm_s390_import_bp_data(vcpu, dbg);
2945 } else {
2946 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
2947 vcpu->arch.guestdbg.last_bp = 0;
2948 }
2949
2950 if (rc) {
2951 vcpu->guest_debug = 0;
2952 kvm_s390_clear_bp_data(vcpu);
2953 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
2954 }
2955
2956out:
2957 vcpu_put(vcpu);
2958 return rc;
2959}
2960
2961int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
2962 struct kvm_mp_state *mp_state)
2963{
2964 int ret;
2965
2966 vcpu_load(vcpu);
2967
2968 /* CHECK_STOP and LOAD are not supported yet */
2969 ret = is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
2970 KVM_MP_STATE_OPERATING;
2971
2972 vcpu_put(vcpu);
2973 return ret;
2974}
2975
2976int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
2977 struct kvm_mp_state *mp_state)
2978{
2979 int rc = 0;
2980
2981 vcpu_load(vcpu);
2982
2983 /* user space knows about this interface - let it control the state */
2984 vcpu->kvm->arch.user_cpu_state_ctrl = 1;
2985
2986 switch (mp_state->mp_state) {
2987 case KVM_MP_STATE_STOPPED:
2988 kvm_s390_vcpu_stop(vcpu);
2989 break;
2990 case KVM_MP_STATE_OPERATING:
2991 kvm_s390_vcpu_start(vcpu);
2992 break;
2993 case KVM_MP_STATE_LOAD:
2994 case KVM_MP_STATE_CHECK_STOP:
2995 /* fall through - CHECK_STOP and LOAD are not supported yet */
2996 default:
2997 rc = -ENXIO;
2998 }
2999
3000 vcpu_put(vcpu);
3001 return rc;
3002}
3003
3004static bool ibs_enabled(struct kvm_vcpu *vcpu)
3005{
3006 return kvm_s390_test_cpuflags(vcpu, CPUSTAT_IBS);
3007}
3008
3009static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
3010{
3011retry:
3012 kvm_s390_vcpu_request_handled(vcpu);
3013 if (!kvm_request_pending(vcpu))
3014 return 0;
3015 /*
3016 * We use MMU_RELOAD just to re-arm the ipte notifier for the
3017 * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
3018 * This ensures that the ipte instruction for this request has
3019 * already finished. We might race against a second unmapper that
3020 * wants to set the blocking bit. Let's just retry the request loop.
3021 */
3022 if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
3023 int rc;
3024 rc = gmap_mprotect_notify(vcpu->arch.gmap,
3025 kvm_s390_get_prefix(vcpu),
3026 PAGE_SIZE * 2, PROT_WRITE);
3027 if (rc) {
3028 kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
3029 return rc;
3030 }
3031 goto retry;
3032 }
3033
3034 if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
3035 vcpu->arch.sie_block->ihcpu = 0xffff;
3036 goto retry;
3037 }
3038
3039 if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
3040 if (!ibs_enabled(vcpu)) {
3041 trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
3042 kvm_s390_set_cpuflags(vcpu, CPUSTAT_IBS);
3043 }
3044 goto retry;
3045 }
3046
3047 if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
3048 if (ibs_enabled(vcpu)) {
3049 trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
3050 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_IBS);
3051 }
3052 goto retry;
3053 }
3054
3055 if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
3056 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
3057 goto retry;
3058 }
3059
3060 if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) {
3061 /*
3062 * Disable CMM virtualization; we will emulate the ESSA
3063 * instruction manually, in order to provide additional
3064 * functionalities needed for live migration.
3065 */
3066 vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA;
3067 goto retry;
3068 }
3069
3070 if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) {
3071 /*
3072 * Re-enable CMM virtualization if CMMA is available and
3073 * CMM has been used.
3074 */
3075 if ((vcpu->kvm->arch.use_cmma) &&
3076 (vcpu->kvm->mm->context.uses_cmm))
3077 vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
3078 goto retry;
3079 }
3080
3081 /* nothing to do, just clear the request */
3082 kvm_clear_request(KVM_REQ_UNHALT, vcpu);
3083
3084 return 0;
3085}
3086
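/*
 * Set the guest TOD clock by recomputing the epoch, i.e. the offset SIE
 * adds to the host TOD: epoch = guest tod - host tod. With the
 * multiple-epoch facility the epoch index is adjusted as well, borrowing
 * one when the 64-bit subtraction wraps (guest TOD behind the host TOD).
 * All VCPUs are blocked so they pick up the new epoch pair atomically.
 */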
3087void kvm_s390_set_tod_clock(struct kvm *kvm,
3088 const struct kvm_s390_vm_tod_clock *gtod)
3089{
3090 struct kvm_vcpu *vcpu;
3091 struct kvm_s390_tod_clock_ext htod;
3092 int i;
3093
3094 mutex_lock(&kvm->lock);
3095 preempt_disable();
3096
3097 get_tod_clock_ext((char *)&htod);
3098
3099 kvm->arch.epoch = gtod->tod - htod.tod;
3100 kvm->arch.epdx = 0;
3101 if (test_kvm_facility(kvm, 139)) {
3102 kvm->arch.epdx = gtod->epoch_idx - htod.epoch_idx;
3103 if (kvm->arch.epoch > gtod->tod)
3104 kvm->arch.epdx -= 1;
3105 }
3106
3107 kvm_s390_vcpu_block_all(kvm);
3108 kvm_for_each_vcpu(i, vcpu, kvm) {
3109 vcpu->arch.sie_block->epoch = kvm->arch.epoch;
3110 vcpu->arch.sie_block->epdx = kvm->arch.epdx;
3111 }
3112
3113 kvm_s390_vcpu_unblock_all(kvm);
3114 preempt_enable();
3115 mutex_unlock(&kvm->lock);
3116}
3117
3118/**
3119 * kvm_arch_fault_in_page - fault-in guest page if necessary
3120 * @vcpu: The corresponding virtual cpu
3121 * @gpa: Guest physical address
3122 * @writable: Whether the page should be writable or not
3123 *
3124 * Make sure that a guest page has been faulted-in on the host.
3125 *
3126 * Return: Zero on success, negative error code otherwise.
3127 */
3128long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
3129{
3130 return gmap_fault(vcpu->arch.gmap, gpa,
3131 writable ? FAULT_FLAG_WRITE : 0);
3132}
3133
3134static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
3135 unsigned long token)
3136{
3137 struct kvm_s390_interrupt inti;
3138 struct kvm_s390_irq irq;
3139
3140 if (start_token) {
3141 irq.u.ext.ext_params2 = token;
3142 irq.type = KVM_S390_INT_PFAULT_INIT;
3143 WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
3144 } else {
3145 inti.type = KVM_S390_INT_PFAULT_DONE;
3146 inti.parm64 = token;
3147 WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
3148 }
3149}
3150
3151void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
3152 struct kvm_async_pf *work)
3153{
3154 trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
3155 __kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
3156}
3157
3158void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
3159 struct kvm_async_pf *work)
3160{
3161 trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
3162 __kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
3163}
3164
3165void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
3166 struct kvm_async_pf *work)
3167{
3168 /* s390 will always inject the page directly */
3169}
3170
3171bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
3172{
3173 /*
3174 * s390 will always inject the page directly,
3175 * but we still want check_async_completion to clean up
3176 */
3177 return true;
3178}
3179
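/*
 * Arm an async page fault for the current gmap fault address, but only
 * if the guest is prepared for pfault handshaking: a valid token is set,
 * the PSW matches the pfault compare/select mask, external interrupts
 * and the pfault-relevant subclass mask bit (0x200 in CR0) are enabled,
 * and pfault is enabled on the gmap. Returns non-zero once a work item
 * has been queued.
 */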
3180static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
3181{
3182 hva_t hva;
3183 struct kvm_arch_async_pf arch;
3184 int rc;
3185
3186 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3187 return 0;
3188 if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
3189 vcpu->arch.pfault_compare)
3190 return 0;
3191 if (psw_extint_disabled(vcpu))
3192 return 0;
3193 if (kvm_s390_vcpu_has_irq(vcpu, 0))
3194 return 0;
3195 if (!(vcpu->arch.sie_block->gcr[0] & 0x200ul))
3196 return 0;
3197 if (!vcpu->arch.gmap->pfault_enabled)
3198 return 0;
3199
3200 hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
3201 hva += current->thread.gmap_addr & ~PAGE_MASK;
3202 if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
3203 return 0;
3204
3205 rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
3206 return rc;
3207}
3208
3209static int vcpu_pre_run(struct kvm_vcpu *vcpu)
3210{
3211 int rc, cpuflags;
3212
3213 /*
3214 * On s390 notifications for arriving pages will be delivered directly
3215 * to the guest but the housekeeping for completed pfaults is
3216 * handled outside the worker.
3217 */
3218 kvm_check_async_pf_completion(vcpu);
3219
3220 vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
3221 vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
3222
3223 if (need_resched())
3224 schedule();
3225
3226 if (test_cpu_flag(CIF_MCCK_PENDING))
3227 s390_handle_mcck();
3228
3229 if (!kvm_is_ucontrol(vcpu->kvm)) {
3230 rc = kvm_s390_deliver_pending_interrupts(vcpu);
3231 if (rc)
3232 return rc;
3233 }
3234
3235 rc = kvm_s390_handle_requests(vcpu);
3236 if (rc)
3237 return rc;
3238
3239 if (guestdbg_enabled(vcpu)) {
3240 kvm_s390_backup_guest_per_regs(vcpu);
3241 kvm_s390_patch_guest_per_regs(vcpu);
3242 }
3243
3244 vcpu->arch.sie_block->icptcode = 0;
3245 cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
3246 VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
3247 trace_kvm_s390_sie_enter(vcpu, cpuflags);
3248
3249 return 0;
3250}
3251
3252static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
3253{
3254 struct kvm_s390_pgm_info pgm_info = {
3255 .code = PGM_ADDRESSING,
3256 };
3257 u8 opcode, ilen;
3258 int rc;
3259
3260 VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
3261 trace_kvm_s390_sie_fault(vcpu);
3262
3263 /*
3264 * We want to inject an addressing exception, which is defined as a
3265 * suppressing or terminating exception. However, since we came here
3266 * by a DAT access exception, the PSW still points to the faulting
3267 * instruction since DAT exceptions are nullifying. So we've got
3268 * to look up the current opcode to get the length of the instruction
3269 * to be able to forward the PSW.
3270 */
3271 rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1);
3272 ilen = insn_length(opcode);
3273 if (rc < 0) {
3274 return rc;
3275 } else if (rc) {
3276 /* Instruction-Fetching Exceptions - we can't detect the ilen.
3277 * Forward by arbitrary ilc, injection will take care of
3278 * nullification if necessary.
3279 */
3280 pgm_info = vcpu->arch.pgm;
3281 ilen = 4;
3282 }
3283 pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
3284 kvm_s390_forward_psw(vcpu, ilen);
3285 return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
3286}
3287
3288static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
3289{
3290 struct mcck_volatile_info *mcck_info;
3291 struct sie_page *sie_page;
3292
3293 VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
3294 vcpu->arch.sie_block->icptcode);
3295 trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
3296
3297 if (guestdbg_enabled(vcpu))
3298 kvm_s390_restore_guest_per_regs(vcpu);
3299
3300 vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
3301 vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
3302
3303 if (exit_reason == -EINTR) {
3304 VCPU_EVENT(vcpu, 3, "%s", "machine check");
3305 sie_page = container_of(vcpu->arch.sie_block,
3306 struct sie_page, sie_block);
3307 mcck_info = &sie_page->mcck_info;
3308 kvm_s390_reinject_machine_check(vcpu, mcck_info);
3309 return 0;
3310 }
3311
3312 if (vcpu->arch.sie_block->icptcode > 0) {
3313 int rc = kvm_handle_sie_intercept(vcpu);
3314
3315 if (rc != -EOPNOTSUPP)
3316 return rc;
3317 vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
3318 vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
3319 vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
3320 vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
3321 return -EREMOTE;
3322 } else if (exit_reason != -EFAULT) {
3323 vcpu->stat.exit_null++;
3324 return 0;
3325 } else if (kvm_is_ucontrol(vcpu->kvm)) {
3326 vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
3327 vcpu->run->s390_ucontrol.trans_exc_code =
3328 current->thread.gmap_addr;
3329 vcpu->run->s390_ucontrol.pgm_code = 0x10;
3330 return -EREMOTE;
3331 } else if (current->thread.gmap_pfault) {
3332 trace_kvm_s390_major_guest_pfault(vcpu);
3333 current->thread.gmap_pfault = 0;
3334 if (kvm_arch_setup_async_pf(vcpu))
3335 return 0;
3336 return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
3337 }
3338 return vcpu_post_run_fault_in_sie(vcpu);
3339}
3340
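/*
 * The inner run loop: vcpu_pre_run() delivers pending interrupts and
 * requests, then the srcu lock is dropped and SIE is entered through the
 * irq-off guest_enter/guest_exit section. The CPU timer accounting is
 * stopped across sie64a() since the hardware maintains the guest CPU
 * timer while in SIE. The loop runs until a signal, a guestdbg exit or
 * an error from vcpu_post_run() is pending.
 */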
3341static int __vcpu_run(struct kvm_vcpu *vcpu)
3342{
3343 int rc, exit_reason;
3344
3345 /*
3346 * We try to hold kvm->srcu during most of vcpu_run (except when run-
3347 * ning the guest), so that memslots (and other stuff) are protected
3348 */
3349 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3350
3351 do {
3352 rc = vcpu_pre_run(vcpu);
3353 if (rc)
3354 break;
3355
3356 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
3357 /*
3358 * As PF_VCPU is used in the fault handler, there must be no
3359 * uaccess between guest_enter and guest_exit.
3360 */
3361 local_irq_disable();
3362 guest_enter_irqoff();
3363 __disable_cpu_timer_accounting(vcpu);
3364 local_irq_enable();
3365 exit_reason = sie64a(vcpu->arch.sie_block,
3366 vcpu->run->s.regs.gprs);
3367 local_irq_disable();
3368 __enable_cpu_timer_accounting(vcpu);
3369 guest_exit_irqoff();
3370 local_irq_enable();
3371 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3372
3373 rc = vcpu_post_run(vcpu, exit_reason);
3374 } while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
3375
3376 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
3377 return rc;
3378}
3379
3380static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3381{
3382 struct runtime_instr_cb *riccb;
3383 struct gs_cb *gscb;
3384
3385 riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
3386 gscb = (struct gs_cb *) &kvm_run->s.regs.gscb;
3387 vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
3388 vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
3389 if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
3390 kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
3391 if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
3392 memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
3393 /* some control register changes require a tlb flush */
3394 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
3395 }
3396 if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
3397 kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
3398 vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
3399 vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
3400 vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
3401 vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
3402 }
3403 if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
3404 vcpu->arch.pfault_token = kvm_run->s.regs.pft;
3405 vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
3406 vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
3407 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3408 kvm_clear_async_pf_completion_queue(vcpu);
3409 }
3410 /*
3411 * If userspace sets the riccb (e.g. after migration) to a valid state,
3412 * we should enable RI here instead of doing the lazy enablement.
3413 */
3414 if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
3415 test_kvm_facility(vcpu->kvm, 64) &&
3416 riccb->v &&
3417 !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) {
3418 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)");
3419 vcpu->arch.sie_block->ecb3 |= ECB3_RI;
3420 }
3421 /*
3422 * If userspace sets the gscb (e.g. after migration) to non-zero,
3423 * we should enable GS here instead of doing the lazy enablement.
3424 */
3425 if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) &&
3426 test_kvm_facility(vcpu->kvm, 133) &&
3427 gscb->gssm &&
3428 !vcpu->arch.gs_enabled) {
3429 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)");
3430 vcpu->arch.sie_block->ecb |= ECB_GS;
3431 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
3432 vcpu->arch.gs_enabled = 1;
3433 }
3434 if ((kvm_run->kvm_dirty_regs & KVM_SYNC_BPBC) &&
3435 test_kvm_facility(vcpu->kvm, 82)) {
3436 vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
3437 vcpu->arch.sie_block->fpf |= kvm_run->s.regs.bpbc ? FPF_BPBC : 0;
3438 }
3439 save_access_regs(vcpu->arch.host_acrs);
3440 restore_access_regs(vcpu->run->s.regs.acrs);
3441 /* save host (userspace) fprs/vrs */
3442 save_fpu_regs();
3443 vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
3444 vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
3445 if (MACHINE_HAS_VX)
3446 current->thread.fpu.regs = vcpu->run->s.regs.vrs;
3447 else
3448 current->thread.fpu.regs = vcpu->run->s.regs.fprs;
3449 current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
3450 if (test_fp_ctl(current->thread.fpu.fpc))
3451 /* User space provided an invalid FPC, let's clear it */
3452 current->thread.fpu.fpc = 0;
3453 if (MACHINE_HAS_GS) {
3454 preempt_disable();
3455 __ctl_set_bit(2, 4);
3456 if (current->thread.gs_cb) {
3457 vcpu->arch.host_gscb = current->thread.gs_cb;
3458 save_gs_cb(vcpu->arch.host_gscb);
3459 }
3460 if (vcpu->arch.gs_enabled) {
3461 current->thread.gs_cb = (struct gs_cb *)
3462 &vcpu->run->s.regs.gscb;
3463 restore_gs_cb(current->thread.gs_cb);
3464 }
3465 preempt_enable();
3466 }
3467
3468 kvm_run->kvm_dirty_regs = 0;
3469}
3470
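/*
 * Mirror image of sync_regs(): copy the current guest state back into
 * kvm_run and restore the host register context that sync_regs() saved
 * away.
 */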
3471static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3472{
3473 kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
3474 kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
3475 kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
3476 memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
3477 kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
3478 kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
3479 kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
3480 kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
3481 kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
3482 kvm_run->s.regs.pft = vcpu->arch.pfault_token;
3483 kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
3484 kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
3485 kvm_run->s.regs.bpbc = (vcpu->arch.sie_block->fpf & FPF_BPBC) == FPF_BPBC;
3486 save_access_regs(vcpu->run->s.regs.acrs);
3487 restore_access_regs(vcpu->arch.host_acrs);
3488 /* Save guest register state */
3489 save_fpu_regs();
3490 vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
3491 /* Restore will be done lazily at return */
3492 current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
3493 current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
3494 if (MACHINE_HAS_GS) {
3495 __ctl_set_bit(2, 4);
3496 if (vcpu->arch.gs_enabled)
3497 save_gs_cb(current->thread.gs_cb);
3498 preempt_disable();
3499 current->thread.gs_cb = vcpu->arch.host_gscb;
3500 restore_gs_cb(vcpu->arch.host_gscb);
3501 preempt_enable();
3502 if (!vcpu->arch.host_gscb)
3503 __ctl_clear_bit(2, 4);
3504 vcpu->arch.host_gscb = NULL;
3505 }
3506
3507}
3508
3509int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3510{
3511 int rc;
3512
3513 if (kvm_run->immediate_exit)
3514 return -EINTR;
3515
3516 vcpu_load(vcpu);
3517
3518 if (guestdbg_exit_pending(vcpu)) {
3519 kvm_s390_prepare_debug_exit(vcpu);
3520 rc = 0;
3521 goto out;
3522 }
3523
3524 kvm_sigset_activate(vcpu);
3525
3526 if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
3527 kvm_s390_vcpu_start(vcpu);
3528 } else if (is_vcpu_stopped(vcpu)) {
3529 pr_err_ratelimited("can't run stopped vcpu %d\n",
3530 vcpu->vcpu_id);
3531 rc = -EINVAL;
3532 goto out;
3533 }
3534
3535 sync_regs(vcpu, kvm_run);
3536 enable_cpu_timer_accounting(vcpu);
3537
3538 might_fault();
3539 rc = __vcpu_run(vcpu);
3540
3541 if (signal_pending(current) && !rc) {
3542 kvm_run->exit_reason = KVM_EXIT_INTR;
3543 rc = -EINTR;
3544 }
3545
3546 if (guestdbg_exit_pending(vcpu) && !rc) {
3547 kvm_s390_prepare_debug_exit(vcpu);
3548 rc = 0;
3549 }
3550
3551 if (rc == -EREMOTE) {
3552 /* userspace support is needed, kvm_run has been prepared */
3553 rc = 0;
3554 }
3555
3556 disable_cpu_timer_accounting(vcpu);
3557 store_regs(vcpu, kvm_run);
3558
3559 kvm_sigset_deactivate(vcpu);
3560
3561 vcpu->stat.exit_userspace++;
3562out:
3563 vcpu_put(vcpu);
3564 return rc;
3565}
3566
3567/*
3568 * store status at address
3569 * we have two special cases:
3570 * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
3571 * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
3572 */
3573int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
3574{
3575 unsigned char archmode = 1;
3576 freg_t fprs[NUM_FPRS];
3577 unsigned int px;
3578 u64 clkcomp, cputm;
3579 int rc;
3580
3581 px = kvm_s390_get_prefix(vcpu);
3582 if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
3583 if (write_guest_abs(vcpu, 163, &archmode, 1))
3584 return -EFAULT;
3585 gpa = 0;
3586 } else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
3587 if (write_guest_real(vcpu, 163, &archmode, 1))
3588 return -EFAULT;
3589 gpa = px;
3590 } else
3591 gpa -= __LC_FPREGS_SAVE_AREA;
3592
3593 /* manually convert vector registers if necessary */
3594 if (MACHINE_HAS_VX) {
3595 convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
3596 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
3597 fprs, 128);
3598 } else {
3599 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
3600 vcpu->run->s.regs.fprs, 128);
3601 }
3602 rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
3603 vcpu->run->s.regs.gprs, 128);
3604 rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
3605 &vcpu->arch.sie_block->gpsw, 16);
3606 rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
3607 &px, 4);
3608 rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
3609 &vcpu->run->s.regs.fpc, 4);
3610 rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
3611 &vcpu->arch.sie_block->todpr, 4);
3612 cputm = kvm_s390_get_cpu_timer(vcpu);
3613 rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
3614 &cputm, 8);
3615 clkcomp = vcpu->arch.sie_block->ckc >> 8;
3616 rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
3617 &clkcomp, 8);
3618 rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
3619 &vcpu->run->s.regs.acrs, 64);
3620 rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
3621 &vcpu->arch.sie_block->gcr, 128);
3622 return rc ? -EFAULT : 0;
3623}
3624
3625int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
3626{
3627 /*
3628 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
3629 * switch in the run ioctl. Let's update our copies before we save
3630 * it into the save area
3631 */
3632 save_fpu_regs();
3633 vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
3634 save_access_regs(vcpu->run->s.regs.acrs);
3635
3636 return kvm_s390_store_status_unloaded(vcpu, addr);
3637}
3638
3639static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
3640{
3641 kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
3642 kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
3643}
3644
3645static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
3646{
3647 unsigned int i;
3648 struct kvm_vcpu *vcpu;
3649
3650 kvm_for_each_vcpu(i, vcpu, kvm) {
3651 __disable_ibs_on_vcpu(vcpu);
3652 }
3653}
3654
3655static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
3656{
3657 if (!sclp.has_ibs)
3658 return;
3659 kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
3660 kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
3661}
3662
3663void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
3664{
3665 int i, online_vcpus, started_vcpus = 0;
3666
3667 if (!is_vcpu_stopped(vcpu))
3668 return;
3669
3670 trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
3671 /* Only one cpu at a time may enter/leave the STOPPED state. */
3672 spin_lock(&vcpu->kvm->arch.start_stop_lock);
3673 online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
3674
3675 for (i = 0; i < online_vcpus; i++) {
3676 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
3677 started_vcpus++;
3678 }
3679
3680 if (started_vcpus == 0) {
3681 /* we're the only active VCPU -> speed it up */
3682 __enable_ibs_on_vcpu(vcpu);
3683 } else if (started_vcpus == 1) {
3684 /*
3685 * As we are starting a second VCPU, we have to disable
3686 * the IBS facility on all VCPUs to remove potentially
3687 * outstanding ENABLE requests.
3688 */
3689 __disable_ibs_on_all_vcpus(vcpu->kvm);
3690 }
3691
3692 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_STOPPED);
3693 /*
3694 * Another VCPU might have used IBS while we were offline.
3695 * Let's play safe and flush the VCPU at startup.
3696 */
3697 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
3698 spin_unlock(&vcpu->kvm->arch.start_stop_lock);
3699 return;
3700}
3701
3702void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
3703{
3704 int i, online_vcpus, started_vcpus = 0;
3705 struct kvm_vcpu *started_vcpu = NULL;
3706
3707 if (is_vcpu_stopped(vcpu))
3708 return;
3709
3710 trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
3711 /* Only one cpu at a time may enter/leave the STOPPED state. */
3712 spin_lock(&vcpu->kvm->arch.start_stop_lock);
3713 online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
3714
3715 /* SIGP STOP and SIGP STOP AND STORE STATUS have been fully processed */
3716 kvm_s390_clear_stop_irq(vcpu);
3717
3718 kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOPPED);
3719 __disable_ibs_on_vcpu(vcpu);
3720
3721 for (i = 0; i < online_vcpus; i++) {
3722 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
3723 started_vcpus++;
3724 started_vcpu = vcpu->kvm->vcpus[i];
3725 }
3726 }
3727
3728 if (started_vcpus == 1) {
3729 /*
3730 * As we only have one VCPU left, we want to enable the
3731 * IBS facility for that VCPU to speed it up.
3732 */
3733 __enable_ibs_on_vcpu(started_vcpu);
3734 }
3735
3736 spin_unlock(&vcpu->kvm->arch.start_stop_lock);
3737 return;
3738}
3739
3740static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
3741 struct kvm_enable_cap *cap)
3742{
3743 int r;
3744
3745 if (cap->flags)
3746 return -EINVAL;
3747
3748 switch (cap->cap) {
3749 case KVM_CAP_S390_CSS_SUPPORT:
3750 if (!vcpu->kvm->arch.css_support) {
3751 vcpu->kvm->arch.css_support = 1;
3752 VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
3753 trace_kvm_s390_enable_css(vcpu->kvm);
3754 }
3755 r = 0;
3756 break;
3757 default:
3758 r = -EINVAL;
3759 break;
3760 }
3761 return r;
3762}
3763
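/*
 * Read or write guest logical memory on behalf of userspace. The access
 * is bounced through a temporary kernel buffer, can be restricted to a
 * pure translation check (KVM_S390_MEMOP_F_CHECK_ONLY) and can inject
 * the resulting program exception into the guest on failure
 * (KVM_S390_MEMOP_F_INJECT_EXCEPTION).
 *
 * A minimal, hypothetical userspace sketch reading 4k from guest logical
 * address 0x1000 (buffer and vcpu_fd provided by the caller):
 *
 *	struct kvm_s390_mem_op op = {
 *		.gaddr = 0x1000,
 *		.size  = 0x1000,
 *		.op    = KVM_S390_MEMOP_LOGICAL_READ,
 *		.buf   = (__u64)(unsigned long)buffer,
 *	};
 *	ioctl(vcpu_fd, KVM_S390_MEM_OP, &op);
 */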
3764static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
3765 struct kvm_s390_mem_op *mop)
3766{
3767 void __user *uaddr = (void __user *)mop->buf;
3768 void *tmpbuf = NULL;
3769 int r, srcu_idx;
3770 const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
3771 | KVM_S390_MEMOP_F_CHECK_ONLY;
3772
3773 if (mop->flags & ~supported_flags)
3774 return -EINVAL;
3775
3776 if (mop->size > MEM_OP_MAX_SIZE)
3777 return -E2BIG;
3778
3779 if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
3780 tmpbuf = vmalloc(mop->size);
3781 if (!tmpbuf)
3782 return -ENOMEM;
3783 }
3784
3785 srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3786
3787 switch (mop->op) {
3788 case KVM_S390_MEMOP_LOGICAL_READ:
3789 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
3790 r = check_gva_range(vcpu, mop->gaddr, mop->ar,
3791 mop->size, GACC_FETCH);
3792 break;
3793 }
3794 r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
3795 if (r == 0) {
3796 if (copy_to_user(uaddr, tmpbuf, mop->size))
3797 r = -EFAULT;
3798 }
3799 break;
3800 case KVM_S390_MEMOP_LOGICAL_WRITE:
3801 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
3802 r = check_gva_range(vcpu, mop->gaddr, mop->ar,
3803 mop->size, GACC_STORE);
3804 break;
3805 }
3806 if (copy_from_user(tmpbuf, uaddr, mop->size)) {
3807 r = -EFAULT;
3808 break;
3809 }
3810 r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
3811 break;
3812 default:
3813 r = -EINVAL;
3814 }
3815
3816 srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
3817
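/* A positive return value is a program interruption code from the guest access functions; forward it to the guest if userspace requested that. */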
3818 if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
3819 kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
3820
3821 vfree(tmpbuf);
3822 return r;
3823}
3824
3825long kvm_arch_vcpu_async_ioctl(struct file *filp,
3826 unsigned int ioctl, unsigned long arg)
3827{
3828 struct kvm_vcpu *vcpu = filp->private_data;
3829 void __user *argp = (void __user *)arg;
3830
3831 switch (ioctl) {
3832 case KVM_S390_IRQ: {
3833 struct kvm_s390_irq s390irq;
3834
3835 if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
3836 return -EFAULT;
3837 return kvm_s390_inject_vcpu(vcpu, &s390irq);
3838 }
3839 case KVM_S390_INTERRUPT: {
3840 struct kvm_s390_interrupt s390int;
3841 struct kvm_s390_irq s390irq;
3842
3843 if (copy_from_user(&s390int, argp, sizeof(s390int)))
3844 return -EFAULT;
3845 if (s390int_to_s390irq(&s390int, &s390irq))
3846 return -EINVAL;
3847 return kvm_s390_inject_vcpu(vcpu, &s390irq);
3848 }
3849 }
3850 return -ENOIOCTLCMD;
3851}
3852
3853long kvm_arch_vcpu_ioctl(struct file *filp,
3854 unsigned int ioctl, unsigned long arg)
3855{
3856 struct kvm_vcpu *vcpu = filp->private_data;
3857 void __user *argp = (void __user *)arg;
3858 int idx;
3859 long r;
3860
3861 vcpu_load(vcpu);
3862
3863 switch (ioctl) {
3864 case KVM_S390_STORE_STATUS:
3865 idx = srcu_read_lock(&vcpu->kvm->srcu);
3866 r = kvm_s390_vcpu_store_status(vcpu, arg);
3867 srcu_read_unlock(&vcpu->kvm->srcu, idx);
3868 break;
3869 case KVM_S390_SET_INITIAL_PSW: {
3870 psw_t psw;
3871
3872 r = -EFAULT;
3873 if (copy_from_user(&psw, argp, sizeof(psw)))
3874 break;
3875 r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
3876 break;
3877 }
3878 case KVM_S390_INITIAL_RESET:
3879 r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
3880 break;
3881 case KVM_SET_ONE_REG:
3882 case KVM_GET_ONE_REG: {
3883 struct kvm_one_reg reg;
3884 r = -EFAULT;
3885 if (copy_from_user(&reg, argp, sizeof(reg)))
3886 break;
3887 if (ioctl == KVM_SET_ONE_REG)
3888 r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
3889 else
3890 r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
3891 break;
3892 }
3893#ifdef CONFIG_KVM_S390_UCONTROL
3894 case KVM_S390_UCAS_MAP: {
3895 struct kvm_s390_ucas_mapping ucasmap;
3896
3897 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
3898 r = -EFAULT;
3899 break;
3900 }
3901
3902 if (!kvm_is_ucontrol(vcpu->kvm)) {
3903 r = -EINVAL;
3904 break;
3905 }
3906
3907 r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
3908 ucasmap.vcpu_addr, ucasmap.length);
3909 break;
3910 }
3911 case KVM_S390_UCAS_UNMAP: {
3912 struct kvm_s390_ucas_mapping ucasmap;
3913
3914 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
3915 r = -EFAULT;
3916 break;
3917 }
3918
3919 if (!kvm_is_ucontrol(vcpu->kvm)) {
3920 r = -EINVAL;
3921 break;
3922 }
3923
3924 r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
3925 ucasmap.length);
3926 break;
3927 }
3928#endif
3929 case KVM_S390_VCPU_FAULT: {
3930 r = gmap_fault(vcpu->arch.gmap, arg, 0);
3931 break;
3932 }
3933 case KVM_ENABLE_CAP:
3934 {
3935 struct kvm_enable_cap cap;
3936 r = -EFAULT;
3937 if (copy_from_user(&cap, argp, sizeof(cap)))
3938 break;
3939 r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
3940 break;
3941 }
3942 case KVM_S390_MEM_OP: {
3943 struct kvm_s390_mem_op mem_op;
3944
3945 if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
3946 r = kvm_s390_guest_mem_op(vcpu, &mem_op);
3947 else
3948 r = -EFAULT;
3949 break;
3950 }
3951 case KVM_S390_SET_IRQ_STATE: {
3952 struct kvm_s390_irq_state irq_state;
3953
3954 r = -EFAULT;
3955 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
3956 break;
3957 if (irq_state.len > VCPU_IRQS_MAX_BUF ||
3958 irq_state.len == 0 ||
3959 irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
3960 r = -EINVAL;
3961 break;
3962 }
3963 /* do not use irq_state.flags, it will break old QEMUs */
3964 r = kvm_s390_set_irq_state(vcpu,
3965 (void __user *) irq_state.buf,
3966 irq_state.len);
3967 break;
3968 }
3969 case KVM_S390_GET_IRQ_STATE: {
3970 struct kvm_s390_irq_state irq_state;
3971
3972 r = -EFAULT;
3973 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
3974 break;
3975 if (irq_state.len == 0) {
3976 r = -EINVAL;
3977 break;
3978 }
3979 /* do not use irq_state.flags, it will break old QEMUs */
3980 r = kvm_s390_get_irq_state(vcpu,
3981 (__u8 __user *) irq_state.buf,
3982 irq_state.len);
3983 break;
3984 }
3985 default:
3986 r = -ENOTTY;
3987 }
3988
3989 vcpu_put(vcpu);
3990 return r;
3991}
3992
3993int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
3994{
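/* For user-controlled VMs, userspace can mmap() the hardware SIE control block at the KVM_S390_SIE_PAGE_OFFSET pseudo offset. */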
3995#ifdef CONFIG_KVM_S390_UCONTROL
3996 if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
3997 && (kvm_is_ucontrol(vcpu->kvm))) {
3998 vmf->page = virt_to_page(vcpu->arch.sie_block);
3999 get_page(vmf->page);
4000 return 0;
4001 }
4002#endif
4003 return VM_FAULT_SIGBUS;
4004}
4005
4006int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
4007 unsigned long npages)
4008{
4009 return 0;
4010}
4011
4012/* Section: memory related */
4013int kvm_arch_prepare_memory_region(struct kvm *kvm,
4014 struct kvm_memory_slot *memslot,
4015 const struct kvm_userspace_memory_region *mem,
4016 enum kvm_mr_change change)
4017{
4018 /* A few sanity checks. Memory slots have to start and end on a segment
4019 boundary (1MB). The memory in userland may be fragmented into various
4020 different vmas, and it is fine to mmap() and munmap() parts of this
4021 slot at any time after this call. */
4022
4023 if (mem->userspace_addr & 0xffffful)
4024 return -EINVAL;
4025
4026 if (mem->memory_size & 0xffffful)
4027 return -EINVAL;
4028
4029 if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
4030 return -EINVAL;
4031
4032 return 0;
4033}
4034
4035void kvm_arch_commit_memory_region(struct kvm *kvm,
4036 const struct kvm_userspace_memory_region *mem,
4037 const struct kvm_memory_slot *old,
4038 const struct kvm_memory_slot *new,
4039 enum kvm_mr_change change)
4040{
4041 int rc;
4042
4043 /* If the basics of the memslot do not change, we do not want
4044 * to update the gmap. Every update causes several unnecessary
4045 * segment translation exceptions. This is usually handled just
4046 * fine by the normal fault handler + gmap, but it will also
4047 * cause faults on the prefix page of running guest CPUs.
4048 */
4049 if (old->userspace_addr == mem->userspace_addr &&
4050 old->base_gfn * PAGE_SIZE == mem->guest_phys_addr &&
4051 old->npages * PAGE_SIZE == mem->memory_size)
4052 return;
4053
4054 rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
4055 mem->guest_phys_addr, mem->memory_size);
4056 if (rc)
4057 pr_warn("failed to commit memory region\n");
4058 return;
4059}
4060
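/* Derive from the i-th two-bit field of sclp.hmfai a mask of the facility bits that are not hypervisor managed. */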
4061static inline unsigned long nonhyp_mask(int i)
4062{
4063 unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;
4064
4065 return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
4066}
4067
4068void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
4069{
4070 vcpu->valid_wakeup = false;
4071}
4072
4073static int __init kvm_s390_init(void)
4074{
4075 int i;
4076
4077 if (!sclp.has_sief2) {
4078 pr_info("SIE not available\n");
4079 return -ENODEV;
4080 }
4081
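/* Merge the host facility list into the KVM base facility mask, limited to the bits that are not hypervisor managed. */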
4082 for (i = 0; i < 16; i++)
4083 kvm_s390_fac_base[i] |=
4084 S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i);
4085
4086 return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
4087}
4088
4089static void __exit kvm_s390_exit(void)
4090{
4091 kvm_exit();
4092}
4093
4094module_init(kvm_s390_init);
4095module_exit(kvm_s390_exit);
4096
4097/*
4098 * Enable autoloading of the kvm module.
4099 * Note that we add the module alias here instead of virt/kvm/kvm_main.c
4100 * since x86 takes a different approach.
4101 */
4102#include <linux/miscdevice.h>
4103MODULE_ALIAS_MISCDEV(KVM_MINOR);
4104MODULE_ALIAS("devname:kvm");
1// SPDX-License-Identifier: GPL-2.0
2/*
3 * hosting IBM Z kernel virtual machines (s390x)
4 *
5 * Copyright IBM Corp. 2008, 2018
6 *
7 * Author(s): Carsten Otte <cotte@de.ibm.com>
8 * Christian Borntraeger <borntraeger@de.ibm.com>
9 * Heiko Carstens <heiko.carstens@de.ibm.com>
10 * Christian Ehrhardt <ehrhardt@de.ibm.com>
11 * Jason J. Herne <jjherne@us.ibm.com>
12 */
13
14#define KMSG_COMPONENT "kvm-s390"
15#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
16
17#include <linux/compiler.h>
18#include <linux/err.h>
19#include <linux/fs.h>
20#include <linux/hrtimer.h>
21#include <linux/init.h>
22#include <linux/kvm.h>
23#include <linux/kvm_host.h>
24#include <linux/mman.h>
25#include <linux/module.h>
26#include <linux/moduleparam.h>
27#include <linux/random.h>
28#include <linux/slab.h>
29#include <linux/timer.h>
30#include <linux/vmalloc.h>
31#include <linux/bitmap.h>
32#include <linux/sched/signal.h>
33#include <linux/string.h>
34
35#include <asm/asm-offsets.h>
36#include <asm/lowcore.h>
37#include <asm/stp.h>
38#include <asm/pgtable.h>
39#include <asm/gmap.h>
40#include <asm/nmi.h>
41#include <asm/switch_to.h>
42#include <asm/isc.h>
43#include <asm/sclp.h>
44#include <asm/cpacf.h>
45#include <asm/timex.h>
46#include <asm/ap.h>
47#include "kvm-s390.h"
48#include "gaccess.h"
49
50#define CREATE_TRACE_POINTS
51#include "trace.h"
52#include "trace-s390.h"
53
54#define MEM_OP_MAX_SIZE 65536 /* Maximum transfer size for KVM_S390_MEM_OP */
55#define LOCAL_IRQS 32
56#define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
57 (KVM_MAX_VCPUS + LOCAL_IRQS))
58
59#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
60#define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM
61
62struct kvm_stats_debugfs_item debugfs_entries[] = {
63 { "userspace_handled", VCPU_STAT(exit_userspace) },
64 { "exit_null", VCPU_STAT(exit_null) },
65 { "exit_validity", VCPU_STAT(exit_validity) },
66 { "exit_stop_request", VCPU_STAT(exit_stop_request) },
67 { "exit_external_request", VCPU_STAT(exit_external_request) },
68 { "exit_io_request", VCPU_STAT(exit_io_request) },
69 { "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
70 { "exit_instruction", VCPU_STAT(exit_instruction) },
71 { "exit_pei", VCPU_STAT(exit_pei) },
72 { "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
73 { "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
74 { "exit_operation_exception", VCPU_STAT(exit_operation_exception) },
75 { "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
76 { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
77 { "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
78 { "halt_no_poll_steal", VCPU_STAT(halt_no_poll_steal) },
79 { "halt_wakeup", VCPU_STAT(halt_wakeup) },
80 { "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
81 { "instruction_lctl", VCPU_STAT(instruction_lctl) },
82 { "instruction_stctl", VCPU_STAT(instruction_stctl) },
83 { "instruction_stctg", VCPU_STAT(instruction_stctg) },
84 { "deliver_ckc", VCPU_STAT(deliver_ckc) },
85 { "deliver_cputm", VCPU_STAT(deliver_cputm) },
86 { "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
87 { "deliver_external_call", VCPU_STAT(deliver_external_call) },
88 { "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
89 { "deliver_virtio", VCPU_STAT(deliver_virtio) },
90 { "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
91 { "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
92 { "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
93 { "deliver_program", VCPU_STAT(deliver_program) },
94 { "deliver_io", VCPU_STAT(deliver_io) },
95 { "deliver_machine_check", VCPU_STAT(deliver_machine_check) },
96 { "exit_wait_state", VCPU_STAT(exit_wait_state) },
97 { "inject_ckc", VCPU_STAT(inject_ckc) },
98 { "inject_cputm", VCPU_STAT(inject_cputm) },
99 { "inject_external_call", VCPU_STAT(inject_external_call) },
100 { "inject_float_mchk", VM_STAT(inject_float_mchk) },
101 { "inject_emergency_signal", VCPU_STAT(inject_emergency_signal) },
102 { "inject_io", VM_STAT(inject_io) },
103 { "inject_mchk", VCPU_STAT(inject_mchk) },
104 { "inject_pfault_done", VM_STAT(inject_pfault_done) },
105 { "inject_program", VCPU_STAT(inject_program) },
106 { "inject_restart", VCPU_STAT(inject_restart) },
107 { "inject_service_signal", VM_STAT(inject_service_signal) },
108 { "inject_set_prefix", VCPU_STAT(inject_set_prefix) },
109 { "inject_stop_signal", VCPU_STAT(inject_stop_signal) },
110 { "inject_pfault_init", VCPU_STAT(inject_pfault_init) },
111 { "inject_virtio", VM_STAT(inject_virtio) },
112 { "instruction_epsw", VCPU_STAT(instruction_epsw) },
113 { "instruction_gs", VCPU_STAT(instruction_gs) },
114 { "instruction_io_other", VCPU_STAT(instruction_io_other) },
115 { "instruction_lpsw", VCPU_STAT(instruction_lpsw) },
116 { "instruction_lpswe", VCPU_STAT(instruction_lpswe) },
117 { "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
118 { "instruction_ptff", VCPU_STAT(instruction_ptff) },
119 { "instruction_stidp", VCPU_STAT(instruction_stidp) },
120 { "instruction_sck", VCPU_STAT(instruction_sck) },
121 { "instruction_sckpf", VCPU_STAT(instruction_sckpf) },
122 { "instruction_spx", VCPU_STAT(instruction_spx) },
123 { "instruction_stpx", VCPU_STAT(instruction_stpx) },
124 { "instruction_stap", VCPU_STAT(instruction_stap) },
125 { "instruction_iske", VCPU_STAT(instruction_iske) },
126 { "instruction_ri", VCPU_STAT(instruction_ri) },
127 { "instruction_rrbe", VCPU_STAT(instruction_rrbe) },
128 { "instruction_sske", VCPU_STAT(instruction_sske) },
129 { "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
130 { "instruction_essa", VCPU_STAT(instruction_essa) },
131 { "instruction_stsi", VCPU_STAT(instruction_stsi) },
132 { "instruction_stfl", VCPU_STAT(instruction_stfl) },
133 { "instruction_tb", VCPU_STAT(instruction_tb) },
134 { "instruction_tpi", VCPU_STAT(instruction_tpi) },
135 { "instruction_tprot", VCPU_STAT(instruction_tprot) },
136 { "instruction_tsch", VCPU_STAT(instruction_tsch) },
137 { "instruction_sthyi", VCPU_STAT(instruction_sthyi) },
138 { "instruction_sie", VCPU_STAT(instruction_sie) },
139 { "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
140 { "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
141 { "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
142 { "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
143 { "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
144 { "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
145 { "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
146 { "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
147 { "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
148 { "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
149 { "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
150 { "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
151 { "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
152 { "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
153 { "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
154 { "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
155 { "instruction_diag_10", VCPU_STAT(diagnose_10) },
156 { "instruction_diag_44", VCPU_STAT(diagnose_44) },
157 { "instruction_diag_9c", VCPU_STAT(diagnose_9c) },
158 { "instruction_diag_258", VCPU_STAT(diagnose_258) },
159 { "instruction_diag_308", VCPU_STAT(diagnose_308) },
160 { "instruction_diag_500", VCPU_STAT(diagnose_500) },
161 { "instruction_diag_other", VCPU_STAT(diagnose_other) },
162 { NULL }
163};
164
165struct kvm_s390_tod_clock_ext {
166 __u8 epoch_idx;
167 __u64 tod;
168 __u8 reserved[7];
169} __packed;
170
171/* allow nested virtualization in KVM (if enabled by user space) */
172static int nested;
173module_param(nested, int, S_IRUGO);
174MODULE_PARM_DESC(nested, "Nested virtualization support");
175
176/* allow 1m huge page guest backing, if !nested */
177static int hpage;
178module_param(hpage, int, 0444);
179MODULE_PARM_DESC(hpage, "1m huge page backing support");
180
181/* maximum percentage of steal time for polling. >100 is treated like 100 */
182static u8 halt_poll_max_steal = 10;
183module_param(halt_poll_max_steal, byte, 0644);
184MODULE_PARM_DESC(halt_poll_max_steal, "Maximum percentage of steal time to allow polling");
185
186/*
187 * For now we handle at most 16 double words as this is what the s390 base
188 * kernel handles and stores in the prefix page. Going beyond this limit
189 * will require code changes, but the external uapi can stay.
190 */
191#define SIZE_INTERNAL 16
192
193/*
194 * Base feature mask that defines default mask for facilities. Consists of the
195 * defines in FACILITIES_KVM and the non-hypervisor managed bits.
196 */
197static unsigned long kvm_s390_fac_base[SIZE_INTERNAL] = { FACILITIES_KVM };
198/*
199 * Extended feature mask. Consists of the defines in FACILITIES_KVM_CPUMODEL
200 * and defines the facilities that can be enabled via a cpu model.
201 */
202static unsigned long kvm_s390_fac_ext[SIZE_INTERNAL] = { FACILITIES_KVM_CPUMODEL };
203
204static unsigned long kvm_s390_fac_size(void)
205{
206 BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_MASK_SIZE_U64);
207 BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_LIST_SIZE_U64);
208 BUILD_BUG_ON(SIZE_INTERNAL * sizeof(unsigned long) >
209 sizeof(S390_lowcore.stfle_fac_list));
210
211 return SIZE_INTERNAL;
212}
213
214/* available cpu features supported by kvm */
215static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
216/* available subfunctions indicated via query / "test bit" */
217static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;
218
219static struct gmap_notifier gmap_notifier;
220static struct gmap_notifier vsie_gmap_notifier;
221debug_info_t *kvm_s390_dbf;
222
223/* Section: not file related */
224int kvm_arch_hardware_enable(void)
225{
226 /* every s390 is virtualization enabled ;-) */
227 return 0;
228}
229
230int kvm_arch_check_processor_compat(void)
231{
232 return 0;
233}
234
235static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
236 unsigned long end);
237
238static void kvm_clock_sync_scb(struct kvm_s390_sie_block *scb, u64 delta)
239{
240 u8 delta_idx = 0;
241
242 /*
243 * The TOD jumps by delta, we have to compensate this by adding
244 * -delta to the epoch.
245 */
246 delta = -delta;
247
248 /* sign-extension - we're adding to signed values below */
249 if ((s64)delta < 0)
250 delta_idx = -1;
251
252 scb->epoch += delta;
253 if (scb->ecd & ECD_MEF) {
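/* Multiple-epoch facility: propagate sign extension and carry of the 64-bit epoch addition into the epoch index. */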
254 scb->epdx += delta_idx;
255 if (scb->epoch < delta)
256 scb->epdx += 1;
257 }
258}
259
260/*
261 * This callback is executed during stop_machine(). All CPUs are therefore
262 * temporarily stopped. In order not to change guest behavior, we have to
263 * disable preemption whenever we touch the epoch of kvm and the VCPUs,
264 * so a CPU won't be stopped while calculating with the epoch.
265 */
266static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
267 void *v)
268{
269 struct kvm *kvm;
270 struct kvm_vcpu *vcpu;
271 int i;
272 unsigned long long *delta = v;
273
274 list_for_each_entry(kvm, &vm_list, vm_list) {
275 kvm_for_each_vcpu(i, vcpu, kvm) {
276 kvm_clock_sync_scb(vcpu->arch.sie_block, *delta);
277 if (i == 0) {
278 kvm->arch.epoch = vcpu->arch.sie_block->epoch;
279 kvm->arch.epdx = vcpu->arch.sie_block->epdx;
280 }
281 if (vcpu->arch.cputm_enabled)
282 vcpu->arch.cputm_start += *delta;
283 if (vcpu->arch.vsie_block)
284 kvm_clock_sync_scb(vcpu->arch.vsie_block,
285 *delta);
286 }
287 }
288 return NOTIFY_OK;
289}
290
291static struct notifier_block kvm_clock_notifier = {
292 .notifier_call = kvm_clock_sync,
293};
294
295int kvm_arch_hardware_setup(void)
296{
297 gmap_notifier.notifier_call = kvm_gmap_notifier;
298 gmap_register_pte_notifier(&gmap_notifier);
299 vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
300 gmap_register_pte_notifier(&vsie_gmap_notifier);
301 atomic_notifier_chain_register(&s390_epoch_delta_notifier,
302 &kvm_clock_notifier);
303 return 0;
304}
305
306void kvm_arch_hardware_unsetup(void)
307{
308 gmap_unregister_pte_notifier(&gmap_notifier);
309 gmap_unregister_pte_notifier(&vsie_gmap_notifier);
310 atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
311 &kvm_clock_notifier);
312}
313
314static void allow_cpu_feat(unsigned long nr)
315{
316 set_bit_inv(nr, kvm_s390_available_cpu_feat);
317}
318
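/* Test availability of a single PLO function: bit 0x100 in the function code selects the "test bit" form. */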
319static inline int plo_test_bit(unsigned char nr)
320{
321 register unsigned long r0 asm("0") = (unsigned long) nr | 0x100;
322 int cc;
323
324 asm volatile(
325 /* Parameter registers are ignored for "test bit" */
326 " plo 0,0,0,0(0)\n"
327 " ipm %0\n"
328 " srl %0,28\n"
329 : "=d" (cc)
330 : "d" (r0)
331 : "cc");
332 return cc == 0;
333}
334
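/* Execute the query function (function code 0 in r0) of the instruction given by opcode; the result is stored at *query, addressed via r1. */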
335static __always_inline void __insn32_query(unsigned int opcode, u8 *query)
336{
337 register unsigned long r0 asm("0") = 0; /* query function */
338 register unsigned long r1 asm("1") = (unsigned long) query;
339
340 asm volatile(
341 /* Parameter regs are ignored */
342 " .insn rrf,%[opc] << 16,2,4,6,0\n"
343 :
344 : "d" (r0), "a" (r1), [opc] "i" (opcode)
345 : "cc", "memory");
346}
347
348#define INSN_SORTL 0xb938
349#define INSN_DFLTCC 0xb939
350
351static void kvm_s390_cpu_feat_init(void)
352{
353 int i;
354
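/* Probe all 256 possible PLO functions and record the available ones, most significant bit first within each mask byte. */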
355 for (i = 0; i < 256; ++i) {
356 if (plo_test_bit(i))
357 kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
358 }
359
360 if (test_facility(28)) /* TOD-clock steering */
361 ptff(kvm_s390_available_subfunc.ptff,
362 sizeof(kvm_s390_available_subfunc.ptff),
363 PTFF_QAF);
364
365 if (test_facility(17)) { /* MSA */
366 __cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
367 kvm_s390_available_subfunc.kmac);
368 __cpacf_query(CPACF_KMC, (cpacf_mask_t *)
369 kvm_s390_available_subfunc.kmc);
370 __cpacf_query(CPACF_KM, (cpacf_mask_t *)
371 kvm_s390_available_subfunc.km);
372 __cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
373 kvm_s390_available_subfunc.kimd);
374 __cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
375 kvm_s390_available_subfunc.klmd);
376 }
377 if (test_facility(76)) /* MSA3 */
378 __cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
379 kvm_s390_available_subfunc.pckmo);
380 if (test_facility(77)) { /* MSA4 */
381 __cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
382 kvm_s390_available_subfunc.kmctr);
383 __cpacf_query(CPACF_KMF, (cpacf_mask_t *)
384 kvm_s390_available_subfunc.kmf);
385 __cpacf_query(CPACF_KMO, (cpacf_mask_t *)
386 kvm_s390_available_subfunc.kmo);
387 __cpacf_query(CPACF_PCC, (cpacf_mask_t *)
388 kvm_s390_available_subfunc.pcc);
389 }
390 if (test_facility(57)) /* MSA5 */
391 __cpacf_query(CPACF_PRNO, (cpacf_mask_t *)
392 kvm_s390_available_subfunc.ppno);
393
394 if (test_facility(146)) /* MSA8 */
395 __cpacf_query(CPACF_KMA, (cpacf_mask_t *)
396 kvm_s390_available_subfunc.kma);
397
398 if (test_facility(155)) /* MSA9 */
399 __cpacf_query(CPACF_KDSA, (cpacf_mask_t *)
400 kvm_s390_available_subfunc.kdsa);
401
402 if (test_facility(150)) /* SORTL */
403 __insn32_query(INSN_SORTL, kvm_s390_available_subfunc.sortl);
404
405 if (test_facility(151)) /* DFLTCC */
406 __insn32_query(INSN_DFLTCC, kvm_s390_available_subfunc.dfltcc);
407
408 if (MACHINE_HAS_ESOP)
409 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
410 /*
411 * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
412 * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
413 */
414 if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
415 !test_facility(3) || !nested)
416 return;
417 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
418 if (sclp.has_64bscao)
419 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
420 if (sclp.has_siif)
421 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
422 if (sclp.has_gpere)
423 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
424 if (sclp.has_gsls)
425 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
426 if (sclp.has_ib)
427 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
428 if (sclp.has_cei)
429 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
430 if (sclp.has_ibs)
431 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
432 if (sclp.has_kss)
433 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS);
434 /*
435 * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
436 * all skey handling functions read/set the skey from the PGSTE
437 * instead of the real storage key.
438 *
439 * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will make
440 * pages being detected as preserved although they are resident.
441 *
442 * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
443 * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
444 *
445 * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
446 * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
447 * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
448 *
449 * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
450 * cannot easily shadow the SCA because of the ipte lock.
451 */
452}
453
454int kvm_arch_init(void *opaque)
455{
456 int rc;
457
458 kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
459 if (!kvm_s390_dbf)
460 return -ENOMEM;
461
462 if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) {
463 rc = -ENOMEM;
464 goto out_debug_unreg;
465 }
466
467 kvm_s390_cpu_feat_init();
468
469 /* Register floating interrupt controller interface. */
470 rc = kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
471 if (rc) {
472 pr_err("A FLIC registration call failed with rc=%d\n", rc);
473 goto out_debug_unreg;
474 }
475
476 rc = kvm_s390_gib_init(GAL_ISC);
477 if (rc)
478 goto out_gib_destroy;
479
480 return 0;
481
482out_gib_destroy:
483 kvm_s390_gib_destroy();
484out_debug_unreg:
485 debug_unregister(kvm_s390_dbf);
486 return rc;
487}
488
489void kvm_arch_exit(void)
490{
491 kvm_s390_gib_destroy();
492 debug_unregister(kvm_s390_dbf);
493}
494
495/* Section: device related */
496long kvm_arch_dev_ioctl(struct file *filp,
497 unsigned int ioctl, unsigned long arg)
498{
499 if (ioctl == KVM_S390_ENABLE_SIE)
500 return s390_enable_sie();
501 return -EINVAL;
502}
503
504int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
505{
506 int r;
507
508 switch (ext) {
509 case KVM_CAP_S390_PSW:
510 case KVM_CAP_S390_GMAP:
511 case KVM_CAP_SYNC_MMU:
512#ifdef CONFIG_KVM_S390_UCONTROL
513 case KVM_CAP_S390_UCONTROL:
514#endif
515 case KVM_CAP_ASYNC_PF:
516 case KVM_CAP_SYNC_REGS:
517 case KVM_CAP_ONE_REG:
518 case KVM_CAP_ENABLE_CAP:
519 case KVM_CAP_S390_CSS_SUPPORT:
520 case KVM_CAP_IOEVENTFD:
521 case KVM_CAP_DEVICE_CTRL:
522 case KVM_CAP_S390_IRQCHIP:
523 case KVM_CAP_VM_ATTRIBUTES:
524 case KVM_CAP_MP_STATE:
525 case KVM_CAP_IMMEDIATE_EXIT:
526 case KVM_CAP_S390_INJECT_IRQ:
527 case KVM_CAP_S390_USER_SIGP:
528 case KVM_CAP_S390_USER_STSI:
529 case KVM_CAP_S390_SKEYS:
530 case KVM_CAP_S390_IRQ_STATE:
531 case KVM_CAP_S390_USER_INSTR0:
532 case KVM_CAP_S390_CMMA_MIGRATION:
533 case KVM_CAP_S390_AIS:
534 case KVM_CAP_S390_AIS_MIGRATION:
535 r = 1;
536 break;
537 case KVM_CAP_S390_HPAGE_1M:
538 r = 0;
539 if (hpage && !kvm_is_ucontrol(kvm))
540 r = 1;
541 break;
542 case KVM_CAP_S390_MEM_OP:
543 r = MEM_OP_MAX_SIZE;
544 break;
545 case KVM_CAP_NR_VCPUS:
546 case KVM_CAP_MAX_VCPUS:
547 case KVM_CAP_MAX_VCPU_ID:
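/* The VCPU limit depends on the SCA format: basic SCA slots by default, no SCA-imposed limit if SCA entries are unused, extended SCA slots when available. */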
548 r = KVM_S390_BSCA_CPU_SLOTS;
549 if (!kvm_s390_use_sca_entries())
550 r = KVM_MAX_VCPUS;
551 else if (sclp.has_esca && sclp.has_64bscao)
552 r = KVM_S390_ESCA_CPU_SLOTS;
553 break;
554 case KVM_CAP_S390_COW:
555 r = MACHINE_HAS_ESOP;
556 break;
557 case KVM_CAP_S390_VECTOR_REGISTERS:
558 r = MACHINE_HAS_VX;
559 break;
560 case KVM_CAP_S390_RI:
561 r = test_facility(64);
562 break;
563 case KVM_CAP_S390_GS:
564 r = test_facility(133);
565 break;
566 case KVM_CAP_S390_BPB:
567 r = test_facility(82);
568 break;
569 default:
570 r = 0;
571 }
572 return r;
573}
574
575static void kvm_s390_sync_dirty_log(struct kvm *kvm,
576 struct kvm_memory_slot *memslot)
577{
578 int i;
579 gfn_t cur_gfn, last_gfn;
580 unsigned long gaddr, vmaddr;
581 struct gmap *gmap = kvm->arch.gmap;
582 DECLARE_BITMAP(bitmap, _PAGE_ENTRIES);
583
584 /* Loop over all guest segments */
585 cur_gfn = memslot->base_gfn;
586 last_gfn = memslot->base_gfn + memslot->npages;
587 for (; cur_gfn <= last_gfn; cur_gfn += _PAGE_ENTRIES) {
588 gaddr = gfn_to_gpa(cur_gfn);
589 vmaddr = gfn_to_hva_memslot(memslot, cur_gfn);
590 if (kvm_is_error_hva(vmaddr))
591 continue;
592
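/* Collect the dirty bits for this segment at the gmap pmd level and mark the corresponding guest pages dirty. */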
593 bitmap_zero(bitmap, _PAGE_ENTRIES);
594 gmap_sync_dirty_log_pmd(gmap, bitmap, gaddr, vmaddr);
595 for (i = 0; i < _PAGE_ENTRIES; i++) {
596 if (test_bit(i, bitmap))
597 mark_page_dirty(kvm, cur_gfn + i);
598 }
599
600 if (fatal_signal_pending(current))
601 return;
602 cond_resched();
603 }
604}
605
606/* Section: vm related */
607static void sca_del_vcpu(struct kvm_vcpu *vcpu);
608
609/*
610 * Get (and clear) the dirty memory log for a memory slot.
611 */
612int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
613 struct kvm_dirty_log *log)
614{
615 int r;
616 unsigned long n;
617 struct kvm_memslots *slots;
618 struct kvm_memory_slot *memslot;
619 int is_dirty = 0;
620
621 if (kvm_is_ucontrol(kvm))
622 return -EINVAL;
623
624 mutex_lock(&kvm->slots_lock);
625
626 r = -EINVAL;
627 if (log->slot >= KVM_USER_MEM_SLOTS)
628 goto out;
629
630 slots = kvm_memslots(kvm);
631 memslot = id_to_memslot(slots, log->slot);
632 r = -ENOENT;
633 if (!memslot->dirty_bitmap)
634 goto out;
635
636 kvm_s390_sync_dirty_log(kvm, memslot);
637 r = kvm_get_dirty_log(kvm, log, &is_dirty);
638 if (r)
639 goto out;
640
641 /* Clear the dirty log */
642 if (is_dirty) {
643 n = kvm_dirty_bitmap_bytes(memslot);
644 memset(memslot->dirty_bitmap, 0, n);
645 }
646 r = 0;
647out:
648 mutex_unlock(&kvm->slots_lock);
649 return r;
650}
651
652static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
653{
654 unsigned int i;
655 struct kvm_vcpu *vcpu;
656
657 kvm_for_each_vcpu(i, vcpu, kvm) {
658 kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
659 }
660}
661
662int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
663{
664 int r;
665
666 if (cap->flags)
667 return -EINVAL;
668
669 switch (cap->cap) {
670 case KVM_CAP_S390_IRQCHIP:
671 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
672 kvm->arch.use_irqchip = 1;
673 r = 0;
674 break;
675 case KVM_CAP_S390_USER_SIGP:
676 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
677 kvm->arch.user_sigp = 1;
678 r = 0;
679 break;
680 case KVM_CAP_S390_VECTOR_REGISTERS:
681 mutex_lock(&kvm->lock);
682 if (kvm->created_vcpus) {
683 r = -EBUSY;
684 } else if (MACHINE_HAS_VX) {
685 set_kvm_facility(kvm->arch.model.fac_mask, 129);
686 set_kvm_facility(kvm->arch.model.fac_list, 129);
687 if (test_facility(134)) {
688 set_kvm_facility(kvm->arch.model.fac_mask, 134);
689 set_kvm_facility(kvm->arch.model.fac_list, 134);
690 }
691 if (test_facility(135)) {
692 set_kvm_facility(kvm->arch.model.fac_mask, 135);
693 set_kvm_facility(kvm->arch.model.fac_list, 135);
694 }
695 if (test_facility(148)) {
696 set_kvm_facility(kvm->arch.model.fac_mask, 148);
697 set_kvm_facility(kvm->arch.model.fac_list, 148);
698 }
699 if (test_facility(152)) {
700 set_kvm_facility(kvm->arch.model.fac_mask, 152);
701 set_kvm_facility(kvm->arch.model.fac_list, 152);
702 }
703 r = 0;
704 } else
705 r = -EINVAL;
706 mutex_unlock(&kvm->lock);
707 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
708 r ? "(not available)" : "(success)");
709 break;
710 case KVM_CAP_S390_RI:
711 r = -EINVAL;
712 mutex_lock(&kvm->lock);
713 if (kvm->created_vcpus) {
714 r = -EBUSY;
715 } else if (test_facility(64)) {
716 set_kvm_facility(kvm->arch.model.fac_mask, 64);
717 set_kvm_facility(kvm->arch.model.fac_list, 64);
718 r = 0;
719 }
720 mutex_unlock(&kvm->lock);
721 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
722 r ? "(not available)" : "(success)");
723 break;
724 case KVM_CAP_S390_AIS:
725 mutex_lock(&kvm->lock);
726 if (kvm->created_vcpus) {
727 r = -EBUSY;
728 } else {
729 set_kvm_facility(kvm->arch.model.fac_mask, 72);
730 set_kvm_facility(kvm->arch.model.fac_list, 72);
731 r = 0;
732 }
733 mutex_unlock(&kvm->lock);
734 VM_EVENT(kvm, 3, "ENABLE: AIS %s",
735 r ? "(not available)" : "(success)");
736 break;
737 case KVM_CAP_S390_GS:
738 r = -EINVAL;
739 mutex_lock(&kvm->lock);
740 if (kvm->created_vcpus) {
741 r = -EBUSY;
742 } else if (test_facility(133)) {
743 set_kvm_facility(kvm->arch.model.fac_mask, 133);
744 set_kvm_facility(kvm->arch.model.fac_list, 133);
745 r = 0;
746 }
747 mutex_unlock(&kvm->lock);
748 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s",
749 r ? "(not available)" : "(success)");
750 break;
751 case KVM_CAP_S390_HPAGE_1M:
752 mutex_lock(&kvm->lock);
753 if (kvm->created_vcpus)
754 r = -EBUSY;
755 else if (!hpage || kvm->arch.use_cmma || kvm_is_ucontrol(kvm))
756 r = -EINVAL;
757 else {
758 r = 0;
759 down_write(&kvm->mm->mmap_sem);
760 kvm->mm->context.allow_gmap_hpage_1m = 1;
761 up_write(&kvm->mm->mmap_sem);
762 /*
763 * We might have to create fake 4k page
764 * tables. To avoid that the hardware works on
765 * stale PGSTEs, we emulate these instructions.
766 */
767 kvm->arch.use_skf = 0;
768 kvm->arch.use_pfmfi = 0;
769 }
770 mutex_unlock(&kvm->lock);
771 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_HPAGE %s",
772 r ? "(not available)" : "(success)");
773 break;
774 case KVM_CAP_S390_USER_STSI:
775 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
776 kvm->arch.user_stsi = 1;
777 r = 0;
778 break;
779 case KVM_CAP_S390_USER_INSTR0:
780 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
781 kvm->arch.user_instr0 = 1;
782 icpt_operexc_on_all_vcpus(kvm);
783 r = 0;
784 break;
785 default:
786 r = -EINVAL;
787 break;
788 }
789 return r;
790}
791
792static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
793{
794 int ret;
795
796 switch (attr->attr) {
797 case KVM_S390_VM_MEM_LIMIT_SIZE:
798 ret = 0;
799 VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
800 kvm->arch.mem_limit);
801 if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
802 ret = -EFAULT;
803 break;
804 default:
805 ret = -ENXIO;
806 break;
807 }
808 return ret;
809}
810
811static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
812{
813 int ret;
814 unsigned int idx;
815 switch (attr->attr) {
816 case KVM_S390_VM_MEM_ENABLE_CMMA:
817 ret = -ENXIO;
818 if (!sclp.has_cmma)
819 break;
820
821 VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
822 mutex_lock(&kvm->lock);
823 if (kvm->created_vcpus)
824 ret = -EBUSY;
825 else if (kvm->mm->context.allow_gmap_hpage_1m)
826 ret = -EINVAL;
827 else {
828 kvm->arch.use_cmma = 1;
829 /* Not compatible with cmma. */
830 kvm->arch.use_pfmfi = 0;
831 ret = 0;
832 }
833 mutex_unlock(&kvm->lock);
834 break;
835 case KVM_S390_VM_MEM_CLR_CMMA:
836 ret = -ENXIO;
837 if (!sclp.has_cmma)
838 break;
839 ret = -EINVAL;
840 if (!kvm->arch.use_cmma)
841 break;
842
843 VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
844 mutex_lock(&kvm->lock);
845 idx = srcu_read_lock(&kvm->srcu);
846 s390_reset_cmma(kvm->arch.gmap->mm);
847 srcu_read_unlock(&kvm->srcu, idx);
848 mutex_unlock(&kvm->lock);
849 ret = 0;
850 break;
851 case KVM_S390_VM_MEM_LIMIT_SIZE: {
852 unsigned long new_limit;
853
854 if (kvm_is_ucontrol(kvm))
855 return -EINVAL;
856
857 if (get_user(new_limit, (u64 __user *)attr->addr))
858 return -EFAULT;
859
860 if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
861 new_limit > kvm->arch.mem_limit)
862 return -E2BIG;
863
864 if (!new_limit)
865 return -EINVAL;
866
867 /* gmap_create takes last usable address */
868 if (new_limit != KVM_S390_NO_MEM_LIMIT)
869 new_limit -= 1;
870
871 ret = -EBUSY;
872 mutex_lock(&kvm->lock);
873 if (!kvm->created_vcpus) {
874 /* gmap_create will round the limit up */
875 struct gmap *new = gmap_create(current->mm, new_limit);
876
877 if (!new) {
878 ret = -ENOMEM;
879 } else {
880 gmap_remove(kvm->arch.gmap);
881 new->private = kvm;
882 kvm->arch.gmap = new;
883 ret = 0;
884 }
885 }
886 mutex_unlock(&kvm->lock);
887 VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
888 VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
889 (void *) kvm->arch.gmap->asce);
890 break;
891 }
892 default:
893 ret = -ENXIO;
894 break;
895 }
896 return ret;
897}
898
899static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
900
901void kvm_s390_vcpu_crypto_reset_all(struct kvm *kvm)
902{
903 struct kvm_vcpu *vcpu;
904 int i;
905
906 kvm_s390_vcpu_block_all(kvm);
907
908 kvm_for_each_vcpu(i, vcpu, kvm) {
909 kvm_s390_vcpu_crypto_setup(vcpu);
910 /* recreate the shadow crycb by leaving the VSIE handler */
911 kvm_s390_sync_request(KVM_REQ_VSIE_RESTART, vcpu);
912 }
913
914 kvm_s390_vcpu_unblock_all(kvm);
915}
916
917static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
918{
919 mutex_lock(&kvm->lock);
920 switch (attr->attr) {
921 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
922 if (!test_kvm_facility(kvm, 76)) {
923 mutex_unlock(&kvm->lock);
924 return -EINVAL;
925 }
926 get_random_bytes(
927 kvm->arch.crypto.crycb->aes_wrapping_key_mask,
928 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
929 kvm->arch.crypto.aes_kw = 1;
930 VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
931 break;
932 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
933 if (!test_kvm_facility(kvm, 76)) {
934 mutex_unlock(&kvm->lock);
935 return -EINVAL;
936 }
937 get_random_bytes(
938 kvm->arch.crypto.crycb->dea_wrapping_key_mask,
939 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
940 kvm->arch.crypto.dea_kw = 1;
941 VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
942 break;
943 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
944 if (!test_kvm_facility(kvm, 76)) {
945 mutex_unlock(&kvm->lock);
946 return -EINVAL;
947 }
948 kvm->arch.crypto.aes_kw = 0;
949 memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
950 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
951 VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
952 break;
953 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
954 if (!test_kvm_facility(kvm, 76)) {
955 mutex_unlock(&kvm->lock);
956 return -EINVAL;
957 }
958 kvm->arch.crypto.dea_kw = 0;
959 memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
960 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
961 VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
962 break;
963 case KVM_S390_VM_CRYPTO_ENABLE_APIE:
964 if (!ap_instructions_available()) {
965 mutex_unlock(&kvm->lock);
966 return -EOPNOTSUPP;
967 }
968 kvm->arch.crypto.apie = 1;
969 break;
970 case KVM_S390_VM_CRYPTO_DISABLE_APIE:
971 if (!ap_instructions_available()) {
972 mutex_unlock(&kvm->lock);
973 return -EOPNOTSUPP;
974 }
975 kvm->arch.crypto.apie = 0;
976 break;
977 default:
978 mutex_unlock(&kvm->lock);
979 return -ENXIO;
980 }
981
982 kvm_s390_vcpu_crypto_reset_all(kvm);
983 mutex_unlock(&kvm->lock);
984 return 0;
985}
986
987static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
988{
989 int cx;
990 struct kvm_vcpu *vcpu;
991
992 kvm_for_each_vcpu(cx, vcpu, kvm)
993 kvm_s390_sync_request(req, vcpu);
994}
995
996/*
997 * Must be called with kvm->srcu held to avoid races on memslots, and with
998 * kvm->slots_lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
999 */
1000static int kvm_s390_vm_start_migration(struct kvm *kvm)
1001{
1002 struct kvm_memory_slot *ms;
1003 struct kvm_memslots *slots;
1004 unsigned long ram_pages = 0;
1005 int slotnr;
1006
1007 /* migration mode already enabled */
1008 if (kvm->arch.migration_mode)
1009 return 0;
1010 slots = kvm_memslots(kvm);
1011 if (!slots || !slots->used_slots)
1012 return -EINVAL;
1013
1014 if (!kvm->arch.use_cmma) {
1015 kvm->arch.migration_mode = 1;
1016 return 0;
1017 }
1018 /* mark all the pages in active slots as dirty */
1019 for (slotnr = 0; slotnr < slots->used_slots; slotnr++) {
1020 ms = slots->memslots + slotnr;
1021 if (!ms->dirty_bitmap)
1022 return -EINVAL;
1023 /*
1024 * The second half of the bitmap is only used on x86,
1025 * and would be wasted otherwise, so we put it to good
1026 * use here to keep track of the state of the storage
1027 * attributes.
1028 */
1029 memset(kvm_second_dirty_bitmap(ms), 0xff, kvm_dirty_bitmap_bytes(ms));
1030 ram_pages += ms->npages;
1031 }
1032 atomic64_set(&kvm->arch.cmma_dirty_pages, ram_pages);
1033 kvm->arch.migration_mode = 1;
1034 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
1035 return 0;
1036}
1037
1038/*
1039 * Must be called with kvm->slots_lock to avoid races with ourselves and
1040 * kvm_s390_vm_start_migration.
1041 */
1042static int kvm_s390_vm_stop_migration(struct kvm *kvm)
1043{
1044 /* migration mode already disabled */
1045 if (!kvm->arch.migration_mode)
1046 return 0;
1047 kvm->arch.migration_mode = 0;
1048 if (kvm->arch.use_cmma)
1049 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
1050 return 0;
1051}
1052
1053static int kvm_s390_vm_set_migration(struct kvm *kvm,
1054 struct kvm_device_attr *attr)
1055{
1056 int res = -ENXIO;
1057
1058 mutex_lock(&kvm->slots_lock);
1059 switch (attr->attr) {
1060 case KVM_S390_VM_MIGRATION_START:
1061 res = kvm_s390_vm_start_migration(kvm);
1062 break;
1063 case KVM_S390_VM_MIGRATION_STOP:
1064 res = kvm_s390_vm_stop_migration(kvm);
1065 break;
1066 default:
1067 break;
1068 }
1069 mutex_unlock(&kvm->slots_lock);
1070
1071 return res;
1072}
1073
1074static int kvm_s390_vm_get_migration(struct kvm *kvm,
1075 struct kvm_device_attr *attr)
1076{
1077 u64 mig = kvm->arch.migration_mode;
1078
1079 if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
1080 return -ENXIO;
1081
1082 if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))
1083 return -EFAULT;
1084 return 0;
1085}
1086
1087static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
1088{
1089 struct kvm_s390_vm_tod_clock gtod;
1090
1091 if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
1092 return -EFAULT;
1093
1094 if (!test_kvm_facility(kvm, 139) && gtod.epoch_idx)
1095 return -EINVAL;
1096 kvm_s390_set_tod_clock(kvm, &gtod);
1097
1098 VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx",
1099 gtod.epoch_idx, gtod.tod);
1100
1101 return 0;
1102}
1103
1104static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
1105{
1106 u8 gtod_high;
1107
1108 if (copy_from_user(&gtod_high, (void __user *)attr->addr,
1109 sizeof(gtod_high)))
1110 return -EFAULT;
1111
1112 if (gtod_high != 0)
1113 return -EINVAL;
1114 VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);
1115
1116 return 0;
1117}
1118
1119static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1120{
1121 struct kvm_s390_vm_tod_clock gtod = { 0 };
1122
1123 if (copy_from_user(&gtod.tod, (void __user *)attr->addr,
1124 sizeof(gtod.tod)))
1125 return -EFAULT;
1126
1127 kvm_s390_set_tod_clock(kvm, &gtod);
1128 VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod.tod);
1129 return 0;
1130}
1131
1132static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1133{
1134 int ret;
1135
1136 if (attr->flags)
1137 return -EINVAL;
1138
1139 switch (attr->attr) {
1140 case KVM_S390_VM_TOD_EXT:
1141 ret = kvm_s390_set_tod_ext(kvm, attr);
1142 break;
1143 case KVM_S390_VM_TOD_HIGH:
1144 ret = kvm_s390_set_tod_high(kvm, attr);
1145 break;
1146 case KVM_S390_VM_TOD_LOW:
1147 ret = kvm_s390_set_tod_low(kvm, attr);
1148 break;
1149 default:
1150 ret = -ENXIO;
1151 break;
1152 }
1153 return ret;
1154}
1155
1156static void kvm_s390_get_tod_clock(struct kvm *kvm,
1157 struct kvm_s390_vm_tod_clock *gtod)
1158{
1159 struct kvm_s390_tod_clock_ext htod;
1160
1161 preempt_disable();
1162
1163 get_tod_clock_ext((char *)&htod);
1164
1165 gtod->tod = htod.tod + kvm->arch.epoch;
1166 gtod->epoch_idx = 0;
1167 if (test_kvm_facility(kvm, 139)) {
1168 gtod->epoch_idx = htod.epoch_idx + kvm->arch.epdx;
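/* A carry from the 64-bit TOD addition spills over into the epoch index. */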
1169 if (gtod->tod < htod.tod)
1170 gtod->epoch_idx += 1;
1171 }
1172
1173 preempt_enable();
1174}
1175
1176static int kvm_s390_get_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
1177{
1178 struct kvm_s390_vm_tod_clock gtod;
1179
1180 memset(&gtod, 0, sizeof(gtod));
1181 kvm_s390_get_tod_clock(kvm, &gtod);
1182 if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
1183 return -EFAULT;
1184
1185 VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x, TOD base: 0x%llx",
1186 gtod.epoch_idx, gtod.tod);
1187 return 0;
1188}
1189
1190static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
1191{
1192 u8 gtod_high = 0;
1193
1194 if (copy_to_user((void __user *)attr->addr, &gtod_high,
1195 sizeof(gtod_high)))
1196 return -EFAULT;
1197 VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);
1198
1199 return 0;
1200}
1201
1202static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1203{
1204 u64 gtod;
1205
1206 gtod = kvm_s390_get_tod_clock_fast(kvm);
1207 if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
1208 return -EFAULT;
1209 VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);
1210
1211 return 0;
1212}
1213
1214static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1215{
1216 int ret;
1217
1218 if (attr->flags)
1219 return -EINVAL;
1220
1221 switch (attr->attr) {
1222 case KVM_S390_VM_TOD_EXT:
1223 ret = kvm_s390_get_tod_ext(kvm, attr);
1224 break;
1225 case KVM_S390_VM_TOD_HIGH:
1226 ret = kvm_s390_get_tod_high(kvm, attr);
1227 break;
1228 case KVM_S390_VM_TOD_LOW:
1229 ret = kvm_s390_get_tod_low(kvm, attr);
1230 break;
1231 default:
1232 ret = -ENXIO;
1233 break;
1234 }
1235 return ret;
1236}
1237
1238static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1239{
1240 struct kvm_s390_vm_cpu_processor *proc;
1241 u16 lowest_ibc, unblocked_ibc;
1242 int ret = 0;
1243
1244 mutex_lock(&kvm->lock);
1245 if (kvm->created_vcpus) {
1246 ret = -EBUSY;
1247 goto out;
1248 }
1249 proc = kzalloc(sizeof(*proc), GFP_KERNEL);
1250 if (!proc) {
1251 ret = -ENOMEM;
1252 goto out;
1253 }
1254 if (!copy_from_user(proc, (void __user *)attr->addr,
1255 sizeof(*proc))) {
1256 kvm->arch.model.cpuid = proc->cpuid;
1257 lowest_ibc = sclp.ibc >> 16 & 0xfff;
1258 unblocked_ibc = sclp.ibc & 0xfff;
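/* Clamp the requested IBC value to the range the machine supports. */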
1259 if (lowest_ibc && proc->ibc) {
1260 if (proc->ibc > unblocked_ibc)
1261 kvm->arch.model.ibc = unblocked_ibc;
1262 else if (proc->ibc < lowest_ibc)
1263 kvm->arch.model.ibc = lowest_ibc;
1264 else
1265 kvm->arch.model.ibc = proc->ibc;
1266 }
1267 memcpy(kvm->arch.model.fac_list, proc->fac_list,
1268 S390_ARCH_FAC_LIST_SIZE_BYTE);
1269 VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1270 kvm->arch.model.ibc,
1271 kvm->arch.model.cpuid);
1272 VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1273 kvm->arch.model.fac_list[0],
1274 kvm->arch.model.fac_list[1],
1275 kvm->arch.model.fac_list[2]);
1276 } else
1277 ret = -EFAULT;
1278 kfree(proc);
1279out:
1280 mutex_unlock(&kvm->lock);
1281 return ret;
1282}
1283
1284static int kvm_s390_set_processor_feat(struct kvm *kvm,
1285 struct kvm_device_attr *attr)
1286{
1287 struct kvm_s390_vm_cpu_feat data;
1288
1289 if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
1290 return -EFAULT;
1291 if (!bitmap_subset((unsigned long *) data.feat,
1292 kvm_s390_available_cpu_feat,
1293 KVM_S390_VM_CPU_FEAT_NR_BITS))
1294 return -EINVAL;
1295
1296 mutex_lock(&kvm->lock);
1297 if (kvm->created_vcpus) {
1298 mutex_unlock(&kvm->lock);
1299 return -EBUSY;
1300 }
1301 bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
1302 KVM_S390_VM_CPU_FEAT_NR_BITS);
1303 mutex_unlock(&kvm->lock);
1304 VM_EVENT(kvm, 3, "SET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1305 data.feat[0],
1306 data.feat[1],
1307 data.feat[2]);
1308 return 0;
1309}
1310
1311static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
1312 struct kvm_device_attr *attr)
1313{
1314 mutex_lock(&kvm->lock);
1315 if (kvm->created_vcpus) {
1316 mutex_unlock(&kvm->lock);
1317 return -EBUSY;
1318 }
1319
1320 if (copy_from_user(&kvm->arch.model.subfuncs, (void __user *)attr->addr,
1321 sizeof(struct kvm_s390_vm_cpu_subfunc))) {
1322 mutex_unlock(&kvm->lock);
1323 return -EFAULT;
1324 }
1325 mutex_unlock(&kvm->lock);
1326
1327 VM_EVENT(kvm, 3, "SET: guest PLO subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1328 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
1329 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
1330 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
1331 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
1332 VM_EVENT(kvm, 3, "SET: guest PTFF subfunc 0x%16.16lx.%16.16lx",
1333 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
1334 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
1335 VM_EVENT(kvm, 3, "SET: guest KMAC subfunc 0x%16.16lx.%16.16lx",
1336 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
1337 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
1338 VM_EVENT(kvm, 3, "SET: guest KMC subfunc 0x%16.16lx.%16.16lx",
1339 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
1340 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
1341 VM_EVENT(kvm, 3, "SET: guest KM subfunc 0x%16.16lx.%16.16lx",
1342 ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
1343 ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
1344 VM_EVENT(kvm, 3, "SET: guest KIMD subfunc 0x%16.16lx.%16.16lx",
1345 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
1346 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
1347 VM_EVENT(kvm, 3, "SET: guest KLMD subfunc 0x%16.16lx.%16.16lx",
1348 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
1349 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
1350 VM_EVENT(kvm, 3, "SET: guest PCKMO subfunc 0x%16.16lx.%16.16lx",
1351 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
1352 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
1353 VM_EVENT(kvm, 3, "SET: guest KMCTR subfunc 0x%16.16lx.%16.16lx",
1354 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
1355 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
1356 VM_EVENT(kvm, 3, "SET: guest KMF subfunc 0x%16.16lx.%16.16lx",
1357 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
1358 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
1359 VM_EVENT(kvm, 3, "SET: guest KMO subfunc 0x%16.16lx.%16.16lx",
1360 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
1361 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
1362 VM_EVENT(kvm, 3, "SET: guest PCC subfunc 0x%16.16lx.%16.16lx",
1363 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
1364 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
1365 VM_EVENT(kvm, 3, "SET: guest PPNO subfunc 0x%16.16lx.%16.16lx",
1366 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
1367 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
1368 VM_EVENT(kvm, 3, "SET: guest KMA subfunc 0x%16.16lx.%16.16lx",
1369 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
1370 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
1371 VM_EVENT(kvm, 3, "SET: guest KDSA subfunc 0x%16.16lx.%16.16lx",
1372 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
1373 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
1374 VM_EVENT(kvm, 3, "SET: guest SORTL subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1375 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
1376 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
1377 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
1378 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
1379 VM_EVENT(kvm, 3, "SET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1380 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
1381 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
1382 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
1383 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);
1384
1385 return 0;
1386}
1387
1388static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1389{
1390 int ret = -ENXIO;
1391
1392 switch (attr->attr) {
1393 case KVM_S390_VM_CPU_PROCESSOR:
1394 ret = kvm_s390_set_processor(kvm, attr);
1395 break;
1396 case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1397 ret = kvm_s390_set_processor_feat(kvm, attr);
1398 break;
1399 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1400 ret = kvm_s390_set_processor_subfunc(kvm, attr);
1401 break;
1402 }
1403 return ret;
1404}
1405
1406static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1407{
1408 struct kvm_s390_vm_cpu_processor *proc;
1409 int ret = 0;
1410
1411 proc = kzalloc(sizeof(*proc), GFP_KERNEL);
1412 if (!proc) {
1413 ret = -ENOMEM;
1414 goto out;
1415 }
1416 proc->cpuid = kvm->arch.model.cpuid;
1417 proc->ibc = kvm->arch.model.ibc;
1418 memcpy(&proc->fac_list, kvm->arch.model.fac_list,
1419 S390_ARCH_FAC_LIST_SIZE_BYTE);
1420 VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1421 kvm->arch.model.ibc,
1422 kvm->arch.model.cpuid);
1423 VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1424 kvm->arch.model.fac_list[0],
1425 kvm->arch.model.fac_list[1],
1426 kvm->arch.model.fac_list[2]);
1427 if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
1428 ret = -EFAULT;
1429 kfree(proc);
1430out:
1431 return ret;
1432}
1433
1434static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
1435{
1436 struct kvm_s390_vm_cpu_machine *mach;
1437 int ret = 0;
1438
1439 mach = kzalloc(sizeof(*mach), GFP_KERNEL);
1440 if (!mach) {
1441 ret = -ENOMEM;
1442 goto out;
1443 }
1444 get_cpu_id((struct cpuid *) &mach->cpuid);
1445 mach->ibc = sclp.ibc;
1446 memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
1447 S390_ARCH_FAC_LIST_SIZE_BYTE);
1448 memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
1449 sizeof(S390_lowcore.stfle_fac_list));
1450 VM_EVENT(kvm, 3, "GET: host ibc: 0x%4.4x, host cpuid: 0x%16.16llx",
1451 kvm->arch.model.ibc,
1452 kvm->arch.model.cpuid);
1453 VM_EVENT(kvm, 3, "GET: host facmask: 0x%16.16llx.%16.16llx.%16.16llx",
1454 mach->fac_mask[0],
1455 mach->fac_mask[1],
1456 mach->fac_mask[2]);
1457 VM_EVENT(kvm, 3, "GET: host faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1458 mach->fac_list[0],
1459 mach->fac_list[1],
1460 mach->fac_list[2]);
1461 if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
1462 ret = -EFAULT;
1463 kfree(mach);
1464out:
1465 return ret;
1466}
1467
1468static int kvm_s390_get_processor_feat(struct kvm *kvm,
1469 struct kvm_device_attr *attr)
1470{
1471 struct kvm_s390_vm_cpu_feat data;
1472
1473 bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
1474 KVM_S390_VM_CPU_FEAT_NR_BITS);
1475 if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1476 return -EFAULT;
1477 VM_EVENT(kvm, 3, "GET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1478 data.feat[0],
1479 data.feat[1],
1480 data.feat[2]);
1481 return 0;
1482}
1483
1484static int kvm_s390_get_machine_feat(struct kvm *kvm,
1485 struct kvm_device_attr *attr)
1486{
1487 struct kvm_s390_vm_cpu_feat data;
1488
1489 bitmap_copy((unsigned long *) data.feat,
1490 kvm_s390_available_cpu_feat,
1491 KVM_S390_VM_CPU_FEAT_NR_BITS);
1492 if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1493 return -EFAULT;
1494 VM_EVENT(kvm, 3, "GET: host feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1495 data.feat[0],
1496 data.feat[1],
1497 data.feat[2]);
1498 return 0;
1499}
1500
1501static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
1502 struct kvm_device_attr *attr)
1503{
1504 if (copy_to_user((void __user *)attr->addr, &kvm->arch.model.subfuncs,
1505 sizeof(struct kvm_s390_vm_cpu_subfunc)))
1506 return -EFAULT;
1507
1508 VM_EVENT(kvm, 3, "GET: guest PLO subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1509 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
1510 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
1511 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
1512 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
1513 VM_EVENT(kvm, 3, "GET: guest PTFF subfunc 0x%16.16lx.%16.16lx",
1514 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
1515 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
1516 VM_EVENT(kvm, 3, "GET: guest KMAC subfunc 0x%16.16lx.%16.16lx",
1517 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
1518 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
1519 VM_EVENT(kvm, 3, "GET: guest KMC subfunc 0x%16.16lx.%16.16lx",
1520 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
1521 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
1522 VM_EVENT(kvm, 3, "GET: guest KM subfunc 0x%16.16lx.%16.16lx",
1523 ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
1524 ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
1525 VM_EVENT(kvm, 3, "GET: guest KIMD subfunc 0x%16.16lx.%16.16lx",
1526 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
1527 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
1528 VM_EVENT(kvm, 3, "GET: guest KLMD subfunc 0x%16.16lx.%16.16lx",
1529 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
1530 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
1531 VM_EVENT(kvm, 3, "GET: guest PCKMO subfunc 0x%16.16lx.%16.16lx",
1532 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
1533 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
1534 VM_EVENT(kvm, 3, "GET: guest KMCTR subfunc 0x%16.16lx.%16.16lx",
1535 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
1536 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
1537 VM_EVENT(kvm, 3, "GET: guest KMF subfunc 0x%16.16lx.%16.16lx",
1538 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
1539 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
1540 VM_EVENT(kvm, 3, "GET: guest KMO subfunc 0x%16.16lx.%16.16lx",
1541 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
1542 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
1543 VM_EVENT(kvm, 3, "GET: guest PCC subfunc 0x%16.16lx.%16.16lx",
1544 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
1545 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
1546 VM_EVENT(kvm, 3, "GET: guest PPNO subfunc 0x%16.16lx.%16.16lx",
1547 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
1548 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
1549 VM_EVENT(kvm, 3, "GET: guest KMA subfunc 0x%16.16lx.%16.16lx",
1550 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
1551 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
1552 VM_EVENT(kvm, 3, "GET: guest KDSA subfunc 0x%16.16lx.%16.16lx",
1553 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
1554 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
1555 VM_EVENT(kvm, 3, "GET: guest SORTL subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1556 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
1557 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
1558 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
1559 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
1560 VM_EVENT(kvm, 3, "GET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1561 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
1562 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
1563 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
1564 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);
1565
1566 return 0;
1567}
1568
1569static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
1570 struct kvm_device_attr *attr)
1571{
1572 if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
1573 sizeof(struct kvm_s390_vm_cpu_subfunc)))
1574 return -EFAULT;
1575
1576 VM_EVENT(kvm, 3, "GET: host PLO subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1577 ((unsigned long *) &kvm_s390_available_subfunc.plo)[0],
1578 ((unsigned long *) &kvm_s390_available_subfunc.plo)[1],
1579 ((unsigned long *) &kvm_s390_available_subfunc.plo)[2],
1580 ((unsigned long *) &kvm_s390_available_subfunc.plo)[3]);
1581 VM_EVENT(kvm, 3, "GET: host PTFF subfunc 0x%16.16lx.%16.16lx",
1582 ((unsigned long *) &kvm_s390_available_subfunc.ptff)[0],
1583 ((unsigned long *) &kvm_s390_available_subfunc.ptff)[1]);
1584 VM_EVENT(kvm, 3, "GET: host KMAC subfunc 0x%16.16lx.%16.16lx",
1585 ((unsigned long *) &kvm_s390_available_subfunc.kmac)[0],
1586 ((unsigned long *) &kvm_s390_available_subfunc.kmac)[1]);
1587 VM_EVENT(kvm, 3, "GET: host KMC subfunc 0x%16.16lx.%16.16lx",
1588 ((unsigned long *) &kvm_s390_available_subfunc.kmc)[0],
1589 ((unsigned long *) &kvm_s390_available_subfunc.kmc)[1]);
1590 VM_EVENT(kvm, 3, "GET: host KM subfunc 0x%16.16lx.%16.16lx",
1591 ((unsigned long *) &kvm_s390_available_subfunc.km)[0],
1592 ((unsigned long *) &kvm_s390_available_subfunc.km)[1]);
1593 VM_EVENT(kvm, 3, "GET: host KIMD subfunc 0x%16.16lx.%16.16lx",
1594 ((unsigned long *) &kvm_s390_available_subfunc.kimd)[0],
1595 ((unsigned long *) &kvm_s390_available_subfunc.kimd)[1]);
1596 VM_EVENT(kvm, 3, "GET: host KLMD subfunc 0x%16.16lx.%16.16lx",
1597 ((unsigned long *) &kvm_s390_available_subfunc.klmd)[0],
1598 ((unsigned long *) &kvm_s390_available_subfunc.klmd)[1]);
1599 VM_EVENT(kvm, 3, "GET: host PCKMO subfunc 0x%16.16lx.%16.16lx",
1600 ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[0],
1601 ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[1]);
1602 VM_EVENT(kvm, 3, "GET: host KMCTR subfunc 0x%16.16lx.%16.16lx",
1603 ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[0],
1604 ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[1]);
1605 VM_EVENT(kvm, 3, "GET: host KMF subfunc 0x%16.16lx.%16.16lx",
1606 ((unsigned long *) &kvm_s390_available_subfunc.kmf)[0],
1607 ((unsigned long *) &kvm_s390_available_subfunc.kmf)[1]);
1608 VM_EVENT(kvm, 3, "GET: host KMO subfunc 0x%16.16lx.%16.16lx",
1609 ((unsigned long *) &kvm_s390_available_subfunc.kmo)[0],
1610 ((unsigned long *) &kvm_s390_available_subfunc.kmo)[1]);
1611 VM_EVENT(kvm, 3, "GET: host PCC subfunc 0x%16.16lx.%16.16lx",
1612 ((unsigned long *) &kvm_s390_available_subfunc.pcc)[0],
1613 ((unsigned long *) &kvm_s390_available_subfunc.pcc)[1]);
1614 VM_EVENT(kvm, 3, "GET: host PPNO subfunc 0x%16.16lx.%16.16lx",
1615 ((unsigned long *) &kvm_s390_available_subfunc.ppno)[0],
1616 ((unsigned long *) &kvm_s390_available_subfunc.ppno)[1]);
1617 VM_EVENT(kvm, 3, "GET: host KMA subfunc 0x%16.16lx.%16.16lx",
1618 ((unsigned long *) &kvm_s390_available_subfunc.kma)[0],
1619 ((unsigned long *) &kvm_s390_available_subfunc.kma)[1]);
1620 VM_EVENT(kvm, 3, "GET: host KDSA subfunc 0x%16.16lx.%16.16lx",
1621 ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[0],
1622 ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[1]);
1623 VM_EVENT(kvm, 3, "GET: host SORTL subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1624 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[0],
1625 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[1],
1626 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[2],
1627 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[3]);
1628 VM_EVENT(kvm, 3, "GET: host DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1629 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[0],
1630 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[1],
1631 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[2],
1632 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[3]);
1633
1634 return 0;
1635}
1636
1637static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1638{
1639 int ret = -ENXIO;
1640
1641 switch (attr->attr) {
1642 case KVM_S390_VM_CPU_PROCESSOR:
1643 ret = kvm_s390_get_processor(kvm, attr);
1644 break;
1645 case KVM_S390_VM_CPU_MACHINE:
1646 ret = kvm_s390_get_machine(kvm, attr);
1647 break;
1648 case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1649 ret = kvm_s390_get_processor_feat(kvm, attr);
1650 break;
1651 case KVM_S390_VM_CPU_MACHINE_FEAT:
1652 ret = kvm_s390_get_machine_feat(kvm, attr);
1653 break;
1654 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1655 ret = kvm_s390_get_processor_subfunc(kvm, attr);
1656 break;
1657 case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1658 ret = kvm_s390_get_machine_subfunc(kvm, attr);
1659 break;
1660 }
1661 return ret;
1662}
1663
1664static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1665{
1666 int ret;
1667
1668 switch (attr->group) {
1669 case KVM_S390_VM_MEM_CTRL:
1670 ret = kvm_s390_set_mem_control(kvm, attr);
1671 break;
1672 case KVM_S390_VM_TOD:
1673 ret = kvm_s390_set_tod(kvm, attr);
1674 break;
1675 case KVM_S390_VM_CPU_MODEL:
1676 ret = kvm_s390_set_cpu_model(kvm, attr);
1677 break;
1678 case KVM_S390_VM_CRYPTO:
1679 ret = kvm_s390_vm_set_crypto(kvm, attr);
1680 break;
1681 case KVM_S390_VM_MIGRATION:
1682 ret = kvm_s390_vm_set_migration(kvm, attr);
1683 break;
1684 default:
1685 ret = -ENXIO;
1686 break;
1687 }
1688
1689 return ret;
1690}
1691
1692static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1693{
1694 int ret;
1695
1696 switch (attr->group) {
1697 case KVM_S390_VM_MEM_CTRL:
1698 ret = kvm_s390_get_mem_control(kvm, attr);
1699 break;
1700 case KVM_S390_VM_TOD:
1701 ret = kvm_s390_get_tod(kvm, attr);
1702 break;
1703 case KVM_S390_VM_CPU_MODEL:
1704 ret = kvm_s390_get_cpu_model(kvm, attr);
1705 break;
1706 case KVM_S390_VM_MIGRATION:
1707 ret = kvm_s390_vm_get_migration(kvm, attr);
1708 break;
1709 default:
1710 ret = -ENXIO;
1711 break;
1712 }
1713
1714 return ret;
1715}
1716
1717static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1718{
1719 int ret;
1720
1721 switch (attr->group) {
1722 case KVM_S390_VM_MEM_CTRL:
1723 switch (attr->attr) {
1724 case KVM_S390_VM_MEM_ENABLE_CMMA:
1725 case KVM_S390_VM_MEM_CLR_CMMA:
1726 ret = sclp.has_cmma ? 0 : -ENXIO;
1727 break;
1728 case KVM_S390_VM_MEM_LIMIT_SIZE:
1729 ret = 0;
1730 break;
1731 default:
1732 ret = -ENXIO;
1733 break;
1734 }
1735 break;
1736 case KVM_S390_VM_TOD:
1737 switch (attr->attr) {
1738 case KVM_S390_VM_TOD_LOW:
1739 case KVM_S390_VM_TOD_HIGH:
1740 ret = 0;
1741 break;
1742 default:
1743 ret = -ENXIO;
1744 break;
1745 }
1746 break;
1747 case KVM_S390_VM_CPU_MODEL:
1748 switch (attr->attr) {
1749 case KVM_S390_VM_CPU_PROCESSOR:
1750 case KVM_S390_VM_CPU_MACHINE:
1751 case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1752 case KVM_S390_VM_CPU_MACHINE_FEAT:
1753 case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1754 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1755 ret = 0;
1756 break;
1757 default:
1758 ret = -ENXIO;
1759 break;
1760 }
1761 break;
1762 case KVM_S390_VM_CRYPTO:
1763 switch (attr->attr) {
1764 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
1765 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
1766 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
1767 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
1768 ret = 0;
1769 break;
1770 case KVM_S390_VM_CRYPTO_ENABLE_APIE:
1771 case KVM_S390_VM_CRYPTO_DISABLE_APIE:
1772 ret = ap_instructions_available() ? 0 : -ENXIO;
1773 break;
1774 default:
1775 ret = -ENXIO;
1776 break;
1777 }
1778 break;
1779 case KVM_S390_VM_MIGRATION:
1780 ret = 0;
1781 break;
1782 default:
1783 ret = -ENXIO;
1784 break;
1785 }
1786
1787 return ret;
1788}
1789
1790static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1791{
1792 uint8_t *keys;
1793 uint64_t hva;
1794 int srcu_idx, i, r = 0;
1795
1796 if (args->flags != 0)
1797 return -EINVAL;
1798
1799 /* Is this guest using storage keys? */
1800 if (!mm_uses_skeys(current->mm))
1801 return KVM_S390_GET_SKEYS_NONE;
1802
1803 /* Enforce sane limit on memory allocation */
1804 if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1805 return -EINVAL;
1806
1807 keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
1808 if (!keys)
1809 return -ENOMEM;
1810
1811 down_read(&current->mm->mmap_sem);
1812 srcu_idx = srcu_read_lock(&kvm->srcu);
1813 for (i = 0; i < args->count; i++) {
1814 hva = gfn_to_hva(kvm, args->start_gfn + i);
1815 if (kvm_is_error_hva(hva)) {
1816 r = -EFAULT;
1817 break;
1818 }
1819
1820 r = get_guest_storage_key(current->mm, hva, &keys[i]);
1821 if (r)
1822 break;
1823 }
1824 srcu_read_unlock(&kvm->srcu, srcu_idx);
1825 up_read(&current->mm->mmap_sem);
1826
1827 if (!r) {
1828 r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
1829 sizeof(uint8_t) * args->count);
1830 if (r)
1831 r = -EFAULT;
1832 }
1833
1834 kvfree(keys);
1835 return r;
1836}
1837
1838static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1839{
1840 uint8_t *keys;
1841 uint64_t hva;
1842 int srcu_idx, i, r = 0;
1843 bool unlocked;
1844
1845 if (args->flags != 0)
1846 return -EINVAL;
1847
1848 /* Enforce sane limit on memory allocation */
1849 if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1850 return -EINVAL;
1851
1852 keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
1853 if (!keys)
1854 return -ENOMEM;
1855
1856 r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
1857 sizeof(uint8_t) * args->count);
1858 if (r) {
1859 r = -EFAULT;
1860 goto out;
1861 }
1862
1863 /* Enable storage key handling for the guest */
1864 r = s390_enable_skey();
1865 if (r)
1866 goto out;
1867
1868 i = 0;
1869 down_read(&current->mm->mmap_sem);
1870 srcu_idx = srcu_read_lock(&kvm->srcu);
1871 while (i < args->count) {
1872 unlocked = false;
1873 hva = gfn_to_hva(kvm, args->start_gfn + i);
1874 if (kvm_is_error_hva(hva)) {
1875 r = -EFAULT;
1876 break;
1877 }
1878
1879 /* Lowest order bit is reserved */
1880 if (keys[i] & 0x01) {
1881 r = -EINVAL;
1882 break;
1883 }
1884
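/*
 * If the page (table) backing hva is not mapped yet,
 * set_guest_storage_key() can fail; try to fault the page in
 * writably before giving up on the request.
 */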
1885 r = set_guest_storage_key(current->mm, hva, keys[i], 0);
1886 if (r) {
1887 r = fixup_user_fault(current, current->mm, hva,
1888 FAULT_FLAG_WRITE, &unlocked);
1889 if (r)
1890 break;
1891 }
1892 if (!r)
1893 i++;
1894 }
1895 srcu_read_unlock(&kvm->srcu, srcu_idx);
1896 up_read(&current->mm->mmap_sem);
1897out:
1898 kvfree(keys);
1899 return r;
1900}
1901
1902/*
1903 * Base address and length must be sent at the start of each block, therefore
1904 * it's cheaper to send some clean data, as long as it's less than the size of
1905 * two longs.
1906 */
1907#define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *))
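/*
 * With 8-byte pointers this evaluates to 16: a run of up to 16 clean
 * pages costs at most 16 bytes of values, i.e. never more than the
 * 16 bytes of base address + length that opening a new block costs.
 */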
1908/* for consistency */
1909#define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)
1910
1911/*
1912 * Similar to gfn_to_memslot, but returns the index of a memslot also when the
1913 * address falls in a hole. In that case the index of one of the memslots
1914 * bordering the hole is returned.
1915 */
1916static int gfn_to_memslot_approx(struct kvm_memslots *slots, gfn_t gfn)
1917{
1918 int start = 0, end = slots->used_slots;
1919 int slot = atomic_read(&slots->lru_slot);
1920 struct kvm_memory_slot *memslots = slots->memslots;
1921
1922 if (gfn >= memslots[slot].base_gfn &&
1923 gfn < memslots[slot].base_gfn + memslots[slot].npages)
1924 return slot;
1925
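/*
 * The memslots array is kept sorted by base_gfn in descending order
 * (slot 0 covers the highest guest addresses); the binary search
 * below relies on that ordering.
 */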
1926 while (start < end) {
1927 slot = start + (end - start) / 2;
1928
1929 if (gfn >= memslots[slot].base_gfn)
1930 end = slot;
1931 else
1932 start = slot + 1;
1933 }
1934
1935 if (gfn >= memslots[start].base_gfn &&
1936 gfn < memslots[start].base_gfn + memslots[start].npages) {
1937 atomic_set(&slots->lru_slot, start);
1938 }
1939
1940 return start;
1941}
1942
1943static int kvm_s390_peek_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
1944 u8 *res, unsigned long bufsize)
1945{
1946 unsigned long pgstev, hva, cur_gfn = args->start_gfn;
1947
1948 args->count = 0;
1949 while (args->count < bufsize) {
1950 hva = gfn_to_hva(kvm, cur_gfn);
1951 /*
1952 * We return an error if the first value was invalid, but we
1953 * return successfully if at least one value was copied.
1954 */
1955 if (kvm_is_error_hva(hva))
1956 return args->count ? 0 : -EFAULT;
1957 if (get_pgste(kvm->mm, hva, &pgstev) < 0)
1958 pgstev = 0;
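/*
 * Report only the usage state (0x03) and the NODAT bit (0x40) of
 * this PGSTE byte - the same bits (_PGSTE_GPS_USAGE_MASK,
 * _PGSTE_GPS_NODAT) that kvm_s390_set_cmma_bits() accepts below.
 */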
1959 res[args->count++] = (pgstev >> 24) & 0x43;
1960 cur_gfn++;
1961 }
1962
1963 return 0;
1964}
1965
1966static unsigned long kvm_s390_next_dirty_cmma(struct kvm_memslots *slots,
1967 unsigned long cur_gfn)
1968{
1969 int slotidx = gfn_to_memslot_approx(slots, cur_gfn);
1970 struct kvm_memory_slot *ms = slots->memslots + slotidx;
1971 unsigned long ofs = cur_gfn - ms->base_gfn;
1972
1973 if (ms->base_gfn + ms->npages <= cur_gfn) {
1974 slotidx--;
1975 /* If we are above the highest slot, wrap around */
1976 if (slotidx < 0)
1977 slotidx = slots->used_slots - 1;
1978
1979 ms = slots->memslots + slotidx;
1980 ofs = 0;
1981 }
1982 ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, ofs);
1983 while ((slotidx > 0) && (ofs >= ms->npages)) {
1984 slotidx--;
1985 ms = slots->memslots + slotidx;
1986 ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, 0);
1987 }
1988 return ms->base_gfn + ofs;
1989}
1990
1991static int kvm_s390_get_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
1992 u8 *res, unsigned long bufsize)
1993{
1994 unsigned long mem_end, cur_gfn, next_gfn, hva, pgstev;
1995 struct kvm_memslots *slots = kvm_memslots(kvm);
1996 struct kvm_memory_slot *ms;
1997
1998 cur_gfn = kvm_s390_next_dirty_cmma(slots, args->start_gfn);
1999 ms = gfn_to_memslot(kvm, cur_gfn);
2000 args->count = 0;
2001 args->start_gfn = cur_gfn;
2002 if (!ms)
2003 return 0;
2004 next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
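/* slot 0 has the highest base_gfn (descending sort), so it bounds guest memory */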
2005 mem_end = slots->memslots[0].base_gfn + slots->memslots[0].npages;
2006
2007 while (args->count < bufsize) {
2008 hva = gfn_to_hva(kvm, cur_gfn);
2009 if (kvm_is_error_hva(hva))
2010 return 0;
2011 /* Decrement only if we actually flipped the bit to 0 */
2012 if (test_and_clear_bit(cur_gfn - ms->base_gfn, kvm_second_dirty_bitmap(ms)))
2013 atomic64_dec(&kvm->arch.cmma_dirty_pages);
2014 if (get_pgste(kvm->mm, hva, &pgstev) < 0)
2015 pgstev = 0;
2016 /* Save the value */
2017 res[args->count++] = (pgstev >> 24) & 0x43;
2018 /* If the next bit is too far away, stop. */
2019 if (next_gfn > cur_gfn + KVM_S390_MAX_BIT_DISTANCE)
2020 return 0;
2021 /* If we reached the previous "next", find the next one */
2022 if (cur_gfn == next_gfn)
2023 next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
2024 /* Reached the end of memory or of the buffer, stop */
2025 if ((next_gfn >= mem_end) ||
2026 (next_gfn - args->start_gfn >= bufsize))
2027 return 0;
2028 cur_gfn++;
2029 /* Reached the end of the current memslot, take the next one. */
2030 if (cur_gfn - ms->base_gfn >= ms->npages) {
2031 ms = gfn_to_memslot(kvm, cur_gfn);
2032 if (!ms)
2033 return 0;
2034 }
2035 }
2036 return 0;
2037}
2038
2039/*
2040 * This function searches for the next page with dirty CMMA attributes, and
2041 * saves the attributes in the buffer up to either the end of the buffer or
2042 * until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found;
2043 * no trailing clean bytes are saved.
2044 * In case no dirty bits were found, or if CMMA was not enabled or used, the
2045 * output buffer will indicate 0 as length.
2046 */
2047static int kvm_s390_get_cmma_bits(struct kvm *kvm,
2048 struct kvm_s390_cmma_log *args)
2049{
2050 unsigned long bufsize;
2051 int srcu_idx, peek, ret;
2052 u8 *values;
2053
2054 if (!kvm->arch.use_cmma)
2055 return -ENXIO;
2056 /* Invalid/unsupported flags were specified */
2057 if (args->flags & ~KVM_S390_CMMA_PEEK)
2058 return -EINVAL;
2059 /* Migration mode query, and we are not doing a migration */
2060 peek = !!(args->flags & KVM_S390_CMMA_PEEK);
2061 if (!peek && !kvm->arch.migration_mode)
2062 return -EINVAL;
2063 /* CMMA is disabled or was not used, or the buffer has length zero */
2064 bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
2065 if (!bufsize || !kvm->mm->context.uses_cmm) {
2066 memset(args, 0, sizeof(*args));
2067 return 0;
2068 }
2069 /* We are not peeking, and there are no dirty pages */
2070 if (!peek && !atomic64_read(&kvm->arch.cmma_dirty_pages)) {
2071 memset(args, 0, sizeof(*args));
2072 return 0;
2073 }
2074
2075 values = vmalloc(bufsize);
2076 if (!values)
2077 return -ENOMEM;
2078
2079 down_read(&kvm->mm->mmap_sem);
2080 srcu_idx = srcu_read_lock(&kvm->srcu);
2081 if (peek)
2082 ret = kvm_s390_peek_cmma(kvm, args, values, bufsize);
2083 else
2084 ret = kvm_s390_get_cmma(kvm, args, values, bufsize);
2085 srcu_read_unlock(&kvm->srcu, srcu_idx);
2086 up_read(&kvm->mm->mmap_sem);
2087
2088 if (kvm->arch.migration_mode)
2089 args->remaining = atomic64_read(&kvm->arch.cmma_dirty_pages);
2090 else
2091 args->remaining = 0;
2092
2093 if (copy_to_user((void __user *)args->values, values, args->count))
2094 ret = -EFAULT;
2095
2096 vfree(values);
2097 return ret;
2098}
2099
2100/*
2101 * This function sets the CMMA attributes for the given pages. If the input
2102 * buffer has zero length, no action is taken, otherwise the attributes are
2103 * set and the mm->context.uses_cmm flag is set.
2104 */
2105static int kvm_s390_set_cmma_bits(struct kvm *kvm,
2106 const struct kvm_s390_cmma_log *args)
2107{
2108 unsigned long hva, mask, pgstev, i;
2109 uint8_t *bits;
2110 int srcu_idx, r = 0;
2111
2112 mask = args->mask;
2113
2114 if (!kvm->arch.use_cmma)
2115 return -ENXIO;
2116 /* invalid/unsupported flags */
2117 if (args->flags != 0)
2118 return -EINVAL;
2119 /* Enforce sane limit on memory allocation */
2120 if (args->count > KVM_S390_CMMA_SIZE_MAX)
2121 return -EINVAL;
2122 /* Nothing to do */
2123 if (args->count == 0)
2124 return 0;
2125
2126 bits = vmalloc(array_size(sizeof(*bits), args->count));
2127 if (!bits)
2128 return -ENOMEM;
2129
2130 r = copy_from_user(bits, (void __user *)args->values, args->count);
2131 if (r) {
2132 r = -EFAULT;
2133 goto out;
2134 }
2135
2136 down_read(&kvm->mm->mmap_sem);
2137 srcu_idx = srcu_read_lock(&kvm->srcu);
2138 for (i = 0; i < args->count; i++) {
2139 hva = gfn_to_hva(kvm, args->start_gfn + i);
2140 if (kvm_is_error_hva(hva)) {
2141 r = -EFAULT;
2142 break;
2143 }
2144
2145 pgstev = bits[i];
2146 pgstev = pgstev << 24;
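/*
 * Userspace may only modify the usage state and the NODAT bit;
 * any other bits in the given mask are discarded here.
 */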
2147 mask &= _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT;
2148 set_pgste_bits(kvm->mm, hva, mask, pgstev);
2149 }
2150 srcu_read_unlock(&kvm->srcu, srcu_idx);
2151 up_read(&kvm->mm->mmap_sem);
2152
2153 if (!kvm->mm->context.uses_cmm) {
2154 down_write(&kvm->mm->mmap_sem);
2155 kvm->mm->context.uses_cmm = 1;
2156 up_write(&kvm->mm->mmap_sem);
2157 }
2158out:
2159 vfree(bits);
2160 return r;
2161}
2162
2163long kvm_arch_vm_ioctl(struct file *filp,
2164 unsigned int ioctl, unsigned long arg)
2165{
2166 struct kvm *kvm = filp->private_data;
2167 void __user *argp = (void __user *)arg;
2168 struct kvm_device_attr attr;
2169 int r;
2170
2171 switch (ioctl) {
2172 case KVM_S390_INTERRUPT: {
2173 struct kvm_s390_interrupt s390int;
2174
2175 r = -EFAULT;
2176 if (copy_from_user(&s390int, argp, sizeof(s390int)))
2177 break;
2178 r = kvm_s390_inject_vm(kvm, &s390int);
2179 break;
2180 }
2181 case KVM_CREATE_IRQCHIP: {
2182 struct kvm_irq_routing_entry routing;
2183
2184 r = -EINVAL;
2185 if (kvm->arch.use_irqchip) {
2186 /* Set up dummy routing. */
2187 memset(&routing, 0, sizeof(routing));
2188 r = kvm_set_irq_routing(kvm, &routing, 0, 0);
2189 }
2190 break;
2191 }
2192 case KVM_SET_DEVICE_ATTR: {
2193 r = -EFAULT;
2194 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2195 break;
2196 r = kvm_s390_vm_set_attr(kvm, &attr);
2197 break;
2198 }
2199 case KVM_GET_DEVICE_ATTR: {
2200 r = -EFAULT;
2201 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2202 break;
2203 r = kvm_s390_vm_get_attr(kvm, &attr);
2204 break;
2205 }
2206 case KVM_HAS_DEVICE_ATTR: {
2207 r = -EFAULT;
2208 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2209 break;
2210 r = kvm_s390_vm_has_attr(kvm, &attr);
2211 break;
2212 }
2213 case KVM_S390_GET_SKEYS: {
2214 struct kvm_s390_skeys args;
2215
2216 r = -EFAULT;
2217 if (copy_from_user(&args, argp,
2218 sizeof(struct kvm_s390_skeys)))
2219 break;
2220 r = kvm_s390_get_skeys(kvm, &args);
2221 break;
2222 }
2223 case KVM_S390_SET_SKEYS: {
2224 struct kvm_s390_skeys args;
2225
2226 r = -EFAULT;
2227 if (copy_from_user(&args, argp,
2228 sizeof(struct kvm_s390_skeys)))
2229 break;
2230 r = kvm_s390_set_skeys(kvm, &args);
2231 break;
2232 }
2233 case KVM_S390_GET_CMMA_BITS: {
2234 struct kvm_s390_cmma_log args;
2235
2236 r = -EFAULT;
2237 if (copy_from_user(&args, argp, sizeof(args)))
2238 break;
2239 mutex_lock(&kvm->slots_lock);
2240 r = kvm_s390_get_cmma_bits(kvm, &args);
2241 mutex_unlock(&kvm->slots_lock);
2242 if (!r) {
2243 r = copy_to_user(argp, &args, sizeof(args));
2244 if (r)
2245 r = -EFAULT;
2246 }
2247 break;
2248 }
2249 case KVM_S390_SET_CMMA_BITS: {
2250 struct kvm_s390_cmma_log args;
2251
2252 r = -EFAULT;
2253 if (copy_from_user(&args, argp, sizeof(args)))
2254 break;
2255 mutex_lock(&kvm->slots_lock);
2256 r = kvm_s390_set_cmma_bits(kvm, &args);
2257 mutex_unlock(&kvm->slots_lock);
2258 break;
2259 }
2260 default:
2261 r = -ENOTTY;
2262 }
2263
2264 return r;
2265}
2266
2267static int kvm_s390_apxa_installed(void)
2268{
2269 struct ap_config_info info;
2270
2271 if (ap_instructions_available()) {
2272 if (ap_qci(&info) == 0)
2273 return info.apxa;
2274 }
2275
2276 return 0;
2277}
2278
2279/*
2280 * The format of the crypto control block (CRYCB) is specified in the 3 low
2281 * order bits of the CRYCB designation (CRYCBD) field as follows:
2282 * Format 0: Neither the message security assist extension 3 (MSAX3) nor the
2283 * AP extended addressing (APXA) facility is installed.
2284 * Format 1: The APXA facility is not installed but the MSAX3 facility is.
2285 * Format 2: Both the APXA and MSAX3 facilities are installed.
2286 */
2287static void kvm_s390_set_crycb_format(struct kvm *kvm)
2288{
2289 kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
2290
2291 /* Clear the CRYCB format bits - i.e., set format 0 by default */
2292 kvm->arch.crypto.crycbd &= ~(CRYCB_FORMAT_MASK);
2293
2294 /* Check whether MSAX3 is installed */
2295 if (!test_kvm_facility(kvm, 76))
2296 return;
2297
2298 if (kvm_s390_apxa_installed())
2299 kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
2300 else
2301 kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
2302}
2303
2304void kvm_arch_crypto_set_masks(struct kvm *kvm, unsigned long *apm,
2305 unsigned long *aqm, unsigned long *adm)
2306{
2307 struct kvm_s390_crypto_cb *crycb = kvm->arch.crypto.crycb;
2308
2309 mutex_lock(&kvm->lock);
2310 kvm_s390_vcpu_block_all(kvm);
2311
2312 switch (kvm->arch.crypto.crycbd & CRYCB_FORMAT_MASK) {
2313 case CRYCB_FORMAT2: /* APCB1 uses 256 bits */
2314 memcpy(crycb->apcb1.apm, apm, 32);
2315 VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx %016lx %016lx %016lx",
2316 apm[0], apm[1], apm[2], apm[3]);
2317 memcpy(crycb->apcb1.aqm, aqm, 32);
2318 VM_EVENT(kvm, 3, "SET CRYCB: aqm %016lx %016lx %016lx %016lx",
2319 aqm[0], aqm[1], aqm[2], aqm[3]);
2320 memcpy(crycb->apcb1.adm, adm, 32);
2321 VM_EVENT(kvm, 3, "SET CRYCB: adm %016lx %016lx %016lx %016lx",
2322 adm[0], adm[1], adm[2], adm[3]);
2323 break;
2324 case CRYCB_FORMAT1:
2325 case CRYCB_FORMAT0: /* fall through - both formats use APCB0 */
2326 memcpy(crycb->apcb0.apm, apm, 8);
2327 memcpy(crycb->apcb0.aqm, aqm, 2);
2328 memcpy(crycb->apcb0.adm, adm, 2);
2329 VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx aqm %04x adm %04x",
2330 apm[0], *((unsigned short *)aqm),
2331 *((unsigned short *)adm));
2332 break;
2333 default: /* Cannot happen */
2334 break;
2335 }
2336
2337 /* recreate the shadow crycb for each vcpu */
2338 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
2339 kvm_s390_vcpu_unblock_all(kvm);
2340 mutex_unlock(&kvm->lock);
2341}
2342EXPORT_SYMBOL_GPL(kvm_arch_crypto_set_masks);
2343
2344void kvm_arch_crypto_clear_masks(struct kvm *kvm)
2345{
2346 mutex_lock(&kvm->lock);
2347 kvm_s390_vcpu_block_all(kvm);
2348
2349 memset(&kvm->arch.crypto.crycb->apcb0, 0,
2350 sizeof(kvm->arch.crypto.crycb->apcb0));
2351 memset(&kvm->arch.crypto.crycb->apcb1, 0,
2352 sizeof(kvm->arch.crypto.crycb->apcb1));
2353
2354 VM_EVENT(kvm, 3, "%s", "CLR CRYCB:");
2355 /* recreate the shadow crycb for each vcpu */
2356 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
2357 kvm_s390_vcpu_unblock_all(kvm);
2358 mutex_unlock(&kvm->lock);
2359}
2360EXPORT_SYMBOL_GPL(kvm_arch_crypto_clear_masks);
2361
2362static u64 kvm_s390_get_initial_cpuid(void)
2363{
2364 struct cpuid cpuid;
2365
2366 get_cpu_id(&cpuid);
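/*
 * The version code is forced to 0xff, presumably to mark the CPU as
 * hypervisor-provided instead of exposing the host's real version code.
 */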
2367 cpuid.version = 0xff;
2368 return *((u64 *) &cpuid);
2369}
2370
2371static void kvm_s390_crypto_init(struct kvm *kvm)
2372{
2373 kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
2374 kvm_s390_set_crycb_format(kvm);
2375
2376 if (!test_kvm_facility(kvm, 76))
2377 return;
2378
2379 /* Enable AES/DEA protected key functions by default */
2380 kvm->arch.crypto.aes_kw = 1;
2381 kvm->arch.crypto.dea_kw = 1;
2382 get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
2383 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
2384 get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
2385 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
2386}
2387
2388static void sca_dispose(struct kvm *kvm)
2389{
2390 if (kvm->arch.use_esca)
2391 free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
2392 else
2393 free_page((unsigned long)(kvm->arch.sca));
2394 kvm->arch.sca = NULL;
2395}
2396
2397int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
2398{
2399 gfp_t alloc_flags = GFP_KERNEL;
2400 int i, rc;
2401 char debug_name[16];
2402 static unsigned long sca_offset;
2403
2404 rc = -EINVAL;
2405#ifdef CONFIG_KVM_S390_UCONTROL
2406 if (type & ~KVM_VM_S390_UCONTROL)
2407 goto out_err;
2408 if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
2409 goto out_err;
2410#else
2411 if (type)
2412 goto out_err;
2413#endif
2414
2415 rc = s390_enable_sie();
2416 if (rc)
2417 goto out_err;
2418
2419 rc = -ENOMEM;
2420
2421 if (!sclp.has_64bscao)
2422 alloc_flags |= GFP_DMA;
2423 rwlock_init(&kvm->arch.sca_lock);
2424 /* start with basic SCA */
2425 kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
2426 if (!kvm->arch.sca)
2427 goto out_err;
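/*
 * Stagger each VM's basic SCA within its page in 16-byte steps,
 * presumably to spread the SCAs of different VMs over distinct cache
 * lines; the offset wraps before the block would cross the page
 * boundary.
 */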
2428 mutex_lock(&kvm_lock);
2429 sca_offset += 16;
2430 if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
2431 sca_offset = 0;
2432 kvm->arch.sca = (struct bsca_block *)
2433 ((char *) kvm->arch.sca + sca_offset);
2434 mutex_unlock(&kvm_lock);
2435
2436 sprintf(debug_name, "kvm-%u", current->pid);
2437
2438 kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
2439 if (!kvm->arch.dbf)
2440 goto out_err;
2441
2442 BUILD_BUG_ON(sizeof(struct sie_page2) != 4096);
2443 kvm->arch.sie_page2 =
2444 (struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
2445 if (!kvm->arch.sie_page2)
2446 goto out_err;
2447
2448 kvm->arch.sie_page2->kvm = kvm;
2449 kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
2450
2451 for (i = 0; i < kvm_s390_fac_size(); i++) {
2452 kvm->arch.model.fac_mask[i] = S390_lowcore.stfle_fac_list[i] &
2453 (kvm_s390_fac_base[i] |
2454 kvm_s390_fac_ext[i]);
2455 kvm->arch.model.fac_list[i] = S390_lowcore.stfle_fac_list[i] &
2456 kvm_s390_fac_base[i];
2457 }
2458 kvm->arch.model.subfuncs = kvm_s390_available_subfunc;
2459
2460 /* we are always in CZAM mode - even on pre-z14 machines */
2461 set_kvm_facility(kvm->arch.model.fac_mask, 138);
2462 set_kvm_facility(kvm->arch.model.fac_list, 138);
2463 /* we emulate STHYI in kvm */
2464 set_kvm_facility(kvm->arch.model.fac_mask, 74);
2465 set_kvm_facility(kvm->arch.model.fac_list, 74);
2466 if (MACHINE_HAS_TLB_GUEST) {
2467 set_kvm_facility(kvm->arch.model.fac_mask, 147);
2468 set_kvm_facility(kvm->arch.model.fac_list, 147);
2469 }
2470
2471 if (css_general_characteristics.aiv && test_facility(65))
2472 set_kvm_facility(kvm->arch.model.fac_mask, 65);
2473
2474 kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
2475 kvm->arch.model.ibc = sclp.ibc & 0x0fff;
2476
2477 kvm_s390_crypto_init(kvm);
2478
2479 mutex_init(&kvm->arch.float_int.ais_lock);
2480 spin_lock_init(&kvm->arch.float_int.lock);
2481 for (i = 0; i < FIRQ_LIST_COUNT; i++)
2482 INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
2483 init_waitqueue_head(&kvm->arch.ipte_wq);
2484 mutex_init(&kvm->arch.ipte_mutex);
2485
2486 debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
2487 VM_EVENT(kvm, 3, "vm created with type %lu", type);
2488
2489 if (type & KVM_VM_S390_UCONTROL) {
2490 kvm->arch.gmap = NULL;
2491 kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
2492 } else {
2493 if (sclp.hamax == U64_MAX)
2494 kvm->arch.mem_limit = TASK_SIZE_MAX;
2495 else
2496 kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX,
2497 sclp.hamax + 1);
2498 kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
2499 if (!kvm->arch.gmap)
2500 goto out_err;
2501 kvm->arch.gmap->private = kvm;
2502 kvm->arch.gmap->pfault_enabled = 0;
2503 }
2504
2505 kvm->arch.use_pfmfi = sclp.has_pfmfi;
2506 kvm->arch.use_skf = sclp.has_skey;
2507 spin_lock_init(&kvm->arch.start_stop_lock);
2508 kvm_s390_vsie_init(kvm);
2509 kvm_s390_gisa_init(kvm);
2510 KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
2511
2512 return 0;
2513out_err:
2514 free_page((unsigned long)kvm->arch.sie_page2);
2515 debug_unregister(kvm->arch.dbf);
2516 sca_dispose(kvm);
2517 KVM_EVENT(3, "creation of vm failed: %d", rc);
2518 return rc;
2519}
2520
2521void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
2522{
2523 VCPU_EVENT(vcpu, 3, "%s", "free cpu");
2524 trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
2525 kvm_s390_clear_local_irqs(vcpu);
2526 kvm_clear_async_pf_completion_queue(vcpu);
2527 if (!kvm_is_ucontrol(vcpu->kvm))
2528 sca_del_vcpu(vcpu);
2529
2530 if (kvm_is_ucontrol(vcpu->kvm))
2531 gmap_remove(vcpu->arch.gmap);
2532
2533 if (vcpu->kvm->arch.use_cmma)
2534 kvm_s390_vcpu_unsetup_cmma(vcpu);
2535 free_page((unsigned long)(vcpu->arch.sie_block));
2536
2537 kvm_vcpu_uninit(vcpu);
2538 kmem_cache_free(kvm_vcpu_cache, vcpu);
2539}
2540
2541static void kvm_free_vcpus(struct kvm *kvm)
2542{
2543 unsigned int i;
2544 struct kvm_vcpu *vcpu;
2545
2546 kvm_for_each_vcpu(i, vcpu, kvm)
2547 kvm_arch_vcpu_destroy(vcpu);
2548
2549 mutex_lock(&kvm->lock);
2550 for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
2551 kvm->vcpus[i] = NULL;
2552
2553 atomic_set(&kvm->online_vcpus, 0);
2554 mutex_unlock(&kvm->lock);
2555}
2556
2557void kvm_arch_destroy_vm(struct kvm *kvm)
2558{
2559 kvm_free_vcpus(kvm);
2560 sca_dispose(kvm);
2561 debug_unregister(kvm->arch.dbf);
2562 kvm_s390_gisa_destroy(kvm);
2563 free_page((unsigned long)kvm->arch.sie_page2);
2564 if (!kvm_is_ucontrol(kvm))
2565 gmap_remove(kvm->arch.gmap);
2566 kvm_s390_destroy_adapters(kvm);
2567 kvm_s390_clear_float_irqs(kvm);
2568 kvm_s390_vsie_destroy(kvm);
2569 KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
2570}
2571
2572/* Section: vcpu related */
2573static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
2574{
2575 vcpu->arch.gmap = gmap_create(current->mm, -1UL);
2576 if (!vcpu->arch.gmap)
2577 return -ENOMEM;
2578 vcpu->arch.gmap->private = vcpu->kvm;
2579
2580 return 0;
2581}
2582
2583static void sca_del_vcpu(struct kvm_vcpu *vcpu)
2584{
2585 if (!kvm_s390_use_sca_entries())
2586 return;
2587 read_lock(&vcpu->kvm->arch.sca_lock);
2588 if (vcpu->kvm->arch.use_esca) {
2589 struct esca_block *sca = vcpu->kvm->arch.sca;
2590
2591 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2592 sca->cpu[vcpu->vcpu_id].sda = 0;
2593 } else {
2594 struct bsca_block *sca = vcpu->kvm->arch.sca;
2595
2596 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2597 sca->cpu[vcpu->vcpu_id].sda = 0;
2598 }
2599 read_unlock(&vcpu->kvm->arch.sca_lock);
2600}
2601
2602static void sca_add_vcpu(struct kvm_vcpu *vcpu)
2603{
2604 if (!kvm_s390_use_sca_entries()) {
2605 struct bsca_block *sca = vcpu->kvm->arch.sca;
2606
2607 /* we still need the basic sca for the ipte control */
2608 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2609 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2610 return;
2611 }
2612 read_lock(&vcpu->kvm->arch.sca_lock);
2613 if (vcpu->kvm->arch.use_esca) {
2614 struct esca_block *sca = vcpu->kvm->arch.sca;
2615
2616 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2617 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2618 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
2619 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2620 set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2621 } else {
2622 struct bsca_block *sca = vcpu->kvm->arch.sca;
2623
2624 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2625 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2626 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2627 set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2628 }
2629 read_unlock(&vcpu->kvm->arch.sca_lock);
2630}
2631
2632/* Basic SCA to Extended SCA data copy routines */
2633static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
2634{
2635 d->sda = s->sda;
2636 d->sigp_ctrl.c = s->sigp_ctrl.c;
2637 d->sigp_ctrl.scn = s->sigp_ctrl.scn;
2638}
2639
2640static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
2641{
2642 int i;
2643
2644 d->ipte_control = s->ipte_control;
2645 d->mcn[0] = s->mcn;
2646 for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
2647 sca_copy_entry(&d->cpu[i], &s->cpu[i]);
2648}
2649
2650static int sca_switch_to_extended(struct kvm *kvm)
2651{
2652 struct bsca_block *old_sca = kvm->arch.sca;
2653 struct esca_block *new_sca;
2654 struct kvm_vcpu *vcpu;
2655 unsigned int vcpu_idx;
2656 u32 scaol, scaoh;
2657
2658 new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);
2659 if (!new_sca)
2660 return -ENOMEM;
2661
2662 scaoh = (u32)((u64)(new_sca) >> 32);
2663 scaol = (u32)(u64)(new_sca) & ~0x3fU;
2664
2665 kvm_s390_vcpu_block_all(kvm);
2666 write_lock(&kvm->arch.sca_lock);
2667
2668 sca_copy_b_to_e(new_sca, old_sca);
2669
2670 kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
2671 vcpu->arch.sie_block->scaoh = scaoh;
2672 vcpu->arch.sie_block->scaol = scaol;
2673 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2674 }
2675 kvm->arch.sca = new_sca;
2676 kvm->arch.use_esca = 1;
2677
2678 write_unlock(&kvm->arch.sca_lock);
2679 kvm_s390_vcpu_unblock_all(kvm);
2680
2681 free_page((unsigned long)old_sca);
2682
2683 VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
2684 old_sca, kvm->arch.sca);
2685 return 0;
2686}
2687
2688static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
2689{
2690 int rc;
2691
2692 if (!kvm_s390_use_sca_entries()) {
2693 if (id < KVM_MAX_VCPUS)
2694 return true;
2695 return false;
2696 }
2697 if (id < KVM_S390_BSCA_CPU_SLOTS)
2698 return true;
2699 if (!sclp.has_esca || !sclp.has_64bscao)
2700 return false;
2701
2702 mutex_lock(&kvm->lock);
2703 rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
2704 mutex_unlock(&kvm->lock);
2705
2706 return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
2707}
2708
2709int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
2710{
2711 vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
2712 kvm_clear_async_pf_completion_queue(vcpu);
2713 vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
2714 KVM_SYNC_GPRS |
2715 KVM_SYNC_ACRS |
2716 KVM_SYNC_CRS |
2717 KVM_SYNC_ARCH0 |
2718 KVM_SYNC_PFAULT;
2719 kvm_s390_set_prefix(vcpu, 0);
2720 if (test_kvm_facility(vcpu->kvm, 64))
2721 vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
2722 if (test_kvm_facility(vcpu->kvm, 82))
2723 vcpu->run->kvm_valid_regs |= KVM_SYNC_BPBC;
2724 if (test_kvm_facility(vcpu->kvm, 133))
2725 vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB;
2726 if (test_kvm_facility(vcpu->kvm, 156))
2727 vcpu->run->kvm_valid_regs |= KVM_SYNC_ETOKEN;
2728 /* fprs can be synchronized via vrs, even if the guest has no vx. With
2729 * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
2730 */
2731 if (MACHINE_HAS_VX)
2732 vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
2733 else
2734 vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
2735
2736 if (kvm_is_ucontrol(vcpu->kvm))
2737 return __kvm_ucontrol_vcpu_init(vcpu);
2738
2739 return 0;
2740}
2741
2742/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2743static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2744{
2745 WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
2746 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2747 vcpu->arch.cputm_start = get_tod_clock_fast();
2748 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2749}
2750
2751/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2752static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2753{
2754 WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
2755 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2756 vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
2757 vcpu->arch.cputm_start = 0;
2758 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2759}
2760
2761/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2762static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2763{
2764 WARN_ON_ONCE(vcpu->arch.cputm_enabled);
2765 vcpu->arch.cputm_enabled = true;
2766 __start_cpu_timer_accounting(vcpu);
2767}
2768
2769/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2770static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2771{
2772 WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
2773 __stop_cpu_timer_accounting(vcpu);
2774 vcpu->arch.cputm_enabled = false;
2775}
2776
2777static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2778{
2779 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2780 __enable_cpu_timer_accounting(vcpu);
2781 preempt_enable();
2782}
2783
2784static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2785{
2786 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2787 __disable_cpu_timer_accounting(vcpu);
2788 preempt_enable();
2789}
2790
2791/* set the cpu timer - may only be called from the VCPU thread itself */
2792void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
2793{
2794 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2795 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2796 if (vcpu->arch.cputm_enabled)
2797 vcpu->arch.cputm_start = get_tod_clock_fast();
2798 vcpu->arch.sie_block->cputm = cputm;
2799 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2800 preempt_enable();
2801}
2802
2803/* update and get the cpu timer - can also be called from other VCPU threads */
2804__u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
2805{
2806 unsigned int seq;
2807 __u64 value;
2808
2809 if (unlikely(!vcpu->arch.cputm_enabled))
2810 return vcpu->arch.sie_block->cputm;
2811
2812 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
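/*
 * cputm_seqcount lets other VCPU threads combine cputm and
 * cputm_start into a consistent value while the owning VCPU
 * thread updates them concurrently.
 */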
2813 do {
2814 seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
2815 /*
2816 * If the writer would ever execute a read in the critical
2817 * section, e.g. in irq context, we have a deadlock.
2818 */
2819 WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
2820 value = vcpu->arch.sie_block->cputm;
2821 /* if cputm_start is 0, accounting is being started/stopped */
2822 if (likely(vcpu->arch.cputm_start))
2823 value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
2824 } while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
2825 preempt_enable();
2826 return value;
2827}
2828
2829void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
2830{
2832 gmap_enable(vcpu->arch.enabled_gmap);
2833 kvm_s390_set_cpuflags(vcpu, CPUSTAT_RUNNING);
2834 if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
2835 __start_cpu_timer_accounting(vcpu);
2836 vcpu->cpu = cpu;
2837}
2838
2839void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
2840{
2841 vcpu->cpu = -1;
2842 if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
2843 __stop_cpu_timer_accounting(vcpu);
2844 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_RUNNING);
2845 vcpu->arch.enabled_gmap = gmap_get_enabled();
2846 gmap_disable(vcpu->arch.enabled_gmap);
2848}
2849
2850static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
2851{
2852 /* this equals an initial CPU reset as in the PoP (Principles of Operation), but we don't switch to ESA mode */
2853 vcpu->arch.sie_block->gpsw.mask = 0UL;
2854 vcpu->arch.sie_block->gpsw.addr = 0UL;
2855 kvm_s390_set_prefix(vcpu, 0);
2856 kvm_s390_set_cpu_timer(vcpu, 0);
2857 vcpu->arch.sie_block->ckc = 0UL;
2858 vcpu->arch.sie_block->todpr = 0;
2859 memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
2860 vcpu->arch.sie_block->gcr[0] = CR0_UNUSED_56 |
2861 CR0_INTERRUPT_KEY_SUBMASK |
2862 CR0_MEASUREMENT_ALERT_SUBMASK;
2863 vcpu->arch.sie_block->gcr[14] = CR14_UNUSED_32 |
2864 CR14_UNUSED_33 |
2865 CR14_EXTERNAL_DAMAGE_SUBMASK;
2866 /* make sure the new fpc will be lazily loaded */
2867 save_fpu_regs();
2868 current->thread.fpu.fpc = 0;
2869 vcpu->arch.sie_block->gbea = 1;
2870 vcpu->arch.sie_block->pp = 0;
2871 vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
2872 vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
2873 kvm_clear_async_pf_completion_queue(vcpu);
2874 if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
2875 kvm_s390_vcpu_stop(vcpu);
2876 kvm_s390_clear_local_irqs(vcpu);
2877}
2878
2879void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
2880{
2881 mutex_lock(&vcpu->kvm->lock);
2882 preempt_disable();
2883 vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
2884 vcpu->arch.sie_block->epdx = vcpu->kvm->arch.epdx;
2885 preempt_enable();
2886 mutex_unlock(&vcpu->kvm->lock);
2887 if (!kvm_is_ucontrol(vcpu->kvm)) {
2888 vcpu->arch.gmap = vcpu->kvm->arch.gmap;
2889 sca_add_vcpu(vcpu);
2890 }
2891 if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
2892 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
2893 /* make vcpu_load load the right gmap on the first trigger */
2894 vcpu->arch.enabled_gmap = vcpu->arch.gmap;
2895}
2896
2897static bool kvm_has_pckmo_subfunc(struct kvm *kvm, unsigned long nr)
2898{
2899 if (test_bit_inv(nr, (unsigned long *)&kvm->arch.model.subfuncs.pckmo) &&
2900 test_bit_inv(nr, (unsigned long *)&kvm_s390_available_subfunc.pckmo))
2901 return true;
2902 return false;
2903}
2904
2905static bool kvm_has_pckmo_ecc(struct kvm *kvm)
2906{
2907 /* At least one ECC subfunction must be present */
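/* 32-34: ECC-P256/P384/P521, 40/41: Ed25519/Ed448 (PCKMO function codes) */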
2908 return kvm_has_pckmo_subfunc(kvm, 32) ||
2909 kvm_has_pckmo_subfunc(kvm, 33) ||
2910 kvm_has_pckmo_subfunc(kvm, 34) ||
2911 kvm_has_pckmo_subfunc(kvm, 40) ||
2912 kvm_has_pckmo_subfunc(kvm, 41);
2914}
2915
2916static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
2917{
2918 /*
2919 * If the AP instructions are not being interpreted and the MSAX3
2920 * facility is not configured for the guest, there is nothing to set up.
2921 */
2922 if (!vcpu->kvm->arch.crypto.apie && !test_kvm_facility(vcpu->kvm, 76))
2923 return;
2924
2925 vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
2926 vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
2927 vcpu->arch.sie_block->eca &= ~ECA_APIE;
2928 vcpu->arch.sie_block->ecd &= ~ECD_ECC;
2929
2930 if (vcpu->kvm->arch.crypto.apie)
2931 vcpu->arch.sie_block->eca |= ECA_APIE;
2932
2933 /* Set up protected key support */
2934 if (vcpu->kvm->arch.crypto.aes_kw) {
2935 vcpu->arch.sie_block->ecb3 |= ECB3_AES;
2936 /* ecc is also wrapped with AES key */
2937 if (kvm_has_pckmo_ecc(vcpu->kvm))
2938 vcpu->arch.sie_block->ecd |= ECD_ECC;
2939 }
2940
2941 if (vcpu->kvm->arch.crypto.dea_kw)
2942 vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
2943}
2944
2945void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
2946{
2947 free_page(vcpu->arch.sie_block->cbrlo);
2948 vcpu->arch.sie_block->cbrlo = 0;
2949}
2950
2951int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
2952{
2953 vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
2954 if (!vcpu->arch.sie_block->cbrlo)
2955 return -ENOMEM;
2956 return 0;
2957}
2958
2959static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
2960{
2961 struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
2962
2963 vcpu->arch.sie_block->ibc = model->ibc;
2964 if (test_kvm_facility(vcpu->kvm, 7))
2965 vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
2966}
2967
2968int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
2969{
2970 int rc = 0;
2971
2972 atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
2973 CPUSTAT_SM |
2974 CPUSTAT_STOPPED);
2975
2976 if (test_kvm_facility(vcpu->kvm, 78))
2977 kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED2);
2978 else if (test_kvm_facility(vcpu->kvm, 8))
2979 kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED);
2980
2981 kvm_s390_vcpu_setup_model(vcpu);
2982
2983 /* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
2984 if (MACHINE_HAS_ESOP)
2985 vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT;
2986 if (test_kvm_facility(vcpu->kvm, 9))
2987 vcpu->arch.sie_block->ecb |= ECB_SRSI;
2988 if (test_kvm_facility(vcpu->kvm, 73))
2989 vcpu->arch.sie_block->ecb |= ECB_TE;
2990
2991 if (test_kvm_facility(vcpu->kvm, 8) && vcpu->kvm->arch.use_pfmfi)
2992 vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI;
2993 if (test_kvm_facility(vcpu->kvm, 130))
2994 vcpu->arch.sie_block->ecb2 |= ECB2_IEP;
2995 vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI;
2996 if (sclp.has_cei)
2997 vcpu->arch.sie_block->eca |= ECA_CEI;
2998 if (sclp.has_ib)
2999 vcpu->arch.sie_block->eca |= ECA_IB;
3000 if (sclp.has_siif)
3001 vcpu->arch.sie_block->eca |= ECA_SII;
3002 if (sclp.has_sigpif)
3003 vcpu->arch.sie_block->eca |= ECA_SIGPI;
3004 if (test_kvm_facility(vcpu->kvm, 129)) {
3005 vcpu->arch.sie_block->eca |= ECA_VX;
3006 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
3007 }
3008 if (test_kvm_facility(vcpu->kvm, 139))
3009 vcpu->arch.sie_block->ecd |= ECD_MEF;
3010 if (test_kvm_facility(vcpu->kvm, 156))
3011 vcpu->arch.sie_block->ecd |= ECD_ETOKENF;
3012 if (vcpu->arch.sie_block->gd) {
3013 vcpu->arch.sie_block->eca |= ECA_AIV;
3014 VCPU_EVENT(vcpu, 3, "AIV gisa format-%u enabled for cpu %03u",
3015 vcpu->arch.sie_block->gd & 0x3, vcpu->vcpu_id);
3016 }
3017 vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx)
3018 | SDNXC;
3019 vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
3020
3021 if (sclp.has_kss)
3022 kvm_s390_set_cpuflags(vcpu, CPUSTAT_KSS);
3023 else
3024 vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
3025
3026 if (vcpu->kvm->arch.use_cmma) {
3027 rc = kvm_s390_vcpu_setup_cmma(vcpu);
3028 if (rc)
3029 return rc;
3030 }
3031 hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
3032 vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
3033
3034 vcpu->arch.sie_block->hpid = HPID_KVM;
3035
3036 kvm_s390_vcpu_crypto_setup(vcpu);
3037
3038 return rc;
3039}
3040
3041struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
3042 unsigned int id)
3043{
3044 struct kvm_vcpu *vcpu;
3045 struct sie_page *sie_page;
3046 int rc = -EINVAL;
3047
3048 if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
3049 goto out;
3050
3051 rc = -ENOMEM;
3052
3053 vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
3054 if (!vcpu)
3055 goto out;
3056
3057 BUILD_BUG_ON(sizeof(struct sie_page) != 4096);
3058 sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
3059 if (!sie_page)
3060 goto out_free_cpu;
3061
3062 vcpu->arch.sie_block = &sie_page->sie_block;
3063 vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
3064
3065 /* the real guest size will always be smaller than msl */
3066 vcpu->arch.sie_block->mso = 0;
3067 vcpu->arch.sie_block->msl = sclp.hamax;
3068
3069 vcpu->arch.sie_block->icpua = id;
3070 spin_lock_init(&vcpu->arch.local_int.lock);
3071 vcpu->arch.sie_block->gd = (u32)(u64)kvm->arch.gisa_int.origin;
3072 if (vcpu->arch.sie_block->gd && sclp.has_gisaf)
3073 vcpu->arch.sie_block->gd |= GISA_FORMAT1;
3074 seqcount_init(&vcpu->arch.cputm_seqcount);
3075
3076 rc = kvm_vcpu_init(vcpu, kvm, id);
3077 if (rc)
3078 goto out_free_sie_block;
3079 VM_EVENT(kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", id, vcpu,
3080 vcpu->arch.sie_block);
3081 trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);
3082
3083 return vcpu;
3084out_free_sie_block:
3085 free_page((unsigned long)(vcpu->arch.sie_block));
3086out_free_cpu:
3087 kmem_cache_free(kvm_vcpu_cache, vcpu);
3088out:
3089 return ERR_PTR(rc);
3090}
3091
3092int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
3093{
3094 return kvm_s390_vcpu_has_irq(vcpu, 0);
3095}
3096
3097bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
3098{
3099 return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE);
3100}
3101
3102void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
3103{
3104 atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
3105 exit_sie(vcpu);
3106}
3107
3108void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
3109{
3110 atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
3111}
3112
3113static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
3114{
3115 atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
3116 exit_sie(vcpu);
3117}
3118
3119bool kvm_s390_vcpu_sie_inhibited(struct kvm_vcpu *vcpu)
3120{
3121 return atomic_read(&vcpu->arch.sie_block->prog20) &
3122 (PROG_BLOCK_SIE | PROG_REQUEST);
3123}
3124
3125static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
3126{
3127 atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
3128}
3129
3130/*
3131 * Kick a guest cpu out of (v)SIE and wait until (v)SIE is not running.
3132 * If the CPU is not running (e.g. waiting as idle) the function will
3133 * return immediately.
 */
3134void exit_sie(struct kvm_vcpu *vcpu)
3135{
3136 kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOP_INT);
3137 kvm_s390_vsie_kick(vcpu);
3138 while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
3139 cpu_relax();
3140}
3141
3142/* Kick a guest cpu out of SIE to process a request synchronously */
3143void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
3144{
3145 kvm_make_request(req, vcpu);
3146 kvm_s390_vcpu_request(vcpu);
3147}
3148
3149static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
3150 unsigned long end)
3151{
3152 struct kvm *kvm = gmap->private;
3153 struct kvm_vcpu *vcpu;
3154 unsigned long prefix;
3155 int i;
3156
3157 if (gmap_is_shadow(gmap))
3158 return;
3159 if (start >= 1UL << 31)
3160 /* We are only interested in prefix pages */
3161 return;
3162 kvm_for_each_vcpu(i, vcpu, kvm) {
3163 /* match against both prefix pages */
3164 prefix = kvm_s390_get_prefix(vcpu);
3165 if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
3166 VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
3167 start, end);
3168 kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
3169 }
3170 }
3171}
3172
3173bool kvm_arch_no_poll(struct kvm_vcpu *vcpu)
3174{
3175 /* do not poll with more than halt_poll_max_steal percent of steal time */
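/*
 * One microsecond is 4096 (1 << 12) TOD-clock units, so TICK_USEC << 12
 * is the length of one timer tick expressed on the TOD scale that
 * avg_steal_timer is accounted in.
 */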
3176 if (S390_lowcore.avg_steal_timer * 100 / (TICK_USEC << 12) >=
3177 halt_poll_max_steal) {
3178 vcpu->stat.halt_no_poll_steal++;
3179 return true;
3180 }
3181 return false;
3182}
3183
3184int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
3185{
3186 /* kvm common code refers to this, but never calls it */
3187 BUG();
3188 return 0;
3189}
3190
3191static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
3192 struct kvm_one_reg *reg)
3193{
3194 int r = -EINVAL;
3195
3196 switch (reg->id) {
3197 case KVM_REG_S390_TODPR:
3198 r = put_user(vcpu->arch.sie_block->todpr,
3199 (u32 __user *)reg->addr);
3200 break;
3201 case KVM_REG_S390_EPOCHDIFF:
3202 r = put_user(vcpu->arch.sie_block->epoch,
3203 (u64 __user *)reg->addr);
3204 break;
3205 case KVM_REG_S390_CPU_TIMER:
3206 r = put_user(kvm_s390_get_cpu_timer(vcpu),
3207 (u64 __user *)reg->addr);
3208 break;
3209 case KVM_REG_S390_CLOCK_COMP:
3210 r = put_user(vcpu->arch.sie_block->ckc,
3211 (u64 __user *)reg->addr);
3212 break;
3213 case KVM_REG_S390_PFTOKEN:
3214 r = put_user(vcpu->arch.pfault_token,
3215 (u64 __user *)reg->addr);
3216 break;
3217 case KVM_REG_S390_PFCOMPARE:
3218 r = put_user(vcpu->arch.pfault_compare,
3219 (u64 __user *)reg->addr);
3220 break;
3221 case KVM_REG_S390_PFSELECT:
3222 r = put_user(vcpu->arch.pfault_select,
3223 (u64 __user *)reg->addr);
3224 break;
3225 case KVM_REG_S390_PP:
3226 r = put_user(vcpu->arch.sie_block->pp,
3227 (u64 __user *)reg->addr);
3228 break;
3229 case KVM_REG_S390_GBEA:
3230 r = put_user(vcpu->arch.sie_block->gbea,
3231 (u64 __user *)reg->addr);
3232 break;
3233 default:
3234 break;
3235 }
3236
3237 return r;
3238}
3239
3240static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
3241 struct kvm_one_reg *reg)
3242{
3243 int r = -EINVAL;
3244 __u64 val;
3245
3246 switch (reg->id) {
3247 case KVM_REG_S390_TODPR:
3248 r = get_user(vcpu->arch.sie_block->todpr,
3249 (u32 __user *)reg->addr);
3250 break;
3251 case KVM_REG_S390_EPOCHDIFF:
3252 r = get_user(vcpu->arch.sie_block->epoch,
3253 (u64 __user *)reg->addr);
3254 break;
3255 case KVM_REG_S390_CPU_TIMER:
3256 r = get_user(val, (u64 __user *)reg->addr);
3257 if (!r)
3258 kvm_s390_set_cpu_timer(vcpu, val);
3259 break;
3260 case KVM_REG_S390_CLOCK_COMP:
3261 r = get_user(vcpu->arch.sie_block->ckc,
3262 (u64 __user *)reg->addr);
3263 break;
3264 case KVM_REG_S390_PFTOKEN:
3265 r = get_user(vcpu->arch.pfault_token,
3266 (u64 __user *)reg->addr);
3267 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3268 kvm_clear_async_pf_completion_queue(vcpu);
3269 break;
3270 case KVM_REG_S390_PFCOMPARE:
3271 r = get_user(vcpu->arch.pfault_compare,
3272 (u64 __user *)reg->addr);
3273 break;
3274 case KVM_REG_S390_PFSELECT:
3275 r = get_user(vcpu->arch.pfault_select,
3276 (u64 __user *)reg->addr);
3277 break;
3278 case KVM_REG_S390_PP:
3279 r = get_user(vcpu->arch.sie_block->pp,
3280 (u64 __user *)reg->addr);
3281 break;
3282 case KVM_REG_S390_GBEA:
3283 r = get_user(vcpu->arch.sie_block->gbea,
3284 (u64 __user *)reg->addr);
3285 break;
3286 default:
3287 break;
3288 }
3289
3290 return r;
3291}
3292
3293static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
3294{
3295 kvm_s390_vcpu_initial_reset(vcpu);
3296 return 0;
3297}
3298
3299int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
3300{
3301 vcpu_load(vcpu);
3302 memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
3303 vcpu_put(vcpu);
3304 return 0;
3305}
3306
3307int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
3308{
3309 vcpu_load(vcpu);
3310 memcpy(®s->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
3311 vcpu_put(vcpu);
3312 return 0;
3313}
3314
int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
				  struct kvm_sregs *sregs)
{
	vcpu_load(vcpu);

	memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
	memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));

	vcpu_put(vcpu);
	return 0;
}

int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
				  struct kvm_sregs *sregs)
{
	vcpu_load(vcpu);

	memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
	memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));

	vcpu_put(vcpu);
	return 0;
}

int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
	int ret = 0;

	vcpu_load(vcpu);

	if (test_fp_ctl(fpu->fpc)) {
		ret = -EINVAL;
		goto out;
	}
	vcpu->run->s.regs.fpc = fpu->fpc;
	if (MACHINE_HAS_VX)
		convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
				 (freg_t *) fpu->fprs);
	else
		memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));

out:
	vcpu_put(vcpu);
	return ret;
}

int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
	vcpu_load(vcpu);

	/* make sure we have the latest values */
	save_fpu_regs();
	if (MACHINE_HAS_VX)
		convert_vx_to_fp((freg_t *) fpu->fprs,
				 (__vector128 *) vcpu->run->s.regs.vrs);
	else
		memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
	fpu->fpc = vcpu->run->s.regs.fpc;

	vcpu_put(vcpu);
	return 0;
}

static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
{
	int rc = 0;

	if (!is_vcpu_stopped(vcpu))
		rc = -EBUSY;
	else {
		vcpu->run->psw_mask = psw.mask;
		vcpu->run->psw_addr = psw.addr;
	}
	return rc;
}

int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
				  struct kvm_translation *tr)
{
	return -EINVAL; /* not implemented yet */
}

#define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
			      KVM_GUESTDBG_USE_HW_BP | \
			      KVM_GUESTDBG_ENABLE)

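/*
 * Configure guest debugging for this vcpu. All guest debugging here
 * relies on the facility advertised in sclp.has_gpere: enabling debug
 * forces PER on via CPUSTAT_P, disabling it clears the flag again.
 */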
int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
					struct kvm_guest_debug *dbg)
{
	int rc = 0;

	vcpu_load(vcpu);

	vcpu->guest_debug = 0;
	kvm_s390_clear_bp_data(vcpu);

	if (dbg->control & ~VALID_GUESTDBG_FLAGS) {
		rc = -EINVAL;
		goto out;
	}
	if (!sclp.has_gpere) {
		rc = -EINVAL;
		goto out;
	}

	if (dbg->control & KVM_GUESTDBG_ENABLE) {
		vcpu->guest_debug = dbg->control;
		/* enforce guest PER */
		kvm_s390_set_cpuflags(vcpu, CPUSTAT_P);

		if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
			rc = kvm_s390_import_bp_data(vcpu, dbg);
	} else {
		kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
		vcpu->arch.guestdbg.last_bp = 0;
	}

	if (rc) {
		vcpu->guest_debug = 0;
		kvm_s390_clear_bp_data(vcpu);
		kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
	}

out:
	vcpu_put(vcpu);
	return rc;
}

int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
				    struct kvm_mp_state *mp_state)
{
	int ret;

	vcpu_load(vcpu);

	/* CHECK_STOP and LOAD are not supported yet */
	ret = is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
				      KVM_MP_STATE_OPERATING;

	vcpu_put(vcpu);
	return ret;
}

int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
				    struct kvm_mp_state *mp_state)
{
	int rc = 0;

	vcpu_load(vcpu);

	/* user space knows about this interface - let it control the state */
	vcpu->kvm->arch.user_cpu_state_ctrl = 1;

	switch (mp_state->mp_state) {
	case KVM_MP_STATE_STOPPED:
		kvm_s390_vcpu_stop(vcpu);
		break;
	case KVM_MP_STATE_OPERATING:
		kvm_s390_vcpu_start(vcpu);
		break;
	case KVM_MP_STATE_LOAD:
	case KVM_MP_STATE_CHECK_STOP:
		/* fall through - CHECK_STOP and LOAD are not supported yet */
	default:
		rc = -ENXIO;
	}

	vcpu_put(vcpu);
	return rc;
}

static bool ibs_enabled(struct kvm_vcpu *vcpu)
{
	return kvm_s390_test_cpuflags(vcpu, CPUSTAT_IBS);
}

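/*
 * Process all pending vcpu requests before (re)entering SIE. Most
 * requests just tweak the SIE control block and jump back to "retry",
 * so that requests arriving concurrently are not lost.
 */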
static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
{
retry:
	kvm_s390_vcpu_request_handled(vcpu);
	if (!kvm_request_pending(vcpu))
		return 0;
	/*
	 * We use MMU_RELOAD just to re-arm the ipte notifier for the
	 * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
	 * This ensures that the ipte instruction for this request has
	 * already finished. We might race against a second unmapper that
	 * wants to set the blocking bit. Let's just retry the request loop.
	 */
	if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
		int rc;
		rc = gmap_mprotect_notify(vcpu->arch.gmap,
					  kvm_s390_get_prefix(vcpu),
					  PAGE_SIZE * 2, PROT_WRITE);
		if (rc) {
			kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
			return rc;
		}
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
		vcpu->arch.sie_block->ihcpu = 0xffff;
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
		if (!ibs_enabled(vcpu)) {
			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
			kvm_s390_set_cpuflags(vcpu, CPUSTAT_IBS);
		}
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
		if (ibs_enabled(vcpu)) {
			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
			kvm_s390_clear_cpuflags(vcpu, CPUSTAT_IBS);
		}
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
		vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) {
		/*
		 * Disable CMM virtualization; we will emulate the ESSA
		 * instruction manually, in order to provide additional
		 * functionalities needed for live migration.
		 */
		vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA;
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) {
		/*
		 * Re-enable CMM virtualization if CMMA is available and
		 * CMM has been used.
		 */
		if ((vcpu->kvm->arch.use_cmma) &&
		    (vcpu->kvm->mm->context.uses_cmm))
			vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
		goto retry;
	}

	/* nothing to do, just clear the request */
	kvm_clear_request(KVM_REQ_UNHALT, vcpu);
	/* we left the vsie handler, nothing to do, just clear the request */
	kvm_clear_request(KVM_REQ_VSIE_RESTART, vcpu);

	return 0;
}

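/*
 * Set the guest TOD clock: the epoch (and, with facility 139, the epoch
 * index) is recomputed as the difference between the requested guest TOD
 * and the current host TOD, then propagated to all vcpus while they are
 * blocked.
 */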
void kvm_s390_set_tod_clock(struct kvm *kvm,
			    const struct kvm_s390_vm_tod_clock *gtod)
{
	struct kvm_vcpu *vcpu;
	struct kvm_s390_tod_clock_ext htod;
	int i;

	mutex_lock(&kvm->lock);
	preempt_disable();

	get_tod_clock_ext((char *)&htod);

	kvm->arch.epoch = gtod->tod - htod.tod;
	kvm->arch.epdx = 0;
	if (test_kvm_facility(kvm, 139)) {
		kvm->arch.epdx = gtod->epoch_idx - htod.epoch_idx;
		if (kvm->arch.epoch > gtod->tod)
			kvm->arch.epdx -= 1;
	}

	kvm_s390_vcpu_block_all(kvm);
	kvm_for_each_vcpu(i, vcpu, kvm) {
		vcpu->arch.sie_block->epoch = kvm->arch.epoch;
		vcpu->arch.sie_block->epdx = kvm->arch.epdx;
	}

	kvm_s390_vcpu_unblock_all(kvm);
	preempt_enable();
	mutex_unlock(&kvm->lock);
}

/**
 * kvm_arch_fault_in_page - fault-in guest page if necessary
 * @vcpu: The corresponding virtual cpu
 * @gpa: Guest physical address
 * @writable: Whether the page should be writable or not
 *
 * Make sure that a guest page has been faulted-in on the host.
 *
 * Return: Zero on success, negative error code otherwise.
 */
long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
{
	return gmap_fault(vcpu->arch.gmap, gpa,
			  writable ? FAULT_FLAG_WRITE : 0);
}

static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
				      unsigned long token)
{
	struct kvm_s390_interrupt inti;
	struct kvm_s390_irq irq;

	if (start_token) {
		irq.u.ext.ext_params2 = token;
		irq.type = KVM_S390_INT_PFAULT_INIT;
		WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
	} else {
		inti.type = KVM_S390_INT_PFAULT_DONE;
		inti.parm64 = token;
		WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
	}
}

void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
				     struct kvm_async_pf *work)
{
	trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
	__kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
}

void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
				 struct kvm_async_pf *work)
{
	trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
	__kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
}

void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
			       struct kvm_async_pf *work)
{
	/* s390 will always inject the page directly */
}

bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
{
	/*
	 * s390 will always inject the page directly, but we still
	 * want check_async_completion to clean up.
	 */
	return true;
}

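/*
 * Try to arm an async page fault for the current guest fault. This is
 * only possible if the guest set up pfault handshaking (valid token,
 * matching PSW mask bits, external interrupts and the service-signal
 * subclass enabled); otherwise return 0 so that the caller faults the
 * page in synchronously.
 */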
static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
{
	hva_t hva;
	struct kvm_arch_async_pf arch;
	int rc;

	if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
		return 0;
	if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
	    vcpu->arch.pfault_compare)
		return 0;
	if (psw_extint_disabled(vcpu))
		return 0;
	if (kvm_s390_vcpu_has_irq(vcpu, 0))
		return 0;
	if (!(vcpu->arch.sie_block->gcr[0] & CR0_SERVICE_SIGNAL_SUBMASK))
		return 0;
	if (!vcpu->arch.gmap->pfault_enabled)
		return 0;

	hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
	hva += current->thread.gmap_addr & ~PAGE_MASK;
	if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
		return 0;

	rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
	return rc;
}

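/*
 * Per-round preparation before entering SIE: handle completed async
 * page faults, deliver pending interrupts, process vcpu requests and,
 * if guest debugging is active, patch the PER control state.
 */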
static int vcpu_pre_run(struct kvm_vcpu *vcpu)
{
	int rc, cpuflags;

	/*
	 * On s390, notifications for arriving pages will be delivered
	 * directly to the guest, but the housekeeping for completed
	 * pfaults is handled outside the worker.
	 */
	kvm_check_async_pf_completion(vcpu);

	vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
	vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];

	if (need_resched())
		schedule();

	if (test_cpu_flag(CIF_MCCK_PENDING))
		s390_handle_mcck();

	if (!kvm_is_ucontrol(vcpu->kvm)) {
		rc = kvm_s390_deliver_pending_interrupts(vcpu);
		if (rc)
			return rc;
	}

	rc = kvm_s390_handle_requests(vcpu);
	if (rc)
		return rc;

	if (guestdbg_enabled(vcpu)) {
		kvm_s390_backup_guest_per_regs(vcpu);
		kvm_s390_patch_guest_per_regs(vcpu);
	}

	clear_bit(vcpu->vcpu_id, vcpu->kvm->arch.gisa_int.kicked_mask);

	vcpu->arch.sie_block->icptcode = 0;
	cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
	VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
	trace_kvm_s390_sie_enter(vcpu, cpuflags);

	return 0;
}

static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
{
	struct kvm_s390_pgm_info pgm_info = {
		.code = PGM_ADDRESSING,
	};
	u8 opcode, ilen;
	int rc;

	VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
	trace_kvm_s390_sie_fault(vcpu);

	/*
	 * We want to inject an addressing exception, which is defined as a
	 * suppressing or terminating exception. However, since we came here
	 * by a DAT access exception, the PSW still points to the faulting
	 * instruction since DAT exceptions are nullifying. So we've got
	 * to look up the current opcode to get the length of the instruction
	 * to be able to forward the PSW.
	 */
	rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1);
	ilen = insn_length(opcode);
	if (rc < 0) {
		return rc;
	} else if (rc) {
		/* Instruction-Fetching Exceptions - we can't detect the ilen.
		 * Forward by arbitrary ilc, injection will take care of
		 * nullification if necessary.
		 */
		pgm_info = vcpu->arch.pgm;
		ilen = 4;
	}
	pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
	kvm_s390_forward_psw(vcpu, ilen);
	return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
}

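/*
 * Post-process a SIE exit: reinject host machine checks (-EINTR),
 * dispatch intercepts to the intercept handlers, and turn guest page
 * faults either into async pfaults or into a synchronous fault-in of
 * the page.
 */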
static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
{
	struct mcck_volatile_info *mcck_info;
	struct sie_page *sie_page;

	VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
		   vcpu->arch.sie_block->icptcode);
	trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);

	if (guestdbg_enabled(vcpu))
		kvm_s390_restore_guest_per_regs(vcpu);

	vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
	vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;

	if (exit_reason == -EINTR) {
		VCPU_EVENT(vcpu, 3, "%s", "machine check");
		sie_page = container_of(vcpu->arch.sie_block,
					struct sie_page, sie_block);
		mcck_info = &sie_page->mcck_info;
		kvm_s390_reinject_machine_check(vcpu, mcck_info);
		return 0;
	}

	if (vcpu->arch.sie_block->icptcode > 0) {
		int rc = kvm_handle_sie_intercept(vcpu);

		if (rc != -EOPNOTSUPP)
			return rc;
		vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
		vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
		vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
		vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
		return -EREMOTE;
	} else if (exit_reason != -EFAULT) {
		vcpu->stat.exit_null++;
		return 0;
	} else if (kvm_is_ucontrol(vcpu->kvm)) {
		vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
		vcpu->run->s390_ucontrol.trans_exc_code =
						current->thread.gmap_addr;
		vcpu->run->s390_ucontrol.pgm_code = 0x10;
		return -EREMOTE;
	} else if (current->thread.gmap_pfault) {
		trace_kvm_s390_major_guest_pfault(vcpu);
		current->thread.gmap_pfault = 0;
		if (kvm_arch_setup_async_pf(vcpu))
			return 0;
		return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
	}
	return vcpu_post_run_fault_in_sie(vcpu);
}

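/*
 * The inner run loop: prepare the vcpu, enter SIE (with the srcu lock
 * dropped and cpu timer accounting switched around the guest context)
 * and post-process the exit, until an error, a pending signal or a
 * guestdbg exit ends the loop.
 */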
static int __vcpu_run(struct kvm_vcpu *vcpu)
{
	int rc, exit_reason;

	/*
	 * We try to hold kvm->srcu during most of vcpu_run (except when
	 * running the guest), so that memslots (and other stuff) are protected.
	 */
	vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);

	do {
		rc = vcpu_pre_run(vcpu);
		if (rc)
			break;

		srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
		/*
		 * As PF_VCPU will be used in the fault handler, there must be
		 * no uaccess between guest_enter and guest_exit.
		 */
		local_irq_disable();
		guest_enter_irqoff();
		__disable_cpu_timer_accounting(vcpu);
		local_irq_enable();
		exit_reason = sie64a(vcpu->arch.sie_block,
				     vcpu->run->s.regs.gprs);
		local_irq_disable();
		__enable_cpu_timer_accounting(vcpu);
		guest_exit_irqoff();
		local_irq_enable();
		vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);

		rc = vcpu_post_run(vcpu, exit_reason);
	} while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);

	srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
	return rc;
}

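/*
 * Transfer the register state that userspace passed in via kvm_run into
 * the vcpu/SIE state, and perform the lazy host->guest switch of the
 * floating-point/vector, access and guarded-storage registers.
 */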
static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
	struct runtime_instr_cb *riccb;
	struct gs_cb *gscb;

	riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
	gscb = (struct gs_cb *) &kvm_run->s.regs.gscb;
	vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
	vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
		kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
		memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
		/* some control register changes require a tlb flush */
		kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
	}
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
		kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
		vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
		vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
		vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
		vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
	}
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
		vcpu->arch.pfault_token = kvm_run->s.regs.pft;
		vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
		vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
			kvm_clear_async_pf_completion_queue(vcpu);
	}
	/*
	 * If userspace sets the riccb (e.g. after migration) to a valid state,
	 * we should enable RI here instead of doing the lazy enablement.
	 */
	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
	    test_kvm_facility(vcpu->kvm, 64) &&
	    riccb->v &&
	    !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) {
		VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)");
		vcpu->arch.sie_block->ecb3 |= ECB3_RI;
	}
	/*
	 * If userspace sets the gscb (e.g. after migration) to non-zero,
	 * we should enable GS here instead of doing the lazy enablement.
	 */
	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) &&
	    test_kvm_facility(vcpu->kvm, 133) &&
	    gscb->gssm &&
	    !vcpu->arch.gs_enabled) {
		VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)");
		vcpu->arch.sie_block->ecb |= ECB_GS;
		vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
		vcpu->arch.gs_enabled = 1;
	}
	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_BPBC) &&
	    test_kvm_facility(vcpu->kvm, 82)) {
		vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
		vcpu->arch.sie_block->fpf |= kvm_run->s.regs.bpbc ? FPF_BPBC : 0;
	}
	save_access_regs(vcpu->arch.host_acrs);
	restore_access_regs(vcpu->run->s.regs.acrs);
	/* save host (userspace) fprs/vrs */
	save_fpu_regs();
	vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
	vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
	if (MACHINE_HAS_VX)
		current->thread.fpu.regs = vcpu->run->s.regs.vrs;
	else
		current->thread.fpu.regs = vcpu->run->s.regs.fprs;
	current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
	if (test_fp_ctl(current->thread.fpu.fpc))
		/* User space provided an invalid FPC, let's clear it */
		current->thread.fpu.fpc = 0;
	if (MACHINE_HAS_GS) {
		preempt_disable();
		__ctl_set_bit(2, 4);
		if (current->thread.gs_cb) {
			vcpu->arch.host_gscb = current->thread.gs_cb;
			save_gs_cb(vcpu->arch.host_gscb);
		}
		if (vcpu->arch.gs_enabled) {
			current->thread.gs_cb = (struct gs_cb *)
						&vcpu->run->s.regs.gscb;
			restore_gs_cb(current->thread.gs_cb);
		}
		preempt_enable();
	}
	/* SIE will load etoken directly from SDNX and therefore kvm_run */

	kvm_run->kvm_dirty_regs = 0;
}

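/*
 * Counterpart of sync_regs: copy the guest register state back into
 * kvm_run and restore the host state saved by sync_regs.
 */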
static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
	kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
	kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
	kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
	memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
	kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
	kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
	kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
	kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
	kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
	kvm_run->s.regs.pft = vcpu->arch.pfault_token;
	kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
	kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
	kvm_run->s.regs.bpbc = (vcpu->arch.sie_block->fpf & FPF_BPBC) == FPF_BPBC;
	save_access_regs(vcpu->run->s.regs.acrs);
	restore_access_regs(vcpu->arch.host_acrs);
	/* Save guest register state */
	save_fpu_regs();
	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
	/* Restore will be done lazily at return */
	current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
	current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
	if (MACHINE_HAS_GS) {
		__ctl_set_bit(2, 4);
		if (vcpu->arch.gs_enabled)
			save_gs_cb(current->thread.gs_cb);
		preempt_disable();
		current->thread.gs_cb = vcpu->arch.host_gscb;
		restore_gs_cb(vcpu->arch.host_gscb);
		preempt_enable();
		if (!vcpu->arch.host_gscb)
			__ctl_clear_bit(2, 4);
		vcpu->arch.host_gscb = NULL;
	}
	/* SIE will save etoken directly into SDNX and therefore kvm_run */
}

int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
	int rc;

	if (kvm_run->immediate_exit)
		return -EINTR;

	if (kvm_run->kvm_valid_regs & ~KVM_SYNC_S390_VALID_FIELDS ||
	    kvm_run->kvm_dirty_regs & ~KVM_SYNC_S390_VALID_FIELDS)
		return -EINVAL;

	vcpu_load(vcpu);

	if (guestdbg_exit_pending(vcpu)) {
		kvm_s390_prepare_debug_exit(vcpu);
		rc = 0;
		goto out;
	}

	kvm_sigset_activate(vcpu);

	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
		kvm_s390_vcpu_start(vcpu);
	} else if (is_vcpu_stopped(vcpu)) {
		pr_err_ratelimited("can't run stopped vcpu %d\n",
				   vcpu->vcpu_id);
		rc = -EINVAL;
		goto out;
	}

	sync_regs(vcpu, kvm_run);
	enable_cpu_timer_accounting(vcpu);

	might_fault();
	rc = __vcpu_run(vcpu);

	if (signal_pending(current) && !rc) {
		kvm_run->exit_reason = KVM_EXIT_INTR;
		rc = -EINTR;
	}

	if (guestdbg_exit_pending(vcpu) && !rc) {
		kvm_s390_prepare_debug_exit(vcpu);
		rc = 0;
	}

	if (rc == -EREMOTE) {
		/* userspace support is needed, kvm_run has been prepared */
		rc = 0;
	}

	disable_cpu_timer_accounting(vcpu);
	store_regs(vcpu, kvm_run);

	kvm_sigset_deactivate(vcpu);

	vcpu->stat.exit_userspace++;
out:
	vcpu_put(vcpu);
	return rc;
}

/*
 * store status at address
 * we have two special cases:
 * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
 * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
 */
int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
{
	unsigned char archmode = 1;
	freg_t fprs[NUM_FPRS];
	unsigned int px;
	u64 clkcomp, cputm;
	int rc;

	px = kvm_s390_get_prefix(vcpu);
	if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
		if (write_guest_abs(vcpu, 163, &archmode, 1))
			return -EFAULT;
		gpa = 0;
	} else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
		if (write_guest_real(vcpu, 163, &archmode, 1))
			return -EFAULT;
		gpa = px;
	} else
		gpa -= __LC_FPREGS_SAVE_AREA;

	/* manually convert vector registers if necessary */
	if (MACHINE_HAS_VX) {
		convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
				     fprs, 128);
	} else {
		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
				     vcpu->run->s.regs.fprs, 128);
	}
	rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
			      vcpu->run->s.regs.gprs, 128);
	rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
			      &vcpu->arch.sie_block->gpsw, 16);
	rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
			      &px, 4);
	rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
			      &vcpu->run->s.regs.fpc, 4);
	rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
			      &vcpu->arch.sie_block->todpr, 4);
	cputm = kvm_s390_get_cpu_timer(vcpu);
	rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
			      &cputm, 8);
	clkcomp = vcpu->arch.sie_block->ckc >> 8;
	rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
			      &clkcomp, 8);
	rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
			      &vcpu->run->s.regs.acrs, 64);
	rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
			      &vcpu->arch.sie_block->gcr, 128);
	return rc ? -EFAULT : 0;
}

int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
{
	/*
	 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
	 * switch in the run ioctl. Let's update our copies before we save
	 * them into the save area.
	 */
	save_fpu_regs();
	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
	save_access_regs(vcpu->run->s.regs.acrs);

	return kvm_s390_store_status_unloaded(vcpu, addr);
}

static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
{
	kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
	kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
}

static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
{
	unsigned int i;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm) {
		__disable_ibs_on_vcpu(vcpu);
	}
}

static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
{
	if (!sclp.has_ibs)
		return;
	kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
	kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
}

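/*
 * Move this vcpu out of the STOPPED state. IBS only helps while exactly
 * one vcpu is running, so it is enabled when a single vcpu starts and
 * disabled on all vcpus as soon as a second one comes online.
 */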
void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
{
	int i, online_vcpus, started_vcpus = 0;

	if (!is_vcpu_stopped(vcpu))
		return;

	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
	/* Only one cpu at a time may enter/leave the STOPPED state. */
	spin_lock(&vcpu->kvm->arch.start_stop_lock);
	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);

	for (i = 0; i < online_vcpus; i++) {
		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
			started_vcpus++;
	}

	if (started_vcpus == 0) {
		/* we're the only active VCPU -> speed it up */
		__enable_ibs_on_vcpu(vcpu);
	} else if (started_vcpus == 1) {
		/*
		 * As we are starting a second VCPU, we have to disable
		 * the IBS facility on all VCPUs to remove potentially
		 * outstanding ENABLE requests.
		 */
		__disable_ibs_on_all_vcpus(vcpu->kvm);
	}

	kvm_s390_clear_cpuflags(vcpu, CPUSTAT_STOPPED);
	/*
	 * Another VCPU might have used IBS while we were offline.
	 * Let's play safe and flush the VCPU at startup.
	 */
	kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
	return;
}

void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
{
	int i, online_vcpus, started_vcpus = 0;
	struct kvm_vcpu *started_vcpu = NULL;

	if (is_vcpu_stopped(vcpu))
		return;

	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
	/* Only one cpu at a time may enter/leave the STOPPED state. */
	spin_lock(&vcpu->kvm->arch.start_stop_lock);
	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);

	/* SIGP STOP and SIGP STOP AND STORE STATUS have been fully processed */
	kvm_s390_clear_stop_irq(vcpu);

	kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOPPED);
	__disable_ibs_on_vcpu(vcpu);

	for (i = 0; i < online_vcpus; i++) {
		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
			started_vcpus++;
			started_vcpu = vcpu->kvm->vcpus[i];
		}
	}

	if (started_vcpus == 1) {
		/*
		 * As we only have one VCPU left, we want to enable the
		 * IBS facility for that VCPU to speed it up.
		 */
		__enable_ibs_on_vcpu(started_vcpu);
	}

	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
	return;
}

static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
				     struct kvm_enable_cap *cap)
{
	int r;

	if (cap->flags)
		return -EINVAL;

	switch (cap->cap) {
	case KVM_CAP_S390_CSS_SUPPORT:
		if (!vcpu->kvm->arch.css_support) {
			vcpu->kvm->arch.css_support = 1;
			VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
			trace_kvm_s390_enable_css(vcpu->kvm);
		}
		r = 0;
		break;
	default:
		r = -EINVAL;
		break;
	}
	return r;
}

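/*
 * Back end for the KVM_S390_MEM_OP ioctl: read or write guest logical
 * memory through a temporary kernel buffer, or merely check the access
 * with KVM_S390_MEMOP_F_CHECK_ONLY. A positive return value denotes a
 * guest access exception, which is injected on request.
 */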
static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
				  struct kvm_s390_mem_op *mop)
{
	void __user *uaddr = (void __user *)mop->buf;
	void *tmpbuf = NULL;
	int r, srcu_idx;
	const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
				    | KVM_S390_MEMOP_F_CHECK_ONLY;

	if (mop->flags & ~supported_flags || mop->ar >= NUM_ACRS || !mop->size)
		return -EINVAL;

	if (mop->size > MEM_OP_MAX_SIZE)
		return -E2BIG;

	if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
		tmpbuf = vmalloc(mop->size);
		if (!tmpbuf)
			return -ENOMEM;
	}

	srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);

	switch (mop->op) {
	case KVM_S390_MEMOP_LOGICAL_READ:
		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
			r = check_gva_range(vcpu, mop->gaddr, mop->ar,
					    mop->size, GACC_FETCH);
			break;
		}
		r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
		if (r == 0) {
			if (copy_to_user(uaddr, tmpbuf, mop->size))
				r = -EFAULT;
		}
		break;
	case KVM_S390_MEMOP_LOGICAL_WRITE:
		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
			r = check_gva_range(vcpu, mop->gaddr, mop->ar,
					    mop->size, GACC_STORE);
			break;
		}
		if (copy_from_user(tmpbuf, uaddr, mop->size)) {
			r = -EFAULT;
			break;
		}
		r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
		break;
	default:
		r = -EINVAL;
	}

	srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);

	if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
		kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);

	vfree(tmpbuf);
	return r;
}

long kvm_arch_vcpu_async_ioctl(struct file *filp,
			       unsigned int ioctl, unsigned long arg)
{
	struct kvm_vcpu *vcpu = filp->private_data;
	void __user *argp = (void __user *)arg;

	switch (ioctl) {
	case KVM_S390_IRQ: {
		struct kvm_s390_irq s390irq;

		if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
			return -EFAULT;
		return kvm_s390_inject_vcpu(vcpu, &s390irq);
	}
	case KVM_S390_INTERRUPT: {
		struct kvm_s390_interrupt s390int;
		struct kvm_s390_irq s390irq = {};

		if (copy_from_user(&s390int, argp, sizeof(s390int)))
			return -EFAULT;
		if (s390int_to_s390irq(&s390int, &s390irq))
			return -EINVAL;
		return kvm_s390_inject_vcpu(vcpu, &s390irq);
	}
	}
	return -ENOIOCTLCMD;
}

long kvm_arch_vcpu_ioctl(struct file *filp,
			 unsigned int ioctl, unsigned long arg)
{
	struct kvm_vcpu *vcpu = filp->private_data;
	void __user *argp = (void __user *)arg;
	int idx;
	long r;

	vcpu_load(vcpu);

	switch (ioctl) {
	case KVM_S390_STORE_STATUS:
		idx = srcu_read_lock(&vcpu->kvm->srcu);
		r = kvm_s390_vcpu_store_status(vcpu, arg);
		srcu_read_unlock(&vcpu->kvm->srcu, idx);
		break;
	case KVM_S390_SET_INITIAL_PSW: {
		psw_t psw;

		r = -EFAULT;
		if (copy_from_user(&psw, argp, sizeof(psw)))
			break;
		r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
		break;
	}
	case KVM_S390_INITIAL_RESET:
		r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
		break;
	case KVM_SET_ONE_REG:
	case KVM_GET_ONE_REG: {
		struct kvm_one_reg reg;
		r = -EFAULT;
		if (copy_from_user(&reg, argp, sizeof(reg)))
			break;
		if (ioctl == KVM_SET_ONE_REG)
			r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
		else
			r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
		break;
	}
#ifdef CONFIG_KVM_S390_UCONTROL
	case KVM_S390_UCAS_MAP: {
		struct kvm_s390_ucas_mapping ucasmap;

		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
			r = -EFAULT;
			break;
		}

		if (!kvm_is_ucontrol(vcpu->kvm)) {
			r = -EINVAL;
			break;
		}

		r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
				     ucasmap.vcpu_addr, ucasmap.length);
		break;
	}
	case KVM_S390_UCAS_UNMAP: {
		struct kvm_s390_ucas_mapping ucasmap;

		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
			r = -EFAULT;
			break;
		}

		if (!kvm_is_ucontrol(vcpu->kvm)) {
			r = -EINVAL;
			break;
		}

		r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
				       ucasmap.length);
		break;
	}
#endif
	case KVM_S390_VCPU_FAULT: {
		r = gmap_fault(vcpu->arch.gmap, arg, 0);
		break;
	}
	case KVM_ENABLE_CAP:
	{
		struct kvm_enable_cap cap;
		r = -EFAULT;
		if (copy_from_user(&cap, argp, sizeof(cap)))
			break;
		r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
		break;
	}
	case KVM_S390_MEM_OP: {
		struct kvm_s390_mem_op mem_op;

		if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
			r = kvm_s390_guest_mem_op(vcpu, &mem_op);
		else
			r = -EFAULT;
		break;
	}
	case KVM_S390_SET_IRQ_STATE: {
		struct kvm_s390_irq_state irq_state;

		r = -EFAULT;
		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
			break;
		if (irq_state.len > VCPU_IRQS_MAX_BUF ||
		    irq_state.len == 0 ||
		    irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
			r = -EINVAL;
			break;
		}
		/* do not use irq_state.flags, it will break old QEMUs */
		r = kvm_s390_set_irq_state(vcpu,
					   (void __user *) irq_state.buf,
					   irq_state.len);
		break;
	}
	case KVM_S390_GET_IRQ_STATE: {
		struct kvm_s390_irq_state irq_state;

		r = -EFAULT;
		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
			break;
		if (irq_state.len == 0) {
			r = -EINVAL;
			break;
		}
		/* do not use irq_state.flags, it will break old QEMUs */
		r = kvm_s390_get_irq_state(vcpu,
					   (__u8 __user *) irq_state.buf,
					   irq_state.len);
		break;
	}
	default:
		r = -ENOTTY;
	}

	vcpu_put(vcpu);
	return r;
}

vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
{
#ifdef CONFIG_KVM_S390_UCONTROL
	if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
		 && (kvm_is_ucontrol(vcpu->kvm))) {
		vmf->page = virt_to_page(vcpu->arch.sie_block);
		get_page(vmf->page);
		return 0;
	}
#endif
	return VM_FAULT_SIGBUS;
}

int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
			    unsigned long npages)
{
	return 0;
}

/* Section: memory related */
int kvm_arch_prepare_memory_region(struct kvm *kvm,
				   struct kvm_memory_slot *memslot,
				   const struct kvm_userspace_memory_region *mem,
				   enum kvm_mr_change change)
{
	/* A few sanity checks. Memory slots have to start and end at a
	   segment boundary (1MB). The memory in userland may be fragmented
	   into various different vmas. It is okay to mmap() and munmap()
	   stuff in this slot after doing this call at any time. */

	if (mem->userspace_addr & 0xffffful)
		return -EINVAL;

	if (mem->memory_size & 0xffffful)
		return -EINVAL;

	if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
		return -EINVAL;

	return 0;
}

void kvm_arch_commit_memory_region(struct kvm *kvm,
				const struct kvm_userspace_memory_region *mem,
				const struct kvm_memory_slot *old,
				const struct kvm_memory_slot *new,
				enum kvm_mr_change change)
{
	int rc = 0;

	switch (change) {
	case KVM_MR_DELETE:
		rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
					old->npages * PAGE_SIZE);
		break;
	case KVM_MR_MOVE:
		rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
					old->npages * PAGE_SIZE);
		if (rc)
			break;
		/* FALLTHROUGH */
	case KVM_MR_CREATE:
		rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
				      mem->guest_phys_addr, mem->memory_size);
		break;
	case KVM_MR_FLAGS_ONLY:
		break;
	default:
		WARN(1, "Unknown KVM MR CHANGE: %d\n", change);
	}
	if (rc)
		pr_warn("failed to commit memory region\n");
	return;
}

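/*
 * Mask of the facility bits in STFLE block i that may be passed through
 * to guests, derived from the 2-bit per-block hmfai field reported by
 * the SCLP; used below to filter the host facility list.
 */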
static inline unsigned long nonhyp_mask(int i)
{
	unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;

	return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
}

void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
{
	vcpu->valid_wakeup = false;
}

static int __init kvm_s390_init(void)
{
	int i;

	if (!sclp.has_sief2) {
		pr_info("SIE is not available\n");
		return -ENODEV;
	}

	if (nested && hpage) {
		pr_info("A KVM host that supports nesting cannot back its KVM guests with huge pages\n");
		return -EINVAL;
	}

	for (i = 0; i < 16; i++)
		kvm_s390_fac_base[i] |=
			S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i);

	return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
}

static void __exit kvm_s390_exit(void)
{
	kvm_exit();
}

module_init(kvm_s390_init);
module_exit(kvm_s390_exit);

/*
 * Enable autoloading of the kvm module.
 * Note that we add the module alias here instead of virt/kvm/kvm_main.c
 * since x86 takes a different approach.
 */
#include <linux/miscdevice.h>
MODULE_ALIAS_MISCDEV(KVM_MINOR);
MODULE_ALIAS("devname:kvm");