   1// SPDX-License-Identifier: GPL-2.0
   2/*
   3 * hosting IBM Z kernel virtual machines (s390x)
   4 *
   5 * Copyright IBM Corp. 2008, 2020
   6 *
   7 *    Author(s): Carsten Otte <cotte@de.ibm.com>
   8 *               Christian Borntraeger <borntraeger@de.ibm.com>
   9 *               Christian Ehrhardt <ehrhardt@de.ibm.com>
  10 *               Jason J. Herne <jjherne@us.ibm.com>
  11 */
  12
  13#define KMSG_COMPONENT "kvm-s390"
  14#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
  15
  16#include <linux/compiler.h>
  17#include <linux/err.h>
  18#include <linux/fs.h>
  19#include <linux/hrtimer.h>
  20#include <linux/init.h>
  21#include <linux/kvm.h>
  22#include <linux/kvm_host.h>
  23#include <linux/mman.h>
  24#include <linux/module.h>
  25#include <linux/moduleparam.h>
  26#include <linux/random.h>
  27#include <linux/slab.h>
  28#include <linux/timer.h>
  29#include <linux/vmalloc.h>
  30#include <linux/bitmap.h>
  31#include <linux/sched/signal.h>
  32#include <linux/string.h>
  33#include <linux/pgtable.h>
  34#include <linux/mmu_notifier.h>
  35
  36#include <asm/asm-offsets.h>
  37#include <asm/lowcore.h>
  38#include <asm/stp.h>
  39#include <asm/gmap.h>
  40#include <asm/nmi.h>
  41#include <asm/switch_to.h>
  42#include <asm/isc.h>
  43#include <asm/sclp.h>
  44#include <asm/cpacf.h>
  45#include <asm/timex.h>
  46#include <asm/ap.h>
  47#include <asm/uv.h>
  48#include <asm/fpu/api.h>
  49#include "kvm-s390.h"
  50#include "gaccess.h"
  51#include "pci.h"
  52
  53#define CREATE_TRACE_POINTS
  54#include "trace.h"
  55#include "trace-s390.h"
  56
  57#define MEM_OP_MAX_SIZE 65536	/* Maximum transfer size for KVM_S390_MEM_OP */
  58#define LOCAL_IRQS 32
  59#define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
  60			   (KVM_MAX_VCPUS + LOCAL_IRQS))
  61
  62const struct _kvm_stats_desc kvm_vm_stats_desc[] = {
  63	KVM_GENERIC_VM_STATS(),
  64	STATS_DESC_COUNTER(VM, inject_io),
  65	STATS_DESC_COUNTER(VM, inject_float_mchk),
  66	STATS_DESC_COUNTER(VM, inject_pfault_done),
  67	STATS_DESC_COUNTER(VM, inject_service_signal),
  68	STATS_DESC_COUNTER(VM, inject_virtio),
  69	STATS_DESC_COUNTER(VM, aen_forward)
  70};
  71
  72const struct kvm_stats_header kvm_vm_stats_header = {
  73	.name_size = KVM_STATS_NAME_SIZE,
  74	.num_desc = ARRAY_SIZE(kvm_vm_stats_desc),
  75	.id_offset = sizeof(struct kvm_stats_header),
  76	.desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE,
  77	.data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE +
  78		       sizeof(kvm_vm_stats_desc),
  79};
  80
  81const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = {
  82	KVM_GENERIC_VCPU_STATS(),
  83	STATS_DESC_COUNTER(VCPU, exit_userspace),
  84	STATS_DESC_COUNTER(VCPU, exit_null),
  85	STATS_DESC_COUNTER(VCPU, exit_external_request),
  86	STATS_DESC_COUNTER(VCPU, exit_io_request),
  87	STATS_DESC_COUNTER(VCPU, exit_external_interrupt),
  88	STATS_DESC_COUNTER(VCPU, exit_stop_request),
  89	STATS_DESC_COUNTER(VCPU, exit_validity),
  90	STATS_DESC_COUNTER(VCPU, exit_instruction),
  91	STATS_DESC_COUNTER(VCPU, exit_pei),
  92	STATS_DESC_COUNTER(VCPU, halt_no_poll_steal),
  93	STATS_DESC_COUNTER(VCPU, instruction_lctl),
  94	STATS_DESC_COUNTER(VCPU, instruction_lctlg),
  95	STATS_DESC_COUNTER(VCPU, instruction_stctl),
  96	STATS_DESC_COUNTER(VCPU, instruction_stctg),
  97	STATS_DESC_COUNTER(VCPU, exit_program_interruption),
  98	STATS_DESC_COUNTER(VCPU, exit_instr_and_program),
  99	STATS_DESC_COUNTER(VCPU, exit_operation_exception),
 100	STATS_DESC_COUNTER(VCPU, deliver_ckc),
 101	STATS_DESC_COUNTER(VCPU, deliver_cputm),
 102	STATS_DESC_COUNTER(VCPU, deliver_external_call),
 103	STATS_DESC_COUNTER(VCPU, deliver_emergency_signal),
 104	STATS_DESC_COUNTER(VCPU, deliver_service_signal),
 105	STATS_DESC_COUNTER(VCPU, deliver_virtio),
 106	STATS_DESC_COUNTER(VCPU, deliver_stop_signal),
 107	STATS_DESC_COUNTER(VCPU, deliver_prefix_signal),
 108	STATS_DESC_COUNTER(VCPU, deliver_restart_signal),
 109	STATS_DESC_COUNTER(VCPU, deliver_program),
 110	STATS_DESC_COUNTER(VCPU, deliver_io),
 111	STATS_DESC_COUNTER(VCPU, deliver_machine_check),
 112	STATS_DESC_COUNTER(VCPU, exit_wait_state),
 113	STATS_DESC_COUNTER(VCPU, inject_ckc),
 114	STATS_DESC_COUNTER(VCPU, inject_cputm),
 115	STATS_DESC_COUNTER(VCPU, inject_external_call),
 116	STATS_DESC_COUNTER(VCPU, inject_emergency_signal),
 117	STATS_DESC_COUNTER(VCPU, inject_mchk),
 118	STATS_DESC_COUNTER(VCPU, inject_pfault_init),
 119	STATS_DESC_COUNTER(VCPU, inject_program),
 120	STATS_DESC_COUNTER(VCPU, inject_restart),
 121	STATS_DESC_COUNTER(VCPU, inject_set_prefix),
 122	STATS_DESC_COUNTER(VCPU, inject_stop_signal),
 123	STATS_DESC_COUNTER(VCPU, instruction_epsw),
 124	STATS_DESC_COUNTER(VCPU, instruction_gs),
 125	STATS_DESC_COUNTER(VCPU, instruction_io_other),
 126	STATS_DESC_COUNTER(VCPU, instruction_lpsw),
 127	STATS_DESC_COUNTER(VCPU, instruction_lpswe),
 128	STATS_DESC_COUNTER(VCPU, instruction_pfmf),
 129	STATS_DESC_COUNTER(VCPU, instruction_ptff),
 130	STATS_DESC_COUNTER(VCPU, instruction_sck),
 131	STATS_DESC_COUNTER(VCPU, instruction_sckpf),
 132	STATS_DESC_COUNTER(VCPU, instruction_stidp),
 133	STATS_DESC_COUNTER(VCPU, instruction_spx),
 134	STATS_DESC_COUNTER(VCPU, instruction_stpx),
 135	STATS_DESC_COUNTER(VCPU, instruction_stap),
 136	STATS_DESC_COUNTER(VCPU, instruction_iske),
 137	STATS_DESC_COUNTER(VCPU, instruction_ri),
 138	STATS_DESC_COUNTER(VCPU, instruction_rrbe),
 139	STATS_DESC_COUNTER(VCPU, instruction_sske),
 140	STATS_DESC_COUNTER(VCPU, instruction_ipte_interlock),
 141	STATS_DESC_COUNTER(VCPU, instruction_stsi),
 142	STATS_DESC_COUNTER(VCPU, instruction_stfl),
 143	STATS_DESC_COUNTER(VCPU, instruction_tb),
 144	STATS_DESC_COUNTER(VCPU, instruction_tpi),
 145	STATS_DESC_COUNTER(VCPU, instruction_tprot),
 146	STATS_DESC_COUNTER(VCPU, instruction_tsch),
 147	STATS_DESC_COUNTER(VCPU, instruction_sie),
 148	STATS_DESC_COUNTER(VCPU, instruction_essa),
 149	STATS_DESC_COUNTER(VCPU, instruction_sthyi),
 150	STATS_DESC_COUNTER(VCPU, instruction_sigp_sense),
 151	STATS_DESC_COUNTER(VCPU, instruction_sigp_sense_running),
 152	STATS_DESC_COUNTER(VCPU, instruction_sigp_external_call),
 153	STATS_DESC_COUNTER(VCPU, instruction_sigp_emergency),
 154	STATS_DESC_COUNTER(VCPU, instruction_sigp_cond_emergency),
 155	STATS_DESC_COUNTER(VCPU, instruction_sigp_start),
 156	STATS_DESC_COUNTER(VCPU, instruction_sigp_stop),
 157	STATS_DESC_COUNTER(VCPU, instruction_sigp_stop_store_status),
 158	STATS_DESC_COUNTER(VCPU, instruction_sigp_store_status),
 159	STATS_DESC_COUNTER(VCPU, instruction_sigp_store_adtl_status),
 160	STATS_DESC_COUNTER(VCPU, instruction_sigp_arch),
 161	STATS_DESC_COUNTER(VCPU, instruction_sigp_prefix),
 162	STATS_DESC_COUNTER(VCPU, instruction_sigp_restart),
 163	STATS_DESC_COUNTER(VCPU, instruction_sigp_init_cpu_reset),
 164	STATS_DESC_COUNTER(VCPU, instruction_sigp_cpu_reset),
 165	STATS_DESC_COUNTER(VCPU, instruction_sigp_unknown),
 166	STATS_DESC_COUNTER(VCPU, instruction_diagnose_10),
 167	STATS_DESC_COUNTER(VCPU, instruction_diagnose_44),
 168	STATS_DESC_COUNTER(VCPU, instruction_diagnose_9c),
 169	STATS_DESC_COUNTER(VCPU, diag_9c_ignored),
 170	STATS_DESC_COUNTER(VCPU, diag_9c_forward),
 171	STATS_DESC_COUNTER(VCPU, instruction_diagnose_258),
 172	STATS_DESC_COUNTER(VCPU, instruction_diagnose_308),
 173	STATS_DESC_COUNTER(VCPU, instruction_diagnose_500),
 174	STATS_DESC_COUNTER(VCPU, instruction_diagnose_other),
 175	STATS_DESC_COUNTER(VCPU, pfault_sync)
 176};
 177
 178const struct kvm_stats_header kvm_vcpu_stats_header = {
 179	.name_size = KVM_STATS_NAME_SIZE,
 180	.num_desc = ARRAY_SIZE(kvm_vcpu_stats_desc),
 181	.id_offset = sizeof(struct kvm_stats_header),
 182	.desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE,
 183	.data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE +
 184		       sizeof(kvm_vcpu_stats_desc),
 185};
 186
 187/* allow nested virtualization in KVM (if enabled by user space) */
 188static int nested;
 189module_param(nested, int, S_IRUGO);
 190MODULE_PARM_DESC(nested, "Nested virtualization support");
 191
 192/* allow 1m huge page guest backing, if !nested */
 193static int hpage;
 194module_param(hpage, int, 0444);
 195MODULE_PARM_DESC(hpage, "1m huge page backing support");
 196
 197/* maximum percentage of steal time for polling.  >100 is treated like 100 */
 198static u8 halt_poll_max_steal = 10;
 199module_param(halt_poll_max_steal, byte, 0644);
 200MODULE_PARM_DESC(halt_poll_max_steal, "Maximum percentage of steal time to allow polling");
 201
 202/* if set to true, the GISA will be initialized and used if available */
 203static bool use_gisa  = true;
 204module_param(use_gisa, bool, 0644);
 205MODULE_PARM_DESC(use_gisa, "Use the GISA if the host supports it.");
 206
 207/* maximum diag9c forwarding per second */
 208unsigned int diag9c_forwarding_hz;
 209module_param(diag9c_forwarding_hz, uint, 0644);
 210MODULE_PARM_DESC(diag9c_forwarding_hz, "Maximum diag9c forwarding per second, 0 to turn off");
 211
 212/*
 213 * allow asynchronous deinit for protected guests; enable by default since
 214 * the feature is opt-in anyway
 215 */
 216static int async_destroy = 1;
 217module_param(async_destroy, int, 0444);
 218MODULE_PARM_DESC(async_destroy, "Asynchronous destroy for protected guests");
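/*
 * Illustrative usage (sketch): the parameters above are ordinary module
 * parameters of kvm.ko on s390, e.g.
 *
 *   modprobe kvm nested=1 hpage=1 halt_poll_max_steal=20
 *
 * The 0644 ones (halt_poll_max_steal, use_gisa, diag9c_forwarding_hz) can
 * also be changed at runtime via /sys/module/kvm/parameters/<name>.
 */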
 219
 220/*
 221 * For now we handle at most 16 double words as this is what the s390 base
 222 * kernel handles and stores in the prefix page. If we ever need to go beyond
 223 * this, this requires changes to code, but the external uapi can stay.
 224 */
 225#define SIZE_INTERNAL 16
 226
 227/*
 228 * Base feature mask that defines default mask for facilities. Consists of the
 229 * defines in FACILITIES_KVM and the non-hypervisor managed bits.
 230 */
 231static unsigned long kvm_s390_fac_base[SIZE_INTERNAL] = { FACILITIES_KVM };
 232/*
 233 * Extended feature mask. Consists of the defines in FACILITIES_KVM_CPUMODEL
 234 * and defines the facilities that can be enabled via a cpu model.
 235 */
 236static unsigned long kvm_s390_fac_ext[SIZE_INTERNAL] = { FACILITIES_KVM_CPUMODEL };
 237
 238static unsigned long kvm_s390_fac_size(void)
 239{
 240	BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_MASK_SIZE_U64);
 241	BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_LIST_SIZE_U64);
 242	BUILD_BUG_ON(SIZE_INTERNAL * sizeof(unsigned long) >
 243		sizeof(stfle_fac_list));
 244
 245	return SIZE_INTERNAL;
 246}
 247
 248/* available cpu features supported by kvm */
 249static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
 250/* available subfunctions indicated via query / "test bit" */
 251static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;
 252
 253static struct gmap_notifier gmap_notifier;
 254static struct gmap_notifier vsie_gmap_notifier;
 255debug_info_t *kvm_s390_dbf;
 256debug_info_t *kvm_s390_dbf_uv;
 257
 258/* Section: not file related */
 259int kvm_arch_hardware_enable(void)
 260{
 261	/* every s390 is virtualization enabled ;-) */
 262	return 0;
 263}
 264
 265int kvm_arch_check_processor_compat(void *opaque)
 266{
 267	return 0;
 268}
 269
 270/* forward declarations */
 271static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
 272			      unsigned long end);
 273static int sca_switch_to_extended(struct kvm *kvm);
 274
 275static void kvm_clock_sync_scb(struct kvm_s390_sie_block *scb, u64 delta)
 276{
 277	u8 delta_idx = 0;
 278
 279	/*
 280	 * The TOD jumps by delta, we have to compensate this by adding
 281	 * -delta to the epoch.
 282	 */
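	/*
	 * With the multiple-epoch facility (ECD_MEF), (epdx, epoch) acts as a
	 * signed 128-bit epoch: the low word gets -delta added, the high word
	 * gets its sign extension (0 or -1), and the carry out of the low-word
	 * addition (detected by the unsigned compare epoch < delta below) is
	 * propagated into epdx.
	 */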
 283	delta = -delta;
 284
 285	/* sign-extension - we're adding to signed values below */
 286	if ((s64)delta < 0)
 287		delta_idx = -1;
 288
 289	scb->epoch += delta;
 290	if (scb->ecd & ECD_MEF) {
 291		scb->epdx += delta_idx;
 292		if (scb->epoch < delta)
 293			scb->epdx += 1;
 294	}
 295}
 296
 297/*
 298 * This callback is executed during stop_machine(). All CPUs are therefore
 299 * temporarily stopped. In order not to change guest behavior, we have to
 300 * disable preemption whenever we touch the epoch of kvm and the VCPUs,
 301 * so a CPU won't be stopped while calculating with the epoch.
 302 */
 303static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
 304			  void *v)
 305{
 306	struct kvm *kvm;
 307	struct kvm_vcpu *vcpu;
 308	unsigned long i;
 309	unsigned long long *delta = v;
 310
 311	list_for_each_entry(kvm, &vm_list, vm_list) {
 312		kvm_for_each_vcpu(i, vcpu, kvm) {
 313			kvm_clock_sync_scb(vcpu->arch.sie_block, *delta);
 314			if (i == 0) {
 315				kvm->arch.epoch = vcpu->arch.sie_block->epoch;
 316				kvm->arch.epdx = vcpu->arch.sie_block->epdx;
 317			}
 318			if (vcpu->arch.cputm_enabled)
 319				vcpu->arch.cputm_start += *delta;
 320			if (vcpu->arch.vsie_block)
 321				kvm_clock_sync_scb(vcpu->arch.vsie_block,
 322						   *delta);
 323		}
 324	}
 325	return NOTIFY_OK;
 326}
 327
 328static struct notifier_block kvm_clock_notifier = {
 329	.notifier_call = kvm_clock_sync,
 330};
 331
 332int kvm_arch_hardware_setup(void *opaque)
 333{
 334	gmap_notifier.notifier_call = kvm_gmap_notifier;
 335	gmap_register_pte_notifier(&gmap_notifier);
 336	vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
 337	gmap_register_pte_notifier(&vsie_gmap_notifier);
 338	atomic_notifier_chain_register(&s390_epoch_delta_notifier,
 339				       &kvm_clock_notifier);
 340	return 0;
 341}
 342
 343void kvm_arch_hardware_unsetup(void)
 344{
 345	gmap_unregister_pte_notifier(&gmap_notifier);
 346	gmap_unregister_pte_notifier(&vsie_gmap_notifier);
 347	atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
 348					 &kvm_clock_notifier);
 349}
 350
 351static void allow_cpu_feat(unsigned long nr)
 352{
 353	set_bit_inv(nr, kvm_s390_available_cpu_feat);
 354}
 355
 356static inline int plo_test_bit(unsigned char nr)
 357{
 358	unsigned long function = (unsigned long)nr | 0x100;
 359	int cc;
 360
 361	asm volatile(
 362		"	lgr	0,%[function]\n"
 363		/* Parameter registers are ignored for "test bit" */
 364		"	plo	0,0,0,0(0)\n"
 365		"	ipm	%0\n"
 366		"	srl	%0,28\n"
 367		: "=d" (cc)
 368		: [function] "d" (function)
 369		: "cc", "0");
 370	return cc == 0;
 371}
 372
 373static __always_inline void __insn32_query(unsigned int opcode, u8 *query)
 374{
 375	asm volatile(
 376		"	lghi	0,0\n"
 377		"	lgr	1,%[query]\n"
 378		/* Parameter registers are ignored */
 379		"	.insn	rrf,%[opc] << 16,2,4,6,0\n"
 380		:
 381		: [query] "d" ((unsigned long)query), [opc] "i" (opcode)
 382		: "cc", "memory", "0", "1");
 383}
 384
 385#define INSN_SORTL 0xb938
 386#define INSN_DFLTCC 0xb939
 387
 388static void kvm_s390_cpu_feat_init(void)
 389{
 390	int i;
 391
 392	for (i = 0; i < 256; ++i) {
 393		if (plo_test_bit(i))
 394			kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
 395	}
 396
 397	if (test_facility(28)) /* TOD-clock steering */
 398		ptff(kvm_s390_available_subfunc.ptff,
 399		     sizeof(kvm_s390_available_subfunc.ptff),
 400		     PTFF_QAF);
 401
 402	if (test_facility(17)) { /* MSA */
 403		__cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
 404			      kvm_s390_available_subfunc.kmac);
 405		__cpacf_query(CPACF_KMC, (cpacf_mask_t *)
 406			      kvm_s390_available_subfunc.kmc);
 407		__cpacf_query(CPACF_KM, (cpacf_mask_t *)
 408			      kvm_s390_available_subfunc.km);
 409		__cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
 410			      kvm_s390_available_subfunc.kimd);
 411		__cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
 412			      kvm_s390_available_subfunc.klmd);
 413	}
 414	if (test_facility(76)) /* MSA3 */
 415		__cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
 416			      kvm_s390_available_subfunc.pckmo);
 417	if (test_facility(77)) { /* MSA4 */
 418		__cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
 419			      kvm_s390_available_subfunc.kmctr);
 420		__cpacf_query(CPACF_KMF, (cpacf_mask_t *)
 421			      kvm_s390_available_subfunc.kmf);
 422		__cpacf_query(CPACF_KMO, (cpacf_mask_t *)
 423			      kvm_s390_available_subfunc.kmo);
 424		__cpacf_query(CPACF_PCC, (cpacf_mask_t *)
 425			      kvm_s390_available_subfunc.pcc);
 426	}
 427	if (test_facility(57)) /* MSA5 */
 428		__cpacf_query(CPACF_PRNO, (cpacf_mask_t *)
 429			      kvm_s390_available_subfunc.ppno);
 430
 431	if (test_facility(146)) /* MSA8 */
 432		__cpacf_query(CPACF_KMA, (cpacf_mask_t *)
 433			      kvm_s390_available_subfunc.kma);
 434
 435	if (test_facility(155)) /* MSA9 */
 436		__cpacf_query(CPACF_KDSA, (cpacf_mask_t *)
 437			      kvm_s390_available_subfunc.kdsa);
 438
 439	if (test_facility(150)) /* SORTL */
 440		__insn32_query(INSN_SORTL, kvm_s390_available_subfunc.sortl);
 441
 442	if (test_facility(151)) /* DFLTCC */
 443		__insn32_query(INSN_DFLTCC, kvm_s390_available_subfunc.dfltcc);
 444
 445	if (MACHINE_HAS_ESOP)
 446		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
 447	/*
 448	 * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
 449	 * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
 450	 */
 451	if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
 452	    !test_facility(3) || !nested)
 453		return;
 454	allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
 455	if (sclp.has_64bscao)
 456		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
 457	if (sclp.has_siif)
 458		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
 459	if (sclp.has_gpere)
 460		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
 461	if (sclp.has_gsls)
 462		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
 463	if (sclp.has_ib)
 464		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
 465	if (sclp.has_cei)
 466		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
 467	if (sclp.has_ibs)
 468		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
 469	if (sclp.has_kss)
 470		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS);
 471	/*
 472	 * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
 473	 * all skey handling functions read/set the skey from the PGSTE
 474	 * instead of the real storage key.
 475	 *
 476	 * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will make
 477	 * pages being detected as preserved although they are resident.
 478	 *
 479	 * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
 480	 * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
 481	 *
 482	 * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
 483	 * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
 484	 * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
 485	 *
 486	 * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
 487	 * cannot easily shadow the SCA because of the ipte lock.
 488	 */
 489}
 490
 491int kvm_arch_init(void *opaque)
 492{
 493	int rc = -ENOMEM;
 494
 495	kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
 496	if (!kvm_s390_dbf)
 497		return -ENOMEM;
 498
 499	kvm_s390_dbf_uv = debug_register("kvm-uv", 32, 1, 7 * sizeof(long));
 500	if (!kvm_s390_dbf_uv)
 501		goto out;
 502
 503	if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view) ||
 504	    debug_register_view(kvm_s390_dbf_uv, &debug_sprintf_view))
 505		goto out;
 506
 507	kvm_s390_cpu_feat_init();
 508
 509	/* Register floating interrupt controller interface. */
 510	rc = kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
 511	if (rc) {
 512		pr_err("A FLIC registration call failed with rc=%d\n", rc);
 513		goto out;
 514	}
 515
 516	if (IS_ENABLED(CONFIG_VFIO_PCI_ZDEV_KVM)) {
 517		rc = kvm_s390_pci_init();
 518		if (rc) {
 519			pr_err("Unable to allocate AIFT for PCI\n");
 520			goto out;
 521		}
 522	}
 523
 524	rc = kvm_s390_gib_init(GAL_ISC);
 525	if (rc)
 526		goto out;
 527
 528	return 0;
 529
 530out:
 531	kvm_arch_exit();
 532	return rc;
 533}
 534
 535void kvm_arch_exit(void)
 536{
 537	kvm_s390_gib_destroy();
 538	if (IS_ENABLED(CONFIG_VFIO_PCI_ZDEV_KVM))
 539		kvm_s390_pci_exit();
 540	debug_unregister(kvm_s390_dbf);
 541	debug_unregister(kvm_s390_dbf_uv);
 542}
 543
 544/* Section: device related */
 545long kvm_arch_dev_ioctl(struct file *filp,
 546			unsigned int ioctl, unsigned long arg)
 547{
 548	if (ioctl == KVM_S390_ENABLE_SIE)
 549		return s390_enable_sie();
 550	return -EINVAL;
 551}
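/*
 * Illustrative userspace call (sketch, assuming an open /dev/kvm fd):
 *
 *   ioctl(kvm_fd, KVM_S390_ENABLE_SIE, 0);
 */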
 552
 553int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 554{
 555	int r;
 556
 557	switch (ext) {
 558	case KVM_CAP_S390_PSW:
 559	case KVM_CAP_S390_GMAP:
 560	case KVM_CAP_SYNC_MMU:
 561#ifdef CONFIG_KVM_S390_UCONTROL
 562	case KVM_CAP_S390_UCONTROL:
 563#endif
 564	case KVM_CAP_ASYNC_PF:
 565	case KVM_CAP_SYNC_REGS:
 566	case KVM_CAP_ONE_REG:
 567	case KVM_CAP_ENABLE_CAP:
 568	case KVM_CAP_S390_CSS_SUPPORT:
 569	case KVM_CAP_IOEVENTFD:
 570	case KVM_CAP_DEVICE_CTRL:
 571	case KVM_CAP_S390_IRQCHIP:
 572	case KVM_CAP_VM_ATTRIBUTES:
 573	case KVM_CAP_MP_STATE:
 574	case KVM_CAP_IMMEDIATE_EXIT:
 575	case KVM_CAP_S390_INJECT_IRQ:
 576	case KVM_CAP_S390_USER_SIGP:
 577	case KVM_CAP_S390_USER_STSI:
 578	case KVM_CAP_S390_SKEYS:
 579	case KVM_CAP_S390_IRQ_STATE:
 580	case KVM_CAP_S390_USER_INSTR0:
 581	case KVM_CAP_S390_CMMA_MIGRATION:
 582	case KVM_CAP_S390_AIS:
 583	case KVM_CAP_S390_AIS_MIGRATION:
 584	case KVM_CAP_S390_VCPU_RESETS:
 585	case KVM_CAP_SET_GUEST_DEBUG:
 586	case KVM_CAP_S390_DIAG318:
 587	case KVM_CAP_S390_MEM_OP_EXTENSION:
 588		r = 1;
 589		break;
 590	case KVM_CAP_SET_GUEST_DEBUG2:
 591		r = KVM_GUESTDBG_VALID_MASK;
 592		break;
 593	case KVM_CAP_S390_HPAGE_1M:
 594		r = 0;
 595		if (hpage && !kvm_is_ucontrol(kvm))
 596			r = 1;
 597		break;
 598	case KVM_CAP_S390_MEM_OP:
 599		r = MEM_OP_MAX_SIZE;
 600		break;
 601	case KVM_CAP_NR_VCPUS:
 602	case KVM_CAP_MAX_VCPUS:
 603	case KVM_CAP_MAX_VCPU_ID:
 604		r = KVM_S390_BSCA_CPU_SLOTS;
 605		if (!kvm_s390_use_sca_entries())
 606			r = KVM_MAX_VCPUS;
 607		else if (sclp.has_esca && sclp.has_64bscao)
 608			r = KVM_S390_ESCA_CPU_SLOTS;
 609		if (ext == KVM_CAP_NR_VCPUS)
 610			r = min_t(unsigned int, num_online_cpus(), r);
 611		break;
 612	case KVM_CAP_S390_COW:
 613		r = MACHINE_HAS_ESOP;
 614		break;
 615	case KVM_CAP_S390_VECTOR_REGISTERS:
 616		r = MACHINE_HAS_VX;
 617		break;
 618	case KVM_CAP_S390_RI:
 619		r = test_facility(64);
 620		break;
 621	case KVM_CAP_S390_GS:
 622		r = test_facility(133);
 623		break;
 624	case KVM_CAP_S390_BPB:
 625		r = test_facility(82);
 626		break;
 627	case KVM_CAP_S390_PROTECTED_ASYNC_DISABLE:
 628		r = async_destroy && is_prot_virt_host();
 629		break;
 630	case KVM_CAP_S390_PROTECTED:
 631		r = is_prot_virt_host();
 632		break;
 633	case KVM_CAP_S390_PROTECTED_DUMP: {
 634		u64 pv_cmds_dump[] = {
 635			BIT_UVC_CMD_DUMP_INIT,
 636			BIT_UVC_CMD_DUMP_CONFIG_STOR_STATE,
 637			BIT_UVC_CMD_DUMP_CPU,
 638			BIT_UVC_CMD_DUMP_COMPLETE,
 639		};
 640		int i;
 641
 642		r = is_prot_virt_host();
 643
 644		for (i = 0; i < ARRAY_SIZE(pv_cmds_dump); i++) {
 645			if (!test_bit_inv(pv_cmds_dump[i],
 646					  (unsigned long *)&uv_info.inst_calls_list)) {
 647				r = 0;
 648				break;
 649			}
 650		}
 651		break;
 652	}
 653	case KVM_CAP_S390_ZPCI_OP:
 654		r = kvm_s390_pci_interp_allowed();
 655		break;
 656	case KVM_CAP_S390_CPU_TOPOLOGY:
 657		r = test_facility(11);
 658		break;
 659	default:
 660		r = 0;
 661	}
 662	return r;
 663}
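/*
 * Illustrative userspace query (sketch, vm_fd obtained from KVM_CREATE_VM):
 *
 *   int max = ioctl(vm_fd, KVM_CHECK_EXTENSION, KVM_CAP_S390_MEM_OP);
 *
 * which returns MEM_OP_MAX_SIZE here, i.e. the largest transfer size
 * accepted by KVM_S390_MEM_OP.
 */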
 664
 665void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
 666{
 667	int i;
 668	gfn_t cur_gfn, last_gfn;
 669	unsigned long gaddr, vmaddr;
 670	struct gmap *gmap = kvm->arch.gmap;
 671	DECLARE_BITMAP(bitmap, _PAGE_ENTRIES);
 672
 673	/* Loop over all guest segments */
 674	cur_gfn = memslot->base_gfn;
 675	last_gfn = memslot->base_gfn + memslot->npages;
 676	for (; cur_gfn <= last_gfn; cur_gfn += _PAGE_ENTRIES) {
 677		gaddr = gfn_to_gpa(cur_gfn);
 678		vmaddr = gfn_to_hva_memslot(memslot, cur_gfn);
 679		if (kvm_is_error_hva(vmaddr))
 680			continue;
 681
 682		bitmap_zero(bitmap, _PAGE_ENTRIES);
 683		gmap_sync_dirty_log_pmd(gmap, bitmap, gaddr, vmaddr);
 684		for (i = 0; i < _PAGE_ENTRIES; i++) {
 685			if (test_bit(i, bitmap))
 686				mark_page_dirty(kvm, cur_gfn + i);
 687		}
 688
 689		if (fatal_signal_pending(current))
 690			return;
 691		cond_resched();
 692	}
 693}
 694
 695/* Section: vm related */
 696static void sca_del_vcpu(struct kvm_vcpu *vcpu);
 697
 698/*
 699 * Get (and clear) the dirty memory log for a memory slot.
 700 */
 701int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
 702			       struct kvm_dirty_log *log)
 703{
 704	int r;
 705	unsigned long n;
 706	struct kvm_memory_slot *memslot;
 707	int is_dirty;
 708
 709	if (kvm_is_ucontrol(kvm))
 710		return -EINVAL;
 711
 712	mutex_lock(&kvm->slots_lock);
 713
 714	r = -EINVAL;
 715	if (log->slot >= KVM_USER_MEM_SLOTS)
 716		goto out;
 717
 718	r = kvm_get_dirty_log(kvm, log, &is_dirty, &memslot);
 719	if (r)
 720		goto out;
 721
 722	/* Clear the dirty log */
 723	if (is_dirty) {
 724		n = kvm_dirty_bitmap_bytes(memslot);
 725		memset(memslot->dirty_bitmap, 0, n);
 726	}
 727	r = 0;
 728out:
 729	mutex_unlock(&kvm->slots_lock);
 730	return r;
 731}
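/*
 * Illustrative userspace call (sketch): fetch and clear the dirty map of
 * memory slot 0, assuming a preallocated bitmap large enough for the slot:
 *
 *   struct kvm_dirty_log log = { .slot = 0, .dirty_bitmap = bitmap };
 *   ioctl(vm_fd, KVM_GET_DIRTY_LOG, &log);
 */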
 732
 733static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
 734{
 735	unsigned long i;
 736	struct kvm_vcpu *vcpu;
 737
 738	kvm_for_each_vcpu(i, vcpu, kvm) {
 739		kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
 740	}
 741}
 742
 743int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
 744{
 745	int r;
 746
 747	if (cap->flags)
 748		return -EINVAL;
 749
 750	switch (cap->cap) {
 751	case KVM_CAP_S390_IRQCHIP:
 752		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
 753		kvm->arch.use_irqchip = 1;
 754		r = 0;
 755		break;
 756	case KVM_CAP_S390_USER_SIGP:
 757		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
 758		kvm->arch.user_sigp = 1;
 759		r = 0;
 760		break;
 761	case KVM_CAP_S390_VECTOR_REGISTERS:
 762		mutex_lock(&kvm->lock);
 763		if (kvm->created_vcpus) {
 764			r = -EBUSY;
 765		} else if (MACHINE_HAS_VX) {
 766			set_kvm_facility(kvm->arch.model.fac_mask, 129);
 767			set_kvm_facility(kvm->arch.model.fac_list, 129);
 768			if (test_facility(134)) {
 769				set_kvm_facility(kvm->arch.model.fac_mask, 134);
 770				set_kvm_facility(kvm->arch.model.fac_list, 134);
 771			}
 772			if (test_facility(135)) {
 773				set_kvm_facility(kvm->arch.model.fac_mask, 135);
 774				set_kvm_facility(kvm->arch.model.fac_list, 135);
 775			}
 776			if (test_facility(148)) {
 777				set_kvm_facility(kvm->arch.model.fac_mask, 148);
 778				set_kvm_facility(kvm->arch.model.fac_list, 148);
 779			}
 780			if (test_facility(152)) {
 781				set_kvm_facility(kvm->arch.model.fac_mask, 152);
 782				set_kvm_facility(kvm->arch.model.fac_list, 152);
 783			}
 784			if (test_facility(192)) {
 785				set_kvm_facility(kvm->arch.model.fac_mask, 192);
 786				set_kvm_facility(kvm->arch.model.fac_list, 192);
 787			}
 788			r = 0;
 789		} else
 790			r = -EINVAL;
 791		mutex_unlock(&kvm->lock);
 792		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
 793			 r ? "(not available)" : "(success)");
 794		break;
 795	case KVM_CAP_S390_RI:
 796		r = -EINVAL;
 797		mutex_lock(&kvm->lock);
 798		if (kvm->created_vcpus) {
 799			r = -EBUSY;
 800		} else if (test_facility(64)) {
 801			set_kvm_facility(kvm->arch.model.fac_mask, 64);
 802			set_kvm_facility(kvm->arch.model.fac_list, 64);
 803			r = 0;
 804		}
 805		mutex_unlock(&kvm->lock);
 806		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
 807			 r ? "(not available)" : "(success)");
 808		break;
 809	case KVM_CAP_S390_AIS:
 810		mutex_lock(&kvm->lock);
 811		if (kvm->created_vcpus) {
 812			r = -EBUSY;
 813		} else {
 814			set_kvm_facility(kvm->arch.model.fac_mask, 72);
 815			set_kvm_facility(kvm->arch.model.fac_list, 72);
 816			r = 0;
 817		}
 818		mutex_unlock(&kvm->lock);
 819		VM_EVENT(kvm, 3, "ENABLE: AIS %s",
 820			 r ? "(not available)" : "(success)");
 821		break;
 822	case KVM_CAP_S390_GS:
 823		r = -EINVAL;
 824		mutex_lock(&kvm->lock);
 825		if (kvm->created_vcpus) {
 826			r = -EBUSY;
 827		} else if (test_facility(133)) {
 828			set_kvm_facility(kvm->arch.model.fac_mask, 133);
 829			set_kvm_facility(kvm->arch.model.fac_list, 133);
 830			r = 0;
 831		}
 832		mutex_unlock(&kvm->lock);
 833		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s",
 834			 r ? "(not available)" : "(success)");
 835		break;
 836	case KVM_CAP_S390_HPAGE_1M:
 837		mutex_lock(&kvm->lock);
 838		if (kvm->created_vcpus)
 839			r = -EBUSY;
 840		else if (!hpage || kvm->arch.use_cmma || kvm_is_ucontrol(kvm))
 841			r = -EINVAL;
 842		else {
 843			r = 0;
 844			mmap_write_lock(kvm->mm);
 845			kvm->mm->context.allow_gmap_hpage_1m = 1;
 846			mmap_write_unlock(kvm->mm);
 847			/*
 848			 * We might have to create fake 4k page
 849			 * tables. To avoid that the hardware works on
 850			 * stale PGSTEs, we emulate these instructions.
 851			 */
 852			kvm->arch.use_skf = 0;
 853			kvm->arch.use_pfmfi = 0;
 854		}
 855		mutex_unlock(&kvm->lock);
 856		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_HPAGE %s",
 857			 r ? "(not available)" : "(success)");
 858		break;
 859	case KVM_CAP_S390_USER_STSI:
 860		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
 861		kvm->arch.user_stsi = 1;
 862		r = 0;
 863		break;
 864	case KVM_CAP_S390_USER_INSTR0:
 865		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
 866		kvm->arch.user_instr0 = 1;
 867		icpt_operexc_on_all_vcpus(kvm);
 868		r = 0;
 869		break;
 870	case KVM_CAP_S390_CPU_TOPOLOGY:
 871		r = -EINVAL;
 872		mutex_lock(&kvm->lock);
 873		if (kvm->created_vcpus) {
 874			r = -EBUSY;
 875		} else if (test_facility(11)) {
 876			set_kvm_facility(kvm->arch.model.fac_mask, 11);
 877			set_kvm_facility(kvm->arch.model.fac_list, 11);
 878			r = 0;
 879		}
 880		mutex_unlock(&kvm->lock);
 881		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_CPU_TOPOLOGY %s",
 882			 r ? "(not available)" : "(success)");
 883		break;
 884	default:
 885		r = -EINVAL;
 886		break;
 887	}
 888	return r;
 889}
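/*
 * Illustrative userspace call (sketch): most of the capabilities above are
 * enabled on the VM fd before any VCPU is created, e.g.
 *
 *   struct kvm_enable_cap cap = { .cap = KVM_CAP_S390_USER_SIGP };
 *   ioctl(vm_fd, KVM_ENABLE_CAP, &cap);
 */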
 890
 891static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
 892{
 893	int ret;
 894
 895	switch (attr->attr) {
 896	case KVM_S390_VM_MEM_LIMIT_SIZE:
 897		ret = 0;
 898		VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
 899			 kvm->arch.mem_limit);
 900		if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
 901			ret = -EFAULT;
 902		break;
 903	default:
 904		ret = -ENXIO;
 905		break;
 906	}
 907	return ret;
 908}
 909
 910static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
 911{
 912	int ret;
 913	unsigned int idx;
 914	switch (attr->attr) {
 915	case KVM_S390_VM_MEM_ENABLE_CMMA:
 916		ret = -ENXIO;
 917		if (!sclp.has_cmma)
 918			break;
 919
 920		VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
 921		mutex_lock(&kvm->lock);
 922		if (kvm->created_vcpus)
 923			ret = -EBUSY;
 924		else if (kvm->mm->context.allow_gmap_hpage_1m)
 925			ret = -EINVAL;
 926		else {
 927			kvm->arch.use_cmma = 1;
 928			/* Not compatible with cmma. */
 929			kvm->arch.use_pfmfi = 0;
 930			ret = 0;
 931		}
 932		mutex_unlock(&kvm->lock);
 933		break;
 934	case KVM_S390_VM_MEM_CLR_CMMA:
 935		ret = -ENXIO;
 936		if (!sclp.has_cmma)
 937			break;
 938		ret = -EINVAL;
 939		if (!kvm->arch.use_cmma)
 940			break;
 941
 942		VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
 943		mutex_lock(&kvm->lock);
 944		idx = srcu_read_lock(&kvm->srcu);
 945		s390_reset_cmma(kvm->arch.gmap->mm);
 946		srcu_read_unlock(&kvm->srcu, idx);
 947		mutex_unlock(&kvm->lock);
 948		ret = 0;
 949		break;
 950	case KVM_S390_VM_MEM_LIMIT_SIZE: {
 951		unsigned long new_limit;
 952
 953		if (kvm_is_ucontrol(kvm))
 954			return -EINVAL;
 955
 956		if (get_user(new_limit, (u64 __user *)attr->addr))
 957			return -EFAULT;
 958
 959		if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
 960		    new_limit > kvm->arch.mem_limit)
 961			return -E2BIG;
 962
 963		if (!new_limit)
 964			return -EINVAL;
 965
 966		/* gmap_create takes last usable address */
 967		if (new_limit != KVM_S390_NO_MEM_LIMIT)
 968			new_limit -= 1;
 969
 970		ret = -EBUSY;
 971		mutex_lock(&kvm->lock);
 972		if (!kvm->created_vcpus) {
 973			/* gmap_create will round the limit up */
 974			struct gmap *new = gmap_create(current->mm, new_limit);
 975
 976			if (!new) {
 977				ret = -ENOMEM;
 978			} else {
 979				gmap_remove(kvm->arch.gmap);
 980				new->private = kvm;
 981				kvm->arch.gmap = new;
 982				ret = 0;
 983			}
 984		}
 985		mutex_unlock(&kvm->lock);
 986		VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
 987		VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
 988			 (void *) kvm->arch.gmap->asce);
 989		break;
 990	}
 991	default:
 992		ret = -ENXIO;
 993		break;
 994	}
 995	return ret;
 996}
 997
 998static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
 999
1000void kvm_s390_vcpu_crypto_reset_all(struct kvm *kvm)
1001{
1002	struct kvm_vcpu *vcpu;
1003	unsigned long i;
1004
1005	kvm_s390_vcpu_block_all(kvm);
1006
1007	kvm_for_each_vcpu(i, vcpu, kvm) {
1008		kvm_s390_vcpu_crypto_setup(vcpu);
1009		/* recreate the shadow crycb by leaving the VSIE handler */
1010		kvm_s390_sync_request(KVM_REQ_VSIE_RESTART, vcpu);
1011	}
1012
1013	kvm_s390_vcpu_unblock_all(kvm);
1014}
1015
1016static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
1017{
1018	mutex_lock(&kvm->lock);
1019	switch (attr->attr) {
1020	case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
1021		if (!test_kvm_facility(kvm, 76)) {
1022			mutex_unlock(&kvm->lock);
1023			return -EINVAL;
1024		}
1025		get_random_bytes(
1026			kvm->arch.crypto.crycb->aes_wrapping_key_mask,
1027			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
1028		kvm->arch.crypto.aes_kw = 1;
1029		VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
1030		break;
1031	case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
1032		if (!test_kvm_facility(kvm, 76)) {
1033			mutex_unlock(&kvm->lock);
1034			return -EINVAL;
1035		}
1036		get_random_bytes(
1037			kvm->arch.crypto.crycb->dea_wrapping_key_mask,
1038			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
1039		kvm->arch.crypto.dea_kw = 1;
1040		VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
1041		break;
1042	case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
1043		if (!test_kvm_facility(kvm, 76)) {
1044			mutex_unlock(&kvm->lock);
1045			return -EINVAL;
1046		}
1047		kvm->arch.crypto.aes_kw = 0;
1048		memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
1049			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
1050		VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
1051		break;
1052	case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
1053		if (!test_kvm_facility(kvm, 76)) {
1054			mutex_unlock(&kvm->lock);
1055			return -EINVAL;
1056		}
1057		kvm->arch.crypto.dea_kw = 0;
1058		memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
1059			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
1060		VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
1061		break;
1062	case KVM_S390_VM_CRYPTO_ENABLE_APIE:
1063		if (!ap_instructions_available()) {
1064			mutex_unlock(&kvm->lock);
1065			return -EOPNOTSUPP;
1066		}
1067		kvm->arch.crypto.apie = 1;
1068		break;
1069	case KVM_S390_VM_CRYPTO_DISABLE_APIE:
1070		if (!ap_instructions_available()) {
1071			mutex_unlock(&kvm->lock);
1072			return -EOPNOTSUPP;
1073		}
1074		kvm->arch.crypto.apie = 0;
1075		break;
1076	default:
1077		mutex_unlock(&kvm->lock);
1078		return -ENXIO;
1079	}
1080
1081	kvm_s390_vcpu_crypto_reset_all(kvm);
1082	mutex_unlock(&kvm->lock);
1083	return 0;
1084}
1085
1086static void kvm_s390_vcpu_pci_setup(struct kvm_vcpu *vcpu)
1087{
1088	/* Only set the ECB bits after guest requests zPCI interpretation */
1089	if (!vcpu->kvm->arch.use_zpci_interp)
1090		return;
1091
1092	vcpu->arch.sie_block->ecb2 |= ECB2_ZPCI_LSI;
1093	vcpu->arch.sie_block->ecb3 |= ECB3_AISII + ECB3_AISI;
1094}
1095
1096void kvm_s390_vcpu_pci_enable_interp(struct kvm *kvm)
1097{
1098	struct kvm_vcpu *vcpu;
1099	unsigned long i;
1100
1101	lockdep_assert_held(&kvm->lock);
1102
1103	if (!kvm_s390_pci_interp_allowed())
1104		return;
1105
1106	/*
1107	 * If host is configured for PCI and the necessary facilities are
1108	 * available, turn on interpretation for the life of this guest
1109	 */
1110	kvm->arch.use_zpci_interp = 1;
1111
1112	kvm_s390_vcpu_block_all(kvm);
1113
1114	kvm_for_each_vcpu(i, vcpu, kvm) {
1115		kvm_s390_vcpu_pci_setup(vcpu);
1116		kvm_s390_sync_request(KVM_REQ_VSIE_RESTART, vcpu);
1117	}
1118
1119	kvm_s390_vcpu_unblock_all(kvm);
1120}
1121
1122static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
1123{
1124	unsigned long cx;
1125	struct kvm_vcpu *vcpu;
1126
1127	kvm_for_each_vcpu(cx, vcpu, kvm)
1128		kvm_s390_sync_request(req, vcpu);
1129}
1130
1131/*
1132 * Must be called with kvm->srcu held to avoid races on memslots, and with
1133 * kvm->slots_lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
1134 */
1135static int kvm_s390_vm_start_migration(struct kvm *kvm)
1136{
1137	struct kvm_memory_slot *ms;
1138	struct kvm_memslots *slots;
1139	unsigned long ram_pages = 0;
1140	int bkt;
1141
1142	/* migration mode already enabled */
1143	if (kvm->arch.migration_mode)
1144		return 0;
1145	slots = kvm_memslots(kvm);
1146	if (!slots || kvm_memslots_empty(slots))
1147		return -EINVAL;
1148
1149	if (!kvm->arch.use_cmma) {
1150		kvm->arch.migration_mode = 1;
1151		return 0;
1152	}
1153	/* mark all the pages in active slots as dirty */
1154	kvm_for_each_memslot(ms, bkt, slots) {
1155		if (!ms->dirty_bitmap)
1156			return -EINVAL;
1157		/*
1158		 * The second half of the bitmap is only used on x86,
1159		 * and would be wasted otherwise, so we put it to good
1160		 * use here to keep track of the state of the storage
1161		 * attributes.
1162		 */
1163		memset(kvm_second_dirty_bitmap(ms), 0xff, kvm_dirty_bitmap_bytes(ms));
1164		ram_pages += ms->npages;
1165	}
1166	atomic64_set(&kvm->arch.cmma_dirty_pages, ram_pages);
1167	kvm->arch.migration_mode = 1;
1168	kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
1169	return 0;
1170}
1171
1172/*
1173 * Must be called with kvm->slots_lock to avoid races with ourselves and
1174 * kvm_s390_vm_start_migration.
1175 */
1176static int kvm_s390_vm_stop_migration(struct kvm *kvm)
1177{
1178	/* migration mode already disabled */
1179	if (!kvm->arch.migration_mode)
1180		return 0;
1181	kvm->arch.migration_mode = 0;
1182	if (kvm->arch.use_cmma)
1183		kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
1184	return 0;
1185}
1186
1187static int kvm_s390_vm_set_migration(struct kvm *kvm,
1188				     struct kvm_device_attr *attr)
1189{
1190	int res = -ENXIO;
1191
1192	mutex_lock(&kvm->slots_lock);
1193	switch (attr->attr) {
1194	case KVM_S390_VM_MIGRATION_START:
1195		res = kvm_s390_vm_start_migration(kvm);
1196		break;
1197	case KVM_S390_VM_MIGRATION_STOP:
1198		res = kvm_s390_vm_stop_migration(kvm);
1199		break;
1200	default:
1201		break;
1202	}
1203	mutex_unlock(&kvm->slots_lock);
1204
1205	return res;
1206}
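/*
 * Illustrative userspace call (sketch): migration mode is toggled through
 * the VM attribute interface, e.g. to start it:
 *
 *   struct kvm_device_attr attr = {
 *           .group = KVM_S390_VM_MIGRATION,
 *           .attr  = KVM_S390_VM_MIGRATION_START,
 *   };
 *   ioctl(vm_fd, KVM_SET_DEVICE_ATTR, &attr);
 */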
1207
1208static int kvm_s390_vm_get_migration(struct kvm *kvm,
1209				     struct kvm_device_attr *attr)
1210{
1211	u64 mig = kvm->arch.migration_mode;
1212
1213	if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
1214		return -ENXIO;
1215
1216	if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))
1217		return -EFAULT;
1218	return 0;
1219}
1220
1221static void __kvm_s390_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod);
1222
1223static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
1224{
1225	struct kvm_s390_vm_tod_clock gtod;
1226
1227	if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
1228		return -EFAULT;
1229
1230	if (!test_kvm_facility(kvm, 139) && gtod.epoch_idx)
1231		return -EINVAL;
1232	__kvm_s390_set_tod_clock(kvm, &gtod);
1233
1234	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx",
1235		gtod.epoch_idx, gtod.tod);
1236
1237	return 0;
1238}
1239
1240static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
1241{
1242	u8 gtod_high;
1243
1244	if (copy_from_user(&gtod_high, (void __user *)attr->addr,
1245					   sizeof(gtod_high)))
1246		return -EFAULT;
1247
1248	if (gtod_high != 0)
1249		return -EINVAL;
1250	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);
1251
1252	return 0;
1253}
1254
1255static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1256{
1257	struct kvm_s390_vm_tod_clock gtod = { 0 };
1258
1259	if (copy_from_user(&gtod.tod, (void __user *)attr->addr,
1260			   sizeof(gtod.tod)))
1261		return -EFAULT;
1262
1263	__kvm_s390_set_tod_clock(kvm, &gtod);
1264	VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod.tod);
1265	return 0;
1266}
1267
1268static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1269{
1270	int ret;
1271
1272	if (attr->flags)
1273		return -EINVAL;
1274
1275	mutex_lock(&kvm->lock);
1276	/*
1277	 * For protected guests, the TOD is managed by the ultravisor, so trying
1278	 * to change it will never bring the expected results.
1279	 */
1280	if (kvm_s390_pv_is_protected(kvm)) {
1281		ret = -EOPNOTSUPP;
1282		goto out_unlock;
1283	}
1284
1285	switch (attr->attr) {
1286	case KVM_S390_VM_TOD_EXT:
1287		ret = kvm_s390_set_tod_ext(kvm, attr);
1288		break;
1289	case KVM_S390_VM_TOD_HIGH:
1290		ret = kvm_s390_set_tod_high(kvm, attr);
1291		break;
1292	case KVM_S390_VM_TOD_LOW:
1293		ret = kvm_s390_set_tod_low(kvm, attr);
1294		break;
1295	default:
1296		ret = -ENXIO;
1297		break;
1298	}
1299
1300out_unlock:
1301	mutex_unlock(&kvm->lock);
1302	return ret;
1303}
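/*
 * Illustrative userspace call (sketch): the TOD attributes above are set via
 * KVM_SET_DEVICE_ATTR on the VM fd, e.g. for the low (base) word:
 *
 *   __u64 tod = new_tod_value;
 *   struct kvm_device_attr attr = {
 *           .group = KVM_S390_VM_TOD,
 *           .attr  = KVM_S390_VM_TOD_LOW,
 *           .addr  = (__u64)(uintptr_t)&tod,
 *   };
 *   ioctl(vm_fd, KVM_SET_DEVICE_ATTR, &attr);
 */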
1304
1305static void kvm_s390_get_tod_clock(struct kvm *kvm,
1306				   struct kvm_s390_vm_tod_clock *gtod)
1307{
1308	union tod_clock clk;
1309
1310	preempt_disable();
1311
1312	store_tod_clock_ext(&clk);
1313
1314	gtod->tod = clk.tod + kvm->arch.epoch;
1315	gtod->epoch_idx = 0;
1316	if (test_kvm_facility(kvm, 139)) {
1317		gtod->epoch_idx = clk.ei + kvm->arch.epdx;
1318		if (gtod->tod < clk.tod)
1319			gtod->epoch_idx += 1;
1320	}
1321
1322	preempt_enable();
1323}
1324
1325static int kvm_s390_get_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
1326{
1327	struct kvm_s390_vm_tod_clock gtod;
1328
1329	memset(&gtod, 0, sizeof(gtod));
1330	kvm_s390_get_tod_clock(kvm, &gtod);
1331	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
1332		return -EFAULT;
1333
1334	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x, TOD base: 0x%llx",
1335		gtod.epoch_idx, gtod.tod);
1336	return 0;
1337}
1338
1339static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
1340{
1341	u8 gtod_high = 0;
1342
1343	if (copy_to_user((void __user *)attr->addr, &gtod_high,
1344					 sizeof(gtod_high)))
1345		return -EFAULT;
1346	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);
1347
1348	return 0;
1349}
1350
1351static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1352{
1353	u64 gtod;
1354
1355	gtod = kvm_s390_get_tod_clock_fast(kvm);
1356	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
1357		return -EFAULT;
1358	VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);
1359
1360	return 0;
1361}
1362
1363static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1364{
1365	int ret;
1366
1367	if (attr->flags)
1368		return -EINVAL;
1369
1370	switch (attr->attr) {
1371	case KVM_S390_VM_TOD_EXT:
1372		ret = kvm_s390_get_tod_ext(kvm, attr);
1373		break;
1374	case KVM_S390_VM_TOD_HIGH:
1375		ret = kvm_s390_get_tod_high(kvm, attr);
1376		break;
1377	case KVM_S390_VM_TOD_LOW:
1378		ret = kvm_s390_get_tod_low(kvm, attr);
1379		break;
1380	default:
1381		ret = -ENXIO;
1382		break;
1383	}
1384	return ret;
1385}
1386
1387static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1388{
1389	struct kvm_s390_vm_cpu_processor *proc;
1390	u16 lowest_ibc, unblocked_ibc;
1391	int ret = 0;
1392
1393	mutex_lock(&kvm->lock);
1394	if (kvm->created_vcpus) {
1395		ret = -EBUSY;
1396		goto out;
1397	}
1398	proc = kzalloc(sizeof(*proc), GFP_KERNEL_ACCOUNT);
1399	if (!proc) {
1400		ret = -ENOMEM;
1401		goto out;
1402	}
1403	if (!copy_from_user(proc, (void __user *)attr->addr,
1404			    sizeof(*proc))) {
1405		kvm->arch.model.cpuid = proc->cpuid;
1406		lowest_ibc = sclp.ibc >> 16 & 0xfff;
1407		unblocked_ibc = sclp.ibc & 0xfff;
1408		if (lowest_ibc && proc->ibc) {
1409			if (proc->ibc > unblocked_ibc)
1410				kvm->arch.model.ibc = unblocked_ibc;
1411			else if (proc->ibc < lowest_ibc)
1412				kvm->arch.model.ibc = lowest_ibc;
1413			else
1414				kvm->arch.model.ibc = proc->ibc;
1415		}
1416		memcpy(kvm->arch.model.fac_list, proc->fac_list,
1417		       S390_ARCH_FAC_LIST_SIZE_BYTE);
1418		VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1419			 kvm->arch.model.ibc,
1420			 kvm->arch.model.cpuid);
1421		VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1422			 kvm->arch.model.fac_list[0],
1423			 kvm->arch.model.fac_list[1],
1424			 kvm->arch.model.fac_list[2]);
1425	} else
1426		ret = -EFAULT;
1427	kfree(proc);
1428out:
1429	mutex_unlock(&kvm->lock);
1430	return ret;
1431}
1432
1433static int kvm_s390_set_processor_feat(struct kvm *kvm,
1434				       struct kvm_device_attr *attr)
1435{
1436	struct kvm_s390_vm_cpu_feat data;
1437
1438	if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
1439		return -EFAULT;
1440	if (!bitmap_subset((unsigned long *) data.feat,
1441			   kvm_s390_available_cpu_feat,
1442			   KVM_S390_VM_CPU_FEAT_NR_BITS))
1443		return -EINVAL;
1444
1445	mutex_lock(&kvm->lock);
1446	if (kvm->created_vcpus) {
1447		mutex_unlock(&kvm->lock);
1448		return -EBUSY;
1449	}
1450	bitmap_from_arr64(kvm->arch.cpu_feat, data.feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
1451	mutex_unlock(&kvm->lock);
1452	VM_EVENT(kvm, 3, "SET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1453			 data.feat[0],
1454			 data.feat[1],
1455			 data.feat[2]);
1456	return 0;
1457}
1458
1459static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
1460					  struct kvm_device_attr *attr)
1461{
1462	mutex_lock(&kvm->lock);
1463	if (kvm->created_vcpus) {
1464		mutex_unlock(&kvm->lock);
1465		return -EBUSY;
1466	}
1467
1468	if (copy_from_user(&kvm->arch.model.subfuncs, (void __user *)attr->addr,
1469			   sizeof(struct kvm_s390_vm_cpu_subfunc))) {
1470		mutex_unlock(&kvm->lock);
1471		return -EFAULT;
1472	}
1473	mutex_unlock(&kvm->lock);
1474
1475	VM_EVENT(kvm, 3, "SET: guest PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1476		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
1477		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
1478		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
1479		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
1480	VM_EVENT(kvm, 3, "SET: guest PTFF   subfunc 0x%16.16lx.%16.16lx",
1481		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
1482		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
1483	VM_EVENT(kvm, 3, "SET: guest KMAC   subfunc 0x%16.16lx.%16.16lx",
1484		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
1485		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
1486	VM_EVENT(kvm, 3, "SET: guest KMC    subfunc 0x%16.16lx.%16.16lx",
1487		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
1488		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
1489	VM_EVENT(kvm, 3, "SET: guest KM     subfunc 0x%16.16lx.%16.16lx",
1490		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
1491		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
1492	VM_EVENT(kvm, 3, "SET: guest KIMD   subfunc 0x%16.16lx.%16.16lx",
1493		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
1494		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
1495	VM_EVENT(kvm, 3, "SET: guest KLMD   subfunc 0x%16.16lx.%16.16lx",
1496		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
1497		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
1498	VM_EVENT(kvm, 3, "SET: guest PCKMO  subfunc 0x%16.16lx.%16.16lx",
1499		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
1500		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
1501	VM_EVENT(kvm, 3, "SET: guest KMCTR  subfunc 0x%16.16lx.%16.16lx",
1502		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
1503		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
1504	VM_EVENT(kvm, 3, "SET: guest KMF    subfunc 0x%16.16lx.%16.16lx",
1505		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
1506		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
1507	VM_EVENT(kvm, 3, "SET: guest KMO    subfunc 0x%16.16lx.%16.16lx",
1508		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
1509		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
1510	VM_EVENT(kvm, 3, "SET: guest PCC    subfunc 0x%16.16lx.%16.16lx",
1511		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
1512		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
1513	VM_EVENT(kvm, 3, "SET: guest PPNO   subfunc 0x%16.16lx.%16.16lx",
1514		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
1515		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
1516	VM_EVENT(kvm, 3, "SET: guest KMA    subfunc 0x%16.16lx.%16.16lx",
1517		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
1518		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
1519	VM_EVENT(kvm, 3, "SET: guest KDSA   subfunc 0x%16.16lx.%16.16lx",
1520		 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
1521		 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
1522	VM_EVENT(kvm, 3, "SET: guest SORTL  subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1523		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
1524		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
1525		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
1526		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
1527	VM_EVENT(kvm, 3, "SET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1528		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
1529		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
1530		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
1531		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);
1532
1533	return 0;
1534}
1535
1536static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1537{
1538	int ret = -ENXIO;
1539
1540	switch (attr->attr) {
1541	case KVM_S390_VM_CPU_PROCESSOR:
1542		ret = kvm_s390_set_processor(kvm, attr);
1543		break;
1544	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1545		ret = kvm_s390_set_processor_feat(kvm, attr);
1546		break;
1547	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1548		ret = kvm_s390_set_processor_subfunc(kvm, attr);
1549		break;
1550	}
1551	return ret;
1552}
1553
1554static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1555{
1556	struct kvm_s390_vm_cpu_processor *proc;
1557	int ret = 0;
1558
1559	proc = kzalloc(sizeof(*proc), GFP_KERNEL_ACCOUNT);
1560	if (!proc) {
1561		ret = -ENOMEM;
1562		goto out;
1563	}
1564	proc->cpuid = kvm->arch.model.cpuid;
1565	proc->ibc = kvm->arch.model.ibc;
1566	memcpy(&proc->fac_list, kvm->arch.model.fac_list,
1567	       S390_ARCH_FAC_LIST_SIZE_BYTE);
1568	VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1569		 kvm->arch.model.ibc,
1570		 kvm->arch.model.cpuid);
1571	VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1572		 kvm->arch.model.fac_list[0],
1573		 kvm->arch.model.fac_list[1],
1574		 kvm->arch.model.fac_list[2]);
1575	if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
1576		ret = -EFAULT;
1577	kfree(proc);
1578out:
1579	return ret;
1580}
1581
1582static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
1583{
1584	struct kvm_s390_vm_cpu_machine *mach;
1585	int ret = 0;
1586
1587	mach = kzalloc(sizeof(*mach), GFP_KERNEL_ACCOUNT);
1588	if (!mach) {
1589		ret = -ENOMEM;
1590		goto out;
1591	}
1592	get_cpu_id((struct cpuid *) &mach->cpuid);
1593	mach->ibc = sclp.ibc;
1594	memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
1595	       S390_ARCH_FAC_LIST_SIZE_BYTE);
1596	memcpy((unsigned long *)&mach->fac_list, stfle_fac_list,
1597	       sizeof(stfle_fac_list));
1598	VM_EVENT(kvm, 3, "GET: host ibc:  0x%4.4x, host cpuid:  0x%16.16llx",
1599		 kvm->arch.model.ibc,
1600		 kvm->arch.model.cpuid);
1601	VM_EVENT(kvm, 3, "GET: host facmask:  0x%16.16llx.%16.16llx.%16.16llx",
1602		 mach->fac_mask[0],
1603		 mach->fac_mask[1],
1604		 mach->fac_mask[2]);
1605	VM_EVENT(kvm, 3, "GET: host faclist:  0x%16.16llx.%16.16llx.%16.16llx",
1606		 mach->fac_list[0],
1607		 mach->fac_list[1],
1608		 mach->fac_list[2]);
1609	if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
1610		ret = -EFAULT;
1611	kfree(mach);
1612out:
1613	return ret;
1614}
1615
1616static int kvm_s390_get_processor_feat(struct kvm *kvm,
1617				       struct kvm_device_attr *attr)
1618{
1619	struct kvm_s390_vm_cpu_feat data;
1620
1621	bitmap_to_arr64(data.feat, kvm->arch.cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
1622	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1623		return -EFAULT;
1624	VM_EVENT(kvm, 3, "GET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1625			 data.feat[0],
1626			 data.feat[1],
1627			 data.feat[2]);
1628	return 0;
1629}
1630
1631static int kvm_s390_get_machine_feat(struct kvm *kvm,
1632				     struct kvm_device_attr *attr)
1633{
1634	struct kvm_s390_vm_cpu_feat data;
1635
1636	bitmap_to_arr64(data.feat, kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
1637	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1638		return -EFAULT;
1639	VM_EVENT(kvm, 3, "GET: host feat:  0x%16.16llx.0x%16.16llx.0x%16.16llx",
1640			 data.feat[0],
1641			 data.feat[1],
1642			 data.feat[2]);
1643	return 0;
1644}
1645
1646static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
1647					  struct kvm_device_attr *attr)
1648{
1649	if (copy_to_user((void __user *)attr->addr, &kvm->arch.model.subfuncs,
1650	    sizeof(struct kvm_s390_vm_cpu_subfunc)))
1651		return -EFAULT;
1652
1653	VM_EVENT(kvm, 3, "GET: guest PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1654		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
1655		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
1656		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
1657		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
1658	VM_EVENT(kvm, 3, "GET: guest PTFF   subfunc 0x%16.16lx.%16.16lx",
1659		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
1660		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
1661	VM_EVENT(kvm, 3, "GET: guest KMAC   subfunc 0x%16.16lx.%16.16lx",
1662		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
1663		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
1664	VM_EVENT(kvm, 3, "GET: guest KMC    subfunc 0x%16.16lx.%16.16lx",
1665		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
1666		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
1667	VM_EVENT(kvm, 3, "GET: guest KM     subfunc 0x%16.16lx.%16.16lx",
1668		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
1669		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
1670	VM_EVENT(kvm, 3, "GET: guest KIMD   subfunc 0x%16.16lx.%16.16lx",
1671		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
1672		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
1673	VM_EVENT(kvm, 3, "GET: guest KLMD   subfunc 0x%16.16lx.%16.16lx",
1674		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
1675		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
1676	VM_EVENT(kvm, 3, "GET: guest PCKMO  subfunc 0x%16.16lx.%16.16lx",
1677		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
1678		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
1679	VM_EVENT(kvm, 3, "GET: guest KMCTR  subfunc 0x%16.16lx.%16.16lx",
1680		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
1681		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
1682	VM_EVENT(kvm, 3, "GET: guest KMF    subfunc 0x%16.16lx.%16.16lx",
1683		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
1684		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
1685	VM_EVENT(kvm, 3, "GET: guest KMO    subfunc 0x%16.16lx.%16.16lx",
1686		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
1687		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
1688	VM_EVENT(kvm, 3, "GET: guest PCC    subfunc 0x%16.16lx.%16.16lx",
1689		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
1690		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
1691	VM_EVENT(kvm, 3, "GET: guest PPNO   subfunc 0x%16.16lx.%16.16lx",
1692		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
1693		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
1694	VM_EVENT(kvm, 3, "GET: guest KMA    subfunc 0x%16.16lx.%16.16lx",
1695		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
1696		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
1697	VM_EVENT(kvm, 3, "GET: guest KDSA   subfunc 0x%16.16lx.%16.16lx",
1698		 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
1699		 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
1700	VM_EVENT(kvm, 3, "GET: guest SORTL  subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1701		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
1702		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
1703		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
1704		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
1705	VM_EVENT(kvm, 3, "GET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1706		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
1707		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
1708		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
1709		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);
1710
1711	return 0;
1712}
1713
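/*
 * Report the subfunction blocks available on the host machine to user space;
 * the counterpart of kvm_s390_get_processor_subfunc() for host-wide data.
 */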
1714static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
1715					struct kvm_device_attr *attr)
1716{
1717	if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
1718	    sizeof(struct kvm_s390_vm_cpu_subfunc)))
1719		return -EFAULT;
1720
1721	VM_EVENT(kvm, 3, "GET: host  PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1722		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[0],
1723		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[1],
1724		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[2],
1725		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[3]);
1726	VM_EVENT(kvm, 3, "GET: host  PTFF   subfunc 0x%16.16lx.%16.16lx",
1727		 ((unsigned long *) &kvm_s390_available_subfunc.ptff)[0],
1728		 ((unsigned long *) &kvm_s390_available_subfunc.ptff)[1]);
1729	VM_EVENT(kvm, 3, "GET: host  KMAC   subfunc 0x%16.16lx.%16.16lx",
1730		 ((unsigned long *) &kvm_s390_available_subfunc.kmac)[0],
1731		 ((unsigned long *) &kvm_s390_available_subfunc.kmac)[1]);
1732	VM_EVENT(kvm, 3, "GET: host  KMC    subfunc 0x%16.16lx.%16.16lx",
1733		 ((unsigned long *) &kvm_s390_available_subfunc.kmc)[0],
1734		 ((unsigned long *) &kvm_s390_available_subfunc.kmc)[1]);
1735	VM_EVENT(kvm, 3, "GET: host  KM     subfunc 0x%16.16lx.%16.16lx",
1736		 ((unsigned long *) &kvm_s390_available_subfunc.km)[0],
1737		 ((unsigned long *) &kvm_s390_available_subfunc.km)[1]);
1738	VM_EVENT(kvm, 3, "GET: host  KIMD   subfunc 0x%16.16lx.%16.16lx",
1739		 ((unsigned long *) &kvm_s390_available_subfunc.kimd)[0],
1740		 ((unsigned long *) &kvm_s390_available_subfunc.kimd)[1]);
1741	VM_EVENT(kvm, 3, "GET: host  KLMD   subfunc 0x%16.16lx.%16.16lx",
1742		 ((unsigned long *) &kvm_s390_available_subfunc.klmd)[0],
1743		 ((unsigned long *) &kvm_s390_available_subfunc.klmd)[1]);
1744	VM_EVENT(kvm, 3, "GET: host  PCKMO  subfunc 0x%16.16lx.%16.16lx",
1745		 ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[0],
1746		 ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[1]);
1747	VM_EVENT(kvm, 3, "GET: host  KMCTR  subfunc 0x%16.16lx.%16.16lx",
1748		 ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[0],
1749		 ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[1]);
1750	VM_EVENT(kvm, 3, "GET: host  KMF    subfunc 0x%16.16lx.%16.16lx",
1751		 ((unsigned long *) &kvm_s390_available_subfunc.kmf)[0],
1752		 ((unsigned long *) &kvm_s390_available_subfunc.kmf)[1]);
1753	VM_EVENT(kvm, 3, "GET: host  KMO    subfunc 0x%16.16lx.%16.16lx",
1754		 ((unsigned long *) &kvm_s390_available_subfunc.kmo)[0],
1755		 ((unsigned long *) &kvm_s390_available_subfunc.kmo)[1]);
1756	VM_EVENT(kvm, 3, "GET: host  PCC    subfunc 0x%16.16lx.%16.16lx",
1757		 ((unsigned long *) &kvm_s390_available_subfunc.pcc)[0],
1758		 ((unsigned long *) &kvm_s390_available_subfunc.pcc)[1]);
1759	VM_EVENT(kvm, 3, "GET: host  PPNO   subfunc 0x%16.16lx.%16.16lx",
1760		 ((unsigned long *) &kvm_s390_available_subfunc.ppno)[0],
1761		 ((unsigned long *) &kvm_s390_available_subfunc.ppno)[1]);
1762	VM_EVENT(kvm, 3, "GET: host  KMA    subfunc 0x%16.16lx.%16.16lx",
1763		 ((unsigned long *) &kvm_s390_available_subfunc.kma)[0],
1764		 ((unsigned long *) &kvm_s390_available_subfunc.kma)[1]);
1765	VM_EVENT(kvm, 3, "GET: host  KDSA   subfunc 0x%16.16lx.%16.16lx",
1766		 ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[0],
1767		 ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[1]);
1768	VM_EVENT(kvm, 3, "GET: host  SORTL  subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1769		 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[0],
1770		 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[1],
1771		 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[2],
1772		 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[3]);
1773	VM_EVENT(kvm, 3, "GET: host  DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1774		 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[0],
1775		 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[1],
1776		 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[2],
1777		 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[3]);
1778
1779	return 0;
1780}
1781
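/* Dispatch KVM_GET_DEVICE_ATTR requests for the KVM_S390_VM_CPU_MODEL group. */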
1782static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1783{
1784	int ret = -ENXIO;
1785
1786	switch (attr->attr) {
1787	case KVM_S390_VM_CPU_PROCESSOR:
1788		ret = kvm_s390_get_processor(kvm, attr);
1789		break;
1790	case KVM_S390_VM_CPU_MACHINE:
1791		ret = kvm_s390_get_machine(kvm, attr);
1792		break;
1793	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1794		ret = kvm_s390_get_processor_feat(kvm, attr);
1795		break;
1796	case KVM_S390_VM_CPU_MACHINE_FEAT:
1797		ret = kvm_s390_get_machine_feat(kvm, attr);
1798		break;
1799	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1800		ret = kvm_s390_get_processor_subfunc(kvm, attr);
1801		break;
1802	case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1803		ret = kvm_s390_get_machine_subfunc(kvm, attr);
1804		break;
1805	}
1806	return ret;
1807}
1808
1809/**
1810 * kvm_s390_update_topology_change_report - update CPU topology change report
1811 * @kvm: guest KVM description
1812 * @val: set or clear the MTCR bit
1813 *
1814 * Updates the Multiprocessor Topology-Change-Report bit to signal
1815 * a topology change to the guest.
1816 * This is only relevant if the topology facility is present.
1817 *
1818 * The SCA version, bsca or esca, doesn't matter as the offset is the same.
1819 */
1820static void kvm_s390_update_topology_change_report(struct kvm *kvm, bool val)
1821{
1822	union sca_utility new, old;
1823	struct bsca_block *sca;
1824
1825	read_lock(&kvm->arch.sca_lock);
1826	sca = kvm->arch.sca;
1827	do {
1828		old = READ_ONCE(sca->utility);
1829		new = old;
1830		new.mtcr = val;
1831	} while (cmpxchg(&sca->utility.val, old.val, new.val) != old.val);
1832	read_unlock(&kvm->arch.sca_lock);
1833}
1834
1835static int kvm_s390_set_topo_change_indication(struct kvm *kvm,
1836					       struct kvm_device_attr *attr)
1837{
1838	if (!test_kvm_facility(kvm, 11))
1839		return -ENXIO;
1840
1841	kvm_s390_update_topology_change_report(kvm, !!attr->attr);
1842	return 0;
1843}
1844
1845static int kvm_s390_get_topo_change_indication(struct kvm *kvm,
1846					       struct kvm_device_attr *attr)
1847{
1848	u8 topo;
1849
1850	if (!test_kvm_facility(kvm, 11))
1851		return -ENXIO;
1852
1853	read_lock(&kvm->arch.sca_lock);
1854	topo = ((struct bsca_block *)kvm->arch.sca)->utility.mtcr;
1855	read_unlock(&kvm->arch.sca_lock);
1856
1857	return put_user(topo, (u8 __user *)attr->addr);
1858}
1859
1860static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1861{
1862	int ret;
1863
1864	switch (attr->group) {
1865	case KVM_S390_VM_MEM_CTRL:
1866		ret = kvm_s390_set_mem_control(kvm, attr);
1867		break;
1868	case KVM_S390_VM_TOD:
1869		ret = kvm_s390_set_tod(kvm, attr);
1870		break;
1871	case KVM_S390_VM_CPU_MODEL:
1872		ret = kvm_s390_set_cpu_model(kvm, attr);
1873		break;
1874	case KVM_S390_VM_CRYPTO:
1875		ret = kvm_s390_vm_set_crypto(kvm, attr);
1876		break;
1877	case KVM_S390_VM_MIGRATION:
1878		ret = kvm_s390_vm_set_migration(kvm, attr);
1879		break;
1880	case KVM_S390_VM_CPU_TOPOLOGY:
1881		ret = kvm_s390_set_topo_change_indication(kvm, attr);
1882		break;
1883	default:
1884		ret = -ENXIO;
1885		break;
1886	}
1887
1888	return ret;
1889}
1890
1891static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1892{
1893	int ret;
1894
1895	switch (attr->group) {
1896	case KVM_S390_VM_MEM_CTRL:
1897		ret = kvm_s390_get_mem_control(kvm, attr);
1898		break;
1899	case KVM_S390_VM_TOD:
1900		ret = kvm_s390_get_tod(kvm, attr);
1901		break;
1902	case KVM_S390_VM_CPU_MODEL:
1903		ret = kvm_s390_get_cpu_model(kvm, attr);
1904		break;
1905	case KVM_S390_VM_MIGRATION:
1906		ret = kvm_s390_vm_get_migration(kvm, attr);
1907		break;
1908	case KVM_S390_VM_CPU_TOPOLOGY:
1909		ret = kvm_s390_get_topo_change_indication(kvm, attr);
1910		break;
1911	default:
1912		ret = -ENXIO;
1913		break;
1914	}
1915
1916	return ret;
1917}
1918
1919static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1920{
1921	int ret;
1922
1923	switch (attr->group) {
1924	case KVM_S390_VM_MEM_CTRL:
1925		switch (attr->attr) {
1926		case KVM_S390_VM_MEM_ENABLE_CMMA:
1927		case KVM_S390_VM_MEM_CLR_CMMA:
1928			ret = sclp.has_cmma ? 0 : -ENXIO;
1929			break;
1930		case KVM_S390_VM_MEM_LIMIT_SIZE:
1931			ret = 0;
1932			break;
1933		default:
1934			ret = -ENXIO;
1935			break;
1936		}
1937		break;
1938	case KVM_S390_VM_TOD:
1939		switch (attr->attr) {
1940		case KVM_S390_VM_TOD_LOW:
1941		case KVM_S390_VM_TOD_HIGH:
1942			ret = 0;
1943			break;
1944		default:
1945			ret = -ENXIO;
1946			break;
1947		}
1948		break;
1949	case KVM_S390_VM_CPU_MODEL:
1950		switch (attr->attr) {
1951		case KVM_S390_VM_CPU_PROCESSOR:
1952		case KVM_S390_VM_CPU_MACHINE:
1953		case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1954		case KVM_S390_VM_CPU_MACHINE_FEAT:
1955		case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1956		case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1957			ret = 0;
1958			break;
1959		default:
1960			ret = -ENXIO;
1961			break;
1962		}
1963		break;
1964	case KVM_S390_VM_CRYPTO:
1965		switch (attr->attr) {
1966		case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
1967		case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
1968		case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
1969		case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
1970			ret = 0;
1971			break;
1972		case KVM_S390_VM_CRYPTO_ENABLE_APIE:
1973		case KVM_S390_VM_CRYPTO_DISABLE_APIE:
1974			ret = ap_instructions_available() ? 0 : -ENXIO;
1975			break;
1976		default:
1977			ret = -ENXIO;
1978			break;
1979		}
1980		break;
1981	case KVM_S390_VM_MIGRATION:
1982		ret = 0;
1983		break;
1984	case KVM_S390_VM_CPU_TOPOLOGY:
1985		ret = test_kvm_facility(kvm, 11) ? 0 : -ENXIO;
1986		break;
1987	default:
1988		ret = -ENXIO;
1989		break;
1990	}
1991
1992	return ret;
1993}
1994
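/*
 * Read guest storage keys into a user-supplied buffer, one byte per guest
 * page starting at args->start_gfn. Returns KVM_S390_GET_SKEYS_NONE when the
 * guest does not use storage keys at all.
 */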
1995static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1996{
1997	uint8_t *keys;
1998	uint64_t hva;
1999	int srcu_idx, i, r = 0;
2000
2001	if (args->flags != 0)
2002		return -EINVAL;
2003
2004	/* Is this guest using storage keys? */
2005	if (!mm_uses_skeys(current->mm))
2006		return KVM_S390_GET_SKEYS_NONE;
2007
2008	/* Enforce sane limit on memory allocation */
2009	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
2010		return -EINVAL;
2011
2012	keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL_ACCOUNT);
2013	if (!keys)
2014		return -ENOMEM;
2015
2016	mmap_read_lock(current->mm);
2017	srcu_idx = srcu_read_lock(&kvm->srcu);
2018	for (i = 0; i < args->count; i++) {
2019		hva = gfn_to_hva(kvm, args->start_gfn + i);
2020		if (kvm_is_error_hva(hva)) {
2021			r = -EFAULT;
2022			break;
2023		}
2024
2025		r = get_guest_storage_key(current->mm, hva, &keys[i]);
2026		if (r)
2027			break;
2028	}
2029	srcu_read_unlock(&kvm->srcu, srcu_idx);
2030	mmap_read_unlock(current->mm);
2031
2032	if (!r) {
2033		r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
2034				 sizeof(uint8_t) * args->count);
2035		if (r)
2036			r = -EFAULT;
2037	}
2038
2039	kvfree(keys);
2040	return r;
2041}
2042
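/*
 * Write guest storage keys from a user-supplied buffer. Storage key handling
 * is enabled for the guest first; a key that cannot be set right away is
 * retried after resolving the fault with fixup_user_fault().
 */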
2043static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
2044{
2045	uint8_t *keys;
2046	uint64_t hva;
2047	int srcu_idx, i, r = 0;
2048	bool unlocked;
2049
2050	if (args->flags != 0)
2051		return -EINVAL;
2052
2053	/* Enforce sane limit on memory allocation */
2054	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
2055		return -EINVAL;
2056
2057	keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL_ACCOUNT);
2058	if (!keys)
2059		return -ENOMEM;
2060
2061	r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
2062			   sizeof(uint8_t) * args->count);
2063	if (r) {
2064		r = -EFAULT;
2065		goto out;
2066	}
2067
2068	/* Enable storage key handling for the guest */
2069	r = s390_enable_skey();
2070	if (r)
2071		goto out;
2072
2073	i = 0;
2074	mmap_read_lock(current->mm);
2075	srcu_idx = srcu_read_lock(&kvm->srcu);
2076	while (i < args->count) {
2077		unlocked = false;
2078		hva = gfn_to_hva(kvm, args->start_gfn + i);
2079		if (kvm_is_error_hva(hva)) {
2080			r = -EFAULT;
2081			break;
2082		}
2083
2084		/* Lowest order bit is reserved */
2085		if (keys[i] & 0x01) {
2086			r = -EINVAL;
2087			break;
2088		}
2089
2090		r = set_guest_storage_key(current->mm, hva, keys[i], 0);
2091		if (r) {
2092			r = fixup_user_fault(current->mm, hva,
2093					     FAULT_FLAG_WRITE, &unlocked);
2094			if (r)
2095				break;
2096		}
2097		if (!r)
2098			i++;
2099	}
2100	srcu_read_unlock(&kvm->srcu, srcu_idx);
2101	mmap_read_unlock(current->mm);
2102out:
2103	kvfree(keys);
2104	return r;
2105}
2106
2107/*
2108 * Base address and length must be sent at the start of each block; therefore
2109 * it's cheaper to send some clean data, as long as it's less than the size of
2110 * two longs.
2111 */
2112#define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *))
2113/* for consistency */
2114#define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)
2115
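/*
 * Peek mode: report the CMMA values for a linear range of guest pages
 * without looking at or clearing the dirty bitmap. Only the usage state and
 * NODAT bits of each PGSTE are returned.
 */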
2116static int kvm_s390_peek_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
2117			      u8 *res, unsigned long bufsize)
2118{
2119	unsigned long pgstev, hva, cur_gfn = args->start_gfn;
2120
2121	args->count = 0;
2122	while (args->count < bufsize) {
2123		hva = gfn_to_hva(kvm, cur_gfn);
2124		/*
2125		 * We return an error if the first value was invalid, but we
2126		 * return successfully if at least one value was copied.
2127		 */
2128		if (kvm_is_error_hva(hva))
2129			return args->count ? 0 : -EFAULT;
2130		if (get_pgste(kvm->mm, hva, &pgstev) < 0)
2131			pgstev = 0;
2132		res[args->count++] = (pgstev >> 24) & 0x43;
2133		cur_gfn++;
2134	}
2135
2136	return 0;
2137}
2138
2139static struct kvm_memory_slot *gfn_to_memslot_approx(struct kvm_memslots *slots,
2140						     gfn_t gfn)
2141{
2142	return ____gfn_to_memslot(slots, gfn, true);
2143}
2144
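/*
 * Find the guest frame number of the next page whose CMMA dirty bit is set,
 * starting the search at cur_gfn and wrapping around to the first memslot if
 * needed. The result may lie beyond the last memslot when no further bit is
 * set; callers compare it against the end of guest memory.
 */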
2145static unsigned long kvm_s390_next_dirty_cmma(struct kvm_memslots *slots,
2146					      unsigned long cur_gfn)
2147{
2148	struct kvm_memory_slot *ms = gfn_to_memslot_approx(slots, cur_gfn);
2149	unsigned long ofs = cur_gfn - ms->base_gfn;
2150	struct rb_node *mnode = &ms->gfn_node[slots->node_idx];
2151
2152	if (ms->base_gfn + ms->npages <= cur_gfn) {
2153		mnode = rb_next(mnode);
2154		/* If we are above the highest slot, wrap around */
2155		if (!mnode)
2156			mnode = rb_first(&slots->gfn_tree);
2157
2158		ms = container_of(mnode, struct kvm_memory_slot, gfn_node[slots->node_idx]);
2159		ofs = 0;
2160	}
2161	ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, ofs);
2162	while (ofs >= ms->npages && (mnode = rb_next(mnode))) {
2163		ms = container_of(mnode, struct kvm_memory_slot, gfn_node[slots->node_idx]);
2164		ofs = find_first_bit(kvm_second_dirty_bitmap(ms), ms->npages);
2165	}
2166	return ms->base_gfn + ofs;
2167}
2168
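/*
 * Migration mode: collect CMMA values starting at the next dirty page,
 * clearing the per-page dirty bits along the way and stopping early once the
 * next dirty page is more than KVM_S390_MAX_BIT_DISTANCE away or the end of
 * guest memory or of the buffer is reached.
 */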
2169static int kvm_s390_get_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
2170			     u8 *res, unsigned long bufsize)
2171{
2172	unsigned long mem_end, cur_gfn, next_gfn, hva, pgstev;
2173	struct kvm_memslots *slots = kvm_memslots(kvm);
2174	struct kvm_memory_slot *ms;
2175
2176	if (unlikely(kvm_memslots_empty(slots)))
2177		return 0;
2178
2179	cur_gfn = kvm_s390_next_dirty_cmma(slots, args->start_gfn);
2180	ms = gfn_to_memslot(kvm, cur_gfn);
2181	args->count = 0;
2182	args->start_gfn = cur_gfn;
2183	if (!ms)
2184		return 0;
2185	next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
2186	mem_end = kvm_s390_get_gfn_end(slots);
2187
2188	while (args->count < bufsize) {
2189		hva = gfn_to_hva(kvm, cur_gfn);
2190		if (kvm_is_error_hva(hva))
2191			return 0;
2192		/* Decrement only if we actually flipped the bit to 0 */
2193		if (test_and_clear_bit(cur_gfn - ms->base_gfn, kvm_second_dirty_bitmap(ms)))
2194			atomic64_dec(&kvm->arch.cmma_dirty_pages);
2195		if (get_pgste(kvm->mm, hva, &pgstev) < 0)
2196			pgstev = 0;
2197		/* Save the value */
2198		res[args->count++] = (pgstev >> 24) & 0x43;
2199		/* If the next bit is too far away, stop. */
2200		if (next_gfn > cur_gfn + KVM_S390_MAX_BIT_DISTANCE)
2201			return 0;
2202		/* If we reached the previous "next", find the next one */
2203		if (cur_gfn == next_gfn)
2204			next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
2205		/* Reached the end of memory or of the buffer, stop */
2206		if ((next_gfn >= mem_end) ||
2207		    (next_gfn - args->start_gfn >= bufsize))
2208			return 0;
2209		cur_gfn++;
2210		/* Reached the end of the current memslot, take the next one. */
2211		if (cur_gfn - ms->base_gfn >= ms->npages) {
2212			ms = gfn_to_memslot(kvm, cur_gfn);
2213			if (!ms)
2214				return 0;
2215		}
2216	}
2217	return 0;
2218}
2219
2220/*
2221 * This function searches for the next page with dirty CMMA attributes, and
2222 * saves the attributes in the buffer up to either the end of the buffer or
2223 * until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found;
2224 * no trailing clean bytes are saved.
2225 * In case no dirty bits were found, or if CMMA was not enabled or used, the
2226 * output buffer will indicate 0 as length.
2227 */
2228static int kvm_s390_get_cmma_bits(struct kvm *kvm,
2229				  struct kvm_s390_cmma_log *args)
2230{
2231	unsigned long bufsize;
2232	int srcu_idx, peek, ret;
2233	u8 *values;
2234
2235	if (!kvm->arch.use_cmma)
2236		return -ENXIO;
2237	/* Invalid/unsupported flags were specified */
2238	if (args->flags & ~KVM_S390_CMMA_PEEK)
2239		return -EINVAL;
2240	/* Migration mode query, and we are not doing a migration */
2241	peek = !!(args->flags & KVM_S390_CMMA_PEEK);
2242	if (!peek && !kvm->arch.migration_mode)
2243		return -EINVAL;
2244	/* CMMA is disabled or was not used, or the buffer has length zero */
2245	bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
2246	if (!bufsize || !kvm->mm->context.uses_cmm) {
2247		memset(args, 0, sizeof(*args));
2248		return 0;
2249	}
2250	/* We are not peeking, and there are no dirty pages */
2251	if (!peek && !atomic64_read(&kvm->arch.cmma_dirty_pages)) {
2252		memset(args, 0, sizeof(*args));
2253		return 0;
2254	}
2255
2256	values = vmalloc(bufsize);
2257	if (!values)
2258		return -ENOMEM;
2259
2260	mmap_read_lock(kvm->mm);
2261	srcu_idx = srcu_read_lock(&kvm->srcu);
2262	if (peek)
2263		ret = kvm_s390_peek_cmma(kvm, args, values, bufsize);
2264	else
2265		ret = kvm_s390_get_cmma(kvm, args, values, bufsize);
2266	srcu_read_unlock(&kvm->srcu, srcu_idx);
2267	mmap_read_unlock(kvm->mm);
2268
2269	if (kvm->arch.migration_mode)
2270		args->remaining = atomic64_read(&kvm->arch.cmma_dirty_pages);
2271	else
2272		args->remaining = 0;
2273
2274	if (copy_to_user((void __user *)args->values, values, args->count))
2275		ret = -EFAULT;
2276
2277	vfree(values);
2278	return ret;
2279}
2280
2281/*
2282 * This function sets the CMMA attributes for the given pages. If the input
2283 * buffer has zero length, no action is taken, otherwise the attributes are
2284 * set and the mm->context.uses_cmm flag is set.
2285 */
2286static int kvm_s390_set_cmma_bits(struct kvm *kvm,
2287				  const struct kvm_s390_cmma_log *args)
2288{
2289	unsigned long hva, mask, pgstev, i;
2290	uint8_t *bits;
2291	int srcu_idx, r = 0;
2292
2293	mask = args->mask;
2294
2295	if (!kvm->arch.use_cmma)
2296		return -ENXIO;
2297	/* invalid/unsupported flags */
2298	if (args->flags != 0)
2299		return -EINVAL;
2300	/* Enforce sane limit on memory allocation */
2301	if (args->count > KVM_S390_CMMA_SIZE_MAX)
2302		return -EINVAL;
2303	/* Nothing to do */
2304	if (args->count == 0)
2305		return 0;
2306
2307	bits = vmalloc(array_size(sizeof(*bits), args->count));
2308	if (!bits)
2309		return -ENOMEM;
2310
2311	r = copy_from_user(bits, (void __user *)args->values, args->count);
2312	if (r) {
2313		r = -EFAULT;
2314		goto out;
2315	}
2316
2317	mmap_read_lock(kvm->mm);
2318	srcu_idx = srcu_read_lock(&kvm->srcu);
2319	for (i = 0; i < args->count; i++) {
2320		hva = gfn_to_hva(kvm, args->start_gfn + i);
2321		if (kvm_is_error_hva(hva)) {
2322			r = -EFAULT;
2323			break;
2324		}
2325
2326		pgstev = bits[i];
2327		pgstev = pgstev << 24;
2328		mask &= _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT;
2329		set_pgste_bits(kvm->mm, hva, mask, pgstev);
2330	}
2331	srcu_read_unlock(&kvm->srcu, srcu_idx);
2332	mmap_read_unlock(kvm->mm);
2333
2334	if (!kvm->mm->context.uses_cmm) {
2335		mmap_write_lock(kvm->mm);
2336		kvm->mm->context.uses_cmm = 1;
2337		mmap_write_unlock(kvm->mm);
2338	}
2339out:
2340	vfree(bits);
2341	return r;
2342}
2343
2344/**
2345 * kvm_s390_cpus_from_pv - Convert all protected vCPUs in a protected VM to
2346 * non-protected.
2347 * @kvm: the VM whose protected vCPUs are to be converted
2348 * @rc: return value for the RC field of the UVC (in case of error)
2349 * @rrc: return value for the RRC field of the UVC (in case of error)
2350 *
2351 * Does not stop in case of error, but tries to convert as many
2352 * CPUs as possible. In case of error, the RC and RRC of the first error are
2353 * returned.
2354 *
2355 * Return: 0 in case of success, otherwise -EIO
2356 */
2357int kvm_s390_cpus_from_pv(struct kvm *kvm, u16 *rc, u16 *rrc)
2358{
2359	struct kvm_vcpu *vcpu;
2360	unsigned long i;
2361	u16 _rc, _rrc;
2362	int ret = 0;
2363
2364	/*
2365	 * We ignore failures and try to destroy as many CPUs as possible.
2366	 * At the same time we must not free the assigned resources when
2367	 * this fails, as the ultravisor still has access to that memory.
2368	 * So kvm_s390_pv_destroy_cpu can leave a "wanted" memory leak
2369	 * behind.
2370	 * We want to return the first failure rc and rrc, though.
2371	 */
2372	kvm_for_each_vcpu(i, vcpu, kvm) {
2373		mutex_lock(&vcpu->mutex);
2374		if (kvm_s390_pv_destroy_cpu(vcpu, &_rc, &_rrc) && !ret) {
2375			*rc = _rc;
2376			*rrc = _rrc;
2377			ret = -EIO;
2378		}
2379		mutex_unlock(&vcpu->mutex);
2380	}
2381	/* Ensure that we re-enable gisa if the non-PV guest used it but the PV guest did not. */
2382	if (use_gisa)
2383		kvm_s390_gisa_enable(kvm);
2384	return ret;
2385}
2386
2387/**
2388 * kvm_s390_cpus_to_pv - Convert all non-protected vCPUs in a protected VM
2389 * to protected.
2390 * @kvm: the VM whose protected vCPUs are to be converted
2391 * @rc: return value for the RC field of the UVC (in case of error)
2392 * @rrc: return value for the RRC field of the UVC (in case of error)
2393 *
2394 * Tries to undo the conversion in case of error.
2395 *
2396 * Return: 0 in case of success, otherwise -EIO
2397 */
2398static int kvm_s390_cpus_to_pv(struct kvm *kvm, u16 *rc, u16 *rrc)
2399{
2400	unsigned long i;
2401	int r = 0;
2402	u16 dummy;
2403
2404	struct kvm_vcpu *vcpu;
2405
2406	/* Disable the GISA if the ultravisor does not support AIV. */
2407	if (!test_bit_inv(BIT_UV_FEAT_AIV, &uv_info.uv_feature_indications))
2408		kvm_s390_gisa_disable(kvm);
2409
2410	kvm_for_each_vcpu(i, vcpu, kvm) {
2411		mutex_lock(&vcpu->mutex);
2412		r = kvm_s390_pv_create_cpu(vcpu, rc, rrc);
2413		mutex_unlock(&vcpu->mutex);
2414		if (r)
2415			break;
2416	}
2417	if (r)
2418		kvm_s390_cpus_from_pv(kvm, &dummy, &dummy);
2419	return r;
2420}
2421
2422/*
2423 * Here we provide user space with a direct interface to query UV
2424 * related data like UV maxima and available features as well as
2425 * feature specific data.
2426 *
2427 * To facilitate future extension of the data structures we'll try to
2428 * write data up to the maximum requested length.
2429 */
2430static ssize_t kvm_s390_handle_pv_info(struct kvm_s390_pv_info *info)
2431{
2432	ssize_t len_min;
2433
2434	switch (info->header.id) {
2435	case KVM_PV_INFO_VM: {
2436		len_min =  sizeof(info->header) + sizeof(info->vm);
2437
2438		if (info->header.len_max < len_min)
2439			return -EINVAL;
2440
2441		memcpy(info->vm.inst_calls_list,
2442		       uv_info.inst_calls_list,
2443		       sizeof(uv_info.inst_calls_list));
2444
2445		/* It's max cpuid not max cpus, so it's off by one */
2446		info->vm.max_cpus = uv_info.max_guest_cpu_id + 1;
2447		info->vm.max_guests = uv_info.max_num_sec_conf;
2448		info->vm.max_guest_addr = uv_info.max_sec_stor_addr;
2449		info->vm.feature_indication = uv_info.uv_feature_indications;
2450
2451		return len_min;
2452	}
2453	case KVM_PV_INFO_DUMP: {
2454		len_min =  sizeof(info->header) + sizeof(info->dump);
2455
2456		if (info->header.len_max < len_min)
2457			return -EINVAL;
2458
2459		info->dump.dump_cpu_buffer_len = uv_info.guest_cpu_stor_len;
2460		info->dump.dump_config_mem_buffer_per_1m = uv_info.conf_dump_storage_state_len;
2461		info->dump.dump_config_finalize_len = uv_info.conf_dump_finalize_len;
2462		return len_min;
2463	}
2464	default:
2465		return -EINVAL;
2466	}
2467}
2468
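/* Handle the KVM_PV_DUMP subcommands: init, config storage state, complete. */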
2469static int kvm_s390_pv_dmp(struct kvm *kvm, struct kvm_pv_cmd *cmd,
2470			   struct kvm_s390_pv_dmp dmp)
2471{
2472	int r = -EINVAL;
2473	void __user *result_buff = (void __user *)dmp.buff_addr;
2474
2475	switch (dmp.subcmd) {
2476	case KVM_PV_DUMP_INIT: {
2477		if (kvm->arch.pv.dumping)
2478			break;
2479
2480		/*
2481		 * Block SIE entry as concurrent dump UVCs could lead
2482		 * to validity intercepts.
2483		 */
2484		kvm_s390_vcpu_block_all(kvm);
2485
2486		r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2487				  UVC_CMD_DUMP_INIT, &cmd->rc, &cmd->rrc);
2488		KVM_UV_EVENT(kvm, 3, "PROTVIRT DUMP INIT: rc %x rrc %x",
2489			     cmd->rc, cmd->rrc);
2490		if (!r) {
2491			kvm->arch.pv.dumping = true;
2492		} else {
2493			kvm_s390_vcpu_unblock_all(kvm);
2494			r = -EINVAL;
2495		}
2496		break;
2497	}
2498	case KVM_PV_DUMP_CONFIG_STOR_STATE: {
2499		if (!kvm->arch.pv.dumping)
2500			break;
2501
2502		/*
2503		 * gaddr is an output parameter since we might stop
2504		 * early. As dmp will be copied back in our caller, we
2505		 * don't need to do it ourselves.
2506		 */
2507		r = kvm_s390_pv_dump_stor_state(kvm, result_buff, &dmp.gaddr, dmp.buff_len,
2508						&cmd->rc, &cmd->rrc);
2509		break;
2510	}
2511	case KVM_PV_DUMP_COMPLETE: {
2512		if (!kvm->arch.pv.dumping)
2513			break;
2514
2515		r = -EINVAL;
2516		if (dmp.buff_len < uv_info.conf_dump_finalize_len)
2517			break;
2518
2519		r = kvm_s390_pv_dump_complete(kvm, result_buff,
2520					      &cmd->rc, &cmd->rrc);
2521		break;
2522	}
2523	default:
2524		r = -ENOTTY;
2525		break;
2526	}
2527
2528	return r;
2529}
2530
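/*
 * Handle KVM_S390_PV_COMMAND. All subcommands except
 * KVM_PV_ASYNC_CLEANUP_PERFORM are executed while holding kvm->lock.
 */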
2531static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd)
2532{
2533	const bool need_lock = (cmd->cmd != KVM_PV_ASYNC_CLEANUP_PERFORM);
2534	void __user *argp = (void __user *)cmd->data;
2535	int r = 0;
2536	u16 dummy;
2537
2538	if (need_lock)
2539		mutex_lock(&kvm->lock);
2540
2541	switch (cmd->cmd) {
2542	case KVM_PV_ENABLE: {
2543		r = -EINVAL;
2544		if (kvm_s390_pv_is_protected(kvm))
2545			break;
2546
2547		/*
2548		 *  FMT 4 SIE needs esca. As we never switch back to bsca from
2549		 *  esca, we need no cleanup in the error cases below.
2550		 */
2551		r = sca_switch_to_extended(kvm);
2552		if (r)
2553			break;
2554
2555		mmap_write_lock(current->mm);
2556		r = gmap_mark_unmergeable();
2557		mmap_write_unlock(current->mm);
2558		if (r)
2559			break;
2560
2561		r = kvm_s390_pv_init_vm(kvm, &cmd->rc, &cmd->rrc);
2562		if (r)
2563			break;
2564
2565		r = kvm_s390_cpus_to_pv(kvm, &cmd->rc, &cmd->rrc);
2566		if (r)
2567			kvm_s390_pv_deinit_vm(kvm, &dummy, &dummy);
2568
2569		/* we need to block service interrupts from now on */
2570		set_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
2571		break;
2572	}
2573	case KVM_PV_ASYNC_CLEANUP_PREPARE:
2574		r = -EINVAL;
2575		if (!kvm_s390_pv_is_protected(kvm) || !async_destroy)
2576			break;
2577
2578		r = kvm_s390_cpus_from_pv(kvm, &cmd->rc, &cmd->rrc);
2579		/*
2580		 * If a CPU could not be destroyed, destroying the VM will also fail.
2581		 * There is no point in trying; instead, return
2582		 * the rc and rrc from the first CPU that failed to be destroyed.
2583		 */
2584		if (r)
2585			break;
2586		r = kvm_s390_pv_set_aside(kvm, &cmd->rc, &cmd->rrc);
2587
2588		/* no need to block service interrupts any more */
2589		clear_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
2590		break;
2591	case KVM_PV_ASYNC_CLEANUP_PERFORM:
2592		r = -EINVAL;
2593		if (!async_destroy)
2594			break;
2595		/* kvm->lock must not be held; this is asserted inside the function. */
2596		r = kvm_s390_pv_deinit_aside_vm(kvm, &cmd->rc, &cmd->rrc);
2597		break;
2598	case KVM_PV_DISABLE: {
2599		r = -EINVAL;
2600		if (!kvm_s390_pv_is_protected(kvm))
2601			break;
2602
2603		r = kvm_s390_cpus_from_pv(kvm, &cmd->rc, &cmd->rrc);
2604		/*
2605		 * If a CPU could not be destroyed, destroying the VM will also fail.
2606		 * There is no point in trying; instead, return
2607		 * the rc and rrc from the first CPU that failed to be destroyed.
2608		 */
2609		if (r)
2610			break;
2611		r = kvm_s390_pv_deinit_cleanup_all(kvm, &cmd->rc, &cmd->rrc);
2612
2613		/* no need to block service interrupts any more */
2614		clear_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
2615		break;
2616	}
2617	case KVM_PV_SET_SEC_PARMS: {
2618		struct kvm_s390_pv_sec_parm parms = {};
2619		void *hdr;
2620
2621		r = -EINVAL;
2622		if (!kvm_s390_pv_is_protected(kvm))
2623			break;
2624
2625		r = -EFAULT;
2626		if (copy_from_user(&parms, argp, sizeof(parms)))
2627			break;
2628
2629		/* Currently restricted to 8KB */
2630		r = -EINVAL;
2631		if (parms.length > PAGE_SIZE * 2)
2632			break;
2633
2634		r = -ENOMEM;
2635		hdr = vmalloc(parms.length);
2636		if (!hdr)
2637			break;
2638
2639		r = -EFAULT;
2640		if (!copy_from_user(hdr, (void __user *)parms.origin,
2641				    parms.length))
2642			r = kvm_s390_pv_set_sec_parms(kvm, hdr, parms.length,
2643						      &cmd->rc, &cmd->rrc);
2644
2645		vfree(hdr);
2646		break;
2647	}
2648	case KVM_PV_UNPACK: {
2649		struct kvm_s390_pv_unp unp = {};
2650
2651		r = -EINVAL;
2652		if (!kvm_s390_pv_is_protected(kvm) || !mm_is_protected(kvm->mm))
2653			break;
2654
2655		r = -EFAULT;
2656		if (copy_from_user(&unp, argp, sizeof(unp)))
2657			break;
2658
2659		r = kvm_s390_pv_unpack(kvm, unp.addr, unp.size, unp.tweak,
2660				       &cmd->rc, &cmd->rrc);
2661		break;
2662	}
2663	case KVM_PV_VERIFY: {
2664		r = -EINVAL;
2665		if (!kvm_s390_pv_is_protected(kvm))
2666			break;
2667
2668		r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2669				  UVC_CMD_VERIFY_IMG, &cmd->rc, &cmd->rrc);
2670		KVM_UV_EVENT(kvm, 3, "PROTVIRT VERIFY: rc %x rrc %x", cmd->rc,
2671			     cmd->rrc);
2672		break;
2673	}
2674	case KVM_PV_PREP_RESET: {
2675		r = -EINVAL;
2676		if (!kvm_s390_pv_is_protected(kvm))
2677			break;
2678
2679		r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2680				  UVC_CMD_PREPARE_RESET, &cmd->rc, &cmd->rrc);
2681		KVM_UV_EVENT(kvm, 3, "PROTVIRT PREP RESET: rc %x rrc %x",
2682			     cmd->rc, cmd->rrc);
2683		break;
2684	}
2685	case KVM_PV_UNSHARE_ALL: {
2686		r = -EINVAL;
2687		if (!kvm_s390_pv_is_protected(kvm))
2688			break;
2689
2690		r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2691				  UVC_CMD_SET_UNSHARE_ALL, &cmd->rc, &cmd->rrc);
2692		KVM_UV_EVENT(kvm, 3, "PROTVIRT UNSHARE: rc %x rrc %x",
2693			     cmd->rc, cmd->rrc);
2694		break;
2695	}
2696	case KVM_PV_INFO: {
2697		struct kvm_s390_pv_info info = {};
2698		ssize_t data_len;
2699
2700		/*
2701		 * No need to check the VM protection here.
2702		 *
2703		 * Maybe user space wants to query some of the data
2704		 * when the VM is still unprotected. If we see the
2705		 * need to fence a new data command we can still
2706		 * return an error in the info handler.
2707		 */
2708
2709		r = -EFAULT;
2710		if (copy_from_user(&info, argp, sizeof(info.header)))
2711			break;
2712
2713		r = -EINVAL;
2714		if (info.header.len_max < sizeof(info.header))
2715			break;
2716
2717		data_len = kvm_s390_handle_pv_info(&info);
2718		if (data_len < 0) {
2719			r = data_len;
2720			break;
2721		}
2722		/*
2723		 * If a data command struct is extended (multiple
2724		 * times) this can be used to determine how much of it
2725		 * is valid.
2726		 */
2727		info.header.len_written = data_len;
2728
2729		r = -EFAULT;
2730		if (copy_to_user(argp, &info, data_len))
2731			break;
2732
2733		r = 0;
2734		break;
2735	}
2736	case KVM_PV_DUMP: {
2737		struct kvm_s390_pv_dmp dmp;
2738
2739		r = -EINVAL;
2740		if (!kvm_s390_pv_is_protected(kvm))
2741			break;
2742
2743		r = -EFAULT;
2744		if (copy_from_user(&dmp, argp, sizeof(dmp)))
2745			break;
2746
2747		r = kvm_s390_pv_dmp(kvm, cmd, dmp);
2748		if (r)
2749			break;
2750
2751		if (copy_to_user(argp, &dmp, sizeof(dmp))) {
2752			r = -EFAULT;
2753			break;
2754		}
2755
2756		break;
2757	}
2758	default:
2759		r = -ENOTTY;
2760	}
2761	if (need_lock)
2762		mutex_unlock(&kvm->lock);
2763
2764	return r;
2765}
2766
2767static bool access_key_invalid(u8 access_key)
2768{
2769	return access_key > 0xf;
2770}
2771
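/*
 * Access guest absolute memory on behalf of user space (KVM_S390_MEM_OP on
 * the VM fd), optionally honouring storage key protection or only checking
 * whether the access would succeed.
 */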
2772static int kvm_s390_vm_mem_op(struct kvm *kvm, struct kvm_s390_mem_op *mop)
2773{
2774	void __user *uaddr = (void __user *)mop->buf;
2775	u64 supported_flags;
2776	void *tmpbuf = NULL;
2777	int r, srcu_idx;
2778
2779	supported_flags = KVM_S390_MEMOP_F_SKEY_PROTECTION
2780			  | KVM_S390_MEMOP_F_CHECK_ONLY;
2781	if (mop->flags & ~supported_flags || !mop->size)
2782		return -EINVAL;
2783	if (mop->size > MEM_OP_MAX_SIZE)
2784		return -E2BIG;
2785	/*
2786	 * This is technically a heuristic only; since kvm->lock is not
2787	 * taken, it is not guaranteed that the VM is/remains non-protected.
2788	 * This is OK from a kernel perspective: wrongdoing is detected
2789	 * on the access, -EFAULT is returned and the VM may crash the
2790	 * next time it accesses the memory in question.
2791	 * There is no sane use case for doing the switch and a memop on two
2792	 * different CPUs at the same time.
2793	 */
2794	if (kvm_s390_pv_get_handle(kvm))
2795		return -EINVAL;
2796	if (mop->flags & KVM_S390_MEMOP_F_SKEY_PROTECTION) {
2797		if (access_key_invalid(mop->key))
2798			return -EINVAL;
2799	} else {
2800		mop->key = 0;
2801	}
2802	if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
2803		tmpbuf = vmalloc(mop->size);
2804		if (!tmpbuf)
2805			return -ENOMEM;
2806	}
2807
2808	srcu_idx = srcu_read_lock(&kvm->srcu);
2809
2810	if (kvm_is_error_gpa(kvm, mop->gaddr)) {
2811		r = PGM_ADDRESSING;
2812		goto out_unlock;
2813	}
2814
2815	switch (mop->op) {
2816	case KVM_S390_MEMOP_ABSOLUTE_READ: {
2817		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
2818			r = check_gpa_range(kvm, mop->gaddr, mop->size, GACC_FETCH, mop->key);
2819		} else {
2820			r = access_guest_abs_with_key(kvm, mop->gaddr, tmpbuf,
2821						      mop->size, GACC_FETCH, mop->key);
2822			if (r == 0) {
2823				if (copy_to_user(uaddr, tmpbuf, mop->size))
2824					r = -EFAULT;
2825			}
2826		}
2827		break;
2828	}
2829	case KVM_S390_MEMOP_ABSOLUTE_WRITE: {
2830		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
2831			r = check_gpa_range(kvm, mop->gaddr, mop->size, GACC_STORE, mop->key);
2832		} else {
2833			if (copy_from_user(tmpbuf, uaddr, mop->size)) {
2834				r = -EFAULT;
2835				break;
2836			}
2837			r = access_guest_abs_with_key(kvm, mop->gaddr, tmpbuf,
2838						      mop->size, GACC_STORE, mop->key);
2839		}
2840		break;
2841	}
2842	default:
2843		r = -EINVAL;
2844	}
2845
2846out_unlock:
2847	srcu_read_unlock(&kvm->srcu, srcu_idx);
2848
2849	vfree(tmpbuf);
2850	return r;
2851}
2852
2853long kvm_arch_vm_ioctl(struct file *filp,
2854		       unsigned int ioctl, unsigned long arg)
2855{
2856	struct kvm *kvm = filp->private_data;
2857	void __user *argp = (void __user *)arg;
2858	struct kvm_device_attr attr;
2859	int r;
2860
2861	switch (ioctl) {
2862	case KVM_S390_INTERRUPT: {
2863		struct kvm_s390_interrupt s390int;
2864
2865		r = -EFAULT;
2866		if (copy_from_user(&s390int, argp, sizeof(s390int)))
2867			break;
2868		r = kvm_s390_inject_vm(kvm, &s390int);
2869		break;
2870	}
2871	case KVM_CREATE_IRQCHIP: {
2872		struct kvm_irq_routing_entry routing;
2873
2874		r = -EINVAL;
2875		if (kvm->arch.use_irqchip) {
2876			/* Set up dummy routing. */
2877			memset(&routing, 0, sizeof(routing));
2878			r = kvm_set_irq_routing(kvm, &routing, 0, 0);
2879		}
2880		break;
2881	}
2882	case KVM_SET_DEVICE_ATTR: {
2883		r = -EFAULT;
2884		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2885			break;
2886		r = kvm_s390_vm_set_attr(kvm, &attr);
2887		break;
2888	}
2889	case KVM_GET_DEVICE_ATTR: {
2890		r = -EFAULT;
2891		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2892			break;
2893		r = kvm_s390_vm_get_attr(kvm, &attr);
2894		break;
2895	}
2896	case KVM_HAS_DEVICE_ATTR: {
2897		r = -EFAULT;
2898		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2899			break;
2900		r = kvm_s390_vm_has_attr(kvm, &attr);
2901		break;
2902	}
2903	case KVM_S390_GET_SKEYS: {
2904		struct kvm_s390_skeys args;
2905
2906		r = -EFAULT;
2907		if (copy_from_user(&args, argp,
2908				   sizeof(struct kvm_s390_skeys)))
2909			break;
2910		r = kvm_s390_get_skeys(kvm, &args);
2911		break;
2912	}
2913	case KVM_S390_SET_SKEYS: {
2914		struct kvm_s390_skeys args;
2915
2916		r = -EFAULT;
2917		if (copy_from_user(&args, argp,
2918				   sizeof(struct kvm_s390_skeys)))
2919			break;
2920		r = kvm_s390_set_skeys(kvm, &args);
2921		break;
2922	}
2923	case KVM_S390_GET_CMMA_BITS: {
2924		struct kvm_s390_cmma_log args;
2925
2926		r = -EFAULT;
2927		if (copy_from_user(&args, argp, sizeof(args)))
2928			break;
2929		mutex_lock(&kvm->slots_lock);
2930		r = kvm_s390_get_cmma_bits(kvm, &args);
2931		mutex_unlock(&kvm->slots_lock);
2932		if (!r) {
2933			r = copy_to_user(argp, &args, sizeof(args));
2934			if (r)
2935				r = -EFAULT;
2936		}
2937		break;
2938	}
2939	case KVM_S390_SET_CMMA_BITS: {
2940		struct kvm_s390_cmma_log args;
2941
2942		r = -EFAULT;
2943		if (copy_from_user(&args, argp, sizeof(args)))
2944			break;
2945		mutex_lock(&kvm->slots_lock);
2946		r = kvm_s390_set_cmma_bits(kvm, &args);
2947		mutex_unlock(&kvm->slots_lock);
2948		break;
2949	}
2950	case KVM_S390_PV_COMMAND: {
2951		struct kvm_pv_cmd args;
2952
2953		/* protvirt means user cpu state */
2954		kvm_s390_set_user_cpu_state_ctrl(kvm);
2955		r = 0;
2956		if (!is_prot_virt_host()) {
2957			r = -EINVAL;
2958			break;
2959		}
2960		if (copy_from_user(&args, argp, sizeof(args))) {
2961			r = -EFAULT;
2962			break;
2963		}
2964		if (args.flags) {
2965			r = -EINVAL;
2966			break;
2967		}
2968		/* must be called without kvm->lock */
2969		r = kvm_s390_handle_pv(kvm, &args);
2970		if (copy_to_user(argp, &args, sizeof(args))) {
2971			r = -EFAULT;
2972			break;
2973		}
2974		break;
2975	}
2976	case KVM_S390_MEM_OP: {
2977		struct kvm_s390_mem_op mem_op;
2978
2979		if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
2980			r = kvm_s390_vm_mem_op(kvm, &mem_op);
2981		else
2982			r = -EFAULT;
2983		break;
2984	}
2985	case KVM_S390_ZPCI_OP: {
2986		struct kvm_s390_zpci_op args;
2987
2988		r = -EINVAL;
2989		if (!IS_ENABLED(CONFIG_VFIO_PCI_ZDEV_KVM))
2990			break;
2991		if (copy_from_user(&args, argp, sizeof(args))) {
2992			r = -EFAULT;
2993			break;
2994		}
2995		r = kvm_s390_pci_zpci_op(kvm, &args);
2996		break;
2997	}
2998	default:
2999		r = -ENOTTY;
3000	}
3001
3002	return r;
3003}
3004
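/* Query via the AP instructions whether the APXA facility is installed. */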
3005static int kvm_s390_apxa_installed(void)
3006{
3007	struct ap_config_info info;
3008
3009	if (ap_instructions_available()) {
3010		if (ap_qci(&info) == 0)
3011			return info.apxa;
3012	}
3013
3014	return 0;
3015}
3016
3017/*
3018 * The format of the crypto control block (CRYCB) is specified in the 3 low
3019 * order bits of the CRYCB designation (CRYCBD) field as follows:
3020 * Format 0: Neither the message security assist extension 3 (MSAX3) nor the
3021 *	     AP extended addressing (APXA) facility is installed.
3022 * Format 1: The APXA facility is not installed but the MSAX3 facility is.
3023 * Format 2: Both the APXA and MSAX3 facilities are installed.
3024 */
3025static void kvm_s390_set_crycb_format(struct kvm *kvm)
3026{
3027	kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
3028
3029	/* Clear the CRYCB format bits - i.e., set format 0 by default */
3030	kvm->arch.crypto.crycbd &= ~(CRYCB_FORMAT_MASK);
3031
3032	/* Check whether MSAX3 is installed */
3033	if (!test_kvm_facility(kvm, 76))
3034		return;
3035
3036	if (kvm_s390_apxa_installed())
3037		kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
3038	else
3039		kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
3040}
3041
3042/*
3043 * kvm_arch_crypto_set_masks
3044 *
3045 * @kvm: pointer to the target guest's KVM struct containing the crypto masks
3046 *	 to be set.
3047 * @apm: the mask identifying the accessible AP adapters
3048 * @aqm: the mask identifying the accessible AP domains
3049 * @adm: the mask identifying the accessible AP control domains
3050 *
3051 * Set the masks that identify the adapters, domains and control domains to
3052 * which the KVM guest is granted access.
3053 *
3054 * Note: The kvm->lock mutex must be locked by the caller before invoking this
3055 *	 function.
3056 */
3057void kvm_arch_crypto_set_masks(struct kvm *kvm, unsigned long *apm,
3058			       unsigned long *aqm, unsigned long *adm)
3059{
3060	struct kvm_s390_crypto_cb *crycb = kvm->arch.crypto.crycb;
3061
3062	kvm_s390_vcpu_block_all(kvm);
3063
3064	switch (kvm->arch.crypto.crycbd & CRYCB_FORMAT_MASK) {
3065	case CRYCB_FORMAT2: /* APCB1 use 256 bits */
3066		memcpy(crycb->apcb1.apm, apm, 32);
3067		VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx %016lx %016lx %016lx",
3068			 apm[0], apm[1], apm[2], apm[3]);
3069		memcpy(crycb->apcb1.aqm, aqm, 32);
3070		VM_EVENT(kvm, 3, "SET CRYCB: aqm %016lx %016lx %016lx %016lx",
3071			 aqm[0], aqm[1], aqm[2], aqm[3]);
3072		memcpy(crycb->apcb1.adm, adm, 32);
3073		VM_EVENT(kvm, 3, "SET CRYCB: adm %016lx %016lx %016lx %016lx",
3074			 adm[0], adm[1], adm[2], adm[3]);
3075		break;
3076	case CRYCB_FORMAT1:
3077	case CRYCB_FORMAT0: /* Fall through; both use APCB0 */
3078		memcpy(crycb->apcb0.apm, apm, 8);
3079		memcpy(crycb->apcb0.aqm, aqm, 2);
3080		memcpy(crycb->apcb0.adm, adm, 2);
3081		VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx aqm %04x adm %04x",
3082			 apm[0], *((unsigned short *)aqm),
3083			 *((unsigned short *)adm));
3084		break;
3085	default:	/* Cannot happen */
3086		break;
3087	}
3088
3089	/* recreate the shadow crycb for each vcpu */
3090	kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
3091	kvm_s390_vcpu_unblock_all(kvm);
3092}
3093EXPORT_SYMBOL_GPL(kvm_arch_crypto_set_masks);
3094
3095/*
3096 * kvm_arch_crypto_clear_masks
3097 *
3098 * @kvm: pointer to the target guest's KVM struct containing the crypto masks
3099 *	 to be cleared.
3100 *
3101 * Clear the masks that identify the adapters, domains and control domains to
3102 * which the KVM guest is granted access.
3103 *
3104 * Note: The kvm->lock mutex must be locked by the caller before invoking this
3105 *	 function.
3106 */
3107void kvm_arch_crypto_clear_masks(struct kvm *kvm)
3108{
3109	kvm_s390_vcpu_block_all(kvm);
3110
3111	memset(&kvm->arch.crypto.crycb->apcb0, 0,
3112	       sizeof(kvm->arch.crypto.crycb->apcb0));
3113	memset(&kvm->arch.crypto.crycb->apcb1, 0,
3114	       sizeof(kvm->arch.crypto.crycb->apcb1));
3115
3116	VM_EVENT(kvm, 3, "%s", "CLR CRYCB:");
3117	/* recreate the shadow crycb for each vcpu */
3118	kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
3119	kvm_s390_vcpu_unblock_all(kvm);
3120}
3121EXPORT_SYMBOL_GPL(kvm_arch_crypto_clear_masks);
3122
3123static u64 kvm_s390_get_initial_cpuid(void)
3124{
3125	struct cpuid cpuid;
3126
3127	get_cpu_id(&cpuid);
3128	cpuid.version = 0xff;
3129	return *((u64 *) &cpuid);
3130}
3131
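/*
 * Set up the guest CRYCB; if the MSAX3 facility is available, also enable
 * the protected key functions and generate random wrapping key masks.
 */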
3132static void kvm_s390_crypto_init(struct kvm *kvm)
3133{
3134	kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
3135	kvm_s390_set_crycb_format(kvm);
3136	init_rwsem(&kvm->arch.crypto.pqap_hook_rwsem);
3137
3138	if (!test_kvm_facility(kvm, 76))
3139		return;
3140
3141	/* Enable AES/DEA protected key functions by default */
3142	kvm->arch.crypto.aes_kw = 1;
3143	kvm->arch.crypto.dea_kw = 1;
3144	get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
3145			 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
3146	get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
3147			 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
3148}
3149
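/* Free the system control area, whether it is a basic or an extended SCA. */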
3150static void sca_dispose(struct kvm *kvm)
3151{
3152	if (kvm->arch.use_esca)
3153		free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
3154	else
3155		free_page((unsigned long)(kvm->arch.sca));
3156	kvm->arch.sca = NULL;
3157}
3158
3159void kvm_arch_free_vm(struct kvm *kvm)
3160{
3161	if (IS_ENABLED(CONFIG_VFIO_PCI_ZDEV_KVM))
3162		kvm_s390_pci_clear_list(kvm);
3163
3164	__kvm_arch_free_vm(kvm);
3165}
3166
3167int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
3168{
3169	gfp_t alloc_flags = GFP_KERNEL_ACCOUNT;
3170	int i, rc;
3171	char debug_name[16];
3172	static unsigned long sca_offset;
3173
3174	rc = -EINVAL;
3175#ifdef CONFIG_KVM_S390_UCONTROL
3176	if (type & ~KVM_VM_S390_UCONTROL)
3177		goto out_err;
3178	if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
3179		goto out_err;
3180#else
3181	if (type)
3182		goto out_err;
3183#endif
3184
3185	rc = s390_enable_sie();
3186	if (rc)
3187		goto out_err;
3188
3189	rc = -ENOMEM;
3190
3191	if (!sclp.has_64bscao)
3192		alloc_flags |= GFP_DMA;
3193	rwlock_init(&kvm->arch.sca_lock);
3194	/* start with basic SCA */
3195	kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
3196	if (!kvm->arch.sca)
3197		goto out_err;
3198	mutex_lock(&kvm_lock);
3199	sca_offset += 16;
3200	if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
3201		sca_offset = 0;
3202	kvm->arch.sca = (struct bsca_block *)
3203			((char *) kvm->arch.sca + sca_offset);
3204	mutex_unlock(&kvm_lock);
3205
3206	sprintf(debug_name, "kvm-%u", current->pid);
3207
3208	kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
3209	if (!kvm->arch.dbf)
3210		goto out_err;
3211
3212	BUILD_BUG_ON(sizeof(struct sie_page2) != 4096);
3213	kvm->arch.sie_page2 =
3214	     (struct sie_page2 *) get_zeroed_page(GFP_KERNEL_ACCOUNT | GFP_DMA);
3215	if (!kvm->arch.sie_page2)
3216		goto out_err;
3217
3218	kvm->arch.sie_page2->kvm = kvm;
3219	kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
3220
3221	for (i = 0; i < kvm_s390_fac_size(); i++) {
3222		kvm->arch.model.fac_mask[i] = stfle_fac_list[i] &
3223					      (kvm_s390_fac_base[i] |
3224					       kvm_s390_fac_ext[i]);
3225		kvm->arch.model.fac_list[i] = stfle_fac_list[i] &
3226					      kvm_s390_fac_base[i];
3227	}
3228	kvm->arch.model.subfuncs = kvm_s390_available_subfunc;
3229
3230	/* we are always in czam mode - even on pre-z14 machines */
3231	set_kvm_facility(kvm->arch.model.fac_mask, 138);
3232	set_kvm_facility(kvm->arch.model.fac_list, 138);
3233	/* we emulate STHYI in kvm */
3234	set_kvm_facility(kvm->arch.model.fac_mask, 74);
3235	set_kvm_facility(kvm->arch.model.fac_list, 74);
3236	if (MACHINE_HAS_TLB_GUEST) {
3237		set_kvm_facility(kvm->arch.model.fac_mask, 147);
3238		set_kvm_facility(kvm->arch.model.fac_list, 147);
3239	}
3240
3241	if (css_general_characteristics.aiv && test_facility(65))
3242		set_kvm_facility(kvm->arch.model.fac_mask, 65);
3243
3244	kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
3245	kvm->arch.model.ibc = sclp.ibc & 0x0fff;
3246
3247	kvm_s390_crypto_init(kvm);
3248
3249	if (IS_ENABLED(CONFIG_VFIO_PCI_ZDEV_KVM)) {
3250		mutex_lock(&kvm->lock);
3251		kvm_s390_pci_init_list(kvm);
3252		kvm_s390_vcpu_pci_enable_interp(kvm);
3253		mutex_unlock(&kvm->lock);
3254	}
3255
3256	mutex_init(&kvm->arch.float_int.ais_lock);
3257	spin_lock_init(&kvm->arch.float_int.lock);
3258	for (i = 0; i < FIRQ_LIST_COUNT; i++)
3259		INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
3260	init_waitqueue_head(&kvm->arch.ipte_wq);
3261	mutex_init(&kvm->arch.ipte_mutex);
3262
3263	debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
3264	VM_EVENT(kvm, 3, "vm created with type %lu", type);
3265
3266	if (type & KVM_VM_S390_UCONTROL) {
3267		kvm->arch.gmap = NULL;
3268		kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
3269	} else {
3270		if (sclp.hamax == U64_MAX)
3271			kvm->arch.mem_limit = TASK_SIZE_MAX;
3272		else
3273			kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX,
3274						    sclp.hamax + 1);
3275		kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
3276		if (!kvm->arch.gmap)
3277			goto out_err;
3278		kvm->arch.gmap->private = kvm;
3279		kvm->arch.gmap->pfault_enabled = 0;
3280	}
3281
3282	kvm->arch.use_pfmfi = sclp.has_pfmfi;
3283	kvm->arch.use_skf = sclp.has_skey;
3284	spin_lock_init(&kvm->arch.start_stop_lock);
3285	kvm_s390_vsie_init(kvm);
3286	if (use_gisa)
3287		kvm_s390_gisa_init(kvm);
3288	INIT_LIST_HEAD(&kvm->arch.pv.need_cleanup);
3289	kvm->arch.pv.set_aside = NULL;
3290	KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
3291
3292	return 0;
3293out_err:
3294	free_page((unsigned long)kvm->arch.sie_page2);
3295	debug_unregister(kvm->arch.dbf);
3296	sca_dispose(kvm);
3297	KVM_EVENT(3, "creation of vm failed: %d", rc);
3298	return rc;
3299}
3300
3301void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
3302{
3303	u16 rc, rrc;
3304
3305	VCPU_EVENT(vcpu, 3, "%s", "free cpu");
3306	trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
3307	kvm_s390_clear_local_irqs(vcpu);
3308	kvm_clear_async_pf_completion_queue(vcpu);
3309	if (!kvm_is_ucontrol(vcpu->kvm))
3310		sca_del_vcpu(vcpu);
3311	kvm_s390_update_topology_change_report(vcpu->kvm, 1);
3312
3313	if (kvm_is_ucontrol(vcpu->kvm))
3314		gmap_remove(vcpu->arch.gmap);
3315
3316	if (vcpu->kvm->arch.use_cmma)
3317		kvm_s390_vcpu_unsetup_cmma(vcpu);
3318	/* We cannot hold the vcpu mutex here; we are already dying */
3319	if (kvm_s390_pv_cpu_get_handle(vcpu))
3320		kvm_s390_pv_destroy_cpu(vcpu, &rc, &rrc);
3321	free_page((unsigned long)(vcpu->arch.sie_block));
3322}
3323
3324void kvm_arch_destroy_vm(struct kvm *kvm)
3325{
3326	u16 rc, rrc;
3327
3328	kvm_destroy_vcpus(kvm);
3329	sca_dispose(kvm);
3330	kvm_s390_gisa_destroy(kvm);
3331	/*
3332	 * We are already at the end of life and kvm->lock is not taken.
3333	 * This is ok as the file descriptor is closed by now and nobody
3334	 * can mess with the pv state.
3335	 */
3336	kvm_s390_pv_deinit_cleanup_all(kvm, &rc, &rrc);
3337	/*
3338	 * Remove the mmu notifier only when the whole KVM VM is torn down,
3339	 * and only if one was registered to begin with. If the VM is
3340	 * currently not protected, but has previously been protected,
3341	 * then it's possible that the notifier is still registered.
3342	 */
3343	if (kvm->arch.pv.mmu_notifier.ops)
3344		mmu_notifier_unregister(&kvm->arch.pv.mmu_notifier, kvm->mm);
3345
3346	debug_unregister(kvm->arch.dbf);
3347	free_page((unsigned long)kvm->arch.sie_page2);
3348	if (!kvm_is_ucontrol(kvm))
3349		gmap_remove(kvm->arch.gmap);
3350	kvm_s390_destroy_adapters(kvm);
3351	kvm_s390_clear_float_irqs(kvm);
3352	kvm_s390_vsie_destroy(kvm);
3353	KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
3354}
3355
3356/* Section: vcpu related */
3357static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
3358{
3359	vcpu->arch.gmap = gmap_create(current->mm, -1UL);
3360	if (!vcpu->arch.gmap)
3361		return -ENOMEM;
3362	vcpu->arch.gmap->private = vcpu->kvm;
3363
3364	return 0;
3365}
3366
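/* Remove this vCPU's entry from the (basic or extended) SCA. */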
3367static void sca_del_vcpu(struct kvm_vcpu *vcpu)
3368{
3369	if (!kvm_s390_use_sca_entries())
3370		return;
3371	read_lock(&vcpu->kvm->arch.sca_lock);
3372	if (vcpu->kvm->arch.use_esca) {
3373		struct esca_block *sca = vcpu->kvm->arch.sca;
3374
3375		clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
3376		sca->cpu[vcpu->vcpu_id].sda = 0;
3377	} else {
3378		struct bsca_block *sca = vcpu->kvm->arch.sca;
3379
3380		clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
3381		sca->cpu[vcpu->vcpu_id].sda = 0;
3382	}
3383	read_unlock(&vcpu->kvm->arch.sca_lock);
3384}
3385
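/* Enter this vCPU into the SCA and point its SIE block at the SCA origin. */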
3386static void sca_add_vcpu(struct kvm_vcpu *vcpu)
3387{
3388	if (!kvm_s390_use_sca_entries()) {
3389		phys_addr_t sca_phys = virt_to_phys(vcpu->kvm->arch.sca);
3390
3391		/* we still need the basic sca for the ipte control */
3392		vcpu->arch.sie_block->scaoh = sca_phys >> 32;
3393		vcpu->arch.sie_block->scaol = sca_phys;
3394		return;
3395	}
3396	read_lock(&vcpu->kvm->arch.sca_lock);
3397	if (vcpu->kvm->arch.use_esca) {
3398		struct esca_block *sca = vcpu->kvm->arch.sca;
3399		phys_addr_t sca_phys = virt_to_phys(sca);
3400
3401		sca->cpu[vcpu->vcpu_id].sda = virt_to_phys(vcpu->arch.sie_block);
3402		vcpu->arch.sie_block->scaoh = sca_phys >> 32;
3403		vcpu->arch.sie_block->scaol = sca_phys & ESCA_SCAOL_MASK;
3404		vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
3405		set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
3406	} else {
3407		struct bsca_block *sca = vcpu->kvm->arch.sca;
3408		phys_addr_t sca_phys = virt_to_phys(sca);
3409
3410		sca->cpu[vcpu->vcpu_id].sda = virt_to_phys(vcpu->arch.sie_block);
3411		vcpu->arch.sie_block->scaoh = sca_phys >> 32;
3412		vcpu->arch.sie_block->scaol = sca_phys;
3413		set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
3414	}
3415	read_unlock(&vcpu->kvm->arch.sca_lock);
3416}
3417
3418/* Basic SCA to Extended SCA data copy routines */
3419static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
3420{
3421	d->sda = s->sda;
3422	d->sigp_ctrl.c = s->sigp_ctrl.c;
3423	d->sigp_ctrl.scn = s->sigp_ctrl.scn;
3424}
3425
3426static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
3427{
3428	int i;
3429
3430	d->ipte_control = s->ipte_control;
3431	d->mcn[0] = s->mcn;
3432	for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
3433		sca_copy_entry(&d->cpu[i], &s->cpu[i]);
3434}
3435
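/*
 * Replace the basic SCA with an extended SCA, copying over the existing
 * entries and updating every vCPU's SIE block to point at the new block.
 */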
3436static int sca_switch_to_extended(struct kvm *kvm)
3437{
3438	struct bsca_block *old_sca = kvm->arch.sca;
3439	struct esca_block *new_sca;
3440	struct kvm_vcpu *vcpu;
3441	unsigned long vcpu_idx;
3442	u32 scaol, scaoh;
3443	phys_addr_t new_sca_phys;
3444
3445	if (kvm->arch.use_esca)
3446		return 0;
3447
3448	new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL_ACCOUNT | __GFP_ZERO);
3449	if (!new_sca)
3450		return -ENOMEM;
3451
3452	new_sca_phys = virt_to_phys(new_sca);
3453	scaoh = new_sca_phys >> 32;
3454	scaol = new_sca_phys & ESCA_SCAOL_MASK;
3455
3456	kvm_s390_vcpu_block_all(kvm);
3457	write_lock(&kvm->arch.sca_lock);
3458
3459	sca_copy_b_to_e(new_sca, old_sca);
3460
3461	kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
3462		vcpu->arch.sie_block->scaoh = scaoh;
3463		vcpu->arch.sie_block->scaol = scaol;
3464		vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
3465	}
3466	kvm->arch.sca = new_sca;
3467	kvm->arch.use_esca = 1;
3468
3469	write_unlock(&kvm->arch.sca_lock);
3470	kvm_s390_vcpu_unblock_all(kvm);
3471
3472	free_page((unsigned long)old_sca);
3473
3474	VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
3475		 old_sca, kvm->arch.sca);
3476	return 0;
3477}
3478
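/*
 * Check whether a vCPU with the given id fits into the SCA, switching to the
 * extended SCA first if that is necessary and supported.
 */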
3479static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
3480{
3481	int rc;
3482
3483	if (!kvm_s390_use_sca_entries()) {
3484		if (id < KVM_MAX_VCPUS)
3485			return true;
3486		return false;
3487	}
3488	if (id < KVM_S390_BSCA_CPU_SLOTS)
3489		return true;
3490	if (!sclp.has_esca || !sclp.has_64bscao)
3491		return false;
3492
3493	rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
3494
3495	return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
3496}
3497
3498/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
3499static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3500{
3501	WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
3502	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
3503	vcpu->arch.cputm_start = get_tod_clock_fast();
3504	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
3505}
3506
3507/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
3508static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3509{
3510	WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
3511	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
3512	vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
3513	vcpu->arch.cputm_start = 0;
3514	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
3515}
3516
3517/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
3518static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3519{
3520	WARN_ON_ONCE(vcpu->arch.cputm_enabled);
3521	vcpu->arch.cputm_enabled = true;
3522	__start_cpu_timer_accounting(vcpu);
3523}
3524
3525/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
3526static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3527{
3528	WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
3529	__stop_cpu_timer_accounting(vcpu);
3530	vcpu->arch.cputm_enabled = false;
3531}
3532
3533static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3534{
3535	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3536	__enable_cpu_timer_accounting(vcpu);
3537	preempt_enable();
3538}
3539
3540static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3541{
3542	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3543	__disable_cpu_timer_accounting(vcpu);
3544	preempt_enable();
3545}
3546
3547/* set the cpu timer - may only be called from the VCPU thread itself */
3548void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
3549{
3550	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3551	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
3552	if (vcpu->arch.cputm_enabled)
3553		vcpu->arch.cputm_start = get_tod_clock_fast();
3554	vcpu->arch.sie_block->cputm = cputm;
3555	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
3556	preempt_enable();
3557}
3558
3559/* update and get the cpu timer - can also be called from other VCPU threads */
3560__u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
3561{
3562	unsigned int seq;
3563	__u64 value;
3564
3565	if (unlikely(!vcpu->arch.cputm_enabled))
3566		return vcpu->arch.sie_block->cputm;
3567
3568	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3569	do {
3570		seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
3571		/*
3572		 * If the writer would ever execute a read in the critical
3573		 * section, e.g. in irq context, we have a deadlock.
3574		 */
3575		WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
3576		value = vcpu->arch.sie_block->cputm;
3577		/* if cputm_start is 0, accounting is being started/stopped */
3578		if (likely(vcpu->arch.cputm_start))
3579			value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
3580	} while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
3581	preempt_enable();
3582	return value;
3583}
3584
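/*
 * vcpu_load/vcpu_put toggle the per-vcpu guest address space (gmap),
 * the CPUSTAT_RUNNING flag and, unless the vcpu is idle, CPU timer
 * accounting.
 */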
3585void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
3586{
3587
3588	gmap_enable(vcpu->arch.enabled_gmap);
3589	kvm_s390_set_cpuflags(vcpu, CPUSTAT_RUNNING);
3590	if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
3591		__start_cpu_timer_accounting(vcpu);
3592	vcpu->cpu = cpu;
3593}
3594
3595void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
3596{
3597	vcpu->cpu = -1;
3598	if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
3599		__stop_cpu_timer_accounting(vcpu);
3600	kvm_s390_clear_cpuflags(vcpu, CPUSTAT_RUNNING);
3601	vcpu->arch.enabled_gmap = gmap_get_enabled();
3602	gmap_disable(vcpu->arch.enabled_gmap);
3603
3604}
3605
3606void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
3607{
3608	mutex_lock(&vcpu->kvm->lock);
3609	preempt_disable();
3610	vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
3611	vcpu->arch.sie_block->epdx = vcpu->kvm->arch.epdx;
3612	preempt_enable();
3613	mutex_unlock(&vcpu->kvm->lock);
3614	if (!kvm_is_ucontrol(vcpu->kvm)) {
3615		vcpu->arch.gmap = vcpu->kvm->arch.gmap;
3616		sca_add_vcpu(vcpu);
3617	}
3618	if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
3619		vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
3620	/* make vcpu_load load the right gmap on the first trigger */
3621	vcpu->arch.enabled_gmap = vcpu->arch.gmap;
3622}
3623
3624static bool kvm_has_pckmo_subfunc(struct kvm *kvm, unsigned long nr)
3625{
3626	if (test_bit_inv(nr, (unsigned long *)&kvm->arch.model.subfuncs.pckmo) &&
3627	    test_bit_inv(nr, (unsigned long *)&kvm_s390_available_subfunc.pckmo))
3628		return true;
3629	return false;
3630}
3631
3632static bool kvm_has_pckmo_ecc(struct kvm *kvm)
3633{
3634	/* At least one ECC subfunction must be present */
3635	return kvm_has_pckmo_subfunc(kvm, 32) ||
3636	       kvm_has_pckmo_subfunc(kvm, 33) ||
3637	       kvm_has_pckmo_subfunc(kvm, 34) ||
3638	       kvm_has_pckmo_subfunc(kvm, 40) ||
3639	       kvm_has_pckmo_subfunc(kvm, 41);
3640
3641}
3642
3643static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
3644{
3645	/*
3646	 * If the AP instructions are not being interpreted and the MSAX3
3647	 * facility is not configured for the guest, there is nothing to set up.
3648	 */
3649	if (!vcpu->kvm->arch.crypto.apie && !test_kvm_facility(vcpu->kvm, 76))
3650		return;
3651
3652	vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
3653	vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
3654	vcpu->arch.sie_block->eca &= ~ECA_APIE;
3655	vcpu->arch.sie_block->ecd &= ~ECD_ECC;
3656
3657	if (vcpu->kvm->arch.crypto.apie)
3658		vcpu->arch.sie_block->eca |= ECA_APIE;
3659
3660	/* Set up protected key support */
3661	if (vcpu->kvm->arch.crypto.aes_kw) {
3662		vcpu->arch.sie_block->ecb3 |= ECB3_AES;
3663		/* ecc is also wrapped with AES key */
3664		if (kvm_has_pckmo_ecc(vcpu->kvm))
3665			vcpu->arch.sie_block->ecd |= ECD_ECC;
3666	}
3667
3668	if (vcpu->kvm->arch.crypto.dea_kw)
3669		vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
3670}
3671
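/*
 * CMMA (collaborative memory management assist) interpretation needs a
 * zeroed per-vcpu buffer page; sie_block->cbrlo points SIE at it.
 */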
3672void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
3673{
3674	free_page((unsigned long)phys_to_virt(vcpu->arch.sie_block->cbrlo));
3675	vcpu->arch.sie_block->cbrlo = 0;
3676}
3677
3678int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
3679{
3680	void *cbrlo_page = (void *)get_zeroed_page(GFP_KERNEL_ACCOUNT);
3681
3682	if (!cbrlo_page)
3683		return -ENOMEM;
3684
3685	vcpu->arch.sie_block->cbrlo = virt_to_phys(cbrlo_page);
3686	return 0;
3687}
3688
3689static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
3690{
3691	struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
3692
3693	vcpu->arch.sie_block->ibc = model->ibc;
3694	if (test_kvm_facility(vcpu->kvm, 7))
3695		vcpu->arch.sie_block->fac = virt_to_phys(model->fac_list);
3696}
3697
3698static int kvm_s390_vcpu_setup(struct kvm_vcpu *vcpu)
3699{
3700	int rc = 0;
3701	u16 uvrc, uvrrc;
3702
3703	atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
3704						    CPUSTAT_SM |
3705						    CPUSTAT_STOPPED);
3706
3707	if (test_kvm_facility(vcpu->kvm, 78))
3708		kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED2);
3709	else if (test_kvm_facility(vcpu->kvm, 8))
3710		kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED);
3711
3712	kvm_s390_vcpu_setup_model(vcpu);
3713
3714	/* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
3715	if (MACHINE_HAS_ESOP)
3716		vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT;
3717	if (test_kvm_facility(vcpu->kvm, 9))
3718		vcpu->arch.sie_block->ecb |= ECB_SRSI;
3719	if (test_kvm_facility(vcpu->kvm, 11))
3720		vcpu->arch.sie_block->ecb |= ECB_PTF;
3721	if (test_kvm_facility(vcpu->kvm, 73))
3722		vcpu->arch.sie_block->ecb |= ECB_TE;
3723	if (!kvm_is_ucontrol(vcpu->kvm))
3724		vcpu->arch.sie_block->ecb |= ECB_SPECI;
3725
3726	if (test_kvm_facility(vcpu->kvm, 8) && vcpu->kvm->arch.use_pfmfi)
3727		vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI;
3728	if (test_kvm_facility(vcpu->kvm, 130))
3729		vcpu->arch.sie_block->ecb2 |= ECB2_IEP;
3730	vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI;
3731	if (sclp.has_cei)
3732		vcpu->arch.sie_block->eca |= ECA_CEI;
3733	if (sclp.has_ib)
3734		vcpu->arch.sie_block->eca |= ECA_IB;
3735	if (sclp.has_siif)
3736		vcpu->arch.sie_block->eca |= ECA_SII;
3737	if (sclp.has_sigpif)
3738		vcpu->arch.sie_block->eca |= ECA_SIGPI;
3739	if (test_kvm_facility(vcpu->kvm, 129)) {
3740		vcpu->arch.sie_block->eca |= ECA_VX;
3741		vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
3742	}
3743	if (test_kvm_facility(vcpu->kvm, 139))
3744		vcpu->arch.sie_block->ecd |= ECD_MEF;
3745	if (test_kvm_facility(vcpu->kvm, 156))
3746		vcpu->arch.sie_block->ecd |= ECD_ETOKENF;
3747	if (vcpu->arch.sie_block->gd) {
3748		vcpu->arch.sie_block->eca |= ECA_AIV;
3749		VCPU_EVENT(vcpu, 3, "AIV gisa format-%u enabled for cpu %03u",
3750			   vcpu->arch.sie_block->gd & 0x3, vcpu->vcpu_id);
3751	}
3752	vcpu->arch.sie_block->sdnxo = virt_to_phys(&vcpu->run->s.regs.sdnx) | SDNXC;
3753	vcpu->arch.sie_block->riccbd = virt_to_phys(&vcpu->run->s.regs.riccb);
3754
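	/*
	 * Storage keys are handled lazily: either via the keyless subset
	 * (KSS) state or by intercepting the key instructions until the
	 * guest actually starts using storage keys.
	 */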
3755	if (sclp.has_kss)
3756		kvm_s390_set_cpuflags(vcpu, CPUSTAT_KSS);
3757	else
3758		vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
3759
3760	if (vcpu->kvm->arch.use_cmma) {
3761		rc = kvm_s390_vcpu_setup_cmma(vcpu);
3762		if (rc)
3763			return rc;
3764	}
3765	hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
3766	vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
3767
3768	vcpu->arch.sie_block->hpid = HPID_KVM;
3769
3770	kvm_s390_vcpu_crypto_setup(vcpu);
3771
3772	kvm_s390_vcpu_pci_setup(vcpu);
3773
3774	mutex_lock(&vcpu->kvm->lock);
3775	if (kvm_s390_pv_is_protected(vcpu->kvm)) {
3776		rc = kvm_s390_pv_create_cpu(vcpu, &uvrc, &uvrrc);
3777		if (rc)
3778			kvm_s390_vcpu_unsetup_cmma(vcpu);
3779	}
3780	mutex_unlock(&vcpu->kvm->lock);
3781
3782	return rc;
3783}
3784
3785int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id)
3786{
3787	if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
3788		return -EINVAL;
3789	return 0;
3790}
3791
3792int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
3793{
3794	struct sie_page *sie_page;
3795	int rc;
3796
3797	BUILD_BUG_ON(sizeof(struct sie_page) != 4096);
3798	sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL_ACCOUNT);
3799	if (!sie_page)
3800		return -ENOMEM;
3801
3802	vcpu->arch.sie_block = &sie_page->sie_block;
3803	vcpu->arch.sie_block->itdba = virt_to_phys(&sie_page->itdb);
3804
3805	/* the real guest size will always be smaller than msl */
3806	vcpu->arch.sie_block->mso = 0;
3807	vcpu->arch.sie_block->msl = sclp.hamax;
3808
3809	vcpu->arch.sie_block->icpua = vcpu->vcpu_id;
3810	spin_lock_init(&vcpu->arch.local_int.lock);
3811	vcpu->arch.sie_block->gd = kvm_s390_get_gisa_desc(vcpu->kvm);
3812	seqcount_init(&vcpu->arch.cputm_seqcount);
3813
3814	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
3815	kvm_clear_async_pf_completion_queue(vcpu);
3816	vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
3817				    KVM_SYNC_GPRS |
3818				    KVM_SYNC_ACRS |
3819				    KVM_SYNC_CRS |
3820				    KVM_SYNC_ARCH0 |
3821				    KVM_SYNC_PFAULT |
3822				    KVM_SYNC_DIAG318;
3823	kvm_s390_set_prefix(vcpu, 0);
3824	if (test_kvm_facility(vcpu->kvm, 64))
3825		vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
3826	if (test_kvm_facility(vcpu->kvm, 82))
3827		vcpu->run->kvm_valid_regs |= KVM_SYNC_BPBC;
3828	if (test_kvm_facility(vcpu->kvm, 133))
3829		vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB;
3830	if (test_kvm_facility(vcpu->kvm, 156))
3831		vcpu->run->kvm_valid_regs |= KVM_SYNC_ETOKEN;
3832	/* fprs can be synchronized via vrs, even if the guest has no vx. With
3833	 * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
3834	 */
3835	if (MACHINE_HAS_VX)
3836		vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
3837	else
3838		vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
3839
3840	if (kvm_is_ucontrol(vcpu->kvm)) {
3841		rc = __kvm_ucontrol_vcpu_init(vcpu);
3842		if (rc)
3843			goto out_free_sie_block;
3844	}
3845
3846	VM_EVENT(vcpu->kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK",
3847		 vcpu->vcpu_id, vcpu, vcpu->arch.sie_block);
3848	trace_kvm_s390_create_vcpu(vcpu->vcpu_id, vcpu, vcpu->arch.sie_block);
3849
3850	rc = kvm_s390_vcpu_setup(vcpu);
3851	if (rc)
3852		goto out_ucontrol_uninit;
3853
3854	kvm_s390_update_topology_change_report(vcpu->kvm, 1);
3855	return 0;
3856
3857out_ucontrol_uninit:
3858	if (kvm_is_ucontrol(vcpu->kvm))
3859		gmap_remove(vcpu->arch.gmap);
3860out_free_sie_block:
3861	free_page((unsigned long)(vcpu->arch.sie_block));
3862	return rc;
3863}
3864
3865int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
3866{
3867	clear_bit(vcpu->vcpu_idx, vcpu->kvm->arch.gisa_int.kicked_mask);
3868	return kvm_s390_vcpu_has_irq(vcpu, 0);
3869}
3870
3871bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
3872{
3873	return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE);
3874}
3875
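/*
 * prog20 carries flags that keep a vcpu from (re)entering SIE:
 * PROG_BLOCK_SIE stays set until the vcpu is unblocked, while
 * PROG_REQUEST is cleared as soon as the synchronous request has been
 * handled.
 */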
3876void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
3877{
3878	atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
3879	exit_sie(vcpu);
3880}
3881
3882void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
3883{
3884	atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
3885}
3886
3887static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
3888{
3889	atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
3890	exit_sie(vcpu);
3891}
3892
3893bool kvm_s390_vcpu_sie_inhibited(struct kvm_vcpu *vcpu)
3894{
3895	return atomic_read(&vcpu->arch.sie_block->prog20) &
3896	       (PROG_BLOCK_SIE | PROG_REQUEST);
3897}
3898
3899static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
3900{
3901	atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
3902}
3903
3904/*
3905 * Kick a guest cpu out of (v)SIE and wait until (v)SIE is not running.
3906 * If the CPU is not running (e.g. waiting as idle) the function will
3907 * return immediately. */
3908void exit_sie(struct kvm_vcpu *vcpu)
3909{
3910	kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOP_INT);
3911	kvm_s390_vsie_kick(vcpu);
3912	while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
3913		cpu_relax();
3914}
3915
3916/* Kick a guest cpu out of SIE to process a request synchronously */
3917void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
3918{
3919	__kvm_make_request(req, vcpu);
3920	kvm_s390_vcpu_request(vcpu);
3921}
3922
3923static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
3924			      unsigned long end)
3925{
3926	struct kvm *kvm = gmap->private;
3927	struct kvm_vcpu *vcpu;
3928	unsigned long prefix;
3929	unsigned long i;
3930
3931	if (gmap_is_shadow(gmap))
3932		return;
3933	if (start >= 1UL << 31)
3934		/* We are only interested in prefix pages */
3935		return;
3936	kvm_for_each_vcpu(i, vcpu, kvm) {
3937		/* match against both prefix pages */
3938		prefix = kvm_s390_get_prefix(vcpu);
3939		if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
3940			VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
3941				   start, end);
3942			kvm_s390_sync_request(KVM_REQ_REFRESH_GUEST_PREFIX, vcpu);
3943		}
3944	}
3945}
3946
3947bool kvm_arch_no_poll(struct kvm_vcpu *vcpu)
3948{
3949	/* do not poll with more than halt_poll_max_steal percent of steal time */
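	/*
	 * avg_steal_timer is in CPU-timer/TOD units (4096 units per
	 * microsecond) and TICK_USEC is the tick length in microseconds,
	 * so this compares the stolen share of one tick, in percent,
	 * against the halt_poll_max_steal threshold.
	 */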
3950	if (S390_lowcore.avg_steal_timer * 100 / (TICK_USEC << 12) >=
3951	    READ_ONCE(halt_poll_max_steal)) {
3952		vcpu->stat.halt_no_poll_steal++;
3953		return true;
3954	}
3955	return false;
3956}
3957
3958int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
3959{
3960	/* kvm common code refers to this, but never calls it */
3961	BUG();
3962	return 0;
3963}
3964
3965static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
3966					   struct kvm_one_reg *reg)
3967{
3968	int r = -EINVAL;
3969
3970	switch (reg->id) {
3971	case KVM_REG_S390_TODPR:
3972		r = put_user(vcpu->arch.sie_block->todpr,
3973			     (u32 __user *)reg->addr);
3974		break;
3975	case KVM_REG_S390_EPOCHDIFF:
3976		r = put_user(vcpu->arch.sie_block->epoch,
3977			     (u64 __user *)reg->addr);
3978		break;
3979	case KVM_REG_S390_CPU_TIMER:
3980		r = put_user(kvm_s390_get_cpu_timer(vcpu),
3981			     (u64 __user *)reg->addr);
3982		break;
3983	case KVM_REG_S390_CLOCK_COMP:
3984		r = put_user(vcpu->arch.sie_block->ckc,
3985			     (u64 __user *)reg->addr);
3986		break;
3987	case KVM_REG_S390_PFTOKEN:
3988		r = put_user(vcpu->arch.pfault_token,
3989			     (u64 __user *)reg->addr);
3990		break;
3991	case KVM_REG_S390_PFCOMPARE:
3992		r = put_user(vcpu->arch.pfault_compare,
3993			     (u64 __user *)reg->addr);
3994		break;
3995	case KVM_REG_S390_PFSELECT:
3996		r = put_user(vcpu->arch.pfault_select,
3997			     (u64 __user *)reg->addr);
3998		break;
3999	case KVM_REG_S390_PP:
4000		r = put_user(vcpu->arch.sie_block->pp,
4001			     (u64 __user *)reg->addr);
4002		break;
4003	case KVM_REG_S390_GBEA:
4004		r = put_user(vcpu->arch.sie_block->gbea,
4005			     (u64 __user *)reg->addr);
4006		break;
4007	default:
4008		break;
4009	}
4010
4011	return r;
4012}
4013
4014static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
4015					   struct kvm_one_reg *reg)
4016{
4017	int r = -EINVAL;
4018	__u64 val;
4019
4020	switch (reg->id) {
4021	case KVM_REG_S390_TODPR:
4022		r = get_user(vcpu->arch.sie_block->todpr,
4023			     (u32 __user *)reg->addr);
4024		break;
4025	case KVM_REG_S390_EPOCHDIFF:
4026		r = get_user(vcpu->arch.sie_block->epoch,
4027			     (u64 __user *)reg->addr);
4028		break;
4029	case KVM_REG_S390_CPU_TIMER:
4030		r = get_user(val, (u64 __user *)reg->addr);
4031		if (!r)
4032			kvm_s390_set_cpu_timer(vcpu, val);
4033		break;
4034	case KVM_REG_S390_CLOCK_COMP:
4035		r = get_user(vcpu->arch.sie_block->ckc,
4036			     (u64 __user *)reg->addr);
4037		break;
4038	case KVM_REG_S390_PFTOKEN:
4039		r = get_user(vcpu->arch.pfault_token,
4040			     (u64 __user *)reg->addr);
4041		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
4042			kvm_clear_async_pf_completion_queue(vcpu);
4043		break;
4044	case KVM_REG_S390_PFCOMPARE:
4045		r = get_user(vcpu->arch.pfault_compare,
4046			     (u64 __user *)reg->addr);
4047		break;
4048	case KVM_REG_S390_PFSELECT:
4049		r = get_user(vcpu->arch.pfault_select,
4050			     (u64 __user *)reg->addr);
4051		break;
4052	case KVM_REG_S390_PP:
4053		r = get_user(vcpu->arch.sie_block->pp,
4054			     (u64 __user *)reg->addr);
4055		break;
4056	case KVM_REG_S390_GBEA:
4057		r = get_user(vcpu->arch.sie_block->gbea,
4058			     (u64 __user *)reg->addr);
4059		break;
4060	default:
4061		break;
4062	}
4063
4064	return r;
4065}
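/*
 * Illustrative (untested) userspace sketch of the ONE_REG interface
 * handled above -- reading and writing back the guest CPU timer.
 * "vcpu_fd" is assumed to be an open KVM vcpu file descriptor and the
 * usual <linux/kvm.h> and <sys/ioctl.h> includes are assumed:
 *
 *	__u64 cputm;
 *	struct kvm_one_reg reg = {
 *		.id   = KVM_REG_S390_CPU_TIMER,
 *		.addr = (__u64)&cputm,
 *	};
 *
 *	ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg);	  // error handling elided
 *	ioctl(vcpu_fd, KVM_SET_ONE_REG, &reg);
 */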
4066
4067static void kvm_arch_vcpu_ioctl_normal_reset(struct kvm_vcpu *vcpu)
4068{
4069	vcpu->arch.sie_block->gpsw.mask &= ~PSW_MASK_RI;
4070	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
4071	memset(vcpu->run->s.regs.riccb, 0, sizeof(vcpu->run->s.regs.riccb));
4072
4073	kvm_clear_async_pf_completion_queue(vcpu);
4074	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
4075		kvm_s390_vcpu_stop(vcpu);
4076	kvm_s390_clear_local_irqs(vcpu);
4077}
4078
4079static void kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
4080{
4081	/* Initial reset is a superset of the normal reset */
4082	kvm_arch_vcpu_ioctl_normal_reset(vcpu);
4083
4084	/*
4085	 * This equals initial cpu reset in pop, but we don't switch to ESA.
4086	 * We not only reset the internal data, but also ...
4087	 */
4088	vcpu->arch.sie_block->gpsw.mask = 0;
4089	vcpu->arch.sie_block->gpsw.addr = 0;
4090	kvm_s390_set_prefix(vcpu, 0);
4091	kvm_s390_set_cpu_timer(vcpu, 0);
4092	vcpu->arch.sie_block->ckc = 0;
4093	memset(vcpu->arch.sie_block->gcr, 0, sizeof(vcpu->arch.sie_block->gcr));
4094	vcpu->arch.sie_block->gcr[0] = CR0_INITIAL_MASK;
4095	vcpu->arch.sie_block->gcr[14] = CR14_INITIAL_MASK;
4096
4097	/* ... the data in sync regs */
4098	memset(vcpu->run->s.regs.crs, 0, sizeof(vcpu->run->s.regs.crs));
4099	vcpu->run->s.regs.ckc = 0;
4100	vcpu->run->s.regs.crs[0] = CR0_INITIAL_MASK;
4101	vcpu->run->s.regs.crs[14] = CR14_INITIAL_MASK;
4102	vcpu->run->psw_addr = 0;
4103	vcpu->run->psw_mask = 0;
4104	vcpu->run->s.regs.todpr = 0;
4105	vcpu->run->s.regs.cputm = 0;
4106	vcpu->run->s.regs.ckc = 0;
4107	vcpu->run->s.regs.pp = 0;
4108	vcpu->run->s.regs.gbea = 1;
4109	vcpu->run->s.regs.fpc = 0;
4110	/*
4111	 * Do not reset these registers in the protected case, as some of
4112	 * them are overlaid and they are not accessible in this case
4113	 * anyway.
4114	 */
4115	if (!kvm_s390_pv_cpu_is_protected(vcpu)) {
4116		vcpu->arch.sie_block->gbea = 1;
4117		vcpu->arch.sie_block->pp = 0;
4118		vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
4119		vcpu->arch.sie_block->todpr = 0;
4120	}
4121}
4122
4123static void kvm_arch_vcpu_ioctl_clear_reset(struct kvm_vcpu *vcpu)
4124{
4125	struct kvm_sync_regs *regs = &vcpu->run->s.regs;
4126
4127	/* Clear reset is a superset of the initial reset */
4128	kvm_arch_vcpu_ioctl_initial_reset(vcpu);
4129
4130	memset(&regs->gprs, 0, sizeof(regs->gprs));
4131	memset(&regs->vrs, 0, sizeof(regs->vrs));
4132	memset(&regs->acrs, 0, sizeof(regs->acrs));
4133	memset(&regs->gscb, 0, sizeof(regs->gscb));
4134
4135	regs->etoken = 0;
4136	regs->etoken_extension = 0;
4137}
4138
4139int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
4140{
4141	vcpu_load(vcpu);
4142	memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
4143	vcpu_put(vcpu);
4144	return 0;
4145}
4146
4147int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
4148{
4149	vcpu_load(vcpu);
4150	memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
4151	vcpu_put(vcpu);
4152	return 0;
4153}
4154
4155int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
4156				  struct kvm_sregs *sregs)
4157{
4158	vcpu_load(vcpu);
4159
4160	memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
4161	memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
4162
4163	vcpu_put(vcpu);
4164	return 0;
4165}
4166
4167int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
4168				  struct kvm_sregs *sregs)
4169{
4170	vcpu_load(vcpu);
4171
4172	memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
4173	memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
4174
4175	vcpu_put(vcpu);
4176	return 0;
4177}
4178
4179int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
4180{
4181	int ret = 0;
4182
4183	vcpu_load(vcpu);
4184
4185	if (test_fp_ctl(fpu->fpc)) {
4186		ret = -EINVAL;
4187		goto out;
4188	}
4189	vcpu->run->s.regs.fpc = fpu->fpc;
4190	if (MACHINE_HAS_VX)
4191		convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
4192				 (freg_t *) fpu->fprs);
4193	else
4194		memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
4195
4196out:
4197	vcpu_put(vcpu);
4198	return ret;
4199}
4200
4201int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
4202{
4203	vcpu_load(vcpu);
4204
4205	/* make sure we have the latest values */
4206	save_fpu_regs();
4207	if (MACHINE_HAS_VX)
4208		convert_vx_to_fp((freg_t *) fpu->fprs,
4209				 (__vector128 *) vcpu->run->s.regs.vrs);
4210	else
4211		memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
4212	fpu->fpc = vcpu->run->s.regs.fpc;
4213
4214	vcpu_put(vcpu);
4215	return 0;
4216}
4217
4218static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
4219{
4220	int rc = 0;
4221
4222	if (!is_vcpu_stopped(vcpu))
4223		rc = -EBUSY;
4224	else {
4225		vcpu->run->psw_mask = psw.mask;
4226		vcpu->run->psw_addr = psw.addr;
4227	}
4228	return rc;
4229}
4230
4231int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
4232				  struct kvm_translation *tr)
4233{
4234	return -EINVAL; /* not implemented yet */
4235}
4236
4237#define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
4238			      KVM_GUESTDBG_USE_HW_BP | \
4239			      KVM_GUESTDBG_ENABLE)
4240
4241int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
4242					struct kvm_guest_debug *dbg)
4243{
4244	int rc = 0;
4245
4246	vcpu_load(vcpu);
4247
4248	vcpu->guest_debug = 0;
4249	kvm_s390_clear_bp_data(vcpu);
4250
4251	if (dbg->control & ~VALID_GUESTDBG_FLAGS) {
4252		rc = -EINVAL;
4253		goto out;
4254	}
4255	if (!sclp.has_gpere) {
4256		rc = -EINVAL;
4257		goto out;
4258	}
4259
4260	if (dbg->control & KVM_GUESTDBG_ENABLE) {
4261		vcpu->guest_debug = dbg->control;
4262		/* enforce guest PER */
4263		kvm_s390_set_cpuflags(vcpu, CPUSTAT_P);
4264
4265		if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
4266			rc = kvm_s390_import_bp_data(vcpu, dbg);
4267	} else {
4268		kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
4269		vcpu->arch.guestdbg.last_bp = 0;
4270	}
4271
4272	if (rc) {
4273		vcpu->guest_debug = 0;
4274		kvm_s390_clear_bp_data(vcpu);
4275		kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
4276	}
4277
4278out:
4279	vcpu_put(vcpu);
4280	return rc;
4281}
4282
4283int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
4284				    struct kvm_mp_state *mp_state)
4285{
4286	int ret;
4287
4288	vcpu_load(vcpu);
4289
4290	/* CHECK_STOP and LOAD are not supported yet */
4291	ret = is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
4292				      KVM_MP_STATE_OPERATING;
4293
4294	vcpu_put(vcpu);
4295	return ret;
4296}
4297
4298int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
4299				    struct kvm_mp_state *mp_state)
4300{
4301	int rc = 0;
4302
4303	vcpu_load(vcpu);
4304
4305	/* user space knows about this interface - let it control the state */
4306	kvm_s390_set_user_cpu_state_ctrl(vcpu->kvm);
4307
4308	switch (mp_state->mp_state) {
4309	case KVM_MP_STATE_STOPPED:
4310		rc = kvm_s390_vcpu_stop(vcpu);
4311		break;
4312	case KVM_MP_STATE_OPERATING:
4313		rc = kvm_s390_vcpu_start(vcpu);
4314		break;
4315	case KVM_MP_STATE_LOAD:
4316		if (!kvm_s390_pv_cpu_is_protected(vcpu)) {
4317			rc = -ENXIO;
4318			break;
4319		}
4320		rc = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_OPR_LOAD);
4321		break;
4322	case KVM_MP_STATE_CHECK_STOP:
4323		fallthrough;	/* CHECK_STOP and LOAD are not supported yet */
4324	default:
4325		rc = -ENXIO;
4326	}
4327
4328	vcpu_put(vcpu);
4329	return rc;
4330}
4331
4332static bool ibs_enabled(struct kvm_vcpu *vcpu)
4333{
4334	return kvm_s390_test_cpuflags(vcpu, CPUSTAT_IBS);
4335}
4336
4337static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
4338{
4339retry:
4340	kvm_s390_vcpu_request_handled(vcpu);
4341	if (!kvm_request_pending(vcpu))
4342		return 0;
4343	/*
4344	 * If the guest prefix changed, re-arm the ipte notifier for the
4345	 * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
4346	 * This ensures that the ipte instruction for this request has
4347	 * already finished. We might race against a second unmapper that
4348	 * wants to set the blocking bit. Let's just retry the request loop.
4349	 */
4350	if (kvm_check_request(KVM_REQ_REFRESH_GUEST_PREFIX, vcpu)) {
4351		int rc;
4352		rc = gmap_mprotect_notify(vcpu->arch.gmap,
4353					  kvm_s390_get_prefix(vcpu),
4354					  PAGE_SIZE * 2, PROT_WRITE);
4355		if (rc) {
4356			kvm_make_request(KVM_REQ_REFRESH_GUEST_PREFIX, vcpu);
4357			return rc;
4358		}
4359		goto retry;
4360	}
4361
4362	if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
4363		vcpu->arch.sie_block->ihcpu = 0xffff;
4364		goto retry;
4365	}
4366
4367	if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
4368		if (!ibs_enabled(vcpu)) {
4369			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
4370			kvm_s390_set_cpuflags(vcpu, CPUSTAT_IBS);
4371		}
4372		goto retry;
4373	}
4374
4375	if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
4376		if (ibs_enabled(vcpu)) {
4377			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
4378			kvm_s390_clear_cpuflags(vcpu, CPUSTAT_IBS);
4379		}
4380		goto retry;
4381	}
4382
4383	if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
4384		vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
4385		goto retry;
4386	}
4387
4388	if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) {
4389		/*
4390		 * Disable CMM virtualization; we will emulate the ESSA
4391		 * instruction manually, in order to provide additional
4392		 * functionalities needed for live migration.
4393		 */
4394		vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA;
4395		goto retry;
4396	}
4397
4398	if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) {
4399		/*
4400		 * Re-enable CMM virtualization if CMMA is available and
4401		 * CMM has been used.
4402		 */
4403		if ((vcpu->kvm->arch.use_cmma) &&
4404		    (vcpu->kvm->mm->context.uses_cmm))
4405			vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
4406		goto retry;
4407	}
4408
4409	/* we left the vsie handler, nothing to do, just clear the request */
4410	kvm_clear_request(KVM_REQ_VSIE_RESTART, vcpu);
4411
4412	return 0;
4413}
4414
4415static void __kvm_s390_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod)
4416{
4417	struct kvm_vcpu *vcpu;
4418	union tod_clock clk;
4419	unsigned long i;
4420
4421	preempt_disable();
4422
4423	store_tod_clock_ext(&clk);
4424
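	/*
	 * The epoch is the guest-minus-host TOD difference; with the
	 * multiple-epoch facility (139) the epoch index carries the high
	 * part, so borrow from it when the 64-bit subtraction wraps.
	 */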
4425	kvm->arch.epoch = gtod->tod - clk.tod;
4426	kvm->arch.epdx = 0;
4427	if (test_kvm_facility(kvm, 139)) {
4428		kvm->arch.epdx = gtod->epoch_idx - clk.ei;
4429		if (kvm->arch.epoch > gtod->tod)
4430			kvm->arch.epdx -= 1;
4431	}
4432
4433	kvm_s390_vcpu_block_all(kvm);
4434	kvm_for_each_vcpu(i, vcpu, kvm) {
4435		vcpu->arch.sie_block->epoch = kvm->arch.epoch;
4436		vcpu->arch.sie_block->epdx  = kvm->arch.epdx;
4437	}
4438
4439	kvm_s390_vcpu_unblock_all(kvm);
4440	preempt_enable();
4441}
4442
4443int kvm_s390_try_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod)
4444{
4445	if (!mutex_trylock(&kvm->lock))
4446		return 0;
4447	__kvm_s390_set_tod_clock(kvm, gtod);
4448	mutex_unlock(&kvm->lock);
4449	return 1;
4450}
4451
4452/**
4453 * kvm_arch_fault_in_page - fault-in guest page if necessary
4454 * @vcpu: The corresponding virtual cpu
4455 * @gpa: Guest physical address
4456 * @writable: Whether the page should be writable or not
4457 *
4458 * Make sure that a guest page has been faulted-in on the host.
4459 *
4460 * Return: Zero on success, negative error code otherwise.
4461 */
4462long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
4463{
4464	return gmap_fault(vcpu->arch.gmap, gpa,
4465			  writable ? FAULT_FLAG_WRITE : 0);
4466}
4467
4468static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
4469				      unsigned long token)
4470{
4471	struct kvm_s390_interrupt inti;
4472	struct kvm_s390_irq irq;
4473
4474	if (start_token) {
4475		irq.u.ext.ext_params2 = token;
4476		irq.type = KVM_S390_INT_PFAULT_INIT;
4477		WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
4478	} else {
4479		inti.type = KVM_S390_INT_PFAULT_DONE;
4480		inti.parm64 = token;
4481		WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
4482	}
4483}
4484
4485bool kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
4486				     struct kvm_async_pf *work)
4487{
4488	trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
4489	__kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
4490
4491	return true;
4492}
4493
4494void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
4495				 struct kvm_async_pf *work)
4496{
4497	trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
4498	__kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
4499}
4500
4501void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
4502			       struct kvm_async_pf *work)
4503{
4504	/* s390 will always inject the page directly */
4505}
4506
4507bool kvm_arch_can_dequeue_async_page_present(struct kvm_vcpu *vcpu)
4508{
4509	/*
4510	 * s390 will always inject the page directly,
4511	 * but we still want kvm_check_async_pf_completion() to clean up
4512	 */
4513	return true;
4514}
4515
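/*
 * A guest fault may only be handled asynchronously if the guest has set
 * up pfault handling (valid token, PSW mask matching the configured
 * select/compare values), can currently take the completion external
 * interrupt, and pfault is enabled on the gmap.
 */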
4516static bool kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
4517{
4518	hva_t hva;
4519	struct kvm_arch_async_pf arch;
4520
4521	if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
4522		return false;
4523	if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
4524	    vcpu->arch.pfault_compare)
4525		return false;
4526	if (psw_extint_disabled(vcpu))
4527		return false;
4528	if (kvm_s390_vcpu_has_irq(vcpu, 0))
4529		return false;
4530	if (!(vcpu->arch.sie_block->gcr[0] & CR0_SERVICE_SIGNAL_SUBMASK))
4531		return false;
4532	if (!vcpu->arch.gmap->pfault_enabled)
4533		return false;
4534
4535	hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
4536	hva += current->thread.gmap_addr & ~PAGE_MASK;
4537	if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
4538		return false;
4539
4540	return kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
4541}
4542
4543static int vcpu_pre_run(struct kvm_vcpu *vcpu)
4544{
4545	int rc, cpuflags;
4546
4547	/*
4548	 * On s390 notifications for arriving pages will be delivered directly
4549	 * to the guest but the house keeping for completed pfaults is
4550	 * handled outside the worker.
4551	 */
4552	kvm_check_async_pf_completion(vcpu);
4553
4554	vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
4555	vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
4556
4557	if (need_resched())
4558		schedule();
4559
4560	if (!kvm_is_ucontrol(vcpu->kvm)) {
4561		rc = kvm_s390_deliver_pending_interrupts(vcpu);
4562		if (rc)
4563			return rc;
4564	}
4565
4566	rc = kvm_s390_handle_requests(vcpu);
4567	if (rc)
4568		return rc;
4569
4570	if (guestdbg_enabled(vcpu)) {
4571		kvm_s390_backup_guest_per_regs(vcpu);
4572		kvm_s390_patch_guest_per_regs(vcpu);
4573	}
4574
4575	clear_bit(vcpu->vcpu_idx, vcpu->kvm->arch.gisa_int.kicked_mask);
4576
4577	vcpu->arch.sie_block->icptcode = 0;
4578	cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
4579	VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
4580	trace_kvm_s390_sie_enter(vcpu, cpuflags);
4581
4582	return 0;
4583}
4584
4585static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
4586{
4587	struct kvm_s390_pgm_info pgm_info = {
4588		.code = PGM_ADDRESSING,
4589	};
4590	u8 opcode, ilen;
4591	int rc;
4592
4593	VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
4594	trace_kvm_s390_sie_fault(vcpu);
4595
4596	/*
4597	 * We want to inject an addressing exception, which is defined as a
4598	 * suppressing or terminating exception. However, since we came here
4599	 * by a DAT access exception, the PSW still points to the faulting
4600	 * instruction since DAT exceptions are nullifying. So we've got
4601	 * to look up the current opcode to get the length of the instruction
4602	 * to be able to forward the PSW.
4603	 */
4604	rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1);
4605	ilen = insn_length(opcode);
4606	if (rc < 0) {
4607		return rc;
4608	} else if (rc) {
4609		/* Instruction-Fetching Exceptions - we can't detect the ilen.
4610		 * Forward by arbitrary ilc, injection will take care of
4611		 * nullification if necessary.
4612		 */
4613		pgm_info = vcpu->arch.pgm;
4614		ilen = 4;
4615	}
4616	pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
4617	kvm_s390_forward_psw(vcpu, ilen);
4618	return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
4619}
4620
4621static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
4622{
4623	struct mcck_volatile_info *mcck_info;
4624	struct sie_page *sie_page;
4625
4626	VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
4627		   vcpu->arch.sie_block->icptcode);
4628	trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
4629
4630	if (guestdbg_enabled(vcpu))
4631		kvm_s390_restore_guest_per_regs(vcpu);
4632
4633	vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
4634	vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
4635
4636	if (exit_reason == -EINTR) {
4637		VCPU_EVENT(vcpu, 3, "%s", "machine check");
4638		sie_page = container_of(vcpu->arch.sie_block,
4639					struct sie_page, sie_block);
4640		mcck_info = &sie_page->mcck_info;
4641		kvm_s390_reinject_machine_check(vcpu, mcck_info);
4642		return 0;
4643	}
4644
4645	if (vcpu->arch.sie_block->icptcode > 0) {
4646		int rc = kvm_handle_sie_intercept(vcpu);
4647
4648		if (rc != -EOPNOTSUPP)
4649			return rc;
4650		vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
4651		vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
4652		vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
4653		vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
4654		return -EREMOTE;
4655	} else if (exit_reason != -EFAULT) {
4656		vcpu->stat.exit_null++;
4657		return 0;
4658	} else if (kvm_is_ucontrol(vcpu->kvm)) {
4659		vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
4660		vcpu->run->s390_ucontrol.trans_exc_code =
4661						current->thread.gmap_addr;
4662		vcpu->run->s390_ucontrol.pgm_code = 0x10;
4663		return -EREMOTE;
4664	} else if (current->thread.gmap_pfault) {
4665		trace_kvm_s390_major_guest_pfault(vcpu);
4666		current->thread.gmap_pfault = 0;
4667		if (kvm_arch_setup_async_pf(vcpu))
4668			return 0;
4669		vcpu->stat.pfault_sync++;
4670		return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
4671	}
4672	return vcpu_post_run_fault_in_sie(vcpu);
4673}
4674
4675#define PSW_INT_MASK (PSW_MASK_EXT | PSW_MASK_IO | PSW_MASK_MCHECK)
4676static int __vcpu_run(struct kvm_vcpu *vcpu)
4677{
4678	int rc, exit_reason;
4679	struct sie_page *sie_page = (struct sie_page *)vcpu->arch.sie_block;
4680
4681	/*
4682	 * We try to hold kvm->srcu during most of vcpu_run (except when
4683	 * running the guest), so that memslots (and other stuff) are protected
4684	 */
4685	kvm_vcpu_srcu_read_lock(vcpu);
4686
4687	do {
4688		rc = vcpu_pre_run(vcpu);
4689		if (rc)
4690			break;
4691
4692		kvm_vcpu_srcu_read_unlock(vcpu);
4693		/*
4694		 * As PF_VCPU will be used in the fault handler, there must be
4695		 * no uaccess between guest_enter and guest_exit.
4696		 */
4697		local_irq_disable();
4698		guest_enter_irqoff();
4699		__disable_cpu_timer_accounting(vcpu);
4700		local_irq_enable();
4701		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4702			memcpy(sie_page->pv_grregs,
4703			       vcpu->run->s.regs.gprs,
4704			       sizeof(sie_page->pv_grregs));
4705		}
4706		if (test_cpu_flag(CIF_FPU))
4707			load_fpu_regs();
4708		exit_reason = sie64a(vcpu->arch.sie_block,
4709				     vcpu->run->s.regs.gprs);
4710		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4711			memcpy(vcpu->run->s.regs.gprs,
4712			       sie_page->pv_grregs,
4713			       sizeof(sie_page->pv_grregs));
4714			/*
4715			 * We're not allowed to inject interrupts on intercepts
4716			 * that leave the guest state in an "in-between" state
4717			 * where the next SIE entry will do a continuation.
4718			 * Fence interrupts in our "internal" PSW.
4719			 */
4720			if (vcpu->arch.sie_block->icptcode == ICPT_PV_INSTR ||
4721			    vcpu->arch.sie_block->icptcode == ICPT_PV_PREF) {
4722				vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK;
4723			}
4724		}
4725		local_irq_disable();
4726		__enable_cpu_timer_accounting(vcpu);
4727		guest_exit_irqoff();
4728		local_irq_enable();
4729		kvm_vcpu_srcu_read_lock(vcpu);
4730
4731		rc = vcpu_post_run(vcpu, exit_reason);
4732	} while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
4733
4734	kvm_vcpu_srcu_read_unlock(vcpu);
4735	return rc;
4736}
4737
4738static void sync_regs_fmt2(struct kvm_vcpu *vcpu)
4739{
4740	struct kvm_run *kvm_run = vcpu->run;
4741	struct runtime_instr_cb *riccb;
4742	struct gs_cb *gscb;
4743
4744	riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
4745	gscb = (struct gs_cb *) &kvm_run->s.regs.gscb;
4746	vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
4747	vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
4748	if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
4749		vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
4750		vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
4751		vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
4752	}
4753	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
4754		vcpu->arch.pfault_token = kvm_run->s.regs.pft;
4755		vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
4756		vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
4757		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
4758			kvm_clear_async_pf_completion_queue(vcpu);
4759	}
4760	if (kvm_run->kvm_dirty_regs & KVM_SYNC_DIAG318) {
4761		vcpu->arch.diag318_info.val = kvm_run->s.regs.diag318;
4762		vcpu->arch.sie_block->cpnc = vcpu->arch.diag318_info.cpnc;
4763		VCPU_EVENT(vcpu, 3, "setting cpnc to %d", vcpu->arch.diag318_info.cpnc);
4764	}
4765	/*
4766	 * If userspace sets the riccb (e.g. after migration) to a valid state,
4767	 * we should enable RI here instead of doing the lazy enablement.
4768	 */
4769	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
4770	    test_kvm_facility(vcpu->kvm, 64) &&
4771	    riccb->v &&
4772	    !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) {
4773		VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)");
4774		vcpu->arch.sie_block->ecb3 |= ECB3_RI;
4775	}
4776	/*
4777	 * If userspace sets the gscb (e.g. after migration) to non-zero,
4778	 * we should enable GS here instead of doing the lazy enablement.
4779	 */
4780	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) &&
4781	    test_kvm_facility(vcpu->kvm, 133) &&
4782	    gscb->gssm &&
4783	    !vcpu->arch.gs_enabled) {
4784		VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)");
4785		vcpu->arch.sie_block->ecb |= ECB_GS;
4786		vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
4787		vcpu->arch.gs_enabled = 1;
4788	}
4789	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_BPBC) &&
4790	    test_kvm_facility(vcpu->kvm, 82)) {
4791		vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
4792		vcpu->arch.sie_block->fpf |= kvm_run->s.regs.bpbc ? FPF_BPBC : 0;
4793	}
4794	if (MACHINE_HAS_GS) {
4795		preempt_disable();
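		/*
		 * The guarded-storage-enablement control in CR2 must be set
		 * before the GS control blocks can be saved and restored.
		 */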
4796		__ctl_set_bit(2, 4);
4797		if (current->thread.gs_cb) {
4798			vcpu->arch.host_gscb = current->thread.gs_cb;
4799			save_gs_cb(vcpu->arch.host_gscb);
4800		}
4801		if (vcpu->arch.gs_enabled) {
4802			current->thread.gs_cb = (struct gs_cb *)
4803						&vcpu->run->s.regs.gscb;
4804			restore_gs_cb(current->thread.gs_cb);
4805		}
4806		preempt_enable();
4807	}
4808	/* SIE will load etoken directly from SDNX and therefore kvm_run */
4809}
4810
4811static void sync_regs(struct kvm_vcpu *vcpu)
4812{
4813	struct kvm_run *kvm_run = vcpu->run;
4814
4815	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
4816		kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
4817	if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
4818		memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
4819		/* some control register changes require a tlb flush */
4820		kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
4821	}
4822	if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
4823		kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
4824		vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
4825	}
4826	save_access_regs(vcpu->arch.host_acrs);
4827	restore_access_regs(vcpu->run->s.regs.acrs);
4828	/* save host (userspace) fprs/vrs */
4829	save_fpu_regs();
4830	vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
4831	vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
4832	if (MACHINE_HAS_VX)
4833		current->thread.fpu.regs = vcpu->run->s.regs.vrs;
4834	else
4835		current->thread.fpu.regs = vcpu->run->s.regs.fprs;
4836	current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
4837	if (test_fp_ctl(current->thread.fpu.fpc))
4838		/* User space provided an invalid FPC, let's clear it */
4839		current->thread.fpu.fpc = 0;
4840
4841	/* Sync fmt2 only data */
4842	if (likely(!kvm_s390_pv_cpu_is_protected(vcpu))) {
4843		sync_regs_fmt2(vcpu);
4844	} else {
4845		/*
4846		 * In several places we have to modify our internal view to
4847		 * not do things that are disallowed by the ultravisor. For
4848		 * example we must not inject interrupts after specific exits
4849		 * (e.g. 112 prefix page not secure). We do this by turning
4850		 * off the machine check, external and I/O interrupt bits
4851		 * of our PSW copy. To avoid getting validity intercepts, we
4852		 * do only accept the condition code from userspace.
4853		 */
4854		vcpu->arch.sie_block->gpsw.mask &= ~PSW_MASK_CC;
4855		vcpu->arch.sie_block->gpsw.mask |= kvm_run->psw_mask &
4856						   PSW_MASK_CC;
4857	}
4858
4859	kvm_run->kvm_dirty_regs = 0;
4860}
4861
4862static void store_regs_fmt2(struct kvm_vcpu *vcpu)
4863{
4864	struct kvm_run *kvm_run = vcpu->run;
4865
4866	kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
4867	kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
4868	kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
4869	kvm_run->s.regs.bpbc = (vcpu->arch.sie_block->fpf & FPF_BPBC) == FPF_BPBC;
4870	kvm_run->s.regs.diag318 = vcpu->arch.diag318_info.val;
4871	if (MACHINE_HAS_GS) {
4872		preempt_disable();
4873		__ctl_set_bit(2, 4);
4874		if (vcpu->arch.gs_enabled)
4875			save_gs_cb(current->thread.gs_cb);
4876		current->thread.gs_cb = vcpu->arch.host_gscb;
4877		restore_gs_cb(vcpu->arch.host_gscb);
4878		if (!vcpu->arch.host_gscb)
4879			__ctl_clear_bit(2, 4);
4880		vcpu->arch.host_gscb = NULL;
4881		preempt_enable();
4882	}
4883	/* SIE will save etoken directly into SDNX and therefore kvm_run */
4884}
4885
4886static void store_regs(struct kvm_vcpu *vcpu)
4887{
4888	struct kvm_run *kvm_run = vcpu->run;
4889
4890	kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
4891	kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
4892	kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
4893	memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
4894	kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
4895	kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
4896	kvm_run->s.regs.pft = vcpu->arch.pfault_token;
4897	kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
4898	kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
4899	save_access_regs(vcpu->run->s.regs.acrs);
4900	restore_access_regs(vcpu->arch.host_acrs);
4901	/* Save guest register state */
4902	save_fpu_regs();
4903	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
4904	/* Restore will be done lazily at return */
4905	current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
4906	current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
4907	if (likely(!kvm_s390_pv_cpu_is_protected(vcpu)))
4908		store_regs_fmt2(vcpu);
4909}
4910
4911int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
4912{
4913	struct kvm_run *kvm_run = vcpu->run;
4914	int rc;
4915
4916	/*
4917	 * Running a VM while dumping always has the potential to
4918	 * produce inconsistent dump data. But for PV vcpus a SIE
4919	 * entry while dumping could also lead to a fatal validity
4920	 * intercept which we absolutely want to avoid.
4921	 */
4922	if (vcpu->kvm->arch.pv.dumping)
4923		return -EINVAL;
4924
4925	if (kvm_run->immediate_exit)
4926		return -EINTR;
4927
4928	if (kvm_run->kvm_valid_regs & ~KVM_SYNC_S390_VALID_FIELDS ||
4929	    kvm_run->kvm_dirty_regs & ~KVM_SYNC_S390_VALID_FIELDS)
4930		return -EINVAL;
4931
4932	vcpu_load(vcpu);
4933
4934	if (guestdbg_exit_pending(vcpu)) {
4935		kvm_s390_prepare_debug_exit(vcpu);
4936		rc = 0;
4937		goto out;
4938	}
4939
4940	kvm_sigset_activate(vcpu);
4941
4942	/*
4943	 * no need to check the return value of vcpu_start: it can only fail
4944	 * for protvirt, and protvirt implies user controlled cpu state
4945	 */
4946	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
4947		kvm_s390_vcpu_start(vcpu);
4948	} else if (is_vcpu_stopped(vcpu)) {
4949		pr_err_ratelimited("can't run stopped vcpu %d\n",
4950				   vcpu->vcpu_id);
4951		rc = -EINVAL;
4952		goto out;
4953	}
4954
4955	sync_regs(vcpu);
4956	enable_cpu_timer_accounting(vcpu);
4957
4958	might_fault();
4959	rc = __vcpu_run(vcpu);
4960
4961	if (signal_pending(current) && !rc) {
4962		kvm_run->exit_reason = KVM_EXIT_INTR;
4963		rc = -EINTR;
4964	}
4965
4966	if (guestdbg_exit_pending(vcpu) && !rc)  {
4967		kvm_s390_prepare_debug_exit(vcpu);
4968		rc = 0;
4969	}
4970
4971	if (rc == -EREMOTE) {
4972		/* userspace support is needed, kvm_run has been prepared */
4973		rc = 0;
4974	}
4975
4976	disable_cpu_timer_accounting(vcpu);
4977	store_regs(vcpu);
4978
4979	kvm_sigset_deactivate(vcpu);
4980
4981	vcpu->stat.exit_userspace++;
4982out:
4983	vcpu_put(vcpu);
4984	return rc;
4985}
4986
4987/*
4988 * store status at address
4989 * we have two special cases:
4990 * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
4991 * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
4992 */
4993int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
4994{
4995	unsigned char archmode = 1;
4996	freg_t fprs[NUM_FPRS];
4997	unsigned int px;
4998	u64 clkcomp, cputm;
4999	int rc;
5000
5001	px = kvm_s390_get_prefix(vcpu);
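	/* absolute location 163 holds the architectural-mode id (1 == z/Arch) */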
5002	if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
5003		if (write_guest_abs(vcpu, 163, &archmode, 1))
5004			return -EFAULT;
5005		gpa = 0;
5006	} else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
5007		if (write_guest_real(vcpu, 163, &archmode, 1))
5008			return -EFAULT;
5009		gpa = px;
5010	} else
5011		gpa -= __LC_FPREGS_SAVE_AREA;
5012
5013	/* manually convert vector registers if necessary */
5014	if (MACHINE_HAS_VX) {
5015		convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
5016		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
5017				     fprs, 128);
5018	} else {
5019		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
5020				     vcpu->run->s.regs.fprs, 128);
5021	}
5022	rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
5023			      vcpu->run->s.regs.gprs, 128);
5024	rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
5025			      &vcpu->arch.sie_block->gpsw, 16);
5026	rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
5027			      &px, 4);
5028	rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
5029			      &vcpu->run->s.regs.fpc, 4);
5030	rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
5031			      &vcpu->arch.sie_block->todpr, 4);
5032	cputm = kvm_s390_get_cpu_timer(vcpu);
5033	rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
5034			      &cputm, 8);
5035	clkcomp = vcpu->arch.sie_block->ckc >> 8;
5036	rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
5037			      &clkcomp, 8);
5038	rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
5039			      &vcpu->run->s.regs.acrs, 64);
5040	rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
5041			      &vcpu->arch.sie_block->gcr, 128);
5042	return rc ? -EFAULT : 0;
5043}
5044
5045int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
5046{
5047	/*
5048	 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
5049	 * switch in the run ioctl. Let's update our copies before we save
5050	 * it into the save area
5051	 */
5052	save_fpu_regs();
5053	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
5054	save_access_regs(vcpu->run->s.regs.acrs);
5055
5056	return kvm_s390_store_status_unloaded(vcpu, addr);
5057}
5058
5059static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
5060{
5061	kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
5062	kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
5063}
5064
5065static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
5066{
5067	unsigned long i;
5068	struct kvm_vcpu *vcpu;
5069
5070	kvm_for_each_vcpu(i, vcpu, kvm) {
5071		__disable_ibs_on_vcpu(vcpu);
5072	}
5073}
5074
5075static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
5076{
5077	if (!sclp.has_ibs)
5078		return;
5079	kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
5080	kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
5081}
5082
5083int kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
5084{
5085	int i, online_vcpus, r = 0, started_vcpus = 0;
5086
5087	if (!is_vcpu_stopped(vcpu))
5088		return 0;
5089
5090	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
5091	/* Only one cpu at a time may enter/leave the STOPPED state. */
5092	spin_lock(&vcpu->kvm->arch.start_stop_lock);
5093	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
5094
5095	/* Let's tell the UV that we want to change into the operating state */
5096	if (kvm_s390_pv_cpu_is_protected(vcpu)) {
5097		r = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_OPR);
5098		if (r) {
5099			spin_unlock(&vcpu->kvm->arch.start_stop_lock);
5100			return r;
5101		}
5102	}
5103
5104	for (i = 0; i < online_vcpus; i++) {
5105		if (!is_vcpu_stopped(kvm_get_vcpu(vcpu->kvm, i)))
5106			started_vcpus++;
5107	}
5108
5109	if (started_vcpus == 0) {
5110		/* we're the only active VCPU -> speed it up */
5111		__enable_ibs_on_vcpu(vcpu);
5112	} else if (started_vcpus == 1) {
5113		/*
5114		 * As we are starting a second VCPU, we have to disable
5115		 * the IBS facility on all VCPUs to remove potentially
5116		 * outstanding ENABLE requests.
5117		 */
5118		__disable_ibs_on_all_vcpus(vcpu->kvm);
5119	}
5120
5121	kvm_s390_clear_cpuflags(vcpu, CPUSTAT_STOPPED);
5122	/*
5123	 * The real PSW might have changed due to a RESTART interpreted by the
5124	 * ultravisor. We block all interrupts and let the next sie exit
5125	 * refresh our view.
5126	 */
5127	if (kvm_s390_pv_cpu_is_protected(vcpu))
5128		vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK;
5129	/*
5130	 * Another VCPU might have used IBS while we were offline.
5131	 * Let's play safe and flush the VCPU at startup.
5132	 */
5133	kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
5134	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
5135	return 0;
5136}
5137
5138int kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
5139{
5140	int i, online_vcpus, r = 0, started_vcpus = 0;
5141	struct kvm_vcpu *started_vcpu = NULL;
5142
5143	if (is_vcpu_stopped(vcpu))
5144		return 0;
5145
5146	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
5147	/* Only one cpu at a time may enter/leave the STOPPED state. */
5148	spin_lock(&vcpu->kvm->arch.start_stop_lock);
5149	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
5150
5151	/* Let's tell the UV that we want to change into the stopped state */
5152	if (kvm_s390_pv_cpu_is_protected(vcpu)) {
5153		r = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_STP);
5154		if (r) {
5155			spin_unlock(&vcpu->kvm->arch.start_stop_lock);
5156			return r;
5157		}
5158	}
5159
5160	/*
5161	 * Set the VCPU to STOPPED and THEN clear the interrupt flag,
5162	 * now that the SIGP STOP and SIGP STOP AND STORE STATUS orders
5163	 * have been fully processed. This will ensure that the VCPU
5164	 * is kept BUSY if another VCPU is inquiring with SIGP SENSE.
5165	 */
5166	kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOPPED);
5167	kvm_s390_clear_stop_irq(vcpu);
5168
 
5169	__disable_ibs_on_vcpu(vcpu);
5170
5171	for (i = 0; i < online_vcpus; i++) {
5172		struct kvm_vcpu *tmp = kvm_get_vcpu(vcpu->kvm, i);
5173
5174		if (!is_vcpu_stopped(tmp)) {
5175			started_vcpus++;
5176			started_vcpu = tmp;
5177		}
5178	}
5179
5180	if (started_vcpus == 1) {
5181		/*
5182		 * As we only have one VCPU left, we want to enable the
5183		 * IBS facility for that VCPU to speed it up.
5184		 */
5185		__enable_ibs_on_vcpu(started_vcpu);
5186	}
5187
5188	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
5189	return 0;
5190}
5191
5192static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
5193				     struct kvm_enable_cap *cap)
5194{
5195	int r;
5196
5197	if (cap->flags)
5198		return -EINVAL;
5199
5200	switch (cap->cap) {
5201	case KVM_CAP_S390_CSS_SUPPORT:
5202		if (!vcpu->kvm->arch.css_support) {
5203			vcpu->kvm->arch.css_support = 1;
5204			VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
5205			trace_kvm_s390_enable_css(vcpu->kvm);
5206		}
5207		r = 0;
5208		break;
5209	default:
5210		r = -EINVAL;
5211		break;
5212	}
5213	return r;
5214}
5215
5216static long kvm_s390_vcpu_sida_op(struct kvm_vcpu *vcpu,
5217				  struct kvm_s390_mem_op *mop)
5218{
5219	void __user *uaddr = (void __user *)mop->buf;
5220	void *sida_addr;
5221	int r = 0;
5222
5223	if (mop->flags || !mop->size)
5224		return -EINVAL;
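	/* guard against size + sida_offset wrapping around */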
5225	if (mop->size + mop->sida_offset < mop->size)
5226		return -EINVAL;
5227	if (mop->size + mop->sida_offset > sida_size(vcpu->arch.sie_block))
5228		return -E2BIG;
5229	if (!kvm_s390_pv_cpu_is_protected(vcpu))
5230		return -EINVAL;
5231
5232	sida_addr = (char *)sida_addr(vcpu->arch.sie_block) + mop->sida_offset;
5233
5234	switch (mop->op) {
5235	case KVM_S390_MEMOP_SIDA_READ:
5236		if (copy_to_user(uaddr, sida_addr, mop->size))
5237			r = -EFAULT;
5238
5239		break;
5240	case KVM_S390_MEMOP_SIDA_WRITE:
5241		if (copy_from_user(sida_addr, uaddr, mop->size))
5242			r = -EFAULT;
5243		break;
5244	}
5245	return r;
5246}
5247
5248static long kvm_s390_vcpu_mem_op(struct kvm_vcpu *vcpu,
5249				 struct kvm_s390_mem_op *mop)
5250{
5251	void __user *uaddr = (void __user *)mop->buf;
5252	void *tmpbuf = NULL;
5253	int r = 0;
5254	const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
5255				    | KVM_S390_MEMOP_F_CHECK_ONLY
5256				    | KVM_S390_MEMOP_F_SKEY_PROTECTION;
5257
5258	if (mop->flags & ~supported_flags || mop->ar >= NUM_ACRS || !mop->size)
5259		return -EINVAL;
5260	if (mop->size > MEM_OP_MAX_SIZE)
5261		return -E2BIG;
5262	if (kvm_s390_pv_cpu_is_protected(vcpu))
5263		return -EINVAL;
5264	if (mop->flags & KVM_S390_MEMOP_F_SKEY_PROTECTION) {
5265		if (access_key_invalid(mop->key))
5266			return -EINVAL;
5267	} else {
5268		mop->key = 0;
5269	}
5270	if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
5271		tmpbuf = vmalloc(mop->size);
5272		if (!tmpbuf)
5273			return -ENOMEM;
5274	}
5275
5276	switch (mop->op) {
5277	case KVM_S390_MEMOP_LOGICAL_READ:
5278		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
5279			r = check_gva_range(vcpu, mop->gaddr, mop->ar, mop->size,
5280					    GACC_FETCH, mop->key);
5281			break;
5282		}
5283		r = read_guest_with_key(vcpu, mop->gaddr, mop->ar, tmpbuf,
5284					mop->size, mop->key);
5285		if (r == 0) {
5286			if (copy_to_user(uaddr, tmpbuf, mop->size))
5287				r = -EFAULT;
5288		}
5289		break;
5290	case KVM_S390_MEMOP_LOGICAL_WRITE:
5291		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
5292			r = check_gva_range(vcpu, mop->gaddr, mop->ar, mop->size,
5293					    GACC_STORE, mop->key);
5294			break;
5295		}
5296		if (copy_from_user(tmpbuf, uaddr, mop->size)) {
5297			r = -EFAULT;
5298			break;
5299		}
5300		r = write_guest_with_key(vcpu, mop->gaddr, mop->ar, tmpbuf,
5301					 mop->size, mop->key);
5302		break;
5303	}
5304
5305	if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
5306		kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
5307
5308	vfree(tmpbuf);
5309	return r;
5310}
5311
5312static long kvm_s390_vcpu_memsida_op(struct kvm_vcpu *vcpu,
5313				     struct kvm_s390_mem_op *mop)
5314{
5315	int r, srcu_idx;
5316
5317	srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
5318
5319	switch (mop->op) {
5320	case KVM_S390_MEMOP_LOGICAL_READ:
5321	case KVM_S390_MEMOP_LOGICAL_WRITE:
5322		r = kvm_s390_vcpu_mem_op(vcpu, mop);
5323		break;
5324	case KVM_S390_MEMOP_SIDA_READ:
5325	case KVM_S390_MEMOP_SIDA_WRITE:
5326		/* we are locked against sida going away by the vcpu->mutex */
5327		r = kvm_s390_vcpu_sida_op(vcpu, mop);
5328		break;
5329	default:
5330		r = -EINVAL;
5331	}
5332
5333	srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
5334	return r;
5335}
5336
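/*
 * Interrupt injection ioctls (KVM_S390_IRQ and the older KVM_S390_INTERRUPT)
 * are handled here, before the vcpu mutex is taken, so user space can inject
 * an interrupt into a vcpu that is currently running.
 */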
5337long kvm_arch_vcpu_async_ioctl(struct file *filp,
5338			       unsigned int ioctl, unsigned long arg)
5339{
5340	struct kvm_vcpu *vcpu = filp->private_data;
5341	void __user *argp = (void __user *)arg;
5342
5343	switch (ioctl) {
5344	case KVM_S390_IRQ: {
5345		struct kvm_s390_irq s390irq;
5346
5347		if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
5348			return -EFAULT;
5349		return kvm_s390_inject_vcpu(vcpu, &s390irq);
5350	}
5351	case KVM_S390_INTERRUPT: {
5352		struct kvm_s390_interrupt s390int;
5353		struct kvm_s390_irq s390irq = {};
5354
5355		if (copy_from_user(&s390int, argp, sizeof(s390int)))
5356			return -EFAULT;
5357		if (s390int_to_s390irq(&s390int, &s390irq))
5358			return -EINVAL;
5359		return kvm_s390_inject_vcpu(vcpu, &s390irq);
5360	}
5361	}
5362	return -ENOIOCTLCMD;
5363}
5364
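/*
 * Dump the state of a single protected vcpu via the Ultravisor. Requires that
 * dump mode has been set up for the VM, only supports KVM_PV_DUMP_CPU, and
 * expects a buffer of exactly the CPU storage donation size; the UV provided
 * data is copied back to user space on success.
 */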
5365static int kvm_s390_handle_pv_vcpu_dump(struct kvm_vcpu *vcpu,
5366					struct kvm_pv_cmd *cmd)
5367{
5368	struct kvm_s390_pv_dmp dmp;
5369	void *data;
5370	int ret;
5371
5372	/* Dump initialization is a prerequisite */
5373	if (!vcpu->kvm->arch.pv.dumping)
5374		return -EINVAL;
5375
5376	if (copy_from_user(&dmp, (__u8 __user *)cmd->data, sizeof(dmp)))
5377		return -EFAULT;
5378
5379	/* We only handle this subcmd right now */
5380	if (dmp.subcmd != KVM_PV_DUMP_CPU)
5381		return -EINVAL;
5382
5383	/* CPU dump length is the same as create cpu storage donation. */
5384	if (dmp.buff_len != uv_info.guest_cpu_stor_len)
5385		return -EINVAL;
5386
5387	data = kvzalloc(uv_info.guest_cpu_stor_len, GFP_KERNEL);
5388	if (!data)
5389		return -ENOMEM;
5390
5391	ret = kvm_s390_pv_dump_cpu(vcpu, data, &cmd->rc, &cmd->rrc);
5392
5393	VCPU_EVENT(vcpu, 3, "PROTVIRT DUMP CPU %d rc %x rrc %x",
5394		   vcpu->vcpu_id, cmd->rc, cmd->rrc);
5395
5396	if (ret)
5397		ret = -EINVAL;
5398
5399	/* On success copy over the dump data */
5400	if (!ret && copy_to_user((__u8 __user *)dmp.buff_addr, data, uv_info.guest_cpu_stor_len))
5401		ret = -EFAULT;
5402
5403	kvfree(data);
5404	return ret;
5405}
5406
5407long kvm_arch_vcpu_ioctl(struct file *filp,
5408			 unsigned int ioctl, unsigned long arg)
5409{
5410	struct kvm_vcpu *vcpu = filp->private_data;
5411	void __user *argp = (void __user *)arg;
5412	int idx;
5413	long r;
5414	u16 rc, rrc;
5415
5416	vcpu_load(vcpu);
5417
5418	switch (ioctl) {
5419	case KVM_S390_STORE_STATUS:
5420		idx = srcu_read_lock(&vcpu->kvm->srcu);
5421		r = kvm_s390_store_status_unloaded(vcpu, arg);
5422		srcu_read_unlock(&vcpu->kvm->srcu, idx);
5423		break;
5424	case KVM_S390_SET_INITIAL_PSW: {
5425		psw_t psw;
5426
5427		r = -EFAULT;
5428		if (copy_from_user(&psw, argp, sizeof(psw)))
5429			break;
5430		r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
5431		break;
5432	}
5433	case KVM_S390_CLEAR_RESET:
5434		r = 0;
5435		kvm_arch_vcpu_ioctl_clear_reset(vcpu);
5436		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
5437			r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
5438					  UVC_CMD_CPU_RESET_CLEAR, &rc, &rrc);
5439			VCPU_EVENT(vcpu, 3, "PROTVIRT RESET CLEAR VCPU: rc %x rrc %x",
5440				   rc, rrc);
5441		}
5442		break;
5443	case KVM_S390_INITIAL_RESET:
5444		r = 0;
5445		kvm_arch_vcpu_ioctl_initial_reset(vcpu);
5446		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
5447			r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
5448					  UVC_CMD_CPU_RESET_INITIAL,
5449					  &rc, &rrc);
5450			VCPU_EVENT(vcpu, 3, "PROTVIRT RESET INITIAL VCPU: rc %x rrc %x",
5451				   rc, rrc);
5452		}
5453		break;
5454	case KVM_S390_NORMAL_RESET:
5455		r = 0;
5456		kvm_arch_vcpu_ioctl_normal_reset(vcpu);
5457		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
5458			r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
5459					  UVC_CMD_CPU_RESET, &rc, &rrc);
5460			VCPU_EVENT(vcpu, 3, "PROTVIRT RESET NORMAL VCPU: rc %x rrc %x",
5461				   rc, rrc);
5462		}
5463		break;
5464	case KVM_SET_ONE_REG:
5465	case KVM_GET_ONE_REG: {
5466		struct kvm_one_reg reg;
5467		r = -EINVAL;
5468		if (kvm_s390_pv_cpu_is_protected(vcpu))
5469			break;
5470		r = -EFAULT;
5471		if (copy_from_user(&reg, argp, sizeof(reg)))
5472			break;
5473		if (ioctl == KVM_SET_ONE_REG)
5474			r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
5475		else
5476			r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
5477		break;
5478	}
5479#ifdef CONFIG_KVM_S390_UCONTROL
5480	case KVM_S390_UCAS_MAP: {
5481		struct kvm_s390_ucas_mapping ucasmap;
5482
5483		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
5484			r = -EFAULT;
5485			break;
5486		}
5487
5488		if (!kvm_is_ucontrol(vcpu->kvm)) {
5489			r = -EINVAL;
5490			break;
5491		}
5492
5493		r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
5494				     ucasmap.vcpu_addr, ucasmap.length);
5495		break;
5496	}
5497	case KVM_S390_UCAS_UNMAP: {
5498		struct kvm_s390_ucas_mapping ucasmap;
5499
5500		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
5501			r = -EFAULT;
5502			break;
5503		}
5504
5505		if (!kvm_is_ucontrol(vcpu->kvm)) {
5506			r = -EINVAL;
5507			break;
5508		}
5509
5510		r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
5511			ucasmap.length);
5512		break;
5513	}
5514#endif
5515	case KVM_S390_VCPU_FAULT: {
5516		r = gmap_fault(vcpu->arch.gmap, arg, 0);
5517		break;
5518	}
5519	case KVM_ENABLE_CAP:
5520	{
5521		struct kvm_enable_cap cap;
5522		r = -EFAULT;
5523		if (copy_from_user(&cap, argp, sizeof(cap)))
5524			break;
5525		r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
5526		break;
5527	}
5528	case KVM_S390_MEM_OP: {
5529		struct kvm_s390_mem_op mem_op;
5530
5531		if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
5532			r = kvm_s390_vcpu_memsida_op(vcpu, &mem_op);
5533		else
5534			r = -EFAULT;
5535		break;
5536	}
5537	case KVM_S390_SET_IRQ_STATE: {
5538		struct kvm_s390_irq_state irq_state;
5539
5540		r = -EFAULT;
5541		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
5542			break;
5543		if (irq_state.len > VCPU_IRQS_MAX_BUF ||
5544		    irq_state.len == 0 ||
5545		    irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
5546			r = -EINVAL;
5547			break;
5548		}
5549		/* do not use irq_state.flags, it will break old QEMUs */
5550		r = kvm_s390_set_irq_state(vcpu,
5551					   (void __user *) irq_state.buf,
5552					   irq_state.len);
5553		break;
5554	}
5555	case KVM_S390_GET_IRQ_STATE: {
5556		struct kvm_s390_irq_state irq_state;
5557
5558		r = -EFAULT;
5559		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
5560			break;
5561		if (irq_state.len == 0) {
5562			r = -EINVAL;
5563			break;
5564		}
5565		/* do not use irq_state.flags, it will break old QEMUs */
5566		r = kvm_s390_get_irq_state(vcpu,
5567					   (__u8 __user *)  irq_state.buf,
5568					   irq_state.len);
5569		break;
5570	}
5571	case KVM_S390_PV_CPU_COMMAND: {
5572		struct kvm_pv_cmd cmd;
5573
5574		r = -EINVAL;
5575		if (!is_prot_virt_host())
5576			break;
5577
5578		r = -EFAULT;
5579		if (copy_from_user(&cmd, argp, sizeof(cmd)))
5580			break;
5581
5582		r = -EINVAL;
5583		if (cmd.flags)
5584			break;
5585
5586		/* We only handle this cmd right now */
5587		if (cmd.cmd != KVM_PV_DUMP)
5588			break;
5589
5590		r = kvm_s390_handle_pv_vcpu_dump(vcpu, &cmd);
5591
5592		/* Always copy over UV rc / rrc data */
5593		if (copy_to_user((__u8 __user *)argp, &cmd.rc,
5594				 sizeof(cmd.rc) + sizeof(cmd.rrc)))
5595			r = -EFAULT;
5596		break;
5597	}
5598	default:
5599		r = -ENOTTY;
5600	}
5601
5602	vcpu_put(vcpu);
5603	return r;
5604}
5605
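/*
 * For user controlled (ucontrol) VMs, mmap()ing the vcpu fd at
 * KVM_S390_SIE_PAGE_OFFSET exposes the SIE control block to user space;
 * all other faults on the vcpu mapping are answered with SIGBUS.
 */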
5606vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
5607{
5608#ifdef CONFIG_KVM_S390_UCONTROL
5609	if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
5610		 && (kvm_is_ucontrol(vcpu->kvm))) {
5611		vmf->page = virt_to_page(vcpu->arch.sie_block);
5612		get_page(vmf->page);
5613		return 0;
5614	}
5615#endif
5616	return VM_FAULT_SIGBUS;
5617}
5618
5619bool kvm_arch_irqchip_in_kernel(struct kvm *kvm)
5620{
5621	return true;
5622}
5623
5624/* Section: memory related */
5625int kvm_arch_prepare_memory_region(struct kvm *kvm,
5626				   const struct kvm_memory_slot *old,
5627				   struct kvm_memory_slot *new,
5628				   enum kvm_mr_change change)
5629{
5630	gpa_t size;
5631
5632	/* When we are protected, we should not change the memory slots */
5633	if (kvm_s390_pv_get_handle(kvm))
5634		return -EINVAL;
5635
5636	if (change == KVM_MR_DELETE || change == KVM_MR_FLAGS_ONLY)
5637		return 0;
5638
5639	/* A few sanity checks. We can have memory slots which have to be
5640	   located/ended at a segment boundary (1MB). The memory in userland is
5641	   ok to be fragmented into various different vmas. It is okay to mmap()
5642	   and munmap() stuff in this slot after doing this call at any time */
5643
5644	if (new->userspace_addr & 0xffffful)
5645		return -EINVAL;
5646
5647	size = new->npages * PAGE_SIZE;
5648	if (size & 0xffffful)
5649		return -EINVAL;
5650
5651	if ((new->base_gfn * PAGE_SIZE) + size > kvm->arch.mem_limit)
5652		return -EINVAL;
5653
5654	return 0;
5655}
5656
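/*
 * Apply a memslot change to the guest address space (gmap): unmap the old
 * range on DELETE/MOVE and map the new range on CREATE/MOVE; FLAGS_ONLY
 * changes need no gmap update.
 */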
5657void kvm_arch_commit_memory_region(struct kvm *kvm,
5658				struct kvm_memory_slot *old,
5659				const struct kvm_memory_slot *new,
5660				enum kvm_mr_change change)
5661{
5662	int rc = 0;
5663
5664	switch (change) {
5665	case KVM_MR_DELETE:
5666		rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
5667					old->npages * PAGE_SIZE);
5668		break;
5669	case KVM_MR_MOVE:
5670		rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
5671					old->npages * PAGE_SIZE);
5672		if (rc)
5673			break;
5674		fallthrough;
5675	case KVM_MR_CREATE:
5676		rc = gmap_map_segment(kvm->arch.gmap, new->userspace_addr,
5677				      new->base_gfn * PAGE_SIZE,
5678				      new->npages * PAGE_SIZE);
5679		break;
5680	case KVM_MR_FLAGS_ONLY:
5681		break;
5682	default:
5683		WARN(1, "Unknown KVM MR CHANGE: %d\n", change);
5684	}
5685	if (rc)
5686		pr_warn("failed to commit memory region\n");
5687	return;
5688}
5689
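/*
 * Mask of the facility bits in doubleword i that are not hypervisor managed:
 * the two-bit field taken from sclp.hmfai determines how many of the leading
 * 16-bit blocks of that doubleword are masked out before the host facility
 * list is merged into kvm_s390_fac_base below.
 */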
5690static inline unsigned long nonhyp_mask(int i)
5691{
5692	unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;
5693
5694	return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
5695}
5696
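/*
 * Module init: SIE (sief2) support is mandatory, nesting and huge page
 * backing are mutually exclusive, and the host facility list is folded into
 * the base facility mask (non-hypervisor-managed bits only) before
 * registering with the generic KVM code.
 */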
5697static int __init kvm_s390_init(void)
5698{
5699	int i;
5700
5701	if (!sclp.has_sief2) {
5702		pr_info("SIE is not available\n");
5703		return -ENODEV;
5704	}
5705
5706	if (nested && hpage) {
5707		pr_info("A KVM host that supports nesting cannot back its KVM guests with huge pages\n");
5708		return -EINVAL;
5709	}
5710
5711	for (i = 0; i < 16; i++)
5712		kvm_s390_fac_base[i] |=
5713			stfle_fac_list[i] & nonhyp_mask(i);
5714
5715	return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
5716}
5717
5718static void __exit kvm_s390_exit(void)
5719{
5720	kvm_exit();
5721}
5722
5723module_init(kvm_s390_init);
5724module_exit(kvm_s390_exit);
5725
5726/*
5727 * Enable autoloading of the kvm module.
5728 * Note that we add the module alias here instead of virt/kvm/kvm_main.c
5729 * since x86 takes a different approach.
5730 */
5731#include <linux/miscdevice.h>
5732MODULE_ALIAS_MISCDEV(KVM_MINOR);
5733MODULE_ALIAS("devname:kvm");
v4.17
   1// SPDX-License-Identifier: GPL-2.0
   2/*
   3 * hosting IBM Z kernel virtual machines (s390x)
   4 *
   5 * Copyright IBM Corp. 2008, 2018
   6 *
   7 *    Author(s): Carsten Otte <cotte@de.ibm.com>
   8 *               Christian Borntraeger <borntraeger@de.ibm.com>
   9 *               Heiko Carstens <heiko.carstens@de.ibm.com>
  10 *               Christian Ehrhardt <ehrhardt@de.ibm.com>
  11 *               Jason J. Herne <jjherne@us.ibm.com>
  12 */
  13
  14#include <linux/compiler.h>
  15#include <linux/err.h>
  16#include <linux/fs.h>
  17#include <linux/hrtimer.h>
  18#include <linux/init.h>
  19#include <linux/kvm.h>
  20#include <linux/kvm_host.h>
  21#include <linux/mman.h>
  22#include <linux/module.h>
  23#include <linux/moduleparam.h>
  24#include <linux/random.h>
  25#include <linux/slab.h>
  26#include <linux/timer.h>
  27#include <linux/vmalloc.h>
  28#include <linux/bitmap.h>
  29#include <linux/sched/signal.h>
  30#include <linux/string.h>
  31
  32#include <asm/asm-offsets.h>
  33#include <asm/lowcore.h>
  34#include <asm/stp.h>
  35#include <asm/pgtable.h>
  36#include <asm/gmap.h>
  37#include <asm/nmi.h>
  38#include <asm/switch_to.h>
  39#include <asm/isc.h>
  40#include <asm/sclp.h>
  41#include <asm/cpacf.h>
  42#include <asm/timex.h>
  43#include "kvm-s390.h"
  44#include "gaccess.h"
  45
  46#define KMSG_COMPONENT "kvm-s390"
  47#undef pr_fmt
  48#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
  49
  50#define CREATE_TRACE_POINTS
  51#include "trace.h"
  52#include "trace-s390.h"
  53
  54#define MEM_OP_MAX_SIZE 65536	/* Maximum transfer size for KVM_S390_MEM_OP */
  55#define LOCAL_IRQS 32
  56#define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
  57			   (KVM_MAX_VCPUS + LOCAL_IRQS))
  58
  59#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
  60#define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM
  61
  62struct kvm_stats_debugfs_item debugfs_entries[] = {
  63	{ "userspace_handled", VCPU_STAT(exit_userspace) },
  64	{ "exit_null", VCPU_STAT(exit_null) },
  65	{ "exit_validity", VCPU_STAT(exit_validity) },
  66	{ "exit_stop_request", VCPU_STAT(exit_stop_request) },
  67	{ "exit_external_request", VCPU_STAT(exit_external_request) },
  68	{ "exit_io_request", VCPU_STAT(exit_io_request) },
  69	{ "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
  70	{ "exit_instruction", VCPU_STAT(exit_instruction) },
  71	{ "exit_pei", VCPU_STAT(exit_pei) },
  72	{ "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
  73	{ "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
  74	{ "exit_operation_exception", VCPU_STAT(exit_operation_exception) },
  75	{ "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
  76	{ "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
  77	{ "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
  78	{ "halt_wakeup", VCPU_STAT(halt_wakeup) },
  79	{ "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
  80	{ "instruction_lctl", VCPU_STAT(instruction_lctl) },
  81	{ "instruction_stctl", VCPU_STAT(instruction_stctl) },
  82	{ "instruction_stctg", VCPU_STAT(instruction_stctg) },
  83	{ "deliver_ckc", VCPU_STAT(deliver_ckc) },
  84	{ "deliver_cputm", VCPU_STAT(deliver_cputm) },
  85	{ "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
  86	{ "deliver_external_call", VCPU_STAT(deliver_external_call) },
  87	{ "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
  88	{ "deliver_virtio", VCPU_STAT(deliver_virtio) },
  89	{ "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
  90	{ "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
  91	{ "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
  92	{ "deliver_program", VCPU_STAT(deliver_program) },
  93	{ "deliver_io", VCPU_STAT(deliver_io) },
  94	{ "deliver_machine_check", VCPU_STAT(deliver_machine_check) },
  95	{ "exit_wait_state", VCPU_STAT(exit_wait_state) },
  96	{ "inject_ckc", VCPU_STAT(inject_ckc) },
  97	{ "inject_cputm", VCPU_STAT(inject_cputm) },
  98	{ "inject_external_call", VCPU_STAT(inject_external_call) },
  99	{ "inject_float_mchk", VM_STAT(inject_float_mchk) },
 100	{ "inject_emergency_signal", VCPU_STAT(inject_emergency_signal) },
 101	{ "inject_io", VM_STAT(inject_io) },
 102	{ "inject_mchk", VCPU_STAT(inject_mchk) },
 103	{ "inject_pfault_done", VM_STAT(inject_pfault_done) },
 104	{ "inject_program", VCPU_STAT(inject_program) },
 105	{ "inject_restart", VCPU_STAT(inject_restart) },
 106	{ "inject_service_signal", VM_STAT(inject_service_signal) },
 107	{ "inject_set_prefix", VCPU_STAT(inject_set_prefix) },
 108	{ "inject_stop_signal", VCPU_STAT(inject_stop_signal) },
 109	{ "inject_pfault_init", VCPU_STAT(inject_pfault_init) },
 110	{ "inject_virtio", VM_STAT(inject_virtio) },
 111	{ "instruction_epsw", VCPU_STAT(instruction_epsw) },
 112	{ "instruction_gs", VCPU_STAT(instruction_gs) },
 113	{ "instruction_io_other", VCPU_STAT(instruction_io_other) },
 114	{ "instruction_lpsw", VCPU_STAT(instruction_lpsw) },
 115	{ "instruction_lpswe", VCPU_STAT(instruction_lpswe) },
 116	{ "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
 117	{ "instruction_ptff", VCPU_STAT(instruction_ptff) },
 118	{ "instruction_stidp", VCPU_STAT(instruction_stidp) },
 119	{ "instruction_sck", VCPU_STAT(instruction_sck) },
 120	{ "instruction_sckpf", VCPU_STAT(instruction_sckpf) },
 121	{ "instruction_spx", VCPU_STAT(instruction_spx) },
 122	{ "instruction_stpx", VCPU_STAT(instruction_stpx) },
 123	{ "instruction_stap", VCPU_STAT(instruction_stap) },
 124	{ "instruction_iske", VCPU_STAT(instruction_iske) },
 125	{ "instruction_ri", VCPU_STAT(instruction_ri) },
 126	{ "instruction_rrbe", VCPU_STAT(instruction_rrbe) },
 127	{ "instruction_sske", VCPU_STAT(instruction_sske) },
 128	{ "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
 129	{ "instruction_essa", VCPU_STAT(instruction_essa) },
 130	{ "instruction_stsi", VCPU_STAT(instruction_stsi) },
 131	{ "instruction_stfl", VCPU_STAT(instruction_stfl) },
 132	{ "instruction_tb", VCPU_STAT(instruction_tb) },
 133	{ "instruction_tpi", VCPU_STAT(instruction_tpi) },
 134	{ "instruction_tprot", VCPU_STAT(instruction_tprot) },
 135	{ "instruction_tsch", VCPU_STAT(instruction_tsch) },
 136	{ "instruction_sthyi", VCPU_STAT(instruction_sthyi) },
 137	{ "instruction_sie", VCPU_STAT(instruction_sie) },
 138	{ "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
 139	{ "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
 140	{ "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
 141	{ "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
 142	{ "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
 143	{ "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
 144	{ "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
 145	{ "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
 146	{ "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
 147	{ "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
 148	{ "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
 149	{ "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
 150	{ "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
 151	{ "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
 152	{ "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
 153	{ "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
 154	{ "instruction_diag_10", VCPU_STAT(diagnose_10) },
 155	{ "instruction_diag_44", VCPU_STAT(diagnose_44) },
 156	{ "instruction_diag_9c", VCPU_STAT(diagnose_9c) },
 157	{ "instruction_diag_258", VCPU_STAT(diagnose_258) },
 158	{ "instruction_diag_308", VCPU_STAT(diagnose_308) },
 159	{ "instruction_diag_500", VCPU_STAT(diagnose_500) },
 160	{ "instruction_diag_other", VCPU_STAT(diagnose_other) },
 161	{ NULL }
 162};
 163
 164struct kvm_s390_tod_clock_ext {
 165	__u8 epoch_idx;
 166	__u64 tod;
 167	__u8 reserved[7];
 168} __packed;
 169
 170/* allow nested virtualization in KVM (if enabled by user space) */
 171static int nested;
 172module_param(nested, int, S_IRUGO);
 173MODULE_PARM_DESC(nested, "Nested virtualization support");
 174
 175
 176/*
 177 * For now we handle at most 16 double words as this is what the s390 base
 178 * kernel handles and stores in the prefix page. If we ever need to go beyond
 179 * this, this requires changes to code, but the external uapi can stay.
 180 */
 181#define SIZE_INTERNAL 16
 182
 183/*
 184 * Base feature mask that defines default mask for facilities. Consists of the
 185 * defines in FACILITIES_KVM and the non-hypervisor managed bits.
 186 */
 187static unsigned long kvm_s390_fac_base[SIZE_INTERNAL] = { FACILITIES_KVM };
 188/*
 189 * Extended feature mask. Consists of the defines in FACILITIES_KVM_CPUMODEL
 190 * and defines the facilities that can be enabled via a cpu model.
 191 */
 192static unsigned long kvm_s390_fac_ext[SIZE_INTERNAL] = { FACILITIES_KVM_CPUMODEL };
 193
 194static unsigned long kvm_s390_fac_size(void)
 195{
 196	BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_MASK_SIZE_U64);
 197	BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_LIST_SIZE_U64);
 198	BUILD_BUG_ON(SIZE_INTERNAL * sizeof(unsigned long) >
 199		sizeof(S390_lowcore.stfle_fac_list));
 200
 201	return SIZE_INTERNAL;
 202}
 203
 204/* available cpu features supported by kvm */
 205static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
 206/* available subfunctions indicated via query / "test bit" */
 207static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;
 208
 209static struct gmap_notifier gmap_notifier;
 210static struct gmap_notifier vsie_gmap_notifier;
 211debug_info_t *kvm_s390_dbf;
 212
 213/* Section: not file related */
 214int kvm_arch_hardware_enable(void)
 215{
 216	/* every s390 is virtualization enabled ;-) */
 217	return 0;
 218}
 219
 220static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
 221			      unsigned long end);
 222
 223static void kvm_clock_sync_scb(struct kvm_s390_sie_block *scb, u64 delta)
 224{
 225	u8 delta_idx = 0;
 226
 227	/*
 228	 * The TOD jumps by delta, we have to compensate this by adding
 229	 * -delta to the epoch.
 230	 */
 231	delta = -delta;
 232
 233	/* sign-extension - we're adding to signed values below */
 234	if ((s64)delta < 0)
 235		delta_idx = -1;
 236
 237	scb->epoch += delta;
 238	if (scb->ecd & ECD_MEF) {
 239		scb->epdx += delta_idx;
 240		if (scb->epoch < delta)
 241			scb->epdx += 1;
 242	}
 243}
 244
 245/*
 246 * This callback is executed during stop_machine(). All CPUs are therefore
 247 * temporarily stopped. In order not to change guest behavior, we have to
 248 * disable preemption whenever we touch the epoch of kvm and the VCPUs,
 249 * so a CPU won't be stopped while calculating with the epoch.
 250 */
 251static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
 252			  void *v)
 253{
 254	struct kvm *kvm;
 255	struct kvm_vcpu *vcpu;
 256	int i;
 257	unsigned long long *delta = v;
 258
 259	list_for_each_entry(kvm, &vm_list, vm_list) {
 260		kvm_for_each_vcpu(i, vcpu, kvm) {
 261			kvm_clock_sync_scb(vcpu->arch.sie_block, *delta);
 262			if (i == 0) {
 263				kvm->arch.epoch = vcpu->arch.sie_block->epoch;
 264				kvm->arch.epdx = vcpu->arch.sie_block->epdx;
 265			}
 266			if (vcpu->arch.cputm_enabled)
 267				vcpu->arch.cputm_start += *delta;
 268			if (vcpu->arch.vsie_block)
 269				kvm_clock_sync_scb(vcpu->arch.vsie_block,
 270						   *delta);
 271		}
 272	}
 273	return NOTIFY_OK;
 274}
 275
 276static struct notifier_block kvm_clock_notifier = {
 277	.notifier_call = kvm_clock_sync,
 278};
 279
 280int kvm_arch_hardware_setup(void)
 281{
 282	gmap_notifier.notifier_call = kvm_gmap_notifier;
 283	gmap_register_pte_notifier(&gmap_notifier);
 284	vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
 285	gmap_register_pte_notifier(&vsie_gmap_notifier);
 286	atomic_notifier_chain_register(&s390_epoch_delta_notifier,
 287				       &kvm_clock_notifier);
 288	return 0;
 289}
 290
 291void kvm_arch_hardware_unsetup(void)
 292{
 293	gmap_unregister_pte_notifier(&gmap_notifier);
 294	gmap_unregister_pte_notifier(&vsie_gmap_notifier);
 295	atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
 296					 &kvm_clock_notifier);
 297}
 298
 299static void allow_cpu_feat(unsigned long nr)
 300{
 301	set_bit_inv(nr, kvm_s390_available_cpu_feat);
 302}
 303
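/*
 * Check whether PERFORM LOCKED OPERATION function 'nr' is installed by
 * issuing PLO with the test bit (0x100) set in the function code; condition
 * code 0 means the function is available.
 */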
 304static inline int plo_test_bit(unsigned char nr)
 305{
 306	register unsigned long r0 asm("0") = (unsigned long) nr | 0x100;
 307	int cc;
 308
 309	asm volatile(
 310		/* Parameter registers are ignored for "test bit" */
 311		"	plo	0,0,0,0(0)\n"
 312		"	ipm	%0\n"
 313		"	srl	%0,28\n"
 314		: "=d" (cc)
 315		: "d" (r0)
 316		: "cc");
 317	return cc == 0;
 318}
 319
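/*
 * Probe the host for the CPU features that can be offered to guests via the
 * cpu model: installed PLO functions, PTFF subfunctions, the CPACF (MSA)
 * query masks, and the SIE facilities reported by SCLP. Shadowing
 * restrictions for the vSIE are documented at the end of the function.
 */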
 320static void kvm_s390_cpu_feat_init(void)
 321{
 322	int i;
 323
 324	for (i = 0; i < 256; ++i) {
 325		if (plo_test_bit(i))
 326			kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
 327	}
 328
 329	if (test_facility(28)) /* TOD-clock steering */
 330		ptff(kvm_s390_available_subfunc.ptff,
 331		     sizeof(kvm_s390_available_subfunc.ptff),
 332		     PTFF_QAF);
 333
 334	if (test_facility(17)) { /* MSA */
 335		__cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
 336			      kvm_s390_available_subfunc.kmac);
 337		__cpacf_query(CPACF_KMC, (cpacf_mask_t *)
 338			      kvm_s390_available_subfunc.kmc);
 339		__cpacf_query(CPACF_KM, (cpacf_mask_t *)
 340			      kvm_s390_available_subfunc.km);
 341		__cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
 342			      kvm_s390_available_subfunc.kimd);
 343		__cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
 344			      kvm_s390_available_subfunc.klmd);
 345	}
 346	if (test_facility(76)) /* MSA3 */
 347		__cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
 348			      kvm_s390_available_subfunc.pckmo);
 349	if (test_facility(77)) { /* MSA4 */
 350		__cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
 351			      kvm_s390_available_subfunc.kmctr);
 352		__cpacf_query(CPACF_KMF, (cpacf_mask_t *)
 353			      kvm_s390_available_subfunc.kmf);
 354		__cpacf_query(CPACF_KMO, (cpacf_mask_t *)
 355			      kvm_s390_available_subfunc.kmo);
 356		__cpacf_query(CPACF_PCC, (cpacf_mask_t *)
 357			      kvm_s390_available_subfunc.pcc);
 358	}
 359	if (test_facility(57)) /* MSA5 */
 360		__cpacf_query(CPACF_PRNO, (cpacf_mask_t *)
 361			      kvm_s390_available_subfunc.ppno);
 362
 363	if (test_facility(146)) /* MSA8 */
 364		__cpacf_query(CPACF_KMA, (cpacf_mask_t *)
 365			      kvm_s390_available_subfunc.kma);
 366
 367	if (MACHINE_HAS_ESOP)
 368		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
 369	/*
 370	 * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
 371	 * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
 372	 */
 373	if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
 374	    !test_facility(3) || !nested)
 375		return;
 376	allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
 377	if (sclp.has_64bscao)
 378		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
 379	if (sclp.has_siif)
 380		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
 381	if (sclp.has_gpere)
 382		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
 383	if (sclp.has_gsls)
 384		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
 385	if (sclp.has_ib)
 386		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
 387	if (sclp.has_cei)
 388		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
 389	if (sclp.has_ibs)
 390		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
 391	if (sclp.has_kss)
 392		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS);
 393	/*
 394	 * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
 395	 * all skey handling functions read/set the skey from the PGSTE
 396	 * instead of the real storage key.
 397	 *
 398	 * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will make
 399	 * pages being detected as preserved although they are resident.
 400	 *
 401	 * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
 402	 * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
 403	 *
 404	 * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
 405	 * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
 406	 * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
 407	 *
 408	 * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
 409	 * cannot easily shadow the SCA because of the ipte lock.
 410	 */
 411}
 412
 413int kvm_arch_init(void *opaque)
 414{
 415	kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
 416	if (!kvm_s390_dbf)
 417		return -ENOMEM;
 418
 419	if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) {
 420		debug_unregister(kvm_s390_dbf);
 421		return -ENOMEM;
 422	}
 423
 424	kvm_s390_cpu_feat_init();
 425
 426	/* Register floating interrupt controller interface. */
 427	return kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
 428}
 429
 430void kvm_arch_exit(void)
 431{
 432	debug_unregister(kvm_s390_dbf);
 433}
 434
 435/* Section: device related */
 436long kvm_arch_dev_ioctl(struct file *filp,
 437			unsigned int ioctl, unsigned long arg)
 438{
 439	if (ioctl == KVM_S390_ENABLE_SIE)
 440		return s390_enable_sie();
 441	return -EINVAL;
 442}
 443
 444int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 445{
 446	int r;
 447
 448	switch (ext) {
 449	case KVM_CAP_S390_PSW:
 450	case KVM_CAP_S390_GMAP:
 451	case KVM_CAP_SYNC_MMU:
 452#ifdef CONFIG_KVM_S390_UCONTROL
 453	case KVM_CAP_S390_UCONTROL:
 454#endif
 455	case KVM_CAP_ASYNC_PF:
 456	case KVM_CAP_SYNC_REGS:
 457	case KVM_CAP_ONE_REG:
 458	case KVM_CAP_ENABLE_CAP:
 459	case KVM_CAP_S390_CSS_SUPPORT:
 460	case KVM_CAP_IOEVENTFD:
 461	case KVM_CAP_DEVICE_CTRL:
 462	case KVM_CAP_ENABLE_CAP_VM:
 463	case KVM_CAP_S390_IRQCHIP:
 464	case KVM_CAP_VM_ATTRIBUTES:
 465	case KVM_CAP_MP_STATE:
 466	case KVM_CAP_IMMEDIATE_EXIT:
 467	case KVM_CAP_S390_INJECT_IRQ:
 468	case KVM_CAP_S390_USER_SIGP:
 469	case KVM_CAP_S390_USER_STSI:
 470	case KVM_CAP_S390_SKEYS:
 471	case KVM_CAP_S390_IRQ_STATE:
 472	case KVM_CAP_S390_USER_INSTR0:
 473	case KVM_CAP_S390_CMMA_MIGRATION:
 474	case KVM_CAP_S390_AIS:
 475	case KVM_CAP_S390_AIS_MIGRATION:
 476		r = 1;
 477		break;
 478	case KVM_CAP_S390_MEM_OP:
 479		r = MEM_OP_MAX_SIZE;
 480		break;
 481	case KVM_CAP_NR_VCPUS:
 482	case KVM_CAP_MAX_VCPUS:
 483		r = KVM_S390_BSCA_CPU_SLOTS;
 484		if (!kvm_s390_use_sca_entries())
 485			r = KVM_MAX_VCPUS;
 486		else if (sclp.has_esca && sclp.has_64bscao)
 487			r = KVM_S390_ESCA_CPU_SLOTS;
 488		break;
 489	case KVM_CAP_NR_MEMSLOTS:
 490		r = KVM_USER_MEM_SLOTS;
 491		break;
 492	case KVM_CAP_S390_COW:
 493		r = MACHINE_HAS_ESOP;
 494		break;
 495	case KVM_CAP_S390_VECTOR_REGISTERS:
 496		r = MACHINE_HAS_VX;
 497		break;
 498	case KVM_CAP_S390_RI:
 499		r = test_facility(64);
 500		break;
 501	case KVM_CAP_S390_GS:
 502		r = test_facility(133);
 503		break;
 504	case KVM_CAP_S390_BPB:
 505		r = test_facility(82);
 506		break;
 507	default:
 508		r = 0;
 509	}
 510	return r;
 511}
 512
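/*
 * Transfer the dirty state that was collected in the guest mapping (gmap)
 * into KVM's dirty bitmap, page by page, for the given memslot.
 */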
 513static void kvm_s390_sync_dirty_log(struct kvm *kvm,
 514					struct kvm_memory_slot *memslot)
 515{
 516	gfn_t cur_gfn, last_gfn;
 517	unsigned long address;
 518	struct gmap *gmap = kvm->arch.gmap;
 519
 520	/* Loop over all guest pages */
 521	last_gfn = memslot->base_gfn + memslot->npages;
 522	for (cur_gfn = memslot->base_gfn; cur_gfn <= last_gfn; cur_gfn++) {
 523		address = gfn_to_hva_memslot(memslot, cur_gfn);
 524
 525		if (test_and_clear_guest_dirty(gmap->mm, address))
 526			mark_page_dirty(kvm, cur_gfn);
 527		if (fatal_signal_pending(current))
 528			return;
 529		cond_resched();
 530	}
 531}
 532
 533/* Section: vm related */
 534static void sca_del_vcpu(struct kvm_vcpu *vcpu);
 535
 536/*
 537 * Get (and clear) the dirty memory log for a memory slot.
 538 */
 539int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
 540			       struct kvm_dirty_log *log)
 541{
 542	int r;
 543	unsigned long n;
 544	struct kvm_memslots *slots;
 545	struct kvm_memory_slot *memslot;
 546	int is_dirty = 0;
 547
 548	if (kvm_is_ucontrol(kvm))
 549		return -EINVAL;
 550
 551	mutex_lock(&kvm->slots_lock);
 552
 553	r = -EINVAL;
 554	if (log->slot >= KVM_USER_MEM_SLOTS)
 555		goto out;
 556
 557	slots = kvm_memslots(kvm);
 558	memslot = id_to_memslot(slots, log->slot);
 559	r = -ENOENT;
 560	if (!memslot->dirty_bitmap)
 561		goto out;
 562
 563	kvm_s390_sync_dirty_log(kvm, memslot);
 564	r = kvm_get_dirty_log(kvm, log, &is_dirty);
 565	if (r)
 566		goto out;
 567
 568	/* Clear the dirty log */
 569	if (is_dirty) {
 570		n = kvm_dirty_bitmap_bytes(memslot);
 571		memset(memslot->dirty_bitmap, 0, n);
 572	}
 573	r = 0;
 574out:
 575	mutex_unlock(&kvm->slots_lock);
 576	return r;
 577}
 578
 579static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
 580{
 581	unsigned int i;
 582	struct kvm_vcpu *vcpu;
 583
 584	kvm_for_each_vcpu(i, vcpu, kvm) {
 585		kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
 586	}
 587}
 588
 589static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
 590{
 591	int r;
 592
 593	if (cap->flags)
 594		return -EINVAL;
 595
 596	switch (cap->cap) {
 597	case KVM_CAP_S390_IRQCHIP:
 598		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
 599		kvm->arch.use_irqchip = 1;
 600		r = 0;
 601		break;
 602	case KVM_CAP_S390_USER_SIGP:
 603		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
 604		kvm->arch.user_sigp = 1;
 605		r = 0;
 606		break;
 607	case KVM_CAP_S390_VECTOR_REGISTERS:
 608		mutex_lock(&kvm->lock);
 609		if (kvm->created_vcpus) {
 610			r = -EBUSY;
 611		} else if (MACHINE_HAS_VX) {
 612			set_kvm_facility(kvm->arch.model.fac_mask, 129);
 613			set_kvm_facility(kvm->arch.model.fac_list, 129);
 614			if (test_facility(134)) {
 615				set_kvm_facility(kvm->arch.model.fac_mask, 134);
 616				set_kvm_facility(kvm->arch.model.fac_list, 134);
 617			}
 618			if (test_facility(135)) {
 619				set_kvm_facility(kvm->arch.model.fac_mask, 135);
 620				set_kvm_facility(kvm->arch.model.fac_list, 135);
 621			}
 622			r = 0;
 623		} else
 624			r = -EINVAL;
 625		mutex_unlock(&kvm->lock);
 626		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
 627			 r ? "(not available)" : "(success)");
 628		break;
 629	case KVM_CAP_S390_RI:
 630		r = -EINVAL;
 631		mutex_lock(&kvm->lock);
 632		if (kvm->created_vcpus) {
 633			r = -EBUSY;
 634		} else if (test_facility(64)) {
 635			set_kvm_facility(kvm->arch.model.fac_mask, 64);
 636			set_kvm_facility(kvm->arch.model.fac_list, 64);
 637			r = 0;
 638		}
 639		mutex_unlock(&kvm->lock);
 640		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
 641			 r ? "(not available)" : "(success)");
 642		break;
 643	case KVM_CAP_S390_AIS:
 644		mutex_lock(&kvm->lock);
 645		if (kvm->created_vcpus) {
 646			r = -EBUSY;
 647		} else {
 648			set_kvm_facility(kvm->arch.model.fac_mask, 72);
 649			set_kvm_facility(kvm->arch.model.fac_list, 72);
 650			r = 0;
 651		}
 652		mutex_unlock(&kvm->lock);
 653		VM_EVENT(kvm, 3, "ENABLE: AIS %s",
 654			 r ? "(not available)" : "(success)");
 655		break;
 656	case KVM_CAP_S390_GS:
 657		r = -EINVAL;
 658		mutex_lock(&kvm->lock);
 659		if (kvm->created_vcpus) {
 660			r = -EBUSY;
 661		} else if (test_facility(133)) {
 662			set_kvm_facility(kvm->arch.model.fac_mask, 133);
 663			set_kvm_facility(kvm->arch.model.fac_list, 133);
 664			r = 0;
 665		}
 666		mutex_unlock(&kvm->lock);
 667		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s",
 668			 r ? "(not available)" : "(success)");
 669		break;
 670	case KVM_CAP_S390_USER_STSI:
 671		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
 672		kvm->arch.user_stsi = 1;
 673		r = 0;
 674		break;
 675	case KVM_CAP_S390_USER_INSTR0:
 676		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
 677		kvm->arch.user_instr0 = 1;
 678		icpt_operexc_on_all_vcpus(kvm);
 679		r = 0;
 680		break;
 681	default:
 682		r = -EINVAL;
 683		break;
 684	}
 685	return r;
 686}
 687
 688static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
 689{
 690	int ret;
 691
 692	switch (attr->attr) {
 693	case KVM_S390_VM_MEM_LIMIT_SIZE:
 694		ret = 0;
 695		VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
 696			 kvm->arch.mem_limit);
 697		if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
 698			ret = -EFAULT;
 699		break;
 700	default:
 701		ret = -ENXIO;
 702		break;
 703	}
 704	return ret;
 705}
 706
 707static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
 708{
 709	int ret;
 710	unsigned int idx;
 711	switch (attr->attr) {
 712	case KVM_S390_VM_MEM_ENABLE_CMMA:
 713		ret = -ENXIO;
 714		if (!sclp.has_cmma)
 715			break;
 716
 717		ret = -EBUSY;
 718		VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
 719		mutex_lock(&kvm->lock);
 720		if (!kvm->created_vcpus) {
 721			kvm->arch.use_cmma = 1;
 722			/* Not compatible with cmma. */
 723			kvm->arch.use_pfmfi = 0;
 724			ret = 0;
 725		}
 726		mutex_unlock(&kvm->lock);
 727		break;
 728	case KVM_S390_VM_MEM_CLR_CMMA:
 729		ret = -ENXIO;
 730		if (!sclp.has_cmma)
 731			break;
 732		ret = -EINVAL;
 733		if (!kvm->arch.use_cmma)
 734			break;
 735
 736		VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
 737		mutex_lock(&kvm->lock);
 738		idx = srcu_read_lock(&kvm->srcu);
 739		s390_reset_cmma(kvm->arch.gmap->mm);
 740		srcu_read_unlock(&kvm->srcu, idx);
 741		mutex_unlock(&kvm->lock);
 742		ret = 0;
 743		break;
 744	case KVM_S390_VM_MEM_LIMIT_SIZE: {
 745		unsigned long new_limit;
 746
 747		if (kvm_is_ucontrol(kvm))
 748			return -EINVAL;
 749
 750		if (get_user(new_limit, (u64 __user *)attr->addr))
 751			return -EFAULT;
 752
 753		if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
 754		    new_limit > kvm->arch.mem_limit)
 755			return -E2BIG;
 756
 757		if (!new_limit)
 758			return -EINVAL;
 759
 760		/* gmap_create takes last usable address */
 761		if (new_limit != KVM_S390_NO_MEM_LIMIT)
 762			new_limit -= 1;
 763
 764		ret = -EBUSY;
 765		mutex_lock(&kvm->lock);
 766		if (!kvm->created_vcpus) {
 767			/* gmap_create will round the limit up */
 768			struct gmap *new = gmap_create(current->mm, new_limit);
 769
 770			if (!new) {
 771				ret = -ENOMEM;
 772			} else {
 773				gmap_remove(kvm->arch.gmap);
 774				new->private = kvm;
 775				kvm->arch.gmap = new;
 776				ret = 0;
 777			}
 778		}
 779		mutex_unlock(&kvm->lock);
 780		VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
 781		VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
 782			 (void *) kvm->arch.gmap->asce);
 783		break;
 784	}
 785	default:
 786		ret = -ENXIO;
 787		break;
 788	}
 789	return ret;
 790}
 791
 792static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
 793
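/*
 * Enable or disable AES/DEA key wrapping for the guest. Enabling generates a
 * fresh random wrapping key mask in the CRYCB, disabling clears it; in both
 * cases every vcpu is kicked out of SIE so its crypto setup gets refreshed.
 */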
 794static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
 795{
 796	struct kvm_vcpu *vcpu;
 797	int i;
 798
 799	if (!test_kvm_facility(kvm, 76))
 800		return -EINVAL;
 801
 802	mutex_lock(&kvm->lock);
 803	switch (attr->attr) {
 804	case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
 805		get_random_bytes(
 806			kvm->arch.crypto.crycb->aes_wrapping_key_mask,
 807			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
 808		kvm->arch.crypto.aes_kw = 1;
 809		VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
 810		break;
 811	case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
 812		get_random_bytes(
 813			kvm->arch.crypto.crycb->dea_wrapping_key_mask,
 814			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
 815		kvm->arch.crypto.dea_kw = 1;
 816		VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
 817		break;
 818	case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
 819		kvm->arch.crypto.aes_kw = 0;
 820		memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
 821			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
 822		VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
 823		break;
 824	case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
 825		kvm->arch.crypto.dea_kw = 0;
 826		memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
 827			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
 828		VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
 829		break;
 830	default:
 831		mutex_unlock(&kvm->lock);
 832		return -ENXIO;
 833	}
 834
 835	kvm_for_each_vcpu(i, vcpu, kvm) {
 836		kvm_s390_vcpu_crypto_setup(vcpu);
 837		exit_sie(vcpu);
 838	}
 839	mutex_unlock(&kvm->lock);
 840	return 0;
 841}
 842
 843static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
 844{
 845	int cx;
 846	struct kvm_vcpu *vcpu;
 847
 848	kvm_for_each_vcpu(cx, vcpu, kvm)
 849		kvm_s390_sync_request(req, vcpu);
 850}
 851
 852/*
 853 * Must be called with kvm->srcu held to avoid races on memslots, and with
 854 * kvm->slots_lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
 855 */
 856static int kvm_s390_vm_start_migration(struct kvm *kvm)
 857{
 858	struct kvm_s390_migration_state *mgs;
 859	struct kvm_memory_slot *ms;
 860	/* should be the only one */
 861	struct kvm_memslots *slots;
 862	unsigned long ram_pages;
 863	int slotnr;
 864
 865	/* migration mode already enabled */
 866	if (kvm->arch.migration_state)
 867		return 0;
 868
 869	slots = kvm_memslots(kvm);
 870	if (!slots || !slots->used_slots)
 871		return -EINVAL;
 872
 873	mgs = kzalloc(sizeof(*mgs), GFP_KERNEL);
 874	if (!mgs)
 875		return -ENOMEM;
 876	kvm->arch.migration_state = mgs;
 877
 878	if (kvm->arch.use_cmma) {
 879		/*
 880		 * Get the first slot. They are reverse sorted by base_gfn, so
 881		 * the first slot is also the one at the end of the address
 882		 * space. We have verified above that at least one slot is
 883		 * present.
 884		 */
 885		ms = slots->memslots;
 886		/* round up so we only use full longs */
 887		ram_pages = roundup(ms->base_gfn + ms->npages, BITS_PER_LONG);
 888		/* allocate enough bytes to store all the bits */
 889		mgs->pgste_bitmap = vmalloc(ram_pages / 8);
 890		if (!mgs->pgste_bitmap) {
 891			kfree(mgs);
 892			kvm->arch.migration_state = NULL;
 893			return -ENOMEM;
 894		}
 895
 896		mgs->bitmap_size = ram_pages;
 897		atomic64_set(&mgs->dirty_pages, ram_pages);
 898		/* mark all the pages in active slots as dirty */
 899		for (slotnr = 0; slotnr < slots->used_slots; slotnr++) {
 900			ms = slots->memslots + slotnr;
 901			bitmap_set(mgs->pgste_bitmap, ms->base_gfn, ms->npages);
 902		}
 903
 904		kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
 905	}
 906	return 0;
 907}
 908
 909/*
 910 * Must be called with kvm->slots_lock to avoid races with ourselves and
 911 * kvm_s390_vm_start_migration.
 912 */
 913static int kvm_s390_vm_stop_migration(struct kvm *kvm)
 914{
 915	struct kvm_s390_migration_state *mgs;
 916
 917	/* migration mode already disabled */
 918	if (!kvm->arch.migration_state)
 919		return 0;
 920	mgs = kvm->arch.migration_state;
 921	kvm->arch.migration_state = NULL;
 922
 923	if (kvm->arch.use_cmma) {
 924		kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
 925		/* We have to wait for the essa emulation to finish */
 926		synchronize_srcu(&kvm->srcu);
 927		vfree(mgs->pgste_bitmap);
 928	}
 929	kfree(mgs);
 930	return 0;
 931}
 932
 933static int kvm_s390_vm_set_migration(struct kvm *kvm,
 934				     struct kvm_device_attr *attr)
 935{
 936	int res = -ENXIO;
 937
 938	mutex_lock(&kvm->slots_lock);
 939	switch (attr->attr) {
 940	case KVM_S390_VM_MIGRATION_START:
 941		res = kvm_s390_vm_start_migration(kvm);
 942		break;
 943	case KVM_S390_VM_MIGRATION_STOP:
 944		res = kvm_s390_vm_stop_migration(kvm);
 945		break;
 946	default:
 947		break;
 948	}
 949	mutex_unlock(&kvm->slots_lock);
 950
 951	return res;
 952}
 953
 954static int kvm_s390_vm_get_migration(struct kvm *kvm,
 955				     struct kvm_device_attr *attr)
 956{
 957	u64 mig = (kvm->arch.migration_state != NULL);
 958
 959	if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
 960		return -ENXIO;
 961
 962	if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))
 963		return -EFAULT;
 964	return 0;
 965}
 966
 967static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
 968{
 969	struct kvm_s390_vm_tod_clock gtod;
 970
 971	if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
 972		return -EFAULT;
 973
 974	if (!test_kvm_facility(kvm, 139) && gtod.epoch_idx)
 975		return -EINVAL;
 976	kvm_s390_set_tod_clock(kvm, &gtod);
 977
 978	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx",
 979		gtod.epoch_idx, gtod.tod);
 980
 981	return 0;
 982}
 983
 984static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
 985{
 986	u8 gtod_high;
 987
 988	if (copy_from_user(&gtod_high, (void __user *)attr->addr,
 989					   sizeof(gtod_high)))
 990		return -EFAULT;
 991
 992	if (gtod_high != 0)
 993		return -EINVAL;
 994	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);
 995
 996	return 0;
 997}
 998
 999static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1000{
1001	struct kvm_s390_vm_tod_clock gtod = { 0 };
1002
1003	if (copy_from_user(&gtod.tod, (void __user *)attr->addr,
1004			   sizeof(gtod.tod)))
1005		return -EFAULT;
1006
1007	kvm_s390_set_tod_clock(kvm, &gtod);
1008	VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod.tod);
1009	return 0;
1010}
1011
1012static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1013{
1014	int ret;
1015
1016	if (attr->flags)
1017		return -EINVAL;
1018
1019	switch (attr->attr) {
1020	case KVM_S390_VM_TOD_EXT:
1021		ret = kvm_s390_set_tod_ext(kvm, attr);
1022		break;
1023	case KVM_S390_VM_TOD_HIGH:
1024		ret = kvm_s390_set_tod_high(kvm, attr);
1025		break;
1026	case KVM_S390_VM_TOD_LOW:
1027		ret = kvm_s390_set_tod_low(kvm, attr);
1028		break;
1029	default:
1030		ret = -ENXIO;
1031		break;
1032	}
1033	return ret;
1034}
1035
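/*
 * Read the guest's extended TOD clock: the VM's epoch (and epoch index) is
 * added to the current host TOD, with a carry from the TOD propagated into
 * the epoch index.
 */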
1036static void kvm_s390_get_tod_clock_ext(struct kvm *kvm,
1037					struct kvm_s390_vm_tod_clock *gtod)
1038{
1039	struct kvm_s390_tod_clock_ext htod;
1040
1041	preempt_disable();
1042
1043	get_tod_clock_ext((char *)&htod);
1044
1045	gtod->tod = htod.tod + kvm->arch.epoch;
1046	gtod->epoch_idx = htod.epoch_idx + kvm->arch.epdx;
1047
1048	if (gtod->tod < htod.tod)
1049		gtod->epoch_idx += 1;
1050
1051	preempt_enable();
1052}
1053
1054static int kvm_s390_get_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
1055{
1056	struct kvm_s390_vm_tod_clock gtod;
1057
1058	memset(&gtod, 0, sizeof(gtod));
1059
1060	if (test_kvm_facility(kvm, 139))
1061		kvm_s390_get_tod_clock_ext(kvm, &gtod);
1062	else
1063		gtod.tod = kvm_s390_get_tod_clock_fast(kvm);
1064
1065	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
1066		return -EFAULT;
1067
1068	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x, TOD base: 0x%llx",
1069		gtod.epoch_idx, gtod.tod);
1070	return 0;
1071}
1072
1073static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
1074{
1075	u8 gtod_high = 0;
1076
1077	if (copy_to_user((void __user *)attr->addr, &gtod_high,
1078					 sizeof(gtod_high)))
1079		return -EFAULT;
1080	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);
1081
1082	return 0;
1083}
1084
1085static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1086{
1087	u64 gtod;
1088
1089	gtod = kvm_s390_get_tod_clock_fast(kvm);
1090	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
1091		return -EFAULT;
1092	VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);
1093
1094	return 0;
1095}
1096
1097static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1098{
1099	int ret;
1100
1101	if (attr->flags)
1102		return -EINVAL;
1103
1104	switch (attr->attr) {
1105	case KVM_S390_VM_TOD_EXT:
1106		ret = kvm_s390_get_tod_ext(kvm, attr);
1107		break;
1108	case KVM_S390_VM_TOD_HIGH:
1109		ret = kvm_s390_get_tod_high(kvm, attr);
1110		break;
1111	case KVM_S390_VM_TOD_LOW:
1112		ret = kvm_s390_get_tod_low(kvm, attr);
1113		break;
1114	default:
1115		ret = -ENXIO;
1116		break;
1117	}
1118	return ret;
1119}
1120
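/*
 * Set the guest cpu model (cpuid, IBC and facility list) from user space.
 * The IBC value is clamped to the range the machine supports, and changes
 * are only possible as long as no vcpu has been created yet.
 */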
1121static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1122{
1123	struct kvm_s390_vm_cpu_processor *proc;
1124	u16 lowest_ibc, unblocked_ibc;
1125	int ret = 0;
1126
1127	mutex_lock(&kvm->lock);
1128	if (kvm->created_vcpus) {
1129		ret = -EBUSY;
1130		goto out;
1131	}
1132	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
1133	if (!proc) {
1134		ret = -ENOMEM;
1135		goto out;
1136	}
1137	if (!copy_from_user(proc, (void __user *)attr->addr,
1138			    sizeof(*proc))) {
1139		kvm->arch.model.cpuid = proc->cpuid;
1140		lowest_ibc = sclp.ibc >> 16 & 0xfff;
1141		unblocked_ibc = sclp.ibc & 0xfff;
1142		if (lowest_ibc && proc->ibc) {
1143			if (proc->ibc > unblocked_ibc)
1144				kvm->arch.model.ibc = unblocked_ibc;
1145			else if (proc->ibc < lowest_ibc)
1146				kvm->arch.model.ibc = lowest_ibc;
1147			else
1148				kvm->arch.model.ibc = proc->ibc;
1149		}
1150		memcpy(kvm->arch.model.fac_list, proc->fac_list,
1151		       S390_ARCH_FAC_LIST_SIZE_BYTE);
1152		VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1153			 kvm->arch.model.ibc,
1154			 kvm->arch.model.cpuid);
1155		VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1156			 kvm->arch.model.fac_list[0],
1157			 kvm->arch.model.fac_list[1],
1158			 kvm->arch.model.fac_list[2]);
1159	} else
1160		ret = -EFAULT;
1161	kfree(proc);
1162out:
1163	mutex_unlock(&kvm->lock);
1164	return ret;
1165}
1166
1167static int kvm_s390_set_processor_feat(struct kvm *kvm,
1168				       struct kvm_device_attr *attr)
1169{
1170	struct kvm_s390_vm_cpu_feat data;
1171
1172	if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
1173		return -EFAULT;
1174	if (!bitmap_subset((unsigned long *) data.feat,
1175			   kvm_s390_available_cpu_feat,
1176			   KVM_S390_VM_CPU_FEAT_NR_BITS))
1177		return -EINVAL;
1178
1179	mutex_lock(&kvm->lock);
1180	if (kvm->created_vcpus) {
1181		mutex_unlock(&kvm->lock);
1182		return -EBUSY;
1183	}
1184	bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
1185		    KVM_S390_VM_CPU_FEAT_NR_BITS);
1186	mutex_unlock(&kvm->lock);
1187	VM_EVENT(kvm, 3, "SET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1188			 data.feat[0],
1189			 data.feat[1],
1190			 data.feat[2]);
1191	return 0;
1192}
1193
1194static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
1195					  struct kvm_device_attr *attr)
1196{
1197	/*
1198	 * Once supported by kernel + hw, we have to store the subfunctions
1199	 * in kvm->arch and remember that user space configured them.
1200	 */
1201	return -ENXIO;
1202}
1203
1204static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1205{
1206	int ret = -ENXIO;
1207
1208	switch (attr->attr) {
1209	case KVM_S390_VM_CPU_PROCESSOR:
1210		ret = kvm_s390_set_processor(kvm, attr);
1211		break;
1212	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1213		ret = kvm_s390_set_processor_feat(kvm, attr);
1214		break;
1215	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1216		ret = kvm_s390_set_processor_subfunc(kvm, attr);
1217		break;
1218	}
1219	return ret;
1220}
1221
1222static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1223{
1224	struct kvm_s390_vm_cpu_processor *proc;
1225	int ret = 0;
1226
1227	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
1228	if (!proc) {
1229		ret = -ENOMEM;
1230		goto out;
1231	}
1232	proc->cpuid = kvm->arch.model.cpuid;
1233	proc->ibc = kvm->arch.model.ibc;
1234	memcpy(&proc->fac_list, kvm->arch.model.fac_list,
1235	       S390_ARCH_FAC_LIST_SIZE_BYTE);
1236	VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1237		 kvm->arch.model.ibc,
1238		 kvm->arch.model.cpuid);
1239	VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1240		 kvm->arch.model.fac_list[0],
1241		 kvm->arch.model.fac_list[1],
1242		 kvm->arch.model.fac_list[2]);
1243	if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
1244		ret = -EFAULT;
1245	kfree(proc);
1246out:
1247	return ret;
1248}
1249
1250static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
1251{
1252	struct kvm_s390_vm_cpu_machine *mach;
1253	int ret = 0;
1254
1255	mach = kzalloc(sizeof(*mach), GFP_KERNEL);
1256	if (!mach) {
1257		ret = -ENOMEM;
1258		goto out;
1259	}
1260	get_cpu_id((struct cpuid *) &mach->cpuid);
1261	mach->ibc = sclp.ibc;
1262	memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
1263	       S390_ARCH_FAC_LIST_SIZE_BYTE);
1264	memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
1265	       sizeof(S390_lowcore.stfle_fac_list));
1266	VM_EVENT(kvm, 3, "GET: host ibc:  0x%4.4x, host cpuid:  0x%16.16llx",
1267		 kvm->arch.model.ibc,
1268		 kvm->arch.model.cpuid);
1269	VM_EVENT(kvm, 3, "GET: host facmask:  0x%16.16llx.%16.16llx.%16.16llx",
1270		 mach->fac_mask[0],
1271		 mach->fac_mask[1],
1272		 mach->fac_mask[2]);
1273	VM_EVENT(kvm, 3, "GET: host faclist:  0x%16.16llx.%16.16llx.%16.16llx",
1274		 mach->fac_list[0],
1275		 mach->fac_list[1],
1276		 mach->fac_list[2]);
1277	if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
1278		ret = -EFAULT;
1279	kfree(mach);
1280out:
1281	return ret;
1282}
1283
1284static int kvm_s390_get_processor_feat(struct kvm *kvm,
1285				       struct kvm_device_attr *attr)
1286{
1287	struct kvm_s390_vm_cpu_feat data;
1288
1289	bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
1290		    KVM_S390_VM_CPU_FEAT_NR_BITS);
1291	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1292		return -EFAULT;
1293	VM_EVENT(kvm, 3, "GET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1294			 data.feat[0],
1295			 data.feat[1],
1296			 data.feat[2]);
1297	return 0;
1298}
1299
1300static int kvm_s390_get_machine_feat(struct kvm *kvm,
1301				     struct kvm_device_attr *attr)
1302{
1303	struct kvm_s390_vm_cpu_feat data;
1304
1305	bitmap_copy((unsigned long *) data.feat,
1306		    kvm_s390_available_cpu_feat,
1307		    KVM_S390_VM_CPU_FEAT_NR_BITS);
1308	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1309		return -EFAULT;
1310	VM_EVENT(kvm, 3, "GET: host feat:  0x%16.16llx.0x%16.16llx.0x%16.16llx",
1311			 data.feat[0],
1312			 data.feat[1],
1313			 data.feat[2]);
1314	return 0;
1315}
1316
1317static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
1318					  struct kvm_device_attr *attr)
1319{
1320	/*
1321	 * Once we can actually configure subfunctions (kernel + hw support),
1322	 * we have to check if they were already set by user space, if so copy
1323	 * them from kvm->arch.
1324	 */
1325	return -ENXIO;
1326}
1327
1328static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
1329					struct kvm_device_attr *attr)
1330{
1331	if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
1332	    sizeof(struct kvm_s390_vm_cpu_subfunc)))
1333		return -EFAULT;
1334	return 0;
1335}
1336static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1337{
1338	int ret = -ENXIO;
1339
1340	switch (attr->attr) {
1341	case KVM_S390_VM_CPU_PROCESSOR:
1342		ret = kvm_s390_get_processor(kvm, attr);
1343		break;
1344	case KVM_S390_VM_CPU_MACHINE:
1345		ret = kvm_s390_get_machine(kvm, attr);
1346		break;
1347	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1348		ret = kvm_s390_get_processor_feat(kvm, attr);
1349		break;
1350	case KVM_S390_VM_CPU_MACHINE_FEAT:
1351		ret = kvm_s390_get_machine_feat(kvm, attr);
1352		break;
1353	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1354		ret = kvm_s390_get_processor_subfunc(kvm, attr);
1355		break;
1356	case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1357		ret = kvm_s390_get_machine_subfunc(kvm, attr);
1358		break;
1359	}
1360	return ret;
1361}
1362
1363static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1364{
1365	int ret;
1366
1367	switch (attr->group) {
1368	case KVM_S390_VM_MEM_CTRL:
1369		ret = kvm_s390_set_mem_control(kvm, attr);
1370		break;
1371	case KVM_S390_VM_TOD:
1372		ret = kvm_s390_set_tod(kvm, attr);
1373		break;
1374	case KVM_S390_VM_CPU_MODEL:
1375		ret = kvm_s390_set_cpu_model(kvm, attr);
1376		break;
1377	case KVM_S390_VM_CRYPTO:
1378		ret = kvm_s390_vm_set_crypto(kvm, attr);
1379		break;
1380	case KVM_S390_VM_MIGRATION:
1381		ret = kvm_s390_vm_set_migration(kvm, attr);
1382		break;
1383	default:
1384		ret = -ENXIO;
1385		break;
1386	}
1387
1388	return ret;
1389}
1390
1391static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1392{
1393	int ret;
1394
1395	switch (attr->group) {
1396	case KVM_S390_VM_MEM_CTRL:
1397		ret = kvm_s390_get_mem_control(kvm, attr);
1398		break;
1399	case KVM_S390_VM_TOD:
1400		ret = kvm_s390_get_tod(kvm, attr);
1401		break;
1402	case KVM_S390_VM_CPU_MODEL:
1403		ret = kvm_s390_get_cpu_model(kvm, attr);
1404		break;
1405	case KVM_S390_VM_MIGRATION:
1406		ret = kvm_s390_vm_get_migration(kvm, attr);
1407		break;
1408	default:
1409		ret = -ENXIO;
1410		break;
1411	}
1412
1413	return ret;
1414}
1415
1416static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1417{
1418	int ret;
1419
1420	switch (attr->group) {
1421	case KVM_S390_VM_MEM_CTRL:
1422		switch (attr->attr) {
1423		case KVM_S390_VM_MEM_ENABLE_CMMA:
1424		case KVM_S390_VM_MEM_CLR_CMMA:
1425			ret = sclp.has_cmma ? 0 : -ENXIO;
1426			break;
1427		case KVM_S390_VM_MEM_LIMIT_SIZE:
1428			ret = 0;
1429			break;
1430		default:
1431			ret = -ENXIO;
1432			break;
1433		}
1434		break;
1435	case KVM_S390_VM_TOD:
1436		switch (attr->attr) {
1437		case KVM_S390_VM_TOD_LOW:
1438		case KVM_S390_VM_TOD_HIGH:
1439			ret = 0;
1440			break;
1441		default:
1442			ret = -ENXIO;
1443			break;
1444		}
1445		break;
1446	case KVM_S390_VM_CPU_MODEL:
1447		switch (attr->attr) {
1448		case KVM_S390_VM_CPU_PROCESSOR:
1449		case KVM_S390_VM_CPU_MACHINE:
1450		case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1451		case KVM_S390_VM_CPU_MACHINE_FEAT:
1452		case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1453			ret = 0;
1454			break;
1455		/* configuring subfunctions is not supported yet */
1456		case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1457		default:
1458			ret = -ENXIO;
1459			break;
1460		}
1461		break;
1462	case KVM_S390_VM_CRYPTO:
1463		switch (attr->attr) {
1464		case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
1465		case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
1466		case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
1467		case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
1468			ret = 0;
1469			break;
1470		default:
1471			ret = -ENXIO;
1472			break;
1473		}
1474		break;
1475	case KVM_S390_VM_MIGRATION:
1476		ret = 0;
1477		break;
1478	default:
1479		ret = -ENXIO;
1480		break;
1481	}
1482
1483	return ret;
1484}
1485
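/*
 * Read the guest storage keys for args->count frames starting at
 * args->start_gfn and copy them to user space. Returns
 * KVM_S390_GET_SKEYS_NONE if the guest never used storage keys.
 */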
1486static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1487{
1488	uint8_t *keys;
1489	uint64_t hva;
1490	int srcu_idx, i, r = 0;
1491
1492	if (args->flags != 0)
1493		return -EINVAL;
1494
1495	/* Is this guest using storage keys? */
1496	if (!mm_use_skey(current->mm))
1497		return KVM_S390_GET_SKEYS_NONE;
1498
1499	/* Enforce sane limit on memory allocation */
1500	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1501		return -EINVAL;
1502
1503	keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
1504	if (!keys)
1505		return -ENOMEM;
1506
1507	down_read(&current->mm->mmap_sem);
1508	srcu_idx = srcu_read_lock(&kvm->srcu);
1509	for (i = 0; i < args->count; i++) {
1510		hva = gfn_to_hva(kvm, args->start_gfn + i);
1511		if (kvm_is_error_hva(hva)) {
1512			r = -EFAULT;
1513			break;
1514		}
1515
1516		r = get_guest_storage_key(current->mm, hva, &keys[i]);
1517		if (r)
1518			break;
1519	}
1520	srcu_read_unlock(&kvm->srcu, srcu_idx);
1521	up_read(&current->mm->mmap_sem);
1522
1523	if (!r) {
1524		r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
1525				 sizeof(uint8_t) * args->count);
1526		if (r)
1527			r = -EFAULT;
1528	}
1529
1530	kvfree(keys);
1531	return r;
1532}
1533
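/*
 * Enable storage key handling for the guest and set the storage keys
 * supplied by user space for args->count frames starting at
 * args->start_gfn. The lowest order (reserved) key bit must be zero.
 */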
1534static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1535{
1536	uint8_t *keys;
1537	uint64_t hva;
1538	int srcu_idx, i, r = 0;
1539
1540	if (args->flags != 0)
1541		return -EINVAL;
1542
1543	/* Enforce sane limit on memory allocation */
1544	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1545		return -EINVAL;
1546
1547	keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
1548	if (!keys)
1549		return -ENOMEM;
1550
1551	r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
1552			   sizeof(uint8_t) * args->count);
1553	if (r) {
1554		r = -EFAULT;
1555		goto out;
1556	}
1557
1558	/* Enable storage key handling for the guest */
1559	r = s390_enable_skey();
1560	if (r)
1561		goto out;
1562
1563	down_read(&current->mm->mmap_sem);
1564	srcu_idx = srcu_read_lock(&kvm->srcu);
1565	for (i = 0; i < args->count; i++) {
1566		hva = gfn_to_hva(kvm, args->start_gfn + i);
1567		if (kvm_is_error_hva(hva)) {
1568			r = -EFAULT;
1569			break;
1570		}
1571
1572		/* Lowest order bit is reserved */
1573		if (keys[i] & 0x01) {
1574			r = -EINVAL;
1575			break;
1576		}
1577
1578		r = set_guest_storage_key(current->mm, hva, keys[i], 0);
1579		if (r)
1580			break;
1581	}
1582	srcu_read_unlock(&kvm->srcu, srcu_idx);
1583	up_read(&current->mm->mmap_sem);
1584out:
1585	kvfree(keys);
1586	return r;
1587}
1588
1589/*
1590 * Base address and length must be sent at the start of each block, therefore
1591 * it's cheaper to send some clean data, as long as it's less than the size of
1592 * two longs.
1593 */
1594#define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *))
1595/* for consistency */
1596#define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)
1597
1598/*
1599 * This function searches for the next page with dirty CMMA attributes, and
1600 * saves the attributes in the buffer up to either the end of the buffer or
1601 * until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found;
1602 * no trailing clean bytes are saved.
1603 * In case no dirty bits were found, or if CMMA was not enabled or used, the
1604 * output buffer will indicate 0 as length.
1605 */
1606static int kvm_s390_get_cmma_bits(struct kvm *kvm,
1607				  struct kvm_s390_cmma_log *args)
1608{
1609	struct kvm_s390_migration_state *s = kvm->arch.migration_state;
1610	unsigned long bufsize, hva, pgstev, i, next, cur;
1611	int srcu_idx, peek, r = 0, rr;
1612	u8 *res;
1613
1614	cur = args->start_gfn;
1615	i = next = pgstev = 0;
1616
1617	if (unlikely(!kvm->arch.use_cmma))
1618		return -ENXIO;
1619	/* Invalid/unsupported flags were specified */
1620	if (args->flags & ~KVM_S390_CMMA_PEEK)
1621		return -EINVAL;
1622	/* Migration mode query, and we are not doing a migration */
1623	peek = !!(args->flags & KVM_S390_CMMA_PEEK);
1624	if (!peek && !s)
1625		return -EINVAL;
1626	/* CMMA is disabled or was not used, or the buffer has length zero */
1627	bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
1628	if (!bufsize || !kvm->mm->context.uses_cmm) {
1629		memset(args, 0, sizeof(*args));
1630		return 0;
1631	}
1632
1633	if (!peek) {
1634		/* We are not peeking, and there are no dirty pages */
1635		if (!atomic64_read(&s->dirty_pages)) {
1636			memset(args, 0, sizeof(*args));
1637			return 0;
1638		}
1639		cur = find_next_bit(s->pgste_bitmap, s->bitmap_size,
1640				    args->start_gfn);
1641		if (cur >= s->bitmap_size)	/* nothing found, loop back */
1642			cur = find_next_bit(s->pgste_bitmap, s->bitmap_size, 0);
1643		if (cur >= s->bitmap_size) {	/* again! (very unlikely) */
1644			memset(args, 0, sizeof(*args));
1645			return 0;
1646		}
1647		next = find_next_bit(s->pgste_bitmap, s->bitmap_size, cur + 1);
1648	}
1649
1650	res = vmalloc(bufsize);
1651	if (!res)
1652		return -ENOMEM;
1653
1654	args->start_gfn = cur;
1655
1656	down_read(&kvm->mm->mmap_sem);
1657	srcu_idx = srcu_read_lock(&kvm->srcu);
1658	while (i < bufsize) {
1659		hva = gfn_to_hva(kvm, cur);
1660		if (kvm_is_error_hva(hva)) {
1661			r = -EFAULT;
1662			break;
1663		}
1664		/* decrement only if we actually flipped the bit to 0 */
1665		if (!peek && test_and_clear_bit(cur, s->pgste_bitmap))
1666			atomic64_dec(&s->dirty_pages);
1667		r = get_pgste(kvm->mm, hva, &pgstev);
1668		if (r < 0)
1669			pgstev = 0;
1670		/* save the value */
1671		res[i++] = (pgstev >> 24) & 0x43;
1672		/*
1673		 * if the next bit is too far away, stop.
1674		 * if we reached the previous "next", find the next one
1675		 */
1676		if (!peek) {
1677			if (next > cur + KVM_S390_MAX_BIT_DISTANCE)
1678				break;
1679			if (cur == next)
1680				next = find_next_bit(s->pgste_bitmap,
1681						     s->bitmap_size, cur + 1);
1682			/* reached the end of the bitmap or of the buffer, stop */
1683			if ((next >= s->bitmap_size) ||
1684			    (next >= args->start_gfn + bufsize))
1685				break;
1686		}
1687		cur++;
1688	}
1689	srcu_read_unlock(&kvm->srcu, srcu_idx);
1690	up_read(&kvm->mm->mmap_sem);
1691	args->count = i;
1692	args->remaining = s ? atomic64_read(&s->dirty_pages) : 0;
1693
1694	rr = copy_to_user((void __user *)args->values, res, args->count);
1695	if (rr)
1696		r = -EFAULT;
1697
1698	vfree(res);
1699	return r;
1700}
1701
1702/*
1703 * This function sets the CMMA attributes for the given pages. If the input
1704 * buffer has zero length, no action is taken, otherwise the attributes are
1705 * set and the mm->context.uses_cmm flag is set.
1706 */
1707static int kvm_s390_set_cmma_bits(struct kvm *kvm,
1708				  const struct kvm_s390_cmma_log *args)
1709{
1710	unsigned long hva, mask, pgstev, i;
1711	uint8_t *bits;
1712	int srcu_idx, r = 0;
1713
1714	mask = args->mask;
1715
1716	if (!kvm->arch.use_cmma)
1717		return -ENXIO;
1718	/* invalid/unsupported flags */
1719	if (args->flags != 0)
1720		return -EINVAL;
1721	/* Enforce sane limit on memory allocation */
1722	if (args->count > KVM_S390_CMMA_SIZE_MAX)
1723		return -EINVAL;
1724	/* Nothing to do */
1725	if (args->count == 0)
1726		return 0;
1727
1728	bits = vmalloc(sizeof(*bits) * args->count);
1729	if (!bits)
1730		return -ENOMEM;
1731
1732	r = copy_from_user(bits, (void __user *)args->values, args->count);
1733	if (r) {
1734		r = -EFAULT;
1735		goto out;
1736	}
1737
1738	down_read(&kvm->mm->mmap_sem);
1739	srcu_idx = srcu_read_lock(&kvm->srcu);
1740	for (i = 0; i < args->count; i++) {
1741		hva = gfn_to_hva(kvm, args->start_gfn + i);
1742		if (kvm_is_error_hva(hva)) {
1743			r = -EFAULT;
1744			break;
1745		}
1746
1747		pgstev = bits[i];
1748		pgstev = pgstev << 24;
1749		mask &= _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT;
1750		set_pgste_bits(kvm->mm, hva, mask, pgstev);
1751	}
1752	srcu_read_unlock(&kvm->srcu, srcu_idx);
1753	up_read(&kvm->mm->mmap_sem);
1754
1755	if (!kvm->mm->context.uses_cmm) {
1756		down_write(&kvm->mm->mmap_sem);
1757		kvm->mm->context.uses_cmm = 1;
1758		up_write(&kvm->mm->mmap_sem);
1759	}
1760out:
1761	vfree(bits);
1762	return r;
1763}
1764
1765long kvm_arch_vm_ioctl(struct file *filp,
1766		       unsigned int ioctl, unsigned long arg)
1767{
1768	struct kvm *kvm = filp->private_data;
1769	void __user *argp = (void __user *)arg;
1770	struct kvm_device_attr attr;
1771	int r;
1772
1773	switch (ioctl) {
1774	case KVM_S390_INTERRUPT: {
1775		struct kvm_s390_interrupt s390int;
1776
1777		r = -EFAULT;
1778		if (copy_from_user(&s390int, argp, sizeof(s390int)))
1779			break;
1780		r = kvm_s390_inject_vm(kvm, &s390int);
1781		break;
1782	}
1783	case KVM_ENABLE_CAP: {
1784		struct kvm_enable_cap cap;
1785		r = -EFAULT;
1786		if (copy_from_user(&cap, argp, sizeof(cap)))
1787			break;
1788		r = kvm_vm_ioctl_enable_cap(kvm, &cap);
1789		break;
1790	}
1791	case KVM_CREATE_IRQCHIP: {
1792		struct kvm_irq_routing_entry routing;
1793
1794		r = -EINVAL;
1795		if (kvm->arch.use_irqchip) {
1796			/* Set up dummy routing. */
1797			memset(&routing, 0, sizeof(routing));
1798			r = kvm_set_irq_routing(kvm, &routing, 0, 0);
1799		}
1800		break;
1801	}
1802	case KVM_SET_DEVICE_ATTR: {
1803		r = -EFAULT;
1804		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1805			break;
1806		r = kvm_s390_vm_set_attr(kvm, &attr);
1807		break;
1808	}
1809	case KVM_GET_DEVICE_ATTR: {
1810		r = -EFAULT;
1811		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1812			break;
1813		r = kvm_s390_vm_get_attr(kvm, &attr);
1814		break;
1815	}
1816	case KVM_HAS_DEVICE_ATTR: {
1817		r = -EFAULT;
1818		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1819			break;
1820		r = kvm_s390_vm_has_attr(kvm, &attr);
1821		break;
1822	}
1823	case KVM_S390_GET_SKEYS: {
1824		struct kvm_s390_skeys args;
1825
1826		r = -EFAULT;
1827		if (copy_from_user(&args, argp,
1828				   sizeof(struct kvm_s390_skeys)))
1829			break;
1830		r = kvm_s390_get_skeys(kvm, &args);
1831		break;
1832	}
1833	case KVM_S390_SET_SKEYS: {
1834		struct kvm_s390_skeys args;
1835
1836		r = -EFAULT;
1837		if (copy_from_user(&args, argp,
1838				   sizeof(struct kvm_s390_skeys)))
1839			break;
1840		r = kvm_s390_set_skeys(kvm, &args);
1841		break;
1842	}
1843	case KVM_S390_GET_CMMA_BITS: {
1844		struct kvm_s390_cmma_log args;
1845
1846		r = -EFAULT;
1847		if (copy_from_user(&args, argp, sizeof(args)))
1848			break;
1849		mutex_lock(&kvm->slots_lock);
1850		r = kvm_s390_get_cmma_bits(kvm, &args);
1851		mutex_unlock(&kvm->slots_lock);
1852		if (!r) {
1853			r = copy_to_user(argp, &args, sizeof(args));
1854			if (r)
1855				r = -EFAULT;
1856		}
1857		break;
1858	}
1859	case KVM_S390_SET_CMMA_BITS: {
1860		struct kvm_s390_cmma_log args;
1861
1862		r = -EFAULT;
1863		if (copy_from_user(&args, argp, sizeof(args)))
1864			break;
1865		mutex_lock(&kvm->slots_lock);
1866		r = kvm_s390_set_cmma_bits(kvm, &args);
1867		mutex_unlock(&kvm->slots_lock);
1868		break;
1869	}
1870	default:
1871		r = -ENOTTY;
1872	}
1873
1874	return r;
1875}
1876
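/*
 * Query the AP configuration via PQAP(QCI): the function code goes into
 * register 0 and the address of the 128-byte result buffer into register 2.
 * Returns the condition code of the instruction, 0 on success.
 */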
1877static int kvm_s390_query_ap_config(u8 *config)
1878{
1879	u32 fcn_code = 0x04000000UL;
1880	u32 cc = 0;
1881
1882	memset(config, 0, 128);
1883	asm volatile(
1884		"lgr 0,%1\n"
1885		"lgr 2,%2\n"
1886		".long 0xb2af0000\n"		/* PQAP(QCI) */
1887		"0: ipm %0\n"
1888		"srl %0,28\n"
1889		"1:\n"
1890		EX_TABLE(0b, 1b)
1891		: "+r" (cc)
1892		: "r" (fcn_code), "r" (config)
1893		: "cc", "0", "2", "memory"
1894	);
1895
1896	return cc;
1897}
1898
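/*
 * Check whether the AP extended addressing (APXA) facility is available:
 * query the AP configuration if facility 12 is installed and test the
 * corresponding bit in the returned config block.
 */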
1899static int kvm_s390_apxa_installed(void)
1900{
1901	u8 config[128];
1902	int cc;
1903
1904	if (test_facility(12)) {
1905		cc = kvm_s390_query_ap_config(config);
1906
1907		if (cc)
1908			pr_err("PQAP(QCI) failed with cc=%d", cc);
1909		else
1910			return config[0] & 0x40;
1911	}
1912
1913	return 0;
1914}
1915
1916static void kvm_s390_set_crycb_format(struct kvm *kvm)
1917{
1918	kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
1919
1920	if (kvm_s390_apxa_installed())
1921		kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
1922	else
1923		kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
1924}
1925
1926static u64 kvm_s390_get_initial_cpuid(void)
1927{
1928	struct cpuid cpuid;
1929
1930	get_cpu_id(&cpuid);
1931	cpuid.version = 0xff;
1932	return *((u64 *) &cpuid);
1933}
1934
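/*
 * Set up the guest crypto control block and, if facility 76 is available
 * to the guest, enable AES/DEA protected key handling with freshly
 * generated random wrapping key masks.
 */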
1935static void kvm_s390_crypto_init(struct kvm *kvm)
1936	{
1937	if (!test_kvm_facility(kvm, 76))
1938		return;
1939
1940	kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
1941	kvm_s390_set_crycb_format(kvm);
1942
1943	/* Enable AES/DEA protected key functions by default */
1944	kvm->arch.crypto.aes_kw = 1;
1945	kvm->arch.crypto.dea_kw = 1;
1946	get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
1947			 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
1948	get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
1949			 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
1950}
1951
1952static void sca_dispose(struct kvm *kvm)
1953{
1954	if (kvm->arch.use_esca)
1955		free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
1956	else
1957		free_page((unsigned long)(kvm->arch.sca));
1958	kvm->arch.sca = NULL;
1959}
1960
1961int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
1962{
1963	gfp_t alloc_flags = GFP_KERNEL;
1964	int i, rc;
1965	char debug_name[16];
1966	static unsigned long sca_offset;
1967
1968	rc = -EINVAL;
1969#ifdef CONFIG_KVM_S390_UCONTROL
1970	if (type & ~KVM_VM_S390_UCONTROL)
1971		goto out_err;
1972	if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
1973		goto out_err;
1974#else
1975	if (type)
1976		goto out_err;
1977#endif
1978
1979	rc = s390_enable_sie();
1980	if (rc)
1981		goto out_err;
1982
1983	rc = -ENOMEM;
1984
1985	kvm->arch.use_esca = 0; /* start with basic SCA */
1986	if (!sclp.has_64bscao)
1987		alloc_flags |= GFP_DMA;
1988	rwlock_init(&kvm->arch.sca_lock);
1989	kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
1990	if (!kvm->arch.sca)
1991		goto out_err;
1992	spin_lock(&kvm_lock);
1993	sca_offset += 16;
1994	if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
1995		sca_offset = 0;
1996	kvm->arch.sca = (struct bsca_block *)
1997			((char *) kvm->arch.sca + sca_offset);
1998	spin_unlock(&kvm_lock);
1999
2000	sprintf(debug_name, "kvm-%u", current->pid);
2001
2002	kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
2003	if (!kvm->arch.dbf)
2004		goto out_err;
2005
2006	BUILD_BUG_ON(sizeof(struct sie_page2) != 4096);
2007	kvm->arch.sie_page2 =
2008	     (struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
2009	if (!kvm->arch.sie_page2)
2010		goto out_err;
2011
2012	kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
2013
2014	for (i = 0; i < kvm_s390_fac_size(); i++) {
2015		kvm->arch.model.fac_mask[i] = S390_lowcore.stfle_fac_list[i] &
2016					      (kvm_s390_fac_base[i] |
2017					       kvm_s390_fac_ext[i]);
2018		kvm->arch.model.fac_list[i] = S390_lowcore.stfle_fac_list[i] &
2019					      kvm_s390_fac_base[i];
2020	}
2021
2022	/* we are always in czam mode - even on pre z14 machines */
2023	set_kvm_facility(kvm->arch.model.fac_mask, 138);
2024	set_kvm_facility(kvm->arch.model.fac_list, 138);
2025	/* we emulate STHYI in kvm */
2026	set_kvm_facility(kvm->arch.model.fac_mask, 74);
2027	set_kvm_facility(kvm->arch.model.fac_list, 74);
2028	if (MACHINE_HAS_TLB_GUEST) {
2029		set_kvm_facility(kvm->arch.model.fac_mask, 147);
2030		set_kvm_facility(kvm->arch.model.fac_list, 147);
2031	}
2032
2033	kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
2034	kvm->arch.model.ibc = sclp.ibc & 0x0fff;
2035
2036	kvm_s390_crypto_init(kvm);
2037
2038	mutex_init(&kvm->arch.float_int.ais_lock);
2039	kvm->arch.float_int.simm = 0;
2040	kvm->arch.float_int.nimm = 0;
2041	spin_lock_init(&kvm->arch.float_int.lock);
2042	for (i = 0; i < FIRQ_LIST_COUNT; i++)
2043		INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
2044	init_waitqueue_head(&kvm->arch.ipte_wq);
2045	mutex_init(&kvm->arch.ipte_mutex);
2046
2047	debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
2048	VM_EVENT(kvm, 3, "vm created with type %lu", type);
2049
2050	if (type & KVM_VM_S390_UCONTROL) {
2051		kvm->arch.gmap = NULL;
2052		kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
2053	} else {
2054		if (sclp.hamax == U64_MAX)
2055			kvm->arch.mem_limit = TASK_SIZE_MAX;
2056		else
2057			kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX,
2058						    sclp.hamax + 1);
2059		kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
2060		if (!kvm->arch.gmap)
2061			goto out_err;
2062		kvm->arch.gmap->private = kvm;
2063		kvm->arch.gmap->pfault_enabled = 0;
2064	}
2065
2066	kvm->arch.css_support = 0;
2067	kvm->arch.use_irqchip = 0;
2068	kvm->arch.use_pfmfi = sclp.has_pfmfi;
2069	kvm->arch.epoch = 0;
2070
2071	spin_lock_init(&kvm->arch.start_stop_lock);
2072	kvm_s390_vsie_init(kvm);
2073	kvm_s390_gisa_init(kvm);
2074	KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
2075
2076	return 0;
2077out_err:
2078	free_page((unsigned long)kvm->arch.sie_page2);
2079	debug_unregister(kvm->arch.dbf);
2080	sca_dispose(kvm);
2081	KVM_EVENT(3, "creation of vm failed: %d", rc);
2082	return rc;
2083}
2084
2085bool kvm_arch_has_vcpu_debugfs(void)
2086{
2087	return false;
2088}
2089
2090int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu)
2091{
2092	return 0;
2093}
2094
2095void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
2096{
2097	VCPU_EVENT(vcpu, 3, "%s", "free cpu");
2098	trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
2099	kvm_s390_clear_local_irqs(vcpu);
2100	kvm_clear_async_pf_completion_queue(vcpu);
2101	if (!kvm_is_ucontrol(vcpu->kvm))
2102		sca_del_vcpu(vcpu);
2103
2104	if (kvm_is_ucontrol(vcpu->kvm))
2105		gmap_remove(vcpu->arch.gmap);
2106
2107	if (vcpu->kvm->arch.use_cmma)
2108		kvm_s390_vcpu_unsetup_cmma(vcpu);
2109	free_page((unsigned long)(vcpu->arch.sie_block));
2110
2111	kvm_vcpu_uninit(vcpu);
2112	kmem_cache_free(kvm_vcpu_cache, vcpu);
2113}
2114
2115static void kvm_free_vcpus(struct kvm *kvm)
2116{
2117	unsigned int i;
2118	struct kvm_vcpu *vcpu;
2119
2120	kvm_for_each_vcpu(i, vcpu, kvm)
2121		kvm_arch_vcpu_destroy(vcpu);
2122
2123	mutex_lock(&kvm->lock);
2124	for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
2125		kvm->vcpus[i] = NULL;
2126
2127	atomic_set(&kvm->online_vcpus, 0);
2128	mutex_unlock(&kvm->lock);
2129	}
2130
2131void kvm_arch_destroy_vm(struct kvm *kvm)
2132{
2133	kvm_free_vcpus(kvm);
2134	sca_dispose(kvm);
2135	debug_unregister(kvm->arch.dbf);
2136	kvm_s390_gisa_destroy(kvm);
2137	free_page((unsigned long)kvm->arch.sie_page2);
2138	if (!kvm_is_ucontrol(kvm))
2139		gmap_remove(kvm->arch.gmap);
2140	kvm_s390_destroy_adapters(kvm);
2141	kvm_s390_clear_float_irqs(kvm);
2142	kvm_s390_vsie_destroy(kvm);
2143	if (kvm->arch.migration_state) {
2144		vfree(kvm->arch.migration_state->pgste_bitmap);
2145		kfree(kvm->arch.migration_state);
2146	}
2147	KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
2148}
2149
2150/* Section: vcpu related */
2151static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
2152{
2153	vcpu->arch.gmap = gmap_create(current->mm, -1UL);
2154	if (!vcpu->arch.gmap)
2155		return -ENOMEM;
2156	vcpu->arch.gmap->private = vcpu->kvm;
2157
2158	return 0;
2159}
2160
2161static void sca_del_vcpu(struct kvm_vcpu *vcpu)
2162{
2163	if (!kvm_s390_use_sca_entries())
2164		return;
2165	read_lock(&vcpu->kvm->arch.sca_lock);
2166	if (vcpu->kvm->arch.use_esca) {
2167		struct esca_block *sca = vcpu->kvm->arch.sca;
2168
2169		clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2170		sca->cpu[vcpu->vcpu_id].sda = 0;
2171	} else {
2172		struct bsca_block *sca = vcpu->kvm->arch.sca;
2173
2174		clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2175		sca->cpu[vcpu->vcpu_id].sda = 0;
2176	}
2177	read_unlock(&vcpu->kvm->arch.sca_lock);
2178}
2179
2180static void sca_add_vcpu(struct kvm_vcpu *vcpu)
2181{
2182	if (!kvm_s390_use_sca_entries()) {
2183		struct bsca_block *sca = vcpu->kvm->arch.sca;
2184
2185		/* we still need the basic sca for the ipte control */
2186		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2187		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2188		return;
2189	}
2190	read_lock(&vcpu->kvm->arch.sca_lock);
2191	if (vcpu->kvm->arch.use_esca) {
2192		struct esca_block *sca = vcpu->kvm->arch.sca;
2193
2194		sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2195		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2196		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
2197		vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2198		set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2199	} else {
2200		struct bsca_block *sca = vcpu->kvm->arch.sca;
2201
2202		sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2203		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2204		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2205		set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2206	}
2207	read_unlock(&vcpu->kvm->arch.sca_lock);
2208}
2209
2210/* Basic SCA to Extended SCA data copy routines */
2211static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
2212{
2213	d->sda = s->sda;
2214	d->sigp_ctrl.c = s->sigp_ctrl.c;
2215	d->sigp_ctrl.scn = s->sigp_ctrl.scn;
2216}
2217
2218static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
2219{
2220	int i;
2221
2222	d->ipte_control = s->ipte_control;
2223	d->mcn[0] = s->mcn;
2224	for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
2225		sca_copy_entry(&d->cpu[i], &s->cpu[i]);
2226}
2227
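/*
 * Replace the basic SCA by an extended SCA: copy over all entries and
 * rewire every vcpu's SIE block to the new origin while all vcpus are
 * blocked from entering SIE.
 */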
2228static int sca_switch_to_extended(struct kvm *kvm)
2229{
2230	struct bsca_block *old_sca = kvm->arch.sca;
2231	struct esca_block *new_sca;
2232	struct kvm_vcpu *vcpu;
2233	unsigned int vcpu_idx;
2234	u32 scaol, scaoh;
2235
2236	new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);
2237	if (!new_sca)
2238		return -ENOMEM;
2239
2240	scaoh = (u32)((u64)(new_sca) >> 32);
2241	scaol = (u32)(u64)(new_sca) & ~0x3fU;
2242
2243	kvm_s390_vcpu_block_all(kvm);
2244	write_lock(&kvm->arch.sca_lock);
2245
2246	sca_copy_b_to_e(new_sca, old_sca);
2247
2248	kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
2249		vcpu->arch.sie_block->scaoh = scaoh;
2250		vcpu->arch.sie_block->scaol = scaol;
2251		vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2252	}
2253	kvm->arch.sca = new_sca;
2254	kvm->arch.use_esca = 1;
2255
2256	write_unlock(&kvm->arch.sca_lock);
2257	kvm_s390_vcpu_unblock_all(kvm);
2258
2259	free_page((unsigned long)old_sca);
2260
2261	VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
2262		 old_sca, kvm->arch.sca);
2263	return 0;
2264}
2265
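/*
 * Check whether a vcpu with the given id fits into the current SCA;
 * switch from the basic to the extended SCA on demand if necessary.
 */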
2266static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
2267{
2268	int rc;
2269
2270	if (!kvm_s390_use_sca_entries()) {
2271		if (id < KVM_MAX_VCPUS)
2272			return true;
2273		return false;
2274	}
2275	if (id < KVM_S390_BSCA_CPU_SLOTS)
2276		return true;
2277	if (!sclp.has_esca || !sclp.has_64bscao)
2278		return false;
2279
2280	mutex_lock(&kvm->lock);
2281	rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
2282	mutex_unlock(&kvm->lock);
2283
2284	return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
2285}
2286
2287int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
2288{
2289	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
2290	kvm_clear_async_pf_completion_queue(vcpu);
2291	vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
2292				    KVM_SYNC_GPRS |
2293				    KVM_SYNC_ACRS |
2294				    KVM_SYNC_CRS |
2295				    KVM_SYNC_ARCH0 |
2296				    KVM_SYNC_PFAULT;
2297	kvm_s390_set_prefix(vcpu, 0);
2298	if (test_kvm_facility(vcpu->kvm, 64))
2299		vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
2300	if (test_kvm_facility(vcpu->kvm, 82))
2301		vcpu->run->kvm_valid_regs |= KVM_SYNC_BPBC;
2302	if (test_kvm_facility(vcpu->kvm, 133))
2303		vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB;
2304	/* fprs can be synchronized via vrs, even if the guest has no vx. With
2305	 * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
2306	 */
2307	if (MACHINE_HAS_VX)
2308		vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
2309	else
2310		vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
2311
2312	if (kvm_is_ucontrol(vcpu->kvm))
2313		return __kvm_ucontrol_vcpu_init(vcpu);
2314
2315	return 0;
2316}
2317
2318/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2319static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2320{
2321	WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
2322	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2323	vcpu->arch.cputm_start = get_tod_clock_fast();
2324	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2325}
2326
2327/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2328static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2329{
2330	WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
2331	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2332	vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
2333	vcpu->arch.cputm_start = 0;
2334	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2335}
2336
2337/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2338static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2339{
2340	WARN_ON_ONCE(vcpu->arch.cputm_enabled);
2341	vcpu->arch.cputm_enabled = true;
2342	__start_cpu_timer_accounting(vcpu);
2343}
2344
2345/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2346static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2347{
2348	WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
2349	__stop_cpu_timer_accounting(vcpu);
2350	vcpu->arch.cputm_enabled = false;
2351}
2352
2353static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2354{
2355	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2356	__enable_cpu_timer_accounting(vcpu);
2357	preempt_enable();
2358}
2359
2360static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2361{
2362	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2363	__disable_cpu_timer_accounting(vcpu);
2364	preempt_enable();
2365}
2366
2367/* set the cpu timer - may only be called from the VCPU thread itself */
2368void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
2369{
2370	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2371	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2372	if (vcpu->arch.cputm_enabled)
2373		vcpu->arch.cputm_start = get_tod_clock_fast();
2374	vcpu->arch.sie_block->cputm = cputm;
2375	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2376	preempt_enable();
2377}
2378
2379/* update and get the cpu timer - can also be called from other VCPU threads */
2380__u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
2381{
2382	unsigned int seq;
2383	__u64 value;
2384
2385	if (unlikely(!vcpu->arch.cputm_enabled))
2386		return vcpu->arch.sie_block->cputm;
2387
2388	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2389	do {
2390		seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
2391		/*
2392		 * If the writer would ever execute a read in the critical
2393		 * section, e.g. in irq context, we have a deadlock.
2394		 */
2395		WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
2396		value = vcpu->arch.sie_block->cputm;
2397		/* if cputm_start is 0, accounting is being started/stopped */
2398		if (likely(vcpu->arch.cputm_start))
2399			value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
2400	} while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
2401	preempt_enable();
2402	return value;
2403}
2404
2405void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
2406{
2407
2408	gmap_enable(vcpu->arch.enabled_gmap);
2409	kvm_s390_set_cpuflags(vcpu, CPUSTAT_RUNNING);
2410	if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
2411		__start_cpu_timer_accounting(vcpu);
2412	vcpu->cpu = cpu;
2413}
2414
2415void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
2416{
2417	vcpu->cpu = -1;
2418	if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
2419		__stop_cpu_timer_accounting(vcpu);
2420	kvm_s390_clear_cpuflags(vcpu, CPUSTAT_RUNNING);
2421	vcpu->arch.enabled_gmap = gmap_get_enabled();
2422	gmap_disable(vcpu->arch.enabled_gmap);
2423
2424}
2425
2426static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
2427{
2428	/* this equals initial cpu reset in pop, but we don't switch to ESA */
2429	vcpu->arch.sie_block->gpsw.mask = 0UL;
2430	vcpu->arch.sie_block->gpsw.addr = 0UL;
2431	kvm_s390_set_prefix(vcpu, 0);
2432	kvm_s390_set_cpu_timer(vcpu, 0);
2433	vcpu->arch.sie_block->ckc       = 0UL;
2434	vcpu->arch.sie_block->todpr     = 0;
2435	memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
2436	vcpu->arch.sie_block->gcr[0]  = 0xE0UL;
2437	vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
2438	/* make sure the new fpc will be lazily loaded */
2439	save_fpu_regs();
2440	current->thread.fpu.fpc = 0;
2441	vcpu->arch.sie_block->gbea = 1;
2442	vcpu->arch.sie_block->pp = 0;
2443	vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
2444	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
2445	kvm_clear_async_pf_completion_queue(vcpu);
2446	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
2447		kvm_s390_vcpu_stop(vcpu);
2448	kvm_s390_clear_local_irqs(vcpu);
2449}
2450
2451void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
2452{
2453	mutex_lock(&vcpu->kvm->lock);
2454	preempt_disable();
2455	vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
2456	vcpu->arch.sie_block->epdx = vcpu->kvm->arch.epdx;
2457	preempt_enable();
2458	mutex_unlock(&vcpu->kvm->lock);
2459	if (!kvm_is_ucontrol(vcpu->kvm)) {
2460		vcpu->arch.gmap = vcpu->kvm->arch.gmap;
2461		sca_add_vcpu(vcpu);
2462	}
2463	if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
2464		vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
2465	/* make vcpu_load load the right gmap on the first trigger */
2466	vcpu->arch.enabled_gmap = vcpu->arch.gmap;
2467}
2468
2469static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
2470{
2471	if (!test_kvm_facility(vcpu->kvm, 76))
2472		return;
2473
2474	vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
2475
2476	if (vcpu->kvm->arch.crypto.aes_kw)
2477		vcpu->arch.sie_block->ecb3 |= ECB3_AES;
2478	if (vcpu->kvm->arch.crypto.dea_kw)
2479		vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
2480
2481	vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
2482}
2483
2484void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
2485{
2486	free_page(vcpu->arch.sie_block->cbrlo);
2487	vcpu->arch.sie_block->cbrlo = 0;
2488}
2489
2490int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
2491{
2492	vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
2493	if (!vcpu->arch.sie_block->cbrlo)
2494		return -ENOMEM;
2495	return 0;
2496}
2497
2498static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
2499{
2500	struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
2501
2502	vcpu->arch.sie_block->ibc = model->ibc;
2503	if (test_kvm_facility(vcpu->kvm, 7))
2504		vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
2505}
2506
2507int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
2508{
2509	int rc = 0;
2510
2511	atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
2512						    CPUSTAT_SM |
2513						    CPUSTAT_STOPPED);
2514
2515	if (test_kvm_facility(vcpu->kvm, 78))
2516		kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED2);
2517	else if (test_kvm_facility(vcpu->kvm, 8))
2518		kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED);
2519
2520	kvm_s390_vcpu_setup_model(vcpu);
2521
2522	/* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
2523	if (MACHINE_HAS_ESOP)
2524		vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT;
2525	if (test_kvm_facility(vcpu->kvm, 9))
2526		vcpu->arch.sie_block->ecb |= ECB_SRSI;
2527	if (test_kvm_facility(vcpu->kvm, 73))
2528		vcpu->arch.sie_block->ecb |= ECB_TE;
2529
2530	if (test_kvm_facility(vcpu->kvm, 8) && vcpu->kvm->arch.use_pfmfi)
2531		vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI;
2532	if (test_kvm_facility(vcpu->kvm, 130))
2533		vcpu->arch.sie_block->ecb2 |= ECB2_IEP;
2534	vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI;
2535	if (sclp.has_cei)
2536		vcpu->arch.sie_block->eca |= ECA_CEI;
2537	if (sclp.has_ib)
2538		vcpu->arch.sie_block->eca |= ECA_IB;
2539	if (sclp.has_siif)
2540		vcpu->arch.sie_block->eca |= ECA_SII;
2541	if (sclp.has_sigpif)
2542		vcpu->arch.sie_block->eca |= ECA_SIGPI;
2543	if (test_kvm_facility(vcpu->kvm, 129)) {
2544		vcpu->arch.sie_block->eca |= ECA_VX;
2545		vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
2546	}
2547	if (test_kvm_facility(vcpu->kvm, 139))
2548		vcpu->arch.sie_block->ecd |= ECD_MEF;
2549
2550	if (vcpu->arch.sie_block->gd) {
2551		vcpu->arch.sie_block->eca |= ECA_AIV;
2552		VCPU_EVENT(vcpu, 3, "AIV gisa format-%u enabled for cpu %03u",
2553			   vcpu->arch.sie_block->gd & 0x3, vcpu->vcpu_id);
2554	}
2555	vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx)
2556					| SDNXC;
2557	vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
2558
2559	if (sclp.has_kss)
2560		kvm_s390_set_cpuflags(vcpu, CPUSTAT_KSS);
2561	else
2562		vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
2563
2564	if (vcpu->kvm->arch.use_cmma) {
2565		rc = kvm_s390_vcpu_setup_cmma(vcpu);
2566		if (rc)
2567			return rc;
2568	}
2569	hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
2570	vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
2571
2572	kvm_s390_vcpu_crypto_setup(vcpu);
2573
2574	return rc;
2575}
2576
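/*
 * Allocate and minimally initialize a new vcpu: the vcpu structure, its
 * SIE control block (including ITDB and GISA designation) and the cpu
 * timer seqcount.
 */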
2577struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
2578				      unsigned int id)
2579{
2580	struct kvm_vcpu *vcpu;
2581	struct sie_page *sie_page;
2582	int rc = -EINVAL;
2583
2584	if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
2585		goto out;
2586
2587	rc = -ENOMEM;
2588
2589	vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
2590	if (!vcpu)
2591		goto out;
2592
2593	BUILD_BUG_ON(sizeof(struct sie_page) != 4096);
2594	sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
2595	if (!sie_page)
2596		goto out_free_cpu;
2597
2598	vcpu->arch.sie_block = &sie_page->sie_block;
2599	vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
2600
2601	/* the real guest size will always be smaller than msl */
2602	vcpu->arch.sie_block->mso = 0;
2603	vcpu->arch.sie_block->msl = sclp.hamax;
2604
2605	vcpu->arch.sie_block->icpua = id;
2606	spin_lock_init(&vcpu->arch.local_int.lock);
2607	vcpu->arch.sie_block->gd = (u32)(u64)kvm->arch.gisa;
2608	if (vcpu->arch.sie_block->gd && sclp.has_gisaf)
2609		vcpu->arch.sie_block->gd |= GISA_FORMAT1;
2610	seqcount_init(&vcpu->arch.cputm_seqcount);
2611
2612	rc = kvm_vcpu_init(vcpu, kvm, id);
2613	if (rc)
2614		goto out_free_sie_block;
2615	VM_EVENT(kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", id, vcpu,
2616		 vcpu->arch.sie_block);
2617	trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);
2618
2619	return vcpu;
2620out_free_sie_block:
2621	free_page((unsigned long)(vcpu->arch.sie_block));
2622out_free_cpu:
2623	kmem_cache_free(kvm_vcpu_cache, vcpu);
2624out:
2625	return ERR_PTR(rc);
2626}
2627
2628int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
2629	{
2630	return kvm_s390_vcpu_has_irq(vcpu, 0);
2631}
2632
2633bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
2634{
2635	return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE);
2636}
2637
2638void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
2639{
2640	atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
2641	exit_sie(vcpu);
2642}
2643
2644void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
2645{
2646	atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
2647}
2648
2649static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
2650{
2651	atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
2652	exit_sie(vcpu);
2653}
2654
2655static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
2656{
2657	atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
2658}
2659
2660/*
2661 * Kick a guest cpu out of SIE and wait until SIE is not running.
2662 * If the CPU is not running (e.g. waiting as idle) the function will
2663 * return immediately. */
2664void exit_sie(struct kvm_vcpu *vcpu)
2665{
2666	kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOP_INT);
2667	while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
2668		cpu_relax();
2669}
2670
2671/* Kick a guest cpu out of SIE to process a request synchronously */
2672void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
2673{
2674	kvm_make_request(req, vcpu);
2675	kvm_s390_vcpu_request(vcpu);
2676}
2677
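/*
 * gmap invalidation notifier: if the invalidated range overlaps a vcpu's
 * prefix pages, request re-protection and reload of the prefix mapping.
 */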
2678static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
2679			      unsigned long end)
2680{
2681	struct kvm *kvm = gmap->private;
2682	struct kvm_vcpu *vcpu;
2683	unsigned long prefix;
2684	int i;
2685
2686	if (gmap_is_shadow(gmap))
2687		return;
2688	if (start >= 1UL << 31)
2689		/* We are only interested in prefix pages */
2690		return;
2691	kvm_for_each_vcpu(i, vcpu, kvm) {
2692		/* match against both prefix pages */
2693		prefix = kvm_s390_get_prefix(vcpu);
2694		if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
2695			VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
2696				   start, end);
2697			kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
2698		}
2699	}
2700}
2701
2702int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
2703{
2704	/* kvm common code refers to this, but never calls it */
2705	BUG();
2706	return 0;
2707}
2708
2709static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
2710					   struct kvm_one_reg *reg)
2711{
2712	int r = -EINVAL;
2713
2714	switch (reg->id) {
2715	case KVM_REG_S390_TODPR:
2716		r = put_user(vcpu->arch.sie_block->todpr,
2717			     (u32 __user *)reg->addr);
2718		break;
2719	case KVM_REG_S390_EPOCHDIFF:
2720		r = put_user(vcpu->arch.sie_block->epoch,
2721			     (u64 __user *)reg->addr);
2722		break;
2723	case KVM_REG_S390_CPU_TIMER:
2724		r = put_user(kvm_s390_get_cpu_timer(vcpu),
2725			     (u64 __user *)reg->addr);
2726		break;
2727	case KVM_REG_S390_CLOCK_COMP:
2728		r = put_user(vcpu->arch.sie_block->ckc,
2729			     (u64 __user *)reg->addr);
2730		break;
2731	case KVM_REG_S390_PFTOKEN:
2732		r = put_user(vcpu->arch.pfault_token,
2733			     (u64 __user *)reg->addr);
2734		break;
2735	case KVM_REG_S390_PFCOMPARE:
2736		r = put_user(vcpu->arch.pfault_compare,
2737			     (u64 __user *)reg->addr);
2738		break;
2739	case KVM_REG_S390_PFSELECT:
2740		r = put_user(vcpu->arch.pfault_select,
2741			     (u64 __user *)reg->addr);
2742		break;
2743	case KVM_REG_S390_PP:
2744		r = put_user(vcpu->arch.sie_block->pp,
2745			     (u64 __user *)reg->addr);
2746		break;
2747	case KVM_REG_S390_GBEA:
2748		r = put_user(vcpu->arch.sie_block->gbea,
2749			     (u64 __user *)reg->addr);
2750		break;
2751	default:
2752		break;
2753	}
2754
2755	return r;
2756}
2757
2758static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
2759					   struct kvm_one_reg *reg)
2760{
2761	int r = -EINVAL;
2762	__u64 val;
2763
2764	switch (reg->id) {
2765	case KVM_REG_S390_TODPR:
2766		r = get_user(vcpu->arch.sie_block->todpr,
2767			     (u32 __user *)reg->addr);
2768		break;
2769	case KVM_REG_S390_EPOCHDIFF:
2770		r = get_user(vcpu->arch.sie_block->epoch,
2771			     (u64 __user *)reg->addr);
2772		break;
2773	case KVM_REG_S390_CPU_TIMER:
2774		r = get_user(val, (u64 __user *)reg->addr);
2775		if (!r)
2776			kvm_s390_set_cpu_timer(vcpu, val);
2777		break;
2778	case KVM_REG_S390_CLOCK_COMP:
2779		r = get_user(vcpu->arch.sie_block->ckc,
2780			     (u64 __user *)reg->addr);
2781		break;
2782	case KVM_REG_S390_PFTOKEN:
2783		r = get_user(vcpu->arch.pfault_token,
2784			     (u64 __user *)reg->addr);
2785		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2786			kvm_clear_async_pf_completion_queue(vcpu);
2787		break;
2788	case KVM_REG_S390_PFCOMPARE:
2789		r = get_user(vcpu->arch.pfault_compare,
2790			     (u64 __user *)reg->addr);
2791		break;
2792	case KVM_REG_S390_PFSELECT:
2793		r = get_user(vcpu->arch.pfault_select,
2794			     (u64 __user *)reg->addr);
2795		break;
2796	case KVM_REG_S390_PP:
2797		r = get_user(vcpu->arch.sie_block->pp,
2798			     (u64 __user *)reg->addr);
2799		break;
2800	case KVM_REG_S390_GBEA:
2801		r = get_user(vcpu->arch.sie_block->gbea,
2802			     (u64 __user *)reg->addr);
2803		break;
2804	default:
2805		break;
2806	}
2807
2808	return r;
2809}
2810
2811	static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
2812{
2813	kvm_s390_vcpu_initial_reset(vcpu);
2814	return 0;
2815}
2816
2817int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2818{
2819	vcpu_load(vcpu);
2820	memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
2821	vcpu_put(vcpu);
2822	return 0;
2823}
2824
2825int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2826{
2827	vcpu_load(vcpu);
2828	memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
2829	vcpu_put(vcpu);
2830	return 0;
2831}
2832
2833int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
2834				  struct kvm_sregs *sregs)
2835{
2836	vcpu_load(vcpu);
2837
2838	memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
2839	memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
2840
2841	vcpu_put(vcpu);
2842	return 0;
2843}
2844
2845int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
2846				  struct kvm_sregs *sregs)
2847{
2848	vcpu_load(vcpu);
2849
2850	memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
2851	memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
2852
2853	vcpu_put(vcpu);
2854	return 0;
2855}
2856
2857int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2858{
2859	int ret = 0;
2860
2861	vcpu_load(vcpu);
2862
2863	if (test_fp_ctl(fpu->fpc)) {
2864		ret = -EINVAL;
2865		goto out;
2866	}
2867	vcpu->run->s.regs.fpc = fpu->fpc;
2868	if (MACHINE_HAS_VX)
2869		convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
2870				 (freg_t *) fpu->fprs);
2871	else
2872		memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
2873
2874out:
2875	vcpu_put(vcpu);
2876	return ret;
2877}
2878
2879int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2880{
2881	vcpu_load(vcpu);
2882
2883	/* make sure we have the latest values */
2884	save_fpu_regs();
2885	if (MACHINE_HAS_VX)
2886		convert_vx_to_fp((freg_t *) fpu->fprs,
2887				 (__vector128 *) vcpu->run->s.regs.vrs);
2888	else
2889		memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
2890	fpu->fpc = vcpu->run->s.regs.fpc;
2891
2892	vcpu_put(vcpu);
2893	return 0;
2894}
2895
2896static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
2897{
2898	int rc = 0;
2899
2900	if (!is_vcpu_stopped(vcpu))
2901		rc = -EBUSY;
2902	else {
2903		vcpu->run->psw_mask = psw.mask;
2904		vcpu->run->psw_addr = psw.addr;
2905	}
2906	return rc;
2907}
2908
2909int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
2910				  struct kvm_translation *tr)
2911{
2912	return -EINVAL; /* not implemented yet */
2913}
2914
2915#define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
2916			      KVM_GUESTDBG_USE_HW_BP | \
2917			      KVM_GUESTDBG_ENABLE)
2918
2919int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
2920					struct kvm_guest_debug *dbg)
2921{
2922	int rc = 0;
2923
2924	vcpu_load(vcpu);
2925
2926	vcpu->guest_debug = 0;
2927	kvm_s390_clear_bp_data(vcpu);
2928
2929	if (dbg->control & ~VALID_GUESTDBG_FLAGS) {
2930		rc = -EINVAL;
2931		goto out;
2932	}
2933	if (!sclp.has_gpere) {
2934		rc = -EINVAL;
2935		goto out;
2936	}
2937
2938	if (dbg->control & KVM_GUESTDBG_ENABLE) {
2939		vcpu->guest_debug = dbg->control;
2940		/* enforce guest PER */
2941		kvm_s390_set_cpuflags(vcpu, CPUSTAT_P);
2942
2943		if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
2944			rc = kvm_s390_import_bp_data(vcpu, dbg);
2945	} else {
2946		kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
2947		vcpu->arch.guestdbg.last_bp = 0;
2948	}
2949
2950	if (rc) {
2951		vcpu->guest_debug = 0;
2952		kvm_s390_clear_bp_data(vcpu);
2953		kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
2954	}
2955
2956out:
2957	vcpu_put(vcpu);
2958	return rc;
2959}
2960
2961int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
2962				    struct kvm_mp_state *mp_state)
2963{
2964	int ret;
2965
2966	vcpu_load(vcpu);
2967
2968	/* CHECK_STOP and LOAD are not supported yet */
2969	ret = is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
2970				      KVM_MP_STATE_OPERATING;
2971
2972	vcpu_put(vcpu);
2973	return ret;
2974}
2975
2976int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
2977				    struct kvm_mp_state *mp_state)
2978{
2979	int rc = 0;
2980
2981	vcpu_load(vcpu);
2982
2983	/* user space knows about this interface - let it control the state */
2984	vcpu->kvm->arch.user_cpu_state_ctrl = 1;
2985
2986	switch (mp_state->mp_state) {
2987	case KVM_MP_STATE_STOPPED:
2988		kvm_s390_vcpu_stop(vcpu);
2989		break;
2990	case KVM_MP_STATE_OPERATING:
2991		kvm_s390_vcpu_start(vcpu);
2992		break;
2993	case KVM_MP_STATE_LOAD:
2994	case KVM_MP_STATE_CHECK_STOP:
2995		/* fall through - CHECK_STOP and LOAD are not supported yet */
2996	default:
2997		rc = -ENXIO;
2998	}
2999
3000	vcpu_put(vcpu);
3001	return rc;
3002}
3003
3004static bool ibs_enabled(struct kvm_vcpu *vcpu)
3005{
3006	return kvm_s390_test_cpuflags(vcpu, CPUSTAT_IBS);
3007}
3008
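/*
 * Process all pending vcpu requests (prefix re-protection, TLB flush,
 * IBS toggling, CMM migration toggling) before (re-)entering SIE.
 */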
3009static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
3010{
3011retry:
3012	kvm_s390_vcpu_request_handled(vcpu);
3013	if (!kvm_request_pending(vcpu))
3014		return 0;
3015	/*
3016	 * We use MMU_RELOAD just to re-arm the ipte notifier for the
3017	 * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
3018	 * This ensures that the ipte instruction for this request has
3019	 * already finished. We might race against a second unmapper that
3020	 * wants to set the blocking bit. Let's just retry the request loop.
3021	 */
3022	if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
3023		int rc;
3024		rc = gmap_mprotect_notify(vcpu->arch.gmap,
3025					  kvm_s390_get_prefix(vcpu),
3026					  PAGE_SIZE * 2, PROT_WRITE);
3027		if (rc) {
3028			kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
3029			return rc;
3030		}
3031		goto retry;
3032	}
3033
3034	if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
3035		vcpu->arch.sie_block->ihcpu = 0xffff;
3036		goto retry;
3037	}
3038
3039	if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
3040		if (!ibs_enabled(vcpu)) {
3041			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
3042			kvm_s390_set_cpuflags(vcpu, CPUSTAT_IBS);
3043		}
3044		goto retry;
3045	}
3046
3047	if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
3048		if (ibs_enabled(vcpu)) {
3049			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
3050			kvm_s390_clear_cpuflags(vcpu, CPUSTAT_IBS);
3051		}
3052		goto retry;
3053	}
3054
3055	if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
3056		vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
3057		goto retry;
3058	}
3059
3060	if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) {
3061		/*
3062		 * Disable CMM virtualization; we will emulate the ESSA
3063		 * instruction manually, in order to provide additional
3064		 * functionalities needed for live migration.
3065		 */
3066		vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA;
3067		goto retry;
3068	}
3069
3070	if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) {
3071		/*
3072		 * Re-enable CMM virtualization if CMMA is available and
3073		 * CMM has been used.
3074		 */
3075		if ((vcpu->kvm->arch.use_cmma) &&
3076		    (vcpu->kvm->mm->context.uses_cmm))
3077			vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
3078		goto retry;
3079	}
3080
3081	/* nothing to do, just clear the request */
3082	kvm_clear_request(KVM_REQ_UNHALT, vcpu);
3083
3084	return 0;
3085}
3086
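/*
 * Set the guest TOD epoch (and the epoch index, if the multiple-epoch
 * facility is available) for the VM and propagate it to all vcpus.
 */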
3087void kvm_s390_set_tod_clock(struct kvm *kvm,
3088			    const struct kvm_s390_vm_tod_clock *gtod)
3089{
3090	struct kvm_vcpu *vcpu;
3091	struct kvm_s390_tod_clock_ext htod;
3092	int i;
3093
3094	mutex_lock(&kvm->lock);
3095	preempt_disable();
3096
3097	get_tod_clock_ext((char *)&htod);
3098
3099	kvm->arch.epoch = gtod->tod - htod.tod;
3100	kvm->arch.epdx = 0;
3101	if (test_kvm_facility(kvm, 139)) {
3102		kvm->arch.epdx = gtod->epoch_idx - htod.epoch_idx;
3103		if (kvm->arch.epoch > gtod->tod)
3104			kvm->arch.epdx -= 1;
3105	}
3106
3107	kvm_s390_vcpu_block_all(kvm);
3108	kvm_for_each_vcpu(i, vcpu, kvm) {
3109		vcpu->arch.sie_block->epoch = kvm->arch.epoch;
3110		vcpu->arch.sie_block->epdx  = kvm->arch.epdx;
3111	}
3112
3113	kvm_s390_vcpu_unblock_all(kvm);
3114	preempt_enable();
3115	mutex_unlock(&kvm->lock);
3116}
3117
3118/**
3119 * kvm_arch_fault_in_page - fault-in guest page if necessary
3120 * @vcpu: The corresponding virtual cpu
3121 * @gpa: Guest physical address
3122 * @writable: Whether the page should be writable or not
3123 *
3124 * Make sure that a guest page has been faulted-in on the host.
3125 *
3126 * Return: Zero on success, negative error code otherwise.
3127 */
3128long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
3129{
3130	return gmap_fault(vcpu->arch.gmap, gpa,
3131			  writable ? FAULT_FLAG_WRITE : 0);
3132}
3133
3134static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
3135				      unsigned long token)
3136{
3137	struct kvm_s390_interrupt inti;
3138	struct kvm_s390_irq irq;
3139
3140	if (start_token) {
3141		irq.u.ext.ext_params2 = token;
3142		irq.type = KVM_S390_INT_PFAULT_INIT;
3143		WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
3144	} else {
3145		inti.type = KVM_S390_INT_PFAULT_DONE;
3146		inti.parm64 = token;
3147		WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
3148	}
3149}
3150
3151void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
3152				     struct kvm_async_pf *work)
3153{
3154	trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
3155	__kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
3156}
3157
3158void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
3159				 struct kvm_async_pf *work)
3160{
3161	trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
3162	__kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
3163}
3164
3165void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
3166			       struct kvm_async_pf *work)
3167{
3168	/* s390 will always inject the page directly */
3169}
3170
3171bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
3172{
3173	/*
3174	 * s390 will always inject the page directly,
3175	 * but we still want check_async_completion to cleanup
3176	 * but we still want check_async_completion to clean up
3177	return true;
3178}
3179
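/*
 * Decide whether the current host fault should be handled asynchronously
 * via pfault: only if the guest set up pfault handling, external
 * interrupts can be delivered and no other interrupt is pending.
 */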
3180static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
3181{
3182	hva_t hva;
3183	struct kvm_arch_async_pf arch;
3184	int rc;
3185
3186	if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3187		return 0;
3188	if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
3189	    vcpu->arch.pfault_compare)
3190		return 0;
3191	if (psw_extint_disabled(vcpu))
3192		return 0;
3193	if (kvm_s390_vcpu_has_irq(vcpu, 0))
3194		return 0;
3195	if (!(vcpu->arch.sie_block->gcr[0] & 0x200ul))
3196		return 0;
3197	if (!vcpu->arch.gmap->pfault_enabled)
3198		return 0;
3199
3200	hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
3201	hva += current->thread.gmap_addr & ~PAGE_MASK;
3202	if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
3203		return 0;
3204
3205	rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
3206	return rc;
3207}
3208
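/*
 * Housekeeping before entering SIE: async page fault completion, pending
 * machine checks, interrupt delivery, vcpu requests and guest debugging
 * setup.
 */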
3209static int vcpu_pre_run(struct kvm_vcpu *vcpu)
3210{
3211	int rc, cpuflags;
3212
3213	/*
3214	 * On s390 notifications for arriving pages will be delivered directly
3215	 * to the guest but the housekeeping for completed pfaults is
3216	 * handled outside the worker.
3217	 */
3218	kvm_check_async_pf_completion(vcpu);
3219
3220	vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
3221	vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
3222
3223	if (need_resched())
3224		schedule();
3225
3226	if (test_cpu_flag(CIF_MCCK_PENDING))
3227		s390_handle_mcck();
3228
3229	if (!kvm_is_ucontrol(vcpu->kvm)) {
3230		rc = kvm_s390_deliver_pending_interrupts(vcpu);
3231		if (rc)
3232			return rc;
3233	}
3234
3235	rc = kvm_s390_handle_requests(vcpu);
3236	if (rc)
3237		return rc;
3238
3239	if (guestdbg_enabled(vcpu)) {
3240		kvm_s390_backup_guest_per_regs(vcpu);
3241		kvm_s390_patch_guest_per_regs(vcpu);
3242	}
3243
3244	vcpu->arch.sie_block->icptcode = 0;
3245	cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
3246	VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
3247	trace_kvm_s390_sie_enter(vcpu, cpuflags);
3248
3249	return 0;
3250}
3251
3252static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
3253{
3254	struct kvm_s390_pgm_info pgm_info = {
3255		.code = PGM_ADDRESSING,
3256	};
3257	u8 opcode, ilen;
3258	int rc;
3259
3260	VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
3261	trace_kvm_s390_sie_fault(vcpu);
3262
3263	/*
3264	 * We want to inject an addressing exception, which is defined as a
3265	 * suppressing or terminating exception. However, since we came here
3266	 * by a DAT access exception, the PSW still points to the faulting
3267	 * instruction since DAT exceptions are nullifying. So we've got
3268	 * to look up the current opcode to get the length of the instruction
3269	 * to be able to forward the PSW.
3270	 */
3271	rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1);
3272	ilen = insn_length(opcode);
3273	if (rc < 0) {
3274		return rc;
3275	} else if (rc) {
3276		/* Instruction-Fetching Exceptions - we can't detect the ilen.
3277		 * Forward by arbitrary ilc, injection will take care of
3278		 * nullification if necessary.
3279		 */
3280		pgm_info = vcpu->arch.pgm;
3281		ilen = 4;
3282	}
3283	pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
3284	kvm_s390_forward_psw(vcpu, ilen);
3285	return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
3286}
3287
3288static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
3289{
3290	struct mcck_volatile_info *mcck_info;
3291	struct sie_page *sie_page;
3292
3293	VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
3294		   vcpu->arch.sie_block->icptcode);
3295	trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
3296
3297	if (guestdbg_enabled(vcpu))
3298		kvm_s390_restore_guest_per_regs(vcpu);
3299
3300	vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
3301	vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
3302
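	/*
	 * sie64a() returns -EINTR when a host machine check hit while the
	 * guest was running; forward the collected info to the guest.
	 */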
3303	if (exit_reason == -EINTR) {
3304		VCPU_EVENT(vcpu, 3, "%s", "machine check");
3305		sie_page = container_of(vcpu->arch.sie_block,
3306					struct sie_page, sie_block);
3307		mcck_info = &sie_page->mcck_info;
3308		kvm_s390_reinject_machine_check(vcpu, mcck_info);
3309		return 0;
3310	}
3311
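	/*
	 * Handle intercepts in the kernel where possible; -EREMOTE hands the
	 * exit to userspace, guest page faults are resolved (async if
	 * possible) and anything else becomes an addressing exception.
	 */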
3312	if (vcpu->arch.sie_block->icptcode > 0) {
3313		int rc = kvm_handle_sie_intercept(vcpu);
3314
3315		if (rc != -EOPNOTSUPP)
3316			return rc;
3317		vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
3318		vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
3319		vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
3320		vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
3321		return -EREMOTE;
3322	} else if (exit_reason != -EFAULT) {
3323		vcpu->stat.exit_null++;
3324		return 0;
3325	} else if (kvm_is_ucontrol(vcpu->kvm)) {
3326		vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
3327		vcpu->run->s390_ucontrol.trans_exc_code =
3328						current->thread.gmap_addr;
3329		vcpu->run->s390_ucontrol.pgm_code = 0x10;
3330		return -EREMOTE;
3331	} else if (current->thread.gmap_pfault) {
3332		trace_kvm_s390_major_guest_pfault(vcpu);
3333		current->thread.gmap_pfault = 0;
3334		if (kvm_arch_setup_async_pf(vcpu))
3335			return 0;
3336		return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
3337	}
3338	return vcpu_post_run_fault_in_sie(vcpu);
3339}
3340
3341static int __vcpu_run(struct kvm_vcpu *vcpu)
3342{
3343	int rc, exit_reason;
3344
3345	/*
3346	 * We try to hold kvm->srcu during most of vcpu_run (except when
3347	 * running the guest), so that memslots (and other stuff) are protected.
3348	 */
3349	vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3350
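	/*
	 * Run the guest until an exit has to be handled by userspace, a
	 * signal is pending or a guestdbg exit is requested.
	 */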
3351	do {
3352		rc = vcpu_pre_run(vcpu);
3353		if (rc)
3354			break;
3355
3356		srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
3357		/*
3358		 * As PF_VCPU will be used in the fault handler, there must be
3359		 * no uaccess between guest_enter and guest_exit.
3360		 */
3361		local_irq_disable();
3362		guest_enter_irqoff();
3363		__disable_cpu_timer_accounting(vcpu);
3364		local_irq_enable();
3365		exit_reason = sie64a(vcpu->arch.sie_block,
3366				     vcpu->run->s.regs.gprs);
3367		local_irq_disable();
3368		__enable_cpu_timer_accounting(vcpu);
3369		guest_exit_irqoff();
3370		local_irq_enable();
3371		vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3372
3373		rc = vcpu_post_run(vcpu, exit_reason);
3374	} while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
3375
3376	srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
3377	return rc;
3378}
3379
3380static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3381{
3382	struct runtime_instr_cb *riccb;
3383	struct gs_cb *gscb;
3384
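	/* Copy the register state marked dirty by userspace into the vcpu. */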
3385	riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
3386	gscb = (struct gs_cb *) &kvm_run->s.regs.gscb;
3387	vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
3388	vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
3389	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
3390		kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
3391	if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
3392		memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
3393		/* some control register changes require a tlb flush */
3394		kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
3395	}
3396	if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
3397		kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
3398		vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
3399		vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
3400		vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
3401		vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
3402	}
3403	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
3404		vcpu->arch.pfault_token = kvm_run->s.regs.pft;
3405		vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
3406		vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
3407		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3408			kvm_clear_async_pf_completion_queue(vcpu);
3409	}
3410	/*
3411	 * If userspace sets the riccb (e.g. after migration) to a valid state,
3412	 * we should enable RI here instead of doing the lazy enablement.
3413	 */
3414	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
3415	    test_kvm_facility(vcpu->kvm, 64) &&
3416	    riccb->v &&
3417	    !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) {
3418		VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)");
3419		vcpu->arch.sie_block->ecb3 |= ECB3_RI;
3420	}
3421	/*
3422	 * If userspace sets the gscb (e.g. after migration) to non-zero,
3423	 * we should enable GS here instead of doing the lazy enablement.
3424	 */
3425	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) &&
3426	    test_kvm_facility(vcpu->kvm, 133) &&
3427	    gscb->gssm &&
3428	    !vcpu->arch.gs_enabled) {
3429		VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)");
3430		vcpu->arch.sie_block->ecb |= ECB_GS;
3431		vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
3432		vcpu->arch.gs_enabled = 1;
3433	}
3434	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_BPBC) &&
3435	    test_kvm_facility(vcpu->kvm, 82)) {
3436		vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
3437		vcpu->arch.sie_block->fpf |= kvm_run->s.regs.bpbc ? FPF_BPBC : 0;
3438	}
3439	save_access_regs(vcpu->arch.host_acrs);
3440	restore_access_regs(vcpu->run->s.regs.acrs);
3441	/* save host (userspace) fprs/vrs */
3442	save_fpu_regs();
3443	vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
3444	vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
3445	if (MACHINE_HAS_VX)
3446		current->thread.fpu.regs = vcpu->run->s.regs.vrs;
3447	else
3448		current->thread.fpu.regs = vcpu->run->s.regs.fprs;
3449	current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
3450	if (test_fp_ctl(current->thread.fpu.fpc))
3451		/* User space provided an invalid FPC, let's clear it */
3452		current->thread.fpu.fpc = 0;
3453	if (MACHINE_HAS_GS) {
3454		preempt_disable();
3455		__ctl_set_bit(2, 4);
3456		if (current->thread.gs_cb) {
3457			vcpu->arch.host_gscb = current->thread.gs_cb;
3458			save_gs_cb(vcpu->arch.host_gscb);
3459		}
3460		if (vcpu->arch.gs_enabled) {
3461			current->thread.gs_cb = (struct gs_cb *)
3462						&vcpu->run->s.regs.gscb;
3463			restore_gs_cb(current->thread.gs_cb);
3464		}
3465		preempt_enable();
3466	}
3467
3468	kvm_run->kvm_dirty_regs = 0;
3469}
3470
3471static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3472{
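	/* Mirror of sync_regs(): copy the vcpu state back into kvm_run. */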
3473	kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
3474	kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
3475	kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
3476	memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
3477	kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
3478	kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
3479	kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
3480	kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
3481	kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
3482	kvm_run->s.regs.pft = vcpu->arch.pfault_token;
3483	kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
3484	kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
3485	kvm_run->s.regs.bpbc = (vcpu->arch.sie_block->fpf & FPF_BPBC) == FPF_BPBC;
3486	save_access_regs(vcpu->run->s.regs.acrs);
3487	restore_access_regs(vcpu->arch.host_acrs);
3488	/* Save guest register state */
3489	save_fpu_regs();
3490	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
3491	/* Restore will be done lazily at return */
3492	current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
3493	current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
3494	if (MACHINE_HAS_GS) {
3495		__ctl_set_bit(2, 4);
3496		if (vcpu->arch.gs_enabled)
3497			save_gs_cb(current->thread.gs_cb);
3498		preempt_disable();
3499		current->thread.gs_cb = vcpu->arch.host_gscb;
3500		restore_gs_cb(vcpu->arch.host_gscb);
3501		preempt_enable();
3502		if (!vcpu->arch.host_gscb)
3503			__ctl_clear_bit(2, 4);
3504		vcpu->arch.host_gscb = NULL;
3505	}
3506
3507}
3508
3509int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3510{
3511	int rc;
3512
3513	if (kvm_run->immediate_exit)
3514		return -EINTR;
3515
3516	vcpu_load(vcpu);
3517
3518	if (guestdbg_exit_pending(vcpu)) {
3519		kvm_s390_prepare_debug_exit(vcpu);
3520		rc = 0;
3521		goto out;
3522	}
3523
3524	kvm_sigset_activate(vcpu);
3525
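	/*
	 * Unless userspace controls the cpu state, a run request implicitly
	 * starts the vcpu; running an explicitly stopped vcpu is an error.
	 */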
3526	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
3527		kvm_s390_vcpu_start(vcpu);
3528	} else if (is_vcpu_stopped(vcpu)) {
3529		pr_err_ratelimited("can't run stopped vcpu %d\n",
3530				   vcpu->vcpu_id);
3531		rc = -EINVAL;
3532		goto out;
3533	}
3534
3535	sync_regs(vcpu, kvm_run);
3536	enable_cpu_timer_accounting(vcpu);
3537
3538	might_fault();
3539	rc = __vcpu_run(vcpu);
3540
3541	if (signal_pending(current) && !rc) {
3542		kvm_run->exit_reason = KVM_EXIT_INTR;
3543		rc = -EINTR;
3544	}
3545
3546	if (guestdbg_exit_pending(vcpu) && !rc)  {
3547		kvm_s390_prepare_debug_exit(vcpu);
3548		rc = 0;
3549	}
3550
3551	if (rc == -EREMOTE) {
3552		/* userspace support is needed, kvm_run has been prepared */
3553		rc = 0;
3554	}
3555
3556	disable_cpu_timer_accounting(vcpu);
3557	store_regs(vcpu, kvm_run);
3558
3559	kvm_sigset_deactivate(vcpu);
3560
3561	vcpu->stat.exit_userspace++;
3562out:
3563	vcpu_put(vcpu);
3564	return rc;
3565}
3566
3567/*
3568 * store status at address
3569 * we have two special cases:
3570 * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
3571 * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
3572 */
3573int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
3574{
3575	unsigned char archmode = 1;
3576	freg_t fprs[NUM_FPRS];
3577	unsigned int px;
3578	u64 clkcomp, cputm;
3579	int rc;
3580
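	/*
	 * Resolve the special gpa values, then store the registers in the
	 * architected save area layout (offsets relative to
	 * __LC_FPREGS_SAVE_AREA).
	 */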
3581	px = kvm_s390_get_prefix(vcpu);
3582	if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
3583		if (write_guest_abs(vcpu, 163, &archmode, 1))
3584			return -EFAULT;
3585		gpa = 0;
3586	} else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
3587		if (write_guest_real(vcpu, 163, &archmode, 1))
3588			return -EFAULT;
3589		gpa = px;
3590	} else
3591		gpa -= __LC_FPREGS_SAVE_AREA;
3592
3593	/* manually convert vector registers if necessary */
3594	if (MACHINE_HAS_VX) {
3595		convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
3596		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
3597				     fprs, 128);
3598	} else {
3599		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
3600				     vcpu->run->s.regs.fprs, 128);
3601	}
3602	rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
3603			      vcpu->run->s.regs.gprs, 128);
3604	rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
3605			      &vcpu->arch.sie_block->gpsw, 16);
3606	rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
3607			      &px, 4);
3608	rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
3609			      &vcpu->run->s.regs.fpc, 4);
3610	rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
3611			      &vcpu->arch.sie_block->todpr, 4);
3612	cputm = kvm_s390_get_cpu_timer(vcpu);
3613	rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
3614			      &cputm, 8);
3615	clkcomp = vcpu->arch.sie_block->ckc >> 8;
3616	rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
3617			      &clkcomp, 8);
3618	rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
3619			      &vcpu->run->s.regs.acrs, 64);
3620	rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
3621			      &vcpu->arch.sie_block->gcr, 128);
3622	return rc ? -EFAULT : 0;
3623}
3624
3625int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
3626{
3627	/*
3628	 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
3629	 * switch in the run ioctl. Let's update our copies before we save
3630	 * them into the save area.
3631	 */
3632	save_fpu_regs();
3633	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
3634	save_access_regs(vcpu->run->s.regs.acrs);
3635
3636	return kvm_s390_store_status_unloaded(vcpu, addr);
3637}
3638
3639static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
3640{
3641	kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
3642	kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
3643}
3644
3645static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
3646{
3647	unsigned int i;
3648	struct kvm_vcpu *vcpu;
3649
3650	kvm_for_each_vcpu(i, vcpu, kvm) {
3651		__disable_ibs_on_vcpu(vcpu);
3652	}
3653}
3654
3655static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
3656{
3657	if (!sclp.has_ibs)
3658		return;
3659	kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
3660	kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
3661}
3662
3663void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
3664{
3665	int i, online_vcpus, started_vcpus = 0;
3666
3667	if (!is_vcpu_stopped(vcpu))
3668		return;
3669
3670	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
3671	/* Only one cpu at a time may enter/leave the STOPPED state. */
3672	spin_lock(&vcpu->kvm->arch.start_stop_lock);
3673	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
3674
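	/* Count how many other vcpus are already running. */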
3675	for (i = 0; i < online_vcpus; i++) {
3676		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
3677			started_vcpus++;
3678	}
3679
3680	if (started_vcpus == 0) {
3681		/* we're the only active VCPU -> speed it up */
3682		__enable_ibs_on_vcpu(vcpu);
3683	} else if (started_vcpus == 1) {
3684		/*
3685		 * As we are starting a second VCPU, we have to disable
3686		 * the IBS facility on all VCPUs to remove potentially
3687		 * outstanding ENABLE requests.
3688		 */
3689		__disable_ibs_on_all_vcpus(vcpu->kvm);
3690	}
3691
3692	kvm_s390_clear_cpuflags(vcpu, CPUSTAT_STOPPED);
3693	/*
3694	 * Another VCPU might have used IBS while we were offline.
3695	 * Let's play safe and flush the VCPU at startup.
3696	 */
3697	kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
3698	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
3699	return;
3700}
3701
3702void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
3703{
3704	int i, online_vcpus, started_vcpus = 0;
3705	struct kvm_vcpu *started_vcpu = NULL;
3706
3707	if (is_vcpu_stopped(vcpu))
3708		return;
3709
3710	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
3711	/* Only one cpu at a time may enter/leave the STOPPED state. */
3712	spin_lock(&vcpu->kvm->arch.start_stop_lock);
3713	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
3714
3715	/* SIGP STOP and SIGP STOP AND STORE STATUS have been fully processed */
3716	kvm_s390_clear_stop_irq(vcpu);
3717
3718	kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOPPED);
3719	__disable_ibs_on_vcpu(vcpu);
3720
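	/* Check whether exactly one other vcpu is left running. */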
3721	for (i = 0; i < online_vcpus; i++) {
3722		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
3723			started_vcpus++;
3724			started_vcpu = vcpu->kvm->vcpus[i];
3725		}
3726	}
3727
3728	if (started_vcpus == 1) {
3729		/*
3730		 * As we only have one VCPU left, we want to enable the
3731		 * IBS facility for that VCPU to speed it up.
3732		 */
3733		__enable_ibs_on_vcpu(started_vcpu);
3734	}
3735
3736	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
3737	return;
3738}
3739
3740static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
3741				     struct kvm_enable_cap *cap)
3742{
3743	int r;
3744
3745	if (cap->flags)
3746		return -EINVAL;
3747
3748	switch (cap->cap) {
3749	case KVM_CAP_S390_CSS_SUPPORT:
3750		if (!vcpu->kvm->arch.css_support) {
3751			vcpu->kvm->arch.css_support = 1;
3752			VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
3753			trace_kvm_s390_enable_css(vcpu->kvm);
3754		}
3755		r = 0;
3756		break;
3757	default:
3758		r = -EINVAL;
3759		break;
3760	}
3761	return r;
3762}
3763
3764static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
3765				  struct kvm_s390_mem_op *mop)
3766{
3767	void __user *uaddr = (void __user *)mop->buf;
3768	void *tmpbuf = NULL;
3769	int r, srcu_idx;
3770	const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
3771				    | KVM_S390_MEMOP_F_CHECK_ONLY;
3772
3773	if (mop->flags & ~supported_flags)
3774		return -EINVAL;
3775
3776	if (mop->size > MEM_OP_MAX_SIZE)
3777		return -E2BIG;
3778
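	/* CHECK_ONLY operations only validate the access, no bounce buffer. */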
3779	if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
3780		tmpbuf = vmalloc(mop->size);
3781		if (!tmpbuf)
3782			return -ENOMEM;
3783	}
3784
3785	srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3786
3787	switch (mop->op) {
3788	case KVM_S390_MEMOP_LOGICAL_READ:
3789		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
3790			r = check_gva_range(vcpu, mop->gaddr, mop->ar,
3791					    mop->size, GACC_FETCH);
3792			break;
3793		}
3794		r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
3795		if (r == 0) {
3796			if (copy_to_user(uaddr, tmpbuf, mop->size))
3797				r = -EFAULT;
3798		}
3799		break;
3800	case KVM_S390_MEMOP_LOGICAL_WRITE:
3801		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
3802			r = check_gva_range(vcpu, mop->gaddr, mop->ar,
3803					    mop->size, GACC_STORE);
3804			break;
3805		}
3806		if (copy_from_user(tmpbuf, uaddr, mop->size)) {
3807			r = -EFAULT;
3808			break;
3809		}
3810		r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
3811		break;
3812	default:
3813		r = -EINVAL;
3814	}
3815
3816	srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
3817
3818	if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
3819		kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
3820
3821	vfree(tmpbuf);
3822	return r;
3823}
3824
3825long kvm_arch_vcpu_async_ioctl(struct file *filp,
3826			       unsigned int ioctl, unsigned long arg)
3827{
3828	struct kvm_vcpu *vcpu = filp->private_data;
3829	void __user *argp = (void __user *)arg;
3830
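	/*
	 * These injection ioctls are handled without taking the vcpu mutex,
	 * so userspace can inject interrupts while the vcpu is running.
	 */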
3831	switch (ioctl) {
3832	case KVM_S390_IRQ: {
3833		struct kvm_s390_irq s390irq;
3834
3835		if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
3836			return -EFAULT;
3837		return kvm_s390_inject_vcpu(vcpu, &s390irq);
3838	}
3839	case KVM_S390_INTERRUPT: {
3840		struct kvm_s390_interrupt s390int;
3841		struct kvm_s390_irq s390irq;
3842
3843		if (copy_from_user(&s390int, argp, sizeof(s390int)))
3844			return -EFAULT;
3845		if (s390int_to_s390irq(&s390int, &s390irq))
3846			return -EINVAL;
3847		return kvm_s390_inject_vcpu(vcpu, &s390irq);
3848	}
3849	}
3850	return -ENOIOCTLCMD;
3851}
3852
3853long kvm_arch_vcpu_ioctl(struct file *filp,
3854			 unsigned int ioctl, unsigned long arg)
3855{
3856	struct kvm_vcpu *vcpu = filp->private_data;
3857	void __user *argp = (void __user *)arg;
3858	int idx;
3859	long r;
3860
3861	vcpu_load(vcpu);
3862
3863	switch (ioctl) {
3864	case KVM_S390_STORE_STATUS:
3865		idx = srcu_read_lock(&vcpu->kvm->srcu);
3866		r = kvm_s390_vcpu_store_status(vcpu, arg);
3867		srcu_read_unlock(&vcpu->kvm->srcu, idx);
3868		break;
3869	case KVM_S390_SET_INITIAL_PSW: {
3870		psw_t psw;
3871
3872		r = -EFAULT;
3873		if (copy_from_user(&psw, argp, sizeof(psw)))
3874			break;
3875		r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
3876		break;
3877	}
3878	case KVM_S390_INITIAL_RESET:
3879		r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
3880		break;
3881	case KVM_SET_ONE_REG:
3882	case KVM_GET_ONE_REG: {
3883		struct kvm_one_reg reg;
3884		r = -EFAULT;
3885		if (copy_from_user(&reg, argp, sizeof(reg)))
3886			break;
3887		if (ioctl == KVM_SET_ONE_REG)
3888			r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
3889		else
3890			r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
3891		break;
3892	}
3893#ifdef CONFIG_KVM_S390_UCONTROL
3894	case KVM_S390_UCAS_MAP: {
3895		struct kvm_s390_ucas_mapping ucasmap;
3896
3897		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
3898			r = -EFAULT;
3899			break;
3900		}
3901
3902		if (!kvm_is_ucontrol(vcpu->kvm)) {
3903			r = -EINVAL;
3904			break;
3905		}
3906
3907		r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
3908				     ucasmap.vcpu_addr, ucasmap.length);
3909		break;
3910	}
3911	case KVM_S390_UCAS_UNMAP: {
3912		struct kvm_s390_ucas_mapping ucasmap;
3913
3914		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
3915			r = -EFAULT;
3916			break;
3917		}
3918
3919		if (!kvm_is_ucontrol(vcpu->kvm)) {
3920			r = -EINVAL;
3921			break;
3922		}
3923
3924		r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
3925			ucasmap.length);
3926		break;
3927	}
3928#endif
3929	case KVM_S390_VCPU_FAULT: {
3930		r = gmap_fault(vcpu->arch.gmap, arg, 0);
3931		break;
3932	}
3933	case KVM_ENABLE_CAP:
3934	{
3935		struct kvm_enable_cap cap;
3936		r = -EFAULT;
3937		if (copy_from_user(&cap, argp, sizeof(cap)))
3938			break;
3939		r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
3940		break;
3941	}
3942	case KVM_S390_MEM_OP: {
3943		struct kvm_s390_mem_op mem_op;
3944
3945		if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
3946			r = kvm_s390_guest_mem_op(vcpu, &mem_op);
3947		else
3948			r = -EFAULT;
3949		break;
3950	}
3951	case KVM_S390_SET_IRQ_STATE: {
3952		struct kvm_s390_irq_state irq_state;
3953
3954		r = -EFAULT;
3955		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
3956			break;
3957		if (irq_state.len > VCPU_IRQS_MAX_BUF ||
3958		    irq_state.len == 0 ||
3959		    irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
3960			r = -EINVAL;
3961			break;
3962		}
3963		/* do not use irq_state.flags, it will break old QEMUs */
3964		r = kvm_s390_set_irq_state(vcpu,
3965					   (void __user *) irq_state.buf,
3966					   irq_state.len);
3967		break;
3968	}
3969	case KVM_S390_GET_IRQ_STATE: {
3970		struct kvm_s390_irq_state irq_state;
3971
3972		r = -EFAULT;
3973		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
3974			break;
3975		if (irq_state.len == 0) {
3976			r = -EINVAL;
3977			break;
3978		}
3979		/* do not use irq_state.flags, it will break old QEMUs */
3980		r = kvm_s390_get_irq_state(vcpu,
3981					   (__u8 __user *)  irq_state.buf,
3982					   irq_state.len);
3983		break;
3984	}
3985	default:
3986		r = -ENOTTY;
3987	}
3988
3989	vcpu_put(vcpu);
3990	return r;
3991}
3992
3993int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
3994{
3995#ifdef CONFIG_KVM_S390_UCONTROL
3996	if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
3997		 && (kvm_is_ucontrol(vcpu->kvm))) {
3998		vmf->page = virt_to_page(vcpu->arch.sie_block);
3999		get_page(vmf->page);
4000		return 0;
4001	}
4002#endif
4003	return VM_FAULT_SIGBUS;
4004}
4005
4006int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
4007			    unsigned long npages)
4008{
4009	return 0;
4010}
4011
4012/* Section: memory related */
4013int kvm_arch_prepare_memory_region(struct kvm *kvm,
4014				   struct kvm_memory_slot *memslot,
4015				   const struct kvm_userspace_memory_region *mem,
4016				   enum kvm_mr_change change)
4017{
4018	/* A few sanity checks. Memory slots have to start and end at a
4019	   segment boundary (1MB). The memory in userland may be fragmented
4020	   into various different vmas. It is okay to mmap() and munmap()
4021	   parts of this slot at any time after this call. */
4022
4023	if (mem->userspace_addr & 0xffffful)
4024		return -EINVAL;
4025
4026	if (mem->memory_size & 0xffffful)
4027		return -EINVAL;
4028
4029	if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
4030		return -EINVAL;
4031
4032	return 0;
4033}
4034
4035void kvm_arch_commit_memory_region(struct kvm *kvm,
4036				const struct kvm_userspace_memory_region *mem,
4037				const struct kvm_memory_slot *old,
4038				const struct kvm_memory_slot *new,
4039				enum kvm_mr_change change)
4040{
4041	int rc;
4042
4043	/* If the basics of the memslot do not change, we do not want
4044	 * to update the gmap. Every update causes several unnecessary
4045	 * segment translation exceptions. This is usually handled just
4046	 * fine by the normal fault handler + gmap, but it will also
4047	 * cause faults on the prefix page of running guest CPUs.
4048	 */
4049	if (old->userspace_addr == mem->userspace_addr &&
4050	    old->base_gfn * PAGE_SIZE == mem->guest_phys_addr &&
4051	    old->npages * PAGE_SIZE == mem->memory_size)
4052		return;
4053
4054	rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
4055		mem->guest_phys_addr, mem->memory_size);
4056	if (rc)
4057		pr_warn("failed to commit memory region\n");
4058	return;
4059}
4060
4061static inline unsigned long nonhyp_mask(int i)
4062{
4063	unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;
4064
4065	return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
4066}
4067
4068void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
4069{
4070	vcpu->valid_wakeup = false;
4071}
4072
4073static int __init kvm_s390_init(void)
4074{
4075	int i;
4076
4077	if (!sclp.has_sief2) {
4078		pr_info("SIE not available\n");
4079		return -ENODEV;
4080	}
4081
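	/*
	 * Base the guest facility list on the host facilities, masked by
	 * nonhyp_mask() so that only bits the SCLP data allows for guests
	 * end up in kvm_s390_fac_base.
	 */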
4082	for (i = 0; i < 16; i++)
4083		kvm_s390_fac_base[i] |=
4084			S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i);
4085
4086	return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
4087}
4088
4089static void __exit kvm_s390_exit(void)
4090{
4091	kvm_exit();
4092}
4093
4094module_init(kvm_s390_init);
4095module_exit(kvm_s390_exit);
4096
4097/*
4098 * Enable autoloading of the kvm module.
4099 * Note that we add the module alias here instead of virt/kvm/kvm_main.c
4100 * since x86 takes a different approach.
4101 */
4102#include <linux/miscdevice.h>
4103MODULE_ALIAS_MISCDEV(KVM_MINOR);
4104MODULE_ALIAS("devname:kvm");