   1// SPDX-License-Identifier: GPL-2.0-only
   2// Copyright (C) 2022 Linutronix GmbH, John Ogness
   3// Copyright (C) 2022 Intel, Thomas Gleixner
   4
   5#include <linux/atomic.h>
   6#include <linux/bug.h>
   7#include <linux/console.h>
   8#include <linux/delay.h>
   9#include <linux/errno.h>
  10#include <linux/export.h>
  11#include <linux/init.h>
  12#include <linux/irqflags.h>
  13#include <linux/kthread.h>
  14#include <linux/minmax.h>
  15#include <linux/percpu.h>
  16#include <linux/preempt.h>
  17#include <linux/slab.h>
  18#include <linux/smp.h>
  19#include <linux/stddef.h>
  20#include <linux/string.h>
  21#include <linux/types.h>
  22#include "internal.h"
  23#include "printk_ringbuffer.h"
  24/*
  25 * Printk console printing implementation for consoles that do not depend
  26 * on the legacy style console_lock mechanism.
  27 *
  28 * The state of the console is maintained in the "nbcon_state" atomic
  29 * variable.
  30 *
  31 * The console is locked when:
  32 *
  33 *   - The 'prio' field contains the priority of the context that owns the
  34 *     console. Only higher priority contexts are allowed to take over the
  35 *     lock. A value of 0 (NBCON_PRIO_NONE) means the console is not locked.
  36 *
  37 *   - The 'cpu' field denotes on which CPU the console is locked. It is used
  38 *     to prevent busy waiting on the same CPU. Also it informs the lock owner
  39 *     that it has lost the lock in a more complex scenario when the lock was
  40 *     taken over by a higher priority context, released, and taken on another
  41 *     CPU with the same priority as the interrupted owner.
  42 *
  43 * The acquire mechanism uses a few more fields:
  44 *
  45 *   - The 'req_prio' field is used by the handover approach to make the
  46 *     current owner aware that there is a context with a higher priority
  47 *     waiting for the friendly handover.
  48 *
  49 *   - The 'unsafe' field allows taking over the console in a safe way in the
  50 *     middle of emitting a message. The field is set only when accessing some
  51 *     shared resources or when the console device is manipulated. It can be
  52 *     cleared, for example, after emitting one character when the console
  53 *     device is in a consistent state.
  54 *
  55 *   - The 'unsafe_takeover' field is set when a hostile takeover took the
  56 *     console in an unsafe state. The console will stay in the unsafe state
  57 *     until re-initialized.
  58 *
  59 * The acquire mechanism uses three approaches:
  60 *
  61 *   1) Direct acquire when the console is not owned or is owned by a lower
  62 *      priority context and is in a safe state.
  63 *
  64 *   2) Friendly handover mechanism uses a request/grant handshake. It is used
  65 *      when the current owner has lower priority and the console is in an
  66 *      unsafe state.
  67 *
  68 *      The requesting context:
  69 *
  70 *        a) Sets its priority into the 'req_prio' field.
  71 *
  72 *        b) Waits (with a timeout) for the owning context to unlock the
  73 *           console.
  74 *
  75 *        c) Takes the lock and clears the 'req_prio' field.
  76 *
  77 *      The owning context:
  78 *
  79 *        a) Observes the 'req_prio' field set on exit from the unsafe
  80 *           console state.
  81 *
  82 *        b) Gives up console ownership by clearing the 'prio' field.
  83 *
  84 *   3) Unsafe hostile takeover allows taking over the lock even when the
  85 *      console is in an unsafe state. It is used only in panic() by the final
  86 *      attempt to flush consoles in a "try and hope" mode.
  87 *
  88 *      Note that separate record buffers are used in panic(). As a result,
  89 *      the messages can be read and formatted without any risk even after
  90 *      using the hostile takeover in unsafe state.
  91 *
  92 * The release function simply clears the 'prio' field.
  93 *
  94 * All operations on @console::nbcon_state are atomic cmpxchg based to
  95 * handle concurrency.
  96 *
  97 * The acquire/release functions implement only minimal policies:
  98 *
  99 *   - Preference for higher priority contexts.
 100 *   - Protection of the panic CPU.
 101 *
 102 * All other policy decisions must be made at the call sites:
 103 *
 104 *   - What is marked as an unsafe section.
 105 *   - Whether to spin-wait if there is already an owner and the console is
 106 *     in an unsafe state.
 107 *   - Whether to attempt an unsafe hostile takeover.
 108 *
 109 * The design allows implementing the well-known pattern:
 110 *
 111 *     acquire()
 112 *     output_one_printk_record()
 113 *     release()
 114 *
 115 * The output of one printk record might be interrupted by a higher priority
 116 * context. The new owner is supposed to reprint the entire interrupted record
 117 * from scratch.
 118 */
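
    /*
     * Expanded with unsafe sections, the same pattern at a typical call site
     * looks like this sketch (illustrative only; the driver decides what
     * counts as unsafe):
     *
     *	acquire()
     *	enter_unsafe()
     *	write_to_hardware()
     *	exit_unsafe()
     *	release()
     *
     * where each step can fail because a higher priority context took over
     * ownership, in which case the caller must back out immediately.
     */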
 119
 120/**
 121 * nbcon_state_set - Helper function to set the console state
 122 * @con:	Console to update
 123 * @new:	The new state to write
 124 *
 125 * Only to be used when the console is not yet or no longer visible in the
 126 * system. Otherwise use nbcon_state_try_cmpxchg().
 127 */
 128static inline void nbcon_state_set(struct console *con, struct nbcon_state *new)
 129{
 130	atomic_set(&ACCESS_PRIVATE(con, nbcon_state), new->atom);
 131}
 132
 133/**
 134 * nbcon_state_read - Helper function to read the console state
 135 * @con:	Console to read
 136 * @state:	The state to store the result
 137 */
 138static inline void nbcon_state_read(struct console *con, struct nbcon_state *state)
 139{
 140	state->atom = atomic_read(&ACCESS_PRIVATE(con, nbcon_state));
 141}
 142
 143/**
 144 * nbcon_state_try_cmpxchg() - Helper function for atomic_try_cmpxchg() on console state
 145 * @con:	Console to update
 146 * @cur:	Old/expected state
 147 * @new:	New state
 148 *
 149 * Return: True on success. False on failure, in which case @cur is updated.
 150 */
 151static inline bool nbcon_state_try_cmpxchg(struct console *con, struct nbcon_state *cur,
 152					   struct nbcon_state *new)
 153{
 154	return atomic_try_cmpxchg(&ACCESS_PRIVATE(con, nbcon_state), &cur->atom, new->atom);
 155}
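
    /*
     * These three helpers are typically combined into the following retry
     * pattern (a sketch only; the concrete modification depends on the
     * caller):
     *
     *	struct nbcon_state cur;
     *	struct nbcon_state new;
     *
     *	nbcon_state_read(con, &cur);
     *	do {
     *		new.atom = cur.atom;
     *		new.prio = NBCON_PRIO_NORMAL;	// the desired modification
     *	} while (!nbcon_state_try_cmpxchg(con, &cur, &new));
     */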
 156
 157/**
 158 * nbcon_seq_read - Read the current console sequence
 159 * @con:	Console to read the sequence of
 160 *
 161 * Return:	Sequence number of the next record to print on @con.
 162 */
 163u64 nbcon_seq_read(struct console *con)
 164{
 165	unsigned long nbcon_seq = atomic_long_read(&ACCESS_PRIVATE(con, nbcon_seq));
 166
 167	return __ulseq_to_u64seq(prb, nbcon_seq);
 168}
 169
 170/**
 171 * nbcon_seq_force - Force console sequence to a specific value
 172 * @con:	Console to work on
 173 * @seq:	Sequence number value to set
 174 *
 175 * Only to be used during init (before registration) or in extreme situations
 176 * (such as panic with CONSOLE_REPLAY_ALL).
 177 */
 178void nbcon_seq_force(struct console *con, u64 seq)
 179{
 180	/*
 181	 * If the specified record no longer exists, the oldest available record
 182	 * is chosen. This is especially important on 32bit systems because only
 183	 * the lower 32 bits of the sequence number are stored. The upper 32 bits
 184	 * are derived from the sequence numbers available in the ringbuffer.
 185	 */
 186	u64 valid_seq = max_t(u64, seq, prb_first_valid_seq(prb));
 187
 188	atomic_long_set(&ACCESS_PRIVATE(con, nbcon_seq), __u64seq_to_ulseq(valid_seq));
 189}
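
    /*
     * For example (a sketch of one expected use), replaying the entire
     * ringbuffer on a console starts over from the oldest available record:
     *
     *	nbcon_seq_force(con, prb_first_valid_seq(prb));
     */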
 190
 191/**
 192 * nbcon_seq_try_update - Try to update the console sequence number
 193 * @ctxt:	Pointer to an acquire context that contains
 194 *		all information about the acquire mode
 195 * @new_seq:	The new sequence number to set
 196 *
 197 * @ctxt->seq is updated to the new value of @con::nbcon_seq (expanded to
 198 * the 64bit value). This could be a different value than @new_seq if
 199 * nbcon_seq_force() was used or the current context no longer owns the
 201 * console. In the latter case, it will stop printing anyway.
 201 */
 202static void nbcon_seq_try_update(struct nbcon_context *ctxt, u64 new_seq)
 203{
 204	unsigned long nbcon_seq = __u64seq_to_ulseq(ctxt->seq);
 205	struct console *con = ctxt->console;
 206
 207	if (atomic_long_try_cmpxchg(&ACCESS_PRIVATE(con, nbcon_seq), &nbcon_seq,
 208				    __u64seq_to_ulseq(new_seq))) {
 209		ctxt->seq = new_seq;
 210	} else {
 211		ctxt->seq = nbcon_seq_read(con);
 212	}
 213}
 214
 215/**
 216 * nbcon_context_try_acquire_direct - Try to acquire directly
 217 * @ctxt:	The context of the caller
 218 * @cur:	The current console state
 219 *
 220 * Acquire the console when it is released. Also acquire the console when
 221 * the current owner has a lower priority and the console is in a safe state.
 222 *
 223 * Return:	0 on success. Otherwise, an error code on failure. Also @cur
 224 *		is updated to the latest state if this function failed to modify it.
 225 *
 226 * Errors:
 227 *
 228 *	-EPERM:		A panic is in progress and this is not the panic CPU.
 229 *			Or the current owner or waiter has the same or higher
 230 *			priority. No acquire method can be successful in
 231 *			this case.
 232 *
 233 *	-EBUSY:		The current owner has a lower priority but the console
 234 *			is in an unsafe state. The caller should try using
 235 *			the handover acquire method.
 236 */
 237static int nbcon_context_try_acquire_direct(struct nbcon_context *ctxt,
 238					    struct nbcon_state *cur)
 239{
 240	unsigned int cpu = smp_processor_id();
 241	struct console *con = ctxt->console;
 242	struct nbcon_state new;
 243
 244	do {
 245		/*
 246		 * Panic does not imply that the console is owned. However, it
 247		 * is critical that non-panic CPUs during panic are unable to
 248		 * acquire ownership in order to satisfy the assumptions of
 249		 * nbcon_waiter_matches(). In particular, the assumption that
 250		 * lower priorities are ignored during panic.
 251		 */
 252		if (other_cpu_in_panic())
 253			return -EPERM;
 254
 255		if (ctxt->prio <= cur->prio || ctxt->prio <= cur->req_prio)
 256			return -EPERM;
 257
 258		if (cur->unsafe)
 259			return -EBUSY;
 260
 261		/*
 262		 * The console should never be safe for a direct acquire
 263		 * if an unsafe hostile takeover has ever happened.
 264		 */
 265		WARN_ON_ONCE(cur->unsafe_takeover);
 266
 267		new.atom = cur->atom;
 268		new.prio	= ctxt->prio;
 269		new.req_prio	= NBCON_PRIO_NONE;
 270		new.unsafe	= cur->unsafe_takeover;
 271		new.cpu		= cpu;
 272
 273	} while (!nbcon_state_try_cmpxchg(con, cur, &new));
 274
 275	return 0;
 276}
 277
 278static bool nbcon_waiter_matches(struct nbcon_state *cur, int expected_prio)
 279{
 280	/*
 281	 * The request context is well defined by the @req_prio because:
 282	 *
 283	 * - Only a context with a priority higher than the owner can become
 284	 *   a waiter.
 285	 * - Only a context with a priority higher than the waiter can
 286	 *   directly take over the request.
 287	 * - There are only three priorities.
 288	 * - Only one CPU is allowed to request PANIC priority.
 289	 * - Lower priorities are ignored during panic() until reboot.
 290	 *
 291	 * As a result, the following scenario is *not* possible:
 292	 *
 293	 * 1. This context is currently a waiter.
 294	 * 2. Another context with a higher priority than this context
 295	 *    directly takes ownership.
 296	 * 3. The higher priority context releases the ownership.
 297	 * 4. Another lower priority context takes the ownership.
 298	 * 5. Another context with the same priority as this context
 299	 *    creates a request and starts waiting.
 300	 *
 301	 * Event #1 implies this context is EMERGENCY.
 302	 * Event #2 implies the new context is PANIC.
 303	 * Event #3 occurs when panic() has flushed the console.
 304	 * Events #4 and #5 are not possible due to the other_cpu_in_panic()
 305	 * check in nbcon_context_try_acquire_direct().
 306	 */
 307
 308	return (cur->req_prio == expected_prio);
 309}
 310
 311/**
 312 * nbcon_context_try_acquire_requested - Try to acquire after having
 313 *					 requested a handover
 314 * @ctxt:	The context of the caller
 315 * @cur:	The current console state
 316 *
 317 * This is a helper function for nbcon_context_try_acquire_handover().
 318 * It is called when the console is in an unsafe state. The current
 319 * owner will release the console on exit from the unsafe region.
 320 *
 321 * Return:	0 on success and @cur is updated to the new console state.
 322 *		Otherwise an error code on failure.
 323 *
 324 * Errors:
 325 *
 326 *	-EPERM:		A panic is in progress and this is not the panic CPU
 327 *			or this context is no longer the waiter.
 328 *
 329 *	-EBUSY:		The console is still locked. The caller should
 330 *			continue waiting.
 331 *
 332 * Note: The caller must still remove the request when an error has occurred
 333 *       except when this context is no longer the waiter.
 334 */
 335static int nbcon_context_try_acquire_requested(struct nbcon_context *ctxt,
 336					       struct nbcon_state *cur)
 337{
 338	unsigned int cpu = smp_processor_id();
 339	struct console *con = ctxt->console;
 340	struct nbcon_state new;
 341
 342	/* Note that the caller must still remove the request! */
 343	if (other_cpu_in_panic())
 344		return -EPERM;
 345
 346	/*
 347	 * Note that the waiter will also change if there was an unsafe
 348	 * hostile takeover.
 349	 */
 350	if (!nbcon_waiter_matches(cur, ctxt->prio))
 351		return -EPERM;
 352
 353	/* If still locked, caller should continue waiting. */
 354	if (cur->prio != NBCON_PRIO_NONE)
 355		return -EBUSY;
 356
 357	/*
 358	 * The previous owner should have never released ownership
 359	 * in an unsafe region.
 360	 */
 361	WARN_ON_ONCE(cur->unsafe);
 362
 363	new.atom = cur->atom;
 364	new.prio	= ctxt->prio;
 365	new.req_prio	= NBCON_PRIO_NONE;
 366	new.unsafe	= cur->unsafe_takeover;
 367	new.cpu		= cpu;
 368
 369	if (!nbcon_state_try_cmpxchg(con, cur, &new)) {
 370		/*
 371		 * The acquire could fail only when it has been taken
 372		 * over by a higher priority context.
 373		 */
 374		WARN_ON_ONCE(nbcon_waiter_matches(cur, ctxt->prio));
 375		return -EPERM;
 376	}
 377
 378	/* Handover success. This context now owns the console. */
 379	return 0;
 380}
 381
 382/**
 383 * nbcon_context_try_acquire_handover - Try to acquire via handover
 384 * @ctxt:	The context of the caller
 385 * @cur:	The current console state
 386 *
 387 * The function must be called only when the context has higher priority
 388 * than the current owner and the console is in an unsafe state.
 389 * This is the case when nbcon_context_try_acquire_direct() returns -EBUSY.
 390 *
 391 * The function sets the "req_prio" field to make the current owner aware of
 392 * the request. Then it waits until the current owner releases the console,
 393 * an even higher priority context takes over the request, or the timeout expires.
 394 *
 395 * The current owner checks the "req_prio" field on exit from the unsafe
 396 * region and releases the console. It does not touch the "req_prio" field
 397 * so that the console stays reserved for the waiter.
 398 *
 399 * Return:	0 on success. Otherwise, an error code on failure. Also @cur
 400 *		is updated to the latest state if this function failed to modify it.
 401 *
 402 * Errors:
 403 *
 404 *	-EPERM:		A panic is in progress and this is not the panic CPU.
 405 *			Or a higher priority context has taken over the
 406 *			console or the handover request.
 407 *
 408 *	-EBUSY:		The current owner is on the same CPU so the
 409 *			handshake cannot work. Or the caller is not
 410 *			willing to wait (zero timeout). Or the console does
 411 *			not enter the safe state before the timeout passes. The
 412 *			caller might still use the unsafe hostile takeover
 413 *			when allowed.
 414 *
 415 *	-EAGAIN:	@cur has changed when creating the handover request.
 416 *			The caller should retry with direct acquire.
 417 */
 418static int nbcon_context_try_acquire_handover(struct nbcon_context *ctxt,
 419					      struct nbcon_state *cur)
 420{
 421	unsigned int cpu = smp_processor_id();
 422	struct console *con = ctxt->console;
 423	struct nbcon_state new;
 424	int timeout;
 425	int request_err = -EBUSY;
 426
 427	/*
 428	 * Check that the handover is called when the direct acquire failed
 429	 * with -EBUSY.
 430	 */
 431	WARN_ON_ONCE(ctxt->prio <= cur->prio || ctxt->prio <= cur->req_prio);
 432	WARN_ON_ONCE(!cur->unsafe);
 433
 434	/* Handover is not possible on the same CPU. */
 435	if (cur->cpu == cpu)
 436		return -EBUSY;
 437
 438	/*
 439	 * Console stays unsafe after an unsafe takeover until re-initialized.
 440	 * Waiting is not going to help in this case.
 441	 */
 442	if (cur->unsafe_takeover)
 443		return -EBUSY;
 444
 445	/* Is the caller willing to wait? */
 446	if (ctxt->spinwait_max_us == 0)
 447		return -EBUSY;
 448
 449	/*
 450	 * Setup a request for the handover. The caller should try to acquire
 451	 * the console directly when the current state has been modified.
 452	 */
 453	new.atom = cur->atom;
 454	new.req_prio = ctxt->prio;
 455	if (!nbcon_state_try_cmpxchg(con, cur, &new))
 456		return -EAGAIN;
 457
 458	cur->atom = new.atom;
 459
 460	/* Wait until there is no owner and then acquire the console. */
 461	for (timeout = ctxt->spinwait_max_us; timeout >= 0; timeout--) {
 462		/* On successful acquire, this request is cleared. */
 463		request_err = nbcon_context_try_acquire_requested(ctxt, cur);
 464		if (!request_err)
 465			return 0;
 466
 467		/*
 468		 * If the acquire should be aborted, it must be ensured
 469		 * that the request is removed before returning to the caller.
 470		 */
 471		if (request_err == -EPERM)
 472			break;
 473
 474		udelay(1);
 475
 476		/* Re-read the state because some time has passed. */
 477		nbcon_state_read(con, cur);
 478	}
 479
 480	/* Timed out or aborted. Carefully remove handover request. */
 481	do {
 482		/*
 483		 * No need to remove request if there is a new waiter. This
 484		 * can only happen if a higher priority context has taken over
 485		 * the console or the handover request.
 486		 */
 487		if (!nbcon_waiter_matches(cur, ctxt->prio))
 488			return -EPERM;
 489
 490		/* Unset request for handover. */
 491		new.atom = cur->atom;
 492		new.req_prio = NBCON_PRIO_NONE;
 493		if (nbcon_state_try_cmpxchg(con, cur, &new)) {
 494			/*
 495			 * Request successfully unset. Report failure of
 496			 * acquiring via handover.
 497			 */
 498			cur->atom = new.atom;
 499			return request_err;
 500		}
 501
 502		/*
 503		 * Unable to remove request. Try to acquire in case
 504		 * the owner has released the lock.
 505		 */
 506	} while (nbcon_context_try_acquire_requested(ctxt, cur));
 507
 508	/* Lucky timing. The acquire succeeded while removing the request. */
 509	return 0;
 510}
 511
 512/**
 513 * nbcon_context_try_acquire_hostile - Acquire via unsafe hostile takeover
 514 * @ctxt:	The context of the caller
 515 * @cur:	The current console state
 516 *
 517 * Acquire the console even in the unsafe state.
 518 *
 519 * It is permitted only by setting the 'allow_unsafe_takeover' field,
 520 * which is done only by the final attempt to flush messages in panic().
 521 *
 522 * Return:	0 on success. -EPERM when not allowed by the context.
 523 */
 524static int nbcon_context_try_acquire_hostile(struct nbcon_context *ctxt,
 525					     struct nbcon_state *cur)
 526{
 527	unsigned int cpu = smp_processor_id();
 528	struct console *con = ctxt->console;
 529	struct nbcon_state new;
 530
 531	if (!ctxt->allow_unsafe_takeover)
 532		return -EPERM;
 533
 534	/* Ensure caller is allowed to perform unsafe hostile takeovers. */
 535	if (WARN_ON_ONCE(ctxt->prio != NBCON_PRIO_PANIC))
 536		return -EPERM;
 537
 538	/*
 539	 * Check that try_acquire_direct() and try_acquire_handover() returned
 540	 * -EBUSY in the right situation.
 541	 */
 542	WARN_ON_ONCE(ctxt->prio <= cur->prio || ctxt->prio <= cur->req_prio);
 543	WARN_ON_ONCE(cur->unsafe != true);
 544
 545	do {
 546		new.atom = cur->atom;
 547		new.cpu			= cpu;
 548		new.prio		= ctxt->prio;
 549		new.unsafe		|= cur->unsafe_takeover;
 550		new.unsafe_takeover	|= cur->unsafe;
 551
 552	} while (!nbcon_state_try_cmpxchg(con, cur, &new));
 553
 554	return 0;
 555}
 556
 557static struct printk_buffers panic_nbcon_pbufs;
 558
 559/**
 560 * nbcon_context_try_acquire - Try to acquire nbcon console
 561 * @ctxt:	The context of the caller
 562 *
 563 * Context:	Under @ctxt->con->device_lock() or local_irq_save().
 564 * Return:	True if the console was acquired. False otherwise.
 565 *
 566 * If the caller allowed an unsafe hostile takeover, on success the
 567 * caller should check the current console state to see if it is
 568 * in an unsafe state. Otherwise, on success the caller may assume
 569 * the console is not in an unsafe state.
 570 */
 571static bool nbcon_context_try_acquire(struct nbcon_context *ctxt)
 572{
 573	unsigned int cpu = smp_processor_id();
 574	struct console *con = ctxt->console;
 575	struct nbcon_state cur;
 576	int err;
 577
 578	nbcon_state_read(con, &cur);
 579try_again:
 580	err = nbcon_context_try_acquire_direct(ctxt, &cur);
 581	if (err != -EBUSY)
 582		goto out;
 583
 584	err = nbcon_context_try_acquire_handover(ctxt, &cur);
 585	if (err == -EAGAIN)
 586		goto try_again;
 587	if (err != -EBUSY)
 588		goto out;
 589
 590	err = nbcon_context_try_acquire_hostile(ctxt, &cur);
 591out:
 592	if (err)
 593		return false;
 594
 595	/* Acquire succeeded. */
 596
 597	/* Assign the appropriate buffer for this context. */
 598	if (atomic_read(&panic_cpu) == cpu)
 599		ctxt->pbufs = &panic_nbcon_pbufs;
 600	else
 601		ctxt->pbufs = con->pbufs;
 602
 603	/* Set the record sequence for this context to print. */
 604	ctxt->seq = nbcon_seq_read(ctxt->console);
 605
 606	return true;
 607}
 608
 609static bool nbcon_owner_matches(struct nbcon_state *cur, int expected_cpu,
 610				int expected_prio)
 611{
 612	/*
 613	 * A similar function, nbcon_waiter_matches(), only deals with
 614	 * EMERGENCY and PANIC priorities. However, this function must also
 615	 * deal with the NORMAL priority, which requires additional checks
 616	 * and constraints.
 617	 *
 618	 * For the case where preemption and interrupts are disabled, it is
 619	 * enough to also verify that the owning CPU has not changed.
 620	 *
 621	 * For the case where preemption or interrupts are enabled, an
 622	 * external synchronization method *must* be used. In particular,
 623	 * the driver-specific locking mechanism used in device_lock()
 624	 * (including disabling migration) should be used. It prevents
 625	 * scenarios such as:
 626	 *
 627	 * 1. [Task A] owns a context with NBCON_PRIO_NORMAL on [CPU X] and
 628	 *    is scheduled out.
 629	 * 2. Another context takes over the lock with NBCON_PRIO_EMERGENCY
 630	 *    and releases it.
 631	 * 3. [Task B] acquires a context with NBCON_PRIO_NORMAL on [CPU X]
 632	 *    and is scheduled out.
 633	 * 4. [Task A] gets running on [CPU X] and sees that the console is
 634 *    still owned by a task on [CPU X] with NBCON_PRIO_NORMAL. Thus
 635	 *    [Task A] thinks it is the owner when it is not.
 636	 */
 637
 638	if (cur->prio != expected_prio)
 639		return false;
 640
 641	if (cur->cpu != expected_cpu)
 642		return false;
 643
 644	return true;
 645}
 646
 647/**
 648 * nbcon_context_release - Release the console
 649 * @ctxt:	The nbcon context from nbcon_context_try_acquire()
 650 */
 651static void nbcon_context_release(struct nbcon_context *ctxt)
 652{
 653	unsigned int cpu = smp_processor_id();
 654	struct console *con = ctxt->console;
 655	struct nbcon_state cur;
 656	struct nbcon_state new;
 657
 658	nbcon_state_read(con, &cur);
 659
 660	do {
 661		if (!nbcon_owner_matches(&cur, cpu, ctxt->prio))
 662			break;
 663
 664		new.atom = cur.atom;
 665		new.prio = NBCON_PRIO_NONE;
 666
 667		/*
 668		 * If @unsafe_takeover is set, it is kept set so that
 669		 * the state remains permanently unsafe.
 670		 */
 671		new.unsafe |= cur.unsafe_takeover;
 672
 673	} while (!nbcon_state_try_cmpxchg(con, &cur, &new));
 674
 675	ctxt->pbufs = NULL;
 676}
 677
 678/**
 679 * nbcon_context_can_proceed - Check whether ownership can proceed
 680 * @ctxt:	The nbcon context from nbcon_context_try_acquire()
 681 * @cur:	The current console state
 682 *
 683 * Return:	True if this context still owns the console. False if
 684 *		ownership was handed over or taken.
 685 *
 686 * Must be invoked when entering the unsafe state to make sure that the
 687 * context still owns the lock. Also must be invoked when exiting the unsafe
 688 * state to eventually free the lock for a higher priority context which asked
 689 * for the friendly handover.
 690 *
 691 * It can be called inside an unsafe section when the console is just
 692 * temporarily in a safe state, instead of exiting and re-entering the unsafe
 693 * state.
 694 *
 695 * Also it can be called in the safe context before doing an expensive
 696 * safe operation. It does not make sense to do the operation when
 697 * a higher priority context took the lock.
 698 *
 699 * When this function returns false then the calling context no longer owns
 700 * the console and is no longer allowed to go forward. In this case it must
 701 * back out immediately and carefully. The buffer content is also no longer
 702 * trusted since it no longer belongs to the calling context.
 703 */
 704static bool nbcon_context_can_proceed(struct nbcon_context *ctxt, struct nbcon_state *cur)
 705{
 706	unsigned int cpu = smp_processor_id();
 707
 708	/* Make sure this context still owns the console. */
 709	if (!nbcon_owner_matches(cur, cpu, ctxt->prio))
 710		return false;
 711
 712	/* The console owner can proceed if there is no waiter. */
 713	if (cur->req_prio == NBCON_PRIO_NONE)
 714		return true;
 715
 716	/*
 717	 * A console owner within an unsafe region is always allowed to
 718	 * proceed, even if there are waiters. It can perform a handover
 719	 * when exiting the unsafe region. Otherwise the waiter will
 720	 * need to perform an unsafe hostile takeover.
 721	 */
 722	if (cur->unsafe)
 723		return true;
 724
 725	/* Waiters always have higher priorities than owners. */
 726	WARN_ON_ONCE(cur->req_prio <= cur->prio);
 727
 728	/*
 729	 * Having a safe point for takeover and eventually a few
 730	 * duplicated characters or a full line is way better than a
 731	 * hostile takeover. Post processing can take care of the garbage.
 732	 * Release and hand over.
 733	 */
 734	nbcon_context_release(ctxt);
 735
 736	/*
 737	 * It is not clear whether the waiter really took over ownership. The
 738	 * outermost callsite must make the final decision whether console
 739	 * ownership is needed for it to proceed. If yes, it must reacquire
 740	 * ownership (possibly hostile) before carefully proceeding.
 741	 *
 742	 * The calling context no longer owns the console so go back all the
 743	 * way instead of trying to implement reacquire heuristics in tons of
 744	 * places.
 745	 */
 746	return false;
 747}
 748
 749/**
 750 * nbcon_can_proceed - Check whether ownership can proceed
 751 * @wctxt:	The write context that was handed to the write function
 752 *
 753 * Return:	True if this context still owns the console. False if
 754 *		ownership was handed over or taken.
 755 *
 756 * It is used in nbcon_enter_unsafe() to make sure that it still owns the
 757 * lock. Also it is used in nbcon_exit_unsafe() to eventually free the lock
 758 * for a higher priority context which asked for the friendly handover.
 759 *
 760 * It can be called inside an unsafe section when the console is just
 761 * temporarily in a safe state instead of exiting and re-entering the unsafe state.
 762 *
 763 * Also it can be called in the safe context before doing an expensive safe
 764 * operation. It does not make sense to do the operation when a higher
 765 * priority context took the lock.
 766 *
 767 * When this function returns false then the calling context no longer owns
 768 * the console and is no longer allowed to go forward. In this case it must
 769 * back out immediately and carefully. The buffer content is also no longer
 770 * trusted since it no longer belongs to the calling context.
 771 */
 772bool nbcon_can_proceed(struct nbcon_write_context *wctxt)
 773{
 774	struct nbcon_context *ctxt = &ACCESS_PRIVATE(wctxt, ctxt);
 775	struct console *con = ctxt->console;
 776	struct nbcon_state cur;
 777
 778	nbcon_state_read(con, &cur);
 779
 780	return nbcon_context_can_proceed(ctxt, &cur);
 781}
 782EXPORT_SYMBOL_GPL(nbcon_can_proceed);
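
    /*
     * Example (an illustrative sketch, not actual driver code): a write
     * callback may poll nbcon_can_proceed() between chunks of safe work,
     * such as waiting for a FIFO to drain, and back out once ownership is
     * lost. my_fifo_empty() is a hypothetical driver helper:
     *
     *	while (!my_fifo_empty(con)) {
     *		if (!nbcon_can_proceed(wctxt))
     *			return;		// ownership lost, back out
     *		cpu_relax();
     *	}
     */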
 783
 784#define nbcon_context_enter_unsafe(c)	__nbcon_context_update_unsafe(c, true)
 785#define nbcon_context_exit_unsafe(c)	__nbcon_context_update_unsafe(c, false)
 786
 787/**
 788 * __nbcon_context_update_unsafe - Update the unsafe bit in @con->nbcon_state
 789 * @ctxt:	The nbcon context from nbcon_context_try_acquire()
 790 * @unsafe:	The new value for the unsafe bit
 791 *
 792 * Return:	True if the unsafe state was updated and this context still
 793 *		owns the console. Otherwise false if ownership was handed
 794 *		over or taken.
 795 *
 796 * This function allows console owners to modify the unsafe status of the
 797 * console.
 798 *
 799 * When this function returns false then the calling context no longer owns
 800 * the console and is no longer allowed to go forward. In this case it must
 801 * back out immediately and carefully. The buffer content is also no longer
 802 * trusted since it no longer belongs to the calling context.
 803 *
 804 * Internal helper to avoid duplicated code.
 805 */
 806static bool __nbcon_context_update_unsafe(struct nbcon_context *ctxt, bool unsafe)
 807{
 808	struct console *con = ctxt->console;
 809	struct nbcon_state cur;
 810	struct nbcon_state new;
 811
 812	nbcon_state_read(con, &cur);
 813
 814	do {
 815		/*
 816		 * The unsafe bit must not be cleared if an
 817		 * unsafe hostile takeover has occurred.
 818		 */
 819		if (!unsafe && cur.unsafe_takeover)
 820			goto out;
 821
 822		if (!nbcon_context_can_proceed(ctxt, &cur))
 823			return false;
 824
 825		new.atom = cur.atom;
 826		new.unsafe = unsafe;
 827	} while (!nbcon_state_try_cmpxchg(con, &cur, &new));
 828
 829	cur.atom = new.atom;
 830out:
 831	return nbcon_context_can_proceed(ctxt, &cur);
 832}
 833
 834static void nbcon_write_context_set_buf(struct nbcon_write_context *wctxt,
 835					char *buf, unsigned int len)
 836{
 837	struct nbcon_context *ctxt = &ACCESS_PRIVATE(wctxt, ctxt);
 838	struct console *con = ctxt->console;
 839	struct nbcon_state cur;
 840
 841	wctxt->outbuf = buf;
 842	wctxt->len = len;
 843	nbcon_state_read(con, &cur);
 844	wctxt->unsafe_takeover = cur.unsafe_takeover;
 845}
 846
 847/**
 848 * nbcon_enter_unsafe - Enter an unsafe region in the driver
 849 * @wctxt:	The write context that was handed to the write function
 850 *
 851 * Return:	True if this context still owns the console. False if
 852 *		ownership was handed over or taken.
 853 *
 854 * When this function returns false then the calling context no longer owns
 855 * the console and is no longer allowed to go forward. In this case it must
 856 * back out immediately and carefully. The buffer content is also no longer
 857 * trusted since it no longer belongs to the calling context.
 858 */
 859bool nbcon_enter_unsafe(struct nbcon_write_context *wctxt)
 860{
 861	struct nbcon_context *ctxt = &ACCESS_PRIVATE(wctxt, ctxt);
 862	bool is_owner;
 863
 864	is_owner = nbcon_context_enter_unsafe(ctxt);
 865	if (!is_owner)
 866		nbcon_write_context_set_buf(wctxt, NULL, 0);
 867	return is_owner;
 868}
 869EXPORT_SYMBOL_GPL(nbcon_enter_unsafe);
 870
 871/**
 872 * nbcon_exit_unsafe - Exit an unsafe region in the driver
 873 * @wctxt:	The write context that was handed to the write function
 874 *
 875 * Return:	True if this context still owns the console. False if
 876 *		ownership was handed over or taken.
 877 *
 878 * When this function returns false then the calling context no longer owns
 879 * the console and is no longer allowed to go forward. In this case it must
 880 * back out immediately and carefully. The buffer content is also no longer
 881 * trusted since it no longer belongs to the calling context.
 882 */
 883bool nbcon_exit_unsafe(struct nbcon_write_context *wctxt)
 884{
 885	struct nbcon_context *ctxt = &ACCESS_PRIVATE(wctxt, ctxt);
 886	bool ret;
 887
 888	ret = nbcon_context_exit_unsafe(ctxt);
 889	if (!ret)
 890		nbcon_write_context_set_buf(wctxt, NULL, 0);
 891	return ret;
 892}
 893EXPORT_SYMBOL_GPL(nbcon_exit_unsafe);
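
    /*
     * Example (an illustrative sketch, not actual driver code): a minimal
     * nbcon write callback brackets all hardware access in an unsafe
     * section. my_uart_putc() is a hypothetical driver helper:
     *
     *	static void my_write_atomic(struct console *con,
     *				    struct nbcon_write_context *wctxt)
     *	{
     *		unsigned int i;
     *
     *		if (!nbcon_enter_unsafe(wctxt))
     *			return;		// ownership lost, back out
     *
     *		for (i = 0; i < wctxt->len; i++)
     *			my_uart_putc(con, wctxt->outbuf[i]);
     *
     *		nbcon_exit_unsafe(wctxt);
     *	}
     */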
 894
 895/**
 896 * nbcon_reacquire_nobuf - Reacquire a console after losing ownership
 897 *				while printing
 898 * @wctxt:	The write context that was handed to the write callback
 899 *
 900 * Since ownership can be lost at any time due to handover or takeover, a
 901 * printing context _must_ be prepared to back out immediately and
 902 * carefully. However, there are scenarios where the printing context must
 903 * reacquire ownership in order to finalize or revert hardware changes.
 904 *
 905 * This function allows a printing context to reacquire ownership using the
 906 * same priority as its previous ownership.
 907 *
 908 * Note that after a successful reacquire the printing context will have no
 909 * output buffer because that has been lost. This function cannot be used to
 910 * resume printing.
 911 */
 912void nbcon_reacquire_nobuf(struct nbcon_write_context *wctxt)
 913{
 914	struct nbcon_context *ctxt = &ACCESS_PRIVATE(wctxt, ctxt);
 915
 916	while (!nbcon_context_try_acquire(ctxt))
 917		cpu_relax();
 918
 919	nbcon_write_context_set_buf(wctxt, NULL, 0);
 920}
 921EXPORT_SYMBOL_GPL(nbcon_reacquire_nobuf);
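
    /*
     * Example (an illustrative sketch, not actual driver code): if a write
     * callback loses ownership, it can reacquire just to revert hardware
     * changes before returning. The caller will then observe
     * @wctxt->outbuf == NULL and treat the record as not printed.
     * my_restore_irqs() is a hypothetical driver helper:
     *
     *	if (!nbcon_exit_unsafe(wctxt)) {
     *		nbcon_reacquire_nobuf(wctxt);
     *		my_restore_irqs(con);
     *		return;		// output buffer is gone, cannot resume
     *	}
     */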
 922
 923/**
 924 * nbcon_emit_next_record - Emit a record in the acquired context
 925 * @wctxt:	The write context that will be handed to the write function
 926 * @use_atomic:	True if the write_atomic() callback is to be used
 927 *
 928 * Return:	True if this context still owns the console. False if
 929 *		ownership was handed over or taken.
 930 *
 931 * When this function returns false then the calling context no longer owns
 932 * the console and is no longer allowed to go forward. In this case it must
 933 * back out immediately and carefully. The buffer content is also no longer
 934 * trusted since it no longer belongs to the calling context. If the caller
 935 * wants to do more it must reacquire the console first.
 936 *
 937 * When true is returned, @wctxt->ctxt.backlog indicates whether there are
 938 * still records pending in the ringbuffer.
 939 */
 940static bool nbcon_emit_next_record(struct nbcon_write_context *wctxt, bool use_atomic)
 941{
 942	struct nbcon_context *ctxt = &ACCESS_PRIVATE(wctxt, ctxt);
 943	struct console *con = ctxt->console;
 944	bool is_extended = console_srcu_read_flags(con) & CON_EXTENDED;
 945	struct printk_message pmsg = {
 946		.pbufs = ctxt->pbufs,
 947	};
 948	unsigned long con_dropped;
 949	struct nbcon_state cur;
 950	unsigned long dropped;
 951	unsigned long ulseq;
 952
 953	/*
 954	 * This function should never be called for consoles that have not
 955	 * implemented the necessary callback for writing: i.e. legacy
 956	 * consoles and, when atomic, nbcon consoles with no write_atomic().
 957	 * Handle it as if ownership was lost and try to continue.
 958	 *
 959	 * Note that for nbcon consoles the write_thread() callback is
 960	 * mandatory and was already checked in nbcon_alloc().
 961	 */
 962	if (WARN_ON_ONCE((use_atomic && !con->write_atomic) ||
 963			 !(console_srcu_read_flags(con) & CON_NBCON))) {
 964		nbcon_context_release(ctxt);
 965		return false;
 966	}
 967
 968	/*
 969	 * The printk buffers are filled within an unsafe section. This
 970	 * prevents NBCON_PRIO_NORMAL and NBCON_PRIO_EMERGENCY from
 971	 * clobbering each other.
 972	 */
 973
 974	if (!nbcon_context_enter_unsafe(ctxt))
 975		return false;
 976
 977	ctxt->backlog = printk_get_next_message(&pmsg, ctxt->seq, is_extended, true);
 978	if (!ctxt->backlog)
 979		return nbcon_context_exit_unsafe(ctxt);
 980
 981	/*
 982	 * @con->dropped is not protected in case of an unsafe hostile
 983	 * takeover. In that situation the update can be racy so
 984	 * annotate it accordingly.
 985	 */
 986	con_dropped = data_race(READ_ONCE(con->dropped));
 987
 988	dropped = con_dropped + pmsg.dropped;
 989	if (dropped && !is_extended)
 990		console_prepend_dropped(&pmsg, dropped);
 991
 992	/*
 993	 * If the previous owner was assigned the same record, this context
 994	 * has taken over ownership and is replaying the record. Prepend a
 995	 * message to let the user know the record is replayed.
 996	 */
 997	ulseq = atomic_long_read(&ACCESS_PRIVATE(con, nbcon_prev_seq));
 998	if (__ulseq_to_u64seq(prb, ulseq) == pmsg.seq) {
 999		console_prepend_replay(&pmsg);
1000	} else {
1001		/*
1002		 * Ensure this context is still the owner before trying to
1003		 * update @nbcon_prev_seq. Otherwise the value in @ulseq may
1004		 * not be from the previous owner and instead be some later
1005		 * value from the context that took over ownership.
1006		 */
1007		nbcon_state_read(con, &cur);
1008		if (!nbcon_context_can_proceed(ctxt, &cur))
1009			return false;
1010
1011		atomic_long_try_cmpxchg(&ACCESS_PRIVATE(con, nbcon_prev_seq), &ulseq,
1012					__u64seq_to_ulseq(pmsg.seq));
1013	}
1014
1015	if (!nbcon_context_exit_unsafe(ctxt))
1016		return false;
1017
1018	/* For skipped records just update seq/dropped in @con. */
1019	if (pmsg.outbuf_len == 0)
1020		goto update_con;
1021
1022	/* Initialize the write context for driver callbacks. */
1023	nbcon_write_context_set_buf(wctxt, &pmsg.pbufs->outbuf[0], pmsg.outbuf_len);
1024
1025	if (use_atomic)
1026		con->write_atomic(con, wctxt);
1027	else
1028		con->write_thread(con, wctxt);
1029
1030	if (!wctxt->outbuf) {
1031		/*
1032		 * Ownership was lost and reacquired by the driver. Handle it
1033		 * as if ownership was lost.
1034		 */
1035		nbcon_context_release(ctxt);
1036		return false;
1037	}
1038
1039	/*
1040	 * Ownership may have been lost but _not_ reacquired by the driver.
1041	 * This case is detected and handled when entering unsafe to update
1042	 * dropped/seq values.
1043	 */
1044
1045	/*
1046	 * Since any dropped message was successfully output, reset the
1047	 * dropped count for the console.
1048	 */
1049	dropped = 0;
1050update_con:
1051	/*
1052	 * The dropped count and the sequence number are updated within an
1053	 * unsafe section. This limits update races to the panic context and
1054	 * allows the panic context to win.
1055	 */
1056
1057	if (!nbcon_context_enter_unsafe(ctxt))
1058		return false;
1059
1060	if (dropped != con_dropped) {
1061		/* Counterpart to the READ_ONCE() above. */
1062		WRITE_ONCE(con->dropped, dropped);
1063	}
1064
1065	nbcon_seq_try_update(ctxt, pmsg.seq + 1);
1066
1067	return nbcon_context_exit_unsafe(ctxt);
1068}
1069
1070/*
1071 * nbcon_emit_one - Print one record for an nbcon console using the
1072 *			specified callback
1073 * @wctxt:	An initialized write context struct to use for this context
1074 * @use_atomic:	True if the write_atomic() callback is to be used
1075 *
1076 * Return:	True, when a record has been printed and there are still
1077 *		pending records. The caller might want to continue flushing.
1078 *
1079 *		False, when there is no pending record, or when the console
1080 *		context cannot be acquired, or the ownership has been lost.
1081 *		The caller should give up. Either the job is done, cannot be
1082 *		done, or will be handled by the owning context.
1083 *
1084 * This is an internal helper to handle the locking of the console before
1085 * calling nbcon_emit_next_record().
1086 */
1087static bool nbcon_emit_one(struct nbcon_write_context *wctxt, bool use_atomic)
1088{
1089	struct nbcon_context *ctxt = &ACCESS_PRIVATE(wctxt, ctxt);
1090	struct console *con = ctxt->console;
1091	unsigned long flags;
1092	bool ret = false;
1093
1094	if (!use_atomic) {
1095		con->device_lock(con, &flags);
1096
1097		/*
1098		 * Ensure this stays on the CPU to make handover and
1099		 * takeover possible.
1100		 */
1101		cant_migrate();
1102	}
1103
1104	if (!nbcon_context_try_acquire(ctxt))
1105		goto out;
1106
1107	/*
1108	 * nbcon_emit_next_record() returns false when the console was
1109	 * handed over or taken over. In both cases the context is no
1110	 * longer valid.
1111	 *
1112	 * The higher priority printing context takes over responsibility
1113	 * to print the pending records.
1114	 */
1115	if (!nbcon_emit_next_record(wctxt, use_atomic))
1116		goto out;
1117
1118	nbcon_context_release(ctxt);
1119
1120	ret = ctxt->backlog;
1121out:
1122	if (!use_atomic)
1123		con->device_unlock(con, flags);
1124	return ret;
1125}
1126
1127/**
1128 * nbcon_kthread_should_wakeup - Check whether a printer thread should wakeup
1129 * @con:	Console to operate on
1130 * @ctxt:	The nbcon context from nbcon_context_try_acquire()
1131 *
1132 * Return:	True if the thread should shutdown or if the console is
1133 *		allowed to print and a record is available. False otherwise.
1134 *
1135 * After the thread wakes up, it must first check if it should shutdown before
1136 * attempting any printing.
1137 */
1138static bool nbcon_kthread_should_wakeup(struct console *con, struct nbcon_context *ctxt)
1139{
1140	bool ret = false;
1141	short flags;
1142	int cookie;
1143
1144	if (kthread_should_stop())
1145		return true;
1146
1147	cookie = console_srcu_read_lock();
1148
1149	flags = console_srcu_read_flags(con);
1150	if (console_is_usable(con, flags, false)) {
1151		/* Bring the sequence in @ctxt up to date */
1152		ctxt->seq = nbcon_seq_read(con);
1153
1154		ret = prb_read_valid(prb, ctxt->seq, NULL);
1155	}
1156
1157	console_srcu_read_unlock(cookie);
1158	return ret;
1159}
1160
1161/**
1162 * nbcon_kthread_func - The printer thread function
1163 * @__console:	Console to operate on
1164 *
1165 * Return:	0
1166 */
1167static int nbcon_kthread_func(void *__console)
1168{
1169	struct console *con = __console;
1170	struct nbcon_write_context wctxt = {
1171		.ctxt.console	= con,
1172		.ctxt.prio	= NBCON_PRIO_NORMAL,
1173	};
1174	struct nbcon_context *ctxt = &ACCESS_PRIVATE(&wctxt, ctxt);
1175	short con_flags;
1176	bool backlog;
1177	int cookie;
1178
1179wait_for_event:
1180	/*
1181	 * Guarantee this task is visible on the rcuwait before
1182	 * checking the wake condition.
1183	 *
1184	 * The full memory barrier within set_current_state() of
1185	 * ___rcuwait_wait_event() pairs with the full memory
1186	 * barrier within rcuwait_has_sleeper().
1187	 *
1188	 * This pairs with rcuwait_has_sleeper:A and nbcon_kthread_wake:A.
1189	 */
1190	rcuwait_wait_event(&con->rcuwait,
1191			   nbcon_kthread_should_wakeup(con, ctxt),
1192			   TASK_INTERRUPTIBLE); /* LMM(nbcon_kthread_func:A) */
1193
1194	do {
1195		if (kthread_should_stop())
1196			return 0;
1197
1198		backlog = false;
1199
1200		/*
1201		 * Keep the srcu read lock around the entire operation so that
1202		 * synchronize_srcu() can guarantee that the kthread stopped
1203		 * or suspended printing.
1204		 */
1205		cookie = console_srcu_read_lock();
1206
1207		con_flags = console_srcu_read_flags(con);
1208
1209		if (console_is_usable(con, con_flags, false))
1210			backlog = nbcon_emit_one(&wctxt, false);
1211
1212		console_srcu_read_unlock(cookie);
1213
1214		cond_resched();
1215
1216	} while (backlog);
1217
1218	goto wait_for_event;
1219}
1220
1221/**
1222 * nbcon_irq_work - irq work to wake console printer thread
1223 * @irq_work:	The irq work to operate on
1224 */
1225static void nbcon_irq_work(struct irq_work *irq_work)
1226{
1227	struct console *con = container_of(irq_work, struct console, irq_work);
1228
1229	nbcon_kthread_wake(con);
1230}
1231
1232static inline bool rcuwait_has_sleeper(struct rcuwait *w)
1233{
1234	/*
1235	 * Guarantee any new records can be seen by tasks preparing to wait
1236	 * before this context checks if the rcuwait is empty.
1237	 *
1238	 * This full memory barrier pairs with the full memory barrier within
1239	 * set_current_state() of ___rcuwait_wait_event(), which is called
1240	 * after prepare_to_rcuwait() adds the waiter but before it has
1241	 * checked the wait condition.
1242	 *
1243	 * This pairs with nbcon_kthread_func:A.
1244	 */
1245	smp_mb(); /* LMM(rcuwait_has_sleeper:A) */
1246	return rcuwait_active(w);
1247}
1248
1249/**
1250 * nbcon_kthreads_wake - Wake up printing threads using irq_work
1251 */
1252void nbcon_kthreads_wake(void)
1253{
1254	struct console *con;
1255	int cookie;
1256
1257	if (!printk_kthreads_running)
1258		return;
1259
1260	cookie = console_srcu_read_lock();
1261	for_each_console_srcu(con) {
1262		if (!(console_srcu_read_flags(con) & CON_NBCON))
1263			continue;
1264
1265		/*
1266		 * Only schedule irq_work if the printing thread is
1267		 * actively waiting. If not waiting, the thread will
1268		 * notice by itself that it has work to do.
1269		 */
1270		if (rcuwait_has_sleeper(&con->rcuwait))
1271			irq_work_queue(&con->irq_work);
1272	}
1273	console_srcu_read_unlock(cookie);
1274}
1275
1276/*
1277 * nbcon_kthread_stop - Stop a console printer thread
1278 * @con:	Console to operate on
1279 */
1280void nbcon_kthread_stop(struct console *con)
1281{
1282	lockdep_assert_console_list_lock_held();
1283
1284	if (!con->kthread)
1285		return;
1286
1287	kthread_stop(con->kthread);
1288	con->kthread = NULL;
1289}
1290
1291/**
1292 * nbcon_kthread_create - Create a console printer thread
1293 * @con:	Console to operate on
1294 *
1295 * Return:	True if the kthread was started or already exists.
1296 *		Otherwise false and @con must not be registered.
1297 *
1298 * This function is called when it is expected that nbcon consoles will be
1299 * flushed using the kthread. Messages printed with NBCON_PRIO_NORMAL
1300 * will no longer be flushed by the legacy loop. This is why failure must
1301 * be fatal for console registration.
1302 *
1303 * If @con was already registered and this function fails, @con must be
1304 * unregistered before the global state variable @printk_kthreads_running
1305 * can be set.
1306 */
1307bool nbcon_kthread_create(struct console *con)
1308{
1309	struct task_struct *kt;
1310
1311	lockdep_assert_console_list_lock_held();
1312
1313	if (con->kthread)
1314		return true;
1315
1316	kt = kthread_run(nbcon_kthread_func, con, "pr/%s%d", con->name, con->index);
1317	if (WARN_ON(IS_ERR(kt))) {
1318		con_printk(KERN_ERR, con, "failed to start printing thread\n");
1319		return false;
1320	}
1321
1322	con->kthread = kt;
1323
1324	/*
1325	 * It is important that console printing threads are scheduled
1326	 * shortly after a printk call and with generous runtime budgets.
1327	 */
1328	sched_set_normal(con->kthread, -20);
1329
1330	return true;
1331}
1332
1333/* Track the nbcon emergency nesting per CPU. */
1334static DEFINE_PER_CPU(unsigned int, nbcon_pcpu_emergency_nesting);
1335static unsigned int early_nbcon_pcpu_emergency_nesting __initdata;
1336
1337/**
1338 * nbcon_get_cpu_emergency_nesting - Get the per CPU emergency nesting pointer
1339 *
1340 * Context:	For reading, any context. For writing, any context that
1341 *		cannot be migrated to another CPU.
1342 * Return:	Either a pointer to the per CPU emergency nesting counter of
1343 *		the current CPU or to the init data during early boot.
1344 *
1345 * The function is safe for reading per-CPU variables in any context because
1346 * preemption is disabled if the current CPU is in the emergency state. See
1347 * also nbcon_cpu_emergency_enter().
1348 */
1349static __ref unsigned int *nbcon_get_cpu_emergency_nesting(void)
1350{
1351	/*
1352	 * The value of __printk_percpu_data_ready gets set in normal
1353	 * context and before SMP initialization. As a result it could
1354	 * never change while inside an nbcon emergency section.
1355	 */
1356	if (!printk_percpu_data_ready())
1357		return &early_nbcon_pcpu_emergency_nesting;
1358
1359	return raw_cpu_ptr(&nbcon_pcpu_emergency_nesting);
1360}
1361
1362/**
1363 * nbcon_get_default_prio - The appropriate nbcon priority to use for nbcon
1364 *				printing on the current CPU
1365 *
1366 * Context:	Any context.
1367 * Return:	The nbcon_prio to use for acquiring an nbcon console in this
1368 *		context for printing.
1369 *
1370 * The function is safe for reading per-CPU data in any context because
1371 * preemption is disabled if the current CPU is in the emergency or panic
1372 * state.
1373 */
1374enum nbcon_prio nbcon_get_default_prio(void)
1375{
1376	unsigned int *cpu_emergency_nesting;
1377
1378	if (this_cpu_in_panic())
1379		return NBCON_PRIO_PANIC;
1380
1381	cpu_emergency_nesting = nbcon_get_cpu_emergency_nesting();
1382	if (*cpu_emergency_nesting)
1383		return NBCON_PRIO_EMERGENCY;
1384
1385	return NBCON_PRIO_NORMAL;
1386}
1387
1388/**
1389 * nbcon_legacy_emit_next_record - Print one record for an nbcon console
1390 *					in legacy contexts
1391 * @con:	The console to print on
1392 * @handover:	Will be set to true if a printk waiter has taken over the
1393 *		console_lock, in which case the caller is no longer holding
1394 *		both the console_lock and the SRCU read lock. Otherwise it
1395 *		is set to false.
1396 * @cookie:	The cookie from the SRCU read lock.
1397 * @use_atomic: Set true when called in an atomic or unknown context.
1398 *		It affects which nbcon callback will be used: write_atomic()
1399 *		or write_thread().
1400 *
1401 *		When false, the write_thread() callback is used and would be
1402 *		called in a preemptible context unless disabled by the
1403 *		device_lock. The legacy handover is not allowed in this mode.
1404 *
1405 * Context:	Any context except NMI.
1406 * Return:	True, when a record has been printed and there are still
1407 *		pending records. The caller might want to continue flushing.
1408 *
1409 *		False, when there is no pending record, or when the console
1410 *		context cannot be acquired, or the ownership has been lost.
1411 *		The caller should give up. Either the job is done, cannot be
1412 *		done, or will be handled by the owning context.
1413 *
1414 * This function is meant to be called by console_flush_all() to print records
1415 * on nbcon consoles from legacy context (printing via console unlocking).
1416 * Essentially it is the nbcon version of console_emit_next_record().
1417 */
1418bool nbcon_legacy_emit_next_record(struct console *con, bool *handover,
1419				   int cookie, bool use_atomic)
1420{
1421	struct nbcon_write_context wctxt = { };
1422	struct nbcon_context *ctxt = &ACCESS_PRIVATE(&wctxt, ctxt);
1423	unsigned long flags;
1424	bool progress;
1425
1426	ctxt->console	= con;
1427	ctxt->prio	= nbcon_get_default_prio();
1428
1429	if (use_atomic) {
1430		/*
1431		 * In an atomic or unknown context, use the same procedure as
1432		 * in console_emit_next_record(). It allows the handover.
1433		 */
1434		printk_safe_enter_irqsave(flags);
1435		console_lock_spinning_enable();
1436		stop_critical_timings();
1437	}
1438
1439	progress = nbcon_emit_one(&wctxt, use_atomic);
1440
1441	if (use_atomic) {
1442		start_critical_timings();
1443		*handover = console_lock_spinning_disable_and_check(cookie);
1444		printk_safe_exit_irqrestore(flags);
1445	} else {
1446		/* Non-atomic does not perform legacy spinning handovers. */
1447		*handover = false;
1448	}
1449
1450	return progress;
1451}
1452
1453/**
1454 * __nbcon_atomic_flush_pending_con - Flush specified nbcon console using its
1455 *					write_atomic() callback
1456 * @con:			The nbcon console to flush
1457 * @stop_seq:			Flush up until this record
1458 * @allow_unsafe_takeover:	True, to allow unsafe hostile takeovers
1459 *
1460 * Return:	0 if @con was flushed up to @stop_seq. Otherwise, an error code on
1461 *		failure.
1462 *
1463 * Errors:
1464 *
1465 *	-EPERM:		Unable to acquire console ownership.
1466 *
1467 *	-EAGAIN:	Another context took over ownership while printing.
1468 *
1469 *	-ENOENT:	A record before @stop_seq is not available.
1470 *
1471 * If flushing up to @stop_seq was not successful, it only makes sense for the
1472 * caller to try again when -EAGAIN was returned. When -EPERM is returned,
1473 * this context is not allowed to acquire the console. When -ENOENT is
1474 * returned, it cannot be expected that the unfinalized record will become
1475 * available.
1476 */
1477static int __nbcon_atomic_flush_pending_con(struct console *con, u64 stop_seq,
1478					    bool allow_unsafe_takeover)
1479{
1480	struct nbcon_write_context wctxt = { };
1481	struct nbcon_context *ctxt = &ACCESS_PRIVATE(&wctxt, ctxt);
1482	int err = 0;
1483
1484	ctxt->console			= con;
1485	ctxt->spinwait_max_us		= 2000;
1486	ctxt->prio			= nbcon_get_default_prio();
1487	ctxt->allow_unsafe_takeover	= allow_unsafe_takeover;
1488
1489	if (!nbcon_context_try_acquire(ctxt))
1490		return -EPERM;
1491
1492	while (nbcon_seq_read(con) < stop_seq) {
1493		/*
1494		 * nbcon_emit_next_record() returns false when the console was
1495		 * handed over or taken over. In both cases the context is no
1496		 * longer valid.
1497		 */
1498		if (!nbcon_emit_next_record(&wctxt, true))
1499			return -EAGAIN;
1500
1501		if (!ctxt->backlog) {
1502			/* Are there reserved but not yet finalized records? */
1503			if (nbcon_seq_read(con) < stop_seq)
1504				err = -ENOENT;
1505			break;
1506		}
1507	}
1508
1509	nbcon_context_release(ctxt);
1510	return err;
1511}
1512
1513/**
1514 * nbcon_atomic_flush_pending_con - Flush specified nbcon console using its
1515 *					write_atomic() callback
1516 * @con:			The nbcon console to flush
1517 * @stop_seq:			Flush up until this record
1518 * @allow_unsafe_takeover:	True, to allow unsafe hostile takeovers
1519 *
1520 * This will stop flushing before @stop_seq if another context has ownership.
1521 * That context is then responsible for the flushing. Likewise, if new records
1522 * are added while this context was flushing and there is no other context
1523 * to handle the printing, this context must also flush those records.
1524 */
1525static void nbcon_atomic_flush_pending_con(struct console *con, u64 stop_seq,
1526					   bool allow_unsafe_takeover)
1527{
1528	struct console_flush_type ft;
1529	unsigned long flags;
1530	int err;
1531
1532again:
1533	/*
1534	 * Atomic flushing does not use console driver synchronization (i.e.
1535	 * it does not hold the port lock for uart consoles). Therefore IRQs
1536	 * must be disabled to avoid being interrupted and then calling into
1537	 * a driver that will deadlock trying to acquire console ownership.
1538	 */
1539	local_irq_save(flags);
1540
1541	err = __nbcon_atomic_flush_pending_con(con, stop_seq, allow_unsafe_takeover);
1542
1543	local_irq_restore(flags);
1544
1545	/*
1546	 * If there was a new owner (-EPERM, -EAGAIN), that context is
1547	 * responsible for completing the flush.
1548	 *
1549	 * Do not wait for records not yet finalized (-ENOENT) to avoid a
1550	 * possible deadlock. They will either get flushed by the writer or
1551	 * eventually skipped on the panic CPU.
1552	 */
1553	if (err)
1554		return;
1555
1556	/*
1557	 * If flushing was successful but more records are available, this
1558	 * context must flush those remaining records if the printer thread
1559	 * is not available to do it.
1560	 */
1561	printk_get_console_flush_type(&ft);
1562	if (!ft.nbcon_offload &&
1563	    prb_read_valid(prb, nbcon_seq_read(con), NULL)) {
1564		stop_seq = prb_next_reserve_seq(prb);
1565		goto again;
1566	}
1567}
1568
1569/**
1570 * __nbcon_atomic_flush_pending - Flush all nbcon consoles using their
1571 *					write_atomic() callback
1572 * @stop_seq:			Flush up until this record
1573 * @allow_unsafe_takeover:	True, to allow unsafe hostile takeovers
1574 */
1575static void __nbcon_atomic_flush_pending(u64 stop_seq, bool allow_unsafe_takeover)
1576{
1577	struct console *con;
1578	int cookie;
1579
1580	cookie = console_srcu_read_lock();
1581	for_each_console_srcu(con) {
1582		short flags = console_srcu_read_flags(con);
1583
1584		if (!(flags & CON_NBCON))
1585			continue;
1586
1587		if (!console_is_usable(con, flags, true))
1588			continue;
1589
1590		if (nbcon_seq_read(con) >= stop_seq)
1591			continue;
1592
1593		nbcon_atomic_flush_pending_con(con, stop_seq, allow_unsafe_takeover);
1594	}
1595	console_srcu_read_unlock(cookie);
1596}
1597
1598/**
1599 * nbcon_atomic_flush_pending - Flush all nbcon consoles using their
1600 *				write_atomic() callback
1601 *
1602 * Flush the backlog up through the currently newest record. Any new
1603 * records added while flushing will not be flushed if there is another
1604 * context available to handle the flushing. This is to avoid one CPU
1605 * printing unbounded because other CPUs continue to add records.
1606 */
1607void nbcon_atomic_flush_pending(void)
1608{
1609	__nbcon_atomic_flush_pending(prb_next_reserve_seq(prb), false);
1610}
1611
1612/**
1613 * nbcon_atomic_flush_unsafe - Flush all nbcon consoles using their
1614 *	write_atomic() callback and allowing unsafe hostile takeovers
1615 *
1616 * Flush the backlog up through the currently newest record. Unsafe hostile
1617 * takeovers will be performed, if necessary.
1618 */
1619void nbcon_atomic_flush_unsafe(void)
1620{
1621	__nbcon_atomic_flush_pending(prb_next_reserve_seq(prb), true);
1622}
1623
1624/**
1625 * nbcon_cpu_emergency_enter - Enter an emergency section where printk()
1626 *				messages for that CPU are flushed directly
1627 *
1628 * Context:	Any context. Disables preemption.
1629 *
1630 * When within an emergency section, printk() calls will attempt to flush any
1631 * pending messages in the ringbuffer.
1632 */
1633void nbcon_cpu_emergency_enter(void)
1634{
1635	unsigned int *cpu_emergency_nesting;
1636
1637	preempt_disable();
1638
1639	cpu_emergency_nesting = nbcon_get_cpu_emergency_nesting();
1640	(*cpu_emergency_nesting)++;
1641}
1642
1643/**
1644 * nbcon_cpu_emergency_exit - Exit an emergency section
1645 *
1646 * Context:	Within an emergency section. Enables preemption.
1647 */
1648void nbcon_cpu_emergency_exit(void)
1649{
1650	unsigned int *cpu_emergency_nesting;
1651
1652	cpu_emergency_nesting = nbcon_get_cpu_emergency_nesting();
1653
1654	if (!WARN_ON_ONCE(*cpu_emergency_nesting == 0))
1655		(*cpu_emergency_nesting)--;
1656
1657	preempt_enable();
1658}
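
    /*
     * Example (an illustrative sketch): a caller reporting a critical
     * condition can wrap its printk() calls in an emergency section so
     * that the messages are flushed directly on nbcon consoles:
     *
     *	nbcon_cpu_emergency_enter();
     *	pr_emerg("critical condition detected\n");
     *	nbcon_cpu_emergency_exit();
     */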
1659
1660/**
1661 * nbcon_alloc - Allocate and init the nbcon console specific data
1662 * @con:	Console to initialize
1663 *
1664 * Return:	True if the console was fully allocated and initialized.
1665 *		Otherwise @con must not be registered.
1666 *
1667 * When allocation and init were successful, the console must be properly
1668 * freed using nbcon_free() once it is no longer needed.
1669 */
1670bool nbcon_alloc(struct console *con)
1671{
1672	struct nbcon_state state = { };
1673
1674	/* The write_thread() callback is mandatory. */
1675	if (WARN_ON(!con->write_thread))
1676		return false;
1677
1678	rcuwait_init(&con->rcuwait);
1679	init_irq_work(&con->irq_work, nbcon_irq_work);
1680	atomic_long_set(&ACCESS_PRIVATE(con, nbcon_prev_seq), -1UL);
1681	nbcon_state_set(con, &state);
1682
1683	/*
1684	 * Initialize @nbcon_seq to the highest possible sequence number so
1685	 * that practically speaking it will have nothing to print until a
1686	 * desired initial sequence number has been set via nbcon_seq_force().
1687	 */
1688	atomic_long_set(&ACCESS_PRIVATE(con, nbcon_seq), ULSEQ_MAX(prb));
1689
1690	if (con->flags & CON_BOOT) {
1691		/*
1692		 * Boot console printing is synchronized with legacy console
1693		 * printing, so boot consoles can share the same global printk
1694		 * buffers.
1695		 */
1696		con->pbufs = &printk_shared_pbufs;
1697	} else {
1698		con->pbufs = kmalloc(sizeof(*con->pbufs), GFP_KERNEL);
1699		if (!con->pbufs) {
1700			con_printk(KERN_ERR, con, "failed to allocate printing buffer\n");
1701			return false;
1702		}
1703
1704		if (printk_kthreads_running) {
1705			if (!nbcon_kthread_create(con)) {
1706				kfree(con->pbufs);
1707				con->pbufs = NULL;
1708				return false;
1709			}
1710		}
1711	}
1712
1713	return true;
1714}
1715
1716/**
1717 * nbcon_free - Free and cleanup the nbcon console specific data
1718 * @con:	Console to free/cleanup nbcon data
1719 */
1720void nbcon_free(struct console *con)
1721{
1722	struct nbcon_state state = { };
1723
1724	if (printk_kthreads_running)
1725		nbcon_kthread_stop(con);
1726
1727	nbcon_state_set(con, &state);
1728
1729	/* Boot consoles share global printk buffers. */
1730	if (!(con->flags & CON_BOOT))
1731		kfree(con->pbufs);
1732
1733	con->pbufs = NULL;
1734}
1735
1736/**
1737 * nbcon_device_try_acquire - Try to acquire nbcon console and enter unsafe
1738 *				section
1739 * @con:	The nbcon console to acquire
1740 *
1741 * Context:	Under the locking mechanism implemented in
1742 *		@con->device_lock() including disabling migration.
1743 * Return:	True if the console was acquired. False otherwise.
1744 *
1745 * Console drivers will usually use their own internal synchronization
1746 * mechanism to synchronize between console printing and non-printing
1747 * activities (such as setting baud rates). However, nbcon console drivers
1748 * supporting atomic consoles may also want to mark unsafe sections when
1749 * performing non-printing activities in order to synchronize against their
1750 * write_atomic() callback.
1751 *
1752 * This function acquires the nbcon console using priority NBCON_PRIO_NORMAL
1753 * and marks it unsafe for handover/takeover.
1754 */
1755bool nbcon_device_try_acquire(struct console *con)
1756{
1757	struct nbcon_context *ctxt = &ACCESS_PRIVATE(con, nbcon_device_ctxt);
1758
1759	cant_migrate();
1760
1761	memset(ctxt, 0, sizeof(*ctxt));
1762	ctxt->console	= con;
1763	ctxt->prio	= NBCON_PRIO_NORMAL;
1764
1765	if (!nbcon_context_try_acquire(ctxt))
1766		return false;
1767
1768	if (!nbcon_context_enter_unsafe(ctxt))
1769		return false;
1770
1771	return true;
1772}
1773EXPORT_SYMBOL_GPL(nbcon_device_try_acquire);
1774
1775/**
1776 * nbcon_device_release - Exit unsafe section and release the nbcon console
1777 * @con:	The nbcon console acquired in nbcon_device_try_acquire()
1778 */
1779void nbcon_device_release(struct console *con)
1780{
1781	struct nbcon_context *ctxt = &ACCESS_PRIVATE(con, nbcon_device_ctxt);
1782	struct console_flush_type ft;
1783	int cookie;
1784
1785	if (!nbcon_context_exit_unsafe(ctxt))
1786		return;
1787
1788	nbcon_context_release(ctxt);
1789
1790	/*
1791	 * This context must flush any new records added while the console
1792	 * was locked if the printer thread is not available to do it. The
1793	 * console_srcu_read_lock must be taken to ensure the console is
1794	 * usable throughout flushing.
1795	 */
1796	cookie = console_srcu_read_lock();
1797	printk_get_console_flush_type(&ft);
1798	if (console_is_usable(con, console_srcu_read_flags(con), true) &&
1799	    !ft.nbcon_offload &&
1800	    prb_read_valid(prb, nbcon_seq_read(con), NULL)) {
1801		/*
1802		 * If nbcon_atomic flushing is not available, fallback to
1803		 * using the legacy loop.
1804		 */
1805		if (ft.nbcon_atomic) {
1806			__nbcon_atomic_flush_pending_con(con, prb_next_reserve_seq(prb), false);
1807		} else if (ft.legacy_direct) {
1808			if (console_trylock())
1809				console_unlock();
1810		} else if (ft.legacy_offload) {
1811			printk_trigger_flush();
1812		}
1813	}
1814	console_srcu_read_unlock(cookie);
1815}
1816EXPORT_SYMBOL_GPL(nbcon_device_release);
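
    /*
     * Example (an illustrative sketch, not actual driver code): a driver
     * performing a non-printing hardware change, such as setting a new
     * baud rate, can synchronize against the write_atomic() callback while
     * holding its device_lock(). my_set_baud() is a hypothetical helper:
     *
     *	con->device_lock(con, &flags);
     *	if (nbcon_device_try_acquire(con)) {
     *		my_set_baud(con, baud);
     *		nbcon_device_release(con);
     *	}
     *	con->device_unlock(con, flags);
     */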