#!/bin/sh
# SPDX-License-Identifier: GPL-2.0-only
#
# Copyright 2015, Daniel Axtens, IBM Corporation
#

# Locate the getscom/putscom tools and record their paths in GETSCOM and
# PUTSCOM. Executables in the current directory take precedence over $PATH.
# do we have ./getscom, ./putscom?
if [ -x ./getscom ] && [ -x ./putscom ]; then
	GETSCOM=./getscom
	PUTSCOM=./putscom
elif command -v getscom > /dev/null && command -v putscom > /dev/null; then
	# Both tools must be found: with only getscom on $PATH, PUTSCOM would
	# be left empty and the injection step would run a malformed command.
	GETSCOM=$(command -v getscom)
	PUTSCOM=$(command -v putscom)
else
	cat <<EOF
Can't find getscom/putscom in . or \$PATH.
See https://github.com/open-power/skiboot.
The tool is in external/xscom-utils
EOF
	exit 1
fi

# We will get 8 HMI events per injection
# todo: deal with things being offline
expected_hmis=8

# Print the number of lines in the kernel log (dmesg) that report a
# harmless HMI. grep -c always prints a count, "0" when nothing matches.
COUNT_HMIS() {
	dmesg | grep -c 'Harmless Hypervisor Maintenance interrupt'
}

# massively expand snooze delay, allowing injection on all cores
ppc64_cpu --smt-snooze-delay=1000000000

# when we exit, restore it
# The restore is bound to EXIT only; HUP/INT/TERM are turned into a normal
# exit so the EXIT trap runs exactly once and the script stops, instead of
# restoring the short delay and then carrying on with further injections.
trap 'ppc64_cpu --smt-snooze-delay=100' EXIT
trap 'exit 1' HUP INT TERM

# Wait for the HMIs raised by one injection to be counted by COUNT_HMIS.
#   $1 - HMI count taken before the injection
#   $2 - number of additional HMIs expected
# Re-polls COUNT_HMIS every 5 seconds, at most 12 times (1 minute), printing
# progress after each poll that falls short. Returns 0 as soon as the target
# is reached - including when that happens on the final poll - and 1 if the
# count is still short after the last poll.
wait_for_hmis() {
	hmi_target=$(($1 + $2))
	hmi_polls=0
	hmi_seen=$(COUNT_HMIS)
	while [ "$hmi_seen" -lt "$hmi_target" ]; do
		[ "$hmi_polls" -lt 12 ] || return 1
		echo "Seen $((hmi_seen - $1)) HMI(s) out of $2 expected, sleeping"
		sleep 5
		hmi_polls=$((hmi_polls + 1))
		hmi_seen=$(COUNT_HMIS)
	done
	return 0
}

# for each chip+core combination
# todo - less fragile parsing
# Each matched line has the form "OCC: Chip <hex> Core <hex>"; read splits it
# into words so that the 3rd and 5th land in $chip and $core.
# Note: the loop runs in the pipeline's subshell, so "exit 1" below ends the
# loop and becomes the exit status of the pipeline.
grep -E -o 'OCC: Chip [0-9a-f]+ Core [0-9a-f]' < /sys/firmware/opal/msglog |
while read -r _occ _chip_kw chip _core_kw core; do
	fir="0x1${core}013100"

	# verify that Core FIR is zero as expected
	if [ "$("$GETSCOM" -c "0x${chip}" "$fir")" != 0 ]; then
		echo "FIR was not zero before injection for chip $chip, core $core. Aborting!"
		echo "Result of $GETSCOM -c 0x${chip} $fir:"
		"$GETSCOM" -c "0x${chip}" "$fir"
		echo "If you get a -5 error, the core may be in idle state. Try stress-ng."
		echo "Otherwise, try $PUTSCOM -c 0x${chip} $fir 0"
		exit 1
	fi

	# keep track of the number of HMIs handled
	old_hmis=$(COUNT_HMIS)

	# do injection, adding a marker to dmesg for clarity
	echo "Injecting HMI on core $core, chip $chip" | tee /dev/kmsg
	# inject a RegFile recoverable error
	if ! "$PUTSCOM" -c "0x${chip}" "$fir" 2000000000000000 > /dev/null; then
		echo "Error injecting. Aborting!"
		exit 1
	fi

	# now we want to wait for all the HMIs to be processed
	# we expect one per thread on the core
	if ! wait_for_hmis "$old_hmis" "$expected_hmis"; then
		echo "Haven't seen expected $expected_hmis recoveries after 1 min. Aborting."
		exit 1
	fi
	echo "Processed $expected_hmis events; presumed success. Check dmesg."
	echo ""
done