Linux Audio

Check our new training course

Loading...
Note: File does not exist in v3.1.
  1// SPDX-License-Identifier: GPL-2.0
  2/*
  3 * Hwmon client for disk and solid state drives with temperature sensors
  4 * Copyright (C) 2019 Zodiac Inflight Innovations
  5 *
  6 * With input from:
  7 *    Hwmon client for S.M.A.R.T. hard disk drives with temperature sensors.
  8 *    (C) 2018 Linus Walleij
  9 *
 10 *    hwmon: Driver for SCSI/ATA temperature sensors
 11 *    by Constantin Baranov <const@mimas.ru>, submitted September 2009
 12 *
 13 * This driver supports reporting the temperature of SATA drives. It can be
 14 * easily extended to report the temperature of SCSI drives.
 15 *
 16 * The primary means to read drive temperatures and temperature limits
 17 * for ATA drives is the SCT Command Transport feature set as specified in
 18 * ATA8-ACS.
 19 * It can be used to read the current drive temperature, temperature limits,
 20 * and historic minimum and maximum temperatures. The SCT Command Transport
 21 * feature set is documented in "AT Attachment 8 - ATA/ATAPI Command Set
 22 * (ATA8-ACS)".
 23 *
 24 * If the SCT Command Transport feature set is not available, drive temperatures
 25 * may be readable through SMART attributes. Since SMART attributes are not well
 26 * defined, this method is only used as fallback mechanism.
 27 *
 28 * There are three SMART attributes which may report drive temperatures.
 29 * Those are defined as follows (from
 30 * http://www.cropel.com/library/smart-attribute-list.aspx).
 31 *
 32 * 190	Temperature	Temperature, monitored by a sensor somewhere inside
 33 *			the drive. Raw value typically holds the actual
 34 *			temperature (hexadecimal) in its rightmost two digits.
 35 *
 36 * 194	Temperature	Temperature, monitored by a sensor somewhere inside
 37 *			the drive. Raw value typically holds the actual
 38 *			temperature (hexadecimal) in its rightmost two digits.
 39 *
 40 * 231	Temperature	Temperature, monitored by a sensor somewhere inside
 41 *			the drive. Raw value typically holds the actual
 42 *			temperature (hexadecimal) in its rightmost two digits.
 43 *
 44 * Wikipedia defines attributes a bit differently.
 45 *
 46 * 190	Temperature	Value is equal to (100-temp. °C), allowing manufacturer
 47 *	Difference or	to set a minimum threshold which corresponds to a
 48 *	Airflow		maximum temperature. This also follows the convention of
 49 *	Temperature	100 being a best-case value and lower values being
 50 *			undesirable. However, some older drives may instead
 51 *			report raw Temperature (identical to 0xC2) or
 52 *			Temperature minus 50 here.
 53 * 194	Temperature or	Indicates the device temperature, if the appropriate
 54 *	Temperature	sensor is fitted. Lowest byte of the raw value contains
 55 *	Celsius		the exact temperature value (Celsius degrees).
 56 * 231	Life Left	Indicates the approximate SSD life left, in terms of
 57 *	(SSDs) or	program/erase cycles or available reserved blocks.
 58 *	Temperature	A normalized value of 100 represents a new drive, with
 59 *			a threshold value at 10 indicating a need for
 60 *			replacement. A value of 0 may mean that the drive is
 61 *			operating in read-only mode to allow data recovery.
 62 *			Previously (pre-2010) occasionally used for Drive
 63 *			Temperature (more typically reported at 0xC2).
 64 *
 65 * Common denominator is that the first raw byte reports the temperature
 66 * in degrees C on almost all drives. Some drives may report a fractional
 67 * temperature in the second raw byte.
 68 *
 69 * Known exceptions (from libatasmart):
 70 * - SAMSUNG SV0412H and SAMSUNG SV1204H report the temperature in 10th
 71 *   degrees C in the first two raw bytes.
 72 * - A few Maxtor drives report an unknown or bad value in attribute 194.
 73 * - Certain Apple SSD drives report an unknown value in attribute 190.
 74 *   Only certain firmware versions are affected.
 75 *
 76 * Those exceptions affect older ATA drives and are currently ignored.
 77 * Also, the second raw byte (possibly reporting the fractional temperature)
 78 * is currently ignored.
 79 *
 80 * Many drives also report temperature limits in additional SMART data raw
 81 * bytes. The format of those is not well defined and varies widely.
 82 * The driver does not currently attempt to report those limits.
 83 *
 84 * According to data in smartmontools, attribute 231 is rarely used to report
 85 * drive temperatures. At the same time, several drives report SSD life left
 86 * in attribute 231, but do not support temperature sensors. For this reason,
 87 * attribute 231 is currently ignored.
 88 *
 89 * Following above definitions, temperatures are reported as follows.
 90 * - If SCT Command Transport is supported, it is used to read the
 91 *   temperature and, if available, temperature limits.
 92 * - Otherwise, if SMART attribute 194 is supported, it is used to read
 93 *   the temperature.
 94 * - Otherwise, if SMART attribute 190 is supported, it is used to read
 95 *   the temperature.
 96 */
 97
 98#include <linux/ata.h>
 99#include <linux/bits.h>
100#include <linux/device.h>
101#include <linux/hwmon.h>
102#include <linux/kernel.h>
103#include <linux/list.h>
104#include <linux/module.h>
105#include <linux/mutex.h>
106#include <scsi/scsi_cmnd.h>
107#include <scsi/scsi_device.h>
108#include <scsi/scsi_driver.h>
109#include <scsi/scsi_proto.h>
110
111struct drivetemp_data {
112	struct list_head list;		/* list of instantiated devices */
113	struct mutex lock;		/* protect data buffer accesses */
114	struct scsi_device *sdev;	/* SCSI device */
115	struct device *dev;		/* instantiating device */
116	struct device *hwdev;		/* hardware monitoring device */
117	u8 smartdata[ATA_SECT_SIZE];	/* local buffer */
118	int (*get_temp)(struct drivetemp_data *st, u32 attr, long *val);
119	bool have_temp_lowest;		/* lowest temp in SCT status */
120	bool have_temp_highest;		/* highest temp in SCT status */
121	bool have_temp_min;		/* have min temp */
122	bool have_temp_max;		/* have max temp */
123	bool have_temp_lcrit;		/* have lower critical limit */
124	bool have_temp_crit;		/* have critical limit */
125	int temp_min;			/* min temp */
126	int temp_max;			/* max temp */
127	int temp_lcrit;			/* lower critical limit */
128	int temp_crit;			/* critical limit */
129};
130
131static LIST_HEAD(drivetemp_devlist);
132
/* SMART attribute table: number of entries and the IDs carrying temperature */
#define ATA_MAX_SMART_ATTRS	30
#define SMART_TEMP_PROP_190	190
#define SMART_TEMP_PROP_194	194

/* SCT log addresses */
#define SCT_STATUS_REQ_ADDR	0xe0
#define SCT_READ_LOG_ADDR	0xe1

/* Byte offsets into the SCT status log */
#define SCT_STATUS_VERSION_LOW	0
#define SCT_STATUS_VERSION_HIGH	1
#define SCT_STATUS_TEMP		200
#define SCT_STATUS_TEMP_LOWEST	201
#define SCT_STATUS_TEMP_HIGHEST	202

/* SMART feature codes used to read and write logs */
#define SMART_READ_LOG		0xd5
#define SMART_WRITE_LOG		0xd6

/* SCT temperature byte value meaning "no valid temperature" */
#define INVALID_TEMP		0x80

/* True unless the SCT temperature byte is the invalid marker */
#define temp_is_valid(temp)	((temp) != INVALID_TEMP)
/* Reinterpret the SCT temperature byte as signed and scale it by 1000 */
#define temp_from_sct(temp)	(((s8)(temp)) * 1000)
151
152static inline bool ata_id_smart_supported(u16 *id)
153{
154	return id[ATA_ID_COMMAND_SET_1] & BIT(0);
155}
156
157static inline bool ata_id_smart_enabled(u16 *id)
158{
159	return id[ATA_ID_CFS_ENABLE_1] & BIT(0);
160}
161
162static int drivetemp_scsi_command(struct drivetemp_data *st,
163				 u8 ata_command, u8 feature,
164				 u8 lba_low, u8 lba_mid, u8 lba_high)
165{
166	u8 scsi_cmd[MAX_COMMAND_SIZE];
167	int data_dir;
168
169	memset(scsi_cmd, 0, sizeof(scsi_cmd));
170	scsi_cmd[0] = ATA_16;
171	if (ata_command == ATA_CMD_SMART && feature == SMART_WRITE_LOG) {
172		scsi_cmd[1] = (5 << 1);	/* PIO Data-out */
173		/*
174		 * No off.line or cc, write to dev, block count in sector count
175		 * field.
176		 */
177		scsi_cmd[2] = 0x06;
178		data_dir = DMA_TO_DEVICE;
179	} else {
180		scsi_cmd[1] = (4 << 1);	/* PIO Data-in */
181		/*
182		 * No off.line or cc, read from dev, block count in sector count
183		 * field.
184		 */
185		scsi_cmd[2] = 0x0e;
186		data_dir = DMA_FROM_DEVICE;
187	}
188	scsi_cmd[4] = feature;
189	scsi_cmd[6] = 1;	/* 1 sector */
190	scsi_cmd[8] = lba_low;
191	scsi_cmd[10] = lba_mid;
192	scsi_cmd[12] = lba_high;
193	scsi_cmd[14] = ata_command;
194
195	return scsi_execute_req(st->sdev, scsi_cmd, data_dir,
196				st->smartdata, ATA_SECT_SIZE, NULL, HZ, 5,
197				NULL);
198}
199
200static int drivetemp_ata_command(struct drivetemp_data *st, u8 feature,
201				 u8 select)
202{
203	return drivetemp_scsi_command(st, ATA_CMD_SMART, feature, select,
204				     ATA_SMART_LBAM_PASS, ATA_SMART_LBAH_PASS);
205}
206
207static int drivetemp_get_smarttemp(struct drivetemp_data *st, u32 attr,
208				  long *temp)
209{
210	u8 *buf = st->smartdata;
211	bool have_temp = false;
212	u8 temp_raw;
213	u8 csum;
214	int err;
215	int i;
216
217	err = drivetemp_ata_command(st, ATA_SMART_READ_VALUES, 0);
218	if (err)
219		return err;
220
221	/* Checksum the read value table */
222	csum = 0;
223	for (i = 0; i < ATA_SECT_SIZE; i++)
224		csum += buf[i];
225	if (csum) {
226		dev_dbg(&st->sdev->sdev_gendev,
227			"checksum error reading SMART values\n");
228		return -EIO;
229	}
230
231	for (i = 0; i < ATA_MAX_SMART_ATTRS; i++) {
232		u8 *attr = buf + i * 12;
233		int id = attr[2];
234
235		if (!id)
236			continue;
237
238		if (id == SMART_TEMP_PROP_190) {
239			temp_raw = attr[7];
240			have_temp = true;
241		}
242		if (id == SMART_TEMP_PROP_194) {
243			temp_raw = attr[7];
244			have_temp = true;
245			break;
246		}
247	}
248
249	if (have_temp) {
250		*temp = temp_raw * 1000;
251		return 0;
252	}
253
254	return -ENXIO;
255}
256
257static int drivetemp_get_scttemp(struct drivetemp_data *st, u32 attr, long *val)
258{
259	u8 *buf = st->smartdata;
260	int err;
261
262	err = drivetemp_ata_command(st, SMART_READ_LOG, SCT_STATUS_REQ_ADDR);
263	if (err)
264		return err;
265	switch (attr) {
266	case hwmon_temp_input:
267		if (!temp_is_valid(buf[SCT_STATUS_TEMP]))
268			return -ENODATA;
269		*val = temp_from_sct(buf[SCT_STATUS_TEMP]);
270		break;
271	case hwmon_temp_lowest:
272		if (!temp_is_valid(buf[SCT_STATUS_TEMP_LOWEST]))
273			return -ENODATA;
274		*val = temp_from_sct(buf[SCT_STATUS_TEMP_LOWEST]);
275		break;
276	case hwmon_temp_highest:
277		if (!temp_is_valid(buf[SCT_STATUS_TEMP_HIGHEST]))
278			return -ENODATA;
279		*val = temp_from_sct(buf[SCT_STATUS_TEMP_HIGHEST]);
280		break;
281	default:
282		err = -EINVAL;
283		break;
284	}
285	return err;
286}
287
/*
 * Drive models for which SCT must not be used. Entries are prefix-matched
 * against the model string.
 *
 * These drives will have WRITE FPDMA QUEUED command timeouts and sometimes
 * just freeze until power-cycled under heavy write loads when their
 * temperature is getting polled in SCT mode. The SMART mode seems to be fine,
 * though.
 *
 * While only the 3 TB model (DT01ACA3) was actually caught exhibiting the
 * problem, let's play safe here to avoid data corruption and ban the whole
 * DT01ACAx family.
 */
static const char * const sct_avoid_models[] = {
	"TOSHIBA DT01ACA",
};
302
303static bool drivetemp_sct_avoid(struct drivetemp_data *st)
304{
305	struct scsi_device *sdev = st->sdev;
306	unsigned int ctr;
307
308	if (!sdev->model)
309		return false;
310
311	/*
312	 * The "model" field contains just the raw SCSI INQUIRY response
313	 * "product identification" field, which has a width of 16 bytes.
314	 * This field is space-filled, but is NOT NULL-terminated.
315	 */
316	for (ctr = 0; ctr < ARRAY_SIZE(sct_avoid_models); ctr++)
317		if (!strncmp(sdev->model, sct_avoid_models[ctr],
318			     strlen(sct_avoid_models[ctr])))
319			return true;
320
321	return false;
322}
323
324static int drivetemp_identify_sata(struct drivetemp_data *st)
325{
326	struct scsi_device *sdev = st->sdev;
327	u8 *buf = st->smartdata;
328	struct scsi_vpd *vpd;
329	bool is_ata, is_sata;
330	bool have_sct_data_table;
331	bool have_sct_temp;
332	bool have_smart;
333	bool have_sct;
334	u16 *ata_id;
335	u16 version;
336	long temp;
337	int err;
338
339	/* SCSI-ATA Translation present? */
340	rcu_read_lock();
341	vpd = rcu_dereference(sdev->vpd_pg89);
342
343	/*
344	 * Verify that ATA IDENTIFY DEVICE data is included in ATA Information
345	 * VPD and that the drive implements the SATA protocol.
346	 */
347	if (!vpd || vpd->len < 572 || vpd->data[56] != ATA_CMD_ID_ATA ||
348	    vpd->data[36] != 0x34) {
349		rcu_read_unlock();
350		return -ENODEV;
351	}
352	ata_id = (u16 *)&vpd->data[60];
353	is_ata = ata_id_is_ata(ata_id);
354	is_sata = ata_id_is_sata(ata_id);
355	have_sct = ata_id_sct_supported(ata_id);
356	have_sct_data_table = ata_id_sct_data_tables(ata_id);
357	have_smart = ata_id_smart_supported(ata_id) &&
358				ata_id_smart_enabled(ata_id);
359
360	rcu_read_unlock();
361
362	/* bail out if this is not a SATA device */
363	if (!is_ata || !is_sata)
364		return -ENODEV;
365
366	if (have_sct && drivetemp_sct_avoid(st)) {
367		dev_notice(&sdev->sdev_gendev,
368			   "will avoid using SCT for temperature monitoring\n");
369		have_sct = false;
370	}
371
372	if (!have_sct)
373		goto skip_sct;
374
375	err = drivetemp_ata_command(st, SMART_READ_LOG, SCT_STATUS_REQ_ADDR);
376	if (err)
377		goto skip_sct;
378
379	version = (buf[SCT_STATUS_VERSION_HIGH] << 8) |
380		  buf[SCT_STATUS_VERSION_LOW];
381	if (version != 2 && version != 3)
382		goto skip_sct;
383
384	have_sct_temp = temp_is_valid(buf[SCT_STATUS_TEMP]);
385	if (!have_sct_temp)
386		goto skip_sct;
387
388	st->have_temp_lowest = temp_is_valid(buf[SCT_STATUS_TEMP_LOWEST]);
389	st->have_temp_highest = temp_is_valid(buf[SCT_STATUS_TEMP_HIGHEST]);
390
391	if (!have_sct_data_table)
392		goto skip_sct_data;
393
394	/* Request and read temperature history table */
395	memset(buf, '\0', sizeof(st->smartdata));
396	buf[0] = 5;	/* data table command */
397	buf[2] = 1;	/* read table */
398	buf[4] = 2;	/* temperature history table */
399
400	err = drivetemp_ata_command(st, SMART_WRITE_LOG, SCT_STATUS_REQ_ADDR);
401	if (err)
402		goto skip_sct_data;
403
404	err = drivetemp_ata_command(st, SMART_READ_LOG, SCT_READ_LOG_ADDR);
405	if (err)
406		goto skip_sct_data;
407
408	/*
409	 * Temperature limits per AT Attachment 8 -
410	 * ATA/ATAPI Command Set (ATA8-ACS)
411	 */
412	st->have_temp_max = temp_is_valid(buf[6]);
413	st->have_temp_crit = temp_is_valid(buf[7]);
414	st->have_temp_min = temp_is_valid(buf[8]);
415	st->have_temp_lcrit = temp_is_valid(buf[9]);
416
417	st->temp_max = temp_from_sct(buf[6]);
418	st->temp_crit = temp_from_sct(buf[7]);
419	st->temp_min = temp_from_sct(buf[8]);
420	st->temp_lcrit = temp_from_sct(buf[9]);
421
422skip_sct_data:
423	if (have_sct_temp) {
424		st->get_temp = drivetemp_get_scttemp;
425		return 0;
426	}
427skip_sct:
428	if (!have_smart)
429		return -ENODEV;
430	st->get_temp = drivetemp_get_smarttemp;
431	return drivetemp_get_smarttemp(st, hwmon_temp_input, &temp);
432}
433
434static int drivetemp_identify(struct drivetemp_data *st)
435{
436	struct scsi_device *sdev = st->sdev;
437
438	/* Bail out immediately if there is no inquiry data */
439	if (!sdev->inquiry || sdev->inquiry_len < 16)
440		return -ENODEV;
441
442	/* Disk device? */
443	if (sdev->type != TYPE_DISK && sdev->type != TYPE_ZBC)
444		return -ENODEV;
445
446	return drivetemp_identify_sata(st);
447}
448
449static int drivetemp_read(struct device *dev, enum hwmon_sensor_types type,
450			 u32 attr, int channel, long *val)
451{
452	struct drivetemp_data *st = dev_get_drvdata(dev);
453	int err = 0;
454
455	if (type != hwmon_temp)
456		return -EINVAL;
457
458	switch (attr) {
459	case hwmon_temp_input:
460	case hwmon_temp_lowest:
461	case hwmon_temp_highest:
462		mutex_lock(&st->lock);
463		err = st->get_temp(st, attr, val);
464		mutex_unlock(&st->lock);
465		break;
466	case hwmon_temp_lcrit:
467		*val = st->temp_lcrit;
468		break;
469	case hwmon_temp_min:
470		*val = st->temp_min;
471		break;
472	case hwmon_temp_max:
473		*val = st->temp_max;
474		break;
475	case hwmon_temp_crit:
476		*val = st->temp_crit;
477		break;
478	default:
479		err = -EINVAL;
480		break;
481	}
482	return err;
483}
484
485static umode_t drivetemp_is_visible(const void *data,
486				   enum hwmon_sensor_types type,
487				   u32 attr, int channel)
488{
489	const struct drivetemp_data *st = data;
490
491	switch (type) {
492	case hwmon_temp:
493		switch (attr) {
494		case hwmon_temp_input:
495			return 0444;
496		case hwmon_temp_lowest:
497			if (st->have_temp_lowest)
498				return 0444;
499			break;
500		case hwmon_temp_highest:
501			if (st->have_temp_highest)
502				return 0444;
503			break;
504		case hwmon_temp_min:
505			if (st->have_temp_min)
506				return 0444;
507			break;
508		case hwmon_temp_max:
509			if (st->have_temp_max)
510				return 0444;
511			break;
512		case hwmon_temp_lcrit:
513			if (st->have_temp_lcrit)
514				return 0444;
515			break;
516		case hwmon_temp_crit:
517			if (st->have_temp_crit)
518				return 0444;
519			break;
520		default:
521			break;
522		}
523		break;
524	default:
525		break;
526	}
527	return 0;
528}
529
530static const struct hwmon_channel_info *drivetemp_info[] = {
531	HWMON_CHANNEL_INFO(chip,
532			   HWMON_C_REGISTER_TZ),
533	HWMON_CHANNEL_INFO(temp, HWMON_T_INPUT |
534			   HWMON_T_LOWEST | HWMON_T_HIGHEST |
535			   HWMON_T_MIN | HWMON_T_MAX |
536			   HWMON_T_LCRIT | HWMON_T_CRIT),
537	NULL
538};
539
540static const struct hwmon_ops drivetemp_ops = {
541	.is_visible = drivetemp_is_visible,
542	.read = drivetemp_read,
543};
544
545static const struct hwmon_chip_info drivetemp_chip_info = {
546	.ops = &drivetemp_ops,
547	.info = drivetemp_info,
548};
549
550/*
551 * The device argument points to sdev->sdev_dev. Its parent is
552 * sdev->sdev_gendev, which we can use to get the scsi_device pointer.
553 */
554static int drivetemp_add(struct device *dev, struct class_interface *intf)
555{
556	struct scsi_device *sdev = to_scsi_device(dev->parent);
557	struct drivetemp_data *st;
558	int err;
559
560	st = kzalloc(sizeof(*st), GFP_KERNEL);
561	if (!st)
562		return -ENOMEM;
563
564	st->sdev = sdev;
565	st->dev = dev;
566	mutex_init(&st->lock);
567
568	if (drivetemp_identify(st)) {
569		err = -ENODEV;
570		goto abort;
571	}
572
573	st->hwdev = hwmon_device_register_with_info(dev->parent, "drivetemp",
574						    st, &drivetemp_chip_info,
575						    NULL);
576	if (IS_ERR(st->hwdev)) {
577		err = PTR_ERR(st->hwdev);
578		goto abort;
579	}
580
581	list_add(&st->list, &drivetemp_devlist);
582	return 0;
583
584abort:
585	kfree(st);
586	return err;
587}
588
589static void drivetemp_remove(struct device *dev, struct class_interface *intf)
590{
591	struct drivetemp_data *st, *tmp;
592
593	list_for_each_entry_safe(st, tmp, &drivetemp_devlist, list) {
594		if (st->dev == dev) {
595			list_del(&st->list);
596			hwmon_device_unregister(st->hwdev);
597			kfree(st);
598			break;
599		}
600	}
601}
602
603static struct class_interface drivetemp_interface = {
604	.add_dev = drivetemp_add,
605	.remove_dev = drivetemp_remove,
606};
607
608static int __init drivetemp_init(void)
609{
610	return scsi_register_interface(&drivetemp_interface);
611}
612
613static void __exit drivetemp_exit(void)
614{
615	scsi_unregister_interface(&drivetemp_interface);
616}
617
618module_init(drivetemp_init);
619module_exit(drivetemp_exit);
620
621MODULE_AUTHOR("Guenter Roeck <linus@roeck-us.net>");
622MODULE_DESCRIPTION("Hard drive temperature monitor");
623MODULE_LICENSE("GPL");
624MODULE_ALIAS("platform:drivetemp");