1# Copyright (c) 2014 The Chromium OS Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
5import logging, os, re
6from autotest_lib.client.bin import test, utils
7from autotest_lib.client.common_lib import error
8
9
class hardware_StorageWearoutDetect(test.test):
    """
    Check wear out status for storage device available in SMART for SSD and
    in ext_csd for eMMC version 5.0 or later. For previous versions of eMMC,
    it will be treated as data not available.

    The test fails if the storage info report shows any of:
    - a SMART attribute row whose FAIL column reads NOW (see SSD_FAIL),
    - a non-zero raw value for SATA attribute 160 Uncorrectable_Error_Cnt
      or 187 Reported_Uncorrect (see SATA_FAIL),
    - an eMMC PRE_EOL_INFO of 0x02 or 0x03 (see MMC_FAIL),
    - an NVMe Available Spare below its Available Spare Threshold.

    It also fails if the report is missing or if no SATA, eMMC or NVMe
    device can be detected in it.

    NOTE(review): earlier documentation also listed "Percentage Used above
    90" and DEVICE_LIFE_TIME_EST_TYP_A as failure conditions; no pattern in
    this class checks for them.
    """

    version = 1
    STORAGE_INFO_PATH = '/var/log/storage_info.txt'
    STORAGE_INFO_COMMON_PATH = '/usr/share/misc/storage-info-common.sh'

    # Example     "SATA Version is: SATA 3.1, 6.0 Gb/s (current: 6.0 Gb/s)"
    SATA_DETECT = r"SATA Version is:.*"

    # Example     "   Extended CSD rev 1.7 (MMC 5.0)"
    # The dot in the version is escaped so only "<digits>.<digits>" is
    # captured and the value is always safe to pass to float().
    MMC_DETECT = r"\s*Extended CSD rev.*MMC (?P<version>\d+\.\d+)"

    # Example     "SMART/Health Information (NVMe Log 0x02, NSID 0xffffffff)"
    NVME_DETECT = r".*NVMe Log .*"

    # Field meaning and example line that have failing attribute
    # ID# ATTRIBUTE_NAME          FLAGS    VALUE WORST THRESH FAIL RAW_VALUE
    # 184 End-to-End_Error        PO--CK   001   001   097    NOW  135
    # (verbose pattern: must be compiled/matched with re.X)
    SSD_FAIL = r"""\s*(?P<param>\S+\s\S+)      # ID and attribute name
                   \s+[P-][O-][S-][R-][C-][K-] # flags
                   (\s+\d{3}){3}               # three 3-digits numbers
                   \s+NOW                      # fail indicator"""

    # We want to detect and fail if we see a non-zero value for either
    # attribute 160 Uncorrectable_Error_Cnt or attribute 187 Reported_Uncorrect
    # ID# ATTRIBUTE_NAME          FLAGS    VALUE WORST THRESH FAIL RAW_VALUE
    # 160 Uncorrectable_Error_Cnt ------   100   100   100     -   10
    # (verbose pattern: must be compiled/matched with re.X)
    SATA_FAIL = r"""\s*(?P<param>(160\s+Uncorrectable_Error_Cnt|
                    187\s+Reported_Uncorrect))
                    \s+[P-][O-][S-][R-][C-][K-]
                    (\s+\d{1,3}){3}
                    \s+(NOW|[-])
                    \s+[1-9][0-9]*"""

    # Ex "Pre EOL information [PRE_EOL_INFO: 0x02]"
    # 0x02 means Warning, consumed 80% of reserved blocks
    # 0x03 means Urgent
    MMC_FAIL = r".*(?P<param>PRE_EOL_INFO]?: 0x0[23])"

    # Ex Available Spare:                    100%
    # We want to fail when the available spare is below the
    # available spare threshold.
    NVME_SPARE = r"Available Spare:\s+(?P<param>\d{1,3})%"

    # Ex Available Spare Threshold:          10%
    NVME_THRESH = r"Available Spare Threshold:\s+(?P<param>\d{1,3})%"

    def _generate_storage_info(self):
        """
        Regenerate the storage info report at STORAGE_INFO_PATH.

        Sources STORAGE_INFO_COMMON_PATH in a shell and runs
        get_storage_info, capturing its stdout into STORAGE_INFO_PATH.

        @raises error.TestFail: if STORAGE_INFO_COMMON_PATH does not exist.
        """
        if not os.path.exists(self.STORAGE_INFO_COMMON_PATH):
            raise error.TestFail('Test failed with error: %s not exist'
                                 % self.STORAGE_INFO_COMMON_PATH)

        cmd = '. %s; get_storage_info' % self.STORAGE_INFO_COMMON_PATH
        # Close (and therefore flush) the report explicitly so that it is
        # complete on disk before it is read back by the caller.
        with open(self.STORAGE_INFO_PATH, 'w') as report:
            utils.run(cmd, stdout_tee=report, stderr_tee=utils.TEE_TO_LOGS)

    def _scan_storage_info(self, lines):
        """
        Scan storage info lines for device types and wearout indicators.

        @param lines: iterable of text lines of the storage info report.

        @return tuple (detected, legacy_mmc_version, failures):
                detected is True if a SATA, eMMC or NVMe device was seen;
                legacy_mmc_version is the version string of the most
                recently seen eMMC older than 5.0, or None;
                failures is a list of human readable failure descriptions
                in the order they were found.
        """
        # The verbose patterns are loop invariant; compile them once.
        ssd_fail = re.compile(self.SSD_FAIL, re.X)
        sata_fail = re.compile(self.SATA_FAIL, re.X)

        detected = False
        legacy_mmc_version = None
        failures = []
        # Value of "Available Spare" seen on the previous line, waiting to
        # be compared with the threshold expected on the current line.
        pending_spare = None

        for line in lines:
            if pending_spare is not None:
                spare, pending_spare = pending_spare, None
                m = re.match(self.NVME_THRESH, line)
                if m:
                    thresh = m.group('param')
                    if int(spare) < int(thresh):
                        failures.append(
                                'NVMe failure, Available Spare %s%% below '
                                'threshold %s%%' % (spare, thresh))
                    continue
                # Not a threshold line: fall through so this line is still
                # checked against every other pattern.

            if re.match(self.SATA_DETECT, line):
                detected = True
                logging.info('Found SATA device')

            m = re.match(self.MMC_DETECT, line)
            if m:
                mmc_version = m.group('version')
                if float(mmc_version) < 5.0:
                    legacy_mmc_version = mmc_version
                detected = True
                logging.info('Found eMMC version %s', mmc_version)

            if re.match(self.NVME_DETECT, line):
                detected = True
                logging.info('Found NVMe device')

            m = ssd_fail.match(line)
            if m:
                failures.append('SSD failure ' + m.group('param'))

            m = re.match(self.MMC_FAIL, line)
            if m:
                failures.append('MMC failure ' + m.group('param'))

            m = sata_fail.match(line)
            if m:
                failures.append('SATA failure, attribute ' + m.group('param'))

            m = re.match(self.NVME_SPARE, line)
            if m:
                # The threshold is expected on the next line; defer the
                # comparison instead of pulling from the iterator, which
                # would raise StopIteration on the last line of the report.
                pending_spare = m.group('param')

        return detected, legacy_mmc_version, failures

    def run_once(self, use_cached_result=True):
        """
        Run the test

        @param use_cached_result: Use the result that generated when machine
                                  booted or generate new one.

        @raises error.TestFail: if the storage info script or report is
                                missing, if no storage device is detected,
                                or if any wearout indicator is found.
        """
        if not use_cached_result:
            self._generate_storage_info()

        # Check that storage_info file exist.
        if not os.path.exists(self.STORAGE_INFO_PATH):
            raise error.TestFail('Test failed with error: %s not exist'
                                 % self.STORAGE_INFO_PATH)

        with open(self.STORAGE_INFO_PATH) as f:
            detected, legacy_mmc_version, failures = \
                    self._scan_storage_info(f)

        if not detected:
            raise error.TestFail('Can not detect storage device.')

        if failures:
            # Separate individual failures so they do not run together.
            raise error.TestFail('Detected wearout parameter:%s'
                                 % ', '.join(failures))

        if legacy_mmc_version is not None:
            msg = 'eMMC version %s detected. ' % legacy_mmc_version
            msg += 'Wearout attributes are supported in eMMC 5.0 and later.'
            logging.info(msg)
161