Print this page
8074 need to add FMA event for SSD wearout


   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
  23  * Use is subject to license terms.

  24  */
  25 
  26 #pragma ident   "%Z%%M% %I%     %E% SMI"
  27 
  28 /*
  29  * Disk error transport module
  30  *
  31  * This transport module is responsible for translating between disk errors
  32  * and FMA ereports.  It is a read-only transport module, and checks for the
  33  * following failures:
  34  *
  35  *      - overtemp
  36  *      - predictive failure
  37  *      - self-test failure

  38  *
  39  * These failures are detected via the TOPO_METH_DISK_STATUS method, which
  40  * leverages libdiskstatus to do the actual analysis.  This transport module is
  41  * in charge of the following tasks:
  42  *
  43  *      - discovering available devices
  44  *      - periodically checking devices
  45  *      - managing device addition/removal
  46  */
  47 
  48 #include <ctype.h>
  49 #include <fm/fmd_api.h>
  50 #include <fm/libdiskstatus.h>
  51 #include <fm/libtopo.h>
  52 #include <fm/topo_hc.h>
  53 #include <fm/topo_mod.h>
  54 #include <limits.h>
  55 #include <string.h>
  56 #include <sys/fm/io/scsi.h>
  57 #include <sys/fm/protocol.h>


  96                         nvlist_free(nvl);
  97                         dt_stats.dropped.fmds_value.ui64++;
  98                 }
  99         } else {
 100                 dt_stats.dropped.fmds_value.ui64++;
 101         }
 102 }
 103 
  104 /*
  105  * Check a single topo node for failure.  This simply invokes the disk status
  106  * method, and generates any ereports as necessary.
       *
       * thp is the topo handle used here for nvlist allocation and FMRI string
       * conversion, node is the topo node being examined, and arg is a pointer
       * to the disk_monitor_t.
       *
       * Returns TOPO_WALK_ERR if the node's resource FMRI cannot be obtained or
       * the method input nvlist cannot be allocated.  Returns TOPO_WALK_NEXT in
       * every other case, including when the disk status method invocation
       * fails for this node.
  107  */
  108 static int
  109 dt_analyze_disk(topo_hdl_t *thp, tnode_t *node, void *arg)
  110 {
  111         nvlist_t *result;
  112         nvlist_t *fmri, *faults;
  113         char *protocol;
  114         int err;
  115         disk_monitor_t *dmp = arg;
  116         uint64_t ena;
  117         nvpair_t *elem;
  118         boolean_t fault;
  119         nvlist_t *details;
  120         char *fmristr;
  121         nvlist_t *in = NULL;
  122 
  123         if (topo_node_resource(node, &fmri, &err) != 0) {
  124                 fmd_hdl_error(dmp->dm_hdl, "failed to get fmri: %s\n",
  125                     topo_strerror(err));
  126                 return (TOPO_WALK_ERR);
  127         }
  128 
  129         if (topo_hdl_nvalloc(thp, &in, NV_UNIQUE_NAME) != 0) {
  130                 nvlist_free(fmri);
  131                 return (TOPO_WALK_ERR);
  132         }
  133 
              /*
               * If dm_sim_search is set and this node's FMRI string contains it,
               * pass dm_sim_file to the method as the "path" input.  The result
               * of nvlist_add_string() is deliberately ignored.
               */
  134         if (dmp->dm_sim_search) {
  135                 fmristr = NULL;
  136                 if (topo_fmri_nvl2str(thp, fmri, &fmristr, &err) == 0 &&
  137                     strstr(fmristr, dmp->dm_sim_search) != 0)
  138                         (void) nvlist_add_string(in, "path", dmp->dm_sim_file);
  139                 topo_hdl_strfree(thp, fmristr);
  140         }
  141 
  142         /*
  143          * Try to invoke the method.  If this fails (most likely because the
  144          * method is not supported), then ignore this node.
  145          */
  146         if (topo_method_invoke(node, TOPO_METH_DISK_STATUS,
  147             TOPO_METH_DISK_STATUS_VERSION, in, &result, &err) != 0) {
  148                 nvlist_free(fmri);
  149                 nvlist_free(in);
  150                 return (TOPO_WALK_NEXT);
  151         }
  152 
  153         nvlist_free(in);
  154 
              /* One ENA is created here and passed to every ereport posted below. */
  155         ena = fmd_event_ena_create(dmp->dm_hdl);
  156 
  157         /*
  158          * Add any faults.
               *
               * Nothing is posted unless the result carries both a "faults"
               * nvlist and a "protocol" string.  Within "faults", only entries
               * of boolean-value type that are true, and for which the result
               * also holds an nvlist under the same name (the details), are
               * turned into ereports; every other entry is skipped.
  159          */
  160         if (nvlist_lookup_nvlist(result, "faults", &faults) == 0 &&
  161             nvlist_lookup_string(result, "protocol", &protocol) == 0) {
  162                 elem = NULL;
  163                 while ((elem = nvlist_next_nvpair(faults, elem)) != NULL) {
  164                         if (nvpair_type(elem) != DATA_TYPE_BOOLEAN_VALUE)
  165                                 continue;
  166 
  167                         (void) nvpair_value_boolean_value(elem, &fault);
  168                         if (!fault ||
  169                             nvlist_lookup_nvlist(result, nvpair_name(elem),
  170                             &details) != 0)
  171                                 continue;
  172 






  173                         dt_post_ereport(dmp->dm_hdl, dmp->dm_xprt, protocol,
  174                             nvpair_name(elem), ena, fmri, details);

  175                 }
  176         }
  177 
  178         nvlist_free(result);
  179         nvlist_free(fmri);
  180 
  181         return (TOPO_WALK_NEXT);
  182 }
 183 
 184 /*
 185  * Periodic timeout.  Iterates over all hc:// topo nodes, calling
 186  * dt_analyze_disk() for each one.
 187  */
 188 /*ARGSUSED*/
 189 static void
 190 dt_timeout(fmd_hdl_t *hdl, id_t id, void *data)
 191 {
 192         topo_hdl_t *thp;
 193         topo_walk_t *twp;
 194         int err;


 231  */
  232 /*ARGSUSED*/
  233 static void
  234 dt_topo_change(fmd_hdl_t *hdl, topo_hdl_t *thp)
  235 {
  236         disk_monitor_t *dmp = fmd_hdl_getspecific(hdl);
  237 
              /*
               * dm_timer_istopo is set at the end of this function once the
               * timer has been re-armed; if it is already set, do nothing.
               */
  238         if (dmp->dm_timer_istopo)
  239                 return;
  240 
              /*
               * Replace the current timer with one whose delay is the value of
               * the "min-interval" property, and record that this was done.
               */
  241         fmd_timer_remove(hdl, dmp->dm_timer);
  242         dmp->dm_timer = fmd_timer_install(hdl, NULL, NULL,
  243             fmd_prop_get_int64(hdl, "min-interval"));
  244         dmp->dm_timer_istopo = B_TRUE;
  245 }
 246 
  247 static const fmd_prop_t fmd_props[] = {
              /* Each entry is { property name, type, default value }. */
              /* "interval": read into dm_interval by _fmd_init(). */
  248         { "interval", FMD_TYPE_TIME, "1h" },
              /* "min-interval": timer delay installed by dt_topo_change(). */
  249         { "min-interval", FMD_TYPE_TIME, "1min" },
              /* "simulate": string property, empty by default. */
  250         { "simulate", FMD_TYPE_STRING, "" },

              /* All-NULL entry terminates the table. */
  251         { NULL, 0, NULL }
  252 };
 253 
  254 static const fmd_hdl_ops_t fmd_ops = {
              /*
               * Only the timeout and topology-change entry points are
               * provided; every other slot is left NULL.
               */
  255         NULL,                   /* fmdo_recv */
  256         dt_timeout,             /* fmdo_timeout */
  257         NULL,                   /* fmdo_close */
  258         NULL,                   /* fmdo_stats */
  259         NULL,                   /* fmdo_gc */
  260         NULL,                   /* fmdo_send */
  261         dt_topo_change,         /* fmdo_topo_change */
  262 };
 263 
  264 static const fmd_hdl_info_t fmd_info = {
              /* description, version, entry points (fmd_ops), properties */
  265         "Disk Transport Agent", "1.0", &fmd_ops, fmd_props
  266 };
 267 
 268 void
 269 _fmd_init(fmd_hdl_t *hdl)
 270 {
 271         disk_monitor_t *dmp;
 272         char *simulate;
 273 
 274         if (fmd_hdl_register(hdl, FMD_API_VERSION, &fmd_info) != 0)
 275                 return;
 276 
 277         (void) fmd_stat_create(hdl, FMD_STAT_NOALLOC,
 278             sizeof (dt_stats) / sizeof (fmd_stat_t),
 279             (fmd_stat_t *)&dt_stats);
 280 
 281         dmp = fmd_hdl_zalloc(hdl, sizeof (disk_monitor_t), FMD_SLEEP);
 282         fmd_hdl_setspecific(hdl, dmp);
 283 
 284         dmp->dm_xprt = fmd_xprt_open(hdl, FMD_XPRT_RDONLY, NULL, NULL);
 285         dmp->dm_interval = fmd_prop_get_int64(hdl, "interval");




   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
  23  * Use is subject to license terms.
  24  * Copyright 2016 Nexenta Systems, Inc.  All rights reserved.
  25  */
  26 


  27 /*
  28  * Disk error transport module
  29  *
  30  * This transport module is responsible for translating between disk errors
  31  * and FMA ereports.  It is a read-only transport module, and checks for the
  32  * following failures:
  33  *
  34  *      - overtemp
  35  *      - predictive failure
  36  *      - self-test failure
  37  *      - solid state media wearout
  38  *
  39  * These failures are detected via the TOPO_METH_DISK_STATUS method, which
  40  * leverages libdiskstatus to do the actual analysis.  This transport module is
  41  * in charge of the following tasks:
  42  *
  43  *      - discovering available devices
  44  *      - periodically checking devices
  45  *      - managing device addition/removal
  46  */
  47 
  48 #include <ctype.h>
  49 #include <fm/fmd_api.h>
  50 #include <fm/libdiskstatus.h>
  51 #include <fm/libtopo.h>
  52 #include <fm/topo_hc.h>
  53 #include <fm/topo_mod.h>
  54 #include <limits.h>
  55 #include <string.h>
  56 #include <sys/fm/io/scsi.h>
  57 #include <sys/fm/protocol.h>


  96                         nvlist_free(nvl);
  97                         dt_stats.dropped.fmds_value.ui64++;
  98                 }
  99         } else {
 100                 dt_stats.dropped.fmds_value.ui64++;
 101         }
 102 }
 103 
  104 /*
  105  * Check a single topo node for failure.  This simply invokes the disk status
  106  * method, and generates any ereports as necessary.
       *
       * thp is the topo handle used here for nvlist allocation and FMRI string
       * conversion, node is the topo node being examined, and arg is a pointer
       * to the disk_monitor_t.
       *
       * Returns TOPO_WALK_ERR if the node's resource FMRI cannot be obtained or
       * the method input nvlist cannot be allocated.  Returns TOPO_WALK_NEXT in
       * every other case, including when the disk status method invocation
       * fails for this node.
  107  */
  108 static int
  109 dt_analyze_disk(topo_hdl_t *thp, tnode_t *node, void *arg)
  110 {
  111         nvlist_t *result;
  112         nvlist_t *fmri, *faults;
  113         char *protocol;
  114         int err;
  115         disk_monitor_t *dmp = arg;

  116         nvpair_t *elem;
  117         boolean_t fault;
  118         nvlist_t *details;
  119         char *fmristr;
  120         nvlist_t *in = NULL;
  121 
  122         if (topo_node_resource(node, &fmri, &err) != 0) {
  123                 fmd_hdl_error(dmp->dm_hdl, "failed to get fmri: %s\n",
  124                     topo_strerror(err));
  125                 return (TOPO_WALK_ERR);
  126         }
  127 
  128         if (topo_hdl_nvalloc(thp, &in, NV_UNIQUE_NAME) != 0) {
  129                 nvlist_free(fmri);
  130                 return (TOPO_WALK_ERR);
  131         }
  132 
              /*
               * If dm_sim_search is set and this node's FMRI string contains it,
               * pass dm_sim_file to the method as the "path" input.  The result
               * of nvlist_add_string() is deliberately ignored.
               */
  133         if (dmp->dm_sim_search) {
  134                 fmristr = NULL;
  135                 if (topo_fmri_nvl2str(thp, fmri, &fmristr, &err) == 0 &&
  136                     strstr(fmristr, dmp->dm_sim_search) != 0)
  137                         (void) nvlist_add_string(in, "path", dmp->dm_sim_file);
  138                 topo_hdl_strfree(thp, fmristr);
  139         }
  140 
  141         /*
  142          * Try to invoke the method.  If this fails (most likely because the
  143          * method is not supported), then ignore this node.
  144          */
  145         if (topo_method_invoke(node, TOPO_METH_DISK_STATUS,
  146             TOPO_METH_DISK_STATUS_VERSION, in, &result, &err) != 0) {
  147                 nvlist_free(fmri);
  148                 nvlist_free(in);
  149                 return (TOPO_WALK_NEXT);
  150         }
  151 
  152         nvlist_free(in);
  153 


  154         /*
  155          * Check for faults and post ereport(s) if needed
               *
               * Nothing is posted unless the result carries both a "faults"
               * nvlist and a "protocol" string.  Within "faults", only entries
               * of boolean-value type that are true, and for which the result
               * also holds an nvlist under the same name (the details), are
               * considered; every other entry is skipped.
  156          */
  157         if (nvlist_lookup_nvlist(result, "faults", &faults) == 0 &&
  158             nvlist_lookup_string(result, "protocol", &protocol) == 0) {
  159                 elem = NULL;
  160                 while ((elem = nvlist_next_nvpair(faults, elem)) != NULL) {
  161                         if (nvpair_type(elem) != DATA_TYPE_BOOLEAN_VALUE)
  162                                 continue;
  163 
  164                         (void) nvpair_value_boolean_value(elem, &fault);
  165                         if (!fault ||
  166                             nvlist_lookup_nvlist(result, nvpair_name(elem),
  167                             &details) != 0)
  168                                 continue;
  169 
                              /*
                               * Skip faults named FM_EREPORT_SCSI_SSMWEAROUT
                               * when the "ignore-ssm-wearout" property is true.
                               * The property is only read for faults with
                               * that name.
                               */
  170                         if (strcmp(nvpair_name(elem),
  171                             FM_EREPORT_SCSI_SSMWEAROUT) == 0 &&
  172                             fmd_prop_get_int32(dmp->dm_hdl,
  173                             "ignore-ssm-wearout") == FMD_B_TRUE)
  174                                 continue;
  175 
                              /* A new ENA is created for each ereport posted. */
  176                         dt_post_ereport(dmp->dm_hdl, dmp->dm_xprt, protocol,
  177                             nvpair_name(elem),
  178                             fmd_event_ena_create(dmp->dm_hdl), fmri, details);
  179                 }
  180         }
  181 
  182         nvlist_free(result);
  183         nvlist_free(fmri);
  184 
  185         return (TOPO_WALK_NEXT);
  186 }
 187 
 188 /*
 189  * Periodic timeout.  Iterates over all hc:// topo nodes, calling
 190  * dt_analyze_disk() for each one.
 191  */
 192 /*ARGSUSED*/
 193 static void
 194 dt_timeout(fmd_hdl_t *hdl, id_t id, void *data)
 195 {
 196         topo_hdl_t *thp;
 197         topo_walk_t *twp;
 198         int err;


 235  */
  236 /*ARGSUSED*/
  237 static void
  238 dt_topo_change(fmd_hdl_t *hdl, topo_hdl_t *thp)
  239 {
  240         disk_monitor_t *dmp = fmd_hdl_getspecific(hdl);
  241 
              /*
               * dm_timer_istopo is set at the end of this function once the
               * timer has been re-armed; if it is already set, do nothing.
               */
  242         if (dmp->dm_timer_istopo)
  243                 return;
  244 
              /*
               * Replace the current timer with one whose delay is the value of
               * the "min-interval" property, and record that this was done.
               */
  245         fmd_timer_remove(hdl, dmp->dm_timer);
  246         dmp->dm_timer = fmd_timer_install(hdl, NULL, NULL,
  247             fmd_prop_get_int64(hdl, "min-interval"));
  248         dmp->dm_timer_istopo = B_TRUE;
  249 }
 250 
  251 static const fmd_prop_t fmd_props[] = {
              /* Each entry is { property name, type, default value }. */
              /* "interval": read into dm_interval by _fmd_init(). */
  252         { "interval", FMD_TYPE_TIME, "1h" },
              /* "min-interval": timer delay installed by dt_topo_change(). */
  253         { "min-interval", FMD_TYPE_TIME, "1min" },
              /* "simulate": string property, empty by default. */
  254         { "simulate", FMD_TYPE_STRING, "" },
              /*
               * "ignore-ssm-wearout": when true, dt_analyze_disk() does not
               * post ereports for FM_EREPORT_SCSI_SSMWEAROUT faults.
               */
  255         { "ignore-ssm-wearout", FMD_TYPE_BOOL, "false"},
              /* All-NULL entry terminates the table. */
  256         { NULL, 0, NULL }
  257 };
 258 
  259 static const fmd_hdl_ops_t fmd_ops = {
              /*
               * Only the timeout and topology-change entry points are
               * provided; every other slot is left NULL.
               */
  260         NULL,                   /* fmdo_recv */
  261         dt_timeout,             /* fmdo_timeout */
  262         NULL,                   /* fmdo_close */
  263         NULL,                   /* fmdo_stats */
  264         NULL,                   /* fmdo_gc */
  265         NULL,                   /* fmdo_send */
  266         dt_topo_change,         /* fmdo_topo_change */
  267 };
 268 
  269 static const fmd_hdl_info_t fmd_info = {
              /* description, version, entry points (fmd_ops), properties */
  270         "Disk Transport Agent", "1.1", &fmd_ops, fmd_props
  271 };
 272 
 273 void
 274 _fmd_init(fmd_hdl_t *hdl)
 275 {
 276         disk_monitor_t *dmp;
 277         char *simulate;
 278 
 279         if (fmd_hdl_register(hdl, FMD_API_VERSION, &fmd_info) != 0)
 280                 return;
 281 
 282         (void) fmd_stat_create(hdl, FMD_STAT_NOALLOC,
 283             sizeof (dt_stats) / sizeof (fmd_stat_t),
 284             (fmd_stat_t *)&dt_stats);
 285 
 286         dmp = fmd_hdl_zalloc(hdl, sizeof (disk_monitor_t), FMD_SLEEP);
 287         fmd_hdl_setspecific(hdl, dmp);
 288 
 289         dmp->dm_xprt = fmd_xprt_open(hdl, FMD_XPRT_RDONLY, NULL, NULL);
 290         dmp->dm_interval = fmd_prop_get_int64(hdl, "interval");