Print this page
8074 need to add FMA event for SSD wearout

Split Close
Expand all
Collapse all
          --- old/usr/src/cmd/fm/modules/common/disk-transport/disk_transport.c
          +++ new/usr/src/cmd/fm/modules/common/disk-transport/disk_transport.c
↓ open down ↓ 13 lines elided ↑ open up ↑
  14   14   * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15   15   * If applicable, add the following below this CDDL HEADER, with the
  16   16   * fields enclosed by brackets "[]" replaced with your own identifying
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  21   21  /*
  22   22   * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
  23   23   * Use is subject to license terms.
       24 + * Copyright 2016 Nexenta Systems, Inc.  All rights reserved.
  24   25   */
  25   26  
  26      -#pragma ident   "%Z%%M% %I%     %E% SMI"
  27      -
  28   27  /*
  29   28   * Disk error transport module
  30   29   *
  31   30   * This transport module is responsible for translating between disk errors
  32   31   * and FMA ereports.  It is a read-only transport module, and checks for the
  33   32   * following failures:
  34   33   *
  35      - *      - overtemp
  36      - *      - predictive failure
  37      - *      - self-test failure
       34 + *      - overtemp
       35 + *      - predictive failure
       36 + *      - self-test failure
       37 + *      - solid state media wearout
  38   38   *
  39   39   * These failures are detected via the TOPO_METH_DISK_STATUS method, which
  40   40   * leverages libdiskstatus to do the actual analysis.  This transport module is
  41   41   * in charge of the following tasks:
  42   42   *
  43      - *      - discovering available devices
  44      - *      - periodically checking devices
  45      - *      - managing device addition/removal
       43 + *      - discovering available devices
       44 + *      - periodically checking devices
       45 + *      - managing device addition/removal
  46   46   */
  47   47  
  48   48  #include <ctype.h>
  49   49  #include <fm/fmd_api.h>
  50   50  #include <fm/libdiskstatus.h>
  51   51  #include <fm/libtopo.h>
  52   52  #include <fm/topo_hc.h>
  53   53  #include <fm/topo_mod.h>
  54   54  #include <limits.h>
  55   55  #include <string.h>
↓ open down ↓ 50 lines elided ↑ open up ↑
 106  106   * method, and generates any ereports as necessary.
 107  107   */
 108  108  static int
 109  109  dt_analyze_disk(topo_hdl_t *thp, tnode_t *node, void *arg)
 110  110  {
 111  111          nvlist_t *result;
 112  112          nvlist_t *fmri, *faults;
 113  113          char *protocol;
 114  114          int err;
 115  115          disk_monitor_t *dmp = arg;
 116      -        uint64_t ena;
 117  116          nvpair_t *elem;
 118  117          boolean_t fault;
 119  118          nvlist_t *details;
 120  119          char *fmristr;
 121  120          nvlist_t *in = NULL;
 122  121  
 123  122          if (topo_node_resource(node, &fmri, &err) != 0) {
 124  123                  fmd_hdl_error(dmp->dm_hdl, "failed to get fmri: %s\n",
 125  124                      topo_strerror(err));
 126  125                  return (TOPO_WALK_ERR);
↓ open down ↓ 18 lines elided ↑ open up ↑
 145  144           */
 146  145          if (topo_method_invoke(node, TOPO_METH_DISK_STATUS,
 147  146              TOPO_METH_DISK_STATUS_VERSION, in, &result, &err) != 0) {
 148  147                  nvlist_free(fmri);
 149  148                  nvlist_free(in);
 150  149                  return (TOPO_WALK_NEXT);
 151  150          }
 152  151  
 153  152          nvlist_free(in);
 154  153  
 155      -        ena = fmd_event_ena_create(dmp->dm_hdl);
 156      -
 157  154          /*
 158      -         * Add any faults.
      155 +         * Check for faults and post ereport(s) if needed
 159  156           */
 160  157          if (nvlist_lookup_nvlist(result, "faults", &faults) == 0 &&
 161  158              nvlist_lookup_string(result, "protocol", &protocol) == 0) {
 162  159                  elem = NULL;
 163  160                  while ((elem = nvlist_next_nvpair(faults, elem)) != NULL) {
 164  161                          if (nvpair_type(elem) != DATA_TYPE_BOOLEAN_VALUE)
 165  162                                  continue;
 166  163  
 167  164                          (void) nvpair_value_boolean_value(elem, &fault);
 168  165                          if (!fault ||
 169  166                              nvlist_lookup_nvlist(result, nvpair_name(elem),
 170  167                              &details) != 0)
 171  168                                  continue;
 172  169  
      170 +                        if (strcmp(nvpair_name(elem),
      171 +                            FM_EREPORT_SCSI_SSMWEAROUT) == 0 &&
      172 +                            fmd_prop_get_int32(dmp->dm_hdl,
      173 +                            "ignore-ssm-wearout") == FMD_B_TRUE)
      174 +                                continue;
      175 +
 173  176                          dt_post_ereport(dmp->dm_hdl, dmp->dm_xprt, protocol,
 174      -                            nvpair_name(elem), ena, fmri, details);
      177 +                            nvpair_name(elem),
      178 +                            fmd_event_ena_create(dmp->dm_hdl), fmri, details);
 175  179                  }
 176  180          }
 177  181  
 178  182          nvlist_free(result);
 179  183          nvlist_free(fmri);
 180  184  
 181  185          return (TOPO_WALK_NEXT);
 182  186  }
 183  187  
 184  188  /*
↓ open down ↓ 56 lines elided ↑ open up ↑
 241  245          fmd_timer_remove(hdl, dmp->dm_timer);
 242  246          dmp->dm_timer = fmd_timer_install(hdl, NULL, NULL,
 243  247              fmd_prop_get_int64(hdl, "min-interval"));
 244  248          dmp->dm_timer_istopo = B_TRUE;
 245  249  }
 246  250  
 247  251  static const fmd_prop_t fmd_props[] = {
 248  252          { "interval", FMD_TYPE_TIME, "1h" },
 249  253          { "min-interval", FMD_TYPE_TIME, "1min" },
 250  254          { "simulate", FMD_TYPE_STRING, "" },
      255 +        { "ignore-ssm-wearout", FMD_TYPE_BOOL, "false" },
 251  256          { NULL, 0, NULL }
 252  257  };
 253  258  
 254  259  static const fmd_hdl_ops_t fmd_ops = {
 255  260          NULL,                   /* fmdo_recv */
 256  261          dt_timeout,             /* fmdo_timeout */
 257  262          NULL,                   /* fmdo_close */
 258  263          NULL,                   /* fmdo_stats */
 259  264          NULL,                   /* fmdo_gc */
 260  265          NULL,                   /* fmdo_send */
 261  266          dt_topo_change,         /* fmdo_topo_change */
 262  267  };
 263  268  
 264  269  static const fmd_hdl_info_t fmd_info = {
 265      -        "Disk Transport Agent", "1.0", &fmd_ops, fmd_props
      270 +        "Disk Transport Agent", "1.1", &fmd_ops, fmd_props
 266  271  };
 267  272  
 268  273  void
 269  274  _fmd_init(fmd_hdl_t *hdl)
 270  275  {
 271  276          disk_monitor_t *dmp;
 272  277          char *simulate;
 273  278  
 274  279          if (fmd_hdl_register(hdl, FMD_API_VERSION, &fmd_info) != 0)
 275  280                  return;
↓ open down ↓ 6 lines elided ↑ open up ↑
 282  287          fmd_hdl_setspecific(hdl, dmp);
 283  288  
 284  289          dmp->dm_xprt = fmd_xprt_open(hdl, FMD_XPRT_RDONLY, NULL, NULL);
 285  290          dmp->dm_interval = fmd_prop_get_int64(hdl, "interval");
 286  291  
 287  292          /*
 288  293           * Determine if we have the simulate property set.  This property allows
 289  294           * the developer to substitute a faulty device based off all or part of
 290  295           * an FMRI string.  For example, one could do:
 291  296           *
 292      -         *      setprop simulate "bay=4/disk=4  /path/to/sim.so"
      297 +         *      setprop simulate "bay=4/disk=4  /path/to/sim.so"
 293  298           *
 294  299           * When the transport module encounters an FMRI containing the given
 295  300           * string, then it will open the simulator file instead of the
 296  301           * corresponding device.  This can be any file, but is intended to be a
 297  302           * libdiskstatus simulator shared object, capable of faking up SCSI
 298  303           * responses.
 299  304           *
 300  305           * The property consists of two strings, an FMRI fragment and an
 301  306           * absolute path, separated by whitespace.
 302  307           */
↓ open down ↓ 53 lines elided ↑ open up ↑
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX