4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 */
25
26 #pragma ident "%Z%%M% %I% %E% SMI"
27
28 /*
29 * Disk error transport module
30 *
31 * This transport module is responsible for translating between disk errors
32 * and FMA ereports. It is a read-only transport module, and checks for the
33 * following failures:
34 *
35 * - overtemp
36 * - predictive failure
37 * - self-test failure
38 *
39 * These failures are detected via the TOPO_METH_DISK_STATUS method, which
40 * leverages libdiskstatus to do the actual analysis. This transport module is
41 * in charge of the following tasks:
42 *
43 * - discovering available devices
44 * - periodically checking devices
45 * - managing device addition/removal
46 */
47
48 #include <ctype.h>
49 #include <fm/fmd_api.h>
50 #include <fm/libdiskstatus.h>
51 #include <fm/libtopo.h>
52 #include <fm/topo_hc.h>
53 #include <fm/topo_mod.h>
54 #include <limits.h>
55 #include <string.h>
56 #include <sys/fm/io/scsi.h>
57 #include <sys/fm/protocol.h>
96 nvlist_free(nvl);
97 dt_stats.dropped.fmds_value.ui64++;
98 }
99 } else {
100 dt_stats.dropped.fmds_value.ui64++;
101 }
102 }
103
104 /*
105 * Check a single topo node for failure. This simply invokes the disk status
106 * method, and generates any ereports as necessary.
107 */
108 static int
109 dt_analyze_disk(topo_hdl_t *thp, tnode_t *node, void *arg)
110 {
111 nvlist_t *result;
112 nvlist_t *fmri, *faults;
113 char *protocol;
114 int err;
115 disk_monitor_t *dmp = arg;
116 uint64_t ena;
117 nvpair_t *elem;
118 boolean_t fault;
119 nvlist_t *details;
120 char *fmristr;
121 nvlist_t *in = NULL;
122
123 if (topo_node_resource(node, &fmri, &err) != 0) {
124 fmd_hdl_error(dmp->dm_hdl, "failed to get fmri: %s\n",
125 topo_strerror(err));
126 return (TOPO_WALK_ERR);
127 }
128
129 if (topo_hdl_nvalloc(thp, &in, NV_UNIQUE_NAME) != 0) {
130 nvlist_free(fmri);
131 return (TOPO_WALK_ERR);
132 }
133
134 if (dmp->dm_sim_search) {
135 fmristr = NULL;
136 if (topo_fmri_nvl2str(thp, fmri, &fmristr, &err) == 0 &&
137 strstr(fmristr, dmp->dm_sim_search) != 0)
138 (void) nvlist_add_string(in, "path", dmp->dm_sim_file);
139 topo_hdl_strfree(thp, fmristr);
140 }
141
142 /*
143 * Try to invoke the method. If this fails (most likely because the
144 * method is not supported), then ignore this node.
145 */
146 if (topo_method_invoke(node, TOPO_METH_DISK_STATUS,
147 TOPO_METH_DISK_STATUS_VERSION, in, &result, &err) != 0) {
148 nvlist_free(fmri);
149 nvlist_free(in);
150 return (TOPO_WALK_NEXT);
151 }
152
153 nvlist_free(in);
154
155 ena = fmd_event_ena_create(dmp->dm_hdl);
156
157 /*
158 * Add any faults.
159 */
160 if (nvlist_lookup_nvlist(result, "faults", &faults) == 0 &&
161 nvlist_lookup_string(result, "protocol", &protocol) == 0) {
162 elem = NULL;
163 while ((elem = nvlist_next_nvpair(faults, elem)) != NULL) {
164 if (nvpair_type(elem) != DATA_TYPE_BOOLEAN_VALUE)
165 continue;
166
167 (void) nvpair_value_boolean_value(elem, &fault);
168 if (!fault ||
169 nvlist_lookup_nvlist(result, nvpair_name(elem),
170 &details) != 0)
171 continue;
172
173 dt_post_ereport(dmp->dm_hdl, dmp->dm_xprt, protocol,
174 nvpair_name(elem), ena, fmri, details);
175 }
176 }
177
178 nvlist_free(result);
179 nvlist_free(fmri);
180
181 return (TOPO_WALK_NEXT);
182 }
183
184 /*
185 * Periodic timeout. Iterates over all hc:// topo nodes, calling
186 * dt_analyze_disk() for each one.
187 */
188 /*ARGSUSED*/
189 static void
190 dt_timeout(fmd_hdl_t *hdl, id_t id, void *data)
191 {
192 topo_hdl_t *thp;
193 topo_walk_t *twp;
194 int err;
231 */
232 /*ARGSUSED*/
233 static void
234 dt_topo_change(fmd_hdl_t *hdl, topo_hdl_t *thp)
235 {
236 disk_monitor_t *dmp = fmd_hdl_getspecific(hdl);
237
238 if (dmp->dm_timer_istopo)
239 return;
240
241 fmd_timer_remove(hdl, dmp->dm_timer);
242 dmp->dm_timer = fmd_timer_install(hdl, NULL, NULL,
243 fmd_prop_get_int64(hdl, "min-interval"));
244 dmp->dm_timer_istopo = B_TRUE;
245 }
246
247 static const fmd_prop_t fmd_props[] = {
248 { "interval", FMD_TYPE_TIME, "1h" },
249 { "min-interval", FMD_TYPE_TIME, "1min" },
250 { "simulate", FMD_TYPE_STRING, "" },
251 { NULL, 0, NULL }
252 };
253
254 static const fmd_hdl_ops_t fmd_ops = {
255 NULL, /* fmdo_recv */
256 dt_timeout, /* fmdo_timeout */
257 NULL, /* fmdo_close */
258 NULL, /* fmdo_stats */
259 NULL, /* fmdo_gc */
260 NULL, /* fmdo_send */
261 dt_topo_change, /* fmdo_topo_change */
262 };
263
264 static const fmd_hdl_info_t fmd_info = {
265 "Disk Transport Agent", "1.0", &fmd_ops, fmd_props
266 };
267
268 void
269 _fmd_init(fmd_hdl_t *hdl)
270 {
271 disk_monitor_t *dmp;
272 char *simulate;
273
274 if (fmd_hdl_register(hdl, FMD_API_VERSION, &fmd_info) != 0)
275 return;
276
277 (void) fmd_stat_create(hdl, FMD_STAT_NOALLOC,
278 sizeof (dt_stats) / sizeof (fmd_stat_t),
279 (fmd_stat_t *)&dt_stats);
280
281 dmp = fmd_hdl_zalloc(hdl, sizeof (disk_monitor_t), FMD_SLEEP);
282 fmd_hdl_setspecific(hdl, dmp);
283
284 dmp->dm_xprt = fmd_xprt_open(hdl, FMD_XPRT_RDONLY, NULL, NULL);
285 dmp->dm_interval = fmd_prop_get_int64(hdl, "interval");
|
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 * Copyright 2016 Nexenta Systems, Inc. All rights reserved.
25 */
26
27 /*
28 * Disk error transport module
29 *
30 * This transport module is responsible for translating between disk errors
31 * and FMA ereports. It is a read-only transport module, and checks for the
32 * following failures:
33 *
34 * - overtemp
35 * - predictive failure
36 * - self-test failure
37 * - solid state media wearout
38 *
39 * These failures are detected via the TOPO_METH_DISK_STATUS method, which
40 * leverages libdiskstatus to do the actual analysis. This transport module is
41 * in charge of the following tasks:
42 *
43 * - discovering available devices
44 * - periodically checking devices
45 * - managing device addition/removal
46 */
47
48 #include <ctype.h>
49 #include <fm/fmd_api.h>
50 #include <fm/libdiskstatus.h>
51 #include <fm/libtopo.h>
52 #include <fm/topo_hc.h>
53 #include <fm/topo_mod.h>
54 #include <limits.h>
55 #include <string.h>
56 #include <sys/fm/io/scsi.h>
57 #include <sys/fm/protocol.h>
96 nvlist_free(nvl);
97 dt_stats.dropped.fmds_value.ui64++;
98 }
99 } else {
100 dt_stats.dropped.fmds_value.ui64++;
101 }
102 }
103
104 /*
105 * Check a single topo node for failure. This simply invokes the disk status
106 * method, and generates any ereports as necessary.
107 */
108 static int
109 dt_analyze_disk(topo_hdl_t *thp, tnode_t *node, void *arg)
110 {
111 nvlist_t *result;
112 nvlist_t *fmri, *faults;
113 char *protocol;
114 int err;
115 disk_monitor_t *dmp = arg;
116 nvpair_t *elem;
117 boolean_t fault;
118 nvlist_t *details;
119 char *fmristr;
120 nvlist_t *in = NULL;
121
122 if (topo_node_resource(node, &fmri, &err) != 0) {
123 fmd_hdl_error(dmp->dm_hdl, "failed to get fmri: %s\n",
124 topo_strerror(err));
125 return (TOPO_WALK_ERR);
126 }
127
128 if (topo_hdl_nvalloc(thp, &in, NV_UNIQUE_NAME) != 0) {
129 nvlist_free(fmri);
130 return (TOPO_WALK_ERR);
131 }
132
133 if (dmp->dm_sim_search) {
134 fmristr = NULL;
135 if (topo_fmri_nvl2str(thp, fmri, &fmristr, &err) == 0 &&
136 strstr(fmristr, dmp->dm_sim_search) != 0)
137 (void) nvlist_add_string(in, "path", dmp->dm_sim_file);
138 topo_hdl_strfree(thp, fmristr);
139 }
140
141 /*
142 * Try to invoke the method. If this fails (most likely because the
143 * method is not supported), then ignore this node.
144 */
145 if (topo_method_invoke(node, TOPO_METH_DISK_STATUS,
146 TOPO_METH_DISK_STATUS_VERSION, in, &result, &err) != 0) {
147 nvlist_free(fmri);
148 nvlist_free(in);
149 return (TOPO_WALK_NEXT);
150 }
151
152 nvlist_free(in);
153
154 /*
155 * Check for faults and post ereport(s) if needed
156 */
157 if (nvlist_lookup_nvlist(result, "faults", &faults) == 0 &&
158 nvlist_lookup_string(result, "protocol", &protocol) == 0) {
159 elem = NULL;
160 while ((elem = nvlist_next_nvpair(faults, elem)) != NULL) {
161 if (nvpair_type(elem) != DATA_TYPE_BOOLEAN_VALUE)
162 continue;
163
164 (void) nvpair_value_boolean_value(elem, &fault);
165 if (!fault ||
166 nvlist_lookup_nvlist(result, nvpair_name(elem),
167 &details) != 0)
168 continue;
169
170 if (strcmp(nvpair_name(elem),
171 FM_EREPORT_SCSI_SSMWEAROUT) == 0 &&
172 fmd_prop_get_int32(dmp->dm_hdl,
173 "ignore-ssm-wearout") == FMD_B_TRUE)
174 continue;
175
176 dt_post_ereport(dmp->dm_hdl, dmp->dm_xprt, protocol,
177 nvpair_name(elem),
178 fmd_event_ena_create(dmp->dm_hdl), fmri, details);
179 }
180 }
181
182 nvlist_free(result);
183 nvlist_free(fmri);
184
185 return (TOPO_WALK_NEXT);
186 }
187
188 /*
189 * Periodic timeout. Iterates over all hc:// topo nodes, calling
190 * dt_analyze_disk() for each one.
191 */
192 /*ARGSUSED*/
193 static void
194 dt_timeout(fmd_hdl_t *hdl, id_t id, void *data)
195 {
196 topo_hdl_t *thp;
197 topo_walk_t *twp;
198 int err;
235 */
236 /*ARGSUSED*/
237 static void
238 dt_topo_change(fmd_hdl_t *hdl, topo_hdl_t *thp)
239 {
240 disk_monitor_t *dmp = fmd_hdl_getspecific(hdl);
241
242 if (dmp->dm_timer_istopo)
243 return;
244
245 fmd_timer_remove(hdl, dmp->dm_timer);
246 dmp->dm_timer = fmd_timer_install(hdl, NULL, NULL,
247 fmd_prop_get_int64(hdl, "min-interval"));
248 dmp->dm_timer_istopo = B_TRUE;
249 }
250
251 static const fmd_prop_t fmd_props[] = {
252 { "interval", FMD_TYPE_TIME, "1h" },
253 { "min-interval", FMD_TYPE_TIME, "1min" },
254 { "simulate", FMD_TYPE_STRING, "" },
255 { "ignore-ssm-wearout", FMD_TYPE_BOOL, "false"},
256 { NULL, 0, NULL }
257 };
258
259 static const fmd_hdl_ops_t fmd_ops = {
260 NULL, /* fmdo_recv */
261 dt_timeout, /* fmdo_timeout */
262 NULL, /* fmdo_close */
263 NULL, /* fmdo_stats */
264 NULL, /* fmdo_gc */
265 NULL, /* fmdo_send */
266 dt_topo_change, /* fmdo_topo_change */
267 };
268
269 static const fmd_hdl_info_t fmd_info = {
270 "Disk Transport Agent", "1.1", &fmd_ops, fmd_props
271 };
272
273 void
274 _fmd_init(fmd_hdl_t *hdl)
275 {
276 disk_monitor_t *dmp;
277 char *simulate;
278
279 if (fmd_hdl_register(hdl, FMD_API_VERSION, &fmd_info) != 0)
280 return;
281
282 (void) fmd_stat_create(hdl, FMD_STAT_NOALLOC,
283 sizeof (dt_stats) / sizeof (fmd_stat_t),
284 (fmd_stat_t *)&dt_stats);
285
286 dmp = fmd_hdl_zalloc(hdl, sizeof (disk_monitor_t), FMD_SLEEP);
287 fmd_hdl_setspecific(hdl, dmp);
288
289 dmp->dm_xprt = fmd_xprt_open(hdl, FMD_XPRT_RDONLY, NULL, NULL);
290 dmp->dm_interval = fmd_prop_get_int64(hdl, "interval");
|