diff --git a/Add-MulanV2-License-statement.patch b/Add-MulanV2-License-statement.patch new file mode 100644 index 0000000000000000000000000000000000000000..41bfc6675f00db70ad24184b91f51d3ac7ed4f8b --- /dev/null +++ b/Add-MulanV2-License-statement.patch @@ -0,0 +1,110 @@ +From 7bfe3bfb4cd7d1db43b1baeefc0cfbf2f648a092 Mon Sep 17 00:00:00 2001 +From: Xiaofei Tan +Date: Tue, 23 Sep 2025 11:58:03 +0800 +Subject: [PATCH 9/9] sysSentry: Add MulanV2 License statement + +driver inclusion +category: feature +bugzilla: https://gitee.com/src-openeuler/sysSentry/issues/ID1UOY +CVE: NA + +---------------------------------------------------------------------- + +Add MulanV2 license statement to the header of each file. + +Signed-off-by: Xiaofei Tan +Signed-off-by: Qizhi Zhang +--- + src/sentryPlugins/soc_ring_sentry/soc_ring_sentry.c | 12 +++++++++++- + src/sentryPlugins/soc_ring_sentry/soc_ring_sentry.h | 12 +++++++++++- + src/sentryPlugins/soc_ring_sentry/tc_ring_one.c | 12 +++++++++++- + src/sentryPlugins/soc_ring_sentry/tc_ring_one.h | 12 +++++++++++- + 4 files changed, 44 insertions(+), 4 deletions(-) + +diff --git a/src/sentryPlugins/soc_ring_sentry/soc_ring_sentry.c b/src/sentryPlugins/soc_ring_sentry/soc_ring_sentry.c +index df195ae..d5e615c 100644 +--- a/src/sentryPlugins/soc_ring_sentry/soc_ring_sentry.c ++++ b/src/sentryPlugins/soc_ring_sentry/soc_ring_sentry.c +@@ -1,5 +1,15 @@ + /* +- * Copyright (c) Huawei Technologies Co., Ltd. 2025. All rights reserved. ++ * Copyright (c) 2025 Huawei Technologies Co., Ltd. ++ * sysSentry is licensed under Mulan PSL v2. ++ * You can use this software according to the terms and conditions of the Mulan PSL v2. ++ * You may obtain a copy of Mulan PSL v2 at: ++ * http://license.coscl.org.cn/MulanPSL2 ++ * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, ++ * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, ++ * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. 
++ * ++ * See the Mulan PSL v2 for more details. ++ * + * Description: SOC Ring sentry main program + * Author: Yihang Li + * Create: 2025-7-10 +diff --git a/src/sentryPlugins/soc_ring_sentry/soc_ring_sentry.h b/src/sentryPlugins/soc_ring_sentry/soc_ring_sentry.h +index 0566496..34fb5ec 100644 +--- a/src/sentryPlugins/soc_ring_sentry/soc_ring_sentry.h ++++ b/src/sentryPlugins/soc_ring_sentry/soc_ring_sentry.h +@@ -1,5 +1,15 @@ + /* +- * Copyright (c) Huawei Technologies Co., Ltd. 2025. All rights reserved. ++ * Copyright (c) 2025 Huawei Technologies Co., Ltd. ++ * sysSentry is licensed under Mulan PSL v2. ++ * You can use this software according to the terms and conditions of the Mulan PSL v2. ++ * You may obtain a copy of Mulan PSL v2 at: ++ * http://license.coscl.org.cn/MulanPSL2 ++ * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, ++ * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, ++ * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. ++ * ++ * See the Mulan PSL v2 for more details. ++ * + * Description: SOC Ring sentry main header + * Author: Yihang Li + * Create: 2025-7-10 +diff --git a/src/sentryPlugins/soc_ring_sentry/tc_ring_one.c b/src/sentryPlugins/soc_ring_sentry/tc_ring_one.c +index 3dda5e0..bae7ada 100755 +--- a/src/sentryPlugins/soc_ring_sentry/tc_ring_one.c ++++ b/src/sentryPlugins/soc_ring_sentry/tc_ring_one.c +@@ -1,5 +1,15 @@ + /* +- * Copyright (c) Huawei Technologies Co., Ltd. 2025. All rights reserved. ++ * Copyright (c) 2025 Huawei Technologies Co., Ltd. ++ * sysSentry is licensed under Mulan PSL v2. ++ * You can use this software according to the terms and conditions of the Mulan PSL v2. 
++ * You may obtain a copy of Mulan PSL v2 at: ++ * http://license.coscl.org.cn/MulanPSL2 ++ * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, ++ * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, ++ * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. ++ * ++ * See the Mulan PSL v2 for more details. ++ * + * Description: tc ring testcase program + * Author: lizixian + * Create: 2025-7-10 +diff --git a/src/sentryPlugins/soc_ring_sentry/tc_ring_one.h b/src/sentryPlugins/soc_ring_sentry/tc_ring_one.h +index 5e93a56..126f9cb 100755 +--- a/src/sentryPlugins/soc_ring_sentry/tc_ring_one.h ++++ b/src/sentryPlugins/soc_ring_sentry/tc_ring_one.h +@@ -1,5 +1,15 @@ + /* +- * Copyright (c) Huawei Technologies Co., Ltd. 2025. All rights reserved. ++ * Copyright (c) 2025 Huawei Technologies Co., Ltd. ++ * sysSentry is licensed under Mulan PSL v2. ++ * You can use this software according to the terms and conditions of the Mulan PSL v2. ++ * You may obtain a copy of Mulan PSL v2 at: ++ * http://license.coscl.org.cn/MulanPSL2 ++ * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, ++ * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, ++ * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. ++ * ++ * See the Mulan PSL v2 for more details. 
++ * + * Description: tc ring testcase header + * Author: lizixian + * Create: 2025-7-10 +-- +2.33.0 + diff --git a/Add-SOC-Ring-sentry-function.patch b/Add-SOC-Ring-sentry-function.patch new file mode 100644 index 0000000000000000000000000000000000000000..65aa805a27c05affd345a9a738d8cfa239824c79 --- /dev/null +++ b/Add-SOC-Ring-sentry-function.patch @@ -0,0 +1,477 @@ +From 71dfbdfa02a5a4b9f1f40e4eb045d5b35ae9ad3c Mon Sep 17 00:00:00 2001 +From: Qizhi Zhang +Date: Fri, 17 Oct 2025 10:04:15 +0800 +Subject: [PATCH] sysSentry: Add SOC Ring sentry function + +driver inclusion +category: feature +bugzilla: https://gitee.com/src-openeuler/sysSentry/issues/ID1UOY +CVE: NA + +---------------------------------------------------------------------- + +Signed-off-by: Qizhi Zhang +--- + Makefile | 17 +- + config/env/soc_ring_sentry.env | 6 + + config/tasks/soc_ring_sentry.mod | 7 + + src/sentryPlugins/soc_ring_sentry/Makefile | 9 + + src/sentryPlugins/soc_ring_sentry/README.md | 34 +++ + .../soc_ring_sentry/soc_ring_sentry.c | 261 ++++++++++++++++++ + .../soc_ring_sentry/soc_ring_sentry.h | 27 ++ + 7 files changed, 359 insertions(+), 2 deletions(-) + create mode 100644 config/env/soc_ring_sentry.env + create mode 100644 config/tasks/soc_ring_sentry.mod + create mode 100644 src/sentryPlugins/soc_ring_sentry/Makefile + create mode 100644 src/sentryPlugins/soc_ring_sentry/README.md + create mode 100644 src/sentryPlugins/soc_ring_sentry/soc_ring_sentry.c + create mode 100644 src/sentryPlugins/soc_ring_sentry/soc_ring_sentry.h + +diff --git a/Makefile b/Makefile +index 73ada63..2855285 100644 +--- a/Makefile ++++ b/Makefile +@@ -27,7 +27,7 @@ PYTHON_VERSION := $(shell $(PYBIN) --version 2>&1 | awk '{print $$2}' | cut -d ' + PKGVER := syssentry-$(VERSION)-py$(PYTHON_VERSION) + PKGVEREGG := syssentry-$(VERSION)-py$(PYTHON_VERSION).egg-info + +-all: lib ebpf hbm_online_repair ++all: lib ebpf hbm_online_repair soc_ring_sentry + + lib:libxalarm log + +@@ -47,6 +47,9 @@ ebpf: + 
hbm_online_repair: + cd $(CURSRCDIR)/sentryPlugins/hbm_online_repair/ && make + ++soc_ring_sentry: lib ++ cd $(CURSRCDIR)/sentryPlugins/soc_ring_sentry/ && make ++ + install: all dirs isentry + + dirs: +@@ -123,6 +126,11 @@ isentry: + install -m 600 $(CURCONFIGDIR)/env/hbm_online_repair.env $(ETCDIR)/sysconfig/ + install -m 600 $(CURCONFIGDIR)/tasks/hbm_online_repair.mod $(ETCDIR)/sysSentry/tasks/ + ++ # soc_ring_sentry ++ install -m 750 $(CURSRCDIR)/sentryPlugins/soc_ring_sentry/soc_ring_sentry $(BINDIR) ++ install -m 600 $(CURCONFIGDIR)/env/soc_ring_sentry.env $(ETCDIR)/sysconfig/ ++ install -m 600 $(CURCONFIGDIR)/tasks/soc_ring_sentry.mod $(ETCDIR)/sysSentry/tasks/ ++ + # pysentry_notify + install -m 550 src/libsentry/python/pySentryNotify/sentry_notify.py $(PYDIR)/xalarm + +@@ -150,7 +158,10 @@ ebpf_clean: + hbm_clean: + cd $(CURSRCDIR)/sentryPlugins/hbm_online_repair && make clean + +-clean: ebpf_clean hbm_clean ++srs_clean: ++ cd $(CURSRCDIR)/sentryPlugins/soc_ring_sentry && make clean ++ ++clean: ebpf_clean hbm_clean srs_clean + rm -rf $(CURLIBDIR)/build + rm -rf $(CURSRCDIR)/build + rm -rf $(CURSRCDIR)/libsentry/c/log/build +@@ -163,6 +174,7 @@ uninstall: + rm -rf $(BINDIR)/xalarmd + rm -rf $(BINDIR)/sentryCollector + rm -rf $(BINDIR)/hbm_online_repair ++ rm -rf $(BINDIR)/soc_ring_sentry + rm -rf $(BINDIR)/ebpf_collector + rm -rf $(LIBINSTALLDIR)/libxalarm.so + rm -rf $(INCLUDEDIR)/xalarm +@@ -170,6 +182,7 @@ uninstall: + rm -rf $(INCLUDEDIR)/libsentry + rm -rf $(ETCDIR)/sysSentry + rm -rf $(ETCDIR)/hbm_online_repair.env ++ rm -rf $(ETCDIR)/soc_ring_sentry.env + rm -rf $(LOGSAVEDIR)/sysSentry + rm -rf $(PYDIR)/syssentry + rm -rf $(PYDIR)/xalarm +diff --git a/config/env/soc_ring_sentry.env b/config/env/soc_ring_sentry.env +new file mode 100644 +index 0000000..b1fdd68 +--- /dev/null ++++ b/config/env/soc_ring_sentry.env +@@ -0,0 +1,6 @@ ++LOG_LEVEL=info ++SOC_RING_SENTRY_INTENSITY_DELAY=600 ++SOC_RING_SENTRY_MEM_SIZE=4096 ++SOC_RING_SENTRY_LOOP_CNT=0 
++SOC_RING_SENTRY_FAULT_HANDLING=1 ++SOC_RING_SENTRY_BLACKLIST= +\ No newline at end of file +diff --git a/config/tasks/soc_ring_sentry.mod b/config/tasks/soc_ring_sentry.mod +new file mode 100644 +index 0000000..303567a +--- /dev/null ++++ b/config/tasks/soc_ring_sentry.mod +@@ -0,0 +1,7 @@ ++[common] ++enabled=yes ++task_start=/usr/bin/soc_ring_sentry ++task_stop=pkill -f soc_ring_sentry ++type=oneshot ++onstart=yes ++env_file=/etc/sysconfig/soc_ring_sentry.env +\ No newline at end of file +diff --git a/src/sentryPlugins/soc_ring_sentry/Makefile b/src/sentryPlugins/soc_ring_sentry/Makefile +new file mode 100644 +index 0000000..02032d9 +--- /dev/null ++++ b/src/sentryPlugins/soc_ring_sentry/Makefile +@@ -0,0 +1,9 @@ ++all: soc_ring_sentry ++ ++soc_ring_sentry: ++ gcc *.c -Wall -o3 -I../../libsentry/c/log -L../../libsentry/c/log/build -I../../libs/libxalarm -L../../libs/build/libxalarm -lxalarm -lsentry_log -ljson-c -pthread -lnuma -o soc_ring_sentry ++ ++clean: ++ rm -f soc_ring_sentry ++ ++.PHONY: all clean +diff --git a/src/sentryPlugins/soc_ring_sentry/README.md b/src/sentryPlugins/soc_ring_sentry/README.md +new file mode 100644 +index 0000000..e4b3c53 +--- /dev/null ++++ b/src/sentryPlugins/soc_ring_sentry/README.md +@@ -0,0 +1,34 @@ ++# soc_ring_sentry ++ ++#### 介绍 ++soc_ring_sentry是一款依赖sysSentry并用于SOC STL巡检的插件,该插件的使用方法是: ++ ++usage: soc_ring_sentry [OPTIONS] ++ ++Options: ++ -h, Show this help message and exit. ++ -g, Get the SOC Ring sentry case. ++ ++用户可以通过 `/etc/sysconfig/soc_ring_sentry.env` 修改环境变量以配置不同参数 ++该文件中各个环境变量含义为: ++`LOG_LEVEL` ++日志等级配置,默认配置为info级别。也可以配置为debug, warning, 或者error. 
++ ++`SOC_RING_SENTRY_INTENSITY_DELAY` ++巡检间隔时长配置,单位ms,默认配置为600ms。用户可自定义其他所需间隔时长。 ++ ++`SOC_RING_SENTRY_MEM_SIZE` ++巡检空间大小配置,单位KB,默认配置为4096KB。也可配置为其他64KB的倍数。 ++ ++`SOC_RING_SENTRY_LOOP_CNT` ++巡检次数配置,默认配置为0,即持续巡检。若配置为其他值则为巡检次数。 ++ ++`SOC_RING_SENTRY_FAULT_HANDLING` ++后处理标识配置,默认配置为1,即主动触发panic。 ++设置为0则表示检测到错误不做任何处理。 ++设置为2则表示检测到错误主动关机。 ++设置为3则表示检测到错误主动重启。 ++ ++`SOC_RING_SENTRY_BLACKLIST` ++巡检黑名单配置,默认配置为空。用户可将不运行巡检的CPU核号写入该环境变量,例如`SOC_RING_SENTRY_BLACKLIST=0,2,4,6-10` ++ +diff --git a/src/sentryPlugins/soc_ring_sentry/soc_ring_sentry.c b/src/sentryPlugins/soc_ring_sentry/soc_ring_sentry.c +new file mode 100644 +index 0000000..f407fb8 +--- /dev/null ++++ b/src/sentryPlugins/soc_ring_sentry/soc_ring_sentry.c +@@ -0,0 +1,261 @@ ++/* ++ * Copyright (c) Huawei Technologies Co., Ltd. 2025. All rights reserved. ++ * Description: SOC Ring sentry main program ++ * Author: Yihang Li ++ * Create: 2025-7-10 ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include "register_xalarm.h" ++#include "log_utils.h" ++#include "soc_ring_sentry.h" ++ ++#define DEFAULT_INTENSITY_DELAY 600 ++#define DEFAULT_HANDLE 1 ++#define DEFAULT_LOOP_CNT 0 ++#define LIMITE_MEM_SIZE 64 ++#define KB 1024 ++#define DEFAULT_MEM_SIZE (4 * KB * KB) ++ ++uint64_t g_intensity_delay; ++uint64_t g_handle; ++uint64_t g_mem_size; ++uint64_t g_loop_cnt; ++bool *g_blacklist; ++ ++static void print_opts_help() ++{ ++ printf("usage: soc_ring_sentry [OPTIONS]\n" ++ "\n" ++ "Options:\n" ++ " -h, Show this help message and exit.\n" ++ " -g, Get the SOC Ring sentry case.\n"); ++} ++ ++static void soc_ring_sentry_case_get() ++{ ++ //todo: print the test case name ++} ++ ++static bool soc_ring_sentry_envtoull(char *env, uint64_t *value) ++{ ++ char *endptr; ++ errno = 0; ++ ++ if (env) { ++ if (*env == '-') { ++ logging_error("Negative input not allowed.\n"); ++ return false; ++ } ++ ++ *value = strtoull(env, &endptr, 10); ++ if (errno == 0 && endptr != env) { ++ while 
(isspace((unsigned char)*endptr)) { ++ endptr++; ++ } ++ ++ if (*endptr == '\0') { ++ return true; ++ } ++ } ++ } ++ ++ return false; ++} ++ ++static void soc_ring_sentry_log_level_init() ++{ ++ setLogLevel(); ++} ++ ++static void soc_ring_sentry_intensity_delay_init() ++{ ++ char *env = getenv("SOC_RING_SENTRY_INTENSITY_DELAY"); ++ g_intensity_delay = DEFAULT_INTENSITY_DELAY; ++ uint64_t value; ++ ++ if (soc_ring_sentry_envtoull(env, &value)) { ++ g_intensity_delay = value; ++ logging_info("soc_ring_sentry intensity delay set %lums\n", g_intensity_delay); ++ return; ++ } ++ ++ logging_warn("Environment variable SOC_RING_SENTRY_INTENSITY_DELAY invalid, using default value %lums\n", g_intensity_delay); ++} ++ ++static void soc_ring_sentry_handle_init() ++{ ++ char *env = getenv("SOC_RING_SENTRY_FAULT_HANDLING"); ++ g_handle = DEFAULT_HANDLE; ++ uint64_t value; ++ ++ if (soc_ring_sentry_envtoull(env, &value) && value < HANDLE_LEVEL_INVALID) { ++ g_handle = value; ++ logging_info("soc_ring_sentry handle set %lu\n", g_handle); ++ return; ++ } ++ ++ logging_warn("Environment variable SOC_RING_SENTRY_FAULT_HANDLING invalid, using default value %lu\n", g_handle); ++} ++ ++static void soc_ring_sentry_mem_size_init() ++{ ++ char *env = getenv("SOC_RING_SENTRY_MEM_SIZE"); ++ g_mem_size = DEFAULT_MEM_SIZE; ++ uint64_t value; ++ ++ if (soc_ring_sentry_envtoull(env, &value)) { ++ if (value != 0 && value < (UINT64_MAX / KB) && (value % LIMITE_MEM_SIZE) == 0) { ++ g_mem_size = value * KB; ++ logging_info("soc_ring_sentry memory size set %luKB\n", value); ++ return; ++ } ++ } ++ ++ logging_warn("Environment variable SOC_RING_SENTRY_MEM_SIZE invalid, using default value %luKB\n", g_mem_size / KB); ++} ++ ++static void soc_ring_sentry_loop_cnt_init() ++{ ++ char *env = getenv("SOC_RING_SENTRY_LOOP_CNT"); ++ g_loop_cnt = DEFAULT_LOOP_CNT; ++ uint64_t value; ++ ++ if (soc_ring_sentry_envtoull(env, &value)) { ++ g_loop_cnt = value; ++ logging_info("soc_ring_sentry loop cnt set %lu\n", 
g_loop_cnt); ++ return; ++ } ++ ++ logging_warn("Environment variable SOC_RING_SENTRY_LOOP_CNT invalid, using default value %lu\n", g_loop_cnt); ++} ++ ++static void soc_ring_sentry_blacklist_init(size_t core_num) ++{ ++ char *env = getenv("SOC_RING_SENTRY_BLACKLIST"); ++ char *log_buf, *log_end_ptr; ++ size_t log_buf_len, i; ++ int offset; ++ ++ g_blacklist = (bool *)calloc(core_num, sizeof(bool)); ++ if (!g_blacklist) { ++ logging_error("Failed to allocate memory for blacklist, none CPU set to blacklist\n"); ++ return; ++ } ++ ++ if (env && strlen(env) > 0) { ++ struct bitmask *cpuset = numa_parse_cpustring(env); ++ ++ if (!cpuset) { ++ logging_error("Failed to parse environment variable SOC_RING_SENTRY_BLACKLIST: %s\n", env); ++ return; ++ } ++ ++ for (i = 0; i < core_num; i++) { ++ if (numa_bitmask_isbitset(cpuset, i)) { ++ g_blacklist[i] = true; ++ } ++ } ++ ++ numa_bitmask_free(cpuset); ++ cpuset = NULL; ++ logging_info("soc_ring_sentry blacklist set successful\n"); ++ log_buf_len = strlen("blacklist cores: ") + core_num * 4 + 2; ++ log_buf = (char *)calloc(log_buf_len, sizeof(char)); ++ if (log_buf) { ++ offset = snprintf(log_buf, log_buf_len * sizeof(char), "blacklist cores: "); ++ log_end_ptr = log_buf + offset; ++ for (i = 0; i < core_num; i++) { ++ if (g_blacklist[i]) { ++ offset = snprintf(log_end_ptr, log_buf_len - (log_end_ptr - log_buf), "%ld ", i); ++ if (offset < 0 || offset >= (log_buf_len - (log_end_ptr - log_buf))) { ++ logging_error("Log buffer overflow during snprintf\n"); ++ break; ++ } ++ ++ log_end_ptr += offset; ++ } ++ } ++ ++ logging_info("%s\n", log_buf); ++ free(log_buf); ++ log_buf = NULL; ++ } ++ } ++} ++ ++static void soc_ring_sentry_init(size_t core_num) ++{ ++ soc_ring_sentry_log_level_init(); ++ soc_ring_sentry_intensity_delay_init(); ++ soc_ring_sentry_handle_init(); ++ soc_ring_sentry_mem_size_init(); ++ soc_ring_sentry_loop_cnt_init(); ++ soc_ring_sentry_blacklist_init(core_num); ++} ++ ++static int 
soc_ring_sentry_delivery(size_t core_num) ++{ ++ //todo: add delivery test ++ return 0; ++} ++ ++size_t get_system_core_num(void) ++{ ++ long core_num = sysconf(_SC_NPROCESSORS_CONF); ++ ++ return (core_num > 0) ? (size_t)core_num : 1; ++} ++ ++static void soc_ring_sentry_exec() ++{ ++ size_t core_num = get_system_core_num(); ++ int ret; ++ ++ soc_ring_sentry_init(core_num); ++ ret = soc_ring_sentry_delivery(core_num); ++ if (ret == 0) { ++ report_result(TOOL_NAME, RESULT_LEVEL_PASS, "{\"msg\":\"SOC STL test pass\", \"code\":1001}"); ++ } ++ ++ if (g_blacklist) { ++ free(g_blacklist); ++ g_blacklist = NULL; ++ } ++} ++ ++int main(int argc, char *argv[]) ++{ ++ int opt; ++ ++ if (argc > 2) { ++ print_opts_help(); ++ return -1; ++ } else if (argc == 2) { ++ while ((opt = getopt(argc, argv, "hg")) != -1) { ++ switch ((char)opt) { ++ case 'h': ++ print_opts_help(); ++ return 0; ++ case 'g': ++ soc_ring_sentry_case_get(); ++ return 0; ++ default: ++ print_opts_help(); ++ return -1; ++ } ++ } ++ } else { ++ soc_ring_sentry_exec(); ++ } ++ ++ return 0; ++} ++ +diff --git a/src/sentryPlugins/soc_ring_sentry/soc_ring_sentry.h b/src/sentryPlugins/soc_ring_sentry/soc_ring_sentry.h +new file mode 100644 +index 0000000..bea991f +--- /dev/null ++++ b/src/sentryPlugins/soc_ring_sentry/soc_ring_sentry.h +@@ -0,0 +1,27 @@ ++/* ++ * Copyright (c) Huawei Technologies Co., Ltd. 2025. All rights reserved. 
++ * Description: SOC Ring sentry main header ++ * Author: Yihang Li ++ * Create: 2025-7-10 ++ */ ++ ++#ifndef __SOC_RING_SENTRY_H ++#define __SOC_RING_SENTRY_H ++ ++#define TOOL_NAME "soc_ring_sentry" ++ ++enum handle_level { ++ HANDLE_NONE, ++ HANDLE_PANIC, ++ HANDLE_POWEROFF, ++ HANDLE_REBOOT, ++ HANDLE_LEVEL_INVALID ++}; ++ ++extern uint64_t g_intensity_delay; ++extern uint64_t g_handle; ++extern uint64_t g_mem_size; ++extern uint64_t g_loop_cnt; ++extern bool *g_blacklist; ++ ++#endif +-- +2.34.1 + diff --git a/Add-testcase-tc_ring-for-SOC-Ring-sentry.patch b/Add-testcase-tc_ring-for-SOC-Ring-sentry.patch new file mode 100644 index 0000000000000000000000000000000000000000..81ac1ea236c9a0eeea490361dc678960cce19e17 --- /dev/null +++ b/Add-testcase-tc_ring-for-SOC-Ring-sentry.patch @@ -0,0 +1,809 @@ +From 6946a70721c26c74b973bf419b6b3c4e8747e419 Mon Sep 17 00:00:00 2001 +From: Yihang Li +Date: Thu, 17 Jul 2025 20:47:44 +0800 +Subject: [PATCH 2/9] sysSentry: Add testcase tc_ring for SOC Ring sentry + +driver inclusion +category: feature +bugzilla: https://gitee.com/src-openeuler/sysSentry/issues/ID1UOY +CVE: NA + +---------------------------------------------------------------------- + +Signed-off-by: Yihang Li +Signed-off-by: Qizhi Zhang +--- + .../soc_ring_sentry/soc_ring_sentry.c | 12 +- + .../soc_ring_sentry/tc_ring_one.c | 689 ++++++++++++++++++ + .../soc_ring_sentry/tc_ring_one.h | 38 + + 3 files changed, 736 insertions(+), 3 deletions(-) + create mode 100755 src/sentryPlugins/soc_ring_sentry/tc_ring_one.c + create mode 100755 src/sentryPlugins/soc_ring_sentry/tc_ring_one.h + +diff --git a/src/sentryPlugins/soc_ring_sentry/soc_ring_sentry.c b/src/sentryPlugins/soc_ring_sentry/soc_ring_sentry.c +index f407fb8..21c78b6 100644 +--- a/src/sentryPlugins/soc_ring_sentry/soc_ring_sentry.c ++++ b/src/sentryPlugins/soc_ring_sentry/soc_ring_sentry.c +@@ -9,6 +9,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -16,6 +17,7 @@ + 
#include "register_xalarm.h" + #include "log_utils.h" + #include "soc_ring_sentry.h" ++#include "tc_ring_one.h" + + #define DEFAULT_INTENSITY_DELAY 600 + #define DEFAULT_HANDLE 1 +@@ -41,7 +43,7 @@ static void print_opts_help() + + static void soc_ring_sentry_case_get() + { +- //todo: print the test case name ++ printf("1. [soc stl] ring data bit line scan tescase.\n"); + } + + static bool soc_ring_sentry_envtoull(char *env, uint64_t *value) +@@ -203,8 +205,12 @@ static void soc_ring_sentry_init(size_t core_num) + + static int soc_ring_sentry_delivery(size_t core_num) + { +- //todo: add delivery test +- return 0; ++ int ret; ++ ++ ret = tc_ring_one_create_threads(g_mem_size, g_loop_cnt, g_intensity_delay, g_handle, g_blacklist, core_num); ++ tc_ring_one_post_process(ret); ++ ++ return ret; + } + + size_t get_system_core_num(void) +diff --git a/src/sentryPlugins/soc_ring_sentry/tc_ring_one.c b/src/sentryPlugins/soc_ring_sentry/tc_ring_one.c +new file mode 100755 +index 0000000..9713495 +--- /dev/null ++++ b/src/sentryPlugins/soc_ring_sentry/tc_ring_one.c +@@ -0,0 +1,689 @@ ++/* ++ * Copyright (c) Huawei Technologies Co., Ltd. 2025. All rights reserved. 
++ * Description: tc ring testcase program ++ * Author: lizixian ++ * Create: 2025-7-10 ++ */ ++ ++#ifndef _GNU_SOURCE ++#define _GNU_SOURCE ++#endif ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include "register_xalarm.h" ++#include "log_utils.h" ++#include "soc_ring_sentry.h" ++#include "tc_ring_one.h" ++ ++typedef struct tc_ring_one_config { ++ void** test_space_base; // 存储每个numa节点测试空间的指针,需要根据当前系统numa节点数量动态申请存储空间 ++ size_t space_size; // 每个numa节点测试空间内存大小 ++ size_t block_size; // 每个测试块的大小,固定为64kByte ++ uint64_t loop_total; // 测试循环总数,0 -- 无限测试 ++ int64_t sleep_ms; // 每次读完一个数据块后的休眠时间,≤ 0则不休眠,继续下一个数据块的扫描 ++ int64_t rd_loop; // 每个扫描bit单次循环内的数据扫描次数,固定为0x80 ++ pthread_t* tc_core_threads; // 记录每个核的测试线程 ++ pthread_barrier_t tc_barrier; // 同步标志 ++ pthread_mutex_t* tc_node_mutex; // 每个numa节点数据更新的锁,单个numa节点测试空间的数据刷新,只能由一个本numa节点的测试核来刷新 ++ uint32_t* node_update_flag; // 测试空间数据刷新状态标志 ++ uint64_t err_handle; // 错误后处理 ++ bool* black_list; // 测试黑名单 ++ int tc_core_total; // 测试核总数 ++ int sys_core_total; // 系统核总数 ++ int scan_bit; // 扫描的bit ++ int err_cnt; // 错误计数 ++} tc_ring_one_config_t; ++ ++#define TC_RING_ONE_BLOCK_SIZE 0x10000 ++#define TC_RING_ONE_RD_LOOP 0x80 ++#define TC_RING_ONE_DATA_UNIT 128 ++#define TC_RING_ONE_CACHELINE_SIZE 64 ++ ++#define TC_RING_ONE_PRAMA_ERR -1 ++#define TC_RING_ONE_FAIL -2 ++#define TC_RING_ONE_SUCCESS 0 ++ ++#define TC_ERROR_HANDLE_NONE 0 ++#define TC_ERROR_HANDLE_PANIC 1 ++#define TC_ERROR_HANDLE_SHUTDOWN 2 ++#define TC_ERROR_HANDLE_REBOOT 3 ++ ++static tc_ring_one_config_t g_tc_config = { 0 }; ++static const uint32_t tc_ring_one_pattern[32] = { ++ 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, ++ 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, ++ 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, ++ 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, ++ 0x00000000, 0x00000000, 0x00000000, 0x00000000, ++ 
0x00000000, 0x00000000, 0x00000000, 0x00000000, ++ 0x00000000, 0x00000000, 0x00000000, 0x00000000, ++ 0x00000000, 0x00000000, 0x00000000, 0x00000000, ++}; ++static const uint32_t g_tc_ring_one_special_bits[] = {394, 397, 405, 343, 394, 393, 392, 377}; ++ ++static void* tc_ring_one_thread_entry(void* arg); ++ ++static int get_system_online_core_total(void) ++{ ++ int core_num = 0; ++ ++ core_num = sysconf(_SC_NPROCESSORS_ONLN); ++ core_num = (core_num > 0) ? core_num : 1; ++ ++ return core_num; ++} ++ ++static int get_system_core_total(void) ++{ ++ int core_num = 0; ++ ++ core_num = sysconf(_SC_NPROCESSORS_CONF); ++ core_num = (core_num > 0) ? core_num : 1; ++ ++ return core_num; ++} ++ ++static int get_core_id_by_thread(pthread_t *thread) ++{ ++ int core_total = get_system_core_total(); ++ pthread_t my_thread_id = pthread_self(); ++ int core_id; ++ ++ for (core_id = 0; core_id < core_total; core_id++) { ++ if (my_thread_id == thread[core_id]) { ++ return core_id; ++ } ++ } ++ ++ return -1; ++} ++ ++static int is_cpu_online(int core_id) ++{ ++ char online_file[64]; ++ struct stat buffer; ++ int online; ++ FILE *fp; ++ ++ snprintf(online_file, sizeof(online_file), "/sys/devices/system/cpu/cpu%d/online", core_id); ++ if ((core_id == 0) && (lstat(online_file, &buffer) != 0)) { ++ return 1; ++ } ++ ++ fp = fopen(online_file, "r"); ++ if (!fp) { ++ logging_error("Failed to open %s\n", online_file); ++ return 0; ++ } ++ fscanf(fp, "%d", &online); ++ fclose(fp); ++ ++ return online; ++} ++ ++static uintptr_t vaddr_to_phys(uintptr_t vaddr) ++{ ++ int page_size = sysconf(_SC_PAGESIZE); ++ char page_map_name[64]; ++ int pid = getpid(); ++ uintptr_t offset; ++ uintptr_t pinfo; ++ int fd; ++ ++ offset = vaddr / page_size * (sizeof(pinfo)); ++ sprintf(page_map_name, "/proc/%d/pagemap", pid); ++ fd = open(page_map_name, O_RDONLY); ++ if (fd < 0) { ++ logging_error("Failed to open %s\n", page_map_name); ++ return 0; ++ } ++ if (pread(fd, &pinfo, sizeof(pinfo), offset) != 
sizeof(pinfo)) { ++ logging_error("Failed to read %s\n", page_map_name); ++ close(fd); ++ return 0; ++ } ++ ++ close(fd); ++ if((pinfo & (1ULL << 63)) == 0) { ++ logging_error("pfn is not present\n"); ++ return 0; ++ } else { ++ return (pinfo & ((1ULL << 55) - 1)) * page_size + (vaddr & (page_size - 1)); ++ } ++} ++ ++static int get_numa_node_of_core(int core_id) ++{ ++ int numa_node; ++ ++ numa_node = numa_node_of_cpu(core_id); ++ if (numa_node < 0) { ++ logging_error("[CORE%d] numa_node_of_cpu failed, errno:%d\n", core_id, errno); ++ return 0; ++ } ++ ++ return numa_node; ++} ++ ++/** ++ * 从指定 NUMA 节点优先分配内存,若失败则尝试其他节点 ++ * @param preferred_node 优先分配的 NUMA 节点号 ++ * @param size 需要分配的内存大小(字节) ++ * @return 成功返回内存指针,失败返回 NULL ++ */ ++static void *numa_alloc_fallback(int preferred_node, size_t size) ++{ ++ struct bitmask *allowed_nodes; ++ void *ptr = NULL; ++ int max_node; ++ int node; ++ ++ // 1. 获取所有可用的 NUMA 节点 ++ allowed_nodes = numa_get_mems_allowed(); ++ if (!allowed_nodes) { ++ logging_error("Failed to get allowed NUMA nodes"); ++ return NULL; ++ } ++ ++ // 2. 优先从指定节点分配内存 ++ if (numa_bitmask_isbitset(allowed_nodes, preferred_node)) { ++ ptr = numa_alloc_onnode(size, preferred_node); ++ if (ptr != NULL) { ++ logging_debug("Allocated %#x bytes on NUMA assigned node %d, addr: %p \n", size, preferred_node, ptr, __LINE__); ++ return ptr; ++ } ++ } ++ ++ // 3. 遍历所有节点(跳过优先节点) ++ max_node = numa_max_node(); ++ for (node = 0; node <= max_node; node++) { ++ if (node == preferred_node || !numa_bitmask_isbitset(allowed_nodes, node)) { ++ continue; // 跳过优先节点或不允许的节点 ++ } ++ ++ ptr = numa_alloc_onnode(size, node); ++ if (ptr != NULL) { ++ logging_debug("Allocated %#x bytes on NUMA other node %d, addr: %p \n", size, node, ptr, __LINE__); ++ return ptr; ++ } ++ } ++ ++ // 4. 
所有节点均失败,尝试跨节点分配 ++ ptr = numa_alloc_interleaved(size); ++ if (ptr == NULL) { ++ logging_error("Failed to allocate %#x bytes on any NUMA node\n", size); ++ } ++ ++ return ptr; ++} ++ ++static void tc_ring_one_space_init(void *base, size_t size) ++{ ++ int i; ++ ++ for (i = 0; i < size; i += TC_RING_ONE_DATA_UNIT) { ++ memcpy((char*)base + i, tc_ring_one_pattern, sizeof(tc_ring_one_pattern)); ++ } ++} ++ ++static int tc_ring_one_ctrl_var_init(tc_ring_one_config_t *config) ++{ ++ pthread_mutexattr_t attr; ++ int online_core_total; ++ int system_core_total; ++ int tc_core_total; ++ int numa_node_num; ++ int i; ++ ++ system_core_total = config->sys_core_total; ++ online_core_total = get_system_online_core_total(); ++ tc_core_total = online_core_total; ++ numa_node_num = numa_max_node() + 1; ++ ++ for (i = 0; i < system_core_total; i++) { ++ if (config->black_list[i] && is_cpu_online(i)) { ++ // 跳过黑名单中的 CPU 核心 ++ tc_core_total--; ++ } ++ } ++ ++ if (pthread_mutexattr_init(&attr) != 0) { ++ logging_error("Failed to initialize mutex attribute"); ++ return TC_RING_ONE_PRAMA_ERR; ++ } ++ pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_ERRORCHECK); ++ config->tc_node_mutex = (pthread_mutex_t *)malloc(sizeof(pthread_mutex_t) * numa_node_num); ++ if (config->tc_node_mutex == NULL) { ++ logging_error("Failed to allocate memory for tc_mutex"); ++ pthread_mutexattr_destroy(&attr); ++ return TC_RING_ONE_PRAMA_ERR; ++ } ++ for (i = 0; i < numa_node_num; i++) { ++ if (pthread_mutex_init(&(config->tc_node_mutex[i]), &attr) != 0) { ++ logging_error("Failed to initialize mutex %d", i); ++ free(config->tc_node_mutex); ++ pthread_mutexattr_destroy(&attr); ++ return TC_RING_ONE_PRAMA_ERR; ++ } ++ } ++ pthread_mutexattr_destroy(&attr); ++ ++ config->tc_core_threads = (pthread_t *)malloc(sizeof(pthread_t) * system_core_total); ++ if (config->tc_core_threads == NULL) { ++ logging_error("Failed to allocate memory for tc_core_threads"); ++ for (i = 0; i < numa_node_num; i++) { ++ 
pthread_mutex_destroy(&config->tc_node_mutex[i]); ++ } ++ free(config->tc_node_mutex); ++ return TC_RING_ONE_PRAMA_ERR; ++ } ++ ++ config->sys_core_total = tc_core_total; ++ pthread_barrier_init(&config->tc_barrier, NULL, tc_core_total); ++ ++ return TC_RING_ONE_SUCCESS; ++} ++ ++static int tc_ring_one_init(tc_ring_one_config_t *config) ++{ ++ int numa_node_num; ++ int ret = 0; ++ void *ptr; ++ int i; ++ ++ if (numa_available() < 0) { ++ logging_error("NUMA is not available on this system"); ++ return TC_RING_ONE_PRAMA_ERR; ++ } ++ ++ // 为每个 NUMA 节点分配测试内存空间 ++ numa_node_num = numa_max_node() + 1; ++ config->test_space_base = (void **)malloc(sizeof(void *) * numa_node_num); ++ if (config->test_space_base == NULL) { ++ logging_error("Failed to allocate memory for test_space_base"); ++ return TC_RING_ONE_PRAMA_ERR; ++ } ++ ++ for (i = 0; i < numa_node_num; i++) { ++ ptr = numa_alloc_fallback(i, config->space_size); ++ if (ptr == NULL) { ++ ret = TC_RING_ONE_PRAMA_ERR; ++ goto numa_alloc_fail; ++ } ++ ++ tc_ring_one_space_init(ptr, config->space_size); ++ config->test_space_base[i] = ptr; ++ } ++ ++ config->node_update_flag = (uint32_t *)malloc(sizeof(uint32_t) * numa_node_num); ++ if (config->node_update_flag == NULL) { ++ logging_error("Failed to allocate memory for node_update_flag"); ++ ret = TC_RING_ONE_PRAMA_ERR; ++ goto numa_alloc_fail; ++ } else { ++ for (i = 0; i < numa_node_num; i++) { ++ config->node_update_flag[i] = 0; ++ } ++ } ++ ++ ret = tc_ring_one_ctrl_var_init(config); ++ if (ret != 0) { ++ logging_error("tc_ring_one_ctrl_var_init fail ret:%d", ret); ++ goto ctrl_var_init_fail; ++ } ++ ++ return ret; ++ ++ctrl_var_init_fail: ++ free(config->node_update_flag); ++ config->node_update_flag = NULL; ++ ++numa_alloc_fail: ++ for (i = 0; i < numa_node_num; i++) { ++ numa_free(config->test_space_base[i], config->space_size); ++ } ++ ++ free(config->test_space_base); ++ config->test_space_base = NULL; ++ return ret; ++} ++ ++static void 
tc_ring_one_release(tc_ring_one_config_t *config) ++{ ++ int numa_node_num; ++ int node; ++ ++ numa_node_num = numa_max_node() + 1; ++ for (int i = 0; i < numa_node_num; i++) { ++ numa_free(config->test_space_base[i], config->space_size); ++ } ++ free(config->test_space_base); ++ config->test_space_base = NULL; ++ free(config->node_update_flag); ++ config->node_update_flag = NULL; ++ ++ pthread_barrier_destroy(&(config->tc_barrier)); ++ for (node = 0; node < numa_node_num; node++) { ++ pthread_mutex_destroy(&config->tc_node_mutex[node]); ++ } ++ free(config->tc_node_mutex); ++ config->tc_node_mutex = NULL; ++} ++ ++static int is_core_run_tc(tc_ring_one_config_t *config, int core_id) ++{ ++ if (is_cpu_online(core_id) == 0) { ++ return 0; ++ } ++ ++ if ((config->black_list != NULL) && (config->black_list[core_id] == 1)) { ++ return 0; ++ } ++ ++ return 1; ++} ++ ++// 将线程绑定到指定 CORE ++static int bind_thread_to_core(pthread_t thread, int core_id) ++{ ++ cpu_set_t cpuset; ++ int ret = 0; ++ ++ CPU_ZERO(&cpuset); ++ CPU_SET(core_id, &cpuset); ++ ++ ret = pthread_setaffinity_np(thread, sizeof(cpu_set_t), &cpuset); ++ if (ret != 0) { ++ logging_error("pthread_setaffinity_np failed"); ++ ret = TC_RING_ONE_PRAMA_ERR; ++ } ++ ++ return ret; ++} ++ ++int tc_ring_one_create_threads(uint64_t mem_size, uint64_t loop_cnt, uint64_t delay, ++ uint64_t err_handle, bool *blacklist, size_t core_num) ++{ ++ int ret = 0; ++ int i; ++ ++ g_tc_config.space_size = mem_size; ++ g_tc_config.loop_total = loop_cnt; ++ g_tc_config.sleep_ms = delay; ++ g_tc_config.err_handle = err_handle; ++ g_tc_config.black_list = blacklist; ++ g_tc_config.sys_core_total = core_num; ++ g_tc_config.block_size = TC_RING_ONE_BLOCK_SIZE; ++ g_tc_config.rd_loop = TC_RING_ONE_RD_LOOP; ++ ++ ret = tc_ring_one_init(&g_tc_config); ++ if (ret != 0) { ++ logging_error("tc_ring_one_init fail ret:%d", ret); ++ return ret; ++ } ++ ++ for (i = 0; i < core_num; i++) { ++ if (is_core_run_tc(&g_tc_config, i) == 0) { ++ // 跳过黑名单 & 
offline 的core ++ continue; ++ } ++ ret = pthread_create(&g_tc_config.tc_core_threads[i], NULL, tc_ring_one_thread_entry, (void *)(&g_tc_config)); ++ if (ret != 0) { ++ logging_error("Failed to create thread for core %d", i); ++ tc_ring_one_release(&g_tc_config); ++ free(g_tc_config.tc_core_threads); ++ return ret; ++ } ++ ++ ret = bind_thread_to_core(g_tc_config.tc_core_threads[i], i); ++ if (ret != 0) { ++ logging_error("Failed to bind thread to core %d", i); ++ tc_ring_one_release(&g_tc_config); ++ free(g_tc_config.tc_core_threads); ++ return ret; ++ } ++ } ++ ++ // 等待所有线程完成 ++ for (i = 0; i < core_num; i++) { ++ if (is_core_run_tc(&g_tc_config, i) == 0) { ++ // 跳过黑名单 & offline 的core ++ continue; ++ } ++ pthread_join(g_tc_config.tc_core_threads[i], NULL); ++ } ++ ++ tc_ring_one_release(&g_tc_config); ++ free(g_tc_config.tc_core_threads); ++ if (g_tc_config.err_cnt > 0) { ++ return TC_RING_ONE_FAIL; ++ } else { ++ return TC_RING_ONE_SUCCESS; ++ } ++} ++ ++static void tc_ring_one_init_data_pattern(uintptr_t base, size_t size, int scan_bit) ++{ ++ uint64_t dat_pattern = (1ULL << (scan_bit & 0x3F)); ++ size_t word_offset = (scan_bit >> 6) << 3; // 待测试bit在cacheline中的word的偏移位置(一个word为64 bit) ++ int i; ++ ++ for (i = 0; i < size; i += TC_RING_ONE_DATA_UNIT) { ++ *((uint64_t *)(base + i + word_offset + TC_RING_ONE_CACHELINE_SIZE)) = dat_pattern; ++ } ++} ++ ++static void tc_ring_one_data_clear(uintptr_t base, size_t size, int scan_bit) ++{ ++ size_t word_offset = (scan_bit >> 6) << 3; // 待测试bit在cacheline中的word的偏移位置(一个word为64 bit) ++ int i; ++ ++ for (i = 0; i < size; i += TC_RING_ONE_DATA_UNIT) { ++ *((uint64_t *)(base + i + word_offset + TC_RING_ONE_CACHELINE_SIZE)) = 0; ++ } ++} ++ ++static void tc_ring_one_testspace_update(tc_ring_one_config_t *config, int scan_bit) ++{ ++ int core_id = get_core_id_by_thread(config->tc_core_threads); ++ int numa_node = get_numa_node_of_core(core_id); ++ ++ pthread_barrier_wait(&config->tc_barrier); ++ 
pthread_mutex_lock(&config->tc_node_mutex[numa_node]); ++ if (config->node_update_flag[numa_node] == 0) { ++ tc_ring_one_init_data_pattern((uintptr_t)config->test_space_base[numa_node], config->space_size, scan_bit); ++ config->node_update_flag[numa_node] = 1; ++ } ++ pthread_mutex_unlock(&config->tc_node_mutex[numa_node]); ++ pthread_barrier_wait(&config->tc_barrier); ++} ++ ++static void tc_ring_one_testspace_recover(tc_ring_one_config_t *config, int scan_bit) ++{ ++ uint32_t core_id = get_core_id_by_thread(config->tc_core_threads); ++ int numa_node = get_numa_node_of_core(core_id); ++ ++ pthread_barrier_wait(&config->tc_barrier); ++ pthread_mutex_lock(&config->tc_node_mutex[numa_node]); ++ if (config->node_update_flag[numa_node] == 1) { ++ tc_ring_one_data_clear((uintptr_t)config->test_space_base[numa_node], config->space_size, scan_bit); ++ config->node_update_flag[numa_node] = 0; ++ } ++ pthread_mutex_unlock(&config->tc_node_mutex[numa_node]); ++ pthread_barrier_wait(&config->tc_barrier); ++} ++ ++/** ++ * tc_ring_one_scan_test_block - 针对测试空间的指定块空间进行读扫描, 确保在块空间的的指定bit位置不存在非预期的由1跳变成0的情况 ++ * @base_addr: 测试空间中指定测试块首地址(必须为128B对齐地址) ++ * @scan_bit: 待测试bit位置(0-511) ++ * @block_size: 待测试块空间大小 ++ * @loop: 测试循环 ++ * @err_cnt: 记录错误次数的内存地址 ++ * ++ * 在调用本函数之前,要保证测试空间已经被待验证的数据Pattern初始化。 ++ * 待验证的数据Pattern: ++ * - base_addr + n * 64 各个数据bit为1 ++ * - base_addr + (n + 1) * 64 待验证的数据bit为1, 其余数据bit为0 ++ * ++ * 注意: 调用者要保证 base_addr + block_size 不能超过测试空间长度 ++ */ ++static void tc_ring_one_scan_test_block(uintptr_t base_addr, int scan_bit, size_t block_size, int *err_cnt) ++{ ++ uint64_t tgt_dat_pattern = (1ULL << (scan_bit & 0x3F)); ++ size_t word_offset = (scan_bit >> 6) << 3; // 待测试bit在cacheline中的word的偏移位置(一个word为64 Byte) ++ uint64_t tgt_dat_all_one = ~0x0ULL; ++ char json_result[2048]; ++ char err_msg[1024]; ++ uint64_t rd_data[2]; ++ int i; ++ ++ for (i = 0; i < block_size; i += TC_RING_ONE_DATA_UNIT) { ++ rd_data[0] = *((uint64_t *)(base_addr + i + word_offset)); // 
base_addr + n * 64 + word_offset ++ rd_data[1] = *((uint64_t *)(base_addr + i + TC_RING_ONE_CACHELINE_SIZE + word_offset)); // base_addr + (n + 1) * 64 + word_offset ++ if((rd_data[0] != tgt_dat_all_one) || (rd_data[1] != tgt_dat_pattern)) { ++ __atomic_add_fetch(err_cnt, 1, __ATOMIC_SEQ_CST); // 错误次数加1 ++ snprintf(err_msg, sizeof(err_msg), "[ERROR][CORE%d] vaddr = %#lx paddr = %#lx read_data = %#llx read_disturb = %#llx target_data = " ++ "%#llx bit_index = %d offset = %#lx block_size = %#lx\n", ++ sched_getcpu(), ++ (base_addr + i + word_offset), ++ vaddr_to_phys(base_addr + i + word_offset), ++ rd_data[1], ++ rd_data[0], ++ (1ULL << (scan_bit & 0x3F)), ++ scan_bit, ++ word_offset, ++ block_size); ++ logging_error("%s", err_msg); ++ snprintf(json_result, sizeof(json_result), "{\"msg\":\"%s\", \"code\":2001}", err_msg); ++ report_result(TOOL_NAME, RESULT_LEVEL_MAJOR_ALM, json_result); ++ break; ++ } ++ } ++} ++ ++static void tc_ring_one_scan_bit(tc_ring_one_config_t *config, uint64_t loop) ++{ ++ int core_id = sched_getcpu(); ++ int numa_node; ++ uint32_t i, j; ++ ++ numa_node = get_numa_node_of_core(core_id); ++ for (i = 0; i < config->rd_loop; i++) { ++ for (j = 0; j < config->space_size; j += config->block_size) { ++ // 按测试块大小,扫描测试空间 ++ tc_ring_one_scan_test_block((uintptr_t)((char *)(config->test_space_base[numa_node])) + j, ++ config->scan_bit, ++ config->block_size, ++ &(config->err_cnt)); ++ ++ if (config->err_cnt > 0) { ++ logging_error("[ERROR][CORE%d] dbls_scan_bit error, scan_bit = %d, err_cnt = %d vir_base_addr = %p " ++ "phy_base_addr = %p block = %ld rd_loop = %d loop = %d\n", ++ sched_getcpu(), ++ config->scan_bit, ++ config->err_cnt, ++ config->test_space_base[numa_node], ++ vaddr_to_phys((uintptr_t)(config->test_space_base[numa_node])), ++ j / config->block_size, ++ i, loop); ++ return; ++ } ++ ++ if (config->sleep_ms > 0) { ++ // 每扫描完一个测试块就休眠一段时间 ++ usleep(config->sleep_ms * 1000); ++ } ++ } ++ } ++} ++ ++static void* 
tc_ring_one_thread_entry(void *arg) ++{ ++ tc_ring_one_config_t *config = arg; ++ int scan_sequence_id = 0; ++ int scan_special_id = 0; ++ uint64_t loop_cnt = 0; ++ uint64_t tc_flag = 1; ++ ++ // 等待所有线程准备就绪 ++ pthread_barrier_wait(&config->tc_barrier); ++ ++ while (tc_flag == 1) { ++ config->scan_bit = scan_sequence_id; ++ tc_ring_one_testspace_update(config, scan_sequence_id); ++ tc_ring_one_scan_bit(config, loop_cnt); ++ tc_ring_one_testspace_recover(config, scan_sequence_id); ++ if (config->err_cnt > 0) { ++ break; ++ } ++ scan_sequence_id = (scan_sequence_id + 1) % (TC_RING_ONE_CACHELINE_SIZE * 8); ++ ++ scan_special_id = g_tc_ring_one_special_bits[rand() % 8]; ++ config->scan_bit = scan_special_id; ++ pthread_barrier_wait(&config->tc_barrier); ++ tc_ring_one_testspace_update(config, config->scan_bit); ++ tc_ring_one_scan_bit(config, loop_cnt); ++ tc_ring_one_testspace_recover(config, config->scan_bit); ++ if (config->err_cnt > 0) { ++ break; ++ } ++ loop_cnt++; ++ if ((config->loop_total != 0) && (loop_cnt >= config->loop_total)) { ++ tc_flag = 0; ++ } ++ } ++ ++ return config; ++} ++ ++void tc_ring_one_post_process(int result) ++{ ++ if (result == TC_RING_ONE_SUCCESS) { ++ logging_info("tc_ring_one test pass\n"); ++ } else if (result == TC_RING_ONE_PRAMA_ERR) { ++ // 通过log打印 ++ logging_error("The system can not run the tc_ring_one:\n"); ++ logging_error("1. the system must support NUMA\n"); ++ logging_error("2. 
the memory in the system maybe too small\n"); ++ report_result(TOOL_NAME, RESULT_LEVEL_FAIL, "{\"msg\":\"The system can not run the tc_ring_one testcase\", \"code\":1001}"); ++ } else if (result == TC_RING_ONE_FAIL) { ++ switch (g_tc_config.err_handle) { // 根据错误处理策略进行相应的处理 ++ case TC_ERROR_HANDLE_NONE: ++ logging_error("the system administrator must handle this error!!!\n"); ++ break; ++ case TC_ERROR_HANDLE_SHUTDOWN: ++ logging_error("Execute 'shutdown'\n"); ++ if (reboot(RB_POWER_OFF) < 0) { ++ logging_error("ERROR: Failed to execute 'shutdown'\n"); ++ } ++ break; ++ case TC_ERROR_HANDLE_REBOOT: ++ logging_error("Execute 'reboot'\n"); ++ if (reboot(RB_AUTOBOOT) < 0) { ++ logging_error("ERROR: Failed to execute 'reboot'\n"); ++ } ++ break; ++ default: // panic ++ abort(); ++ } ++ } ++} ++ ++// end of tc_ring_one.c +diff --git a/src/sentryPlugins/soc_ring_sentry/tc_ring_one.h b/src/sentryPlugins/soc_ring_sentry/tc_ring_one.h +new file mode 100755 +index 0000000..015a5b3 +--- /dev/null ++++ b/src/sentryPlugins/soc_ring_sentry/tc_ring_one.h +@@ -0,0 +1,38 @@ ++/* ++ * Copyright (c) Huawei Technologies Co., Ltd. 2025. All rights reserved. 
++ * Description: tc ring testcase header ++ * Author: lizixian ++ * Create: 2025-7-10 ++ */ ++ ++#ifndef __TC_RING_ONE_H__ ++#define __TC_RING_ONE_H__ ++#include "soc_ring_sentry.h" ++ ++ ++ ++/** ++ * tc_ring_one_create_threads - ++ * 测试用例总入口,该函数会为测试申请测试内存空间,为每个测试核创建测试线程,并将测试线程调度到测试核上 ++ * @mem_size: 用户指定的测试内存空间大小,系统每个numa节点均提供对用大小的测试空间,用于巡检测试 ++ * @loop_cnt: 测试循环次数, 大于0,则按照对应的循环做巡检测试,等于0,则巡检线程持续驻留在测试核中 ++ * @delay: 每个测试块扫描完成后的休眠时长,单位ms, ≤0 则不休眠。 ++ * @err_handle: 用于指示是否检测到错误后,是否需要做相关后处理操作 ++ * 0 - 不处理,错误处理交由上层软件进行 ++ * 1 - 主动触发panic(默认处理) ++ * 2 - 关机 ++ * 3 - 重启(不建议,该用例检测的失效错误为数据出错的致命错误,设备不应继续工作) ++ * 其他 - 非法输入。保持为默认处理。 ++ * @blacklist: 行巡检用例执行核的黑名单数组,用于指定某个核是否执行巡检测试线程,数值大小为测试核个数 ++ * NULL - 无黑名单,系统所有在线核均需要调度巡检线程 ++ * @core_num: 系统核总数(包含online & offline的core的总数),调用者需要保证该参数正确性 ++ * ++ * return: 0 - 在测试块中未检出错误 ++ * -1 - 系统状态不支持巡检用例执行(系统不支持numa or 系统内存空间不足) ++ * -2 - 检出到数据错误 ++ * ++ */ ++int tc_ring_one_create_threads(uint64_t mem_size, uint64_t loop_cnt, uint64_t delay, uint64_t err_handle, bool *blacklist, size_t core_num); ++void tc_ring_one_post_process(int result); ++ ++#endif/*__TC_RING_ONE_H__*/ +\ No newline at end of file +-- +2.33.0 + diff --git a/Fix-Security-Scan-Warning.patch b/Fix-Security-Scan-Warning.patch new file mode 100644 index 0000000000000000000000000000000000000000..6f5c404932c1b62ea77c09e1fd7281c4721fb5e1 --- /dev/null +++ b/Fix-Security-Scan-Warning.patch @@ -0,0 +1,60 @@ +From 32a6b1c277bc825be22083eb9286622f65dd562c Mon Sep 17 00:00:00 2001 +From: Yihang Li +Date: Mon, 4 Aug 2025 11:34:47 +0800 +Subject: [PATCH 7/9] sysSentry: Fix Security Scan Warning + +driver inclusion +category: feature +bugzilla: https://gitee.com/src-openeuler/sysSentry/issues/ID1UOY +CVE: NA + +---------------------------------------------------------------------- + +Fix Security Scan Warning: +a. There should be one space between the comment symbol and the comment +content. +b. Use snprintf instead of sprintf. 
+ +Signed-off-by: Yihang Li +Signed-off-by: Qizhi Zhang +--- + src/sentryPlugins/soc_ring_sentry/tc_ring_one.c | 4 ++-- + src/sentryPlugins/soc_ring_sentry/tc_ring_one.h | 2 +- + 2 files changed, 3 insertions(+), 3 deletions(-) + +diff --git a/src/sentryPlugins/soc_ring_sentry/tc_ring_one.c b/src/sentryPlugins/soc_ring_sentry/tc_ring_one.c +index 5101070..ea8fb34 100755 +--- a/src/sentryPlugins/soc_ring_sentry/tc_ring_one.c ++++ b/src/sentryPlugins/soc_ring_sentry/tc_ring_one.c +@@ -48,7 +48,7 @@ struct tc_ring_one_config { + int sys_core_total; // 系统核总数 + int scan_bit; // 扫描的bit + int err_flag; // 错误计数 +- int numa_node; //系统numa总数 ++ int numa_node; // 系统numa总数 + }; + + #define TC_RING_ONE_BLOCK_SIZE 0x10000 +@@ -119,7 +119,7 @@ static uintptr_t vaddr_to_phys(uintptr_t vaddr) + int fd; + + offset = vaddr / page_size * (sizeof(pinfo)); +- sprintf(page_map_name, "/proc/%d/pagemap", pid); ++ snprintf(page_map_name, sizeof(page_map_name), "/proc/%d/pagemap", pid); + fd = open(page_map_name, O_RDONLY); + if (fd < 0) { + logging_error("Failed to open %s\n", page_map_name); +diff --git a/src/sentryPlugins/soc_ring_sentry/tc_ring_one.h b/src/sentryPlugins/soc_ring_sentry/tc_ring_one.h +index d5a25ee..5e93a56 100755 +--- a/src/sentryPlugins/soc_ring_sentry/tc_ring_one.h ++++ b/src/sentryPlugins/soc_ring_sentry/tc_ring_one.h +@@ -34,4 +34,4 @@ + */ + int tc_ring_one_main(uint64_t mem_size, uint64_t loop_cnt, uint64_t delay, uint64_t err_handle, bool *blacklist, size_t core_num); + +-#endif/*__TC_RING_ONE_H__*/ +\ No newline at end of file ++#endif /*__TC_RING_ONE_H__*/ +-- +2.33.0 + diff --git a/Fix-issue-cores-with-isolcpus-set-blacklis.patch b/Fix-issue-cores-with-isolcpus-set-blacklis.patch new file mode 100644 index 0000000000000000000000000000000000000000..72e3ba19cd374af0155560f6df01aaeaa19e1b83 --- /dev/null +++ b/Fix-issue-cores-with-isolcpus-set-blacklis.patch @@ -0,0 +1,61 @@ +From a29c44ecdb6357a38f112e3d1cc07258da33acc8 Mon Sep 17 00:00:00 2001 +From: Yihang Li 
+Date: Thu, 24 Jul 2025 20:31:17 +0800 +Subject: [PATCH 4/9] sysSentry: Fix issue cores with isolcpus set blacklist + failed + +driver inclusion +category: feature +bugzilla: https://gitee.com/src-openeuler/sysSentry/issues/ID1UOY +CVE: NA + +---------------------------------------------------------------------- + +Signed-off-by: Yihang Li +Signed-off-by: Qizhi Zhang +--- + src/sentryPlugins/soc_ring_sentry/soc_ring_sentry.c | 2 +- + src/sentryPlugins/soc_ring_sentry/tc_ring_one.c | 7 +++++-- + 2 files changed, 6 insertions(+), 3 deletions(-) + +diff --git a/src/sentryPlugins/soc_ring_sentry/soc_ring_sentry.c b/src/sentryPlugins/soc_ring_sentry/soc_ring_sentry.c +index 1baffe6..df195ae 100644 +--- a/src/sentryPlugins/soc_ring_sentry/soc_ring_sentry.c ++++ b/src/sentryPlugins/soc_ring_sentry/soc_ring_sentry.c +@@ -153,7 +153,7 @@ static void soc_ring_sentry_blacklist_init(size_t core_num) + } + + if (env && strlen(env) > 0) { +- struct bitmask *cpuset = numa_parse_cpustring(env); ++ struct bitmask *cpuset = numa_parse_cpustring_all(env); + + if (!cpuset) { + logging_error("Failed to parse environment variable SOC_RING_SENTRY_BLACKLIST: %s\n", env); +diff --git a/src/sentryPlugins/soc_ring_sentry/tc_ring_one.c b/src/sentryPlugins/soc_ring_sentry/tc_ring_one.c +index f926d5f..9473d68 100755 +--- a/src/sentryPlugins/soc_ring_sentry/tc_ring_one.c ++++ b/src/sentryPlugins/soc_ring_sentry/tc_ring_one.c +@@ -410,6 +410,7 @@ static int tc_ring_one_exec(struct tc_ring_one_config *config) + pthread_join(config->tc_core_threads[i], NULL); + } + ++ i = i - 1; + if (config->err_flag > 0) { + ret = TC_RING_ONE_FAIL; + } else { +@@ -417,8 +418,10 @@ static int tc_ring_one_exec(struct tc_ring_one_config *config) + } + + pthread_bind_fail: +- pthread_cancel(config->tc_core_threads[i]); +- pthread_join(config->tc_core_threads[i], NULL); ++ if (!is_core_invalid(config, i)) { ++ pthread_cancel(config->tc_core_threads[i]); ++ pthread_join(config->tc_core_threads[i], NULL); ++ } + + 
pthread_create_fail: + for (i = i - 1; i >= 0; i--) { +-- +2.33.0 + diff --git a/Fix-issue-inconsistent-status-and-result-a.patch b/Fix-issue-inconsistent-status-and-result-a.patch new file mode 100644 index 0000000000000000000000000000000000000000..669ea66f7b756b435eef2ece20f43d880457bf31 --- /dev/null +++ b/Fix-issue-inconsistent-status-and-result-a.patch @@ -0,0 +1,57 @@ +From 053e1d40f510214343e812a1ae9fbc13b7c8858b Mon Sep 17 00:00:00 2001 +From: Yihang Li +Date: Thu, 31 Jul 2025 14:43:55 +0800 +Subject: [PATCH 5/9] sysSentry: Fix issue inconsistent status and result after + single inspection + +driver inclusion +category: feature +bugzilla: https://gitee.com/src-openeuler/sysSentry/issues/ID1UOY +CVE: NA + +---------------------------------------------------------------------- + +When configured for a single inspection, the inspection status is set to +FAILED after the inspection is completed, but the inspection result is +empty, which does not meet expectations. + +All inspection threads should directly release resources, return the +corresponding results, and exit after completion. 
+ +Signed-off-by: Yihang Li +Signed-off-by: Qizhi Zhang +--- + src/sentryPlugins/soc_ring_sentry/tc_ring_one.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +diff --git a/src/sentryPlugins/soc_ring_sentry/tc_ring_one.c b/src/sentryPlugins/soc_ring_sentry/tc_ring_one.c +index 9473d68..e8740c0 100755 +--- a/src/sentryPlugins/soc_ring_sentry/tc_ring_one.c ++++ b/src/sentryPlugins/soc_ring_sentry/tc_ring_one.c +@@ -410,13 +410,14 @@ static int tc_ring_one_exec(struct tc_ring_one_config *config) + pthread_join(config->tc_core_threads[i], NULL); + } + +- i = i - 1; + if (config->err_flag > 0) { + ret = TC_RING_ONE_FAIL; + } else { + ret = TC_RING_ONE_SUCCESS; + } + ++ goto out; ++ + pthread_bind_fail: + if (!is_core_invalid(config, i)) { + pthread_cancel(config->tc_core_threads[i]); +@@ -433,6 +434,7 @@ pthread_create_fail: + pthread_join(config->tc_core_threads[i], NULL); + } + ++out: + tc_ring_one_release(config); + free(config->tc_core_threads); + config->tc_core_threads = NULL; +-- +2.33.0 + diff --git a/Fix-two-code-review-comments.patch b/Fix-two-code-review-comments.patch new file mode 100644 index 0000000000000000000000000000000000000000..d28d57713d7dfbfb962d816682746807561e9b6c --- /dev/null +++ b/Fix-two-code-review-comments.patch @@ -0,0 +1,45 @@ +From 0fe147f30031154d43f6a504874bdce1d77047e2 Mon Sep 17 00:00:00 2001 +From: Yihang Li +Date: Thu, 7 Aug 2025 16:06:10 +0800 +Subject: [PATCH 8/9] sysSentry: Fix two code review comments + +driver inclusion +category: feature +bugzilla: https://gitee.com/src-openeuler/sysSentry/issues/ID1UOY +CVE: NA + +---------------------------------------------------------------------- + +Fix two code review comments: +a. When a uint64_t type loop_cnt is passed as a parameter to an int type, +truncation occurs. To resolve this issue, the parameter type is changed +to uint64_t. + +b. The loop counter loop_cnt is of type uint64_t. 
When it increments to +its maximum value and then adds 1, it overflows to 0, causing an infinite +loop that cannot be exited. To address this problem, a loop condition is +added to ensure that the loop continues only if loop_cnt does not overflow +to 0 after incrementing. + +Signed-off-by: Yihang Li +Signed-off-by: Qizhi Zhang +--- + src/sentryPlugins/soc_ring_sentry/tc_ring_one.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/src/sentryPlugins/soc_ring_sentry/tc_ring_one.c b/src/sentryPlugins/soc_ring_sentry/tc_ring_one.c +index ea8fb34..3dda5e0 100755 +--- a/src/sentryPlugins/soc_ring_sentry/tc_ring_one.c ++++ b/src/sentryPlugins/soc_ring_sentry/tc_ring_one.c +@@ -641,7 +641,7 @@ static bool tc_ring_one_scan_bit(struct tc_ring_one_config *config, int scan_bit + return true; + } + +-static bool tc_ring_one_test_issue(struct tc_ring_one_config *config, int scan_bit, int loop_cnt) ++static bool tc_ring_one_test_issue(struct tc_ring_one_config *config, int scan_bit, uint64_t loop_cnt) + { + bool ret; + +-- +2.33.0 + diff --git a/Use-panic-instead-of-coredump-file.patch b/Use-panic-instead-of-coredump-file.patch new file mode 100644 index 0000000000000000000000000000000000000000..495dabef89af1b6ef190c61d7108240f814b6a80 --- /dev/null +++ b/Use-panic-instead-of-coredump-file.patch @@ -0,0 +1,78 @@ +From 9b631cf00449a9e16373600b70031df534a8ff1d Mon Sep 17 00:00:00 2001 +From: Yihang Li +Date: Thu, 31 Jul 2025 15:01:20 +0800 +Subject: [PATCH 6/9] sysSentry: Use panic instead of coredump file + +driver inclusion +category: feature +bugzilla: https://gitee.com/src-openeuler/sysSentry/issues/ID1UOY +CVE: NA + +---------------------------------------------------------------------- + +Prior to this, when the post-processing flag was configured to 1, +detecting an error would generate a coredump file, which was not as +expected. Instead, a panic should be used to replace the coredump file. + +Additionally, some formatting errors were cleaned up. 
+ +Signed-off-by: Yihang Li +Signed-off-by: Qizhi Zhang +--- + .../soc_ring_sentry/tc_ring_one.c | 23 ++++++++++++++++--- + 1 file changed, 20 insertions(+), 3 deletions(-) + +diff --git a/src/sentryPlugins/soc_ring_sentry/tc_ring_one.c b/src/sentryPlugins/soc_ring_sentry/tc_ring_one.c +index e8740c0..5101070 100755 +--- a/src/sentryPlugins/soc_ring_sentry/tc_ring_one.c ++++ b/src/sentryPlugins/soc_ring_sentry/tc_ring_one.c +@@ -441,6 +441,22 @@ out: + return ret; + } + ++static void tc_ring_one_execute_panic(void) ++{ ++ FILE *file = fopen("/proc/sysrq-trigger", "w"); ++ ++ if (file == NULL) { ++ logging_error("Failed to open /proc/sysrq-trigger"); ++ return; ++ } ++ ++ if (fwrite("c", sizeof(char), 1, file) != 1) { ++ logging_error("Failed to write to /proc/sysrq-trigger"); ++ } ++ ++ fclose(file); ++} ++ + void tc_ring_one_post_process(uint64_t err_handle, int result) + { + if (result == TC_RING_ONE_SUCCESS) { +@@ -451,19 +467,20 @@ void tc_ring_one_post_process(uint64_t err_handle, int result) + logging_error("the system administrator must handle this error!!!\n"); + break; + case TC_ERROR_HANDLE_SHUTDOWN: +- logging_error("Execute 'shutdown'\n"); ++ logging_error("Execute 'shutdown'\n"); + if (reboot(RB_POWER_OFF) < 0) { + logging_error("ERROR: Failed to execute 'shutdown'\n"); + } + break; + case TC_ERROR_HANDLE_REBOOT: +- logging_error("Execute 'reboot'\n"); ++ logging_error("Execute 'reboot'\n"); + if (reboot(RB_AUTOBOOT) < 0) { + logging_error("ERROR: Failed to execute 'reboot'\n"); + } + break; + default: // panic +- abort(); ++ logging_error("Execute 'panic'\n"); ++ tc_ring_one_execute_panic(); + break; + } + } else { +-- +2.33.0 + diff --git a/sysSentry.spec b/sysSentry.spec index 51507fce95d13a63d10adb3202282a154d04545b..9f17b08ea223f689a80ee85cea2518d1dd39fd6a 100644 --- a/sysSentry.spec +++ b/sysSentry.spec @@ -4,7 +4,7 @@ Summary: System Inspection Framework Name: sysSentry Version: 1.0.3 -Release: 10 +Release: 11 License: Mulan PSL v2 Group: System 
Environment/Daemons Source0: https://gitee.com/openeuler/sysSentry/releases/download/v%{version}/%{name}-%{version}.tar.gz @@ -19,6 +19,15 @@ Patch7: ai-block-io-exit-when-stage-is-not-supported.patch Patch8: add-log-utils-for-c.patch Patch9: fix-env-for-subprocess.Popen.patch Patch10: fix-period-task-some-bugs.patch +Patch11: Add-SOC-Ring-sentry-function.patch +Patch12: Add-testcase-tc_ring-for-SOC-Ring-sentry.patch +Patch13: testcase-tc_ring-cleancode.patch +Patch14: Fix-issue-cores-with-isolcpus-set-blacklis.patch +Patch15: Fix-issue-inconsistent-status-and-result-a.patch +Patch16: Use-panic-instead-of-coredump-file.patch +Patch17: Fix-Security-Scan-Warning.patch +Patch18: Fix-two-code-review-comments.patch +Patch19: Add-MulanV2-License-statement.patch BuildRequires: cmake gcc-c++ BuildRequires: python3 python3-setuptools @@ -26,6 +35,7 @@ BuildRequires: json-c-devel BuildRequires: chrpath BuildRequires: elfutils-devel clang libbpf-devel bpftool BuildRequires: python3-numpy python3-pytest +BuildRequires: numactl-libs numactl-devel Requires: pyxalarm = %{version} Requires: libbpf @@ -103,6 +113,15 @@ Requires: sysSentry = %{version}-%{release} %description -n hbm_online_repair This package provides hbm_online_repair for the sysSentry. +%package -n soc_ring_sentry +Summary: soc_ring_sentry for the sysSentry +Provides: soc_ring_sentry = %{version} +BuildRequires: numactl-libs numactl-devel +Requires: sysSentry = %{version}-%{release} + +%description -n soc_ring_sentry +This package provides soc_ring_sentry for the sysSentry. 
+ %prep %autosetup -n %{name}-%{version} -p1 @@ -181,6 +200,10 @@ rm -rf /var/run/sysSentry | : %exclude %{python3_sitelib}/syssentry/bmc_* %exclude %{python3_sitelib}/syssentry/*/bmc_* +# soc_ring_sentry +%exclude %{_sysconfdir}/sysconfig/soc_ring_sentry.env +%exclude %{_sysconfdir}/sysSentry/tasks/sentry_msg_monitor.mod + %files -n libxalarm %attr(0550,root,root) %{_libdir}/libxalarm.so @@ -216,7 +239,25 @@ rm -rf /var/run/sysSentry | : %attr(0600,root,root) %config(noreplace) %{_sysconfdir}/sysSentry/tasks/hbm_online_repair.mod %attr(0550,root,root) %{python3_sitelib}/syssentry/bmc_alarm.py +%files -n soc_ring_sentry +%attr(0750,root,root) %{_bindir}/soc_ring_sentry +%attr(0600,root,root) %config(noreplace) %{_sysconfdir}/sysconfig/soc_ring_sentry.env + %changelog +* Fri Oct 17 2025 Qizhi Zhang - 1.0.3-11 +- Type:feature +- CVE:NA +- SUG:NA +- DESC:Add SOC Ring sentry function + Add testcase tc_ring for SOC Ring sentry + testcase tc_ring cleancode + Fix issue cores with isolcpus set blacklist failed + Fix issue inconsistent status and result after single inspection + Use panic instead of coredump file + Fix Security Scan Warning + Fix two code review comments + Add MulanV2 License statement + * Sat Mar 29 2025 shixuantong - 1.0.3-10 - Type:bugfix - CVE:NA diff --git a/testcase-tc_ring-cleancode.patch b/testcase-tc_ring-cleancode.patch new file mode 100644 index 0000000000000000000000000000000000000000..6d067fd45be959d86144aa116adb85dc3e05d09f --- /dev/null +++ b/testcase-tc_ring-cleancode.patch @@ -0,0 +1,914 @@ +From cba3b2805f825187d4222d8564c0e888f06a1582 Mon Sep 17 00:00:00 2001 +From: Yihang Li +Date: Tue, 22 Jul 2025 09:33:39 +0800 +Subject: [PATCH 3/9] sysSentry: testcase tc_ring cleancode + +driver inclusion +category: feature +bugzilla: https://gitee.com/src-openeuler/sysSentry/issues/ID1UOY +CVE: NA + +---------------------------------------------------------------------- + +Signed-off-by: Yihang Li +Signed-off-by: Qizhi Zhang +--- + 
.../soc_ring_sentry/soc_ring_sentry.c | 18 +- + .../soc_ring_sentry/soc_ring_sentry.h | 2 + + .../soc_ring_sentry/tc_ring_one.c | 514 +++++++++--------- + .../soc_ring_sentry/tc_ring_one.h | 5 +- + 4 files changed, 266 insertions(+), 273 deletions(-) + +diff --git a/src/sentryPlugins/soc_ring_sentry/soc_ring_sentry.c b/src/sentryPlugins/soc_ring_sentry/soc_ring_sentry.c +index 21c78b6..1baffe6 100644 +--- a/src/sentryPlugins/soc_ring_sentry/soc_ring_sentry.c ++++ b/src/sentryPlugins/soc_ring_sentry/soc_ring_sentry.c +@@ -207,8 +207,7 @@ static int soc_ring_sentry_delivery(size_t core_num) + { + int ret; + +- ret = tc_ring_one_create_threads(g_mem_size, g_loop_cnt, g_intensity_delay, g_handle, g_blacklist, core_num); +- tc_ring_one_post_process(ret); ++ ret = tc_ring_one_main(g_mem_size, g_loop_cnt, g_intensity_delay, g_handle, g_blacklist, core_num); + + return ret; + } +@@ -220,6 +219,19 @@ size_t get_system_core_num(void) + return (core_num > 0) ? (size_t)core_num : 1; + } + ++void soc_ring_sentry_report(enum RESULT_LEVEL result_level, const char *report_data) ++{ ++ char json_result[2048]; ++ ++ snprintf(json_result, sizeof(json_result), "{\"msg\":\"%s\", \"code\":1001}", report_data); ++ report_result(TOOL_NAME, result_level, json_result); ++ if (result_level == RESULT_LEVEL_PASS) { ++ logging_info("%s\n", report_data); ++ } else { ++ logging_error("%s\n", report_data); ++ } ++} ++ + static void soc_ring_sentry_exec() + { + size_t core_num = get_system_core_num(); +@@ -228,7 +240,7 @@ static void soc_ring_sentry_exec() + soc_ring_sentry_init(core_num); + ret = soc_ring_sentry_delivery(core_num); + if (ret == 0) { +- report_result(TOOL_NAME, RESULT_LEVEL_PASS, "{\"msg\":\"SOC STL test pass\", \"code\":1001}"); ++ soc_ring_sentry_report(RESULT_LEVEL_PASS, "SOC STL test pass"); + } + + if (g_blacklist) { +diff --git a/src/sentryPlugins/soc_ring_sentry/soc_ring_sentry.h b/src/sentryPlugins/soc_ring_sentry/soc_ring_sentry.h +index bea991f..0566496 100644 +--- 
a/src/sentryPlugins/soc_ring_sentry/soc_ring_sentry.h ++++ b/src/sentryPlugins/soc_ring_sentry/soc_ring_sentry.h +@@ -24,4 +24,6 @@ extern uint64_t g_mem_size; + extern uint64_t g_loop_cnt; + extern bool *g_blacklist; + ++void soc_ring_sentry_report(enum RESULT_LEVEL result_level, const char *report_data); ++ + #endif +diff --git a/src/sentryPlugins/soc_ring_sentry/tc_ring_one.c b/src/sentryPlugins/soc_ring_sentry/tc_ring_one.c +index 9713495..f926d5f 100755 +--- a/src/sentryPlugins/soc_ring_sentry/tc_ring_one.c ++++ b/src/sentryPlugins/soc_ring_sentry/tc_ring_one.c +@@ -31,12 +31,12 @@ + #include "soc_ring_sentry.h" + #include "tc_ring_one.h" + +-typedef struct tc_ring_one_config { ++struct tc_ring_one_config { + void** test_space_base; // 存储每个numa节点测试空间的指针,需要根据当前系统numa节点数量动态申请存储空间 +- size_t space_size; // 每个numa节点测试空间内存大小 ++ size_t mem_size; // 每个numa节点测试空间内存大小 + size_t block_size; // 每个测试块的大小,固定为64kByte +- uint64_t loop_total; // 测试循环总数,0 -- 无限测试 +- int64_t sleep_ms; // 每次读完一个数据块后的休眠时间,≤ 0则不休眠,继续下一个数据块的扫描 ++ uint64_t loop_cnt; // 测试循环总数,0 -- 无限测试 ++ int64_t delay_ms; // 每次读完一个数据块后的休眠时间,≤ 0则不休眠,继续下一个数据块的扫描 + int64_t rd_loop; // 每个扫描bit单次循环内的数据扫描次数,固定为0x80 + pthread_t* tc_core_threads; // 记录每个核的测试线程 + pthread_barrier_t tc_barrier; // 同步标志 +@@ -47,8 +47,9 @@ typedef struct tc_ring_one_config { + int tc_core_total; // 测试核总数 + int sys_core_total; // 系统核总数 + int scan_bit; // 扫描的bit +- int err_cnt; // 错误计数 +-} tc_ring_one_config_t; ++ int err_flag; // 错误计数 ++ int numa_node; //系统numa总数 ++}; + + #define TC_RING_ONE_BLOCK_SIZE 0x10000 + #define TC_RING_ONE_RD_LOOP 0x80 +@@ -64,7 +65,6 @@ typedef struct tc_ring_one_config { + #define TC_ERROR_HANDLE_SHUTDOWN 2 + #define TC_ERROR_HANDLE_REBOOT 3 + +-static tc_ring_one_config_t g_tc_config = { 0 }; + static const uint32_t tc_ring_one_pattern[32] = { + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, +@@ -75,43 +75,14 @@ static const uint32_t tc_ring_one_pattern[32] = { + 
0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + }; ++ + static const uint32_t g_tc_ring_one_special_bits[] = {394, 397, 405, 343, 394, 393, 392, 377}; + + static void* tc_ring_one_thread_entry(void* arg); + +-static int get_system_online_core_total(void) ++static int get_numa_node(void) + { +- int core_num = 0; +- +- core_num = sysconf(_SC_NPROCESSORS_ONLN); +- core_num = (core_num > 0) ? core_num : 1; +- +- return core_num; +-} +- +-static int get_system_core_total(void) +-{ +- int core_num = 0; +- +- core_num = sysconf(_SC_NPROCESSORS_CONF); +- core_num = (core_num > 0) ? core_num : 1; +- +- return core_num; +-} +- +-static int get_core_id_by_thread(pthread_t *thread) +-{ +- int core_total = get_system_core_total(); +- pthread_t my_thread_id = pthread_self(); +- int core_id; +- +- for (core_id = 0; core_id < core_total; core_id++) { +- if (my_thread_id == thread[core_id]) { +- return core_id; +- } +- } +- +- return -1; ++ return numa_max_node() + 1; + } + + static int is_cpu_online(int core_id) +@@ -131,6 +102,7 @@ static int is_cpu_online(int core_id) + logging_error("Failed to open %s\n", online_file); + return 0; + } ++ + fscanf(fp, "%d", &online); + fclose(fp); + +@@ -170,12 +142,10 @@ static uintptr_t vaddr_to_phys(uintptr_t vaddr) + + static int get_numa_node_of_core(int core_id) + { +- int numa_node; ++ int numa_node = numa_node_of_cpu(core_id);; + +- numa_node = numa_node_of_cpu(core_id); + if (numa_node < 0) { + logging_error("[CORE%d] numa_node_of_cpu failed, errno:%d\n", core_id, errno); +- return 0; + } + + return numa_node; +@@ -183,21 +153,21 @@ static int get_numa_node_of_core(int core_id) + + /** + * 从指定 NUMA 节点优先分配内存,若失败则尝试其他节点 +- * @param preferred_node 优先分配的 NUMA 节点号 +- * @param size 需要分配的内存大小(字节) +- * @return 成功返回内存指针,失败返回 NULL ++ * @numa_node: 系统numa总数 ++ * @preferred_node: 优先分配的 NUMA 节点号 ++ * @size: 需要分配的内存大小(字节) ++ * @return: 成功返回内存指针,失败返回 NULL + */ +-static void *numa_alloc_fallback(int 
preferred_node, size_t size) ++static void *numa_alloc_fallback(int numa_node, int preferred_node, size_t size) + { + struct bitmask *allowed_nodes; + void *ptr = NULL; +- int max_node; + int node; + + // 1. 获取所有可用的 NUMA 节点 + allowed_nodes = numa_get_mems_allowed(); + if (!allowed_nodes) { +- logging_error("Failed to get allowed NUMA nodes"); ++ logging_error("Failed to get allowed NUMA nodes\n"); + return NULL; + } + +@@ -205,21 +175,20 @@ static void *numa_alloc_fallback(int preferred_node, size_t size) + if (numa_bitmask_isbitset(allowed_nodes, preferred_node)) { + ptr = numa_alloc_onnode(size, preferred_node); + if (ptr != NULL) { +- logging_debug("Allocated %#x bytes on NUMA assigned node %d, addr: %p \n", size, preferred_node, ptr, __LINE__); ++ logging_debug("Allocated %#x bytes on NUMA assigned node %d, addr: %p\n", size, preferred_node, ptr); + return ptr; + } + } + + // 3. 遍历所有节点(跳过优先节点) +- max_node = numa_max_node(); +- for (node = 0; node <= max_node; node++) { ++ for (node = 0; node < numa_node; node++) { + if (node == preferred_node || !numa_bitmask_isbitset(allowed_nodes, node)) { + continue; // 跳过优先节点或不允许的节点 + } + + ptr = numa_alloc_onnode(size, node); + if (ptr != NULL) { +- logging_debug("Allocated %#x bytes on NUMA other node %d, addr: %p \n", size, node, ptr, __LINE__); ++ logging_debug("Allocated %#x bytes on NUMA other node %d, addr: %p\n", size, node, ptr); + return ptr; + } + } +@@ -242,109 +211,107 @@ static void tc_ring_one_space_init(void *base, size_t size) + } + } + +-static int tc_ring_one_ctrl_var_init(tc_ring_one_config_t *config) ++static bool is_core_invalid(struct tc_ring_one_config *config, int core_id) ++{ ++ return config->black_list[core_id] || !is_cpu_online(core_id); ++} ++ ++static int tc_ring_one_ctrl_var_init(struct tc_ring_one_config *config) + { + pthread_mutexattr_t attr; +- int online_core_total; +- int system_core_total; +- int tc_core_total; +- int numa_node_num; ++ int valid_core_num = 0; + int i; + +- 
system_core_total = config->sys_core_total; +- online_core_total = get_system_online_core_total(); +- tc_core_total = online_core_total; +- numa_node_num = numa_max_node() + 1; +- +- for (i = 0; i < system_core_total; i++) { +- if (config->black_list[i] && is_cpu_online(i)) { +- // 跳过黑名单中的 CPU 核心 +- tc_core_total--; ++ for (i = 0; i < config->sys_core_total; i++) { ++ if (!is_core_invalid(config, i)) { ++ valid_core_num++; + } + } + + if (pthread_mutexattr_init(&attr) != 0) { +- logging_error("Failed to initialize mutex attribute"); ++ logging_error("Failed to initialize mutex attribute\n"); + return TC_RING_ONE_PRAMA_ERR; + } ++ + pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_ERRORCHECK); +- config->tc_node_mutex = (pthread_mutex_t *)malloc(sizeof(pthread_mutex_t) * numa_node_num); ++ config->tc_node_mutex = (pthread_mutex_t *)calloc(config->numa_node, sizeof(pthread_mutex_t)); + if (config->tc_node_mutex == NULL) { +- logging_error("Failed to allocate memory for tc_mutex"); +- pthread_mutexattr_destroy(&attr); +- return TC_RING_ONE_PRAMA_ERR; ++ logging_error("Failed to allocate memory for tc_mutex\n"); ++ goto mutex_alloc_fail; + } +- for (i = 0; i < numa_node_num; i++) { ++ ++ for (i = 0; i < config->numa_node; i++) { + if (pthread_mutex_init(&(config->tc_node_mutex[i]), &attr) != 0) { +- logging_error("Failed to initialize mutex %d", i); +- free(config->tc_node_mutex); +- pthread_mutexattr_destroy(&attr); +- return TC_RING_ONE_PRAMA_ERR; ++ logging_error("Failed to initialize mutex %d\n", i); ++ goto mutex_init_fail; + } + } +- pthread_mutexattr_destroy(&attr); + +- config->tc_core_threads = (pthread_t *)malloc(sizeof(pthread_t) * system_core_total); ++ config->tc_core_threads = (pthread_t *)calloc(config->sys_core_total, sizeof(pthread_t)); + if (config->tc_core_threads == NULL) { +- logging_error("Failed to allocate memory for tc_core_threads"); +- for (i = 0; i < numa_node_num; i++) { +- pthread_mutex_destroy(&config->tc_node_mutex[i]); +- } +- 
free(config->tc_node_mutex); +- return TC_RING_ONE_PRAMA_ERR; ++ logging_error("Failed to allocate memory for tc_core_threads\n"); ++ goto mutex_init_fail; + } + +- config->sys_core_total = tc_core_total; +- pthread_barrier_init(&config->tc_barrier, NULL, tc_core_total); +- ++ pthread_mutexattr_destroy(&attr); ++ pthread_barrier_init(&config->tc_barrier, NULL, valid_core_num); + return TC_RING_ONE_SUCCESS; ++ ++mutex_init_fail: ++ for (i = i - 1; i >= 0; i--) { ++ pthread_mutex_destroy(&config->tc_node_mutex[i]); ++ } ++ ++ free(config->tc_node_mutex); ++ config->tc_node_mutex = NULL; ++ ++mutex_alloc_fail: ++ pthread_mutexattr_destroy(&attr); ++ return TC_RING_ONE_PRAMA_ERR; + } + +-static int tc_ring_one_init(tc_ring_one_config_t *config) ++static int tc_ring_one_init(struct tc_ring_one_config *config) + { +- int numa_node_num; +- int ret = 0; ++ int ret, i; + void *ptr; +- int i; + + if (numa_available() < 0) { +- logging_error("NUMA is not available on this system"); ++ logging_error("NUMA is not available on this system\n"); + return TC_RING_ONE_PRAMA_ERR; + } + + // 为每个 NUMA 节点分配测试内存空间 +- numa_node_num = numa_max_node() + 1; +- config->test_space_base = (void **)malloc(sizeof(void *) * numa_node_num); ++ config->test_space_base = (void **)calloc(config->numa_node, sizeof(void *)); + if (config->test_space_base == NULL) { +- logging_error("Failed to allocate memory for test_space_base"); ++ logging_error("Failed to allocate memory for test_space_base\n"); + return TC_RING_ONE_PRAMA_ERR; + } + +- for (i = 0; i < numa_node_num; i++) { +- ptr = numa_alloc_fallback(i, config->space_size); ++ for (i = 0; i < config->numa_node; i++) { ++ ptr = numa_alloc_fallback(config->numa_node, i, config->mem_size); + if (ptr == NULL) { + ret = TC_RING_ONE_PRAMA_ERR; + goto numa_alloc_fail; + } + +- tc_ring_one_space_init(ptr, config->space_size); ++ tc_ring_one_space_init(ptr, config->mem_size); + config->test_space_base[i] = ptr; + } + +- config->node_update_flag = (uint32_t 
*)malloc(sizeof(uint32_t) * numa_node_num); ++ config->node_update_flag = (uint32_t *)calloc(config->numa_node, sizeof(uint32_t)); + if (config->node_update_flag == NULL) { +- logging_error("Failed to allocate memory for node_update_flag"); ++ logging_error("Failed to allocate memory for node_update_flag\n"); + ret = TC_RING_ONE_PRAMA_ERR; + goto numa_alloc_fail; +- } else { +- for (i = 0; i < numa_node_num; i++) { ++ } ++ ++ for (i = 0; i < config->numa_node; i++) { + config->node_update_flag[i] = 0; +- } + } + + ret = tc_ring_one_ctrl_var_init(config); + if (ret != 0) { +- logging_error("tc_ring_one_ctrl_var_init fail ret:%d", ret); ++ logging_error("tc_ring_one_ctrl_var_init fail ret:%d\n", ret); + goto ctrl_var_init_fail; + } + +@@ -355,8 +322,8 @@ ctrl_var_init_fail: + config->node_update_flag = NULL; + + numa_alloc_fail: +- for (i = 0; i < numa_node_num; i++) { +- numa_free(config->test_space_base[i], config->space_size); ++ for (i = i - 1; i >= 0; i--) { ++ numa_free(config->test_space_base[i], config->mem_size); + } + + free(config->test_space_base); +@@ -364,41 +331,27 @@ numa_alloc_fail: + return ret; + } + +-static void tc_ring_one_release(tc_ring_one_config_t *config) ++static void tc_ring_one_release(struct tc_ring_one_config *config) + { +- int numa_node_num; + int node; + +- numa_node_num = numa_max_node() + 1; +- for (int i = 0; i < numa_node_num; i++) { +- numa_free(config->test_space_base[i], config->space_size); ++ for (int i = 0; i < config->numa_node; i++) { ++ numa_free(config->test_space_base[i], config->mem_size); + } ++ + free(config->test_space_base); + config->test_space_base = NULL; + free(config->node_update_flag); + config->node_update_flag = NULL; +- + pthread_barrier_destroy(&(config->tc_barrier)); +- for (node = 0; node < numa_node_num; node++) { ++ for (node = 0; node < config->numa_node; node++) { + pthread_mutex_destroy(&config->tc_node_mutex[node]); + } ++ + free(config->tc_node_mutex); + config->tc_node_mutex = NULL; + } + 
+-static int is_core_run_tc(tc_ring_one_config_t *config, int core_id) +-{ +- if (is_cpu_online(core_id) == 0) { +- return 0; +- } +- +- if ((config->black_list != NULL) && (config->black_list[core_id] == 1)) { +- return 0; +- } +- +- return 1; +-} +- + // 将线程绑定到指定 CORE + static int bind_thread_to_core(pthread_t thread, int core_id) + { +@@ -410,72 +363,133 @@ static int bind_thread_to_core(pthread_t thread, int core_id) + + ret = pthread_setaffinity_np(thread, sizeof(cpu_set_t), &cpuset); + if (ret != 0) { +- logging_error("pthread_setaffinity_np failed"); ++ logging_error("pthread_setaffinity_np failed\n"); + ret = TC_RING_ONE_PRAMA_ERR; + } + + return ret; + } + +-int tc_ring_one_create_threads(uint64_t mem_size, uint64_t loop_cnt, uint64_t delay, +- uint64_t err_handle, bool *blacklist, size_t core_num) ++static int tc_ring_one_exec(struct tc_ring_one_config *config) + { +- int ret = 0; ++ int ret; + int i; + +- g_tc_config.space_size = mem_size; +- g_tc_config.loop_total = loop_cnt; +- g_tc_config.sleep_ms = delay; +- g_tc_config.err_handle = err_handle; +- g_tc_config.black_list = blacklist; +- g_tc_config.sys_core_total = core_num; +- g_tc_config.block_size = TC_RING_ONE_BLOCK_SIZE; +- g_tc_config.rd_loop = TC_RING_ONE_RD_LOOP; +- +- ret = tc_ring_one_init(&g_tc_config); ++ ret = tc_ring_one_init(config); + if (ret != 0) { +- logging_error("tc_ring_one_init fail ret:%d", ret); ++ logging_error("tc_ring_one_init fail ret:%d\n", ret); + return ret; + } + +- for (i = 0; i < core_num; i++) { +- if (is_core_run_tc(&g_tc_config, i) == 0) { ++ for (i = 0; i < config->sys_core_total; i++) { ++ if (is_core_invalid(config, i)) { + // 跳过黑名单 & offline 的core + continue; + } +- ret = pthread_create(&g_tc_config.tc_core_threads[i], NULL, tc_ring_one_thread_entry, (void *)(&g_tc_config)); ++ ++ ret = pthread_create(&config->tc_core_threads[i], NULL, tc_ring_one_thread_entry, (void *)config); + if (ret != 0) { +- logging_error("Failed to create thread for core %d", i); +- 
tc_ring_one_release(&g_tc_config); +- free(g_tc_config.tc_core_threads); +- return ret; ++ logging_error("Failed to create thread for core %d\n", i); ++ goto pthread_create_fail; + } + +- ret = bind_thread_to_core(g_tc_config.tc_core_threads[i], i); ++ ret = bind_thread_to_core(config->tc_core_threads[i], i); + if (ret != 0) { +- logging_error("Failed to bind thread to core %d", i); +- tc_ring_one_release(&g_tc_config); +- free(g_tc_config.tc_core_threads); +- return ret; ++ logging_error("Failed to bind thread to core %d\n", i); ++ goto pthread_bind_fail; + } + } + + // 等待所有线程完成 +- for (i = 0; i < core_num; i++) { +- if (is_core_run_tc(&g_tc_config, i) == 0) { ++ for (i = 0; i < config->sys_core_total; i++) { ++ if (is_core_invalid(config, i)) { + // 跳过黑名单 & offline 的core + continue; + } +- pthread_join(g_tc_config.tc_core_threads[i], NULL); ++ ++ pthread_join(config->tc_core_threads[i], NULL); + } + +- tc_ring_one_release(&g_tc_config); +- free(g_tc_config.tc_core_threads); +- if (g_tc_config.err_cnt > 0) { +- return TC_RING_ONE_FAIL; ++ if (config->err_flag > 0) { ++ ret = TC_RING_ONE_FAIL; + } else { +- return TC_RING_ONE_SUCCESS; ++ ret = TC_RING_ONE_SUCCESS; ++ } ++ ++pthread_bind_fail: ++ pthread_cancel(config->tc_core_threads[i]); ++ pthread_join(config->tc_core_threads[i], NULL); ++ ++pthread_create_fail: ++ for (i = i - 1; i >= 0; i--) { ++ if (is_core_invalid(config, i)) { ++ continue; ++ } ++ ++ pthread_cancel(config->tc_core_threads[i]); ++ pthread_join(config->tc_core_threads[i], NULL); + } ++ ++ tc_ring_one_release(config); ++ free(config->tc_core_threads); ++ config->tc_core_threads = NULL; ++ return ret; ++} ++ ++void tc_ring_one_post_process(uint64_t err_handle, int result) ++{ ++ if (result == TC_RING_ONE_SUCCESS) { ++ logging_info("tc_ring_one test pass\n"); ++ } else if (result == TC_RING_ONE_FAIL) { ++ switch (err_handle) { // 根据错误处理策略进行相应的处理 ++ case TC_ERROR_HANDLE_NONE: ++ logging_error("the system administrator must handle this 
error!!!\n"); ++ break; ++ case TC_ERROR_HANDLE_SHUTDOWN: ++ logging_error("Execute 'shutdown'\n"); ++ if (reboot(RB_POWER_OFF) < 0) { ++ logging_error("ERROR: Failed to execute 'shutdown'\n"); ++ } ++ break; ++ case TC_ERROR_HANDLE_REBOOT: ++ logging_error("Execute 'reboot'\n"); ++ if (reboot(RB_AUTOBOOT) < 0) { ++ logging_error("ERROR: Failed to execute 'reboot'\n"); ++ } ++ break; ++ default: // panic ++ abort(); ++ break; ++ } ++ } else { ++ // 通过log打印 ++ logging_error("The system can not run the tc_ring_one:\n"); ++ logging_error("1. the system must support NUMA\n"); ++ logging_error("2. the memory in the system maybe too small\n"); ++ soc_ring_sentry_report(RESULT_LEVEL_SKIP, "The system can not run the tc_ring_one testcase"); ++ } ++} ++ ++int tc_ring_one_main(uint64_t mem_size, uint64_t loop_cnt, uint64_t delay, ++ uint64_t err_handle, bool *blacklist, size_t core_num) ++{ ++ struct tc_ring_one_config tc_config = { 0 }; ++ int ret; ++ ++ tc_config.mem_size = mem_size; ++ tc_config.loop_cnt = loop_cnt; ++ tc_config.delay_ms = delay; ++ tc_config.err_handle = err_handle; ++ tc_config.black_list = blacklist; ++ tc_config.sys_core_total = core_num; ++ tc_config.block_size = TC_RING_ONE_BLOCK_SIZE; ++ tc_config.rd_loop = TC_RING_ONE_RD_LOOP; ++ tc_config.numa_node = get_numa_node(); ++ ++ ret = tc_ring_one_exec(&tc_config); ++ tc_ring_one_post_process(err_handle, ret); ++ ++ return ret; + } + + static void tc_ring_one_init_data_pattern(uintptr_t base, size_t size, int scan_bit) +@@ -499,33 +513,40 @@ static void tc_ring_one_data_clear(uintptr_t base, size_t size, int scan_bit) + } + } + +-static void tc_ring_one_testspace_update(tc_ring_one_config_t *config, int scan_bit) ++static void tc_ring_one_testspace_update(struct tc_ring_one_config *config, int scan_bit) + { +- int core_id = get_core_id_by_thread(config->tc_core_threads); ++ int core_id = sched_getcpu(); + int numa_node = get_numa_node_of_core(core_id); + + pthread_barrier_wait(&config->tc_barrier); +- 
pthread_mutex_lock(&config->tc_node_mutex[numa_node]); +- if (config->node_update_flag[numa_node] == 0) { +- tc_ring_one_init_data_pattern((uintptr_t)config->test_space_base[numa_node], config->space_size, scan_bit); +- config->node_update_flag[numa_node] = 1; ++ if (numa_node >= 0) { ++ pthread_mutex_lock(&config->tc_node_mutex[numa_node]); ++ if (config->node_update_flag[numa_node] == 0) { ++ tc_ring_one_init_data_pattern((uintptr_t)config->test_space_base[numa_node], config->mem_size, scan_bit); ++ config->node_update_flag[numa_node] = 1; ++ } ++ ++ pthread_mutex_unlock(&config->tc_node_mutex[numa_node]); + } +- pthread_mutex_unlock(&config->tc_node_mutex[numa_node]); ++ + pthread_barrier_wait(&config->tc_barrier); + } + +-static void tc_ring_one_testspace_recover(tc_ring_one_config_t *config, int scan_bit) ++static void tc_ring_one_testspace_recover(struct tc_ring_one_config *config, int scan_bit) + { +- uint32_t core_id = get_core_id_by_thread(config->tc_core_threads); ++ uint32_t core_id = sched_getcpu(); + int numa_node = get_numa_node_of_core(core_id); + + pthread_barrier_wait(&config->tc_barrier); +- pthread_mutex_lock(&config->tc_node_mutex[numa_node]); +- if (config->node_update_flag[numa_node] == 1) { +- tc_ring_one_data_clear((uintptr_t)config->test_space_base[numa_node], config->space_size, scan_bit); +- config->node_update_flag[numa_node] = 0; ++ if (numa_node >= 0) { ++ pthread_mutex_lock(&config->tc_node_mutex[numa_node]); ++ if (config->node_update_flag[numa_node] == 1) { ++ tc_ring_one_data_clear((uintptr_t)config->test_space_base[numa_node], config->mem_size, scan_bit); ++ config->node_update_flag[numa_node] = 0; ++ } ++ pthread_mutex_unlock(&config->tc_node_mutex[numa_node]); + } +- pthread_mutex_unlock(&config->tc_node_mutex[numa_node]); ++ + pthread_barrier_wait(&config->tc_barrier); + } + +@@ -534,8 +555,7 @@ static void tc_ring_one_testspace_recover(tc_ring_one_config_t *config, int scan + * @base_addr: 测试空间中指定测试块首地址(必须为128B对齐地址) + * 
@scan_bit: 待测试bit位置(0-511) + * @block_size: 待测试块空间大小 +- * @loop: 测试循环 +- * @err_cnt: 记录错误次数的内存地址 ++ * @loop_cnt: 巡检次数 + * + * 在调用本函数之前,要保证测试空间已经被待验证的数据Pattern初始化。 + * 待验证的数据Pattern: +@@ -544,108 +564,101 @@ static void tc_ring_one_testspace_recover(tc_ring_one_config_t *config, int scan + * + * 注意: 调用者要保证 base_addr + block_size 不能超过测试空间长度 + */ +-static void tc_ring_one_scan_test_block(uintptr_t base_addr, int scan_bit, size_t block_size, int *err_cnt) ++static bool tc_ring_one_scan_test_block(uintptr_t base_addr, int scan_bit, size_t block_size, uint64_t loop_cnt) + { + uint64_t tgt_dat_pattern = (1ULL << (scan_bit & 0x3F)); + size_t word_offset = (scan_bit >> 6) << 3; // 待测试bit在cacheline中的word的偏移位置(一个word为64 Byte) + uint64_t tgt_dat_all_one = ~0x0ULL; +- char json_result[2048]; + char err_msg[1024]; + uint64_t rd_data[2]; +- int i; ++ size_t i; + + for (i = 0; i < block_size; i += TC_RING_ONE_DATA_UNIT) { + rd_data[0] = *((uint64_t *)(base_addr + i + word_offset)); // base_addr + n * 64 + word_offset + rd_data[1] = *((uint64_t *)(base_addr + i + TC_RING_ONE_CACHELINE_SIZE + word_offset)); // base_addr + (n + 1) * 64 + word_offset + if((rd_data[0] != tgt_dat_all_one) || (rd_data[1] != tgt_dat_pattern)) { +- __atomic_add_fetch(err_cnt, 1, __ATOMIC_SEQ_CST); // 错误次数加1 +- snprintf(err_msg, sizeof(err_msg), "[ERROR][CORE%d] vaddr = %#lx paddr = %#lx read_data = %#llx read_disturb = %#llx target_data = " +- "%#llx bit_index = %d offset = %#lx block_size = %#lx\n", +- sched_getcpu(), +- (base_addr + i + word_offset), +- vaddr_to_phys(base_addr + i + word_offset), +- rd_data[1], +- rd_data[0], +- (1ULL << (scan_bit & 0x3F)), +- scan_bit, +- word_offset, +- block_size); +- logging_error("%s", err_msg); +- snprintf(json_result, sizeof(json_result), "{\"msg\":\"%s\", \"code\":2001}", err_msg); +- report_result(TOOL_NAME, RESULT_LEVEL_MAJOR_ALM, json_result); +- break; ++ snprintf(err_msg, sizeof(err_msg), "[ERROR][CORE%d] test loop %lu vaddr = %#lx paddr = %#lx read_data = 
%#llx read_disturb = %#llx " ++ "target_data = %#llx bit_index = %d offset = %#lx block_size = %#lx", ++ sched_getcpu(), loop_cnt, (base_addr + i + word_offset), vaddr_to_phys(base_addr + i + word_offset), rd_data[1], rd_data[0], ++ tgt_dat_pattern, scan_bit, word_offset, block_size); ++ soc_ring_sentry_report(RESULT_LEVEL_MAJOR_ALM, err_msg); ++ return false; + } + } ++ ++ return true; + } + +-static void tc_ring_one_scan_bit(tc_ring_one_config_t *config, uint64_t loop) ++static bool tc_ring_one_scan_bit(struct tc_ring_one_config *config, int scan_bit, uint64_t loop_cnt) + { + int core_id = sched_getcpu(); +- int numa_node; ++ int numa_node = get_numa_node_of_core(core_id);; + uint32_t i, j; + +- numa_node = get_numa_node_of_core(core_id); ++ // numa为无效值时直接返回,不再执行扫描测试 ++ if (numa_node < 0) { ++ return true; ++ } ++ + for (i = 0; i < config->rd_loop; i++) { +- for (j = 0; j < config->space_size; j += config->block_size) { ++ for (j = 0; j < config->mem_size; j += config->block_size) { + // 按测试块大小,扫描测试空间 +- tc_ring_one_scan_test_block((uintptr_t)((char *)(config->test_space_base[numa_node])) + j, +- config->scan_bit, +- config->block_size, +- &(config->err_cnt)); +- +- if (config->err_cnt > 0) { +- logging_error("[ERROR][CORE%d] dbls_scan_bit error, scan_bit = %d, err_cnt = %d vir_base_addr = %p " +- "phy_base_addr = %p block = %ld rd_loop = %d loop = %d\n", +- sched_getcpu(), +- config->scan_bit, +- config->err_cnt, +- config->test_space_base[numa_node], +- vaddr_to_phys((uintptr_t)(config->test_space_base[numa_node])), +- j / config->block_size, +- i, loop); +- return; ++ if (!tc_ring_one_scan_test_block((uintptr_t)((char *)(config->test_space_base[numa_node])) + j, ++ scan_bit, config->block_size, loop_cnt)) { ++ __atomic_add_fetch(&config->err_flag, 1, __ATOMIC_SEQ_CST); ++ return false; + } + +- if (config->sleep_ms > 0) { ++ if (config->delay_ms > 0) { + // 每扫描完一个测试块就休眠一段时间 +- usleep(config->sleep_ms * 1000); ++ usleep(config->delay_ms * 1000); + } + } + } ++ 
++ return true; ++} ++ ++static bool tc_ring_one_test_issue(struct tc_ring_one_config *config, int scan_bit, int loop_cnt) ++{ ++ bool ret; ++ ++ tc_ring_one_testspace_update(config, scan_bit); ++ ret = tc_ring_one_scan_bit(config, scan_bit, loop_cnt); ++ tc_ring_one_testspace_recover(config, scan_bit); ++ ++ if (config->err_flag) { ++ return false; ++ } ++ ++ return ret; + } + + static void* tc_ring_one_thread_entry(void *arg) + { +- tc_ring_one_config_t *config = arg; ++ struct tc_ring_one_config *config = arg; + int scan_sequence_id = 0; + int scan_special_id = 0; + uint64_t loop_cnt = 0; + uint64_t tc_flag = 1; + +- // 等待所有线程准备就绪 +- pthread_barrier_wait(&config->tc_barrier); +- +- while (tc_flag == 1) { ++ while (tc_flag == 1 && config->err_flag == 0) { + config->scan_bit = scan_sequence_id; +- tc_ring_one_testspace_update(config, scan_sequence_id); +- tc_ring_one_scan_bit(config, loop_cnt); +- tc_ring_one_testspace_recover(config, scan_sequence_id); +- if (config->err_cnt > 0) { ++ pthread_barrier_wait(&config->tc_barrier); ++ if (!tc_ring_one_test_issue(config, config->scan_bit, loop_cnt)) { + break; + } +- scan_sequence_id = (scan_sequence_id + 1) % (TC_RING_ONE_CACHELINE_SIZE * 8); + +- scan_special_id = g_tc_ring_one_special_bits[rand() % 8]; ++ scan_sequence_id = (scan_sequence_id + 1) % (TC_RING_ONE_CACHELINE_SIZE * 8); ++ scan_special_id = g_tc_ring_one_special_bits[random() % 8]; + config->scan_bit = scan_special_id; + pthread_barrier_wait(&config->tc_barrier); +- tc_ring_one_testspace_update(config, config->scan_bit); +- tc_ring_one_scan_bit(config, loop_cnt); +- tc_ring_one_testspace_recover(config, config->scan_bit); +- if (config->err_cnt > 0) { ++ if (!tc_ring_one_test_issue(config, config->scan_bit, loop_cnt)) { + break; + } ++ + loop_cnt++; +- if ((config->loop_total != 0) && (loop_cnt >= config->loop_total)) { ++ if ((config->loop_cnt != 0) && (loop_cnt >= config->loop_cnt)) { + tc_flag = 0; + } + } +@@ -653,37 +666,4 @@ static void* 
tc_ring_one_thread_entry(void *arg) + return config; + } + +-void tc_ring_one_post_process(int result) +-{ +- if (result == TC_RING_ONE_SUCCESS) { +- logging_info("tc_ring_one test pass\n"); +- } else if (result == TC_RING_ONE_PRAMA_ERR) { +- // 通过log打印 +- logging_error("The system can not run the tc_ring_one:\n"); +- logging_error("1. the system must support NUMA\n"); +- logging_error("2. the memory in the system maybe too small\n"); +- report_result(TOOL_NAME, RESULT_LEVEL_FAIL, "{\"msg\":\"The system can not run the tc_ring_one testcase\", \"code\":1001}"); +- } else if (result == TC_RING_ONE_FAIL) { +- switch (g_tc_config.err_handle) { // 根据错误处理策略进行相应的处理 +- case TC_ERROR_HANDLE_NONE: +- logging_error("the system administrator must handle this error!!!\n"); +- break; +- case TC_ERROR_HANDLE_SHUTDOWN: +- logging_error("Execute 'shutdown'\n"); +- if (reboot(RB_POWER_OFF) < 0) { +- logging_error("ERROR: Failed to execute 'shutdown'\n"); +- } +- break; +- case TC_ERROR_HANDLE_REBOOT: +- logging_error("Execute 'reboot'\n"); +- if (reboot(RB_AUTOBOOT) < 0) { +- logging_error("ERROR: Failed to execute 'reboot'\n"); +- } +- break; +- default: // panic +- abort(); +- } +- } +-} +- + // end of tc_ring_one.c +diff --git a/src/sentryPlugins/soc_ring_sentry/tc_ring_one.h b/src/sentryPlugins/soc_ring_sentry/tc_ring_one.h +index 015a5b3..d5a25ee 100755 +--- a/src/sentryPlugins/soc_ring_sentry/tc_ring_one.h ++++ b/src/sentryPlugins/soc_ring_sentry/tc_ring_one.h +@@ -12,7 +12,7 @@ + + + /** +- * tc_ring_one_create_threads - ++ * tc_ring_one_main - + * 测试用例总入口,该函数会为测试申请测试内存空间,为每个测试核创建测试线程,并将测试线程调度到测试核上 + * @mem_size: 用户指定的测试内存空间大小,系统每个numa节点均提供对用大小的测试空间,用于巡检测试 + * @loop_cnt: 测试循环次数, 大于0,则按照对应的循环做巡检测试,等于0,则巡检线程持续驻留在测试核中 +@@ -32,7 +32,6 @@ + * -2 - 检出到数据错误 + * + */ +-int tc_ring_one_create_threads(uint64_t mem_size, uint64_t loop_cnt, uint64_t delay, uint64_t err_handle, bool *blacklist, size_t core_num); +-void tc_ring_one_post_process(int result); ++int tc_ring_one_main(uint64_t 
mem_size, uint64_t loop_cnt, uint64_t delay, uint64_t err_handle, bool *blacklist, size_t core_num); + + #endif/*__TC_RING_ONE_H__*/ +\ No newline at end of file +-- +2.33.0 +