From 8f3e47c68be5838bae43e762d04af4db7f7a4aa8 Mon Sep 17 00:00:00 2001 From: Jason Yan Date: Mon, 1 Dec 2025 16:27:03 +0800 Subject: [PATCH 01/10] scsi: libsas: reset the negotiated_linkrate when phy is down hulk inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/ID9GQH -------------------------------- If the device is unplugged or disconnected, the negotiated_linkrate still can be seen from the userspace by sysfs. This makes people confused and leaks information of the device last used. So let's reset the negotiated_linkrate after the phy is down. Signed-off-by: Jason Yan CC: John Garry CC: Johannes Thumshirn CC: Ewan Milne CC: Christoph Hellwig CC: Tomas Henzl CC: Dan Williams CC: Hannes Reinecke Signed-off-by: Li Lingfeng --- drivers/scsi/libsas/sas_expander.c | 2 ++ include/scsi/libsas.h | 3 +++ 2 files changed, 5 insertions(+) diff --git a/drivers/scsi/libsas/sas_expander.c b/drivers/scsi/libsas/sas_expander.c index a901f2f79029..b27d39f56d1a 100644 --- a/drivers/scsi/libsas/sas_expander.c +++ b/drivers/scsi/libsas/sas_expander.c @@ -1931,6 +1931,8 @@ static void sas_unregister_devs_sas_addr(struct domain_device *parent, } phy->port = NULL; } + if (phy->phy) + phy->phy->negotiated_linkrate = SAS_LINK_RATE_UNKNOWN; } static int sas_discover_bfs_by_root_level(struct domain_device *root, diff --git a/include/scsi/libsas.h b/include/scsi/libsas.h index 698f2032807b..99228996991a 100644 --- a/include/scsi/libsas.h +++ b/include/scsi/libsas.h @@ -434,6 +434,9 @@ static inline void sas_phy_disconnected(struct asd_sas_phy *phy) { phy->oob_mode = OOB_NOT_CONNECTED; phy->linkrate = SAS_LINK_RATE_UNKNOWN; + + if (phy->phy) + phy->phy->negotiated_linkrate = SAS_LINK_RATE_UNKNOWN; } static inline unsigned int to_sas_gpio_od(int device, int bit) -- Gitee From 465e50d0bc852a90307143fec13b4d32aa6a74d0 Mon Sep 17 00:00:00 2001 From: Jason Yan Date: Mon, 1 Dec 2025 16:27:04 +0800 Subject: [PATCH 02/10] scsi: libsas: optimize the debug print of the 
revalidate process hulk inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/ID9GQH -------------------------------- Upstream: No sas_rediscover() returns error code if discover failed for a expander phy. And sas_ex_revalidate_domain() only returns the last phy's error code. So when sas_revalidate_domain() prints the return value of the discover process, we do not know if the revalidation for every phy is successful or not. We just know the last bcast phy revalidation succeeded or not. No need to return a error code for sas_ex_revalidate_domain() and sas_rediscover(), and just print the debug log for each bcast phy directly in sas_rediscover(). Signed-off-by: Jason Yan CC: John Garry CC: Johannes Thumshirn CC: Ewan Milne CC: Christoph Hellwig CC: Tomas Henzl CC: Dan Williams CC: Hannes Reinecke Signed-off-by: Li Lingfeng --- drivers/scsi/libsas/sas_discover.c | 7 +++---- drivers/scsi/libsas/sas_expander.c | 11 ++++++----- include/scsi/libsas.h | 2 +- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/scsi/libsas/sas_discover.c b/drivers/scsi/libsas/sas_discover.c index f2ad4c5eadc3..4932f63b69bb 100644 --- a/drivers/scsi/libsas/sas_discover.c +++ b/drivers/scsi/libsas/sas_discover.c @@ -518,7 +518,6 @@ static void sas_discover_domain(struct work_struct *work) static void sas_revalidate_domain(struct work_struct *work) { - int res = 0; struct sas_discovery_event *ev = to_sas_discovery_event(work); struct asd_sas_port *port = ev->port; struct sas_ha_struct *ha = port->ha; @@ -538,10 +537,10 @@ static void sas_revalidate_domain(struct work_struct *work) task_pid_nr(current)); if (ddev && dev_is_expander(ddev->dev_type)) - res = sas_ex_revalidate_domain(ddev); + sas_ex_revalidate_domain(ddev); - pr_debug("done REVALIDATING DOMAIN on port %d, pid:%d, res 0x%x\n", - port->id, task_pid_nr(current), res); + pr_debug("done REVALIDATING DOMAIN on port %d, pid:%d\n", + port->id, task_pid_nr(current)); out: 
mutex_unlock(&ha->disco_mutex); diff --git a/drivers/scsi/libsas/sas_expander.c b/drivers/scsi/libsas/sas_expander.c index b27d39f56d1a..1b6230c646a1 100644 --- a/drivers/scsi/libsas/sas_expander.c +++ b/drivers/scsi/libsas/sas_expander.c @@ -2112,7 +2112,7 @@ static int sas_rediscover_dev(struct domain_device *dev, int phy_id, * first phy,for other phys in this port, we add it to the port to * forming the wide-port. */ -static int sas_rediscover(struct domain_device *dev, const int phy_id) +static void sas_rediscover(struct domain_device *dev, const int phy_id) { struct expander_device *ex = &dev->ex_dev; struct ex_phy *changed_phy = &ex->ex_phy[phy_id]; @@ -2138,7 +2138,9 @@ static int sas_rediscover(struct domain_device *dev, const int phy_id) res = sas_rediscover_dev(dev, phy_id, last, i); } else res = sas_discover_new(dev, phy_id); - return res; + + pr_debug("ex %016llx phy%d discover returned 0x%x\n", + SAS_ADDR(dev->sas_addr), phy_id, res); } /** @@ -2150,7 +2152,7 @@ static int sas_rediscover(struct domain_device *dev, const int phy_id) * Discover process only interrogates devices in order to discover the * domain. 
*/ -int sas_ex_revalidate_domain(struct domain_device *port_dev) +void sas_ex_revalidate_domain(struct domain_device *port_dev) { int res; struct domain_device *dev = NULL; @@ -2165,11 +2167,10 @@ int sas_ex_revalidate_domain(struct domain_device *port_dev) res = sas_find_bcast_phy(dev, &phy_id, i, true); if (phy_id == -1) break; - res = sas_rediscover(dev, phy_id); + sas_rediscover(dev, phy_id); i = phy_id + 1; } while (i < ex->num_phys); } - return res; } void sas_smp_handler(struct bsg_job *job, struct Scsi_Host *shost, diff --git a/include/scsi/libsas.h b/include/scsi/libsas.h index 99228996991a..d97624e77fb1 100644 --- a/include/scsi/libsas.h +++ b/include/scsi/libsas.h @@ -684,7 +684,7 @@ int sas_discover_root_expander(struct domain_device *); void sas_init_ex_attr(void); -int sas_ex_revalidate_domain(struct domain_device *); +void sas_ex_revalidate_domain(struct domain_device *); void sas_unregister_domain_devices(struct asd_sas_port *port, int gone); void sas_init_disc(struct sas_discovery *disc, struct asd_sas_port *); -- Gitee From 994fd405eeaa9c2285373c1eaf50daea7860fffd Mon Sep 17 00:00:00 2001 From: Jason Yan Date: Mon, 1 Dec 2025 16:27:05 +0800 Subject: [PATCH 03/10] scsi: libsas: split the replacement of sas disks in two steps hulk inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/ID9GQH -------------------------------- Upstream: No Now if a new device replaced a old device, the sas address will change. We unregister the old device and discover the new device in one revalidation process. But after we deferred the sas_port_delete(), the sas port is not deleted when we registering the new port and device. The sas port cannot be added because the name of the new port is the same as the old. Fix this by doing the replacement in two steps. The first revalidation only delete the old device and trigger a new revalidation. The second revalidation discover the new device. 
To keep the event processing synchronised to the original event, we wrapped a loop and added a new parameter to see if we should revalidate again. Signed-off-by: Jason Yan CC: chenxiang CC: John Garry CC: Johannes Thumshirn CC: Ewan Milne CC: Christoph Hellwig CC: Tomas Henzl CC: Dan Williams CC: Hannes Reinecke Signed-off-by: Li Lingfeng --- drivers/scsi/libsas/sas_discover.c | 21 ++++++++++++++++----- drivers/scsi/libsas/sas_expander.c | 22 ++++++++++++++-------- include/scsi/libsas.h | 2 +- 3 files changed, 31 insertions(+), 14 deletions(-) diff --git a/drivers/scsi/libsas/sas_discover.c b/drivers/scsi/libsas/sas_discover.c index 4932f63b69bb..0d524d901be3 100644 --- a/drivers/scsi/libsas/sas_discover.c +++ b/drivers/scsi/libsas/sas_discover.c @@ -516,12 +516,10 @@ static void sas_discover_domain(struct work_struct *work) task_pid_nr(current), error); } -static void sas_revalidate_domain(struct work_struct *work) +static void sas_do_revalidate_domain(struct asd_sas_port *port, bool *retry) { - struct sas_discovery_event *ev = to_sas_discovery_event(work); - struct asd_sas_port *port = ev->port; - struct sas_ha_struct *ha = port->ha; struct domain_device *ddev = port->port_dev; + struct sas_ha_struct *ha = port->ha; /* prevent revalidation from finding sata links in recovery */ mutex_lock(&ha->disco_mutex); @@ -537,7 +535,8 @@ static void sas_revalidate_domain(struct work_struct *work) task_pid_nr(current)); if (ddev && dev_is_expander(ddev->dev_type)) - sas_ex_revalidate_domain(ddev); + sas_ex_revalidate_domain(ddev, retry); + pr_debug("done REVALIDATING DOMAIN on port %d, pid:%d\n", port->id, task_pid_nr(current)); @@ -549,6 +548,18 @@ static void sas_revalidate_domain(struct work_struct *work) sas_probe_devices(port); } +static void sas_revalidate_domain(struct work_struct *work) +{ + struct sas_discovery_event *ev = to_sas_discovery_event(work); + struct asd_sas_port *port = ev->port; + bool retry; + + do { + retry = false; + sas_do_revalidate_domain(port, 
&retry); + } while (retry); +} + /* ---------- Events ---------- */ static void sas_chain_work(struct sas_ha_struct *ha, struct sas_work *sw) diff --git a/drivers/scsi/libsas/sas_expander.c b/drivers/scsi/libsas/sas_expander.c index 1b6230c646a1..a7fd97ae01c3 100644 --- a/drivers/scsi/libsas/sas_expander.c +++ b/drivers/scsi/libsas/sas_expander.c @@ -2019,8 +2019,8 @@ static bool dev_type_flutter(enum sas_device_type new, enum sas_device_type old) return false; } -static int sas_rediscover_dev(struct domain_device *dev, int phy_id, - bool last, int sibling) +static int sas_unregister(struct domain_device *dev, int phy_id, bool last, + bool *retry, int sibling) { struct expander_device *ex = &dev->ex_dev; struct ex_phy *phy = &ex->ex_phy[phy_id]; @@ -2033,7 +2033,7 @@ static int sas_rediscover_dev(struct domain_device *dev, int phy_id, if (!last) sprintf(msg, ", part of a wide port with phy%02d", sibling); - pr_debug("ex %016llx rediscovering phy%02d%s\n", + pr_debug("ex %016llx unregistering phy%02d%s\n", SAS_ADDR(dev->sas_addr), phy_id, msg); memset(sas_addr, 0, SAS_ADDR_SIZE); @@ -2092,7 +2092,11 @@ static int sas_rediscover_dev(struct domain_device *dev, int phy_id, SAS_ADDR(phy->attached_sas_addr)); sas_unregister_devs_sas_addr(dev, phy_id, last); - res = sas_discover_new(dev, phy_id); + /* force the next revalidation find this phy and bring it up */ + phy->phy_change_count = -1; + ex->ex_change_count = -1; + *retry = true; + res = 0; out_free_resp: kfree(disc_resp); return res; @@ -2112,7 +2116,8 @@ static int sas_rediscover_dev(struct domain_device *dev, int phy_id, * first phy,for other phys in this port, we add it to the port to * forming the wide-port. 
*/ -static void sas_rediscover(struct domain_device *dev, const int phy_id) +static void sas_rediscover(struct domain_device *dev, const int phy_id, + bool *retry) { struct expander_device *ex = &dev->ex_dev; struct ex_phy *changed_phy = &ex->ex_phy[phy_id]; @@ -2135,7 +2140,7 @@ static void sas_rediscover(struct domain_device *dev, const int phy_id) break; } } - res = sas_rediscover_dev(dev, phy_id, last, i); + res = sas_unregister(dev, phy_id, last, retry, i); } else res = sas_discover_new(dev, phy_id); @@ -2146,13 +2151,14 @@ static void sas_rediscover(struct domain_device *dev, const int phy_id) /** * sas_ex_revalidate_domain - revalidate the domain * @port_dev: port domain device. + * @retry: do we need to revalidate again * * NOTE: this process _must_ quit (return) as soon as any connection * errors are encountered. Connection recovery is done elsewhere. * Discover process only interrogates devices in order to discover the * domain. */ -void sas_ex_revalidate_domain(struct domain_device *port_dev) +void sas_ex_revalidate_domain(struct domain_device *port_dev, bool *retry) { int res; struct domain_device *dev = NULL; @@ -2167,7 +2173,7 @@ void sas_ex_revalidate_domain(struct domain_device *port_dev) res = sas_find_bcast_phy(dev, &phy_id, i, true); if (phy_id == -1) break; - sas_rediscover(dev, phy_id); + sas_rediscover(dev, phy_id, retry); i = phy_id + 1; } while (i < ex->num_phys); } diff --git a/include/scsi/libsas.h b/include/scsi/libsas.h index d97624e77fb1..b7bb9d493800 100644 --- a/include/scsi/libsas.h +++ b/include/scsi/libsas.h @@ -684,7 +684,7 @@ int sas_discover_root_expander(struct domain_device *); void sas_init_ex_attr(void); -void sas_ex_revalidate_domain(struct domain_device *); +void sas_ex_revalidate_domain(struct domain_device *port_dev, bool *retry); void sas_unregister_domain_devices(struct asd_sas_port *port, int gone); void sas_init_disc(struct sas_discovery *disc, struct asd_sas_port *); -- Gitee From 
52b312b70ba47a9828242b430c65d50fb1377d97 Mon Sep 17 00:00:00 2001 From: Jason Yan Date: Mon, 1 Dec 2025 16:27:06 +0800 Subject: [PATCH 04/10] scsi: libsas: check if the same device when flutter hulk inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/ID9GQH -------------------------------- Upstream: No An ata device does not have a real sas address. If an ata device is replaced with another one, the sas address stays the same. Currently libsas treats this scenario as flutter, so it neither deletes the old device nor discovers the new one. This causes data to be read from or written to the wrong device. Also, when hotplugging a sata device, libsas sometimes enters the flutter case and finds that the phy attached address is abnormal. The log looks like this: sas: ex 500e004aaaaaaa1f phy6 originated BROADCAST(CHANGE) sas: ex 500e004aaaaaaa1f phy06:U:0 attached: 0000000000000000 (no device) sas: ex 500e004aaaaaaa1f phy 0x6 broadcast flutter Fix this issue by checking whether the phy attached address and the ata device's class and id are the same as the original ones. The ata class and id are read in the ata EH process. When ata EH is scheduled, revalidation will be deferred and a new bcast will be raised. 
Tested-by: Chen Liangfei Signed-off-by: Jason Yan Reviewed-by: John Garry CC: chenxiang CC: John Garry CC: Johannes Thumshirn CC: Ewan Milne CC: Christoph Hellwig CC: Tomas Henzl CC: Dan Williams CC: Tejun Heo CC: Hannes Reinecke Signed-off-by: Li Lingfeng --- drivers/ata/libata-core.c | 3 +- drivers/scsi/libsas/sas_ata.c | 18 ++++++++ drivers/scsi/libsas/sas_expander.c | 67 +++++++++++++++++++++++++----- include/linux/libata.h | 2 + include/scsi/libsas.h | 1 + 5 files changed, 80 insertions(+), 11 deletions(-) diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 107c28ec23b8..11c91d7231aa 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -3628,7 +3628,7 @@ EXPORT_SYMBOL_GPL(ata_std_postreset); * RETURNS: * 1 if @dev matches @new_class and @new_id, 0 otherwise. */ -static int ata_dev_same_device(struct ata_device *dev, unsigned int new_class, +int ata_dev_same_device(struct ata_device *dev, unsigned int new_class, const u16 *new_id) { const u16 *old_id = dev->id; @@ -3660,6 +3660,7 @@ static int ata_dev_same_device(struct ata_device *dev, unsigned int new_class, return 1; } +EXPORT_SYMBOL_GPL(ata_dev_same_device); /** * ata_dev_reread_id - Re-read IDENTIFY data diff --git a/drivers/scsi/libsas/sas_ata.c b/drivers/scsi/libsas/sas_ata.c index e2445d5d41ca..88cb015fcc70 100644 --- a/drivers/scsi/libsas/sas_ata.c +++ b/drivers/scsi/libsas/sas_ata.c @@ -651,6 +651,22 @@ void sas_ata_task_abort(struct sas_task *task) complete(waiting); } +static void sas_ata_store_id(struct domain_device *dev) +{ + struct ata_device *ata_dev = sas_to_ata_dev(dev); + unsigned char model[ATA_ID_PROD_LEN + 1]; + unsigned char serial[ATA_ID_SERNO_LEN + 1]; + + /* store the ata device's class and complete id, sized in bytes */ + memcpy(dev->sata_dev.id, ata_dev->id, sizeof(dev->sata_dev.id)); + dev->sata_dev.class = ata_dev->class; + + ata_id_c_string(ata_dev->id, model, ATA_ID_PROD, sizeof(model)); + ata_id_c_string(ata_dev->id, serial, ATA_ID_SERNO, sizeof(serial)); + + 
sas_ata_printk(KERN_INFO, dev, "model:%s serial:%s\n", model, serial); +} + static int sas_get_ata_command_set(struct domain_device *dev) { struct dev_to_host_fis *fis = @@ -689,6 +705,8 @@ void sas_probe_sata(struct asd_sas_port *port) */ if (!ata_dev_enabled(sas_to_ata_dev(dev))) sas_fail_probe(dev, __func__, -ENODEV); + else + sas_ata_store_id(dev); } } diff --git a/drivers/scsi/libsas/sas_expander.c b/drivers/scsi/libsas/sas_expander.c index a7fd97ae01c3..320778178aca 100644 --- a/drivers/scsi/libsas/sas_expander.c +++ b/drivers/scsi/libsas/sas_expander.c @@ -2019,6 +2019,61 @@ static bool dev_type_flutter(enum sas_device_type new, enum sas_device_type old) return false; } +/* + * we think the device is fluttering so just read the phy state and update + * some information of the device, but if some important things changed + * such as the sas address, or the linkrate, or the ata devices id and class, + * we have to unregister the device and re-probe it. + */ +static bool sas_process_flutter(struct domain_device *dev, struct ex_phy *phy, + int phy_id, u8 *sas_addr) +{ + struct domain_device *ata_dev = sas_ex_to_ata(dev, phy_id); + enum sas_linkrate linkrate = phy->linkrate; + char *action = ""; + + sas_ex_phy_discover(dev, phy_id); + + if (ata_dev && phy->attached_dev_type == SAS_SATA_PENDING) + action = ", needs recovery"; + pr_debug("ex %016llx phy%d broadcast flutter%s\n", + SAS_ADDR(dev->sas_addr), phy_id, action); + + if (linkrate != phy->linkrate) { + pr_debug("ex %016llx phy%d linkrate changed from %d to %d\n", + SAS_ADDR(dev->sas_addr), phy_id, + linkrate, phy->linkrate); + return false; + } + + /* the phy attached address will be updated by sas_ex_phy_discover() + * and sometimes become abnormal + */ + if (SAS_ADDR(phy->attached_sas_addr) != SAS_ADDR(sas_addr) || + SAS_ADDR(phy->attached_sas_addr) == 0) { + /* if attached_sas_addr become abnormal, we must set the + * original address back so that the device can be unregistered + */ + 
memcpy(phy->attached_sas_addr, sas_addr, SAS_ADDR_SIZE); + pr_debug("phy address(%016llx) abnormal, origin:%016llx\n", + SAS_ADDR(phy->attached_sas_addr), + SAS_ADDR(sas_addr)); + return false; + } + + if (ata_dev) { + struct ata_device *adev = sas_to_ata_dev(ata_dev); + unsigned int class = ata_dev->sata_dev.class; + u16 *id = ata_dev->sata_dev.id; + + /* to see if the disk is replaced with another one */ + if (!ata_dev_same_device(adev, class, id)) + return false; + } + + return true; +} + static int sas_unregister(struct domain_device *dev, int phy_id, bool last, bool *retry, int sibling) { @@ -2074,16 +2129,8 @@ static int sas_unregister(struct domain_device *dev, int phy_id, bool last, goto out_free_resp; } else if (SAS_ADDR(sas_addr) == SAS_ADDR(phy->attached_sas_addr) && dev_type_flutter(type, phy->attached_dev_type)) { - struct domain_device *ata_dev = sas_ex_to_ata(dev, phy_id); - char *action = ""; - - sas_ex_phy_discover(dev, phy_id); - - if (ata_dev && phy->attached_dev_type == SAS_SATA_PENDING) - action = ", needs recovery"; - pr_debug("ex %016llx phy%02d broadcast flutter%s\n", - SAS_ADDR(dev->sas_addr), phy_id, action); - goto out_free_resp; + if (sas_process_flutter(dev, phy, phy_id, sas_addr)) + goto out_free_resp; } /* we always have to delete the old device when we went here */ diff --git a/include/linux/libata.h b/include/linux/libata.h index 1ceec830d5f7..8184a0cf155d 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -1110,6 +1110,8 @@ extern int ata_sas_scsi_ioctl(struct ata_port *ap, struct scsi_device *dev, unsigned int cmd, void __user *arg); extern bool ata_link_online(struct ata_link *link); extern bool ata_link_offline(struct ata_link *link); +extern int ata_dev_same_device(struct ata_device *dev, unsigned int new_class, + const u16 *new_id); #ifdef CONFIG_PM extern int ata_host_suspend(struct ata_host *host, pm_message_t mesg); extern void ata_host_resume(struct ata_host *host); diff --git a/include/scsi/libsas.h 
b/include/scsi/libsas.h index b7bb9d493800..799574dbb737 100644 --- a/include/scsi/libsas.h +++ b/include/scsi/libsas.h @@ -147,6 +147,7 @@ struct sata_device { struct ata_host *ata_host; struct smp_resp rps_resp ____cacheline_aligned; /* report_phy_sata_resp */ u8 fis[ATA_RESP_FIS_SIZE]; + u16 id[ATA_ID_WORDS]; }; struct ssp_device { -- Gitee From 16e9dc28061f3194d1327a9b8c0db60e5a59a88a Mon Sep 17 00:00:00 2001 From: Jason Yan Date: Mon, 1 Dec 2025 16:27:07 +0800 Subject: [PATCH 05/10] scsi: libsas: reset the phy address if discover failed hulk inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/ID9GQH -------------------------------- Upstream: No When we fail to discover the device, the phy address is still kept in ex_phy. So the next time we revalidate this phy, the address and device type are the same, and the phy will be considered as flutter and will not be discovered again. So the device will not be brought up. Fix this by resetting the phy address to the initial value. Then in the next revalidation the device will be discovered again. 
Tested-by: Chen Liangfei Signed-off-by: Jason Yan CC: Xiaofei Tan CC: John Garry CC: Johannes Thumshirn CC: Ewan Milne CC: Christoph Hellwig CC: Tomas Henzl CC: Dan Williams CC: Hannes Reinecke Reviewed-by: John Garry Signed-off-by: Li Lingfeng --- drivers/scsi/libsas/sas_expander.c | 29 ++++++++++++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/libsas/sas_expander.c b/drivers/scsi/libsas/sas_expander.c index 320778178aca..74cc362a11ad 100644 --- a/drivers/scsi/libsas/sas_expander.c +++ b/drivers/scsi/libsas/sas_expander.c @@ -1126,9 +1126,36 @@ static int sas_ex_discover_dev(struct domain_device *dev, int phy_id) break; } - if (!child) + if (child) { + int i; + + for (i = 0; i < ex->num_phys; i++) { + if (ex->ex_phy[i].phy_state == PHY_VACANT || + ex->ex_phy[i].phy_state == PHY_NOT_PRESENT) + continue; + /* + * Due to races, the phy might not get added to the + * wide port, so we add the phy to the wide port here. + */ + if (SAS_ADDR(ex->ex_phy[i].attached_sas_addr) == + SAS_ADDR(child->sas_addr)) { + ex->ex_phy[i].phy_state= PHY_DEVICE_DISCOVERED; + if (sas_ex_join_wide_port(dev, i)) + pr_debug("Attaching ex phy%02d to wide port %016llx\n", + i, SAS_ADDR(ex->ex_phy[i].attached_sas_addr)); + } + } + } else { pr_notice("ex %016llx phy%02d failed to discover\n", SAS_ADDR(dev->sas_addr), phy_id); + /* if we failed to discover this device, we have to + * reset the expander phy attached address so that we + * will not treat the phy as flutter in the next + * revalidation + */ + memset(ex_phy->attached_sas_addr, 0, SAS_ADDR_SIZE); + } + return res; } -- Gitee From ad93567ef9e10540b2428ded6fae5d075de6db89 Mon Sep 17 00:00:00 2001 From: Jason Yan Date: Mon, 1 Dec 2025 16:27:08 +0800 Subject: [PATCH 06/10] scsi: libsas: fix issue of swapping two sas disks hulk inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/ID9GQH -------------------------------- Upstream: No The work flow of revalidation now is 
scanning expander phys in phy order and checking whether each phy has changed. This leads to an issue when swapping two sas disks on one expander. Assume we have two sas disks, connected with expander phy10 and phy11: phy10: 5000cca04eb1001d port-0:0:10 phy11: 5000cca04eb043ad port-0:0:11 Swap these two disks, and imagine the following scenario: revalidation 1: -->phy10: 0 --> delete phy10 domain device -->phy11: 5000cca04eb043ad (no change) revalidation done revalidation 2: -->step 1, check phy10: -->phy10: 5000cca04eb043ad --> add to wide port(port-0:0:11) (phy11 address is still 5000cca04eb043ad now) -->step 2, check phy11: -->phy11: 0 --> phy11 address is 0 now, but it's part of wide port(port-0:0:11), the domain device will not be deleted. revalidation done revalidation 3: -->phy10, 5000cca04eb043ad (no change) -->phy11: 5000cca04eb1001d --> try to add port-0:0:11 but failed, port-0:0:11 already exists, trigger a warning as follows revalidation done [14790.189699] sysfs: cannot create duplicate filename '/devices/pci0000:74/0000:74:02.0/host0/port-0:0/expander-0:0/port-0:0:11' [14790.201081] CPU: 25 PID: 5031 Comm: kworker/u192:3 Not tainted 4.16.0-rc1-191134-g138f084-dirty #228 [14790.210199] Hardware name: Huawei D06/D06, BIOS Hisilicon D06 EC UEFI Nemo 2.0 RC0 - B303 05/16/2018 [14790.219323] Workqueue: 0000:74:02.0_disco_q sas_revalidate_domain [14790.225404] Call trace: [14790.227842] dump_backtrace+0x0/0x18c [14790.231492] show_stack+0x14/0x1c [14790.234798] dump_stack+0x88/0xac [14790.238101] sysfs_warn_dup+0x64/0x7c [14790.241751] sysfs_create_dir_ns+0x90/0xa0 [14790.245835] kobject_add_internal+0xa0/0x284 [14790.250092] kobject_add+0xb8/0x11c [14790.253570] device_add+0xe8/0x598 [14790.256960] sas_port_add+0x24/0x50 [14790.260436] sas_ex_discover_devices+0xb10/0xc30 [14790.265040] sas_ex_revalidate_domain+0x1d8/0x518 [14790.269731] sas_revalidate_domain+0x12c/0x154 [14790.274163] process_one_work+0x128/0x2b0 [14790.278160] 
worker_thread+0x14c/0x408 [14790.281897] kthread+0xfc/0x128 [14790.285026] ret_from_fork+0x10/0x18 [14790.288598] ------------[ cut here ]------------ In the end, the disk 5000cca04eb1001d is lost. The basic idea of the fix is to let the revalidation first scan all phys and then unregister devices. Only when no devices need to be unregistered does it go on to the next step and discover new devices. If there are devices that need to be unregistered, unregister those devices and tell the revalidation event processor to retry. The next revalidation will then discover the new devices. Tested-by: Chen Liangfei Signed-off-by: Jason Yan CC: Xiaofei Tan CC: chenxiang CC: John Garry CC: Johannes Thumshirn CC: Ewan Milne CC: Christoph Hellwig CC: Tomas Henzl CC: Dan Williams CC: Hannes Reinecke Signed-off-by: Li Lingfeng --- drivers/scsi/libsas/sas_expander.c | 153 +++++++++++++++++++---------- 1 file changed, 102 insertions(+), 51 deletions(-) diff --git a/drivers/scsi/libsas/sas_expander.c b/drivers/scsi/libsas/sas_expander.c index 74cc362a11ad..f08da82e699b 100644 --- a/drivers/scsi/libsas/sas_expander.c +++ b/drivers/scsi/libsas/sas_expander.c @@ -2101,22 +2101,18 @@ static bool sas_process_flutter(struct domain_device *dev, struct ex_phy *phy, return true; } -static int sas_unregister(struct domain_device *dev, int phy_id, bool last, - bool *retry, int sibling) +static int sas_ex_unregister(struct domain_device *dev, int phy_id, bool last, + bool *retry) { struct expander_device *ex = &dev->ex_dev; struct ex_phy *phy = &ex->ex_phy[phy_id]; enum sas_device_type type = SAS_PHY_UNUSED; struct smp_disc_resp *disc_resp; u8 sas_addr[SAS_ADDR_SIZE]; - char msg[80] = ""; int res; - if (!last) - sprintf(msg, ", part of a wide port with phy%02d", sibling); - - pr_debug("ex %016llx unregistering phy%02d%s\n", - SAS_ADDR(dev->sas_addr), phy_id, msg); + pr_debug("ex %016llx unregistering phy%02d\n", + SAS_ADDR(dev->sas_addr), phy_id); memset(sas_addr, 0, SAS_ADDR_SIZE); 
disc_resp = alloc_smp_resp(DISCOVER_RESP_SIZE); @@ -2176,21 +2172,7 @@ static int sas_unregister(struct domain_device *dev, int phy_id, bool last, return res; } -/** - * sas_rediscover - revalidate the domain. - * @dev:domain device to be detect. - * @phy_id: the phy id will be detected. - * - * NOTE: this process _must_ quit (return) as soon as any connection - * errors are encountered. Connection recovery is done elsewhere. - * Discover process only interrogates devices in order to discover the - * domain.For plugging out, we un-register the device only when it is - * the last phy in the port, for other phys in this port, we just delete it - * from the port.For inserting, we do discovery when it is the - * first phy,for other phys in this port, we add it to the port to - * forming the wide-port. - */ -static void sas_rediscover(struct domain_device *dev, const int phy_id, +static void sas_ex_unregister_device(struct domain_device *dev, const int phy_id, bool *retry) { struct expander_device *ex = &dev->ex_dev; @@ -2199,29 +2181,70 @@ static void sas_rediscover(struct domain_device *dev, const int phy_id, int i; bool last = true; /* is this the last phy of the port */ - pr_debug("ex %016llx phy%02d originated BROADCAST(CHANGE)\n", - SAS_ADDR(dev->sas_addr), phy_id); - - if (SAS_ADDR(changed_phy->attached_sas_addr) != 0) { - for (i = 0; i < ex->num_phys; i++) { - struct ex_phy *phy = &ex->ex_phy[i]; + for (i = 0; i < ex->num_phys; i++) { + struct ex_phy *phy = &ex->ex_phy[i]; - if (i == phy_id) - continue; - if (SAS_ADDR(phy->attached_sas_addr) == - SAS_ADDR(changed_phy->attached_sas_addr)) { - last = false; - break; - } + if (i == phy_id) + continue; + if (SAS_ADDR(phy->attached_sas_addr) == + SAS_ADDR(changed_phy->attached_sas_addr)) { + pr_debug("phy%02d part of wide port with phy%02d\n", + phy_id, i); + last = false; + break; } - res = sas_unregister(dev, phy_id, last, retry, i); - } else - res = sas_discover_new(dev, phy_id); + } + res = sas_ex_unregister(dev, 
phy_id, last, retry); pr_debug("ex %016llx phy%d discover returned 0x%x\n", SAS_ADDR(dev->sas_addr), phy_id, res); } +static int sas_ex_try_unregister(struct domain_device *dev, u8 *changed_phy, + int nr, bool *retry) +{ + struct expander_device *ex = &dev->ex_dev; + int unregistered = 0; + struct ex_phy *phy; + int i; + + for (i = 0; i < nr; i++) { + pr_debug("ex %016llx phy%d originated BROADCAST(CHANGE)\n", + SAS_ADDR(dev->sas_addr), changed_phy[i]); + + phy = &ex->ex_phy[changed_phy[i]]; + + if (SAS_ADDR(phy->attached_sas_addr) == 0) + continue; + + sas_ex_unregister_device(dev, changed_phy[i], retry); + changed_phy[i] = 0xff; + unregistered++; + } + return unregistered; +} + +static void sas_ex_register(struct domain_device *dev, u8 *changed_phy, + int nr) +{ + struct expander_device *ex = &dev->ex_dev; + struct ex_phy *phy; + int res = 0; + int i; + + for (i = 0; i < nr; i++) { + if (changed_phy[i] == 0xff) + continue; + + phy = &ex->ex_phy[changed_phy[i]]; + + res = sas_discover_new(dev, changed_phy[i]); + + pr_debug("ex %016llx phy%d register returned 0x%x\n", + SAS_ADDR(dev->sas_addr), changed_phy[i], res); + } +} + /** * sas_ex_revalidate_domain - revalidate the domain * @port_dev: port domain device. 
@@ -2236,21 +2259,49 @@ void sas_ex_revalidate_domain(struct domain_device *port_dev, bool *retry) { int res; struct domain_device *dev = NULL; + u8 changed_phy[MAX_EXPANDER_PHYS]; + struct expander_device *ex; + int unregistered = 0; + int phy_id; + int nr = 0; + int i = 0; res = sas_find_bcast_dev(port_dev, &dev); - if (res == 0 && dev) { - struct expander_device *ex = &dev->ex_dev; - int i = 0, phy_id; - - do { - phy_id = -1; - res = sas_find_bcast_phy(dev, &phy_id, i, true); - if (phy_id == -1) - break; - sas_rediscover(dev, phy_id, retry); - i = phy_id + 1; - } while (i < ex->num_phys); + if (res || !dev) + return; + + memset(changed_phy, 0xff, MAX_EXPANDER_PHYS); + ex = &dev->ex_dev; + + do { + phy_id = -1; + res = sas_find_bcast_phy(dev, &phy_id, i, true); + if (phy_id == -1) + break; + changed_phy[nr++] = phy_id; + i = phy_id + 1; + } while (i < dev->ex_dev.num_phys); + + if (nr == 0) + return; + + unregistered = sas_ex_try_unregister(dev, changed_phy, nr, retry); + + if (unregistered > 0) { + struct ex_phy *phy; + + for (i = 0; i < nr; i++) { + if (changed_phy[i] == 0xff) + continue; + phy = &ex->ex_phy[changed_phy[i]]; + phy->phy_change_count = -1; + } + ex->ex_change_count = -1; + *retry = true; + return; } + + sas_ex_register(dev, changed_phy, nr); } void sas_smp_handler(struct bsg_job *job, struct Scsi_Host *shost, -- Gitee From 3ae74c2ab71fed63efd29bba038627fa11215845 Mon Sep 17 00:00:00 2001 From: Xiang Chen Date: Mon, 1 Dec 2025 16:27:09 +0800 Subject: [PATCH 07/10] scsi: libsas: recover attached_sas_addr and phy_change_count after update empty PHY info hulk inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/ID9GQH -------------------------------- Upstream: No We found that there is a chance that the disk is recovered when we call phy_discover() to update empty PHY info. This causes a problem in that attached_sas_addr and phy_change_count are updated in advance.
Signed-off-by: Luo Jiaxing DTS:DTS2019041908574 Description: NA Team:HISI_SW Feature or Bugfix:Bugfix Signed-off-by: Li Lingfeng --- drivers/scsi/libsas/sas_expander.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/scsi/libsas/sas_expander.c b/drivers/scsi/libsas/sas_expander.c index f08da82e699b..f1ec8d5d3573 100644 --- a/drivers/scsi/libsas/sas_expander.c +++ b/drivers/scsi/libsas/sas_expander.c @@ -2149,6 +2149,8 @@ static int sas_ex_unregister(struct domain_device *dev, int phy_id, bool last, */ if (res == 0) sas_set_ex_phy(dev, phy_id, disc_resp); + memcpy(phy->attached_sas_addr, sas_addr, SAS_ADDR_SIZE); + phy->phy_change_count = -1; goto out_free_resp; } else if (SAS_ADDR(sas_addr) == SAS_ADDR(phy->attached_sas_addr) && dev_type_flutter(type, phy->attached_dev_type)) { -- Gitee From ab0534f5419061a6daa69975327ffa1125be4ff1 Mon Sep 17 00:00:00 2001 From: Luo Jiaxing Date: Mon, 1 Dec 2025 16:27:10 +0800 Subject: [PATCH 08/10] scsi: libsas: move abnormal scenario handle of flutter before linkrate change check hulk inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/ID9GQH -------------------------------- Upstream: No We found out that the abnormal scenario handling for flutter cannot cover all abnormal scenarios, so we change the order to let it be handled earlier.
Signed-off-by: Luo Jiaxing DTS:DTS2019041908574 Description: NA Team:HISI_SW Feature or Bugfix:Bugfix Change-Id: I69ea02d622ae2873d8bace66da7ad0fee9cf90fd Signed-off-by: l00437090 Reviewed-on: http://10.90.31.173:8080/6839 Tested-by: public TuringEE Reviewed-by: chenxiang 00284940 Reviewed-by: public TuringEE Signed-off-by: Li Lingfeng --- drivers/scsi/libsas/sas_expander.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/scsi/libsas/sas_expander.c b/drivers/scsi/libsas/sas_expander.c index f1ec8d5d3573..35b0b9e6f3f2 100644 --- a/drivers/scsi/libsas/sas_expander.c +++ b/drivers/scsi/libsas/sas_expander.c @@ -2066,13 +2066,6 @@ static bool sas_process_flutter(struct domain_device *dev, struct ex_phy *phy, pr_debug("ex %016llx phy%d broadcast flutter%s\n", SAS_ADDR(dev->sas_addr), phy_id, action); - if (linkrate != phy->linkrate) { - pr_debug("ex %016llx phy%d linkrate changed from %d to %d\n", - SAS_ADDR(dev->sas_addr), phy_id, - linkrate, phy->linkrate); - return false; - } - /* the phy attached address will be updated by sas_ex_phy_discover() * and sometimes become abnormal */ @@ -2088,6 +2081,13 @@ static bool sas_process_flutter(struct domain_device *dev, struct ex_phy *phy, return false; } + if (linkrate != phy->linkrate) { + pr_debug("ex %016llx phy%d linkrate changed from %d to %d\n", + SAS_ADDR(dev->sas_addr), phy_id, + linkrate, phy->linkrate); + return false; + } + if (ata_dev) { struct ata_device *adev = sas_to_ata_dev(ata_dev); unsigned int class = ata_dev->sata_dev.class; -- Gitee From 2a4fdc3bdd82acd029baf9e8bbf2a6699a3b98b5 Mon Sep 17 00:00:00 2001 From: Jason Yan Date: Mon, 1 Dec 2025 16:27:11 +0800 Subject: [PATCH 09/10] scsi: libsas: do not add end devices to wide port hulk inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/ID9GQH -------------------------------- Upstream: No In commit c49b589b3326 ("scsi: libsas: fix issue of swapping two sas disks") we described a scenario of
swapping disks which will cause a disk to be lost. But now we found that the hardware may behave much slower than we think. Even if the disks of two slots are swapped, the sas address we read out may still be the same. This means even with the previous fix the end device may be added to a wideport anyway. So do not allow the phy of end device type to be added to a wideport in sas_ex_join_wide_port(). And we need to clear the sas address so that it can be discovered in next revalidation. CC: chenxiang Signed-off-by: Jason Yan DTS:DTS2019051011917 Description: NA Team: HISI_SW Feature or Bugfix: Bugfix Change-Id: I6b32412cb81f16f2574f3b3fa2c0e1373a36ac65 Signed-off-by: x00470154 Reviewed-on: http://10.90.31.173:8080/7520 Tested-by: public TuringEE Reviewed-by: chenxiang 00284940 Reviewed-by: public TuringEE Signed-off-by: Li Lingfeng --- drivers/scsi/libsas/sas_discover.c | 1 + drivers/scsi/libsas/sas_expander.c | 35 ++++++++++++++++++++++++------ drivers/scsi/libsas/sas_internal.h | 6 +++++ 3 files changed, 35 insertions(+), 7 deletions(-) diff --git a/drivers/scsi/libsas/sas_discover.c b/drivers/scsi/libsas/sas_discover.c index 0d524d901be3..36853522eac7 100644 --- a/drivers/scsi/libsas/sas_discover.c +++ b/drivers/scsi/libsas/sas_discover.c @@ -357,6 +357,7 @@ static void sas_destruct_ports(struct asd_sas_port *port) list_for_each_entry_safe(sas_port, p, &port->sas_port_del_list, del_list) { list_del_init(&sas_port->del_list); + dev_printk(KERN_INFO, &sas_port->dev, "port deleted\n"); sas_port_delete(sas_port); } } diff --git a/drivers/scsi/libsas/sas_expander.c b/drivers/scsi/libsas/sas_expander.c index 35b0b9e6f3f2..1945def5d78d 100644 --- a/drivers/scsi/libsas/sas_expander.c +++ b/drivers/scsi/libsas/sas_expander.c @@ -829,7 +829,10 @@ static struct domain_device *sas_ex_discover_end_dev( sas_port_free(phy->port); goto out_err; } - } + dev_printk(KERN_INFO, &phy->port->dev, "port alloc and added\n"); + } else + dev_printk(KERN_INFO, &phy->port->dev, "port already attached to
this phy?\n"); + sas_ex_get_linkrate(parent, child, phy); sas_device_set_phy(child, phy->port); @@ -927,6 +930,7 @@ static struct domain_device *sas_ex_discover_end_dev( list_del(&child->dev_list_node); spin_unlock_irq(&parent->port->dev_list_lock); out_free: + dev_printk(KERN_INFO, &phy->port->dev, "port deleted due to failed discover\n"); sas_port_delete(phy->port); out_err: phy->port = NULL; @@ -948,7 +952,23 @@ static bool sas_ex_join_wide_port(struct domain_device *parent, int phy_id) if (!memcmp(phy->attached_sas_addr, ephy->attached_sas_addr, SAS_ADDR_SIZE) && ephy->port) { + /* + * Do not join wide port if it is an end device, + * this only happen when swapping disks. Return true + * here to exit the discover process. + */ + if (sas_phy_end_device(phy) || sas_phy_end_device(ephy)) { + memset(phy->attached_sas_addr, 0, SAS_ADDR_SIZE); + phy->phy_change_count = -1; + parent->ex_dev.ex_change_count = -1; + pr_debug("Try attaching ex phy%d to wide port %016llx(with phy%d), not allowed\n", + phy_id, SAS_ADDR(ephy->attached_sas_addr), i); + return true; + } + sas_port_add_ex_phy(ephy->port, phy); + pr_debug("Attaching ex phy%d to wide port %016llx(with phy%d)\n", + phy_id, SAS_ADDR(ephy->attached_sas_addr), i); return true; } } @@ -2189,9 +2209,10 @@ static void sas_ex_unregister_device(struct domain_device *dev, const int phy_id if (i == phy_id) continue; if (SAS_ADDR(phy->attached_sas_addr) == - SAS_ADDR(changed_phy->attached_sas_addr)) { - pr_debug("phy%02d part of wide port with phy%02d\n", - phy_id, i); + SAS_ADDR(changed_phy->attached_sas_addr) && + phy->port == changed_phy->port) { + pr_debug("phy%d part of wide port with phy%d, port:%llx\n", + phy_id, i, (unsigned long long)phy->port); last = false; break; } @@ -2211,10 +2232,10 @@ static int sas_ex_try_unregister(struct domain_device *dev, u8 *changed_phy, int i; for (i = 0; i < nr; i++) { - pr_debug("ex %016llx phy%d originated BROADCAST(CHANGE)\n", - SAS_ADDR(dev->sas_addr), changed_phy[i]); - phy = 
&ex->ex_phy[changed_phy[i]]; + pr_debug("ex %016llx phy%d:%016llx originated BROADCAST(CHANGE)\n", + SAS_ADDR(dev->sas_addr), changed_phy[i], + SAS_ADDR(phy->attached_sas_addr)); if (SAS_ADDR(phy->attached_sas_addr) == 0) continue; diff --git a/drivers/scsi/libsas/sas_internal.h b/drivers/scsi/libsas/sas_internal.h index 0329d7ecff0c..0c7e7d5f6ca5 100644 --- a/drivers/scsi/libsas/sas_internal.h +++ b/drivers/scsi/libsas/sas_internal.h @@ -192,4 +192,10 @@ static inline void sas_put_device(struct domain_device *dev) kref_put(&dev->kref, sas_free_device); } +static inline bool sas_phy_end_device(struct ex_phy *phy) +{ + return (phy->attached_dev_type == SAS_END_DEVICE || + phy->attached_dev_type == SAS_SATA_DEV || + phy->attached_dev_type == SAS_SATA_PENDING); +} #endif /* _SAS_INTERNAL_H_ */ -- Gitee From c6df46eea202b7c74c062a81e1c487d3b7ee7706 Mon Sep 17 00:00:00 2001 From: Xiang Chen Date: Mon, 1 Dec 2025 16:27:12 +0800 Subject: [PATCH 10/10] scsi: libsas: delete sas port out of the disco mutex hulk inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/ID9GQH -------------------------------- Upstream: No delete sas port out of the disco mutex Signed-off-by: Jason Yan DTS:DTS2019032908508 Description:libsas: delete sas port out of the disco mutex Team:HISI_SW Feature or Bugfix:Bugfix Change-Id: I620775c266273577ea3a7e373904c292870a0f15 Signed-off-by: c00284940 Reviewed-on: http://10.90.31.173:8080/5908 Tested-by: public TuringEE Reviewed-by: tanxiaofei 00396350 Reviewed-by: public TuringEE Signed-off-by: x00470154 Signed-off-by: Li Lingfeng --- drivers/scsi/libsas/sas_expander.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/libsas/sas_expander.c b/drivers/scsi/libsas/sas_expander.c index 1945def5d78d..48f1ef4ca524 100644 --- a/drivers/scsi/libsas/sas_expander.c +++ b/drivers/scsi/libsas/sas_expander.c @@ -931,7 +931,7 @@ static struct domain_device *sas_ex_discover_end_dev( 
spin_unlock_irq(&parent->port->dev_list_lock); out_free: dev_printk(KERN_INFO, &phy->port->dev, "port deleted due to failed discover\n"); - sas_port_delete(phy->port); + list_add_tail(&phy->port->del_list, &parent->port->sas_port_del_list); out_err: phy->port = NULL; sas_put_device(child); -- Gitee