Fixing multi-drive performance in the SCSI sr_mod kernel module

I have a program that drives multiple CD-ROM drives to burn files in parallel, running on CentOS 6.9 (kernel 2.6.32). When I recently upgraded the OS platform to CentOS 7 (kernel >= 3.10), I found that the program's performance had decreased.
Eventually I found out that there is a global lock in the sr kernel module. I applied a patch to the source code and replaced the module, and everything went back to normal.

Origin

I have a server with multiple CD drives connected via an HBA card. I use SCSI commands to control them directly so that they work together (e.g. burning or reading data).

1
2
3
4
5
6
……
sg_io.dxferp = buf;
sg_io.dxfer_len = sz;
sg_io.dxfer_direction = use_sg_io[dir];
ret = ioctl (fd,SG_IO,&sg_io);
……

But when I upgraded the OS platform from CentOS 6 to CentOS 7, I found that when sending SCSI commands to multiple drives, some commands block the commands issued by other threads. This leads to much higher time consumption: roughly N times (for N drives) the previous time.

Analysis

I printed the duration of each SCSI command. I found that if only one thread is running, no blocking occurs. When two or more threads are running and sending SCSI commands to the CD drives, blocking occurs.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
# one thread
1.981000 ms
3.800000 ms
3.807000 ms
3.758000 ms
2.009000 ms
3.690000 ms
1.956000 ms
3.729000 ms
2.008000 ms
3.702000 ms
1.997000 ms
3.725000 ms
1.928000 ms
3.770000 ms
1.909000 ms
3.722000 ms
2.109000 ms

# multiple thread
3.770000 ms
1.909000 ms
3.722000 ms
2.109000 ms
3.700000 ms
2.001000 ms
3.707000 ms
1.955000 ms
3.710000 ms
[256.205000 ms]
2.035000 ms
3.711000 ms
1.957000 ms
3.690000 ms
1.984000 ms
3.671000 ms
2.029000 ms

At first, I guessed that it was caused by the HBA card. Maybe a thread was holding the HBA card (the SCSI host lock)?
But when I tested with multiple drives attached to different cards, nothing changed. I also tested with multiple drives connected directly to the SATA ports on the motherboard, and again nothing changed.
So this issue is not caused by the HBA card driver or the SATA driver.

ioctl

Multiple threads call the ioctl system call in parallel; maybe newer kernels use some mutexes that block the commands.

CDROM driver (sr or scd)

I have found these links below:

[PATCH] [SCSI] sr: Fix multi-drive performance by using per-device mutexes

Why does Linux kernel driver sr.c sr_block_ioctl do mutex_lock?

Discussion:
[PATCH] [SCSI] sr: Fix multi-drive performance by using per-device mutexes

Now, it’s clear.

The sr_mod module uses a global mutex, which serializes commands from multiple threads:

1
2
3
4
5
6
7
8
9
10
11
12
13
……
struct scsi_cd *cd;
int ret = -ENXIO;
mutex_lock(&sr_mutex);
cd = scsi_cd_get(bdev->bd_disk);
if (cd) {
ret = cdrom_open(&cd->cdi, bdev, mode);
if (ret)
scsi_cd_put(cd);
}
mutex_unlock(&sr_mutex);
return ret;
……

We need to remove the global lock and add a new mutex to each cd struct.

1
2
3
4
5
6
7
8
9
10
11
12
13
……
struct scsi_cd *cd;
int ret = -ENXIO;
cd = scsi_cd_get(bdev->bd_disk);
if (cd) {
mutex_lock(&cd->lock);
ret = cdrom_open(&cd->cdi, bdev, mode);
mutex_unlock(&cd->lock);
if (ret)
scsi_cd_put(cd);
}
return ret;
……

kernel patch

Based on kernel 3.10.0-957, I modified two source files, drivers/scsi/sr.c and drivers/scsi/sr.h. The patch is as follows:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
diff -rupN linux-3.10/drivers/scsi/sr.c linux-3.10-new/drivers/scsi/sr.c
--- linux-3.10/drivers/scsi/sr.c 2019-07-26 01:12:20.072000000 +0800
+++ linux-3.10-new/drivers/scsi/sr.c 2019-07-26 01:19:29.269000000 +0800
@@ -76,7 +76,6 @@ MODULE_ALIAS_SCSI_DEVICE(TYPE_WORM);
CDC_CD_R|CDC_CD_RW|CDC_DVD|CDC_DVD_R|CDC_DVD_RAM|CDC_GENERIC_PACKET| \
CDC_MRW|CDC_MRW_W|CDC_RAM)

-static DEFINE_MUTEX(sr_mutex);
static int sr_probe(struct device *);
static int sr_remove(struct device *);
static int sr_init_command(struct scsi_cmnd *SCpnt);
@@ -529,24 +528,24 @@ static int sr_block_open(struct block_de

check_disk_change(bdev);

- mutex_lock(&sr_mutex);
cd = scsi_cd_get(bdev->bd_disk);
if (cd) {
+ mutex_lock(&cd->lock);
ret = cdrom_open(&cd->cdi, bdev, mode);
+ mutex_unlock(&cd->lock);
if (ret)
scsi_cd_put(cd);
}
- mutex_unlock(&sr_mutex);
return ret;
}

static void sr_block_release(struct gendisk *disk, fmode_t mode)
{
struct scsi_cd *cd = scsi_cd(disk);
- mutex_lock(&sr_mutex);
+ mutex_lock(&cd->lock);
cdrom_release(&cd->cdi, mode);
+ mutex_unlock(&cd->lock);
scsi_cd_put(cd);
- mutex_unlock(&sr_mutex);
}

static int sr_block_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd,
@@ -559,7 +558,7 @@ static int sr_block_ioctl(struct block_d

scsi_autopm_get_device(cd->device);

- mutex_lock(&sr_mutex);
+ mutex_lock(&cd->lock);

/*
* Send SCSI addressing ioctls directly to mid level, send other
@@ -589,7 +588,7 @@ static int sr_block_ioctl(struct block_d
ret = scsi_ioctl(sdev, cmd, argp);

out:
- mutex_unlock(&sr_mutex);
+ mutex_unlock(&cd->lock);
scsi_autopm_put_device(cd->device);
return ret;
}
@@ -706,6 +705,8 @@ static int sr_probe(struct device *dev)
if (!disk)
goto fail_free;

+ mutex_init(&cd->lock);
+
spin_lock(&sr_index_lock);
minor = find_first_zero_bit(sr_index_bits, SR_DISKS);
if (minor == SR_DISKS) {
@@ -769,6 +770,7 @@ static int sr_probe(struct device *dev)

fail_put:
put_disk(disk);
+ mutex_destroy(&cd->lock);
fail_free:
kfree(cd);
fail:
@@ -1007,6 +1009,8 @@ static void sr_kref_release(struct kref

put_disk(disk);

+ mutex_destroy(&cd->lock);
+
kfree(cd);
}

diff -rupN linux-3.10/drivers/scsi/sr.h linux-3.10-new/drivers/scsi/sr.h
--- linux-3.10/drivers/scsi/sr.h 2019-07-26 01:12:20.072000000 +0800
+++ linux-3.10-new/drivers/scsi/sr.h 2019-07-26 01:19:57.454000000 +0800
@@ -19,6 +19,7 @@

#include <linux/genhd.h>
#include <linux/kref.h>
+#include <linux/mutex.h>

#define MAX_RETRIES 3
#define SR_TIMEOUT (30 * HZ)
@@ -49,6 +50,7 @@ typedef struct scsi_cd {
bool ignore_get_event:1; /* GET_EVENT is unreliable, use TUR */

struct cdrom_device_info cdi;
+ struct mutex lock;
/* We hold gendisk and scsi_device references on probe and use
* the refs on this kref to decide when to release them */
struct kref kref;

Because only one kernel module changes, you can build just the sr_mod module to save time.

finish

After applying this patch, I built the entire kernel; after installing the new kernel, the issue was gone.

How do you build the kernel and apply a patch to it on CentOS 7? See the links below:

[I_need_the_Kernel_Source](https://wiki.centos.org/zh/HowTos/I_need_the_Kernel_Source)
Custom_Kernel