diff options
| author | jro1979oliver <jroliveira.oliveira301@gmail.com> | 2022-08-01 19:29:11 +0200 |
|---|---|---|
| committer | Jeferson Rodrigo de Oliveira <jroliveira.oliveira301@gmail.com> | 2022-08-01 21:44:13 +0200 |
| commit | dcf86140b94ce1c7e6e910fe30beb8a59d9635d2 (patch) | |
| tree | 3cc4584b0d9b7b49084c19b987d8a6a7a6b65d6f | |
| parent | 03fe9149f9f79d55800ca9b0e9dca549801f568e (diff) | |
moto: Revert all zram writeback related
Change-Id: Ifc34533cde7ad8f54c8374b5641118789e2208f5
| -rw-r--r-- | Documentation/ABI/testing/sysfs-block-zram | 39 | ||||
| -rw-r--r-- | Documentation/blockdev/zram.txt | 126 | ||||
| -rw-r--r-- | arch/arm64/configs/lineageos_beckham_defconfig | 1 | ||||
| -rw-r--r-- | arch/arm64/configs/lineageos_chef_defconfig | 1 | ||||
| -rw-r--r-- | arch/arm64/configs/lineageos_evert_defconfig | 1 | ||||
| -rw-r--r-- | arch/arm64/configs/lineageos_lake_defconfig | 1 | ||||
| -rw-r--r-- | arch/arm64/configs/lineageos_nash_defconfig | 1 | ||||
| -rw-r--r-- | arch/arm64/configs/lineageos_payton_defconfig | 1 | ||||
| -rw-r--r-- | drivers/block/zram/Kconfig | 5 | ||||
| -rw-r--r-- | drivers/block/zram/zram_drv.c | 525 | ||||
| -rw-r--r-- | drivers/block/zram/zram_drv.h | 18 |
11 files changed, 174 insertions, 545 deletions
diff --git a/Documentation/ABI/testing/sysfs-block-zram b/Documentation/ABI/testing/sysfs-block-zram index 14b2bf2e5105..c1513c756af1 100644 --- a/Documentation/ABI/testing/sysfs-block-zram +++ b/Documentation/ABI/testing/sysfs-block-zram @@ -98,42 +98,3 @@ Description: The backing_dev file is read-write and set up backing device for zram to write incompressible pages. For using, user should enable CONFIG_ZRAM_WRITEBACK. - -What: /sys/block/zram<id>/idle -Date: November 2018 -Contact: Minchan Kim <minchan@kernel.org> -Description: - idle file is write-only and mark zram slot as idle. - If system has mounted debugfs, user can see which slots - are idle via /sys/kernel/debug/zram/zram<id>/block_state - -What: /sys/block/zram<id>/writeback -Date: November 2018 -Contact: Minchan Kim <minchan@kernel.org> -Description: - The writeback file is write-only and trigger idle and/or - huge page writeback to backing device. - -What: /sys/block/zram<id>/bd_stat -Date: November 2018 -Contact: Minchan Kim <minchan@kernel.org> -Description: - The bd_stat file is read-only and represents backing device's - statistics (bd_count, bd_reads, bd_writes) in a format - similar to block layer statistics file format. - -What: /sys/block/zram<id>/writeback_limit_enable -Date: November 2018 -Contact: Minchan Kim <minchan@kernel.org> -Description: - The writeback_limit_enable file is read-write and specifies - eanbe of writeback_limit feature. "1" means eable the feature. - No limit "0" is the initial state. - -What: /sys/block/zram<id>/writeback_limit -Date: November 2018 -Contact: Minchan Kim <minchan@kernel.org> -Description: - The writeback_limit file is read-write and specifies the maximum - amount of writeback ZRAM can do. The limit could be changed - in run time. diff --git a/Documentation/blockdev/zram.txt b/Documentation/blockdev/zram.txt index 6e5c2bb222c3..875b2b56b87f 100644 --- a/Documentation/blockdev/zram.txt +++ b/Documentation/blockdev/zram.txt @@ -156,23 +156,19 @@ Per-device statistics are exported as various nodes under /sys/block/zram<id>/ A brief description of exported device attributes. For more details please read Documentation/ABI/testing/sysfs-block-zram. -Name access description ----- ------ ----------- -disksize RW show and set the device's disk size -initstate RO shows the initialization state of the device -reset WO trigger device reset -mem_used_max WO reset the `mem_used_max' counter (see later) -mem_limit WO specifies the maximum amount of memory ZRAM can use - to store the compressed data -writeback_limit WO specifies the maximum amount of write IO zram can - write out to backing device as 4KB unit -writeback_limit_enable RW show and set writeback_limit feature -max_comp_streams RW the number of possible concurrent compress operations -comp_algorithm RW show and change the compression algorithm -compact WO trigger memory compaction -debug_stat RO this file is used for zram debugging purposes -backing_dev RW set up backend storage for zram to write out -idle WO mark allocated slot as idle +Name access description +---- ------ ----------- +disksize RW show and set the device's disk size +initstate RO shows the initialization state of the device +reset WO trigger device reset +mem_used_max WO reset the `mem_used_max' counter (see later) +mem_limit WO specifies the maximum amount of memory ZRAM can use + to store the compressed data +max_comp_streams RW the number of possible concurrent compress operations +comp_algorithm RW show and change the compression algorithm +compact WO trigger memory compaction +debug_stat RO this file is used for zram debugging purposes +backing_dev RW set up backend storage for zram to write out User space is advised to use the following files to read the device statistics. @@ -224,17 +220,6 @@ line of text and contains the following stats separated by whitespace: pages_compacted the number of pages freed during compaction huge_pages the number of incompressible pages -File /sys/block/zram<id>/bd_stat - -The stat file represents device's backing device statistics. It consists of -a single line of text and contains the following stats separated by whitespace: - bd_count size of data written in backing device. - Unit: 4K bytes - bd_reads the number of reads from backing device - Unit: 4K bytes - bd_writes the number of writes to backing device - Unit: 4K bytes - 9) Deactivate: swapoff /dev/zram0 umount /dev/zram1 @@ -252,79 +237,11 @@ a single line of text and contains the following stats separated by whitespace: = writeback -With CONFIG_ZRAM_WRITEBACK, zram can write idle/incompressible page +With incompressible pages, there is no memory saving with zram. +Instead, with CONFIG_ZRAM_WRITEBACK, zram can write incompressible page to backing storage rather than keeping it in memory. -To use the feature, admin should set up backing device via - - "echo /dev/sda5 > /sys/block/zramX/backing_dev" - -before disksize setting. It supports only partition at this moment. -If admin want to use incompressible page writeback, they could do via - - "echo huge > /sys/block/zramX/write" - -To use idle page writeback, first, user need to declare zram pages -as idle. - - "echo all > /sys/block/zramX/idle" - -From now on, any pages on zram are idle pages. The idle mark -will be removed until someone request access of the block. -IOW, unless there is access request, those pages are still idle pages. - -Admin can request writeback of those idle pages at right timing via - - "echo idle > /sys/block/zramX/writeback" - -With the command, zram writeback idle pages from memory to the storage. - -If there are lots of write IO with flash device, potentially, it has -flash wearout problem so that admin needs to design write limitation -to guarantee storage health for entire product life. - -To overcome the concern, zram supports "writeback_limit" feature. -The "writeback_limit_enable"'s default value is 0 so that it doesn't limit -any writeback. IOW, if admin want to apply writeback budget, he should -enable writeback_limit_enable via - - $ echo 1 > /sys/block/zramX/writeback_limit_enable - -Once writeback_limit_enable is set, zram doesn't allow any writeback -until admin set the budget via /sys/block/zramX/writeback_limit. - -(If admin doesn't enable writeback_limit_enable, writeback_limit's value -assigned via /sys/block/zramX/writeback_limit is meaninless.) - -If admin want to limit writeback as per-day 400M, he could do it -like below. - - $ MB_SHIFT=20 - $ 4K_SHIFT=12 - $ echo $((400<<MB_SHIFT>>4K_SHIFT)) > \ - /sys/block/zram0/writeback_limit. - $ echo 1 > /sys/block/zram0/writeback_limit_enable - -If admin want to allow further write again once the bugdet is exausted, -he could do it like below - - $ echo $((400<<MB_SHIFT>>4K_SHIFT)) > \ - /sys/block/zram0/writeback_limit - -If admin want to see remaining writeback budget since he set, - - $ cat /sys/block/zramX/writeback_limit - -If admin want to disable writeback limit, he could do - - $ echo 0 > /sys/block/zramX/writeback_limit_enable - -The writeback_limit count will reset whenever you reset zram(e.g., -system reboot, echo 1 > /sys/block/zramX/reset) so keeping how many of -writeback happened until you reset the zram to allocate extra writeback -budget in next setting is user's job. - -If admin want to measure writeback count in a certain period, he could -know it via /sys/block/zram0/bd_stat's 3rd column. +User should set up backing device via /sys/block/zramX/backing_dev +before disksize setting. = memory tracking @@ -334,17 +251,16 @@ pages of the process with*pagemap. If you enable the feature, you could see block state via /sys/kernel/debug/zram/zram0/block_state". The output is as follows, - 300 75.033841 .wh. - 301 63.806904 s... - 302 63.806919 ..hi + 300 75.033841 .wh + 301 63.806904 s.. + 302 63.806919 ..h First column is zram's block index. Second column is access time since the system was booted Third column is state of the block. (s: same page w: written page to backing store -h: huge page -i: idle page) +h: huge page) First line of above example says 300th block is accessed at 75.033841sec and the block's state is huge so it is written back to the backing diff --git a/arch/arm64/configs/lineageos_beckham_defconfig b/arch/arm64/configs/lineageos_beckham_defconfig index 618ea10406c1..725d3c2f445c 100644 --- a/arch/arm64/configs/lineageos_beckham_defconfig +++ b/arch/arm64/configs/lineageos_beckham_defconfig @@ -272,7 +272,6 @@ CONFIG_MTD_M25P80=y CONFIG_MTD_SPI_NOR=y # CONFIG_PNP_DEBUG_MESSAGES is not set CONFIG_ZRAM=y -CONFIG_ZRAM_WRITEBACK=y CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_LOOP_MIN_COUNT=16 CONFIG_BLK_DEV_RAM=y diff --git a/arch/arm64/configs/lineageos_chef_defconfig b/arch/arm64/configs/lineageos_chef_defconfig index 93d80ffeedf2..b359221afca0 100644 --- a/arch/arm64/configs/lineageos_chef_defconfig +++ b/arch/arm64/configs/lineageos_chef_defconfig @@ -275,7 +275,6 @@ CONFIG_MTD_M25P80=y CONFIG_MTD_SPI_NOR=y # CONFIG_PNP_DEBUG_MESSAGES is not set CONFIG_ZRAM=y -CONFIG_ZRAM_WRITEBACK=y CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_LOOP_MIN_COUNT=16 CONFIG_BLK_DEV_RAM=y diff --git a/arch/arm64/configs/lineageos_evert_defconfig b/arch/arm64/configs/lineageos_evert_defconfig index e8c3fd16057c..457c9e676162 100644 --- a/arch/arm64/configs/lineageos_evert_defconfig +++ b/arch/arm64/configs/lineageos_evert_defconfig @@ -271,7 +271,6 @@ CONFIG_MTD_M25P80=y CONFIG_MTD_SPI_NOR=y # CONFIG_PNP_DEBUG_MESSAGES is not set CONFIG_ZRAM=y -CONFIG_ZRAM_WRITEBACK=y CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_LOOP_MIN_COUNT=16 CONFIG_BLK_DEV_RAM=y diff --git a/arch/arm64/configs/lineageos_lake_defconfig b/arch/arm64/configs/lineageos_lake_defconfig index 131bf11dcc9d..dd3bde4d1ab3 100644 --- a/arch/arm64/configs/lineageos_lake_defconfig +++ b/arch/arm64/configs/lineageos_lake_defconfig @@ -270,7 +270,6 @@ CONFIG_MTD_M25P80=y CONFIG_MTD_SPI_NOR=y # CONFIG_PNP_DEBUG_MESSAGES is not set CONFIG_ZRAM=y -CONFIG_ZRAM_WRITEBACK=y CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_LOOP_MIN_COUNT=16 CONFIG_BLK_DEV_RAM=y diff --git a/arch/arm64/configs/lineageos_nash_defconfig b/arch/arm64/configs/lineageos_nash_defconfig index 0557096cde0f..d5aa76522c69 100644 --- a/arch/arm64/configs/lineageos_nash_defconfig +++ b/arch/arm64/configs/lineageos_nash_defconfig @@ -266,7 +266,6 @@ CONFIG_MTD_M25P80=y CONFIG_MTD_SPI_NOR=y # CONFIG_PNP_DEBUG_MESSAGES is not set CONFIG_ZRAM=y -CONFIG_ZRAM_WRITEBACK=y CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_LOOP_MIN_COUNT=16 CONFIG_BLK_DEV_RAM=y diff --git a/arch/arm64/configs/lineageos_payton_defconfig b/arch/arm64/configs/lineageos_payton_defconfig index ea3725534860..70f79801c34a 100644 --- a/arch/arm64/configs/lineageos_payton_defconfig +++ b/arch/arm64/configs/lineageos_payton_defconfig @@ -270,7 +270,6 @@ CONFIG_MTD_M25P80=y CONFIG_MTD_SPI_NOR=y # CONFIG_PNP_DEBUG_MESSAGES is not set CONFIG_ZRAM=y -CONFIG_ZRAM_WRITEBACK=y CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_LOOP_MIN_COUNT=16 CONFIG_BLK_DEV_RAM=y diff --git a/drivers/block/zram/Kconfig b/drivers/block/zram/Kconfig index e8f2fa2993ab..cb53957d58f9 100644 --- a/drivers/block/zram/Kconfig +++ b/drivers/block/zram/Kconfig @@ -15,7 +15,7 @@ config ZRAM See Documentation/blockdev/zram.txt for more information. config ZRAM_WRITEBACK - bool "Write back incompressible or idle page to backing device" + bool "Write back incompressible page to backing device" depends on ZRAM default n help @@ -24,9 +24,6 @@ config ZRAM_WRITEBACK For this feature, admin should set up backing device via /sys/block/zramX/backing_dev. - With /sys/block/zramX/{idle,writeback}, application could ask - idle page's writeback to the backing device to save in memory. - See Documentation/blockdev/zram.txt for more information. config ZRAM_MEMORY_TRACKING diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 2e373ac715c3..d0d04c025692 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -51,22 +51,15 @@ static unsigned int num_devices = 1; static size_t huge_class_size; static void zram_free_page(struct zram *zram, size_t index); -static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec, - u32 index, int offset, struct bio *bio); - -static int zram_slot_trylock(struct zram *zram, u32 index) -{ - return bit_spin_trylock(ZRAM_LOCK, &zram->table[index].flags); -} static void zram_slot_lock(struct zram *zram, u32 index) { - bit_spin_lock(ZRAM_LOCK, &zram->table[index].flags); + bit_spin_lock(ZRAM_LOCK, &zram->table[index].value); } static void zram_slot_unlock(struct zram *zram, u32 index) { - bit_spin_unlock(ZRAM_LOCK, &zram->table[index].flags); + bit_spin_unlock(ZRAM_LOCK, &zram->table[index].value); } static inline bool init_done(struct zram *zram) @@ -74,6 +67,13 @@ static inline bool init_done(struct zram *zram) return zram->disksize; } +static inline bool zram_allocated(struct zram *zram, u32 index) +{ + + return (zram->table[index].value >> (ZRAM_FLAG_SHIFT + 1)) || + zram->table[index].handle; +} + static inline struct zram *dev_to_zram(struct device *dev) { return (struct zram *)dev_to_disk(dev)->private_data; @@ -93,19 +93,19 @@ static void zram_set_handle(struct zram *zram, u32 index, unsigned long handle) static bool zram_test_flag(struct zram *zram, u32 index, enum zram_pageflags flag) { - return zram->table[index].flags & BIT(flag); + return zram->table[index].value & BIT(flag); } static void zram_set_flag(struct zram *zram, u32 index, enum zram_pageflags flag) { - zram->table[index].flags |= BIT(flag); + zram->table[index].value |= BIT(flag); } static void zram_clear_flag(struct zram *zram, u32 index, enum zram_pageflags flag) { - zram->table[index].flags &= ~BIT(flag); + zram->table[index].value &= ~BIT(flag); } static inline void zram_set_element(struct zram *zram, u32 index, @@ -121,22 +121,15 @@ static unsigned long zram_get_element(struct zram *zram, u32 index) static size_t zram_get_obj_size(struct zram *zram, u32 index) { - return zram->table[index].flags & (BIT(ZRAM_FLAG_SHIFT) - 1); + return zram->table[index].value & (BIT(ZRAM_FLAG_SHIFT) - 1); } static void zram_set_obj_size(struct zram *zram, u32 index, size_t size) { - unsigned long flags = zram->table[index].flags >> ZRAM_FLAG_SHIFT; + unsigned long flags = zram->table[index].value >> ZRAM_FLAG_SHIFT; - zram->table[index].flags = (flags << ZRAM_FLAG_SHIFT) | size; -} - -static inline bool zram_allocated(struct zram *zram, u32 index) -{ - return zram_get_obj_size(zram, index) || - zram_test_flag(zram, index, ZRAM_SAME) || - zram_test_flag(zram, index, ZRAM_WB); + zram->table[index].value = (flags << ZRAM_FLAG_SHIFT) | size; } #if PAGE_SIZE != 4096 @@ -291,115 +284,17 @@ static ssize_t mem_used_max_store(struct device *dev, return len; } -static ssize_t idle_store(struct device *dev, - struct device_attribute *attr, const char *buf, size_t len) -{ - struct zram *zram = dev_to_zram(dev); - unsigned long nr_pages = zram->disksize >> PAGE_SHIFT; - int index; - - if (!sysfs_streq(buf, "all")) - return -EINVAL; - - down_read(&zram->init_lock); - if (!init_done(zram)) { - up_read(&zram->init_lock); - return -EINVAL; - } - - for (index = 0; index < nr_pages; index++) { - /* - * Do not mark ZRAM_UNDER_WB slot as ZRAM_IDLE to close race. - * See the comment in writeback_store. - */ - zram_slot_lock(zram, index); - if (zram_allocated(zram, index) && - !zram_test_flag(zram, index, ZRAM_UNDER_WB)) - zram_set_flag(zram, index, ZRAM_IDLE); - zram_slot_unlock(zram, index); - } - - up_read(&zram->init_lock); - - return len; -} - #ifdef CONFIG_ZRAM_WRITEBACK -static ssize_t writeback_limit_enable_store(struct device *dev, - struct device_attribute *attr, const char *buf, size_t len) -{ - struct zram *zram = dev_to_zram(dev); - u64 val; - ssize_t ret = -EINVAL; - - if (kstrtoull(buf, 10, &val)) - return ret; - - down_read(&zram->init_lock); - spin_lock(&zram->wb_limit_lock); - zram->wb_limit_enable = val; - spin_unlock(&zram->wb_limit_lock); - up_read(&zram->init_lock); - ret = len; - - return ret; -} - -static ssize_t writeback_limit_enable_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - bool val; - struct zram *zram = dev_to_zram(dev); - - down_read(&zram->init_lock); - spin_lock(&zram->wb_limit_lock); - val = zram->wb_limit_enable; - spin_unlock(&zram->wb_limit_lock); - up_read(&zram->init_lock); - - return scnprintf(buf, PAGE_SIZE, "%d\n", val); -} - -static ssize_t writeback_limit_store(struct device *dev, - struct device_attribute *attr, const char *buf, size_t len) -{ - struct zram *zram = dev_to_zram(dev); - u64 val; - ssize_t ret = -EINVAL; - - if (kstrtoull(buf, 10, &val)) - return ret; - - down_read(&zram->init_lock); - spin_lock(&zram->wb_limit_lock); - zram->bd_wb_limit = val; - spin_unlock(&zram->wb_limit_lock); - up_read(&zram->init_lock); - ret = len; - - return ret; -} - -static ssize_t writeback_limit_show(struct device *dev, - struct device_attribute *attr, char *buf) +static bool zram_wb_enabled(struct zram *zram) { - u64 val; - struct zram *zram = dev_to_zram(dev); - - down_read(&zram->init_lock); - spin_lock(&zram->wb_limit_lock); - val = zram->bd_wb_limit; - spin_unlock(&zram->wb_limit_lock); - up_read(&zram->init_lock); - - return scnprintf(buf, PAGE_SIZE, "%llu\n", val); + return zram->backing_dev; } static void reset_bdev(struct zram *zram) { struct block_device *bdev; - if (!zram->backing_dev) + if (!zram_wb_enabled(zram)) return; bdev = zram->bdev; @@ -425,7 +320,7 @@ static ssize_t backing_dev_show(struct device *dev, ssize_t ret; down_read(&zram->init_lock); - if (!zram->backing_dev) { + if (!zram_wb_enabled(zram)) { memcpy(buf, "none\n", 5); up_read(&zram->init_lock); return 5; @@ -495,10 +390,8 @@ static ssize_t backing_dev_store(struct device *dev, bdev = bdgrab(I_BDEV(inode)); err = blkdev_get(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL, zram); - if (err < 0) { - bdev = NULL; + if (err < 0) goto out; - } nr_pages = i_size_read(inode) >> PAGE_SHIFT; bitmap_sz = BITS_TO_LONGS(nr_pages) * sizeof(long); @@ -521,6 +414,7 @@ static ssize_t backing_dev_store(struct device *dev, goto out; reset_bdev(zram); + spin_lock_init(&zram->bitmap_lock); zram->old_block_size = old_block_size; zram->bdev = bdev; @@ -550,29 +444,32 @@ out: return err; } -static unsigned long alloc_block_bdev(struct zram *zram) +static unsigned long get_entry_bdev(struct zram *zram) { - unsigned long blk_idx = 1; -retry: + unsigned long entry; + + spin_lock(&zram->bitmap_lock); /* skip 0 bit to confuse zram.handle = 0 */ - blk_idx = find_next_zero_bit(zram->bitmap, zram->nr_pages, blk_idx); - if (blk_idx == zram->nr_pages) + entry = find_next_zero_bit(zram->bitmap, zram->nr_pages, 1); + if (entry == zram->nr_pages) { + spin_unlock(&zram->bitmap_lock); return 0; + } - if (test_and_set_bit(blk_idx, zram->bitmap)) - goto retry; + set_bit(entry, zram->bitmap); + spin_unlock(&zram->bitmap_lock); - atomic64_inc(&zram->stats.bd_count); - return blk_idx; + return entry; } -static void free_block_bdev(struct zram *zram, unsigned long blk_idx) +static void put_entry_bdev(struct zram *zram, unsigned long entry) { int was_set; - was_set = test_and_clear_bit(blk_idx, zram->bitmap); + spin_lock(&zram->bitmap_lock); + was_set = test_and_clear_bit(entry, zram->bitmap); + spin_unlock(&zram->bitmap_lock); WARN_ON_ONCE(!was_set); - atomic64_dec(&zram->stats.bd_count); } static void zram_page_end_io(struct bio *bio) @@ -614,163 +511,6 @@ static int read_from_bdev_async(struct zram *zram, struct bio_vec *bvec, return 1; } -#define HUGE_WRITEBACK 1 -#define IDLE_WRITEBACK 2 - -static ssize_t writeback_store(struct device *dev, - struct device_attribute *attr, const char *buf, size_t len) -{ - struct zram *zram = dev_to_zram(dev); - unsigned long nr_pages = zram->disksize >> PAGE_SHIFT; - unsigned long index; - struct bio bio; - struct page *page; - ssize_t ret; - int mode; - unsigned long blk_idx = 0; - - if (sysfs_streq(buf, "idle")) - mode = IDLE_WRITEBACK; - else if (sysfs_streq(buf, "huge")) - mode = HUGE_WRITEBACK; - else - return -EINVAL; - - down_read(&zram->init_lock); - if (!init_done(zram)) { - ret = -EINVAL; - goto release_init_lock; - } - - if (!zram->backing_dev) { - ret = -ENODEV; - goto release_init_lock; - } - - page = alloc_page(GFP_KERNEL); - if (!page) { - ret = -ENOMEM; - goto release_init_lock; - } - - for (index = 0; index < nr_pages; index++) { - struct bio_vec bvec; - - bvec.bv_page = page; - bvec.bv_len = PAGE_SIZE; - bvec.bv_offset = 0; - - spin_lock(&zram->wb_limit_lock); - if (zram->wb_limit_enable && !zram->bd_wb_limit) { - spin_unlock(&zram->wb_limit_lock); - ret = -EIO; - break; - } - spin_unlock(&zram->wb_limit_lock); - - if (!blk_idx) { - blk_idx = alloc_block_bdev(zram); - if (!blk_idx) { - ret = -ENOSPC; - break; - } - } - - zram_slot_lock(zram, index); - if (!zram_allocated(zram, index)) - goto next; - - if (zram_test_flag(zram, index, ZRAM_WB) || - zram_test_flag(zram, index, ZRAM_SAME) || - zram_test_flag(zram, index, ZRAM_UNDER_WB)) - goto next; - - if (mode == IDLE_WRITEBACK && - !zram_test_flag(zram, index, ZRAM_IDLE)) - goto next; - if (mode == HUGE_WRITEBACK && - !zram_test_flag(zram, index, ZRAM_HUGE)) - goto next; - /* - * Clearing ZRAM_UNDER_WB is duty of caller. - * IOW, zram_free_page never clear it. - */ - zram_set_flag(zram, index, ZRAM_UNDER_WB); - /* Need for hugepage writeback racing */ - zram_set_flag(zram, index, ZRAM_IDLE); - zram_slot_unlock(zram, index); - if (zram_bvec_read(zram, &bvec, index, 0, NULL)) { - zram_slot_lock(zram, index); - zram_clear_flag(zram, index, ZRAM_UNDER_WB); - zram_clear_flag(zram, index, ZRAM_IDLE); - zram_slot_unlock(zram, index); - continue; - } - - bio_init(&bio); - - bio.bi_max_vecs = 1; - bio.bi_io_vec = &bvec; - bio.bi_bdev = zram->bdev; - - bio.bi_iter.bi_sector = blk_idx * (PAGE_SIZE >> 9); - bio_add_page(&bio, bvec.bv_page, bvec.bv_len, - bvec.bv_offset); - /* - * XXX: A single page IO would be inefficient for write - * but it would be not bad as starter. - */ - ret = submit_bio_wait(REQ_WRITE|REQ_SYNC, &bio); - if (ret) { - zram_slot_lock(zram, index); - zram_clear_flag(zram, index, ZRAM_UNDER_WB); - zram_clear_flag(zram, index, ZRAM_IDLE); - zram_slot_unlock(zram, index); - continue; - } - - atomic64_inc(&zram->stats.bd_writes); - /* - * We released zram_slot_lock so need to check if the slot was - * changed. If there is freeing for the slot, we can catch it - * easily by zram_allocated. - * A subtle case is the slot is freed/reallocated/marked as - * ZRAM_IDLE again. To close the race, idle_store doesn't - * mark ZRAM_IDLE once it found the slot was ZRAM_UNDER_WB. - * Thus, we could close the race by checking ZRAM_IDLE bit. - */ - zram_slot_lock(zram, index); - if (!zram_allocated(zram, index) || - !zram_test_flag(zram, index, ZRAM_IDLE)) { - zram_clear_flag(zram, index, ZRAM_UNDER_WB); - zram_clear_flag(zram, index, ZRAM_IDLE); - goto next; - } - - zram_free_page(zram, index); - zram_clear_flag(zram, index, ZRAM_UNDER_WB); - zram_set_flag(zram, index, ZRAM_WB); - zram_set_element(zram, index, blk_idx); - blk_idx = 0; - atomic64_inc(&zram->stats.pages_stored); - spin_lock(&zram->wb_limit_lock); - if (zram->wb_limit_enable && zram->bd_wb_limit > 0) - zram->bd_wb_limit -= 1UL << (PAGE_SHIFT - 12); - spin_unlock(&zram->wb_limit_lock); -next: - zram_slot_unlock(zram, index); - } - - if (blk_idx) - free_block_bdev(zram, blk_idx); - ret = len; - __free_page(page); -release_init_lock: - up_read(&zram->init_lock); - - return ret; -} - struct zram_work { struct work_struct work; struct zram *zram; @@ -823,21 +563,79 @@ static int read_from_bdev_sync(struct zram *zram, struct bio_vec *bvec, static int read_from_bdev(struct zram *zram, struct bio_vec *bvec, unsigned long entry, struct bio *parent, bool sync) { - atomic64_inc(&zram->stats.bd_reads); if (sync) return read_from_bdev_sync(zram, bvec, entry, parent); else return read_from_bdev_async(zram, bvec, entry, parent); } + +static int write_to_bdev(struct zram *zram, struct bio_vec *bvec, + u32 index, struct bio *parent, + unsigned long *pentry) +{ + struct bio *bio; + unsigned long entry; + + bio = bio_alloc(GFP_ATOMIC, 1); + if (!bio) + return -ENOMEM; + + entry = get_entry_bdev(zram); + if (!entry) { + bio_put(bio); + return -ENOSPC; + } + + bio->bi_iter.bi_sector = entry * (PAGE_SIZE >> 9); + bio->bi_bdev = zram->bdev; + if (!bio_add_page(bio, bvec->bv_page, bvec->bv_len, + bvec->bv_offset)) { + bio_put(bio); + put_entry_bdev(zram, entry); + return -EIO; + } + + if (!parent) { + bio->bi_rw = REQ_WRITE | REQ_SYNC; + bio->bi_end_io = zram_page_end_io; + } else { + bio->bi_rw = parent->bi_rw; + bio_chain(bio, parent); + } + + submit_bio(WRITE, bio); + *pentry = entry; + + return 0; +} + +static void zram_wb_clear(struct zram *zram, u32 index) +{ + unsigned long entry; + + zram_clear_flag(zram, index, ZRAM_WB); + entry = zram_get_element(zram, index); + zram_set_element(zram, index, 0); + put_entry_bdev(zram, entry); +} + #else +static bool zram_wb_enabled(struct zram *zram) { return false; } static inline void reset_bdev(struct zram *zram) {}; +static int write_to_bdev(struct zram *zram, struct bio_vec *bvec, + u32 index, struct bio *parent, + unsigned long *pentry) + +{ + return -EIO; +} + static int read_from_bdev(struct zram *zram, struct bio_vec *bvec, unsigned long entry, struct bio *parent, bool sync) { return -EIO; } - -static void free_block_bdev(struct zram *zram, unsigned long blk_idx) {}; +static void zram_wb_clear(struct zram *zram, u32 index) {} #endif #ifdef CONFIG_ZRAM_MEMORY_TRACKING @@ -856,10 +654,14 @@ static void zram_debugfs_destroy(void) static void zram_accessed(struct zram *zram, u32 index) { - zram_clear_flag(zram, index, ZRAM_IDLE); zram->table[index].ac_time = ktime_get_boottime(); } +static void zram_reset_access(struct zram *zram, u32 index) +{ + zram->table[index].ac_time.tv64 = 0; +} + static ssize_t read_block_state(struct file *file, char __user *buf, size_t count, loff_t *ppos) { @@ -897,13 +699,12 @@ static ssize_t read_block_state(struct file *file, char __user *buf, ts = ktime_to_timespec64(zram->table[index].ac_time); copied = snprintf(kbuf + written, count, - "%12zd %12lld.%06lu %c%c%c%c\n", + "%12zd %12lld.%06lu %c%c%c\n", index, (s64)ts.tv_sec, ts.tv_nsec / NSEC_PER_USEC, zram_test_flag(zram, index, ZRAM_SAME) ? 's' : '.', zram_test_flag(zram, index, ZRAM_WB) ? 'w' : '.', - zram_test_flag(zram, index, ZRAM_HUGE) ? 'h' : '.', - zram_test_flag(zram, index, ZRAM_IDLE) ? 'i' : '.'); + zram_test_flag(zram, index, ZRAM_HUGE) ? 'h' : '.'); if (count < copied) { zram_slot_unlock(zram, index); @@ -948,10 +749,8 @@ static void zram_debugfs_unregister(struct zram *zram) #else static void zram_debugfs_create(void) {}; static void zram_debugfs_destroy(void) {}; -static void zram_accessed(struct zram *zram, u32 index) -{ - zram_clear_flag(zram, index, ZRAM_IDLE); -}; +static void zram_accessed(struct zram *zram, u32 index) {}; +static void zram_reset_access(struct zram *zram, u32 index) {}; static void zram_debugfs_register(struct zram *zram) {}; static void zram_debugfs_unregister(struct zram *zram) {}; #endif @@ -1088,26 +887,6 @@ static ssize_t mm_stat_show(struct device *dev, return ret; } -#ifdef CONFIG_ZRAM_WRITEBACK -#define FOUR_K(x) ((x) * (1 << (PAGE_SHIFT - 12))) -static ssize_t bd_stat_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct zram *zram = dev_to_zram(dev); - ssize_t ret; - - down_read(&zram->init_lock); - ret = scnprintf(buf, PAGE_SIZE, - "%8llu %8llu %8llu\n", - FOUR_K((u64)atomic64_read(&zram->stats.bd_count)), - FOUR_K((u64)atomic64_read(&zram->stats.bd_reads)), - FOUR_K((u64)atomic64_read(&zram->stats.bd_writes))); - up_read(&zram->init_lock); - - return ret; -} -#endif - static ssize_t debug_stat_show(struct device *dev, struct device_attribute *attr, char *buf) { @@ -1117,10 +896,9 @@ static ssize_t debug_stat_show(struct device *dev, down_read(&zram->init_lock); ret = scnprintf(buf, PAGE_SIZE, - "version: %d\n%8llu %8llu\n", + "version: %d\n%8llu\n", version, - (u64)atomic64_read(&zram->stats.writestall), - (u64)atomic64_read(&zram->stats.miss_free)); + (u64)atomic64_read(&zram->stats.writestall)); up_read(&zram->init_lock); return ret; @@ -1128,9 +906,6 @@ static ssize_t debug_stat_show(struct device *dev, static DEVICE_ATTR_RO(io_stat); static DEVICE_ATTR_RO(mm_stat); -#ifdef CONFIG_ZRAM_WRITEBACK -static DEVICE_ATTR_RO(bd_stat); -#endif static DEVICE_ATTR_RO(debug_stat); static void zram_meta_free(struct zram *zram, u64 disksize) @@ -1175,21 +950,17 @@ static void zram_free_page(struct zram *zram, size_t index) { unsigned long handle; -#ifdef CONFIG_ZRAM_MEMORY_TRACKING - zram->table[index].ac_time = 0; -#endif - if (zram_test_flag(zram, index, ZRAM_IDLE)) - zram_clear_flag(zram, index, ZRAM_IDLE); + zram_reset_access(zram, index); if (zram_test_flag(zram, index, ZRAM_HUGE)) { zram_clear_flag(zram, index, ZRAM_HUGE); atomic64_dec(&zram->stats.huge_pages); } - if (zram_test_flag(zram, index, ZRAM_WB)) { - zram_clear_flag(zram, index, ZRAM_WB); - free_block_bdev(zram, zram_get_element(zram, index)); - goto out; + if (zram_wb_enabled(zram) && zram_test_flag(zram, index, ZRAM_WB)) { + zram_wb_clear(zram, index); + atomic64_dec(&zram->stats.pages_stored); + return; } /* @@ -1198,8 +969,10 @@ static void zram_free_page(struct zram *zram, size_t index) */ if (zram_test_flag(zram, index, ZRAM_SAME)) { zram_clear_flag(zram, index, ZRAM_SAME); + zram_set_element(zram, index, 0); atomic64_dec(&zram->stats.same_pages); - goto out; + atomic64_dec(&zram->stats.pages_stored); + return; } handle = zram_get_handle(zram, index); @@ -1210,12 +983,10 @@ static void zram_free_page(struct zram *zram, size_t index) atomic64_sub(zram_get_obj_size(zram, index), &zram->stats.compr_data_size); -out: atomic64_dec(&zram->stats.pages_stored); + zram_set_handle(zram, index, 0); zram_set_obj_size(zram, index, 0); - WARN_ON_ONCE(zram->table[index].flags & - ~(1UL << ZRAM_LOCK | 1UL << ZRAM_UNDER_WB)); } static int __zram_bvec_read(struct zram *zram, struct page *page, u32 index, @@ -1226,20 +997,24 @@ static int __zram_bvec_read(struct zram *zram, struct page *page, u32 index, unsigned int size; void *src, *dst; - zram_slot_lock(zram, index); - if (zram_test_flag(zram, index, ZRAM_WB)) { - struct bio_vec bvec; + if (zram_wb_enabled(zram)) { + zram_slot_lock(zram, index); + if (zram_test_flag(zram, index, ZRAM_WB)) { + struct bio_vec bvec; - zram_slot_unlock(zram, index); + zram_slot_unlock(zram, index); - bvec.bv_page = page; - bvec.bv_len = PAGE_SIZE; - bvec.bv_offset = 0; - return read_from_bdev(zram, &bvec, - zram_get_element(zram, index), - bio, partial_io); + bvec.bv_page = page; + bvec.bv_len = PAGE_SIZE; + bvec.bv_offset = 0; + return read_from_bdev(zram, &bvec, + zram_get_element(zram, index), + bio, partial_io); + } + zram_slot_unlock(zram, index); } + zram_slot_lock(zram, index); handle = zram_get_handle(zram, index); if (!handle || zram_test_flag(zram, index, ZRAM_SAME)) { unsigned long value; @@ -1324,6 +1099,7 @@ static int __zram_bvec_write(struct zram *zram, struct bio_vec *bvec, struct page *page = bvec->bv_page; unsigned long element = 0; enum zram_pageflags flags = 0; + bool allow_wb = true; mem = kmap_atomic(page); if (page_same_filled(mem, &element)) { @@ -1348,8 +1124,20 @@ compress_again: return ret; } - if (comp_len >= huge_class_size) + if (unlikely(comp_len >= huge_class_size)) { comp_len = PAGE_SIZE; + if (zram_wb_enabled(zram) && allow_wb) { + zcomp_stream_put(zram->comp); + ret = write_to_bdev(zram, bvec, index, bio, &element); + if (!ret) { + flags = ZRAM_WB; + ret = 1; + goto out; + } + allow_wb = false; + goto compress_again; + } + } /* * handle allocation has 2 paths: @@ -1617,14 +1405,10 @@ static void zram_slot_free_notify(struct block_device *bdev, zram = bdev->bd_disk->private_data; - atomic64_inc(&zram->stats.notify_free); - if (!zram_slot_trylock(zram, index)) { - atomic64_inc(&zram->stats.miss_free); - return; - } - + zram_slot_lock(zram, index); zram_free_page(zram, index); zram_slot_unlock(zram, index); + atomic64_inc(&zram->stats.notify_free); } static int zram_rw_page(struct block_device *bdev, sector_t sector, @@ -1827,14 +1611,10 @@ static DEVICE_ATTR_RO(initstate); static DEVICE_ATTR_WO(reset); static DEVICE_ATTR_WO(mem_limit); static DEVICE_ATTR_WO(mem_used_max); -static DEVICE_ATTR_WO(idle); static DEVICE_ATTR_RW(max_comp_streams); static DEVICE_ATTR_RW(comp_algorithm); #ifdef CONFIG_ZRAM_WRITEBACK static DEVICE_ATTR_RW(backing_dev); -static DEVICE_ATTR_WO(writeback); -static DEVICE_ATTR_RW(writeback_limit); -static DEVICE_ATTR_RW(writeback_limit_enable); #endif static struct attribute *zram_disk_attrs[] = { @@ -1844,20 +1624,13 @@ static struct attribute *zram_disk_attrs[] = { &dev_attr_compact.attr, &dev_attr_mem_limit.attr, &dev_attr_mem_used_max.attr, - &dev_attr_idle.attr, &dev_attr_max_comp_streams.attr, &dev_attr_comp_algorithm.attr, #ifdef CONFIG_ZRAM_WRITEBACK &dev_attr_backing_dev.attr, - &dev_attr_writeback.attr, - &dev_attr_writeback_limit.attr, - &dev_attr_writeback_limit_enable.attr, #endif &dev_attr_io_stat.attr, &dev_attr_mm_stat.attr, -#ifdef CONFIG_ZRAM_WRITEBACK - &dev_attr_bd_stat.attr, -#endif &dev_attr_debug_stat.attr, NULL, }; @@ -1891,9 +1664,7 @@ static int zram_add(void) device_id = ret; init_rwsem(&zram->init_lock); -#ifdef CONFIG_ZRAM_WRITEBACK - spin_lock_init(&zram->wb_limit_lock); -#endif + queue = blk_alloc_queue(GFP_KERNEL); if (!queue) { pr_err("Error allocating disk queue for device %d\n", diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h index 1cb3b9a82012..72c8584b6dff 100644 --- a/drivers/block/zram/zram_drv.h +++ b/drivers/block/zram/zram_drv.h @@ -30,7 +30,7 @@ /* - * The lower ZRAM_FLAG_SHIFT bits of table.flags is for + * The lower ZRAM_FLAG_SHIFT bits of table.value is for * object size (excluding header), the higher bits is for * zram_pageflags. * @@ -41,15 +41,13 @@ */ #define ZRAM_FLAG_SHIFT 24 -/* Flags for zram pages (table[page_no].flags) */ +/* Flags for zram pages (table[page_no].value) */ enum zram_pageflags { /* zram slot is locked */ ZRAM_LOCK = ZRAM_FLAG_SHIFT, ZRAM_SAME, /* Page consists the same element */ ZRAM_WB, /* page is stored on backing_device */ - ZRAM_UNDER_WB, /* page is under writeback */ ZRAM_HUGE, /* Incompressible page */ - ZRAM_IDLE, /* not accessed page since last idle marking */ __NR_ZRAM_PAGEFLAGS, }; @@ -62,7 +60,7 @@ struct zram_table_entry { unsigned long handle; unsigned long element; }; - unsigned long flags; + unsigned long value; #ifdef CONFIG_ZRAM_MEMORY_TRACKING ktime_t ac_time; #endif @@ -81,12 +79,6 @@ struct zram_stats { atomic64_t pages_stored; /* no. of pages currently stored */ atomic_long_t max_used_pages; /* no. of maximum pages stored */ atomic64_t writestall; /* no. of write slow paths */ - atomic64_t miss_free; /* no. of missed free */ -#ifdef CONFIG_ZRAM_WRITEBACK - atomic64_t bd_count; /* no. of pages in backing device */ - atomic64_t bd_reads; /* no. of reads from backing device */ - atomic64_t bd_writes; /* no. of writes from backing device */ -#endif }; struct zram { @@ -114,13 +106,11 @@ struct zram { bool claim; /* Protected by bdev->bd_mutex */ #ifdef CONFIG_ZRAM_WRITEBACK struct file *backing_dev; - spinlock_t wb_limit_lock; - bool wb_limit_enable; - u64 bd_wb_limit; struct block_device *bdev; unsigned int old_block_size; unsigned long *bitmap; unsigned long nr_pages; + spinlock_t bitmap_lock; #endif #ifdef CONFIG_ZRAM_MEMORY_TRACKING struct dentry *debugfs_dir; |
