aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author jro1979oliver <jroliveira.oliveira301@gmail.com> 2022-08-01 19:29:11 +0200
committer Jeferson Rodrigo de Oliveira <jroliveira.oliveira301@gmail.com> 2022-08-01 21:44:13 +0200
commit dcf86140b94ce1c7e6e910fe30beb8a59d9635d2 (patch)
tree 3cc4584b0d9b7b49084c19b987d8a6a7a6b65d6f
parent 03fe9149f9f79d55800ca9b0e9dca549801f568e (diff)
moto: Revert all zram writeback related
Change-Id: Ifc34533cde7ad8f54c8374b5641118789e2208f5
-rw-r--r-- Documentation/ABI/testing/sysfs-block-zram 39
-rw-r--r-- Documentation/blockdev/zram.txt 126
-rw-r--r-- arch/arm64/configs/lineageos_beckham_defconfig 1
-rw-r--r-- arch/arm64/configs/lineageos_chef_defconfig 1
-rw-r--r-- arch/arm64/configs/lineageos_evert_defconfig 1
-rw-r--r-- arch/arm64/configs/lineageos_lake_defconfig 1
-rw-r--r-- arch/arm64/configs/lineageos_nash_defconfig 1
-rw-r--r-- arch/arm64/configs/lineageos_payton_defconfig 1
-rw-r--r-- drivers/block/zram/Kconfig 5
-rw-r--r-- drivers/block/zram/zram_drv.c 525
-rw-r--r-- drivers/block/zram/zram_drv.h 18
11 files changed, 174 insertions, 545 deletions
diff --git a/Documentation/ABI/testing/sysfs-block-zram b/Documentation/ABI/testing/sysfs-block-zram
index 14b2bf2e5105..c1513c756af1 100644
--- a/Documentation/ABI/testing/sysfs-block-zram
+++ b/Documentation/ABI/testing/sysfs-block-zram
@@ -98,42 +98,3 @@ Description:
The backing_dev file is read-write and set up backing
device for zram to write incompressible pages.
For using, user should enable CONFIG_ZRAM_WRITEBACK.
-
-What: /sys/block/zram<id>/idle
-Date: November 2018
-Contact: Minchan Kim <minchan@kernel.org>
-Description:
- idle file is write-only and mark zram slot as idle.
- If system has mounted debugfs, user can see which slots
- are idle via /sys/kernel/debug/zram/zram<id>/block_state
-
-What: /sys/block/zram<id>/writeback
-Date: November 2018
-Contact: Minchan Kim <minchan@kernel.org>
-Description:
- The writeback file is write-only and trigger idle and/or
- huge page writeback to backing device.
-
-What: /sys/block/zram<id>/bd_stat
-Date: November 2018
-Contact: Minchan Kim <minchan@kernel.org>
-Description:
- The bd_stat file is read-only and represents backing device's
- statistics (bd_count, bd_reads, bd_writes) in a format
- similar to block layer statistics file format.
-
-What: /sys/block/zram<id>/writeback_limit_enable
-Date: November 2018
-Contact: Minchan Kim <minchan@kernel.org>
-Description:
- The writeback_limit_enable file is read-write and specifies
- eanbe of writeback_limit feature. "1" means eable the feature.
- No limit "0" is the initial state.
-
-What: /sys/block/zram<id>/writeback_limit
-Date: November 2018
-Contact: Minchan Kim <minchan@kernel.org>
-Description:
- The writeback_limit file is read-write and specifies the maximum
- amount of writeback ZRAM can do. The limit could be changed
- in run time.
diff --git a/Documentation/blockdev/zram.txt b/Documentation/blockdev/zram.txt
index 6e5c2bb222c3..875b2b56b87f 100644
--- a/Documentation/blockdev/zram.txt
+++ b/Documentation/blockdev/zram.txt
@@ -156,23 +156,19 @@ Per-device statistics are exported as various nodes under /sys/block/zram<id>/
A brief description of exported device attributes. For more details please
read Documentation/ABI/testing/sysfs-block-zram.
-Name access description
----- ------ -----------
-disksize RW show and set the device's disk size
-initstate RO shows the initialization state of the device
-reset WO trigger device reset
-mem_used_max WO reset the `mem_used_max' counter (see later)
-mem_limit WO specifies the maximum amount of memory ZRAM can use
- to store the compressed data
-writeback_limit WO specifies the maximum amount of write IO zram can
- write out to backing device as 4KB unit
-writeback_limit_enable RW show and set writeback_limit feature
-max_comp_streams RW the number of possible concurrent compress operations
-comp_algorithm RW show and change the compression algorithm
-compact WO trigger memory compaction
-debug_stat RO this file is used for zram debugging purposes
-backing_dev RW set up backend storage for zram to write out
-idle WO mark allocated slot as idle
+Name access description
+---- ------ -----------
+disksize RW show and set the device's disk size
+initstate RO shows the initialization state of the device
+reset WO trigger device reset
+mem_used_max WO reset the `mem_used_max' counter (see later)
+mem_limit WO specifies the maximum amount of memory ZRAM can use
+ to store the compressed data
+max_comp_streams RW the number of possible concurrent compress operations
+comp_algorithm RW show and change the compression algorithm
+compact WO trigger memory compaction
+debug_stat RO this file is used for zram debugging purposes
+backing_dev RW set up backend storage for zram to write out
User space is advised to use the following files to read the device statistics.
@@ -224,17 +220,6 @@ line of text and contains the following stats separated by whitespace:
pages_compacted the number of pages freed during compaction
huge_pages the number of incompressible pages
-File /sys/block/zram<id>/bd_stat
-
-The stat file represents device's backing device statistics. It consists of
-a single line of text and contains the following stats separated by whitespace:
- bd_count size of data written in backing device.
- Unit: 4K bytes
- bd_reads the number of reads from backing device
- Unit: 4K bytes
- bd_writes the number of writes to backing device
- Unit: 4K bytes
-
9) Deactivate:
swapoff /dev/zram0
umount /dev/zram1
@@ -252,79 +237,11 @@ a single line of text and contains the following stats separated by whitespace:
= writeback
-With CONFIG_ZRAM_WRITEBACK, zram can write idle/incompressible page
+With incompressible pages, there is no memory saving with zram.
+Instead, with CONFIG_ZRAM_WRITEBACK, zram can write incompressible page
to backing storage rather than keeping it in memory.
-To use the feature, admin should set up backing device via
-
- "echo /dev/sda5 > /sys/block/zramX/backing_dev"
-
-before disksize setting. It supports only partition at this moment.
-If admin want to use incompressible page writeback, they could do via
-
- "echo huge > /sys/block/zramX/write"
-
-To use idle page writeback, first, user need to declare zram pages
-as idle.
-
- "echo all > /sys/block/zramX/idle"
-
-From now on, any pages on zram are idle pages. The idle mark
-will be removed until someone request access of the block.
-IOW, unless there is access request, those pages are still idle pages.
-
-Admin can request writeback of those idle pages at right timing via
-
- "echo idle > /sys/block/zramX/writeback"
-
-With the command, zram writeback idle pages from memory to the storage.
-
-If there are lots of write IO with flash device, potentially, it has
-flash wearout problem so that admin needs to design write limitation
-to guarantee storage health for entire product life.
-
-To overcome the concern, zram supports "writeback_limit" feature.
-The "writeback_limit_enable"'s default value is 0 so that it doesn't limit
-any writeback. IOW, if admin want to apply writeback budget, he should
-enable writeback_limit_enable via
-
- $ echo 1 > /sys/block/zramX/writeback_limit_enable
-
-Once writeback_limit_enable is set, zram doesn't allow any writeback
-until admin set the budget via /sys/block/zramX/writeback_limit.
-
-(If admin doesn't enable writeback_limit_enable, writeback_limit's value
-assigned via /sys/block/zramX/writeback_limit is meaninless.)
-
-If admin want to limit writeback as per-day 400M, he could do it
-like below.
-
- $ MB_SHIFT=20
- $ 4K_SHIFT=12
- $ echo $((400<<MB_SHIFT>>4K_SHIFT)) > \
- /sys/block/zram0/writeback_limit.
- $ echo 1 > /sys/block/zram0/writeback_limit_enable
-
-If admin want to allow further write again once the bugdet is exausted,
-he could do it like below
-
- $ echo $((400<<MB_SHIFT>>4K_SHIFT)) > \
- /sys/block/zram0/writeback_limit
-
-If admin want to see remaining writeback budget since he set,
-
- $ cat /sys/block/zramX/writeback_limit
-
-If admin want to disable writeback limit, he could do
-
- $ echo 0 > /sys/block/zramX/writeback_limit_enable
-
-The writeback_limit count will reset whenever you reset zram(e.g.,
-system reboot, echo 1 > /sys/block/zramX/reset) so keeping how many of
-writeback happened until you reset the zram to allocate extra writeback
-budget in next setting is user's job.
-
-If admin want to measure writeback count in a certain period, he could
-know it via /sys/block/zram0/bd_stat's 3rd column.
+User should set up backing device via /sys/block/zramX/backing_dev
+before disksize setting.
= memory tracking
@@ -334,17 +251,16 @@ pages of the process with*pagemap.
If you enable the feature, you could see block state via
/sys/kernel/debug/zram/zram0/block_state". The output is as follows,
- 300 75.033841 .wh.
- 301 63.806904 s...
- 302 63.806919 ..hi
+ 300 75.033841 .wh
+ 301 63.806904 s..
+ 302 63.806919 ..h
First column is zram's block index.
Second column is access time since the system was booted
Third column is state of the block.
(s: same page
w: written page to backing store
-h: huge page
-i: idle page)
+h: huge page)
First line of above example says 300th block is accessed at 75.033841sec
and the block's state is huge so it is written back to the backing
diff --git a/arch/arm64/configs/lineageos_beckham_defconfig b/arch/arm64/configs/lineageos_beckham_defconfig
index 618ea10406c1..725d3c2f445c 100644
--- a/arch/arm64/configs/lineageos_beckham_defconfig
+++ b/arch/arm64/configs/lineageos_beckham_defconfig
@@ -272,7 +272,6 @@ CONFIG_MTD_M25P80=y
CONFIG_MTD_SPI_NOR=y
# CONFIG_PNP_DEBUG_MESSAGES is not set
CONFIG_ZRAM=y
-CONFIG_ZRAM_WRITEBACK=y
CONFIG_BLK_DEV_LOOP=y
CONFIG_BLK_DEV_LOOP_MIN_COUNT=16
CONFIG_BLK_DEV_RAM=y
diff --git a/arch/arm64/configs/lineageos_chef_defconfig b/arch/arm64/configs/lineageos_chef_defconfig
index 93d80ffeedf2..b359221afca0 100644
--- a/arch/arm64/configs/lineageos_chef_defconfig
+++ b/arch/arm64/configs/lineageos_chef_defconfig
@@ -275,7 +275,6 @@ CONFIG_MTD_M25P80=y
CONFIG_MTD_SPI_NOR=y
# CONFIG_PNP_DEBUG_MESSAGES is not set
CONFIG_ZRAM=y
-CONFIG_ZRAM_WRITEBACK=y
CONFIG_BLK_DEV_LOOP=y
CONFIG_BLK_DEV_LOOP_MIN_COUNT=16
CONFIG_BLK_DEV_RAM=y
diff --git a/arch/arm64/configs/lineageos_evert_defconfig b/arch/arm64/configs/lineageos_evert_defconfig
index e8c3fd16057c..457c9e676162 100644
--- a/arch/arm64/configs/lineageos_evert_defconfig
+++ b/arch/arm64/configs/lineageos_evert_defconfig
@@ -271,7 +271,6 @@ CONFIG_MTD_M25P80=y
CONFIG_MTD_SPI_NOR=y
# CONFIG_PNP_DEBUG_MESSAGES is not set
CONFIG_ZRAM=y
-CONFIG_ZRAM_WRITEBACK=y
CONFIG_BLK_DEV_LOOP=y
CONFIG_BLK_DEV_LOOP_MIN_COUNT=16
CONFIG_BLK_DEV_RAM=y
diff --git a/arch/arm64/configs/lineageos_lake_defconfig b/arch/arm64/configs/lineageos_lake_defconfig
index 131bf11dcc9d..dd3bde4d1ab3 100644
--- a/arch/arm64/configs/lineageos_lake_defconfig
+++ b/arch/arm64/configs/lineageos_lake_defconfig
@@ -270,7 +270,6 @@ CONFIG_MTD_M25P80=y
CONFIG_MTD_SPI_NOR=y
# CONFIG_PNP_DEBUG_MESSAGES is not set
CONFIG_ZRAM=y
-CONFIG_ZRAM_WRITEBACK=y
CONFIG_BLK_DEV_LOOP=y
CONFIG_BLK_DEV_LOOP_MIN_COUNT=16
CONFIG_BLK_DEV_RAM=y
diff --git a/arch/arm64/configs/lineageos_nash_defconfig b/arch/arm64/configs/lineageos_nash_defconfig
index 0557096cde0f..d5aa76522c69 100644
--- a/arch/arm64/configs/lineageos_nash_defconfig
+++ b/arch/arm64/configs/lineageos_nash_defconfig
@@ -266,7 +266,6 @@ CONFIG_MTD_M25P80=y
CONFIG_MTD_SPI_NOR=y
# CONFIG_PNP_DEBUG_MESSAGES is not set
CONFIG_ZRAM=y
-CONFIG_ZRAM_WRITEBACK=y
CONFIG_BLK_DEV_LOOP=y
CONFIG_BLK_DEV_LOOP_MIN_COUNT=16
CONFIG_BLK_DEV_RAM=y
diff --git a/arch/arm64/configs/lineageos_payton_defconfig b/arch/arm64/configs/lineageos_payton_defconfig
index ea3725534860..70f79801c34a 100644
--- a/arch/arm64/configs/lineageos_payton_defconfig
+++ b/arch/arm64/configs/lineageos_payton_defconfig
@@ -270,7 +270,6 @@ CONFIG_MTD_M25P80=y
CONFIG_MTD_SPI_NOR=y
# CONFIG_PNP_DEBUG_MESSAGES is not set
CONFIG_ZRAM=y
-CONFIG_ZRAM_WRITEBACK=y
CONFIG_BLK_DEV_LOOP=y
CONFIG_BLK_DEV_LOOP_MIN_COUNT=16
CONFIG_BLK_DEV_RAM=y
diff --git a/drivers/block/zram/Kconfig b/drivers/block/zram/Kconfig
index e8f2fa2993ab..cb53957d58f9 100644
--- a/drivers/block/zram/Kconfig
+++ b/drivers/block/zram/Kconfig
@@ -15,7 +15,7 @@ config ZRAM
See Documentation/blockdev/zram.txt for more information.
config ZRAM_WRITEBACK
- bool "Write back incompressible or idle page to backing device"
+ bool "Write back incompressible page to backing device"
depends on ZRAM
default n
help
@@ -24,9 +24,6 @@ config ZRAM_WRITEBACK
For this feature, admin should set up backing device via
/sys/block/zramX/backing_dev.
- With /sys/block/zramX/{idle,writeback}, application could ask
- idle page's writeback to the backing device to save in memory.
-
See Documentation/blockdev/zram.txt for more information.
config ZRAM_MEMORY_TRACKING
diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index 2e373ac715c3..d0d04c025692 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -51,22 +51,15 @@ static unsigned int num_devices = 1;
static size_t huge_class_size;
static void zram_free_page(struct zram *zram, size_t index);
-static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec,
- u32 index, int offset, struct bio *bio);
-
-static int zram_slot_trylock(struct zram *zram, u32 index)
-{
- return bit_spin_trylock(ZRAM_LOCK, &zram->table[index].flags);
-}
static void zram_slot_lock(struct zram *zram, u32 index)
{
- bit_spin_lock(ZRAM_LOCK, &zram->table[index].flags);
+ bit_spin_lock(ZRAM_LOCK, &zram->table[index].value);
}
static void zram_slot_unlock(struct zram *zram, u32 index)
{
- bit_spin_unlock(ZRAM_LOCK, &zram->table[index].flags);
+ bit_spin_unlock(ZRAM_LOCK, &zram->table[index].value);
}
static inline bool init_done(struct zram *zram)
@@ -74,6 +67,13 @@ static inline bool init_done(struct zram *zram)
return zram->disksize;
}
+static inline bool zram_allocated(struct zram *zram, u32 index)
+{
+
+ return (zram->table[index].value >> (ZRAM_FLAG_SHIFT + 1)) ||
+ zram->table[index].handle;
+}
+
static inline struct zram *dev_to_zram(struct device *dev)
{
return (struct zram *)dev_to_disk(dev)->private_data;
@@ -93,19 +93,19 @@ static void zram_set_handle(struct zram *zram, u32 index, unsigned long handle)
static bool zram_test_flag(struct zram *zram, u32 index,
enum zram_pageflags flag)
{
- return zram->table[index].flags & BIT(flag);
+ return zram->table[index].value & BIT(flag);
}
static void zram_set_flag(struct zram *zram, u32 index,
enum zram_pageflags flag)
{
- zram->table[index].flags |= BIT(flag);
+ zram->table[index].value |= BIT(flag);
}
static void zram_clear_flag(struct zram *zram, u32 index,
enum zram_pageflags flag)
{
- zram->table[index].flags &= ~BIT(flag);
+ zram->table[index].value &= ~BIT(flag);
}
static inline void zram_set_element(struct zram *zram, u32 index,
@@ -121,22 +121,15 @@ static unsigned long zram_get_element(struct zram *zram, u32 index)
static size_t zram_get_obj_size(struct zram *zram, u32 index)
{
- return zram->table[index].flags & (BIT(ZRAM_FLAG_SHIFT) - 1);
+ return zram->table[index].value & (BIT(ZRAM_FLAG_SHIFT) - 1);
}
static void zram_set_obj_size(struct zram *zram,
u32 index, size_t size)
{
- unsigned long flags = zram->table[index].flags >> ZRAM_FLAG_SHIFT;
+ unsigned long flags = zram->table[index].value >> ZRAM_FLAG_SHIFT;
- zram->table[index].flags = (flags << ZRAM_FLAG_SHIFT) | size;
-}
-
-static inline bool zram_allocated(struct zram *zram, u32 index)
-{
- return zram_get_obj_size(zram, index) ||
- zram_test_flag(zram, index, ZRAM_SAME) ||
- zram_test_flag(zram, index, ZRAM_WB);
+ zram->table[index].value = (flags << ZRAM_FLAG_SHIFT) | size;
}
#if PAGE_SIZE != 4096
@@ -291,115 +284,17 @@ static ssize_t mem_used_max_store(struct device *dev,
return len;
}
-static ssize_t idle_store(struct device *dev,
- struct device_attribute *attr, const char *buf, size_t len)
-{
- struct zram *zram = dev_to_zram(dev);
- unsigned long nr_pages = zram->disksize >> PAGE_SHIFT;
- int index;
-
- if (!sysfs_streq(buf, "all"))
- return -EINVAL;
-
- down_read(&zram->init_lock);
- if (!init_done(zram)) {
- up_read(&zram->init_lock);
- return -EINVAL;
- }
-
- for (index = 0; index < nr_pages; index++) {
- /*
- * Do not mark ZRAM_UNDER_WB slot as ZRAM_IDLE to close race.
- * See the comment in writeback_store.
- */
- zram_slot_lock(zram, index);
- if (zram_allocated(zram, index) &&
- !zram_test_flag(zram, index, ZRAM_UNDER_WB))
- zram_set_flag(zram, index, ZRAM_IDLE);
- zram_slot_unlock(zram, index);
- }
-
- up_read(&zram->init_lock);
-
- return len;
-}
-
#ifdef CONFIG_ZRAM_WRITEBACK
-static ssize_t writeback_limit_enable_store(struct device *dev,
- struct device_attribute *attr, const char *buf, size_t len)
-{
- struct zram *zram = dev_to_zram(dev);
- u64 val;
- ssize_t ret = -EINVAL;
-
- if (kstrtoull(buf, 10, &val))
- return ret;
-
- down_read(&zram->init_lock);
- spin_lock(&zram->wb_limit_lock);
- zram->wb_limit_enable = val;
- spin_unlock(&zram->wb_limit_lock);
- up_read(&zram->init_lock);
- ret = len;
-
- return ret;
-}
-
-static ssize_t writeback_limit_enable_show(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- bool val;
- struct zram *zram = dev_to_zram(dev);
-
- down_read(&zram->init_lock);
- spin_lock(&zram->wb_limit_lock);
- val = zram->wb_limit_enable;
- spin_unlock(&zram->wb_limit_lock);
- up_read(&zram->init_lock);
-
- return scnprintf(buf, PAGE_SIZE, "%d\n", val);
-}
-
-static ssize_t writeback_limit_store(struct device *dev,
- struct device_attribute *attr, const char *buf, size_t len)
-{
- struct zram *zram = dev_to_zram(dev);
- u64 val;
- ssize_t ret = -EINVAL;
-
- if (kstrtoull(buf, 10, &val))
- return ret;
-
- down_read(&zram->init_lock);
- spin_lock(&zram->wb_limit_lock);
- zram->bd_wb_limit = val;
- spin_unlock(&zram->wb_limit_lock);
- up_read(&zram->init_lock);
- ret = len;
-
- return ret;
-}
-
-static ssize_t writeback_limit_show(struct device *dev,
- struct device_attribute *attr, char *buf)
+static bool zram_wb_enabled(struct zram *zram)
{
- u64 val;
- struct zram *zram = dev_to_zram(dev);
-
- down_read(&zram->init_lock);
- spin_lock(&zram->wb_limit_lock);
- val = zram->bd_wb_limit;
- spin_unlock(&zram->wb_limit_lock);
- up_read(&zram->init_lock);
-
- return scnprintf(buf, PAGE_SIZE, "%llu\n", val);
+ return zram->backing_dev;
}
static void reset_bdev(struct zram *zram)
{
struct block_device *bdev;
- if (!zram->backing_dev)
+ if (!zram_wb_enabled(zram))
return;
bdev = zram->bdev;
@@ -425,7 +320,7 @@ static ssize_t backing_dev_show(struct device *dev,
ssize_t ret;
down_read(&zram->init_lock);
- if (!zram->backing_dev) {
+ if (!zram_wb_enabled(zram)) {
memcpy(buf, "none\n", 5);
up_read(&zram->init_lock);
return 5;
@@ -495,10 +390,8 @@ static ssize_t backing_dev_store(struct device *dev,
bdev = bdgrab(I_BDEV(inode));
err = blkdev_get(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL, zram);
- if (err < 0) {
- bdev = NULL;
+ if (err < 0)
goto out;
- }
nr_pages = i_size_read(inode) >> PAGE_SHIFT;
bitmap_sz = BITS_TO_LONGS(nr_pages) * sizeof(long);
@@ -521,6 +414,7 @@ static ssize_t backing_dev_store(struct device *dev,
goto out;
reset_bdev(zram);
+ spin_lock_init(&zram->bitmap_lock);
zram->old_block_size = old_block_size;
zram->bdev = bdev;
@@ -550,29 +444,32 @@ out:
return err;
}
-static unsigned long alloc_block_bdev(struct zram *zram)
+static unsigned long get_entry_bdev(struct zram *zram)
{
- unsigned long blk_idx = 1;
-retry:
+ unsigned long entry;
+
+ spin_lock(&zram->bitmap_lock);
/* skip 0 bit to confuse zram.handle = 0 */
- blk_idx = find_next_zero_bit(zram->bitmap, zram->nr_pages, blk_idx);
- if (blk_idx == zram->nr_pages)
+ entry = find_next_zero_bit(zram->bitmap, zram->nr_pages, 1);
+ if (entry == zram->nr_pages) {
+ spin_unlock(&zram->bitmap_lock);
return 0;
+ }
- if (test_and_set_bit(blk_idx, zram->bitmap))
- goto retry;
+ set_bit(entry, zram->bitmap);
+ spin_unlock(&zram->bitmap_lock);
- atomic64_inc(&zram->stats.bd_count);
- return blk_idx;
+ return entry;
}
-static void free_block_bdev(struct zram *zram, unsigned long blk_idx)
+static void put_entry_bdev(struct zram *zram, unsigned long entry)
{
int was_set;
- was_set = test_and_clear_bit(blk_idx, zram->bitmap);
+ spin_lock(&zram->bitmap_lock);
+ was_set = test_and_clear_bit(entry, zram->bitmap);
+ spin_unlock(&zram->bitmap_lock);
WARN_ON_ONCE(!was_set);
- atomic64_dec(&zram->stats.bd_count);
}
static void zram_page_end_io(struct bio *bio)
@@ -614,163 +511,6 @@ static int read_from_bdev_async(struct zram *zram, struct bio_vec *bvec,
return 1;
}
-#define HUGE_WRITEBACK 1
-#define IDLE_WRITEBACK 2
-
-static ssize_t writeback_store(struct device *dev,
- struct device_attribute *attr, const char *buf, size_t len)
-{
- struct zram *zram = dev_to_zram(dev);
- unsigned long nr_pages = zram->disksize >> PAGE_SHIFT;
- unsigned long index;
- struct bio bio;
- struct page *page;
- ssize_t ret;
- int mode;
- unsigned long blk_idx = 0;
-
- if (sysfs_streq(buf, "idle"))
- mode = IDLE_WRITEBACK;
- else if (sysfs_streq(buf, "huge"))
- mode = HUGE_WRITEBACK;
- else
- return -EINVAL;
-
- down_read(&zram->init_lock);
- if (!init_done(zram)) {
- ret = -EINVAL;
- goto release_init_lock;
- }
-
- if (!zram->backing_dev) {
- ret = -ENODEV;
- goto release_init_lock;
- }
-
- page = alloc_page(GFP_KERNEL);
- if (!page) {
- ret = -ENOMEM;
- goto release_init_lock;
- }
-
- for (index = 0; index < nr_pages; index++) {
- struct bio_vec bvec;
-
- bvec.bv_page = page;
- bvec.bv_len = PAGE_SIZE;
- bvec.bv_offset = 0;
-
- spin_lock(&zram->wb_limit_lock);
- if (zram->wb_limit_enable && !zram->bd_wb_limit) {
- spin_unlock(&zram->wb_limit_lock);
- ret = -EIO;
- break;
- }
- spin_unlock(&zram->wb_limit_lock);
-
- if (!blk_idx) {
- blk_idx = alloc_block_bdev(zram);
- if (!blk_idx) {
- ret = -ENOSPC;
- break;
- }
- }
-
- zram_slot_lock(zram, index);
- if (!zram_allocated(zram, index))
- goto next;
-
- if (zram_test_flag(zram, index, ZRAM_WB) ||
- zram_test_flag(zram, index, ZRAM_SAME) ||
- zram_test_flag(zram, index, ZRAM_UNDER_WB))
- goto next;
-
- if (mode == IDLE_WRITEBACK &&
- !zram_test_flag(zram, index, ZRAM_IDLE))
- goto next;
- if (mode == HUGE_WRITEBACK &&
- !zram_test_flag(zram, index, ZRAM_HUGE))
- goto next;
- /*
- * Clearing ZRAM_UNDER_WB is duty of caller.
- * IOW, zram_free_page never clear it.
- */
- zram_set_flag(zram, index, ZRAM_UNDER_WB);
- /* Need for hugepage writeback racing */
- zram_set_flag(zram, index, ZRAM_IDLE);
- zram_slot_unlock(zram, index);
- if (zram_bvec_read(zram, &bvec, index, 0, NULL)) {
- zram_slot_lock(zram, index);
- zram_clear_flag(zram, index, ZRAM_UNDER_WB);
- zram_clear_flag(zram, index, ZRAM_IDLE);
- zram_slot_unlock(zram, index);
- continue;
- }
-
- bio_init(&bio);
-
- bio.bi_max_vecs = 1;
- bio.bi_io_vec = &bvec;
- bio.bi_bdev = zram->bdev;
-
- bio.bi_iter.bi_sector = blk_idx * (PAGE_SIZE >> 9);
- bio_add_page(&bio, bvec.bv_page, bvec.bv_len,
- bvec.bv_offset);
- /*
- * XXX: A single page IO would be inefficient for write
- * but it would be not bad as starter.
- */
- ret = submit_bio_wait(REQ_WRITE|REQ_SYNC, &bio);
- if (ret) {
- zram_slot_lock(zram, index);
- zram_clear_flag(zram, index, ZRAM_UNDER_WB);
- zram_clear_flag(zram, index, ZRAM_IDLE);
- zram_slot_unlock(zram, index);
- continue;
- }
-
- atomic64_inc(&zram->stats.bd_writes);
- /*
- * We released zram_slot_lock so need to check if the slot was
- * changed. If there is freeing for the slot, we can catch it
- * easily by zram_allocated.
- * A subtle case is the slot is freed/reallocated/marked as
- * ZRAM_IDLE again. To close the race, idle_store doesn't
- * mark ZRAM_IDLE once it found the slot was ZRAM_UNDER_WB.
- * Thus, we could close the race by checking ZRAM_IDLE bit.
- */
- zram_slot_lock(zram, index);
- if (!zram_allocated(zram, index) ||
- !zram_test_flag(zram, index, ZRAM_IDLE)) {
- zram_clear_flag(zram, index, ZRAM_UNDER_WB);
- zram_clear_flag(zram, index, ZRAM_IDLE);
- goto next;
- }
-
- zram_free_page(zram, index);
- zram_clear_flag(zram, index, ZRAM_UNDER_WB);
- zram_set_flag(zram, index, ZRAM_WB);
- zram_set_element(zram, index, blk_idx);
- blk_idx = 0;
- atomic64_inc(&zram->stats.pages_stored);
- spin_lock(&zram->wb_limit_lock);
- if (zram->wb_limit_enable && zram->bd_wb_limit > 0)
- zram->bd_wb_limit -= 1UL << (PAGE_SHIFT - 12);
- spin_unlock(&zram->wb_limit_lock);
-next:
- zram_slot_unlock(zram, index);
- }
-
- if (blk_idx)
- free_block_bdev(zram, blk_idx);
- ret = len;
- __free_page(page);
-release_init_lock:
- up_read(&zram->init_lock);
-
- return ret;
-}
-
struct zram_work {
struct work_struct work;
struct zram *zram;
@@ -823,21 +563,79 @@ static int read_from_bdev_sync(struct zram *zram, struct bio_vec *bvec,
static int read_from_bdev(struct zram *zram, struct bio_vec *bvec,
unsigned long entry, struct bio *parent, bool sync)
{
- atomic64_inc(&zram->stats.bd_reads);
if (sync)
return read_from_bdev_sync(zram, bvec, entry, parent);
else
return read_from_bdev_async(zram, bvec, entry, parent);
}
+
+static int write_to_bdev(struct zram *zram, struct bio_vec *bvec,
+ u32 index, struct bio *parent,
+ unsigned long *pentry)
+{
+ struct bio *bio;
+ unsigned long entry;
+
+ bio = bio_alloc(GFP_ATOMIC, 1);
+ if (!bio)
+ return -ENOMEM;
+
+ entry = get_entry_bdev(zram);
+ if (!entry) {
+ bio_put(bio);
+ return -ENOSPC;
+ }
+
+ bio->bi_iter.bi_sector = entry * (PAGE_SIZE >> 9);
+ bio->bi_bdev = zram->bdev;
+ if (!bio_add_page(bio, bvec->bv_page, bvec->bv_len,
+ bvec->bv_offset)) {
+ bio_put(bio);
+ put_entry_bdev(zram, entry);
+ return -EIO;
+ }
+
+ if (!parent) {
+ bio->bi_rw = REQ_WRITE | REQ_SYNC;
+ bio->bi_end_io = zram_page_end_io;
+ } else {
+ bio->bi_rw = parent->bi_rw;
+ bio_chain(bio, parent);
+ }
+
+ submit_bio(WRITE, bio);
+ *pentry = entry;
+
+ return 0;
+}
+
+static void zram_wb_clear(struct zram *zram, u32 index)
+{
+ unsigned long entry;
+
+ zram_clear_flag(zram, index, ZRAM_WB);
+ entry = zram_get_element(zram, index);
+ zram_set_element(zram, index, 0);
+ put_entry_bdev(zram, entry);
+}
+
#else
+static bool zram_wb_enabled(struct zram *zram) { return false; }
static inline void reset_bdev(struct zram *zram) {};
+static int write_to_bdev(struct zram *zram, struct bio_vec *bvec,
+ u32 index, struct bio *parent,
+ unsigned long *pentry)
+
+{
+ return -EIO;
+}
+
static int read_from_bdev(struct zram *zram, struct bio_vec *bvec,
unsigned long entry, struct bio *parent, bool sync)
{
return -EIO;
}
-
-static void free_block_bdev(struct zram *zram, unsigned long blk_idx) {};
+static void zram_wb_clear(struct zram *zram, u32 index) {}
#endif
#ifdef CONFIG_ZRAM_MEMORY_TRACKING
@@ -856,10 +654,14 @@ static void zram_debugfs_destroy(void)
static void zram_accessed(struct zram *zram, u32 index)
{
- zram_clear_flag(zram, index, ZRAM_IDLE);
zram->table[index].ac_time = ktime_get_boottime();
}
+static void zram_reset_access(struct zram *zram, u32 index)
+{
+ zram->table[index].ac_time.tv64 = 0;
+}
+
static ssize_t read_block_state(struct file *file, char __user *buf,
size_t count, loff_t *ppos)
{
@@ -897,13 +699,12 @@ static ssize_t read_block_state(struct file *file, char __user *buf,
ts = ktime_to_timespec64(zram->table[index].ac_time);
copied = snprintf(kbuf + written, count,
- "%12zd %12lld.%06lu %c%c%c%c\n",
+ "%12zd %12lld.%06lu %c%c%c\n",
index, (s64)ts.tv_sec,
ts.tv_nsec / NSEC_PER_USEC,
zram_test_flag(zram, index, ZRAM_SAME) ? 's' : '.',
zram_test_flag(zram, index, ZRAM_WB) ? 'w' : '.',
- zram_test_flag(zram, index, ZRAM_HUGE) ? 'h' : '.',
- zram_test_flag(zram, index, ZRAM_IDLE) ? 'i' : '.');
+ zram_test_flag(zram, index, ZRAM_HUGE) ? 'h' : '.');
if (count < copied) {
zram_slot_unlock(zram, index);
@@ -948,10 +749,8 @@ static void zram_debugfs_unregister(struct zram *zram)
#else
static void zram_debugfs_create(void) {};
static void zram_debugfs_destroy(void) {};
-static void zram_accessed(struct zram *zram, u32 index)
-{
- zram_clear_flag(zram, index, ZRAM_IDLE);
-};
+static void zram_accessed(struct zram *zram, u32 index) {};
+static void zram_reset_access(struct zram *zram, u32 index) {};
static void zram_debugfs_register(struct zram *zram) {};
static void zram_debugfs_unregister(struct zram *zram) {};
#endif
@@ -1088,26 +887,6 @@ static ssize_t mm_stat_show(struct device *dev,
return ret;
}
-#ifdef CONFIG_ZRAM_WRITEBACK
-#define FOUR_K(x) ((x) * (1 << (PAGE_SHIFT - 12)))
-static ssize_t bd_stat_show(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- struct zram *zram = dev_to_zram(dev);
- ssize_t ret;
-
- down_read(&zram->init_lock);
- ret = scnprintf(buf, PAGE_SIZE,
- "%8llu %8llu %8llu\n",
- FOUR_K((u64)atomic64_read(&zram->stats.bd_count)),
- FOUR_K((u64)atomic64_read(&zram->stats.bd_reads)),
- FOUR_K((u64)atomic64_read(&zram->stats.bd_writes)));
- up_read(&zram->init_lock);
-
- return ret;
-}
-#endif
-
static ssize_t debug_stat_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
@@ -1117,10 +896,9 @@ static ssize_t debug_stat_show(struct device *dev,
down_read(&zram->init_lock);
ret = scnprintf(buf, PAGE_SIZE,
- "version: %d\n%8llu %8llu\n",
+ "version: %d\n%8llu\n",
version,
- (u64)atomic64_read(&zram->stats.writestall),
- (u64)atomic64_read(&zram->stats.miss_free));
+ (u64)atomic64_read(&zram->stats.writestall));
up_read(&zram->init_lock);
return ret;
@@ -1128,9 +906,6 @@ static ssize_t debug_stat_show(struct device *dev,
static DEVICE_ATTR_RO(io_stat);
static DEVICE_ATTR_RO(mm_stat);
-#ifdef CONFIG_ZRAM_WRITEBACK
-static DEVICE_ATTR_RO(bd_stat);
-#endif
static DEVICE_ATTR_RO(debug_stat);
static void zram_meta_free(struct zram *zram, u64 disksize)
@@ -1175,21 +950,17 @@ static void zram_free_page(struct zram *zram, size_t index)
{
unsigned long handle;
-#ifdef CONFIG_ZRAM_MEMORY_TRACKING
- zram->table[index].ac_time = 0;
-#endif
- if (zram_test_flag(zram, index, ZRAM_IDLE))
- zram_clear_flag(zram, index, ZRAM_IDLE);
+ zram_reset_access(zram, index);
if (zram_test_flag(zram, index, ZRAM_HUGE)) {
zram_clear_flag(zram, index, ZRAM_HUGE);
atomic64_dec(&zram->stats.huge_pages);
}
- if (zram_test_flag(zram, index, ZRAM_WB)) {
- zram_clear_flag(zram, index, ZRAM_WB);
- free_block_bdev(zram, zram_get_element(zram, index));
- goto out;
+ if (zram_wb_enabled(zram) && zram_test_flag(zram, index, ZRAM_WB)) {
+ zram_wb_clear(zram, index);
+ atomic64_dec(&zram->stats.pages_stored);
+ return;
}
/*
@@ -1198,8 +969,10 @@ static void zram_free_page(struct zram *zram, size_t index)
*/
if (zram_test_flag(zram, index, ZRAM_SAME)) {
zram_clear_flag(zram, index, ZRAM_SAME);
+ zram_set_element(zram, index, 0);
atomic64_dec(&zram->stats.same_pages);
- goto out;
+ atomic64_dec(&zram->stats.pages_stored);
+ return;
}
handle = zram_get_handle(zram, index);
@@ -1210,12 +983,10 @@ static void zram_free_page(struct zram *zram, size_t index)
atomic64_sub(zram_get_obj_size(zram, index),
&zram->stats.compr_data_size);
-out:
atomic64_dec(&zram->stats.pages_stored);
+
zram_set_handle(zram, index, 0);
zram_set_obj_size(zram, index, 0);
- WARN_ON_ONCE(zram->table[index].flags &
- ~(1UL << ZRAM_LOCK | 1UL << ZRAM_UNDER_WB));
}
static int __zram_bvec_read(struct zram *zram, struct page *page, u32 index,
@@ -1226,20 +997,24 @@ static int __zram_bvec_read(struct zram *zram, struct page *page, u32 index,
unsigned int size;
void *src, *dst;
- zram_slot_lock(zram, index);
- if (zram_test_flag(zram, index, ZRAM_WB)) {
- struct bio_vec bvec;
+ if (zram_wb_enabled(zram)) {
+ zram_slot_lock(zram, index);
+ if (zram_test_flag(zram, index, ZRAM_WB)) {
+ struct bio_vec bvec;
- zram_slot_unlock(zram, index);
+ zram_slot_unlock(zram, index);
- bvec.bv_page = page;
- bvec.bv_len = PAGE_SIZE;
- bvec.bv_offset = 0;
- return read_from_bdev(zram, &bvec,
- zram_get_element(zram, index),
- bio, partial_io);
+ bvec.bv_page = page;
+ bvec.bv_len = PAGE_SIZE;
+ bvec.bv_offset = 0;
+ return read_from_bdev(zram, &bvec,
+ zram_get_element(zram, index),
+ bio, partial_io);
+ }
+ zram_slot_unlock(zram, index);
}
+ zram_slot_lock(zram, index);
handle = zram_get_handle(zram, index);
if (!handle || zram_test_flag(zram, index, ZRAM_SAME)) {
unsigned long value;
@@ -1324,6 +1099,7 @@ static int __zram_bvec_write(struct zram *zram, struct bio_vec *bvec,
struct page *page = bvec->bv_page;
unsigned long element = 0;
enum zram_pageflags flags = 0;
+ bool allow_wb = true;
mem = kmap_atomic(page);
if (page_same_filled(mem, &element)) {
@@ -1348,8 +1124,20 @@ compress_again:
return ret;
}
- if (comp_len >= huge_class_size)
+ if (unlikely(comp_len >= huge_class_size)) {
comp_len = PAGE_SIZE;
+ if (zram_wb_enabled(zram) && allow_wb) {
+ zcomp_stream_put(zram->comp);
+ ret = write_to_bdev(zram, bvec, index, bio, &element);
+ if (!ret) {
+ flags = ZRAM_WB;
+ ret = 1;
+ goto out;
+ }
+ allow_wb = false;
+ goto compress_again;
+ }
+ }
/*
* handle allocation has 2 paths:
@@ -1617,14 +1405,10 @@ static void zram_slot_free_notify(struct block_device *bdev,
zram = bdev->bd_disk->private_data;
- atomic64_inc(&zram->stats.notify_free);
- if (!zram_slot_trylock(zram, index)) {
- atomic64_inc(&zram->stats.miss_free);
- return;
- }
-
+ zram_slot_lock(zram, index);
zram_free_page(zram, index);
zram_slot_unlock(zram, index);
+ atomic64_inc(&zram->stats.notify_free);
}
static int zram_rw_page(struct block_device *bdev, sector_t sector,
@@ -1827,14 +1611,10 @@ static DEVICE_ATTR_RO(initstate);
static DEVICE_ATTR_WO(reset);
static DEVICE_ATTR_WO(mem_limit);
static DEVICE_ATTR_WO(mem_used_max);
-static DEVICE_ATTR_WO(idle);
static DEVICE_ATTR_RW(max_comp_streams);
static DEVICE_ATTR_RW(comp_algorithm);
#ifdef CONFIG_ZRAM_WRITEBACK
static DEVICE_ATTR_RW(backing_dev);
-static DEVICE_ATTR_WO(writeback);
-static DEVICE_ATTR_RW(writeback_limit);
-static DEVICE_ATTR_RW(writeback_limit_enable);
#endif
static struct attribute *zram_disk_attrs[] = {
@@ -1844,20 +1624,13 @@ static struct attribute *zram_disk_attrs[] = {
&dev_attr_compact.attr,
&dev_attr_mem_limit.attr,
&dev_attr_mem_used_max.attr,
- &dev_attr_idle.attr,
&dev_attr_max_comp_streams.attr,
&dev_attr_comp_algorithm.attr,
#ifdef CONFIG_ZRAM_WRITEBACK
&dev_attr_backing_dev.attr,
- &dev_attr_writeback.attr,
- &dev_attr_writeback_limit.attr,
- &dev_attr_writeback_limit_enable.attr,
#endif
&dev_attr_io_stat.attr,
&dev_attr_mm_stat.attr,
-#ifdef CONFIG_ZRAM_WRITEBACK
- &dev_attr_bd_stat.attr,
-#endif
&dev_attr_debug_stat.attr,
NULL,
};
@@ -1891,9 +1664,7 @@ static int zram_add(void)
device_id = ret;
init_rwsem(&zram->init_lock);
-#ifdef CONFIG_ZRAM_WRITEBACK
- spin_lock_init(&zram->wb_limit_lock);
-#endif
+
queue = blk_alloc_queue(GFP_KERNEL);
if (!queue) {
pr_err("Error allocating disk queue for device %d\n",
diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h
index 1cb3b9a82012..72c8584b6dff 100644
--- a/drivers/block/zram/zram_drv.h
+++ b/drivers/block/zram/zram_drv.h
@@ -30,7 +30,7 @@
/*
- * The lower ZRAM_FLAG_SHIFT bits of table.flags is for
+ * The lower ZRAM_FLAG_SHIFT bits of table.value is for
* object size (excluding header), the higher bits is for
* zram_pageflags.
*
@@ -41,15 +41,13 @@
*/
#define ZRAM_FLAG_SHIFT 24
-/* Flags for zram pages (table[page_no].flags) */
+/* Flags for zram pages (table[page_no].value) */
enum zram_pageflags {
/* zram slot is locked */
ZRAM_LOCK = ZRAM_FLAG_SHIFT,
ZRAM_SAME, /* Page consists the same element */
ZRAM_WB, /* page is stored on backing_device */
- ZRAM_UNDER_WB, /* page is under writeback */
ZRAM_HUGE, /* Incompressible page */
- ZRAM_IDLE, /* not accessed page since last idle marking */
__NR_ZRAM_PAGEFLAGS,
};
@@ -62,7 +60,7 @@ struct zram_table_entry {
unsigned long handle;
unsigned long element;
};
- unsigned long flags;
+ unsigned long value;
#ifdef CONFIG_ZRAM_MEMORY_TRACKING
ktime_t ac_time;
#endif
@@ -81,12 +79,6 @@ struct zram_stats {
atomic64_t pages_stored; /* no. of pages currently stored */
atomic_long_t max_used_pages; /* no. of maximum pages stored */
atomic64_t writestall; /* no. of write slow paths */
- atomic64_t miss_free; /* no. of missed free */
-#ifdef CONFIG_ZRAM_WRITEBACK
- atomic64_t bd_count; /* no. of pages in backing device */
- atomic64_t bd_reads; /* no. of reads from backing device */
- atomic64_t bd_writes; /* no. of writes from backing device */
-#endif
};
struct zram {
@@ -114,13 +106,11 @@ struct zram {
bool claim; /* Protected by bdev->bd_mutex */
#ifdef CONFIG_ZRAM_WRITEBACK
struct file *backing_dev;
- spinlock_t wb_limit_lock;
- bool wb_limit_enable;
- u64 bd_wb_limit;
struct block_device *bdev;
unsigned int old_block_size;
unsigned long *bitmap;
unsigned long nr_pages;
+ spinlock_t bitmap_lock;
#endif
#ifdef CONFIG_ZRAM_MEMORY_TRACKING
struct dentry *debugfs_dir;