1 /* mke2fs.c - Create an ext2 filesystem image.
2  *
3  * Copyright 2006, 2007 Rob Landley <rob@landley.net>
4 
5 // Still to go: "E:jJ:L:m:O:"
6 USE_MKE2FS(NEWTOY(mke2fs, "<1>2g:Fnqm#N#i#b#", TOYFLAG_SBIN))
7 
8 config MKE2FS
9   bool "mke2fs"
10   default n
11   help
12     usage: mke2fs [-Fnq] [-b ###] [-N|i ###] [-m ###] device
13 
14     Create an ext2 filesystem on a block device or filesystem image.
15 
16     -F         Force to run on a mounted device
17     -n         Don't write to device
18     -q         Quiet (no output)
19     -b size    Block size (1024, 2048, or 4096)
20     -N inodes  Allocate this many inodes
21     -i bytes   Allocate one inode for every XXX bytes of device
22     -m percent Reserve this percent of filesystem space for root user
23 
24 config MKE2FS_JOURNAL
25   bool "Journaling support (ext3)"
26   default n
27   depends on MKE2FS
28   help
29     usage: mke2fs [-j] [-J size=###,device=XXX]
30 
31     -j         Create journal (ext3)
32     -J         Journal options
33                size: Number of blocks (1024-102400)
34                device: Specify an external journal
35 
36 config MKE2FS_GEN
37   bool "Generate (gene2fs)"
38   default n
39   depends on MKE2FS
40   help
41     usage: gene2fs [options] device filename
42 
43     The [options] are the same as mke2fs.
44 
45 config MKE2FS_LABEL
46   bool "Label support"
47   default n
48   depends on MKE2FS
49   help
50     usage: mke2fs [-L label] [-M path] [-o string]
51 
52     -L         Volume label
53     -M         Path to mount point
54     -o         Created by
55 
56 config MKE2FS_EXTENDED
57   bool "Extended options"
58   default n
59   depends on MKE2FS
60   help
61     usage: mke2fs [-E stride=###] [-O option[,option]]
62 
63     -E stride= Set RAID stripe size (in blocks)
64     -O [opts]  Specify fewer ext2 option flags (for old kernels)
65                All of these are on by default (as appropriate)
66        none         Clear default options (all but journaling)
67        dir_index    Use htree indexes for large directories
68        filetype     Store file type info in directory entry
69        has_journal  Set by -j
70        journal_dev  Set by -J device=XXX
71        sparse_super Don't allocate huge numbers of redundant superblocks
72 */
73 
74 #define FOR_mke2fs
75 #include "toys.h"
76 
GLOBALS(long blocksize;long bytes_per_inode;long inodes;long reserved_percent;char * gendir;struct dirtree * dt;unsigned treeblocks;unsigned treeinodes;unsigned blocks;unsigned freeblocks;unsigned inodespg;unsigned groups;unsigned blockbits;unsigned nextblock;unsigned nextgroup;int fsfd;struct ext2_superblock sb;)77 GLOBALS(
78   // Command line arguments.
79   long blocksize;
80   long bytes_per_inode;
81   long inodes;           // Total inodes in filesystem.
82   long reserved_percent; // Integer precent of space to reserve for root.
83   char *gendir;          // Where to read dirtree from.
84 
85   // Internal data.
86   struct dirtree *dt;    // Tree of files to copy into the new filesystem.
87   unsigned treeblocks;   // Blocks used by dt
88   unsigned treeinodes;   // Inodes used by dt
89 
90   unsigned blocks;       // Total blocks in the filesystem.
91   unsigned freeblocks;   // Free blocks in the filesystem.
92   unsigned inodespg;     // Inodes per group
93   unsigned groups;       // Total number of block groups.
94   unsigned blockbits;    // Bits per block.  (Also blocks per group.)
95 
96   // For gene2fs
97   unsigned nextblock;    // Next data block to allocate
98   unsigned nextgroup;    // Next group we'll be allocating from
99   int fsfd;              // File descriptor of filesystem (to output to).
100 
101   struct ext2_superblock sb;
102 )
103 
104 #define INODES_RESERVED 10
105 
// Integer division of a by b, rounded up to the next whole number.
// Uses quotient-plus-remainder-test (not (a+b-1)/b) so it cannot overflow.
// b must be nonzero.
static uint32_t div_round_up(uint32_t a, uint32_t b)
{
  uint32_t quotient = a/b, leftover = a%b;

  return leftover ? quotient+1 : quotient;
}
113 
114 // Calculate data blocks plus index blocks needed to hold a file.
115 
file_blocks_used(uint64_t size,uint32_t * blocklist)116 static uint32_t file_blocks_used(uint64_t size, uint32_t *blocklist)
117 {
118   uint32_t dblocks = (uint32_t)((size+(TT.blocksize-1))/TT.blocksize);
119   uint32_t idx=TT.blocksize/4, iblocks=0, diblocks=0, tiblocks=0;
120 
121   // Fill out index blocks in inode.
122 
123   if (blocklist) {
124     int i;
125 
126     // Direct index blocks
127     for (i=0; i<13 && i<dblocks; i++) blocklist[i] = i;
128     // Singly indirect index blocks
129     if (dblocks > 13+idx) blocklist[13] = 13+idx;
130     // Doubly indirect index blocks
131     idx = 13 + idx + (idx*idx);
132     if (dblocks > idx) blocklist[14] = idx;
133 
134     return 0;
135   }
136 
137   // Account for direct, singly, doubly, and triply indirect index blocks
138 
139   if (dblocks > 12) {
140     iblocks = ((dblocks-13)/idx)+1;
141     if (iblocks > 1) {
142       diblocks = ((iblocks-2)/idx)+1;
143       if (diblocks > 1)
144         tiblocks = ((diblocks-2)/idx)+1;
145     }
146   }
147 
148   return dblocks + iblocks + diblocks + tiblocks;
149 }
150 
151 // Use the parent pointer to iterate through the tree non-recursively.
treenext(struct dirtree * this)152 static struct dirtree *treenext(struct dirtree *this)
153 {
154   while (this && !this->next) this = this->parent;
155   if (this) this = this->next;
156 
157   return this;
158 }
159 
160 // Recursively calculate the number of blocks used by each inode in the tree.
161 // Returns blocks used by this directory, assigns bytes used to *size.
162 // Writes total block count to TT.treeblocks and inode count to TT.treeinodes.
163 
check_treesize(struct dirtree * that,off_t * size)164 static long check_treesize(struct dirtree *that, off_t *size)
165 {
166   long blocks;
167 
168   while (that) {
169     *size += sizeof(struct ext2_dentry) + strlen(that->name);
170 
171     if (that->child)
172       that->st.st_blocks = check_treesize(that->child, &that->st.st_size);
173     else if (S_ISREG(that->st.st_mode)) {
174        that->st.st_blocks = file_blocks_used(that->st.st_size, 0);
175        TT.treeblocks += that->st.st_blocks;
176     }
177     that = that->next;
178   }
179   TT.treeblocks += blocks = file_blocks_used(*size, 0);
180   TT.treeinodes++;
181 
182   return blocks;
183 }
184 
185 // Calculate inode numbers and link counts.
186 //
187 // To do this right I need to copy the tree and sort it, but here's a really
188 // ugly n^2 way of dealing with the problem that doesn't scale well to large
189 // numbers of files (> 100,000) but can be done in very little code.
190 // This rewrites inode numbers to their final values, allocating depth first.
191 
check_treelinks(struct dirtree * tree)192 static void check_treelinks(struct dirtree *tree)
193 {
194   struct dirtree *current=tree, *that;
195   long inode = INODES_RESERVED;
196 
197   while (current) {
198     ++inode;
199     // Since we can't hardlink to directories, we know their link count.
200     if (S_ISDIR(current->st.st_mode)) current->st.st_nlink = 2;
201     else {
202       dev_t new = current->st.st_dev;
203 
204       if (!new) continue;
205 
206       // Look for other copies of current node
207       current->st.st_nlink = 0;
208       for (that = tree; that; that = treenext(that)) {
209         if (current->st.st_ino == that->st.st_ino &&
210           current->st.st_dev == that->st.st_dev)
211         {
212           current->st.st_nlink++;
213           current->st.st_ino = inode;
214         }
215       }
216     }
217     current->st.st_ino = inode;
218     current = treenext(current);
219   }
220 }
221 
222 // Calculate inodes per group from total inodes.
get_inodespg(uint32_t inodes)223 static uint32_t get_inodespg(uint32_t inodes)
224 {
225   uint32_t temp;
226 
227   // Round up to fill complete inode blocks.
228   temp = (inodes + TT.groups - 1) / TT.groups;
229   inodes = TT.blocksize/sizeof(struct ext2_inode);
230   return ((temp + inodes - 1)/inodes)*inodes;
231 }
232 
233 // Fill out superblock and TT structures.
234 
init_superblock(struct ext2_superblock * sb)235 static void init_superblock(struct ext2_superblock *sb)
236 {
237   uint32_t temp;
238 
239   // Set log_block_size and log_frag_size.
240 
241   for (temp = 0; temp < 4; temp++) if (TT.blocksize == 1024<<temp) break;
242   if (temp==4) error_exit("bad blocksize");
243   sb->log_block_size = sb->log_frag_size = SWAP_LE32(temp);
244 
245   // Fill out blocks_count, r_blocks_count, first_data_block
246 
247   sb->blocks_count = SWAP_LE32(TT.blocks);
248   sb->free_blocks_count = SWAP_LE32(TT.freeblocks);
249   temp = (TT.blocks * (uint64_t)TT.reserved_percent) / 100;
250   sb->r_blocks_count = SWAP_LE32(temp);
251 
252   sb->first_data_block = SWAP_LE32(TT.blocksize == 1024 ? 1 : 0);
253 
254   // Set blocks_per_group and frags_per_group, which is the size of an
255   // allocation bitmap that fits in one block (I.E. how many bits per block)?
256 
257   sb->blocks_per_group = sb->frags_per_group = SWAP_LE32(TT.blockbits);
258 
259   // Set inodes_per_group and total inodes_count
260   sb->inodes_per_group = SWAP_LE32(TT.inodespg);
261   sb->inodes_count = SWAP_LE32(TT.inodespg * TT.groups);
262 
263   // Determine free inodes.
264   temp = TT.inodespg*TT.groups - INODES_RESERVED;
265   if (temp < TT.treeinodes) error_exit("Not enough inodes.\n");
266   sb->free_inodes_count = SWAP_LE32(temp - TT.treeinodes);
267 
268   // Fill out the rest of the superblock.
269   sb->max_mnt_count=0xFFFF;
270   sb->wtime = sb->lastcheck = sb->mkfs_time = SWAP_LE32(time(NULL));
271   sb->magic = SWAP_LE32(0xEF53);
272   sb->state = sb->errors = SWAP_LE16(1);
273 
274   sb->rev_level = SWAP_LE32(1);
275   sb->first_ino = SWAP_LE32(INODES_RESERVED+1);
276   sb->inode_size = SWAP_LE16(sizeof(struct ext2_inode));
277   sb->feature_incompat = SWAP_LE32(EXT2_FEATURE_INCOMPAT_FILETYPE);
278   sb->feature_ro_compat = SWAP_LE32(EXT2_FEATURE_RO_COMPAT_SPARSE_SUPER);
279 
280   create_uuid(sb->uuid);
281 
282   // TODO If we're called as mke3fs or mkfs.ext3, do a journal.
283 
284   //if (strchr(toys.which->name,'3'))
285   //	sb->feature_compat |= SWAP_LE32(EXT3_FEATURE_COMPAT_HAS_JOURNAL);
286 }
287 
// Does this group contain a superblock backup (and group descriptor table)?
// Returns 1 for groups 0 and 1 and for any exact power of 3, 5, or 7;
// otherwise 0.
static int is_sb_group(uint32_t group)
{
  static const uint32_t bases[] = {3, 5, 7};
  unsigned i;

  // Superblock backups are on groups 0, 1, and powers of 3, 5, and 7.
  if (group < 2) return 1;

  for (i = 0; i < sizeof(bases)/sizeof(*bases); i++) {
    // 64 bit accumulator: the first power at or above any 32 bit group
    // number is below 2^35, so this can neither overflow nor spin forever
    // the way a wrapping int could.
    uint64_t power = bases[i];

    while (power < group) power *= bases[i];
    if (power == group) return 1;
  }

  return 0;
}
302 
303 
304 // Number of blocks used in group by optional superblock/group list backup.
group_superblock_overhead(uint32_t group)305 static int group_superblock_overhead(uint32_t group)
306 {
307   int used;
308 
309   if (!is_sb_group(group)) return 0;
310 
311   // How many blocks does the group descriptor table take up?
312   used = TT.groups * sizeof(struct ext2_group);
313   used += TT.blocksize - 1;
314   used /= TT.blocksize;
315   // Plus the superblock itself.
316   used++;
317   // And a corner case.
318   if (!group && TT.blocksize == 1024) used++;
319 
320   return used;
321 }
322 
323 // Number of blocks used in group to store superblock/group/inode list
group_overhead(uint32_t group)324 static int group_overhead(uint32_t group)
325 {
326   // Return superblock backup overhead (if any), plus block/inode
327   // allocation bitmaps, plus inode tables.
328   return group_superblock_overhead(group) + 2 + get_inodespg(TT.inodespg)
329         / (TT.blocksize/sizeof(struct ext2_inode));
330 }
331 
// In bitmap "array" set "len" bits starting at position "start" (from 0).
// Bit n lives in byte n/8 at bit position n%8 (least significant first).
// Whole aligned bytes are filled in one go, stragglers one bit at a time.
static void bits_set(char *array, int start, int len)
{
  int pos = start, left = len;

  while (left) {
    // Byte aligned with at least a full byte to go: fill the whole byte.
    if (!(pos&7) && left >= 8) {
      array[pos/8] = 255;
      pos += 8;
      left -= 8;
      continue;
    }

    array[pos/8] |= (1<<(pos&7));
    pos++;
    left--;
  }
}
347 
348 // Seek past len bytes (to maintain sparse file), or write zeroes if output
349 // not seekable
put_zeroes(int len)350 static void put_zeroes(int len)
351 {
352   if(-1 == lseek(TT.fsfd, len, SEEK_SET)) {
353     memset(toybuf, 0, sizeof(toybuf));
354     while (len) {
355       int out = len > sizeof(toybuf) ? sizeof(toybuf) : len;
356       xwrite(TT.fsfd, toybuf, out);
357       len -= out;
358     }
359   }
360 }
361 
362 // Fill out an inode structure from struct stat info in dirtree.
fill_inode(struct ext2_inode * in,struct dirtree * that)363 static void fill_inode(struct ext2_inode *in, struct dirtree *that)
364 {
365   uint32_t fbu[15];
366   int temp;
367 
368   file_blocks_used(that->st.st_size, fbu);
369 
370   // If that inode needs data blocks allocated to it.
371   if (that->st.st_size) {
372     int i, group = TT.nextblock/TT.blockbits;
373 
374     // TODO: teach this about indirect blocks.
375     for (i=0; i<15; i++) {
376       // If we just jumped into a new group, skip group overhead blocks.
377       while (group >= TT.nextgroup)
378         TT.nextblock += group_overhead(TT.nextgroup++);
379     }
380   }
381   // TODO :  S_ISREG/DIR/CHR/BLK/FIFO/LNK/SOCK(m)
382   in->mode = SWAP_LE32(that->st.st_mode);
383 
384   in->uid = SWAP_LE16(that->st.st_uid & 0xFFFF);
385   in->uid_high = SWAP_LE16(that->st.st_uid >> 16);
386   in->gid = SWAP_LE16(that->st.st_gid & 0xFFFF);
387   in->gid_high = SWAP_LE16(that->st.st_gid >> 16);
388   in->size = SWAP_LE32(that->st.st_size & 0xFFFFFFFF);
389 
390   // Contortions to make the compiler not generate a warning for x>>32
391   // when x is 32 bits.  The optimizer should clean this up.
392   if (sizeof(that->st.st_size) > 4) temp = 32;
393   else temp = 0;
394   if (temp) in->dir_acl = SWAP_LE32(that->st.st_size >> temp);
395 
396   in->atime = SWAP_LE32(that->st.st_atime);
397   in->ctime = SWAP_LE32(that->st.st_ctime);
398   in->mtime = SWAP_LE32(that->st.st_mtime);
399 
400   in->links_count = SWAP_LE16(that->st.st_nlink);
401   in->blocks = SWAP_LE32(that->st.st_blocks);
402   // in->faddr
403 }
404 
405 // Works like an archiver.
406 // The first argument is the name of the file to create.  If it already
407 // exists, that size will be used.
408 
mke2fs_main(void)409 void mke2fs_main(void)
410 {
411   int i, temp;
412   off_t length;
413   uint32_t usedblocks, usedinodes, dtiblk, dtbblk;
414   struct dirtree *dti, *dtb;
415 
416   // Handle command line arguments.
417 
418   if (toys.optargs[1]) {
419     sscanf(toys.optargs[1], "%u", &TT.blocks);
420     temp = O_RDWR|O_CREAT;
421   } else temp = O_RDWR;
422   if (!TT.reserved_percent) TT.reserved_percent = 5;
423 
424   // TODO: Check if filesystem is mounted here
425 
426   // For mke?fs, open file.  For gene?fs, create file.
427   TT.fsfd = xcreate(*toys.optargs, temp, 0777);
428 
429   // Determine appropriate block size and block count from file length.
430   // (If no length, default to 4k.  They can override it on the cmdline.)
431 
432   length = fdlength(TT.fsfd);
433   if (!TT.blocksize) TT.blocksize = (length && length < 1<<29) ? 1024 : 4096;
434   TT.blockbits = 8*TT.blocksize;
435   if (!TT.blocks) TT.blocks = length/TT.blocksize;
436 
437   // Collect gene2fs list or lost+found, calculate requirements.
438 
439   if (TT.gendir) {
440     strncpy(toybuf, TT.gendir, sizeof(toybuf));
441     dti = dirtree_read(toybuf, dirtree_notdotdot);
442   } else {
443     dti = xzalloc(sizeof(struct dirtree)+11);
444     strcpy(dti->name, "lost+found");
445     dti->st.st_mode = S_IFDIR|0755;
446     dti->st.st_ctime = dti->st.st_mtime = time(NULL);
447   }
448 
449   // Add root directory inode.  This is iterated through for when finding
450   // blocks, but not when finding inodes.  The tree's parent pointers don't
451   // point back into this.
452 
453   dtb = xzalloc(sizeof(struct dirtree)+1);
454   dtb->st.st_mode = S_IFDIR|0755;
455   dtb->st.st_ctime = dtb->st.st_mtime = time(NULL);
456   dtb->child = dti;
457 
458   // Figure out how much space is used by preset files
459   length = check_treesize(dtb, &(dtb->st.st_size));
460   check_treelinks(dtb);
461 
462   // Figure out how many total inodes we need.
463 
464   if (!TT.inodes) {
465     if (!TT.bytes_per_inode) TT.bytes_per_inode = 8192;
466     TT.inodes = (TT.blocks * (uint64_t)TT.blocksize) / TT.bytes_per_inode;
467   }
468 
469   // If we're generating a filesystem and have no idea how many blocks it
470   // needs, start with a minimal guess, find the overhead of that many
471   // groups, and loop until this is enough groups to store this many blocks.
472   if (!TT.blocks) TT.groups = (TT.treeblocks/TT.blockbits)+1;
473   else TT.groups = div_round_up(TT.blocks, TT.blockbits);
474 
475   for (;;) {
476     temp = TT.treeblocks;
477 
478     for (i = 0; i<TT.groups; i++) temp += group_overhead(i);
479 
480     if (TT.blocks) {
481       if (TT.blocks < temp) error_exit("Not enough space.\n");
482       break;
483     }
484     if (temp <= TT.groups * TT.blockbits) {
485       TT.blocks = temp;
486       break;
487     }
488     TT.groups++;
489   }
490   TT.freeblocks = TT.blocks - temp;
491 
492   // Now we know all the TT data, initialize superblock structure.
493 
494   init_superblock(&TT.sb);
495 
496   // Start writing.  Skip the first 1k to avoid the boot sector (if any).
497   put_zeroes(1024);
498 
499   // Loop through block groups, write out each one.
500   dtiblk = dtbblk = usedblocks = usedinodes = 0;
501   for (i=0; i<TT.groups; i++) {
502     struct ext2_inode *in = (struct ext2_inode *)toybuf;
503     uint32_t start, itable, used, end;
504     int j, slot;
505 
506     // Where does this group end?
507     end = TT.blockbits;
508     if ((i+1)*TT.blockbits > TT.blocks) end = TT.blocks & (TT.blockbits-1);
509 
510     // Blocks used by inode table
511     itable = (TT.inodespg*sizeof(struct ext2_inode))/TT.blocksize;
512 
513     // If a superblock goes here, write it out.
514     start = group_superblock_overhead(i);
515     if (start) {
516       struct ext2_group *bg = (struct ext2_group *)toybuf;
517       int treeblocks = TT.treeblocks, treeinodes = TT.treeinodes;
518 
519       TT.sb.block_group_nr = SWAP_LE16(i);
520 
521       // Write superblock and pad it up to block size
522       xwrite(TT.fsfd, &TT.sb, sizeof(struct ext2_superblock));
523       temp = TT.blocksize - sizeof(struct ext2_superblock);
524       if (!i && TT.blocksize > 1024) temp -= 1024;
525       memset(toybuf, 0, TT.blocksize);
526       xwrite(TT.fsfd, toybuf, temp);
527 
528       // Loop through groups to write group descriptor table.
529       for(j=0; j<TT.groups; j++) {
530 
531         // Figure out what sector this group starts in.
532         used = group_superblock_overhead(j);
533 
534         // Find next array slot in this block (flush block if full).
535         slot = j % (TT.blocksize/sizeof(struct ext2_group));
536         if (!slot) {
537           if (j) xwrite(TT.fsfd, bg, TT.blocksize);
538           memset(bg, 0, TT.blocksize);
539         }
540 
541         // How many free inodes in this group?
542         temp = TT.inodespg;
543         if (!i) temp -= INODES_RESERVED;
544         if (temp > treeinodes) {
545           treeinodes -= temp;
546           temp = 0;
547         } else {
548           temp -= treeinodes;
549           treeinodes = 0;
550         }
551         bg[slot].free_inodes_count = SWAP_LE16(temp);
552 
553         // How many free blocks in this group?
554         temp = TT.inodespg/(TT.blocksize/sizeof(struct ext2_inode)) + 2;
555         temp = end-used-temp;
556         if (temp > treeblocks) {
557           treeblocks -= temp;
558           temp = 0;
559         } else {
560           temp -= treeblocks;
561           treeblocks = 0;
562         }
563         bg[slot].free_blocks_count = SWAP_LE32(temp);
564 
565         // Fill out rest of group structure
566         used += j*TT.blockbits;
567         bg[slot].block_bitmap = SWAP_LE32(used++);
568         bg[slot].inode_bitmap = SWAP_LE32(used++);
569         bg[slot].inode_table = SWAP_LE32(used);
570         bg[slot].used_dirs_count = 0;  // (TODO)
571       }
572       xwrite(TT.fsfd, bg, TT.blocksize);
573     }
574 
575     // Now write out stuff that every block group has.
576 
577     // Write block usage bitmap
578 
579     start += 2 + itable;
580     memset(toybuf, 0, TT.blocksize);
581     bits_set(toybuf, 0, start);
582     bits_set(toybuf, end, TT.blockbits-end);
583     temp = TT.treeblocks - usedblocks;
584     if (temp) {
585       if (end-start > temp) temp = end-start;
586       bits_set(toybuf, start, temp);
587     }
588     xwrite(TT.fsfd, toybuf, TT.blocksize);
589 
590     // Write inode bitmap
591     memset(toybuf, 0, TT.blocksize);
592     j = 0;
593     if (!i) bits_set(toybuf, 0, j = INODES_RESERVED);
594     bits_set(toybuf, TT.inodespg, slot = TT.blockbits-TT.inodespg);
595     temp = TT.treeinodes - usedinodes;
596     if (temp) {
597       if (slot-j > temp) temp = slot-j;
598       bits_set(toybuf, j, temp);
599     }
600     xwrite(TT.fsfd, toybuf, TT.blocksize);
601 
602     // Write inode table for this group (TODO)
603     for (j = 0; j<TT.inodespg; j++) {
604       slot = j % (TT.blocksize/sizeof(struct ext2_inode));
605       if (!slot) {
606         if (j) xwrite(TT.fsfd, in, TT.blocksize);
607         memset(in, 0, TT.blocksize);
608       }
609       if (!i && j<INODES_RESERVED) {
610         // Write root inode
611         if (j == 2) fill_inode(in+slot, dtb);
612       } else if (dti) {
613         fill_inode(in+slot, dti);
614         dti = treenext(dti);
615       }
616     }
617     xwrite(TT.fsfd, in, TT.blocksize);
618 
619     while (dtb) {
620       // TODO write index data block
621       // TODO write root directory data block
622       // TODO write directory data block
623       // TODO write file data block
624       put_zeroes(TT.blocksize);
625       start++;
626       if (start == end) break;
627     }
628     // Write data blocks (TODO)
629     put_zeroes((end-start) * TT.blocksize);
630   }
631 }
632