In a previous post, I set up a 3-node Hadoop cluster:
master: node1
slaves: node1, node2, node3
Today, I am going to test how Hadoop splits a big file into blocks and distributes them across the nodes.
Suppose the big test file is:
$ ls -l XXXX.tar.gz
-rw-r--r--. 1 uid gid 142593641 Feb 25 2014 XXXX.tar.gz
* add 2 copies of the file into Hadoop
hadoop-0.20.2/bin/hadoop dfs -put /path/to/XXXX.tar.gz /data/file1
hadoop-0.20.2/bin/hadoop dfs -put /path/to/XXXX.tar.gz /data/file2
* check the files and their block locations
$ hadoop-0.20.2/bin/hadoop dfs -ls /data
Found 2 items
-rw-r--r-- 2 huid hgid 142593641 2015-05-25 05:39 /data/file1
-rw-r--r-- 2 huid hgid 142593641 2015-05-25 05:39 /data/file2
$ hadoop-0.20.2/bin/hadoop fsck /data -files -blocks -locations
/data/file1 142593641 bytes, 3 block(s): OK
0. blk_-7503928884623423785_1002 len=67108864 repl=2 [node1:50010, node3:50010]
1. blk_-4637956211111529516_1002 len=67108864 repl=2 [node1:50010, node3:50010]
2. blk_2524680377287011061_1002 len=8375913 repl=2 [node1:50010, node3:50010]
/data/file2 142593641 bytes, 3 block(s): OK
0. blk_-6831940211467475548_1003 len=67108864 repl=2 [node1:50010, node2:50010]
1. blk_8536201268713735224_1003 len=67108864 repl=2 [node1:50010, node3:50010]
2. blk_3336643339863134573_1003 len=8375913 repl=2 [node1:50010, node3:50010]
* check against the local filesystem on each node
cd $hadooproot/dfs/data/current
$ ls -l | grep -v meta
node1:
-rw-r--r-- 1 uid gid 67108864 May 25 05:39 blk_-7503928884623423785 #file1, block 0
-rw-r--r-- 1 uid gid 67108864 May 25 05:39 blk_-4637956211111529516 #file1, block 1
-rw-r--r-- 1 uid gid 8375913 May 25 05:39 blk_2524680377287011061 #file1, block 2
-rw-r--r-- 1 uid gid 67108864 May 25 05:39 blk_-6831940211467475548 #file2, block 0
-rw-r--r-- 1 uid gid 67108864 May 25 05:39 blk_8536201268713735224 #file2, block 1
-rw-r--r-- 1 uid gid 8375913 May 25 05:39 blk_3336643339863134573 #file2, block 2
node2:
-rw-r--r-- 1 uid gid 67108864 May 25 05:43 blk_-6831940211467475548 #file2, block 0
node3:
-rw-r--r-- 1 uid gid 67108864 May 25 05:43 blk_-7503928884623423785 #file1, block 0
-rw-r--r-- 1 uid gid 67108864 May 25 05:43 blk_-4637956211111529516 #file1, block 1
-rw-r--r-- 1 uid gid 8375913 May 25 05:43 blk_2524680377287011061 #file1, block 2
-rw-r--r-- 1 uid gid 67108864 May 25 05:43 blk_8536201268713735224 #file2, block 1
-rw-r--r-- 1 uid gid 8375913 May 25 05:43 blk_3336643339863134573 #file2, block 2