Java API of HDFS
Steps
- On the Windows machine, add a new environment variable:
  - HADOOP_USER_NAME : root
- Create a Maven project
  - Java version: 1.8
  - Dependencies needed in the pom:
    - hadoop-common 2.6.5
    - hadoop-hdfs 2.6.5
- Put core-site.xml & hdfs-site.xml under the /resources folder of the project
public class HDFSTest { private Configuration conf; private FileSystem fs; @Before public void conn() { conf = new Configuration(true); // preload the conf file } @Test public void mkdir() { Path dir = new Path("/path_in_hdfs"); if (fs.exists(dir)) { fs.delete(dir, true); } fs.mkdirs(dir); } @Test public void upload() { BufferInputStream input = new BufferInputStream(new FileInputStream(new File("/local_file_path"))); FSDataOutputStream output = fs.create(new Path("/hdfs_file_path")); IOUtils.copyBytes(input, output, conf, true); } @Test public void download() { // reverse of upload function } @Test public void blocksInfo() { Path file = new Path("/hdfs_file_path"); FileStatus fss = fs.getFileStatus(file); BlockLocation[] blks = fs.getFileBlockLocations(fss, 0, fss.getLen()); for(BlockLocation blk: blks){ sout(blk); } FSDataInputStream in = fs.open("/hdfs_file_path"); in.seek(start_position); } @After public void close() { fs.close(); } }