1. 程式人生 > hdfs遍歷檔案方法

hdfs遍歷檔案方法

import org.apache.commons.lang.StringUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.*;   import java.io.IOException; import java.net.URI; import java.net.URISyntaxException;   class HelloWord { public static void main(String[] args){     Hdfs();   }     //方法1: public static void Hdfs(){ try{ Configuration conf = new Configuration(); conf.set("fs.defaultFS","hdfs://10.8.6.126:8020"); FileSystem fs = null; //fs = FileSystem.get(new URI("hdfs://10.8.6.126:8020"),conf); //這兩種方式都可以配置hdfs ip fs = FileSystem.get(conf);   RemoteIterator<LocatedFileStatus> lt = fs.listFiles(new Path("hdfs://10.8.6.126:8020/ada/lyy/App"), true); while (lt.hasNext()) { LocatedFileStatus file = lt.next(); if(file.isFile()) { Path path = file.getPath(); System.out.println("檔案:["+path.toString()+"]"); System.out.println("檔名:["+path.getName.toString()+"]"); //只是檔名,沒有路徑資訊 }else{ Path path = file.getPath(); System.out.println("目錄:["+path.toString()+"]"); } }   } catch( IOException e){ System.out.println(e.getStackTrace()); } }   }   //方法2: public static void Hdfs(){ try{ Configuration conf = new Configuration(); conf.set("fs.defaultFS","hdfs://10.8.6.126:8020"); FileSystem fs = null; //fs = FileSystem.get(new URI("hdfs://10.8.6.126:8020"),conf); //這兩種方式都可以配置hdfs ip fs = FileSystem.get(conf);   Path path = new Path("/shell"); //通過fs的listStatus方法獲取一個指定path的所有檔案資訊(status),因此我們需要傳入一個hdfs的路徑,返回的是一個filStatus陣列 FileStatus[] fileStatuses = fs.listStatus(path); for (FileStatus fileStatus : fileStatuses) { //判斷當前迭代物件是否是目錄 boolean isDir = fileStatus.isDirectory(); //獲取當前檔案的絕對路徑 String fullPath = fileStatus.getPath().toString(); System.out.println("isDir:" + isDir + ",Path:" + fullPath); } }       同時讀取本地和hdfs目錄:   public class TestHdfs{   public static void main(String[] args) {     //讀取配置檔案 Configuration conf=new Configuration(); //String path = "hdfs://10.8.6.126:8020/ada/lyy/data/NaiveBayesModel.model"; String path = "file:///home/liyanyan/cluster/NaiveBayesModel.model";     
  String classfile = "";   //讀取配置檔案 FileStatus[] listFile = null; FileSystem fs = null; try { if(path.startsWith("hdfs:")){ conf.set("fs.defaultFS",path.substring(0,path.indexOf('/', path.indexOf(':') + 3))); fs = FileSystem.get(conf);   }else if(path.startsWith("file:")){ fs=FileSystem.getLocal(conf); //獲取檔案目錄 }   listFile =fs.listStatus(new Path(path+"/metadata"), new RegxRejectPathFilter("^[._]+\\w+.*$"));   if(listFile.length != 1){ }else{ classfile = listFile[0].getPath().toString(); }   }catch(IOException e){ e.printStackTrace(); }   System.out.println("classfile = "+classfile.replaceFirst("file:","file://"));   }   private static class RegxRejectPathFilter implements PathFilter { private final String regex; public RegxRejectPathFilter(String regex) { this.regex=regex; }   @Override public boolean accept(Path path) { // TODO 自動生成的方法存根 boolean flag=path.getName().toString().matches(regex); return !flag; } } }