Jsoup进行链接提取-提取url链接

[[178886]]
// 检索WebContent中的图片和附件 
 Document doc = Jsoup.parse(info.getWebc().getWebContent()); 
 // 当前页中的图片 
 Elements srcLinks = doc.select("img[src]"); 
 String imagesPath = ""; 
 for (Element link : srcLinks) { 
 // 剔除标签，只剩链接路径 
 String imagesPaths = link.attr("src"); 
 String ht = imagesPaths.substring(0, 4); 
 String htt = imagesPaths.substring(0, 1); 
 if (!ht.equals("http") && htt.equals("/")) { 
 imagesPath = imagesPaths.trim().replaceAll(ContextPath, ""); 
 imagesPath = imagesPath.substring(imagesPath.lastIndexOf("/") + 1); 
 } else { 
 imagesPath = ""; 
 } 
 //System.out.println("---导入WebContent中的图片---" + imagesPath); 
 if (!imagesPath.equals("")) { 
 importCopy("/html/"+strSiteID+"/"+strColumnID+"/"+keyID+"/"+imagesPath, path, strInfoID,"/html/"+strSiteID+"/"+strColumnID+"/"+keyID+"/"); 
 } 
 } 
 // 提取所有的href连接 
 String filePaths = ""; 
 Elements linehrefs = doc.select("a[href]"); 
 for (Element link : linehrefs) { 
 filePaths = link.attr("href").trim().replaceAll(ContextPath, ""); 
 filePaths = filePaths.substring(filePaths.lastIndexOf("/") + 1); 
 //System.out.println("---导入WebContent中的文件---" + filePaths); 
 if (!filePaths.equals("")) { 
 importCopy("/html/"+strSiteID+"/"+strColumnID+"/"+keyID+"/"+filePaths, path, strInfoID,"/html/"+strSiteID+"/"+strColumnID+"/"+keyID+"/"); 
 } 
 }
【本文是51CTO专栏作者张勇波的原创文章，转载请通过51CTO获取作者授权】