福特抓取网页增加分页

uat-temp-castle-2502241152-fordFetchWeb
臧学普 2 months ago
parent 37843777b5
commit d42f88d77a

@ -95,16 +95,23 @@ public class MesFordFetchWebServiceImpl implements IFordFetchWebService {
map.put("et",parameter.getEndDateTime());
map.put("catacode","0");
map.put("itemcode","0");
HttpResponse response = HttpRequest.get(parameter.getFetchUrl()).cookie(cookies).timeout(60000).form(map).execute();
log.info("doFetchData-查询出来的内容为:{}",response.body());
Document doc = Jsoup.parse(response.body());
Elements trList = doc.select("tr");
//校验最后一条数据是否已经存在数据库中, 特殊逻辑page如果为空则只查最后一页的内容
boolean result = checkLastPage(parameter, cookies, organizeCode);
if (!result){
//分页查询
map.put("page",parameter.getCurrentPageIndex());
HttpResponse response = HttpRequest.get(parameter.getFetchUrl()).cookie(cookies).timeout(60000).form(map).execute();
log.info("doFetchData-查询出来的内容为:{}",response.body());
Document doc = Jsoup.parse(response.body());
int tempCount = 0;
Elements trList = doc.select("tr");
for (int i = 1; i < trList.size(); i++) {
Elements tdList = trList.get(i).select("td");
MesFordJsaSortInfoWithFetchWeb webInfo = new MesFordJsaSortInfoWithFetchWeb();
if (tdList.get(0).text().equals("无数据") || StringUtils.isBlank(tdList.get(0).text())){
break;
}
tempCount++;
webInfo.setSeq(Long.parseLong(tdList.get(0).text()));
webInfo.setFetchDate(tdList.get(1).text());
webInfo.setFetchTime(tdList.get(2).text());
@ -134,7 +141,42 @@ public class MesFordFetchWebServiceImpl implements IFordFetchWebService {
fetchWebRao.insert(webInfo);
infoList.add(webInfo);
}
if (tempCount == parameter.getPageSize()){
parameter.setCurrentPageIndex(parameter.getCurrentPageIndex()+1);
ConvertBean.saveOrUpdate(parameter,"doFetchData-index");
paramRao.update(parameter);
}
}
return infoList;
}
/**
 * Checks whether the last row returned by the remote page is already stored locally.
 *
 * <p>Sends the query without a {@code page} form field; per the original author's note the
 * remote side then returns only the last page (not verifiable from this file). The last
 * {@code tr} of the response is taken, and stored rows matching its first three cells
 * ({@code seq}, {@code fetchDate}, {@code fetchTime}) are counted.
 *
 * @param parameter    fetch configuration supplying the URL, page size, info point code and
 *                     the start/end date-time of the query
 * @param cookies      cookies sent with the request
 * @param organizeCode organization code used to build the query condition
 * @return {@code true} when at least one stored row matches the last remote row;
 *         {@code false} when the response has no rows, the last row has no usable cells,
 *         the last row is the "no data" placeholder or blank, or no stored row matches
 */
private boolean checkLastPage(MesFordFetchParameter parameter, List<HttpCookie> cookies,String organizeCode) {
    Map<String,Object> map = new HashMap<>();
    map.put("maxpage",parameter.getPageSize() != null ? parameter.getPageSize().toString() : "1");
    map.put("trigger",parameter.getInfoPointCode());
    map.put("st",parameter.getStartDateTime());
    map.put("et",parameter.getEndDateTime());
    map.put("catacode","0");
    map.put("itemcode","0");
    // No "page" entry is added on purpose: this request is meant to fetch only the last page.
    HttpResponse response = HttpRequest.get(parameter.getFetchUrl()).cookie(cookies).timeout(60000).form(map).execute();
    log.info("doFetchData-查询出来的内容为:{}",response.body());
    Document doc = Jsoup.parse(response.body());
    Elements trList = doc.select("tr");
    // A response without any table row (e.g. an error page) would otherwise fail on get(-1).
    if (trList.isEmpty()) {
        return false;
    }
    Elements tdList = trList.get(trList.size()-1).select("td");
    if (tdList.isEmpty()) {
        return false;
    }
    String firstCell = tdList.get(0).text();
    // The placeholder row or a blank first cell means there is nothing to compare against.
    if ("无数据".equals(firstCell) || StringUtils.isBlank(firstCell)) {
        return false;
    }
    // seq, fetchDate and fetchTime are read from cells 0..2; a shorter row cannot be matched.
    if (tdList.size() < 3) {
        return false;
    }
    // Check whether this row has already been fetched and stored.
    DdlPackBean webInfoPackBean = DdlPackBean.getDdlPackBean(organizeCode);
    DdlPreparedPack.getNumEqualPack(Long.parseLong(firstCell),"seq",webInfoPackBean);
    DdlPreparedPack.getStringEqualPack(tdList.get(1).text(),"fetchDate",webInfoPackBean);
    DdlPreparedPack.getStringEqualPack(tdList.get(2).text(),"fetchTime",webInfoPackBean);
    int count = fetchWebRao.findByHqlWhereCount(webInfoPackBean);
    return count > 0;
}
}

Loading…
Cancel
Save