Java 获取全国博物馆名录的方法。该网站有反爬措施,不过有一定前端基础的人就能看明白:首先,通过官网访问该地址时所带的请求头信息都要模拟出来。
其次,secretSign 参数是需要加密处理的,逆向网站的 JS 就能看出它的生成方式。
到这一步就好办了,模拟这个过程即可拿到数据。
package com.zxlhdata.framework.core.util;

import java.io.BufferedReader;
import java.io.Closeable;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.net.URL;
import java.net.URLConnection;
import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.zip.GZIPInputStream;
import java.util.zip.InflaterInputStream;

import com.zxlhdata.framework.poi.excel.ExcelUtil;

import net.sf.json.JSONArray;
import net.sf.json.JSONObject;

/**
 * Utility for downloading the national museum directory and exporting it to Excel.
 *
 * <p>{@link #sendGet(String, String)} reproduces the request headers the official web
 * front end sends, including the signed {@code secretSign}/{@code secretTime} pair.
 * {@link #sendPost(String, String)} is a plain, unsigned POST helper.
 */
public class HttpRequestCollectionssUtil {

    /** Charset used for URL-encoding, request bodies and response decoding. */
    private static final String CHARSET = "UTF-8";

    /** Upper bound for establishing the TCP connection, in milliseconds. */
    private static final int CONNECT_TIMEOUT_MS = 15000;

    /** Upper bound for waiting on response data, in milliseconds. */
    private static final int READ_TIMEOUT_MS = 60000;

    /** Value sent in the {@code appCode} request header. */
    private static final String APP_CODE = "cms";

    /** Fixed prefix of the string that is hashed into {@code secretSign}. */
    private static final String SIGN_PREFIX = "secret_time";

    /** Fixed suffix of the string that is hashed into {@code secretSign}. */
    private static final String SIGN_KEY = "crb_2021@#";

    /**
     * Sends a signed GET request to the given URL.
     *
     * @param url   the URL to request; may already contain a query string
     * @param param extra query parameters in {@code name1=value1&name2=value2} form;
     *              {@code null} or empty to add nothing
     * @return the response body with line terminators removed; if the request fails,
     *         whatever was read before the failure (an empty string if nothing was read)
     */
    public static String sendGet(String url, String param) {
        StringBuilder result = new StringBuilder();
        try {
            // Millisecond timestamp; sent as secretTime and also hashed into secretSign.
            String timeStamp = String.valueOf(new Date().getTime());
            String secretSign = buildSecretSign(timeStamp);

            String urlNameString = url;
            if (param != null && !"".equals(param)) {
                // Use '&' when the URL already has a query string so we never emit two '?'.
                urlNameString += (url.indexOf('?') >= 0 ? "&" : "?") + param;
            }

            URLConnection connection = new URL(urlNameString).openConnection();
            connection.setConnectTimeout(CONNECT_TIMEOUT_MS);
            connection.setReadTimeout(READ_TIMEOUT_MS);

            // Request headers copied from the official site's own request.
            // NOTE(review): the JDK's built-in HTTP handler may silently drop restricted
            // headers such as Host and Origin unless sun.net.http.allowRestrictedHeaders
            // is enabled - confirm if the server starts rejecting requests.
            connection.setRequestProperty("Origin", "http://gl.ncha.gov.cn");
            connection.setRequestProperty("Host", "gl.ncha.gov.cn:9200");
            connection.setRequestProperty("dataType", "DataCollection");
            connection.setRequestProperty("ClassCode", "CollectionQualityLevelCode");
            connection.setRequestProperty("secretDefinite", "districtCode");
            connection.setRequestProperty("secretSign", secretSign);
            connection.setRequestProperty("secretTime", timeStamp);
            connection.setRequestProperty("appCode", APP_CODE);
            connection.setRequestProperty("Referer", "http://gl.ncha.gov.cn/");
            connection.setRequestProperty("Cookie", "mozi-assist={'show':false,'audio':false,'speed':'middle','zomm':1,'cursor':false,'pointer':false,'bigtext':false,'overead':false}");
            connection.setRequestProperty("accept", "application/json, text/plain, */*");
            connection.setRequestProperty("connection", "Keep-Alive");
            connection.setRequestProperty("Accept-Encoding", "gzip, deflate");
            connection.setRequestProperty("Accept-Language", "zh-CN,zh;q=0.9");
            connection.setRequestProperty("districtCode", "null");
            connection.setRequestProperty("user-agent", "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36");

            connection.connect();

            // Dump every response header for debugging.
            Map<String, List<String>> headers = connection.getHeaderFields();
            for (String key : headers.keySet()) {
                System.out.println(key + "--->" + headers.get(key));
            }

            readBody(connection, result);
        } catch (Exception e) {
            System.out.println("发送GET请求出现异常!" + e);
            e.printStackTrace();
        }
        return result.toString();
    }

    /**
     * Sends a POST request to the given URL.
     *
     * @param url   the URL to request
     * @param param the request body in {@code name1=value1&name2=value2} form
     * @return the response body with line terminators removed; if the request fails,
     *         whatever was read before the failure (an empty string if nothing was read)
     */
    public static String sendPost(String url, String param) {
        PrintWriter out = null;
        StringBuilder result = new StringBuilder();
        try {
            URLConnection conn = new URL(url).openConnection();
            conn.setConnectTimeout(CONNECT_TIMEOUT_MS);
            conn.setReadTimeout(READ_TIMEOUT_MS);

            conn.setRequestProperty("accept", "*/*");
            conn.setRequestProperty("connection", "Keep-Alive");
            conn.setRequestProperty("user-agent", "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1;SV1)");

            // Both flags are required for URLConnection to send a request body.
            conn.setDoOutput(true);
            conn.setDoInput(true);

            // Encode the body explicitly instead of relying on the platform charset.
            out = new PrintWriter(new OutputStreamWriter(conn.getOutputStream(), CHARSET));
            out.print(param);
            out.flush();

            readBody(conn, result);
        } catch (Exception e) {
            System.out.println("发送 POST 请求出现异常!" + e);
            e.printStackTrace();
        } finally {
            closeQuietly(out);
        }
        return result.toString();
    }

    /**
     * Downloads the museum directory, drops duplicate rows (same province name and
     * museum name) and writes the remaining rows to an Excel file.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String ret = sendGet("http://gl.ncha.gov.cn:9200/api/portal/dataCollectionss?pageNum=1&pageSize=6183", "");
        if (ret == null || ret.length() == 0) {
            // sendGet has already reported the failure; parsing an empty body would only throw.
            System.out.println("未获取到数据,已终止导出");
            return;
        }

        JSONObject obj = JSONObject.fromObject(ret);
        JSONArray list = obj.getJSONArray("rows");

        // Keys of the rows already kept, used to skip duplicates.
        Set<String> seen = new HashSet<String>();
        List<Map<String, Object>> dataList = new ArrayList<Map<String, Object>>();
        for (int i = 0; i < list.size(); i++) {
            JSONObject row = list.getJSONObject(i);
            String key = row.get("proName") + "_" + row.get("collectionName");
            if (seen.add(key)) {
                dataList.add(row);
            }
        }
        System.out.println(ret);

        String[] HEAD_LIST = { "省份", "博物馆编号", "博物馆名称", "性质", "质量等级", "是否免费开放", "藏品数(件/套)", "珍贵文物(件/套)", "展览(个)", "教育活动(次)", "参观人数(万人次)" };
        String[] FIELD_LIST = { "proName", "districtCode", "collectionName", "collectionNatureName", "collectionQualityLevelName", "openFlag", "crNum", "valuableNum", "exhibitionNum", "educationActivityNum", "attendanceNum" };
        try {
            ExcelUtil.createExcel("d://全国博物馆名录.xls", HEAD_LIST, FIELD_LIST, dataList);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Builds the {@code secretSign} header value: the MD5 of
     * {@code "secret_time" + timeStamp + "districtCode" + "null" + "crb_2021@#"}.
     */
    private static String buildSecretSign(String timeStamp) throws IOException {
        String keyValue = URLEncoder.encode("districtCode", CHARSET) + URLEncoder.encode("null", CHARSET);
        return MD5.GetMD5Code(SIGN_PREFIX + timeStamp + keyValue + SIGN_KEY);
    }

    /**
     * Reads the response body into {@code sink}, undoing gzip/deflate compression when the
     * response declares it and decoding the bytes as UTF-8. Line terminators are dropped.
     */
    private static void readBody(URLConnection connection, StringBuilder sink) throws IOException {
        InputStream raw = connection.getInputStream();
        BufferedReader reader = null;
        try {
            InputStream body = raw;
            String encoding = connection.getContentEncoding();
            if (encoding != null) {
                // Setting Accept-Encoding by hand means URLConnection will not decompress for us.
                String normalized = encoding.trim();
                if ("gzip".equalsIgnoreCase(normalized)) {
                    body = new GZIPInputStream(raw);
                } else if ("deflate".equalsIgnoreCase(normalized)) {
                    body = new InflaterInputStream(raw);
                }
            }
            // NOTE(review): assumes the server always answers in UTF-8 - confirm.
            reader = new BufferedReader(new InputStreamReader(body, CHARSET));
            String line;
            while ((line = reader.readLine()) != null) {
                sink.append(line);
            }
        } finally {
            if (reader != null) {
                closeQuietly(reader);
            } else {
                // The decoding stream was never built; release the raw stream directly.
                closeQuietly(raw);
            }
        }
    }

    /** Closes {@code resource} if it is non-null, printing (not propagating) any failure. */
    private static void closeQuietly(Closeable resource) {
        if (resource == null) {
            return;
        }
        try {
            resource.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
以上代码不得用于非法用途,如触犯法律法规,后果自负,与本网站无任何关系。
本文由 admin 创作,采用知识共享署名 4.0 国际许可协议进行许可。
本站文章除注明转载/出处外,均为本站原创或翻译,转载前请务必署名
最后编辑时间为:2024-09-09 11:10:56