Java 相似度判斷(餘弦相似度)
業務邏輯:
輸入某一企業,返回跟該企業相似的企業列表。
大體思路:
1、確定使用企業的哪些欄位來構建相似度字典;
2、將某個企業各欄位的具體值轉換成向量來計算;
3、根據向量計算的值來排序。
程式碼如下:
/**
* 企業相似度
* @param id
* @return
*/
@Override
public Page findAndOrderBySimilar(String id,HiddenDangerListVO hiddenDangerListVO) {
Integer page = hiddenDangerListVO.getPage();
Integer limit = hiddenDangerListVO.getLimit();
if (page == null || page < 1) {
page = 1;
}
if (limit == null || limit < 0) {
limit = 5;
}
List<JSONObject> jsonObjectList = new ArrayList<>();
List<JSONObject> jsonObjectList2 = new ArrayList<>();
Map map = new HashMap();
String sql = "SELECT" +
" HY,YHBW,YHLY, companyname '企業名稱', " +
" HCRQ '核查日期', " +
"CASE WHEN YHDJ = '1' THEN " +
" '一般隱患' " +
" WHEN YHDJ = '2' THEN " +
" '重大隱患' ELSE '無型別' " +
" END '隱患級別', " +
"CASE WHEN ZGZT = '1' THEN " +
" '未整改' " +
" WHEN ZGZT = '2' THEN " +
" '整改中' " +
" WHEN ZGZT = '3' THEN " +
" '已整改' ELSE '無整改狀態' " +
" END '整改狀態', " +
" YHMS '隱患描述', " +
" YHDD '隱患地點'," +
" PCRE '排查人'," +
" CASE WHEN ZGLX = '1' THEN '立即整改' " +
" WHEN ZGLX = '2' THEN '限期整改' " +
" WHEN ZGLX = '3' THEN '停業停產整頓' ELSE '無整改資訊' " +
" END '整改型別'," +
" ZGWCRQ '整改完成時間' " +
" FROM t_hidden_danger_list limit 8000 ";
List<Map<String, Object>> findAll = jdbcTemplate.queryForList(sql);
for (Map map1 : findAll) {
jsonObjectList.add(JSONObject.fromObject(map1));
}
List<String> column = new ArrayList<>();
List<String> num_column = new ArrayList<>();
column.add("HY");
column.add("YHBW");
column.add("整改型別");
column.add("隱患級別");
column.add("整改狀態");
column.add("隱患地點");
num_column.add("YHLY");
Set<DicVO> dictionaries = RestructureUtil.dictionaries(jsonObjectList, column, num_column);
sql= "SELECT" +
" HY,YHBW,YHLY, companyname '企業名稱', " +
" HCRQ '核查日期', " +
"CASE WHEN YHDJ = '1' THEN " +
" '一般隱患' " +
" WHEN YHDJ = '2' THEN " +
" '重大隱患' ELSE '無型別' " +
" END '隱患級別', " +
"CASE WHEN ZGZT = '1' THEN " +
" '未整改' " +
" WHEN ZGZT = '2' THEN " +
" '整改中' " +
" WHEN ZGZT = '3' THEN " +
" '已整改' ELSE '無整改狀態' " +
" END '整改狀態', " +
" YHMS '隱患描述', " +
" YHDD '隱患地點'," +
" PCRE '排查人'," +
" CASE WHEN ZGLX = '1' THEN '立即整改' " +
" WHEN ZGLX = '2' THEN '限期整改' " +
" WHEN ZGLX = '3' THEN '停業停產整頓' ELSE '無整改資訊' " +
" END '整改型別'," +
" ZGWCRQ '整改完成時間' " +
" FROM t_hidden_danger_list "+
" WHERE ID="+id+" ";
List<Map<String, Object>> findAll2 = jdbcTemplate.queryForList(sql);
for (Map map1 : findAll2) {
jsonObjectList2.add(JSONObject.fromObject(map1));
}
List<Double> vector_1 = RestructureUtil.vectorFromJson(dictionaries, jsonObjectList2.get(0));
List<Double> vector_2;
for (int i = 0; i < jsonObjectList.size() - 1; i++) {
vector_2 = RestructureUtil.vectorFromJson(dictionaries, jsonObjectList.get(i));
if (vector_2!=null){
Double aDouble = RestructureUtil.similarityDegree(vector_1, vector_2);
map.put(jsonObjectList.get(i), aDouble);
}
}
map = sortByComparator(map);
List<Map.Entry<JSONObject, Double>> list = new ArrayList<Map.Entry<JSONObject, Double>>(map.entrySet());
//輸出
List listResult=new ArrayList();
for (Map.Entry<JSONObject, Double> entry : list) {
System.out.println(entry.getKey() + ":" + entry.getValue());
listResult.add(entry.getKey());
if (listResult.size()==100){
break;
}
}
Page pageResult = PageUtil.getPage(page, limit, listResult);
return pageResult;
}
/**
* map以value排序
*
* @param unsortMap
* @return
*/
public static Map sortByComparator(Map unsortMap) {
List list = new LinkedList(unsortMap.entrySet());
Collections.sort(list, new Comparator() {
public int compare(Object o1, Object o2) {
return ((Comparable) ((Map.Entry) (o2)).getValue())
.compareTo(((Map.Entry) (o1)).getValue());
}
});
Map sortedMap = new LinkedHashMap();
for (Iterator it = list.iterator(); it.hasNext(); ) {
Map.Entry entry = (Map.Entry) it.next();
sortedMap.put(entry.getKey(), entry.getValue());
}
return sortedMap;
}