常用CYPHER查询(四)

 

Here’s the table of contents:

  1. 多字段分组聚合
  2. Double类型数字转换
  3. 小数比例计算
  4. 数组参数访问关系数据库
  5. 动态拼接SQL参数批量查询
  6. 判断名称中是否包含了名称碎片中的任意一个词
  7. 查看是waiting状态的查询
  8. 加载DBMS驱动
  9. 生成gephi格式文件
  10. 时间格式转换函数
  11. 从数据库获取小数损失精度问题解决
  12. 正则替换
  13. 寻找非数值类型数据
  14. 批量写入ES
  15. JSON对象转为字符串
  16. 时间格式转换
  17. 随机数生成
  18. Neo4j中的随机节点匹配
  19. 指定Cypher运行时间
  20. 时间求差
  21. 集合求差集
  22. 浮点数处理保留有效位
  23. 从列表中删除元素
  24. 克隆子图
  25. Louvain模块度算法
  26. 三角计数和集聚系数
  27. 欧氏距离相似度
  28. 大图可视化
  29. 求总体标准差
  30. 数据阈值设置:平均值+一倍标准差
  31. 匹配到节点后设置属性
  32. Neo4j-3.4.x 全文索引

多字段分组聚合

WITH [
		{field1:'f1',field2:'f2',value:1},
        {field1:'f1',field2:'f2',field3:'f3',value:3},
        {field1:'f1',field2:'f2',value:7},
        {field1:'f1',field2:'f2',field3:'f3',value:12}] AS list
UNWIND list AS ele
// 以field1,field3
WITH ele.field1 AS field1,ele.field3 AS field3,ele
WHERE field1 IS NOT NULL
RETURN field1,
       MAX(ele.value),MIN(ele.value)
WITH [
		{field1:'f1',field2:'f2',value:1},
        {field1:'f1',field2:'f2',field3:'f3',value:3},
        {field1:'f1',field2:'f2',value:7},
        {field1:'f1',field2:'f2',field3:'f3',value:12}] AS list
UNWIND list AS ele
// 以field1,field3
WITH ele.field1 AS field1,ele.field3 AS field3,ele
WHERE field1 IS NOT NULL AND field3 IS NOT NULL
RETURN field1,
       MAX(ele.value),MIN(ele.value)

Double类型数字转换

RETURN apoc.convert.toFloat('2.123213904580435') AS weight

小数比例计算

RETURN apoc.convert.toFloat(apoc.number.exact.div(TOSTRING('0.213'),TOSTRING('1.343'),6)) AS decimal

数组参数访问关系数据库

CALL apoc.load.jdbc('jdbc:mysql://testlab-contentdb-dev.crkldnwly6ki.rds.cn-north-1.alibaba.com.cn:3306/analytics_graph_data?user=dev&password=testlabgogo&useUnicode=true&characterEncoding=utf8&serverTimezone=UTC', 'SELECT ybId,tfidf FROM XXXY_YANBAO_CONCEPT_CALC_NO_WEIGHT_BEFORE WHERE tfidf>0.02 AND kwId IN (1211876451,17344784)',[])

动态拼接SQL参数批量查询

WITH 0.01 AS weight,'(1211876451,17344784)' AS kwIds
CALL apoc.cypher.run('CALL apoc.load.jdbc(\'jdbc:mysql://testlab-contentdb-dev.crkldnwly6ki.rds.cn-north-1.alibaba.com.cn:3306/analytics_graph_data?user=dev&password=testlabgogo&useUnicode=true&characterEncoding=utf8&serverTimezone=UTC\', \'SELECT ybId,tfidf FROM XXXY_YANBAO_CONCEPT_CALC_NO_WEIGHT_BEFORE WHERE tfidf>'+weight+' AND kwId IN '+kwIds+'\',[])',{}) YIELD value RETURN value

判断名称中是否包含了名称碎片中的任意一个词

// 列表包含
RETURN ANY(frag IN $para.fundFragNameList WHERE s.name CONTAINS frag AND frag<>'' AND frag IS NOT NULL) AS  bool
RETURN ANY(frag IN ['中欧'] WHERE '中欧基金' CONTAINS frag AND frag<>'' AND frag IS NOT NULL) AS  bool

查看是waiting状态的查询

CALL dbms.listQueries() YIELD queryId,username,metaData,query,parameters,planner,runtime,indexes,startTime,protocol,clientAddress,requestUri,status,resourceInformation,activeLockCount,elapsedTimeMillis,cpuTimeMillis,waitTimeMillis,idleTimeMillis,allocatedBytes,pageHits,pageFaults,connectionId
	WHERE status='waiting'
	RETURN queryId,username,metaData,query,parameters,planner,runtime,indexes,startTime,protocol,clientAddress,requestUri,status,resourceInformation,activeLockCount,elapsedTimeMillis,cpuTimeMillis,waitTimeMillis,idleTimeMillis,allocatedBytes,pageHits,pageFaults,connectionId

加载DBMS驱动

CALL apoc.load.driver("com.mysql.jdbc.Driver");
CALL apoc.load.driver("oracle.jdbc.driver.OracleDriver");
CALL apoc.load.driver("com.microsoft.sqlserver.jdbc.SQLServerDriver");

生成gephi格式文件

CALL apoc.export.graphml.query('MATCH p=(n:HORGShareHoldV002)-->(m:HORGShareHoldV002) RETURN p LIMIT 25','test.gexf',{useTypes:true,format: 'gephi',stream:true})

时间格式转换函数

RETURN apoc.date.convertFormat('20190514145206', 'yyyyMMddHHmmss', 'yyyy-MM-dd HH:mm:ss') AS dateTime

从数据库获取小数损失精度问题解决

先转换成字符型拿出数据,然后使用apoc.convert.toFloat(stock_price)函数转换成小数存到图数据库

CALL apoc.load.jdbc('jdbc:mysql://testlab-contentdb-dev.crkldnwly6ki.rds.cn-north-1.alibaba.com.cn:3306/analytics_graph_data?user=dev&password=testlabgogo&useUnicode=true&characterEncoding=utf8&serverTimezone=UTC', 'SELECT CAST(stock_price AS CHAR) AS restock_price,stock_price FROM analytics_graph_data.ASHAREINSIDEHOLDER_20210726 a WHERE a.HWMP IS NULL limit 1') YIELD row RETURN * LIMIT 1

正则替换

RETURN apoc.text.replace('112343sadsad','[0-9]','') AS newStr
RETURN apoc.text.replace('Hello World!', '[^a-zA-Z]', '!!!') AS newStr
//替换中文括号以及括号内内容
RETURN apoc.text.replace('太棒了!(jsapfhaspogf)测试', '\\(([^}]*)\\)', '')
//替换英文括号以及括号内内容
RETURN apoc.text.replace('太棒了!(jsapfhaspogf)测试', '\\(([^}]*)\\)', '')
//替换中文括号以及括号内内容
RETURN apoc.text.replace('太棒了![jsapfhaspogf]测试', '\\[([^}]*)\\]', '')
//去除罗马字符,转为小写
RETURN LOWER(apoc.text.replace('Hello World!Ⅰ', '[ⅠⅡⅢⅣⅤⅥⅦⅧⅨⅩⅪⅫ]', ''))
//去除罗马字符,转为小写,替换中文括号以及括号内内容、替换英文括号以及括号内内容、替换中文括号以及括号内内容
RETURN apoc.text.replace(apoc.text.replace(apoc.text.replace(LOWER(apoc.text.replace('Hello World!Ⅰ', '[ⅠⅡⅢⅣⅤⅥⅦⅧⅨⅩⅪⅫ]', '')), '\\(([^}]*)\\)', ''), '\\(([^}]*)\\)', ''), '\\[([^}]*)\\]', '')
//替换中文括号以及括号内内容、替换英文括号以及括号内内容、替换中文括号以及括号内内容
RETURN apoc.text.replace(apoc.text.replace(apoc.text.replace('太棒了!(jsapfhaspogf)测试', '\\(([^}]*)\\)', ''), '\\(([^}]*)\\)', ''), '\\[([^}]*)\\]', '')

寻找非数值类型数据

RETURN apoc.coll.contains(['INTEGER','FLOAT'],apoc.meta.cypher.type(r.weight))

批量写入ES

CALL apoc.es.postRaw('http://localhost:9200','gdb_mstr_hinst_cluster/_bulk','{"index":{"_id":"1084265135-1084265135-1"}{}}\r\n{"auto_id":1084265135,"hcreatetime":"2021-12-29 18:37:04","unique":"1084265135-1084265135-1","hisvalid":0,"hcode":"HINST0000001084265135","hupdatetime":"2021-12-29 18:37:04","node_id":1084265135}\r\n')
WITH
	'{"index":{"_id":"1084265135-1084265135-1"}{}}' AS unique,
    '{"auto_id":1084265135,"hcreatetime":"2021-12-29 18:37:04","unique":"1084265135-1084265135-1","hisvalid":1,"hcode":"HINST0000001084265135","hupdatetime":"2021-12-29 18:37:04","node_id":1084265135}' AS data
WITH unique+'\r\n'+data+'\r\n' AS request
CALL apoc.es.postRaw('http://localhost:9200','gdb_mstr_hinst_cluster/_bulk',request) YIELD value
	RETURN *

JSON对象转为字符串

RETURN apoc.convert.toJson({num:2,str:'str'}) as data
RETURN apoc.convert.toJson([{num:2,str:'str'}]) as data

时间格式转换

RETURN TOINTEGER(apoc.date.convertFormat('2022-02-07 11:39:46','yyyy-MM-dd HH:mm:ss','yyyyMMddHHmmss')) AS timeL
RETURN TOINTEGER(apoc.date.convertFormat('2017年07月20日','yyyy年MM月dd日','yyyyMMdd')) AS formattedDate

随机数生成

WITH TOINTEGER(RAND() * 60) AS rand_node, 5 AS offset
WITH RANGE(rand_node - offset, rand_node + offset) AS rand_range
return rand_range

Neo4j中的随机节点匹配

MATCH (n)
WHERE RAND() <= 0.01
RETURN n
LIMIT 1
// 从RAND()函数生成随机ID并将其乘以节点数
MATCH (n)
WHERE id(n) = TOINTEGER(RAND() * 3400000)
RETURN n
// ID因删除数据不再完全连续,则可以调整随机数据范围进行抽取
WITH TOINTEGER(RAND() * 3400000) AS rand_node, 5 AS offset
WITH RANGE(rand_node - offset, rand_node + offset) AS rand_range
MATCH (n)
WHERE id(n) IN rand_range
RETURN n
LIMIT 1

指定Cypher运行时间

// 10000以内的整数运行加法计算,并计算所有加法结果的合计值
// {}指定Cypher的参数
// 2000指定Cypher的运行时间为两秒以内
CALL apoc.cypher.runTimeboxed(
  'UNWIND range(1,10000) AS a1 UNWIND range(1,10000) AS a2 WITH a1+a2 AS a RETURN sum(a) AS sum',
  {}, 2000
  ) YIELD value
RETURN value

时间求差

//时间差值
RETURN duration.inDays(date("2019-01-01"), date("2019-01-06")).days AS days;

集合求差集

RETURN apoc.coll.disjunction([1,4,5],[1,2,4])

浮点数处理保留有效位

RETURN apoc.number.exact.toFloat('50423.1656', 5,'HALF_UP') AS output;

从列表中删除元素

RETURN apoc.coll.remove([1,2,3,999],3,1)

克隆子图

MATCH p=()-[r:FRIEND]->() WITH p
CALL apoc.refactor.cloneSubgraphFromPaths([p]) YIEL D input, output, error RETURN *

Louvain模块度算法

CALL algo.louvain('LABEL','REL',{write:true,weightProperty:'min_max_scaling' ,writeProperty:'louvain_community',includeIntermediateCommunities:true,intermediateCommunitiesWriteProperty:'communities',concurrency:8}) YIELD nodes,communityCount,iterations,loadMillis,computeMillis,writeMillis
RETURN nodes,communityCount,iterations,loadMillis,computeMillis,writeMillis;

三角计数和集聚系数

CALL algo.triangleCount.stream('FGI基金经理SIM','相似',{concurrency:8})
CALL algo.triangleCount('FGI基金经理SIM','相似',{concurrency:8})

欧氏距离相似度

//靠近1相似
RETURN algo.similarity.euclidean([1,2,3,4,5],[1,2,3,4,5])
//靠近0相似
RETURN algo.similarity.euclideanDistance([1,2,3,4,5],[1.2,2,3,4,5])

大图可视化

MATCH p=(n:Day)--()
WITH COLLECT(p) AS paths
CALL apoc.gephi.add('localhost:8080','workspace1',paths,'',[]) YIELD nodes, relationships, format
RETURN *

求总体标准差

WITH [9,2,5,4,12,7,8,11,9,3,7,4,12,5,4,10,9,6,9,4] AS number
//WITH RANGE(1,1000) AS number
//求平均值
WITH apoc.coll.avg(number) AS avg,number
//从每一个数值减去平均,然后求差的平方
WITH REDUCE(l=[],e IN number | l+(e-avg)*(e-avg)) AS list
//求结果的平均,并取平方根
WITH SQRT(apoc.coll.sum(list)/SIZE(list)) AS sqrt
RETURN sqrt
//优化
WITH [9,2,5,4,12,7,8,11,9,3,7,4,12,5,4,10,9,6,9,4] AS number
//WITH RANGE(1,1000) AS number
//求平均值
WITH apoc.coll.avg(number) AS avg,number
//从每一个数值减去平均,然后求差的平方
UNWIND number AS e
WITH (e-avg) AS el
WITH COLLECT(el*el) AS list
//求结果的平均,并取平方根
WITH SQRT(apoc.coll.sum(list)/SIZE(list)) AS sqrt
RETURN sqrt

数据阈值设置:平均值+一倍标准差

//优化
WITH [9,2,5,4,12,7,8,11,9,3,7,4,12,5,4,10,9,6,9,4] AS number
//WITH RANGE(1,1000) AS number
//求平均值
WITH apoc.coll.avg(number) AS avg,number
//从每一个数值减去平均,然后求差的平方
UNWIND number AS e
WITH avg,(e-avg) AS el
WITH avg,COLLECT(el*el) AS list
//求结果的平均,并取平方根
WITH avg,SQRT(apoc.coll.sum(list)/SIZE(list)) AS sqrt
//阈值:平均值+一倍标准差
WITH avg,sqrt,avg+sqrt AS threshold
RETURN avg,sqrt,threshold

匹配到节点后设置属性

//MERGE -> CREATE SET -> MATCH SET
CALL apoc.merge.node(['Person'],{id:123},NULL,{is_build_word:true}) YIELD node RETURN node

Neo4j-3.4.x 全文索引

neo4j-apoc-procedures-3.5-indexes-fulltext-index

  • 创建节点并将其添加到索引
    MATCH (n:`股票名称`)
    WITH n
    CALL db.index.explicit.addNode('股票名称',n,'value',n.value) YIELD success RETURN COUNT(*) AS count;
    
  • 验证索引是否已创建
    CALL db.index.explicit.list();
    
  • 查找以开头的股票
    CALL db.index.explicit.searchNodes('股票名称','value:中国*');
    
  • 索引需要手动更新新增的数据
    MATCH (n:`股票名称`) WHERE n.value='中国股票'
    WITH n
    CALL db.index.explicit.addNode('股票名称',n,'value',n.value) YIELD success RETURN COUNT(*) AS count;