Here’s the table of contents:
- 多字段分组聚合
- Double类型数字转换
- 小数比例计算
- 数组参数访问关系数据库
- 动态拼接SQL参数批量查询
- 判断名称中是否包含了名称碎片中的任意一个词
- 查看是waiting状态的查询
- 加载DBMS驱动
- 生成gephi格式文件
- 时间格式转换函数
- 从数据库获取小数损失精度问题解决
- 正则替换
- 寻找非数值类型数据
- 批量写入ES
- JSON对象转为字符串
- 时间格式转换
- 随机数生成
- Neo4j中的随机节点匹配
- 指定Cypher运行时间
- 时间求差
- 集合求差集
- 浮点数处理保留有效位
- 从列表中删除元素
- 克隆子图
- Louvain模块度算法
- 三角计数和集聚系数
- 欧氏距离相似度
- 大图可视化
- 求总体标准差
- 数据阈值设置:平均值+一倍标准差
- 匹配到节点后设置属性
- Neo4j-3.4.x 全文索引
多字段分组聚合
WITH [
{field1:'f1',field2:'f2',value:1},
{field1:'f1',field2:'f2',field3:'f3',value:3},
{field1:'f1',field2:'f2',value:7},
{field1:'f1',field2:'f2',field3:'f3',value:12}] AS list
UNWIND list AS ele
// 以field1,field3
WITH ele.field1 AS field1,ele.field3 AS field3,ele
WHERE field1 IS NOT NULL
RETURN field1,
MAX(ele.value),MIN(ele.value)
WITH [
{field1:'f1',field2:'f2',value:1},
{field1:'f1',field2:'f2',field3:'f3',value:3},
{field1:'f1',field2:'f2',value:7},
{field1:'f1',field2:'f2',field3:'f3',value:12}] AS list
UNWIND list AS ele
// 以field1,field3
WITH ele.field1 AS field1,ele.field3 AS field3,ele
WHERE field1 IS NOT NULL AND field3 IS NOT NULL
RETURN field1,
MAX(ele.value),MIN(ele.value)
Double类型数字转换
RETURN apoc.convert.toFloat('2.123213904580435') AS weight
小数比例计算
RETURN apoc.convert.toFloat(apoc.number.exact.div(TOSTRING('0.213'),TOSTRING('1.343'),6)) AS decimal
数组参数访问关系数据库
CALL apoc.load.jdbc('jdbc:mysql://testlab-contentdb-dev.crkldnwly6ki.rds.cn-north-1.alibaba.com.cn:3306/analytics_graph_data?user=dev&password=testlabgogo&useUnicode=true&characterEncoding=utf8&serverTimezone=UTC', 'SELECT ybId,tfidf FROM XXXY_YANBAO_CONCEPT_CALC_NO_WEIGHT_BEFORE WHERE tfidf>0.02 AND kwId IN (1211876451,17344784)',[])
动态拼接SQL参数批量查询
WITH 0.01 AS weight,'(1211876451,17344784)' AS kwIds
CALL apoc.cypher.run('CALL apoc.load.jdbc(\'jdbc:mysql://testlab-contentdb-dev.crkldnwly6ki.rds.cn-north-1.alibaba.com.cn:3306/analytics_graph_data?user=dev&password=testlabgogo&useUnicode=true&characterEncoding=utf8&serverTimezone=UTC\', \'SELECT ybId,tfidf FROM XXXY_YANBAO_CONCEPT_CALC_NO_WEIGHT_BEFORE WHERE tfidf>'+weight+' AND kwId IN '+kwIds+'\',[])',{}) YIELD value RETURN value
判断名称中是否包含了名称碎片中的任意一个词
// 列表包含
RETURN ANY(frag IN $para.fundFragNameList WHERE s.name CONTAINS frag AND frag<>'' AND frag IS NOT NULL) AS bool
RETURN ANY(frag IN ['中欧'] WHERE '中欧基金' CONTAINS frag AND frag<>'' AND frag IS NOT NULL) AS bool
查看是waiting状态的查询
CALL dbms.listQueries() YIELD queryId,username,metaData,query,parameters,planner,runtime,indexes,startTime,protocol,clientAddress,requestUri,status,resourceInformation,activeLockCount,elapsedTimeMillis,cpuTimeMillis,waitTimeMillis,idleTimeMillis,allocatedBytes,pageHits,pageFaults,connectionId
WHERE status='waiting'
RETURN queryId,username,metaData,query,parameters,planner,runtime,indexes,startTime,protocol,clientAddress,requestUri,status,resourceInformation,activeLockCount,elapsedTimeMillis,cpuTimeMillis,waitTimeMillis,idleTimeMillis,allocatedBytes,pageHits,pageFaults,connectionId
加载DBMS驱动
CALL apoc.load.driver("com.mysql.jdbc.Driver");
CALL apoc.load.driver("oracle.jdbc.driver.OracleDriver");
CALL apoc.load.driver("com.microsoft.sqlserver.jdbc.SQLServerDriver");
生成gephi格式文件
CALL apoc.export.graphml.query('MATCH p=(n:HORGShareHoldV002)-->(m:HORGShareHoldV002) RETURN p LIMIT 25','test.gexf',{useTypes:true,format: 'gephi',stream:true})
时间格式转换函数
RETURN apoc.date.convertFormat('20190514145206', 'yyyyMMddHHmmss', 'yyyy-MM-dd HH:mm:ss') AS dateTime
从数据库获取小数损失精度问题解决
先转换成字符型拿出数据,然后使用
apoc.convert.toFloat(stock_price)
函数转换成小数存到图数据库CALL apoc.load.jdbc('jdbc:mysql://testlab-contentdb-dev.crkldnwly6ki.rds.cn-north-1.alibaba.com.cn:3306/analytics_graph_data?user=dev&password=testlabgogo&useUnicode=true&characterEncoding=utf8&serverTimezone=UTC', 'SELECT CAST(stock_price AS CHAR) AS restock_price,stock_price FROM analytics_graph_data.ASHAREINSIDEHOLDER_20210726 a WHERE a.HWMP IS NULL limit 1') YIELD row RETURN * LIMIT 1
正则替换
RETURN apoc.text.replace('112343sadsad','[0-9]','') AS newStr
RETURN apoc.text.replace('Hello World!', '[^a-zA-Z]', '!!!') AS newStr
//替换中文括号以及括号内内容
RETURN apoc.text.replace('太棒了!(jsapfhaspogf)测试', '\\(([^}]*)\\)', '')
//替换英文括号以及括号内内容
RETURN apoc.text.replace('太棒了!(jsapfhaspogf)测试', '\\(([^}]*)\\)', '')
//替换中文括号以及括号内内容
RETURN apoc.text.replace('太棒了![jsapfhaspogf]测试', '\\[([^}]*)\\]', '')
//去除罗马字符,转为小写
RETURN LOWER(apoc.text.replace('Hello World!Ⅰ', '[ⅠⅡⅢⅣⅤⅥⅦⅧⅨⅩⅪⅫ]', ''))
//去除罗马字符,转为小写,替换中文括号以及括号内内容、替换英文括号以及括号内内容、替换中文括号以及括号内内容
RETURN apoc.text.replace(apoc.text.replace(apoc.text.replace(LOWER(apoc.text.replace('Hello World!Ⅰ', '[ⅠⅡⅢⅣⅤⅥⅦⅧⅨⅩⅪⅫ]', '')), '\\(([^}]*)\\)', ''), '\\(([^}]*)\\)', ''), '\\[([^}]*)\\]', '')
//替换中文括号以及括号内内容、替换英文括号以及括号内内容、替换中文括号以及括号内内容
RETURN apoc.text.replace(apoc.text.replace(apoc.text.replace('太棒了!(jsapfhaspogf)测试', '\\(([^}]*)\\)', ''), '\\(([^}]*)\\)', ''), '\\[([^}]*)\\]', '')
寻找非数值类型数据
RETURN apoc.coll.contains(['INTEGER','FLOAT'],apoc.meta.cypher.type(r.weight))
批量写入ES
CALL apoc.es.postRaw('http://localhost:9200','gdb_mstr_hinst_cluster/_bulk','{"index":{"_id":"1084265135-1084265135-1"}{}}\r\n{"auto_id":1084265135,"hcreatetime":"2021-12-29 18:37:04","unique":"1084265135-1084265135-1","hisvalid":0,"hcode":"HINST0000001084265135","hupdatetime":"2021-12-29 18:37:04","node_id":1084265135}\r\n')
WITH
'{"index":{"_id":"1084265135-1084265135-1"}{}}' AS unique,
'{"auto_id":1084265135,"hcreatetime":"2021-12-29 18:37:04","unique":"1084265135-1084265135-1","hisvalid":1,"hcode":"HINST0000001084265135","hupdatetime":"2021-12-29 18:37:04","node_id":1084265135}' AS data
WITH unique+'\r\n'+data+'\r\n' AS request
CALL apoc.es.postRaw('http://localhost:9200','gdb_mstr_hinst_cluster/_bulk',request) YIELD value
RETURN *
JSON对象转为字符串
RETURN apoc.convert.toJson({num:2,str:'str'}) as data
RETURN apoc.convert.toJson([{num:2,str:'str'}]) as data
时间格式转换
RETURN TOINTEGER(apoc.date.convertFormat('2022-02-07 11:39:46','yyyy-MM-dd HH:mm:ss','yyyyMMddHHmmss')) AS timeL
RETURN TOINTEGER(apoc.date.convertFormat('2017年07月20日','yyyy年MM月dd日','yyyyMMdd')) AS formattedDate
随机数生成
WITH TOINTEGER(RAND() * 60) AS rand_node, 5 AS offset
WITH RANGE(rand_node - offset, rand_node + offset) AS rand_range
return rand_range
Neo4j中的随机节点匹配
MATCH (n)
WHERE RAND() <= 0.01
RETURN n
LIMIT 1
// 从RAND()函数生成随机ID并将其乘以节点数
MATCH (n)
WHERE id(n) = TOINTEGER(RAND() * 3400000)
RETURN n
// ID因删除数据不再完全连续,则可以调整随机数据范围进行抽取
WITH TOINTEGER(RAND() * 3400000) AS rand_node, 5 AS offset
WITH RANGE(rand_node - offset, rand_node + offset) AS rand_range
MATCH (n)
WHERE id(n) IN rand_range
RETURN n
LIMIT 1
指定Cypher运行时间
// 10000以内的整数运行加法计算,并计算所有加法结果的合计值
// {}指定Cypher的参数
// 2000指定Cypher的运行时间为两秒以内
CALL apoc.cypher.runTimeboxed(
'UNWIND range(1,10000) AS a1 UNWIND range(1,10000) AS a2 WITH a1+a2 AS a RETURN sum(a) AS sum',
{}, 2000
) YIELD value
RETURN value
时间求差
//时间差值
RETURN duration.inDays(date("2019-01-01"), date("2019-01-06")).days AS days;
集合求差集
RETURN apoc.coll.disjunction([1,4,5],[1,2,4])
浮点数处理保留有效位
RETURN apoc.number.exact.toFloat('50423.1656', 5,'HALF_UP') AS output;
从列表中删除元素
RETURN apoc.coll.remove([1,2,3,999],3,1)
克隆子图
MATCH p=()-[r:FRIEND]->() WITH p
CALL apoc.refactor.cloneSubgraphFromPaths([p]) YIEL D input, output, error RETURN *
Louvain模块度算法
CALL algo.louvain('LABEL','REL',{write:true,weightProperty:'min_max_scaling' ,writeProperty:'louvain_community',includeIntermediateCommunities:true,intermediateCommunitiesWriteProperty:'communities',concurrency:8}) YIELD nodes,communityCount,iterations,loadMillis,computeMillis,writeMillis
RETURN nodes,communityCount,iterations,loadMillis,computeMillis,writeMillis;
三角计数和集聚系数
CALL algo.triangleCount.stream('FGI基金经理SIM','相似',{concurrency:8})
CALL algo.triangleCount('FGI基金经理SIM','相似',{concurrency:8})
欧氏距离相似度
//靠近1相似
RETURN algo.similarity.euclidean([1,2,3,4,5],[1,2,3,4,5])
//靠近0相似
RETURN algo.similarity.euclideanDistance([1,2,3,4,5],[1.2,2,3,4,5])
大图可视化
MATCH p=(n:Day)--()
WITH COLLECT(p) AS paths
CALL apoc.gephi.add('localhost:8080','workspace1',paths,'',[]) YIELD nodes, relationships, format
RETURN *
求总体标准差
WITH [9,2,5,4,12,7,8,11,9,3,7,4,12,5,4,10,9,6,9,4] AS number
//WITH RANGE(1,1000) AS number
//求平均值
WITH apoc.coll.avg(number) AS avg,number
//从每一个数值减去平均,然后求差的平方
WITH REDUCE(l=[],e IN number | l+(e-avg)*(e-avg)) AS list
//求结果的平均,并取平方根
WITH SQRT(apoc.coll.sum(list)/SIZE(list)) AS sqrt
RETURN sqrt
//优化
WITH [9,2,5,4,12,7,8,11,9,3,7,4,12,5,4,10,9,6,9,4] AS number
//WITH RANGE(1,1000) AS number
//求平均值
WITH apoc.coll.avg(number) AS avg,number
//从每一个数值减去平均,然后求差的平方
UNWIND number AS e
WITH (e-avg) AS el
WITH COLLECT(el*el) AS list
//求结果的平均,并取平方根
WITH SQRT(apoc.coll.sum(list)/SIZE(list)) AS sqrt
RETURN sqrt
数据阈值设置:平均值+一倍标准差
//优化
WITH [9,2,5,4,12,7,8,11,9,3,7,4,12,5,4,10,9,6,9,4] AS number
//WITH RANGE(1,1000) AS number
//求平均值
WITH apoc.coll.avg(number) AS avg,number
//从每一个数值减去平均,然后求差的平方
UNWIND number AS e
WITH avg,(e-avg) AS el
WITH avg,COLLECT(el*el) AS list
//求结果的平均,并取平方根
WITH avg,SQRT(apoc.coll.sum(list)/SIZE(list)) AS sqrt
//阈值:平均值+一倍标准差
WITH avg,sqrt,avg+sqrt AS threshold
RETURN avg,sqrt,threshold
匹配到节点后设置属性
//MERGE -> CREATE SET -> MATCH SET
CALL apoc.merge.node(['Person'],{id:123},NULL,{is_build_word:true}) YIELD node RETURN node
Neo4j-3.4.x 全文索引
neo4j-apoc-procedures-3.5-indexes-fulltext-index
- 创建节点并将其添加到索引
MATCH (n:`股票名称`) WITH n CALL db.index.explicit.addNode('股票名称',n,'value',n.value) YIELD success RETURN COUNT(*) AS count;
- 验证索引是否已创建
CALL db.index.explicit.list();
- 查找以
中
开头的股票CALL db.index.explicit.searchNodes('股票名称','value:中国*');
- 索引需要手动更新新增的数据
MATCH (n:`股票名称`) WHERE n.value='中国股票' WITH n CALL db.index.explicit.addNode('股票名称',n,'value',n.value) YIELD success RETURN COUNT(*) AS count;
PREVIOUS股权网络穿透一百层测试
NEXT图数据库数据写入性能优化