Here’s the table of contents:
GPT-3.5 生成 Fabric Cypher
Fabric 架构图数据库节点设计
在之前已经测试过 GPT4生成Cypher 和 GPT3.5生成Cypher 的能力,为了让生成Cypher的能力应用在更大规模的图数据查询,本次测试使用图数据库的Fabric架构。 测试用
Fabric架构
包含一个Fabric Node
,两个Data Node
。Fabric Node
不存储任何数据,只负责请求的路由转发。Data Node
存储数据,负责响应数据查询请求。
//导入 https://ongdb-contrib.github.io/graphene/app/ 查看可视化效果
{"data":{"nodes":[{"x":480,"y":504,"color":"#d19d7b","label":"用户","properties":[{"id":"a2b632be","key":"id","type":"ID","defaultValue":"","limitMin":"","limitMax":"","isRequired":true,"isAutoGenerated":true,"isSystem":true,"description":""}],"id":"a78b598d","isSelected":false,"isNode":true},{"x":719,"y":496,"color":"#f54e42","label":"Fabric Node","properties":[{"id":"aa887cb0","key":"id","type":"ID","defaultValue":"","limitMin":"","limitMax":"","isRequired":true,"isAutoGenerated":true,"isSystem":true,"description":""}],"id":"ac8263bb","isSelected":false,"isNode":true},{"x":988,"y":386,"color":"#56b6e6","label":"Data Node-1","properties":[{"id":"a9b1709f","key":"id","type":"ID","defaultValue":"","limitMin":"","limitMax":"","isRequired":true,"isAutoGenerated":true,"isSystem":true,"description":""}],"id":"a5a6c1a2","isSelected":false,"isNode":true},{"x":979,"y":617,"color":"#56b6e6","label":"Data Node-2","properties":[{"id":"a8b8689e","key":"id","type":"ID","defaultValue":"","limitMin":"","limitMax":"","isRequired":true,"isAutoGenerated":true,"isSystem":true,"description":""}],"id":"ae8ff082","isSelected":false,"isNode":true}],"edges":[{"startNodeId":"a78b598d","endNodeId":"ac8263bb","middlePointOffset":[18.5,15],"properties":[{"id":"a5944abd","key":"id","type":"ID","defaultValue":"","limitMin":"","limitMax":"","isRequired":true,"isAutoGenerated":true,"isSystem":true,"description":""}],"label":"请求","id":"a7b119b0","isSelected":false,"isEdge":true,"color":"#f54e42","pathStrokeDasharray":"2 5"},{"startNodeId":"ac8263bb","endNodeId":"a78b598d","middlePointOffset":[19.5,-15],"properties":[{"id":"aabff3b4","key":"id","type":"ID","defaultValue":"","limitMin":"","limitMax":"","isRequired":true,"isAutoGenerated":true,"isSystem":true,"description":""}],"label":"响应","id":"ac9212a6","isSelected":false,"isEdge":true,"color":"#56b6e6","pathStrokeDasharray":"2 5"},{"startNodeId":"ac8263bb","endNodeId":"a5a6c1a2","middlePointOffset":[37.5,31],"properties":[{"id":"a98616a8","key":"id","type":"ID","defaultValue":"","limitMin":"","limitMax":"","isRequired":true,"isAutoGenerated":true,"isSystem":true,"description":""}],"label":"路由Cypher","id":"ad91b79b","isSelected":false,"isEdge":true,"color":"#f54e42","pathStrokeDasharray":"15 5"},{"startNodeId":"ac8263bb","endNodeId":"ae8ff082","middlePointOffset":[40,-35.5],"properties":[{"id":"ab88619c","key":"id","type":"ID","defaultValue":"","limitMin":"","limitMax":"","isRequired":true,"isAutoGenerated":true,"isSystem":true,"description":""}],"label":"路由Cypher","id":"afbdf2af","isSelected":false,"isEdge":true,"color":"#f54e42","pathStrokeDasharray":"15 5"},{"startNodeId":"a5a6c1a2","endNodeId":"ac8263bb","middlePointOffset":[3,-3],"properties":[{"id":"a49517ac","key":"id","type":"ID","defaultValue":"","limitMin":"","limitMax":"","isRequired":true,"isAutoGenerated":true,"isSystem":true,"description":""}],"label":"响应查询","id":"ac879ab6","isSelected":false,"isEdge":true,"color":"#56b6e6","pathStrokeDasharray":"15 5"},{"startNodeId":"ae8ff082","endNodeId":"ac8263bb","middlePointOffset":[-3,-3.5],"properties":[{"id":"ada420a8","key":"id","type":"ID","defaultValue":"","limitMin":"","limitMax":"","isRequired":true,"isAutoGenerated":true,"isSystem":true,"description":""}],"label":"响应查询","id":"a481e3ac","isSelected":false,"isEdge":true,"color":"#56b6e6","pathStrokeDasharray":"15 5"}]}}
图数据模型设计
基于Fabric架构设计图数据模型,
Data Node-1
负责保存股票图谱,Data Node-2
负责保存高管图谱。 数据参考 graph-qabot-demo ,基于该样例数据源拆分为股票图谱和高管图谱。 红色部分表示股票图谱,蓝色部分表示高管图谱,股票图谱中同时存储高管任职于
关系(股票图谱中的高管节点只保存跨图关联的md5
即可)。
//导入 https://ongdb-contrib.github.io/graphene/app/ 查看可视化效果
{"data":{"nodes":[{"x":593,"y":317,"color":"#e60000","label":"股票代码","properties":[{"id":"a2aeffb2","key":"id","type":"ID","defaultValue":"","limitMin":"","limitMax":"","isRequired":true,"isAutoGenerated":true,"isSystem":true,"description":""},{"id":"afb48f90","key":"value","type":"","defaultValue":"","limitMin":"","limitMax":"","isRequired":false,"isAutoGenerated":false,"isSystem":false,"description":"值"}],"id":"a1b949a0","isSelected":false,"isNode":true},{"x":658,"y":477,"color":"#e60000","label":"股票","properties":[{"id":"a4a8beb4","key":"id","type":"ID","defaultValue":"","limitMin":"","limitMax":"","isRequired":true,"isAutoGenerated":true,"isSystem":true,"description":""},{"id":"a9bf77ad","key":"value","type":"","defaultValue":"","limitMin":"","limitMax":"","isRequired":false,"isAutoGenerated":false,"isSystem":false,"description":"值"}],"id":"a5844bbd","isSelected":false,"isNode":true},{"x":393,"y":401,"color":"#e60000","label":"股票名称","properties":[{"id":"a08f768e","key":"id","type":"ID","defaultValue":"","limitMin":"","limitMax":"","isRequired":true,"isAutoGenerated":true,"isSystem":true,"description":""},{"id":"a09d0ab9","key":"value","type":"","defaultValue":"","limitMin":"","limitMax":"","isRequired":false,"isAutoGenerated":false,"isSystem":false,"description":"值"}],"id":"a78c159d","isSelected":false,"isNode":true},{"x":406,"y":508,"color":"#e60000","label":"地域","properties":[{"id":"a59d20a1","key":"id","type":"ID","defaultValue":"","limitMin":"","limitMax":"","isRequired":true,"isAutoGenerated":true,"isSystem":true,"description":""},{"id":"a28d19bb","key":"value","type":"","defaultValue":"","limitMin":"","limitMax":"","isRequired":false,"isAutoGenerated":false,"isSystem":false,"description":"值"}],"id":"a49f8ebc","isSelected":false,"isNode":true},{"x":461,"y":590,"color":"#e60000","label":"行业","properties":[{"id":"aea1c5b2","key":"id","type":"ID","defaultValue":"","limitMin":"","limitMax":"","isRequired":true,"isAutoGenerated":true,"isSystem":true,"description":""},{"id":"aaa4b082","key":"value","type":"","defaultValue":"","limitMin":"","limitMax":"","isRequired":false,"isAutoGenerated":false,"isSystem":false,"description":"值"}],"id":"a381f688","isSelected":false,"isNode":true},{"x":694,"y":602,"color":"#e60000","label":"上市日期","properties":[{"id":"a284f1aa","key":"id","type":"ID","defaultValue":"","limitMin":"","limitMax":"","isRequired":true,"isAutoGenerated":true,"isSystem":true,"description":""},{"id":"a0b444a4","key":"value","type":"","defaultValue":"","limitMin":"","limitMax":"","isRequired":false,"isAutoGenerated":false,"isSystem":false,"description":"值"}],"id":"ac81ffb4","isSelected":false,"isNode":true},{"x":814,"y":413,"color":"#236ce1","label":"高管","properties":[{"id":"a4b02384","key":"id","type":"ID","defaultValue":"","limitMin":"","limitMax":"","isRequired":true,"isAutoGenerated":true,"isSystem":true,"description":""}],"id":"a2ade2ad","isSelected":false,"isNode":true},{"x":870,"y":299,"color":"#236ce1","label":"性别","properties":[{"id":"ad9cab82","key":"id","type":"ID","defaultValue":"","limitMin":"","limitMax":"","isRequired":true,"isAutoGenerated":true,"isSystem":true,"description":""},{"id":"a2aa0f86","key":"value","type":"","defaultValue":"","limitMin":"","limitMax":"","isRequired":false,"isAutoGenerated":false,"isSystem":false,"description":"值"}],"id":"af9a0f97","isSelected":false,"isNode":true},{"x":957,"y":480,"color":"#236ce1","label":"学历","properties":[{"id":"a090f498","key":"id","type":"ID","defaultValue":"","limitMin":"","limitMax":"","isRequired":true,"isAutoGenerated":true,"isSystem":true,"description":""},{"id":"a8a0e1b4","key":"value","type":"","defaultValue":"","limitMin":"","limitMax":"","isRequired":false,"isAutoGenerated":false,"isSystem":false,"description":"值"}],"id":"ad931890","isSelected":false,"isNode":true},{"x":981,"y":213,"color":"#236ce1","label":"性别_别名","properties":[{"id":"a3980888","key":"id","type":"ID","defaultValue":"","limitMin":"","limitMax":"","isRequired":true,"isAutoGenerated":true,"isSystem":true,"description":""}],"id":"aa8b5199","isSelected":false,"isNode":true}],"edges":[{"startNodeId":"a2ade2ad","endNodeId":"a5844bbd","middlePointOffset":[0,0],"properties":[{"id":"adaf73aa","key":"id","type":"ID","defaultValue":"","limitMin":"","limitMax":"","isRequired":true,"isAutoGenerated":true,"isSystem":true,"description":""}],"label":"任职于","id":"a3bcec8c","isSelected":false,"isEdge":true,"color":"#d3b68d","pathStrokeDasharray":"2 5"},{"startNodeId":"a5844bbd","endNodeId":"a1b949a0","middlePointOffset":[0,0],"properties":[{"id":"a3bd83a0","key":"id","type":"ID","defaultValue":"","limitMin":"","limitMax":"","isRequired":true,"isAutoGenerated":true,"isSystem":true,"description":""}],"label":"股票代码","id":"ae88d4aa","isSelected":false,"isEdge":true,"color":"#E60000","pathStrokeDasharray":"none"},{"startNodeId":"a5844bbd","endNodeId":"a78c159d","middlePointOffset":[0,0],"properties":[{"id":"a08a7485","key":"id","type":"ID","defaultValue":"","limitMin":"","limitMax":"","isRequired":true,"isAutoGenerated":true,"isSystem":true,"description":""}],"label":"股票名称","id":"a5a68b93","isSelected":false,"isEdge":true,"color":"#E60000","pathStrokeDasharray":"none"},{"startNodeId":"a5844bbd","endNodeId":"a49f8ebc","middlePointOffset":[0,0],"properties":[{"id":"a79e37b3","key":"id","type":"ID","defaultValue":"","limitMin":"","limitMax":"","isRequired":true,"isAutoGenerated":true,"isSystem":true,"description":""}],"label":"地域","id":"acab7b92","isSelected":false,"isEdge":true,"color":"#E60000","pathStrokeDasharray":"none"},{"startNodeId":"a5844bbd","endNodeId":"a381f688","middlePointOffset":[0,0],"properties":[{"id":"af98c1a2","key":"id","type":"ID","defaultValue":"","limitMin":"","limitMax":"","isRequired":true,"isAutoGenerated":true,"isSystem":true,"description":""}],"label":"所属行业","id":"a9af63b6","isSelected":false,"isEdge":true,"color":"#E60000","pathStrokeDasharray":"none"},{"startNodeId":"a5844bbd","endNodeId":"ac81ffb4","middlePointOffset":[0,0],"properties":[{"id":"a5b64896","key":"id","type":"ID","defaultValue":"","limitMin":"","limitMax":"","isRequired":true,"isAutoGenerated":true,"isSystem":true,"description":""}],"label":"上市日期","id":"a0922fb7","isSelected":false,"isEdge":true,"color":"#E60000","pathStrokeDasharray":"none"},{"startNodeId":"a2ade2ad","endNodeId":"af9a0f97","middlePointOffset":[0,0],"properties":[{"id":"a19e1692","key":"id","type":"ID","defaultValue":"","limitMin":"","limitMax":"","isRequired":true,"isAutoGenerated":true,"isSystem":true,"description":""}],"label":"性别","id":"ada854ae","isSelected":false,"isEdge":true,"color":"#236CE1","pathStrokeDasharray":"none"},{"startNodeId":"a2ade2ad","endNodeId":"ad931890","middlePointOffset":[0,0],"properties":[{"id":"aeb6ae9e","key":"id","type":"ID","defaultValue":"","limitMin":"","limitMax":"","isRequired":true,"isAutoGenerated":true,"isSystem":true,"description":""}],"label":"学历","id":"acb3848a","isSelected":false,"isEdge":true,"color":"#236CE1","pathStrokeDasharray":"none"},{"startNodeId":"af9a0f97","endNodeId":"aa8b5199","middlePointOffset":[0,0],"properties":[{"id":"ab919ead","key":"id","type":"ID","defaultValue":"","limitMin":"","limitMax":"","isRequired":true,"isAutoGenerated":true,"isSystem":true,"description":""}],"label":"别名","id":"a398fe92","isSelected":false,"isEdge":true,"color":"#236CE1","pathStrokeDasharray":"none"}]}}
Fabric 快速开始指南
本次测试案例运行在win10系统,可按照具体情况下载对应软件安装包。 在win10系统合适的地方创建
ongdb\fabric
目录(方便个人文件管理),用于安装服务节点。
下载必要资源
- 下载ONgDB
- 下载ONgDB Fabric
- 下载ONgDB Apoc
配置
开始配置前,在
ongdb\fabric
目录下,创建graph-node-1
、graph-node-2
、fabric-node-1
三个文件夹。 并将下载的ONgDB
压缩文件解压后文件分别完整放置在三个目录下。 另外也需要将ONgDB Apoc
组件放置graph-node-1
、graph-node-2
根目录下的plugins
目录。 保存好文件后在各个节点安装目录下修改conf\ongdb.conf
配置,修改方式如下:
- 修改
graph-node-1
配置
dbms.connector.bolt.listen_address=:7681
dbms.connector.http.listen_address=:7471
dbms.connector.https.listen_address=:7461
apoc.import.file.enabled=true
dbms.security.procedures.unrestricted=apoc.*
- 修改
graph-node-2
配置
dbms.connector.bolt.listen_address=:7682
dbms.connector.http.listen_address=:7472
dbms.connector.https.listen_address=:7462
apoc.import.file.enabled=true
dbms.security.procedures.unrestricted=apoc.*
- 修改
fabric-node-1
配置
dbms.connector.bolt.listen_address=:7281
dbms.connector.http.listen_address=:7171
dbms.connector.https.listen_address=:7161
dbms.security.procedures.unrestricted=c*
修改好配置后,将
ONgDB Fabric
组件放置在fabric-node-1
安装目录下的plugins
文件夹,并在根目录下创建fabric\fabric.properties
配置。 文件fabric.properties
的配置如下:
c1=bolt://fabric:pssfabric@localhost:7681
c2=bolt://fabric:pssfabric@localhost:7682
现在可以在各个节点根目录下,运行下面命令启动节点:
bin\ongdb.bat console
启动节点后,分别在节点上创建以下用户账号。
编号 | 用户名 | 密码 | 备注 |
---|---|---|---|
graph-node-1 | ongdb | 123456 | HTTP访问页面:http://localhost:7471 |
graph-node-2 | ongdb | 123456 | HTTP访问页面:http://localhost:7472 |
fabric-node-1 | ongdb | 123456 | HTTP访问页面:http://localhost:7171 |
graph-node-1 | fabric | pssfabric | 【只读】提供给Fabric服务使用 |
graph-node-2 | fabric | pssfabric | 【只读】提供给Fabric服务使用 |
构建数据
在graph-node-1
节点构建股票图谱
将
graph-qabot-demo
中stocks.csv
和managers.csv
的文件放置在graph-node-1
根目录下的import
目录,并运行下面脚本构建数据。
CREATE CONSTRAINT ON (n:股票代码) ASSERT (n.value) IS NODE KEY;
CREATE CONSTRAINT ON (n:股票) ASSERT (n.value) IS NODE KEY;
CREATE CONSTRAINT ON (n:股票名称) ASSERT (n.value) IS NODE KEY;
CREATE CONSTRAINT ON (n:地域) ASSERT (n.value) IS NODE KEY;
CREATE CONSTRAINT ON (n:行业) ASSERT (n.value) IS NODE KEY;
CREATE CONSTRAINT ON (n:上市日期) ASSERT (n.value) IS NODE KEY;
CREATE CONSTRAINT ON (n:高管) ASSERT (n.md5) IS NODE KEY;
LOAD CSV WITH HEADERS FROM 'file:/stocks.csv' AS row
WITH row.ts_code AS fval,row.symbol AS tval
WHERE tval IS NOT NULL AND tval<>''
MERGE (f:股票 {value:fval})
MERGE (t:股票代码 {value:tval})
MERGE (f)-[:股票代码]->(t);
LOAD CSV WITH HEADERS FROM 'file:/stocks.csv' AS row
WITH row.ts_code AS fval,row.name AS tval
WHERE tval IS NOT NULL AND tval<>''
MERGE (f:股票 {value:fval})
MERGE (t:股票名称 {value:tval})
MERGE (f)-[:股票名称]->(t);
LOAD CSV WITH HEADERS FROM 'file:/stocks.csv' AS row
WITH row.ts_code AS fval,row.area AS tval
WHERE tval IS NOT NULL AND tval<>''
MERGE (f:股票 {value:fval})
MERGE (t:地域 {value:tval})
MERGE (f)-[:地域]->(t);
LOAD CSV WITH HEADERS FROM 'file:/stocks.csv' AS row
WITH row.ts_code AS fval,row.industry AS tval
WHERE tval IS NOT NULL AND tval<>''
MERGE (f:股票 {value:fval})
MERGE (t:行业 {value:tval})
MERGE (f)-[:所属行业]->(t);
LOAD CSV WITH HEADERS FROM 'file:/stocks.csv' AS row
WITH row.ts_code AS fval,row.list_date AS tval
WHERE tval IS NOT NULL AND tval<>''
MERGE (f:股票 {value:fval})
MERGE (t:上市日期 {value:TOINTEGER(tval)})
MERGE (f)-[:上市日期]->(t);
LOAD CSV WITH HEADERS FROM 'file:/managers.csv' AS row
WITH row,apoc.util.md5([row.name,row.gender,row.birthday]) AS fval,row.ts_code AS tval
WHERE tval IS NOT NULL AND tval<>'' AND fval IS NOT NULL AND fval<>'' AND (row.end_date IS NULL OR row.end_date='')
MERGE (f:高管 {md5:fval})
MERGE (t:股票 {value:tval})
MERGE (f)-[:任职于]->(t);
在graph-node-2
节点构建高管图谱
将
graph-qabot-demo
中managers.csv
文件放置在graph-node-2
根目录下的import
目录,并运行下面脚本构建数据。
CREATE CONSTRAINT ON (n:高管) ASSERT (n.md5) IS NODE KEY;
CREATE INDEX ON :高管(value);
CREATE CONSTRAINT ON (n:性别) ASSERT (n.value) IS NODE KEY;
CREATE CONSTRAINT ON (n:学历) ASSERT (n.value) IS NODE KEY;
CREATE CONSTRAINT ON (n:性别_别名) ASSERT (n.value) IS NODE KEY;
LOAD CSV WITH HEADERS FROM 'file:/managers.csv' AS row
WITH row,apoc.util.md5([row.name,row.gender,row.birthday]) AS fval,row.ts_code AS tval
WHERE tval IS NOT NULL AND tval<>'' AND fval IS NOT NULL AND fval<>'' AND (row.end_date IS NULL OR row.end_date='')
MERGE (f:高管 {md5:fval}) SET f+={value:row.name}
MERGE (t:股票 {value:tval})
MERGE (f)-[:任职于]->(t);
LOAD CSV WITH HEADERS FROM 'file:/managers.csv' AS row
WITH row,apoc.util.md5([row.name,row.gender,row.birthday]) AS fval,row.gender AS tval
WHERE tval IS NOT NULL AND tval<>'' AND fval IS NOT NULL AND fval<>'' AND (row.end_date IS NULL OR row.end_date='')
MERGE (f:高管 {md5:fval}) SET f+={value:row.name}
MERGE (t:性别 {value:tval})
MERGE (f)-[:性别]->(t);
LOAD CSV WITH HEADERS FROM 'file:/managers.csv' AS row
WITH row,apoc.util.md5([row.name,row.gender,row.birthday]) AS fval,row.edu AS tval
WHERE tval IS NOT NULL AND tval<>'' AND fval IS NOT NULL AND fval<>'' AND (row.end_date IS NULL OR row.end_date='')
MERGE (f:高管 {md5:fval}) SET f+={value:row.name}
MERGE (t:学历 {value:tval})
MERGE (f)-[:学历]->(t);
WITH ['女性','女'] AS list
UNWIND list AS wd
WITH wd
MATCH (n:性别) WHERE n.value='F' WITH n,wd
MERGE (t:性别_别名 {value:wd})
MERGE (n)-[:别名]->(t);
WITH ['男性','男'] AS list
UNWIND list AS wd
WITH wd
MATCH (n:性别) WHERE n.value='M' WITH n,wd
MERGE (t:性别_别名 {value:wd})
MERGE (n)-[:别名]->(t);
GPT-3.5 生成 Fabric Cypher
使用 GPT-3.5 生成 Fabric Cypher 时,需要注意 Fabric Cypher 样例不要使用转义符,GPT 生成转义符不太友好(会丢弃转义符,添加任何其它Prompt都没有用)。 如果丢弃了转义符,生成的 Fabric Cypher 直接执行会报错。 除了转义符需要注意外,其它样例覆盖的情况,Fabric Cypher 生成效果都比较好。 下面可以看一下,一些 LLM Generate Cypher | 使用大语言模型生成Cypher 测试结果:
import requests
import Levenshtein
# 使用HTTP封装的GPT-3.5接口,请根据具体接口情况替换
def gpt3_5(msg):
# 设置请求头
headers = {"Content-type": "application/json"}
url = 'http://localhost:8080/cgpt-api/push_question'
data = {
"ask_str": msg,
}
response = requests.post(url, json=data, headers=headers)
return response.json()['data']['content']
def example_list():
ex_list = [{
'qa': '女性高管任职的上市公司有多少家?',
'cypher': '''
CALL c2(
'
MATCH (gg:高管)-[:性别]->(:性别)-[:别名]->(xb:性别_别名)
WHERE xb.value=$xb
RETURN COLLECT(gg.md5) AS gg_md5;
',
{xb:'女性'}
) YIELD value WITH value.gg_md5 AS gg_md5
CALL c1(
'
MATCH (stk:股票)<-[:任职于]-(gg:高管)
WHERE gg.md5 IN $gg_md5
RETURN COUNT(DISTINCT stk) AS count;
',
{gg_md5:gg_md5}
) YIELD value RETURN value.count AS count;
'''
}, {
'qa': '上市公司女性高管比例是多少?',
'cypher': '''
CALL c2(
'
MATCH (gg:高管)-[:性别]->(:性别)-[:别名]->(xb:性别_别名)
WHERE xb.value=$xb
RETURN COUNT(gg) AS v_ct;
',
{xb:'女性'}
) YIELD value WITH value.v_ct AS v_ct
CALL c2(
'
MATCH (gg:高管)
WITH COUNT(gg) AS t_ct
RETURN TOFLOAT(apoc.number.exact.div(TOSTRING($v_ct),TOSTRING(t_ct),3,$mk_half)) AS val
',
{v_ct:v_ct,mk_half:'HALF_DOWN'}
) YIELD value RETURN value.val AS val;
'''
}, {
'qa': '北京的上市公司中有多少位女性高管?',
'cypher': '''
CALL c2(
'
MATCH (gg:高管)-[:性别]->(:性别)-[:别名]->(xb:性别_别名)
WHERE xb.value=$xb
RETURN COLLECT(gg.md5) AS gg_md5;
',
{xb:'女性'}
) YIELD value WITH value.gg_md5 AS gg_md5
CALL c1(
'
MATCH (dy:地域)<-[:地域]-(stk:股票)<-[:任职于]-(gg:高管)
WHERE gg.md5 IN $gg_md5 AND dy.value=$dy
RETURN COUNT(DISTINCT stk) AS count;
',
{dy:'北京',gg_md5:gg_md5}
) YIELD value RETURN value.count AS count;
'''
}, {
'qa': '软件服务行业硕士学历的女性高管有多少位?',
'cypher': '''
CALL c2(
'
MATCH (xl:学历)<-[:学历]-(gg:高管)-[:性别]->(:性别)-[:别名]->(xb:性别_别名)
WHERE xb.value=$xb AND xl.value=$xl
RETURN COLLECT(gg.md5) AS gg_md5;
',
{xb:'女性',xl:'硕士'}
) YIELD value WITH value.gg_md5 AS gg_md5
CALL c1(
'
MATCH (hy:行业)<-[:所属行业]-(stk:股票)<-[:任职于]-(gg:高管)
WHERE gg.md5 IN $gg_md5 AND hy.value=$hy
RETURN COUNT(DISTINCT stk) AS count;
',
{hy:'软件服务',gg_md5:gg_md5}
) YIELD value RETURN value.count AS count;
'''
}, {
'qa': '在海南由博士学历的男性高管任职的上市公司,都属于哪些行业?',
'cypher': '''
CALL c2(
'
MATCH (xl:学历)<-[:学历]-(gg:高管)-[:性别]->(:性别)-[:别名]->(xb:性别_别名)
WHERE xb.value=$xb AND xl.value=$xl
RETURN COLLECT(gg.md5) AS gg_md5;
',
{xb:'男性',xl:'博士'}
) YIELD value WITH value.gg_md5 AS gg_md5
CALL c1(
'
MATCH (hy:行业)<-[:所属行业]-(stk:股票)<-[:任职于]-(gg:高管),
(dy:地域)<-[:地域]-(stk)
WHERE gg.md5 IN $gg_md5 AND dy.value=$dy
RETURN DISTINCT hy.value AS hy;
',
{dy:'海南',gg_md5:gg_md5}
) YIELD value RETURN value.hy AS hy;
'''
}]
return ex_list
def examples(ask):
examples_str = ''
examples_list = []
for index, map in enumerate(example_list()):
qa = map['qa']
cypher = map['cypher']
dis = Levenshtein.distance(ask, qa)
examples_list.append({'qa': qa, 'cypher': cypher, 'dis': dis})
sorted_list = sorted(examples_list, key=lambda map: map['dis'])
for map in sorted_list:
qa = map['qa']
cypher = map['cypher']
ex = f'''
# {qa}
{cypher}
'''
# `prompt(ask)` Prompt Length 300
if len(examples_str + ex) + 300 <= 2048:
examples_str += ex
return examples_str
def prompt(ask):
return f"""
您是一名助手,能够根据示例Cypher查询生成Cypher查询。
示例Cypher查询有:\n {examples(ask)} \n
除了Cypher查询之外,不要回复任何解释或任何其他信息。
您永远不要为你的不准确回复感到抱歉,并严格根据提供的cypher示例生成cypher语句。
不要提供任何无法从Cypher示例中推断出的Cypher语句。
当由于缺少对话上下文而无法推断密码语句时,通知用户,并说明缺少的上下文是什么。
现在请为这个查询生成Cypher:
# {ask}
"""
if __name__ == '__main__':
# 输入问题
ask = '在北京由硕士学历的女性高管任职的上市公司,都属于哪些行业?'
# This model's maximum context length is 4097 tokens.
# However, you requested 4364 tokens (2316 in the messages, 2048 in the completion).
# Please reduce the length of the messages or completion.
print(gpt3_5(prompt(ask)))
# Test Result:
# QA:你好
# RE-GPT3.5(正确生成):无法根据示例Cypher查询推断出生成Cypher查询的特定上下文。请提供更具体的问题或信息。
# QA:男性高管任职的上市公司有多少家?
# RE-GPT3.5(正确生成):
# CALL c2(
# '
# MATCH (gg:高管)-[:性别]->(:性别)-[:别名]->(xb:性别_别名)
# WHERE xb.value=$xb
# RETURN COLLECT(gg.md5) AS gg_md5;
# ',
# {xb:'男性'}
# ) YIELD value WITH value.gg_md5 AS gg_md5
# CALL c1(
# '
# MATCH (stk:股票)<-[:任职于]-(gg:高管)
# WHERE gg.md5 IN $gg_md5
# RETURN COUNT(DISTINCT stk) AS count;
# ',
# {gg_md5:gg_md5}
# ) YIELD value RETURN value.count AS count;
# QA:上市公司男性高管比例是多少?
# RE-GPT3.5(正确生成):
# CALL c2(
# '
# MATCH (gg:高管)-[:性别]->(:性别)-[:别名]->(xb:性别_别名)
# WHERE xb.value=$xb
# RETURN COUNT(gg) AS v_ct;
# ',
# {xb:'男性'}
# ) YIELD value WITH value.v_ct AS v_ct
# CALL c2(
# '
# MATCH (gg:高管)
# WITH COUNT(gg) AS t_ct
# RETURN TOFLOAT(apoc.number.exact.div(TOSTRING($v_ct),TOSTRING(t_ct),3,$mk_half)) AS val
# ',
# {v_ct:v_ct,mk_half:'HALF_DOWN'}
# ) YIELD value RETURN value.val AS val;
# QA:上海的上市公司中有多少位男性高管?
# RE-GPT3.5(正确生成):
# CALL c2(
# '
# MATCH (gg:高管)-[:性别]->(:性别)-[:别名]->(xb:性别_别名)
# WHERE xb.value=$xb
# RETURN COLLECT(gg.md5) AS gg_md5;
# ',
# {xb:'男性'}
# ) YIELD value WITH value.gg_md5 AS gg_md5
# CALL c1(
# '
# MATCH (dy:地域)<-[:地域]-(stk:股票)<-[:任职于]-(gg:高管)
# WHERE gg.md5 IN $gg_md5 AND dy.value=$dy
# RETURN COUNT(DISTINCT stk) AS count;
# ',
# {dy:'上海',gg_md5:gg_md5}
# ) YIELD value RETURN value.count AS count;
# QA:水泥行业博士学历的男性高管有多少位?
# RE-GPT3.5:
# CALL c2(
# '
# MATCH (xl:学历)<-[:学历]-(gg:高管)-[:性别]->(:性别)-[:别名]->(xb:性别_别名)
# WHERE xb.value=$xb AND xl.value=$xl
# RETURN COLLECT(gg.md5) AS gg_md5;
# ',
# {xb:'男性',xl:'博士'}
# ) YIELD value WITH value.gg_md5 AS gg_md5
# CALL c1(
# '
# MATCH (hy:行业)<-[:所属行业]-(stk:股票)<-[:任职于]-(gg:高管)
# WHERE gg.md5 IN $gg_md5 AND hy.value=$hy
# RETURN COUNT(DISTINCT gg) AS count;
# ',
# {hy:'水泥',gg_md5:gg_md5}
# ) YIELD value RETURN value.count AS count;
# QA:在北京由硕士学历的女性高管任职的上市公司,都属于哪些行业?
# RE-GPT3.5:
# CALL c2(
# '
# MATCH (xl:学历)<-[:学历]-(gg:高管)-[:性别]->(:性别)-[:别名]->(xb:性别_别名)
# WHERE xb.value=$xb AND xl.value=$xl
# RETURN COLLECT(gg.md5) AS gg_md5;
# ',
# {xb:'女性',xl:'硕士'}
# ) YIELD value WITH value.gg_md5 AS gg_md5
# CALL c1(
# '
# MATCH (hy:行业)<-[:所属行业]-(stk:股票)<-[:任职于]-(gg:高管),
# (dy:地域)<-[:地域]-(stk)
# WHERE gg.md5 IN $gg_md5 AND dy.value=$dy
# RETURN DISTINCT hy.value AS hy;
# ',
# {dy:'北京',gg_md5:gg_md5}
# ) YIELD value RETURN value.hy AS hy;