以下のクエリを使用してneo4jデータベースを作成しました。私たちのcsvファイルには50k行が含まれています。neo4jベースデータベースのパフォーマンスを向上させる方法
// Query1
CREATE CONSTRAINT ON (p:PR) ASSERT p.prId IS UNIQUE;
USING PERIODIC COMMIT
LOAD CSV WITH HEADERS FROM
'file:///2015_PRData.csv' AS line WITH line,
SPLIT(SPLIT(line.`Open Date`, ' ')[0], '/') AS opnDateList,
SPLIT(SPLIT(line.`Closed Date`, ' ')[0], '/') AS clsDateList
MERGE (prNode:PR{prId:TOINT(line.prId)})
MERGE (app:Application{appName:line.Application})
MERGE (func:Function{funName:line.Function})
MERGE (subfunc:SubFunction{subFunName:line.Subfunction})
MERGE (cat:Category{catName:line.Category})
MERGE (rel:Release{relName:line.Release})
MERGE (custNode:Customer{customerName:line.`Server Name`})
MERGE (prOpenDate:PROpenDate{openDate:SPLIT(line.`Open Date`, ' ')[0]})
SET prOpenDate.day = TOINT(opnDateList[1]),prOpenDate.month = TOINT(opnDateList[0]),prOpenDate.year = opnDateList[2]
MERGE (prClosedDate:PRClosedDate{closedDate:SPLIT(line.`Closed Date`, ' ')[0]})
SET prClosedDate.day = TOINT(clsDateList[1]),prClosedDate.month = TOINT(clsDateList[0]),prClosedDate.year = clsDateList[2]
MERGE (app)-[:PART_OF_APPLN]->(func)
MERGE (func)-[:PART_OF_FUNCTION]->(subfunc)
MERGE (subfunc)-[:PART_OF_SUBFUNCTION]->(cat)
MERGE (prNode)-[:CATEGORY]->(cat)
MERGE (prNode)-[:REPORTED_BY]->(custNode)
MERGE (prNode)-[:OPEN_ON]->(prOpenDate)
MERGE (prNode)-[:CLOSED_ON]->(prClosedDate)
MERGE (prNode)-[:REPORTED_IN]->(rel)
Query 2:
//change year for open date nodes
MERGE (q:PROpenDate) SET q.year=SPLIT(q.year,' ')[0] return q;
Query 3:
//change year for closed date nodes
MERGE (q:PRClosedDate) SET q.year=SPLIT(q.year,' ')[0] return q;
Query 4:
USING PERIODIC COMMIT
LOAD CSV WITH HEADERS FROM
'file:///2015_PR_WithCP.csv' AS line WITH line
MERGE (cpNode:CP{cpId:line.cpId})
MERGE (prnode:PR{prId:TOINT(SPLIT(line.prRefId, 'PR')[1])})
CREATE (prnode)-[:FIXED_BY]->(cpNode)
Query 5:
USING PERIODIC COMMIT
LOAD CSV WITH HEADERS FROM
'file:///2015_CPWithFilename.csv' AS line WITH line
MERGE (cpNode:CP{cpId:line.cpId})
MERGE (cpFile:FILE{fileName:line.fileName})
CREATE (cpNode)-[:CONTAINS]->(cpFile)
Query 6:
USING PERIODIC COMMIT 100
LOAD CSV WITH HEADERS FROM
'file:///2015_CPcomments.csv' AS line
MERGE (cpNode:CP{cpId:line.cpId})
MERGE (fileNode:FILE{fileName:line.fileName})
MERGE (owner:DougUser{userId:line.cpOwner})
MERGE (reviewer:DougUser{userId:line.cpReviewer})
MERGE (cpNode)-[:SUBMITTED_BY]->(owner)
WITH line WHERE line.reviewComment IS NOT NULL
MERGE (comment:ReviewComment{commentText:line.reviewComment,contextCP:line.cpId})
MERGE (comment)-[:GIVEN_BY]->(reviewer)
MERGE (comment)-[:COMMENT_FOR]->(fileNode)
neo4jでデータをアップロードするのに時間がかかります。最初のクエリで7時間以上。
また、サーバーからデータを取得するのに時間がかかります。
MATCH (pr:PR)-[:FIXED_BY]-(cp)
MATCH (cp)-[:CONTAINS]-(file)
MATCH (pr)-[:CLOSED_ON]-(cls)
MATCH (pr)-[:REPORTED_BY]-(custs)
MATCH (pr)-[:CATEGORY]-(cats)
WHERE file.fileName STARTS WITH 'xyz' AND NOT(cls.closedDate = '')AND
apoc.date.parse(cls.closedDate,'s', 'MM/dd/yyyy') >= apoc.date.parse('01/01/2014','s', 'MM/dd/yyyy') AND apoc.date.parse(cls.closedDate,'s', 'MM/dd/yyyy') <= apoc.date.parse('06/13/2017','s', 'MM/dd/yyyy')
RETURN collect(DISTINCT custs.customerName) AS customers, collect(DISTINCT cats.catName) AS categories
上記のクエリは、データをフェッチするのに5分以上かかります。これを解決するために私を助けてください。パフォーマンスは本当に悪いです。
インデックスと制約はありますか? (ブラウザの ':schema') neo4jの設定をしましたか? – logisima