2010-09-07 16 views
2

Je me demande s'il existe un script Python pour importer un fichier CSV de données hiérarchiques dans neo4j.py. Sujet : importer un fichier de données hiérarchiques CSV vers Neo4j.py.

J'ai écrit le script Python ci-dessous, mais j'ai besoin d'un meilleur moyen de vérifier si un nœud existe déjà avant de le créer.

# Import a five-level hierarchy (sector > category > product > brand >
# subbrand) from ``data_list`` into an embedded Neo4j store.
#
# Each row of ``data_list`` is expected to be indexable with positions 0..4
# holding the labels for the five levels, top-down.  ``data_list``,
# ``li_total_rows`` and the ``lparam_*`` / ``ls_jvm_heap_size`` settings are
# defined elsewhere in the script.
graphdb = neo4j.GraphDatabase(lparam_database_path, classpath=lparam_kernel_class_path, jvm=lparam_jvm_path, heap_size=ls_jvm_heap_size)

try:
    with graphdb.transaction:
        # One "type root" node per hierarchy level, chained top-down.
        sectornode = Subreference.Node.SECTOR_ROOT(graphdb, label="Sectors", level=1)
        categorynode = graphdb.node(label="Categories", level=2)
        productnode = graphdb.node(label="Products", level=3)
        brandnode = graphdb.node(label="Brands", level=4)
        subbrandnode = graphdb.node(label="Subbrands", level=5)

        sectornode.CATEGORY_ROOT(categorynode)
        categorynode.PRODUCT_ROOT(productnode)
        productnode.BRAND_ROOT(brandnode)
        brandnode.SUBBRAND_ROOT(subbrandnode)

        # Type roots indexed by depth: column i of a row belongs to
        # level_roots[i].
        level_roots = (sectornode, categorynode, productnode, brandnode, subbrandnode)

        # Cache of already-created nodes, keyed by the full path from the
        # sector down to the node (e.g. (sector, category, product)).  Keying
        # on the path rather than on the bare label keeps identically named
        # items under different parents distinct, and -- unlike comparing
        # with the previous row only -- works even when the input is not
        # sorted.
        nodes_by_path = {}

        for li_index, data in enumerate(data_list, 1):
            print("importing row " + str(li_index) + " of " + str(li_total_rows))

            parent = None
            for depth, level_root in enumerate(level_roots):
                # Index first so a short row fails loudly with IndexError
                # instead of silently matching a shorter cached path.
                label = data[depth]
                path = tuple(data[:depth + 1])

                if path not in nodes_by_path:
                    node = graphdb.node(label=label)
                    # Relationships are created only together with the node,
                    # so repeated rows no longer produce duplicate
                    # OFTYPE / MEMBEROF relationships.
                    node.OFTYPE(level_root)
                    if parent is not None:
                        node.MEMBEROF(parent)
                    nodes_by_path[path] = node

                parent = nodes_by_path[path]
finally:
    # Always release the database, even if the import fails part-way.
    graphdb.shutdown()

Répondre

1

Vous devriez être en mesure d'indexer le nœud après sa création, puis de vérifier via l'API d'index s'il existe déjà. Je n'ai pas l'API neo4j.py à portée de main, mais elle devrait exposer cette fonctionnalité.

Le principe de l'approche (en Java) est le suivant :

// Obtain the named node index from the database's index manager.
Index<Node> index = graphDatabase.index().forNodes("index-name");
// After creating a node, register it in the index under a key/value pair.
index.add(node, "key", "value");

// Before creating a node, look it up first.
// NOTE(review): getSingle() is expected to return null when nothing matches
// -- confirm against the IndexHits documentation for the Neo4j version used.
Node existing = index.get("key", "value").getSingle();