site_graphlogo
  -   Terms of Use and Privacy
Demos | Example Code | Meta | Operation
rss
site_graphlogo
  -   Terms of Use and Privacy
Demos | Example Code | Meta | Operation
rss

<<   <   >   >>

2021-05-01 | Example Code | Create Triples

This code creates triples from a data flow captured on a filesystem:

#!/usr/bin/python3
# coding=utf-8
#Create TRIPLES for import into graph db
# The person who associated a work with this deed has dedicated
# the work to the public domain by waiving all of his or her rights
# to the work worldwide under copyright law, including all related
# and neighboring rights, to the extent allowed by law.
# You can copy, modify, distribute and perform the work, even for
# commercial purposes, all without asking permission.
# In no way are the patent or trademark rights of any person affected by
# CC0, nor are the rights that other persons may have in the work or in
# how the work is used, such as publicity or privacy rights.
# Unless expressly stated otherwise, the person who associated a work with
# this deed makes no warranties about the work, and disclaims liability
# for all uses of the work, to the fullest extent permitted by applicable law.
# When using or citing the work, you should not imply endorsement by the 
# author or the affirmer.
# https://creativecommons.org/publicdomain/zero/1.0/

import os
import re
import sys
from collections import defaultdict
from datetime import date
from pathlib import Path

# --- Locations on disk ------------------------------------------------------
# Everything lives below ~/sync.
home = f"{Path.home()}/sync"
# Generated site area; the triple dump is written below this directory.
fsbase = f"{home}/websites/site/itd/"
# Source area, and the '0/' subtree inside it that is walked for data flows.
rootds = f"{home}/websites/source/itd/"
rootd = f"{rootds}0/"
# Graph output directory (not referenced by the code shown in this script).
out = f"{fsbase}graph/"

# --- IRI / path fragments ---------------------------------------------------
# Opening of every IRI; deliberately left without the closing '>'.
base = '<https://example.com/'
# Path fragment that separates the filesystem prefix from the process path.
pbase = '/source/itd/0/'

# --- Triple store in memory -------------------------------------------------
# Seeded with the label of the site root and one fixed subProcessOf link.
triples = {
    '<https://example.com/> <http://www.w3.org/2000/01/rdf-schema#label> "All ACME Business"',
    '<https://example.com/BST/> <https://w3id.org/dfd#subProcessOf> <https://example.com/6/1/1/>',
}
# List of dfd properties; starts out empty.
padds = []
# ---------------------------------------------------------------------------
# Walk the source tree and turn every data-flow file into DFD triples.
#
# A data-flow file is any file whose path looks like
#    <...><pbase><process path>/<flow predicate directory>/<object file>
# where the flow predicate directory is has_specified_input,
# has_specified_output or has_specified_inp_and_out.  All triples are added
# to the module-level set ``triples``.
# ---------------------------------------------------------------------------

# Compiled once here instead of once per visited file.
bits=re.compile('^.+'+pbase+'(.+)/(has_specified_input|has_specified_output|has_specified_inp_and_out)/(.+)$')

_RDF_TYPE='<http://www.w3.org/1999/02/22-rdf-syntax-ns#type>'
_RDFS_LABEL='<http://www.w3.org/2000/01/rdf-schema#label>'
_RDFS_COMMENT='<http://www.w3.org/2000/01/rdf-schema#comment>'
_DFD_SUBPROCESS='<https://w3id.org/dfd#subProcessOf>'
_DFD_FROM='<https://w3id.org/dfd#from>'
_DFD_TO='<https://w3id.org/dfd#to>'

# For each flow predicate directory: the dfd properties that point from the
# DataFlow node to the owning process (first tuple) and to the object named
# by the file (second tuple).
_FLOW_ENDS={
   'has_specified_inp_and_out': ((_DFD_FROM,_DFD_TO),(_DFD_FROM,_DFD_TO)),
   'has_specified_output': ((_DFD_FROM,),(_DFD_TO,)),
   'has_specified_input': ((_DFD_TO,),(_DFD_FROM,)),
}


def _read_text(path):
   """Return the content of *path*, or '' when it is not a regular file."""
   if not os.path.isfile(path):
      return ''
   with open(path) as f:
      return f.read()


def _literal(text):
   """Return *text* as a double-quoted literal for a triple.

   A single space following a line break is dropped (as the script has
   always done), then backslash, double quote, LF and CR are escaped so the
   literal cannot terminate early or span lines.
   """
   text=text.replace("\n ","\n")
   text=text.replace('\\','\\\\').replace('"','\\"')
   text=text.replace("\n","\\n").replace("\r","\\r")
   return '"'+text+'"'


def _add_flow_triples(subpath, predicate, objctraw):
   """Add every triple derived from one data-flow file to ``triples``.

   subpath   -- process path relative to ``rootd`` (regex group 1)
   predicate -- name of the flow predicate directory (regex group 2)
   objctraw  -- file name below that directory (regex group 3)

   Raises OSError / UnicodeError if one of the companion text files cannot
   be read; triples added before the failure stay in the set.
   """
   # Process hierarchy: each path prefix is a subprocess of the prefix one
   # level up.  ``parent`` keeps its trailing '/', so base+parent+'>' is the
   # parent IRI, and an empty parent yields the site root itself.
   bitsf=''
   for bit in subpath.split('/'):
      parent=bitsf
      bitsf+=bit+'/'
      triples.add(base+bitsf+'> '+_DFD_SUBPROCESS+' '+base+parent+'>')

   subject=subpath
   subj_iri=base+subject+'/>'

   # NOTE(review): title and details of the process are emitted unstripped
   # (a trailing line break becomes part of the literal), unlike the flow and
   # object texts below.  Kept as-is to leave the generated data unchanged;
   # confirm whether that is intended.
   r=_read_text(rootd+subpath+'.title.txt')
   if r:
      triples.add(subj_iri+' '+_RDFS_LABEL+' '+_literal(r))
   r=_read_text(rootd+subpath+'.details.txt')
   if r:
      triples.add(subj_iri+' '+_RDFS_COMMENT+' '+_literal(r))
   # <process>.contains.txt names a process that is a subprocess of this one.
   contf=_read_text(rootd+subpath+'.contains.txt')
   if contf:
      triples.add(base+contf.strip()+'/> '+_DFD_SUBPROCESS+' '+subj_iri)

   # Object name: cut '.title.txt' when present, otherwise the last four
   # characters (the '.txt' extension).
   ttl=objctraw.find('title.txt')
   if ttl==-1:
      objct=objctraw[:-4]
   else:
      objct=objctraw[:ttl-1]

   padds,paddo=_FLOW_ENDS[predicate]

   # The object lives at the same level as the process: same parent path.
   ld=subject.rfind('/')
   level=subject[:ld]+'/' if ld!=-1 else ''
   mo=re.search(r'\d',objct)

   triples.add(subj_iri+' '+_RDF_TYPE+' <https://w3id.org/dfd#Process>')

   flow_iri=base+subject+'DataFlow'+objct+'/>'
   flow_file=rootd+subject+'/'+predicate+'/'+objct
   r=_read_text(flow_file+'.details.txt').strip()
   if r:
      triples.add(flow_iri+' '+_RDFS_COMMENT+' '+_literal(r))
   r=_read_text(flow_file+'.predlabel.txt').strip()
   if r:
      triples.add(flow_iri+' '+_RDFS_LABEL+' '+_literal(r))

   obj_iri=base+level+objct+'/>'
   r=_read_text(rootd+level+objct+'.txt').strip()
   if r:
      triples.add(obj_iri+' '+_RDFS_LABEL+' '+_literal(r))
   r=_read_text(rootd+level+objct+'.details.txt').strip()
   if r:
      triples.add(obj_iri+' '+_RDFS_COMMENT+' '+_literal(r))

   triples.add(flow_iri+' '+_RDF_TYPE+' <https://w3id.org/dfd#DataFlow>')
   for padd in paddo:
      triples.add(flow_iri+' '+padd+' '+obj_iri)
   for padd in padds:
      triples.add(flow_iri+' '+padd+' '+subj_iri)

   # Object kind is read off its name: no digit -> Interface, a digit plus a
   # 'D' -> DataStore, a digit without 'D' -> Process.
   if not mo:
      triples.add(obj_iri+' '+_RDF_TYPE+' <https://w3id.org/dfd#Interface>')
   elif 'D' in objct:
      triples.add(obj_iri+' '+_RDF_TYPE+' <https://w3id.org/dfd#DataStore>')
   else:
      triples.add(obj_iri+' '+_RDF_TYPE+' <https://w3id.org/dfd#Process>')


for root,dirs,files in os.walk(rootd, followlinks=False):
   for fname in files:
      # Hidden files and companion/annotation files are never flows themselves.
      if fname.startswith('.') or fname.endswith(('predlabel.txt','details.txt')):
         continue
      if '^^' in fname or '_component_destination' in fname or '_flow_destination' in fname:
         continue
      p=os.path.join(root,fname)
      m=bits.match(p)
      if m is None:
         # Not inside a flow predicate directory: nothing to derive.
         continue
      try:
         _add_flow_triples(m.group(1),m.group(2),m.group(3))
      except (OSError,UnicodeError) as err:
         # Stay best-effort, but say which file was skipped and why.
         print('skipped '+p+': '+repr(err),file=sys.stderr)
# ---------------------------------------------------------------------------
# Upload: drop the graph <https://example.com/>, then insert the collected
# triples one by one.  Each triple the endpoint accepts is also appended to
# files/itd.txt below ``fsbase``.
# ---------------------------------------------------------------------------
from SPARQLWrapper import SPARQLWrapper, JSON
sparql = SPARQLWrapper('http://localhost/sparql')
sparql.method = 'POST'
queryString = 'DROP SILENT GRAPH <https://example.com/>'
sparql.setQuery(queryString)
try:
   sparql.query().convert()
except Exception as err:
   # Best effort, as before: report the actual error and carry on.  (The old
   # handler printed a result variable that was not bound yet at this point.)
   print('DROP GRAPH failed: '+repr(err), file=sys.stderr)
sparql.addDefaultGraph('https://example.com/')
with open(fsbase+'files/itd.txt','w+') as f:
   # Sorted so that the dump file comes out in the same order on every run.
   for t in sorted(triples):
      print(t)
      # NOTE(review): 'INSERT INTO <graph> { ... }' is not the SPARQL 1.1
      # Update spelling; presumably the local endpoint accepts it -- confirm.
      queryString = 'INSERT INTO <https://example.com/> { '+t+' . }'
      sparql.setQuery(queryString)
      try:
         sparql.query().convert()
      except Exception as err:
         # One rejected triple must not stop the upload of the others.
         print('INSERT failed for '+t+': '+repr(err), file=sys.stderr)
      else:
         # Only triples the endpoint accepted are written to the dump.
         f.write(t+' .'+'\n')

When you run the code, use this set of files for the example, or create your own directory tree.

triple scripts python