Solution examples 2023
From info216
*** SHACL examples - includes answers to the exam questions

@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
@prefix owl: <http://www.w3.org/2002/07/owl#> .
@prefix dc: <http://purl.org/dc/elements/1.1/> .
@prefix foaf: <http://xmlns.com/foaf/0.1/> .
@prefix sh: <http://www.w3.org/ns/shacl#> .
@prefix : <http://info216.uib.no/movies/> .

:DirectorShape
    a sh:NodeShape ;
    sh:targetClass :Director ;
    # A Director must have exactly one foaf:name of type xsd:string.
    # (The datatype constraint is sh:datatype; sh:type is not a SHACL term.)
    sh:property [
        sh:path foaf:name ;
        sh:minCount 1 ;
        sh:maxCount 1 ;
        sh:datatype xsd:string
    ] ;
    # A Director must be the director of at least one Movie.
    sh:property [
        sh:path :director_of ;
        sh:minCount 1 ;
        sh:class :Movie
    ] .

:ActorShape
    a sh:NodeShape ;
    sh:targetClass :Actor ;
    # An Actor must have exactly one foaf:name of type xsd:string.
    sh:property [
        sh:path foaf:name ;
        sh:minCount 1 ;
        sh:maxCount 1 ;
        sh:datatype xsd:string
    ] ;
    # If an actor is an actor in a resource, that resource must be a movie.
    sh:property [
        sh:path :actor_in ;
        sh:minCount 1 ;
        sh:class :Movie
    ] ;
    # Whatever an actor plays must be a Role.
    sh:property [
        sh:path :plays_role ;
        sh:class :Role
    ] ;
    # If an actor plays a role that is a role in some resource, that resource must be a movie.
    sh:property [
        sh:path ( :plays_role :role_in ) ;
        sh:class :Movie
    ] .

:MovieShape
    a sh:NodeShape ;
    sh:targetClass :Movie ;
    # A Movie must have exactly one dc:title of type xsd:string.
    sh:property [
        sh:path dc:title ;
        sh:minCount 1 ;
        sh:maxCount 1 ;
        sh:datatype xsd:string
    ] ;
    # A movie must be directed by at least one director or acted in by at least one actor.
    sh:or (
        [ sh:property [
            sh:path [ sh:inversePath :actor_in ] ;
            sh:minCount 1
        ] ]
        [ sh:property [
            sh:path [ sh:inversePath :director_of ] ;
            sh:minCount 1
        ] ]
    ) ;
    # A Movie must have exactly one :year.
    # NOTE: xsd:year is not a standard XSD datatype (the standard one is
    # xsd:gYear); it is used here because the RDFS rule and the Python
    # example in this document type the year literals as xsd:year.
    sh:property [
        sh:path :year ;
        sh:minCount 1 ;
        sh:maxCount 1 ;
        sh:datatype xsd:year
    ] .
:RoleShape
    a sh:NodeShape ;
    sh:targetClass :Role ;
    # A Role must have exactly one foaf:name of type xsd:string.
    # (The datatype constraint is sh:datatype; sh:type is not a SHACL term.)
    sh:property [
        sh:path foaf:name ;
        sh:minCount 1 ;
        sh:maxCount 1 ;
        sh:datatype xsd:string
    ] ;
    # A Role must be a role in at least one Movie.
    sh:property [
        sh:path :role_in ;
        sh:minCount 1 ;
        sh:class :Movie
    ] .

# A LeadRole must satisfy everything that is required of a Role.
:LeadRoleShape
    a sh:NodeShape ;
    sh:node :RoleShape ;
    sh:targetClass :LeadRole .

*** RDFS rules

A resource that is a director_of something is a director.
:director_of rdfs:domain :Director .

A resource that something else is a director_of is a movie.
:director_of rdfs:range :Movie .

The year of something has type xsd:year.
:year rdfs:range xsd:year .

An actor is a Person.
:Actor rdfs:subClassOf foaf:Person .

A director is a person.
:Director rdfs:subClassOf foaf:Person .

*** OWL axioms

Nothing can be both a person and a movie.
foaf:Person owl:disjointWith :Movie .

Nothing can be more than one of a person, a role, or a movie.
[] a owl:AllDisjointClasses ;
    owl:members ( foaf:Person :Role :Movie ) .

Something that plays in at least one Movie is an Actor.
[ a owl:Restriction ;
  owl:onProperty :actor_in ;
  owl:someValuesFrom :Movie
] rdfs:subClassOf :Actor .

A LeadActor is an Actor that plays at least one LeadRole.
:LeadActor rdfs:subClassOf
    :Actor ,
    [ a owl:Restriction ;
      owl:onProperty :plays_role ;
      owl:someValuesFrom :LeadRole
    ] .

*** SPARQL queries

Count the number of movies that are represented in the graph.
SELECT (COUNT(?movie) AS ?count) WHERE {
    ?movie rdf:type :Movie
}

List the titles and years of all movies.
SELECT ?title ?year WHERE {
    ?movie rdf:type :Movie ;
           dc:title ?title ;
           :year ?year
}

List the titles and years of all movies since 2000.
SELECT ?title ?year WHERE {
    ?movie rdf:type :Movie ;
           dc:title ?title ;
           :year ?year
    FILTER (xsd:integer(STR(?year)) >= 2000)
}

Alternatively:
SELECT ?title ?year WHERE {
    ?movie rdf:type :Movie ;
           dc:title ?title ;
           :year ?year
    FILTER (?year >= "2000"^^xsd:year)
}

List the titles and years of all movies sorted first by year, then by name.
SELECT ?title ?year WHERE { ?movie rdf:type :Movie ; dc:title ?title ; dc:year ?year } ORDER BY ?year, ?name Count the number of movies for each year with more than one movie. SELECT ?year (COUNT(?movie) AS ?count) WHERE { ?movie rdf:type :Movie ; dc:year ?year } GROUP BY ?year HAVING ?count > 1 List the names of all persons that are both directors and actors. SELECT ?name WHERE { ?person (:plays_in & :director_of) / rdf:type :Movie ; foaf:name ?name } List the actor name and movie title for all lead roles. SELECT ?name ?title WHERE { ?role rdf:type :LeadRole ; ^:plays_role / foaf:name ?name ; :role_in / dc:title ?title } List all distinct pairs of actor names that have played lead roles in the same movies. SELECT ?name1 ?name2 WHERE { ?movie rdf:type :Movie ; ^:?role_in ?role1, ?role2 . ?role1 rdf:type :LeadRole ; ^:plays_role / foaf:name ?name1 . ?role2 rdf:type :LeadRole ; ^:plays_role / foaf:name ?name2 . FILTER (STR(?name1) < STR(?name2)) } *** Examples related to the programming task from owlrl import DeductiveClosure, OWLRL_Semantics import pandas as pd from pyshacl import validate from rdflib import Namespace, Graph, Literal, RDF, DC, FOAF, XSD ONTOLOGY_FILE = './movie-ontology.ttl' SHACL_FILE = './movie-shacl.ttl' DIRECTOR_FILE = './movie-director-year.csv' LEAD_ROLE_FILE = './movie-actor-lead-role.csv' OTHER_ROLE_FILE = './movie-actor-other-role.csv' BASE_URI = 'http://example.org/' MOVIE = Namespace(BASE_URI) def add_movie_triples(g, row): movie = row.to_dict() # example dict: # {'Movie': 'Pulp_Fiction', 'Director': 'Quentin_Tarantino', 'Year': 1994} movie_name = movie['Movie'] director_name = movie['Director'] movie_year = movie['Year'] # update g with a set of triples that represent the movie and its director g.add((MOVIE[director_name], RDF.type, MOVIE.Director)) g.add((MOVIE[director_name], FOAF.name, Literal(director_name))) g.add((MOVIE[director_name], MOVIE.director_of, MOVIE[movie_name])) g.add((MOVIE[movie_name], RDF.type, MOVIE.Movie)) 
g.add((MOVIE[movie_name], DC.title, Literal(movie_name))) g.add((MOVIE[movie_name], MOVIE.year, Literal(movie_year, datatype=XSD.year))) def add_lead_role_triples(g, row): movie = row.to_dict() # example dict: # {'Movie': 'Pulp_Fiction', 'Director': 'Quentin_Tarantino', 'Year': 1994} movie_name = movie['Movie'] actor_name = movie['Actor'] role_name = movie_name+'-role-'+movie['LeadRole'] # update g with a set of triples that represent the movie and its director g.add((MOVIE[actor_name], RDF.type, MOVIE.Actor)) g.add((MOVIE[actor_name], FOAF.name, Literal(actor_name))) g.add((MOVIE[actor_name], MOVIE.actor_in, MOVIE[movie_name])) g.add((MOVIE[actor_name], MOVIE.plays_role, MOVIE[role_name])) g.add((MOVIE[role_name], RDF.type, MOVIE.LeadRole)) g.add((MOVIE[role_name], FOAF.name, Literal(movie['LeadRole']))) g.add((MOVIE[role_name], MOVIE.role_in, MOVIE[movie_name])) g.add((MOVIE[movie_name], RDF.type, MOVIE.Movie)) def add_other_role_triples(g, row): movie = row.to_dict() # example dict: # {'Movie': 'Pulp_Fiction', 'Director': 'Quentin_Tarantino', 'Year': 1994} movie_name = movie['Movie'] actor_name = movie['Actor'] role_name = movie_name+'-role-'+movie['Role'] # update g with a set of triples that represent the movie and its director g.add((MOVIE[actor_name], RDF.type, MOVIE.Actor)) g.add((MOVIE[actor_name], FOAF.name, Literal(actor_name))) g.add((MOVIE[actor_name], MOVIE.actor_in, MOVIE[movie_name])) g.add((MOVIE[actor_name], MOVIE.plays_role, MOVIE[role_name])) g.add((MOVIE[role_name], RDF.type, MOVIE.Role)) g.add((MOVIE[role_name], FOAF.name, Literal(movie['Role']))) g.add((MOVIE[role_name], MOVIE.role_in, MOVIE[movie_name])) g.add((MOVIE[movie_name], RDF.type, MOVIE.Movie)) def load_movie_triples(g, fn): df = pd.read_csv(fn) df.apply(lambda row: add_movie_triples(g, row), axis=1) def load_lead_role_triples(g, fn): df = pd.read_csv(fn) df.apply(lambda row: add_lead_role_triples(g, row), axis=1) def load_other_role_triples(g, fn): df = pd.read_csv(fn) 
df.apply(lambda row: add_other_role_triples(g, row), axis=1) g = Graph() g.bind('', MOVIE) load_movie_triples(g, DIRECTOR_FILE) load_lead_role_triples(g, LEAD_ROLE_FILE) load_other_role_triples(g, OTHER_ROLE_FILE) print(g.serialize(format='ttl')) sg = Graph() sg.parse(SHACL_FILE, format='ttl') r = validate(g, shacl_graph=sg, # ont_graph=og, inference='rdfs' ) val, rg, rep = r print(rep) g.parse(ONTOLOGY_FILE) DeductiveClosure(OWLRL_Semantics).expand(g) print(g.serialize(format='ttl'))