Examples from the lectures: Difference between revisions

From info216
No edit summary
Line 1: Line 1:
==S07: SHACL==
 
=Examples from the lectures=
 
==Lecture 1: Introduction to KGs==
Turtle example:
<syntaxhighlight lang="turtle">
@prefix ex: <http://example.org/> .
ex:Roger_Stone
    ex:name "Roger Stone" ;
    ex:occupation ex:lobbyist ;
    ex:significant_person ex:Donald_Trump .
ex:Donald_Trump
    ex:name "Donald Trump" .
</syntaxhighlight>
 
==Lecture 2: RDF==
Blank nodes for anonymity, or when we have not decided on a URI:
<syntaxhighlight lang="Python">
from rdflib import Graph, Namespace, Literal, BNode, RDF, RDFS, DC, FOAF, XSD

EX = Namespace('http://example.org/')

g = Graph()
g.bind('ex', EX)  # this is why the line '@prefix ex: <http://example.org/> .'
                  # and the 'ex:' prefix are used when we print out Turtle later

# BNode() creates a blank node: a resource that is not given a URI.
robertMueller = BNode()
g.add((robertMueller, RDF.type, EX.Human))
g.add((robertMueller, FOAF.name, Literal('Robert Mueller', lang='en')))
g.add((robertMueller, EX.position_held, Literal('Director of the Federal Bureau of Investigation', lang='en')))

print(g.serialize(format='turtle'))
</syntaxhighlight>
 
Blank nodes used to group related properties:
<syntaxhighlight lang="Python">
from rdflib import Graph, Namespace, Literal, BNode, RDF, RDFS, DC, FOAF, XSD

EX = Namespace('http://example.org/')

g = Graph()
g.bind('ex', EX)

# This is a task in Exercise 2: the triples that use a blank node to group
# related properties go here, before the graph is printed.

print(g.serialize(format='turtle'))
</syntaxhighlight>
 
Literals:
<syntaxhighlight lang="Python">
from rdflib import Graph, Namespace, Literal, BNode, RDF, RDFS, DC, FOAF, XSD

EX = Namespace('http://example.org/')

g = Graph()
g.bind('ex', EX)

g.add((EX.Robert_Mueller, RDF.type, EX.Human))
# Language-tagged literals: the same property gets one value per language tag.
g.add((EX.Robert_Mueller, FOAF.name, Literal('Robert Mueller', lang='en')))
g.add((EX.Robert_Mueller, FOAF.name, Literal('رابرت مولر', lang='fa')))
# Typed literals: datatype= states the XSD datatype of the value explicitly.
g.add((EX.Robert_Mueller, DC.description, Literal('sixth director of the FBI', datatype=XSD.string)))
g.add((EX.Robert_Mueller, EX.start_time, Literal(2001, datatype=XSD.integer)))

print(g.serialize(format='turtle'))
</syntaxhighlight>
 
Alternative container (open):
<syntaxhighlight lang="Python">
from rdflib import Graph, Namespace, Literal, BNode, RDF, RDFS, DC, FOAF, XSD

EX = Namespace('http://example.org/')
# SCHEMA is not one of the names imported from rdflib above, so the
# schema.org namespace has to be defined here before it is used below.
SCHEMA = Namespace('https://schema.org/')

g = Graph()
g.bind('ex', EX)
g.bind('schema', SCHEMA)

# The container itself is a blank node of type rdf:Alt.
muellerReportArchives = BNode()
g.add((muellerReportArchives, RDF.type, RDF.Alt))

archive1 = 'https://archive.org/details/MuellerReportVolume1Searchable/' \
                    'Mueller%20Report%20Volume%201%20Searchable/'
archive2 = 'https://edition.cnn.com/2019/04/18/politics/full-mueller-report-pdf/index.html'
archive3 = 'https://www.politico.com/story/2019/04/18/mueller-report-pdf-download-text-file-1280891'

# Each alternative is attached to the container as an xsd:anyURI literal.
for archive in (archive1, archive2, archive3):
    g.add((muellerReportArchives, RDFS.member, Literal(archive, datatype=XSD.anyURI)))

g.add((EX.Mueller_Report, RDF.type, FOAF.Document))
g.add((EX.Mueller_Report, DC.contributor, EX.Robert_Mueller))
g.add((EX.Mueller_Report, SCHEMA.archivedAt, muellerReportArchives))

print(g.serialize(format='turtle'))
</syntaxhighlight>
 
Sequence container (open):
<syntaxhighlight lang="Python">
from rdflib import Graph, Namespace, Literal, BNode, RDF, RDFS, DC, FOAF, XSD

EX = Namespace('http://example.org/')
# SCHEMA is not one of the names imported from rdflib above, so the
# schema.org namespace has to be defined here before it is used below.
SCHEMA = Namespace('https://schema.org/')

g = Graph()
g.bind('ex', EX)
g.bind('schema', SCHEMA)

# The container is a blank node of type rdf:Seq; the numbered membership
# properties rdf:_1, rdf:_2, ... give the position of each member.
donaldTrumpSpouses = BNode()
g.add((donaldTrumpSpouses, RDF.type, RDF.Seq))
g.add((donaldTrumpSpouses, RDF._1, EX.IvanaTrump))
g.add((donaldTrumpSpouses, RDF._2, EX.MarlaMaples))
g.add((donaldTrumpSpouses, RDF._3, EX.MelaniaTrump))

g.add((EX.Donald_Trump, SCHEMA.spouse, donaldTrumpSpouses))

print(g.serialize(format='turtle'))
</syntaxhighlight>
 
Collection (closed list):
<syntaxhighlight lang="Python">
from rdflib import Graph, Namespace, Literal, BNode, RDF, RDFS, DC, FOAF, XSD
from rdflib.collection import Collection

EX = Namespace('http://example.org/')
# SCHEMA is not one of the names imported from rdflib above, so the
# schema.org namespace has to be defined here before it is used below.
SCHEMA = Namespace('https://schema.org/')

g = Graph()
g.bind('ex', EX)
g.bind('schema', SCHEMA)

# Collection() adds the list of members to g, using the blank node as the
# head of the list.
donaldTrumpSpouses = BNode()
Collection(g, donaldTrumpSpouses, [
    EX.IvanaTrump, EX.MarlaMaples, EX.MelaniaTrump
])
g.add((EX.Donald_Trump, SCHEMA.spouse, donaldTrumpSpouses))

# Print the Turtle once and also save it to a file.
print(g.serialize(format='turtle'))
g.serialize(destination='s02_Donald_Trump_spouses_list.ttl', format='turtle')
</syntaxhighlight>
 
==Lecture 7: SHACL==
The examples are for use with the [https://shacl.org/playground/ interactive SHACL Playground].
The examples are for use with the [https://shacl.org/playground/ interactive SHACL Playground].


Line 197: Line 337:
</syntaxhighlight>
</syntaxhighlight>


==S07: RDFS==
==Lecture 7: RDFS==
Create a new Blazegraph Namespace ''with inference'' and ''Use'' it.
Create a new Blazegraph Namespace ''with inference'' and ''Use'' it.


Line 263: Line 403:
Back in the ''Query'' tab, rerun the SPARQL queries to check that the new SW paper is now an ''ss:Paper'' and ''foaf:Document'', but not a ''kg:MainPaper''.
Back in the ''Query'' tab, rerun the SPARQL queries to check that the new SW paper is now an ''ss:Paper'' and ''foaf:Document'', but not a ''kg:MainPaper''.


==S10: OWL-DL==
==Lecture 10: OWL-DL==
Here is the OWL file with the minimal Protégé / HermiT example from S10. Save it with the ''.owl'' suffix, for example ''s10-example.owl'', and ''File -> Open'' it in Protégé.
Here is the OWL file with the minimal Protégé / HermiT example from S10. Save it with the ''.owl'' suffix, for example ''s10-example.owl'', and ''File -> Open'' it in Protégé.
<syntaxhighlight lang='owl'>
<syntaxhighlight lang='owl'>
Line 334: Line 474:
</syntaxhighlight>
</syntaxhighlight>


==S11: Graph embeddings==
==Lecture 11: Graph embeddings==
Here is the example that used graph embeddings from ConceptNet. The ''numberbatch-en-19.08.txt.gz'' file can be [https://github.com/commonsense/conceptnet-numberbatch downloaded from GitHub].
Here is the example that used graph embeddings from ConceptNet. The ''numberbatch-en-19.08.txt.gz'' file can be [https://github.com/commonsense/conceptnet-numberbatch downloaded from GitHub].



Revision as of 10:58, 10 January 2024

Examples from the lectures

Lecture 1: Introduction to KGs

Turtle example:

@prefix ex: <http://example.org/> .
ex:Roger_Stone
    ex:name "Roger Stone" ;
    ex:occupation ex:lobbyist ;
    ex:significant_person ex:Donald_Trump .
ex:Donald_Trump
    ex:name "Donald Trump" .

Lecture 2: RDF

Blank nodes for anonymity, or when we have not decided on a URI:

from rdflib import Graph, Namespace, Literal, BNode, RDF, RDFS, DC, FOAF, XSD

EX = Namespace('http://example.org/')

g = Graph()
g.bind('ex', EX)  # this is why the line '@prefix ex: <http://example.org/> .'
                  # and the 'ex:' prefix are used when we print out Turtle later

# BNode() creates a blank node: a resource that is not given a URI.
robertMueller = BNode()
g.add((robertMueller, RDF.type, EX.Human))
g.add((robertMueller, FOAF.name, Literal('Robert Mueller', lang='en')))
g.add((robertMueller, EX.position_held, Literal('Director of the Federal Bureau of Investigation', lang='en')))

print(g.serialize(format='turtle'))

Blank nodes used to group related properties:

from rdflib import Graph, Namespace, Literal, BNode, RDF, RDFS, DC, FOAF, XSD

EX = Namespace('http://example.org/')

g = Graph()
g.bind('ex', EX)

# This is a task in Exercise 2: the triples that use a blank node to group
# related properties go here, before the graph is printed.

print(g.serialize(format='turtle'))

Literals:

from rdflib import Graph, Namespace, Literal, BNode, RDF, RDFS, DC, FOAF, XSD

EX = Namespace('http://example.org/')

g = Graph()
g.bind('ex', EX)

g.add((EX.Robert_Mueller, RDF.type, EX.Human))
# Language-tagged literals: the same property gets one value per language tag.
g.add((EX.Robert_Mueller, FOAF.name, Literal('Robert Mueller', lang='en')))
g.add((EX.Robert_Mueller, FOAF.name, Literal('رابرت مولر', lang='fa')))
# Typed literals: datatype= states the XSD datatype of the value explicitly.
g.add((EX.Robert_Mueller, DC.description, Literal('sixth director of the FBI', datatype=XSD.string)))
g.add((EX.Robert_Mueller, EX.start_time, Literal(2001, datatype=XSD.integer)))

print(g.serialize(format='turtle'))

Alternative container (open):

from rdflib import Graph, Namespace, Literal, BNode, RDF, RDFS, DC, FOAF, XSD

EX = Namespace('http://example.org/')
# SCHEMA is not one of the names imported from rdflib above, so the
# schema.org namespace has to be defined here before it is used below.
SCHEMA = Namespace('https://schema.org/')

g = Graph()
g.bind('ex', EX)
g.bind('schema', SCHEMA)

# The container itself is a blank node of type rdf:Alt.
muellerReportArchives = BNode()
g.add((muellerReportArchives, RDF.type, RDF.Alt))

archive1 = 'https://archive.org/details/MuellerReportVolume1Searchable/' \
                    'Mueller%20Report%20Volume%201%20Searchable/'
archive2 = 'https://edition.cnn.com/2019/04/18/politics/full-mueller-report-pdf/index.html'
archive3 = 'https://www.politico.com/story/2019/04/18/mueller-report-pdf-download-text-file-1280891'

# Each alternative is attached to the container as an xsd:anyURI literal.
for archive in (archive1, archive2, archive3):
    g.add((muellerReportArchives, RDFS.member, Literal(archive, datatype=XSD.anyURI)))

g.add((EX.Mueller_Report, RDF.type, FOAF.Document))
g.add((EX.Mueller_Report, DC.contributor, EX.Robert_Mueller))
g.add((EX.Mueller_Report, SCHEMA.archivedAt, muellerReportArchives))

print(g.serialize(format='turtle'))

Sequence container (open):

from rdflib import Graph, Namespace, Literal, BNode, RDF, RDFS, DC, FOAF, XSD

EX = Namespace('http://example.org/')
# SCHEMA is not one of the names imported from rdflib above, so the
# schema.org namespace has to be defined here before it is used below.
SCHEMA = Namespace('https://schema.org/')

g = Graph()
g.bind('ex', EX)
g.bind('schema', SCHEMA)

# The container is a blank node of type rdf:Seq; the numbered membership
# properties rdf:_1, rdf:_2, ... give the position of each member.
donaldTrumpSpouses = BNode()
g.add((donaldTrumpSpouses, RDF.type, RDF.Seq))
g.add((donaldTrumpSpouses, RDF._1, EX.IvanaTrump))
g.add((donaldTrumpSpouses, RDF._2, EX.MarlaMaples))
g.add((donaldTrumpSpouses, RDF._3, EX.MelaniaTrump))

g.add((EX.Donald_Trump, SCHEMA.spouse, donaldTrumpSpouses))

print(g.serialize(format='turtle'))

Collection (closed list):

from rdflib import Graph, Namespace, Literal, BNode, RDF, RDFS, DC, FOAF, XSD
from rdflib.collection import Collection

EX = Namespace('http://example.org/')
# SCHEMA is not one of the names imported from rdflib above, so the
# schema.org namespace has to be defined here before it is used below.
SCHEMA = Namespace('https://schema.org/')

g = Graph()
g.bind('ex', EX)
g.bind('schema', SCHEMA)

# Collection() adds the list of members to g, using the blank node as the
# head of the list.
donaldTrumpSpouses = BNode()
Collection(g, donaldTrumpSpouses, [
    EX.IvanaTrump, EX.MarlaMaples, EX.MelaniaTrump
])
g.add((EX.Donald_Trump, SCHEMA.spouse, donaldTrumpSpouses))

# Print the Turtle once and also save it to a file.
print(g.serialize(format='turtle'))
g.serialize(destination='s02_Donald_Trump_spouses_list.ttl', format='turtle')

Lecture 7: SHACL

The examples are for use with the interactive SHACL Playground.

Minimal example

First shape graph:

@prefix sh: <http://www.w3.org/ns/shacl#> .
@prefix dcterm: <http://purl.org/dc/terms/> .
@prefix foaf: <http://xmlns.com/foaf/0.1/> .
@prefix skos: <http://www.w3.org/2004/02/skos/core#> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
@prefix kg: <http://i2s.uib.no/kg4news/> .
@prefix th: <http://i2s.uib.no/kg4news/theme/> .
@prefix ss: <http://semanticscholar.org/> .


kg:MainPaperShape
    a sh:NodeShape ;
    sh:targetClass kg:MainPaper ;
    sh:property kg:MainPaperYearShape  .

kg:MainPaperYearShape
        sh:path kg:year .

First data graph:

@prefix sh: <http://www.w3.org/ns/shacl#> .
@prefix dcterm: <http://purl.org/dc/terms/> .
@prefix foaf: <http://xmlns.com/foaf/0.1/> .
@prefix skos: <http://www.w3.org/2004/02/skos/core#> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
@prefix kg: <http://i2s.uib.no/kg4news/> .
@prefix th: <http://i2s.uib.no/kg4news/theme/> .
@prefix ss: <http://semanticscholar.org/> .


kg:LOD_Paper
    a kg:MainPaper ;
    dcterm:title "Linked Data - The Story so Far" .

This should not give a validation error.

Alternative shape graph

Keep the prefixes from the first examples. You can also write the property constraint as an anonymous node like this:

kg:MainPaperShape
    a sh:NodeShape ;
    sh:targetClass kg:MainPaper ;
    sh:property [
        sh:path kg:year 
    ] .

This is equivalent to the previous example (no validation error).

Cardinality constraints on properties

Add a minimum constraint to the kg:year property:

kg:MainPaperShape
    a sh:NodeShape ;
    sh:targetClass kg:MainPaper ;
    sh:property [
        sh:path kg:year ;
        sh:minCount 1
    ] .

Now you should get a validation error.

  • In the data graph, add two kg:year properties (e.g., 2008 and 2009) to kg:LOD_Paper to get rid of the error.
  • In the shapes graph, add a sh:maxCount 1 constraint to get another validation error.
  • In the data graph, remove one kg:year property value from kg:LOD_Paper to get rid of the error (2009 is the right year).

Datatype constraint on literal property values

Add the following property constraint to the previous example:

kg:MainPaperShape
    a sh:NodeShape ;
    sh:targetClass kg:MainPaper ;
    sh:property [
        sh:path kg:year ;
        sh:minCount 1 ;
        sh:maxCount 1 ;
        sh:datatype xsd:integer
    ] .

This should not give a validation error.

  • In the shapes graph, change the datatype constraint to sh:datatype xsd:year. Now you should get an error.
  • In the data graph, change the integer 2009 to the typed value "2009"^^xsd:year to get rid of the error.

Class and node kind constraints

Add the following property constraints to the shape graph, either with a URI like this:

kg:MainPaperShape
    a sh:NodeShape ;
    sh:targetClass kg:MainPaper ;
    sh:property kg:MainPaperYearShape, kg:MainPaperContributorShape .

...

kg:MainPaperContributorShape
        sh:path dcterm:contributor ;
        sh:minCount 1 ;
        sh:class kg:MainAuthor ;
        sh:nodeKind sh:IRI .

Or like this:

kg:MainPaperShape
    a sh:NodeShape ;
    sh:targetClass kg:MainPaper ;
    sh:property [
        sh:path kg:year ;
        sh:minCount 1 ;
        sh:maxCount 1 ;
        sh:datatype xsd:year 
    ], [
        sh:path dcterm:contributor ;
        sh:minCount 1 ;
        sh:class kg:MainAuthor ;
        sh:nodeKind sh:IRI 
    ] .

Either way should give you a validation error.

  • In the data graph, add dcterm:contributor "T. Berners-Lee" to get rid of the cardinality error. The error goes away, but you get two new ones instead.
  • In the data graph, add dcterm:contributor [ a kg:MainAuthor ; foaf:name "T. Berners-Lee" ] to get rid of the error. The class error goes away, but the IRI error remains.
  • In the data graph, create an IRI for Tim Berners-Lee to resolve the error:
    ...
    dcterm:contributor kg:TBL .

kg:TBL
    a kg:MainAuthor;
    foaf:name "T. Berners-Lee" .


Full example

Final shape graph from the lecture:

@prefix sh: <http://www.w3.org/ns/shacl#> .
@prefix dcterm: <http://purl.org/dc/terms/> .
@prefix foaf: <http://xmlns.com/foaf/0.1/> .
@prefix skos: <http://www.w3.org/2004/02/skos/core#> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
@prefix kg: <http://i2s.uib.no/kg4news/> .
@prefix th: <http://i2s.uib.no/kg4news/theme/> .
@prefix ss: <http://semanticscholar.org/> .


kg:MainPaperShape
    a sh:NodeShape ;
    sh:targetClass kg:MainPaper ;
    sh:property kg:MainPaperYearShape, kg:MainPaperContributorShape .

kg:MainPaperYearShape
        sh:path kg:year ;
        sh:minCount 1 ;
        sh:maxCount 1 ;
        sh:datatype xsd:year .

kg:MainPaperContributorShape
        sh:path dcterm:contributor ;
        sh:minCount 1 ;
        sh:class kg:MainAuthor ;
        sh:nodeKind sh:IRI .

Final data graph from the lecture:

@prefix sh: <http://www.w3.org/ns/shacl#> .
@prefix dcterm: <http://purl.org/dc/terms/> .
@prefix foaf: <http://xmlns.com/foaf/0.1/> .
@prefix skos: <http://www.w3.org/2004/02/skos/core#> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
@prefix kg: <http://i2s.uib.no/kg4news/> .
@prefix th: <http://i2s.uib.no/kg4news/theme/> .
@prefix ss: <http://semanticscholar.org/> .


kg:LOD_Paper
    a kg:MainPaper ;
    dcterm:title "Linked Data - The Story so Far" ;
    kg:year "2009"^^xsd:year ;
    dcterm:contributor kg:TBL, kg:CB .

kg:TBL
    a kg:MainAuthor;
    foaf:name "T. Berners-Lee" .
 
kg:CB
     a kg:MainAuthor;
     foaf:name "C. Bizer" .

Lecture 7: RDFS

Create a new Blazegraph Namespace with inference and Use it.

rdfs:subClassOf

In the Update tab, insert the Final data graph from the SHACL example above. (Select Type: RDF data and Format: Turtle.)

Go to the Query tab, and SELECT all papers of type kg:MainPaper.

PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX sh: <http://www.w3.org/ns/shacl#>
PREFIX dcterm: <http://purl.org/dc/terms/>
PREFIX foaf: <http://xmlns.com/foaf/0.1/>
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
PREFIX kg: <http://i2s.uib.no/kg4news/>
PREFIX th: <http://i2s.uib.no/kg4news/theme/>
PREFIX ss: <http://semanticscholar.org/>

SELECT ?paper WHERE {
    ?paper rdf:type kg:MainPaper
}

Change the query to select all papers first of type ss:Paper and then of type foaf:Document.

In the Update tab:

kg:MainPaper rdfs:subClassOf ss:Paper .

Back in the Query tab, rerun the query to select all papers first of type ss:Paper and then of type foaf:Document.

Transitive rdfs:subClassOf

In the Update tab, add the following RDFS rule:

ss:Paper rdfs:subClassOf foaf:Document .

Back in the Query tab, rerun the query to select all papers first of type ss:Paper and then of type foaf:Document.

rdfs:range

In the Update tab, add another paper, which is cited (referenced) by the LOD paper:

kg:SW_Paper 
    dcterm:title "The Semantic Web" ;
    dcterm:contributor kg:TBL, kg:JH, kg:OL .

kg:JH
    foaf:name "J. Hendler" .

kg:OL
    foaf:name "O. Lassila" .

kg:LOD_Paper kg:cites kg:SW_Paper .  # the new SW paper is cited by the LOD paper

Note that we have not stated the RDF types of the new paper and new authors.

In the Query tab, rerun the SPARQL queries to check that kg:SW_Paper has no rdf:type.

In the Update tab add this RDFS rule:

kg:cites rdfs:range ss:Paper .  # anything that is cited (any object of kg:cites) is an ss:Paper

Back in the Query tab, rerun the SPARQL queries to check that the new SW paper is now an ss:Paper and foaf:Document, but not a kg:MainPaper.

Lecture 10: OWL-DL

Here is the OWL file with the minimal Protégé / HermiT example from S10. Save it with the .owl suffix, for example s10-example.owl, and File -> Open it in Protégé.

<?xml version="1.0"?>
<rdf:RDF xmlns="http://www.semanticweb.org/sinoa/ontologies/2023/2/untitled-ontology-3#"
     xml:base="http://www.semanticweb.org/sinoa/ontologies/2023/2/untitled-ontology-3"
     xmlns:owl="http://www.w3.org/2002/07/owl#"
     xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
     xmlns:xml="http://www.w3.org/XML/1998/namespace"
     xmlns:xsd="http://www.w3.org/2001/XMLSchema#"
     xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#"
     xmlns:untitled-ontology-3="http://www.semanticweb.org/sinoa/ontologies/2023/2/untitled-ontology-3#">
    <owl:Ontology rdf:about="http://www.semanticweb.org/sinoa/ontologies/2023/2/untitled-ontology-3"/>

    <!-- 
    ///////////////////////////////////////////////////////////////////////////////////////
    //
    // Object Properties
    //
    ///////////////////////////////////////////////////////////////////////////////////////
     -->

    <!-- http://www.semanticweb.org/sinoa/ontologies/2023/2/untitled-ontology-3#hasSource -->
    <owl:ObjectProperty rdf:about="http://www.semanticweb.org/sinoa/ontologies/2023/2/untitled-ontology-3#hasSource"/>

    <!-- 
    ///////////////////////////////////////////////////////////////////////////////////////
    //
    // Classes
    //
    ///////////////////////////////////////////////////////////////////////////////////////
     -->

    <!-- http://www.semanticweb.org/sinoa/ontologies/2023/2/untitled-ontology-3#Content -->
    <owl:Class rdf:about="http://www.semanticweb.org/sinoa/ontologies/2023/2/untitled-ontology-3#Content"/>

    <!-- http://www.semanticweb.org/sinoa/ontologies/2023/2/untitled-ontology-3#Source -->
    <owl:Class rdf:about="http://www.semanticweb.org/sinoa/ontologies/2023/2/untitled-ontology-3#Source">
        <owl:equivalentClass>
            <owl:Restriction>
                <owl:onProperty>
                    <rdf:Description>
                        <owl:inverseOf rdf:resource="http://www.semanticweb.org/sinoa/ontologies/2023/2/untitled-ontology-3#hasSource"/>
                    </rdf:Description>
                </owl:onProperty>
                <owl:someValuesFrom rdf:resource="http://www.semanticweb.org/sinoa/ontologies/2023/2/untitled-ontology-3#Content"/>
            </owl:Restriction>
        </owl:equivalentClass>
    </owl:Class>

    <!-- 
    ///////////////////////////////////////////////////////////////////////////////////////
    //
    // Individuals
    //
    ///////////////////////////////////////////////////////////////////////////////////////
     -->

    <!-- http://www.semanticweb.org/sinoa/ontologies/2023/2/untitled-ontology-3#NYTimes -->
    <owl:NamedIndividual rdf:about="http://www.semanticweb.org/sinoa/ontologies/2023/2/untitled-ontology-3#NYTimes"/>

    <!-- http://www.semanticweb.org/sinoa/ontologies/2023/2/untitled-ontology-3#article_about_Trump_rally -->
    <owl:NamedIndividual rdf:about="http://www.semanticweb.org/sinoa/ontologies/2023/2/untitled-ontology-3#article_about_Trump_rally">
        <rdf:type rdf:resource="http://www.semanticweb.org/sinoa/ontologies/2023/2/untitled-ontology-3#Content"/>
        <hasSource rdf:resource="http://www.semanticweb.org/sinoa/ontologies/2023/2/untitled-ontology-3#NYTimes"/>
    </owl:NamedIndividual>
</rdf:RDF>

<!-- Generated by the OWL API (version 4.5.9.2019-02-01T07:24:44Z) https://github.com/owlcs/owlapi -->

Lecture 11: Graph embeddings

Here is the example that used graph embeddings from ConceptNet. The numberbatch-en-19.08.txt.gz file can be downloaded from GitHub.

Precomputed pickle files are available here (requires UiB login).

import pickle

import pandas as pd
import numpy as np
from sklearn.neighbors import NearestNeighbors


# Input file and pickle cache locations, all under ./conceptnet/.
NUMBERBATCH_FILE = './conceptnet/numberbatch-en-19.08.txt'
EMBEDDINGS_FILE = './conceptnet/numberbatch-embeddings.pickle'
# Number of nearest neighbours; it is also part of the cache file name below.
N_NEIGHBOURS = 5
NUMBERBATCH_NEIGHBOURS_FILE = f'./conceptnet/numberbatch-neighbours-{N_NEIGHBOURS}.pickle'


# load numberbatch
# reload = False: parse the numberbatch text file and write the pickle file.
# reload = True:  skip the parsing and read the existing pickle file instead.
reload = False  # keep False the first time you run; set True once you have the pickle file
if not reload:
    embeddings = {}
    # The text file is opened as UTF-8 explicitly so that the result does not
    # depend on the platform's default encoding.
    with open(NUMBERBATCH_FILE, encoding='utf-8') as file:
        file.readline()  # skip the header line ('<n_terms> <n_dims>')
        for line in file:
            # each remaining line is a term followed by its vector components
            term, *values = line.strip().split(' ')
            embeddings[term] = np.array(values, dtype=float)
    with open(EMBEDDINGS_FILE, 'wb') as file:
        pickle.dump(embeddings, file)
else:
    # NOTE: pickle.load can execute arbitrary code; only load pickle files
    # that you created yourself or that come from a source you trust.
    with open(EMBEDDINGS_FILE, 'rb') as file:
        embeddings = pickle.load(file)

# find nearest neighbours
# reload = False: fit the nearest-neighbour index and write the pickle file.
# reload = True:  skip the fitting and read the existing pickle file instead.
reload = False  # keep False the first time you run; set True once you have the pickle file
if not reload:
    knn = NearestNeighbors(n_neighbors=N_NEIGHBOURS, algorithm='ball_tree')
    # Row i of the matrix is the vector of the i-th term in embeddings, so the
    # indexes returned by knn can be mapped back to terms by position.
    np_embeddings = np.array(list(embeddings.values()))
    knn.fit(np_embeddings)
    with open(NUMBERBATCH_NEIGHBOURS_FILE, 'wb') as file:
        pickle.dump(knn, file)
else:
    # NOTE: pickle.load can execute arbitrary code; only load pickle files
    # that you created yourself or that come from a source you trust.
    with open(NUMBERBATCH_NEIGHBOURS_FILE, 'rb') as file:
        knn = pickle.load(file)
     

def vector_neighbours(vector):
    """Print the nearest neighbours of *vector* as 'distance<TAB>term' lines.

    The neighbours are looked up with the module-level ``knn`` index, and
    the returned row indexes are translated back to terms through the
    ordering of the module-level ``embeddings`` dict.
    """
    distances, indexes = knn.kneighbors([vector])
    # Build the index -> term list once per call instead of once per neighbour.
    terms = list(embeddings)
    for dist, idx in zip(distances[0], indexes[0]):
        print(f'{dist}:\t{terms[idx]}')


# test
vector_neighbours(embeddings['bergen'])


# vector arithmetic on the embeddings: oslo - norway + france
vector_neighbours(embeddings['oslo'] - embeddings['norway'] + embeddings['france'])