SPARQL Examples: Difference between revisions

From info216
No edit summary
Line 216: Line 216:
     ?paper dct:contributor kg:auth_456
     ?paper dct:contributor kg:auth_456
}
}
</syntaxhighlight>
== Lab 3: SPARQL ==
===List all triples===
<syntaxhighlight lang="SPARQL">
SELECT ?s ?p ?o
WHERE {?s ?p ?o .}
</syntaxhighlight>
===List the first 100 triples===
<syntaxhighlight lang="SPARQL">
SELECT ?s ?p ?o
WHERE {?s ?p ?o .}
LIMIT 100
</syntaxhighlight>
===Count the number of triples===
<syntaxhighlight lang="SPARQL">
SELECT (COUNT(*) as ?count)
WHERE {?s ?p ?o .}
</syntaxhighlight>
===Count the number of indictments===
<syntaxhighlight lang="SPARQL">
PREFIX ns1: <http://example.org#>
SELECT (COUNT(?ind) as ?amount)
WHERE {
  ?s ns1:outcome ?ind;
    ns1:outcome ns1:indictment.
}
</syntaxhighlight>
===List the names of everyone who pleaded guilty, along with the name of the investigation===
<syntaxhighlight lang="SPARQL">
PREFIX ns1: <http://example.org#>
SELECT ?name ?invname
WHERE {
  ?s ns1:name ?name;
    ns1:investigation ?invname;
    ns1:outcome ns1:guilty-plea .
}
</syntaxhighlight>
===List the names of everyone who were convicted, but who had their conviction overturned by which president===
<syntaxhighlight lang="SPARQL">
PREFIX ns1: <http://example.org#>
SELECT ?name ?president
WHERE {
  ?s ns1:name ?name;
    ns1:president ?president;
    ns1:outcome ns1:conviction;
    ns1:overturned ns1:true.
}
</syntaxhighlight>
===For each investigation, list the number of indictments made===
<syntaxhighlight lang="SPARQL">
PREFIX ns1: <http://example.org#>
SELECT ?invs (COUNT(?invs) as ?count)
WHERE {
  ?s ns1:investigation ?invs;
    ns1:outcome ns1:indictment .
}
GROUP BY ?invs
</syntaxhighlight>
===For each investigation with multiple indictments, list the number of indictments made===
<syntaxhighlight lang="SPARQL">
PREFIX ns1: <http://example.org#>
SELECT ?invs (COUNT(?invs) as ?count)
WHERE {
  ?s ns1:investigation ?invs;
    ns1:outcome ns1:indictment .
}
GROUP BY ?invs
HAVING(?count > 1)
</syntaxhighlight>
===For each investigation with multiple indictments, list the number of indictments made, sorted with the most indictments first===
<syntaxhighlight lang="SPARQL">
PREFIX ns1: <http://example.org#>
SELECT ?invs (COUNT(?invs) as ?count)
WHERE {
  ?s ns1:investigation ?invs;
    ns1:outcome ns1:indictment .
}
GROUP BY ?invs
HAVING(?count > 1)
ORDER BY DESC(?count)
</syntaxhighlight>
===For each president, list the numbers of convictions and of pardons made===
<syntaxhighlight lang="SPARQL">
PREFIX ns1: <http://example.org#>
SELECT ?president (COUNT(?outcome) as ?conviction) (COUNT(?pardon) as ?pardons)
WHERE {
  ?s ns1:president ?president;
    ns1:outcome ?outcome ;
    ns1:outcome ns1:conviction.
    OPTIONAL{
        ?s ns1:pardoned ?pardon .
        FILTER (?pardon = ns1:true)
    }
}
GROUP BY ?president
</syntaxhighlight>
===Rename mullerkg:name to something like muellerkg:person===
<syntaxhighlight lang="SPARQL">
PREFIX ns1: <http://example.org#>
DELETE{?s ns1:name ?o}
INSERT{?s ns1:person ?o}
WHERE {?s ns1:name ?o}
</syntaxhighlight>
===Update the graph so all the investigated person and president nodes become the subjects in foaf:name triples with the corresponding strings===
<syntaxhighlight lang="SPARQL">
PREFIX ns1: <http://example.org#>
PREFIX foaf: <http://xmlns.com/foaf/0.1/>
#Persons
INSERT {?person foaf:name ?name}
WHERE {
    ?investigation ns1:person ?person .
    BIND(REPLACE(STR(?person), STR(ns1:), "") AS ?name)
}
#Presidents
INSERT {?president foaf:name ?name}
WHERE {
    ?investigation ns1:president ?president .
    BIND(REPLACE(STR(?president), STR(ns1:), "") AS ?name)
}
</syntaxhighlight>
===Use INSERT DATA updates to add these triples===
<syntaxhighlight lang="SPARQL">
PREFIX ns1: <http://example.org#>
INSERT DATA {
    ns1:George_Papadopoulos ns1:adviserTo ns1:Donald_Trump;
        ns1:pleadGuiltyTo ns1:LyingToFBI;
        ns1:sentencedTo ns1:Prison.
       
    ns1:Roger_Stone a ns1:Republican;
        ns1:adviserTo ns1:Donald_Trump;
        ns1:officialTo ns1:Trump_Campaign;
        ns1:interactedWith ns1:Wikileaks;
        ns1:providedTestimony ns1:House_Intelligence_Committee;
        ns1:clearedOf ns1:AllCharges.
}
#To test if added
SELECT ?p ?o
WHERE {ns1:Roger_Stone ?p ?o .}
</syntaxhighlight>
===Use DELETE DATA and then INSERT DATA updates to correct that Roger Stone was cleared of all charges===
<syntaxhighlight lang="SPARQL">
PREFIX ns1: <http://example.org#>
DELETE DATA {
    ns1:Roger_Stone ns1:clearedOf ns1:AllCharges .
}
INSERT DATA {
    ns1:Roger_Stone ns1:indictedFor ns1:ObstructionOfJustice,
                                    ns1:WitnessTampering,
                                    ns1:FalseStatements.
}
#The task specifically requested DELETE DATA & INSERT DATA, put below is a more efficient solution
DELETE{ns1:Roger_Stone ns1:clearedOf ns1:AllCharges.}
INSERT{
  ns1:Roger_Stone ns1:indictedFor ns1:ObstructionOfJustice,
                                  ns1:WitnessTampering,
                                  ns1:FalseStatements.
}
WHERE{ns1:Roger_Stone ns1:clearedOf ns1:AllCharges.}
</syntaxhighlight>
===Use a DESCRIBE query to show the updated information about Roger Stone===
<syntaxhighlight lang="SPARQL">
PREFIX ns1: <http://example.org#>
DESCRIBE ?o
WHERE {ns1:Roger_Stone ns1:indictedFor ?o .}
</syntaxhighlight>
===Use a CONSTRUCT query to create a new RDF group with triples only about Roger Stone===
<syntaxhighlight lang="SPARQL">
PREFIX ns1: <http://example.org#>
CONSTRUCT {
  ns1:Roger_Stone ?p ?o.
  ?s ?p2 ns1:Roger_Stone.
}
WHERE {
  ns1:Roger_Stone ?p ?o .
  ?s ?p2 ns1:Roger_Stone
}
</syntaxhighlight>
===Write a DELETE/INSERT statement to change one of the prefixes in your graph===
<syntaxhighlight lang="SPARQL">
PREFIX ns1: <http://example.org#>
PREFIX dbp: <https://dbpedia.org/page/>
DELETE {?s ns1:person ?o1}
INSERT {?s ns1:person ?o2}
WHERE{
  ?s ns1:person ?o1 .
  BIND (IRI(replace(str(?o1), str(ns1:), str(dbp:)))  AS ?o2)
}
#This update changes the object in triples with ns1:person as the predicate. It changes it's prefix of ns1 (which is the "shortcut/shorthand" for example.org) to the prefix dbp (dbpedia.org)
</syntaxhighlight>
===Write an INSERT statement to add at least one significant date to the Mueller investigation, with literal type xsd:date. Write a DELETE/INSERT statement to change the date to a string, and a new DELETE/INSERT statement to change it back to xsd:date. ===
<syntaxhighlight lang="SPARQL">
#Whilst this solution is not exactly what the task asks for, I feel like this is more appropiate given the dataset. The following update
changes the objects that uses the cp_date as predicate from a URI, to a literal with date as it's datatype
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
PREFIX ns1: <http://example.org#>
DELETE {?s ns1:cp_date ?o}
INSERT{?s ns1:cp_date ?o3}
WHERE{
  ?s ns1:cp_date ?o .
  BIND (replace(str(?o), str(ns1:), "")  AS ?o2)
  BIND (STRDT(STR(?o2), xsd:date) AS ?o3)
}
#To test:
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
PREFIX ns1: <http://example.org#>
SELECT ?s ?o
WHERE{
  ?s ns1:cp_date ?o.
  FILTER(datatype(?o) = xsd:date)
}
#To change it to an integer, use the following code, and to change it back to date, swap "xsd:integer" to "xsd:date"
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
PREFIX ns1: <http://example.org#>
DELETE {?s ns1:cp_date ?o}
INSERT{?s ns1:cp_date ?o2}
WHERE{
  ?s ns1:cp_date ?o .
  BIND (STRDT(STR(?o), xsd:integer) AS ?o2)
}
</syntaxhighlight>
</syntaxhighlight>

Revision as of 16:42, 13 February 2023

Lecture 3:SPARQL

The KG4News knowledge graph can be accessed here (namespace kb) (read-only). To test updates, you can run your own Blazegraph server or try the I2S sandbox.

Limit

SELECT ?p WHERE {
    ?s ?p ?o .
}
LIMIT 10

List distinct properties only (with limit)

SELECT DISTINCT ?p WHERE {
    ?s ?p ?o .
}
LIMIT 10

Limit with offset

SELECT DISTINCT ?p WHERE {
    ?s ?p ?o .
}
LIMIT 10 OFFSET 9

List rdf:types

PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>

SELECT DISTINCT ?t WHERE {
    ?s rdf:type ?t .
}
LIMIT 50

URI for Tim Berners-Lee

PREFIX foaf: <http://xmlns.com/foaf/0.1/>

SELECT DISTINCT ?person WHERE {
    ?person foaf:name ?name .  
 
    FILTER(CONTAINS(?name, "Berners-Lee"))
}
LIMIT 10  # best to use limit if something goes wrong

Describe Tim Berners-Lee

DESCRIBE <http://i2s.uib.no/kg4news/author/1432678629>

DESCRIBE returns a new RDF graph, whereas SELECT returns a table of rows.

Papers that mention "Semantic Web" in the title

PREFIX ss: <http://semanticscholar.org/>

SELECT DISTINCT ?paper ?title WHERE {
    ?paper ss:title ?title .  
 
    FILTER(CONTAINS(STR(?title), "Semantic Web"))
}
LIMIT 50

Alternative filter that ignores capitalisation (lower/upper case)

    FILTER(REGEX(STR(?title), "Semantic Web", "i"))

Authors sorted by name

PREFIX foaf: <http://xmlns.com/foaf/0.1/>

SELECT DISTINCT * WHERE {
    ?author foaf:name ?name .  
}
ORDER BY ?name
LIMIT 10

Sorted by descending name instead

ORDER BY DESC(?name)

Count papers by author

PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX dct: <http://purl.org/dc/terms/>
PREFIX ss: <http://semanticscholar.org/>

SELECT DISTINCT ?author (COUNT(?paper) AS ?count) WHERE {
    ?author rdf:type ss:Author .  
    ?paper  rdf:type ss:Paper ;
            dct:contributor ?author .
}
GROUP BY ?author
LIMIT 10

Only list the most prolific authors

PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX dct: <http://purl.org/dc/terms/>
PREFIX ss: <http://semanticscholar.org/>

SELECT DISTINCT ?author (COUNT(?paper) AS ?count) WHERE {
    ?author rdf:type ss:Author .  
    ?paper  rdf:type ss:Paper ;
            dct:contributor ?author .
}
GROUP BY ?author
HAVING (?count >= 10)  # similar to a filter expression
LIMIT 10               # include limit when you test

Order by descending paper count

SELECT ... {
    ...
}
GROUP BY ?person
HAVING (?count > 10)
ORDER BY DESC(?count)
LIMIT 10

Order by descending paper count and then by author name

PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX dct: <http://purl.org/dc/terms/>
PREFIX foaf: <http://xmlns.com/foaf/0.1/>
PREFIX ss: <http://semanticscholar.org/>

SELECT DISTINCT ?person (SAMPLE(?name) AS ?name) (COUNT(?paper) AS ?count) WHERE {
    ?person rdf:type ss:Author ;
            foaf:name ?name .  
    ?paper  rdf:type ss:Paper ;
            ss:title ?title ;
            dct:contributor ?person .
}
GROUP BY ?person
HAVING (?count > 10)
ORDER BY DESC(?count)
LIMIT 10

Embedded Wikidata query

This is a toy example only. Embedded queries like these are better suited for situations where the same URIs are used in more than one triple store and you want to combine data. But Wikidata and KG4News do not use the same URIs. So instead, the example searches for similar labels, and this is something graph databases may not be optimised for. Moreover, Wikidata uses language-tagged strings whereas KG4News uses plain strings, so the labels cannot even be directly compared.

PREFIX foaf: <http://xmlns.com/foaf/0.1/>
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>

SELECT DISTINCT ?enname ?person ?wdperson WHERE {
    BIND("T. Berners-Lee"@en AS ?enname)  # language-tagger label

    SERVICE <https://query.wikidata.org/bigdata/namespace/wdq/sparql> {
        # return a Wikidata identifier (URI) with this label as alternative
        SELECT ?wdperson ?enname WHERE {
             ?wdperson skos:altLabel ?enname .
        }
        LIMIT 10  # we use limit in case the label does not match
    }
            
    BIND(STR(?enname) AS ?name)  # the same label, but with language tag removed
    # return a KG4News identifier (URI) with this label as name
    ?person foaf:name ?name .  

}
LIMIT 10

Add one or more triples

From now on you need a Blazegraph that allows writing, for example the I2S sandbox. Remember to create a new namespace first and make sure you use it afterwards.

PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX dct: <http://purl.org/dc/terms/>
PREFIX kg: <http://i2s.uib.no/kg4news/>
PREFIX ss: <http://semanticscholar.org/>

INSERT DATA {  # note the Turtle-like syntax
    kg:paper_123 rdf:type ss:Paper ;
            ss:title "Semantic Knowledge Graphs for the News: A Review"@en ;
            kg:year 2022 ;
            dct:contributor kg:auth_456, kg:auth_789 . 
}

Remove one or more triples

PREFIX kg: <http://i2s.uib.no/kg4news/>

DELETE DATA
{
    kg:paper_123 kg:year 2022  .
}

Pattern-based addition and or removal of triples

PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX dct: <http://purl.org/dc/terms/>
PREFIX kg: <http://i2s.uib.no/kg4news/>
PREFIX ss: <http://semanticscholar.org/>

DELETE DATA {    
    ?paper dct:contributor kg:auth_456
}
INSERT DATA {    
    ?paper dct:contributor kg:auth_654
}
WHERE {  # the patterns are similar to SELECT patterns
    ?paper dct:contributor kg:auth_456
}

Lab 3: SPARQL

List all triples

SELECT ?s ?p ?o
WHERE {?s ?p ?o .}

List the first 100 triples

SELECT ?s ?p ?o
WHERE {?s ?p ?o .} 
LIMIT 100

Count the number of triples

SELECT (COUNT(*) as ?count)
WHERE {?s ?p ?o .}

Count the number of indictments

PREFIX ns1: <http://example.org#> 

SELECT (COUNT(?ind) as ?amount) 
WHERE {
  ?s ns1:outcome ?ind;
     ns1:outcome ns1:indictment.
}

List the names of everyone who pleaded guilty, along with the name of the investigation

PREFIX ns1: <http://example.org#> 

SELECT ?name ?invname 
WHERE {
  ?s ns1:name ?name;
     ns1:investigation ?invname;
     ns1:outcome ns1:guilty-plea .
}

List the names of everyone who were convicted, but who had their conviction overturned by which president

PREFIX ns1: <http://example.org#> 

SELECT ?name ?president
WHERE {
  ?s ns1:name ?name;
     ns1:president ?president;
     ns1:outcome ns1:conviction;
     ns1:overturned ns1:true.
}

For each investigation, list the number of indictments made

PREFIX ns1: <http://example.org#> 

SELECT ?invs (COUNT(?invs) as ?count)
WHERE {
  ?s ns1:investigation ?invs;
     ns1:outcome ns1:indictment .
} 
GROUP BY ?invs

For each investigation with multiple indictments, list the number of indictments made

PREFIX ns1: <http://example.org#> 

SELECT ?invs (COUNT(?invs) as ?count)
WHERE {
  ?s ns1:investigation ?invs;
     ns1:outcome ns1:indictment .
} 
GROUP BY ?invs
HAVING(?count > 1)

For each investigation with multiple indictments, list the number of indictments made, sorted with the most indictments first

PREFIX ns1: <http://example.org#> 

SELECT ?invs (COUNT(?invs) as ?count)
WHERE {
  ?s ns1:investigation ?invs;
     ns1:outcome ns1:indictment .
} 
GROUP BY ?invs
HAVING(?count > 1)
ORDER BY DESC(?count)

For each president, list the numbers of convictions and of pardons made

PREFIX ns1: <http://example.org#> 

SELECT ?president (COUNT(?outcome) as ?conviction) (COUNT(?pardon) as ?pardons)
WHERE {
  ?s ns1:president ?president;
     ns1:outcome ?outcome ;
     ns1:outcome ns1:conviction.
     OPTIONAL{
        ?s ns1:pardoned ?pardon .
        FILTER (?pardon = ns1:true)
     }
}
GROUP BY ?president

Rename mullerkg:name to something like muellerkg:person

PREFIX ns1: <http://example.org#> 

DELETE{?s ns1:name ?o}
INSERT{?s ns1:person ?o}
WHERE {?s ns1:name ?o}

Update the graph so all the investigated person and president nodes become the subjects in foaf:name triples with the corresponding strings

PREFIX ns1: <http://example.org#> 
PREFIX foaf: <http://xmlns.com/foaf/0.1/>

#Persons
INSERT {?person foaf:name ?name}
WHERE {
     ?investigation ns1:person ?person .
     BIND(REPLACE(STR(?person), STR(ns1:), "") AS ?name)
}

#Presidents
INSERT {?president foaf:name ?name}
WHERE {
     ?investigation ns1:president ?president .
     BIND(REPLACE(STR(?president), STR(ns1:), "") AS ?name)
}

Use INSERT DATA updates to add these triples

PREFIX ns1: <http://example.org#> 

INSERT DATA {
    ns1:George_Papadopoulos ns1:adviserTo ns1:Donald_Trump;
        ns1:pleadGuiltyTo ns1:LyingToFBI;
        ns1:sentencedTo ns1:Prison.
        
    ns1:Roger_Stone a ns1:Republican;
        ns1:adviserTo ns1:Donald_Trump;
        ns1:officialTo ns1:Trump_Campaign;
        ns1:interactedWith ns1:Wikileaks;
        ns1:providedTestimony ns1:House_Intelligence_Committee;
        ns1:clearedOf ns1:AllCharges.
}

#To test if added
SELECT ?p ?o
WHERE {ns1:Roger_Stone ?p ?o .}

Use DELETE DATA and then INSERT DATA updates to correct that Roger Stone was cleared of all charges

PREFIX ns1: <http://example.org#> 

DELETE DATA {
     ns1:Roger_Stone ns1:clearedOf ns1:AllCharges .
}

INSERT DATA {
     ns1:Roger_Stone ns1:indictedFor ns1:ObstructionOfJustice,
                                     ns1:WitnessTampering, 
                                     ns1:FalseStatements.
}

#The task specifically requested DELETE DATA & INSERT DATA, put below is a more efficient solution

DELETE{ns1:Roger_Stone ns1:clearedOf ns1:AllCharges.}
INSERT{
  ns1:Roger_Stone ns1:indictedFor ns1:ObstructionOfJustice,
                                  ns1:WitnessTampering,
                                  ns1:FalseStatements.
}
WHERE{ns1:Roger_Stone ns1:clearedOf ns1:AllCharges.}

Use a DESCRIBE query to show the updated information about Roger Stone

PREFIX ns1: <http://example.org#> 

DESCRIBE ?o
WHERE {ns1:Roger_Stone ns1:indictedFor ?o .}

Use a CONSTRUCT query to create a new RDF group with triples only about Roger Stone

PREFIX ns1: <http://example.org#> 

CONSTRUCT {
  ns1:Roger_Stone ?p ?o.
  ?s ?p2 ns1:Roger_Stone.
}
WHERE {
  ns1:Roger_Stone ?p ?o .
  ?s ?p2 ns1:Roger_Stone
}

Write a DELETE/INSERT statement to change one of the prefixes in your graph

PREFIX ns1: <http://example.org#> 
PREFIX dbp: <https://dbpedia.org/page/>

DELETE {?s ns1:person ?o1} 
INSERT {?s ns1:person ?o2} 
WHERE{
  ?s ns1:person ?o1 .
  BIND (IRI(replace(str(?o1), str(ns1:), str(dbp:)))  AS ?o2)
}

#This update changes the object in triples with ns1:person as the predicate. It changes it's prefix of ns1 (which is the "shortcut/shorthand" for example.org) to the prefix dbp (dbpedia.org)

Write an INSERT statement to add at least one significant date to the Mueller investigation, with literal type xsd:date. Write a DELETE/INSERT statement to change the date to a string, and a new DELETE/INSERT statement to change it back to xsd:date.

#Whilst this solution is not exactly what the task asks for, I feel like this is more appropiate given the dataset. The following update
changes the objects that uses the cp_date as predicate from a URI, to a literal with date as it's datatype 

PREFIX xsd: <http://www.w3.org/2001/XMLSchema#> 
PREFIX ns1: <http://example.org#> 

DELETE {?s ns1:cp_date ?o}
INSERT{?s ns1:cp_date ?o3}
WHERE{
  ?s ns1:cp_date ?o .
  BIND (replace(str(?o), str(ns1:), "")  AS ?o2)
  BIND (STRDT(STR(?o2), xsd:date) AS ?o3)
}

#To test:

PREFIX xsd: <http://www.w3.org/2001/XMLSchema#> 
PREFIX ns1: <http://example.org#> 

SELECT ?s ?o 
WHERE{
  ?s ns1:cp_date ?o.
  FILTER(datatype(?o) = xsd:date)
}

#To change it to an integer, use the following code, and to change it back to date, swap "xsd:integer" to "xsd:date" 

PREFIX xsd: <http://www.w3.org/2001/XMLSchema#> 
PREFIX ns1: <http://example.org#> 

DELETE {?s ns1:cp_date ?o}
INSERT{?s ns1:cp_date ?o2}
WHERE{
  ?s ns1:cp_date ?o .
  BIND (STRDT(STR(?o), xsd:integer) AS ?o2)
}