'''
Created on 2022-03-4
@author: wf
'''
import unittest
from lodstorage.trulytabular import TrulyTabular, WikidataItem, WikidataProperty
from lodstorage.query import Query, QuerySyntaxHighlight
from lodstorage.sparql import SPARQL
class TestTrulyTabular(unittest.TestCase):
    '''
    test Truly tabular analysis

    Several tests run SPARQL queries via TrulyTabular.endpoint.
    Set self.debug to True in setUp to get diagnostic output.
    '''

    def setUp(self):
        '''
        prepare a test: diagnostic output is switched off by default
        '''
        self.debug = False

    def tearDown(self):
        '''
        nothing to clean up
        '''
        pass

    def testGetFirst(self):
        '''
        test the get First helper function

        A value is only expected for a list of dicts with exactly one
        record - for the empty list and for the list with two records
        no value is expected.
        '''
        tt = TrulyTabular("Q2020153")
        testcases = [
            {
                "qlod": [{"name": "firstname"}],
                "expected": "firstname"
            },
            {
                "qlod": [],
                "expected": None
            },
            {
                "qlod": [{"name": "firstname"}, {"name": "second name"}],
                "expected": None
            }
        ]
        for testcase in testcases:
            qLod = testcase["qlod"]
            expected = testcase["expected"]
            try:
                value = tt.sparql.getFirst(qLod, "name")
            except Exception as ex:
                if self.debug:
                    print(str(ex))
                # a failing lookup is only acceptable if no value was expected
                self.assertIsNone(expected)
            else:
                # compare outside of the try block: inside it a failed
                # comparison would be caught by the handler above and be
                # hidden whenever the expected value is None
                self.assertEqual(expected, value)

    def documentQuery(self, tt, query, show=True, formats=("mediawiki",)):
        '''
        document the given query for the given TrulyTabular instance

        Args:
            tt: the TrulyTabular instance whose sparql attribute runs the query
            query: the query to run (query.query) and to document
            show(bool): if True print the documentation text
            formats: the table formats to create a documentation for
        '''
        qlod = tt.sparql.queryAsListOfDicts(query.query)
        tryItUrl = "https://query.wikidata.org/"
        for tablefmt in formats:
            doc = query.documentQueryResult(qlod, tablefmt=tablefmt, tryItUrl=tryItUrl, floatfmt=".0f")
            docstr = doc.asText()
            if show:
                print(docstr)

    def testGetPropertiesByLabel(self):
        '''
        try getting properties by label
        '''
        debug = self.debug
        propertyLabels = ["title", "country", "location"]
        tt = TrulyTabular("Q2020153", propertyLabels=propertyLabels)
        if debug:
            print(tt.properties)
        for prop in propertyLabels:
            self.assertIn(prop, tt.properties)

    def testGetPropertiesById(self):
        '''
        try getting properties by id
        '''
        debug = self.debug
        propertyIds = ["P1800"]
        expected = ["Wikimedia database name"]
        sparql = SPARQL(TrulyTabular.endpoint)
        propList = WikidataProperty.getPropertiesByIds(sparql, propertyIds, lang="en")
        # NOTE(review): iterating the result is compared against the english
        # labels in the order of propertyIds - confirm against the return
        # type of WikidataProperty.getPropertiesByIds
        for i, prop in enumerate(propList):
            if debug:
                print(f"{i}:{prop}")
            self.assertEqual(prop, expected[i])

    def testGetItemsByLabel(self):
        '''
        try getting items by label

        The first item found for each label is kept and every label is
        expected to yield at least one item.
        '''
        debug = self.debug
        qLabels = ["academic conference", "scientific conference series", "whisky distillery", "human"]
        sparql = SPARQL(TrulyTabular.endpoint)
        items = {}
        for qLabel in qLabels:
            items4Label = WikidataItem.getItemsByLabel(sparql, qLabel)
            if debug:
                for item in items4Label:
                    print(item)
            # an empty result must lead to the assertion below failing
            # instead of an IndexError here
            if items4Label:
                items[qLabel] = items4Label[0]
        for qLabel in qLabels:
            self.assertIn(qLabel, items)

    def testTrulyTabularTables(self):
        '''
        test Truly Tabular for different tabular queries

        For each active table the number of items is checked against a
        lower bound and the property statistics are printed in the
        showStats formats. If show is True the non tabular queries of
        each property are documented as well.
        '''
        debug = self.debug
        show = False
        showStats = ["mediawiki", "github", "latex"]
        tables = [
            {
                "name": "computer scientist",
                "title": "humans with the occupation computer scientist",
                "qid": "Q5",  # human
                "where": "?item wdt:P106 wd:Q82594.",  # computer scientist only
                "propertyLabels": ["sex or gender", "date of birth", "place of birth", "field of work", "occupation", "ORCID iD",
                                   "GND ID", "DBLP author ID", "Google Scholar author ID", "VIAF ID"],
                "expected": 10
            },
            {
                "name": "academic conferences",
                "title": "academic conferences",
                "qid": "Q2020153",  # academic conference
                "propertyLabels": ["title", "country", "location", "short name", "start time",
                                   "end time", "part of the series", "official website", "described at URL",
                                   "WikiCFP event ID", "GND ID", "VIAF ID", "main subject", "language used",
                                   "is proceedings from"
                                   ],
                "expected": 7500
            },
            {
                "name": "scientific conferences series",
                "title": "scientific conference series",
                "qid": "Q47258130",  # scientific conference series
                "propertyLabels": ["title", "short name", "inception", "official website", "DBLP venue ID", "GND ID",
                                   "Microsoft Academic ID", "Freebase ID", "WikiCFP conference series ID",
                                   "Publons journals/conferences ID", "ACM conference ID"],
                "expected": 4200
            },
            {
                "name": "whisky distilleries",
                "title": "whisky distilleries",
                "qid": "Q10373548",  # whisky distillery
                "propertyLabels": ["inception", "official website", "owned by", "country", "headquarters location", "Whiskybase distillery ID"],
                "expected": 200
            }
        ]
        errors = 0
        # only the last table (whisky distilleries) is active -
        # widen the slice to analyze the other tables as well
        # NOTE(review): presumably restricted to keep the runtime low - confirm
        for table in tables[3:]:
            where = table.get("where")
            tt = TrulyTabular(table["qid"], table["propertyLabels"], where=where, debug=debug)
            if "is proceedings from" in tt.properties:
                tt.properties["is proceedings from"].reverse = True
            count = tt.count()
            if debug:
                print(count)
            # "expected" is a lower bound for the number of items
            self.assertGreater(count, table["expected"])
            stats = tt.getPropertyStatistics()
            # sort descending by total percentage
            stats = sorted(stats, key=lambda row: row['total%'], reverse=True)
            for tablefmt in showStats:
                query = Query(name=table["name"], title=table["title"], query="")
                doc = query.documentQueryResult(stats, tablefmt=tablefmt, withSourceCode=False)
                print(doc)
            if show:
                for wdProperty in tt.properties.values():
                    for asFrequency in [True, False]:
                        query = tt.noneTabularQuery(wdProperty, asFrequency=asFrequency)
                        try:
                            self.documentQuery(tt, query)
                        except Exception as ex:
                            # keep going so that all failing queries are
                            # reported - the error count is checked below
                            print(f"query for {wdProperty} failed\n{str(ex)}")
                            errors += 1
        self.assertEqual(0, errors)

    def testMostFrequentProperties(self):
        '''
        test getting the most frequent properties for some Wikidata Item types
        '''
        show = False
        debug = self.debug
        for qid in ["Q6256"]:
            tt = TrulyTabular(qid, debug=debug)
            query = tt.mostFrequentPropertiesQuery()
            self.documentQuery(tt, query, formats=["github"], show=show)

    def testSyntaxHighlighting(self):
        '''
        test the html syntax highlighting of a query, see
        https://github.com/WolfgangFahl/pyLoDStorage/issues/81
        '''
        debug = self.debug
        qid = "Q6256"  # country
        tt = TrulyTabular(qid, debug=debug)
        query = tt.mostFrequentPropertiesQuery()
        sh = QuerySyntaxHighlight(query, "html")
        html = sh.highlight()
        if debug:
            print(html)
        # the SELECT keyword is expected to be wrapped in a keyword span
        self.assertIn('<span class="k">SELECT</span>', html)
if __name__ == "__main__":
    # run all tests of this module when it is started as a script;
    # to run a single test, set sys.argv before calling unittest.main, e.g.
    # import sys;sys.argv = ['', 'Test.testName']
    unittest.main()