'''
Created on 2020-08-22
@author: wf
'''
import copy
import os
import re
import sys
import urllib
import urllib.parse
from enum import Enum
from pathlib import Path

import yaml
from pygments import highlight
from pygments.formatters.html import HtmlFormatter
from pygments.formatters.latex import LatexFormatter
from pygments.lexers import get_lexer_by_name
from pylatexenc.latexencode import unicode_to_latex
from tabulate import tabulate

from lodstorage.jsonable import JSONAble
#from wikibot.mwTable import MediaWikiTable
# redundant copy in this library to avoid dependency issues
# original is at
from lodstorage.mwTable import MediaWikiTable
class YamlPath:
    '''
    helper for locating yaml configuration files
    '''

    @staticmethod
    def getPaths(yamlFileName:str,yamlPath:str=None):
        '''
        get the list of paths to be considered for the given yaml file name

        Args:
            yamlFileName(str): the name of the yaml file e.g. "queries.yaml"
            yamlPath(str): the primary path to use - if None the file of the
                given name in the ../sampledata directory relative to this
                module is used

        Returns:
            list: the primary path, followed by the path of the file in the
                user's ~/.pylodstorage directory if such a file exists
        '''
        primaryPath=yamlPath
        if primaryPath is None:
            moduleDir=os.path.dirname(__file__)
            primaryPath=f"{moduleDir}/../sampledata/{yamlFileName}"
        # optional additional yaml file from the user's own configuration
        userHome=str(Path.home())
        userPath=f"{userHome}/.pylodstorage/{yamlFileName}"
        if os.path.isfile(userPath):
            return [primaryPath,userPath]
        return [primaryPath]
class QuerySyntaxHighlight:
    '''
    Syntax highlighting for queries with pygments
    '''

    def __init__(self,query,highlightFormat:str="html"):
        '''
        construct me for the given query and highlightFormat

        Args:
            query(Query): the query to do the syntax highlighting for
            highlightFormat(str): the highlight format to be used - "html" or "latex"

        Raises:
            ValueError: if the given highlightFormat is not supported
        '''
        self.query=query
        self.highlightFormat=highlightFormat
        # the lexer is selected by the language of the query
        self.lexer=get_lexer_by_name(self.query.lang)
        if highlightFormat=="html":
            self.formatter=HtmlFormatter()
        elif highlightFormat=="latex":
            self.formatter=LatexFormatter()
        else:
            # fail early instead of leaving self.formatter undefined which
            # would only show up later as an AttributeError in highlight()
            raise ValueError(f"unsupported highlightFormat '{highlightFormat}' - expected 'html' or 'latex'")

    def highlight(self):
        '''
        highlight the text of my query

        Returns:
            str: the result of the syntax highlighting with pygments
        '''
        syntaxResult=highlight(self.query.query,self.lexer,self.formatter)
        return syntaxResult
class QueryResultDocumentation():
    '''
    documentation of a query result
    '''

    def __init__(self,query,title:str,tablefmt:str,tryItMarkup:str,sourceCodeHeader:str,sourceCode:str,resultHeader:str,result:str):
        '''
        constructor

        Args:
            query(Query): the query to be documented
            title(str): the title markup
            tablefmt(str): the table format that has been used
            tryItMarkup(str): the "try it!" markup to show - it is stored
                with a leading newline
            sourceCodeHeader(str): the header title to use for the sourceCode
            sourceCode(str): the sourceCode
            resultHeader(str): the header title to use for the result
            result(str): the result markup
        '''
        self.query=query
        self.title=title
        self.tablefmt=tablefmt
        self.tryItMarkup="\n"+f"{tryItMarkup}"
        self.sourceCodeHeader=sourceCodeHeader
        self.sourceCode=sourceCode
        self.resultHeader=resultHeader
        self.result=result

    @staticmethod
    def uniCode2Latex(text:str,withConvert:bool=False)->str:
        '''
        converts unicode text to latex and
        fixes UTF-8 chars for latex in a certain range:
            ₀:$_0$ ... ₉:$_9$

        see https://github.com/phfaist/pylatexenc/issues/72

        Args:
            text(str): the string to fix
            withConvert(bool): if True the unicode to latex library conversion
                is applied after the subscript fix

        Return:
            str: latex presentation of the given text
        '''
        # map the code points 8320..8329 (subscript digits) to $_0$..$_9$
        subscriptMap={8320+digit:f"$_{digit}$" for digit in range(10)}
        fixed=text.translate(subscriptMap)
        if withConvert:
            # note: the library may return {\textbackslash} for backslashes
            fixed=unicode_to_latex(fixed)
        return fixed

    def __str__(self):
        '''
        simple string representation - same as asText
        '''
        return self.asText()

    def asText(self):
        '''
        return my text representation

        Returns:
            str: title, description, sourceCodeHeader, sourceCode, tryIt link,
                resultHeader and result - with the subscript digits fixed
                if my table format is latex
        '''
        head=f"{self.title}\n{self.query.description}\n"
        source=f"{self.sourceCodeHeader}\n{self.sourceCode}{self.tryItMarkup}\n"
        outcome=f"{self.resultHeader}\n{self.result}"
        text=head+source+outcome
        if self.tablefmt.lower()=="latex":
            return self.uniCode2Latex(text)
        return text
class Query(object):
    ''' a Query e.g. for SPARQL '''

    def __init__(self,name:str,query:str,lang='sparql',endpoint:str=None,title:str=None,description:str=None,prefixes=None,tryItUrl:str=None,formats:list=None,debug=False):
        '''
        constructor

        Args:
            name(string): the name/label of the query
            query(string): the native Query text e.g. in SPARQL
            lang(string): the language of the query e.g. SPARQL
            endpoint(string): the endpoint url to use
            title(string): the header/title of the query - defaults to the name
            description(string): the description of the query - defaults to an empty string
            prefixes(list): list of prefixes to be resolved
            tryItUrl(str): the url of a "tryit" webpage
            formats(list): key,value pairs of ValueFormatters to be applied
            debug(boolean): true if debug mode should be switched on
        '''
        self.name=name
        self.query=query
        self.lang=lang
        self.endpoint=endpoint
        self.title=name if title is None else title
        self.description="" if description is None else description
        self.prefixes=prefixes
        self.debug=debug
        self.tryItUrl=tryItUrl
        self.formats=formats
        self.formatCallBacks=[]

    def __str__(self):
        '''
        Returns:
            str: one "key:value" line for each of my attributes that is not None
        '''
        queryStr="\n".join([f"{key}:{value}" for key,value in self.__dict__.items() if value is not None])
        return f"{queryStr}"

    def getTryItUrl(self,baseurl:str):
        '''
        return the "try it!" url for the given baseurl

        Args:
            baseurl(str): the baseurl to be used

        Returns:
            str: the "try it!" url for the given query
        '''
        # the url quoted query text is appended as a fragment
        # https://stackoverflow.com/a/9345102/1497139
        quoted=urllib.parse.quote(self.query)
        return f"{baseurl}/#{quoted}"

    def getLink(self,url,title,tablefmt):
        '''
        convert the given url and title to a link for the given tablefmt

        Args:
            url(str): the url to convert
            title(str): the title to show
            tablefmt(str): the table format to use

        Returns:
            str: the link markup - an empty string if the url is None and
                "title:url" for table formats without a specific link syntax
        '''
        if url is None:
            return ""
        if tablefmt=="mediawiki":
            return f"[{url} {title}]"
        if tablefmt=="github":
            return f"[{title}]({url})"
        if tablefmt=="latex":
            return r"\href{%s}{%s}" % (url,title)
        return f"{title}:{url}"

    def prefixToLink(self,lod:list,prefix:str,tablefmt:str):
        '''
        convert url prefixes to link according to the given table format
        TODO - refactor as preFormat callback

        The records are modified in place: each string value starting with
        the prefix is replaced by a link (or by the plain unquoted item
        for the latex table format).

        Args:
            lod(list): the list of dicts to convert
            prefix(str): the prefix to strip
            tablefmt(str): the tabulate tableformat to use
        '''
        for record in lod:
            for key,value in record.items():
                if not isinstance(value,str) or not value.startswith(prefix):
                    continue
                # strip only the leading prefix - str.replace would also
                # remove later occurrences of the prefix inside the value
                item=value[len(prefix):]
                uqitem=urllib.parse.unquote(item)
                if tablefmt=="latex":
                    link=uqitem
                else:
                    link=self.getLink(value,uqitem,tablefmt)
                # assigning to an existing key is safe while iterating
                record[key]=link

    def asYaml(self):
        '''
        Returns:
            str: the result of yaml.dump for me
        '''
        yamlMarkup=yaml.dump(self)
        return yamlMarkup

    def asWikiSourceMarkup(self):
        '''
        convert me to Mediawiki markup for syntax highlighting using the "source" tag

        Returns:
            string: the Markup
        '''
        markup="<source lang='%s'>\n%s\n</source>\n" %(self.lang,self.query)
        return markup

    def asWikiMarkup(self,listOfDicts):
        '''
        convert the given listOfDicts result to MediaWiki markup

        Args:
            listOfDicts(list): the list of Dicts to convert to MediaWiki markup

        Returns:
            string: the markup
        '''
        if self.debug:
            print(listOfDicts)
        mwTable=MediaWikiTable()
        mwTable.fromListOfDicts(listOfDicts)
        markup=mwTable.asWikiMarkup()
        return markup

    def documentQueryResult(self,qlod:list,limit=None,tablefmt:str="mediawiki",tryItUrl:str=None,withSourceCode=True,**kwArgs):
        '''
        document the given query results - note that a copy of the whole list is going to be created for being able to format

        Args:
            qlod: the list of dicts result
            limit(int): the maximum number of records to display in result tabulate
            tablefmt(str): the table format to use
            tryItUrl: the "try it!" url to show - defaults to my tryItUrl
            withSourceCode(bool): if True document the source code
            kwArgs: further keyword arguments that are passed on to tabulate

        Return:
            QueryResultDocumentation: the documentation for the given parameters -
                its string representation is the documentation tabular text
        '''
        sourceCode=self.query
        tryItMarkup=""
        sourceCodeHeader=""
        resultHeader=""
        title=self.title
        # format a deep copy so that the records of the caller stay untouched
        records=qlod if limit is None else qlod[:limit]
        lod=copy.deepcopy(records)
        # NOTE(review): preFormatWithCallBacks and formatWithValueFormatters
        # are not defined in the part of this class visible here - confirm
        # that they are available
        self.preFormatWithCallBacks(lod,tablefmt=tablefmt)
        self.formatWithValueFormatters(lod,tablefmt=tablefmt)
        result=tabulate(lod,headers="keys",tablefmt=tablefmt,**kwArgs)
        if tryItUrl is None:
            tryItUrl=getattr(self,"tryItUrl",None)
        if tablefmt=="github":
            title=f"## {self.title}"
            resultHeader="## result"
        elif tablefmt=="mediawiki":
            title=f"== {self.title} =="
            resultHeader="=== result ==="
        elif tablefmt=="latex":
            resultHeader=""
            tableTemplate="\n".join([
                r"\begin{table}",
                r"\caption{%s}",
                r"\label{tab:%s}",
                "%s",
                r"\end{table}",
                "",
            ])
            result=tableTemplate % (self.title,self.name,result)
        else:
            title=f"{self.title}"
            resultHeader="result:"
        if withSourceCode:
            # only create a "try it!" link if there is a base url for it -
            # otherwise the link would point to "None/#..."
            if tryItUrl is not None:
                tryItUrlEncoded=self.getTryItUrl(tryItUrl)
                tryItMarkup=self.getLink(tryItUrlEncoded,"try it!",tablefmt)
            if tablefmt=="github":
                sourceCodeHeader="### query"
                sourceCode=f"```{self.lang}\n{self.query}\n```"
            elif tablefmt=="mediawiki":
                sourceCodeHeader="=== query ==="
                sourceCode=f"<source lang='{self.lang}'>\n{self.query}\n</source>\n"
            elif tablefmt=="latex":
                sourceCodeHeader=r"see query listing \ref{listing:%s} and result table \ref{tab:%s}" % (self.name,self.name)
                listingTemplate="\n".join([
                    r"\begin{listing}[ht]",
                    r"\caption{%s}",
                    r"\label{listing:%s}",
                    r"\begin{minted}{%s}",
                    "%s",
                    r"\end{minted}",
                    "%s",
                    r"\end{listing}",
                    "",
                ])
                sourceCode=listingTemplate % (self.title,self.name,self.lang.lower(),self.query,tryItMarkup)
            else:
                sourceCodeHeader="query:"
                sourceCode=f"{self.query}"
        # the separate "try it!" markup is only shown for SPARQL queries
        if self.lang!="sparql":
            tryItMarkup=""
        queryResultDocumentation=QueryResultDocumentation(query=self,title=title,tablefmt=tablefmt,tryItMarkup=tryItMarkup,sourceCodeHeader=sourceCodeHeader,sourceCode=sourceCode,resultHeader=resultHeader,result=result)
        return queryResultDocumentation
class QueryManager(object):
    '''
    manages pre packaged Queries
    '''

    def __init__(self,lang:str=None,debug=False,queriesPath=None):
        '''
        Constructor

        Args:
            lang(string): the language to use for the queries sql or sparql - defaults to sql
            debug(boolean): True if debug information should be shown
            queriesPath(string): the path of the yaml file to read the queries
                from - if None the default path is used
        '''
        if lang is None:
            lang='sql'
        self.queriesByName={}
        self.lang=lang
        self.debug=debug
        queries=QueryManager.getQueries(queriesPath=queriesPath)
        for name,queryDict in queries.items():
            # skip empty entries and queries not available in my language
            if not queryDict or self.lang not in queryDict:
                continue
            queryText=queryDict.pop(self.lang)
            # drop not needed query variants - the remaining entries are
            # passed on as keyword arguments to the Query constructor
            for qformat in ['sparql','sql']:
                queryDict.pop(qformat,None)
            query=Query(name=name,query=queryText,lang=self.lang,**queryDict,debug=self.debug)
            self.queriesByName[name]=query

    @staticmethod
    def getQueries(queriesPath=None):
        '''
        get the queries for the given queries Path

        Args:
            queriesPath(string): the path of the yaml file to read the queries
                from - if None the default path is used

        Returns:
            dict: the query records by name - records from paths that are
                read later override records of the same name
        '''
        queries={}
        for path in YamlPath.getPaths("queries.yaml",queriesPath):
            with open(path,'r') as stream:
                lqueries=yaml.safe_load(stream)
            # yaml.safe_load returns None for an empty file
            if lqueries:
                queries.update(lqueries)
        return queries
class EndpointManager(object):
    """
    manages a set of SPARQL endpoints
    """

    @staticmethod
    def getEndpoints(endpointPath=None):
        '''
        get the endpoints for the given endpoint path

        Args:
            endpointPath(str): the path of the yaml file to read the endpoints
                from - if None the default path is used

        Returns:
            dict: the Endpoint instances by name - endpoints from paths that
                are read later override endpoints of the same name
        '''
        endpoints={}
        for lEndpointPath in YamlPath.getPaths("endpoints.yaml",endpointPath):
            with open(lEndpointPath,'r') as stream:
                endpointRecords=yaml.safe_load(stream)
            # yaml.safe_load returns None for an empty file
            if not endpointRecords:
                continue
            for name,record in endpointRecords.items():
                endpoint=Endpoint()
                endpoint.fromDict({"name": name, **record})
                endpoints[name]=endpoint
        return endpoints

    @staticmethod
    def getEndpointNames(endpointPath=None) -> list:
        """
        Returns a list of all available endpoint names

        Args:
            endpointPath(str): the path of the yaml file to read the endpoints
                from - if None the default path is used
        """
        endpoints=EndpointManager.getEndpoints(endpointPath)
        return list(endpoints)
class Endpoint(JSONAble):
    """
    a query endpoint
    """

    @staticmethod
    def getSamples():
        '''
        Returns:
            list: a list with a single sample record (dict) describing the
                wikidata SPARQL endpoint
        '''
        prefixLines=[
            "PREFIX bd: <http://www.bigdata.com/rdf#>",
            "PREFIX cc: <http://creativecommons.org/ns#>",
        ]
        wikidataSample={
            "name": "wikidata",
            "lang": "sparql",
            "endpoint": "https://query.wikidata.org/sparql",
            "method": "POST",
            "prefixes": "\n".join(prefixLines),
        }
        return [wikidataSample]

    def __init__(self):
        '''
        constructor for setting defaults - only method and lang are
        initialized here
        '''
        self.method="POST"
        self.lang="SPARQL"

    def __str__(self):
        '''
        Returns:
            str: a string representation of this Endpoint
        '''
        # NOTE(review): the name is shown twice - the value in brackets was
        # presumably meant to be a different attribute; confirm before changing
        label=f"{self.name}({self.name})"
        target=f"{self.endpoint}({self.method})"
        return f"{label}:{target}"