Module pedspell
Expand source code
#!/usr/bin/env python
from __future__ import absolute_import
from __future__ import print_function
import signal, os, time, string, pickle, sys
from threading import Timer
import gi
gi.require_version("Gtk", "3.0")
from gi.repository import Gtk
from gi.repository import GObject
from pedlib import pedconfig
from pedlib import leven
from pedlib.pedutil import *
# Dictionary words (lower-cased) and their prefix lookup tables. All three
# are filled in by build_index(): index3 holds (two letter prefix, offset
# into index2) pairs, index4 holds (one letter prefix, offset) pairs.
index2 = []; index3 = []; index4 = []

# Words of the user's own dictionary, filled in by load_user_dict()
userdic = []

# Punctuation characters (almost all non-alphanumeric chars)
# Note the space at the beginning. The backslash is spelled as an explicit
# escape: the former "\{" was an invalid escape sequence that current
# Python versions warn about. The resulting string is unchanged.
punctuation = " ,./<>?;':[]\\{}|=-`!@#$%^&*()_+\""
# Benchmark time
#got_clock = 0
# ------------------------------------------------------------------------
# Spell buffer. We read the spell.txt file into a python array.
# Lower case it, and we create an index to the letters aa-zz. (double letter)
# On search, we look at the first 2 letters and search the index for offset
# in the array. This loads in 300 msec on an average system, and spell
# checks the current context (of an average python file) in 5-15 msec.
# Not bad for a 200+ thousand word dictionary. Without the index the
# check took 3.00 full seconds. The spell operates on the idle timer,
# within the next N msec ticks. (configurable)
# We spell check the strings and comments only. The algorithm used to
# select the checkable portion of the file is similar to the algorithm
# used in coloring. Speed versus intelligence, so 'backslash quote' will
# fool the speller into thinking it is code.
# As the speller's presence is advisory, no harm is done. You can
# cooperate in your strings with the speller by using single quote in
# double quote strings and vice-versa. One useful trick is to escape
# the offending quote. Note, that if it is simpler for the editor,
# it is simpler to the compiler, and to the maintainer / reader.
# The document most recently handed to spell(). spell_user() reads and
# resets its 'newword' attribute to decide when to re-read the user
# dictionary. Stays None until spell() has been called.
document = None
def _spell_targets(line):

    """ Return the (begin, end) spans of a line that are worth checking.

    A comment marker ("#", or failing that "//") selects everything from
    the marker to the end of the line. A line without a marker yields one
    span per quoted string, covering the text between the opening and the
    closing quote. Offsets are indexes into the line.
    """

    mark = line.find("#")
    if mark < 0:
        mark = line.find("//")

    # Only a real marker selects the comment path. A lone double quote
    # has to fall through to the string scanner below, otherwise a start
    # offset of -1 would be handed to the checker.
    if mark >= 0:
        return [(mark, len(line))]

    spans = []
    pos = 0
    while True:
        # Double quotes are looked for first, single quotes are the fallback
        quote = '"'
        start = line.find(quote, pos)
        if start < 0:
            quote = "'"
            start = line.find(quote, pos)
            if start < 0:
                break
        start += 1
        stop = line.find(quote, start)
        if stop < 0:
            # Unterminated string, nothing more to pick up on this line
            break
        # These are line indexes, the horizontal scroll position of the
        # view plays no part in them
        spans.append((start, stop))
        pos = stop + 1
    return spans


def spell(self2, allflag = False):

    """ Spell check the visible lines of a document.

    self2    --  the document. Read here: the 'spell' switch, the 'text'
                 line list, 'ypos' (first line to check), 'cyy' and
                 get_height() (used to work out how many lines to check).
                 Its 'ularr' list is rebuilt and invalidate() is called.
    allflag  --  when true every line is checked in full, otherwise only
                 comments and quoted strings are.

    Every misspelled word is recorded in self2.ularr as a
    (start offset, line number, end offset) tuple. The document is also
    remembered in the module level 'document' variable. Nothing is
    returned; failures are printed, not raised, as the speller is
    advisory only.
    """

    global document
    document = self2

    if not self2.spell:
        # Spelling is switched off, drop any marks left over from before
        if len(self2.ularr):
            self2.ularr = []
            self2.invalidate()
        return

    if pedconfig.conf.pgdebug > 3:
        print( "spell started", allflag)

    self2.ularr = []
    try:
        # Contain checking to visible range
        bottom = self2.ypos + self2.get_height() / self2.cyy

        for cnt in range(self2.ypos, len(self2.text)):
            if cnt >= bottom:
                break

            line = self2.text[cnt]
            if allflag:
                # Spell all of it
                spans = [(0, len(line))]
            else:
                spans = _spell_targets(line)

            for beg, end in spans:
                for ss, ee in spell_line(line, beg, end):
                    self2.ularr.append((ss, cnt, ee))

        self2.invalidate()

    except Exception:
        # Report and carry on; KeyboardInterrupt and SystemExit are
        # deliberately left alone
        print("Exception on spell check", sys.exc_info())
# ------------------------------------------------------------------------
# Return a list of (start, end) offsets of the misspelled words in this line
def spell_line(line, beg, end):

    """ Spell check the stretch of 'line' between offsets 'beg' and 'end'.

    Each word is looked up in the main dictionary first and in the user
    dictionary second. Returns a list of (start, end) offset pairs, one
    per unknown word, such that line[start:end] is the word.

    NOTE(review): xnextchar2() and selasci() come from pedutil and are
    not visible here. They are presumed to return the index of the next
    character that is not in 'punctuation', and the bounds of the word
    at an index, respectively -- confirm against pedutil.
    """

    if pedconfig.conf.pgdebug > 6:
        print( "spell_line", line[beg:end])

    err = []; idx = beg
    while idx < end:
        # Step over punctuation to the start of the next word
        idx = xnextchar2(line, punctuation, idx)
        if idx >= end:
            # Only punctuation was left; words past 'end' are not ours
            break

        ss, ee = selasci(line, idx)
        word = line[ss:ee]

        # Communicate it to upper layers if neither dictionary knows it
        if not (spell_word(word) or spell_user(word)):
            err.append((ss, ee))
        idx = ee + 1

    return err
# ------------------------------------------------------------------------
# Return True if word found
def spell_word(word):

    """ Look a word up in the main dictionary.

    Words of one character or less, words starting with "#" and words
    starting with a digit are accepted without a lookup. Everything else
    is lower-cased, stripped and searched among the dictionary words that
    share its first two letters. The dictionary index is built on first
    use. Returns True if the word is accepted, False otherwise.
    """

    if pedconfig.conf.pgdebug > 9:
        print( "spell word", "'" + word + "'")

    # Do not spell short words
    if len(word) <= 1:
        return True

    first = word[0]
    # Hex numbers, hashes
    if first == "#":
        return True
    # Numbers
    if "0" <= first <= "9":
        return True

    needle = word.lower().strip()

    global index2, index3

    # Pre read index
    if not index2:
        build_index()

    # Locate the run of index2 that holds the words with our prefix; the
    # run ends where the next index3 entry begins
    prefix = needle[0:2]
    lo = 0; hi = 0
    for pos, (key, offset) in enumerate(index3):
        if key == prefix:
            lo = offset
            hi = index3[pos + 1][1]
            break

    # Finally, search within limits
    for pos in range(lo, hi):
        if index2[pos] == needle:
            return True
    return False
# -----------------------------------------------------------------------
# If not found in the regular dictionary, see if it is in the user's dictionary
def spell_user(word):

    """ Look a word up in the user's own dictionary.

    The user dictionary is (re)loaded first whenever it is empty or the
    current document has its 'newword' flag set; the flag is cleared in
    the process. Words of one character or less and words starting with
    "#" are accepted without a lookup. Returns True if the word is
    accepted, False otherwise.
    """

    if pedconfig.conf.pgdebug > 5:
        print("Spell user", word)

    global userdic, document

    # Load / re-load. 'document' stays None until spell() has run, so the
    # flag is read and reset defensively.
    newword = getattr(document, "newword", False)
    if len(userdic) == 0 or newword == True:
        if document is not None:
            document.newword = False
        load_user_dict()

    if len(word) <= 1:          # Do not spell short words
        return True
    if word[0] == "#":          # Hex Numbers, hashes
        return True

    return word.lower().strip() in userdic
def append_user_dict(self2, arg):

    """ Append a word to the user dictionary file.

    self2  --  not used here, kept for the callers' sake
    arg    --  the word to add; it is stored lower-cased, one per line

    The file is 'userdict.txt' in the configuration directory. Returns
    True on success, False if the file could not be opened or written;
    the failure is printed, not raised.
    """

    if pedconfig.conf.pgdebug > 5:
        print( "user add dict", arg)

    lw = arg.lower()

    # Same path construction as load_user_dict()
    xfile = os.path.join(pedconfig.conf.config_dir, "userdict.txt")

    try:
        fd = open(xfile, "a")
    except (IOError, OSError):
        print("Cannot open user dictionary.", sys.exc_info())
        return False

    try:
        # Leaving the 'with' closes the file, so an error surfacing on
        # the final flush is reported like any other write failure
        with fd:
            fd.write(lw + "\n")
    except (IOError, OSError, UnicodeError):
        print("Cannot write to user dictionary.", sys.exc_info())
        return False

    return True
# ------------------------------------------------------------------------
# User dictionary. Crafted to be as simple as possible, no indexing etc ..
# We do not expect the user dictionary to grow beyond tens of words.
# Note: the dictionary reloads as we add new words.
def load_user_dict():

    """ Read the user dictionary file into the module level 'userdic' list.

    The file is 'userdict.txt' in the configuration directory, one word
    per line. If the file does not exist or cannot be opened, 'userdic'
    is left as it was. Blank lines are skipped. If reading fails part
    way, the words read so far are kept. Problems are printed, not
    raised. Nothing is returned.
    """

    global userdic

    xfile = os.path.join(pedconfig.conf.config_dir, "userdict.txt")
    if pedconfig.conf.pgdebug > 5:
        print( "load_user_dict", xfile)

    # No dictionary yet
    if not os.path.isfile(xfile):
        return

    try:
        fd = open(xfile, "rt")
    except (IOError, OSError):
        print("Cannot open user dictionary", sys.exc_info())
        return

    words = []
    try:
        with fd:
            for raw in fd:
                entry = raw.replace("\r", "").replace("\n", "")
                if not entry:
                    # A blank line is not the end of the file, the words
                    # after it still count
                    continue
                if pedconfig.conf.pgdebug > 9:
                    print( "load dict:", "'" + entry + "'")
                words.append(entry)
    except (IOError, OSError, UnicodeError):
        print( "Cannot read user dictionary", sys.exc_info())

    userdic = words
# ------------------------------------------------------------------------
def build_index():

    """ Load the main dictionary and build the prefix lookup tables.

    Reads 'spell.txt' (located with get_exec_path) and fills the module
    level lists:

    index2  --  every dictionary entry, lower-cased, in file order
    index3  --  (two letter prefix, offset into index2) pairs, one added
                each time the prefix changes, closed by an end marker
                (" ", number of entries)
    index4  --  (one letter prefix, offset into index2) pairs, no marker

    The lists are emptied in place first, so calling this again does not
    pile up stale entries or extra end markers. Nothing is returned.
    """

    global index2, index3, index4

    if(pedconfig.conf.verbose):
        print ("Loading dictionary", get_exec_path("spell.txt"))

    # It is an msdos file (don't ask)
    index = readfile(get_exec_path("spell.txt"))

    # Start from scratch, but keep the list objects so that references
    # held elsewhere remain valid
    del index2[:]; del index3[:]; del index4[:]

    for ww in index:
        index2.append(ww.lower())

    prev = ""; prevs = ""
    for cnt, ii in enumerate(index2):
        if len(ii) < 2:
            continue

        # The dictionary contained some intl chars, filter ascii
        if not (str(ii[0]) <= "z" and str(ii[0]) >= "0"):
            continue
        if not (str(ii[1]) <= "z" and str(ii[1]) >= "0"):
            continue

        ss = ii[0:2]
        if ss != prev:
            index3.append((ss, cnt))
            prev = ss

        # Do single index as well
        # NOTE(review): the listing this was restored from had lost its
        # indentation; this step is assumed to sit inside the ascii
        # filter -- confirm against the repository.
        sss = ii[0:1]
        if sss != prevs:
            index4.append((sss, cnt))
            prevs = sss

    # End Marker
    index3.append((" ", len(index2)))
# ------------------------------------------------------------------------
def suggest(self2, xstr):

    """ Suggest dictionary words that resemble a (misspelled) word.

    self2  --  not used here, kept for the callers' sake
    xstr   --  the word to find suggestions for

    Only dictionary words sharing the first two letters of the
    lower-cased word are considered. Words with a leven.Distance() below
    4 are collected. Returns a list of at most 15 (distance, word)
    tuples, sorted in ascending order. The dictionary index is built on
    first use.
    """

    lw = xstr.lower()

    global index2, index3, index4

    # Pre read index
    if len(index2) == 0:
        build_index()

    # Locate start and end of the words sharing our two letter prefix.
    # The last index3 entry is the end marker only, so it takes no part
    # in the matching and there is always a following entry to read.
    prefix = lw[0:2]
    sss = 0; eee = 0
    for cnt in range(len(index3) - 1):
        if index3[cnt][0] == prefix:
            sss = index3[cnt][1]
            eee = index3[cnt + 1][1]
            break

    # Finally, search within limits
    # NOTE(review): the cap below counts the candidates kept. The listing
    # this was restored from had lost its indentation, so it cannot be
    # told apart from a cap on the words examined -- confirm against the
    # repository.
    arr = []
    for pos in range(sss, eee):
        if len(arr) > 100:
            break
        ret = leven.Distance(index2[pos], lw)
        if ret < 4:
            arr.append((ret, index2[pos]))

    arr.sort()
    return arr[:15]
# EOF
Functions
def append_user_dict(self2, arg)
-
Expand source code
def append_user_dict(self2, arg):

    """ Append a word to the user dictionary file.

    self2  --  not used here, kept for the callers' sake
    arg    --  the word to add; it is stored lower-cased, one per line

    The file is 'userdict.txt' in the configuration directory. Returns
    True on success, False if the file could not be opened or written;
    the failure is printed, not raised.
    """

    if pedconfig.conf.pgdebug > 5:
        print( "user add dict", arg)

    lw = arg.lower()

    # Same path construction as load_user_dict()
    xfile = os.path.join(pedconfig.conf.config_dir, "userdict.txt")

    try:
        fd = open(xfile, "a")
    except (IOError, OSError):
        print("Cannot open user dictionary.", sys.exc_info())
        return False

    try:
        # Leaving the 'with' closes the file, so an error surfacing on
        # the final flush is reported like any other write failure
        with fd:
            fd.write(lw + "\n")
    except (IOError, OSError, UnicodeError):
        print("Cannot write to user dictionary.", sys.exc_info())
        return False

    return True
def build_index()
-
Expand source code
def build_index():

    """ Load the main dictionary and build the prefix lookup tables.

    Reads 'spell.txt' (located with get_exec_path) and fills the module
    level lists:

    index2  --  every dictionary entry, lower-cased, in file order
    index3  --  (two letter prefix, offset into index2) pairs, one added
                each time the prefix changes, closed by an end marker
                (" ", number of entries)
    index4  --  (one letter prefix, offset into index2) pairs, no marker

    The lists are emptied in place first, so calling this again does not
    pile up stale entries or extra end markers. Nothing is returned.
    """

    global index2, index3, index4

    if(pedconfig.conf.verbose):
        print ("Loading dictionary", get_exec_path("spell.txt"))

    # It is an msdos file (don't ask)
    index = readfile(get_exec_path("spell.txt"))

    # Start from scratch, but keep the list objects so that references
    # held elsewhere remain valid
    del index2[:]; del index3[:]; del index4[:]

    for ww in index:
        index2.append(ww.lower())

    prev = ""; prevs = ""
    for cnt, ii in enumerate(index2):
        if len(ii) < 2:
            continue

        # The dictionary contained some intl chars, filter ascii
        if not (str(ii[0]) <= "z" and str(ii[0]) >= "0"):
            continue
        if not (str(ii[1]) <= "z" and str(ii[1]) >= "0"):
            continue

        ss = ii[0:2]
        if ss != prev:
            index3.append((ss, cnt))
            prev = ss

        # Do single index as well
        # NOTE(review): the listing this was restored from had lost its
        # indentation; this step is assumed to sit inside the ascii
        # filter -- confirm against the repository.
        sss = ii[0:1]
        if sss != prevs:
            index4.append((sss, cnt))
            prevs = sss

    # End Marker
    index3.append((" ", len(index2)))
def load_user_dict()
-
Expand source code
def load_user_dict():

    """ Read the user dictionary file into the module level 'userdic' list.

    The file is 'userdict.txt' in the configuration directory, one word
    per line. If the file does not exist or cannot be opened, 'userdic'
    is left as it was. Blank lines are skipped. If reading fails part
    way, the words read so far are kept. Problems are printed, not
    raised. Nothing is returned.
    """

    global userdic

    xfile = os.path.join(pedconfig.conf.config_dir, "userdict.txt")
    if pedconfig.conf.pgdebug > 5:
        print( "load_user_dict", xfile)

    # No dictionary yet
    if not os.path.isfile(xfile):
        return

    try:
        fd = open(xfile, "rt")
    except (IOError, OSError):
        print("Cannot open user dictionary", sys.exc_info())
        return

    words = []
    try:
        with fd:
            for raw in fd:
                entry = raw.replace("\r", "").replace("\n", "")
                if not entry:
                    # A blank line is not the end of the file, the words
                    # after it still count
                    continue
                if pedconfig.conf.pgdebug > 9:
                    print( "load dict:", "'" + entry + "'")
                words.append(entry)
    except (IOError, OSError, UnicodeError):
        print( "Cannot read user dictionary", sys.exc_info())

    userdic = words
def spell(self2, allflag=False)
-
Expand source code
def _spell_targets(line):

    """ Return the (begin, end) spans of a line that are worth checking.

    A comment marker ("#", or failing that "//") selects everything from
    the marker to the end of the line. A line without a marker yields one
    span per quoted string, covering the text between the opening and the
    closing quote. Offsets are indexes into the line.
    """

    mark = line.find("#")
    if mark < 0:
        mark = line.find("//")

    # Only a real marker selects the comment path. A lone double quote
    # has to fall through to the string scanner below, otherwise a start
    # offset of -1 would be handed to the checker.
    if mark >= 0:
        return [(mark, len(line))]

    spans = []
    pos = 0
    while True:
        # Double quotes are looked for first, single quotes are the fallback
        quote = '"'
        start = line.find(quote, pos)
        if start < 0:
            quote = "'"
            start = line.find(quote, pos)
            if start < 0:
                break
        start += 1
        stop = line.find(quote, start)
        if stop < 0:
            # Unterminated string, nothing more to pick up on this line
            break
        # These are line indexes, the horizontal scroll position of the
        # view plays no part in them
        spans.append((start, stop))
        pos = stop + 1
    return spans


def spell(self2, allflag = False):

    """ Spell check the visible lines of a document.

    self2    --  the document. Read here: the 'spell' switch, the 'text'
                 line list, 'ypos' (first line to check), 'cyy' and
                 get_height() (used to work out how many lines to check).
                 Its 'ularr' list is rebuilt and invalidate() is called.
    allflag  --  when true every line is checked in full, otherwise only
                 comments and quoted strings are.

    Every misspelled word is recorded in self2.ularr as a
    (start offset, line number, end offset) tuple. The document is also
    remembered in the module level 'document' variable. Nothing is
    returned; failures are printed, not raised, as the speller is
    advisory only.
    """

    global document
    document = self2

    if not self2.spell:
        # Spelling is switched off, drop any marks left over from before
        if len(self2.ularr):
            self2.ularr = []
            self2.invalidate()
        return

    if pedconfig.conf.pgdebug > 3:
        print( "spell started", allflag)

    self2.ularr = []
    try:
        # Contain checking to visible range
        bottom = self2.ypos + self2.get_height() / self2.cyy

        for cnt in range(self2.ypos, len(self2.text)):
            if cnt >= bottom:
                break

            line = self2.text[cnt]
            if allflag:
                # Spell all of it
                spans = [(0, len(line))]
            else:
                spans = _spell_targets(line)

            for beg, end in spans:
                for ss, ee in spell_line(line, beg, end):
                    self2.ularr.append((ss, cnt, ee))

        self2.invalidate()

    except Exception:
        # Report and carry on; KeyboardInterrupt and SystemExit are
        # deliberately left alone
        print("Exception on spell check", sys.exc_info())
def spell_line(line, beg, end)
-
Expand source code
def spell_line(line, beg, end):

    """ Spell check the stretch of 'line' between offsets 'beg' and 'end'.

    Each word is looked up in the main dictionary first and in the user
    dictionary second. Returns a list of (start, end) offset pairs, one
    per unknown word, such that line[start:end] is the word.

    NOTE(review): xnextchar2() and selasci() come from pedutil and are
    not visible here. They are presumed to return the index of the next
    character that is not in 'punctuation', and the bounds of the word
    at an index, respectively -- confirm against pedutil.
    """

    if pedconfig.conf.pgdebug > 6:
        print( "spell_line", line[beg:end])

    err = []; idx = beg
    while idx < end:
        # Step over punctuation to the start of the next word
        idx = xnextchar2(line, punctuation, idx)
        if idx >= end:
            # Only punctuation was left; words past 'end' are not ours
            break

        ss, ee = selasci(line, idx)
        word = line[ss:ee]

        # Communicate it to upper layers if neither dictionary knows it
        if not (spell_word(word) or spell_user(word)):
            err.append((ss, ee))
        idx = ee + 1

    return err
def spell_user(word)
-
Expand source code
def spell_user(word):

    """ Look a word up in the user's own dictionary.

    The user dictionary is (re)loaded first whenever it is empty or the
    current document has its 'newword' flag set; the flag is cleared in
    the process. Words of one character or less and words starting with
    "#" are accepted without a lookup. Returns True if the word is
    accepted, False otherwise.
    """

    if pedconfig.conf.pgdebug > 5:
        print("Spell user", word)

    global userdic, document

    # Load / re-load. 'document' stays None until spell() has run, so the
    # flag is read and reset defensively.
    newword = getattr(document, "newword", False)
    if len(userdic) == 0 or newword == True:
        if document is not None:
            document.newword = False
        load_user_dict()

    if len(word) <= 1:          # Do not spell short words
        return True
    if word[0] == "#":          # Hex Numbers, hashes
        return True

    return word.lower().strip() in userdic
def spell_word(word)
-
Expand source code
def spell_word(word):

    """ Look a word up in the main dictionary.

    Words of one character or less, words starting with "#" and words
    starting with a digit are accepted without a lookup. Everything else
    is lower-cased, stripped and searched among the dictionary words that
    share its first two letters. The dictionary index is built on first
    use. Returns True if the word is accepted, False otherwise.
    """

    if pedconfig.conf.pgdebug > 9:
        print( "spell word", "'" + word + "'")

    # Do not spell short words
    if len(word) <= 1:
        return True

    first = word[0]
    # Hex numbers, hashes
    if first == "#":
        return True
    # Numbers
    if "0" <= first <= "9":
        return True

    needle = word.lower().strip()

    global index2, index3

    # Pre read index
    if not index2:
        build_index()

    # Locate the run of index2 that holds the words with our prefix; the
    # run ends where the next index3 entry begins
    prefix = needle[0:2]
    lo = 0; hi = 0
    for pos, (key, offset) in enumerate(index3):
        if key == prefix:
            lo = offset
            hi = index3[pos + 1][1]
            break

    # Finally, search within limits
    for pos in range(lo, hi):
        if index2[pos] == needle:
            return True
    return False
def suggest(self2, xstr)
-
Expand source code
def suggest(self2, xstr):

    """ Suggest dictionary words that resemble a (misspelled) word.

    self2  --  not used here, kept for the callers' sake
    xstr   --  the word to find suggestions for

    Only dictionary words sharing the first two letters of the
    lower-cased word are considered. Words with a leven.Distance() below
    4 are collected. Returns a list of at most 15 (distance, word)
    tuples, sorted in ascending order. The dictionary index is built on
    first use.
    """

    lw = xstr.lower()

    global index2, index3, index4

    # Pre read index
    if len(index2) == 0:
        build_index()

    # Locate start and end of the words sharing our two letter prefix.
    # The last index3 entry is the end marker only, so it takes no part
    # in the matching and there is always a following entry to read.
    prefix = lw[0:2]
    sss = 0; eee = 0
    for cnt in range(len(index3) - 1):
        if index3[cnt][0] == prefix:
            sss = index3[cnt][1]
            eee = index3[cnt + 1][1]
            break

    # Finally, search within limits
    # NOTE(review): the cap below counts the candidates kept. The listing
    # this was restored from had lost its indentation, so it cannot be
    # told apart from a cap on the words examined -- confirm against the
    # repository.
    arr = []
    for pos in range(sss, eee):
        if len(arr) > 100:
            break
        ret = leven.Distance(index2[pos], lw)
        if ret < 4:
            arr.append((ret, index2[pos]))

    arr.sort()
    return arr[:15]