Sample Implementation: sync-notes
Below is a sample client-side implementation of a pysyncml client. Please note that it uses some of the more advanced features of pysyncml, and may therefore appear overwhelming. For a simpler general guide to implementing client-side SyncML adapters with pysyncml, please see the Implementing a SyncML Client guide.
Approach
The sync-notes program keeps a set of files in a given directory synchronized with a remote “Note” storage SyncML server. When launched, it scans the directory for changes (new, deleted, or modified files) and reports those changes to the local pysyncml.Context.Adapter. Then, at the user's option, it synchronizes with a remote SyncML peer, which may already be pre-configured.
Code
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#------------------------------------------------------------------------------
# file: $Id: notes.py 34 2012-07-03 02:48:00Z griff1n $
# lib: pysyncml.cli.notes
# auth: griffin <griffin@uberdev.org>
# date: 2012/05/19
# copy: (C) CopyLoose 2012 UberDev <hardcore@uberdev.org>, No Rights Reserved.
#------------------------------------------------------------------------------

'''
Command-line SyncML client that synchronizes the files of a directory as
"notes" with a remote SyncML server.

The program keeps a small SQLite index of the files it knows about (name,
inode and SHA-256 checksum) in a ``.sync`` sub-directory. On every run it
walks the directory, compares what it finds with that index, registers the
additions / modifications / deletions with the pysyncml store and then
(unless ``--local`` or ``--config`` is given) runs a synchronization.
'''

import getpass
import hashlib
import logging
import os
import re
import sys
import time
import uuid
from optparse import OptionParser

try:
    from elementtree import ElementTree as ET
except ImportError:
    # the stand-alone ``elementtree`` package was merged into the standard
    # library; fall back to it so that a missing legacy package is not fatal.
    from xml.etree import ElementTree as ET

import pysyncml
import sqlalchemy
from sqlalchemy import orm
from sqlalchemy.ext.declarative import declarative_base, declared_attr
from sqlalchemy.orm import sessionmaker
from sqlalchemy.orm.exc import MultipleResultsFound, NoResultFound

log = logging.getLogger(__name__)


#------------------------------------------------------------------------------
class DatabaseObject(object):
    '''
    Declarative base mix-in: names the table after the lower-cased class name
    and provides an auto-incrementing integer primary key ``id``.
    '''

    # todo: is having a "global" Note._db really the best way?...
    # session shared by all mapped classes; set by FilesystemNoteAgent.__init__
    _db = None

    @declared_attr
    def __tablename__(cls):
        return cls.__name__.lower()

    id = sqlalchemy.Column(sqlalchemy.Integer, autoincrement=True,
                           primary_key=True)


DatabaseObject = declarative_base(cls=DatabaseObject)


#------------------------------------------------------------------------------
class Note(DatabaseObject, pysyncml.NoteItem):
    '''
    Index record of one note file: its path relative to the root directory
    (``name``), the inode it was last seen at and the SHA-256 of its content.
    '''

    # note: attributes inherited from NoteItem: id, extensions, name, body
    #       attributes then overriden by DatabaseObject (i hope): id
    #       and then attributes overriden here: name
    # note: the `deleted` attribute exists only to ensure ID's are not recycled
    #       ugh. i need a better solution to that...
    inode = sqlalchemy.Column(sqlalchemy.Integer, index=True)
    name = sqlalchemy.Column(sqlalchemy.String)
    sha256 = sqlalchemy.Column(sqlalchemy.String(64))
    deleted = sqlalchemy.Column(sqlalchemy.Boolean)

    @classmethod
    def q(cls, deleted=False, **kw):
        '''
        Return a query over notes filtered by the keyword criteria `kw`.

        By default only non-deleted notes are returned; pass ``deleted=True``
        for deleted ones only, or ``deleted=None`` to not filter on the
        deletion flag at all.
        '''
        if deleted is not None:
            kw['deleted'] = deleted
        return DatabaseObject._db.query(cls).filter_by(**kw)

    def __init__(self, *args, **kw):
        self.deleted = False
        DatabaseObject.__init__(self, *args, **kw)
        # TODO: check this...
        # NOTE: not calling NoteItem.__init__ as it can conflict with the
        #       sqlalchemy stuff done here...
        # todo: is this really necessary?...
        # keyword arguments that are table columns were consumed by the
        # sqlalchemy constructor above; only the remainder goes to Ext.
        columns = self.__table__.c.keys()
        skw = dict((key, val) for key, val in kw.items() if key not in columns)
        pysyncml.Ext.__init__(self, *args, **skw)

    @orm.reconstructor
    def __dbinit__(self):
        # note: not calling ``NoteItem.__init__`` - see ``__init__`` notes.
        pysyncml.Ext.__init__(self)

    def __str__(self):
        return 'Note "%s"' % (self.name,)

    def __repr__(self):
        return '<Note "%s": inode=%s; sha256=%s>' \
            % (self.name, '-' if self.inode is None else str(self.inode),
               self.sha256)

    def dump(self, stream, contentType, version, rootdir):
        '''
        Serialize this note to `stream`, reading its content from the file
        ``rootdir/name``. The content is held in ``self.body`` only for the
        duration of the call. Returns ``self``.
        '''
        # TODO: convert this to a .body @property...
        with open(os.path.join(rootdir, self.name), 'rb') as fp:
            self.body = fp.read()
        try:
            pysyncml.NoteItem.dump(self, stream, contentType, version)
        finally:
            # do not keep the file content in memory, even if dumping failed
            self.body = None
        return self

    @classmethod
    def load(cls, stream, contentType=None, version=None):
        '''
        Deserialize a note from `stream` and return a new, not yet persisted
        ``Note``. The content is temporarily kept in the ``body`` attribute
        until ``FilesystemNoteAgent.addItem`` / ``replaceItem`` writes it out.
        '''
        base = pysyncml.NoteItem.load(stream, contentType, version)
        if contentType == pysyncml.TYPE_TEXT_PLAIN:
            # remove special characters, windows illegal set: \/:*?"<>|
            base.name = re.sub(r'[^a-zA-Z0-9,_+=!@#$%^&() -]+', '', base.name)
            # collapse white space and replace with '_'
            base.name = re.sub(r'\s+', '_', base.name) + '.txt'
        ret = Note(name=base.name, sha256=hashlib.sha256(base.body).hexdigest())
        # temporarily storing the content in "body" attribute (until addItem()
        # is called)
        ret.body = base.body
        return ret


#------------------------------------------------------------------------------
def hashstream(hash, stream):
    '''
    Feed everything readable from `stream` into the hash object `hash`, in
    8 KiB chunks, and return `hash`.
    '''
    while True:
        buf = stream.read(8192)
        if not buf:
            break
        hash.update(buf)
    return hash


#------------------------------------------------------------------------------
class FilesystemNoteAgent(pysyncml.BaseNoteAgent):
    '''
    pysyncml note agent whose items are the files below a root directory,
    tracked through a SQLite index of ``Note`` records.
    '''

    #--------------------------------------------------------------------------
    def __init__(self, root, index, options, ignoreRoot=None, ignoreAll=None,
                 syncstore=None, *args, **kw):
        '''
        :param root: root directory of the notes; it is concatenated directly
            with `index`, so it is expected to end with a path separator.
        :param index: path of the SQLite index file, relative to `root`.
        :param options: the parsed command-line options (stored as-is).
        :param ignoreRoot: optional regex; matching entries directly in the
            root directory are skipped while scanning.
        :param ignoreAll: optional regex; matching entries are skipped in
            every directory while scanning.
        :param syncstore: optional store; when given, the directory is
            scanned immediately and changes are registered with it.
        '''
        super(FilesystemNoteAgent, self).__init__(*args, **kw)
        self.rootdir = root
        self.index = index
        self.options = options
        self.ignoreRoot = re.compile(ignoreRoot) if ignoreRoot is not None else None
        self.ignoreAll = re.compile(ignoreAll) if ignoreAll is not None else None
        dbpath = '%s%s' % (root, index)
        self.dbengine = sqlalchemy.create_engine('sqlite:///%s' % (dbpath,))
        self.db = sessionmaker(bind=self.dbengine)()
        # TODO: how to detect if my schema has changed?...
        if not os.path.isfile(dbpath):
            DatabaseObject.metadata.create_all(self.dbengine)
        # todo: is having a global really the best way?...
        DatabaseObject._db = self.db
        if syncstore is not None:
            self.scan(syncstore)

        # TODO: adding this for funambol-compatibility (to remove multiple
        #       "VerCT" nodes)...
        self.contentTypes = [
            pysyncml.ContentTypeInfo(pysyncml.TYPE_SIF_NOTE, '1.1', True),
            pysyncml.ContentTypeInfo(pysyncml.TYPE_SIF_NOTE, '1.0'),
            pysyncml.ContentTypeInfo(pysyncml.TYPE_TEXT_PLAIN, '1.0'),
        ]

    #--------------------------------------------------------------------------
    def scan(self, store):
        '''
        Walk the root directory and register all local additions,
        modifications and deletions with `store`.
        '''
        # todo: this scan assumes that the note index (not the bodies) will
        #       comfortably fit in memory... this is probably a good assumption,
        #       but ideally it would not need to depend on that.
        # `reg` maps str(note id) => change state, seeded with the changes
        # that are already pending for the peer.
        reg = dict()
        if store.peer is not None:
            reg = dict((c.itemID, c.state)
                       for c in store.peer.getRegisteredChanges())
        self._scandir('.', store, reg)
        self._scanindex(store, reg)

    #--------------------------------------------------------------------------
    def _scanindex(self, store, reg):
        '''
        Flag as deleted every indexed note that has no entry in `reg`, i.e.
        that was neither seen by the directory walk nor already pending.
        '''
        # IMPORTANT: this assumes that _scandir has completed and that all moved
        #            files have been recorded, etc. this function then searches
        #            for deleted files...
        # TODO: this is somewhat of a simplistic algorithm... this comparison
        #       should be done at the same time as the dirwalk to detect more
        #       complex changes such as: files "a" and "b" are synced. then
        #       "a" is deleted and "b" is moved to "a"...
        #       the current algorithm would incorrectly record that as a
        #       non-syncing change to "b", and "a" would not be deleted.
        for note in Note.q():
            if str(note.id) in reg:
                continue
            log.debug('locally deleted note: %s', note.name)
            note.deleted = True
            store.registerChange(note.id, pysyncml.ITEM_DELETED)
            reg[str(note.id)] = pysyncml.ITEM_DELETED

    #--------------------------------------------------------------------------
    def _scandir(self, dirname, store, reg):
        '''
        Recursively scan `dirname` (relative to the root directory), handing
        every regular file to ``_scanfile``. Symbolic links are skipped.
        '''
        curdir = os.path.normcase(os.path.normpath(
            os.path.join(self.rootdir, dirname)))
        log.debug('scanning directory "%s"...', curdir)
        for name in os.listdir(curdir):
            # ignore the pysyncml storage file in the root directory
            if dirname == '.':
                if self.ignoreRoot is not None and self.ignoreRoot.match(name):
                    continue
            if self.ignoreAll is not None and self.ignoreAll.match(name):
                continue
            path = os.path.join(curdir, name)
            if os.path.islink(path):
                # todo: should i follow?...
                continue
            if os.path.isfile(path):
                self._scanfile(path, os.path.join(dirname, name), store, reg)
                continue
            if os.path.isdir(path):
                # and recurse!...
                self._scandir(os.path.join(dirname, name), store, reg)

    #--------------------------------------------------------------------------
    def _checksum(self, path):
        '''Return the hex SHA-256 digest of the content of the file `path`.'''
        with open(path, 'rb') as fp:
            return hashstream(hashlib.sha256(), fp).hexdigest()

    #--------------------------------------------------------------------------
    def _matchnote(self, path, name, inode):
        '''
        Find the indexed note that corresponds to the file `path`, trying in
        order a match by `name`, by content checksum and by `inode`.

        Returns a tuple ``(note, chksum)``: `note` is ``None`` if the file is
        not in the index, and `chksum` is ``None`` if the checksum did not
        have to be computed (i.e. the note was matched by name).
        '''
        try:
            note = Note.q(name=name).one()
            log.debug(' matched item %d by name ("%s")', note.id, note.name)
            return (note, None)
        except NoResultFound:
            pass
        chksum = self._checksum(path)
        try:
            note = Note.q(sha256=chksum).one()
            # a checksum match only means "renamed" if the file it was
            # recorded under is gone; otherwise this file is a copy of it and
            # must become a note of its own.
            if not os.path.exists(os.path.join(self.rootdir, note.name)):
                log.debug(' matched item %d by checksum ("%s")',
                          note.id, note.sha256)
                return (note, chksum)
            log.debug(' checksum matches existing note "%s"... dropping match',
                      note.name)
        except (NoResultFound, MultipleResultsFound):
            # several notes with identical content cannot identify this file;
            # let the inode lookup below decide.
            pass
        try:
            note = Note.q(inode=inode).one()
        except (NoResultFound, MultipleResultsFound):
            return (None, chksum)
        log.debug(' matched item %d by inode (%d)', note.id, note.inode)
        if note.name != name and note.sha256 != chksum:
            log.debug(' looks like the inode was recycled... dropping match')
            return (None, chksum)
        return (note, chksum)

    #--------------------------------------------------------------------------
    def _scanfile(self, path, name, store, reg):
        '''
        Compare the file `path` (known as `name` relative to the root
        directory) with the index, update the index and register an
        ``ITEM_ADDED`` or ``ITEM_MODIFIED`` change with `store` as needed.
        The resulting state is recorded in `reg` under the note's id.
        '''
        log.debug('analyzing file "%s"...', path)
        inode = os.stat(path).st_ino
        name = os.path.normpath(name)
        note, chksum = self._matchnote(path, name, inode)
        if note is None:
            log.debug('locally added note: %s', path)
            note = Note(inode=inode, name=name, sha256=chksum)
            self.db.add(note)
            # flush so that the note gets its id assigned
            self.db.flush()
            store.registerChange(note.id, pysyncml.ITEM_ADDED)
            reg[str(note.id)] = pysyncml.ITEM_ADDED
            return
        if inode != note.inode:
            # using %s as the recorded inode may be None
            log.debug('locally recreated note with new inode: %s => %s (not synchronized)',
                      note.inode, inode)
            note.inode = inode
        if name != note.name:
            # todo: a rename should prolly trigger an update...
            log.debug('locally renamed note: %s => %s (not synchronized)',
                      note.name, name)
            note.name = name
        # TODO: i *should* store the last-modified and check that instead of
        #       opening and sha256-digesting every single file...
        if chksum is None:
            chksum = self._checksum(path)
        if chksum == note.sha256:
            reg[str(note.id)] = pysyncml.ITEM_OK
            return
        note.sha256 = chksum
        log.debug('locally modified note: %s (%s)', path, 'content')
        if reg.get(str(note.id)) == pysyncml.ITEM_ADDED:
            # still pending as an addition: the peer will get the new content
            return
        store.registerChange(note.id, pysyncml.ITEM_MODIFIED)
        reg[str(note.id)] = pysyncml.ITEM_MODIFIED

    #--------------------------------------------------------------------------
    def save(self):
        '''Commit the pending changes of the note index.'''
        self.db.commit()

    #--------------------------------------------------------------------------
    def getAllItems(self):
        '''Yield every non-deleted note of the index.'''
        for note in Note.q():
            yield note

    #--------------------------------------------------------------------------
    def dumpItem(self, item, stream, contentType=None, version=None):
        item.dump(stream, contentType, version, self.rootdir)

    #--------------------------------------------------------------------------
    def loadItem(self, stream, contentType=None, version=None):
        return Note.load(stream, contentType, version)

    #--------------------------------------------------------------------------
    def getItem(self, itemID, includeDeleted=False):
        '''
        Return the note with id `itemID`; notes flagged as deleted are only
        found if `includeDeleted` is true. Raises ``NoResultFound`` otherwise.
        '''
        if includeDeleted:
            return Note.q(id=int(itemID), deleted=None).one()
        return Note.q(id=int(itemID)).one()

    #--------------------------------------------------------------------------
    def addItem(self, item):
        '''
        Write the content of `item` (its temporary ``body`` attribute) to a
        new file and add `item` to the index. If the file name is already
        taken, a counter is inserted before the extension, e.g. ``a(1).txt``.
        Returns `item`.
        '''
        # NOTE(review): `item.name` originates from the remote peer and is only
        #               sanitized by Note.load for text/plain content - confirm
        #               that it cannot point outside of the root directory.
        path = os.path.join(self.rootdir, item.name)
        # splitext only considers the last path component, so a "." in a
        # directory name is not mistaken for the start of the extension.
        pbase, psufx = os.path.splitext(item.name)
        count = 0
        while os.path.exists(path):
            count += 1
            item.name = '%s(%d)%s' % (pbase, count, psufx)
            path = os.path.join(self.rootdir, item.name)
        with open(path, 'wb') as fp:
            fp.write(item.body)
        item.inode = os.stat(path).st_ino
        delattr(item, 'body')
        self.db.add(item)
        self.db.flush()
        log.debug('added: %s', item)
        return item

    #--------------------------------------------------------------------------
    def replaceItem(self, item):
        '''
        Overwrite the file of the indexed note ``item.id`` with the content
        of `item` and update the recorded inode and checksum.
        '''
        curitem = self.getItem(item.id)
        path = os.path.join(self.rootdir, curitem.name)
        with open(path, 'wb') as fp:
            fp.write(item.body)
        curitem.inode = os.stat(path).st_ino
        curitem.sha256 = hashlib.sha256(item.body).hexdigest()
        delattr(item, 'body')
        self.db.flush()
        log.debug('updated: %s', curitem)

    #--------------------------------------------------------------------------
    def deleteItem(self, itemID):
        '''
        Remove the file of note `itemID` (if it still exists) and flag the
        note as deleted in the index.
        '''
        item = self.getItem(itemID)
        path = os.path.join(self.rootdir, item.name)
        if os.path.exists(path):
            os.unlink(path)
        item.deleted = True
        log.debug('deleted: %s', item)
        # note: not deleting from DB to ensure ID's are not recycled... ugh. i
        #       need a better solution to that...


#------------------------------------------------------------------------------
class LogFormatter(logging.Formatter):
    '''
    Formatter that prefixes every line of a log message with a level marker
    and, if `logsource` is true, the name of the logger.
    '''

    # NOTE(review): the padding inside these prefixes may have been collapsed
    #               when this listing was extracted - confirm against upstream.
    levelString = {
        logging.DEBUG: '[ ] DEBUG ',
        logging.INFO: '[--] INFO ',
        logging.WARNING: '[++] WARNING ',
        logging.ERROR: '[**] ERROR ',
        logging.CRITICAL: '[**] CRITICAL',
    }

    def __init__(self, logsource, *args, **kw):
        logging.Formatter.__init__(self, *args, **kw)
        self.logsource = logsource

    def format(self, record):
        msg = record.getMessage()
        # levels without a dedicated marker fall back to their level name
        level = LogFormatter.levelString.get(record.levelno, record.levelname)
        if self.logsource:
            pfx = '%s|%s: ' % (level, record.name)
        else:
            pfx = '%s ' % (level,)
        # prefix every line of a multi-line message
        return pfx + ('\n' + pfx).join(msg.split('\n'))


#------------------------------------------------------------------------------
def main():
    '''
    Command-line entry point: parse the options, set up the pysyncml adapter
    for the given directory, report the pending local changes and, unless
    ``--local`` or ``--config`` was given, synchronize. Returns the process
    exit status.
    '''

    #--------------------------------------------------------------------------
    # setup program parameters

    # int(): "%x" requires an integer and time.time() returns a float
    defaultDevID = 'pysyncml.cli.notes:%x:%x' % (uuid.getnode(), int(time.time()))

    syncModes = {
        'sync': pysyncml.SYNCTYPE_TWO_WAY,
        'full': pysyncml.SYNCTYPE_SLOW_SYNC,
        'pull': pysyncml.SYNCTYPE_ONE_WAY_FROM_SERVER,
        'push': pysyncml.SYNCTYPE_ONE_WAY_FROM_CLIENT,
        'pull-over': pysyncml.SYNCTYPE_REFRESH_FROM_SERVER,
        'push-over': pysyncml.SYNCTYPE_REFRESH_FROM_CLIENT,
    }

    cli = OptionParser(usage='%prog [options] DIRNAME',
                       version='%prog ' + pysyncml.versionString,
                       )

    cli.add_option('-v', '--verbose',
                   dest='verbose', default=0, action='count',
                   help='enable verbose output to STDERR, mostly for diagnotic'
                   ' purposes (multiple invocations increase verbosity).')

    cli.add_option('-q', '--quiet',
                   dest='quiet', default=False, action='store_true',
                   help='do not display sync summary')

    cli.add_option('-c', '--config',
                   dest='config', default=False, action='store_true',
                   help='configure the local SyncML adapter, display a summary'
                   ' and exit without actually syncronizing')

    cli.add_option('-l', '--local',
                   dest='local', default=False, action='store_true',
                   help='display the pending local changes')

    cli.add_option('-i', '--id',
                   dest='devid', default=None, action='store',
                   help='overrides the default device ID, either the store'
                   ' value from a previous sync or the generated default'
                   ' (currently "%s" - generated based on local MAC address'
                   ' and current time)'
                   % (defaultDevID,))

    cli.add_option('-n', '--name',
                   dest='name', default=None, action='store',
                   help='sets the local note adapter/store name (no default)')

    # a "choice" option makes optparse reject unknown modes with a usage error
    # instead of failing later with a KeyError.
    cli.add_option('-m', '--mode',
                   dest='mode', default='sync', action='store',
                   type='choice', choices=sorted(syncModes),
                   help='set the synchronization mode - can be one of "sync"'
                   ' (for two-way synchronization), "full" (for a complete'
                   ' re-synchronization), "pull" (for fetching remote'
                   ' changes only), "push" (for pushing local changes only),'
                   ' or "pull-over" (to obliterate the local data and'
                   ' download the remote data) or "push-over" (to obliterate'
                   ' the remote data and upload the local data); the default'
                   ' is "%default".')

    cli.add_option('-r', '--remote',
                   dest='remote', default=None, action='store',
                   help='specifies the remote URL of the SyncML synchronization'
                   ' server - only required if the target ``DIRNAME`` has never'
                   ' been synchronized, or the synchronization meta information'
                   ' was lost.')

    cli.add_option('-R', '--remote-uri',
                   dest='remoteUri', default=None, action='store',
                   help='specifies the remote URI of the note datastore. if'
                   ' left unspecified, pysyncml will attempt to identify it'
                   ' automatically.')

    cli.add_option('-u', '--username',
                   dest='username', default=None, action='store',
                   help='specifies the remote server username to log in with.')

    cli.add_option('-p', '--password',
                   dest='password', default=None, action='store',
                   help='specifies the remote server password to log in with'
                   ' (if "--remote" and "--username" is specified, but not,'
                   ' "--password", the password will be prompted for to avoid'
                   ' leaking the password into the local hosts environment,'
                   ' which is the recommended approach).')

    (opts, args) = cli.parse_args()

    if len(args) != 1:
        cli.error('expected exactly one argument DIRNAME - please see "--help" for details.')

    rootlog = logging.getLogger()
    handler = logging.StreamHandler(sys.stderr)
    # the logger name is only shown from the second "-v" on
    handler.setFormatter(LogFormatter(opts.verbose >= 2))
    rootlog.addHandler(handler)
    if opts.verbose >= 3:
        rootlog.setLevel(logging.DEBUG)
    elif opts.verbose >= 1:
        rootlog.setLevel(logging.INFO)
    else:
        rootlog.setLevel(logging.FATAL)

    def report(msg):
        # with "--local" the pending changes are the program's output; in all
        # other cases they are only informational log messages.
        if opts.local:
            sys.stdout.write(msg + '\n')
        else:
            log.info(msg)

    syncdir = '.sync'
    storageName = os.path.join(syncdir, 'syncml.db')
    indexStorage = os.path.join(syncdir, 'index.db')
    # the root directory is spliced into sqlite URLs and file names below, so
    # make it an explicit path that ends with a separator.
    rootdir = args[0]
    if not rootdir.startswith(('/', '.')):
        rootdir = './' + rootdir
    if not rootdir.endswith('/'):
        rootdir += '/'

    if not os.path.isdir(rootdir):
        cli.error('note root directory "%s" does not exist' % (rootdir,))

    if not os.path.isdir(os.path.join(rootdir, syncdir)):
        os.makedirs(os.path.join(rootdir, syncdir))

    #--------------------------------------------------------------------------
    # setup the pysyncml adapter

    context = pysyncml.Context(storage='sqlite:///%(rootdir)s%(storageName)s' %
                               dict(rootdir=rootdir, storageName=storageName))

    adapter = context.Adapter()

    if opts.name is not None:
        adapter.name = opts.name + ' (pysyncml.cli.notes SyncML Adapter)'

    # TODO: stop ignoring ``opts.remoteUri``...
    #       if opts.remoteUri is not None:
    #         adapter.router.addRoute(agent.uri, opts.remoteUri)

    if adapter.devinfo is None:
        log.info('adapter has no device info - registering new device')
    elif opts.devid is not None and opts.devid != adapter.devinfo.devID:
        log.info('adapter has invalid device ID - overwriting with new device info')
        adapter.devinfo = None

    if adapter.devinfo is None:
        # setup some information about the local device, most importantly the
        # device ID, which the server will use to uniquely identify this client.
        adapter.devinfo = context.DeviceInfo(
            devID=opts.devid or defaultDevID,
            devType=pysyncml.DEVTYPE_WORKSTATION,
            softwareVersion='0.1',
            manufacturerName='pysyncml',
            modelName='pysyncml.cli.notes',
            # TODO: adding this for funambol-compatibility...
            hierarchicalSync=False,
        )

    if adapter.peer is None:
        if opts.remote is None:
            opts.remote = raw_input('SyncML remote URL: ')
            if opts.username is None:
                opts.username = raw_input('SyncML remote username (leave empty if none): ')
                if not opts.username:
                    opts.username = None
        log.info('adapter has no remote info - registering new remote adapter')
    elif opts.remote is not None:
        if opts.remote != adapter.peer.url \
                or opts.username != adapter.peer.username \
                or opts.password != adapter.peer.password:
            log.info('adapter has invalid or rejected remote info - overwriting with new remote info')
            adapter.peer = None

    if adapter.peer is None:
        auth = None
        if opts.username is not None:
            auth = pysyncml.NAMESPACE_AUTH_BASIC
            if opts.password is None:
                opts.password = getpass.getpass('SyncML remote password: ')
        # setup the remote connection parameters, if not already stored in
        # the adapter sync tables or the URL has changed.
        adapter.peer = context.RemoteAdapter(
            url=opts.remote,
            auth=auth,
            username=opts.username,
            password=opts.password,
        )

    # TODO: this check should be made redundant... (ie. once the
    #       implementation of Store.merge() is fixed this will go away)
    if 'note' in adapter.stores:
        store = adapter.stores['note']
    else:
        store = adapter.addStore(context.Store(
            uri='note',
            displayName=opts.name,
            # TODO: adding this for funambol-compatibility...
            maxObjSize=None))

    #--------------------------------------------------------------------------
    # create a new agent, which will scan the files stored in the root
    # directory, looking for changed files, new files, and deleted files.

    agent = FilesystemNoteAgent(rootdir, indexStorage, opts,
                                ignoreRoot='^(%s)$' % (re.escape(syncdir),),
                                syncstore=store)

    if store.peer is None:
        report('no pending local changes (not associated yet)')
    else:
        changes = list(store.peer.getRegisteredChanges())
        if not changes:
            report('no pending local changes to synchronize')
        else:
            report('pending local changes:')
            for c in changes:
                item = agent.getItem(c.itemID, includeDeleted=True)
                report(' - %s: %s' % (item, pysyncml.state2string(c.state)))

    if opts.local:
        context.save()
        agent.save()
        return 0

    store.agent = agent

    #--------------------------------------------------------------------------
    # do the synchronization

    mode = syncModes[opts.mode]

    if opts.config:
        sys.stdout.write('Note SyncML adapter configuration:\n')
        adapter.describe(pysyncml.IndentStream(sys.stdout, ' '))
    else:
        stats = adapter.sync(mode=mode)
        if not opts.quiet:
            pysyncml.describeStats(stats, sys.stdout, title='Synchronization Summary')

    #--------------------------------------------------------------------------
    # and cleanup

    context.save()
    agent.save()
    return 0


#------------------------------------------------------------------------------
if __name__ == '__main__':
    sys.exit(main())

#------------------------------------------------------------------------------
# end of $Id: notes.py 34 2012-07-03 02:48:00Z griff1n $
#------------------------------------------------------------------------------