Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1 

"""This module provides persistence using sqlite.
"""

4 

5import sqlite3 

6import time 

7from datetime import datetime 

8import os 

9import random 

10import threading 

11import hashlib 

12import yaml 

13import re 

14import ujson as json 

15from collections import namedtuple, defaultdict 

16from typing import Iterable 

17from weakref import WeakKeyDictionary 

18 

19from aiopb.aiopb import Hub 

20from gutools.tools import uidfrom, snake_case, expandpath, retry, identity, \ 

21 yaml_encode, yaml_decode, test_pid, walk, rebuild, serializable_container 

22from gutools.uobjects import UObject 

23 

class FileLock(object):
    """Open a file with exclusive access, guarded by a ``<path>.lock`` file.

    FileLock is used as a context manager returning the file stream opened
    with the desired access mode ('w', 'r', 'a').

    When several programs want to update the same file, FileLock is meant
    to let only one of them get access at a time.

    FileLock proceeds as follows:

    - read ``<filename>.lock``.
    - if it does not exist, is empty, or ``test_pid`` rejects the pid stored
      in it, write ``<pid>.<thread name>.<random>`` into the lock file.
    - read the lock file again and check that the content is our own.
    - if it is, the lock is acquired and the real file is opened.
    - on exit, close the real file and remove the lock file.

    NOTE(review): this is a best-effort write-then-verify handshake; it
    narrows, but presumably does not fully eliminate, races between two
    claimants writing at the same instant.
    """

    def __init__(self, path, mode='w', timeout=0):
        """
        - ``path``: the file to get access to.
        - ``mode``: the desired access mode.
        - ``timeout``: seconds to keep retrying for the lock. With ``0``
          (the default) a single attempt is made.
        """
        self.path = path
        self.lock_file = path + '.lock'
        self.mode = mode
        self.timeout = timeout
        self.fd = None
        self.lock_fd = None

    def _read_owner(self):
        """Return the content of the lock file, or '' if it can't be read."""
        try:
            with open(self.lock_file, 'r') as lock:
                return lock.read()
        except OSError:
            return ''

    def _release(self):
        """Remove the lock file; a lock that is already gone is not an error."""
        try:
            os.unlink(self.lock_file)
        except FileNotFoundError:
            pass

    def __enter__(self):
        """Try to write a specific content (like a fingerprint)
        in the lock file, and check that the content is still the same
        afterwards. If the content matches, the access is granted and the
        real file, opened in the given access mode, is returned.

        Raises ``TimeoutError`` when the lock could not be obtained within
        ``timeout`` seconds.
        """
        fingerprint = (
            f'{os.getpid()}.{threading.current_thread().name}.{random.random()}'
        )
        deadline = time.monotonic() + (self.timeout or 0)

        # Always make at least one attempt, so timeout=0 means "try once"
        # instead of failing unconditionally.
        while True:
            owner = self._read_owner()
            if owner != fingerprint and (
                    not owner or not test_pid(owner.split('.')[0])):
                # Lock is free (or test_pid rejected its owner): claim it and
                # read it back to see whether our claim is the one that stuck.
                with open(self.lock_file, 'w') as lock:
                    lock.write(fingerprint)
                owner = self._read_owner()

            if owner == fingerprint:
                break
            if time.monotonic() >= deadline:
                raise TimeoutError(f"Locking {self.path} for {self.mode}")
            # Random back-off so competing claimants do not retry in lockstep.
            time.sleep(random.random() / 10.0)

        try:
            self.fd = open(self.path, self.mode)
        except Exception:
            # Do not leave the lock behind if the target cannot be opened.
            self._release()
            raise
        return self.fd

    def __exit__(self, *args):
        """Close the file and remove the lock."""
        try:
            # Close (and therefore flush) before releasing the lock, so the
            # next holder never sees a partially written file.
            if self.fd is not None:
                self.fd.close()
        finally:
            self.fd = None
            self._release()

94 

95 

class FSLayout(object):
    """Handle file contents based on predetermined patterns and structure.

    A *key* (``'etc'``, ``'pid'``, ``'out'`` ...) selects one of the
    ``patterns`` below; the pattern is expanded with the layout root, the
    key and a name built from the extra positional arguments. The file
    extension selects the encoder/decoder used to write/read the content.
    """
    # Splits '<workspace>/<key>/<name>[.<label>].<key>' back into its parts;
    # the extension must repeat the folder name (back-reference \2).
    reg_file_params = re.compile(
        r'(?P<workspace>.*)/(?P<key>[^/]+)/(?P<name>\w+-\w+)(\.(?P<label>.*))?\.(\2)',
        re.DOTALL)

    # extension -> callable turning file text into a value
    ext_decoder = {
        'yaml': yaml_decode,
        'json': json.decode,
        'pid': int,
        'out': identity,
        'err': identity,
    }

    # extension -> callable turning a value into file text
    ext_encoder = {
        'yaml': yaml_encode,
        'json': json.encode,
        'pid': str,
        'out': identity,
        'err': identity,
    }

    # extensions that reuse the codec of another extension
    ext_aliases = {
        'fp': 'yaml',
    }

    for k, v in ext_aliases.items():
        ext_decoder[k] = ext_decoder[v]
        ext_encoder[k] = ext_encoder[v]

    # tuple of keys -> path template using {root}, {key} and {name}
    patterns = {
        ('out', 'err', 'pid', 'fp', 'db'): '{root:}/{key:}/{name:}.{key:}',
        ('etc', ): '{root:}/etc/{name:}.yaml',
        ('<folder>', ): '{root:}/{name:}'
    }

    def __init__(self, path):
        """- ``path``: root folder of the layout (passed through ``expandpath``)."""
        self.path = expandpath(path)
        # path -> [mtime, (key, args)]: last modification time seen for each
        # file. Updated by ``get_content()`` / ``set_content()``.
        self.stat = dict()

    def get_path(self, key, *args, touch=False):
        """Get the file (or folder) of the key expanded with the given args.

        The name part is the args joined with ``'.'``.

        When ``touch=True`` the necessary folders are created: the path
        itself for ``'<folder>'``, the parent folder otherwise (the file
        itself is not created).

        Raises ``RuntimeError`` when no pattern handles ``key``.
        """
        root = self.path
        name = '.'.join(['{}'] * len(args)).format(*args)
        for pattern, fmt in self.patterns.items():
            if key in pattern:
                path = fmt.format(root=root, key=key, name=name)
                break
        else:
            raise RuntimeError(f"Key '{key}' not found to expand path")

        assert path
        if touch:
            if key in ('<folder>', ):
                os.makedirs(path, exist_ok=True)
            else:
                os.makedirs(os.path.dirname(path), exist_ok=True)
        return path

    def get_content(self, key, *args, default=None):
        """Get the content of a file in the layout structure.

        When the file yields no content and ``default`` is not None, the
        default is written to the file and returned. The file modification
        time is recorded in ``self.stat`` when the file exists.
        """
        path = self.get_path(key, *args)
        content = self._get_content(path)
        if content is None and default is not None:
            content = default
            self.set_content(key, content, *args)
        try:
            self.stat[path] = [os.stat(path).st_mtime, (key, args)]
        except FileNotFoundError:
            pass
        return content

    def set_content(self, key, content, *args):
        """Set the content of a key/args in the layout structure.
        Uses a FileLock to have exclusive access to the file.

        Returns the file path, or ``None`` when encoding/writing failed
        (best effort: the error is swallowed). A ``TimeoutError`` from the
        lock itself is not swallowed.
        """
        path = self.get_path(key, *args)
        os.makedirs(os.path.dirname(path), exist_ok=True)
        content = serializable_container(content)

        with FileLock(path, timeout=3) as f:
            try:
                func = self.ext_encoder.get(path.split('.')[-1], identity)
                f.write(func(content))
                # Flush first so the recorded mtime already includes this
                # write and it is not later mistaken for an external change.
                f.flush()
                self.stat[path] = [os.stat(path).st_mtime, (key, args)]
            except Exception:
                path = None
        return path

    def update_content(self, key, content, *args):
        """Update the content of a key/args in the layout structure.

        - read the current content (``content`` is used when there is none)
        - update it with ``content``
        - save the merged result
        - return the content last state
        """
        merged = self.get_content(key, *args, default=content)
        merged.update(content)
        # Save the merged state; saving only ``content`` would drop every
        # key that was already stored in the file.
        self.set_content(key, merged, *args)
        return merged

    def iter_file_content(self, key, name='', label='', *args):
        """Iterate over known files handled by the layout structure,
        filtering by name and label if they are provided.

        Walks the folder where files of ``key`` live and yields
        ``(params, filename, content)`` for every file whose path matches
        ``reg_file_params`` and whose content is not None. ``params`` is the
        regexp group dict. Extra ``*args`` are accepted but not used.
        """
        dummy = self.get_path(key, '__dummy__')
        top = os.path.dirname(dummy)
        for root, _, files in os.walk(top):
            for name_ in files:
                filename = os.path.join(root, name_)
                m = self.reg_file_params.search(filename)
                if not m:
                    continue
                d = m.groupdict()
                if name in ('', d['name']) and label in ('', d['label']):
                    content = self._get_content(filename)
                    if content is not None:
                        yield d, filename, content

    def iter_external_modifications(self):
        """Iterator for external known file modifications.

        Yields ``(path, (key, args))`` for every tracked file whose current
        modification time is newer than the recorded one. Files that can no
        longer be stat'ed are dropped from the tracking table.
        """
        for path, (mtime, params) in list(self.stat.items()):
            try:
                current = os.stat(path).st_mtime
            except OSError:
                self.stat.pop(path, None)
                continue
            if current > mtime:
                yield path, params

    def set_alias(self, key, alias, *args):
        """Set an alias of a file by creating a symbolic
        link between files.
        """
        src, dst = self._alias_info(key, alias, *args)
        if src != dst:
            assert os.path.exists(src)
            # lexists also catches a dangling link left from a previous alias.
            if os.path.lexists(dst):
                os.remove(dst)
            # A relative link (chdir to the folder and link the basename)
            # was tried and did not work, so the link is made from the
            # path as computed.
            os.symlink(src, dst)
            assert os.path.islink(dst)

    def remove_alias(self, key, alias, *args):
        """Remove an alias of a file."""
        src, dst = self._alias_info(key, alias, *args)
        assert os.path.islink(dst)
        os.remove(dst)

    def _alias_info(self, key, alias, *args):
        """Compute the source and target for an alias.

        The target lives in the same folder and keeps the same extension as
        the source, with ``alias`` replacing the file name.
        """
        src = self.get_path(key, *args)
        stem, ext = os.path.splitext(src)
        dst = os.path.join(os.path.dirname(stem), alias + ext)
        return src, dst

    def _get_content(self, filename, default=None):
        """Get the decoded content of a file, returning the default value
        if the file does not exist.

        The decoder is chosen by the file extension (identity when unknown).
        """
        try:
            with open(filename) as f:
                raw = f.read()
        except FileNotFoundError:
            return default
        func = self.ext_decoder.get(filename.split('.')[-1], identity)
        return func(raw)

281 

282# -------------------------------------------------- 

283# Configurable 

284# -------------------------------------------------- 

class Config(dict):
    """Dictionary of settings persisted through a layout under the ``'etc'`` key.

    - ``layout``: object providing ``get_content`` / ``set_content``.
    - ``name``: name the configuration is stored under.
    - ``default``: optional initial values.
    """

    def __init__(self, layout, name, default=None):
        self.layout = layout
        self.name = name
        if default:
            self.update(default)

    def load(self, default=None):
        """Replace the current values with the stored ones.

        ``default`` (an empty dict when omitted) is handed to the layout as
        the content to use when nothing is stored yet.
        """
        self.clear()
        if default is None:
            default = {}
        data = self.layout.get_content(
            'etc', self.name,
            default=default)
        if data is not None:
            self.update(data)

    def save(self, name=None):
        """Store the current values under ``name`` (``self.name`` if omitted).

        The value returned by the layout is kept in ``self.config``.
        """
        name = name or self.name
        self.config = self.layout.set_content(
            'etc', self, name)

    def update_key(self, key, values):
        """Merge ``values`` into the entry ``key`` and save.

        - dict + dict: updated in place.
        - list + iterable: merged as a set (duplicates dropped, order not kept).
        - anything else: the entry is replaced by ``values``.
        """
        current = self.get(key)
        if isinstance(current, dict) and isinstance(values, dict):
            current.update(values)
        elif isinstance(current, list) and isinstance(values, Iterable):
            merged = set(current)
            merged.update(values)
            self[key] = list(merged)
        else:
            self[key] = values

        self.save()

317 

318 

class DB(object):  # , metaclass=Xingleton):
    """This class provides persistent logging of events using sqlite.

    As sqlite doesn't support sharing a connection between threads,
    we implement a simple connection factory for the current thread.
    """
    scheme = ""

    def __init__(self, path=None, delete=False):
        """
        - ``path``: database file (passed through ``expandpath``).
        - ``delete``: remove an existing database file first.
        """
        self.path = expandpath(path)
        self.conn__ = dict()  # thread id -> sqlite3 connection
        self.workspaces = WeakKeyDictionary()

        if delete and os.path.exists(self.path):
            os.unlink(self.path)

    def __del__(self):
        # Only commit a connection this thread already has: going through
        # ``self.conn`` here would open a new connection (and possibly
        # create the database file) during garbage collection.
        conn = getattr(self, 'conn__', {}).get(threading.get_ident())
        if conn is not None:
            try:
                conn.commit()
            except sqlite3.Error:
                pass

    def __enter__(self):
        return self

    def __exit__(self, *_exc):
        """Commit the current thread's connection when leaving the context."""
        self.conn.commit()

    def __str__(self):
        return f"<{self.__class__.__name__}: {self.path}>"

    def __repr__(self):
        return str(self)

    @property
    def conn(self):
        "Connection Factory per thread"
        tid = threading.get_ident()

        conn = self.conn__.get(tid)
        if conn is None:
            self.conn__[tid] = conn = sqlite3.connect(self.path)
        return conn

    def close(self):
        """Commit and close the connections with the database.

        Connections that sqlite refuses to touch (``ProgrammingError``) are
        skipped. The per-thread cache is emptied so that a later use opens a
        fresh connection instead of reusing a closed one.
        """
        for conn in list(self.conn__.values()):
            try:
                conn.commit()
                conn.close()
            except sqlite3.ProgrammingError:
                pass
        self.conn__.clear()

    # def get(self, since=0):
    #     cursor = self.conn.cursor()
    #     cursor.execute(SELECT, (since, ))
    #     for raw in cursor.fetchall():
    #         if raw:
    #             event = Event(*raw)
    #             yield event

    def execute(self, query, *args, **kw):
        """Execute ``query`` on the current thread's connection.

        Positional ``args`` are bound as positional parameters; otherwise
        the keyword arguments are bound as named parameters. Returns
        whatever ``connection.execute`` returns.
        """
        conn = self.conn
        if args:
            return conn.execute(query, args)
        return conn.execute(query, kw)

    def executescript(self, script):
        """Run a SQL script and commit.

        An ``sqlite3.OperationalError`` is reported on stdout together with
        the script, and is not re-raised.
        """
        conn = self.conn
        try:
            conn.executescript(script)
        except sqlite3.OperationalError as why:
            print('FAILED TO CREATE DB SCHEME: {}'.format(why))
            print(script)
        conn.commit()

    # workspace management
    @property
    def ready(self):
        """True when a database path is set."""
        return self.path is not None

    def attach(self, workspace):
        """Register a workspace and notify it at once if the db is ready."""
        self.workspaces[workspace] = True  # keep a life reference
        if self.ready:
            workspace.db_ready()

    def change_db(self, path):
        """Switch to another database file and notify attached workspaces."""
        self.path = path
        # force new operations to create a new sqlite3 connection per thread
        self.conn__.clear()
        for workspace in self.workspaces:
            workspace.db_ready()

411 

412 

class DBWorkspace(object):
    """Store and query objects in the tables of a ``DB``.

    Objects are expected to provide ``asdict()`` and their class
    ``__slots__``; the table name defaults to the snake_case class name.
    """
    scheme = ""
    REPLACE = 'REPLACE INTO {table:} ({}) VALUES (:{})'
    INSERT = 'INSERT INTO {table:} ({}) VALUES (:{})'
    SELECT = 'SELECT * FROM {table:} WHERE {where:}'
    DELETE = 'DELETE FROM {table:} WHERE {}'

    uobject_table = dict()  # class -> table name cache (shared)

    def __init__(self, db):
        self.db = db
        self.db.attach(self)

    def db_ready(self):
        """Called by the db when it is usable: create the schema."""
        if self.db.ready:
            self.__create_squema__()

    def __create_squema__(self):
        """Run ``self.scheme`` on the db; on any error print it and call
        ``retry(1)``."""
        try:
            self.db.executescript(self.scheme)
            self.db.conn.commit()
        except Exception as why:
            print(why)
            retry(1)

    @classmethod
    def _get_table(cls, klass):
        """Return (and cache) the table name used for ``klass``."""
        table = cls.uobject_table.get(klass)
        if table is None:
            table = snake_case(klass.__name__)
            cls.uobject_table[klass] = table
        return table

    def update(self, uobject, sql=None, table=None, **kwargs):
        """Write ``uobject`` using ``sql`` (``REPLACE`` by default).

        The columns are the keys of ``uobject.asdict(skip_nones=True,
        **kwargs)``. The commit is retried up to 20 times, sleeping a random
        delay, while sqlite raises ``OperationalError``; if every attempt
        fails, the method returns without committing.
        """
        table = table or self._get_table(uobject.__class__)
        sql = sql or self.REPLACE

        kw = uobject.asdict(skip_nones=True, **kwargs)

        if 'date' in kw and kw['date'] is None:
            kw['date'] = datetime.now()

        columns = list(kw)
        sql = sql.format(','.join(columns), ',:'.join(columns), table=table)
        self.db.execute(sql, **kw)
        for _ in range(20):
            try:
                self.db.conn.commit()
                break
            except sqlite3.OperationalError:
                time.sleep(random.random())

    def xupdate(self, uobject, **kw):
        """Update ``uobject`` and return the first matching stored object
        (``None`` when nothing matches)."""
        self.update(uobject, **kw)
        kw.update(uobject.asdict())
        return next(self.find(uobject.__class__, **kw), None)

    def replace(self, uobject, **kw):
        """Store ``uobject`` only when no matching object exists.

        Returns the result of ``xupdate`` when the object was stored.
        NOTE(review): returns ``None`` when a match already exists; confirm
        whether callers would rather get the existing object.
        """
        kw.update(uobject.asdict())
        for _existing in self.find(uobject.__class__, **kw):
            return None
        return self.xupdate(uobject, **kw)

    def delete_item(self, uobject):
        """Delete the rows matching every non-None field of ``uobject``."""
        table = self._get_table(uobject.__class__)
        kw = uobject.asdict(skip_nones=True)
        condition = ' AND '.join([f'{k}=:{k}' for k in kw.keys()])
        sql = self.DELETE.format(condition, table=table)
        self.db.execute(sql, **kw)
        self.db.conn.commit()

    def delete(self, klass, join='AND', sql=None, table=None, **kw):
        """Delete the rows of ``klass`` matching the keyword filters.

        This is a generator: nothing is executed until it is iterated.
        Rows returned by the statement (if any) are yielded as ``klass``
        instances, and the change is committed once the iteration ends.
        """
        table = table or self._get_table(klass)
        sql = sql or self.DELETE

        for row in self._execute(klass, sql, join=join, table=table, **kw):
            yield klass(*row)
        self.db.conn.commit()

    def find(self, klass, join='AND', sql=None, table=None, **kw):
        """Yield a ``klass`` instance for every row matching the keyword
        filters."""
        table = table or self._get_table(klass)
        sql = sql or self.SELECT

        for row in self._execute(klass, sql, join=join, table=table, **kw):
            yield klass(*row)

    def _execute(self, klass, sql, join='AND', **kw):
        """Format ``sql`` with a WHERE clause and execute it.

        The clause compares every keyword that is a slot of ``klass`` and is
        not None, combined with ``join``; it is ``'1'`` when there is no
        filter. Returns what ``db.execute`` returns.
        """
        fields = [k for k in set(kw).intersection(klass.__slots__)
                  if kw[k] is not None]

        where = f' {join} '.join([f'{k}=:{k}' for k in fields]) or '1'

        # The clause is given both by position and by name: the DELETE
        # template uses a bare '{}' while SELECT uses '{where:}'.
        sql = sql.format(where, where=where, **kw)
        return self.db.execute(sql, **kw)

514 

def test_file_locking():
    """Manual check: nest two FileLock contexts on the same file.

    Writes a small 'etc' entry under ``/tmp/kk`` and then asks for the lock
    of that file twice, the inner request (2 s timeout) being made while
    the outer one (100 s timeout) is still held.
    NOTE(review): the inner request presumably times out; confirm the
    expected outcome.
    """
    layout = FSLayout(path='/tmp/kk')
    path = layout.set_content('etc', {'foo': 1, 'bar': 'dos'}, 'buzz')

    with FileLock(path, mode='a', timeout=100) as outer:
        with FileLock(path, mode='a', timeout=2) as inner:
            pass

524 

525 

# Run the manual locking check when the module is executed as a script.
if __name__ == '__main__':
    test_file_locking()