Source code for laf.fabric

import os
import glob
import collections
import functools
import time
from .lib import make_array_inverse
from .names import Names, FabricError
from .data import LafData
from .elements import Feature, Connection, XMLid, PrimaryData

[docs]class LafAPI(LafData): '''Makes all API methods available. ``API()`` returns a dict keyed by mnemonics and valued by API methods. ''' def __init__(self, names): self.api = {} self.names = names self.stamp = names.stamp LafData.__init__(self) self.result_files = []
[docs] def API(self): self._api_fcxp() self._api_nodes() self._api_edges() self._api_io() self._api_prep() return self.api
[docs] def APIprep(self): self._api_post() return self.api
[docs] def get_all_features(self): env = self.names.env loadables = set() for feat_path in glob.glob('{}/*'.format(env['m_compiled_dir'])): filename = os.path.basename(feat_path) if filename.startswith(('_', 'A', 'Z')): continue loadables.add('m{}'.format(filename)) for anx in env['annox']: for feat_path in glob.glob('{}/*'.format(env['annox'][anx]['a_compiled_dir'])): filename = os.path.basename(feat_path) if filename.startswith('_'): continue loadables.add('a{}:{}'.format(anx, filename)) self.all_features = collections.defaultdict(lambda: collections.defaultdict(lambda: set())) self.all_features_index = collections.defaultdict(lambda: collections.defaultdict(lambda: [])) self.all_features_origin = collections.defaultdict(lambda: collections.defaultdict(lambda: set())) for filename in loadables: (dorigin, dgroup, dkind, ddir, dcomps) = Names.decomp_full(filename) if dgroup != 'F': continue (namespace, label, name) = dcomps self.all_features[dkind][namespace].add("{}.{}".format(label, name)) self.all_features_index[dkind][name].append((namespace, label)) self.all_features_origin[dkind][(namespace, label, name)].add(dorigin) if 'e' in self.all_features: for fname in ('x', 'y'): self.all_features['e']['laf'].add('{}.{}'.format('', fname)) self.all_features_index['e'][fname].append(('laf', '')) self.all_features_origin['e'][('laf', '', fname)].add(dorigin)
def _api_fcxp(self): data_items = self.data_items api = { 'F': Bunch(), 'FE': Bunch(), 'C': Bunch(), 'Ci': Bunch(), } features = {'n': set(), 'e': set()} connections = {'f': set(), 'b': set()} xmlmaps = {'n': set(), 'e': set()} for dkey in data_items: (dorigin, dgroup, dkind, ddir, dcomps) = Names.decomp_full(dkey) if dgroup == 'F': features[dkind].add(dcomps) elif dgroup == 'C': connections[ddir].add(dcomps) elif dgroup == 'X': xmlmaps[dkind].add(dcomps) elif dgroup == 'P' and dcomps[0] == 'primary_data': api['P'] = PrimaryData(self) self.feature_abbs = collections.defaultdict(lambda: set()) self.feature_abb = {} for kind in sorted(features): for feat in sorted(features[kind]): name = Names.apiname(feat) for abb in (Names.apiname(feat[1:]), Names.apiname(feat[2:])): if abb: self.feature_abbs[abb].add(name) self.feature_abb[abb] = name for abb in self.feature_abbs: expansions = self.feature_abbs[abb] chosen = self.feature_abb[abb] if len(expansions) > 1: self.stamp.Imsg("Feature {} refers to {}, not to {}".format(abb, chosen, ', '.join(sorted(expansions - set([chosen]))))) for kind in features: for feat in features[kind]: name = Names.apiname(feat) obj = Feature(self, feat, kind) dest = api['FE'] if kind == 'e' else api['F'] dest.item[name] = obj setattr(dest, name, obj) for abb in (Names.apiname(feat[1:]), Names.apiname(feat[2:])): if abb and self.feature_abb.get(abb, '') == name: setattr(dest, abb, obj) dest.item[abb] = obj for inv in connections: for feat in connections[inv]: name = Names.apiname(feat) obj = Connection(self, feat, inv) dest = api['C'] if inv == 'f' else api['Ci'] if inv == 'b' else None dest.item[name] = obj setattr(dest, name, obj) for abb in (Names.apiname(feat[1:]), Names.apiname(feat[2:])): if abb and self.feature_abb.get(abb, '') == name: setattr(dest, abb, obj) dest.item[abb] = obj for kind in xmlmaps: for comp in xmlmaps[kind]: obj = XMLid(self, kind) dest = 'XE' if kind == 'e' else 'X' api[dest] = obj def feature_list(kind): result = [] for namespace in sorted(self.all_features[kind]): result.append((namespace, sorted(self.all_features[kind][namespace]))) return result def pretty_fl(flist): result = [] for ((namespace, features)) in flist: result.append('{}:'.format(namespace)) for feature in features: result.append('\t{}:'.format(feature)) return '\n'.join(result) api.update({ 'F_all': feature_list('n'), 'fF_all': pretty_fl(feature_list('n')), 'FE_all': feature_list('e'), 'fFE_all': pretty_fl(feature_list('e')), }) self.api.update(api) def _api_prep(self): api = self.api api['make_array_inverse'] = make_array_inverse api['data_items'] = self.data_items def _api_post(self): (self.prepare_init)(self) def _api_edges(self): data_items = self.data_items edges_from = data_items[Names.comp('mG00', ('edges_from',))] edges_to = data_items[Names.comp('mG00', ('edges_to',))] def next_edge(): for e in range(len(edges_from)): yield (e, edges_from[e], edges_to[e]) self.api.update({ 'EE': next_edge, }) def _api_nodes(self): data_items = self.data_items node_anchor_min = data_items[Names.comp('mG00', ('node_anchor_min',))] node_anchor_max = data_items[Names.comp('mG00', ('node_anchor_max',))] def before(nodea, nodeb): if node_anchor_min[nodea] == node_anchor_max[nodea] or node_anchor_min[nodeb] == node_anchor_max[nodeb]: return None if node_anchor_min[nodea] < node_anchor_min[nodeb]: return True if node_anchor_min[nodea] > node_anchor_min[nodeb]: return False if node_anchor_max[nodea] > node_anchor_max[nodeb]: return True if node_anchor_max[nodea] < node_anchor_max[nodeb]: return False return None def node_sort_key(node): return data_items[Names.comp('mG00', ('node_sort_inv',))][node] def msetbefore(sa,sb): if sa == sb: return 0 if sa <= sb: return 1 if sb <= sa: return -1 am = min(sa - sb) bm = min(sb - sa) return -1 if am < bm else 1 if bm < am else None msetkey = functools.cmp_to_key(msetbefore) def next_node(nodes=None, test=None, value=None, values=None, extrakey=None): class Extra_key(object): __slots__ = ['value', 'amin', 'amax'] def __init__(self, node): self.amin = node_anchor_min[node] - 1 self.amax = node_anchor_max[node] - 1 self.value = extrakey(node) def __lt__(self, other): return ( self.amin == other.amin and self.amax == other.amax and self.value < other.value ) def __gt__(self, other): return ( self.amin == other.amin and self.amax == other.amax and self.value > other.value ) def __eq__(self, other): return ( self.amin != other.amin or self.amax != other.amax or self.value == other.value ) __hash__ = None order = data_items[Names.comp('mG00', ('node_sort',))] order_key = data_items[Names.comp('mG00', ('node_sort_inv',))] the_nodes = sorted(nodes, key=lambda x: order_key[x]) if nodes else order if extrakey != None: self.stamp.Imsg("Resorting {} nodes...".format(len(the_nodes))) the_nodes = sorted(the_nodes, key=Extra_key) self.stamp.Imsg("Done") if test != None: test_values = set(([value] if value != None else []) + (list(values) if values != None else [])) if len(test_values): for node in the_nodes: if test(node) in test_values: yield node else: for node in the_nodes: if test(node): yield node else: for node in the_nodes: yield node def no_next_event(key=None, simplify=None): raise FabricError("Node events not available because primary data is not loaded.", self.stamp) return None def next_event(key=None, simplify=None): class Additional_key(object): __slots__ = ['value', 'kind', 'amin', 'amax'] def __init__(self, event): (node, kind) = event self.amin = node_anchor_min[node] - 1 self.amax = node_anchor_max[node] - 1 self.value = key(node) * (-1 if kind < 2 else 1) self.kind = kind def __lt__(self, other): return ( self.amin == other.amin and self.amax == other.amax and (self.kind == other.kind or self.amin == self.amax) and self.value < other.value ) def __gt__(self, other): return ( self.amin == other.amin and self.amax == other.amax and (self.kind == other.kind or self.amin == self.amax) and self.value > other.value ) def __eq__(self, other): return ( self.amin != other.amin or self.amax != other.amax or (self.kind != other.kind and (self.amin != self.amax or other.amin != other.amax)) or self.value == other.value ) __hash__ = None nodes = data_items[Names.comp('mP00', ('node_events_n',))] kinds = data_items[Names.comp('mP00', ('node_events_k',))] node_events = data_items[Names.comp('mP00', ('node_events',))] node_events_items = data_items[Names.comp('mP00', ('node_events_items',))] bufferevents = collections.deque([(-1, [])], 2) active = {} for anchor in range(len(node_events)): event_ids = self._getitems(node_events, node_events_items, anchor) if len(event_ids) == 0: continue eventset = [] for event_id in event_ids: node = nodes[event_id] if key == None or key(node) != None: eventset.append((nodes[event_id], kinds[event_id])) if not eventset: continue if key != None: eventset = sorted(eventset, key=Additional_key) if simplify == None: yield (anchor, eventset) continue bufferevents.append([anchor, eventset]) if bufferevents[0][0] == -1: continue (this_anchor, these_events) = bufferevents[0] (next_anchor, next_events) = bufferevents[1] deleted = {} for (n, kind) in these_events: if simplify(n): if kind == 3: deleted[n] = None elif kind == 2: active[n] = False elif kind == 1: active[n] = True elif kind == 0: active[n] = True for n in deleted: if n in active: del active[n] if True not in active.values(): weed = collections.defaultdict(lambda: False) for (n, k) in these_events: if k == 2: weed[n] = None for (n, k) in next_events: if k == 1: if n in weed: weed[n] = True if True in weed.values(): bufferevents[0][1] = [(n, k) for (n, k) in these_events if not (k == 2 and weed[n])] bufferevents[1][1] = [(n, k) for (n, k) in next_events if not (k == 1 and weed[n])] yield (bufferevents[0]) yield (bufferevents[0]) self.api.update({ 'BF': before, 'NN': next_node, 'NE': next_event if Names.comp('mP00', ('node_events',)) in data_items else no_next_event, 'NK': node_sort_key, 'MK': msetkey, }) def _api_io(self): def _inf(msg, newline=True, withtime=True, verbose=None): self.stamp.raw_msg(msg, newline=newline, withtime=withtime, verbose=verbose, error=False) def _msg(msg, newline=True, withtime=True, verbose=None): self.stamp.raw_msg(msg, newline=newline, withtime=withtime, verbose=verbose, error=True) task_dir = self.names.env['task_dir'] def add_output(file_name): result_file = "{}/{}".format(task_dir, file_name) handle = open(result_file, "w", encoding="utf-8") self.result_files.append(handle) return handle def add_input(file_name): result_file = "{}/{}".format(task_dir, file_name) handle = open(result_file, "r", encoding="utf-8") self.result_files.append(handle) return handle def result(file_name=None): if file_name == None: return task_dir else: return "{}/{}".format(task_dir, file_name) api = { 'infile': add_input, 'outfile': add_output, 'close': self.finish_task, 'my_file': result, 'msg': _msg, 'inf': _inf, 'data_dir': self.names.env['data_dir'], 'output_dir': self.names.env['output_dir'], } self.api.update(api) def _getitems(self, data, data_items, elem): data_items_index = data[elem] n_items = data_items[data_items_index] return data_items[data_items_index + 1:data_items_index + 1 + n_items] def __del__(self): for handle in self.result_files: if handle and not handle.closed: handle.close() LafData.__del__(self)
[docs]class Bunch(object): def __init__(self): self.item = {}
[docs]class LafFabric(object): '''Process manager. ``load(params)``: given the source, annox and task, loads the data, assembles the API, and returns the API. ''' def __init__(self, data_dir=None, laf_dir=None, output_dir=None, save=False, verbose=None): self.lafapi = LafAPI(Names(data_dir, laf_dir, output_dir, save, verbose)) self.lafapi.stamp.reset() self.api = {}
[docs] def load(self, source, annox, task, load_spec, add=False, compile_main=False, compile_annox=False, verbose='NORMAL', time_reset=True): self.api.clear() lafapi = self.lafapi self.api['fabric'] = self if time_reset: lafapi.stamp.reset() Names.check_load_spec(load_spec, lafapi.stamp) self.lafapi.stamp.set_verbose(verbose) lafapi.stamp.Nmsg("LOADING API{}: please wait ... ".format(' with EXTRAs' if add else '')) lafapi.names.setenv(source=source, annox=annox, task=task) lafapi.names.set_annox() env = lafapi.names.env lafapi.prepare_dirs(env['annox']) lafapi.compile_all({'m': compile_main, 'a': compile_annox}) req_items = {} lafapi.names.request_init(req_items) lafapi.get_all_features() if 'primary' in load_spec and load_spec['primary']: req_items['mP00'] = True if 'xmlids' in load_spec: for kind in [k[0] for k in load_spec['xmlids'] if load_spec['xmlids'][k]]: for ddir in ('f', 'b'): req_items['mX{}{}'.format(kind, ddir)].append(()) if 'features' in load_spec: self._request_features(load_spec['features'], req_items, add) prep = load_spec['prepare'] if 'prepare' in load_spec else (lafapi.prepare_dict, lafapi.prepare_init) if add else ({}, None) lafapi.load_all(req_items, prep, add) lafapi.add_logfile() self.api.update(lafapi.API()) if 'prepare' in load_spec: lafapi.stamp.Imsg("LOADING PREPARED data: please wait ... ") lafapi.prepare_all(self.api) self.api.update(lafapi.APIprep()) lafapi.stamp.Imsg("LOADED PREPARED data") lafapi.stamp.Smsg( 'DATA LOADED FROM SOURCE {} AND ANNOX {} FOR TASK {} AT {}'.format( env['source'], ', '.join(env['annox'].keys()), env['task'], time.strftime("%Y-%m-%dT%H-%M-%S", time.gmtime()) ), 'INFO' if time_reset else 'NORMAL', ) if time_reset: lafapi.stamp.reset() self.localnames = '\n'.join('''{key} = {{var}}.api['{key}']'''.format(key=key) for key in self.api) self.llocalnames = '\n'.join('''if '{key}' not in locals(): {key} = dict()\n{key}['{{biblang}}'] = {{var}}.api['{key}']'''.format(key=key) for key in self.api) self.lafapi.stamp.set_verbose(verbose) return self.api
[docs] def load_again(self, load_spec, annox=None, add=False, compile_main=False, compile_annox=False, verbose='NORMAL'): env = self.lafapi.names.env new_annox = annox if add: if annox == None or annox == '' or annox == env['empty'] or annox == [] or annox == {}: new_annox = env['annox'] else: new_annox = list(env['annox'].keys()) + [annox] else: if annox == None or annox == '' or annox == env['empty'] or annox == [] or annox == {}: new_annox = [] else: new_annox = [annox] x = self.load(env['source'], new_annox, env['task'], load_spec, add, compile_main=compile_main, compile_annox=compile_annox, verbose=verbose, time_reset=False) return x
[docs] def resolve_feature(self, kind, feature_given): lafapi = self.lafapi all_features = lafapi.all_features_index stamp = lafapi.stamp dkind = kind[0] if dkind not in all_features: raise FabricError("No features of kind {} in LAF resource".format(kind), stamp) (aspace, feature_raw) = feature_given.split(':', 1) if ':' in feature_given else (None, feature_given) (alabel, fname) = feature_raw.split('.', 1) if '.' in feature_raw else (None, feature_raw) if fname not in all_features[dkind]: raise FabricError("No such feature in LAF resource: {}".format(fname), stamp) hits = [] candidates = all_features[dkind][fname] for (aspacec, alabelc) in candidates: if (aspace == None or aspace == aspacec) and (alabel == None or alabelc == alabel): hits.append((aspacec, alabelc)) if not hits: raise FabricError("No feature in LAF resource: {}{}{}".format((aspace+':') if aspace != None else '', (alabel+'.') if alabel != None else '', fname), stamp) hit = hits[-1] the_feature = (hit[0], hit[1], fname) if len(hits) > 1: stamp.Imsg("Feature {}{}{} may mean any of {}. Choosing {}".format( (aspace+':') if aspace != None else '', (alabel+'.') if alabel != None else '', fname, ', '.join("{}:{}.{}".format(fc[0], fc[1], fname) for fc in hits), "{}:{}.{}".format(*the_feature), )) return the_feature
def _request_features(self, feat_spec, req_items, add): lafapi = self.lafapi env = lafapi.names.env all_features = lafapi.all_features_index stamp = lafapi.stamp the_features = collections.defaultdict(lambda: set()) if type(feat_spec) == dict: for aspace in feat_spec: for kind in feat_spec[aspace]: for line in feat_spec[aspace][kind]: (alabel, fnamestring) = line.split('.') if '.' in line else (None, line) fnames = fnamestring.split(',') for fname in fnames: the_features[kind].add((aspace, alabel, fname)) else: for (kind, index) in (("node", 0), ("edge", 1)): feature_list = feat_spec[index] features = feature_list.split() for line in features: the_features[kind].add(self.resolve_feature(kind, line)) for kind in the_features: dkind = kind[0] if dkind not in all_features: raise FabricError("No features of kind {} in LAF resource".format(kind), stamp) for (aspace, alabel, fname) in the_features[kind]: if fname not in all_features[dkind]: raise FabricError("No such feature in LAF resource: {}".format(fname), stamp) for origin in ['m'] + ['a{}'.format(anx) for anx in env['annox']]: osep = ':' if origin[0] == 'a' else '' if origin in lafapi.all_features_origin[dkind][(aspace, alabel, fname)]: req_items['{}F{}0'.format(origin+osep, dkind)].append((aspace, alabel, fname)) if dkind == 'e': for ddir in ('f', 'b'): req_items['{}C0{}'.format(origin+osep, ddir)].append((aspace, alabel, fname))