import os
import glob
import collections
import functools
import time
from .lib import make_array_inverse
from .names import Names, FabricError
from .data import LafData
from .elements import Feature, Connection, XMLid, PrimaryData
class LafAPI(LafData):
    '''Makes all API methods available.

    ``API()`` returns a dict keyed by mnemonics and valued by API methods.
    '''

    def __init__(self, names):
        '''Store the naming helper and start with an empty API dict.

        ``names`` must expose a ``stamp`` attribute (used for messages) and an
        ``env`` dict (read by the other methods of this class).
        '''
        self.api = {}
        self.names = names
        self.stamp = names.stamp
        LafData.__init__(self)
        # Handles handed out through ``infile``/``outfile``; closed in __del__.
        self.result_files = []

    def API(self):
        '''Assemble every API group into ``self.api`` and return that dict.'''
        self._api_fcxp()
        self._api_nodes()
        self._api_edges()
        self._api_io()
        self._api_prep()
        return self.api

    def APIprep(self):
        '''Run the post-preparation hook and return the API dict.'''
        self._api_post()
        return self.api

    def get_all_features(self):
        '''Scan the compiled directories and index every loadable feature.

        Fills three nested defaultdicts, all keyed first by data kind:

        * ``all_features[kind][namespace]``: set of ``"label.name"`` strings;
        * ``all_features_index[kind][name]``: list of ``(namespace, label)``;
        * ``all_features_origin[kind][(namespace, label, name)]``: set of origins.
        '''
        env = self.names.env
        loadables = set()
        for feat_path in glob.glob('{}/*'.format(env['m_compiled_dir'])):
            filename = os.path.basename(feat_path)
            if filename.startswith(('_', 'A', 'Z')):
                continue
            loadables.add('m{}'.format(filename))
        for anx in env['annox']:
            for feat_path in glob.glob('{}/*'.format(env['annox'][anx]['a_compiled_dir'])):
                filename = os.path.basename(feat_path)
                if filename.startswith('_'):
                    continue
                loadables.add('a{}:{}'.format(anx, filename))

        self.all_features = collections.defaultdict(lambda: collections.defaultdict(set))
        self.all_features_index = collections.defaultdict(lambda: collections.defaultdict(list))
        self.all_features_origin = collections.defaultdict(lambda: collections.defaultdict(set))
        for filename in loadables:
            (dorigin, dgroup, dkind, _ddir, dcomps) = Names.decomp_full(filename)
            if dgroup != 'F':
                continue
            (namespace, label, name) = dcomps
            self.all_features[dkind][namespace].add("{}.{}".format(label, name))
            self.all_features_index[dkind][name].append((namespace, label))
            self.all_features_origin[dkind][(namespace, label, name)].add(dorigin)

        # When edge features exist, also register the label-less ``laf`` edge
        # features ``x`` and ``y``.
        if 'e' in self.all_features:
            for fname in ('x', 'y'):
                self.all_features['e']['laf'].add('{}.{}'.format('', fname))
                self.all_features_index['e'][fname].append(('laf', ''))
                # NOTE(review): ``dorigin`` is whatever the last iteration of
                # the loop above left behind, and ``loadables`` is a set, so
                # the origin recorded here depends on set iteration order.
                # Kept as found; confirm the intended origin.
                self.all_features_origin['e'][('laf', '', fname)].add(dorigin)

    def _api_fcxp(self):
        '''Build the F, FE, C, Ci, X, XE and P entries plus the feature listings.

        Every loaded data item is classified through ``Names.decomp_full``.
        Features and connections become attributes (and ``item`` entries) of
        ``Bunch`` containers, under their full API name and under each
        abbreviation that resolves to them.
        '''
        data_items = self.data_items
        api = {
            'F': Bunch(),
            'FE': Bunch(),
            'C': Bunch(),
            'Ci': Bunch(),
        }
        features = {'n': set(), 'e': set()}
        connections = {'f': set(), 'b': set()}
        xmlmaps = {'n': set(), 'e': set()}
        for dkey in data_items:
            (_dorigin, dgroup, dkind, ddir, dcomps) = Names.decomp_full(dkey)
            if dgroup == 'F':
                features[dkind].add(dcomps)
            elif dgroup == 'C':
                connections[ddir].add(dcomps)
            elif dgroup == 'X':
                xmlmaps[dkind].add(dcomps)
            elif dgroup == 'P' and dcomps[0] == 'primary_data':
                api['P'] = PrimaryData(self)

        # Abbreviations drop the first one or two name components. Iterating
        # in sorted order means the last full name wins deterministically.
        self.feature_abbs = collections.defaultdict(set)
        self.feature_abb = {}
        for kind in sorted(features):
            for feat in sorted(features[kind]):
                name = Names.apiname(feat)
                for abb in (Names.apiname(feat[1:]), Names.apiname(feat[2:])):
                    if abb:
                        self.feature_abbs[abb].add(name)
                        self.feature_abb[abb] = name

        # Tell the user which expansion an ambiguous abbreviation refers to.
        for (abb, expansions) in self.feature_abbs.items():
            chosen = self.feature_abb[abb]
            if len(expansions) > 1:
                self.stamp.Imsg("Feature {} refers to {}, not to {}".format(
                    abb, chosen, ', '.join(sorted(expansions - set([chosen])))
                ))

        for kind in features:
            for feat in features[kind]:
                name = Names.apiname(feat)
                obj = Feature(self, feat, kind)
                dest = api['FE'] if kind == 'e' else api['F']
                dest.item[name] = obj
                setattr(dest, name, obj)
                for abb in (Names.apiname(feat[1:]), Names.apiname(feat[2:])):
                    if abb and self.feature_abb.get(abb, '') == name:
                        setattr(dest, abb, obj)
                        dest.item[abb] = obj

        for inv in connections:
            # ``connections`` only has the keys 'f' (forward) and 'b' (backward).
            dest = api['C'] if inv == 'f' else api['Ci']
            for feat in connections[inv]:
                name = Names.apiname(feat)
                obj = Connection(self, feat, inv)
                dest.item[name] = obj
                setattr(dest, name, obj)
                for abb in (Names.apiname(feat[1:]), Names.apiname(feat[2:])):
                    if abb and self.feature_abb.get(abb, '') == name:
                        setattr(dest, abb, obj)
                        dest.item[abb] = obj

        for kind in xmlmaps:
            dest_key = 'XE' if kind == 'e' else 'X'
            # One XMLid object is constructed per loaded map; the last one is kept.
            for _ in xmlmaps[kind]:
                api[dest_key] = XMLid(self, kind)

        def feature_list(kind):
            '''Return ``[(namespace, sorted feature names)]`` sorted by namespace.'''
            return [
                (namespace, sorted(self.all_features[kind][namespace]))
                for namespace in sorted(self.all_features[kind])
            ]

        def pretty_fl(flist):
            '''Render a feature list as text: a namespace line, then tab-indented features.'''
            lines = []
            for (namespace, feature_names) in flist:
                lines.append('{}:'.format(namespace))
                for feature in feature_names:
                    lines.append('\t{}:'.format(feature))
            return '\n'.join(lines)

        api.update({
            'F_all': feature_list('n'),
            'fF_all': pretty_fl(feature_list('n')),
            'FE_all': feature_list('e'),
            'fFE_all': pretty_fl(feature_list('e')),
        })
        self.api.update(api)

    def _api_prep(self):
        '''Expose ``make_array_inverse`` and the raw ``data_items`` in the API.'''
        api = self.api
        api['make_array_inverse'] = make_array_inverse
        api['data_items'] = self.data_items

    def _api_post(self):
        '''Call the stored ``prepare_init`` hook with this object as argument.'''
        (self.prepare_init)(self)

    def _api_edges(self):
        '''Add ``EE``, a generator function over ``(edge, from_node, to_node)``.'''
        data_items = self.data_items
        edges_from = data_items[Names.comp('mG00', ('edges_from',))]
        edges_to = data_items[Names.comp('mG00', ('edges_to',))]

        def next_edge():
            '''Yield ``(edge, from_node, to_node)`` for every edge index in order.'''
            for (e, (node_from, node_to)) in enumerate(zip(edges_from, edges_to)):
                yield (e, node_from, node_to)

        self.api.update({
            'EE': next_edge,
        })

    def _api_nodes(self):
        '''Add the node-related API members ``BF``, ``NN``, ``NE``, ``NK`` and ``MK``.'''
        data_items = self.data_items
        node_anchor_min = data_items[Names.comp('mG00', ('node_anchor_min',))]
        node_anchor_max = data_items[Names.comp('mG00', ('node_anchor_max',))]

        def before(nodea, nodeb):
            '''Compare two nodes by their minimum and maximum anchors.

            Returns ``None`` if either node has equal min and max anchors, or
            if both nodes have identical boundaries. Otherwise the node with
            the smaller minimum anchor comes first (``True``); on equal minima
            the node with the larger maximum anchor comes first.
            '''
            if node_anchor_min[nodea] == node_anchor_max[nodea] or node_anchor_min[nodeb] == node_anchor_max[nodeb]:
                return None
            if node_anchor_min[nodea] < node_anchor_min[nodeb]:
                return True
            if node_anchor_min[nodea] > node_anchor_min[nodeb]:
                return False
            if node_anchor_max[nodea] > node_anchor_max[nodeb]:
                return True
            if node_anchor_max[nodea] < node_anchor_max[nodeb]:
                return False
            return None

        def node_sort_key(node):
            '''Return the position of ``node`` according to ``node_sort_inv``.'''
            return data_items[Names.comp('mG00', ('node_sort_inv',))][node]

        def msetbefore(sa, sb):
            '''cmp-style comparison of two sets.

            Equal sets give 0; if ``sa`` is a subset of ``sb`` the result is 1,
            if ``sb`` is a subset of ``sa`` it is -1. Otherwise the set whose
            private part (the set difference) has the smaller minimum sorts
            first.
            '''
            if sa == sb:
                return 0
            if sa <= sb:
                return 1
            if sb <= sa:
                return -1
            am = min(sa - sb)
            bm = min(sb - sa)
            # The two differences are disjoint, so equal minima (the ``None``
            # result) are not expected for ordinary comparable elements.
            return -1 if am < bm else 1 if bm < am else None

        msetkey = functools.cmp_to_key(msetbefore)

        def next_node(nodes=None, test=None, value=None, values=None, extrakey=None):
            '''Yield nodes in the stored node order, optionally filtered and re-sorted.

            ``nodes``: iterable of nodes to restrict to; if falsy, all nodes in
            ``node_sort`` order are used.
            ``test``: callable applied to each node. If ``value`` and/or
            ``values`` is given, a node is yielded when ``test(node)`` is one
            of them; otherwise when ``test(node)`` is truthy.
            ``extrakey``: callable giving an extra sort value, used only to
            order nodes that have identical anchor boundaries.
            '''
            class Extra_key(object):
                # Only nodes with identical boundaries are ever "less" or
                # "greater"; every other pair compares as equal, so the stable
                # sort keeps the pre-existing order between them.
                __slots__ = ['value', 'amin', 'amax']

                def __init__(self, node):
                    self.amin = node_anchor_min[node] - 1
                    self.amax = node_anchor_max[node] - 1
                    self.value = extrakey(node)

                def __lt__(self, other):
                    return (
                        self.amin == other.amin and
                        self.amax == other.amax and
                        self.value < other.value
                    )

                def __gt__(self, other):
                    return (
                        self.amin == other.amin and
                        self.amax == other.amax and
                        self.value > other.value
                    )

                def __eq__(self, other):
                    return (
                        self.amin != other.amin or
                        self.amax != other.amax or
                        self.value == other.value
                    )

                __hash__ = None

            order = data_items[Names.comp('mG00', ('node_sort',))]
            order_key = data_items[Names.comp('mG00', ('node_sort_inv',))]
            the_nodes = sorted(nodes, key=lambda x: order_key[x]) if nodes else order
            if extrakey is not None:
                self.stamp.Imsg("Resorting {} nodes...".format(len(the_nodes)))
                the_nodes = sorted(the_nodes, key=Extra_key)
                self.stamp.Imsg("Done")
            if test is not None:
                test_values = set(
                    ([value] if value is not None else []) +
                    (list(values) if values is not None else [])
                )
                if test_values:
                    for node in the_nodes:
                        if test(node) in test_values:
                            yield node
                else:
                    for node in the_nodes:
                        if test(node):
                            yield node
            else:
                for node in the_nodes:
                    yield node

        def no_next_event(key=None, simplify=None):
            '''Stand-in for ``NE`` when node events are not loaded; always raises.'''
            raise FabricError("Node events not available because primary data is not loaded.", self.stamp)

        def next_event(key=None, simplify=None):
            '''Yield ``(anchor, events)`` where events is a list of ``(node, kind)``.

            ``key``: optional callable on nodes. Nodes for which it returns
            ``None`` are skipped, and the remaining events at an anchor are
            re-sorted with ``Additional_key``.
            ``simplify``: optional callable on nodes. When given, events are
            buffered two anchors at a time so that a kind-2 event that is
            immediately followed by a kind-1 event of the same node can be
            dropped while no node selected by ``simplify`` is active.
            '''
            class Additional_key(object):
                # Same idea as Extra_key in next_node: ordering only kicks in
                # for events whose nodes share both boundaries.
                __slots__ = ['value', 'kind', 'amin', 'amax']

                def __init__(self, event):
                    (node, kind) = event
                    self.amin = node_anchor_min[node] - 1
                    self.amax = node_anchor_max[node] - 1
                    # The key value is negated for event kinds 0 and 1.
                    self.value = key(node) * (-1 if kind < 2 else 1)
                    self.kind = kind

                def __lt__(self, other):
                    return (
                        self.amin == other.amin and
                        self.amax == other.amax and
                        (self.kind == other.kind or self.amin == self.amax) and
                        self.value < other.value
                    )

                def __gt__(self, other):
                    return (
                        self.amin == other.amin and
                        self.amax == other.amax and
                        (self.kind == other.kind or self.amin == self.amax) and
                        self.value > other.value
                    )

                def __eq__(self, other):
                    return (
                        self.amin != other.amin or
                        self.amax != other.amax or
                        (self.kind != other.kind and (self.amin != self.amax or other.amin != other.amax)) or
                        self.value == other.value
                    )

                __hash__ = None

            nodes = data_items[Names.comp('mP00', ('node_events_n',))]
            kinds = data_items[Names.comp('mP00', ('node_events_k',))]
            node_events = data_items[Names.comp('mP00', ('node_events',))]
            node_events_items = data_items[Names.comp('mP00', ('node_events_items',))]
            # Two-slot window over consecutive event sets; the first slot
            # starts out as a sentinel with anchor -1.
            bufferevents = collections.deque([(-1, [])], 2)
            active = {}
            for anchor in range(len(node_events)):
                event_ids = self._getitems(node_events, node_events_items, anchor)
                if len(event_ids) == 0:
                    continue
                eventset = []
                for event_id in event_ids:
                    node = nodes[event_id]
                    if key is None or key(node) is not None:
                        eventset.append((node, kinds[event_id]))
                if not eventset:
                    continue
                if key is not None:
                    eventset = sorted(eventset, key=Additional_key)
                if simplify is None:
                    yield (anchor, eventset)
                    continue

                bufferevents.append([anchor, eventset])
                if bufferevents[0][0] == -1:
                    # Only the sentinel and one real entry so far: need a
                    # second real entry before anything can be compared.
                    continue
                these_events = bufferevents[0][1]
                next_events = bufferevents[1][1]
                deleted = {}
                for (n, kind) in these_events:
                    if simplify(n):
                        if kind == 3:
                            deleted[n] = None
                        elif kind == 2:
                            active[n] = False
                        elif kind == 1:
                            active[n] = True
                        elif kind == 0:
                            active[n] = True
                for n in deleted:
                    if n in active:
                        del active[n]
                if True not in active.values():
                    # weed[n] is None for nodes with a kind-2 event here, and
                    # becomes True when the next anchor has a kind-1 event for
                    # the same node.
                    weed = collections.defaultdict(lambda: False)
                    for (n, k) in these_events:
                        if k == 2:
                            weed[n] = None
                    for (n, k) in next_events:
                        if k == 1:
                            if n in weed:
                                weed[n] = True
                    if True in weed.values():
                        bufferevents[0][1] = [(n, k) for (n, k) in these_events if not (k == 2 and weed[n])]
                        bufferevents[1][1] = [(n, k) for (n, k) in next_events if not (k == 1 and weed[n])]
                yield (bufferevents[0])
            # NOTE(review): indentation was lost in the extracted source; this
            # trailing yield is reconstructed as running once after the loop.
            # As written it re-emits bufferevents[0] (or the (-1, []) sentinel
            # when nothing was buffered) rather than the last buffered entry,
            # bufferevents[1]. Kept as found; confirm against upstream.
            yield (bufferevents[0])

        self.api.update({
            'BF': before,
            'NN': next_node,
            'NE': next_event if Names.comp('mP00', ('node_events',)) in data_items else no_next_event,
            'NK': node_sort_key,
            'MK': msetkey,
        })

    def _api_io(self):
        '''Add messaging and task-file helpers to the API.'''
        def _inf(msg, newline=True, withtime=True, verbose=None):
            '''Pass ``msg`` to ``stamp.raw_msg`` with ``error=False``.'''
            self.stamp.raw_msg(msg, newline=newline, withtime=withtime, verbose=verbose, error=False)

        def _msg(msg, newline=True, withtime=True, verbose=None):
            '''Pass ``msg`` to ``stamp.raw_msg`` with ``error=True``.'''
            self.stamp.raw_msg(msg, newline=newline, withtime=withtime, verbose=verbose, error=True)

        task_dir = self.names.env['task_dir']

        def add_output(file_name):
            '''Open ``file_name`` in the task directory for writing (UTF-8).

            The handle is remembered so that it can be closed in ``__del__``.
            '''
            result_file = "{}/{}".format(task_dir, file_name)
            handle = open(result_file, "w", encoding="utf-8")
            self.result_files.append(handle)
            return handle

        def add_input(file_name):
            '''Open ``file_name`` in the task directory for reading (UTF-8).

            The handle is remembered so that it can be closed in ``__del__``.
            '''
            result_file = "{}/{}".format(task_dir, file_name)
            handle = open(result_file, "r", encoding="utf-8")
            self.result_files.append(handle)
            return handle

        def result(file_name=None):
            '''Return the task directory, or the path of ``file_name`` inside it.'''
            if file_name is None:
                return task_dir
            return "{}/{}".format(task_dir, file_name)

        api = {
            'infile': add_input,
            'outfile': add_output,
            'close': self.finish_task,
            'my_file': result,
            'msg': _msg,
            'inf': _inf,
            'data_dir': self.names.env['data_dir'],
            'output_dir': self.names.env['output_dir'],
        }
        self.api.update(api)

    def _getitems(self, data, data_items, elem):
        '''Return the run of items that belongs to ``elem``.

        ``data[elem]`` is an index into ``data_items``; the value stored there
        is the number of items, and the items themselves follow directly.
        '''
        data_items_index = data[elem]
        n_items = data_items[data_items_index]
        return data_items[data_items_index + 1:data_items_index + 1 + n_items]

    def __del__(self):
        '''Close any task files still open, then run the base-class finalizer.'''
        # ``result_files`` is assigned after LafData.__init__, so it may be
        # missing if construction failed part-way.
        for handle in getattr(self, 'result_files', ()):
            if handle and not handle.closed:
                handle.close()
        LafData.__del__(self)
class Bunch(object):
    '''Plain attribute container that also keeps a name-to-object dict.

    Objects are attached both as attributes and as entries of ``item``.
    '''

    def __init__(self):
        # Maps API names (and abbreviations) to the attached objects.
        self.item = {}
class LafFabric(object):
    '''Process manager.

    ``load(params)``: given the source, annox and task, loads the data, assembles the API, and returns the API.
    '''

    def __init__(self, data_dir=None, laf_dir=None, output_dir=None, save=False, verbose=None):
        '''Create the underlying ``LafAPI`` from a fresh ``Names`` and reset its stamp.'''
        self.lafapi = LafAPI(Names(data_dir, laf_dir, output_dir, save, verbose))
        self.lafapi.stamp.reset()
        self.api = {}

    def load(self, source, annox, task, load_spec, add=False, compile_main=False, compile_annox=False, verbose='NORMAL', time_reset=True):
        '''Load data according to ``load_spec`` and return the assembled API dict.

        ``source``, ``annox``, ``task``: passed to ``names.setenv``.
        ``load_spec``: dict; the keys ``primary``, ``xmlids``, ``features`` and
        ``prepare`` are inspected here.
        ``add``: passed on to ``load_all`` and the feature request.
        ``compile_main`` / ``compile_annox``: passed to ``compile_all``.
        ``verbose``: verbosity level set on the stamp.
        ``time_reset``: if true, the stamp is reset before and after loading.

        Also sets ``localnames`` and ``llocalnames``, code templates that bind
        every API key to a local name.
        '''
        self.api.clear()
        lafapi = self.lafapi
        self.api['fabric'] = self
        if time_reset:
            lafapi.stamp.reset()
        Names.check_load_spec(load_spec, lafapi.stamp)
        lafapi.stamp.set_verbose(verbose)
        lafapi.stamp.Nmsg("LOADING API{}: please wait ... ".format(' with EXTRAs' if add else ''))
        lafapi.names.setenv(source=source, annox=annox, task=task)
        lafapi.names.set_annox()
        env = lafapi.names.env
        lafapi.prepare_dirs(env['annox'])
        lafapi.compile_all({'m': compile_main, 'a': compile_annox})

        # Collect what has to be loaded.
        req_items = {}
        lafapi.names.request_init(req_items)
        lafapi.get_all_features()
        if 'primary' in load_spec and load_spec['primary']:
            req_items['mP00'] = True
        if 'xmlids' in load_spec:
            for kind in [k[0] for k in load_spec['xmlids'] if load_spec['xmlids'][k]]:
                for ddir in ('f', 'b'):
                    req_items['mX{}{}'.format(kind, ddir)].append(())
        if 'features' in load_spec:
            self._request_features(load_spec['features'], req_items, add)

        if 'prepare' in load_spec:
            prep = load_spec['prepare']
        elif add:
            # No new prepare spec while adding: keep the previous one.
            prep = (lafapi.prepare_dict, lafapi.prepare_init)
        else:
            prep = ({}, None)
        lafapi.load_all(req_items, prep, add)
        lafapi.add_logfile()
        self.api.update(lafapi.API())
        if 'prepare' in load_spec:
            lafapi.stamp.Imsg("LOADING PREPARED data: please wait ... ")
            lafapi.prepare_all(self.api)
            self.api.update(lafapi.APIprep())
            lafapi.stamp.Imsg("LOADED PREPARED data")
        lafapi.stamp.Smsg(
            'DATA LOADED FROM SOURCE {} AND ANNOX {} FOR TASK {} AT {}'.format(
                env['source'], ', '.join(env['annox'].keys()), env['task'], time.strftime("%Y-%m-%dT%H-%M-%S", time.gmtime())
            ),
            'INFO' if time_reset else 'NORMAL',
        )
        if time_reset:
            lafapi.stamp.reset()
        self.localnames = '\n'.join('''{key} = {{var}}.api['{key}']'''.format(key=key) for key in self.api)
        self.llocalnames = '\n'.join('''if '{key}' not in locals(): {key} = dict()\n{key}['{{biblang}}'] = {{var}}.api['{key}']'''.format(key=key) for key in self.api)
        lafapi.stamp.set_verbose(verbose)
        return self.api

    def load_again(self, load_spec, annox=None, add=False, compile_main=False, compile_annox=False, verbose='NORMAL'):
        '''Reload with the current source and task, without resetting the timer.

        ``annox``: a single annox name, or an empty value (``None``, ``''``,
        ``[]``, ``{}`` or ``env['empty']``) for none. With ``add`` true the
        given annox is appended to the current ones; otherwise it replaces them.
        '''
        env = self.lafapi.names.env
        no_annox = annox is None or annox == '' or annox == env['empty'] or annox == [] or annox == {}
        if add:
            new_annox = env['annox'] if no_annox else list(env['annox'].keys()) + [annox]
        else:
            new_annox = [] if no_annox else [annox]
        return self.load(
            env['source'], new_annox, env['task'], load_spec, add,
            compile_main=compile_main, compile_annox=compile_annox,
            verbose=verbose, time_reset=False,
        )

    def resolve_feature(self, kind, feature_given):
        '''Expand a possibly abbreviated feature to ``(namespace, label, name)``.

        ``kind``: string whose first character is the data kind used as index key.
        ``feature_given``: ``[namespace:][label.]name``; only the first ``:``
        and the first ``.`` act as separators.

        If several known features match, the last candidate is chosen and a
        message is issued. Raises ``FabricError`` if the kind or the name is
        unknown or if nothing matches.
        '''
        lafapi = self.lafapi
        all_features = lafapi.all_features_index
        stamp = lafapi.stamp
        dkind = kind[0]
        if dkind not in all_features:
            raise FabricError("No features of kind {} in LAF resource".format(kind), stamp)
        (aspace, feature_raw) = feature_given.split(':', 1) if ':' in feature_given else (None, feature_given)
        (alabel, fname) = feature_raw.split('.', 1) if '.' in feature_raw else (None, feature_raw)
        if fname not in all_features[dkind]:
            raise FabricError("No such feature in LAF resource: {}".format(fname), stamp)

        hits = [
            (aspacec, alabelc)
            for (aspacec, alabelc) in all_features[dkind][fname]
            if (aspace is None or aspace == aspacec) and (alabel is None or alabelc == alabel)
        ]
        # The request as the user wrote it, for use in messages.
        asked_space = (aspace + ':') if aspace is not None else ''
        asked_label = (alabel + '.') if alabel is not None else ''
        if not hits:
            raise FabricError("No feature in LAF resource: {}{}{}".format(asked_space, asked_label, fname), stamp)
        hit = hits[-1]
        the_feature = (hit[0], hit[1], fname)
        if len(hits) > 1:
            stamp.Imsg("Feature {}{}{} may mean any of {}. Choosing {}".format(
                asked_space,
                asked_label,
                fname,
                ', '.join("{}:{}.{}".format(fc[0], fc[1], fname) for fc in hits),
                "{}:{}.{}".format(*the_feature),
            ))
        return the_feature

    def _request_features(self, feat_spec, req_items, add):
        '''Translate a feature specification into entries of ``req_items``.

        ``feat_spec`` is either a dict ``{namespace: {kind: ["label.n1,n2", ...]}}``
        or a sequence of two whitespace-separated strings (node features,
        edge features) whose words go through ``resolve_feature``.
        ``req_items`` must already hold list values under the keys used here.
        ``add`` is accepted but not used by this method.

        Raises ``FabricError`` for an unknown kind or feature name.
        '''
        lafapi = self.lafapi
        env = lafapi.names.env
        all_features = lafapi.all_features_index
        stamp = lafapi.stamp
        the_features = collections.defaultdict(set)
        if isinstance(feat_spec, dict):
            for aspace in feat_spec:
                for kind in feat_spec[aspace]:
                    for line in feat_spec[aspace][kind]:
                        # Split on the first dot only, as resolve_feature
                        # does; a plain split('.') fails to unpack when the
                        # name part itself contains a dot.
                        (alabel, fnamestring) = line.split('.', 1) if '.' in line else (None, line)
                        for fname in fnamestring.split(','):
                            the_features[kind].add((aspace, alabel, fname))
        else:
            for (kind, index) in (("node", 0), ("edge", 1)):
                for line in feat_spec[index].split():
                    the_features[kind].add(self.resolve_feature(kind, line))

        origins = ['m'] + ['a{}'.format(anx) for anx in env['annox']]
        for kind in the_features:
            dkind = kind[0]
            if dkind not in all_features:
                raise FabricError("No features of kind {} in LAF resource".format(kind), stamp)
            for (aspace, alabel, fname) in the_features[kind]:
                if fname not in all_features[dkind]:
                    raise FabricError("No such feature in LAF resource: {}".format(fname), stamp)
                feature = (aspace, alabel, fname)
                for origin in origins:
                    # Annox origins carry a ':' between origin and the rest of the key.
                    osep = ':' if origin[0] == 'a' else ''
                    if origin in lafapi.all_features_origin[dkind][feature]:
                        req_items['{}F{}0'.format(origin + osep, dkind)].append(feature)
                        if dkind == 'e':
                            # Edge features also need both connection directions.
                            for ddir in ('f', 'b'):
                                req_items['{}C0{}'.format(origin + osep, ddir)].append(feature)