Source code for emdros2laf.settings

import os
import sys
import collections
import glob

import configparser
import argparse

NAME = 'LAF-Fabric'
VERSION = '4.8.3'
APIREF = 'http://laf-fabric.readthedocs.org/en/latest/texts/API-reference.html'
DEFAULT_DATA_DIR = 'laf-fabric-data'
MAIN_CFG = 'laf-fabric.cfg'
ALL_PARTS = ['monad', 'section', 'lingo']

[docs]class Settings: ''' Stores configuration information from the main configuration file and the command line. Defines an extra function in order to get the items in a section as a dictionary, without getting the DEFAULT items as wel ''' _myconfig = { 'my_name': NAME, 'version': VERSION, } _env_def = { 'my_name': '{my_name}', 'version': '{version}', 'template_dir': '{script_dir}/templates', 'xml_dir': '{script_dir}/xml', 'source': '{source}', 'meta_info': '{data_dir}/{source}/config/main.cfg', 'feature_info': '{data_dir}/{source}/config/ObjectsFeaturesValues.txt', 'feature_plain_info': '{data_dir}/{source}/config/ObjectsFeatures.csv', 'object_info': '{data_dir}/{source}/config/Objects.txt', 'raw_emdros_dir': '{data_dir}/{source}/raw', 'source_data': '{data_dir}/{source}/mql/{source}', 'query_dst_dir': '{data_dir}/{source}/mql', 'result_dir': '{data_dir}/{source}/laf', 'annot_hdr': '{data_dir}/{source}/laf/{source}', 'primary_text': '{data_dir}/{source}/laf/{source}.txt', 'primary_hdr_txt': '{data_dir}/{source}/laf/{source}.txt.hdr', 'resource_hdr_txt': '{data_dir}/{source}/laf/{source}.hdr', 'monad_index': '{data_dir}/{source}/laf/{source}.lst', 'decl_dst_dir': '{data_dir}/{source}/decl', } _metaconfig = { 'my_name': NAME, 'version': VERSION, 'ISOcatprefix': 'http://www.isocat.org/datcat/DC-', 'DANSpidprefix': 'http://persistent-identifier/?identifier=', } _meta_def = { 'my_name': '{my_name}', 'version': '{version}', 'source': '{source}', 'ISOcatprefix': '{ISOcatprefix}', 'DANSpidprefix': '{DANSpidprefix}', 'danspid_act': '{DANSpidprefix}{danspid_urn}', 'publicationdate': '{publicationdate}', 'danspid_urn': '{danspid_urn}', 'annot_method': 'conversion script {my_name} {version}', 'annot_resp': '{annot_resp}', 'primary': '{primary}', 'trailer': '{trailer}', 'verse_newline': '{verse_newline}', 'annot_space_def': '{annot_space_def}', 'prim_creator': '{prim_creator}', 'res_creator': '{res_creator}', 'prim_title': '{prim_title}', 'res_title': '{res_title}', 'prim_source_title': '{prim_source_title}', 'prim_source_author': '{prim_source_author}', 'prim_source_publisher':'{prim_source_publisher}', 'prim_source_date': '{prim_source_date}', 'prim_source_year': '{prim_source_year}', 'prim_source_place': '{prim_source_place}', 'prim_languages': '{prim_languages}', 'res_funder': '{res_funder}', 'res_respons_link': '{res_respons_link}', 'res_respons_name': '{res_respons_name}', 'res_distributor': '{res_distributor}', 'res_institute': '{res_institute}', 'res_email': '{res_email}', 'res_project_desc': '{res_project_desc}', 'res_sampling_desc': '{res_sampling_desc}', 'res_transduction': '{res_transduction}', 'res_correction': '{res_correction}', 'res_segmentation': '{res_segmentation}', } _laf_templates = { 'feature_decl': ('feature_decl.xml', False), 'feature': ('feature.xml', True), 'feature_local': ('feature_local.xml', True), 'feature_val': ('feature_val.xml', True), 'feature_val1': ('feature_sym.xml', True), 'feature_basic': ('feature_basic.xml', True), 'annotation_decl': ('annotation_decl.xml', True), 'annotation_item': ('annotation_item.xml', True), 'annotation_hdr': ('annotation_header.xml', False), 'annotation_label': ('annotation_label.xml', True), 'annotation_ftr': ('annotation_footer.xml', False), 'annotation_elem': ('annotation_element.xml', False), 'feature_elem': ('feature_element.xml', False), 'node_elem': ('node_element.xml', False), 'edge_elem': ('edge_element.xml', False), 'edgenode_elem': ('edgenode_element.xml', False), 'region_hdr': ('region_header.xml', False), 'region_elem': ('region_element.xml', False), 'resource_hdr': ('resource_header.xml', False), 'primary_hdr': ('primary_header.xml', False), 'dependency': ('dependency.xml', True), } _laf = { 'resource_header': 'requires', 'annotation_header': 'dependsOn', } _xml = { 'xmllint_cmd': 'xmllint --noout --nonet --schema {{schema}} {{xmlfile}}', 'xmllint_cat_env_var': 'XML_CATALOG_FILES', 'xmllint_cat_env_val': '{xml_dir}/xmllint_cat.xml', 'xlink_src': '{xml_dir}/xlink.xsd', 'xlink_dst': '{decl_dst_dir}/xlink.xsd', 'xml_src': '{xml_dir}/xml.xsd', 'xml_dst': '{decl_dst_dir}/xml.xsd', 'xml_isofs_src': '{xml_dir}/xml-isofs.xsd', 'xml_isofs_dst': '{decl_dst_dir}/xml-isofs.xsd', 'graf_annot_src': '{xml_dir}/graf-standoff.xsd', 'graf_annot_dst': '{decl_dst_dir}/graf-standoff.xsd', 'graf_resource_src': '{xml_dir}/graf-resource.xsd', 'graf_resource_dst': '{decl_dst_dir}/graf-resource.xsd', 'graf_document_src': '{xml_dir}/graf-document.xsd', 'graf_document_dst': '{decl_dst_dir}/graf-document.xsd', 'tei_fs_src': '{xml_dir}/isofs_dcr.xsd', 'tei_fs_dst': '{decl_dst_dir}/isofs_dcr.xsd', 'dcr_src': '{xml_dir}/dcr.xsd', 'dcr_dst': '{decl_dst_dir}/dcr.xsd', } _parts = { 'monad': { 'raw_text': '{raw_emdros_dir}/monad.txt', 'object_type': 'word', 'make_index': '', 'do_primary': '', 'separate_node_file':'', }, 'section': { 'raw_text': '{raw_emdros_dir}/section.txt', 'use_index': '', 'find_embedding': '', 'hierarchy': 'book chapter verse half_verse', }, 'lingo': { 'raw_text': '{raw_emdros_dir}/lingo.txt', 'separate_node_file':'', 'no_monad_nodes': '', }, } _annotation_kind = { 'monad': 'minimal objects&text&', 'section': 'section objects&text&', 'lingo': 'linguistic objects&text&', 'reference': 'linguistic relationships&fsDecl&decl/ft.xml', 'ft': 'linguistic features&fsDecl&decl/ft.xml', 'sft': 'sectional features&fsDecl&decl/sft.xml', 'db': 'database features&fsDecl&decl/db.xml', } _annotation_regions = { 'name': 'region', 'word': 'w', 'punct': 'p', 'section': 's', } _annotation_skip_object = { 'lingo': 'word', } annotation_skip = set(('self',)) _annotation_label = { 'section_label': 'sft', 'lingo_label': 'ft', 'monad_label': 'ft', 'db_label': 'db', } _type_mapping = { 'string': 'string', 'ascii': 'string', 'integer': 'numeric&value="0" max="100000000"', 'enum': 'symbol', 'boolean': 'binary', 'reference': 'string', } _type_boolean = { 't': 'false', 'f': 'true', } laf_switches = set(('comment_local_deps',)) _file_types = collections.OrderedDict(( ('f.hdr', '.hdr&xml'), ('f.primary.hdr', '.text.hdr&xml'), ('f.primary', '.txt&text'), ('f_monad.region', '_regions.xml&xml'), ('f_monad', '_monads.xml&xml&db&f_monad.region'), ('f_lingo', '_lingo.xml&xml&db&f_monad'), ('f_section', '_sections.xml&xml&db sft'), ('f_monad.*', '_monads.{{subpart}}.xml&xml&ft&f_monad'), ('f_lingo.*', '_lingo.{{subpart}}.xml&xml&ft&f_lingo'), ))
[docs] def flag(self, name): return getattr(self.args, name)
def __init__(self): print('This is {} {}\n{}'.format(NAME, VERSION, APIREF)) strings = configparser.ConfigParser(inline_comment_prefixes=('#')) script_dir = os.path.dirname(os.path.abspath(__file__)) home_dir = os.path.expanduser('~') global_config_dir = "{}/{}".format(home_dir, DEFAULT_DATA_DIR) global_config_path = "{}/{}".format(global_config_dir, MAIN_CFG) local_config_path = MAIN_CFG default_data_dir = global_config_dir default_laf_dir = global_config_dir config_data_dir = None config_laf_dir = None config_output_dir = None the_config_path = None for config_path in (local_config_path, global_config_path): if os.path.exists(config_path): the_config_path = config_path if the_config_path != None: with open(the_config_path, "r", encoding="utf-8") as f: strings.read_file(f) if 'locations' in strings: if 'data_dir' in strings['locations']: config_data_dir = strings['locations']['data_dir'] if 'laf_dir' in strings['locations']: config_laf_dir = strings['locations']['laf_dir'] if 'output_dir' in strings['locations']: config_output_dir = strings['locations']['output_dir'] the_data_dir = config_data_dir or default_data_dir the_laf_dir = config_laf_dir or the_data_dir the_output_dir = config_output_dir the_data_dir = \ the_data_dir.replace('.', cw_dir, 1) if the_data_dir.startswith('.') else the_data_dir.replace('~', home_dir, 1) if the_data_dir.startswith('~') else the_data_dir the_laf_dir = \ the_laf_dir.replace('.', cw_dir, 1) if the_laf_dir.startswith('.') else the_laf_dir.replace('~', home_dir, 1) if the_laf_dir.startswith('~') else the_laf_dir the_output_dir = \ the_output_dir.replace('.', cw_dir, 1) if the_output_dir.startswith('.') else the_output_dir.replace('~', home_dir, 1) if the_output_dir.startswith('~') else the_output_dir sources = [os.path.basename(x) for x in glob.glob("{}/*".format(the_data_dir)) if os.path.isdir(x)] self._myconfig['data_dir'] = the_data_dir self._myconfig['home_dir'] = home_dir self._myconfig['script_dir'] = script_dir argsparser = argparse.ArgumentParser(description = 'Conversion of Emdros to LAF') argsparser.add_argument( '--source', nargs = 1, type = str, choices = sources, metavar = 'Source', help = 'Source selection for conversion', ) argsparser.add_argument( '--parts', nargs = '*', type = str, choices = ALL_PARTS + ['all', 'none'], metavar = 'Kind', help = 'task in conversion process', ) argsparser.add_argument( "--raw", action = "store_true", help = "retrieve raw data from Emdros", ) argsparser.add_argument( "--validate", action = "store_true", help = "validate genrated xml files against their schemas", ) argsparser.add_argument( "--fdecls-only", dest = 'fdecls_only', default = False, action = "store_true", help = "only generate feature declaration file, nothing else", ) argsparser.add_argument( "--limit", dest = 'limit', type = int, metavar = 'Limit', help = "limit to the first N monads", ) self.args = argsparser.parse_args() self.given_parts = collections.OrderedDict() for arg in self.args.parts: if arg == 'none' or self.args.fdecls_only: for a in ALL_PARTS: if a in self.given_parts: del self.given_parts[a] elif arg == 'all': for a in ALL_PARTS: self.given_parts[a] = True else: self.given_parts[arg] = True source = self.args.source[0] self.env = dict((e, v.format(source=source, **self._myconfig)) for (e,v) in self._env_def.items()) with open(self.env['meta_info'], "r", encoding="utf-8") as f: strings.read_file(f) self._metaconfig.update(strings['meta'] if 'meta' in strings else {}) self.meta = dict((e, v.format(source=source, **self._metaconfig).replace('\\n','\n')) for (e,v) in self._meta_def.items()) self._myconfig.update(self.env) self.laf_templates = dict((e, (v[0].format(**self._myconfig), v[1])) for (e,v) in self._laf_templates.items()) self.laf = dict((e, v.format(**self._myconfig)) for (e,v) in self._laf.items()) self.xml = dict((e, v.format(**self._myconfig)) for (e,v) in self._xml.items()) self.parts = {} for p in self._parts: self.parts[p] = {} self.parts[p] = dict((e, v.format(**self._myconfig)) for (e,v) in self._parts[p].items()) self.annotation_kind = dict((e, v.format(**self._myconfig)) for (e,v) in self._annotation_kind.items()) self.annotation_regions = dict((e, v.format(**self._myconfig)) for (e,v) in self._annotation_regions.items()) self.annotation_skip_object = dict((e, v.format(**self._myconfig)) for (e,v) in self._annotation_skip_object.items()) self.annotation_label = dict((e, v.format(**self._myconfig)) for (e,v) in self._annotation_label.items()) self.type_mapping = dict((e, v.format(**self._myconfig)) for (e,v) in self._type_mapping.items()) self.type_boolean = dict((e, v.format(**self._myconfig)) for (e,v) in self._type_boolean.items()) self.file_types = collections.OrderedDict((e, v.format(**self._myconfig)) for (e,v) in self._file_types.items())