Module gatenlp.gateslave

Module for interacting with a Java GATE process, running API commands on it and exchanging data with it.

Expand source code
#!/usr/bin/env python
"""
Module for interacting with a Java GATE process, running API commands on it and
exchanging data with it.
"""

import sys
import subprocess
import os
import platform as sysplatform
import logging
import atexit
import secrets
import argparse
import signal
import glob

# NOTE: we delay importing py4j to the class initializer. This allows us to make GateSlave available via gatenlp
# but does not force everyone to actually have py4j installed if they do not use the GateSlave
# from py4j.java_gateway import JavaGateway, GatewayParameters
from gatenlp import Document
from gatenlp.utils import init_logger

# Version of the bundled gatetools-gatenlpslave jar; it is interpolated into the
# jar file name that is looked up in the "_jars" directory next to this module.
JARVERSION = "1.0"

# Module-level logger: the root logging configuration is initialised at import
# time and this module's logger is set to INFO.
logging.basicConfig()
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)


def classpath_sep(platform=None):
    """Return the character that separates Java classpath entries.

    Args:
      platform: name of the platform to get the separator for. If None or
        empty, the name reported by the standard library for the running
        system is used. (Default value = None)

    Returns:
      ";" if the platform name is "windows" or "win" (compared
      case-insensitively), ":" for every other platform name.

    Raises:
      Exception: if no platform was given and the running system could not
        be determined.
    """
    os_name = platform or sysplatform.system()
    if not os_name:
        raise Exception("Could not determine operating system, please use platform parameter")
    return ";" if os_name.lower() in ("windows", "win") else ":"


def gate_classpath(gatehome, platform=None):
    """Return the GATE classpath as one string, joined with the separator of the given platform.

    Two layouts of the GATE home directory are handled:

    * if a file "gate.classpath" exists in gatehome, its content is used as
      the classpath
    * otherwise all "*.jar" files in the "lib" subdirectory of gatehome are
      used

    In both cases the "bin" subdirectory of gatehome is appended as the last
    classpath entry.

    Args:
      gatehome: where GATE is installed, either as a cloned git repo or a downloaded installation dir.
      platform: platform name passed on to classpath_sep to select the
        separator character. (Default value = None)

    Returns:
      GATE classpath

    Raises:
      Exception: if gatehome does not exist, is not a directory, or contains
        neither a "gate.classpath" file nor a "lib" directory.
    """
    if not os.path.exists(gatehome):
        raise Exception("GATE home directory does not exist: {}".format(gatehome))
    if not os.path.isdir(gatehome):
        raise Exception("GATE home directory is not a directory: {}".format(gatehome))
    cpsep = classpath_sep(platform)
    bindir = os.path.join(gatehome, "bin")
    cpfile = os.path.join(gatehome, "gate.classpath")
    if os.path.exists(cpfile):
        with open(cpfile, "rt", encoding="utf-8") as fp:
            # Strip surrounding whitespace: a trailing newline in the file would
            # otherwise become part of the last classpath entry and of the
            # "-cp" argument handed to java.
            cp = fp.read().strip()
        return cp + cpsep + bindir
    libdir = os.path.join(gatehome, "lib")
    if not os.path.isdir(libdir):
        raise Exception("Could not determine class path from {}, no lib directory".format(gatehome))
    # glob returns entries in arbitrary order; sort so that the classpath (and
    # with it the class lookup order) is the same on every run.
    jars = sorted(glob.glob(os.path.join(libdir, "*.jar")))
    return cpsep.join(jars) + cpsep + bindir


def start_gate_slave(
        port=25333,
        host="127.0.0.1",
        auth_token=None,
        use_auth_token=True,
        java="java",
        platform=None,
        gatehome=None,
        log_actions=False,
        keep=False,
        debug=False,
):
    """Start the Java GATE slave as a subprocess and block until it terminates.

    The auth token is handed to the Java process through the environment
    variable GATENLP_SLAVE_TOKEN_<port>. Everything the Java process writes to
    stderr is forwarded to our own stderr, except for the status line which
    reports that the server has started.

    Args:
      port: port number passed to the Java process (Default value = 25333)
      host: host/address passed to the Java process (Default value = "127.0.0.1")
      auth_token: the auth token to use. If None or empty and use_auth_token is True,
        a random token is generated. (Default value = None)
      use_auth_token: if False, the empty string is used as the token (Default value = True)
      java: the java command or path of the java binary to run (Default value = "java")
      platform: platform name used to select the classpath separator, autodetected
        if None (Default value = None)
      gatehome: GATE installation directory, if None the environment variable
        GATE_HOME is used (Default value = None)
      log_actions: passed to the Java process as "1" or "0" (Default value = False)
      keep: passed to the Java process as "1" or "0" (Default value = False)
      debug: (Default value = False) Show debug messages.

    Returns:
      None, after the Java process has ended.

    Raises:
      Exception: if neither gatehome nor GATE_HOME is set, if the slave jar cannot
        be found, or if the Java process reports that the server could not be started.
    """
    logger = init_logger(__name__)
    if debug:
        logger.setLevel(logging.DEBUG)

    if gatehome is None:
        gatehome = os.environ.get("GATE_HOME")
        if gatehome is None:
            raise Exception("Parameter gatehome is None and environment var GATE_HOME not set")
    if not use_auth_token:
        auth_token = ""
    elif not auth_token:
        auth_token = secrets.token_urlsafe(20)
    # the Java process expects the two flags as the strings "1" / "0"
    log_actions = "1" if log_actions else "0"
    keep = "1" if keep else "0"
    logger.debug(f"Starting gate slave, gatehome={gatehome}, auth_token={auth_token}, log_actions={log_actions}, keep={keep}")
    jarloc = os.path.join(os.path.dirname(__file__), "_jars", f"gatetools-gatenlpslave-{JARVERSION}.jar")
    if not os.path.exists(jarloc):
        raise Exception("Could not find jar, {} does not exist".format(jarloc))
    logger.debug(f"Using JAR: {jarloc}")
    cpsep = classpath_sep(platform=platform)
    cmdandparms = [
        java,
        "-cp",
        jarloc + cpsep + gate_classpath(gatehome, platform=platform),
        "gate.tools.gatenlpslave.GatenlpSlave",
        str(port),
        host,
        log_actions,
        keep,
    ]
    # the token is passed through the (inherited) environment, not the command line
    os.environ["GATENLP_SLAVE_TOKEN_" + str(port)] = auth_token
    cmd = " ".join(cmdandparms)
    logger.debug(f"Running command: {cmd}")
    subproc = subprocess.Popen(cmdandparms, stderr=subprocess.PIPE, bufsize=0, encoding="utf-8")

    def shutdown():
        """Send SIGINT to the Java process and forward whatever it still writes to stderr."""
        # NOTE(review): subprocess supports only a restricted set of signals on
        # Windows - confirm that SIGINT is accepted there.
        subproc.send_signal(signal.SIGINT)
        for line in subproc.stderr:
            print(line, file=sys.stderr, end="")

    atexit.register(shutdown)
    # Startup handshake: forward stderr lines until the Java side reports the
    # outcome of starting the server (or until stderr is closed).
    while True:
        line = subproc.stderr.readline()
        if line == "":
            break
        line = line.rstrip("\n\r")
        if line == "PythonSlaveRunner.java: server start OK":
            break
        if line == "PythonSlaveRunner.java: server start NOT OK":
            raise Exception("Could not start server, giving up")
        print(line, file=sys.stderr)
    try:
        # Keep draining stderr while the server runs: waiting on a child whose
        # stderr is a pipe nobody reads can deadlock once the pipe buffer is full.
        for line in subproc.stderr:
            print(line, file=sys.stderr, end="")
        subproc.wait()
    except KeyboardInterrupt:
        print("Received keyboard interrupt, shutting down server...")
        shutdown()


class GateSlave:
    """Python-side handle for a Java GATE slave process that is accessed through a py4j gateway."""

    def __init__(self, port=25333,
                 start=True,
                 java="java",
                 host="127.0.0.1",
                 gatehome=None,
                 platform=None,
                 auth_token=None,
                 use_auth_token=True,
                 log_actions=False,
                 keep=False,
                 debug=False,
                 ):
        """
        Create an instance of the GateSlave and either start our own Java GATE process for it to use
        (start=True) or connect to an existing one (start=False).

        After the GateSlave instance has been created successfully, it is possible to:

        * Use one of the methods of the instance to perform operations on the Java side or exchange data

        * use GateSlave.slave to invoke methods from the PythonSlave class on the Java side

        * use GateSlave.jvm to directly construct objects or call instance or static methods

        NOTE: the GATE process must not output anything important/big to stderr because everything from
        stderr gets captured and used for communication between the Java and Python processes. At least
        part of the output to stderr may only be passed on after the GATE process has ended.

        Example: ::

            gs = GateSlave()
            pipeline = gs.slave.loadPipelineFromFile("thePipeline.xgapp")
            doc = gs.slave.createDocument("Some document text")
            gs.slave.run4doc(pipeline,doc)
            pdoc = gs.gdoc2pdoc(doc)
            gs.slave.deleteResource(doc)
            # process the gatenlp Document pdoc ...

        :param port: port to use
        :param start: if True, try to start our own GATE process, otherwise expect an already started
           process at the host/port address
        :param java: path to the java binary to run or the java command to use from the PATH (for start=True)
        :param host: host an existing Java GATE process is running on. For start=False this is the
               address the gateway connects to; for start=True it is passed to the started Java process.
        :param gatehome: where GATE is installed (only relevant if start=True). If None, expects
               environment variable GATE_HOME to be set.
        :param platform: system platform we run on, one of Windows, Linux (also for MacOs) or Java
        :param auth_token: if None or "" and use_auth_token is True, generate a random token which
               is then accessible via the auth_token attribute, otherwise use the given auth token.
        :param use_auth_token: if False, do not use an auth token, otherwise either use the one specified
               via auth_token or generate a random one.
        :param log_actions: if the gate slave should log the actions it is doing
        :param keep: normally if gs.close() is called and we are not connected to the PythonSlaveLr,
               the slave will be shut down. If this is True, the gs.close() method does not shut down
               the slave.
        :param debug: show debug messages (default: False)
        """
        self.logger = init_logger(__name__)
        if debug:
            # same behaviour as start_gate_slave: debug=True enables debug messages
            self.logger.setLevel(logging.DEBUG)

        # imported here so that py4j is only required when a GateSlave is created
        from py4j.java_gateway import JavaGateway, GatewayParameters

        self.port = port
        self.host = host
        self.start = start
        self.gatehome = gatehome
        self.platform = platform
        self.gateprocess = None
        self.gateway = None
        self.slave = None
        self.closed = False
        self.keep = keep
        self.debug = debug
        # Stored under a private name: an instance attribute called "log_actions"
        # would shadow the log_actions() method and make it impossible to call.
        self._log_actions = log_actions
        if not use_auth_token:
            self.auth_token = ""
        elif not auth_token:
            self.auth_token = secrets.token_urlsafe(20)
        else:
            self.auth_token = auth_token
        if gatehome is None and start:
            gatehome = os.environ.get("GATE_HOME")
            if gatehome is None:
                raise Exception("Parameter gatehome is None and environment var GATE_HOME not set")
            self.gatehome = gatehome
        if start:
            # make sure we find the jar we need
            jarloc = os.path.join(os.path.dirname(__file__), "_jars", f"gatetools-gatenlpslave-{JARVERSION}.jar")
            if not os.path.exists(jarloc):
                raise Exception("Could not find jar, {} does not exist".format(jarloc))
            cpsep = classpath_sep(platform=platform)
            cmdandparms = [
                java,
                "-cp",
                jarloc + cpsep + gate_classpath(self.gatehome, platform=platform),
                "gate.tools.gatenlpslave.GatenlpSlave",
                str(port),
                host,
                # the Java process expects the two flags as the strings "1" / "0"
                "1" if log_actions else "0",
                "1" if keep else "0",
            ]
            # the token is passed through the (inherited) environment, not the command line
            os.environ["GATENLP_SLAVE_TOKEN_"+str(self.port)] = self.auth_token
            cmd = " ".join(cmdandparms)
            self.logger.debug(f"Running command: {cmd}")
            subproc = subprocess.Popen(cmdandparms, stderr=subprocess.PIPE, bufsize=0, encoding="utf-8")
            self.gateprocess = subproc
            # Startup handshake: forward stderr lines until the Java side reports
            # the outcome of starting the server (or until stderr is closed).
            while True:
                line = subproc.stderr.readline()
                if line == "":
                    break
                line = line.rstrip("\n\r")
                if line == "PythonSlaveRunner.java: server start OK":
                    break
                if line == "PythonSlaveRunner.java: server start NOT OK":
                    raise Exception("Could not start server, giving up")
                print(line, file=sys.stderr)
            atexit.register(self.close)
        gateway_args = dict(port=port, auth_token=self.auth_token)
        if not start:
            # connect to the already running process on the requested host
            # instead of always using the py4j default address
            gateway_args["address"] = host
        self.gateway = JavaGateway(gateway_parameters=GatewayParameters(**gateway_args))
        self.jvm = self.gateway.jvm
        self.slave = self.gateway.entry_point
        self.gate_version = self.jvm.gate.Main.version
        self.gate_build = self.jvm.gate.Main.build
        self.slave_version = self.slave.plugin_version()
        self.slave_build = self.slave.plugin_build()

    @staticmethod
    def download():
        """
        Download GATE libraries into a standard location so we can run the GATE slave even if GATE_HOME
        is not set.

        Not implemented yet: always raises an Exception.
        """
        # TODO: this should use the command and bootstrapping jar in gate-downloader:
        # copy the whole directory into the standard per-user config directory for the system
        # run the command
        # use the generated gate.classpath as for a compiled local git repo
        # NOTE: should change error message if GATE_HOME is not set to hint at this! (option --download for the script)
        # NOTE: add to documentation
        raise Exception("Not yet implemented")

    def close(self):
        """
        Clean up: if the gate slave process was started by us, we will shut it down.
        Otherwise we can still close it if it was started by the slaverunner, not the Lr
        Note: if it was started by us, it was started via the slaverunner.

        Does nothing if the instance is already closed, if no connection to the
        slave has been set up, or if the slave reports that it is not closable.
        """
        # self.slave is still None when __init__ failed after close() had been
        # registered with atexit but before the gateway was set up
        if self.closed or self.slave is None:
            return
        if not self.slave.isClosable():
            return
        self.closed = True
        self.gateway.shutdown()
        if self.gateprocess is not None:
            # pass on what the Java process still writes to stderr, then reap it
            for line in self.gateprocess.stderr:
                print(line, file=sys.stderr, end="")
            self.gateprocess.wait()

    def log_actions(self, onoff):
        """Switch logging actions at the slave on or off.

        Args:
          onoff: True to log actions, False to not log them

        Returns:
          None
        """
        self.slave.logActions(onoff)

    def load_gdoc(self, path, mimetype=None):
        """Let GATE load a document from the given path and return a handle to it.

        Args:
          path: path to the gate document to load.
          mimetype: a mimetype to use when loading, None is passed on as the
            empty string. (Default value = None)

        Returns:
          a handle to the GATE document
        """
        if mimetype is None:
            mimetype = ""
        return self.slave.loadDocumentFromFile(path, mimetype)

    def save_gdoc(self, gdoc, path, mimetype=None):
        """Save GATE document to the given path.

        Args:
          gdoc: GATE document handle
          path: destination path
          mimetype: mimetype, only the following types are allowed: ""/None: GATE XML,
            application/fastinfoset, and all mimetypes supported by the Format_Bdoc plugin.
            (Default value = None)

        Returns:
          None
        """
        if mimetype is None:
            mimetype = ""
        # the document to save has to be handed over together with path and mimetype
        self.slave.saveDocumentToFile(gdoc, path, mimetype)

    def gdoc2pdoc(self, gdoc):
        """Convert the GATE document to a python document and return it.

        Args:
          gdoc: the handle to a GATE document

        Returns:
          a gatenlp Document instance
        """
        bjs = self.slave.getBdocJson(gdoc)
        return Document.load_mem(bjs, fmt="bdocjs")

    def pdoc2gdoc(self, pdoc):
        """Convert the Python gatenlp document to a GATE document and return a handle to it.

        Args:
          pdoc: python gatenlp Document

        Returns:
          handle to GATE document
        """
        bdoc_json = pdoc.save_mem(fmt="bdocjs")
        return self.slave.getDocument4BdocJson(bdoc_json)

    def load_pdoc(self, path, mimetype=None):
        """Load a document from the given path, using GATE and convert and return as gatenlp Python document.

        Args:
          path: path to load document from
          mimetype: mime type to use (Default value = None)

        Returns:
          gatenlp document
        """
        gdoc = self.load_gdoc(path, mimetype)
        return self.gdoc2pdoc(gdoc)

    def del_gdoc(self, gdoc):
        """Delete/unload the GATE document from GATE.
        This is necessary to do for each GATE document that is not used anymore, otherwise the documents
        will accumulate in the Java process and eat up all memory. NOTE: just removing all references to the
        GATE document does not delete/unload the document!

        Args:
          gdoc: the document to remove

        Returns:
          None
        """
        self.jvm.gate.Factory.deleteResource(gdoc)

    def show_gui(self):
        """Show the GUI for the started GATE process. NOTE: this is more of a hack and may cause sync problems
        when closing down the GATE slave.

        Returns:
          None
        """
        self.slave.showGui()


def main():
    """Command line entry point.

    Parses the command line options and either runs GateSlave.download()
    (option --download) or starts the GATE slave process with the given
    options via start_gate_slave.
    """
    parser = argparse.ArgumentParser(description="Start Java GATE Slave")
    add_opt = parser.add_argument
    add_opt("--download", action="store_true", help="Download GATE libraries to run GATE slave")
    add_opt("--port", default=25333, type=int, help="Port (25333)")
    add_opt("--host", default="127.0.0.1", type=str, help="Host to bind to (127.0.0.1)")
    add_opt("--auth", default=None, type=str, help="Auth token to use (generate random)")
    add_opt("--noauth", action="store_true", help="Do not use auth token")
    add_opt("--gatehome", default=None, type=str, help="Location of GATE (environment variable GATE_HOME)")
    add_opt("--platform", default=None, type=str, help="OS/Platform: windows or linux (autodetect)")
    add_opt("--log_actions", action="store_true", help="If slave actions should be logged")
    add_opt("--keep", action="store_true", help="Prevent shutting down the slave")
    add_opt("--debug", action="store_true", help="Show debug messages")
    opts = parser.parse_args()

    if opts.download:
        GateSlave.download()
        return

    # map the parsed options onto the keyword arguments of start_gate_slave
    slave_options = {
        "port": opts.port,
        "host": opts.host,
        "auth_token": opts.auth,
        "use_auth_token": not opts.noauth,
        "gatehome": opts.gatehome,
        "platform": opts.platform,
        "log_actions": opts.log_actions,
        "keep": opts.keep,
        "debug": opts.debug,
    }
    start_gate_slave(**slave_options)


# Run the command line entry point only when this file is executed as a script.
if __name__ == "__main__":
    main()

Functions

def classpath_sep(platform=None)

Args

platform
(Default value = None)

Returns

The classpath separator character

Expand source code
def classpath_sep(platform=None):
    """Return the character that separates Java classpath entries.

    Args:
      platform: name of the platform to get the separator for. If None or
        empty, the name reported by the standard library for the running
        system is used. (Default value = None)

    Returns:
      ";" if the platform name is "windows" or "win" (compared
      case-insensitively), ":" for every other platform name.

    Raises:
      Exception: if no platform was given and the running system could not
        be determined.
    """
    os_name = platform or sysplatform.system()
    if not os_name:
        raise Exception("Could not determine operating system, please use platform parameter")
    return ";" if os_name.lower() in ("windows", "win") else ":"
def gate_classpath(gatehome, platform=None)

Return the GATE classpath components as a string, with the element separator characters appropriate for the operating system.

Args

gatehome
where GATE is installed, either as a cloned git repo or a downloaded installation dir.
platform
(Default value = None)

Returns

GATE classpath

Expand source code
def gate_classpath(gatehome, platform=None):
    """Return the GATE classpath as one string, joined with the separator of the given platform.

    Two layouts of the GATE home directory are handled:

    * if a file "gate.classpath" exists in gatehome, its content is used as
      the classpath
    * otherwise all "*.jar" files in the "lib" subdirectory of gatehome are
      used

    In both cases the "bin" subdirectory of gatehome is appended as the last
    classpath entry.

    Args:
      gatehome: where GATE is installed, either as a cloned git repo or a downloaded installation dir.
      platform: platform name passed on to classpath_sep to select the
        separator character. (Default value = None)

    Returns:
      GATE classpath

    Raises:
      Exception: if gatehome does not exist, is not a directory, or contains
        neither a "gate.classpath" file nor a "lib" directory.
    """
    if not os.path.exists(gatehome):
        raise Exception("GATE home directory does not exist: {}".format(gatehome))
    if not os.path.isdir(gatehome):
        raise Exception("GATE home directory is not a directory: {}".format(gatehome))
    cpsep = classpath_sep(platform)
    bindir = os.path.join(gatehome, "bin")
    cpfile = os.path.join(gatehome, "gate.classpath")
    if os.path.exists(cpfile):
        with open(cpfile, "rt", encoding="utf-8") as fp:
            # Strip surrounding whitespace: a trailing newline in the file would
            # otherwise become part of the last classpath entry and of the
            # "-cp" argument handed to java.
            cp = fp.read().strip()
        return cp + cpsep + bindir
    libdir = os.path.join(gatehome, "lib")
    if not os.path.isdir(libdir):
        raise Exception("Could not determine class path from {}, no lib directory".format(gatehome))
    # glob returns entries in arbitrary order; sort so that the classpath (and
    # with it the class lookup order) is the same on every run.
    jars = sorted(glob.glob(os.path.join(libdir, "*.jar")))
    return cpsep.join(jars) + cpsep + bindir
def main()
Expand source code
def main():
    """Command line entry point.

    Parses the command line options and either runs GateSlave.download()
    (option --download) or starts the GATE slave process with the given
    options via start_gate_slave.
    """
    parser = argparse.ArgumentParser(description="Start Java GATE Slave")
    add_opt = parser.add_argument
    add_opt("--download", action="store_true", help="Download GATE libraries to run GATE slave")
    add_opt("--port", default=25333, type=int, help="Port (25333)")
    add_opt("--host", default="127.0.0.1", type=str, help="Host to bind to (127.0.0.1)")
    add_opt("--auth", default=None, type=str, help="Auth token to use (generate random)")
    add_opt("--noauth", action="store_true", help="Do not use auth token")
    add_opt("--gatehome", default=None, type=str, help="Location of GATE (environment variable GATE_HOME)")
    add_opt("--platform", default=None, type=str, help="OS/Platform: windows or linux (autodetect)")
    add_opt("--log_actions", action="store_true", help="If slave actions should be logged")
    add_opt("--keep", action="store_true", help="Prevent shutting down the slave")
    add_opt("--debug", action="store_true", help="Show debug messages")
    opts = parser.parse_args()

    if opts.download:
        GateSlave.download()
        return

    # map the parsed options onto the keyword arguments of start_gate_slave
    slave_options = {
        "port": opts.port,
        "host": opts.host,
        "auth_token": opts.auth,
        "use_auth_token": not opts.noauth,
        "gatehome": opts.gatehome,
        "platform": opts.platform,
        "log_actions": opts.log_actions,
        "keep": opts.keep,
        "debug": opts.debug,
    }
    start_gate_slave(**slave_options)
def start_gate_slave(port=25333, host='127.0.0.1', auth_token=None, use_auth_token=True, java='java', platform=None, gatehome=None, log_actions=False, keep=False, debug=False)

Args

port
(Default value = 25333)
host
(Default value = "127.0.0.1")
auth_token
(Default value = None)
use_auth_token
(Default value = True)
java
(Default value = "java")
platform
(Default value = None)
gatehome
(Default value = None)
log_actions
(Default value = False)
keep
(Default value = False)
debug
(Default value = False) Show debug messages.

Returns:

Expand source code
def start_gate_slave(
        port=25333,
        host="127.0.0.1",
        auth_token=None,
        use_auth_token=True,
        java="java",
        platform=None,
        gatehome=None,
        log_actions=False,
        keep=False,
        debug=False,
):
    """Start the Java GATE slave as a subprocess and block until it terminates.

    The auth token is handed to the Java process through the environment
    variable GATENLP_SLAVE_TOKEN_<port>. Everything the Java process writes to
    stderr is forwarded to our own stderr, except for the status line which
    reports that the server has started.

    Args:
      port: port number passed to the Java process (Default value = 25333)
      host: host/address passed to the Java process (Default value = "127.0.0.1")
      auth_token: the auth token to use. If None or empty and use_auth_token is True,
        a random token is generated. (Default value = None)
      use_auth_token: if False, the empty string is used as the token (Default value = True)
      java: the java command or path of the java binary to run (Default value = "java")
      platform: platform name used to select the classpath separator, autodetected
        if None (Default value = None)
      gatehome: GATE installation directory, if None the environment variable
        GATE_HOME is used (Default value = None)
      log_actions: passed to the Java process as "1" or "0" (Default value = False)
      keep: passed to the Java process as "1" or "0" (Default value = False)
      debug: (Default value = False) Show debug messages.

    Returns:
      None, after the Java process has ended.

    Raises:
      Exception: if neither gatehome nor GATE_HOME is set, if the slave jar cannot
        be found, or if the Java process reports that the server could not be started.
    """
    logger = init_logger(__name__)
    if debug:
        logger.setLevel(logging.DEBUG)

    if gatehome is None:
        gatehome = os.environ.get("GATE_HOME")
        if gatehome is None:
            raise Exception("Parameter gatehome is None and environment var GATE_HOME not set")
    if not use_auth_token:
        auth_token = ""
    elif not auth_token:
        auth_token = secrets.token_urlsafe(20)
    # the Java process expects the two flags as the strings "1" / "0"
    log_actions = "1" if log_actions else "0"
    keep = "1" if keep else "0"
    logger.debug(f"Starting gate slave, gatehome={gatehome}, auth_token={auth_token}, log_actions={log_actions}, keep={keep}")
    jarloc = os.path.join(os.path.dirname(__file__), "_jars", f"gatetools-gatenlpslave-{JARVERSION}.jar")
    if not os.path.exists(jarloc):
        raise Exception("Could not find jar, {} does not exist".format(jarloc))
    logger.debug(f"Using JAR: {jarloc}")
    cpsep = classpath_sep(platform=platform)
    cmdandparms = [
        java,
        "-cp",
        jarloc + cpsep + gate_classpath(gatehome, platform=platform),
        "gate.tools.gatenlpslave.GatenlpSlave",
        str(port),
        host,
        log_actions,
        keep,
    ]
    # the token is passed through the (inherited) environment, not the command line
    os.environ["GATENLP_SLAVE_TOKEN_" + str(port)] = auth_token
    cmd = " ".join(cmdandparms)
    logger.debug(f"Running command: {cmd}")
    subproc = subprocess.Popen(cmdandparms, stderr=subprocess.PIPE, bufsize=0, encoding="utf-8")

    def shutdown():
        """Send SIGINT to the Java process and forward whatever it still writes to stderr."""
        # NOTE(review): subprocess supports only a restricted set of signals on
        # Windows - confirm that SIGINT is accepted there.
        subproc.send_signal(signal.SIGINT)
        for line in subproc.stderr:
            print(line, file=sys.stderr, end="")

    atexit.register(shutdown)
    # Startup handshake: forward stderr lines until the Java side reports the
    # outcome of starting the server (or until stderr is closed).
    while True:
        line = subproc.stderr.readline()
        if line == "":
            break
        line = line.rstrip("\n\r")
        if line == "PythonSlaveRunner.java: server start OK":
            break
        if line == "PythonSlaveRunner.java: server start NOT OK":
            raise Exception("Could not start server, giving up")
        print(line, file=sys.stderr)
    try:
        # Keep draining stderr while the server runs: waiting on a child whose
        # stderr is a pipe nobody reads can deadlock once the pipe buffer is full.
        for line in subproc.stderr:
            print(line, file=sys.stderr, end="")
        subproc.wait()
    except KeyboardInterrupt:
        print("Received keyboard interrupt, shutting down server...")
        shutdown()

Classes

class GateSlave (port=25333, start=True, java='java', host='127.0.0.1', gatehome=None, platform=None, auth_token=None, use_auth_token=True, log_actions=False, keep=False, debug=False)

Create an instance of the GateSlave and either start our own Java GATE process for it to use (start=True) or connect to an existing one (start=False).

After the GateSlave instance has been created successfully, it is possible to:

  • Use one of the methods of the instance to perform operations on the Java side or exchange data

  • use GateSlave.slave to invoke methods from the PythonSlave class on the Java side

  • use GateSlave.jvm to directly construct objects or call instance or static methods

NOTE: the GATE process must not output anything important/big to stderr because everything from stderr gets captured and used for communication between the Java and Python processes. At least part of the output to stderr may only be passed on after the GATE process has ended.

Example: ::

gs = GateSlave()
pipeline = gs.slave.loadPipelineFromFile("thePipeline.xgapp")
doc = gs.slave.createDocument("Some document text")
gs.slave.run4doc(pipeline,doc)
pdoc = gs.gdoc2pdoc(doc)
gs.slave.deleteResource(doc)
# process the gatenlp Document pdoc ...

:param port: port to use :param start: if True, try to start our own GATE process, otherwise expect an already started process at the host/port address :param java: path to the java binary to run or the java command to use from the PATH (for start=True) :param host: host an existing Java GATE process is running on (only relevant for start=False) :param gatehome: where GATE is installed (only relevant if start=True). If None, expects environment variable GATE_HOME to be set. :param platform: system platform we run on, one of Windows, Linux (also for MacOs) or Java :param auth_token: if None or "" and use_auth_token is True, generate a random token which is then accessible via the auth_token attribute, otherwise use the given auth token. :param use_auth_token: if False, do not use an auth token, otherwise either use the one specified via auth_token or generate a random one. :param log_actions: if the gate slave should log the actions it is doing :param keep: normally if gs.close() is called and we are not connected to the PythonSlaveLr, the slave will be shut down. If this is True, the gs.close() method does not shut down the slave. :param debug: show debug messages (default: False)

Expand source code
class GateSlave:
    """
    Python-side handle for a Java GATE process that is controlled through a py4j gateway.

    See the documentation of the initializer for the available parameters and a usage example.
    """
    def __init__(self, port=25333,
                 start=True,
                 java="java",
                 host="127.0.0.1",
                 gatehome=None,
                 platform=None,
                 auth_token=None,
                 use_auth_token=True,
                 log_actions=False,
                 keep=False,
                 debug=False,
                 ):
        """
        Create an instance of the GateSlave and either start our own Java GATE process for it to use
        (start=True) or connect to an existing one (start=False).

        After the GateSlave instance has been created successfully, it is possible to:

        * Use one of the methods of the instance to perform operations on the Java side or exchange data

        * use GateSlave.slave to invoke methods from the PythonSlave class on the Java side

        * use GateSlave.jvm to directly construct objects or call instance or static methods

        NOTE: the GATE process must not output anything important/big to stderr because everything from
        stderr gets captured and used for communication between the Java and Python processes. At least
        part of the output to stderr may only be passed on after the GATE process has ended.

        Example: ::

            gs = GateSlave()
            pipeline = gs.slave.loadPipelineFromFile("thePipeline.xgapp")
            doc = gs.slave.createDocument("Some document text")
            gs.slave.run4doc(pipeline,doc)
            pdoc = gs.gdoc2pdoc(doc)
            gs.slave.deleteResource(doc)
            # process the gatenlp Document pdoc ...

        :param port: port to use
        :param start: if True, try to start our own GATE process, otherwise expect an already started
           process at the host/port address
        :param java: path to the java binary to run or the java command to use from the PATH (for start=True)
        :param host: host an existing Java GATE process is running on. If start=True the value is also
               handed to the started Java process as a command line argument.
        :param gatehome: where GATE is installed (only relevant if start=True). If None, expects
               environment variable GATE_HOME to be set.
        :param platform: system platform we run on, one of Windows, Linux (also for MacOs) or Java
        :param auth_token: if None or "" and use_auth_token is True, generate a random token which
               is then accessible via the auth_token attribute, otherwise use the given auth token.
        :param use_auth_token: if False, do not use an auth token, otherwise either use the one specified
               via auth_token or generate a random one.
        :param log_actions: if the gate slave should log the actions it is doing
        :param keep: normally if gs.close() is called and we are not connected to the PythonSlaveLr,
               the slave will be shut down. If this is True, the gs.close() method does not shut down
               the slave.
        :param debug: show debug messages (default: False)
        :raises Exception: if no GATE home can be determined, the slave jar is missing, or the
               started Java process does not report a successful server start
        """
        self.logger = init_logger(__name__)

        # py4j is imported here and not at module level so that the module can be imported
        # without py4j being installed.
        from py4j.java_gateway import JavaGateway, GatewayParameters

        self.gatehome = gatehome
        self.port = port
        self.host = host
        self.start = start
        self.platform = platform
        self.gateprocess = None
        self.gateway = None
        self.slave = None
        self.closed = False
        self.keep = keep
        self.debug = debug
        # NOTE: this must not be stored as self.log_actions: an instance attribute of that name
        # would shadow the log_actions() method and make it impossible to call.
        self._log_actions = log_actions
        if use_auth_token:
            # an empty or missing token means: generate a random one
            self.auth_token = auth_token if auth_token else secrets.token_urlsafe(20)
        else:
            self.auth_token = ""
        if gatehome is None and start:
            gatehome = os.environ.get("GATE_HOME")
            if gatehome is None:
                raise Exception("Parameter gatehome is None and environment var GATE_HOME not set")
            self.gatehome = gatehome
        if start:
            # make sure we find the jar we need
            jarloc = os.path.join(os.path.dirname(__file__), "_jars", f"gatetools-gatenlpslave-{JARVERSION}.jar")
            if not os.path.exists(jarloc):
                raise Exception("Could not find jar, {} does not exist".format(jarloc))
            cpsep = classpath_sep(platform=platform)
            # positional arguments of the Java main class: port, host, log-actions flag, keep flag
            cmdandparms = [
                java,
                "-cp",
                jarloc + cpsep + gate_classpath(self.gatehome, platform=platform),
                "gate.tools.gatenlpslave.GatenlpSlave",
                str(port),
                host,
                "1" if log_actions else "0",
                "1" if keep else "0",
            ]
            # the auth token is handed to the child process through the (inherited) environment
            os.environ["GATENLP_SLAVE_TOKEN_"+str(self.port)] = self.auth_token
            cmd = " ".join(cmdandparms)
            self.logger.debug(f"Running command: {cmd}")
            subproc = subprocess.Popen(cmdandparms, stderr=subprocess.PIPE, bufsize=0, encoding="utf-8")
            self.gateprocess = subproc
            # Read the child's stderr until it reports the outcome of the server start; everything
            # else it writes in the meantime is passed on to our own stderr.
            while True:
                line = subproc.stderr.readline()
                if line == "":
                    # EOF: stderr was closed before any start message arrived, so there is
                    # nothing to connect to. Fail here instead of with an obscure connection
                    # error later on.
                    raise Exception("GATE process ended before reporting a successful server start")
                line = line.rstrip("\n\r")
                if line == "PythonSlaveRunner.java: server start OK":
                    break
                if line == "PythonSlaveRunner.java: server start NOT OK":
                    raise Exception("Could not start server, giving up")
                print(line, file=sys.stderr)
            atexit.register(self.close)
        # Pass the host on to py4j, otherwise the gateway always connects to the py4j default
        # address and the host parameter is silently ignored.
        self.gateway = JavaGateway(
            gateway_parameters=GatewayParameters(address=host, port=port, auth_token=self.auth_token))
        self.jvm = self.gateway.jvm
        self.slave = self.gateway.entry_point
        self.gate_version = self.jvm.gate.Main.version
        self.gate_build = self.jvm.gate.Main.build
        self.slave_version = self.slave.plugin_version()
        self.slave_build = self.slave.plugin_build()

    @staticmethod
    def download():
        """
        Download GATE libraries into a standard location so we can run the GATE slave even if GATE_HOME
        is not set.

        Raises:
          NotImplementedError: always, this is not implemented yet
        """
        # TODO: this should use the command and bootstrapping jar in gate-downloader:
        # copy the whole directory into the standard per-user config directory for the system
        # run the command
        # use the generated gate.classpath as for a compiled local git repo
        # NOTE: should change error message if GATE_HOME is not set to hint at this! (option --download for the script)
        # NOTE: add to documentation
        # NotImplementedError is a subclass of Exception, so existing handlers keep working.
        raise NotImplementedError("Not yet implemented")

    def close(self):
        """
        Clean up: if the gate slave process was started by us, we will shut it down.
        Otherwise we can still close it if it was started by the slaverunner, not the PythonSlaveLr.
        Note: if it was started by us, it was started via the slaverunner.

        Calling this more than once, or on an instance that never got connected, does nothing.

        Returns:
          None
        """
        if self.closed:
            return
        if self.slave is None or self.gateway is None:
            # The connection was never established (e.g. the initializer failed half way),
            # so there is nothing we could ask or shut down.
            return
        if not self.slave.isClosable():
            return
        self.closed = True
        self.gateway.shutdown()
        if self.gateprocess is not None:
            # pass on whatever the process still wrote to stderr, then wait for it to end
            for line in self.gateprocess.stderr:
                print(line, file=sys.stderr, end="")
            self.gateprocess.wait()

    def log_actions(self, onoff):
        """Switch logging actions at the slave on or off.

        Args:
          onoff: True to log actions, False to not log them

        Returns:
          None
        """
        self.slave.logActions(onoff)

    def load_gdoc(self, path, mimetype=None):
        """Let GATE load a document from the given path and return a handle to it.

        Args:
          path: path to the gate document to load.
          mimetype: a mimetype to use when loading. (Default value = None)

        Returns:
          a handle to the GATE document

        """
        # the Java side gets an empty string instead of None
        if mimetype is None:
            mimetype = ""
        return self.slave.loadDocumentFromFile(path, mimetype)

    def save_gdoc(self, gdoc, path, mimetype=None):
        """Save GATE document to the given path.

        Args:
          gdoc: GATE document handle
          path: destination path
          mimetype: mimetype, only the following types are allowed: ""/None: GATE XML,
            application/fastinfoset, and all mimetypes supported by the Format_Bdoc plugin.
            (Default value = None)

        Returns:
          None
        """
        # the Java side gets an empty string instead of None
        if mimetype is None:
            mimetype = ""
        # the document handle has to be passed on, otherwise the Java side does not know what to save
        self.slave.saveDocumentToFile(gdoc, path, mimetype)

    def gdoc2pdoc(self, gdoc):
        """Convert the GATE document to a python document and return it.

        Args:
          gdoc: the handle to a GATE document

        Returns:
          a gatenlp Document instance

        """
        bjs = self.slave.getBdocJson(gdoc)
        return Document.load_mem(bjs, fmt="bdocjs")

    def pdoc2gdoc(self, pdoc):
        """Convert the Python gatenlp document to a GATE document and return a handle to it.

        Args:
          pdoc: python gatenlp Document

        Returns:
          handle to GATE document

        """
        bdocjson = pdoc.save_mem(fmt="bdocjs")
        return self.slave.getDocument4BdocJson(bdocjson)

    def load_pdoc(self, path, mimetype=None):
        """Load a document from the given path, using GATE and convert and return as gatenlp Python document.

        Args:
          path: path to load document from
          mimetype: mime type to use (Default value = None)

        Returns:
          gatenlp document

        """
        gdoc = self.load_gdoc(path, mimetype)
        return self.gdoc2pdoc(gdoc)

    def del_gdoc(self, gdoc):
        """Delete/unload the GATE document from GATE.
        This is necessary to do for each GATE document that is not used anymore, otherwise the documents
        will accumulate in the Java process and eat up all memory. NOTE: just removing all references to the
        GATE document does not delete/unload the document!

        Args:
          gdoc: the document to remove

        Returns:
          None
        """
        self.jvm.gate.Factory.deleteResource(gdoc)

    def show_gui(self):
        """Show the GUI for the started GATE process. NOTE: this is more of a hack and may cause sync problems
        when closing down the GATE slave.

        Returns:
          None
        """
        self.slave.showGui()

Static methods

def download()

Download GATE libraries into a standard location so we can run the GATE slave even if GATE_HOME is not set.

Expand source code
@staticmethod
def download():
    """
    Download GATE libraries into a standard location so we can run the GATE slave even if GATE_HOME
    is not set.

    Raises:
      NotImplementedError: always, this is not implemented yet
    """
    # TODO: this should use the command and bootstrapping jar in gate-downloader:
    # copy the whole directory into the standard per-user config directory for the system
    # run the command
    # use the generated gate.classpath as for a compiled local git repo
    # NOTE: should change error message if GATE_HOME is not set to hint at this! (option --download for the script)
    # NOTE: add to documentation
    # NotImplementedError is a subclass of Exception, so existing handlers keep working.
    raise NotImplementedError("Not yet implemented")

Methods

def close(self)

Clean up: if the gate slave process was started by us, we will shut it down. Otherwise we can still close it if it was started by the slaverunner, but not if it was started by the PythonSlaveLr. Note: if it was started by us, it was started via the slaverunner.

:return:

Args:

Returns:

Expand source code
def close(self):
    """
    Clean up: if the gate slave process was started by us, we will shut it down.
    Otherwise we can still close it if it was started by the slaverunner, not the PythonSlaveLr.
    Note: if it was started by us, it was started via the slaverunner.

    Calling this more than once, or on an instance that never got connected, does nothing.

    Returns:
      None
    """
    if self.closed:
        return
    if self.slave is None or self.gateway is None:
        # The connection was never established (e.g. the initializer failed half way),
        # so there is nothing we could ask or shut down.
        return
    if not self.slave.isClosable():
        return
    self.closed = True
    self.gateway.shutdown()
    if self.gateprocess is not None:
        # pass on whatever the process still wrote to stderr, then wait for it to end
        for line in self.gateprocess.stderr:
            print(line, file=sys.stderr, end="")
        self.gateprocess.wait()
def del_gdoc(self, gdoc)

Delete/unload the GATE document from GATE. This is necessary to do for each GATE document that is not used anymore, otherwise the documents will accumulate in the Java process and eat up all memory. NOTE: just removing all references to the GATE document does not delete/unload the document!

Args

gdoc
the document to remove

Returns:

Expand source code
def del_gdoc(self, gdoc):
    """Unload a GATE document on the Java side.

    Every GATE document that is no longer needed has to be removed this way. Dropping all Python
    references to the handle is not enough: the document stays loaded in the Java process and
    the accumulated documents will eventually use up all memory.

    Args:
      gdoc: handle of the GATE document to remove

    Returns:
      None
    """
    # gate.Factory is reached through the py4j JVM view
    factory = self.jvm.gate.Factory
    factory.deleteResource(gdoc)
def gdoc2pdoc(self, gdoc)

Convert the GATE document to a python document and return it.

Args

gdoc
the handle to a GATE document

Returns

a gatenlp Document instance

Expand source code
def gdoc2pdoc(self, gdoc):
    """Turn a GATE document into a Python gatenlp document.

    The Java side is asked for the "bdocjs" representation of the document, which is then
    loaded from memory as a gatenlp Document.

    Args:
      gdoc: the handle to a GATE document

    Returns:
      a gatenlp Document instance

    """
    serialized = self.slave.getBdocJson(gdoc)
    pdoc = Document.load_mem(serialized, fmt="bdocjs")
    return pdoc
def load_gdoc(self, path, mimetype=None)

Let GATE load a document from the given path and return a handle to it.

Args

path
path to the gate document to load.
mimetype
a mimetype to use when loading. (Default value = None)

Returns

a handle to the GATE document

Expand source code
def load_gdoc(self, path, mimetype=None):
    """Have GATE load the document at the given path and hand back a handle to it.

    Args:
      path: path to the gate document to load.
      mimetype: a mimetype to use when loading; None is passed to the Java side as the
        empty string. (Default value = None)

    Returns:
      a handle to the GATE document

    """
    effective_mimetype = "" if mimetype is None else mimetype
    return self.slave.loadDocumentFromFile(path, effective_mimetype)
def load_pdoc(self, path, mimetype=None)

Load a document from the given path, using GATE and convert and return as gatenlp Python document.

Args

path
path to load document from
mimetype
mime type to use (Default value = None)

Returns

gatenlp document

Expand source code
def load_pdoc(self, path, mimetype=None):
    """Load a document through GATE and return it converted to a gatenlp Python document.

    This is load_gdoc followed by gdoc2pdoc.

    Args:
      path: path to load document from
      mimetype: mime type to use (Default value = None)

    Returns:
      gatenlp document

    """
    return self.gdoc2pdoc(self.load_gdoc(path, mimetype))
def log_actions(self, onoff)

Switch logging actions at the slave on or off.

Args

onoff
True to log actions, False to not log them

Returns:

Expand source code
def log_actions(self, onoff):
    """Turn the logging of actions at the slave on or off.

    Args:
      onoff: True to log actions, False to not log them

    Returns:
      None
    """
    slave = self.slave
    slave.logActions(onoff)
def pdoc2gdoc(self, pdoc)

Convert the Python gatenlp document to a GATE document and return a handle to it.

Args

pdoc
python gatenlp Document

Returns

handle to GATE document

Expand source code
def pdoc2gdoc(self, pdoc):
    """Turn a Python gatenlp document into a GATE document and return a handle to it.

    The document is serialized in memory as "bdocjs" and that representation is passed to the
    Java side, which creates the GATE document from it.

    Args:
      pdoc: python gatenlp Document

    Returns:
      handle to GATE document

    """
    serialized = pdoc.save_mem(fmt="bdocjs")
    gdoc = self.slave.getDocument4BdocJson(serialized)
    return gdoc
def save_gdoc(self, gdoc, path, mimetype=None)

Save GATE document to the given path.

Args

gdoc
GATE document handle
path
destination path
mimetype
mimetype, only the following types are allowed: ""/None: GATE XML, application/fastinfoset, and all mimetypes supported by the Format_Bdoc plugin. (Default value = None)

Returns:

Expand source code
def save_gdoc(self, gdoc, path, mimetype=None):
    """Save GATE document to the given path.

    Args:
      gdoc: GATE document handle
      path: destination path
      mimetype: mimetype, only the following types are allowed: ""/None: GATE XML,
        application/fastinfoset, and all mimetypes supported by the Format_Bdoc plugin.
        (Default value = None)

    Returns:
      None
    """
    # the Java side gets an empty string instead of None
    if mimetype is None:
        mimetype = ""
    # the document handle has to be passed on, otherwise the Java side does not know what to save
    self.slave.saveDocumentToFile(gdoc, path, mimetype)
def show_gui(self)

Show the GUI for the started GATE process. NOTE: this is more of a hack and may cause sync problems when closing down the GATE slave.

:return:

Args:

Returns:

Expand source code
def show_gui(self):
    """Bring up the GUI of the started GATE process.

    NOTE: this is more of a hack and may cause sync problems when closing down the GATE slave.

    Returns:
      None
    """
    slave = self.slave
    slave.showGui()