Source code for story_graph.markdown_parser

"""
Markdown parser
===============

A :class:`~story_graph.models.ScriptCell` can hold markdown content in our own markdown
dialect to control breaks, change of speakers and emphasis but also allows us to access
variables which are defined within a :class:`stream.models.Stream`.
In the end this will be transformed into `SSML <https://en.wikipedia.org/wiki/Speech_Synthesis_Markup_Language>`_
which will be used for :class:`stream.models.TextToSpeech`

Markdown was chosen as the scripting language because it can still be written easily
by humans and treats written text as a first-class citizen.

The dialect is described in :class:`~GencasterRenderer`.

Use :func:`~md_to_ssml` to convert markdown text within a Python context.
"""

import logging
import re
from datetime import datetime
from typing import Callable, Dict, Optional

from mistletoe import Document, block_token, span_token
from mistletoe.base_renderer import BaseRenderer
from mistletoe.span_token import SpanToken

from stream.models import TextToSpeech

log = logging.getLogger(__name__)


def md_to_ssml(text: str, stream_variables: Optional[Dict[str, str]] = None) -> str:
    """Renders markdown text as
    `SSML <https://en.wikipedia.org/wiki/Speech_Synthesis_Markup_Language>`_.

    :param text: Markdown text
    :param stream_variables: Optional mapping that is handed unchanged to
        :class:`GencasterRenderer`.
    :return: The string produced by rendering the parsed document with a
        :class:`GencasterRenderer`.
    """
    # The document is parsed while the renderer context is active, exactly as
    # before; presumably the renderer's custom token is only known to the
    # parser inside this context -- confirm against mistletoe before reordering.
    with GencasterRenderer(stream_variables) as renderer:
        return renderer.render(Document(text))  # type: ignore
class GencasterToken(SpanToken):
    """Span token for the inline directives of the Gencaster dialect.

    A directive is written as ``{name}`` directly followed by a value that is
    wrapped in backticks. After construction ``target`` holds the first
    captured group (the name) and ``content`` the second one (the value).
    """

    # ``{`` + word characters + ``}`` followed by a backtick-delimited value
    # which itself must not contain a backtick.
    pattern = re.compile(r"{(?P<type>\w*)}`(?P<value>[^`]*)`")
    # NOTE(review): presumably tells mistletoe not to tokenize the value any
    # further -- confirm against the mistletoe documentation.
    parse_inner = False

    def __init__(self, match_obj: re.Match):
        """Stores the two captured groups of ``match_obj`` on the token.

        :param match_obj: A match of :attr:`pattern`.
        """
        self.target, self.content = match_obj.group(1, 2)
class GencasterRenderer(BaseRenderer):
    """
    Acts as a python parser for the Gencaster markdown dialect.

    Inline directives are written as ``{name}`` directly followed by a value
    wrapped in backticks. The directive names understood by this renderer are
    the keys of ``gencaster_token_resolver``; each key maps to the method
    which produces the replacement text. A directive with an unknown name is
    logged and replaced by its plain value.

    The rendered document is wrapped into ``<speak>...</speak>``.
    """

    def __init__(self, stream_variables: Optional[Dict[str, str]] = None) -> None:
        """
        :param stream_variables: Values which can be looked up via the ``var``
            directive. ``None`` or an empty mapping results in no variables.
        """
        super().__init__(GencasterToken)
        self.d = (
            datetime.now()
        )  # unused but needed so we can access it in a script cell
        self.stream_variables: Dict[str, str] = (
            stream_variables if stream_variables else {}
        )
        # Maps the directive name used in the markdown text to the method
        # that produces its replacement.
        self.gencaster_token_resolver: Dict[str, Callable[[str], str]] = {
            "python": self.eval_python,
            "python_exec": self.exec_python,
            "chars": self.chars,
            "break": self.add_break,
            "moderate": self.moderate,
            "male": self.male,
            "female": self.female,
            "var": self.var,
            "raw_ssml": self.raw_ssml,
        }

    def validate_gencaster_tokens(self, text: str) -> bool:
        """Validates if the used tags are known to Gencaster

        .. todo::

            this is not implemented yet and will raise an exception

        :param text: Markdown text to validate.
        :raises NotImplementedError: Always.
        """
        raise NotImplementedError()

    def chars(self, text: str) -> str:
        """
        Speaks surrounded words as characters, so "can" becomes "C A N", see
        `say as <https://cloud.google.com/text-to-speech/docs/ssml#say%E2%80%91as>`_
        in GC docs.

        .. code-block:: markdown

            how {chars}`can` you talk

        :param text: The value of the directive.
        :return: ``text`` wrapped into a ``say-as`` element.
        """
        return f'<say-as interpret-as="characters">{text}</say-as>'

    def moderate(self, text: str) -> str:
        """
        Speaks surrounded words in a moderate manner, see
        `emphasis <https://cloud.google.com/text-to-speech/docs/ssml#emphasis>`_
        in GC docs.

        .. code-block:: markdown

            speak {moderate}`something` to me

        :param text: The value of the directive.
        :return: ``text`` wrapped into an ``emphasis`` element.
        """
        return f'<emphasis level="moderate">{text}</emphasis>'

    def female(self, text: str) -> str:
        """
        Speaks as ``DE_NEURAL2_C__FEMALE`` from
        :class:`stream.models.TextToSpeech.VoiceNameChoices`.

        .. code-block:: markdown

            hello {female}`world`

        :param text: The value of the directive.
        :return: ``text`` wrapped into a ``voice`` element.
        """
        return f'<voice name="{TextToSpeech.VoiceNameChoices.DE_NEURAL2_C__FEMALE}">{text}</voice>'

    def male(self, text: str) -> str:
        """
        Speaks as ``DE_NEURAL2_B__MALE`` from
        :class:`stream.models.TextToSpeech.VoiceNameChoices`.

        .. code-block:: markdown

            hello {male}`world`

        :param text: The value of the directive.
        :return: ``text`` wrapped into a ``voice`` element.
        """
        return f'<voice name="{TextToSpeech.VoiceNameChoices.DE_NEURAL2_B__MALE}">{text}</voice>'

    # ``break`` is a reserved keyword in Python, hence the name ``add_break``
    def add_break(self, text: str) -> str:
        """
        Adds a break between words, see
        `break <https://cloud.google.com/text-to-speech/docs/ssml#break>`_
        in GC docs.

        Example: Add a break of 300ms between hello and world.

        .. code-block:: markdown

            hello {break}`300ms` world

        :param text: The value of the directive, inserted as-is into the
            ``time`` attribute.
        :return: A self-closing ``break`` element.
        """
        return f'<break time="{text}"/>'

    def eval_python(self, text: str) -> str:
        """
        Evaluates a python inline expression via eval, e.g.

        .. code-block:: markdown

            two plus two is {python}`2+2`

        will result in `two plus two is 4`.

        Eval does not allow for variable assignment but we obtain a return value.

        If the expression evaluates to ``None`` or raises any exception an
        empty string is inserted. Exceptions are logged so a faulty expression
        does not abort the rendering of the whole document.

        .. todo::

            Store variables in :class:`story_graph.models.GraphSession` context.

        :param text: The python expression to evaluate.
        :return: ``str()`` of the result or an empty string.
        """
        # SECURITY: ``eval`` runs arbitrary code with the privileges of this
        # process -- script content must come from trusted authors only.
        try:
            result = eval(text)
        except Exception as e:
            # Same best-effort handling as ``exec_python``: catching only
            # ``SyntaxError`` would let e.g. a ``NameError`` abort the render.
            log.error(f"Could not evaluate python code: {e}")
            return ""
        return str(result) if result is not None else ""

    def exec_python(self, text: str) -> str:
        """
        Executes a Python statement. Nothing is inserted into the text.

        .. code-block:: markdown

            {python_exec}`a=2`

            A is now {python}`a`.

        is intended to become `A is now 2`.

        .. note::

            NOTE(review): ``exec`` is called without explicit namespaces from
            within this method, so names assigned by the statement are not
            guaranteed to be visible to later directives -- verify before
            relying on it (see the todo of :func:`~GencasterRenderer.eval_python`).

        .. seealso::

            Use :func:`~GencasterRenderer.var` to access stream variables.

        :param text: The python statement to execute.
        :return: Always an empty string; exceptions are logged.
        """
        # SECURITY: ``exec`` runs arbitrary code with the privileges of this
        # process -- script content must come from trusted authors only.
        try:
            exec(text)
        except Exception as e:
            log.error(f"Could not execute python code: {e}")
        return ""

    def var(self, text: str) -> str:
        """
        Refers to the value of a :class:`~stream.models.StreamVariable`.

        Example: Assuming we have set a streaming variable ``{"foo": "world"}``

        .. code-block:: markdown

            Hello {var}`foo`

        becomes `Hello world`.

        If the streaming variable does not exist it will be replaced with
        an empty string `""`, but we can provide a fallback value via ``|``.

        .. code-block:: markdown

            Hello {var}`something_unknown|foobar`

        becomes `Hello foobar` if the streaming variable ``something_unknown``
        does not exist.

        If more than one ``|`` is given, the part before the first ``|`` is the
        variable name and the part after the last ``|`` is the fallback.

        :param text: Variable name, optionally followed by ``|`` and a fallback.
        :return: The value of the variable or the fallback.
        """
        name, *alternatives = text.split("|")
        fallback_value = alternatives[-1] if alternatives else ""
        return self.stream_variables.get(name, fallback_value)

    def raw_ssml(self, text: str) -> str:
        """
        Allows to use raw ssml statements to extend functionality
        that may not be covered by this parser.

        Example:

        .. code-block:: markdown

            Hello {raw_ssml}`<emphasis level="moderate">world</emphasis>`

        :param text: The value of the directive.
        :return: ``text`` unchanged.
        """
        return text

    def render_gencaster_token(self, token: GencasterToken) -> str:
        """
        Replaces a :class:`GencasterToken` with the output of the method
        registered for ``token.target``.

        If no method is registered for ``token.target`` an error is logged and
        the plain ``token.content`` is returned.

        :param token: The directive to render.
        :return: The replacement text.
        """
        # Only the lookup is guarded: a ``KeyError`` raised inside a resolver
        # must not be misreported as an unknown directive.
        try:
            resolver = self.gencaster_token_resolver[token.target]
        except KeyError:
            log.error(f"Could not match token type {token.target}: {token.content}")
            return f"{token.content}"
        return resolver(token.content)

    def render_heading(self, token: block_token.Heading) -> str:
        """Renders a heading as its inner content followed by a newline."""
        return f"{self.render_inner(token)}\n"

    def render_line_break(self, token: span_token.LineBreak) -> str:
        """Renders every line break as a single newline character."""
        return "\n"

    def render_raw_text(self, token: span_token.RawText) -> str:
        """Returns the content of a raw text token without any modification."""
        return token.content  # type: ignore

    def render_document(self, token: block_token.Document) -> str:
        """Renders the document via the parent class and wraps the result
        into a ``<speak>`` element."""
        text = super().render_document(token)
        return f"<speak>{text}</speak>"