Gist Blog

My attempt to make a HebrewString object: a string representing Hebrew text that you can slice and dice by letter, despite the extra Unicode combining characters (vowel points and cantillation marks) attached to each letter.

from rich import print

# Sample input: Genesis 1:2 in fully pointed Hebrew. Each base letter may be
# followed by combining code points (vowel points and cantillation marks), and
# the text also contains maqaf and sof pasuq punctuation.
e = "וְהָאָ֗רֶץ הָיְתָ֥ה תֹ֙הוּ֙ וָבֹ֔הוּ וְחֹ֖שֶׁךְ עַל־פְּנֵ֣י תְה֑וֹם וְר֣וּחַ אֱלֹהִ֔ים מְרַחֶ֖פֶת עַל־פְּנֵ֥י הַמָּֽיִם׃"


class HebrewString(str):
    """A ``str`` whose indexing and slicing operate on Hebrew letter clusters.

    A "letter cluster" is one Hebrew base letter together with every code
    point that follows it up to the next base letter (vowel points,
    cantillation marks, and punctuation such as maqaf or sof pasuq, none of
    which appear in ``HEBREW_LETTERS``).

    Only ``__getitem__`` and ``split`` are overridden; every other ``str``
    operation (``len``, iteration, ``in``, ...) still works on raw code points.
    """

    HEBREW_LETTERS = ["א", "ב", "ג", "ד", "ה", "ו", "ז", "ח", "ט", "י", "כ", "ך", "ל", "מ", "ם", "נ", "ן", "ס", "ע",
                      "פ", "ף", "צ", "ץ", "ק", "ר", "ש", "ת"]

    def __init__(self, hebrew_string):
        """Remember the raw input and pre-split the text into words.

        Args:
            hebrew_string: The value the string was built from (the string
                content itself is set by ``str.__new__``).
        """
        super().__init__()
        self._raw_input = hebrew_string
        # Use str.split rather than self.split: the override below builds
        # HebrewString objects, which is unnecessary for internal bookkeeping.
        # Splitting the string's own content (not the raw argument) also keeps
        # construction from a non-str value from failing.
        self.word_list = super().split()

    @property
    def character_split(self):
        """Return the text as a list of letter clusters.

        Words (as produced by whitespace splitting) are separated by a single
        ``" "`` element, so runs of whitespace in the input are normalised to
        one space and leading/trailing whitespace is dropped. No separator is
        emitted after the final word.

        Returns:
            list[str]: one entry per letter cluster or word separator.
        """
        letters = frozenset(self.HEBREW_LETTERS)  # O(1) membership in the loop
        word_pieces = []

        for position, word in enumerate(self.word_list):
            if position:
                # Separator goes *between* words only; a trailing one would
                # show up as a phantom last "character" when indexing.
                word_pieces.append(" ")

            part = ""
            for unicode_char in word:
                if part and unicode_char in letters:
                    # A new base letter closes the cluster collected so far.
                    word_pieces.append(part)
                    part = unicode_char
                else:
                    # First code point of the word, or a mark/punctuation
                    # that belongs to the current cluster.
                    part += unicode_char
            word_pieces.append(part)

        return word_pieces

    def split(self, *args, **kwargs):
        """Split like ``str.split`` but return ``HebrewString`` pieces."""
        return [HebrewString(x) for x in super().split(*args, **kwargs)]

    def __getitem__(self, items):
        """Index or slice by letter cluster instead of by code point.

        Args:
            items: An integer index or a ``slice`` into ``character_split``.

        Returns:
            HebrewString: the selected cluster, or the concatenation of the
            selected clusters for a slice.

        Raises:
            IndexError: if an integer index is out of range.
        """
        selected = self.character_split[items]
        if isinstance(items, slice):
            selected = "".join(selected)
        return HebrewString(selected)


if __name__ == '__main__':
    h = HebrewString(e)
    print(h)
    first_word = h.split()[0]
    # The last three letters (with their marks) of the first word.
    shoresh = first_word[-3:]
    print(shoresh)

Comments

Author
Oct 3, 2022

Python Library

This object is now available in my hebrew Python library!

pip install hebrew

Check it out: https://hebrew.aviperl.me/

To make a comment, please visit this post's Gist.

Add your comment!