ZapZop text compression algorithm using a state machine

COMPRESS:


import json

import re


class TextEntity:
    """Split a text into a 'low-res' word stream (zap) and a 'wave map' (zop).

    The low-res stream holds every whitespace-separated word lowercased with
    all non-word characters removed. The wave map records, keyed by word
    index, the original spelling of each word that was altered. Together they
    allow the words of the original text to be rebuilt.

    Whitespace itself is not recorded: the original text is tokenised with
    ``str.split()``, so runs of spaces, tabs and newlines all come back as a
    single space after restoration.
    """

    # Runs of non-word characters. Note that ``\W`` keeps underscores as well
    # as letters and digits. Compiled once instead of on every word.
    _NON_WORD = re.compile(r'\W+')

    def __init__(self, text):
        """Store the text to compress and start with an empty wave map."""
        self.original = text
        self.wave_map = {}

    def dive_and_compress(self):
        """
        Simplifies text by removing case sensitivity and non-word characters,
        storing the 'Wave' (differences) in a separate dictionary.

        Returns:
            A ``(zap_content, wave_map)`` tuple. ``zap_content`` is the
            low-res words joined by single spaces; a word made up only of
            non-word characters becomes an empty token, which shows up as two
            adjacent spaces. ``wave_map`` maps word index -> original word for
            every word that differs from its low-res form. The same dict is
            also stored on ``self.wave_map``.
        """
        # Build a fresh map on every call so that entries from an earlier
        # call (e.g. after ``self.original`` was reassigned) cannot leak in.
        wave_map = {}
        compressed_words = []

        for i, word in enumerate(self.original.split()):
            # The 'Low-Res' version: lowercase, non-word characters removed
            low_res_word = self._NON_WORD.sub('', word).lower()
            compressed_words.append(low_res_word)

            # The 'Wave Map': store the original if it differs from low-res
            if word != low_res_word:
                wave_map[i] = word

        self.wave_map = wave_map
        return " ".join(compressed_words), wave_map

    def surface_and_restore(self, zap_content, wave_map):
        """Reconstructs the original text using the zap content and the zop map.

        Args:
            zap_content: Low-res text as produced by ``dive_and_compress``.
            wave_map: Mapping of word index -> original word. Keys may be
                ints or numeric strings (JSON turns int keys into strings).

        Returns:
            The restored words joined by single spaces.
        """
        # Convert keys back to integers if they were saved as JSON strings
        wave_map = {int(k): v for k, v in wave_map.items()}

        # Split on a literal space rather than on arbitrary whitespace:
        # punctuation-only words are stored as empty tokens, and a bare
        # split() would silently drop them and shift every later index.
        low_res_words = zap_content.split(" ")

        # Use the original detail where the wave map has one, otherwise the
        # low-res word was already identical to the original.
        return " ".join(
            wave_map.get(i, word) for i, word in enumerate(low_res_words)
        )


# --- Execution ---
# Demo run: compress a sample sentence, persist both artefacts, then restore
# the sentence from the in-memory results.
input_text = "Hello World! This is a test of the ZAP/ZOP compression algorithm."
entity = TextEntity(input_text)

# Step 1 - the 'Dive': low-res text plus the wave map of differences
zap_data, zop_data = entity.dive_and_compress()

# Step 2 - write the low-res text (.zap) and the JSON-encoded wave map (.zop)
with open('output.zap', 'w') as f:
    f.write(zap_data)
with open('output.zop', 'w') as f:
    f.write(json.dumps(zop_data))

print("Compression complete: 'output.zap' and 'output.zop' created.")

# Step 3 - the 'Surface': rebuild the text from the two pieces
restored_text = entity.surface_and_restore(zap_data, zop_data)
print(f"\nRestored Result:\n{restored_text}")






DECOMPRESS:

import json


def decompress_zap_zop(zap_path, zop_path):
    """
    Restores the original text by merging the low-res .zap file
    with the 'Wave Map' optimization info in the .zop file.

    Args:
        zap_path: Path to the low-res text file (words joined by single
            spaces; a punctuation-only word is stored as an empty token).
        zop_path: Path to the JSON file mapping word index -> original word.

    Returns:
        The restored words joined by single spaces. On failure no exception
        is raised; a human-readable error message string is returned instead
        (a fixed message for a missing file, ``"An error occurred: ..."`` for
        anything else), so callers cannot tell success from failure by type.
    """
    try:
        # 1. Load the low-res text
        with open(zap_path, 'r') as f:
            zap_content = f.read()

        # 2. Load the logic key (Wave Map)
        with open(zop_path, 'r') as f:
            zop_data = json.load(f)

        # 3. Process the restoration
        # JSON keys are always strings, so we convert them back to integers
        wave_map = {int(k): v for k, v in zop_data.items()}

        # Split on a literal space rather than on arbitrary whitespace:
        # punctuation-only words are stored as empty tokens, and a bare
        # split() would drop them and shift every later index. A trailing
        # line break (e.g. added by an editor) is tolerated and ignored.
        low_res_words = zap_content.rstrip("\r\n").split(" ")

        # Use the high-res detail where the wave map has one; otherwise the
        # low-res version was already correct.
        return " ".join(
            wave_map.get(i, word) for i, word in enumerate(low_res_words)
        )

    except FileNotFoundError:
        return "Error: Ensure both .zap and .zop files are in the directory."
    except Exception as e:
        # Deliberate catch-all: malformed JSON, non-numeric keys, etc. are
        # reported as text, matching the function's string-return contract.
        return f"An error occurred: {e}"


# --- Execution ---
# Demo run: rebuild the text from the files written by the compression step.
# This assumes 'output.zap' and 'output.zop' exist from the previous step
final_text = decompress_zap_zop('output.zap', 'output.zop')

# Header line first, then the restored text (or the returned error message)
print("--- Decompressed Text ---", final_text, sep="\n")



WHAT THIS DOES:


It runs a state-machine-style pass over the text in order to compress it - like down- and upscaling for images, but for text. It creates two files, a .zap and a .zop file: the .zap file is the text minus its extra info (letter case and punctuation), while the .zop file holds the details needed to recreate the original text from it.



Comments