ZapZop text compression algorithm using a state machine

COMPRESS:

import json

import re

class TextEntity:
    """Split a text into a 'low-res' word stream plus a 'wave map' of details.

    The low-res stream (the .zap content) holds every word lowercased with
    its non-word characters removed. The wave map (the .zop content) maps a
    word's position to its original spelling wherever the two differ, so the
    pair together can rebuild the text word for word.

    Words are obtained with ``str.split()``, so runs of whitespace in the
    input are not preserved: the restored text always uses single spaces.
    """

    # Runs of non-word characters, i.e. anything other than letters, digits
    # and the underscore. Compiled once because it is applied to every word.
    _NON_WORD = re.compile(r"\W+")

    # Written in place of a word that has no word characters at all (for
    # example "-" or "!!!"). Without it the token would be empty, vanish when
    # the zap content is split again, and shift every later wave-map index.
    _EMPTY_TOKEN = "_"

    def __init__(self, text):
        """Remember *text* and start with an empty wave map."""
        self.original = text
        self.wave_map = {}

    def dive_and_compress(self):
        """Build the low-res text and the wave map for ``self.original``.

        Returns:
            A ``(zap_content, wave_map)`` tuple. ``zap_content`` is the
            space-joined low-res words; ``wave_map`` maps the integer index
            of each word whose original form differs from its low-res form
            to that original form. The same dict is also stored on
            ``self.wave_map``.
        """
        # Start from a fresh map so entries from an earlier call (made with
        # a different ``self.original``) cannot leak into this result.
        self.wave_map = {}
        compressed_words = []

        for i, word in enumerate(self.original.split()):
            low_res_word = self._NON_WORD.sub("", word).lower()

            # Record the original whenever detail was lost.
            if word != low_res_word:
                self.wave_map[i] = word

            # Every word must leave a non-empty token so positions survive
            # the join/split round trip; the wave map holds the real word.
            compressed_words.append(low_res_word or self._EMPTY_TOKEN)

        return " ".join(compressed_words), self.wave_map

    def surface_and_restore(self, zap_content, wave_map):
        """Rebuild the text from the low-res content and its wave map.

        Args:
            zap_content: Space-separated low-res words.
            wave_map: Mapping of word index to original word. Keys may be
                ints or their string form (as produced by a JSON round trip).

        Returns:
            The words joined by single spaces, with every indexed position
            replaced by its wave-map entry.
        """
        # JSON turns integer keys into strings; normalise them back.
        wave_map = {int(k): v for k, v in wave_map.items()}

        return " ".join(
            wave_map.get(i, word)
            for i, word in enumerate(zap_content.split())
        )

# --- Execution ---

input_text = "Hello World! This is a test of the ZAP/ZOP compression algorithm."
entity = TextEntity(input_text)

# Step 1 - the 'Dive': derive the low-res text and its wave map.
zap_data, zop_data = entity.dive_and_compress()

# Step 2 - persist both halves: plain text for the .zap, JSON for the .zop.
with open('output.zap', 'w') as zap_file:
    zap_file.write(zap_data)

with open('output.zop', 'w') as zop_file:
    zop_file.write(json.dumps(zop_data))

print("Compression complete: 'output.zap' and 'output.zop' created.")

# Step 3 - the 'Surface': rebuild the text from the in-memory pair and show it.
restored_text = entity.surface_and_restore(zap_data, zop_data)
print(f"\nRestored Result:\n{restored_text}")

DECOMPRESS:

import json

def decompress_zap_zop(zap_path, zop_path):
    """Rebuild the original text from a .zap file and its .zop wave map.

    The .zap file holds the low-res words joined by single spaces; the .zop
    file is a JSON object mapping a word's index to its original spelling.

    Args:
        zap_path: Path of the low-res text file.
        zop_path: Path of the JSON wave-map file.

    Returns:
        The restored text, with words joined by single spaces. On failure an
        error message string is returned instead of raising: a fixed message
        when either file is missing, otherwise one that includes the
        exception text.
    """
    try:
        # 1. Load the low-res text.
        with open(zap_path, 'r') as f:
            zap_content = f.read()

        # 2. Load the wave map.
        with open(zop_path, 'r') as f:
            zop_data = json.load(f)

        # JSON object keys are always strings; convert them back to indices.
        wave_map = {int(k): v for k, v in zop_data.items()}

        # 3. Split on the exact separator the compressor joins with. A bare
        # split() would discard the empty token left by a punctuation-only
        # word and shift every later index. A trailing line terminator
        # (e.g. added by an editor) is dropped so it does not stick to the
        # last word.
        low_res_words = zap_content.strip("\r\n").split(" ")

        # Use the wave-map entry where one exists; otherwise the low-res
        # word was already identical to the original.
        return " ".join(
            wave_map.get(i, word) for i, word in enumerate(low_res_words)
        )

    except FileNotFoundError:
        return "Error: Ensure both .zap and .zop files are in the directory."

    except Exception as e:
        return f"An error occurred: {e}"

# --- Execution ---

# Relies on 'output.zap' and 'output.zop' already being on disk, as written
# by the compression step.
final_text = decompress_zap_zop('output.zap', 'output.zop')

# Header line followed by the restored text, one per line.
print("--- Decompressed Text ---", final_text, sep="\n")

WHAT THIS DOES:

It runs a state-machine-style pass over the text in order to compress it, much like downscaling and upscaling an image, but for text. The result is two files: a .zap file, which holds the text stripped of its extra information (capitalization and punctuation), and a .zop file, which holds that extra information so that the original text can be recreated.

Search This Blog

Hart Ziet Kleur

ZapZop text compression algorithm using a state machine

Comments

Post a Comment