ZapZop text compression algorithm using a state machine
COMPRESS:
import json
import re
class TextEntity:
    """Split a text into a 'low-res' word stream plus a map of lost detail.

    The low-res stream (the .zap content) holds every whitespace-separated
    word lowercased with non-word characters removed.  The 'Wave Map' (the
    .zop content) maps a word's position to its original spelling whenever
    the low-res form differs from it.

    Note: the text is tokenised with ``str.split()`` and rebuilt with single
    spaces, so the original whitespace layout (newlines, repeated spaces)
    is not preserved by a round trip.
    """

    # Compiled once and shared: matches every run of non-word characters.
    # The regex word class keeps letters, digits and the underscore.
    _NON_WORD = re.compile(r'\W+')

    def __init__(self, text):
        """Store the text to compress and start with an empty Wave Map."""
        self.original = text
        self.wave_map = {}

    def dive_and_compress(self):
        """Build the low-res text and the Wave Map for ``self.original``.

        Returns:
            A ``(zap_content, wave_map)`` tuple: the low-res words joined by
            single spaces, and a dict mapping word index to original word
            for every word that differs from its low-res form.
        """
        # Start from a fresh map so a repeated call (e.g. after `original`
        # was changed) cannot carry over stale entries from an earlier run.
        self.wave_map = {}
        compressed_words = []
        for i, word in enumerate(self.original.split()):
            low_res_word = self._NON_WORD.sub('', word).lower()
            compressed_words.append(low_res_word)
            # Only words that lost information need an entry in the map.
            if word != low_res_word:
                self.wave_map[i] = word
        return " ".join(compressed_words), self.wave_map

    def surface_and_restore(self, zap_content, wave_map):
        """Reconstruct the text from the low-res content and the Wave Map.

        Args:
            zap_content: Low-res text as produced by ``dive_and_compress``.
            wave_map: Mapping of word index to original word.  Keys may be
                ints or numeric strings (as they are after a JSON round trip).

        Returns:
            The restored words joined by single spaces.
        """
        # JSON turns integer keys into strings; normalise them back to ints.
        wave_map = {int(k): v for k, v in wave_map.items()}
        # Split on a single space -- the exact inverse of the " ".join used
        # when compressing.  A bare split() would drop the empty tokens left
        # by punctuation-only words and shift every later index.  A trailing
        # newline (e.g. added by an editor) is tolerated.
        low_res_words = zap_content.strip("\r\n").split(" ")
        return " ".join(
            wave_map.get(i, word) for i, word in enumerate(low_res_words)
        )
# --- Execution ---
# Demo run: compress a sample sentence, write both artefacts to disk, then
# restore the sentence from the in-memory results.
input_text = "Hello World! This is a test of the ZAP/ZOP compression algorithm."
entity = TextEntity(input_text)

# Step 1 - the 'Dive': derive the low-res text and the Wave Map.
zap_data, zop_data = entity.dive_and_compress()

# Step 2 - persist the low-res text (.zap) and the Wave Map as JSON (.zop).
with open('output.zap', 'w') as zap_file:
    zap_file.write(zap_data)
with open('output.zop', 'w') as zop_file:
    zop_file.write(json.dumps(zop_data))

print("Compression complete: 'output.zap' and 'output.zop' created.")

# Step 3 - the 'Surface': rebuild the text from the two pieces.
restored_text = entity.surface_and_restore(zap_data, zop_data)
print(f"\nRestored Result:\n{restored_text}")
DECOMPRESS:
import json
def decompress_zap_zop(zap_path, zop_path):
    """Restore text by merging a low-res .zap file with its .zop Wave Map.

    Args:
        zap_path: Path to the file holding the low-res words, separated by
            single spaces.
        zop_path: Path to the JSON file mapping word index to original word.

    Returns:
        The restored words joined by single spaces.  On failure no exception
        is raised; an error message string is returned instead: a fixed
        message when a file is missing, or "An error occurred: ..." for any
        other exception.
    """
    try:
        # 1. Load the low-res text
        with open(zap_path, 'r') as f:
            zap_content = f.read()
        # 2. Load the Wave Map
        with open(zop_path, 'r') as f:
            zop_data = json.load(f)
        # JSON keys are always strings, so convert them back to integers.
        wave_map = {int(k): v for k, v in zop_data.items()}
        # 3. Split on a single space so empty low-res tokens (left by
        # punctuation-only words) keep their position.  A bare split() would
        # drop them and shift every later index against the Wave Map.  A
        # trailing newline in the file is tolerated.
        low_res_words = zap_content.strip("\r\n").split(" ")
        # Use the high-res detail where the map has one; otherwise the
        # low-res word was already correct.
        return " ".join(
            wave_map.get(i, word) for i, word in enumerate(low_res_words)
        )
    except FileNotFoundError:
        return "Error: Ensure both .zap and .zop files are in the directory."
    except Exception as e:
        # Kept deliberately broad: callers receive a message string for any
        # failure (malformed JSON, non-numeric keys, ...) rather than a raise.
        return f"An error occurred: {e}"
# --- Execution ---
# Expects 'output.zap' and 'output.zop' to be present already (they are
# written by the compression step); the result is printed under a header.
final_text = decompress_zap_zop('output.zap', 'output.zop')
print("--- Decompressed Text ---", final_text, sep="\n")
WHAT THIS DOES:
It makes a single pass over the text, word by word, to compress it. This is like downscaling and upscaling an image, but for text. The result is two files: the .zap file holds the text minus its extra information (capitalisation and punctuation), and the .zop file records that removed information. Together, the two files can recreate the original text.
Comments
Post a Comment