crossword-generator/generator.py

458 lines
18 KiB
Python
Executable File

#!/usr/bin/env python3
import random
from hunspell import HunSpell
from z3 import *
from crossword import Crossword, CrosswordClue
DEBUG=False
class Generator(object):
def __init__(self, words, width, height, required_words=None):
self.words = words
self.width = width
self.height = height
self.required_words = required_words
def generate(self, min_clues):
for size_contraction in reversed(range(min([self.width,
self.height])-1)):
for layout in self.generate_layouts(
self.width-size_contraction,
self.height-size_contraction):
crossword = self.generate_for_layout(layout)
if crossword and len(crossword.clues) >= min_clues:
return crossword
elif not crossword:
print('Unable to generate crossword for layout.')
else:
print('Generated crossword with only %d clues.'
% len(crossword.clues))
print('Unable to generate crossword for size.')
return None
def generate_sized(self, width, height):
print('Attempting to generate %dx%d crossword...' % (width, height))
size = max([width, height])
words = list(filter(lambda w: len(w) <= size, self.words))
for required_word in self.required_words:
if required_word not in words:
return None
letters = sorted(set([letter for word in words
for letter in word]))
(Letter, letterVals) = EnumSort('Letter', ['blank']+letters)
letterDict = dict(zip([None]+letters, letterVals))
blank = letterDict[None]
(Word, wordVals) = EnumSort('Word', ['None']+words)
wordDict = dict(zip([None]+words, wordVals))
noWord = wordDict[None]
grid = [[Const('grid_row%d_col%d' % (row, col), Letter)
for col in range(width)]
for row in range(height)]
grid_flat = [el for grid_row in grid for el in grid_row]
hClues = [[Const('clue_h_row%d_col%d' % (row, col), Word)
for col in range(width)]
for row in range(height)]
vClues = [[Const('clue_v_row%d_col%d' % (row, col), Word)
for col in range(width)]
for row in range(height)]
all_clues_flat = [el for clue_row in hClues+vClues for el in clue_row]
(Clue, clueVals) =\
EnumSort('Clue', ['None']+[str(clue)
for clue in all_clues_flat])
clueDict = dict(zip([None]+all_clues_flat, clueVals))
noClue = clueDict[None]
wordVars = [Const('clue_for_%s' % word, Clue) for word in words]
wordVarDict = {wordVar: str(wordVar)[len('clue_for_'):]
for wordVar in wordVars}
s = Optimize()
for wordVar in wordVars:
if self.required_words\
and wordVarDict[wordVar] in self.required_words:
s.add(wordVar != noClue)
else:
s.add_soft(wordVar != noClue)
for clueVar in all_clues_flat:
clueVal = clueDict[clueVar]
for wordVar in wordVars:
wordVal = wordDict[str(wordVar)[len('clue_for_'):]]
s.add((clueVar == wordVal) == (wordVar == clueVal))
for row in range(height):
for col in range(width):
hClue = hClues[row][col]
vClue = vClues[row][col]
require_v_clue_conditions = [grid[row][col] != blank]
if row != 0:
require_v_clue_conditions.append(grid[row-1][col] == blank)
s.add(Implies(grid[row-1][col] != blank, vClue == noWord))
if row < height - 1:
require_v_clue_conditions.append(grid[row+1][col] != blank)
s.add(Implies(And(*require_v_clue_conditions),
vClue != noWord))
require_h_clue_conditions = [grid[row][col] != blank]
if col != 0:
require_h_clue_conditions.append(grid[row][col-1] == blank)
s.add(Implies(grid[row][col-1] != blank, hClue == noWord))
if col < width - 1:
require_h_clue_conditions.append(grid[row][col+1] != blank)
s.add(Implies(And(*require_h_clue_conditions),
hClue != noWord))
for wordVal in wordVals:
word = str(wordVal)
if word == 'None':
continue
if len(word) > width - col:
s.add(hClue != wordVal)
else:
s.add(Implies(hClue == wordVal,
And(*[grid[row][col+idx] == letterDict[letter]
for (idx, letter) in enumerate(word)])))
if len(word) < width - col:
s.add(Implies(hClue == wordVal,
grid[row][col+len(word)] == blank))
if len(word) > height - row:
s.add(vClue != wordVal)
else:
s.add(Implies(vClue == wordVal,
And(*[grid[row+idx][col] == letterDict[letter]
for (idx, letter) in enumerate(word)])))
if len(word) < height - row:
s.add(Implies(vClue == wordVal,
grid[row+len(word)][col] == blank))
def as_crossword_clue(word, clue):
clueStr = str(clue)
if clueStr == 'None':
return None
(_, _, wordStr) = str(word).split('_')
(_, axisStr, rowStr, colStr) = clueStr.split('_')
return CrosswordClue(word=wordStr, horizontal=axisStr=='h',
row=int(rowStr[3:]), col=int(colStr[3:]))
while True:
if unsat == s.check():
return None
model = s.model()
clues = list(filter(None,
[as_crossword_clue(word, model[word]) for word in wordVars]))
if not clues:
return None
crossword = Crossword(clues)
if crossword.check_connected():
return crossword
else:
print('')
print('Found unconnected crossword:')
print(crossword)
s.add(Or(*[word != model[word]
for word in wordVars
if str(model[word]) != 'None']))
def generate_for_layout(self, layout):
print('In generate_for_layout()...')
print(layout)
width = layout.width
height = layout.height
size = max([width, height])
words = list(filter(lambda w: len(w) <= size, self.words))
for required_word in self.required_words:
if required_word not in words:
return None
letters = sorted(set([letter for word in words
for letter in word]))
(Letter, letterVals) = EnumSort('Letter', ['blank']+letters)
letterDict = dict(zip([None]+letters, letterVals))
blank = letterDict[None]
(Word, wordVals) = EnumSort('Word', words)
wordDict = dict(zip(words, wordVals))
grid = [[Const('grid_row%d_col%d' % (row, col), Letter)
for col in range(width)]
for row in range(height)]
grid_flat = [el for grid_row in grid for el in grid_row]
all_clues_pairs =\
[(clue, Const('clue_%s_row%d_col%d' %\
('h' if clue.horizontal else 'v',
clue.row, clue.col), Word))
for clue in layout.clues]
all_clues_flat = [clueVar for (clue, clueVar) in all_clues_pairs]
(Clue, clueVals) =\
EnumSort('Clue', ['None']+[str(clue)
for clue in all_clues_flat])
clueDict = dict(zip([None]+all_clues_flat, clueVals))
noClue = clueDict[None]
wordVars = [Const('clue_for_%s' % word, Clue) for word in words]
wordVarDict = {wordVar: str(wordVar)[len('clue_for_'):]
for wordVar in wordVars}
s = Optimize()
for wordVar in wordVars:
if self.required_words\
and wordVarDict[wordVar] in self.required_words:
s.add(wordVar != noClue)
else:
s.add_soft(wordVar != noClue)
for (clue, clueVar) in all_clues_pairs:
clueVal = clueDict[clueVar]
row = clue.row
col = clue.col
for wordVar in wordVars:
wordVal = wordDict[str(wordVar)[len('clue_for_'):]]
word = str(wordVal)
if len(word) != len(clue.word):
s.add(clueVar != wordVal)
continue
s.add((clueVar == wordVal) == (wordVar == clueVal))
if clue.horizontal:
s.add(Implies(clueVar == wordVal,
And(*[grid[row][col+idx] == letterDict[letter]
for (idx, letter) in enumerate(word)])))
else:
s.add(Implies(clueVar == wordVal,
And(*[grid[row+idx][col] == letterDict[letter]
for (idx, letter) in enumerate(word)])))
def as_crossword_clue(word, clue):
clueStr = str(clue)
if clueStr == 'None':
return None
(_, _, wordStr) = str(word).split('_')
(_, axisStr, rowStr, colStr) = clueStr.split('_')
return CrosswordClue(word=wordStr, horizontal=axisStr=='h',
row=int(rowStr[3:]), col=int(colStr[3:]))
while True:
if unsat == s.check():
return None
model = s.model()
clues = list(filter(None,
[as_crossword_clue(word, model[word]) for word in wordVars]))
if not clues:
return None
crossword = Crossword(clues)
if crossword.check_connected():
return crossword
else:
print('')
print('Found unconnected crossword:')
print(crossword)
s.add(Or(*[word != model[word]
for word in wordVars
if str(model[word]) != 'None']))
def generate_layouts(self, width, height):
print('Attempting to generate %dx%d crossword layouts...' % (width, height))
size = max([width, height])
words = list(filter(lambda w: len(w) <= size, self.words))
for required_word in self.required_words:
if required_word not in words:
print('Cannot generate crossword smaller than required word.')
return
blank = False
words = list(set(['*'*len(word) for word in words]))
(Word, wordVals) = EnumSort('Word', ['None']+words)
wordDict = dict(zip([None]+words, wordVals))
noWord = wordDict[None]
grid = [[Const('grid_row%d_col%d' % (row, col), BoolSort())
for col in range(width)]
for row in range(height)]
grid_flat = [el for grid_row in grid for el in grid_row]
hClues = [[Const('clue_h_row%d_col%d' % (row, col), Word)
for col in range(width)]
for row in range(height)]
vClues = [[Const('clue_v_row%d_col%d' % (row, col), Word)
for col in range(width)]
for row in range(height)]
all_clues_flat = [el for clue_row in hClues+vClues for el in clue_row]
(Clue, clueVals) =\
EnumSort('Clue', ['None']+[str(clue)
for clue in all_clues_flat])
clueDict = dict(zip([None]+all_clues_flat, clueVals))
noClue = clueDict[None]
s = Optimize()
for clueVar in all_clues_flat:
clueVal = clueDict[clueVar]
s.add_soft(clueVar != noWord)
s.add(Or(*[clueVar != noWord for clueVar in all_clues_flat]))
for word_len in set([len(word) for word in self.required_words]):
required_word = wordDict['*'*word_len]
s.add(Or(*[clueVar == required_word for clueVar in all_clues_flat]))
for row in range(height):
for col in range(width):
hClue = hClues[row][col]
vClue = vClues[row][col]
require_v_clue_conditions = [grid[row][col] != blank]
if row != 0:
require_v_clue_conditions.append(grid[row-1][col] == blank)
s.add(Implies(grid[row-1][col] != blank, vClue == noWord))
if row < height - 1:
require_v_clue_conditions.append(grid[row+1][col] != blank)
s.add(Implies(And(*require_v_clue_conditions),
vClue != noWord))
require_h_clue_conditions = [grid[row][col] != blank]
if col != 0:
require_h_clue_conditions.append(grid[row][col-1] == blank)
s.add(Implies(grid[row][col-1] != blank, hClue == noWord))
if col < width - 1:
require_h_clue_conditions.append(grid[row][col+1] != blank)
s.add(Implies(And(*require_h_clue_conditions),
hClue != noWord))
for wordVal in wordVals:
word = str(wordVal)
if word == 'None':
continue
if len(word) > width - col:
s.add(hClue != wordVal)
else:
s.add(Implies(hClue == wordVal,
And(*[grid[row][col+idx] != blank
for (idx, _) in enumerate(word)])))
if len(word) < width - col:
s.add(Implies(hClue == wordVal,
grid[row][col+len(word)] == blank))
if len(word) > height - row:
s.add(vClue != wordVal)
else:
s.add(Implies(vClue == wordVal,
And(*[grid[row+idx][col] != blank
for (idx, _) in enumerate(word)])))
if len(word) < height - row:
s.add(Implies(vClue == wordVal,
grid[row+len(word)][col] == blank))
def as_crossword_clue(word, clue):
clueStr = str(clue)
wordStr = str(word)
if wordStr == 'None':
return None
(_, axisStr, rowStr, colStr) = clueStr.split('_')
return CrosswordClue(word=wordStr, horizontal=axisStr=='h',
row=int(rowStr[3:]), col=int(colStr[3:]))
while True:
if unsat == s.check():
print('No more layouts.')
return
model = s.model()
clues = list(filter(None,
[as_crossword_clue(model[clue], clue)
for clue in all_clues_flat]))
if not clues:
print('No clues: ' + str(clues))
return
crossword = Crossword(clues)
if crossword.check_connected():
print('')
print('Found connected crossword layout:')
print(crossword)
yield crossword
else:
if DEBUG:
print('')
print('Found unconnected crossword layout:')
print(crossword)
s.add(Or(*[clue != model[clue]
for clue in all_clues_flat
if str(model[clue]) != 'None']))
spelling = HunSpell(
'anagram-games/www/wordlist/dictionaries/en_US/en_US.dic',
'anagram-games/www/wordlist/dictionaries/en_US/en_US.aff')
def check_spelling(word):
try:
return spelling.spell(word)
except UnicodeEncodeError:
# Only want a-z in words anyway
return False
with open('anagram-games/www/wordlist/en_50k.txt', 'r', encoding='utf-8') as f:
lines = [line.split(' ')[0] for line in f.readlines()]
common_words = list(filter(check_spelling, lines))
def random_common_word(min_length=None):
if min_length:
return random.choice(list(filter(lambda word: len(word) >= min_length,
common_words)))
else:
return random.choice(common_words)
def generate_crossword_for_word(word, min_clues, max_size):
sorted_word = sorted(word)
def word_matches(other):
if len(other) <= 1:
return False
sorted_other = sorted(other)
# From https://stackoverflow.com/a/29954829
pos = 0
for ch in sorted_word:
if pos < len(sorted_other) and ch == sorted_other[pos]:
pos += 1
return pos == len(sorted_other)
matching_words = list(filter(word_matches, common_words))
generator = Generator(words=matching_words,
width=max_size, height=max_size,
required_words=[word]
)
crossword = generator.generate(min_clues)
return crossword
if __name__ == '__main__':
#word = random_common_word(3)
word = 'hello'
crossword = generate_crossword_for_word(word=word,
min_clues=1, max_size=6)
print('')
print(crossword)
print('')
if not crossword:
print('Unable to generate crossword.')
elif crossword.valid():
print('Crossword is valid.')
else:
print('Crossword is invalid.')