###################################################################################
###################################################################################
# Rhymatron V2b- A generative, autonomous rap-phenomenon
# Concept and Code by Noah King, 4/09/2010
# Project for Reading and Writing Electronic Text - Spring 2010
# Professor Adam Parrish
###################################################################################
###################################################################################
################################################
################ setup and variable declaration
################################################
import sys
import random

# pronunciation data: diction maps a word to its phoneme list (filled from
# CMU_dict.txt), phonemes holds the vowel phonemes (from CMU_phoneme_vowels.txt),
# finishedLines collects the words chosen for the generated verses
diction = {}
phonemes = []
finishedLines = []

# switches and a scratch counter
buildDictionary = 1
parseInput = 1
i = 0

# word buckets; wordList1..wordList4 are filled by syllable count further down
wordList = []
wordList1, wordList2, wordList3, wordList4 = [], [], [], []

# tokenised input text
lines = []
punct = []
words = []

# successor tables: ngrams is keyed by one word, ngrams2 by a "word word" pair
ngrams = {}
ngrams2 = {}
ngrams3 = {}
ngrams4 = {}


wordsTemp = []
wordsNull = set()
rhymeWords = []
# characters treated as punctuation when tokenising and when cleaning the ngram table
xchars = [
	".", ",", ";", ":", "!", "?", "'", '"',
	"(", ")", "[", "]", "*", "-",
]
badwords = [
	"i", "la", "m", "re", "et", "am", "t",
	"fro", "and", "dr", "um", "le", "is", "the",
	"she", "he", "for", "me", "to", "you", "but",
	"of", "they", "them", "no", "yes", "d", "g",
]


################################################
################ parse/optimize input text
################################################
allText = list()

#open each external text file; "with" closes every handle once it has been read
for corpusName in ("twain.txt", "alice.txt", "moby.txt"):
	with open(corpusName) as corpusFile:
		for line in corpusFile:
			allText.append(line)

#prep the text: lower-case every line and split it into word tokens
for line in allText:
	line = line.strip()
	line = line.lower()
	line = line.split(" ")
	for word in line:
		#splitting on a single space yields "" for blank lines and for runs of
		#spaces; skip just that empty token (breaking out here would throw away
		#the rest of the line after the first double space)
		if word == "":
			continue
		#a multi-character word ending in punctuation is stored as two tokens:
		#the word itself and the punctuation mark
		if len(word) > 1 and word[-1] in xchars:
			words.append(word[0:-1])
			words.append(word[-1])
		else:
			#plain words and single characters are stored unchanged
			words.append(word)

length = len(words)

#single-word table: every word maps to the list of words that directly follow it
#(setdefault creates the list the first time a key is seen)
for i in range(length - 1):
	ngrams.setdefault(words[i], list()).append(words[i + 1])

#word-pair table: "first second" maps to the list of words that follow the pair
for i in range(length - 2):
	pairKey = words[i] + " " + words[i + 1]
	ngrams2.setdefault(pairKey, list()).append(words[i + 2])

#delete bad entries: drop every key that contains a punctuation character
keyremove = list()
for key in ngrams:
	if any(xchar in key for xchar in xchars):
		keyremove.append(key)
#deletion happens in a second pass because a dict cannot change size while
#it is being iterated
for key in keyremove:
	del ngrams[key]
#then drop every successor word that contains punctuation; each list is rebuilt
#in one step instead of calling remove() while looping over it, which skipped
#the element following each removed one
for key in ngrams:
	values = ngrams[key]
	values[:] = [value for value in values
		if not any(xchar in value for xchar in xchars)]
#print ngrams	

#Create Content
#finishedSentences = list()
#for i in range(10):
#	finishedLines = list()
#	finishedLines.append(random.choice(ngrams["."]))
#	word = random.choice(ngrams[finishedLines[-1]])
#	finishedLines.append(word)
#
#	for i in range (100):
#		keylist = finishedLines[-2:]
#		keypair = " ".join(keylist)
#		word = random.choice(ngrams2[keypair])
#		finishedLines.append(word)
#		if word == ".":
#			break
#		
#	finishedLines = " ".join(finishedLines)
#	print finishedLine

	
#################################################
################# build dictionary
#################################################

if buildDictionary == 1:
	#keep the pronunciation of every dictionary word that is also a key in ngrams;
	#"with" closes the file as soon as it has been read
	with open("CMU_dict.txt") as dictFile:
		for line in dictFile:
			line = line.strip()
			line = line.lower()
			line = line.split(" ")
			if line[0] in ngrams:
				#field 0 is the word and the phonemes start at field 2
				#(presumably field 1 is the empty string left by a two-space
				#separator in the dictionary file -- confirm against the data)
				diction[line[0]] = line[2:]
	#one vowel phoneme per line; these are what the syllable counter looks for
	with open("CMU_phoneme_vowels.txt") as vowelFile:
		for line in vowelFile:
			line = line.strip()
			line = line.lower()
			phonemes.append(line)
#print diction
#print phonemes

			
#################################################
################# count syllables
#################################################
sylList = list()
#syllable count -> the word list that collects words of that length
bucketFor = {1: wordList1, 2: wordList2, 3: wordList3, 4: wordList4}
for key in diction:
	val = diction[key]
	#the vowel phonemes of the word, in order; one per syllable
	vowelSounds = [sound for sound in val if sound in phonemes]
	#the syllable list is stored as the last element of the entry
	val.append(vowelSounds)
	#file the word under its syllable count (words with 0 or 5+ syllables
	#are not collected in any list)
	bucket = bucketFor.get(len(val[-1]))
	if bucket is not None:
		bucket.append(key)

#print wordList1
#for i in range(10):
#	print "********************"
#print wordList2


#################################################
################# sample dictionary
#################################################
#
## sample dict entries : 'revolvers': ['r', 'ih0', 'v', 'aa1', 'l', 'v', 'er0', 'z', ['ih0', 'aa1', 'er0']]
## sample explained:				 'word': [ phoneme list [syllable phoneme list]]
#
#################################################
################# define functions
#################################################

def rhymer( syllableCount, rhymeList, val ):
	"""Count how many syllable positions of a word satisfy the rhyme template.

	syllableCount -- number of syllable positions to check, starting at the first
	rhymeList     -- one entry per position: "free" (any syllable is accepted)
	                 or a key into the module-level storedSyllables dict
	val           -- a diction entry whose last element is the word's list of
	                 vowel phonemes (one per syllable)

	Returns the number of checked positions that matched; the word fits the
	template when the result equals syllableCount.

	Raises IndexError when rhymeList or the syllable list is shorter than
	syllableCount, and KeyError when a rhyme key has not been stored yet.
	"""
	trueCount = 0
	# one loop instead of four copy-pasted branches, so any syllable count works
	for position in range(syllableCount):
		rhymeKey = rhymeList[position]
		if rhymeKey == "free" or val[-1][position] == storedSyllables[rhymeKey]:
			trueCount = trueCount + 1
	return trueCount
			
def alliterater( alliterationBinary, candidate=None ):
	"""Return 1 when a candidate word alliterates with the last chosen word, else 0.

	alliterationBinary -- 1 to perform the check, anything else disables it
	candidate          -- phoneme list of the word being considered (a diction
	                      entry). When omitted, the module-level name val is
	                      used, which is what the original code always did.

	Two words alliterate here when their first phonemes are equal. The result
	is 0 when the check is disabled, when no word has been chosen yet, or when
	the last chosen word has no diction entry.
	"""
	if alliterationBinary != 1 or len(finishedLines) == 0:
		return 0
	if candidate is None:
		# legacy fallback: val is whatever entry a module-level loop last bound,
		# not the word under consideration -- pass candidate explicitly instead
		candidate = val
	previous = diction.get(finishedLines[-1])
	if not previous or not candidate:
		return 0
	if candidate[0] == previous[0]:
		return 1
	return 0
	
def safeMaker(syllableCount, rhymeList):
	"""Return every diction word that fits the rhyme template.

	Used as a fallback when the stricter selection finds nothing: only the
	syllable count and the rhyme keys are checked here.

	syllableCount -- required number of syllables
	rhymeList     -- one rhyme key (or "free") per syllable, as taken by rhymer

	Returns a list of matching words; the list is empty when nothing matches.
	"""
	wordList = list()
	for key in diction:
		val = diction[key]
		if len(val[-1]) != syllableCount:
			continue
		if rhymer(syllableCount, rhymeList, val) == syllableCount:
			wordList.append(key)
	# return only after the whole dictionary has been scanned; returning from
	# inside the loop gave back just the first match, or None with no match
	return wordList

def matchMaker(syllableCount, rhymeList, markovBinary):
	"""Pick one word that fits the rhyme template and append it to finishedLines.

	syllableCount -- required number of syllables
	rhymeList     -- one rhyme key (or "free") per syllable
	markovBinary  -- 1 to keep only words that follow the previously chosen
	                 word in the ngrams table; any other value skips that filter

	Side effects: appends the chosen word to finishedLines and a marker to
	rhymeMap -- "@" when the word passed every filter, "*" when the filters
	left nothing and the word came from the safeMaker fallback.

	Raises ValueError when not even the fallback yields a word.
	"""
	wordList = list()
	#if the number of syllables in the word is different than input, report it;
	#the candidate list stays empty, so the fallback below is used
	if syllableCount != len(rhymeList):
		print("Error!  Please check that there is a rhyme key for each syllable")
	else:
		#keep the words with the right syllable count whose syllables all rhyme or are free
		for key in diction:
			val = diction[key]
			if len(val[-1]) == syllableCount:
				if rhymer(syllableCount, rhymeList, val) == syllableCount:
					wordList.append(key)
	if markovBinary == 1 and len(finishedLines) > 0:
		#build a filtered list rather than removing while iterating, which
		#skipped elements and let non-successors through
		valueList = set(ngrams.get(finishedLines[-1], []))
		wordList = [word for word in wordList if word in valueList]

	marker = "@"
	if len(wordList) == 0:
		#nothing survived: fall back to rhyme-only matching
		#("or []" tolerates a fallback that returns None)
		wordList = list(safeMaker(syllableCount, rhymeList) or [])
		marker = "*"
	if len(wordList) == 0:
		raise ValueError("no word with %d syllable(s) fits rhyme keys %r"
			% (syllableCount, rhymeList))
	finishedLines.append(random.choice(wordList))
	rhymeMap.append(marker)

#################################################
################# run functions
#################################################
#
## The below four verses by rapper Mos Def were analyzed and broken down into a rhyme template 
#
## Two words, United States, no love, no breaks
## Low brow, high stakes, crack smoke, black folks
## Big Macs, fat folks, ecstasy capsules
## Presidential scandals, everybody MOVE
#
## Two words, Mos Def, K West, hot shit
## Calm down, get back, ghetto people, got this
## Game pawn, lock shit, gun pulled, cock shit
## We won't stop shit, everybody MOVE
#
## Two words, B K, N Y, bed sty
## Two hard, too hungry, too many, that's why
## These streets know game, can't ball, don't play
## Heavy traffic, one lane, everybody MOVE
#
## Two words, Mos Def, black jack, hot shit
## Calm down, get back, ghetto people, got this
## Game point lock, long pump cocked
## We won't stop, everybody MOVE
#
## this is the template, where "-" is a free syllable
#
## A B, ---C, D B, D C,
## D B, - C, E C, E C,
## - E, E C, --- EC,
## ---- EC, ---- A
#
#
# rhymeMap receives one marker per chosen word (appended by matchMaker);
# storedSyllables maps a rhyme key ("A" to "E") to the vowel phoneme it stands for
rhymeMap = list()
storedSyllables = dict()

# First verse. Each matchMaker call appends one word to finishedLines; the
# arguments are (syllable count, one rhyme key per syllable, Markov switch).
# A Markov switch of 0 starts a line: the word is not required to follow the
# previous one in the ngrams table.
#
# the first time the words are selected rhyme keys are undefined as free syllables
# then immediately afterwards they are recorded as rhyme keys

# line 1 -- keys A, B, C and D are defined here
matchMaker(1,["free"],0)
# key A = first vowel phoneme of the word just chosen
storedSyllables["A"] = diction[finishedLines[-1]][-1][0]
matchMaker(1,["free"],1)
storedSyllables["B"] = diction[finishedLines[-1]][-1][0]
matchMaker(4,["free", "free", "free", "free"],1)
# key C = fourth vowel phoneme of the four-syllable word just chosen.
# Workaround (author's note): issues arose around the final syllable being
# accented or not. If the phoneme contains no "1", its last character is
# replaced by "1"; changing a final syllable from unaccented to accented made
# for a better rhyme key, particularly for single syllable words.
tempSyllable = diction[finishedLines[-1]][-1][3]
if "1" in tempSyllable:
	storedSyllables["C"] = tempSyllable
else:
	tempSyllable = tempSyllable[0:-1] + "1"
	storedSyllables["C"] = tempSyllable
matchMaker(1,["free"],1)
storedSyllables["D"] = diction[finishedLines[-1]][-1][0]
matchMaker(1,["B"],1)
matchMaker(1,["D"],1)
matchMaker(1,["C"],1)

# line 2 -- key E is defined here
matchMaker(1,["D"],0)
matchMaker(1,["B"],1)
matchMaker(1,["free"],1)
matchMaker(1,["C"],1)
matchMaker(1,["free"],1)
storedSyllables["E"] = diction[finishedLines[-1]][-1][0]
matchMaker(1,["C"],1)
matchMaker(1,["E"],1)
matchMaker(1,["C"],1)

# line 3
matchMaker(1,["free"],0)
matchMaker(1,["E"],1)
matchMaker(1,["E"],1)
matchMaker(1,["C"],1)
matchMaker(3,["free", "free", "free"],1)
matchMaker(2,["free", "C"],1)

# line 4 -- the verse closes on a word rhyming with key A
matchMaker(4,["free", "free", "free", "free"],0)
matchMaker(2,["free", "C"],1)
matchMaker(4,["free", "free", "free", "free"],1)
matchMaker(1,["A"],1)

######second through fifth verses (the loop below runs the template four more times; only the first four verses are printed)

# One verse as data: (syllable count, rhyme key per syllable, Markov switch)
# for each of its 25 words, in order. All rhyme keys were stored while the
# first verse was generated.
verseTemplate = [
	# line 1
	(1, ["A"], 0),
	(1, ["B"], 1),
	(4, ["free", "free", "free", "free"], 1),
	(1, ["D"], 1),
	(1, ["B"], 1),
	(1, ["D"], 1),
	(1, ["C"], 1),
	# line 2
	(1, ["D"], 0),
	(1, ["B"], 1),
	(1, ["free"], 1),
	(1, ["C"], 1),
	(1, ["free"], 1),
	(1, ["C"], 1),
	(1, ["E"], 1),
	(1, ["C"], 1),
	# line 3
	(1, ["free"], 0),
	(1, ["E"], 1),
	(1, ["E"], 1),
	(1, ["C"], 1),
	(3, ["free", "free", "free"], 1),
	(2, ["free", "C"], 1),
	# line 4
	(4, ["free", "free", "free", "free"], 0),
	(2, ["free", "C"], 1),
	(4, ["free", "free", "free", "free"], 1),
	(1, ["A"], 1),
]

# run the template four times, one word per template entry
for i in range(0, 4):
	for syllables, rhymeKeys, useMarkov in verseTemplate:
		# hand over a fresh list each time, as the literal calls did
		matchMaker(syllables, list(rhymeKeys), useMarkov)

#print finishedLines

#################################################
################# print words
#################################################
# short aliases for the generated words and their markers
fl = finishedLines
rl = rhymeMap
# index of the first word of each printed verse (25 words per verse)
tempCounter = [0, 25, 50, 75]
# one entry per printed line: the word count of each comma-separated phrase,
# and the text that ends the line
verseLayout = [
	((2, 1, 2, 2), ","),
	((2, 2, 2, 2), ","),
	((2, 2, 2), ","),
	((2, 2), ".\n"),
]
for i in tempCounter:
	offset = i
	for phraseSizes, lineEnding in verseLayout:
		phrases = list()
		for size in phraseSizes:
			# words inside a phrase are separated by a single space
			phrases.append(" ".join([fl[offset + step] for step in range(size)]))
			offset = offset + size
		print(", ".join(phrases) + lineEnding)

print(rl)
#for i in tempCounter:
#	print rl[i + 0] + " " + rl[i + 1] + ", " + rl[i + 2] + ", " + rl[i + 3] + " " + rl[i + 4] + ", " + rl[i + 5] + " " + rl[i + 6] + ","
#	print rl[i + 7] + " " + rl[i + 8] + ", " + rl[i + 9] + " " + rl[i + 10] + ", " + rl[i + 11] + " " + rl[i + 12] + ", " + rl[i + 13] + " " + rl[i + 14] + ","
#	print rl[i + 15] + " " + rl[i + 16] + ", " + rl[i + 17] + " " + rl[i + 18] + ", " + rl[i + 19] + " " + rl[i + 20] + ","
#	print rl[i + 21] + " " + rl[i + 22] + ", " + rl[i + 23] + " " + rl[i + 24] + ".\n"