Skip to content

Commit f6ca1a4

Browse files
committed
mid summer night dream project
1 parent 527e945 commit f6ca1a4

8 files changed

+3031
-1
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
/*
 * Schema setup for the A Midsummer Night's Dream benchmark scripts.
 * Every statement uses IF NOT EXISTS so the script can be re-run safely
 * against a server where the schema has already been created.
 */
CREATE SCHEMA IF NOT EXISTS shakespeare;

USE shakespeare;

/* play and characters table: one row per line of the play */
CREATE TABLE IF NOT EXISTS amnd(
    line_number INT NOT NULL AUTO_INCREMENT, /* generated by MySQL on insert */
    char_name TEXT,                          /* character speaking the line */
    play_text TEXT,                          /* the line itself */
    PRIMARY KEY (line_number)
);

/* performance table: one row per benchmark run */
CREATE TABLE IF NOT EXISTS performance(
    query_type TEXT,  /* label written by the scripts: CREATE, UPDATE, DELETE or READ */
    query_time FLOAT  /* elapsed seconds per row for that run */
);
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
# Read the character list and the play text from two text files and load
# every play line into the amnd table, tagged with the speaking character.
# The average insert time per line is then stored in the performance table.

import pymysql
import time

myConnection = pymysql.connect(
    host="localhost", user="root", password="root", db="shakespeare")

# try/finally guarantees the connection is released even if a file is
# missing or a statement fails part-way through.
try:
    cur = myConnection.cursor()

    # read character file and get all characters;
    # a set makes the per-line membership test O(1)
    with open("datasets/characters.txt", "r") as char:
        characterNames = set(char.read().splitlines())

    # as first character is unknown in Play file, initialize as Unknown
    currentCharacter = "Unknown"

    start_time = time.time()

    createSQL = "INSERT INTO amnd(char_name, play_text) VALUES(%s, %s);"

    # Part 1: process Play file text
    # to Create a Record for each line in the play
    #   The character who is speaking
    #   The line number (generated by the AUTO_INCREMENT column)
    #   The phrase itself, trimmed of spaces
    insertedLines = 0
    with open("datasets/A_Midsummer_Nights_Dream.txt", "r") as playlines:
        for line in playlines:
            candidateName = line.upper().strip()
            if candidateName in characterNames:
                # a line consisting only of a character name switches speaker
                currentCharacter = candidateName
                print("changing character to : ", currentCharacter)
            else:
                sql_values = currentCharacter, line.strip()
                print("writing line : ", sql_values)
                cur.execute(createSQL, sql_values)
                insertedLines += 1

    myConnection.commit()
    end_time = time.time()

    # Part 2: process for Query Performance Calculation
    cur.execute('SELECT COUNT(line_number) FROM amnd;')
    numPlayLines = cur.fetchall()[0][0]
    print(numPlayLines, 'rows')

    # calculate query execution time
    queryExecTime = end_time - start_time
    print("Total query time: ", queryExecTime)

    # Divide by the rows inserted in THIS run: the table count also includes
    # rows left over from earlier runs and would understate the per-line time.
    if insertedLines:
        queryTimePerLine = queryExecTime / insertedLines
        print("Query time per line: ", queryTimePerLine)

        # record query execution time into performance table
        insertPerformanceSQL = "INSERT INTO performance VALUES('CREATE',%s);"
        cur.execute(insertPerformanceSQL, (queryTimePerLine,))
        myConnection.commit()
    else:
        # nothing was inserted, so a per-line time is undefined
        print("No lines inserted; performance record skipped")
finally:
    myConnection.close()
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
# Update Character Name in uppercase for every line in plays
# example: "Now, fair Hippolyta, our nuptial hour" => "Now, fair HIPPOLYTA, our nuptial hour"
# The average update time per row is then stored in the performance table.

import pymysql
import time

myConnection = pymysql.connect(
    host="localhost", user="root", password="root", db="shakespeare")

# try/finally guarantees the connection is released even if the character
# file is missing or a statement fails.
try:
    cur = myConnection.cursor()
    start_time = time.time()

    # Part 1: Update Character Name in uppercase for every line in plays
    updateSQL = "UPDATE amnd SET play_text = REPLACE(play_text, %s, %s);"

    with open("datasets/characters.txt", "r") as char:
        for character in char.read().splitlines():
            if not character.strip():
                # a blank line would only trigger a pointless full-table UPDATE
                continue
            print("capitalizing occurences of ", character)
            # NOTE(review): capitalize() lower-cases everything after the first
            # letter, so a multi-word name becomes e.g. "Nick bottom" and will
            # not match "Nick Bottom" in the text - confirm the character file
            # only holds single-word names.
            updateStrings = character.capitalize(), character.upper()
            cur.execute(updateSQL, updateStrings)

    myConnection.commit()

    end_time = time.time()

    # Part 2: process for Query Performance Calculation
    cur.execute('SELECT COUNT(line_number) FROM amnd;')
    numPlayLines = cur.fetchall()[0][0]
    print(numPlayLines, 'rows')

    # calculate query execution time
    queryExecTime = end_time - start_time
    print("Total query time: ", queryExecTime)

    if numPlayLines:
        queryTimePerLine = queryExecTime / numPlayLines
        print("Query time per line: ", queryTimePerLine)

        # record query execution time into performance table
        insertPerformanceSQL = "INSERT INTO performance VALUES('UPDATE',%s);"
        cur.execute(insertPerformanceSQL, (queryTimePerLine,))
        myConnection.commit()
    else:
        # an empty table would otherwise raise ZeroDivisionError
        print("Table amnd is empty; performance record skipped")
finally:
    myConnection.close()
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
# Delete Stage Directions in Plays text such as "Exit", "Enter", etc
# The average delete time per removed row is then stored in the
# performance table.

import pymysql
import time

myConnection = pymysql.connect(
    host="localhost", user="root", password="root", db="shakespeare")

# try/finally guarantees the connection is released even if a statement fails.
try:
    cur = myConnection.cursor()

    cur.execute('SELECT COUNT(line_number) FROM amnd;')
    numPlayLines_Before_Delete = cur.fetchall()[0][0]

    print("Deleting lines..")

    # Time only the DELETE and its commit, so the figure is comparable with
    # the load and update scripts, which also stop the clock after the commit.
    start_time = time.time()

    # delete stage directions
    # RLIKE : regular expression like, ^ means starts with , | means OR
    # NOTE(review): these are bare prefixes, so spoken lines that begin with
    # e.g. "Entertain" or "Acting" are removed too - confirm that is acceptable.
    cur.execute(
        "DELETE FROM amnd WHERE play_text RLIKE '^enter|^exit|^act|^scene|^exeunt';")

    myConnection.commit()

    end_time = time.time()

    cur.execute('SELECT COUNT(line_number) FROM amnd;')
    numPlayLines_After_Delete = cur.fetchall()[0][0]
    numPlayLines_Deleted = numPlayLines_Before_Delete - numPlayLines_After_Delete
    print(numPlayLines_Deleted, 'rows')

    # calculate query execution time
    queryExecTime = end_time - start_time
    print("Total query time: ", queryExecTime)

    if numPlayLines_Deleted:
        queryTimePerLine = queryExecTime / numPlayLines_Deleted
        print("Query time per line: ", queryTimePerLine)

        # record query execution time into performance table
        insertPerformanceSQL = "INSERT INTO performance VALUES('DELETE',%s);"
        cur.execute(insertPerformanceSQL, (queryTimePerLine,))
        myConnection.commit()
    else:
        # e.g. the script was already run once; dividing by zero would crash
        print("No rows deleted; performance record skipped")
finally:
    myConnection.close()
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
# Get the plays from the database, print every line, and measure the
# performance of the query. The average read time per row is then stored
# in the performance table.

import pymysql
import time

myConnection = pymysql.connect(
    host="localhost", user="root", password="root", db="shakespeare")

# try/finally guarantees the connection is released even if a statement fails.
try:
    cur = myConnection.cursor()
    start_time = time.time()

    # Part 1: Get the plays from database
    cur.execute("SELECT play_text FROM amnd;")

    # the measured time deliberately covers fetching AND printing every row
    for line in cur.fetchall():
        print(line[0])

    end_time = time.time()

    # Part 2: process for Query Performance Calculation
    cur.execute('SELECT COUNT(line_number) FROM amnd;')
    numPlayLines = cur.fetchall()[0][0]
    print(numPlayLines, 'rows')

    # calculate query execution time
    queryExecTime = end_time - start_time
    print("Total query time: ", queryExecTime)

    if numPlayLines:
        queryTimePerLine = queryExecTime / numPlayLines
        print("Query time per line: ", queryTimePerLine)

        # record query execution time into performance table
        insertPerformanceSQL = "INSERT INTO performance VALUES('READ',%s);"
        cur.execute(insertPerformanceSQL, (queryTimePerLine,))
        myConnection.commit()
    else:
        # an empty table would otherwise raise ZeroDivisionError
        print("Table amnd is empty; performance record skipped")
finally:
    myConnection.close()

0 commit comments

Comments
 (0)