|
# Read data from two text files and load it into the `shakespeare` MySQL
# database: one `amnd` row per line of the play, tagged with the character
# currently speaking, followed by one timing row in the `performance` table.

import pymysql
import time

# NOTE(review): credentials are hard-coded here; consider reading them from
# configuration or the environment instead.
myConnection = pymysql.connect(
    host="localhost", user="root", password="root", db="shakespeare")

# try/finally guarantees the connection is released even when a file is
# missing or a query fails. Uncommitted inserts are discarded on close
# (assuming the driver's default of autocommit off).
try:
    with myConnection.cursor() as cur:
        # read character file and get all characters
        with open("datasets/characters.txt", "r") as char:
            characterList = char.read().splitlines()

        # Set for O(1) membership tests; the list is scanned once here rather
        # than once per line of the play.
        characterNames = set(characterList)

        # as first character is unknown in Play file, initialize as Unknown
        currentCharacter = "Unknown"

        # perf_counter() is a monotonic, high-resolution clock, so the
        # measured duration cannot be skewed by system clock adjustments.
        start_time = time.perf_counter()

        createSQL = "INSERT INTO amnd(char_name, play_text) VALUES(%s, %s);"

        # Part 1: process Play file text
        # to create a record for each line in the play:
        #   - the character who is speaking
        #   - the phrase itself, trimmed of spaces
        # The INSERT does not supply a line number; presumably `line_number`
        # is generated by the table itself -- TODO confirm against the schema.
        with open("datasets/A_Midsummer_Nights_Dream.txt", "r") as playlines:
            for line in playlines:
                # A line that is exactly a known character name (compared
                # case-insensitively via upper-casing) switches the speaker
                # and is not stored as a row of its own.
                candidate = line.upper().strip()
                if candidate in characterNames:
                    currentCharacter = candidate
                    print("changing character to : ", currentCharacter)
                else:
                    sql_values = currentCharacter, line.strip()
                    print("writing line : ", sql_values)
                    cur.execute(createSQL, sql_values)

        myConnection.commit()
        end_time = time.perf_counter()

        # Part 2: process for Query Performance Calculation
        # NOTE: this counts every row in `amnd`, including rows left over
        # from any earlier run, not only the rows inserted above.
        cur.execute('SELECT COUNT(line_number) FROM amnd;')
        numPlayLines = cur.fetchone()[0]
        print(numPlayLines, 'rows')

        # calculate query execution time (includes the print() calls made
        # inside the loop, exactly as measured above)
        queryExecTime = end_time - start_time
        print("Total query time: ", queryExecTime)

        if numPlayLines:
            queryTimePerLine = queryExecTime / numPlayLines
            print("Query time per line: ", queryTimePerLine)

            # record query execution time into performance table; parameters
            # are passed as a sequence, the form the DB-API specifies
            insertPerformanceSQL = "INSERT INTO performance VALUES('CREATE',%s);"
            cur.execute(insertPerformanceSQL, (queryTimePerLine,))

            myConnection.commit()
        else:
            # An empty table would otherwise raise ZeroDivisionError.
            print("No rows in amnd; skipping performance record")
finally:
    myConnection.close()
0 commit comments