|
1573 | 1573 | },
|
1574 | 1574 | {
|
1575 | 1575 | "cell_type": "code",
|
1576 |
| - "execution_count": 52, |
| 1576 | + "execution_count": 11, |
1577 | 1577 | "id": "bb62fa4e-e331-4f0f-ae0a-fb5eece8ee71",
|
1578 | 1578 | "metadata": {},
|
1579 |
| - "outputs": [], |
| 1579 | + "outputs": [ |
| 1580 | + { |
| 1581 | + "data": { |
| 1582 | + "text/plain": [ |
| 1583 | + "168" |
| 1584 | + ] |
| 1585 | + }, |
| 1586 | + "execution_count": 11, |
| 1587 | + "metadata": {}, |
| 1588 | + "output_type": "execute_result" |
| 1589 | + } |
| 1590 | + ], |
1580 | 1591 | "source": [
|
1581 |
| - "hyps = csv_file.ref.to_list()" |
| 1592 | + "# hyps = csv_file.ref.to_list()\n", |
| 1593 | + "hyps = []\n", |
| 1594 | + "with open(\"/Users/quert/Documents/GitHub/edit_NetKu/dataset/same_secs_insert_labeled/final_hyp.src\", \"r\") as f:\n", |
| 1595 | + " for line in f.readlines():\n", |
| 1596 | + " hyps.append(line.strip())\n", |
| 1597 | + "len(hyps)" |
1582 | 1598 | ]
|
1583 | 1599 | },
|
1584 | 1600 | {
|
1585 | 1601 | "cell_type": "code",
|
1586 |
| - "execution_count": 53, |
| 1602 | + "execution_count": 12, |
1587 | 1603 | "id": "7871b42c-1dbc-4bcb-b104-23bb2d4b26a3",
|
1588 | 1604 | "metadata": {},
|
1589 | 1605 | "outputs": [
|
1590 | 1606 | {
|
1591 | 1607 | "name": "stderr",
|
1592 | 1608 | "output_type": "stream",
|
1593 | 1609 | "text": [
|
1594 |
| - "Token indices sequence length is longer than the specified maximum sequence length for this model (3779 > 1024). Running this sequence through the model will result in indexing errors\n" |
| 1610 | + "Token indices sequence length is longer than the specified maximum sequence length for this model (6239 > 1024). Running this sequence through the model will result in indexing errors\n" |
1595 | 1611 | ]
|
1596 | 1612 | }
|
1597 | 1613 | ],
|
|
1607 | 1623 | },
|
1608 | 1624 | {
|
1609 | 1625 | "cell_type": "code",
|
1610 |
| - "execution_count": 54, |
| 1626 | + "execution_count": 13, |
1611 | 1627 | "id": "ce1769ed-b6e6-4c99-ab6d-7a4e567ddaa3",
|
1612 | 1628 | "metadata": {},
|
1613 | 1629 | "outputs": [
|
1614 | 1630 | {
|
1615 | 1631 | "data": {
|
1616 | 1632 | "text/plain": [
|
1617 |
| - "(34, 67597, 5766.785714285715, 3167.0)" |
| 1633 | + "(76, 86560, 7774.184523809524, 4387.5)" |
1618 | 1634 | ]
|
1619 | 1635 | },
|
1620 |
| - "execution_count": 54, |
| 1636 | + "execution_count": 13, |
1621 | 1637 | "metadata": {},
|
1622 | 1638 | "output_type": "execute_result"
|
1623 | 1639 | }
|
1624 | 1640 | ],
|
1625 | 1641 | "source": [
|
1626 | 1642 | "np.min(lengths),np.max(lengths),np.mean(lengths), np.median(lengths)"
|
1627 | 1643 | ]
|
1628 |
| - }, |
1629 |
| - { |
1630 |
| - "cell_type": "code", |
1631 |
| - "execution_count": 10, |
1632 |
| - "id": "5c277ded-9567-44e5-9425-d5743af04d0e", |
1633 |
| - "metadata": {}, |
1634 |
| - "outputs": [ |
1635 |
| - { |
1636 |
| - "data": { |
1637 |
| - "text/plain": [ |
1638 |
| - "(73846, 10425, 11253)" |
1639 |
| - ] |
1640 |
| - }, |
1641 |
| - "execution_count": 10, |
1642 |
| - "metadata": {}, |
1643 |
| - "output_type": "execute_result" |
1644 |
| - } |
1645 |
| - ], |
1646 |
| - "source": [ |
1647 |
| - "train_csv = pd.read_csv(\"/Users/quert/Documents/GitHub/edit_NetKu/dataset/same_secs_insert_labeled/merged_train.csv\")\n", |
1648 |
| - "test_csv = pd.read_csv(\"/Users/quert/Documents/GitHub/edit_NetKu/dataset/same_secs_insert_labeled/merged_test.csv\")\n", |
1649 |
| - "val_csv = pd.read_csv(\"/Users/quert/Documents/GitHub/edit_NetKu/dataset/same_secs_insert_labeled/merged_val.csv\")\n", |
1650 |
| - "\n", |
1651 |
| - "len(train_csv), len(test_csv), len(val_csv)" |
1652 |
| - ] |
1653 | 1644 | }
|
1654 | 1645 | ],
|
1655 | 1646 | "metadata": {
|
|
0 commit comments