Skip to content

Commit a132ab3

Browse files
committed
build 1.0.1
1 parent bf06a6a commit a132ab3

File tree

7 files changed

+305
-4
lines changed

7 files changed

+305
-4
lines changed

README.rst

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
|travis_pic| |codecov_pic|
2+
3+
==============
4+
Word to Number (Es)
5+
==============
6+
This is a Python module to convert number words (e.g. veintiuno) to numeric digits (21). It works for positive numbers up to 999,999,999,999 (i.e. billions).
7+
8+
++++++++++++
9+
Installation
10+
++++++++++++
11+
Please ensure that you have **updated pip** to the latest version before installing word2number_es.
12+
13+
You can install the module from the Python Package Index using the command below.
14+
15+
.. code-block:: python
16+
17+
pip install word2number_es
18+
19+
Make sure you install all requirements given in requirements.txt
20+
21+
.. code-block:: python
22+
23+
pip install -r requirements.txt
24+
25+
+++++
26+
Usage
27+
+++++
28+
First you have to import the module using the below code.
29+
.. code-block:: python
30+
31+
from word2number_es import w2n
32+
33+
Then you can use the **word_to_num** method to convert a number-word to numeric digits, as shown below.
34+
35+
.. code-block:: python
36+
37+
>>> print(w2n.word_to_num('dos punto tres'))
38+
2.3
39+
40+
>>> print(w2n.word_to_num('112'))
41+
112
42+
43+
>>> print(w2n.word_to_num('punto cinco'))
44+
0.5
45+
46+
>>> print(w2n.word_to_num('dos mil veintitres'))
47+
2023
48+
49+
>>> print(w2n.word_to_num('millon millon'))
50+
Error: Redundant number! Please enter a valid number word (eg. two million twenty three thousand and forty nine)
51+
None
52+
53+
>>> print(w2n.word_to_num('blah'))
54+
Error: No valid number words found! Please enter a valid number word (eg. two million twenty three thousand and forty nine)
55+
None
56+
57+
You can also use the **numwords_in_sentence** to convert all number words in a sentence to numeric digits, as shown below.
58+
59+
.. code-block:: python
60+
61+
>>> print(w2n.numwords_in_sentence("el reloj me costo diez mil pesos"))
62+
el reloj me costo 10000 pesos
63+
64+
++++++++++++
65+
Contributors
66+
++++++++++++
67+
- Ben Batorsky (`bpben <https://github.com/bpben>`__)
68+
- Alex (`ledovsky <https://github.com/ledovsky>`__)
69+
- Tal Yarkoni (`tyarkoni <https://github.com/tyarkoni>`__)
70+
- ButteredGroove (`ButteredGroove <https://github.com/ButteredGroove>`__)
71+
- TurqW (`TurqW <https://github.com/TurqW>`__)

build/lib/word2number_es/__init__.py

Whitespace-only changes.

build/lib/word2number_es/w2n.py

Lines changed: 226 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,226 @@
1+
# Lookup table from Spanish number words to their values.
# Keys are written in lowercase and without accent marks.
NUMBERS = {
    # 0-9
    'cero': 0,
    'uno': 1,
    'dos': 2,
    'tres': 3,
    'cuatro': 4,
    'cinco': 5,
    'seis': 6,
    'siete': 7,
    'ocho': 8,
    'nueve': 9,
    # 10-19
    'diez': 10,
    'once': 11,
    'doce': 12,
    'trece': 13,
    'catorce': 14,
    'quince': 15,
    'dieciseis': 16,
    'diecisiete': 17,
    'dieciocho': 18,
    'diecinueve': 19,
    # 20-29 (single-word forms, including the apocopated 'veintiun')
    'veinte': 20,
    'veintiuno': 21,
    'veintiun': 21,
    'veintidos': 22,
    'veintitres': 23,
    'veinticuatro': 24,
    'veinticinco': 25,
    'veintiseis': 26,
    'veintisiete': 27,
    'veintiocho': 28,
    'veintinueve': 29,
    # tens
    'treinta': 30,
    'cuarenta': 40,
    'cincuenta': 50,
    'sesenta': 60,
    'setenta': 70,
    'ochenta': 80,
    'noventa': 90,
    # hundreds ('cien' and 'ciento' are both 100)
    'cien': 100,
    'ciento': 100,
    'doscientos': 200,
    'trescientos': 300,
    'cuatrocientos': 400,
    'quinientos': 500,
    'seiscientos': 600,
    'setecientos': 700,
    'ochocientos': 800,
    'novecientos': 900,
    # thousand
    'mil': 1000,
    # decimal separator: maps to the character itself, not to a number
    'punto': '.',
}
53+
54+
# Magnitude words, ordered from the largest to the smallest.
# NOTE(review): the usual Spanish spelling is 'cuatrillon'; 'quatrillon' is kept
# unchanged here because other code matches on this exact string.
SECTION_WORDS = [
    'quintillon',
    'quatrillon',
    'trillon',
    'billon',
    'millones',
    'millon',
    'mil',
]

# Words that split a number into parts: every magnitude word plus the decimal point.
ALL_SEPARATORS = [*SECTION_WORDS, 'punto']

# Every word recognised as part of a number.
NUMBER_WORDS = [*NUMBERS, *SECTION_WORDS, 'ciento']

# Number words plus the connectors that may appear between them.
NUMBER_SAFE_WORDS = [*NUMBER_WORDS, 'y', '&']

# Single-digit words: the only ones accepted after 'punto'.
DECIMAL_WORDS = 'cero uno dos tres cuatro cinco seis siete ocho nueve'.split()
59+
60+
61+
def _number_formation(number_strings):
    """
    Add up the values of the number words that make one section of a number
    (the words in front of 'mil', 'millon', ... or the final units section).

    input: list of strings (keys of NUMBERS)
    return value: integer
    """
    # An empty section counts as 1 so that a bare "mil" means 1 * 1000
    # and a bare "ciento" means 1 * 100.
    if not number_strings:
        return 1

    if 'ciento' not in number_strings:
        return sum(NUMBERS[token] for token in number_strings)

    # Words before the first 'ciento' multiply the hundred, words after it are added.
    split_at = number_strings.index('ciento')
    leading = number_strings[:split_at]
    trailing = number_strings[split_at + 1:]
    return 100 * _number_formation(leading) + sum(NUMBERS[token] for token in trailing)
76+
77+
78+
def _get_decimal_sum(decimal_digit_words):
    """
    Turn the digit words that follow 'punto' into the fractional part of a number.

    input: list of strings
    output: float built from the digits ("cinco", "dos" -> 0.52), or the integer 0
            when any word is not a single-digit word from DECIMAL_WORDS
    """
    if any(token not in DECIMAL_WORDS for token in decimal_digit_words):
        return 0

    digits = ''.join(str(NUMBERS[token]) for token in decimal_digit_words)
    return float('0.' + digits)
92+
93+
94+
def normalize_accent_marks(word):
    """
    Remove acute accents from Spanish vowels so that accented and unaccented
    spellings of a number word compare equal ("veintitrés" -> "veintitres").

    Both encodings of an accented vowel are handled: the precomposed character
    ("é") and the decomposed pair of a plain vowel followed by U+0301 COMBINING
    ACUTE ACCENT. Case is preserved and every other character (including "ñ")
    is left untouched.

    input: string
    output: string
    """
    combining_acute = "\u0301"

    # Decomposed form first: drop the combining mark only when it sits on a vowel.
    if combining_acute in word:
        for vowel in "aeiouAEIOU":
            word = word.replace(vowel + combining_acute, vowel)

    # Precomposed form: one pass over the string instead of ten chained replaces.
    return word.translate(str.maketrans("áéíóúÁÉÍÓÚ", "aeiouAEIOU"))
110+
111+
112+
def word_to_num(number_sentence):
    """
    Convert a Spanish number phrase (or a string of digits) to a number.

    Accent marks are normalised, hyphens and commas are treated as spaces, and
    every word that is not a known number word (for example the connector "y")
    is ignored before the remaining words are converted.

    input: string
    output: int, or float when the phrase has a decimal part
    raises: ValueError when the input is not a string, when it contains no
            number words, or when the number words are redundant or out of order
    """
    # isinstance (rather than an exact type check) also accepts str subclasses.
    # ValueError is kept, instead of TypeError, for callers that already catch it.
    if not isinstance(number_sentence, str):
        raise ValueError(
            "Type of input is not string! Please enter a valid number word (eg. \'two million twenty three thousand and forty nine\')")

    number_sentence = normalize_accent_marks(number_sentence)

    # A plain numeric string is returned as-is. isdecimal() matches exactly what
    # int() can parse; isdigit() also accepts characters such as superscript
    # digits, which int() rejects.
    digits = number_sentence.strip()
    if digits.isdecimal():
        return int(digits)

    split_words = number_sentence.replace('-', ' ').replace(',', ' ').lower().split()

    # Keep only the words that belong to a number; connectors such as "y" drop out.
    clean_numbers = [word for word in split_words if word in NUMBER_WORDS]

    return _clean_words_to_num(clean_numbers)
134+
135+
136+
def numwords_in_sentence(sentence):
    """
    Replace every run of Spanish number words in `sentence` with its digits.

    A run starts at a number word and extends over following number words and
    the connectors "y" / "&". Connectors at the end of a run are not part of
    the number and stay in the output. Text outside the runs is copied from the
    original sentence unchanged (case, accents and punctuation included).

    input: string
    output: string
    raises: ValueError when the input is not a string, or when a run of number
            words is redundant or out of order
    """
    if not isinstance(sentence, str):
        raise ValueError(
            "Type of input is not string! Please enter a valid number word (eg. \'two million twenty three thousand and forty nine\')")

    # TODO: some way to tell the difference between a single number spread over
    # TODO several comma-separated words and a list of separate numbers.
    lowered = sentence.replace('-', ' ').replace(',', ' ').lower()

    # Match accented spellings ("veintitrés") the same way word_to_num does.
    # Offsets found in the search text are used to slice `sentence`, so the
    # normalised text is only used when it has kept every character position.
    normalized = normalize_accent_marks(lowered)
    number_sentence = normalized if len(normalized) == len(lowered) else lowered

    split_words = number_sentence.split()

    pieces = []
    last_found_index = 0
    position = 0

    while position < len(split_words):
        if split_words[position] not in NUMBER_WORDS:
            position += 1
            continue

        # Extend the run over number words and connectors, remembering where the
        # last real number word is so trailing connectors can be left alone.
        last_number = position
        cursor = position
        while cursor + 1 < len(split_words) and split_words[cursor + 1] in NUMBER_SAFE_WORDS:
            cursor += 1
            if split_words[cursor] in NUMBER_WORDS:
                last_number = cursor

        num_words = split_words[position:last_number + 1]
        clean_words = [word for word in num_words if word in NUMBER_WORDS]

        num = _clean_words_to_num(clean_words)
        replace_start, replace_end = _get_replaceable(number_sentence, num_words, last_found_index)

        pieces.append(sentence[last_found_index:replace_start])
        pieces.append(str(num))
        last_found_index = replace_end

        # Resume right after the number; any trailing connector is ordinary text.
        position = last_number + 1

    pieces.append(sentence[last_found_index:])

    return ''.join(pieces)
169+
170+
171+
def _get_replaceable(sentence, clean_words, last_found_index):
172+
start = sentence[last_found_index:].find(clean_words[0]) + last_found_index
173+
end = start + len(clean_words[0])
174+
for word in clean_words[1:]:
175+
end += sentence[end:].lower().find(word) + len(word)
176+
return start, end
177+
178+
179+
def _validate_clean_words(clean_words):
    """
    Check that a list of number words can form a single number.

    input: list of strings (number words only)
    output: None
    raises: ValueError when the list is empty, when a separator word (any
            magnitude word or 'punto') is repeated, or when the separators do
            not appear from the largest magnitude down to 'punto'
    """
    if not clean_words:
        raise ValueError(
            "No number words were found in the string.")

    # Every separator may appear at most once. 'millon' and 'millones' name the
    # same magnitude, so together they may also appear only once.
    repeated = any(clean_words.count(separator) > 1 for separator in set(ALL_SEPARATORS))
    million_forms = clean_words.count('millon') + clean_words.count('millones')
    if repeated or million_forms > 1:
        raise ValueError(
            "Redundant number word! Please enter a valid number word (eg. two million twenty three thousand and forty nine)")

    # Separators must already be in the order in which ALL_SEPARATORS lists them.
    separators = [word for word in clean_words if word in ALL_SEPARATORS]
    if separators != sorted(separators, key=ALL_SEPARATORS.index):
        raise ValueError(
            "Malformed number! Something is out of order here.")
197+
198+
199+
def _clean_words_to_num(clean_words):
    """
    Convert a validated list of number words to a number.

    The words are split at each magnitude word; the words in front of it form
    the multiplier for that magnitude. Every magnitude word carries its own
    explicit value, so 'millones' and 'millon' both mean 10**6 and the result
    does not depend on which smaller sections happen to be present
    ("dos millones" is 2000000 with or without a following "mil" part).

    input: list of strings (number words only)
    output: int, or float when a decimal part follows 'punto'
    raises: ValueError from _validate_clean_words for invalid word lists
    """
    _validate_clean_words(clean_words)

    # Largest magnitude first, each one 1000 times the next.
    # NOTE(review): this is the short scale (billon = 10**9), matching the
    # README's stated range "999,999,999,999 (i.e. billions)". Standard Spanish
    # usage is the long scale (billon = 10**12) - confirm which one is intended.
    section_scales = (
        ('quintillon', 10 ** 18),
        ('quatrillon', 10 ** 15),
        ('trillon', 10 ** 12),
        ('billon', 10 ** 9),
        ('millones', 10 ** 6),
        ('millon', 10 ** 6),
        ('mil', 10 ** 3),
    )

    # Separate the decimal part of the number (if it exists).
    decimal_words = []
    if clean_words.count('punto') == 1:
        point_index = clean_words.index('punto')
        decimal_words = clean_words[point_index + 1:]
        clean_words = clean_words[:point_index]

    total_sum = 0

    for word, scale in section_scales:
        if word in clean_words:
            word_index = clean_words.index(word)
            # An empty multiplier counts as 1, so a bare "mil" is 1000.
            total_sum += _number_formation(clean_words[:word_index]) * scale
            clean_words = clean_words[word_index + 1:]

    # Whatever is left after the last magnitude word is the units section.
    if clean_words:
        total_sum += _number_formation(clean_words)

    if decimal_words:
        total_sum += _get_decimal_sum(decimal_words)

    return total_sum
5.89 KB
Binary file not shown.

dist/word2number_es-1.0.1.tar.gz

6.8 KB
Binary file not shown.

setup.cfg

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
[metadata]
2+
description-file=README.rst
3+
license_files=LICENSE.txt

setup.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -12,9 +12,9 @@ def open_file(fname):
1212
setup(
1313
name = 'word2number_es',
1414
packages = ['word2number_es'], # this must be the same as the name above
15-
version = '1.0',
15+
version = '1.0.0',
1616
license=open('LICENSE.txt').read(),
17-
description = 'Convert number words (eg. twenty one) to numeric digits (spanish)',
17+
description = 'Convert number words (eg. veintiuno) to numeric digits (spanish)',
1818
author = 'Neuri',
1919
author_email = '[email protected]',
2020
url = 'https://github.com/Neuri-ai/w2n_es', # use the URL to the github repo
@@ -23,5 +23,6 @@ def open_file(fname):
2323
'Intended Audience :: Developers',
2424
'Programming Language :: Python'
2525
],
26-
long_description=open_file('README.rst').read()
27-
)
26+
long_description=open_file('README.rst').read(),
27+
python_requires = ">=3.6"
28+
)

0 commit comments

Comments
 (0)