Skip to content

Commit 8163aed

Browse files
authored
Automatically detect file type (#59)
* Automatically detect file type * Fix formatting
1 parent 0981e54 commit 8163aed

File tree

1 file changed

+34
-2
lines changed

1 file changed

+34
-2
lines changed

models/file.py

Lines changed: 34 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
from enum import Enum
2+
from urllib.parse import unquote, urlparse
23

3-
from pydantic import BaseModel
4+
from pydantic import BaseModel, validator
45

56

67
class FileType(Enum):
@@ -11,8 +12,39 @@ class FileType(Enum):
1112
md = "MARKDOWN"
1213
csv = "CSV"
1314
xlsx = "XLSX"
15+
html = "HTML"
16+
json = "JSON"
17+
18+
def suffix(self) -> str:
    """Return the dotted filename extension for this file type.

    The member's value (for example ``"MARKDOWN"``) is looked up in a
    fixed table of extensions.

    Returns:
        The extension including the leading dot, e.g. ``".md"``.

    Raises:
        KeyError: If the member's value has no entry in the table.
    """
    extension_by_value = dict(
        TXT=".txt",
        PDF=".pdf",
        MARKDOWN=".md",
        DOCX=".docx",
        CSV=".csv",
        XLSX=".xlsx",
        PPTX=".pptx",
        HTML=".html",
        JSON=".json",
    )
    return extension_by_value[self.value]
1431

1532

1633
class File(BaseModel):
    """A file referenced by URL, with an optional explicit file type.

    When ``type`` is omitted (or ``None``), it is inferred from the
    extension of the last path segment of ``url``.
    """

    # ``url`` is declared before ``type`` so that, following pydantic's
    # field-ordering rules for validators, it is already available in
    # ``values`` when the ``type`` validator runs.
    url: str
    type: FileType | None = None

    @validator("type", pre=True, always=True)
    def set_type_from_url(cls, v, values):  # noqa: F841
        """Fill in ``type`` from the URL's file extension when not given.

        Args:
            v: The raw ``type`` value supplied by the caller, or ``None``.
            values: Mapping from which the ``url`` field is read.

        Returns:
            ``v`` unchanged when it is not ``None``; otherwise the
            ``FileType`` member whose name equals the lower-cased
            extension of the URL path. ``None`` is returned when no URL
            is available to inspect.

        Raises:
            ValueError: If the URL path has no extension, or the
                extension does not name a ``FileType`` member.
        """
        if v is not None:
            return v

        url = values.get("url")
        if not url:
            return v

        # Inspect only the final path segment so that dots in directory
        # names are never mistaken for an extension separator.
        filename = unquote(urlparse(url).path).rsplit("/", 1)[-1]
        _, dot, extension = filename.rpartition(".")

        # Without a "." there is no extension at all; a bare path such as
        # "pdf" must not be treated as if it were "something.pdf".
        detected = FileType.__members__.get(extension.lower()) if dot else None
        if detected is None:
            raise ValueError(f"Unsupported file type for URL: {url}")
        return detected

0 commit comments

Comments
 (0)