@router.post("/ingest/files", response_model=IngestResponse)
async def ingest_files(
    files: List[UploadFile] = File(...),
    title: Optional[str] = Form(None),
) -> IngestResponse:
    """Ingest uploaded files into the knowledge store.

    Every upload's extension is validated before any file is read or
    stored, so a request containing an unsupported file is rejected as a
    whole instead of leaving earlier files from the same request in the
    store.

    Each accepted file is converted to text, chunked with ``_chunk_text``
    and written chunk by chunk via ``store.store`` under a freshly
    generated ``doc_id``. Files that yield no text after stripping are
    skipped (with a warning) and contribute no chunks.

    Args:
        files: Uploaded files from the multipart form.
        title: Optional title applied to every ingested document. When it
            is missing or empty, each file's own filename is used instead.

    Returns:
        An ``IngestResponse`` whose ``chunks_added`` is the total number
        of chunks stored across all files in the request.

    Raises:
        HTTPException: 400 if any file's extension is not in
            ``_ALLOWED_EXTENSIONS`` (a filename without a dot counts as
            having an empty extension).
    """
    store = _get_store()

    # Pass 1: validate all extensions up front. Only filenames are looked
    # at here, so a bad file fails the request before anything is stored.
    accepted = []
    for upload in files:
        filename = upload.filename or "untitled"
        _, dot, suffix = filename.rpartition(".")
        ext = "." + suffix.lower() if dot else ""
        if ext not in _ALLOWED_EXTENSIONS:
            allowed = ", ".join(sorted(_ALLOWED_EXTENSIONS))
            raise HTTPException(
                status_code=400,
                detail=(
                    f"Unsupported file type: {ext}. "
                    f"Allowed: {allowed}"
                ),
            )
        accepted.append((upload, filename, ext))

    # Pass 2: read, parse, chunk and store each validated file.
    total_chunks = 0
    for upload, filename, ext in accepted:
        data = await upload.read()

        # Parse content based on extension
        if ext in (".txt", ".md", ".csv"):
            try:
                text = data.decode("utf-8")
            except UnicodeDecodeError:
                # latin-1 maps every byte value to a character, so this
                # fallback always succeeds (possibly with mojibake).
                text = data.decode("latin-1")
        elif ext == ".pdf":
            text = _extract_text_from_pdf(data)
        elif ext == ".docx":
            text = _extract_text_from_docx(data)
        else:
            # Extension is allowed but has no parser branch here; skip the
            # file rather than fail the request, but leave a trace.
            logger.warning(
                "Skipping file %s: no parser for extension %s",
                filename,
                ext,
            )
            continue

        text = text.strip()
        if not text:
            logger.warning(
                "Skipping file %s: no text content extracted", filename
            )
            continue

        doc_id = str(uuid.uuid4())
        doc_title = title or filename
        chunks = _chunk_text(text)
        for idx, chunk in enumerate(chunks):
            store.store(
                chunk,
                source="upload",
                doc_type=ext.lstrip("."),
                doc_id=doc_id,
                title=doc_title,
                chunk_index=idx,
            )
        total_chunks += len(chunks)
        logger.info(
            "Ingested %d chunks from file %s (doc_id=%s)",
            len(chunks),
            filename,
            doc_id,
        )

    return IngestResponse(chunks_added=total_chunks)