# Package specifications for RAG-Anything and the parts of its dependency
# closure that are defined locally rather than taken from the surrounding
# Python package set.
#
# Shape: a function of the externally provided packages, returning a function
# of `self` (the final set of locally defined packages), returning an attribute
# set with one spec per package.  Each spec carries `url`, usually `hash`, the
# dependency lists and `meta`; some also carry `force.version`,
# `optional-dependencies`, `pythonRelaxDeps`, `pythonRemoveDeps`,
# `pythonImportsCheck` or `postInstall`.
#
# NOTE(review): the builder that consumes these specs is not part of this
# file; the exact meaning of `url`, `force` and `propagatedBuildDeps` is
# defined there.
{
  lib,

  # build backends
  hatchling,
  hatch-fancy-pypi-readme,
  pdm-backend,
  poetry-core,

  # externally provided Python packages
  huggingface-hub,
  tqdm,
  aiohttp,
  configparser,
  google-api-core,
  google-genai,
  json-repair,
  networkx,
  numpy,
  pandas,
  pydantic,
  pypinyin,
  python-dotenv,
  tenacity,
  tiktoken,
  xlsxwriter,
  boto3,
  click,
  loguru,
  pdfminer-six,
  requests,
  httpx,
  pillow,
  pypdfium2,
  pypdf,
  reportlab,
  modelscope,
  opencv-python,
  scikit-image,
  openai,
  beautifulsoup4,
  magika,
  av,
  fasttext-predict,
  pydantic-settings,
  aiofiles,
  colorlog,
  torch,
  torchvision,
  wcwidth,
  matplotlib,
  pyyaml,
  scipy,
  psutil,
  py-cpuinfo,
  seaborn,
  albumentations,
  transformers,
  accelerate,
  ultralytics,
  dill,
  ftfy,
  shapely,
  pyclipper,
  omegaconf,
  onnxruntime,
  fastapi,
  python-multipart,
  uvicorn,
  gradio,
  gradio-pdf,
  ultralytics-thop,
}:
self:
let
  inherit (self)
    # locally defined packages
    lightrag-hku
    mineru
    nano-vectordb
    pipmaster
    ascii-colors
    pdftext
    fast-langdetect
    mineru-vl-utils
    qwen-vl-utils
    httpx-retries
    robust-downloader
    doclayout-yolo
    decord
    ;
in
{
  raganything = {
    url = "https://github.com/HKUDS/RAG-Anything/tree/v1.2.9";
    hash = "sha256-yepiLYzPD6UcJRbAbovg/BwFE8nh903o/tHypiHGKSw=";
    propagatedBuildDeps = [
      huggingface-hub
      lightrag-hku
      # mineru, with its "core" optional dependency group enabled
      (mineru.optionalDeps [ "core" ])
      tqdm
    ];
    pythonImportsCheck = [ "raganything" ];
    meta = {
      description = "RAGAnything: All-in-One RAG System";
      license = lib.licenses.mit;
    };
  };

  lightrag-hku = {
    url = "https://github.com/HKUDS/LightRAG/tree/v1.4.9.11";
    hash = "sha256-TX/HSF2ZqoDo2SRlFzM+bkUxQXiUpnrl6kwI+lljjdo=";
    propagatedBuildDeps = [
      aiohttp
      configparser
      google-api-core
      google-genai
      json-repair
      nano-vectordb
      networkx
      numpy
      pandas
      pipmaster
      pydantic
      pypinyin
      python-dotenv
      tenacity
      tiktoken
      xlsxwriter
    ];
    meta = {
      description = "LightRAG: Simple and Fast Retrieval-Augmented Generation";
      license = lib.licenses.mit;
    };
  };

  pipmaster = {
    # Pinned to a commit rather than a tag, so the version is set explicitly.
    url = "https://github.com/ParisNeo/pipmaster/tree/820acdbc9d541443942bd8afd6ba968036bd8452";
    hash = "sha256-H1R/hXPyjn1r6Dal0QMEQeBR5xUlgacxFsuDUZVwV+s=";
    force.version = "1.1.0";
    propagatedBuildDeps = [ ascii-colors ];
    meta = {
      description = "A versatile Python package manager utility for simplifying package installation, updates, checks, and environment management.";
      license = lib.licenses.asl20;
    };
  };

  nano-vectordb = {
    # Pinned to a commit rather than a tag, so the version is set explicitly.
    url = "https://github.com/gusye1234/nano-vectordb/tree/03f821348f04a93a9c36bb575faae05b61f4c02e";
    hash = "sha256-jLco+1TAncF8Ep+VGd3DhsoiZTW7G/H8fAEwnNsovTY=";
    force.version = "0.0.4.3";
    propagatedBuildDeps = [ numpy ];
    meta = {
      description = "A simple, easy-to-hack Vector Database implementation";
      license = lib.licenses.mit;
    };
  };

  ascii-colors = {
    # Pinned to a commit rather than a tag, so the version is set explicitly.
    url = "https://github.com/ParisNeo/ascii_colors/tree/817a21485136635e3da89ff08444183254b25aab";
    hash = "sha256-zK4vM2sOfEFDJBpFjcJmQXljg3pgprP+VFuuk5JNWnE=";
    force.version = "0.11.12";
    propagatedBuildDeps = [ wcwidth ];
    meta = {
      description = "A python library for displaying stuff on the console in a pretty way";
      license = lib.licenses.asl20;
    };
  };

  mineru = {
    url = "https://github.com/opendatalab/MinerU/tree/mineru-2.7.6-released";
    hash = "sha256-A/nFNQYGEfmWUdpX8N1lbex3mdiF8+eN8s0UKQFc42E=";
    propagatedBuildDeps = [
      boto3
      click
      loguru
      numpy
      pdfminer-six
      tqdm
      requests
      httpx
      pillow
      pypdfium2
      pypdf
      reportlab
      pdftext
      modelscope
      huggingface-hub
      json-repair
      opencv-python
      fast-langdetect
      scikit-image
      openai
      beautifulsoup4
      magika
      mineru-vl-utils
      qwen-vl-utils
    ];
    optional-dependencies =
      let
        # Inside the `rec` set below, `gradio` names the optional-dependency
        # group, which shadows the `gradio` package argument; keep a handle on
        # the package under a different name.
        _gradio = gradio;
      in
      rec {
        vlm = [
          torch
          transformers
          accelerate
        ];
        # vllm = [
        #   vllm
        # ];
        # lmdeploy = [
        #   lmdeploy
        # ];
        # mlx = [
        #   mlx-vlm
        # ];
        pipeline = [
          matplotlib
          ultralytics
          doclayout-yolo
          dill
          pyyaml
          ftfy
          shapely
          pyclipper
          omegaconf
          torch
          torchvision
          transformers
          onnxruntime
        ];
        api = [
          fastapi
          python-multipart
          uvicorn
        ];
        gradio = [
          _gradio
          gradio-pdf
        ];
        core = vlm ++ pipeline ++ api ++ gradio;
        # all = core ++ mlx ++ vllm ++ lmdeploy;
      };
    pythonRelaxDeps = [ "fast-langdetect" ];
    meta = {
      description = "Transforms complex documents like PDFs into LLM-ready markdown/JSON for your Agentic workflows.";
      # `lib.licenses.gpl3` is a deprecated, ambiguous alias in nixpkgs;
      # `gpl3Only` is its unambiguous equivalent.
      # NOTE(review): upstream may actually be AGPL-3.0 — confirm against the
      # project's LICENSE and switch to `lib.licenses.agpl3Only` if so.
      license = lib.licenses.gpl3Only;
    };
  };

  doclayout-yolo = {
    url = "https://pypi.org/project/doclayout-yolo/0.0.4/";
    hash = "sha256-gDEdEL7QPPiExb/MYkv+D9z/bnhLp2eZNHsCkNedy00=";
    propagatedBuildDeps = [
      matplotlib
      opencv-python
      pillow
      pyyaml
      requests
      scipy
      torch
      torchvision
      tqdm
      psutil
      py-cpuinfo
      pandas
      seaborn
      albumentations
      huggingface-hub
      # stands in for `thop`, which is dropped from the declared
      # requirements via `pythonRemoveDeps` below
      ultralytics-thop
    ];
    # Drop the `yolo` executable this package installs so it does not collide
    # with the one shipped by `ultralytics`.
    postInstall = ''
      rm -f "$out/bin/yolo" # collision with $'{pkgs.python3Packages.ultralytics}/bin/.yolo-wrapped
    '';
    pythonRelaxDeps = [ "thop" ];
    pythonRemoveDeps = [ "thop" ];
    meta = {
      description = "DocLayout-YOLO: Enhancing Document Layout Analysis through Diverse Synthetic Data and Global-to-Local Adaptive Perception";
      license = lib.licenses.agpl3Only;
    };
  };

  # thop = { url = "use ultralytics-thop instead"; };

  qwen-vl-utils = {
    # Built from the `qwen-vl-utils` subdirectory of the Qwen3-VL repository
    # at a pinned commit, so the version is set explicitly.
    url = "https://github.com/QwenLM/Qwen3-VL/tree/fe12058/qwen-vl-utils";
    hash = "sha256-Vha/Tc4q2v5RCL31hB9U4ZrfIFxfZjgwM6PYlvDeoAQ=";
    force.version = "0.0.14";
    propagatedBuildDeps = [
      hatchling
      av
      pillow
      requests
      torch
      torchvision
    ];
    pythonImportsCheck = [ "qwen_vl_utils" ];
    optional-dependencies = {
      decord = [ decord ];
    };
    meta = {
      description = "Qwen-VL Utils contains a set of helper functions for processing and integrating visual language information with Qwen-VL Series Model.";
      license = lib.licenses.asl20;
    };
  };

  decord = {
    # NOTE(review): unlike every other entry in this file, no `hash` is given
    # here — confirm whether the builder tolerates that or add one.
    url = "https://github.com/dmlc/decord/tree/v0.6.0/python";
    meta = {
      description = "An efficient video loader for deep learning with smart shuffling that's super easy to digest";
      license = lib.licenses.asl20;
    };
  };

  mineru-vl-utils = {
    url = "https://github.com/opendatalab/mineru-vl-utils/tree/mineru_vl_utils-0.1.22-released";
    hash = "sha256-hpTW/1nwXPxfld4nx0XHZBMerWj+UL1vzDhYpwjezRU=";
    propagatedBuildDeps = [
      httpx
      httpx-retries
      aiofiles
      pillow
      pydantic
      loguru
    ];
    optional-dependencies = {
      # Not a `rec` set: `transformers` in the list is the package argument,
      # not this attribute.
      transformers = [
        torch
        transformers
        accelerate
        torchvision
      ];
      # vllm = [
      #   vllm
      # ];
      # mlx = [
      #   mlx-vlm
      # ];
      # lmdeploy = [
      #   lmdeploy
      #   qwen-vl-utils
      # ];
    };
    meta = {
      description = "A Python package for interacting with the MinerU Vision-Language Model.";
      # `lib.licenses.gpl3` is a deprecated, ambiguous alias in nixpkgs;
      # `gpl3Only` is its unambiguous equivalent.
      # NOTE(review): confirm the licence against the upstream LICENSE file.
      license = lib.licenses.gpl3Only;
    };
  };

  httpx-retries = {
    url = "https://github.com/will-ockmore/httpx-retries/tree/0.4.5";
    hash = "sha256-zJ3ExSEWxlHFluSdYA8/XZ3zb4KBelU+IOFyUu4ezvo=";
    propagatedBuildDeps = [
      hatchling
      hatch-fancy-pypi-readme
      httpx
    ];
    meta = {
      description = "A retry layer for HTTPX.";
      license = lib.licenses.mit;
    };
  };

  fast-langdetect = {
    url = "https://github.com/LlmKira/fast-langdetect/tree/pypi_1.0.0";
    hash = "sha256-pj46gHG9cjkSjnYc88bSctL/1LAUe0jkBuM/GZWMsUI=";
    propagatedBuildDeps = [
      pdm-backend
      robust-downloader
      requests
      fasttext-predict
    ];
    meta = {
      description = "⚡️ 80x faster Fasttext language detection out of the box | Split text by language ";
      license = lib.licenses.mit;
    };
  };

  robust-downloader = {
    url = "https://github.com/fedebotu/robust-downloader/tree/0.0.2";
    hash = "sha256-UmzfEIPiMtUkOG6sIMYgLxc8YwL5wRgMBRlywqKomv0=";
    propagatedBuildDeps = [
      tqdm
      colorlog
      requests
    ];
    meta = {
      description = "Minimal Python downloader with robustness in mind - resumable downloads, retries, and more";
      license = lib.licenses.asl20;
    };
  };

  pdftext = {
    url = "https://github.com/datalab-to/pdftext/tree/v0.6.3";
    hash = "sha256-EGVjzjDWtdcEPX//cOm5+xm9FvX0aP+h6fsD25hC8gA=";
    propagatedBuildDeps = [
      poetry-core
      click
      pydantic
      pydantic-settings
      pypdfium2
    ];
    pythonRelaxDeps = [ "pypdfium2" ];
    meta = {
      description = "Extract structured text from pdfs quickly";
      license = lib.licenses.asl20;
    };
  };
}