415 lines
9.3 KiB
Nix
415 lines
9.3 KiB
Nix
# Dependencies supplied by the caller. Apart from `lib`, every name listed
# here is referenced by at least one package definition in this file.
# The names are kept in alphabetical order; order has no meaning in a Nix
# argument pattern.
{
  lib,

  accelerate,
  aiofiles,
  aiohttp,
  albumentations,
  av,
  beautifulsoup4,
  boto3,
  click,
  colorlog,
  configparser,
  dill,
  fastapi,
  fasttext-predict,
  ftfy,
  google-api-core,
  google-genai,
  gradio,
  gradio-pdf,
  hatch-fancy-pypi-readme,
  hatchling,
  httpx,
  huggingface-hub,
  json-repair,
  loguru,
  magika,
  matplotlib,
  modelscope,
  networkx,
  numpy,
  omegaconf,
  onnxruntime,
  openai,
  opencv-python,
  pandas,
  pdfminer-six,
  pdm-backend,
  pillow,
  poetry-core,
  psutil,
  py-cpuinfo,
  pyclipper,
  pydantic,
  pydantic-settings,
  pypdf,
  pypdfium2,
  pypinyin,
  python-dotenv,
  python-multipart,
  pyyaml,
  reportlab,
  requests,
  scikit-image,
  scipy,
  seaborn,
  shapely,
  tenacity,
  tiktoken,
  torch,
  torchvision,
  tqdm,
  transformers,
  ultralytics,
  ultralytics-thop,
  uvicorn,
  wcwidth,
  xlsxwriter,
}:
|
||
|
||
# Second curried argument. The packages defined in this file are looked up
# on it by name, so they can refer to one another.
# NOTE(review): presumably `self` is the final package set these definitions
# are merged into - confirm against the caller.
self:
let
  # Locally defined packages (each has a definition in the set below).
  inherit (self)
    ascii-colors
    doclayout-yolo
    fast-langdetect
    httpx-retries
    lightrag-hku
    mineru
    mineru-vl-utils
    nano-vectordb
    pdftext
    pipmaster
    qwen-vl-utils
    robust-downloader
    # decord
    ;
in
|
||
{
|
||
# RAG-Anything, taken from the v1.2.9 ref of the HKUDS GitHub repository.
raganything = {
  url = "https://github.com/HKUDS/RAG-Anything/tree/v1.2.9";
  hash = "sha256-yepiLYzPD6UcJRbAbovg/BwFE8nh903o/tHypiHGKSw=";

  propagatedBuildDeps = [
    huggingface-hub
    lightrag-hku
    # mineru
    # NOTE(review): `optionalDeps` is not defined in this file; presumably it
    # yields mineru together with the listed extras ("core") - confirm.
    (mineru.optionalDeps [ "core" ])
    tqdm
  ];

  pythonImportsCheck = [ "raganything" ];

  meta.description = "RAGAnything: All-in-One RAG System";
  meta.license = lib.licenses.mit;
};
|
||
# LightRAG, taken from the v1.4.9.11 ref of the HKUDS GitHub repository.
lightrag-hku = {
  url = "https://github.com/HKUDS/LightRAG/tree/v1.4.9.11";
  hash = "sha256-TX/HSF2ZqoDo2SRlFzM+bkUxQXiUpnrl6kwI+lljjdo=";

  # `nano-vectordb` and `pipmaster` are the locally defined packages; the
  # remaining entries come from the function arguments.
  propagatedBuildDeps = [
    aiohttp
    configparser
    google-api-core
    google-genai
    json-repair
    nano-vectordb
    networkx
    numpy
    pandas
    pipmaster
    pydantic
    pypinyin
    python-dotenv
    tenacity
    tiktoken
    xlsxwriter
  ];

  meta.description = "LightRAG: Simple and Fast Retrieval-Augmented Generation";
  meta.license = lib.licenses.mit;
};
|
||
# pipmaster, pinned to an exact upstream commit rather than a tag.
pipmaster = {
  url = "https://github.com/ParisNeo/pipmaster/tree/820acdbc9d541443942bd8afd6ba968036bd8452";
  hash = "sha256-H1R/hXPyjn1r6Dal0QMEQeBR5xUlgacxFsuDUZVwV+s=";

  # NOTE(review): `force` is interpreted by the consumer of this set;
  # presumably it overrides the version that would otherwise be derived
  # from the URL (a commit hash here) - confirm.
  force = {
    version = "1.1.0";
  };

  propagatedBuildDeps = [ ascii-colors ];

  # Licence is spelled out via `lib.licenses` like the other packages in
  # this file, instead of opening `with lib;`.
  meta.description = "A versatile Python package manager utility for simplifying package installation, updates, checks, and environment management.";
  meta.license = lib.licenses.asl20;
};
|
||
# nano-vectordb, pinned to an exact upstream commit rather than a tag.
nano-vectordb = {
  url = "https://github.com/gusye1234/nano-vectordb/tree/03f821348f04a93a9c36bb575faae05b61f4c02e";
  hash = "sha256-jLco+1TAncF8Ep+VGd3DhsoiZTW7G/H8fAEwnNsovTY=";

  # NOTE(review): presumably overrides the derived version, since the URL
  # only carries a commit hash - confirm with the consumer of `force`.
  force = {
    version = "0.0.4.3";
  };

  propagatedBuildDeps = [ numpy ];

  meta.description = "A simple, easy-to-hack Vector Database implementation";
  meta.license = lib.licenses.mit;
};
|
||
# ascii_colors (upstream spelling uses an underscore), pinned to an exact
# upstream commit rather than a tag.
ascii-colors = {
  url = "https://github.com/ParisNeo/ascii_colors/tree/817a21485136635e3da89ff08444183254b25aab";
  hash = "sha256-zK4vM2sOfEFDJBpFjcJmQXljg3pgprP+VFuuk5JNWnE=";

  # NOTE(review): presumably overrides the derived version, since the URL
  # only carries a commit hash - confirm with the consumer of `force`.
  force = {
    version = "0.11.12";
  };

  propagatedBuildDeps = [ wcwidth ];

  meta.description = "A python library for displaying stuff on the console in a pretty way";
  meta.license = lib.licenses.asl20;
};
|
||
# MinerU, taken from the `mineru-2.7.6-released` ref of the opendatalab
# GitHub repository.
mineru = {
  url = "https://github.com/opendatalab/MinerU/tree/mineru-2.7.6-released";
  hash = "sha256-A/nFNQYGEfmWUdpX8N1lbex3mdiF8+eN8s0UKQFc42E=";

  # `pdftext`, `fast-langdetect`, `mineru-vl-utils` and `qwen-vl-utils` are
  # the locally defined packages; the rest come from the function arguments.
  propagatedBuildDeps = [
    boto3
    click
    loguru
    numpy
    pdfminer-six
    tqdm
    requests
    httpx
    pillow
    pypdfium2
    pypdf
    reportlab
    pdftext
    modelscope
    huggingface-hub
    json-repair
    opencv-python
    fast-langdetect
    scikit-image
    openai
    beautifulsoup4
    magika
    mineru-vl-utils
    qwen-vl-utils
  ];

  # Extras. The lists are bound in a `let` so that `core` can reuse them and
  # so that the `gradio` extra can mention the `gradio` package without the
  # attribute name shadowing it (the result set is deliberately not `rec`).
  optional-dependencies =
    let
      vlmDeps = [
        torch
        transformers
        accelerate
      ];
      pipelineDeps = [
        matplotlib
        ultralytics
        doclayout-yolo
        dill
        pyyaml
        ftfy
        shapely
        pyclipper
        omegaconf
        torch
        torchvision
        transformers
        onnxruntime
      ];
      apiDeps = [
        fastapi
        python-multipart
        uvicorn
      ];
      gradioDeps = [
        gradio
        gradio-pdf
      ];
    in
    {
      vlm = vlmDeps;
      # vllm = [
      #   vllm
      # ];
      # lmdeploy = [
      #   lmdeploy
      # ];
      # mlx = [
      #   mlx-vlm
      # ];
      pipeline = pipelineDeps;
      api = apiDeps;
      gradio = gradioDeps;
      # Concatenation order matches the individual extras above.
      core = vlmDeps ++ pipelineDeps ++ apiDeps ++ gradioDeps;
      # all = core ++ mlx ++ vllm ++ lmdeploy;
    };

  pythonRelaxDeps = [
    "fast-langdetect"
  ];

  meta = {
    description = "Transforms complex documents like PDFs into LLM-ready markdown/JSON for your Agentic workflows.";
    # Upstream MinerU is distributed under AGPL-3.0, not GPL-3.0; also,
    # `lib.licenses.gpl3` is a deprecated, ambiguous alias in nixpkgs.
    license = lib.licenses.agpl3Only;
  };
};
|
||
# DocLayout-YOLO 0.0.4; unlike the other packages in this file, the URL
# points at the PyPI project page rather than a GitHub tree.
doclayout-yolo = {
  url = "https://pypi.org/project/doclayout-yolo/0.0.4/";
  hash = "sha256-gDEdEL7QPPiExb/MYkv+D9z/bnhLp2eZNHsCkNedy00=";

  propagatedBuildDeps = [
    matplotlib
    opencv-python
    pillow
    pyyaml
    requests
    scipy
    torch
    torchvision
    tqdm
    psutil
    py-cpuinfo
    pandas
    seaborn
    albumentations
    huggingface-hub
    ultralytics-thop # stands in for `thop`
  ];

  # The `yolo` executable is deleted from this package's output; per the
  # original note it collides with the one shipped by ultralytics.
  postInstall = ''
    rm -f "$out/bin/yolo" # collision with $'{pkgs.python3Packages.ultralytics}/bin/.yolo-wrapped
  '';

  # `thop` is listed both as relaxed and as removed.
  # NOTE(review): the relax entry looks redundant once the requirement is
  # removed - confirm before dropping it.
  pythonRelaxDeps = [ "thop" ];
  pythonRemoveDeps = [ "thop" ];

  meta.description = "DocLayout-YOLO: Enhancing Document Layout Analysis through Diverse Synthetic Data and Global-to-Local Adaptive Perception";
  meta.license = lib.licenses.agpl3Only;
};
|
||
# thop = { url = "use ultralytics-thop instead"; };
|
||
# qwen-vl-utils, taken from the `qwen-vl-utils` subdirectory of the Qwen3-VL
# repository at ref fe12058.
qwen-vl-utils = {
  url = "https://github.com/QwenLM/Qwen3-VL/tree/fe12058/qwen-vl-utils";
  hash = "sha256-Vha/Tc4q2v5RCL31hB9U4ZrfIFxfZjgwM6PYlvDeoAQ=";

  # NOTE(review): presumably overrides the derived version, since the URL
  # only carries an abbreviated commit hash - confirm with the consumer.
  force.version = "0.0.14";

  propagatedBuildDeps = [
    hatchling
    av
    pillow
    requests
    torch
    torchvision
  ];

  pythonImportsCheck = [ "qwen_vl_utils" ];

  # No extras are enabled at the moment; the `decord` extra stays commented
  # out, as does the `decord` package itself elsewhere in this file.
  optional-dependencies = {
    # decord' = [
    #   decord
    # ];
  };

  meta.description = "Qwen-VL Utils contains a set of helper functions for processing and integrating visual language information with Qwen-VL Series Model.";
  meta.license = lib.licenses.asl20;
};
|
||
# decord = {
|
||
# url = "https://github.com/dmlc/decord/tree/v0.6.0/python";
|
||
# hash = "sha256-7YedKE0FAuZOCiQlgocZCMfkXvykxdf10ES7KwVZ1hc=";
|
||
# meta = {
|
||
# description = "An efficient video loader for deep learning with smart shuffling that's super easy to digest";
|
||
# license = lib.licenses.asl20;
|
||
# };
|
||
# };
|
||
# mineru-vl-utils, taken from the `mineru_vl_utils-0.1.22-released` ref of
# the opendatalab GitHub repository.
mineru-vl-utils = {
  url = "https://github.com/opendatalab/mineru-vl-utils/tree/mineru_vl_utils-0.1.22-released";
  hash = "sha256-hpTW/1nwXPxfld4nx0XHZBMerWj+UL1vzDhYpwjezRU=";

  # `httpx-retries` is the locally defined package; the rest come from the
  # function arguments.
  propagatedBuildDeps = [
    httpx
    httpx-retries
    aiofiles
    pillow
    pydantic
    loguru
  ];

  optional-dependencies = {
    # This set is not `rec`, so `transformers` inside the list is the
    # package from the function arguments, not this attribute.
    transformers = [
      torch
      transformers
      accelerate
      torchvision
    ];
    # vllm = [
    #   vllm
    # ];
    # mlx = [
    #   mlx-vlm
    # ];
    # lmdeploy = [
    #   lmdeploy
    #   qwen-vl-utils
    # ];
  };

  meta.description = "A Python package for interacting with the MinerU Vision-Language Model.";
  # NOTE(review): `lib.licenses.gpl3` is a deprecated alias in nixpkgs, and
  # upstream may actually be AGPL-3.0 - verify against the repository.
  meta.license = lib.licenses.gpl3;
};
|
||
# httpx-retries, taken from the 0.4.5 ref of the upstream GitHub repository.
httpx-retries = {
  url = "https://github.com/will-ockmore/httpx-retries/tree/0.4.5";
  hash = "sha256-zJ3ExSEWxlHFluSdYA8/XZ3zb4KBelU+IOFyUu4ezvo=";

  # NOTE(review): hatchling and hatch-fancy-pypi-readme look like build-time
  # tooling listed alongside the runtime dependency httpx - confirm that
  # this is what `propagatedBuildDeps` expects.
  propagatedBuildDeps = [
    hatchling
    hatch-fancy-pypi-readme
    httpx
  ];

  meta.description = "A retry layer for HTTPX.";
  meta.license = lib.licenses.mit;
};
|
||
# fast-langdetect, taken from the `pypi_1.0.0` ref of the upstream GitHub
# repository.
fast-langdetect = {
  url = "https://github.com/LlmKira/fast-langdetect/tree/pypi_1.0.0";
  hash = "sha256-pj46gHG9cjkSjnYc88bSctL/1LAUe0jkBuM/GZWMsUI=";

  # `robust-downloader` is the locally defined package; the rest come from
  # the function arguments.
  propagatedBuildDeps = [
    pdm-backend
    robust-downloader
    requests
    fasttext-predict
  ];

  meta.description = "⚡️ 80x faster Fasttext language detection out of the box | Split text by language ";
  meta.license = lib.licenses.mit;
};
|
||
# robust-downloader, taken from the 0.0.2 ref of the upstream GitHub
# repository.
robust-downloader = {
  url = "https://github.com/fedebotu/robust-downloader/tree/0.0.2";
  hash = "sha256-UmzfEIPiMtUkOG6sIMYgLxc8YwL5wRgMBRlywqKomv0=";

  propagatedBuildDeps = [
    tqdm
    colorlog
    requests
  ];

  meta.description = "Minimal Python downloader with robustness in mind - resumable downloads, retries, and more";
  meta.license = lib.licenses.asl20;
};
|
||
# pdftext, taken from the v0.6.3 ref of the datalab-to GitHub repository.
pdftext = {
  url = "https://github.com/datalab-to/pdftext/tree/v0.6.3";
  hash = "sha256-EGVjzjDWtdcEPX//cOm5+xm9FvX0aP+h6fsD25hC8gA=";

  propagatedBuildDeps = [
    poetry-core
    click
    pydantic
    pydantic-settings
    pypdfium2
  ];

  # NOTE(review): presumably the upstream version constraint on pypdfium2
  # does not match the packaged version, hence the relaxation - confirm.
  pythonRelaxDeps = [ "pypdfium2" ];

  meta.description = "Extract structured text from pdfs quickly";
  meta.license = lib.licenses.asl20;
};
|
||
}
|