nixpkgs-extension/pkgs/by-category/pythonPackages/prefab-specs.nix

414 lines
9.3 KiB
Nix
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

{
lib,
hatchling,
poetry-core,
huggingface-hub,
tqdm,
aiohttp,
configparser,
google-api-core,
google-genai,
json-repair,
networkx,
numpy,
pandas,
pydantic,
pypinyin,
python-dotenv,
tenacity,
tiktoken,
xlsxwriter,
boto3,
click,
loguru,
pdfminer-six,
requests,
httpx,
pillow,
pypdfium2,
pypdf,
reportlab,
modelscope,
opencv-python,
scikit-image,
openai,
beautifulsoup4,
magika,
av,
pdm-backend,
fasttext-predict,
pydantic-settings,
aiofiles,
hatch-fancy-pypi-readme,
colorlog,
torch,
torchvision,
wcwidth,
matplotlib,
pyyaml,
scipy,
psutil,
py-cpuinfo,
seaborn,
albumentations,
transformers,
accelerate,
ultralytics,
dill,
ftfy,
shapely,
pyclipper,
omegaconf,
onnxruntime,
fastapi,
python-multipart,
uvicorn,
gradio,
gradio-pdf,
ultralytics-thop,
}:
self:
let
# Packages that are defined by the attribute set returned below and that other
# specs in this file depend on. They are read back from `self` instead of the
# function arguments.
# NOTE(review): `self` is presumably the final package set built from these
# specs — confirm against the caller.
inherit (self) # locally defined packages
lightrag-hku
mineru
nano-vectordb
pipmaster
ascii-colors
pdftext
fast-langdetect
mineru-vl-utils
qwen-vl-utils
httpx-retries
robust-downloader
doclayout-yolo
decord
;
in
{
# RAG-Anything spec, taken from the upstream v1.2.9 tag.
raganything = {
  url = "https://github.com/HKUDS/RAG-Anything/tree/v1.2.9";
  hash = "sha256-yepiLYzPD6UcJRbAbovg/BwFE8nh903o/tHypiHGKSw=";
  meta.description = "RAGAnything: All-in-One RAG System";
  meta.license = lib.licenses.mit;
  pythonImportsCheck = [ "raganything" ];
  propagatedBuildDeps = [
    huggingface-hub
    lightrag-hku
    # mineru is depended on through `optionalDeps [ "core" ]` rather than bare.
    # NOTE(review): `optionalDeps` is not defined in this file; presumably it
    # returns mineru with the named optional-dependency groups — confirm.
    (mineru.optionalDeps [ "core" ])
    tqdm
  ];
};
# LightRAG spec, taken from the upstream v1.4.9.11 tag.
lightrag-hku = {
  url = "https://github.com/HKUDS/LightRAG/tree/v1.4.9.11";
  hash = "sha256-TX/HSF2ZqoDo2SRlFzM+bkUxQXiUpnrl6kwI+lljjdo=";
  meta.description = "LightRAG: Simple and Fast Retrieval-Augmented Generation";
  meta.license = lib.licenses.mit;
  # nano-vectordb and pipmaster come from this file's own set; the rest are
  # function arguments.
  propagatedBuildDeps = [
    aiohttp
    configparser
    google-api-core
    google-genai
    json-repair
    nano-vectordb
    networkx
    numpy
    pandas
    pipmaster
    pydantic
    pypinyin
    python-dotenv
    tenacity
    tiktoken
    xlsxwriter
  ];
};
# pipmaster spec, pinned to an upstream commit rather than a tag; the version
# is therefore stated explicitly through `force.version`.
pipmaster = {
  url = "https://github.com/ParisNeo/pipmaster/tree/820acdbc9d541443942bd8afd6ba968036bd8452";
  hash = "sha256-H1R/hXPyjn1r6Dal0QMEQeBR5xUlgacxFsuDUZVwV+s=";
  force.version = "1.1.0";
  propagatedBuildDeps = [
    ascii-colors
  ];
  # Explicit `lib.licenses.*` (no `with lib;`) to match every other spec in
  # this file and avoid pulling all of `lib` into scope.
  meta = {
    description = "A versatile Python package manager utility for simplifying package installation, updates, checks, and environment management.";
    license = lib.licenses.asl20;
  };
};
# nano-vectordb spec, pinned to an upstream commit with an explicit version.
nano-vectordb = {
  url = "https://github.com/gusye1234/nano-vectordb/tree/03f821348f04a93a9c36bb575faae05b61f4c02e";
  hash = "sha256-jLco+1TAncF8Ep+VGd3DhsoiZTW7G/H8fAEwnNsovTY=";
  force = {
    version = "0.0.4.3";
  };
  meta.description = "A simple, easy-to-hack Vector Database implementation";
  meta.license = lib.licenses.mit;
  propagatedBuildDeps = [ numpy ];
};
# ascii_colors spec, pinned to an upstream commit with an explicit version.
ascii-colors = {
  url = "https://github.com/ParisNeo/ascii_colors/tree/817a21485136635e3da89ff08444183254b25aab";
  hash = "sha256-zK4vM2sOfEFDJBpFjcJmQXljg3pgprP+VFuuk5JNWnE=";
  force = {
    version = "0.11.12";
  };
  meta.description = "A python library for displaying stuff on the console in a pretty way";
  meta.license = lib.licenses.asl20;
  propagatedBuildDeps = [ wcwidth ];
};
# MinerU spec, taken from the upstream `mineru-2.7.6-released` tag.
mineru = {
  url = "https://github.com/opendatalab/MinerU/tree/mineru-2.7.6-released";
  hash = "sha256-A/nFNQYGEfmWUdpX8N1lbex3mdiF8+eN8s0UKQFc42E=";
  # Unconditional dependencies. pdftext, fast-langdetect, mineru-vl-utils and
  # qwen-vl-utils come from this file's own set; the rest are function arguments.
  propagatedBuildDeps = [
    boto3
    click
    loguru
    numpy
    pdfminer-six
    tqdm
    requests
    httpx
    pillow
    pypdfium2
    pypdf
    reportlab
    pdftext
    modelscope
    huggingface-hub
    json-repair
    opencv-python
    fast-langdetect
    scikit-image
    openai
    beautifulsoup4
    magika
    mineru-vl-utils
    qwen-vl-utils
  ];
  # Optional dependency groups. The set is `rec` so that `core` can be built
  # from the sibling groups.
  optional-dependencies =
    let
      # Inside the `rec` set below, `gradio` names the group (a list), which
      # shadows the `gradio` package argument. Alias the package here so the
      # group can still reference it.
      _gradio = gradio;
    in
    rec {
      vlm = [
        torch
        transformers
        accelerate
      ];
      # Groups left disabled (their packages are not arguments of this file):
      # vllm = [
      #   vllm
      # ];
      # lmdeploy = [
      #   lmdeploy
      # ];
      # mlx = [
      #   mlx-vlm
      # ];
      pipeline = [
        matplotlib
        ultralytics
        doclayout-yolo
        dill
        pyyaml
        ftfy
        shapely
        pyclipper
        omegaconf
        torch
        torchvision
        transformers
        onnxruntime
      ];
      api = [
        fastapi
        python-multipart
        uvicorn
      ];
      gradio = [
        _gradio
        gradio-pdf
      ];
      # Union of every enabled group; entries shared by `vlm` and `pipeline`
      # appear twice in the resulting list.
      core = vlm ++ pipeline ++ api ++ gradio;
      # all = core ++ mlx ++ vllm ++ lmdeploy;
    };
  pythonRelaxDeps = [
    "fast-langdetect"
  ];
  meta = {
    description = "Transforms complex documents like PDFs into LLM-ready markdown/JSON for your Agentic workflows.";
    # Was `lib.licenses.gpl3`, the deprecated/ambiguous GPL attribute.
    # NOTE(review): upstream MinerU is believed to ship an AGPL-3.0 LICENSE.md —
    # confirm against the pinned tag.
    license = lib.licenses.agpl3Only;
  };
};
# DocLayout-YOLO spec, taken from the PyPI 0.0.4 release page.
doclayout-yolo = {
  url = "https://pypi.org/project/doclayout-yolo/0.0.4/";
  hash = "sha256-gDEdEL7QPPiExb/MYkv+D9z/bnhLp2eZNHsCkNedy00=";
  propagatedBuildDeps = [
    matplotlib
    opencv-python
    pillow
    pyyaml
    requests
    scipy
    torch
    torchvision
    tqdm
    psutil
    py-cpuinfo
    pandas
    seaborn
    albumentations
    huggingface-hub
    # Supplied in place of the `thop` requirement, which is removed from the
    # package metadata below.
    ultralytics-thop
  ];
  # Drop the `yolo` executable so it does not collide with the one installed
  # by ultralytics (see the inline shell comment).
  postInstall = ''
    rm -f "$out/bin/yolo" # collision with $'{pkgs.python3Packages.ultralytics}/bin/.yolo-wrapped
  '';
  # `thop` is removed outright, so no separate relax entry is needed for it.
  pythonRemoveDeps = [
    "thop"
  ];
  meta = {
    description = "DocLayout-YOLO: Enhancing Document Layout Analysis through Diverse Synthetic Data and Global-to-Local Adaptive Perception";
    license = lib.licenses.agpl3Only;
  };
};
# `thop` is intentionally not defined here; use `ultralytics-thop` instead.
# qwen-vl-utils spec, taken from the `qwen-vl-utils` subdirectory of the
# Qwen3-VL repository at commit fe12058, with an explicit version.
qwen-vl-utils = {
  url = "https://github.com/QwenLM/Qwen3-VL/tree/fe12058/qwen-vl-utils";
  hash = "sha256-Vha/Tc4q2v5RCL31hB9U4ZrfIFxfZjgwM6PYlvDeoAQ=";
  force.version = "0.0.14";
  meta.description = "Qwen-VL Utils contains a set of helper functions for processing and integrating visual language information with Qwen-VL Series Model.";
  meta.license = lib.licenses.asl20;
  pythonImportsCheck = [ "qwen_vl_utils" ];
  propagatedBuildDeps = [
    hatchling
    av
    pillow
    requests
    torch
    torchvision
  ];
  # Single optional group; the inner `decord` is the package from this file's
  # own set, not the group being defined (the set is not `rec`).
  optional-dependencies.decord = [ decord ];
};
# decord spec, taken from the `python` subdirectory of the upstream v0.6.0 tag.
# NOTE(review): unlike every other spec in this file, no `hash` and no
# dependency list are given — confirm this is intentional.
decord = {
  url = "https://github.com/dmlc/decord/tree/v0.6.0/python";
  meta.description = "An efficient video loader for deep learning with smart shuffling that's super easy to digest";
  meta.license = lib.licenses.asl20;
};
# mineru-vl-utils spec, taken from the upstream
# `mineru_vl_utils-0.1.22-released` tag.
mineru-vl-utils = {
  url = "https://github.com/opendatalab/mineru-vl-utils/tree/mineru_vl_utils-0.1.22-released";
  hash = "sha256-hpTW/1nwXPxfld4nx0XHZBMerWj+UL1vzDhYpwjezRU=";
  meta.description = "A Python package for interacting with the MinerU Vision-Language Model.";
  # NOTE(review): `gpl3` is believed to be the deprecated, ambiguous GPL
  # attribute in nixpkgs — verify the upstream licence and pick a precise one.
  meta.license = lib.licenses.gpl3;
  propagatedBuildDeps = [
    httpx
    httpx-retries
    aiofiles
    pillow
    pydantic
    loguru
  ];
  # Only the `transformers` group is enabled; the inner `transformers` is the
  # package argument (the set is not `rec`).
  optional-dependencies.transformers = [
    torch
    transformers
    accelerate
    torchvision
  ];
  # Groups left disabled:
  # vllm = [
  #   vllm
  # ];
  # mlx = [
  #   mlx-vlm
  # ];
  # lmdeploy = [
  #   lmdeploy
  #   qwen-vl-utils
  # ];
};
# httpx-retries spec, taken from the upstream 0.4.5 tag.
httpx-retries = {
  url = "https://github.com/will-ockmore/httpx-retries/tree/0.4.5";
  hash = "sha256-zJ3ExSEWxlHFluSdYA8/XZ3zb4KBelU+IOFyUu4ezvo=";
  meta.description = "A retry layer for HTTPX.";
  meta.license = lib.licenses.mit;
  propagatedBuildDeps = [
    hatchling
    hatch-fancy-pypi-readme
    httpx
  ];
};
# fast-langdetect spec, taken from the upstream `pypi_1.0.0` tag.
fast-langdetect = {
  url = "https://github.com/LlmKira/fast-langdetect/tree/pypi_1.0.0";
  hash = "sha256-pj46gHG9cjkSjnYc88bSctL/1LAUe0jkBuM/GZWMsUI=";
  # robust-downloader comes from this file's own set; the rest are function
  # arguments.
  propagatedBuildDeps = [
    pdm-backend
    robust-downloader
    requests
    fasttext-predict
  ];
  meta = {
    # Stray leading/trailing whitespace removed from the description.
    description = "80x faster Fasttext language detection out of the box | Split text by language";
    license = lib.licenses.mit;
  };
};
# robust-downloader spec, taken from the upstream 0.0.2 tag.
robust-downloader = {
  url = "https://github.com/fedebotu/robust-downloader/tree/0.0.2";
  hash = "sha256-UmzfEIPiMtUkOG6sIMYgLxc8YwL5wRgMBRlywqKomv0=";
  meta.description = "Minimal Python downloader with robustness in mind - resumable downloads, retries, and more";
  meta.license = lib.licenses.asl20;
  propagatedBuildDeps = [
    tqdm
    colorlog
    requests
  ];
};
# pdftext spec, taken from the upstream v0.6.3 tag.
pdftext = {
  url = "https://github.com/datalab-to/pdftext/tree/v0.6.3";
  hash = "sha256-EGVjzjDWtdcEPX//cOm5+xm9FvX0aP+h6fsD25hC8gA=";
  meta.description = "Extract structured text from pdfs quickly";
  meta.license = lib.licenses.asl20;
  propagatedBuildDeps = [
    poetry-core
    click
    pydantic
    pydantic-settings
    pypdfium2
  ];
  # pypdfium2 is listed as a dependency above and also has its requirement
  # relaxed here.
  pythonRelaxDeps = [ "pypdfium2" ];
};
}