LightRAG/env.example at main · servforce/LightRAG · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
### All configurable environment variables must appear in this sample file, either active or commented out
### Setup tool `make env-*` uses this file to generate final .env file

### Target environment of this env file: host/compose (compose is for Docker or Kubernetes)
# LIGHTRAG_RUNTIME_TARGET=host

###########################
### Server Configuration
###########################
HOST=0.0.0.0
PORT=9621
WEBUI_TITLE='My Graph KB'
WEBUI_DESCRIPTION='Simple and Fast Graph Based RAG System'
# WORKERS=2
### gunicorn worker timeout (also used as the default LLM request timeout if LLM_TIMEOUT is not set)
# TIMEOUT=150
### CORS allowed origins for browser cross-origin requests. Defaults to "*"
### (any origin). The bundled WebUI is served same-origin and does not need
### this; set an explicit allowlist only when a different-origin web app calls
### the API from a browser. Credentialed (cookie) cross-origin requests are
### only enabled for an explicit allowlist, never for the "*" wildcard.
# CORS_ORIGINS=http://localhost:3000,http://localhost:8080

### Path Prefix Configuration (Optional)
### Used to host multiple LightRAG instances on one host behind a reverse
### proxy that routes by site prefix. Leave unset (or empty) for a
### single-instance deployment.
###
### - LIGHTRAG_API_PREFIX  : reverse-proxy prefix the upstream proxy strips
###                          before forwarding (passed to FastAPI as root_path).
###
### See docs/MultiSiteDeployment.md for end-to-end examples.
# LIGHTRAG_API_PREFIX=/site01

### Optional SSL Configuration
### Docker note: generated compose files mount staged certs at /app/data/certs/ inside the container
# SSL=true
# SSL_CERTFILE=/path/to/cert.pem
# SSL_KEYFILE=/path/to/key.pem

### Directory Configuration (defaults to current working directory)
### Default value is: ./inputs ./rag_storage
# INPUT_DIR=<absolute_path_for_doc_input_dir>
# WORKING_DIR=<absolute_path_for_working_dir>

### Tiktoken cache directory (Store cached files in this folder for offline deployment)
# TIKTOKEN_CACHE_DIR=/app/data/tiktoken

### Ollama Emulating Model and Tag
# OLLAMA_EMULATING_MODEL_NAME=lightrag
OLLAMA_EMULATING_MODEL_TAG=latest

### Max nodes for graph retrieval (Ensure WebUI local settings are also updated, which is limited to this value)
# MAX_GRAPH_NODES=1000

### Logging level
# LOG_LEVEL=INFO
# VERBOSE=False
# LOG_MAX_BYTES=10485760
# LOG_BACKUP_COUNT=5
### Logfile location (defaults to current working directory)
# LOG_DIR=/path/to/log/directory
# LIGHTRAG_PERFORMANCE_TIMING_LOGS=false

#####################################
### Login and API-Key Configuration
#####################################
# AUTH_ACCOUNTS='admin:admin123,user1:{bcrypt}$2b$12$S8Yu.gCbuAbNTJFB.231gegTwr5pgrFxc8H9kXQ4/sduFBHkhM8Ka'
# TOKEN_SECRET=lightrag-jwt-default-secret-key!
# JWT_ALGORITHM=HS256
# TOKEN_EXPIRE_HOURS=48
# GUEST_TOKEN_EXPIRE_HOURS=24

### Token Auto-Renewal Configuration (Sliding Window Expiration)
### Enable automatic token renewal to prevent active users from being logged out
### When enabled, tokens will be automatically renewed when remaining time < threshold
# TOKEN_AUTO_RENEW=true
### Token renewal threshold (0.0 - 1.0)
### Renew token when remaining time < (total time * threshold)
### Default: 0.5 (renew when 50% time remaining)
### Examples:
###   0.5 = renew when 24h token has 12h left
###   0.25 = renew when 24h token has 6h left
# TOKEN_RENEW_THRESHOLD=0.5
### Note: Token renewal is automatically skipped for certain endpoints:
###   - /health: Health check endpoint (no authentication required)
###   - /documents/paginated: Frequently polled by client (5-30s interval)
###   - /documents/pipeline_status: Very frequently polled by client (2s interval)
###   - Rate limit: Minimum 60 seconds between renewals for same user

### API-Key to access LightRAG Server API
### Use this key in HTTP requests with the 'X-API-Key' header
### Example: curl -H "X-API-Key: your-secure-api-key-here" http://localhost:9621/query
# LIGHTRAG_API_KEY=your-secure-api-key-here
# WHITELIST_PATHS=/health,/api/*

######################################################################################
### Query Configuration
###
### How to control the context length sent to LLM:
###    MAX_ENTITY_TOKENS + MAX_RELATION_TOKENS < MAX_TOTAL_TOKENS
###    Chunk_Tokens = MAX_TOTAL_TOKENS - Actual_Entity_Tokens - Actual_Relation_Tokens
######################################################################################
### LLM response cache for query (default=true, permanently disabled for streaming response)
ENABLE_LLM_CACHE=false
# COSINE_THRESHOLD=0.2
### Number of entities or relations retrieved from KG
# TOP_K=40
### Maximum number of chunks for naive vector search
# CHUNK_TOP_K=20
### control the actual entities sent to LLM
# MAX_ENTITY_TOKENS=6000
### control the actual relations sent to LLM
# MAX_RELATION_TOKENS=8000
### control the maximum tokens sent to LLM (including entities, relations and chunks)
# MAX_TOTAL_TOKENS=30000

### chunk selection strategies
###     VECTOR: Pick KG chunks by vector similarity; the chunks delivered to the LLM align more closely with naive retrieval
###     WEIGHT: Pick KG chunks by entity and chunk weight; delivers more purely KG-related chunks to the LLM
###     If reranking is enabled, the impact of chunk selection strategies will be diminished.
# KG_CHUNK_PICK_METHOD=VECTOR

### maximum number of related chunks per source entity or relation
###     The chunk picker uses this value to determine the total number of chunks selected from KG(knowledge graph)
###     Higher values increase re-ranking time
# RELATED_CHUNK_NUMBER=5

### Append each chunk's heading path (parent headings joined by " → ") as a
### `content_headings` field in the chunk JSON sent to the LLM. Costs extra tokens.
ENABLE_CONTENT_HEADINGS=true

#########################################################
### Reranking configuration
### RERANK_BINDING type: null, cohere, jina, aliyun
### For rerank model deployed by vLLM use cohere binding
### If LightRAG deployed in Docker:
###    use host.docker.internal instead of localhost in RERANK_BINDING_HOST
#########################################################
RERANK_BINDING=null
# RERANK_MODEL=BAAI/bge-reranker-v2-m3
# RERANK_BINDING_HOST=http://localhost:8000/rerank
# RERANK_BINDING_API_KEY=your_rerank_api_key_here

### rerank score chunk filter(set to 0.0 to keep all chunks, 0.6 or above if LLM is not strong enough)
# MIN_RERANK_SCORE=0.0
### Enable rerank by default in query params when RERANK_BINDING is not null
# RERANK_BY_DEFAULT=True

### Rerank concurrency and timeout (independent from base LLM settings)
### MAX_ASYNC_RERANK falls back to MAX_ASYNC_LLM when unset.
### RERANK_TIMEOUT has its own default (30s) since reranker calls are
### typically much shorter than full LLM generation.
# MAX_ASYNC_RERANK=4
# RERANK_TIMEOUT=30

### Cohere AI
# # RERANK_MODEL=rerank-v3.5
# # RERANK_BINDING_HOST=https://api.cohere.com/v2/rerank
# # RERANK_BINDING_API_KEY=your_rerank_api_key_here
### Cohere rerank chunking configuration (useful for models with token limits like ColBERT)
# RERANK_ENABLE_CHUNKING=true
# RERANK_MAX_TOKENS_PER_DOC=480

### Aliyun Dashscope
# # RERANK_MODEL=gte-rerank-v2
# # RERANK_BINDING_HOST=https://dashscope.aliyuncs.com/api/v1/services/rerank/text-rerank/text-rerank
# # RERANK_BINDING_API_KEY=your_rerank_api_key_here

### Jina AI
# # RERANK_MODEL=jina-reranker-v2-base-multilingual
# # RERANK_BINDING_HOST=https://api.jina.ai/v1/rerank
# # RERANK_BINDING_API_KEY=your_rerank_api_key_here

### For local deployment Embedding and Reranker with vLLM (OpenAI-compatible API)
### Wizard metadata used to preserve the chosen deployment provider across setup reruns
# LIGHTRAG_SETUP_EMBEDDING_PROVIDER=vllm
# LIGHTRAG_SETUP_RERANK_PROVIDER=vllm
# VLLM_EMBED_MODEL=BAAI/bge-m3
# VLLM_EMBED_PORT=8001
# VLLM_EMBED_DEVICE=cpu
### VLLM_EMBED_API_KEY is passed as --api-key to vLLM; synced to EMBEDDING_BINDING_API_KEY; auto-generated if blank
# VLLM_EMBED_API_KEY=
# VLLM_EMBED_EXTRA_ARGS=
# VLLM_RERANK_MODEL=BAAI/bge-reranker-v2-m3
# VLLM_RERANK_PORT=8000
# VLLM_RERANK_DEVICE=cuda
### VLLM_RERANK_API_KEY is passed as --api-key to vLLM; synced to RERANK_BINDING_API_KEY; auto-generated if blank
# VLLM_RERANK_API_KEY=
### Use float16 for GPU mode. CPU mode uses the official vLLM CPU image.
# VLLM_USE_CPU=1
### VLLM_USE_CPU (above): set to 1 for CPU mode, unset for GPU mode
# CUDA_VISIBLE_DEVICES=-1
### CUDA_VISIBLE_DEVICES (above): set to -1 to disable CUDA (CPU mode), or specific GPU IDs for GPU mode
# NVIDIA_VISIBLE_DEVICES=0
### NVIDIA_VISIBLE_DEVICES (above): optional Docker runtime equivalent; generated GPU compose honors either variable.
# VLLM_RERANK_EXTRA_ARGS=

########################################
### Document processing configuration
########################################
### Document processing output language: English, Chinese, French, German ...
SUMMARY_LANGUAGE=English

### Enable JSON-structured output for entity extraction
### Default behavior: JSON output is disabled when ENTITY_EXTRACTION_USE_JSON is unset
### JSON output incurs higher latency but delivers improved reliability
ENTITY_EXTRACTION_USE_JSON=true

### Optional external YAML profile for entity type guidance and extraction examples
### Profiles are loaded from PROMPT_DIR/entity_type (PROMPT_DIR defaults to ./prompts).
### A reference template is shipped at prompts/samples/entity_type_prompt.sample.yml;
# ENTITY_TYPE_PROMPT_FILE=entity_type_prompt.yml
# PROMPT_DIR=<absolute_path_for_prompt_dir>

### Multimodal parsing/analyze integration
### Optional parser routing rules. Example for VLM & MinerU enabled configuration:
###     LIGHTRAG_PARSER=*:native-iteP;*:mineru-iteP;*:legacy-R
### Rules are separated with semicolons ';' (recommended) or commas ',';
### Rules match on file suffix (e.g. pdf) and are checked left-to-right.
### If mineru/docling appears in LIGHTRAG_PARSER, the corresponding endpoint
### below must be configured before server startup.
###
### Per-strategy chunk parameters may be attached in parentheses to a chunk
### selector (F/R/V/P). Inside the parentheses a comma only separates parameters.
### Supported parameters (alias in brackets):
###     chunk_token_size [chunk_ts]          F/R/V/P   e.g. R(chunk_ts=800)
###     chunk_overlap_token_size [chunk_ol]  F/R/P     (V has no overlap)
###     LIGHTRAG_PARSER=pdf:legacy-R(chunk_ts=800,chunk_ol=80);*:legacy-R
### The same syntax works in a filename hint, e.g. notes.[-R(chunk_ts=800)].md
### See docs/FileProcessingPipeline.md for details
LIGHTRAG_PARSER=*:native-teP;*:legacy-R

### Native Markdown (.md / .textpack) remote image handling
### External http(s) images in markdown are downloaded and embedded into the
### sidecar assets by default (SSRF-guarded: private/loopback/link-local hosts
### are refused; the socket is pinned to the validated IP so a DNS rebind cannot
### redirect it to an internal host, and any ambient HTTP(S)_PROXY is ignored).
### Set ENABLED=false to instead DROP external images (no sidecar entry), in
### which case a doc whose only images are external links produces no drawings.json.
NATIVE_MD_IMAGE_DOWNLOAD_ENABLED=true
### When downloading is enabled, REQUIRED=true fails the document on a download
### error; false (default) keeps the image as an external link and warns.
### (Base64 and .textpack file-reference images are always embedded regardless
### of this switch; SVG images are rasterized to PNG via cairosvg.)
# NATIVE_MD_IMAGE_DOWNLOAD_REQUIRED=false
# NATIVE_MD_IMAGE_DOWNLOAD_TIMEOUT=30
### Per-image size ceiling: caps a remote download AND a single bundled
### (.textpack) asset, so one oversized image cannot be read into memory.
# NATIVE_MD_IMAGE_MAX_BYTES=26214400
### SVG render budget: an SVG whose declared canvas (width*height or viewBox)
### exceeds this pixel count is skipped BEFORE rasterization
# NATIVE_MD_IMAGE_MAX_SVG_PIXELS=16000000
### Escape hatch for the SSRF guard: only globally-routable IPs are allowed by
### default. To permit specific non-public ranges (e.g. an internal image host),
### list comma-separated CIDRs/IPs. Applies to DNS-resolved IPs and redirects.
# NATIVE_MD_IMAGE_ALLOWED_NON_PUBLIC_CIDRS=10.0.0.0/8,192.168.1.5
### Downloaded external images are cached in a `<file>.native_raw/` sidecar dir
### so a re-parse of an unchanged file reuses them instead of re-downloading.
### Set the following env var true to force a re-download (discard the cache)
# LIGHTRAG_FORCE_REPARSE_NATIVE=false

### Async parser service protocol (optional)
### Configure these when using remote MinerU/Docling async services

### ---- MinerU shared parameters (both local and official modes) ----
### MinerU API protocol. Choose one active mode.
### - official: MinerU precision API v4. Requires MINERU_API_TOKEN.
### - local: self-hosted mineru-api / mineru-router base URL.
MINERU_API_MODE=local
# MINERU_POLL_INTERVAL_SECONDS=2
# MINERU_MAX_POLLS=600
# MINERU_LANGUAGE=ch
# MINERU_ENABLE_TABLE=true
# MINERU_ENABLE_FORMULA=true
# MINERU_PAGE_RANGES=
### MINERU_PAGE_RANGES semantics differ by mode:
### - official: forwarded verbatim, supports e.g. "1-3,5,7-9".
### - local:    only a single page ("3") or simple range ("1-10"); comma
###             lists are rejected at startup.
### When switching modes, double-check this constraint.
### Per-file override: a hint / rule may set page_range on the engine token,
### e.g. notes.[mineru(page_range=1-3,page_range=5)].pdf — inside the parens a
### comma only separates parameters, so a multi-segment list REPEATS the key
### (and requires MINERU_API_MODE=official). Likewise language / local_parse_method.

### ---- MinerU local-only (MINERU_API_MODE=local) ----
MINERU_LOCAL_ENDPOINT=http://127.0.0.1:8000
### MINERU_LOCAL_BACKEND: which mineru-api backend handles the parse.
###   Accepted values (per mineru-api POST /tasks form parameter `backend`):
###     hybrid-auto-engine - pipeline + VLM combo with auto-selected local
###                          engine (mineru-api's default). GPU required.
###     pipeline           - CPU-friendly traditional pipeline; no VLM step.
###     vlm-auto-engine    - VLM with auto-selected local inference engine
###                          (sglang-engine / vllm-engine if GPU is available);
###                          requires the matching engine extra preinstalled
###                          on the mineru-api side, plus model weights.
###   We ship `hybrid-auto-engine` -- requires the target mineru-api
###   deployment to have a GPU plus the matching inference engine
###   (sglang / vllm) and model weights installed. Switch to `pipeline`
###   for CPU-only deployments without those dependencies.
MINERU_LOCAL_BACKEND=hybrid-auto-engine
### MINERU_LOCAL_PARSE_METHOD: parsing strategy for the pipeline component.
###   Accepted values:
###     auto - auto-detect embedded text-layer vs OCR per page (default).
###     txt  - extract text from the embedded text layer only; fastest,
###            but yields empty output on scanned PDFs without a text layer.
###     ocr  - force OCR on every page regardless of text-layer quality;
###            slowest, reliable on scanned or low-quality PDFs.
###   Only consumed when MINERU_LOCAL_BACKEND is `pipeline` or
###   `hybrid-auto-engine` (the pipeline arm of the hybrid pipeline).
###   Pure VLM backends (`vlm-auto-engine`, `vlm-http-client`) ignore this
###   parameter -- the VLM model handles layout/OCR natively.
MINERU_LOCAL_PARSE_METHOD=auto
### MINERU_LOCAL_IMAGE_ANALYSIS: enable VLM image/chart analysis pass for
###   better caption and footnote recognition.
###   Only consumed by `vlm-auto-engine`, `vlm-http-client`,
###   `hybrid-auto-engine`, `hybrid-http-client`. The `pipeline` backend
###   silently drops this flag -- its `_process_pipeline` does not accept
###   the kwarg, so setting `false` under pipeline does NOT speed parsing
###   up; pipeline never invokes the VLM image pass to begin with.
###   Disable (`false`) on VLM / hybrid backends to skip the extra VLM
###   round, trading image / chart semantic descriptions for faster parsing
###   and lower GPU cost.
MINERU_LOCAL_IMAGE_ANALYSIS=false
# MINERU_LOCAL_START_PAGE_ID=0
# MINERU_LOCAL_END_PAGE_ID=99999

### ---- MinerU official-only (MINERU_API_MODE=official) ----
# MINERU_API_TOKEN=your-api-key
# MINERU_OFFICIAL_ENDPOINT=https://mineru.net
# MINERU_MODEL_VERSION=vlm
# MINERU_IS_OCR=false

### Force re-upload of file to MinerU on every retry after failure
### Disables caching of result outcomes
# LIGHTRAG_FORCE_REPARSE_MINERU=false

### Docling parser (docling-serve v1 / async API).
###
### Endpoint: base URL only — the client appends /v1/convert/file/async,
###     /v1/status/poll/{task_id}?wait=<DOCLING_POLL_INTERVAL_SECONDS>,
###     /v1/result/{task_id} itself.
### Pipeline shape (pipeline=standard, target_type=zip,
###     to_formats=[json,md], image_export_mode=referenced) is fixed in
###     code so the sidecar flow stays self-consistent — flipping any of
###     these would break the adapter and is therefore not exposed as env.
###
### OCR tunables:
### - DOCLING_DO_OCR: master switch; when false the engine relies only on
###     text-layer extraction.
### - DOCLING_FORCE_OCR: when true, OCR every page regardless of text-layer
###     quality (slower, useful for scanned PDFs with bad text layers).
### - DOCLING_OCR_ENGINE: explicit engine selection (DEPRECATED in the
###     docling-serve OpenAPI but still honored for older deployments).
### - DOCLING_OCR_PRESET: recommended replacement for DOCLING_OCR_ENGINE.
### - DOCLING_OCR_LANG: JSON array (e.g. ["en","zh"]) or comma-separated
###     list. Empty (default) lets the OCR engine pick its default.
### - DOCLING_DO_FORMULA_ENRICHMENT: when true, the code-formula model runs
###     and `texts[*].label="formula"` items carry LaTeX in `text`. Default
###     false because the model may not be present on every deployment;
###     adapter falls back to plain-text formulas when disabled.
###
### Polling budget (server-side long-poll; client does NOT add extra sleep):
### - DOCLING_POLL_INTERVAL_SECONDS: ``?wait=N`` value sent to
###     /v1/status/poll/{task_id}. Larger N = fewer round trips per parse;
###     bound by your reverse-proxy idle timeout. Default 5.
### - DOCLING_MAX_POLLS: max polling rounds before raising TimeoutError.
###     Worst-case wall-clock budget ≈
###     DOCLING_POLL_INTERVAL_SECONDS × DOCLING_MAX_POLLS. Default 240
###     (≈ 20 minutes at wait=5s); raise for very large PDFs.
###
### Bundle cache controls:
### - DOCLING_ENGINE_VERSION: recorded in <base>.docling_raw/_manifest.json.
###     Mismatch with the recorded value forces a cache miss → re-download.
###     Leave empty to skip this check.
### - LIGHTRAG_FORCE_REPARSE_DOCLING: when truthy ("1"/"true"), bypass the
###     docling raw cache and re-upload on every parse_docling call.
### - DOCLING_BBOX_ATTRIBUTES: override the doc-level bbox_attributes
###     written into <base>.blocks.jsonl meta. Default
###     {"origin":"LEFTBOTTOM"} matches docling's default coordinate system.
DOCLING_ENDPOINT=http://localhost:5001
DOCLING_DO_OCR=true
### DOCLING_FORCE_OCR can be overridden per file via a hint / rule on the engine
### token, e.g. scan.[docling(force_ocr=true)].pdf
DOCLING_FORCE_OCR=true
DOCLING_DO_FORMULA_ENRICHMENT=false
# DOCLING_OCR_ENGINE=auto
# DOCLING_OCR_PRESET=auto
# DOCLING_OCR_LANG=
# DOCLING_POLL_INTERVAL_SECONDS=5
# DOCLING_MAX_POLLS=240
# DOCLING_BBOX_ATTRIBUTES={"origin":"LEFTBOTTOM"}
### Force re-upload of file to Docling on every retry after failure
### Disables caching of result outcomes
# LIGHTRAG_FORCE_REPARSE_DOCLING=false

### File upload size limit (in bytes)
### Default: 104857600 (100MB)
### Set to 0 or None for unlimited upload size
### Examples:
###   52428800  = 50MB
###   104857600 = 100MB (default)
###   209715200 = 200MB
### Note: If using Nginx as reverse proxy, also configure client_max_body_size
# MAX_UPLOAD_SIZE=104857600

### Global chunk size, 500~1500 is recommended.
### Chunker inherits the global value here only when its own var is unset.
### Exception: P never inherits CHUNK_SIZE — it uses CHUNK_P_SIZE (default 2000).
# CHUNK_SIZE=1200
# CHUNK_OVERLAP_SIZE=100

### Fixed-token chunker (process_options=F, default) settings
###     CHUNK_F_SIZE: per-strategy chunk_token_size override; falls back to CHUNK_SIZE when unset
###     CHUNK_F_OVERLAP_SIZE: token overlap; falls back to CHUNK_OVERLAP_SIZE when unset
###     CHUNK_F_SPLIT_BY_CHARACTER: optional separator string; pre-segment before token windowing
###     CHUNK_F_SPLIT_BY_CHARACTER_ONLY: when true, raise on oversize segment instead of token re-split
# CHUNK_F_SIZE=1200
# CHUNK_F_OVERLAP_SIZE=100
# CHUNK_F_SPLIT_BY_CHARACTER=
# CHUNK_F_SPLIT_BY_CHARACTER_ONLY=false

### Recursive character chunker (process_options=R) settings
###     CHUNK_R_SIZE: per-strategy chunk_token_size override; falls back to CHUNK_SIZE when unset
###     CHUNK_R_OVERLAP_SIZE: token overlap between adjacent chunks; falls back to CHUNK_OVERLAP_SIZE when unset
###     CHUNK_R_SEPARATORS: JSON array of cascaded separators tried by RecursiveCharacterTextSplitter.
###       Default includes CJK sentence-ending punctuation so Chinese / mixed-language
###       documents split at semantic boundaries.  Order: paragraph (\n\n) > line (\n) >
###       Chinese sentence-end (。！？) > Chinese semi-clause (；，) > space > char.
###       English ".?!" are intentionally omitted (literal match would split "0.95" /
###       "e.g."); the English path falls through space / char as before.
# CHUNK_R_SIZE=1200
# CHUNK_R_OVERLAP_SIZE=100
# CHUNK_R_SEPARATORS=["\n\n","\n","。","！","？","；","，"," ",""]

### Semantic vector chunker (process_options=V) settings
###     CHUNK_V_SIZE: per-strategy chunk_token_size hard cap (oversized pieces are
###       re-split via R before being emitted); falls back to CHUNK_SIZE when unset
###     CHUNK_V_BREAKPOINT_THRESHOLD_TYPE: percentile | standard_deviation | interquartile | gradient
###     CHUNK_V_BREAKPOINT_THRESHOLD_AMOUNT: leave empty to use the LangChain per-type default (e.g. 95 for percentile)
###     CHUNK_V_BUFFER_SIZE: number of adjacent sentences combined when computing distances
###     CHUNK_V_SENTENCE_SPLIT_REGEX: regex fed to LangChain SemanticChunker for the
###       initial sentence split.  Default extends the upstream English-only pattern
###       with CJK sentence-end punctuation (。？！).  Override if you need a
###       different language mix.  Note: env value is the raw regex string, no JSON
###       quoting.
# CHUNK_V_SIZE=1200
# CHUNK_V_BREAKPOINT_THRESHOLD_TYPE=percentile
# CHUNK_V_BREAKPOINT_THRESHOLD_AMOUNT=
# CHUNK_V_BUFFER_SIZE=1
# CHUNK_V_SENTENCE_SPLIT_REGEX=(?<=[.?!])\s+|(?<=[。？！])

### Paragraph semantic chunker (process_options=P) settings
###     CHUNK_P_SIZE: per-strategy chunk_token_size override; defaults to 2000 when unset
###       (does NOT fall back to CHUNK_SIZE — paragraph-semantic merging needs more
###       headroom than the global default to keep related paragraphs together).
###     CHUNK_P_OVERLAP_SIZE: overlap for prose fallback and table-bridge context;
###                           falls back to CHUNK_OVERLAP_SIZE when unset
###     CHUNK_P_DROP_REFERENCES: drop the trailing reference section before chunking.
###       Global default switch; overridable per-file via the hint param
###       drop_references (alias drop_rf), e.g. paper.[-P(drop_rf=true)].pdf. Frozen
###       into the document's chunk_options at enqueue and recorded in
###       doc_status.metadata['chunk_opts'].
###     CHUNK_P_REFERENCES_TAIL_N: a reference block is only dropped when it sits in
###       the last N content blocks (safety window; default 2).
###     CHUNK_P_REFERENCES_HEADINGS: pipe-separated reference heading prefixes
###       (default References|Bibliography|参考文献). English words match
###       case-insensitively at a word boundary; 参考文献 matches as a prefix.
###     NOTE: TAIL_N / HEADINGS are read live by the chunker at run time (NOT
###       snapshotted) — editing them changes the behaviour of re-runs.
# CHUNK_P_SIZE=2000
# CHUNK_P_OVERLAP_SIZE=100
# CHUNK_P_DROP_REFERENCES=false
# CHUNK_P_REFERENCES_TAIL_N=2
# CHUNK_P_REFERENCES_HEADINGS=References|Bibliography|参考文献

### Number of summary segments or tokens to trigger LLM summary on entity/relation merge (at least 3 is recommended)
# FORCE_LLM_SUMMARY_ON_MERGE=8
### Max description token size to trigger LLM summary
# SUMMARY_MAX_TOKENS=1200
### Recommended LLM summary output length in tokens
# SUMMARY_LENGTH_RECOMMENDED=600
### Maximum context size sent to LLM for description summary
# SUMMARY_CONTEXT_SIZE=12000
### Maximum token size allowed for entity extraction input context
# MAX_EXTRACT_INPUT_TOKENS=20480

### Multimodal surrounding-context budget (per-half token cap for the
### `leading` / `trailing` text injected into VLM and extract prompts).
### Computed at analyze_multimodal entry; the two halves are independent
### so deployments can bias context forward or backward as needed.
# SURROUNDING_LEADING_MAX_TOKENS=2000
# SURROUNDING_TRAILING_MAX_TOKENS=2000

### Per-response cap on total entity+relationship rows/records emitted by the LLM
# MAX_EXTRACTION_RECORDS=100
### Per-response cap on entity rows/objects emitted by the LLM
# MAX_EXTRACTION_ENTITIES=40

### Control the maximum chunk_ids stored in vector and graph db
### Addresses the hard-coded 64KB size constraint for Milvus dynamic field ($meta)
# MAX_SOURCE_IDS_PER_ENTITY=200
# MAX_SOURCE_IDS_PER_RELATION=200
### control chunk_ids limitation method: KEEP, FIFO
###    KEEP: Keep oldest (default, less merge action and faster)
###          do not change entity/relation description after max_source_ids reached
###    FIFO: First in first out
# SOURCE_IDS_LIMIT_METHOD=KEEP

### Maximum number of file paths stored in entity/relation file_path field
### For display only, does not affect query performance
# MAX_FILE_PATHS=75

### PDF decryption password for protected PDF files
# PDF_DECRYPT_PASSWORD=your_pdf_password_here

########################################
### Pipeline Concurrency Configuration
########################################
### Number of documents processed in parallel (between 2 and 10; MAX_ASYNC_LLM/3 is recommended)
MAX_PARALLEL_INSERT=3
### Optional per-stage document pipeline concurrency
# MAX_PARALLEL_PARSE_NATIVE=5
# MAX_PARALLEL_PARSE_MINERU=2
# MAX_PARALLEL_PARSE_DOCLING=2
# MAX_PARALLEL_ANALYZE=5
### Optional queue sizes for staged pipeline workers
# QUEUE_SIZE_PARSE=20
# QUEUE_SIZE_ANALYZE=100
# QUEUE_SIZE_INSERT=4
### Max concurrency requests for Embedding
# EMBEDDING_FUNC_MAX_ASYNC=8
### Number of chunks sent to Embedding in a single request (default is 10)
EMBEDDING_BATCH_NUM=32

###########################################################################
### Global LLM Configuration
###   LLM_BINDING type: openai, ollama, lollms, azure_openai, bedrock, gemini
###   LLM_BINDING_HOST: Service endpoint (left empty if using the provider SDK default endpoint)
###   LLM_BINDING_API_KEY: api key
### If LightRAG deployed in Docker:
###    use host.docker.internal instead of localhost in LLM_BINDING_HOST
###########################################################################
### LLM request timeout setting for all LLMs (0 means no timeout for Ollama)
# LLM_TIMEOUT=240

LLM_BINDING=openai
LLM_BINDING_HOST=https://api.openai.com/v1
LLM_BINDING_API_KEY=your_api_key
LLM_MODEL=gpt-5.4-mini

### Max concurrency requests of LLM
### MAX_ASYNC is still accepted as a deprecated alias
### NOTE: with gunicorn multi-worker (lightrag-gunicorn --workers N) every
###       MAX_ASYNC_* / *_MAX_ASYNC_* setting (LLM roles, embedding, rerank)
###       is enforced BOTH per worker process AND as a cross-worker global
###       cap. Under normal operation this keeps total in-process provider
###       calls clamped to MAX_ASYNC, similar to single-process mode. Slots
###       held by crashed workers (kill -9 / OOM) are reclaimed automatically
###       via lease heartbeats; if a worker is terminated externally while its
###       provider request is still pending, replacement work may briefly make
###       provider-side concurrency exceed the cap until the abandoned request
###       times out or closes.
###       Runtime caveat: changing a role's max_async through the API
###       updates only that worker's local limit — the cross-worker cap
###       keeps the value read at startup.
MAX_ASYNC_LLM=4

###########################################################################
### Role-specific LLM/VLM overrides
### Available roles: EXTRACT, KEYWORD, QUERY, VLM
### If unset, each role falls back to the global LLM configuration above.
### For detailed information, refer to:
###   docs/RoleSpecificLLMConfiguration.md
###   docs/RoleSpecificLLMConfiguration-zh.md
###########################################################################
# KEYWORD_LLM_MODEL=gpt-5.4-nano
KEYWORD_MAX_ASYNC_LLM=4
# KEYWORD_LLM_TIMEOUT=60
# KEYWORD_LLM_BINDING=openai
# KEYWORD_LLM_BINDING_HOST=https://api.openai.com/v1
# KEYWORD_LLM_BINDING_API_KEY=your_api_key

# QUERY_LLM_MODEL=gpt-5.4
QUERY_MAX_ASYNC_LLM=4
# QUERY_LLM_TIMEOUT=240
# QUERY_LLM_BINDING=openai
# QUERY_LLM_BINDING_HOST=https://api.openai.com/v1
# QUERY_LLM_BINDING_API_KEY=your_api_key

# VLM_LLM_MODEL=gpt-5.4-mini
# VLM_MAX_ASYNC_LLM=4
# VLM_LLM_TIMEOUT=240
# VLM_LLM_BINDING=openai
# VLM_LLM_BINDING_HOST=https://api.example.com/v1
# VLM_LLM_BINDING_API_KEY=your_vlm_api_key

### Master switch for VLM multimodal analysis (i/t/e items).
### When false, multimodal item is skipped regardless of document process_options
### When true, VLM_LLM_BINDING (or the base LLM_BINDING) must be vision-capable
### lollms is rejected at startup
VLM_PROCESS_ENABLE=false
### Maximum image bytes sent to VLM (5242880=5MB)
VLM_MAX_IMAGE_BYTES=5242880
### Minimum image side (width or height) in pixels accepted for VLM analysis.
### Images with a smaller width or height are treated as decorative (icons,
### separators, etc.) and skipped instead of sent to the VLM.
VLM_MIN_IMAGE_PIXEL=64

###########################################################################
### Provider specific LLM options
### Increasing the temperature setting may help mitigate infinite inference
###   loops during entity/relation extraction, particularly when using
###   models with more limited capabilities, such as Qwen3-30B
### Set a max output token limit to prevent endless output from certain LLMs,
###   which may trigger timeout errors during entity and relation extraction.
###        max_output_token < LLM_TIMEOUT * llm_tokens_per_second
###   e.g. max_output_token = 9000 < 240s * 50 tokens/s
### Sample commands to list all supported options specific LLM_BINDING:
###   lightrag-server --llm-binding openai  --help
###   lightrag-server --llm-binding bedrock --help
###   lightrag-server --llm-binding gemini  --help
###########################################################################
### OpenAI Specific Parameters (OpenRouter or other OpenAI compatible API):
###     LLM_BINDING=openai
###     LLM_BINDING_HOST=https://openrouter.ai/api/v1
###     LLM_MODEL=google/gemini-2.5-flash
# OPENAI_LLM_TEMPERATURE=0.9
### For vLLM/SGLang and most of OpenAI compatible API provider
# OPENAI_LLM_MAX_TOKENS=9000
### For OpenAI o1-mini or newer models, which use max_completion_tokens instead of max_tokens
# OPENAI_LLM_MAX_COMPLETION_TOKENS=9000
### For OpenAI reasoning control
# OPENAI_LLM_REASONING_EFFORT=minimal
### For OpenRouter reasoning control
# OPENAI_LLM_EXTRA_BODY='{"reasoning": {"enabled": false}}'
### For Qwen3 reasoning control deploy by vLLM
# OPENAI_LLM_EXTRA_BODY='{"chat_template_kwargs": {"enable_thinking": false}}'

### Azure OpenAI Specific Parameters:
###     LLM_BINDING=azure_openai
###     LLM_BINDING_HOST=https://xxxx.openai.azure.com/
###     LLM_BINDING_API_KEY=your_api_key
###     LLM_MODEL=my-gpt-mini-deployment
### You may use deployment name for LLM_MODEL or set AZURE_OPENAI_DEPLOYMENT instead
# AZURE_OPENAI_DEPLOYMENT=my-deployment-name
# AZURE_OPENAI_API_VERSION=2024-08-01-preview

### Google AI Studio Gemini Specific Parameters:
### DEFAULT_GEMINI_ENDPOINT means the SDK selects the endpoint automatically
###     LLM_BINDING=gemini
###     LLM_BINDING_HOST=DEFAULT_GEMINI_ENDPOINT
###     LLM_BINDING_API_KEY=your_gemini_api_key
###     LLM_MODEL=gemini-flash-latest
# GEMINI_LLM_TEMPERATURE=0.7
# GEMINI_LLM_MAX_OUTPUT_TOKENS=9000
### Enable or disable thinking
###     GEMINI_LLM_THINKING_CONFIG='{"thinking_budget": -1, "include_thoughts": true}'
###     GEMINI_LLM_THINKING_CONFIG='{"thinking_budget": 0, "include_thoughts": false}'
# GEMINI_LLM_THINKING_CONFIG='{"thinking_budget": 0, "include_thoughts": false}'

### Google Vertex AI Gemini Specific Parameters:
### Vertex AI uses GOOGLE_APPLICATION_CREDENTIALS instead of an API key for authentication
# GOOGLE_GENAI_USE_VERTEXAI=true
# GOOGLE_CLOUD_PROJECT='your-project-id'
# GOOGLE_CLOUD_LOCATION='us-central1'
# GOOGLE_APPLICATION_CREDENTIALS='/Users/xxxxx/your-service-account-credentials-file.json'

### Bedrock Specific Parameters:
###     LLM_BINDING=bedrock
###     LLM_BINDING_HOST=DEFAULT_BEDROCK_ENDPOINT
###     LLM_MODEL=us.amazon.nova-lite-v1:0
### Region is required for all three modes (Bedrock endpoints are regional).
# AWS_REGION=us-west-1
### Bedrock Authentication (choose ONE of the following three approaches):
### Bedrock API key (bearer token). Bedrock ignores LLM_BINDING_API_KEY;
### set AWS_BEARER_TOKEN_BEDROCK directly before startup. This is a
### process-level AWS SDK setting and cannot be overridden per role.
# AWS_BEARER_TOKEN_BEDROCK=your_bedrock_api_key
### SigV4 credentials (classic IAM user / STS / instance profile).
# AWS_ACCESS_KEY_ID=your_aws_access_key_id
# AWS_SECRET_ACCESS_KEY=your_aws_secret_access_key
# AWS_SESSION_TOKEN=your_optional_aws_session_token
### Ambient credentials (AWS SDK default credential chain).
### To use this mode, leave AWS_BEARER_TOKEN_BEDROCK, AWS_ACCESS_KEY_ID,
### AWS_SECRET_ACCESS_KEY, and AWS_SESSION_TOKEN above commented out — the
### AWS SDK will then resolve credentials from ~/.aws/credentials, IAM role,
### instance profile, SSO, or environment variables outside .env.
### Activating any of the lines above forces that explicit mode and bypasses
### the credential chain.
# BEDROCK_LLM_TEMPERATURE=1.0
# BEDROCK_LLM_MAX_TOKENS=9000
# BEDROCK_LLM_TOP_P=1.0
# BEDROCK_LLM_STOP_SEQUENCES='["</s>"]'
### Bedrock model reasoning control
# BEDROCK_LLM_EXTRA_FIELDS='{"reasoningConfig": {"type": "enabled", "maxReasoningEffort": "low"}}'

### Ollama Specific Parameters:
###     LLM_BINDING=ollama
###     LLM_BINDING_HOST=http://localhost:11434
###     LLM_MODEL=qwen3.5:9b
### OLLAMA_LLM_NUM_CTX must be provided, and should be larger than MAX_TOTAL_TOKENS + 2000
OLLAMA_LLM_NUM_CTX=32768
# OLLAMA_LLM_NUM_PREDICT=9000
# OLLAMA_LLM_TEMPERATURE=0.85
# OLLAMA_LLM_STOP='["</s>", "<|EOT|>"]'

#######################################################################################
### Embedding Configuration (Should not be changed after the first file processed)
### EMBEDDING_BINDING: ollama, openai, azure_openai, jina, lollms, bedrock, gemini
### EMBEDDING_BINDING_HOST: Service endpoint (left empty if using default endpoint provided by openai or gemini SDK)
### EMBEDDING_BINDING_API_KEY: api key
### If LightRAG deployed in Docker:
###    uses host.docker.internal instead of localhost in EMBEDDING_BINDING_HOST
### Control whether to send embedding_dim parameter to embedding API
###    For OpenAI: Set EMBEDDING_SEND_DIM=true to enable dynamic dimension adjustment
###    For OpenAI: Set EMBEDDING_SEND_DIM=false (default) to disable sending dimension parameter
###    For Gemini: Always set EMBEDDING_SEND_DIM=true
### Control whether to use base64 encoding format for embeddings (improves performance for OpenAI)
###    For OpenAI: Set EMBEDDING_USE_BASE64=true (default) to use base64 encoding
###    For Yandex Cloud and other providers that don't support it: Set EMBEDDING_USE_BASE64=false
#######################################################################################
# EMBEDDING_TIMEOUT=30

### OpenAI compatible embedding
EMBEDDING_BINDING=openai
EMBEDDING_BINDING_HOST=https://api.openai.com/v1
EMBEDDING_BINDING_API_KEY=your_api_key
EMBEDDING_MODEL=text-embedding-3-large
EMBEDDING_DIM=3072
EMBEDDING_TOKEN_LIMIT=8192
EMBEDDING_SEND_DIM=false
EMBEDDING_USE_BASE64=true

### Optional: asymmetric embeddings (query/document behavior split)
### Leave EMBEDDING_ASYMMETRIC unset or set false to keep symmetric behavior.
### Set true only when the selected embedding backend supports asymmetric mode.
# EMBEDDING_ASYMMETRIC=true
### Provider-task bindings such as Jina/Gemini/VoyageAI use provider parameters
### and should not configure the prefix variables below.
### Prefix-based models such as BGE/E5/GTE require both prefix variables.
### Wrap non-empty values with quotes if there are trailing spaces.
# EMBEDDING_DOCUMENT_PREFIX="search_document: "
### Use NO_PREFIX for a side that should intentionally have no prefix.
###     EMBEDDING_DOCUMENT_PREFIX=NO_PREFIX
# EMBEDDING_QUERY_PREFIX="search_query: "

###########################################################################
### Provider specific Embedding options
### Increasing the temperature setting may help mitigate infinite inference
###   loops during entity/relation extraction, particularly when using
###   models with more limited capabilities, such as Qwen3-30B
### Set a max output token limit to prevent endless output from certain LLMs,
###   which may trigger timeout errors during entity and relation extraction.
###        max_output_token < LLM_TIMEOUT * llm_tokens_per_second
###   i.e. max_output_token = 9000 < 240s * 50 tokens/s
### Sample commands to list all supported options specific EMBEDDING_BINDING:
###   lightrag-server --embedding-binding openai --help
###   lightrag-server --embedding-binding ollama --help
###   lightrag-server --embedding-binding bedrock --help
###########################################################################
### Azure Embedding Specific Parameters:
### Use deployment name as model name or set AZURE_EMBEDDING_DEPLOYMENT instead
###     EMBEDDING_BINDING=azure_openai
###     EMBEDDING_BINDING_HOST=https://xxxx.openai.azure.com/
###     EMBEDDING_BINDING_API_KEY=your_api_key
###     EMBEDDING_MODEL=my-text-embedding-3-large-deployment
###     EMBEDDING_DIM=3072
# AZURE_EMBEDDING_API_VERSION=2024-08-01-preview

### Ollama Embedding Specific Parameters:
###     EMBEDDING_BINDING=ollama
###     EMBEDDING_BINDING_HOST=http://localhost:11434
###     EMBEDDING_BINDING_API_KEY=your_api_key
###     EMBEDDING_MODEL=qwen3-embedding:4b
###     EMBEDDING_DIM=2560
### Ollama should set the num_ctx option in addition to EMBEDDING_TOKEN_LIMIT
OLLAMA_EMBEDDING_NUM_CTX=8192

### Gemini Embedding Specific Parameters:
### DEFAULT_GEMINI_ENDPOINT means the SDK selects the endpoint automatically
### Gemini embedding requires sending dimension to server
###     EMBEDDING_BINDING=gemini
###     EMBEDDING_BINDING_HOST=DEFAULT_GEMINI_ENDPOINT
###     EMBEDDING_BINDING_API_KEY=your_api_key
###     EMBEDDING_MODEL=gemini-embedding-001
###     EMBEDDING_DIM=1536
###     EMBEDDING_TOKEN_LIMIT=2048
###     EMBEDDING_SEND_DIM=true

### Bedrock Embedding Specific Parameters:
###     EMBEDDING_BINDING=bedrock
###     EMBEDDING_BINDING_HOST=DEFAULT_BEDROCK_ENDPOINT
###     EMBEDDING_MODEL=amazon.titan-embed-text-v2:0
###     EMBEDDING_DIM=1024
### Share the same region and authentication settings as LLMs, no reconfiguration here
###     AWS_REGION=us-west-1
###     AWS_BEARER_TOKEN_BEDROCK=your_bedrock_api_key
###     AWS_ACCESS_KEY_ID=your_aws_access_key_id
###     AWS_SECRET_ACCESS_KEY=your_aws_secret_access_key
###     AWS_SESSION_TOKEN=your_optional_aws_session_token

### Jina AI Embedding Specific Parameters:
###     EMBEDDING_BINDING=jina
###     EMBEDDING_BINDING_HOST=https://api.jina.ai/v1/embeddings
###     EMBEDDING_MODEL=jina-embeddings-v4
###     EMBEDDING_DIM=2048
###     EMBEDDING_BINDING_API_KEY=your_api_key

####################################################################
### WORKSPACE sets workspace name for all storage types
### for the purpose of isolating data from LightRAG instances.
### Valid workspace name constraints: a-z, A-Z, 0-9, and _
####################################################################
# WORKSPACE=

############################
### Data storage selection
############################
### Default storage: JSON/Nano/NetworkX (Recommended for test deployment)
LIGHTRAG_KV_STORAGE=JsonKVStorage
LIGHTRAG_DOC_STATUS_STORAGE=JsonDocStatusStorage
LIGHTRAG_GRAPH_STORAGE=NetworkXStorage
LIGHTRAG_VECTOR_STORAGE=NanoVectorDBStorage

### Wizard metadata used to preserve env-storage Docker deployment defaults across setup reruns
# LIGHTRAG_SETUP_POSTGRES_DEPLOYMENT=docker
# LIGHTRAG_SETUP_NEO4J_DEPLOYMENT=docker
# LIGHTRAG_SETUP_MONGODB_DEPLOYMENT=docker
# LIGHTRAG_SETUP_MONGODB_DEPLOYMENT=atlas-capable
# LIGHTRAG_SETUP_REDIS_DEPLOYMENT=docker
# LIGHTRAG_SETUP_MILVUS_DEPLOYMENT=docker
# LIGHTRAG_SETUP_QDRANT_DEPLOYMENT=docker
# LIGHTRAG_SETUP_MEMGRAPH_DEPLOYMENT=docker
# LIGHTRAG_SETUP_OPENSEARCH_DEPLOYMENT=docker

### PostgreSQL Configuration
POSTGRES_HOST=localhost
POSTGRES_PORT=5432
POSTGRES_USER=your_username
POSTGRES_PASSWORD='your_password'
POSTGRES_DATABASE=rag
POSTGRES_MAX_CONNECTIONS=25
### DB-specific workspace should not be set; kept for compatibility only
# POSTGRES_WORKSPACE=forced_workspace_name

### Use HNSW_HALFVEC for large embeddings (2000+ dim).
### Requires pgvector extension >= 0.7.0.
### Vector storage type: HNSW, HNSW_HALFVEC, IVFFlat, VCHORDRQ
POSTGRES_VECTOR_INDEX_TYPE=HNSW
POSTGRES_HNSW_M=16
POSTGRES_HNSW_EF=200
POSTGRES_IVFFLAT_LISTS=100
POSTGRES_VCHORDRQ_BUILD_OPTIONS=
POSTGRES_VCHORDRQ_PROBES=
POSTGRES_VCHORDRQ_EPSILON=1.9

### Batch write limits for KV/Vector/DocStatus (split a single executemany / ANY($2) delete; non-positive disables that dimension)
# POSTGRES_UPSERT_MAX_PAYLOAD_BYTES=16777216
# POSTGRES_UPSERT_MAX_RECORDS_PER_BATCH=200
# POSTGRES_DELETE_MAX_RECORDS_PER_BATCH=1000

### PostgreSQL Connection Retry Configuration (Network Robustness)
### NEW DEFAULTS (v1.4.10+): Optimized for HA deployments with ~30s switchover time
### These defaults provide out-of-the-box support for PostgreSQL High Availability setups
###
### Number of retry attempts (1-100, default: 10)
###   - Default 10 attempts allows ~225s total retry time (sufficient for most HA scenarios)
###   - For extreme cases: increase up to 20-50
### Initial retry backoff in seconds (0.1-300.0, default: 3.0)
###   - Default 3.0s provides reasonable initial delay for switchover detection
###   - For faster recovery: decrease to 1.0-2.0
### Maximum retry backoff in seconds (must be >= backoff, max: 600.0, default: 30.0)
###   - Default 30.0s matches typical switchover completion time
###   - For longer switchovers: increase to 60-90
### Connection pool close timeout in seconds (1.0-30.0, default: 5.0)
# POSTGRES_CONNECTION_RETRIES=10
# POSTGRES_CONNECTION_RETRY_BACKOFF=3.0
# POSTGRES_CONNECTION_RETRY_BACKOFF_MAX=30.0
# POSTGRES_POOL_CLOSE_TIMEOUT=5.0

### PostgreSQL SSL Configuration (Optional)
# POSTGRES_SSL_MODE=require
# POSTGRES_SSL_CERT=/path/to/client-cert.pem
# POSTGRES_SSL_KEY=/path/to/client-key.pem
# POSTGRES_SSL_ROOT_CERT=/path/to/ca-cert.pem
# POSTGRES_SSL_CRL=/path/to/crl.pem

### PostgreSQL Server Settings (for Supabase Supavisor)
# Use this to pass extra options to the PostgreSQL connection string.
# For Supabase, you might need to set it like this:
# POSTGRES_SERVER_SETTINGS='options=reference%3D[project-ref]'

# Default is 100; set to 0 to disable
# POSTGRES_STATEMENT_CACHE_SIZE=100

### Neo4j Configuration
NEO4J_URI=neo4j+s://xxxxxxxx.databases.neo4j.io
NEO4J_USERNAME=neo4j
NEO4J_PASSWORD='your_password'
NEO4J_DATABASE=neo4j
NEO4J_MAX_CONNECTION_POOL_SIZE=100
NEO4J_CONNECTION_TIMEOUT=30
NEO4J_CONNECTION_ACQUISITION_TIMEOUT=30
NEO4J_MAX_TRANSACTION_RETRY_TIME=30
NEO4J_MAX_CONNECTION_LIFETIME=300
NEO4J_LIVENESS_CHECK_TIMEOUT=30
NEO4J_KEEP_ALIVE=true
### DB-specific workspace should not be set; kept for compatibility only
# NEO4J_WORKSPACE=forced_workspace_name

### MongoDB Configuration
# For MongoVectorDBStorage, MONGO_URI must point to a MongoDB endpoint with
# Atlas Search / Vector Search support, such as MongoDB Atlas or Atlas local.
MONGO_URI=mongodb://localhost:27017/
MONGO_DATABASE=LightRAG
### DB-specific workspace should not be set; kept for compatibility only
# MONGODB_WORKSPACE=forced_workspace_name
# Flush-time bulk_write batching limits for MongoDB upsert paths (KV, vector, graph).
# (non-positive disables that dimension; DELETE cap applies to MongoVectorDBStorage)
# MONGO_UPSERT_MAX_PAYLOAD_BYTES=16777216
# MONGO_UPSERT_MAX_RECORDS_PER_BATCH=128
# MONGO_DELETE_MAX_RECORDS_PER_BATCH=1000

# Community/local Docker MongoDB example for KV, graph, or doc-status storage only:
# MONGO_URI=mongodb://localhost:27017/

### OpenSearch Configuration
### OpenSearch can be used for all storage types: KV, Vector, Graph, DocStatus
### Connection settings (comma-separated host:port entries; do not include http:// or https://)
### This setup wizard supports authenticated OpenSearch clusters only.
### OPENSEARCH_USE_SSL controls whether those hosts are reached over TLS.
OPENSEARCH_HOSTS=localhost:9200
OPENSEARCH_USER=admin
OPENSEARCH_PASSWORD=LightRAG2026_!@
OPENSEARCH_USE_SSL=true
OPENSEARCH_VERIFY_CERTS=false
# OPENSEARCH_TIMEOUT=30
# OPENSEARCH_MAX_RETRIES=3
### Index Settings (for 3-AZ Amazon OpenSearch Service, set replicas to 2)
# OPENSEARCH_NUMBER_OF_SHARDS=1
# OPENSEARCH_NUMBER_OF_REPLICAS=0
### k-NN Settings for Vector Storage (HNSW algorithm)
# OPENSEARCH_KNN_EF_CONSTRUCTION=200
# OPENSEARCH_KNN_M=16
# OPENSEARCH_KNN_EF_SEARCH=100
### PPL graphlookup for server-side graph traversal (auto-detected if not set)
# OPENSEARCH_USE_PPL_GRAPHLOOKUP=true
### Bulk batching limits (split a single async_bulk request; non-positive disables that dimension)
# OPENSEARCH_UPSERT_MAX_PAYLOAD_BYTES=104857600
# OPENSEARCH_UPSERT_MAX_RECORDS_PER_BATCH=128
# OPENSEARCH_DELETE_MAX_RECORDS_PER_BATCH=1000
### DB-specific workspace should not be set; kept for compatibility only
# OPENSEARCH_WORKSPACE=forced_workspace_name

### Milvus Configuration
MILVUS_URI=http://localhost:19530
MILVUS_DB_NAME=lightrag
# MILVUS_DEVICE=cpu
# MILVUS_USER=root
# MILVUS_PASSWORD=your_password
# MILVUS_TOKEN=your_token
# Required for the bundled Docker Milvus stack; may come from .env or exported shell variables.
# MINIO_ACCESS_KEY_ID=minioadmin
# MINIO_SECRET_ACCESS_KEY=minioadmin
### DB-specific workspace should not be set; kept for compatibility only
# MILVUS_WORKSPACE=forced_workspace_name

### Milvus upsert/delete batching (enabled by default)
### Split large flushes by estimated JSON payload size and record count to stay
### under the server-side 64MB gRPC message limit. A single record larger than the
### byte budget is sent as its own batch instead of failing.
# MILVUS_UPSERT_MAX_PAYLOAD_BYTES=33554432
# MILVUS_UPSERT_MAX_RECORDS_PER_BATCH=128
# MILVUS_DELETE_MAX_RECORDS_PER_BATCH=1000

### Milvus schema-migration resilience (enabled by default)
### On a transient connection failure the migration is retried from scratch with
### a rebuilt client and exponential backoff. Set MAX_RETRIES=0 to fail fast.
### Lower the iterator batch size to reduce write pressure on a small server.
# MILVUS_MIGRATION_MAX_RETRIES=5
# MILVUS_MIGRATION_RETRY_BACKOFF=5
# MILVUS_MIGRATION_RETRY_MAX_BACKOFF=60
# MILVUS_MIGRATION_ITERATOR_BATCH_SIZE=2000

### Milvus Vector Index Configuration
### Index type: AUTOINDEX (default), HNSW, HNSW_SQ, HNSW_PQ, IVF_FLAT, IVF_SQ8, DISKANN
# MILVUS_INDEX_TYPE=AUTOINDEX

### Metric type: COSINE (default), L2, IP
# MILVUS_METRIC_TYPE=COSINE

### HNSW / HNSW_SQ / HNSW_PQ Parameters (aligned with Milvus 2.4+ defaults)
### M: Maximum number of connections per node [2-2048], default 16
# MILVUS_HNSW_M=16
### efConstruction: Size of dynamic candidate list during build [8-512], default 360
# MILVUS_HNSW_EF_CONSTRUCTION=360
### ef: Size of dynamic candidate list during search, default 200
# MILVUS_HNSW_EF=200