> ## Documentation Index
> Fetch the complete documentation index at: https://docs.dify.ai/llms.txt
> Use this file to discover all available pages before exploring further.

# Get Document

> Retrieve detailed information about a specific document, including its indexing status, metadata, and processing statistics.



## OpenAPI

````yaml /en/api-reference/openapi_knowledge.json get /datasets/{dataset_id}/documents/{document_id}
# Spec header: OpenAPI dialect, then the API's title, release, and summary.
# Version strings are quoted so they can never be read as numbers.
openapi: '3.0.1'
info:
  title: Knowledge API
  version: '1.0.0'
  description: >-
    API for managing knowledge bases, documents, chunks, metadata, and
    tags, including creation, retrieval, and configuration. **Note:** A
    single Knowledge Base API key has permission to operate on all
    visible knowledge bases under the same account. Please pay attention
    to data security.
# One templated server entry: the whole URL is the `apiBaseUrl` variable,
# which falls back to the default declared below.
servers:
  - description: The base URL for the Knowledge API.
    url: "{apiBaseUrl}"
    variables:
      apiBaseUrl:
        description: Actual base URL of the API
        default: 'https://api.dify.ai/v1'
# Document-wide security requirement: the `ApiKeyAuth` scheme, no scopes.
security:
  - ApiKeyAuth: []
# Tag catalogue; operations reference these names through their `tags` list.
tags:
  - name: Knowledge Bases
    description: >-
      Operations for managing knowledge bases, including
      creation, configuration, and retrieval.
  - name: Documents
    description: >-
      Operations for creating, updating, and managing
      documents within a knowledge base.
  - name: Chunks
    description: >-
      Operations for managing document chunks and child chunks.
  - name: Metadata
    description: >-
      Operations for managing knowledge base metadata fields
      and document metadata values.
  - name: Tags
    description: >-
      Operations for managing knowledge base tags and tag bindings.
  - name: Models
    description: >-
      Operations for retrieving available models.
  - name: Knowledge Pipeline
    description: >-
      Operations for managing and running knowledge pipelines,
      including datasource plugins and pipeline execution.
paths:
  /datasets/{dataset_id}/documents/{document_id}:
    get:
      # Operation identity, display summary, tag grouping, and long description.
      operationId: getDocumentDetail
      summary: Get Document
      tags: [Documents]
      description: >-
        Retrieve detailed information about a specific document,
        including its indexing status, metadata, and processing
        statistics.
      parameters:
        # Required path segment: the knowledge base, as a UUID-formatted string.
        - name: dataset_id
          description: Knowledge base ID.
          in: path
          required: true
          schema:
            format: uuid
            type: string
        # Required path segment: the document, as a UUID-formatted string.
        - name: document_id
          description: Document ID.
          in: path
          required: true
          schema:
            format: uuid
            type: string
        # Optional query flag selecting which field groups the response carries;
        # `all` is the declared default. The wording for `without` matches the
        # 200 response description, which states that both `doc_type` and
        # `doc_metadata` are omitted in that mode.
        - name: metadata
          in: query
          schema:
            type: string
            enum:
              - all
              - only
              - without
            default: all
          description: >-
            `all` returns all fields including metadata. `only` returns only
            `id`, `doc_type`, and `doc_metadata`. `without` returns all fields
            except `doc_type` and `doc_metadata`.
      responses:
        '200':
          # Explains how the `metadata` query parameter changes the payload shape.
          description: >-
            Document details. The response shape varies based on the
            `metadata` query parameter. When `metadata` is `only`, only `id`,
            `doc_type`, and `doc_metadata` are returned. When `metadata` is
            `without`, `doc_type` and `doc_metadata` are omitted.
          content:
            application/json:
              # Superset of the fields this endpoint can return. No `required`
              # list is declared; the 200 response description states that the
              # fields actually present depend on the `metadata` query parameter.
              # `nullable: true` (OpenAPI 3.0) marks fields whose value may be
              # `null`.
              schema:
                type: object
                properties:
                  # --- Identity and position ---
                  id:
                    type: string
                    description: Document identifier.
                  position:
                    type: integer
                    description: Position index within the knowledge base.
                  # --- Data source ---
                  data_source_type:
                    type: string
                    description: >-
                      How the document was uploaded. `upload_file` for file
                      uploads, `notion_import` for Notion imports.
                  data_source_info:
                    type: object
                    description: Raw data source information.
                  # --- Processing rules (free-form objects; no sub-schema given) ---
                  dataset_process_rule_id:
                    type: string
                    description: ID of the processing rule applied to this document.
                  dataset_process_rule:
                    type: object
                    description: Knowledge-base-level processing rule configuration.
                  document_process_rule:
                    type: object
                    description: Document-level processing rule configuration.
                  # --- Name and provenance ---
                  name:
                    type: string
                    description: Document name.
                  created_from:
                    type: string
                    description: >-
                      Origin of the document. `api` for API creation, `web` for
                      UI creation.
                  created_by:
                    type: string
                    description: ID of the user who created the document.
                  created_at:
                    type: number
                    description: Unix timestamp of document creation.
                  # --- Indexing state ---
                  tokens:
                    type: integer
                    description: Number of tokens in the document.
                  indexing_status:
                    type: string
                    description: >-
                      Current indexing status, e.g. `waiting`, `parsing`,
                      `cleaning`, `splitting`, `indexing`, `completed`, `error`,
                      `paused`.
                  error:
                    type: string
                    nullable: true
                    description: Error message if indexing failed, `null` otherwise.
                  # --- Enablement, archival, and display status ---
                  enabled:
                    type: boolean
                    description: Whether the document is enabled for retrieval.
                  disabled_at:
                    type: number
                    nullable: true
                    description: >-
                      Unix timestamp when the document was disabled, `null` if
                      enabled.
                  disabled_by:
                    type: string
                    nullable: true
                    description: >-
                      ID of the user who disabled the document, `null` if
                      enabled.
                  archived:
                    type: boolean
                    description: Whether the document is archived.
                  display_status:
                    type: string
                    description: Display-friendly indexing status for the UI.
                  # --- Size and usage counters ---
                  word_count:
                    type: integer
                    description: Total word count of the document.
                  hit_count:
                    type: integer
                    description: Number of times this document has been retrieved.
                  # --- Chunking mode and language ---
                  doc_form:
                    type: string
                    description: >-
                      Document chunking mode. `text_model` for standard text,
                      `hierarchical_model` for parent-child, `qa_model` for QA
                      pairs.
                  doc_language:
                    type: string
                    description: Language of the document content.
                  # --- Metadata fields ---
                  # Per the 200 response description: omitted when
                  # `metadata=without`; together with `id`, the only fields
                  # returned when `metadata=only`.
                  doc_type:
                    type: string
                    nullable: true
                    description: Document type classification, `null` if not set.
                  doc_metadata:
                    type: array
                    description: Custom metadata key-value pairs for this document.
                    items:
                      type: object
                      properties:
                        id:
                          type: string
                          description: Metadata field identifier.
                        name:
                          type: string
                          description: Metadata field name.
                        type:
                          type: string
                          description: Metadata field type.
                        value:
                          type: string
                          description: Metadata field value for this document.
                  # --- Timing ---
                  completed_at:
                    type: number
                    nullable: true
                    description: >-
                      Unix timestamp when processing completed, `null` if not
                      yet completed.
                  updated_at:
                    type: number
                    nullable: true
                    description: Unix timestamp of last update, `null` if never updated.
                  indexing_latency:
                    type: number
                    nullable: true
                    description: >-
                      Time taken for indexing in seconds, `null` if not
                      completed.
                  # --- Chunk statistics ---
                  segment_count:
                    type: integer
                    description: Number of chunks in the document.
                  average_segment_length:
                    type: number
                    description: Average character length of chunks.
                  # --- Summary index ---
                  summary_index_status:
                    type: string
                    nullable: true
                    description: >-
                      Status of summary indexing, `null` if summary index is not
                      enabled.
                  need_summary:
                    type: boolean
                    description: Whether the document needs summary generation.
              # One illustrative payload. Every property declared in the schema
              # is present, including `doc_type` and `doc_metadata`, i.e. the
              # shape described for the default `metadata=all`.
              examples:
                success:
                  summary: Response Example
                  value:
                    id: a8e0e5b5-78c6-4130-a5ce-25feb0e0b4ac
                    position: 1
                    data_source_type: upload_file
                    data_source_info:
                      upload_file_id: a1b2c3d4-e5f6-7890-abcd-ef1234567890
                    # Same rule ID appears in `dataset_process_rule.id` below.
                    dataset_process_rule_id: e1f2a3b4-c5d6-7890-ef12-345678901234
                    dataset_process_rule:
                      id: e1f2a3b4-c5d6-7890-ef12-345678901234
                      mode: custom
                    document_process_rule:
                      mode: custom
                      rules:
                        pre_processing_rules: []
                        segmentation:
                          separator: '###'
                          max_tokens: 500
                          chunk_overlap: 50
                    name: guide.txt
                    created_from: api
                    created_by: ad313dd6-ef04-4dd1-a5b0-c0f0b9e2e7e4
                    created_at: 1741267200
                    tokens: 512
                    indexing_status: completed
                    # `error` is null here: the schema documents `null` when
                    # indexing did not fail.
                    error: null
                    enabled: true
                    # Both `disabled_*` fields are null because the document is
                    # enabled (see their schema descriptions).
                    disabled_at: null
                    disabled_by: null
                    archived: false
                    display_status: available
                    word_count: 350
                    hit_count: 0
                    doc_form: text_model
                    doc_language: English
                    doc_type: null
                    doc_metadata: []
                    # 60 seconds after `created_at`.
                    completed_at: 1741267260
                    updated_at: 1741267260
                    indexing_latency: 60
                    segment_count: 5
                    average_segment_length: 70
                    # null: documented as the value when the summary index is
                    # not enabled.
                    summary_index_status: null
                    need_summary: false
        # Error response for an invalid metadata value. The inline schema types
        # the `status` / `code` / `message` body shown in the example, so
        # tooling does not have to infer the shape from the example alone.
        '400':
          description: '`invalid_metadata` : Invalid metadata value for the specified key.'
          content:
            application/json:
              schema:
                type: object
                properties:
                  status:
                    type: integer
                    description: HTTP status code, repeated in the body.
                  code:
                    type: string
                    description: Machine-readable error code.
                  message:
                    type: string
                    description: Human-readable error message.
              examples:
                invalid_metadata:
                  summary: invalid_metadata
                  value:
                    status: 400
                    code: invalid_metadata
                    message: 'Invalid metadata value: {metadata_key}'
        # Error response when the caller lacks permission. The inline schema
        # types the `status` / `code` / `message` body shown in the example, so
        # tooling does not have to infer the shape from the example alone.
        '403':
          description: '`forbidden` : No permission.'
          content:
            application/json:
              schema:
                type: object
                properties:
                  status:
                    type: integer
                    description: HTTP status code, repeated in the body.
                  code:
                    type: string
                    description: Machine-readable error code.
                  message:
                    type: string
                    description: Human-readable error message.
              examples:
                forbidden:
                  summary: forbidden
                  value:
                    status: 403
                    code: forbidden
                    message: No permission.
        # Error response when the document does not exist. The inline schema
        # types the `status` / `code` / `message` body shown in the example, so
        # tooling does not have to infer the shape from the example alone.
        '404':
          description: '`not_found` : Document not found.'
          content:
            application/json:
              schema:
                type: object
                properties:
                  status:
                    type: integer
                    description: HTTP status code, repeated in the body.
                  code:
                    type: string
                    description: Machine-readable error code.
                  message:
                    type: string
                    description: Human-readable error message.
              examples:
                not_found:
                  summary: not_found
                  value:
                    status: 404
                    code: not_found
                    message: Document not found.
# Reusable definitions; only the security scheme named by `security` lives here.
components:
  securitySchemes:
    # HTTP bearer authentication whose token is the API key.
    ApiKeyAuth:
      description: >-
        API Key authentication. For all API requests, include your API
        Key in the `Authorization` HTTP Header, prefixed with `Bearer `.
        Example: `Authorization: Bearer {API_KEY}`. **Strongly recommend
        storing your API Key on the server-side, not shared or stored on
        the client-side, to avoid possible API-Key leakage that can lead
        to serious consequences.**
      type: http
      scheme: bearer
      bearerFormat: API_KEY

````