{"named_endpoints":{"/redact_document":{"parameters":[{"label":"Choose a PDF document or image file (PDF, JPG, PNG)","parameter_name":"file_paths","parameter_has_default":false,"parameter_default":null,"type":{"$defs":{"FileData":{"description":"The FileData class is a subclass of the GradioModel class that represents a file object within a Gradio interface. It is used to store file data and metadata when a file is uploaded.\n\nAttributes:\n    path: The server file path where the file is stored.\n    url: The normalized server URL pointing to the file.\n    size: The size of the file in bytes.\n    orig_name: The original filename before upload.\n    mime_type: The MIME type of the file.\n    is_stream: Indicates whether the file is a stream.\n    meta: Additional metadata used internally (should not be changed).","properties":{"path":{"title":"Path","type":"string"},"url":{"anyOf":[{"type":"string"},{"type":"null"}],"default":null,"title":"Url"},"size":{"anyOf":[{"type":"integer"},{"type":"null"}],"default":null,"title":"Size"},"orig_name":{"anyOf":[{"type":"string"},{"type":"null"}],"default":null,"title":"Orig Name"},"mime_type":{"anyOf":[{"type":"string"},{"type":"null"}],"default":null,"title":"Mime Type"},"is_stream":{"default":false,"title":"Is 
Stream","type":"boolean"},"meta":{"$ref":"#/$defs/FileDataMeta"}},"required":["path"],"title":"FileData","type":"object"},"FileDataMeta":{"properties":{"_type":{"const":"gradio.FileData","title":"Type","type":"string"}},"required":["_type"],"title":"FileDataMeta","type":"object"}},"items":{"$ref":"#/$defs/FileData"},"title":"ListFiles","type":"array","additional_description":null},"python_type":{"type":"list[filepath]","description":null},"component":"File","example_input":[{"path":"https://github.com/gradio-app/gradio/raw/main/test/test_files/sample_file.pdf","meta":{"_type":"gradio.FileData"},"orig_name":"sample_file.pdf","url":"https://github.com/gradio-app/gradio/raw/main/test/test_files/sample_file.pdf"}]},{"label":"Local PII identification model (click empty space in box for full list)","parameter_name":"chosen_redact_entities","parameter_has_default":true,"parameter_default":["TITLES","PERSON","PHONE_NUMBER","EMAIL_ADDRESS","STREETNAME","UKPOSTCODE","CUSTOM"],"type":{"type":"array","items":{"type":"string","enum":["TITLES","PERSON","PHONE_NUMBER","EMAIL_ADDRESS","STREETNAME","UKPOSTCODE","CREDIT_CARD","CRYPTO","DATE_TIME","IBAN_CODE","IP_ADDRESS","NRP","LOCATION","MEDICAL_LICENSE","URL","UK_NHS","CUSTOM","CUSTOM_FUZZY"]}},"python_type":{"type":"list[Literal['TITLES', 'PERSON', 'PHONE_NUMBER', 'EMAIL_ADDRESS', 'STREETNAME', 'UKPOSTCODE', 'CREDIT_CARD', 'CRYPTO', 'DATE_TIME', 'IBAN_CODE', 'IP_ADDRESS', 'NRP', 'LOCATION', 'MEDICAL_LICENSE', 'URL', 'UK_NHS', 'CUSTOM', 'CUSTOM_FUZZY']]","description":""},"component":"Dropdown","example_input":["TITLES"]},{"label":"AWS Comprehend PII identification model (click empty space in box for full 
list)","parameter_name":"chosen_redact_comprehend_entities","parameter_has_default":true,"parameter_default":["EMAIL","ADDRESS","NAME","PHONE","PASSPORT_NUMBER","UK_NATIONAL_INSURANCE_NUMBER","UK_NATIONAL_HEALTH_SERVICE_NUMBER","CUSTOM","TITLES","UKPOSTCODE","STREETNAME"],"type":{"type":"array","items":{"type":"string","enum":["BANK_ACCOUNT_NUMBER","BANK_ROUTING","CREDIT_DEBIT_NUMBER","CREDIT_DEBIT_CVV","CREDIT_DEBIT_EXPIRY","PIN","EMAIL","ADDRESS","NAME","PHONE","SSN","DATE_TIME","PASSPORT_NUMBER","DRIVER_ID","URL","AGE","USERNAME","PASSWORD","AWS_ACCESS_KEY","AWS_SECRET_KEY","IP_ADDRESS","MAC_ADDRESS","LICENSE_PLATE","VEHICLE_IDENTIFICATION_NUMBER","UK_NATIONAL_INSURANCE_NUMBER","INTERNATIONAL_BANK_ACCOUNT_NUMBER","SWIFT_CODE","UK_NATIONAL_HEALTH_SERVICE_NUMBER","ALL","CUSTOM","CUSTOM_FUZZY","TITLES","UKPOSTCODE","STREETNAME"]}},"python_type":{"type":"list[Literal['BANK_ACCOUNT_NUMBER', 'BANK_ROUTING', 'CREDIT_DEBIT_NUMBER', 'CREDIT_DEBIT_CVV', 'CREDIT_DEBIT_EXPIRY', 'PIN', 'EMAIL', 'ADDRESS', 'NAME', 'PHONE', 'SSN', 'DATE_TIME', 'PASSPORT_NUMBER', 'DRIVER_ID', 'URL', 'AGE', 'USERNAME', 'PASSWORD', 'AWS_ACCESS_KEY', 'AWS_SECRET_KEY', 'IP_ADDRESS', 'MAC_ADDRESS', 'LICENSE_PLATE', 'VEHICLE_IDENTIFICATION_NUMBER', 'UK_NATIONAL_INSURANCE_NUMBER', 'INTERNATIONAL_BANK_ACCOUNT_NUMBER', 'SWIFT_CODE', 'UK_NATIONAL_HEALTH_SERVICE_NUMBER', 'ALL', 'CUSTOM', 'CUSTOM_FUZZY', 'TITLES', 'UKPOSTCODE', 'STREETNAME']]","description":""},"component":"Dropdown","example_input":["BANK_ACCOUNT_NUMBER"]},{"label":"LLM PII identification model - subset of entities for LLM detection (click empty space in box for full 
list)","parameter_name":"chosen_llm_entities","parameter_has_default":true,"parameter_default":["EMAIL_ADDRESS","STREET_ADDRESS","PERSON_NAME","PHONE_NUMBER","CUSTOM"],"type":{"type":"array","items":{"type":"string","enum":["EMAIL_ADDRESS","STREET_ADDRESS","PERSON_NAME","PHONE_NUMBER","DATE_TIME","URL","IP_ADDRESS","AGE","BANK_ACCOUNT_NUMBER","PASSPORT_NUMBER","CUSTOM","CUSTOM_FUZZY","TITLES","UKPOSTCODE","STREETNAME"]}},"python_type":{"type":"list[Literal['EMAIL_ADDRESS', 'STREET_ADDRESS', 'PERSON_NAME', 'PHONE_NUMBER', 'DATE_TIME', 'URL', 'IP_ADDRESS', 'AGE', 'BANK_ACCOUNT_NUMBER', 'PASSPORT_NUMBER', 'CUSTOM', 'CUSTOM_FUZZY', 'TITLES', 'UKPOSTCODE', 'STREETNAME']]","description":""},"component":"Dropdown","example_input":["EMAIL_ADDRESS"]},{"label":"Choose text extraction method","parameter_name":"text_extraction_method","parameter_has_default":true,"parameter_default":"Local model - selectable text","type":{"enum":["Local model - selectable text","Local OCR model - PDFs without selectable text","AWS Textract service - all PDF types"],"title":"Radio","type":"string"},"python_type":{"type":"Literal['Local model - selectable text', 'Local OCR model - PDFs without selectable text', 'AWS Textract service - all PDF types']","description":""},"component":"Radio","example_input":"Local model - selectable text"},{"label":"Allow list (never redact these words)","parameter_name":"in_allow_list","parameter_has_default":true,"parameter_default":[],"type":{"type":"array","items":{"type":"string","enum":[]}},"python_type":{"type":"list[Literal[]]","description":""},"component":"Dropdown","example_input":[]},{"label":"Deny list (always redact these words)","parameter_name":"in_deny_list","parameter_has_default":true,"parameter_default":[],"type":{"type":"array","items":{"type":"string","enum":[]}},"python_type":{"type":"list[Literal[]]","description":""},"component":"Dropdown","example_input":[]},{"label":"Fully redact these 
pages","parameter_name":"redact_whole_page_list","parameter_has_default":true,"parameter_default":[],"type":{"type":"array","items":{"type":"string","enum":[]}},"python_type":{"type":"list[Literal[]]","description":""},"component":"Dropdown","example_input":[]},{"label":"Output summary","parameter_name":"combined_out_message","parameter_has_default":false,"parameter_default":null,"type":{"type":"string"},"python_type":{"type":"str","description":""},"component":"Textbox","example_input":"Hello!!"},{"label":"Lowest page to redact (set to 0 to redact from the first page)","parameter_name":"page_min","parameter_has_default":true,"parameter_default":0,"type":{"type":"integer"},"python_type":{"type":"int","description":""},"component":"Number","example_input":0},{"label":"Highest page to redact (set to 0 to redact to the last page)","parameter_name":"page_max","parameter_has_default":true,"parameter_default":0,"type":{"type":"integer"},"python_type":{"type":"int","description":""},"component":"Number","example_input":0},{"label":"AWS Textract extraction settings","parameter_name":"handwrite_signature_checkbox","parameter_has_default":true,"parameter_default":[],"type":{"items":{"enum":["Extract handwriting","Extract signatures"],"type":"string"},"title":"Checkbox Group","type":"array"},"python_type":{"type":"list[Literal['Extract handwriting', 'Extract signatures']]","description":""},"component":"Checkboxgroup","example_input":["Extract handwriting"]},{"label":"Choose personal information detection model. 
Note that AWS Comprehend, if shown, has a cost of around £0.0075 ($0.01) per 10,000 characters.","parameter_name":"pii_identification_method","parameter_has_default":true,"parameter_default":"Local","type":{"enum":["Only extract text (no redaction)","Local","AWS Comprehend"],"title":"Radio","type":"string"},"python_type":{"type":"Literal['Only extract text (no redaction)', 'Local', 'AWS Comprehend']","description":""},"component":"Radio","example_input":"Only extract text (no redaction)"},{"label":"Maximum spelling mistakes for matching deny list terms (slows down PII detection).","parameter_name":"max_fuzzy_spelling_mistakes_num","parameter_has_default":true,"parameter_default":0,"type":{"type":"integer"},"python_type":{"type":"int","description":""},"component":"Number","example_input":0},{"label":"Should fuzzy search match on entire phrases in deny list (as opposed to each word individually)?","parameter_name":"match_fuzzy_whole_phrase_bool","parameter_has_default":true,"parameter_default":true,"type":{"type":"boolean"},"python_type":{"type":"bool","description":""},"component":"Checkbox","example_input":true},{"label":"AWS access key for account with permissions for AWS Textract and Comprehend","parameter_name":"aws_access_key_textbox","parameter_has_default":true,"parameter_default":"","type":{"type":"string"},"python_type":{"type":"str","description":""},"component":"Textbox","example_input":"Hello!!"},{"label":"AWS secret key for account with permissions for AWS Textract and Comprehend","parameter_name":"aws_secret_key_textbox","parameter_has_default":true,"parameter_default":"","type":{"type":"string"},"python_type":{"type":"str","description":""},"component":"Textbox","example_input":"Hello!!"},{"label":"Total pages","parameter_name":"annotate_max_pages","parameter_has_default":true,"parameter_default":1,"type":{"type":"integer"},"python_type":{"type":"int","description":""},"component":"Number","example_input":1},{"label":"Review file 
data","parameter_name":"review_file_state","parameter_has_default":true,"parameter_default":{"headers":["image","page","label","color","xmin","ymin","xmax","ymax","id","text"],"data":[],"metadata":null},"type":{"properties":{"headers":{"items":{},"title":"Headers","type":"array"},"data":{"items":{"items":{},"type":"array"},"title":"Data","type":"array"},"metadata":{"anyOf":[{"additionalProperties":{"anyOf":[{"items":{},"type":"array"},{"type":"null"}]},"type":"object"},{"type":"null"}],"default":null,"title":"Metadata"}},"required":["headers","data"],"title":"DataframeData","type":"object","additional_description":null},"python_type":{"type":"dict(headers: list[Any], data: list[list[Any]], metadata: dict(str, list[Any] | None) | None)","description":null},"component":"Dataframe","example_input":{"headers":["a","b"],"data":[["foo","bar"]]}},{"label":"output_folder_textbox","parameter_name":"output_folder","parameter_has_default":true,"parameter_default":"/home/user/app/output/","type":{"type":"string"},"python_type":{"type":"str","description":""},"component":"Textbox","example_input":"Hello!!"},{"label":"Existing Textract output file found","parameter_name":"textract_output_found","parameter_has_default":true,"parameter_default":false,"type":{"type":"boolean"},"python_type":{"type":"bool","description":""},"component":"Checkbox","example_input":true},{"label":"Only extract text (no redaction)","parameter_name":"text_extraction_only","parameter_has_default":true,"parameter_default":false,"type":{"type":"boolean"},"python_type":{"type":"bool","description":""},"component":"Checkbox","example_input":true},{"label":"input_folder_textbox","parameter_name":"input_folder","parameter_has_default":true,"parameter_default":"/home/user/app/input/","type":{"type":"string"},"python_type":{"type":"str","description":""},"component":"Textbox","example_input":"Hello!!"},{"label":"Choose a local OCR model. 
\"tesseract\" is the default and will work for documents with clear typed text. \"paddle\" is more accurate for text extraction where the text is not clear or well-formatted, but word-level extract is not natively supported, and so word bounding boxes will be inaccurate. ","parameter_name":"chosen_local_ocr_model","parameter_has_default":true,"parameter_default":"tesseract","type":{"enum":["tesseract","paddle"],"title":"Radio","type":"string"},"python_type":{"type":"Literal['tesseract', 'paddle']","description":""},"component":"Radio","example_input":"tesseract"},{"label":"Chosen language short code","parameter_name":"language","parameter_has_default":true,"parameter_default":"en","type":{"type":"string","enum":["en","fr","de","es","it","nl","pt","zh","ja","ko","lt","mk","nb","pl","ro","ru","sl","sv","ca","uk"]},"python_type":{"type":"Literal['en', 'fr', 'de', 'es', 'it', 'nl', 'pt', 'zh', 'ja', 'ko', 'lt', 'mk', 'nb', 'pl', 'ro', 'ru', 'sl', 'sv', 'ca', 'uk']","description":""},"component":"Dropdown","example_input":"en"},{"label":"2. An '...ocr_results_with_words' file can be uploaded here for searching text and making new redactions.","parameter_name":"ocr_review_files","parameter_has_default":false,"parameter_default":null,"type":{"$defs":{"FileData":{"description":"The FileData class is a subclass of the GradioModel class that represents a file object within a Gradio interface. 
It is used to store file data and metadata when a file is uploaded.\n\nAttributes:\n    path: The server file path where the file is stored.\n    url: The normalized server URL pointing to the file.\n    size: The size of the file in bytes.\n    orig_name: The original filename before upload.\n    mime_type: The MIME type of the file.\n    is_stream: Indicates whether the file is a stream.\n    meta: Additional metadata used internally (should not be changed).","properties":{"path":{"title":"Path","type":"string"},"url":{"anyOf":[{"type":"string"},{"type":"null"}],"default":null,"title":"Url"},"size":{"anyOf":[{"type":"integer"},{"type":"null"}],"default":null,"title":"Size"},"orig_name":{"anyOf":[{"type":"string"},{"type":"null"}],"default":null,"title":"Orig Name"},"mime_type":{"anyOf":[{"type":"string"},{"type":"null"}],"default":null,"title":"Mime Type"},"is_stream":{"default":false,"title":"Is Stream","type":"boolean"},"meta":{"$ref":"#/$defs/FileDataMeta"}},"required":["path"],"title":"FileData","type":"object"},"FileDataMeta":{"properties":{"_type":{"const":"gradio.FileData","title":"Type","type":"string"}},"required":["_type"],"title":"FileDataMeta","type":"object"}},"items":{"$ref":"#/$defs/FileData"},"title":"ListFiles","type":"array","additional_description":null},"python_type":{"type":"list[filepath]","description":null},"component":"File","example_input":[{"path":"https://github.com/gradio-app/gradio/raw/main/test/test_files/sample_file.pdf","meta":{"_type":"gradio.FileData"},"orig_name":"sample_file.pdf","url":"https://github.com/gradio-app/gradio/raw/main/test/test_files/sample_file.pdf"}]},{"label":"Custom instructions for LLM-based entity detection","parameter_name":"custom_llm_instructions","parameter_has_default":true,"parameter_default":"","type":{"type":"string"},"python_type":{"type":"str","description":""},"component":"Textbox","example_input":"Hello!!"},{"label":"Inference Server VLM Model 
Name","parameter_name":"inference_server_vlm_model","parameter_has_default":true,"parameter_default":"qwen_3_5_27b","type":{"type":"string"},"python_type":{"type":"str","description":""},"component":"Textbox","example_input":"Hello!!"},{"label":"Use efficient OCR","parameter_name":"efficient_ocr","parameter_has_default":true,"parameter_default":false,"type":{"type":"boolean"},"python_type":{"type":"bool","description":""},"component":"Checkbox","example_input":true},{"label":"Minimum words on page to run text-only extraction with efficient OCR","parameter_name":"efficient_ocr_min_words","parameter_has_default":true,"parameter_default":20,"type":{"type":"integer"},"python_type":{"type":"int","description":""},"component":"Number","example_input":0},{"label":"Min. page-area fraction for an embedded image to force OCR (0 = word count only)","parameter_name":"efficient_ocr_min_image_coverage_fraction","parameter_has_default":true,"parameter_default":0.005,"type":{"type":"number"},"python_type":{"type":"float","description":""},"component":"Number","example_input":0.0},{"label":"Min. 
embedded image width/height (PDF pt, ~px@72dpi) to force OCR; 0 = no minimum","parameter_name":"efficient_ocr_min_embedded_image_px","parameter_has_default":true,"parameter_default":10,"type":{"type":"integer"},"python_type":{"type":"int","description":""},"component":"Number","example_input":0},{"label":"High-quality Textract OCR (re-run low-confidence lines with Bedrock VLM for higher quality)","parameter_name":"hybrid_textract_bedrock_vlm","parameter_has_default":true,"parameter_default":false,"type":{"type":"boolean"},"python_type":{"type":"bool","description":""},"component":"Checkbox","example_input":true},{"label":"Always overwrite existing OCR results for new redaction tasks","parameter_name":"overwrite_existing_ocr_results","parameter_has_default":true,"parameter_default":false,"type":{"type":"boolean"},"python_type":{"type":"bool","description":""},"component":"Checkbox","example_input":true},{"label":"Save page OCR visualisations (debug bounding boxes)","parameter_name":"save_page_ocr_visualisations","parameter_has_default":true,"parameter_default":false,"type":{"type":"boolean"},"python_type":{"type":"bool","description":""},"component":"Checkbox","example_input":true}],"returns":[{"label":"Output summary","type":{"type":"string"},"python_type":{"type":"str","description":""},"component":"Textbox"},{"label":"Output files","type":{"$defs":{"FileDataMeta":{"properties":{"_type":{"const":"gradio.FileData","title":"Type","type":"string"}},"required":["_type"],"title":"FileDataMeta","type":"object"}},"properties":{"path":{"title":"Path","type":"string"},"url":{"anyOf":[{"type":"string"},{"type":"null"}],"default":null,"title":"Url"},"size":{"anyOf":[{"type":"integer"},{"type":"null"}],"default":null,"title":"Size"},"orig_name":{"anyOf":[{"type":"string"},{"type":"null"}],"default":null,"title":"Orig Name"},"mime_type":{"anyOf":[{"type":"string"},{"type":"null"}],"default":null,"title":"Mime Type"},"is_stream":{"default":false,"title":"Is 
Stream","type":"boolean"},"meta":{"$ref":"#/$defs/FileDataMeta"}},"required":["path"],"title":"FileData","type":"object","additional_description":"The FileData class is a subclass of the GradioModel class that represents a file object within a Gradio interface. It is used to store file data and metadata when a file is uploaded.\n\nAttributes:\n    path: The server file path where the file is stored.\n    url: The normalized server URL pointing to the file.\n    size: The size of the file in bytes.\n    orig_name: The original filename before upload.\n    mime_type: The MIME type of the file.\n    is_stream: Indicates whether the file is a stream.\n    meta: Additional metadata used internally (should not be changed)."},"python_type":{"type":"filepath","description":""},"component":"File"},{"label":"Log file output","type":{"$defs":{"FileDataMeta":{"properties":{"_type":{"const":"gradio.FileData","title":"Type","type":"string"}},"required":["_type"],"title":"FileDataMeta","type":"object"}},"properties":{"path":{"title":"Path","type":"string"},"url":{"anyOf":[{"type":"string"},{"type":"null"}],"default":null,"title":"Url"},"size":{"anyOf":[{"type":"integer"},{"type":"null"}],"default":null,"title":"Size"},"orig_name":{"anyOf":[{"type":"string"},{"type":"null"}],"default":null,"title":"Orig Name"},"mime_type":{"anyOf":[{"type":"string"},{"type":"null"}],"default":null,"title":"Mime Type"},"is_stream":{"default":false,"title":"Is Stream","type":"boolean"},"meta":{"$ref":"#/$defs/FileDataMeta"}},"required":["path"],"title":"FileData","type":"object","additional_description":"The FileData class is a subclass of the GradioModel class that represents a file object within a Gradio interface. 
It is used to store file data and metadata when a file is uploaded.\n\nAttributes:\n    path: The server file path where the file is stored.\n    url: The normalized server URL pointing to the file.\n    size: The size of the file in bytes.\n    orig_name: The original filename before upload.\n    mime_type: The MIME type of the file.\n    is_stream: Indicates whether the file is a stream.\n    meta: Additional metadata used internally (should not be changed)."},"python_type":{"type":"filepath","description":""},"component":"File"},{"label":"1. Upload original or previously redacted '..._for_review.pdf' document to review redactions.","type":{"$defs":{"FileData":{"description":"The FileData class is a subclass of the GradioModel class that represents a file object within a Gradio interface. It is used to store file data and metadata when a file is uploaded.\n\nAttributes:\n    path: The server file path where the file is stored.\n    url: The normalized server URL pointing to the file.\n    size: The size of the file in bytes.\n    orig_name: The original filename before upload.\n    mime_type: The MIME type of the file.\n    is_stream: Indicates whether the file is a stream.\n    meta: Additional metadata used internally (should not be changed).","properties":{"path":{"title":"Path","type":"string"},"url":{"anyOf":[{"type":"string"},{"type":"null"}],"default":null,"title":"Url"},"size":{"anyOf":[{"type":"integer"},{"type":"null"}],"default":null,"title":"Size"},"orig_name":{"anyOf":[{"type":"string"},{"type":"null"}],"default":null,"title":"Orig Name"},"mime_type":{"anyOf":[{"type":"string"},{"type":"null"}],"default":null,"title":"Mime Type"},"is_stream":{"default":false,"title":"Is 
Stream","type":"boolean"},"meta":{"$ref":"#/$defs/FileDataMeta"}},"required":["path"],"title":"FileData","type":"object"},"FileDataMeta":{"properties":{"_type":{"const":"gradio.FileData","title":"Type","type":"string"}},"required":["_type"],"title":"FileDataMeta","type":"object"}},"items":{"$ref":"#/$defs/FileData"},"title":"ListFiles","type":"array","additional_description":null},"python_type":{"type":"list[filepath]","description":""},"component":"File"},{"label":"Total pages","type":{"type":"integer"},"python_type":{"type":"int","description":""},"component":"Number"},{"label":"Total pages","type":{"type":"integer"},"python_type":{"type":"int","description":""},"component":"Number"},{"label":"Review file data","type":{"properties":{"headers":{"items":{},"title":"Headers","type":"array"},"data":{"items":{"items":{},"type":"array"},"title":"Data","type":"array"},"metadata":{"anyOf":[{"additionalProperties":{"anyOf":[{"items":{},"type":"array"},{"type":"null"}]},"type":"object"},{"type":"null"}],"default":null,"title":"Metadata"}},"required":["headers","data"],"title":"DataframeData","type":"object","additional_description":null},"python_type":{"type":"dict(headers: list[Any], data: list[list[Any]], metadata: dict(str, list[Any] | None) | None)","description":""},"component":"Dataframe"},{"label":"Upload one or multiple 'ocr_output.csv' files to find duplicate pages and subdocuments","type":{"$defs":{"FileData":{"description":"The FileData class is a subclass of the GradioModel class that represents a file object within a Gradio interface. 
It is used to store file data and metadata when a file is uploaded.\n\nAttributes:\n    path: The server file path where the file is stored.\n    url: The normalized server URL pointing to the file.\n    size: The size of the file in bytes.\n    orig_name: The original filename before upload.\n    mime_type: The MIME type of the file.\n    is_stream: Indicates whether the file is a stream.\n    meta: Additional metadata used internally (should not be changed).","properties":{"path":{"title":"Path","type":"string"},"url":{"anyOf":[{"type":"string"},{"type":"null"}],"default":null,"title":"Url"},"size":{"anyOf":[{"type":"integer"},{"type":"null"}],"default":null,"title":"Size"},"orig_name":{"anyOf":[{"type":"string"},{"type":"null"}],"default":null,"title":"Orig Name"},"mime_type":{"anyOf":[{"type":"string"},{"type":"null"}],"default":null,"title":"Mime Type"},"is_stream":{"default":false,"title":"Is Stream","type":"boolean"},"meta":{"$ref":"#/$defs/FileDataMeta"}},"required":["path"],"title":"FileData","type":"object"},"FileDataMeta":{"properties":{"_type":{"const":"gradio.FileData","title":"Type","type":"string"}},"required":["_type"],"title":"FileDataMeta","type":"object"}},"items":{"$ref":"#/$defs/FileData"},"title":"ListFiles","type":"array","additional_description":null},"python_type":{"type":"list[filepath]","description":""},"component":"File"},{"label":"Upload PDF or OCR CSV files to summarise","type":{"$defs":{"FileData":{"description":"The FileData class is a subclass of the GradioModel class that represents a file object within a Gradio interface. 
It is used to store file data and metadata when a file is uploaded.\n\nAttributes:\n    path: The server file path where the file is stored.\n    url: The normalized server URL pointing to the file.\n    size: The size of the file in bytes.\n    orig_name: The original filename before upload.\n    mime_type: The MIME type of the file.\n    is_stream: Indicates whether the file is a stream.\n    meta: Additional metadata used internally (should not be changed).","properties":{"path":{"title":"Path","type":"string"},"url":{"anyOf":[{"type":"string"},{"type":"null"}],"default":null,"title":"Url"},"size":{"anyOf":[{"type":"integer"},{"type":"null"}],"default":null,"title":"Size"},"orig_name":{"anyOf":[{"type":"string"},{"type":"null"}],"default":null,"title":"Orig Name"},"mime_type":{"anyOf":[{"type":"string"},{"type":"null"}],"default":null,"title":"Mime Type"},"is_stream":{"default":false,"title":"Is Stream","type":"boolean"},"meta":{"$ref":"#/$defs/FileDataMeta"}},"required":["path"],"title":"FileData","type":"object"},"FileDataMeta":{"properties":{"_type":{"const":"gradio.FileData","title":"Type","type":"string"}},"required":["_type"],"title":"FileDataMeta","type":"object"}},"items":{"$ref":"#/$defs/FileData"},"title":"ListFiles","type":"array","additional_description":null},"python_type":{"type":"list[filepath]","description":""},"component":"File"},{"label":"task","type":{"type":"string"},"python_type":{"type":"str","description":""},"component":"Textbox"},{"label":"2. An '...ocr_results_with_words' file can be uploaded here for searching text and making new redactions.","type":{"$defs":{"FileData":{"description":"The FileData class is a subclass of the GradioModel class that represents a file object within a Gradio interface. 
It is used to store file data and metadata when a file is uploaded.\n\nAttributes:\n    path: The server file path where the file is stored.\n    url: The normalized server URL pointing to the file.\n    size: The size of the file in bytes.\n    orig_name: The original filename before upload.\n    mime_type: The MIME type of the file.\n    is_stream: Indicates whether the file is a stream.\n    meta: Additional metadata used internally (should not be changed).","properties":{"path":{"title":"Path","type":"string"},"url":{"anyOf":[{"type":"string"},{"type":"null"}],"default":null,"title":"Url"},"size":{"anyOf":[{"type":"integer"},{"type":"null"}],"default":null,"title":"Size"},"orig_name":{"anyOf":[{"type":"string"},{"type":"null"}],"default":null,"title":"Orig Name"},"mime_type":{"anyOf":[{"type":"string"},{"type":"null"}],"default":null,"title":"Mime Type"},"is_stream":{"default":false,"title":"Is Stream","type":"boolean"},"meta":{"$ref":"#/$defs/FileDataMeta"}},"required":["path"],"title":"FileData","type":"object"},"FileDataMeta":{"properties":{"_type":{"const":"gradio.FileData","title":"Type","type":"string"}},"required":["_type"],"title":"FileDataMeta","type":"object"}},"items":{"$ref":"#/$defs/FileData"},"title":"ListFiles","type":"array","additional_description":null},"python_type":{"type":"list[filepath]","description":""},"component":"File"},{"label":"Total page count","type":{"type":"number"},"python_type":{"type":"float","description":""},"component":"Number"}],"api_visibility":"public","description":"Compatibility wrapper: builds RedactionOptions/RedactionContext and calls run_redaction.","code_snippets":{"python":"from gradio_client import Client, handle_file\n\nclient = Client(\"http://localhost:7860\")\nresult = client.predict(\n\tfile_paths=[handle_file('https://github.com/gradio-app/gradio/raw/main/test/test_files/sample_file.pdf')],\n\tchosen_redact_entities=[\"TITLES\", \"PERSON\", \"PHONE_NUMBER\", \"EMAIL_ADDRESS\", \"STREETNAME\", 
\"UKPOSTCODE\", \"CUSTOM\"],\n\tchosen_redact_comprehend_entities=[\"EMAIL\", \"ADDRESS\", \"NAME\", \"PHONE\", \"PASSPORT_NUMBER\", \"UK_NATIONAL_INSURANCE_NUMBER\", \"UK_NATIONAL_HEALTH_SERVICE_NUMBER\", \"CUSTOM\", \"TITLES\", \"UKPOSTCODE\", \"STREETNAME\"],\n\tchosen_llm_entities=[\"EMAIL_ADDRESS\", \"STREET_ADDRESS\", \"PERSON_NAME\", \"PHONE_NUMBER\", \"CUSTOM\"],\n\ttext_extraction_method=\"Local model - selectable text\",\n\tin_allow_list=[],\n\tin_deny_list=[],\n\tredact_whole_page_list=[],\n\tcombined_out_message=\"Hello!!\",\n\tpage_min=0,\n\tpage_max=0,\n\thandwrite_signature_checkbox=[],\n\tpii_identification_method=\"Local\",\n\tmax_fuzzy_spelling_mistakes_num=0,\n\tmatch_fuzzy_whole_phrase_bool=True,\n\taws_access_key_textbox=\"\",\n\taws_secret_key_textbox=\"\",\n\tannotate_max_pages=1,\n\treview_file_state={\"headers\": [\"image\", \"page\", \"label\", \"color\", \"xmin\", \"ymin\", \"xmax\", \"ymax\", \"id\", \"text\"], \"data\": [], \"metadata\": None},\n\toutput_folder=\"/home/user/app/output/\",\n\ttextract_output_found=False,\n\ttext_extraction_only=False,\n\tinput_folder=\"/home/user/app/input/\",\n\tchosen_local_ocr_model=\"tesseract\",\n\tlanguage=\"en\",\n\tocr_review_files=[handle_file('https://github.com/gradio-app/gradio/raw/main/test/test_files/sample_file.pdf')],\n\tcustom_llm_instructions=\"\",\n\tinference_server_vlm_model=\"qwen_3_5_27b\",\n\tefficient_ocr=False,\n\tefficient_ocr_min_words=20,\n\tefficient_ocr_min_image_coverage_fraction=0.005,\n\tefficient_ocr_min_embedded_image_px=10,\n\thybrid_textract_bedrock_vlm=False,\n\toverwrite_existing_ocr_results=False,\n\tsave_page_ocr_visualisations=False,\n\tapi_name=\"/redact_document\",\n)\nprint(result)","javascript":"import { Client } from \"@gradio/client\";\n\nconst response_0 = await fetch(\"\");\nconst exampleFile = await response_0.blob();\nconst response_1 = await fetch(\"\");\nconst exampleFile = await response_1.blob();\n\nconst client = await 
Client.connect(\"http://localhost:7860\");\nconst result = await client.predict(\"/redact_document\", {\n\t\tfile_paths: exampleFile,\n\t\tchosen_redact_entities: [\"TITLES\", \"PERSON\", \"PHONE_NUMBER\", \"EMAIL_ADDRESS\", \"STREETNAME\", \"UKPOSTCODE\", \"CUSTOM\"],\n\t\tchosen_redact_comprehend_entities: [\"EMAIL\", \"ADDRESS\", \"NAME\", \"PHONE\", \"PASSPORT_NUMBER\", \"UK_NATIONAL_INSURANCE_NUMBER\", \"UK_NATIONAL_HEALTH_SERVICE_NUMBER\", \"CUSTOM\", \"TITLES\", \"UKPOSTCODE\", \"STREETNAME\"],\n\t\tchosen_llm_entities: [\"EMAIL_ADDRESS\", \"STREET_ADDRESS\", \"PERSON_NAME\", \"PHONE_NUMBER\", \"CUSTOM\"],\n\t\ttext_extraction_method: \"Local model - selectable text\",\n\t\tin_allow_list: [],\n\t\tin_deny_list: [],\n\t\tredact_whole_page_list: [],\n\t\tcombined_out_message: \"Hello!!\",\n\t\tpage_min: 0,\n\t\tpage_max: 0,\n\t\thandwrite_signature_checkbox: [],\n\t\tpii_identification_method: \"Local\",\n\t\tmax_fuzzy_spelling_mistakes_num: 0,\n\t\tmatch_fuzzy_whole_phrase_bool: true,\n\t\taws_access_key_textbox: \"\",\n\t\taws_secret_key_textbox: \"\",\n\t\tannotate_max_pages: 1,\n\t\treview_file_state: {\"headers\": [\"image\", \"page\", \"label\", \"color\", \"xmin\", \"ymin\", \"xmax\", \"ymax\", \"id\", \"text\"], \"data\": [], \"metadata\": None},\n\t\toutput_folder: \"/home/user/app/output/\",\n\t\ttextract_output_found: false,\n\t\ttext_extraction_only: false,\n\t\tinput_folder: \"/home/user/app/input/\",\n\t\tchosen_local_ocr_model: \"tesseract\",\n\t\tlanguage: \"en\",\n\t\tocr_review_files: exampleFile,\n\t\tcustom_llm_instructions: \"\",\n\t\tinference_server_vlm_model: \"qwen_3_5_27b\",\n\t\tefficient_ocr: false,\n\t\tefficient_ocr_min_words: 20,\n\t\tefficient_ocr_min_image_coverage_fraction: 0.005,\n\t\tefficient_ocr_min_embedded_image_px: 10,\n\t\thybrid_textract_bedrock_vlm: false,\n\t\toverwrite_existing_ocr_results: false,\n\t\tsave_page_ocr_visualisations: false,\n});\n\nconsole.log(result.data);","bash":"curl -X POST 
http://localhost:7860/gradio_api/call/redact_document -s -H \"Content-Type: application/json\" -d '{\"data\": [[{\"path\": \"https://github.com/gradio-app/gradio/raw/main/test/test_files/sample_file.pdf\", \"meta\": {\"_type\": \"gradio.FileData\"}}], [\"TITLES\", \"PERSON\", \"PHONE_NUMBER\", \"EMAIL_ADDRESS\", \"STREETNAME\", \"UKPOSTCODE\", \"CUSTOM\"], [\"EMAIL\", \"ADDRESS\", \"NAME\", \"PHONE\", \"PASSPORT_NUMBER\", \"UK_NATIONAL_INSURANCE_NUMBER\", \"UK_NATIONAL_HEALTH_SERVICE_NUMBER\", \"CUSTOM\", \"TITLES\", \"UKPOSTCODE\", \"STREETNAME\"], [\"EMAIL_ADDRESS\", \"STREET_ADDRESS\", \"PERSON_NAME\", \"PHONE_NUMBER\", \"CUSTOM\"], \"Local model - selectable text\", [], [], [], \"Hello!!\", 0, 0, [], \"Local\", 0, true, \"\", \"\", 1, {\"headers\": [\"image\", \"page\", \"label\", \"color\", \"xmin\", \"ymin\", \"xmax\", \"ymax\", \"id\", \"text\"], \"data\": [], \"metadata\": None}, \"/home/user/app/output/\", false, false, \"/home/user/app/input/\", \"tesseract\", \"en\", [{\"path\": \"https://github.com/gradio-app/gradio/raw/main/test/test_files/sample_file.pdf\", \"meta\": {\"_type\": \"gradio.FileData\"}}], \"\", \"qwen_3_5_27b\", false, 20, 0.005, 10, false, false, false]}' \\\n  | awk -F'\"' '{ print $4}' \\\n  | read EVENT_ID; curl -N http://localhost:7860/gradio_api/call/redact_document/$EVENT_ID"}},"/load_and_prepare_documents_or_data":{"parameters":[{"label":"Choose a PDF document or image file (PDF, JPG, PNG)","parameter_name":"file_paths","parameter_has_default":false,"parameter_default":null,"type":{"$defs":{"FileData":{"description":"The FileData class is a subclass of the GradioModel class that represents a file object within a Gradio interface. 
It is used to store file data and metadata when a file is uploaded.\n\nAttributes:\n    path: The server file path where the file is stored.\n    url: The normalized server URL pointing to the file.\n    size: The size of the file in bytes.\n    orig_name: The original filename before upload.\n    mime_type: The MIME type of the file.\n    is_stream: Indicates whether the file is a stream.\n    meta: Additional metadata used internally (should not be changed).","properties":{"path":{"title":"Path","type":"string"},"url":{"anyOf":[{"type":"string"},{"type":"null"}],"default":null,"title":"Url"},"size":{"anyOf":[{"type":"integer"},{"type":"null"}],"default":null,"title":"Size"},"orig_name":{"anyOf":[{"type":"string"},{"type":"null"}],"default":null,"title":"Orig Name"},"mime_type":{"anyOf":[{"type":"string"},{"type":"null"}],"default":null,"title":"Mime Type"},"is_stream":{"default":false,"title":"Is Stream","type":"boolean"},"meta":{"$ref":"#/$defs/FileDataMeta"}},"required":["path"],"title":"FileData","type":"object"},"FileDataMeta":{"properties":{"_type":{"const":"gradio.FileData","title":"Type","type":"string"}},"required":["_type"],"title":"FileDataMeta","type":"object"}},"items":{"$ref":"#/$defs/FileData"},"title":"ListFiles","type":"array","additional_description":null},"python_type":{"type":"list[filepath]","description":null},"component":"File","example_input":[{"path":"https://github.com/gradio-app/gradio/raw/main/test/test_files/sample_file.pdf","meta":{"_type":"gradio.FileData"},"orig_name":"sample_file.pdf","url":"https://github.com/gradio-app/gradio/raw/main/test/test_files/sample_file.pdf"}]},{"label":"Choose text extraction method","parameter_name":"text_extract_method","parameter_has_default":true,"parameter_default":"Local model - selectable text","type":{"enum":["Local model - selectable text","Local OCR model - PDFs without selectable text","AWS Textract service - all PDF 
types"],"title":"Radio","type":"string"},"python_type":{"type":"Literal['Local model - selectable text', 'Local OCR model - PDFs without selectable text', 'AWS Textract service - all PDF types']","description":""},"component":"Radio","example_input":"Local model - selectable text"},{"label":"Output summary","parameter_name":"out_message","parameter_has_default":false,"parameter_default":null,"type":{"type":"string"},"python_type":{"type":"str","description":""},"component":"Textbox","example_input":"Hello!!"},{"label":"first_loop_state","parameter_name":"first_loop_state","parameter_has_default":true,"parameter_default":true,"type":{"type":"boolean"},"python_type":{"type":"bool","description":""},"component":"Checkbox","example_input":true},{"label":"Total pages","parameter_name":"number_of_pages","parameter_has_default":true,"parameter_default":1,"type":{"type":"integer"},"python_type":{"type":"int","description":""},"component":"Number","example_input":1},{"label":"prepare_for_review_bool_false","parameter_name":"prepare_for_review","parameter_has_default":true,"parameter_default":false,"type":{"type":"boolean"},"python_type":{"type":"bool","description":""},"component":"Checkbox","example_input":true},{"label":"Fully redact these 
pages","parameter_name":"in_fully_redacted_list","parameter_has_default":true,"parameter_default":[],"type":{"type":"array","items":{"type":"string","enum":[]}},"python_type":{"type":"list[Literal[]]","description":""},"component":"Dropdown","example_input":[]},{"label":"output_folder_textbox","parameter_name":"output_folder","parameter_has_default":true,"parameter_default":"/home/user/app/output/","type":{"type":"string"},"python_type":{"type":"str","description":""},"component":"Textbox","example_input":"Hello!!"},{"label":"input_folder_textbox","parameter_name":"input_folder","parameter_has_default":true,"parameter_default":"/home/user/app/input/","type":{"type":"string"},"python_type":{"type":"str","description":""},"component":"Textbox","example_input":"Hello!!"},{"label":"Use efficient OCR","parameter_name":"efficient_ocr","parameter_has_default":true,"parameter_default":false,"type":{"type":"boolean"},"python_type":{"type":"bool","description":""},"component":"Checkbox","example_input":true},{"label":"prepare_images_bool_false","parameter_name":"prepare_images_bool_false","parameter_has_default":true,"parameter_default":false,"type":{"type":"boolean"},"python_type":{"type":"bool","description":""},"component":"Checkbox","example_input":true},{"label":"Lowest page to redact (set to 0 to redact from the first page)","parameter_name":"page_min","parameter_has_default":true,"parameter_default":0,"type":{"type":"integer"},"python_type":{"type":"int","description":""},"component":"Number","example_input":0},{"label":"Highest page to redact (set to 0 to redact to the last page)","parameter_name":"page_max","parameter_has_default":true,"parameter_default":0,"type":{"type":"integer"},"python_type":{"type":"int","description":""},"component":"Number","example_input":0}],"returns":[{"label":"Output summary","type":{"type":"string"},"python_type":{"type":"str","description":""},"component":"Textbox"},{"label":"Total 
pages","type":{"type":"integer"},"python_type":{"type":"int","description":""},"component":"Number"},{"label":"Total pages","type":{"type":"integer"},"python_type":{"type":"int","description":""},"component":"Number"},{"label":"Review file data","type":{"properties":{"headers":{"items":{},"title":"Headers","type":"array"},"data":{"items":{"items":{},"type":"array"},"title":"Data","type":"array"},"metadata":{"anyOf":[{"additionalProperties":{"anyOf":[{"items":{},"type":"array"},{"type":"null"}]},"type":"object"},{"type":"null"}],"default":null,"title":"Metadata"}},"required":["headers","data"],"title":"DataframeData","type":"object","additional_description":null},"python_type":{"type":"dict(headers: list[Any], data: list[list[Any]], metadata: dict(str, list[Any] | None) | None)","description":""},"component":"Dataframe"},{"label":"Existing Textract output file found","type":{"type":"boolean"},"python_type":{"type":"bool","description":""},"component":"Checkbox"},{"label":"Existing local OCR output file found","type":{"type":"boolean"},"python_type":{"type":"bool","description":""},"component":"Checkbox"}],"api_visibility":"public","description":"When EFFICIENT_OCR is enabled, skip loading all images; they are created later only for pages that need OCR.","code_snippets":{"python":"from gradio_client import Client, handle_file\n\nclient = Client(\"http://localhost:7860\")\nresult = client.predict(\n\tfile_paths=[handle_file('https://github.com/gradio-app/gradio/raw/main/test/test_files/sample_file.pdf')],\n\ttext_extract_method=\"Local model - selectable text\",\n\tout_message=\"Hello!!\",\n\tfirst_loop_state=True,\n\tnumber_of_pages=1,\n\tprepare_for_review=False,\n\tin_fully_redacted_list=[],\n\toutput_folder=\"/home/user/app/output/\",\n\tinput_folder=\"/home/user/app/input/\",\n\tefficient_ocr=False,\n\tprepare_images_bool_false=False,\n\tpage_min=0,\n\tpage_max=0,\n\tapi_name=\"/load_and_prepare_documents_or_data\",\n)\nprint(result)","javascript":"import { 
Client } from \"@gradio/client\";\n\nconst response_0 = await fetch(\"\");\nconst exampleFile = await response_0.blob();\n\nconst client = await Client.connect(\"http://localhost:7860\");\nconst result = await client.predict(\"/load_and_prepare_documents_or_data\", {\n\t\tfile_paths: exampleFile,\n\t\ttext_extract_method: \"Local model - selectable text\",\n\t\tout_message: \"Hello!!\",\n\t\tfirst_loop_state: true,\n\t\tnumber_of_pages: 1,\n\t\tprepare_for_review: false,\n\t\tin_fully_redacted_list: [],\n\t\toutput_folder: \"/home/user/app/output/\",\n\t\tinput_folder: \"/home/user/app/input/\",\n\t\tefficient_ocr: false,\n\t\tprepare_images_bool_false: false,\n\t\tpage_min: 0,\n\t\tpage_max: 0,\n});\n\nconsole.log(result.data);","bash":"curl -X POST http://localhost:7860/gradio_api/call/load_and_prepare_documents_or_data -s -H \"Content-Type: application/json\" -d '{\"data\": [[{\"path\": \"https://github.com/gradio-app/gradio/raw/main/test/test_files/sample_file.pdf\", \"meta\": {\"_type\": \"gradio.FileData\"}}], \"Local model - selectable text\", \"Hello!!\", true, 1, false, [], \"/home/user/app/output/\", \"/home/user/app/input/\", false, false, 0, 0]}' \\\n  | awk -F'\"' '{ print $4}' \\\n  | read EVENT_ID; curl -N http://localhost:7860/gradio_api/call/load_and_prepare_documents_or_data/$EVENT_ID"}},"/apply_review_redactions":{"parameters":[{"label":"Modify redaction boxes","parameter_name":"page_image_annotator_object","parameter_has_default":false,"parameter_default":null,"type":{"$defs":{"FileData":{"description":"The FileData class is a subclass of the GradioModel class that represents a file object within a Gradio interface. 
It is used to store file data and metadata when a file is uploaded.\n\nAttributes:\n    path: The server file path where the file is stored.\n    url: The normalized server URL pointing to the file.\n    size: The size of the file in bytes.\n    orig_name: The original filename before upload.\n    mime_type: The MIME type of the file.\n    is_stream: Indicates whether the file is a stream.\n    meta: Additional metadata used internally (should not be changed).","properties":{"path":{"title":"Path","type":"string"},"url":{"anyOf":[{"type":"string"},{"type":"null"}],"default":null,"title":"Url"},"size":{"anyOf":[{"type":"integer"},{"type":"null"}],"default":null,"title":"Size"},"orig_name":{"anyOf":[{"type":"string"},{"type":"null"}],"default":null,"title":"Orig Name"},"mime_type":{"anyOf":[{"type":"string"},{"type":"null"}],"default":null,"title":"Mime Type"},"is_stream":{"default":false,"title":"Is Stream","type":"boolean"},"meta":{"$ref":"#/$defs/FileDataMeta"}},"required":["path"],"title":"FileData","type":"object"},"FileDataMeta":{"properties":{"_type":{"const":"gradio.FileData","title":"Type","type":"string"}},"required":["_type"],"title":"FileDataMeta","type":"object"}},"properties":{"image":{"$ref":"#/$defs/FileData"},"boxes":{"default":[],"items":{"additionalProperties":true,"type":"object"},"title":"Boxes","type":"array"},"orientation":{"default":0,"title":"Orientation","type":"integer"},"image_width":{"anyOf":[{"type":"integer"},{"type":"null"}],"default":null,"title":"Image Width"},"image_height":{"anyOf":[{"type":"integer"},{"type":"null"}],"default":null,"title":"Image Height"}},"required":["image"],"title":"AnnotatedImageData","type":"object","additional_description":null},"python_type":{"type":"dict(image: filepath, boxes: list[dict(str, Any)], orientation: int, image_width: int | None, image_height: int | 
None)","description":null},"component":"Image_annotator","example_input":{"image":"https://raw.githubusercontent.com/gradio-app/gradio/main/guides/assets/logo.png","boxes":[{"xmin":30,"ymin":70,"xmax":530,"ymax":500,"label":"Gradio","color":[250,185,0]}]}},{"label":"Current page","parameter_name":"current_page","parameter_has_default":true,"parameter_default":1,"type":{"type":"integer"},"python_type":{"type":"int","description":""},"component":"Number","example_input":1},{"label":"Review file data","parameter_name":"review_file_state","parameter_has_default":true,"parameter_default":{"headers":["image","page","label","color","xmin","ymin","xmax","ymax","id","text"],"data":[],"metadata":null},"type":{"properties":{"headers":{"items":{},"title":"Headers","type":"array"},"data":{"items":{"items":{},"type":"array"},"title":"Data","type":"array"},"metadata":{"anyOf":[{"additionalProperties":{"anyOf":[{"items":{},"type":"array"},{"type":"null"}]},"type":"object"},{"type":"null"}],"default":null,"title":"Metadata"}},"required":["headers","data"],"title":"DataframeData","type":"object","additional_description":null},"python_type":{"type":"dict(headers: list[Any], data: list[list[Any]], metadata: dict(str, list[Any] | None) | 
None)","description":null},"component":"Dataframe","example_input":{"headers":["a","b"],"data":[["foo","bar"]]}},{"label":"output_folder_textbox","parameter_name":"output_folder","parameter_has_default":true,"parameter_default":"/home/user/app/output/","type":{"type":"string"},"python_type":{"type":"str","description":""},"component":"Textbox","example_input":"Hello!!"},{"label":"save_pdf_state","parameter_name":"save_pdf","parameter_has_default":true,"parameter_default":true,"type":{"type":"boolean"},"python_type":{"type":"bool","description":""},"component":"Checkbox","example_input":true},{"label":"input_folder_textbox","parameter_name":"input_folder","parameter_has_default":true,"parameter_default":"/home/user/app/input/","type":{"type":"string"},"python_type":{"type":"str","description":""},"component":"Textbox","example_input":"Hello!!"}],"returns":[{"label":"1. Upload original or previously redacted '..._for_review.pdf' document to review redactions.","type":{"$defs":{"FileData":{"description":"The FileData class is a subclass of the GradioModel class that represents a file object within a Gradio interface. 
It is used to store file data and metadata when a file is uploaded.\n\nAttributes:\n    path: The server file path where the file is stored.\n    url: The normalized server URL pointing to the file.\n    size: The size of the file in bytes.\n    orig_name: The original filename before upload.\n    mime_type: The MIME type of the file.\n    is_stream: Indicates whether the file is a stream.\n    meta: Additional metadata used internally (should not be changed).","properties":{"path":{"title":"Path","type":"string"},"url":{"anyOf":[{"type":"string"},{"type":"null"}],"default":null,"title":"Url"},"size":{"anyOf":[{"type":"integer"},{"type":"null"}],"default":null,"title":"Size"},"orig_name":{"anyOf":[{"type":"string"},{"type":"null"}],"default":null,"title":"Orig Name"},"mime_type":{"anyOf":[{"type":"string"},{"type":"null"}],"default":null,"title":"Mime Type"},"is_stream":{"default":false,"title":"Is Stream","type":"boolean"},"meta":{"$ref":"#/$defs/FileDataMeta"}},"required":["path"],"title":"FileData","type":"object"},"FileDataMeta":{"properties":{"_type":{"const":"gradio.FileData","title":"Type","type":"string"}},"required":["_type"],"title":"FileDataMeta","type":"object"}},"items":{"$ref":"#/$defs/FileData"},"title":"ListFiles","type":"array","additional_description":null},"python_type":{"type":"list[filepath]","description":""},"component":"File"},{"label":"Log file output","type":{"$defs":{"FileDataMeta":{"properties":{"_type":{"const":"gradio.FileData","title":"Type","type":"string"}},"required":["_type"],"title":"FileDataMeta","type":"object"}},"properties":{"path":{"title":"Path","type":"string"},"url":{"anyOf":[{"type":"string"},{"type":"null"}],"default":null,"title":"Url"},"size":{"anyOf":[{"type":"integer"},{"type":"null"}],"default":null,"title":"Size"},"orig_name":{"anyOf":[{"type":"string"},{"type":"null"}],"default":null,"title":"Orig Name"},"mime_type":{"anyOf":[{"type":"string"},{"type":"null"}],"default":null,"title":"Mime 
Type"},"is_stream":{"default":false,"title":"Is Stream","type":"boolean"},"meta":{"$ref":"#/$defs/FileDataMeta"}},"required":["path"],"title":"FileData","type":"object","additional_description":"The FileData class is a subclass of the GradioModel class that represents a file object within a Gradio interface. It is used to store file data and metadata when a file is uploaded.\n\nAttributes:\n    path: The server file path where the file is stored.\n    url: The normalized server URL pointing to the file.\n    size: The size of the file in bytes.\n    orig_name: The original filename before upload.\n    mime_type: The MIME type of the file.\n    is_stream: Indicates whether the file is a stream.\n    meta: Additional metadata used internally (should not be changed)."},"python_type":{"type":"filepath","description":""},"component":"File"},{"label":"Review file data","type":{"properties":{"headers":{"items":{},"title":"Headers","type":"array"},"data":{"items":{"items":{},"type":"array"},"title":"Data","type":"array"},"metadata":{"anyOf":[{"additionalProperties":{"anyOf":[{"items":{},"type":"array"},{"type":"null"}]},"type":"object"},{"type":"null"}],"default":null,"title":"Metadata"}},"required":["headers","data"],"title":"DataframeData","type":"object","additional_description":null},"python_type":{"type":"dict(headers: list[Any], data: list[list[Any]], metadata: dict(str, list[Any] | None) | None)","description":""},"component":"Dataframe"}],"api_visibility":"public","description":"Applies the modified redaction annotations from the UI to the PyMuPDF document and exports the updated review files, including the redacted PDF and associated logs.","code_snippets":{"python":"from gradio_client import Client\n\nclient = Client(\"http://localhost:7860\")\nresult = client.predict(\n\tpage_image_annotator_object={\"image\": \"https://raw.githubusercontent.com/gradio-app/gradio/main/guides/assets/logo.png\", \"boxes\": [{\"xmin\": 30, \"ymin\": 70, \"xmax\": 530, \"ymax\": 
500, \"label\": \"Gradio\", \"color\": [250, 185, 0]}]},\n\tcurrent_page=1,\n\treview_file_state={\"headers\": [\"image\", \"page\", \"label\", \"color\", \"xmin\", \"ymin\", \"xmax\", \"ymax\", \"id\", \"text\"], \"data\": [], \"metadata\": None},\n\toutput_folder=\"/home/user/app/output/\",\n\tsave_pdf=True,\n\tinput_folder=\"/home/user/app/input/\",\n\tapi_name=\"/apply_review_redactions\",\n)\nprint(result)","javascript":"import { Client } from \"@gradio/client\";\n\nconst client = await Client.connect(\"http://localhost:7860\");\nconst result = await client.predict(\"/apply_review_redactions\", {\n\t\tpage_image_annotator_object: {\"image\": \"https://raw.githubusercontent.com/gradio-app/gradio/main/guides/assets/logo.png\", \"boxes\": [{\"xmin\": 30, \"ymin\": 70, \"xmax\": 530, \"ymax\": 500, \"label\": \"Gradio\", \"color\": [250, 185, 0]}]},\n\t\tcurrent_page: 1,\n\t\treview_file_state: {\"headers\": [\"image\", \"page\", \"label\", \"color\", \"xmin\", \"ymin\", \"xmax\", \"ymax\", \"id\", \"text\"], \"data\": [], \"metadata\": None},\n\t\toutput_folder: \"/home/user/app/output/\",\n\t\tsave_pdf: true,\n\t\tinput_folder: \"/home/user/app/input/\",\n});\n\nconsole.log(result.data);","bash":"curl -X POST http://localhost:7860/gradio_api/call/apply_review_redactions -s -H \"Content-Type: application/json\" -d '{\"data\": [{\"image\": \"https://raw.githubusercontent.com/gradio-app/gradio/main/guides/assets/logo.png\", \"boxes\": [{\"xmin\": 30, \"ymin\": 70, \"xmax\": 530, \"ymax\": 500, \"label\": \"Gradio\", \"color\": [250, 185, 0]}]}, 1, {\"headers\": [\"image\", \"page\", \"label\", \"color\", \"xmin\", \"ymin\", \"xmax\", \"ymax\", \"id\", \"text\"], \"data\": [], \"metadata\": None}, \"/home/user/app/output/\", true, \"/home/user/app/input/\"]}' \\\n  | awk -F'\"' '{ print $4}' \\\n  | read EVENT_ID; curl -N http://localhost:7860/gradio_api/call/apply_review_redactions/$EVENT_ID"}},"/page_ocr_review_image":{"parameters":[{"label":"Modify redaction 
boxes","parameter_name":"page_annotator","parameter_has_default":false,"parameter_default":null,"type":{"$defs":{"FileData":{"description":"The FileData class is a subclass of the GradioModel class that represents a file object within a Gradio interface. It is used to store file data and metadata when a file is uploaded.\n\nAttributes:\n    path: The server file path where the file is stored.\n    url: The normalized server URL pointing to the file.\n    size: The size of the file in bytes.\n    orig_name: The original filename before upload.\n    mime_type: The MIME type of the file.\n    is_stream: Indicates whether the file is a stream.\n    meta: Additional metadata used internally (should not be changed).","properties":{"path":{"title":"Path","type":"string"},"url":{"anyOf":[{"type":"string"},{"type":"null"}],"default":null,"title":"Url"},"size":{"anyOf":[{"type":"integer"},{"type":"null"}],"default":null,"title":"Size"},"orig_name":{"anyOf":[{"type":"string"},{"type":"null"}],"default":null,"title":"Orig Name"},"mime_type":{"anyOf":[{"type":"string"},{"type":"null"}],"default":null,"title":"Mime Type"},"is_stream":{"default":false,"title":"Is Stream","type":"boolean"},"meta":{"$ref":"#/$defs/FileDataMeta"}},"required":["path"],"title":"FileData","type":"object"},"FileDataMeta":{"properties":{"_type":{"const":"gradio.FileData","title":"Type","type":"string"}},"required":["_type"],"title":"FileDataMeta","type":"object"}},"properties":{"image":{"$ref":"#/$defs/FileData"},"boxes":{"default":[],"items":{"additionalProperties":true,"type":"object"},"title":"Boxes","type":"array"},"orientation":{"default":0,"title":"Orientation","type":"integer"},"image_width":{"anyOf":[{"type":"integer"},{"type":"null"}],"default":null,"title":"Image Width"},"image_height":{"anyOf":[{"type":"integer"},{"type":"null"}],"default":null,"title":"Image 
Height"}},"required":["image"],"title":"AnnotatedImageData","type":"object","additional_description":null},"python_type":{"type":"dict(image: filepath, boxes: list[dict(str, Any)], orientation: int, image_width: int | None, image_height: int | None)","description":null},"component":"Image_annotator","example_input":{"image":"https://raw.githubusercontent.com/gradio-app/gradio/main/guides/assets/logo.png","boxes":[{"xmin":30,"ymin":70,"xmax":530,"ymax":500,"label":"Gradio","color":[250,185,0]}]}},{"label":"Current page","parameter_name":"annotate_current_page","parameter_has_default":true,"parameter_default":1,"type":{"type":"integer"},"python_type":{"type":"int","description":""},"component":"Number","example_input":1},{"label":"output_folder_textbox","parameter_name":"output_folder_textbox","parameter_has_default":true,"parameter_default":"/home/user/app/output/","type":{"type":"string"},"python_type":{"type":"str","description":""},"component":"Textbox","example_input":"Hello!!"}],"returns":[{"label":"OCR/Redaction overlay output","type":{"$defs":{"FileDataMeta":{"properties":{"_type":{"const":"gradio.FileData","title":"Type","type":"string"}},"required":["_type"],"title":"FileDataMeta","type":"object"}},"properties":{"path":{"title":"Path","type":"string"},"url":{"anyOf":[{"type":"string"},{"type":"null"}],"default":null,"title":"Url"},"size":{"anyOf":[{"type":"integer"},{"type":"null"}],"default":null,"title":"Size"},"orig_name":{"anyOf":[{"type":"string"},{"type":"null"}],"default":null,"title":"Orig Name"},"mime_type":{"anyOf":[{"type":"string"},{"type":"null"}],"default":null,"title":"Mime Type"},"is_stream":{"default":false,"title":"Is Stream","type":"boolean"},"meta":{"$ref":"#/$defs/FileDataMeta"}},"required":["path"],"title":"FileData","type":"object","additional_description":"The FileData class is a subclass of the GradioModel class that represents a file object within a Gradio interface. 
It is used to store file data and metadata when a file is uploaded.\n\nAttributes:\n    path: The server file path where the file is stored.\n    url: The normalized server URL pointing to the file.\n    size: The size of the file in bytes.\n    orig_name: The original filename before upload.\n    mime_type: The MIME type of the file.\n    is_stream: Indicates whether the file is a stream.\n    meta: Additional metadata used internally (should not be changed)."},"python_type":{"type":"filepath","description":""},"component":"File"}],"api_visibility":"public","description":"Short-name Gradio handler; wraps `export_review_page_ocr_visualisation_for_gradio`.","code_snippets":{"python":"from gradio_client import Client\n\nclient = Client(\"http://localhost:7860\")\nresult = client.predict(\n\tpage_annotator={\"image\": \"https://raw.githubusercontent.com/gradio-app/gradio/main/guides/assets/logo.png\", \"boxes\": [{\"xmin\": 30, \"ymin\": 70, \"xmax\": 530, \"ymax\": 500, \"label\": \"Gradio\", \"color\": [250, 185, 0]}]},\n\tannotate_current_page=1,\n\toutput_folder_textbox=\"/home/user/app/output/\",\n\tapi_name=\"/page_ocr_review_image\",\n)\nprint(result)","javascript":"import { Client } from \"@gradio/client\";\n\nconst client = await Client.connect(\"http://localhost:7860\");\nconst result = await client.predict(\"/page_ocr_review_image\", {\n\t\tpage_annotator: {\"image\": \"https://raw.githubusercontent.com/gradio-app/gradio/main/guides/assets/logo.png\", \"boxes\": [{\"xmin\": 30, \"ymin\": 70, \"xmax\": 530, \"ymax\": 500, \"label\": \"Gradio\", \"color\": [250, 185, 0]}]},\n\t\tannotate_current_page: 1,\n\t\toutput_folder_textbox: \"/home/user/app/output/\",\n});\n\nconsole.log(result.data);","bash":"curl -X POST http://localhost:7860/gradio_api/call/page_ocr_review_image -s -H \"Content-Type: application/json\" -d '{\"data\": [{\"image\": \"https://raw.githubusercontent.com/gradio-app/gradio/main/guides/assets/logo.png\", \"boxes\": [{\"xmin\": 30, 
\"ymin\": 70, \"xmax\": 530, \"ymax\": 500, \"label\": \"Gradio\", \"color\": [250, 185, 0]}]}, 1, \"/home/user/app/output/\"]}' \\\n  | awk -F'\"' '{ print $4}' \\\n  | read EVENT_ID; curl -N http://localhost:7860/gradio_api/call/page_ocr_review_image/$EVENT_ID"}},"/page_redaction_review_image":{"parameters":[{"label":"Modify redaction boxes","parameter_name":"page_annotator","parameter_has_default":false,"parameter_default":null,"type":{"$defs":{"FileData":{"description":"The FileData class is a subclass of the GradioModel class that represents a file object within a Gradio interface. It is used to store file data and metadata when a file is uploaded.\n\nAttributes:\n    path: The server file path where the file is stored.\n    url: The normalized server URL pointing to the file.\n    size: The size of the file in bytes.\n    orig_name: The original filename before upload.\n    mime_type: The MIME type of the file.\n    is_stream: Indicates whether the file is a stream.\n    meta: Additional metadata used internally (should not be changed).","properties":{"path":{"title":"Path","type":"string"},"url":{"anyOf":[{"type":"string"},{"type":"null"}],"default":null,"title":"Url"},"size":{"anyOf":[{"type":"integer"},{"type":"null"}],"default":null,"title":"Size"},"orig_name":{"anyOf":[{"type":"string"},{"type":"null"}],"default":null,"title":"Orig Name"},"mime_type":{"anyOf":[{"type":"string"},{"type":"null"}],"default":null,"title":"Mime Type"},"is_stream":{"default":false,"title":"Is 
Stream","type":"boolean"},"meta":{"$ref":"#/$defs/FileDataMeta"}},"required":["path"],"title":"FileData","type":"object"},"FileDataMeta":{"properties":{"_type":{"const":"gradio.FileData","title":"Type","type":"string"}},"required":["_type"],"title":"FileDataMeta","type":"object"}},"properties":{"image":{"$ref":"#/$defs/FileData"},"boxes":{"default":[],"items":{"additionalProperties":true,"type":"object"},"title":"Boxes","type":"array"},"orientation":{"default":0,"title":"Orientation","type":"integer"},"image_width":{"anyOf":[{"type":"integer"},{"type":"null"}],"default":null,"title":"Image Width"},"image_height":{"anyOf":[{"type":"integer"},{"type":"null"}],"default":null,"title":"Image Height"}},"required":["image"],"title":"AnnotatedImageData","type":"object","additional_description":null},"python_type":{"type":"dict(image: filepath, boxes: list[dict(str, Any)], orientation: int, image_width: int | None, image_height: int | None)","description":null},"component":"Image_annotator","example_input":{"image":"https://raw.githubusercontent.com/gradio-app/gradio/main/guides/assets/logo.png","boxes":[{"xmin":30,"ymin":70,"xmax":530,"ymax":500,"label":"Gradio","color":[250,185,0]}]}},{"label":"Current page","parameter_name":"annotate_current_page","parameter_has_default":true,"parameter_default":1,"type":{"type":"integer"},"python_type":{"type":"int","description":""},"component":"Number","example_input":1},{"label":"Review file 
data","parameter_name":"review_df","parameter_has_default":true,"parameter_default":{"headers":["image","page","label","color","xmin","ymin","xmax","ymax","id","text"],"data":[],"metadata":null},"type":{"properties":{"headers":{"items":{},"title":"Headers","type":"array"},"data":{"items":{"items":{},"type":"array"},"title":"Data","type":"array"},"metadata":{"anyOf":[{"additionalProperties":{"anyOf":[{"items":{},"type":"array"},{"type":"null"}]},"type":"object"},{"type":"null"}],"default":null,"title":"Metadata"}},"required":["headers","data"],"title":"DataframeData","type":"object","additional_description":null},"python_type":{"type":"dict(headers: list[Any], data: list[list[Any]], metadata: dict(str, list[Any] | None) | None)","description":null},"component":"Dataframe","example_input":{"headers":["a","b"],"data":[["foo","bar"]]}},{"label":"output_folder_textbox","parameter_name":"output_folder_textbox","parameter_has_default":true,"parameter_default":"/home/user/app/output/","type":{"type":"string"},"python_type":{"type":"str","description":""},"component":"Textbox","example_input":"Hello!!"}],"returns":[{"label":"OCR/Redaction overlay output","type":{"$defs":{"FileDataMeta":{"properties":{"_type":{"const":"gradio.FileData","title":"Type","type":"string"}},"required":["_type"],"title":"FileDataMeta","type":"object"}},"properties":{"path":{"title":"Path","type":"string"},"url":{"anyOf":[{"type":"string"},{"type":"null"}],"default":null,"title":"Url"},"size":{"anyOf":[{"type":"integer"},{"type":"null"}],"default":null,"title":"Size"},"orig_name":{"anyOf":[{"type":"string"},{"type":"null"}],"default":null,"title":"Orig Name"},"mime_type":{"anyOf":[{"type":"string"},{"type":"null"}],"default":null,"title":"Mime Type"},"is_stream":{"default":false,"title":"Is Stream","type":"boolean"},"meta":{"$ref":"#/$defs/FileDataMeta"}},"required":["path"],"title":"FileData","type":"object","additional_description":"The FileData class is a subclass of the GradioModel class that 
represents a file object within a Gradio interface. It is used to store file data and metadata when a file is uploaded.\n\nAttributes:\n    path: The server file path where the file is stored.\n    url: The normalized server URL pointing to the file.\n    size: The size of the file in bytes.\n    orig_name: The original filename before upload.\n    mime_type: The MIME type of the file.\n    is_stream: Indicates whether the file is a stream.\n    meta: Additional metadata used internally (should not be changed)."},"python_type":{"type":"filepath","description":""},"component":"File"}],"api_visibility":"public","description":"Short-name Gradio handler; wraps `export_review_redaction_overlay_for_gradio`.","code_snippets":{"python":"from gradio_client import Client\n\nclient = Client(\"http://localhost:7860\")\nresult = client.predict(\n\tpage_annotator={\"image\": \"https://raw.githubusercontent.com/gradio-app/gradio/main/guides/assets/logo.png\", \"boxes\": [{\"xmin\": 30, \"ymin\": 70, \"xmax\": 530, \"ymax\": 500, \"label\": \"Gradio\", \"color\": [250, 185, 0]}]},\n\tannotate_current_page=1,\n\treview_df={\"headers\": [\"image\", \"page\", \"label\", \"color\", \"xmin\", \"ymin\", \"xmax\", \"ymax\", \"id\", \"text\"], \"data\": [], \"metadata\": None},\n\toutput_folder_textbox=\"/home/user/app/output/\",\n\tapi_name=\"/page_redaction_review_image\",\n)\nprint(result)","javascript":"import { Client } from \"@gradio/client\";\n\nconst client = await Client.connect(\"http://localhost:7860\");\nconst result = await client.predict(\"/page_redaction_review_image\", {\n\t\tpage_annotator: {\"image\": \"https://raw.githubusercontent.com/gradio-app/gradio/main/guides/assets/logo.png\", \"boxes\": [{\"xmin\": 30, \"ymin\": 70, \"xmax\": 530, \"ymax\": 500, \"label\": \"Gradio\", \"color\": [250, 185, 0]}]},\n\t\tannotate_current_page: 1,\n\t\treview_df: {\"headers\": [\"image\", \"page\", \"label\", \"color\", \"xmin\", \"ymin\", \"xmax\", \"ymax\", \"id\", \"text\"], 
\"data\": [], \"metadata\": None},\n\t\toutput_folder_textbox: \"/home/user/app/output/\",\n});\n\nconsole.log(result.data);","bash":"curl -X POST http://localhost:7860/gradio_api/call/page_redaction_review_image -s -H \"Content-Type: application/json\" -d '{\"data\": [{\"image\": \"https://raw.githubusercontent.com/gradio-app/gradio/main/guides/assets/logo.png\", \"boxes\": [{\"xmin\": 30, \"ymin\": 70, \"xmax\": 530, \"ymax\": 500, \"label\": \"Gradio\", \"color\": [250, 185, 0]}]}, 1, {\"headers\": [\"image\", \"page\", \"label\", \"color\", \"xmin\", \"ymin\", \"xmax\", \"ymax\", \"id\", \"text\"], \"data\": [], \"metadata\": None}, \"/home/user/app/output/\"]}' \\\n  | awk -F'\"' '{ print $4}' \\\n  | read EVENT_ID; curl -N http://localhost:7860/gradio_api/call/page_redaction_review_image/$EVENT_ID"}},"/word_level_ocr_text_search":{"parameters":[{"label":"Multi-word text search (regex enabled below)","parameter_name":"search_text","parameter_has_default":true,"parameter_default":"","type":{"type":"string"},"python_type":{"type":"str","description":""},"component":"Textbox","example_input":"Hello!!"},{"label":"Minimum similarity score for match (max=1)","parameter_name":"similarity_threshold","parameter_has_default":true,"parameter_default":1.0,"type":{"type":"number"},"python_type":{"type":"float","description":""},"component":"Number","example_input":0.4},{"label":"Enable regex pattern matching","parameter_name":"use_regex_flag","parameter_has_default":true,"parameter_default":false,"type":{"type":"boolean"},"python_type":{"type":"bool","description":""},"component":"Checkbox","example_input":true}],"returns":[{"label":"Click table row to select and go to 
page","type":{"properties":{"headers":{"items":{},"title":"Headers","type":"array"},"data":{"items":{"items":{},"type":"array"},"title":"Data","type":"array"},"metadata":{"anyOf":[{"additionalProperties":{"anyOf":[{"items":{},"type":"array"},{"type":"null"}]},"type":"object"},{"type":"null"}],"default":null,"title":"Metadata"}},"required":["headers","data"],"title":"DataframeData","type":"object","additional_description":null},"python_type":{"type":"dict(headers: list[Any], data: list[list[Any]], metadata: dict(str, list[Any] | None) | None)","description":""},"component":"Dataframe"},{"label":"Download analysis summary and redaction lists (.csv)","type":{"$defs":{"FileData":{"description":"The FileData class is a subclass of the GradioModel class that represents a file object within a Gradio interface. It is used to store file data and metadata when a file is uploaded.\n\nAttributes:\n    path: The server file path where the file is stored.\n    url: The normalized server URL pointing to the file.\n    size: The size of the file in bytes.\n    orig_name: The original filename before upload.\n    mime_type: The MIME type of the file.\n    is_stream: Indicates whether the file is a stream.\n    meta: Additional metadata used internally (should not be changed).","properties":{"path":{"title":"Path","type":"string"},"url":{"anyOf":[{"type":"string"},{"type":"null"}],"default":null,"title":"Url"},"size":{"anyOf":[{"type":"integer"},{"type":"null"}],"default":null,"title":"Size"},"orig_name":{"anyOf":[{"type":"string"},{"type":"null"}],"default":null,"title":"Orig Name"},"mime_type":{"anyOf":[{"type":"string"},{"type":"null"}],"default":null,"title":"Mime Type"},"is_stream":{"default":false,"title":"Is 
Stream","type":"boolean"},"meta":{"$ref":"#/$defs/FileDataMeta"}},"required":["path"],"title":"FileData","type":"object"},"FileDataMeta":{"properties":{"_type":{"const":"gradio.FileData","title":"Type","type":"string"}},"required":["_type"],"title":"FileDataMeta","type":"object"}},"items":{"$ref":"#/$defs/FileData"},"title":"ListFiles","type":"array","additional_description":null},"python_type":{"type":"list[filepath]","description":""},"component":"File"}],"api_visibility":"public","description":"Wrapper function to call run_full_search_and_analysis with regex option","code_snippets":{"python":"from gradio_client import Client\n\nclient = Client(\"http://localhost:7860\")\nresult = client.predict(\n\tsearch_text=\"\",\n\tsimilarity_threshold=1.0,\n\tuse_regex_flag=False,\n\tapi_name=\"/word_level_ocr_text_search\",\n)\nprint(result)","javascript":"import { Client } from \"@gradio/client\";\n\nconst client = await Client.connect(\"http://localhost:7860\");\nconst result = await client.predict(\"/word_level_ocr_text_search\", {\n\t\tsearch_text: \"\",\n\t\tsimilarity_threshold: 1.0,\n\t\tuse_regex_flag: false,\n});\n\nconsole.log(result.data);","bash":"curl -X POST http://localhost:7860/gradio_api/call/word_level_ocr_text_search -s -H \"Content-Type: application/json\" -d '{\"data\": [\"\", 1.0, false]}' \\\n  | awk -F'\"' '{ print $4}' \\\n  | read EVENT_ID; curl -N http://localhost:7860/gradio_api/call/word_level_ocr_text_search/$EVENT_ID"}},"/redact_data":{"parameters":[{"label":"Choose Excel or csv files","parameter_name":"file_paths","parameter_has_default":false,"parameter_default":null,"type":{"$defs":{"FileData":{"description":"The FileData class is a subclass of the GradioModel class that represents a file object within a Gradio interface. 
It is used to store file data and metadata when a file is uploaded.\n\nAttributes:\n    path: The server file path where the file is stored.\n    url: The normalized server URL pointing to the file.\n    size: The size of the file in bytes.\n    orig_name: The original filename before upload.\n    mime_type: The MIME type of the file.\n    is_stream: Indicates whether the file is a stream.\n    meta: Additional metadata used internally (should not be changed).","properties":{"path":{"title":"Path","type":"string"},"url":{"anyOf":[{"type":"string"},{"type":"null"}],"default":null,"title":"Url"},"size":{"anyOf":[{"type":"integer"},{"type":"null"}],"default":null,"title":"Size"},"orig_name":{"anyOf":[{"type":"string"},{"type":"null"}],"default":null,"title":"Orig Name"},"mime_type":{"anyOf":[{"type":"string"},{"type":"null"}],"default":null,"title":"Mime Type"},"is_stream":{"default":false,"title":"Is Stream","type":"boolean"},"meta":{"$ref":"#/$defs/FileDataMeta"}},"required":["path"],"title":"FileData","type":"object"},"FileDataMeta":{"properties":{"_type":{"const":"gradio.FileData","title":"Type","type":"string"}},"required":["_type"],"title":"FileDataMeta","type":"object"}},"items":{"$ref":"#/$defs/FileData"},"title":"ListFiles","type":"array","additional_description":null},"python_type":{"type":"list[filepath]","description":null},"component":"File","example_input":[{"path":"https://github.com/gradio-app/gradio/raw/main/test/test_files/sample_file.pdf","meta":{"_type":"gradio.FileData"},"orig_name":"sample_file.pdf","url":"https://github.com/gradio-app/gradio/raw/main/test/test_files/sample_file.pdf"}]},{"label":"Enter open text","parameter_name":"in_text","parameter_has_default":false,"parameter_default":null,"type":{"type":"string"},"python_type":{"type":"str","description":""},"component":"Textbox","example_input":"Hello!!"},{"label":"Select an anonymisation method.","parameter_name":"anon_strategy","parameter_has_default":true,"parameter_default":"redact 
completely","type":{"enum":["replace with 'REDACTED'","replace with <ENTITY_NAME>","redact completely","hash","mask"],"title":"Radio","type":"string"},"python_type":{"type":"Literal['replace with 'REDACTED'', 'replace with <ENTITY_NAME>', 'redact completely', 'hash', 'mask']","description":""},"component":"Radio","example_input":"replace with 'REDACTED'"},{"label":"Select columns that you want to anonymise (showing columns present across all files).","parameter_name":"chosen_cols","parameter_has_default":true,"parameter_default":[],"type":{"type":"array","items":{"type":"string","enum":["Choose columns to anonymise"]}},"python_type":{"type":"list[Literal['Choose columns to anonymise']]","description":""},"component":"Dropdown","example_input":["Choose columns to anonymise"]},{"label":"Local PII identification model (click empty space in box for full list)","parameter_name":"chosen_redact_entities","parameter_has_default":true,"parameter_default":["TITLES","PERSON","PHONE_NUMBER","EMAIL_ADDRESS","STREETNAME","UKPOSTCODE","CUSTOM"],"type":{"type":"array","items":{"type":"string","enum":["TITLES","PERSON","PHONE_NUMBER","EMAIL_ADDRESS","STREETNAME","UKPOSTCODE","CREDIT_CARD","CRYPTO","DATE_TIME","IBAN_CODE","IP_ADDRESS","NRP","LOCATION","MEDICAL_LICENSE","URL","UK_NHS","CUSTOM","CUSTOM_FUZZY"]}},"python_type":{"type":"list[Literal['TITLES', 'PERSON', 'PHONE_NUMBER', 'EMAIL_ADDRESS', 'STREETNAME', 'UKPOSTCODE', 'CREDIT_CARD', 'CRYPTO', 'DATE_TIME', 'IBAN_CODE', 'IP_ADDRESS', 'NRP', 'LOCATION', 'MEDICAL_LICENSE', 'URL', 'UK_NHS', 'CUSTOM', 'CUSTOM_FUZZY']]","description":""},"component":"Dropdown","example_input":["TITLES"]},{"label":"Allow list (never redact these words)","parameter_name":"in_allow_list","parameter_has_default":true,"parameter_default":[],"type":{"type":"array","items":{"type":"string","enum":[]}},"python_type":{"type":"list[Literal[]]","description":""},"component":"Dropdown","example_input":[]},{"label":"Number of tabular files 
redacted","parameter_name":"latest_file_completed","parameter_has_default":true,"parameter_default":0,"type":{"type":"number"},"python_type":{"type":"float","description":""},"component":"Number","example_input":3},{"label":"Output result","parameter_name":"out_message","parameter_has_default":false,"parameter_default":null,"type":{"type":"string"},"python_type":{"type":"str","description":""},"component":"Textbox","example_input":"Hello!!"},{"label":"Select Excel sheets that you want to anonymise (showing sheets present across all Excel files).","parameter_name":"in_excel_sheets","parameter_has_default":true,"parameter_default":[],"type":{"type":"array","items":{"type":"string","enum":["Choose Excel sheets to anonymise"]}},"python_type":{"type":"list[Literal['Choose Excel sheets to anonymise']]","description":""},"component":"Dropdown","example_input":["Choose Excel sheets to anonymise"]},{"label":"first_loop_state","parameter_name":"first_loop_state","parameter_has_default":true,"parameter_default":true,"type":{"type":"boolean"},"python_type":{"type":"bool","description":""},"component":"Checkbox","example_input":true},{"label":"output_folder_textbox","parameter_name":"output_folder","parameter_has_default":true,"parameter_default":"/home/user/app/output/","type":{"type":"string"},"python_type":{"type":"str","description":""},"component":"Textbox","example_input":"Hello!!"},{"label":"Deny list (always redact these words)","parameter_name":"in_deny_list","parameter_has_default":true,"parameter_default":[],"type":{"type":"array","items":{"type":"string","enum":[]}},"python_type":{"type":"list[Literal[]]","description":""},"component":"Dropdown","example_input":[]},{"label":"Maximum spelling mistakes for matching deny list terms (slows down PII 
detection).","parameter_name":"max_fuzzy_spelling_mistakes_num","parameter_has_default":true,"parameter_default":0,"type":{"type":"integer"},"python_type":{"type":"int","description":""},"component":"Number","example_input":0},{"label":"Choose PII detection method. Specific entities for the chosen redaction model type can be chosen on the Redact PDF/image tab. AWS Comprehend has a cost of approximately $0.01 per 10,000 characters.","parameter_name":"pii_identification_method","parameter_has_default":true,"parameter_default":"Local","type":{"enum":["Local","AWS Comprehend"],"title":"Radio","type":"string"},"python_type":{"type":"Literal['Local', 'AWS Comprehend']","description":""},"component":"Radio","example_input":"Local"},{"label":"AWS Comprehend PII identification model (click empty space in box for full list)","parameter_name":"chosen_redact_comprehend_entities","parameter_has_default":true,"parameter_default":["EMAIL","ADDRESS","NAME","PHONE","PASSPORT_NUMBER","UK_NATIONAL_INSURANCE_NUMBER","UK_NATIONAL_HEALTH_SERVICE_NUMBER","CUSTOM","TITLES","UKPOSTCODE","STREETNAME"],"type":{"type":"array","items":{"type":"string","enum":["BANK_ACCOUNT_NUMBER","BANK_ROUTING","CREDIT_DEBIT_NUMBER","CREDIT_DEBIT_CVV","CREDIT_DEBIT_EXPIRY","PIN","EMAIL","ADDRESS","NAME","PHONE","SSN","DATE_TIME","PASSPORT_NUMBER","DRIVER_ID","URL","AGE","USERNAME","PASSWORD","AWS_ACCESS_KEY","AWS_SECRET_KEY","IP_ADDRESS","MAC_ADDRESS","LICENSE_PLATE","VEHICLE_IDENTIFICATION_NUMBER","UK_NATIONAL_INSURANCE_NUMBER","INTERNATIONAL_BANK_ACCOUNT_NUMBER","SWIFT_CODE","UK_NATIONAL_HEALTH_SERVICE_NUMBER","ALL","CUSTOM","CUSTOM_FUZZY","TITLES","UKPOSTCODE","STREETNAME"]}},"python_type":{"type":"list[Literal['BANK_ACCOUNT_NUMBER', 'BANK_ROUTING', 'CREDIT_DEBIT_NUMBER', 'CREDIT_DEBIT_CVV', 'CREDIT_DEBIT_EXPIRY', 'PIN', 'EMAIL', 'ADDRESS', 'NAME', 'PHONE', 'SSN', 'DATE_TIME', 'PASSPORT_NUMBER', 'DRIVER_ID', 'URL', 'AGE', 'USERNAME', 'PASSWORD', 'AWS_ACCESS_KEY', 'AWS_SECRET_KEY', 'IP_ADDRESS', 
'MAC_ADDRESS', 'LICENSE_PLATE', 'VEHICLE_IDENTIFICATION_NUMBER', 'UK_NATIONAL_INSURANCE_NUMBER', 'INTERNATIONAL_BANK_ACCOUNT_NUMBER', 'SWIFT_CODE', 'UK_NATIONAL_HEALTH_SERVICE_NUMBER', 'ALL', 'CUSTOM', 'CUSTOM_FUZZY', 'TITLES', 'UKPOSTCODE', 'STREETNAME']]","description":""},"component":"Dropdown","example_input":["BANK_ACCOUNT_NUMBER"]},{"label":"AWS access key for account with permissions for AWS Textract and Comprehend","parameter_name":"aws_access_key_textbox","parameter_has_default":true,"parameter_default":"","type":{"type":"string"},"python_type":{"type":"str","description":""},"component":"Textbox","example_input":"Hello!!"},{"label":"AWS secret key for account with permissions for AWS Textract and Comprehend","parameter_name":"aws_secret_key_textbox","parameter_has_default":true,"parameter_default":"","type":{"type":"string"},"python_type":{"type":"str","description":""},"component":"Textbox","example_input":"Hello!!"},{"label":"Do initial clean of text (remove URLs, HTML tags, and non-ASCII characters)","parameter_name":"do_initial_clean","parameter_has_default":true,"parameter_default":true,"type":{"type":"boolean"},"python_type":{"type":"bool","description":""},"component":"Checkbox","example_input":true},{"label":"Chosen language short code","parameter_name":"language","parameter_has_default":true,"parameter_default":"en","type":{"type":"string","enum":["en","fr","de","es","it","nl","pt","zh","ja","ko","lt","mk","nb","pl","ro","ru","sl","sv","ca","uk"]},"python_type":{"type":"Literal['en', 'fr', 'de', 'es', 'it', 'nl', 'pt', 'zh', 'ja', 'ko', 'lt', 'mk', 'nb', 'pl', 'ro', 'ru', 'sl', 'sv', 'ca', 'uk']","description":""},"component":"Dropdown","example_input":"en"},{"label":"Custom instructions for LLM-based entity detection","parameter_name":"progress","parameter_has_default":true,"parameter_default":"","type":{"type":"string"},"python_type":{"type":"str","description":""},"component":"Textbox","example_input":"Hello!!"},{"label":"LLM PII 
identification model - subset of entities for LLM detection (click empty space in box for full list)","parameter_name":"custom_llm_instructions","parameter_has_default":true,"parameter_default":["EMAIL_ADDRESS","STREET_ADDRESS","PERSON_NAME","PHONE_NUMBER","CUSTOM"],"type":{"type":"array","items":{"type":"string","enum":["EMAIL_ADDRESS","STREET_ADDRESS","PERSON_NAME","PHONE_NUMBER","DATE_TIME","URL","IP_ADDRESS","AGE","BANK_ACCOUNT_NUMBER","PASSPORT_NUMBER","CUSTOM","CUSTOM_FUZZY","TITLES","UKPOSTCODE","STREETNAME"]}},"python_type":{"type":"list[Literal['EMAIL_ADDRESS', 'STREET_ADDRESS', 'PERSON_NAME', 'PHONE_NUMBER', 'DATE_TIME', 'URL', 'IP_ADDRESS', 'AGE', 'BANK_ACCOUNT_NUMBER', 'PASSPORT_NUMBER', 'CUSTOM', 'CUSTOM_FUZZY', 'TITLES', 'UKPOSTCODE', 'STREETNAME']]","description":""},"component":"Dropdown","example_input":["EMAIL_ADDRESS"]}],"returns":[{"label":"Output result","type":{"type":"string"},"python_type":{"type":"str","description":""},"component":"Textbox"},{"label":"Output files","type":{"$defs":{"FileDataMeta":{"properties":{"_type":{"const":"gradio.FileData","title":"Type","type":"string"}},"required":["_type"],"title":"FileDataMeta","type":"object"}},"properties":{"path":{"title":"Path","type":"string"},"url":{"anyOf":[{"type":"string"},{"type":"null"}],"default":null,"title":"Url"},"size":{"anyOf":[{"type":"integer"},{"type":"null"}],"default":null,"title":"Size"},"orig_name":{"anyOf":[{"type":"string"},{"type":"null"}],"default":null,"title":"Orig Name"},"mime_type":{"anyOf":[{"type":"string"},{"type":"null"}],"default":null,"title":"Mime Type"},"is_stream":{"default":false,"title":"Is Stream","type":"boolean"},"meta":{"$ref":"#/$defs/FileDataMeta"}},"required":["path"],"title":"FileData","type":"object","additional_description":"The FileData class is a subclass of the GradioModel class that represents a file object within a Gradio interface. 
It is used to store file data and metadata when a file is uploaded.\n\nAttributes:\n    path: The server file path where the file is stored.\n    url: The normalized server URL pointing to the file.\n    size: The size of the file in bytes.\n    orig_name: The original filename before upload.\n    mime_type: The MIME type of the file.\n    is_stream: Indicates whether the file is a stream.\n    meta: Additional metadata used internally (should not be changed)."},"python_type":{"type":"filepath","description":""},"component":"File"},{"label":"Number of tabular files redacted","type":{"type":"number"},"python_type":{"type":"float","description":""},"component":"Number"},{"label":"Log file output","type":{"$defs":{"FileDataMeta":{"properties":{"_type":{"const":"gradio.FileData","title":"Type","type":"string"}},"required":["_type"],"title":"FileDataMeta","type":"object"}},"properties":{"path":{"title":"Path","type":"string"},"url":{"anyOf":[{"type":"string"},{"type":"null"}],"default":null,"title":"Url"},"size":{"anyOf":[{"type":"integer"},{"type":"null"}],"default":null,"title":"Size"},"orig_name":{"anyOf":[{"type":"string"},{"type":"null"}],"default":null,"title":"Orig Name"},"mime_type":{"anyOf":[{"type":"string"},{"type":"null"}],"default":null,"title":"Mime Type"},"is_stream":{"default":false,"title":"Is Stream","type":"boolean"},"meta":{"$ref":"#/$defs/FileDataMeta"}},"required":["path"],"title":"FileData","type":"object","additional_description":"The FileData class is a subclass of the GradioModel class that represents a file object within a Gradio interface. 
It is used to store file data and metadata when a file is uploaded.\n\nAttributes:\n    path: The server file path where the file is stored.\n    url: The normalized server URL pointing to the file.\n    size: The size of the file in bytes.\n    orig_name: The original filename before upload.\n    mime_type: The MIME type of the file.\n    is_stream: Indicates whether the file is a stream.\n    meta: Additional metadata used internally (should not be changed)."},"python_type":{"type":"filepath","description":""},"component":"File"}],"api_visibility":"public","description":"This function anonymises data files based on the provided parameters.","code_snippets":{"python":"from gradio_client import Client, handle_file\n\nclient = Client(\"http://localhost:7860\")\nresult = client.predict(\n\tfile_paths=[handle_file('https://github.com/gradio-app/gradio/raw/main/test/test_files/sample_file.pdf')],\n\tin_text=\"Hello!!\",\n\tanon_strategy=\"redact completely\",\n\tchosen_cols=[],\n\tchosen_redact_entities=[\"TITLES\", \"PERSON\", \"PHONE_NUMBER\", \"EMAIL_ADDRESS\", \"STREETNAME\", \"UKPOSTCODE\", \"CUSTOM\"],\n\tin_allow_list=[],\n\tlatest_file_completed=0,\n\tout_message=\"Hello!!\",\n\tin_excel_sheets=[],\n\tfirst_loop_state=True,\n\toutput_folder=\"/home/user/app/output/\",\n\tin_deny_list=[],\n\tmax_fuzzy_spelling_mistakes_num=0,\n\tpii_identification_method=\"Local\",\n\tchosen_redact_comprehend_entities=[\"EMAIL\", \"ADDRESS\", \"NAME\", \"PHONE\", \"PASSPORT_NUMBER\", \"UK_NATIONAL_INSURANCE_NUMBER\", \"UK_NATIONAL_HEALTH_SERVICE_NUMBER\", \"CUSTOM\", \"TITLES\", \"UKPOSTCODE\", \"STREETNAME\"],\n\taws_access_key_textbox=\"\",\n\taws_secret_key_textbox=\"\",\n\tdo_initial_clean=True,\n\tlanguage=\"en\",\n\tprogress=\"\",\n\tcustom_llm_instructions=[\"EMAIL_ADDRESS\", \"STREET_ADDRESS\", \"PERSON_NAME\", \"PHONE_NUMBER\", \"CUSTOM\"],\n\tapi_name=\"/redact_data\",\n)\nprint(result)","javascript":"import { Client } from \"@gradio/client\";\n\nconst response_0 = 
await fetch(\"\");\nconst exampleFile = await response_0.blob();\n\nconst client = await Client.connect(\"http://localhost:7860\");\nconst result = await client.predict(\"/redact_data\", {\n\t\tfile_paths: exampleFile,\n\t\tin_text: \"Hello!!\",\n\t\tanon_strategy: \"redact completely\",\n\t\tchosen_cols: [],\n\t\tchosen_redact_entities: [\"TITLES\", \"PERSON\", \"PHONE_NUMBER\", \"EMAIL_ADDRESS\", \"STREETNAME\", \"UKPOSTCODE\", \"CUSTOM\"],\n\t\tin_allow_list: [],\n\t\tlatest_file_completed: 0,\n\t\tout_message: \"Hello!!\",\n\t\tin_excel_sheets: [],\n\t\tfirst_loop_state: true,\n\t\toutput_folder: \"/home/user/app/output/\",\n\t\tin_deny_list: [],\n\t\tmax_fuzzy_spelling_mistakes_num: 0,\n\t\tpii_identification_method: \"Local\",\n\t\tchosen_redact_comprehend_entities: [\"EMAIL\", \"ADDRESS\", \"NAME\", \"PHONE\", \"PASSPORT_NUMBER\", \"UK_NATIONAL_INSURANCE_NUMBER\", \"UK_NATIONAL_HEALTH_SERVICE_NUMBER\", \"CUSTOM\", \"TITLES\", \"UKPOSTCODE\", \"STREETNAME\"],\n\t\taws_access_key_textbox: \"\",\n\t\taws_secret_key_textbox: \"\",\n\t\tdo_initial_clean: true,\n\t\tlanguage: \"en\",\n\t\tprogress: \"\",\n\t\tcustom_llm_instructions: [\"EMAIL_ADDRESS\", \"STREET_ADDRESS\", \"PERSON_NAME\", \"PHONE_NUMBER\", \"CUSTOM\"],\n});\n\nconsole.log(result.data);","bash":"curl -X POST http://localhost:7860/gradio_api/call/redact_data -s -H \"Content-Type: application/json\" -d '{\"data\": [[{\"path\": \"https://github.com/gradio-app/gradio/raw/main/test/test_files/sample_file.pdf\", \"meta\": {\"_type\": \"gradio.FileData\"}}], \"Hello!!\", \"redact completely\", [], [\"TITLES\", \"PERSON\", \"PHONE_NUMBER\", \"EMAIL_ADDRESS\", \"STREETNAME\", \"UKPOSTCODE\", \"CUSTOM\"], [], 0, \"Hello!!\", [], true, \"/home/user/app/output/\", [], 0, \"Local\", [\"EMAIL\", \"ADDRESS\", \"NAME\", \"PHONE\", \"PASSPORT_NUMBER\", \"UK_NATIONAL_INSURANCE_NUMBER\", \"UK_NATIONAL_HEALTH_SERVICE_NUMBER\", \"CUSTOM\", \"TITLES\", \"UKPOSTCODE\", \"STREETNAME\"], \"\", \"\", true, \"en\", \"\", 
[\"EMAIL_ADDRESS\", \"STREET_ADDRESS\", \"PERSON_NAME\", \"PHONE_NUMBER\", \"CUSTOM\"]]}' \\\n  | awk -F'\"' '{ print $4}' \\\n  | read EVENT_ID; curl -N http://localhost:7860/gradio_api/call/redact_data/$EVENT_ID"}},"/find_duplicate_pages":{"parameters":[{"label":"Upload one or multiple 'ocr_output.csv' files to find duplicate pages and subdocuments","parameter_name":"files","parameter_has_default":false,"parameter_default":null,"type":{"$defs":{"FileData":{"description":"The FileData class is a subclass of the GradioModel class that represents a file object within a Gradio interface. It is used to store file data and metadata when a file is uploaded.\n\nAttributes:\n    path: The server file path where the file is stored.\n    url: The normalized server URL pointing to the file.\n    size: The size of the file in bytes.\n    orig_name: The original filename before upload.\n    mime_type: The MIME type of the file.\n    is_stream: Indicates whether the file is a stream.\n    meta: Additional metadata used internally (should not be changed).","properties":{"path":{"title":"Path","type":"string"},"url":{"anyOf":[{"type":"string"},{"type":"null"}],"default":null,"title":"Url"},"size":{"anyOf":[{"type":"integer"},{"type":"null"}],"default":null,"title":"Size"},"orig_name":{"anyOf":[{"type":"string"},{"type":"null"}],"default":null,"title":"Orig Name"},"mime_type":{"anyOf":[{"type":"string"},{"type":"null"}],"default":null,"title":"Mime Type"},"is_stream":{"default":false,"title":"Is 
Stream","type":"boolean"},"meta":{"$ref":"#/$defs/FileDataMeta"}},"required":["path"],"title":"FileData","type":"object"},"FileDataMeta":{"properties":{"_type":{"const":"gradio.FileData","title":"Type","type":"string"}},"required":["_type"],"title":"FileDataMeta","type":"object"}},"items":{"$ref":"#/$defs/FileData"},"title":"ListFiles","type":"array","additional_description":null},"python_type":{"type":"list[filepath]","description":null},"component":"File","example_input":[{"path":"https://github.com/gradio-app/gradio/raw/main/test/test_files/sample_file.pdf","meta":{"_type":"gradio.FileData"},"orig_name":"sample_file.pdf","url":"https://github.com/gradio-app/gradio/raw/main/test/test_files/sample_file.pdf"}]},{"label":"Similarity threshold","parameter_name":"threshold","parameter_has_default":true,"parameter_default":0.95,"type":{"type":"number"},"python_type":{"type":"float","description":""},"component":"Number","example_input":3},{"label":"Minimum word count","parameter_name":"min_words","parameter_has_default":true,"parameter_default":10,"type":{"type":"number"},"python_type":{"type":"float","description":""},"component":"Number","example_input":3},{"label":"Minimum consecutive matches to be considered a match","parameter_name":"min_consecutive","parameter_has_default":true,"parameter_default":1,"type":{"type":"number","description":"numeric value between 1 and 20"},"python_type":{"type":"float","description":""},"component":"Slider","example_input":1},{"label":"Combine consecutive matches into a single match (subdocument match)","parameter_name":"greedy_match","parameter_has_default":true,"parameter_default":true,"type":{"type":"boolean"},"python_type":{"type":"bool","description":""},"component":"Checkbox","example_input":true},{"label":"2. 
An '...ocr_results_with_words' file can be uploaded here for searching text and making new redactions.","parameter_name":"ocr_df_paths_list","parameter_has_default":false,"parameter_default":null,"type":{"$defs":{"FileData":{"description":"The FileData class is a subclass of the GradioModel class that represents a file object within a Gradio interface. It is used to store file data and metadata when a file is uploaded.\n\nAttributes:\n    path: The server file path where the file is stored.\n    url: The normalized server URL pointing to the file.\n    size: The size of the file in bytes.\n    orig_name: The original filename before upload.\n    mime_type: The MIME type of the file.\n    is_stream: Indicates whether the file is a stream.\n    meta: Additional metadata used internally (should not be changed).","properties":{"path":{"title":"Path","type":"string"},"url":{"anyOf":[{"type":"string"},{"type":"null"}],"default":null,"title":"Url"},"size":{"anyOf":[{"type":"integer"},{"type":"null"}],"default":null,"title":"Size"},"orig_name":{"anyOf":[{"type":"string"},{"type":"null"}],"default":null,"title":"Orig Name"},"mime_type":{"anyOf":[{"type":"string"},{"type":"null"}],"default":null,"title":"Mime Type"},"is_stream":{"default":false,"title":"Is 
Stream","type":"boolean"},"meta":{"$ref":"#/$defs/FileDataMeta"}},"required":["path"],"title":"FileData","type":"object"},"FileDataMeta":{"properties":{"_type":{"const":"gradio.FileData","title":"Type","type":"string"}},"required":["_type"],"title":"FileDataMeta","type":"object"}},"items":{"$ref":"#/$defs/FileData"},"title":"ListFiles","type":"array","additional_description":null},"python_type":{"type":"list[filepath]","description":null},"component":"File","example_input":[{"path":"https://github.com/gradio-app/gradio/raw/main/test/test_files/sample_file.pdf","meta":{"_type":"gradio.FileData"},"orig_name":"sample_file.pdf","url":"https://github.com/gradio-app/gradio/raw/main/test/test_files/sample_file.pdf"}]},{"label":"Duplicate matching mode","parameter_name":"combine_pages","parameter_has_default":true,"parameter_default":true,"type":{"enum":[true,false],"title":"Radio","type":"string"},"python_type":{"type":"Literal['True', 'False']","description":""},"component":"Radio","example_input":true},{"label":"output_folder_textbox","parameter_name":"output_folder","parameter_has_default":true,"parameter_default":"/home/user/app/output/","type":{"type":"string"},"python_type":{"type":"str","description":""},"component":"Textbox","example_input":"Hello!!"}],"returns":[{"label":"Similarity Results","type":{"properties":{"headers":{"items":{},"title":"Headers","type":"array"},"data":{"items":{"items":{},"type":"array"},"title":"Data","type":"array"},"metadata":{"anyOf":[{"additionalProperties":{"anyOf":[{"items":{},"type":"array"},{"type":"null"}]},"type":"object"},{"type":"null"}],"default":null,"title":"Metadata"}},"required":["headers","data"],"title":"DataframeData","type":"object","additional_description":null},"python_type":{"type":"dict(headers: list[Any], data: list[list[Any]], metadata: dict(str, list[Any] | None) | None)","description":""},"component":"Dataframe"},{"label":"Download analysis summary and redaction lists 
(.csv)","type":{"$defs":{"FileData":{"description":"The FileData class is a subclass of the GradioModel class that represents a file object within a Gradio interface. It is used to store file data and metadata when a file is uploaded.\n\nAttributes:\n    path: The server file path where the file is stored.\n    url: The normalized server URL pointing to the file.\n    size: The size of the file in bytes.\n    orig_name: The original filename before upload.\n    mime_type: The MIME type of the file.\n    is_stream: Indicates whether the file is a stream.\n    meta: Additional metadata used internally (should not be changed).","properties":{"path":{"title":"Path","type":"string"},"url":{"anyOf":[{"type":"string"},{"type":"null"}],"default":null,"title":"Url"},"size":{"anyOf":[{"type":"integer"},{"type":"null"}],"default":null,"title":"Size"},"orig_name":{"anyOf":[{"type":"string"},{"type":"null"}],"default":null,"title":"Orig Name"},"mime_type":{"anyOf":[{"type":"string"},{"type":"null"}],"default":null,"title":"Mime Type"},"is_stream":{"default":false,"title":"Is Stream","type":"boolean"},"meta":{"$ref":"#/$defs/FileDataMeta"}},"required":["path"],"title":"FileData","type":"object"},"FileDataMeta":{"properties":{"_type":{"const":"gradio.FileData","title":"Type","type":"string"}},"required":["_type"],"title":"FileDataMeta","type":"object"}},"items":{"$ref":"#/$defs/FileData"},"title":"ListFiles","type":"array","additional_description":null},"python_type":{"type":"list[filepath]","description":""},"component":"File"},{"label":"task","type":{"type":"string"},"python_type":{"type":"str","description":""},"component":"Textbox"},{"label":"2. An '...ocr_results_with_words' file can be uploaded here for searching text and making new redactions.","type":{"$defs":{"FileData":{"description":"The FileData class is a subclass of the GradioModel class that represents a file object within a Gradio interface. 
It is used to store file data and metadata when a file is uploaded.\n\nAttributes:\n    path: The server file path where the file is stored.\n    url: The normalized server URL pointing to the file.\n    size: The size of the file in bytes.\n    orig_name: The original filename before upload.\n    mime_type: The MIME type of the file.\n    is_stream: Indicates whether the file is a stream.\n    meta: Additional metadata used internally (should not be changed).","properties":{"path":{"title":"Path","type":"string"},"url":{"anyOf":[{"type":"string"},{"type":"null"}],"default":null,"title":"Url"},"size":{"anyOf":[{"type":"integer"},{"type":"null"}],"default":null,"title":"Size"},"orig_name":{"anyOf":[{"type":"string"},{"type":"null"}],"default":null,"title":"Orig Name"},"mime_type":{"anyOf":[{"type":"string"},{"type":"null"}],"default":null,"title":"Mime Type"},"is_stream":{"default":false,"title":"Is Stream","type":"boolean"},"meta":{"$ref":"#/$defs/FileDataMeta"}},"required":["path"],"title":"FileData","type":"object"},"FileDataMeta":{"properties":{"_type":{"const":"gradio.FileData","title":"Type","type":"string"}},"required":["_type"],"title":"FileDataMeta","type":"object"}},"items":{"$ref":"#/$defs/FileData"},"title":"ListFiles","type":"array","additional_description":null},"python_type":{"type":"list[filepath]","description":""},"component":"File"},{"label":"Duplicate pages list","type":{"type":"array","items":{"type":"string","enum":[]}},"python_type":{"type":"list[Literal[]]","description":""},"component":"Dropdown"}],"api_visibility":"public","description":"Main wrapper function to orchestrate the duplicate page analysis process. 
It handles file loading, text combination, similarity identification, and result saving.","code_snippets":{"python":"from gradio_client import Client, handle_file\n\nclient = Client(\"http://localhost:7860\")\nresult = client.predict(\n\tfiles=[handle_file('https://github.com/gradio-app/gradio/raw/main/test/test_files/sample_file.pdf')],\n\tthreshold=0.95,\n\tmin_words=10,\n\tmin_consecutive=1,\n\tgreedy_match=True,\n\tocr_df_paths_list=[handle_file('https://github.com/gradio-app/gradio/raw/main/test/test_files/sample_file.pdf')],\n\tcombine_pages=\"True\",\n\toutput_folder=\"/home/user/app/output/\",\n\tapi_name=\"/find_duplicate_pages\",\n)\nprint(result)","javascript":"import { Client } from \"@gradio/client\";\n\nconst response_0 = await fetch(\"\");\nconst exampleFile = await response_0.blob();\nconst response_1 = await fetch(\"\");\nconst exampleFile = await response_1.blob();\n\nconst client = await Client.connect(\"http://localhost:7860\");\nconst result = await client.predict(\"/find_duplicate_pages\", {\n\t\tfiles: exampleFile,\n\t\tthreshold: 0.95,\n\t\tmin_words: 10,\n\t\tmin_consecutive: 1,\n\t\tgreedy_match: true,\n\t\tocr_df_paths_list: exampleFile,\n\t\tcombine_pages: \"True\",\n\t\toutput_folder: \"/home/user/app/output/\",\n});\n\nconsole.log(result.data);","bash":"curl -X POST http://localhost:7860/gradio_api/call/find_duplicate_pages -s -H \"Content-Type: application/json\" -d '{\"data\": [[{\"path\": \"https://github.com/gradio-app/gradio/raw/main/test/test_files/sample_file.pdf\", \"meta\": {\"_type\": \"gradio.FileData\"}}], 0.95, 10, 1, true, [{\"path\": \"https://github.com/gradio-app/gradio/raw/main/test/test_files/sample_file.pdf\", \"meta\": {\"_type\": \"gradio.FileData\"}}], \"True\", \"/home/user/app/output/\"]}' \\\n  | awk -F'\"' '{ print $4}' \\\n  | read EVENT_ID; curl -N http://localhost:7860/gradio_api/call/find_duplicate_pages/$EVENT_ID"}},"/find_duplicate_tabular":{"parameters":[{"label":"Upload CSV, Excel, or Parquet files to 
find duplicate cells/rows. Note that the app will remove duplicates from later cells/files that are found in earlier cells/files and not vice versa.","parameter_name":"files","parameter_has_default":false,"parameter_default":null,"type":{"$defs":{"FileData":{"description":"The FileData class is a subclass of the GradioModel class that represents a file object within a Gradio interface. It is used to store file data and metadata when a file is uploaded.\n\nAttributes:\n    path: The server file path where the file is stored.\n    url: The normalized server URL pointing to the file.\n    size: The size of the file in bytes.\n    orig_name: The original filename before upload.\n    mime_type: The MIME type of the file.\n    is_stream: Indicates whether the file is a stream.\n    meta: Additional metadata used internally (should not be changed).","properties":{"path":{"title":"Path","type":"string"},"url":{"anyOf":[{"type":"string"},{"type":"null"}],"default":null,"title":"Url"},"size":{"anyOf":[{"type":"integer"},{"type":"null"}],"default":null,"title":"Size"},"orig_name":{"anyOf":[{"type":"string"},{"type":"null"}],"default":null,"title":"Orig Name"},"mime_type":{"anyOf":[{"type":"string"},{"type":"null"}],"default":null,"title":"Mime Type"},"is_stream":{"default":false,"title":"Is 
Stream","type":"boolean"},"meta":{"$ref":"#/$defs/FileDataMeta"}},"required":["path"],"title":"FileData","type":"object"},"FileDataMeta":{"properties":{"_type":{"const":"gradio.FileData","title":"Type","type":"string"}},"required":["_type"],"title":"FileDataMeta","type":"object"}},"items":{"$ref":"#/$defs/FileData"},"title":"ListFiles","type":"array","additional_description":null},"python_type":{"type":"list[filepath]","description":null},"component":"File","example_input":[{"path":"https://github.com/gradio-app/gradio/raw/main/test/test_files/sample_file.pdf","meta":{"_type":"gradio.FileData"},"orig_name":"sample_file.pdf","url":"https://github.com/gradio-app/gradio/raw/main/test/test_files/sample_file.pdf"}]},{"label":"Similarity threshold","parameter_name":"threshold","parameter_has_default":true,"parameter_default":0.95,"type":{"type":"number"},"python_type":{"type":"float","description":""},"component":"Number","example_input":3},{"label":"Minimum word count","parameter_name":"min_words","parameter_has_default":true,"parameter_default":10,"type":{"type":"number"},"python_type":{"type":"float","description":""},"component":"Number","example_input":3},{"label":"Choose columns to deduplicate","parameter_name":"text_columns","parameter_has_default":true,"parameter_default":[],"type":{"type":"array","items":{"type":"string","enum":[]}},"python_type":{"type":"list[Literal[]]","description":""},"component":"Dropdown","example_input":[]},{"label":"output_folder_textbox","parameter_name":"output_folder","parameter_has_default":true,"parameter_default":"/home/user/app/output/","type":{"type":"string"},"python_type":{"type":"str","description":""},"component":"Textbox","example_input":"Hello!!"},{"label":"Do initial clean of text (remove URLs, HTML tags, and non-ASCII 
characters)","parameter_name":"do_initial_clean_dup","parameter_has_default":true,"parameter_default":true,"type":{"type":"boolean"},"python_type":{"type":"bool","description":""},"component":"Checkbox","example_input":true},{"label":"Select Excel sheet names that you want to deduplicate (showing sheets present across all Excel files).","parameter_name":"in_excel_tabular_sheets","parameter_has_default":true,"parameter_default":[],"type":{"type":"array","items":{"type":"string","enum":[]}},"python_type":{"type":"list[Literal[]]","description":""},"component":"Dropdown","example_input":[]},{"label":"Remove duplicate rows from deduplicated files","parameter_name":"remove_duplicate_rows","parameter_has_default":true,"parameter_default":false,"type":{"type":"boolean"},"python_type":{"type":"bool","description":""},"component":"Checkbox","example_input":true}],"returns":[{"label":"Duplicate Cell Matches","type":{"properties":{"headers":{"items":{},"title":"Headers","type":"array"},"data":{"items":{"items":{},"type":"array"},"title":"Data","type":"array"},"metadata":{"anyOf":[{"additionalProperties":{"anyOf":[{"items":{},"type":"array"},{"type":"null"}]},"type":"object"},{"type":"null"}],"default":null,"title":"Metadata"}},"required":["headers","data"],"title":"DataframeData","type":"object","additional_description":null},"python_type":{"type":"dict(headers: list[Any], data: list[list[Any]], metadata: dict(str, list[Any] | None) | None)","description":""},"component":"Dataframe"},{"label":"Download cleaned file (duplicates 
removed)","type":{"$defs":{"FileDataMeta":{"properties":{"_type":{"const":"gradio.FileData","title":"Type","type":"string"}},"required":["_type"],"title":"FileDataMeta","type":"object"}},"properties":{"path":{"title":"Path","type":"string"},"url":{"anyOf":[{"type":"string"},{"type":"null"}],"default":null,"title":"Url"},"size":{"anyOf":[{"type":"integer"},{"type":"null"}],"default":null,"title":"Size"},"orig_name":{"anyOf":[{"type":"string"},{"type":"null"}],"default":null,"title":"Orig Name"},"mime_type":{"anyOf":[{"type":"string"},{"type":"null"}],"default":null,"title":"Mime Type"},"is_stream":{"default":false,"title":"Is Stream","type":"boolean"},"meta":{"$ref":"#/$defs/FileDataMeta"}},"required":["path"],"title":"FileData","type":"object","additional_description":"The FileData class is a subclass of the GradioModel class that represents a file object within a Gradio interface. It is used to store file data and metadata when a file is uploaded.\n\nAttributes:\n    path: The server file path where the file is stored.\n    url: The normalized server URL pointing to the file.\n    size: The size of the file in bytes.\n    orig_name: The original filename before upload.\n    mime_type: The MIME type of the file.\n    is_stream: Indicates whether the file is a stream.\n    meta: Additional metadata used internally (should not be changed)."},"python_type":{"type":"filepath","description":""},"component":"File"},{"label":"Select file to clean","type":{"type":"string","enum":[]},"python_type":{"type":"Literal[]","description":""},"component":"Dropdown"},{"label":"task","type":{"type":"string"},"python_type":{"type":"str","description":""},"component":"Textbox"}],"api_visibility":"public","description":"","code_snippets":{"python":"from gradio_client import Client, handle_file\n\nclient = Client(\"http://localhost:7860\")\nresult = 
client.predict(\n\tfiles=[handle_file('https://github.com/gradio-app/gradio/raw/main/test/test_files/sample_file.pdf')],\n\tthreshold=0.95,\n\tmin_words=10,\n\ttext_columns=[],\n\toutput_folder=\"/home/user/app/output/\",\n\tdo_initial_clean_dup=True,\n\tin_excel_tabular_sheets=[],\n\tremove_duplicate_rows=False,\n\tapi_name=\"/find_duplicate_tabular\",\n)\nprint(result)","javascript":"import { Client } from \"@gradio/client\";\n\nconst response_0 = await fetch(\"\");\nconst exampleFile = await response_0.blob();\n\nconst client = await Client.connect(\"http://localhost:7860\");\nconst result = await client.predict(\"/find_duplicate_tabular\", {\n\t\tfiles: exampleFile,\n\t\tthreshold: 0.95,\n\t\tmin_words: 10,\n\t\ttext_columns: [],\n\t\toutput_folder: \"/home/user/app/output/\",\n\t\tdo_initial_clean_dup: true,\n\t\tin_excel_tabular_sheets: [],\n\t\tremove_duplicate_rows: false,\n});\n\nconsole.log(result.data);","bash":"curl -X POST http://localhost:7860/gradio_api/call/find_duplicate_tabular -s -H \"Content-Type: application/json\" -d '{\"data\": [[{\"path\": \"https://github.com/gradio-app/gradio/raw/main/test/test_files/sample_file.pdf\", \"meta\": {\"_type\": \"gradio.FileData\"}}], 0.95, 10, [], \"/home/user/app/output/\", true, [], false]}' \\\n  | awk -F'\"' '{ print $4}' \\\n  | read EVENT_ID; curl -N http://localhost:7860/gradio_api/call/find_duplicate_tabular/$EVENT_ID"}},"/combine_review_csvs":{"parameters":[{"label":"Combine multiple review_file.csv files together here.","parameter_name":"file_list","parameter_has_default":false,"parameter_default":null,"type":{"$defs":{"FileData":{"description":"The FileData class is a subclass of the GradioModel class that represents a file object within a Gradio interface. 
It is used to store file data and metadata when a file is uploaded.\n\nAttributes:\n    path: The server file path where the file is stored.\n    url: The normalized server URL pointing to the file.\n    size: The size of the file in bytes.\n    orig_name: The original filename before upload.\n    mime_type: The MIME type of the file.\n    is_stream: Indicates whether the file is a stream.\n    meta: Additional metadata used internally (should not be changed).","properties":{"path":{"title":"Path","type":"string"},"url":{"anyOf":[{"type":"string"},{"type":"null"}],"default":null,"title":"Url"},"size":{"anyOf":[{"type":"integer"},{"type":"null"}],"default":null,"title":"Size"},"orig_name":{"anyOf":[{"type":"string"},{"type":"null"}],"default":null,"title":"Orig Name"},"mime_type":{"anyOf":[{"type":"string"},{"type":"null"}],"default":null,"title":"Mime Type"},"is_stream":{"default":false,"title":"Is Stream","type":"boolean"},"meta":{"$ref":"#/$defs/FileDataMeta"}},"required":["path"],"title":"FileData","type":"object"},"FileDataMeta":{"properties":{"_type":{"const":"gradio.FileData","title":"Type","type":"string"}},"required":["_type"],"title":"FileDataMeta","type":"object"}},"items":{"$ref":"#/$defs/FileData"},"title":"ListFiles","type":"array","additional_description":null},"python_type":{"type":"list[filepath]","description":null},"component":"File","example_input":[{"path":"https://github.com/gradio-app/gradio/raw/main/test/test_files/sample_file.pdf","meta":{"_type":"gradio.FileData"},"orig_name":"sample_file.pdf","url":"https://github.com/gradio-app/gradio/raw/main/test/test_files/sample_file.pdf"}]}],"returns":[{"label":"Combine multiple review_file.csv files together here.","type":{"$defs":{"FileData":{"description":"The FileData class is a subclass of the GradioModel class that represents a file object within a Gradio interface. 
It is used to store file data and metadata when a file is uploaded.\n\nAttributes:\n    path: The server file path where the file is stored.\n    url: The normalized server URL pointing to the file.\n    size: The size of the file in bytes.\n    orig_name: The original filename before upload.\n    mime_type: The MIME type of the file.\n    is_stream: Indicates whether the file is a stream.\n    meta: Additional metadata used internally (should not be changed).","properties":{"path":{"title":"Path","type":"string"},"url":{"anyOf":[{"type":"string"},{"type":"null"}],"default":null,"title":"Url"},"size":{"anyOf":[{"type":"integer"},{"type":"null"}],"default":null,"title":"Size"},"orig_name":{"anyOf":[{"type":"string"},{"type":"null"}],"default":null,"title":"Orig Name"},"mime_type":{"anyOf":[{"type":"string"},{"type":"null"}],"default":null,"title":"Mime Type"},"is_stream":{"default":false,"title":"Is Stream","type":"boolean"},"meta":{"$ref":"#/$defs/FileDataMeta"}},"required":["path"],"title":"FileData","type":"object"},"FileDataMeta":{"properties":{"_type":{"const":"gradio.FileData","title":"Type","type":"string"}},"required":["_type"],"title":"FileDataMeta","type":"object"}},"items":{"$ref":"#/$defs/FileData"},"title":"ListFiles","type":"array","additional_description":null},"python_type":{"type":"list[filepath]","description":""},"component":"File"}],"api_visibility":"public","description":"","code_snippets":{"python":"from gradio_client import Client, handle_file\n\nclient = Client(\"http://localhost:7860\")\nresult = client.predict(\n\tfile_list=[handle_file('https://github.com/gradio-app/gradio/raw/main/test/test_files/sample_file.pdf')],\n\tapi_name=\"/combine_review_csvs\",\n)\nprint(result)","javascript":"import { Client } from \"@gradio/client\";\n\nconst response_0 = await fetch(\"\");\nconst exampleFile = await response_0.blob();\n\nconst client = await Client.connect(\"http://localhost:7860\");\nconst result = await 
client.predict(\"/combine_review_csvs\", {\n\t\tfile_list: exampleFile,\n});\n\nconsole.log(result.data);","bash":"curl -X POST http://localhost:7860/gradio_api/call/combine_review_csvs -s -H \"Content-Type: application/json\" -d '{\"data\": [[{\"path\": \"https://github.com/gradio-app/gradio/raw/main/test/test_files/sample_file.pdf\", \"meta\": {\"_type\": \"gradio.FileData\"}}]]}' \\\n  | awk -F'\"' '{ print $4}' \\\n  | read EVENT_ID; curl -N http://localhost:7860/gradio_api/call/combine_review_csvs/$EVENT_ID"}},"/combine_review_pdfs":{"parameters":[{"label":"Combine multiple _redactions_for_review PDFs","parameter_name":"file_list","parameter_has_default":false,"parameter_default":null,"type":{"$defs":{"FileData":{"description":"The FileData class is a subclass of the GradioModel class that represents a file object within a Gradio interface. It is used to store file data and metadata when a file is uploaded.\n\nAttributes:\n    path: The server file path where the file is stored.\n    url: The normalized server URL pointing to the file.\n    size: The size of the file in bytes.\n    orig_name: The original filename before upload.\n    mime_type: The MIME type of the file.\n    is_stream: Indicates whether the file is a stream.\n    meta: Additional metadata used internally (should not be changed).","properties":{"path":{"title":"Path","type":"string"},"url":{"anyOf":[{"type":"string"},{"type":"null"}],"default":null,"title":"Url"},"size":{"anyOf":[{"type":"integer"},{"type":"null"}],"default":null,"title":"Size"},"orig_name":{"anyOf":[{"type":"string"},{"type":"null"}],"default":null,"title":"Orig Name"},"mime_type":{"anyOf":[{"type":"string"},{"type":"null"}],"default":null,"title":"Mime Type"},"is_stream":{"default":false,"title":"Is 
Stream","type":"boolean"},"meta":{"$ref":"#/$defs/FileDataMeta"}},"required":["path"],"title":"FileData","type":"object"},"FileDataMeta":{"properties":{"_type":{"const":"gradio.FileData","title":"Type","type":"string"}},"required":["_type"],"title":"FileDataMeta","type":"object"}},"items":{"$ref":"#/$defs/FileData"},"title":"ListFiles","type":"array","additional_description":null},"python_type":{"type":"list[filepath]","description":null},"component":"File","example_input":[{"path":"https://github.com/gradio-app/gradio/raw/main/test/test_files/sample_file.pdf","meta":{"_type":"gradio.FileData"},"orig_name":"sample_file.pdf","url":"https://github.com/gradio-app/gradio/raw/main/test/test_files/sample_file.pdf"}]},{"label":"output_folder_textbox","parameter_name":"output_folder","parameter_has_default":true,"parameter_default":"/home/user/app/output/","type":{"type":"string"},"python_type":{"type":"str","description":""},"component":"Textbox","example_input":"Hello!!"}],"returns":[{"label":"Combine multiple _redactions_for_review PDFs","type":{"$defs":{"FileData":{"description":"The FileData class is a subclass of the GradioModel class that represents a file object within a Gradio interface. 
It is used to store file data and metadata when a file is uploaded.\n\nAttributes:\n    path: The server file path where the file is stored.\n    url: The normalized server URL pointing to the file.\n    size: The size of the file in bytes.\n    orig_name: The original filename before upload.\n    mime_type: The MIME type of the file.\n    is_stream: Indicates whether the file is a stream.\n    meta: Additional metadata used internally (should not be changed).","properties":{"path":{"title":"Path","type":"string"},"url":{"anyOf":[{"type":"string"},{"type":"null"}],"default":null,"title":"Url"},"size":{"anyOf":[{"type":"integer"},{"type":"null"}],"default":null,"title":"Size"},"orig_name":{"anyOf":[{"type":"string"},{"type":"null"}],"default":null,"title":"Orig Name"},"mime_type":{"anyOf":[{"type":"string"},{"type":"null"}],"default":null,"title":"Mime Type"},"is_stream":{"default":false,"title":"Is Stream","type":"boolean"},"meta":{"$ref":"#/$defs/FileDataMeta"}},"required":["path"],"title":"FileData","type":"object"},"FileDataMeta":{"properties":{"_type":{"const":"gradio.FileData","title":"Type","type":"string"}},"required":["_type"],"title":"FileDataMeta","type":"object"}},"items":{"$ref":"#/$defs/FileData"},"title":"ListFiles","type":"array","additional_description":null},"python_type":{"type":"list[filepath]","description":""},"component":"File"}],"api_visibility":"public","description":"Combine redaction comments from multiple '_redactions_for_review' PDFs into one PDF. Only validates that all files have the same number of pages. File names may differ (e.g. file_redactions_for_review (1).pdf, file_redactions_for_review (2).pdf, or file_FINAL_redactions_for_review.pdf). The output filename is derived from the first input file: the name up to and including 'redactions_for_review' is taken (anything after that is dropped), then '_combined' is appended, e.g. 
file_redactions_for_review_combined.pdf.","code_snippets":{"python":"from gradio_client import Client, handle_file\n\nclient = Client(\"http://localhost:7860\")\nresult = client.predict(\n\tfile_list=[handle_file('https://github.com/gradio-app/gradio/raw/main/test/test_files/sample_file.pdf')],\n\toutput_folder=\"/home/user/app/output/\",\n\tapi_name=\"/combine_review_pdfs\",\n)\nprint(result)","javascript":"import { Client } from \"@gradio/client\";\n\nconst response_0 = await fetch(\"\");\nconst exampleFile = await response_0.blob();\n\nconst client = await Client.connect(\"http://localhost:7860\");\nconst result = await client.predict(\"/combine_review_pdfs\", {\n\t\tfile_list: exampleFile,\n\t\toutput_folder: \"/home/user/app/output/\",\n});\n\nconsole.log(result.data);","bash":"curl -X POST http://localhost:7860/gradio_api/call/combine_review_pdfs -s -H \"Content-Type: application/json\" -d '{\"data\": [[{\"path\": \"https://github.com/gradio-app/gradio/raw/main/test/test_files/sample_file.pdf\", \"meta\": {\"_type\": \"gradio.FileData\"}}], \"/home/user/app/output/\"]}' \\\n  | awk -F'\"' '{ print $4}' \\\n  | read EVENT_ID; curl -N http://localhost:7860/gradio_api/call/combine_review_pdfs/$EVENT_ID"}},"/doc_redact":{"parameters":[{"label":"1st","parameter_name":"document_file","parameter_has_default":false,"parameter_default":null,"type":{},"python_type":{"type":"Any","description":""},"component":"Api","example_input":"..."},{"label":"2nd","parameter_name":"redact_entities","parameter_has_default":true,"parameter_default":null,"type":{"oneOf":[{"type":"null"},{"type":"array","items":{"type":"string"}}]},"python_type":{"type":"None | list[str]","description":""},"component":"Api","example_input":"..."},{"label":"3rd","parameter_name":"output_dir","parameter_has_default":true,"parameter_default":null,"type":{"oneOf":[{"type":"null"},{"type":"string"}]},"python_type":{"type":"None | 
str","description":""},"component":"Api","example_input":"..."},{"label":"4th","parameter_name":"ocr_method","parameter_has_default":true,"parameter_default":null,"type":{"oneOf":[{"type":"null"},{"type":"string"}]},"python_type":{"type":"None | str","description":""},"component":"Api","example_input":"..."},{"label":"5th","parameter_name":"pii_method","parameter_has_default":true,"parameter_default":"Local","type":{"oneOf":[{"type":"null"},{"type":"string"}]},"python_type":{"type":"None | str","description":""},"component":"Api","example_input":"Local"},{"label":"6th","parameter_name":"allow_list","parameter_has_default":true,"parameter_default":null,"type":{"oneOf":[{"type":"null"},{"type":"array","items":{"type":"string"}}]},"python_type":{"type":"None | list[str]","description":""},"component":"Api","example_input":"..."},{"label":"7th","parameter_name":"deny_list","parameter_has_default":true,"parameter_default":null,"type":{"oneOf":[{"type":"null"},{"type":"array","items":{"type":"string"}}]},"python_type":{"type":"None | list[str]","description":""},"component":"Api","example_input":"..."},{"label":"8th","parameter_name":"page_min","parameter_has_default":true,"parameter_default":null,"type":{"oneOf":[{"type":"null"},{"type":"integer"}]},"python_type":{"type":"None | int","description":""},"component":"Api","example_input":"..."},{"label":"9th","parameter_name":"page_max","parameter_has_default":true,"parameter_default":null,"type":{"oneOf":[{"type":"null"},{"type":"integer"}]},"python_type":{"type":"None | int","description":""},"component":"Api","example_input":"..."},{"label":"10th","parameter_name":"llm_instruction","parameter_has_default":true,"parameter_default":"","type":{"oneOf":[{"type":"null"},{"type":"string"}]},"python_type":{"type":"None | 
str","description":""},"component":"Api","example_input":""}],"returns":[{"label":"1st","type":{},"python_type":{"type":"Any","description":""},"component":"Api"}],"api_visibility":"public","description":"Redact a single PDF/image in one call (CLI-aligned). Returns (output_paths, message). Does not update the main UI session.","code_snippets":{"python":"from gradio_client import Client\n\nclient = Client(\"http://localhost:7860\")\nresult = client.predict(\n\tdocument_file=...,\n\tredact_entities=None,\n\toutput_dir=None,\n\tocr_method=None,\n\tpii_method=Local,\n\tallow_list=None,\n\tdeny_list=None,\n\tpage_min=None,\n\tpage_max=None,\n\tllm_instruction=None,\n\tapi_name=\"/doc_redact\",\n)\nprint(result)","javascript":"import { Client } from \"@gradio/client\";\n\nconst client = await Client.connect(\"http://localhost:7860\");\nconst result = await client.predict(\"/doc_redact\", {\n\t\tdocument_file: ...,\n\t\tredact_entities: null,\n\t\toutput_dir: null,\n\t\tocr_method: null,\n\t\tpii_method: Local,\n\t\tallow_list: null,\n\t\tdeny_list: null,\n\t\tpage_min: null,\n\t\tpage_max: null,\n\t\tllm_instruction: null,\n});\n\nconsole.log(result.data);","bash":"curl -X POST http://localhost:7860/gradio_api/call/doc_redact -s -H \"Content-Type: application/json\" -d '{\"data\": [..., null, null, null, Local, null, null, null, null, null]}' \\\n  | awk -F'\"' '{ print $4}' \\\n  | read EVENT_ID; curl -N 
http://localhost:7860/gradio_api/call/doc_redact/$EVENT_ID"}},"/review_apply":{"parameters":[{"label":"1st","parameter_name":"pdf_file","parameter_has_default":false,"parameter_default":null,"type":{},"python_type":{"type":"Any","description":""},"component":"Api","example_input":"..."},{"label":"2nd","parameter_name":"review_csv_file","parameter_has_default":false,"parameter_default":null,"type":{},"python_type":{"type":"Any","description":""},"component":"Api","example_input":"..."},{"label":"3rd","parameter_name":"output_dir","parameter_has_default":true,"parameter_default":null,"type":{"oneOf":[{"type":"null"},{"type":"string"}]},"python_type":{"type":"None | str","description":""},"component":"Api","example_input":"..."}],"returns":[{"label":"1st","type":{},"python_type":{"type":"Any","description":""},"component":"Api"}],"api_visibility":"public","description":"Apply redactions in one call from the original PDF and a *_review_file.csv. Returns (output_paths, message). Does not update the Review tab UI session.","code_snippets":{"python":"from gradio_client import Client\n\nclient = Client(\"http://localhost:7860\")\nresult = client.predict(\n\tpdf_file=...,\n\treview_csv_file=...,\n\toutput_dir=None,\n\tapi_name=\"/review_apply\",\n)\nprint(result)","javascript":"import { Client } from \"@gradio/client\";\n\nconst client = await Client.connect(\"http://localhost:7860\");\nconst result = await client.predict(\"/review_apply\", {\n\t\tpdf_file: ...,\n\t\treview_csv_file: ...,\n\t\toutput_dir: null,\n});\n\nconsole.log(result.data);","bash":"curl -X POST http://localhost:7860/gradio_api/call/review_apply -s -H \"Content-Type: application/json\" -d '{\"data\": [..., ..., null]}' \\\n  | awk -F'\"' '{ print $4}' \\\n  | read EVENT_ID; curl -N 
http://localhost:7860/gradio_api/call/review_apply/$EVENT_ID"}},"/pdf_summarise":{"parameters":[{"label":"1st","parameter_name":"pdf_file","parameter_has_default":false,"parameter_default":null,"type":{},"python_type":{"type":"Any","description":""},"component":"Api","example_input":"..."},{"label":"2nd","parameter_name":"ocr_method","parameter_has_default":true,"parameter_default":null,"type":{"oneOf":[{"type":"null"},{"type":"string"}]},"python_type":{"type":"None | str","description":""},"component":"Api","example_input":"..."},{"label":"3rd","parameter_name":"summarisation_inference_method","parameter_has_default":true,"parameter_default":null,"type":{"oneOf":[{"type":"null"},{"type":"string"}]},"python_type":{"type":"None | str","description":""},"component":"Api","example_input":"..."},{"label":"4th","parameter_name":"summarisation_format","parameter_has_default":true,"parameter_default":null,"type":{"oneOf":[{"type":"null"},{"type":"string"}]},"python_type":{"type":"None | str","description":""},"component":"Api","example_input":"..."},{"label":"5th","parameter_name":"summarisation_context","parameter_has_default":true,"parameter_default":null,"type":{"oneOf":[{"type":"null"},{"type":"string"}]},"python_type":{"type":"None | str","description":""},"component":"Api","example_input":"..."},{"label":"6th","parameter_name":"summarisation_additional_instructions","parameter_has_default":true,"parameter_default":null,"type":{"oneOf":[{"type":"null"},{"type":"string"}]},"python_type":{"type":"None | str","description":""},"component":"Api","example_input":"..."},{"label":"7th","parameter_name":"summarisation_temperature","parameter_has_default":true,"parameter_default":null,"type":{"oneOf":[{"type":"null"},{"type":"number"}]},"python_type":{"type":"None | 
float","description":""},"component":"Api","example_input":"..."},{"label":"8th","parameter_name":"summarisation_max_pages_per_group","parameter_has_default":true,"parameter_default":null,"type":{"oneOf":[{"type":"null"},{"type":"integer"}]},"python_type":{"type":"None | int","description":""},"component":"Api","example_input":"..."},{"label":"9th","parameter_name":"summarisation_api_key","parameter_has_default":true,"parameter_default":null,"type":{"oneOf":[{"type":"null"},{"type":"string"}]},"python_type":{"type":"None | str","description":""},"component":"Api","example_input":"..."},{"label":"10th","parameter_name":"output_dir","parameter_has_default":true,"parameter_default":null,"type":{"oneOf":[{"type":"null"},{"type":"string"}]},"python_type":{"type":"None | str","description":""},"component":"Api","example_input":"..."},{"label":"11th","parameter_name":"input_dir","parameter_has_default":true,"parameter_default":null,"type":{"oneOf":[{"type":"null"},{"type":"string"}]},"python_type":{"type":"None | str","description":""},"component":"Api","example_input":"..."},{"label":"12th","parameter_name":"page_min","parameter_has_default":true,"parameter_default":null,"type":{"oneOf":[{"type":"null"},{"type":"integer"}]},"python_type":{"type":"None | int","description":""},"component":"Api","example_input":"..."},{"label":"13th","parameter_name":"page_max","parameter_has_default":true,"parameter_default":null,"type":{"oneOf":[{"type":"null"},{"type":"integer"}]},"python_type":{"type":"None | int","description":""},"component":"Api","example_input":"..."}],"returns":[{"label":"1st","type":{},"python_type":{"type":"Any","description":""},"component":"Api"}],"api_visibility":"public","description":"Summarise a PDF in one call (CLI-aligned: OCR/text extract then LLM summary). 
Returns (output_paths, status_message, summary_text).","code_snippets":{"python":"from gradio_client import Client\n\nclient = Client(\"http://localhost:7860\")\nresult = client.predict(\n\tpdf_file=...,\n\tocr_method=None,\n\tsummarisation_inference_method=None,\n\tsummarisation_format=None,\n\tsummarisation_context=None,\n\tsummarisation_additional_instructions=None,\n\tsummarisation_temperature=None,\n\tsummarisation_max_pages_per_group=None,\n\tsummarisation_api_key=None,\n\toutput_dir=None,\n\tinput_dir=None,\n\tpage_min=None,\n\tpage_max=None,\n\tapi_name=\"/pdf_summarise\",\n)\nprint(result)","javascript":"import { Client } from \"@gradio/client\";\n\nconst client = await Client.connect(\"http://localhost:7860\");\nconst result = await client.predict(\"/pdf_summarise\", {\n\t\tpdf_file: ...,\n\t\tocr_method: null,\n\t\tsummarisation_inference_method: null,\n\t\tsummarisation_format: null,\n\t\tsummarisation_context: null,\n\t\tsummarisation_additional_instructions: null,\n\t\tsummarisation_temperature: null,\n\t\tsummarisation_max_pages_per_group: null,\n\t\tsummarisation_api_key: null,\n\t\toutput_dir: null,\n\t\tinput_dir: null,\n\t\tpage_min: null,\n\t\tpage_max: null,\n});\n\nconsole.log(result.data);","bash":"curl -X POST http://localhost:7860/gradio_api/call/pdf_summarise -s -H \"Content-Type: application/json\" -d '{\"data\": [..., null, null, null, null, null, null, null, null, null, null, null, null]}' \\\n  | awk -F'\"' '{ print $4}' \\\n  | read EVENT_ID; curl -N 
http://localhost:7860/gradio_api/call/pdf_summarise/$EVENT_ID"}},"/tabular_redact":{"parameters":[{"label":"1st","parameter_name":"data_file","parameter_has_default":false,"parameter_default":null,"type":{},"python_type":{"type":"Any","description":""},"component":"Api","example_input":"..."},{"label":"2nd","parameter_name":"redact_entities","parameter_has_default":true,"parameter_default":null,"type":{"oneOf":[{"type":"null"},{"type":"array","items":{"type":"string"}}]},"python_type":{"type":"None | list[str]","description":""},"component":"Api","example_input":"..."},{"label":"3rd","parameter_name":"output_dir","parameter_has_default":true,"parameter_default":null,"type":{"oneOf":[{"type":"null"},{"type":"string"}]},"python_type":{"type":"None | str","description":""},"component":"Api","example_input":"..."},{"label":"4th","parameter_name":"pii_method","parameter_has_default":true,"parameter_default":"Local","type":{"oneOf":[{"type":"null"},{"type":"string"}]},"python_type":{"type":"None | str","description":""},"component":"Api","example_input":"Local"},{"label":"5th","parameter_name":"columns","parameter_has_default":true,"parameter_default":null,"type":{"oneOf":[{"type":"null"},{"type":"array","items":{"type":"string"}}]},"python_type":{"type":"None | list[str]","description":""},"component":"Api","example_input":"..."},{"label":"6th","parameter_name":"anon_strategy","parameter_has_default":true,"parameter_default":"redact","type":{"oneOf":[{"type":"null"},{"type":"string"}]},"python_type":{"type":"None | str","description":""},"component":"Api","example_input":"redact"},{"label":"7th","parameter_name":"allow_list","parameter_has_default":true,"parameter_default":null,"type":{"oneOf":[{"type":"null"},{"type":"array","items":{"type":"string"}}]},"python_type":{"type":"None | 
list[str]","description":""},"component":"Api","example_input":"..."},{"label":"8th","parameter_name":"deny_list","parameter_has_default":true,"parameter_default":null,"type":{"oneOf":[{"type":"null"},{"type":"array","items":{"type":"string"}}]},"python_type":{"type":"None | list[str]","description":""},"component":"Api","example_input":"..."},{"label":"9th","parameter_name":"language","parameter_has_default":true,"parameter_default":"en","type":{"oneOf":[{"type":"null"},{"type":"string"}]},"python_type":{"type":"None | str","description":""},"component":"Api","example_input":"en"},{"label":"10th","parameter_name":"max_fuzzy_spelling_mistakes_num","parameter_has_default":true,"parameter_default":0,"type":{"oneOf":[{"type":"null"},{"type":"integer"}]},"python_type":{"type":"None | int","description":""},"component":"Api","example_input":0},{"label":"11th","parameter_name":"do_initial_clean","parameter_has_default":true,"parameter_default":true,"type":{"oneOf":[{"type":"null"},{"type":"boolean"}]},"python_type":{"type":"None | bool","description":""},"component":"Api","example_input":true},{"label":"12th","parameter_name":"llm_instruction","parameter_has_default":true,"parameter_default":"","type":{"oneOf":[{"type":"null"},{"type":"string"}]},"python_type":{"type":"None | str","description":""},"component":"Api","example_input":""},{"label":"13th","parameter_name":"llm_entities","parameter_has_default":true,"parameter_default":null,"type":{"oneOf":[{"type":"null"},{"type":"array","items":{"type":"string"}}]},"python_type":{"type":"None | list[str]","description":""},"component":"Api","example_input":"..."},{"label":"14th","parameter_name":"comprehend_entities","parameter_has_default":true,"parameter_default":null,"type":{"oneOf":[{"type":"null"},{"type":"array","items":{"type":"string"}}]},"python_type":{"type":"None | 
list[str]","description":""},"component":"Api","example_input":"..."},{"label":"15th","parameter_name":"aws_access_key","parameter_has_default":true,"parameter_default":"","type":{"oneOf":[{"type":"null"},{"type":"string"}]},"python_type":{"type":"None | str","description":""},"component":"Api","example_input":""},{"label":"16th","parameter_name":"aws_secret_key","parameter_has_default":true,"parameter_default":"","type":{"oneOf":[{"type":"null"},{"type":"string"}]},"python_type":{"type":"None | str","description":""},"component":"Api","example_input":""}],"returns":[{"label":"1st","type":{},"python_type":{"type":"Any","description":""},"component":"Api"}],"api_visibility":"public","description":"Redact a single tabular file (CSV/XLSX/Parquet/DOCX) in one call. Returns (output_paths, message). Does not update the Tabular UI session.","code_snippets":{"python":"from gradio_client import Client\n\nclient = Client(\"http://localhost:7860\")\nresult = client.predict(\n\tdata_file=...,\n\tredact_entities=None,\n\toutput_dir=None,\n\tpii_method=\"Local\",\n\tcolumns=None,\n\tanon_strategy=\"redact\",\n\tallow_list=None,\n\tdeny_list=None,\n\tlanguage=\"en\",\n\tmax_fuzzy_spelling_mistakes_num=0,\n\tdo_initial_clean=True,\n\tllm_instruction=None,\n\tllm_entities=None,\n\tcomprehend_entities=None,\n\taws_access_key=None,\n\taws_secret_key=None,\n\tapi_name=\"/tabular_redact\",\n)\nprint(result)","javascript":"import { Client } from \"@gradio/client\";\n\nconst client = await Client.connect(\"http://localhost:7860\");\nconst result = await client.predict(\"/tabular_redact\", {\n\t\tdata_file: ...,\n\t\tredact_entities: null,\n\t\toutput_dir: null,\n\t\tpii_method: \"Local\",\n\t\tcolumns: null,\n\t\tanon_strategy: \"redact\",\n\t\tallow_list: null,\n\t\tdeny_list: null,\n\t\tlanguage: \"en\",\n\t\tmax_fuzzy_spelling_mistakes_num: 0,\n\t\tdo_initial_clean: true,\n\t\tllm_instruction: null,\n\t\tllm_entities: null,\n\t\tcomprehend_entities: null,\n\t\taws_access_key: null,\n\t\taws_secret_key: 
null,\n});\n\nconsole.log(result.data);","bash":"curl -X POST http://localhost:7860/gradio_api/call/tabular_redact -s -H \"Content-Type: application/json\" -d '{\"data\": [..., null, null, \"Local\", null, \"redact\", null, null, \"en\", 0, true, null, null, null, null, null]}' \\\n  | awk -F'\"' '{ print $4}' \\\n  | read EVENT_ID; curl -N http://localhost:7860/gradio_api/call/tabular_redact/$EVENT_ID"}},"/preview_boxes":{"parameters":[{"label":"1st","parameter_name":"pdf_file","parameter_has_default":false,"parameter_default":null,"type":{},"python_type":{"type":"Any","description":""},"component":"Api","example_input":"..."},{"label":"2nd","parameter_name":"review_csv_file","parameter_has_default":false,"parameter_default":null,"type":{},"python_type":{"type":"Any","description":""},"component":"Api","example_input":"..."},{"label":"3rd","parameter_name":"dpi","parameter_has_default":true,"parameter_default":150,"type":{"oneOf":[{"type":"null"},{"type":"integer"}]},"python_type":{"type":"None | int","description":""},"component":"Api","example_input":150},{"label":"4th","parameter_name":"max_width","parameter_has_default":true,"parameter_default":1280,"type":{"oneOf":[{"type":"null"},{"type":"integer"}]},"python_type":{"type":"None | int","description":""},"component":"Api","example_input":1280},{"label":"5th","parameter_name":"draw_grid","parameter_has_default":true,"parameter_default":true,"type":{"oneOf":[{"type":"null"},{"type":"boolean"}]},"python_type":{"type":"None | bool","description":""},"component":"Api","example_input":true},{"label":"6th","parameter_name":"pages","parameter_has_default":true,"parameter_default":null,"type":{"oneOf":[{"type":"null"},{"type":"string"}]},"python_type":{"type":"None | str","description":""},"component":"Api","example_input":"..."}],"returns":[{"label":"1st","type":{},"python_type":{"type":"Any","description":""},"component":"Api"}],"api_visibility":"public","description":"Render proposed redaction boxes from a *_review_file.csv 
onto the original PDF and return a ZIP of preview PNGs. Use this to verify box positions before calling /review_apply — no redaction is applied. Returns (zip_path, message). For agents with local files, calling tools.preview_redaction_boxes.preview_redaction_boxes() directly is faster.","code_snippets":{"python":"from gradio_client import Client\n\nclient = Client(\"http://localhost:7860\")\nresult = client.predict(\n\tpdf_file=...,\n\treview_csv_file=...,\n\tdpi=150,\n\tmax_width=1280,\n\tdraw_grid=True,\n\tpages=None,\n\tapi_name=\"/preview_boxes\",\n)\nprint(result)","javascript":"import { Client } from \"@gradio/client\";\n\nconst client = await Client.connect(\"http://localhost:7860\");\nconst result = await client.predict(\"/preview_boxes\", {\n\t\tpdf_file: ...,\n\t\treview_csv_file: ...,\n\t\tdpi: 150,\n\t\tmax_width: 1280,\n\t\tdraw_grid: true,\n\t\tpages: null,\n});\n\nconsole.log(result.data);","bash":"curl -X POST http://localhost:7860/gradio_api/call/preview_boxes -s -H \"Content-Type: application/json\" -d '{\"data\": [..., ..., 150, 1280, true, null]}' \\\n  | awk -F'\"' '{ print $4}' \\\n  | read EVENT_ID; curl -N http://localhost:7860/gradio_api/call/preview_boxes/$EVENT_ID"}}},"unnamed_endpoints":{}}