# -*- coding: utf-8 -*-
"""
:Synopsis:          Collection of functions relating to documents (e.g. ``https://community.example.com/docs/DOC-1234``)
:Usage:             ``from khorosjx.content import docs``
:Example:           ``content_id = docs.get_content_id(url)``
:Created By:        Jeff Shurtliff
:Last Modified:     Jeff Shurtliff
:Modified Date:     22 Sep 2021
"""

import pandas as pd

from .. import core, errors
from . import base
from ..utils import core_utils
from ..places import base as places_core

# Module-level connection state, populated lazily by retrieve_connection_info()
base_url = ''             # Empty string until the connection information has been retrieved
api_credentials = None    # None until the connection information has been retrieved

def verify_core_connection():
    """This function ensures the module-level connection information (Base URL and API credentials) is defined.

    .. versionchanged:: 3.1.0
       Refactored the function to be more pythonic and to avoid depending on a try/except block.

    :returns: None
    :raises: :py:exc:`khorosjx.errors.exceptions.KhorosJXError`,
             :py:exc:`khorosjx.errors.exceptions.NoCredentialsError`
    """
    # Both values must be truthy; if either is missing the connection info is (re)loaded
    connection_defined = bool(base_url and api_credentials)
    if not connection_defined:
        retrieve_connection_info()
def retrieve_connection_info():
    """This function populates the module-level ``base_url`` and ``api_credentials`` global variables.

    .. versionchanged:: 3.1.0
       Refactored the function to be more efficient.

    :returns: None
    :raises: :py:exc:`khorosjx.errors.exceptions.KhorosJXError`,
             :py:exc:`khorosjx.errors.exceptions.NoCredentialsError`
    """
    # The assignments below must rebind the module-level names rather than create locals
    global base_url, api_credentials
    connection_info = core.get_connection_info()
    base_url, api_credentials = connection_info
# Define function to get the content ID from a URL
def get_content_id(lookup_value, lookup_type='url', verify_ssl=True):
    """This function obtains the Content ID for a particular document.

    .. versionchanged:: 2.6.0
       Added the ``verify_ssl`` argument.

    :param lookup_value: The URL (or Document ID, depending on ``lookup_type``) of the document
    :type lookup_value: str
    :param lookup_type: The type of value being used for the lookup: ``url`` (default), ``id`` or ``doc_id``
    :type lookup_type: str
    :param verify_ssl: Determines if API calls should verify SSL certificates (``True`` by default)
    :type verify_ssl: bool
    :returns: The Content ID for the document
    :raises: :py:exc:`ValueError`,
             :py:exc:`khorosjx.errors.exceptions.ContentNotFoundError`,
             :py:exc:`khorosjx.errors.exceptions.InvalidLookupTypeError`
    """
    if lookup_type not in ('url', 'id', 'doc_id'):
        raise errors.exceptions.InvalidLookupTypeError()

    # Any non-URL lookup type is treated as a Document ID and converted into a URL first
    document_url = lookup_value if lookup_type == 'url' else get_url_for_id(lookup_value)
    return base.get_content_id(document_url, 'document', verify_ssl)
def get_url_for_id(doc_id):
    """This function constructs a full URL for a given Document ID.

    :param doc_id: The Document ID with which to construct the URL
    :type doc_id: int, str
    :returns: The fully constructed URL for the document (e.g. ``https://community.example.com/docs/DOC-1234``)
    :raises: :py:exc:`TypeError`, :py:exc:`ValueError`
    """
    verify_core_connection()

    # Keep only the portion of the base URL that precedes the first "api/" segment
    site_root, _, _ = base_url.partition('api/')
    return f"{site_root}docs/DOC-{doc_id}"
def create_document(subject, body, place_id, categories=None, tags=None, verify_ssl=True):
    """This function creates a new document.

    .. versionchanged:: 3.1.0
       Changed the default ``categories`` and ``tags`` values to ``None`` and adjusted the function accordingly.

    .. versionchanged:: 2.6.0
       Added the ``verify_ssl`` argument.

    :param subject: The title/subject of the document
    :type subject: str
    :param body: The raw HTML making up the document body
    :type body: str
    :param place_id: The Place ID (aka Browse ID) of the space where the document should reside
    :type place_id: int, str
    :param categories: Any categories associated with the document (none by default)
    :type categories: list, None
    :param tags: Any tags associated with the document (none by default)
    :type tags: list, None
    :param verify_ssl: Determines if API calls should verify SSL certificates (``True`` by default)
    :type verify_ssl: bool
    :returns: The API response from the POST request for the document creation
    :raises: :py:exc:`TypeError`,
             :py:exc:`khorosjx.errors.exceptions.POSTRequestError`
    """
    # TODO: Allow the author to be specified
    verify_core_connection()

    # Normalize falsy values (e.g. None) to empty lists
    categories = categories or []
    tags = tags or []

    # Assemble the document definition that will be serialized for the POST request
    document_data = {
        "content": {
            "type": "text/html",
            "text": body
        },
        "subject": subject,
        "parent": places_core.get_uri_for_id(place_id),
        "type": "document"
    }

    # Optional fields are only included in the payload when they are non-empty
    for field_name, field_values in (('categories', categories), ('tags', tags)):
        if len(field_values) > 0:
            document_data[field_name] = field_values

    payload = core_utils.convert_dict_to_json(document_data)
    return core.post_request_with_retries(f"{base_url}/contents", payload, verify_ssl)
# Define function to overwrite the body of a document
def overwrite_doc_body(url, body_html, minor_edit=True, ignore_exceptions=False, verify_ssl=True):
    """This function overwrites the body of a document with new HTML content.

    .. versionchanged:: 2.6.0
       Added the ``verify_ssl`` argument.

    :param url: The URL of the document to be updated
    :type url: str
    :param body_html: The new HTML body to replace the existing document body
    :param minor_edit: Determines whether the *Minor Edit* flag should be set (Default: ``True``)
    :type minor_edit: bool
    :param ignore_exceptions: Determines whether or not exceptions should be ignored (Default: ``False``)
    :type ignore_exceptions: bool
    :param verify_ssl: Determines if API calls should verify SSL certificates (``True`` by default)
    :type verify_ssl: bool
    :returns: The response of the PUT request used to update the document
    :raises: :py:exc:`khorosjx.errors.exceptions.ContentPublishError`
    """
    # TODO: Verify and add the data type for the body_html argument in the docstring above and below
    verify_core_connection()

    # The same arguments are used for both the initial attempt and the single retry
    overwrite_args = (url, body_html, minor_edit, ignore_exceptions, verify_ssl)
    put_response = _perform_overwrite_operation(*overwrite_args)

    # A 502 response is retried exactly once
    if put_response.status_code == 502:
        print(
            "Performing the overwrite operation again in an attempt to overcome the 502 "
            "Bad Gateway / Service Temporarily Unavailable issue that was encountered."
        )
        put_response = _perform_overwrite_operation(*overwrite_args)
    return put_response
# Define function to perform the overwrite operation
def _perform_overwrite_operation(_url, _body_html, _minor_edit, _ignore_exceptions, _verify_ssl):
    """This function performs the actual overwrite operation on the document.

    .. versionchanged:: 2.6.0
       Added the ``_verify_ssl`` argument and renamed the function to only have one underscore prefix.

    :param _url: The URL of the document to be updated
    :type _url: str
    :param _body_html: The new HTML body to replace the existing document body
    :param _minor_edit: Determines whether the *Minor Edit* flag should be set
    :type _minor_edit: bool
    :param _ignore_exceptions: Determines whether or not exceptions should be ignored
    :type _ignore_exceptions: bool
    :param _verify_ssl: Determines if API calls should verify SSL certificates
    :type _verify_ssl: bool
    :returns: The response of the PUT request used to update the document
    :raises: :py:exc:`khorosjx.errors.exceptions.ContentPublishError`
    """
    # Resolve the Content ID from the document URL and build the API URI for the PUT request
    _doc_content_id = get_content_id(_url, verify_ssl=_verify_ssl)
    _put_uri = f"{base_url}/contents/{_doc_content_id}"

    # Retrieve the current document JSON and swap in the new body HTML
    _updated_json = core.get_data('contents', _doc_content_id).json()
    _updated_json['content'] = {'text': _body_html}

    # Flag the update as a "Minor Edit" to suppress email notifications if specified
    if _minor_edit:
        _updated_json['minor'] = 'true'

    # Perform the PUT request with retry handling for timeouts
    _put_response = core.put_request_with_retries(_put_uri, _updated_json, _verify_ssl)
    if _put_response.status_code == 200:
        return _put_response

    # Any non-200 status is either reported on stdout or raised, depending on the flag
    _failure_msg = (
        f"The attempt to update the document {_url} failed with "
        f"a {_put_response.status_code} status code."
    )
    if not _ignore_exceptions:
        raise errors.exceptions.ContentPublishError(_failure_msg)
    print(_failure_msg)
    return _put_response


# Define function to get basic information for a particular document
def get_document_info(lookup_value, lookup_type='doc_id', return_fields=None, ignore_exceptions=False,
                      verify_ssl=True):
    """This function obtains the document information for a given document.

    .. versionchanged:: 2.6.0
       Added the ``verify_ssl`` argument.

    :param lookup_value: The value with which to look up the document
    :type lookup_value: int, str
    :param lookup_type: Identifies the type of lookup value that has been provided (Default: ``doc_id``)
    :type lookup_type: str
    :param return_fields: Specific fields to return if not all of the default fields are needed (Optional)
    :type return_fields: list, None
    :param ignore_exceptions: Determines whether or not exceptions should be ignored (Default: ``False``)
    :type ignore_exceptions: bool
    :param verify_ssl: Determines if API calls should verify SSL certificates (``True`` by default)
    :type verify_ssl: bool
    :returns: A dictionary with the document information (empty if the API response was not successful)
    :raises: :py:exc:`khorosjx.errors.exceptions.GETRequestError`,
             :py:exc:`khorosjx.errors.exceptions.InvalidDatasetError`,
             :py:exc:`khorosjx.errors.exceptions.InvalidLookupTypeError`,
             :py:exc:`khorosjx.errors.exceptions.LookupMismatchError`
    """
    verify_core_connection()

    # Convert the lookup value into the identifier used in the contents endpoint URI
    content_id = base.__convert_lookup_value(lookup_value, lookup_type)

    # Query the API for all fields of the document
    query_uri = f"{base_url}/contents/{content_id}?fields=@all"
    response = core.get_request_with_retries(query_uri, verify_ssl=verify_ssl)

    # An unsuccessful response yields an empty dictionary rather than parsed data
    if not errors.handlers.check_api_response(response, ignore_exceptions=ignore_exceptions):
        return {}
    return core.get_fields_from_api_response(response.json(), 'document', return_fields)
# Define function to get the attachments in a document
def get_document_attachments(lookup_value, lookup_type='doc_id', return_dataframe=False):
    """This function retrieves information on any attachments associated with a document.

    :param lookup_value: The value with which to look up the document
    :type lookup_value: str, int
    :param lookup_type: Identifies the type of lookup value that has been provided (Default: ``doc_id``)
    :type lookup_type: str
    :param return_dataframe: Determines whether or not a pandas dataframe should be returned
    :type return_dataframe: bool
    :returns: A list, dictionary or pandas dataframe depending on the number of attachments and/or
              function arguments
    :raises: :py:exc:`khorosjx.errors.exceptions.GETRequestError`,
             :py:exc:`khorosjx.errors.exceptions.InvalidDatasetError`,
             :py:exc:`khorosjx.errors.exceptions.InvalidLookupTypeError`,
             :py:exc:`khorosjx.errors.exceptions.LookupMismatchError`
    """
    verify_core_connection()
    try:
        # Retrieve only the attachments field for the document and trim it down
        doc_info = get_document_info(lookup_value, lookup_type, ['attachments'])
        attachments = base.__trim_attachments_info(doc_info.get('attachments'))

        if return_dataframe:
            # The keys of the first attachment define the dataframe columns
            column_names = list(attachments[0].keys())
            rows = [list(attachment.values()) for attachment in attachments]
            attachments = pd.DataFrame(rows, columns=column_names)
        elif len(attachments) == 1:
            # A single attachment is returned as its inner dictionary instead of a one-item list
            attachments = attachments[0]
    except (IndexError, KeyError):
        # An empty list is returned when no attachments are found
        attachments = []
    return attachments
def delete_document(lookup_value, lookup_type='content_id', return_json=False):
    """This function deletes a document.

    .. versionchanged:: 3.1.0
       Parenthesis were added to the exception classes and the function was refactored to be more efficient.

    :param lookup_value: The value with which to identify the document
    :type lookup_value: str, int
    :param lookup_type: Identifies the value as a ``content_id`` (default), ``doc_id`` or ``url``
    :type lookup_type: str
    :param return_json: Determines if the API response should be returned in JSON format (``False`` by default)
    :type return_json: bool
    :returns: The API response for the DELETE request
    :raises: :py:exc:`khorosjx.errors.exceptions.InvalidLookupTypeError`,
             :py:exc:`khorosjx.errors.exceptions.KhorosJXError`,
             :py:exc:`khorosjx.errors.exceptions.NoCredentialsError`
    """
    accepted_types = ('content_id', 'doc_id', 'url')
    if lookup_type not in accepted_types:
        raise errors.exceptions.InvalidLookupTypeError()

    # The base URL is read below for every lookup type, so it must be defined first
    verify_core_connection()

    # Convert URL and Document ID lookups into the Content ID that the contents endpoint expects.
    # The branch is selected by the lookup *type*; comparing the lookup value to "doc_id" would
    # never match and would leave a Document ID in the URI.
    if lookup_type == "url":
        lookup_value = base.get_content_id(lookup_value)
    elif lookup_type == "doc_id":
        lookup_value = base.get_content_id(get_url_for_id(lookup_value))

    content_uri = f"{base_url}/contents/{lookup_value}"
    return core.delete(content_uri, return_json=return_json)