Source code for khorosjx.content.docs

# -*- coding: utf-8 -*-
"""
:Module:            khorosjx.content.docs
:Synopsis:          Collection of functions relating to documents (e.g. https://community.example.com/docs/DOC-1234)
:Usage:             ``from khorosjx.content import docs``
:Example:           ``content_id = docs.get_content_id(url)``
:Created By:        Jeff Shurtliff
:Last Modified:     Jeff Shurtliff
:Modified Date:     22 Sep 2021
"""

import pandas as pd

from .. import core, errors
from . import base
from ..utils import core_utils
from ..places import base as places_core

# Module-level connection settings, both starting out empty/undefined
base_url = ''
api_credentials = None


def verify_core_connection():
    """Ensure the module-level Base URL and API credentials have been populated.

    The connection information is retrieved via :py:func:`retrieve_connection_info` whenever either
    of the two module-level values is still empty or undefined.

    .. versionchanged:: 3.1.0
       Refactored the function to be more pythonic and to avoid depending on a try/except block.

    :returns: None
    :raises: :py:exc:`khorosjx.errors.exceptions.KhorosJXError`,
             :py:exc:`khorosjx.errors.exceptions.NoCredentialsError`
    """
    # Nothing to do when both connection values are already in place
    if base_url and api_credentials:
        return
    retrieve_connection_info()


def retrieve_connection_info():
    """Populate the module-level ``base_url`` and ``api_credentials`` from the core module.

    .. versionchanged:: 3.1.0
       Refactored the function to be more efficient.

    :returns: None
    :raises: :py:exc:`khorosjx.errors.exceptions.KhorosJXError`,
             :py:exc:`khorosjx.errors.exceptions.NoCredentialsError`
    """
    # Rebind the names at module scope rather than creating function locals
    global base_url, api_credentials
    connection_info = core.get_connection_info()
    base_url, api_credentials = connection_info


# Define function to get the content ID from a URL
def get_content_id(lookup_value, lookup_type='url', verify_ssl=True):
    """Look up the Content ID of a document from its URL or Document ID.

    .. versionchanged:: 2.6.0
       Added the ``verify_ssl`` argument.

    :param lookup_value: The URL of the document (or its Document ID when ``lookup_type`` is not ``url``)
    :type lookup_value: str
    :param lookup_type: The type of value being used for the lookup: ``url`` (default), ``id`` or ``doc_id``
    :type lookup_type: str
    :param verify_ssl: Determines if API calls should verify SSL certificates (``True`` by default)
    :type verify_ssl: bool
    :returns: The Content ID for the document
    :raises: :py:exc:`ValueError`, :py:exc:`khorosjx.errors.exceptions.ContentNotFoundError`,
             :py:exc:`khorosjx.errors.exceptions.InvalidLookupTypeError`
    """
    if lookup_type not in ('url', 'id', 'doc_id'):
        raise errors.exceptions.InvalidLookupTypeError()

    # Any non-URL lookup value is treated as a Document ID and expanded into the document URL first
    doc_url = lookup_value if lookup_type == 'url' else get_url_for_id(lookup_value)
    return base.get_content_id(doc_url, 'document', verify_ssl)


def get_url_for_id(doc_id):
    """Build the full document URL that corresponds to a Document ID.

    :param doc_id: The Document ID with which to construct the URL
    :type doc_id: int, str
    :returns: The fully constructed URL for the document (e.g. https://community.example.com/docs/DOC-1234)
    :raises: :py:exc:`TypeError`, :py:exc:`ValueError`
    """
    verify_core_connection()

    # Keep only the part of the Base URL that precedes the first "api/" segment
    site_root = base_url.split('api/', 1)[0]
    return f"{site_root}docs/DOC-{doc_id}"


def create_document(subject, body, place_id, categories=None, tags=None, verify_ssl=True):
    """Create a new document in a given place via a POST request to the ``contents`` endpoint.

    .. versionchanged:: 3.1.0
       Changed the default ``categories`` and ``tags`` values to ``None`` and adjusted the function accordingly.

    .. versionchanged:: 2.6.0
       Added the ``verify_ssl`` argument.

    :param subject: The title/subject of the document
    :type subject: str
    :param body: The raw HTML making up the document body
    :type body: str
    :param place_id: The Place ID (aka Browse ID) of the space where the document should reside
    :type place_id: int, str
    :param categories: Any categories associated with the document (none by default)
    :type categories: list, None
    :param tags: Any tags associated with the document (none by default)
    :type tags: list, None
    :param verify_ssl: Determines if API calls should verify SSL certificates (``True`` by default)
    :type verify_ssl: bool
    :returns: The API response from the POST request for the document creation
    :raises: :py:exc:`TypeError`, :py:exc:`khorosjx.errors.exceptions.POSTRequestError`
    """
    # TODO: Allow the author to be specified
    verify_core_connection()

    # Treat any falsy value (e.g. None) as "nothing supplied"
    categories = categories or []
    tags = tags or []

    document = {
        "content": {
            "type": "text/html",
            "text": body,
        },
        "subject": subject,
        "parent": places_core.get_uri_for_id(place_id),
        "type": "document",
    }

    # The optional fields are only included in the payload when they are non-empty
    for field_name, field_values in (('categories', categories), ('tags', tags)):
        if len(field_values) > 0:
            document[field_name] = field_values

    payload = core_utils.convert_dict_to_json(document)
    return core.post_request_with_retries(f"{base_url}/contents", payload, verify_ssl)


# Define function to overwrite the body of a document
def overwrite_doc_body(url, body_html, minor_edit=True, ignore_exceptions=False, verify_ssl=True):
    """Replace the body of an existing document with new HTML, retrying once on a 502 response.

    .. versionchanged:: 2.6.0
       Added the ``verify_ssl`` argument.

    :param url: The URL of the document to be updated
    :type url: str
    :param body_html: The new HTML body to replace the existing document body
    :param minor_edit: Determines whether the *Minor Edit* flag should be set (Default: ``True``)
    :type minor_edit: bool
    :param ignore_exceptions: Determines whether or not exceptions should be ignored (Default: ``False``)
    :type ignore_exceptions: bool
    :param verify_ssl: Determines if API calls should verify SSL certificates (``True`` by default)
    :type verify_ssl: bool
    :returns: The response of the PUT request used to update the document
    :raises: :py:exc:`khorosjx.errors.exceptions.ContentPublishError`
    """
    # TODO: Verify and add the data type for the body_html argument in the docstring above and below
    verify_core_connection()

    # Both attempts are made with exactly the same arguments
    attempt_args = (url, body_html, minor_edit, ignore_exceptions, verify_ssl)
    put_response = _perform_overwrite_operation(*attempt_args)

    # NOTE(review): the helper raises on any non-200 response unless ignore_exceptions is set, so this
    # retry path appears to be reachable only when ignore_exceptions is True -- confirm this is intended
    if put_response.status_code != 502:
        return put_response

    print(
        "Performing the overwrite operation again in an attempt to overcome the 502 "
        "Bad Gateway / Service Temporarily Unavailable issue that was encountered."
    )
    return _perform_overwrite_operation(*attempt_args)


# Define function to perform the overwrite operation
def _perform_overwrite_operation(_url, _body_html, _minor_edit, _ignore_exceptions, _verify_ssl):
    """Fetch the document JSON, swap in the new body and PUT the result back to the API.

    .. versionchanged:: 2.6.0
       Added the ``_verify_ssl`` argument and renamed the function to only have one underscore prefix.

    :param _url: The URL of the document, which is used to look up its Content ID
    :type _url: str
    :param _body_html: The new HTML body to replace the existing document body
    :param _minor_edit: Determines whether the *Minor Edit* flag should be set
    :type _minor_edit: bool
    :param _ignore_exceptions: Determines whether a failed update is printed (``True``) or raised (``False``)
    :type _ignore_exceptions: bool
    :param _verify_ssl: Determines if API calls should verify SSL certificates
    :type _verify_ssl: bool
    :returns: The response of the PUT request used to update the document
    :raises: :py:exc:`khorosjx.errors.exceptions.ContentPublishError`
    """
    # Resolve the Content ID and the API URI for the document
    content_id = get_content_id(_url, verify_ssl=_verify_ssl)
    content_uri = f"{base_url}/contents/{content_id}"

    # Start from the document's current JSON and replace only its content with the new body HTML
    # NOTE(review): _verify_ssl is not forwarded to this GET call -- confirm whether it should be
    doc_json = core.get_data('contents', content_id).json()
    doc_json['content'] = {'text': _body_html}

    # Flag the update as a "Minor Edit" to suppress email notifications if specified
    if _minor_edit:
        doc_json['minor'] = 'true'

    # Perform the PUT request and return immediately on success
    put_response = core.put_request_with_retries(content_uri, doc_json, _verify_ssl)
    if put_response.status_code == 200:
        return put_response

    # A failed update is either raised or merely reported, depending on the caller's preference
    error_msg = (f"The attempt to update the document {_url} failed with "
                 f"a {put_response.status_code} status code.")
    if not _ignore_exceptions:
        raise errors.exceptions.ContentPublishError(error_msg)
    print(error_msg)
    return put_response


# Define function to get basic information for a particular document
def get_document_info(lookup_value, lookup_type='doc_id', return_fields=None, ignore_exceptions=False, verify_ssl=True):
    """Retrieve the information for a given document from the ``contents`` endpoint.

    .. versionchanged:: 2.6.0
       Added the ``verify_ssl`` argument.

    :param lookup_value: The value with which to look up the document
    :type lookup_value: int, str
    :param lookup_type: Identifies the type of lookup value that has been provided (Default: ``doc_id``)
    :type lookup_type: str
    :param return_fields: Specific fields to return if not all of the default fields are needed (Optional)
    :type return_fields: list, None
    :param ignore_exceptions: Determines whether or not exceptions should be ignored (Default: ``False``)
    :type ignore_exceptions: bool
    :param verify_ssl: Determines if API calls should verify SSL certificates (``True`` by default)
    :type verify_ssl: bool
    :returns: A dictionary with the document information (empty if the API response was not successful)
    :raises: :py:exc:`khorosjx.errors.exceptions.GETRequestError`,
             :py:exc:`khorosjx.errors.exceptions.InvalidDatasetError`,
             :py:exc:`khorosjx.errors.exceptions.InvalidLookupTypeError`,
             :py:exc:`khorosjx.errors.exceptions.LookupMismatchError`
    """
    verify_core_connection()

    # Convert the supplied lookup value into the identifier used in the API query
    content_id = base.__convert_lookup_value(lookup_value, lookup_type)

    # Query the API for all fields of the document
    response = core.get_request_with_retries(f"{base_url}/contents/{content_id}?fields=@all", verify_ssl=verify_ssl)

    # An unsuccessful (but ignored) response yields an empty dictionary
    if not errors.handlers.check_api_response(response, ignore_exceptions=ignore_exceptions):
        return {}

    # Reduce the JSON response to the requested (or default) document fields
    return core.get_fields_from_api_response(response.json(), 'document', return_fields)


# Define function to get the attachments in a document
def get_document_attachments(lookup_value, lookup_type='doc_id', return_dataframe=False):
    """Retrieve information on any attachments associated with a document.

    :param lookup_value: The value with which to look up the document
    :type lookup_value: str, int
    :param lookup_type: Identifies the type of lookup value that has been provided (Default: ``doc_id``)
    :type lookup_type: str
    :param return_dataframe: Determines whether or not a pandas dataframe should be returned
    :type return_dataframe: bool
    :returns: A list, dictionary or pandas dataframe depending on the number of attachments and/or function arguments
    :raises: :py:exc:`khorosjx.errors.exceptions.GETRequestError`,
             :py:exc:`khorosjx.errors.exceptions.InvalidDatasetError`,
             :py:exc:`khorosjx.errors.exceptions.InvalidLookupTypeError`,
             :py:exc:`khorosjx.errors.exceptions.LookupMismatchError`
    """
    verify_core_connection()

    try:
        # Pull only the attachments field for the document and trim it down
        doc_fields = get_document_info(lookup_value, lookup_type, ['attachments'])
        attachments = base.__trim_attachments_info(doc_fields.get('attachments'))

        if return_dataframe:
            # The keys of the first attachment supply the column names; each attachment becomes one row
            column_names = list(attachments[0].keys())
            rows = [list(attachment.values()) for attachment in attachments]
            attachments = pd.DataFrame(rows, columns=column_names)
        elif len(attachments) == 1:
            # A single attachment is returned as its own dictionary rather than a one-item list
            attachments = attachments[0]
    except (IndexError, KeyError):
        # An IndexError or KeyError anywhere above results in an empty list being returned
        attachments = []

    return attachments


def delete_document(lookup_value, lookup_type='content_id', return_json=False):
    """This function deletes a document.

    .. versionchanged:: 3.1.0
       Parenthesis were added to the exception classes and the function was refactored to be more efficient.

    :param lookup_value: The value with which to identify the document.
    :type lookup_value: str, int
    :param lookup_type: Identifies the value as a ``content_id`` (default), ``doc_id`` or ``url``
    :type lookup_type: str
    :param return_json: Determines if the API response should be returned in JSON format (``False`` by default)
    :type return_json: bool
    :returns: The API response for the DELETE request
    :raises: :py:exc:`khorosjx.errors.exceptions.InvalidLookupTypeError`, plus any exception raised by
             :py:func:`verify_core_connection` when the connection information cannot be established
    """
    accepted_types = ['content_id', 'doc_id', 'url']
    if lookup_type not in accepted_types:
        raise errors.exceptions.InvalidLookupTypeError()

    # The request URI below is built from the module-level Base URL, so make sure it has been populated
    verify_core_connection()

    # Convert a URL or Document ID into the Content ID required by the contents endpoint
    if lookup_type == "url":
        lookup_value = base.get_content_id(lookup_value)
    elif lookup_type == "doc_id":
        # The branch must test lookup_type (not lookup_value) for Document IDs to be converted
        url = get_url_for_id(lookup_value)
        lookup_value = base.get_content_id(url)

    content_uri = f"{base_url}/contents/{lookup_value}"
    response = core.delete(content_uri, return_json=return_json)
    return response