Source code for khorosjx.utils.df_utils

# -*- coding: utf-8 -*-
"""
:Module:        khorosjx.utils.df_utils
:Synopsis:      Useful tools and utilities to assist in importing, manipulating and exporting pandas dataframes
:Usage:         ``from khorosjx import df_utils``
:Example:       TBD
:Created By:    Jeff Shurtliff
:Last Modified: Jeff Shurtliff
:Modified Date: 18 Dec 2019
"""

import pandas as pd


# Define function to convert a list of dictionaries to a pandas dataframe
[docs]def convert_dict_list_to_dataframe(dict_list, column_names=[]): """This function converts a list of dictionaries into a pandas dataframe. :param dict_list: List of dictionaries :type dict_list: list :param column_names: The column names for the dataframe (Optional) :type column_names: list :returns: A pandas dataframe of the data """ # Identify the dataframe column names if len(column_names) == 0: for field_name in dict_list[0].keys(): column_names.append(field_name) # Identify the data for each column df_data = [] for idx in range(0, len(dict_list)): row_data = [] for field_value in dict_list[idx].values(): row_data.append(field_value) df_data.append(row_data) # Create and return the dataframe dataframe = pd.DataFrame(df_data, columns=column_names) return dataframe
[docs]def import_csv(file_path, delimiter=",", column_names=[], columns_to_return=[], has_headers=True): """This function imports a CSV file to generate a dataframe. :param file_path: The absolute path to the CSV file to be imported :type file_path: str :param delimiter: The column delimiter utilized in the CSV :type delimiter: str :param column_names: The column names to use with the imported dataframe (Optional) :type column_names: list :param columns_to_return: Determines which of the columns should actually be returned (Default: all columns) :param has_headers: Defines whether or not the data in the file has column headers (Default: ``True``) :type has_headers: bool :returns: The imported data as a pandas dataframe :raises: FileNotFoundError, TypeError """ # Determine the appropriate use case and then import and return the dataframe if has_headers is False and len(column_names) == 0: if len(columns_to_return) == 0: # Use Case: Headless dataframe = pd.read_csv(file_path, sep=delimiter, header=None) else: # Use Case: Headless Filtered dataframe = pd.read_csv(file_path, sep=delimiter, header=None, usecols=columns_to_return) else: if len(column_names) > 0 and len(columns_to_return) > 0: # Use Case: Custom Filtered dataframe = pd.read_csv(file_path, sep=delimiter, names=column_names) dataframe = dataframe[columns_to_return] elif len(column_names) > 0 and len(columns_to_return) == 0: # Use Case: Custom dataframe = pd.read_csv(file_path, sep=delimiter, names=column_names) elif len(column_names) == 0 and len(columns_to_return) > 0: # Use Case: Filtered dataframe = pd.read_csv(file_path, sep=delimiter, usecols=columns_to_return) else: # Use Case: Default dataframe = pd.read_csv(file_path, sep=delimiter) return dataframe
[docs]def import_excel(file_path, excel_sheet='', use_first_sheet=False, column_names=[], columns_to_return=[], has_headers=True): """This function imports a Microsoft Excel file to generate a dataframe. :param file_path: The absolute path to the Excel file to be imported :type file_path: str :param excel_sheet: The name of the specific sheet in the file to import :type excel_sheet: str :param use_first_sheet: Defines whether or not the first sheet in the file should be used (Default: ``False``) :type use_first_sheet: bool :param column_names: The column names to use with the imported dataframe (Optional) :type column_names: list :param columns_to_return: Determines which of the columns should actually be returned (Default: all columns) :param has_headers: Defines whether or not the data in the file has column headers (Default: ``True``) :type has_headers: bool :returns: The imported data as a pandas dataframe :raises: FileNotFoundError, TypeError """ # Determine the appropriate use case and then import and return the dataframe if excel_sheet != "" and use_first_sheet is False: if has_headers is False and len(column_names) == 0: if columns_to_return == 0: # Use Case: Headless excel_data = pd.read_excel(file_path, sheet_name=excel_sheet, header=None) else: # Use Case: Headless Filtered excel_data = pd.read_excel(file_path, sheet_name=excel_sheet, header=None, usecols=columns_to_return) else: if len(column_names) > 0 and len(columns_to_return) > 0: # Use Case: Custom Filtered excel_data = pd.read_excel(file_path, names=column_names) excel_data = excel_data[columns_to_return] elif len(column_names) > 0 and len(columns_to_return) == 0: # Use Case: Custom excel_data = pd.read_excel(file_path, names=column_names) elif len(column_names) == 0 and len(columns_to_return) > 0: # Use Case: Filtered excel_data = pd.read_excel(file_path, usecols=columns_to_return) else: # Use Case: Default excel_data = pd.read_excel(file_path) if use_first_sheet: # Use Case: Use First Sheet excel_data = excel_data[0] return excel_data