Source code for pandas_diff.pandas_diff

"""Main module."""
import pandas as pd
from pandas_diff.pre_process import pre_process
from pandas_diff.process_results import format_results_modify , format_results_create_delete

[docs]def get_diffs(before : pd.DataFrame, after: pd.DataFrame, keys: list, ignore_columns=[]): """ Generate DataFrame with differences between two DataFrames Args: before (pd.DataFrame): Before DataFrame after(pd.DataFrame): After DataFrame keys (list): Key fields ignore_columns (list): Columns to not be considered for modify options (optional) """ results = [] A,B = before, after A,B , keys = pre_process(A, B, keys) A["___keys"] = [ str(d) for d in A[keys].to_dict(orient="records") ] B["___keys"] = [ str(d) for d in B[keys].to_dict(orient="records") ] # Added elements are in B but not in A deleted_keys = list( set(A["___keys"].values ) - set(B["___keys"].values )) # Deleted elements are in A but not in B added_keys = list( set(B["___keys"].values ) - set(A["___keys"].values )) A = A.set_index('___keys') B = B.set_index('___keys') results = [] for added_key in added_keys: result = format_results_create_delete(B.loc[added_key,:] ,"create",keys) results.append(result) for deleted_key in deleted_keys: result = format_results_create_delete(A.loc[deleted_key,:],"delete",keys) results.append(result) common_keys = list(set(A.index.values) & set(B.index.values)) columns_not_keys = list(set(A.columns.values) - set(keys)) for common_key in common_keys: for col in columns_not_keys: if col in ignore_columns: continue # Check if the value has changed are_different_non_null_values = A.loc[common_key,col] != B.loc[common_key,col] and not ( pd.isna(A.loc[common_key,col] ) and pd.isna(B.loc[common_key,col] ) ) if are_different_non_null_values : result = format_results_modify( row = B.loc[common_key,:], keys = keys, attribute_changed = col, old_value = A.loc[common_key,col], new_value = B.loc[common_key,col] ) results.append(result) df = pd.DataFrame(results) return df