apache · ChinmayHegde24 · Mar 30, 2026
diff --git a/ranger-tools/src/main/python/README.md b/ranger-tools/src/main/python/README.md
@@ -22,7 +22,7 @@ under the License.
 Run the below command to generate pydocs for the package. Code base has doc strings describing the methods and classes from which the document is generated.
 
 ```bash
-> python -m pydoc -b
+> python3 -m pydoc -b
 ```
 
 Other README files can be found in the following directory:
@@ -41,11 +41,7 @@ or
 
 ## Client side Installation
 
-Use the package manager [pip](https://pip.pypa.io/en/stable/) to install requirements for running the performance tests.
-Ensure right path to requirements.txt is given.
-
 ```bash
-> pip install -r requirements.txt
 
 > apt-get install sshpass 
 or
@@ -54,11 +50,21 @@ or
 
 
 ## Usage
+
 ```cd``` into ```python``` directory before executing below commands
 
+Ensure you have ```Python 3.11``` installed.
+It is recommended to create a virtual environment using this version and work inside it.
+
+Use the package manager [pip](https://pip.pypa.io/en/stable/) to install requirements for running the performance tests.
+
+```bash 
+> pip install -r requirements.txt
+```
+
 First time usage or to reset the config files:
 ```bash 
-> python setup_performance_analyzer.py
+> python3 setup_performance_analyzer.py
 ```
 
 Subsequent usage:
@@ -77,7 +83,7 @@ For single api testing (Command line arguments override config file values)
 usage:
 
 ```bash
-> python performance_analyzer.py --ranger_url <ranger_url> --calls <number of times to call api> --api <name of function of python client corresponding to api> --username <Auth username> --password <Auth password> --client_ip <client ip address>  --ssh_host <ranger host to connect for ssh> --ssh_user <Server user e.g. root> --ssh_password <Server password>
+> python3 performance_analyzer.py --ranger_url <ranger_url> --calls <number of times to call api> --api <name of function of python client corresponding to api> --username <Auth username> --password <Auth password> --client_ip <client ip address>  --ssh_host <ranger host to connect for ssh> --ssh_user <Server user e.g. root> --ssh_password <Server password>
 ```
 
 Example command:
@@ -92,4 +98,4 @@ System metrics on server side are collected using [vmstat](https://phoenixnap.co
 ## Warnings
 Ensure sudo/root privileges for the user on the server side for vmstat command.
 
-Ensure VPN is enables and client can communicate with the server. Else, in some cases stale values from previous successful run of the tool may be presented 
+Ensure the VPN is enabled and the client can communicate with the server. Otherwise, in some cases, stale values from a previous successful run of the tool may be presented.
diff --git a/ranger-tools/src/main/python/performance_analyzer.py b/ranger-tools/src/main/python/performance_analyzer.py
@@ -144,6 +144,16 @@ def performance_analyzer_main(argv_dict):
 
             with open("performance_report.csv", "w") as f:
                 aligned_df.to_csv(f, index=False)
+                access_df['latency'] = pd.to_numeric(access_df['latency'], errors='coerce')
+                avg_latency_df = access_df.groupby('type')['latency'].agg(
+                    count='count',
+                    avg_latency_ms='mean',
+                    min_latency_ms='min',
+                    max_latency_ms='max',
+                    median_latency_ms='median'
+                ).reset_index().round(2)
+                f.write("\n\nAverage Latency by API Type\n")
+                avg_latency_df.to_csv(f, index=False)
 
             with open("performance_report.html", "w") as f:
                 cm = sns.light_palette("red", as_cmap=True)
@@ -154,8 +164,8 @@ def performance_analyzer_main(argv_dict):
                                                    access_logs_timestamp_col_name='time', merge=False)
             print(aligned_df.to_string())
 
-            statistics_df_access = aligned_df.describe()
-            statistics_df_system = system_df.describe()
+            statistics_df_access = aligned_df.describe().add_prefix("access__")
+            statistics_df_system = system_df.describe().add_prefix("system__")
 
             statistics_df = pd.concat([statistics_df_access, statistics_df_system], axis=1)
             df_utils.rename_rows(statistics_df, {"25%": "25th_percentile", "50%": "median", "75%": "75th_percentile"})
@@ -179,6 +189,16 @@ def performance_analyzer_main(argv_dict):
 
             with open(perf_globals.OUTPUT_DIR+"performance_report.csv", "w") as f:
                 aligned_df.to_csv(f, index=False)
+                access_df['latency'] = pd.to_numeric(access_df['latency'], errors='coerce')
+                avg_latency_df = access_df.groupby('type')['latency'].agg(
+                    count='count',
+                    avg_latency_ms='mean',
+                    min_latency_ms='min',
+                    max_latency_ms='max',
+                    median_latency_ms='median'
+                ).reset_index().round(2)
+                f.write("\n\nAverage Latency by API Type\n")
+                avg_latency_df.to_csv(f, index=False)
 
             with open(perf_globals.OUTPUT_DIR+"performance_report.html", "w") as f:
                 cm = sns.light_palette("red", as_cmap=True)

diff --git a/ranger-tools/src/main/python/ranger_performance_tool/ranger_perf_utils/dataframe_utils.py b/ranger-tools/src/main/python/ranger_performance_tool/ranger_perf_utils/dataframe_utils.py
@@ -50,8 +50,8 @@ def truncate_dataframe(self, data, start_time, end_time, timestamp_col_name='tim
         :param timestamp_col_name: Column name of the timestamp column
         :return: Pandas dataframe with truncated data
         """
-        mask = (pd.to_datetime(data[timestamp_col_name]) >= pd.to_datetime(start_time, infer_datetime_format=True)) & (
-                    pd.to_datetime(data[timestamp_col_name]) <= pd.to_datetime(end_time, infer_datetime_format=True))
+        mask = (pd.to_datetime(data[timestamp_col_name]) >= pd.to_datetime(start_time)) & (
+                    pd.to_datetime(data[timestamp_col_name]) <= pd.to_datetime(end_time))
         return data.loc[mask]
 
     def align_dataframes(self, system_logs_df, access_logs_df, system_logs_timestamp_col_name = 'time',

diff --git a/ranger-tools/src/main/python/requirements.txt b/ranger-tools/src/main/python/requirements.txt
@@ -7,16 +7,16 @@ idna==3.7
 Jinja2==3.1.6
 kiwisolver==1.4.4
 MarkupSafe==2.1.1
-matplotlib==3.5.3
-numpy==1.23.1
+matplotlib==3.10.8
+numpy==1.26.0
 packaging==21.3
-pandas==1.4.3
+pandas==2.1.0
 Pillow==10.4.0
 pyparsing==3.0.9
 python-dateutil==2.8.2
 pytz==2022.2
 requests==2.32.5
 scipy==1.13.0
-seaborn==0.11.2
+seaborn==0.13.2
 six==1.16.0
 urllib3==2.6.3