Skip to content

Commit

Permalink
include file size information
Browse files Browse the repository at this point in the history
  • Loading branch information
jaydeluca committed Sep 19, 2024
1 parent cbef6a3 commit a601e7d
Show file tree
Hide file tree
Showing 10 changed files with 69 additions and 46 deletions.
4 changes: 4 additions & 0 deletions CodeFile.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
class CodeFile:
    """A repository file described by its path and its size.

    Attributes:
        path: Path of the file inside the repository.
        size: Size reported for the file, stored exactly as given.
    """

    def __init__(self, path: str, size: int):
        self.path = path
        self.size = size

    def __repr__(self) -> str:
        # Readable representation so lists of files are easy to inspect
        # while debugging.
        return f"{type(self).__name__}(path={self.path!r}, size={self.size!r})"
2 changes: 0 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,5 @@ update-example:
python3 main.py -r "open-telemetry/opentelemetry-java-instrumentation" -l "groovy" -s "2022-11-15" -i 14 -o "./media/example_output2.png"
python3 count_by_instrumentation.py -r "open-telemetry/opentelemetry-java-instrumentation" -l "groovy" -o "./media/example_pie_output.png"



.PHONY: all
all: install test lint
23 changes: 15 additions & 8 deletions count_by_instrumentation.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ def get_commit_by_date(self, repository, date):

def get_repository_by_commit(self, repository, commit):
    """Fetch the repository contents at ``commit`` and filter them.

    Args:
        repository: Repository identifier passed through to the client.
        commit: Commit identifier passed through to the client.

    Returns:
        Whatever ``self.data_filter.get_file_counts_and_lengths`` produces
        for the raw client payload.
    """
    repo_data = self.client.get_repository_at_commit(repository, commit)
    # Only the size-aware filter is applied. Running ``parse_data`` first
    # would replace the raw payload with an already-filtered result that
    # the size-aware filter cannot read.
    return self.data_filter.get_file_counts_and_lengths(repo_data)

Expand All @@ -42,26 +42,33 @@ def main(args):
repository=args.repo,
commit=commit
)
count = count_by_language_and_file_extension(files=repo_files["files"],
languages=[args.language])
file_counts, file_sizes = count_by_language_and_file_extension(
files=repo_files["files"],
languages=[args.language])

df = pd.DataFrame(list(count.items()), columns=['Key', 'Value'])
# Print the table showing file counts and sizes
data = [(key, file_counts[key], file_sizes[key]) for key in file_counts.keys()]
df2 = pd.DataFrame(data, columns=['Key', 'File Count', 'Total File Size'])
df2 = df2.sort_values(by='Total File Size', key=lambda col: col.astype(int),
ascending=False)

print(df2.to_markdown(index=False))
print(f"| Total | {df2['File Count'].sum()} | {df2['Total File Size'].sum()} |")

# Create a pie chart for file counts only
df = pd.DataFrame(list(file_counts.items()), columns=['Key', 'Value'])
df = df.sort_values(by='Value', key=lambda col: col.astype(int), ascending=False)

sns.set_theme()
colors = sns.color_palette('pastel')[0:len(df)]

# Create a pie chart
explode = [0.05] * len(df) # this will "explode" each slice from the pie
df.set_index('Key')['Value'].plot.pie(autopct='%1.0f%%', colors=colors,
explode=explode)

plt.title(f'Remaining {args.language} files by Instrumentation')
plt.ylabel('')

print(df.to_markdown(index=False))
print(f"| Total | {df['Value'].sum()} |")

if args.output is not None:
plt.savefig(args.output)
else:
Expand Down
16 changes: 16 additions & 0 deletions data_filter.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
from typing import List

from CodeFile import CodeFile


class DataFilter:

Expand Down Expand Up @@ -33,3 +35,17 @@ def parse_data(self, payload):
"files": data_result
}
return json_result

def get_file_counts_and_lengths(self, payload):
    """Collect matching tree entries as ``CodeFile`` objects.

    An entry is kept when it passes ``matches_meta`` and its path passes
    both ``matches_file_extensions`` and ``matches_directory``.

    Args:
        payload: Mapping with a ``"tree"`` key holding an iterable of
            entries; each entry is indexed by ``"path"`` and may carry
            a ``"size"``.

    Returns:
        A dict ``{"files": [CodeFile, ...]}`` in the order the entries
        appear in the tree.
    """
    data_result = []
    for entry in payload["tree"]:
        path = entry["path"]
        if (self.matches_meta(entry)
                and self.matches_file_extensions(path)
                and self.matches_directory(path)):
            # Not every tree entry carries a size (GitHub reports it only
            # for blobs); count a missing size as 0 instead of raising.
            data_result.append(CodeFile(path=path, size=entry.get("size", 0)))

    return {"files": data_result}
Binary file added latest.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified media/example_output.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified media/example_output2.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified media/example_pie_output.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
55 changes: 24 additions & 31 deletions readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -103,37 +103,30 @@ In the `open-telemetry/opentelemetry-java-instrumentation` repository, analyze t

Output:

| Key | Value |
|:------------------|------:|
| spring | 52 |
| jaxrs | 37 |
| servlet | 23 |
| restlet | 22 |
| couchbase | 18 |
| aws-sdk | 17 |
| ratpack | 16 |
| elasticsearch | 15 |
| play | 15 |
| jaxws | 15 |
| vertx | 14 |
| mongo | 10 |
| jdbc | 8 |
| apache-dubbo-2.7 | 7 |
| jaxrs-client | 5 |
| netty | 5 |
| apache-httpclient | 3 |
| opentelemetry-api | 3 |
| grizzly-2.3 | 3 |
| grails-3.0 | 3 |
| undertow-1.4 | 3 |
| kafka | 3 |
| internal | 2 |
| dropwizard | 2 |
| hibernate | 1 |
| rediscala-1.8 | 1 |
| spymemcached-2.12 | 1 |
| twilio-6.6 | 1 |
| Total | 305 |
| Key | File Count | Total File Size |
|:------------------|-----------:|----------------:|
| servlet | 23 | 143182 |
| aws-sdk | 17 | 127827 |
| jdbc | 8 | 90890 |
| elasticsearch | 15 | 90341 |
| jaxrs | 37 | 72352 |
| vertx | 14 | 56845 |
| ratpack | 16 | 51932 |
| mongo | 10 | 51661 |
| play | 15 | 48986 |
| restlet | 22 | 38226 |
| jaxws | 17 | 21595 |
| kafka | 3 | 21505 |
| twilio-6.6 | 1 | 18936 |
| jaxrs-client | 5 | 16067 |
| spymemcached-2.12 | 1 | 15630 |
| undertow-1.4 | 3 | 12754 |
| hibernate | 1 | 12167 |
| dropwizard | 2 | 10789 |
| rediscala-1.8 | 1 | 3898 |
| grails-3.0 | 3 | 3201 |
| internal | 2 | 2603 |
| Total | 216 | 911387 |

![Example](./media/example_pie_output.png)

Expand Down
15 changes: 10 additions & 5 deletions utilities.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
from collections import defaultdict
from typing import Dict, List, Tuple

from CodeFile import CodeFile


def get_dates_between(start_date_str, end_date, interval):
date_format = "%Y-%m-%d"
Expand Down Expand Up @@ -37,17 +39,20 @@ def count_by_file_extension(files: List[str], languages: List[str]) -> dict:
return file_counts


def count_by_language_and_file_extension(
        files: List["CodeFile"],
        languages: List[str]) -> Tuple[Dict[str, int], Dict[str, int]]:
    """Tally file counts and summed sizes per instrumentation.

    The instrumentation name is the second ``/``-separated component of
    each file's path. Paths with fewer than three components are skipped,
    as are files whose extension (the text after the last ``.`` in the
    final component) is not listed in ``languages``.

    Args:
        files: Objects exposing ``path`` and ``size`` attributes.
        languages: File extensions to include, without the leading dot.

    Returns:
        A ``(file_counts, file_sizes)`` pair of dictionaries, both keyed
        by instrumentation name.
    """
    file_counts = defaultdict(int)
    file_sizes = defaultdict(int)
    for file in files:
        file_parts = file.path.split('/')
        if len(file_parts) < 3:
            continue
        instrumentation = file_parts[1]
        extension = file_parts[-1].split('.')[-1]
        if extension in languages:
            file_counts[instrumentation] += 1
            file_sizes[instrumentation] += file.size
    return file_counts, file_sizes


def convert_to_plot(input_dict: dict, items):
result = {}
Expand Down

0 comments on commit a601e7d

Please sign in to comment.