Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 9 additions & 9 deletions estimators/file_estimator.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,17 +53,17 @@ def calculate_migration_eta(self, data: Dict[str, Any]) -> float:
"""Calculates duration in HOURS based on batching throughput constraints."""
items = data.get("items", {})
batch_corpus_size = sum(item["size"] for item in items)
batch_resource_count = sum(item["files"] + item["folders"] + item["shortcuts"] for item in items)
batch_resource_count = sum(item["files"] for item in items)

average_batch_file_size = batch_corpus_size / batch_resource_count if batch_resource_count > 0 else 0

max_qps_from_file_size = data.get("FILES_GLOBAL_CORPUS_SIZE_LIMIT") / average_batch_file_size if average_batch_file_size > 0 else data.get("FILES_GLOBAL_CORPUS_SIZE_LIMIT")
max_qps_from_license_counts = data.get("FILES_GLOBAL_COUNT_LIMIT")

global_count_limit = data.get("FILES_GLOBAL_COUNT_LIMIT")
global_corpus_size_limit = data.get("FILES_GLOBAL_CORPUS_SIZE_LIMIT")
qps = min(max_qps_from_license_counts, max_qps_from_file_size)

# Since there is no per site throttling limit using the min of the global limits by count, corpus
seconds_by_count = batch_resource_count / global_count_limit
seconds_by_size = batch_corpus_size / global_corpus_size_limit

total_seconds = max(seconds_by_count, seconds_by_size)
return total_seconds / 3600.0
time_in_seconds = batch_resource_count / qps
return time_in_seconds / 3600

def calculate_resource_metrics(
self,
Expand Down
48 changes: 36 additions & 12 deletions ui/files_ui.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ def get_bucket_column_header(low, high):
class FileMigrationEstimatorTool(MigrationEstimatorTool):
def __init__(self):
try:
self.show_eta = os.environ.get("SHOW_ETA", "false").lower() == "true"
self.show_eta = os.environ.get("SHOW_ETA", "true").lower() == "true"
except:
self.show_eta = False

Expand All @@ -66,6 +66,8 @@ def setup_variables(self):
super().setup_variables()
self.include_personal_sites = ctk.BooleanVar(value=True)
self.include_team_sites = ctk.BooleanVar(value=False)
self.eta_min_users = ctk.IntVar(value=1000)
self.eta_max_users = ctk.IntVar(value=5000)

def _is_valid_email(self, val):
return bool(re.match(r'^[^@]+@[^@]+\.[^@]+$', val))
Expand Down Expand Up @@ -167,10 +169,6 @@ def build_config_view(self):
# Concurrency settings
ui_utils.build_concurrency_settings_slider(self, ctk, useConcurrencyHeading=True)

# Migration Plan Options
if self.show_eta:
ui_utils.build_migration_plan_options(self, ctk)

def update_progress(self, msg):
if isinstance(msg, str):
self.log_buffer.append(msg)
Expand Down Expand Up @@ -458,7 +456,7 @@ def execute_migration_scan(self, config):
df = pd.DataFrame(site_data)

if self.show_eta:
df_final, batches_list, total_eta, buckets = self.calculate_migration_batches(df)
df_final, batches_list, total_eta, buckets = self.calculate_migration_batches(df, file_metrics.get("licenseMetrics", {}))

file_metrics["batches"] = batches_list
file_metrics["buckets"] = buckets
Expand Down Expand Up @@ -506,6 +504,7 @@ def execute_migration_scan(self, config):

# Create resolved copy of DataFrame for export to CSV and batches
df_output = base_df.copy()
original_site_ids = df_output["Site Id"].copy()
df_output["Site Id"] = df_output["Site Id"].apply(self._get_display_name)
df_output["Corpus Size"] = df_output["Corpus Size"].apply(self.format_size)
df_output.rename(columns={"Site Id": "Site URL/Name"}, inplace=True)
Expand All @@ -525,9 +524,15 @@ def execute_migration_scan(self, config):
if not batch:
continue
batch_data = df_output[df_output["Suggested Batch"] == batch].copy()
batch_export = batch_data[["Site URL/Name"]].rename(
columns={"Site URL/Name": "Source SharePoint Site ID/URL"}
)
mapping = file_metrics.get("siteIdToMail")
if mapping:
batch_orig_ids = original_site_ids.loc[batch_data.index]
entities = batch_orig_ids.map(mapping).fillna(batch_data["Site URL/Name"])
batch_export = pd.DataFrame({"Entity": entities})
else:
batch_export = batch_data[["Site URL/Name"]].rename(
columns={"Site URL/Name": "Entity"}
)
safe_name = batch.replace(" ", "")
batch_path = os.path.join(batches_dir, f"{safe_name}.csv")
batch_export.to_csv(batch_path, index=False)
Expand Down Expand Up @@ -560,7 +565,7 @@ def build_progress_view(self):
def build_results_view(self):
super().build_results_view()

def calculate_migration_batches(self, df):
def calculate_migration_batches(self, df, licenseMetrics):
# Ensure numeric columns
if "Resource Count" not in df.columns:
df["Resource Count"] = 0
Expand Down Expand Up @@ -594,6 +599,22 @@ def calculate_migration_batches(self, df):
min_batches_seen = float("inf")

def get_batch_eta(subset_df):
def _get_qps_from_license_count():
    """Return the QPS tier that corresponds to the allotted license count.

    Reads ``licenseMetrics`` from the enclosing scope, adds the "User" and
    "Company" entries of its "totalAllotedUnits" mapping, and maps that
    total onto a fixed tier table. Missing or ``None`` values count as
    zero, so incomplete license data falls into the lowest tier instead of
    raising.

    Returns:
        The QPS value for the first tier whose inclusive upper bound is
        greater than or equal to the license count, or 24 above every tier.
    """
    # dict.get(key, default) only substitutes the default for *missing*
    # keys, so guard explicitly against keys that are present but None.
    allotted_units = (licenseMetrics or {}).get("totalAllotedUnits") or {}
    license_count = (allotted_units.get("User") or 0) + (allotted_units.get("Company") or 0)

    # (inclusive upper bound on license count, QPS), in ascending order.
    qps_tiers = (
        (1000, 4.8),
        (5000, 9.6),
        (15000, 14.4),
        (50000, 19.2),
    )
    for max_licenses, tier_qps in qps_tiers:
        if license_count <= max_licenses:
            return tier_qps

    # More licenses than the largest bounded tier.
    return 24

estimator = self.factory.get_files_estimator()
items = []
for _, row in subset_df.iterrows():
Expand All @@ -606,7 +627,7 @@ def get_batch_eta(subset_df):

data = {
"items": items,
"FILES_GLOBAL_COUNT_LIMIT": FILES_GLOBAL_COUNT_LIMIT,
"FILES_GLOBAL_COUNT_LIMIT": _get_qps_from_license_count(),
"FILES_GLOBAL_CORPUS_SIZE_LIMIT": FILES_GLOBAL_CORPUS_SIZE_LIMIT,
}
return estimator.calculate_migration_eta(data)
Expand Down Expand Up @@ -1246,7 +1267,10 @@ def start_scan(self):
if not self.include_personal_sites.get() and not self.include_team_sites.get():
messagebox.showerror("Validation Error", "At least one site type (Personal (OneDrive) or SharePoint) must be selected!")
return


# ETA to be only shown for OneDrive sites atm
self.show_eta = (os.environ.get("SHOW_ETA", "true").lower() == "true") and (self.include_personal_sites.get() and not self.include_team_sites.get())

if self.user_source.get() == "csv":
self._validate_csv()

Expand Down