Skip to content

Commit

Permalink
cleaning dup codes to function
Browse files Browse the repository at this point in the history
  • Loading branch information
i-am-sijia committed Mar 27, 2024
1 parent b6fd307 commit e5d9878
Show file tree
Hide file tree
Showing 5 changed files with 106 additions and 188 deletions.
64 changes: 7 additions & 57 deletions activitysim/core/interaction_sample.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
simulate,
tracing,
workflow,
util,
)
from activitysim.core.skim_dataset import DatasetWrapper
from activitysim.core.skim_dictionary import SkimWrapper
Expand Down Expand Up @@ -238,63 +239,12 @@ def _interaction_sample(
# if not estimation mode, drop unused columns
if not have_trace_targets:

# keep only variables needed for spec
import re

# define a regular expression to find variables in spec
pattern = r"[a-zA-Z_][a-zA-Z0-9_]*"

unique_variables_in_spec = set(
spec.reset_index()["Expression"]
.apply(lambda x: re.findall(pattern, x))
.sum()
)

# when sharrow mode, need to keep skim variables in the chooser table
# if sharrow_enabled:
if locals_d:
unique_variables_in_spec.add(locals_d.get("orig_col_name", None))
unique_variables_in_spec.add(locals_d.get("dest_col_name", None))
if locals_d.get("timeframe") == "trip":
orig_col_name = locals_d.get("ORIGIN", None)
dest_col_name = locals_d.get("DESTINATION", None)
stop_col_name = None
parking_col_name = None
primary_origin_col_name = None
if orig_col_name is None and "od_skims" in locals_d:
orig_col_name = locals_d["od_skims"].orig_key
if dest_col_name is None and "od_skims" in locals_d:
dest_col_name = locals_d["od_skims"].dest_key
if stop_col_name is None and "dp_skims" in locals_d:
stop_col_name = locals_d["dp_skims"].dest_key
if primary_origin_col_name is None and "dnt_skims" in locals_d:
primary_origin_col_name = locals_d["dnt_skims"].dest_key
unique_variables_in_spec.add(orig_col_name)
unique_variables_in_spec.add(dest_col_name)
unique_variables_in_spec.add(parking_col_name)
unique_variables_in_spec.add(primary_origin_col_name)
unique_variables_in_spec.add(stop_col_name)
unique_variables_in_spec.add("trip_period")
unique_variables_in_spec.add("purpose_index_num")

unique_variables_in_spec.add("proto_person_id")
unique_variables_in_spec.add("person_id")
unique_variables_in_spec.add("tour_id")
unique_variables_in_spec.add("tour_mode")
logger.info("Dropping unused variables in chooser table")

logger.info(
"before dropping, the choosers table has {} columns: {}".format(
len(choosers.columns), choosers.columns
)
)
for c in choosers.columns:
if c not in unique_variables_in_spec:
choosers = choosers.drop(c, axis=1)
logger.info(
"after dropping, the choosers table has {} columns: {}".format(
len(choosers.columns), choosers.columns
)
choosers = util.drop_unused_chooser_columns(
choosers,
spec,
locals_d,
custom_chooser=None,
sharrow_enabled=sharrow_enabled,
)

if sharrow_enabled:
Expand Down
58 changes: 8 additions & 50 deletions activitysim/core/interaction_sample_simulate.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import numpy as np
import pandas as pd

from activitysim.core import chunk, interaction_simulate, logit, tracing, workflow
from activitysim.core import chunk, interaction_simulate, logit, tracing, workflow, util
from activitysim.core.simulate import set_skim_wrapper_targets

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -136,62 +136,20 @@ def _interaction_sample_simulate(
)

# drop variables before the interaction dataframe is created
sharrow_enabled = state.settings.sharrow

# check if tracing is enabled and if we have trace targets
# if not estimation mode, drop unused columns
if not have_trace_targets:

# keep only variables needed for spec
import re

# define a regular expression to find variables in spec
pattern = r"[a-zA-Z_][a-zA-Z0-9_]*"

unique_variables_in_spec = set(
spec.reset_index()["Expression"]
.apply(lambda x: re.findall(pattern, x))
.sum()
choosers = util.drop_unused_chooser_columns(
choosers,
spec,
locals_d,
custom_chooser=None,
sharrow_enabled=sharrow_enabled,
)

# when sharrow mode, need to keep skim variables in the chooser table
# if sharrow_enabled:
if locals_d:
unique_variables_in_spec.add(locals_d.get("orig_col_name", None))
unique_variables_in_spec.add(locals_d.get("dest_col_name", None))
if locals_d.get("timeframe") == "trip":
orig_col_name = locals_d.get("ORIGIN", None)
dest_col_name = locals_d.get("DESTINATION", None)
stop_col_name = None
parking_col_name = None
primary_origin_col_name = None
if orig_col_name is None and "od_skims" in locals_d:
orig_col_name = locals_d["od_skims"].orig_key
if dest_col_name is None and "od_skims" in locals_d:
dest_col_name = locals_d["od_skims"].dest_key
if stop_col_name is None and "dp_skims" in locals_d:
stop_col_name = locals_d["dp_skims"].dest_key
if primary_origin_col_name is None and "dnt_skims" in locals_d:
primary_origin_col_name = locals_d["dnt_skims"].dest_key
unique_variables_in_spec.add(orig_col_name)
unique_variables_in_spec.add(dest_col_name)
unique_variables_in_spec.add(parking_col_name)
unique_variables_in_spec.add(primary_origin_col_name)
unique_variables_in_spec.add(stop_col_name)
unique_variables_in_spec.add("trip_period")
unique_variables_in_spec.add("purpose_index_num")

unique_variables_in_spec.add("proto_person_id")
unique_variables_in_spec.add("person_id")
unique_variables_in_spec.add("tour_id")
unique_variables_in_spec.add("tour_mode")
unique_variables_in_spec.add("household_id")
unique_variables_in_spec.add("parent_tour_id")
logger.info("Dropping unused variables in chooser table")

for c in choosers.columns:
if c not in unique_variables_in_spec:
choosers = choosers.drop(c, axis=1)

interaction_df = alternatives.join(choosers, how="left", rsuffix="_chooser")
logger.info(
f"{trace_label} end merging choosers and alternatives to create interaction_df"
Expand Down
52 changes: 7 additions & 45 deletions activitysim/core/interaction_simulate.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
import pandas as pd

from . import chunk, config, logit, simulate, tracing, workflow
from activitysim.core import util

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -702,53 +703,14 @@ def _interaction_simulate(
# if not estimation mode, drop unused columns
if (not have_trace_targets) and (estimator is None):

# drop_variable = True

# keep only variables needed for spec
import re

# define a regular expression to find variables in spec
pattern = r"[a-zA-Z_][a-zA-Z0-9_]*"

unique_variables_in_spec = set(
spec.reset_index()["Expression"]
.apply(lambda x: re.findall(pattern, x))
.sum()
choosers = util.drop_unused_chooser_columns(
choosers,
spec,
locals_d,
custom_chooser=None,
sharrow_enabled=sharrow_enabled,
)

# when sharrow mode, need to keep skim variables in the chooser table
if sharrow_enabled:
if locals_d:
unique_variables_in_spec.add(locals_d.get("orig_col_name", None))
unique_variables_in_spec.add(locals_d.get("dest_col_name", None))
if locals_d.get("timeframe") == "trip":
orig_col_name = locals_d.get("ORIGIN", None)
dest_col_name = locals_d.get("DESTINATION", None)
stop_col_name = None
parking_col_name = None
primary_origin_col_name = None
if orig_col_name is None and "od_skims" in locals_d:
orig_col_name = locals_d["od_skims"].orig_key
if dest_col_name is None and "od_skims" in locals_d:
dest_col_name = locals_d["od_skims"].dest_key
if stop_col_name is None and "dp_skims" in locals_d:
stop_col_name = locals_d["dp_skims"].dest_key
if primary_origin_col_name is None and "dnt_skims" in locals_d:
primary_origin_col_name = locals_d["dnt_skims"].dest_key
unique_variables_in_spec.add(orig_col_name)
unique_variables_in_spec.add(dest_col_name)
unique_variables_in_spec.add(parking_col_name)
unique_variables_in_spec.add(primary_origin_col_name)
unique_variables_in_spec.add(stop_col_name)
unique_variables_in_spec.add("trip_period")
unique_variables_in_spec.add("purpose_index_num")

logger.info("Dropping unused variables in chooser table")

for c in choosers.columns:
if c not in unique_variables_in_spec:
choosers = choosers.drop(c, axis=1)

if locals_d is not None and locals_d.get("_sharrow_skip", False):
sharrow_enabled = False

Expand Down
45 changes: 9 additions & 36 deletions activitysim/core/simulate.py
Original file line number Diff line number Diff line change
Expand Up @@ -1517,47 +1517,20 @@ def _simple_simulate(
# check if tracing is enabled and if we have trace targets
have_trace_targets = state.tracing.has_trace_targets(choosers)

sharrow_enabled = state.settings.sharrow

# if tracing is not enabled, drop unused columns
# if not estimation mode, drop unused columns
if (not have_trace_targets) and (estimator is None):

# keep only variables needed for spec
import re

# define a regular expression to find variables in spec
pattern = r"[a-zA-Z_][a-zA-Z0-9_]*"

unique_variables_in_spec = set(
spec.reset_index()["Expression"]
.apply(lambda x: re.findall(pattern, x))
.sum()
# drop unused variables in chooser table
choosers = util.drop_unused_chooser_columns(
choosers,
spec,
locals_d,
custom_chooser,
sharrow_enabled=sharrow_enabled,
)

sharrow_enabled = state.settings.sharrow

# when sharrow mode, need to keep skim variables in the chooser table
if sharrow_enabled:
unique_variables_in_spec.add(locals_d.get("orig_col_name", None))
unique_variables_in_spec.add(locals_d.get("dest_col_name", None))
unique_variables_in_spec.add(locals_d.get("out_time_col_name", None))
unique_variables_in_spec.add(locals_d.get("in_time_col_name", None))
unique_variables_in_spec.add("out_period")
unique_variables_in_spec.add("in_period")
unique_variables_in_spec.add("trip_period")

if custom_chooser:
import inspect

custom_chooser_lines = inspect.getsource(custom_chooser)
unique_variables_in_spec.update(re.findall(pattern, custom_chooser_lines))

logger.info("Dropping unused variables in chooser table")

# keep only variables needed for spec
choosers = choosers[
[c for c in choosers.columns if c in unique_variables_in_spec]
]

if nest_spec is None:
choices = eval_mnl(
state,
Expand Down
75 changes: 75 additions & 0 deletions activitysim/core/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -638,3 +638,78 @@ def zarr_file_modification_time(zarr_dir: Path):
if t == 0:
raise FileNotFoundError(zarr_dir)
return t


def drop_unused_chooser_columns(
    choosers, spec, locals_d, custom_chooser, sharrow_enabled=False
):
    """
    Drop chooser columns that neither the spec nor the custom chooser mention.

    Every identifier-like token found in the ``Expression`` values of `spec`
    (and in the source code of `custom_chooser`, when one is given) is treated
    as a column name that may be needed.  A few additional column names taken
    from `locals_d`, plus some fixed names, are always retained.  Columns of
    `choosers` outside of that set are removed.

    Parameters
    ----------
    choosers : pandas.DataFrame
        The chooser table to be trimmed.  It is not modified in place.
    spec : pandas.DataFrame
        Model spec; ``Expression`` must be available after ``reset_index()``,
        i.e. either as a column or as an index level.  An empty spec is
        allowed and contributes no names.
    locals_d : dict or None
        Local variables of the model.  The values stored under
        ``orig_col_name`` and ``dest_col_name`` are kept; when
        ``timeframe == "trip"`` the ``ORIGIN``/``DESTINATION``/``PARKING``
        entries and the keys of the ``od_skims``/``dp_skims``/``dnt_skims``
        wrappers are kept as well.
    custom_chooser : callable or None
        Optional custom chooser function whose source is scanned for names.
    sharrow_enabled : bool, default False
        When True, ``out_period``, ``in_period`` and ``purpose_index_num``
        are also kept.

    Returns
    -------
    pandas.DataFrame
        `choosers` restricted to the retained columns, in their original
        order.  If the source of `custom_chooser` cannot be retrieved, the
        table is returned unchanged because the needed columns are unknown.
    """
    import re

    # anything that looks like a python identifier may be a column reference
    identifier = re.compile(r"[a-zA-Z_][a-zA-Z0-9_]*")

    # Collect names expression by expression.  Summing a Series of lists is
    # quadratic and yields the integer 0 (not a list) for an empty spec.
    keep = set()
    for expression in spec.reset_index()["Expression"]:
        keep.update(identifier.findall(expression))

    if locals_d:
        keep.add(locals_d.get("orig_col_name"))
        keep.add(locals_d.get("dest_col_name"))
        if locals_d.get("timeframe") == "trip":
            orig_col_name = locals_d.get("ORIGIN")
            dest_col_name = locals_d.get("DESTINATION")
            parking_col_name = locals_d.get("PARKING")
            stop_col_name = None
            primary_origin_col_name = None
            # fall back on the skim wrappers' keys when the column names
            # are not given explicitly
            if "od_skims" in locals_d:
                if orig_col_name is None:
                    orig_col_name = locals_d["od_skims"].orig_key
                if dest_col_name is None:
                    dest_col_name = locals_d["od_skims"].dest_key
            if "dp_skims" in locals_d:
                stop_col_name = locals_d["dp_skims"].dest_key
            if "dnt_skims" in locals_d:
                primary_origin_col_name = locals_d["dnt_skims"].dest_key
            keep.update(
                (
                    orig_col_name,
                    dest_col_name,
                    parking_col_name,
                    primary_origin_col_name,
                    stop_col_name,
                    "trip_period",
                )
            )
        # when using trip_scheduling_choice for trip scheduling
        keep.add("last_outbound_stop")
        keep.add("last_inbound_stop")

    # when sharrow mode, need to keep the following columns in the choosers table
    if sharrow_enabled:
        keep.update(("out_period", "in_period", "purpose_index_num"))

    if custom_chooser:
        import inspect

        try:
            custom_chooser_lines = inspect.getsource(custom_chooser)
        except (OSError, TypeError):
            # built-ins, partials and callables without retrievable source
            # cannot be scanned; dropping columns blindly could break the
            # chooser, so keep the table as it is
            logger.warning(
                "Cannot inspect source of custom chooser, "
                "keeping all columns in chooser table"
            )
            return choosers
        keep.update(identifier.findall(custom_chooser_lines))

    # lookups that found nothing left a None placeholder; it is not a column
    keep.discard(None)

    logger.info("Dropping unused variables in chooser table")

    logger.info(
        "before dropping, the choosers table has %s columns: %s",
        len(choosers.columns),
        choosers.columns,
    )

    # keep only variables needed for spec
    choosers = choosers[[c for c in choosers.columns if c in keep]]

    logger.info(
        "after dropping, the choosers table has %s columns: %s",
        len(choosers.columns),
        choosers.columns,
    )

    return choosers

0 comments on commit e5d9878

Please sign in to comment.