Skip to content

Commit eeb1685

Browse files
authored
Merge pull request #1430 from marc-vdm/ca_range_score_fix
additional contribution analysis fixes
2 parents 1e02314 + ed7d705 commit eeb1685

7 files changed

Lines changed: 214 additions & 91 deletions

File tree

activity_browser/bwutils/multilca.py

Lines changed: 24 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -3,20 +3,20 @@
33
from typing import Iterable, Optional, Union
44
from logging import getLogger
55

6-
import bw2analyzer as ba
76
import bw2calc as bc
87
import numpy as np
98
import pandas as pd
109
from PySide2.QtWidgets import QApplication, QMessageBox
1110

1211
from activity_browser.mod import bw2data as bd
12+
from activity_browser.mod.bw2analyzer import ABContributionAnalysis
1313

1414
from .commontasks import wrap_text
1515
from .errors import ReferenceFlowValueError
1616
from .metadata import AB_metadata
1717

1818
log = getLogger(__name__)
19-
ca = ba.ContributionAnalysis()
19+
ca = ABContributionAnalysis()
2020

2121

2222
class MLCA(object):
@@ -394,20 +394,24 @@ def __init__(self, mlca):
394394
),
395395
}
396396

397-
def normalize(self, contribution_array: np.ndarray) -> np.ndarray:
398-
"""Normalise the contribution array.
397+
def normalize(self, contribution_array: np.ndarray, total_range:bool=True) -> np.ndarray:
398+
"""Normalize the contribution array based on range or score
399399
400400
Parameters
401401
----------
402402
contribution_array : A 2-dimensional contribution array
403+
total_range : A bool, True for normalization based on range, False for score
403404
404405
Returns
405406
-------
406407
2-dimensional array of same shape, with scores normalized.
407408
408409
"""
409-
scores = abs(contribution_array.sum(axis=1, keepdims=True))
410-
return contribution_array / scores
410+
if total_range: # total is based on the range
411+
total = abs(abs(contribution_array).sum(axis=1, keepdims=True))
412+
else: # total is based on the score
413+
total = abs(contribution_array.sum(axis=1, keepdims=True))
414+
return contribution_array / total
411415

412416
def _build_dict(
413417
self,
@@ -437,12 +441,13 @@ def _build_dict(
437441
for fu_or_method, col in FU_M_index.items():
438442
contribution_col = contributions[col, :]
439443
if total_range: # total is based on the range
440-
total = np.abs(contribution_col).sum()
444+
normalize_to = np.abs(contribution_col).sum()
441445
else: # total is based on the score
442-
total = contribution_col.sum()
446+
normalize_to = contribution_col.sum()
447+
score = contribution_col.sum()
443448

444449
top_contribution = ca.sort_array(
445-
contribution_col, limit=limit, limit_type=limit_type, total=total
450+
contribution_col, limit=limit, limit_type=limit_type, total=normalize_to
446451
)
447452

448453
# split and calculate remaining rest sections for positive and negative part
@@ -458,7 +463,7 @@ def _build_dict(
458463
cont_per = OrderedDict()
459464
cont_per.update(
460465
{
461-
("Total", ""): total,
466+
("Score", ""): score,
462467
("Rest (+)", ""): pos_rest,
463468
("Rest (-)", ""): neg_rest,
464469
}
@@ -602,20 +607,21 @@ def get_labelled_contribution_dict(
602607
# If the cont_dict has tuples for keys, coerce df.columns into MultiIndex
603608
if all(isinstance(k, tuple) for k in cont_dict.keys()):
604609
df.columns = pd.MultiIndex.from_tuples(df.columns)
605-
special_keys = [("Total", ""), ("Rest (+)", ""), ("Rest (-)", "")]
610+
611+
special_keys = [("Score", ""), ("Rest (+)", ""), ("Rest (-)", "")]
606612
# replace all 0 values with NaN and drop all rows with only NaNs
607613
df = df.replace(0, np.nan)
608614

609-
# sort on absolute mean of a row
610-
df_bot = deepcopy(df.loc[df.index.difference(special_keys)].dropna(how="all"))
611-
612-
func = lambda row: np.nanmean(np.abs(row))
615+
# sort on mean square of a row
616+
df_bot = deepcopy(df.iloc[3:, :])
617+
func = lambda row: np.nanmean(np.square(row))
613618
if len(df_bot) > 1: # but only sort if there is something to sort
614619
df_bot["_sort_me_"] = (df_bot.select_dtypes(include=np.number)).apply(func, axis=1)
615620
df_bot.sort_values(by="_sort_me_", ascending=False, inplace=True)
616621
del df_bot["_sort_me_"]
617622

618623
df = pd.concat([df.iloc[:3, :], df_bot], axis=0)
624+
df.dropna(how="all", inplace=True)
619625

620626
if not mask:
621627
joined = self.join_df_with_metadata(
@@ -638,7 +644,7 @@ def adjust_table_unit(df: pd.DataFrame, method: Optional[tuple]) -> pd.DataFrame
638644
"""Given a dataframe, adjust the unit of the table to either match the given method, or not exist."""
639645
if "unit" not in df.columns:
640646
return df
641-
keys = df.index[~df["index"].isin({"Total", "Rest (+)", "Rest (-)"})]
647+
keys = df.index[~df["index"].isin({"Score", "Rest (+)", "Rest (-)"})]
642648
unit = bd.Method(method).metadata.get("unit") if method else "unit"
643649
df.loc[keys, "unit"] = unit
644650
return df
@@ -850,7 +856,7 @@ def top_elementary_flow_contributions(
850856

851857
# Normalise if required
852858
if normalize:
853-
contributions = self.normalize(contributions)
859+
contributions = self.normalize(contributions, total_range)
854860

855861
top_cont_dict = self._build_dict(
856862
contributions, index, rev_index, limit, limit_type, total_range
@@ -906,7 +912,7 @@ def top_process_contributions(
906912

907913
# Normalise if required
908914
if normalize:
909-
contributions = self.normalize(contributions)
915+
contributions = self.normalize(contributions, total_range)
910916

911917
top_cont_dict = self._build_dict(
912918
contributions, index, rev_index, limit, limit_type, total_range

activity_browser/docs/wiki/LCA-Results.md

Lines changed: 37 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -98,26 +98,30 @@ The total impact is still 1.6.
9898
In this section we generalize a little bit for the different contribution approaches,
9999
we call the _from_ part of the contributions (the EFs or activities or FT above) _entities_.
100100

101-
There are several ways Activity Browser manipulates your results by default.
102-
- The results are **sorted** so that the row with the largest (absolute) average values are shown first.
101+
There are several ways Activity Browser manipulates your results by default:
102+
- All reference flows are compared to each other.
103+
- The contributions are **sorted** so that the most important contributions are shown first.
104+
- The sorting is done on the _mean square_ (ignoring zero values) of each row of contributing entities.
103105
- A `cut-off` of 5% is applied: only entities that contribute at least 5% to the total range of results are shown;
104-
all other entities are grouped into a `Rest (+)` or `Rest (-)` groups.
105-
- The contributions are _normalized_ to the impact of that reference flow, meaning they are show as a percentage,
106-
counting up to 100% for every item you compare.
107-
108-
These actions are taken to show you the most relevant results.
106+
all other entities are grouped into the `Rest (+)` and `Rest (-)` groups for positive and negative
107+
contributions respectively.
108+
- The contributions are _normalized_ to the LCA scores,
109+
meaning contributions are shown as a percentage contribution of the score, counting up to 100%.
109110

111+
These defaults exist to show you the most relevant results in most cases, but you may often want to make this more
112+
specific for your analysis.
110113
You can manually manipulate the contribution results in the menu shown below, which we will explain bit by bit
111114
in the next sections.
112115
![contributions cutoff](./assets/contribution_manipulation.png)
113116

114117
#### Cut-off
115118
You can manually change the `Cut-off type` of the results in two ways, `Relative` or `Top #`.
116-
The `Relative` mode shows contributions _from_ entities of _x_% or higher.
117-
The `Top #` mode shows contributions from the _x_ entities that contribute the most (as absolute).
119+
- The `Relative` mode shows contributions _from_ entities of _x_% or higher.
120+
- The `Top #` mode shows contributions from the _x_ entities that contribute the most (in absolute terms).
121+
118122
You can adjust the `Cut-off level` to change how many results you see.
119123

120-
All results that don't make the cut-off will be grouped into the `Rest (+)` and `Rest (-)` groups.
124+
All contributions that are below the cut-off will be grouped into the `Rest (+)` and `Rest (-)` groups.
121125
The Rest groups are only present when there are positive or negative numbers remaining for the respective rest groups.
122126

123127
#### Compare
@@ -131,33 +135,42 @@ The compare mode defines what is shown in the figure.
131135

132136
#### Aggregation
133137
The `Aggregate by` menu can be used to _group_ results based on field names.
134-
As an example, EF contributions can be grouped on the name,
135-
for example to group all flows with the same name.
136-
Another example for process contributions can be grouped based on their reference product name.
138+
This is useful to group contributors together so you have fewer (and larger) contributors.
139+
As an example, EF contributions can be grouped on the name to group all flows with the same name
140+
(which would for example group all EFs with the name _carbon dioxide_ together).
141+
As another example, process contributions can be grouped based on their reference product name
142+
(which would for example group all processes with the product name _electricity, high voltage_ together).
137143

138144
#### Plot and Table
139145
By default, Activity Browser shows a plot and a table.
140-
You can disable one of them if you want to focus on one of them.
146+
You can disable one of them if you want to focus on the other.
141147

142148
#### Relative and Absolute
143149
You can choose between `Relative` and `Absolute` results.
144-
The `Relative` results will sum to 100% (the total score), the `Absolute` results will sum to the impact score.
150+
The `Relative` results will sum to 100% (the total `Range` or `Score`),
151+
the `Absolute` results will sum to the impact score.
152+
For `Relative`, you can choose what you use as the 100% reference, the `Range` or the `Score`.
145153

146154
#### Range and Score
147-
If the Cut-off type is `Relative`, you can choose between `Range` and `Score`.
148-
This determines what you use as the _total_ to which the relative contributions are counted.
149-
For `Range`, this is the full _range_ of results, for example, if all your negative results together have a score of -2
150-
and all your positive results together have a score of 10, the _range_ is 12 (-2 * -1 + 10).
151-
For `Score`, this is the total score (sum) of the results, for example, if all your negative results together have a
152-
score of -2 and all your positive results together have a score of 10, the _score_ is 8 (-2 + 10).
153-
The `Range` or `Score` setting are only used when your results contain both positive and negative results.
155+
The `Range`/`Score` determines what you use as the _total_ to which the contributions are counted.
156+
- For `Range`, this is the full _range_ of results
157+
- For example, if all your negative results together have a score of -2 and all your positive results together have a
158+
score of 10, the _range_ is 12 (-2 * -1 + 10).
159+
- An entity with a contribution of 4 would have a relative contribution of 4/12 = 33.3...%.
160+
- For `Score`, this is the total score (sum) of the results
161+
- For example, if all your negative results together have a score of -2 and all your positive results together have a
162+
score of 10, the _score_ is 8 (-2 + 10).
163+
- An entity with a contribution of 4 would have a relative contribution of 4/8 = 50%.
164+
165+
The `Range` or `Score` setting is only relevant when your results contain both positive and negative contributions.
154166

155167
### Positive and negative numbers in contribution results
156168
It can happen in LCA that you get both positive and negative numbers in your contribution results.
157-
Some of these reasons could be negative characterization factors, flows with negative numbers or using substitution flows.
169+
Some reasons for this could be negative characterization factors, flows with negative numbers or using
170+
substitution flows.
158171

159172
When there are both positive and negative numbers in the result, Activity Browser will show a marker to indicate
160-
where the total score is, and show positive and negative contributions to the impact separately.
173+
where the total _score_ is, and show positive and negative contributions to the impact separately.
161174

162175
Below is a simple example (with unrealistic values) to demonstrate this:
163176

0 commit comments

Comments
 (0)